diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..5313f27f7e74524ca470ebef2d4c04202f4ca81b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+data_gen/utils/mp_feature_extractors/face_landmarker.task filter=lfs diff=lfs merge=lfs -text
+pytorch3d/.github/bundle_adjust.gif filter=lfs diff=lfs merge=lfs -text
+pytorch3d/.github/camera_position_teapot.gif filter=lfs diff=lfs merge=lfs -text
+pytorch3d/.github/fit_nerf.gif filter=lfs diff=lfs merge=lfs -text
+pytorch3d/.github/fit_textured_volume.gif filter=lfs diff=lfs merge=lfs -text
+pytorch3d/.github/implicitron_config.gif filter=lfs diff=lfs merge=lfs -text
+pytorch3d/.github/nerf_project_logo.gif filter=lfs diff=lfs merge=lfs -text
+pytorch3d/docs/notes/assets/batch_modes.gif filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..a269d9ca55511e478e976768a1302f2121c17868
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,199 @@
+# big files
+data_util/face_tracking/3DMM/01_MorphableModel.mat
+data_util/face_tracking/3DMM/3DMM_info.npy
+
+!/deep_3drecon/BFM/.gitkeep
+deep_3drecon/BFM/Exp_Pca.bin
+deep_3drecon/BFM/01_MorphableModel.mat
+deep_3drecon/BFM/BFM_model_front.mat
+deep_3drecon/network/FaceReconModel.pb
+deep_3drecon/checkpoints/*
+
+.vscode
+### Project ignore
+/checkpoints/*
+!/checkpoints/.gitkeep
+/data/*
+!/data/.gitkeep
+infer_out
+rsync
+.idea
+.DS_Store
+bak
+tmp
+*.tar.gz
+mos
+nbs
+/configs_usr/*
+!/configs_usr/.gitkeep
+/egs_usr/*
+!/egs_usr/.gitkeep
+/rnnoise
+#/usr/*
+#!/usr/.gitkeep
+scripts_usr
+
+# Created by .ignore support plugin (hsz.mobi)
+### Python template
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+data_util/deepspeech_features/deepspeech-0.9.2-models.pbmm
+deep_3drecon/mesh_renderer/bazel-bin
+deep_3drecon/mesh_renderer/bazel-mesh_renderer
+deep_3drecon/mesh_renderer/bazel-out
+deep_3drecon/mesh_renderer/bazel-testlogs
+
+.nfs*
+infer_outs/*
+
+*.pth
+venv_113/*
+*.pt
+experiments/trials
+flame_3drecon/*
+
+temp/
+/kill.sh
+/datasets
+data_util/imagenet_classes.txt
+process_data_May.sh
+/env_prepare_reproduce.md
+/my_debug.py
+
+utils/metrics/shape_predictor_68_face_landmarks.dat
+*.mp4
+_torchshow/
+*.png
+*.jpg
+
+*.mrc
+
+deep_3drecon/BFM/BFM_exp_idx.mat
+deep_3drecon/BFM/BFM_front_idx.mat
+deep_3drecon/BFM/facemodel_info.mat
+deep_3drecon/BFM/index_mp468_from_mesh35709.npy
+deep_3drecon/BFM/mediapipe_in_bfm53201.npy
+deep_3drecon/BFM/std_exp.txt
+# git cannot re-include a file whose parent directory is excluded, and the
+# `/data/*` rule above excludes `data/raw`; re-open the directory chain first,
+# then whitelist the example files (this also overrides *.png / *.mp4 / *.jpg).
+!/data/raw/
+/data/raw/*
+!/data/raw/examples/
+!/data/raw/examples/*
\ No newline at end of file
diff --git a/README-zh.md b/README-zh.md
new file mode 100644
index 0000000000000000000000000000000000000000..cc29e185268d89c8d4e7ca3b58e063e12a8c4533
--- /dev/null
+++ b/README-zh.md
@@ -0,0 +1,137 @@
+# Real3D-Portrait: One-shot Realistic 3D Talking Portrait Synthesis | ICLR 2024 Spotlight
+[![arXiv](https://img.shields.io/badge/arXiv-Paper-%3CCOLOR%3E.svg)](https://arxiv.org/abs/2401.08503)| [![GitHub Stars](https://img.shields.io/github/stars/yerfor/Real3DPortrait
+)](https://github.com/yerfor/Real3DPortrait) | [English Readme](./README.md)
+
+这个仓库是Real3D-Portrait的官方PyTorch实现, 用于实现单参考图(one-shot)、高视频真实度(video reality)的虚拟人视频合成。您可以访问我们的[项目页面](https://real3dportrait.github.io/)以观看Demo视频, 阅读我们的[论文](https://arxiv.org/pdf/2401.08503.pdf)以了解技术细节。
+
+<p align="center">
+    <br>
+    <img src="assets/real3dportrait.png" width="100%"/>
+    <br>
+</p>
+
+# 快速上手！
+## 安装环境
+请参照[环境配置文档](docs/prepare_env/install_guide-zh.md)，配置Conda环境`real3dportrait`
+## 下载预训练与第三方模型
+### 3DMM BFM模型
+下载3DMM BFM模型：[Google Drive](https://drive.google.com/drive/folders/1o4t5YIw7w4cMUN4bgU9nPf6IyWVG1bEk?usp=sharing) 或 [BaiduYun Disk](https://pan.baidu.com/s/1aqv1z_qZ23Vp2VP4uxxblQ?pwd=m9q5 ) 提取码: m9q5
+
+
+下载完成后，放置全部的文件到`deep_3drecon/BFM`里，文件结构如下：
+```
+deep_3drecon/BFM/
+├── 01_MorphableModel.mat
+├── BFM_exp_idx.mat
+├── BFM_front_idx.mat
+├── BFM_model_front.mat
+├── Exp_Pca.bin
+├── facemodel_info.mat
+├── index_mp468_from_mesh35709.npy
+├── mediapipe_in_bfm53201.npy
+└── std_exp.txt
+```
+
+### 预训练模型
+下载预训练的Real3D-Portrait：[Google Drive](https://drive.google.com/drive/folders/1MAveJf7RvJ-Opg1f5qhLdoRoC_Gc6nD9?usp=sharing) 或 [BaiduYun Disk](https://pan.baidu.com/s/1Mjmbn0UtA1Zm9owZ7zWNgQ?pwd=6x4f ) 提取码: 6x4f
+  
+下载完成后，放置全部的文件到`checkpoints`里并解压，文件结构如下：
+```
+checkpoints/
+├── 240126_real3dportrait_orig
+│   ├── audio2secc_vae
+│   │   ├── config.yaml
+│   │   └── model_ckpt_steps_400000.ckpt
+│   └── secc2plane_torso_orig
+│       ├── config.yaml
+│       └── model_ckpt_steps_100000.ckpt
+└── pretrained_ckpts
+    └── mit_b0.pth
+```
+
+## 推理测试
+我们目前提供了**命令行（CLI）**与**Gradio WebUI**推理方式，并将在未来提供Google Colab方式。我们同时支持音频驱动（Audio-Driven）与视频驱动（Video-Driven）：
+
+- 音频驱动场景下，需要至少提供`source image`与`driving audio`
+- 视频驱动场景下，需要至少提供`source image`与`driving expression video`
+
+### Gradio WebUI推理
+启动Gradio WebUI，按照提示上传素材，点击`Generate`按钮即可推理：
+```bash
+python inference/app_real3dportrait.py
+```
+
+### 命令行推理
+首先，切换至项目根目录并启用Conda环境：
+```bash
+cd <Real3DPortraitRoot>
+conda activate real3dportrait
+export PYTHONPATH=./
+```
+音频驱动场景下，需要至少提供source image与driving audio，推理指令：
+```bash
+python inference/real3d_infer.py \
+--src_img <PATH_TO_SOURCE_IMAGE> \
+--drv_aud <PATH_TO_AUDIO> \
+--drv_pose <PATH_TO_POSE_VIDEO, OPTIONAL> \
+--bg_img <PATH_TO_BACKGROUND_IMAGE, OPTIONAL> \
+--out_name <PATH_TO_OUTPUT_VIDEO, OPTIONAL>
+```
+视频驱动场景下，需要至少提供source image与driving expression video（作为drv_aud参数），推理指令：
+```bash
+python inference/real3d_infer.py \
+--src_img <PATH_TO_SOURCE_IMAGE> \
+--drv_aud <PATH_TO_EXP_VIDEO> \
+--drv_pose <PATH_TO_POSE_VIDEO, OPTIONAL> \
+--bg_img <PATH_TO_BACKGROUND_IMAGE, OPTIONAL> \
+--out_name <PATH_TO_OUTPUT_VIDEO, OPTIONAL>
+```
+一些可选参数注释：
+- `--drv_pose` 指定时提供了运动pose信息，不指定则为静态运动
+- `--bg_img` 指定时提供了背景信息，不指定则为source image提取的背景
+- `--mouth_amp` 嘴部张幅参数，值越大张幅越大
+- `--map_to_init_pose` 值为`True`时，首帧的pose将被映射到source pose，后续帧也作相同变换
+- `--temperature` 代表audio2motion的采样温度，值越大结果越多样，但同时精确度越低
+- `--out_name` 不指定时，结果将保存在`infer_out/tmp/`中
+- `--out_mode` 值为`final`时，只输出说话人视频；值为`concat_debug`时，同时输出一些可视化的中间结果
+
+指令示例：
+```bash
+python inference/real3d_infer.py \
+--src_img data/raw/examples/Macron.png \
+--drv_aud data/raw/examples/Obama_5s.wav \
+--drv_pose data/raw/examples/May_5s.mp4 \
+--bg_img data/raw/examples/bg.png \
+--out_name output.mp4 \
+--out_mode concat_debug
+```
+
+## ToDo
+- [x] **Release Pre-trained weights of Real3D-Portrait.**
+- [x] **Release Inference Code of Real3D-Portrait.**
+- [x] **Release Gradio Demo of Real3D-Portrait.**
+- [ ] **Release Google Colab of Real3D-Portrait.**
+- [ ] **Release Training Code of Real3D-Portrait.**
+
+# 引用我们
+如果这个仓库对你有帮助，请考虑引用我们的工作：
+```
+@article{ye2024real3d,
+  title={Real3D-Portrait: One-shot Realistic 3D Talking Portrait Synthesis},
+  author={Ye, Zhenhui and Zhong, Tianyun and Ren, Yi and Yang, Jiaqi and Li, Weichuang and Huang, Jiawei and Jiang, Ziyue and He, Jinzheng and Huang, Rongjie and Liu, Jinglin and others},
+  journal={arXiv preprint arXiv:2401.08503},
+  year={2024}
+}
+@article{ye2023geneface++,
+  title={GeneFace++: Generalized and Stable Real-Time Audio-Driven 3D Talking Face Generation},
+  author={Ye, Zhenhui and He, Jinzheng and Jiang, Ziyue and Huang, Rongjie and Huang, Jiawei and Liu, Jinglin and Ren, Yi and Yin, Xiang and Ma, Zejun and Zhao, Zhou},
+  journal={arXiv preprint arXiv:2305.00787},
+  year={2023}
+}
+@article{ye2023geneface,
+  title={GeneFace: Generalized and High-Fidelity Audio-Driven 3D Talking Face Synthesis},
+  author={Ye, Zhenhui and Jiang, Ziyue and Ren, Yi and Liu, Jinglin and He, Jinzheng and Zhao, Zhou},
+  journal={arXiv preprint arXiv:2301.13430},
+  year={2023}
+}
+```
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..3b25bd20d9032bc7fccc9ff27c93d52053cbbbd6
--- /dev/null
+++ b/README.md
@@ -0,0 +1,137 @@
+# Real3D-Portrait: One-shot Realistic 3D Talking Portrait Synthesis | ICLR 2024 Spotlight
+[![arXiv](https://img.shields.io/badge/arXiv-Paper-%3CCOLOR%3E.svg)](https://arxiv.org/abs/2401.08503)| [![GitHub Stars](https://img.shields.io/github/stars/yerfor/Real3DPortrait
+)](https://github.com/yerfor/Real3DPortrait) | [中文文档](./README-zh.md)
+
+This is the official PyTorch implementation of Real3D-Portrait, for one-shot talking portrait synthesis with high video realism. You can visit our [Demo Page](https://real3dportrait.github.io/) to watch demo videos, and read our [Paper](https://arxiv.org/pdf/2401.08503.pdf) for technical details.
+
+<p align="center">
+    <br>
+    <img src="assets/real3dportrait.png" width="100%"/>
+    <br>
+</p>
+
+# Quick Start!
+## Environment Installation
+Please refer to [Installation Guide](docs/prepare_env/install_guide.md), prepare a Conda environment `real3dportrait`.
+## Download Pre-trained & Third-Party Models
+### 3DMM BFM Model
+Download 3DMM BFM Model from [Google Drive](https://drive.google.com/drive/folders/1o4t5YIw7w4cMUN4bgU9nPf6IyWVG1bEk?usp=sharing) or [BaiduYun Disk](https://pan.baidu.com/s/1aqv1z_qZ23Vp2VP4uxxblQ?pwd=m9q5 ) with Password m9q5. 
+
+
+Put all the files in `deep_3drecon/BFM`, the file structure will be like this:
+```
+deep_3drecon/BFM/
+├── 01_MorphableModel.mat
+├── BFM_exp_idx.mat
+├── BFM_front_idx.mat
+├── BFM_model_front.mat
+├── Exp_Pca.bin
+├── facemodel_info.mat
+├── index_mp468_from_mesh35709.npy
+├── mediapipe_in_bfm53201.npy
+└── std_exp.txt
+```
+
+### Pre-trained Real3D-Portrait
+Download Pre-trained Real3D-Portrait: [Google Drive](https://drive.google.com/drive/folders/1MAveJf7RvJ-Opg1f5qhLdoRoC_Gc6nD9?usp=sharing) or [BaiduYun Disk](https://pan.baidu.com/s/1Mjmbn0UtA1Zm9owZ7zWNgQ?pwd=6x4f ) with Password 6x4f
+  
+Put the zip files in `checkpoints` and unzip them, the file structure will be like this:
+```
+checkpoints/
+├── 240126_real3dportrait_orig
+│   ├── audio2secc_vae
+│   │   ├── config.yaml
+│   │   └── model_ckpt_steps_400000.ckpt
+│   └── secc2plane_torso_orig
+│       ├── config.yaml
+│       └── model_ckpt_steps_100000.ckpt
+└── pretrained_ckpts
+    └── mit_b0.pth
+```
+
+## Inference
+Currently, we provide **CLI** and **Gradio WebUI** for inference, and Google Colab will be provided in the future. We support both Audio-Driven and Video-Driven methods:
+
+- For audio-driven, at least prepare `source image` and `driving audio`
+- For video-driven, at least prepare `source image` and `driving expression video`
+
+### Gradio WebUI
+Run the Gradio WebUI demo, upload resources on the web page, and click the `Generate` button to run inference:
+```bash
+python inference/app_real3dportrait.py
+```
+
+### CLI Inference
+Firstly, switch to project folder and activate conda environment:
+```bash
+cd <Real3DPortraitRoot>
+conda activate real3dportrait
+export PYTHONPATH=./
+```
+For audio-driven, provide source image and driving audio:
+```bash
+python inference/real3d_infer.py \
+--src_img <PATH_TO_SOURCE_IMAGE> \
+--drv_aud <PATH_TO_AUDIO> \
+--drv_pose <PATH_TO_POSE_VIDEO, OPTIONAL> \
+--bg_img <PATH_TO_BACKGROUND_IMAGE, OPTIONAL> \
+--out_name <PATH_TO_OUTPUT_VIDEO, OPTIONAL>
+```
+For video-driven, provide source image and driving expression video (passed as the `--drv_aud` parameter):
+```bash
+python inference/real3d_infer.py \
+--src_img <PATH_TO_SOURCE_IMAGE> \
+--drv_aud <PATH_TO_EXP_VIDEO> \
+--drv_pose <PATH_TO_POSE_VIDEO, OPTIONAL> \
+--bg_img <PATH_TO_BACKGROUND_IMAGE, OPTIONAL> \
+--out_name <PATH_TO_OUTPUT_VIDEO, OPTIONAL>
+```
+Some optional parameters:
+- `--drv_pose` provide motion pose information, default to be static poses
+- `--bg_img` provide background information, default to be image extracted from source
+- `--mouth_amp` mouth amplitude, higher value leads to wider mouth
+- `--map_to_init_pose` when set to `True`, the initial pose will be mapped to source pose, and other poses will be equally transformed
+- `--temperature` stands for the sampling temperature of audio2motion, higher for more diverse results at the expense of lower accuracy
+- `--out_name` When not assigned, the results will be stored at `infer_out/tmp/`.
+- `--out_mode` When `final`, only outputs the final result; when `concat_debug`, also outputs visualization of several intermediate process.
+
+Commandline example:
+```bash
+python inference/real3d_infer.py \
+--src_img data/raw/examples/Macron.png \
+--drv_aud data/raw/examples/Obama_5s.wav \
+--drv_pose data/raw/examples/May_5s.mp4 \
+--bg_img data/raw/examples/bg.png \
+--out_name output.mp4 \
+--out_mode concat_debug
+```
+
+# ToDo
+- [x] **Release Pre-trained weights of Real3D-Portrait.**
+- [x] **Release Inference Code of Real3D-Portrait.**
+- [x] **Release Gradio Demo of Real3D-Portrait.**
+- [ ] **Release Google Colab of Real3D-Portrait.**
+- [ ] **Release Training Code of Real3D-Portrait.**
+
+# Citation
+If you find this repo helpful to your work, please consider citing us:
+```
+@article{ye2024real3d,
+  title={Real3D-Portrait: One-shot Realistic 3D Talking Portrait Synthesis},
+  author={Ye, Zhenhui and Zhong, Tianyun and Ren, Yi and Yang, Jiaqi and Li, Weichuang and Huang, Jiawei and Jiang, Ziyue and He, Jinzheng and Huang, Rongjie and Liu, Jinglin and others},
+  journal={arXiv preprint arXiv:2401.08503},
+  year={2024}
+}
+@article{ye2023geneface++,
+  title={GeneFace++: Generalized and Stable Real-Time Audio-Driven 3D Talking Face Generation},
+  author={Ye, Zhenhui and He, Jinzheng and Jiang, Ziyue and Huang, Rongjie and Huang, Jiawei and Liu, Jinglin and Ren, Yi and Yin, Xiang and Ma, Zejun and Zhao, Zhou},
+  journal={arXiv preprint arXiv:2305.00787},
+  year={2023}
+}
+@article{ye2023geneface,
+  title={GeneFace: Generalized and High-Fidelity Audio-Driven 3D Talking Face Synthesis},
+  author={Ye, Zhenhui and Jiang, Ziyue and Ren, Yi and Liu, Jinglin and He, Jinzheng and Zhao, Zhou},
+  journal={arXiv preprint arXiv:2301.13430},
+  year={2023}
+}
+```
\ No newline at end of file
diff --git a/checkpoints/.gitkeep b/checkpoints/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/data_gen/eg3d/convert_to_eg3d_convention.py b/data_gen/eg3d/convert_to_eg3d_convention.py
new file mode 100644
index 0000000000000000000000000000000000000000..45d4e4b11dc69aa82ac0194c0df1b30d0ff020a7
--- /dev/null
+++ b/data_gen/eg3d/convert_to_eg3d_convention.py
@@ -0,0 +1,146 @@
+import numpy as np
+import torch
+import copy
+from utils.commons.tensor_utils import convert_to_tensor, convert_to_np
+from deep_3drecon.deep_3drecon_models.bfm import ParametricFaceModel
+
+
+def _fix_intrinsics(intrinsics):
+    """
+    Return a copy of the camera matrix with the fixed, normalized focal length
+    and principal point written in; the input itself is left untouched.
+
+    intrinsics: [3,3], not batch-wise
+    """
+    # unnormalized                                normalized
+
+    # [[ f_x, s=0,    x_0]             [[ f_x/size_x,   s=0,            x_0/size_x=0.5]
+    #  [ 0,   f_y,  y_0]      ->      [ 0,            f_y/size_y,   y_0/size_y=0.5]
+    #  [ 0,   0,    1  ]]             [ 0,            0,            1         ]]
+    fixed = np.array(intrinsics).copy()
+    assert fixed.shape == (3, 3), fixed
+    # the focal length and principal point are overwritten with constants,
+    # whatever values the caller passed in
+    fixed[0,0] = fixed[1,1] = 2985.29/700
+    fixed[0,2] = fixed[1,2] = 1/2
+    # every remaining entry must already look like a skew-free camera matrix
+    expected_entries = (((0, 1), 0), ((2, 2), 1), ((1, 0), 0), ((2, 0), 0), ((2, 1), 0))
+    for (row, col), expected in expected_entries:
+        assert fixed[row, col] == expected
+    return fixed
+
+# Used in original submission
+def _fix_pose_orig(pose):
+    """
+    Return a copy of the pose whose translation column is rescaled to length 2.7.
+
+    pose: [4,4], not batch-wise
+    """
+    rescaled = np.array(pose).copy()
+    cam_location = rescaled[:3, 3]
+    # distance of the translation from the origin
+    dist = np.linalg.norm(cam_location)
+    rescaled[:3, 3] = cam_location/dist * 2.7
+    return rescaled
+
+
+def _eg3d_camera_from_euler_trans(angle, trans):
+    """
+    Build the camera pose and the normalized intrinsics of a single frame.
+
+    angle: [3,] euler-angle tensor, passed to ParametricFaceModel.compute_rotation
+    trans: [3,] translation; it is modified in place, so callers pass a copy
+    return: (c2w [4,4], convention_c2w [4,4], intrinsics [3,3])
+    """
+    # NOTE: an extra euler offset of [0, pi, pi] for the eg3d axis difference
+    # (see data_gen/process_ffhq_for_eg3d/transplant_eg3d_ckpt_into_our_convention.ipynb)
+    # was disabled in the original code and is intentionally not applied here.
+    R = ParametricFaceModel.compute_rotation(angle.unsqueeze(0))[0].cpu().numpy()
+    trans[2] += -10
+    c = -np.dot(R, trans)
+    c *= 0.27 # normalize camera radius
+    c[1] += 0.006 # additional offset used in submission
+    c[2] += 0.161 # additional offset used in submission
+    pose = np.eye(4)
+    pose[:3,:3] = R
+    pose[0,3] = c[0]
+    pose[1,3] = c[1]
+    pose[2,3] = c[2]
+
+    focal = 2985.29 # = 1015*1024/224*(300/466.285),
+    # TODO: if the camera intrinsics of the 3DMM fitting stage are changed, update these values as well
+    w = 1024#224
+    h = 1024#224
+
+    K = np.eye(3)
+    K[0][0] = focal
+    K[1][1] = focal
+    K[0][2] = w/2.0
+    K[1][2] = h/2.0
+    convention_K = _fix_intrinsics(K)
+
+    # right-multiply the rotation by diag(1, -1, -1)
+    Rot = np.diag([1.0, -1.0, -1.0])
+    pose[:3, :3] = np.dot(pose[:3, :3], Rot) # permute axes
+    convention_pose = _fix_pose_orig(pose)
+    return pose, convention_pose, convention_K
+
+
+def get_eg3d_convention_camera_pose_intrinsic(item):
+    """
+    Add the camera entries 'c2w', 'convention_c2w' and 'intrinsics' to `item`.
+
+    item: a dict during binarize; item['euler'] and item['trans'] are read
+          (either a single [3,] pair or batched [B, 3]) and are not modified,
+          the computation works on copies
+    return: the same dict; in the batched case the new entries are stacked to
+            [B, 4, 4], [B, 4, 4] and [B, 3, 3]
+    """
+    if item['euler'].ndim == 1:
+        angle = convert_to_tensor(copy.copy(item['euler']))
+        trans = copy.deepcopy(item['trans'])
+        pose, convention_pose, convention_K = _eg3d_camera_from_euler_trans(angle, trans)
+        item['c2w'] = pose
+        item['convention_c2w'] = convention_pose
+        item['intrinsics'] = convention_K
+        return item
+
+    num_samples = len(item['euler'])
+    eulers_all = convert_to_tensor(copy.deepcopy(item['euler'])) # [B, 3]
+    trans_all = copy.deepcopy(item['trans']) # [B, 3]
+
+    intrinsics = []
+    poses = []
+    convention_poses = []
+    for i in range(num_samples):
+        pose, convention_pose, convention_K = _eg3d_camera_from_euler_trans(eulers_all[i], trans_all[i])
+        intrinsics.append(convention_K)
+        convention_poses.append(convention_pose)
+        poses.append(pose)
+
+    item['intrinsics'] = np.stack(intrinsics) # [B, 3, 3]
+    item['c2w'] = np.stack(poses) # [B, 4, 4]
+    item['convention_c2w'] = np.stack(convention_poses) # [B, 4, 4]
+    return item
diff --git a/data_gen/runs/binarizer_nerf.py b/data_gen/runs/binarizer_nerf.py
new file mode 100644
index 0000000000000000000000000000000000000000..623cd17f6b52c9a981721a8ca14e24af1edfe202
--- /dev/null
+++ b/data_gen/runs/binarizer_nerf.py
@@ -0,0 +1,335 @@
+import os
+import numpy as np
+import math
+import json
+import imageio
+import torch
+import tqdm
+import cv2
+
+from data_util.face3d_helper import Face3DHelper
+from utils.commons.euler2rot import euler_trans_2_c2w, c2w_to_euler_trans
+from data_gen.utils.process_video.euler2quaterion import euler2quaterion, quaterion2euler
+from deep_3drecon.deep_3drecon_models.bfm import ParametricFaceModel
+
+
+def euler2rot(euler_angle):
+    """
+    Convert batched euler angles into rotation matrices, composed as Rx @ Ry @ Rz.
+
+    euler_angle: [B, 3] tensor, columns are the angles around x, y and z
+    return: [B, 3, 3] tensor
+    """
+    num = euler_angle.shape[0]
+    ones = torch.ones(num, 1, 1).to(euler_angle.device)
+    zeros = torch.zeros(num, 1, 1).to(euler_angle.device)
+    ang_x, ang_y, ang_z = (euler_angle[:, k].reshape(-1, 1, 1) for k in range(3))
+
+    def _from_columns(col0, col1, col2):
+        # every argument holds the three [B,1,1] entries of one matrix column
+        return torch.cat([torch.cat(col, 1) for col in (col0, col1, col2)], 2)
+
+    rot_x = _from_columns(
+        (ones, zeros, zeros),
+        (zeros, ang_x.cos(), ang_x.sin()),
+        (zeros, -ang_x.sin(), ang_x.cos()),
+    )
+    rot_y = _from_columns(
+        (ang_y.cos(), zeros, -ang_y.sin()),
+        (zeros, ones, zeros),
+        (ang_y.sin(), zeros, ang_y.cos()),
+    )
+    rot_z = _from_columns(
+        (ang_z.cos(), -ang_z.sin(), zeros),
+        (ang_z.sin(), ang_z.cos(), zeros),
+        (zeros, zeros, ones),
+    )
+    rot_yz = torch.bmm(rot_y, rot_z)
+    return torch.bmm(rot_x, rot_yz)
+
+
+def rot2euler(rot_mat):
+    """
+    Recover per-sample euler angles from batched rotation matrices.
+
+    rot_mat: [B, 3, 3] tensor
+    return: [B, 3] tensor created with torch.zeros (default dtype), holding
+            the angles around x, y and z
+    """
+    r00, r01 = rot_mat[:, 0, 0], rot_mat[:, 0, 1]
+    r12, r20, r22 = rot_mat[:, 1, 2], rot_mat[:, 2, 0], rot_mat[:, 2, 2]
+    # we assume that y is in [-0.5pi, 0.5pi]
+    cos_y = torch.sqrt(r12 * r12 + r22 * r22)
+    per_axis = (
+        torch.atan2(-r12, r22),
+        torch.atan2(r20, cos_y),
+        torch.atan2(r01, r00),
+    )
+    euler_angles = torch.zeros([len(rot_mat), 3])
+    for axis, theta in enumerate(per_axis):
+        euler_angles[:, axis] = theta
+    return euler_angles
+
+# Indices used to pick 68 landmarks out of a 468/478-point 2D landmark array
+# (see load_processed_data). The list has 68 entries, which matches the
+# `len(lms) == 68` assertion in get_face_rect and get_lip_rect.
+index_lm68_from_lm468 = [127,234,93,132,58,136,150,176,152,400,379,365,288,361,323,454,356,70,63,105,66,107,336,296,334,293,300,168,197,5,4,75,97,2,326,305,
+                         33,160,158,133,153,144,362,385,387,263,373,380,61,40,37,0,267,270,291,321,314,17,84,91,78,81,13,311,308,402,14,178]
+
+def plot_lm2d(lm2d):
+    """
+    Draw 2D landmarks and their indices on a white 512x512 canvas.
+
+    lm2d: sequence of (x, y) pairs in pixel coordinates
+    return: [512, 512, 3] uint8 image
+    """
+    canvas_size = 512
+    canvas = np.ones([canvas_size, canvas_size, 3], dtype=np.uint8) * 255
+    draw_color = (255,0,0)
+    text_font = cv2.FONT_HERSHEY_SIMPLEX
+
+    # first pass: one filled dot per landmark
+    for px, py in lm2d:
+        canvas = cv2.circle(canvas, center=(int(px),int(py)), radius=3, color=draw_color, thickness=-1)
+    # second pass: the index labels, drawn after all dots so that they stay on top
+    for k, (px, py) in enumerate(lm2d):
+        canvas = cv2.putText(canvas, f"{k}", org=(int(px),int(py)), fontFace=text_font, fontScale=0.3, color=(255,0,0))
+    return canvas
+
+def get_face_rect(lms, h, w):
+    """
+    Compute a face bounding box from 68 2D landmarks, limited by the image size.
+
+    lms: [68, 2], lms[:, 0] runs along the width and lms[:, 1] along the height
+    h, w: int, image height and width
+    return: [min_j, max_j, min_i, max_i], j along the width and i along the height
+    """
+    assert len(lms) == 68
+    xs = lms[:, 0]
+    left, right = np.min(xs), np.max(xs)
+    center_x = int((left+right)/2.0)
+    center_y = int(lms[27, 1]) # the vertical center is read from landmark 27
+    half_w = int((right-center_x)*1.5)
+    half_h = int((lms[8, 1]-center_y)*1.15) # landmark 8 sets the vertical extent
+    # the top-left corner may not leave the image
+    x0 = max(center_x - half_w, 0)
+    y0 = max(center_y - half_h, 0)
+    box_w = min(w-1-x0, 2*half_w)
+    box_h = min(h-1-y0, 2*half_h)
+    return [x0, x0 + box_w, y0, y0 + box_h] # this x is width, y is height
+
+def get_lip_rect(lms, h, w):
+    """
+    Compute a square box around the lip landmarks (indices 48..59).
+
+    lms: [68, 2], lms[:, 0] runs along the width and lms[:, 1] along the height
+    h, w: int, image height and width
+    return: [min_j, max_j, min_i, max_i], j along the width and i along the height
+    """
+    assert len(lms) == 68
+    lip_pts = lms[48:60]
+    lip_x, lip_y = lip_pts[:, 0], lip_pts[:, 1]
+    center_x = int((np.min(lip_x)+np.max(lip_x))/2.0)
+    center_y = int((np.min(lip_y)+np.max(lip_y))/2.0)
+    # the larger of the two enlarged half extents is used for both sides (square box)
+    half = max(int((np.max(lip_x)-center_x)*1.2), int((np.max(lip_y)-center_y)*1.2))
+    side = 2*half
+
+    # keep the corner inside the image on the top/left ...
+    x0 = max(center_x - half, 0)
+    y0 = max(center_y - half, 0)
+    # ... and move the box back when it sticks out on the bottom/right
+    if x0 + side > w:
+        x0 = w - side
+    if y0 + side > h:
+        y0 = h - side
+
+    return [x0, x0 + side, y0, y0 + side] # this x is width, y is height
+
+
+# def get_lip_rect(lms, h, w):
+#     """
+#     lms: [68, 2]
+#     h, w: int
+#     return: [4,]
+#     """
+#     assert len(lms) == 68
+#     lips = slice(48, 60)
+#     # this x is width, y is height
+#     xmin, xmax = int(lms[lips, 1].min()), int(lms[lips, 1].max())
+#     ymin, ymax = int(lms[lips, 0].min()), int(lms[lips, 0].max())
+#     # padding to H == W
+#     cx = (xmin + xmax) // 2
+#     cy = (ymin + ymax) // 2
+#     l = max(xmax - xmin, ymax - ymin) // 2
+#     xmin = max(0, cx - l)
+#     xmax = min(h, cx + l)
+#     ymin = max(0, cy - l)
+#     ymax = min(w, cy + l)
+#     lip_rect = [xmin, xmax, ymin, ymax]
+#     return lip_rect
+
+def get_win_conds(conds, idx, smo_win_size=8, pad_option='zero'):
+    """
+    Cut a window of `smo_win_size` consecutive entries around `idx` along axis 0,
+    padding the positions that fall outside of `conds`.
+
+    conds: numpy array indexed by time on axis 0
+           (original note: [b, t=16, h=29] -- TODO confirm the exact layout)
+    idx: long, time index of the selected frame; clamped to [0, len(conds)-1]
+    smo_win_size: int, number of entries in the returned window
+    pad_option: 'zero' fills missing positions with zeros, 'edge' repeats the
+                first/last entry; it is only checked when padding is needed
+    return: array of shape [smo_win_size, *conds.shape[1:]]
+    raises: ValueError if conds is empty; NotImplementedError if padding is
+            needed and pad_option is unknown
+    """
+    num_frames = conds.shape[0]
+    if num_frames == 0:
+        raise ValueError("get_win_conds requires a non-empty conds")
+    idx = max(0, idx)
+    idx = min(idx, num_frames-1)
+    smo_half_win_size = smo_win_size//2
+    left_i = idx - smo_half_win_size
+    right_i = idx + (smo_win_size - smo_half_win_size)
+    pad_left, pad_right = 0, 0
+    if left_i < 0:
+        pad_left = -left_i
+        left_i = 0
+    if right_i > num_frames:
+        pad_right = right_i - num_frames
+        right_i = num_frames
+    conds_win = conds[left_i:right_i]
+    # shape of one entry; the zero pads are allocated with an explicit length,
+    # because slicing zeros_like(conds_win) yields too few rows when the
+    # sequence is shorter than the required padding
+    entry_shape = tuple(conds_win.shape[1:])
+    if pad_left > 0:
+        if pad_option == 'zero':
+            zero_pad = np.zeros((pad_left,) + entry_shape, dtype=conds_win.dtype)
+            conds_win = np.concatenate([zero_pad, conds_win], axis=0)
+        elif pad_option == 'edge':
+            edge_value = conds[0][np.newaxis, ...]
+            conds_win = np.concatenate([edge_value] * pad_left + [conds_win], axis=0)
+        else:
+            raise NotImplementedError
+    if pad_right > 0:
+        if pad_option == 'zero':
+            zero_pad = np.zeros((pad_right,) + entry_shape, dtype=conds_win.dtype)
+            conds_win = np.concatenate([conds_win, zero_pad], axis=0)
+        elif pad_option == 'edge':
+            edge_value = conds[-1][np.newaxis, ...]
+            conds_win = np.concatenate([conds_win] + [edge_value] * pad_right, axis=0)
+        else:
+            raise NotImplementedError
+    assert conds_win.shape[0] == smo_win_size
+    return conds_win
+
+
+def load_processed_data(processed_dir):
+    """
+    Collect the preprocessed files of one video into a single dict.
+
+    Reads bg.jpg, aud_hubert.npy, aud_mel_f0.npy, coeff_fit_mp.npy and
+    lms_2d.npy from `processed_dir`, and builds the file names of the per-frame
+    images under head_imgs/, inpaint_torso_imgs/ and gt_imgs/.
+
+    NOTE: this function reads the module-level globals `face_model` and
+    `face3d_helper`, which this file only creates in the `__main__` section.
+
+    processed_dir: str, folder with the processed files of one video
+    return: dict with the keys bg_img, H, W, focal, cx, cy, id, exp, euler,
+            trans, idexp_lm3d, idexp_lm3d_mean, idexp_lm3d_std, hubert, mel, f0,
+            train_samples and val_samples
+    """
+    # load necessary files
+    background_img_name = os.path.join(processed_dir, "bg.jpg")
+    assert os.path.exists(background_img_name)
+    head_img_dir = os.path.join(processed_dir, "head_imgs")
+    torso_img_dir = os.path.join(processed_dir, "inpaint_torso_imgs")
+    gt_img_dir = os.path.join(processed_dir, "gt_imgs")
+
+    hubert_npy_name = os.path.join(processed_dir, "aud_hubert.npy")
+    mel_f0_npy_name = os.path.join(processed_dir, "aud_mel_f0.npy")
+    coeff_npy_name = os.path.join(processed_dir, "coeff_fit_mp.npy")
+    lm2d_npy_name = os.path.join(processed_dir, "lms_2d.npy")
+    
+    ret_dict = {}
+
+    ret_dict['bg_img'] = imageio.imread(background_img_name)
+    # the image size is taken from the background image
+    ret_dict['H'], ret_dict['W'] = ret_dict['bg_img'].shape[:2]
+    ret_dict['focal'], ret_dict['cx'], ret_dict['cy'] = face_model.focal, face_model.center, face_model.center
+
+    print("loading lm2d coeff ...")
+    lm2d_arr = np.load(lm2d_npy_name)
+    face_rect_lst = []
+    lip_rect_lst = []
+    for lm2d in lm2d_arr:
+        # 468/478-point landmarks are reduced to the 68-point subset first
+        if len(lm2d) in [468, 478]:
+            lm2d = lm2d[index_lm68_from_lm468]
+        face_rect = get_face_rect(lm2d, ret_dict['H'], ret_dict['W'])
+        lip_rect = get_lip_rect(lm2d, ret_dict['H'], ret_dict['W'])
+        face_rect_lst.append(face_rect)
+        lip_rect_lst.append(lip_rect)
+    face_rects = np.stack(face_rect_lst, axis=0) # [T, 4]
+
+    print("loading fitted 3dmm coeff ...")
+    coeff_dict = np.load(coeff_npy_name, allow_pickle=True).tolist()
+    identity_arr = coeff_dict['id']
+    exp_arr = coeff_dict['exp']
+    ret_dict['id'] = identity_arr
+    ret_dict['exp'] = exp_arr
+    euler_arr = ret_dict['euler'] = coeff_dict['euler']
+    trans_arr = ret_dict['trans'] = coeff_dict['trans']
+    print("calculating lm3d ...")
+    idexp_lm3d_arr = face3d_helper.reconstruct_idexp_lm3d(torch.from_numpy(identity_arr), torch.from_numpy(exp_arr)).cpu().numpy().reshape([-1, 68*3])
+    # the number of rows of the landmark array defines the number of frames used below
+    len_motion = len(idexp_lm3d_arr)
+    video_idexp_lm3d_mean = idexp_lm3d_arr.mean(axis=0)
+    video_idexp_lm3d_std = idexp_lm3d_arr.std(axis=0)
+    ret_dict['idexp_lm3d'] = idexp_lm3d_arr
+    ret_dict['idexp_lm3d_mean'] = video_idexp_lm3d_mean
+    ret_dict['idexp_lm3d_std'] = video_idexp_lm3d_std
+    
+    # now we convert the euler_trans from deep3d convention to adnerf convention
+    eulers = torch.FloatTensor(euler_arr)
+    trans = torch.FloatTensor(trans_arr)
+    rots = face_model.compute_rotation(eulers) # rotation matrix is a better intermediate for convention-transplan than euler
+
+    # handle the camera pose to geneface's convention
+    trans[:, 2] = 10 - trans[:, 2] # undo the to_camera operation of the fitting stage, i.e. trans[...,2] = 10 - trans[...,2]
+    rots = rots.permute(0, 2, 1)
+    trans[:, 2] = - trans[:,2] # because the intrinsic projection is different
+    # below is the NeRF camera preprocessing strategy, see `save_transforms` in data_util/process.py 
+    trans = trans / 10.0
+    rots_inv = rots.permute(0, 2, 1)
+    trans_inv = - torch.bmm(rots_inv, trans.unsqueeze(2))
+
+    pose = torch.eye(4, dtype=torch.float32).unsqueeze(0).repeat([len_motion, 1, 1]) # [T, 4, 4]
+    pose[:, :3, :3] = rots_inv
+    pose[:, :3, 3] = trans_inv[:, :, 0]
+    c2w_transform_matrices = pose.numpy()
+
+    # process the audio features used for postnet training
+    print("loading hubert ...")
+    hubert_features = np.load(hubert_npy_name)
+    print("loading Mel and F0 ...")
+    mel_f0_features = np.load(mel_f0_npy_name, allow_pickle=True).tolist()
+
+    ret_dict['hubert'] = hubert_features
+    ret_dict['mel'] = mel_f0_features['mel']
+    ret_dict['f0'] = mel_f0_features['f0']
+
+    # obtaining train samples
+    frame_indices = list(range(len_motion))
+    # the first len_motion // 11 * 10 frames are used for training, the remaining ones for validation
+    num_train = len_motion // 11 * 10
+    train_indices = frame_indices[:num_train]
+    val_indices = frame_indices[num_train:]
+
+    for split in ['train', 'val']:
+        if split == 'train':
+            indices = train_indices
+            samples = []
+            ret_dict['train_samples'] = samples
+        elif split == 'val':
+            indices = val_indices
+            samples = []
+            ret_dict['val_samples'] = samples
+        
+        # `samples` is the list stored in ret_dict above, so appending fills the result in place
+        for idx in indices:
+            sample = {}
+            sample['idx'] = idx
+            sample['head_img_fname'] = os.path.join(head_img_dir,f"{idx:08d}.png")
+            sample['torso_img_fname'] = os.path.join(torso_img_dir,f"{idx:08d}.png")
+            sample['gt_img_fname'] = os.path.join(gt_img_dir,f"{idx:08d}.jpg")
+            # assert os.path.exists(sample['head_img_fname']) and os.path.exists(sample['torso_img_fname']) and os.path.exists(sample['gt_img_fname'])
+            sample['face_rect'] = face_rects[idx]
+            sample['lip_rect'] = lip_rect_lst[idx]
+            sample['c2w'] = c2w_transform_matrices[idx]
+            samples.append(sample)
+    return ret_dict
+
+
+class Binarizer:
+    """Packs the processed files of one video into a single `trainval_dataset.npy`."""
+
+    def __init__(self):
+        # root folder that holds the `processed/` inputs and the `binary/` outputs
+        self.data_dir = 'data/'
+
+    def parse(self, video_id):
+        """
+        Binarize one video: load its processed data, merge in every entry of
+        its mel/f0 file and save the resulting dict with np.save.
+
+        video_id: str, folder name of the video below `processed/videos`
+        """
+        src_dir = os.path.join(self.data_dir, 'processed/videos', video_id)
+        dst_dir = os.path.join(self.data_dir, 'binary/videos', video_id)
+        os.makedirs(dst_dir, exist_ok=True)
+
+        dataset = load_processed_data(src_dir)
+        # every key of the mel/f0 dict is copied into the dataset dict
+        mel_f0_path = os.path.join(src_dir, 'aud_mel_f0.npy')
+        dataset.update(np.load(mel_f0_path, allow_pickle=True).tolist())
+
+        target_path = os.path.join(dst_dir, "trainval_dataset.npy")
+        np.save(target_path, dataset, allow_pickle=True)
+
+
+
+# Script entry point: binarize the processed data of the video given by --video_id.
+if __name__ == '__main__':
+    from argparse import ArgumentParser
+    parser = ArgumentParser()
+    parser.add_argument('--video_id', type=str, default='May', help='')
+    args = parser.parse_args()
+    ### Process Single Long Audio for NeRF dataset
+    video_id = args.video_id
+    # face_model and face3d_helper are module-level names that
+    # load_processed_data reads as globals, so they must exist before parse() runs
+    face_model = ParametricFaceModel(bfm_folder='deep_3drecon/BFM', 
+                camera_distance=10, focal=1015)
+    face_model.to("cpu")
+    face3d_helper = Face3DHelper()
+
+    binarizer = Binarizer()
+    binarizer.parse(video_id)
+    print(f"Binarization for {video_id} Done!")
diff --git a/data_gen/runs/nerf/process_guide.md b/data_gen/runs/nerf/process_guide.md
new file mode 100644
index 0000000000000000000000000000000000000000..2312d416fcd50cee8656803fe2fdba141e62e86f
--- /dev/null
+++ b/data_gen/runs/nerf/process_guide.md
@@ -0,0 +1,49 @@
+# 温馨提示：第一次执行可以先一步步跑完下面的命令行，把环境跑通后，之后可以直接运行同目录的run.sh，一键完成下面的所有步骤。
+
+# Step0. 将视频Crop到512x512分辨率，25FPS，确保每一帧都有目标人脸
+```
+export VIDEO_ID=May
+ffmpeg -i data/raw/videos/${VIDEO_ID}.mp4 -vf fps=25,scale=w=512:h=512 -qmin 1 -q:v 1 data/raw/videos/${VIDEO_ID}_512.mp4
+mv data/raw/videos/${VIDEO_ID}.mp4 data/raw/videos/${VIDEO_ID}_to_rm.mp4
+mv data/raw/videos/${VIDEO_ID}_512.mp4 data/raw/videos/${VIDEO_ID}.mp4
+```
+# Step1. 提取音频特征, 如mel, f0, hubert, esperanto
+```
+export CUDA_VISIBLE_DEVICES=0
+export VIDEO_ID=May
+mkdir -p data/processed/videos/${VIDEO_ID}
+ffmpeg -i data/raw/videos/${VIDEO_ID}.mp4 -f wav -ar 16000 data/processed/videos/${VIDEO_ID}/aud.wav 
+python data_gen/utils/process_audio/extract_hubert.py --video_id=${VIDEO_ID}
+python data_gen/utils/process_audio/extract_mel_f0.py --video_id=${VIDEO_ID}
+```
+
+# Step2. 提取图片
+```
+export VIDEO_ID=May
+export CUDA_VISIBLE_DEVICES=0
+mkdir -p data/processed/videos/${VIDEO_ID}/gt_imgs
+ffmpeg -i data/raw/videos/${VIDEO_ID}.mp4 -vf fps=25,scale=w=512:h=512 -qmin 1 -q:v 1 -start_number 0 data/processed/videos/${VIDEO_ID}/gt_imgs/%08d.jpg
+python data_gen/utils/process_video/extract_segment_imgs.py --ds_name=nerf --vid_dir=data/raw/videos/${VIDEO_ID}.mp4 # extract image, segmap, and background
+```
+
+# Step3. 提取lm2d_mediapipe
+### 提取2D landmark用于之后Fit 3DMM
+### num_workers是本机上的CPU worker数量；total_process是使用的机器数；process_id是本机的编号
+
+```
+export VIDEO_ID=May
+python data_gen/utils/process_video/extract_lm2d.py --ds_name=nerf --vid_dir=data/raw/videos/${VIDEO_ID}.mp4
+```
+
+# Step4. fit 3dmm
+```
+export VIDEO_ID=May
+export CUDA_VISIBLE_DEVICES=0
+python data_gen/utils/process_video/fit_3dmm_landmark.py --ds_name=nerf --vid_dir=data/raw/videos/${VIDEO_ID}.mp4 --reset  --debug --id_mode=global
+```
+
+# Step5. Binarize
+```
+export VIDEO_ID=May
+python data_gen/runs/binarizer_nerf.py --video_id=${VIDEO_ID}
+```
+可以看到在`data/binary/videos/May`目录下得到了数据集。
\ No newline at end of file
diff --git a/data_gen/runs/nerf/run.sh b/data_gen/runs/nerf/run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f028ad9e061c925e51946ff83c27e99c35cbb15c
--- /dev/null
+++ b/data_gen/runs/nerf/run.sh
@@ -0,0 +1,51 @@
+# usage: CUDA_VISIBLE_DEVICES=0 bash data_gen/runs/nerf/run.sh <VIDEO_ID>
+# please place video to data/raw/videos/${VIDEO_ID}.mp4
+# Runs the whole NeRF preprocessing pipeline for one video: resize, audio
+# features, frame/segment extraction, 2D landmarks, 3DMM fitting, binarization.
+VIDEO_ID=$1
+if [ -z "${VIDEO_ID}" ]; then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 bash data_gen/runs/nerf/run.sh <VIDEO_ID>"
+    exit 1
+fi
+if [ ! -f "data/raw/videos/${VIDEO_ID}.mp4" ]; then
+    echo "video not found: data/raw/videos/${VIDEO_ID}.mp4"
+    exit 1
+fi
+echo Processing $VIDEO_ID
+
+echo Resizing the video to 512x512
+# Abort if the resize fails: otherwise the two mv commands below would move the
+# original away and leave no data/raw/videos/${VIDEO_ID}.mp4 for the later steps.
+ffmpeg -i data/raw/videos/${VIDEO_ID}.mp4 -vf fps=25,scale=w=512:h=512 -qmin 1 -q:v 1 -y data/raw/videos/${VIDEO_ID}_512.mp4 || { echo "ffmpeg resize failed"; exit 1; }
+mv data/raw/videos/${VIDEO_ID}.mp4 data/raw/videos/${VIDEO_ID}_to_rm.mp4
+mv data/raw/videos/${VIDEO_ID}_512.mp4 data/raw/videos/${VIDEO_ID}.mp4
+echo Done
+echo The old video is moved to data/raw/videos/${VIDEO_ID}_to_rm.mp4
+
+echo mkdir -p data/processed/videos/${VIDEO_ID}
+mkdir -p data/processed/videos/${VIDEO_ID}
+echo Done
+
+# extract audio file from the training video
+echo ffmpeg -i data/raw/videos/${VIDEO_ID}.mp4 -f wav -ar 16000 -v quiet -y data/processed/videos/${VIDEO_ID}/aud.wav
+ffmpeg -i data/raw/videos/${VIDEO_ID}.mp4 -f wav -ar 16000 -v quiet -y data/processed/videos/${VIDEO_ID}/aud.wav
+echo Done
+
+# extract hubert_mel_f0 from audio
+echo python data_gen/utils/process_audio/extract_hubert.py --video_id=${VIDEO_ID}
+python data_gen/utils/process_audio/extract_hubert.py --video_id=${VIDEO_ID}
+echo python data_gen/utils/process_audio/extract_mel_f0.py --video_id=${VIDEO_ID}
+python data_gen/utils/process_audio/extract_mel_f0.py --video_id=${VIDEO_ID}
+echo Done
+
+# extract segment images
+echo mkdir -p data/processed/videos/${VIDEO_ID}/gt_imgs
+mkdir -p data/processed/videos/${VIDEO_ID}/gt_imgs
+echo ffmpeg -i data/raw/videos/${VIDEO_ID}.mp4 -vf fps=25,scale=w=512:h=512 -qmin 1 -q:v 1 -start_number 0 -v quiet data/processed/videos/${VIDEO_ID}/gt_imgs/%08d.jpg
+ffmpeg -i data/raw/videos/${VIDEO_ID}.mp4 -vf fps=25,scale=w=512:h=512 -qmin 1 -q:v 1 -start_number 0 -v quiet data/processed/videos/${VIDEO_ID}/gt_imgs/%08d.jpg
+echo Done
+
+echo python data_gen/utils/process_video/extract_segment_imgs.py --ds_name=nerf --vid_dir=data/raw/videos/${VIDEO_ID}.mp4 # extract image, segmap, and background
+python data_gen/utils/process_video/extract_segment_imgs.py --ds_name=nerf --vid_dir=data/raw/videos/${VIDEO_ID}.mp4 # extract image, segmap, and background
+echo Done
+
+echo python data_gen/utils/process_video/extract_lm2d.py --ds_name=nerf --vid_dir=data/raw/videos/${VIDEO_ID}.mp4
+python data_gen/utils/process_video/extract_lm2d.py --ds_name=nerf --vid_dir=data/raw/videos/${VIDEO_ID}.mp4
+echo Done
+
+# The pattern is quoted so the shell cannot glob-expand it against files in the
+# current directory before pkill sees it.
+# NOTE(review): why processes matching 'void*' must be killed here is not documented.
+pkill -f 'void*'
+echo python data_gen/utils/process_video/fit_3dmm_landmark.py --ds_name=nerf --vid_dir=data/raw/videos/${VIDEO_ID}.mp4 --reset --debug --id_mode=global
+python data_gen/utils/process_video/fit_3dmm_landmark.py --ds_name=nerf --vid_dir=data/raw/videos/${VIDEO_ID}.mp4 --reset --debug --id_mode=global
+echo Done
+
+echo python data_gen/runs/binarizer_nerf.py --video_id=${VIDEO_ID}
+python data_gen/runs/binarizer_nerf.py --video_id=${VIDEO_ID}
+echo Done
\ No newline at end of file
diff --git a/data_gen/utils/mp_feature_extractors/face_landmarker.py b/data_gen/utils/mp_feature_extractors/face_landmarker.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b5904a46809352ef08fd1b3d6948ec4fbc6b7fd
--- /dev/null
+++ b/data_gen/utils/mp_feature_extractors/face_landmarker.py
@@ -0,0 +1,130 @@
+import mediapipe as mp
+from mediapipe.tasks import python
+from mediapipe.tasks.python import vision
+import numpy as np
+import cv2
+import os
+import copy
+
+# simplified mediapipe ldm at https://github.com/k-m-irfan/simplified_mediapipe_face_landmarks
+# 141 indices into the 478-point mediapipe landmark set, concatenated from ten
+# sub-lists (10 + 10 + 16 + 16 + 20 + 20 + 36 + 5 + 5 + 3 entries).
+index_lm141_from_lm478 = [70,63,105,66,107,55,65,52,53,46] + [300,293,334,296,336,285,295,282,283,276] + [33,246,161,160,159,158,157,173,133,155,154,153,145,144,163,7] + [263,466,388,387,386,385,384,398,362,382,381,380,374,373,390,249] + [78,191,80,81,82,13,312,311,310,415,308,324,318,402,317,14,87,178,88,95] + [61,185,40,39,37,0,267,269,270,409,291,375,321,405,314,17,84,181,91,146] + [10,338,297,332,284,251,389,356,454,323,361,288,397,365,379,378,400,377,152,148,176,149,150,136,172,58,132,93,234,127,162,21,54,103,67,109] + [468,469,470,471,472] + [473,474,475,476,477] + [64,4,294]
+# lm141 without iris
+# (same sub-lists as above minus the two 5-element groups 468-472 and 473-477)
+index_lm131_from_lm478 = [70,63,105,66,107,55,65,52,53,46] + [300,293,334,296,336,285,295,282,283,276] + [33,246,161,160,159,158,157,173,133,155,154,153,145,144,163,7] + [263,466,388,387,386,385,384,398,362,382,381,380,374,373,390,249] + [78,191,80,81,82,13,312,311,310,415,308,324,318,402,317,14,87,178,88,95] + [61,185,40,39,37,0,267,269,270,409,291,375,321,405,314,17,84,181,91,146] + [10,338,297,332,284,251,389,356,454,323,361,288,397,365,379,378,400,377,152,148,176,149,150,136,172,58,132,93,234,127,162,21,54,103,67,109] + [64,4,294]
+
+# face alignment lm68
+# 68 indices into the 478-point set; the order matches the index_*_from_lm68 ranges below
+index_lm68_from_lm478 = [127,234,93,132,58,136,150,176,152,400,379,365,288,361,323,454,356,70,63,105,66,107,336,296,334,293,300,168,197,5,4,75,97,2,326,305,
+                         33,160,158,133,153,144,362,385,387,263,373,380,61,40,37,0,267,270,291,321,314,17,84,91,78,81,13,311,308,402,14,178]
+# used for weights for key parts
+unmatch_mask_from_lm478 = [ 93, 127, 132, 234, 323, 356, 361, 454]
+# identical to the third and fourth sub-lists of index_lm141_from_lm478
+index_eye_from_lm478 = [33,246,161,160,159,158,157,173,133,155,154,153,145,144,163,7] + [263,466,388,387,386,385,384,398,362,382,381,380,374,373,390,249]
+# identical to the fifth and sixth sub-lists of index_lm141_from_lm478
+index_innerlip_from_lm478 = [78,191,80,81,82,13,312,311,310,415,308,324,318,402,317,14,87,178,88,95]
+index_outerlip_from_lm478 = [61,185,40,39,37,0,267,269,270,409,291,375,321,405,314,17,84,181,91,146]
+# NOTE(review): index 72 appears twice in the second sub-list below -- confirm whether
+# one of the two occurrences was meant to be a different landmark.
+index_withinmouth_from_lm478 = [76, 62] + [184, 183, 74, 72, 73, 41, 72, 38, 11, 12, 302, 268, 303, 271, 304, 272, 408, 407] + [292, 306] +  [325, 307, 319, 320, 403, 404, 316, 315, 15, 16, 86, 85, 179, 180, 89, 90, 96, 77]
+# all mouth-related indices: inner lip + outer lip + within-mouth
+index_mouth_from_lm478 = index_innerlip_from_lm478 + index_outerlip_from_lm478 + index_withinmouth_from_lm478
+
+# Contiguous index ranges within the 68-point layout (0-16, 17-26, 27-35, 36-47, 48-67);
+# the part each range covers is given by its variable name.
+index_yaw_from_lm68 = list(range(0, 17))
+index_brow_from_lm68 = list(range(17, 27))
+index_nose_from_lm68 = list(range(27, 36))
+index_eye_from_lm68 = list(range(36, 48))
+index_mouth_from_lm68 = list(range(48, 68))
+
+
+def read_video_to_frames(video_name):
+    """
+    Decode every frame of a video into one RGB array.
+
+    video_name: path of the video file
+    return: ndarray [T, H, W, 3], the frames stacked in order, channels flipped BGR ==> RGB
+    raises: ValueError if no frame could be read (missing file or empty video)
+    """
+    frames = []
+    cap = cv2.VideoCapture(video_name)
+    try:
+        while cap.isOpened():
+            ret, frame_bgr = cap.read()
+            if frame_bgr is None:
+                break
+            frames.append(frame_bgr)
+    finally:
+        # always free the decoder handle, even if reading fails half-way
+        cap.release()
+    if not frames:
+        # np.stack([]) would raise a ValueError too, but without naming the file
+        raise ValueError(f"no frames could be read from video: {video_name}")
+    frames = np.stack(frames)
+    frames = np.flip(frames, -1) # BGR ==> RGB
+    return frames
+
+class MediapipeLandmarker:
+    """
+    Thin wrapper around the MediaPipe face landmarker that returns 478-point
+    2D landmarks in pixel coordinates, in image mode and in video mode.
+    """
+
+    def __init__(self):
+        model_path = 'data_gen/utils/mp_feature_extractors/face_landmarker.task'
+        if not os.path.exists(model_path):
+            # download into the working directory, then move the file into place
+            os.makedirs(os.path.dirname(model_path), exist_ok=True)
+            print("downloading face_landmarker model from mediapipe...")
+            model_url = 'https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/latest/face_landmarker.task'
+            os.system(f"wget {model_url}")
+            os.system(f"mv face_landmarker.task {model_path}")
+            if not os.path.exists(model_path):
+                # os.system does not raise, so verify the result explicitly
+                raise FileNotFoundError(f"failed to download face_landmarker model to {model_path}")
+            print("download success")
+        base_options = python.BaseOptions(model_asset_path=model_path)
+        self.image_mode_options = vision.FaceLandmarkerOptions(base_options=base_options, 
+                        running_mode=vision.RunningMode.IMAGE, # IMAGE, VIDEO, LIVE_STREAM
+                        num_faces=1)
+        self.video_mode_options = vision.FaceLandmarkerOptions(base_options=base_options, 
+                        running_mode=vision.RunningMode.VIDEO, # IMAGE, VIDEO, LIVE_STREAM
+                        num_faces=1)
+
+    def extract_lm478_from_img_name(self, img_name):
+        """Read the image file `img_name` and return its landmarks, [478, 2] in pixels."""
+        img = cv2.imread(img_name)
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        img_lm478 = self.extract_lm478_from_img(img)
+        return img_lm478
+    
+    def extract_lm478_from_img(self, img):
+        """
+        img: RGB ndarray [H, W, 3]
+        return: [478, 2] landmarks scaled to pixel coordinates (x * W, y * H)
+        raises: IndexError if no face is detected
+        """
+        # the context manager closes the landmarker once the detection is done
+        with vision.FaceLandmarker.create_from_options(self.image_mode_options) as img_landmarker:
+            frame = mp.Image(image_format=mp.ImageFormat.SRGB, data=img.astype(np.uint8))
+            img_face_landmarker_result = img_landmarker.detect(image=frame)
+        img_ldm_i = img_face_landmarker_result.face_landmarks[0]
+        img_face_landmarks = np.array([[l.x, l.y, l.z] for l in img_ldm_i])
+        H, W, _ = img.shape
+        img_lm478 = np.array(img_face_landmarks)[:, :2] * np.array([W, H]).reshape([1,2]) # [478, 2]
+        return img_lm478
+
+    def extract_lm478_from_video_name(self, video_name, fps=25, anti_smooth_factor=2):
+        """Decode `video_name` and run extract_lm478_from_frames on all of its frames."""
+        frames = read_video_to_frames(video_name)
+        img_lm478, vid_lm478 = self.extract_lm478_from_frames(frames, fps, anti_smooth_factor)
+        return img_lm478, vid_lm478
+
+    def extract_lm478_from_frames(self, frames, fps=25, anti_smooth_factor=20):
+        """
+        frames: RGB, uint8, [T, H, W, 3]
+        anti_smooth_factor: float, scales the timestamp interval fed to video mode;
+            1 means unchanged, larger values make video mode behave closer to image mode
+        return: (img_lm478, vid_lm478), each [T, 478, 2] in pixel coordinates
+        raises: RuntimeError if no face has been detected up to and including a frame,
+            so there is no earlier result to fall back on
+        """
+        img_mpldms = []
+        vid_mpldms = []
+        # last successfully detected landmarks, reused when a later frame fails
+        img_ldm_i = None
+        vid_ldm_i = None
+        with vision.FaceLandmarker.create_from_options(self.image_mode_options) as img_landmarker, \
+                vision.FaceLandmarker.create_from_options(self.video_mode_options) as vid_landmarker:
+            for i in range(len(frames)):
+                frame = mp.Image(image_format=mp.ImageFormat.SRGB, data=frames[i].astype(np.uint8))
+                img_face_landmarker_result = img_landmarker.detect(image=frame)
+                vid_face_landmarker_result = vid_landmarker.detect_for_video(image=frame, timestamp_ms=int((1000/fps)*anti_smooth_factor*i))
+                try:
+                    img_ldm_i = img_face_landmarker_result.face_landmarks[0]
+                    vid_ldm_i = vid_face_landmarker_result.face_landmarks[0]
+                except IndexError:
+                    # an empty face_landmarks list means no face was found in this frame
+                    if img_ldm_i is None or vid_ldm_i is None:
+                        raise RuntimeError(f"failed detect ldm in idx={i} and no previous frame result is available.")
+                    print(f"Warning: failed detect ldm in idx={i}, use previous frame results.")
+                img_face_landmarks = np.array([[l.x, l.y, l.z] for l in img_ldm_i])
+                vid_face_landmarks = np.array([[l.x, l.y, l.z] for l in vid_ldm_i])
+                img_mpldms.append(img_face_landmarks)
+                vid_mpldms.append(vid_face_landmarks)
+        img_lm478 = np.stack(img_mpldms)[..., :2]
+        vid_lm478 = np.stack(vid_mpldms)[..., :2]
+        bs, H, W, _ = frames.shape
+        # landmarks are multiplied by [W, H] to obtain pixel coordinates
+        img_lm478 = img_lm478 * np.array([W, H]).reshape([1,1,2]) # [T, 478, 2]
+        vid_lm478 = vid_lm478 * np.array([W, H]).reshape([1,1,2]) # [T, 478, 2]
+        return img_lm478, vid_lm478
+
+    def combine_vid_img_lm478_to_lm68(self, img_lm478, vid_lm478):
+        """
+        Build 68-point landmarks: yaw, brow and nose points come from the video-mode
+        result, all remaining points from the image-mode result.
+        """
+        img_lm68 = img_lm478[:, index_lm68_from_lm478]
+        vid_lm68 = vid_lm478[:, index_lm68_from_lm478]
+        combined_lm68 = copy.deepcopy(img_lm68)
+        combined_lm68[:, index_yaw_from_lm68] = vid_lm68[:, index_yaw_from_lm68]
+        combined_lm68[:, index_brow_from_lm68] = vid_lm68[:, index_brow_from_lm68]
+        combined_lm68[:, index_nose_from_lm68] = vid_lm68[:, index_nose_from_lm68]
+        return combined_lm68
+     
+    def combine_vid_img_lm478_to_lm478(self, img_lm478, vid_lm478):
+        """
+        Build 478-point landmarks: mouth and eye points come from the image-mode
+        result, all remaining points from the video-mode result.
+        """
+        combined_lm478 = copy.deepcopy(vid_lm478)
+        combined_lm478[:, index_mouth_from_lm478] = img_lm478[:, index_mouth_from_lm478]
+        combined_lm478[:, index_eye_from_lm478] = img_lm478[:, index_eye_from_lm478]
+        return combined_lm478
+
+if __name__ == '__main__':
+    # manual smoke test: extract image-mode and video-mode landmarks from a local clip
+    demo_video = "00000.mp4"
+    ret = MediapipeLandmarker().extract_lm478_from_video_name(demo_video)
diff --git a/data_gen/utils/mp_feature_extractors/face_landmarker.task b/data_gen/utils/mp_feature_extractors/face_landmarker.task
new file mode 100644
index 0000000000000000000000000000000000000000..fedb14de6d2b6708a56c04ae259783e23404c1aa
--- /dev/null
+++ b/data_gen/utils/mp_feature_extractors/face_landmarker.task
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64184e229b263107bc2b804c6625db1341ff2bb731874b0bcc2fe6544e0bc9ff
+size 3758596
diff --git a/data_gen/utils/mp_feature_extractors/mp_segmenter.py b/data_gen/utils/mp_feature_extractors/mp_segmenter.py
new file mode 100644
index 0000000000000000000000000000000000000000..14ed79201e82c13cdcf67dd9d75ea1b945edfbe1
--- /dev/null
+++ b/data_gen/utils/mp_feature_extractors/mp_segmenter.py
@@ -0,0 +1,274 @@
+import os
+import copy
+import numpy as np
+import tqdm
+import mediapipe as mp
+import torch
+from mediapipe.tasks import python
+from mediapipe.tasks.python import vision
+from utils.commons.multiprocess_utils import multiprocess_run_tqdm, multiprocess_run
+from utils.commons.tensor_utils import convert_to_np
+from sklearn.neighbors import NearestNeighbors
+
+def scatter_np(condition_img, classSeg=5):
+    """
+    One-hot encode an integer label map along a new class axis (numpy version).
+
+    condition_img: integer ndarray [B, 1, H, W] holding class ids
+    classSeg: number of classes
+    return: integer ndarray [B, classSeg, H, W] with 1 at each pixel's class id
+    """
+    num_imgs, _, rows, cols = condition_img.shape
+    onehot = np.zeros((num_imgs, classSeg, rows, cols), dtype=np.int_)
+    # write a 1 at the channel index given by every label
+    np.put_along_axis(onehot, condition_img, 1, axis=1)
+    return onehot
+
+def scatter(condition_img, classSeg=19):
+    """
+    One-hot encode a label map along the channel axis (torch version).
+
+    condition_img: tensor [B, 1, H, W] holding class ids
+    classSeg: number of classes
+    return: tensor [B, classSeg, H, W] on the same device, 1 at each pixel's class id
+    """
+    num_imgs, _, rows, cols = condition_img.size()
+    onehot = torch.zeros(num_imgs, classSeg, rows, cols, device=condition_img.device)
+    onehot.scatter_(1, condition_img.long(), 1)
+    return onehot
+
+def encode_segmap_mask_to_image(segmap):
+    """
+    Render a 6-channel segmentation mask as an RGB image, one fixed color per class.
+
+    segmap: ndarray [6, H, W]; non-zero entries mark membership of a class
+    return: uint8 ndarray [H, W, 3]; later classes overwrite earlier ones where they overlap
+    """
+    # rgb
+    palette = [(255,255,255),(255,255,0),(255,0,255),(0,255,255),(255,0,0),(0,255,0)]
+    _, rows, cols = segmap.shape
+    canvas = np.full((rows, cols, 3), 255, dtype=np.uint8)
+    for class_id, rgb in enumerate(palette):
+        member = segmap[class_id].astype(int) != 0
+        canvas[member] = np.array(rgb)
+    return canvas.astype(np.uint8)
+        
+def decode_segmap_mask_from_image(encoded_img):
+    """
+    Inverse of encode_segmap_mask_to_image: recover the 6-channel mask from colors.
+
+    encoded_img: ndarray [H, W, >=3], RGB
+    return: uint8 ndarray [6, H, W] in the order
+        bg, hair, body_skin, face_skin, clothes, others; a pixel whose color
+        matches none of the six reference colors is 0 in every channel
+    """
+    # rgb
+    palette = [(255,255,255),(255,255,0),(255,0,255),(0,255,255),(255,0,0),(0,255,0)]
+    red, green, blue = encoded_img[..., 0], encoded_img[..., 1], encoded_img[..., 2]
+    layers = [(red == r) & (green == g) & (blue == b) for r, g, b in palette]
+    return np.stack(layers, axis=0).astype(np.uint8)
+
+def read_video_frame(video_name, frame_id):
+    """
+    Read a single frame from a video.
+
+    video_name: path of the video file
+    frame_id: index of the frame to seek to before reading
+    return: the frame as returned by cv2 (BGR), or None if it could not be read
+    """
+    # cv2 is not imported at the top of this file, so import it here; otherwise
+    # the function raises NameError whenever the module is used as a library.
+    import cv2
+    # https://blog.csdn.net/bby1987/article/details/108923361
+    # Other properties available on the capture object:
+    #   cv2.CAP_PROP_FRAME_COUNT  ==> total number of frames
+    #   cv2.CAP_PROP_FPS          ==> frame rate
+    #   cv2.CAP_PROP_FRAME_WIDTH  ==> video width
+    #   cv2.CAP_PROP_FRAME_HEIGHT ==> video height
+    #   cv2.CAP_PROP_POS_FRAMES   ==> current position (get) / seek target (set)
+    vr = cv2.VideoCapture(video_name)
+    try:
+        vr.set(cv2.CAP_PROP_POS_FRAMES, frame_id)
+        _, frame = vr.read()
+    finally:
+        # free the decoder handle; the original leaked one capture per call
+        vr.release()
+    return frame
+
+def decode_segmap_mask_from_segmap_video_frame(video_frame):
+    """
+    Decode a frame of a segmap video (gray level = class id * 40) into a one-hot mask.
+
+    video_frame: 0~255 BGR, obtained by read_video_frame
+    return: uint8 ndarray [6, H, W], the same layout as decode_segmap_mask_from_image
+
+    The original passed a float array to scatter_np, which np.put_along_axis rejects,
+    and returned the [H, W] label map instead of the mask it had just computed.
+    """
+    def assign_values(array):
+        # snap every value to the nearest multiple of 40 (ties at +20 round down)
+        remainder = array % 40
+        assigned_values = np.where(remainder <= 20, array - remainder, array + (40 - remainder))
+        return assigned_values
+    segmap = video_frame.mean(-1)
+    segmap = assign_values(segmap) // 40 # [H, W] with value 0~5
+    # values above 220 would round to class 6, which is out of range for six
+    # classes, so clamp; cast to int because put_along_axis needs integer indices
+    segmap = np.clip(segmap, 0, 5).astype(np.int_)
+    segmap_mask = scatter_np(segmap[None, None, ...], classSeg=6)[0] # [6, H, W]
+    return segmap_mask.astype(np.uint8)
+
+def extract_background(img_lst, segmap_lst=None):
+    """
+    Estimate a static background image from a sequence of frames.
+
+    img_lst: list of rgb ndarray
+    segmap_lst: optional list of one-hot segmaps aligned with img_lst (channel 0 is
+        the background); when None, the segmaps are computed with MediapipeSegmenter
+    return: uint8 ndarray [H, W, 3]
+    """
+    # only use 1/20 images
+    num_frames = len(img_lst)
+    img_lst = img_lst[::20] if num_frames > 20 else img_lst[0:1]
+        
+    if segmap_lst is not None:
+        segmap_lst = segmap_lst[::20] if num_frames > 20 else segmap_lst[0:1]
+        assert len(img_lst) == len(segmap_lst)
+    # get H/W
+    h, w = img_lst[0].shape[:2]
+
+    # nearest neighbors
+    all_xys = np.mgrid[0:h, 0:w].reshape(2, -1).transpose()
+    distss = []
+    # Created lazily and only when needed. The original read a global `seg_model`
+    # that exists only when this file is run as a script, so importing the module
+    # and calling this function without segmap_lst raised NameError.
+    segmenter = None
+    for idx, img in enumerate(img_lst):
+        if segmap_lst is not None:
+            segmap = segmap_lst[idx]
+        else:
+            if segmenter is None:
+                segmenter = MediapipeSegmenter()
+            segmap = segmenter._cal_seg_map(img)
+        bg = (segmap[0]).astype(bool)
+        # distance of every pixel to the nearest non-background pixel of this frame
+        fg_xys = np.stack(np.nonzero(~bg)).transpose(1, 0)
+        nbrs = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(fg_xys)
+        dists, _ = nbrs.kneighbors(all_xys)
+        distss.append(dists)
+
+    # per pixel: the largest distance over all frames and the frame where it occurs
+    distss = np.stack(distss)
+    max_dist = np.max(distss, 0)
+    max_id = np.argmax(distss, 0)
+
+    # pixels that are more than 10 px away from the foreground in some frame
+    bc_pixs = max_dist > 10 # 5
+    bc_pixs_id = np.nonzero(bc_pixs)
+    bc_ids = max_id[bc_pixs]
+
+    num_pixs = distss.shape[1]
+    imgs = np.stack(img_lst).reshape(-1, num_pixs, 3)
+
+    # copy the color of those pixels from the frame with the largest distance
+    bg_img = np.zeros((h*w, 3), dtype=np.uint8)
+    bg_img[bc_pixs_id, :] = imgs[bc_ids, bc_pixs_id, :]
+    bg_img = bg_img.reshape(h, w, 3)
+
+    # fill the remaining pixels with the color of the nearest pixel filled above
+    max_dist = max_dist.reshape(h, w)
+    bc_pixs = max_dist > 10 # 5
+    bg_xys = np.stack(np.nonzero(~bc_pixs)).transpose()
+    fg_xys = np.stack(np.nonzero(bc_pixs)).transpose()
+    nbrs = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(fg_xys)
+    distances, indices = nbrs.kneighbors(bg_xys)
+    bg_fg_xys = fg_xys[indices[:, 0]]
+    bg_img[bg_xys[:, 0], bg_xys[:, 1], :] = bg_img[bg_fg_xys[:, 0], bg_fg_xys[:, 1], :]
+    return bg_img
+
+
+class MediapipeSegmenter:
+    """
+    Wrapper around the MediaPipe selfie multiclass segmenter.
+
+    Class ids of the segmaps:
+    0 - background
+    1 - hair
+    2 - body-skin
+    3 - face-skin
+    4 - clothes
+    5 - others (accessories)
+    """
+
+    def __init__(self):
+        model_path = 'data_gen/utils/mp_feature_extractors/selfie_multiclass_256x256.tflite'
+        if not os.path.exists(model_path):
+            # download into the working directory, then move the file into place
+            os.makedirs(os.path.dirname(model_path), exist_ok=True)
+            print("downloading segmenter model from mediapipe...")
+            os.system(f"wget https://storage.googleapis.com/mediapipe-models/image_segmenter/selfie_multiclass_256x256/float32/latest/selfie_multiclass_256x256.tflite")
+            os.system(f"mv selfie_multiclass_256x256.tflite {model_path}")
+            print("download success")
+        base_options = python.BaseOptions(model_asset_path=model_path)
+        self.options = vision.ImageSegmenterOptions(base_options=base_options,running_mode=vision.RunningMode.IMAGE, output_category_mask=True)
+        self.video_options = vision.ImageSegmenterOptions(base_options=base_options,running_mode=vision.RunningMode.VIDEO, output_category_mask=True)
+    
+    def _cal_seg_map_for_video(self, imgs, segmenter=None, return_onehot_mask=True, return_segmap_image=True, debug_fill=False):
+        """
+        Segment the frames of a video in VIDEO running mode.
+
+        imgs: sequence of RGB uint8 frames [H, W, 3]
+        segmenter: optional pre-built video-mode ImageSegmenter
+        return: list of one-hot masks [6, H, W] and/or list of gray segmap images
+            [H, W, 3] (class id * 40), depending on the two return_* flags
+        debug_fill: overwrite a fixed region near the bottom of every segmap with class 4
+        """
+        segmenter = vision.ImageSegmenter.create_from_options(self.video_options) if segmenter is None else segmenter
+        assert return_onehot_mask or return_segmap_image # you should at least return one
+        segmap_masks = []
+        segmap_images = []
+        for i in tqdm.trange(len(imgs), desc="extracting segmaps from a video..."):
+            img = imgs[i]
+            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=img)
+            # timestamps advance by 40 ms per frame
+            out = segmenter.segment_for_video(mp_image, 40 * i)
+            segmap = out.category_mask.numpy_view().copy() # [H, W]
+            if debug_fill:
+                # last 79 rows, columns 200..349
+                segmap[-79:, 200:350] = 4
+
+            if return_onehot_mask:
+                segmap_mask = scatter_np(segmap[None, None, ...], classSeg=6)[0] # [6, H, W]
+                segmap_masks.append(segmap_mask)
+            if return_segmap_image:
+                segmap_image = segmap[:, :, None].repeat(3, 2).astype(float)
+                segmap_image = (segmap_image * 40).astype(np.uint8)
+                segmap_images.append(segmap_image)
+        
+        if return_onehot_mask and return_segmap_image:
+            return segmap_masks, segmap_images
+        elif return_onehot_mask:
+            return segmap_masks
+        elif return_segmap_image:
+            return segmap_images
+    
+    def _cal_seg_map(self, img, segmenter=None, return_onehot_mask=True):
+        """
+        segmenter: vision.ImageSegmenter.create_from_options(options)
+        img: numpy, [H, W, 3], 0~255
+        return: one-hot segmap [C=6, H, W] if return_onehot_mask, else the class-id map [H, W]
+        """
+        assert img.ndim == 3
+        segmenter = vision.ImageSegmenter.create_from_options(self.options) if segmenter is None else segmenter 
+        image = mp.Image(image_format=mp.ImageFormat.SRGB, data=img)
+        out = segmenter.segment(image) 
+        segmap = out.category_mask.numpy_view().copy() # [H, W]
+        if return_onehot_mask:
+            segmap = scatter_np(segmap[None, None, ...], classSeg=6)[0] # [6, H, W]
+        return segmap
+
+    def _seg_out_img_with_segmap(self, img, segmap, mode='head'):
+        """
+        Black out every pixel of `img` that does not belong to the part named by `mode`.
+
+        img: [h,w,c], img is in 0~255, np
+        segmap: one-hot segmap [6, h, w]
+        mode: 'head' | 'person' | 'torso' | 'torso_with_bg' | 'bg' | 'full'
+        return: (masked copy of img, boolean mask [1, h, w] of the kept pixels)
+        raises: NotImplementedError for an unknown mode
+        """
+        # class ids kept for every mode; glasses also belong to 'others' (5)
+        channels_of_mode = {
+            'head': [1, 3, 5],
+            'person': [1, 2, 3, 4, 5],
+            'torso': [2, 4],
+            'torso_with_bg': [0, 2, 4],
+            'bg': [0], # only seg out 0, which means background
+        }
+        img = copy.deepcopy(img)
+        if mode == 'full':
+            # keep everything; the original left selected_mask unbound here and
+            # crashed with UnboundLocalError on the return statement
+            selected_mask = np.ones((1,) + tuple(segmap.shape[1:]), dtype=bool)
+        elif mode in channels_of_mode:
+            selected_mask = segmap[channels_of_mode[mode], :, :].sum(axis=0)[None,:] > 0.5
+            img[~selected_mask.repeat(3,axis=0).transpose(1,2,0)] = 0 # 0 is black in the 0~255 image
+        else:
+            raise NotImplementedError()
+        return img, selected_mask
+    
+    def _seg_out_img(self, img, segmenter=None, mode='head'):
+        """
+        img: [H, W, 3] 0-255
+        return: (masked image [H, W, 3], boolean mask [1, H, W])
+        """
+        segmenter = vision.ImageSegmenter.create_from_options(self.options) if segmenter is None else segmenter 
+        segmap = self._cal_seg_map(img, segmenter=segmenter, return_onehot_mask=True) # [6, H, W]
+        return self._seg_out_img_with_segmap(img, segmap, mode=mode)
+
+    def seg_out_imgs(self, img, mode='head'):
+        """
+        api for pytorch img, -1~1
+        img: [B, 3, H, W], -1~1
+        return: [B, 3, H, W], -1~1, on the same device as the input
+        """
+        device = img.device
+        img = convert_to_np(img.permute(0, 2, 3, 1)) # [B, H, W, 3]
+        img = ((img + 1) * 127.5).astype(np.uint8)
+        img_lst = [copy.deepcopy(img[i]) for i in range(len(img))]
+        # build the image-mode segmenter once instead of once per image
+        segmenter = vision.ImageSegmenter.create_from_options(self.options)
+        out_lst = []
+        for im in img_lst:
+            # _seg_out_img returns (image, mask); only the image is stacked. The
+            # original appended the whole tuple, which np.stack cannot combine.
+            seg_img, _ = self._seg_out_img(im, segmenter=segmenter, mode=mode)
+            out_lst.append(seg_img)
+        seg_imgs = np.stack(out_lst) # [B, H, W, 3]
+        seg_imgs = (seg_imgs - 127.5) / 127.5
+        seg_imgs = torch.from_numpy(seg_imgs).permute(0, 3, 1, 2).to(device)
+        return seg_imgs
+
+if __name__ == '__main__':
+    # manual smoke test: segment a local image into torso / head / background
+    # and estimate a background image from it
+    import imageio, cv2, tqdm
+    import torchshow as ts
+    img = cv2.resize(imageio.imread("1.png"), (512,512))
+
+    seg_model = MediapipeSegmenter()
+    # [H, W, 3] 0~255 ==> [1, 3, H, W] -1~1
+    img = torch.tensor(img).unsqueeze(0).repeat([1, 1, 1, 1]).permute(0, 3,1,2)
+    img = (img-127.5)/127.5
+    for part in ('torso', 'head', 'bg'):
+        out = seg_model.seg_out_imgs(img, part)
+        ts.save(out, f"{part}.png")
+    # back to [B, H, W, 3] 0~255 for the background estimation
+    img = convert_to_np(img.permute(0, 2, 3, 1)) # [B, H, W, 3]
+    img = ((img + 1) * 127.5).astype(np.uint8)
+    bg = extract_background(img)
+    ts.save(bg,"bg2.png")
diff --git a/data_gen/utils/mp_feature_extractors/selfie_multiclass_256x256.tflite b/data_gen/utils/mp_feature_extractors/selfie_multiclass_256x256.tflite
new file mode 100644
index 0000000000000000000000000000000000000000..9ebdec318f4426502f8d825b8f0332c3e20e29b7
--- /dev/null
+++ b/data_gen/utils/mp_feature_extractors/selfie_multiclass_256x256.tflite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6748b1253a99067ef71f7e26ca71096cd449baefa8f101900ea23016507e0e0
+size 16371837
diff --git a/data_gen/utils/path_converter.py b/data_gen/utils/path_converter.py
new file mode 100644
index 0000000000000000000000000000000000000000..b6e862fb1810da7c6771d358a39a4043f93c9795
--- /dev/null
+++ b/data_gen/utils/path_converter.py
@@ -0,0 +1,24 @@
+import os
+
+
+class PathConverter():
+    """Rewrite a data path from one per-kind sub-directory to another."""
+
+    def __init__(self):
+        # logical name of a data kind -> directory fragment found in its paths
+        self.prefixs = dict(
+            vid="/video/",
+            gt="/gt_imgs/",
+            head="/head_imgs/",
+            torso="/torso_imgs/",
+            person="/person_imgs/",
+            torso_with_bg="/torso_with_bg_imgs/",
+            single_bg="/bg_img/",
+            bg="/bg_imgs/",
+            segmaps="/segmaps/",
+            inpaint_torso="/inpaint_torso_imgs/",
+            com="/com_imgs/",
+            inpaint_torso_with_com_bg="/inpaint_torso_with_com_bg_imgs/",
+        )
+
+    def to(self, path: str, old_pattern: str, new_pattern: str):
+        """
+        Replace the first occurrence of the `old_pattern` fragment in `path` with the
+        `new_pattern` fragment; both are keys of `self.prefixs` (KeyError otherwise).
+        """
+        source_fragment = self.prefixs[old_pattern]
+        target_fragment = self.prefixs[new_pattern]
+        return path.replace(source_fragment, target_fragment, 1)
+
+# shared module-level converter instance
+pc = PathConverter()
\ No newline at end of file
diff --git a/data_gen/utils/process_audio/extract_hubert.py b/data_gen/utils/process_audio/extract_hubert.py
new file mode 100644
index 0000000000000000000000000000000000000000..85af486a983b7706f05ea2861565bc7b32d480dd
--- /dev/null
+++ b/data_gen/utils/process_audio/extract_hubert.py
@@ -0,0 +1,95 @@
+from transformers import Wav2Vec2Processor, HubertModel
+import soundfile as sf
+import numpy as np
+import torch
+import os
+from utils.commons.hparams import set_hparams, hparams
+
+
+# Module-level caches: both stay None until get_hubert_from_16k_speech loads them
+# on its first call, and are reused by later calls.
+wav2vec2_processor = None
+hubert_model = None
+
+
+def get_hubert_from_16k_wav(wav_16k_name):
+    """Read the wav file `wav_16k_name` and return its HuBERT features."""
+    waveform, _sample_rate = sf.read(wav_16k_name)
+    return get_hubert_from_16k_speech(waveform)
+
+@torch.no_grad()
+def get_hubert_from_16k_speech(speech, device="cuda:0"):
+    """
+    Extract HuBERT hidden states from a 16 kHz waveform, processed clip by clip.
+
+    speech: ndarray [T,] or [T, C]; for 2-D input only the first channel is used
+    device: torch device the model and the inputs are moved to
+    return: cpu tensor [expected_T, hid], roughly one time step per 320 samples
+    """
+    global hubert_model, wav2vec2_processor
+    # prefer a local snapshot of the checkpoint when this path exists
+    local_path = '/home/tiger/.cache/huggingface/hub/models--facebook--hubert-large-ls960-ft/snapshots/ece5fabbf034c1073acae96d5401b25be96709d8'
+    if hubert_model is None:
+        print("Loading the HuBERT Model...")
+        if os.path.exists(local_path):
+            hubert_model = HubertModel.from_pretrained(local_path)
+        else:
+            hubert_model = HubertModel.from_pretrained("facebook/hubert-large-ls960-ft")
+    hubert_model = hubert_model.to(device)
+    if wav2vec2_processor is None:
+        print("Loading the Wav2Vec2 Processor...")
+        if os.path.exists(local_path):
+            wav2vec2_processor = Wav2Vec2Processor.from_pretrained(local_path)
+        else:
+            wav2vec2_processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-large-ls960-ft")
+
+    if speech.ndim ==2:
+        speech = speech[:, 0] # [T, 2] ==> [T,]
+    
+    input_values_all = wav2vec2_processor(speech, return_tensors="pt", sampling_rate=16000).input_values # [1, T]
+    input_values_all = input_values_all.to(device)
+    # For long audio sequence, due to the memory limitation, we cannot process them in one run
+    # HuBERT process the wav with a CNN of stride [5,2,2,2,2,2], making a stride of 320
+    # Besides, the kernel is [10,3,3,3,3,2,2], making 400 a fundamental unit to get 1 time step.
+    # So the CNN is equal to a big Conv1D with kernel k=400 and stride s=320
+    # We have the equation to calculate out time step: T = floor((t-k)/s)
+    # To prevent overlap, we set each clip length of (K+S*(N-1)), where N is the expected length T of this clip
+    # The start point of next clip should roll back with a length of (kernel-stride) so it is stride * N
+    kernel = 400
+    stride = 320
+    clip_length = stride * 1000
+    num_iter = input_values_all.shape[1] // clip_length
+    expected_T = (input_values_all.shape[1] - (kernel-stride)) // stride
+    res_lst = []
+    for i in range(num_iter):
+        start_idx = clip_length * i
+        end_idx = start_idx + (clip_length - stride + kernel)
+        input_values = input_values_all[:, start_idx: end_idx]
+        hidden_states = hubert_model(input_values).last_hidden_state # [B=1, T=pts//320, hid=1024]
+        res_lst.append(hidden_states[0])
+    # the remainder after the last full clip (or the whole input if it is shorter than one clip)
+    if num_iter > 0:
+        input_values = input_values_all[:, clip_length * num_iter:]
+    else:
+        input_values = input_values_all
+
+    if input_values.shape[1] >= kernel: # if the last batch is shorter than kernel_size, skip it            
+        hidden_states = hubert_model(input_values).last_hidden_state # [B=1, T=pts//320, hid=1024]
+        res_lst.append(hidden_states[0])
+    ret = torch.cat(res_lst, dim=0).cpu() # [T, 1024]
+
+    assert abs(ret.shape[0] - expected_T) <= 1
+    if ret.shape[0] < expected_T: # if skipping the last short 
+        # `ret` is 2-D [T, hid]: repeat the last time step along dim 0. The original
+        # indexed it with three indices and concatenated on dim 1, which raised.
+        num_missing = expected_T - ret.shape[0]
+        ret = torch.cat([ret, ret[-1:].repeat([num_missing, 1])], dim=0)
+    else:
+        ret = ret[:expected_T]
+
+    return ret
+
+
+if __name__ == '__main__':
+    # Command-line entry point: extract HuBERT features for one processed video.
+    from argparse import ArgumentParser
+    parser = ArgumentParser()
+    parser.add_argument('--video_id', type=str, default='May', help='')
+    args = parser.parse_args()
+    ### Process Single Long Audio for NeRF dataset
+    person_id = args.video_id
+    wav_16k_name = f"data/processed/videos/{person_id}/aud.wav"
+    hubert_npy_name = f"data/processed/videos/{person_id}/aud_hubert.npy"
+    # read the wav and run HuBERT on it, then store the features next to the audio
+    hubert_hidden = get_hubert_from_16k_wav(wav_16k_name)
+    np.save(hubert_npy_name, hubert_hidden.detach().numpy())
+    print(f"Saved at {hubert_npy_name}")
diff --git a/data_gen/utils/process_audio/extract_mel_f0.py b/data_gen/utils/process_audio/extract_mel_f0.py
new file mode 100644
index 0000000000000000000000000000000000000000..b7d29fe8515f61448431af70c5d3169856b4cef9
--- /dev/null
+++ b/data_gen/utils/process_audio/extract_mel_f0.py
@@ -0,0 +1,148 @@
+import numpy as np
+import torch
+import glob
+import os
+import tqdm
+import librosa
+import parselmouth
+from utils.commons.pitch_utils import f0_to_coarse
+from utils.commons.multiprocess_utils import multiprocess_run_tqdm
+from utils.commons.os_utils import multiprocess_glob
+from utils.audio.io import save_wav
+
+from moviepy.editor import VideoFileClip
+from utils.commons.hparams import hparams, set_hparams
+
+def resample_wav(wav_name, out_name, sr=16000):  # reload `wav_name` at `sr` Hz and re-save it
+    samples, rate = librosa.core.load(wav_name, sr=sr)
+    save_wav(samples, out_name, rate)
+    
+def split_wav(mp4_name, wav_name=None):
+    """Write the audio track of `mp4_name` to a 16 kHz wav (reused if present); return its path."""
+    if wav_name is None:
+        wav_name = mp4_name.replace(".mp4", ".wav").replace("/video/", "/audio/")
+    if os.path.exists(wav_name):
+        return wav_name
+    os.makedirs(os.path.dirname(wav_name), exist_ok=True)
+    # The context manager closes the clip's ffmpeg readers, which were previously leaked.
+    with VideoFileClip(mp4_name,verbose=False) as video:
+        audio = video.audio
+        assert audio is not None  # AssertionError when the video has no audio stream
+        audio.write_audiofile(wav_name,fps=16000,verbose=False,logger=None)
+    return wav_name
+
+def librosa_pad_lr(x, fsize, fshift, pad_sides=1):
+    '''Return (left, right) pad sizes lifting len(x) to the next multiple of fshift above it.
+
+    pad_sides=1 puts all padding on the right; pad_sides=2 splits it (odd remainder on the right).
+    '''
+    assert pad_sides in (1, 2)
+    n_samples = x.shape[0]
+    total = (n_samples // fshift + 1) * fshift - n_samples
+    left = 0 if pad_sides == 1 else total // 2
+    return left, total - left
+
+def extract_mel_from_fname(wav_path,
+                      fft_size=512,
+                      hop_size=320,
+                      win_length=512,
+                      window="hann",
+                      num_mels=80,
+                      fmin=80,
+                      fmax=7600,
+                      eps=1e-6,
+                      sample_rate=16000,
+                      min_level_db=-100):
+    """Compute a log10 mel spectrogram for a wav path or an already loaded waveform.
+
+    Returns (wav, mel): the waveform zero-padded on the right by librosa_pad_lr, and
+    the mel array transposed to frames-first order. min_level_db is not used here.
+    """
+    # A string is treated as a file path to load; anything else is used as the samples.
+    wav = librosa.core.load(wav_path, sr=sample_rate)[0] if isinstance(wav_path, str) else wav_path
+
+    # Magnitude spectrogram, (n_bins, T); center=False is passed through to librosa.stft.
+    magnitude = np.abs(librosa.stft(wav, n_fft=fft_size, hop_length=hop_size,
+                                    win_length=win_length, window=window, center=False))
+
+    # -1 is a sentinel: fmin falls back to 0 and fmax to the Nyquist frequency.
+    low_hz = 0 if fmin == -1 else fmin
+    high_hz = sample_rate / 2 if fmax == -1 else fmax
+    filterbank = librosa.filters.mel(sr=sample_rate, n_fft=fft_size, n_mels=num_mels, fmin=low_hz, fmax=high_hz)
+    # Floor at eps before the log to avoid log10(0); transpose to (T, n_mel_bins).
+    mel = np.log10(np.maximum(eps, filterbank @ magnitude)).T
+
+    l_pad, r_pad = librosa_pad_lr(wav, fft_size, hop_size, 1)
+    padded = np.pad(wav, (l_pad, r_pad), mode='constant', constant_values=0.0)
+
+    return padded.T, mel
+
+def extract_f0_from_wav_and_mel(wav, mel,
+                        hop_size=320,
+                        audio_sample_rate=16000,
+                        ):
+    """Estimate per-frame F0 with parselmouth's to_pitch_ac and fit its length to `mel`.
+
+    Returns (f0, pitch_coarse), where pitch_coarse is f0_to_coarse(f0).
+    """
+    frame_ms = hop_size / audio_sample_rate * 1000  # hop duration in milliseconds
+    f0 = parselmouth.Sound(wav, audio_sample_rate).to_pitch_ac(
+        time_step=frame_ms / 1000, voicing_threshold=0.6,
+        pitch_floor=80, pitch_ceiling=750).selected_array['frequency']
+    shortfall = len(mel) - len(f0)
+    assert np.abs(shortfall) <= 8  # the two frame counts may differ by at most 8
+    if shortfall > 0:  # repeat the last F0 value to cover the missing frames
+        f0 = np.concatenate([f0, [f0[-1]] * shortfall], 0)
+    f0 = f0[:len(mel)]
+    return f0, f0_to_coarse(f0)
+
+
+def extract_mel_f0_from_fname(wav_name=None, out_name=None):
+    """Save {"mel", "f0"} for `wav_name` to `out_name`; errors are printed, not raised.
+
+    `out_name` defaults to the wav path with ".wav"->"_mel_f0.npy" and "/audio/"->"/mel_f0/".
+    """
+    try:
+        if out_name is None:  # previously a caller-supplied out_name was silently overwritten
+            out_name = wav_name.replace(".wav", "_mel_f0.npy").replace("/audio/", "/mel_f0/")
+        os.makedirs(os.path.dirname(out_name), exist_ok=True)
+        wav, mel = extract_mel_from_fname(wav_name)
+        f0, _ = extract_f0_from_wav_and_mel(wav, mel)
+        np.save(out_name, {"mel": mel, "f0": f0})  # mel: [T, 80]
+    except Exception as e:  # deliberately non-fatal, as in the original
+        print(e)
+
+def extract_mel_f0_from_video_name(mp4_name, wav_name=None, out_name=None):
+    """Extract mel and f0 from an .mp4 (audio split out first) or a .wav and np.save them.
+
+    `out_name` defaults to a "_mel_f0.npy" path derived from `mp4_name`. For any other
+    extension both `wav_name` and `out_name` must be given, else ValueError is raised.
+    """
+    if mp4_name.endswith(".mp4"):
+        wav_name = split_wav(mp4_name, wav_name)
+        default_out = mp4_name.replace(".mp4", "_mel_f0.npy").replace("/video/", "/mel_f0/")
+    elif mp4_name.endswith(".wav"):
+        wav_name = mp4_name
+        default_out = mp4_name.replace(".wav", "_mel_f0.npy").replace("/audio/", "/mel_f0/")
+    else:
+        default_out = None
+    out_name = default_out if out_name is None else out_name
+    if wav_name is None or out_name is None:  # used to surface as an opaque TypeError
+        raise ValueError(f"cannot derive wav/output paths for {mp4_name!r}")
+    os.makedirs(os.path.dirname(out_name), exist_ok=True)
+    wav, mel = extract_mel_from_fname(wav_name)
+    np.save(out_name, {"mel": mel, "f0": extract_f0_from_wav_and_mel(wav, mel)[0]})  # mel: [T, 80]
+
+
+if __name__ == '__main__':
+    # Script entry: compute mel/f0 features for one processed video's audio track.
+    from argparse import ArgumentParser
+    parser = ArgumentParser()
+    parser.add_argument('--video_id', type=str, default='May', help='')
+    args = parser.parse_args()
+    person_id = args.video_id
+    wav_16k_name = f"data/processed/videos/{person_id}/aud.wav"
+    out_name = f"data/processed/videos/{person_id}/aud_mel_f0.npy"
+    # Pass out_name by keyword: positionally it landed in the `wav_name` parameter.
+    extract_mel_f0_from_video_name(wav_16k_name, out_name=out_name)
+    print(f"Saved at {out_name}")
\ No newline at end of file
diff --git a/data_gen/utils/process_audio/resample_audio_to_16k.py b/data_gen/utils/process_audio/resample_audio_to_16k.py
new file mode 100644
index 0000000000000000000000000000000000000000..0cc353b9385dc22c30256eb7dedbfb610cd33036
--- /dev/null
+++ b/data_gen/utils/process_audio/resample_audio_to_16k.py
@@ -0,0 +1,49 @@
+import os, glob
+from utils.commons.os_utils import multiprocess_glob
+from utils.commons.multiprocess_utils import multiprocess_run_tqdm
+
+
+def extract_wav16k_job(audio_name:str):
+    """Resample `audio_name` to 16 kHz with ffmpeg, writing under /audio/ instead of /audio_raw/."""
+    import shlex  # local import: the file only imports os and glob at the top
+    out_path = audio_name.replace("/audio_raw/","/audio/",1)
+    assert out_path != audio_name # prevent inplace
+    os.makedirs(os.path.dirname(out_path), exist_ok=True)
+    # Quote both paths so spaces or shell metacharacters cannot break or hijack the command.
+    os.system(f'/usr/bin/ffmpeg -i {shlex.quote(audio_name)} -ar 16000 -v quiet -y {shlex.quote(out_path)}')
+
+if __name__ == '__main__':
+    # Script entry: resample every CMLR wav to 16 kHz, optionally as one shard of a larger run.
+    import argparse, glob, tqdm, random
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--aud_dir", default='/home/tiger/datasets/raw/CMLR/audio_raw/')
+    parser.add_argument("--ds_name", default='CMLR')
+    parser.add_argument("--num_workers", default=64, type=int)
+    parser.add_argument("--process_id", default=0, type=int)
+    parser.add_argument("--total_process", default=1, type=int)
+    args = parser.parse_args()
+    print(f"args {args}")
+
+    aud_dir = args.aud_dir
+    ds_name = args.ds_name
+    if ds_name in ['CMLR']:
+        aud_name_pattern = os.path.join(aud_dir, "*/*/*.wav")
+        aud_names = multiprocess_glob(aud_name_pattern)
+    else:
+        raise NotImplementedError()
+    aud_names = sorted(aud_names)
+    print(f"total audio number : {len(aud_names)}")
+    print(f"first {aud_names[0]} last {aud_names[-1]}")
+    process_id = args.process_id
+    total_process = args.total_process
+    if total_process > 1:
+        assert process_id <= total_process -1
+        num_samples_per_process = len(aud_names) // total_process
+        # Last shard takes the remainder; `process_id == total_process` could never be true.
+        if process_id == total_process - 1:
+            aud_names = aud_names[process_id * num_samples_per_process : ]
+        else:
+            aud_names = aud_names[process_id * num_samples_per_process : (process_id+1) * num_samples_per_process]
+    for i, res in multiprocess_run_tqdm(extract_wav16k_job, aud_names, num_workers=args.num_workers, desc="resampling videos"):
+        pass
+
diff --git a/data_gen/utils/process_image/extract_lm2d.py b/data_gen/utils/process_image/extract_lm2d.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ee0ecc02dc94a04b69682a05a7b089d9cd4c8d6
--- /dev/null
+++ b/data_gen/utils/process_image/extract_lm2d.py
@@ -0,0 +1,197 @@
+import os
+os.environ["OMP_NUM_THREADS"] = "1"
+import sys
+
+import glob
+import cv2
+import tqdm
+import numpy as np
+from data_gen.utils.mp_feature_extractors.face_landmarker import MediapipeLandmarker
+from utils.commons.multiprocess_utils import multiprocess_run_tqdm
+import warnings
+warnings.filterwarnings('ignore')
+
+import random
+random.seed(42)
+
+import pickle
+import json
+import gzip
+from typing import Any
+
+def load_file(filename, is_gzip: bool = False, is_json: bool = False) -> Any:
+    """Load one object from `filename`.
+
+    Args:
+        filename: path of the file to read.
+        is_gzip: read through gzip instead of plain `open`.
+        is_json: parse UTF-8 JSON text; otherwise unpickle binary data.
+
+    Returns:
+        The deserialized object.
+    NOTE: pickle.load can execute arbitrary code; only load trusted files.
+    """
+    opener = gzip.open if is_gzip else open
+    if is_json:
+        # Text mode must be explicit: gzip's "r" means binary and rejects `encoding`.
+        with opener(filename, "rt", encoding="utf-8") as f:
+            return json.load(f)
+    with opener(filename, "rb") as f:
+        return pickle.load(f)
+        
+def save_file(filename, content, is_gzip: bool = False, is_json: bool = False) -> None:
+    """Write `content` to `filename`.
+
+    Args:
+        is_gzip: write through gzip instead of plain `open`.
+        is_json: dump UTF-8 JSON text; otherwise pickle in binary mode.
+    """
+    opener = gzip.open if is_gzip else open
+    if is_json:
+        # Text mode must be explicit: gzip's "w" means binary and rejects `encoding`.
+        with opener(filename, "wt", encoding="utf-8") as f:
+            json.dump(content, f)
+        return
+    with opener(filename, "wb") as f:
+        pickle.dump(content, f)
+
+face_landmarker = None  # per-process MediapipeLandmarker, created on first use
+
+def extract_lms_mediapipe_job(img):
+    """Return face_landmarker.extract_lm478_from_img(img), or None when img is None."""
+    global face_landmarker
+    if img is None:
+        return None
+    if face_landmarker is None:
+        face_landmarker = MediapipeLandmarker()
+    return face_landmarker.extract_lm478_from_img(img)
+    
+def extract_landmark_job(img_name):
+    """Extract mediapipe landmarks for one image and save them under /lms_2d/ as "*_lms.npy".
+
+    Skips images whose output already exists. Any failure is printed and swallowed so a
+    single bad image does not abort a batch run.
+    """
+    try:
+        out_name = img_name.replace("/images_512/", "/lms_2d/").replace(".png","_lms.npy")
+        if os.path.exists(out_name):
+            print("out exists, skip...")
+            return
+        os.makedirs(os.path.dirname(out_name), exist_ok=True)
+        img = cv2.imread(img_name)
+        if img is None:
+            # cv2.imread returns None for unreadable files; slicing it first raised TypeError.
+            print(f"failed to read image: {img_name}")
+            return
+        img = img[:,:,::-1]  # BGR -> RGB
+        lm468 = extract_lms_mediapipe_job(img)
+        if lm468 is not None:
+            np.save(out_name, lm468)
+    except Exception as e:
+        print(e)
+        
+def out_exist_job(img_name):
+    """Return `img_name` if its landmark output is still missing, else None.
+
+    The output path swaps "/images_512/" for "/lms_2d/" and ".png" for "_lms.npy"."""
+    lms_path = img_name.replace("/images_512/", "/lms_2d/").replace(".png","_lms.npy")
+    return None if os.path.exists(lms_path) else img_name
+
+# def get_todo_img_names(img_names):
+#     todo_img_names = []
+#     for i, res in multiprocess_run_tqdm(out_exist_job, img_names, num_workers=64):
+#         if res is not None:
+#             todo_img_names.append(res)
+#     return todo_img_names
+
+
+if __name__ == '__main__':
+    # Script entry: extract mediapipe 2D landmarks for every image of a dataset, optionally
+    # as one shard (--process_id out of --total_process) of a larger run.
+    import argparse, glob, tqdm, random
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--img_dir", default='/home/tiger/datasets/raw/FFHQ/images_512/')
+    parser.add_argument("--ds_name", default='FFHQ')
+    parser.add_argument("--num_workers", default=64, type=int)
+    parser.add_argument("--process_id", default=0, type=int)
+    parser.add_argument("--total_process", default=1, type=int)
+    parser.add_argument("--reset", action='store_true')
+    parser.add_argument("--img_names_file", default="img_names.pkl", type=str)
+    parser.add_argument("--load_img_names", action="store_true")
+
+    args = parser.parse_args()
+    print(f"args {args}")
+
+    img_dir = args.img_dir
+    img_names_file = os.path.join(img_dir, args.img_names_file)
+    if args.load_img_names:
+        # NOTE: the name list is unpickled; only point this at files you trust.
+        img_names = load_file(img_names_file)
+        print(f"load image names from {img_names_file}")
+    else:
+        if args.ds_name == 'FFHQ_MV':
+            img_name_pattern1 = os.path.join(img_dir, "ref_imgs/*.png")
+            img_names1 = glob.glob(img_name_pattern1)
+            img_name_pattern2 = os.path.join(img_dir, "mv_imgs/*.png")
+            img_names2 = glob.glob(img_name_pattern2)
+            img_names = img_names1 + img_names2
+            img_names = sorted(img_names)
+        elif args.ds_name == 'FFHQ':
+            img_name_pattern = os.path.join(img_dir, "*.png")
+            img_names = glob.glob(img_name_pattern)
+            img_names = sorted(img_names)
+        elif args.ds_name == "PanoHeadGen":
+            # Only the "ref" split is processed; "multi_view" and "reverse" are left out.
+            img_name_patterns = ["ref/*/*.png"]
+            img_names = []
+            for img_name_pattern in img_name_patterns:
+                img_name_pattern_full = os.path.join(img_dir, img_name_pattern)
+                img_names_part = glob.glob(img_name_pattern_full)
+                img_names.extend(img_names_part)
+            img_names = sorted(img_names)
+        else:
+            # An unknown dataset used to fall through and fail later with a NameError.
+            raise NotImplementedError(f"unsupported ds_name: {args.ds_name}")
+
+        # Cache the globbed list so that later runs can reuse it via --load_img_names.
+        save_file(img_names_file, img_names)
+        print(f"save image names in {img_names_file}")
+
+    print(f"total images number: {len(img_names)}")
+
+
+    process_id = args.process_id
+    total_process = args.total_process
+    if total_process > 1:
+        assert process_id <= total_process -1
+        num_samples_per_process = len(img_names) // total_process
+        # The last shard also takes the remainder. The old test `process_id == total_process`
+        # could never be true (see the assert), so the trailing images were never processed.
+        if process_id == total_process - 1:
+            img_names = img_names[process_id * num_samples_per_process : ]
+        else:
+            img_names = img_names[process_id * num_samples_per_process : (process_id+1) * num_samples_per_process]
+
+    print(f"todo_image {img_names[:10]}")
+    print(f"processing images number in this process: {len(img_names)}")
+
+    if args.num_workers == 1:
+        # Report progress roughly every 0.3% of the images (at least every image).
+        report_every = max(1, int(len(img_names) * 0.003))
+        for index, img_name in enumerate(tqdm.tqdm(img_names, desc=f"Root process {args.process_id}: extracting MP-based landmark2d")):
+            try:
+                extract_landmark_job(img_name)
+            except Exception as e:
+                print(e)
+            if index % report_every == 0:
+                print(f"processed {index} / {len(img_names)}")
+                sys.stdout.flush()
+    else:
+        # One progress line is printed per finished job.
+        for i, res in multiprocess_run_tqdm(
+            extract_landmark_job, img_names, 
+            num_workers=args.num_workers, 
+            desc=f"Root {args.process_id}: extracing MP-based landmark2d"): 
+            print(f"processed {i+1} / {len(img_names)}")
+            sys.stdout.flush()
+        print(f"Root {args.process_id}: Finished extracting.")
\ No newline at end of file
diff --git a/data_gen/utils/process_image/extract_segment_imgs.py b/data_gen/utils/process_image/extract_segment_imgs.py
new file mode 100644
index 0000000000000000000000000000000000000000..408a6d1b6229e9bd7e2aa1c7c7cdeb067cc0ae7f
--- /dev/null
+++ b/data_gen/utils/process_image/extract_segment_imgs.py
@@ -0,0 +1,114 @@
+import os
+os.environ["OMP_NUM_THREADS"] = "1"
+
+import glob
+import cv2
+import tqdm
+import numpy as np
+import PIL
+from utils.commons.tensor_utils import convert_to_np
+import torch
+import mediapipe as mp
+from utils.commons.multiprocess_utils import multiprocess_run_tqdm
+from data_gen.utils.mp_feature_extractors.mp_segmenter import MediapipeSegmenter
+from data_gen.utils.process_video.extract_segment_imgs import inpaint_torso_job, extract_background, save_rgb_image_to_path
+seg_model = MediapipeSegmenter()  # module-level segmenter, created at import time
+
+# Segment one image and write its background, composite, per-mode and inpainted-torso images; returns 0 on success, 1 if anything raised.
+def extract_segment_job(img_name):
+    try:
+        img = cv2.imread(img_name)
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # imread yields BGR; the rest works on RGB
+        # NOTE(review): the ".mp4" -> ".jpg" replace below does nothing unless the path contains ".mp4" - confirm it is intended.
+        segmap = seg_model._cal_seg_map(img)
+        bg_img = extract_background([img], [segmap])
+        out_img_name = img_name.replace("/images_512/",f"/bg_img/").replace(".mp4", ".jpg")
+        save_rgb_image_to_path(bg_img, out_img_name)
+        # Composite: image pixels where segmap[0] is set (presumably the background class - TODO confirm) are replaced by bg_img.
+        com_img = img.copy()
+        bg_part = segmap[0].astype(bool)[..., None].repeat(3,axis=-1)  # boolean mask repeated over the 3 colour channels
+        com_img[bg_part] = bg_img[bg_part]
+        out_img_name = img_name.replace("/images_512/",f"/com_imgs/")
+        save_rgb_image_to_path(com_img, out_img_name)
+        # One output image per segmentation mode, written with cv2 (hence the RGB -> BGR conversion).
+        for mode in ['head', 'torso', 'person', 'torso_with_bg', 'bg']:
+            out_img, _ = seg_model._seg_out_img_with_segmap(img, segmap, mode=mode)
+            out_img_name = img_name.replace("/images_512/",f"/{mode}_imgs/")
+            out_img = cv2.cvtColor(out_img, cv2.COLOR_RGB2BGR)
+            try: os.makedirs(os.path.dirname(out_img_name), exist_ok=True)
+            except: pass  # directory creation is best effort; any error is ignored here
+            cv2.imwrite(out_img_name, out_img)
+        # Inpainted torso outputs; the with-background variant gets bg_img pasted into the masked region before saving.
+        inpaint_torso_img, inpaint_torso_with_bg_img, _, _ = inpaint_torso_job(img, segmap)
+        out_img_name = img_name.replace("/images_512/",f"/inpaint_torso_imgs/")
+        save_rgb_image_to_path(inpaint_torso_img, out_img_name)
+        inpaint_torso_with_bg_img[bg_part] = bg_img[bg_part]
+        out_img_name = img_name.replace("/images_512/",f"/inpaint_torso_with_com_bg_imgs/")
+        save_rgb_image_to_path(inpaint_torso_with_bg_img, out_img_name)
+        return 0
+    except Exception as e:  # any failure (including an unreadable image) is printed and reported as 1
+        print(e)
+        return 1
+
+def out_exist_job(img_name):
+    """Return None when the head, com and inpaint-torso outputs all exist, else `img_name`."""
+    required_dirs = ("/head_imgs/", "/com_imgs/", "/inpaint_torso_with_com_bg_imgs/")
+    for out_dir in required_dirs:
+        # Each output path is `img_name` with "/images_512/" swapped for the output
+        # directory; a single missing file marks the image as still to be processed.
+        if not os.path.exists(img_name.replace("/images_512/", out_dir)):
+            return img_name
+    return None
+
+def get_todo_img_names(img_names):
+    """Return the names that out_exist_job reports as not yet processed.
+
+    The existence checks run through multiprocess_run_tqdm with 64 workers."""
+    checked = (res for _, res in multiprocess_run_tqdm(out_exist_job, img_names, num_workers=64))
+    return [name for name in checked if name is not None]
+
+
+if __name__ == '__main__':
+    # Script entry: write the segmentation outputs for every image, optionally as one shard of a larger run.
+    import argparse, glob, tqdm, random
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--img_dir", default='./images_512')
+    parser.add_argument("--ds_name", default='FFHQ')
+    parser.add_argument("--num_workers", default=1, type=int)
+    parser.add_argument("--seed", default=0, type=int)
+    parser.add_argument("--process_id", default=0, type=int)
+    parser.add_argument("--total_process", default=1, type=int)
+    parser.add_argument("--reset", action='store_true')
+    args = parser.parse_args()
+    img_dir = args.img_dir
+    if args.ds_name == 'FFHQ_MV':
+        img_name_pattern1 = os.path.join(img_dir, "ref_imgs/*.png")
+        img_names1 = glob.glob(img_name_pattern1)
+        img_name_pattern2 = os.path.join(img_dir, "mv_imgs/*.png")
+        img_names2 = glob.glob(img_name_pattern2)
+        img_names = img_names1 + img_names2
+    elif args.ds_name == 'FFHQ':
+        img_name_pattern = os.path.join(img_dir, "*.png")
+        img_names = glob.glob(img_name_pattern)
+    else:
+        raise NotImplementedError(f"unsupported ds_name: {args.ds_name}")  # was a NameError further down
+    # Sort, then shuffle with a fixed seed, so every process derives the same order before sharding.
+    img_names = sorted(img_names)
+    random.seed(args.seed)
+    random.shuffle(img_names)
+
+    process_id = args.process_id
+    total_process = args.total_process
+    if total_process > 1:
+        assert process_id <= total_process -1
+        num_samples_per_process = len(img_names) // total_process
+        # Last shard takes the remainder; `process_id == total_process` could never be true.
+        if process_id == total_process - 1:
+            img_names = img_names[process_id * num_samples_per_process : ]
+        else:
+            img_names = img_names[process_id * num_samples_per_process : (process_id+1) * num_samples_per_process]
+    if not args.reset:
+        img_names = get_todo_img_names(img_names)
+    print(f"todo images number: {len(img_names)}")
+    for _ in multiprocess_run_tqdm(extract_segment_job ,img_names, desc=f"Root process {args.process_id}: extracting segment images", num_workers=args.num_workers):
+        pass
\ No newline at end of file
diff --git a/data_gen/utils/process_image/fit_3dmm_landmark.py b/data_gen/utils/process_image/fit_3dmm_landmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..4fde7d94d919ab2b582fe7ac2e1a11fbe8129fad
--- /dev/null
+++ b/data_gen/utils/process_image/fit_3dmm_landmark.py
@@ -0,0 +1,369 @@
+from numpy.core.numeric import require
+from numpy.lib.function_base import quantile
+import torch
+import torch.nn.functional as F
+import copy
+import numpy as np
+
+import os
+import sys
+import cv2
+import argparse
+import tqdm
+from utils.commons.multiprocess_utils import multiprocess_run_tqdm
+from data_gen.utils.mp_feature_extractors.face_landmarker import MediapipeLandmarker
+
+from deep_3drecon.deep_3drecon_models.bfm import ParametricFaceModel
+import pickle
+
+face_model = ParametricFaceModel(bfm_folder='deep_3drecon/BFM', 
+            camera_distance=10, focal=1015, keypoint_mode='mediapipe')
+face_model.to("cuda")  # NOTE(review): runs at import time with a fixed "cuda" target, independent of any `device` argument
+     
+# 68 mediapipe landmark indices; judging by the name they map the 468-point mesh to a 68-point layout (TODO confirm).
+index_lm68_from_lm468 = [127,234,93,132,58,136,150,176,152,400,379,365,288,361,323,454,356,70,63,105,66,107,336,296,334,293,300,168,197,5,4,75,97,2,326,305,
+                         33,160,158,133,153,144,362,385,387,263,373,380,61,40,37,0,267,270,291,321,314,17,84,91,78,81,13,311,308,402,14,178]
+
+dir_path = os.path.dirname(os.path.realpath(__file__))
+# Weights of the mean-square id / exp coefficient regularizers added to the landmark loss.
+LAMBDA_REG_ID = 0.3
+LAMBDA_REG_EXP = 0.05
+
+def save_file(name, content):  # pickle `content` into the file `name`
+    with open(name, "wb") as sink:
+        pickle.dump(content, sink)
+        
+def load_file(name):
+    """Unpickle and return the object stored in `name` (trusted files only: pickle runs code)."""
+    with open(name, "rb") as source:
+        return pickle.load(source)
+
+def cal_lan_loss_mp(proj_lan, gt_lan):
+    """Weighted mean squared error between projected and ground-truth mediapipe landmarks.
+
+    Landmarks are indexed up to 466 along dim 1, so inputs are presumably [B, 468, 2].
+    Eye points weigh 5, lip points 2, the `ignored` points 0 and all others 1.
+    """
+    sq_err = (proj_lan - gt_lan).pow(2)
+    ignored = [ 93, 127, 132, 234, 323, 356, 361, 454]
+    eye = [33,246,161,160,159,158,157,173,133,155,154,153,145,144,163,7] + [263,466,388,387,386,385,384,398,362,382,381,380,374,373,390,249]
+    inner_lip = [78,191,80,81,82,13,312,311,310,415,308,324,318,402,317,14,87,178,88,95]
+    outer_lip = [61,185,40,39,37,0,267,269,270,409,291,375,321,405,314,17,84,181,91,146]
+    weights = torch.ones_like(sq_err)
+    for point_ids, weight in ((eye, 5), (inner_lip, 2), (outer_lip, 2), (ignored, 0)):
+        weights[:, point_ids] = weight
+    return torch.mean(sq_err * weights)
+ 
+def cal_lan_loss(proj_lan, gt_lan):
+    """Weighted mean squared error for 68-point landmarks shaped [B, 68, 2].
+
+    Uses the landmark weights from deep_3drecon/deep_3drecon_models/losses.py:
+    eyes (36:48), inner lip (last 8) and nose (28:31) weigh 3, all others 1.
+    """
+    sq_err = (proj_lan - gt_lan) ** 2
+    weights = torch.ones_like(sq_err)
+    for region in (slice(36, 48), slice(-8, None), slice(28, 31)):
+        weights[:, region, :] = 3
+    return torch.mean(sq_err * weights)
+
+def set_requires_grad(tensor_list):  # switch on gradient tracking for every tensor in the list
+    for item in tensor_list:
+        item.requires_grad = True
+
+def read_video_to_frames(img_name):
+    """Decode all frames of the video `img_name` and return them stacked as RGB arrays."""
+    frames, cap = [], cv2.VideoCapture(img_name)
+    while cap.isOpened():
+        _, frame_bgr = cap.read()
+        if frame_bgr is None:  # no more frames
+            break
+        frames.append(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
+    cap.release()  # free the capture handle, which was previously never released
+    return np.stack(frames)
+    
+@torch.enable_grad()
+def fit_3dmm_for_a_image(img_name, debug=False, keypoint_mode='mediapipe', device="cuda:0", save=True):
+    """Fit 3DMM coefficients (id, exp, euler, trans) to the 2D landmarks of one square image.
+
+    Landmarks come from the matching "/lms_2d/*_lms.npy" file when it exists, otherwise from
+    MediapipeLandmarker. Fitting has three stages: pose only, a joint rough fit of all four
+    coefficient groups, then a refinement at a much lower learning rate.
+
+    Args:
+        img_name: image path; output paths are derived by replacing "/images_512/".
+        debug: also write an image overlaying the fitted and the detected landmarks.
+        keypoint_mode: 'mediapipe' selects cal_lan_loss_mp, anything else cal_lan_loss.
+        device: device for the landmark and coefficient tensors.
+        save: np.save the coefficient dict to the derived output path.
+
+    Returns:
+        Dict of numpy arrays keyed 'id', 'exp', 'euler', 'trans'; None if no landmarks
+        could be obtained.
+
+    Raises:
+        AssertionError: if the image is not square.
+    """
+    img = cv2.imread(img_name)
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    # Width is shape[1]; reading shape[0] twice made the square check below vacuous.
+    img_h, img_w = img.shape[0], img.shape[1]
+    assert img_h == img_w
+    num_frames = 1
+
+    lm_name = img_name.replace("/images_512/", "/lms_2d/").replace(".png", "_lms.npy")
+    if lm_name.endswith('_lms.npy') and os.path.exists(lm_name):
+        lms = np.load(lm_name)
+    else:
+        # No cached landmark file: extract the landmarks from the image itself.
+        try:
+            landmarker = MediapipeLandmarker()
+            lms = landmarker.extract_lm478_from_img_name(img_name)
+        except Exception as e:
+            print(e)
+            return
+        if lms is None:
+            print("get None lms_2d, please check whether each frame has one head, exiting...")
+            return
+    lms = lms[:468].reshape([468,2])
+    lms = torch.FloatTensor(lms).to(device=device)
+    lms[..., 1] = img_h - lms[..., 1] # flip the height axis
+
+    if keypoint_mode == 'mediapipe':
+        cal_lan_loss_fn = cal_lan_loss_mp
+        out_name = img_name.replace("/images_512/", "/coeff_fit_mp/").replace(".png", "_coeff_fit_mp.npy")
+    else:
+        cal_lan_loss_fn = cal_lan_loss
+        out_name = img_name.replace("/images_512/", "/coeff_fit_lm68/").replace(".png", "_coeff_fit_lm68.npy")
+    try:
+        os.makedirs(os.path.dirname(out_name), exist_ok=True)
+    except OSError:
+        pass  # best effort, as before; a real problem surfaces when saving
+
+    id_dim, exp_dim = 80, 64
+    # Rescale the landmarks from image pixels to the face model's image plane.
+    h = face_model.center * 2
+    img_scale_factor = img_h / h
+    lms /= img_scale_factor
+
+    id_para = lms.new_zeros((num_frames, id_dim), requires_grad=True)
+    exp_para = lms.new_zeros((num_frames, exp_dim), requires_grad=True)
+    euler_angle = lms.new_zeros((num_frames, 3), requires_grad=True)
+    trans = lms.new_zeros((num_frames, 3), requires_grad=True)
+    set_requires_grad([id_para, exp_para, euler_angle, trans])
+
+    optimizer_idexp = torch.optim.Adam([id_para, exp_para], lr=.1)
+    optimizer_frame = torch.optim.Adam([euler_angle, trans], lr=.1)
+
+    # Stage 1: id/exp stay at their zero initialisation; only euler and trans are trained.
+    for _ in range(200):
+        proj_geo = face_model.compute_for_landmark_fit(
+            id_para, exp_para, euler_angle, trans)
+        loss_lan = cal_lan_loss_fn(proj_geo[:, :, :2], lms.detach())
+        loss = loss_lan
+        optimizer_frame.zero_grad()
+        loss.backward()
+        optimizer_frame.step()
+
+    # Stage 2: jointly and roughly fit id, exp, euler and trans.
+    for _ in range(200):
+        proj_geo = face_model.compute_for_landmark_fit(
+            id_para, exp_para, euler_angle, trans)
+        loss_lan = cal_lan_loss_fn(
+            proj_geo[:, :, :2], lms.detach())
+        loss_regid = torch.mean(id_para*id_para) # L2 regularization
+        loss_regexp = torch.mean(exp_para * exp_para)
+
+        loss = loss_lan  + loss_regid * LAMBDA_REG_ID + loss_regexp * LAMBDA_REG_EXP
+        optimizer_idexp.zero_grad()
+        optimizer_frame.zero_grad()
+        loss.backward()
+        optimizer_idexp.step()
+        optimizer_frame.step()
+
+    # Stage 3: fine fitting, initialised from the rough results via fresh leaf tensors.
+    id_para_ = lms.new_zeros((num_frames, id_dim), requires_grad=True)
+    id_para_.data = id_para.data.clone()
+    id_para = id_para_
+    exp_para_ = lms.new_zeros((num_frames, exp_dim), requires_grad=True)
+    exp_para_.data = exp_para.data.clone()
+    exp_para = exp_para_
+    euler_angle_ = lms.new_zeros((num_frames, 3), requires_grad=True)
+    euler_angle_.data = euler_angle.data.clone()
+    euler_angle = euler_angle_
+    trans_ = lms.new_zeros((num_frames, 3), requires_grad=True)
+    trans_.data = trans.data.clone()
+    trans = trans_
+
+    batch_size = 1
+
+    for i in range(int((num_frames-1)/batch_size+1)):
+        if (i+1)*batch_size > num_frames:
+            sel_ids = np.arange(max(num_frames-batch_size,0), num_frames)
+        else:
+            sel_ids = np.arange(i*batch_size, i*batch_size+batch_size)
+        sel_id_para = id_para.new_zeros(
+            (batch_size, id_dim), requires_grad=True)
+        sel_id_para.data = id_para[sel_ids].clone()
+        sel_exp_para = exp_para.new_zeros(
+            (batch_size, exp_dim), requires_grad=True)
+        sel_exp_para.data = exp_para[sel_ids].clone()
+        sel_euler_angle = euler_angle.new_zeros(
+            (batch_size, 3), requires_grad=True)
+        sel_euler_angle.data = euler_angle[sel_ids].clone()
+        sel_trans = trans.new_zeros((batch_size, 3), requires_grad=True)
+        sel_trans.data = trans[sel_ids].clone()
+
+        set_requires_grad([sel_id_para, sel_exp_para, sel_euler_angle, sel_trans])
+        optimizer_cur_batch = torch.optim.Adam(
+            [sel_id_para, sel_exp_para, sel_euler_angle, sel_trans], lr=0.005)
+
+        for j in range(50):
+            proj_geo = face_model.compute_for_landmark_fit(
+                sel_id_para, sel_exp_para, sel_euler_angle, sel_trans)
+            loss_lan = cal_lan_loss_fn(
+                proj_geo[:, :, :2], lms.unsqueeze(0).detach())
+            loss_regid = torch.mean(sel_id_para*sel_id_para) # L2 regularization
+            loss_regexp = torch.mean(sel_exp_para*sel_exp_para)
+            loss = loss_lan + loss_regid * LAMBDA_REG_ID + loss_regexp * LAMBDA_REG_EXP 
+            optimizer_cur_batch.zero_grad()
+            loss.backward()
+            optimizer_cur_batch.step()
+        print(f"batch {i} | loss_lan: {loss_lan.item():.2f}, loss_reg_id: {loss_regid.item():.2f},loss_reg_exp: {loss_regexp.item():.2f}")
+        # Write the refined values back in place. `x[sel_ids].data = ...` assigned to a
+        # temporary copy made by advanced indexing, so the refinement used to be discarded.
+        id_para.data[sel_ids] = sel_id_para.data.clone()
+        exp_para.data[sel_ids] = sel_exp_para.data.clone()
+        euler_angle.data[sel_ids] = sel_euler_angle.data.clone()
+        trans.data[sel_ids] = sel_trans.data.clone()
+
+    coeff_dict = {'id': id_para.detach().cpu().numpy(), 'exp': exp_para.detach().cpu().numpy(),
+                'euler': euler_angle.detach().cpu().numpy(), 'trans': trans.detach().cpu().numpy()}
+    if save:
+        np.save(out_name, coeff_dict, allow_pickle=True)
+
+    if debug:
+        import imageio
+        debug_name = img_name.replace("/images_512/", "/coeff_fit_mp_debug/").replace(".png", "_debug.png").replace(".jpg", "_debug.jpg")
+        try: os.makedirs(os.path.dirname(debug_name), exist_ok=True)
+        except OSError: pass
+        proj_geo = face_model.compute_for_landmark_fit(id_para, exp_para, euler_angle, trans)
+        lm68s = proj_geo[:,:,:2].detach().cpu().numpy()  # fitted landmarks, xy only
+        lm68s = lm68s * img_scale_factor
+        lms = lms * img_scale_factor
+        lm68s[..., 1] = img_h - lm68s[..., 1] # flip the height axis
+        lms[..., 1] = img_h - lms[..., 1] # flip the height axis
+        lm68s = lm68s.astype(int)
+        lm68s = lm68s.reshape([-1,2])
+        lms = lms.cpu().numpy().astype(int).reshape([-1,2])
+        for lm in lm68s:
+            img = cv2.circle(img, lm, 1, (0, 0, 255), thickness=-1)
+        for gt_lm in lms:
+            img = cv2.circle(img, gt_lm, 2, (255, 0, 0), thickness=1)
+        imageio.imwrite(debug_name, img)
+        print(f"debug img saved at {debug_name}")
+    return coeff_dict
+
+def out_exist_job(vid_name):
+    """Return ``vid_name`` if its coefficient file is still missing, else ``None``.
+
+    The expected output path is derived from the input path by replacing the
+    ``/images_512/`` directory with ``/coeff_fit_mp/`` and the ``.png`` suffix
+    with ``_coeff_fit_mp.npy``.
+    """
+    coeff_path = vid_name.replace("/images_512/", "/coeff_fit_mp/")
+    coeff_path = coeff_path.replace(".png", "_coeff_fit_mp.npy")
+    already_done = os.path.exists(coeff_path)
+    return None if already_done else vid_name
+
+def get_todo_img_names(img_names):
+    """Filter ``img_names`` down to the images that still lack an output file.
+
+    Every name is checked by ``out_exist_job`` through
+    ``multiprocess_run_tqdm`` with 16 workers; each non-``None`` result is
+    kept, in the order the results are yielded.
+    """
+    checked = multiprocess_run_tqdm(out_exist_job, img_names, num_workers=16)
+    return [pending for _, pending in checked if pending is not None]
+
+
+if __name__ == '__main__':
+    # Command-line entry point: fit 3DMM coefficients for a single image or
+    # for every image of a dataset, optionally sharded over several processes.
+    import argparse, glob, tqdm
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--img_dir", default='/home/tiger/datasets/raw/FFHQ/images_512')
+    parser.add_argument("--ds_name", default='FFHQ')
+    parser.add_argument("--seed", default=0, type=int)
+    parser.add_argument("--process_id", default=0, type=int)
+    parser.add_argument("--total_process", default=1, type=int)
+    parser.add_argument("--keypoint_mode", default='mediapipe', type=str)
+    parser.add_argument("--debug", action='store_true')
+    parser.add_argument("--reset", action='store_true')
+    parser.add_argument("--device", default="cuda:0", type=str)
+    parser.add_argument("--output_log", action='store_true')
+    parser.add_argument("--load_names", action="store_true")
+
+    args = parser.parse_args()
+    img_dir = args.img_dir
+    load_names = args.load_names
+
+    print(f"args {args}")
+
+    if args.ds_name == 'single_img':
+        # in this mode --img_dir is the path of the one image to process
+        img_names = [img_dir]
+    else:
+        img_names_path = os.path.join(img_dir, "img_dir.pkl")
+        if os.path.exists(img_names_path) and load_names:
+            print(f"loading vid names from {img_names_path}")
+            img_names = load_file(img_names_path)
+        else:
+            # glob patterns (relative to img_dir) for each supported dataset
+            if args.ds_name == 'FFHQ_MV':
+                img_name_patterns = ["ref_imgs/*.png", "mv_imgs/*.png"]
+            elif args.ds_name == 'FFHQ':
+                img_name_patterns = ["*.png"]
+            elif args.ds_name == "PanoHeadGen":
+                img_name_patterns = ["ref/*/*.png"]
+            else:
+                # an unknown name used to fall through and crash with a
+                # NameError on the undefined img_names
+                raise NotImplementedError(f"unsupported ds_name: {args.ds_name}")
+            img_names = []
+            for img_name_pattern in img_name_patterns:
+                img_names.extend(glob.glob(os.path.join(img_dir, img_name_pattern)))
+            img_names = sorted(img_names)
+            print(f"saving image names to {img_names_path}")
+            save_file(img_names_path, img_names)
+
+    # NOTE: --seed is only consumed by the disabled shuffle below.
+    # import random
+    # random.seed(args.seed)
+    # random.shuffle(img_names)
+
+    face_model = ParametricFaceModel(bfm_folder='deep_3drecon/BFM', 
+                camera_distance=10, focal=1015, keypoint_mode=args.keypoint_mode)
+    face_model.to(torch.device(args.device))
+
+    process_id = args.process_id
+    total_process = args.total_process
+    if total_process > 1:
+        assert process_id <= total_process -1 and process_id >= 0
+        num_samples_per_process = len(img_names) // total_process
+        shard_start = process_id * num_samples_per_process
+        if process_id == total_process - 1:
+            # the last shard also takes the remainder of the integer division;
+            # the old test `process_id == total_process` could never be true,
+            # so those trailing images were never processed by any shard
+            img_names = img_names[shard_start : ]
+        else:
+            img_names = img_names[shard_start : shard_start + num_samples_per_process]
+    print(f"image names number (before fileter): {len(img_names)}")
+
+
+    if not args.reset:
+        # skip images whose output file already exists
+        img_names = get_todo_img_names(img_names)
+
+    print(f"image names number (after  fileter): {len(img_names)}")
+    for i in tqdm.trange(len(img_names), desc=f"process {process_id}: fitting 3dmm ..."):
+        img_name = img_names[i]
+        try:
+            fit_3dmm_for_a_image(img_name, args.debug, device=args.device)
+        except Exception as e:
+            # one bad image must not abort the whole run; report and continue
+            print(img_name, e)
+        # progress line roughly every 0.3% of the work list
+        if args.output_log and i % max(int(len(img_names) * 0.003), 1) == 0:
+            print(f"process {process_id}: {i + 1} / {len(img_names)} done")
+            sys.stdout.flush()
+            sys.stderr.flush()
+
+    print(f"process {process_id}: fitting 3dmm all done")
+
+
diff --git a/data_gen/utils/process_video/euler2quaterion.py b/data_gen/utils/process_video/euler2quaterion.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3fd35af0e26285dafac2931fad5904e9d30321a
--- /dev/null
+++ b/data_gen/utils/process_video/euler2quaterion.py
@@ -0,0 +1,35 @@
+import numpy as np
+import torch
+import math
+import numba
+from scipy.spatial.transform import Rotation as R
+
+def euler2quaterion(euler, use_radian=True):
+    """
+    Convert Euler angles (scipy 'xyz' axis sequence) into quaternions.
+
+    euler: np.array, [batch, 3]
+    use_radian: the angles are radians when True, degrees when False
+    return: the quaterion, np.array, [batch, 4], as produced by scipy's ``as_quat``
+    """
+    in_degrees = not use_radian
+    rotation = R.from_euler('xyz', euler, degrees=in_degrees)
+    return rotation.as_quat()
+
+def quaterion2euler(quat, use_radian=True):
+    """
+    Convert quaternions into Euler angles (scipy 'xyz' axis sequence).
+
+    quat: np.array, [batch, 4], in the layout accepted by scipy's ``from_quat``
+    use_radian: return radians when True, degrees when False
+    return: the euler, np.array, [batch, 3]
+    """
+    in_degrees = not use_radian
+    rotation = R.from_quat(quat)
+    return rotation.as_euler('xyz', degrees=in_degrees)
+
+def rot2quaterion(rot):
+    """Convert rotation matrix/matrices ``rot`` into quaternion(s) via scipy's ``Rotation``."""
+    return R.from_matrix(rot).as_quat()
+
+def quaterion2rot(quat):
+    """Convert quaternion(s) ``quat`` into rotation matrix/matrices via scipy's ``Rotation``."""
+    return R.from_quat(quat).as_matrix()
+
+if __name__ == '__main__':
+    # Smoke test: push 100 identical near-90-degree Euler triples through the
+    # quaternion representation and back again.
+    euler = np.full((100, 3), 89.999)
+    q = euler2quaterion(euler, use_radian=False)
+    e = quaterion2euler(q, use_radian=False)
+    print(" ")
diff --git a/data_gen/utils/process_video/extract_blink.py b/data_gen/utils/process_video/extract_blink.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6d27bb077d401a9c8e9b5b19b121c538db9e037
--- /dev/null
+++ b/data_gen/utils/process_video/extract_blink.py
@@ -0,0 +1,50 @@
+import numpy as np
+from data_util.face3d_helper import Face3DHelper
+from utils.commons.tensor_utils import convert_to_tensor
+
+def polygon_area(x, y):
+    """
+    Unsigned area of polygons given by their vertex coordinates (shoelace formula).
+
+    x: [..., K] x-coordinates of the K vertices, listed in order around the polygon
+    y: [..., K] y-coordinates, same shape as x
+    return: [...] area of each polygon
+
+    Written for [T, K=6] eye contours; any number of leading batch axes
+    (including none, i.e. a single polygon) is accepted.
+    """
+    # centre every polygon on its vertex mean; the area does not depend on translation
+    x_ = x - x.mean(axis=-1, keepdims=True)
+    y_ = y - y.mean(axis=-1, keepdims=True)
+    # cross term of the closing edge (last vertex -> first vertex)
+    correction = x_[..., -1] * y_[..., 0] - y_[..., -1] * x_[..., 0]
+    # cross terms of all consecutive vertex pairs
+    main_area = (x_[..., :-1] * y_[..., 1:]).sum(axis=-1) - (y_[..., :-1] * x_[..., 1:]).sum(axis=-1)
+    return 0.5 * np.abs(main_area + correction)
+
+def get_eye_area_percent(id, exp, face3d_helper):
+    """
+    Per-frame area of the two eye contours, as a percentage.
+
+    id, exp: coefficients; both go through ``convert_to_tensor`` and are then
+        handed to ``face3d_helper.reconstruct_cano_lm3d``
+    face3d_helper: object providing ``reconstruct_cano_lm3d``
+    return: [T,] sum of the left and right eye polygon areas, times 100
+    """
+    id_tensor = convert_to_tensor(id)
+    exp_tensor = convert_to_tensor(exp)
+    cano_lm3d = face3d_helper.reconstruct_cano_lm3d(id_tensor, exp_tensor)
+    # shift/scale x and y by (v + 1) / 2 -- presumably maps [-1, 1] onto the
+    # unit square so that the areas below are fractions of the whole image
+    lms = ((cano_lm3d[..., :2] + 1) / 2).cpu().numpy()
+    # landmark indices 36..41 and 42..47 are treated as the two eye contours
+    left_eye = lms[:, 36:42]
+    right_eye = lms[:, 42:48]
+    area_left = polygon_area(left_eye[..., 0], left_eye[..., 1])
+    area_right = polygon_area(right_eye[..., 0], right_eye[..., 1])
+    # area percentage of two eyes of the whole image...
+    return (area_left + area_right) / 1 * 100  # recommend threshold is 0.25%
+
+
+
+if __name__ == '__main__':
+    # Demo: fit 3DMM coefficients for a sample video, then write a copy of it
+    # ("1.mp4") with the per-frame eye-area percentage drawn onto every frame.
+    import numpy as np
+    import imageio
+    import cv2
+    import torch
+    from data_gen.utils.process_video.extract_lm2d import extract_lms_mediapipe_job, read_video_to_frames, index_lm68_from_lm468
+    from data_gen.utils.process_video.fit_3dmm_landmark import fit_3dmm_for_a_video
+    from data_util.face3d_helper import Face3DHelper
+
+    face3d_helper = Face3DHelper()
+    video_name = 'data/raw/videos/May_10s.mp4'
+    frames = read_video_to_frames(video_name)
+    coeff = fit_3dmm_for_a_video(video_name, save=False)
+    area_percent = get_eye_area_percent(torch.tensor(coeff['id']), torch.tensor(coeff['exp']), face3d_helper)
+    writer = imageio.get_writer("1.mp4", fps=25)
+    try:
+        for idx, frame in enumerate(frames):
+            frame = cv2.putText(frame, f"{area_percent[idx]:.2f}", org=(128,128), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=1, color=(255,0,0), thickness=1)
+            writer.append_data(frame)
+    finally:
+        # always finalise the output file, even when a frame fails to render
+        writer.close()
\ No newline at end of file
diff --git a/data_gen/utils/process_video/extract_lm2d.py b/data_gen/utils/process_video/extract_lm2d.py
new file mode 100644
index 0000000000000000000000000000000000000000..f9ae0b13408c1d837af4c40912ffc58e0043469b
--- /dev/null
+++ b/data_gen/utils/process_video/extract_lm2d.py
@@ -0,0 +1,164 @@
+import os
+os.environ["OMP_NUM_THREADS"] = "1"
+import sys
+import glob
+import cv2
+import pickle
+import tqdm
+import numpy as np
+import mediapipe as mp
+from utils.commons.multiprocess_utils import multiprocess_run_tqdm
+from utils.commons.os_utils import multiprocess_glob
+from data_gen.utils.mp_feature_extractors.face_landmarker import MediapipeLandmarker
+import warnings
+import traceback
+
+warnings.filterwarnings('ignore')
+
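+"""
+The Face_alignment-based lm68 has been deprecated, because it:
+1. has very low prediction accuracy around the eyes;
+2. cannot accurately predict the occluded jawline under large head rotations, so the 3DMM GT labels are themselves wrong at large angles, which hurts performance.
+We now use the mediapipe-based lm68 instead.
+"""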
+# def extract_landmarks(ori_imgs_dir):
+
+#     print(f'[INFO] ===== extract face landmarks from {ori_imgs_dir} =====')
+
+#     fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False)
+#     image_paths = glob.glob(os.path.join(ori_imgs_dir, '*.png'))
+#     for image_path in tqdm.tqdm(image_paths):
+#         out_name = image_path.replace("/images_512/", "/lms_2d/").replace(".png",".lms")
+#         if os.path.exists(out_name):
+#             continue
+#         input = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) # [H, W, 3]
+#         input = cv2.cvtColor(input, cv2.COLOR_BGR2RGB)
+#         preds = fa.get_landmarks(input)
+#         if preds is None:
+#             print(f"Skip {image_path} for no face detected")
+#             continue
+#         if len(preds) > 0:
+#             lands = preds[0].reshape(-1, 2)[:,:2]
+#             os.makedirs(os.path.dirname(out_name), exist_ok=True)
+#             np.savetxt(out_name, lands, '%f')
+#     del fa
+#     print(f'[INFO] ===== extracted face landmarks =====')
+
+def save_file(name, content):
+    """Pickle ``content`` into the file at path ``name``, overwriting it."""
+    with open(name, "wb") as out_f:
+        out_f.write(pickle.dumps(content))
+        
+def load_file(name):
+    """Unpickle and return the object stored in the file at path ``name``.
+
+    NOTE: unpickling can execute arbitrary code; only load files written by
+    this pipeline itself.
+    """
+    with open(name, "rb") as in_f:
+        return pickle.load(in_f)
+
+
+# MediapipeLandmarker instance of this process, created lazily on first use
+face_landmarker = None
+
+def extract_landmark_job(video_name, nerf=False):
+    """
+    Extract mediapipe landmarks for one video and save them as a .npy file.
+
+    video_name: path of the input .mp4
+    nerf: selects the output layout.
+        True:  "/raw/" -> "/processed/" and ".mp4" -> "/lms_2d.npy"
+        False: "/video/" -> "/lms_2d/" and ".mp4" -> "_lms.npy"
+    return: None when the output already exists (nothing is done),
+            True on success,
+            False when any exception occurred (its traceback is printed)
+    """
+    global face_landmarker
+    try:
+        if nerf:
+            out_name = video_name.replace("/raw/", "/processed/").replace(".mp4","/lms_2d.npy")
+        else:
+            out_name = video_name.replace("/video/", "/lms_2d/").replace(".mp4","_lms.npy")
+        if os.path.exists(out_name):
+            # print("out exists, skip...")
+            return None
+        try:
+            os.makedirs(os.path.dirname(out_name), exist_ok=True)
+        except OSError:
+            # best effort only: if the directory is really unusable, np.save
+            # below fails and is reported through the outer handler
+            pass
+        if face_landmarker is None:
+            face_landmarker = MediapipeLandmarker()
+        img_lm478, vid_lm478 = face_landmarker.extract_lm478_from_video_name(video_name)
+        lm478 = face_landmarker.combine_vid_img_lm478_to_lm478(img_lm478, vid_lm478)
+        np.save(out_name, lm478)
+        return True
+    except Exception:
+        traceback.print_exc()
+        return False
+        
+def out_exist_job(vid_name):
+    """Return ``vid_name`` if its landmark file is still missing, else ``None``.
+
+    The expected output path replaces ``/video/`` with ``/lms_2d/`` and the
+    ``.mp4`` suffix with ``_lms.npy``.
+    """
+    lms_path = vid_name.replace("/video/", "/lms_2d/")
+    lms_path = lms_path.replace(".mp4", "_lms.npy")
+    return None if os.path.exists(lms_path) else vid_name
+    
+def get_todo_vid_names(vid_names):
+    """Filter ``vid_names`` down to the videos that still lack a landmark file.
+
+    A single-element list (the nerf / single-video case) is returned
+    unchanged without any check. Otherwise every name is checked by
+    ``out_exist_job`` through ``multiprocess_run_tqdm`` with 128 workers.
+    """
+    if len(vid_names) == 1:  # nerf
+        return vid_names
+    checked = multiprocess_run_tqdm(out_exist_job, vid_names, num_workers=128)
+    return [pending for _, pending in checked if pending is not None]
+
+if __name__ == '__main__':
+    # Command-line entry point: extract mediapipe 2D landmarks either for one
+    # video (--ds_name nerf, --vid_dir = path of the .mp4) or for every video
+    # of a known dataset (--vid_dir = dataset root).
+    import argparse, glob, tqdm, random
+    parser = argparse.ArgumentParser()
+    # These two defaults used to be swapped ('nerf' as vid_dir, the video path
+    # as ds_name), so a run without arguments always hit NotImplementedError.
+    parser.add_argument("--vid_dir", default='data/raw/videos/May.mp4')
+    parser.add_argument("--ds_name", default='nerf')
+    parser.add_argument("--num_workers", default=2, type=int)
+    parser.add_argument("--process_id", default=0, type=int)
+    parser.add_argument("--total_process", default=1, type=int)
+    parser.add_argument("--reset", action="store_true")
+    parser.add_argument("--load_names", action="store_true")
+
+    args = parser.parse_args()
+    vid_dir = args.vid_dir
+    ds_name = args.ds_name
+    load_names = args.load_names
+
+    # evaluated once so that branch selection and the per-job flag agree
+    # (the job flag used to be compared case-sensitively)
+    is_nerf = ds_name.lower() == 'nerf'
+
+    if is_nerf: # process a single video
+        vid_names = [vid_dir]
+    else: # process a whole dataset
+        if ds_name in ['lrs3_trainval']:
+            vid_name_pattern = os.path.join(vid_dir, "*/*.mp4")
+        elif ds_name in ['TH1KH_512', 'CelebV-HQ']:
+            vid_name_pattern = os.path.join(vid_dir, "*.mp4")
+        elif ds_name in ['lrs2', 'lrs3', 'voxceleb2', 'CMLR']:
+            vid_name_pattern = os.path.join(vid_dir, "*/*/*.mp4")
+        elif ds_name in ["RAVDESS", 'VFHQ']:
+            vid_name_pattern = os.path.join(vid_dir, "*/*/*/*.mp4")
+        else:
+            raise NotImplementedError()
+
+        vid_names_path = os.path.join(vid_dir, "vid_names.pkl")
+        if os.path.exists(vid_names_path) and load_names:
+            print(f"loading vid names from {vid_names_path}")
+            vid_names = load_file(vid_names_path)
+        else:
+            vid_names = multiprocess_glob(vid_name_pattern)
+        vid_names = sorted(vid_names)
+        if not load_names:
+            print(f"saving vid names to {vid_names_path}")
+            save_file(vid_names_path, vid_names)
+
+    process_id = args.process_id
+    total_process = args.total_process
+    if total_process > 1:
+        assert process_id <= total_process -1 and process_id >= 0
+        num_samples_per_process = len(vid_names) // total_process
+        shard_start = process_id * num_samples_per_process
+        if process_id == total_process - 1:
+            # the last shard also takes the remainder of the integer division;
+            # the old test `process_id == total_process` could never be true,
+            # so those trailing videos were never processed by any shard
+            vid_names = vid_names[shard_start : ]
+        else:
+            vid_names = vid_names[shard_start : shard_start + num_samples_per_process]
+
+    if not args.reset:
+        # skip videos whose output file already exists
+        vid_names = get_todo_vid_names(vid_names)
+    print(f"todo videos number: {len(vid_names)}")
+
+    fail_cnt = 0
+    job_args = [(vid_name, is_nerf) for vid_name in vid_names]
+    for (i, res) in multiprocess_run_tqdm(extract_landmark_job, job_args, num_workers=args.num_workers, desc=f"Root {args.process_id}: extracing MP-based landmark2d"): 
+        # a job returns False on failure (None = skipped, True = done)
+        if res is False:
+            fail_cnt += 1
+        print(f"finished {i + 1} / {len(vid_names)} = {(i + 1) / len(vid_names):.4f}, failed {fail_cnt} / {i + 1} = {fail_cnt / (i + 1):.4f}")
+        sys.stdout.flush()
\ No newline at end of file
diff --git a/data_gen/utils/process_video/extract_segment_imgs.py b/data_gen/utils/process_video/extract_segment_imgs.py
new file mode 100644
index 0000000000000000000000000000000000000000..868773c4b323897c6654834ed09b87820b1dc7b4
--- /dev/null
+++ b/data_gen/utils/process_video/extract_segment_imgs.py
@@ -0,0 +1,500 @@
+import os
+os.environ["OMP_NUM_THREADS"] = "1"
+import random
+import glob
+import cv2
+import tqdm
+import numpy as np
+import PIL
+from utils.commons.tensor_utils import convert_to_np
+from utils.commons.os_utils import multiprocess_glob
+import pickle
+import torch
+import mediapipe as mp
+import traceback
+import multiprocessing
+from utils.commons.multiprocess_utils import multiprocess_run_tqdm
+from scipy.ndimage import binary_erosion, binary_dilation
+from sklearn.neighbors import NearestNeighbors
+from mediapipe.tasks.python import vision
+from data_gen.utils.mp_feature_extractors.mp_segmenter import MediapipeSegmenter, encode_segmap_mask_to_image, decode_segmap_mask_from_image
+
+# Module-level model handles shared by the functions below.
+# seg_model / segmenter are (re)created inside extract_background and
+# extract_segment_job. The other three are declared `global` in
+# extract_background but are not assigned anywhere in the visible code.
+seg_model   = None
+segmenter   = None
+mat_model   = None
+lama_model  = None
+lama_config = None
+
+from data_gen.utils.process_video.split_video_to_imgs import extract_img_job
+
+# Background-extraction method name -> suffix appended to the "bg" / "com"
+# prefixes when output file and directory names are built.
+BG_NAME_MAP = {
+    "knn": "",
+    "mat": "_mat",
+    "ddnm": "_ddnm",
+    "lama": "_lama",
+}
+# extract_background(method="knn") keeps every FRAME_SELECT_INTERVAL-th frame
+FRAME_SELECT_INTERVAL = 5
+# NOTE(review): presumably settings for an image_similarity-based check; they
+# are not referenced in the part of the file visible here -- confirm usage.
+SIM_METHOD = "mse"
+SIM_THRESHOLD = 3
+
+def save_file(name, content):
+    """Serialise ``content`` with pickle and write it to the path ``name``."""
+    with open(name, "wb") as sink:
+        sink.write(pickle.dumps(content))
+        
+def load_file(name):
+    """Read the file at path ``name`` and return the unpickled object.
+
+    NOTE: unpickling can execute arbitrary code; only load files written by
+    this pipeline itself.
+    """
+    with open(name, "rb") as source:
+        return pickle.load(source)
+
+def save_rgb_alpha_image_to_path(img, alpha, img_path):
+    """
+    Write an RGB image plus a separate alpha channel to ``img_path``.
+
+    img: RGB image; converted to BGR channel order for OpenCV before writing
+    alpha: alpha channel, concatenated to the colour channels along the last
+        axis (so it needs its own trailing channel axis)
+    img_path: output path; its parent directory is created when missing
+    """
+    try:
+        os.makedirs(os.path.dirname(img_path), exist_ok=True)
+    except OSError:
+        # best effort only (e.g. img_path has no directory part);
+        # cv2.imwrite below decides whether the write succeeds
+        pass
+    bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+    cv2.imwrite(img_path, np.concatenate([bgr, alpha], axis=-1))
+
+def save_rgb_image_to_path(img, img_path):
+    """
+    Write an RGB image to ``img_path``.
+
+    img: RGB image; converted to BGR channel order for OpenCV before writing
+    img_path: output path; its parent directory is created when missing
+    """
+    try:
+        os.makedirs(os.path.dirname(img_path), exist_ok=True)
+    except OSError:
+        # best effort only (e.g. img_path has no directory part);
+        # cv2.imwrite below decides whether the write succeeds
+        pass
+    cv2.imwrite(img_path, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
+
+def load_rgb_image_to_path(img_path):
+    """Read the image at ``img_path`` with OpenCV and return it in RGB channel order."""
+    bgr = cv2.imread(img_path)
+    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
+
+def image_similarity(x: np.ndarray, y: np.ndarray, method="mse"):
+    """
+    Dissimilarity score between two images of the same shape.
+
+    x, y: images to compare
+    method: only "mse" (mean squared error; 0 for identical images) is implemented
+    return: scalar score
+    raises: NotImplementedError for any other method
+    """
+    if method == "mse":
+        x = np.asarray(x)
+        y = np.asarray(y)
+        # Integer images (e.g. uint8) wrap around when subtracted and squared
+        # in their own dtype, which gave wrong scores; use floating point.
+        dtype = np.result_type(x.dtype, y.dtype)
+        if not np.issubdtype(dtype, np.inexact):
+            dtype = np.float64
+        diff = x.astype(dtype) - y.astype(dtype)
+        return np.mean(diff ** 2)
+    else:
+        raise NotImplementedError
+
+def extract_background(img_lst, segmap_mask_lst=None, method="knn", device='cpu', mix_bg=True):
+    """
+    Build one background image for a sequence of frames.
+
+    img_lst: list of rgb ndarray (must be non-empty)
+    segmap_mask_lst: optional list of per-frame segmentation masks, same length
+        as img_lst; channel 0 is used as the background mask. When None, a
+        MediapipeSegmenter is created and masks are computed on demand.
+    method: only "knn" is implemented; any other value raises NotImplementedError
+    device, mix_bg: not used by the code in this function
+    return: [h, w, 3] uint8 background image
+    """
+    # the "knn" branch below only uses every FRAME_SELECT_INTERVAL-th image
+    global segmenter
+    global seg_model
+    global mat_model
+    global lama_model
+    global lama_config
+    
+    assert len(img_lst) > 0
+    if segmap_mask_lst is not None:
+        assert len(segmap_mask_lst) == len(img_lst)
+    else:
+        # no masks supplied: drop the previous global handles and build a fresh segmenter
+        del segmenter
+        del seg_model
+        seg_model = MediapipeSegmenter()
+        segmenter = vision.ImageSegmenter.create_from_options(seg_model.video_options)
+        
+    def get_segmap_mask(img_lst, segmap_mask_lst, index):
+        # use the precomputed mask when available, otherwise segment the frame now
+        if segmap_mask_lst is not None:
+            segmap = segmap_mask_lst[index]
+        else:
+            segmap = seg_model._cal_seg_map(img_lst[index], segmenter=segmenter)
+        return segmap
+        
+    if method == "knn":
+        num_frames = len(img_lst)
+        # subsample the frames; short sequences fall back to the first frame only
+        img_lst = img_lst[::FRAME_SELECT_INTERVAL] if num_frames > FRAME_SELECT_INTERVAL else img_lst[0:1]
+            
+        if segmap_mask_lst is not None:
+            segmap_mask_lst = segmap_mask_lst[::FRAME_SELECT_INTERVAL] if num_frames > FRAME_SELECT_INTERVAL else segmap_mask_lst[0:1]
+            assert len(img_lst) == len(segmap_mask_lst)
+        # get H/W
+        h, w = img_lst[0].shape[:2]
+
+        # nearest neighbors
+        all_xys = np.mgrid[0:h, 0:w].reshape(2, -1).transpose() # [h*w, 2] coordinate grid
+        distss = []
+        for idx, img in enumerate(img_lst):
+            segmap = get_segmap_mask(img_lst=img_lst, segmap_mask_lst=segmap_mask_lst, index=idx)
+            bg = (segmap[0]).astype(bool) # [h,w] bool mask
+            fg_xys = np.stack(np.nonzero(~bg)).transpose(1, 0) # [N_nonbg,2] coordinate of non-bg pixels
+            # NOTE(review): a frame without any non-bg pixel gives an empty fg_xys here -- confirm this cannot happen
+            nbrs = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(fg_xys)
+            dists, _ = nbrs.kneighbors(all_xys) # [h*w, 1] distance to nearest non-bg pixel
+            distss.append(dists)
+
+        distss = np.stack(distss) # [B, h*w, 1]
+        max_dist = np.max(distss, 0) # [h*w, 1]
+        max_id = np.argmax(distss, 0) # id of frame
+
+        bc_pixs = max_dist > 10 # pixels that are background in at least one frame; "background" means farther than 10 from the nearest non-bg pixel
+        bc_pixs_id = np.nonzero(bc_pixs)
+        bc_ids = max_id[bc_pixs]
+
+        num_pixs = distss.shape[1]
+        imgs = np.stack(img_lst).reshape(-1, num_pixs, 3)
+
+        bg_img = np.zeros((h*w, 3), dtype=np.uint8)
+        bg_img[bc_pixs_id, :] = imgs[bc_ids, bc_pixs_id, :] # for these definite-bg pixels, sample the colour directly from the corresponding frame
+        bg_img = bg_img.reshape(h, w, 3)
+
+        max_dist = max_dist.reshape(h, w)
+        bc_pixs = max_dist > 10 # 5
+        # from here on: bg_xys = pixels still to be filled, fg_xys = pixels with a known background colour
+        bg_xys = np.stack(np.nonzero(~bc_pixs)).transpose()
+        fg_xys = np.stack(np.nonzero(bc_pixs)).transpose()
+        nbrs = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(fg_xys)
+        distances, indices = nbrs.kneighbors(bg_xys) # for the remaining pixels, use KNN to find the nearest known-bg pixel
+        bg_fg_xys = fg_xys[indices[:, 0]]
+        bg_img[bg_xys[:, 0], bg_xys[:, 1], :] = bg_img[bg_fg_xys[:, 0], bg_fg_xys[:, 1], :]
+    else:
+        raise NotImplementedError # deprecated
+    
+    return bg_img
+
+def inpaint_torso_job(gt_img, segmap):
+    """
+    Remove the head from a frame and in-paint the region above torso and neck.
+
+    gt_img: [h, w, 3] image
+    segmap: per-class masks indexed on the first axis; the code uses channel 0
+        as background, 1 + 3 + 5 as head, 2 as neck and 4 as torso
+    return: (torso_img, torso_img_mask, torso_with_bg_img, torso_with_bg_img_mask)
+        torso_img: in-painted image, zero outside torso_img_mask
+        torso_img_mask: neck | torso | in-painted pixels
+        torso_with_bg_img: in-painted image, zero outside torso_with_bg_img_mask
+        torso_with_bg_img_mask: torso_img_mask | background
+    """
+    bg_part = (segmap[0]).astype(bool)
+    head_part = (segmap[1] + segmap[3] + segmap[5]).astype(bool)
+    neck_part = (segmap[2]).astype(bool)
+    torso_part = (segmap[4]).astype(bool) 
+    img = gt_img.copy()
+    img[head_part] = 0
+
+    # torso part "vertical" in-painting...
+    L = 8 + 1
+    torso_coords = np.stack(np.nonzero(torso_part), axis=-1) # [M, 2]
+    # lexsort: sort 2D coords first by y then by x, 
+    # ref: https://stackoverflow.com/questions/2706605/sorting-a-2d-numpy-array-by-multiple-axes
+    inds = np.lexsort((torso_coords[:, 0], torso_coords[:, 1]))
+    torso_coords = torso_coords[inds]
+    # choose the top pixel for each column
+    u, uid, ucnt = np.unique(torso_coords[:, 1], return_index=True, return_counts=True)
+    top_torso_coords = torso_coords[uid] # [m, 2]
+    # only keep top-is-head pixels
+    top_torso_coords_up = top_torso_coords.copy() - np.array([1, 0]) # [N, 2]
+    mask = head_part[tuple(top_torso_coords_up.T)] 
+    if mask.any():
+        top_torso_coords = top_torso_coords[mask]
+        # get the color
+        top_torso_colors = gt_img[tuple(top_torso_coords.T)] # [m, 3]
+        # construct inpaint coords (vertically up, or minus in x)
+        inpaint_torso_coords = top_torso_coords[None].repeat(L, 0) # [L, m, 2]
+        inpaint_offsets = np.stack([-np.arange(L), np.zeros(L, dtype=np.int32)], axis=-1)[:, None] # [L, 1, 2]
+        inpaint_torso_coords += inpaint_offsets
+        inpaint_torso_coords = inpaint_torso_coords.reshape(-1, 2) # [Lm, 2]
+        inpaint_torso_colors = top_torso_colors[None].repeat(L, 0) # [L, m, 3]
+        # colours fade by 2% per pixel going up
+        darken_scaler = 0.98 ** np.arange(L).reshape(L, 1, 1) # [L, 1, 1]
+        inpaint_torso_colors = (inpaint_torso_colors * darken_scaler).reshape(-1, 3) # [Lm, 3]
+        # set color
+        img[tuple(inpaint_torso_coords.T)] = inpaint_torso_colors
+        inpaint_torso_mask = np.zeros_like(img[..., 0]).astype(bool)
+        inpaint_torso_mask[tuple(inpaint_torso_coords.T)] = True
+    else:
+        inpaint_torso_mask = None
+    
+    # neck part "vertical" in-painting...
+    push_down = 4
+    L = 48 + push_down + 1
+    # grow the neck mask vertically by 3 pixels in each direction
+    neck_part = binary_dilation(neck_part, structure=np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=bool), iterations=3)
+    neck_coords = np.stack(np.nonzero(neck_part), axis=-1) # [M, 2]
+    # lexsort: sort 2D coords first by y then by x, 
+    # ref: https://stackoverflow.com/questions/2706605/sorting-a-2d-numpy-array-by-multiple-axes
+    inds = np.lexsort((neck_coords[:, 0], neck_coords[:, 1]))
+    neck_coords = neck_coords[inds]
+    # choose the top pixel for each column
+    u, uid, ucnt = np.unique(neck_coords[:, 1], return_index=True, return_counts=True)
+    top_neck_coords = neck_coords[uid] # [m, 2]
+    # only keep top-is-head pixels
+    top_neck_coords_up = top_neck_coords.copy() - np.array([1, 0])
+    mask = head_part[tuple(top_neck_coords_up.T)] 
+    top_neck_coords = top_neck_coords[mask]
+    # push these top down for 4 pixels to make the neck inpainting more natural...
+    offset_down = np.minimum(ucnt[mask] - 1, push_down)
+    top_neck_coords += np.stack([offset_down, np.zeros_like(offset_down)], axis=-1)
+    # get the color
+    top_neck_colors = gt_img[tuple(top_neck_coords.T)] # [m, 3]
+    # construct inpaint coords (vertically up, or minus in x)
+    inpaint_neck_coords = top_neck_coords[None].repeat(L, 0) # [L, m, 2]
+    inpaint_offsets = np.stack([-np.arange(L), np.zeros(L, dtype=np.int32)], axis=-1)[:, None] # [L, 1, 2]
+    inpaint_neck_coords += inpaint_offsets
+    inpaint_neck_coords = inpaint_neck_coords.reshape(-1, 2) # [Lm, 2]
+    inpaint_neck_colors = top_neck_colors[None].repeat(L, 0) # [L, m, 3]
+    darken_scaler = 0.98 ** np.arange(L).reshape(L, 1, 1) # [L, 1, 1]
+    inpaint_neck_colors = (inpaint_neck_colors * darken_scaler).reshape(-1, 3) # [Lm, 3]
+    # set color
+    img[tuple(inpaint_neck_coords.T)] = inpaint_neck_colors
+    # apply blurring to the inpaint area to avoid vertical-line artifacts...
+    inpaint_mask = np.zeros_like(img[..., 0]).astype(bool)
+    inpaint_mask[tuple(inpaint_neck_coords.T)] = True
+
+    blur_img = img.copy()
+    # NOTE(review): the third positional argument of cv2.GaussianBlur is sigmaX,
+    # so the border constant is used as the sigma here; left as is to keep the output unchanged
+    blur_img = cv2.GaussianBlur(blur_img, (5, 5), cv2.BORDER_DEFAULT)
+    img[inpaint_mask] = blur_img[inpaint_mask]
+
+    # set mask
+    torso_img_mask = (neck_part | torso_part | inpaint_mask)
+    torso_with_bg_img_mask = (bg_part | neck_part | torso_part | inpaint_mask)
+    if inpaint_torso_mask is not None:
+        torso_img_mask = torso_img_mask | inpaint_torso_mask
+        torso_with_bg_img_mask = torso_with_bg_img_mask | inpaint_torso_mask
+    
+    torso_img = img.copy()
+    torso_img[~torso_img_mask] = 0
+    torso_with_bg_img = img.copy()
+    # fixed: this line used to zero torso_img a second time, which left
+    # torso_with_bg_img completely unmasked
+    torso_with_bg_img[~torso_with_bg_img_mask] = 0
+
+    return torso_img, torso_img_mask, torso_with_bg_img, torso_with_bg_img_mask
+
+
+def extract_segment_job(video_name, nerf=False, idx=None, total=None, background_method='knn', device="cpu", total_gpus=0, mix_bg=True):
+    """
+    Segment every frame of one video and write all derived images.
+
+    For each frame this writes the encoded segmap, the head / torso / person /
+    bg cut-outs (RGBA png) and the in-painted torso image; afterwards one
+    background image for the whole video and a per-frame composite in which
+    the background pixels are replaced by that background image.
+
+    video_name: path of the input .mp4
+    nerf: True -> single-video layout ("/raw/" -> "/processed/"),
+          False -> dataset layout ("/video/" -> "/gt_imgs/" etc.)
+    idx, total: not used by this function
+    background_method: key of BG_NAME_MAP, forwarded to extract_background
+    device: a value containing "cuda" is replaced by "cuda:<k>", with k derived
+        from the worker process name and total_gpus
+    total_gpus: number of GPUs to spread the workers over (only read for cuda devices)
+    mix_bg: forwarded to extract_background
+    return: 0 on success, 1 if any exception occurred (its traceback is printed)
+    """
+    global segmenter
+    global seg_model
+    # drop the previous handles before building a fresh segmenter for this video
+    del segmenter
+    del seg_model
+    seg_model = MediapipeSegmenter()
+    segmenter = vision.ImageSegmenter.create_from_options(seg_model.video_options)
+    try:
+        if "cuda" in device:
+            # determine which cuda index from subprocess id
+            # (assumes a process name ending in "-<number>" -- TODO confirm for the pool in use)
+            pname = multiprocessing.current_process().name
+            pid = int(pname.rsplit("-", 1)[-1]) - 1
+            cuda_id = pid % total_gpus
+            device = f"cuda:{cuda_id}"
+
+        if nerf: # single video
+            raw_img_dir = video_name.replace(".mp4", "/gt_imgs/").replace("/raw/","/processed/")
+        else: # whole dataset
+            raw_img_dir = video_name.replace(".mp4", "").replace("/video/", "/gt_imgs/")
+        if not os.path.exists(raw_img_dir):
+            extract_img_job(video_name, raw_img_dir) # use ffmpeg to split video into imgs
+        
+        # NOTE(review): glob does not guarantee any order, so frames are not
+        # necessarily processed in temporal order -- confirm this is acceptable
+        img_names = glob.glob(os.path.join(raw_img_dir, "*.jpg"))
+
+        img_lst = []
+
+        for img_name in img_names:
+            img = cv2.imread(img_name)
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+            img_lst.append(img)
+
+        segmap_mask_lst, segmap_image_lst = seg_model._cal_seg_map_for_video(img_lst, segmenter=segmenter, return_onehot_mask=True, return_segmap_image=True)
+        del segmap_image_lst
+        # for i in range(len(img_lst)):
+        for i in tqdm.trange(len(img_lst), desc='generating segment images using segmaps...'):
+            img_name = img_names[i]
+            segmap = segmap_mask_lst[i]
+            img = img_lst[i]
+            out_img_name = img_name.replace("/gt_imgs/", "/segmaps/").replace(".jpg", ".png") # saving as jpg would introduce errors in the pixel values
+            try: os.makedirs(os.path.dirname(out_img_name), exist_ok=True)
+            except: pass
+            encoded_segmap = encode_segmap_mask_to_image(segmap)
+            save_rgb_image_to_path(encoded_segmap, out_img_name)
+        
+            for mode in ['head', 'torso', 'person', 'bg']:
+                out_img, mask = seg_model._seg_out_img_with_segmap(img, segmap, mode=mode)
+                img_alpha = 255 * np.ones((img.shape[0], img.shape[1], 1), dtype=np.uint8) # alpha
+                mask = mask[0][..., None]
+                img_alpha[~mask] = 0
+                out_img_name = img_name.replace("/gt_imgs/", f"/{mode}_imgs/").replace(".jpg", ".png")
+                save_rgb_alpha_image_to_path(out_img, img_alpha, out_img_name)
+            
+            inpaint_torso_img, inpaint_torso_img_mask, inpaint_torso_with_bg_img, inpaint_torso_with_bg_img_mask = inpaint_torso_job(img, segmap)
+            img_alpha = 255 * np.ones((img.shape[0], img.shape[1], 1), dtype=np.uint8) # alpha
+            img_alpha[~inpaint_torso_img_mask[..., None]] = 0
+            out_img_name = img_name.replace("/gt_imgs/", f"/inpaint_torso_imgs/").replace(".jpg", ".png")
+            save_rgb_alpha_image_to_path(inpaint_torso_img, img_alpha, out_img_name)
+            
+        bg_prefix_name = f"bg{BG_NAME_MAP[background_method]}"
+        bg_img = extract_background(img_lst, segmap_mask_lst, method=background_method, device=device, mix_bg=mix_bg)
+        if nerf:
+            out_img_name = video_name.replace("/raw/", "/processed/").replace(".mp4", f"/{bg_prefix_name}.jpg")
+        else:
+            out_img_name = video_name.replace("/video/", f"/{bg_prefix_name}_img/").replace(".mp4", ".jpg")
+        save_rgb_image_to_path(bg_img, out_img_name)
+        
+        com_prefix_name = f"com{BG_NAME_MAP[background_method]}"
+        for i, img_name in enumerate(img_names):
+            com_img = img_lst[i].copy()
+            segmap = segmap_mask_lst[i]
+            # replace the background pixels of the frame by the shared background image
+            bg_part = segmap[0].astype(bool)[..., None].repeat(3,axis=-1)
+            com_img[bg_part] = bg_img[bg_part]
+            out_img_name = img_name.replace("/gt_imgs/", f"/{com_prefix_name}_imgs/")
+            save_rgb_image_to_path(com_img, out_img_name)
+        return 0
+    except Exception as e:
+        print(str(type(e)), e)
+        # print_exc takes no exception argument (its first parameter is
+        # `limit`); passing `e` made this handler raise a TypeError itself
+        traceback.print_exc()
+        return 1
+
+# def check_bg_img_job_finished(raw_img_dir, bg_name, com_dir):
+#     img_names = glob.glob(os.path.join(raw_img_dir, "*.jpg"))
+#     com_names = glob.glob(os.path.join(com_dir, "*.jpg"))
+#     return len(img_names) == len(com_names) and os.path.exists(bg_name)
+
+# extract background and combined image
+# need pre-processed "gt_imgs" and "segmaps"
+def extract_bg_img_job(video_name, nerf=False, idx=None, total=None, background_method='knn', device="cpu", total_gpus=0, mix_bg=True):
+    """
+    Build the background image and the per-frame composites of one video.
+
+    Reads the frames from the "gt_imgs" directory and the encoded
+    segmentation maps from the matching "segmaps" directory, so both must
+    have been produced beforehand.
+
+    video_name: path of the input .mp4
+    nerf: True -> single-video layout ("/raw/" -> "/processed/"),
+          False -> dataset layout ("/video/" -> "/gt_imgs/" etc.)
+    idx, total: not used by this function
+    background_method: key of BG_NAME_MAP, forwarded to extract_background
+    device: a value containing "cuda" is replaced by "cuda:<k>", with k derived
+        from the worker process name and total_gpus
+    total_gpus: number of GPUs to spread the workers over (only read for cuda devices)
+    mix_bg: forwarded to extract_background
+    return: 0 on success, 1 if any exception occurred (its traceback is printed)
+    """
+    try:
+        bg_prefix_name = f"bg{BG_NAME_MAP[background_method]}"
+        com_prefix_name = f"com{BG_NAME_MAP[background_method]}"
+        
+        if "cuda" in device:
+            # determine which cuda index from subprocess id
+            # (assumes a process name ending in "-<number>" -- TODO confirm for the pool in use)
+            pname = multiprocessing.current_process().name
+            pid = int(pname.rsplit("-", 1)[-1]) - 1
+            cuda_id = pid % total_gpus
+            device = f"cuda:{cuda_id}"
+            
+        if nerf: # single video
+            raw_img_dir = video_name.replace(".mp4", "/gt_imgs/").replace("/raw/","/processed/")
+        else: # whole dataset
+            raw_img_dir = video_name.replace(".mp4", "").replace("/video/", "/gt_imgs/")
+        if nerf:
+            bg_name = video_name.replace("/raw/", "/processed/").replace(".mp4", f"/{bg_prefix_name}.jpg")
+        else:
+            bg_name = video_name.replace("/video/", f"/{bg_prefix_name}_img/").replace(".mp4", ".jpg")
+        # com_dir = raw_img_dir.replace("/gt_imgs/", f"/{com_prefix_name}_imgs/")
+        # if check_bg_img_job_finished(raw_img_dir=raw_img_dir, bg_name=bg_name, com_dir=com_dir):
+        #     print(f"Already finished, skip {raw_img_dir} ")
+        #     return 0
+        
+        img_names = glob.glob(os.path.join(raw_img_dir, "*.jpg"))
+        img_lst = []
+        for img_name in img_names:
+            img = cv2.imread(img_name)
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+            img_lst.append(img)
+            
+        # load the stored segmentation map of every frame
+        segmap_mask_lst = []
+        for img_name in img_names:
+            segmap_img_name = img_name.replace("/gt_imgs/", "/segmaps/").replace(".jpg", ".png")
+            segmap_img = load_rgb_image_to_path(segmap_img_name)
+            
+            segmap_mask = decode_segmap_mask_from_image(segmap_img)
+            segmap_mask_lst.append(segmap_mask)
+            
+        bg_img = extract_background(img_lst, segmap_mask_lst, method=background_method, device=device, mix_bg=mix_bg)
+        save_rgb_image_to_path(bg_img, bg_name)
+        
+        for i, img_name in enumerate(img_names):
+            com_img = img_lst[i].copy()
+            segmap = segmap_mask_lst[i]
+            # replace the background pixels of the frame by the shared background image
+            bg_part = segmap[0].astype(bool)[..., None].repeat(3, axis=-1)
+            com_img[bg_part] = bg_img[bg_part]
+            com_name = img_name.replace("/gt_imgs/", f"/{com_prefix_name}_imgs/")
+            save_rgb_image_to_path(com_img, com_name)
+        return 0
+    
+    except Exception as e:
+        print(str(type(e)), e)
+        # print_exc takes no exception argument (its first parameter is
+        # `limit`); passing `e` made this handler raise a TypeError itself
+        traceback.print_exc()
+        return 1
+
+def out_exist_job(vid_name, background_method='knn', only_bg_img=False):
+    """Tell whether the outputs of ``vid_name`` still have to be generated.
+
+    A video counts as finished when its "gt_imgs" directory and every
+    required output directory exist and hold the same number of entries.
+    The composited-image directory is always required; the "head_imgs"
+    directory is required as well unless ``only_bg_img`` is set.
+
+    Returns None when the video is finished, otherwise ``vid_name``.
+    """
+    com_prefix_name = f"com{BG_NAME_MAP[background_method]}"
+    img_dir = vid_name.replace("/video/", "/gt_imgs/").replace(".mp4", "")
+    head_dir = img_dir.replace("/gt_imgs/", "/head_imgs/")
+    com_dir = img_dir.replace("/gt_imgs/", f"/{com_prefix_name}_imgs/")
+    required_dirs = [com_dir] if only_bg_img else [head_dir, com_dir]
+
+    # any missing directory means the job has not run (or has not finished)
+    if not all(os.path.exists(d) for d in [img_dir, *required_dirs]):
+        return vid_name
+
+    num_frames = len(os.listdir(img_dir))
+    if all(len(os.listdir(d)) == num_frames for d in required_dirs):
+        return None
+    return vid_name
+
+def get_todo_vid_names(vid_names, background_method='knn', only_bg_img=False):
+    """Return the videos whose outputs are still missing, as reported by ``out_exist_job``."""
+    if len(vid_names) == 1:
+        # a single video (the nerf case) is returned as is, no check is run
+        return vid_names
+    job_args = [(name, background_method, only_bg_img) for name in vid_names]
+    results = multiprocess_run_tqdm(out_exist_job, job_args, num_workers=16, desc="checking todo videos...")
+    # each yielded item is an (index, result) pair; a None result marks a finished video
+    return [pending for _, pending in results if pending is not None]
+
+if __name__ == '__main__':
+    import argparse, glob, tqdm, random
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--vid_dir", default='/home/tiger/datasets/raw/CelebV-HQ/video')
+    parser.add_argument("--ds_name", default='CelebV-HQ')
+    parser.add_argument("--num_workers", default=48, type=int)
+    parser.add_argument("--seed", default=0, type=int)
+    parser.add_argument("--process_id", default=0, type=int)
+    parser.add_argument("--total_process", default=1, type=int)
+    parser.add_argument("--reset", action='store_true')
+    parser.add_argument("--load_names", action="store_true")
+    parser.add_argument("--background_method", choices=['knn', 'mat', 'ddnm', 'lama'], type=str, default='knn')
+    parser.add_argument("--total_gpus", default=0, type=int) # zero gpus means utilizing cpu
+    parser.add_argument("--only_bg_img", action="store_true")
+    parser.add_argument("--no_mix_bg", action="store_true")
+
+    args = parser.parse_args()
+    vid_dir = args.vid_dir
+    ds_name = args.ds_name
+    load_names = args.load_names
+    background_method = args.background_method
+    total_gpus = args.total_gpus
+    only_bg_img = args.only_bg_img
+    mix_bg = not args.no_mix_bg
+
+    devices = os.environ.get('CUDA_VISIBLE_DEVICES', '').split(",")
+    for d in devices[:total_gpus]:
+        os.system(f'pkill -f "voidgpu{d}"')  # kill any process whose command line matches voidgpu<device id>
+
+    is_nerf = ds_name.lower() == 'nerf'  # computed once so every later check agrees on the casing
+    if is_nerf: # process a single video
+        vid_names = [vid_dir]
+    else: # process the whole dataset
+        if ds_name in ['lrs3_trainval']:
+            vid_name_pattern = os.path.join(vid_dir, "*/*.mp4")
+        elif ds_name in ['TH1KH_512', 'CelebV-HQ']:
+            vid_name_pattern = os.path.join(vid_dir, "*.mp4")
+        elif ds_name in ['lrs2', 'lrs3', 'voxceleb2']:
+            vid_name_pattern = os.path.join(vid_dir, "*/*/*.mp4")
+        elif ds_name in ["RAVDESS", 'VFHQ']:
+            vid_name_pattern = os.path.join(vid_dir, "*/*/*/*.mp4")
+        else:
+            raise NotImplementedError()
+
+        vid_names_path = os.path.join(vid_dir, "vid_names.pkl")
+        if os.path.exists(vid_names_path) and load_names:
+            print(f"loading vid names from {vid_names_path}")
+            vid_names = load_file(vid_names_path)
+        else:
+            vid_names = multiprocess_glob(vid_name_pattern)
+        vid_names = sorted(vid_names)
+        print(f"saving vid names to {vid_names_path}")
+        save_file(vid_names_path, vid_names)
+
+    vid_names = sorted(vid_names)
+    random.seed(args.seed)
+    random.shuffle(vid_names)
+
+    process_id = args.process_id
+    total_process = args.total_process
+    # split the shuffled list into equal shards; the last shard also takes the remainder
+    if total_process > 1:
+        assert process_id <= total_process -1
+        num_samples_per_process = len(vid_names) // total_process
+        if process_id == total_process - 1:
+            vid_names = vid_names[process_id * num_samples_per_process : ]
+        else:
+            vid_names = vid_names[process_id * num_samples_per_process : (process_id+1) * num_samples_per_process]
+
+    if not args.reset:
+        vid_names = get_todo_vid_names(vid_names, background_method, only_bg_img)
+    print(f"todo videos number: {len(vid_names)}")
+
+    device = "cuda" if total_gpus > 0 else "cpu"
+    # both jobs take the same positional arguments, only the worker function differs
+    extract_job = extract_bg_img_job if only_bg_img else extract_segment_job
+    fn_args = [
+        (vid_name, is_nerf, i, len(vid_names), background_method, device, total_gpus, mix_bg)
+        for i, vid_name in enumerate(vid_names)
+    ]
+
+    for _ in multiprocess_run_tqdm(extract_job, fn_args, desc=f"Root process {args.process_id}:  segment images", num_workers=args.num_workers):
+        pass
\ No newline at end of file
diff --git a/data_gen/utils/process_video/fit_3dmm_landmark.py b/data_gen/utils/process_video/fit_3dmm_landmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..2622860f66989758fafc7f244b89b4f6da5f43f1
--- /dev/null
+++ b/data_gen/utils/process_video/fit_3dmm_landmark.py
@@ -0,0 +1,565 @@
+# This is a script for efficient 3DMM coefficient extraction.
+# It can reconstruct an accurate 3D face in real time.
+# It is built upon the BFM 2009 model and the mediapipe landmark extractor.
+# It is authored by ZhenhuiYe (zhenhuiye@zju.edu.cn), feel free to contact him with any suggestions for improvement!
+
+from numpy.core.numeric import require
+from numpy.lib.function_base import quantile
+import torch
+import torch.nn.functional as F
+import copy
+import numpy as np
+
+import random
+import pickle
+import os
+import sys
+import cv2
+import argparse
+import tqdm
+from utils.commons.multiprocess_utils import multiprocess_run_tqdm
+from data_gen.utils.mp_feature_extractors.face_landmarker import MediapipeLandmarker, read_video_to_frames
+from deep_3drecon.deep_3drecon_models.bfm import ParametricFaceModel
+from deep_3drecon.secc_renderer import SECC_Renderer
+from utils.commons.os_utils import multiprocess_glob
+
+
+face_model = ParametricFaceModel(bfm_folder='deep_3drecon/BFM', 
+            camera_distance=10, focal=1015, keypoint_mode='mediapipe')  # built at import time; read as a global by fit_3dmm_for_a_video
+face_model.to(torch.device("cuda:0"))  # executed on import, so importing this module touches cuda:0
+# absolute directory that contains this source file
+dir_path = os.path.dirname(os.path.realpath(__file__))
+
+
+def draw_axes(img, pitch, yaw, roll, tx, ty, size=50):
+    """Draw the three rotated coordinate axes on a copy of ``img``, anchored at pixel (tx, ty).
+
+    ``pitch`` and ``roll`` are negated before use; the three angles are then passed to
+    ``cv2.Rodrigues`` as one rotation vector. Each axis is ``size`` pixels long and is
+    drawn with color (255,0,0), (0,255,0) and (0,0,255) for x, y and z respectively.
+    """
+    rotation_matrix = cv2.Rodrigues(np.array([-pitch, yaw, -roll]))[0].astype(np.float64)
+    # columns: the unit x, y and z axes followed by the origin
+    axes_points = np.concatenate([np.eye(3), np.zeros((3, 1))], axis=1)
+    axes_points = ((rotation_matrix @ axes_points)[:2, :] * size).astype(int)
+    axes_points[0, :] = axes_points[0, :] + tx
+    axes_points[1, :] = axes_points[1, :] + ty
+
+    new_img = img.copy()
+    # hand OpenCV plain Python ints: some builds reject numpy integer scalars in point tuples
+    origin = tuple(axes_points[:, 3].tolist())
+    for column, color in enumerate([(255, 0, 0), (0, 255, 0), (0, 0, 255)]):
+        cv2.line(new_img, origin, tuple(axes_points[:, column].tolist()), color, 3)
+    return new_img
+
+def save_file(name, content):  # pickle ``content`` into the file at path ``name``
+    with open(name, mode="wb") as sink:
+        sink.write(pickle.dumps(content))
+        
+def load_file(name):
+    """Unpickle and return the object stored at ``name`` (only use on trusted files: pickle can run code)."""
+    with open(name, mode="rb") as source:
+        return pickle.load(source)
+
+def cal_lap_loss(in_tensor):
+    """Mean squared response of a (-0.5, 1, -0.5) filter run along the first axis.
+
+    All trailing axes are flattened into channels and the sequence is padded by
+    repeating its first and last step, so the output has one value per input step.
+    """
+    num_steps = in_tensor.shape[0]
+    seq = in_tensor.reshape([num_steps, -1]).permute(1, 0).unsqueeze(1)  # [c, 1, t]
+    padded = torch.cat([seq[:, :, 0:1], seq, seq[:, :, -1:]], dim=-1)
+    kernel = torch.Tensor((-0.5, 1.0, -0.5)).reshape([1, 1, 3]).float().to(padded.device)  # [1, 1, kw]
+    return torch.mean(F.conv1d(padded, kernel) ** 2)
+
+def cal_vel_loss(ldm):
+    """Mean absolute difference between consecutive entries along the first axis."""
+    step_delta = ldm[1:] - ldm[:-1]
+    return step_delta.abs().mean()
+
+def cal_lan_loss(proj_lan, gt_lan):
+    """Weighted mean squared error between projected and target landmarks ([B, 68, 2])."""
+    loss = (proj_lan - gt_lan) ** 2
+    # use the ldm weights from deep3drecon, see deep_3drecon/deep_3drecon_models/losses.py
+    # (every landmark weighs 1, the three groups below weigh 3)
+    weights = torch.ones_like(loss)
+    weights[:, 36:48, :] = 3 # eye 12 points
+    weights[:, -8:, :] = 3 # inner lip 8 points
+    weights[:, 28:31, :] = 3 # nose 3 points
+    loss = loss * weights
+    return torch.mean(loss)
+
+def cal_lan_loss_mp(proj_lan, gt_lan, mean:bool=True):
+    """Weighted squared error between projected and target mediapipe landmarks.
+
+    Returns the scalar mean when ``mean`` is true, otherwise the per-element weighted errors.
+    """
+    unmatch_mask = [ 93, 127, 132, 234, 323, 356, 361, 454]
+    upper_eye = [161,160,159,158,157] + [388,387,386,385,384]
+    eye = [33,246,161,160,159,158,157,173,133,155,154,153,145,144,163,7] + [263,466,388,387,386,385,384,398,362,382,381,380,374,373,390,249]
+    inner_lip = [78,191,80,81,82,13,312,311,310,415,308,324,318,402,317,14,87,178,88,95]
+    outer_lip = [61,185,40,39,37,0,267,269,270,409,291,375,321,405,314,17,84,181,91,146]
+    squared_err = (proj_lan - gt_lan).pow(2)
+    weights = torch.ones_like(squared_err)
+    # applied in order: later groups overwrite earlier ones (upper_eye is a subset of eye)
+    for group, weight in ((eye, 3), (upper_eye, 20), (inner_lip, 5), (outer_lip, 5), (unmatch_mask, 0)):
+        weights[:, group] = weight
+    weighted = squared_err * weights
+    if not mean:
+        return weighted
+    return torch.mean(weighted)
+
+def cal_acceleration_loss(trans):
+    """Mean absolute second-order difference of ``trans`` along the first axis."""
+    velocity = trans[1:] - trans[:-1]
+    return (velocity[1:] - velocity[:-1]).abs().mean()
+
+def cal_acceleration_ldm_loss(ldm):
+    """Mean absolute frame-to-frame acceleration of landmarks shaped [B, 68, 2], with lip points down-weighted."""
+    vel = ldm[1:] - ldm[:-1]
+    acc = vel[1:] - vel[:-1]
+    lip_weight = 0.25 # we dont want smooth the lip too much
+    acc[:, 48:68] *= lip_weight # landmarks live on axis 1; axis 0 is the frame axis
+    return torch.mean(torch.abs(acc))
+ 
+def set_requires_grad(tensor_list):  # switch on gradient tracking, in place, for every tensor given
+    for entry in tensor_list:
+        entry.requires_grad_(True)
+
+@torch.enable_grad()
+def fit_3dmm_for_a_video(
+    video_name,
+    nerf=False, # use the file name convention for GeneFace++
+    id_mode='global',
+    debug=False,
+    keypoint_mode='mediapipe',
+    large_yaw_threshold=9999999.9,
+    save=True
+):
+    """Fit per-frame 3DMM coefficients of a video to its 2D landmarks.
+
+    Landmarks are loaded from the video's ``lms_2d`` file, or extracted with
+    ``MediapipeLandmarker`` when that file is missing. Fitting runs in three
+    stages: pose only (euler, trans), a joint coarse fit of all coefficients,
+    then a fine fit in batches of 50 frames initialised from the coarse result.
+
+    Args:
+        video_name: path of an ``.mp4`` file; frames must be square.
+        nerf: use the single-video ("/raw/" -> "/processed/") path convention
+            instead of the dataset ("/video/" -> ...) one.
+        id_mode: 'global' fits one identity vector for the whole video,
+            'finegrained' fits one per frame.
+        debug: print per-batch losses and render a debug video. The rendering
+            relies on a module-level ``secc_renderer``, which this file only
+            creates when it is run as a script.
+        keypoint_mode: 'mediapipe' selects ``cal_lan_loss_mp``; any other value
+            selects ``cal_lan_loss``. It also decides the output file name.
+        large_yaw_threshold: currently unused.
+        save: write the coefficient dict to the output ``.npy`` file.
+
+    Returns:
+        A dict with numpy arrays under 'id', 'exp', 'euler' and 'trans', or
+        False when the landmarks could not be obtained.
+    """
+    assert video_name.endswith(".mp4"), "this function only support video as input"
+    if id_mode == 'global':
+        LAMBDA_REG_ID, LAMBDA_REG_EXP = 0.2, 0.6
+    else:
+        LAMBDA_REG_ID, LAMBDA_REG_EXP = 0.3, 0.05
+    LAMBDA_REG_LAP = 1.0
+    LAMBDA_REG_VEL_ID = LAMBDA_REG_VEL_EXP = 0.0 # laplacian is all you need for temporal consistency
+
+    frames = read_video_to_frames(video_name) # [T, H, W, 3]
+    img_h, img_w = frames.shape[1], frames.shape[2]
+    assert img_h == img_w
+    num_frames = len(frames)
+
+    if nerf: # single video
+        lm_name = video_name.replace("/raw/", "/processed/").replace(".mp4","/lms_2d.npy")
+    else:
+        lm_name = video_name.replace("/video/", "/lms_2d/").replace(".mp4", "_lms.npy")
+
+    if os.path.exists(lm_name):
+        lms = np.load(lm_name)
+    else:
+        print(f"lms_2d file not found, try to extract it from video... {lm_name}")
+        try:
+            landmarker = MediapipeLandmarker()
+            img_lm478, vid_lm478 = landmarker.extract_lm478_from_frames(frames, anti_smooth_factor=20)
+            lms = landmarker.combine_vid_img_lm478_to_lm478(img_lm478, vid_lm478)
+        except Exception as e:
+            print(e)
+            return False
+        if lms is None:
+            print(f"get None lms_2d, please check whether each frame has one head, exiting... {lm_name}")
+            return False
+    # only the first 468 landmarks of each frame are used for fitting
+    lms = lms[:, :468, :]
+    lms = torch.FloatTensor(lms).cuda()
+    lms[..., 1] = img_h - lms[..., 1] # flip the height axis
+
+    if keypoint_mode == 'mediapipe':
+        # default
+        cal_lan_loss_fn = cal_lan_loss_mp
+        if nerf: # single video
+            out_name = video_name.replace("/raw/", "/processed/").replace(".mp4", "/coeff_fit_mp.npy")
+        else:
+            out_name = video_name.replace("/video/", "/coeff_fit_mp/").replace(".mp4", "_coeff_fit_mp.npy")
+    else:
+        # lm68 is less accurate than mp
+        cal_lan_loss_fn = cal_lan_loss
+        if nerf: # single video
+            out_name = video_name.replace("/raw/", "/processed/").replace(".mp4", "_coeff_fit_lm68.npy")
+        else:
+            out_name = video_name.replace("/video/", "/coeff_fit_lm68/").replace(".mp4", "_coeff_fit_lm68.npy")
+    try:
+        os.makedirs(os.path.dirname(out_name), exist_ok=True)
+    except OSError as e:
+        print(f"could not create the output directory for {out_name}: {e}")
+
+    id_dim, exp_dim = 80, 64
+
+    h = w = face_model.center * 2
+    img_scale_factor = img_h / h
+    lms /= img_scale_factor # rescale lms into [0,224]
+
+    if id_mode == 'global':
+        # default choice by GeneFace++ and later works
+        id_para = lms.new_zeros((1, id_dim), requires_grad=True)
+    elif id_mode == 'finegrained':
+        # legacy choice by GeneFace1 (ICLR 2023)
+        id_para = lms.new_zeros((num_frames, id_dim), requires_grad=True)
+    else: raise NotImplementedError(f"id mode {id_mode} not supported! we only support global or finegrained.")
+    exp_para = lms.new_zeros((num_frames, exp_dim), requires_grad=True)
+    euler_angle = lms.new_zeros((num_frames, 3), requires_grad=True)
+    trans = lms.new_zeros((num_frames, 3), requires_grad=True)
+
+    set_requires_grad([id_para, exp_para, euler_angle, trans])
+
+    optimizer_idexp = torch.optim.Adam([id_para, exp_para], lr=.1)
+    optimizer_frame = torch.optim.Adam([euler_angle, trans], lr=.1)
+
+    # stage 1: the other parameters stay at their initial values, fit euler and trans first
+    for _ in range(200):
+        if id_mode == 'global':
+            proj_geo = face_model.compute_for_landmark_fit(
+                id_para.expand((num_frames, id_dim)), exp_para, euler_angle, trans)
+        else:
+            proj_geo = face_model.compute_for_landmark_fit(
+                id_para, exp_para, euler_angle, trans)
+        loss_lan = cal_lan_loss_fn(proj_geo[:, :, :2], lms.detach())
+        loss = loss_lan
+        optimizer_frame.zero_grad()
+        loss.backward()
+        optimizer_frame.step()
+
+    for param_group in optimizer_frame.param_groups:
+        param_group['lr'] = 0.1
+
+    # "jointly roughly training id exp euler trans"
+    for _ in range(200):
+        ret = {}
+        if id_mode == 'global':
+            proj_geo = face_model.compute_for_landmark_fit(
+                id_para.expand((num_frames, id_dim)), exp_para, euler_angle, trans, ret)
+        else:
+            proj_geo = face_model.compute_for_landmark_fit(
+                id_para, exp_para, euler_angle, trans, ret)
+        loss_lan = cal_lan_loss_fn(
+            proj_geo[:, :, :2], lms.detach())
+        # the laplacian term barely affects euler but improves trans a lot
+        loss_lap = cal_lap_loss(id_para) + cal_lap_loss(exp_para) + cal_lap_loss(euler_angle) * 0.3 + cal_lap_loss(trans) * 0.3
+
+        loss_regid = torch.mean(id_para*id_para) # regularization
+        loss_regexp = torch.mean(exp_para * exp_para)
+
+        loss_vel_id = cal_vel_loss(id_para)
+        loss_vel_exp = cal_vel_loss(exp_para)
+        loss = loss_lan  + loss_regid * LAMBDA_REG_ID + loss_regexp * LAMBDA_REG_EXP  + loss_vel_id * LAMBDA_REG_VEL_ID + loss_vel_exp * LAMBDA_REG_VEL_EXP + loss_lap * LAMBDA_REG_LAP
+        optimizer_idexp.zero_grad()
+        optimizer_frame.zero_grad()
+        loss.backward()
+        optimizer_idexp.step()
+        optimizer_frame.step()
+
+    # start fine training, initialize from the roughly trained results
+    if id_mode == 'global':
+        id_para_ = lms.new_zeros((1, id_dim), requires_grad=False)
+    else:
+        id_para_ = lms.new_zeros((num_frames, id_dim), requires_grad=True)
+    id_para_.data = id_para.data.clone()
+    id_para = id_para_
+    exp_para_ = lms.new_zeros((num_frames, exp_dim), requires_grad=True)
+    exp_para_.data = exp_para.data.clone()
+    exp_para = exp_para_
+    euler_angle_ = lms.new_zeros((num_frames, 3), requires_grad=True)
+    euler_angle_.data = euler_angle.data.clone()
+    euler_angle = euler_angle_
+    trans_ = lms.new_zeros((num_frames, 3), requires_grad=True)
+    trans_.data = trans.data.clone()
+    trans = trans_
+
+    batch_size = 50
+    # "fine fitting the 3DMM in batches"
+    for i in range(int((num_frames-1)/batch_size+1)):
+        if (i+1)*batch_size > num_frames:
+            # tail batch: take the final batch_size frames (all frames if the video is shorter)
+            sel_ids = np.arange(max(num_frames-batch_size,0), num_frames)
+        else:
+            sel_ids = np.arange(i*batch_size, i*batch_size+batch_size)
+        sel_lms = lms[sel_ids].detach()
+
+        if id_mode == 'global':
+            sel_id_para = id_para.expand((sel_ids.shape[0], id_dim))
+        else:
+            sel_id_para = id_para.new_zeros((batch_size, id_dim), requires_grad=True)
+            sel_id_para.data = id_para[sel_ids].clone()
+        sel_exp_para = exp_para.new_zeros(
+            (batch_size, exp_dim), requires_grad=True)
+        sel_exp_para.data = exp_para[sel_ids].clone()
+        sel_euler_angle = euler_angle.new_zeros(
+            (batch_size, 3), requires_grad=True)
+        sel_euler_angle.data = euler_angle[sel_ids].clone()
+        sel_trans = trans.new_zeros((batch_size, 3), requires_grad=True)
+        sel_trans.data = trans[sel_ids].clone()
+
+        if id_mode == 'global':
+            set_requires_grad([sel_exp_para, sel_euler_angle, sel_trans])
+            optimizer_cur_batch = torch.optim.Adam(
+                [sel_exp_para, sel_euler_angle, sel_trans], lr=0.005)
+        else:
+            set_requires_grad([sel_id_para, sel_exp_para, sel_euler_angle, sel_trans])
+            optimizer_cur_batch = torch.optim.Adam(
+                [sel_id_para, sel_exp_para, sel_euler_angle, sel_trans], lr=0.005)
+
+        for j in range(50):
+            ret = {}
+            proj_geo = face_model.compute_for_landmark_fit(
+                sel_id_para, sel_exp_para, sel_euler_angle, sel_trans, ret)
+            loss_lan = cal_lan_loss_fn(
+                proj_geo[:, :, :2], sel_lms)
+
+            loss_lap = cal_lap_loss(sel_id_para) + cal_lap_loss(sel_exp_para) + cal_lap_loss(sel_euler_angle) * 0.3 + cal_lap_loss(sel_trans) * 0.3
+            loss_vel_id = cal_vel_loss(sel_id_para)
+            loss_vel_exp = cal_vel_loss(sel_exp_para)
+            log_dict = {
+                'loss_vel_id': loss_vel_id,
+                'loss_vel_exp': loss_vel_exp,
+                'loss_vel_euler': cal_vel_loss(sel_euler_angle),
+                'loss_vel_trans': cal_vel_loss(sel_trans),
+            }
+            loss_regid = torch.mean(sel_id_para*sel_id_para) # regularization
+            loss_regexp = torch.mean(sel_exp_para*sel_exp_para)
+            loss = loss_lan + loss_regid * LAMBDA_REG_ID + loss_regexp * LAMBDA_REG_EXP + loss_lap * LAMBDA_REG_LAP + loss_vel_id * LAMBDA_REG_VEL_ID + loss_vel_exp * LAMBDA_REG_VEL_EXP
+
+            optimizer_cur_batch.zero_grad()
+            loss.backward()
+            optimizer_cur_batch.step()
+
+        if debug:
+            print(f"batch {i} | loss_lan: {loss_lan.item():.2f}, loss_reg_id: {loss_regid.item():.2f},loss_reg_exp: {loss_regexp.item():.2f},loss_lap_ldm:{loss_lap.item():.4f}")
+            print("|--------" + ', '.join([f"{k}: {v:.4f}" for k,v in log_dict.items()]))
+        # Write the refined batch back. Indexing with an index array returns a copy, so
+        # ``x[sel_ids].data = ...`` would be lost; assign through ``x.data[sel_ids]`` instead.
+        if id_mode != 'global':
+            id_para.data[sel_ids] = sel_id_para.data
+        exp_para.data[sel_ids] = sel_exp_para.data
+        euler_angle.data[sel_ids] = sel_euler_angle.data
+        trans.data[sel_ids] = sel_trans.data
+
+    coeff_dict = {'id': id_para.detach().cpu().numpy(), 'exp': exp_para.detach().cpu().numpy(),
+                'euler': euler_angle.detach().cpu().numpy(), 'trans': trans.detach().cpu().numpy()}
+
+    if debug:
+        import imageio
+        from utils.visualization.vis_cam3d.camera_pose_visualizer import CameraPoseVisualizer
+        from data_util.face3d_helper import Face3DHelper
+        from data_gen.utils.process_video.extract_blink import get_eye_area_percent
+        face3d_helper = Face3DHelper('deep_3drecon/BFM', keypoint_mode='mediapipe')
+
+        t = coeff_dict['exp'].shape[0]
+        # a single (global) identity is expanded to one row per frame; this also changes the 'id' entry saved below
+        if len(coeff_dict['id']) == 1:
+            coeff_dict['id'] = np.repeat(coeff_dict['id'], t, axis=0)
+        idexp_lm3d = face3d_helper.reconstruct_idexp_lm3d_np(coeff_dict['id'], coeff_dict['exp']).reshape([t, -1])
+        cano_lm3d = idexp_lm3d / 10 + face3d_helper.key_mean_shape.squeeze().reshape([1, -1]).cpu().numpy()
+        cano_lm3d = cano_lm3d.reshape([t, -1, 3])
+        WH = 512
+        cano_lm3d = (cano_lm3d * WH/2 + WH/2).astype(int)
+
+        with torch.no_grad():
+            rot = ParametricFaceModel.compute_rotation(euler_angle)
+            extrinsic = torch.zeros([rot.shape[0], 4, 4]).to(rot.device)
+            extrinsic[:, :3,:3] = rot
+            extrinsic[:, :3, 3] = trans # / 10
+            extrinsic[:, 3, 3] = 1
+        extrinsic = extrinsic.cpu().numpy()
+
+        xy_camera_visualizer = CameraPoseVisualizer(xlim=[extrinsic[:,0,3].min().item()-0.5,extrinsic[:,0,3].max().item()+0.5],ylim=[extrinsic[:,1,3].min().item()-0.5,extrinsic[:,1,3].max().item()+0.5], zlim=[extrinsic[:,2,3].min().item()-0.5,extrinsic[:,2,3].max().item()+0.5], view_mode='xy')
+        xz_camera_visualizer = CameraPoseVisualizer(xlim=[extrinsic[:,0,3].min().item()-0.5,extrinsic[:,0,3].max().item()+0.5],ylim=[extrinsic[:,1,3].min().item()-0.5,extrinsic[:,1,3].max().item()+0.5], zlim=[extrinsic[:,2,3].min().item()-0.5,extrinsic[:,2,3].max().item()+0.5], view_mode='xz')
+
+        if nerf:
+            debug_name = video_name.replace("/raw/", "/processed/").replace(".mp4", "/debug_fit_3dmm.mp4")
+        else:
+            debug_name = video_name.replace("/video/", "/coeff_fit_debug/").replace(".mp4", "_debug.mp4")
+        try:
+            os.makedirs(os.path.dirname(debug_name), exist_ok=True)
+        except OSError: pass
+        writer = imageio.get_writer(debug_name, fps=25)
+        if id_mode == 'global':
+            id_para = id_para.repeat([exp_para.shape[0], 1])
+        proj_geo = face_model.compute_for_landmark_fit(id_para, exp_para, euler_angle, trans)
+        lm68s = proj_geo[:,:,:2].detach().cpu().numpy()  # [T, 68,2]
+        lm68s = lm68s * img_scale_factor
+        lms = lms * img_scale_factor
+        lm68s[..., 1] = img_h - lm68s[..., 1] # flip the height axis
+        lms[..., 1] = img_h - lms[..., 1] # flip the height axis
+        lm68s = lm68s.astype(int)
+        for i in tqdm.trange(min(250, len(frames)), desc=f'rendering debug video to {debug_name}..'):
+            xy_cam3d_img = xy_camera_visualizer.extrinsic2pyramid(extrinsic[i], focal_len_scaled=0.25)
+            xy_cam3d_img = cv2.resize(xy_cam3d_img, (512,512))
+            xz_cam3d_img = xz_camera_visualizer.extrinsic2pyramid(extrinsic[i], focal_len_scaled=0.25)
+            xz_cam3d_img = cv2.resize(xz_cam3d_img, (512,512))
+
+            img = copy.deepcopy(frames[i])
+            img2 = copy.deepcopy(frames[i])
+
+            img = draw_axes(img, euler_angle[i,0].item(), euler_angle[i,1].item(), euler_angle[i,2].item(), lm68s[i][4][0].item(), lm68s[i, 4][1].item(), size=50)
+
+            gt_lm_color = (255, 0, 0)
+
+            for lm in lm68s[i]:
+                img = cv2.circle(img, lm, 1, (0, 0, 255), thickness=-1) # blue
+            for gt_lm in lms[i]:
+                img2 = cv2.circle(img2, gt_lm.cpu().numpy().astype(int), 2, gt_lm_color, thickness=1)
+
+            cano_lm3d_img = np.ones([WH, WH, 3], dtype=np.uint8) * 255
+            for j in range(len(cano_lm3d[i])):
+                x, y, _ = cano_lm3d[i, j]
+                color = (255,0,0)
+                cano_lm3d_img = cv2.circle(cano_lm3d_img, center=(x,y), radius=3, color=color, thickness=-1)
+            cano_lm3d_img = cv2.flip(cano_lm3d_img, 0)
+
+            # NOTE: ``secc_renderer`` is a module-level name that this file only creates in its
+            # ``__main__`` block, so it must be defined by the caller when this function is imported.
+            _, secc_img = secc_renderer(id_para[0:1], exp_para[i:i+1], euler_angle[i:i+1]*0, trans[i:i+1]*0)
+            secc_img = (secc_img +1)*127.5
+            secc_img = F.interpolate(secc_img, size=(img_h, img_w))
+            secc_img = secc_img.permute(0, 2,3,1).int().cpu().numpy()[0]
+            out_img1 = np.concatenate([img, img2, secc_img], axis=1).astype(np.uint8)
+            out_img2 = np.concatenate([xy_cam3d_img, xz_cam3d_img, cano_lm3d_img], axis=1).astype(np.uint8)
+            out_img = np.concatenate([out_img1, out_img2], axis=0)
+            writer.append_data(out_img)
+        writer.close()
+
+    # the dict is stored as a pickled object array, hence allow_pickle
+    if save:
+        np.save(out_name, coeff_dict, allow_pickle=True)
+    return coeff_dict
+
+def out_exist_job(vid_name):
+    """Return ``vid_name`` when its landmark file exists and its coefficient file does not, else None."""
+    coeff_path = vid_name.replace("/video/", "/coeff_fit_mp/").replace(".mp4", "_coeff_fit_mp.npy")
+    landmark_path = vid_name.replace("/video/", "/lms_2d/").replace(".mp4", "_lms.npy")
+    # nothing to do when already fitted, nothing possible without the landmark file
+    pending = not os.path.exists(coeff_path) and os.path.exists(landmark_path)
+    return vid_name if pending else None
+
+def get_todo_vid_names(vid_names):
+    """Filter ``vid_names`` down to the videos that ``out_exist_job`` reports as pending."""
+    if len(vid_names) == 1:
+        # single video (nerf): returned as is, no check is run
+        return vid_names
+    results = multiprocess_run_tqdm(out_exist_job, vid_names, num_workers=16)
+    # each yielded item is an (index, result) pair; finished videos yield None
+    return [pending for _, pending in results if pending is not None]
+
+
+if __name__ == '__main__':
+    import argparse, glob, tqdm
+    parser = argparse.ArgumentParser()
+    # for a whole dataset pass e.g. --vid_dir /home/tiger/datasets/raw/CelebV-HQ/video --ds_name CelebV-HQ
+    parser.add_argument("--vid_dir", default='data/raw/videos/May_10s.mp4')
+    parser.add_argument("--ds_name", default='nerf') # 'nerf' | 'CelebV-HQ' | 'TH1KH_512' | etc
+    parser.add_argument("--seed", default=0, type=int)
+    parser.add_argument("--process_id", default=0, type=int)
+    parser.add_argument("--total_process", default=1, type=int)
+    parser.add_argument("--id_mode", default='global', type=str) # global | finegrained
+    parser.add_argument("--keypoint_mode", default='mediapipe', type=str)
+    parser.add_argument("--large_yaw_threshold", default=9999999.9, type=float) # could be 0.7
+    parser.add_argument("--debug", action='store_true')
+    parser.add_argument("--reset", action='store_true')
+    parser.add_argument("--load_names", action="store_true")
+
+    args = parser.parse_args()
+    vid_dir = args.vid_dir
+    ds_name = args.ds_name
+    load_names = args.load_names
+
+    print(f"args {args}")
+
+    is_nerf = ds_name.lower() == 'nerf'  # computed once so every later check agrees on the casing
+    if is_nerf: # process a single video
+        vid_names = [vid_dir]
+    else: # process the whole dataset
+        if ds_name in ['lrs3_trainval']:
+            vid_name_pattern = os.path.join(vid_dir, "*/*.mp4")
+        elif ds_name in ['TH1KH_512', 'CelebV-HQ']:
+            vid_name_pattern = os.path.join(vid_dir, "*.mp4")
+        elif ds_name in ['lrs2', 'lrs3', 'voxceleb2', 'CMLR']:
+            vid_name_pattern = os.path.join(vid_dir, "*/*/*.mp4")
+        elif ds_name in ["RAVDESS", 'VFHQ']:
+            vid_name_pattern = os.path.join(vid_dir, "*/*/*/*.mp4")
+        else:
+            raise NotImplementedError()
+
+        vid_names_path = os.path.join(vid_dir, "vid_names.pkl")
+        if os.path.exists(vid_names_path) and load_names:
+            print(f"loading vid names from {vid_names_path}")
+            vid_names = load_file(vid_names_path)
+        else:
+            vid_names = multiprocess_glob(vid_name_pattern)
+        vid_names = sorted(vid_names)
+        print(f"saving vid names to {vid_names_path}")
+        save_file(vid_names_path, vid_names)
+
+    print(vid_names[:10])
+    random.seed(args.seed)
+    random.shuffle(vid_names)
+
+    face_model = ParametricFaceModel(bfm_folder='deep_3drecon/BFM', 
+                camera_distance=10, focal=1015, keypoint_mode=args.keypoint_mode)
+    face_model.to(torch.device("cuda:0"))
+    secc_renderer = SECC_Renderer(512)
+    secc_renderer.to("cuda:0")
+
+    process_id = args.process_id
+    total_process = args.total_process
+    # split the shuffled list into equal shards; the last shard also takes the remainder
+    if total_process > 1:
+        assert process_id <= total_process -1
+        num_samples_per_process = len(vid_names) // total_process
+        if process_id == total_process - 1:
+            vid_names = vid_names[process_id * num_samples_per_process : ]
+        else:
+            vid_names = vid_names[process_id * num_samples_per_process : (process_id+1) * num_samples_per_process]
+
+    if not args.reset:
+        vid_names = get_todo_vid_names(vid_names)
+
+    failed_img_names = []
+    for i, img_name in enumerate(tqdm.tqdm(vid_names, desc=f"process {process_id}: fitting 3dmm ...")):
+        try:
+            # NOTE(review): args.keypoint_mode only configures face_model above; the call below
+            # leaves the function's own keypoint_mode at its default -- confirm this is intended.
+            success = fit_3dmm_for_a_video(img_name, is_nerf, args.id_mode, args.debug, large_yaw_threshold=args.large_yaw_threshold)
+            if not success:
+                failed_img_names.append(img_name)
+        except Exception as e:
+            print(img_name, e)
+            failed_img_names.append(img_name)
+        print(f"finished {i + 1} / {len(vid_names)} = {(i + 1) / len(vid_names):.4f}, failed {len(failed_img_names)} / {i + 1} = {len(failed_img_names) / (i + 1):.4f}")
+        sys.stdout.flush()
+    print(f"all failed image names: {failed_img_names}")
+    print("All finished!")
\ No newline at end of file
diff --git a/data_gen/utils/process_video/inpaint_torso_imgs.py b/data_gen/utils/process_video/inpaint_torso_imgs.py
new file mode 100644
index 0000000000000000000000000000000000000000..c938a6f79e7b796cc6f321e332eb7840244b4cf9
--- /dev/null
+++ b/data_gen/utils/process_video/inpaint_torso_imgs.py
@@ -0,0 +1,193 @@
+import cv2
+import os
+import numpy as np
+from utils.commons.multiprocess_utils import multiprocess_run_tqdm
+from scipy.ndimage import binary_erosion, binary_dilation
+
+from tasks.eg3ds.loss_utils.segment_loss.mp_segmenter import MediapipeSegmenter
+seg_model = MediapipeSegmenter()
+
+def inpaint_torso_job(video_name, idx=None, total=None):
+    """Build a head-removed, vertically in-painted torso RGBA image for every frame of one video.
+
+    video_name is the .mp4 path; frames are globbed from the matching "/gt_imgs/" directory.
+    idx and total are unused in this function."""
+    import glob, tqdm  # the module only imports these under __main__, so import them here as well
+    raw_img_dir = video_name.replace(".mp4", "").replace("/video/","/gt_imgs/")
+    img_names = glob.glob(os.path.join(raw_img_dir, "*.jpg"))
+    for image_path in tqdm.tqdm(img_names):
+        # read ori image
+        ori_image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) # [H, W, 3]
+        segmap = seg_model._cal_seg_map(cv2.cvtColor(ori_image, cv2.COLOR_BGR2RGB))
+        head_part = (segmap[1] + segmap[3] + segmap[5]).astype(bool)  # np.bool was removed in NumPy 1.24; the builtin bool is the same dtype
+        torso_part = (segmap[4]).astype(bool)
+        neck_part = (segmap[2]).astype(bool)
+        bg_part = segmap[0].astype(bool)
+        head_image = cv2.imread(image_path.replace("/gt_imgs/", "/head_imgs/"), cv2.IMREAD_UNCHANGED) # [H, W, 3]
+        torso_image = cv2.imread(image_path.replace("/gt_imgs/", "/torso_imgs/"), cv2.IMREAD_UNCHANGED) # [H, W, 3]
+        bg_image = cv2.imread(image_path.replace("/gt_imgs/", "/bg_imgs/"), cv2.IMREAD_UNCHANGED) # [H, W, 3]
+
+        # get gt image: the original frame with its background pixels taken from bg_image
+        gt_image = ori_image.copy()
+        gt_image[bg_part] = bg_image[bg_part]
+        cv2.imwrite(image_path.replace('ori_imgs', 'gt_imgs'), gt_image)  # NOTE(review): image_path comes from the gt_imgs glob, so this replace looks like a no-op - confirm
+
+        # get torso image
+        torso_image = gt_image.copy() # rgb
+        torso_image[head_part] = 0
+        torso_alpha = 255 * np.ones((gt_image.shape[0], gt_image.shape[1], 1), dtype=np.uint8) # alpha
+
+        # torso part "vertical" in-painting...
+        L = 8 + 1
+        torso_coords = np.stack(np.nonzero(torso_part), axis=-1) # [M, 2]
+        # lexsort: sort 2D coords first by y then by x, 
+        # ref: https://stackoverflow.com/questions/2706605/sorting-a-2d-numpy-array-by-multiple-axes
+        inds = np.lexsort((torso_coords[:, 0], torso_coords[:, 1]))
+        torso_coords = torso_coords[inds]
+        # choose the top pixel for each column
+        u, uid, ucnt = np.unique(torso_coords[:, 1], return_index=True, return_counts=True)
+        top_torso_coords = torso_coords[uid] # [m, 2]
+        # only keep top-is-head pixels
+        top_torso_coords_up = top_torso_coords.copy() - np.array([1, 0]) # [N, 2]
+        mask = head_part[tuple(top_torso_coords_up.T)] 
+        if mask.any():
+            top_torso_coords = top_torso_coords[mask]
+            # get the color
+            top_torso_colors = gt_image[tuple(top_torso_coords.T)] # [m, 3]
+            # construct inpaint coords (vertically up, or minus in x)
+            inpaint_torso_coords = top_torso_coords[None].repeat(L, 0) # [L, m, 2]
+            inpaint_offsets = np.stack([-np.arange(L), np.zeros(L, dtype=np.int32)], axis=-1)[:, None] # [L, 1, 2]
+            inpaint_torso_coords += inpaint_offsets
+            inpaint_torso_coords = inpaint_torso_coords.reshape(-1, 2) # [Lm, 2]
+            inpaint_torso_colors = top_torso_colors[None].repeat(L, 0) # [L, m, 3]
+            darken_scaler = 0.98 ** np.arange(L).reshape(L, 1, 1) # [L, 1, 1]
+            inpaint_torso_colors = (inpaint_torso_colors * darken_scaler).reshape(-1, 3) # [Lm, 3]
+            # set color
+            torso_image[tuple(inpaint_torso_coords.T)] = inpaint_torso_colors
+
+            inpaint_torso_mask = np.zeros_like(torso_image[..., 0]).astype(bool)
+            inpaint_torso_mask[tuple(inpaint_torso_coords.T)] = True
+        else:
+            inpaint_torso_mask = None
+
+        # neck part "vertical" in-painting...
+        push_down = 4
+        L = 48 + push_down + 1
+
+        neck_part = binary_dilation(neck_part, structure=np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=bool), iterations=3)
+
+        neck_coords = np.stack(np.nonzero(neck_part), axis=-1) # [M, 2]
+        # lexsort: sort 2D coords first by y then by x, 
+        # ref: https://stackoverflow.com/questions/2706605/sorting-a-2d-numpy-array-by-multiple-axes
+        inds = np.lexsort((neck_coords[:, 0], neck_coords[:, 1]))
+        neck_coords = neck_coords[inds]
+        # choose the top pixel for each column
+        u, uid, ucnt = np.unique(neck_coords[:, 1], return_index=True, return_counts=True)
+        top_neck_coords = neck_coords[uid] # [m, 2]
+        # only keep top-is-head pixels
+        top_neck_coords_up = top_neck_coords.copy() - np.array([1, 0])
+        mask = head_part[tuple(top_neck_coords_up.T)] 
+
+        top_neck_coords = top_neck_coords[mask]
+        # push these top down for 4 pixels to make the neck inpainting more natural...
+        offset_down = np.minimum(ucnt[mask] - 1, push_down)
+        top_neck_coords += np.stack([offset_down, np.zeros_like(offset_down)], axis=-1)
+        # get the color
+        top_neck_colors = gt_image[tuple(top_neck_coords.T)] # [m, 3]
+        # construct inpaint coords (vertically up, or minus in x)
+        inpaint_neck_coords = top_neck_coords[None].repeat(L, 0) # [L, m, 2]
+        inpaint_offsets = np.stack([-np.arange(L), np.zeros(L, dtype=np.int32)], axis=-1)[:, None] # [L, 1, 2]
+        inpaint_neck_coords += inpaint_offsets
+        inpaint_neck_coords = inpaint_neck_coords.reshape(-1, 2) # [Lm, 2]
+        inpaint_neck_colors = top_neck_colors[None].repeat(L, 0) # [L, m, 3]
+        darken_scaler = 0.98 ** np.arange(L).reshape(L, 1, 1) # [L, 1, 1]
+        inpaint_neck_colors = (inpaint_neck_colors * darken_scaler).reshape(-1, 3) # [Lm, 3]
+        # set color
+        torso_image[tuple(inpaint_neck_coords.T)] = inpaint_neck_colors
+
+        # apply blurring to the inpaint area to avoid vertical-line artifacts...
+        inpaint_mask = np.zeros_like(torso_image[..., 0]).astype(bool)
+        inpaint_mask[tuple(inpaint_neck_coords.T)] = True
+
+        blur_img = torso_image.copy()
+        blur_img = cv2.GaussianBlur(blur_img, (5, 5), cv2.BORDER_DEFAULT)  # NOTE(review): the third positional argument is sigmaX, so the constant's integer value acts as the sigma
+
+        torso_image[inpaint_mask] = blur_img[inpaint_mask]
+
+        # set mask: everything outside neck, torso and the in-painted areas becomes transparent black
+        mask = (neck_part | torso_part | inpaint_mask)
+        if inpaint_torso_mask is not None:
+            mask = mask | inpaint_torso_mask
+        torso_image[~mask] = 0
+        torso_alpha[~mask] = 0
+
+        cv2.imwrite("0.png", np.concatenate([torso_image, torso_alpha], axis=-1))  # NOTE(review): fixed name, so each frame overwrites the previous one - looks like debug output
+
+    print(f'[INFO] ===== extracted torso and gt images =====')
+
+
+def out_exist_job(vid_name):
+    """Report whether a video still needs torso processing.
+
+    Returns None when the four output folders derived from vid_name all exist and contain the
+    same number of entries as the first one; otherwise returns vid_name unchanged."""
+    folder_tags = ("inpaint_torso_imgs", "inpaint_torso_with_bg_imgs", "torso_imgs", "torso_with_bg_imgs")
+    out_dirs = [vid_name.replace("/video/", f"/{tag}/").replace(".mp4", "") for tag in folder_tags]
+    # guard clause: any missing folder means the video is not done yet
+    if not all(os.path.exists(folder) for folder in out_dirs):
+        return vid_name
+    # the entry count of the first folder is the reference the others must match
+    num_frames = len(os.listdir(out_dirs[0]))
+    same_count = all(len(os.listdir(folder)) == num_frames for folder in out_dirs)
+    return None if same_count else vid_name
+    
+def get_todo_vid_names(vid_names):
+    """Return the entries of vid_names for which out_exist_job gives a non-None result."""
+    # the runner is iterated as (index, result) pairs; only the result is needed here
+    job_results = multiprocess_run_tqdm(out_exist_job, vid_names, num_workers=16)
+    pending = [res for _, res in job_results if res is not None]
+    return pending
+
+if __name__ == '__main__':
+    import argparse, glob, tqdm, random
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--vid_dir", default='/home/tiger/datasets/raw/CelebV-HQ/video')
+    parser.add_argument("--ds_name", default='CelebV-HQ')
+    parser.add_argument("--num_workers", default=48, type=int)
+    parser.add_argument("--seed", default=0, type=int)
+    parser.add_argument("--process_id", default=0, type=int)
+    parser.add_argument("--total_process", default=1, type=int)
+    parser.add_argument("--reset", action='store_true')
+    # NOTE(review): debug entry point - the parser above is never parsed; only the hard-coded video below is processed
+    inpaint_torso_job('/home/tiger/datasets/raw/CelebV-HQ/video/dgdEr-mXQT4_8.mp4')
+    # args = parser.parse_args()
+    # vid_dir = args.vid_dir
+    # ds_name = args.ds_name
+    # if ds_name in ['lrs3_trainval']:
+    #     mp4_name_pattern = os.path.join(vid_dir, "*/*.mp4")
+    # if ds_name in ['TH1KH_512', 'CelebV-HQ']:
+    #     vid_names = glob.glob(os.path.join(vid_dir, "*.mp4"))
+    # elif ds_name in ['lrs2', 'lrs3', 'voxceleb2']:
+    #     vid_name_pattern = os.path.join(vid_dir, "*/*/*.mp4")
+    #     vid_names = glob.glob(vid_name_pattern)
+    # vid_names = sorted(vid_names)
+    # random.seed(args.seed)
+    # random.shuffle(vid_names)
+
+    # process_id = args.process_id
+    # total_process = args.total_process
+    # if total_process > 1:
+    #     assert process_id <= total_process -1
+    #     num_samples_per_process = len(vid_names) // total_process
+    #     if process_id == total_process:
+    #         vid_names = vid_names[process_id * num_samples_per_process : ]
+    #     else:
+    #         vid_names = vid_names[process_id * num_samples_per_process : (process_id+1) * num_samples_per_process]
+    
+    # if not args.reset:
+    #     vid_names = get_todo_vid_names(vid_names)
+    # print(f"todo videos number: {len(vid_names)}")
+
+    # fn_args = [(vid_name,i,len(vid_names)) for i, vid_name in enumerate(vid_names)]
+    # for vid_name in multiprocess_run_tqdm(inpaint_torso_job ,fn_args, desc=f"Root process {args.process_id}: extracting segment images", num_workers=args.num_workers):
+    #     pass
\ No newline at end of file
diff --git a/data_gen/utils/process_video/resample_video_to_25fps_resize_to_512.py b/data_gen/utils/process_video/resample_video_to_25fps_resize_to_512.py
new file mode 100644
index 0000000000000000000000000000000000000000..f01c1681a8e39046645cfdb3e5d79b4b82cf9b46
--- /dev/null
+++ b/data_gen/utils/process_video/resample_video_to_25fps_resize_to_512.py
@@ -0,0 +1,87 @@
+import os, glob
+import cv2
+from utils.commons.os_utils import multiprocess_glob
+from utils.commons.multiprocess_utils import multiprocess_run_tqdm
+
+def get_video_infos(video_path):
+    """Return a dict with the height, width, fps and total_frames that OpenCV reports for video_path."""
+    vid_cap = cv2.VideoCapture(video_path)
+    infos = {'height': int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), 'width': int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
+             'fps': vid_cap.get(cv2.CAP_PROP_FPS), 'total_frames': int(vid_cap.get(cv2.CAP_PROP_FRAME_COUNT))}
+    vid_cap.release()  # the capture was previously never released; close it explicitly once the properties are read
+    return infos
+
+def extract_img_job(video_name:str):
+    """Re-encode a square video from ".../video_raw/..." to ".../video/..." at 25 fps and 512x512 with ffmpeg."""
+    import shlex  # local import: quote the paths so spaces or shell metacharacters cannot break the command
+    out_path = video_name.replace("/video_raw/","/video/",1)
+    os.makedirs(os.path.dirname(out_path), exist_ok=True)
+    vid_info = get_video_infos(video_name)
+    assert vid_info['width'] == vid_info['height']  # this variant does not crop, so the input must already be square
+    os.system(f'/usr/bin/ffmpeg -i {shlex.quote(video_name)} -vf fps={25},scale=w=512:h=512 -q:v 1 -c:v libx264 -pix_fmt yuv420p -b:v 2000k -v quiet -y {shlex.quote(out_path)}')
+
+def extract_img_job_crop(video_name:str):
+    """Crop a video to a square of its shorter side, then re-encode it at 25 fps and 512x512 with ffmpeg."""
+    import shlex  # local import: quote the paths so spaces or shell metacharacters cannot break the command
+    out_path = video_name.replace("/video_raw/","/video/",1)
+    os.makedirs(os.path.dirname(out_path), exist_ok=True)
+    vid_info = get_video_infos(video_name)
+    wh = min(vid_info['width'], vid_info['height'])  # side length of the square crop
+    os.system(f'/usr/bin/ffmpeg -i {shlex.quote(video_name)} -vf fps={25},crop={wh}:{wh},scale=w=512:h=512 -q:v 1 -c:v libx264 -pix_fmt yuv420p -b:v 2000k -v quiet -y {shlex.quote(out_path)}')
+
+def extract_img_job_crop_ravdess(video_name:str):
+    """Crop a video to a fixed 720x720 window, then re-encode it at 25 fps and 512x512 with ffmpeg."""
+    import shlex  # local import: quote the paths so spaces or shell metacharacters cannot break the command
+    out_path = video_name.replace("/video_raw/","/video/",1)
+    os.makedirs(os.path.dirname(out_path), exist_ok=True)
+    os.system(f'/usr/bin/ffmpeg -i {shlex.quote(video_name)} -vf fps={25},crop=720:720,scale=w=512:h=512 -q:v 1 -c:v libx264 -pix_fmt yuv420p -b:v 2000k -v quiet -y {shlex.quote(out_path)}')
+
+if __name__ == '__main__':  # CLI: glob the raw videos of one dataset and re-encode this process's shard in parallel
+    import argparse, glob, tqdm, random
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--vid_dir", default='/home/tiger/datasets/raw/CelebV-HQ/video_raw/')
+    parser.add_argument("--ds_name", default='CelebV-HQ')
+    parser.add_argument("--num_workers", default=32, type=int)
+    parser.add_argument("--process_id", default=0, type=int)
+    parser.add_argument("--total_process", default=1, type=int)
+    args = parser.parse_args()
+    print(f"args {args}")
+
+    vid_dir = args.vid_dir
+    ds_name = args.ds_name
+    if ds_name in ['lrs3_trainval']:
+        vid_names = multiprocess_glob(os.path.join(vid_dir, "*/*.mp4"))  # was stored in an unused name, leaving vid_names undefined
+    elif ds_name in ['TH1KH_512', 'CelebV-HQ']:
+        vid_names = multiprocess_glob(os.path.join(vid_dir, "*.mp4"))
+    elif ds_name in ['lrs2', 'lrs3', 'voxceleb2', 'CMLR']:
+        vid_name_pattern = os.path.join(vid_dir, "*/*/*.mp4")
+        vid_names = multiprocess_glob(vid_name_pattern)
+    elif ds_name in ["RAVDESS", 'VFHQ']:
+        vid_name_pattern = os.path.join(vid_dir, "*/*/*/*.mp4")
+        vid_names = multiprocess_glob(vid_name_pattern)
+    else:
+        raise NotImplementedError()
+    vid_names = sorted(vid_names)
+    print(f"total video number : {len(vid_names)}")
+    if vid_names: print(f"first {vid_names[0]} last {vid_names[-1]}")  # guard: indexing an empty glob result would raise IndexError
+    # shard the sorted list: process_id selects one contiguous slice out of total_process
+    process_id = args.process_id
+    total_process = args.total_process
+    if total_process > 1:
+        assert process_id <= total_process -1
+        num_samples_per_process = len(vid_names) // total_process
+        if process_id == total_process - 1:  # last shard also takes the remainder (the old "== total_process" test could never be true)
+            vid_names = vid_names[process_id * num_samples_per_process : ]
+        else:
+            vid_names = vid_names[process_id * num_samples_per_process : (process_id+1) * num_samples_per_process]
+    
+    if ds_name == "RAVDESS":
+        for i, res in multiprocess_run_tqdm(extract_img_job_crop_ravdess, vid_names, num_workers=args.num_workers, desc="resampling videos"):
+            pass
+    elif ds_name == "CMLR":
+        for i, res in multiprocess_run_tqdm(extract_img_job_crop, vid_names, num_workers=args.num_workers, desc="resampling videos"):
+            pass
+    else:
+        for i, res in multiprocess_run_tqdm(extract_img_job, vid_names, num_workers=args.num_workers, desc="resampling videos"):
+            pass
+
diff --git a/data_gen/utils/process_video/split_video_to_imgs.py b/data_gen/utils/process_video/split_video_to_imgs.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1c16c3415fb953c965cf56b3161a59460375079
--- /dev/null
+++ b/data_gen/utils/process_video/split_video_to_imgs.py
@@ -0,0 +1,53 @@
+import os, glob
+from utils.commons.multiprocess_utils import multiprocess_run_tqdm
+
+from data_gen.utils.path_converter import PathConverter, pc
+
+# mp4_names = glob.glob("/home/tiger/datasets/raw/CelebV-HQ/video/*.mp4")
+
+def extract_img_job(video_name, raw_img_dir=None):
+    """Dump the frames of video_name as 25 fps, 512x512 JPEGs using ffmpeg.
+
+    Frames go to raw_img_dir when given, otherwise to the directory pc.to(...) derives from video_name."""
+    import shlex  # local import: quote the paths so spaces or shell metacharacters cannot break the command
+    out_path = raw_img_dir if raw_img_dir is not None else pc.to(video_name.replace(".mp4", ""), "vid", "gt")
+    os.makedirs(out_path, exist_ok=True)
+    frame_pattern = shlex.quote(os.path.join(out_path, "%8d.jpg"))
+    os.system(f'/usr/bin/ffmpeg -i {shlex.quote(video_name)} -vf fps={25},scale=w=512:h=512 -qmin 1 -q:v 1 -start_number 0 -v quiet {frame_pattern}')
+
+if __name__ == '__main__':  # CLI: glob the videos of one dataset and extract frames for this process's shard in parallel
+    import argparse, glob, tqdm, random
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--vid_dir", default='/home/tiger/datasets/raw/CelebV-HQ/video')
+    parser.add_argument("--ds_name", default='CelebV-HQ')
+    parser.add_argument("--num_workers", default=64, type=int)
+    parser.add_argument("--process_id", default=0, type=int)
+    parser.add_argument("--total_process", default=1, type=int)
+    args = parser.parse_args()
+    vid_dir = args.vid_dir
+    ds_name = args.ds_name
+    if ds_name in ['lrs3_trainval']:
+        vid_names = glob.glob(os.path.join(vid_dir, "*/*.mp4"))  # was stored in an unused name, leaving vid_names undefined
+    elif ds_name in ['TH1KH_512', 'CelebV-HQ']:
+        vid_names = glob.glob(os.path.join(vid_dir, "*.mp4"))
+    elif ds_name in ['lrs2', 'lrs3', 'voxceleb2']:
+        vid_names = glob.glob(os.path.join(vid_dir, "*/*/*.mp4"))
+    elif ds_name in ["RAVDESS", 'VFHQ']:
+        vid_names = glob.glob(os.path.join(vid_dir, "*/*/*/*.mp4"))
+    else:
+        raise NotImplementedError(f"unsupported ds_name: {ds_name}")  # previously fell through to a NameError on vid_names
+    vid_names = sorted(vid_names)
+    
+    process_id = args.process_id
+    total_process = args.total_process
+    if total_process > 1:
+        assert process_id <= total_process -1
+        num_samples_per_process = len(vid_names) // total_process
+        if process_id == total_process - 1:  # last shard also takes the remainder (the old "== total_process" test could never be true)
+            vid_names = vid_names[process_id * num_samples_per_process : ]
+        else:
+            vid_names = vid_names[process_id * num_samples_per_process : (process_id+1) * num_samples_per_process]
+    
+    for i, res in multiprocess_run_tqdm(extract_img_job, vid_names, num_workers=args.num_workers, desc="extracting images"):
+        pass
+
diff --git a/data_util/face3d_helper.py b/data_util/face3d_helper.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4d260b1a9c320639ad035bb1d804ed21b076092
--- /dev/null
+++ b/data_util/face3d_helper.py
@@ -0,0 +1,309 @@
+import os
+import numpy as np
+import torch
+import torch.nn as nn
+from scipy.io import loadmat
+
+from deep_3drecon.deep_3drecon_models.bfm import perspective_projection
+
+
+class Face3DHelper(nn.Module):
+    """Helper around the BFM 3DMM bases: turns identity/expression coefficients into meshes and 3D/2D landmarks."""
+    def __init__(self, bfm_dir='deep_3drecon/BFM', keypoint_mode='lm68', use_gpu=True):
+        super().__init__()
+        self.keypoint_mode = keypoint_mode # lm68 | mediapipe
+        self.bfm_dir = bfm_dir
+        self.load_3dmm()
+        if use_gpu: self.to("cuda")
+
+    def load_3dmm(self):
+        """Load BFM_model_front.mat from bfm_dir and register the bases, key-point subsets and projection as buffers."""
+        model = loadmat(os.path.join(self.bfm_dir, "BFM_model_front.mat"))
+        self.register_buffer('mean_shape',torch.from_numpy(model['meanshape'].transpose()).float()) # mean face shape. [3*N, 1], N=35709, xyz=3, ==> 3*N=107127
+        mean_shape = self.mean_shape.reshape([-1, 3])
+        # re-center
+        mean_shape = mean_shape - torch.mean(mean_shape, dim=0, keepdims=True)
+        self.mean_shape = mean_shape.reshape([-1, 1])
+        self.register_buffer('id_base',torch.from_numpy(model['idBase']).float()) # identity basis. [3*N,80], we have 80 eigen faces for identity
+        self.register_buffer('exp_base',torch.from_numpy(model['exBase']).float()) # expression basis. [3*N,64], we have 64 eigen faces for expression
+
+        self.register_buffer('mean_texure',torch.from_numpy(model['meantex'].transpose()).float()) # mean face texture. [3*N,1] (0-255)
+        self.register_buffer('tex_base',torch.from_numpy(model['texBase']).float()) # texture basis. [3*N,80], rgb=3
+        
+        self.register_buffer('point_buf',torch.from_numpy(model['point_buf']).float()) # triangle indices for each vertex that lies in. starts from 1. [N,8] (1-F)
+        self.register_buffer('face_buf',torch.from_numpy(model['tri']).float()) # vertex indices in each triangle. starts from 1. [F,3] (1-N)
+        if self.keypoint_mode == 'mediapipe':
+            self.register_buffer('key_points', torch.from_numpy(np.load("deep_3drecon/BFM/index_mp468_from_mesh35709.npy").astype(np.int64)))  # NOTE(review): fixed path, does not follow bfm_dir
+            unmatch_mask = self.key_points < 0
+            self.key_points[unmatch_mask] = 0  # negative (unmatched) indices are redirected to vertex 0
+        else:
+            self.register_buffer('key_points',torch.from_numpy(model['keypoints'].squeeze().astype(np.int_)).long()) # vertex indices of 68 facial landmarks. starts from 1. [68,1]
+        
+        self.register_buffer('key_mean_shape',self.mean_shape.reshape([-1,3])[self.key_points,:])
+        self.register_buffer('key_id_base', self.id_base.reshape([-1,3,80])[self.key_points, :, :].reshape([-1,80])) 
+        self.register_buffer('key_exp_base', self.exp_base.reshape([-1,3,64])[self.key_points, :, :].reshape([-1,64])) 
+        self.key_id_base_np = self.key_id_base.cpu().numpy()
+        self.key_exp_base_np = self.key_exp_base.cpu().numpy()
+
+        self.register_buffer('persc_proj', torch.tensor(perspective_projection(focal=1015, center=112))) 
+
+    def split_coeff(self, coeff):
+        """
+        Slice a coefficient tensor into named parts. coeff: Tensor[B, T, c=257] or [T, c=257]
+        """
+        ret_dict = {
+            'identity': coeff[..., :80],  # identity, [b, t, c=80] 
+            'expression': coeff[..., 80:144],  # expression, [b, t, c=64]
+            'texture': coeff[..., 144:224],  # texture, [b, t, c=80]
+            'euler': coeff[..., 224:227],  # euler angles for pose, [b, t, c=3]
+            'translation':  coeff[..., 254:257], # translation, [b, t, c=3]
+            'gamma': coeff[..., 227:254] # lighting, [b, t, c=27]
+        }
+        return ret_dict
+    
+    def reconstruct_face_mesh(self, id_coeff, exp_coeff):
+        """
+        Generate a pose-independent 3D face mesh!
+        id_coeff: Tensor[T, c=80]
+        exp_coeff: Tensor[T, c=64]
+        """
+        id_coeff = id_coeff.to(self.key_id_base.device)
+        exp_coeff = exp_coeff.to(self.key_id_base.device)
+        mean_face = self.mean_shape.squeeze().reshape([1, -1]) # [3N, 1] ==> [1, 3N]
+        id_base, exp_base = self.id_base, self.exp_base # [3*N, C]
+        identity_diff_face = torch.matmul(id_coeff, id_base.transpose(0,1)) # [t,c],[c,3N] ==> [t,3N]
+        expression_diff_face = torch.matmul(exp_coeff, exp_base.transpose(0,1)) # [t,c],[c,3N] ==> [t,3N]
+        
+        face = mean_face + identity_diff_face + expression_diff_face # [t,3N]
+        face = face.reshape([face.shape[0], -1, 3]) # [t,N,3]
+        # no further re-centering here: mean_shape was already re-centered in load_3dmm
+        return face
+
+    def reconstruct_cano_lm3d(self, id_coeff, exp_coeff):
+        """
+        Generate pose-independent 3D landmarks with the keypoint bases!
+        id_coeff: Tensor[T, c=80]
+        exp_coeff: Tensor[T, c=64]
+        """
+        id_coeff = id_coeff.to(self.key_id_base.device)
+        exp_coeff = exp_coeff.to(self.key_id_base.device)
+        mean_face = self.key_mean_shape.squeeze().reshape([1, -1]) # [3*68, 1] ==> [1, 3*68]
+        id_base, exp_base = self.key_id_base, self.key_exp_base # [3*68, C]
+        identity_diff_face = torch.matmul(id_coeff, id_base.transpose(0,1)) # [t,c],[c,3*68] ==> [t,3*68]
+        expression_diff_face = torch.matmul(exp_coeff, exp_base.transpose(0,1)) # [t,c],[c,3*68] ==> [t,3*68]
+        
+        face = mean_face + identity_diff_face + expression_diff_face # [t,3N]
+        face = face.reshape([face.shape[0], -1, 3]) # [t,N,3]
+        # no further re-centering here: the landmarks are returned as mean + id + exp offsets
+        return face
+
+    def reconstruct_lm3d(self, id_coeff, exp_coeff, euler, trans, to_camera=True):
+        """
+        Generate posed 3D landmarks with the keypoint bases: (mean + id + exp) @ rot + trans.
+        id_coeff: Tensor[T, c=80]; exp_coeff: Tensor[T, c=64]; euler, trans: Tensor[T, 3]
+        to_camera: if True, z is replaced by 10 - z. Returns Tensor[T, N, 3].
+        """
+        id_coeff = id_coeff.to(self.key_id_base.device)
+        exp_coeff = exp_coeff.to(self.key_id_base.device)
+        mean_face = self.key_mean_shape.squeeze().reshape([1, -1]) # [3*68, 1] ==> [1, 3*68]
+        id_base, exp_base = self.key_id_base, self.key_exp_base # [3*68, C]
+        identity_diff_face = torch.matmul(id_coeff, id_base.transpose(0,1)) # [t,c],[c,3*68] ==> [t,3*68]
+        expression_diff_face = torch.matmul(exp_coeff, exp_base.transpose(0,1)) # [t,c],[c,3*68] ==> [t,3*68]
+        
+        face = mean_face + identity_diff_face + expression_diff_face # [t,3N]
+        face = face.reshape([face.shape[0], -1, 3]) # [t,N,3]
+        rot = self.compute_rotation(euler)
+        # transform (trans is moved to the landmark device, like the other inputs)
+        lm3d = face @ rot + trans.to(face.device).unsqueeze(1) # [t, N, 3]
+        # to camera
+        if to_camera:
+            lm3d[...,-1] = 10 - lm3d[...,-1] 
+        return lm3d
+
+    def reconstruct_lm2d_nerf(self, id_coeff, exp_coeff, euler, trans):
+        """Return reconstruct_lm2d(..., to_camera=False) with both coordinates mirrored as 1 - value."""
+        lm2d = self.reconstruct_lm2d(id_coeff, exp_coeff, euler, trans, to_camera=False)
+        lm2d[..., 0] = 1 - lm2d[..., 0]
+        lm2d[..., 1] = 1 - lm2d[..., 1]
+        return lm2d
+
+    def reconstruct_lm2d(self, id_coeff, exp_coeff, euler, trans, to_camera=True):
+        """
+        Generate 2D landmarks: posed 3D landmarks projected with persc_proj, y flipped, divided by 224.
+        id_coeff: Tensor[T, c=80] or [B, T, 80]; exp_coeff: Tensor[T, c=64] or [B, T, 64]
+        Returns Tensor[T, N, 2], or [B, T, N, 2] for 3-dim inputs.
+        """
+        is_btc_flag = id_coeff.ndim == 3
+        if is_btc_flag:
+            b,t,_ = id_coeff.shape
+            id_coeff = id_coeff.reshape([b*t,-1])
+            exp_coeff = exp_coeff.reshape([b*t,-1])
+            euler = euler.reshape([b*t,-1])
+            trans = trans.reshape([b*t,-1])
+        id_coeff = id_coeff.to(self.key_id_base.device)
+        exp_coeff = exp_coeff.to(self.key_id_base.device)
+        mean_face = self.key_mean_shape.squeeze().reshape([1, -1]) # [3*68, 1] ==> [1, 3*68]
+        id_base, exp_base = self.key_id_base, self.key_exp_base # [3*68, C]
+        identity_diff_face = torch.matmul(id_coeff, id_base.transpose(0,1)) # [t,c],[c,3*68] ==> [t,3*68]
+        expression_diff_face = torch.matmul(exp_coeff, exp_base.transpose(0,1)) # [t,c],[c,3*68] ==> [t,3*68]
+        
+        face = mean_face + identity_diff_face + expression_diff_face # [t,3N]
+        face = face.reshape([face.shape[0], -1, 3]) # [t,N,3]
+        rot = self.compute_rotation(euler)
+        # transform (trans is moved to the landmark device, like the other inputs)
+        lm3d = face @ rot + trans.to(face.device).unsqueeze(1) # [t, N, 3]
+        # to camera
+        if to_camera:
+            lm3d[...,-1] = 10 - lm3d[...,-1] 
+        # to image_plane
+        lm3d = lm3d @ self.persc_proj
+        lm2d = lm3d[..., :2] / lm3d[..., 2:]
+        # flip
+        lm2d[..., 1] = 224 - lm2d[..., 1]
+        lm2d /= 224
+        if is_btc_flag:
+            return lm2d.reshape([b,t,-1,2])
+        return lm2d
+    
+    def compute_rotation(self, euler):
+        """
+        Return:
+            rot              -- torch.tensor, size (B, 3, 3) pts @ trans_mat
+
+        Parameters:
+            euler           -- torch.tensor, size (B, 3), radian
+        """
+        batch_size = euler.shape[0]
+        euler = euler.to(self.key_id_base.device)
+        ones = torch.ones([batch_size, 1]).to(self.key_id_base.device)
+        zeros = torch.zeros([batch_size, 1]).to(self.key_id_base.device)
+        x, y, z = euler[:, :1], euler[:, 1:2], euler[:, 2:],
+        
+        rot_x = torch.cat([
+            ones, zeros, zeros,
+            zeros, torch.cos(x), -torch.sin(x), 
+            zeros, torch.sin(x), torch.cos(x)
+        ], dim=1).reshape([batch_size, 3, 3])
+        
+        rot_y = torch.cat([
+            torch.cos(y), zeros, torch.sin(y),
+            zeros, ones, zeros,
+            -torch.sin(y), zeros, torch.cos(y)
+        ], dim=1).reshape([batch_size, 3, 3])
+
+        rot_z = torch.cat([
+            torch.cos(z), -torch.sin(z), zeros,
+            torch.sin(z), torch.cos(z), zeros,
+            zeros, zeros, ones
+        ], dim=1).reshape([batch_size, 3, 3])
+
+        rot = rot_z @ rot_y @ rot_x
+        return rot.permute(0, 2, 1)  # transposed so that points are right-multiplied: pts @ rot
+    
+    def reconstruct_idexp_lm3d(self, id_coeff, exp_coeff):
+        """
+        Generate the id + exp landmark offsets (mean shape not added) with the keypoint bases, scaled by 10.
+        id_coeff: Tensor[T, c=80]
+        exp_coeff: Tensor[T, c=64]
+        """
+        id_coeff = id_coeff.to(self.key_id_base.device)
+        exp_coeff = exp_coeff.to(self.key_id_base.device)
+        id_base, exp_base = self.key_id_base, self.key_exp_base # [3*68, C]
+        identity_diff_face = torch.matmul(id_coeff, id_base.transpose(0,1)) # [t,c],[c,3*68] ==> [t,3*68]
+        expression_diff_face = torch.matmul(exp_coeff, exp_base.transpose(0,1)) # [t,c],[c,3*68] ==> [t,3*68]
+        
+        face = identity_diff_face + expression_diff_face # [t,3N]
+        face = face.reshape([face.shape[0], -1, 3]) # [t,N,3]
+        lm3d = face * 10
+        return lm3d
+    
+    def reconstruct_idexp_lm3d_np(self, id_coeff, exp_coeff):
+        """
+        NumPy variant of reconstruct_idexp_lm3d, using the cached *_np copies of the keypoint bases.
+        id_coeff: ndarray[T, c=80]
+        exp_coeff: ndarray[T, c=64]
+        """
+        id_base, exp_base = self.key_id_base_np, self.key_exp_base_np # [3*68, C]
+        identity_diff_face = np.dot(id_coeff, id_base.T) # [t,c],[c,3*68] ==> [t,3*68]
+        expression_diff_face = np.dot(exp_coeff, exp_base.T) # [t,c],[c,3*68] ==> [t,3*68]
+        
+        face = identity_diff_face + expression_diff_face # [t,3N]
+        face = face.reshape([face.shape[0], -1, 3]) # [t,N,3]
+        lm3d = face * 10
+        return lm3d
+    
+    def get_eye_mouth_lm_from_lm3d(self, lm3d):
+        """Split [T, N, 3] landmarks into the index ranges 17:48 (eye_lm) and 48:68 (mouth_lm)."""
+        eye_lm = lm3d[:, 17:48] # [T, 31, 3]
+        mouth_lm = lm3d[:, 48:68] # [T, 20, 3]
+        return eye_lm, mouth_lm
+    
+    def get_eye_mouth_lm_from_lm3d_batch(self, lm3d):
+        """Batched variant: split [B, T, N, 3] landmarks into the index ranges 17:48 and 48:68."""
+        eye_lm = lm3d[:, :, 17:48] # [B, T, 31, 3]
+        mouth_lm = lm3d[:, :, 48:68] # [B, T, 20, 3]
+        return eye_lm, mouth_lm
+    
+    def close_mouth_for_idexp_lm3d(self, idexp_lm3d, freeze_as_first_frame=True):
+        """Pull the paired mouth landmarks (49:54 with 59..55, 61:64 with 67..65) together in y; returns a CPU tensor."""
+        idexp_lm3d = idexp_lm3d.reshape([-1, 68,3])
+        num_frames = idexp_lm3d.shape[0]
+        eps = 0.0
+        # [n_landmarks=68,xyz=3], x is left-right, y is up-down, z is depth
+        idexp_lm3d[:,49:54, 1] = (idexp_lm3d[:,49:54, 1] + idexp_lm3d[:,range(59,54,-1), 1])/2 + eps * 2
+        # NOTE(review): the next line reads the 49:54 values just overwritten above, so it is not the plain midpoint - confirm intended
+        idexp_lm3d[:,range(59,54,-1), 1] = (idexp_lm3d[:,49:54, 1] + idexp_lm3d[:,range(59,54,-1), 1])/2 - eps * 2
+
+        idexp_lm3d[:,61:64, 1] = (idexp_lm3d[:,61:64, 1] + idexp_lm3d[:,range(67,64,-1), 1])/2 + eps
+        idexp_lm3d[:,range(67,64,-1), 1] = (idexp_lm3d[:,61:64, 1] + idexp_lm3d[:,range(67,64,-1), 1])/2 - eps
+
+        idexp_lm3d[:,49:54, 1] += (0.03 - idexp_lm3d[:,49:54, 1].mean(dim=1) + idexp_lm3d[:,61:64, 1].mean(dim=1)).unsqueeze(1).repeat([1,5])
+        idexp_lm3d[:,range(59,54,-1), 1] += (-0.03 - idexp_lm3d[:,range(59,54,-1), 1].mean(dim=1) + idexp_lm3d[:,range(67,64,-1), 1].mean(dim=1)).unsqueeze(1).repeat([1,5])
+
+        if freeze_as_first_frame:
+            idexp_lm3d[:, 48:68,] = idexp_lm3d[0, 48:68].unsqueeze(0).clone().repeat([num_frames, 1,1])*0  # the trailing *0 zeroes landmarks 48:68 in every frame
+        return idexp_lm3d.cpu()
+
+    def close_eyes_for_idexp_lm3d(self, idexp_lm3d):
+        """Pull the paired eye landmarks (37:39 with 41..40, 43:45 with 47..46) together in y, keeping an eps gap."""
+        idexp_lm3d = idexp_lm3d.reshape([-1, 68,3])
+        eps = 0.003
+        idexp_lm3d[:,37:39, 1] = (idexp_lm3d[:,37:39, 1] + idexp_lm3d[:,range(41,39,-1), 1])/2 + eps
+        idexp_lm3d[:,range(41,39,-1), 1] = (idexp_lm3d[:,37:39, 1] + idexp_lm3d[:,range(41,39,-1), 1])/2 - eps
+
+        idexp_lm3d[:,43:45, 1] = (idexp_lm3d[:,43:45, 1] + idexp_lm3d[:,range(47,45,-1), 1])/2 + eps
+        idexp_lm3d[:,range(47,45,-1), 1] = (idexp_lm3d[:,43:45, 1] + idexp_lm3d[:,range(47,45,-1), 1])/2 - eps
+        
+        return idexp_lm3d
+
+if __name__ == '__main__':
+    # Debug script: reconstruct 2D landmarks from saved coefficients and draw them frame by frame into a video.
+    import cv2
+    import imageio
+    font = cv2.FONT_HERSHEY_SIMPLEX
+    WH = 512
+    eye_idx = list(range(36,48))
+    mouth_idx = list(range(48,68))
+    debug_name = 'debug_lm3d.mp4'
+
+    face_mesh_helper = Face3DHelper('deep_3drecon/BFM')
+    coeff_npy = 'data/coeff_fit_mp/crop_nana_003_coeff_fit_mp.npy'
+    coeff_dict = np.load(coeff_npy, allow_pickle=True).tolist()
+    coeff_tensors = [torch.tensor(coeff_dict[key]).cuda() for key in ('id', 'exp', 'euler', 'trans')]
+    lm3d = face_mesh_helper.reconstruct_lm2d(*coeff_tensors)
+    lm3d = (lm3d * WH).cpu().int().numpy()
+
+    writer = imageio.get_writer(debug_name, fps=25)
+    for frame_lm in lm3d:
+        lm2d = frame_lm[:, :2] # [68, 2]
+        img = np.ones([WH, WH, 3], dtype=np.uint8) * 255
+        for i, (x, y) in enumerate(lm2d):
+            if i in eye_idx:
+                color = (0,0,255)
+            elif i in mouth_idx:
+                color = (0,255,0)
+            else:
+                color = (255,0,0)
+            img = cv2.circle(img, center=(x,y), radius=3, color=color, thickness=-1)
+            img = cv2.putText(img, f"{i}", org=(x,y), fontFace=font, fontScale=0.3, color=(255,0,0))
+        writer.append_data(img)
+    writer.close()
diff --git a/deep_3drecon/BFM/.gitkeep b/deep_3drecon/BFM/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/deep_3drecon/bfm_left_eye_faces.npy b/deep_3drecon/bfm_left_eye_faces.npy
new file mode 100644
index 0000000000000000000000000000000000000000..7044bb788d7f382888649a1b138912be259bbd78
--- /dev/null
+++ b/deep_3drecon/bfm_left_eye_faces.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9651756ea2c0fac069a1edf858ed1f125eddc358fa74c529a370c1e7b5730d28
+size 4680
diff --git a/deep_3drecon/bfm_right_eye_faces.npy b/deep_3drecon/bfm_right_eye_faces.npy
new file mode 100644
index 0000000000000000000000000000000000000000..b995860e0c2021a548c413e5add0976f4dc34db7
--- /dev/null
+++ b/deep_3drecon/bfm_right_eye_faces.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28cb5bbacf578d30a3d5006ec28c617fe5a3ecaeeeb87d9433a884e0f0301a2e
+size 4648
diff --git a/deep_3drecon/deep_3drecon_models/bfm.py b/deep_3drecon/deep_3drecon_models/bfm.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce2cb08ba673a7d7e7c5db11dc2b394aca879ebb
--- /dev/null
+++ b/deep_3drecon/deep_3drecon_models/bfm.py
@@ -0,0 +1,426 @@
+"""This script defines the parametric 3d face model for Deep3DFaceRecon_pytorch
+"""
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from scipy.io import loadmat
+import os
+# from utils.commons.tensor_utils import convert_like
+
+
+def perspective_projection(focal, center):
+    # return p.T (N, 3) @ (3, 3) 
+    return np.array([
+        focal, 0, center,
+        0, focal, center,
+        0, 0, 1
+    ]).reshape([3, 3]).astype(np.float32).transpose() # 注意这里的transpose！
+
+class SH:
+    def __init__(self):
+        self.a = [np.pi, 2 * np.pi / np.sqrt(3.), 2 * np.pi / np.sqrt(8.)]
+        self.c = [1/np.sqrt(4 * np.pi), np.sqrt(3.) / np.sqrt(4 * np.pi), 3 * np.sqrt(5.) / np.sqrt(12 * np.pi)]
+
+
+
+class ParametricFaceModel:
+    def __init__(self, 
+                bfm_folder='./BFM', 
+                recenter=True,
+                camera_distance=10.,
+                init_lit=np.array([
+                    0.8, 0, 0, 0, 0, 0, 0, 0, 0
+                    ]),
+                focal=1015.,
+                center=112.,
+                is_train=True,
+                default_name='BFM_model_front.mat',
+                keypoint_mode='mediapipe'):
+        
+        model = loadmat(os.path.join(bfm_folder, default_name))
+        # mean face shape. [3*N,1]
+        self.mean_shape = model['meanshape'].astype(np.float32)
+        # identity basis. [3*N,80]
+        self.id_base = model['idBase'].astype(np.float32)
+        # expression basis. [3*N,64]
+        self.exp_base = model['exBase'].astype(np.float32)
+        # mean face texture. [3*N,1] (0-255)
+        self.mean_tex = model['meantex'].astype(np.float32)
+        # texture basis. [3*N,80]
+        self.tex_base = model['texBase'].astype(np.float32)
+        # face indices for each vertex that lies in. starts from 0. [N,8]
+        self.point_buf = model['point_buf'].astype(np.int64) - 1
+        # vertex indices for each face. starts from 0. [F,3]
+        self.face_buf = model['tri'].astype(np.int64) - 1
+        # vertex indices for 68 landmarks. starts from 0. [68,1]
+        if keypoint_mode == 'mediapipe':
+            self.keypoints = np.load("deep_3drecon/BFM/index_mp468_from_mesh35709.npy").astype(np.int64)
+            unmatch_mask = self.keypoints < 0
+            self.keypoints[unmatch_mask] = 0
+        else:
+            self.keypoints = np.squeeze(model['keypoints']).astype(np.int64) - 1
+
+        if is_train:
+            # vertex indices for small face region to compute photometric error. starts from 0.
+            self.front_mask = np.squeeze(model['frontmask2_idx']).astype(np.int64) - 1
+            # vertex indices for each face from small face region. starts from 0. [f,3]
+            self.front_face_buf = model['tri_mask2'].astype(np.int64) - 1
+            # vertex indices for pre-defined skin region to compute reflectance loss
+            self.skin_mask = np.squeeze(model['skinmask'])
+        
+        if recenter:
+            mean_shape = self.mean_shape.reshape([-1, 3])
+            mean_shape = mean_shape - np.mean(mean_shape, axis=0, keepdims=True)
+            self.mean_shape = mean_shape.reshape([-1, 1])
+
+        self.key_mean_shape = self.mean_shape.reshape([-1, 3])[self.keypoints, :].reshape([-1, 3])
+        self.key_id_base = self.id_base.reshape([-1, 3,80])[self.keypoints, :].reshape([-1, 80])
+        self.key_exp_base = self.exp_base.reshape([-1, 3, 64])[self.keypoints, :].reshape([-1, 64])
+
+        self.focal = focal
+        self.center = center
+        self.persc_proj = perspective_projection(focal, center)
+        self.device = 'cpu'
+        self.camera_distance = camera_distance
+        self.SH = SH()
+        self.init_lit = init_lit.reshape([1, 1, -1]).astype(np.float32)
+
+        self.initialized = False
+
+    def to(self, device):
+        self.device = device
+        for key, value in self.__dict__.items():
+            if type(value).__module__ == np.__name__:
+                setattr(self, key, torch.tensor(value).to(device))
+        self.initialized = True
+        return self
+    
+    def compute_shape(self, id_coeff, exp_coeff):
+        """
+        Return:
+            face_shape       -- torch.tensor, size (B, N, 3)
+
+        Parameters:
+            id_coeff         -- torch.tensor, size (B, 80), identity coeffs
+            exp_coeff        -- torch.tensor, size (B, 64), expression coeffs
+        """
+        batch_size = id_coeff.shape[0]
+        id_part = torch.einsum('ij,aj->ai', self.id_base, id_coeff)
+        exp_part = torch.einsum('ij,aj->ai', self.exp_base, exp_coeff)
+        face_shape = id_part + exp_part + self.mean_shape.reshape([1, -1])
+        return face_shape.reshape([batch_size, -1, 3])
+    
+    def compute_key_shape(self, id_coeff, exp_coeff):
+        """
+        Return:
+            face_shape       -- torch.tensor, size (B, N, 3)
+
+        Parameters:
+            id_coeff         -- torch.tensor, size (B, 80), identity coeffs
+            exp_coeff        -- torch.tensor, size (B, 64), expression coeffs
+        """
+        batch_size = id_coeff.shape[0]
+        id_part = torch.einsum('ij,aj->ai', self.key_id_base, id_coeff)
+        exp_part = torch.einsum('ij,aj->ai', self.key_exp_base, exp_coeff)
+        face_shape = id_part + exp_part + self.key_mean_shape.reshape([1, -1])
+        return face_shape.reshape([batch_size, -1, 3])
+
+    def compute_texture(self, tex_coeff, normalize=True):
+        """
+        Return:
+            face_texture     -- torch.tensor, size (B, N, 3), in RGB order, range (0, 1.)
+
+        Parameters:
+            tex_coeff        -- torch.tensor, size (B, 80)
+        """
+        batch_size = tex_coeff.shape[0]
+        face_texture = torch.einsum('ij,aj->ai', self.tex_base, tex_coeff) + self.mean_tex
+        if normalize:
+            face_texture = face_texture / 255.
+        return face_texture.reshape([batch_size, -1, 3])
+
+
+    def compute_norm(self, face_shape):
+        """
+        Return:
+            vertex_norm      -- torch.tensor, size (B, N, 3)
+
+        Parameters:
+            face_shape       -- torch.tensor, size (B, N, 3)
+        """
+
+        v1 = face_shape[:, self.face_buf[:, 0]]
+        v2 = face_shape[:, self.face_buf[:, 1]]
+        v3 = face_shape[:, self.face_buf[:, 2]]
+        e1 = v1 - v2
+        e2 = v2 - v3
+        face_norm = torch.cross(e1, e2, dim=-1)
+        face_norm = F.normalize(face_norm, dim=-1, p=2)
+        face_norm = torch.cat([face_norm, torch.zeros(face_norm.shape[0], 1, 3).to(self.device)], dim=1)
+        
+        vertex_norm = torch.sum(face_norm[:, self.point_buf], dim=2)
+        vertex_norm = F.normalize(vertex_norm, dim=-1, p=2)
+        return vertex_norm
+
+
+    def compute_color(self, face_texture, face_norm, gamma):
+        """
+        Return:
+            face_color       -- torch.tensor, size (B, N, 3), range (0, 1.)
+
+        Parameters:
+            face_texture     -- torch.tensor, size (B, N, 3), from texture model, range (0, 1.)
+            face_norm        -- torch.tensor, size (B, N, 3), rotated face normal
+            gamma            -- torch.tensor, size (B, 27), SH coeffs
+        """
+        batch_size = gamma.shape[0]
+        v_num = face_texture.shape[1]
+        a, c = self.SH.a, self.SH.c
+        gamma = gamma.reshape([batch_size, 3, 9])
+        gamma = gamma + self.init_lit
+        gamma = gamma.permute(0, 2, 1)
+        Y = torch.cat([
+             a[0] * c[0] * torch.ones_like(face_norm[..., :1]).to(self.device),
+            -a[1] * c[1] * face_norm[..., 1:2],
+             a[1] * c[1] * face_norm[..., 2:],
+            -a[1] * c[1] * face_norm[..., :1],
+             a[2] * c[2] * face_norm[..., :1] * face_norm[..., 1:2],
+            -a[2] * c[2] * face_norm[..., 1:2] * face_norm[..., 2:],
+            0.5 * a[2] * c[2] / np.sqrt(3.) * (3 * face_norm[..., 2:] ** 2 - 1),
+            -a[2] * c[2] * face_norm[..., :1] * face_norm[..., 2:],
+            0.5 * a[2] * c[2] * (face_norm[..., :1] ** 2  - face_norm[..., 1:2] ** 2)
+        ], dim=-1)
+        r = Y @ gamma[..., :1]
+        g = Y @ gamma[..., 1:2]
+        b = Y @ gamma[..., 2:]
+        face_color = torch.cat([r, g, b], dim=-1) * face_texture
+        return face_color
+
+    @staticmethod
+    def compute_rotation(angles, device='cpu'):
+        """
+        Return:
+            rot              -- torch.tensor, size (B, 3, 3) pts @ trans_mat
+
+        Parameters:
+            angles           -- torch.tensor, size (B, 3), radian
+        """
+
+        batch_size = angles.shape[0]
+        angles = angles.to(device)
+        ones = torch.ones([batch_size, 1]).to(device)
+        zeros = torch.zeros([batch_size, 1]).to(device)
+        x, y, z = angles[:, :1], angles[:, 1:2], angles[:, 2:],
+        
+        rot_x = torch.cat([
+            ones, zeros, zeros,
+            zeros, torch.cos(x), -torch.sin(x), 
+            zeros, torch.sin(x), torch.cos(x)
+        ], dim=1).reshape([batch_size, 3, 3])
+        
+        rot_y = torch.cat([
+            torch.cos(y), zeros, torch.sin(y),
+            zeros, ones, zeros,
+            -torch.sin(y), zeros, torch.cos(y)
+        ], dim=1).reshape([batch_size, 3, 3])
+
+        rot_z = torch.cat([
+            torch.cos(z), -torch.sin(z), zeros,
+            torch.sin(z), torch.cos(z), zeros,
+            zeros, zeros, ones
+        ], dim=1).reshape([batch_size, 3, 3])
+
+        rot = rot_z @ rot_y @ rot_x
+        return rot.permute(0, 2, 1)
+
+
+    def to_camera(self, face_shape):
+        face_shape[..., -1] = self.camera_distance - face_shape[..., -1] # reverse the depth axis, add a fixed offset of length
+        return face_shape
+
+    def to_image(self, face_shape):
+        """
+        Return:
+            face_proj        -- torch.tensor, size (B, N, 2), y direction is opposite to v direction
+
+        Parameters:
+            face_shape       -- torch.tensor, size (B, N, 3)
+        """
+        # to image_plane
+        face_proj = face_shape @ self.persc_proj
+        face_proj = face_proj[..., :2] / face_proj[..., 2:]
+
+        return face_proj
+
+
+    def transform(self, face_shape, rot, trans):
+        """
+        Return:
+            face_shape       -- torch.tensor, size (B, N, 3) pts @ rot + trans
+
+        Parameters:
+            face_shape       -- torch.tensor, si≥ze (B, N, 3)
+            rot              -- torch.tensor, size (B, 3, 3)
+            trans            -- torch.tensor, size (B, 3)
+        """
+        return face_shape @ rot + trans.unsqueeze(1)
+
+
+    def get_landmarks(self, face_proj):
+        """
+        Return:
+            face_lms         -- torch.tensor, size (B, 68, 2)
+
+        Parameters:
+            face_proj       -- torch.tensor, size (B, N, 2)
+        """  
+        return face_proj[:, self.keypoints]
+
+    def split_coeff(self, coeffs):
+        """
+        Return:
+            coeffs_dict     -- a dict of torch.tensors
+
+        Parameters:
+            coeffs          -- torch.tensor, size (B, 256)
+        """
+        id_coeffs = coeffs[:, :80]
+        exp_coeffs = coeffs[:, 80: 144]
+        tex_coeffs = coeffs[:, 144: 224]
+        angles = coeffs[:, 224: 227]
+        gammas = coeffs[:, 227: 254]
+        translations = coeffs[:, 254:]
+        return {
+            'id': id_coeffs,
+            'exp': exp_coeffs,
+            'tex': tex_coeffs,
+            'angle': angles,
+            'gamma': gammas,
+            'trans': translations
+        }
+    def compute_for_render(self, coeffs):
+        """
+        Return:
+            face_vertex     -- torch.tensor, size (B, N, 3), in camera coordinate
+            face_color      -- torch.tensor, size (B, N, 3), in RGB order
+            landmark        -- torch.tensor, size (B, 68, 2), y direction is opposite to v direction
+        Parameters:
+            coeffs          -- torch.tensor, size (B, 257)
+        """
+        coef_dict = self.split_coeff(coeffs)
+        face_shape = self.compute_shape(coef_dict['id'], coef_dict['exp'])
+        rotation = self.compute_rotation(coef_dict['angle'], device=self.device)
+
+
+        face_shape_transformed = self.transform(face_shape, rotation, coef_dict['trans'])
+        face_vertex = self.to_camera(face_shape_transformed)
+        
+        face_proj = self.to_image(face_vertex)
+        landmark = self.get_landmarks(face_proj)
+
+        face_texture = self.compute_texture(coef_dict['tex'])
+        face_norm = self.compute_norm(face_shape)
+        face_norm_roted = face_norm @ rotation
+        face_color = self.compute_color(face_texture, face_norm_roted, coef_dict['gamma'])
+
+        return face_vertex, face_texture, face_color, landmark
+
+    def compute_face_vertex(self, id, exp, angle, trans):
+        """
+        Return:
+            face_vertex     -- torch.tensor, size (B, N, 3), in camera coordinate
+            face_color      -- torch.tensor, size (B, N, 3), in RGB order
+            landmark        -- torch.tensor, size (B, 68, 2), y direction is opposite to v direction
+        Parameters:
+            coeffs          -- torch.tensor, size (B, 257)
+        """
+        if not self.initialized:
+            self.to(id.device)
+        face_shape = self.compute_shape(id, exp)
+        rotation = self.compute_rotation(angle, device=self.device)
+        face_shape_transformed = self.transform(face_shape, rotation, trans)
+        face_vertex = self.to_camera(face_shape_transformed)
+        return face_vertex
+    
+    def compute_for_landmark_fit(self, id, exp, angles, trans, ret=None):
+        """
+        Return:
+            face_vertex     -- torch.tensor, size (B, N, 3), in camera coordinate
+            face_color      -- torch.tensor, size (B, N, 3), in RGB order
+            landmark        -- torch.tensor, size (B, 68, 2), y direction is opposite to v direction
+        Parameters:
+            coeffs          -- torch.tensor, size (B, 257)
+        """
+        face_shape = self.compute_key_shape(id, exp)
+        rotation = self.compute_rotation(angles, device=self.device)
+
+        face_shape_transformed = self.transform(face_shape, rotation, trans)
+        face_vertex = self.to_camera(face_shape_transformed)
+        
+        face_proj = self.to_image(face_vertex)
+        landmark = face_proj
+        return landmark
+
+    def compute_for_landmark_fit_nerf(self, id, exp, angles, trans, ret=None):
+        """
+        Return:
+            face_vertex     -- torch.tensor, size (B, N, 3), in camera coordinate
+            face_color      -- torch.tensor, size (B, N, 3), in RGB order
+            landmark        -- torch.tensor, size (B, 68, 2), y direction is opposite to v direction
+        Parameters:
+            coeffs          -- torch.tensor, size (B, 257)
+        """
+        face_shape = self.compute_key_shape(id, exp)
+        rotation = self.compute_rotation(angles, device=self.device)
+
+        face_shape_transformed = self.transform(face_shape, rotation, trans)
+        face_vertex = face_shape_transformed # no to_camera
+        
+        face_proj = self.to_image(face_vertex)
+        landmark = face_proj
+        return landmark
+
+    # def compute_for_landmark_fit(self, id, exp, angles, trans, ret={}):
+    #     """
+    #     Return:
+    #         face_vertex     -- torch.tensor, size (B, N, 3), in camera coordinate
+    #         face_color      -- torch.tensor, size (B, N, 3), in RGB order
+    #         landmark        -- torch.tensor, size (B, 68, 2), y direction is opposite to v direction
+    #     Parameters:
+    #         coeffs          -- torch.tensor, size (B, 257)
+    #     """
+    #     face_shape = self.compute_shape(id, exp)
+    #     rotation = self.compute_rotation(angles)
+
+    #     face_shape_transformed = self.transform(face_shape, rotation, trans)
+    #     face_vertex = self.to_camera(face_shape_transformed)
+        
+    #     face_proj = self.to_image(face_vertex)
+    #     landmark = self.get_landmarks(face_proj)
+    #     return landmark
+    
+    def compute_for_render_fit(self, id, exp, angles, trans, tex, gamma):
+        """
+        Return:
+            face_vertex     -- torch.tensor, size (B, N, 3), in camera coordinate
+            face_color      -- torch.tensor, size (B, N, 3), in RGB order
+            landmark        -- torch.tensor, size (B, 68, 2), y direction is opposite to v direction
+        Parameters:
+            coeffs          -- torch.tensor, size (B, 257)
+        """
+        face_shape = self.compute_shape(id, exp)
+        rotation = self.compute_rotation(angles, device=self.device)
+
+        face_shape_transformed = self.transform(face_shape, rotation, trans)
+        face_vertex = self.to_camera(face_shape_transformed)
+        
+        face_proj = self.to_image(face_vertex)
+        landmark = self.get_landmarks(face_proj)
+
+        face_texture = self.compute_texture(tex)
+        face_norm = self.compute_norm(face_shape)
+        face_norm_roted = face_norm @ rotation
+        face_color = self.compute_color(face_texture, face_norm_roted, gamma)
+
+        return face_color, face_vertex, landmark
\ No newline at end of file
diff --git a/deep_3drecon/ncc_code.npy b/deep_3drecon/ncc_code.npy
new file mode 100644
index 0000000000000000000000000000000000000000..79568a9ce3c7a903cea7ec76f1870f15fd052f13
--- /dev/null
+++ b/deep_3drecon/ncc_code.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da54a620c0981d43cc9f30b3d8b3f5d4beb0ec0e27127a1ef3fb62ea50913609
+size 428636
diff --git a/deep_3drecon/secc_renderer.py b/deep_3drecon/secc_renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d6b3cdc4051c1ad9ed98b3228a35f35d573ab7c
--- /dev/null
+++ b/deep_3drecon/secc_renderer.py
@@ -0,0 +1,78 @@
+import torch
+import torch.nn as nn
+import numpy as np
+from einops import rearrange
+
+from deep_3drecon.util.mesh_renderer import MeshRenderer
+from deep_3drecon.deep_3drecon_models.bfm import ParametricFaceModel
+
+
+class SECC_Renderer(nn.Module):
+    def __init__(self, rasterize_size=None, device="cuda"):
+        super().__init__()
+        self.face_model = ParametricFaceModel('deep_3drecon/BFM')
+        self.fov = 2 * np.arctan(self.face_model.center / self.face_model.focal) * 180 / np.pi
+        self.znear = 5.
+        self.zfar = 15.
+        if rasterize_size is None:
+            rasterize_size = 2*self.face_model.center 
+        self.face_renderer = MeshRenderer(rasterize_fov=self.fov, znear=self.znear, zfar=self.zfar, rasterize_size=rasterize_size, use_opengl=False).cuda()
+        face_feat = np.load("deep_3drecon/ncc_code.npy", allow_pickle=True)
+        self.face_feat = torch.tensor(face_feat.T).unsqueeze(0).to(device=device)
+
+        del_index_re = np.load('deep_3drecon/bfm_right_eye_faces.npy')
+        del_index_re = del_index_re - 1
+        del_index_le = np.load('deep_3drecon/bfm_left_eye_faces.npy')
+        del_index_le = del_index_le - 1
+        face_buf_list = []
+        for i in range(self.face_model.face_buf.shape[0]):
+            if i not in del_index_re and i not in del_index_le:
+                face_buf_list.append(self.face_model.face_buf[i])
+        face_buf_arr = np.array(face_buf_list)
+        self.face_buf = torch.tensor(face_buf_arr).to(device=device)
+    
+    def forward(self, id, exp, euler, trans):
+        """
+        id, exp, euler, euler: [B, C] or [B, T, C]
+        return:
+            MASK: [B, 1, 512, 512], value[0. or 1.0], 1.0 denotes is face
+            SECC MAP: [B, 3, 512, 512], value[0~1]
+            if input is BTC format, return [B, C, T, H, W]
+        """
+        bs = id.shape[0]
+        is_btc_flag = id.ndim == 3
+        if is_btc_flag:
+            t = id.shape[1]
+            bs = bs * t
+            id, exp, euler, trans = id.reshape([bs,-1]), exp.reshape([bs,-1]), euler.reshape([bs,-1]), trans.reshape([bs,-1])
+
+        face_vertex = self.face_model.compute_face_vertex(id, exp, euler, trans)
+        face_mask, _, secc_face = self.face_renderer(
+                face_vertex, self.face_buf.unsqueeze(0).repeat([bs, 1, 1]), feat=self.face_feat.repeat([bs,1,1]))
+        secc_face = (secc_face - 0.5) / 0.5 # scale to -1~1 
+
+        if is_btc_flag:
+            bs = bs // t
+            face_mask = rearrange(face_mask, "(n t) c h w -> n c t h w", n=bs, t=t)
+            secc_face = rearrange(secc_face, "(n t) c h w -> n c t h w", n=bs, t=t)
+        return face_mask, secc_face
+
+
+if __name__ == '__main__':
+    import imageio
+
+    renderer = SECC_Renderer(rasterize_size=512)
+    ret = np.load("data/processed/videos/May/vid_coeff_fit.npy", allow_pickle=True).tolist()
+    idx = 6
+    id = torch.tensor(ret['id']).cuda()[idx:idx+1]
+    exp = torch.tensor(ret['exp']).cuda()[idx:idx+1]
+    angle = torch.tensor(ret['euler']).cuda()[idx:idx+1]
+    trans = torch.tensor(ret['trans']).cuda()[idx:idx+1]
+    mask, secc = renderer(id, exp, angle*0, trans*0) # [1, 1, 512, 512], [1, 3, 512, 512]
+
+    out_mask = mask[0].permute(1,2,0)
+    out_mask = (out_mask * 127.5 + 127.5).int().cpu().numpy()
+    imageio.imwrite("out_mask.png", out_mask)
+    out_img = secc[0].permute(1,2,0)
+    out_img = (out_img * 127.5 + 127.5).int().cpu().numpy()
+    imageio.imwrite("out_secc.png", out_img)
\ No newline at end of file
diff --git a/deep_3drecon/util/mesh_renderer.py b/deep_3drecon/util/mesh_renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..0b6e765d706fb31cbe7f0b4403b492893ca32221
--- /dev/null
+++ b/deep_3drecon/util/mesh_renderer.py
@@ -0,0 +1,131 @@
+"""This script is the differentiable renderer for Deep3DFaceRecon_pytorch
+    Attention, antialiasing step is missing in current version.
+"""
+import torch
+import torch.nn.functional as F
+import kornia
+from kornia.geometry.camera import pixel2cam
+import numpy as np
+from typing import List
+from scipy.io import loadmat
+from torch import nn
+import traceback
+
+try:
+    import pytorch3d.ops
+    from pytorch3d.structures import Meshes
+    from pytorch3d.renderer import (
+        look_at_view_transform,
+        FoVPerspectiveCameras,
+        DirectionalLights,
+        RasterizationSettings,
+        MeshRenderer,
+        MeshRasterizer,
+        SoftPhongShader,
+        TexturesUV,
+    )
+except:
+    traceback.print_exc()
+# def ndc_projection(x=0.1, n=1.0, f=50.0):
+#     return np.array([[n/x,    0,            0,              0],
+#                      [  0, n/-x,            0,              0],
+#                      [  0,    0, -(f+n)/(f-n), -(2*f*n)/(f-n)],
+#                      [  0,    0,           -1,              0]]).astype(np.float32)
+
+class MeshRenderer(nn.Module):
+    def __init__(self,
+                rasterize_fov,
+                znear=0.1,
+                zfar=10, 
+                rasterize_size=224,**args):
+        super(MeshRenderer, self).__init__()
+
+        # x = np.tan(np.deg2rad(rasterize_fov * 0.5)) * znear
+        # self.ndc_proj = torch.tensor(ndc_projection(x=x, n=znear, f=zfar)).matmul(
+        #         torch.diag(torch.tensor([1., -1, -1, 1])))
+        self.rasterize_size = rasterize_size
+        self.fov = rasterize_fov
+        self.znear = znear
+        self.zfar = zfar
+
+        self.rasterizer = None
+    
+    def forward(self, vertex, tri, feat=None):
+        """
+        Return:
+            mask               -- torch.tensor, size (B, 1, H, W)
+            depth              -- torch.tensor, size (B, 1, H, W)
+            features(optional) -- torch.tensor, size (B, C, H, W) if feat is not None
+
+        Parameters:
+            vertex          -- torch.tensor, size (B, N, 3)
+            tri             -- torch.tensor, size (B, M, 3) or (M, 3), triangles
+            feat(optional)  -- torch.tensor, size (B, N ,C), features
+        """
+        device = vertex.device
+        rsize = int(self.rasterize_size)
+        # ndc_proj = self.ndc_proj.to(device)
+        # trans to homogeneous coordinates of 3d vertices, the direction of y is the same as v
+        if vertex.shape[-1] == 3:
+            vertex = torch.cat([vertex, torch.ones([*vertex.shape[:2], 1]).to(device)], dim=-1)
+            vertex[..., 0] = -vertex[..., 0]
+
+
+        # vertex_ndc = vertex @ ndc_proj.t()
+        if self.rasterizer is None:
+            self.rasterizer = MeshRasterizer()
+            print("create rasterizer on device cuda:%d"%device.index)
+        
+        # ranges = None
+        # if isinstance(tri, List) or len(tri.shape) == 3:
+        #     vum = vertex_ndc.shape[1]
+        #     fnum = torch.tensor([f.shape[0] for f in tri]).unsqueeze(1).to(device)
+        #     fstartidx = torch.cumsum(fnum, dim=0) - fnum
+        #     ranges = torch.cat([fstartidx, fnum], axis=1).type(torch.int32).cpu()
+        #     for i in range(tri.shape[0]):
+        #         tri[i] = tri[i] + i*vum
+        #     vertex_ndc = torch.cat(vertex_ndc, dim=0)
+        #     tri = torch.cat(tri, dim=0)
+
+        # for range_mode vetex: [B*N, 4], tri: [B*M, 3], for instance_mode vetex: [B, N, 4], tri: [M, 3]
+        tri = tri.type(torch.int32).contiguous()
+
+        # rasterize
+        cameras = FoVPerspectiveCameras(
+            device=device,
+            fov=self.fov,
+            znear=self.znear,
+            zfar=self.zfar,
+        )
+
+        raster_settings = RasterizationSettings(
+            image_size=rsize
+        )
+
+        # print(vertex.shape, tri.shape)
+        if tri.ndim == 2:
+            tri = tri.unsqueeze(0)
+        mesh = Meshes(vertex.contiguous()[...,:3], tri)
+
+        fragments = self.rasterizer(mesh, cameras = cameras, raster_settings = raster_settings)
+        rast_out = fragments.pix_to_face.squeeze(-1)
+        depth = fragments.zbuf
+
+        # render depth
+        depth = depth.permute(0, 3, 1, 2)
+        mask = (rast_out > 0).float().unsqueeze(1)
+        depth = mask * depth
+        
+
+        image = None
+        if feat is not None:
+            attributes = feat.reshape(-1,3)[mesh.faces_packed()]
+            image = pytorch3d.ops.interpolate_face_attributes(fragments.pix_to_face,
+                                                      fragments.bary_coords,
+                                                      attributes)
+            # print(image.shape)
+            image = image.squeeze(-2).permute(0, 3, 1, 2)
+            image = mask * image
+        
+        return mask, depth, image
+
diff --git a/docs/prepare_env/install_guide-zh.md b/docs/prepare_env/install_guide-zh.md
new file mode 100644
index 0000000000000000000000000000000000000000..26d23b3178c471f155c0fb84fc11c6dff78ab884
--- /dev/null
+++ b/docs/prepare_env/install_guide-zh.md
@@ -0,0 +1,35 @@
+# 环境配置
+[English Doc](./install_guide.md)
+
+本文档陈述了搭建Real3D-Portrait Python环境的步骤，我们使用了Conda来管理依赖。
+
+以下配置已在 A100/V100 + CUDA11.7 中进行了验证。
+
+
+# 1. 安装CUDA
+我们推荐安装CUDA `11.7`，其他CUDA版本（例如`10.2`、`12.x`）也可能有效。 
+
+# 2. 安装Python依赖
+```
+cd <Real3DPortraitRoot>
+source <CondaRoot>/bin/activate
+conda create -n real3dportrait python=3.9
+conda activate real3dportrait
+conda install conda-forge::ffmpeg # ffmpeg with libx264 codec to turn images to video
+
+# 我们推荐安装torch2.0.1+cuda11.7. 
+conda install pytorch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 pytorch-cuda=11.7 -c pytorch -c nvidia
+
+# 从源代码安装，需要比较长的时间 (如果遇到各种time-out问题，建议使用代理)
+pip install "git+https://github.com/facebookresearch/pytorch3d.git@stable"
+
+# MMCV安装
+pip install cython
+pip install openmim==0.3.9
+mim install mmcv==2.1.0 # 使用mim来加速mmcv安装
+
+# 其他依赖项
+pip install -r docs/prepare_env/requirements.txt -v
+
+```
+
diff --git a/docs/prepare_env/install_guide.md b/docs/prepare_env/install_guide.md
new file mode 100644
index 0000000000000000000000000000000000000000..67f2df44022671e2710e1213bf3133dec89ca382
--- /dev/null
+++ b/docs/prepare_env/install_guide.md
@@ -0,0 +1,34 @@
+# Prepare the Environment
+[中文文档](./install_guide-zh.md)
+
+This guide describes how to set up a Python environment for Real3D-Portrait with Conda.
+
+The following installation process has been verified on A100/V100 GPUs with CUDA 11.7.
+
+
+# 1. Install CUDA
+We recommend installing CUDA `11.7` (verified on several types of GPUs), but other CUDA versions (such as `10.2`, `12.x`) may also work.
+
+# 2. Install Python Packages
+```
+cd <Real3DPortraitRoot>
+source <CondaRoot>/bin/activate
+conda create -n real3dportrait python=3.9
+conda activate real3dportrait
+conda install conda-forge::ffmpeg # ffmpeg with libx264 codec to turn images to video
+
+### We recommend torch2.0.1+cuda11.7. 
+conda install pytorch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 pytorch-cuda=11.7 -c pytorch -c nvidia
+
+# Build from source, it may take a long time (Proxy is recommended if encountering the time-out problem)
+pip install "git+https://github.com/facebookresearch/pytorch3d.git@stable"
+
+# MMCV for some network structure
+pip install cython
+pip install openmim==0.3.9
+mim install mmcv==2.1.0 # use mim to speed up installation for mmcv
+
+# other dependencies
+pip install -r docs/prepare_env/requirements.txt -v
+
+```
diff --git a/docs/prepare_env/requirements.txt b/docs/prepare_env/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d521e594fff966039a7b48de47367fbc11d2c69e
--- /dev/null
+++ b/docs/prepare_env/requirements.txt
@@ -0,0 +1,75 @@
+Cython
+numpy # ==1.23.0
+numba==0.56.4
+pandas
+transformers
+scipy==1.11.1 # required by cal_fid. https://github.com/mseitzer/pytorch-fid/issues/103
+scikit-learn
+scikit-image
+# tensorflow # you can flexible it, this is gpu version
+tensorboard
+tensorboardX
+python_speech_features
+resampy
+opencv_python
+face_alignment
+matplotlib
+configargparse
+librosa==0.9.2
+praat-parselmouth # ==0.4.3
+trimesh
+kornia==0.5.0
+PyMCubes
+lpips
+setuptools # ==59.5.0
+ffmpeg-python
+moviepy
+dearpygui
+ninja
+# pyaudio # for extract esperanto
+mediapipe
+protobuf
+decord
+soundfile
+pillow
+# torch # it's better to install torch with conda 
+av
+timm
+pretrainedmodels
+faiss-cpu # for fast nearest camera pose retrieval
+einops
+# mmcv # use mim install is faster
+
+# conditional flow matching
+beartype
+torchode
+torchdiffeq
+
+# tts
+cython
+textgrid
+pyloudnorm
+websocket-client
+pyworld==0.2.1rc0
+pypinyin==0.42.0
+webrtcvad
+torchshow
+
+# cal spk sim
+s3prl
+fire
+
+# cal LMD
+dlib
+
+# debug
+ipykernel
+
+# lama
+hydra-core
+pytorch_lightning
+setproctitle
+
+# Gradio GUI
+httpx==0.23.3
+gradio==4.16.0
\ No newline at end of file
diff --git a/inference/app_real3dportrait.py b/inference/app_real3dportrait.py
new file mode 100644
index 0000000000000000000000000000000000000000..b87dd7c8b50fcb287fa68a725a6dc293793bcf62
--- /dev/null
+++ b/inference/app_real3dportrait.py
@@ -0,0 +1,244 @@
+import os, sys
+import argparse
+import gradio as gr
+from inference.real3d_infer import GeneFace2Infer
+from utils.commons.hparams import hparams
+
+class Inferer(GeneFace2Infer):
+    """Adapter that exposes ``GeneFace2Infer`` to the Gradio UI.
+
+    Gradio hands component values over positionally; this class maps them
+    onto the dict that ``infer_once`` takes and reports failures through UI
+    updates instead of letting exceptions escape.
+    """
+
+    def infer_once_args(self, *args, **kargs):
+        """Run one inference from positional Gradio inputs.
+
+        args:
+            *args: component values, in the same order as ``keys`` below.
+            **kargs: must be empty.
+        return:
+            (video_gr, info_gr): Gradio updates for the output video and for
+            the error textbox. The textbox is made visible only when an
+            error message was produced.
+        """
+        assert len(kargs) == 0
+        keys = [
+            'src_image_name',
+            'drv_audio_name',
+            'drv_pose_name',
+            'bg_image_name',
+            'blink_mode',
+            'temperature',
+            'mouth_amp',
+            'out_mode',
+            'map_to_init_pose',
+            'hold_eye_opened',
+            'head_torso_threshold',
+            'a2m_ckpt',
+            'head_ckpt',
+            'torso_ckpt',
+        ]
+        inp = {}
+        out_name = None
+        info = ""
+
+        try:  # any failure below records a message in `info` and jumps to the output part
+            for key_index, key in enumerate(keys):
+                value = args[key_index]
+                # empty file inputs arrive as None; downstream code compares against ''
+                if '_name' in key and value is None:
+                    value = ''
+                inp[key] = value
+
+            if inp['src_image_name'] == '':
+                info = "Input Error: Source image is REQUIRED!"
+                raise ValueError
+            if inp['drv_audio_name'] == '' and inp['drv_pose_name'] == '':
+                info = "Input Error: At least one of driving audio or video is REQUIRED!"
+                raise ValueError
+
+            # video-driven mode: the pose video doubles as the audio source
+            if inp['drv_audio_name'] == '' and inp['drv_pose_name'] != '':
+                inp['drv_audio_name'] = inp['drv_pose_name']
+                print("No audio input, we use driving pose video for video driving")
+
+            if inp['drv_pose_name'] == '':
+                inp['drv_pose_name'] = 'static'
+
+            # the models are rebuilt whenever a checkpoint or the threshold changed
+            reload_flag = False
+            if inp['a2m_ckpt'] != self.audio2secc_dir:
+                print("Changes of a2m_ckpt detected, reloading model")
+                reload_flag = True
+            if inp['head_ckpt'] != self.head_model_dir:
+                print("Changes of head_ckpt detected, reloading model")
+                reload_flag = True
+            if inp['torso_ckpt'] != self.torso_model_dir:
+                print("Changes of torso_ckpt detected, reloading model")
+                reload_flag = True
+
+            inp['out_name'] = ''
+            inp['seed'] = 42
+
+            print(f"infer inputs : {inp}")
+            if self.secc2video_hparams['htbsr_head_threshold'] != inp['head_torso_threshold']:
+                print("Changes of head_torso_threshold detected, reloading model")
+                reload_flag = True
+
+            try:
+                if reload_flag:
+                    # re-run the constructor in place so the Gradio callback keeps the same object
+                    self.__init__(inp['a2m_ckpt'], inp['head_ckpt'], inp['torso_ckpt'], inp=inp, device=self.device)
+            except Exception as e:
+                info = f"Reload ERROR: {e}"
+                raise ValueError
+            try:
+                out_name = self.infer_once(inp)
+            except Exception as e:
+                info = f"Inference ERROR: {e}"
+                raise ValueError
+        except Exception as e:
+            if info == "":  # unexpected errors that did not set their own message
+                info = f"WebUI ERROR: {e}"
+
+        # output part
+        if len(info) > 0:  # there were errors
+            print(info)
+            info_gr = gr.update(visible=True, value=info)
+        else:  # no errors
+            info_gr = gr.update(visible=False, value=info)
+        if out_name is not None and len(out_name) > 0 and os.path.exists(out_name):  # good output
+            print(f"Succefully generated in {out_name}")
+            video_gr = gr.update(visible=True, value=out_name)
+        else:
+            print("Failed to generate")
+            # clear the player instead of handing Gradio a missing or empty path
+            video_gr = gr.update(visible=True, value=None)
+
+        return video_gr, info_gr
+
+def toggle_audio_file(choice):
+    """Return two Gradio visibility updates that are always opposite.
+
+    The first component is shown (and the second hidden) when
+    ``choice == False``; otherwise the roles are swapped.
+    """
+    # keep the equality test: a value such as None must take the "else" side
+    show_first = bool(choice == False)
+    return gr.update(visible=show_first), gr.update(visible=not show_first)
+    
+def ref_video_fn(path_of_ref_video):
+    """Return a Gradio update whose value is True iff a reference video path was given."""
+    has_ref_video = path_of_ref_video is not None
+    return gr.update(value=has_ref_video)
+
+def real3dportrait_demo(
+    audio2secc_dir,
+    head_model_dir,
+    torso_model_dir,
+    device          = 'cuda',
+    warpfn          = None,
+    ):
+    """Load the models and build the Gradio interface (without launching it).
+
+    args:
+        audio2secc_dir: initial audio2secc checkpoint path.
+        head_model_dir: initial head model checkpoint path.
+        torso_model_dir: initial torso model checkpoint path.
+        device: device string forwarded to ``Inferer``.
+        warpfn: optional callable; when given, the inference callback is
+            passed through it before being bound to the Generate button.
+    return:
+        the constructed ``gr.Blocks`` object.
+    """
+
+    sep_line = "-" * 40
+
+    infer_obj = Inferer(
+        audio2secc_dir=audio2secc_dir,
+        head_model_dir=head_model_dir,
+        torso_model_dir=torso_model_dir,
+        device=device,
+    )
+
+    print(sep_line)
+    print("Model loading is finished.")
+    print(sep_line)
+    with gr.Blocks(analytics_enabled=False) as real3dportrait_interface:
+        # NOTE(review): the "Github" link below points at baidu.com, which looks like a
+        # placeholder -- replace it with the project's repository URL once confirmed.
+        gr.Markdown("\
+            <div align='center'> <h2> Real3D-Portrait: One-shot Realistic 3D Talking Portrait Synthesis (ICLR 2024 Spotlight) </h2> \
+            <a style='font-size:18px;color: #a0a0a0' href='https://arxiv.org/pdf/2401.08503.pdf'>Arxiv</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
+            <a style='font-size:18px;color: #a0a0a0' href='https://real3dportrait.github.io/'>Homepage</a>  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
+            <a style='font-size:18px;color: #a0a0a0' href='https://baidu.com'> Github </a> </div>")
+
+        sources = None
+        with gr.Row():
+            # left column: input media
+            with gr.Column(variant='panel'):
+                with gr.Tabs(elem_id="source_image"):
+                    with gr.TabItem('Upload image'):
+                        with gr.Row():
+                            src_image_name = gr.Image(label="Source image (required)", sources=sources, type="filepath", value="data/raw/examples/Macron.png")
+                with gr.Tabs(elem_id="driven_audio"):
+                    with gr.TabItem('Upload audio'):
+                        with gr.Column(variant='panel'):
+                            drv_audio_name = gr.Audio(label="Input audio (required for audio-driven)", sources=sources, type="filepath", value="data/raw/examples/Obama_5s.wav")
+                with gr.Tabs(elem_id="driven_pose"):
+                    with gr.TabItem('Upload video'):
+                        with gr.Column(variant='panel'):
+                            drv_pose_name = gr.Video(label="Driven Pose (required for video-driven, optional for audio-driven)", sources=sources, value="data/raw/examples/May_5s.mp4")
+                with gr.Tabs(elem_id="bg_image"):
+                    with gr.TabItem('Upload image'):
+                        with gr.Row():
+                            bg_image_name = gr.Image(label="Background image (optional)", sources=sources, type="filepath", value="data/raw/examples/bg.png")
+
+            # middle column: generation settings and outputs
+            with gr.Column(variant='panel'):
+                with gr.Tabs(elem_id="checkbox"):
+                    with gr.TabItem('General Settings'):
+                        with gr.Column(variant='panel'):
+
+                            blink_mode = gr.Radio(['none', 'period'], value='period', label='blink mode', info="whether to blink periodly")
+                            temperature = gr.Slider(minimum=0.0, maximum=1.0, step=0.025, label="temperature",  value=0.2, info='audio to secc temperature',)
+                            # NOTE(review): the info text says the default is 0.4 but the slider starts at 0.45 -- confirm which is intended
+                            mouth_amp = gr.Slider(minimum=0.0, maximum=1.0, step=0.025, label="mouth amplitude",  value=0.45, info='higher -> mouth will open wider, default to be 0.4',)
+                            out_mode = gr.Radio(['final', 'concat_debug'], value='final', label='output layout', info="final: only final output ; concat_debug: final output concated with internel features")
+                            map_to_init_pose = gr.Checkbox(label="Whether to map pose of first frame to initial pose")
+                            hold_eye_opened  = gr.Checkbox(label="Whether to maintain eyes always open")
+                            head_torso_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.025, label="head torso threshold",  value=0.7, info='make it higher if you find ghosting around hair of output, default to be 0.7',)
+
+                            submit = gr.Button('Generate', elem_id="generate", variant='primary')
+
+                    with gr.Tabs(elem_id="genearted_video"):
+                        info_box = gr.Textbox(label="Error", interactive=False, visible=False)
+                        gen_video = gr.Video(label="Generated video", format="mp4", visible=True)
+            # right column: checkpoint pickers
+            with gr.Column(variant='panel'):
+                with gr.Tabs(elem_id="checkbox"):
+                    with gr.TabItem('Checkpoints'):
+                        with gr.Column(variant='panel'):
+                            ckpt_info_box = gr.Textbox(value="Please select \"ckpt\" under the checkpoint folder ", interactive=False, visible=True, show_label=False)
+                            # from here on the three *_dir names refer to the UI components,
+                            # initialised with the paths that were passed in as arguments
+                            audio2secc_dir = gr.FileExplorer(glob="checkpoints/**/*.ckpt", value=audio2secc_dir, file_count='single', label='audio2secc model ckpt path or directory')
+                            head_model_dir = gr.FileExplorer(glob="checkpoints/**/*.ckpt", value=head_model_dir, file_count='single', label='head model ckpt path or directory (will be ignored if torso model is set)')
+                            torso_model_dir = gr.FileExplorer(glob="checkpoints/**/*.ckpt", value=torso_model_dir, file_count='single', label='torso model ckpt path or directory')
+
+        fn = infer_obj.infer_once_args
+        if warpfn:
+            fn = warpfn(fn)
+        # the order of `inputs` must match the `keys` list in Inferer.infer_once_args
+        submit.click(
+                    fn=fn,
+                    inputs=[
+                        src_image_name,
+                        drv_audio_name,
+                        drv_pose_name,
+                        bg_image_name,
+                        blink_mode,
+                        temperature,
+                        mouth_amp,
+                        out_mode,
+                        map_to_init_pose,
+                        hold_eye_opened,
+                        head_torso_threshold,
+                        audio2secc_dir,
+                        head_model_dir,
+                        torso_model_dir,
+                    ],
+                    outputs=[
+                        gen_video,
+                        info_box,
+                    ],
+                    )
+
+    print(sep_line)
+    print("Gradio page is constructed.")
+    print(sep_line)
+
+    return real3dportrait_interface
+
+if __name__ == "__main__":
+    # Command-line entry point: parse checkpoint paths, build the demo and serve it.
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--a2m_ckpt", type=str, default='checkpoints/240126_real3dportrait_orig/audio2secc_vae/model_ckpt_steps_400000.ckpt')
+    cli.add_argument("--head_ckpt", type=str, default='')
+    cli.add_argument("--torso_ckpt", type=str, default='checkpoints/240126_real3dportrait_orig/secc2plane_torso_orig/model_ckpt_steps_100000.ckpt')
+    cli.add_argument("--port", type=int, default=None)
+    opts = cli.parse_args()
+
+    interface = real3dportrait_demo(
+        audio2secc_dir=opts.a2m_ckpt,
+        head_model_dir=opts.head_ckpt,
+        torso_model_dir=opts.torso_ckpt,
+        device='cuda:0',
+        warpfn=None,
+    )
+    interface.queue()
+    interface.launch(server_port=opts.port)
+
diff --git a/inference/edit_secc.py b/inference/edit_secc.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c1e602b389665c2710eb76e5ab1244030096db2
--- /dev/null
+++ b/inference/edit_secc.py
@@ -0,0 +1,147 @@
+import cv2
+import torch
+from utils.commons.image_utils import dilate, erode
+from sklearn.neighbors import NearestNeighbors
+import copy
+import numpy as np
+from utils.commons.meters import Timer
+
+def hold_eye_opened_for_secc(img):
+    """
+    Zero out an "opened eye" region of a SECC image, using a mask loaded from
+    'inference/os_avatar/opened_eye_mask.png'.
+
+    args:
+        img: tensor laid out as [3,h,w]; presumably in -1~1 like the input of
+            blink_eye_for_secc (it is rescaled with (img+1)/2*255) -- TODO confirm
+    return:
+        tensor [3,h,w], float32, rescaled with x/127.5 - 1
+    """
+    img = img.permute(1,2,0).cpu().numpy()
+    img = ((img +1)/2*255).astype(np.uint)
+    # a pixel counts as "face" when all three of its channels are non-zero
+    face_mask = (img[...,0] != 0) & (img[...,1] != 0) & (img[...,2] != 0)
+    face_xys = np.stack(np.nonzero(face_mask)).transpose(1, 0) # [N,2] (row, col) of face pixels; not used below
+    h,w = face_mask.shape
+    # two fixed boxes in the upper-middle part of the image where the eyes are expected
+    left_eye_prior_reigon = np.zeros([h,w], dtype=bool)
+    right_eye_prior_reigon = np.zeros([h,w], dtype=bool)
+    left_eye_prior_reigon[h//4:h//2, w//4:w//2] = True
+    right_eye_prior_reigon[h//4:h//2, w//2:w//4*3] = True
+    eye_prior_reigon = left_eye_prior_reigon | right_eye_prior_reigon
+    # non-face pixels that fall inside the prior boxes are treated as (coarse) eye pixels
+    coarse_eye_mask = (~ face_mask) & eye_prior_reigon
+    coarse_eye_xys = np.stack(np.nonzero(coarse_eye_mask)).transpose(1, 0) # [N,2] (row, col) of coarse eye pixels
+
+    # NOTE(review): cv2.imread returns None when the file is missing; that case is not handled here
+    opened_eye_mask = cv2.imread('inference/os_avatar/opened_eye_mask.png')
+    # resize the mask to the image size (nearest neighbour) and mark pixels whose channel sum is non-zero
+    opened_eye_mask = torch.nn.functional.interpolate(torch.tensor(opened_eye_mask).permute(2,0,1).unsqueeze(0), size=(img.shape[0], img.shape[1]), mode='nearest')[0].permute(1,2,0).sum(-1).bool().cpu() # [h,w] bool tensor
+    # NOTE(review): opened_eye_mask is a torch tensor here and, unlike above, no transpose is applied;
+    # the indexing below treats this as [N,2] -- confirm the layout returned for tensors
+    coarse_opened_eye_xys = np.stack(np.nonzero(opened_eye_mask)) # coordinates of the True pixels of the opened-eye mask
+    
+    nbrs = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(coarse_eye_xys)
+    dists, _ = nbrs.kneighbors(coarse_opened_eye_xys) # distance from each opened-eye-mask pixel to its nearest coarse eye pixel
+    # print(dists.max())
+    non_opened_eye_pixs = dists > max(dists.max()*0.75, 4) # mask pixels farther than this distance are removed from the opened-eye mask
+    non_opened_eye_pixs = non_opened_eye_pixs.reshape([-1])
+    opened_eye_xys_to_erode = coarse_opened_eye_xys[non_opened_eye_pixs]
+    opened_eye_mask[opened_eye_xys_to_erode[...,0], opened_eye_xys_to_erode[...,1]] = False # drop the far-away pixels selected above from the mask
+
+    # zero every pixel still covered by the mask, then map back with x/127.5 - 1 and to [3,h,w]
+    img[opened_eye_mask] = 0
+    return torch.tensor(img.astype(np.float32) / 127.5 - 1).permute(2,0,1)
+    
+
+# def hold_eye_opened_for_secc(img):
+#     img = copy.copy(img)
+#     eye_mask = cv2.imread('inference/os_avatar/opened_eye_mask.png')
+#     eye_mask = torch.nn.functional.interpolate(torch.tensor(eye_mask).permute(2,0,1).unsqueeze(0), size=(img.shape[-2], img.shape[-1]), mode='nearest')[0].bool().to(img.device) # [3,512,512]
+#     img[eye_mask] = -1
+#     return img
+    
+def blink_eye_for_secc(img, close_eye_percent=0.5):
+    """
+    Partially close the eyes of a SECC image by filling the upper and lower
+    parts of the detected eye region with the colour of the nearest face pixel.
+
+    args:
+        img: [3,h,w], tensor, -1~1
+        close_eye_percent: float in [0, 1]; 0 returns the (re-scaled) input unchanged
+    return:
+        tensor [3,h,w], float32, rescaled with x/127.5 - 1
+    """
+    img = img.permute(1,2,0).cpu().numpy()
+    img = ((img +1)/2*255).astype(np.uint)
+    assert close_eye_percent <= 1.0 and close_eye_percent >= 0.
+    if close_eye_percent == 0: return torch.tensor(img.astype(np.float32) / 127.5 - 1).permute(2,0,1)
+    img = copy.deepcopy(img)
+    # a pixel counts as "face" when all three of its channels are non-zero
+    face_mask = (img[...,0] != 0) & (img[...,1] != 0) & (img[...,2] != 0)
+    h,w = face_mask.shape
+
+    # two fixed boxes in the upper-middle part of the image where the eyes are expected
+    left_eye_prior_reigon = np.zeros([h,w], dtype=bool)
+    right_eye_prior_reigon = np.zeros([h,w], dtype=bool)
+    left_eye_prior_reigon[h//4:h//2, w//4:w//2] = True
+    right_eye_prior_reigon[h//4:h//2, w//2:w//4*3] = True
+    eye_prior_reigon = left_eye_prior_reigon | right_eye_prior_reigon
+    # non-face pixels inside the prior boxes are treated as (coarse) eye pixels
+    coarse_eye_mask = (~ face_mask) & eye_prior_reigon
+    coarse_left_eye_mask = (~ face_mask) & left_eye_prior_reigon
+    coarse_right_eye_mask = (~ face_mask) & right_eye_prior_reigon
+    coarse_eye_xys = np.stack(np.nonzero(coarse_eye_mask)).transpose(1, 0) # [N,2] (row, col) of coarse eye pixels
+    # NOTE(review): .min()/.max() below raise on an empty array, i.e. when no eye pixel is found
+    min_h = coarse_eye_xys[:, 0].min()
+    max_h = coarse_eye_xys[:, 0].max()
+    coarse_left_eye_xys = np.stack(np.nonzero(coarse_left_eye_mask)).transpose(1, 0) # [N,2] (row, col) of coarse left-box eye pixels
+    left_min_w = coarse_left_eye_xys[:, 1].min()
+    left_max_w = coarse_left_eye_xys[:, 1].max()
+    coarse_right_eye_xys = np.stack(np.nonzero(coarse_right_eye_mask)).transpose(1, 0) # [N,2] (row, col) of coarse right-box eye pixels
+    right_min_w = coarse_right_eye_xys[:, 1].min()
+    right_max_w = coarse_right_eye_xys[:, 1].max()
+
+    # shrink the prior boxes to tight boxes around the detected eyes, so that as few
+    # face pixels as possible have to be considered and the KNN queries stay cheap
+    left_eye_prior_reigon = np.zeros([h,w], dtype=bool)
+    more_room = 4 # margin around the tight box; too small a value causes problems
+    left_eye_prior_reigon[min_h-more_room:max_h+more_room, left_min_w-more_room:left_max_w+more_room] = True
+    right_eye_prior_reigon = np.zeros([h,w], dtype=bool)
+    right_eye_prior_reigon[min_h-more_room:max_h+more_room, right_min_w-more_room:right_max_w+more_room] = True
+    eye_prior_reigon = left_eye_prior_reigon | right_eye_prior_reigon
+
+    around_eye_face_mask = face_mask & eye_prior_reigon
+    # face_mask is the SAME array object as around_eye_face_mask from here on,
+    # so the erosion applied to face_mask below also changes around_eye_face_mask
+    face_mask = around_eye_face_mask
+    face_xys = np.stack(np.nonzero(around_eye_face_mask)).transpose(1, 0) # [N,2] (row, col) of face pixels inside the tight boxes
+
+    nbrs = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(coarse_eye_xys)
+    dists, _ = nbrs.kneighbors(face_xys) # [N,1] distance from each face pixel to its nearest coarse eye pixel
+    face_pixs = dists > 5 # only pixels farther than 5 from the nearest eye pixel are kept as face; too small a value causes problems
+    face_pixs = face_pixs.reshape([-1])
+    face_xys_to_erode = face_xys[~face_pixs]
+    face_mask[face_xys_to_erode[...,0], face_xys_to_erode[...,1]] = False # shrink the face mask at the face-eye boundary, for smoothness
+    eye_mask = (~ face_mask) & eye_prior_reigon
+
+    # per image column: number of eye pixels and their mean / min / max row coordinate
+    h_grid = np.mgrid[0:h, 0:w][0]
+    eye_num_pixel_along_w_axis = eye_mask.sum(axis=0)
+    eye_mask_along_w_axis = eye_num_pixel_along_w_axis != 0
+
+    tmp_h_grid = h_grid.copy()
+    tmp_h_grid[~eye_mask] = 0
+    eye_mean_h_coord_along_w_axis = tmp_h_grid.sum(axis=0) / np.clip(eye_num_pixel_along_w_axis, a_min=1, a_max=h)
+    tmp_h_grid = h_grid.copy()
+    tmp_h_grid[~eye_mask] = 99999 # sentinel so that non-eye pixels never win the min
+    eye_min_h_coord_along_w_axis = tmp_h_grid.min(axis=0)
+    tmp_h_grid = h_grid.copy()
+    tmp_h_grid[~eye_mask] = -99999 # sentinel so that non-eye pixels never win the max
+    eye_max_h_coord_along_w_axis = tmp_h_grid.max(axis=0)
+
+    # interpolate between the eye border (percent=0) and the mean row (percent=1)
+    eye_low_h_coord_along_w_axis = close_eye_percent * eye_mean_h_coord_along_w_axis + (1-close_eye_percent) * eye_min_h_coord_along_w_axis # upper eye 
+    eye_high_h_coord_along_w_axis = close_eye_percent * eye_mean_h_coord_along_w_axis + (1-close_eye_percent) * eye_max_h_coord_along_w_axis # lower eye 
+
+    # eye pixels above the upper threshold or below the lower threshold get painted over
+    tmp_h_grid = h_grid.copy()
+    tmp_h_grid[~eye_mask] = 99999
+    upper_eye_blink_mask = tmp_h_grid <= eye_low_h_coord_along_w_axis
+    tmp_h_grid = h_grid.copy()
+    tmp_h_grid[~eye_mask] = -99999
+    lower_eye_blink_mask = tmp_h_grid >= eye_high_h_coord_along_w_axis
+    eye_blink_mask = upper_eye_blink_mask | lower_eye_blink_mask
+
+    # copy the colour of the nearest remaining face pixel onto every pixel to be closed
+    face_xys = np.stack(np.nonzero(around_eye_face_mask)).transpose(1, 0) # [N,2] (row, col) of face pixels after the erosion above
+    eye_blink_xys = np.stack(np.nonzero(eye_blink_mask)).transpose(1, 0) # [M,2] (row, col) of pixels to paint over
+    nbrs = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(face_xys)
+    distances, indices = nbrs.kneighbors(eye_blink_xys)
+    bg_fg_xys = face_xys[indices[:, 0]]
+    img[eye_blink_xys[:, 0], eye_blink_xys[:, 1], :] = img[bg_fg_xys[:, 0], bg_fg_xys[:, 1], :]
+    return torch.tensor(img.astype(np.float32) / 127.5 - 1).permute(2,0,1)
+
+
+if __name__ == '__main__':
+    # Demo: write a 33-frame clip in which close_eye_percent grows from 0 to 0.96.
+    import imageio
+    import tqdm
+    img = cv2.imread("assets/cano_secc.png")
+    img = img / 127.5 - 1
+    img = torch.FloatTensor(img).permute(2, 0, 1)
+    fps = 25
+    # the context manager closes the writer even when a frame fails
+    with imageio.get_writer('demo_blink.mp4', fps=fps) as writer:
+        for i in tqdm.trange(33):
+            blink_percent = 0.03 * i
+            with Timer("Blink", True):
+                out_img = blink_eye_for_secc(img, blink_percent)
+            out_img = ((out_img.permute(1,2,0)+1)*127.5).int().numpy()
+            writer.append_data(out_img)
\ No newline at end of file
diff --git a/inference/infer_utils.py b/inference/infer_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb406fa8c734bd5295aae3f2a6e276f4b697da48
--- /dev/null
+++ b/inference/infer_utils.py
@@ -0,0 +1,154 @@
+import os
+import torch
+import torch.nn.functional as F
+import librosa
+import numpy as np
+import importlib
+import tqdm
+import copy
+import cv2
+from scipy.spatial.transform import Rotation
+
+
+def load_img_to_512_hwc_array(img_name):
+    """
+    load an image file as a 512x512 RGB array
+    args:
+        img_name: str, path of the image file
+    return:
+        img: [512, 512, c] array as returned by cv2, converted from BGR to RGB
+    raises:
+        FileNotFoundError: if OpenCV cannot read the file
+    """
+    img = cv2.imread(img_name)
+    if img is None:
+        # cv2.imread reports a missing/unreadable file by returning None, not by raising
+        raise FileNotFoundError(f"Failed to read image: {img_name}")
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    return cv2.resize(img, (512, 512))
+
+def load_img_to_normalized_512_bchw_tensor(img_name):
+    """
+    load an image as a float tensor scaled with (x - 127.5) / 127.5
+    args:
+        img_name: str, path of the image file
+    return:
+        img: [1, c, 512, 512] float tensor
+    """
+    hwc = torch.tensor(load_img_to_512_hwc_array(img_name))
+    normalized = ((hwc - 127.5) / 127.5).float()
+    return normalized.unsqueeze(0).permute(0, 3, 1, 2) # [b,c,h,w]
+
+def mirror_index(index, len_seq):
+    """
+    map an unbounded index onto a sequence that is played forward, then
+    backward, then forward again, and so on (ping-pong indexing)
+    args:
+        index: int
+        len_seq: int, length of the sequence
+    return:
+        mirror_index: int, position inside the sequence
+    """
+    turn, res = divmod(index, len_seq)
+    if turn % 2:
+        return len_seq - res - 1 # odd pass: walk the sequence backwards
+    return res # even pass: walk the sequence forwards
+    
+def smooth_camera_sequence(camera, kernel_size=7):
+    """
+    smooth the camera trajectory (i.e., rotation & translation) with a sliding
+    window that is truncated at both ends of the sequence
+    args:
+        camera: [N, 25] or [N, 16]. np.ndarray; the first 16 values of each row
+            are read as a row-major 4x4 pose matrix
+        kernel_size: int
+    return:
+        smoothed_camera: [N, 25] or [N, 16]. np.ndarray
+    note:
+        the first 16 columns of `camera` are overwritten in place and the same
+        array object is returned; columns beyond 16 are left untouched
+    """
+    N = camera.shape[0]
+    K = kernel_size // 2
+    poses = camera[:, :16].reshape([-1, 4, 4]).copy()
+    # averages are always taken over the ORIGINAL values, never over already-smoothed ones
+    trans = poses[:, :3, 3].copy() # [N, 3]
+    rots = poses[:, :3, :3].copy() # [N, 3, 3]
+
+    for i in range(N):
+        start = max(0, i - K)
+        end = min(N, i + K + 1)
+        poses[i, :3, 3] = trans[start:end].mean(0)
+        try:
+            poses[i, :3, :3] = Rotation.from_matrix(rots[start:end]).mean().as_matrix()
+        except Exception:
+            # best effort: if the rotation mean cannot be computed, fall back to the
+            # unsmoothed rotation (first frame) or to the previous smoothed rotation.
+            # `Exception` instead of a bare except, so KeyboardInterrupt still propagates.
+            if i == 0:
+                poses[i, :3, :3] = rots[i]
+            else:
+                poses[i, :3, :3] = poses[i-1, :3, :3]
+    poses = poses.reshape([-1, 16])
+    camera[:, :16] = poses
+    return camera
+
+def smooth_features_xd(in_tensor, kernel_size=7):
+    """
+    smooth the feature maps along the first (time) axis with a moving average
+    args:
+        in_tensor: [T, ...] with at least one feature dim, e.g. [T, c],
+            [T, c,h,w] or [T, c1,c2,h,w]
+        kernel_size: int, expected to be odd so that the output keeps length T
+    return:
+        out_tensor: same shape as in_tensor
+    raises:
+        NotImplementedError: if in_tensor has fewer than 2 dims
+    """
+    if in_tensor.ndim < 2:
+        raise NotImplementedError()
+    t = in_tensor.shape[0]
+    feat_shape = in_tensor.shape[1:]
+    pad = (kernel_size - 1) // 2
+    # mirror the first / last `pad` frames so that the un-padded convolution returns t frames
+    padded = torch.cat([torch.flip(in_tensor[0:pad], dims=[0]), in_tensor, torch.flip(in_tensor[t-pad:t], dims=[0])], dim=0)
+    # fold every feature dim into the conv batch axis: [t+2*pad, ...] -> [prod(...), 1, t+2*pad]
+    flat = padded.reshape([t + 2 * pad, -1]).permute(1, 0).reshape([-1, 1, t + 2 * pad])
+    # build the kernel in the input's float dtype so conv1d does not fail on a dtype mismatch
+    kernel_dtype = flat.dtype if flat.is_floating_point() else torch.float32
+    avg_kernel = torch.full([1, 1, kernel_size], 1 / kernel_size, dtype=kernel_dtype, device=flat.device) # [1, 1, kw]
+    out_tensor = F.conv1d(flat, avg_kernel) # [prod(...), 1, t]
+    return out_tensor.reshape([-1, t]).permute(1, 0).reshape([t, *feat_shape])
+
+
+def extract_audio_motion_from_ref_video(video_name):
+    """
+    extract expression coefficients, hubert features and f0 from a reference video
+    args:
+        video_name: str, path ending in .wav/.mp3/.mp4/.avi
+    return:
+        exp: [1, T, ...] tensor, the 'exp' entry of the fitted 3DMM coefficients
+        hubert: [1, 2*T, ...] tensor
+        f0: [1, 2*T, 1] tensor
+        where T = min(len(exp), len(hubert)//2, len(f0)//2)
+    note:
+        a temporary '<name>_16k.wav' file is written next to the input and
+        removed again once the audio features are extracted
+    """
+    import subprocess  # local import: only this function needs it
+
+    def save_wav16k(audio_name):
+        # Resample the audio track to a 16 kHz wav file with ffmpeg.
+        supported_types = ('.wav', '.mp3', '.mp4', '.avi')
+        assert audio_name.endswith(supported_types), f"Now we only support {','.join(supported_types)} as audio source!"
+        wav16k_name = audio_name[:-4] + '_16k.wav'
+        # argument list instead of a shell string: paths with spaces or shell
+        # metacharacters (e.g. uploaded file names) are passed through verbatim
+        extract_wav_cmd = ["ffmpeg", "-i", audio_name, "-f", "wav", "-ar", "16000", "-v", "quiet", "-y", wav16k_name]
+        # check=False keeps the previous behaviour of not raising on a non-zero exit code
+        subprocess.run(extract_wav_cmd, check=False)
+        print(f"Extracted wav file (16khz) from {audio_name} to {wav16k_name}.")
+        return wav16k_name
+
+    def get_f0(wav16k_name):
+        # Extract f0 as a [T, 1] tensor.
+        from data_gen.process_lrs3.process_audio_mel_f0 import extract_mel_from_fname,extract_f0_from_wav_and_mel
+        wav, mel = extract_mel_from_fname(wav16k_name)
+        f0, f0_coarse = extract_f0_from_wav_and_mel(wav, mel)
+        f0 = f0.reshape([-1,1])
+        f0 = torch.tensor(f0)
+        return f0
+
+    def get_hubert(wav16k_name):
+        # Extract hubert features and zero-pad the time axis to a multiple of 8.
+        from data_gen.utils.process_audio.extract_hubert import get_hubert_from_16k_wav
+        hubert = get_hubert_from_16k_wav(wav16k_name).detach().numpy()
+        len_mel = hubert.shape[0]
+        x_multiply = 8
+        num_to_pad = (x_multiply - len_mel % x_multiply) % x_multiply
+        hubert = np.pad(hubert, pad_width=((0,num_to_pad), (0,0)))
+        hubert = torch.tensor(hubert)
+        return hubert
+
+    def get_exp(video_name):
+        # Fit 3DMM coefficients to the video and return the 'exp' entry as a tensor.
+        from data_gen.utils.process_video.fit_3dmm_landmark import fit_3dmm_for_a_video
+        drv_motion_coeff_dict = fit_3dmm_for_a_video(video_name, save=False)
+        exp = torch.tensor(drv_motion_coeff_dict['exp'])
+        return exp
+
+    wav16k_name = save_wav16k(video_name)
+    try:
+        f0 = get_f0(wav16k_name)
+        hubert = get_hubert(wav16k_name)
+    finally:
+        # remove the temporary wav even when feature extraction fails
+        if os.path.exists(wav16k_name):
+            os.remove(wav16k_name)
+    exp = get_exp(video_name)
+    # hubert and f0 are consumed at two steps per exp frame, hence the //2 and *2
+    target_length = min(len(exp), len(hubert)//2, len(f0)//2)
+    exp = exp[:target_length]
+    f0 = f0[:target_length*2]
+    hubert = hubert[:target_length*2]
+    return exp.unsqueeze(0), hubert.unsqueeze(0), f0.unsqueeze(0)
+
+
+if __name__ == '__main__':
+    # Manual smoke test: run the extraction on a sample clip; the returned tensors are discarded.
+    extract_audio_motion_from_ref_video('data/raw/videos/crop_0213.mp4')
\ No newline at end of file
diff --git a/inference/real3d_infer.py b/inference/real3d_infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..4314e13b566acd208f9059588b50c4b09b9c91e4
--- /dev/null
+++ b/inference/real3d_infer.py
@@ -0,0 +1,542 @@
+import os
+import torch
+import torch.nn.functional as F
+import torchshow as ts
+import librosa
+import random
+import time
+import numpy as np
+import importlib
+import tqdm
+import copy
+import cv2
+
+# common utils
+from utils.commons.hparams import hparams, set_hparams
+from utils.commons.tensor_utils import move_to_cuda, convert_to_tensor
+from utils.commons.ckpt_utils import load_ckpt, get_last_checkpoint
+# 3DMM-related utils
+from deep_3drecon.deep_3drecon_models.bfm import ParametricFaceModel
+from data_util.face3d_helper import Face3DHelper
+from data_gen.utils.process_image.fit_3dmm_landmark import fit_3dmm_for_a_image
+from data_gen.utils.process_video.fit_3dmm_landmark import fit_3dmm_for_a_video
+from deep_3drecon.secc_renderer import SECC_Renderer
+from data_gen.eg3d.convert_to_eg3d_convention import get_eg3d_convention_camera_pose_intrinsic
+# Face Parsing 
+from data_gen.utils.mp_feature_extractors.mp_segmenter import MediapipeSegmenter
+from data_gen.utils.process_video.extract_segment_imgs import inpaint_torso_job, extract_background
+# other inference utils
+from inference.infer_utils import mirror_index, load_img_to_512_hwc_array, load_img_to_normalized_512_bchw_tensor
+from inference.infer_utils import smooth_camera_sequence, smooth_features_xd
+from Real3DPortrait.inference.edit_secc import blink_eye_for_secc
+
+
+def read_first_frame_from_a_video(vid_name):
+    """Decode the first frame of a video and return it in RGB channel order.
+
+    :param vid_name: path of the video, passed to ``cv2.VideoCapture``
+    :return: the first frame after BGR -> RGB conversion
+    :raises ValueError: if no frame could be read from ``vid_name``
+    """
+    cap = cv2.VideoCapture(vid_name)
+    try:
+        ret, frame_bgr = cap.read()
+    finally:
+        # always free the capture handle, even when reading fails
+        cap.release()
+    if not ret or frame_bgr is None:
+        raise ValueError(f"Failed to read the first frame from video: {vid_name}")
+    return cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
+
+def analyze_weights_img(gen_output):
+    """Save five debug views of ``image_raw`` masked by bands of ``weights_img``.
+
+    For each upper bound in (0.3, 0.5, 0.7, 0.9, 1.0) a copy of
+    ``gen_output['image_raw']`` is made in which every position whose weight is
+    NOT strictly between 0.05 and that bound is set to -1. The first batch
+    element of each copy is handed to ``ts.save`` as one list.
+
+    :param gen_output: dict holding at least 'image_raw' and 'weights_img'
+    """
+    raw = gen_output['image_raw']
+    weights = gen_output['weights_img']
+    masked_views = []
+    for upper in (0.3, 0.5, 0.7, 0.9, 1.0):
+        # tile the mask 3x along dim 1 so it can index the image copy
+        band = torch.bitwise_and(weights > 0.05, weights < upper).repeat([1, 3, 1, 1])
+        view = raw.clone()
+        view[~band] = -1
+        masked_views.append(view[0])
+    ts.save(masked_views)
+
+class GeneFace2Infer:
+    def __init__(self, audio2secc_dir, head_model_dir, torso_model_dir, device=None, inp=None):
+        """Load the two models and create the helpers used during inference.
+
+        :param audio2secc_dir: checkpoint location passed to ``load_audio2secc``
+        :param head_model_dir: head checkpoint location passed to ``load_secc2video``
+        :param torso_model_dir: torso checkpoint location passed to ``load_secc2video``
+        :param device: target device; when None, 'cuda' is used if available, else 'cpu'
+        :param inp: optional options dict forwarded to ``load_secc2video``
+        """
+        self.device = device if device is not None else ('cuda' if torch.cuda.is_available() else 'cpu')
+        # keep this order: each loader calls set_hparams on the shared global hparams
+        self.audio2secc_model = self.load_audio2secc(audio2secc_dir)
+        self.secc2video_model = self.load_secc2video(head_model_dir, torso_model_dir, inp)
+        for loaded_model in (self.audio2secc_model, self.secc2video_model):
+            loaded_model.to(self.device).eval()
+        self.seg_model = MediapipeSegmenter()
+        self.secc_renderer = SECC_Renderer(512)
+        self.face3d_helper = Face3DHelper(use_gpu=True, keypoint_mode='lm68')
+        self.mp_face3d_helper = Face3DHelper(use_gpu=True, keypoint_mode='mediapipe')
+
+    def load_audio2secc(self, audio2secc_dir):
+        """Build the audio-to-motion model and load its checkpoint.
+
+        Side effects: calls ``set_hparams`` (which rewrites the global
+        ``hparams``) and sets ``self.audio2secc_dir``,
+        ``self.audio2secc_hparams`` and ``self.use_icl_audio2motion``.
+
+        :param audio2secc_dir: checkpoint directory, or a path ending in
+            ``.ckpt`` whose parent directory holds ``config.yaml``
+        :return: the model after ``load_ckpt(..., strict=True)``
+        :raises ValueError: if a VAE model is selected but the configured
+            'audio_type' is neither 'hubert' nor 'mfcc'
+        """
+        if audio2secc_dir.endswith(".ckpt"):
+            config_name = f"{os.path.dirname(audio2secc_dir)}/config.yaml"
+        else:
+            config_name = f"{audio2secc_dir}/config.yaml"
+        set_hparams(f"{config_name}", print_hparams=False)
+        self.audio2secc_dir = audio2secc_dir
+        self.audio2secc_hparams = copy.deepcopy(hparams)
+        from modules.audio2motion.vae import VAEModel, PitchContourVAEModel
+        audio_type = self.audio2secc_hparams['audio_type']
+        # input feature width per supported audio representation (None if unsupported)
+        audio_in_dim = {'hubert': 1024, 'mfcc': 13}.get(audio_type)
+
+        if 'icl' in hparams['task_cls']:
+            self.use_icl_audio2motion = True
+            # NOTE(review): InContextAudio2MotionModel is not imported in the visible
+            # part of this file, so this branch would raise NameError - confirm the
+            # module it lives in and import it.
+            model = InContextAudio2MotionModel(hparams['icl_model_type'], hparams=self.audio2secc_hparams)
+        else:
+            self.use_icl_audio2motion = False
+            if audio_in_dim is None:
+                # previously this surfaced as an UnboundLocalError on audio_in_dim
+                raise ValueError(f"Unsupported audio_type {audio_type!r}; expected 'hubert' or 'mfcc'.")
+            if hparams.get("use_pitch", False) is True:
+                model = PitchContourVAEModel(hparams, in_out_dim=64, audio_in_dim=audio_in_dim)
+            else:
+                model = VAEModel(in_out_dim=64, audio_in_dim=audio_in_dim)
+        load_ckpt(model, f"{audio2secc_dir}", model_name='model', strict=True)
+        return model
+
+    def load_secc2video(self, head_model_dir, torso_model_dir, inp):
+        """Build the SECC-to-video renderer and load its checkpoint.
+
+        A non-empty ``torso_model_dir`` selects the torso model and wins over
+        ``head_model_dir`` (a warning is printed when both are given);
+        otherwise the head-only model is loaded from ``head_model_dir``.
+        Side effects: calls ``set_hparams`` on the global ``hparams``,
+        optionally overrides ``hparams['htbsr_head_threshold']`` and sets
+        ``self.head_model_dir``, ``self.torso_model_dir`` and
+        ``self.secc2video_hparams``.
+
+        :param head_model_dir: head checkpoint directory or ``.ckpt`` path ('' if unused)
+        :param torso_model_dir: torso checkpoint directory or ``.ckpt`` path ('' if unused)
+        :param inp: options dict or None; only 'head_torso_threshold' is read
+        :return: the model after ``load_ckpt(..., strict=False)``
+        """
+        if inp is None:
+            inp = {}
+        self.head_model_dir = head_model_dir
+        self.torso_model_dir = torso_model_dir
+        use_torso = torso_model_dir != ''
+        if not use_torso:
+            from modules.real3d.secc_img2plane_torso import OSAvatarSECC_Img2plane
+        model_dir = torso_model_dir if use_torso else head_model_dir
+
+        # config.yaml sits next to a .ckpt file, or inside a checkpoint directory
+        config_dir = os.path.dirname(model_dir) if model_dir.endswith(".ckpt") else model_dir
+        set_hparams(f"{config_dir}/config.yaml", print_hparams=False)
+        threshold = inp.get('head_torso_threshold', None)
+        if threshold is not None:
+            hparams['htbsr_head_threshold'] = threshold
+        self.secc2video_hparams = copy.deepcopy(hparams)
+
+        if use_torso:
+            from modules.real3d.secc_img2plane_torso import OSAvatarSECC_Img2plane_Torso
+            model = OSAvatarSECC_Img2plane_Torso()
+        else:
+            model = OSAvatarSECC_Img2plane()
+        load_ckpt(model, f"{model_dir}", model_name='model', strict=False)
+        if use_torso and head_model_dir != '':
+            print("| Warning: Assigned --torso_ckpt which also contains head, but --head_ckpt is also assigned, skipping the --head_ckpt.")
+        return model
+
+    def infer_once(self, inp):
+        """Run one full inference and return the output video path.
+
+        The batch is prepared first; afterwards ``random``, ``torch`` and
+        ``numpy`` are seeded with ``inp['seed']`` (or the current time when it
+        is None) and the batch is passed to ``forward_system``.
+
+        :param inp: options dict; also stored on ``self.inp``
+        :return: whatever ``forward_system`` returns (the output file name)
+        """
+        self.inp = inp
+        batch = self.prepare_batch_from_inp(inp)
+        seed = inp['seed']
+        if seed is None:
+            seed = int(time.time())
+        for seed_fn in (random.seed, torch.manual_seed, np.random.seed):
+            seed_fn(seed)
+        return self.forward_system(batch, inp)
+    
+    def prepare_batch_from_inp(self, inp):
+        """
+        Build the conditioning batch (driving features, source-image features, camera) for one run.
+
+        :param inp: dict of inference options. Keys read here: 'drv_audio_name',
+            'src_image_name', 'bg_image_name', 'drv_pose_name', optionally
+            'map_to_init_pose', and 'mouth_amp' when the driver is a .wav/.mp3.
+            NOTE: when 'src_image_name' is a .mp4, its first frame is written to a
+            .png next to it and ``inp['src_image_name']`` is rewritten in place.
+        :return: a dict that contains the condition feature of NeRF
+        :raises ValueError: if the driving file extension or the configured audio
+            type is unsupported, or if the background image cannot be read
+        """
+        sample = {}
+        # Process Driving Motion
+        drv_ext = inp['drv_audio_name'][-4:]
+        if drv_ext in ['.wav', '.mp3']:
+            audio_type = self.audio2secc_hparams['audio_type']
+            if audio_type not in ('hubert', 'mfcc'):
+                # fail before touching the disk; `hubert` would otherwise be unbound below
+                raise ValueError(f"Unsupported audio_type {audio_type!r}; expected 'hubert' or 'mfcc'.")
+            self.save_wav16k(inp['drv_audio_name'])
+            if audio_type == 'hubert':
+                hubert = self.get_hubert(self.wav16k_name)
+            else:
+                hubert = self.get_mfcc(self.wav16k_name) / 100
+
+            # crop or zero-pad f0 so it has as many rows as the audio features
+            f0 = self.get_f0(self.wav16k_name)
+            if f0.shape[0] > len(hubert):
+                f0 = f0[:len(hubert)]
+            else:
+                num_to_pad = len(hubert) - len(f0)
+                f0 = np.pad(f0, pad_width=((0,num_to_pad), (0,0)))
+            t_x = hubert.shape[0]
+            x_mask = torch.ones([1, t_x]).float() # mask for audio frames
+            y_mask = torch.ones([1, t_x//2]).float() # mask for motion/image frames
+            sample.update({
+                'hubert': torch.from_numpy(hubert).float().unsqueeze(0).cuda(),
+                'f0': torch.from_numpy(f0).float().reshape([1,-1]).cuda(),
+                'x_mask': x_mask.cuda(),
+                'y_mask': y_mask.cuda(),
+                })
+            sample['blink'] = torch.zeros([1, t_x, 1]).long().cuda()
+            sample['audio'] = sample['hubert']
+            sample['eye_amp'] = torch.ones([1, 1]).cuda() * 1.0
+            sample['mouth_amp'] = torch.ones([1, 1]).cuda() * inp['mouth_amp']
+        elif drv_ext in ['.mp4']:
+            # motion-driven: fit 3DMM coefficients on the driving video
+            drv_motion_coeff_dict = fit_3dmm_for_a_video(inp['drv_audio_name'], save=False)
+            drv_motion_coeff_dict = convert_to_tensor(drv_motion_coeff_dict)
+            # t_x counts audio-rate frames, i.e. two per motion frame (see the t_x//2 uses below)
+            t_x = drv_motion_coeff_dict['exp'].shape[0] * 2
+            self.drv_motion_coeff_dict = drv_motion_coeff_dict
+        elif drv_ext in ['.npy']:
+            # motion-driven: coefficients were saved as a pickled dict
+            # NOTE(review): allow_pickle=True executes pickled content; only load trusted files
+            drv_motion_coeff_dict = np.load(inp['drv_audio_name'], allow_pickle=True).tolist()
+            drv_motion_coeff_dict = convert_to_tensor(drv_motion_coeff_dict)
+            t_x = drv_motion_coeff_dict['exp'].shape[0] * 2
+            self.drv_motion_coeff_dict = drv_motion_coeff_dict
+        else:
+            # without this, t_x stayed undefined and a NameError surfaced much later
+            raise ValueError(
+                f"Unsupported driving file {inp['drv_audio_name']!r}; expected a .wav, .mp3, .mp4 or .npy file.")
+
+        # Face Parsing
+        image_name = inp['src_image_name']
+        if image_name.endswith(".mp4"):
+            img = read_first_frame_from_a_video(image_name)
+            image_name = inp['src_image_name'] = image_name[:-4] + '.png'
+            cv2.imwrite(image_name, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
+        sample['ref_gt_img'] = load_img_to_normalized_512_bchw_tensor(image_name).cuda()
+        img = load_img_to_512_hwc_array(image_name)
+        segmap = self.seg_model._cal_seg_map(img)
+        sample['segmap'] = torch.tensor(segmap).float().unsqueeze(0).cuda()
+        head_img = self.seg_model._seg_out_img_with_segmap(img, segmap, mode='head')[0]
+        sample['ref_head_img'] = ((torch.tensor(head_img) - 127.5)/127.5).float().unsqueeze(0).permute(0, 3, 1,2).cuda() # [b,c,h,w]
+        inpaint_torso_img, _, _, _ = inpaint_torso_job(img, segmap)
+        sample['ref_torso_img'] = ((torch.tensor(inpaint_torso_img) - 127.5)/127.5).float().unsqueeze(0).permute(0, 3, 1,2).cuda() # [b,c,h,w]
+
+        if inp['bg_image_name'] == '':
+            # no background given: estimate one from the source image itself
+            bg_img = extract_background([img], [segmap], 'knn')
+        else:
+            bg_img = cv2.imread(inp['bg_image_name'])
+            if bg_img is None:
+                # cv2.imread signals failure by returning None instead of raising
+                raise ValueError(f"Failed to read background image: {inp['bg_image_name']}")
+            bg_img = cv2.cvtColor(bg_img, cv2.COLOR_BGR2RGB)
+            bg_img = cv2.resize(bg_img, (512,512))
+        sample['bg_img'] = ((torch.tensor(bg_img) - 127.5)/127.5).float().unsqueeze(0).permute(0, 3, 1,2).cuda() # [b,c,h,w]
+
+        # 3DMM, get identity code and camera pose
+        coeff_dict = fit_3dmm_for_a_image(image_name, save=False)
+        assert coeff_dict is not None
+        src_id = torch.tensor(coeff_dict['id']).reshape([1,80]).cuda()
+        src_exp = torch.tensor(coeff_dict['exp']).reshape([1,64]).cuda()
+        src_euler = torch.tensor(coeff_dict['euler']).reshape([1,3]).cuda()
+        src_trans = torch.tensor(coeff_dict['trans']).reshape([1,3]).cuda()
+        sample['id'] = src_id.repeat([t_x//2,1])
+
+        # get the src_kp for torso model
+        src_kp = self.face3d_helper.reconstruct_lm2d(src_id, src_exp, src_euler, src_trans) # [1, 68, 2]
+        src_kp = (src_kp-0.5) / 0.5 # rescale to -1~1
+        sample['src_kp'] = torch.clamp(src_kp, -1, 1).repeat([t_x//2,1,1])
+
+        # get camera pose file
+        print(f"| To extract pose from {inp['drv_pose_name']}")
+
+        # extract camera pose
+        if inp['drv_pose_name'] == 'static':
+            sample['euler'] = torch.tensor(coeff_dict['euler']).reshape([1,3]).cuda().repeat([t_x//2,1]) # default static pose
+            sample['trans'] = torch.tensor(coeff_dict['trans']).reshape([1,3]).cuda().repeat([t_x//2,1])
+        else: # from file
+            if inp['drv_pose_name'].endswith('.mp4'):
+                # extract coeff from video
+                drv_pose_coeff_dict = fit_3dmm_for_a_video(inp['drv_pose_name'], save=False)
+            else:
+                # load from npy
+                drv_pose_coeff_dict = np.load(inp['drv_pose_name'], allow_pickle=True).tolist()
+            print(f"| Extracted pose from {inp['drv_pose_name']}")
+            eulers = convert_to_tensor(drv_pose_coeff_dict['euler']).reshape([-1,3]).cuda()
+            trans = convert_to_tensor(drv_pose_coeff_dict['trans']).reshape([-1,3]).cuda()
+            len_pose = len(eulers)
+            # mirror_index maps every output frame onto the (possibly shorter) pose sequence
+            index_lst = [mirror_index(i, len_pose) for i in range(t_x//2)]
+            sample['euler'] = eulers[index_lst]
+            sample['trans'] = trans[index_lst]
+
+        # fix the z axis
+        sample['trans'][:, -1] = sample['trans'][0:1, -1].repeat([sample['trans'].shape[0]])
+
+        # mapping to the init pose
+        # NOTE: the flag is compared as the string 'True', not as a bool
+        if inp.get("map_to_init_pose", 'False') == 'True':
+            diff_euler = torch.tensor(coeff_dict['euler']).reshape([1,3]).cuda() - sample['euler'][0:1]
+            sample['euler'] = sample['euler'] + diff_euler
+            diff_trans = torch.tensor(coeff_dict['trans']).reshape([1,3]).cuda() - sample['trans'][0:1]
+            sample['trans'] = sample['trans'] + diff_trans
+
+        # prepare camera
+        camera_ret = get_eg3d_convention_camera_pose_intrinsic({'euler':sample['euler'].cpu(), 'trans':sample['trans'].cpu()})
+        c2w, intrinsics = camera_ret['c2w'], camera_ret['intrinsics']
+        # smooth camera
+        camera_smo_ksize = 7
+        camera = np.concatenate([c2w.reshape([-1,16]), intrinsics.reshape([-1,9])], axis=-1)
+        camera = smooth_camera_sequence(camera, kernel_size=camera_smo_ksize) # [T, 25]
+        camera = torch.tensor(camera).cuda().float()
+        sample['camera'] = camera
+
+        return sample
+
+    @torch.no_grad()
+    def get_hubert(self, wav16k_name):
+        """Extract HuBERT features and zero-pad the frame axis to a multiple of 8.
+
+        :param wav16k_name: path of the wav file handed to ``get_hubert_from_16k_wav``
+        :return: 2-D numpy array; rows appended at the end are zeros
+        """
+        from data_gen.utils.process_audio.extract_hubert import get_hubert_from_16k_wav
+        feats = get_hubert_from_16k_wav(wav16k_name).detach().numpy()
+        frame_multiple = 8
+        # (-n) % m is the distance from n up to the next multiple of m (0 when aligned)
+        tail_pad = (-feats.shape[0]) % frame_multiple
+        return np.pad(feats, pad_width=((0, tail_pad), (0, 0)))
+
+    def get_mfcc(self, wav16k_name):
+        """Compute 13-dim MFCC features and zero-pad the frame axis to a multiple of 8.
+
+        NOTE: the analysis settings below are written into the global
+        ``hparams`` and remain there after the call.
+
+        :param wav16k_name: path of the wav file handed to ``librosa_wav2mfcc``
+        :return: numpy array reshaped to [-1, 13], zero rows appended at the end
+        """
+        from utils.audio import librosa_wav2mfcc
+        mfcc_settings = (
+            ('fft_size', 1200),
+            ('win_size', 1200),
+            ('hop_size', 480),
+            ('audio_num_mel_bins', 80),
+            ('fmin', 80),
+            ('fmax', 12000),
+            ('audio_sample_rate', 24000),
+        )
+        for key, value in mfcc_settings:
+            hparams[key] = value
+        features = librosa_wav2mfcc(
+            wav16k_name,
+            fft_size=hparams['fft_size'],
+            hop_size=hparams['hop_size'],
+            win_length=hparams['win_size'],
+            num_mels=hparams['audio_num_mel_bins'],
+            fmin=hparams['fmin'],
+            fmax=hparams['fmax'],
+            sample_rate=hparams['audio_sample_rate'],
+            center=True,
+        )
+        features = np.array(features).reshape([-1, 13])
+        frame_multiple = 8
+        # (-n) % m is the distance from n up to the next multiple of m (0 when aligned)
+        tail_pad = (-features.shape[0]) % frame_multiple
+        return np.pad(features, pad_width=((0, tail_pad), (0, 0)))
+
+    @torch.no_grad()
+    def forward_audio2secc(self, batch, inp=None):
+        """Fill ``batch`` with identity/expression codes and the driving SECC maps.
+
+        For a .wav/.mp3 driver the audio2secc model predicts the motion; for any
+        other driver the expression comes from ``self.drv_motion_coeff_dict``.
+        The batch is then passed through ``get_driving_motion``.
+
+        :param batch: dict built by ``prepare_batch_from_inp``; modified in place
+        :param inp: options dict; 'drv_audio_name' is always read, 'temperature'
+            only for audio drivers (the None default is not usable)
+        :return: the updated batch
+        """
+        audio_driven = inp['drv_audio_name'][-4:] in ['.wav', '.mp3']
+        if not audio_driven:
+            batch['exp'] = torch.FloatTensor(self.drv_motion_coeff_dict['exp']).cuda()
+        else:
+            # audio-to-exp
+            ret = {}
+            pred = self.audio2secc_model.forward(batch, ret=ret,train=False, temperature=inp['temperature'],)
+            print("| audio-to-motion finished")
+            joint_id_exp = pred.shape[-1] == 144
+            motion = ret['pred'][0]
+            if joint_id_exp:
+                # 144 = 80 identity dims followed by 64 expression dims
+                id_coeff, exp_coeff = motion[:, :80], motion[:, 80:]
+            else:
+                id_coeff, exp_coeff = batch['id'], motion
+            if len(id_coeff) < len(exp_coeff): # happens when use ICL
+                shortfall = len(exp_coeff) - len(id_coeff)
+                id_coeff = torch.cat([id_coeff, id_coeff[0].unsqueeze(0).repeat([shortfall, 1])])
+            batch['id'] = id_coeff
+            batch['exp'] = exp_coeff
+
+        batch = self.get_driving_motion(batch['id'], batch['exp'], batch['euler'], batch['trans'], batch, inp)
+        if self.use_icl_audio2motion:
+            self.audio2secc_model.empty_context()
+        return batch
+
+    @torch.no_grad()
+    def get_driving_motion(self, id, exp, euler, trans, batch, inp):
+        """Render the SECC condition maps and the driving keypoints into ``batch``.
+
+        Writes 'drv_secc', 'src_secc', 'cano_secc' and 'drv_kp'. SECC maps are
+        rendered with zero rotation/translation; the keypoints use the given
+        ``euler``/``trans``. With ``inp['blink_mode'] == 'period'`` a blink is
+        painted onto the driving SECC every 25*5 frames.
+
+        :param id: per-frame identity codes
+        :param exp: per-frame expression codes
+        :param euler: per-frame rotations, used only for the keypoints
+        :param trans: per-frame translations, used only for the keypoints
+        :param batch: dict to update in place
+        :param inp: options dict; only 'blink_mode' is read
+        :return: the updated batch
+        """
+        num_frames = id.shape[0]
+        zero_eulers = torch.zeros([num_frames, 3]).to(id.device)
+        zero_trans = torch.zeros([num_frames, 3]).to(exp.device)
+
+        # render the secc given the id,exp (in chunks; each chunk is moved to CPU)
+        chunk_size = 50
+        num_chunks = (len(id) + chunk_size - 1) // chunk_size
+        secc_chunks = []
+        for chunk_idx in tqdm.trange(num_chunks, desc="rendering drv secc"):
+            torch.cuda.empty_cache()
+            window = slice(chunk_idx * chunk_size, (chunk_idx + 1) * chunk_size)
+            _, chunk_secc = self.secc_renderer(id[window], exp[window], zero_eulers[window], zero_trans[window])
+            secc_chunks.append(chunk_secc.cpu())
+        drv_secc_colors = torch.cat(secc_chunks, dim=0)
+
+        first = slice(0, 1)
+        _, src_secc_color = self.secc_renderer(id[first], exp[first], zero_eulers[first], zero_trans[first])
+        # canonical SECC: same identity, expression zeroed out
+        _, cano_secc_color = self.secc_renderer(id[first], exp[first]*0, zero_eulers[first], zero_trans[first])
+        batch['drv_secc'] = drv_secc_colors.cuda()
+        batch['src_secc'] = src_secc_color.cuda()
+        batch['cano_secc'] = cano_secc_color.cuda()
+
+        # blinking secc
+        if inp['blink_mode'] == 'period':
+            period = 5 # second
+            blink_interval = 25 * period  # presumably 25 fps - TODO confirm
+
+            def blink_percent_fn(t, T):
+                # parabola that is 0 at t=0 and t=T and peaks at 1 for t=T/2
+                return -4/T**2 * t**2 + 4/T * t
+
+            total_frames = len(drv_secc_colors)
+            for start in tqdm.trange(total_frames, desc="blinking secc"):
+                if start % blink_interval != 0:
+                    continue
+                blink_dur_frames = random.randint(8, 12)
+                for offset in range(blink_dur_frames):
+                    frame_idx = start + offset
+                    if frame_idx >= total_frames - 1:
+                        break
+                    blink_percent = blink_percent_fn(offset, blink_dur_frames)
+                    blinked = blink_eye_for_secc(batch['drv_secc'][frame_idx], blink_percent)
+                    batch['drv_secc'][frame_idx] = blinked.cuda()
+
+        # get the drv_kp for torso model, using the transformed trajectory
+        drv_kp = self.face3d_helper.reconstruct_lm2d(id, exp, euler, trans) # [T, 68, 2]
+        drv_kp = (drv_kp-0.5) / 0.5 # rescale to -1~1
+        batch['drv_kp'] = torch.clamp(drv_kp, -1, 1)
+        return batch
+
+    @torch.no_grad()
+    def forward_secc2video(self, batch, inp=None):
+        """Render every frame, write the video and mux the audio track into it.
+
+        Frames are first written to the temporary file ``demo.mp4`` in the
+        working directory, then ffmpeg combines them with the audio (the 16 kHz
+        wav for .wav/.mp3 drivers, otherwise the driving file itself). If ffmpeg
+        fails or cannot be started, the silent video is moved to the output path
+        instead. The temporary video and, for audio drivers, the 16 kHz wav are
+        removed afterwards. ffmpeg is called with an argument list (no shell),
+        so paths containing spaces or shell metacharacters are passed literally.
+
+        :param batch: dict produced by ``forward_audio2secc``
+        :param inp: options dict; reads 'out_mode', 'out_name', 'src_image_name',
+            'drv_pose_name' and 'drv_audio_name' (the None default is not usable)
+        :return: path of the written video
+        :raises NotImplementedError: if ``inp['out_mode'] == 'debug'``
+        """
+        import imageio
+        import shutil
+        import subprocess
+
+        def run_ffmpeg(args):
+            """Run ffmpeg without a shell; return its exit code (1 if it cannot be started)."""
+            try:
+                return subprocess.run(['ffmpeg', *args], check=False).returncode
+            except OSError as err:
+                print(f"| Warning: failed to launch ffmpeg: {err}")
+                return 1
+
+        def remove_if_exists(path):
+            """Delete a temporary file; a file that is already gone is not an error."""
+            try:
+                os.remove(path)
+            except FileNotFoundError:
+                pass
+
+        num_frames = len(batch['drv_secc'])
+        camera = batch['camera']
+        src_kps = batch['src_kp']
+        drv_kps = batch['drv_kp']
+        cano_secc_color = batch['cano_secc']
+        src_secc_color = batch['src_secc']
+        drv_secc_colors = batch['drv_secc']
+        ref_img_gt = batch['ref_gt_img']
+        ref_img_head = batch['ref_head_img']
+        ref_torso_img = batch['ref_torso_img']
+        bg_img = batch['bg_img']
+        segmap = batch['segmap']
+
+        # smooth torso drv_kp
+        torso_smo_ksize = 7
+        drv_kps = smooth_features_xd(drv_kps.reshape([-1, 68*2]), kernel_size=torso_smo_ksize).reshape([-1, 68, 2])
+
+        # forward renderer
+        img_raw_lst = []
+        img_lst = []
+        depth_img_lst = []
+        for i in tqdm.trange(num_frames, desc="Real3D-Portrait is rendering frames"):
+            # append a zero third coordinate to the 2D keypoints
+            kp_src = torch.cat([src_kps[i:i+1].reshape([1, 68, 2]), torch.zeros([1, 68,1]).to(src_kps.device)],dim=-1)
+            kp_drv = torch.cat([drv_kps[i:i+1].reshape([1, 68, 2]), torch.zeros([1, 68,1]).to(drv_kps.device)],dim=-1)
+            cond={'cond_cano': cano_secc_color,'cond_src': src_secc_color, 'cond_tgt': drv_secc_colors[i:i+1].cuda(),
+                    'ref_torso_img': ref_torso_img, 'bg_img': bg_img, 'segmap': segmap,
+                    'kp_s': kp_src, 'kp_d': kp_drv}
+            # the backbone result is cached on the first frame and reused afterwards
+            is_first = i == 0
+            gen_output = self.secc2video_model.forward(img=ref_img_head, camera=camera[i:i+1], cond=cond, ret={}, cache_backbone=is_first, use_cached_backbone=not is_first)
+            img_lst.append(gen_output['image'])
+            img_raw_lst.append(gen_output['image_raw'])
+            depth_img_lst.append(gen_output['image_depth'])
+
+        # save demo video
+        depth_imgs = torch.cat(depth_img_lst)
+        imgs = torch.cat(img_lst)
+        imgs_raw = torch.cat(img_raw_lst)
+        secc_img = torch.cat([torch.nn.functional.interpolate(drv_secc_colors[i:i+1], (512,512)) for i in range(num_frames)])
+
+        if inp['out_mode'] == 'concat_debug':
+            # panels concatenated along the last dim: reference | secc | raw render | depth | final
+            secc_img = secc_img.cpu()
+            secc_img = ((secc_img + 1) * 127.5).permute(0, 2, 3, 1).int().numpy()
+
+            depth_img = F.interpolate(depth_imgs, (512,512)).cpu()
+            depth_img = depth_img.repeat([1,3,1,1])
+            depth_img = (depth_img - depth_img.min()) / (depth_img.max() - depth_img.min())
+            depth_img = depth_img * 2 - 1
+            depth_img = depth_img.clamp(-1,1)
+
+            secc_img = secc_img / 127.5 - 1
+            secc_img = torch.from_numpy(secc_img).permute(0, 3, 1, 2)
+            imgs = torch.cat([ref_img_gt.repeat([imgs.shape[0],1,1,1]).cpu(), secc_img, F.interpolate(imgs_raw, (512,512)).cpu(), depth_img, imgs.cpu()], dim=-1)
+        elif inp['out_mode'] == 'final':
+            imgs = imgs.cpu()
+        elif inp['out_mode'] == 'debug':
+            raise NotImplementedError("to do: save separate videos")
+        imgs = imgs.clamp(-1,1)
+
+        debug_name = 'demo.mp4'
+        out_imgs = ((imgs.permute(0, 2, 3, 1) + 1)/2 * 255).int().cpu().numpy().astype(np.uint8)
+        writer = imageio.get_writer(debug_name, fps=25, format='FFMPEG', codec='h264')
+        try:
+            for i in tqdm.trange(len(out_imgs), desc="Imageio is saving video"):
+                writer.append_data(out_imgs[i])
+        finally:
+            # close the encoder even if appending a frame fails
+            writer.close()
+
+        if inp['out_name'] == '':
+            # NOTE(review): [:-4] assumes a 4-character extension, so the pose name
+            # 'static' contributes only 'st' - kept to preserve existing file names
+            out_fname = 'infer_out/tmp/' + os.path.basename(inp['src_image_name'])[:-4] + '_' + os.path.basename(inp['drv_pose_name'])[:-4] + '.mp4'
+        else:
+            out_fname = inp['out_name']
+        out_dir = os.path.dirname(out_fname)
+        if out_dir:  # os.makedirs('') raises, so skip bare file names
+            try:
+                os.makedirs(out_dir, exist_ok=True)
+            except OSError as err:
+                print(f"| Warning: could not create output directory {out_dir}: {err}")
+
+        audio_driven = inp['drv_audio_name'][-4:] in ['.wav', '.mp3']
+        if audio_driven:
+            ret = run_ffmpeg(['-i', debug_name, '-i', self.wav16k_name, '-y', '-v', 'quiet', '-shortest', out_fname])
+        else:
+            ret = run_ffmpeg(['-i', debug_name, '-i', inp['drv_audio_name'], '-map', '0:v', '-map', '1:a', '-y', '-v', 'quiet', '-shortest', out_fname])
+        if ret != 0:
+            # could not mux the audio (e.g. no audio track in drv_audio_name):
+            # output the video-only file without an audio track
+            shutil.move(debug_name, out_fname)
+        else:
+            remove_if_exists(debug_name)
+        if audio_driven:
+            remove_if_exists(self.wav16k_name)
+        print(f"Saved at {out_fname}")
+        return out_fname
+        
+    @torch.no_grad()
+    def forward_system(self, batch, inp):
+        """Run audio-to-SECC, then SECC-to-video, and return the output file name.
+
+        :param batch: dict from ``prepare_batch_from_inp``; updated in place by the first stage
+        :param inp: options dict forwarded to both stages
+        :return: the file name returned by ``forward_secc2video``
+        """
+        # the first stage's return value is not used; the same batch object is passed on
+        self.forward_audio2secc(batch, inp)
+        return self.forward_secc2video(batch, inp)
+
+    @classmethod
+    def example_run(cls, inp=None):
+        """Build an instance from the options dict and run a single inference.
+
+        :param inp: options overriding the two built-in example paths; it must
+            supply 'a2m_ckpt', 'head_ckpt' and 'torso_ckpt', which have no defaults here
+        """
+        options = dict(
+            drv_audio_name='data/raw/val_wavs/zozo.wav',
+            src_image_name='data/raw/val_imgs/Macron.png',
+        )
+        if inp is not None:
+            options.update(inp)
+
+        runner = cls(options['a2m_ckpt'], options['head_ckpt'], options['torso_ckpt'], inp=options)
+        runner.infer_once(options)
+
+    ##############
+    # IO-related
+    ##############
+    def save_wav16k(self, audio_name):
+        """Convert ``audio_name`` to a 16 kHz wav and remember the output path.
+
+        The output path is ``audio_name`` with its last four characters replaced
+        by ``_16k.wav``; it is stored in ``self.wav16k_name``. ffmpeg is called
+        with an argument list (no shell), so paths containing spaces or shell
+        metacharacters are passed literally. A failed conversion only prints a
+        warning, matching the previous non-raising behaviour.
+
+        :param audio_name: source file; must end in .wav, .mp3, .mp4 or .avi
+        :raises AssertionError: if the extension is not supported
+        """
+        import subprocess
+        supported_types = ('.wav', '.mp3', '.mp4', '.avi')
+        assert audio_name.endswith(supported_types), f"Now we only support {','.join(supported_types)} as audio source!"
+        wav16k_name = audio_name[:-4] + '_16k.wav'
+        self.wav16k_name = wav16k_name
+        extract_wav_cmd = ['ffmpeg', '-i', audio_name, '-f', 'wav', '-ar', '16000', '-v', 'quiet', '-y', wav16k_name]
+        try:
+            returncode = subprocess.run(extract_wav_cmd, check=False).returncode
+        except OSError as err:
+            # e.g. ffmpeg is not installed or not on PATH
+            returncode = None
+            print(f"| Warning: could not launch ffmpeg ({err}); {wav16k_name} was not written.")
+        if returncode == 0:
+            print(f"Extracted wav file (16khz) from {audio_name} to {wav16k_name}.")
+        elif returncode is not None:
+            print(f"| Warning: ffmpeg exited with code {returncode}; {wav16k_name} may be missing or incomplete.")
+
+    def get_f0(self, wav16k_name):
+        """Extract the f0 contour of a wav file as a column vector.
+
+        :param wav16k_name: path of the wav file to analyse (previously ignored
+            in favour of ``self.wav16k_name``; the caller in this class passes
+            that same path, so results are unchanged)
+        :return: f0 reshaped to [-1, 1]
+        """
+        from data_gen.utils.process_audio.extract_mel_f0 import extract_mel_from_fname, extract_f0_from_wav_and_mel
+        wav, mel = extract_mel_from_fname(wav16k_name)
+        f0, _ = extract_f0_from_wav_and_mel(wav, mel)  # the coarse f0 is not needed here
+        return f0.reshape([-1,1])
+
+if __name__ == '__main__':
+    import argparse
+    import glob
+    import tqdm
+
+    # (flag, add_argument keyword arguments), registered in this order
+    cli_options = [
+        ("--a2m_ckpt", dict(default='checkpoints/240126_real3dportrait_orig/audio2secc_vae', type=str)),
+        ("--head_ckpt", dict(default='', type=str)),
+        ("--torso_ckpt", dict(default='checkpoints/240126_real3dportrait_orig/secc2plane_torso_orig', type=str)),
+        ("--src_img", dict(default='', type=str)),  # data/raw/examples/Macron.png
+        ("--bg_img", dict(default='', type=str)),  # data/raw/examples/bg.png
+        ("--drv_aud", dict(default='', type=str)),  # data/raw/examples/Obama_5s.wav
+        ("--drv_pose", dict(default='static', type=str)),  # data/raw/examples/May_5s.mp4
+        ("--blink_mode", dict(default='none', type=str)),  # none | period
+        # sampling temperature in audio2motion, higher -> more diverse, less accurate
+        ("--temperature", dict(default=0.2, type=float)),
+        # scale of predicted mouth, enabled in audio-driven
+        ("--mouth_amp", dict(default=0.45, type=float)),
+        ("--head_torso_threshold", dict(default=0.9, type=float, help="0.1~1.0, turn up this value if the hair is translucent")),
+        ("--out_name", dict(default='')),  # output filename
+        # final: only output talking head video; concat_debug: talking head with internal features
+        ("--out_mode", dict(default='final')),
+        # whether to map the pose of first frame to source image (compared as the string 'True')
+        ("--map_to_init_pose", dict(default='True')),
+        # random seed, default None to use time.time()
+        ("--seed", dict(default=None, type=int)),
+    ]
+    parser = argparse.ArgumentParser()
+    for flag, options in cli_options:
+        parser.add_argument(flag, **options)
+
+    args = parser.parse_args()
+
+    # (key in the inference dict, attribute on the parsed args)
+    key_to_attr = [
+        ('a2m_ckpt', 'a2m_ckpt'),
+        ('head_ckpt', 'head_ckpt'),
+        ('torso_ckpt', 'torso_ckpt'),
+        ('src_image_name', 'src_img'),
+        ('bg_image_name', 'bg_img'),
+        ('drv_audio_name', 'drv_aud'),
+        ('drv_pose_name', 'drv_pose'),
+        ('blink_mode', 'blink_mode'),
+        ('temperature', 'temperature'),
+        ('mouth_amp', 'mouth_amp'),
+        ('out_name', 'out_name'),
+        ('out_mode', 'out_mode'),
+        ('map_to_init_pose', 'map_to_init_pose'),
+        ('head_torso_threshold', 'head_torso_threshold'),
+        ('seed', 'seed'),
+    ]
+    inp = {key: getattr(args, attr) for key, attr in key_to_attr}
+
+    GeneFace2Infer.example_run(inp)
\ No newline at end of file
diff --git a/insta.sh b/insta.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d8a4f7f20406600990707675c5faa4c51f31c35f
--- /dev/null
+++ b/insta.sh
@@ -0,0 +1,18 @@
+# Environment setup for Real3D-Portrait. Optionally create and activate a conda env first:
+#conda create -n real3dportrait python=3.9
+#conda activate real3dportrait
+conda install conda-forge::ffmpeg # ffmpeg with the libx264 codec, used to encode images into video
+
+### We recommend torch 2.0.1 + CUDA 11.7.
+conda install pytorch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 pytorch-cuda=11.7 -c pytorch -c nvidia
+
+# Build pytorch3d from source; this may take a long time (a proxy is recommended if the download times out)
+pip install "git+https://github.com/facebookresearch/pytorch3d.git@stable"
+
+# MMCV, needed for some network structures
+pip install cython
+pip install openmim==0.3.9
+mim install mmcv==2.1.0 # use mim to speed up the mmcv installation
+
+# Other dependencies
+pip install -r docs/prepare_env/requirements.txt -v
diff --git a/modules/audio2motion/cnn_models.py b/modules/audio2motion/cnn_models.py
new file mode 100644
index 0000000000000000000000000000000000000000..b58e8c472349f59ab1f733a384906644a0b796c2
--- /dev/null
+++ b/modules/audio2motion/cnn_models.py
@@ -0,0 +1,359 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def init_weights_func(m):
+    classname = m.__class__.__name__
+    if classname.find("Conv1d") != -1:
+        torch.nn.init.xavier_uniform_(m.weight)
+
+
+class LambdaLayer(nn.Module):
+    def __init__(self, lambd):
+        super(LambdaLayer, self).__init__()
+        self.lambd = lambd
+
+    def forward(self, x):
+        return self.lambd(x)
+
+
+class LayerNorm(torch.nn.LayerNorm):
+    """Layer normalization module.
+    :param int nout: output dim size
+    :param int dim: dimension to be normalized
+    """
+
+    def __init__(self, nout, dim=-1, eps=1e-5):
+        """Construct an LayerNorm object."""
+        super(LayerNorm, self).__init__(nout, eps=eps)
+        self.dim = dim
+
+    def forward(self, x):
+        """Apply layer normalization.
+        :param torch.Tensor x: input tensor
+        :return: layer normalized tensor
+        :rtype torch.Tensor
+        """
+        if self.dim == -1:
+            return super(LayerNorm, self).forward(x)
+        return super(LayerNorm, self).forward(x.transpose(1, -1)).transpose(1, -1)
+
+
+
+class ResidualBlock(nn.Module):
+    """Implements conv->PReLU->norm n-times"""
+
+    def __init__(self, channels, kernel_size, dilation, n=2, norm_type='bn', dropout=0.0,
+                 c_multiple=2, ln_eps=1e-12, bias=False):
+        super(ResidualBlock, self).__init__()
+
+        if norm_type == 'bn':
+            norm_builder = lambda: nn.BatchNorm1d(channels)
+        elif norm_type == 'in':
+            norm_builder = lambda: nn.InstanceNorm1d(channels, affine=True)
+        elif norm_type == 'gn':
+            norm_builder = lambda: nn.GroupNorm(8, channels)
+        elif norm_type == 'ln':
+            norm_builder = lambda: LayerNorm(channels, dim=1, eps=ln_eps)
+        else:
+            norm_builder = lambda: nn.Identity()
+
+        self.blocks = [
+            nn.Sequential(
+                norm_builder(),
+                nn.Conv1d(channels, c_multiple * channels, kernel_size, dilation=dilation,
+                          padding=(dilation * (kernel_size - 1)) // 2, bias=bias),
+                LambdaLayer(lambda x: x * kernel_size ** -0.5),
+                nn.GELU(),
+                nn.Conv1d(c_multiple * channels, channels, 1, dilation=dilation, bias=bias),
+            )
+            for _ in range(n)
+        ]
+
+        self.blocks = nn.ModuleList(self.blocks)
+        self.dropout = dropout
+
+    def forward(self, x):
+        nonpadding = (x.abs().sum(1) > 0).float()[:, None, :]
+        for b in self.blocks:
+            x_ = b(x)
+            if self.dropout > 0 and self.training:
+                x_ = F.dropout(x_, self.dropout, training=self.training)
+            x = x + x_
+            x = x * nonpadding
+        return x
+
+
+class ConvBlocks(nn.Module):
+    """Decodes the expanded phoneme encoding into spectrograms"""
+
+    def __init__(self, channels, out_dims, dilations, kernel_size,
+                 norm_type='ln', layers_in_block=2, c_multiple=2,
+                 dropout=0.0, ln_eps=1e-5, init_weights=True, is_BTC=True, bias=False):
+        super(ConvBlocks, self).__init__()
+        self.is_BTC = is_BTC
+        self.res_blocks = nn.Sequential(
+            *[ResidualBlock(channels, kernel_size, d,
+                            n=layers_in_block, norm_type=norm_type, c_multiple=c_multiple,
+                            dropout=dropout, ln_eps=ln_eps, bias=bias)
+              for d in dilations],
+        )
+        if norm_type == 'bn':
+            norm = nn.BatchNorm1d(channels)
+        elif norm_type == 'in':
+            norm = nn.InstanceNorm1d(channels, affine=True)
+        elif norm_type == 'gn':
+            norm = nn.GroupNorm(8, channels)
+        elif norm_type == 'ln':
+            norm = LayerNorm(channels, dim=1, eps=ln_eps)
+        self.last_norm = norm
+        self.post_net1 = nn.Conv1d(channels, out_dims, kernel_size=3, padding=1, bias=bias)
+        if init_weights:
+            self.apply(init_weights_func)
+
+    def forward(self, x):
+        """
+
+        :param x: [B, T, H]
+        :return:  [B, T, H]
+        """
+        if self.is_BTC:
+            x = x.transpose(1, 2) # [B, C, T]
+        nonpadding = (x.abs().sum(1) > 0).float()[:, None, :]
+        x = self.res_blocks(x) * nonpadding
+        x = self.last_norm(x) * nonpadding
+        x = self.post_net1(x) * nonpadding
+        if self.is_BTC:
+            x = x.transpose(1, 2)
+        return x
+
+
+class SeqLevelConvolutionalModel(nn.Module):
+    def __init__(self, out_dim=64, dropout=0.5, audio_feat_type='ppg', backbone_type='unet', norm_type='bn'):
+        nn.Module.__init__(self)
+        self.audio_feat_type = audio_feat_type
+        if audio_feat_type == 'ppg':
+            self.audio_encoder = nn.Sequential(*[
+                nn.Conv1d(29, 48, 3, 1, 1, bias=False),
+                nn.BatchNorm1d(48) if norm_type=='bn' else LayerNorm(48, dim=1),
+                nn.GELU(),
+                nn.Conv1d(48, 48, 3, 1, 1, bias=False)
+            ])  
+            self.energy_encoder = nn.Sequential(*[
+                nn.Conv1d(1, 16, 3, 1, 1, bias=False),
+                nn.BatchNorm1d(16) if norm_type=='bn' else LayerNorm(16, dim=1),
+                nn.GELU(),
+                nn.Conv1d(16, 16, 3, 1, 1, bias=False)
+            ]) 
+        elif audio_feat_type == 'mel':
+            self.mel_encoder = nn.Sequential(*[
+                nn.Conv1d(80, 64, 3, 1, 1, bias=False),
+                nn.BatchNorm1d(64) if norm_type=='bn' else LayerNorm(64, dim=1),
+                nn.GELU(),
+                nn.Conv1d(64, 64, 3, 1, 1, bias=False)
+            ])  
+        else:
+            raise NotImplementedError("now only ppg or mel are supported!")
+
+        self.style_encoder = nn.Sequential(*[
+            nn.Linear(135, 256),
+            nn.GELU(),
+            nn.Linear(256, 256)
+        ]) 
+
+        if backbone_type == 'resnet':
+            self.backbone = ResNetBackbone()
+        elif backbone_type == 'unet':
+            self.backbone = UNetBackbone()
+        elif backbone_type == 'resblocks':
+            self.backbone = ResBlocksBackbone()
+        else:
+            raise NotImplementedError("Now only resnet and unet are supported!")
+
+        self.out_layer = nn.Sequential(
+            nn.BatchNorm1d(512) if norm_type=='bn' else LayerNorm(512, dim=1),
+            nn.Conv1d(512, 64, 3, 1, 1, bias=False),
+            nn.PReLU(),
+            nn.Conv1d(64, out_dim, 3, 1, 1, bias=False)
+        )
+        self.feat_dropout = nn.Dropout(p=dropout)   
+
+    @property
+    def device(self):
+        return self.backbone.parameters().__next__().device
+
+    def forward(self, batch, ret, log_dict=None):
+        style, x_mask = batch['style'].to(self.device), batch['x_mask'].to(self.device)
+        style_feat = self.style_encoder(style)  # [B,C=135] => [B,C=128]
+
+        if self.audio_feat_type == 'ppg':
+            audio, energy = batch['audio'].to(self.device), batch['energy'].to(self.device)
+            audio_feat = self.audio_encoder(audio.transpose(1,2)).transpose(1,2) * x_mask.unsqueeze(2)  # [B,T,C=29] => [B,T,C=48] 
+            energy_feat = self.energy_encoder(energy.transpose(1,2)).transpose(1,2) * x_mask.unsqueeze(2)  # [B,T,C=1] => [B,T,C=16]
+            feat = torch.cat([audio_feat, energy_feat], dim=2) # [B,T,C=48+16]
+        elif self.audio_feat_type == 'mel':
+            mel = batch['mel'].to(self.device)
+            feat = self.mel_encoder(mel.transpose(1,2)).transpose(1,2) * x_mask.unsqueeze(2) # [B,T,C=64]
+        
+        feat, x_mask = self.backbone(x=feat, sty=style_feat, x_mask=x_mask)
+        
+        out = self.out_layer(feat.transpose(1,2)).transpose(1,2) * x_mask.unsqueeze(2)  # [B,T//2,C=256] => [B,T//2,C=64]
+        
+        ret['pred'] = out
+        ret['mask'] = x_mask
+        return out
+
+
+class ResBlocksBackbone(nn.Module):
+    def __init__(self, in_dim=64, out_dim=512, p_dropout=0.5, norm_type='bn'):
+        super(ResBlocksBackbone,self).__init__()
+        self.resblocks_0 = ConvBlocks(channels=in_dim, out_dims=64, dilations=[1]*3, kernel_size=3, norm_type=norm_type, is_BTC=False)
+        self.resblocks_1 = ConvBlocks(channels=64, out_dims=128, dilations=[1]*4, kernel_size=3, norm_type=norm_type, is_BTC=False)
+        self.resblocks_2 = ConvBlocks(channels=128, out_dims=256, dilations=[1]*14, kernel_size=3, norm_type=norm_type, is_BTC=False)
+        self.resblocks_3 = ConvBlocks(channels=512, out_dims=512, dilations=[1]*3, kernel_size=3, norm_type=norm_type, is_BTC=False)
+        self.resblocks_4 = ConvBlocks(channels=512, out_dims=out_dim, dilations=[1]*3, kernel_size=3, norm_type=norm_type, is_BTC=False)
+ 
+        self.downsampler = LambdaLayer(lambda x: F.interpolate(x, scale_factor=0.5, mode='linear'))
+        self.upsampler = LambdaLayer(lambda x: F.interpolate(x, scale_factor=4, mode='linear'))
+
+        self.dropout = nn.Dropout(p=p_dropout)
+
+    def forward(self, x, sty, x_mask=1.):
+        """
+        x: [B, T, C]
+        sty: [B, C=256]
+        x_mask: [B, T]
+        ret: [B, T/2, C]
+        """
+        x = x.transpose(1, 2)  # [B, C, T]
+        x_mask = x_mask[:, None, :] # [B, 1, T]
+
+        x = self.resblocks_0(x) * x_mask # [B, C, T]
+
+        x_mask = self.downsampler(x_mask) # [B, 1, T/2]
+        x = self.downsampler(x) * x_mask # [B, C, T/2]
+        x = self.resblocks_1(x) * x_mask # [B, C, T/2]
+        x = self.resblocks_2(x) * x_mask # [B, C, T/2]
+
+        x = self.dropout(x.transpose(1,2)).transpose(1,2)
+        sty = sty[:, :, None].repeat([1,1,x_mask.shape[2]]) # [B,C=256,T/2]
+        x = torch.cat([x, sty], dim=1) # [B, C=256+256, T/2]
+
+        x = self.resblocks_3(x) * x_mask # [B, C, T/2]
+        x = self.resblocks_4(x) * x_mask # [B, C, T/2]
+
+        x = x.transpose(1,2)
+        x_mask = x_mask.squeeze(1)
+        return x, x_mask
+
+
+
+class ResNetBackbone(nn.Module):
+    def __init__(self, in_dim=64, out_dim=512, p_dropout=0.5, norm_type='bn'):
+        super(ResNetBackbone,self).__init__()
+        self.resblocks_0 = ConvBlocks(channels=in_dim, out_dims=64, dilations=[1]*3, kernel_size=3, norm_type=norm_type, is_BTC=False)
+        self.resblocks_1 = ConvBlocks(channels=64, out_dims=128, dilations=[1]*4, kernel_size=3, norm_type=norm_type, is_BTC=False)
+        self.resblocks_2 = ConvBlocks(channels=128, out_dims=256, dilations=[1]*14, kernel_size=3, norm_type=norm_type, is_BTC=False)
+        self.resblocks_3 = ConvBlocks(channels=512, out_dims=512, dilations=[1]*3, kernel_size=3, norm_type=norm_type, is_BTC=False)
+        self.resblocks_4 = ConvBlocks(channels=512, out_dims=out_dim, dilations=[1]*3, kernel_size=3, norm_type=norm_type, is_BTC=False)
+ 
+        self.downsampler = LambdaLayer(lambda x: F.interpolate(x, scale_factor=0.5, mode='linear'))
+        self.upsampler = LambdaLayer(lambda x: F.interpolate(x, scale_factor=4, mode='linear'))
+
+        self.dropout = nn.Dropout(p=p_dropout)
+
+    def forward(self, x, sty, x_mask=1.):
+        """
+        x: [B, T, C]
+        sty: [B, C=256]
+        x_mask: [B, T]
+        ret: [B, T/2, C]
+        """
+        x = x.transpose(1, 2)  # [B, C, T]
+        x_mask = x_mask[:, None, :] # [B, 1, T]
+
+        x = self.resblocks_0(x) * x_mask # [B, C, T]
+
+        x_mask = self.downsampler(x_mask) # [B, 1, T/2]
+        x = self.downsampler(x) * x_mask # [B, C, T/2]
+        x = self.resblocks_1(x) * x_mask # [B, C, T/2]
+
+        x_mask = self.downsampler(x_mask) # [B, 1, T/4]
+        x = self.downsampler(x) * x_mask # [B, C, T/4]
+        x = self.resblocks_2(x) * x_mask # [B, C, T/4]
+
+        x_mask = self.downsampler(x_mask) # [B, 1, T/8]
+        x = self.downsampler(x) * x_mask # [B, C, T/8]
+        x = self.dropout(x.transpose(1,2)).transpose(1,2)
+        sty = sty[:, :, None].repeat([1,1,x_mask.shape[2]]) # [B,C=256,T/8]
+        x = torch.cat([x, sty], dim=1) # [B, C=256+256, T/8]
+        x = self.resblocks_3(x) * x_mask # [B, C, T/8]
+
+        x_mask = self.upsampler(x_mask) # [B, 1, T/2]
+        x = self.upsampler(x) * x_mask # [B, C, T/2]
+        x = self.resblocks_4(x) * x_mask # [B, C, T/2]
+        
+        x = x.transpose(1,2)
+        x_mask = x_mask.squeeze(1)
+        return x, x_mask
+
+
+class UNetBackbone(nn.Module):
+    def __init__(self, in_dim=64, out_dim=512, p_dropout=0.5, norm_type='bn'):
+        super(UNetBackbone, self).__init__()
+        self.resblocks_0 = ConvBlocks(channels=in_dim, out_dims=64, dilations=[1]*3, kernel_size=3, norm_type=norm_type, is_BTC=False)
+        self.resblocks_1 = ConvBlocks(channels=64, out_dims=128, dilations=[1]*4, kernel_size=3, norm_type=norm_type, is_BTC=False)
+        self.resblocks_2 = ConvBlocks(channels=128, out_dims=256, dilations=[1]*8, kernel_size=3, norm_type=norm_type, is_BTC=False)
+        self.resblocks_3 = ConvBlocks(channels=512, out_dims=512, dilations=[1]*3, kernel_size=3, norm_type=norm_type, is_BTC=False)
+        self.resblocks_4 = ConvBlocks(channels=768, out_dims=512, dilations=[1]*3, kernel_size=3, norm_type=norm_type, is_BTC=False) # [768 = c3(512) + c2(256)]
+        self.resblocks_5 = ConvBlocks(channels=640, out_dims=out_dim, dilations=[1]*3, kernel_size=3, norm_type=norm_type, is_BTC=False) # [640 = c4(512) + c1(128)]
+
+        self.downsampler = nn.Upsample(scale_factor=0.5, mode='linear')
+        self.upsampler = nn.Upsample(scale_factor=2, mode='linear')
+        self.dropout = nn.Dropout(p=p_dropout)
+
+    def forward(self, x, sty, x_mask=1.):
+        """
+        x: [B, T, C]
+        sty: [B, C=256]
+        x_mask: [B, T]
+        ret: [B, T/2, C]
+        """
+        x = x.transpose(1, 2)  # [B, C, T]
+        x_mask = x_mask[:, None, :] # [B, 1, T]
+
+        x0 = self.resblocks_0(x) * x_mask # [B, C, T]
+
+        x_mask = self.downsampler(x_mask) # [B, 1, T/2]
+        x = self.downsampler(x0) * x_mask # [B, C, T/2]
+        x1 = self.resblocks_1(x) * x_mask # [B, C, T/2]
+
+        x_mask = self.downsampler(x_mask) # [B, 1, T/4]
+        x = self.downsampler(x1) * x_mask # [B, C, T/4]
+        x2 = self.resblocks_2(x) * x_mask # [B, C, T/4]
+
+        x_mask = self.downsampler(x_mask) # [B, 1, T/8]
+        x = self.downsampler(x2) * x_mask # [B, C, T/8]
+        x = self.dropout(x.transpose(1,2)).transpose(1,2)
+        sty = sty[:, :, None].repeat([1,1,x_mask.shape[2]]) # [B,C=256,T/8]
+        x = torch.cat([x, sty], dim=1) # [B, C=256+256, T/8]
+        x3 = self.resblocks_3(x) * x_mask # [B, C, T/8]
+
+        x_mask = self.upsampler(x_mask) # [B, 1, T/4]
+        x = self.upsampler(x3) * x_mask # [B, C, T/4]
+        x = torch.cat([x, self.dropout(x2.transpose(1,2)).transpose(1,2)], dim=1) # 
+        x4 = self.resblocks_4(x) * x_mask # [B, C, T/4]
+
+        x_mask = self.upsampler(x_mask) # [B, 1, T/2]
+        x = self.upsampler(x4) * x_mask # [B, C, T/2]
+        x = torch.cat([x, self.dropout(x1.transpose(1,2)).transpose(1,2)], dim=1)
+        x5 = self.resblocks_5(x) * x_mask # [B, C, T/2]
+
+        x = x5.transpose(1,2)
+        x_mask = x_mask.squeeze(1)
+        return x, x_mask
+
+
+if __name__ == '__main__':
+    # Placeholder: running this module as a script does nothing.
+    pass
diff --git a/modules/audio2motion/flow_base.py b/modules/audio2motion/flow_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2ff1c626cc3e4aef72406e16971db7331aa5c85
--- /dev/null
+++ b/modules/audio2motion/flow_base.py
@@ -0,0 +1,838 @@
+import scipy
+from scipy import linalg
+from torch.nn import functional as F
+import torch
+from torch import nn
+import numpy as np
+
+import modules.audio2motion.utils as utils
+from modules.audio2motion.transformer_models import FFTBlocks
+from utils.commons.hparams import hparams
+
+
+def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
+    n_channels_int = n_channels[0]
+    in_act = input_a + input_b
+    t_act = torch.tanh(in_act[:, :n_channels_int, :])
+    s_act = torch.sigmoid(in_act[:, n_channels_int:, :])
+    acts = t_act * s_act
+    return acts
+
+class WN(torch.nn.Module):
+    def __init__(self, hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=0,
+                 p_dropout=0, share_cond_layers=False):
+        super(WN, self).__init__()
+        assert (kernel_size % 2 == 1)
+        assert (hidden_channels % 2 == 0)
+        self.hidden_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dilation_rate = dilation_rate
+        self.n_layers = n_layers
+        self.gin_channels = gin_channels
+        self.p_dropout = p_dropout
+        self.share_cond_layers = share_cond_layers
+
+        self.in_layers = torch.nn.ModuleList()
+        self.res_skip_layers = torch.nn.ModuleList()
+        
+        self.drop = nn.Dropout(p_dropout)
+
+        self.use_adapters = hparams.get("use_adapters", False)
+        if self.use_adapters:
+            self.adapter_layers = torch.nn.ModuleList()
+
+        if gin_channels != 0 and not share_cond_layers:
+            cond_layer = torch.nn.Conv1d(gin_channels, 2 * hidden_channels * n_layers, 1)
+            self.cond_layer = torch.nn.utils.weight_norm(cond_layer, name='weight')
+
+        for i in range(n_layers):
+            dilation = dilation_rate ** i
+            padding = int((kernel_size * dilation - dilation) / 2)
+            in_layer = torch.nn.Conv1d(hidden_channels, 2 * hidden_channels, kernel_size,
+                                       dilation=dilation, padding=padding)
+            in_layer = torch.nn.utils.weight_norm(in_layer, name='weight')
+            self.in_layers.append(in_layer)
+
+            # last one is not necessary
+            if i < n_layers - 1:
+                res_skip_channels = 2 * hidden_channels
+            else:
+                res_skip_channels = hidden_channels
+
+            res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1)
+            res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, name='weight')
+            self.res_skip_layers.append(res_skip_layer)
+
+            if self.use_adapters:
+                adapter_layer = MlpAdapter(in_out_dim=res_skip_channels, hid_dim=res_skip_channels//4)
+                self.adapter_layers.append(adapter_layer)
+
+    def forward(self, x, x_mask=None, g=None, **kwargs):
+        output = torch.zeros_like(x)
+        n_channels_tensor = torch.IntTensor([self.hidden_channels])
+
+        if g is not None and not self.share_cond_layers:
+            g = self.cond_layer(g)
+
+        for i in range(self.n_layers):
+            x_in = self.in_layers[i](x)
+            x_in = self.drop(x_in)
+            if g is not None:
+                cond_offset = i * 2 * self.hidden_channels
+                g_l = g[:, cond_offset:cond_offset + 2 * self.hidden_channels, :]
+            else:
+                g_l = torch.zeros_like(x_in)
+
+            acts = fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor)
+
+            res_skip_acts = self.res_skip_layers[i](acts)
+            if self.use_adapters:
+                res_skip_acts = self.adapter_layers[i](res_skip_acts.transpose(1,2)).transpose(1,2)
+            if i < self.n_layers - 1:
+                x = (x + res_skip_acts[:, :self.hidden_channels, :]) * x_mask
+                output = output + res_skip_acts[:, self.hidden_channels:, :]
+            else:
+                output = output + res_skip_acts
+        return output * x_mask
+
+    def remove_weight_norm(self):
+        def remove_weight_norm(m):
+            try:
+                nn.utils.remove_weight_norm(m)
+            except ValueError:  # this module didn't have weight norm
+                return
+
+        self.apply(remove_weight_norm)
+    
+    def enable_adapters(self):
+        if not self.use_adapters:
+            return
+        for adapter_layer in self.adapter_layers:
+            adapter_layer.enable()
+
+    def disable_adapters(self):
+        if not self.use_adapters:
+            return
+        for adapter_layer in self.adapter_layers:
+            adapter_layer.disable()
+
+class Permute(nn.Module):
+    def __init__(self, *args):
+        super(Permute, self).__init__()
+        self.args = args
+
+    def forward(self, x):
+        return x.permute(self.args)
+
+
+class LayerNorm(nn.Module):
+    def __init__(self, channels, eps=1e-4):
+        super().__init__()
+        self.channels = channels
+        self.eps = eps
+
+        self.gamma = nn.Parameter(torch.ones(channels))
+        self.beta = nn.Parameter(torch.zeros(channels))
+
+    def forward(self, x):
+        n_dims = len(x.shape)
+        mean = torch.mean(x, 1, keepdim=True)
+        variance = torch.mean((x - mean) ** 2, 1, keepdim=True)
+
+        x = (x - mean) * torch.rsqrt(variance + self.eps)
+
+        shape = [1, -1] + [1] * (n_dims - 2)
+        x = x * self.gamma.view(*shape) + self.beta.view(*shape)
+        return x
+
+
+class ConvReluNorm(nn.Module):
+    def __init__(self, in_channels, hidden_channels, out_channels, kernel_size, n_layers, p_dropout):
+        super().__init__()
+        self.in_channels = in_channels
+        self.hidden_channels = hidden_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.n_layers = n_layers
+        self.p_dropout = p_dropout
+        assert n_layers > 1, "Number of layers should be larger than 0."
+
+        self.conv_layers = nn.ModuleList()
+        self.norm_layers = nn.ModuleList()
+        self.conv_layers.append(nn.Conv1d(in_channels, hidden_channels, kernel_size, padding=kernel_size // 2))
+        self.norm_layers.append(LayerNorm(hidden_channels))
+        self.relu_drop = nn.Sequential(
+            nn.ReLU(),
+            nn.Dropout(p_dropout))
+        for _ in range(n_layers - 1):
+            self.conv_layers.append(nn.Conv1d(hidden_channels, hidden_channels, kernel_size, padding=kernel_size // 2))
+            self.norm_layers.append(LayerNorm(hidden_channels))
+        self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
+        self.proj.weight.data.zero_()
+        self.proj.bias.data.zero_()
+
+    def forward(self, x, x_mask):
+        x_org = x
+        for i in range(self.n_layers):
+            x = self.conv_layers[i](x * x_mask)
+            x = self.norm_layers[i](x)
+            x = self.relu_drop(x)
+        x = x_org + self.proj(x)
+        return x * x_mask
+
+
+
+class ActNorm(nn.Module):
+    def __init__(self, channels, ddi=False, **kwargs):
+        super().__init__()
+        self.channels = channels
+        self.initialized = not ddi
+
+        self.logs = nn.Parameter(torch.zeros(1, channels, 1))
+        self.bias = nn.Parameter(torch.zeros(1, channels, 1))
+
+    def forward(self, x, x_mask=None, reverse=False, **kwargs):
+        if x_mask is None:
+            x_mask = torch.ones(x.size(0), 1, x.size(2)).to(device=x.device, dtype=x.dtype)
+        x_len = torch.sum(x_mask, [1, 2])
+        if not self.initialized:
+            self.initialize(x, x_mask)
+            self.initialized = True
+
+        if reverse:
+            z = (x - self.bias) * torch.exp(-self.logs) * x_mask
+            logdet = torch.sum(-self.logs) * x_len
+        else:
+            z = (self.bias + torch.exp(self.logs) * x) * x_mask
+            logdet = torch.sum(self.logs) * x_len  # [b]
+        return z, logdet
+
+    def store_inverse(self):
+        pass
+
+    def set_ddi(self, ddi):
+        self.initialized = not ddi
+
+    def initialize(self, x, x_mask):
+        with torch.no_grad():
+            denom = torch.sum(x_mask, [0, 2])
+            m = torch.sum(x * x_mask, [0, 2]) / denom
+            m_sq = torch.sum(x * x * x_mask, [0, 2]) / denom
+            v = m_sq - (m ** 2)
+            logs = 0.5 * torch.log(torch.clamp_min(v, 1e-6))
+
+            bias_init = (-m * torch.exp(-logs)).view(*self.bias.shape).to(dtype=self.bias.dtype)
+            logs_init = (-logs).view(*self.logs.shape).to(dtype=self.logs.dtype)
+
+            self.bias.data.copy_(bias_init)
+            self.logs.data.copy_(logs_init)
+
+
+class InvConvNear(nn.Module):
+    def __init__(self, channels, n_split=4, no_jacobian=False, lu=True, n_sqz=2, **kwargs):
+        super().__init__()
+        assert (n_split % 2 == 0)
+        self.channels = channels
+        self.n_split = n_split
+        self.n_sqz = n_sqz
+        self.no_jacobian = no_jacobian
+
+        w_init = torch.qr(torch.FloatTensor(self.n_split, self.n_split).normal_())[0]
+        if torch.det(w_init) < 0:
+            w_init[:, 0] = -1 * w_init[:, 0]
+        self.lu = lu
+        if lu:
+            # LU decomposition can slightly speed up the inverse
+            np_p, np_l, np_u = linalg.lu(w_init)
+            np_s = np.diag(np_u)
+            np_sign_s = np.sign(np_s)
+            np_log_s = np.log(np.abs(np_s))
+            np_u = np.triu(np_u, k=1)
+            l_mask = np.tril(np.ones(w_init.shape, dtype=float), -1)
+            eye = np.eye(*w_init.shape, dtype=float)
+
+            self.register_buffer('p', torch.Tensor(np_p.astype(float)))
+            self.register_buffer('sign_s', torch.Tensor(np_sign_s.astype(float)))
+            self.l = nn.Parameter(torch.Tensor(np_l.astype(float)), requires_grad=True)
+            self.log_s = nn.Parameter(torch.Tensor(np_log_s.astype(float)), requires_grad=True)
+            self.u = nn.Parameter(torch.Tensor(np_u.astype(float)), requires_grad=True)
+            self.register_buffer('l_mask', torch.Tensor(l_mask))
+            self.register_buffer('eye', torch.Tensor(eye))
+        else:
+            self.weight = nn.Parameter(w_init)
+
+    def forward(self, x, x_mask=None, reverse=False, **kwargs):
+        b, c, t = x.size()
+        assert (c % self.n_split == 0)
+        if x_mask is None:
+            x_mask = 1
+            x_len = torch.ones((b,), dtype=x.dtype, device=x.device) * t
+        else:
+            x_len = torch.sum(x_mask, [1, 2])
+
+        x = x.view(b, self.n_sqz, c // self.n_split, self.n_split // self.n_sqz, t)
+        x = x.permute(0, 1, 3, 2, 4).contiguous().view(b, self.n_split, c // self.n_split, t)
+
+        if self.lu:
+            self.weight, log_s = self._get_weight()
+            logdet = log_s.sum()
+            logdet = logdet * (c / self.n_split) * x_len
+        else:
+            logdet = torch.logdet(self.weight) * (c / self.n_split) * x_len  # [b]
+
+        if reverse:
+            if hasattr(self, "weight_inv"):
+                weight = self.weight_inv
+            else:
+                weight = torch.inverse(self.weight.float()).to(dtype=self.weight.dtype)
+            logdet = -logdet
+        else:
+            weight = self.weight
+            if self.no_jacobian:
+                logdet = 0
+
+        weight = weight.view(self.n_split, self.n_split, 1, 1)
+        z = F.conv2d(x, weight)
+
+        z = z.view(b, self.n_sqz, self.n_split // self.n_sqz, c // self.n_split, t)
+        z = z.permute(0, 1, 3, 2, 4).contiguous().view(b, c, t) * x_mask
+        return z, logdet
+
+    def _get_weight(self):
+        l, log_s, u = self.l, self.log_s, self.u
+        l = l * self.l_mask + self.eye
+        u = u * self.l_mask.transpose(0, 1).contiguous() + torch.diag(self.sign_s * torch.exp(log_s))
+        weight = torch.matmul(self.p, torch.matmul(l, u))
+        return weight, log_s
+
+    def store_inverse(self):
+        weight, _ = self._get_weight()
+        self.weight_inv = torch.inverse(weight.float()).to(next(self.parameters()).device)
+
+
+class InvConv(nn.Module):
+    def __init__(self, channels, no_jacobian=False, lu=True, **kwargs):
+        super().__init__()
+        w_shape = [channels, channels]
+        w_init = np.linalg.qr(np.random.randn(*w_shape))[0].astype(float)
+        LU_decomposed = lu
+        if not LU_decomposed:
+            # Sample a random orthogonal matrix:
+            self.register_parameter("weight", nn.Parameter(torch.Tensor(w_init)))
+        else:
+            np_p, np_l, np_u = linalg.lu(w_init)
+            np_s = np.diag(np_u)
+            np_sign_s = np.sign(np_s)
+            np_log_s = np.log(np.abs(np_s))
+            np_u = np.triu(np_u, k=1)
+            l_mask = np.tril(np.ones(w_shape, dtype=float), -1)
+            eye = np.eye(*w_shape, dtype=float)
+
+            self.register_buffer('p', torch.Tensor(np_p.astype(float)))
+            self.register_buffer('sign_s', torch.Tensor(np_sign_s.astype(float)))
+            self.l = nn.Parameter(torch.Tensor(np_l.astype(float)))
+            self.log_s = nn.Parameter(torch.Tensor(np_log_s.astype(float)))
+            self.u = nn.Parameter(torch.Tensor(np_u.astype(float)))
+            self.l_mask = torch.Tensor(l_mask)
+            self.eye = torch.Tensor(eye)
+        self.w_shape = w_shape
+        self.LU = LU_decomposed
+        self.weight = None
+
+    def get_weight(self, device, reverse):
+        w_shape = self.w_shape
+        self.p = self.p.to(device)
+        self.sign_s = self.sign_s.to(device)
+        self.l_mask = self.l_mask.to(device)
+        self.eye = self.eye.to(device)
+        l = self.l * self.l_mask + self.eye
+        u = self.u * self.l_mask.transpose(0, 1).contiguous() + torch.diag(self.sign_s * torch.exp(self.log_s))
+        dlogdet = self.log_s.sum()
+        if not reverse:
+            w = torch.matmul(self.p, torch.matmul(l, u))
+        else:
+            l = torch.inverse(l.double()).float()
+            u = torch.inverse(u.double()).float()
+            w = torch.matmul(u, torch.matmul(l, self.p.inverse()))
+        return w.view(w_shape[0], w_shape[1], 1), dlogdet
+
+    def forward(self, x, x_mask=None, reverse=False, **kwargs):
+        """
+        log-det = log|abs(|W|)| * pixels
+        """
+        b, c, t = x.size()
+        if x_mask is None:
+            x_len = torch.ones((b,), dtype=x.dtype, device=x.device) * t
+        else:
+            x_len = torch.sum(x_mask, [1, 2])
+        logdet = 0
+        if not reverse:
+            weight, dlogdet = self.get_weight(x.device, reverse)
+            z = F.conv1d(x, weight)
+            if logdet is not None:
+                logdet = logdet + dlogdet * x_len
+            return z, logdet
+        else:
+            if self.weight is None:
+                weight, dlogdet = self.get_weight(x.device, reverse)
+            else:
+                weight, dlogdet = self.weight, self.dlogdet
+            z = F.conv1d(x, weight)
+            if logdet is not None:
+                logdet = logdet - dlogdet * x_len
+            return z, logdet
+
+    def store_inverse(self):
+        self.weight, self.dlogdet = self.get_weight('cuda', reverse=True)
+
+
+class Flip(nn.Module):
+    def forward(self, x, *args, reverse=False, **kwargs):
+        x = torch.flip(x, [1])
+        logdet = torch.zeros(x.size(0)).to(dtype=x.dtype, device=x.device)
+        return x, logdet
+
+    def store_inverse(self):
+        pass
+
+
+class CouplingBlock(nn.Module):
+    def __init__(self, in_channels, hidden_channels, kernel_size, dilation_rate, n_layers,
+                 gin_channels=0, p_dropout=0, sigmoid_scale=False,
+                 share_cond_layers=False, wn=None):
+        super().__init__()
+        self.in_channels = in_channels
+        self.hidden_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dilation_rate = dilation_rate
+        self.n_layers = n_layers
+        self.gin_channels = gin_channels
+        self.p_dropout = p_dropout
+        self.sigmoid_scale = sigmoid_scale
+
+        start = torch.nn.Conv1d(in_channels // 2, hidden_channels, 1)
+        start = torch.nn.utils.weight_norm(start)
+        self.start = start
+        # Initializing last layer to 0 makes the affine coupling layers
+        # do nothing at first.  This helps with training stability
+        end = torch.nn.Conv1d(hidden_channels, in_channels, 1)
+        end.weight.data.zero_()
+        end.bias.data.zero_()
+        self.end = end
+        self.wn = WN(hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels,
+                     p_dropout, share_cond_layers)
+        if wn is not None:
+            self.wn.in_layers = wn.in_layers
+            self.wn.res_skip_layers = wn.res_skip_layers
+
+    def forward(self, x, x_mask=None, reverse=False, g=None, **kwargs):
+        if x_mask is None:
+            x_mask = 1
+        x_0, x_1 = x[:, :self.in_channels // 2], x[:, self.in_channels // 2:]
+
+        x = self.start(x_0) * x_mask
+        x = self.wn(x, x_mask, g)
+        out = self.end(x)
+
+        z_0 = x_0
+        m = out[:, :self.in_channels // 2, :]
+        logs = out[:, self.in_channels // 2:, :]
+        if self.sigmoid_scale:
+            logs = torch.log(1e-6 + torch.sigmoid(logs + 2))
+        if reverse:
+            z_1 = (x_1 - m) * torch.exp(-logs) * x_mask
+            logdet = torch.sum(-logs * x_mask, [1, 2])
+        else:
+            z_1 = (m + torch.exp(logs) * x_1) * x_mask
+            logdet = torch.sum(logs * x_mask, [1, 2])
+        z = torch.cat([z_0, z_1], 1)
+        return z, logdet
+
+    def store_inverse(self):
+        self.wn.remove_weight_norm()
+
+
+class GlowFFTBlocks(FFTBlocks):
+    def __init__(self, hidden_size=128, gin_channels=256, num_layers=2, ffn_kernel_size=5,
+                 dropout=None, num_heads=4, use_pos_embed=True, use_last_norm=True,
+                 norm='ln', use_pos_embed_alpha=True):
+        super().__init__(hidden_size, num_layers, ffn_kernel_size, dropout, num_heads, use_pos_embed,
+                         use_last_norm, norm, use_pos_embed_alpha)
+        self.inp_proj = nn.Conv1d(hidden_size + gin_channels, hidden_size, 1)
+
+    def forward(self, x, x_mask=None, g=None):
+        """
+        :param x: [B, C_x, T]
+        :param x_mask: [B, 1, T]
+        :param g: [B, C_g, T]
+        :return: [B, C_x, T]
+        """
+        if g is not None:
+            x = self.inp_proj(torch.cat([x, g], 1))
+        x = x.transpose(1, 2)
+        x = super(GlowFFTBlocks, self).forward(x, x_mask[:, 0] == 0)
+        x = x.transpose(1, 2)
+        return x
+
+
+class TransformerCouplingBlock(nn.Module):
+    def __init__(self, in_channels, hidden_channels, n_layers,
+                 gin_channels=0, p_dropout=0, sigmoid_scale=False):
+        super().__init__()
+        self.in_channels = in_channels
+        self.hidden_channels = hidden_channels
+        self.n_layers = n_layers
+        self.gin_channels = gin_channels
+        self.p_dropout = p_dropout
+        self.sigmoid_scale = sigmoid_scale
+
+        start = torch.nn.Conv1d(in_channels // 2, hidden_channels, 1)
+        self.start = start
+        # Initializing last layer to 0 makes the affine coupling layers
+        # do nothing at first.  This helps with training stability
+        end = torch.nn.Conv1d(hidden_channels, in_channels, 1)
+        end.weight.data.zero_()
+        end.bias.data.zero_()
+        self.end = end
+        self.fft_blocks = GlowFFTBlocks(
+            hidden_size=hidden_channels,
+            ffn_kernel_size=3,
+            gin_channels=gin_channels,
+            num_layers=n_layers)
+
+    def forward(self, x, x_mask=None, reverse=False, g=None, **kwargs):
+        if x_mask is None:
+            x_mask = 1
+        x_0, x_1 = x[:, :self.in_channels // 2], x[:, self.in_channels // 2:]
+
+        x = self.start(x_0) * x_mask
+        x = self.fft_blocks(x, x_mask, g)
+        out = self.end(x)
+
+        z_0 = x_0
+        m = out[:, :self.in_channels // 2, :]
+        logs = out[:, self.in_channels // 2:, :]
+        if self.sigmoid_scale:
+            logs = torch.log(1e-6 + torch.sigmoid(logs + 2))
+        if reverse:
+            z_1 = (x_1 - m) * torch.exp(-logs) * x_mask
+            logdet = torch.sum(-logs * x_mask, [1, 2])
+        else:
+            z_1 = (m + torch.exp(logs) * x_1) * x_mask
+            logdet = torch.sum(logs * x_mask, [1, 2])
+        z = torch.cat([z_0, z_1], 1)
+        return z, logdet
+
+    def store_inverse(self):
+        pass
+
+
+class FreqFFTCouplingBlock(nn.Module):
+    """Affine coupling layer that predicts shift/log-scale from a 2-D (bins x time) view.
+
+    The untouched half of the input and two slices of the conditioning are
+    stacked as a 3-channel image, downsampled by a strided Conv2d, processed
+    along both axes, fused with an encoding of the remaining conditioning and
+    upsampled back into one shift plane and one log-scale plane.
+
+    NOTE(review): the constants 80, 160 and 10 below are hard-coded; they
+    presumably correspond to 80 mel bins, a squeeze factor of 2 and
+    80 / stride bins after downsampling — confirm against the caller.
+    """
+
+    def __init__(self, in_channels, hidden_channels, n_layers,
+                 gin_channels=0, p_dropout=0, sigmoid_scale=False):
+        super().__init__()
+        self.in_channels = in_channels
+        self.hidden_channels = hidden_channels
+        self.n_layers = n_layers
+        self.gin_channels = gin_channels
+        self.p_dropout = p_dropout
+        self.sigmoid_scale = sigmoid_scale
+
+        hs = hidden_channels
+        stride = 8
+        # Three input planes: x_0 and the two slices of g_mel stacked in forward().
+        self.start = torch.nn.Conv2d(3, hs, kernel_size=stride * 2,
+                                     stride=stride, padding=stride // 2)
+        # Zero-initialised upsampling layer: its two output planes (shift and
+        # log-scale) start at exactly zero.
+        end = nn.ConvTranspose2d(hs, 2, kernel_size=stride, stride=stride)
+        end.weight.data.zero_()
+        end.bias.data.zero_()
+        # Fuses the three hs-channel feature maps concatenated in forward().
+        self.end = nn.Sequential(
+            nn.Conv2d(hs * 3, hs, 3, 1, 1),
+            nn.ReLU(),
+            nn.GroupNorm(4, hs),
+            nn.Conv2d(hs, hs, 3, 1, 1),
+            end
+        )
+        # Applied per time step over the bins axis in forward().
+        self.fft_v = FFTBlocks(hidden_size=hs, ffn_kernel_size=1, num_layers=n_layers)
+        # Applied per bin over the time axis in forward(); despite the name this
+        # is a small 1-D convolution stack, not FFT blocks.
+        self.fft_h = nn.Sequential(
+            nn.Conv1d(hs, hs, 3, 1, 1),
+            nn.ReLU(),
+            nn.Conv1d(hs, hs, 3, 1, 1),
+        )
+        # Encoder for the non-mel part of the conditioning; its input width is
+        # gin_channels minus the 160 channels assumed to belong to the mel part.
+        self.fft_g = nn.Sequential(
+            nn.Conv1d(
+                gin_channels - 160, hs, kernel_size=stride * 2, stride=stride, padding=stride // 2),
+            Permute(0, 2, 1),
+            FFTBlocks(hidden_size=hs, ffn_kernel_size=1, num_layers=n_layers),
+            Permute(0, 2, 1),
+        )
+
+    def forward(self, x, x_mask=None, reverse=False, g=None, **kwargs):
+        """Apply the coupling (or its inverse when ``reverse`` is true).
+
+        :return: ``(z, logdet)`` where ``logdet`` is summed over dims 1 and 2
+        """
+        # Split the conditioning into its first 80 un-squeezed channels and the
+        # rest, then squeeze each part again.
+        # NOTE(review): g is required here although it defaults to None, and the
+        # exact semantics of utils.squeeze/unsqueeze are not visible — confirm.
+        g_, _ = utils.unsqueeze(g)
+        g_mel = g_[:, :80]
+        g_txt = g_[:, 80:]
+        g_mel, _ = utils.squeeze(g_mel)
+        g_txt, _ = utils.squeeze(g_txt)  # [B, C, T]
+
+        if x_mask is None:
+            x_mask = 1
+        x_0, x_1 = x[:, :self.in_channels // 2], x[:, self.in_channels // 2:]
+        # Stack along a new dim 1 so the Conv2d sees a 3-plane image; this needs
+        # x_0 and both g_mel slices to have identical shapes.
+        x = torch.stack([x_0, g_mel[:, :80], g_mel[:, 80:]], 1)
+        x = self.start(x)  # [B, C, N_bins, T]
+        B, C, N_bins, T = x.shape
+
+        # Process across bins: fold time into the batch axis -> [B*T, N_bins, C].
+        x_v = self.fft_v(x.permute(0, 3, 2, 1).reshape(B * T, N_bins, C))
+        x_v = x_v.reshape(B, T, N_bins, -1).permute(0, 3, 2, 1)
+        # x_v = x
+
+        # Process across time: fold bins into the batch axis -> [B*N_bins, C, T].
+        x_h = self.fft_h(x.permute(0, 2, 1, 3).reshape(B * N_bins, C, T))
+        x_h = x_h.reshape(B, N_bins, -1, T).permute(0, 2, 1, 3)
+        # x_h = x
+
+        # Broadcast the conditioning features over the bins axis; the repeat count
+        # 10 must equal N_bins for the concatenation below to succeed.
+        x_g = self.fft_g(g_txt)[:, :, None, :].repeat(1, 1, 10, 1)
+        x = torch.cat([x_v, x_h, x_g], 1)
+        out = self.end(x)
+
+        z_0 = x_0
+        # Plane 0 is the shift, plane 1 the log-scale.
+        m = out[:, 0]
+        logs = out[:, 1]
+        if self.sigmoid_scale:
+            logs = torch.log(1e-6 + torch.sigmoid(logs + 2))
+        if reverse:
+            z_1 = (x_1 - m) * torch.exp(-logs) * x_mask
+            logdet = torch.sum(-logs * x_mask, [1, 2])
+        else:
+            z_1 = (m + torch.exp(logs) * x_1) * x_mask
+            logdet = torch.sum(logs * x_mask, [1, 2])
+        z = torch.cat([z_0, z_1], 1)
+        return z, logdet
+
+    def store_inverse(self):
+        # Nothing to precompute for this layer.
+        pass
+
+
+
+class ResidualCouplingLayer(nn.Module):
+    def __init__(self,
+                 channels,
+                 hidden_channels,
+                 kernel_size,
+                 dilation_rate,
+                 n_layers,
+                 p_dropout=0,
+                 gin_channels=0,
+                 mean_only=False,
+                 nn_type='wn'):
+        assert channels % 2 == 0, "channels should be divisible by 2"
+        super().__init__()
+        self.channels = channels
+        self.hidden_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dilation_rate = dilation_rate
+        self.n_layers = n_layers
+        self.half_channels = channels // 2
+        self.mean_only = mean_only
+
+        self.pre = nn.Conv1d(self.half_channels, hidden_channels, 1)
+        if nn_type == 'wn':
+            self.enc = WN(hidden_channels, kernel_size, dilation_rate, n_layers, p_dropout=p_dropout,
+                          gin_channels=gin_channels)
+        # elif nn_type == 'conv':
+        #     self.enc = ConditionalConvBlocks(
+        #         hidden_channels, gin_channels, hidden_channels, [1] * n_layers, kernel_size,
+        #         layers_in_block=1, is_BTC=False)
+        self.post = nn.Conv1d(hidden_channels, self.half_channels * (2 - mean_only), 1)
+        self.post.weight.data.zero_()
+        self.post.bias.data.zero_()
+
+    def forward(self, x, x_mask, g=None, reverse=False):
+        x0, x1 = torch.split(x, [self.half_channels] * 2, 1)
+        h = self.pre(x0) * x_mask
+        h = self.enc(h, x_mask=x_mask, g=g)
+        stats = self.post(h) * x_mask
+        if not self.mean_only:
+            m, logs = torch.split(stats, [self.half_channels] * 2, 1)
+        else:
+            m = stats
+            logs = torch.zeros_like(m)
+
+        if not reverse:
+            x1 = m + x1 * torch.exp(logs) * x_mask
+            x = torch.cat([x0, x1], 1)
+            logdet = torch.sum(logs, [1, 2])
+            return x, logdet
+        else:
+            x1 = (x1 - m) * torch.exp(-logs) * x_mask
+            x = torch.cat([x0, x1], 1)
+            logdet = -torch.sum(logs, [1, 2])
+            return x, logdet
+
+
+class ResidualCouplingBlock(nn.Module):
+    def __init__(self,
+                 channels,
+                 hidden_channels,
+                 kernel_size,
+                 dilation_rate,
+                 n_layers,
+                 n_flows=4,
+                 gin_channels=0,
+                 nn_type='wn'):
+        super().__init__()
+        self.channels = channels
+        self.hidden_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dilation_rate = dilation_rate
+        self.n_layers = n_layers
+        self.n_flows = n_flows
+        self.gin_channels = gin_channels
+
+        self.flows = nn.ModuleList()
+        for i in range(n_flows):
+            self.flows.append(
+                ResidualCouplingLayer(channels, hidden_channels, kernel_size, dilation_rate, n_layers,
+                                      gin_channels=gin_channels, mean_only=True, nn_type=nn_type))
+            self.flows.append(Flip())
+
+    def forward(self, x, x_mask, g=None, reverse=False):
+        if not reverse:
+            for flow in self.flows:
+                x, _ = flow(x, x_mask, g=g, reverse=reverse)
+        else:
+            for flow in reversed(self.flows):
+                x, _ = flow(x, x_mask, g=g, reverse=reverse)
+        return x
+
+
+class Glow(nn.Module):
+    def __init__(self,
+                 in_channels,
+                 hidden_channels,
+                 kernel_size,
+                 dilation_rate,
+                 n_blocks,
+                 n_layers,
+                 p_dropout=0.,
+                 n_split=4,
+                 n_sqz=2,
+                 sigmoid_scale=False,
+                 gin_channels=0,
+                 inv_conv_type='near',
+                 share_cond_layers=False,
+                 share_wn_layers=0,
+                 ):
+        super().__init__()
+        """
+        Note that regularization likes weight decay can leads to Nan error!
+        """
+
+        self.in_channels = in_channels
+        self.hidden_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dilation_rate = dilation_rate
+        self.n_blocks = n_blocks
+        self.n_layers = n_layers
+        self.p_dropout = p_dropout
+        self.n_split = n_split
+        self.n_sqz = n_sqz
+        self.sigmoid_scale = sigmoid_scale
+        self.gin_channels = gin_channels
+        self.share_cond_layers = share_cond_layers
+        if gin_channels != 0 and share_cond_layers:
+            cond_layer = torch.nn.Conv1d(gin_channels * n_sqz, 2 * hidden_channels * n_layers, 1)
+            self.cond_layer = torch.nn.utils.weight_norm(cond_layer, name='weight')
+        wn = None
+        self.flows = nn.ModuleList()
+        for b in range(n_blocks):
+            self.flows.append(ActNorm(channels=in_channels * n_sqz))
+            if inv_conv_type == 'near':
+                self.flows.append(InvConvNear(channels=in_channels * n_sqz, n_split=n_split, n_sqz=n_sqz))
+            if inv_conv_type == 'invconv':
+                self.flows.append(InvConv(channels=in_channels * n_sqz))
+            if share_wn_layers > 0:
+                if b % share_wn_layers == 0:
+                    wn = WN(hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels * n_sqz,
+                            p_dropout, share_cond_layers)
+            self.flows.append(
+                CouplingBlock(
+                    in_channels * n_sqz,
+                    hidden_channels,
+                    kernel_size=kernel_size,
+                    dilation_rate=dilation_rate,
+                    n_layers=n_layers,
+                    gin_channels=gin_channels * n_sqz,
+                    p_dropout=p_dropout,
+                    sigmoid_scale=sigmoid_scale,
+                    share_cond_layers=share_cond_layers,
+                    wn=wn
+                ))
+
+    def forward(self, x, x_mask=None, g=None, reverse=False, return_hiddens=False):
+        """
+        x: [B,T,C]
+        x_mask: [B,T]
+        g: [B,T,C]
+        """
+        x = x.transpose(1,2)
+        x_mask = x_mask.unsqueeze(1)
+        if g is not None:
+            g = g.transpose(1,2)
+
+        logdet_tot = 0
+        if not reverse:
+            flows = self.flows
+        else:
+            flows = reversed(self.flows)
+        if return_hiddens:
+            hs = []
+        if self.n_sqz > 1:
+            x, x_mask_ = utils.squeeze(x, x_mask, self.n_sqz)
+            if g is not None:
+                g, _ = utils.squeeze(g, x_mask, self.n_sqz)
+            x_mask = x_mask_
+        if self.share_cond_layers and g is not None:
+            g = self.cond_layer(g)
+        for f in flows:
+            x, logdet = f(x, x_mask, g=g, reverse=reverse)
+            if return_hiddens:
+                hs.append(x)
+            logdet_tot += logdet
+        if self.n_sqz > 1:
+            x, x_mask = utils.unsqueeze(x, x_mask, self.n_sqz)
+        
+        x = x.transpose(1,2)
+        if return_hiddens:
+            return x, logdet_tot, hs
+        return x, logdet_tot
+
+    def store_inverse(self):
+        def remove_weight_norm(m):
+            try:
+                nn.utils.remove_weight_norm(m)
+            except ValueError:  # this module didn't have weight norm
+                return
+
+        self.apply(remove_weight_norm)
+        for f in self.flows:
+            f.store_inverse()
+
+
+if __name__ == '__main__':
+    model = Glow(in_channels=64,
+                hidden_channels=128,
+                kernel_size=5,
+                dilation_rate=1,
+                n_blocks=12,
+                n_layers=4,
+                p_dropout=0.0,
+                n_split=4,
+                n_sqz=2,
+                sigmoid_scale=False,
+                gin_channels=80
+                )
+    exp = torch.rand([1,1440,64])
+    mel = torch.rand([1,1440,80])
+    x_mask = torch.ones([1,1440],dtype=torch.float32)
+    y, logdet = model(exp, x_mask,g=mel, reverse=False)
+    pred_exp, logdet = model(y, x_mask,g=mel, reverse=False)
+    # y: [b, t,c=64]
+    print(" ")
\ No newline at end of file
diff --git a/modules/audio2motion/multi_length_disc.py b/modules/audio2motion/multi_length_disc.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a57df2cef929691f2f1fa41981ed8316ff5dce6
--- /dev/null
+++ b/modules/audio2motion/multi_length_disc.py
@@ -0,0 +1,340 @@
+import numpy as np
+import random
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from modules.audio2motion.cnn_models import LambdaLayer
+
+
+class Discriminator1DFactory(nn.Module):
+    def __init__(self, time_length, kernel_size=3, in_dim=1, hidden_size=128, norm_type='bn'):
+        super(Discriminator1DFactory, self).__init__()
+        padding = kernel_size // 2
+
+        def discriminator_block(in_filters, out_filters, first=False):
+            """
+            Input: (B, c, T)
+            Output:(B, c, T//2)
+            """
+            conv = nn.Conv1d(in_filters, out_filters, kernel_size, 2, padding)
+            block = [
+                conv,  # padding = kernel//2
+                nn.LeakyReLU(0.2, inplace=True),
+                nn.Dropout2d(0.25)
+            ]
+            if norm_type == 'bn' and not first:
+                block.append(nn.BatchNorm1d(out_filters, 0.8))
+            if norm_type == 'in' and not first:
+                block.append(nn.InstanceNorm1d(out_filters, affine=True))
+            block = nn.Sequential(*block)
+            return block
+
+        if time_length >= 8:
+            self.model = nn.ModuleList([
+                discriminator_block(in_dim, hidden_size, first=True),
+                discriminator_block(hidden_size, hidden_size),
+                discriminator_block(hidden_size, hidden_size),
+            ])
+            ds_size = time_length // (2 ** 3)
+        elif time_length == 3:
+            self.model = nn.ModuleList([
+                nn.Sequential(*[
+                    nn.Conv1d(in_dim, hidden_size, 3, 1, 0),
+                    nn.LeakyReLU(0.2, inplace=True),
+                    nn.Dropout2d(0.25),
+                    nn.Conv1d(hidden_size, hidden_size, 1, 1, 0),
+                    nn.LeakyReLU(0.2, inplace=True),
+                    nn.Dropout2d(0.25),
+                    nn.BatchNorm1d(hidden_size, 0.8),
+                    nn.Conv1d(hidden_size, hidden_size, 1, 1, 0),
+                    nn.LeakyReLU(0.2, inplace=True),
+                    nn.Dropout2d(0.25),
+                    nn.BatchNorm1d(hidden_size, 0.8)
+                ])
+            ])
+            ds_size = 1
+        elif time_length == 1:
+            self.model = nn.ModuleList([
+                nn.Sequential(*[
+                    nn.Linear(in_dim, hidden_size),
+                    nn.LeakyReLU(0.2, inplace=True),
+                    nn.Dropout2d(0.25),
+                    nn.Linear(hidden_size, hidden_size),
+                    nn.LeakyReLU(0.2, inplace=True),
+                    nn.Dropout2d(0.25),
+                ])
+            ])
+            ds_size = 1
+
+        self.adv_layer = nn.Linear(hidden_size * ds_size, 1)
+
+    def forward(self, x):
+        """
+
+        :param x: [B, C, T]
+        :return: validity: [B, 1], h: List of hiddens
+        """
+        h = []
+        if x.shape[-1] == 1:
+            x = x.squeeze(-1)
+        for l in self.model:
+            x = l(x)
+            h.append(x)
+        if x.ndim == 2:
+            b, ct = x.shape
+            use_sigmoid = True
+        else:
+            b, c, t = x.shape
+            ct = c * t
+            use_sigmoid = False
+        x = x.view(b, ct)
+        validity = self.adv_layer(x)  # [B, 1]
+        if use_sigmoid:
+            validity = torch.sigmoid(validity)
+        return validity, h
+
+
+class CosineDiscriminator1DFactory(nn.Module):
+    def __init__(self, time_length, kernel_size=3, in_dim=1, hidden_size=128, norm_type='bn'):
+        super().__init__()
+        padding = kernel_size // 2
+
+        def discriminator_block(in_filters, out_filters, first=False):
+            """
+            Input: (B, c, T)
+            Output:(B, c, T//2)
+            """
+            conv = nn.Conv1d(in_filters, out_filters, kernel_size, 2, padding)
+            block = [
+                conv,  # padding = kernel//2
+                nn.LeakyReLU(0.2, inplace=True),
+                nn.Dropout2d(0.25)
+            ]
+            if norm_type == 'bn' and not first:
+                block.append(nn.BatchNorm1d(out_filters, 0.8))
+            if norm_type == 'in' and not first:
+                block.append(nn.InstanceNorm1d(out_filters, affine=True))
+            block = nn.Sequential(*block)
+            return block
+
+        self.model1 = nn.ModuleList([
+            discriminator_block(in_dim, hidden_size, first=True),
+            discriminator_block(hidden_size, hidden_size),
+            discriminator_block(hidden_size, hidden_size),
+        ])
+
+        self.model2 = nn.ModuleList([
+            discriminator_block(in_dim, hidden_size, first=True),
+            discriminator_block(hidden_size, hidden_size),
+            discriminator_block(hidden_size, hidden_size),
+        ])
+
+        self.relu = nn.ReLU()
+    def forward(self, x1, x2):
+        """
+
+        :param x1: [B, C, T]
+        :param x2: [B, C, T]
+        :return: validity: [B, 1], h: List of hiddens
+        """
+        h1, h2 = [], []
+        for l in self.model1:
+            x1 = l(x1)
+            h1.append(x1)
+        for l in self.model2:
+            x2 = l(x2)
+            h2.append(x1)
+        b,c,t = x1.shape
+        x1 = x1.view(b, c*t)
+        x2 = x2.view(b, c*t)
+        x1 = self.relu(x1)
+        x2 = self.relu(x2)
+        # x1 = F.normalize(x1, p=2, dim=1)
+        # x2 = F.normalize(x2, p=2, dim=1)
+        validity = F.cosine_similarity(x1, x2)    
+        return validity, [h1,h2]
+
+
+class MultiWindowDiscriminator(nn.Module):
+    def __init__(self, time_lengths, cond_dim=80, in_dim=64, kernel_size=3, hidden_size=128, disc_type='standard', norm_type='bn', reduction='sum'):
+        super(MultiWindowDiscriminator, self).__init__()
+        self.win_lengths = time_lengths
+        self.reduction = reduction
+        self.disc_type = disc_type
+
+        if cond_dim > 0:
+            self.use_cond = True
+            self.cond_proj_layers = nn.ModuleList()
+            self.in_proj_layers = nn.ModuleList()
+        else:
+            self.use_cond = False
+
+        self.conv_layers = nn.ModuleList()
+        for time_length in time_lengths:
+            conv_layer = [
+                Discriminator1DFactory(
+                    time_length, kernel_size, in_dim=64, hidden_size=hidden_size,
+                    norm_type=norm_type) if self.disc_type == 'standard' 
+                else CosineDiscriminator1DFactory(time_length, kernel_size, in_dim=64, 
+                    hidden_size=hidden_size,norm_type=norm_type)
+            ]
+            self.conv_layers += conv_layer
+            if self.use_cond:
+                self.cond_proj_layers.append(nn.Linear(cond_dim, 64))
+                self.in_proj_layers.append(nn.Linear(in_dim, 64))
+    
+    def clip(self, x, cond, x_len, win_length, start_frames=None):
+        '''Ramdom clip x to win_length.
+        Args:
+            x (tensor) : (B,  T, C).
+            cond (tensor) : (B, T, H).
+            x_len (tensor) : (B,).
+            win_length (int): target clip length
+
+        Returns:
+            (tensor) : (B, c_in, win_length, n_bins).
+
+        '''
+        clip_from_same_frame = start_frames is None
+        T_start = 0
+        # T_end = x_len.max() - win_length
+        T_end = x_len.min() - win_length
+        if T_end < 0:
+            return None, None, start_frames
+        T_end = T_end.item()
+        if start_frames is None:
+            start_frame = np.random.randint(low=T_start, high=T_end + 1)
+            start_frames = [start_frame] * x.size(0)
+        else:
+            start_frame = start_frames[0]
+
+
+        if clip_from_same_frame:
+            x_batch = x[:, start_frame: start_frame + win_length, :]
+            c_batch = cond[:, start_frame: start_frame + win_length, :] if cond is not None else None
+        else:
+            x_lst = []
+            c_lst = []
+            for i, start_frame in enumerate(start_frames):
+                x_lst.append(x[i, start_frame: start_frame + win_length, :])
+                if cond is not None:
+                    c_lst.append(cond[i, start_frame: start_frame + win_length, :])
+            x_batch = torch.stack(x_lst, dim=0)
+            if cond is None:
+                c_batch = None
+            else:
+                c_batch = torch.stack(c_lst, dim=0)
+        return x_batch, c_batch, start_frames
+
+    def forward(self, x, x_len, cond=None, start_frames_wins=None):
+        '''
+        Args:
+            x (tensor): input mel, (B, T, C).
+            x_length (tensor): len of per mel. (B,).
+
+        Returns:
+            tensor : (B).
+        '''
+        validity = []
+        if start_frames_wins is None:
+            start_frames_wins = [None] * len(self.conv_layers)
+        h = []
+        for i, start_frames in zip(range(len(self.conv_layers)), start_frames_wins):
+            x_clip, c_clip, start_frames = self.clip(
+                x, cond, x_len, self.win_lengths[i], start_frames)  # (B, win_length, C)
+            start_frames_wins[i] = start_frames
+            if x_clip is None:
+                continue
+            if self.disc_type == 'standard':
+                if self.use_cond:
+                    x_clip = self.in_proj_layers[i](x_clip)  # (B, T, C)
+                    c_clip = self.cond_proj_layers[i](c_clip)
+                    x_clip = x_clip + c_clip
+                validity_pred, h_ = self.conv_layers[i](x_clip.transpose(1,2))
+            elif self.disc_type == 'cosine':
+                assert self.use_cond is True
+                x_clip = self.in_proj_layers[i](x_clip)  # (B, T, C)
+                c_clip = self.cond_proj_layers[i](c_clip)
+                validity_pred, h_ = self.conv_layers[i](x_clip.transpose(1,2), c_clip.transpose(1,2))
+            else:
+                raise NotImplementedError
+
+            h += h_
+            validity.append(validity_pred)
+        if len(validity) != len(self.conv_layers):
+            return None, start_frames_wins, h
+        if self.reduction == 'sum':
+            validity = sum(validity)  # [B]
+        elif self.reduction == 'stack':
+            validity = torch.stack(validity, -1)  # [B, W_L]
+        return validity, start_frames_wins, h
+
+
+class Discriminator(nn.Module):
+    def __init__(self, x_dim=80, y_dim=64, disc_type='standard', 
+                uncond_disc=False, kernel_size=3, hidden_size=128, norm_type='bn', reduction='sum', time_lengths=(8,16,32)):
+        """_summary_
+
+        Args:
+            time_lengths (list, optional): the list of  window size. Defaults to [32, 64, 128].
+            x_dim (int, optional): the dim of audio features. Defaults to 80, corresponding to mel-spec.
+            y_dim (int, optional): the dim of facial coeff. Defaults to 64, correspond to exp; other options can be 7(pose) or 71(exp+pose).
+            kernel (tuple, optional): _description_. Defaults to (3, 3).
+            c_in (int, optional): _description_. Defaults to 1.
+            hidden_size (int, optional): _description_. Defaults to 128.
+            norm_type (str, optional): _description_. Defaults to 'bn'.
+            reduction (str, optional): _description_. Defaults to 'sum'.
+            uncond_disc (bool, optional): _description_. Defaults to False.
+        """
+        super(Discriminator, self).__init__()
+        self.time_lengths = time_lengths
+        self.x_dim, self.y_dim = x_dim, y_dim
+        self.disc_type = disc_type
+        self.reduction = reduction
+        self.uncond_disc = uncond_disc
+
+        if uncond_disc:
+            self.x_dim = 0
+            cond_dim = 0
+
+        else:
+            cond_dim = 64
+            self.mel_encoder = nn.Sequential(*[
+                    nn.Conv1d(self.x_dim, 64, 3, 1, 1, bias=False),
+                    nn.BatchNorm1d(64),
+                    nn.GELU(),
+                    nn.Conv1d(64, cond_dim, 3, 1, 1, bias=False)
+                ]) 
+
+        self.disc = MultiWindowDiscriminator(
+            time_lengths=self.time_lengths,
+            in_dim=self.y_dim,
+            cond_dim=cond_dim,
+            kernel_size=kernel_size,
+            hidden_size=hidden_size, norm_type=norm_type,
+            reduction=reduction,
+            disc_type=disc_type
+        )
+        self.downsampler = LambdaLayer(lambda x: F.interpolate(x.transpose(1,2), scale_factor=0.5, mode='nearest').transpose(1,2))
+    
+    @property
+    def device(self):
+        return self.disc.parameters().__next__().device
+
+    def forward(self,x, batch, start_frames_wins=None):
+        """
+
+        :param x: [B, T, C]
+        :param cond: [B, T, cond_size]
+        :return:
+        """
+        x = x.to(self.device)
+        if not self.uncond_disc:
+            mel = self.downsampler(batch['mel'].to(self.device))
+            mel_feat = self.mel_encoder(mel.transpose(1,2)).transpose(1,2)
+        else:
+            mel_feat = None
+        x_len = x.sum(-1).ne(0).int().sum([1])
+        disc_confidence, start_frames_wins, h = self.disc(x, x_len, mel_feat, start_frames_wins=start_frames_wins)
+        return disc_confidence
+    
diff --git a/modules/audio2motion/transformer_base.py b/modules/audio2motion/transformer_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..39bbe0073907742b2921b28afed2b241b7caeb60
--- /dev/null
+++ b/modules/audio2motion/transformer_base.py
@@ -0,0 +1,988 @@
+import math
+import torch
+from torch import nn
+from torch.nn import Parameter
+import torch.onnx.operators
+import torch.nn.functional as F
+from collections import defaultdict
+
+
+def make_positions(tensor, padding_idx):
+    """Number the non-padding entries of ``tensor`` along dim 1.
+
+    Non-padding entries receive ``padding_idx + 1``, ``padding_idx + 2``, ...
+    in order of appearance; entries equal to ``padding_idx`` map to
+    ``padding_idx`` itself.
+    """
+    # The int mask -> cumsum -> type_as -> long sequence is deliberate: it is
+    # balanced to work with both ONNX export and XLA (XLA prefers ints, cumsum
+    # outputs longs by default, and ONNX cannot handle cumsum's dtype kwarg).
+    not_pad = tensor.ne(padding_idx).int()
+    running_index = torch.cumsum(not_pad, dim=1).type_as(not_pad)
+    positions = (running_index * not_pad).long()
+    return positions + padding_idx
+
+
+def softmax(x, dim):
+    """Softmax of ``x`` along ``dim``, computed and returned as float32."""
+    probs = F.softmax(x, dim=dim, dtype=torch.float32)
+    return probs
+
+
+# Per-class-name counter used to hand out instance ids; starts at 0.
+INCREMENTAL_STATE_INSTANCE_ID = defaultdict(int)
+
+
+def _get_full_incremental_state_key(module_instance, key):
+    """Return ``'<ClassName>.<instance id>.<key>'`` for ``module_instance``.
+
+    The first time an instance is seen it is tagged with a ``_instance_id``
+    attribute taken from a per-class-name counter, so that incremental state
+    is not shared across module instances.
+    """
+    cls_name = module_instance.__class__.__name__
+
+    if not hasattr(module_instance, '_instance_id'):
+        next_id = INCREMENTAL_STATE_INSTANCE_ID[cls_name] + 1
+        INCREMENTAL_STATE_INSTANCE_ID[cls_name] = next_id
+        module_instance._instance_id = next_id
+
+    return f'{cls_name}.{module_instance._instance_id}.{key}'
+
+
+
+def get_incremental_state(module, incremental_state, key):
+    """Return the value stored for ``module``/``key``, or ``None`` if there is none.
+
+    ``None`` is also returned when ``incremental_state`` itself is ``None``.
+    """
+    # The key is built first so the module is tagged with its instance id even
+    # when no state container is supplied.
+    full_key = _get_full_incremental_state_key(module, key)
+    if incremental_state is None:
+        return None
+    if full_key in incremental_state:
+        return incremental_state[full_key]
+    return None
+
+
+def set_incremental_state(module, incremental_state, key, value):
+    """Store ``value`` for ``module``/``key``; a no-op when ``incremental_state`` is ``None``."""
+    if incremental_state is None:
+        return
+    incremental_state[_get_full_incremental_state_key(module, key)] = value
+
+
+
+class Reshape(nn.Module):
+    """Module form of ``Tensor.view`` with the target shape fixed at construction."""
+
+    def __init__(self, *args):
+        super().__init__()
+        # Positional arguments are kept as a tuple and used as the view shape.
+        self.shape = args
+
+    def forward(self, x):
+        target_shape = self.shape
+        return x.view(target_shape)
+
+
+class Permute(nn.Module):
+    """Module form of ``Tensor.permute`` with the dimension order fixed at construction."""
+
+    def __init__(self, *args):
+        super().__init__()
+        # Positional arguments are kept as a tuple and used as the new dim order.
+        self.args = args
+
+    def forward(self, x):
+        dim_order = self.args
+        return x.permute(dim_order)
+
+
+class LinearNorm(torch.nn.Module):
+    """``torch.nn.Linear`` whose weight is Xavier-uniform initialised.
+
+    The Xavier gain is ``torch.nn.init.calculate_gain(w_init_gain)``.
+    """
+
+    def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'):
+        super().__init__()
+        self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias)
+
+        gain = torch.nn.init.calculate_gain(w_init_gain)
+        torch.nn.init.xavier_uniform_(self.linear_layer.weight, gain=gain)
+
+    def forward(self, x):
+        projected = self.linear_layer(x)
+        return projected
+
+
+class ConvNorm(torch.nn.Module):
+    """``torch.nn.Conv1d`` whose weight is Xavier-uniform initialised.
+
+    When ``padding`` is ``None`` it is derived from ``kernel_size`` and
+    ``dilation`` as ``dilation * (kernel_size - 1) / 2``; this requires an odd
+    ``kernel_size`` (checked with an ``assert``).
+    """
+
+    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1,
+                 padding=None, dilation=1, bias=True, w_init_gain='linear'):
+        super().__init__()
+        if padding is None:
+            assert (kernel_size % 2 == 1)
+            padding = int(dilation * (kernel_size - 1) / 2)
+
+        self.conv = torch.nn.Conv1d(
+            in_channels,
+            out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            bias=bias,
+        )
+
+        gain = torch.nn.init.calculate_gain(w_init_gain)
+        torch.nn.init.xavier_uniform_(self.conv.weight, gain=gain)
+
+    def forward(self, signal):
+        return self.conv(signal)
+
+
+def Embedding(num_embeddings, embedding_dim, padding_idx=None):
+    """Build an ``nn.Embedding`` with N(0, embedding_dim ** -0.5) weights.
+
+    When ``padding_idx`` is given, that row is reset to zeros after the
+    normal initialisation.
+    """
+    table = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
+    init_std = embedding_dim ** -0.5
+    nn.init.normal_(table.weight, mean=0, std=init_std)
+    if padding_idx is not None:
+        nn.init.constant_(table.weight[padding_idx], 0)
+    return table
+
+
+class GroupNorm1DTBC(nn.GroupNorm):
+    """``nn.GroupNorm`` for 3-D inputs whose channel axis is last.
+
+    The input is permuted with ``(1, 2, 0)`` before normalisation and with
+    ``(2, 0, 1)`` afterwards, so the output layout matches the input layout.
+    """
+
+    def forward(self, input):
+        channels_second = input.permute(1, 2, 0)
+        normed = super().forward(channels_second)
+        return normed.permute(2, 0, 1)
+
+
+def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True, export=False):
+    """Return apex's ``FusedLayerNorm`` when usable, else ``torch.nn.LayerNorm``.
+
+    The fused variant is attempted only when ``export`` is false and CUDA is
+    available; an ``ImportError`` while importing or constructing it falls
+    back to the stock PyTorch layer.
+    """
+    try_fused = (not export) and torch.cuda.is_available()
+    if try_fused:
+        try:
+            from apex.normalization import FusedLayerNorm
+            # Construction stays inside the ``try`` so that an ImportError
+            # raised there is also covered by the fallback.
+            return FusedLayerNorm(normalized_shape, eps, elementwise_affine)
+        except ImportError:
+            pass
+    return torch.nn.LayerNorm(normalized_shape, eps, elementwise_affine)
+
+
+def Linear(in_features, out_features, bias=True):
+    """Build an ``nn.Linear`` with Xavier-uniform weight and, if present, zero bias."""
+    layer = nn.Linear(in_features, out_features, bias)
+    nn.init.xavier_uniform_(layer.weight)
+    if bias:
+        nn.init.constant_(layer.bias, 0.)
+    return layer
+
+
+class SinusoidalPositionalEmbedding(nn.Module):
+    """This module produces sinusoidal positional embeddings of any length.
+
+    Padding symbols are ignored. The embedding table is kept in
+    ``self.weights`` (a plain tensor, not a parameter or buffer) and is
+    rebuilt with more rows whenever a longer sequence is seen.
+    """
+
+    def __init__(self, embedding_dim, padding_idx, init_size=1024):
+        super().__init__()
+        self.embedding_dim = embedding_dim
+        self.padding_idx = padding_idx
+        self.weights = SinusoidalPositionalEmbedding.get_embedding(
+            init_size,
+            embedding_dim,
+            padding_idx,
+        )
+        # Dummy buffer used in forward() as the dtype/device target for the
+        # table. It is zero-filled rather than left uninitialised so the value
+        # written to a state_dict is deterministic.
+        self.register_buffer('_float_tensor', torch.zeros(1, dtype=torch.float))
+
+    @staticmethod
+    def get_embedding(num_embeddings, embedding_dim, padding_idx=None):
+        """Build sinusoidal embeddings.
+
+        This matches the implementation in tensor2tensor, but differs slightly
+        from the description in Section 3.5 of "Attention Is All You Need".
+
+        Returns a ``[num_embeddings, embedding_dim]`` float tensor: sines in
+        the first half of the columns, cosines in the second half, a trailing
+        zero column when ``embedding_dim`` is odd, and an all-zero row at
+        ``padding_idx`` when one is given.
+        """
+        half_dim = embedding_dim // 2
+        # With half_dim == 1 (embedding_dim 2 or 3) the divisor half_dim - 1
+        # would be zero; clamping it to 1 is harmless because the only
+        # frequency index is 0, which gives exp(0) == 1 regardless.
+        emb = math.log(10000) / max(half_dim - 1, 1)
+        emb = torch.exp(torch.arange(half_dim, dtype=torch.float) * -emb)
+        emb = torch.arange(num_embeddings, dtype=torch.float).unsqueeze(1) * emb.unsqueeze(0)
+        emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1).view(num_embeddings, -1)
+        if embedding_dim % 2 == 1:
+            # zero pad
+            emb = torch.cat([emb, torch.zeros(num_embeddings, 1)], dim=1)
+        if padding_idx is not None:
+            emb[padding_idx, :] = 0
+        return emb
+
+    def forward(self, input, incremental_state=None, timestep=None, positions=None, **kwargs):
+        """Input is expected to be of size [bsz x seqlen].
+
+        With ``incremental_state`` set, a single position is returned,
+        expanded to ``[bsz, 1, embedding_dim]``; it is taken from ``timestep``
+        when given, else from ``seq_len``. Otherwise the result is
+        ``[bsz, seq_len, embedding_dim]``, looked up at ``positions`` or, when
+        those are not supplied, at positions derived from the non-padding
+        entries of ``input``.
+        """
+        bsz, seq_len = input.shape[:2]
+        max_pos = self.padding_idx + 1 + seq_len
+        if self.weights is None or max_pos > self.weights.size(0):
+            # recompute/expand embeddings if needed
+            self.weights = SinusoidalPositionalEmbedding.get_embedding(
+                max_pos,
+                self.embedding_dim,
+                self.padding_idx,
+            )
+        # Follow the module's device/dtype via the registered buffer.
+        self.weights = self.weights.to(self._float_tensor)
+
+        if incremental_state is not None:
+            # positions is the same for every token when decoding a single step
+            pos = timestep.view(-1)[0] + 1 if timestep is not None else seq_len
+            return self.weights[self.padding_idx + pos, :].expand(bsz, 1, -1)
+
+        positions = make_positions(input, self.padding_idx) if positions is None else positions
+        return self.weights.index_select(0, positions.view(-1)).view(bsz, seq_len, -1).detach()
+
+    def max_positions(self):
+        """Maximum number of supported positions."""
+        return int(1e5)  # an arbitrary large number
+
+
+class ConvTBC(nn.Module):
+    """Thin module wrapper around ``torch.conv_tbc``.
+
+    NOTE(review): ``weight`` and ``bias`` are allocated without being
+    initialised in this class; presumably the caller initialises them -
+    confirm at the call sites.
+    """
+
+    def __init__(self, in_channels, out_channels, kernel_size, padding=0):
+        super().__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.padding = padding
+
+        # Weight is laid out as [kernel_size, in_channels, out_channels].
+        weight_shape = (self.kernel_size, in_channels, out_channels)
+        self.weight = torch.nn.Parameter(torch.empty(*weight_shape))
+        self.bias = torch.nn.Parameter(torch.empty(out_channels))
+
+    def forward(self, input):
+        dense_input = input.contiguous()
+        return torch.conv_tbc(dense_input, self.weight, self.bias, self.padding)
+
+
+class MultiheadAttention(nn.Module):
+    """Multi-head attention operating on Time x Batch x Channel tensors.
+
+    ``forward`` has two code paths. When ``F.multi_head_attention_forward``
+    exists and none of ``incremental_state``, ``static_kv`` or
+    ``reset_attn_weight`` is used, the call is delegated to that function and
+    its return value is passed through unchanged. Otherwise attention is
+    computed explicitly here, with optional key/value caching in
+    ``incremental_state``, and ``(attn, (attn_weights, attn_logits))`` is
+    returned.
+    """
+
+    def __init__(self, embed_dim, num_heads, kdim=None, vdim=None, dropout=0., bias=True,
+                 add_bias_kv=False, add_zero_attn=False, self_attention=False,
+                 encoder_decoder_attention=False):
+        super().__init__()
+        self.embed_dim = embed_dim
+        self.kdim = kdim if kdim is not None else embed_dim
+        self.vdim = vdim if vdim is not None else embed_dim
+        self.qkv_same_dim = self.kdim == embed_dim and self.vdim == embed_dim
+
+        self.num_heads = num_heads
+        self.dropout = dropout
+        self.head_dim = embed_dim // num_heads
+        assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"
+        self.scaling = self.head_dim ** -0.5
+
+        self.self_attention = self_attention
+        self.encoder_decoder_attention = encoder_decoder_attention
+
+        assert not self.self_attention or self.qkv_same_dim, (
+            'Self-attention requires query, key and '
+            'value to be of the same size'
+        )
+
+        # One packed projection matrix when q/k/v share a width, otherwise
+        # three separate ones.
+        if self.qkv_same_dim:
+            self.in_proj_weight = Parameter(torch.Tensor(3 * embed_dim, embed_dim))
+        else:
+            self.k_proj_weight = Parameter(torch.Tensor(embed_dim, self.kdim))
+            self.v_proj_weight = Parameter(torch.Tensor(embed_dim, self.vdim))
+            self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim))
+
+        if bias:
+            self.in_proj_bias = Parameter(torch.Tensor(3 * embed_dim))
+        else:
+            self.register_parameter('in_proj_bias', None)
+
+        self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
+
+        if add_bias_kv:
+            self.bias_k = Parameter(torch.Tensor(1, 1, embed_dim))
+            self.bias_v = Parameter(torch.Tensor(1, 1, embed_dim))
+        else:
+            self.bias_k = self.bias_v = None
+
+        self.add_zero_attn = add_zero_attn
+
+        self.reset_parameters()
+
+        # Whether forward() may delegate to the functional implementation.
+        self.enable_torch_version = hasattr(F, "multi_head_attention_forward")
+        # Attention probabilities remembered by forward(reset_attn_weight=True).
+        self.last_attn_probs = None
+
+    def reset_parameters(self):
+        """Xavier-initialise the projection weights and zero the biases."""
+        if self.qkv_same_dim:
+            nn.init.xavier_uniform_(self.in_proj_weight)
+        else:
+            nn.init.xavier_uniform_(self.k_proj_weight)
+            nn.init.xavier_uniform_(self.v_proj_weight)
+            nn.init.xavier_uniform_(self.q_proj_weight)
+
+        nn.init.xavier_uniform_(self.out_proj.weight)
+        if self.in_proj_bias is not None:
+            nn.init.constant_(self.in_proj_bias, 0.)
+            nn.init.constant_(self.out_proj.bias, 0.)
+        if self.bias_k is not None:
+            nn.init.xavier_normal_(self.bias_k)
+        if self.bias_v is not None:
+            nn.init.xavier_normal_(self.bias_v)
+
+    def forward(
+            self,
+            query, key, value,
+            key_padding_mask=None,
+            incremental_state=None,
+            need_weights=True,
+            static_kv=False,
+            attn_mask=None,
+            before_softmax=False,
+            need_head_weights=False,
+            enc_dec_attn_constraint_mask=None,
+            reset_attn_weight=None
+    ):
+        """Input shape: Time x Batch x Channel
+
+        Args:
+            key_padding_mask (ByteTensor or BoolTensor, optional): mask to
+                exclude keys that are pads, of shape `(batch, src_len)`,
+                where padding elements are indicated by 1s / True.
+            incremental_state (dict, optional): container for cached keys,
+                values and key padding mask; when given, the explicit code
+                path is used and the cache is read and updated.
+            need_weights (bool, optional): return the attention weights,
+                averaged over heads (default: True).
+            static_kv (bool, optional): reuse the cached keys/values as-is
+                instead of appending to them.
+            attn_mask (ByteTensor, optional): typically used to
+                implement causal attention, where the mask prevents the
+                attention from looking forward in time (default: None).
+                It is added to the attention logits.
+            before_softmax (bool, optional): return the raw attention
+                weights and values before the attention softmax.
+            need_head_weights (bool, optional): return the attention
+                weights for each head. Implies *need_weights*. Default:
+                return the average attention weights over all heads.
+            enc_dec_attn_constraint_mask (optional): bs x head x L_kv mask;
+                non-zero entries are filled with -1e8 in the logits.
+            reset_attn_weight (bool, optional): ``True`` stores the attention
+                probabilities of this call in ``self.last_attn_probs``;
+                ``False`` reuses the stored ones instead of the fresh ones.
+
+        Returns:
+            On the delegated path, whatever ``F.multi_head_attention_forward``
+            returns. On the explicit path, ``(attn_weights, v)`` when
+            ``before_softmax`` is set, else
+            ``(attn, (attn_weights, attn_logits))`` where ``attn_weights`` is
+            ``None`` unless ``need_weights`` is set.
+        """
+        if need_head_weights:
+            need_weights = True
+
+        tgt_len, bsz, embed_dim = query.size()
+        assert embed_dim == self.embed_dim
+        assert list(query.size()) == [tgt_len, bsz, embed_dim]
+        if self.enable_torch_version and incremental_state is None and not static_kv and reset_attn_weight is None:
+            if self.qkv_same_dim:
+                return F.multi_head_attention_forward(query, key, value,
+                                                      self.embed_dim, self.num_heads,
+                                                      self.in_proj_weight,
+                                                      self.in_proj_bias, self.bias_k, self.bias_v,
+                                                      self.add_zero_attn, self.dropout,
+                                                      self.out_proj.weight, self.out_proj.bias,
+                                                      self.training, key_padding_mask, need_weights,
+                                                      attn_mask)
+            else:
+                return F.multi_head_attention_forward(query, key, value,
+                                                      self.embed_dim, self.num_heads,
+                                                      torch.empty([0]),
+                                                      self.in_proj_bias, self.bias_k, self.bias_v,
+                                                      self.add_zero_attn, self.dropout,
+                                                      self.out_proj.weight, self.out_proj.bias,
+                                                      self.training, key_padding_mask, need_weights,
+                                                      attn_mask, use_separate_proj_weight=True,
+                                                      q_proj_weight=self.q_proj_weight,
+                                                      k_proj_weight=self.k_proj_weight,
+                                                      v_proj_weight=self.v_proj_weight)
+
+        if incremental_state is not None:
+            saved_state = self._get_input_buffer(incremental_state)
+            if 'prev_key' in saved_state:
+                # previous time steps are cached - no need to recompute
+                # key and value if they are static
+                if static_kv:
+                    assert self.encoder_decoder_attention and not self.self_attention
+                    key = value = None
+        else:
+            saved_state = None
+
+        if self.self_attention:
+            # self-attention
+            q, k, v = self.in_proj_qkv(query)
+        elif self.encoder_decoder_attention:
+            # encoder-decoder attention
+            q = self.in_proj_q(query)
+            if key is None:
+                assert value is None
+                k = v = None
+            else:
+                # Both projections read ``key``; ``value`` is not used on
+                # this branch.
+                k = self.in_proj_k(key)
+                v = self.in_proj_v(key)
+
+        else:
+            q = self.in_proj_q(query)
+            k = self.in_proj_k(key)
+            v = self.in_proj_v(value)
+        q *= self.scaling
+
+        if self.bias_k is not None:
+            assert self.bias_v is not None
+            k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)])
+            v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)])
+            if attn_mask is not None:
+                attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1)
+            if key_padding_mask is not None:
+                key_padding_mask = torch.cat(
+                    [key_padding_mask, key_padding_mask.new_zeros(key_padding_mask.size(0), 1)], dim=1)
+
+        # Fold the heads into the batch axis: [bsz * num_heads, len, head_dim].
+        q = q.contiguous().view(tgt_len, bsz * self.num_heads, self.head_dim).transpose(0, 1)
+        if k is not None:
+            k = k.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1)
+        if v is not None:
+            v = v.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1)
+
+        if saved_state is not None:
+            # saved states are stored with shape (bsz, num_heads, seq_len, head_dim)
+            if 'prev_key' in saved_state:
+                prev_key = saved_state['prev_key'].view(bsz * self.num_heads, -1, self.head_dim)
+                if static_kv:
+                    k = prev_key
+                else:
+                    k = torch.cat((prev_key, k), dim=1)
+            if 'prev_value' in saved_state:
+                prev_value = saved_state['prev_value'].view(bsz * self.num_heads, -1, self.head_dim)
+                if static_kv:
+                    v = prev_value
+                else:
+                    v = torch.cat((prev_value, v), dim=1)
+            if 'prev_key_padding_mask' in saved_state and saved_state['prev_key_padding_mask'] is not None:
+                prev_key_padding_mask = saved_state['prev_key_padding_mask']
+                if static_kv:
+                    key_padding_mask = prev_key_padding_mask
+                else:
+                    key_padding_mask = torch.cat((prev_key_padding_mask, key_padding_mask), dim=1)
+
+            saved_state['prev_key'] = k.view(bsz, self.num_heads, -1, self.head_dim)
+            saved_state['prev_value'] = v.view(bsz, self.num_heads, -1, self.head_dim)
+            saved_state['prev_key_padding_mask'] = key_padding_mask
+
+            self._set_input_buffer(incremental_state, saved_state)
+
+        src_len = k.size(1)
+
+        # This is part of a workaround to get around fork/join parallelism
+        # not supporting Optional types.
+        if key_padding_mask is not None and key_padding_mask.shape == torch.Size([]):
+            key_padding_mask = None
+
+        if key_padding_mask is not None:
+            assert key_padding_mask.size(0) == bsz
+            assert key_padding_mask.size(1) == src_len
+
+        if self.add_zero_attn:
+            src_len += 1
+            k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1)
+            v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1)
+            if attn_mask is not None:
+                attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1)
+            if key_padding_mask is not None:
+                key_padding_mask = torch.cat(
+                    [key_padding_mask, torch.zeros(key_padding_mask.size(0), 1).type_as(key_padding_mask)], dim=1)
+
+        attn_weights = torch.bmm(q, k.transpose(1, 2))
+        attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz)
+
+        assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len]
+
+        if attn_mask is not None:
+            if len(attn_mask.shape) == 2:
+                attn_mask = attn_mask.unsqueeze(0)
+            elif len(attn_mask.shape) == 3:
+                attn_mask = attn_mask[:, None].repeat([1, self.num_heads, 1, 1]).reshape(
+                    bsz * self.num_heads, tgt_len, src_len)
+            attn_weights = attn_weights + attn_mask
+
+        if enc_dec_attn_constraint_mask is not None:  # bs x head x L_kv
+            attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
+            attn_weights = attn_weights.masked_fill(
+                enc_dec_attn_constraint_mask.unsqueeze(2).bool(),
+                -1e8,
+            )
+            attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
+
+        if key_padding_mask is not None:
+            # don't attend to padding symbols
+            attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
+            # masked_fill needs a boolean mask; byte masks are converted here,
+            # matching the handling of the constraint mask above.
+            attn_weights = attn_weights.masked_fill(
+                key_padding_mask.unsqueeze(1).unsqueeze(2).bool(),
+                -1e8,
+            )
+            attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
+
+        attn_logits = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
+
+        if before_softmax:
+            return attn_weights, v
+
+        attn_weights_float = softmax(attn_weights, dim=-1)
+        attn_weights = attn_weights_float.type_as(attn_weights)
+        attn_probs = F.dropout(attn_weights_float.type_as(attn_weights), p=self.dropout, training=self.training)
+
+        if reset_attn_weight is not None:
+            if reset_attn_weight:
+                self.last_attn_probs = attn_probs.detach()
+            else:
+                assert self.last_attn_probs is not None
+                attn_probs = self.last_attn_probs
+        attn = torch.bmm(attn_probs, v)
+        assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim]
+        attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)
+        attn = self.out_proj(attn)
+
+        if need_weights:
+            attn_weights = attn_weights_float.view(bsz, self.num_heads, tgt_len, src_len).transpose(1, 0)
+            if not need_head_weights:
+                # average attention weights over heads
+                attn_weights = attn_weights.mean(dim=0)
+        else:
+            attn_weights = None
+
+        return attn, (attn_weights, attn_logits)
+
+    def in_proj_qkv(self, query):
+        """Project ``query`` with the packed weight and split into q, k, v."""
+        return self._in_proj(query).chunk(3, dim=-1)
+
+    def in_proj_q(self, query):
+        """Apply the query projection (first ``embed_dim`` rows/bias entries)."""
+        if self.qkv_same_dim:
+            return self._in_proj(query, end=self.embed_dim)
+        else:
+            bias = self.in_proj_bias
+            if bias is not None:
+                bias = bias[:self.embed_dim]
+            return F.linear(query, self.q_proj_weight, bias)
+
+    def in_proj_k(self, key):
+        """Apply the key projection (second ``embed_dim`` rows/bias entries)."""
+        if self.qkv_same_dim:
+            return self._in_proj(key, start=self.embed_dim, end=2 * self.embed_dim)
+        else:
+            weight = self.k_proj_weight
+            bias = self.in_proj_bias
+            if bias is not None:
+                bias = bias[self.embed_dim:2 * self.embed_dim]
+            return F.linear(key, weight, bias)
+
+    def in_proj_v(self, value):
+        """Apply the value projection (last ``embed_dim`` rows/bias entries)."""
+        if self.qkv_same_dim:
+            return self._in_proj(value, start=2 * self.embed_dim)
+        else:
+            weight = self.v_proj_weight
+            bias = self.in_proj_bias
+            if bias is not None:
+                bias = bias[2 * self.embed_dim:]
+            return F.linear(value, weight, bias)
+
+    def _in_proj(self, input, start=0, end=None):
+        """Linear projection using rows ``start:end`` of the packed weight/bias."""
+        weight = self.in_proj_weight
+        bias = self.in_proj_bias
+        weight = weight[start:end, :]
+        if bias is not None:
+            bias = bias[start:end]
+        return F.linear(input, weight, bias)
+
+    def _get_input_buffer(self, incremental_state):
+        """Return this module's cached attention state, or a new empty dict."""
+        return get_incremental_state(
+            self,
+            incremental_state,
+            'attn_state',
+        ) or {}
+
+    def _set_input_buffer(self, incremental_state, buffer):
+        """Store ``buffer`` as this module's cached attention state."""
+        set_incremental_state(
+            self,
+            incremental_state,
+            'attn_state',
+            buffer,
+        )
+
+    def apply_sparse_mask(self, attn_weights, tgt_len, src_len, bsz):
+        """Hook applied to the raw attention logits; returns them unchanged here."""
+        return attn_weights
+
+    def clear_buffer(self, incremental_state=None):
+        """Drop the cached keys, values and key padding mask, if any."""
+        if incremental_state is not None:
+            saved_state = self._get_input_buffer(incremental_state)
+            # The padding mask is cached together with the keys it describes,
+            # so it is dropped with them; forward() would otherwise apply the
+            # stale mask to the next set of keys.
+            for name in ('prev_key', 'prev_value', 'prev_key_padding_mask'):
+                saved_state.pop(name, None)
+            self._set_input_buffer(incremental_state, saved_state)
+
+
+class Swish(torch.autograd.Function):
+    """Swish activation, ``x * sigmoid(x)``, with a hand-written backward pass.
+
+    Only the input is saved for backward; the sigmoid is recomputed there.
+    """
+
+    @staticmethod
+    def forward(ctx, i):
+        result = i * torch.sigmoid(i)
+        ctx.save_for_backward(i)
+        return result
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        # ``saved_tensors`` is the supported accessor; ``saved_variables`` is
+        # its deprecated alias.
+        (i,) = ctx.saved_tensors
+        sigmoid_i = torch.sigmoid(i)
+        # d/dx [x * s(x)] = s(x) * (1 + x * (1 - s(x)))
+        return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))
+
+
+class CustomSwish(nn.Module):
+    """``nn.Module`` wrapper that applies the ``Swish`` autograd function."""
+
+    def forward(self, input_tensor):
+        activated = Swish.apply(input_tensor)
+        return activated
+
+
+class TransformerFFNLayer(nn.Module):
+    """Feed-forward sublayer: Conv1d -> scale -> activation -> dropout -> Linear.
+
+    ``padding='SAME'`` pads the convolution by ``kernel_size // 2`` on both
+    sides; ``padding='LEFT'`` pads ``kernel_size - 1`` zeros on the left only.
+    Any other value leaves ``ffn_1`` undefined. ``act`` selects ``'gelu'``,
+    ``'relu'`` or ``'swish'``; any other value applies no activation.
+    """
+
+    def __init__(self, hidden_size, filter_size, padding="SAME", kernel_size=1, dropout=0., act='gelu'):
+        super().__init__()
+        self.kernel_size = kernel_size
+        self.dropout = dropout
+        self.act = act
+
+        if padding == 'SAME':
+            self.ffn_1 = nn.Conv1d(hidden_size, filter_size, kernel_size, padding=kernel_size // 2)
+        elif padding == 'LEFT':
+            left_pad = nn.ConstantPad1d((kernel_size - 1, 0), 0.0)
+            conv = nn.Conv1d(hidden_size, filter_size, kernel_size)
+            self.ffn_1 = nn.Sequential(left_pad, conv)
+
+        self.ffn_2 = Linear(filter_size, hidden_size)
+
+        if self.act == 'swish':
+            self.swish_fn = CustomSwish()
+
+    def forward(self, x, incremental_state=None):
+        """Apply the sublayer to ``x`` (T x B x C).
+
+        With ``incremental_state`` set, the last ``kernel_size`` inputs are
+        cached across calls and only the final time step is returned.
+        """
+        decoding = incremental_state is not None
+
+        if decoding:
+            cache = self._get_input_buffer(incremental_state)
+            if 'prev_input' in cache:
+                x = torch.cat((cache['prev_input'], x), dim=0)
+            # Keep only as much history as the convolution can see.
+            x = x[-self.kernel_size:]
+            cache['prev_input'] = x
+            self._set_input_buffer(incremental_state, cache)
+
+        # Conv1d wants B x C x T; go there and come back to T x B x C.
+        hidden = self.ffn_1(x.permute(1, 2, 0)).permute(2, 0, 1)
+        hidden = hidden * self.kernel_size ** -0.5
+
+        if decoding:
+            hidden = hidden[-1:]
+
+        if self.act == 'gelu':
+            hidden = F.gelu(hidden)
+        elif self.act == 'relu':
+            hidden = F.relu(hidden)
+        elif self.act == 'swish':
+            hidden = self.swish_fn(hidden)
+
+        hidden = F.dropout(hidden, self.dropout, training=self.training)
+        return self.ffn_2(hidden)
+
+    def _get_input_buffer(self, incremental_state):
+        """Return this layer's cached state, or a new empty dict."""
+        cached = get_incremental_state(self, incremental_state, 'f')
+        return cached or {}
+
+    def _set_input_buffer(self, incremental_state, buffer):
+        """Store ``buffer`` as this layer's cached state."""
+        set_incremental_state(self, incremental_state, 'f', buffer)
+
+    def clear_buffer(self, incremental_state):
+        """Drop the cached inputs, if any."""
+        if incremental_state is None:
+            return
+        cache = self._get_input_buffer(incremental_state)
+        cache.pop('prev_input', None)
+        self._set_input_buffer(incremental_state, cache)
+
+
+class BatchNorm1dTBC(nn.Module):
+    """``nn.BatchNorm1d`` for inputs laid out as [T, B, C]."""
+
+    def __init__(self, c):
+        super().__init__()
+        self.bn = nn.BatchNorm1d(c)
+
+    def forward(self, x):
+        """
+        Normalise over the channel axis and return the input layout.
+
+        :param x: [T, B, C]
+        :return: [T, B, C]
+        """
+        batch_first = x.permute(1, 2, 0)  # [B, C, T]
+        normed = self.bn(batch_first)  # [B, C, T]
+        return normed.permute(2, 0, 1)  # [T, B, C]
+
+
+class EncSALayer(nn.Module):
+    """Pre-norm encoder layer: optional self-attention followed by a conv FFN.
+
+    The self-attention sublayer (and ``layer_norm1``) exists only when
+    ``num_heads > 0``. ``norm`` selects ``'ln'``, ``'bn'`` or ``'gn'``; any
+    other value leaves the norm attributes undefined.
+    """
+
+    def __init__(self, c, num_heads, dropout, attention_dropout=0.1,
+                 relu_dropout=0.1, kernel_size=9, padding='SAME', norm='ln', act='gelu'):
+        super().__init__()
+        self.c = c
+        self.dropout = dropout
+        self.num_heads = num_heads
+        if num_heads > 0:
+            if norm == 'ln':
+                self.layer_norm1 = LayerNorm(c)
+            elif norm == 'bn':
+                self.layer_norm1 = BatchNorm1dTBC(c)
+            elif norm == 'gn':
+                self.layer_norm1 = GroupNorm1DTBC(8, c)
+            self.self_attn = MultiheadAttention(
+                self.c, num_heads, self_attention=True, dropout=attention_dropout, bias=False)
+        if norm == 'ln':
+            self.layer_norm2 = LayerNorm(c)
+        elif norm == 'bn':
+            self.layer_norm2 = BatchNorm1dTBC(c)
+        elif norm == 'gn':
+            self.layer_norm2 = GroupNorm1DTBC(8, c)
+        self.ffn = TransformerFFNLayer(
+            c, 4 * c, kernel_size=kernel_size, dropout=relu_dropout, padding=padding, act=act)
+
+    def forward(self, x, encoder_padding_mask=None, **kwargs):
+        """Apply the layer to ``x``.
+
+        :param x: input passed to the norm/attention/FFN sublayers
+            (assumed [T, B, C] from the mask handling - TODO confirm)
+        :param encoder_padding_mask: optional mask, transposed and broadcast
+            over the last axis of ``x``; non-zero entries zero out the
+            corresponding outputs. ``None`` disables the masking.
+        :param kwargs: ``layer_norm_training``, when not ``None``, overrides
+            the ``training`` flag of the norm layers.
+        :return: tensor with the same layout as ``x``
+        """
+        layer_norm_training = kwargs.get('layer_norm_training', None)
+        if layer_norm_training is not None:
+            # layer_norm1 only exists together with the attention sublayer.
+            if self.num_heads > 0:
+                self.layer_norm1.training = layer_norm_training
+            self.layer_norm2.training = layer_norm_training
+
+        # Multiplicative keep-mask (1 = keep, 0 = padding); None when no
+        # padding mask was supplied.
+        if encoder_padding_mask is not None:
+            keep = (1 - encoder_padding_mask.float()).transpose(0, 1)[..., None]
+        else:
+            keep = None
+
+        if self.num_heads > 0:
+            residual = x
+            x = self.layer_norm1(x)
+            x, _, = self.self_attn(
+                query=x,
+                key=x,
+                value=x,
+                key_padding_mask=encoder_padding_mask
+            )
+            x = F.dropout(x, self.dropout, training=self.training)
+            x = residual + x
+            if keep is not None:
+                x = x * keep
+
+        residual = x
+        x = self.layer_norm2(x)
+        x = self.ffn(x)
+        x = F.dropout(x, self.dropout, training=self.training)
+        x = residual + x
+        if keep is not None:
+            x = x * keep
+        return x
+
+
+class DecSALayer(nn.Module):
+    def __init__(self, c, num_heads, dropout, attention_dropout=0.1, relu_dropout=0.1,
+                 kernel_size=9, act='gelu', norm='ln'):
+        super().__init__()
+        self.c = c
+        self.dropout = dropout
+        if norm == 'ln':
+            self.layer_norm1 = LayerNorm(c)
+        elif norm == 'gn':
+            self.layer_norm1 = GroupNorm1DTBC(8, c)
+        self.self_attn = MultiheadAttention(
+            c, num_heads, self_attention=True, dropout=attention_dropout, bias=False
+        )
+        if norm == 'ln':
+            self.layer_norm2 = LayerNorm(c)
+        elif norm == 'gn':
+            self.layer_norm2 = GroupNorm1DTBC(8, c)
+        self.encoder_attn = MultiheadAttention(
+            c, num_heads, encoder_decoder_attention=True, dropout=attention_dropout, bias=False,
+        )
+        if norm == 'ln':
+            self.layer_norm3 = LayerNorm(c)
+        elif norm == 'gn':
+            self.layer_norm3 = GroupNorm1DTBC(8, c)
+        self.ffn = TransformerFFNLayer(
+            c, 4 * c, padding='LEFT', kernel_size=kernel_size, dropout=relu_dropout, act=act)
+
+    def forward(
+            self,
+            x,
+            encoder_out=None,
+            encoder_padding_mask=None,
+            incremental_state=None,
+            self_attn_mask=None,
+            self_attn_padding_mask=None,
+            attn_out=None,
+            reset_attn_weight=None,
+            **kwargs,
+    ):  # returns (x, attn_logits); attn_logits is None unless encoder_out is given
+        layer_norm_training = kwargs.get('layer_norm_training', None)
+        if layer_norm_training is not None:  # optional override of the three norms' train/eval flag
+            self.layer_norm1.training = layer_norm_training
+            self.layer_norm2.training = layer_norm_training
+            self.layer_norm3.training = layer_norm_training
+        residual = x  # sub-layer 1: pre-norm self-attention with a residual connection
+        x = self.layer_norm1(x)
+        x, _ = self.self_attn(
+            query=x,
+            key=x,
+            value=x,
+            key_padding_mask=self_attn_padding_mask,
+            incremental_state=incremental_state,
+            attn_mask=self_attn_mask
+        )
+        x = F.dropout(x, self.dropout, training=self.training)
+        x = residual + x
+        # Sub-layer 2 (skipped when neither encoder_out nor attn_out is given): encoder attention.
+        attn_logits = None
+        if encoder_out is not None or attn_out is not None:
+            residual = x
+            x = self.layer_norm2(x)
+        if encoder_out is not None:
+            x, attn = self.encoder_attn(
+                query=x,
+                key=encoder_out,
+                value=encoder_out,
+                key_padding_mask=encoder_padding_mask,
+                incremental_state=incremental_state,
+                static_kv=True,
+                enc_dec_attn_constraint_mask=get_incremental_state(self, incremental_state,
+                                                                   'enc_dec_attn_constraint_mask'),
+                reset_attn_weight=reset_attn_weight
+            )
+            attn_logits = attn[1]  # second element of the extra value encoder_attn returns
+        elif attn_out is not None:  # no attention is computed; attn_out only goes through the value projection
+            x = self.encoder_attn.in_proj_v(attn_out)
+        if encoder_out is not None or attn_out is not None:
+            x = F.dropout(x, self.dropout, training=self.training)
+            x = residual + x
+        # Sub-layer 3: pre-norm feed-forward layer with a residual connection.
+        residual = x
+        x = self.layer_norm3(x)
+        x = self.ffn(x, incremental_state=incremental_state)
+        x = F.dropout(x, self.dropout, training=self.training)
+        x = residual + x
+        return x, attn_logits
+
+    def clear_buffer(self, input, encoder_out=None, encoder_padding_mask=None, incremental_state=None):
+        self.encoder_attn.clear_buffer(incremental_state)  # only incremental_state is used; the other arguments are ignored
+        self.ffn.clear_buffer(incremental_state)  # NOTE(review): self_attn is not cleared here - confirm this is intended
+
+    def set_buffer(self, name, tensor, incremental_state):
+        return set_incremental_state(self, incremental_state, name, tensor)  # thin wrapper; passes this module as the first argument
+
+
+class ConvBlock(nn.Module):
+    """Conv1d -> optional normalisation -> ReLU -> dropout, applied to [B, C, T] tensors."""
+
+    def __init__(self, idim=80, n_chans=256, kernel_size=3, stride=1, norm='gn', dropout=0):
+        """:param norm: 'bn' | 'in' | 'gn' | 'ln' | 'wn'; any other string disables normalisation."""
+        super().__init__()
+        self.conv = ConvNorm(idim, n_chans, kernel_size, stride=stride)
+        # Keep the requested kind as a string: `self.norm` is rebound to a module below,
+        # so comparing it with 'ln' inside forward() could never be true.
+        self.norm_type = norm
+        self.norm = norm
+        if norm == 'bn':
+            self.norm = nn.BatchNorm1d(n_chans)
+        elif norm == 'in':
+            self.norm = nn.InstanceNorm1d(n_chans, affine=True)
+        elif norm == 'gn':
+            self.norm = nn.GroupNorm(n_chans // 16, n_chans)
+        elif norm == 'ln':
+            # Normalise over the channel axis, which forward() transposes into last place.
+            self.norm = LayerNorm(n_chans)
+        elif norm == 'wn':
+            self.conv = torch.nn.utils.weight_norm(self.conv.conv)
+        self.dropout = nn.Dropout(dropout)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        """:param x: [B, C, T]; :return: [B, C, T]"""
+        x = self.conv(x)
+        if not isinstance(self.norm, str):
+            if self.norm_type == 'ln':
+                x = self.norm(x.transpose(1, 2)).transpose(1, 2)
+            else:
+                x = self.norm(x)
+        x = self.relu(x)
+        return self.dropout(x)
+
+
+class ConvStacks(nn.Module):
+    """Linear in-projection, a stack of ConvBlocks (optionally residual), then a linear out-projection."""
+
+    def __init__(self, idim=80, n_layers=5, n_chans=256, odim=32, kernel_size=5, norm='gn',
+                 dropout=0, strides=None, res=True):
+        super().__init__()
+        self.conv = torch.nn.ModuleList()
+        self.kernel_size = kernel_size
+        self.res = res
+        self.in_proj = Linear(idim, n_chans)
+        if strides is not None:
+            assert len(strides) == n_layers
+        else:
+            strides = [1] * n_layers
+        block_kwargs = dict(norm=norm, dropout=dropout)
+        for idx in range(n_layers):
+            self.conv.append(ConvBlock(n_chans, n_chans, kernel_size, stride=strides[idx], **block_kwargs))
+        self.out_proj = Linear(n_chans, odim)
+
+    def forward(self, x, return_hiddens=False):
+        """
+        :param x: [B, T, H]
+        :param return_hiddens: also return every block's output, stacked
+        :return: [B, T, H], plus hiddens [B, L, C, T] when return_hiddens is set
+        """
+        h = self.in_proj(x)
+        h = h.transpose(1, -1)  # (B, C, Tmax)
+        layer_outputs = []
+        for block in self.conv:
+            out = block(h)
+            h = h + out if self.res else out  # (B, C, Tmax)
+            layer_outputs.append(h)
+        y = self.out_proj(h.transpose(1, -1))  # (B, Tmax, H)
+        if not return_hiddens:
+            return y
+        return y, torch.stack(layer_outputs, 1)  # [B, L, C, T]
+
+
+class ConvGlobalStacks(nn.Module):
+    """Strided ConvBlock stack whose time-averaged output gives one vector per sequence."""
+
+    def __init__(self, idim=80, n_layers=5, n_chans=256, odim=32, kernel_size=5, norm='gn', dropout=0,
+                 strides=(2, 2, 2, 2, 2)):
+        super().__init__()
+        self.conv = torch.nn.ModuleList()
+        self.pooling = torch.nn.ModuleList()
+        self.kernel_size = kernel_size
+        self.in_proj = Linear(idim, n_chans)
+        for idx in range(n_layers):
+            self.conv.append(ConvBlock(n_chans, n_chans, kernel_size, stride=strides[idx],
+                                       norm=norm, dropout=dropout))
+            # NOTE(review): registered but never applied in forward(); behaviour kept as is.
+            self.pooling.append(nn.MaxPool1d(strides[idx]))
+        self.out_proj = Linear(n_chans, odim)
+
+    def forward(self, x):
+        """
+        :param x: [B, T, H]
+        :return: [B, odim], the projection of the features averaged over time
+        """
+        x = self.in_proj(x).transpose(1, -1)  # (B, C, Tmax)
+        for f in self.conv:
+            x = f(x)  # (B, C, T)
+        x = x.transpose(1, -1).mean(1)  # (B, C)
+        return self.out_proj(x)  # (B, odim)
+
+
+class ConvDecoder(nn.Module):
+    """Pre-norm residual conv layers with a single-head encoder attention between them."""
+
+    def __init__(self, c, dropout, kernel_size=9, act='gelu'):
+        super().__init__()
+        self.c = c
+        self.dropout = dropout
+
+        self.pre_convs = nn.ModuleList()
+        self.pre_lns = nn.ModuleList()
+        for i in range(2):
+            self.pre_convs.append(TransformerFFNLayer(
+                c, c * 2, padding='LEFT', kernel_size=kernel_size, dropout=dropout, act=act))
+            self.pre_lns.append(LayerNorm(c))
+
+        self.layer_norm_attn = LayerNorm(c)
+        self.encoder_attn = MultiheadAttention(c, 1, encoder_decoder_attention=True, bias=False)
+
+        self.post_convs = nn.ModuleList()
+        self.post_lns = nn.ModuleList()
+        for i in range(8):
+            self.post_convs.append(TransformerFFNLayer(
+                c, c * 2, padding='LEFT', kernel_size=kernel_size, dropout=dropout, act=act))
+            self.post_lns.append(LayerNorm(c))
+
+    def forward(self, x, encoder_out=None, encoder_padding_mask=None, incremental_state=None,
+                **kwargs):
+        """Return (x, attn_logits); attn_logits is None when encoder_out is not given."""
+        attn_logits = None
+        for conv, ln in zip(self.pre_convs, self.pre_lns):
+            residual = x
+            x = ln(x)
+            x = conv(x) + residual
+        if encoder_out is not None:
+            residual = x
+            x = self.layer_norm_attn(x)
+            x, attn = self.encoder_attn(
+                query=x,
+                key=encoder_out,
+                value=encoder_out,
+                key_padding_mask=encoder_padding_mask,
+                incremental_state=incremental_state,
+                static_kv=True,
+                enc_dec_attn_constraint_mask=get_incremental_state(self, incremental_state,
+                                                                   'enc_dec_attn_constraint_mask'),
+            )
+            attn_logits = attn[1]
+            x = F.dropout(x, self.dropout, training=self.training)
+            x = residual + x
+        for conv, ln in zip(self.post_convs, self.post_lns):
+            residual = x
+            x = ln(x)
+            x = conv(x) + residual
+        return x, attn_logits
+
+    def clear_buffer(self, input, encoder_out=None, encoder_padding_mask=None, incremental_state=None):
+        """Clear the incremental state of the attention and of every conv layer."""
+        self.encoder_attn.clear_buffer(incremental_state)
+        # This class has no `self.ffn`; its TransformerFFNLayers live in pre_convs/post_convs.
+        for conv in list(self.pre_convs) + list(self.post_convs):
+            conv.clear_buffer(incremental_state)
+
+    def set_buffer(self, name, tensor, incremental_state):
+        return set_incremental_state(self, incremental_state, name, tensor)
diff --git a/modules/audio2motion/transformer_models.py b/modules/audio2motion/transformer_models.py
new file mode 100644
index 0000000000000000000000000000000000000000..05cc5ea196fb0af0f7cce8b8a41a2dcd5562f631
--- /dev/null
+++ b/modules/audio2motion/transformer_models.py
@@ -0,0 +1,208 @@
+from numpy import isin
+import torch
+import torch.nn as nn
+from modules.audio2motion.transformer_base import *
+
+DEFAULT_MAX_SOURCE_POSITIONS = 2000  # not referenced by the code in this module
+DEFAULT_MAX_TARGET_POSITIONS = 2000  # FFTBlocks uses this for its position table size and max_source_positions
+
+
+class TransformerEncoderLayer(nn.Module):
+    """Thin wrapper that builds one EncSALayer and forwards every call to it."""
+
+    def __init__(self, hidden_size, dropout, kernel_size=None, num_heads=2, norm='ln'):
+        super().__init__()
+        self.hidden_size = hidden_size
+        self.dropout = dropout
+        self.num_heads = num_heads
+        # A missing kernel size falls back to 9.
+        ffn_kernel = 9 if kernel_size is None else kernel_size
+        self.op = EncSALayer(
+            hidden_size, num_heads, dropout=dropout, attention_dropout=0.0,
+            relu_dropout=dropout, kernel_size=ffn_kernel, padding='SAME', norm=norm, act='gelu')
+
+    def forward(self, x, **kwargs):
+        """Delegate to the wrapped EncSALayer."""
+        return self.op(x, **kwargs)
+
+
+######################
+# fastspeech modules
+######################
+class LayerNorm(torch.nn.LayerNorm):
+    """LayerNorm that can also normalise axis 1 instead of the last axis.
+    :param int nout: size of the normalised axis
+    :param int dim: -1 normalises the last axis; any other value normalises axis 1
+    """
+
+    def __init__(self, nout, dim=-1, eps=1e-5):
+        """Build the underlying torch LayerNorm and remember the axis setting."""
+        super().__init__(nout, eps=eps)
+        self.dim = dim
+
+    def forward(self, x):
+        """Normalise ``x`` according to ``self.dim``.
+        :return: tensor with the same shape as ``x``
+        """
+        base_forward = super().forward
+        if self.dim != -1:
+            # Axis 1 is swapped into last place for the call and swapped back afterwards.
+            return base_forward(x.transpose(1, -1)).transpose(1, -1)
+        return base_forward(x)
+
+
+class FFTBlocks(nn.Module):  # stack of TransformerEncoderLayers with an optional sinusoidal position embedding
+    def __init__(self, hidden_size, num_layers, ffn_kernel_size=9, dropout=None,
+                 num_heads=2, use_pos_embed=True, use_last_norm=True, norm='ln',
+                 use_pos_embed_alpha=True):
+        super().__init__()
+        self.num_layers = num_layers
+        embed_dim = self.hidden_size = hidden_size
+        self.dropout = dropout if dropout is not None else 0.1  # dropout=None means 0.1
+        self.use_pos_embed = use_pos_embed
+        self.use_last_norm = use_last_norm
+        if use_pos_embed:
+            self.max_source_positions = DEFAULT_MAX_TARGET_POSITIONS
+            self.padding_idx = 0
+            self.pos_embed_alpha = nn.Parameter(torch.Tensor([1])) if use_pos_embed_alpha else 1  # learnable scale, or the constant 1
+            self.embed_positions = SinusoidalPositionalEmbedding(
+                embed_dim, self.padding_idx, init_size=DEFAULT_MAX_TARGET_POSITIONS,
+            )
+        # One encoder layer per requested layer, all built with the same configuration.
+        self.layers = nn.ModuleList([])
+        self.layers.extend([
+            TransformerEncoderLayer(self.hidden_size, self.dropout,
+                                    kernel_size=ffn_kernel_size, num_heads=num_heads,
+                                    norm=norm)
+            for _ in range(self.num_layers)
+        ])
+        if self.use_last_norm:
+            if norm == 'ln':
+                self.layer_norm = nn.LayerNorm(embed_dim)
+            elif norm == 'bn':
+                self.layer_norm = BatchNorm1dTBC(embed_dim)
+            elif norm == 'gn':
+                self.layer_norm = GroupNorm1DTBC(8, embed_dim)  # NOTE(review): any other norm value leaves layer_norm unset
+        else:
+            self.layer_norm = None
+
+    def forward(self, x, padding_mask=None, attn_mask=None, return_hiddens=False):
+        """
+        :param x: [B, T, C]
+        :param padding_mask: [B, T], nonzero at padded frames; when None, all-zero frames of x count as padding
+        :return: [B, T, C], or the stacked per-layer outputs [L, B, T, C] when return_hiddens is True
+        """
+        padding_mask = x.abs().sum(-1).eq(0).data if padding_mask is None else padding_mask
+        nonpadding_mask_TB = 1 - padding_mask.transpose(0, 1).float()[:, :, None]  # [T, B, 1]
+        if self.use_pos_embed:
+            positions = self.pos_embed_alpha * self.embed_positions(x[..., 0])  # NOTE(review): only the first feature channel is passed in - presumably zeros there are treated as padding
+            x = x + positions
+            x = F.dropout(x, p=self.dropout, training=self.training)
+        # B x T x C -> T x B x C
+        x = x.transpose(0, 1) * nonpadding_mask_TB
+        hiddens = []
+        for layer in self.layers:
+            x = layer(x, encoder_padding_mask=padding_mask, attn_mask=attn_mask) * nonpadding_mask_TB  # padded frames are re-zeroed after every layer
+            hiddens.append(x)
+        if self.use_last_norm:
+            x = self.layer_norm(x) * nonpadding_mask_TB
+        if return_hiddens:
+            x = torch.stack(hiddens, 0)  # [L, T, B, C]; the final-norm output above is not part of this result
+            x = x.transpose(1, 2)  # [L, B, T, C]
+        else:
+            x = x.transpose(0, 1)  # [B, T, C]
+        return x
+
+class SequentialSA(nn.Module):
+    """Runs a list of layers in order, adapting the tensor layout to each layer type."""
+
+    def __init__(self, layers):
+        super().__init__()
+        self.layers = nn.ModuleList(layers)
+
+    def forward(self, x, x_mask):
+        """
+        x: [batch, T, H]
+        x_mask: [batch, T]
+        """
+        pad_mask = 1. - x_mask
+        for module in self.layers:
+            if isinstance(module, EncSALayer):
+                # The attention layer is fed [T, batch, H] and its output is swapped back.
+                x = module(x.permute(1, 0, 2), pad_mask).permute(1, 0, 2)
+            elif isinstance(module, nn.Linear):
+                x = module(x) * x_mask.unsqueeze(2)
+            elif isinstance(module, nn.AvgPool1d):
+                # Pooling runs over the last axis, so time is moved there temporarily.
+                x = module(x.permute(0, 2, 1)).permute(0, 2, 1)
+            elif isinstance(module, nn.PReLU):
+                # Flatten to [batch * T, H] so that H sits on axis 1 for PReLU.
+                n_batch, n_steps, n_hidden = x.shape
+                x = module(x.reshape([n_batch * n_steps, n_hidden]))
+                x = x.reshape([n_batch, n_steps, n_hidden])
+            else:  # Relu
+                x = module(x)
+
+        return x
+
+class TransformerStyleFusionModel(nn.Module):
+    """Fuses per-frame audio and energy features with one style vector per sequence."""
+
+    def __init__(self, num_heads=4, dropout = 0.1, out_dim = 64):  # NOTE(review): num_heads is accepted but unused
+        super(TransformerStyleFusionModel, self).__init__()
+        self.audio_layer = SequentialSA([
+            nn.Linear(29, 48),
+            nn.ReLU(),  # ReLU's only argument is `inplace`, not a feature width
+            nn.Linear(48, 128),
+        ])
+        self.energy_layer = SequentialSA([
+            nn.Linear(1, 16),
+            nn.ReLU(),
+            nn.Linear(16, 64),
+        ])
+        self.backbone1 = FFTBlocks(hidden_size=192,num_layers=3)  # 192 = 128 (audio) + 64 (energy)
+        self.sty_encoder = nn.Sequential(*[
+            nn.Linear(135, 64),
+            nn.ReLU(),
+            nn.Linear(64, 128)
+        ])
+        self.backbone2 = FFTBlocks(hidden_size=320,num_layers=3)  # 320 = 192 + 128 (style)
+        self.out_layer = SequentialSA([
+            nn.AvgPool1d(kernel_size=2,stride=2,padding=0), #[b,hid,t_audio]=>[b,hid,t_audio//2]
+            nn.Linear(320,out_dim),
+            nn.PReLU(out_dim),
+            nn.Linear(out_dim,out_dim),
+        ])
+        self.dropout = nn.Dropout(p = dropout)
+
+    def forward(self, audio, energy, style, x_mask, y_mask):
+        """
+        :param audio: [batch, T, 29]
+        :param energy: [batch, T, 1]
+        :param style: [batch, 135]
+        :param x_mask: [batch, T], 1 at valid input frames
+        :param y_mask: [batch, T//2], 1 at valid output frames
+        :return: [batch, T//2, out_dim]
+        """
+        pad_mask = 1. - x_mask
+        audio_feat = self.audio_layer(audio, x_mask) # [batch, T, 128]
+        energy_feat = self.energy_layer(energy, x_mask) # [batch, T, 64]
+        feat = torch.cat((audio_feat, energy_feat), dim=-1) # [batch, T, H=128+64]
+        feat = self.backbone1(feat, pad_mask)
+        feat = self.dropout(feat)
+        sty_feat = self.sty_encoder(style) # [batch,135]=>[batch, H=128]
+        sty_feat = sty_feat.unsqueeze(1).repeat(1, feat.shape[1], 1) # [batch, T, H=128]
+        feat = torch.cat([feat, sty_feat], dim=-1) # [batch, T, H=192+128]
+        feat = self.backbone2(feat, pad_mask) # [batch, T, H=320]
+        return self.out_layer(feat, y_mask) # [batch, T//2, H=out_dim]
+
+
+if __name__ == '__main__':
+    model = TransformerStyleFusionModel()
+    audio = torch.rand(4,200,29) # [B,T,H]
+    energy = torch.rand(4,200,1) # [B,T,H]
+    style = torch.ones(4,135) # [B,135]
+    x_mask = torch.ones(4,200) # [B,T]
+    x_mask[3,10:] = 0
+    ret = model(audio, energy, style, x_mask, x_mask[:, ::2])  # forward() also needs y_mask: [B,T//2]
+    print(" ")
\ No newline at end of file
diff --git a/modules/audio2motion/utils.py b/modules/audio2motion/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..7eb56ec514bff822ba1a19a6474207ed82492410
--- /dev/null
+++ b/modules/audio2motion/utils.py
@@ -0,0 +1,29 @@
+import torch
+
+
+def squeeze(x, x_mask=None, n_sqz=2):
+    """Fold groups of ``n_sqz`` time steps into channels: [b, c, t] -> [b, c * n_sqz, t // n_sqz]."""
+    batch, channels, length = x.size()
+    n_groups = length // n_sqz
+    # Trailing frames that do not fill a whole group are dropped.
+    grouped = x[:, :, :n_groups * n_sqz].view(batch, channels, n_groups, n_sqz)
+    folded = grouped.permute(0, 3, 1, 2).contiguous().view(batch, channels * n_sqz, n_groups)
+
+    if x_mask is None:
+        x_mask = torch.ones(batch, 1, n_groups).to(device=x.device, dtype=x.dtype)
+    else:
+        x_mask = x_mask[:, :, n_sqz - 1::n_sqz]  # keep the mask value of each group's last frame
+    return folded * x_mask, x_mask
+
+
+def unsqueeze(x, x_mask=None, n_sqz=2):
+    """Unfold channels back into time steps: [b, c, t] -> [b, c // n_sqz, t * n_sqz]."""
+    batch, channels, length = x.size()
+    out_channels, out_length = channels // n_sqz, length * n_sqz
+    split = x.view(batch, n_sqz, out_channels, length)
+    expanded = split.permute(0, 2, 3, 1).contiguous().view(batch, out_channels, out_length)
+    if x_mask is None:
+        x_mask = torch.ones(batch, 1, out_length).to(device=x.device, dtype=x.dtype)
+    else:
+        x_mask = x_mask.unsqueeze(-1).repeat(1, 1, 1, n_sqz).view(batch, 1, out_length)
+    return expanded * x_mask, x_mask
diff --git a/modules/audio2motion/vae.py b/modules/audio2motion/vae.py
new file mode 100644
index 0000000000000000000000000000000000000000..9801ed631a6142297ce96d33c93ee508f32304b9
--- /dev/null
+++ b/modules/audio2motion/vae.py
@@ -0,0 +1,468 @@
+import math
+import torch
+from torch import nn
+from torch.nn import functional as F
+import torch.distributions as dist
+import numpy as np
+import copy
+from modules.audio2motion.flow_base import Glow, WN, ResidualCouplingBlock
+from modules.audio2motion.transformer_base import Embedding
+
+from utils.commons.pitch_utils import f0_to_coarse
+from utils.commons.hparams import hparams
+
+
+class LambdaLayer(nn.Module):  # adapter that lets a plain callable be used as a module
+    def __init__(self, lambd):
+        super(LambdaLayer, self).__init__()
+        self.lambd = lambd  # the callable applied in forward()
+
+    def forward(self, x):
+        return self.lambd(x)  # the output is whatever the wrapped callable returns
+
+
+def make_positions(tensor, padding_idx):
+    """Number the non-padding entries of each row 1, 2, ... along dim 1, offset by padding_idx.
+
+    Entries equal to padding_idx map to padding_idx itself.
+    """
+    # The casts are deliberate so that this works with both ONNX export and XLA:
+    # XLA prefers an int mask, cumsum yields longs by default, and ONNX cannot
+    # handle cumsum's dtype kwarg, hence type_as() instead.
+    is_token = tensor.ne(padding_idx).int()
+    running_count = torch.cumsum(is_token, dim=1).type_as(is_token)
+    # Multiplying by the mask zeroes the count at padding entries.
+    positions = (running_count * is_token).long()
+    return positions + padding_idx
+
+class SinusoidalPositionalEmbedding(nn.Module):
+    """Sinusoidal positional embeddings looked up from a table that grows on demand.
+
+    Positions come from make_positions(), so entries equal to padding_idx get the zeroed table row.
+    """
+
+    def __init__(self, embedding_dim, padding_idx, init_size=1024):
+        super().__init__()
+        self.embedding_dim = embedding_dim
+        self.padding_idx = padding_idx
+        self.weights = SinusoidalPositionalEmbedding.get_embedding(
+            init_size,
+            embedding_dim,
+            padding_idx,
+        )
+        self.register_buffer('_float_tensor', torch.FloatTensor(1))  # device/dtype anchor: forward() moves the table to match it
+
+    @staticmethod
+    def get_embedding(num_embeddings, embedding_dim, padding_idx=None):
+        """Build a [num_embeddings, embedding_dim] table: sines in the first half, cosines in the second.
+
+        This matches the implementation in tensor2tensor, but differs slightly
+        from the description in Section 3.5 of "Attention Is All You Need".
+        """
+        half_dim = embedding_dim // 2
+        emb = math.log(10000) / (half_dim - 1)  # NOTE(review): divides by zero when embedding_dim is 2 or 3
+        emb = torch.exp(torch.arange(half_dim, dtype=torch.float) * -emb)
+        emb = torch.arange(num_embeddings, dtype=torch.float).unsqueeze(1) * emb.unsqueeze(0)
+        emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1).view(num_embeddings, -1)
+        if embedding_dim % 2 == 1:
+            # zero pad
+            emb = torch.cat([emb, torch.zeros(num_embeddings, 1)], dim=1)
+        if padding_idx is not None:
+            emb[padding_idx, :] = 0  # the padding row carries no positional signal
+        return emb
+
+    def forward(self, input, incremental_state=None, timestep=None, positions=None, **kwargs):
+        """Input is expected to be of size [bsz x seqlen]."""
+        bsz, seq_len = input.shape[:2]
+        max_pos = self.padding_idx + 1 + seq_len  # highest table row this call can index, plus one
+        if self.weights is None or max_pos > self.weights.size(0):
+            # recompute/expand embeddings if needed
+            self.weights = SinusoidalPositionalEmbedding.get_embedding(
+                max_pos,
+                self.embedding_dim,
+                self.padding_idx,
+            )
+        self.weights = self.weights.to(self._float_tensor)  # keep the table on the buffer's device and dtype
+
+        if incremental_state is not None:
+            # positions is the same for every token when decoding a single step
+            pos = timestep.view(-1)[0] + 1 if timestep is not None else seq_len
+            return self.weights[self.padding_idx + pos, :].expand(bsz, 1, -1)
+
+        positions = make_positions(input, self.padding_idx) if positions is None else positions
+        return self.weights.index_select(0, positions.view(-1)).view(bsz, seq_len, -1).detach()  # [bsz, seq_len, embedding_dim]
+
+    def max_positions(self):
+        """Maximum number of supported positions."""
+        return int(1e4)  # an arbitrary large number
+
+class FVAEEncoder(nn.Module):
+    """Strided-conv downsampler + WN stack giving the posterior mean, log-std and a sample."""
+
+    def __init__(self, in_channels, hidden_channels, latent_channels, kernel_size,
+                 n_layers, gin_channels=0, p_dropout=0, strides=[4]):
+        super().__init__()
+        self.strides = strides
+        self.hidden_size = hidden_channels
+        downsamplers = []
+        for i, s in enumerate(strides):
+            src_channels = in_channels if i == 0 else hidden_channels
+            downsamplers.append(
+                nn.Conv1d(src_channels, hidden_channels, kernel_size=s * 2, stride=s, padding=s // 2))
+        self.pre_net = nn.Sequential(*downsamplers)
+        self.wn = WN(hidden_channels, kernel_size, 1, n_layers, gin_channels, p_dropout)
+        self.out_proj = nn.Conv1d(hidden_channels, latent_channels * 2, 1)
+        self.latent_channels = latent_channels
+
+    def forward(self, x, x_mask, g):
+        """Return (z, m, logs, x_mask), with x_mask subsampled to the downsampled length."""
+        h = self.pre_net(x)
+        x_mask = x_mask[:, :, ::np.prod(self.strides)][:, :, :h.shape[-1]]
+        h = self.wn(h * x_mask, x_mask, g) * x_mask
+        m, logs = torch.split(self.out_proj(h), self.latent_channels, dim=1)
+        z = m + torch.randn_like(m) * torch.exp(logs)
+        return z, m, logs, x_mask
+
+
+class FVAEDecoder(nn.Module):
+    """Transposed-conv upsampler + WN stack mapping latents back to output channels."""
+
+    def __init__(self, latent_channels, hidden_channels, out_channels, kernel_size,
+                 n_layers, gin_channels=0, p_dropout=0,
+                 strides=[4]):
+        super().__init__()
+        self.strides = strides
+        self.hidden_size = hidden_channels
+        upsamplers = []
+        for i, s in enumerate(strides):
+            src_channels = latent_channels if i == 0 else hidden_channels
+            upsamplers.append(nn.ConvTranspose1d(src_channels, hidden_channels, kernel_size=s, stride=s))
+        self.pre_net = nn.Sequential(*upsamplers)
+        self.wn = WN(hidden_channels, kernel_size, 1, n_layers, gin_channels, p_dropout)
+        self.out_proj = nn.Conv1d(hidden_channels, out_channels, 1)
+
+    def forward(self, x, x_mask, g):
+        """Upsample x, run the conditioned WN stack under x_mask and project to out_channels."""
+        h = self.pre_net(x) * x_mask
+        h = self.wn(h, x_mask, g) * x_mask
+        return self.out_proj(h)
+
+class FVAE(nn.Module):
+    """Conditional VAE over feature sequences with an optional flow-based prior.
+
+    :param in_out_channels: feature size of x; with sqz_prior, 71 builds separate 64- and 7-channel
+        decoders whose outputs are concatenated, while 7 or 64 builds a single decoder
+    :param strides: strides shared by the conv downsamplers (encoder, conditioning) and the decoder
+    :param use_prior_glow: map latents through a ResidualCouplingBlock before scoring them
+    :param sqz_prior: attention-pool the latents into one vector that is repeated over time
+    :param use_pos_emb: add sinusoidal position embeddings before pooling (sqz_prior only)
+    """
+
+    def __init__(self,
+                 in_out_channels=64, hidden_channels=256, latent_size=16,
+                 kernel_size=3, enc_n_layers=5, dec_n_layers=5, gin_channels=80, strides=[4,],
+                 use_prior_glow=True, glow_hidden=256, glow_kernel_size=3, glow_n_blocks=5,
+                 sqz_prior=False, use_pos_emb=False):
+        super(FVAE, self).__init__()
+        self.in_out_channels = in_out_channels
+        self.strides = strides
+        self.hidden_size = hidden_channels
+        self.latent_size = latent_size
+        self.use_prior_glow = use_prior_glow
+        self.sqz_prior = sqz_prior
+        self.g_pre_net = nn.Sequential(*[
+            nn.Conv1d(gin_channels, gin_channels, kernel_size=s * 2, stride=s, padding=s // 2)
+            for i, s in enumerate(strides)
+        ])
+        self.encoder = FVAEEncoder(in_out_channels, hidden_channels, latent_size, kernel_size,
+                                   enc_n_layers, gin_channels, strides=strides)
+        if use_prior_glow:
+            self.prior_flow = ResidualCouplingBlock(
+                latent_size, glow_hidden, glow_kernel_size, 1, glow_n_blocks, 4, gin_channels=gin_channels)
+        self.use_pos_embed = use_pos_emb
+        if sqz_prior:
+            self.query_proj = nn.Linear(latent_size, latent_size)
+            self.key_proj = nn.Linear(latent_size, latent_size)
+            self.value_proj = nn.Linear(latent_size, hidden_channels)
+            # NOTE(review): channel counts other than 7, 64 and 71 build no decoder in this mode.
+            if self.in_out_channels in [7, 64]:
+                self.decoder = FVAEDecoder(hidden_channels, hidden_channels, in_out_channels, kernel_size,
+                                    dec_n_layers, gin_channels, strides=strides)
+            elif self.in_out_channels == 71:
+                self.exp_decoder = FVAEDecoder(hidden_channels, hidden_channels, 64, kernel_size,
+                                    dec_n_layers, gin_channels, strides=strides)
+                self.pose_decoder = FVAEDecoder(hidden_channels, hidden_channels, 7, kernel_size,
+                                    dec_n_layers, gin_channels, strides=strides)
+            if self.use_pos_embed:
+                self.embed_positions = SinusoidalPositionalEmbedding(self.latent_size, 0,init_size=2000+1,)
+        else:
+            self.decoder = FVAEDecoder(latent_size, hidden_channels, in_out_channels, kernel_size,
+                                    dec_n_layers, gin_channels, strides=strides)
+
+        self.prior_dist = dist.Normal(0, 1)
+
+    def _pool_style(self, z):
+        """Attention-pool a latent sequence into one vector and repeat it over time.
+
+        :param z: [B, latent_size, T]
+        :return: [B, hidden_channels, T]
+        """
+        if self.use_pos_embed:
+            position = self.embed_positions(z.transpose(1,2).abs().sum(-1)).transpose(1,2)
+            z = z + position
+        # A single query (the time-averaged latent) attends over all frames.
+        q = self.query_proj(z.mean(dim=-1,keepdim=True).transpose(1,2)) # [B, 1, C]
+        k = self.key_proj(z.transpose(1,2)) # [B, T, C]
+        v = self.value_proj(z.transpose(1,2)) # [B, T, hidden]
+        attn = F.softmax(torch.bmm(q,k.transpose(1,2)), dim=-1) # [B, 1, T]
+        out = torch.bmm(attn, v) # [B, 1, hidden]
+        return out.repeat([1,z.shape[-1],1]).transpose(1,2) # [B, hidden, T]
+
+    def _decode(self, z, x_mask, g):
+        """Decode with the split exp/pose decoders when they were built, else the single decoder."""
+        if hasattr(self, 'exp_decoder'):
+            return torch.cat([self.exp_decoder(z, x_mask, g), self.pose_decoder(z, x_mask, g)], dim=1)
+        return self.decoder(z, x_mask, g)
+
+    def forward(self, x=None, x_mask=None, g=None, infer=False, temperature=1. , **kwargs):
+        """
+        :param x: [B, T, C_in_out]; only needed when infer is False
+        :param x_mask: [B, T]
+        :param g: [B, T, C_g]
+        :param infer: sample the latent from the prior instead of encoding x
+        :param temperature: scale applied to the prior sample when infer is True
+        :return: (x_recon, loss_kl, z_p, m_q, logs_q) when infer is False, else (x_recon, z_p);
+                 sequence tensors are returned as [B, time, channels]
+        """
+        x_mask = x_mask[:, None, :] # [B, 1, T]
+        g = g.transpose(1,2) # [B, C_g, T]
+        g_sqz = self.g_pre_net(g) # conditioning downsampled by the strided convs
+
+        if not infer:
+            x = x.transpose(1,2) # [B, C, T]
+            z_q, m_q, logs_q, x_mask_sqz = self.encoder(x, x_mask, g_sqz)
+            dec_in = self._pool_style(z_q) if self.sqz_prior else z_q
+            # The decoders upsample again, hence the un-squeezed mask and conditioning.
+            x_recon = self._decode(dec_in, x_mask, g)
+            q_dist = dist.Normal(m_q, logs_q.exp())
+            if self.use_prior_glow:
+                # Sample-based estimate: log q(z_q) minus the N(0, 1) log-density of the flowed latent.
+                logqx = q_dist.log_prob(z_q)
+                z_p = self.prior_flow(z_q, x_mask_sqz, g_sqz)
+                logpx = self.prior_dist.log_prob(z_p)
+                loss_kl = ((logqx - logpx) * x_mask_sqz).sum() / x_mask_sqz.sum() / logqx.shape[1]
+            else:
+                # Closed-form KL between the posterior and N(0, 1).
+                loss_kl = torch.distributions.kl_divergence(q_dist, self.prior_dist)
+                loss_kl = (loss_kl * x_mask_sqz).sum() / x_mask_sqz.sum() / z_q.shape[1]
+                z_p = z_q
+            return x_recon.transpose(1,2), loss_kl, z_p.transpose(1,2), m_q.transpose(1,2), logs_q.transpose(1,2)
+
+        # Inference: sample the prior at the downsampled length of the conditioning.
+        latent_shape = [g_sqz.shape[0], self.latent_size, g_sqz.shape[2]]
+        z_p = self.prior_dist.sample(latent_shape).to(g.device) * temperature # [B, latent_size, T_sqz]
+        if self.use_prior_glow:
+            z_p = self.prior_flow(z_p, 1, g_sqz, reverse=True)
+        dec_in = self._pool_style(z_p) if self.sqz_prior else z_p
+        # As before, inference decodes with the constant 1 in place of a mask.
+        x_recon = self._decode(dec_in, 1, g)
+        return x_recon.transpose(1,2), z_p.transpose(1,2)
+
+
+class VAEModel(nn.Module):
+    """Audio-conditioned wrapper around FVAE: encoded audio features are fed in as the condition."""
+
+    def __init__(self, in_out_dim=64, audio_in_dim=1024, sqz_prior=False, cond_drop=False, use_prior_flow=True):
+        super().__init__()
+        feat_dim = 64
+        self.blink_embed = nn.Embedding(2, feat_dim)
+        self.audio_in_dim = audio_in_dim
+        cond_dim = feat_dim
+        self.mel_encoder = nn.Sequential(
+            nn.Conv1d(audio_in_dim, 64, 3, 1, 1, bias=False),
+            nn.BatchNorm1d(64),
+            nn.GELU(),
+            nn.Conv1d(64, feat_dim, 3, 1, 1, bias=False),
+        )
+        self.cond_drop = cond_drop
+        if cond_drop:
+            self.dropout = nn.Dropout(0.5)
+
+        self.in_dim = self.out_dim = in_out_dim
+        self.sqz_prior = sqz_prior
+        self.use_prior_flow = use_prior_flow
+        self.vae = FVAE(in_out_channels=in_out_dim, hidden_channels=256, latent_size=16, kernel_size=5,
+            enc_n_layers=8, dec_n_layers=4, gin_channels=cond_dim, strides=[4,],
+            use_prior_glow=self.use_prior_flow, glow_hidden=64, glow_kernel_size=3, glow_n_blocks=4,sqz_prior=sqz_prior)
+        # Halves the time axis of a [B, T, C] tensor by linear interpolation.
+        self.downsampler = LambdaLayer(lambda x: F.interpolate(x.transpose(1,2), scale_factor=0.5, mode='linear').transpose(1,2))
+
+    def num_params(self, model, print_out=True, model_name="model"):
+        """Return the number of trainable parameters of ``model`` in millions, optionally printing it."""
+        parameters = sum(np.prod(p.size()) for p in model.parameters() if p.requires_grad) / 1_000_000
+        if print_out:
+            print(f'| {model_name} Trainable Parameters: %.3fM' % parameters)
+        return parameters
+
+    @property
+    def device(self):
+        """Device of the first parameter of the wrapped VAE."""
+        return next(self.vae.parameters()).device
+
+    def forward(self, batch, ret, train=True, return_latent=False, temperature=1.):
+        """Run the VAE on ``batch``; results are written into ``ret`` and also returned.
+        Reads batch['y_mask'] and batch['audio'], plus batch['y'] when train is True.
+        Returns (x_recon, loss_kl, m_q, logs_q) when train is True, otherwise x_recon.
+        """
+        mask = batch['y_mask'].to(self.device)
+        mel = self.downsampler(batch['audio'].to(self.device))
+        cond_feat = self.mel_encoder(mel.transpose(1,2)).transpose(1,2)
+        if self.cond_drop:
+            cond_feat = self.dropout(cond_feat)
+
+        if train:
+            x_recon, loss_kl, z_p, m_q, logs_q = self.vae(x=batch['y'].to(self.device), x_mask=mask, g=cond_feat, infer=False)
+            x_recon = x_recon * mask.unsqueeze(-1)
+            ret['pred'] = x_recon
+            ret['mask'] = mask
+            ret['loss_kl'] = loss_kl
+            if return_latent:
+                ret['m_q'] = m_q
+                ret['z_p'] = z_p
+            return x_recon, loss_kl, m_q, logs_q
+
+        x_recon, z_p = self.vae(x=None, x_mask=mask, g=cond_feat, infer=True, temperature=temperature)
+        x_recon = x_recon * mask.unsqueeze(-1)
+        ret['pred'], ret['mask'] = x_recon, mask
+        return x_recon
+
+
+class PitchContourVAEModel(nn.Module):
+    """VAE wrapper conditioned on audio features, pitch contour and blink flags.
+
+    Audio features, a coarse-quantised f0 contour and a binary blink flag are
+    each encoded to ``feat_dim`` (128) channels. Optional mouth / eye amplitude
+    embeddings (enabled by ``hparams['use_mouth_amp_embed']`` and
+    ``hparams['use_eye_amp_embed']``) are appended, and the concatenation is
+    projected back to ``feat_dim`` and used as the condition ``g`` of an
+    ``FVAE``.
+    """
+
+    def __init__(self, hparams, in_out_dim=64, audio_in_dim=1024, sqz_prior=False, cond_drop=False, use_prior_flow=True):
+        """Build the condition encoders and the underlying ``FVAE``.
+
+        :param hparams: dict-like config; a deep copy is stored on the module.
+        :param in_out_dim: channel count of the modelled sequence.
+        :param audio_in_dim: channel count of ``batch['audio']``.
+        :param sqz_prior: forwarded to ``FVAE``.
+        :param cond_drop: when true, apply ``Dropout(0.5)`` to the condition.
+        :param use_prior_flow: forwarded to ``FVAE`` as ``use_prior_glow``.
+        """
+        super().__init__()
+        self.hparams = copy.deepcopy(hparams)
+        feat_dim = 128
+        self.audio_in_dim = audio_in_dim
+        # Two embeddings: blink flag 0 or 1.
+        self.blink_embed = nn.Embedding(2, feat_dim)
+
+        self.mel_encoder = nn.Sequential(*[
+                nn.Conv1d(audio_in_dim, feat_dim, 3, 1, 1, bias=False),
+                nn.BatchNorm1d(feat_dim ),
+                nn.GELU(),
+                nn.Conv1d(feat_dim , feat_dim, 3, 1, 1, bias=False)
+            ])
+
+        # 300 embedding rows for the coarse f0 bins produced by f0_to_coarse.
+        self.pitch_embed = Embedding(300, feat_dim, None)
+        self.pitch_encoder = nn.Sequential(*[
+                nn.Conv1d(feat_dim, feat_dim , 3, 1, 1, bias=False),
+                nn.BatchNorm1d(feat_dim),
+                nn.GELU(),
+                nn.Conv1d(feat_dim, feat_dim, 3, 1, 1, bias=False)
+            ])
+
+        # audio + pitch + blink features are always concatenated.
+        cond_dim = feat_dim + feat_dim + feat_dim
+
+        if hparams.get('use_mouth_amp_embed', False):
+            self.mouth_amp_embed = nn.Parameter(torch.randn(feat_dim))
+            cond_dim += feat_dim
+
+        if hparams.get('use_eye_amp_embed', False):
+            self.eye_amp_embed = nn.Parameter(torch.randn(feat_dim))
+            cond_dim += feat_dim
+
+        self.cond_proj = nn.Linear(cond_dim, feat_dim, bias=True)
+
+        self.cond_drop = cond_drop
+        if self.cond_drop:
+            self.dropout = nn.Dropout(0.5)
+
+        self.in_dim, self.out_dim = in_out_dim, in_out_dim
+        self.sqz_prior = sqz_prior
+        self.use_prior_flow = use_prior_flow
+        self.vae = FVAE(in_out_channels=in_out_dim, hidden_channels=256, latent_size=16, kernel_size=5,
+            enc_n_layers=8, dec_n_layers=4, gin_channels=feat_dim, strides=[4,],
+            use_prior_glow=self.use_prior_flow, glow_hidden=64, glow_kernel_size=3, glow_n_blocks=4,sqz_prior=sqz_prior)
+        # Halve the time axis (dim 1 of a [B, T, C] tensor) by nearest-neighbour sampling.
+        self.downsampler = LambdaLayer(lambda x: F.interpolate(x.transpose(1,2), scale_factor=0.5, mode='nearest').transpose(1,2))
+
+    def num_params(self, model, print_out=True, model_name="model"):
+        """Return (and optionally print) the trainable parameter count of ``model`` in millions."""
+        parameters = filter(lambda p: p.requires_grad, model.parameters())
+        parameters = sum([np.prod(p.size()) for p in parameters]) / 1_000_000
+        if print_out:
+            print(f'| {model_name} Trainable Parameters: %.3fM' % parameters)
+        return parameters
+
+    @property
+    def device(self):
+        """Device of the first parameter of ``self.vae``."""
+        return next(self.vae.parameters()).device
+
+    def _amp_feat(self, batch, key, embed, batch_size, n_frames):
+        """Turn a per-sample amplitude into a ``[B, n_frames, feat_dim]`` condition.
+
+        The amplitude is read from ``batch[key]`` (expected ``[B, 1]`` like the
+        default -- TODO confirm with the data loader) and falls back to a
+        constant 0.4 when the key is missing. It scales the learned vector
+        ``embed`` and is repeated along the time axis.
+        """
+        amp = batch.get(key)
+        if amp is None:
+            amp = torch.ones([batch_size, 1], device=self.device) * 0.4
+        else:
+            # Batch-supplied amplitudes may live on another device than the
+            # model; move them like every other batch tensor.
+            amp = amp.to(self.device)
+        feat = amp.unsqueeze(1) * embed.unsqueeze(0)
+        return feat.repeat([1, n_frames, 1])
+
+    def forward(self, batch, ret, train=True, return_latent=False, temperature=1.):
+        """Run the conditioned VAE for training or sampling.
+
+        ``batch`` must provide ``'y_mask'``, ``'audio'`` and ``'f0'`` (and
+        ``'y'`` when ``train`` is true). ``'blink'`` is optional: when missing,
+        an all-zero long tensor of shape ``[B, T, 1]`` is stored into ``batch``
+        (the caller's dict is modified). ``'mouth_amp'`` / ``'eye_amp'`` are
+        optional and only read when the matching hparams flag is enabled.
+
+        Results are written into ``ret``: ``'pred'`` and ``'mask'`` always,
+        ``'loss_kl'`` in training, plus ``'m_q'`` / ``'z_p'`` when
+        ``return_latent`` is set in training.
+
+        Returns ``(x_recon, loss_kl, m_q, logs_q)`` in training and only
+        ``x_recon`` at inference.
+        """
+        infer = not train
+        hparams = self.hparams
+        device = self.device
+        mask = batch['y_mask'].to(device)
+        mel = batch['audio'].to(device)
+        f0 = batch['f0'].to(device) # [b,t]
+        if 'blink' not in batch:
+            batch['blink'] = torch.zeros([f0.shape[0], f0.shape[1], 1], dtype=torch.long, device=f0.device)
+        blink = batch['blink'].to(device)
+        blink_feat = self.blink_embed(blink.squeeze(2))
+
+        # Bring every condition stream to half the input frame rate.
+        blink_feat = self.downsampler(blink_feat)
+        mel = self.downsampler(mel)
+        f0 = self.downsampler(f0.unsqueeze(-1)).squeeze(-1)
+        f0_coarse = f0_to_coarse(f0)
+        pitch_emb = self.pitch_embed(f0_coarse)
+        # The Conv1d encoders are channel-first, hence the transposes.
+        cond_feat = self.mel_encoder(mel.transpose(1,2)).transpose(1,2)
+        pitch_feat = self.pitch_encoder(pitch_emb.transpose(1,2)).transpose(1,2)
+
+        batch_size, n_frames = f0.shape[0], cond_feat.shape[1]
+        cond_feats = [cond_feat, pitch_feat, blink_feat]
+        if hparams.get('use_mouth_amp_embed', False):
+            cond_feats.append(self._amp_feat(batch, 'mouth_amp', self.mouth_amp_embed, batch_size, n_frames))
+
+        if hparams.get('use_eye_amp_embed', False):
+            cond_feats.append(self._amp_feat(batch, 'eye_amp', self.eye_amp_embed, batch_size, n_frames))
+
+        cond_feat = torch.cat(cond_feats, dim=-1)
+        cond_feat = self.cond_proj(cond_feat)
+
+        if self.cond_drop:
+            cond_feat = self.dropout(cond_feat)
+
+        if not infer:
+            x = batch['y'].to(device)
+            x_recon, loss_kl, z_p, m_q, logs_q = self.vae(x=x, x_mask=mask, g=cond_feat, infer=False)
+            # Zero the prediction on padded frames.
+            x_recon = x_recon * mask.unsqueeze(-1)
+            ret['pred'] = x_recon
+            ret['mask'] = mask
+            ret['loss_kl'] = loss_kl
+            if return_latent:
+                ret['m_q'] = m_q
+                ret['z_p'] = z_p
+            return x_recon, loss_kl, m_q, logs_q
+        else:
+            x_recon, z_p = self.vae(x=None, x_mask=mask, g=cond_feat, infer=True, temperature=temperature)
+            x_recon = x_recon * mask.unsqueeze(-1)
+            ret['pred'] = x_recon
+            ret['mask'] = mask
+
+            return x_recon
+
+
+if __name__ == '__main__':
+    # Smoke test: build a small FVAE without the prior flow and run one
+    # training-mode and one inference-mode forward pass on random tensors.
+    # NOTE(review): the wrapper models in this file pass [B, T, C] features and
+    # a [B, T] mask to the VAE, whereas the tensors below are channel-first
+    # with a [B, 1, T] mask -- confirm against FVAE.forward.
+    fvae = FVAE(in_out_channels=64, hidden_channels=128, latent_size=32,kernel_size=3, enc_n_layers=6, dec_n_layers=2,
+        gin_channels=80, strides=[4], use_prior_glow=False, glow_hidden=128, glow_kernel_size=3, glow_n_blocks=3)
+    feats = torch.rand([8, 64, 1000])
+    feats_mask = torch.ones([8,1,1000])
+    cond = torch.rand([8, 80, 1000])
+    # Training mode is expected to return five values.
+    x_recon, loss_kl, z_p, m_q, logs_q = fvae(feats,feats_mask,cond,infer=False)
+    print(" ")
+    # Inference mode is expected to return two values.
+    x_recon, z_p = fvae(feats,feats_mask,cond,infer=True)
+    print(" ")
diff --git a/modules/audio2motion/vqvae.py b/modules/audio2motion/vqvae.py
new file mode 100644
index 0000000000000000000000000000000000000000..310ffc7bf4bf1c5a8c2901163439bba179a968fc
--- /dev/null
+++ b/modules/audio2motion/vqvae.py
@@ -0,0 +1,200 @@
+import scipy
+from scipy import linalg
+from torch.nn import functional as F
+import torch
+from torch import nn
+import numpy as np
+from modules.audio2motion.transformer_models import FFTBlocks
+import modules.audio2motion.utils as utils
+from modules.audio2motion.flow_base import Glow, WN, ResidualCouplingBlock
+import torch.distributions as dist
+from modules.audio2motion.cnn_models import LambdaLayer, LayerNorm
+
+from vector_quantize_pytorch import VectorQuantize
+
+
+class FVAEEncoder(nn.Module):
+    """Strided-conv + WN encoder producing a diagonal Gaussian posterior.
+
+    The input is downsampled by one strided ``Conv1d`` per entry of
+    ``strides``, refined by a ``WN`` stack conditioned on ``g``, and projected
+    to ``2 * latent_channels`` channels that are split into mean and log-std.
+    """
+    def __init__(self, in_channels, hidden_channels, latent_channels, kernel_size,
+                 n_layers, gin_channels=0, p_dropout=0, strides=[4]):
+        super().__init__()
+        self.strides = strides
+        self.hidden_size = hidden_channels
+        # Only the first conv maps in_channels -> hidden_channels; the rest
+        # keep hidden_channels.
+        down_convs = []
+        conv_in = in_channels
+        for s in strides:
+            down_convs.append(nn.Conv1d(conv_in, hidden_channels, kernel_size=s * 2, stride=s, padding=s // 2))
+            conv_in = hidden_channels
+        self.pre_net = nn.Sequential(*down_convs)
+        self.wn = WN(hidden_channels, kernel_size, 1, n_layers, gin_channels, p_dropout)
+        self.out_proj = nn.Conv1d(hidden_channels, latent_channels * 2, 1)
+        self.latent_channels = latent_channels
+
+    def forward(self, x, x_mask, g):
+        """Encode ``x`` and sample a latent with the reparameterisation trick.
+
+        Returns ``(z, m, logs, x_mask_sqz)``: the sample, the posterior mean
+        and log-std, and the mask subsampled to the downsampled length.
+        """
+        h = self.pre_net(x)
+        # Subsample the mask by the total stride, then crop it to the conv
+        # output length.
+        total_stride = np.prod(self.strides)
+        mask_sqz = x_mask[:, :, ::total_stride][:, :, :h.shape[-1]]
+        h = h * mask_sqz
+        h = self.wn(h, mask_sqz, g) * mask_sqz
+        stats = self.out_proj(h)
+        m, logs = torch.split(stats, self.latent_channels, dim=1)
+        noise = torch.randn_like(m)
+        z = m + noise * torch.exp(logs)
+        return z, m, logs, mask_sqz
+
+
+class FVAEDecoder(nn.Module):
+    """Transposed-conv + WN decoder mapping latents back to the output space.
+
+    The latent is upsampled by one ``ConvTranspose1d`` per entry of
+    ``strides`` (kernel size equal to the stride), refined by a ``WN`` stack
+    conditioned on ``g`` and projected to ``out_channels`` with a 1x1 conv.
+    """
+    def __init__(self, latent_channels, hidden_channels, out_channels, kernel_size,
+                 n_layers, gin_channels=0, p_dropout=0,
+                 strides=[4]):
+        super().__init__()
+        self.strides = strides
+        self.hidden_size = hidden_channels
+        # Only the first layer maps latent_channels -> hidden_channels.
+        up_convs = []
+        conv_in = latent_channels
+        for s in strides:
+            up_convs.append(nn.ConvTranspose1d(conv_in, hidden_channels, kernel_size=s, stride=s))
+            conv_in = hidden_channels
+        self.pre_net = nn.Sequential(*up_convs)
+        self.wn = WN(hidden_channels, kernel_size, 1, n_layers, gin_channels, p_dropout)
+        self.out_proj = nn.Conv1d(hidden_channels, out_channels, 1)
+
+    def forward(self, x, x_mask, g):
+        """Decode latent ``x`` under mask ``x_mask`` and condition ``g``.
+
+        The mask is applied before and after the WN stack but not after the
+        final projection.
+        """
+        h = self.pre_net(x) * x_mask
+        h = self.wn(h, x_mask, g) * x_mask
+        return self.out_proj(h)
+
+
+class VQVAE(nn.Module):
+    # Vector-quantised autoencoder: FVAEEncoder -> VectorQuantize -> FVAEDecoder,
+    # with the condition ``g`` fed to both encoder (downsampled) and decoder.
+    def __init__(self,
+                 in_out_channels=64, hidden_channels=256, latent_size=16,
+                 kernel_size=3, enc_n_layers=5, dec_n_layers=5, gin_channels=80, strides=[4,],
+                 sqz_prior=False):
+        super().__init__()
+        self.in_out_channels = in_out_channels
+        self.strides = strides
+        self.hidden_size = hidden_channels
+        # Stored only; the quantiser below hard-codes codebook_dim=16.
+        self.latent_size = latent_size
+        # Downsamples the condition with the same strides as the encoder pre-net.
+        self.g_pre_net = nn.Sequential(*[
+            nn.Conv1d(gin_channels, gin_channels, kernel_size=s * 2, stride=s, padding=s // 2)
+            for i, s in enumerate(strides)
+        ])
+        # The encoder emits hidden_channels latent channels (not latent_size).
+        self.encoder = FVAEEncoder(in_out_channels, hidden_channels, hidden_channels, kernel_size,
+                                   enc_n_layers, gin_channels, strides=strides)
+        # if use_prior_glow:
+        #     self.prior_flow = ResidualCouplingBlock(
+        #         latent_size, glow_hidden, glow_kernel_size, 1, glow_n_blocks, 4, gin_channels=gin_channels)
+        self.vq = VectorQuantize(dim=hidden_channels, codebook_size=256, codebook_dim=16)
+
+        self.decoder = FVAEDecoder(hidden_channels, hidden_channels, in_out_channels, kernel_size,
+                                   dec_n_layers, gin_channels, strides=strides)
+        # Not used by forward() below.
+        self.prior_dist = dist.Normal(0, 1)
+        self.sqz_prior = sqz_prior
+
+    def forward(self, x=None, x_mask=None, g=None, infer=False, **kwargs):
+        """
+        Encode/quantise/decode in training, or decode random codes at inference.
+
+        :param x: [B, T,  C_in_out]; only read when ``infer`` is False
+        :param x_mask: [B, T]
+        :param g: [B, T, C_g]
+        :param infer: False for training, True to sample from the codebook
+        :return: training: (x_recon, commit_loss, z_p, m_q, logs_q), with
+            x_recon, m_q and logs_q transposed back to time-major;
+            inference: (x_recon, z_p)
+        """
+        x_mask = x_mask[:, None, :] # [B, 1, T]
+        g = g.transpose(1,2) # [B, C_g, T]
+        g_for_sqz = g
+
+        # Condition at the downsampled (latent) rate, used by the encoder.
+        g_sqz = self.g_pre_net(g_for_sqz)
+
+        if not infer:
+            x = x.transpose(1,2) # [B, C, T]
+            z_q, m_q, logs_q, x_mask_sqz = self.encoder(x, x_mask, g_sqz)
+            if self.sqz_prior:
+                # Shrink the latent sequence 8x before quantisation ...
+                z_q = F.interpolate(z_q, scale_factor=1/8)
+            z_p, idx, commit_loss = self.vq(z_q.transpose(1,2))
+            if self.sqz_prior:
+                # ... and stretch the quantised codes back afterwards.
+                z_p = F.interpolate(z_p.transpose(1,2),scale_factor=8).transpose(1,2)
+
+            # The decoder gets the full-rate mask and condition.
+            x_recon = self.decoder(z_p.transpose(1,2), x_mask, g)
+            return x_recon.transpose(1,2), commit_loss, z_p.transpose(1,2), m_q.transpose(1,2), logs_q.transpose(1,2)
+        else:
+            bs, t = g_sqz.shape[0], g_sqz.shape[2]
+            if self.sqz_prior:
+                t = t // 8
+            latent_shape = [int(bs * t)]
+            # Uniformly random code indices; 256 matches codebook_size above.
+            latent_idx = torch.randint(0,256,latent_shape).to(self.vq.codebook.device)
+            # latent_idx = torch.ones_like(latent_idx, dtype=torch.long)
+            # z_p = torch.gather(self.vq.codebook, 0, latent_idx)# self.vq.codebook[latent_idx]
+            z_p = self.vq.codebook[latent_idx]
+            z_p = z_p.reshape([bs, t, -1])
+            # presumably maps codebook_dim back to the quantiser's `dim` -- verify
+            # against the vector_quantize_pytorch version in use.
+            z_p = self.vq.project_out(z_p)
+            if self.sqz_prior:
+                z_p = F.interpolate(z_p.transpose(1,2),scale_factor=8).transpose(1,2)
+
+            # The decoder is called with the constant 1 instead of x_mask, so no
+            # frame masking happens on this path.
+            x_recon = self.decoder(z_p.transpose(1,2), 1, g)
+            return x_recon.transpose(1,2), z_p.transpose(1,2)
+
+
+class VQVAEModel(nn.Module):
+    """Mel-conditioned wrapper around ``VQVAE``.
+
+    An 80-channel mel input is downsampled 2x in time, encoded to 64 channels
+    and used as the condition of the ``VQVAE``. In training the target is
+    selected by ``in_out_dim``: 71 -> ``exp`` and ``pose`` concatenated,
+    64 -> ``exp`` only, 7 -> ``pose`` only.
+    """
+    def __init__(self, in_out_dim=71, sqz_prior=False, enc_no_cond=False):
+        """
+        :param in_out_dim: channel count of the modelled sequence (71, 64 or 7
+            for training).
+        :param sqz_prior: forwarded to ``VQVAE``.
+        :param enc_no_cond: when true, training feeds an all-zero condition
+            instead of the mel features.
+        """
+        super().__init__()
+        self.mel_encoder = nn.Sequential(*[
+                nn.Conv1d(80, 64, 3, 1, 1, bias=False),
+                nn.BatchNorm1d(64),
+                nn.GELU(),
+                nn.Conv1d(64, 64, 3, 1, 1, bias=False)
+            ])
+        self.in_dim, self.out_dim = in_out_dim, in_out_dim
+        self.sqz_prior = sqz_prior
+        self.enc_no_cond = enc_no_cond
+        self.vae = VQVAE(in_out_channels=in_out_dim, hidden_channels=256, latent_size=16, kernel_size=5,
+            enc_n_layers=8, dec_n_layers=4, gin_channels=64, strides=[4,], sqz_prior=sqz_prior)
+        # Halve the time axis (dim 1 of a [B, T, C] tensor) by nearest-neighbour sampling.
+        self.downsampler = LambdaLayer(lambda x: F.interpolate(x.transpose(1,2), scale_factor=0.5, mode='nearest').transpose(1,2))
+
+    @property
+    def device(self):
+        """Device of the first parameter of ``self.vae``."""
+        return next(self.vae.parameters()).device
+
+    def _training_target(self, batch):
+        """Select the training target from ``batch`` according to ``self.in_dim``.
+
+        Only the keys that are actually needed are read, so a batch without
+        ``'pose'`` works for ``in_dim == 64`` (and without ``'exp'`` for 7).
+
+        :raises ValueError: if ``self.in_dim`` is not 71, 64 or 7.
+        """
+        if self.in_dim == 71:
+            exp = batch['exp'].to(self.device)
+            pose = batch['pose'].to(self.device)
+            return torch.cat([exp, pose], dim=-1) # [B, T, C=64 + 7]
+        if self.in_dim == 64:
+            return batch['exp'].to(self.device)
+        if self.in_dim == 7:
+            return batch['pose'].to(self.device)
+        raise ValueError(f"Unsupported in_out_dim {self.in_dim}; expected 71, 64 or 7.")
+
+    def forward(self, batch, ret, log_dict=None, train=True):
+        """Run the VQ-VAE for training or sampling.
+
+        ``batch`` must provide ``'y_mask'`` and ``'mel'``; training also reads
+        ``'exp'`` and/or ``'pose'`` depending on ``in_dim``. ``log_dict`` is
+        accepted but not used.
+
+        Training writes ``'pred'``, ``'mask'`` and ``'loss_commit'`` into
+        ``ret`` and returns ``(x_recon, loss_commit, m_q, logs_q)``. Inference
+        returns ``x_recon`` only and does not touch ``ret``.
+
+        :raises ValueError: in training, if ``in_dim`` is not 71, 64 or 7.
+        """
+        infer = not train
+        mask = batch['y_mask'].to(self.device)
+        mel = batch['mel'].to(self.device)
+        mel = self.downsampler(mel)
+
+        # mel_encoder is a Conv1d stack, hence the channel-first round trip.
+        mel_feat = self.mel_encoder(mel.transpose(1,2)).transpose(1,2)
+        if infer:
+            x_recon, z_p = self.vae(x=None, x_mask=mask, g=mel_feat, infer=True)
+            return x_recon
+
+        x = self._training_target(batch)
+        # With enc_no_cond the VAE sees a zero condition of the same shape.
+        cond = torch.zeros_like(mel_feat) if self.enc_no_cond else mel_feat
+        x_recon, loss_commit, z_p, m_q, logs_q = self.vae(x=x, x_mask=mask, g=cond, infer=False)
+        # Collapse the commitment loss to a 0-dim scalar tensor.
+        loss_commit = loss_commit.reshape([])
+        ret['pred'] = x_recon
+        ret['mask'] = mask
+        ret['loss_commit'] = loss_commit
+        return x_recon, loss_commit, m_q, logs_q
+
+    # Legacy, unused feature-statistics helper kept for reference:
+    # def __get_feat(self, exp, pose):
+    # diff_exp = exp[:-1, :] - exp[1:, :]
+    # exp_std = (np.std(exp, axis = 0) - self.exp_std_mean) / self.exp_std_std
+    # diff_exp_std = (np.std(diff_exp, axis = 0) - self.exp_diff_std_mean) / self.exp_diff_std_std
+
+    # diff_pose = pose[:-1, :] - pose[1:, :]
+    # diff_pose_std = (np.std(diff_pose, axis = 0) - self.pose_diff_std_mean) / self.pose_diff_std_std
+
+    # return np.concatenate((exp_std, diff_exp_std, diff_pose_std))
+
+    def num_params(self, model, print_out=True, model_name="model"):
+        """Return (and optionally print) the trainable parameter count of ``model`` in millions."""
+        parameters = filter(lambda p: p.requires_grad, model.parameters())
+        parameters = sum([np.prod(p.size()) for p in parameters]) / 1_000_000
+        if print_out:
+            print(f'| {model_name} Trainable Parameters: %.3fM' % parameters)
+        return parameters
diff --git a/modules/commons/attention/attentions.py b/modules/commons/attention/attentions.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b2b5bc03732ff17a0cb135e977fbe526dff3341
--- /dev/null
+++ b/modules/commons/attention/attentions.py
@@ -0,0 +1,427 @@
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch import Tensor
+import numpy as np
+from typing import Optional, Tuple
+
+
+class ScaledDotProductAttention(nn.Module):
+    """
+    Scaled dot-product attention from "Attention Is All You Need".
+    The query/key dot products are divided by sqrt(dim) and passed through a
+    softmax to obtain the weights applied to the values.
+    Args: dim
+        dim (int): dimension of attention; scores are divided by sqrt(dim)
+    Inputs: query, key, value, mask
+        - **query** (batch, q_len, d_model): query vectors.
+        - **key** (batch, k_len, d_model): key vectors.
+        - **value** (batch, v_len, d_model): value vectors.
+        - **mask** (-): optional mask, viewed to the score shape
+          (batch, q_len, k_len); entries selected by it are set to -inf
+          before the softmax.
+    Returns: context, attn
+        - **context**: attention-weighted sum of the values.
+        - **attn**: attention weights (softmax over the key axis).
+    """
+    def __init__(self, dim: int):
+        super().__init__()
+        self.sqrt_dim = np.sqrt(dim)
+
+    def forward(self, query: Tensor, key: Tensor, value: Tensor, mask: Optional[Tensor] = None) -> Tuple[Tensor, Tensor]:
+        logits = torch.bmm(query, key.transpose(1, 2))
+        logits = logits / self.sqrt_dim
+
+        if mask is not None:
+            # In-place fill on the freshly computed score tensor.
+            logits.masked_fill_(mask.view(logits.size()), -float('Inf'))
+
+        weights = F.softmax(logits, -1)
+        return torch.bmm(weights, value), weights
+
+
+class DotProductAttention(nn.Module):
+    """
+    Unscaled dot-product attention: the query is scored against every value
+    vector, and the softmax of those scores weights the values.
+    ``hidden_dim`` is accepted by the constructor but not used.
+    """
+    def __init__(self, hidden_dim):
+        super().__init__()
+
+    def forward(self, query: Tensor, value: Tensor) -> Tuple[Tensor, Tensor]:
+        batch_size = query.size(0)
+        n_values = value.size(1)
+
+        score = torch.bmm(query, value.transpose(1, 2))
+        # Softmax over the value axis, computed on a flattened 2-D view.
+        flat_attn = F.softmax(score.view(-1, n_values), dim=1)
+        attn = flat_attn.view(batch_size, -1, n_values)
+
+        return torch.bmm(attn, value), attn
+
+
+class AdditiveAttention(nn.Module):
+    """
+     Additive (Bahdanau) attention, proposed in "Neural Machine Translation by
+     Jointly Learning to Align and Translate".
+     Args:
+         hidden_dim (int): dimension of the hidden state vectors
+     Inputs: query, key, value
+         - **query** (batch_size, q_len, hidden_dim): decoder output features.
+         - **key** (batch_size, k_len, hidden_dim): features scored against the query.
+         - **value** (batch_size, v_len, hidden_dim): features of the encoded input sequence.
+     Returns: context, attn
+         - **context**: attention-weighted sum of the values.
+         - **attn**: alignment weights (softmax over the last score axis).
+     Reference:
+         - **Neural Machine Translation by Jointly Learning to Align and Translate**: https://arxiv.org/abs/1409.0473
+    """
+    def __init__(self, hidden_dim: int) -> None:
+        super().__init__()
+        self.query_proj = nn.Linear(hidden_dim, hidden_dim, bias=False)
+        self.key_proj = nn.Linear(hidden_dim, hidden_dim, bias=False)
+        # Shared additive bias, initialised uniformly in [-0.1, 0.1].
+        self.bias = nn.Parameter(torch.rand(hidden_dim).uniform_(-0.1, 0.1))
+        self.score_proj = nn.Linear(hidden_dim, 1)
+
+    def forward(self, query: Tensor, key: Tensor, value: Tensor) -> Tuple[Tensor, Tensor]:
+        energy = torch.tanh(self.key_proj(key) + self.query_proj(query) + self.bias)
+        score = self.score_proj(energy).squeeze(-1)
+        attn = F.softmax(score, dim=-1)
+        return torch.bmm(attn.unsqueeze(1), value), attn
+
+
+class LocationAwareAttention(nn.Module):
+    """
+    Location-aware attention, proposed in "Attention-Based Models for Speech
+    Recognition". The implementation follows the ClovaCall attention style.
+    Args:
+        hidden_dim (int): dimension of the hidden state vectors
+        smoothing (bool): if True, normalise sigmoid scores instead of using softmax.
+    Inputs: query, value, last_attn
+        - **query** (batch, q_len, hidden_dim): decoder output features.
+        - **value** (batch, v_len, hidden_dim): features of the encoded input sequence.
+        - **last_attn** (batch, v_len): previous step's attention (alignment);
+          ``None`` is replaced by zeros.
+    Returns: context, attn
+        - **context** (batch, hidden_dim): attention-weighted sum of the values.
+        - **attn** (batch, v_len): attention (alignment) over the values.
+    Reference:
+        - **Attention-Based Models for Speech Recognition**: https://arxiv.org/abs/1506.07503
+        - **ClovaCall**: https://github.com/clovaai/ClovaCall/blob/master/las.pytorch/models/attention.py
+    """
+    def __init__(self, hidden_dim: int, smoothing: bool = True) -> None:
+        super().__init__()
+        self.hidden_dim = hidden_dim
+        # Extracts location features from the previous alignment.
+        self.conv1d = nn.Conv1d(in_channels=1, out_channels=hidden_dim, kernel_size=3, padding=1)
+        self.query_proj = nn.Linear(hidden_dim, hidden_dim, bias=False)
+        self.value_proj = nn.Linear(hidden_dim, hidden_dim, bias=False)
+        self.score_proj = nn.Linear(hidden_dim, 1, bias=True)
+        self.bias = nn.Parameter(torch.rand(hidden_dim).uniform_(-0.1, 0.1))
+        self.smoothing = smoothing
+
+    def forward(self, query: Tensor, value: Tensor, last_attn: Tensor) -> Tuple[Tensor, Tensor]:
+        batch_size, hidden_dim, seq_len = query.size(0), query.size(2), value.size(1)
+
+        # Without a previous alignment, start from all zeros.
+        if last_attn is None:
+            last_attn = value.new_zeros(batch_size, seq_len)
+
+        loc_feat = self.conv1d(last_attn.unsqueeze(1)).transpose(1, 2)
+        q_feat = self.query_proj(query.reshape(-1, hidden_dim)).view(batch_size, -1, hidden_dim)
+        v_feat = self.value_proj(value.reshape(-1, hidden_dim)).view(batch_size, -1, hidden_dim)
+        energy = torch.tanh(q_feat + v_feat + loc_feat + self.bias)
+        score = self.score_proj(energy).squeeze(dim=-1)
+
+        if self.smoothing:
+            # Sigmoid scores normalised to sum to one along the value axis.
+            score = torch.sigmoid(score)
+            attn = torch.div(score, score.sum(dim=-1).unsqueeze(dim=-1))
+        else:
+            attn = F.softmax(score, dim=-1)
+
+        # Bx1xT X BxTxD => Bx1xD => BxD
+        context = torch.bmm(attn.unsqueeze(dim=1), value).squeeze(dim=1)
+
+        return context, attn
+
+
+class MultiHeadLocationAwareAttention(nn.Module):
+    """
+    Multi-headed location-aware attention.
+    Location-aware attention was proposed in "Attention-Based Models for Speech
+    Recognition" with a single head; this module applies the multi-head idea.
+    Args:
+        hidden_dim (int): number of features of query/value
+        num_heads (int): number of heads (default: 8)
+        conv_out_channel (int): number of output channels of the location convolution
+    Inputs: query, value, last_attn
+        - **query** (batch, 1, hidden_dim): decoder output features for one step.
+        - **value** (batch, v_len, hidden_dim): features of the encoded input sequence.
+        - **last_attn** (batch, num_heads, v_len): previous step's attention
+          (alignment); ``None`` is replaced by zeros.
+    Returns: context, attn
+        - **context** (batch, 1, num_heads * dim): per-head contexts concatenated.
+        - **attn** (batch, num_heads, v_len): per-head attention (alignment).
+    Reference:
+        - **Attention Is All You Need**: https://arxiv.org/abs/1706.03762
+        - **Attention-Based Models for Speech Recognition**: https://arxiv.org/abs/1506.07503
+    """
+    def __init__(self, hidden_dim: int, num_heads: int = 8, conv_out_channel: int = 10) -> None:
+        super(MultiHeadLocationAwareAttention, self).__init__()
+        self.hidden_dim = hidden_dim
+        self.num_heads = num_heads
+        # Per-head feature size (floor division, same value as int(a / b)).
+        self.dim = hidden_dim // num_heads
+        self.conv1d = nn.Conv1d(num_heads, conv_out_channel, kernel_size=3, padding=1)
+        self.loc_proj = nn.Linear(conv_out_channel, self.dim, bias=False)
+        self.query_proj = nn.Linear(hidden_dim, self.dim * num_heads, bias=False)
+        self.value_proj = nn.Linear(hidden_dim, self.dim * num_heads, bias=False)
+        self.score_proj = nn.Linear(self.dim, 1, bias=True)
+        self.bias = nn.Parameter(torch.rand(self.dim).uniform_(-0.1, 0.1))
+
+    def forward(self, query: Tensor, value: Tensor, last_attn: Optional[Tensor]) -> Tuple[Tensor, Tensor]:
+        batch_size, seq_len = value.size(0), value.size(1)
+
+        if last_attn is None:
+            last_attn = value.new_zeros(batch_size, self.num_heads, seq_len)
+
+        # Location features from the previous alignment, shared by all heads:
+        # [B, T, dim] -> [B * H, T, dim].
+        loc_energy = torch.tanh(self.loc_proj(self.conv1d(last_attn).transpose(1, 2)))
+        loc_energy = loc_energy.unsqueeze(1).repeat(1, self.num_heads, 1, 1).view(-1, seq_len, self.dim)
+
+        # Split heads and flatten head-major: row index is b * H + h.
+        query = self.query_proj(query).view(batch_size, -1, self.num_heads, self.dim).permute(0, 2, 1, 3)
+        value = self.value_proj(value).view(batch_size, -1, self.num_heads, self.dim).permute(0, 2, 1, 3)
+        query = query.contiguous().view(-1, 1, self.dim)
+        value = value.contiguous().view(-1, seq_len, self.dim)
+
+        score = self.score_proj(torch.tanh(value + query + loc_energy + self.bias)).squeeze(2)
+        attn = F.softmax(score, dim=1)
+
+        # `value` is already [B * H, T, dim] in the same row order as `attn`.
+        # Re-viewing it as [B, T, H, dim] (as was done before) reinterprets the
+        # memory and mixes up heads and time steps, so it is used as-is.
+        context = torch.bmm(attn.unsqueeze(1), value).view(batch_size, -1, self.num_heads * self.dim)
+        attn = attn.view(batch_size, self.num_heads, -1)
+
+        return context, attn
+
+
+class MultiHeadAttention(nn.Module):
+    """
+    Multi-Head Attention proposed in "Attention Is All You Need"
+    Instead of performing a single attention function with d_model-dimensional keys, values, and queries,
+    the queries, keys and values are projected with learned linear projections and split into
+    num_heads heads of d_head dimensions each. The per-head contexts are concatenated.
+    Note: this module does not apply a final output projection (W_o) to the concatenation.
+    Args:
+        d_model (int): The dimension of keys / values / queries (default: 512)
+        num_heads (int): The number of attention heads. (default: 8)
+    Inputs: query, key, value, mask
+        - **query** (batch, q_len, d_model)
+        - **key** (batch, k_len, d_model)
+        - **value** (batch, v_len, d_model)
+        - **mask** (batch, q_len, k_len): optional; entries selected by it are excluded from attention
+    Returns: output, attn
+        - **output** (batch, q_len, d_model): concatenated per-head contexts.
+        - **attn** (num_heads * batch, q_len, k_len): attention weights, head-major along dim 0.
+    """
+    def __init__(self, d_model: int = 512, num_heads: int = 8):
+        super(MultiHeadAttention, self).__init__()
+
+        assert d_model % num_heads == 0, "d_model % num_heads should be zero."
+
+        self.d_head = d_model // num_heads
+        self.num_heads = num_heads
+        self.scaled_dot_attn = ScaledDotProductAttention(self.d_head)
+        self.query_proj = nn.Linear(d_model, self.d_head * num_heads)
+        self.key_proj = nn.Linear(d_model, self.d_head * num_heads)
+        self.value_proj = nn.Linear(d_model, self.d_head * num_heads)
+
+    def forward(
+            self,
+            query: Tensor,
+            key: Tensor,
+            value: Tensor,
+            mask: Optional[Tensor] = None
+    ) -> Tuple[Tensor, Tensor]:
+        batch_size = value.size(0)
+
+        query = self.query_proj(query).view(batch_size, -1, self.num_heads, self.d_head)  # BxQ_LENxNxD
+        key = self.key_proj(key).view(batch_size, -1, self.num_heads, self.d_head)      # BxK_LENxNxD
+        value = self.value_proj(value).view(batch_size, -1, self.num_heads, self.d_head)  # BxV_LENxNxD
+
+        # Heads are moved to the front, so the flattened row index is n * B + b.
+        query = query.permute(2, 0, 1, 3).contiguous().view(batch_size * self.num_heads, -1, self.d_head)  # NBxQ_LENxD
+        key = key.permute(2, 0, 1, 3).contiguous().view(batch_size * self.num_heads, -1, self.d_head)      # NBxK_LENxD
+        value = value.permute(2, 0, 1, 3).contiguous().view(batch_size * self.num_heads, -1, self.d_head)  # NBxV_LENxD
+
+        if mask is not None:
+            # Repeat along a leading head axis so that, once viewed as
+            # [N * B, Q_LEN, K_LEN] by the attention module, row n * B + b holds
+            # mask[b]. Repeating along dim 1 would give row b * N + n and pair
+            # batch elements with the wrong masks.
+            mask = mask.unsqueeze(0).repeat(self.num_heads, 1, 1, 1)  # NxBxQ_LENxK_LEN
+
+        context, attn = self.scaled_dot_attn(query, key, value, mask)
+
+        context = context.view(self.num_heads, batch_size, -1, self.d_head)
+        context = context.permute(1, 2, 0, 3).contiguous().view(batch_size, -1, self.num_heads * self.d_head)  # BxTxND
+
+        return context, attn
+
+
+class RelativeMultiHeadAttention(nn.Module):
+    """
+    Multi-head attention with relative positional encoding.
+    This concept was proposed in the "Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context"
+    Args:
+        d_model (int): The dimension of model
+        num_heads (int): The number of attention heads.
+        dropout_p (float): probability of dropout
+    Inputs: query, key, value, pos_embedding, mask
+        - **query** (batch, time, dim): Tensor containing query vector
+        - **key** (batch, time, dim): Tensor containing key vector
+        - **value** (batch, time, dim): Tensor containing value vector
+        - **pos_embedding** (batch, time, dim): Positional embedding tensor
+        - **mask** (batch, 1, time2) or (batch, time1, time2): Tensor containing indices to be masked
+    Returns:
+        - **outputs**: Tensor produces by relative multi head attention module.
+    """
+    def __init__(
+            self,
+            d_model: int = 512,
+            num_heads: int = 16,
+            dropout_p: float = 0.1,
+    ):
+        # Builds the Q/K/V/positional projections, the two learnable bias
+        # terms (u, v) added to the query, and the output projection.
+        super().__init__()
+        assert d_model % num_heads == 0, "d_model % num_heads should be zero."
+        self.d_model = d_model
+        self.d_head = d_model // num_heads
+        self.num_heads = num_heads
+        # Scores are scaled by sqrt(d_model), not sqrt(d_head).
+        self.sqrt_dim = math.sqrt(d_model)
+
+        self.query_proj = nn.Linear(d_model, d_model)
+        self.key_proj = nn.Linear(d_model, d_model)
+        self.value_proj = nn.Linear(d_model, d_model)
+        self.pos_proj = nn.Linear(d_model, d_model, bias=False)
+
+        self.dropout = nn.Dropout(p=dropout_p)
+        # Allocated uninitialised, then Xavier-initialised (u first, then v).
+        bias_shape = (self.num_heads, self.d_head)
+        self.u_bias = nn.Parameter(torch.Tensor(*bias_shape))
+        self.v_bias = nn.Parameter(torch.Tensor(*bias_shape))
+        for bias in (self.u_bias, self.v_bias):
+            torch.nn.init.xavier_uniform_(bias)
+
+        self.out_proj = nn.Linear(d_model, d_model)
+
+    def forward(
+            self,
+            query: Tensor,
+            key: Tensor,
+            value: Tensor,
+            pos_embedding: Tensor,
+            mask: Optional[Tensor] = None,
+    ) -> Tensor:
+        batch_size = value.size(0)
+
+        query = self.query_proj(query).view(batch_size, -1, self.num_heads, self.d_head)
+        key = self.key_proj(key).view(batch_size, -1, self.num_heads, self.d_head).permute(0, 2, 1, 3)
+        value = self.value_proj(value).view(batch_size, -1, self.num_heads, self.d_head).permute(0, 2, 1, 3)
+        pos_embedding = self.pos_proj(pos_embedding).view(batch_size, -1, self.num_heads, self.d_head)
+
+        content_score = torch.matmul((query + self.u_bias).transpose(1, 2), key.transpose(2, 3))
+        pos_score = torch.matmul((query + self.v_bias).transpose(1, 2), pos_embedding.permute(0, 2, 3, 1))
+        pos_score = self._compute_relative_positional_encoding(pos_score)
+
+        score = (content_score + pos_score) / self.sqrt_dim
+
+        if mask is not None:
+            mask = mask.unsqueeze(1)
+            score.masked_fill_(mask, -1e9)
+
+        attn = F.softmax(score, -1)
+        attn = self.dropout(attn)
+
+        context = torch.matmul(attn, value).transpose(1, 2)
+        context = context.contiguous().view(batch_size, -1, self.d_model)
+
+        return self.out_proj(context)
+
+    def _compute_relative_positional_encoding(self, pos_score: Tensor) -> Tensor:
+        batch_size, num_heads, seq_length1, seq_length2 = pos_score.size()
+        zeros = pos_score.new_zeros(batch_size, num_heads, seq_length1, 1)
+        padded_pos_score = torch.cat([zeros, pos_score], dim=-1)
+
+        padded_pos_score = padded_pos_score.view(batch_size, num_heads, seq_length2 + 1, seq_length1)
+        pos_score = padded_pos_score[:, :, 1:].view_as(pos_score)
+
+        return pos_score
+
+
+class CustomizingAttention(nn.Module):
+    r"""
+    Multi-head scaled dot-product attention combined with a location-aware term.
+
+    The previous alignment is convolved and projected into a "location energy" that is added to
+    the projected values before the heads are split.
+
+    Args:
+        hidden_dim (int): Feature size of ``query`` and ``value``.
+        num_heads (int): The number of heads. (default: 4)
+        conv_out_channel (int): Output channels of the convolution over the previous alignment.
+
+    Inputs: query, value, last_attn
+        - **query** (batch, q_len, hidden_dim): tensor containing the output features from the decoder.
+        - **value** (batch, v_len, hidden_dim): tensor containing features of the encoded input sequence.
+        - **last_attn** (batch_size * num_heads, v_len): previous timestep's alignment; None means zeros.
+
+    Returns: output, attn
+        - **output** (batch, q_len, num_heads * dim): attended features, heads concatenated.
+        - **attn**: attention from ``ScaledDotProductAttention`` with all size-1 dimensions squeezed.
+
+    Reference:
+        - **Attention Is All You Need**: https://arxiv.org/abs/1706.03762
+        - **Attention-Based Models for Speech Recognition**: https://arxiv.org/abs/1506.07503
+    """
+
+    def __init__(self, hidden_dim: int, num_heads: int = 4, conv_out_channel: int = 10) -> None:
+        super(CustomizingAttention, self).__init__()
+        head_dim = int(hidden_dim / num_heads)
+        self.hidden_dim = hidden_dim
+        self.num_heads = num_heads
+        self.dim = head_dim
+        self.scaled_dot_attn = ScaledDotProductAttention(head_dim)
+        self.conv1d = nn.Conv1d(1, conv_out_channel, kernel_size=3, padding=1)
+        self.query_proj = nn.Linear(hidden_dim, head_dim * num_heads, bias=True)
+        self.value_proj = nn.Linear(hidden_dim, head_dim * num_heads, bias=False)
+        self.loc_proj = nn.Linear(conv_out_channel, head_dim, bias=False)
+        self.bias = nn.Parameter(torch.rand(head_dim * num_heads).uniform_(-0.1, 0.1))
+
+    def forward(self, query: Tensor, value: Tensor, last_attn: Tensor) -> Tuple[Tensor, Tensor]:
+        """Attend from ``query`` over ``value`` after adding the location energy and bias to it."""
+        batch_size, q_len, v_len = value.size(0), query.size(1), value.size(1)
+        heads, dim = self.num_heads, self.dim
+
+        if last_attn is None:
+            last_attn = value.new_zeros(batch_size * heads, v_len)
+        loc_energy = self.get_loc_energy(last_attn, batch_size, v_len)
+
+        query = self.query_proj(query).view(batch_size, q_len, heads * dim)
+        value = self.value_proj(value).view(batch_size, v_len, heads * dim) + loc_energy + self.bias
+
+        # Fold the heads into the leading axis: (batch, len, heads, dim) -> (heads * batch, len, dim).
+        query = query.view(batch_size, q_len, heads, dim).permute(2, 0, 1, 3).contiguous()
+        value = value.view(batch_size, v_len, heads, dim).permute(2, 0, 1, 3).contiguous()
+        context, attn = self.scaled_dot_attn(query.view(-1, q_len, dim), value.view(-1, v_len, dim))
+
+        # Unfold again: (heads, batch, q_len, dim) -> (batch, q_len, heads * dim).
+        context = context.view(heads, batch_size, q_len, dim).permute(1, 2, 0, 3)
+        return context.contiguous().view(batch_size, q_len, -1), attn.squeeze()
+
+    def get_loc_energy(self, last_attn: Tensor, batch_size: int, v_len: int) -> Tensor:
+        """Convolve the previous alignment and project it to (batch, v_len, num_heads * dim)."""
+        heads, dim = self.num_heads, self.dim
+        # (batch * heads, 1, v_len) -> conv -> (batch, heads, channels, v_len)
+        conv_feat = self.conv1d(last_attn.unsqueeze(1)).view(batch_size, heads, -1, v_len)
+        per_head = self.loc_proj(conv_feat.permute(0, 1, 3, 2)).view(batch_size, heads, v_len, dim)
+        return per_head.permute(0, 2, 1, 3).reshape(batch_size, v_len, heads * dim)
\ No newline at end of file
diff --git a/modules/commons/attention/simple_attention.py b/modules/commons/attention/simple_attention.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8c451ce9324491a5c9fa8546b0fe98dc146c6c1
--- /dev/null
+++ b/modules/commons/attention/simple_attention.py
@@ -0,0 +1,50 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def split_heads(x, num_heads):
+    """ Split the channel axis into heads
+    :param x: A tensor with shape [batch, length, channels]
+    :param num_heads: An integer that must divide channels
+    :returns: A tensor with shape [batch, heads, length, channels / heads]"""
+    *leading, channels = x.shape
+    assert channels % num_heads == 0, str(x.shape)
+    return x.reshape(*leading, num_heads, channels // num_heads).permute(0, 2, 1, 3)
+
+
+def combine_heads(x):
+    """ Merge the head axis back into the channel axis
+    :param x: A tensor with shape [batch, heads, length, channels]
+    :returns: A tensor with shape [batch, length, heads * channels]
+    """
+    swapped = x.permute(0, 2, 1, 3)
+    return swapped.flatten(start_dim=-2)
+
+
+class SimpleAttention(nn.Module):
+    """Dot-product attention with bias-free linear maps; every projection outputs query_size features."""
+
+    def __init__(self, query_size=192, key_size=192, value_size=192, num_heads=1):
+        super(SimpleAttention, self).__init__()
+        self.query_size, self.key_size, self.value_size = query_size, key_size, value_size
+        self.num_heads = num_heads  # stored only; forward() does not split heads
+        self.q_transform = nn.Linear(query_size, query_size, bias=False)
+        self.k_transform = nn.Linear(key_size, query_size, bias=False)
+        self.v_transform = nn.Linear(value_size, query_size, bias=False)
+        self.output_transform = nn.Linear(query_size, query_size, bias=False)
+
+    def forward(self, query, key, value, attn_mask=None, bias=None):
+        """Return (output, weights); weights are [batch, length_q, length_k].
+
+        ``bias`` is added to the logits in place; ``attn_mask * -1e9`` is added afterwards, so
+        non-zero mask entries suppress the corresponding keys. No 1/sqrt(d) scaling is applied.
+        """
+        q, k, v = self.q_transform(query), self.k_transform(key), self.v_transform(value)
+        logits = torch.bmm(q, k.transpose(1, 2))
+        if bias is not None:
+            logits += bias
+        if attn_mask is not None:
+            logits = logits + attn_mask * -1e9
+        weights = F.softmax(logits, dim=-1)
+        return self.output_transform(torch.bmm(weights, v)), weights
diff --git a/modules/commons/conformer/conformer.py b/modules/commons/conformer/conformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..f9b5d719a7b67ef745f178cf44e8c452191a3a2a
--- /dev/null
+++ b/modules/commons/conformer/conformer.py
@@ -0,0 +1,97 @@
+import torch
+from torch import nn
+from .espnet_positional_embedding import RelPositionalEncoding
+from .espnet_transformer_attn import RelPositionMultiHeadedAttention, MultiHeadedAttention
+from .layers import Swish, ConvolutionModule, EncoderLayer, MultiLayeredConv1d
+from ..layers import Embedding
+
+
+def sequence_mask(length, max_length=None):
+    """For 1-D ``length``, return a [len(length), max_length] mask that is True at positions < length."""
+    limit = length.max() if max_length is None else max_length
+    positions = torch.arange(limit, dtype=length.dtype, device=length.device)
+    return positions[None, :] < length[:, None]
+
+
+class ConformerLayers(nn.Module):
+    """Stack of conformer ``EncoderLayer`` blocks followed by a final LayerNorm (or Linear) layer."""
+
+    def __init__(self, hidden_size, num_layers, kernel_size=9, dropout=0.0, num_heads=4, use_last_norm=True):
+        super().__init__()
+        self.use_last_norm = use_last_norm
+        self.layers = nn.ModuleList()
+
+        def feed_forward():
+            # kernel-size-1 conv feed-forward whose hidden width is 4 * hidden_size
+            return MultiLayeredConv1d(hidden_size, hidden_size * 4, 1, dropout)
+
+        blocks = []
+        for _ in range(num_layers):
+            attention = MultiHeadedAttention(num_heads, hidden_size, 0.0)
+            ff, ff_macaron = feed_forward(), feed_forward()
+            conv = ConvolutionModule(hidden_size, kernel_size, Swish())
+            blocks.append(EncoderLayer(hidden_size, attention, ff, ff_macaron, conv, dropout))
+        self.encoder_layers = nn.ModuleList(blocks)
+        # The attribute is called layer_norm even when it holds a plain Linear layer.
+        self.layer_norm = nn.LayerNorm(hidden_size) if use_last_norm else nn.Linear(hidden_size, hidden_size)
+
+    def forward(self, x, x_mask):
+        """
+        :param x: [B, T, H]
+        :param x_mask: mask handed to every layer and multiplied into the final output
+        :return: [B, T, H]
+        """
+        for layer in self.encoder_layers:
+            x, _ = layer(x, x_mask)
+        return self.layer_norm(x) * x_mask
+
+
+class ConformerEncoder(ConformerLayers):
+    """Conformer encoder that prepends a strided-conv summary of ``mels_timbre`` to the sequence.
+    With ``dict_size != 0`` the input is embedded token ids; otherwise features are projected from
+    ``in_size`` to ``hidden_size`` on the way in and back to ``in_size`` on the way out."""
+
+    def __init__(self, hidden_size, dict_size=0, in_size=0, strides=(2, 2), num_layers=None):
+        # strides: immutable tuple default (any iterable of ints works). num_layers must be an int.
+        super().__init__(hidden_size, num_layers, kernel_size=9)
+        self.dict_size = dict_size
+        if dict_size != 0:
+            self.embed = Embedding(dict_size, hidden_size, padding_idx=0)
+        else:
+            self.seq_proj_in = torch.nn.Linear(in_size, hidden_size)
+            self.seq_proj_out = torch.nn.Linear(hidden_size, in_size)
+        self.mel_in = torch.nn.Linear(160, hidden_size)  # mels_timbre needs 160 features
+        self.mel_pre_net = torch.nn.Sequential(*[
+            torch.nn.Conv1d(hidden_size, hidden_size, kernel_size=s * 2, stride=s, padding=s // 2)
+            for s in strides
+        ])
+
+    def forward(self, seq_out, mels_timbre, other_embeds=0):
+        """
+        :param seq_out: [B, T] token ids if dict_size != 0, else [B, T, in_size] features
+        :param mels_timbre: [B, T_mel, 160] features, passed through the strided mel_pre_net
+        :param other_embeds: added to the embedded / projected sequence
+        :return: [B, T, hidden_size] if dict_size != 0, else [B, T, in_size]
+        """
+        x_lengths = (seq_out > 0).long().sum(-1)
+        if self.dict_size != 0:
+            x = self.embed(seq_out) + other_embeds  # [B, T, H]
+        else:
+            x = self.seq_proj_in(seq_out) + other_embeds  # [B, T, H]
+        mels_timbre = self.mel_pre_net(self.mel_in(mels_timbre).transpose(1, 2)).transpose(1, 2)
+        prefix_len = mels_timbre.size(1)
+
+        if self.dict_size != 0:
+            x_mask = torch.unsqueeze(sequence_mask(x_lengths + prefix_len, x.size(1) + prefix_len), 2).to(x.dtype)
+        else:
+            x_mask = torch.cat((torch.ones(x.size(0), prefix_len, 1).to(x.device), (x.abs().sum(2) > 0).float()[:, :, None]), dim=1)
+        x = super(ConformerEncoder, self).forward(torch.cat((mels_timbre, x), 1), x_mask)
+        # Drop the prefix by its length; a ``-T:`` slice would keep the whole tensor when T == 0.
+        x = x[:, prefix_len:, :]
+        return x if self.dict_size != 0 else self.seq_proj_out(x)
+
+
+class ConformerDecoder(ConformerLayers):
+    """``ConformerLayers`` stack with the convolution kernel size fixed to 9."""
+    def __init__(self, hidden_size, num_layers):
+        super().__init__(hidden_size, num_layers, kernel_size=9)
diff --git a/modules/commons/conformer/espnet_positional_embedding.py b/modules/commons/conformer/espnet_positional_embedding.py
new file mode 100644
index 0000000000000000000000000000000000000000..89b9b5549cc779d1ea67f052b1c99cad92365503
--- /dev/null
+++ b/modules/commons/conformer/espnet_positional_embedding.py
@@ -0,0 +1,113 @@
+import math
+import torch
+
+
+class PositionalEncoding(torch.nn.Module):
+    """Sinusoidal positional encoding added to the input after scaling it by sqrt(d_model).
+    Args:
+        d_model (int): Embedding dimension.
+        dropout_rate (float): Dropout rate.
+        max_len (int): Number of positions precomputed at construction time.
+        reverse (bool): Whether to reverse the input position.
+    """
+
+    def __init__(self, d_model, dropout_rate, max_len=5000, reverse=False):
+        """Construct an PositionalEncoding object."""
+        super(PositionalEncoding, self).__init__()
+        self.d_model = d_model
+        self.reverse = reverse
+        self.xscale = math.sqrt(self.d_model)
+        self.dropout = torch.nn.Dropout(p=dropout_rate)
+        # pe is a plain attribute (not a registered buffer); extend_pe builds and caches it.
+        self.pe = None
+        self.extend_pe(torch.tensor(0.0).expand(1, max_len))
+
+    def extend_pe(self, x):
+        """Make sure ``self.pe`` covers ``x.size(1)`` positions on ``x``'s dtype and device."""
+        length = x.size(1)
+        if self.pe is not None and self.pe.size(1) >= length:
+            # The cached table is long enough: at most cast / move it.
+            if self.pe.dtype != x.dtype or self.pe.device != x.device:
+                self.pe = self.pe.to(dtype=x.dtype, device=x.device)
+            return
+
+        if self.reverse:
+            position = torch.arange(length - 1, -1, -1.0, dtype=torch.float32)
+        else:
+            position = torch.arange(0, length, dtype=torch.float32)
+        position = position.unsqueeze(1)
+        inv_freq = torch.exp(
+            torch.arange(0, self.d_model, 2, dtype=torch.float32) * -(math.log(10000.0) / self.d_model)
+        )
+        angles = position * inv_freq
+        table = torch.zeros(length, self.d_model)
+        table[:, 0::2] = torch.sin(angles)
+        table[:, 1::2] = torch.cos(angles)
+        self.pe = table.unsqueeze(0).to(device=x.device, dtype=x.dtype)
+
+    def forward(self, x: torch.Tensor):
+        """Add positional encoding.
+        Args:
+            x (torch.Tensor): Input tensor (batch, time, `*`).
+        Returns:
+            torch.Tensor: ``dropout(x * sqrt(d_model) + pe[:, :time])``, shape (batch, time, `*`).
+        """
+        self.extend_pe(x)
+        return self.dropout(x * self.xscale + self.pe[:, : x.size(1)])
+
+
+class ScaledPositionalEncoding(PositionalEncoding):
+    """Positional encoding weighted by a learnable scalar ``alpha``; the input is not rescaled.
+    See Sec. 3.2  https://arxiv.org/abs/1809.08895
+    Args:
+        d_model (int): Embedding dimension.
+        dropout_rate (float): Dropout rate.
+        max_len (int): Maximum input length.
+    """
+
+    def __init__(self, d_model, dropout_rate, max_len=5000):
+        """Initialize class."""
+        super().__init__(d_model, dropout_rate, max_len)
+        self.alpha = torch.nn.Parameter(torch.tensor(1.0))
+
+    def reset_parameters(self):
+        """Reset ``alpha`` to 1.0."""
+        self.alpha.data = torch.tensor(1.0)
+
+    def forward(self, x):
+        """Add the alpha-weighted positional encoding.
+        Args:
+            x (torch.Tensor): Input tensor (batch, time, `*`).
+        Returns:
+            torch.Tensor: ``dropout(x + alpha * pe[:, :time])``, shape (batch, time, `*`).
+        """
+        self.extend_pe(x)
+        weighted_pe = self.alpha * self.pe[:, : x.size(1)]
+        return self.dropout(x + weighted_pe)
+
+
+class RelPositionalEncoding(PositionalEncoding):
+    """Relative positional encoding: the embedding is returned separately instead of being added.
+    See : Appendix B in https://arxiv.org/abs/1901.02860
+    Args:
+        d_model (int): Embedding dimension.
+        dropout_rate (float): Dropout rate.
+        max_len (int): Maximum input length.
+    """
+
+    def __init__(self, d_model, dropout_rate, max_len=5000):
+        """Initialize class with a reversed position table."""
+        super().__init__(d_model, dropout_rate, max_len=max_len, reverse=True)
+
+    def forward(self, x):
+        """Compute positional encoding.
+        Args:
+            x (torch.Tensor): Input tensor (batch, time, `*`).
+        Returns:
+            torch.Tensor: Input scaled by sqrt(d_model), after dropout (batch, time, `*`).
+            torch.Tensor: Positional embedding tensor, after dropout (1, time, `*`).
+        """
+        self.extend_pe(x)
+        time = x.size(1)
+        scaled = self.dropout(x * self.xscale)
+        return scaled, self.dropout(self.pe[:, :time])
\ No newline at end of file
diff --git a/modules/commons/conformer/espnet_transformer_attn.py b/modules/commons/conformer/espnet_transformer_attn.py
new file mode 100644
index 0000000000000000000000000000000000000000..48a52aacbaf07ef191c28baf12123036c2bc6b10
--- /dev/null
+++ b/modules/commons/conformer/espnet_transformer_attn.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Shigeki Karita
+#  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
+
+"""Multi-Head Attention layer definition."""
+
+import math
+
+import numpy
+import torch
+from torch import nn
+
+
+class MultiHeadedAttention(nn.Module):
+    """Multi-Head Attention layer.
+    Args:
+        n_head (int): The number of heads.
+        n_feat (int): The number of features.
+        dropout_rate (float): Dropout rate.
+    """
+
+    def __init__(self, n_head, n_feat, dropout_rate):
+        """Construct an MultiHeadedAttention object."""
+        super(MultiHeadedAttention, self).__init__()
+        assert n_feat % n_head == 0
+        # We assume d_v always equals d_k
+        self.d_k = n_feat // n_head
+        self.h = n_head
+        self.linear_q = nn.Linear(n_feat, n_feat)
+        self.linear_k = nn.Linear(n_feat, n_feat)
+        self.linear_v = nn.Linear(n_feat, n_feat)
+        self.linear_out = nn.Linear(n_feat, n_feat)
+        self.attn = None  # last attention weights; only set by forward_attention()
+        self.dropout = nn.Dropout(p=dropout_rate)
+        # Use the fused kernel when this PyTorch provides it, else fall back to slow_attn().
+        self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention')
+        if not self.flash:
+            print("WARNING: using slow attention. Flash Attention requires PyTorch >= 2.0")
+
+    def forward_qkv(self, query, key, value):
+        """Transform query, key and value.
+        Args:
+            query (torch.Tensor): Query tensor (#batch, time1, size).
+            key (torch.Tensor): Key tensor (#batch, time2, size).
+            value (torch.Tensor): Value tensor (#batch, time2, size).
+        Returns:
+            torch.Tensor: Transformed query tensor (#batch, n_head, time1, d_k).
+            torch.Tensor: Transformed key tensor (#batch, n_head, time2, d_k).
+            torch.Tensor: Transformed value tensor (#batch, n_head, time2, d_k).
+        """
+        n_batch = query.size(0)
+        q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k).transpose(1, 2)
+        k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k).transpose(1, 2)
+        v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k).transpose(1, 2)
+        return q, k, v
+
+    def forward_attention(self, value, scores, mask):
+        """Compute attention context vector.
+        Args:
+            value (torch.Tensor): Transformed value (#batch, n_head, time2, d_k).
+            scores (torch.Tensor): Attention score (#batch, n_head, time1, time2).
+            mask (torch.Tensor): Mask (#batch, 1, time2) or (#batch, time1, time2);
+                positions equal to 0 are masked out. May be None.
+        Returns:
+            torch.Tensor: Transformed value (#batch, time1, d_model)
+                weighted by the attention score (#batch, time1, time2).
+        """
+        n_batch = value.size(0)
+        if mask is not None:
+            mask = mask.unsqueeze(1).eq(0)  # (batch, 1, *, time2)
+            # torch.finfo also covers dtypes NumPy cannot represent (e.g. bfloat16).
+            scores = scores.masked_fill(mask, torch.finfo(scores.dtype).min)
+            self.attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0)
+        else:
+            self.attn = torch.softmax(scores, dim=-1)  # (batch, head, time1, time2)
+
+        p_attn = self.dropout(self.attn)
+        x = torch.matmul(p_attn, value)  # (batch, head, time1, d_k)
+        x = x.transpose(1, 2).contiguous().view(n_batch, -1, self.h * self.d_k)
+        return self.linear_out(x)  # (batch, time1, d_model)
+
+    def forward(self, query, key, value, mask):
+        """Compute scaled dot product attention.
+        Args:
+            query (torch.Tensor): Query tensor (#batch, time1, size).
+            key (torch.Tensor): Key tensor (#batch, time2, size).
+            value (torch.Tensor): Value tensor (#batch, time2, size).
+            mask (torch.Tensor): 3-D per-frame mask, e.g. (#batch, time, 1); its outer product with
+                itself is handed to the attention kernel as ``attn_mask``. None disables masking.
+        Returns:
+            torch.Tensor: Output tensor (#batch, time1, d_model).
+        """
+        q, k, v = self.forward_qkv(query, key, value)
+
+        B, Nh, Nt, E = q.shape
+        # NOTE(review): q is pre-scaled and the kernel divides by sqrt(E) again; a float mask is added
+        # to the scores instead of masking them. Both are kept to preserve existing model behaviour.
+        q = q / math.sqrt(E)
+        if mask is not None:
+            mask = (mask * mask[:, None, :, 0])[:, None]  # (batch, 1, time, time)
+        if self.flash:
+            attn = torch.nn.functional.scaled_dot_product_attention(q, k, v, is_causal=False, attn_mask=mask)
+        else:
+            attn = self.slow_attn(q, k, v, is_causal=False, attn_mask=mask)
+        attn = attn.transpose(1, 2).reshape(B, -1, self.h * self.d_k)
+        return self.linear_out(attn)
+
+    def slow_attn(self, Q, K, V, is_causal, attn_mask):
+        """Reference attention used when the fused kernel is unavailable (``is_causal`` is ignored).
+
+        ``attn_mask`` may be None, a boolean tensor (True = attend) or a float tensor that is added
+        to the scores.
+        """
+        scores = Q @ K.transpose(-2, -1) / math.sqrt(Q.size(-1))
+        if attn_mask is not None:
+            if attn_mask.dtype == torch.bool:
+                # ``not tensor`` raises for multi-element tensors; negate element-wise instead and
+                # build an additive mask in the score dtype (a bool tensor cannot hold -inf).
+                attn_mask = torch.zeros_like(attn_mask, dtype=scores.dtype).masked_fill(~attn_mask, -float('inf'))
+            scores = scores + attn_mask
+        return torch.softmax(scores, dim=-1) @ V
+
+class RelPositionMultiHeadedAttention(MultiHeadedAttention):
+    """Multi-Head Attention layer with relative position encoding.
+    Paper: https://arxiv.org/abs/1901.02860
+    Args:
+        n_head (int): The number of heads.
+        n_feat (int): The number of features.
+        dropout_rate (float): Dropout rate.
+    """
+
+    def __init__(self, n_head, n_feat, dropout_rate):
+        """Construct an RelPositionMultiHeadedAttention object."""
+        super().__init__(n_head, n_feat, dropout_rate)
+        # linear transformation for positional encoding
+        self.linear_pos = nn.Linear(n_feat, n_feat, bias=False)
+        # these two learnable bias are used in matrix c and matrix d
+        # as described in https://arxiv.org/abs/1901.02860 Section 3.3
+        self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k))
+        self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k))
+        torch.nn.init.xavier_uniform_(self.pos_bias_u)
+        torch.nn.init.xavier_uniform_(self.pos_bias_v)
+
+    def rel_shift(self, x, zero_triu=False):
+        """Compute relative positional encoding.
+        Args:
+            x (torch.Tensor): Score tensor (batch, head, time1, time2).
+            zero_triu (bool): If true, return the lower triangular part of the matrix.
+        Returns:
+            torch.Tensor: Output tensor with the same shape as ``x``.
+        """
+        zero_pad = torch.zeros((*x.size()[:3], 1), device=x.device, dtype=x.dtype)
+        x_padded = torch.cat([zero_pad, x], dim=-1)
+        x_padded = x_padded.view(*x.size()[:2], x.size(3) + 1, x.size(2))
+        x = x_padded[:, :, 1:].view_as(x)
+
+        if zero_triu:
+            # Build the mask on x's device and cast it to x's dtype: a default CPU float32 tensor
+            # raises on a device mismatch for CUDA scores and promotes half-precision ones.
+            ones = torch.ones((x.size(2), x.size(3)), device=x.device)
+            x = x * torch.tril(ones, x.size(3) - x.size(2)).to(x.dtype)[None, None, :, :]
+        return x
+
+    def forward(self, query, key, value, pos_emb, mask):
+        """Compute 'Scaled Dot Product Attention' with rel. positional encoding.
+        Args:
+            query (torch.Tensor): Query tensor (#batch, time1, size).
+            key (torch.Tensor): Key tensor (#batch, time2, size).
+            value (torch.Tensor): Value tensor (#batch, time2, size).
+            pos_emb (torch.Tensor): Positional embedding tensor (#batch, time2, size).
+            mask (torch.Tensor): Mask tensor (#batch, 1, time2) or
+                (#batch, time1, time2).
+        Returns:
+            torch.Tensor: Output tensor (#batch, time1, d_model).
+        """
+        q, k, v = self.forward_qkv(query, key, value)
+        q = q.transpose(1, 2)  # (batch, time1, head, d_k)
+
+        n_batch_pos = pos_emb.size(0)
+        p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k)
+        p = p.transpose(1, 2)  # (batch, head, time2, d_k)
+
+        # (batch, head, time1, d_k)
+        q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2)
+        # (batch, head, time1, d_k)
+        q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2)
+
+        # compute attention score
+        # first compute matrix a and matrix c
+        # as described in https://arxiv.org/abs/1901.02860 Section 3.3
+        # (batch, head, time1, time2)
+        matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1))
+
+        # compute matrix b and matrix d
+        # (batch, head, time1, time2)
+        matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1))
+        matrix_bd = self.rel_shift(matrix_bd)
+
+        scores = (matrix_ac + matrix_bd) / math.sqrt(
+            self.d_k
+        )  # (batch, head, time1, time2)
+
+        return self.forward_attention(v, scores, mask)
diff --git a/modules/commons/conformer/layers.py b/modules/commons/conformer/layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd7f501667e0b8aa816373d843adc816748e73a8
--- /dev/null
+++ b/modules/commons/conformer/layers.py
@@ -0,0 +1,260 @@
+from torch import nn
+import torch
+
+from modules.commons.layers import LayerNorm
+
+
+class ConvolutionModule(nn.Module):
+    """ConvolutionModule in Conformer model.
+
+    Pipeline: pointwise conv (channels -> 2 * channels), GLU over the channel axis, depthwise
+    conv, BatchNorm1d, activation, pointwise conv.
+
+    Args:
+        channels (int): The number of channels of conv layers.
+        kernel_size (int): Kernel size of the depthwise conv; must be odd.
+        activation (nn.Module): Activation applied after the batch norm. The default ReLU
+            instance is created once, at definition time, and shared by every default user.
+        bias (bool): Whether the three convolutions use a bias term.
+    """
+
+    def __init__(self, channels, kernel_size, activation=nn.ReLU(), bias=True):
+        """Construct an ConvolutionModule object."""
+        super(ConvolutionModule, self).__init__()
+        # kernel_size should be an odd number for 'SAME' padding
+        assert (kernel_size - 1) % 2 == 0
+        same_padding = (kernel_size - 1) // 2
+
+        self.pointwise_conv1 = nn.Conv1d(
+            channels, 2 * channels,
+            kernel_size=1, stride=1, padding=0, bias=bias,
+        )
+        self.depthwise_conv = nn.Conv1d(
+            channels,
+            channels,
+            kernel_size,
+            stride=1,
+            padding=same_padding,
+            groups=channels,  # one filter group per channel
+            bias=bias,
+        )
+        self.norm = nn.BatchNorm1d(channels)
+        self.pointwise_conv2 = nn.Conv1d(
+            channels, channels,
+            kernel_size=1, stride=1, padding=0, bias=bias,
+        )
+        self.activation = activation
+
+    def forward(self, x):
+        """Compute convolution module.
+        Args:
+            x (torch.Tensor): Input tensor (#batch, time, channels).
+        Returns:
+            torch.Tensor: Output tensor (#batch, time, channels).
+        """
+        # Conv1d expects (batch, channels, time)
+        channels_first = x.transpose(1, 2)
+
+        # GLU mechanism: (batch, 2 * channels, time) -> (batch, channels, time)
+        expanded = self.pointwise_conv1(channels_first)
+        gated = nn.functional.glu(expanded, dim=1)
+
+        # 1D depthwise conv followed by batch norm and the activation
+        filtered = self.depthwise_conv(gated)
+        activated = self.activation(self.norm(filtered))
+
+        # final pointwise conv, then back to (batch, time, channels)
+        projected = self.pointwise_conv2(activated)
+        return projected.transpose(1, 2)
+
+
+class MultiLayeredConv1d(torch.nn.Module):
+    """Multi-layered conv1d for Transformer block.
+    This is a module of multi-layered conv1d designed
+    to replace positionwise feed-forward network
+    in Transformer block, which is introduced in
+    `FastSpeech: Fast, Robust and Controllable Text to Speech`_.
+    .. _`FastSpeech: Fast, Robust and Controllable Text to Speech`:
+        https://arxiv.org/pdf/1905.09263.pdf
+
+    Structure: conv1d (in_chans -> hidden_chans), ReLU, dropout, conv1d (hidden_chans -> in_chans).
+    """
+
+    def __init__(self, in_chans, hidden_chans, kernel_size, dropout_rate):
+        """Initialize MultiLayeredConv1d module.
+        Args:
+            in_chans (int): Number of input channels.
+            hidden_chans (int): Number of hidden channels.
+            kernel_size (int): Kernel size of conv1d.
+            dropout_rate (float): Dropout rate.
+        """
+        super(MultiLayeredConv1d, self).__init__()
+        # padding that keeps the time length unchanged for odd kernel sizes
+        pad = (kernel_size - 1) // 2
+
+        self.w_1 = torch.nn.Conv1d(
+            in_chans, hidden_chans, kernel_size, stride=1, padding=pad
+        )
+        self.w_2 = torch.nn.Conv1d(
+            hidden_chans, in_chans, kernel_size, stride=1, padding=pad
+        )
+        self.dropout = torch.nn.Dropout(dropout_rate)
+
+    def forward(self, x):
+        """Calculate forward propagation.
+        Args:
+            x (torch.Tensor): Batch of input tensors (B, T, in_chans).
+        Returns:
+            torch.Tensor: Batch of output tensors (B, T, in_chans).
+        """
+        # Conv1d works channels-first, so transpose around each convolution.
+        hidden = torch.relu(self.w_1(x.transpose(-1, 1)))
+        hidden = self.dropout(hidden.transpose(-1, 1))
+
+        return self.w_2(hidden.transpose(-1, 1)).transpose(-1, 1)
+
+
+class Swish(torch.nn.Module):
+    """Swish activation module: ``x * sigmoid(x)``."""
+    def forward(self, x):
+        """Return the Swish activation applied element-wise to ``x``."""
+        gate = torch.sigmoid(x)
+        return x * gate
+
+
+class EncoderLayer(nn.Module):
+    """Encoder layer: optional macaron FFN, self-attention, optional conv module, then FFN.
+    Args:
+        size (int): Input dimension.
+        self_attn (torch.nn.Module): Self-attention module instance.
+            `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` instance
+            can be used as the argument.
+        feed_forward (torch.nn.Module): Feed-forward module instance.
+            `PositionwiseFeedForward`, `MultiLayeredConv1d`, or `Conv1dLinear` instance
+            can be used as the argument.
+        feed_forward_macaron (torch.nn.Module): Additional feed-forward module instance, or None.
+            `PositionwiseFeedForward`, `MultiLayeredConv1d`, or `Conv1dLinear` instance
+            can be used as the argument.
+        conv_module (torch.nn.Module): Convolution module instance, or None to skip it.
+            `ConvlutionModule` instance can be used as the argument.
+        dropout_rate (float): Dropout rate.
+        normalize_before (bool): If True, layer_norm is applied before each sub-block, else after it.
+        concat_after (bool): Whether to concat attention layer's input and output.
+            if True, additional linear will be applied.
+            i.e. x -> x + linear(concat(x, att(x)))
+            if False, no additional linear will be applied. i.e. x -> x + att(x)
+    """
+
+    def __init__(
+            self,
+            size,
+            self_attn,
+            feed_forward,
+            feed_forward_macaron,
+            conv_module,
+            dropout_rate,
+            normalize_before=True,
+            concat_after=False,
+    ):
+        """Construct an EncoderLayer object."""
+        super(EncoderLayer, self).__init__()
+        self.self_attn = self_attn
+        self.feed_forward = feed_forward
+        self.feed_forward_macaron = feed_forward_macaron
+        self.conv_module = conv_module
+        self.norm_ff = LayerNorm(size)  # for the FNN module
+        self.norm_mha = LayerNorm(size)  # for the MHA module
+        if feed_forward_macaron is not None:
+            self.norm_ff_macaron = LayerNorm(size)
+            self.ff_scale = 0.5  # macaron style: each of the two FFN residuals is halved
+        else:
+            self.ff_scale = 1.0
+        if self.conv_module is not None:
+            self.norm_conv = LayerNorm(size)  # for the CNN module
+            self.norm_final = LayerNorm(size)  # for the final output of the block
+        self.dropout = nn.Dropout(dropout_rate)  # one dropout module shared by all sub-blocks
+        self.size = size
+        self.normalize_before = normalize_before
+        self.concat_after = concat_after
+        if self.concat_after:
+            self.concat_linear = nn.Linear(size + size, size)
+
+    def forward(self, x_input, mask, cache=None):
+        """Compute encoded features.
+        Args:
+            x_input (Union[Tuple, torch.Tensor]): Input tensor w/ or w/o pos emb.
+                - w/ pos emb: Tuple of tensors [(#batch, time, size), (1, time, size)].
+                - w/o pos emb: Tensor (#batch, time, size).
+            mask (torch.Tensor): Mask tensor for the input, or None; sliced as a 3-D tensor when cache is given.
+            cache (torch.Tensor): Cache tensor of the input (#batch, time - 1, size).
+        Returns:
+            Output tensor (#batch, time, size), or the tuple (output, pos_emb) when pos emb was given.
+            torch.Tensor: Mask tensor, restricted to the last query position when cache is given.
+        """
+        if isinstance(x_input, tuple):
+            x, pos_emb = x_input[0], x_input[1]
+        else:
+            x, pos_emb = x_input, None
+
+        # whether to use macaron style
+        if self.feed_forward_macaron is not None:
+            residual = x
+            if self.normalize_before:
+                x = self.norm_ff_macaron(x)
+            x = residual + self.ff_scale * self.dropout(self.feed_forward_macaron(x))
+            if not self.normalize_before:
+                x = self.norm_ff_macaron(x)
+
+        # multi-headed self-attention module
+        residual = x
+        if self.normalize_before:
+            x = self.norm_mha(x)
+
+        if cache is None:
+            x_q = x
+        else:
+            # incremental mode: only the last frame is used as the query
+            assert cache.shape == (x.shape[0], x.shape[1] - 1, self.size)
+            x_q = x[:, -1:, :]
+            residual = residual[:, -1:, :]
+            mask = None if mask is None else mask[:, -1:, :]
+
+        if pos_emb is not None:
+            x_att = self.self_attn(x_q, x, x, pos_emb, mask)
+        else:
+            x_att = self.self_attn(x_q, x, x, mask)
+
+        if self.concat_after:
+            x_concat = torch.cat((x, x_att), dim=-1)  # NOTE(review): with cache, x keeps the full time axis while x_q is one frame — confirm shapes agree
+            x = residual + self.concat_linear(x_concat)
+        else:
+            x = residual + self.dropout(x_att)
+        if not self.normalize_before:
+            x = self.norm_mha(x)
+
+        # convolution module
+        if self.conv_module is not None:
+            residual = x
+            if self.normalize_before:
+                x = self.norm_conv(x)
+            x = residual + self.dropout(self.conv_module(x))
+            if not self.normalize_before:
+                x = self.norm_conv(x)
+
+        # feed forward module
+        residual = x
+        if self.normalize_before:
+            x = self.norm_ff(x)
+        x = residual + self.ff_scale * self.dropout(self.feed_forward(x))
+        if not self.normalize_before:
+            x = self.norm_ff(x)
+
+        if self.conv_module is not None:
+            x = self.norm_final(x)  # extra output norm, only present when a conv module is configured
+
+        if cache is not None:
+            x = torch.cat([cache, x], dim=1)  # prepend the cached frames along the time axis
+
+        if pos_emb is not None:
+            return (x, pos_emb), mask
+
+        return x, mask
diff --git a/modules/commons/conv.py b/modules/commons/conv.py
new file mode 100644
index 0000000000000000000000000000000000000000..a601f06042c2db37ace11ce72149101a9b8aefe4
--- /dev/null
+++ b/modules/commons/conv.py
@@ -0,0 +1,198 @@
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from modules.commons.layers import LayerNorm, Embedding
+
+
+class LambdaLayer(nn.Module):  # adapts a plain callable to the nn.Module interface
+    def __init__(self, lambd):
+        super().__init__()
+        self.lambd = lambd
+
+    def forward(self, x):  # delegate to the wrapped callable
+        return self.lambd(x)
+
+
+def init_weights_func(m):
+    # Xavier-uniform init for every module whose class name contains "Conv1d".
+    if "Conv1d" in m.__class__.__name__:
+        torch.nn.init.xavier_uniform_(m.weight)
+
+
+class ResidualBlock(nn.Module):
+    """Residual stack of n sub-blocks, each: norm -> [left pad ->] conv -> scale -> GELU -> 1x1 conv."""
+
+    def __init__(self, channels, kernel_size, dilation, n=2, norm_type='bn', dropout=0.0,
+                 c_multiple=2, ln_eps=1e-12, left_pad=False):
+        super(ResidualBlock, self).__init__()
+
+        if norm_type == 'bn':
+            norm_builder = lambda: nn.BatchNorm1d(channels)
+        elif norm_type == 'in':
+            norm_builder = lambda: nn.InstanceNorm1d(channels, affine=True)
+        elif norm_type == 'gn':
+            norm_builder = lambda: nn.GroupNorm(8, channels)
+        elif norm_type == 'ln':
+            norm_builder = lambda: LayerNorm(channels, dim=1, eps=ln_eps)
+        else:  # any other norm_type disables normalization
+            norm_builder = lambda: nn.Identity()
+
+        if left_pad:  # zero-pad on the left only, then convolve with padding=0
+            self.blocks = [
+                nn.Sequential(
+                    norm_builder(),
+                    nn.ConstantPad1d(((dilation * (kernel_size - 1)) // 2 * 2, 0), 0),
+                    nn.Conv1d(channels, c_multiple * channels, kernel_size, dilation=dilation, padding=0),
+                    LambdaLayer(lambda x: x * kernel_size ** -0.5),  # scale conv output by kernel_size ** -0.5
+                    nn.GELU(),
+                    nn.Conv1d(c_multiple * channels, channels, 1, dilation=dilation, padding_mode='reflect'),
+                )
+                for i in range(n)
+            ]
+        else:  # symmetric reflect padding handled by the conv itself
+            self.blocks = [
+                nn.Sequential(
+                    norm_builder(),
+                    nn.Conv1d(channels, c_multiple * channels, kernel_size, dilation=dilation,
+                              padding=(dilation * (kernel_size - 1)) // 2, padding_mode='reflect'),
+                    LambdaLayer(lambda x: x * kernel_size ** -0.5),  # scale conv output by kernel_size ** -0.5
+                    nn.GELU(),
+                    nn.Conv1d(c_multiple * channels, channels, 1, dilation=dilation, padding_mode='reflect'),
+                )
+                for i in range(n)
+            ]
+
+        self.blocks = nn.ModuleList(self.blocks)
+        self.dropout = dropout
+
+    def forward(self, x):
+        nonpadding = (x.abs().sum(1) > 0).float()[:, None, :]  # 1.0 for frames whose channels are not all zero
+        for b in self.blocks:
+            x_ = b(x)
+            if self.dropout > 0 and self.training:
+                x_ = F.dropout(x_, self.dropout, training=self.training)
+            x = x + x_
+            x = x * nonpadding  # re-zero the masked frames after every sub-block
+        return x
+
+
+class ConvBlocks(nn.Module):
+    """Stack of ResidualBlocks followed by a final norm and an output conv (post_net1)."""
+
+    def __init__(self, hidden_size, out_dims, dilations, kernel_size,
+                 norm_type='ln', layers_in_block=2, c_multiple=2,
+                 dropout=0.0, ln_eps=1e-5,
+                 init_weights=True, is_BTC=True, num_layers=None, post_net_kernel=3,
+                 left_pad=False, c_in=None):
+        super(ConvBlocks, self).__init__()
+        self.is_BTC = is_BTC
+        if num_layers is not None:  # num_layers overrides dilations with that many dilation-1 blocks
+            dilations = [1] * num_layers
+        self.res_blocks = nn.Sequential(
+            *[ResidualBlock(hidden_size, kernel_size, d,
+                            n=layers_in_block, norm_type=norm_type, c_multiple=c_multiple,
+                            dropout=dropout, ln_eps=ln_eps, left_pad=left_pad)
+              for d in dilations],
+        )
+        if norm_type == 'bn':
+            self.last_norm = nn.BatchNorm1d(hidden_size)
+        elif norm_type == 'in':
+            self.last_norm = nn.InstanceNorm1d(hidden_size, affine=True)
+        elif norm_type == 'gn':
+            self.last_norm = nn.GroupNorm(8, hidden_size)
+        elif norm_type == 'ln':
+            self.last_norm = LayerNorm(hidden_size, dim=1, eps=ln_eps)
+        else:  # unknown norm_type: no normalization, matching ResidualBlock's fallback
+            self.last_norm = nn.Identity()
+        if left_pad:
+            self.post_net1 = nn.Sequential(
+                nn.ConstantPad1d((post_net_kernel // 2 * 2, 0), 0),
+                nn.Conv1d(hidden_size, out_dims, kernel_size=post_net_kernel, padding=0),
+            )
+        else:
+            self.post_net1 = nn.Conv1d(hidden_size, out_dims, kernel_size=post_net_kernel,
+                                       padding=post_net_kernel // 2, padding_mode='reflect')
+        self.c_in = c_in
+        if c_in is not None:  # optional 1x1 projection from c_in to hidden_size channels
+            self.in_conv = nn.Conv1d(c_in, hidden_size, kernel_size=1, padding_mode='reflect')
+        if init_weights:
+            self.apply(init_weights_func)
+
+    def forward(self, x, nonpadding=None):
+        """Run in_conv (if c_in is set), the residual stack, the final norm and post_net1.
+        :param x: [B, T, C] if is_BTC else [B, C, T]; nonpadding: optional mask, transposed(1, 2) if is_BTC
+        :return: output with out_dims channels, in the same layout as x
+        """
+        if self.is_BTC:
+            x = x.transpose(1, 2)
+        if self.c_in is not None:
+            x = self.in_conv(x)
+        if nonpadding is None:  # derive the mask from frames that are not all zero
+            nonpadding = (x.abs().sum(1) > 0).float()[:, None, :]
+        elif self.is_BTC:
+            nonpadding = nonpadding.transpose(1, 2)
+        x = self.res_blocks(x) * nonpadding
+        x = self.last_norm(x) * nonpadding
+        x = self.post_net1(x) * nonpadding
+        if self.is_BTC:
+            x = x.transpose(1, 2)
+        return x
+
+
+class TextConvEncoder(ConvBlocks):
+    """ConvBlocks encoder with an optional token-embedding front end."""
+
+    def __init__(self, dict_size, hidden_size, out_dims, dilations, kernel_size,
+                 norm_type='ln', layers_in_block=2, c_multiple=2,
+                 dropout=0.0, ln_eps=1e-5, init_weights=True, num_layers=None, post_net_kernel=3):
+        super().__init__(
+            hidden_size, out_dims, dilations, kernel_size,
+            norm_type=norm_type, layers_in_block=layers_in_block, c_multiple=c_multiple,
+            dropout=dropout, ln_eps=ln_eps, init_weights=init_weights,
+            num_layers=num_layers, post_net_kernel=post_net_kernel)
+        self.dict_size = dict_size
+        if dict_size > 0:  # otherwise no embedding table exists and forward uses txt_tokens as-is
+            self.embed_tokens = Embedding(dict_size, hidden_size, 0)
+            self.embed_scale = math.sqrt(hidden_size)
+
+    def forward(self, txt_tokens, other_embeds=0):
+        """Embed (when dict_size > 0), add ``other_embeds`` and run the conv stack.
+
+        :param txt_tokens: [B, T]; entries > 0 are treated as non-padding
+        :return: result of ``ConvBlocks.forward`` on the embedded input
+        """
+        if self.dict_size > 0:
+            hidden = self.embed_scale * self.embed_tokens(txt_tokens)
+        else:
+            hidden = txt_tokens
+        return super().forward(hidden + other_embeds, nonpadding=(txt_tokens > 0).float()[..., None])
+
+
+class ConditionalConvBlocks(ConvBlocks):
+    """ConvBlocks whose input is summed with a conv projection (g_prenet) of a condition."""
+
+    def __init__(self, hidden_size, c_cond, c_out, dilations, kernel_size,
+                 norm_type='ln', layers_in_block=2, c_multiple=2,
+                 dropout=0.0, ln_eps=1e-5, init_weights=True, is_BTC=True, num_layers=None):
+        super().__init__(hidden_size, c_out, dilations, kernel_size,
+                         norm_type, layers_in_block, c_multiple,
+                         dropout, ln_eps, init_weights, is_BTC=False, num_layers=num_layers)
+        self.g_prenet = nn.Conv1d(c_cond, hidden_size, 3, padding=1, padding_mode='reflect')
+        self.is_BTC_ = is_BTC  # layout is handled here; the parent always runs channel-first
+        if init_weights:
+            self.g_prenet.apply(init_weights_func)
+
+    def forward(self, x, cond, nonpadding=None):
+        """x, cond (and nonpadding) are [B, T, C] when is_BTC was True, else [B, C, T]; same layout is returned."""
+        if self.is_BTC_:
+            x = x.transpose(1, 2)
+            cond = cond.transpose(1, 2)
+            if nonpadding is not None:
+                nonpadding = nonpadding.transpose(1, 2)
+        if nonpadding is None:  # 0/1 mask of non-zero frames (a binary mask, not the raw magnitude)
+            nonpadding = (x.abs().sum(1) > 0).float()[:, None]
+        x = (x + self.g_prenet(cond)) * nonpadding
+        x = super(ConditionalConvBlocks, self).forward(x)  # parent was built with is_BTC=False: takes [B, C, T]
+        return x.transpose(1, 2) if self.is_BTC_ else x
diff --git a/modules/commons/gpt.py b/modules/commons/gpt.py
new file mode 100644
index 0000000000000000000000000000000000000000..16e40349d0fae65107206033583d2cdc55289d09
--- /dev/null
+++ b/modules/commons/gpt.py
@@ -0,0 +1,474 @@
+import math
+import torch
+from typing import Optional, Tuple
+from torch import nn
+from utils.nn.seq_utils import get_incremental_state, set_incremental_state, softmax, make_positions
+import torch.nn.functional as F
+
+# from flash_attn import flash_attn_qkvpacked_func, flash_attn_func
+
+DEFAULT_MAX_SOURCE_POSITIONS = 20000  # not referenced elsewhere in this module
+DEFAULT_MAX_TARGET_POSITIONS = 20000  # not referenced elsewhere in this module
+
+
+class RotaryEmbeddings(nn.Module):
+    cos: torch.Tensor  # (length, width) cosine table, float16, non-persistent buffer
+    sin: torch.Tensor  # (length, width) sine table, float16, non-persistent buffer
+    theta: torch.Tensor  # (width / 2,) per-pair frequencies, non-persistent buffer
+
+    def __init__(
+            self,
+            width: int,
+            *,
+            seq_len: int = 4000,
+            base: int = 10000,
+            device: Optional[torch.device] = None,
+    ):
+        """Rotary embeddings (Su et al., 2021) layer. The rotary embedding
+        will be precomputed for up to 'seq_len' positions. The embedding
+        will be recomputed when a longer sequence is found in the input.
+
+        :param width:
+            Rotary embedding dimensionality, must be even.
+        :param seq_len:
+            Number of positions to initially precompute.
+        :param base:
+            The base used for Θ_i, determines the cycle length of the
+            embeddings.
+        :param device: Device on which the module is to be initialized.
+        :raises ValueError: if ``width`` is odd.
+        """
+        super().__init__()
+
+        if width % 2:
+            raise ValueError(f"Width of rotary embeddings must be even, was: {width}")
+
+        # Ignore allocations on the meta device as we don't persist our buffer,
+        # i.e., we don't expect the backing tensor to be replaced with pretrained weights.
+        if device is not None and device.type == "meta":
+            device = None
+        # Θ_i = 10000^(-2(i-1)/d)
+        theta = torch.pow(
+            base, -torch.arange(0, width, 2, dtype=torch.float, device=device) / width
+        )
+        self.register_buffer("theta", theta, persistent=False)
+
+        self._create_rotary_embed(width=width, length=seq_len)
+
+    def _create_rotary_embed(self, *, width: int, length: int):
+        # mΘ: outer product of positions [0, length) and the frequencies, shape (length, width / 2)
+        position = torch.arange(length, device=self.theta.device).unsqueeze(1)
+        m_theta = position * self.theta.unsqueeze(0)
+
+        # We apply both sin and cos twice (see Eq 15, 34), but the ordering
+        # is changed for compatibility with most common implementations.
+        m_theta = torch.cat([m_theta, m_theta], dim=-1)
+
+        re_cos = m_theta.cos().view([length, width]).half()  # tables are stored in float16
+        re_sin = m_theta.sin().view([length, width]).half()
+
+        self.register_buffer("cos", re_cos, persistent=False)  # re-registering replaces the previous table
+        self.register_buffer("sin", re_sin, persistent=False)
+
+    def _rotate(self, input: torch.Tensor):
+        """Rotate the input tensor by half of its innermost width.
+
+        input (Tensor): array to rotate.
+        RETURNS (Tensor): rotated array, i.e. concat(-second_half, first_half).
+
+        Shapes:
+            input - (..., width)
+            output - (..., width)
+        """
+        half_idx = input.shape[-1] // 2
+        input_1 = -input[..., half_idx:]
+        input_2 = input[..., :half_idx]
+        return torch.cat([input_1, input_2], dim=-1)
+
+    def forward(self, input: torch.Tensor, *, positions: Optional[torch.Tensor] = None):
+        """
+        Apply rotary embeddings to an array.
+
+        :param input: Array to apply the rotary embeddings to.
+        :param positions: positions of the inputs. If no positions are
+            provided, they are assumed to be [0, seq_len).
+        :return: Array with the rotary embeddings applied.
+
+        Shapes:
+            input - (batch_size, num_heads, seq_len, width_per_head)
+            positions - (batch_size, seq_len)
+            output - (batch_size, num_heads, seq_len, width_per_head)
+        """
+        batch_size, _, seq_len, width = input.shape
+
+        if positions is None:
+            # Fastpath: positions from [0..seq_len), avoid indexing.
+            if self.cos.size(-2) < seq_len:
+                self._create_rotary_embed(width=width, length=seq_len)  # grow the tables on demand
+            rot_cos = self.cos[:seq_len, :].view(1, 1, seq_len, width)
+            rot_sin = self.sin[:seq_len, :].view(1, 1, seq_len, width)
+        else:
+            max_len = int(positions.max()) + 1
+            if self.cos.size(-2) < max_len:
+                self._create_rotary_embed(width=width, length=max_len)  # grow the tables on demand
+
+            # Flatten positions to index cos/sin arrays, then unflatten.
+            #
+            # Example shapes:
+            #
+            #   positions_flat - (batch_size * seq_len)
+            #   self.cos - (max_len, width)
+            #   rot_cos - (batch_size, 1, seq_len, width)
+            positions_flat = positions.view(-1)
+            rot_cos = self.cos[positions_flat].view(batch_size, 1, seq_len, width)
+            rot_sin = self.sin[positions_flat].view(batch_size, 1, seq_len, width)
+
+        # Eq 34 with ordering changed for compatibility.
+        return rot_cos * input + rot_sin * self._rotate(input)
+
+
+class LayerNorm(nn.Module):
+    """Layer normalization over the trailing ``ndim`` features, with a learnable scale and an optional bias."""
+
+    def __init__(self, ndim, bias=False):
+        super(LayerNorm, self).__init__()
+        self.weight = nn.Parameter(torch.ones(ndim))
+        self.bias = None if not bias else nn.Parameter(torch.zeros(ndim))
+
+    def forward(self, input):
+        return F.layer_norm(input, normalized_shape=self.weight.shape, weight=self.weight, bias=self.bias, eps=1e-5)
+
+
+class CausalSelfAttention(nn.Module):
+    """Multi-head self-attention with rotary embeddings and an incremental k/v cache; masking is left to attn_mask."""
+
+    def __init__(self, embed_dim, num_heads, dropout=0.):
+        super().__init__()
+        # Typically, bias = True in Linears and LayerNorms, like GPT-2. But we set bias = False: a bit better and faster (following https://github.com/karpathy/nanoGPT)
+        assert embed_dim % num_heads == 0
+        self.embed_dim = embed_dim
+        self.num_heads = num_heads
+        self.dropout = dropout
+        self.head_dim = embed_dim // num_heads
+        assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"
+        self.scaling = self.head_dim ** -0.5
+        # key, query, value projections for all heads, but in a batch
+        self.c_attn = nn.Linear(embed_dim, 3 * embed_dim, bias=False)
+        # output projection
+        self.out_proj = nn.Linear(embed_dim, embed_dim, bias=False)
+        # rotary embeddings
+        self.rotary_embeds = RotaryEmbeddings(width=embed_dim // num_heads)
+        # flash attention make GPU go brrrrr but support is only in PyTorch >= 2.0
+        self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention')
+        if not self.flash:
+            print("WARNING: using slow attention. Flash Attention requires PyTorch >= 2.0")
+
+    def forward(
+            self,
+            query, key, value,
+            spk_pos_ids_flat=None,
+            incremental_state=None,
+            need_weights=True,
+            static_kv=False,
+            attn_mask=None,
+            need_head_weights=False,
+            enc_dec_attn_constraint_mask=None,
+    ):
+        """Self-attention over ``query`` (Time x Batch x Channel).
+
+        ``key`` and ``value`` are accepted but not read: q, k and v are all projected from ``query``.
+        Args:
+            need_weights (bool, optional): return attention weights averaged over heads
+                (default: True). Weights are only produced by the non-fused path; when
+                ``scaled_dot_product_attention`` is used they are returned as None.
+            attn_mask (optional): forwarded to ``scaled_dot_product_attention`` on the fused
+                path, or added to the attention scores on the fallback path (default: None).
+            need_head_weights (bool, optional): return per-head weights; implies *need_weights*.
+        Returns: ``(attn, (attn_weights, attn_logits))`` with ``attn`` shaped like ``query``.
+        """
+        if need_head_weights:
+            need_weights = True
+
+        tgt_len, bsz, embed_dim = query.size()
+        assert embed_dim == self.embed_dim
+        assert list(query.size()) == [tgt_len, bsz, embed_dim]
+
+        if incremental_state is not None:
+            saved_state = self._get_input_buffer(incremental_state)
+        else:
+            saved_state = None
+
+        # calculate query, key, values for all heads in batch and move head forward to be the batch dim
+        q, k, v = self.c_attn(query).split(self.embed_dim, dim=2)
+
+        q = q.contiguous().view(tgt_len, bsz * self.num_heads, self.head_dim).transpose(0, 1)
+        k = k.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1)
+        v = v.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1)
+
+        # Apply rot embedding and store incremental_state
+        q = self.rotary_embeds(q[None, :], positions=spk_pos_ids_flat)[0]
+        if saved_state is not None:
+            # saved states are stored with shape (bsz, num_heads, seq_len, head_dim)
+            if 'prev_key' in saved_state:
+                prev_key = saved_state['prev_key'].view(bsz * self.num_heads, -1, self.head_dim)
+                if static_kv:
+                    k = prev_key
+                else:
+                    k = torch.cat((prev_key, k), dim=1)
+            if 'prev_value' in saved_state:
+                prev_value = saved_state['prev_value'].view(bsz * self.num_heads, -1, self.head_dim)
+                if static_kv:
+                    v = prev_value
+                else:
+                    v = torch.cat((prev_value, v), dim=1)
+            saved_state['prev_key'], saved_state['prev_value'] = k.view(bsz, self.num_heads, -1, self.head_dim), v.view(
+                bsz, self.num_heads, -1, self.head_dim)
+            self._set_input_buffer(incremental_state, saved_state)
+        if incremental_state is not None:
+            key_pos = torch.arange(k.shape[-2], device=q.device).unsqueeze(0)
+        else:
+            key_pos = spk_pos_ids_flat
+        k = self.rotary_embeds(k[None, :], positions=key_pos)[0]
+
+        src_len = k.size(1)
+
+        # Start Attention
+        if self.flash:
+            # efficient attention using Flash Attention CUDA kernels
+            attn = torch.nn.functional.scaled_dot_product_attention(
+                q, k, v, attn_mask=attn_mask, dropout_p=0,
+                is_causal=False)
+            assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim]
+            attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)
+            attn = self.out_proj(attn)
+            # The fused kernel does not expose attention probabilities or logits, so
+            # need_weights cannot be honoured on this path; None is returned instead.
+            attn_logits = None
+            attn_weights_float = None
+        else:
+            # NOTE(review): self.scaling is never applied here, while scaled_dot_product_attention scales by 1/sqrt(head_dim) by default — confirm.
+            attn_weights = torch.bmm(q, k.transpose(1, 2))
+            assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len]
+
+            if attn_mask is not None:
+                if len(attn_mask.shape) == 2:
+                    attn_mask = attn_mask.unsqueeze(0)
+                elif len(attn_mask.shape) == 3:
+                    attn_mask = attn_mask[:, None].repeat([1, self.num_heads, 1, 1]).reshape(
+                        bsz * self.num_heads, tgt_len, src_len)
+                attn_weights = attn_weights + attn_mask
+
+            attn_logits = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
+
+            attn_weights_float = softmax(attn_weights, dim=-1)
+            attn_weights = attn_weights_float.type_as(attn_weights)
+            attn_probs = F.dropout(attn_weights_float.type_as(attn_weights), p=self.dropout, training=self.training)
+
+            attn = torch.bmm(attn_probs, v)
+            assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim]
+            attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)
+            attn = self.out_proj(attn)
+
+        if need_weights and attn_weights_float is not None:
+            attn_weights = attn_weights_float.view(bsz, self.num_heads, tgt_len, src_len).transpose(1, 0)
+            if not need_head_weights:
+                # average attention weights over heads
+                attn_weights = attn_weights.mean(dim=0)
+        else:
+            attn_weights = None
+
+        return attn, (attn_weights, attn_logits)
+
+    def _get_input_buffer(self, incremental_state):
+        return get_incremental_state(
+            self,
+            incremental_state,
+            'attn_state',
+        ) or {}
+
+    def _set_input_buffer(self, incremental_state, buffer):
+        set_incremental_state(
+            self,
+            incremental_state,
+            'attn_state',
+            buffer,
+        )
+
+    def clear_buffer(self, incremental_state=None):
+        if incremental_state is not None:
+            saved_state = self._get_input_buffer(incremental_state)
+            if 'prev_key' in saved_state:
+                del saved_state['prev_key']
+            if 'prev_value' in saved_state:
+                del saved_state['prev_value']
+            self._set_input_buffer(incremental_state, saved_state)
+
+
+class TransformerFFNLayer(nn.Module):  # Conv1d -> scale -> SiLU -> dropout -> Linear feed-forward
+    def __init__(self, hidden_size, filter_size, padding="SAME", kernel_size=1, dropout=0., act='gelu'):
+        super().__init__()
+        self.kernel_size = kernel_size
+        self.dropout = dropout
+        self.act = act
+        if padding == 'SAME':  # symmetric zero padding of kernel_size // 2 on both sides
+            self.ffn_1 = nn.Conv1d(hidden_size, filter_size, kernel_size, padding=kernel_size // 2, bias=False)
+        elif padding == 'LEFT':  # NOTE(review): any other padding value leaves ffn_1 undefined
+            self.ffn_1 = nn.Sequential(
+                nn.ConstantPad1d((kernel_size - 1, 0), 0.0),
+                nn.Conv1d(hidden_size, filter_size, kernel_size, bias=False)
+            )
+        self.ffn_2 = nn.Linear(filter_size, hidden_size, bias=False)
+
+    def forward(self, x, incremental_state=None):
+        # x: T x B x C
+        if incremental_state is not None:
+            T_inp = x.shape[0]  # number of new frames passed in this call
+            saved_state = self._get_input_buffer(incremental_state)
+            if 'prev_input' in saved_state:
+                prev_input = saved_state['prev_input']
+                x = torch.cat((prev_input, x), dim=0)
+            x = x[-self.kernel_size:]  # keep at most the last kernel_size frames as conv context
+            saved_state['prev_input'] = x
+            self._set_input_buffer(incremental_state, saved_state)
+
+        x = self.ffn_1(x.permute(1, 2, 0)).permute(2, 0, 1)  # T x B x C -> B x C x T for the conv, then back
+        x = x * self.kernel_size ** -0.5
+
+        if incremental_state is not None:
+            x = x[-T_inp:]  # keep only the outputs for the frames passed in this call
+        # NOTE: `self.act` is stored but not consulted; the selection below is disabled
+        # and SiLU is applied unconditionally:
+        #     if self.act == 'gelu': x = F.gelu(x)
+        #     if self.act == 'relu': x = F.relu(x)
+        x = F.silu(x)
+        x = F.dropout(x, self.dropout, training=self.training)
+        x = self.ffn_2(x)
+        return x
+
+    def _get_input_buffer(self, incremental_state):
+        return get_incremental_state(
+            self,
+            incremental_state,
+            'f',
+        ) or {}
+
+    def _set_input_buffer(self, incremental_state, buffer):
+        set_incremental_state(
+            self,
+            incremental_state,
+            'f',
+            buffer,
+        )
+
+    def clear_buffer(self, incremental_state):
+        if incremental_state is not None:
+            saved_state = self._get_input_buffer(incremental_state)
+            if 'prev_input' in saved_state:
+                del saved_state['prev_input']
+            self._set_input_buffer(incremental_state, saved_state)
+
+
+class GPTBlock(nn.Module):
+    """Pre- or post-LN block: CausalSelfAttention then a left-padded conv FFN, each with a residual."""
+
+    def __init__(self, c, num_heads, dropout, attention_dropout=0.1, relu_dropout=0.1,
+                 kernel_size=9, ffn_hidden_size=1024, act='gelu', post_ln=False, norm_cls=LayerNorm):
+        super().__init__()
+        self.c = c
+        self.dropout = dropout
+        self.layer_norm1 = norm_cls(c)
+        self.self_attn = CausalSelfAttention(
+            c, num_heads, dropout=attention_dropout
+        )
+        self.layer_norm2 = norm_cls(c)
+        self.ffn = TransformerFFNLayer(
+            c, ffn_hidden_size, padding='LEFT', kernel_size=kernel_size, dropout=relu_dropout, act=act)
+        self.post_ln = post_ln
+
+    def forward(
+            self,
+            x,
+            encoder_out=None,  # accepted but not used by this block
+            encoder_padding_mask=None,  # accepted but not used by this block
+            incremental_state=None,
+            self_attn_mask=None,
+            attn_out=None,  # accepted but not used by this block
+            spk_pos_ids_flat=None,
+            **kwargs,
+    ):
+        layer_norm_training = kwargs.get('layer_norm_training', None)
+        if layer_norm_training is not None:  # optionally force the norm layers' training flag
+            self.layer_norm1.training = layer_norm_training
+            self.layer_norm2.training = layer_norm_training
+        residual = x
+        if not self.post_ln:
+            x = self.layer_norm1(x)
+
+        x, _ = self.self_attn(
+            query=x,
+            key=x,
+            value=x,
+            incremental_state=incremental_state,
+            attn_mask=self_attn_mask,
+            spk_pos_ids_flat=spk_pos_ids_flat,
+            need_weights=False
+        )
+        x = F.dropout(x, self.dropout, training=self.training)
+        x = residual + x
+        if self.post_ln:
+            x = self.layer_norm1(x)
+
+        residual = x
+        if not self.post_ln:
+            x = self.layer_norm2(x)
+        x = self.ffn(x, incremental_state=incremental_state)
+        x = F.dropout(x, self.dropout, training=self.training)
+        x = residual + x
+        if self.post_ln:
+            x = self.layer_norm2(x)
+        return x, None  # second item is a placeholder for attention logits, which are not computed here
+
+    def clear_buffer(self, input, encoder_out=None, encoder_padding_mask=None, incremental_state=None):
+        self.self_attn.clear_buffer(incremental_state)  # this block only owns self_attn; there is no encoder_attn
+        self.ffn.clear_buffer(incremental_state)
+
+    def set_buffer(self, name, tensor, incremental_state):
+        return set_incremental_state(self, incremental_state, name, tensor)
+
+
+class GPTLayer(nn.Module):
+    """Wrapper around one GPTBlock with GPT-2 style weight init; forward runs under CUDA autocast."""
+
+    def __init__(self, hidden_size, dropout, kernel_size=9, num_heads=8, ffn_hidden_size=1024, post_ln=False,
+                 lm_num_layers=10, norm_cls=LayerNorm):
+        super().__init__()
+        self.hidden_size, self.dropout, self.num_heads = hidden_size, dropout, num_heads
+        self.op = GPTBlock(
+            hidden_size, num_heads, dropout=dropout,
+            attention_dropout=0.0, relu_dropout=dropout,
+            kernel_size=kernel_size, ffn_hidden_size=ffn_hidden_size,
+            post_ln=post_ln, norm_cls=norm_cls)
+
+        # Baseline init for every submodule, then the depth-scaled init (GPT-2) for residual projections.
+        self.apply(self._init_weights)
+        scaled_std = 0.02 / math.sqrt(2 * lm_num_layers)
+        for name, param in self.named_parameters():
+            if name.endswith(('ffn_2.weight', 'out_proj.weight')):
+                torch.nn.init.normal_(param, mean=0.0, std=scaled_std)
+
+    def _init_weights(self, module):
+        """Normal(0, 0.02) weights for Linear/Embedding modules; zero biases for Linear."""
+        if not isinstance(module, (nn.Linear, nn.Embedding)):
+            return
+        torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+        if isinstance(module, nn.Linear) and module.bias is not None:
+            torch.nn.init.zeros_(module.bias)
+
+    @torch.autocast(device_type='cuda')
+    def forward(self, x, **kwargs):
+        return self.op(x, **kwargs)
+
+    def clear_buffer(self, *args):
+        return self.op.clear_buffer(*args)
+
+    def set_buffer(self, *args):
+        return self.op.set_buffer(*args)
diff --git a/modules/commons/improved_diffusion/__init__.py b/modules/commons/improved_diffusion/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9665a0d63f695eab303318d824dad14041c7cde9
--- /dev/null
+++ b/modules/commons/improved_diffusion/__init__.py
@@ -0,0 +1,3 @@
+"""
+Codebase for "Improved Denoising Diffusion Probabilistic Models".
+"""
diff --git a/modules/commons/improved_diffusion/dist_util.py b/modules/commons/improved_diffusion/dist_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..f665604d6baaf5df6008f131c86cf0779c8b208a
--- /dev/null
+++ b/modules/commons/improved_diffusion/dist_util.py
@@ -0,0 +1,82 @@
+"""
+Helpers for distributed training.
+"""
+
+import io
+import os
+import socket
+
+import blobfile as bf
+from mpi4py import MPI
+import torch as th
+import torch.distributed as dist
+
+# Number of GPUs on each node; change this to reflect your cluster layout.
+# dev() selects the GPU for a given MPI rank as (rank % GPUS_PER_NODE).
+GPUS_PER_NODE = 8
+
+SETUP_RETRY_COUNT = 3  # NOTE(review): not referenced anywhere else in this module
+
+
+def setup_dist():
+    """
+    Initialise torch.distributed from the MPI world; a no-op when already initialised.
+
+    Rank 0's address and a port it found free are broadcast over MPI and
+    exported as MASTER_ADDR / MASTER_PORT for the env:// rendezvous.
+    """
+    if dist.is_initialized():
+        return
+
+    world = MPI.COMM_WORLD
+    use_cuda = th.cuda.is_available()
+    # CPU-only runs (gloo backend) use localhost; otherwise resolve this host's address.
+    local_addr = socket.gethostbyname(socket.getfqdn()) if use_cuda else "localhost"
+    master_addr = world.bcast(local_addr, root=0)
+    master_port = world.bcast(_find_free_port(), root=0)
+    os.environ.update(
+        MASTER_ADDR=master_addr, RANK=str(world.rank),
+        WORLD_SIZE=str(world.size), MASTER_PORT=str(master_port),
+    )
+    dist.init_process_group(backend="nccl" if use_cuda else "gloo", init_method="env://")
+
+
+def dev():
+    """Return this process's device: cuda:(rank % GPUS_PER_NODE) with CUDA, otherwise cpu."""
+    if not th.cuda.is_available():
+        return th.device("cpu")
+    # Assumes ranks are assigned node by node, so the modulo is the local GPU index.
+    local_gpu = MPI.COMM_WORLD.Get_rank() % GPUS_PER_NODE
+    return th.device(f"cuda:{local_gpu}")
+
+
+def load_state_dict(path, **kwargs):
+    """
+    Read `path` once on rank 0, broadcast the raw bytes over MPI and th.load them on every rank.
+    Extra keyword arguments are forwarded to th.load.
+    """
+    world = MPI.COMM_WORLD
+    payload = None
+    if world.Get_rank() == 0:
+        with bf.BlobFile(path, "rb") as blob:
+            payload = blob.read()
+    return th.load(io.BytesIO(world.bcast(payload)), **kwargs)
+
+
+def sync_params(params):
+    """
+    Broadcast every tensor in `params` from rank 0 via dist.broadcast, with autograd disabled.
+    """
+    with th.no_grad():
+        for tensor in params:
+            dist.broadcast(tensor, 0)
+
+
+def _find_free_port():
+    """Return a TCP port number that the OS reported as free at call time."""
+    # `with` closes the socket on every path; the old try/finally raised
+    # UnboundLocalError from `s.close()` if socket creation itself failed.
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.bind(("", 0))  # port 0 lets the OS pick an unused port
+        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        return s.getsockname()[1]
diff --git a/modules/commons/improved_diffusion/fp16_util.py b/modules/commons/improved_diffusion/fp16_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..23e0418153143200a718f56077b3360f30f4c663
--- /dev/null
+++ b/modules/commons/improved_diffusion/fp16_util.py
@@ -0,0 +1,76 @@
+"""
+Helpers to train with 16-bit precision.
+"""
+
+import torch.nn as nn
+from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
+
+
+def convert_module_to_f16(l):
+    """Cast the weight (and the bias, if present) of a Conv1d/2d/3d to float16; skip other modules."""
+    if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d)):
+        l.weight.data = l.weight.data.half()
+        # Convs built with bias=False have l.bias = None; skip instead of crashing.
+        if l.bias is not None:
+            l.bias.data = l.bias.data.half()
+
+
+def convert_module_to_f32(l):
+    """Cast a Conv1d/2d/3d's weight (and bias, if any) back to float32, undoing convert_module_to_f16()."""
+    if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d)):
+        l.weight.data = l.weight.data.float()
+        # Convs built with bias=False have l.bias = None; skip instead of crashing.
+        if l.bias is not None:
+            l.bias.data = l.bias.data.float()
+
+
+def make_master_params(model_params):
+    """
+    Build the float32 master copy of `model_params`.
+
+    All parameters are detached, cast to float32 and flattened into one
+    nn.Parameter, which is returned as a single-element list.
+    """
+    fp32_copies = [p.detach().float() for p in model_params]
+    flat_master = nn.Parameter(_flatten_dense_tensors(fp32_copies))
+    flat_master.requires_grad = True
+    return [flat_master]
+
+
+def model_grads_to_master_grads(model_params, master_params):
+    """
+    Flatten the float32-cast gradients of `model_params` into master_params[0].grad.
+
+    Every model parameter must already have a .grad; the previous master grad is replaced.
+    """
+    fp32_grads = [p.grad.data.detach().float() for p in model_params]
+    master_params[0].grad = _flatten_dense_tensors(fp32_grads)
+
+
+def master_params_to_model_params(model_params, master_params):
+    """
+    Write the flat master values back into each model parameter, in place.
+    """
+    # Materialise first: the parameters are walked twice (once to unflatten,
+    # once to copy), and a generator would already be exhausted on the second
+    # pass, so nothing would be copied.
+    targets = list(model_params)
+    sources = unflatten_master_params(targets, master_params)
+
+    for target, source in zip(targets, sources):
+        target.detach().copy_(source)
+
+
+def unflatten_master_params(model_params, master_params):
+    """Split the flat master tensor into tensors shaped like the entries of `model_params`."""
+    flat_master = master_params[0].detach()
+    # One output per entry of model_params, each with that entry's shape.
+    return _unflatten_dense_tensors(flat_master, model_params)
+
+
+def zero_grad(model_params):
+    """Detach and zero, in place, the .grad of every parameter that has one."""
+    # Same steps as https://pytorch.org/docs/stable/_modules/torch/optim/optimizer.html#Optimizer.add_param_group
+    for grad in (p.grad for p in model_params):
+        if grad is not None:
+            grad.detach_().zero_()
diff --git a/modules/commons/improved_diffusion/gaussian_diffusion.py b/modules/commons/improved_diffusion/gaussian_diffusion.py
new file mode 100644
index 0000000000000000000000000000000000000000..9e76eafab7a071e14b92821dbe0d8fd4382bdccd
--- /dev/null
+++ b/modules/commons/improved_diffusion/gaussian_diffusion.py
@@ -0,0 +1,870 @@
+"""
+This code started out as a PyTorch port of Ho et al's diffusion models:
+https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py
+
+Docstrings have been added, as well as DDIM sampling and a new collection of beta schedules.
+"""
+
+import enum
+import math
+
+import numpy as np
+import torch as th
+
+from .nn import mean_flat
+from .losses import normal_kl, discretized_gaussian_log_likelihood
+
+
+def get_named_beta_schedule(schedule_name, num_diffusion_timesteps):
+    """
+    Return the beta schedule registered under `schedule_name`.
+
+    Supported names are "linear" and "cosine"; anything else raises
+    NotImplementedError. The schedules are meant to stay similar in the limit
+    of num_diffusion_timesteps. New schedules may be added, but committed ones
+    should not be removed or changed, to maintain backwards compatibility.
+    """
+    if schedule_name == "cosine":
+
+        def cosine_alpha_bar(t):
+            return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
+
+        return betas_for_alpha_bar(num_diffusion_timesteps, cosine_alpha_bar)
+    if schedule_name != "linear":
+        raise NotImplementedError(f"unknown beta schedule: {schedule_name}")
+
+    # Linear schedule from Ho et al, extended to work for any number of
+    # diffusion steps: the 1000-step endpoints are scaled by 1000 / T.
+    scale = 1000 / num_diffusion_timesteps
+    beta_start, beta_end = scale * 0.0001, scale * 0.02
+    return np.linspace(
+        beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64
+    )
+
+
+def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999):
+    """
+    Discretize a continuous alpha_bar(t), t in [0, 1], into a beta schedule.
+
+    alpha_bar gives the cumulative product of (1 - beta) up to time t, so the
+    beta for step i is 1 - alpha_bar((i + 1) / T) / alpha_bar(i / T).
+
+    :param num_diffusion_timesteps: the number of betas to produce (T).
+    :param alpha_bar: a callable taking t from 0 to 1 and returning the
+                      cumulative product of (1-beta) up to that point.
+    :param max_beta: cap applied to every beta; use values lower than 1 to
+                     prevent singularities.
+    :return: a 1-D numpy array of num_diffusion_timesteps betas.
+    """
+    steps = num_diffusion_timesteps
+    # Ratio of consecutive cumulative products; each beta is capped at max_beta.
+    ratios = (alpha_bar((i + 1) / steps) / alpha_bar(i / steps) for i in range(steps))
+    return np.array([min(1 - ratio, max_beta) for ratio in ratios])
+
+
+class ModelMeanType(enum.Enum):
+    """
+    Which quantity the model output is interpreted as when computing the mean.
+    """
+
+    PREVIOUS_X = enum.auto()  # the model predicts x_{t-1}; used directly as the mean
+    START_X = enum.auto()  # the model predicts x_0
+    EPSILON = enum.auto()  # the model predicts epsilon; x_0 is derived from it
+
+
+class ModelVarType(enum.Enum):
+    """
+    What is used as the model's output variance.
+
+    The LEARNED_RANGE option has been added to allow the model to predict
+    values between FIXED_SMALL and FIXED_LARGE, making its job easier.
+    """
+
+    LEARNED = enum.auto()  # the model outputs the log-variance directly
+    FIXED_SMALL = enum.auto()  # the posterior variance of q(x_{t-1} | x_t, x_0)
+    FIXED_LARGE = enum.auto()  # the betas, with entry 0 replaced by posterior_variance[1]
+    LEARNED_RANGE = enum.auto()  # the model outputs a [-1, 1] blend of clipped posterior log-variance and log(betas)
+
+
+class LossType(enum.Enum):
+    """Which training objective to use."""
+
+    MSE = enum.auto()  # raw MSE loss (plus KL when learning variances)
+    RESCALED_MSE = enum.auto()  # raw MSE loss (plus RESCALED_KL when learning variances)
+    KL = enum.auto()  # the variational lower-bound
+    RESCALED_KL = enum.auto()  # like KL, but rescaled to estimate the full VLB
+
+    def is_vb(self):
+        return self in (LossType.KL, LossType.RESCALED_KL)
+
+
+class GaussianDiffusion:
+    """
+    Utilities for training and sampling diffusion models.
+
+    Ported directly from here, and then adapted over time to further experimentation.
+    https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py#L42
+
+    :param betas: a 1-D numpy array of betas for each diffusion timestep,
+                  starting at T and going to 1.
+    :param model_mean_type: a ModelMeanType determining what the model outputs.
+    :param model_var_type: a ModelVarType determining how variance is output.
+    :param loss_type: a LossType determining the loss function to use.
+    :param rescale_timesteps: if True, pass floating point timesteps into the
+                              model so that they are always scaled like in the
+                              original paper (0 to 1000).
+    """
+
+    def __init__(
+            self, *, betas, model_mean_type, model_var_type, loss_type, rescale_timesteps=False,
+    ):
+        """
+        Store the configuration and precompute every per-timestep coefficient table.
+
+        :param betas: 1-D sequence of betas, each in (0, 1]; kept as float64.
+        :param model_mean_type: a ModelMeanType determining what the model outputs.
+        :param model_var_type: a ModelVarType determining how variance is output.
+        :param loss_type: a LossType determining the loss function to use.
+        :param rescale_timesteps: if True, timesteps given to the model are
+                                  rescaled to the 0-1000 range.
+        """
+        self.model_mean_type = model_mean_type
+        self.model_var_type = model_var_type
+        self.loss_type = loss_type
+        self.rescale_timesteps = rescale_timesteps
+
+        # Use float64 for accuracy.
+        betas = np.array(betas, dtype=np.float64)
+        self.betas = betas
+        assert len(betas.shape) == 1, "betas must be 1-D"
+        assert (betas > 0).all() and (betas <= 1).all()
+
+        self.num_timesteps = int(betas.shape[0])
+
+        # Cumulative products of alpha = 1 - beta, plus the same table shifted
+        # by one step in each direction (padded with 1.0 / 0.0 at the ends).
+        alphas = 1.0 - betas
+        acp = np.cumprod(alphas, axis=0)
+        acp_prev = np.append(1.0, acp[:-1])
+        assert acp_prev.shape == (self.num_timesteps,)
+        one_minus_acp = 1.0 - acp
+        self.alphas_cumprod = acp
+        self.alphas_cumprod_prev = acp_prev
+        self.alphas_cumprod_next = np.append(acp[1:], 0.0)
+
+        # calculations for diffusion q(x_t | x_{t-1}) and others
+        self.sqrt_alphas_cumprod = np.sqrt(acp)
+        self.sqrt_one_minus_alphas_cumprod = np.sqrt(one_minus_acp)
+        self.log_one_minus_alphas_cumprod = np.log(one_minus_acp)
+        self.sqrt_recip_alphas_cumprod = np.sqrt(1.0 / acp)
+        self.sqrt_recipm1_alphas_cumprod = np.sqrt(1.0 / acp - 1)
+
+        # calculations for posterior q(x_{t-1} | x_t, x_0)
+        post_var = betas * (1.0 - acp_prev) / one_minus_acp
+        self.posterior_variance = post_var
+        # The posterior variance is 0 at the beginning of the diffusion chain,
+        # so entry 0 is replaced by entry 1 before taking the log.
+        self.posterior_log_variance_clipped = np.log(np.append(post_var[1], post_var[1:]))
+        self.posterior_mean_coef1 = betas * np.sqrt(acp_prev) / one_minus_acp
+        self.posterior_mean_coef2 = (1.0 - acp_prev) * np.sqrt(alphas) / one_minus_acp
+
+    def q_mean_variance(self, x_start, t):
+        """
+        Return the parameters of the forward marginal q(x_t | x_0).
+
+        :param x_start: the [N x C x ...] tensor of noiseless inputs.
+        :param t: the number of diffusion steps (minus 1). Here, 0 means one step.
+        :return: a tuple (mean, variance, log_variance); the mean scales x_start
+                 by the sqrt_alphas_cumprod table and the variance comes from
+                 1 - alphas_cumprod, each read at t via _extract_into_tensor.
+        """
+        shape = x_start.shape
+        scale = _extract_into_tensor(self.sqrt_alphas_cumprod, t, shape)
+        variance = _extract_into_tensor(1.0 - self.alphas_cumprod, t, shape)
+        log_variance = _extract_into_tensor(self.log_one_minus_alphas_cumprod, t, shape)
+        mean = scale * x_start
+        return mean, variance, log_variance
+
+    def q_sample(self, x_start, t, noise=None):
+        """
+        Draw x_t from q(x_t | x_0), i.e. diffuse x_start for t + 1 steps.
+
+        :param x_start: the initial data batch.
+        :param t: the number of diffusion steps (minus 1). Here, 0 means one step.
+        :param noise: optional pre-drawn normal noise with the same shape as
+                      x_start; sampled with th.randn_like when None.
+        :return: signal_scale * x_start + noise_scale * noise, with the scales read
+                 from the sqrt_alphas_cumprod / sqrt_one_minus_alphas_cumprod tables.
+        """
+        if noise is None:
+            noise = th.randn_like(x_start)
+        assert noise.shape == x_start.shape
+
+        shape = x_start.shape
+        signal_scale = _extract_into_tensor(self.sqrt_alphas_cumprod, t, shape)
+        noise_scale = _extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, shape)
+        return signal_scale * x_start + noise_scale * noise
+
+    def q_posterior_mean_variance(self, x_start, x_t, t):
+        """
+        Compute the mean and variance of the diffusion posterior:
+
+            q(x_{t-1} | x_t, x_0)
+
+        :param x_start: the x_0 tensor (or a prediction of it).
+        :param x_t: the noisy tensor at timestep t; same shape as x_start.
+        :param t: timestep indices used to look up the coefficient tables.
+        :return: a tuple (posterior_mean, posterior_variance,
+                 posterior_log_variance_clipped).
+        """
+        assert x_start.shape == x_t.shape
+        shape = x_t.shape
+        coef_x0 = _extract_into_tensor(self.posterior_mean_coef1, t, shape)
+        coef_xt = _extract_into_tensor(self.posterior_mean_coef2, t, shape)
+        mean = coef_x0 * x_start + coef_xt * x_t
+        variance = _extract_into_tensor(self.posterior_variance, t, shape)
+        log_variance = _extract_into_tensor(self.posterior_log_variance_clipped, t, shape)
+        # All three results must agree with the input on the batch dimension.
+        batch = x_start.shape[0]
+        assert mean.shape[0] == variance.shape[0] == log_variance.shape[0] == batch
+        return mean, variance, log_variance
+
+    def p_mean_variance(
+            self, model, x, t, clip_denoised=True, denoised_fn=None, model_kwargs=None
+    ):
+        """
+        Apply the model to get p(x_{t-1} | x_t), as well as a prediction of
+        the initial x, x_0.
+
+        :param model: the model; called as model(x, timesteps, **model_kwargs)
+                      with timesteps passed through _scale_timesteps().
+        :param x: the [N x C x ...] tensor at time t.
+        :param t: a 1-D Tensor of N timestep indices.
+        :param clip_denoised: if True, clip the x_0 prediction into [-1, 1].
+        :param denoised_fn: if not None, a function which applies to the
+            x_start prediction before it is used to sample. Applies before
+            clip_denoised.
+        :param model_kwargs: if not None, a dict of extra keyword arguments to
+            pass to the model. This can be used for conditioning.
+        :return: a dict with the following keys:
+                 - 'mean': the mean of p(x_{t-1} | x_t).
+                 - 'variance': the variance of p(x_{t-1} | x_t).
+                 - 'log_variance': the log of 'variance'.
+                 - 'pred_xstart': the prediction for x_0.
+        """
+        if model_kwargs is None:
+            model_kwargs = {}
+
+        B, C = x.shape[:2]  # batch size and channel count
+        assert t.shape == (B,)
+        model_output = model(x, self._scale_timesteps(t), **model_kwargs)
+
+        if self.model_var_type in [ModelVarType.LEARNED, ModelVarType.LEARNED_RANGE]:
+            assert model_output.shape == (B, C * 2, *x.shape[2:])  # mean and variance halves stacked on dim 1
+            model_output, model_var_values = th.split(model_output, C, dim=1)
+            if self.model_var_type == ModelVarType.LEARNED:
+                model_log_variance = model_var_values
+                model_variance = th.exp(model_log_variance)
+            else:
+                min_log = _extract_into_tensor(
+                    self.posterior_log_variance_clipped, t, x.shape
+                )
+                max_log = _extract_into_tensor(np.log(self.betas), t, x.shape)
+                # model_var_values in [-1, 1] interpolates, in log space, from min_log to max_log.
+                frac = (model_var_values + 1) / 2
+                model_log_variance = frac * max_log + (1 - frac) * min_log
+                model_variance = th.exp(model_log_variance)
+        else:
+            model_variance, model_log_variance = {
+                # for fixedlarge, we set the initial (log-)variance like so
+                # to get a better decoder log likelihood.
+                ModelVarType.FIXED_LARGE: (
+                    np.append(self.posterior_variance[1], self.betas[1:]),
+                    np.log(np.append(self.posterior_variance[1], self.betas[1:])),
+                ),
+                ModelVarType.FIXED_SMALL: (
+                    self.posterior_variance,
+                    self.posterior_log_variance_clipped,
+                ),
+            }[self.model_var_type]  # any other model_var_type raises KeyError here
+            model_variance = _extract_into_tensor(model_variance, t, x.shape)
+            model_log_variance = _extract_into_tensor(model_log_variance, t, x.shape)
+
+        def process_xstart(x):  # post-process an x_0 estimate: optional denoised_fn, then optional clipping
+            if denoised_fn is not None:
+                x = denoised_fn(x)
+            if clip_denoised:
+                return x.clamp(-1, 1)
+            return x
+
+        if self.model_mean_type == ModelMeanType.PREVIOUS_X:
+            pred_xstart = process_xstart(
+                self._predict_xstart_from_xprev(x_t=x, t=t, xprev=model_output)
+            )
+            model_mean = model_output  # the raw model output is used as the mean
+        elif self.model_mean_type in [ModelMeanType.START_X, ModelMeanType.EPSILON]:
+            if self.model_mean_type == ModelMeanType.START_X:
+                pred_xstart = process_xstart(model_output)
+            else:
+                pred_xstart = process_xstart(
+                    self._predict_xstart_from_eps(x_t=x, t=t, eps=model_output)
+                )
+            model_mean, _, _ = self.q_posterior_mean_variance(
+                x_start=pred_xstart, x_t=x, t=t
+            )
+        else:
+            raise NotImplementedError(self.model_mean_type)
+
+        assert (
+                model_mean.shape == model_log_variance.shape == pred_xstart.shape == x.shape
+        )
+        return {
+            "mean": model_mean,
+            "variance": model_variance,
+            "log_variance": model_log_variance,
+            "pred_xstart": pred_xstart,
+        }
+
+    def _predict_xstart_from_eps(self, x_t, t, eps):
+        """Recover x_0 from x_t and the noise eps using the sqrt_recip / sqrt_recipm1 alphas_cumprod tables."""
+        assert x_t.shape == eps.shape
+        recip = _extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape)
+        recipm1 = _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape)
+        return recip * x_t - recipm1 * eps
+
+    def _predict_xstart_from_xprev(self, x_t, t, xprev):
+        """
+        Solve the posterior-mean equation for x_0: (xprev - coef2 * x_t) / coef1.
+        """
+        assert x_t.shape == xprev.shape
+        coef1, coef2 = self.posterior_mean_coef1, self.posterior_mean_coef2
+        inv_coef1 = _extract_into_tensor(1.0 / coef1, t, x_t.shape)
+        ratio = _extract_into_tensor(coef2 / coef1, t, x_t.shape)
+        return inv_coef1 * xprev - ratio * x_t
+
+    def _predict_eps_from_xstart(self, x_t, t, pred_xstart):
+        """Recover the noise eps implied by x_t and an x_0 prediction (inverse of _predict_xstart_from_eps)."""
+        recip = _extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape)
+        recipm1 = _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape)
+        return (recip * x_t - pred_xstart) / recipm1
+
+    def _scale_timesteps(self, t):
+        """Rescale t to the 0-1000 float range when rescale_timesteps is set; otherwise return it as is."""
+        rescale = self.rescale_timesteps
+        return t.float() * (1000.0 / self.num_timesteps) if rescale else t
+
+    def p_sample(
+            self, model, x, t, clip_denoised=True, denoised_fn=None, model_kwargs=None
+    ):
+        """
+        Sample x_{t-1} from the model at the given timestep.
+
+        :param model: the model to sample from.
+        :param x: the current noisy tensor x_t.
+        :param t: a tensor of timestep indices, one per batch element;
+                  0 is the first diffusion step.
+        :param clip_denoised: if True, clip the x_start prediction to [-1, 1].
+        :param denoised_fn: if not None, a function which applies to the
+            x_start prediction before it is used to sample.
+        :param model_kwargs: if not None, a dict of extra keyword arguments to
+            pass to the model. This can be used for conditioning.
+        :return: a dict containing the following keys:
+                 - 'sample': a random sample from the model.
+                 - 'pred_xstart': a prediction of x_0.
+        """
+        stats = self.p_mean_variance(
+            model, x, t,
+            clip_denoised=clip_denoised, denoised_fn=denoised_fn, model_kwargs=model_kwargs,
+        )
+
+        eps = th.randn_like(x)
+        # Broadcastable (N, 1, ..., 1) mask that is 0 where t == 0, so the
+        # final step returns the mean without added noise.
+        trailing_ones = [1] * (len(x.shape) - 1)
+        keep_noise = (t != 0).float().view(-1, *trailing_ones)
+        std = th.exp(0.5 * stats["log_variance"])
+        sample = stats["mean"] + keep_noise * std * eps
+        return {"sample": sample, "pred_xstart": stats["pred_xstart"]}
+
+    def p_sample_loop(
+            self,
+            model,
+            shape,
+            noise=None,
+            clip_denoised=True,
+            denoised_fn=None,
+            model_kwargs=None,
+            device=None,
+            progress=False,
+    ):
+        """
+        Generate samples from the model.
+
+        Drains p_sample_loop_progressive() and returns the 'sample' entry of
+        the last dict it yields.
+
+        :param model: the model module.
+        :param shape: the shape of the samples, (N, C, H, W).
+        :param noise: if specified, the tensor to start denoising from.
+                      Should be of the same shape as `shape`.
+        :param clip_denoised: if True, clip x_start predictions to [-1, 1].
+        :param denoised_fn: if not None, a function which applies to the
+            x_start prediction before it is used to sample.
+        :param model_kwargs: if not None, a dict of extra keyword arguments to
+            pass to the model. This can be used for conditioning.
+        :param device: if specified, the device to create the samples on.
+                       If not specified, use a model parameter's device.
+        :param progress: if True, show a tqdm progress bar.
+        :return: a non-differentiable batch of samples.
+        """
+        steps = self.p_sample_loop_progressive(
+            model, shape, noise=noise, clip_denoised=clip_denoised, denoised_fn=denoised_fn,
+            model_kwargs=model_kwargs, device=device, progress=progress,
+        )
+
+        last = None
+        # Only the final step's output is needed, so intermediate dicts are dropped.
+        for last in steps:
+            pass
+        return last["sample"]
+
+    def p_sample_loop_progressive(
+            self,
+            model,
+            shape,
+            noise=None,
+            clip_denoised=True,
+            denoised_fn=None,
+            model_kwargs=None,
+            device=None,
+            progress=False,
+    ):
+        """
+        Generate samples from the model and yield intermediate samples from
+        each timestep of diffusion, from t = num_timesteps - 1 down to 0.
+
+        Arguments are the same as p_sample_loop().
+        Returns a generator over dicts, where each dict is the return value of
+        p_sample(); each 'sample' is fed back in as the next step's input.
+        """
+        # Default to the device of the model's first parameter.
+        if device is None:
+            device = next(model.parameters()).device
+        assert isinstance(shape, (tuple, list))
+        # Start from the caller's tensor when given, else from Gaussian noise.
+        current = noise if noise is not None else th.randn(*shape, device=device)
+        timesteps = list(reversed(range(self.num_timesteps)))
+
+        if progress:
+            # Lazy import so that we don't depend on tqdm.
+            from tqdm.auto import tqdm
+
+            timesteps = tqdm(timesteps)
+
+        for step in timesteps:
+            t = th.tensor([step] * shape[0], device=device)
+            # Both the sampling call and the yield sit inside no_grad.
+            with th.no_grad():
+                result = self.p_sample(
+                    model,
+                    current,
+                    t,
+                    clip_denoised=clip_denoised,
+                    denoised_fn=denoised_fn,
+                    model_kwargs=model_kwargs,
+                )
+                yield result
+                current = result["sample"]
+
+    def ddim_sample(
+            self,
+            model,
+            x,
+            t,
+            clip_denoised=True,
+            denoised_fn=None,
+            model_kwargs=None,
+            eta=0.0,
+    ):
+        """
+        Sample x_{t-1} from the model using DDIM.
+
+        Same usage as p_sample(), plus `eta`, which scales the noise added at
+        each step (eta=0.0 makes the step deterministic).
+
+        :return: a dict with keys 'sample' and 'pred_xstart'.
+        """
+        stats = self.p_mean_variance(
+            model,
+            x,
+            t,
+            clip_denoised=clip_denoised,
+            denoised_fn=denoised_fn,
+            model_kwargs=model_kwargs,
+        )
+        x0_pred = stats["pred_xstart"]
+        # Usually our model outputs epsilon, but we re-derive it
+        # in case we used x_start or x_prev prediction.
+        eps = self._predict_eps_from_xstart(x, t, x0_pred)
+
+        acp_t = _extract_into_tensor(self.alphas_cumprod, t, x.shape)
+        acp_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape)
+        variance_ratio = th.sqrt((1 - acp_prev) / (1 - acp_t))
+        sigma = eta * variance_ratio * th.sqrt(1 - acp_t / acp_prev)
+
+        # Equation 12.
+        noise = th.randn_like(x)
+        direction = th.sqrt(1 - acp_prev - sigma ** 2) * eps
+        mean_pred = x0_pred * th.sqrt(acp_prev) + direction
+        # Broadcastable mask that is 0 where t == 0: no noise on the final step.
+        keep_noise = (t != 0).float().view(-1, *([1] * (len(x.shape) - 1)))
+        sample = mean_pred + keep_noise * sigma * noise
+        return {"sample": sample, "pred_xstart": x0_pred}
+
+    def ddim_reverse_sample(
+            self,
+            model,
+            x,
+            t,
+            clip_denoised=True,
+            denoised_fn=None,
+            model_kwargs=None,
+            eta=0.0,
+    ):
+        """
+        Sample x_{t+1} from the model using DDIM reverse ODE.
+
+        Arguments are the same as ddim_sample(); eta must be 0.0 because only
+        the deterministic path can be run in reverse.
+
+        :return: a dict with keys 'sample' (the x_{t+1} estimate) and
+                 'pred_xstart'.
+        """
+        assert eta == 0.0, "Reverse ODE only for deterministic path"
+        stats = self.p_mean_variance(
+            model,
+            x,
+            t,
+            clip_denoised=clip_denoised,
+            denoised_fn=denoised_fn,
+            model_kwargs=model_kwargs,
+        )
+        x0_pred = stats["pred_xstart"]
+        # Usually our model outputs epsilon, but we re-derive it
+        # in case we used x_start or x_prev prediction.
+        eps = self._predict_eps_from_xstart(x, t, x0_pred)
+        acp_next = _extract_into_tensor(self.alphas_cumprod_next, t, x.shape)
+
+        # Equation 12. reversed
+        mean_pred = x0_pred * th.sqrt(acp_next) + th.sqrt(1 - acp_next) * eps
+        return {"sample": mean_pred, "pred_xstart": x0_pred}
+
+    def ddim_sample_loop(
+            self,
+            model,
+            shape,
+            noise=None,
+            clip_denoised=True,
+            denoised_fn=None,
+            model_kwargs=None,
+            device=None,
+            progress=False,
+            eta=0.0,
+    ):
+        """
+        Generate samples from the model using DDIM.
+
+        Same usage as p_sample_loop(), plus `eta`, which is forwarded to the progressive loop.
+        Returns the last step's 'sample_merge' entry if it has one, else its 'sample'.
+        """
+        steps = self.ddim_sample_loop_progressive(
+            model,
+            shape,
+            noise=noise,
+            clip_denoised=clip_denoised,
+            denoised_fn=denoised_fn,
+            model_kwargs=model_kwargs,
+            device=device,
+            progress=progress,
+            eta=eta,
+        )
+        last = None
+        for last in steps:
+            pass
+        key = "sample_merge" if "sample_merge" in last else "sample"
+        return last[key]
+
+    def ddim_sample_loop_progressive(
+            self,
+            model,
+            shape,
+            noise=None,
+            clip_denoised=True,
+            denoised_fn=None,
+            model_kwargs=None,
+            device=None,
+            progress=False,
+            eta=0.0,
+    ):
+        """
+        Use DDIM to sample from the model and yield intermediate samples from
+        each timestep of DDIM, from t = num_timesteps - 1 down to 0.
+
+        Same usage as p_sample_loop_progressive().
+
+        :param model: the model module.
+        :param shape: the shape of the samples (tuple or list); shape[0] is the batch size.
+        :param noise: if specified, the tensor to start denoising from;
+                      otherwise standard normal noise of shape `shape` is used.
+        :param clip_denoised: if True, clip x_start predictions to [-1, 1].
+        :param denoised_fn: if not None, a function which applies to the
+            x_start prediction before it is used to sample.
+        :param model_kwargs: if not None, a dict of extra keyword arguments to
+            pass to the model. This can be used for conditioning.
+        :param device: if specified, the device to create the samples on.
+                       If not specified, use a model parameter's device.
+        :param progress: if True, show a tqdm progress bar.
+        :param eta: DDIM noise scale passed to every ddim_sample() call.
+        :return: a generator over the dicts returned by ddim_sample().
+        """
+        if device is None:
+            device = next(model.parameters()).device
+        assert isinstance(shape, (tuple, list))
+        current = noise if noise is not None else th.randn(*shape, device=device)
+        timesteps = list(reversed(range(self.num_timesteps)))
+
+        if progress:
+            # Lazy import so that we don't depend on tqdm.
+            from tqdm.auto import tqdm
+
+            timesteps = tqdm(timesteps)
+
+        for step in timesteps:
+            t = th.tensor([step] * shape[0], device=device)
+            with th.no_grad():
+                result = self.ddim_sample(
+                    model,
+                    current,
+                    t,
+                    clip_denoised=clip_denoised,
+                    denoised_fn=denoised_fn,
+                    model_kwargs=model_kwargs,
+                    eta=eta,
+                )
+                # NOTE: a disabled experiment used to blend masked/replaced
+                # frames into the sample here and publish the blend as
+                # result['sample_merge'], the key ddim_sample_loop() looks for.
+                yield result
+                current = result["sample"]
+
+    def _vb_terms_bpd(
+            self, model, x_start, x_t, t, clip_denoised=True, model_kwargs=None
+    ):
+        """
+        Get a term for the variational lower-bound.
+
+        The resulting units are bits (rather than nats, as one might expect).
+        This allows for comparison to other papers.
+        For t == 0 the term is the decoder NLL, otherwise a KL divergence.
+
+        :return: a dict with the following keys:
+                 - 'output': a shape [N] tensor of NLLs or KLs.
+                 - 'pred_xstart': the x_0 predictions.
+        """
+        nats_per_bit = np.log(2.0)
+        q_mean, _, q_log_var = self.q_posterior_mean_variance(
+            x_start=x_start, x_t=x_t, t=t
+        )
+        p_stats = self.p_mean_variance(
+            model, x_t, t, clip_denoised=clip_denoised, model_kwargs=model_kwargs
+        )
+        p_mean, p_log_var = p_stats["mean"], p_stats["log_variance"]
+
+        # KL(q(x_{t-1}|x_t,x_0) || p(x_{t-1}|x_t)), converted from nats to bits.
+        kl_bits = mean_flat(normal_kl(q_mean, q_log_var, p_mean, p_log_var)) / nats_per_bit
+
+        # Decoder negative log-likelihood of x_start, also converted to bits.
+        nll = -discretized_gaussian_log_likelihood(x_start, means=p_mean, log_scales=0.5 * p_log_var)
+        assert nll.shape == x_start.shape
+        nll_bits = mean_flat(nll) / nats_per_bit
+
+        # The first timestep (t == 0) uses the decoder NLL, every other one the KL.
+        output = th.where((t == 0), nll_bits, kl_bits)
+        return {"output": output, "pred_xstart": p_stats["pred_xstart"]}
+
+    def training_losses(self, model, x_start, t, model_kwargs=None, noise=None):
+        """
+        Compute the training loss terms for one batch of timesteps.
+
+        :param model: the model to evaluate loss on.
+        :param x_start: the [N x C x ...] tensor of inputs.
+        :param t: a batch of timestep indices.
+        :param model_kwargs: if not None, a dict of extra keyword arguments to
+            pass to the model. This can be used for conditioning.
+        :param noise: if specified, the specific Gaussian noise to try to remove;
+            otherwise it is drawn with ``th.randn_like(x_start)``.
+        :return: a dict with the key "loss" containing a tensor of shape [N].
+                 The MSE loss types also store "mse", plus "vb" when the model
+                 variance is learned.
+        :raises NotImplementedError: if ``self.loss_type`` is neither a KL nor
+                 an MSE variant.
+        """
+        model_kwargs = {} if model_kwargs is None else model_kwargs
+        if noise is None:
+            noise = th.randn_like(x_start)
+        x_t = self.q_sample(x_start, t, noise=noise)
+
+        kl_types = (LossType.KL, LossType.RESCALED_KL)
+        mse_types = (LossType.MSE, LossType.RESCALED_MSE)
+
+        if self.loss_type in kl_types:
+            terms = {
+                "loss": self._vb_terms_bpd(
+                    model=model,
+                    x_start=x_start,
+                    x_t=x_t,
+                    t=t,
+                    clip_denoised=False,
+                    model_kwargs=model_kwargs,
+                )["output"]
+            }
+            if self.loss_type == LossType.RESCALED_KL:
+                terms["loss"] *= self.num_timesteps
+            return terms
+
+        if self.loss_type not in mse_types:
+            raise NotImplementedError(self.loss_type)
+
+        terms = {}
+        model_output = model(x_t, self._scale_timesteps(t), **model_kwargs)
+
+        if self.model_var_type in (ModelVarType.LEARNED, ModelVarType.LEARNED_RANGE):
+            batch, channels = x_t.shape[:2]
+            assert model_output.shape == (batch, channels * 2, *x_t.shape[2:])
+            # The model emits mean and variance parameters stacked along dim 1.
+            model_output, model_var_values = th.split(model_output, channels, dim=1)
+            # Learn the variance using the variational bound, but detach the
+            # mean prediction so the bound cannot influence it.
+            frozen_out = th.cat([model_output.detach(), model_var_values], dim=1)
+            terms["vb"] = self._vb_terms_bpd(
+                model=lambda *args, r=frozen_out: r,
+                x_start=x_start,
+                x_t=x_t,
+                t=t,
+                clip_denoised=False,
+            )["output"]
+            if self.loss_type == LossType.RESCALED_MSE:
+                # Divide by 1000 for equivalence with initial implementation.
+                # Without a factor of 1/1000, the VB term hurts the MSE term.
+                terms["vb"] *= self.num_timesteps / 1000.0
+
+        # The regression target depends on what the model is trained to predict.
+        targets_by_mean_type = {
+            ModelMeanType.PREVIOUS_X: self.q_posterior_mean_variance(
+                x_start=x_start, x_t=x_t, t=t
+            )[0],
+            ModelMeanType.START_X: x_start,
+            ModelMeanType.EPSILON: noise,
+        }
+        target = targets_by_mean_type[self.model_mean_type]
+        assert model_output.shape == target.shape == x_start.shape
+
+        terms["mse"] = mean_flat((target - model_output) ** 2)
+        terms["loss"] = terms["mse"] + terms["vb"] if "vb" in terms else terms["mse"]
+        return terms
+
+    def _prior_bpd(self, x_start):
+        """
+        Compute the prior KL term of the variational lower bound, in
+        bits-per-dim.
+
+        The term compares q at the last timestep against a Gaussian with zero
+        mean and zero log-variance. It can't be optimized, as it only depends
+        on the encoder.
+
+        :param x_start: the [N x C x ...] tensor of inputs.
+        :return: a batch of [N] KL values (in bits), one per batch element.
+        """
+        last_step = self.num_timesteps - 1
+        t = th.tensor([last_step] * x_start.shape[0], device=x_start.device)
+        qt_mean, _, qt_log_variance = self.q_mean_variance(x_start, t)
+        kl_nats = normal_kl(
+            mean1=qt_mean, logvar1=qt_log_variance, mean2=0.0, logvar2=0.0
+        )
+        # Dividing by ln(2) converts nats to bits.
+        return mean_flat(kl_nats) / np.log(2.0)
+
+    def calc_bpd_loop(self, model, x_start, clip_denoised=True, model_kwargs=None):
+        """
+        Compute the entire variational lower-bound, measured in bits-per-dim,
+        as well as other related quantities.
+
+        Timesteps are visited from ``num_timesteps - 1`` down to 0, and the
+        per-timestep results are stacked along dim 1 in that visiting order.
+
+        :param model: the model to evaluate loss on.
+        :param x_start: the [N x C x ...] tensor of inputs.
+        :param clip_denoised: if True, clip denoised samples.
+        :param model_kwargs: if not None, a dict of extra keyword arguments to
+            pass to the model. This can be used for conditioning.
+
+        :return: a dict containing the following keys:
+                 - total_bpd: the total variational lower-bound, per batch element.
+                 - prior_bpd: the prior term in the lower-bound.
+                 - vb: an [N x T] tensor of terms in the lower-bound.
+                 - xstart_mse: an [N x T] tensor of x_0 MSEs for each timestep.
+                 - mse: an [N x T] tensor of epsilon MSEs for each timestep.
+        """
+        device = x_start.device
+        batch_size = x_start.shape[0]
+
+        vb_terms, xstart_errors, eps_errors = [], [], []
+        for step in reversed(range(self.num_timesteps)):
+            t_batch = th.tensor([step] * batch_size, device=device)
+            noise = th.randn_like(x_start)
+            x_t = self.q_sample(x_start=x_start, t=t_batch, noise=noise)
+            # Only the bound term itself is evaluated without gradients.
+            with th.no_grad():
+                out = self._vb_terms_bpd(
+                    model,
+                    x_start=x_start,
+                    x_t=x_t,
+                    t=t_batch,
+                    clip_denoised=clip_denoised,
+                    model_kwargs=model_kwargs,
+                )
+            pred_xstart = out["pred_xstart"]
+            vb_terms.append(out["output"])
+            xstart_errors.append(mean_flat((pred_xstart - x_start) ** 2))
+            eps = self._predict_eps_from_xstart(x_t, t_batch, pred_xstart)
+            eps_errors.append(mean_flat((eps - noise) ** 2))
+
+        vb = th.stack(vb_terms, dim=1)
+        xstart_mse = th.stack(xstart_errors, dim=1)
+        mse = th.stack(eps_errors, dim=1)
+
+        prior_bpd = self._prior_bpd(x_start)
+        return {
+            "total_bpd": vb.sum(dim=1) + prior_bpd,
+            "prior_bpd": prior_bpd,
+            "vb": vb,
+            "xstart_mse": xstart_mse,
+            "mse": mse,
+        }
+
+
+def _extract_into_tensor(arr, timesteps, broadcast_shape):
+    """
+    Look up per-timestep values in a 1-D numpy array and broadcast them.
+
+    :param arr: the 1-D numpy array.
+    :param timesteps: a tensor of indices into the array to extract.
+    :param broadcast_shape: a larger shape of K dimensions with the batch
+                            dimension equal to the length of timesteps.
+    :return: a float tensor expanded to ``broadcast_shape``; it lives on the
+             same device as ``timesteps``.
+    """
+    table = th.from_numpy(arr).to(device=timesteps.device)
+    picked = table[timesteps].float()
+    # Append singleton dims until the rank matches the requested shape.
+    missing_dims = len(broadcast_shape) - len(picked.shape)
+    if missing_dims > 0:
+        picked = picked[(...,) + (None,) * missing_dims]
+    return picked.expand(broadcast_shape)
diff --git a/modules/commons/improved_diffusion/image_datasets.py b/modules/commons/improved_diffusion/image_datasets.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e49d2394622e5b7ea988e4afe9fef117dedf6a9
--- /dev/null
+++ b/modules/commons/improved_diffusion/image_datasets.py
@@ -0,0 +1,106 @@
+from PIL import Image
+import blobfile as bf
+from mpi4py import MPI
+import numpy as np
+from torch.utils.data import DataLoader, Dataset
+
+
+def load_data(
+    *, data_dir, batch_size, image_size, class_cond=False, deterministic=False
+):
+    """
+    Create an endless generator over (images, kwargs) batches of a dataset.
+
+    Each images is an NCHW float tensor, and the kwargs dict contains zero or
+    more keys, each of which map to a batched Tensor of their own.
+    The kwargs dict can be used for class labels, in which case the key is "y"
+    and the values are integer tensors of class labels.
+
+    The file list is sharded across MPI ranks, and the loader is restarted
+    every time it is exhausted, so the generator never terminates.
+
+    :param data_dir: a dataset directory.
+    :param batch_size: the batch size of each returned pair.
+    :param image_size: the size to which images are resized.
+    :param class_cond: if True, include a "y" key in returned dicts for class
+                       label, derived from the part of each file name before
+                       the first underscore.
+    :param deterministic: if True, yield results in a deterministic order.
+    :raises ValueError: if ``data_dir`` is empty (raised on first iteration).
+    """
+    if not data_dir:
+        raise ValueError("unspecified data directory")
+    all_files = _list_image_files_recursively(data_dir)
+
+    classes = None
+    if class_cond:
+        # Assume classes are the first part of the filename,
+        # before an underscore.
+        class_names = [bf.basename(path).split("_")[0] for path in all_files]
+        label_of = {name: idx for idx, name in enumerate(sorted(set(class_names)))}
+        classes = [label_of[name] for name in class_names]
+
+    comm = MPI.COMM_WORLD
+    dataset = ImageDataset(
+        image_size,
+        all_files,
+        classes=classes,
+        shard=comm.Get_rank(),
+        num_shards=comm.Get_size(),
+    )
+    # Shuffling is the only difference between the two modes.
+    loader = DataLoader(
+        dataset,
+        batch_size=batch_size,
+        shuffle=not deterministic,
+        num_workers=1,
+        drop_last=True,
+    )
+    while True:
+        yield from loader
+
+
+def _list_image_files_recursively(data_dir):
+    """
+    Collect the paths of all image files below ``data_dir``.
+
+    Entries are visited in sorted order. An entry counts as an image when its
+    name contains a dot and the text after the last dot is jpg, jpeg, png or
+    gif (case-insensitive); any other entry that is a directory is searched
+    recursively.
+
+    :param data_dir: the directory to scan.
+    :return: a list of full paths.
+    """
+    image_exts = ("jpg", "jpeg", "png", "gif")
+    found = []
+    for entry in sorted(bf.listdir(data_dir)):
+        full_path = bf.join(data_dir, entry)
+        _, dot, ext = entry.rpartition(".")
+        if dot and ext.lower() in image_exts:
+            found.append(full_path)
+        elif bf.isdir(full_path):
+            found += _list_image_files_recursively(full_path)
+    return found
+
+
+class ImageDataset(Dataset):
+    """
+    Dataset yielding square, center-cropped RGB images and optional labels.
+
+    Only every ``num_shards``-th entry of ``image_paths`` (and ``classes``),
+    starting at index ``shard``, is kept by this instance.
+
+    :param resolution: side length of the returned square images.
+    :param image_paths: list of image paths.
+    :param classes: optional list of labels aligned with ``image_paths``.
+    :param shard: index of the first entry kept by this instance.
+    :param num_shards: stride between kept entries.
+    """
+
+    def __init__(self, resolution, image_paths, classes=None, shard=0, num_shards=1):
+        super().__init__()
+        self.resolution = resolution
+        self.local_images = image_paths[shard:][::num_shards]
+        if classes is None:
+            self.local_classes = None
+        else:
+            self.local_classes = classes[shard:][::num_shards]
+
+    def __len__(self):
+        """Return the number of images kept by this shard."""
+        return len(self.local_images)
+
+    def __getitem__(self, idx):
+        """
+        Load, resize and crop image ``idx``.
+
+        :return: a tuple ``(image, out_dict)`` where ``image`` is a float32
+                 channels-first array computed as ``pixel / 127.5 - 1`` and
+                 ``out_dict`` holds the int64 label under "y" when labels
+                 were supplied.
+        """
+        size = self.resolution
+        with bf.BlobFile(self.local_images[idx], "rb") as f:
+            pil_image = Image.open(f)
+            pil_image.load()
+
+        # We are not on a new enough PIL to support the `reducing_gap`
+        # argument, which uses BOX downsampling at powers of two first.
+        # Thus, we do it by hand to improve downsample quality.
+        while min(*pil_image.size) >= 2 * size:
+            halved = tuple(side // 2 for side in pil_image.size)
+            pil_image = pil_image.resize(halved, resample=Image.BOX)
+
+        # Scale so that the shorter side matches the target resolution.
+        scale = size / min(*pil_image.size)
+        scaled = tuple(round(side * scale) for side in pil_image.size)
+        pil_image = pil_image.resize(scaled, resample=Image.BICUBIC)
+
+        # Center-crop to a square, then rescale the pixel values.
+        arr = np.array(pil_image.convert("RGB"))
+        top = (arr.shape[0] - size) // 2
+        left = (arr.shape[1] - size) // 2
+        arr = arr[top : top + size, left : left + size]
+        arr = arr.astype(np.float32) / 127.5 - 1
+
+        out_dict = {}
+        if self.local_classes is not None:
+            out_dict["y"] = np.array(self.local_classes[idx], dtype=np.int64)
+        return np.transpose(arr, [2, 0, 1]), out_dict
diff --git a/modules/commons/improved_diffusion/logger.py b/modules/commons/improved_diffusion/logger.py
new file mode 100644
index 0000000000000000000000000000000000000000..b1d856dcfea6b56a2ee8d37b286887430dbfac30
--- /dev/null
+++ b/modules/commons/improved_diffusion/logger.py
@@ -0,0 +1,495 @@
+"""
+Logger copied from OpenAI baselines to avoid extra RL-based dependencies:
+https://github.com/openai/baselines/blob/ea25b9e8b234e6ee1bca43083f8f3cf974143998/baselines/logger.py
+"""
+
+import os
+import sys
+import shutil
+import os.path as osp
+import json
+import time
+import datetime
+import tempfile
+import warnings
+from collections import defaultdict
+from contextlib import contextmanager
+
+# Message severities. Logger.log() emits a message only when the logger's
+# threshold is <= the message's level.
+DEBUG = 10
+INFO = 20
+WARN = 30
+ERROR = 40
+
+# Higher than every severity above, so a logger set to this threshold drops
+# all debug/info/warn/error messages.
+DISABLED = 50
+
+
+class KVWriter:
+    """Interface for output formats that accept a dict of key/value pairs."""
+
+    def writekvs(self, kvs):
+        """Write the key/value dict ``kvs``; subclasses must override this."""
+        raise NotImplementedError
+
+
+class SeqWriter:
+    """Interface for output formats that accept a sequence of strings."""
+
+    def writeseq(self, seq):
+        """Write the sequence ``seq``; subclasses must override this."""
+        raise NotImplementedError
+
+
+class HumanOutputFormat(KVWriter, SeqWriter):
+    """
+    Output format that renders key/value dicts as an ASCII table and
+    sequences as one space-separated line.
+
+    :param filename_or_file: either a path, which is opened for text writing
+        and closed by ``close()``, or an already-open writable stream, which
+        is left open by ``close()``.
+    """
+
+    def __init__(self, filename_or_file):
+        if isinstance(filename_or_file, str):
+            self.file = open(filename_or_file, "wt")
+            self.own_file = True
+        else:
+            # Only write() and flush() are ever called on the stream, so the
+            # check is for "write"; checking for "read" would reject valid
+            # write-only streams. The value is wrapped in a tuple so the
+            # message formats correctly even when a tuple is passed in.
+            assert hasattr(filename_or_file, "write"), (
+                "expected file or str, got %s" % (filename_or_file,)
+            )
+            self.file = filename_or_file
+            self.own_file = False
+
+    def writekvs(self, kvs):
+        """
+        Write ``kvs`` as a two-column table, rows sorted case-insensitively
+        by key. An empty dict only prints a warning to stdout.
+        """
+        # Create strings for printing
+        key2str = {}
+        for (key, val) in sorted(kvs.items()):
+            if hasattr(val, "__float__"):
+                valstr = "%-8.3g" % val
+            else:
+                valstr = str(val)
+            key2str[self._truncate(key)] = self._truncate(valstr)
+
+        # Find max widths
+        if len(key2str) == 0:
+            print("WARNING: tried to write empty key-value dict")
+            return
+        else:
+            keywidth = max(map(len, key2str.keys()))
+            valwidth = max(map(len, key2str.values()))
+
+        # Write out the data; 7 is the width of the "| ", " | " and " |" framing.
+        dashes = "-" * (keywidth + valwidth + 7)
+        lines = [dashes]
+        for (key, val) in sorted(key2str.items(), key=lambda kv: kv[0].lower()):
+            lines.append(
+                "| %s%s | %s%s |"
+                % (key, " " * (keywidth - len(key)), val, " " * (valwidth - len(val)))
+            )
+        lines.append(dashes)
+        self.file.write("\n".join(lines) + "\n")
+
+        # Flush the output to the file
+        self.file.flush()
+
+    def _truncate(self, s):
+        """Shorten ``s`` to at most 30 characters, ending in "..." if cut."""
+        maxlen = 30
+        return s[: maxlen - 3] + "..." if len(s) > maxlen else s
+
+    def writeseq(self, seq):
+        """Write the strings in ``seq`` on one line, separated by spaces."""
+        seq = list(seq)
+        for (i, elem) in enumerate(seq):
+            self.file.write(elem)
+            if i < len(seq) - 1:  # add space unless this is the last one
+                self.file.write(" ")
+        self.file.write("\n")
+        self.file.flush()
+
+    def close(self):
+        """Close the underlying file, but only if this object opened it."""
+        if self.own_file:
+            self.file.close()
+
+
+class JSONOutputFormat(KVWriter):
+    """
+    Output format that appends one JSON object per ``writekvs`` call.
+
+    :param filename: path of the file to (over)write.
+    """
+
+    def __init__(self, filename):
+        self.file = open(filename, "wt")
+
+    def writekvs(self, kvs):
+        """
+        Write ``kvs`` as a single JSON line and flush.
+
+        Values exposing a ``dtype`` attribute are converted with ``float()``
+        so that they can be serialized. The conversion is done on a copy, so
+        the caller's dict, which may be shared with other writers, is left
+        untouched.
+        """
+        record = {
+            key: (float(val) if hasattr(val, "dtype") else val)
+            for key, val in kvs.items()
+        }
+        self.file.write(json.dumps(record) + "\n")
+        self.file.flush()
+
+    def close(self):
+        """Close the output file."""
+        self.file.close()
+
+
+class CSVOutputFormat(KVWriter):
+    """
+    Output format that writes one comma-separated row per ``writekvs`` call.
+
+    When a call introduces keys not seen before, the header is rewritten and
+    every earlier row is padded with empty cells for the new columns.
+
+    :param filename: path of the file to (over)write.
+    """
+
+    def __init__(self, filename):
+        # Opened for reading as well, since earlier rows are re-read when
+        # the header grows.
+        self.file = open(filename, "w+t")
+        self.keys = []
+        self.sep = ","
+
+    def writekvs(self, kvs):
+        """Append ``kvs`` as a row, extending the header first if needed."""
+        new_keys = sorted(kvs.keys() - self.keys)
+        if new_keys:
+            self.keys.extend(new_keys)
+            # Re-read what was written so far and rewrite it from the start
+            # with the widened header.
+            self.file.seek(0)
+            old_lines = self.file.readlines()
+            self.file.seek(0)
+            self.file.write(",".join(self.keys) + "\n")
+            padding = self.sep * len(new_keys)
+            for old_row in old_lines[1:]:
+                self.file.write(old_row[:-1] + padding + "\n")
+
+        # Missing (or None) values become empty cells.
+        cells = []
+        for key in self.keys:
+            value = kvs.get(key)
+            cells.append("" if value is None else str(value))
+        self.file.write(",".join(cells) + "\n")
+        self.file.flush()
+
+    def close(self):
+        """Close the output file."""
+        self.file.close()
+
+
+class TensorBoardOutputFormat(KVWriter):
+    """
+    Dumps key/value pairs into TensorBoard's numeric format.
+
+    :param dir: directory that receives the event file; created if missing.
+    """
+
+    def __init__(self, dir):
+        os.makedirs(dir, exist_ok=True)
+        self.dir = dir
+        self.step = 1
+        # TensorFlow is imported here rather than at module level, so it is
+        # only needed when this output format is actually constructed.
+        import tensorflow as tf
+        from tensorflow.python import pywrap_tensorflow
+        from tensorflow.core.util import event_pb2
+        from tensorflow.python.util import compat
+
+        events_path = osp.join(osp.abspath(dir), "events")
+        self.tf = tf
+        self.event_pb2 = event_pb2
+        self.pywrap_tensorflow = pywrap_tensorflow
+        self.writer = pywrap_tensorflow.EventsWriter(compat.as_bytes(events_path))
+
+    def writekvs(self, kvs):
+        """
+        Write every entry of ``kvs`` as a scalar summary and flush.
+
+        The event is tagged with an internal step counter that starts at 1
+        and increases by one per call.
+        """
+        values = [
+            self.tf.Summary.Value(tag=key, simple_value=float(val))
+            for key, val in kvs.items()
+        ]
+        event = self.event_pb2.Event(
+            wall_time=time.time(), summary=self.tf.Summary(value=values)
+        )
+        event.step = self.step
+        self.writer.WriteEvent(event)
+        self.writer.Flush()
+        self.step += 1
+
+    def close(self):
+        """Close the event writer; later calls are no-ops."""
+        if self.writer:
+            self.writer.Close()
+            self.writer = None
+
+
+def make_output_format(format, ev_dir, log_suffix=""):
+    """
+    Build the output format object named by ``format``.
+
+    ``ev_dir`` is created first, even if ``format`` turns out to be unknown.
+
+    :param format: one of "stdout", "log", "json", "csv" or "tensorboard".
+    :param ev_dir: directory receiving any file the format writes.
+    :param log_suffix: text inserted into the output file (or directory) name.
+    :return: the newly constructed output format.
+    :raises ValueError: if ``format`` is not a known name.
+    """
+    os.makedirs(ev_dir, exist_ok=True)
+
+    def in_dir(pattern):
+        return osp.join(ev_dir, pattern % log_suffix)
+
+    # Builders are lazy so that only the requested format is constructed.
+    builders = (
+        ("stdout", lambda: HumanOutputFormat(sys.stdout)),
+        ("log", lambda: HumanOutputFormat(in_dir("log%s.txt"))),
+        ("json", lambda: JSONOutputFormat(in_dir("progress%s.json"))),
+        ("csv", lambda: CSVOutputFormat(in_dir("progress%s.csv"))),
+        ("tensorboard", lambda: TensorBoardOutputFormat(in_dir("tb%s"))),
+    )
+    for name, build in builders:
+        if format == name:
+            return build()
+    raise ValueError("Unknown format specified: %s" % (format,))
+
+
+# ================================================================
+# API
+# ================================================================
+
+
+def logkv(key, val):
+    """
+    Record one diagnostic value on the current logger.
+
+    Call this once for each diagnostic quantity, each iteration. If called
+    several times with the same key before ``dumpkvs()``, the last value is
+    the one that is kept.
+
+    :param key: name of the diagnostic.
+    :param val: value to store under ``key``.
+    """
+    get_current().logkv(key, val)
+
+
+def logkv_mean(key, val):
+    """
+    The same as logkv(), but if called many times with the same key, the
+    values are combined into a running mean instead of being overwritten.
+
+    :param key: name of the diagnostic.
+    :param val: value to fold into the mean stored under ``key``.
+    """
+    get_current().logkv_mean(key, val)
+
+
+def logkvs(d):
+    """
+    Log a dictionary of key-value pairs by calling logkv() on each entry.
+
+    :param d: dict mapping diagnostic names to values.
+    """
+    for (k, v) in d.items():
+        logkv(k, v)
+
+
+def dumpkvs():
+    """
+    Write all of the diagnostics from the current iteration to the current
+    logger's key/value output formats, then clear them.
+
+    :return: whatever ``Logger.dumpkvs()`` returns, i.e. a copy of the dict
+             that was handed to the output formats.
+    """
+    return get_current().dumpkvs()
+
+
+def getkvs():
+    """
+    Return the current logger's dict of pending diagnostics.
+
+    This is the logger's own ``name2val`` dict, not a copy.
+    """
+    return get_current().name2val
+
+
+def log(*args, level=INFO):
+    """
+    Write the sequence of args to every sequence-capable output format of
+    the current logger (the console and/or a log file, depending on how it
+    was configured).
+
+    The message is dropped when ``level`` is below the logger's threshold.
+    HumanOutputFormat writes the stringified args on one line, separated by
+    single spaces.
+
+    :param level: severity of the message (DEBUG, INFO, WARN or ERROR).
+    """
+    get_current().log(*args, level=level)
+
+
+def debug(*args):
+    """Log ``args`` at DEBUG level; see log()."""
+    log(*args, level=DEBUG)
+
+
+def info(*args):
+    """Log ``args`` at INFO level; see log()."""
+    log(*args, level=INFO)
+
+
+def warn(*args):
+    """Log ``args`` at WARN level; see log()."""
+    log(*args, level=WARN)
+
+
+def error(*args):
+    """Log ``args`` at ERROR level; see log()."""
+    log(*args, level=ERROR)
+
+
+def set_level(level):
+    """
+    Set logging threshold on current logger.
+
+    Messages passed to log() with a level below this threshold are dropped.
+
+    :param level: the new threshold, e.g. DEBUG, INFO, WARN, ERROR or DISABLED.
+    """
+    get_current().set_level(level)
+
+
+def set_comm(comm):
+    """
+    Set the communicator of the current logger.
+
+    When the communicator is not None, ``Logger.dumpkvs()`` passes it to
+    ``mpi_weighted_mean`` to average the diagnostics before writing them.
+    """
+    get_current().set_comm(comm)
+
+
+def get_dir():
+    """
+    Get directory that log files are being written to.
+
+    This is the ``dir`` the current logger was constructed with; it is None
+    only if the logger was built with ``dir=None``. NOTE(review): a logger
+    created through configure() always receives a string directory.
+    """
+    return get_current().get_dir()
+
+
+# Alternative names bound to the very same functions as logkv() and dumpkvs().
+record_tabular = logkv
+dump_tabular = dumpkvs
+
+
+@contextmanager
+def profile_kv(scopename):
+    """
+    Context manager that adds the wall-clock time spent in its body to the
+    current logger's diagnostic named ``"wait_" + scopename``.
+
+    The time is recorded even if the body raises.
+    """
+    logkey = "wait_" + scopename
+    started = time.time()
+    try:
+        yield
+    finally:
+        timings = get_current().name2val
+        timings[logkey] += time.time() - started
+
+
+def profile(n):
+    """
+    Decorator factory that times every call of the decorated function with
+    profile_kv(n).
+
+    Usage:
+    @profile("my_func")
+    def my_func(): code
+
+    :param n: scope name passed to profile_kv().
+    :return: a decorator; the wrapper it produces keeps the decorated
+             function's name, docstring and other metadata.
+    """
+    # Imported locally so this function does not rely on a module-level import.
+    import functools
+
+    def decorator_with_name(func):
+        # wraps() copies __name__/__doc__ etc. so the wrapper still looks like
+        # the original function to introspection and debugging tools.
+        @functools.wraps(func)
+        def func_wrapper(*args, **kwargs):
+            with profile_kv(n):
+                return func(*args, **kwargs)
+
+        return func_wrapper
+
+    return decorator_with_name
+
+
+# ================================================================
+# Backend
+# ================================================================
+
+
+def get_current():
+    """
+    Return the logger used by the module-level logging functions.
+
+    If no logger has been set yet, a default one is configured first.
+    """
+    if Logger.CURRENT is None:
+        _configure_default_logger()
+
+    return Logger.CURRENT
+
+
+class Logger(object):
+    """
+    Collects key/value diagnostics and text messages and forwards them to a
+    list of output formats.
+
+    :param dir: directory associated with this logger (see get_dir()).
+    :param output_formats: list of KVWriter and/or SeqWriter objects.
+    :param comm: optional communicator; when set, dumpkvs() averages the
+        diagnostics with mpi_weighted_mean() before writing them.
+    """
+
+    # Logger created by _configure_default_logger(); reset() falls back to it.
+    DEFAULT = None
+    # Logger currently used by the module-level free functions.
+    CURRENT = None
+
+    def __init__(self, dir, output_formats, comm=None):
+        self.name2val = defaultdict(float)  # values this iteration
+        self.name2cnt = defaultdict(int)  # number of samples behind each mean
+        self.level = INFO
+        self.dir = dir
+        self.output_formats = output_formats
+        self.comm = comm
+
+    # Logging API, forwarded
+    # ----------------------------------------
+    def logkv(self, key, val):
+        """Store ``val`` under ``key``, replacing any earlier value."""
+        self.name2val[key] = val
+
+    def logkv_mean(self, key, val):
+        """Fold ``val`` into the running mean stored under ``key``."""
+        previous = self.name2val[key]
+        count = self.name2cnt[key]
+        self.name2val[key] = previous * count / (count + 1) + val / (count + 1)
+        self.name2cnt[key] = count + 1
+
+    def dumpkvs(self):
+        """
+        Send the pending diagnostics to every KVWriter and clear them.
+
+        :return: a copy of the dict that was written.
+        """
+        if self.comm is None:
+            d = self.name2val
+        else:
+            # Keys without a recorded count are weighted with a count of 1.
+            weighted = {
+                name: (val, self.name2cnt.get(name, 1))
+                for (name, val) in self.name2val.items()
+            }
+            d = mpi_weighted_mean(self.comm, weighted)
+            if self.comm.rank != 0:
+                d["dummy"] = 1  # so we don't get a warning about empty dict
+        out = d.copy()  # Return the dict for unit testing purposes
+        kv_writers = [f for f in self.output_formats if isinstance(f, KVWriter)]
+        for writer in kv_writers:
+            writer.writekvs(d)
+        self.name2val.clear()
+        self.name2cnt.clear()
+        return out
+
+    def log(self, *args, level=INFO):
+        """Write ``args`` unless ``level`` is below this logger's threshold."""
+        if self.level <= level:
+            self._do_log(args)
+
+    # Configuration
+    # ----------------------------------------
+    def set_level(self, level):
+        """Set the threshold used by log()."""
+        self.level = level
+
+    def set_comm(self, comm):
+        """Set the communicator used by dumpkvs()."""
+        self.comm = comm
+
+    def get_dir(self):
+        """Return the directory this logger was created with."""
+        return self.dir
+
+    def close(self):
+        """Close every output format."""
+        for fmt in self.output_formats:
+            fmt.close()
+
+    # Misc
+    # ----------------------------------------
+    def _do_log(self, args):
+        """Send the stringified ``args`` to every SeqWriter."""
+        seq_writers = [f for f in self.output_formats if isinstance(f, SeqWriter)]
+        for writer in seq_writers:
+            # A fresh iterator per writer: each one consumes its own copy.
+            writer.writeseq(map(str, args))
+
+
+def get_rank_without_mpi_import():
+    """
+    Return this process's rank as found in the environment, or 0.
+
+    The environment variables PMI_RANK and OMPI_COMM_WORLD_RANK are checked in
+    that order instead of importing mpi4py, to avoid calling MPI_Init() when
+    this module is imported.
+    """
+    rank_vars = ("PMI_RANK", "OMPI_COMM_WORLD_RANK")
+    found = (int(os.environ[name]) for name in rank_vars if name in os.environ)
+    return next(found, 0)
+
+
+def mpi_weighted_mean(comm, local_name2valcount):
+    """
+    Copied from: https://github.com/openai/baselines/blob/ea25b9e8b234e6ee1bca43083f8f3cf974143998/baselines/common/mpi_util.py#L110
+    Perform a weighted average over dicts that are each on a different node.
+
+    Values that cannot be converted with ``float()`` are skipped with a
+    warning, whether the conversion fails with ValueError (e.g. a
+    non-numeric string) or with TypeError (e.g. None or a list).
+
+    :param comm: communicator providing ``gather()`` and ``rank``.
+    :param local_name2valcount: dict mapping key -> (value, count).
+    :return: on rank 0, a dict mapping key -> weighted mean over all nodes;
+             on every other rank, an empty dict.
+    """
+    all_name2valcount = comm.gather(local_name2valcount)
+    if comm.rank != 0:
+        return {}
+
+    name2sum = defaultdict(float)
+    name2count = defaultdict(float)
+    for n2vc in all_name2valcount:
+        for (name, (val, count)) in n2vc.items():
+            try:
+                val = float(val)
+            except (TypeError, ValueError):
+                # float() raises TypeError for unsupported types and
+                # ValueError for unparsable strings; neither should abort
+                # the whole reduction.
+                warnings.warn(
+                    "WARNING: tried to compute mean on non-float {}={}".format(
+                        name, val
+                    )
+                )
+                continue
+            name2sum[name] += val * count
+            name2count[name] += count
+    return {name: name2sum[name] / name2count[name] for name in name2sum}
+
+
+def configure(dir=None, format_strs=None, comm=None, log_suffix=""):
+    """
+    Create a new logger and install it as ``Logger.CURRENT``.
+
+    :param dir: log directory. Defaults to $OPENAI_LOGDIR, or else to a
+        timestamped directory below the system temp directory.
+    :param format_strs: list of output format names. Defaults to the
+        comma-separated value of $OPENAI_LOG_FORMAT ("stdout,log,csv" if
+        unset) on rank 0 and of $OPENAI_LOG_FORMAT_MPI ("log" if unset) on
+        other ranks. Empty names are ignored.
+    :param comm: if provided, average all numerical stats across that comm.
+    :param log_suffix: suffix for output file names; "-rankNNN" is appended
+        on ranks above 0.
+    """
+    if dir is None:
+        dir = os.getenv("OPENAI_LOGDIR")
+    if dir is None:
+        stamp = datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f")
+        dir = osp.join(tempfile.gettempdir(), stamp)
+    assert isinstance(dir, str)
+    dir = os.path.expanduser(dir)
+    os.makedirs(os.path.expanduser(dir), exist_ok=True)
+
+    rank = get_rank_without_mpi_import()
+    if rank > 0:
+        log_suffix = log_suffix + "-rank%03i" % rank
+
+    if format_strs is None:
+        if rank == 0:
+            env_name, fallback = "OPENAI_LOG_FORMAT", "stdout,log,csv"
+        else:
+            env_name, fallback = "OPENAI_LOG_FORMAT_MPI", "log"
+        format_strs = os.getenv(env_name, fallback).split(",")
+    output_formats = [
+        make_output_format(name, dir, log_suffix) for name in format_strs if name
+    ]
+
+    Logger.CURRENT = Logger(dir=dir, output_formats=output_formats, comm=comm)
+    if output_formats:
+        log("Logging to %s" % dir)
+
+
+def _configure_default_logger():
+    """
+    Configure a logger with default settings and remember it as
+    ``Logger.DEFAULT`` in addition to ``Logger.CURRENT``.
+    """
+    configure()
+    Logger.DEFAULT = Logger.CURRENT
+
+
+def reset():
+    """
+    Close the current logger and switch back to ``Logger.DEFAULT``.
+
+    Does nothing when the current logger already is the default one.
+    """
+    if Logger.CURRENT is not Logger.DEFAULT:
+        Logger.CURRENT.close()
+        Logger.CURRENT = Logger.DEFAULT
+        log("Reset logger")
+
+
+@contextmanager
+def scoped_configure(dir=None, format_strs=None, comm=None):
+    """
+    Context manager that installs a freshly configured logger for the
+    duration of its body.
+
+    On exit, even after an exception, the logger that is current at that
+    point is closed and the previously current logger is restored.
+
+    :param dir: forwarded to configure().
+    :param format_strs: forwarded to configure().
+    :param comm: forwarded to configure().
+    """
+    saved_logger = Logger.CURRENT
+    configure(dir=dir, format_strs=format_strs, comm=comm)
+    try:
+        yield
+    finally:
+        Logger.CURRENT.close()
+        Logger.CURRENT = saved_logger
+
diff --git a/modules/commons/improved_diffusion/losses.py b/modules/commons/improved_diffusion/losses.py
new file mode 100644
index 0000000000000000000000000000000000000000..251e42e4f36a31bb5e1aeda874b3a45d722000a2
--- /dev/null
+++ b/modules/commons/improved_diffusion/losses.py
@@ -0,0 +1,77 @@
+"""
+Helpers for various likelihood-based losses. These are ported from the original
+Ho et al. diffusion models codebase:
+https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/utils.py
+"""
+
+import numpy as np
+
+import torch as th
+
+
+def normal_kl(mean1, logvar1, mean2, logvar2):
+    """
+    Compute the elementwise KL divergence KL(N(mean1, var1) || N(mean2, var2))
+    between two gaussians given by their means and log-variances.
+
+    Shapes are automatically broadcasted, so batches can be compared to
+    scalars, among other use cases.
+
+    :raises AssertionError: if none of the four arguments is a Tensor.
+    """
+    args = (mean1, logvar1, mean2, logvar2)
+    reference = next((a for a in args if isinstance(a, th.Tensor)), None)
+    assert reference is not None, "at least one argument must be a Tensor"
+
+    # Force variances to be Tensors. Broadcasting helps convert scalars to
+    # Tensors, but it does not work for th.exp().
+    if not isinstance(logvar1, th.Tensor):
+        logvar1 = th.tensor(logvar1).to(reference)
+    if not isinstance(logvar2, th.Tensor):
+        logvar2 = th.tensor(logvar2).to(reference)
+
+    variance_ratio = th.exp(logvar1 - logvar2)
+    scaled_sq_diff = ((mean1 - mean2) ** 2) * th.exp(-logvar2)
+    return 0.5 * (-1.0 + logvar2 - logvar1 + variance_ratio + scaled_sq_diff)
+
+
+def approx_standard_normal_cdf(x):
+    """
+    Approximate the cumulative distribution function of the standard normal
+    with a fast tanh-based formula.
+
+    :param x: a Tensor of evaluation points.
+    :return: a Tensor like x of approximate CDF values.
+    """
+    cubic = x + 0.044715 * th.pow(x, 3)
+    return 0.5 * (1.0 + th.tanh(np.sqrt(2.0 / np.pi) * cubic))
+
+
+def discretized_gaussian_log_likelihood(x, *, means, log_scales):
+    """
+    Compute the log-likelihood of a Gaussian distribution discretizing to a
+    given image.
+
+    Each value is assigned the probability mass of the bin of half-width
+    1/255 around it. Values below -0.999 take all the mass to the left of the
+    bin's upper edge, values above 0.999 all the mass to the right of its
+    lower edge. Probabilities are clamped to at least 1e-12 before the log.
+
+    :param x: the target images. It is assumed that this was uint8 values,
+              rescaled to the range [-1, 1].
+    :param means: the Gaussian mean Tensor.
+    :param log_scales: the Gaussian log stddev Tensor.
+    :return: a tensor like x of log probabilities (in nats).
+    """
+    assert x.shape == means.shape == log_scales.shape
+    half_bin = 1.0 / 255.0
+    tiny = 1e-12
+
+    centered_x = x - means
+    inv_stdv = th.exp(-log_scales)
+    cdf_upper = approx_standard_normal_cdf(inv_stdv * (centered_x + half_bin))
+    cdf_lower = approx_standard_normal_cdf(inv_stdv * (centered_x - half_bin))
+
+    log_left_tail = th.log(cdf_upper.clamp(min=tiny))
+    log_right_tail = th.log((1.0 - cdf_lower).clamp(min=tiny))
+    log_bin_mass = th.log((cdf_upper - cdf_lower).clamp(min=tiny))
+
+    upper_or_interior = th.where(x > 0.999, log_right_tail, log_bin_mass)
+    log_probs = th.where(x < -0.999, log_left_tail, upper_or_interior)
+    assert log_probs.shape == x.shape
+    return log_probs
diff --git a/modules/commons/improved_diffusion/nn.py b/modules/commons/improved_diffusion/nn.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4cd59c2324b003626b8cf4c7581effd334908d3
--- /dev/null
+++ b/modules/commons/improved_diffusion/nn.py
@@ -0,0 +1,170 @@
+"""
+Various utilities for neural networks.
+"""
+
+import math
+
+import torch as th
+import torch.nn as nn
+
+
+# Hand-rolled SiLU: PyTorch 1.7 ships nn.SiLU, but PyTorch 1.5 must work too.
+class SiLU(nn.Module):
+    def forward(self, x):  # x * sigmoid(x), elementwise
+        return th.sigmoid(x) * x
+
+
+class GroupNorm32(nn.GroupNorm):
+    def forward(self, x):  # normalize in float32, then cast back to x's dtype
+        return super().forward(x.to(th.float32)).to(x.dtype)
+
+
+def conv_nd(dims, *args, **kwargs):
+    """
+    Build an nn.Conv1d, nn.Conv2d or nn.Conv3d, selected by `dims`.
+
+    Remaining arguments go to the constructor unchanged; any other `dims`
+    raises ValueError.
+    """
+    for rank, conv_cls in ((1, nn.Conv1d), (2, nn.Conv2d), (3, nn.Conv3d)):
+        if dims == rank:
+            return conv_cls(*args, **kwargs)
+    raise ValueError(f"unsupported dimensions: {dims}")
+
+
+def linear(*args, **kwargs):
+    """Build an nn.Linear module."""
+    # Positional and keyword arguments go to the constructor untouched.
+    layer = nn.Linear(*args, **kwargs)
+    return layer
+
+
+def avg_pool_nd(dims, *args, **kwargs):
+    """
+    Build an nn.AvgPool1d, nn.AvgPool2d or nn.AvgPool3d, selected by `dims`.
+
+    Remaining arguments go to the constructor unchanged; any other `dims`
+    raises ValueError.
+    """
+    for rank, pool_cls in ((1, nn.AvgPool1d), (2, nn.AvgPool2d), (3, nn.AvgPool3d)):
+        if dims == rank:
+            return pool_cls(*args, **kwargs)
+    raise ValueError(f"unsupported dimensions: {dims}")
+
+
+def update_ema(target_params, source_params, rate=0.99):
+    """
+    In-place EMA step: target <- rate * target + (1 - rate) * source.
+
+    :param target_params: the sequence of tensors to update in place.
+    :param source_params: the sequence of tensors to move towards.
+    :param rate: the EMA rate (closer to 1 means slower).
+    """
+    for ema_p, live_p in zip(target_params, source_params):
+        ema_view = ema_p.detach()  # shares storage, so ema_p itself is updated
+        ema_view.mul_(rate).add_(live_p, alpha=1 - rate)
+
+
+def zero_module(module):
+    """Set every parameter of `module` to zero in place, then return it."""
+    params = module.parameters()
+    for param in params:
+        # detach() keeps the in-place write out of autograd.
+        param.detach().zero_()
+    return module
+
+
+def scale_module(module, scale):
+    """Multiply every parameter of `module` by `scale` in place, then return it."""
+    params = module.parameters()
+    for param in params:
+        # detach() keeps the in-place write out of autograd.
+        param.detach().mul_(scale)
+    return module
+
+
+def mean_flat(tensor):
+    """Average over every dimension except the first (batch) one."""
+    # Every dim from 1 up to the last one.
+    reduce_dims = list(range(1, len(tensor.shape)))
+    return tensor.mean(dim=reduce_dims)
+
+
+def normalization(channels):
+    """
+    Build the normalization layer: a GroupNorm32 with 32 groups.
+
+    :param channels: number of input channels.
+    :return: the GroupNorm32 module.
+    """
+    return GroupNorm32(num_groups=32, num_channels=channels)
+
+
+def timestep_embedding(timesteps, dim, max_period=10000):
+    """
+    Sinusoidal embeddings of (possibly fractional) timesteps.
+
+    :param timesteps: a 1-D Tensor of N indices, one per batch element.
+    :param dim: the dimension of the output.
+    :param max_period: controls the minimum frequency of the embeddings.
+    :return: an [N x dim] Tensor: cosines first, then sines, plus one zero
+             column when dim is odd.
+    """
+    n_freqs = dim // 2
+    exponents = th.arange(start=0, end=n_freqs, dtype=th.float32)
+    freqs = th.exp(-math.log(max_period) * exponents / n_freqs)
+    freqs = freqs.to(device=timesteps.device)
+    phases = timesteps[:, None].float() * freqs[None]
+    parts = [th.cos(phases), th.sin(phases)]
+    if dim % 2:
+        parts.append(th.zeros_like(parts[0][:, :1]))
+    return th.cat(parts, dim=-1)
+
+
+def checkpoint(func, inputs, params, flag):
+    """
+    Call `func(*inputs)`, optionally through CheckpointFunction so that the
+    forward pass runs under no_grad and is recomputed during backward.
+
+    :param func: the function to evaluate.
+    :param inputs: the argument sequence to pass to `func`.
+    :param params: a sequence of parameters `func` depends on but does not
+                   explicitly take as arguments.
+    :param flag: if False, call `func` directly with no checkpointing.
+    """
+    if not flag:
+        return func(*inputs)
+    # Inputs first, then params; the length tells the Function where to split.
+    flat_args = tuple(inputs) + tuple(params)
+    return CheckpointFunction.apply(func, len(inputs), *flat_args)
+
+
+class CheckpointFunction(th.autograd.Function):  # autograd op behind checkpoint()
+    @staticmethod
+    def forward(ctx, run_function, length, *args):
+        ctx.run_function = run_function
+        ctx.input_tensors = list(args[:length])  # the first `length` args are inputs
+        ctx.input_params = list(args[length:])  # everything after them is a parameter
+        with th.no_grad():  # run without recording a graph
+            output_tensors = ctx.run_function(*ctx.input_tensors)
+        return output_tensors
+
+    @staticmethod
+    def backward(ctx, *output_grads):
+        ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors]
+        with th.enable_grad():  # re-run the forward pass, this time with a graph
+            # Fixes a bug where the first op in run_function modifies the
+            # Tensor storage in place, which is not allowed for detach()'d
+            # Tensors.
+            shallow_copies = [x.view_as(x) for x in ctx.input_tensors]
+            output_tensors = ctx.run_function(*shallow_copies)
+        input_grads = th.autograd.grad(
+            output_tensors,
+            ctx.input_tensors + ctx.input_params,
+            output_grads,
+            allow_unused=True,
+        )
+        del ctx.input_tensors
+        del ctx.input_params
+        del output_tensors
+        return (None, None) + input_grads  # no gradients for run_function and length
diff --git a/modules/commons/improved_diffusion/resample.py b/modules/commons/improved_diffusion/resample.py
new file mode 100644
index 0000000000000000000000000000000000000000..c82eccdcd47c468d41e7cbe02de6a731f2c9bf81
--- /dev/null
+++ b/modules/commons/improved_diffusion/resample.py
@@ -0,0 +1,154 @@
+from abc import ABC, abstractmethod
+
+import numpy as np
+import torch as th
+import torch.distributed as dist
+
+
+def create_named_schedule_sampler(name, diffusion):
+    """
+    Instantiate one of the built-in schedule samplers by name.
+
+    :param name: "uniform" or "loss-second-moment".
+    :param diffusion: the diffusion object to sample for.
+    :raises NotImplementedError: for any other name.
+    """
+    if name == "uniform":
+        return UniformSampler(diffusion)
+    if name == "loss-second-moment":
+        return LossSecondMomentResampler(diffusion)
+    raise NotImplementedError(f"unknown schedule sampler: {name}")
+
+
+class ScheduleSampler(ABC):
+    """
+    Base class for distributions over diffusion timesteps, used to reduce
+    the variance of the training objective.
+
+    The default sample() is unbiased importance sampling: each drawn
+    timestep comes with the weight 1 / (T * p_t), so the objective's mean
+    is unchanged. Subclasses may override sample() to reweight differently,
+    which does change the objective.
+    """
+
+    @abstractmethod
+    def weights(self):
+        """
+        Return a numpy array with one weight per diffusion step.
+
+        Weights must be positive; sample() normalizes them itself.
+        """
+
+    def sample(self, batch_size, device):
+        """
+        Draw timesteps in proportion to weights().
+
+        :param batch_size: the number of timesteps.
+        :param device: the torch device to save to.
+        :return: a tuple (timesteps, weights):
+                 - timesteps: a long tensor of timestep indices.
+                 - weights: a float tensor of weights to scale the losses.
+        """
+        raw = self.weights()
+        probs = raw / np.sum(raw)
+        drawn = np.random.choice(len(probs), size=(batch_size,), p=probs)
+        importance = 1 / (len(probs) * probs[drawn])
+        timesteps = th.from_numpy(drawn).long().to(device)
+        loss_weights = th.from_numpy(importance).float().to(device)
+        return timesteps, loss_weights
+
+
+class UniformSampler(ScheduleSampler):  # every timestep gets weight 1
+    def __init__(self, diffusion):
+        self.diffusion = diffusion
+        self._weights = np.ones(shape=[diffusion.num_timesteps])
+
+    def weights(self):  # the same cached array on every call
+        return self._weights
+
+
+class LossAwareSampler(ScheduleSampler):
+    def update_with_local_losses(self, local_ts, local_losses):
+        """
+        Update the reweighting using losses from a model.
+
+        Call this method from each rank with a batch of timesteps and the
+        corresponding losses for each of those timesteps. The batches are
+        all-gathered, so every rank ends up with the same reweighting. Ranks
+        may pass batches of different lengths.
+
+        :param local_ts: an integer Tensor of timesteps.
+        :param local_losses: a 1D Tensor of losses.
+        """
+        local_bs = th.tensor([len(local_ts)], dtype=th.int32, device=local_ts.device)
+        gathered_bs = [th.zeros_like(local_bs) for _ in range(dist.get_world_size())]
+        dist.all_gather(gathered_bs, local_bs)
+        batch_sizes = [x.item() for x in gathered_bs]
+        max_bs = max(batch_sizes)
+
+        # all_gather needs equally sized tensors on every rank, so pad the
+        # local batch to the largest batch size as well as the receive buffers.
+        pad = max_bs - len(local_ts)
+        padded_ts = th.cat([local_ts, local_ts.new_zeros(pad)])
+        padded_losses = th.cat([local_losses, local_losses.new_zeros(pad)])
+        timestep_batches = [th.zeros_like(padded_ts) for _ in batch_sizes]
+        loss_batches = [th.zeros_like(padded_losses) for _ in batch_sizes]
+        dist.all_gather(timestep_batches, padded_ts)
+        dist.all_gather(loss_batches, padded_losses)
+
+        # Drop the padding again: keep only the first `bs` entries per rank.
+        timesteps = [
+            x.item() for y, bs in zip(timestep_batches, batch_sizes) for x in y[:bs]
+        ]
+        losses = [x.item() for y, bs in zip(loss_batches, batch_sizes) for x in y[:bs]]
+        self.update_with_all_losses(timesteps, losses)
+
+    @abstractmethod
+    def update_with_all_losses(self, ts, losses):
+        """
+        Update the reweighting using losses from a model.
+
+        Sub-classes should override this method to update the reweighting
+        using losses from the model.
+
+        This method directly updates the reweighting without synchronizing
+        between workers. It is called by update_with_local_losses from all
+        ranks with identical arguments. Thus, it should have deterministic
+        behavior to maintain state across workers.
+
+        :param ts: a list of int timesteps.
+        :param losses: a list of float losses, one per timestep.
+        """
+
+
+class LossSecondMomentResampler(LossAwareSampler):  # weight ~ RMS of recent losses
+    def __init__(self, diffusion, history_per_term=10, uniform_prob=0.001):
+        self.diffusion = diffusion
+        self.history_per_term = history_per_term
+        self.uniform_prob = uniform_prob
+        num_steps = diffusion.num_timesteps
+        self._loss_history = np.zeros([num_steps, history_per_term], dtype=np.float64)
+        # Builtin int: the np.int alias was removed in NumPy 1.24.
+        self._loss_counts = np.zeros([num_steps], dtype=int)
+
+    def weights(self):
+        if not self._warmed_up():  # uniform until every timestep's history is full
+            return np.ones([self.diffusion.num_timesteps], dtype=np.float64)
+        weights = np.sqrt(np.mean(self._loss_history ** 2, axis=-1))
+        weights /= np.sum(weights)
+        weights *= 1 - self.uniform_prob
+        weights += self.uniform_prob / len(weights)  # spread uniform_prob evenly
+        return weights
+
+    def update_with_all_losses(self, ts, losses):
+        for t, loss in zip(ts, losses):
+            if self._loss_counts[t] == self.history_per_term:
+                # Shift out the oldest loss term.
+                self._loss_history[t, :-1] = self._loss_history[t, 1:]
+                self._loss_history[t, -1] = loss
+            else:
+                self._loss_history[t, self._loss_counts[t]] = loss
+                self._loss_counts[t] += 1
+
+    def _warmed_up(self):
+        return (self._loss_counts == self.history_per_term).all()
diff --git a/modules/commons/improved_diffusion/respace.py b/modules/commons/improved_diffusion/respace.py
new file mode 100644
index 0000000000000000000000000000000000000000..045d58df956e6ddb04216e972bffff47c59bf488
--- /dev/null
+++ b/modules/commons/improved_diffusion/respace.py
@@ -0,0 +1,122 @@
+import numpy as np
+import torch as th
+
+from .gaussian_diffusion import GaussianDiffusion
+
+
+def space_timesteps(num_timesteps, section_counts):
+    """
+    Pick which timesteps of an original diffusion process to keep, given
+    how many to take from each equally-sized portion of that process.
+
+    For example, if there's 300 timesteps and the section counts are [10,15,20]
+    then the first 100 timesteps are strided to be 10 timesteps, the second 100
+    are strided to be 15 timesteps, and the final 100 are strided to be 20.
+
+    :param num_timesteps: the number of diffusion steps in the original
+                          process to divide up.
+    :param section_counts: either a list of numbers, or a string containing
+                           comma-separated numbers, indicating the step count
+                           per section. As a special case, "ddimN" selects N
+                           steps with one fixed integer stride from step 0
+                           (the striding from the DDIM paper).
+    :return: a set of diffusion steps from the original process to use.
+    :raises ValueError: if no integer stride yields exactly N steps for
+                        "ddimN", or a section has fewer steps than requested.
+    """
+    if isinstance(section_counts, str):
+        if section_counts.startswith("ddim"):
+            desired_count = int(section_counts[len("ddim") :])
+            # Smallest stride whose range has exactly the requested length.
+            for i in range(1, num_timesteps):
+                if len(range(0, num_timesteps, i)) == desired_count:
+                    return set(range(0, num_timesteps, i))
+            raise ValueError(
+                f"cannot create exactly {desired_count} steps with an integer stride"
+            )
+        section_counts = [int(x) for x in section_counts.split(",")]
+    size_per = num_timesteps // len(section_counts)
+    extra = num_timesteps % len(section_counts)
+    start_idx = 0
+    all_steps = []
+    for i, section_count in enumerate(section_counts):
+        # The first `extra` sections absorb the remainder, one step each.
+        size = size_per + (1 if i < extra else 0)
+        if size < section_count:
+            raise ValueError(
+                f"cannot divide section of {size} steps into {section_count}"
+            )
+        if section_count <= 1:
+            frac_stride = 1
+        else:
+            frac_stride = (size - 1) / (section_count - 1)
+        cur_idx = 0.0
+        taken_steps = []
+        for _ in range(section_count):
+            taken_steps.append(start_idx + round(cur_idx))
+            cur_idx += frac_stride
+        all_steps += taken_steps
+        start_idx += size
+    return set(all_steps)
+
+
+class SpacedDiffusion(GaussianDiffusion):
+    """
+    A GaussianDiffusion restricted to a subset of a base process's timesteps.
+
+    :param use_timesteps: a collection (sequence or set) of timesteps from the
+                          original diffusion process to retain.
+    :param kwargs: the kwargs to create the base diffusion process.
+    """
+
+    def __init__(self, use_timesteps, **kwargs):
+        self.use_timesteps = set(use_timesteps)
+        self.timestep_map = []
+        self.original_num_steps = len(kwargs["betas"])
+        base_diffusion = GaussianDiffusion(**kwargs)  # pylint: disable=missing-kwoa
+        # New betas make cumprod(1 - betas) equal the base alphas_cumprod at kept steps.
+        prev_cumprod = 1.0
+        respaced_betas = []
+        for step, cumprod in enumerate(base_diffusion.alphas_cumprod):
+            if step in self.use_timesteps:
+                respaced_betas.append(1 - cumprod / prev_cumprod)
+                prev_cumprod = cumprod
+                self.timestep_map.append(step)
+        kwargs["betas"] = np.array(respaced_betas)
+        super().__init__(**kwargs)
+
+    def p_mean_variance(self, model, *args, **kwargs):  # pylint: disable=signature-differs
+        # Delegate to the base class with the timestep-remapping wrapper.
+        wrapped = self._wrap_model(model)
+        return super().p_mean_variance(wrapped, *args, **kwargs)
+
+    def training_losses(self, model, *args, **kwargs):  # pylint: disable=signature-differs
+        # Delegate to the base class with the timestep-remapping wrapper.
+        wrapped = self._wrap_model(model)
+        return super().training_losses(wrapped, *args, **kwargs)
+
+    def _wrap_model(self, model):
+        already_wrapped = isinstance(model, _WrappedModel)  # avoid double wrapping
+        if already_wrapped:
+            return model
+        remap = (self.timestep_map, self.rescale_timesteps, self.original_num_steps)
+        return _WrappedModel(model, *remap)
+
+    def _scale_timesteps(self, t):
+        # Identity here: _WrappedModel applies the rescaling instead.
+        return t
+
+
+class _WrappedModel:  # callable that remaps spaced timesteps before calling model
+    def __init__(self, model, timestep_map, rescale_timesteps, original_num_steps):
+        self.model = model
+        self.timestep_map = timestep_map
+        self.rescale_timesteps = rescale_timesteps
+        self.original_num_steps = original_num_steps
+
+    def __call__(self, x, ts, **kwargs):
+        lookup = th.tensor(self.timestep_map, device=ts.device, dtype=ts.dtype)
+        base_ts = lookup[ts]  # spaced index -> original timestep
+        if self.rescale_timesteps:
+            base_ts = base_ts.float().mul(1000.0 / self.original_num_steps)
+        return self.model(x, base_ts, **kwargs)
diff --git a/modules/commons/improved_diffusion/train_util.py b/modules/commons/improved_diffusion/train_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..1867604145736352dc51ab05b6caae8b541a6ebb
--- /dev/null
+++ b/modules/commons/improved_diffusion/train_util.py
@@ -0,0 +1,356 @@
+import copy
+import functools
+import os
+
+import blobfile as bf
+import numpy as np
+import torch as th
+import torch.distributed as dist
+from torch.nn.parallel.distributed import DistributedDataParallel as DDP
+from torch.optim import AdamW
+
+from . import dist_util, logger
+from .fp16_util import (
+    make_master_params,
+    master_params_to_model_params,
+    model_grads_to_master_grads,
+    unflatten_master_params,
+    zero_grad,
+)
+from .nn import update_ema
+from .resample import LossAwareSampler, UniformSampler
+
+# For ImageNet experiments, this was a good default value.
+# We found that the lg_loss_scale quickly climbed to
+# 20-21 within the first ~1K steps of training.
+INITIAL_LOG_LOSS_SCALE = 20.0
+
+
+class TrainLoop:  # training driver: microbatching, fp16 loss scaling, EMA, checkpoints
+    def __init__(
+        self,
+        *,
+        model,
+        diffusion,
+        data,
+        batch_size,
+        microbatch,
+        lr,
+        ema_rate,
+        log_interval,
+        save_interval,
+        resume_checkpoint,
+        use_fp16=False,
+        fp16_scale_growth=1e-3,
+        schedule_sampler=None,
+        weight_decay=0.0,
+        lr_anneal_steps=0,
+    ):
+        self.model = model
+        self.diffusion = diffusion
+        self.data = data
+        self.batch_size = batch_size
+        self.microbatch = microbatch if microbatch > 0 else batch_size  # <= 0: whole batch
+        self.lr = lr
+        self.ema_rate = (  # accepts one float or a comma-separated string of rates
+            [ema_rate]
+            if isinstance(ema_rate, float)
+            else [float(x) for x in ema_rate.split(",")]
+        )
+        self.log_interval = log_interval
+        self.save_interval = save_interval
+        self.resume_checkpoint = resume_checkpoint
+        self.use_fp16 = use_fp16
+        self.fp16_scale_growth = fp16_scale_growth
+        self.schedule_sampler = schedule_sampler or UniformSampler(diffusion)
+        self.weight_decay = weight_decay
+        self.lr_anneal_steps = lr_anneal_steps
+
+        self.step = 0
+        self.resume_step = 0
+        self.global_batch = self.batch_size * dist.get_world_size()
+
+        self.model_params = list(self.model.parameters())
+        self.master_params = self.model_params  # replaced in _setup_fp16() under use_fp16
+        self.lg_loss_scale = INITIAL_LOG_LOSS_SCALE
+        self.sync_cuda = th.cuda.is_available()
+
+        self._load_and_sync_parameters()
+        if self.use_fp16:
+            self._setup_fp16()
+
+        self.opt = AdamW(self.master_params, lr=self.lr, weight_decay=self.weight_decay)
+        if self.resume_step:
+            self._load_optimizer_state()
+            # Model was resumed, either due to a restart or a checkpoint
+            # being specified at the command line.
+            self.ema_params = [
+                self._load_ema_parameters(rate) for rate in self.ema_rate
+            ]
+        else:
+            self.ema_params = [
+                copy.deepcopy(self.master_params) for _ in range(len(self.ema_rate))
+            ]
+
+        if th.cuda.is_available():
+            self.use_ddp = True
+            self.ddp_model = DDP(
+                self.model,
+                device_ids=[dist_util.dev()],
+                output_device=dist_util.dev(),
+                broadcast_buffers=False,
+                bucket_cap_mb=128,
+                find_unused_parameters=False,
+            )
+        else:
+            if dist.get_world_size() > 1:
+                logger.warn(
+                    "Distributed training requires CUDA. "
+                    "Gradients will not be synchronized properly!"
+                )
+            self.use_ddp = False
+            self.ddp_model = self.model
+
+    def _load_and_sync_parameters(self):
+        resume_checkpoint = find_resume_checkpoint() or self.resume_checkpoint
+
+        if resume_checkpoint:
+            self.resume_step = parse_resume_step_from_filename(resume_checkpoint)
+            if dist.get_rank() == 0:  # only rank 0 reads the checkpoint file
+                logger.log(f"loading model from checkpoint: {resume_checkpoint}...")
+                self.model.load_state_dict(
+                    dist_util.load_state_dict(
+                        resume_checkpoint, map_location=dist_util.dev()
+                    )
+                )
+
+        dist_util.sync_params(self.model.parameters())  # presumably shares rank 0's values — confirm
+
+    def _load_ema_parameters(self, rate):
+        ema_params = copy.deepcopy(self.master_params)  # used when no EMA checkpoint is found
+
+        main_checkpoint = find_resume_checkpoint() or self.resume_checkpoint
+        ema_checkpoint = find_ema_checkpoint(main_checkpoint, self.resume_step, rate)
+        if ema_checkpoint:
+            if dist.get_rank() == 0:  # only rank 0 reads the checkpoint file
+                logger.log(f"loading EMA from checkpoint: {ema_checkpoint}...")
+                state_dict = dist_util.load_state_dict(
+                    ema_checkpoint, map_location=dist_util.dev()
+                )
+                ema_params = self._state_dict_to_master_params(state_dict)
+
+        dist_util.sync_params(ema_params)
+        return ema_params
+
+    def _load_optimizer_state(self):
+        main_checkpoint = find_resume_checkpoint() or self.resume_checkpoint
+        opt_checkpoint = bf.join(
+            bf.dirname(main_checkpoint), f"opt{self.resume_step:06}.pt"
+        )
+        if bf.exists(opt_checkpoint):  # a missing optimizer file is silently skipped
+            logger.log(f"loading optimizer state from checkpoint: {opt_checkpoint}")
+            state_dict = dist_util.load_state_dict(
+                opt_checkpoint, map_location=dist_util.dev()
+            )
+            self.opt.load_state_dict(state_dict)
+
+    def _setup_fp16(self):
+        self.master_params = make_master_params(self.model_params)
+        self.model.convert_to_fp16()
+
+    def run_loop(self):
+        while (
+            not self.lr_anneal_steps  # 0 means no step limit
+            or self.step + self.resume_step < self.lr_anneal_steps
+        ):
+            batch, cond = next(self.data)
+            self.run_step(batch, cond)
+            if self.step % self.log_interval == 0:
+                logger.dumpkvs()
+            if self.step % self.save_interval == 0:
+                self.save()
+                # Run for a finite amount of time in integration tests.
+                if os.environ.get("DIFFUSION_TRAINING_TEST", "") and self.step > 0:
+                    return
+            self.step += 1
+        # Save the last checkpoint if it wasn't already saved.
+        if (self.step - 1) % self.save_interval != 0:
+            self.save()
+
+    def run_step(self, batch, cond):
+        self.forward_backward(batch, cond)
+        if self.use_fp16:
+            self.optimize_fp16()
+        else:
+            self.optimize_normal()
+        self.log_step()
+
+    def forward_backward(self, batch, cond):
+        zero_grad(self.model_params)
+        for i in range(0, batch.shape[0], self.microbatch):
+            micro = batch[i : i + self.microbatch].to(dist_util.dev())
+            micro_cond = {
+                k: v[i : i + self.microbatch].to(dist_util.dev())
+                for k, v in cond.items()
+            }
+            last_batch = (i + self.microbatch) >= batch.shape[0]
+            t, weights = self.schedule_sampler.sample(micro.shape[0], dist_util.dev())
+
+            compute_losses = functools.partial(
+                self.diffusion.training_losses,
+                self.ddp_model,
+                micro,
+                t,
+                model_kwargs=micro_cond,
+            )
+
+            if last_batch or not self.use_ddp:
+                losses = compute_losses()
+            else:
+                with self.ddp_model.no_sync():  # defer DDP gradient sync to the last microbatch
+                    losses = compute_losses()
+
+            if isinstance(self.schedule_sampler, LossAwareSampler):
+                self.schedule_sampler.update_with_local_losses(
+                    t, losses["loss"].detach()
+                )
+
+            loss = (losses["loss"] * weights).mean()  # sampler-weighted mean over the microbatch
+            log_loss_dict(
+                self.diffusion, t, {k: v * weights for k, v in losses.items()}
+            )
+            if self.use_fp16:
+                loss_scale = 2 ** self.lg_loss_scale
+                (loss * loss_scale).backward()
+            else:
+                loss.backward()
+
+    def optimize_fp16(self):
+        if any(not th.isfinite(p.grad).all() for p in self.model_params):
+            self.lg_loss_scale -= 1  # halve the loss scale and skip this update
+            logger.log(f"Found NaN, decreased lg_loss_scale to {self.lg_loss_scale}")
+            return
+
+        model_grads_to_master_grads(self.model_params, self.master_params)
+        # NOTE(review): only element 0 is unscaled — presumably the fp16 master params are one flat tensor; confirm.
+        self.master_params[0].grad.mul_(1.0 / (2 ** self.lg_loss_scale))
+        self._log_grad_norm()
+        self._anneal_lr()
+        self.opt.step()
+        for rate, params in zip(self.ema_rate, self.ema_params):
+            update_ema(params, self.master_params, rate=rate)
+        master_params_to_model_params(self.model_params, self.master_params)
+        self.lg_loss_scale += self.fp16_scale_growth
+
+    def optimize_normal(self):
+        self._log_grad_norm()
+        self._anneal_lr()
+        self.opt.step()
+        for rate, params in zip(self.ema_rate, self.ema_params):
+            update_ema(params, self.master_params, rate=rate)
+
+    def _log_grad_norm(self):
+        sqsum = 0.0
+        for p in self.master_params:
+            sqsum += (p.grad ** 2).sum().item()
+        logger.logkv_mean("grad_norm", np.sqrt(sqsum))  # L2 norm over all master grads
+
+    def _anneal_lr(self):
+        if not self.lr_anneal_steps:
+            return
+        frac_done = (self.step + self.resume_step) / self.lr_anneal_steps
+        lr = self.lr * (1 - frac_done)  # linear decay, reaching 0 at lr_anneal_steps
+        for param_group in self.opt.param_groups:
+            param_group["lr"] = lr
+
+    def log_step(self):
+        logger.logkv("step", self.step + self.resume_step)
+        logger.logkv("samples", (self.step + self.resume_step + 1) * self.global_batch)
+        if self.use_fp16:
+            logger.logkv("lg_loss_scale", self.lg_loss_scale)
+
+    def save(self):
+        def save_checkpoint(rate, params):
+            state_dict = self._master_params_to_state_dict(params)
+            if dist.get_rank() == 0:  # only rank 0 writes files
+                logger.log(f"saving model {rate}...")
+                if not rate:
+                    filename = f"model{(self.step+self.resume_step):06d}.pt"
+                else:
+                    filename = f"ema_{rate}_{(self.step+self.resume_step):06d}.pt"
+                with bf.BlobFile(bf.join(get_blob_logdir(), filename), "wb") as f:
+                    th.save(state_dict, f)
+
+        save_checkpoint(0, self.master_params)  # rate 0 selects the "model" filename
+        for rate, params in zip(self.ema_rate, self.ema_params):
+            save_checkpoint(rate, params)
+
+        if dist.get_rank() == 0:
+            with bf.BlobFile(
+                bf.join(get_blob_logdir(), f"opt{(self.step+self.resume_step):06d}.pt"),
+                "wb",
+            ) as f:
+                th.save(self.opt.state_dict(), f)
+
+        dist.barrier()
+
+    def _master_params_to_state_dict(self, master_params):
+        if self.use_fp16:
+            master_params = unflatten_master_params(
+                self.model.parameters(), master_params
+            )
+        state_dict = self.model.state_dict()
+        for i, (name, _value) in enumerate(self.model.named_parameters()):
+            assert name in state_dict
+            state_dict[name] = master_params[i]  # relies on named_parameters() order
+        return state_dict
+
+    def _state_dict_to_master_params(self, state_dict):
+        params = [state_dict[name] for name, _ in self.model.named_parameters()]
+        if self.use_fp16:
+            return make_master_params(params)
+        else:
+            return params
+
+
+def parse_resume_step_from_filename(filename):
+    """
+    Extract the step count NNNNNN from a name like path/to/modelNNNNNN.pt.
+
+    Returns 0 when "model" is absent or is not followed by an integer.
+    """
+    _, marker, tail = filename.rpartition("model")
+    if not marker:
+        return 0
+    try:
+        return int(tail.split(".")[0])
+    except ValueError:
+        return 0
+
+
+def get_blob_logdir():  # DIFFUSION_BLOB_LOGDIR if set, else the logger's directory
+    return os.getenv("DIFFUSION_BLOB_LOGDIR", logger.get_dir())
+
+
+def find_resume_checkpoint():
+    """Hook for auto-discovering the latest checkpoint; this stub finds none."""
+    # Override on your infrastructure, e.g. to scan blob storage.
+    return None
+
+
+def find_ema_checkpoint(main_checkpoint, step, rate):
+    """Path of the ema_{rate}_{step:06d}.pt file beside main_checkpoint, or None."""
+    if main_checkpoint is None:
+        return None
+    # Look in the main checkpoint's directory.
+    checkpoint_dir = bf.dirname(main_checkpoint)
+    candidate = bf.join(checkpoint_dir, f"ema_{rate}_{(step):06d}.pt")
+    return candidate if bf.exists(candidate) else None
+
+
+def log_loss_dict(diffusion, ts, losses):
+    """Log every loss's mean, and each sample's loss under its timestep quartile."""
+    for name, per_sample in losses.items():
+        logger.logkv_mean(name, per_sample.mean().item())
+        for step, value in zip(ts.cpu().numpy(), per_sample.detach().cpu().numpy()):
+            bucket = int(4 * step / diffusion.num_timesteps)  # quartile index of this step
+            logger.logkv_mean(f"{name}_q{bucket}", value)
diff --git a/modules/commons/layers.py b/modules/commons/layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..88e1c75876050fa05a768a5ae0467fdfc05bb006
--- /dev/null
+++ b/modules/commons/layers.py
@@ -0,0 +1,50 @@
+import torch
+from torch import nn
+
+
+class LayerNorm(torch.nn.LayerNorm):
+    """Layer normalization over a selectable dimension.
+    :param int nout: size of the normalized dimension
+    :param int dim: dimension to be normalized
+    """
+
+    def __init__(self, nout, dim=-1, eps=1e-5):
+        super(LayerNorm, self).__init__(nout, eps=eps)
+        self.dim = dim
+
+    def forward(self, x):
+        """Apply layer normalization along ``self.dim``.
+        :param torch.Tensor x: input tensor
+        :return torch.Tensor: layer normalized tensor, same shape as ``x``
+        """
+        if self.dim == -1:
+            return super(LayerNorm, self).forward(x)
+        # Move self.dim (not a hard-coded 1) to the end, normalize, move it back.
+        moved = x.transpose(self.dim, -1)
+        return super(LayerNorm, self).forward(moved).transpose(self.dim, -1)
+
+
+class Reshape(nn.Module):  # module form of Tensor.view with a fixed shape
+    def __init__(self, *args):
+        super().__init__()
+        self.shape = args
+
+    def forward(self, x):  # self.shape is the tuple of constructor arguments
+        return x.view(self.shape)
+
+
+class Permute(nn.Module):  # module form of Tensor.permute with a fixed order
+    def __init__(self, *args):
+        super().__init__()
+        self.args = args
+
+    def forward(self, x):  # self.args is the tuple of constructor arguments
+        return x.permute(self.args)
+
+
+def Embedding(num_embeddings, embedding_dim, padding_idx=None):
+    table = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
+    nn.init.normal_(table.weight, mean=0, std=embedding_dim ** -0.5)  # std = dim^-0.5
+    if padding_idx is not None:
+        nn.init.constant_(table.weight[padding_idx], 0)  # zero the padding row
+    return table
diff --git a/modules/commons/normalizing_flow/glow_modules.py b/modules/commons/normalizing_flow/glow_modules.py
new file mode 100644
index 0000000000000000000000000000000000000000..c589af0f2eba2b154317912f9ad01a4163b3fd6a
--- /dev/null
+++ b/modules/commons/normalizing_flow/glow_modules.py
@@ -0,0 +1,362 @@
+import scipy
+from torch.nn import functional as F
+import torch
+from torch import nn
+import numpy as np
+from modules.commons.wavenet import WN
+from modules.tts.glow import utils
+
+
+class ActNorm(nn.Module):
+    def __init__(self, channels, ddi=False, **kwargs):
+        super().__init__()
+        self.channels = channels
+        # With ddi=True the parameters are set from the first batch seen by forward().
+        self.initialized = not ddi
+        self.logs = nn.Parameter(torch.zeros(1, channels, 1))
+        self.bias = nn.Parameter(torch.zeros(1, channels, 1))
+
+    def forward(self, x, x_mask=None, reverse=False, **kwargs):
+        """Apply the per-channel affine map, or its inverse when ``reverse`` is set.
+
+        Returns ``(z, logdet)``; ``logdet`` holds one value per batch item.
+        """
+        if x_mask is None:
+            x_mask = torch.ones(x.size(0), 1, x.size(2)).to(device=x.device, dtype=x.dtype)
+        n_frames = torch.sum(x_mask, [1, 2])
+        if not self.initialized:
+            self.initialize(x, x_mask)
+            self.initialized = True
+
+        if not reverse:
+            out = (self.bias + torch.exp(self.logs) * x) * x_mask
+            return out, torch.sum(self.logs) * n_frames  # [b]
+        out = (x - self.bias) * torch.exp(-self.logs) * x_mask
+        return out, torch.sum(-self.logs) * n_frames
+
+    def store_inverse(self):
+        """No-op; present so callers can invoke store_inverse() on every flow."""
+
+    def set_ddi(self, ddi):
+        self.initialized = not ddi
+
+    def initialize(self, x, x_mask):
+        """Set bias/logs from the masked per-channel mean and variance of ``x``."""
+        with torch.no_grad():
+            count = torch.sum(x_mask, [0, 2])
+            mean = torch.sum(x * x_mask, [0, 2]) / count
+            second_moment = torch.sum(x * x * x_mask, [0, 2]) / count
+            # The variance is floored at 1e-6 before taking the log.
+            log_std = 0.5 * torch.log(torch.clamp_min(second_moment - (mean ** 2), 1e-6))
+
+            self.bias.data.copy_((-mean * torch.exp(-log_std)).view(*self.bias.shape).to(dtype=self.bias.dtype))
+            self.logs.data.copy_((-log_std).view(*self.logs.shape).to(dtype=self.logs.dtype))
+
+
+class InvConvNear(nn.Module):  # Invertible channel mixing: an n_split x n_split matrix applied as a 1x1 conv
+    def __init__(self, channels, n_split=4, no_jacobian=False, lu=True, n_sqz=2, **kwargs):
+        super().__init__()
+        assert (n_split % 2 == 0)
+        self.channels = channels
+        self.n_split = n_split
+        self.n_sqz = n_sqz
+        self.no_jacobian = no_jacobian  # when set, the non-reverse pass returns logdet = 0
+        # Initial weight: orthogonal factor of a random matrix, with one column negated if needed so det > 0.
+        w_init = torch.qr(torch.FloatTensor(self.n_split, self.n_split).normal_())[0]
+        if torch.det(w_init) < 0:
+            w_init[:, 0] = -1 * w_init[:, 0]
+        self.lu = lu
+        if lu:
+            # LU decomposition can slightly speed up the inverse
+            np_p, np_l, np_u = scipy.linalg.lu(w_init)
+            np_s = np.diag(np_u)
+            np_sign_s = np.sign(np_s)
+            np_log_s = np.log(np.abs(np_s))  # the diagonal of U is learned as sign * exp(log_s)
+            np_u = np.triu(np_u, k=1)
+            l_mask = np.tril(np.ones(w_init.shape, dtype=float), -1)
+            eye = np.eye(*w_init.shape, dtype=float)
+
+            self.register_buffer('p', torch.Tensor(np_p.astype(float)))  # fixed (not trained)
+            self.register_buffer('sign_s', torch.Tensor(np_sign_s.astype(float)))  # fixed (not trained)
+            self.l = nn.Parameter(torch.Tensor(np_l.astype(float)), requires_grad=True)
+            self.log_s = nn.Parameter(torch.Tensor(np_log_s.astype(float)), requires_grad=True)
+            self.u = nn.Parameter(torch.Tensor(np_u.astype(float)), requires_grad=True)
+            self.register_buffer('l_mask', torch.Tensor(l_mask))  # strictly-lower-triangular mask
+            self.register_buffer('eye', torch.Tensor(eye))
+        else:
+            self.weight = nn.Parameter(w_init)  # the full matrix is learned directly
+
+    def forward(self, x, x_mask=None, reverse=False, **kwargs):
+        b, c, t = x.size()
+        assert (c % self.n_split == 0)
+        if x_mask is None:
+            x_mask = 1
+            x_len = torch.ones((b,), dtype=x.dtype, device=x.device) * t
+        else:
+            x_len = torch.sum(x_mask, [1, 2])
+        # Regroup the channels into n_split rows so the n_split x n_split weight can be applied with conv2d.
+        x = x.view(b, self.n_sqz, c // self.n_split, self.n_split // self.n_sqz, t)
+        x = x.permute(0, 1, 3, 2, 4).contiguous().view(b, self.n_split, c // self.n_split, t)
+
+        if self.lu:
+            self.weight, log_s = self._get_weight()  # rebuilt from the LU factors on every call
+            logdet = log_s.sum()
+            logdet = logdet * (c / self.n_split) * x_len
+        else:
+            logdet = torch.logdet(self.weight) * (c / self.n_split) * x_len  # [b]
+
+        if reverse:
+            if hasattr(self, "weight_inv"):  # cached by store_inverse()
+                weight = self.weight_inv
+            else:
+                weight = torch.inverse(self.weight.float()).to(dtype=self.weight.dtype)
+            logdet = -logdet
+        else:
+            weight = self.weight
+            if self.no_jacobian:
+                logdet = 0
+
+        weight = weight.view(self.n_split, self.n_split, 1, 1)
+        z = F.conv2d(x, weight)
+        z = z.view(b, self.n_sqz, self.n_split // self.n_sqz, c // self.n_split, t)  # undo the regrouping
+        z = z.permute(0, 1, 3, 2, 4).contiguous().view(b, c, t) * x_mask
+        return z, logdet
+
+    def _get_weight(self):  # LU mode only: returns (P @ L @ U, log_s)
+        l, log_s, u = self.l, self.log_s, self.u
+        l = l * self.l_mask + self.eye  # unit lower-triangular
+        u = u * self.l_mask.transpose(0, 1).contiguous() + torch.diag(self.sign_s * torch.exp(log_s))
+        weight = torch.matmul(self.p, torch.matmul(l, u))
+        return weight, log_s
+
+    def store_inverse(self):
+        # Cache the inverse for reverse passes; without LU factors the weight is the parameter itself.
+        weight = self._get_weight()[0] if self.lu else self.weight
+        self.weight_inv = torch.inverse(weight.float()).to(next(self.parameters()).device)
+
+
+class InvConv(nn.Module):  # Invertible 1x1 convolution over all channels, LU-parameterized by default
+    def __init__(self, channels, no_jacobian=False, lu=True, **kwargs):
+        super().__init__()
+        w_shape = [channels, channels]
+        w_init = np.linalg.qr(np.random.randn(*w_shape))[0].astype(float)
+        LU_decomposed = lu
+        if not LU_decomposed:
+            # Sample a random orthogonal matrix:
+            self.register_parameter("weight", nn.Parameter(torch.Tensor(w_init)))
+        else:
+            np_p, np_l, np_u = scipy.linalg.lu(w_init)
+            np_s = np.diag(np_u)
+            np_sign_s = np.sign(np_s)
+            np_log_s = np.log(np.abs(np_s))  # the diagonal of U is learned as sign * exp(log_s)
+            np_u = np.triu(np_u, k=1)
+            l_mask = np.tril(np.ones(w_shape, dtype=float), -1)
+            eye = np.eye(*w_shape, dtype=float)
+
+            self.register_buffer('p', torch.Tensor(np_p.astype(float)))
+            self.register_buffer('sign_s', torch.Tensor(np_sign_s.astype(float)))
+            self.l = nn.Parameter(torch.Tensor(np_l.astype(float)))
+            self.log_s = nn.Parameter(torch.Tensor(np_log_s.astype(float)))
+            self.u = nn.Parameter(torch.Tensor(np_u.astype(float)))
+            self.l_mask = torch.Tensor(l_mask)  # plain attributes, not buffers: moved by hand in get_weight()
+            self.eye = torch.Tensor(eye)
+        self.w_shape = w_shape
+        self.LU = LU_decomposed
+        # NOTE(review): with lu=False this also clears the "weight" parameter registered above, and
+        # get_weight() only handles the LU case - confirm that lu=False is never used.
+        self.weight = None  # slot for the inverse cached by store_inverse()
+
+    def get_weight(self, device, reverse):
+        w_shape = self.w_shape
+        self.p = self.p.to(device)
+        self.sign_s = self.sign_s.to(device)
+        self.l_mask = self.l_mask.to(device)
+        self.eye = self.eye.to(device)
+        l = self.l * self.l_mask + self.eye  # unit lower-triangular
+        u = self.u * self.l_mask.transpose(0, 1).contiguous() + torch.diag(self.sign_s * torch.exp(self.log_s))
+        dlogdet = self.log_s.sum()  # same value is returned for both directions; forward() applies the sign
+        if not reverse:
+            w = torch.matmul(self.p, torch.matmul(l, u))
+        else:
+            l = torch.inverse(l.double()).float()  # inverted in float64, then cast back
+            u = torch.inverse(u.double()).float()
+            w = torch.matmul(u, torch.matmul(l, self.p.inverse()))
+        return w.view(w_shape[0], w_shape[1], 1), dlogdet
+
+    def forward(self, x, x_mask=None, reverse=False, **kwargs):
+        """
+        Apply the 1x1 convolution (or its inverse). logdet = +/- sum(log_s) * x_len, one value per batch item.
+        """
+        b, c, t = x.size()
+        if x_mask is None:
+            x_len = torch.ones((b,), dtype=x.dtype, device=x.device) * t
+        else:
+            x_len = torch.sum(x_mask, [1, 2])
+        logdet = 0
+        if not reverse:
+            weight, dlogdet = self.get_weight(x.device, reverse)
+            z = F.conv1d(x, weight)
+            if logdet is not None:
+                logdet = logdet + dlogdet * x_len
+            return z, logdet
+        else:
+            if self.weight is None:  # no cached inverse yet: build it for this call
+                weight, dlogdet = self.get_weight(x.device, reverse)
+            else:
+                weight, dlogdet = self.weight, self.dlogdet
+            z = F.conv1d(x, weight)
+            if logdet is not None:
+                logdet = logdet - dlogdet * x_len
+            return z, logdet
+
+    def store_inverse(self):  # cache the inverse on the parameters' own device (was hard-coded to 'cuda')
+        self.weight, self.dlogdet = self.get_weight(self.log_s.device, reverse=True)
+
+
+class CouplingBlock(nn.Module):  # Affine coupling layer: one channel half parameterizes a shift/scale of the other
+    def __init__(self, in_channels, hidden_channels, kernel_size, dilation_rate, n_layers,
+                 gin_channels=0, p_dropout=0, sigmoid_scale=False, wn=None):
+        super().__init__()
+        self.in_channels = in_channels
+        self.hidden_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dilation_rate = dilation_rate
+        self.n_layers = n_layers
+        self.gin_channels = gin_channels
+        self.p_dropout = p_dropout
+        self.sigmoid_scale = sigmoid_scale
+
+        start = torch.nn.Conv1d(in_channels // 2, hidden_channels, 1)
+        start = torch.nn.utils.weight_norm(start)
+        self.start = start
+        # Initializing last layer to 0 makes the affine coupling layers
+        # do nothing at first.  This helps with training stability
+        end = torch.nn.Conv1d(hidden_channels, in_channels, 1)
+        end.weight.data.zero_()
+        end.bias.data.zero_()
+        self.end = end
+        self.wn = WN(hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels, p_dropout)
+        if wn is not None:  # reuse the layers of an externally supplied WN instead of this block's own
+            self.wn.in_layers = wn.in_layers
+            self.wn.res_skip_layers = wn.res_skip_layers
+
+    def forward(self, x, x_mask=None, reverse=False, g=None, **kwargs):
+        if x_mask is None:
+            x_mask = 1  # scalar stand-in: multiplying by it leaves values unchanged
+        x_0, x_1 = x[:, :self.in_channels // 2], x[:, self.in_channels // 2:]  # split along axis 1
+
+        x = self.start(x_0) * x_mask
+        x = self.wn(x, x_mask, g)  # g is passed through to WN; its meaning is defined there (not shown here)
+        out = self.end(x)
+
+        z_0 = x_0  # the first half passes through untouched
+        m = out[:, :self.in_channels // 2, :]  # shift
+        logs = out[:, self.in_channels // 2:, :]  # log-scale
+        if self.sigmoid_scale:
+            logs = torch.log(1e-6 + torch.sigmoid(logs + 2))  # keeps exp(logs) below 1 + 1e-6
+        if reverse:
+            z_1 = (x_1 - m) * torch.exp(-logs) * x_mask
+            logdet = torch.sum(-logs * x_mask, [1, 2])
+        else:
+            z_1 = (m + torch.exp(logs) * x_1) * x_mask
+            logdet = torch.sum(logs * x_mask, [1, 2])  # summed over channels and time: one value per batch item
+        z = torch.cat([z_0, z_1], 1)
+        return z, logdet
+
+    def store_inverse(self):
+        self.wn.remove_weight_norm()  # delegates to WN; nothing else is cached by this block
+
+
+class Glow(nn.Module):  # Flow built from n_blocks repetitions of ActNorm -> invertible conv -> CouplingBlock
+    def __init__(self,
+                 in_channels,
+                 hidden_channels,
+                 kernel_size,
+                 dilation_rate,
+                 n_blocks,
+                 n_layers,
+                 p_dropout=0.,
+                 n_split=4,
+                 n_sqz=2,
+                 sigmoid_scale=False,
+                 gin_channels=0,
+                 inv_conv_type='near',
+                 share_cond_layers=False,
+                 share_wn_layers=0,
+                 ):
+        super().__init__()
+
+        self.in_channels = in_channels
+        self.hidden_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dilation_rate = dilation_rate
+        self.n_blocks = n_blocks
+        self.n_layers = n_layers
+        self.p_dropout = p_dropout
+        self.n_split = n_split
+        self.n_sqz = n_sqz
+        self.sigmoid_scale = sigmoid_scale
+        self.gin_channels = gin_channels
+        self.share_cond_layers = share_cond_layers
+        if gin_channels != 0 and share_cond_layers:  # one conditioning projection applied once in forward()
+            cond_layer = torch.nn.Conv1d(gin_channels * n_sqz, 2 * hidden_channels * n_layers, 1)
+            self.cond_layer = torch.nn.utils.weight_norm(cond_layer, name='weight')
+        wn = None
+        self.flows = nn.ModuleList()
+        for b in range(n_blocks):
+            self.flows.append(ActNorm(channels=in_channels * n_sqz))  # channel counts are post-squeeze
+            if inv_conv_type == 'near':
+                self.flows.append(InvConvNear(channels=in_channels * n_sqz, n_split=n_split, n_sqz=n_sqz))
+            if inv_conv_type == 'invconv':  # any other value adds no invertible conv to the block
+                self.flows.append(InvConv(channels=in_channels * n_sqz))
+            if share_wn_layers > 0:
+                if b % share_wn_layers == 0:  # a fresh WN every share_wn_layers blocks; blocks in between reuse it
+                    wn = WN(hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels * n_sqz,
+                            p_dropout, share_cond_layers)
+            self.flows.append(
+                CouplingBlock(
+                    in_channels * n_sqz,
+                    hidden_channels,
+                    kernel_size=kernel_size,
+                    dilation_rate=dilation_rate,
+                    n_layers=n_layers,
+                    gin_channels=gin_channels * n_sqz,
+                    p_dropout=p_dropout,
+                    sigmoid_scale=sigmoid_scale,
+                    wn=wn
+                ))
+
+    def forward(self, x, x_mask=None, g=None, reverse=False, return_hiddens=False):
+        logdet_tot = 0  # running sum of the per-flow log-determinants
+        if not reverse:
+            flows = self.flows
+        else:
+            flows = reversed(self.flows)  # the inverse pass visits the flows last-to-first
+        if return_hiddens:
+            hs = []  # output of every flow, in the order visited
+        if self.n_sqz > 1:
+            x, x_mask_ = utils.squeeze(x, x_mask, self.n_sqz)  # NOTE(review): utils comes from modules.tts.glow, not shown here
+            if g is not None:
+                g, _ = utils.squeeze(g, x_mask, self.n_sqz)  # squeezed with the original (unsqueezed) mask
+            x_mask = x_mask_
+        if self.share_cond_layers and g is not None:
+            g = self.cond_layer(g)  # NOTE(review): cond_layer only exists when gin_channels != 0
+        for f in flows:
+            x, logdet = f(x, x_mask, g=g, reverse=reverse)
+            if return_hiddens:
+                hs.append(x)
+            logdet_tot += logdet
+        if self.n_sqz > 1:
+            x, x_mask = utils.unsqueeze(x, x_mask, self.n_sqz)
+        if return_hiddens:
+            return x, logdet_tot, hs
+        return x, logdet_tot
+
+    def store_inverse(self):  # prepare for inference: strip weight norm, then let every flow cache its inverse
+        def remove_weight_norm(m):
+            try:
+                nn.utils.remove_weight_norm(m)
+            except ValueError:  # this module didn't have weight norm
+                return
+
+        self.apply(remove_weight_norm)  # visits every submodule, and this module itself
+        for f in self.flows:
+            f.store_inverse()
diff --git a/modules/commons/normalizing_flow/res_flow.py b/modules/commons/normalizing_flow/res_flow.py
new file mode 100644
index 0000000000000000000000000000000000000000..428fb7da9e3becb0d11cdf239fff410c86028d95
--- /dev/null
+++ b/modules/commons/normalizing_flow/res_flow.py
@@ -0,0 +1,61 @@
+import torch
+from torch import nn
+from modules.commons.conv import ConditionalConvBlocks
+from modules.commons.wavenet import WN
+
+
+class FlipLayer(nn.Module):
+    def forward(self, x, *args, **kwargs):
+        """Reverse the order of axis 1 of ``x``; any extra arguments are ignored."""
+        return torch.flip(x, [1])
+
+
+class CouplingLayer(nn.Module):  # Additive coupling: the second channel half is shifted by a function of the first
+    def __init__(self, c_in, hidden_size, kernel_size, n_layers, p_dropout=0, c_in_g=0, nn_type='wn'):
+        super().__init__()
+        self.channels = c_in
+        self.hidden_size = hidden_size
+        self.kernel_size = kernel_size
+        self.n_layers = n_layers
+        self.c_half = c_in // 2
+
+        self.pre = nn.Conv1d(self.c_half, hidden_size, 1)
+        if nn_type == 'wn':
+            self.enc = WN(hidden_size, kernel_size, 1, n_layers, p_dropout=p_dropout, c_cond=c_in_g)
+        elif nn_type == 'conv':
+            self.enc = ConditionalConvBlocks(hidden_size, c_in_g, hidden_size, None, kernel_size,
+                                             layers_in_block=1, is_BTC=False, num_layers=n_layers)
+        else:  # fail at construction time instead of with a missing `enc` attribute in forward()
+            raise ValueError(f"Unsupported nn_type: {nn_type!r} (expected 'wn' or 'conv')")
+        self.post = nn.Conv1d(hidden_size, self.c_half, 1)
+
+    def forward(self, x, nonpadding, cond=None, reverse=False):
+        """Shift the second channel half by ``post(enc(pre(first half)))``; subtract it when ``reverse``."""
+        x0, x1 = x[:, :self.c_half], x[:, self.c_half:]
+        x_ = self.pre(x0) * nonpadding
+        x_ = self.enc(x_, nonpadding=nonpadding, cond=cond)
+        m = self.post(x_)
+        x1 = m + x1 if not reverse else x1 - m
+        return torch.cat([x0, x1], 1) * nonpadding
+
+
+class ResFlow(nn.Module):
+    def __init__(self, c_in, hidden_size, kernel_size, n_flow_layers, n_flow_steps=4, c_cond=0, nn_type='wn'):
+        """Stack ``n_flow_steps`` pairs of (CouplingLayer, FlipLayer)."""
+        super().__init__()
+        steps = []
+        for _ in range(n_flow_steps):
+            # Each coupling step is followed by a flip of axis 1.
+            coupling = CouplingLayer(c_in, hidden_size, kernel_size, n_flow_layers, c_in_g=c_cond, nn_type=nn_type)
+            steps.extend([coupling, FlipLayer()])
+        self.flows = nn.ModuleList(steps)
+
+    def forward(self, x, nonpadding, cond=None, reverse=False):
+        """Run ``x`` through every flow in order, or in reversed order when ``reverse`` is set.
+
+        ``nonpadding``, ``cond`` and ``reverse`` are handed unchanged to each flow.
+        """
+        ordered = reversed(self.flows) if reverse else self.flows
+        for layer in ordered:
+            x = layer(x, nonpadding, cond=cond, reverse=reverse)
+        return x
diff --git a/modules/commons/normalizing_flow/utils.py b/modules/commons/normalizing_flow/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..7eb56ec514bff822ba1a19a6474207ed82492410
--- /dev/null
+++ b/modules/commons/normalizing_flow/utils.py
@@ -0,0 +1,29 @@
+import torch
+
+
+def squeeze(x, x_mask=None, n_sqz=2):
+    """Fold groups of ``n_sqz`` consecutive frames of ``x`` [b, c, t] into channels: [b, c * n_sqz, t // n_sqz].
+    Trailing frames that do not fill a group are dropped. Returns the masked result and the squeezed mask.
+    """
+    batch, channels, length = x.size()
+    n_groups = length // n_sqz
+    grouped = x[:, :, :n_groups * n_sqz].view(batch, channels, n_groups, n_sqz)
+    folded = grouped.permute(0, 3, 1, 2).contiguous().view(batch, channels * n_sqz, n_groups)
+    if x_mask is None:
+        x_mask = torch.ones(batch, 1, n_groups).to(device=x.device, dtype=x.dtype)
+    else:
+        x_mask = x_mask[:, :, n_sqz - 1::n_sqz]  # a group takes the mask value of its last frame
+    return folded * x_mask, x_mask
+
+
+def unsqueeze(x, x_mask=None, n_sqz=2):
+    """Undo ``squeeze``: expand ``x`` [b, c, t] into [b, c // n_sqz, t * n_sqz]; returns (masked result, mask)."""
+    batch, channels, length = x.size()
+    out_channels, out_length = channels // n_sqz, length * n_sqz
+    split = x.view(batch, n_sqz, out_channels, length)
+    unfolded = split.permute(0, 2, 3, 1).contiguous().view(batch, out_channels, out_length)
+    if x_mask is None:
+        x_mask = torch.ones(batch, 1, out_length).to(device=x.device, dtype=x.dtype)
+    else:
+        x_mask = x_mask.unsqueeze(-1).repeat(1, 1, 1, n_sqz).view(batch, 1, out_length)  # repeat each entry n_sqz times
+    return unfolded * x_mask, x_mask
diff --git a/modules/commons/rel_transformer.py b/modules/commons/rel_transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd41b301a98609391d1a18b118d1f1b3e538af1d
--- /dev/null
+++ b/modules/commons/rel_transformer.py
@@ -0,0 +1,389 @@
+import math
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from modules.commons.layers import Embedding
+
+
+def convert_pad_shape(pad_shape):
+    """Flatten per-axis [before, after] pairs, last axis first, into the flat list passed to F.pad."""
+    # e.g. [[0, 0], [1, 2]] -> [1, 2, 0, 0]
+    return [amount for pair in reversed(pad_shape) for amount in pair]
+
+
+def shift_1d(x):
+    """Shift a 3-D ``x`` right by one step along its last axis, zero-filling the first position."""
+    return F.pad(x, convert_pad_shape([[0, 0], [0, 0], [1, 0]]))[:, :, :-1]
+
+
+def sequence_mask(length, max_length=None):
+    """Return a boolean [len(length), max_length] mask that is True where position < length[i]."""
+    # max_length defaults to the longest length in the batch.
+    positions = torch.arange(length.max() if max_length is None else max_length, dtype=length.dtype, device=length.device)
+    return positions[None, :] < length[:, None]
+
+
+class Encoder(nn.Module):  # Stack of n_layers (self-attention, FFN) sub-layer pairs with residual connections
+    def __init__(self, hidden_channels, filter_channels, n_heads, n_layers, kernel_size=1, p_dropout=0.,
+                 window_size=None, block_length=None, pre_ln=False, **kwargs):
+        super().__init__()
+        self.hidden_channels = hidden_channels
+        self.filter_channels = filter_channels
+        self.n_heads = n_heads
+        self.n_layers = n_layers
+        self.kernel_size = kernel_size
+        self.p_dropout = p_dropout
+        self.window_size = window_size
+        self.block_length = block_length
+        self.pre_ln = pre_ln  # True: normalize before each sub-layer; False: after the residual add
+
+        self.drop = nn.Dropout(p_dropout)
+        self.attn_layers = nn.ModuleList()
+        self.norm_layers_1 = nn.ModuleList()  # norms paired with the attention sub-layers
+        self.ffn_layers = nn.ModuleList()
+        self.norm_layers_2 = nn.ModuleList()  # norms paired with the FFN sub-layers
+        for i in range(self.n_layers):
+            self.attn_layers.append(
+                MultiHeadAttention(hidden_channels, hidden_channels, n_heads, window_size=window_size,
+                                   p_dropout=p_dropout, block_length=block_length))
+            self.norm_layers_1.append(LayerNorm(hidden_channels))
+            self.ffn_layers.append(
+                FFN(hidden_channels, hidden_channels, filter_channels, kernel_size, p_dropout=p_dropout))
+            self.norm_layers_2.append(LayerNorm(hidden_channels))
+        if pre_ln:
+            self.last_ln = LayerNorm(hidden_channels)  # final norm, applied once after the last layer
+
+    def forward(self, x, x_mask, attn_mask=1):
+        if isinstance(attn_mask, torch.Tensor):
+            attn_mask = attn_mask[:, None]  # add an axis at position 1 (presumably heads - confirm with callers)
+        attn_mask = x_mask.unsqueeze(2) * x_mask.unsqueeze(-1) * attn_mask  # pairwise mask from x_mask, times attn_mask
+        for i in range(self.n_layers):
+            x = x * x_mask
+            x_ = x  # residual branch
+            if self.pre_ln:
+                x = self.norm_layers_1[i](x)
+            y = self.attn_layers[i](x, x, attn_mask)  # self-attention: queries and keys/values both come from x
+            y = self.drop(y)
+            x = x_ + y
+            if not self.pre_ln:
+                x = self.norm_layers_1[i](x)
+
+            x_ = x  # residual branch
+            if self.pre_ln:
+                x = self.norm_layers_2[i](x)
+            y = self.ffn_layers[i](x, x_mask)
+            y = self.drop(y)
+            x = x_ + y
+            if not self.pre_ln:
+                x = self.norm_layers_2[i](x)
+        if self.pre_ln:
+            x = self.last_ln(x)
+        x = x * x_mask
+        return x
+
+
+class MultiHeadAttention(nn.Module):  # Multi-head attention on [b, d, t] tensors, with optional windowed relative positions
+    def __init__(self, channels, out_channels, n_heads, window_size=None, heads_share=True, p_dropout=0.,
+                 block_length=None, proximal_bias=False, proximal_init=False):
+        super().__init__()
+        assert channels % n_heads == 0
+
+        self.channels = channels
+        self.out_channels = out_channels
+        self.n_heads = n_heads
+        self.window_size = window_size  # None disables the relative-position terms
+        self.heads_share = heads_share  # True: a single relative embedding table is shared by all heads
+        self.block_length = block_length  # None disables the band restriction on attention
+        self.proximal_bias = proximal_bias
+        self.p_dropout = p_dropout
+        self.attn = None  # attention weights of the most recent forward() call
+
+        self.k_channels = channels // n_heads  # per-head channel count
+        self.conv_q = nn.Conv1d(channels, channels, 1)
+        self.conv_k = nn.Conv1d(channels, channels, 1)
+        self.conv_v = nn.Conv1d(channels, channels, 1)
+        if window_size is not None:
+            n_heads_rel = 1 if heads_share else n_heads
+            rel_stddev = self.k_channels ** -0.5
+            self.emb_rel_k = nn.Parameter(torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) * rel_stddev)
+            self.emb_rel_v = nn.Parameter(torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) * rel_stddev)
+        self.conv_o = nn.Conv1d(channels, out_channels, 1)
+        self.drop = nn.Dropout(p_dropout)
+
+        nn.init.xavier_uniform_(self.conv_q.weight)
+        nn.init.xavier_uniform_(self.conv_k.weight)
+        if proximal_init:  # start the key projection as a copy of the query projection
+            self.conv_k.weight.data.copy_(self.conv_q.weight.data)
+            self.conv_k.bias.data.copy_(self.conv_q.bias.data)
+        nn.init.xavier_uniform_(self.conv_v.weight)
+
+    def forward(self, x, c, attn_mask=None):  # x supplies the queries, c the keys and values
+        q = self.conv_q(x)
+        k = self.conv_k(c)
+        v = self.conv_v(c)
+
+        x, self.attn = self.attention(q, k, v, mask=attn_mask)
+
+        x = self.conv_o(x)
+        return x
+
+    def attention(self, query, key, value, mask=None):
+        # reshape [b, d, t] -> [b, n_h, t, d_k]
+        b, d, t_s, t_t = (*key.size(), query.size(2))  # t_s: key/value length, t_t: query length
+        query = query.view(b, self.n_heads, self.k_channels, t_t).transpose(2, 3)
+        key = key.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3)
+        value = value.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3)
+
+        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(self.k_channels)
+        if self.window_size is not None:
+            assert t_s == t_t, "Relative attention is only available for self-attention."
+            key_relative_embeddings = self._get_relative_embeddings(self.emb_rel_k, t_s)
+            rel_logits = self._matmul_with_relative_keys(query, key_relative_embeddings)
+            rel_logits = self._relative_position_to_absolute_position(rel_logits)
+            scores_local = rel_logits / math.sqrt(self.k_channels)
+            scores = scores + scores_local
+        if self.proximal_bias:
+            assert t_s == t_t, "Proximal bias is only available for self-attention."
+            scores = scores + self._attention_bias_proximal(t_s).to(device=scores.device, dtype=scores.dtype)
+        if mask is not None:
+            scores = scores.masked_fill(mask == 0, -1e4)  # a large negative score instead of -inf
+            if self.block_length is not None:  # note: only evaluated when a mask is supplied
+                block_mask = torch.ones_like(scores).triu(-self.block_length).tril(self.block_length)
+                scores = scores * block_mask + -1e4 * (1 - block_mask)
+        p_attn = F.softmax(scores, dim=-1)  # [b, n_h, t_t, t_s]
+        p_attn = self.drop(p_attn)
+        output = torch.matmul(p_attn, value)
+        if self.window_size is not None:
+            relative_weights = self._absolute_position_to_relative_position(p_attn)
+            value_relative_embeddings = self._get_relative_embeddings(self.emb_rel_v, t_s)
+            output = output + self._matmul_with_relative_values(relative_weights, value_relative_embeddings)
+        output = output.transpose(2, 3).contiguous().view(b, d, t_t)  # [b, n_h, t_t, d_k] -> [b, d, t_t]
+        return output, p_attn
+
+    def _matmul_with_relative_values(self, x, y):
+        """
+        x: [b, h, l, m]
+        y: [h or 1, m, d]
+        ret: [b, h, l, d]
+        """
+        ret = torch.matmul(x, y.unsqueeze(0))
+        return ret
+
+    def _matmul_with_relative_keys(self, x, y):
+        """
+        x: [b, h, l, d]
+        y: [h or 1, m, d]
+        ret: [b, h, l, m]
+        """
+        ret = torch.matmul(x, y.unsqueeze(0).transpose(-2, -1))
+        return ret
+
+    def _get_relative_embeddings(self, relative_embeddings, length):  # returns 2 * length - 1 rows of the table
+        max_relative_position = 2 * self.window_size + 1  # NOTE(review): assigned but never used below
+        # Pad first before slice to avoid using cond ops.
+        pad_length = max(length - (self.window_size + 1), 0)
+        slice_start_position = max((self.window_size + 1) - length, 0)
+        slice_end_position = slice_start_position + 2 * length - 1
+        if pad_length > 0:  # sequence longer than the window: zero-pad the table on both sides
+            padded_relative_embeddings = F.pad(
+                relative_embeddings,
+                convert_pad_shape([[0, 0], [pad_length, pad_length], [0, 0]]))
+        else:
+            padded_relative_embeddings = relative_embeddings
+        used_relative_embeddings = padded_relative_embeddings[:, slice_start_position:slice_end_position]
+        return used_relative_embeddings
+
+    def _relative_position_to_absolute_position(self, x):
+        """
+        x: [b, h, l, 2*l-1]
+        ret: [b, h, l, l]
+        """
+        batch, heads, length, _ = x.size()
+        # Concat columns of pad to shift from relative to absolute indexing.
+        x = F.pad(x, convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, 1]]))
+
+        # Concat extra elements so to add up to shape (len+1, 2*len-1).
+        x_flat = x.view([batch, heads, length * 2 * length])
+        x_flat = F.pad(x_flat, convert_pad_shape([[0, 0], [0, 0], [0, length - 1]]))
+
+        # Reshape and slice out the padded elements.
+        x_final = x_flat.view([batch, heads, length + 1, 2 * length - 1])[:, :, :length, length - 1:]
+        return x_final
+
+    def _absolute_position_to_relative_position(self, x):
+        """
+        x: [b, h, l, l]
+        ret: [b, h, l, 2*l-1]
+        """
+        batch, heads, length, _ = x.size()
+        # Pad along the last axis (columns).
+        x = F.pad(x, convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, length - 1]]))
+        x_flat = x.view([batch, heads, -1])
+        # add 0's in the beginning that will skew the elements after reshape
+        x_flat = F.pad(x_flat, convert_pad_shape([[0, 0], [0, 0], [length, 0]]))
+        x_final = x_flat.view([batch, heads, length, 2 * length])[:, :, :, 1:]
+        return x_final
+
+    def _attention_bias_proximal(self, length):
+        """Bias for self-attention to encourage attention to close positions.
+        Args:
+          length: an integer scalar.
+        Returns:
+          a Tensor with shape [1, 1, length, length]
+        """
+        r = torch.arange(length, dtype=torch.float32)
+        diff = torch.unsqueeze(r, 0) - torch.unsqueeze(r, 1)
+        return torch.unsqueeze(torch.unsqueeze(-torch.log1p(torch.abs(diff)), 0), 0)  # -log(1 + |i - j|)
+
+
+class FFN(nn.Module):
+    """Two-convolution feed-forward block; the inputs to both convolutions and the output are masked."""
+
+    def __init__(self, in_channels, out_channels, filter_channels, kernel_size, p_dropout=0., activation=None):
+        super().__init__()
+        self.in_channels, self.out_channels = in_channels, out_channels
+        self.filter_channels, self.kernel_size = filter_channels, kernel_size
+        self.p_dropout, self.activation = p_dropout, activation
+
+        self.conv_1 = nn.Conv1d(in_channels, filter_channels, kernel_size, padding=kernel_size // 2)
+        self.conv_2 = nn.Conv1d(filter_channels, out_channels, 1)
+        self.drop = nn.Dropout(p_dropout)
+
+    def forward(self, x, x_mask):
+        """Return conv_2(dropout(act(conv_1(x * x_mask))) * x_mask) * x_mask."""
+        hidden = self.conv_1(x * x_mask)
+        if self.activation == "gelu":
+            # sigmoid-based GELU approximation
+            hidden = hidden * torch.sigmoid(1.702 * hidden)
+        else:
+            hidden = torch.relu(hidden)
+        hidden = self.drop(hidden) * x_mask
+        return self.conv_2(hidden) * x_mask
+
+
+class LayerNorm(nn.Module):
+    """Normalize over axis 1 of the input and apply a learned per-channel scale and shift."""
+
+    def __init__(self, channels, eps=1e-4):
+        super().__init__()
+        self.channels = channels
+        self.eps = eps
+        self.gamma = nn.Parameter(torch.ones(channels))
+        self.beta = nn.Parameter(torch.zeros(channels))
+
+    def forward(self, x):
+        mu = torch.mean(x, 1, keepdim=True)
+        centered = x - mu
+        var = torch.mean(centered ** 2, 1, keepdim=True)
+        normed = centered * torch.rsqrt(var + self.eps)
+
+        # View gamma/beta as [1, c, 1, ...] so they broadcast against x.
+        bshape = [1, -1] + [1] * (len(x.shape) - 2)
+        return normed * self.gamma.view(*bshape) + self.beta.view(*bshape)
+
+
+class ConvReluNorm(nn.Module):  # Residual stack of n_layers (Conv1d -> LayerNorm -> ReLU -> Dropout) + 1x1 projection
+    def __init__(self, in_channels, hidden_channels, out_channels, kernel_size, n_layers, p_dropout):
+        super().__init__()
+        self.in_channels = in_channels
+        self.hidden_channels = hidden_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.n_layers = n_layers
+        self.p_dropout = p_dropout
+        assert n_layers > 1, "Number of layers should be larger than 1."  # message now matches the check
+
+        self.conv_layers = nn.ModuleList()
+        self.norm_layers = nn.ModuleList()
+        self.conv_layers.append(nn.Conv1d(in_channels, hidden_channels, kernel_size, padding=kernel_size // 2))
+        self.norm_layers.append(LayerNorm(hidden_channels))
+        self.relu_drop = nn.Sequential(  # one shared ReLU + Dropout pair reused after every layer
+            nn.ReLU(),
+            nn.Dropout(p_dropout))
+        for _ in range(n_layers - 1):
+            self.conv_layers.append(nn.Conv1d(hidden_channels, hidden_channels, kernel_size, padding=kernel_size // 2))
+            self.norm_layers.append(LayerNorm(hidden_channels))
+        self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
+        self.proj.weight.data.zero_()  # zero-initialized, so the block initially returns x_org * x_mask
+        self.proj.bias.data.zero_()
+
+    def forward(self, x, x_mask):
+        x_org = x  # kept for the residual connection; requires out_channels to match x's channel count
+        for i in range(self.n_layers):
+            x = self.conv_layers[i](x * x_mask)
+            x = self.norm_layers[i](x)
+            x = self.relu_drop(x)
+        x = x_org + self.proj(x)
+        return x * x_mask
+
+
+class RelTransformerEncoder(nn.Module):  # Optional embedding + optional conv prenet + relative-position Encoder
+    def __init__(self,
+                 n_vocab,
+                 out_channels,
+                 hidden_channels,
+                 filter_channels,
+                 n_heads,
+                 n_layers,
+                 kernel_size,
+                 p_dropout=0.0,
+                 window_size=4,
+                 block_length=None,
+                 in_channels=None,
+                 prenet=True,
+                 pre_ln=True,
+                 ):
+
+        super().__init__()
+
+        self.n_vocab = n_vocab
+        self.out_channels = out_channels  # stored only; not used elsewhere in this class
+        self.hidden_channels = hidden_channels
+        self.filter_channels = filter_channels
+        self.n_heads = n_heads
+        self.n_layers = n_layers
+        self.kernel_size = kernel_size
+        self.p_dropout = p_dropout
+        self.window_size = window_size
+        self.block_length = block_length
+        self.prenet = prenet
+        if n_vocab > 0:  # token-id input: embed it; id 0 is the padding index
+            self.emb = Embedding(n_vocab, hidden_channels, padding_idx=0)
+
+        if prenet:
+            if in_channels is None:
+                in_channels = hidden_channels
+            self.pre = ConvReluNorm(in_channels, in_channels, in_channels,
+                                    kernel_size=5, n_layers=3, p_dropout=0)
+        if in_channels is not None and in_channels != hidden_channels:  # project inputs to the encoder width
+            self.encoder_inp_proj = nn.Conv1d(in_channels, hidden_channels, 1)
+        self.encoder = Encoder(
+            hidden_channels,
+            filter_channels,
+            n_heads,
+            n_layers,
+            kernel_size,
+            p_dropout,
+            window_size=window_size,
+            block_length=block_length,
+            pre_ln=pre_ln,
+        )
+
+    def forward(self, x, x_mask=None, other_embeds=0, attn_mask=1):
+        if self.n_vocab > 0:
+            x_lengths = (x > 0).long().sum(-1)  # number of non-zero ids per sequence
+            x = self.emb(x) * math.sqrt(self.hidden_channels)  # [b, t, h]
+        else:
+            x_lengths = (x.abs().sum(-1) > 0).long().sum(-1)  # number of frames that are not all-zero
+        x = x + other_embeds
+        x = torch.transpose(x, 1, -1)  # [b, h, t]
+        x_mask = torch.unsqueeze(sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype)  # NOTE(review): replaces the x_mask argument
+
+        if self.prenet:
+            x = self.pre(x, x_mask)
+            self.prenet_out = x.transpose(1, 2)  # prenet output kept on the module as [b, t, c]
+        if hasattr(self, 'encoder_inp_proj'):
+            x = self.encoder_inp_proj(x) * x_mask
+        x = self.encoder(x, x_mask, attn_mask)
+        return x.transpose(1, 2)  # back to [b, t, h]
diff --git a/modules/commons/rnn.py b/modules/commons/rnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..205c2c76b8fda2de920bc59228a5eec0a20119a9
--- /dev/null
+++ b/modules/commons/rnn.py
@@ -0,0 +1,261 @@
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+
+class PreNet(nn.Module):
+    """Two stacked linear layers, each followed by ReLU and dropout.
+
+    Args:
+        in_dims: Size of the last axis of the input.
+        fc1_dims: Output width of the first linear layer.
+        fc2_dims: Output width of the second linear layer (module output width).
+        dropout: Dropout probability used after each ReLU; only active while
+            the module is in training mode.
+    """
+
+    def __init__(self, in_dims, fc1_dims=256, fc2_dims=128, dropout=0.5):
+        super().__init__()
+        self.fc1 = nn.Linear(in_dims, fc1_dims)
+        self.fc2 = nn.Linear(fc1_dims, fc2_dims)
+        self.p = dropout
+
+    def forward(self, x):
+        """Apply linear -> ReLU -> dropout twice and return the result."""
+        for layer in (self.fc1, self.fc2):
+            x = F.dropout(F.relu(layer(x)), self.p, training=self.training)
+        return x
+
+
+class HighwayNetwork(nn.Module):
+    """Single highway layer: a sigmoid gate mixes a ReLU transform with the input.
+
+    Computes ``g * relu(W1 x) + (1 - g) * x`` with ``g = sigmoid(W2 x)``.
+    The bias of ``W1`` is zeroed at construction.
+    """
+
+    def __init__(self, size):
+        super().__init__()
+        self.W1 = nn.Linear(size, size)
+        self.W2 = nn.Linear(size, size)
+        self.W1.bias.data.fill_(0.)
+
+    def forward(self, x):
+        """Return the gated combination of the transformed and raw input."""
+        transformed = F.relu(self.W1(x))
+        gate = torch.sigmoid(self.W2(x))
+        return gate * transformed + (1. - gate) * x
+
+
+class BatchNormConv(nn.Module):
+    """Bias-free 1-D convolution, optional ReLU, then batch normalisation.
+
+    The convolution pads ``kernel // 2`` on both sides, so an even ``kernel``
+    yields one more output step than input steps.
+
+    Args:
+        in_channels: Number of input channels.
+        out_channels: Number of output channels.
+        kernel: Convolution kernel width.
+        relu: ReLU is applied only when this is exactly ``True``.
+    """
+
+    def __init__(self, in_channels, out_channels, kernel, relu=True):
+        super().__init__()
+        self.conv = nn.Conv1d(in_channels, out_channels, kernel, stride=1, padding=kernel // 2, bias=False)
+        self.bnorm = nn.BatchNorm1d(out_channels)
+        self.relu = relu
+
+    def forward(self, x):
+        """Run conv -> (ReLU) -> batch norm on ``x``."""
+        out = self.conv(x)
+        if self.relu is True:
+            out = F.relu(out)
+        return self.bnorm(out)
+
+
+class ConvNorm(torch.nn.Module):
+    """``Conv1d`` whose weight is Xavier-uniform initialised.
+
+    When ``padding`` is ``None`` the kernel size must be odd (asserted) and the
+    padding is set to ``int(dilation * (kernel_size - 1) / 2)``.
+
+    Args:
+        w_init_gain: Non-linearity name passed to
+            ``torch.nn.init.calculate_gain`` to scale the initialisation.
+    """
+
+    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1,
+                 padding=None, dilation=1, bias=True, w_init_gain='linear'):
+        super().__init__()
+        if padding is None:
+            assert (kernel_size % 2 == 1)
+            padding = int(dilation * (kernel_size - 1) / 2)
+
+        conv = torch.nn.Conv1d(
+            in_channels,
+            out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            bias=bias,
+        )
+        gain = torch.nn.init.calculate_gain(w_init_gain)
+        torch.nn.init.xavier_uniform_(conv.weight, gain=gain)
+        self.conv = conv
+
+    def forward(self, signal):
+        """Return the convolution of ``signal``."""
+        return self.conv(signal)
+
+
+class CBHG(nn.Module):
+    """Conv bank -> max-pool -> two conv projections -> highways -> bi-GRU.
+
+    Args:
+        K: Largest kernel width in the bank; one ``BatchNormConv`` is built for
+            each width ``1..K``.
+        in_channels: Channels of the input (and of the residual added after the
+            projections, so ``proj_channels[1]`` has to match it).
+        channels: Channels produced by each bank conv, and the highway/GRU width.
+        proj_channels: Output channels of the two projection convs.
+        num_highways: Number of stacked highway layers.
+    """
+
+    def __init__(self, K, in_channels, channels, proj_channels, num_highways):
+        super().__init__()
+
+        # RNNs whose weights `_flatten_parameters()` keeps contiguous.
+        self._to_flatten = []
+
+        self.bank_kernels = list(range(1, K + 1))
+        self.conv1d_bank = nn.ModuleList(
+            [BatchNormConv(in_channels, channels, width) for width in self.bank_kernels]
+        )
+
+        self.maxpool = nn.MaxPool1d(kernel_size=2, stride=1, padding=1)
+
+        bank_channels = len(self.bank_kernels) * channels
+        self.conv_project1 = BatchNormConv(bank_channels, proj_channels[0], 3)
+        self.conv_project2 = BatchNormConv(proj_channels[0], proj_channels[1], 3, relu=False)
+
+        # A linear adapter is needed when the projection width differs from
+        # the highway width.
+        self.highway_mismatch = bool(proj_channels[-1] != channels)
+        if self.highway_mismatch:
+            self.pre_highway = nn.Linear(proj_channels[-1], channels, bias=False)
+
+        self.highways = nn.ModuleList(
+            [HighwayNetwork(channels) for _ in range(num_highways)]
+        )
+
+        self.rnn = nn.GRU(channels, channels, batch_first=True, bidirectional=True)
+        self._to_flatten.append(self.rnn)
+
+        # Avoid fragmented RNN parameters and the warning that comes with them.
+        self._flatten_parameters()
+
+    def forward(self, x):
+        """Map ``x`` (channels on axis 1, time on the last axis) to GRU outputs.
+
+        The returned tensor has time on axis 1 and ``2 * channels`` features
+        (the GRU is bidirectional and batch-first).
+        """
+        # Replication under DataParallel can leave the weights non-contiguous
+        # again, so re-flatten on every call.
+        self._flatten_parameters()
+
+        residual = x
+        seq_len = x.size(-1)
+
+        # Even kernel widths give one extra step; trim every branch to seq_len
+        # and stack the branches along the channel axis.
+        bank_outputs = [conv(x)[:, :, :seq_len] for conv in self.conv1d_bank]
+        stacked = torch.cat(bank_outputs, dim=1)
+
+        # The padded max-pool also yields one extra step; drop it.
+        x = self.maxpool(stacked)[:, :, :seq_len]
+
+        x = self.conv_project2(self.conv_project1(x))
+        x = x + residual
+
+        x = x.transpose(1, 2)
+        if self.highway_mismatch is True:
+            x = self.pre_highway(x)
+        for highway in self.highways:
+            x = highway(x)
+
+        x, _ = self.rnn(x)
+        return x
+
+    def _flatten_parameters(self):
+        """Call `flatten_parameters` on every RNN registered in `_to_flatten`.
+
+        Keeps the RNN weights contiguous, which avoids a PyTorch warning."""
+        for rnn in self._to_flatten:
+            rnn.flatten_parameters()
+
+
+class TacotronEncoder(nn.Module):
+    """Embedding -> PreNet -> CBHG -> linear projection.
+
+    The PreNet is built with all widths equal to ``embed_dims`` while the CBHG
+    is built with ``in_channels=cbhg_channels``, so ``embed_dims`` has to equal
+    ``cbhg_channels`` for ``forward`` to run.
+
+    Args:
+        embed_dims: Embedding width (also the PreNet width).
+        num_chars: Size of the embedding table.
+        cbhg_channels: Channel width used throughout the CBHG and for the output.
+        K: Number of kernel widths in the CBHG conv bank.
+        num_highways: Number of highway layers in the CBHG.
+        dropout: Dropout probability used by the PreNet.
+    """
+
+    def __init__(self, embed_dims, num_chars, cbhg_channels, K, num_highways, dropout):
+        super().__init__()
+        self.embedding = nn.Embedding(num_chars, embed_dims)
+        self.pre_net = PreNet(embed_dims, embed_dims, embed_dims, dropout=dropout)
+        self.cbhg = CBHG(K=K, in_channels=cbhg_channels, channels=cbhg_channels,
+                         proj_channels=[cbhg_channels, cbhg_channels],
+                         num_highways=num_highways)
+        self.proj_out = nn.Linear(cbhg_channels * 2, cbhg_channels)
+
+    def forward(self, x):
+        """Encode token ids ``x`` and return features of width ``cbhg_channels``."""
+        x = self.embedding(x)
+        x = self.pre_net(x)
+        # Out-of-place transpose: the in-place `transpose_` mutated the tensor
+        # handed back by the pre-net, which autograd may still need unchanged
+        # for the backward pass. The values produced are identical.
+        x = x.transpose(1, 2)
+        x = self.cbhg(x)
+        # The CBHG's bidirectional GRU emits 2 * cbhg_channels features.
+        x = self.proj_out(x)
+        return x
+
+
+class RNNEncoder(nn.Module):
+    """Embedding -> residual conv stack -> single-layer bidirectional LSTM.
+
+    Index 0 is the padding index of the embedding; sequence lengths are taken
+    as the number of entries greater than 0 in each row of the input.
+
+    Args:
+        num_chars: Size of the embedding table.
+        embedding_dim: Embedding width; the LSTM uses ``int(embedding_dim / 2)``
+            hidden units per direction.
+        n_convolutions: Number of ``ConvNorm`` + ``BatchNorm1d`` blocks.
+        kernel_size: Kernel width of every convolution.
+    """
+
+    def __init__(self, num_chars, embedding_dim, n_convolutions=3, kernel_size=5):
+        super().__init__()
+        self.embedding = nn.Embedding(num_chars, embedding_dim, padding_idx=0)
+
+        conv_padding = int((kernel_size - 1) / 2)
+        self.convolutions = nn.ModuleList([
+            nn.Sequential(
+                ConvNorm(embedding_dim,
+                         embedding_dim,
+                         kernel_size=kernel_size, stride=1,
+                         padding=conv_padding,
+                         dilation=1, w_init_gain='relu'),
+                nn.BatchNorm1d(embedding_dim),
+            )
+            for _ in range(n_convolutions)
+        ])
+
+        self.lstm = nn.LSTM(embedding_dim, int(embedding_dim / 2), 1,
+                            batch_first=True, bidirectional=True)
+
+    def forward(self, x):
+        """Encode token ids ``x`` and return the padded LSTM outputs.
+
+        ``pad_packed_sequence`` is called without ``total_length``, so the time
+        axis of the result is as long as the longest sequence in the batch.
+        """
+        # The lengths are moved to the CPU before packing.
+        token_counts = (x > 0).sum(-1).cpu().numpy()
+
+        hidden = self.embedding(x).transpose(1, 2)  # [B, H, T]
+        for conv in self.convolutions:
+            # Dropout with p=0.5 on the activated conv output, plus a residual.
+            hidden = F.dropout(F.relu(conv(hidden)), 0.5, self.training) + hidden
+        hidden = hidden.transpose(1, 2)  # [B, T, H]
+
+        packed = nn.utils.rnn.pack_padded_sequence(
+            hidden, token_counts, batch_first=True, enforce_sorted=False)
+
+        self.lstm.flatten_parameters()
+        packed_out, _ = self.lstm(packed)
+        outputs, _ = nn.utils.rnn.pad_packed_sequence(packed_out, batch_first=True)
+        return outputs
+
+
+class DecoderRNN(torch.nn.Module):
+    """Conv stack -> LayerNorm -> bidirectional LSTM -> output conv.
+
+    Frames of the input that are entirely zero are treated as padding: they are
+    excluded from the LSTM via packing and forced to zero in the output.
+
+    Args:
+        hidden_size: Channel width of the input and of the output.
+        decoder_rnn_dim: LSTM hidden size per direction; ``0`` selects
+            ``hidden_size * 2``.
+        dropout: Passed to ``torch.nn.LSTM``. NOTE(review): the LSTM has a
+            single layer, and PyTorch only applies this dropout between
+            stacked layers, so a non-zero value presumably has no effect
+            beyond a construction-time warning -- confirm against the
+            ``torch.nn.LSTM`` docs.
+    """
+
+    def __init__(self, hidden_size, decoder_rnn_dim, dropout):
+        super(DecoderRNN, self).__init__()
+        self.in_conv1d = nn.Sequential(
+            torch.nn.Conv1d(
+                in_channels=hidden_size,
+                out_channels=hidden_size,
+                kernel_size=9, padding=4,
+            ),
+            torch.nn.ReLU(),
+            torch.nn.Conv1d(
+                in_channels=hidden_size,
+                out_channels=hidden_size,
+                kernel_size=9, padding=4,
+            ),
+        )
+        self.ln = nn.LayerNorm(hidden_size)
+        if decoder_rnn_dim == 0:
+            decoder_rnn_dim = hidden_size * 2
+        self.rnn = torch.nn.LSTM(
+            input_size=hidden_size,
+            hidden_size=decoder_rnn_dim,
+            num_layers=1,
+            batch_first=True,
+            bidirectional=True,
+            dropout=dropout
+        )
+        self.rnn.flatten_parameters()
+        # The bidirectional LSTM emits 2 * decoder_rnn_dim features per step.
+        self.conv1d = torch.nn.Conv1d(
+            in_channels=decoder_rnn_dim * 2,
+            out_channels=hidden_size,
+            kernel_size=3,
+            padding=1,
+        )
+
+    def forward(self, x):
+        """Decode ``x`` of shape [B, T, hidden_size] into a tensor of the same shape.
+
+        The output always keeps the input's time length ``T``; padded frames
+        are zero.
+        """
+        # A frame is valid when any of its channels is non-zero.
+        input_masks = x.abs().sum(-1).ne(0)[:, :, None]  # [B, T, 1]
+        total_length = x.size(1)
+        input_lengths = input_masks.sum([-1, -2])
+        input_lengths = input_lengths.cpu().numpy()
+
+        x = self.in_conv1d(x.transpose(1, 2)).transpose(1, 2)
+        x = self.ln(x)
+        x = nn.utils.rnn.pack_padded_sequence(x, input_lengths, batch_first=True, enforce_sorted=False)
+        self.rnn.flatten_parameters()
+        x, _ = self.rnn(x)  # [B, T, C]
+        # Pad back to the full input length. Without `total_length` the result
+        # is only as long as the longest sequence, and the mask multiplication
+        # below fails whenever every sequence in the batch ends with padding.
+        x, _ = nn.utils.rnn.pad_packed_sequence(x, batch_first=True, total_length=total_length)
+        x = x * input_masks
+        pre_mel = self.conv1d(x.transpose(1, 2)).transpose(1, 2)  # [B, T, C]
+        pre_mel = pre_mel * input_masks
+        return pre_mel
diff --git a/modules/commons/rot_transformer.py b/modules/commons/rot_transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..d17c488042b54a70f0b897f4efc488dfbce3b3b3
--- /dev/null
+++ b/modules/commons/rot_transformer.py
@@ -0,0 +1,635 @@
+import math
+import torch
+from typing import Optional, Tuple
+from torch import nn
+from torch.nn import Parameter, Linear
+from torch.cuda.amp import autocast
+from modules.commons.layers import LayerNorm, Embedding
+from modules.commons.transformer import TransformerFFNLayer, MultiheadAttention
+from utils.nn.seq_utils import get_incremental_state, set_incremental_state, softmax, make_positions
+import torch.nn.functional as F
+
+# Module-level defaults. Neither name is referenced in the part of this module
+# reviewed here; presumably they cap source/target sequence positions for
+# classes defined further down or for importers -- TODO confirm.
+DEFAULT_MAX_SOURCE_POSITIONS = 3000
+DEFAULT_MAX_TARGET_POSITIONS = 3000
+
+
+class SinusoidalPositionalEmbedding(nn.Module):
+    """Sinusoidal positional embeddings for sequences of any length.
+
+    The lookup table is built eagerly for ``init_size`` positions and rebuilt
+    with more rows whenever a longer input is seen. The row at ``padding_idx``
+    is all zeros.
+    """
+
+    def __init__(self, embedding_dim, padding_idx, init_size=1024):
+        super().__init__()
+        self.embedding_dim = embedding_dim
+        self.padding_idx = padding_idx
+        self.weights = SinusoidalPositionalEmbedding.get_embedding(
+            init_size, embedding_dim, padding_idx)
+        # Dummy buffer: follows the module's device/dtype so the table can be
+        # moved to match it in forward().
+        self.register_buffer('_float_tensor', torch.FloatTensor(1))
+
+    @staticmethod
+    def get_embedding(num_embeddings, embedding_dim, padding_idx=None):
+        """Return a ``(num_embeddings, embedding_dim)`` sinusoidal table.
+
+        The first half of each row holds sines and the second half cosines; an
+        odd ``embedding_dim`` gets one trailing zero column. This matches the
+        implementation in tensor2tensor, but differs slightly from the
+        description in Section 3.5 of "Attention Is All You Need".
+        """
+        half_dim = embedding_dim // 2
+        log_step = math.log(10000) / (half_dim - 1)
+        inv_freq = torch.exp(torch.arange(half_dim, dtype=torch.float) * -log_step)
+        steps = torch.arange(num_embeddings, dtype=torch.float).unsqueeze(1)
+        angles = steps * inv_freq.unsqueeze(0)
+        table = torch.cat([torch.sin(angles), torch.cos(angles)], dim=1).view(num_embeddings, -1)
+        if embedding_dim % 2 == 1:
+            # zero pad the odd trailing column
+            table = torch.cat([table, torch.zeros(num_embeddings, 1)], dim=1)
+        if padding_idx is not None:
+            table[padding_idx, :] = 0
+        return table
+
+    def forward(self, input, incremental_state=None, timestep=None, positions=None, **kwargs):
+        """Look up embeddings for ``input`` of size [bsz x seqlen].
+
+        With ``incremental_state`` set, a single position is returned for the
+        whole batch: ``timestep + 1`` when ``timestep`` is given, otherwise
+        ``seq_len``. Otherwise positions come from ``positions`` or, when that
+        is ``None``, from ``make_positions(input, padding_idx)``.
+        """
+        bsz, seq_len = input.shape[:2]
+        max_pos = self.padding_idx + 1 + seq_len
+        table_too_small = self.weights is None or max_pos > self.weights.size(0)
+        if table_too_small:
+            # recompute/expand embeddings if needed
+            self.weights = SinusoidalPositionalEmbedding.get_embedding(
+                max_pos, self.embedding_dim, self.padding_idx)
+        self.weights = self.weights.to(self._float_tensor)
+
+        if incremental_state is not None:
+            # positions is the same for every token when decoding a single step
+            if timestep is not None:
+                pos = timestep.view(-1)[0] + 1
+            else:
+                pos = seq_len
+            return self.weights[self.padding_idx + pos, :].expand(bsz, 1, -1)
+
+        if positions is None:
+            positions = make_positions(input, self.padding_idx)
+        flat = positions.view(-1)
+        return self.weights.index_select(0, flat).view(bsz, seq_len, -1).detach()
+
+    def max_positions(self):
+        """Maximum number of supported positions."""
+        return int(1e5)  # an arbitrary large number
+
+
+class RotaryEmbeddings(nn.Module):
+    """Rotary position embeddings (Su et al., 2021) with cached cos/sin tables."""
+
+    cos: torch.Tensor
+    sin: torch.Tensor
+    theta: torch.Tensor
+
+    def __init__(
+            self,
+            width: int,
+            *,
+            seq_len: int = 4000,
+            base: int = 10000,
+            device: Optional[torch.device] = None,
+    ):
+        """Precompute the rotary tables for up to ``seq_len`` positions.
+
+        The tables are recomputed whenever an input needs more positions than
+        are currently cached.
+
+        :param width:
+            Rotary embedding dimensionality, must be even.
+        :param seq_len:
+            Number of positions to initially precompute.
+        :param base:
+            The base used for Θ_i, determines the cycle length of the
+            embeddings.
+        :param device: Device on which the module is to be initialized.
+        :raises ValueError: if ``width`` is odd.
+        """
+        super().__init__()
+
+        if width % 2 != 0:
+            raise ValueError(f"Width of rotary embeddings must be even, was: {width}")
+
+        # The buffers are non-persistent, so nothing will later replace a
+        # meta-device tensor with real weights; allocate on the default
+        # device instead.
+        requested_meta = device is not None and device.type == "meta"
+        if requested_meta:
+            device = None
+
+        # Θ_i = 10000^(-2(i-1)/d)
+        exponents = -torch.arange(0, width, 2, dtype=torch.float, device=device) / width
+        self.register_buffer("theta", torch.pow(base, exponents), persistent=False)
+
+        self._create_rotary_embed(width=width, length=seq_len)
+
+    def _create_rotary_embed(self, *, width: int, length: int):
+        """(Re)build the ``cos``/``sin`` buffers, each of shape (length, width)."""
+        # mΘ
+        steps = torch.arange(length, device=self.theta.device).unsqueeze(1)
+        angles = steps * self.theta.unsqueeze(0)
+
+        # Sin and cos are each applied twice (see Eq 15, 34); the halves are
+        # concatenated rather than interleaved for compatibility with most
+        # common implementations.
+        angles = torch.cat([angles, angles], dim=-1)
+
+        self.register_buffer("cos", angles.cos().view([length, width]), persistent=False)
+        self.register_buffer("sin", angles.sin().view([length, width]), persistent=False)
+
+    def _rotate(self, input: torch.Tensor):
+        """Swap the two halves of the innermost axis, negating the upper half.
+
+        input (Tensor): array to rotate.
+        RETURNS (Tensor): rotated array.
+
+        Shapes:
+            input - (..., width)
+            output - (..., width)
+        """
+        split = input.shape[-1] // 2
+        lower, upper = input[..., :split], input[..., split:]
+        return torch.cat([-upper, lower], dim=-1)
+
+    def forward(self, input: torch.Tensor, *, positions: Optional[torch.Tensor] = None):
+        """
+        Apply rotary embeddings to an array.
+
+        :param input: Array to apply the rotary embeddings to.
+        :param positions: positions of the inputs. If no positions are
+            provided, they are assumed to be [0, seq_len).
+        :return: Array with the rotary embeddings applied.
+
+        Shapes:
+            input - (batch_size, num_heads, seq_len, width_per_head)
+            positions - (batch_size, seq_len)
+            output - (batch_size, num_heads, seq_len, width_per_head)
+        """
+        batch_size, _, seq_len, width = input.shape
+
+        # Grow the cached tables when the input reaches past them.
+        if positions is None:
+            required = seq_len
+        else:
+            required = int(positions.max()) + 1
+        if self.cos.size(-2) < required:
+            self._create_rotary_embed(width=width, length=required)
+
+        if positions is None:
+            # Fastpath: positions from [0..seq_len), plain slicing.
+            rot_cos = self.cos[:seq_len, :].view(1, 1, seq_len, width)
+            rot_sin = self.sin[:seq_len, :].view(1, 1, seq_len, width)
+        else:
+            # Gather one table row per (batch, step) position, then restore
+            # the batch/sequence axes with a singleton head axis.
+            flat_positions = positions.view(-1)
+            rot_cos = self.cos[flat_positions].view(batch_size, 1, seq_len, width)
+            rot_sin = self.sin[flat_positions].view(batch_size, 1, seq_len, width)
+
+        # Eq 34 with ordering changed for compatibility.
+        return rot_cos * input + rot_sin * self._rotate(input)
+
+
+class RotMultiheadAttention(MultiheadAttention):
+    """`MultiheadAttention` variant that rotates queries and keys with
+    `RotaryEmbeddings` (one rotary table of width ``embed_dim // num_heads``)
+    before computing attention scores."""
+
+    def __init__(self, embed_dim, num_heads, kdim=None, vdim=None, dropout=0., bias=True,
+                 add_bias_kv=False, add_zero_attn=False, self_attention=False,
+                 encoder_decoder_attention=False):
+        super().__init__(embed_dim, num_heads, kdim=kdim, vdim=vdim, dropout=dropout, bias=bias,
+                         add_bias_kv=add_bias_kv, add_zero_attn=add_zero_attn, self_attention=self_attention,
+                         encoder_decoder_attention=encoder_decoder_attention)
+        self.rotary_embeds = RotaryEmbeddings(width=embed_dim // num_heads)
+
+    def forward(
+            self,
+            query, key, value,
+            spk_pos_ids_flat=None,
+            key_padding_mask=None,
+            incremental_state=None,
+            need_weights=True,
+            static_kv=False,
+            attn_mask=None,
+            before_softmax=False,
+            need_head_weights=False,
+            enc_dec_attn_constraint_mask=None,
+            reset_attn_weight=None
+    ):
+        """Input shape: Time x Batch x Channel
+
+        Args:
+            spk_pos_ids_flat (Tensor, optional): positions handed to the
+                rotary embedding for the queries, and for the keys when
+                `incremental_state` is None. The rotary module is called on
+                `q[None, :]` / `k[None, :]` (leading axis of size 1), so the
+                flattened positions must hold exactly one entry per time
+                step and are shared by every batch element and head. When
+                None, positions `[0, seq_len)` are used.
+            key_padding_mask (ByteTensor, optional): mask to exclude
+                keys that are pads, of shape `(batch, src_len)`, where
+                padding elements are indicated by 1s.
+            incremental_state (optional): when given, cached (un-rotated)
+                keys and values are read from and written back to it, and
+                the keys are rotated with positions `0..src_len-1`.
+            need_weights (bool, optional): return the attention weights,
+                averaged over heads (default: True).
+            static_kv (bool, optional): reuse the cached keys/values as-is
+                (encoder-decoder attention only).
+            attn_mask (ByteTensor, optional): typically used to
+                implement causal attention, where the mask prevents the
+                attention from looking forward in time (default: None).
+                It is added to the attention logits.
+            before_softmax (bool, optional): return the raw attention
+                weights and values before the attention softmax.
+            need_head_weights (bool, optional): return the attention
+                weights for each head. Implies *need_weights*. Default:
+                return the average attention weights over all heads.
+            enc_dec_attn_constraint_mask (Tensor, optional): positions
+                where it is truthy get their logits filled with -1e8.
+            reset_attn_weight (bool, optional): when True, the attention
+                probabilities of this call are cached on
+                `self.last_attn_probs`; when False, the cached ones are
+                used instead of the freshly computed ones; when None
+                (default) no caching happens.
+
+        Returns:
+            `(attn, (attn_weights, attn_logits))`, or `(attn_weights, v)`
+            when `before_softmax` is set.
+        """
+        if need_head_weights:
+            need_weights = True
+
+        tgt_len, bsz, embed_dim = query.size()
+        assert embed_dim == self.embed_dim
+        assert list(query.size()) == [tgt_len, bsz, embed_dim]
+
+        if incremental_state is not None:
+            saved_state = self._get_input_buffer(incremental_state)
+            if 'prev_key' in saved_state:
+                # previous time steps are cached - no need to recompute
+                # key and value if they are static
+                if static_kv:
+                    assert self.encoder_decoder_attention and not self.self_attention
+                    key = value = None
+        else:
+            saved_state = None
+
+        if self.self_attention:
+            # self-attention
+            q, k, v = self.in_proj_qkv(query)
+        elif self.encoder_decoder_attention:
+            # encoder-decoder attention
+            q = self.in_proj_q(query)
+            if key is None:
+                assert value is None
+                k = v = None
+            else:
+                # Both k and v are projected from `key` in this branch;
+                # `value` is not read.
+                k = self.in_proj_k(key)
+                v = self.in_proj_v(key)
+        else:
+            q = self.in_proj_q(query)
+            k = self.in_proj_k(key)
+            v = self.in_proj_v(value)
+        q = q * self.scaling
+
+        if self.bias_k is not None:
+            # Append the learned bias key/value as one extra source step and
+            # widen both masks by one (unmasked) column to match.
+            assert self.bias_v is not None
+            k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)])
+            v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)])
+            if attn_mask is not None:
+                attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1)
+            if key_padding_mask is not None:
+                key_padding_mask = torch.cat(
+                    [key_padding_mask, key_padding_mask.new_zeros(key_padding_mask.size(0), 1)], dim=1)
+
+        # Split heads: (len, bsz, embed) -> (bsz * num_heads, len, head_dim).
+        q = q.contiguous().view(tgt_len, bsz * self.num_heads, self.head_dim).transpose(0, 1)
+        if k is not None:
+            k = k.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1)
+        if v is not None:
+            v = v.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1)
+
+        # Apply rot embedding and store incremental_state
+        q = self.rotary_embeds(q[None, :], positions=spk_pos_ids_flat)[0]
+        if saved_state is not None:
+            # saved states are stored with shape (bsz, num_heads, seq_len, head_dim)
+            if 'prev_key' in saved_state:
+                prev_key = saved_state['prev_key'].view(bsz * self.num_heads, -1, self.head_dim)
+                if static_kv:
+                    k = prev_key
+                else:
+                    k = torch.cat((prev_key, k), dim=1)
+            if 'prev_value' in saved_state:
+                prev_value = saved_state['prev_value'].view(bsz * self.num_heads, -1, self.head_dim)
+                if static_kv:
+                    v = prev_value
+                else:
+                    v = torch.cat((prev_value, v), dim=1)
+            # The keys are cached before rotation; the full key sequence is
+            # rotated again below on every call.
+            saved_state['prev_key'], saved_state['prev_value'] = k.view(bsz, self.num_heads, -1, self.head_dim), v.view(
+                bsz, self.num_heads, -1, self.head_dim)
+            self._set_input_buffer(incremental_state, saved_state)
+        if incremental_state is not None:
+            # Incremental decoding: keys take positions 0..src_len-1.
+            key_pos = torch.arange(k.shape[-2], device=q.device).unsqueeze(0)
+        else:
+            key_pos = spk_pos_ids_flat
+        k = self.rotary_embeds(k[None, :], positions=key_pos)[0]
+
+        src_len = k.size(1)
+
+        # This is part of a workaround to get around fork/join parallelism
+        # not supporting Optional types.
+        if key_padding_mask is not None and key_padding_mask.shape == torch.Size([]):
+            key_padding_mask = None
+
+        if key_padding_mask is not None:
+            assert key_padding_mask.size(0) == bsz
+            assert key_padding_mask.size(1) == src_len
+
+        if self.add_zero_attn:
+            # Append one all-zero key/value step and widen the masks to match.
+            src_len += 1
+            k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1)
+            v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1)
+            if attn_mask is not None:
+                attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1)
+            if key_padding_mask is not None:
+                key_padding_mask = torch.cat(
+                    [key_padding_mask, torch.zeros(key_padding_mask.size(0), 1).type_as(key_padding_mask)], dim=1)
+
+        attn_weights = torch.bmm(q, k.transpose(1, 2))
+        attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz)
+        assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len]
+
+        if attn_mask is not None:
+            # A 2-D mask is broadcast over batch and heads; a 3-D mask is
+            # repeated once per head.
+            if len(attn_mask.shape) == 2:
+                attn_mask = attn_mask.unsqueeze(0)
+            elif len(attn_mask.shape) == 3:
+                attn_mask = attn_mask[:, None].repeat([1, self.num_heads, 1, 1]).reshape(
+                    bsz * self.num_heads, tgt_len, src_len)
+            attn_weights = attn_weights + attn_mask
+
+        if enc_dec_attn_constraint_mask is not None:  # bs x head x L_kv
+            attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
+            attn_weights = attn_weights.masked_fill(
+                enc_dec_attn_constraint_mask.unsqueeze(2).bool(),
+                -1e8,
+            )
+            attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
+
+        if key_padding_mask is not None:
+            # don't attend to padding symbols
+            attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
+            attn_weights = attn_weights.masked_fill(
+                key_padding_mask.unsqueeze(1).unsqueeze(2),
+                -1e8,
+            )
+            attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
+
+        # Masked pre-softmax scores, returned to the caller as `attn_logits`.
+        attn_logits = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
+
+        if before_softmax:
+            return attn_weights, v
+
+        attn_weights_float = softmax(attn_weights, dim=-1)
+        attn_weights = attn_weights_float.type_as(attn_weights)
+        attn_probs = F.dropout(attn_weights_float.type_as(attn_weights), p=self.dropout, training=self.training)
+
+        if reset_attn_weight is not None:
+            if reset_attn_weight:
+                self.last_attn_probs = attn_probs.detach()
+            else:
+                # NOTE(review): `last_attn_probs` is not initialised in this
+                # class; presumably the base class defines it -- confirm.
+                assert self.last_attn_probs is not None
+                attn_probs = self.last_attn_probs
+        attn = torch.bmm(attn_probs, v)
+        assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim]
+        # Merge heads back: (bsz * num_heads, tgt_len, head_dim) -> (tgt_len, bsz, embed_dim).
+        attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)
+        attn = self.out_proj(attn)
+
+        if need_weights:
+            # The returned weights are the pre-dropout softmax output.
+            attn_weights = attn_weights_float.view(bsz, self.num_heads, tgt_len, src_len).transpose(1, 0)
+            if not need_head_weights:
+                # average attention weights over heads
+                attn_weights = attn_weights.mean(dim=0)
+        else:
+            attn_weights = None
+
+        return attn, (attn_weights, attn_logits)
+
+
+class RotMultiheadAttention2(MultiheadAttention):
+    """Rotary-embedding attention built on
+    `torch.nn.functional.scaled_dot_product_attention`.
+
+    Unlike `RotMultiheadAttention`, this variant never materialises the
+    attention weights and always returns `(attn, (None, None))`.
+    """
+
+    def __init__(self, embed_dim, num_heads, kdim=None, vdim=None, dropout=0., bias=True,
+                 add_bias_kv=False, add_zero_attn=False, self_attention=False,
+                 encoder_decoder_attention=False):
+        super().__init__(embed_dim, num_heads, kdim=kdim, vdim=vdim, dropout=dropout, bias=bias,
+                         add_bias_kv=add_bias_kv, add_zero_attn=add_zero_attn, self_attention=self_attention,
+                         encoder_decoder_attention=encoder_decoder_attention)
+        self.rotary_embeds = RotaryEmbeddings(width=embed_dim // num_heads)
+
+    def forward(
+            self,
+            query, key, value,
+            spk_pos_ids_flat=None,
+            key_padding_mask=None,
+            incremental_state=None,
+            need_weights=True,
+            static_kv=False,
+            attn_mask=None,
+            before_softmax=False,
+            need_head_weights=False,
+            enc_dec_attn_constraint_mask=None,
+            reset_attn_weight=None
+    ):
+        """Input shape: Time x Batch x Channel
+
+        Args:
+            spk_pos_ids_flat (Tensor, optional): positions handed to the
+                rotary embedding for the queries only; the keys always use
+                positions `0..src_len-1`. When None, the queries use
+                positions `[0, tgt_len)`.
+            key_padding_mask (ByteTensor, optional): of shape
+                `(batch, src_len)`. Only its shape is checked here; it is
+                not applied to the attention.
+            incremental_state (optional): when given, cached (un-rotated)
+                keys and values are read from and written back to it.
+            static_kv (bool, optional): reuse the cached keys/values as-is
+                (encoder-decoder attention only).
+            attn_mask (optional): forwarded as `attn_mask` to
+                `scaled_dot_product_attention`; a 3-D mask is repeated once
+                per head first (default: None).
+            need_weights, before_softmax, need_head_weights,
+            enc_dec_attn_constraint_mask, reset_attn_weight: accepted for
+                signature compatibility; they do not influence the result.
+
+        Returns:
+            `(attn, (None, None))` with `attn` of shape
+            `(tgt_len, bsz, embed_dim)`.
+        """
+        if need_head_weights:
+            need_weights = True
+
+        tgt_len, bsz, embed_dim = query.size()
+        assert embed_dim == self.embed_dim
+        assert list(query.size()) == [tgt_len, bsz, embed_dim]
+
+        if incremental_state is not None:
+            saved_state = self._get_input_buffer(incremental_state)
+            if 'prev_key' in saved_state:
+                # previous time steps are cached - no need to recompute
+                # key and value if they are static
+                if static_kv:
+                    assert self.encoder_decoder_attention and not self.self_attention
+                    key = value = None
+        else:
+            saved_state = None
+
+        if self.self_attention:
+            # self-attention
+            q, k, v = self.in_proj_qkv(query)
+        elif self.encoder_decoder_attention:
+            # encoder-decoder attention
+            q = self.in_proj_q(query)
+            if key is None:
+                assert value is None
+                k = v = None
+            else:
+                # Both k and v are projected from `key` in this branch;
+                # `value` is not read.
+                k = self.in_proj_k(key)
+                v = self.in_proj_v(key)
+        else:
+            q = self.in_proj_q(query)
+            k = self.in_proj_k(key)
+            v = self.in_proj_v(value)
+        # No `q * self.scaling` here, unlike RotMultiheadAttention.
+        # NOTE(review): presumably left to scaled_dot_product_attention's
+        # default scaling -- confirm it matches `self.scaling`.
+
+        if self.bias_k is not None:
+            # Append the learned bias key/value as one extra source step and
+            # widen both masks by one (unmasked) column to match.
+            assert self.bias_v is not None
+            k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)])
+            v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)])
+            if attn_mask is not None:
+                attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1)
+            if key_padding_mask is not None:
+                key_padding_mask = torch.cat(
+                    [key_padding_mask, key_padding_mask.new_zeros(key_padding_mask.size(0), 1)], dim=1)
+
+        # Split heads: (len, bsz, embed) -> (bsz * num_heads, len, head_dim).
+        q = q.contiguous().view(tgt_len, bsz * self.num_heads, self.head_dim).transpose(0, 1)
+        if k is not None:
+            k = k.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1)
+        if v is not None:
+            v = v.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1)
+
+        # Apply rot embedding and store incremental_state
+        q = self.rotary_embeds(q[None, :], positions=spk_pos_ids_flat)[0]
+        if saved_state is not None:
+            # saved states are stored with shape (bsz, num_heads, seq_len, head_dim)
+            if 'prev_key' in saved_state:
+                prev_key = saved_state['prev_key'].view(bsz * self.num_heads, -1, self.head_dim)
+                if static_kv:
+                    k = prev_key
+                else:
+                    k = torch.cat((prev_key, k), dim=1)
+            if 'prev_value' in saved_state:
+                prev_value = saved_state['prev_value'].view(bsz * self.num_heads, -1, self.head_dim)
+                if static_kv:
+                    v = prev_value
+                else:
+                    v = torch.cat((prev_value, v), dim=1)
+            # The keys are cached before rotation; the full key sequence is
+            # rotated again below on every call.
+            saved_state['prev_key'], saved_state['prev_value'] = k.view(bsz, self.num_heads, -1, self.head_dim), v.view(
+                bsz, self.num_heads, -1, self.head_dim)
+            self._set_input_buffer(incremental_state, saved_state)
+        # Keys always take positions 0..src_len-1 (spk_pos_ids_flat is only
+        # used for the queries in this class).
+        key_pos = torch.arange(k.shape[-2], device=q.device).unsqueeze(0)
+        k = self.rotary_embeds(k[None, :], positions=key_pos)[0]
+
+        src_len = k.size(1)
+
+        # This is part of a workaround to get around fork/join parallelism
+        # not supporting Optional types.
+        if key_padding_mask is not None and key_padding_mask.shape == torch.Size([]):
+            key_padding_mask = None
+
+        # NOTE(review): key_padding_mask is validated here but never applied
+        # below; padded keys are only excluded if attn_mask already covers
+        # them -- confirm this is intended.
+        if key_padding_mask is not None:
+            assert key_padding_mask.size(0) == bsz
+            assert key_padding_mask.size(1) == src_len
+
+        if attn_mask is not None:
+            # A 2-D mask is broadcast over batch and heads; a 3-D mask is
+            # repeated once per head.
+            if len(attn_mask.shape) == 2:
+                attn_mask = attn_mask.unsqueeze(0)
+            elif len(attn_mask.shape) == 3:
+                attn_mask = attn_mask[:, None].repeat([1, self.num_heads, 1, 1]).reshape(
+                    bsz * self.num_heads, tgt_len, src_len)
+        # NOTE(review): dropout_p is fixed at 0, so `self.dropout` is not
+        # used by this class -- confirm this is intended.
+        attn = torch.nn.functional.scaled_dot_product_attention(
+            q, k, v, attn_mask=attn_mask, dropout_p=0, is_causal=False)
+        assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim]
+        # Merge heads back: (bsz * num_heads, tgt_len, head_dim) -> (tgt_len, bsz, embed_dim).
+        # NOTE(review): `self.out_proj` is not applied here, unlike in
+        # RotMultiheadAttention -- confirm this is intended.
+        attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)
+        attn_logits = None
+        attn_weights = None
+        return attn, (attn_weights, attn_logits)
+
+
+class RotDecSALayer(nn.Module):
+    """Decoder layer: a rotary self-attention sub-layer followed by a feed-forward sub-layer.
+
+    Each sub-layer is applied as ``residual + dropout(sublayer(x))``. With ``post_ln=False`` the
+    LayerNorm runs on the sub-layer input; with ``post_ln=True`` it runs on the residual sum.
+    """
+
+    def __init__(self, c, num_heads, dropout, attention_dropout=0.1, relu_dropout=0.1,
+                 kernel_size=9, ffn_hidden_size=1024, act='gelu', post_ln=False):
+        super().__init__()
+        self.c = c
+        self.dropout = dropout
+        self.post_ln = post_ln
+        self.layer_norm1 = LayerNorm(c)
+        self.self_attn = RotMultiheadAttention(
+            c, num_heads, self_attention=True, dropout=attention_dropout, bias=False)
+        self.layer_norm2 = LayerNorm(c)
+        self.ffn = TransformerFFNLayer(
+            c, ffn_hidden_size, padding='LEFT', kernel_size=kernel_size, dropout=relu_dropout, act=act)
+
+    def forward(self, x, encoder_out=None, encoder_padding_mask=None, incremental_state=None,
+                self_attn_mask=None, self_attn_padding_mask=None, attn_out=None,
+                reset_attn_weight=None, spk_pos_ids_flat=None, **kwargs):
+        """Run both sub-layers on ``x`` and return ``(x, attn_weights)``.
+
+        ``encoder_out``, ``encoder_padding_mask``, ``attn_out`` and ``reset_attn_weight`` are
+        accepted but unused. ``kwargs['layer_norm_training']``, when not ``None``, overwrites the
+        ``training`` flag of both LayerNorm modules, and the change persists after the call.
+        """
+        layer_norm_training = kwargs.get('layer_norm_training', None)
+        if layer_norm_training is not None:
+            self.layer_norm1.training = layer_norm_training
+            self.layer_norm2.training = layer_norm_training
+        # self-attention sub-layer
+        residual = x
+        if not self.post_ln:
+            x = self.layer_norm1(x)
+        x, (attn_weights, _) = self.self_attn(
+            query=x, key=x, value=x, key_padding_mask=self_attn_padding_mask,
+            incremental_state=incremental_state, attn_mask=self_attn_mask,
+            spk_pos_ids_flat=spk_pos_ids_flat)
+        x = F.dropout(x, self.dropout, training=self.training)
+        x = residual + x
+        if self.post_ln:
+            x = self.layer_norm1(x)
+
+        # feed-forward sub-layer
+        residual = x
+        if not self.post_ln:
+            x = self.layer_norm2(x)
+        x = self.ffn(x, incremental_state=incremental_state)
+        x = F.dropout(x, self.dropout, training=self.training)
+        x = residual + x
+        if self.post_ln:
+            x = self.layer_norm2(x)
+        return x, attn_weights
+
+    def clear_buffer(self, input, encoder_out=None, encoder_padding_mask=None, incremental_state=None):
+        """Forward ``clear_buffer(incremental_state)`` to the attention and FFN sub-modules."""
+        # Only `self_attn` exists here (no `encoder_attn`); assumes it exposes clear_buffer -- TODO confirm.
+        self.self_attn.clear_buffer(incremental_state)
+        self.ffn.clear_buffer(incremental_state)
+
+    def set_buffer(self, name, tensor, incremental_state):
+        """Store ``tensor`` under ``name`` in ``incremental_state`` via ``set_incremental_state``."""
+        return set_incremental_state(self, incremental_state, name, tensor)
+
+
+class RotDecSALayer2(RotDecSALayer):
+    """``RotDecSALayer`` whose ``self_attn`` is replaced by a ``RotMultiheadAttention2``."""
+
+    def __init__(self, c, num_heads, dropout, attention_dropout=0.1, relu_dropout=0.1, kernel_size=9,
+                 ffn_hidden_size=1024, act='gelu', post_ln=False):
+        super().__init__(c, num_heads, dropout, attention_dropout=attention_dropout, relu_dropout=relu_dropout,
+                         kernel_size=kernel_size, ffn_hidden_size=ffn_hidden_size, act=act, post_ln=post_ln)
+        self.self_attn = RotMultiheadAttention2(c, num_heads, self_attention=True, dropout=attention_dropout, bias=False)
+
+
+class RotTransformerDecoderLayer(nn.Module):
+    """Thin wrapper that builds one rotary self-attention decoder layer and delegates to it.
+
+    ``op_version == 1`` selects ``RotDecSALayer``; any other value selects ``RotDecSALayer2``.
+    """
+
+    def __init__(self, hidden_size, dropout, kernel_size=9, num_heads=8, ffn_hidden_size=1024, post_ln=False,
+                 op_version=1):
+        super().__init__()
+        self.hidden_size = hidden_size
+        self.dropout = dropout
+        self.num_heads = num_heads
+        layer_cls = RotDecSALayer if op_version == 1 else RotDecSALayer2
+        self.op = layer_cls(
+            hidden_size, num_heads, dropout=dropout, attention_dropout=0.0, relu_dropout=dropout,
+            kernel_size=kernel_size, ffn_hidden_size=ffn_hidden_size, post_ln=post_ln)
+
+    def forward(self, x, **kwargs):
+        """Call the wrapped layer with ``x`` and all keyword arguments; return its result."""
+        return self.op(x, **kwargs)
+
+    def clear_buffer(self, *args):
+        """Forward the positional arguments to the wrapped layer's ``clear_buffer``."""
+        return self.op.clear_buffer(*args)
+
+    def set_buffer(self, *args):
+        """Forward the positional arguments to the wrapped layer's ``set_buffer``."""
+        return self.op.set_buffer(*args)
diff --git a/modules/commons/taming_tfm_modules.py b/modules/commons/taming_tfm_modules.py
new file mode 100644
index 0000000000000000000000000000000000000000..79418633fbf06fac1afaa2d794a9ef2af9bdb7b3
--- /dev/null
+++ b/modules/commons/taming_tfm_modules.py
@@ -0,0 +1,366 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def nonlinearity(x):
+    """Swish activation: multiply ``x`` elementwise by its own sigmoid."""
+    return torch.sigmoid(x) * x
+
+
+class Normalize(nn.Module):
+    """Layer norm over dim 1 with learned scale/shift, followed by a Linear over the same dim."""
+
+    def __init__(self, channels, eps=1e-5):
+        super().__init__()
+        self.channels, self.eps = channels, eps
+        self.gamma = nn.Parameter(torch.ones(channels))
+        self.beta = nn.Parameter(torch.zeros(channels))
+        self.proj = nn.Linear(channels, channels)
+
+    def forward(self, x):
+        """Normalize and project dim 1 of ``x``; dims 1 and 2 are swapped for the ops and swapped back."""
+        swapped = x.transpose(1, 2)  # move dim 1 last so layer_norm / Linear act on it
+        normed = F.layer_norm(swapped, (self.channels,), self.gamma, self.beta, self.eps)
+        return self.proj(normed).transpose(1, 2)
+
+
+class Upsample(nn.Module):
+    """Double the last dimension by nearest-neighbour interpolation, optionally followed by a conv."""
+
+    def __init__(self, in_channels, with_conv):
+        super().__init__()
+        self.with_conv = with_conv
+        if with_conv:
+            # kernel 3 / stride 1 / padding 1 keeps the length unchanged
+            self.conv = nn.Conv1d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
+
+    def forward(self, x):
+        """Return ``x`` upsampled by a factor of 2, passed through ``self.conv`` when enabled."""
+        upsampled = F.interpolate(x, scale_factor=2.0, mode="nearest")
+        if not self.with_conv:
+            return upsampled
+        return self.conv(upsampled)
+
+
+class Downsample(nn.Module):
+    """Halve the last dimension, with a strided conv or with average pooling.
+
+    Both paths produce ``floor(L / 2)`` frames for an input of length ``L >= 2``.
+    """
+
+    def __init__(self, in_channels, with_conv):
+        super().__init__()
+        self.with_conv = with_conv
+        if with_conv:
+            # kernel 4 / stride 2 with symmetric padding 1 on both sides
+            self.conv = nn.Conv1d(in_channels, in_channels, kernel_size=4, stride=2, padding=1)
+
+    def forward(self, x):
+        """Return ``x`` downsampled by 2 via ``self.conv`` if enabled, else via ``avg_pool1d``."""
+        if not self.with_conv:
+            return F.avg_pool1d(x, kernel_size=2, stride=2)
+        return self.conv(x)
+
+
+class ResnetBlock(nn.Module):
+    """Masked 1-D residual block: two (Normalize -> swish -> Conv1d k=3) stages plus a skip path.
+
+    When ``in_channels != out_channels`` the skip path is projected by ``conv_shortcut``
+    (kernel 3) if ``conv_shortcut=True``, otherwise by ``nin_shortcut`` (kernel 1).
+    ``temb_proj`` is created when ``temb_channels > 0`` but ``forward`` never uses it.
+    """
+
+    def __init__(self, *, in_channels, out_channels=None, conv_shortcut=False,
+                 temb_channels=512):
+        super().__init__()
+        if out_channels is None:
+            out_channels = in_channels
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.use_conv_shortcut = conv_shortcut
+
+        self.norm1 = Normalize(in_channels)
+        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
+        if temb_channels > 0:
+            self.temb_proj = nn.Linear(temb_channels, out_channels)
+        self.norm2 = Normalize(out_channels)
+        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
+        # the skip-path projection is only registered when the channel count changes
+        if in_channels != out_channels:
+            if conv_shortcut:
+                self.conv_shortcut = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
+            else:
+                self.nin_shortcut = nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
+
+    def forward(self, x, _, x_mask):
+        """Return ``(skip(x) + stages(x)) * x_mask``.
+
+        Args:
+            x: input with ``in_channels`` entries on dim 1 (it is fed to ``conv1``).
+            _: ignored.
+            x_mask: multiplied into the activations after every stage.
+        """
+        x = x * x_mask
+
+        # first stage
+        h = self.norm1(x) * x_mask
+        h = nonlinearity(h) * x_mask
+        h = self.conv1(h) * x_mask
+
+        # second stage
+        h = self.norm2(h) * x_mask
+        h = nonlinearity(h) * x_mask
+        h = self.conv2(h) * x_mask
+
+        # project the skip path only when the channel count changes
+        if self.in_channels != self.out_channels:
+            shortcut = self.conv_shortcut if self.use_conv_shortcut else self.nin_shortcut
+            x = shortcut(x) * x_mask
+
+        return (x + h) * x_mask
+
+
+class AttnBlock(nn.Module):
+    """Single-head masked self-attention over the last dimension, with a residual connection.
+
+    Queries, keys, values and the output projection are kernel-1 ``Conv1d`` layers that keep
+    the channel count at ``in_channels``.
+    """
+
+    def __init__(self, in_channels):
+        super().__init__()
+        self.in_channels = in_channels
+
+        def conv1x1():
+            # every projection is a kernel-1 conv from in_channels to in_channels
+            return nn.Conv1d(in_channels, in_channels, kernel_size=1, stride=1, padding=0)
+
+        self.norm = Normalize(in_channels)
+        self.q = conv1x1()
+        self.k = conv1x1()
+        self.v = conv1x1()
+        self.proj_out = conv1x1()
+
+    def forward(self, x, x_mask):
+        """Return ``(x + proj_out(attention(norm(x)))) * x_mask``.
+
+        Args:
+            x: tensor whose projections are unpacked as ``(b, c, t)``.
+            x_mask: multiplied into the activations, and added as ``(1 - x_mask) * -1e8`` to the
+                ``(b, t, t)`` scores both as-is and with dims 1 and 2 swapped.
+                NOTE(review): presumably ``(b, 1, t)`` with 1 = valid frame -- confirm with callers.
+
+        Returns:
+            Tensor of shape ``(b, c, t)``.
+        """
+        normed = self.norm(x * x_mask) * x_mask
+        q = self.q(normed) * x_mask
+        k = self.k(normed) * x_mask
+        v = self.v(normed) * x_mask
+        b, c, t = q.shape
+
+        # scores[b, i, j] = sum_c q[b, c, i] * k[b, c, j], scaled by c ** -0.5
+        q_t = q.permute(0, 2, 1)  # b, t, c
+        scores = torch.bmm(q_t, k)
+        scores = scores * (int(c) ** (-0.5))
+        # large negative bias on masked positions before the softmax over j
+        neg = (1 - x_mask) * -1e8
+        scores = scores + neg + neg.transpose(1, 2)
+        probs = torch.nn.functional.softmax(scores, dim=2)
+
+        # attended[b, c, i] = sum_j v[b, c, j] * probs[b, i, j]
+        probs_t = probs.permute(0, 2, 1)
+        attended = torch.bmm(v, probs_t).reshape(b, c, t)
+        attended = self.proj_out(attended) * x_mask
+
+        return (x + attended) * x_mask
+
+
+class Encoder(nn.Module):
+    """Masked 1-D convolutional encoder that downsamples by 2 between resolution levels.
+
+    Level ``i`` holds ``num_res_blocks`` ``ResnetBlock``s with ``ch * ch_mult[i]`` output channels;
+    only the last level adds an ``AttnBlock`` after each of them, and every other level ends
+    with a ``Downsample``. A ResnetBlock/AttnBlock/ResnetBlock middle stage and a
+    Normalize -> swish -> Conv1d head producing ``out_ch`` channels follow.
+    """
+
+    def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks,
+                 resamp_with_conv=False, in_channels):
+        super().__init__()
+        self.ch = ch
+        self.temb_ch = 0  # passed as temb_channels, so ResnetBlocks are built without temb_proj
+        self.num_resolutions = len(ch_mult)
+        self.num_res_blocks = num_res_blocks
+        self.in_channels = in_channels
+
+        # downsampling
+        self.conv_in = nn.Conv1d(in_channels, self.ch, kernel_size=3, stride=1, padding=1)
+        in_ch_mult = (1,) + tuple(ch_mult)
+        self.down = nn.ModuleList()
+        for i_level in range(self.num_resolutions):
+            is_last = i_level == self.num_resolutions - 1
+            block = nn.ModuleList()
+            attn = nn.ModuleList()
+            block_in = ch * in_ch_mult[i_level]
+            block_out = ch * ch_mult[i_level]
+            for _ in range(self.num_res_blocks):
+                block.append(ResnetBlock(in_channels=block_in, out_channels=block_out, temb_channels=self.temb_ch))
+                block_in = block_out
+                if is_last:
+                    attn.append(AttnBlock(block_in))
+            down = nn.Module()
+            down.block = block
+            down.attn = attn
+            if not is_last:
+                down.downsample = Downsample(block_in, resamp_with_conv)
+            self.down.append(down)
+
+        # middle
+        self.mid = nn.Module()
+        self.mid.block_1 = ResnetBlock(in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch)
+        self.mid.attn_1 = AttnBlock(block_in)
+        self.mid.block_2 = ResnetBlock(in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch)
+
+        # end
+        self.norm_out = Normalize(block_in)
+        self.conv_out = nn.Conv1d(block_in, out_ch, kernel_size=3, stride=1, padding=1)
+
+    def forward(self, x, x_mask):
+        """Encode ``x`` and return a tensor with ``out_ch`` entries on the last dimension.
+
+        Args:
+            x: indexed as ``(batch, length, in_channels)``; permuted to channels-first internally.
+            x_mask: same layout with a size-1 last dimension, or ``None`` to use an all-ones mask.
+                NOTE(review): presumably 1 marks valid frames and 0 padding -- confirm with callers.
+        """
+        if x_mask is None:
+            x_mask = torch.ones_like(x[:, :, :1])  # derive from `x`: the mask itself is None here
+        x = x.permute(0, 2, 1)
+        x_mask = x_mask.permute(0, 2, 1)
+
+        temb = None
+        # downsampling
+        hs = [self.conv_in(x) * x_mask]
+        for i_level in range(self.num_resolutions):
+            x_mask_ = x_mask[:, :, ::2 ** i_level]  # mask subsampled to this level's length
+            for i_block in range(self.num_res_blocks):
+                h = self.down[i_level].block[i_block](hs[-1], temb, x_mask_) * x_mask_
+                if len(self.down[i_level].attn) > 0:
+                    h = self.down[i_level].attn[i_block](h, x_mask_) * x_mask_
+                hs.append(h)
+            if i_level != self.num_resolutions - 1:
+                hs.append(self.down[i_level].downsample(hs[-1]) * x_mask_[:, :, ::2])
+
+        x_mask_ = x_mask[:, :, ::2 ** (self.num_resolutions - 1)]
+        # middle
+        h = hs[-1] * x_mask_
+        h = self.mid.block_1(h, temb, x_mask_) * x_mask_
+        h = self.mid.attn_1(h, x_mask_) * x_mask_
+        h = self.mid.block_2(h, temb, x_mask_) * x_mask_
+
+        # end
+        h = self.norm_out(h) * x_mask_
+        h = nonlinearity(h) * x_mask_
+        h = self.conv_out(h) * x_mask_
+        return h.permute(0, 2, 1)
+
+
+class Decoder(nn.Module):
+    """Masked 1-D convolutional decoder that upsamples by 2 between resolution levels.
+
+    A Conv1d maps ``in_channels`` to ``ch * ch_mult[-1]`` channels, then a
+    ResnetBlock/AttnBlock/ResnetBlock middle stage runs. Levels are processed from last to first,
+    each with ``num_res_blocks + 1`` ``ResnetBlock``s (plus ``AttnBlock``s on the last level only)
+    and, except for level 0, an ``Upsample``.
+    """
+
+    def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks,
+                 resamp_with_conv=True, in_channels, give_pre_end=False):
+        super().__init__()
+        self.ch = ch
+        self.temb_ch = 0  # passed as temb_channels, so ResnetBlocks are built without temb_proj
+        self.num_resolutions = len(ch_mult)
+        self.num_res_blocks = num_res_blocks
+        self.in_channels = in_channels
+        self.give_pre_end = give_pre_end
+
+        block_in = ch * ch_mult[self.num_resolutions - 1]  # channel count at the lowest resolution
+
+        # z to block_in
+        self.conv_in = nn.Conv1d(in_channels, block_in, kernel_size=3, stride=1, padding=1)
+
+        # middle
+        self.mid = nn.Module()
+        self.mid.block_1 = ResnetBlock(in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch)
+        self.mid.attn_1 = AttnBlock(block_in)
+        self.mid.block_2 = ResnetBlock(in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch)
+
+        # upsampling
+        self.up = nn.ModuleList()
+        for i_level in reversed(range(self.num_resolutions)):
+            block = nn.ModuleList()
+            attn = nn.ModuleList()
+            block_out = ch * ch_mult[i_level]
+            for _ in range(self.num_res_blocks + 1):
+                block.append(ResnetBlock(in_channels=block_in, out_channels=block_out, temb_channels=self.temb_ch))
+                block_in = block_out
+                if i_level == self.num_resolutions - 1:
+                    attn.append(AttnBlock(block_in))
+            up = nn.Module()
+            up.block = block
+            up.attn = attn
+            if i_level != 0:
+                up.upsample = Upsample(block_in, resamp_with_conv)
+            self.up.insert(0, up)  # prepend so that self.up[i] is level i
+
+        # end
+        self.norm_out = Normalize(block_in)
+        self.conv_out = nn.Conv1d(block_in, out_ch, kernel_size=3, stride=1, padding=1)
+
+    def forward(self, z, x_mask):
+        """Decode ``z``; return the head output, or the pre-head activations if ``give_pre_end``.
+
+        Args:
+            z: indexed as ``(batch, length, in_channels)``; permuted to channels-first internally.
+            x_mask: full-resolution mask in the same layout with a size-1 last dimension. ``None``
+                builds an all-ones mask ``2 ** (num_resolutions - 1)`` times as long as ``z``
+                (8x for the default four levels).
+                NOTE(review): presumably 1 marks valid frames and 0 padding -- confirm with callers.
+        """
+        if x_mask is None:
+            # One x2 Upsample per level except level 0: derive the factor instead of hard-coding 8.
+            upsample_factor = 2 ** (self.num_resolutions - 1)
+            x_mask = torch.ones_like(z[:, :, :1]).repeat(1, upsample_factor, 1)
+        z = z.permute(0, 2, 1)
+        x_mask = x_mask.permute(0, 2, 1)
+
+        temb = None  # no timestep embedding
+        h = self.conv_in(z)  # z to block_in
+
+        # middle
+        i_level = self.num_resolutions - 1
+        x_mask_ = x_mask[:, :, ::2 ** i_level]
+        h = self.mid.block_1(h, temb, x_mask_)
+        h = self.mid.attn_1(h, x_mask_)
+        h = self.mid.block_2(h, temb, x_mask_)
+
+        # upsampling
+        for i_level in reversed(range(self.num_resolutions)):
+            x_mask_ = x_mask[:, :, ::2 ** i_level]
+            for i_block in range(self.num_res_blocks + 1):
+                h = self.up[i_level].block[i_block](h, temb, x_mask_)
+                if len(self.up[i_level].attn) > 0:
+                    h = self.up[i_level].attn[i_block](h, x_mask_)
+            if i_level != 0:
+                h = self.up[i_level].upsample(h)
+
+        # end
+        if self.give_pre_end:
+            return h
+
+        h = self.norm_out(h)
+        h = nonlinearity(h)
+        h = self.conv_out(h) * x_mask
+        return h.permute(0, 2, 1)
diff --git a/modules/commons/transformer.py b/modules/commons/transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..36e09edfb2a124f7cc8913254b167fefec4f5b96
--- /dev/null
+++ b/modules/commons/transformer.py
@@ -0,0 +1,752 @@
+import math
+import torch
+from torch import nn
+from torch.nn import Parameter, Linear
+from modules.commons.layers import LayerNorm, Embedding
+from utils.nn.seq_utils import get_incremental_state, set_incremental_state, softmax, make_positions
+import torch.nn.functional as F
+
+DEFAULT_MAX_SOURCE_POSITIONS = 3000
+DEFAULT_MAX_TARGET_POSITIONS = 3000
+
+
+class SinusoidalPositionalEmbedding(nn.Module):
+    """Sinusoidal positional embeddings read from a table that is rebuilt whenever it is too small."""
+
+    def __init__(self, embedding_dim, padding_idx, init_size=1024):
+        super().__init__()
+        self.embedding_dim = embedding_dim
+        self.padding_idx = padding_idx
+        # plain tensor attribute, not a registered buffer or parameter
+        self.weights = SinusoidalPositionalEmbedding.get_embedding(init_size, embedding_dim, padding_idx)
+        # dummy buffer: `forward` casts `weights` to this tensor's dtype/device
+        self.register_buffer('_float_tensor', torch.FloatTensor(1))
+
+    @staticmethod
+    def get_embedding(num_embeddings, embedding_dim, padding_idx=None):
+        """Build a ``(num_embeddings, embedding_dim)`` table of sinusoidal embeddings.
+
+        Row ``p`` is ``[sin(p * f), cos(p * f)]`` for ``embedding_dim // 2`` geometrically spaced
+        frequencies ``f``; an odd ``embedding_dim`` gets one extra zero column.
+        This matches the implementation in tensor2tensor, but differs slightly
+        from the description in Section 3.5 of "Attention Is All You Need".
+        """
+        half_dim = embedding_dim // 2
+        log_step = math.log(10000) / (half_dim - 1)
+        inv_freq = torch.exp(torch.arange(half_dim, dtype=torch.float) * -log_step)
+        angles = torch.arange(num_embeddings, dtype=torch.float).unsqueeze(1) * inv_freq.unsqueeze(0)
+        table = torch.cat([torch.sin(angles), torch.cos(angles)], dim=1).view(num_embeddings, -1)
+        if embedding_dim % 2 == 1:
+            # zero pad
+            table = torch.cat([table, torch.zeros(num_embeddings, 1)], dim=1)
+        if padding_idx is not None:
+            table[padding_idx, :] = 0
+        return table
+
+    def forward(self, input, incremental_state=None, timestep=None, positions=None, **kwargs):
+        """Look up embeddings for ``input``, whose first two dims are ``[bsz x seqlen]``.
+
+        With ``incremental_state`` set, one table row is returned, expanded to ``(bsz, 1, dim)``:
+        row ``padding_idx + timestep + 1`` if ``timestep`` is given, else ``padding_idx + seqlen``.
+        Otherwise rows are selected by ``positions`` or, when it is ``None``, by
+        ``make_positions(input, padding_idx)``, giving ``(bsz, seqlen, dim)``.
+        """
+        bsz, seq_len = input.shape[:2]
+        required = self.padding_idx + 1 + seq_len
+        if self.weights is None or required > self.weights.size(0):
+            # recompute/expand embeddings if needed
+            self.weights = SinusoidalPositionalEmbedding.get_embedding(required, self.embedding_dim, self.padding_idx)
+        self.weights = self.weights.to(self._float_tensor)
+
+        if incremental_state is not None:
+            # positions is the same for every token when decoding a single step
+            pos = timestep.view(-1)[0] + 1 if timestep is not None else seq_len
+            return self.weights[self.padding_idx + pos, :].expand(bsz, 1, -1)
+
+        if positions is None:
+            positions = make_positions(input, self.padding_idx)
+        return self.weights.index_select(0, positions.view(-1)).view(bsz, seq_len, -1).detach()
+
+    def max_positions(self):
+        """Maximum number of supported positions."""
+        return int(1e5)  # an arbitrary large number
+
+
+class TransformerFFNLayer(nn.Module):
+    """Feed-forward layer: Conv1d over time -> scale -> activation -> dropout -> Linear.
+
+    ``padding`` must be ``'SAME'`` (``kernel_size // 2`` on both sides) or ``'LEFT'``
+    (``kernel_size - 1`` zeros on the left only); any other value raises ``ValueError``.
+    """
+
+    def __init__(self, hidden_size, filter_size, padding="SAME", kernel_size=1, dropout=0., act='gelu'):
+        super().__init__()
+        self.kernel_size = kernel_size
+        self.dropout = dropout
+        self.act = act
+        if padding == 'SAME':
+            self.ffn_1 = nn.Conv1d(hidden_size, filter_size, kernel_size, padding=kernel_size // 2)
+        elif padding == 'LEFT':
+            self.ffn_1 = nn.Sequential(
+                nn.ConstantPad1d((kernel_size - 1, 0), 0.0), nn.Conv1d(hidden_size, filter_size, kernel_size))
+        else:
+            # fail here rather than leave `ffn_1` undefined until the first forward call
+            raise ValueError(f"padding must be 'SAME' or 'LEFT', got {padding!r}")
+        self.ffn_2 = Linear(filter_size, hidden_size)
+
+    def forward(self, x, incremental_state=None):
+        """Apply the layer to ``x`` (``T x B x C``); in incremental mode return only the newest frame."""
+        if incremental_state is not None:
+            saved_state = self._get_input_buffer(incremental_state)
+            if 'prev_input' in saved_state:
+                x = torch.cat((saved_state['prev_input'], x), dim=0)
+            # keep only the last `kernel_size` frames: they are both the conv input and the new cache
+            x = x[-self.kernel_size:]
+            saved_state['prev_input'] = x
+            self._set_input_buffer(incremental_state, saved_state)
+
+        x = self.ffn_1(x.permute(1, 2, 0)).permute(2, 0, 1)  # conv runs on B x C x T
+        x = x * self.kernel_size ** -0.5
+
+        if incremental_state is not None:
+            x = x[-1:]
+        if self.act == 'gelu':
+            x = F.gelu(x)
+        if self.act == 'relu':
+            x = F.relu(x)
+        x = F.dropout(x, self.dropout, training=self.training)
+        return self.ffn_2(x)
+
+    def _get_input_buffer(self, incremental_state):
+        """Return the state stored for this module under key ``'f'``, or ``{}`` if it is falsy."""
+        return get_incremental_state(self, incremental_state, 'f') or {}
+
+    def _set_input_buffer(self, incremental_state, buffer):
+        """Store ``buffer`` for this module under key ``'f'``."""
+        set_incremental_state(self, incremental_state, 'f', buffer)
+
+    def clear_buffer(self, incremental_state):
+        """Remove the cached ``'prev_input'`` entry, if any, from ``incremental_state``."""
+        if incremental_state is not None:
+            saved_state = self._get_input_buffer(incremental_state)
+            if 'prev_input' in saved_state:
+                del saved_state['prev_input']
+            self._set_input_buffer(incremental_state, saved_state)
+
+
+class MultiheadAttention(nn.Module):
+    def __init__(self, embed_dim, num_heads, kdim=None, vdim=None, dropout=0., bias=True,
+                 add_bias_kv=False, add_zero_attn=False, self_attention=False,
+                 encoder_decoder_attention=False):
+        super().__init__()
+        self.embed_dim = embed_dim
+        self.kdim = kdim if kdim is not None else embed_dim
+        self.vdim = vdim if vdim is not None else embed_dim
+        self.qkv_same_dim = self.kdim == embed_dim and self.vdim == embed_dim
+
+        self.num_heads = num_heads
+        self.dropout = dropout
+        self.head_dim = embed_dim // num_heads
+        assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"
+        self.scaling = self.head_dim ** -0.5
+
+        self.self_attention = self_attention
+        self.encoder_decoder_attention = encoder_decoder_attention
+
+        assert not self.self_attention or self.qkv_same_dim, 'Self-attention requires query, key and ' \
+                                                             'value to be of the same size'
+
+        if self.qkv_same_dim:
+            self.in_proj_weight = Parameter(torch.Tensor(3 * embed_dim, embed_dim))
+        else:
+            self.k_proj_weight = Parameter(torch.Tensor(embed_dim, self.kdim))
+            self.v_proj_weight = Parameter(torch.Tensor(embed_dim, self.vdim))
+            self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim))
+
+        if bias:
+            self.in_proj_bias = Parameter(torch.Tensor(3 * embed_dim))
+        else:
+            self.register_parameter('in_proj_bias', None)
+
+        self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
+
+        if add_bias_kv:
+            self.bias_k = Parameter(torch.Tensor(1, 1, embed_dim))
+            self.bias_v = Parameter(torch.Tensor(1, 1, embed_dim))
+        else:
+            self.bias_k = self.bias_v = None
+
+        self.add_zero_attn = add_zero_attn
+
+        self.reset_parameters()
+
+        self.enable_torch_version = False
+        self.last_attn_probs = None
+
+    def reset_parameters(self):
+        if self.qkv_same_dim:
+            nn.init.xavier_uniform_(self.in_proj_weight)
+        else:
+            nn.init.xavier_uniform_(self.k_proj_weight)
+            nn.init.xavier_uniform_(self.v_proj_weight)
+            nn.init.xavier_uniform_(self.q_proj_weight)
+
+        nn.init.xavier_uniform_(self.out_proj.weight)
+        if self.in_proj_bias is not None:
+            nn.init.constant_(self.in_proj_bias, 0.)
+            nn.init.constant_(self.out_proj.bias, 0.)
+        if self.bias_k is not None:
+            nn.init.xavier_normal_(self.bias_k)
+        if self.bias_v is not None:
+            nn.init.xavier_normal_(self.bias_v)
+
+    def forward(
+            self,
+            query, key, value,
+            key_padding_mask=None,
+            incremental_state=None,
+            need_weights=True,
+            static_kv=False,
+            attn_mask=None,
+            before_softmax=False,
+            need_head_weights=False,
+            enc_dec_attn_constraint_mask=None,
+            reset_attn_weight=None
+    ):
+        """Input shape: Time x Batch x Channel
+
+        Args:
+            key_padding_mask (ByteTensor, optional): mask to exclude
+                keys that are pads, of shape `(batch, src_len)`, where
+                padding elements are indicated by 1s.
+            need_weights (bool, optional): return the attention weights,
+                averaged over heads (default: False).
+            attn_mask (ByteTensor, optional): typically used to
+                implement causal attention, where the mask prevents the
+                attention from looking forward in time (default: None).
+            before_softmax (bool, optional): return the raw attention
+                weights and values before the attention softmax.
+            need_head_weights (bool, optional): return the attention
+                weights for each head. Implies *need_weights*. Default:
+                return the average attention weights over all heads.
+        """
+        if need_head_weights:
+            need_weights = True
+
+        tgt_len, bsz, embed_dim = query.size()
+        assert embed_dim == self.embed_dim
+        assert list(query.size()) == [tgt_len, bsz, embed_dim]
+
+        if self.enable_torch_version and incremental_state is None and not static_kv and reset_attn_weight is None:
+            if self.qkv_same_dim:
+                return F.multi_head_attention_forward(query, key, value,
+                                                      self.embed_dim, self.num_heads,
+                                                      self.in_proj_weight,
+                                                      self.in_proj_bias, self.bias_k, self.bias_v,
+                                                      self.add_zero_attn, self.dropout,
+                                                      self.out_proj.weight, self.out_proj.bias,
+                                                      self.training, key_padding_mask, need_weights,
+                                                      attn_mask)
+            else:
+                return F.multi_head_attention_forward(query, key, value,
+                                                      self.embed_dim, self.num_heads,
+                                                      torch.empty([0]),
+                                                      self.in_proj_bias, self.bias_k, self.bias_v,
+                                                      self.add_zero_attn, self.dropout,
+                                                      self.out_proj.weight, self.out_proj.bias,
+                                                      self.training, key_padding_mask, need_weights,
+                                                      attn_mask, use_separate_proj_weight=True,
+                                                      q_proj_weight=self.q_proj_weight,
+                                                      k_proj_weight=self.k_proj_weight,
+                                                      v_proj_weight=self.v_proj_weight)
+
+        if incremental_state is not None:
+            saved_state = self._get_input_buffer(incremental_state)
+            if 'prev_key' in saved_state:
+                # previous time steps are cached - no need to recompute
+                # key and value if they are static
+                if static_kv:
+                    assert self.encoder_decoder_attention and not self.self_attention
+                    key = value = None
+        else:
+            saved_state = None
+
+        if self.self_attention:
+            # self-attention
+            q, k, v = self.in_proj_qkv(query)
+        elif self.encoder_decoder_attention:
+            # encoder-decoder attention
+            q = self.in_proj_q(query)
+            if key is None:
+                assert value is None
+                k = v = None
+            else:
+                k = self.in_proj_k(key)
+                v = self.in_proj_v(key)
+
+        else:
+            q = self.in_proj_q(query)
+            k = self.in_proj_k(key)
+            v = self.in_proj_v(value)
+        q = q * self.scaling
+
+        if self.bias_k is not None:
+            assert self.bias_v is not None
+            k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)])
+            v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)])
+            if attn_mask is not None:
+                attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1)
+            if key_padding_mask is not None:
+                key_padding_mask = torch.cat(
+                    [key_padding_mask, key_padding_mask.new_zeros(key_padding_mask.size(0), 1)], dim=1)
+
+        q = q.contiguous().view(tgt_len, bsz * self.num_heads, self.head_dim).transpose(0, 1)
+        if k is not None:
+            k = k.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1)
+        if v is not None:
+            v = v.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1)
+
+        if saved_state is not None:
+            # saved states are stored with shape (bsz, num_heads, seq_len, head_dim)
+            if 'prev_key' in saved_state:
+                prev_key = saved_state['prev_key'].view(bsz * self.num_heads, -1, self.head_dim)
+                if static_kv:
+                    k = prev_key
+                else:
+                    k = torch.cat((prev_key, k), dim=1)
+            if 'prev_value' in saved_state:
+                prev_value = saved_state['prev_value'].view(bsz * self.num_heads, -1, self.head_dim)
+                if static_kv:
+                    v = prev_value
+                else:
+                    v = torch.cat((prev_value, v), dim=1)
+            if 'prev_key_padding_mask' in saved_state and saved_state['prev_key_padding_mask'] is not None:
+                prev_key_padding_mask = saved_state['prev_key_padding_mask']
+                if static_kv:
+                    key_padding_mask = prev_key_padding_mask
+                else:
+                    key_padding_mask = torch.cat((prev_key_padding_mask, key_padding_mask), dim=1)
+
+            saved_state['prev_key'] = k.view(bsz, self.num_heads, -1, self.head_dim)
+            saved_state['prev_value'] = v.view(bsz, self.num_heads, -1, self.head_dim)
+            saved_state['prev_key_padding_mask'] = key_padding_mask
+
+            self._set_input_buffer(incremental_state, saved_state)
+
+        src_len = k.size(1)
+
+        # This is part of a workaround to get around fork/join parallelism
+        # not supporting Optional types.
+        if key_padding_mask is not None and key_padding_mask.shape == torch.Size([]):
+            key_padding_mask = None
+
+        if key_padding_mask is not None:
+            assert key_padding_mask.size(0) == bsz
+            assert key_padding_mask.size(1) == src_len
+
+        if self.add_zero_attn:
+            src_len += 1
+            k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1)
+            v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1)
+            if attn_mask is not None:
+                attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1)
+            if key_padding_mask is not None:
+                key_padding_mask = torch.cat(
+                    [key_padding_mask, torch.zeros(key_padding_mask.size(0), 1).type_as(key_padding_mask)], dim=1)
+
+        attn_weights = torch.bmm(q, k.transpose(1, 2))
+        attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz)
+
+        assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len]
+
+        if attn_mask is not None:
+            if len(attn_mask.shape) == 2:
+                attn_mask = attn_mask.unsqueeze(0)
+            elif len(attn_mask.shape) == 3:
+                attn_mask = attn_mask[:, None].repeat([1, self.num_heads, 1, 1]).reshape(
+                    bsz * self.num_heads, tgt_len, src_len)
+            attn_weights = attn_weights + attn_mask
+
+        if enc_dec_attn_constraint_mask is not None:  # bs x head x L_kv
+            attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
+            attn_weights = attn_weights.masked_fill(
+                enc_dec_attn_constraint_mask.unsqueeze(2).bool(),
+                -1e8,
+            )
+            attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
+
+        if key_padding_mask is not None:
+            # don't attend to padding symbols
+            attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
+            attn_weights = attn_weights.masked_fill(
+                key_padding_mask.unsqueeze(1).unsqueeze(2),
+                -1e8,
+            )
+            attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
+
+        attn_logits = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
+
+        if before_softmax:
+            return attn_weights, v
+
+        attn_weights_float = softmax(attn_weights, dim=-1)
+        attn_weights = attn_weights_float.type_as(attn_weights)
+        attn_probs = F.dropout(attn_weights_float.type_as(attn_weights), p=self.dropout, training=self.training)
+
+        if reset_attn_weight is not None:
+            if reset_attn_weight:
+                self.last_attn_probs = attn_probs.detach()
+            else:
+                assert self.last_attn_probs is not None
+                attn_probs = self.last_attn_probs
+        attn = torch.bmm(attn_probs, v)
+        assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim]
+        attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)
+        attn = self.out_proj(attn)
+
+        if need_weights:
+            attn_weights = attn_weights_float.view(bsz, self.num_heads, tgt_len, src_len).transpose(1, 0)
+            if not need_head_weights:
+                # average attention weights over heads
+                attn_weights = attn_weights.mean(dim=0)
+        else:
+            attn_weights = None
+
+        return attn, (attn_weights, attn_logits)
+
+    def in_proj_qkv(self, query):  # one fused projection, cut into three chunks along the last dim
+        return torch.chunk(self._in_proj(query), 3, dim=-1)
+
+    def in_proj_q(self, query):
+        """Project ``query`` using the leading ``embed_dim`` rows of the fused weight, or ``q_proj_weight``."""
+        if self.qkv_same_dim:
+            return self._in_proj(query, end=self.embed_dim)
+        # Separate q/k/v weights: the bias vector is still shared, so slice out
+        # its leading ``embed_dim`` entries (when a bias exists at all).
+        q_bias = None if self.in_proj_bias is None else self.in_proj_bias[:self.embed_dim]
+        return F.linear(query, self.q_proj_weight, q_bias)
+
+    def in_proj_k(self, key):
+        """Project ``key`` with rows ``[embed_dim, 2 * embed_dim)`` of the fused weight, or ``k_proj_weight``."""
+        dim = self.embed_dim
+        if self.qkv_same_dim:
+            return self._in_proj(key, start=dim, end=2 * dim)
+        # Separate weights still share one bias vector; keys use its ``[dim, 2 * dim)`` slice.
+        shared_bias = self.in_proj_bias
+        k_bias = shared_bias[dim:2 * dim] if shared_bias is not None else None
+        return F.linear(key, self.k_proj_weight, k_bias)
+
+    def in_proj_v(self, value):
+        """Project ``value`` with rows from ``2 * embed_dim`` on of the fused weight, or ``v_proj_weight``."""
+        offset = 2 * self.embed_dim
+        if self.qkv_same_dim:
+            return self._in_proj(value, start=offset)
+        # Separate weights still share one bias vector; values use everything from ``offset`` on.
+        shared_bias = self.in_proj_bias
+        v_bias = shared_bias[offset:] if shared_bias is not None else None
+        return F.linear(value, self.v_proj_weight, v_bias)
+
+    def _in_proj(self, input, start=0, end=None):
+        """Apply rows ``start:end`` of the fused input projection (and of its bias, if any) to ``input``."""
+        rows = slice(start, end)
+        sliced_bias = self.in_proj_bias
+        if sliced_bias is not None:
+            sliced_bias = sliced_bias[rows]
+        return F.linear(input, self.in_proj_weight[rows, :], sliced_bias)
+
+    def _get_input_buffer(self, incremental_state):
+        """Return this module's cached ``'attn_state'`` entry, or a new empty dict if it is missing/falsy."""
+        cached = get_incremental_state(self, incremental_state, 'attn_state')
+        if cached:
+            return cached
+        return {}
+
+    def _set_input_buffer(self, incremental_state, buffer):
+        """Store ``buffer`` as this module's ``'attn_state'`` entry in ``incremental_state``.
+
+        Delegates to ``set_incremental_state``; nothing is returned.
+        """
+        state_key = 'attn_state'
+        set_incremental_state(self, incremental_state, state_key, buffer)
+
+    def apply_sparse_mask(self, attn_weights, tgt_len, src_len, bsz):  # returns the weights unchanged
+        return attn_weights  # tgt_len, src_len and bsz are not used by this implementation
+
+    def clear_buffer(self, incremental_state=None):
+        """Drop the cached ``prev_key``/``prev_value`` entries, if any, and write the buffer back."""
+        if incremental_state is None:
+            return
+        attn_cache = self._get_input_buffer(incremental_state)
+        for stale in ('prev_key', 'prev_value'):
+            attn_cache.pop(stale, None)  # tolerate keys that were never cached
+        self._set_input_buffer(incremental_state, attn_cache)
+
+
+class EncSALayer(nn.Module):
+    """Pre-norm encoder layer: optional self-attention followed by a ``TransformerFFNLayer``."""
+
+    def __init__(self, c, num_heads, dropout, attention_dropout=0.1,
+                 relu_dropout=0.1, kernel_size=9, padding='SAME', act='gelu',
+                 ffn_hidden_size=1024):
+        super().__init__()
+        self.c = c
+        self.dropout = dropout
+        self.num_heads = num_heads
+        if num_heads > 0:  # the attention sub-layer and its norm only exist when heads are requested
+            self.layer_norm1 = LayerNorm(c)
+            self.self_attn = MultiheadAttention(
+                self.c, num_heads, self_attention=True, dropout=attention_dropout, bias=False)
+        self.layer_norm2 = LayerNorm(c)
+        self.ffn = TransformerFFNLayer(
+            c, ffn_hidden_size, kernel_size=kernel_size, dropout=relu_dropout, padding=padding, act=act)
+
+    def forward(self, x, encoder_padding_mask=None, **kwargs):
+        """Run the layer on ``x``; positions flagged in ``encoder_padding_mask`` are zeroed when it is given.
+
+        ``kwargs['layer_norm_training']`` (optional) overrides the norms' ``training`` flag; other kwargs
+        (e.g. ``attn_mask``) are accepted but unused. NOTE(review): mask looks like [B, T], x [T, B, C].
+        """
+        keep = None  # no mask -> nothing to zero (this used to fail on ``None.float()``)
+        if encoder_padding_mask is not None:
+            keep = (1 - encoder_padding_mask.float()).transpose(0, 1)[..., None]
+        layer_norm_training = kwargs.get('layer_norm_training', None)
+        if layer_norm_training is not None:
+            if self.num_heads > 0:  # layer_norm1 is never created for attention-free layers
+                self.layer_norm1.training = layer_norm_training
+            self.layer_norm2.training = layer_norm_training
+        if self.num_heads > 0:
+            normed = self.layer_norm1(x)
+            attn_out, _ = self.self_attn(
+                query=normed, key=normed, value=normed, key_padding_mask=encoder_padding_mask)
+            x = x + F.dropout(attn_out, self.dropout, training=self.training)
+            x = x if keep is None else x * keep
+        ffn_out = F.dropout(self.ffn(self.layer_norm2(x)), self.dropout, training=self.training)
+        x = x + ffn_out
+        return x if keep is None else x * keep
+
+
+class DecSALayer(nn.Module):
+    """Decoder layer: self-attention, optional encoder-decoder attention, then an FFN built with padding='LEFT'.
+
+    ``post_ln`` selects whether each LayerNorm runs after the residual sum instead of before the sub-layer.
+    """
+
+    def __init__(self, c, num_heads, dropout, attention_dropout=0.1, relu_dropout=0.1,
+                 kernel_size=9, ffn_hidden_size=1024, act='gelu', post_ln=False):
+        super().__init__()
+        self.c = c
+        self.dropout = dropout
+        self.post_ln = post_ln
+        attn_kwargs = dict(dropout=attention_dropout, bias=False)
+        self.layer_norm1 = LayerNorm(c)
+        self.self_attn = MultiheadAttention(c, num_heads, self_attention=True, **attn_kwargs)
+        self.layer_norm2 = LayerNorm(c)
+        self.encoder_attn = MultiheadAttention(c, num_heads, encoder_decoder_attention=True, **attn_kwargs)
+        self.layer_norm3 = LayerNorm(c)
+        self.ffn = TransformerFFNLayer(
+            c, ffn_hidden_size, padding='LEFT', kernel_size=kernel_size, dropout=relu_dropout, act=act)
+
+    def forward(self, x, encoder_out=None, encoder_padding_mask=None, incremental_state=None,
+                self_attn_mask=None, self_attn_padding_mask=None, attn_out=None,
+                reset_attn_weight=None, **kwargs):
+        """Apply the three sub-layers to ``x`` and return ``(x, attn_logits)``.
+
+        ``attn_logits`` is element 1 of the second value returned by ``encoder_attn`` and stays ``None``
+        unless ``encoder_out`` is given. With only ``attn_out`` given, the cross-attention is replaced by
+        ``encoder_attn.in_proj_v(attn_out)``. ``kwargs['layer_norm_training']`` (optional) overrides the
+        ``training`` flag of all three norms.
+        """
+        ln_flag = kwargs.get('layer_norm_training', None)
+        if ln_flag is not None:
+            for norm in (self.layer_norm1, self.layer_norm2, self.layer_norm3):
+                norm.training = ln_flag
+        pre_ln = not self.post_ln
+
+        # --- self-attention sub-layer ---
+        shortcut = x
+        if pre_ln:
+            x = self.layer_norm1(x)
+        x, _ = self.self_attn(
+            query=x, key=x, value=x,
+            key_padding_mask=self_attn_padding_mask,
+            incremental_state=incremental_state,
+            attn_mask=self_attn_mask,
+        )
+        x = shortcut + F.dropout(x, self.dropout, training=self.training)
+        if not pre_ln:
+            x = self.layer_norm1(x)
+
+        # --- encoder-decoder sub-layer (skipped when neither encoder_out nor attn_out is given) ---
+        attn_logits = None
+        use_cross = encoder_out is not None or attn_out is not None
+        if use_cross:
+            shortcut = x
+            if pre_ln:
+                x = self.layer_norm2(x)
+        if encoder_out is not None:
+            constraint = get_incremental_state(self, incremental_state, 'enc_dec_attn_constraint_mask')
+            x, attn = self.encoder_attn(
+                query=x, key=encoder_out, value=encoder_out,
+                key_padding_mask=encoder_padding_mask,
+                incremental_state=incremental_state,
+                static_kv=True,
+                enc_dec_attn_constraint_mask=constraint,
+                reset_attn_weight=reset_attn_weight,
+            )
+            attn_logits = attn[1]
+        elif attn_out is not None:
+            x = self.encoder_attn.in_proj_v(attn_out)
+        if use_cross:
+            x = shortcut + F.dropout(x, self.dropout, training=self.training)
+        if not pre_ln:
+            x = self.layer_norm2(x)  # post-LN applies this norm even when the sub-layer was skipped
+
+        # --- feed-forward sub-layer ---
+        shortcut = x
+        if pre_ln:
+            x = self.layer_norm3(x)
+        x = self.ffn(x, incremental_state=incremental_state)
+        x = shortcut + F.dropout(x, self.dropout, training=self.training)
+        if not pre_ln:
+            x = self.layer_norm3(x)
+        return x, attn_logits
+
+    def clear_buffer(self, input, encoder_out=None, encoder_padding_mask=None, incremental_state=None):
+        """Clear the incremental caches of ``encoder_attn`` and ``ffn``; the other arguments are unused."""
+        for cached_module in (self.encoder_attn, self.ffn):
+            cached_module.clear_buffer(incremental_state)
+
+    def set_buffer(self, name, tensor, incremental_state):
+        """Store ``tensor`` under ``name`` for this layer in ``incremental_state``."""
+        return set_incremental_state(self, incremental_state, name, tensor)
+
+
+class TransformerEncoderLayer(nn.Module):
+    """Thin wrapper that builds one ``EncSALayer`` (attention dropout fixed to 0) and delegates to it."""
+
+    def __init__(self, hidden_size, dropout, kernel_size=9, num_heads=2, ffn_hidden_size=1024):
+        super().__init__()
+        self.hidden_size, self.dropout, self.num_heads = hidden_size, dropout, num_heads
+        layer_cfg = dict(dropout=dropout, attention_dropout=0.0, relu_dropout=dropout,
+                         kernel_size=kernel_size, ffn_hidden_size=ffn_hidden_size)
+        self.op = EncSALayer(hidden_size, num_heads, **layer_cfg)
+
+    def forward(self, x, **kwargs):
+        """Forward ``x`` and all keyword arguments to the wrapped layer."""
+        return self.op(x, **kwargs)
+
+
+class TransformerDecoderLayer(nn.Module):
+    """Thin wrapper that builds one ``DecSALayer`` (attention dropout fixed to 0) and delegates to it."""
+
+    def __init__(self, hidden_size, dropout, kernel_size=9, num_heads=2, ffn_hidden_size=1024, post_ln=False):
+        super().__init__()
+        self.hidden_size, self.dropout, self.num_heads = hidden_size, dropout, num_heads
+        self.op = DecSALayer(hidden_size, num_heads, dropout=dropout, attention_dropout=0.0, relu_dropout=dropout,
+                             kernel_size=kernel_size, ffn_hidden_size=ffn_hidden_size, post_ln=post_ln)
+
+    def forward(self, x, **kwargs):
+        """Forward ``x`` and all keyword arguments to the wrapped layer; returns whatever it returns."""
+        return self.op(x, **kwargs)
+
+    def clear_buffer(self, *args, **kwargs):
+        """Delegate to ``self.op.clear_buffer``; keyword arguments (e.g. ``incremental_state=``) now pass through."""
+        return self.op.clear_buffer(*args, **kwargs)
+
+    def set_buffer(self, *args, **kwargs):
+        """Delegate to ``self.op.set_buffer``, forwarding positional and keyword arguments alike."""
+        return self.op.set_buffer(*args, **kwargs)
+
+
+class FFTBlocks(nn.Module):
+    """Stack of ``TransformerEncoderLayer`` blocks with optional sinusoidal positions and final LayerNorm."""
+
+    def __init__(self, hidden_size, num_layers, ffn_kernel_size=9, dropout=0.0,
+                 num_heads=2, use_pos_embed=True, use_last_norm=True,
+                 use_pos_embed_alpha=True, ffn_hidden_size=1024):
+        super().__init__()
+        self.num_layers = num_layers
+        self.hidden_size = hidden_size
+        self.dropout = dropout
+        self.use_pos_embed = use_pos_embed
+        self.use_last_norm = use_last_norm
+        if use_pos_embed:
+            self.max_source_positions = DEFAULT_MAX_TARGET_POSITIONS
+            self.padding_idx = 0
+            # Learnable scale for the positional signal, or the constant 1 when disabled.
+            self.pos_embed_alpha = nn.Parameter(torch.Tensor([1])) if use_pos_embed_alpha else 1
+            self.embed_positions = SinusoidalPositionalEmbedding(
+                hidden_size, self.padding_idx, init_size=DEFAULT_MAX_TARGET_POSITIONS)
+
+        # All blocks share one configuration.
+        encoder_layers = [
+            TransformerEncoderLayer(hidden_size, dropout, kernel_size=ffn_kernel_size,
+                                    num_heads=num_heads, ffn_hidden_size=ffn_hidden_size)
+            for _ in range(num_layers)
+        ]
+        self.layers = nn.ModuleList(encoder_layers)
+        self.layer_norm = nn.LayerNorm(hidden_size) if use_last_norm else None
+
+    def forward(self, x, padding_mask=None, attn_mask=None, return_hiddens=False):
+        """
+        :param x: [B, T, C]
+        :param padding_mask: [B, T]; when None, frames whose features are all zero count as padding
+        :param attn_mask: handed to every layer as the ``attn_mask`` keyword
+        :param return_hiddens: stack every layer's output instead of returning the final one
+        :return: [B, T, C] or [L, B, T, C]
+        """
+        if padding_mask is None:
+            padding_mask = x.abs().sum(-1).eq(0).data
+        keep_TB = 1 - padding_mask.transpose(0, 1).float()[:, :, None]  # [T, B, 1]
+        if self.use_pos_embed:
+            x = x + self.pos_embed_alpha * self.embed_positions(x[..., 0])
+            x = F.dropout(x, p=self.dropout, training=self.training)
+        x = x.transpose(0, 1) * keep_TB  # B x T x C -> T x B x C
+
+        per_layer = []
+        for block in self.layers:
+            x = block(x, encoder_padding_mask=padding_mask, attn_mask=attn_mask) * keep_TB
+            per_layer.append(x)
+        # The final norm is evaluated even when return_hiddens discards its result.
+        if self.use_last_norm:
+            x = self.layer_norm(x) * keep_TB
+        if not return_hiddens:
+            return x.transpose(0, 1)  # [B, T, C]
+        # [L, T, B, C] -> [L, B, T, C]
+        return torch.stack(per_layer, 0).transpose(1, 2)
+
+
+class FastSpeechEncoder(FFTBlocks):
+    """Token-embedding front end followed by the ``FFTBlocks`` stack."""
+
+    def __init__(self, dict_size, hidden_size=256, num_layers=4, kernel_size=9,
+                 dropout=0.0, num_heads=2, ffn_hidden_size=1024):
+        # The parent is built with use_pos_embed=False, which also gates the position branch below.
+        super().__init__(hidden_size, num_layers, kernel_size, num_heads=num_heads,
+                         use_pos_embed=False, dropout=dropout, ffn_hidden_size=ffn_hidden_size)
+        self.padding_idx = 0
+        self.embed_scale = math.sqrt(hidden_size)
+        self.embed_tokens = Embedding(dict_size, hidden_size, 0)
+        self.embed_positions = SinusoidalPositionalEmbedding(
+            hidden_size, self.padding_idx, init_size=DEFAULT_MAX_TARGET_POSITIONS)
+
+    def forward(self, txt_tokens, attn_mask=None, other_embeds=0):
+        """
+        :param txt_tokens: [B, T] token ids; id ``self.padding_idx`` (0) marks padding
+        :param attn_mask: forwarded to ``FFTBlocks.forward``
+        :param other_embeds: added to the token embeddings before the layer stack
+        :return: encoder output tensor, [B x T x C]
+        """
+        pad_mask = txt_tokens.eq(self.padding_idx).data
+        hidden = self.forward_embedding(txt_tokens) + other_embeds  # [B, T, H]
+        # With no layers the embeddings are returned directly, without padding masking.
+        if self.num_layers <= 0:
+            return hidden
+        return super().forward(hidden, pad_mask, attn_mask=attn_mask)
+
+    def forward_embedding(self, txt_tokens):
+        """Scale the token embeddings, optionally add positions, then apply dropout."""
+        embedded = self.embed_scale * self.embed_tokens(txt_tokens)
+        if self.use_pos_embed:
+            embedded = embedded + self.embed_positions(txt_tokens)
+        return F.dropout(embedded, p=self.dropout, training=self.training)
diff --git a/modules/commons/unet1d.py b/modules/commons/unet1d.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed8a9bc82c22058bcc6d9c2ea59868b35c7fc2d5
--- /dev/null
+++ b/modules/commons/unet1d.py
@@ -0,0 +1,202 @@
+from collections import OrderedDict
+
+import torch
+import torch.nn as nn
+
+
+class UNet1d(nn.Module):
+    """1-D U-Net over sequences.
+
+    Four encoder blocks, each followed by 2x max-pooling, a bottleneck block, and four decoder
+    stages that upsample with a stride-2 transposed convolution and concatenate the matching
+    encoder output on the channel axis. A kernel-size-1 convolution maps to ``out_channels``.
+
+    ``multi`` gives the width multiplier of ``init_features`` per level (default ``[1, 2, 2, 4]``).
+    """
+
+    def __init__(self, in_channels=3, out_channels=1, init_features=128, multi=None):
+        super(UNet1d, self).__init__()
+        multi = [1, 2, 2, 4] if multi is None else multi
+        # Channel width of each of the four resolution levels.
+        c1, c2, c3, c4 = (init_features * multi[level] for level in range(4))
+        block = UNet1d._block
+        halve = dict(kernel_size=2, stride=2)  # shared by the pools and the transposed convs
+
+        # Contracting path: conv block, then halve the length.
+        self.encoder1 = block(in_channels, c1, name="enc1")
+        self.pool1 = nn.MaxPool1d(**halve)
+        self.encoder2 = block(c1, c2, name="enc2")
+        self.pool2 = nn.MaxPool1d(**halve)
+        self.encoder3 = block(c2, c3, name="enc3")
+        self.pool3 = nn.MaxPool1d(**halve)
+        self.encoder4 = block(c3, c4, name="enc4")
+        self.pool4 = nn.MaxPool1d(**halve)
+
+        self.bottleneck = block(c4, c4, name="bottleneck")
+
+        # Expanding path: double the length, then fuse with the skip (hence the doubled input width).
+        self.upconv4 = nn.ConvTranspose1d(c4, c4, **halve)
+        self.decoder4 = block(c4 * 2, c4, name="dec4")
+        self.upconv3 = nn.ConvTranspose1d(c4, c3, **halve)
+        self.decoder3 = block(c3 * 2, c3, name="dec3")
+        self.upconv2 = nn.ConvTranspose1d(c3, c2, **halve)
+        self.decoder2 = block(c2 * 2, c2, name="dec2")
+        self.upconv1 = nn.ConvTranspose1d(c2, c1, **halve)
+        self.decoder1 = block(c1 * 2, c1, name="dec1")
+
+        # Per-frame read-out to the requested number of output channels.
+        self.conv = nn.Conv1d(in_channels=c1, out_channels=out_channels, kernel_size=1)
+
+    def forward(self, x, nonpadding=None):
+        """Run the network.
+
+        :param x: [B, T, in_channels]; transposed to channels-first for the convolutions
+        :param nonpadding: [B, T, 1] multiplicative mask (defaults to all ones); it is applied to
+            the first encoder output and to the final result only
+        :return: [B, T, out_channels]
+
+        NOTE(review): the skip concatenations presumably require T to be a multiple of 16 - confirm.
+        """
+        if nonpadding is None:
+            nonpadding = torch.ones_like(x)[:, :, :1]
+
+        # Contracting path; every level's output is kept as a skip connection.
+        skip1 = self.encoder1(x.transpose(1, 2)) * nonpadding.transpose(1, 2)
+        skip2 = self.encoder2(self.pool1(skip1))
+        skip3 = self.encoder3(self.pool2(skip2))
+        skip4 = self.encoder4(self.pool3(skip3))
+        h = self.bottleneck(self.pool4(skip4))
+
+        # Expanding path: upsample, concatenate the skip on the channel axis, fuse.
+        stages = (
+            (self.upconv4, self.decoder4, skip4),
+            (self.upconv3, self.decoder3, skip3),
+            (self.upconv2, self.decoder2, skip2),
+            (self.upconv1, self.decoder1, skip1),
+        )
+        for upsample, fuse, skip in stages:
+            h = fuse(torch.cat((upsample(h), skip), dim=1))
+        return self.conv(h).transpose(1, 2) * nonpadding
+
+    @staticmethod
+    def _block(in_channels, features, name):
+        """Build one conv block.
+
+        Two Conv1d(kernel 5, padding 2, no bias) -> GroupNorm(4 groups) -> Tanh stages. Sub-module
+        names are ``name`` plus ``conv1``/``norm1``/``tanh1``/``conv2``/``norm2``/``tanh2``.
+        """
+        def conv(n_in):
+            return nn.Conv1d(
+                in_channels=n_in, out_channels=features, kernel_size=5, padding=2, bias=False,
+            )
+
+        layers = OrderedDict([
+            (name + "conv1", conv(in_channels)),
+            (name + "norm1", nn.GroupNorm(4, features)),
+            (name + "tanh1", nn.Tanh()),
+            (name + "conv2", conv(features)),
+            (name + "norm2", nn.GroupNorm(4, features)),
+            (name + "tanh2", nn.Tanh()),
+        ])
+        return nn.Sequential(layers)
+
+
+class UNet2d(nn.Module):
+    """2-D U-Net over images.
+
+    Four encoder blocks, each followed by 2x2 max-pooling, a bottleneck block, and four decoder
+    stages that upsample with a stride-2 transposed convolution and concatenate the matching
+    encoder output on the channel axis. A 1x1 convolution maps to ``out_channels``.
+
+    ``multi`` gives the width multiplier of ``init_features`` per level; ``None`` selects
+    ``[1, 2, 2, 4]``.
+    """
+
+    def __init__(self, in_channels=3, out_channels=1, init_features=32, multi=None):
+        super(UNet2d, self).__init__()
+        if multi is None:
+            # Without this fallback the declared default failed on ``multi[0]`` (None is not subscriptable).
+            multi = [1, 2, 2, 4]
+        features = init_features
+        widths = [features * multi[level] for level in range(4)]  # channels per resolution level
+
+        self.encoder1 = UNet2d._block(in_channels, widths[0], name="enc1")
+        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.encoder2 = UNet2d._block(widths[0], widths[1], name="enc2")
+        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.encoder3 = UNet2d._block(widths[1], widths[2], name="enc3")
+        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.encoder4 = UNet2d._block(widths[2], widths[3], name="enc4")
+        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
+
+        self.bottleneck = UNet2d._block(widths[3], widths[3], name="bottleneck")
+
+        # Each decoder sees the upsampled features concatenated with the skip, hence ``* 2``.
+        self.upconv4 = nn.ConvTranspose2d(widths[3], widths[3], kernel_size=2, stride=2)
+        self.decoder4 = UNet2d._block(widths[3] * 2, widths[3], name="dec4")
+        self.upconv3 = nn.ConvTranspose2d(widths[3], widths[2], kernel_size=2, stride=2)
+        self.decoder3 = UNet2d._block(widths[2] * 2, widths[2], name="dec3")
+        self.upconv2 = nn.ConvTranspose2d(widths[2], widths[1], kernel_size=2, stride=2)
+        self.decoder2 = UNet2d._block(widths[1] * 2, widths[1], name="dec2")
+        self.upconv1 = nn.ConvTranspose2d(widths[1], widths[0], kernel_size=2, stride=2)
+        self.decoder1 = UNet2d._block(widths[0] * 2, widths[0], name="dec1")
+
+        # 1x1 read-out to the requested number of output channels.
+        self.conv = nn.Conv2d(in_channels=widths[0], out_channels=out_channels, kernel_size=1)
+
+    def forward(self, x):
+        """Map ``x`` [B, in_channels, H, W] to [B, out_channels, H, W].
+
+        NOTE(review): the skip concatenations presumably require H and W to be multiples of 16.
+        """
+        enc1 = self.encoder1(x)
+        enc2 = self.encoder2(self.pool1(enc1))
+        enc3 = self.encoder3(self.pool2(enc2))
+        enc4 = self.encoder4(self.pool3(enc3))
+
+        bottleneck = self.bottleneck(self.pool4(enc4))
+
+        # Decoder: upsample, concatenate the same-resolution encoder output (dim 1), fuse.
+        dec4 = self.upconv4(bottleneck)
+        dec4 = torch.cat((dec4, enc4), dim=1)
+        dec4 = self.decoder4(dec4)
+        dec3 = self.upconv3(dec4)
+        dec3 = torch.cat((dec3, enc3), dim=1)
+        dec3 = self.decoder3(dec3)
+        dec2 = self.upconv2(dec3)
+        dec2 = torch.cat((dec2, enc2), dim=1)
+        dec2 = self.decoder2(dec2)
+        dec1 = self.upconv1(dec2)
+        dec1 = torch.cat((dec1, enc1), dim=1)
+        dec1 = self.decoder1(dec1)
+        return self.conv(dec1)
+
+    @staticmethod
+    def _block(in_channels, features, name):
+        """Build one conv block.
+
+        Two Conv2d(kernel 3, padding 1, no bias) -> GroupNorm(4 groups) -> Tanh stages followed by a
+        biased 1x1 Conv2d. Sub-module names are ``name`` plus the role (``conv1`` ... ``conv3``).
+        """
+        return nn.Sequential(
+            OrderedDict(
+                [
+                    (
+                        name + "conv1",
+                        nn.Conv2d(in_channels, features, kernel_size=3, padding=1, bias=False),
+                    ),
+                    (name + "norm1", nn.GroupNorm(4, features)),
+                    (name + "tanh1", nn.Tanh()),
+                    (
+                        name + "conv2",
+                        nn.Conv2d(features, features, kernel_size=3, padding=1, bias=False),
+                    ),
+                    (name + "norm2", nn.GroupNorm(4, features)),
+                    (name + "tanh2", nn.Tanh()),
+                    (
+                        name + "conv3",
+                        nn.Conv2d(features, features, kernel_size=1, padding=0, bias=True),
+                    ),
+                ]
+            )
+        )
diff --git a/modules/commons/vqvae.py b/modules/commons/vqvae.py
new file mode 100644
index 0000000000000000000000000000000000000000..7bc259ad1ecbc4aca7397f25476f407c43a032a0
--- /dev/null
+++ b/modules/commons/vqvae.py
@@ -0,0 +1,148 @@
+import torch
+import torch.nn as nn
+from scipy.cluster.vq import kmeans2
+from torch.nn import functional as F
+
+
+class VQEmbeddingEMA(nn.Module):
+    """Vector-quantisation codebook maintained with exponential moving averages (EMA).
+
+    The codebook and its EMA statistics are registered as buffers: they are saved in
+    the state dict but are updated in ``forward`` rather than by an optimiser.
+
+    :param n_embeddings: number of codebook entries
+    :param embedding_dim: size of each codebook entry
+    :param commitment_cost: weight applied to the commitment loss
+    :param decay: EMA decay for the usage counts and the summed assigned vectors
+    :param epsilon: smoothing constant added to the usage counts
+    :param print_vq_prob: when true, print the per-code usage on every forward call
+    """
+
+    def __init__(self, n_embeddings, embedding_dim, commitment_cost=0.25, decay=0.999, epsilon=1e-5,
+                 print_vq_prob=False):
+        super(VQEmbeddingEMA, self).__init__()
+        self.commitment_cost = commitment_cost
+        self.n_embeddings = n_embeddings
+        self.decay = decay
+        self.epsilon = epsilon
+        self.print_vq_prob = print_vq_prob
+        # stays 0 until the first training forward pass has been seen
+        self.register_buffer('data_initialized', torch.zeros(1))
+
+        init_bound = 1 / 512
+        embedding = torch.Tensor(n_embeddings, embedding_dim)
+        embedding.uniform_(-init_bound, init_bound)
+        self.register_buffer("embedding", embedding)
+        self.register_buffer("ema_count", torch.zeros(n_embeddings))
+        self.register_buffer("ema_weight", self.embedding.clone())
+
+    def encode(self, x):
+        """
+        Find the nearest codebook entry for every vector of ``x``.
+
+        :param x: [B, T, D]
+        :return: (x_flat [B*T, D] detached from the graph, quantized [B, T, D], indices [B*T])
+        """
+        D = self.embedding.size(1)
+        x_flat = x.detach().reshape(-1, D)
+
+        # squared L2 distance expanded as |e|^2 + |x|^2 - 2 * x.e
+        distances = torch.addmm(torch.sum(self.embedding ** 2, dim=1) +
+                                torch.sum(x_flat ** 2, dim=1, keepdim=True),
+                                x_flat, self.embedding.t(),
+                                alpha=-2.0, beta=1.0)  # [B*T_mel, N_vq]
+        indices = torch.argmin(distances.float(), dim=-1)  # [B*T_mel]
+        quantized = F.embedding(indices, self.embedding)
+        quantized = quantized.view_as(x)
+        return x_flat, quantized, indices
+
+    def forward(self, x):
+        """
+        Quantise ``x`` and, in training mode, update the codebook by EMA.
+
+        :param x: [B, T, D]; frames whose features are all zero are excluded from the loss
+        :return: (quantized [B, T, D] with straight-through gradient, commitment loss,
+                  indices [B, T], perplexity of the code usage in this batch)
+        """
+        B, T, _ = x.shape
+        M = self.embedding.size(0)
+
+        x_flat, quantized, indices = self.encode(x)
+        encodings = F.one_hot(indices, M).float()
+        indices = indices.reshape(B, T)
+
+        # NOTE: no data-driven (k-means) initialisation is performed in this class, so
+        # `data_initialized` only makes the EMA update skip the first training batch.
+        if self.training and self.data_initialized.item() != 0:
+            self.ema_count = self.decay * self.ema_count + (1 - self.decay) * torch.sum(encodings, dim=0)
+
+            # smooth the counts so that unused codes never divide by zero below
+            n = torch.sum(self.ema_count)
+            self.ema_count = (self.ema_count + self.epsilon) / (n + M * self.epsilon) * n
+
+            dw = torch.matmul(encodings.t(), x_flat)
+            self.ema_weight = self.decay * self.ema_weight + (1 - self.decay) * dw
+
+            self.embedding = self.ema_weight / self.ema_count.unsqueeze(-1)
+
+        if self.training and self.data_initialized.item() == 0:
+            self.data_initialized.fill_(1)
+
+        e_latent_loss = F.mse_loss(x, quantized.detach(), reduction='none')
+        nonpadding = (x.abs().sum(-1) > 0).float()
+        # clamp the frame count: an all-padding batch gives a zero loss instead of 0/0 = NaN
+        e_latent_loss = (e_latent_loss.mean(-1) * nonpadding).sum() / nonpadding.sum().clamp(min=1)
+        loss = self.commitment_cost * e_latent_loss
+
+        # straight-through estimator: forward value is the code, gradient goes to x
+        quantized = x + (quantized - x).detach()
+
+        avg_probs = torch.mean(encodings, dim=0)
+        perplexity = torch.exp(-torch.sum(avg_probs * torch.log(avg_probs + 1e-10)))
+        if self.print_vq_prob:
+            print("| VQ code avg_probs: ", avg_probs)
+        return quantized, loss, indices, perplexity
+
+
+class VQEmbedding(nn.Module):
+    """Vector-quantisation layer whose codebook is initialised by k-means on the first training batch.
+
+    The codebook is registered as a buffer, so it is stored in the state dict but
+    receives no gradient; the loss therefore only pulls the encoder output towards
+    its assigned code.
+
+    :param n_embeddings: number of codebook entries
+    :param embedding_dim: size of each codebook entry
+    :param commitment_cost: weight of the commitment term of the loss
+    :param lambda_kl: global multiplier applied to the whole quantisation loss
+    """
+
+    def __init__(self, n_embeddings, embedding_dim, commitment_cost=0.25, lambda_kl=1.0):
+        super(VQEmbedding, self).__init__()
+        self.commitment_cost = commitment_cost
+        self.lambda_kl = lambda_kl
+        self.n_embeddings = n_embeddings
+        # zeros instead of uninitialised memory: using the module before the k-means
+        # initialisation (e.g. in eval mode) must not read garbage / NaN values
+        embedding = torch.zeros(n_embeddings, embedding_dim)
+        self.register_buffer("embedding", embedding)
+        self.register_buffer('data_initialized', torch.zeros(1))
+
+    def encode(self, x):
+        """
+        Find the nearest codebook entry for every vector of ``x``.
+
+        :param x: [B, T, D]
+        :return: (x_flat [B*T, D] detached from the graph, quantized [B, T, D], indices [B*T])
+        """
+        D = self.embedding.size(1)
+        x_flat = x.detach().reshape(-1, D)
+
+        # squared L2 distance expanded as |e|^2 + |x|^2 - 2 * x.e
+        distances = torch.addmm(torch.sum(self.embedding ** 2, dim=1) +
+                                torch.sum(x_flat ** 2, dim=1, keepdim=True),
+                                x_flat, self.embedding.t(),
+                                alpha=-2.0, beta=1.0)  # [B*T_mel, N_vq]
+        indices = torch.argmin(distances.float(), dim=-1)  # [B*T_mel]
+        quantized = F.embedding(indices, self.embedding)
+        quantized = quantized.view_as(x)
+        return x_flat, quantized, indices
+
+    def forward(self, x):
+        """
+        Quantise ``x``.
+
+        :param x: [B, T, D]
+        :return: (quantized [B, T, D] with straight-through gradient, loss,
+                  indices [B, T], perplexity of the code usage in this batch)
+        """
+        B, T, _ = x.shape
+        M, D = self.embedding.size()
+
+        # DeepMind def does not do this but I find I have to... ;\
+        if self.training and self.data_initialized.item() == 0:
+            print('| running kmeans in VQVAE')  # data driven initialization for the embeddings
+            x_flat = x.detach().reshape(-1, D)
+            rp = torch.randperm(x_flat.size(0))
+            kd = kmeans2(x_flat[rp].data.cpu().numpy(), self.n_embeddings, minit='points')
+            self.embedding.copy_(torch.from_numpy(kd[0]))
+            self.data_initialized.fill_(1)
+            # TODO: this won't work in multi-GPU setups
+
+        # encode once, after any (re)initialisation of the codebook
+        x_flat, quantized, indices = self.encode(x)
+        encodings = F.one_hot(indices, M).float()
+        indices = indices.reshape(B, T)
+
+        # commitment term: gradient flows into x (the encoder output) while the code is
+        # held fixed. The second term has the usual codebook-loss form; with the codebook
+        # stored as a buffer it only contributes its value.
+        loss = self.commitment_cost * (x - quantized.detach()).pow(2).mean() + \
+               (quantized - x.detach()).pow(2).mean()
+        loss *= self.lambda_kl
+
+        # straight-through estimator: forward value is the code, gradient goes to x
+        quantized = x + (quantized - x).detach()
+
+        avg_probs = torch.mean(encodings, dim=0)
+        perplexity = torch.exp(-torch.sum(avg_probs * torch.log(avg_probs + 1e-10)))
+        return quantized, loss, indices, perplexity
diff --git a/modules/commons/vqvae_cvq.py b/modules/commons/vqvae_cvq.py
new file mode 100644
index 0000000000000000000000000000000000000000..082039d3566b1b618d9bb54878122ab48de6cdbc
--- /dev/null
+++ b/modules/commons/vqvae_cvq.py
@@ -0,0 +1,190 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+from torch import einsum
+from einops import rearrange
+import torch.distributed as dist
+
+from utils.commons.hparams import hparams
+
+
+class ClusteringVectorQuantiser(nn.Module):
+    """
+    Improved version over vector quantiser, with the dynamic initialisation
+    for these unoptimised "dead" points.
+    num_embed: number of codebook entry
+    embed_dim: dimensionality of codebook entry
+    beta: weight for the commitment loss
+    distance: distance for looking up the closest code ('l2' or 'cos')
+    anchor: anchor sampling method ('closest', 'random' or 'probrandom')
+    first_batch: if true, the offline version of our model
+                 (the codebook re-initialisation runs on the first training batch only)
+    contras_loss: if true, use the contras_loss to further improve the performance
+    """
+    def __init__(self, num_embed=1024, embed_dim=512, beta=0.25, distance='l2', 
+                 anchor='closest', first_batch=False, contras_loss=True):
+        super().__init__()
+
+        self.num_embed = num_embed
+        self.embed_dim = embed_dim
+        self.beta = beta
+        self.distance = distance
+        self.anchor = anchor
+        self.first_batch = first_batch
+        self.contras_loss = contras_loss
+        # EMA decay of the per-code usage estimate `embed_prob`
+        self.decay = 0.99
+        # set to True after the first re-initialisation when `first_batch` is enabled
+        self.init = False
+
+        # feature history, only queried by the 'random' anchor strategy
+        self.pool = FeaturePool(self.num_embed, self.embed_dim)
+        self.embedding = nn.Embedding(self.num_embed, self.embed_dim)
+        self.embedding.weight.data.uniform_(-1.0 / self.num_embed, 1.0 / self.num_embed)
+        self.register_buffer("embed_prob", torch.zeros(self.num_embed))
+
+    
+    def forward(self, z, mask=None, temp=None, rescale_logits=False, return_logits=False):
+        """
+        Quantise ``z`` to its closest codebook entries.
+
+        z: features whose last dimension equals ``embed_dim``
+        mask: optional tensor matching the first two dimensions of ``z`` with a trailing
+              dimension of size 1; ``z`` is multiplied by it and the loss is normalised by its sum
+        temp, rescale_logits, return_logits: accepted only for interface compatibility
+              with a Gumbel quantiser; any non-default value fails an assertion
+        returns: (z_q with straight-through gradient, loss, encoding_indices of shape z.shape[:-1])
+
+        In training mode this also updates ``embed_prob`` and re-initialises rarely used
+        codes in place, so the statements below are order-sensitive.
+        """
+        if mask is not None:
+            assert mask.shape[:2] == z.shape[:2], (mask.shape, z.shape)
+            assert mask.shape[-1] == 1, (mask.shape,)
+            z = z * mask
+        assert temp is None or temp == 1.0, "Only for interface compatible with Gumbel"
+        assert rescale_logits == False, "Only for interface compatible with Gumbel"
+        assert return_logits == False, "Only for interface compatible with Gumbel"
+        # reshape z -> (batch, height, width, channel) and flatten
+        # z = rearrange(z, 'b c h w -> b h w c').contiguous()
+        assert z.shape[-1] == self.embed_dim
+        z_flattened = z.view(-1, self.embed_dim)
+
+        # calculate the similarity score d (larger d == closer code in both branches)
+        # NOTE(review): any other value of `self.distance` leaves `d` undefined
+        if self.distance == 'l2':
+            # negated l2 distances from z to embeddings e_j: -(z - e)^2 = -z^2 - e^2 + 2 e * z
+            d = - torch.sum(z_flattened.detach() ** 2, dim=1, keepdim=True) - \
+                torch.sum(self.embedding.weight ** 2, dim=1) + \
+                2 * torch.einsum('bd, dn-> bn', z_flattened.detach(), rearrange(self.embedding.weight, 'n d-> d n'))
+        elif self.distance == 'cos':
+            # cosine similarity between z and embeddings e_j 
+            normed_z_flattened = F.normalize(z_flattened, dim=1).detach()
+            normed_codebook = F.normalize(self.embedding.weight, dim=1)
+            d = torch.einsum('bd,dn->bn', normed_z_flattened, rearrange(normed_codebook, 'n d -> d n'))
+
+        # encoding
+        sort_distance, indices = d.sort(dim=1)
+        # look up the closest point for the indices (ascending sort, so the last column has the largest d)
+        encoding_indices = indices[:,-1]
+        # one-hot assignment matrix, one row per flattened feature
+        encodings = torch.zeros(encoding_indices.unsqueeze(1).shape[0], self.num_embed, device=z.device)
+        encodings.scatter_(1, encoding_indices.unsqueeze(1), 1)
+
+        # quantise and unflatten
+        z_q = torch.matmul(encodings, self.embedding.weight).view(z.shape)
+        # compute loss for embedding: beta-weighted commitment term plus codebook term
+        loss = self.beta * (z_q.detach() - z) ** 2 + (z_q - z.detach()) ** 2
+        if mask is not None:
+            loss = (loss * mask).sum() / mask.sum() / self.embed_dim
+        else:
+            loss = loss.mean()
+        # loss = self.beta * torch.mean((z_q.detach()-z)**2) + torch.mean((z_q - z.detach()) ** 2)
+        # preserve gradients
+        z_q = z + (z_q - z).detach()
+        # reshape back to match original input shape
+        # z_q = rearrange(z_q, 'b h w c -> b c h w').contiguous()
+        # count
+        # import pdb
+        # pdb.set_trace()
+        avg_probs = torch.mean(encodings, dim=0)
+        # perplexity = torch.exp(-torch.sum(avg_probs * torch.log(avg_probs + 1e-10)))
+        # min_encodings = encodings
+
+        # online clustered reinitialisation for unoptimized points
+        if self.training:
+            # calculate the average usage of code entries
+            self.embed_prob.mul_(self.decay).add_(avg_probs, alpha= 1 - self.decay)
+            # running average updates
+            if self.anchor in ['closest', 'random', 'probrandom'] and (not self.init):
+                # closest sampling: for every code, take the feature with the largest d
+                if self.anchor == 'closest':
+                    sort_distance, indices = d.sort(dim=0)
+                    random_feat = z_flattened.detach()[indices[-1,:]]
+                # feature pool based random sampling
+                elif self.anchor == 'random':
+                    random_feat = self.pool.query(z_flattened.detach())
+                # probability based random sampling
+                elif self.anchor == 'probrandom':
+                    norm_distance = F.softmax(d.t(), dim=1)
+                    prob = torch.multinomial(norm_distance, num_samples=1).view(-1)
+                    random_feat = z_flattened.detach()[prob]
+                # decay parameter based on the average usage (the smaller embed_prob is, the closer decay gets to 1)
+                decay = torch.exp(-(self.embed_prob*self.num_embed*10)/(1-self.decay)-1e-3).unsqueeze(1).repeat(1, self.embed_dim)
+                if hparams.get('reduce_cvq_embed') and dist.is_initialized():
+                    # keep the embedding weights in sync on all GPUs:
+                    # average the sampled anchors over all ranks before applying them
+                    dist.all_reduce(random_feat.data, op=dist.ReduceOp.SUM)
+                    random_feat.data /= dist.get_world_size()
+                # blend every code towards its anchor, weighted by `decay`
+                self.embedding.weight.data = self.embedding.weight.data * (1 - decay) + random_feat * decay
+                if self.first_batch:
+                    self.init = True
+            # contrastive loss
+            if self.contras_loss:
+                sort_distance, indices = d.sort(dim=0)
+                # positives: mean of the largest scores per code; negatives: the lower half of the scores
+                dis_pos = sort_distance[-max(1, int(sort_distance.size(0)/self.num_embed)):,:].mean(dim=0, keepdim=True)
+                dis_neg = sort_distance[:int(sort_distance.size(0)*1/2),:]
+                # 0.07 acts as the softmax temperature; the positive is always class 0
+                dis = torch.cat([dis_pos, dis_neg], dim=0).t() / 0.07
+                contra_loss = F.cross_entropy(dis, torch.zeros((dis.size(0),), dtype=torch.long, device=dis.device))
+                loss +=  contra_loss
+
+        encoding_indices = encoding_indices.reshape(z.shape[:-1])
+        return z_q, loss, encoding_indices
+    
+    def get_codebook_entry(self, encoding_indices):
+        """Return the codebook vectors selected by ``encoding_indices`` via the embedding table."""
+        # # get quantized latent vectors
+        # print(encoding_indices.shape)
+        # encoding_indices = encoding_indices.view(-1)
+        # encodings = torch.zeros(encoding_indices.unsqueeze(1).shape[0], self.num_embed, device=encoding_indices.device)
+        # print(encodings.shape)
+        # encodings.scatter_(1, encoding_indices.unsqueeze(1), 1)
+        # print(encodings.shape)
+        # # quantise and unflatten
+        # z_q = torch.matmul(encodings, self.embedding.weight).view(encoding_indices.shape[0], -1)
+        z_q = self.embedding(encoding_indices)
+        return z_q
+
+class FeaturePool():
+    """
+    This class implements a feature buffer that stores previously encoded features
+
+    This buffer enables us to initialize the codebook using a history of generated features
+    rather than the ones produced by the latest encoders
+    """
+    def __init__(self, pool_size, dim=64):
+        """
+        Initialize the FeaturePool class
+
+        Parameters:
+            pool_size(int) -- the size of feature buffer
+            dim(int)       -- dimensionality of each stored feature
+        """
+        self.pool_size = pool_size
+        if self.pool_size > 0:
+            # number of slots already filled with real features
+            self.nums_features = 0
+            # placeholder content: uniform values in [-1/pool_size, 1/pool_size)
+            self.features = (torch.rand((pool_size, dim)) * 2 - 1)/ pool_size
+
+    def query(self, features):
+        """
+        Store `features` in the pool and return the whole pool.
+
+        Parameters:
+            features(Tensor) -- batch of features, one per row
+
+        Returns the (pool_size, dim) buffer after the update.
+        """
+        self.features = self.features.to(features.device)
+        batch_size = features.size(0)
+
+        if batch_size > self.pool_size:
+            # if the batch size is large enough, directly update the whole pool
+            random_feat_id = torch.randint(0, batch_size, (int(self.pool_size),))
+            self.features = features[random_feat_id]
+            self.nums_features = self.pool_size
+        elif self.nums_features < self.pool_size:
+            # pool not full yet: append the batch to the free slots. Only as many rows
+            # as still fit are stored, so a batch that overflows the remaining room no
+            # longer fails with a shape mismatch on the slice assignment.
+            stored = min(batch_size, self.pool_size - self.nums_features)
+            end = self.nums_features + stored
+            self.features[self.nums_features:end] = features[:stored]
+            self.nums_features = end
+        else:
+            # pool is full: overwrite randomly chosen slots with the new batch
+            random_id = torch.randperm(self.pool_size)
+            self.features[random_id[:batch_size]] = features
+
+        return self.features
\ No newline at end of file
diff --git a/modules/commons/vqvae_fsq.py b/modules/commons/vqvae_fsq.py
new file mode 100644
index 0000000000000000000000000000000000000000..12ade280e20a2f1cb9701e465e7335d45dee286a
--- /dev/null
+++ b/modules/commons/vqvae_fsq.py
@@ -0,0 +1,72 @@
+"""
+Finite Scalar Quantization: VQ-VAE Made Simple - https://arxiv.org/abs/2309.15505
+Code adapted from Jax version in Appendix A.1
+"""
+
+from typing import List
+
+import torch
+import torch.nn as nn
+from torch import Tensor, int32
+
+
+def round_ste(z: Tensor) -> Tensor:
+    """Round to the nearest integer in the forward pass; pass gradients through unchanged."""
+    # the detached residual carries the rounding, so d(output)/d(z) is the identity
+    residual = (torch.round(z) - z).detach()
+    return z + residual
+
+
+class FSQ(nn.Module):
+    """Finite Scalar Quantization layer.
+
+    Channel ``i`` of the last axis is bounded and rounded to one of ``levels[i]``
+    integer values; the implicit codebook is the product of all per-channel grids
+    and is materialised once in ``implicit_codebook``.
+
+    :param levels: number of quantisation levels for each channel
+    """
+
+    def __init__(self, levels: List[int]):
+        super().__init__()
+        _levels = torch.tensor(levels, dtype=int32)
+        self.register_buffer("_levels", _levels)
+
+        # mixed-radix place values: _basis[i] = prod(levels[:i])
+        _basis = torch.cumprod(torch.tensor([1] + levels[:-1]), dim=0, dtype=int32)
+        self.register_buffer("_basis", _basis)
+
+        self.dim = len(levels)
+        self.n_codes = self._levels.prod().item()
+        implicit_codebook = self.indices_to_codes(torch.arange(self.n_codes))
+        self.register_buffer("implicit_codebook", implicit_codebook)
+
+    def forward(self, z: Tensor):
+        """Quantize ``z`` (..., d) and return the tuple ``(zhat, indices)``.
+
+        ``zhat`` has the shape of ``z``; ``indices`` is int32 with the last axis removed.
+        """
+        zhat = self.quantize(z)
+        indices = self.codes_to_indices(zhat)
+        return zhat, indices
+
+    def bound(self, z: Tensor, eps: float = 1e-3) -> Tensor:
+        """Bound `z`, an array of shape (..., d)."""
+        half_l = (self._levels - 1) * (1 - eps) / 2
+        # channels with an even number of levels are shifted by half a step
+        offset = torch.where(self._levels % 2 == 0, 0.5, 0.0)
+        shift = (offset / half_l).tan()
+        return (z + shift).tanh() * half_l - offset
+
+    def quantize(self, z: Tensor) -> Tensor:
+        """Quantizes z, returns quantized zhat, same shape as z."""
+        quantized = round_ste(self.bound(z))
+        half_width = self._levels // 2  # Renormalize to [-1, 1].
+        return quantized / half_width
+
+    def _scale_and_shift(self, zhat_normalized: Tensor) -> Tensor:
+        """Map normalized codes back to non-negative per-channel level numbers."""
+        half_width = self._levels // 2
+        return (zhat_normalized * half_width) + half_width
+
+    def _scale_and_shift_inverse(self, zhat: Tensor) -> Tensor:
+        """Inverse of `_scale_and_shift`."""
+        half_width = self._levels // 2
+        return (zhat - half_width) / half_width
+
+    def codes_to_indices(self, zhat: Tensor) -> Tensor:
+        """Converts a `code` to an index in the codebook."""
+        assert zhat.shape[-1] == self.dim
+        # Recover exact integer digits before combining them. Accumulating the index in
+        # the input's float dtype and truncating on the cast gives wrong indices for
+        # low-precision inputs (e.g. float16 cannot represent every integer above 2048).
+        digits = self._scale_and_shift(zhat.float()).round().to(int32)
+        return (digits * self._basis).sum(dim=-1).to(int32)
+
+    def indices_to_codes(self, indices: Tensor) -> Tensor:
+        """Inverse of `codes_to_indices`."""
+        indices = indices.unsqueeze(-1)
+        codes_non_centered = (indices // self._basis) % self._levels
+        return self._scale_and_shift_inverse(codes_non_centered)
+
+    def get_codebook_entry(self, encoding_indices):
+        """Alias of `indices_to_codes`, matching the interface of the other quantizers."""
+        return self.indices_to_codes(encoding_indices)
diff --git a/modules/commons/vqvae_lfq.py b/modules/commons/vqvae_lfq.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff9b0bf4837caa7bc6944952e02d4ce8e495f0bc
--- /dev/null
+++ b/modules/commons/vqvae_lfq.py
@@ -0,0 +1,276 @@
+"""
+Lookup Free Quantization
+Proposed in https://arxiv.org/abs/2310.05737
+
+basically a 2-level FSQ (Finite Scalar Quantization) with entropy loss
+https://arxiv.org/abs/2309.15505
+"""
+
+from math import log2, ceil
+from collections import namedtuple
+
+import torch
+from torch import nn, Tensor, einsum
+import torch.nn.functional as F
+from torch.nn import Module
+
+from einops import rearrange, reduce, pack, unpack
+
+# constants
+
+# Return = namedtuple('Return', ['quantized', 'indices', 'entropy_aux_loss'])
+
+# container for the individual loss terms, for callers that want them separately
+LossBreakdown = namedtuple('LossBreakdown', ['per_sample_entropy', 'batch_entropy', 'commitment'])
+
+# helper functions
+
+def exists(v):
+    """Return True for every value except ``None``."""
+    return not (v is None)
+
+def default(*args):
+    """Return the first argument that is not None, calling it first when it is callable.
+
+    Returns None when every argument is None (or no argument is given).
+    """
+    chosen = next((candidate for candidate in args if exists(candidate)), None)
+    if chosen is None:
+        return None
+    if callable(chosen):
+        return chosen()
+    return chosen
+
+def pack_one(t, pattern):
+    """Run einops ``pack`` on the single tensor ``t`` and return its result unchanged."""
+    singleton = [t]
+    return pack(singleton, pattern)
+
+def unpack_one(t, ps, pattern):
+    """Run einops ``unpack`` and return only the first unpacked tensor."""
+    pieces = unpack(t, ps, pattern)
+    return pieces[0]
+
+# distance
+
+def euclidean_distance_squared(x, y):
+    """Squared Euclidean distance between every vector of ``x`` (... n d) and every row of ``y`` (n d).
+
+    Uses the expansion |x|^2 + |y|^2 - 2 * x.y and returns a tensor of shape (... i j).
+    """
+    x_sq_norm = reduce(x ** 2, '... n d -> ... n', 'sum')
+    y_sq_norm = reduce(y ** 2, 'n d -> n', 'sum')
+    cross_term = einsum('... i d, j d -> ... i j', x, y) * -2
+    x_sq_col = rearrange(x_sq_norm, '... i -> ... i 1')
+    return x_sq_col + y_sq_norm + cross_term
+
+# entropy
+
+def log(t, eps = 1e-20):
+    """Natural logarithm of ``t`` after clamping it from below at ``eps``."""
+    floored = torch.clamp(t, min = eps)
+    return torch.log(floored)
+
+def entropy(prob):
+    """Element-wise entropy contribution ``-p * log(p)`` (not summed)."""
+    negated = -prob
+    return negated * log(prob)
+
+# class
+
+class LFQ(Module):
+    """Lookup Free Quantization layer.
+
+    Every (projected) feature is quantized by its sign to ``+codebook_scale`` or
+    ``-codebook_scale``; the resulting bit pattern is the code index. In training mode
+    an entropy auxiliary loss and a commitment loss are returned as well; in eval mode
+    both are the constant ``zero`` buffer.
+    """
+
+    def __init__(
+        self,
+        *,
+        dim = None,
+        codebook_size = None,
+        entropy_loss_weight = 0.1,
+        commitment_loss_weight = 1.,
+        diversity_gamma = 2.5,
+        straight_through_activation = nn.Identity(),
+        num_codebooks = 1,
+        keep_num_codebooks_dim = None,
+        codebook_scale = 1.  # for residual LFQ, codebook scaled down by 2x at each layer
+    ):
+        """
+        dim: input feature dimension; defaults to log2(codebook_size) * num_codebooks
+        codebook_size: number of codes, must be a power of 2; defaults to 2 ** dim
+        entropy_loss_weight: weight of the entropy auxiliary loss
+        commitment_loss_weight: weight of the commitment loss
+        diversity_gamma: weight of the codebook-entropy term inside the entropy loss
+        straight_through_activation: module applied to the input before the straight-through step (training only)
+        num_codebooks: number of codebooks the projected feature is split into
+        keep_num_codebooks_dim: keep the codebook axis on the indices; defaults to num_codebooks > 1
+        codebook_scale: magnitude of every code entry
+        """
+        super().__init__()
+
+        # some assert validations
+
+        assert exists(dim) or exists(codebook_size), 'either dim or codebook_size must be specified for LFQ'
+        assert not exists(codebook_size) or log2(codebook_size).is_integer(), f'your codebook size must be a power of 2 for lookup free quantization (suggested {2 ** ceil(log2(codebook_size))})'
+
+        codebook_size = default(codebook_size, lambda: 2 ** dim)
+        codebook_dim = int(log2(codebook_size))
+
+        codebook_dims = codebook_dim * num_codebooks
+        dim = default(dim, codebook_dims)
+
+        # linear projections are only needed when the feature size differs from the total number of bits
+        self.project_in = nn.Linear(dim, codebook_dims) if dim != codebook_dims else nn.Identity()
+        self.project_out = nn.Linear(codebook_dims, dim) if dim != codebook_dims else nn.Identity()
+
+        self.dim = dim
+        self.codebook_dim = codebook_dim
+        self.num_codebooks = num_codebooks
+
+        keep_num_codebooks_dim = default(keep_num_codebooks_dim, num_codebooks > 1)
+        assert not (num_codebooks > 1 and not keep_num_codebooks_dim)
+        self.keep_num_codebooks_dim = keep_num_codebooks_dim
+
+        # straight through activation
+
+        self.activation = straight_through_activation
+
+        # entropy aux loss related weights
+
+        self.diversity_gamma = diversity_gamma
+        self.entropy_loss_weight = entropy_loss_weight
+
+        # codebook scale
+
+        self.codebook_scale = codebook_scale
+
+        # commitment loss
+
+        self.commitment_loss_weight = commitment_loss_weight
+
+        # for no auxiliary loss, during inference
+
+        # `mask` holds the bit weights, most significant bit first
+        self.register_buffer('mask', 2 ** torch.arange(codebook_dim - 1, -1, -1))
+        self.register_buffer('zero', torch.tensor(0.), persistent = False)
+
+        # codes
+
+        # enumerate every index, expand it into bits and map the bits to +/- codebook_scale
+        all_codes = torch.arange(codebook_size)
+        bits = ((all_codes[..., None].int() & self.mask) != 0).float()
+        codebook = self.bits_to_codes(bits)
+
+        self.register_buffer('codebook', codebook, persistent = False)
+
+    def bits_to_codes(self, bits):
+        """Map bits in {0, 1} to code values in {-codebook_scale, +codebook_scale}."""
+        return bits * self.codebook_scale * 2 - self.codebook_scale
+
+    @property
+    def dtype(self):
+        """dtype of the ``codebook`` buffer."""
+        return self.codebook.dtype
+
+    def indices_to_codes(
+        self,
+        indices,
+        project_out = True
+    ):
+        """Convert code indices back to code vectors.
+
+        indices: integer tensor of code indices
+        project_out: when true, pass the codes through ``project_out``
+
+        Inputs with at least 3 dimensions (plus one when the codebook axis is kept) are
+        treated as image/video and get the feature axis moved right after the batch axis.
+        """
+        is_img_or_video = indices.ndim >= (3 + int(self.keep_num_codebooks_dim))
+
+        if not self.keep_num_codebooks_dim:
+            indices = rearrange(indices, '... -> ... 1')
+
+        # indices to codes, which are bits of either -1 or 1
+
+        bits = ((indices[..., None].int() & self.mask) != 0).to(self.dtype)
+
+        codes = self.bits_to_codes(bits)
+
+        codes = rearrange(codes, '... c d -> ... (c d)')
+
+        # whether to project codes out to original dimensions
+        # if the input feature dimensions were not log2(codebook size)
+
+        if project_out:
+            codes = self.project_out(codes)
+
+        # rearrange codes back to original shape
+
+        if is_img_or_video:
+            codes = rearrange(codes, 'b ... d -> b d ...')
+
+        return codes
+
+    def forward(
+        self,
+        x,
+        mask=None,
+        inv_temperature = 1.,
+        return_loss_breakdown = False
+    ):
+        """
+        einstein notation
+        b - batch
+        n - sequence (or flattened spatial dimensions)
+        d - feature dimension, which is also log2(codebook size)
+        c - number of codebook dim
+
+        x: (b, n, dim), or (b, dim, ...) for inputs with 4 or more dimensions
+        mask: not referenced in this method
+        inv_temperature: factor applied to the negated distances before the softmax
+        return_loss_breakdown: additionally return a ``LossBreakdown`` tuple
+
+        returns (quantized, aux_loss, indices), or ((quantized, aux_loss, indices), LossBreakdown)
+        """
+
+        is_img_or_video = x.ndim >= 4
+
+        # standardize image or video into (batch, seq, dimension)
+
+        if is_img_or_video:
+            x = rearrange(x, 'b d ... -> b ... d')
+            x, ps = pack_one(x, 'b * d')
+
+        assert x.shape[-1] == self.dim, f'expected dimension of {self.dim} but received {x.shape[-1]}'
+
+        x = self.project_in(x)
+
+        # split out number of codebooks
+
+        x = rearrange(x, 'b n (c d) -> b n c d', c = self.num_codebooks)
+
+        # quantize by eq 3.
+
+        original_input = x
+
+        codebook_value = torch.ones_like(x) * self.codebook_scale
+        quantized = torch.where(x > 0, codebook_value, -codebook_value)
+
+        # use straight-through gradients with the configured activation (nn.Identity by default) if training
+
+        if self.training:
+            x = self.activation(x)
+            # forward value is `quantized`, gradient is that of the activated input
+            x = x - x.detach() + quantized
+        else:
+            x = quantized
+
+        # calculate indices
+
+        indices = reduce((x > 0).int() * self.mask.int(), 'b n c d -> b n c', 'sum')
+
+        # entropy aux loss
+
+        if self.training:
+            distance = euclidean_distance_squared(original_input, self.codebook)
+
+            # soft assignment over all codes; the last axis of `prob` indexes the codebook
+            prob = (-distance * inv_temperature).softmax(dim = -1)
+
+            per_sample_entropy = entropy(prob).mean()
+
+            avg_prob = reduce(prob, 'b n c d -> b c d', 'mean')
+            codebook_entropy = entropy(avg_prob).mean()
+
+            # 1. entropy will be nudged to be low for each code, to encourage the network to output confident predictions
+            # 2. codebook entropy will be nudged to be high, to encourage all codes to be uniformly used within the batch
+
+            entropy_aux_loss = per_sample_entropy - self.diversity_gamma * codebook_entropy
+        else:
+            # if not training, just return dummy 0
+            entropy_aux_loss = per_sample_entropy = codebook_entropy = self.zero
+
+        # commit loss
+
+        if self.training:
+            commit_loss = F.mse_loss(original_input, quantized.detach())
+        else:
+            commit_loss = self.zero
+
+        # merge back codebook dim
+
+        x = rearrange(x, 'b n c d -> b n (c d)')
+
+        # project out to feature dimension if needed
+
+        x = self.project_out(x)
+
+        # reconstitute image or video dimensions
+
+        if is_img_or_video:
+            x = unpack_one(x, ps, 'b * d')
+            x = rearrange(x, 'b ... d -> b d ...')
+
+            indices = unpack_one(indices, ps, 'b * c')
+
+        # whether to remove single codebook dim
+
+        if not self.keep_num_codebooks_dim:
+            indices = rearrange(indices, '... 1 -> ...')
+
+        # complete aux loss
+
+        aux_loss = entropy_aux_loss * self.entropy_loss_weight + commit_loss * self.commitment_loss_weight
+
+        ret = x, aux_loss, indices
+
+        if not return_loss_breakdown:
+            return ret
+
+        return ret, LossBreakdown(per_sample_entropy, codebook_entropy, commit_loss)
+
+    def get_codebook_entry(self, encoding_indices):
+        """Alias of ``indices_to_codes`` with its default ``project_out=True``."""
+        return self.indices_to_codes(encoding_indices)
diff --git a/modules/commons/vqvae_lfq_y.py b/modules/commons/vqvae_lfq_y.py
new file mode 100644
index 0000000000000000000000000000000000000000..b34ead5d2481801a6a966b7d560b326e8083e310
--- /dev/null
+++ b/modules/commons/vqvae_lfq_y.py
@@ -0,0 +1,109 @@
+"""
+Lookup Free Quantization
+Proposed in https://arxiv.org/abs/2310.05737
+
+basically a 2-level FSQ (Finite Scalar Quantization) with entropy loss
+https://arxiv.org/abs/2309.15505
+"""
+
+import torch
+from einops import rearrange
+from torch.nn import Module
+
+
+# entropy
+
+def binary_entropy(prob):
+    """Element-wise entropy of a Bernoulli variable with success probability ``prob``."""
+    complement = 1 - prob
+    on_term = -prob * log(prob)
+    off_term = complement * log(complement)
+    return on_term - off_term
+
+
+# tensor helpers
+
+def log(t, eps=1e-20):
+    """Natural logarithm of ``t`` after clamping it from below at ``eps``."""
+    floored = torch.clamp(t, min=eps)
+    return torch.log(floored)
+
+
+# convert to bit representations and back
+
+def decimal_to_bits(x: torch.LongTensor, bits: int) -> torch.FloatTensor:
+    """Expand integer codes into ``bits`` signed bits, least significant bit first."""
+    # [b, ...] {0, 1, ..., max - 1} -> [b, ..., d] {-1, 1}
+    place_values = 2 ** torch.arange(bits).to(x)  # [d]
+    is_set = (x.unsqueeze(-1) & place_values) != 0  # [b, n, d] bool
+    return is_set.float() * 2 - 1   # {0, 1} -> {-1, 1}
+
+
+def bits_to_decimal(x: torch.FloatTensor) -> torch.LongTensor:
+    """Collapse signed bits (positive == 1) into integer codes, least significant bit first."""
+    # [b, ..., d] {-1, 1} -> [b, ...] {0, 1, ..., max - 1}
+    on = (x > 0).long()   # {-1, 1} -> {0, 1}, [b, ..., d]
+    place_values = 2 ** torch.arange(on.size(-1)).to(on)  # [d]
+    return (on * place_values).sum(-1)  # [b, ...]
+
+
+# class
+
+class LFQY(Module):
+    """Parameter-free Lookup Free Quantization: every feature is quantized to -1 or 1 by its sign.
+
+    :param dim: feature dimension, i.e. the number of bits of a code
+    :param entropy_loss_weight: weight of the returned entropy auxiliary loss
+    :param diversity_gamma: weight of the codebook-entropy term inside the auxiliary loss
+    """
+
+    def __init__(self, dim, entropy_loss_weight=0.1, diversity_gamma=1.0):
+        super().__init__()
+        self.dim = dim
+        self.diversity_gamma = diversity_gamma
+        self.entropy_loss_weight = entropy_loss_weight
+
+    def indices_to_codes(self, indices):
+        """Expand integer indices [b, ...] into codes [b, ..., dim] with entries in {-1, 1}."""
+        codes = decimal_to_bits(indices, self.dim)
+        # codes = rearrange(codes, 'b ... d -> b d ...')
+        return codes
+
+    def forward(self, x, mask=None, inv_temperature=1.):
+        """
+        einstein notation
+        b - batch
+        n - sequence (or flattened spatial dimensions)
+        d - feature dimension, which is also log2(codebook size)
+
+        :param x: [b, ..., d]
+        :param mask: not referenced in this method
+        :param inv_temperature: divisor applied to ``x`` before tanh / sigmoid
+        :return: (z [b, ..., d] in {-1, 1} with straight-through gradient,
+                  weighted entropy auxiliary loss, indices [b, ...])
+        """
+        # x = rearrange(x, 'b d ... -> b ... d')
+
+        assert x.shape[-1] == self.dim
+        z = torch.tanh(x / inv_temperature)  # (-1, 1)
+
+        # quantize by eq 3.
+        # torch.sign would map an exact 0 to 0, which is not a valid code and disagrees
+        # with indices_to_codes(indices); non-positive inputs are sent to -1 instead.
+        ones = torch.ones_like(x)
+        quantized = torch.where(x > 0, ones, -ones)  # {-1, 1}
+        # straight-through estimator: forward value is `quantized`, gradient is that of tanh
+        z = z + (quantized - z).detach()
+
+        # calculate indices
+        indices = bits_to_decimal(z)
+
+        # entropy aux loss
+        if self.training:
+            prob = torch.sigmoid(x / inv_temperature)  # [b, ..., d]
+
+            bit_entropy = binary_entropy(prob).sum(-1).mean()
+            # E[H(q)] = avg(sum(H(q_i)))
+
+            avg_prob = prob.flatten(0, -2).mean(0)  # [b, ..., d] -> [n, d] -> [d]
+            codebook_entropy = binary_entropy(avg_prob).sum()
+            # H(E[q]) = sum(H(avg(q_i)))
+
+            # 1. entropy will be nudged to be low for each bit,
+            #    so each scalar commits to one latent binary bit or the other.
+            # 2. codebook entropy will be nudged to be high,
+            #    to encourage all codes to be uniformly used.
+
+            entropy_aux_loss = bit_entropy - self.diversity_gamma * codebook_entropy
+        else:
+            # if not training, just return dummy 0
+            entropy_aux_loss = torch.zeros(1).to(z)
+
+        entropy_aux_loss = entropy_aux_loss * self.entropy_loss_weight
+
+        # reconstitute image or video dimensions
+
+        # z = rearrange(z, 'b ... d -> b d ...')
+
+        # bits to decimal for the codebook indices
+        return z, entropy_aux_loss, indices
+
+    def get_codebook_entry(self, encoding_indices):
+        """Alias of ``indices_to_codes``, matching the interface of the other quantizers."""
+        return self.indices_to_codes(encoding_indices)
diff --git a/modules/commons/vqvae_taming.py b/modules/commons/vqvae_taming.py
new file mode 100644
index 0000000000000000000000000000000000000000..59b7abff0050186aacdd5899f142c5dcbcf49295
--- /dev/null
+++ b/modules/commons/vqvae_taming.py
@@ -0,0 +1,428 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+from scipy.cluster.vq import kmeans2
+from torch import einsum
+from einops import rearrange
+import torch.distributed as dist
+
+
+class VectorQuantizer(nn.Module):
+    """
+    see https://github.com/MishaLaskin/vqvae/blob/d761a999e2267766400dc646d82d3ac3657771d4/models/quantizer.py
+    ____________________________________________
+    Discretization bottleneck part of the VQ-VAE.
+    Inputs:
+    - n_e : number of embeddings
+    - e_dim : dimension of embedding
+    - beta : commitment cost used in loss term, beta * ||z_e(x)-sg[e]||^2
+    _____________________________________________
+    """
+
+    # NOTE: this class contains a bug regarding beta; see VectorQuantizer2 for
+    # a fix and use legacy=False to apply that fix. VectorQuantizer2 can be
+    # used wherever VectorQuantizer has been used before and is additionally
+    # more efficient.
+    def __init__(self, n_e, e_dim, beta):
+        super().__init__()
+        self.n_e = n_e
+        self.e_dim = e_dim
+        self.beta = beta
+
+        # Codebook, initialised uniformly in [-1/n_e, 1/n_e].
+        self.embedding = nn.Embedding(self.n_e, self.e_dim)
+        self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e)
+
+    def forward(self, z):
+        """
+        Map the encoder output to its nearest codebook vectors.
+
+        z (continuous) -> z_q (discrete)
+        z.shape = (batch, channel, height, width), with channel == e_dim.
+
+        Returns a tuple ``(z_q, loss, (perplexity, min_encodings, min_encoding_indices))``:
+        - z_q: quantized tensor in the input layout (batch, channel, height, width);
+          values come from the codebook, gradients are passed straight through to z.
+        - loss: scalar codebook/commitment loss.
+        - perplexity: exp of the entropy of the average code usage in this call.
+        - min_encodings: one-hot rows of shape (batch*height*width, n_e).
+        - min_encoding_indices: chosen code index per row, shape (batch*height*width, 1).
+        """
+        # reshape z -> (batch, height, width, channel) and flatten to (B*H*W, C)
+        z = z.permute(0, 2, 3, 1).contiguous()
+        z_flattened = z.view(-1, self.e_dim)
+
+        # distances from z to embeddings e_j: (z - e)^2 = z^2 + e^2 - 2 e * z
+        codebook = self.embedding.weight
+        d = torch.sum(z_flattened ** 2, dim=1, keepdim=True) + \
+            torch.sum(codebook ** 2, dim=1) - 2 * \
+            torch.matmul(z_flattened, codebook.t())
+
+        # find closest encodings
+        min_encoding_indices = torch.argmin(d, dim=1).unsqueeze(1)
+
+        # The one-hot matrix is part of the return value and feeds the
+        # perplexity, so it is still materialised ...
+        min_encodings = torch.zeros(
+            min_encoding_indices.shape[0], self.n_e).to(z)
+        min_encodings.scatter_(1, min_encoding_indices, 1)
+
+        # ... but the quantized vectors are fetched with a direct embedding
+        # lookup instead of a dense (N, n_e) x (n_e, e_dim) matmul. Selecting
+        # row j yields the same values as multiplying by the one-hot row.
+        z_q = self.embedding(min_encoding_indices.squeeze(1)).view(z.shape)
+
+        # compute loss for embedding
+        # (beta multiplies the codebook term here; this is the known legacy
+        # behaviour mentioned in the NOTE above and is kept on purpose)
+        loss = torch.mean((z_q.detach() - z) ** 2) + self.beta * \
+               torch.mean((z_q - z.detach()) ** 2)
+
+        # preserve gradients (straight-through estimator)
+        z_q = z + (z_q - z).detach()
+
+        # perplexity
+        e_mean = torch.mean(min_encodings, dim=0)
+        perplexity = torch.exp(-torch.sum(e_mean * torch.log(e_mean + 1e-10)))
+
+        # reshape back to match original input shape
+        z_q = z_q.permute(0, 3, 1, 2).contiguous()
+
+        return z_q, loss, (perplexity, min_encodings, min_encoding_indices)
+
+    def get_codebook_entry(self, indices, shape):
+        """
+        Return the codebook vectors for a flat tensor of integer ``indices``.
+
+        shape: ``None`` to get the raw (len(indices), e_dim) rows, or a
+        (batch, height, width, channel) tuple; in that case the rows are
+        viewed to that shape and permuted to (batch, channel, height, width).
+        """
+        # get quantized latent vectors by direct lookup
+        z_q = self.embedding(indices)
+
+        if shape is not None:
+            z_q = z_q.view(shape)
+
+            # reshape back to match original input shape
+            z_q = z_q.permute(0, 3, 1, 2).contiguous()
+
+        return z_q
+
+
+class GumbelQuantize(nn.Module):
+    """
+    credit to @karpathy: https://github.com/karpathy/deep-vector-quantization/blob/main/model.py (thanks!)
+    Gumbel Softmax trick quantizer
+    Categorical Reparameterization with Gumbel-Softmax, Jang et al. 2016
+    https://arxiv.org/abs/1611.01144
+
+    Arguments:
+    - num_hiddens : input channels of the 1x1 conv that produces the code logits
+    - embedding_dim : dimension of each codebook vector
+    - n_embed : number of codebook entries
+    - straight_through : used as ``hard`` for gumbel_softmax while training
+      (eval mode always uses hard=True)
+    - kl_weight : weight of the KL-to-uniform-prior term returned as the loss
+    - temp_init : default softmax temperature
+    - use_vqinterface : if True, forward returns ``(z_q, loss, (None, None, ind))``
+      instead of ``(z_q, loss, ind)``
+    - remap : optional path given to ``np.load`` holding the used code indices
+    - unknown_index : "random", "extra" or an integer; how indices that are
+      not in the used set are remapped
+    """
+
+    def __init__(self, num_hiddens, embedding_dim, n_embed, straight_through=True,
+                 kl_weight=5e-4, temp_init=1.0, use_vqinterface=True,
+                 remap=None, unknown_index="random"):
+        super().__init__()
+
+        self.embedding_dim = embedding_dim
+        self.n_embed = n_embed
+
+        self.straight_through = straight_through
+        self.temperature = temp_init
+        self.kl_weight = kl_weight
+
+        self.proj = nn.Conv2d(num_hiddens, n_embed, 1)
+        self.embed = nn.Embedding(n_embed, embedding_dim)
+
+        self.use_vqinterface = use_vqinterface
+
+        self.remap = remap
+        if self.remap is not None:
+            self.register_buffer("used", torch.tensor(np.load(self.remap)))
+            self.re_embed = self.used.shape[0]
+            self.unknown_index = unknown_index  # "random" or "extra" or integer
+            if self.unknown_index == "extra":
+                # reserve one additional index, placed after all used ones
+                self.unknown_index = self.re_embed
+                self.re_embed = self.re_embed + 1
+            print(f"Remapping {self.n_embed} indices to {self.re_embed} indices. "
+                  f"Using {self.unknown_index} for unknown indices.")
+        else:
+            self.re_embed = n_embed
+
+    def remap_to_used(self, inds):
+        """Translate full-codebook indices into positions inside ``self.used``.
+
+        ``inds`` must have at least two dimensions; the result has the same
+        shape. Indices that do not occur in ``self.used`` become a random
+        index in [0, re_embed) or ``self.unknown_index``.
+        """
+        ishape = inds.shape
+        assert len(ishape) > 1
+        inds = inds.reshape(ishape[0], -1)
+        used = self.used.to(inds)
+        match = (inds[:, :, None] == used[None, None, ...]).long()
+        new = match.argmax(-1)
+        unknown = match.sum(2) < 1
+        if self.unknown_index == "random":
+            new[unknown] = torch.randint(0, self.re_embed, size=new[unknown].shape).to(device=new.device)
+        else:
+            new[unknown] = self.unknown_index
+        return new.reshape(ishape)
+
+    def unmap_to_all(self, inds):
+        """Inverse of ``remap_to_used``: positions in ``self.used`` -> full-codebook indices.
+
+        ``inds`` must have at least two dimensions and is left unmodified.
+        When an extra token exists, indices beyond the used set map to
+        ``self.used[0]``.
+        """
+        ishape = inds.shape
+        assert len(ishape) > 1
+        inds = inds.reshape(ishape[0], -1)
+        used = self.used.to(inds)
+        if self.re_embed > self.used.shape[0]:  # extra token
+            # Simply set to zero. Done out-of-place: reshape() may return a
+            # view, so an in-place write would alter the caller's tensor.
+            inds = torch.where(inds >= self.used.shape[0], torch.zeros_like(inds), inds)
+        back = used[inds]
+        return back.reshape(ishape)
+
+    def forward(self, z, temp=None, return_logits=False):
+        """Quantize ``z`` with the Gumbel-softmax relaxation.
+
+        z is fed to a 1x1 ``nn.Conv2d``; ``temp`` overrides the stored
+        temperature. Returns ``(z_q, loss, ind)`` or, with ``use_vqinterface``,
+        ``(z_q, loss, (None, None, ind))`` plus the logits if ``return_logits``.
+        Note that ``return_logits`` is only honoured when ``use_vqinterface`` is set.
+        """
+        # force hard = True when we are in eval mode, as we must quantize. actually, always true seems to work
+        hard = self.straight_through if self.training else True
+        temp = self.temperature if temp is None else temp
+
+        logits = self.proj(z)
+        if self.remap is not None:
+            # continue only with used logits
+            full_zeros = torch.zeros_like(logits)
+            logits = logits[:, self.used, ...]
+
+        soft_one_hot = F.gumbel_softmax(logits, tau=temp, dim=1, hard=hard)
+        if self.remap is not None:
+            # go back to all entries but unused set to zero
+            full_zeros[:, self.used, ...] = soft_one_hot
+            soft_one_hot = full_zeros
+        z_q = einsum('b n h w, n d -> b d h w', soft_one_hot, self.embed.weight)
+
+        # + kl divergence to the prior loss
+        qy = F.softmax(logits, dim=1)
+        diff = self.kl_weight * torch.sum(qy * torch.log(qy * self.n_embed + 1e-10), dim=1).mean()
+
+        ind = soft_one_hot.argmax(dim=1)
+        if self.remap is not None:
+            ind = self.remap_to_used(ind)
+        if self.use_vqinterface:
+            if return_logits:
+                return z_q, diff, (None, None, ind), logits
+            return z_q, diff, (None, None, ind)
+        return z_q, diff, ind
+
+    def get_codebook_entry(self, indices, shape):
+        """Return codebook vectors for flat ``indices`` as a (b, embedding_dim, h, w) tensor.
+
+        ``shape`` is (batch, height, width, channel); ``indices`` must hold
+        batch*height*width entries.
+        """
+        b, h, w, _ = shape
+        assert b * h * w == indices.shape[0]
+        indices = indices.reshape(b, h, w)
+        if self.remap is not None:
+            indices = self.unmap_to_all(indices)
+        one_hot = F.one_hot(indices, num_classes=self.n_embed).permute(0, 3, 1, 2).float()
+        z_q = einsum('b n h w, n d -> b d h w', one_hot, self.embed.weight)
+        return z_q
+
+
+class VectorQuantizer2(nn.Module):
+    """
+    Improved version over VectorQuantizer, can be used as a drop-in replacement. Mostly
+    avoids costly matrix multiplications and allows for post-hoc remapping of indices.
+
+    Inputs:
+    - n_e : number of embeddings
+    - e_dim : dimension of embedding (must equal the last dimension of the input)
+    - beta : weight of the commitment term (or of the codebook term if legacy=True)
+    - legacy : if True, reproduce the historical loss where beta was applied
+      to the wrong term
+    """
+
+    # NOTE: due to a bug the beta term was originally applied to the wrong
+    # term. In this file the fixed loss is the default (legacy=False); pass
+    # legacy=True to reproduce the old behaviour for backwards compatibility.
+    def __init__(self, n_e, e_dim, beta, legacy=False):
+        super().__init__()
+        self.n_e = n_e
+        self.e_dim = e_dim
+        self.beta = beta
+        self.legacy = legacy
+
+        # Codebook, initialised uniformly in [-1/n_e, 1/n_e].
+        self.embedding = nn.Embedding(self.n_e, self.e_dim)
+        self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e)
+
+        self.re_embed = n_e
+
+    def _nearest_code_indices(self, z_flattened):
+        """Return the index of the closest codebook vector for each row of ``z_flattened``."""
+        codebook = self.embedding.weight
+        # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z
+        d = torch.sum(z_flattened ** 2, dim=1, keepdim=True) + \
+            torch.sum(codebook ** 2, dim=1) - 2 * \
+            torch.matmul(z_flattened, codebook.t())
+        return torch.argmin(d, dim=1)
+
+    def encode(self, z):
+        """Quantize ``z`` (last dimension e_dim) without computing a loss.
+
+        Returns ``(z_flattened, z_q, min_encoding_indices)`` where z_flattened
+        is z reshaped to (-1, e_dim), z_q has the shape of z and the indices
+        have the shape of z without its last dimension. No straight-through
+        estimator is applied here.
+        """
+        z_flattened = z.reshape(-1, self.e_dim)
+        min_encoding_indices = self._nearest_code_indices(z_flattened)
+        z_q = self.embedding(min_encoding_indices).reshape(z.shape)
+        min_encoding_indices = min_encoding_indices.reshape(z.shape[:-1])
+        return z_flattened, z_q, min_encoding_indices
+
+    def forward(self, z, mask=None, temp=None, rescale_logits=False, return_logits=False):
+        """Quantize ``z`` and compute the VQ loss.
+
+        z: tensor whose last dimension is e_dim (channel-last layout).
+        mask: optional tensor with the same first two dimensions as z and a
+            trailing dimension of 1; z is multiplied by it and the loss is
+            averaged over the masked-in positions only.
+        temp, rescale_logits, return_logits: accepted only for interface
+            compatibility with the Gumbel quantizer; must keep their defaults
+            (temp may also be 1.0).
+
+        Returns ``(z_q, loss, min_encoding_indices, perplexity)``; perplexity
+        is always None.
+        """
+        if mask is not None:
+            assert mask.shape[:2] == z.shape[:2], (mask.shape, z.shape)
+            assert mask.shape[-1] == 1, (mask.shape,)
+            z = z * mask
+        assert temp is None or temp == 1.0, "Only for interface compatible with Gumbel"
+        assert not rescale_logits, "Only for interface compatible with Gumbel"
+        assert not return_logits, "Only for interface compatible with Gumbel"
+        assert z.shape[-1] == self.e_dim
+        z_flattened = z.reshape(-1, self.e_dim)
+
+        min_encoding_indices = self._nearest_code_indices(z_flattened)
+        z_q = self.embedding(min_encoding_indices).reshape(z.shape)
+        perplexity = None
+
+        # compute loss for embedding (element-wise, reduced below)
+        if not self.legacy:
+            loss = self.beta * (z_q.detach() - z) ** 2 + \
+                   (z_q - z.detach()) ** 2
+        else:
+            loss = (z_q.detach() - z) ** 2 + self.beta * \
+                   (z_q - z.detach()) ** 2
+
+        # preserve gradients (straight-through estimator)
+        z_q = z + (z_q - z).detach()
+
+        min_encoding_indices = min_encoding_indices.reshape(z.shape[:-1])
+        if mask is not None:
+            # Guard the normaliser: a fully masked batch would otherwise
+            # give 0 / 0 = NaN and poison the training loss.
+            denom = mask.sum().to(loss.dtype).clamp(min=1e-8)
+            loss = (loss * mask).sum() / denom / self.e_dim
+        else:
+            loss = loss.mean()
+        return z_q, loss, min_encoding_indices, perplexity
+
+    def get_codebook_entry(self, indices, shape=None):
+        """Return codebook vectors for integer ``indices``.
+
+        If ``shape`` (batch, height, width, channel) is given, the result is
+        viewed to it and permuted to (batch, channel, height, width).
+        """
+        # get quantized latent vectors
+        z_q = self.embedding(indices)
+
+        if shape is not None:
+            z_q = z_q.view(shape)
+            # reshape back to match original input shape
+            z_q = z_q.permute(0, 3, 1, 2).contiguous()
+
+        return z_q
+
+
+class VectorQuantizer4(nn.Module):
+    """
+    Vector quantizer with the same interface as VectorQuantizer2 that
+    additionally tracks code usage during training and re-initialises the
+    codebook every ``kmeans_reset_every`` training steps:
+
+    - at the first reset, if not every code was used more than ``reset_thres``
+      times, the codebook is replaced by k-means centroids of the encoder
+      outputs collected during the preceding (up to) 101 steps;
+    - at later resets, codes used fewer than ``reset_thres`` times are
+      overwritten with copies of randomly chosen sufficiently-used codes.
+
+    With torch.distributed initialised, usage counts are all-reduced, the
+    update is done on rank 0 and the codebook is broadcast from rank 0.
+    """
+
+    def __init__(self, n_e, e_dim, beta, legacy=False, kmeans_reset_every=1000):
+        super().__init__()
+        self.n_e = n_e
+        self.e_dim = e_dim
+        self.beta = beta
+        self.legacy = legacy
+
+        # Codebook, initialised uniformly in [-1/n_e, 1/n_e].
+        self.embedding = nn.Embedding(self.n_e, self.e_dim)
+        self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e)
+
+        self.re_embed = n_e
+        self.reset_every = kmeans_reset_every
+        self.reset_thres = 20  # usage count a code needs to be considered "in use"
+        self.z_buffer = []  # CPU copies of encoder outputs gathered for k-means
+        self.register_buffer('use_flag', torch.zeros(n_e))  # per-code usage counter
+        self.register_buffer('steps', torch.zeros(1))  # number of training forward calls
+
+    def _nearest_code_indices(self, z_flattened):
+        """Return the index of the closest codebook vector for each row of ``z_flattened``."""
+        codebook = self.embedding.weight
+        # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z
+        d = torch.sum(z_flattened ** 2, dim=1, keepdim=True) + \
+            torch.sum(codebook ** 2, dim=1) - 2 * \
+            torch.matmul(z_flattened, codebook.t())
+        return torch.argmin(d, dim=1)
+
+    def encode(self, z):
+        """Quantize ``z`` (last dimension e_dim) without loss or usage bookkeeping.
+
+        Returns ``(z_flattened, z_q, min_encoding_indices)``.
+        """
+        z_flattened = z.reshape(-1, self.e_dim)
+        min_encoding_indices = self._nearest_code_indices(z_flattened)
+        z_q = self.embedding(min_encoding_indices).reshape(z.shape)
+        min_encoding_indices = min_encoding_indices.reshape(z.shape[:-1])
+        return z_flattened, z_q, min_encoding_indices
+
+    def forward(self, z, mask=None, temp=None, rescale_logits=False, return_logits=False):
+        """Quantize ``z``, update usage statistics (training only) and compute the VQ loss.
+
+        z: tensor whose last dimension is e_dim.
+        mask: optional tensor with the same first two dimensions as z and a
+            trailing dimension of 1.
+        temp, rescale_logits, return_logits: only for interface compatibility
+            with the Gumbel quantizer; must keep their defaults.
+
+        Returns ``(z_q, loss, min_encoding_indices, perplexity)``; perplexity
+        is always None.
+        """
+        if mask is not None:
+            assert mask.shape[:2] == z.shape[:2], (mask.shape, z.shape)
+            assert mask.shape[-1] == 1, (mask.shape,)
+            z = z * mask
+        assert temp is None or temp == 1.0, "Only for interface compatible with Gumbel"
+        assert not rescale_logits, "Only for interface compatible with Gumbel"
+        assert not return_logits, "Only for interface compatible with Gumbel"
+        assert z.shape[-1] == self.e_dim
+        z_flattened = z.reshape(-1, self.e_dim)
+
+        min_encoding_indices = self._nearest_code_indices(z_flattened)
+        z_q = self.embedding(min_encoding_indices).reshape(z.shape)
+        perplexity = None
+
+        if self.training:
+            self.steps += 1
+            step = int(self.steps.item())
+            self.use_flag += torch.bincount(min_encoding_indices, minlength=self.n_e)
+            is_master = not dist.is_initialized() or dist.get_rank() == 0
+            # collect encoder outputs shortly before the first reset for k-means
+            if self.reset_every - 100 <= step <= self.reset_every:
+                if dist.is_initialized():
+                    z_buffer_ = [None for _ in range(dist.get_world_size())]
+                    dist.all_gather_object(z_buffer_, z_flattened.detach().cpu())
+                else:
+                    z_buffer_ = [z_flattened.detach().cpu()]
+                self.z_buffer += z_buffer_
+
+            if step % self.reset_every == 0:
+                if dist.is_initialized():
+                    dist.all_reduce(self.use_flag)
+                vq_usage = (self.use_flag > self.reset_thres).sum().item() / self.use_flag.shape[0]
+                print("| VQ usage: ", vq_usage)
+                if vq_usage != 1:
+                    if is_master:
+                        if step == self.reset_every:
+                            print('| running kmeans in VQVAE')  # data driven initialization for the embeddings
+                            z_buffer = torch.cat(self.z_buffer, 0)
+                            rp = torch.randperm(z_buffer.shape[0])
+                            kd = kmeans2(z_buffer[rp].numpy(), self.n_e, minit='points')[0]
+                            # copy_ keeps the parameter's dtype and device
+                            # whatever dtype k-means returned
+                            self.embedding.weight.data.copy_(torch.from_numpy(kd))
+                        else:
+                            reset_ids = self.use_flag < self.reset_thres
+                            keep_ids = self.use_flag >= self.reset_thres
+                            n_keep = int(keep_ids.sum())
+                            # With no sufficiently-used code there is nothing
+                            # to copy from (and randint(0, 0) would raise), so
+                            # the codebook is left untouched for this round.
+                            if n_keep > 0:
+                                t = torch.randint(0, n_keep, [int(reset_ids.sum())],
+                                                  device=self.use_flag.device)
+                                keep_ids = torch.where(keep_ids)[0][t]
+                                self.embedding.weight.data[reset_ids] = self.embedding.weight.data[keep_ids].clone()
+                    if dist.is_initialized():
+                        dist.broadcast(self.embedding.weight.data, 0)
+
+                    # the codebook may have changed: redo the assignment
+                    min_encoding_indices = self._nearest_code_indices(z_flattened)
+                    z_q = self.embedding(min_encoding_indices).reshape(z.shape)
+                self.use_flag.fill_(0)
+                self.z_buffer = []
+
+        # compute loss for embedding (element-wise, reduced below)
+        if not self.legacy:
+            loss = self.beta * (z_q.detach() - z) ** 2 + \
+                   (z_q - z.detach()) ** 2
+        else:
+            loss = (z_q.detach() - z) ** 2 + self.beta * \
+                   (z_q - z.detach()) ** 2
+
+        # preserve gradients (straight-through estimator)
+        z_q = z + (z_q - z).detach()
+
+        min_encoding_indices = min_encoding_indices.reshape(z.shape[:-1])
+        if mask is not None:
+            loss = (loss * mask).sum() / mask.sum() / self.e_dim
+        else:
+            loss = loss.mean()
+        return z_q, loss, min_encoding_indices, perplexity
+
+    def get_codebook_entry(self, indices, shape=None):
+        """Return codebook vectors for integer ``indices``.
+
+        If ``shape`` (batch, height, width, channel) is given, the result is
+        viewed to it and permuted to (batch, channel, height, width).
+        """
+        # get quantized latent vectors
+        z_q = self.embedding(indices)
+
+        if shape is not None:
+            z_q = z_q.view(shape)
+            # reshape back to match original input shape
+            z_q = z_q.permute(0, 3, 1, 2).contiguous()
+
+        return z_q
diff --git a/modules/commons/wavenet.py b/modules/commons/wavenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..7809c9b9d3331ba4fd2ffd4caae14e721e4b0732
--- /dev/null
+++ b/modules/commons/wavenet.py
@@ -0,0 +1,97 @@
+import torch
+from torch import nn
+
+
+def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
+    """Gated activation: ``tanh(first half) * sigmoid(second half)`` of ``input_a + input_b``.
+
+    The sum is split along dim 1 at ``n_channels[0]``: channels before that
+    index go through tanh, the remaining ones through sigmoid, and the two
+    results are multiplied element-wise.
+    """
+    split_at = n_channels[0]
+    summed = input_a + input_b
+    gate = torch.sigmoid(summed[:, split_at:, :])
+    signal = torch.tanh(summed[:, :split_at, :])
+    return signal * gate
+
+
+class WN(torch.nn.Module):
+    """Stack of weight-normalised, dilated 1-D convolutions with gated
+    activations and residual/skip connections.
+
+    Layer ``i`` uses dilation ``dilation_rate ** i``. If ``c_cond`` is non-zero
+    and ``share_cond_layers`` is False, a 1x1 conv projects the conditioning
+    input to ``2 * hidden_size`` channels per layer; with
+    ``share_cond_layers=True`` no projection is created and ``cond`` is used
+    as given. With ``is_BTC=True`` inputs and output have dims 1 and 2 swapped
+    relative to the layout the convolutions operate on.
+    """
+
+    def __init__(self, hidden_size, kernel_size, dilation_rate, n_layers, c_cond=0,
+                 p_dropout=0, share_cond_layers=False, is_BTC=False):
+        super().__init__()
+        assert kernel_size % 2 == 1
+        assert hidden_size % 2 == 0
+        self.is_BTC = is_BTC
+        self.hidden_size = hidden_size
+        self.kernel_size = kernel_size
+        self.dilation_rate = dilation_rate
+        self.n_layers = n_layers
+        self.gin_channels = c_cond
+        self.p_dropout = p_dropout
+        self.share_cond_layers = share_cond_layers
+
+        self.in_layers = torch.nn.ModuleList()
+        self.res_skip_layers = torch.nn.ModuleList()
+        self.drop = nn.Dropout(p_dropout)
+
+        needs_cond_projection = c_cond != 0 and not share_cond_layers
+        if needs_cond_projection:
+            # one projection for all layers; forward() slices out each layer's part
+            self.cond_layer = torch.nn.utils.weight_norm(
+                torch.nn.Conv1d(c_cond, 2 * hidden_size * n_layers, 1), name='weight')
+
+        final_idx = n_layers - 1
+        for layer_idx in range(n_layers):
+            dilation = dilation_rate ** layer_idx
+            padding = int((kernel_size * dilation - dilation) / 2)
+            gated_conv = torch.nn.utils.weight_norm(
+                torch.nn.Conv1d(hidden_size, 2 * hidden_size, kernel_size,
+                                dilation=dilation, padding=padding),
+                name='weight')
+            self.in_layers.append(gated_conv)
+
+            # the last layer only feeds the skip path, so it needs no residual half
+            out_channels = hidden_size if layer_idx >= final_idx else 2 * hidden_size
+            pointwise = torch.nn.utils.weight_norm(
+                torch.nn.Conv1d(hidden_size, out_channels, 1), name='weight')
+            self.res_skip_layers.append(pointwise)
+
+    def forward(self, x, nonpadding=None, cond=None):
+        """Run the stack and return the sum of all skip outputs.
+
+        x: input with ``hidden_size`` channels on dim 1 (on dim 2 if is_BTC).
+        nonpadding: optional multiplicative mask applied to the residual
+            stream after every layer and to the final output.
+        cond: optional conditioning input; projected by ``cond_layer`` unless
+            ``share_cond_layers`` is set.
+        """
+        if self.is_BTC:
+            x = x.transpose(1, 2)
+            if cond is not None:
+                cond = cond.transpose(1, 2)
+            if nonpadding is not None:
+                nonpadding = nonpadding.transpose(1, 2)
+        if nonpadding is None:
+            nonpadding = 1
+
+        width = self.hidden_size
+        output = torch.zeros_like(x)
+        n_channels_tensor = torch.IntTensor([width])
+
+        if cond is not None and not self.share_cond_layers:
+            cond = self.cond_layer(cond)
+
+        final_idx = self.n_layers - 1
+        for i in range(self.n_layers):
+            x_in = self.drop(self.in_layers[i](x))
+            if cond is None:
+                cond_l = torch.zeros_like(x_in)
+            else:
+                start = i * 2 * width
+                cond_l = cond[:, start:start + 2 * width, :]
+
+            acts = fused_add_tanh_sigmoid_multiply(x_in, cond_l, n_channels_tensor)
+            res_skip_acts = self.res_skip_layers[i](acts)
+
+            if i >= final_idx:
+                output = output + res_skip_acts
+            else:
+                # first half updates the residual stream, second half is the skip
+                x = (x + res_skip_acts[:, :width, :]) * nonpadding
+                output = output + res_skip_acts[:, width:, :]
+
+        output = output * nonpadding
+        return output.transpose(1, 2) if self.is_BTC else output
+
+    def remove_weight_norm(self):
+        """Strip weight normalisation from every submodule that has it."""
+
+        def _strip(module):
+            try:
+                nn.utils.remove_weight_norm(module)
+            except ValueError:  # this module didn't have weight norm
+                return
+
+        self.apply(_strip)
diff --git a/modules/eg3ds/camera_utils/pose_sampler.py b/modules/eg3ds/camera_utils/pose_sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e36f3bcac364ab993ad59f25ce7f90726f32ceb
--- /dev/null
+++ b/modules/eg3ds/camera_utils/pose_sampler.py
@@ -0,0 +1,216 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""
+Helper functions for constructing camera parameter matrices. Primarily used in visualization and inference scripts.
+"""
+
+import math
+import numpy as np
+import torch
+import torch.nn as nn
+
+from modules.eg3ds.volumetric_rendering import math_utils
+
+
+class UnifiedCameraPoseSampler():
+    """
+    Unified helper that builds a camera label: a 25-dimensional vector made of
+    the flattened camera-to-world matrix (4x4 = 16 values) followed by the
+    flattened camera intrinsics (3x3 = 9 values).
+    It relies on LookAtPoseSampler, defined below.
+    """
+    def get_camera_pose(self, pitch, yaw, lookat_location=None, distance_to_orig=2.7, batch_size=1, device='cpu', roll=None):
+        """Return a [batch_size, 25] camera tensor for the given pitch/yaw (radians).
+
+        lookat_location defaults to the point (0, 0, -0.2); distance_to_orig
+        is used as the sampling radius; roll is forwarded to the sampler.
+        The intrinsics are fixed constants.
+        """
+        target = lookat_location
+        if target is None:
+            target = torch.tensor([0., 0., -0.2], device=device)
+
+        # deterministic pose: both standard deviations are zero
+        cam2world = LookAtPoseSampler.sample(
+            yaw, pitch, target,
+            horizontal_stddev=0, vertical_stddev=0,
+            radius=distance_to_orig, batch_size=batch_size, device=device, roll=roll,
+        )
+        c2w = cam2world.reshape([batch_size, 16])
+
+        intrinsics_3x3 = torch.tensor([[4.2647, 0, 0.5], [0, 4.2647, 0.5], [0, 0, 1]], device=device)
+        intrinsics = intrinsics_3x3.reshape([9,]).unsqueeze(0).repeat([batch_size, 1])
+
+        return torch.cat([c2w, intrinsics], dim=1)  # [batch, 25]
+    
+    
+
+class GaussianCameraPoseSampler:
+    """
+    Samples pitch and yaw from a Gaussian distribution and returns a camera pose.
+    Camera is specified as looking at the origin.
+    If horizontal and vertical stddev (specified in radians) are zero, gives a
+    deterministic camera pose with yaw=horizontal_mean, pitch=vertical_mean.
+    The coordinate system is specified with y-up, z-forward, x-left.
+
+    Both means are offsets from the frontal pose: pi/2 is added to each of
+    them inside ``sample`` before they are used as azimuthal / polar angles.
+
+    Example:
+    For a camera pose looking at the origin with the camera at position [0, 0, 1]:
+    cam2world = GaussianCameraPoseSampler.sample(0, 0, radius=1)
+    """
+
+    @staticmethod
+    def sample(horizontal_mean, vertical_mean, horizontal_stddev=0, vertical_stddev=0, radius=1, batch_size=1, device='cpu'):
+        """
+        horizontal_mean: yaw (azimuth) in radians, within [-pi/2, pi/2].
+            0 is the frontal view; -pi/2 places the camera at x = -radius and
+            pi/2 at x = +radius (described as "left" / "right" by the authors).
+        vertical_mean: pitch in radians, within [-pi/2, pi/2].
+            0 is horizontal, -pi/2 places the camera above (y = +radius),
+            pi/2 below (y = -radius). Author's note: 0.2 is a good choice for
+            a frontal face.
+        horizontal_stddev / vertical_stddev: standard deviations of the
+            Gaussian noise added to the two angles.
+        Returns the cam2world matrices built by create_cam2world_matrix.
+        """
+        assert horizontal_mean < np.pi/2 + 1e-5 and horizontal_mean > - np.pi/2 - 1e-5
+        assert vertical_mean < np.pi/2 + 1e-5 and vertical_mean > - np.pi/2 - 1e-5
+        # Shift out-of-place: `+=` would modify a tensor/array argument that
+        # the caller still holds, so repeated calls would keep drifting.
+        horizontal_mean = horizontal_mean + np.pi/2
+        vertical_mean = vertical_mean + np.pi/2
+        h = torch.randn((batch_size, 1), device=device) * horizontal_stddev + horizontal_mean
+        v = torch.randn((batch_size, 1), device=device) * vertical_stddev + vertical_mean
+        v = torch.clamp(v, 1e-5, math.pi - 1e-5)
+
+        theta = h
+        v = v / math.pi
+        phi = torch.arccos(1 - 2*v)
+
+        camera_origins = torch.zeros((batch_size, 3), device=device)
+
+        # spherical -> cartesian; y is the vertical axis
+        camera_origins[:, 0:1] = radius*torch.sin(phi) * torch.cos(math.pi-theta)
+        camera_origins[:, 2:3] = radius*torch.sin(phi) * torch.sin(math.pi-theta)
+        camera_origins[:, 1:2] = radius*torch.cos(phi)
+
+        forward_vectors = math_utils.normalize_vecs(-camera_origins) # the direction the camera is pointing, pointing to origin in this func
+        return create_cam2world_matrix(forward_vectors, camera_origins)
+
+
+class LookAtPoseSampler:
+    """
+    Same as GaussianCameraPoseSampler, except the
+    camera is specified as looking at 'lookat_position', a 3-vector.
+
+    Both means are offsets from the frontal pose: pi/2 is added to each of
+    them inside ``sample`` before they are used as azimuthal / polar angles.
+
+    Example:
+    For a camera pose looking at the origin with the camera at position [0, 0, 1]:
+    cam2world = LookAtPoseSampler.sample(0, 0, torch.tensor([0., 0., 0.]), radius=1)
+    """
+
+    @staticmethod
+    def sample(horizontal_mean, vertical_mean, lookat_position, horizontal_stddev=0, vertical_stddev=0, radius=1, batch_size=1, device='cpu', roll=None):
+        """
+        horizontal_mean: yaw (azimuth) in radians.
+            0 is the frontal view; -pi/2 places the camera at x = -radius and
+            pi/2 at x = +radius (described as "left" / "right" by the authors).
+        vertical_mean: pitch in radians.
+            0 is horizontal, -pi/2 places the camera above (y = +radius),
+            pi/2 below (y = -radius). Author's note: 0.2 is a good choice for
+            a frontal face.
+        lookat_position: 3-vector tensor the camera points at.
+        roll: forwarded to create_cam2world_matrix.
+        Unlike the other samplers, the range of the two means is not checked
+        here; the shifted pitch is clamped to (0, pi).
+        Returns the cam2world matrices built by create_cam2world_matrix.
+        """
+        # Shift out-of-place: `+=` would modify a tensor/array argument that
+        # the caller still holds, so repeated calls would keep drifting.
+        horizontal_mean = horizontal_mean + np.pi/2
+        vertical_mean = vertical_mean + np.pi/2
+
+        h = torch.randn((batch_size, 1), device=device) * horizontal_stddev + horizontal_mean
+        v = torch.randn((batch_size, 1), device=device) * vertical_stddev + vertical_mean
+        v = torch.clamp(v, 1e-5, math.pi - 1e-5)
+
+        theta = h  # azimuthal angle of the spherical coordinate system
+        v = v / math.pi
+        phi = torch.arccos(1 - 2*v)
+
+        camera_origins = torch.zeros((batch_size, 3), device=device)
+
+        # radius*sin(phi) is the projection of the radius onto the horizontal
+        # plane, which the yaw angle then splits into the x and z components;
+        # radius*cos(phi) is the component along the vertical (y) axis.
+        camera_origins[:, 0:1] = radius*torch.sin(phi) * torch.cos(math.pi-theta)
+        camera_origins[:, 2:3] = radius*torch.sin(phi) * torch.sin(math.pi-theta)
+        camera_origins[:, 1:2] = radius*torch.cos(phi)
+
+        forward_vectors = math_utils.normalize_vecs(lookat_position.to(device) - camera_origins)  # the direction the camera is pointing, pointing to the lookat_position
+        return create_cam2world_matrix(forward_vectors, camera_origins, roll)
+
+
+class UniformCameraPoseSampler:
+    """
+    Same as GaussianCameraPoseSampler, except the
+    pose is sampled from a UNIFORM distribution with range +-[horizontal/vertical]_stddev, instead of a GAUSSIAN distribution.
+
+    Both means are offsets from the frontal pose: pi/2 is added to each of
+    them inside ``sample`` before they are used as azimuthal / polar angles.
+
+    Example:
+    For a batch of random camera poses looking at the origin with yaw sampled from [-pi/2, +pi/2] radians:
+
+    cam2worlds = UniformCameraPoseSampler.sample(0, 0, horizontal_stddev=math.pi/2, radius=1, batch_size=16)
+    """
+
+    @staticmethod
+    def sample(horizontal_mean, vertical_mean, horizontal_stddev=0, vertical_stddev=0, radius=1, batch_size=1, device='cpu'):
+        """
+        horizontal_mean: yaw (azimuth) in radians, within [-pi/2, pi/2].
+            0 is the frontal view; -pi/2 places the camera at x = -radius and
+            pi/2 at x = +radius (described as "left" / "right" by the authors).
+        vertical_mean: pitch in radians, within [-pi/2, pi/2].
+            0 is horizontal, -pi/2 places the camera above (y = +radius),
+            pi/2 below (y = -radius). Author's note: 0.2 is a good choice for
+            a frontal face.
+        horizontal_stddev / vertical_stddev: half-widths of the uniform
+            ranges the two angles are drawn from.
+        Returns the cam2world matrices built by create_cam2world_matrix.
+        """
+        assert horizontal_mean < np.pi/2 + 1e-5 and horizontal_mean > - np.pi/2 - 1e-5
+        assert vertical_mean < np.pi/2 + 1e-5 and vertical_mean > - np.pi/2 - 1e-5
+        # Shift out-of-place: `+=` would modify a tensor/array argument that
+        # the caller still holds, so repeated calls would keep drifting.
+        horizontal_mean = horizontal_mean + np.pi/2
+        vertical_mean = vertical_mean + np.pi/2
+
+        h = (torch.rand((batch_size, 1), device=device) * 2 - 1) * horizontal_stddev + horizontal_mean
+        v = (torch.rand((batch_size, 1), device=device) * 2 - 1) * vertical_stddev + vertical_mean
+        v = torch.clamp(v, 1e-5, math.pi - 1e-5)
+
+        theta = h
+        v = v / math.pi
+        phi = torch.arccos(1 - 2*v)
+
+        camera_origins = torch.zeros((batch_size, 3), device=device) # the location of camera
+
+        # spherical -> cartesian; y is the vertical axis
+        camera_origins[:, 0:1] = radius*torch.sin(phi) * torch.cos(math.pi-theta)
+        camera_origins[:, 2:3] = radius*torch.sin(phi) * torch.sin(math.pi-theta)
+        camera_origins[:, 1:2] = radius*torch.cos(phi)
+
+        forward_vectors = math_utils.normalize_vecs(-camera_origins) # the direction the camera is pointing, pointing to origin in this func
+        return create_cam2world_matrix(forward_vectors, camera_origins)
+
+
+def create_cam2world_matrix(forward_vector, origin, roll=None):
+    """
+    Takes in the direction the camera is pointing and the camera origin and returns a cam2world matrix.
+    Works on batches of forward_vectors, origins. Assumes y-axis is up.
+    Modified by yerfor to support roll control.
+
+    forward_vector: [batch_size, 3] viewing directions (normalised here).
+    origin: [batch_size, 3] camera positions.
+    roll: Default None, leads to 0 roll; otherwise a tensor (or number) in
+        radians holding either one value per batch element (e.g. shape
+        [batch_size, 1] or [batch_size]) or a single value shared by the batch.
+    Returns a [batch_size, 4, 4] tensor.
+    """
+
+    batch_size = len(forward_vector)
+    forward_vector = math_utils.normalize_vecs(forward_vector)
+
+    # Keep roll one-dimensional ([batch_size]). Writing a [batch_size, 1]
+    # tensor into a [batch_size] column slice only works for batch_size == 1,
+    # which made every call with a larger batch fail.
+    if roll is None:
+        roll = torch.zeros([batch_size], dtype=forward_vector.dtype, device=forward_vector.device)
+    else:
+        roll = torch.as_tensor(roll, dtype=forward_vector.dtype, device=forward_vector.device)
+        roll = roll.reshape(-1).expand(batch_size)
+
+    # up_vector is the camera's "straight up" direction, so rotating it in
+    # the x-y plane controls the roll
+    up_vector = torch.stack((torch.sin(roll), torch.cos(roll), torch.zeros_like(roll)), dim=-1)
+
+    right_vector = -math_utils.normalize_vecs(torch.cross(up_vector, forward_vector, dim=-1))
+    up_vector = math_utils.normalize_vecs(torch.cross(forward_vector, right_vector, dim=-1))
+
+    # columns of the rotation block are the camera's right, up and forward axes
+    rotation_matrix = torch.eye(4, device=origin.device).unsqueeze(0).repeat(batch_size, 1, 1)
+    rotation_matrix[:, :3, :3] = torch.stack((right_vector, up_vector, forward_vector), dim=-1)
+
+    translation_matrix = torch.eye(4, device=origin.device).unsqueeze(0).repeat(batch_size, 1, 1)
+    translation_matrix[:, :3, 3] = origin
+
+    # rotate first, then translate to the camera origin
+    cam2world = translation_matrix @ rotation_matrix
+    assert(cam2world.shape[1:] == (4, 4))
+    return cam2world
+
+
+def FOV_to_intrinsics(fov_degrees=18.837, device='cpu'):
+    """
+    Build a 3x3 camera intrinsics matrix from a field of view given in degrees.
+    The intrinsics are normalized by image size (not expressed in pixels) and
+    the principal point is placed at the image center (0.5, 0.5).
+    """
+
+    # half the field of view in radians, using the same rounded constants
+    # (3.14159 for pi, 1.414 for sqrt(2)) as the reference implementation
+    half_fov_rad = fov_degrees * 3.14159 / 360
+    denominator = math.tan(half_fov_rad) * 1.414
+    focal_length = float(1 / denominator)
+
+    rows = [
+        [focal_length, 0, 0.5],
+        [0, focal_length, 0.5],
+        [0, 0, 1],
+    ]
+    return torch.tensor(rows, device=device)
\ No newline at end of file
diff --git a/modules/eg3ds/dnnlib/__init__.py b/modules/eg3ds/dnnlib/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd91ed142e955581e83948455fb71cd837215f61
--- /dev/null
+++ b/modules/eg3ds/dnnlib/__init__.py
@@ -0,0 +1,11 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+from .util import EasyDict, make_cache_dir_path
diff --git a/modules/eg3ds/dnnlib/util.py b/modules/eg3ds/dnnlib/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..80b67c4e312cd1b847ca21fd3b929802a57e6f6d
--- /dev/null
+++ b/modules/eg3ds/dnnlib/util.py
@@ -0,0 +1,493 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Miscellaneous utility classes and functions."""
+
+import ctypes
+import fnmatch
+import importlib
+import inspect
+import numpy as np
+import os
+import shutil
+import sys
+import types
+import io
+import pickle
+import re
+import requests
+import html
+import hashlib
+import glob
+import tempfile
+import urllib
+import urllib.request
+import uuid
+
+from distutils.util import strtobool
+from typing import Any, List, Tuple, Union
+
+
+# Util classes
+# ------------------------------------------------------------------------------------------
+
+
+class EasyDict(dict):
+    """Dict subclass whose entries can also be read, written and deleted using attribute syntax."""
+
+    def __getattr__(self, name: str) -> Any:
+        # Reached only after normal attribute lookup fails, so real dict methods keep priority.
+        if name in self:
+            return self[name]
+        raise AttributeError(name)
+
+    def __setattr__(self, name: str, value: Any) -> None:
+        self[name] = value
+
+    def __delattr__(self, name: str) -> None:
+        del self[name]
+
+
+class Logger(object):
+    """Mirror stdout to an optional file: while active, sys.stdout and sys.stderr are both replaced by this object, which forwards all text to the original stdout."""
+
+    def __init__(self, file_name: str = None, file_mode: str = "w", should_flush: bool = True):
+        self.file = None
+
+        if file_name is not None:
+            self.file = open(file_name, file_mode)
+
+        self.should_flush = should_flush
+        self.stdout = sys.stdout  # keep the previous streams so close() can restore them
+        self.stderr = sys.stderr
+
+        sys.stdout = self  # from now on both streams are routed through write()
+        sys.stderr = self
+
+    def __enter__(self) -> "Logger":
+        return self
+
+    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
+        self.close()
+
+    def write(self, text: Union[str, bytes]) -> None:
+        """Write text to the original stdout (and the file, if any); flush afterwards when should_flush is set."""
+        if isinstance(text, bytes):
+            text = text.decode()
+        if len(text) == 0: # workaround for a bug in VSCode debugger: sys.stdout.write(''); sys.stdout.flush() => crash
+            return
+
+        if self.file is not None:
+            self.file.write(text)
+
+        self.stdout.write(text)
+
+        if self.should_flush:
+            self.flush()
+
+    def flush(self) -> None:
+        """Flush the file, if open, and then the original stdout."""
+        if self.file is not None:
+            self.file.flush()
+
+        self.stdout.flush()
+
+    def close(self) -> None:
+        """Flush, restore sys.stdout/sys.stderr if they still point at this logger, and close the file, if any."""
+        self.flush()
+
+        # if using multiple loggers, prevent closing in wrong order
+        if sys.stdout is self:
+            sys.stdout = self.stdout
+        if sys.stderr is self:
+            sys.stderr = self.stderr
+
+        if self.file is not None:
+            self.file.close()
+            self.file = None
+
+
+# Cache directories
+# ------------------------------------------------------------------------------------------
+
+_dnnlib_cache_dir = None  # explicit cache root; stays None until set_cache_dir() is called
+
+def set_cache_dir(path: str) -> None:  # set the module-wide cache root that make_cache_dir_path() checks first
+    global _dnnlib_cache_dir
+    _dnnlib_cache_dir = path
+
+def make_cache_dir_path(*paths: str) -> str:
+    """Join `paths` onto the cache root: the set_cache_dir() value, else $DNNLIB_CACHE_DIR, else <home>/.cache/dnnlib, else <tmp>/.cache/dnnlib."""
+    if _dnnlib_cache_dir is not None:
+        return os.path.join(_dnnlib_cache_dir, *paths)
+    if 'DNNLIB_CACHE_DIR' in os.environ:
+        return os.path.join(os.environ['DNNLIB_CACHE_DIR'], *paths)
+    for home_var in ('HOME', 'USERPROFILE'):
+        if home_var in os.environ:
+            return os.path.join(os.environ[home_var], '.cache', 'dnnlib', *paths)
+    return os.path.join(tempfile.gettempdir(), '.cache', 'dnnlib', *paths)
+
+# Small util functions
+# ------------------------------------------------------------------------------------------
+
+
+def format_time(seconds: Union[int, float]) -> str:
+    """Format a duration, rounded to whole seconds, as 's', 'm s', 'h m s' or (from one day up) 'd h m'."""
+    total = int(np.rint(seconds))
+    hours, mins = divmod(total // 60, 60)
+
+    if total < 60:
+        return "{0}s".format(total)
+    if total < 60 * 60:
+        return "{0}m {1:02}s".format(total // 60, total % 60)
+    if total < 24 * 60 * 60:
+        return "{0}h {1:02}m {2:02}s".format(hours, mins, total % 60)
+    return "{0}d {1:02}h {2:02}m".format(hours // 24, hours % 24, mins)
+
+
+def format_time_brief(seconds: Union[int, float]) -> str:
+    """Format a duration, rounded to whole seconds, using at most two units: 's', 'm s', 'h m' or 'd h'."""
+    total = int(np.rint(seconds))
+    hours, mins = divmod(total // 60, 60)
+
+    if total < 60:
+        return "{0}s".format(total)
+    if total < 60 * 60:
+        return "{0}m {1:02}s".format(total // 60, total % 60)
+    if total < 24 * 60 * 60:
+        return "{0}h {1:02}m".format(hours, mins)
+    return "{0}d {1:02}h".format(hours // 24, hours % 24)
+
+
+def ask_yes_no(question: str) -> bool:
+    """Print `question` and read a reply until it parses as yes/no (per `strtobool`); return it as a real bool."""
+    while True:
+        print("{0} [y/n]".format(question))  # outside the try: a ValueError from a closed stdout must not loop forever
+        try:
+            return bool(strtobool(input().strip().lower()))  # strtobool yields 1/0, not True/False
+        except ValueError:
+            pass  # unrecognized reply: ask again
+
+
+def tuple_product(t: Tuple) -> Any:
+    """Return the product of the elements of `t`, multiplied in order (1 for an empty tuple)."""
+    # Seed with the multiplicative identity so the first factor is taken as-is.
+    product = 1
+    for factor in t:
+        product *= factor
+
+    return product
+
+
+_str_to_ctype = {  # type name -> ctypes type; get_dtype_and_ctype() asserts each pair has the same byte size as the NumPy dtype of that name
+    "uint8": ctypes.c_ubyte,
+    "uint16": ctypes.c_uint16,
+    "uint32": ctypes.c_uint32,
+    "uint64": ctypes.c_uint64,
+    "int8": ctypes.c_byte,
+    "int16": ctypes.c_int16,
+    "int32": ctypes.c_int32,
+    "int64": ctypes.c_int64,
+    "float32": ctypes.c_float,
+    "float64": ctypes.c_double
+}
+
+
+def get_dtype_and_ctype(type_obj: Any) -> Tuple[np.dtype, Any]:
+    """Return the NumPy dtype and the ctypes type that correspond to a type name and share one byte size.
+
+    `type_obj` is either the name itself (a str) or an object carrying it in `__name__` or `name`.
+    """
+    if isinstance(type_obj, str):
+        type_str = type_obj
+    else:
+        # Probe the attributes in priority order; the for-else raises when neither exists.
+        for attr in ("__name__", "name"):
+            if hasattr(type_obj, attr):
+                type_str = getattr(type_obj, attr)
+                break
+        else:
+            raise RuntimeError("Cannot infer type name from input")
+
+    assert type_str in _str_to_ctype
+    my_dtype, my_ctype = np.dtype(type_str), _str_to_ctype[type_str]
+    assert my_dtype.itemsize == ctypes.sizeof(my_ctype)
+
+    return my_dtype, my_ctype
+
+
+def is_pickleable(obj: Any) -> bool:
+    """Return True if `obj` can be serialized with `pickle.dump`, False if pickling raises an Exception."""
+    try:
+        pickle.dump(obj, io.BytesIO())  # serialize into a throwaway buffer; only success matters
+    except Exception:  # deliberately not bare: KeyboardInterrupt / SystemExit must propagate
+        return False
+    return True
+
+
+# Functionality to import modules/objects by name, and call functions by name
+# ------------------------------------------------------------------------------------------
+
+def get_module_from_obj_name(obj_name: str) -> Tuple[types.ModuleType, str]:
+    """Split a dotted object name into the module that can be imported and the attribute path inside it.
+    Returns the module and the object name (original name with module part removed); raises ImportError or AttributeError if the name cannot be resolved."""
+
+    # allow the convenience shorthands "np." and "tf."; the dot is escaped so only a literal prefix is rewritten
+    obj_name = re.sub(r"^np\.", "numpy.", obj_name)
+    obj_name = re.sub(r"^tf\.", "tensorflow.", obj_name)
+
+    # list alternatives for (module_name, local_obj_name), longest module name first
+    parts = obj_name.split(".")
+    name_pairs = [(".".join(parts[:i]), ".".join(parts[i:])) for i in range(len(parts), 0, -1)]
+
+    # try each alternative in turn
+    for module_name, local_obj_name in name_pairs:
+        try:
+            module = importlib.import_module(module_name) # may raise ImportError
+            get_obj_from_module(module, local_obj_name) # may raise AttributeError
+            return module, local_obj_name
+        except Exception: # not bare: KeyboardInterrupt / SystemExit must not be swallowed here
+            pass
+
+    # maybe some of the modules themselves contain errors?
+    for module_name, _local_obj_name in name_pairs:
+        try:
+            importlib.import_module(module_name) # may raise ImportError
+        except ImportError as err:
+            if not str(err).startswith("No module named '" + module_name + "'"):
+                raise # the module exists but failed while importing something else
+
+    # maybe the requested attribute is missing?
+    for module_name, local_obj_name in name_pairs:
+        try:
+            module = importlib.import_module(module_name) # may raise ImportError
+            get_obj_from_module(module, local_obj_name) # may raise AttributeError, which is propagated
+        except ImportError:
+            pass
+
+    # we are out of luck, but we have no idea why
+    raise ImportError(obj_name)
+
+
+def get_obj_from_module(module: types.ModuleType, obj_name: str) -> Any:
+    """Follow the dotted attribute path `obj_name` starting at `module` and return the final object."""
+    target = module
+    # An empty path names the module itself; ''.split(".") would otherwise look up the attribute ''.
+    if obj_name != '':
+        for attr in obj_name.split("."):
+            target = getattr(target, attr)
+    return target
+
+
+def get_obj_by_name(name: str) -> Any:
+    """Resolve a dotted name such as 'package.module.attr' to the Python object it refers to."""
+    module, obj_name = get_module_from_obj_name(name)  # importable module + remaining attribute path
+    return get_obj_from_module(module, obj_name)
+
+
+def call_func_by_name(*args, func_name: str = None, **kwargs) -> Any:
+    """Resolve `func_name` with get_obj_by_name() and call the result with `*args` and `**kwargs`."""
+    assert func_name is not None  # keyword-only and effectively required despite the None default
+    func_obj = get_obj_by_name(func_name)
+    assert callable(func_obj)
+    return func_obj(*args, **kwargs)
+
+
+def construct_class_by_name(*args, class_name: str = None, **kwargs) -> Any:
+    """Resolve `class_name` and call it with the given arguments (delegates to call_func_by_name)."""
+    return call_func_by_name(*args, func_name=class_name, **kwargs)
+
+
+def get_module_dir_by_obj_name(obj_name: str) -> str:
+    """Return the directory of the file (per inspect.getfile) of the module that `obj_name` resolves into."""
+    module, _ = get_module_from_obj_name(obj_name)
+    return os.path.dirname(inspect.getfile(module))
+
+
+def is_top_level_function(obj: Any) -> bool:
+    """Determine whether `obj` is callable and its `__name__` is bound at module scope in the module named by `obj.__module__` (a by-name check only)."""
+    return callable(obj) and obj.__name__ in sys.modules[obj.__module__].__dict__
+
+
+def get_top_level_function_name(obj: Any) -> str:
+    """Return '<module>.<name>' for a top-level function (asserts is_top_level_function(obj))."""
+    assert is_top_level_function(obj)
+    module = obj.__module__
+    if module == '__main__':  # for the running script, use its file name without directory and extension
+        module = os.path.splitext(os.path.basename(sys.modules[module].__file__))[0]
+    return module + "." + obj.__name__
+
+
+# File system helpers
+# ------------------------------------------------------------------------------------------
+
+def list_dir_recursively_with_ignore(dir_path: str, ignores: List[str] = None, add_base_to_relative: bool = False) -> List[Tuple[str, str]]:
+    """Walk `dir_path` and list every file that is not ignored and does not sit below an ignored directory.
+
+    Args:
+        dir_path: Existing directory to walk (checked with an assert).
+        ignores: fnmatch-style patterns tested against bare file and directory names; None means none.
+        add_base_to_relative: If True, prefix each relative path with the base name of `dir_path`.
+
+    Returns:
+        A list of (path joined onto `dir_path`, path relative to `dir_path`) tuples, in os.walk order.
+    """
+    assert os.path.isdir(dir_path)
+    base_name = os.path.basename(os.path.normpath(dir_path))
+    patterns = [] if ignores is None else ignores
+
+    def is_ignored(name: str) -> bool:
+        return any(fnmatch.fnmatch(name, pattern) for pattern in patterns)
+
+    listing = []
+    for root, dirs, files in os.walk(dir_path, topdown=True):
+        # Prune in place: with topdown=True, os.walk only descends into what is left in `dirs`.
+        dirs[:] = [d for d in dirs if not is_ignored(d)]
+        for file_name in files:
+            if is_ignored(file_name):
+                continue
+            absolute_path = os.path.join(root, file_name)
+            relative_path = os.path.relpath(absolute_path, dir_path)
+            if add_base_to_relative:
+                relative_path = os.path.join(base_name, relative_path)
+            listing.append((absolute_path, relative_path))
+    return listing
+
+
+def copy_files_and_create_dirs(files: List[Tuple[str, str]]) -> None:
+    """Copy each (src, dst) pair in `files`, creating any missing parent directories of dst first.
+
+    A dst without a directory part (a bare file name) is copied relative to the current directory."""
+    for file in files:
+        src_path, dst_path = file[0], file[1]
+        target_dir_name = os.path.dirname(dst_path)
+        # dirname is '' for a bare file name and os.makedirs('') raises; exist_ok avoids a check-then-create race
+        if target_dir_name:
+            os.makedirs(target_dir_name, exist_ok=True)
+        shutil.copyfile(src_path, dst_path)
+
+
+# URL helpers
+# ------------------------------------------------------------------------------------------
+
+def is_url(obj: Any, allow_file_urls: bool = False) -> bool:
+    """Determine whether `obj` is a string that looks like a URL: it has a scheme and a host name containing a dot.
+    `file://` URLs are accepted only when `allow_file_urls` is set."""
+    if not isinstance(obj, str) or "://" not in obj:
+        return False
+    if allow_file_urls and obj.startswith('file://'):
+        return True
+    try:
+        # Check the URL itself and its site root ("/" resolved against it) the same way.
+        for candidate in (obj, urllib.parse.urljoin(obj, "/")):
+            res = urllib.parse.urlparse(candidate)
+            if not res.scheme or not res.netloc or "." not in res.netloc:
+                return False
+    except Exception:  # unparsable input counts as "not a URL"; not bare, so Ctrl-C still propagates
+        return False
+    return True
+
+
+def open_url(url: str, cache_dir: str = None, num_attempts: int = 10, verbose: bool = True, return_filename: bool = False, cache: bool = True) -> Any:
+    """Download the given URL and return a binary-mode file object to access the data.
+
+    Strings without a `scheme://` prefix and `file://` URLs are opened as local files.
+    Other URLs are fetched with up to `num_attempts` tries and, if `cache` is set, stored
+    under `cache_dir` (default: make_cache_dir_path('downloads')) keyed by the URL's MD5;
+    a later call is served from that file when exactly one cached match exists. With
+    `return_filename` the local or cached path is returned instead of an open file
+    (requires `cache`). `verbose` prints download progress.
+    """
+    assert num_attempts >= 1
+    assert not (return_filename and (not cache))
+
+    # Doesn't look like an URL scheme so interpret it as a local filename.
+    if not re.match('^[a-z]+://', url):
+        return url if return_filename else open(url, "rb")
+
+    # Handle file URLs. On Windows, file:///c:/foo.txt parses to the path
+    # '/c:/foo.txt', which is invalid, so the leading slash is dropped for
+    # such drive-letter paths. urllib.request.url2pathname() is not used
+    # because it converts forward slashes to backslashes, which causes its
+    # own set of problems. If you touch this code path, test it on both
+    # Linux and Windows.
+    if url.startswith('file://'):
+        filename = urllib.parse.urlparse(url).path
+        if re.match(r'^/[a-zA-Z]:', filename):
+            filename = filename[1:]
+        return filename if return_filename else open(filename, "rb")
+
+    assert is_url(url)
+
+    # Lookup from cache.
+    if cache_dir is None:
+        cache_dir = make_cache_dir_path('downloads')
+
+    url_md5 = hashlib.md5(url.encode("utf-8")).hexdigest()
+    if cache:
+        cache_files = glob.glob(os.path.join(cache_dir, url_md5 + "_*"))
+        if len(cache_files) == 1:
+            filename = cache_files[0]
+            return filename if return_filename else open(filename, "rb")
+
+    # Download.
+    url_name = None
+    url_data = None
+    with requests.Session() as session:
+        if verbose:
+            print("Downloading %s ..." % url, end="", flush=True)
+        for attempts_left in reversed(range(num_attempts)):
+            try:
+                with session.get(url) as res:
+                    res.raise_for_status()
+                    if len(res.content) == 0:
+                        raise IOError("No data received")
+
+                    if len(res.content) < 8192:
+                        content_str = res.content.decode("utf-8", errors="replace")  # small binary payloads need not be valid UTF-8
+                        if "download_warning" in res.headers.get("Set-Cookie", ""):
+                            links = [html.unescape(link) for link in content_str.split('"') if "export=download" in link]
+                            if len(links) == 1:
+                                url = requests.compat.urljoin(url, links[0])
+                                raise IOError("Google Drive virus checker nag")  # retry with the confirmed link
+                        if "Google Drive - Quota exceeded" in content_str:
+                            raise IOError("Google Drive download quota exceeded -- please try again later")
+
+                    match = re.search(r'filename="([^"]*)"', res.headers.get("Content-Disposition", ""))
+                    url_name = match[1] if match else url
+                    url_data = res.content
+                    if verbose:
+                        print(" done")
+                    break
+            except KeyboardInterrupt:
+                raise
+            except Exception:  # retry on any ordinary failure; re-raise once attempts run out
+                if not attempts_left:
+                    if verbose:
+                        print(" failed")
+                    raise
+                if verbose:
+                    print(".", end="", flush=True)
+
+    # Save to cache.
+    if cache:
+        safe_name = re.sub(r"[^0-9a-zA-Z-._]", "_", url_name)
+        cache_file = os.path.join(cache_dir, url_md5 + "_" + safe_name)
+        temp_file = os.path.join(cache_dir, "tmp_" + uuid.uuid4().hex + "_" + url_md5 + "_" + safe_name)
+        os.makedirs(cache_dir, exist_ok=True)
+        with open(temp_file, "wb") as f:
+            f.write(url_data)
+        os.replace(temp_file, cache_file) # atomic
+        if return_filename:
+            return cache_file
+
+    # Return data as file object.
+    assert not return_filename
+    return io.BytesIO(url_data)
diff --git a/modules/eg3ds/metrics/__init__.py b/modules/eg3ds/metrics/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfebd04f47e6f6b1b44984c14c23b57d56f72240
--- /dev/null
+++ b/modules/eg3ds/metrics/__init__.py
@@ -0,0 +1,11 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+# empty
diff --git a/modules/eg3ds/metrics/equivariance.py b/modules/eg3ds/metrics/equivariance.py
new file mode 100644
index 0000000000000000000000000000000000000000..d105cb93031d5a9638d7a9c12c65db1d8c4a0860
--- /dev/null
+++ b/modules/eg3ds/metrics/equivariance.py
@@ -0,0 +1,270 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Equivariance metrics (EQ-T, EQ-T_frac, and EQ-R) from the paper
+"Alias-Free Generative Adversarial Networks"."""
+
+import copy
+import numpy as np
+import torch
+import torch.fft
+from modules.eg3ds.torch_utils.ops import upfirdn2d
+
+from . import metric_utils
+
+#----------------------------------------------------------------------------
+# Utilities.
+
+def sinc(x):
+    """Elementwise normalized sinc, sin(pi*x) / (pi*x), for a tensor `x`; entries with |pi*x| < 1e-30 map to exactly 1."""
+    y = (x * np.pi).abs()
+    return torch.where(y < 1e-30, torch.ones_like(x), torch.sin(y) / y.clamp(1e-30, float('inf')))
+
+def lanczos_window(x, a):
+    scaled = x.abs() / a  # window argument |x| / a: sinc(scaled) inside |x| < a, zero elsewhere
+    return torch.where(scaled < 1, sinc(scaled), torch.zeros_like(scaled))
+
+def rotation_matrix(angle):
+    """Return the 3x3 float32 matrix [[cos, sin, 0], [-sin, cos, 0], [0, 0, 1]] for `angle` given in radians."""
+    angle = torch.as_tensor(angle).to(torch.float32)
+    cos_a, sin_a = angle.cos(), angle.sin()
+    mat = torch.eye(3, device=angle.device)
+    mat[0, 0], mat[0, 1] = cos_a, sin_a
+    mat[1, 0], mat[1, 1] = -sin_a, cos_a
+    return mat
+
+#----------------------------------------------------------------------------
+# Apply integer translation to a batch of 2D images. Corresponds to the
+# operator T_x in Appendix E.1.
+
+def apply_integer_translation(x, tx, ty):
+    """Shift the 4-D batch `x` by (tx * W, ty * H) rounded to whole pixels; return (shifted, mask), where mask is 1 wherever input pixels landed."""
+    _N, _C, H, W = x.shape
+    ix = torch.as_tensor(tx * W).to(dtype=torch.float32, device=x.device).round().to(torch.int64)
+    iy = torch.as_tensor(ty * H).to(dtype=torch.float32, device=x.device).round().to(torch.int64)
+
+    shifted, mask = torch.zeros_like(x), torch.zeros_like(x)
+    if abs(ix) < W and abs(iy) < H:
+        # Destination window inside the output, and the matching source window inside the input.
+        dst_rows, dst_cols = slice(max(iy,0), H+min(iy,0)), slice(max(ix,0), W+min(ix,0))
+        src_rows, src_cols = slice(max(-iy,0), H+min(-iy,0)), slice(max(-ix,0), W+min(-ix,0))
+        shifted[:, :, dst_rows, dst_cols] = x[:, :, src_rows, src_cols]
+        mask[:, :, dst_rows, dst_cols] = 1
+    return shifted, mask
+
+#----------------------------------------------------------------------------
+# Apply fractional translation to a batch of 2D images. Corresponds to the
+# operator T_x in Appendix E.2.
+
+def apply_fractional_translation(x, tx, ty, a=3):  # returns (z, m): filtered, shifted batch and a 0/1 mask, both shaped like x
+    _N, _C, H, W = x.shape
+    tx = torch.as_tensor(tx * W).to(dtype=torch.float32, device=x.device)  # tx, ty are fractions of the image size; scale to pixels
+    ty = torch.as_tensor(ty * H).to(dtype=torch.float32, device=x.device)
+    ix = tx.floor().to(torch.int64)  # whole-pixel part of the shift
+    iy = ty.floor().to(torch.int64)
+    fx = tx - ix  # sub-pixel remainder left after floor()
+    fy = ty - iy
+    b = a - 1
+
+    z = torch.zeros_like(x)
+    zx0 = max(ix - b, 0)  # bounds of the output region that gets written
+    zy0 = max(iy - b, 0)
+    zx1 = min(ix + a, 0) + W
+    zy1 = min(iy + a, 0) + H
+    if zx0 < zx1 and zy0 < zy1:
+        taps = torch.arange(a * 2, device=x.device) - b  # 2*a tap positions, -(a-1) .. a
+        filter_x = (sinc(taps - fx) * sinc((taps - fx) / a)).unsqueeze(0)  # sinc(t) * sinc(t / a) at the shifted taps, as a row
+        filter_y = (sinc(taps - fy) * sinc((taps - fy) / a)).unsqueeze(1)  # same for the vertical direction, as a column
+        y = x
+        y = upfirdn2d.filter2d(y, filter_x / filter_x.sum(), padding=[b,a,0,0])  # filters are normalized to unit sum
+        y = upfirdn2d.filter2d(y, filter_y / filter_y.sum(), padding=[0,0,b,a])
+        y = y[:, :, max(b-iy,0) : H+b+a+min(-iy-a,0), max(b-ix,0) : W+b+a+min(-ix-a,0)]
+        z[:, :, zy0:zy1, zx0:zx1] = y
+
+    m = torch.zeros_like(x)
+    mx0 = max(ix + a, 0)  # mask bounds are tighter than the write bounds by the filter extent (a on one side, b on the other)
+    my0 = max(iy + a, 0)
+    mx1 = min(ix - b, 0) + W
+    my1 = min(iy - b, 0) + H
+    if mx0 < mx1 and my0 < my1:
+        m[:, :, my0:my1, mx0:mx1] = 1  # presumably the pixels whose filter footprint stays inside the input -- TODO confirm
+    return z, m
+
+#----------------------------------------------------------------------------
+# Construct an oriented low-pass filter that applies the appropriate
+# bandlimit with respect to the input and output of the given affine 2D
+# image transformation.
+
+def construct_affine_bandlimit_filter(mat, a=3, amax=16, aflt=64, up=4, cutoff_in=1, cutoff_out=1):  # returns a square 2D filter with (amax * 2 * up - 1) taps per side
+    assert a <= amax < aflt
+    mat = torch.as_tensor(mat).to(torch.float32)
+
+    # Construct 2D filter taps in input & output coordinate spaces.
+    taps = ((torch.arange(aflt * up * 2 - 1, device=mat.device) + 1) / up - aflt).roll(1 - aflt * up)
+    yi, xi = torch.meshgrid(taps, taps)  # NOTE(review): no indexing= argument; newer PyTorch warns about the default -- confirm 'ij' is intended
+    xo, yo = (torch.stack([xi, yi], dim=2) @ mat[:2, :2].t()).unbind(2)  # input coordinates mapped through the 2x2 linear part of mat
+
+    # Convolution of two oriented 2D sinc filters.
+    fi = sinc(xi * cutoff_in) * sinc(yi * cutoff_in)
+    fo = sinc(xo * cutoff_out) * sinc(yo * cutoff_out)
+    f = torch.fft.ifftn(torch.fft.fftn(fi) * torch.fft.fftn(fo)).real  # convolution computed as a product in the FFT domain
+
+    # Convolution of two oriented 2D Lanczos windows.
+    wi = lanczos_window(xi, a) * lanczos_window(yi, a)
+    wo = lanczos_window(xo, a) * lanczos_window(yo, a)
+    w = torch.fft.ifftn(torch.fft.fftn(wi) * torch.fft.fftn(wo)).real
+
+    # Construct windowed FIR filter.
+    f = f * w
+
+    # Finalize.
+    c = (aflt - amax) * up  # margin cropped from each side to reduce the support from aflt to amax
+    f = f.roll([aflt * up - 1] * 2, dims=[0,1])[c:-c, c:-c]
+    f = torch.nn.functional.pad(f, [0, 1, 0, 1]).reshape(amax * 2, up, amax * 2, up)  # pad by one so the taps split evenly into `up` groups per axis
+    f = f / f.sum([0,2], keepdim=True) / (up ** 2)  # normalize each of the up*up groups separately
+    f = f.reshape(amax * 2 * up, amax * 2 * up)[:-1, :-1]  # drop the padding row/column again
+    return f
+
+#----------------------------------------------------------------------------
+# Apply the given affine transformation to a batch of 2D images.
+
+def apply_affine_transformation(x, mat, up=4, **filter_kwargs):  # returns (z, m): resampled batch and its mask, both sampled on a grid shaped like x
+    _N, _C, H, W = x.shape
+    mat = torch.as_tensor(mat).to(dtype=torch.float32, device=x.device)
+
+    # Construct filter.
+    f = construct_affine_bandlimit_filter(mat, up=up, **filter_kwargs)
+    assert f.ndim == 2 and f.shape[0] == f.shape[1] and f.shape[0] % 2 == 1
+    p = f.shape[0] // 2  # half-width of the odd-sized square filter; also used as the upsampling padding
+
+    # Construct sampling grid.
+    theta = mat.inverse()  # the grid maps output positions back to input positions, hence the inverse
+    theta[:2, 2] *= 2
+    theta[0, 2] += 1 / up / W
+    theta[1, 2] += 1 / up / H
+    theta[0, :] *= W / (W + p / up * 2)  # presumably compensates for the padded, upsampled image being larger than x -- TODO confirm
+    theta[1, :] *= H / (H + p / up * 2)
+    theta = theta[:2, :3].unsqueeze(0).repeat([x.shape[0], 1, 1])  # one 2x3 matrix per batch element
+    g = torch.nn.functional.affine_grid(theta, x.shape, align_corners=False)
+
+    # Resample image.
+    y = upfirdn2d.upsample2d(x=x, f=f, up=up, padding=p)
+    z = torch.nn.functional.grid_sample(y, g, mode='bilinear', padding_mode='zeros', align_corners=False)
+
+    # Form mask.
+    m = torch.zeros_like(y)
+    c = p * 2 + 1
+    m[:, :, c:-c, c:-c] = 1  # ones everywhere except a border of width c in the upsampled image
+    m = torch.nn.functional.grid_sample(m, g, mode='nearest', padding_mode='zeros', align_corners=False)
+    return z, m
+
+#----------------------------------------------------------------------------
+# Apply fractional rotation to a batch of 2D images. Corresponds to the
+# operator R_\alpha in Appendix E.3.
+
+def apply_fractional_rotation(x, angle, a=3, **filter_kwargs):  # returns the (z, m) pair produced by apply_affine_transformation
+    angle = torch.as_tensor(angle).to(dtype=torch.float32, device=x.device)
+    mat = rotation_matrix(angle)
+    return apply_affine_transformation(x, mat, a=a, amax=a*2, **filter_kwargs)  # filter support amax is tied to twice the window size a
+
+#----------------------------------------------------------------------------
+# Modify the frequency content of a batch of 2D images as if they had undergone
+# fractional rotation -- but without actually rotating them. Corresponds to
+# the operator R^*_\alpha in Appendix E.3.
+
+def apply_fractional_pseudo_rotation(x, angle, a=3, **filter_kwargs):  # returns (y, m): filtered batch and a 0/1 mask shaped like y
+    angle = torch.as_tensor(angle).to(dtype=torch.float32, device=x.device)
+    mat = rotation_matrix(-angle)  # note the negated angle, unlike apply_fractional_rotation
+    f = construct_affine_bandlimit_filter(mat, a=a, amax=a*2, up=1, **filter_kwargs)  # up=1: filter only, no upsampling
+    y = upfirdn2d.filter2d(x=x, f=f)
+    m = torch.zeros_like(y)
+    c = f.shape[0] // 2  # half the filter size
+    m[:, :, c:-c, c:-c] = 1  # ones everywhere except a border of width c
+    return y, m
+
+#----------------------------------------------------------------------------
+# Compute the selected equivariance metrics for the given generator.
+
+def compute_equivariance_metrics(opts, num_samples, batch_size, translate_max=0.125, rotate_max=1, compute_eqt_int=False, compute_eqt_frac=False, compute_eqr=False):  # returns one PSNR per enabled metric (a bare value when only one is enabled)
+    assert compute_eqt_int or compute_eqt_frac or compute_eqr
+
+    # Setup generator and labels.
+    G = copy.deepcopy(opts.G).eval().requires_grad_(False).to(opts.device)  # work on a frozen copy so opts.G is left untouched
+    I = torch.eye(3, device=opts.device)
+    M = getattr(getattr(getattr(G, 'synthesis', None), 'input', None), 'transform', None)  # G.synthesis.input.transform, or None if any link is missing
+    if M is None:
+        raise ValueError('Cannot compute equivariance metrics; the given generator does not support user-specified image transformations')
+    c_iter = metric_utils.iterate_random_labels(opts=opts, batch_size=batch_size)
+
+    # Sampling loop.
+    sums = None
+    progress = opts.progress.sub(tag='eq sampling', num_items=num_samples)
+    for batch_start in range(0, num_samples, batch_size * opts.num_gpus):  # each step accounts for batch_size samples on every GPU
+        progress.update(batch_start)
+        s = []  # collects [masked squared error, mask] pairs, one pair per enabled metric
+
+        # Randomize noise buffers, if any.
+        for name, buf in G.named_buffers():
+            if name.endswith('.noise_const'):
+                buf.copy_(torch.randn_like(buf))
+
+        # Run mapping network.
+        z = torch.randn([batch_size, G.z_dim], device=opts.device)
+        c = next(c_iter)
+        ws = G.mapping(z=z, c=c)
+
+        # Generate reference image.
+        M[:] = I  # written in place, so the generator's own transform buffer is reset to the identity
+        orig = G.synthesis(ws=ws, noise_mode='const', **opts.G_kwargs)
+
+        # Integer translation (EQ-T).
+        if compute_eqt_int:
+            t = (torch.rand(2, device=opts.device) * 2 - 1) * translate_max  # uniform in [-translate_max, translate_max)
+            t = (t * G.img_resolution).round() / G.img_resolution  # snap to a whole number of pixels
+            M[:] = I
+            M[:2, 2] = -t
+            img = G.synthesis(ws=ws, noise_mode='const', **opts.G_kwargs)
+            ref, mask = apply_integer_translation(orig, t[0], t[1])
+            s += [(ref - img).square() * mask, mask]
+
+        # Fractional translation (EQ-T_frac).
+        if compute_eqt_frac:
+            t = (torch.rand(2, device=opts.device) * 2 - 1) * translate_max
+            M[:] = I
+            M[:2, 2] = -t
+            img = G.synthesis(ws=ws, noise_mode='const', **opts.G_kwargs)
+            ref, mask = apply_fractional_translation(orig, t[0], t[1])
+            s += [(ref - img).square() * mask, mask]
+
+        # Rotation (EQ-R).
+        if compute_eqr:
+            angle = (torch.rand([], device=opts.device) * 2 - 1) * (rotate_max * np.pi)  # uniform in [-rotate_max*pi, rotate_max*pi)
+            M[:] = rotation_matrix(-angle)
+            img = G.synthesis(ws=ws, noise_mode='const', **opts.G_kwargs)
+            ref, ref_mask = apply_fractional_rotation(orig, angle)
+            pseudo, pseudo_mask = apply_fractional_pseudo_rotation(img, angle)
+            mask = ref_mask * pseudo_mask  # compare only where both masks are set
+            s += [(ref - pseudo).square() * mask, mask]
+
+        # Accumulate results.
+        s = torch.stack([x.to(torch.float64).sum() for x in s])  # reduce every entry to a float64 scalar
+        sums = sums + s if sums is not None else s
+    progress.update(num_samples)
+
+    # Compute PSNRs.
+    if opts.num_gpus > 1:
+        torch.distributed.all_reduce(sums)
+    sums = sums.cpu()
+    mses = sums[0::2] / sums[1::2]  # even slots hold squared-error sums, odd slots the matching mask sums
+    psnrs = np.log10(2) * 20 - mses.log10() * 10  # PSNR with a peak value of 2; presumably images span [-1, 1] -- TODO confirm
+    psnrs = tuple(psnrs.numpy())
+    return psnrs[0] if len(psnrs) == 1 else psnrs
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/metrics/frechet_inception_distance.py b/modules/eg3ds/metrics/frechet_inception_distance.py
new file mode 100644
index 0000000000000000000000000000000000000000..e682de6162066e255b04c0db2f1cc8860c96de7c
--- /dev/null
+++ b/modules/eg3ds/metrics/frechet_inception_distance.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Frechet Inception Distance (FID) from the paper
+"GANs trained by a two time-scale update rule converge to a local Nash
+equilibrium". Matches the original implementation by Heusel et al. at
+https://github.com/bioinf-jku/TTUR/blob/master/fid.py"""
+
+import numpy as np
+import scipy.linalg
+from . import metric_utils
+
+#----------------------------------------------------------------------------
+
+def compute_fid(opts, max_real, num_gen, detector_url=None):
+    """Compute the Frechet Inception Distance between dataset and generator images.
+
+    Args:
+        opts: metric options; ``opts.rank`` is read here and ``opts`` is
+            forwarded to the ``metric_utils`` feature-statistics helpers.
+        max_real: forwarded as ``max_items`` for the dataset statistics.
+        num_gen: forwarded as ``max_items`` for the generator statistics.
+        detector_url: optional URL of the pickled feature detector. ``None``
+            (the default) keeps the previously hard-coded local file path, so
+            existing callers are unaffected; other machines can pass their own.
+
+    Returns:
+        The FID as a ``float`` on rank 0 and ``nan`` on every other rank.
+    """
+    # Direct TorchScript translation of http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz
+    # Upstream copy: https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/metrics/inception-2015-12-05.pkl
+    if detector_url is None:
+        detector_url = 'file:///home/tiger/nfs/myenv/cache/useful_ckpts/inception-2015-12-05.pkl'
+    detector_kwargs = dict(return_features=True) # Return raw features before the softmax layer.
+
+    mu_real, sigma_real = metric_utils.compute_feature_stats_for_dataset(
+        opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs,
+        rel_lo=0, rel_hi=0, capture_mean_cov=True, max_items=max_real).get_mean_cov()
+
+    mu_gen, sigma_gen = metric_utils.compute_feature_stats_for_generator(
+        opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs,
+        rel_lo=0, rel_hi=1, capture_mean_cov=True, max_items=num_gen).get_mean_cov()
+
+    # Every rank has to take part in the feature passes above; only rank 0
+    # goes on to evaluate the distance itself.
+    if opts.rank != 0:
+        return float('nan')
+
+    # FID = |mu_gen - mu_real|^2 + Tr(sigma_gen + sigma_real - 2 * sqrt(sigma_gen @ sigma_real)).
+    m = np.square(mu_gen - mu_real).sum()
+    s, _ = scipy.linalg.sqrtm(np.dot(sigma_gen, sigma_real), disp=False) # pylint: disable=no-member
+    # sqrtm may return a complex matrix; only the real part of the result is kept.
+    fid = np.real(m + np.trace(sigma_gen + sigma_real - s * 2))
+    return float(fid)
+
+#----------------------------------------------------------------------------
+
diff --git a/modules/eg3ds/metrics/inception_score.py b/modules/eg3ds/metrics/inception_score.py
new file mode 100644
index 0000000000000000000000000000000000000000..a8887595d5d563d391a9f95f193081e70d11caba
--- /dev/null
+++ b/modules/eg3ds/metrics/inception_score.py
@@ -0,0 +1,41 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Inception Score (IS) from the paper "Improved techniques for training
+GANs". Matches the original implementation by Salimans et al. at
+https://github.com/openai/improved-gan/blob/master/inception_score/model.py"""
+
+import numpy as np
+from . import metric_utils
+
+#----------------------------------------------------------------------------
+
+def compute_is(opts, num_gen, num_splits, detector_url=None):
+    """Compute the Inception Score of generated images.
+
+    Args:
+        opts: metric options; ``opts.rank`` is read here and ``opts`` is
+            forwarded to ``metric_utils.compute_feature_stats_for_generator``.
+        num_gen: number of generated samples (forwarded as ``max_items``).
+        num_splits: number of consecutive slices the samples are divided into;
+            one score is computed per slice.
+        detector_url: optional URL of the pickled feature detector. ``None``
+            (the default) keeps the previously hard-coded local file path, so
+            existing callers are unaffected; other machines can pass their own.
+
+    Returns:
+        ``(mean, std)`` of the per-slice scores as floats on rank 0, and
+        ``(nan, nan)`` on every other rank.
+    """
+    # Direct TorchScript translation of http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz
+    # Upstream copy: https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/metrics/inception-2015-12-05.pkl
+    if detector_url is None:
+        detector_url = 'file:///home/tiger/nfs/myenv/cache/useful_ckpts/inception-2015-12-05.pkl'
+    detector_kwargs = dict(no_output_bias=True) # Match the original implementation by not applying bias in the softmax layer.
+
+    gen_probs = metric_utils.compute_feature_stats_for_generator(
+        opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs,
+        capture_all=True, max_items=num_gen).get_all()
+
+    # Every rank has to take part in the feature pass above; only rank 0
+    # goes on to evaluate the score.
+    if opts.rank != 0:
+        return float('nan'), float('nan')
+
+    scores = []
+    for i in range(num_splits):
+        part = gen_probs[i * num_gen // num_splits : (i + 1) * num_gen // num_splits]
+        # Per-sample KL divergence between each row and the slice's mean row.
+        kl = part * (np.log(part) - np.log(np.mean(part, axis=0, keepdims=True)))
+        kl = np.mean(np.sum(kl, axis=1))
+        scores.append(np.exp(kl))
+    return float(np.mean(scores)), float(np.std(scores))
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/metrics/kernel_inception_distance.py b/modules/eg3ds/metrics/kernel_inception_distance.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a7735f387fb639135a0dd9a63be6b24c9bb3ade
--- /dev/null
+++ b/modules/eg3ds/metrics/kernel_inception_distance.py
@@ -0,0 +1,49 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Kernel Inception Distance (KID) from the paper "Demystifying MMD
+GANs". Matches the original implementation by Binkowski et al. at
+https://github.com/mbinkowski/MMD-GAN/blob/master/gan/compute_scores.py"""
+
+import numpy as np
+from . import metric_utils
+
+#----------------------------------------------------------------------------
+
+def compute_kid(opts, max_real, num_gen, num_subsets, max_subset_size, detector_url=None):
+    """Compute the Kernel Inception Distance between dataset and generator images.
+
+    Args:
+        opts: metric options; ``opts.rank`` is read here and ``opts`` is
+            forwarded to the ``metric_utils`` feature-statistics helpers.
+        max_real: forwarded as ``max_items`` for the dataset features.
+        num_gen: forwarded as ``max_items`` for the generator features.
+        num_subsets: number of random subset pairs to average over.
+        max_subset_size: upper bound on the size of each random subset.
+        detector_url: optional URL of the pickled feature detector. ``None``
+            (the default) keeps the previously hard-coded local file path, so
+            existing callers are unaffected; other machines can pass their own.
+
+    Returns:
+        The KID as a ``float`` on rank 0 and ``nan`` on every other rank.
+    """
+    # Direct TorchScript translation of http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz
+    # Upstream copy: https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/metrics/inception-2015-12-05.pkl
+    if detector_url is None:
+        detector_url = 'file:///home/tiger/nfs/myenv/cache/useful_ckpts/inception-2015-12-05.pkl'
+    detector_kwargs = dict(return_features=True) # Return raw features before the softmax layer.
+
+    real_features = metric_utils.compute_feature_stats_for_dataset(
+        opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs,
+        rel_lo=0, rel_hi=0, capture_all=True, max_items=max_real).get_all()
+
+    gen_features = metric_utils.compute_feature_stats_for_generator(
+        opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs,
+        rel_lo=0, rel_hi=1, capture_all=True, max_items=num_gen).get_all()
+
+    # Every rank has to take part in the feature passes above; only rank 0
+    # goes on to evaluate the distance itself.
+    if opts.rank != 0:
+        return float('nan')
+
+    n = real_features.shape[1]
+    m = min(min(real_features.shape[0], gen_features.shape[0]), max_subset_size)
+    t = 0
+    for _subset_idx in range(num_subsets):
+        x = gen_features[np.random.choice(gen_features.shape[0], m, replace=False)]
+        y = real_features[np.random.choice(real_features.shape[0], m, replace=False)]
+        # Cubic polynomial kernel (u . v / n + 1) ** 3; `a` holds the within-set
+        # terms and `b` the cross term.
+        a = (x @ x.T / n + 1) ** 3 + (y @ y.T / n + 1) ** 3
+        b = (x @ y.T / n + 1) ** 3
+        # The diagonal (self-similarity) entries are excluded from the within-set sum.
+        t += (a.sum() - np.diag(a).sum()) / (m - 1) - b.sum() * 2 / m
+    kid = t / num_subsets / m
+    return float(kid)
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/metrics/metric_main.py b/modules/eg3ds/metrics/metric_main.py
new file mode 100644
index 0000000000000000000000000000000000000000..77eadbef168888cd740abb2e638ee111ef15c559
--- /dev/null
+++ b/modules/eg3ds/metrics/metric_main.py
@@ -0,0 +1,155 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Main API for computing and reporting quality metrics."""
+
+import os
+import time
+import json
+import torch
+import modules.eg3ds.dnnlib as dnnlib
+
+from . import metric_utils
+from . import frechet_inception_distance
+from . import kernel_inception_distance
+from . import precision_recall
+from . import perceptual_path_length
+from . import inception_score
+from . import equivariance
+
+#----------------------------------------------------------------------------
+
+# Registry filled in by @register_metric: metric name => metric function.
+_metric_dict = {}
+
+def register_metric(fn):
+    """Decorator that records `fn` in the metric registry under `fn.__name__`.
+
+    Returns `fn` unchanged so the decorated name stays directly callable.
+    """
+    assert callable(fn)
+    metric_name = fn.__name__
+    _metric_dict[metric_name] = fn
+    return fn
+
+def is_valid_metric(metric):
+    """Return True if `metric` is the name of a registered metric."""
+    return metric in _metric_dict.keys()
+
+def list_valid_metrics():
+    """Return the names of all registered metrics as a new list."""
+    return [name for name in _metric_dict]
+
+#----------------------------------------------------------------------------
+
+def calc_metric(metric, **kwargs): # See metric_utils.MetricOptions for the full list of arguments.
+    """Run the registered metric named `metric` and wrap its results with metadata.
+
+    `kwargs` are passed to `metric_utils.MetricOptions`. The returned EasyDict
+    holds the result values, the metric name, the wall-clock time (raw and
+    formatted) and the number of GPUs.
+    """
+    assert is_valid_metric(metric)
+    opts = metric_utils.MetricOptions(**kwargs)
+
+    # Time the metric function itself.
+    began = time.time()
+    results = _metric_dict[metric](opts)
+    elapsed = time.time() - began
+
+    # With several GPUs, every rank takes over the values held by rank 0.
+    if opts.num_gpus > 1:
+        for name in list(results.keys()):
+            shared = torch.as_tensor(results[name], dtype=torch.float64, device=opts.device)
+            torch.distributed.broadcast(tensor=shared, src=0)
+            results[name] = float(shared.cpu())
+
+    # Attach metadata.
+    return dnnlib.EasyDict(
+        results         = dnnlib.EasyDict(results),
+        metric          = metric,
+        total_time      = elapsed,
+        total_time_str  = dnnlib.util.format_time(elapsed),
+        num_gpus        = opts.num_gpus,
+    )
+
+#----------------------------------------------------------------------------
+
+def report_metric(result_dict, run_dir=None, snapshot_pkl=None):
+    """Print a metric result as one JSON line and append it to the run's log.
+
+    The line is always printed. It is appended to
+    `<run_dir>/metric-<metric>.jsonl` only when `run_dir` is an existing
+    directory. When both `run_dir` and `snapshot_pkl` are given, the snapshot
+    path is recorded relative to `run_dir`.
+    """
+    metric = result_dict['metric']
+    assert is_valid_metric(metric)
+
+    have_run_dir = run_dir is not None
+    if have_run_dir and snapshot_pkl is not None:
+        snapshot_pkl = os.path.relpath(snapshot_pkl, run_dir)
+
+    record = dict(result_dict, snapshot_pkl=snapshot_pkl, timestamp=time.time())
+    jsonl_line = json.dumps(record)
+    print(jsonl_line)
+
+    if not have_run_dir or not os.path.isdir(run_dir):
+        return
+    log_path = os.path.join(run_dir, f'metric-{metric}.jsonl')
+    with open(log_path, 'at') as f:
+        f.write(jsonl_line + '\n')
+
+#----------------------------------------------------------------------------
+# Recommended metrics.
+
+@register_metric
+def fid50k_full(opts):
+    """FID with 50k generated images and no cap on the number of real images.
+
+    Sets `max_size=None` and `xflip=False` in `opts.dataset_kwargs` first.
+    """
+    opts.dataset_kwargs.update(max_size=None, xflip=False)
+    score = frechet_inception_distance.compute_fid(opts, max_real=None, num_gen=50000)
+    return {'fid50k_full': score}
+
+@register_metric
+def kid50k_full(opts):
+    """KID with 50k generated images and up to 1M real images.
+
+    Uses 100 subsets of at most 1000 samples each. Sets `max_size=None` and
+    `xflip=False` in `opts.dataset_kwargs` first.
+    """
+    opts.dataset_kwargs.update(max_size=None, xflip=False)
+    score = kernel_inception_distance.compute_kid(
+        opts,
+        max_real=1000000,
+        num_gen=50000,
+        num_subsets=100,
+        max_subset_size=1000,
+    )
+    return {'kid50k_full': score}
+
+@register_metric
+def pr50k3_full(opts):
+    """Precision/recall with 50k generated images and up to 200k real images.
+
+    Uses a neighbourhood size of 3. Sets `max_size=None` and `xflip=False` in
+    `opts.dataset_kwargs` first.
+    """
+    opts.dataset_kwargs.update(max_size=None, xflip=False)
+    prec, rec = precision_recall.compute_pr(
+        opts,
+        max_real=200000,
+        num_gen=50000,
+        nhood_size=3,
+        row_batch_size=10000,
+        col_batch_size=10000,
+    )
+    return {'pr50k3_full_precision': prec, 'pr50k3_full_recall': rec}
+
+@register_metric
+def ppl2_wend(opts):
+    """Perceptual path length in W space with endpoint sampling.
+
+    Uses 50k samples, epsilon 1e-4, no cropping and a batch size of 2.
+    """
+    score = perceptual_path_length.compute_ppl(
+        opts,
+        num_samples=50000,
+        epsilon=1e-4,
+        space='w',
+        sampling='end',
+        crop=False,
+        batch_size=2,
+    )
+    return {'ppl2_wend': score}
+
+@register_metric
+def eqt50k_int(opts):
+    """Integer-translation equivariance over 50k samples (batch size 4).
+
+    Sets `force_fp32=True` in `opts.G_kwargs` first.
+    """
+    opts.G_kwargs.update(force_fp32=True)
+    score = equivariance.compute_equivariance_metrics(
+        opts, num_samples=50000, batch_size=4, compute_eqt_int=True)
+    return {'eqt50k_int': score}
+
+@register_metric
+def eqt50k_frac(opts):
+    """Fractional-translation equivariance over 50k samples (batch size 4).
+
+    Sets `force_fp32=True` in `opts.G_kwargs` first.
+    """
+    opts.G_kwargs.update(force_fp32=True)
+    score = equivariance.compute_equivariance_metrics(
+        opts, num_samples=50000, batch_size=4, compute_eqt_frac=True)
+    return {'eqt50k_frac': score}
+
+@register_metric
+def eqr50k(opts):
+    """Rotation equivariance over 50k samples (batch size 4).
+
+    Sets `force_fp32=True` in `opts.G_kwargs` first.
+    """
+    opts.G_kwargs.update(force_fp32=True)
+    score = equivariance.compute_equivariance_metrics(
+        opts, num_samples=50000, batch_size=4, compute_eqr=True)
+    return {'eqr50k': score}
+
+#----------------------------------------------------------------------------
+# Legacy metrics.
+
+@register_metric
+def fid50k(opts):
+    """Legacy FID: 50k generated images against at most 50k real images.
+
+    Sets `max_size=None` in `opts.dataset_kwargs` first.
+    """
+    opts.dataset_kwargs.update(max_size=None)
+    score = frechet_inception_distance.compute_fid(opts, max_real=50000, num_gen=50000)
+    return {'fid50k': score}
+
+@register_metric
+def kid50k(opts):
+    """Legacy KID: 50k generated images against at most 50k real images.
+
+    Uses 100 subsets of at most 1000 samples each. Sets `max_size=None` in
+    `opts.dataset_kwargs` first.
+    """
+    opts.dataset_kwargs.update(max_size=None)
+    score = kernel_inception_distance.compute_kid(
+        opts,
+        max_real=50000,
+        num_gen=50000,
+        num_subsets=100,
+        max_subset_size=1000,
+    )
+    return {'kid50k': score}
+
+@register_metric
+def pr50k3(opts):
+    """Legacy precision/recall: 50k generated against at most 50k real images.
+
+    Uses a neighbourhood size of 3. Sets `max_size=None` in
+    `opts.dataset_kwargs` first.
+    """
+    opts.dataset_kwargs.update(max_size=None)
+    prec, rec = precision_recall.compute_pr(
+        opts,
+        max_real=50000,
+        num_gen=50000,
+        nhood_size=3,
+        row_batch_size=10000,
+        col_batch_size=10000,
+    )
+    return {'pr50k3_precision': prec, 'pr50k3_recall': rec}
+
+@register_metric
+def is50k(opts):
+    """Inception Score over 50k generated images, split into 10 slices.
+
+    Sets `max_size=None` and `xflip=False` in `opts.dataset_kwargs` first.
+    Reports the mean and standard deviation of the per-slice scores.
+    """
+    opts.dataset_kwargs.update(max_size=None, xflip=False)
+    avg, spread = inception_score.compute_is(opts, num_gen=50000, num_splits=10)
+    return {'is50k_mean': avg, 'is50k_std': spread}
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/metrics/metric_utils.py b/modules/eg3ds/metrics/metric_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..756169b281ff0cf72bbacb879bafccc2721b5d42
--- /dev/null
+++ b/modules/eg3ds/metrics/metric_utils.py
@@ -0,0 +1,324 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Miscellaneous utilities used internally by the quality metrics."""
+
+import os
+import sys
+sys.path.append("/home/tiger/projects/GeneFace_private/modules/eg3ds")
+
+import time
+import hashlib
+import pickle
+import copy
+import uuid
+import numpy as np
+import torch
+import modules.eg3ds.dnnlib as dnnlib
+
+from tasks.eg3ds.dataset_utils.kv_eg3d_ffhq_dataset import KV_FFHQ_EG3D_Dataset
+from utils.commons.hparams import hparams
+#----------------------------------------------------------------------------
+
+def chunk(iterable, chunk_size):
+    """Split `iterable` into consecutive lists of at most `chunk_size` items.
+
+    Every chunk except possibly the last holds exactly `chunk_size` items; the
+    last chunk holds whatever remains. An empty iterable yields an empty list.
+
+    The previous implementation inspected `final_ret[-1]`, which raised
+    IndexError when no full chunk had been produced and, otherwise, always
+    compared a full chunk against `chunk_size`, so the trailing partial chunk
+    was never returned.
+
+    Args:
+        iterable: any iterable of records.
+        chunk_size: positive number of records per chunk.
+
+    Returns:
+        A list of lists preserving the original order of the records.
+
+    Raises:
+        ValueError: if `chunk_size` is not positive.
+    """
+    if chunk_size <= 0:
+        raise ValueError(f'chunk_size must be positive, got {chunk_size}')
+    chunks = []
+    current = []
+    for record in iterable:
+        current.append(record)
+        if len(current) == chunk_size:
+            chunks.append(current)
+            current = []
+    # Keep the leftover records instead of dropping them.
+    if current:
+        chunks.append(current)
+    return chunks
+
+class MetricOptions:
+    """Bundle of settings handed to every metric function.
+
+    Args:
+        G: the generator network to evaluate.
+        G_kwargs: optional mapping of generator keyword arguments; copied into
+            an EasyDict. `None` (the default) means no extra arguments.
+        dataset_kwargs: optional mapping of dataset keyword arguments; copied
+            into an EasyDict. `None` (the default) means no extra arguments.
+        num_gpus: number of participating processes.
+        rank: index of this process; must satisfy `0 <= rank < num_gpus`.
+        device: torch device to use; defaults to `cuda:<rank>`.
+        progress: optional parent progress monitor. Only rank 0 derives a
+            sub-monitor from it; every other case gets a fresh ProgressMonitor.
+        cache: stored as-is on `self.cache`.
+    """
+
+    # The defaults used to be literal `{}`; `None` avoids mutable default
+    # arguments while accepting exactly the same calls.
+    def __init__(self, G=None, G_kwargs=None, dataset_kwargs=None, num_gpus=1, rank=0, device=None, progress=None, cache=True):
+        assert 0 <= rank < num_gpus
+        self.G              = G
+        self.G_kwargs       = dnnlib.EasyDict({} if G_kwargs is None else G_kwargs)
+        self.dataset_kwargs = dnnlib.EasyDict({} if dataset_kwargs is None else dataset_kwargs)
+        self.num_gpus       = num_gpus
+        self.rank           = rank
+        self.device         = device if device is not None else torch.device('cuda', rank)
+        self.progress       = progress.sub() if progress is not None and rank == 0 else ProgressMonitor()
+        self.cache          = cache
+
+#----------------------------------------------------------------------------
+
+# Loaded detectors, keyed by (url, device); filled by get_feature_detector().
+_feature_detector_cache = {}
+
+def get_feature_detector_name(url):
+    """Return the last path component of `url` without its final extension."""
+    file_name = url.split('/')[-1]
+    stem, _ext = os.path.splitext(file_name)
+    return stem
+
+def get_feature_detector(url, device=torch.device('cpu'), num_gpus=1, rank=0, verbose=False):
+    """Load the pickled detector at `url`, move it to `device`, and cache it.
+
+    Results are cached per (url, device). In multi-GPU runs rank 0 loads
+    first while the other ranks wait at a barrier; they load once rank 0
+    reaches its own barrier.
+    """
+    assert 0 <= rank < num_gpus
+    cache_key = (url, device)
+    if cache_key in _feature_detector_cache:
+        return _feature_detector_cache[cache_key]
+
+    is_leader = (rank == 0)
+    multi_gpu = num_gpus > 1
+    if multi_gpu and not is_leader:
+        torch.distributed.barrier() # leader goes first
+    with dnnlib.util.open_url(url, verbose=(verbose and is_leader)) as f:
+        # NOTE(review): unpickling can execute arbitrary code; `url` should
+        # only ever point at a trusted source.
+        _feature_detector_cache[cache_key] = pickle.load(f).to(device)
+    if multi_gpu and is_leader:
+        torch.distributed.barrier() # others follow
+    return _feature_detector_cache[cache_key]
+
+#----------------------------------------------------------------------------
+
+def iterate_random_labels(opts, batch_size):
+    """Endlessly yield conditioning tensors for batches of `batch_size` samples.
+
+    For an unconditional generator (`opts.G.c_dim == 0`) the same zero tensor
+    of shape [batch_size, 0] is yielded every time. Otherwise each step draws
+    `batch_size` random indices into the training dataset and yields the
+    stacked `real_camera` entries of those samples on `opts.device`. Only
+    the 'FFHQ' dataset name is supported.
+    """
+    if opts.G.c_dim != 0:
+        if hparams['ds_name'] not in ['FFHQ']:
+            raise NotImplementedError()
+        dataset = KV_FFHQ_EG3D_Dataset('train', shuffle=False)
+        while True:
+            picks = np.random.randint(len(dataset), size=(batch_size))
+            # NOTE(review): indexing the dataset with an index array is assumed
+            # to return a sequence of sample dicts -- confirm against the dataset.
+            drawn = dataset[picks]
+            camera_list = [sample['real_camera'] for sample in drawn]
+            yield torch.stack(camera_list).pin_memory().to(opts.device)
+    else:
+        empty_labels = torch.zeros([batch_size, opts.G.c_dim], device=opts.device)
+        while True:
+            yield empty_labels
+
+#----------------------------------------------------------------------------
+
+class FeatureStats:
+    """Accumulator for 2-D feature batches.
+
+    Depending on the flags it keeps every appended batch (`capture_all`)
+    and/or running sums from which mean and covariance are derived
+    (`capture_mean_cov`). When `max_items` is set, rows beyond that count
+    are discarded.
+    """
+
+    def __init__(self, capture_all=False, capture_mean_cov=False, max_items=None):
+        self.capture_all = capture_all
+        self.capture_mean_cov = capture_mean_cov
+        self.max_items = max_items
+        self.num_items = 0
+        # The buffers below are allocated lazily by set_num_features().
+        self.num_features = None
+        self.all_features = None
+        self.raw_mean = None
+        self.raw_cov = None
+
+    def set_num_features(self, num_features):
+        """Fix the feature width on first use; afterwards only verify it."""
+        if self.num_features is None:
+            self.num_features = num_features
+            self.all_features = []
+            self.raw_mean = np.zeros([num_features], dtype=np.float64)
+            self.raw_cov = np.zeros([num_features, num_features], dtype=np.float64)
+            return
+        assert num_features == self.num_features
+
+    def is_full(self):
+        """Return True once `max_items` rows have been collected (never if unlimited)."""
+        if self.max_items is None:
+            return False
+        return self.num_items >= self.max_items
+
+    def append(self, x):
+        """Add a [rows, features] batch, truncating it to the remaining capacity."""
+        x = np.asarray(x, dtype=np.float32)
+        assert x.ndim == 2
+        if self.max_items is not None:
+            room = self.max_items - self.num_items
+            if x.shape[0] > room:
+                if room <= 0:
+                    return
+                x = x[:room]
+
+        self.set_num_features(x.shape[1])
+        self.num_items += x.shape[0]
+        if self.capture_all:
+            self.all_features.append(x)
+        if self.capture_mean_cov:
+            # Sums are accumulated in float64.
+            x64 = x.astype(np.float64)
+            self.raw_mean += x64.sum(axis=0)
+            self.raw_cov += x64.T @ x64
+
+    def append_torch(self, x, num_gpus=1, rank=0):
+        """Add a 2-D torch tensor; with several GPUs, gather every rank's batch first."""
+        assert isinstance(x, torch.Tensor) and x.ndim == 2
+        assert 0 <= rank < num_gpus
+        if num_gpus > 1:
+            per_rank = []
+            for src in range(num_gpus):
+                received = x.clone()
+                torch.distributed.broadcast(received, src=src)
+                per_rank.append(received)
+            x = torch.stack(per_rank, dim=1).flatten(0, 1) # interleave samples
+        self.append(x.cpu().numpy())
+
+    def get_all(self):
+        """Return all captured rows as one array (requires `capture_all`)."""
+        assert self.capture_all
+        return np.concatenate(self.all_features, axis=0)
+
+    def get_all_torch(self):
+        """Return all captured rows as a torch tensor."""
+        return torch.from_numpy(self.get_all())
+
+    def get_mean_cov(self):
+        """Return (mean, covariance) of the collected rows (requires `capture_mean_cov`)."""
+        assert self.capture_mean_cov
+        mean = self.raw_mean / self.num_items
+        second_moment = self.raw_cov / self.num_items
+        return mean, second_moment - np.outer(mean, mean)
+
+    def save(self, pkl_file):
+        """Pickle this object's attribute dict to `pkl_file`."""
+        with open(pkl_file, 'wb') as f:
+            pickle.dump(self.__dict__, f)
+
+    @staticmethod
+    def load(pkl_file):
+        """Rebuild a FeatureStats from a file written by save()."""
+        with open(pkl_file, 'rb') as f:
+            state = dnnlib.EasyDict(pickle.load(f))
+        restored = FeatureStats(capture_all=state.capture_all, max_items=state.max_items)
+        # Every saved attribute overrides the freshly constructed defaults.
+        restored.__dict__.update(state)
+        return restored
+
+#----------------------------------------------------------------------------
+
+class ProgressMonitor:
+    """Progress reporter that flushes every `flush_interval` items.
+
+    On each flush it optionally prints a timing line (when `verbose` and a
+    `tag` are set) and forwards a position, mapped into the range
+    [`pfn_lo`, `pfn_hi`], to `progress_fn` together with `pfn_total`.
+    """
+
+    def __init__(self, tag=None, num_items=None, flush_interval=1000, verbose=False, progress_fn=None, pfn_lo=0, pfn_hi=1000, pfn_total=1000):
+        created_at = time.time()
+        self.tag = tag
+        self.num_items = num_items
+        self.verbose = verbose
+        self.flush_interval = flush_interval
+        self.progress_fn = progress_fn
+        self.pfn_lo = pfn_lo
+        self.pfn_hi = pfn_hi
+        self.pfn_total = pfn_total
+        self.start_time = created_at
+        self.batch_time = created_at
+        self.batch_items = 0
+        # Report the starting position straight away.
+        if progress_fn is not None:
+            progress_fn(pfn_lo, pfn_total)
+
+    def update(self, cur_items):
+        """Record that `cur_items` items are done; flush if an interval elapsed or the end was reached."""
+        total = self.num_items
+        assert (total is None) or (cur_items <= total)
+        interval_elapsed = cur_items >= self.batch_items + self.flush_interval
+        finished = (total is not None) and (cur_items >= total)
+        if not (interval_elapsed or finished):
+            return
+
+        now = time.time()
+        elapsed = now - self.start_time
+        per_item = (now - self.batch_time) / max(cur_items - self.batch_items, 1)
+        if self.verbose and self.tag is not None:
+            print(f'{self.tag:<19s} items {cur_items:<7d} time {dnnlib.util.format_time(elapsed):<12s} ms/item {per_item*1e3:.2f}')
+        self.batch_time = now
+        self.batch_items = cur_items
+
+        if self.progress_fn is not None and total is not None:
+            span = self.pfn_hi - self.pfn_lo
+            self.progress_fn(self.pfn_lo + span * (cur_items / total), self.pfn_total)
+
+    def sub(self, tag=None, num_items=None, flush_interval=1000, rel_lo=0, rel_hi=1):
+        """Create a child monitor covering the [rel_lo, rel_hi] fraction of this monitor's range."""
+        span = self.pfn_hi - self.pfn_lo
+        return ProgressMonitor(
+            tag             = tag,
+            num_items       = num_items,
+            flush_interval  = flush_interval,
+            verbose         = self.verbose,
+            progress_fn     = self.progress_fn,
+            pfn_lo          = self.pfn_lo + span * rel_lo,
+            pfn_hi          = self.pfn_lo + span * rel_hi,
+            pfn_total       = self.pfn_total,
+        )
+
+#----------------------------------------------------------------------------
+
+def compute_feature_stats_for_dataset(opts, detector_url, detector_kwargs, rel_lo=0, rel_hi=1, batch_size=64, data_loader_kwargs=None, max_items=None, **stats_kwargs):
+    """Run the feature detector over the training dataset and collect FeatureStats.
+
+    Only the 'FFHQ' dataset name (read from `hparams['ds_name']`) is
+    supported; anything else raises NotImplementedError. When `opts.cache`
+    is set, a previously saved result is loaded from the cache directory if
+    rank 0 finds it, and a freshly computed result is saved there by rank 0.
+
+    Args:
+        opts: metric options (device, rank, num_gpus, progress, cache, dataset_kwargs).
+        detector_url: location of the pickled feature detector.
+        detector_kwargs: keyword arguments passed to the detector on every call.
+        rel_lo, rel_hi: fraction of the parent progress range this step covers.
+        batch_size: number of dataset indices per chunk handed to the loader.
+        data_loader_kwargs: DataLoader options; a default set is used when None.
+        max_items: optional cap on the number of dataset items.
+        **stats_kwargs: forwarded to the FeatureStats constructor.
+
+    Returns:
+        A FeatureStats instance, either loaded from the cache or newly computed.
+    """
+    # dataset = dnnlib.util.construct_class_by_name(**opts.dataset_kwargs)
+    if hparams['ds_name'] in ['FFHQ']:
+        dataset = KV_FFHQ_EG3D_Dataset('train', shuffle=False)
+    else:
+        raise NotImplementedError()
+    
+    if data_loader_kwargs is None:
+        data_loader_kwargs = dict(pin_memory=True, num_workers=3, prefetch_factor=2)
+
+    # Try to lookup from cache.
+    cache_file = None
+    if opts.cache:
+        # Choose cache file name.
+        # The name combines the dataset name and prefix, the detector name, and
+        # an MD5 of the arguments below.
+        args = dict(dataset_kwargs=opts.dataset_kwargs, detector_url=detector_url, detector_kwargs=detector_kwargs, stats_kwargs=stats_kwargs)
+        md5 = hashlib.md5(repr(sorted(args.items())).encode('utf-8'))
+        ds_name = hparams['ds_name'] + dataset.prefix
+        cache_tag = f'{ds_name}-{get_feature_detector_name(detector_url)}-{md5.hexdigest()}'
+        cache_file = dnnlib.make_cache_dir_path('gan-metrics', cache_tag + '.pkl')
+
+        # Check if the file exists (all processes must agree).
+        # Only rank 0 looks at the file system; its answer is broadcast to the rest.
+        flag = os.path.isfile(cache_file) if opts.rank == 0 else False
+        if opts.num_gpus > 1:
+            flag = torch.as_tensor(flag, dtype=torch.float32, device=opts.device)
+            torch.distributed.broadcast(tensor=flag, src=0)
+            flag = (float(flag.cpu()) != 0)
+
+        # Load.
+        if flag:
+            return FeatureStats.load(cache_file)
+
+    # Initialize.
+    num_items = len(dataset)
+    if max_items is not None:
+        num_items = min(num_items, max_items)
+    stats = FeatureStats(max_items=num_items, **stats_kwargs)
+    progress = opts.progress.sub(tag='dataset features', num_items=num_items, rel_lo=rel_lo, rel_hi=rel_hi)
+    detector = get_feature_detector(url=detector_url, device=opts.device, num_gpus=opts.num_gpus, rank=opts.rank, verbose=progress.verbose)
+
+    # Main loop.
+    # Each rank takes indices rank, rank + num_gpus, ... (wrapped modulo
+    # num_items), so every rank ends up with the same number of indices.
+    item_subset = [(i * opts.num_gpus + opts.rank) % num_items for i in range((num_items - 1) // opts.num_gpus + 1)]
+    # The index list is pre-chunked and each chunk is passed to the loader as a
+    # single sampler element (batch_size=1).
+    # NOTE(review): this presumably relies on the dataset and its collater
+    # accepting a list of indices -- confirm against KV_FFHQ_EG3D_Dataset.
+    item_subset = chunk(item_subset, chunk_size=batch_size)
+    for batch in torch.utils.data.DataLoader(dataset=dataset, sampler=item_subset, batch_size=1, collate_fn=dataset.collater, **data_loader_kwargs):
+        images = batch['real_imgs']
+        # Single-channel images are repeated to three channels.
+        if images.shape[1] == 1:
+            images = images.repeat([1, 3, 1, 1])
+
+        # Non-uint8 images are rescaled to [0, 255]; assumes they are in
+        # [-1, 1] -- TODO confirm against the dataset.
+        if images.dtype != torch.uint8:
+            images = (images * 127.5 + 128).clamp(0, 255).to(torch.uint8)
+        
+        features = detector(images.to(opts.device), **detector_kwargs)
+        stats.append_torch(features, num_gpus=opts.num_gpus, rank=opts.rank)
+        progress.update(stats.num_items)
+
+    # Save to cache.
+    # Written to a uniquely named temp file first, then moved into place.
+    if cache_file is not None and opts.rank == 0:
+        os.makedirs(os.path.dirname(cache_file), exist_ok=True)
+        temp_file = cache_file + '.' + uuid.uuid4().hex
+        stats.save(temp_file)
+        os.replace(temp_file, cache_file) # atomic
+    return stats
+
+#----------------------------------------------------------------------------
+
+def compute_feature_stats_for_generator(opts, detector_url, detector_kwargs, rel_lo=0, rel_hi=1, batch_size=64, batch_gen=None, **stats_kwargs):
+    """Run the feature detector over generated images and collect FeatureStats.
+
+    Images are synthesised `batch_gen` at a time (default: at most 4) and
+    concatenated into batches of `batch_size` before being passed to the
+    detector. `stats_kwargs` must include `max_items`; generation stops
+    once that many feature rows have been collected.
+    """
+    batch_gen = min(batch_size, 4) if batch_gen is None else batch_gen
+    assert batch_size % batch_gen == 0
+    rounds_per_batch = batch_size // batch_gen
+
+    # Work on a frozen private copy of the generator; labels come from an endless iterator.
+    G = copy.deepcopy(opts.G).eval().requires_grad_(False).to(opts.device)
+    c_iter = iterate_random_labels(opts=opts, batch_size=batch_gen)
+
+    # Statistics container, progress reporting and detector.
+    stats = FeatureStats(**stats_kwargs)
+    assert stats.max_items is not None
+    progress = opts.progress.sub(tag='generator features', num_items=stats.max_items, rel_lo=rel_lo, rel_hi=rel_hi)
+    detector = get_feature_detector(url=detector_url, device=opts.device, num_gpus=opts.num_gpus, rank=opts.rank, verbose=progress.verbose)
+
+    def synthesize_uint8():
+        # One mini-batch of generated images, converted to uint8.
+        z = torch.randn([batch_gen, G.z_dim], device=opts.device)
+        rendered = G(z=z, camera=next(c_iter))['image']
+        return (rendered * 127.5 + 128).clamp(0, 255).to(torch.uint8)
+
+    # Generate until the statistics container is full.
+    while not stats.is_full():
+        images = torch.cat([synthesize_uint8() for _ in range(rounds_per_batch)])
+        if images.shape[1] == 1:
+            images = images.repeat([1, 3, 1, 1])
+        features = detector(images, **detector_kwargs)
+        stats.append_torch(features, num_gpus=opts.num_gpus, rank=opts.rank)
+        progress.update(stats.num_items)
+    return stats
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/metrics/perceptual_path_length.py b/modules/eg3ds/metrics/perceptual_path_length.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e58dac3317733e2ace6d64ee1f97cafa0a38225
--- /dev/null
+++ b/modules/eg3ds/metrics/perceptual_path_length.py
@@ -0,0 +1,127 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Perceptual Path Length (PPL) from the paper "A Style-Based Generator
+Architecture for Generative Adversarial Networks". Matches the original
+implementation by Karras et al. at
+https://github.com/NVlabs/stylegan/blob/master/metrics/perceptual_path_length.py"""
+
+import copy
+import numpy as np
+import torch
+from . import metric_utils
+
+#----------------------------------------------------------------------------
+
+# Spherical interpolation of a batch of vectors.
+def slerp(a, b, t):
+    """Interpolate along the arc from direction `a` to direction `b`.
+
+    Both inputs are normalised along the last dimension first; `t` scales
+    the angle between them. The result is unit length along the last
+    dimension.
+    """
+    def _unit(v):
+        return v / v.norm(dim=-1, keepdim=True)
+
+    start = _unit(a)
+    end = _unit(b)
+    cos_angle = (start * end).sum(dim=-1, keepdim=True)
+    theta = t * torch.acos(cos_angle)
+    # Component of `end` orthogonal to `start`, normalised.
+    ortho = _unit(end - cos_angle * start)
+    return _unit(start * torch.cos(theta) + ortho * torch.sin(theta))
+
+#----------------------------------------------------------------------------
+
+class PPLSampler(torch.nn.Module):
+    """Module that draws latent pairs `epsilon` apart and returns their scaled squared LPIPS distance.
+
+    Interpolation happens in Z (spherical) or W (linear) space depending on
+    `space`. With `sampling='end'` the interpolation position is always
+    0; with 'full' it is uniform in [0, 1).
+    """
+
+    def __init__(self, G, G_kwargs, epsilon, space, sampling, crop, vgg16):
+        assert space in ['z', 'w']
+        assert sampling in ['full', 'end']
+        super().__init__()
+        # Private copies, so the noise-buffer writes in forward() stay local.
+        self.G = copy.deepcopy(G)
+        self.G_kwargs = G_kwargs
+        self.epsilon = epsilon
+        self.space = space
+        self.sampling = sampling
+        self.crop = crop
+        self.vgg16 = copy.deepcopy(vgg16)
+
+    def forward(self, c):
+        batch = c.shape[0]
+
+        # Random interpolation positions and latent pairs.
+        t_scale = 1 if self.sampling == 'full' else 0
+        t = torch.rand([batch], device=c.device) * t_scale
+        z0, z1 = torch.randn([batch * 2, self.G.z_dim], device=c.device).chunk(2)
+        doubled_c = torch.cat([c, c])
+
+        # Two nearby points per sample, interpolated in W or Z.
+        if self.space == 'w':
+            w0, w1 = self.G.mapping(z=torch.cat([z0, z1]), c=doubled_c).chunk(2)
+            t_w = t.unsqueeze(1).unsqueeze(2)
+            wt0 = w0.lerp(w1, t_w)
+            wt1 = w0.lerp(w1, t_w + self.epsilon)
+        else: # space == 'z'
+            t_z = t.unsqueeze(1)
+            zt0 = slerp(z0, z1, t_z)
+            zt1 = slerp(z0, z1, t_z + self.epsilon)
+            wt0, wt1 = self.G.mapping(z=torch.cat([zt0, zt1]), c=doubled_c).chunk(2)
+
+        # Fresh noise for every '.noise_const' buffer.
+        for buf_name, noise_buf in self.G.named_buffers():
+            if buf_name.endswith('.noise_const'):
+                noise_buf.copy_(torch.randn_like(noise_buf))
+
+        # Synthesise both points in a single pass.
+        img = self.G.synthesis(ws=torch.cat([wt0, wt1]), noise_mode='const', force_fp32=True, **self.G_kwargs)
+
+        # Optional crop of a square window (rows 3/8..7/8, columns 2/8..6/8).
+        if self.crop:
+            assert img.shape[2] == img.shape[3]
+            eighth = img.shape[2] // 8
+            img = img[:, :, eighth*3 : eighth*7, eighth*2 : eighth*6]
+
+        # Box-filter down by an integer factor when the generator resolution exceeds 256.
+        factor = self.G.img_resolution // 256
+        if factor > 1:
+            channels, height, width = img.shape[1], img.shape[2], img.shape[3]
+            img = img.reshape([-1, channels, height // factor, factor, width // factor, factor]).mean([3, 5])
+
+        # Map the dynamic range from [-1,1] to [0,255]; grayscale becomes three channels.
+        img = (img + 1) * (255 / 2)
+        if self.G.img_channels == 1:
+            img = img.repeat([1, 3, 1, 1])
+
+        # Squared LPIPS difference between the two points, divided by epsilon^2.
+        lpips_t0, lpips_t1 = self.vgg16(img, resize_images=False, return_lpips=True).chunk(2)
+        return (lpips_t0 - lpips_t1).square().sum(1) / self.epsilon ** 2
+
+#----------------------------------------------------------------------------
+
+def _ppl_percentile(values, q, method):
+    """np.percentile with the given estimation method, on both new and old NumPy."""
+    try:
+        # `method` replaced the deprecated `interpolation` keyword in NumPy 1.22.
+        return np.percentile(values, q, method=method)
+    except TypeError:
+        # Older NumPy only knows the keyword under its previous name.
+        return np.percentile(values, q, interpolation=method)
+
+def compute_ppl(opts, num_samples, epsilon, space, sampling, crop, batch_size):
+    """Compute the Perceptual Path Length of the generator in `opts`.
+
+    Args:
+        opts: metric options (G, G_kwargs, device, rank, num_gpus, progress).
+        num_samples: number of distance samples to keep.
+        epsilon: interpolation step between the two points of each pair.
+        space: 'z' or 'w', the latent space to interpolate in.
+        sampling: 'full' or 'end', how interpolation positions are drawn.
+        crop: whether PPLSampler crops the images before comparing them.
+        batch_size: number of samples drawn per GPU per iteration.
+
+    Returns:
+        The mean of the distances lying between the 1st and 99th percentile
+        (inclusive) as a float on rank 0, and `nan` on every other rank.
+    """
+    vgg16_url = 'https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/metrics/vgg16.pkl'
+    vgg16 = metric_utils.get_feature_detector(vgg16_url, num_gpus=opts.num_gpus, rank=opts.rank, verbose=opts.progress.verbose)
+
+    # Setup sampler and labels.
+    sampler = PPLSampler(G=opts.G, G_kwargs=opts.G_kwargs, epsilon=epsilon, space=space, sampling=sampling, crop=crop, vgg16=vgg16)
+    sampler.eval().requires_grad_(False).to(opts.device)
+    c_iter = metric_utils.iterate_random_labels(opts=opts, batch_size=batch_size)
+
+    # Sampling loop.
+    dist = []
+    progress = opts.progress.sub(tag='ppl sampling', num_items=num_samples)
+    for batch_start in range(0, num_samples, batch_size * opts.num_gpus):
+        progress.update(batch_start)
+        x = sampler(next(c_iter))
+        # Collect every rank's samples: each rank broadcasts its batch in turn.
+        for src in range(opts.num_gpus):
+            y = x.clone()
+            if opts.num_gpus > 1:
+                torch.distributed.broadcast(y, src=src)
+            dist.append(y)
+    progress.update(num_samples)
+
+    # Compute PPL.
+    if opts.rank != 0:
+        return float('nan')
+    dist = torch.cat(dist)[:num_samples].cpu().numpy()
+    # Discard the extreme tails before averaging.
+    lo = _ppl_percentile(dist, 1, 'lower')
+    hi = _ppl_percentile(dist, 99, 'higher')
+    ppl = np.extract(np.logical_and(dist >= lo, dist <= hi), dist).mean()
+    return float(ppl)
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/metrics/precision_recall.py b/modules/eg3ds/metrics/precision_recall.py
new file mode 100644
index 0000000000000000000000000000000000000000..6043717d59c53c34d76e35600a58f91e77659e0c
--- /dev/null
+++ b/modules/eg3ds/metrics/precision_recall.py
@@ -0,0 +1,65 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Precision/Recall (PR) from the paper "Improved Precision and Recall
+Metric for Assessing Generative Models". Matches the original implementation
+by Kynkaanniemi et al. at
+https://github.com/kynkaat/improved-precision-and-recall-metric/blob/master/precision_recall.py"""
+
+import torch
+from . import metric_utils
+
+#----------------------------------------------------------------------------
+
+def compute_distances(row_features, col_features, num_gpus, rank, col_batch_size):  # Pairwise distances rows x cols, computed in column batches split across ranks; rank 0 gets the full matrix, other ranks get None.
+    assert 0 <= rank < num_gpus
+    num_cols = col_features.shape[0]
+    num_batches = ((num_cols - 1) // col_batch_size // num_gpus + 1) * num_gpus  # rounded up to a multiple of num_gpus so every rank handles the same number of batches
+    col_batches = torch.nn.functional.pad(col_features, [0, 0, 0, -num_cols % num_batches]).chunk(num_batches)  # append zero rows so the column set divides evenly into num_batches chunks
+    dist_batches = []
+    for col_batch in col_batches[rank :: num_gpus]:  # this rank takes every num_gpus-th batch, starting at its own index
+        dist_batch = torch.cdist(row_features.unsqueeze(0), col_batch.unsqueeze(0))[0]  # cdist is called on batched input, hence the temporary leading axis
+        for src in range(num_gpus):  # each rank in turn shares its batch so results are collected in global batch order
+            dist_broadcast = dist_batch.clone()  # clone so receiving another rank's data does not overwrite dist_batch
+            if num_gpus > 1:
+                torch.distributed.broadcast(dist_broadcast, src=src)
+            dist_batches.append(dist_broadcast.cpu() if rank == 0 else None)  # only rank 0 keeps the data (moved to CPU)
+    return torch.cat(dist_batches, dim=1)[:, :num_cols] if rank == 0 else None  # slice off the columns that came from the zero padding
+
+#----------------------------------------------------------------------------
+
+def compute_pr(opts, max_real, num_gen, nhood_size, row_batch_size, col_batch_size):
+    """Return (precision, recall) between real and generated detector features; both values are NaN on ranks other than 0."""
+    import os  # function-scope import: only needed to probe for the optional local checkpoint below
+    local_vgg16 = '/home/tiger/nfs/myenv/cache/useful_ckpts/vgg16.pkl'  # machine-specific cache path kept from the original code
+    official_url = 'https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/metrics/vgg16.pkl'
+    detector_url = 'file://' + local_vgg16 if os.path.isfile(local_vgg16) else official_url  # use the local copy when present, else the official URL, so the metric is not tied to one machine
+    detector_kwargs = dict(return_features=True)
+    real_features = metric_utils.compute_feature_stats_for_dataset(
+        opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs,
+        rel_lo=0, rel_hi=0, capture_all=True, max_items=max_real).get_all_torch().to(torch.float16).to(opts.device)
+    gen_features = metric_utils.compute_feature_stats_for_generator(
+        opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs,
+        rel_lo=0, rel_hi=1, capture_all=True, max_items=num_gen).get_all_torch().to(torch.float16).to(opts.device)
+    results = dict()
+    for name, manifold, probes in [('precision', real_features, gen_features), ('recall', gen_features, real_features)]:  # the two metrics only swap which set is the manifold and which the probes
+        kth = []
+        for manifold_batch in manifold.split(row_batch_size):
+            dist = compute_distances(row_features=manifold_batch, col_features=manifold, num_gpus=opts.num_gpus, rank=opts.rank, col_batch_size=col_batch_size)
+            kth.append(dist.to(torch.float32).kthvalue(nhood_size + 1).values.to(torch.float16) if opts.rank == 0 else None)  # per-point neighbourhood radius; +1 because each row is also among the columns
+        kth = torch.cat(kth) if opts.rank == 0 else None
+        pred = []
+        for probes_batch in probes.split(row_batch_size):
+            dist = compute_distances(row_features=probes_batch, col_features=manifold, num_gpus=opts.num_gpus, rank=opts.rank, col_batch_size=col_batch_size)
+            pred.append((dist <= kth).any(dim=1) if opts.rank == 0 else None)  # a probe counts if it falls inside any manifold point's radius
+        results[name] = float(torch.cat(pred).to(torch.float32).mean() if opts.rank == 0 else 'nan')
+    return results['precision'], results['recall']
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/models/dual_discriminator.py b/modules/eg3ds/models/dual_discriminator.py
new file mode 100644
index 0000000000000000000000000000000000000000..58d82d4148253a341cf3bccf7bd056a39be00e22
--- /dev/null
+++ b/modules/eg3ds/models/dual_discriminator.py
@@ -0,0 +1,374 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Discriminator architectures from the paper
+"Efficient Geometry-aware 3D Generative Adversarial Networks"."""
+
+import numpy as np
+import torch
+import torch.nn as nn
+# 
+
+from modules.eg3ds.torch_utils.ops import upfirdn2d
+from modules.eg3ds.models.networks_stylegan2 import DiscriminatorBlock, MappingNetwork, DiscriminatorEpilogue
+from einops import rearrange
+from utils.commons.hparams import hparams
+
+
+class SingleDiscriminator(torch.nn.Module):  # Camera-conditioned discriminator that scores only img['image'].
+    def __init__(self,
+        img_resolution,                 # Input resolution.
+        img_channels        =3,         # Number of input color channels.
+        architecture        = 'resnet', # Architecture: 'orig', 'skip', 'resnet'.
+        channel_base        = 32768,    # Overall multiplier for the number of channels.
+        channel_max         = 512,      # Maximum number of channels in any layer.
+        num_fp16_res        = 4,        # Use FP16 for the N highest resolutions.
+        conv_clamp          = 256,      # Clamp the output of convolution layers to +-X, None = disable clamping.
+        cmap_dim            = None,     # Dimensionality of mapped conditioning label, None = default.
+        sr_upsample_factor  = 1,        # Ignored for SingleDiscriminator
+        block_kwargs        = {},       # Arguments for DiscriminatorBlock.
+        mapping_kwargs      = {},       # Arguments for MappingNetwork.
+        epilogue_kwargs     = {},       # Arguments for DiscriminatorEpilogue.
+    ):
+        super().__init__()
+        self.camera_dim = 25  # length of the camera vector passed to the mapping network as its label
+        if hparams['disc_cond_mode'] == 'idexp_lm3d_normalized':
+            self.cond_dim = 204  # NOTE(review): enables the cond branch in forward(), but this __init__ creates no cond_encoder and sizes the mapping network for the camera only - confirm this mode is supported here
+        else:
+            self.cond_dim = 0
+        c_dim = self.camera_dim
+        self.c_dim = c_dim
+
+        self.img_resolution = img_resolution
+        self.img_resolution_log2 = int(np.log2(img_resolution))
+        self.img_channels = img_channels
+        self.block_resolutions = [2 ** i for i in range(self.img_resolution_log2, 2, -1)]  # img_resolution, ..., 16, 8 (4 is handled by the epilogue)
+        channels_dict = {res: min(channel_base // res, channel_max) for res in self.block_resolutions + [4]}
+        fp16_resolution = max(2 ** (self.img_resolution_log2 + 1 - num_fp16_res), 8)  # blocks at or above this resolution use FP16
+
+        if cmap_dim is None:
+            cmap_dim = channels_dict[4]
+        if c_dim == 0:
+            cmap_dim = 0
+
+        common_kwargs = dict(img_channels=img_channels, architecture=architecture, conv_clamp=conv_clamp)
+        cur_layer_idx = 0
+        for res in self.block_resolutions:
+            in_channels = channels_dict[res] if res < img_resolution else 0  # the first (highest-resolution) block has no incoming feature map
+            tmp_channels = channels_dict[res]
+            out_channels = channels_dict[res // 2]
+            use_fp16 = (res >= fp16_resolution)
+            block = DiscriminatorBlock(in_channels, tmp_channels, out_channels, resolution=res,
+                first_layer_idx=cur_layer_idx, use_fp16=use_fp16, **block_kwargs, **common_kwargs)
+            setattr(self, f'b{res}', block)  # registered as b<res>; forward() looks the blocks up by the same name
+            cur_layer_idx += block.num_layers
+        if c_dim > 0:
+            self.mapping = MappingNetwork(z_dim=0, c_dim=c_dim, w_dim=cmap_dim, num_ws=None, w_avg_beta=None, **mapping_kwargs)
+        self.b4 = DiscriminatorEpilogue(channels_dict[4], cmap_dim=cmap_dim, resolution=4, **epilogue_kwargs, **common_kwargs)
+
+    def forward(self, img, camera, cond=None, update_emas=False, **block_kwargs):  # img is a dict; only img['image'] is used
+        img = img['image']
+
+        _ = update_emas # unused
+        x = None
+        for res in self.block_resolutions:
+            block = getattr(self, f'b{res}')
+            x, img = block(x, img, **block_kwargs)
+
+        cmap = None
+        c = camera
+        if self.cond_dim > 0:
+            cond_feat = self.cond_encoder(cond)  # NOTE(review): cond_encoder is not assigned anywhere in this class, so this line raises AttributeError when reached
+            c = torch.cat([c, cond_feat], dim=-1) # camera vector followed by the encoded cond
+        # self.mapping is only created when c_dim > 0, which always holds here because c_dim is fixed to camera_dim.
+        cmap = self.mapping(None, c)
+        x = self.b4(x, img, cmap)
+        return x
+    # Extra fields for the module's printed representation.
+    def extra_repr(self):
+        return f'c_dim={self.c_dim:d}, img_resolution={self.img_resolution:d}, img_channels={self.img_channels:d}'
+
+#----------------------------------------------------------------------------
+
+def filtered_resizing(image_orig_tensor, size, f, filter_mode='antialiased'):
+    """Resize a [B, C, H, W] or [B, C, T, H, W] tensor to (size, size); `f` is the filter handed to upfirdn2d and is only used by the 'classic' mode."""
+    is_bcthw_flag = image_orig_tensor.ndim == 5
+    if is_bcthw_flag: # fold the T axis into the batch axis so the 2D resizers below run on every frame
+        n, _, t, _, _ = image_orig_tensor.shape
+        image_orig_tensor = rearrange(image_orig_tensor, "n c t h w -> (n t) c h w")
+    if filter_mode == 'antialiased':
+        ada_filtered_64 = torch.nn.functional.interpolate(image_orig_tensor, size=(size, size), mode='bilinear', align_corners=False, antialias=True)
+    elif filter_mode == 'classic':
+        ada_filtered_64 = upfirdn2d.upsample2d(image_orig_tensor, f, up=2)
+        ada_filtered_64 = torch.nn.functional.interpolate(ada_filtered_64, size=(size * 2 + 2, size * 2 + 2), mode='bilinear', align_corners=False)
+        ada_filtered_64 = upfirdn2d.downsample2d(ada_filtered_64, f, down=2, flip_filter=True, padding=-1)
+    elif filter_mode == 'none':
+        ada_filtered_64 = torch.nn.functional.interpolate(image_orig_tensor, size=(size, size), mode='bilinear', align_corners=False)
+    elif isinstance(filter_mode, float): # a float in (0, 1) blends the aliased and antialiased results
+        assert 0 < filter_mode < 1
+        filtered = torch.nn.functional.interpolate(image_orig_tensor, size=(size, size), mode='bilinear', align_corners=False, antialias=True)
+        aliased  = torch.nn.functional.interpolate(image_orig_tensor, size=(size, size), mode='bilinear', align_corners=False, antialias=False)
+        ada_filtered_64 = (1 - filter_mode) * aliased + (filter_mode) * filtered
+    else: # an unknown mode used to fall through and fail with UnboundLocalError at the return below
+        raise ValueError(f'Unsupported filter_mode: {filter_mode!r}')
+    if is_bcthw_flag: # restore the [B, C, T, H, W] layout
+        ada_filtered_64 = rearrange(ada_filtered_64, "(n t) c h w -> n c t h w", n=n,t=t)
+    return ada_filtered_64
+
+#----------------------------------------------------------------------------
+
+class DualDiscriminator(torch.nn.Module):
+    """Discriminator over img['image'] stacked with the resized img['image_raw'], conditioned on the camera (and optionally `cond`); all sizes come from `hparams`."""
+    def __init__(self):
+        super().__init__()
+        channel_base = hparams['base_channel']
+        channel_max = hparams['max_channel']
+        conv_clamp = 256
+        cmap_dim = None
+        block_kwargs = {'freeze_layers': 0}
+        mapping_kwargs = {}
+        epilogue_kwargs = {'mbstd_group_size': hparams['group_size_for_mini_batch_std']}
+        architecture = 'resnet' # Architecture: 'orig', 'skip', 'resnet'.
+        img_channels = 3
+        img_channels *= 2 # the blocks see image and image_raw concatenated along the channel axis
+
+        self.camera_dim = 25
+        c_dim = self.camera_dim
+        self.c_dim = c_dim # read by extra_repr(); it was never assigned before, so repr()/print() of the module raised AttributeError
+        self.img_resolution = hparams['final_resolution']
+        self.img_resolution_log2 = int(np.log2(self.img_resolution))
+        self.img_channels = 3
+
+        self.block_resolutions = [2 ** i for i in range(self.img_resolution_log2, 2, -1)] # img_resolution, ..., 16, 8 (4 is handled by the epilogue)
+        channels_dict = {res: min(channel_base // res, channel_max) for res in self.block_resolutions + [4]}
+        self.num_fp16_res = hparams['num_fp16_layers_in_discriminator']
+        fp16_resolution = max(2 ** (self.img_resolution_log2 + 1 - self.num_fp16_res), 8) # blocks at or above this resolution use FP16
+        if cmap_dim is None:
+            cmap_dim = channels_dict[4]
+        if c_dim == 0:
+            cmap_dim = 0
+
+        common_kwargs = dict(img_channels=img_channels, architecture=architecture, conv_clamp=conv_clamp)
+        cur_layer_idx = 0
+        for res in self.block_resolutions:
+            in_channels = channels_dict[res] if res < self.img_resolution else 0 # the first (highest-resolution) block has no incoming feature map
+            tmp_channels = channels_dict[res]
+            out_channels = channels_dict[res // 2]
+            use_fp16 = (res >= fp16_resolution)
+            # use_fp16 = True
+            block = DiscriminatorBlock(in_channels, tmp_channels, out_channels, resolution=res,
+                first_layer_idx=cur_layer_idx, use_fp16=use_fp16, **block_kwargs, **common_kwargs)
+            setattr(self, f'b{res}', block) # registered as b<res>; forward() looks the blocks up by the same name
+            cur_layer_idx += block.num_layers
+
+        self.mapping = MappingNetwork(z_dim=0, c_dim=c_dim, w_dim=cmap_dim, num_ws=None, w_avg_beta=None, **mapping_kwargs)
+        if hparams.get("disc_cond_mode", 'none') != 'none':
+            # For the discriminator, embedding cond with the mapping network works well:
+            # cond enters as the mapping network's z input (z_dim=cond_dim) next to the camera label,
+            # and this network replaces the camera-only one created just above.
+            self.cond_dim = 204
+            self.mapping = MappingNetwork(z_dim=self.cond_dim, c_dim=c_dim, w_dim=cmap_dim, num_ws=None, w_avg_beta=None, **mapping_kwargs)
+        self.b4 = DiscriminatorEpilogue(channels_dict[4], cmap_dim=cmap_dim, resolution=4, **epilogue_kwargs, **common_kwargs)
+        self.register_buffer('resample_filter', upfirdn2d.setup_filter([1,3,3,1]))
+
+    def forward(self, img, camera, cond=None, update_emas=False, feature_maps=None, **block_kwargs): # feature_maps: optional list that receives each block's output
+        image_raw = filtered_resizing(img['image_raw'], size=img['image'].shape[-1], f=self.resample_filter)
+        img = torch.cat([img['image'], image_raw], 1)
+
+        # Added by yerfor: clamp the stacked input to [-1, 1].
+        img = torch.clamp(img, min=-1, max=1)
+
+        _ = update_emas # unused
+        x = None
+        for res in self.block_resolutions:
+            block = getattr(self, f'b{res}')
+            x, img = block(x, img, **block_kwargs)
+            if feature_maps is not None:
+                feature_maps.append(x)
+        cmap = None
+
+        c = camera.clone() # prevent inplace modification in sample!
+        if hparams['disc_c_noise'] > 0:
+            if len(c) > 1:
+                c_std = c.std(0)
+            else:
+                # A single sample has no batch std, so use fixed per-dimension values (presumably measured on the training cameras - TODO confirm).
+                c_std = torch.tensor([0.0664, 0.0295, 0.2720, 0.6971, 0.0279, 0.0178, 0.1280, 0.3284, 0.2721,
+                            0.1274, 0.0679, 0.1642, 0.0000, 0.0000, 0.0000, 0.0000, 0.0079, 0.0000,
+                            0.0000, 0.0000, 0.0079, 0.0000, 0.0000, 0.0000, 0.0000]).to(c.device)
+            c += torch.randn_like(c) * c_std * hparams['disc_c_noise'] # in-place is safe here because c is a clone
+
+        # x: [B, 512, 4, 4], img: None, cmap: [B, 512]
+        if hparams.get("disc_cond_mode", 'none') != 'none':
+            cmap = self.mapping(cond, c)
+        else:
+            cmap = self.mapping(None, c)
+
+        x = self.b4(x, img, cmap)
+        return x
+
+    def extra_repr(self):
+        return f'c_dim={self.c_dim:d}, img_resolution={self.img_resolution:d}, img_channels={self.img_channels:d}'
+
+#----------------------------------------------------------------------------
+
+class DummyDualDiscriminator(torch.nn.Module):  # Dual discriminator whose image_raw input is multiplied by a factor that decays to 0 over successive forward() calls.
+    def __init__(self,
+        c_dim,                          # Conditioning label (C) dimensionality.
+        img_resolution,                 # Input resolution.
+        img_channels,                   # Number of input color channels.
+        architecture        = 'resnet', # Architecture: 'orig', 'skip', 'resnet'.
+        channel_base        = 32768,    # Overall multiplier for the number of channels.
+        channel_max         = 512,      # Maximum number of channels in any layer.
+        num_fp16_res        = 4,        # Use FP16 for the N highest resolutions.
+        conv_clamp          = 256,      # Clamp the output of convolution layers to +-X, None = disable clamping.
+        cmap_dim            = None,     # Dimensionality of mapped conditioning label, None = default.
+        block_kwargs        = {},       # Arguments for DiscriminatorBlock.
+        mapping_kwargs      = {},       # Arguments for MappingNetwork.
+        epilogue_kwargs     = {},       # Arguments for DiscriminatorEpilogue.
+    ):
+        super().__init__()
+        img_channels *= 2  # the blocks see image and image_raw concatenated along the channel axis
+
+        self.c_dim = c_dim
+        self.img_resolution = img_resolution
+        self.img_resolution_log2 = int(np.log2(img_resolution))
+        self.img_channels = img_channels  # note: stores the doubled channel count
+        self.block_resolutions = [2 ** i for i in range(self.img_resolution_log2, 2, -1)]  # img_resolution, ..., 16, 8 (4 is handled by the epilogue)
+        channels_dict = {res: min(channel_base // res, channel_max) for res in self.block_resolutions + [4]}
+        fp16_resolution = max(2 ** (self.img_resolution_log2 + 1 - num_fp16_res), 8)  # blocks at or above this resolution use FP16
+
+        if cmap_dim is None:
+            cmap_dim = channels_dict[4]
+        if c_dim == 0:
+            cmap_dim = 0
+
+        common_kwargs = dict(img_channels=img_channels, architecture=architecture, conv_clamp=conv_clamp)
+        cur_layer_idx = 0
+        for res in self.block_resolutions:
+            in_channels = channels_dict[res] if res < img_resolution else 0  # the first (highest-resolution) block has no incoming feature map
+            tmp_channels = channels_dict[res]
+            out_channels = channels_dict[res // 2]
+            use_fp16 = (res >= fp16_resolution)
+            block = DiscriminatorBlock(in_channels, tmp_channels, out_channels, resolution=res,
+                first_layer_idx=cur_layer_idx, use_fp16=use_fp16, **block_kwargs, **common_kwargs)
+            setattr(self, f'b{res}', block)  # registered as b<res>; forward() looks the blocks up by the same name
+            cur_layer_idx += block.num_layers
+        if c_dim > 0:
+            self.mapping = MappingNetwork(z_dim=0, c_dim=c_dim, w_dim=cmap_dim, num_ws=None, w_avg_beta=None, **mapping_kwargs)
+        self.b4 = DiscriminatorEpilogue(channels_dict[4], cmap_dim=cmap_dim, resolution=4, **epilogue_kwargs, **common_kwargs)
+        self.register_buffer('resample_filter', upfirdn2d.setup_filter([1,3,3,1]))
+
+        self.raw_fade = 1  # plain Python attribute (not a buffer), so it is not part of state_dict and starts at 1 for each new instance
+
+    def forward(self, img, c, update_emas=False, **block_kwargs):
+        self.raw_fade = max(0, self.raw_fade - 1/(500000/32))  # decays on every call, including evaluation calls; presumably 500k images at batch size 32 - TODO confirm
+
+        image_raw = filtered_resizing(img['image_raw'], size=img['image'].shape[-1], f=self.resample_filter) * self.raw_fade
+        img = torch.cat([img['image'], image_raw], 1)
+
+        _ = update_emas # unused
+        x = None
+        for res in self.block_resolutions:
+            block = getattr(self, f'b{res}')
+            x, img = block(x, img, **block_kwargs)
+
+        cmap = None
+        if self.c_dim > 0:  # the mapping network only exists when a conditioning label is configured
+            cmap = self.mapping(None, c)
+        x = self.b4(x, img, cmap)
+        return x
+
+    def extra_repr(self):
+        return f'c_dim={self.c_dim:d}, img_resolution={self.img_resolution:d}, img_channels={self.img_channels:d}'
+
+#----------------------------------------------------------------------------
+
+
+# Tri-discriminator: upsampled image, super-resolved image, and segmentation mask
+# V2: first concatenate imgs and seg mask, using only one conv block
+class MaskDualDiscriminatorV2(torch.nn.Module):
+    """Discriminator over img['image'], the resized img['image_raw'] and the resized img['image_mask'], concatenated along the channel axis before the first block."""
+    def __init__(self,
+        c_dim,                          # Conditioning label (C) dimensionality.
+        img_resolution,                 # Input resolution.
+        img_channels,                   # Number of input color channels.
+        seg_resolution      = 128,                 # Input resolution.
+        seg_channels        = 1,                   # Number of input color channels.
+        architecture        = 'resnet', # Architecture: 'orig', 'skip', 'resnet'.
+        channel_base        = 32768,    # Overall multiplier for the number of channels.
+        channel_max         = 512,      # Maximum number of channels in any layer.
+        num_fp16_res        = 4,        # Use FP16 for the N highest resolutions.
+        conv_clamp          = 256,      # Clamp the output of convolution layers to +-X, None = disable clamping.
+        cmap_dim            = None,     # Dimensionality of mapped conditioning label, None = default.
+        disc_c_noise        = 0,        # Corrupt camera parameters with X std dev of noise before disc. pose conditioning.
+        block_kwargs        = {},       # Arguments for DiscriminatorBlock.
+        mapping_kwargs      = {},       # Arguments for MappingNetwork.
+        epilogue_kwargs     = {},       # Arguments for DiscriminatorEpilogue.
+    ):
+        super().__init__()
+        img_channels = img_channels * 2 + seg_channels  # image + image_raw + mask channels
+
+        self.c_dim = c_dim
+        self.img_resolution = img_resolution
+        self.img_resolution_log2 = int(np.log2(img_resolution))
+        self.img_channels = img_channels  # note: stores the combined channel count
+        self.seg_resolution = seg_resolution
+        self.seg_channels = seg_channels
+        self.block_resolutions = [2 ** i for i in range(self.img_resolution_log2, 2, -1)]  # img_resolution, ..., 16, 8 (4 is handled by the epilogue)
+        channels_dict = {res: min(channel_base // res, channel_max) for res in self.block_resolutions + [4]}
+        fp16_resolution = max(2 ** (self.img_resolution_log2 + 1 - num_fp16_res), 8)  # blocks at or above this resolution use FP16
+
+        if cmap_dim is None:
+            cmap_dim = channels_dict[4]
+        if c_dim == 0:
+            cmap_dim = 0
+
+        common_kwargs = dict(img_channels=img_channels, architecture=architecture, conv_clamp=conv_clamp)
+        cur_layer_idx = 0
+        for res in self.block_resolutions:
+            in_channels = channels_dict[res] if res < img_resolution else 0  # the first (highest-resolution) block has no incoming feature map
+            tmp_channels = channels_dict[res]
+            out_channels = channels_dict[res // 2]
+            use_fp16 = (res >= fp16_resolution)
+            block = DiscriminatorBlock(in_channels, tmp_channels, out_channels, resolution=res,
+                first_layer_idx=cur_layer_idx, use_fp16=use_fp16, **block_kwargs, **common_kwargs)
+            setattr(self, f'b{res}', block)  # registered as b<res>; forward() looks the blocks up by the same name
+            cur_layer_idx += block.num_layers
+        if c_dim > 0:
+            self.mapping = MappingNetwork(z_dim=0, c_dim=c_dim, w_dim=cmap_dim, num_ws=None, w_avg_beta=None, **mapping_kwargs)
+        self.b4 = DiscriminatorEpilogue(channels_dict[4], cmap_dim=cmap_dim, resolution=4, **epilogue_kwargs, **common_kwargs)
+        self.register_buffer('resample_filter', upfirdn2d.setup_filter([1,3,3,1]))
+        self.disc_c_noise = disc_c_noise
+
+    def forward(self, img, c, update_emas=False, **block_kwargs):
+        image_raw = filtered_resizing(img['image_raw'], size=img['image'].shape[-1], f=self.resample_filter)
+        seg = filtered_resizing(img['image_mask'], size=img['image'].shape[-1], f=self.resample_filter)
+        seg = 2 * seg - 1 # normalize to [-1,1]
+        img = torch.cat([img['image'], image_raw, seg], 1)
+
+        _ = update_emas # unused
+        x = None
+        for res in self.block_resolutions:
+            block = getattr(self, f'b{res}')
+            x, img = block(x, img, **block_kwargs)
+
+        cmap = None
+        if self.c_dim > 0:
+            if self.disc_c_noise > 0: c = c + torch.randn_like(c) * c.std(0) * self.disc_c_noise  # out-of-place: `c +=` silently modified the caller's camera tensor
+            cmap = self.mapping(None, c)
+        x = self.b4(x, img, cmap)
+        return x
+
+    def extra_repr(self):
+        return ' '.join([
+            f'c_dim={self.c_dim:d},',
+            f'img_resolution={self.img_resolution:d}, img_channels={self.img_channels:d},',
+            f'seg_resolution={self.seg_resolution:d}, seg_channels={self.seg_channels:d}'])
\ No newline at end of file
diff --git a/modules/eg3ds/models/dual_discriminator_cond.py b/modules/eg3ds/models/dual_discriminator_cond.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d6b37470054d002607f05fb764988d160272c80
--- /dev/null
+++ b/modules/eg3ds/models/dual_discriminator_cond.py
@@ -0,0 +1,279 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Discriminator architectures from the paper
+"Efficient Geometry-aware 3D Generative Adversarial Networks"."""
+
+import numpy as np
+import torch
+import torch.nn as nn
+from modules.eg3ds.torch_utils.ops import upfirdn2d
+from modules.eg3ds.models.networks_stylegan2 import DiscriminatorBlock, MappingNetwork, DiscriminatorEpilogue
+from modules.eg3ds.models.cond_encoder import LM3D_Win_Encoder
+
+from utils.commons.hparams import hparams
+
+
+class SingleDiscriminator(torch.nn.Module):  # Discriminator that scores only img['image'], conditioned on the camera and, when enabled by hparams, an encoded cond input.
+    def __init__(self,
+        img_resolution,                 # Input resolution.
+        img_channels        =3,         # Number of input color channels.
+        architecture        = 'resnet', # Architecture: 'orig', 'skip', 'resnet'.
+        channel_base        = 32768,    # Overall multiplier for the number of channels.
+        channel_max         = 512,      # Maximum number of channels in any layer.
+        num_fp16_res        = 4,        # Use FP16 for the N highest resolutions.
+        conv_clamp          = 256,      # Clamp the output of convolution layers to +-X, None = disable clamping.
+        cmap_dim            = None,     # Dimensionality of mapped conditioning label, None = default.
+        sr_upsample_factor  = 1,        # Ignored for SingleDiscriminator
+        block_kwargs        = {},       # Arguments for DiscriminatorBlock.
+        mapping_kwargs      = {},       # Arguments for MappingNetwork.
+        epilogue_kwargs     = {},       # Arguments for DiscriminatorEpilogue.
+    ):
+        super().__init__()
+        self.camera_dim = 25  # length of the camera vector that forms the first part of the conditioning label
+        if hparams['cond_type'] == 'idexp_lm3d_normalized':
+            self.cond_dim = 204  # input size given to the cond encoder below
+        else:
+            self.cond_dim = 0
+        c_dim = self.camera_dim
+        if self.cond_dim > 0:
+            cond_out_dim = hparams['cond_out_dim']
+            c_dim += cond_out_dim  # the mapping network's label is the camera vector followed by the encoded cond
+            self.cond_encoder = LM3D_Win_Encoder(self.cond_dim, hid_dim=hparams['cond_hid_dim'], out_dim=cond_out_dim, smo_size=hparams['smo_win_size'])
+        self.c_dim = c_dim
+
+        self.img_resolution = img_resolution
+        self.img_resolution_log2 = int(np.log2(img_resolution))
+        self.img_channels = img_channels
+        self.block_resolutions = [2 ** i for i in range(self.img_resolution_log2, 2, -1)]  # img_resolution, ..., 16, 8 (4 is handled by the epilogue)
+        channels_dict = {res: min(channel_base // res, channel_max) for res in self.block_resolutions + [4]}
+        fp16_resolution = max(2 ** (self.img_resolution_log2 + 1 - num_fp16_res), 8)  # blocks at or above this resolution use FP16
+
+        if cmap_dim is None:
+            cmap_dim = channels_dict[4]
+        if c_dim == 0:
+            cmap_dim = 0
+
+        common_kwargs = dict(img_channels=img_channels, architecture=architecture, conv_clamp=conv_clamp)
+        cur_layer_idx = 0
+        for res in self.block_resolutions:
+            in_channels = channels_dict[res] if res < img_resolution else 0  # the first (highest-resolution) block has no incoming feature map
+            tmp_channels = channels_dict[res]
+            out_channels = channels_dict[res // 2]
+            use_fp16 = (res >= fp16_resolution)
+            block = DiscriminatorBlock(in_channels, tmp_channels, out_channels, resolution=res,
+                first_layer_idx=cur_layer_idx, use_fp16=use_fp16, **block_kwargs, **common_kwargs)
+            setattr(self, f'b{res}', block)  # registered as b<res>; forward() looks the blocks up by the same name
+            cur_layer_idx += block.num_layers
+        if c_dim > 0:
+            self.mapping = MappingNetwork(z_dim=0, c_dim=c_dim, w_dim=cmap_dim, num_ws=None, w_avg_beta=None, **mapping_kwargs)
+        self.b4 = DiscriminatorEpilogue(channels_dict[4], cmap_dim=cmap_dim, resolution=4, **epilogue_kwargs, **common_kwargs)
+
+    def forward(self, img, camera, cond=None, update_emas=False, **block_kwargs):  # img is a dict; only img['image'] is used
+        img = img['image']
+
+        _ = update_emas # unused
+        x = None
+        for res in self.block_resolutions:
+            block = getattr(self, f'b{res}')
+            x, img = block(x, img, **block_kwargs)
+
+        cmap = None
+        c = camera
+        if self.cond_dim > 0:
+            cond_feat = self.cond_encoder(cond)
+            c = torch.cat([c, cond_feat], dim=-1) # camera vector followed by the encoded cond (camera_dim + cond_out_dim values)
+        # self.mapping is only created when c_dim > 0, which always holds here because c_dim starts at camera_dim.
+        cmap = self.mapping(None, c)
+        x = self.b4(x, img, cmap)
+        return x
+    # Extra fields for the module's printed representation.
+    def extra_repr(self):
+        return f'c_dim={self.c_dim:d}, img_resolution={self.img_resolution:d}, img_channels={self.img_channels:d}'
+
+#----------------------------------------------------------------------------
+
+def filtered_resizing(image_orig_tensor, size, f, filter_mode='antialiased'):  # Resize image_orig_tensor to (size, size); `f` is the filter handed to upfirdn2d and is only used by the 'classic' mode.
+    if filter_mode == 'antialiased':
+        ada_filtered_64 = torch.nn.functional.interpolate(image_orig_tensor, size=(size, size), mode='bilinear', align_corners=False, antialias=True)
+    elif filter_mode == 'classic':
+        ada_filtered_64 = upfirdn2d.upsample2d(image_orig_tensor, f, up=2)
+        ada_filtered_64 = torch.nn.functional.interpolate(ada_filtered_64, size=(size * 2 + 2, size * 2 + 2), mode='bilinear', align_corners=False)
+        ada_filtered_64 = upfirdn2d.downsample2d(ada_filtered_64, f, down=2, flip_filter=True, padding=-1)
+    elif filter_mode == 'none':
+        ada_filtered_64 = torch.nn.functional.interpolate(image_orig_tensor, size=(size, size), mode='bilinear', align_corners=False)
+    elif isinstance(filter_mode, float): # a float in (0, 1) blends the aliased and antialiased results
+        assert 0 < filter_mode < 1
+        filtered = torch.nn.functional.interpolate(image_orig_tensor, size=(size, size), mode='bilinear', align_corners=False, antialias=True)
+        aliased  = torch.nn.functional.interpolate(image_orig_tensor, size=(size, size), mode='bilinear', align_corners=False, antialias=False)
+        ada_filtered_64 = (1 - filter_mode) * aliased + (filter_mode) * filtered
+    else: # an unknown mode used to fall through and fail with UnboundLocalError at the return below
+        raise ValueError(f'Unsupported filter_mode: {filter_mode!r}')
+    return ada_filtered_64
+
+#----------------------------------------------------------------------------
+
+class DualDiscriminator(torch.nn.Module):
+    """Discriminator over img['image'] stacked with the resized img['image_raw'], conditioned on the camera and, when enabled by hparams, an encoded cond input."""
+    def __init__(self):
+        super().__init__()
+        channel_base = hparams['base_channel']
+        channel_max = hparams['max_channel']
+        conv_clamp = 256
+        cmap_dim = None
+        disc_c_noise = 0.
+        block_kwargs = {'freeze_layers': 0}
+        mapping_kwargs = {}
+        epilogue_kwargs = {'mbstd_group_size': 4}
+        architecture = 'resnet' # Architecture: 'orig', 'skip', 'resnet'.
+        img_channels = 3
+        img_channels *= 2 # the blocks see image and image_raw concatenated along the channel axis
+
+        self.camera_dim = 25
+        if hparams['cond_type'] == 'idexp_lm3d_normalized':
+            self.cond_dim = 204 # input size given to the cond encoder below
+        else:
+            self.cond_dim = 0
+        c_dim = self.camera_dim
+
+        if self.cond_dim > 0:
+            cond_out_dim = hparams['cond_out_dim']
+            c_dim += cond_out_dim # the mapping network's label is the camera vector followed by the encoded cond
+            self.cond_encoder = LM3D_Win_Encoder(self.cond_dim, hid_dim=hparams['cond_hid_dim'], out_dim=cond_out_dim, smo_size=hparams['smo_win_size'])
+        self.c_dim = c_dim # read by extra_repr(); it was never assigned before, so repr()/print() of the module raised AttributeError
+        self.img_resolution = hparams['final_resolution']
+        self.img_resolution_log2 = int(np.log2(self.img_resolution))
+        self.img_channels = 3
+
+        self.block_resolutions = [2 ** i for i in range(self.img_resolution_log2, 2, -1)] # img_resolution, ..., 16, 8 (4 is handled by the epilogue)
+        channels_dict = {res: min(channel_base // res, channel_max) for res in self.block_resolutions + [4]}
+        self.num_fp16_res = hparams['num_fp16_layers_in_discriminator']
+        fp16_resolution = max(2 ** (self.img_resolution_log2 + 1 - self.num_fp16_res), 8) # blocks at or above this resolution use FP16
+        if cmap_dim is None:
+            cmap_dim = channels_dict[4]
+        if c_dim == 0:
+            cmap_dim = 0
+
+        common_kwargs = dict(img_channels=img_channels, architecture=architecture, conv_clamp=conv_clamp)
+        cur_layer_idx = 0
+        for res in self.block_resolutions:
+            in_channels = channels_dict[res] if res < self.img_resolution else 0 # the first (highest-resolution) block has no incoming feature map
+            tmp_channels = channels_dict[res]
+            out_channels = channels_dict[res // 2]
+            use_fp16 = (res >= fp16_resolution)
+            block = DiscriminatorBlock(in_channels, tmp_channels, out_channels, resolution=res,
+                first_layer_idx=cur_layer_idx, use_fp16=use_fp16, **block_kwargs, **common_kwargs)
+            setattr(self, f'b{res}', block) # registered as b<res>; forward() looks the blocks up by the same name
+            cur_layer_idx += block.num_layers
+
+        self.mapping = MappingNetwork(z_dim=0, c_dim=c_dim, w_dim=cmap_dim, num_ws=None, w_avg_beta=None, **mapping_kwargs)
+        self.b4 = DiscriminatorEpilogue(channels_dict[4], cmap_dim=cmap_dim, resolution=4, **epilogue_kwargs, **common_kwargs)
+        self.register_buffer('resample_filter', upfirdn2d.setup_filter([1,3,3,1]))
+        self.disc_c_noise = disc_c_noise # fixed to 0. above, so the noise branch in forward() is inactive unless this attribute is changed
+
+    def forward(self, img, camera, cond=None, update_emas=False, **block_kwargs):
+        image_raw = filtered_resizing(img['image_raw'], size=img['image'].shape[-1], f=self.resample_filter)
+        img = torch.cat([img['image'], image_raw], 1)
+
+        _ = update_emas # unused
+        x = None
+        for res in self.block_resolutions:
+            block = getattr(self, f'b{res}')
+            x, img = block(x, img, **block_kwargs)
+
+        cmap = None
+
+        c = camera
+        if self.cond_dim > 0:
+            cond_feat = self.cond_encoder(cond)
+            c = torch.cat([c, cond_feat], dim=-1) # camera vector followed by the encoded cond (camera_dim + cond_out_dim values)
+        if self.disc_c_noise > 0:
+            c = c + torch.randn_like(c) * c.std(0) * self.disc_c_noise # out-of-place: without a cond, c aliases `camera` and `c +=` would modify the caller's tensor
+        # The mapping network has no z input here (z_dim=0), so only the label c is passed.
+        cmap = self.mapping(None, c)
+        x = self.b4(x, img, cmap)
+        return x
+
+    def extra_repr(self):
+        return f'c_dim={self.c_dim:d}, img_resolution={self.img_resolution:d}, img_channels={self.img_channels:d}'
+
+#----------------------------------------------------------------------------
+
+class DummyDualDiscriminator(torch.nn.Module):
+    """Dual discriminator whose ``image_raw`` branch is faded out over successive forward calls.
+
+    ``self.raw_fade`` starts at 1 and is lowered by a fixed step on every call
+    to ``forward`` (never below 0); the resized ``image_raw`` is multiplied by
+    it before being stacked with ``image``.
+    """
+
+    def __init__(self,
+        c_dim,                          # Conditioning label (C) dimensionality.
+        img_resolution,                 # Input resolution.
+        img_channels,                   # Number of input color channels.
+        architecture        = 'resnet', # Architecture: 'orig', 'skip', 'resnet'.
+        channel_base        = 32768,    # Overall multiplier for the number of channels.
+        channel_max         = 512,      # Maximum number of channels in any layer.
+        num_fp16_res        = 4,        # Use FP16 for the N highest resolutions.
+        conv_clamp          = 256,      # Clamp the output of convolution layers to +-X, None = disable clamping.
+        cmap_dim            = None,     # Dimensionality of mapped conditioning label, None = default.
+        block_kwargs        = {},       # Arguments for DiscriminatorBlock.
+        mapping_kwargs      = {},       # Arguments for MappingNetwork.
+        epilogue_kwargs     = {},       # Arguments for DiscriminatorEpilogue.
+    ):
+        super().__init__()
+        # Two images are concatenated channel-wise, hence twice the channels.
+        stacked_channels = img_channels * 2
+        res_log2 = int(np.log2(img_resolution))
+
+        self.c_dim = c_dim
+        self.img_resolution = img_resolution
+        self.img_resolution_log2 = res_log2
+        self.img_channels = stacked_channels
+        self.block_resolutions = [2 ** exponent for exponent in range(res_log2, 2, -1)]
+
+        # Channel width per resolution, including the 4x4 epilogue.
+        widths = {}
+        for res in self.block_resolutions + [4]:
+            widths[res] = min(channel_base // res, channel_max)
+        fp16_from = max(2 ** (res_log2 + 1 - num_fp16_res), 8)
+
+        # No conditioning label means no conditioning map at all.
+        if c_dim == 0:
+            cmap_dim = 0
+        elif cmap_dim is None:
+            cmap_dim = widths[4]
+
+        shared_kwargs = dict(img_channels=stacked_channels, architecture=architecture, conv_clamp=conv_clamp)
+        layer_cursor = 0
+        for res in self.block_resolutions:
+            stage = DiscriminatorBlock(
+                widths[res] if res < img_resolution else 0,
+                widths[res],
+                widths[res // 2],
+                resolution=res,
+                first_layer_idx=layer_cursor,
+                use_fp16=(res >= fp16_from),
+                **block_kwargs,
+                **shared_kwargs)
+            setattr(self, f'b{res}', stage)
+            layer_cursor += stage.num_layers
+        if c_dim > 0:
+            self.mapping = MappingNetwork(z_dim=0, c_dim=c_dim, w_dim=cmap_dim, num_ws=None, w_avg_beta=None, **mapping_kwargs)
+        self.b4 = DiscriminatorEpilogue(widths[4], cmap_dim=cmap_dim, resolution=4, **epilogue_kwargs, **shared_kwargs)
+        self.register_buffer('resample_filter', upfirdn2d.setup_filter([1,3,3,1]))
+
+        self.raw_fade = 1
+
+    def forward(self, img, c, update_emas=False, **block_kwargs):
+        """Score ``img`` (mapping with ``'image'`` and ``'image_raw'``) given label ``c``."""
+        _ = update_emas # unused
+
+        # Lower the fade factor by one step per call, clamped at zero.
+        fade_step = 1/(500000/32)
+        self.raw_fade = max(0, self.raw_fade - fade_step)
+
+        full_image = img['image']
+        raw_image = filtered_resizing(img['image_raw'], size=full_image.shape[-1], f=self.resample_filter)
+        raw_image = raw_image * self.raw_fade
+        img = torch.cat([full_image, raw_image], 1)
+
+        x = None
+        for res in self.block_resolutions:
+            x, img = getattr(self, f'b{res}')(x, img, **block_kwargs)
+
+        cmap = self.mapping(None, c) if self.c_dim > 0 else None
+        return self.b4(x, img, cmap)
+
+    def extra_repr(self):
+        return f'c_dim={self.c_dim:d}, img_resolution={self.img_resolution:d}, img_channels={self.img_channels:d}'
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/models/networks_stylegan2.py b/modules/eg3ds/models/networks_stylegan2.py
new file mode 100644
index 0000000000000000000000000000000000000000..30d638e3f6898cec32c92c475ad7a73df12e8f9c
--- /dev/null
+++ b/modules/eg3ds/models/networks_stylegan2.py
@@ -0,0 +1,814 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Network architectures from the paper
+"Analyzing and Improving the Image Quality of StyleGAN".
+Matches the original implementation of configs E-F by Karras et al. at
+https://github.com/NVlabs/stylegan2/blob/master/training/networks_stylegan2.py"""
+
+import numpy as np
+import torch
+import torch.nn as nn
+import math
+from modules.eg3ds.torch_utils import misc
+from modules.eg3ds.torch_utils.ops import conv2d_resample
+from modules.eg3ds.torch_utils.ops import upfirdn2d
+from modules.eg3ds.torch_utils.ops import bias_act
+from modules.eg3ds.torch_utils.ops import fma
+
+from utils.commons.hparams import hparams
+
+#----------------------------------------------------------------------------
+
+@misc.profiled_function
+def normalize_2nd_moment(x, dim=1, eps=1e-8):
+    """Divide ``x`` by the root of its mean square taken along ``dim`` (``eps`` added for stability)."""
+    mean_square = x.square().mean(dim=dim, keepdim=True)
+    inv_rms = (mean_square + eps).rsqrt()
+    return x * inv_rms
+
+#----------------------------------------------------------------------------
+
+@misc.profiled_function
+def modulated_conv2d(
+    x,                          # Input tensor of shape [batch_size, in_channels, in_height, in_width].
+    weight,                     # Weight tensor of shape [out_channels, in_channels, kernel_height, kernel_width].
+    styles,                     # Modulation coefficients of shape [batch_size, in_channels].
+    noise           = None,     # Optional noise tensor to add to the output activations.
+    up              = 1,        # Integer upsampling factor.
+    down            = 1,        # Integer downsampling factor.
+    padding         = 0,        # Padding with respect to the upsampled image.
+    resample_filter = None,     # Low-pass filter to apply when resampling activations. Must be prepared beforehand by calling upfirdn2d.setup_filter().
+    demodulate      = True,     # Apply weight demodulation?
+    flip_weight     = True,     # False = convolution, True = correlation (matches torch.nn.functional.conv2d).
+    fused_modconv   = True,     # Perform modulation, convolution, and demodulation as a single fused operation?
+):
+    """Convolve ``x`` with ``weight`` modulated per sample by ``styles``.
+
+    Two execution paths exist:
+
+    * ``fused_modconv=False``: the activations are scaled by ``styles`` before
+      the convolution and by the demodulation coefficients after it; ``noise``
+      is cast to ``x.dtype`` before being added.
+    * ``fused_modconv=True``: per-sample weights are built and the whole batch
+      is run as one grouped convolution (``groups=batch_size``); ``noise`` is
+      added as passed in.
+
+    Returns the resulting activation tensor.
+    """
+    batch_size = x.shape[0]
+    out_channels, in_channels, kh, kw = weight.shape
+    misc.assert_shape(weight, [out_channels, in_channels, kh, kw]) # [OIkk]
+    misc.assert_shape(x, [batch_size, in_channels, None, None]) # [NIHW]
+    misc.assert_shape(styles, [batch_size, in_channels]) # [NI]
+
+    # Pre-normalize inputs to avoid FP16 overflow.
+    if x.dtype == torch.float16 and demodulate:
+        weight = weight * (1 / np.sqrt(in_channels * kh * kw) / weight.norm(float('inf'), dim=[1,2,3], keepdim=True)) # max_Ikk
+        styles = styles / styles.norm(float('inf'), dim=1, keepdim=True) # max_I
+
+    # Calculate per-sample weights and demodulation coefficients.
+    w = None
+    dcoefs = None
+    if demodulate or fused_modconv:
+        w = weight.unsqueeze(0) # [NOIkk]
+        w = w * styles.reshape(batch_size, 1, -1, 1, 1) # [NOIkk], multiply the weight by the style
+    if demodulate:
+        # [NO]; sums squares over the I, kh, kw axes, e.g. [2, 512, 512, 3, 3] ==> [2, 512] (normalization)
+        dcoefs = (w.square().sum(dim=[2,3,4]) + 1e-8).rsqrt()
+    if demodulate and fused_modconv:
+        w = w * dcoefs.reshape(batch_size, -1, 1, 1, 1) # [NOIkk]
+
+    # Execute by scaling the activations before and after the convolution.
+    if not fused_modconv:
+        x = x * styles.to(x.dtype).reshape(batch_size, -1, 1, 1) # multiply x by the style
+        x = conv2d_resample.conv2d_resample(x=x, w=weight.to(x.dtype), f=resample_filter, up=up, down=down, padding=padding, flip_weight=flip_weight) # conv2d forward
+        if demodulate and noise is not None:
+            x = fma.fma(x, dcoefs.to(x.dtype).reshape(batch_size, -1, 1, 1), noise.to(x.dtype)) # fused multiply-add
+        elif demodulate:
+            x = x * dcoefs.to(x.dtype).reshape(batch_size, -1, 1, 1)
+        elif noise is not None:
+            x = x.add_(noise.to(x.dtype))
+        return x
+
+    # Execute as one fused op using grouped convolution.
+    with misc.suppress_tracer_warnings(): # this value will be treated as a constant
+        batch_size = int(batch_size)
+    misc.assert_shape(x, [batch_size, in_channels, None, None])
+    # Fold the batch into the channel axis so each sample gets its own group.
+    x = x.reshape(1, -1, *x.shape[2:])
+    w = w.reshape(-1, in_channels, kh, kw)
+    x = conv2d_resample.conv2d_resample(x=x, w=w.to(x.dtype), f=resample_filter, up=up, down=down, padding=padding, groups=batch_size, flip_weight=flip_weight)
+    x = x.reshape(batch_size, -1, *x.shape[2:])
+    if noise is not None:
+        x = x.add_(noise)
+    return x
+
+#----------------------------------------------------------------------------
+
+
+class FullyConnectedLayer(torch.nn.Module):
+    """Linear layer whose weight and bias are rescaled at call time.
+
+    The stored weight is initialised as ``randn / lr_multiplier`` and multiplied
+    by ``lr_multiplier / sqrt(in_features)`` in ``forward``; the bias is
+    multiplied by ``lr_multiplier``.
+    """
+
+    def __init__(self,
+        in_features,                # Number of input features.
+        out_features,               # Number of output features.
+        bias            = True,     # Apply additive bias before the activation function?
+        activation      = 'linear', # Activation function: 'relu', 'lrelu', etc.
+        lr_multiplier   = 1,        # Learning rate multiplier.
+        bias_init       = 0,        # Initial value for the additive bias.
+    ):
+        super().__init__()
+        self.in_features = in_features
+        self.out_features = out_features
+        self.activation = activation
+        self.bias_init = bias_init
+
+        initial_weight = torch.randn([out_features, in_features]) / lr_multiplier
+        self.weight = torch.nn.Parameter(initial_weight)
+        if bias:
+            self.bias = torch.nn.Parameter(torch.full([out_features], np.float32(bias_init)))
+        else:
+            self.bias = None
+
+        # Runtime scale factors applied in forward().
+        self.weight_gain = lr_multiplier / np.sqrt(in_features)
+        self.bias_gain = lr_multiplier
+
+    def forward(self, x):
+        """Apply the scaled affine map followed by ``self.activation``."""
+        scaled_weight = self.weight.to(x.dtype) * self.weight_gain
+
+        if self.bias is None:
+            # No bias: plain matmul, activation handled by bias_act.
+            return bias_act.bias_act(x.matmul(scaled_weight.t()), None, act=self.activation)
+
+        scaled_bias = self.bias.to(x.dtype)
+        if self.bias_gain != 1:
+            scaled_bias = scaled_bias * self.bias_gain
+
+        if self.activation == 'linear':
+            # Bias add and matmul in a single call.
+            return torch.addmm(scaled_bias.unsqueeze(0), x, scaled_weight.t())
+        return bias_act.bias_act(x.matmul(scaled_weight.t()), scaled_bias, act=self.activation)
+
+    def extra_repr(self):
+        return f'in_features={self.in_features:d}, out_features={self.out_features:d}, activation={self.activation:s}'
+
+#----------------------------------------------------------------------------
+
+
+class Conv2dLayer(torch.nn.Module):
+    """2D convolution with optional resampling, bias, activation and output clamping.
+
+    When ``trainable`` is false, the weight and bias are registered as buffers
+    instead of parameters.
+    """
+
+    def __init__(self,
+        in_channels,                    # Number of input channels.
+        out_channels,                   # Number of output channels.
+        kernel_size,                    # Width and height of the convolution kernel.
+        bias            = True,         # Apply additive bias before the activation function?
+        activation      = 'linear',     # Activation function: 'relu', 'lrelu', etc.
+        up              = 1,            # Integer upsampling factor.
+        down            = 1,            # Integer downsampling factor.
+        resample_filter = [1,3,3,1],    # Low-pass filter to apply when resampling activations.
+        conv_clamp      = None,         # Clamp the output to +-X, None = disable clamping.
+        channels_last   = False,        # Expect the input to have memory_format=channels_last?
+        trainable       = True,         # Update the weights of this layer during training?
+    ):
+        super().__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.activation = activation
+        self.up = up
+        self.down = down
+        self.conv_clamp = conv_clamp
+        self.register_buffer('resample_filter', upfirdn2d.setup_filter(resample_filter))
+        self.padding = kernel_size // 2
+        self.weight_gain = 1 / np.sqrt(in_channels * (kernel_size ** 2))
+        self.act_gain = bias_act.activation_funcs[activation].def_gain
+        self.trainable = trainable
+
+        layout = torch.channels_last if channels_last else torch.contiguous_format
+        kernel = torch.randn([out_channels, in_channels, kernel_size, kernel_size]).to(memory_format=layout)
+        offset = torch.zeros([out_channels]) if bias else None
+
+        if trainable:
+            self.weight = torch.nn.Parameter(kernel)
+            self.bias = None if offset is None else torch.nn.Parameter(offset)
+        else:
+            # Frozen layer: keep the tensors in the state dict but out of the optimizer.
+            self.register_buffer('weight', kernel)
+            if offset is None:
+                self.bias = None
+            else:
+                self.register_buffer('bias', offset)
+
+    def forward(self, x, gain=1):
+        """Convolve ``x``; ``gain`` scales both the activation gain and the clamp."""
+        kernel = (self.weight * self.weight_gain).to(x.dtype)
+        offset = None if self.bias is None else self.bias.to(x.dtype)
+
+        x = conv2d_resample.conv2d_resample(
+            x=x, w=kernel, f=self.resample_filter, up=self.up, down=self.down,
+            padding=self.padding,
+            flip_weight=(self.up == 1)) # slightly faster
+
+        clamp = None if self.conv_clamp is None else self.conv_clamp * gain
+        return bias_act.bias_act(x, offset, act=self.activation, gain=self.act_gain * gain, clamp=clamp)
+
+    def extra_repr(self):
+        head = f'in_channels={self.in_channels:d}, out_channels={self.out_channels:d}, activation={self.activation:s},'
+        tail = f'up={self.up}, down={self.down}'
+        return ' '.join([head, tail])
+
+#----------------------------------------------------------------------------
+
+
+class MappingNetwork(torch.nn.Module):
+    """MLP that maps a latent ``z`` and/or a conditioning label ``c`` to ``w``.
+
+    The label is embedded by a fully connected layer, both inputs are
+    second-moment normalised and concatenated, then passed through
+    ``num_layers`` fully connected layers ``fc0 .. fc{num_layers-1}``.
+    The moving-average buffer ``w_avg`` is registered only when both
+    ``num_ws`` and ``w_avg_beta`` are not ``None``.
+    """
+
+    def __init__(self,
+        z_dim,                      # Input latent (Z) dimensionality, 0 = no latent.
+        c_dim,                      # Conditioning label (C) dimensionality, 0 = no label.
+        w_dim,                      # Intermediate latent (W) dimensionality.
+        num_ws,                     # Number of intermediate latents to output, None = do not broadcast.
+        num_layers      = 8,        # Number of mapping layers.
+        embed_features  = None,     # Label embedding dimensionality, None = same as w_dim.
+        layer_features  = None,     # Number of intermediate features in the mapping layers, None = same as w_dim.
+        activation      = 'lrelu',  # Activation function: 'relu', 'lrelu', etc.
+        last_activation = None,     # Activation of the final layer, None = same as `activation` (added by PanoHead).
+        lr_multiplier   = 0.01,     # Learning rate multiplier for the mapping layers.
+        w_avg_beta      = 0.998,    # Decay for tracking the moving average of W during training, None = do not track.
+    ):
+        super().__init__()
+        self.z_dim = z_dim
+        self.c_dim = c_dim
+        self.w_dim = w_dim
+        self.num_ws = num_ws
+        self.num_layers = num_layers
+        self.w_avg_beta = w_avg_beta
+
+        if embed_features is None:
+            embed_features = w_dim
+        if c_dim == 0:
+            embed_features = 0
+        if layer_features is None:
+            layer_features = w_dim
+        features_list = [z_dim + embed_features] + [layer_features] * (num_layers - 1) + [w_dim]
+
+        if c_dim > 0:
+            self.embed = FullyConnectedLayer(c_dim, embed_features)
+        for idx in range(num_layers):
+            in_features = features_list[idx]
+            out_features = features_list[idx + 1]
+            if idx == num_layers - 1 and last_activation:
+                layer = FullyConnectedLayer(in_features, out_features, activation=last_activation, lr_multiplier=lr_multiplier)
+            else:
+                layer = FullyConnectedLayer(in_features, out_features, activation=activation, lr_multiplier=lr_multiplier)
+            setattr(self, f'fc{idx}', layer)
+
+        if num_ws is not None and w_avg_beta is not None:
+            self.register_buffer('w_avg', torch.zeros([w_dim]))
+
+    def forward(self, z, c, truncation_psi=1, truncation_cutoff=None, update_emas=False):
+        """Map ``z`` / ``c`` to ``w``.
+
+        ``z`` is ignored when ``self.z_dim == 0`` and ``c`` when
+        ``self.c_dim == 0``.  The result is repeated ``num_ws`` times along a
+        new axis 1 when ``num_ws`` is set, and pulled towards ``w_avg`` when
+        ``truncation_psi != 1``.
+        """
+        # Embed, normalize, and concat inputs.
+        x = None
+        with torch.autograd.profiler.record_function('input'):
+            if self.z_dim > 0:
+                misc.assert_shape(z, [None, self.z_dim])
+                x = normalize_2nd_moment(z.to(torch.float32))
+            if self.c_dim > 0:
+                misc.assert_shape(c, [None, self.c_dim])
+                y = normalize_2nd_moment(self.embed(c.to(torch.float32)))
+                x = torch.cat([x, y], dim=1) if x is not None else y
+
+        # Main layers.
+        for idx in range(self.num_layers):
+            layer = getattr(self, f'fc{idx}')
+            x = layer(x)
+
+        # Update moving average of W.
+        if update_emas and self.w_avg_beta is not None:
+            with torch.autograd.profiler.record_function('update_w_avg'):
+                self.w_avg.copy_(x.detach().mean(dim=0).lerp(self.w_avg, self.w_avg_beta))
+
+        # Broadcast.
+        if self.num_ws is not None:
+            with torch.autograd.profiler.record_function('broadcast'):
+                x = x.unsqueeze(1).repeat([1, self.num_ws, 1])
+
+        # Apply truncation trick.
+        if truncation_psi != 1:
+            with torch.autograd.profiler.record_function('truncate'):
+                assert self.w_avg_beta is not None
+                if self.num_ws is None or truncation_cutoff is None:
+                    x = self.w_avg.lerp(x, truncation_psi)
+                else:
+                    # Start from w_avg and move towards x; truncation_psi is the
+                    # fraction of the way travelled.
+                    x[:, :truncation_cutoff] = self.w_avg.lerp(x[:, :truncation_cutoff], truncation_psi)
+        return x
+
+    def extra_repr(self):
+        # num_ws may legitimately be None ("do not broadcast"); the 'd' format
+        # spec would raise TypeError for it, so it is formatted without a spec
+        # (integers render identically either way).
+        return f'z_dim={self.z_dim:d}, c_dim={self.c_dim:d}, w_dim={self.w_dim:d}, num_ws={self.num_ws}'
+
+#----------------------------------------------------------------------------
+
+
+class SynthesisLayer(torch.nn.Module):
+    """Style-modulated convolution with optional per-pixel noise, bias and activation.
+
+    The styles come from an affine layer applied to ``w``.  Extra keyword
+    arguments given to the constructor or to ``forward`` are accepted and ignored.
+    """
+
+    def __init__(self,
+        in_channels,                    # Number of input channels.
+        out_channels,                   # Number of output channels.
+        w_dim,                          # Intermediate latent (W) dimensionality.
+        resolution,                     # Resolution of this layer.
+        kernel_size     = 3,            # Convolution kernel size.
+        up              = 1,            # Integer upsampling factor.
+        use_noise       = True,         # Enable noise input?
+        activation      = 'lrelu',      # Activation function: 'relu', 'lrelu', etc.
+        resample_filter = [1,3,3,1],    # Low-pass filter to apply when resampling activations.
+        conv_clamp      = None,         # Clamp the output of convolution layers to +-X, None = disable clamping.
+        channels_last   = False,        # Use channels_last format for the weights?
+        **other_args
+    ):
+        super().__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.w_dim = w_dim
+        self.resolution = resolution
+        self.up = up
+        self.use_noise = use_noise
+        self.activation = activation
+        self.conv_clamp = conv_clamp
+        self.register_buffer('resample_filter', upfirdn2d.setup_filter(resample_filter))
+        self.padding = kernel_size // 2
+        self.act_gain = bias_act.activation_funcs[activation].def_gain
+
+        # Maps w to one modulation coefficient per input channel.
+        self.affine = FullyConnectedLayer(w_dim, in_channels, bias_init=1)
+
+        layout = torch.channels_last if channels_last else torch.contiguous_format
+        kernel = torch.randn([out_channels, in_channels, kernel_size, kernel_size]).to(memory_format=layout)
+        self.weight = torch.nn.Parameter(kernel)
+        if use_noise:
+            self.register_buffer('noise_const', torch.randn([resolution, resolution]))
+            self.noise_strength = torch.nn.Parameter(torch.zeros([]))
+        self.bias = torch.nn.Parameter(torch.zeros([out_channels]))
+
+    def forward(self, x, w, noise_mode='random', fused_modconv=True, gain=1, **kwargs):
+        """Run the layer on ``x`` with latent ``w``; ``noise_mode`` is 'random', 'const' or 'none'."""
+        assert noise_mode in ['random', 'const', 'none']
+        expected_res = self.resolution // self.up
+        misc.assert_shape(x, [None, self.in_channels, expected_res, expected_res])
+        styles = self.affine(w)
+
+        noise = None
+        if self.use_noise:
+            if noise_mode == 'random':
+                fresh = torch.randn([x.shape[0], 1, self.resolution, self.resolution], device=x.device)
+                noise = fresh * self.noise_strength
+            elif noise_mode == 'const':
+                noise = self.noise_const * self.noise_strength
+
+        x = modulated_conv2d(
+            x=x, weight=self.weight, styles=styles, noise=noise, up=self.up,
+            padding=self.padding, resample_filter=self.resample_filter,
+            flip_weight=(self.up == 1), # slightly faster
+            fused_modconv=fused_modconv)
+
+        clamp = None if self.conv_clamp is None else self.conv_clamp * gain
+        return bias_act.bias_act(x, self.bias.to(x.dtype), act=self.activation, gain=self.act_gain * gain, clamp=clamp)
+
+    def extra_repr(self):
+        head = f'in_channels={self.in_channels:d}, out_channels={self.out_channels:d}, w_dim={self.w_dim:d},'
+        tail = f'resolution={self.resolution:d}, up={self.up}, activation={self.activation:s}'
+        return ' '.join([head, tail])
+
+#----------------------------------------------------------------------------
+
+
+class ToRGBLayer(torch.nn.Module):
+    """Style-modulated convolution (no demodulation) that projects features to image channels."""
+
+    def __init__(self, in_channels, out_channels, w_dim, kernel_size=1, conv_clamp=None, channels_last=False):
+        super().__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.w_dim = w_dim
+        self.conv_clamp = conv_clamp
+        self.affine = FullyConnectedLayer(w_dim, in_channels, bias_init=1)
+
+        layout = torch.channels_last if channels_last else torch.contiguous_format
+        kernel = torch.randn([out_channels, in_channels, kernel_size, kernel_size]).to(memory_format=layout)
+        self.weight = torch.nn.Parameter(kernel)
+        self.bias = torch.nn.Parameter(torch.zeros([out_channels]))
+        self.weight_gain = 1 / np.sqrt(in_channels * (kernel_size ** 2))
+
+    def forward(self, x, w, fused_modconv=True):
+        """Return the clamped, biased projection of ``x`` under the styles derived from ``w``."""
+        # The weight gain is folded into the styles instead of the weights.
+        styles = self.affine(w) * self.weight_gain
+        projected = modulated_conv2d(
+            x=x, weight=self.weight, styles=styles,
+            demodulate=False, # demodulation is switched off for this layer
+            fused_modconv=fused_modconv)
+        return bias_act.bias_act(projected, self.bias.to(projected.dtype), clamp=self.conv_clamp)
+
+    def extra_repr(self):
+        return f'in_channels={self.in_channels:d}, out_channels={self.out_channels:d}, w_dim={self.w_dim:d}'
+
+#----------------------------------------------------------------------------
+
+class SynthesisBlock(torch.nn.Module):
+    """One resolution stage of the synthesis network.
+
+    With ``in_channels == 0`` the block starts from a learned constant and has
+    a single conv layer (``conv1``); otherwise it has an upsampling conv
+    (``conv0``, ``up=2``) followed by ``conv1``.  A ``torgb`` layer exists when
+    the block is the last one or the architecture is ``'skip'``; a 1x1
+    upsampling ``skip`` conv exists for non-first blocks of the ``'resnet'``
+    architecture.  ``num_conv`` and ``num_torgb`` count the layers that each
+    consume one ``w`` vector in ``forward``.
+    """
+
+    def __init__(self,
+        in_channels,                            # Number of input channels, 0 = first block.
+        out_channels,                           # Number of output channels.
+        w_dim,                                  # Intermediate latent (W) dimensionality.
+        resolution,                             # Resolution of this block.
+        img_channels,                           # Number of output color channels.
+        is_last,                                # Is this the last block?
+        architecture            = 'skip',       # Architecture: 'orig', 'skip', 'resnet'.
+        resample_filter         = [1,3,3,1],    # Low-pass filter to apply when resampling activations.
+        conv_clamp              = 256,          # Clamp the output of convolution layers to +-X, None = disable clamping.
+        use_fp16                = False,        # Use FP16 for this block?
+        fp16_channels_last      = False,        # Use channels-last memory format with FP16?
+        fused_modconv_default   = True,         # Default value of fused_modconv. 'inference_only' = True for inference, False for training.
+        **layer_kwargs,                         # Arguments for SynthesisLayer.
+    ):
+        assert architecture in ['orig', 'skip', 'resnet']
+        super().__init__()
+        self.in_channels = in_channels
+        self.w_dim = w_dim
+        self.resolution = resolution
+        self.img_channels = img_channels
+        self.is_last = is_last
+        self.architecture = architecture
+        self.use_fp16 = use_fp16
+        self.channels_last = (use_fp16 and fp16_channels_last)
+        self.fused_modconv_default = fused_modconv_default
+        self.register_buffer('resample_filter', upfirdn2d.setup_filter(resample_filter))
+        self.num_conv = 0
+        self.num_torgb = 0
+
+        # First block: learned constant input instead of incoming features.
+        if in_channels == 0:
+            self.const = torch.nn.Parameter(torch.randn([out_channels, resolution, resolution]))
+
+        # Later blocks: upsampling convolution from the previous resolution.
+        if in_channels != 0:
+            self.conv0 = SynthesisLayer(in_channels, out_channels, w_dim=w_dim, resolution=resolution, up=2,
+                resample_filter=resample_filter, conv_clamp=conv_clamp, channels_last=self.channels_last, **layer_kwargs)
+            self.num_conv += 1
+
+        self.conv1 = SynthesisLayer(out_channels, out_channels, w_dim=w_dim, resolution=resolution,
+            conv_clamp=conv_clamp, channels_last=self.channels_last, **layer_kwargs)
+        self.num_conv += 1
+
+        if is_last or architecture == 'skip':
+            self.torgb = ToRGBLayer(out_channels, img_channels, w_dim=w_dim,
+                conv_clamp=conv_clamp, channels_last=self.channels_last)
+            self.num_torgb += 1
+
+        if in_channels != 0 and architecture == 'resnet':
+            self.skip = Conv2dLayer(in_channels, out_channels, kernel_size=1, bias=False, up=2,
+                resample_filter=resample_filter, channels_last=self.channels_last)
+
+    def forward(self, x, img, ws, force_fp32=False, fused_modconv=None, update_emas=False, **layer_kwargs):
+        """Run the block.
+
+        Args:
+            x: Features from the previous block; ignored (replaced by the
+                learned constant) when ``self.in_channels == 0``.
+            img: Image accumulated so far at half this block's resolution, or ``None``.
+            ws: Latents of shape ``[batch, num_conv + num_torgb, w_dim]``,
+                consumed one per layer in the order conv0, conv1, torgb.
+            force_fp32: Disable FP16 for this call; forced on when ``ws`` is
+                not on a CUDA device.
+            fused_modconv: ``None`` = use ``self.fused_modconv_default``;
+                ``'inference_only'`` = fused only when not in training mode.
+            update_emas: Accepted for interface compatibility, unused.
+            **layer_kwargs: Forwarded to the conv layers.
+
+        Returns:
+            Tuple ``(x, img)``: the output features and the updated image
+            (``None`` if no image has been produced yet).
+        """
+        _ = update_emas # unused
+        misc.assert_shape(ws, [None, self.num_conv + self.num_torgb, self.w_dim])
+        w_iter = iter(ws.unbind(dim=1))
+        if ws.device.type != 'cuda':
+            force_fp32 = True
+        dtype = torch.float16 if self.use_fp16 and not force_fp32 else torch.float32
+        memory_format = torch.channels_last if self.channels_last and not force_fp32 else torch.contiguous_format
+        if fused_modconv is None:
+            fused_modconv = self.fused_modconv_default
+        if fused_modconv == 'inference_only':
+            fused_modconv = (not self.training)
+
+        # Input.
+        if self.in_channels == 0:
+            x = self.const.to(dtype=dtype, memory_format=memory_format)
+            # Repeat the constant once per sample in the batch.
+            x = x.unsqueeze(0).repeat([ws.shape[0], 1, 1, 1])
+        else:
+            misc.assert_shape(x, [None, self.in_channels, self.resolution // 2, self.resolution // 2])
+            x = x.to(dtype=dtype, memory_format=memory_format)
+
+        # Main layers.
+        if self.in_channels == 0:
+            x = self.conv1(x, next(w_iter), fused_modconv=fused_modconv, **layer_kwargs)
+        elif self.architecture == 'resnet':
+            # Residual branch: both the skip path and conv1 are scaled by sqrt(0.5) before summing.
+            y = self.skip(x, gain=np.sqrt(0.5))
+            x = self.conv0(x, next(w_iter), fused_modconv=fused_modconv, **layer_kwargs)
+            x = self.conv1(x, next(w_iter), fused_modconv=fused_modconv, gain=np.sqrt(0.5), **layer_kwargs)
+            x = y.add_(x)
+        else:
+            x = self.conv0(x, next(w_iter), fused_modconv=fused_modconv, **layer_kwargs)
+            x = self.conv1(x, next(w_iter), fused_modconv=fused_modconv, **layer_kwargs)
+
+        # ToRGB.
+        if img is not None:
+            # Bring the incoming image up to this block's resolution.
+            misc.assert_shape(img, [None, self.img_channels, self.resolution // 2, self.resolution // 2])
+            img = upfirdn2d.upsample2d(img, self.resample_filter)
+        if self.is_last or self.architecture == 'skip':
+            y = self.torgb(x, next(w_iter), fused_modconv=fused_modconv)
+            # The image is always kept in FP32, contiguous layout.
+            y = y.to(dtype=torch.float32, memory_format=torch.contiguous_format)
+            img = img.add_(y) if img is not None else y
+
+        assert x.dtype == dtype
+        assert img is None or img.dtype == torch.float32
+        return x, img
+
+    def extra_repr(self):
+        return f'resolution={self.resolution:d}, architecture={self.architecture:s}'
+
+#----------------------------------------------------------------------------
+
+
+class SynthesisNetwork(torch.nn.Module):
+    """Stack of ``SynthesisBlock`` stages from 4x4 up to ``img_resolution``.
+
+    ``num_ws`` is the number of latent vectors ``forward`` expects: the conv
+    layers of every block plus the ToRGB layers of the last block.
+    """
+
+    def __init__(self,
+        w_dim,                      # Intermediate latent (W) dimensionality.
+        img_resolution,             # Output image resolution.
+        img_channels,               # Number of color channels.
+        channel_base    = 32768,    # Overall multiplier for the number of channels.
+        channel_max     = 512,      # Maximum number of channels in any layer.
+        num_fp16_res    = 4,        # Use FP16 for the N highest resolutions.
+        **block_kwargs,             # Arguments for SynthesisBlock.
+    ):
+        # The resolution must be a power of two, at least 4.
+        assert img_resolution >= 4 and img_resolution & (img_resolution - 1) == 0
+        super().__init__()
+        res_log2 = int(np.log2(img_resolution))
+
+        self.w_dim = w_dim
+        self.img_resolution = img_resolution
+        self.img_resolution_log2 = res_log2
+        self.img_channels = img_channels
+        self.num_fp16_res = num_fp16_res
+        self.block_resolutions = [2 ** exponent for exponent in range(2, res_log2 + 1)]
+
+        widths = {}
+        for res in self.block_resolutions:
+            widths[res] = min(channel_base // res, channel_max)
+        fp16_from = max(2 ** (res_log2 + 1 - num_fp16_res), 8)
+
+        self.num_ws = 0
+        for res in self.block_resolutions:
+            final_stage = (res == self.img_resolution)
+            stage = SynthesisBlock(
+                widths[res // 2] if res > 4 else 0,
+                widths[res],
+                w_dim=w_dim,
+                resolution=res,
+                img_channels=img_channels,
+                is_last=final_stage,
+                use_fp16=(res >= fp16_from),
+                **block_kwargs)
+            # Only the last block's ToRGB latent gets its own slot.
+            self.num_ws += stage.num_conv + (stage.num_torgb if final_stage else 0)
+            setattr(self, f'b{res}', stage)
+
+    def forward(self, ws, **block_kwargs):
+        """Synthesise an image from ``ws`` of shape ``[batch, num_ws, w_dim]``."""
+        stages = [getattr(self, f'b{res}') for res in self.block_resolutions]
+
+        per_stage_ws = []
+        with torch.autograd.profiler.record_function('split_ws'):
+            misc.assert_shape(ws, [None, self.num_ws, self.w_dim])
+            ws = ws.to(torch.float32)
+            start = 0
+            for stage in stages:
+                # Each slice is [B, num_conv + num_torgb, w_dim]; the start only
+                # advances by num_conv, so a ToRGB latent overlaps the next stage.
+                per_stage_ws.append(ws.narrow(1, start, stage.num_conv + stage.num_torgb))
+                start += stage.num_conv
+
+        x = img = None
+        for stage, stage_ws in zip(stages, per_stage_ws):
+            x, img = stage(x, img, stage_ws, **block_kwargs)
+        return img
+
+    def extra_repr(self):
+        parts = [
+            f'w_dim={self.w_dim:d}, num_ws={self.num_ws:d},',
+            f'img_resolution={self.img_resolution:d}, img_channels={self.img_channels:d},',
+            f'num_fp16_res={self.num_fp16_res:d}',
+        ]
+        return ' '.join(parts)
+
+#----------------------------------------------------------------------------
+
+
+class Generator(torch.nn.Module): # MappingNetwork (z, c -> ws) followed by SynthesisNetwork (ws -> img).
+    def __init__(self,
+        z_dim,                      # Input latent (Z) dimensionality.
+        c_dim,                      # Conditioning label (C) dimensionality.
+        w_dim,                      # Intermediate latent (W) dimensionality.
+        img_resolution,             # Output resolution.
+        img_channels,               # Number of output color channels.
+        mapping_kwargs      = {},   # Arguments for MappingNetwork (only unpacked, never mutated here).
+        **synthesis_kwargs,         # Arguments for SynthesisNetwork.
+    ):
+        super().__init__()
+        self.z_dim = z_dim
+        self.c_dim = c_dim
+        self.w_dim = w_dim
+        self.img_resolution = img_resolution
+        self.img_channels = img_channels
+        self.synthesis = SynthesisNetwork(w_dim=w_dim, img_resolution=img_resolution, img_channels=img_channels, **synthesis_kwargs)
+        self.num_ws = self.synthesis.num_ws # The mapping network must emit exactly as many ws as the synthesis network consumes.
+        self.mapping = MappingNetwork(z_dim=z_dim, c_dim=c_dim, w_dim=w_dim, num_ws=self.num_ws, **mapping_kwargs)
+        if hparams.get("gen_cond_mode", 'none') == 'mapping': # Comes from an attempt to inject a landmark condition.
+            self.cond_dim = 204 # NOTE(review): presumably the flattened landmark vector size -- confirm against the caller.
+            self.cond_mapping = MappingNetwork(z_dim=self.cond_dim, c_dim=0, w_dim=w_dim, num_ws=self.num_ws, **mapping_kwargs)
+
+    def forward(self, z, c, cond=None, truncation_psi=1, truncation_cutoff=None, update_emas=False, **synthesis_kwargs):
+        ws = self.mapping(z, c, truncation_psi=truncation_psi, truncation_cutoff=truncation_cutoff, update_emas=update_emas)
+        if hparams.get("gen_cond_mode", 'none') == 'mapping': # In this mode cond is passed to cond_mapping as its z input.
+            d_ws = self.cond_mapping(cond, 0, truncation_psi=truncation_psi, truncation_cutoff=truncation_cutoff, update_emas=update_emas) # c=0 is a placeholder; cond_mapping is built with c_dim=0.
+            ws = ws * 0.5 + d_ws * 0.5 # Equal-weight average of the two sets of ws.
+        img = self.synthesis(ws, update_emas=update_emas, **synthesis_kwargs)
+        return img
+
+#----------------------------------------------------------------------------
+
+
+class DiscriminatorBlock(torch.nn.Module): # One discriminator stage: optional FromRGB, then conv0 and a 2x-downsampling conv1 (plus a skip conv for 'resnet').
+    def __init__(self,
+        in_channels,                        # Number of input channels, 0 = first block.
+        tmp_channels,                       # Number of intermediate channels.
+        out_channels,                       # Number of output channels.
+        resolution,                         # Resolution of this block.
+        img_channels,                       # Number of input color channels.
+        first_layer_idx,                    # Index of the first layer.
+        architecture        = 'resnet',     # Architecture: 'orig', 'skip', 'resnet'.
+        activation          = 'lrelu',      # Activation function: 'relu', 'lrelu', etc.
+        resample_filter     = [1,3,3,1],    # Low-pass filter to apply when resampling activations.
+        conv_clamp          = None,         # Clamp the output of convolution layers to +-X, None = disable clamping.
+        use_fp16            = False,        # Use FP16 for this block?
+        fp16_channels_last  = False,        # Use channels-last memory format with FP16?
+        freeze_layers       = 0,            # Freeze-D: Number of layers to freeze.
+    ):
+        assert in_channels in [0, tmp_channels] # Input features, when present, must already have tmp_channels channels.
+        assert architecture in ['orig', 'skip', 'resnet']
+        super().__init__()
+        self.in_channels = in_channels
+        self.resolution = resolution
+        self.img_channels = img_channels
+        self.first_layer_idx = first_layer_idx
+        self.architecture = architecture
+        self.use_fp16 = use_fp16
+        self.channels_last = (use_fp16 and fp16_channels_last)
+        self.register_buffer('resample_filter', upfirdn2d.setup_filter(resample_filter))
+
+        self.num_layers = 0 # Incremented by trainable_gen() once per layer constructed below.
+        def trainable_gen(): # Each next() yields the trainable flag for the next layer of this block.
+            while True:
+                layer_idx = self.first_layer_idx + self.num_layers
+                trainable = (layer_idx >= freeze_layers) # Layers with an index below freeze_layers are not trainable.
+                self.num_layers += 1
+                yield trainable
+        trainable_iter = trainable_gen()
+
+        if in_channels == 0 or architecture == 'skip': # FromRGB exists for the first block and for every 'skip' block.
+            self.fromrgb = Conv2dLayer(img_channels, tmp_channels, kernel_size=1, activation=activation,
+                trainable=next(trainable_iter), conv_clamp=conv_clamp, channels_last=self.channels_last)
+
+        self.conv0 = Conv2dLayer(tmp_channels, tmp_channels, kernel_size=3, activation=activation,
+            trainable=next(trainable_iter), conv_clamp=conv_clamp, channels_last=self.channels_last)
+
+        self.conv1 = Conv2dLayer(tmp_channels, out_channels, kernel_size=3, activation=activation, down=2,
+            trainable=next(trainable_iter), resample_filter=resample_filter, conv_clamp=conv_clamp, channels_last=self.channels_last)
+
+        if architecture == 'resnet': # Bias-free 1x1 downsampling conv for the residual branch.
+            self.skip = Conv2dLayer(tmp_channels, out_channels, kernel_size=1, bias=False, down=2,
+                trainable=next(trainable_iter), resample_filter=resample_filter, channels_last=self.channels_last)
+
+    def forward(self, x, img, force_fp32=False): # Returns (features, img); img is None afterwards unless architecture == 'skip'.
+        if (x if x is not None else img).device.type != 'cuda': # Off CUDA, always run in FP32.
+            force_fp32 = True
+        dtype = torch.float16 if self.use_fp16 and not force_fp32 else torch.float32
+        memory_format = torch.channels_last if self.channels_last and not force_fp32 else torch.contiguous_format
+
+        # Input.
+        if x is not None:
+            misc.assert_shape(x, [None, self.in_channels, self.resolution, self.resolution])
+            x = x.to(dtype=dtype, memory_format=memory_format)
+
+        # FromRGB.
+        if self.in_channels == 0 or self.architecture == 'skip':
+            misc.assert_shape(img, [None, self.img_channels, self.resolution, self.resolution])
+            img = img.to(dtype=dtype, memory_format=memory_format)
+            y = self.fromrgb(img)
+            x = x + y if x is not None else y # The first block has no incoming features, so the RGB features are used alone.
+            img = upfirdn2d.downsample2d(img, self.resample_filter) if self.architecture == 'skip' else None # 'skip' hands a downsampled image to the next block.
+
+        # Main layers.
+        if self.architecture == 'resnet':
+            y = self.skip(x, gain=np.sqrt(0.5)) # Both branches are scaled by sqrt(0.5) before being summed.
+            x = self.conv0(x)
+            x = self.conv1(x, gain=np.sqrt(0.5))
+            x = y.add_(x) # In-place sum of the skip and main branches.
+        else:
+            x = self.conv0(x)
+            x = self.conv1(x)
+
+        assert x.dtype == dtype
+        return x, img
+
+    def extra_repr(self): # Summary string for the module repr.
+        return f'resolution={self.resolution:d}, architecture={self.architecture:s}'
+
+#----------------------------------------------------------------------------
+
+
+class MinibatchStdLayer(torch.nn.Module): # Appends num_channels feature maps holding per-group standard-deviation statistics.
+    def __init__(self, group_size, num_channels=1):
+        super().__init__()
+        self.group_size = group_size # None means the whole minibatch forms one group (see forward).
+        self.num_channels = num_channels
+
+    def forward(self, x):
+        N, C, H, W = x.shape
+        with misc.suppress_tracer_warnings(): # as_tensor results are registered as constants
+            G = torch.min(torch.as_tensor(self.group_size), torch.as_tensor(N)) if self.group_size is not None else N # Group size, capped at the batch size.
+        F = self.num_channels # Number of statistic channels to append.
+        c = C // F # Input channels per statistic channel.
+
+        y = x.reshape(G, -1, F, c, H, W)    # [GnFcHW] Split minibatch N into n groups of size G, and channels C into F groups of size c.
+        y = y - y.mean(dim=0)               # [GnFcHW] Subtract mean over group.
+        y = y.square().mean(dim=0)          # [nFcHW]  Calc variance over group.
+        y = (y + 1e-8).sqrt()               # [nFcHW]  Calc stddev over group.
+        y = y.mean(dim=[2,3,4])             # [nF]     Take average over channels and pixels.
+        y = y.reshape(-1, F, 1, 1)          # [nF11]   Add missing dimensions.
+        y = y.repeat(G, 1, H, W)            # [NFHW]   Replicate over group and pixels.
+        x = torch.cat([x, y], dim=1)        # [NCHW]   Append to input as new channels.
+        return x
+
+    def extra_repr(self): # Summary string for the module repr.
+        return f'group_size={self.group_size}, num_channels={self.num_channels:d}'
+
+#----------------------------------------------------------------------------
+
+
+class DiscriminatorEpilogue(torch.nn.Module): # Final discriminator stage: optional FromRGB and minibatch-stddev, then conv, fc and output layers.
+    def __init__(self,
+        in_channels,                    # Number of input channels.
+        cmap_dim,                       # Dimensionality of mapped conditioning label, 0 = no label.
+        resolution,                     # Resolution of this block.
+        img_channels,                   # Number of input color channels.
+        architecture        = 'resnet', # Architecture: 'orig', 'skip', 'resnet'.
+        mbstd_group_size    = 2,        # Group size for the minibatch standard deviation layer, None = entire minibatch.
+        mbstd_num_channels  = 1,        # Number of features for the minibatch standard deviation layer, 0 = disable.
+        activation          = 'lrelu',  # Activation function: 'relu', 'lrelu', etc.
+        conv_clamp          = None,     # Clamp the output of convolution layers to +-X, None = disable clamping.
+    ):
+        assert architecture in ['orig', 'skip', 'resnet']
+        super().__init__()
+        self.in_channels = in_channels
+        self.cmap_dim = cmap_dim
+        self.resolution = resolution
+        self.img_channels = img_channels
+        self.architecture = architecture
+
+        if architecture == 'skip': # Only the 'skip' architecture feeds an image into the epilogue.
+            self.fromrgb = Conv2dLayer(img_channels, in_channels, kernel_size=1, activation=activation)
+        self.mbstd = MinibatchStdLayer(group_size=mbstd_group_size, num_channels=mbstd_num_channels) if mbstd_num_channels > 0 else None
+        self.conv = Conv2dLayer(in_channels + mbstd_num_channels, in_channels, kernel_size=3, activation=activation, conv_clamp=conv_clamp) # Input width includes the appended stddev channels.
+        self.fc = FullyConnectedLayer(in_channels * (resolution ** 2), in_channels, activation=activation) # Consumes the flattened feature map (see forward).
+        self.out = FullyConnectedLayer(in_channels, 1 if cmap_dim == 0 else cmap_dim) # One output without a label, otherwise cmap_dim outputs for the projection below.
+
+    def forward(self, x, img, cmap, force_fp32=False): # Returns an FP32 tensor; [N, 1] when cmap_dim > 0.
+        misc.assert_shape(x, [None, self.in_channels, self.resolution, self.resolution]) # [NCHW]
+        _ = force_fp32 # unused
+        dtype = torch.float32 # The epilogue always runs in FP32 with contiguous memory format.
+        memory_format = torch.contiguous_format
+
+        # FromRGB.
+        x = x.to(dtype=dtype, memory_format=memory_format)
+        if self.architecture == 'skip':
+            misc.assert_shape(img, [None, self.img_channels, self.resolution, self.resolution])
+            img = img.to(dtype=dtype, memory_format=memory_format)
+            x = x + self.fromrgb(img)
+
+        # Main layers.
+        if self.mbstd is not None:
+            x = self.mbstd(x)
+        x = self.conv(x)
+        x = self.fc(x.flatten(1))
+        x = self.out(x)
+
+        # Conditioning.
+        if self.cmap_dim > 0:
+            misc.assert_shape(cmap, [None, self.cmap_dim])
+            x = (x * cmap).sum(dim=1, keepdim=True) * (1 / np.sqrt(self.cmap_dim)) # Dot product with the mapped label, scaled by 1/sqrt(cmap_dim).
+
+        assert x.dtype == dtype
+        return x
+
+    def extra_repr(self): # Summary string for the module repr.
+        return f'resolution={self.resolution:d}, architecture={self.architecture:s}'
+
+#----------------------------------------------------------------------------
+
+
+class Discriminator(torch.nn.Module): # DiscriminatorBlocks from img_resolution down to 8, then a 4x4 DiscriminatorEpilogue.
+    def __init__(self,
+        c_dim,                          # Conditioning label (C) dimensionality.
+        img_resolution,                 # Input resolution.
+        img_channels,                   # Number of input color channels.
+        architecture        = 'resnet', # Architecture: 'orig', 'skip', 'resnet'.
+        channel_base        = 32768,    # Overall multiplier for the number of channels.
+        channel_max         = 512,      # Maximum number of channels in any layer.
+        num_fp16_res        = 4,        # Use FP16 for the N highest resolutions.
+        conv_clamp          = 256,      # Clamp the output of convolution layers to +-X, None = disable clamping.
+        cmap_dim            = None,     # Dimensionality of mapped conditioning label, None = default.
+        block_kwargs        = {},       # Arguments for DiscriminatorBlock.
+        mapping_kwargs      = {},       # Arguments for MappingNetwork.
+        epilogue_kwargs     = {},       # Arguments for DiscriminatorEpilogue.
+    ):
+        super().__init__()
+        self.c_dim = c_dim
+        self.img_resolution = img_resolution
+        self.img_resolution_log2 = int(np.log2(img_resolution))
+        self.img_channels = img_channels
+        self.block_resolutions = [2 ** i for i in range(self.img_resolution_log2, 2, -1)] # img_resolution, ..., 16, 8 (descending); 4 is handled by the epilogue.
+        channels_dict = {res: min(channel_base // res, channel_max) for res in self.block_resolutions + [4]} # channel_base // res, capped at channel_max.
+        fp16_resolution = max(2 ** (self.img_resolution_log2 + 1 - num_fp16_res), 8) # Lowest resolution flagged for FP16; never below 8.
+
+        if cmap_dim is None:
+            cmap_dim = channels_dict[4] # Default: same width as the 4x4 stage.
+        if c_dim == 0:
+            cmap_dim = 0 # No label, so conditioning is disabled in the epilogue.
+
+        common_kwargs = dict(img_channels=img_channels, architecture=architecture, conv_clamp=conv_clamp)
+        cur_layer_idx = 0 # Running layer index across blocks, passed on as first_layer_idx.
+        for res in self.block_resolutions:
+            in_channels = channels_dict[res] if res < img_resolution else 0 # The highest-resolution block is built with in_channels=0.
+            tmp_channels = channels_dict[res]
+            out_channels = channels_dict[res // 2]
+            use_fp16 = (res >= fp16_resolution)
+            block = DiscriminatorBlock(in_channels, tmp_channels, out_channels, resolution=res,
+                first_layer_idx=cur_layer_idx, use_fp16=use_fp16, **block_kwargs, **common_kwargs)
+            setattr(self, f'b{res}', block) # Registered as attribute b<res>.
+            cur_layer_idx += block.num_layers
+        if c_dim > 0: # The label-mapping network exists only for conditional models.
+            self.mapping = MappingNetwork(z_dim=0, c_dim=c_dim, w_dim=cmap_dim, num_ws=None, w_avg_beta=None, **mapping_kwargs)
+        self.b4 = DiscriminatorEpilogue(channels_dict[4], cmap_dim=cmap_dim, resolution=4, **epilogue_kwargs, **common_kwargs)
+
+    def forward(self, img, c, update_emas=False, **block_kwargs): # Runs img through all blocks and returns the epilogue output.
+        _ = update_emas # unused
+        x = None # The first block receives no feature input, only img.
+        for res in self.block_resolutions:
+            block = getattr(self, f'b{res}')
+            x, img = block(x, img, **block_kwargs)
+
+        cmap = None
+        if self.c_dim > 0:
+            cmap = self.mapping(None, c) # z is None: only the label c is mapped.
+        x = self.b4(x, img, cmap)
+        return x
+
+    def extra_repr(self): # Summary string for the module repr.
+        return f'c_dim={self.c_dim:d}, img_resolution={self.img_resolution:d}, img_channels={self.img_channels:d}'
+
+#----------------------------------------------------------------------------
\ No newline at end of file
diff --git a/modules/eg3ds/models/networks_stylegan3.py b/modules/eg3ds/models/networks_stylegan3.py
new file mode 100644
index 0000000000000000000000000000000000000000..a5c38853db600f4006c3f6e0045a8df1e707ee85
--- /dev/null
+++ b/modules/eg3ds/models/networks_stylegan3.py
@@ -0,0 +1,516 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Generator architecture from the paper
+"Alias-Free Generative Adversarial Networks"."""
+
+import numpy as np
+import scipy.signal
+import scipy.optimize
+import torch
+from modules.eg3ds.torch_utils import misc
+from modules.eg3ds.torch_utils.ops import conv2d_gradfix
+from modules.eg3ds.torch_utils.ops import filtered_lrelu
+from modules.eg3ds.torch_utils.ops import bias_act
+
+#----------------------------------------------------------------------------
+
+@misc.profiled_function
+def modulated_conv2d( # Per-sample style-modulated convolution, executed as a single grouped conv2d.
+    x,                  # Input tensor: [batch_size, in_channels, in_height, in_width]
+    w,                  # Weight tensor: [out_channels, in_channels, kernel_height, kernel_width]
+    s,                  # Style tensor: [batch_size, in_channels]
+    demodulate  = True, # Apply weight demodulation?
+    padding     = 0,    # Padding: int or [padH, padW]
+    input_gain  = None, # Optional scale factors for the input channels: [], [in_channels], or [batch_size, in_channels]
+):
+    with misc.suppress_tracer_warnings(): # this value will be treated as a constant
+        batch_size = int(x.shape[0])
+    out_channels, in_channels, kh, kw = w.shape
+    misc.assert_shape(w, [out_channels, in_channels, kh, kw]) # [OIkk]
+    misc.assert_shape(x, [batch_size, in_channels, None, None]) # [NIHW]
+    misc.assert_shape(s, [batch_size, in_channels]) # [NI]
+
+    # Pre-normalize inputs.
+    if demodulate:
+        w = w * w.square().mean([1,2,3], keepdim=True).rsqrt() # Scale each output filter to unit RMS.
+        s = s * s.square().mean().rsqrt() # Scale styles to unit RMS, computed over the whole [N, I] tensor.
+
+    # Modulate weights.
+    w = w.unsqueeze(0) # [NOIkk]
+    w = w * s.unsqueeze(1).unsqueeze(3).unsqueeze(4) # [NOIkk]
+
+    # Demodulate weights.
+    if demodulate:
+        dcoefs = (w.square().sum(dim=[2,3,4]) + 1e-8).rsqrt() # [NO]
+        w = w * dcoefs.unsqueeze(2).unsqueeze(3).unsqueeze(4) # [NOIkk]
+
+    # Apply input scaling.
+    if input_gain is not None:
+        input_gain = input_gain.expand(batch_size, in_channels) # [NI]
+        w = w * input_gain.unsqueeze(1).unsqueeze(3).unsqueeze(4) # [NOIkk]
+
+    # Execute as one fused op using grouped convolution.
+    x = x.reshape(1, -1, *x.shape[2:]) # Fold the batch into the channel axis: [1, N*I, H, W].
+    w = w.reshape(-1, in_channels, kh, kw) # [N*O, I, kh, kw]
+    x = conv2d_gradfix.conv2d(input=x, weight=w.to(x.dtype), padding=padding, groups=batch_size) # One group per sample.
+    x = x.reshape(batch_size, -1, *x.shape[2:]) # Unfold back to [N, O, H', W'].
+    return x
+
+#----------------------------------------------------------------------------
+
+
+class FullyConnectedLayer(torch.nn.Module): # Linear layer whose weight and bias are rescaled at runtime by weight_gain / bias_gain.
+    def __init__(self,
+        in_features,                # Number of input features.
+        out_features,               # Number of output features.
+        activation      = 'linear', # Activation function: 'relu', 'lrelu', etc.
+        bias            = True,     # Apply additive bias before the activation function?
+        lr_multiplier   = 1,        # Learning rate multiplier.
+        weight_init     = 1,        # Initial standard deviation of the weight tensor.
+        bias_init       = 0,        # Initial value of the additive bias.
+    ):
+        super().__init__()
+        self.in_features = in_features
+        self.out_features = out_features
+        self.activation = activation
+        self.weight = torch.nn.Parameter(torch.randn([out_features, in_features]) * (weight_init / lr_multiplier)) # Stored divided by lr_multiplier; weight_gain multiplies it back in forward().
+        bias_init = np.broadcast_to(np.asarray(bias_init, dtype=np.float32), [out_features]) # Accepts a scalar or a per-output array.
+        self.bias = torch.nn.Parameter(torch.from_numpy(bias_init / lr_multiplier)) if bias else None # None when bias=False.
+        self.weight_gain = lr_multiplier / np.sqrt(in_features)
+        self.bias_gain = lr_multiplier
+
+    def forward(self, x):
+        w = self.weight.to(x.dtype) * self.weight_gain
+        b = self.bias
+        if b is not None:
+            b = b.to(x.dtype)
+            if self.bias_gain != 1:
+                b = b * self.bias_gain
+        if self.activation == 'linear' and b is not None:
+            x = torch.addmm(b.unsqueeze(0), x, w.t()) # Bias add and matmul in one call for the linear case.
+        else:
+            x = x.matmul(w.t())
+            x = bias_act.bias_act(x, b, act=self.activation) # Bias (possibly None) and activation applied by bias_act.
+        return x
+
+    def extra_repr(self): # Summary string for the module repr.
+        return f'in_features={self.in_features:d}, out_features={self.out_features:d}, activation={self.activation:s}'
+
+#----------------------------------------------------------------------------
+
+
+class MappingNetwork(torch.nn.Module): # Maps latent z (and an optional label c) to num_ws copies of an intermediate latent w.
+    def __init__(self,
+        z_dim,                      # Input latent (Z) dimensionality.
+        c_dim,                      # Conditioning label (C) dimensionality, 0 = no labels.
+        w_dim,                      # Intermediate latent (W) dimensionality.
+        num_ws,                     # Number of intermediate latents to output.
+        num_layers      = 2,        # Number of mapping layers.
+        lr_multiplier   = 0.01,     # Learning rate multiplier for the mapping layers.
+        w_avg_beta      = 0.998,    # Decay for tracking the moving average of W during training.
+    ):
+        super().__init__()
+        self.z_dim = z_dim
+        self.c_dim = c_dim
+        self.w_dim = w_dim
+        self.num_ws = num_ws
+        self.num_layers = num_layers
+        self.w_avg_beta = w_avg_beta
+
+        # Construct layers.
+        self.embed = FullyConnectedLayer(self.c_dim, self.w_dim) if self.c_dim > 0 else None # Label embedding; absent when c_dim == 0.
+        features = [self.z_dim + (self.w_dim if self.c_dim > 0 else 0)] + [self.w_dim] * self.num_layers # Input width followed by num_layers widths of w_dim.
+        for idx, in_features, out_features in zip(range(num_layers), features[:-1], features[1:]):
+            layer = FullyConnectedLayer(in_features, out_features, activation='lrelu', lr_multiplier=lr_multiplier)
+            setattr(self, f'fc{idx}', layer) # Registered as attributes fc0, fc1, ...
+        self.register_buffer('w_avg', torch.zeros([w_dim])) # Moving average of W; read by the truncation step in forward().
+
+    def forward(self, z, c, truncation_psi=1, truncation_cutoff=None, update_emas=False): # Returns ws with shape [batch, num_ws, w_dim].
+        misc.assert_shape(z, [None, self.z_dim])
+        if truncation_cutoff is None:
+            truncation_cutoff = self.num_ws # Default: truncation applies to every w.
+
+        # Embed, normalize, and concatenate inputs.
+        x = z.to(torch.float32)
+        x = x * (x.square().mean(1, keepdim=True) + 1e-8).rsqrt() # Normalize each row to unit RMS.
+        if self.c_dim > 0:
+            misc.assert_shape(c, [None, self.c_dim])
+            y = self.embed(c.to(torch.float32))
+            y = y * (y.square().mean(1, keepdim=True) + 1e-8).rsqrt() # Same per-row normalization for the label embedding.
+            x = torch.cat([x, y], dim=1) if x is not None else y # x is never None here; it was assigned from z above.
+
+        # Execute layers.
+        for idx in range(self.num_layers):
+            x = getattr(self, f'fc{idx}')(x)
+
+        # Update moving average of W.
+        if update_emas:
+            self.w_avg.copy_(x.detach().mean(dim=0).lerp(self.w_avg, self.w_avg_beta)) # w_avg <- batch_mean + beta * (w_avg - batch_mean).
+
+        # Broadcast and apply truncation.
+        x = x.unsqueeze(1).repeat([1, self.num_ws, 1]) # [batch, num_ws, w_dim]
+        if truncation_psi != 1:
+            x[:, :truncation_cutoff] = self.w_avg.lerp(x[:, :truncation_cutoff], truncation_psi) # w_avg + psi * (w - w_avg) for the first truncation_cutoff ws.
+        return x
+
+    def extra_repr(self): # Summary string for the module repr.
+        return f'z_dim={self.z_dim:d}, c_dim={self.c_dim:d}, w_dim={self.w_dim:d}, num_ws={self.num_ws:d}'
+
+#----------------------------------------------------------------------------
+
+
+class SynthesisInput(torch.nn.Module): # Builds the initial feature map from Fourier features under a learned rotation and translation.
+    def __init__(self,
+        w_dim,          # Intermediate latent (W) dimensionality.
+        channels,       # Number of output channels.
+        size,           # Output spatial size: int or [width, height].
+        sampling_rate,  # Output sampling rate.
+        bandwidth,      # Output bandwidth.
+    ):
+        super().__init__()
+        self.w_dim = w_dim
+        self.channels = channels
+        self.size = np.broadcast_to(np.asarray(size), [2]) # Always stored as a length-2 array [width, height].
+        self.sampling_rate = sampling_rate
+        self.bandwidth = bandwidth
+
+        # Draw random frequencies from uniform 2D disc.
+        freqs = torch.randn([self.channels, 2])
+        radii = freqs.square().sum(dim=1, keepdim=True).sqrt()
+        freqs /= radii * radii.square().exp().pow(0.25)
+        freqs *= bandwidth
+        phases = torch.rand([self.channels]) - 0.5 # Uniform in [-0.5, 0.5).
+
+        # Setup parameters and buffers.
+        self.weight = torch.nn.Parameter(torch.randn([self.channels, self.channels]))
+        self.affine = FullyConnectedLayer(w_dim, 4, weight_init=0, bias_init=[1,0,0,0]) # Zero weights and bias [1,0,0,0]: the initial output is (1, 0, 0, 0) for every w.
+        self.register_buffer('transform', torch.eye(3, 3)) # User-specified inverse transform wrt. resulting image.
+        self.register_buffer('freqs', freqs)
+        self.register_buffer('phases', phases)
+
+    def forward(self, w): # Returns a tensor of shape [batch, channels, height, width].
+        # Introduce batch dimension.
+        transforms = self.transform.unsqueeze(0) # [batch, row, col]
+        freqs = self.freqs.unsqueeze(0) # [batch, channel, xy]
+        phases = self.phases.unsqueeze(0) # [batch, channel]
+
+        # Apply learned transformation.
+        t = self.affine(w) # t = (r_c, r_s, t_x, t_y)
+        t = t / t[:, :2].norm(dim=1, keepdim=True) # t' = (r'_c, r'_s, t'_x, t'_y)
+        m_r = torch.eye(3, device=w.device).unsqueeze(0).repeat([w.shape[0], 1, 1]) # Inverse rotation wrt. resulting image.
+        m_r[:, 0, 0] = t[:, 0]  # r'_c
+        m_r[:, 0, 1] = -t[:, 1] # r'_s
+        m_r[:, 1, 0] = t[:, 1]  # r'_s
+        m_r[:, 1, 1] = t[:, 0]  # r'_c
+        m_t = torch.eye(3, device=w.device).unsqueeze(0).repeat([w.shape[0], 1, 1]) # Inverse translation wrt. resulting image.
+        m_t[:, 0, 2] = -t[:, 2] # t'_x
+        m_t[:, 1, 2] = -t[:, 3] # t'_y
+        transforms = m_r @ m_t @ transforms # First rotate resulting image, then translate, and finally apply user-specified transform.
+
+        # Transform frequencies.
+        phases = phases + (freqs @ transforms[:, :2, 2:]).squeeze(2) # Translation column shifts the phases.
+        freqs = freqs @ transforms[:, :2, :2] # Upper-left 2x2 part transforms the frequency vectors.
+
+        # Dampen out-of-band frequencies that may occur due to the user-specified transform.
+        amplitudes = (1 - (freqs.norm(dim=2) - self.bandwidth) / (self.sampling_rate / 2 - self.bandwidth)).clamp(0, 1) # 1 at or below bandwidth, falling linearly to 0 at sampling_rate / 2.
+
+        # Construct sampling grid.
+        theta = torch.eye(2, 3, device=w.device)
+        theta[0, 0] = 0.5 * self.size[0] / self.sampling_rate
+        theta[1, 1] = 0.5 * self.size[1] / self.sampling_rate
+        grids = torch.nn.functional.affine_grid(theta.unsqueeze(0), [1, 1, self.size[1], self.size[0]], align_corners=False)
+
+        # Compute Fourier features.
+        x = (grids.unsqueeze(3) @ freqs.permute(0, 2, 1).unsqueeze(1).unsqueeze(2)).squeeze(3) # [batch, height, width, channel]
+        x = x + phases.unsqueeze(1).unsqueeze(2)
+        x = torch.sin(x * (np.pi * 2))
+        x = x * amplitudes.unsqueeze(1).unsqueeze(2)
+
+        # Apply trainable mapping.
+        weight = self.weight / np.sqrt(self.channels)
+        x = x @ weight.t()
+
+        # Ensure correct shape.
+        x = x.permute(0, 3, 1, 2) # [batch, channel, height, width]
+        misc.assert_shape(x, [w.shape[0], self.channels, int(self.size[1]), int(self.size[0])])
+        return x
+
+    def extra_repr(self): # Summary string for the module repr (two lines).
+        return '\n'.join([
+            f'w_dim={self.w_dim:d}, channels={self.channels:d}, size={list(self.size)},',
+            f'sampling_rate={self.sampling_rate:g}, bandwidth={self.bandwidth:g}'])
+
+#----------------------------------------------------------------------------
+
+
+class SynthesisLayer(torch.nn.Module):
+    def __init__(self,
+        w_dim,                          # Intermediate latent (W) dimensionality.
+        is_torgb,                       # Is this the final ToRGB layer?
+        is_critically_sampled,          # Does this layer use critical sampling?
+        use_fp16,                       # Does this layer use FP16?
+
+        # Input & output specifications.
+        in_channels,                    # Number of input channels.
+        out_channels,                   # Number of output channels.
+        in_size,                        # Input spatial size: int or [width, height].
+        out_size,                       # Output spatial size: int or [width, height].
+        in_sampling_rate,               # Input sampling rate (s).
+        out_sampling_rate,              # Output sampling rate (s).
+        in_cutoff,                      # Input cutoff frequency (f_c).
+        out_cutoff,                     # Output cutoff frequency (f_c).
+        in_half_width,                  # Input transition band half-width (f_h).
+        out_half_width,                 # Output transition band half-width (f_h).
+
+        # Hyperparameters.
+        conv_kernel         = 3,        # Convolution kernel size. Ignored for the final ToRGB layer.
+        filter_size         = 6,        # Low-pass filter size relative to the lower resolution when up/downsampling.
+        lrelu_upsampling    = 2,        # Relative sampling rate for leaky ReLU. Ignored for the final ToRGB layer.
+        use_radial_filters  = False,    # Use radially symmetric downsampling filter? Ignored for critically sampled layers.
+        conv_clamp          = 256,      # Clamp the output to [-X, +X], None = disable clamping.
+        magnitude_ema_beta  = 0.999,    # Decay rate for the moving average of input magnitudes.
+    ):
+        super().__init__()
+        self.w_dim = w_dim
+        self.is_torgb = is_torgb
+        self.is_critically_sampled = is_critically_sampled
+        self.use_fp16 = use_fp16
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.in_size = np.broadcast_to(np.asarray(in_size), [2]) # Always stored as a length-2 array [width, height].
+        self.out_size = np.broadcast_to(np.asarray(out_size), [2])
+        self.in_sampling_rate = in_sampling_rate
+        self.out_sampling_rate = out_sampling_rate
+        self.tmp_sampling_rate = max(in_sampling_rate, out_sampling_rate) * (1 if is_torgb else lrelu_upsampling) # Intermediate rate between the up- and downsampling steps.
+        self.in_cutoff = in_cutoff
+        self.out_cutoff = out_cutoff
+        self.in_half_width = in_half_width
+        self.out_half_width = out_half_width
+        self.conv_kernel = 1 if is_torgb else conv_kernel # The ToRGB layer always uses a 1x1 kernel.
+        self.conv_clamp = conv_clamp
+        self.magnitude_ema_beta = magnitude_ema_beta
+
+        # Setup parameters and buffers.
+        self.affine = FullyConnectedLayer(self.w_dim, self.in_channels, bias_init=1) # Maps w to one style value per input channel.
+        self.weight = torch.nn.Parameter(torch.randn([self.out_channels, self.in_channels, self.conv_kernel, self.conv_kernel]))
+        self.bias = torch.nn.Parameter(torch.zeros([self.out_channels]))
+        self.register_buffer('magnitude_ema', torch.ones([])) # Scalar buffer, initialized to 1.
+
+        # Design upsampling filter.
+        self.up_factor = int(np.rint(self.tmp_sampling_rate / self.in_sampling_rate))
+        assert self.in_sampling_rate * self.up_factor == self.tmp_sampling_rate # The rate ratio must be an exact integer.
+        self.up_taps = filter_size * self.up_factor if self.up_factor > 1 and not self.is_torgb else 1 # A single tap when not upsampling or for ToRGB.
+        self.register_buffer('up_filter', self.design_lowpass_filter(
+            numtaps=self.up_taps, cutoff=self.in_cutoff, width=self.in_half_width*2, fs=self.tmp_sampling_rate))
+
+        # Design downsampling filter.
+        self.down_factor = int(np.rint(self.tmp_sampling_rate / self.out_sampling_rate))
+        assert self.out_sampling_rate * self.down_factor == self.tmp_sampling_rate # The rate ratio must be an exact integer.
+        self.down_taps = filter_size * self.down_factor if self.down_factor > 1 and not self.is_torgb else 1 # A single tap when not downsampling or for ToRGB.
+        self.down_radial = use_radial_filters and not self.is_critically_sampled
+        self.register_buffer('down_filter', self.design_lowpass_filter(
+            numtaps=self.down_taps, cutoff=self.out_cutoff, width=self.out_half_width*2, fs=self.tmp_sampling_rate, radial=self.down_radial))
+
+        # Compute padding.
+        pad_total = (self.out_size - 1) * self.down_factor + 1 # Desired output size before downsampling.
+        pad_total -= (self.in_size + self.conv_kernel - 1) * self.up_factor # Input size after upsampling.
+        pad_total += self.up_taps + self.down_taps - 2 # Size reduction caused by the filters.
+        pad_lo = (pad_total + self.up_factor) // 2 # Shift sample locations according to the symmetric interpretation (Appendix C.3).
+        pad_hi = pad_total - pad_lo
+        self.padding = [int(pad_lo[0]), int(pad_hi[0]), int(pad_lo[1]), int(pad_hi[1])] # Index 0 of pad_lo/pad_hi first, then index 1.
+
+    def forward(self, x, w, noise_mode='random', force_fp32=False, update_emas=False):
+        assert noise_mode in ['random', 'const', 'none'] # Validated only; not otherwise used in this method.
+        misc.assert_shape(x, [None, self.in_channels, int(self.in_size[1]), int(self.in_size[0])]) # Spatial dims are checked as (in_size[1], in_size[0]).
+        misc.assert_shape(w, [x.shape[0], self.w_dim]) # One latent vector per batch item.
+
+        # Track input magnitude: with update_emas set, blend the current mean square of x into the magnitude_ema buffer.
+        if update_emas:
+            with torch.autograd.profiler.record_function('update_magnitude_ema'):
+                magnitude_cur = x.detach().to(torch.float32).square().mean() # Scalar, computed in FP32 without gradient.
+                self.magnitude_ema.copy_(magnitude_cur.lerp(self.magnitude_ema, self.magnitude_ema_beta)) # cur + beta * (ema - cur)
+        input_gain = self.magnitude_ema.rsqrt() # 1 / sqrt(magnitude_ema); handed to modulated_conv2d below.
+
+        # Execute affine layer: maps w to one style value per input channel.
+        styles = self.affine(w)
+        if self.is_torgb:
+            weight_gain = 1 / np.sqrt(self.in_channels * (self.conv_kernel ** 2)) # 1 / sqrt(in_channels * kernel area).
+            styles = styles * weight_gain
+
+        # Execute modulated conv2d. FP16 is used only if enabled for this layer, not overridden by force_fp32, and x is on CUDA.
+        dtype = torch.float16 if (self.use_fp16 and not force_fp32 and x.device.type == 'cuda') else torch.float32
+        x = modulated_conv2d(x=x.to(dtype), w=self.weight, s=styles,
+            padding=self.conv_kernel-1, demodulate=(not self.is_torgb), input_gain=input_gain) # ToRGB layers skip demodulation.
+
+        # Execute bias, filtered leaky ReLU, and clamping. ToRGB layers use gain 1 and slope 1 instead of sqrt(2) and 0.2.
+        gain = 1 if self.is_torgb else np.sqrt(2)
+        slope = 1 if self.is_torgb else 0.2
+        x = filtered_lrelu.filtered_lrelu(x=x, fu=self.up_filter, fd=self.down_filter, b=self.bias.to(x.dtype),
+            up=self.up_factor, down=self.down_factor, padding=self.padding, gain=gain, slope=slope, clamp=self.conv_clamp)
+
+        # Ensure correct shape and dtype.
+        misc.assert_shape(x, [None, self.out_channels, int(self.out_size[1]), int(self.out_size[0])])
+        assert x.dtype == dtype
+        return x
+
+    @staticmethod
+    def design_lowpass_filter(numtaps, cutoff, width, fs, radial=False):
+        assert numtaps >= 1
+
+        # Identity filter.
+        if numtaps == 1:
+            return None
+
+        # Separable Kaiser low-pass filter.
+        if not radial:
+            f = scipy.signal.firwin(numtaps=numtaps, cutoff=cutoff, width=width, fs=fs)
+            return torch.as_tensor(f, dtype=torch.float32)
+
+        # Radially symmetric jinc-based filter.
+        x = (np.arange(numtaps) - (numtaps - 1) / 2) / fs
+        r = np.hypot(*np.meshgrid(x, x))
+        f = scipy.special.j1(2 * cutoff * (np.pi * r)) / (np.pi * r)
+        beta = scipy.signal.kaiser_beta(scipy.signal.kaiser_atten(numtaps, width / (fs / 2)))
+        w = np.kaiser(numtaps, beta)
+        f *= np.outer(w, w)
+        f /= np.sum(f)
+        return torch.as_tensor(f, dtype=torch.float32)
+
+    def extra_repr(self):
+        return '\n'.join([
+            f'w_dim={self.w_dim:d}, is_torgb={self.is_torgb},',
+            f'is_critically_sampled={self.is_critically_sampled}, use_fp16={self.use_fp16},',
+            f'in_sampling_rate={self.in_sampling_rate:g}, out_sampling_rate={self.out_sampling_rate:g},',
+            f'in_cutoff={self.in_cutoff:g}, out_cutoff={self.out_cutoff:g},',
+            f'in_half_width={self.in_half_width:g}, out_half_width={self.out_half_width:g},',
+            f'in_size={list(self.in_size)}, out_size={list(self.out_size)},',
+            f'in_channels={self.in_channels:d}, out_channels={self.out_channels:d}'])
+
+#----------------------------------------------------------------------------
+
+
+class SynthesisNetwork(torch.nn.Module): # Synthesis stack: a SynthesisInput followed by num_layers SynthesisLayers and a final ToRGB SynthesisLayer.
+    def __init__(self,
+        w_dim,                          # Intermediate latent (W) dimensionality.
+        img_resolution,                 # Output image resolution.
+        img_channels,                   # Number of color channels.
+        channel_base        = 32768,    # Overall multiplier for the number of channels.
+        channel_max         = 512,      # Maximum number of channels in any layer.
+        num_layers          = 14,       # Total number of layers, excluding Fourier features and ToRGB.
+        num_critical        = 2,        # Number of critically sampled layers at the end.
+        first_cutoff        = 2,        # Cutoff frequency of the first layer (f_{c,0}).
+        first_stopband      = 2**2.1,   # Minimum stopband of the first layer (f_{t,0}).
+        last_stopband_rel   = 2**0.3,   # Minimum stopband of the last layer, expressed relative to the cutoff.
+        margin_size         = 10,       # Number of additional pixels outside the image.
+        output_scale        = 0.25,     # Scale factor for the output image.
+        num_fp16_res        = 4,        # Use FP16 for the N highest resolutions.
+        **layer_kwargs,                 # Arguments for SynthesisLayer.
+    ):
+        super().__init__()
+        self.w_dim = w_dim
+        self.num_ws = num_layers + 2 # One w for the input, one per layer, and one for ToRGB (see forward).
+        self.img_resolution = img_resolution
+        self.img_channels = img_channels
+        self.num_layers = num_layers
+        self.num_critical = num_critical
+        self.margin_size = margin_size
+        self.output_scale = output_scale
+        self.num_fp16_res = num_fp16_res
+
+        # Geometric progression of layer cutoffs and min. stopbands.
+        last_cutoff = self.img_resolution / 2 # f_{c,N}
+        last_stopband = last_cutoff * last_stopband_rel # f_{t,N}
+        exponents = np.minimum(np.arange(self.num_layers + 1) / (self.num_layers - self.num_critical), 1) # Ramps 0 -> 1 over the non-critical layers, then stays at 1.
+        cutoffs = first_cutoff * (last_cutoff / first_cutoff) ** exponents # f_c[i]
+        stopbands = first_stopband * (last_stopband / first_stopband) ** exponents # f_t[i]
+
+        # Compute remaining layer parameters. Every array below has num_layers + 1 entries; the last one belongs to ToRGB.
+        sampling_rates = np.exp2(np.ceil(np.log2(np.minimum(stopbands * 2, self.img_resolution)))) # s[i]: min(2 * f_t[i], img_resolution) rounded up to a power of two.
+        half_widths = np.maximum(stopbands, sampling_rates / 2) - cutoffs # f_h[i]
+        sizes = sampling_rates + self.margin_size * 2
+        sizes[-2:] = self.img_resolution # The last two entries carry no margin.
+        channels = np.rint(np.minimum((channel_base / 2) / cutoffs, channel_max))
+        channels[-1] = self.img_channels # The final (ToRGB) entry outputs the image channels.
+
+        # Construct layers.
+        self.input = SynthesisInput(
+            w_dim=self.w_dim, channels=int(channels[0]), size=int(sizes[0]),
+            sampling_rate=sampling_rates[0], bandwidth=cutoffs[0])
+        self.layer_names = []
+        for idx in range(self.num_layers + 1):
+            prev = max(idx - 1, 0) # Layer 0 takes its input-side parameters from index 0 as well.
+            is_torgb = (idx == self.num_layers)
+            is_critically_sampled = (idx >= self.num_layers - self.num_critical)
+            use_fp16 = (sampling_rates[idx] * (2 ** self.num_fp16_res) > self.img_resolution)
+            layer = SynthesisLayer(
+                w_dim=self.w_dim, is_torgb=is_torgb, is_critically_sampled=is_critically_sampled, use_fp16=use_fp16,
+                in_channels=int(channels[prev]), out_channels= int(channels[idx]),
+                in_size=int(sizes[prev]), out_size=int(sizes[idx]),
+                in_sampling_rate=int(sampling_rates[prev]), out_sampling_rate=int(sampling_rates[idx]),
+                in_cutoff=cutoffs[prev], out_cutoff=cutoffs[idx],
+                in_half_width=half_widths[prev], out_half_width=half_widths[idx],
+                **layer_kwargs)
+            name = f'L{idx}_{layer.out_size[0]}_{layer.out_channels}' # Attribute name encodes index, output size and output channels.
+            setattr(self, name, layer) # Assigning a module attribute registers the layer as a submodule.
+            self.layer_names.append(name) # Execution order used by forward.
+
+    def forward(self, ws, **layer_kwargs):
+        misc.assert_shape(ws, [None, self.num_ws, self.w_dim])
+        ws = ws.to(torch.float32).unbind(dim=1) # Split into num_ws separate per-layer latents.
+
+        # Execute layers: ws[0] drives the input, ws[1:] are paired with the layers in construction order.
+        x = self.input(ws[0])
+        for name, w in zip(self.layer_names, ws[1:]):
+            x = getattr(self, name)(x, w, **layer_kwargs)
+        if self.output_scale != 1:
+            x = x * self.output_scale
+
+        # Ensure correct shape and dtype. The result is always returned as FP32.
+        misc.assert_shape(x, [None, self.img_channels, self.img_resolution, self.img_resolution])
+        x = x.to(torch.float32)
+        return x
+
+    def extra_repr(self): # Hyperparameter summary, one group per line.
+        return '\n'.join([
+            f'w_dim={self.w_dim:d}, num_ws={self.num_ws:d},',
+            f'img_resolution={self.img_resolution:d}, img_channels={self.img_channels:d},',
+            f'num_layers={self.num_layers:d}, num_critical={self.num_critical:d},',
+            f'margin_size={self.margin_size:d}, num_fp16_res={self.num_fp16_res:d}'])
+
+#----------------------------------------------------------------------------
+
+
+class Generator(torch.nn.Module): # Full generator: MappingNetwork turns (z, c) into ws, SynthesisNetwork turns ws into an image.
+    def __init__(self,
+        z_dim,                      # Input latent (Z) dimensionality.
+        c_dim,                      # Conditioning label (C) dimensionality.
+        w_dim,                      # Intermediate latent (W) dimensionality.
+        img_resolution,             # Output resolution.
+        img_channels,               # Number of output color channels.
+        mapping_kwargs      = {},   # Arguments for MappingNetwork. NOTE(review): mutable default; it is only unpacked below, never mutated.
+        **synthesis_kwargs,         # Arguments for SynthesisNetwork.
+    ):
+        super().__init__()
+        self.z_dim = z_dim
+        self.c_dim = c_dim
+        self.w_dim = w_dim
+        self.img_resolution = img_resolution
+        self.img_channels = img_channels
+        self.synthesis = SynthesisNetwork(w_dim=w_dim, img_resolution=img_resolution, img_channels=img_channels, **synthesis_kwargs)
+        self.num_ws = self.synthesis.num_ws # The mapping network is asked for exactly as many ws as the synthesis network consumes.
+        self.mapping = MappingNetwork(z_dim=z_dim, c_dim=c_dim, w_dim=w_dim, num_ws=self.num_ws, **mapping_kwargs)
+
+    def forward(self, z, c, truncation_psi=1, truncation_cutoff=None, update_emas=False, **synthesis_kwargs):
+        ws = self.mapping(z, c, truncation_psi=truncation_psi, truncation_cutoff=truncation_cutoff, update_emas=update_emas) # Truncation options apply to the mapping step only.
+        img = self.synthesis(ws, update_emas=update_emas, **synthesis_kwargs) # update_emas is forwarded to both sub-networks.
+        return img
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/models/superresolution.py b/modules/eg3ds/models/superresolution.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb1bf50ae0a3153600c297090a053b3d5f5111e1
--- /dev/null
+++ b/modules/eg3ds/models/superresolution.py
@@ -0,0 +1,360 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Superresolution network architectures from the paper
+"Efficient Geometry-aware 3D Generative Adversarial Networks"."""
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from modules.eg3ds.models.networks_stylegan2 import Conv2dLayer, SynthesisLayer, ToRGBLayer
+from modules.eg3ds.torch_utils.ops import upfirdn2d
+from modules.eg3ds.torch_utils import misc
+
+from modules.eg3ds.models.networks_stylegan2 import SynthesisBlock
+from modules.eg3ds.models.networks_stylegan3 import SynthesisLayer as AFSynthesisLayer
+from utils.commons.hparams import hparams
+
+
+#----------------------------------------------------------------------------
+
+# for 512x512 generation
+class SuperresolutionHybrid8X(torch.nn.Module): # Super-resolution head for img_resolution == 512: resizes inputs to 128x128 if needed, then runs two SynthesisBlocks (resolution 256, then 512).
+    def __init__(self, channels, img_resolution, sr_num_fp16_res, sr_antialias,
+                num_fp16_res=4, conv_clamp=None, channel_base=None, channel_max=None,# IGNORE: accepted but not used anywhere in this class.
+                **block_kwargs):
+        super().__init__()
+        assert img_resolution == 512
+
+        use_fp16 = sr_num_fp16_res > 0 # Any positive value enables FP16 for both blocks.
+        self.input_resolution = 128
+        self.sr_antialias = sr_antialias
+        self.block0 = SynthesisBlock(channels, 128, w_dim=512, resolution=256,
+                img_channels=3, is_last=False, use_fp16=use_fp16, conv_clamp=(256 if use_fp16 else None), **block_kwargs) # Clamping is enabled only together with FP16.
+        self.block1 = SynthesisBlock(128, 64, w_dim=512, resolution=512,
+                img_channels=3, is_last=True, use_fp16=use_fp16, conv_clamp=(256 if use_fp16 else None), **block_kwargs)
+        self.register_buffer('resample_filter', upfirdn2d.setup_filter([1,3,3,1])) # Registered here but not referenced by this class's forward.
+
+    def forward(self, rgb, x, ws, **block_kwargs):
+        ws = ws[:, -1:, :].repeat(1, 3, 1) # Keep only the last w and repeat it three times; both blocks receive this same tensor.
+        # Only the last dimension of x is compared; x and rgb are then both resized to a square input_resolution.
+        if x.shape[-1] != self.input_resolution:
+            x = torch.nn.functional.interpolate(x, size=(self.input_resolution, self.input_resolution),
+                                                  mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+            rgb = torch.nn.functional.interpolate(rgb, size=(self.input_resolution, self.input_resolution),
+                                                  mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+
+        x, rgb = self.block0(x, rgb, ws, **block_kwargs) # Note the order: blocks take (x, rgb, ws) while this forward takes (rgb, x, ws).
+        x, rgb = self.block1(x, rgb, ws, **block_kwargs)
+        return rgb # Only the image is returned; the feature map x is discarded.
+
+#----------------------------------------------------------------------------
+
+# for 256x256 generation
+
+class SuperresolutionHybrid4X(torch.nn.Module): # Super-resolution head for img_resolution == 256: a SynthesisBlockNoUp at 128 followed by a SynthesisBlock at 256.
+    def __init__(self, channels, img_resolution, sr_num_fp16_res, sr_antialias,
+                num_fp16_res=4, conv_clamp=None, channel_base=None, channel_max=None,# IGNORE: accepted but not used anywhere in this class.
+                **block_kwargs):
+        super().__init__()
+        assert img_resolution == 256
+        use_fp16 = sr_num_fp16_res > 0 # Any positive value enables FP16 for both blocks.
+        self.sr_antialias = sr_antialias
+        self.input_resolution = 128
+        self.block0 = SynthesisBlockNoUp(channels, 128, w_dim=512, resolution=128,
+                img_channels=3, is_last=False, use_fp16=use_fp16, conv_clamp=(256 if use_fp16 else None), **block_kwargs) # Clamping is enabled only together with FP16.
+        self.block1 = SynthesisBlock(128, 64, w_dim=512, resolution=256,
+                img_channels=3, is_last=True, use_fp16=use_fp16, conv_clamp=(256 if use_fp16 else None), **block_kwargs)
+        self.register_buffer('resample_filter', upfirdn2d.setup_filter([1,3,3,1])) # Registered here but not referenced by this class's forward.
+
+    def forward(self, rgb, x, ws, **block_kwargs):
+        ws = ws[:, -1:, :].repeat(1, 3, 1) # Keep only the last w and repeat it three times; both blocks receive this same tensor.
+        # Inputs are resized only when x is smaller than input_resolution ('<', not '!='); larger inputs pass through unchanged.
+        if x.shape[-1] < self.input_resolution:
+            x = torch.nn.functional.interpolate(x, size=(self.input_resolution, self.input_resolution),
+                                                  mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+            rgb = torch.nn.functional.interpolate(rgb, size=(self.input_resolution, self.input_resolution),
+                                                  mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+
+        x, rgb = self.block0(x, rgb, ws, **block_kwargs) # Note the order: blocks take (x, rgb, ws) while this forward takes (rgb, x, ws).
+        x, rgb = self.block1(x, rgb, ws, **block_kwargs)
+        return rgb # Only the image is returned; the feature map x is discarded.
+
+#----------------------------------------------------------------------------
+
+# for 128 x 128 generation
+
+class SuperresolutionHybrid2X(torch.nn.Module): # Super-resolution head for img_resolution == 128: a SynthesisBlockNoUp at 64 followed by a SynthesisBlock at 128.
+    def __init__(self, channels, img_resolution, sr_num_fp16_res, sr_antialias,
+                num_fp16_res=4, conv_clamp=None, channel_base=None, channel_max=None,# IGNORE: accepted but not used anywhere in this class.
+                **block_kwargs):
+        super().__init__()
+        assert img_resolution == 128
+
+        use_fp16 = sr_num_fp16_res > 0 # Any positive value enables FP16 for both blocks.
+        self.input_resolution = 64
+        self.sr_antialias = sr_antialias
+        self.block0 = SynthesisBlockNoUp(channels, 128, w_dim=512, resolution=64,
+                img_channels=3, is_last=False, use_fp16=use_fp16, conv_clamp=(256 if use_fp16 else None), **block_kwargs) # Clamping is enabled only together with FP16.
+        self.block1 = SynthesisBlock(128, 64, w_dim=512, resolution=128,
+                img_channels=3, is_last=True, use_fp16=use_fp16, conv_clamp=(256 if use_fp16 else None), **block_kwargs)
+        self.register_buffer('resample_filter', upfirdn2d.setup_filter([1,3,3,1])) # Registered here but not referenced by this class's forward.
+
+    def forward(self, rgb, x, ws, **block_kwargs):
+        ws = ws[:, -1:, :].repeat(1, 3, 1) # Keep only the last w and repeat it three times; both blocks receive this same tensor.
+        # Only the last dimension of x is compared; x and rgb are then both resized to a square input_resolution.
+        if x.shape[-1] != self.input_resolution:
+            x = torch.nn.functional.interpolate(x, size=(self.input_resolution, self.input_resolution),
+                                                  mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+            rgb = torch.nn.functional.interpolate(rgb, size=(self.input_resolution, self.input_resolution),
+                                                  mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+
+        x, rgb = self.block0(x, rgb, ws, **block_kwargs) # Note the order: blocks take (x, rgb, ws) while this forward takes (rgb, x, ws).
+        x, rgb = self.block1(x, rgb, ws, **block_kwargs)
+        return rgb # Only the image is returned; the feature map x is discarded.
+
+#----------------------------------------------------------------------------
+
+# TODO: Delete (here for backwards compatibility with old 256x256 models)
+
+class SuperresolutionHybridDeepfp32(torch.nn.Module): # Same block layout as SuperresolutionHybrid4X, but without the sr_antialias option.
+    def __init__(self, channels, img_resolution, sr_num_fp16_res,
+                num_fp16_res=4, conv_clamp=None, channel_base=None, channel_max=None,# IGNORE: accepted but not used anywhere in this class.
+                **block_kwargs):
+        super().__init__()
+        assert img_resolution == 256
+        use_fp16 = sr_num_fp16_res > 0 # Despite the class name, FP16 is enabled whenever sr_num_fp16_res is positive.
+
+        self.input_resolution = 128
+        self.block0 = SynthesisBlockNoUp(channels, 128, w_dim=512, resolution=128,
+                img_channels=3, is_last=False, use_fp16=use_fp16, conv_clamp=(256 if use_fp16 else None), **block_kwargs) # Clamping is enabled only together with FP16.
+        self.block1 = SynthesisBlock(128, 64, w_dim=512, resolution=256,
+                img_channels=3, is_last=True, use_fp16=use_fp16, conv_clamp=(256 if use_fp16 else None), **block_kwargs)
+        self.register_buffer('resample_filter', upfirdn2d.setup_filter([1,3,3,1])) # Registered here but not referenced by this class's forward.
+
+    def forward(self, rgb, x, ws, **block_kwargs):
+        ws = ws[:, -1:, :].repeat(1, 3, 1) # Keep only the last w and repeat it three times; both blocks receive this same tensor.
+        # Inputs are resized only when x is smaller than input_resolution; interpolate is called without an antialias argument here.
+        if x.shape[-1] < self.input_resolution:
+            x = torch.nn.functional.interpolate(x, size=(self.input_resolution, self.input_resolution),
+                                                  mode='bilinear', align_corners=False)
+            rgb = torch.nn.functional.interpolate(rgb, size=(self.input_resolution, self.input_resolution),
+                                                  mode='bilinear', align_corners=False)
+
+        x, rgb = self.block0(x, rgb, ws, **block_kwargs) # Note the order: blocks take (x, rgb, ws) while this forward takes (rgb, x, ws).
+        x, rgb = self.block1(x, rgb, ws, **block_kwargs)
+        return rgb # Only the image is returned; the feature map x is discarded.
+
+#----------------------------------------------------------------------------
+
+
+class SynthesisBlockNoUp(torch.nn.Module):
+    def __init__(self,
+        in_channels,                            # Number of input channels, 0 = first block.
+        out_channels,                           # Number of output channels.
+        w_dim,                                  # Intermediate latent (W) dimensionality.
+        resolution,                             # Resolution of this block.
+        img_channels,                           # Number of output color channels.
+        is_last,                                # Is this the last block?
+        architecture            = 'skip',       # Architecture: 'orig', 'skip', 'resnet'.
+        resample_filter         = [1,3,3,1],    # Low-pass filter to apply when resampling activations.
+        conv_clamp              = 256,          # Clamp the output of convolution layers to +-X, None = disable clamping.
+        use_fp16                = False,        # Use FP16 for this block?
+        fp16_channels_last      = False,        # Use channels-last memory format with FP16?
+        fused_modconv_default   = True,         # Default value of fused_modconv. 'inference_only' = True for inference, False for training.
+        **layer_kwargs,                         # Arguments for SynthesisLayer.
+    ):
+        assert architecture in ['orig', 'skip', 'resnet']
+        super().__init__()
+        self.in_channels = in_channels
+        self.w_dim = w_dim
+        self.resolution = resolution
+        self.img_channels = img_channels
+        self.is_last = is_last
+        self.architecture = architecture
+        self.use_fp16 = use_fp16
+        self.channels_last = (use_fp16 and fp16_channels_last)
+        self.fused_modconv_default = fused_modconv_default
+        self.register_buffer('resample_filter', upfirdn2d.setup_filter(resample_filter))
+        self.num_conv = 0
+        self.num_torgb = 0
+
+        if in_channels == 0:
+            self.const = torch.nn.Parameter(torch.randn([out_channels, resolution, resolution]))
+
+        if in_channels != 0:
+            self.conv0 = SynthesisLayer(in_channels, out_channels, w_dim=w_dim, resolution=resolution,
+                conv_clamp=conv_clamp, channels_last=self.channels_last, **layer_kwargs)
+            self.num_conv += 1
+
+        self.conv1 = SynthesisLayer(out_channels, out_channels, w_dim=w_dim, resolution=resolution,
+            conv_clamp=conv_clamp, channels_last=self.channels_last, **layer_kwargs)
+        self.num_conv += 1
+
+        if is_last or architecture == 'skip':
+            self.torgb = ToRGBLayer(out_channels, img_channels, w_dim=w_dim,
+                conv_clamp=conv_clamp, channels_last=self.channels_last)
+            self.num_torgb += 1
+
+        if in_channels != 0 and architecture == 'resnet':
+            self.skip = Conv2dLayer(in_channels, out_channels, kernel_size=1, bias=False, up=2,
+                resample_filter=resample_filter, channels_last=self.channels_last)
+
+    def forward(self, x, img, ws, force_fp32=False, fused_modconv=None, update_emas=False, **layer_kwargs):
+        _ = update_emas # unused
+        misc.assert_shape(ws, [None, self.num_conv + self.num_torgb, self.w_dim])
+        w_iter = iter(ws.unbind(dim=1))
+        if ws.device.type != 'cuda':
+            force_fp32 = True
+        dtype = torch.float16 if self.use_fp16 and not force_fp32 else torch.float32
+        memory_format = torch.channels_last if self.channels_last and not force_fp32 else torch.contiguous_format
+        if fused_modconv is None:
+            fused_modconv = self.fused_modconv_default
+        if fused_modconv == 'inference_only':
+            fused_modconv = (not self.training)
+
+        # Input.
+        if self.in_channels == 0:
+            x = self.const.to(dtype=dtype, memory_format=memory_format)
+            x = x.unsqueeze(0).repeat([ws.shape[0], 1, 1, 1])
+        else:
+            misc.assert_shape(x, [None, self.in_channels, self.resolution, self.resolution])
+            x = x.to(dtype=dtype, memory_format=memory_format)
+
+        # Main layers.
+        if self.in_channels == 0:
+            x = self.conv1(x, next(w_iter), fused_modconv=fused_modconv, **layer_kwargs)
+        elif self.architecture == 'resnet':
+            y = self.skip(x, gain=np.sqrt(0.5))
+            x = self.conv0(x, next(w_iter), fused_modconv=fused_modconv, **layer_kwargs)
+            x = self.conv1(x, next(w_iter), fused_modconv=fused_modconv, gain=np.sqrt(0.5), **layer_kwargs)
+            x = y.add_(x)
+        else:
+            x = self.conv0(x, next(w_iter), fused_modconv=fused_modconv, **layer_kwargs)
+            x = self.conv1(x, next(w_iter), fused_modconv=fused_modconv, **layer_kwargs)
+
+        # ToRGB.
+        # if img is not None:
+            # misc.assert_shape(img, [None, self.img_channels, self.resolution // 2, self.resolution // 2])
+            # img = upfirdn2d.upsample2d(img, self.resample_filter)
+        if self.is_last or self.architecture == 'skip':
+            y = self.torgb(x, next(w_iter), fused_modconv=fused_modconv)
+            y = y.to(dtype=torch.float32, memory_format=torch.contiguous_format)
+            img = img.add_(y) if img is not None else y
+
+        assert x.dtype == dtype
+        assert img is None or img.dtype == torch.float32
+        return x, img
+
+    def extra_repr(self):
+        return f'resolution={self.resolution:d}, architecture={self.architecture:s}'
+
+
+#----------------------------------------------------------------------------
+# for 512x512 generation
+class ResBlock2d(nn.Module):
+    """
+    Res block, preserve spatial resolution.
+    """
+
+    def __init__(self, in_features, kernel_size, padding):
+        super(ResBlock2d, self).__init__()
+        self.conv1 = nn.Conv2d(in_channels=in_features, out_channels=in_features, kernel_size=kernel_size,
+                               padding=padding)
+        self.conv2 = nn.Conv2d(in_channels=in_features, out_channels=in_features, kernel_size=kernel_size,
+                               padding=padding)
+        self.act = nn.ReLU(inplace=False)
+        # self.act = nn.LeakyReLU(inplace=False) # run3
+        # self.norm1 = nn.BatchNorm2d(in_features, affine=True)
+        # self.norm2 = nn.BatchNorm2d(in_features, affine=True)
+
+        
+    def forward(self, x):
+        out = self.conv1(x)
+        out = self.act(out)
+        out = self.conv2(out)
+        out = self.act(out)
+        out = out + x
+        return out
+
+    # def forward(self, x):
+    #     out = self.norm1(x)
+    #     out = F.relu(out)
+    #     out = self.conv1(out)
+    #     out = self.norm2(out)
+    #     out = F.relu(out)
+    #     out = self.conv2(out)
+    #     out = x + out
+    #     return out
+
+
+class LargeSynthesisBlock0(nn.Module): # SynthesisBlock at resolution 256 (256 output channels) followed by extra ResBlock2d layers and an additional 1x1 RGB head.
+    def __init__(self, channels, use_fp16, **block_kwargs):
+        super().__init__()
+        self.block = SynthesisBlock(channels, 256, w_dim=512, resolution=256,
+                img_channels=3, is_last=False, use_fp16=use_fp16, conv_clamp=(256 if use_fp16 else None), **block_kwargs) # Clamping is enabled only together with FP16.
+        self.resblocks = nn.Sequential(*[
+            ResBlock2d(256, kernel_size=3, padding=1) for _ in range(hparams['resblocks_in_large_sr']) # Depth is read from the global hparams at construction time.
+        ])
+        self.to_rgb = nn.Conv2d(256, 3, kernel_size=1) # Plain (unmodulated) 1x1 projection of the refined features to 3 channels.
+
+    def forward(self, x, rgb, ws, **block_kwargs):
+        x, rgb = self.block(x, rgb, ws, **block_kwargs)
+        x = self.resblocks(x) # NOTE(review): if self.block returns FP16 features when use_fp16 is set, these FP32 convs would need a cast - confirm.
+        rgb = rgb + self.to_rgb(x) # Adds a second RGB contribution on top of the one produced by self.block.
+        return x, rgb
+
+class LargeSynthesisBlock1(nn.Module): # SynthesisBlock at resolution 512 (256 -> 128 channels, is_last=True) followed by extra ResBlock2d layers and an additional 1x1 RGB head.
+    def __init__(self, use_fp16, **block_kwargs):
+        super().__init__()
+        self.block = SynthesisBlock(256, 128, w_dim=512, resolution=512,
+                img_channels=3, is_last=True, use_fp16=use_fp16, conv_clamp=(256 if use_fp16 else None), **block_kwargs) # Clamping is enabled only together with FP16.
+        self.resblocks = nn.Sequential(*[
+            ResBlock2d(128, kernel_size=3, padding=1) for _ in range(hparams['resblocks_in_large_sr']) # Depth is read from the global hparams at construction time.
+        ])
+        self.to_rgb = nn.Conv2d(128, 3, kernel_size=1) # Plain (unmodulated) 1x1 projection of the refined features to 3 channels.
+
+    def forward(self, x, rgb, ws, **block_kwargs):
+        x, rgb = self.block(x, rgb, ws, **block_kwargs)
+        x = self.resblocks(x) # NOTE(review): if self.block returns FP16 features when use_fp16 is set, these FP32 convs would need a cast - confirm.
+        rgb = rgb + self.to_rgb(x) # Adds a second RGB contribution on top of the one produced by self.block.
+        return x, rgb
+    
+class SuperresolutionHybrid8XDC(torch.nn.Module): # Super-resolution head for img_resolution == 512 with wider blocks (256, then 128 channels); optionally built from the Large* blocks.
+    def __init__(self, channels, img_resolution, sr_num_fp16_res, sr_antialias, large_sr=False, **block_kwargs):
+        super().__init__()
+        assert img_resolution == 512
+
+        use_fp16 = sr_num_fp16_res > 0 # Any positive value enables FP16 for both blocks.
+        self.input_resolution = 128
+        self.sr_antialias = sr_antialias
+        if large_sr is True: # Identity check: truthy values other than the bool True (e.g. 1) take the else branch.
+            self.block0 = LargeSynthesisBlock0(channels, use_fp16=sr_num_fp16_res > 0, **block_kwargs) # Same condition as use_fp16 above.
+            self.block1 = LargeSynthesisBlock1(use_fp16=sr_num_fp16_res > 0, **block_kwargs)
+        else:
+            self.block0 = SynthesisBlock(channels, 256, w_dim=512, resolution=256,
+                    img_channels=3, is_last=False, use_fp16=use_fp16, conv_clamp=(256 if use_fp16 else None), **block_kwargs)
+            self.block1 = SynthesisBlock(256, 128, w_dim=512, resolution=512,
+                    img_channels=3, is_last=True, use_fp16=use_fp16, conv_clamp=(256 if use_fp16 else None), **block_kwargs)
+
+    def forward(self, rgb, x, ws, **block_kwargs):
+        ws = ws[:, -1:, :].repeat(1, 3, 1) # Keep only the last w and repeat it three times; both blocks receive this same tensor.
+        # Only the last dimension of x is compared; x and rgb are then both resized to a square input_resolution.
+        if x.shape[-1] != self.input_resolution:
+            x = torch.nn.functional.interpolate(x, size=(self.input_resolution, self.input_resolution),
+                                                  mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+            rgb = torch.nn.functional.interpolate(rgb, size=(self.input_resolution, self.input_resolution),
+                                                  mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+
+        x, rgb = self.block0(x, rgb, ws, **block_kwargs) # Note the order: blocks take (x, rgb, ws) while this forward takes (rgb, x, ws).
+        x, rgb = self.block1(x, rgb, ws, **block_kwargs)
+        return rgb # Only the image is returned; the feature map x is discarded.
+#----------------------------------------------------------------------------
\ No newline at end of file
diff --git a/modules/eg3ds/torch_utils/__init__.py b/modules/eg3ds/torch_utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfebd04f47e6f6b1b44984c14c23b57d56f72240
--- /dev/null
+++ b/modules/eg3ds/torch_utils/__init__.py
@@ -0,0 +1,11 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+# empty
diff --git a/modules/eg3ds/torch_utils/custom_ops.py b/modules/eg3ds/torch_utils/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed2524f47ab3d5b8750cfb868cc14012f424acc8
--- /dev/null
+++ b/modules/eg3ds/torch_utils/custom_ops.py
@@ -0,0 +1,159 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+import glob
+import hashlib
+import importlib
+import os
+import re
+import shutil
+import uuid
+
+import torch
+import torch.utils.cpp_extension
+from torch.utils.file_baton import FileBaton
+
+#----------------------------------------------------------------------------
+# Global options.
+
+verbosity = 'brief' # Verbosity level: 'none', 'brief', 'full'
+
+#----------------------------------------------------------------------------
+# Internal helper funcs.
+
+def _find_compiler_bindir():
+    patterns = [
+        'C:/Program Files (x86)/Microsoft Visual Studio/*/Professional/VC/Tools/MSVC/*/bin/Hostx64/x64',
+        'C:/Program Files (x86)/Microsoft Visual Studio/*/BuildTools/VC/Tools/MSVC/*/bin/Hostx64/x64',
+        'C:/Program Files (x86)/Microsoft Visual Studio/*/Community/VC/Tools/MSVC/*/bin/Hostx64/x64',
+        'C:/Program Files (x86)/Microsoft Visual Studio */vc/bin',
+    ]
+    for pattern in patterns:
+        matches = sorted(glob.glob(pattern))
+        if len(matches):
+            return matches[-1]
+    return None
+
+#----------------------------------------------------------------------------
+
+def _get_mangled_gpu_name():
+    name = torch.cuda.get_device_name().lower()
+    out = []
+    for c in name:
+        if re.match('[a-z0-9_-]+', c):
+            out.append(c)
+        else:
+            out.append('-')
+    return ''.join(out)
+
+#----------------------------------------------------------------------------
+# Main entry point for compiling and loading C++/CUDA plugins.
+
+# Maps module_name -> already imported plugin module, so each plugin is built
+# and loaded at most once per process.
+_cached_plugins = dict()
+
+def get_plugin(module_name, sources, headers=None, source_dir=None, **build_kwargs):
+    """Compile (if needed) and import a C++/CUDA extension, caching the result.
+
+    Args:
+        module_name: Name of the extension; also the key in ``_cached_plugins``.
+        sources: Source file names passed to ``torch.utils.cpp_extension.load``.
+        headers: Optional header file names. They are only hashed and copied
+            next to the sources; they are not passed to ``load`` as sources.
+        source_dir: If given, every entry of ``sources`` and ``headers`` is
+            joined onto this directory.
+        **build_kwargs: Forwarded unchanged to ``torch.utils.cpp_extension.load``.
+
+    Returns:
+        The imported extension module.
+
+    Raises:
+        RuntimeError: On Windows, when ``cl.exe`` is not on PATH and no
+            compiler directory could be located.
+        Any exception raised by the build or import is re-raised after the
+        'Failed!' status has been printed (in 'brief' verbosity).
+
+    Side effects:
+        Sets ``os.environ['TORCH_CUDA_ARCH_LIST']`` to ``''`` and, on Windows,
+        may append a compiler directory to ``os.environ['PATH']``. Neither
+        change is reverted. Progress is printed according to ``verbosity``.
+    """
+    assert verbosity in ['none', 'brief', 'full']
+    if headers is None:
+        headers = []
+    if source_dir is not None:
+        sources = [os.path.join(source_dir, fname) for fname in sources]
+        headers = [os.path.join(source_dir, fname) for fname in headers]
+
+    # Already cached?
+    if module_name in _cached_plugins:
+        return _cached_plugins[module_name]
+
+    # Print status.
+    if verbosity == 'full':
+        print(f'Setting up PyTorch plugin "{module_name}"...')
+    elif verbosity == 'brief':
+        print(f'Setting up PyTorch plugin "{module_name}"... ', end='', flush=True)
+    verbose_build = (verbosity == 'full')
+
+    # Compile and load.
+    try: # pylint: disable=too-many-nested-blocks
+        # Make sure we can find the necessary compiler binaries.
+        if os.name == 'nt' and os.system("where cl.exe >nul 2>nul") != 0:
+            compiler_bindir = _find_compiler_bindir()
+            if compiler_bindir is None:
+                raise RuntimeError(f'Could not find MSVC/GCC/CLANG installation on this computer. Check _find_compiler_bindir() in "{__file__}".')
+            os.environ['PATH'] += ';' + compiler_bindir
+
+        # Some containers set TORCH_CUDA_ARCH_LIST to a list that can either
+        # break the build or unnecessarily restrict what's available to nvcc.
+        # Unset it to let nvcc decide based on what's available on the
+        # machine.
+        os.environ['TORCH_CUDA_ARCH_LIST'] = ''
+
+        # Incremental build md5sum trickery.  Copies all the input source files
+        # into a cached build directory under a combined md5 digest of the input
+        # source files.  Copying is done only if the combined digest has changed.
+        # This keeps input file timestamps and filenames the same as in previous
+        # extension builds, allowing for fast incremental rebuilds.
+        #
+        # This optimization is done only in case all the source files reside in
+        # a single directory (just for simplicity) and if the TORCH_EXTENSIONS_DIR
+        # environment variable is set (we take this as a signal that the user
+        # actually cares about this.)
+        #
+        # EDIT: We now do it regardless of TORCH_EXTENSIONS_DIR, in order to work
+        # around the *.cu dependency bug in ninja config.
+        #
+        all_source_files = sorted(sources + headers)
+        all_source_dirs = set(os.path.dirname(fname) for fname in all_source_files)
+        if len(all_source_dirs) == 1: # and ('TORCH_EXTENSIONS_DIR' in os.environ):
+
+            # Compute combined hash digest for all source files.
+            hash_md5 = hashlib.md5()
+            for src in all_source_files:
+                with open(src, 'rb') as f:
+                    hash_md5.update(f.read())
+
+            # Select cached build directory name.
+            # The directory name combines the source digest with the mangled
+            # GPU name, so different GPUs get separate build directories.
+            source_digest = hash_md5.hexdigest()
+            build_top_dir = torch.utils.cpp_extension._get_build_directory(module_name, verbose=verbose_build) # pylint: disable=protected-access
+            cached_build_dir = os.path.join(build_top_dir, f'{source_digest}-{_get_mangled_gpu_name()}')
+
+            if not os.path.isdir(cached_build_dir):
+                # Populate a uniquely named temp dir first, then rename it into
+                # place, so a half-copied directory is never visible under the
+                # final name.
+                tmpdir = f'{build_top_dir}/srctmp-{uuid.uuid4().hex}'
+                os.makedirs(tmpdir)
+                for src in all_source_files:
+                    shutil.copyfile(src, os.path.join(tmpdir, os.path.basename(src)))
+                try:
+                    os.replace(tmpdir, cached_build_dir) # atomic
+                except OSError:
+                    # source directory already exists, delete tmpdir and its contents.
+                    shutil.rmtree(tmpdir)
+                    if not os.path.isdir(cached_build_dir): raise
+
+            # Compile.
+            # Only the copied sources are compiled; headers were copied alongside them.
+            cached_sources = [os.path.join(cached_build_dir, os.path.basename(fname)) for fname in sources]
+            torch.utils.cpp_extension.load(name=module_name, build_directory=cached_build_dir,
+                verbose=verbose_build, sources=cached_sources, **build_kwargs)
+        else:
+            # Sources span several directories: build them in place without the cache copy.
+            torch.utils.cpp_extension.load(name=module_name, verbose=verbose_build, sources=sources, **build_kwargs)
+
+        # Load.
+        module = importlib.import_module(module_name)
+
+    except:
+        # Bare except is deliberate here: the status line is completed for any
+        # failure and the exception is always re-raised.
+        if verbosity == 'brief':
+            print('Failed!')
+        raise
+
+    # Print status and add to cache dict.
+    if verbosity == 'full':
+        print(f'Done setting up PyTorch plugin "{module_name}".')
+    elif verbosity == 'brief':
+        print('Done.')
+    _cached_plugins[module_name] = module
+    return module
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/misc.py b/modules/eg3ds/torch_utils/misc.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8d56d3d55fda85709ed63716485c7d55514bd1c
--- /dev/null
+++ b/modules/eg3ds/torch_utils/misc.py
@@ -0,0 +1,268 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+import re
+import contextlib
+import numpy as np
+import torch
+import warnings
+from modules.eg3ds import dnnlib
+
+#----------------------------------------------------------------------------
+# Cached construction of constant tensors. Avoids CPU=>GPU copy when the
+# same constant is used multiple times.
+
+_constant_cache = dict()
+
+def constant(value, shape=None, dtype=None, device=None, memory_format=None):
+    value = np.asarray(value)
+    if shape is not None:
+        shape = tuple(shape)
+    if dtype is None:
+        dtype = torch.get_default_dtype()
+    if device is None:
+        device = torch.device('cpu')
+    if memory_format is None:
+        memory_format = torch.contiguous_format
+
+    key = (value.shape, value.dtype, value.tobytes(), shape, dtype, device, memory_format)
+    tensor = _constant_cache.get(key, None)
+    if tensor is None:
+        tensor = torch.as_tensor(value.copy(), dtype=dtype, device=device)
+        if shape is not None:
+            tensor, _ = torch.broadcast_tensors(tensor, torch.empty(shape))
+        tensor = tensor.contiguous(memory_format=memory_format)
+        _constant_cache[key] = tensor
+    return tensor
+
+#----------------------------------------------------------------------------
+# Replace NaN/Inf with specified numerical values.
+
+try:
+    nan_to_num = torch.nan_to_num # 1.8.0a0
+except AttributeError:
+    def nan_to_num(input, nan=0.0, posinf=None, neginf=None, *, out=None): # pylint: disable=redefined-builtin
+        assert isinstance(input, torch.Tensor)
+        if posinf is None:
+            posinf = torch.finfo(input.dtype).max
+        if neginf is None:
+            neginf = torch.finfo(input.dtype).min
+        assert nan == 0
+        return torch.clamp(input.unsqueeze(0).nansum(0), min=neginf, max=posinf, out=out)
+
+#----------------------------------------------------------------------------
+# Symbolic assert.
+
+try:
+    symbolic_assert = torch._assert # 1.8.0a0 # pylint: disable=protected-access
+except AttributeError:
+    symbolic_assert = torch.Assert # 1.7.0
+
+#----------------------------------------------------------------------------
+# Context manager to temporarily suppress known warnings in torch.jit.trace().
+# Note: Cannot use catch_warnings because of https://bugs.python.org/issue29672
+
+@contextlib.contextmanager
+def suppress_tracer_warnings():
+    flt = ('ignore', None, torch.jit.TracerWarning, None, 0)
+    warnings.filters.insert(0, flt)
+    yield
+    warnings.filters.remove(flt)
+
+#----------------------------------------------------------------------------
+# Assert that the shape of a tensor matches the given list of integers.
+# None indicates that the size of a dimension is allowed to vary.
+# Performs symbolic assertion when used in torch.jit.trace().
+
+def assert_shape(tensor, ref_shape):
+    if tensor.ndim != len(ref_shape):
+        raise AssertionError(f'Wrong number of dimensions: got {tensor.ndim}, expected {len(ref_shape)}')
+    for idx, (size, ref_size) in enumerate(zip(tensor.shape, ref_shape)):
+        if ref_size is None:
+            pass
+        elif isinstance(ref_size, torch.Tensor):
+            with suppress_tracer_warnings(): # as_tensor results are registered as constants
+                symbolic_assert(torch.equal(torch.as_tensor(size), ref_size), f'Wrong size for dimension {idx}')
+        elif isinstance(size, torch.Tensor):
+            with suppress_tracer_warnings(): # as_tensor results are registered as constants
+                symbolic_assert(torch.equal(size, torch.as_tensor(ref_size)), f'Wrong size for dimension {idx}: expected {ref_size}')
+        elif size != ref_size:
+            raise AssertionError(f'Wrong size for dimension {idx}: got {size}, expected {ref_size}')
+
+#----------------------------------------------------------------------------
+# Function decorator that calls torch.autograd.profiler.record_function().
+
+def profiled_function(fn):
+    def decorator(*args, **kwargs):
+        with torch.autograd.profiler.record_function(fn.__name__):
+            return fn(*args, **kwargs)
+    decorator.__name__ = fn.__name__
+    return decorator
+
+#----------------------------------------------------------------------------
+# Sampler for torch.utils.data.DataLoader that loops over the dataset
+# indefinitely, shuffling items as it goes.
+
+class InfiniteSampler(torch.utils.data.Sampler):
+    def __init__(self, dataset, rank=0, num_replicas=1, shuffle=True, seed=0, window_size=0.5):
+        assert len(dataset) > 0
+        assert num_replicas > 0
+        assert 0 <= rank < num_replicas
+        assert 0 <= window_size <= 1
+        super().__init__(dataset)
+        self.dataset = dataset
+        self.rank = rank
+        self.num_replicas = num_replicas
+        self.shuffle = shuffle
+        self.seed = seed
+        self.window_size = window_size
+
+    def __iter__(self):
+        order = np.arange(len(self.dataset))
+        rnd = None
+        window = 0
+        if self.shuffle:
+            rnd = np.random.RandomState(self.seed)
+            rnd.shuffle(order)
+            window = int(np.rint(order.size * self.window_size))
+
+        idx = 0
+        while True:
+            i = idx % order.size
+            if idx % self.num_replicas == self.rank:
+                yield order[i]
+            if window >= 2:
+                j = (i - rnd.randint(window)) % order.size
+                order[i], order[j] = order[j], order[i]
+            idx += 1
+
+#----------------------------------------------------------------------------
+# Utilities for operating with torch.nn.Module parameters and buffers.
+
+def params_and_buffers(module):
+    assert isinstance(module, torch.nn.Module)
+    return list(module.parameters()) + list(module.buffers())
+
+def named_params_and_buffers(module):
+    assert isinstance(module, torch.nn.Module)
+    return list(module.named_parameters()) + list(module.named_buffers())
+
+def copy_params_and_buffers(src_module, dst_module, require_all=False):
+    assert isinstance(src_module, torch.nn.Module)
+    assert isinstance(dst_module, torch.nn.Module)
+    src_tensors = dict(named_params_and_buffers(src_module))
+    for name, tensor in named_params_and_buffers(dst_module):
+        assert (name in src_tensors) or (not require_all)
+        if name in src_tensors:
+            tensor.copy_(src_tensors[name].detach()).requires_grad_(tensor.requires_grad)
+
+#----------------------------------------------------------------------------
+# Context manager for easily enabling/disabling DistributedDataParallel
+# synchronization.
+
+@contextlib.contextmanager
+def ddp_sync(module, sync):
+    assert isinstance(module, torch.nn.Module)
+    if sync or not isinstance(module, torch.nn.parallel.DistributedDataParallel):
+        yield
+    else:
+        with module.no_sync():
+            yield
+
+#----------------------------------------------------------------------------
+# Check DistributedDataParallel consistency across processes.
+
+def check_ddp_consistency(module, ignore_regex=None):
+    assert isinstance(module, torch.nn.Module)
+    for name, tensor in named_params_and_buffers(module):
+        fullname = type(module).__name__ + '.' + name
+        if ignore_regex is not None and re.fullmatch(ignore_regex, fullname):
+            continue
+        tensor = tensor.detach()
+        if tensor.is_floating_point():
+            tensor = nan_to_num(tensor)
+        other = tensor.clone()
+        torch.distributed.broadcast(tensor=other, src=0)
+        assert (tensor == other).all(), fullname
+
+#----------------------------------------------------------------------------
+# Print summary table of module hierarchy.
+
+def print_module_summary(module, inputs, max_nesting=3, skip_redundant=True):
+    assert isinstance(module, torch.nn.Module)
+    assert not isinstance(module, torch.jit.ScriptModule)
+    assert isinstance(inputs, (tuple, list))
+
+    # Register hooks.
+    entries = []
+    nesting = [0]
+    def pre_hook(_mod, _inputs):
+        nesting[0] += 1
+    def post_hook(mod, _inputs, outputs):
+        nesting[0] -= 1
+        if nesting[0] <= max_nesting:
+            outputs = list(outputs) if isinstance(outputs, (tuple, list)) else [outputs]
+            outputs = [t for t in outputs if isinstance(t, torch.Tensor)]
+            entries.append(dnnlib.EasyDict(mod=mod, outputs=outputs))
+    hooks = [mod.register_forward_pre_hook(pre_hook) for mod in module.modules()]
+    hooks += [mod.register_forward_hook(post_hook) for mod in module.modules()]
+
+    # Run module.
+    outputs = module(*inputs)
+    for hook in hooks:
+        hook.remove()
+
+    # Identify unique outputs, parameters, and buffers.
+    tensors_seen = set()
+    for e in entries:
+        e.unique_params = [t for t in e.mod.parameters() if id(t) not in tensors_seen]
+        e.unique_buffers = [t for t in e.mod.buffers() if id(t) not in tensors_seen]
+        e.unique_outputs = [t for t in e.outputs if id(t) not in tensors_seen]
+        tensors_seen |= {id(t) for t in e.unique_params + e.unique_buffers + e.unique_outputs}
+
+    # Filter out redundant entries.
+    if skip_redundant:
+        entries = [e for e in entries if len(e.unique_params) or len(e.unique_buffers) or len(e.unique_outputs)]
+
+    # Construct table.
+    rows = [[type(module).__name__, 'Parameters', 'Buffers', 'Output shape', 'Datatype']]
+    rows += [['---'] * len(rows[0])]
+    param_total = 0
+    buffer_total = 0
+    submodule_names = {mod: name for name, mod in module.named_modules()}
+    for e in entries:
+        name = '<top-level>' if e.mod is module else submodule_names[e.mod]
+        param_size = sum(t.numel() for t in e.unique_params)
+        buffer_size = sum(t.numel() for t in e.unique_buffers)
+        output_shapes = [str(list(t.shape)) for t in e.outputs]
+        output_dtypes = [str(t.dtype).split('.')[-1] for t in e.outputs]
+        rows += [[
+            name + (':0' if len(e.outputs) >= 2 else ''),
+            str(param_size) if param_size else '-',
+            str(buffer_size) if buffer_size else '-',
+            (output_shapes + ['-'])[0],
+            (output_dtypes + ['-'])[0],
+        ]]
+        for idx in range(1, len(e.outputs)):
+            rows += [[name + f':{idx}', '-', '-', output_shapes[idx], output_dtypes[idx]]]
+        param_total += param_size
+        buffer_total += buffer_size
+    rows += [['---'] * len(rows[0])]
+    rows += [['Total', str(param_total), str(buffer_total), '-', '-']]
+
+    # Print table.
+    widths = [max(len(cell) for cell in column) for column in zip(*rows)]
+    print()
+    for row in rows:
+        print('  '.join(cell + ' ' * (width - len(cell)) for cell, width in zip(row, widths)))
+    print()
+    return outputs
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/ops/__init__.py b/modules/eg3ds/torch_utils/ops/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfebd04f47e6f6b1b44984c14c23b57d56f72240
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/__init__.py
@@ -0,0 +1,11 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+# empty
diff --git a/modules/eg3ds/torch_utils/ops/bias_act.cpp b/modules/eg3ds/torch_utils/ops/bias_act.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ee6f6d0caaf4f84b94851d223e384344e1109cdc
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/bias_act.cpp
@@ -0,0 +1,103 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+ *
+ * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+ * property and proprietary rights in and to this material, related
+ * documentation and any modifications thereto. Any use, reproduction,
+ * disclosure or distribution of this material and related documentation
+ * without an express license agreement from NVIDIA CORPORATION or
+ * its affiliates is strictly prohibited.
+ */
+
+#include <torch/extension.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include "bias_act.h"
+
+//------------------------------------------------------------------------
+
+// Returns true when x and y have the same rank and sizes, and the same stride
+// in every dimension whose size is at least 2 (strides of size-0/1 dimensions
+// are ignored).
+static bool has_same_layout(torch::Tensor x, torch::Tensor y)
+{
+    const int64_t rank = x.dim();
+    if (rank != y.dim())
+        return false;
+    for (int64_t d = 0; d < rank; ++d)
+    {
+        const bool same_size = (x.size(d) == y.size(d));
+        if (!same_size)
+            return false;
+        const bool stride_matters = (x.size(d) >= 2);
+        if (stride_matters && x.stride(d) != y.stride(d))
+            return false;
+    }
+    return true;
+}
+
+//------------------------------------------------------------------------
+
+// Host-side entry point: validates the arguments, fills bias_act_kernel_params
+// and launches the CUDA kernel selected by `act` for the dtype of `x`.
+//
+// x     - input tensor on a CUDA device; must be non-overlapping and dense.
+// b     - rank-1 bias (may be empty); when non-empty it must have
+//         x.size(dim) elements and the same dtype/device as x.
+// xref, yref, dy - optional tensors (empty = not provided); when non-empty
+//         they must match x in shape, dtype, device and layout.
+// grad, act, alpha, gain, clamp - copied verbatim into the kernel parameters.
+//
+// Returns a new tensor created with empty_like(x) and written by the kernel.
+// Any failed check raises through TORCH_CHECK.
+static torch::Tensor bias_act(torch::Tensor x, torch::Tensor b, torch::Tensor xref, torch::Tensor yref, torch::Tensor dy, int grad, int dim, int act, float alpha, float gain, float clamp)
+{
+    // Validate arguments.
+    TORCH_CHECK(x.is_cuda(), "x must reside on CUDA device");
+    TORCH_CHECK(b.numel() == 0 || (b.dtype() == x.dtype() && b.device() == x.device()), "b must have the same dtype and device as x");
+    TORCH_CHECK(xref.numel() == 0 || (xref.sizes() == x.sizes() && xref.dtype() == x.dtype() && xref.device() == x.device()), "xref must have the same shape, dtype, and device as x");
+    TORCH_CHECK(yref.numel() == 0 || (yref.sizes() == x.sizes() && yref.dtype() == x.dtype() && yref.device() == x.device()), "yref must have the same shape, dtype, and device as x");
+    // The message now mentions the shape requirement that this check enforces.
+    TORCH_CHECK(dy.numel() == 0 || (dy.sizes() == x.sizes() && dy.dtype() == x.dtype() && dy.device() == x.device()), "dy must have the same shape, dtype, and device as x");
+    // The kernel indexes with 32-bit ints, so the element count must fit in one.
+    TORCH_CHECK(x.numel() <= INT_MAX, "x is too large");
+    TORCH_CHECK(b.dim() == 1, "b must have rank 1");
+    TORCH_CHECK(b.numel() == 0 || (dim >= 0 && dim < x.dim()), "dim is out of bounds");
+    TORCH_CHECK(b.numel() == 0 || b.numel() == x.size(dim), "b has wrong number of elements");
+    TORCH_CHECK(grad >= 0, "grad must be non-negative");
+
+    // Validate layout.
+    TORCH_CHECK(x.is_non_overlapping_and_dense(), "x must be non-overlapping and dense");
+    TORCH_CHECK(b.is_contiguous(), "b must be contiguous");
+    TORCH_CHECK(xref.numel() == 0 || has_same_layout(xref, x), "xref must have the same layout as x");
+    TORCH_CHECK(yref.numel() == 0 || has_same_layout(yref, x), "yref must have the same layout as x");
+    TORCH_CHECK(dy.numel() == 0 || has_same_layout(dy, x), "dy must have the same layout as x");
+
+    // Create output tensor.
+    const at::cuda::OptionalCUDAGuard device_guard(device_of(x));
+    torch::Tensor y = torch::empty_like(x);
+    TORCH_CHECK(has_same_layout(y, x), "y must have the same layout as x");
+
+    // Initialize CUDA kernel parameters.
+    // Empty optional tensors are passed to the kernel as NULL pointers.
+    bias_act_kernel_params p;
+    p.x     = x.data_ptr();
+    p.b     = (b.numel()) ? b.data_ptr() : NULL;
+    p.xref  = (xref.numel()) ? xref.data_ptr() : NULL;
+    p.yref  = (yref.numel()) ? yref.data_ptr() : NULL;
+    p.dy    = (dy.numel()) ? dy.data_ptr() : NULL;
+    p.y     = y.data_ptr();
+    p.grad  = grad;
+    p.act   = act;
+    p.alpha = alpha;
+    p.gain  = gain;
+    p.clamp = clamp;
+    p.sizeX = (int)x.numel();
+    p.sizeB = (int)b.numel();
+    p.stepB = (b.numel()) ? (int)x.stride(dim) : 1;
+
+    // Choose CUDA kernel.
+    // The dispatch label only shows up in dtype-dispatch error messages; it
+    // previously carried the name of a different op ("upfirdn2d_cuda").
+    void* kernel;
+    AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "bias_act_cuda", [&]
+    {
+        kernel = choose_bias_act_kernel<scalar_t>(p);
+    });
+    TORCH_CHECK(kernel, "no CUDA kernel found for the specified activation func");
+
+    // Launch CUDA kernel.
+    // Each thread handles up to loopX elements, so one block covers
+    // loopX * blockSize elements; gridSize is the ceiling of sizeX over that.
+    p.loopX = 4;
+    int blockSize = 4 * 32;
+    int gridSize = (p.sizeX - 1) / (p.loopX * blockSize) + 1;
+    void* args[] = {&p};
+    AT_CUDA_CHECK(cudaLaunchKernel(kernel, gridSize, blockSize, args, 0, at::cuda::getCurrentCUDAStream()));
+    return y;
+}
+
+//------------------------------------------------------------------------
+
+// Python binding: exposes the host-side bias_act() function above as
+// `<extension module>.bias_act`.
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
+{
+    m.def("bias_act", &bias_act);
+}
+
+//------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/ops/bias_act.cu b/modules/eg3ds/torch_utils/ops/bias_act.cu
new file mode 100644
index 0000000000000000000000000000000000000000..71ca3900deda41e62d80044f0e409875f4c794b5
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/bias_act.cu
@@ -0,0 +1,177 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+ *
+ * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+ * property and proprietary rights in and to this material, related
+ * documentation and any modifications thereto. Any use, reproduction,
+ * disclosure or distribution of this material and related documentation
+ * without an express license agreement from NVIDIA CORPORATION or
+ * its affiliates is strictly prohibited.
+ */
+
+#include <c10/util/Half.h>
+#include "bias_act.h"
+
+//------------------------------------------------------------------------
+// Helpers.
+
+// Maps the tensor element type T to the type used for arithmetic inside the
+// kernel: double stays double, float stays float, and c10::Half is computed
+// in float.
+template <class T> struct InternalType;
+template <> struct InternalType<double>     { typedef double scalar_t; };
+template <> struct InternalType<float>      { typedef float  scalar_t; };
+template <> struct InternalType<c10::Half>  { typedef float  scalar_t; };
+
+//------------------------------------------------------------------------
+// CUDA kernel.
+
+// Elementwise bias + activation kernel.
+//
+// T is the element type stored in memory; arithmetic is done in
+// InternalType<T>::scalar_t. A is the compile-time activation index (1..9,
+// see the per-branch comments below). p.grad (G) selects which formula of the
+// chosen activation is evaluated: G == 0 uses x directly; the G == 1 and
+// G == 2 branches additionally use xref/yref (presumably first- and
+// second-order gradient passes -- confirm against the Python caller).
+//
+// Each thread processes up to p.loopX elements, stepping by blockDim.x, and
+// writes one output element per processed input element.
+template <class T, int A>
+__global__ void bias_act_kernel(bias_act_kernel_params p)
+{
+    typedef typename InternalType<T>::scalar_t scalar_t;
+    int G                 = p.grad;
+    scalar_t alpha        = (scalar_t)p.alpha;
+    scalar_t gain         = (scalar_t)p.gain;
+    scalar_t clamp        = (scalar_t)p.clamp;
+    scalar_t one          = (scalar_t)1;
+    scalar_t two          = (scalar_t)2;
+    // Thresholds beyond which the exp-based formulas below are replaced by
+    // their limiting values.
+    scalar_t expRange     = (scalar_t)80;
+    scalar_t halfExpRange = (scalar_t)40;
+    scalar_t seluScale    = (scalar_t)1.0507009873554804934193349852946;
+    scalar_t seluAlpha    = (scalar_t)1.6732632423543772848170429916717;
+
+    // Loop over elements.
+    // A block covers loopX * blockDim.x consecutive elements; consecutive
+    // threads touch consecutive elements in every iteration.
+    int xi = blockIdx.x * p.loopX * blockDim.x + threadIdx.x;
+    for (int loopIdx = 0; loopIdx < p.loopX && xi < p.sizeX; loopIdx++, xi += blockDim.x)
+    {
+        // Load.
+        // Missing optional inputs default to 0 (b, xref, yref) or 1 (dy).
+        scalar_t x = (scalar_t)((const T*)p.x)[xi];
+        // Bias index: position of this element along the bias dimension.
+        scalar_t b = (p.b) ? (scalar_t)((const T*)p.b)[(xi / p.stepB) % p.sizeB] : 0;
+        scalar_t xref = (p.xref) ? (scalar_t)((const T*)p.xref)[xi] : 0;
+        scalar_t yref = (p.yref) ? (scalar_t)((const T*)p.yref)[xi] : 0;
+        scalar_t dy = (p.dy) ? (scalar_t)((const T*)p.dy)[xi] : one;
+        // yy is yref with the gain divided back out (0 when gain is 0).
+        scalar_t yy = (gain != 0) ? yref / gain : 0;
+        scalar_t y = 0;
+
+        // Apply bias.
+        // For G == 0 the bias is added to x; otherwise it is added to xref.
+        ((G == 0) ? x : xref) += b;
+
+        // linear
+        if (A == 1)
+        {
+            if (G == 0) y = x;
+            if (G == 1) y = x;
+        }
+
+        // relu
+        if (A == 2)
+        {
+            if (G == 0) y = (x > 0) ? x : 0;
+            if (G == 1) y = (yy > 0) ? x : 0;
+        }
+
+        // lrelu
+        if (A == 3)
+        {
+            if (G == 0) y = (x > 0) ? x : x * alpha;
+            if (G == 1) y = (yy > 0) ? x : x * alpha;
+        }
+
+        // tanh
+        if (A == 4)
+        {
+            if (G == 0) { scalar_t c = exp(x); scalar_t d = one / c; y = (x < -expRange) ? -one : (x > expRange) ? one : (c - d) / (c + d); }
+            if (G == 1) y = x * (one - yy * yy);
+            if (G == 2) y = x * (one - yy * yy) * (-two * yy);
+        }
+
+        // sigmoid
+        if (A == 5)
+        {
+            if (G == 0) y = (x < -expRange) ? 0 : one / (exp(-x) + one);
+            if (G == 1) y = x * yy * (one - yy);
+            if (G == 2) y = x * yy * (one - yy) * (one - two * yy);
+        }
+
+        // elu
+        if (A == 6)
+        {
+            if (G == 0) y = (x >= 0) ? x : exp(x) - one;
+            if (G == 1) y = (yy >= 0) ? x : x * (yy + one);
+            if (G == 2) y = (yy >= 0) ? 0 : x * (yy + one);
+        }
+
+        // selu
+        if (A == 7)
+        {
+            if (G == 0) y = (x >= 0) ? seluScale * x : (seluScale * seluAlpha) * (exp(x) - one);
+            if (G == 1) y = (yy >= 0) ? x * seluScale : x * (yy + seluScale * seluAlpha);
+            if (G == 2) y = (yy >= 0) ? 0 : x * (yy + seluScale * seluAlpha);
+        }
+
+        // softplus
+        if (A == 8)
+        {
+            if (G == 0) y = (x > expRange) ? x : log(exp(x) + one);
+            if (G == 1) y = x * (one - exp(-yy));
+            if (G == 2) { scalar_t c = exp(-yy); y = x * c * (one - c); }
+        }
+
+        // swish
+        if (A == 9)
+        {
+            if (G == 0)
+                y = (x < -expRange) ? 0 : x / (exp(-x) + one);
+            else
+            {
+                // Unlike the other activations, these branches are computed
+                // from xref, and yref is recomputed here from xref for the
+                // clamp test below.
+                scalar_t c = exp(xref);
+                scalar_t d = c + one;
+                if (G == 1)
+                    y = (xref > halfExpRange) ? x : x * c * (xref + d) / (d * d);
+                else
+                    y = (xref > halfExpRange) ? 0 : x * c * (xref * (two - d) + two * d) / (d * d * d);
+                yref = (xref < -expRange) ? 0 : xref / (exp(-xref) + one) * gain;
+            }
+        }
+
+        // Apply gain.
+        y *= gain * dy;
+
+        // Clamp.
+        // A negative clamp value disables clamping. The two comparisons are
+        // combined with bitwise & on their bool results (no short-circuit).
+        if (clamp >= 0)
+        {
+            if (G == 0)
+                y = (y > -clamp & y < clamp) ? y : (y >= 0) ? clamp : -clamp;
+            else
+                // For G != 0 the result is zeroed wherever yref lies outside
+                // the open interval (-clamp, clamp).
+                y = (yref > -clamp & yref < clamp) ? y : 0;
+        }
+
+        // Store.
+        ((T*)p.y)[xi] = (T)y;
+    }
+}
+
+//------------------------------------------------------------------------
+// CUDA kernel selection.
+
+// Returns the kernel instantiation matching p.act for element type T, or NULL
+// when p.act is not one of the supported activation indices 1..9.
+template <class T> void* choose_bias_act_kernel(const bias_act_kernel_params& p)
+{
+    switch (p.act)
+    {
+        case 1:  return (void*)bias_act_kernel<T, 1>; // linear
+        case 2:  return (void*)bias_act_kernel<T, 2>; // relu
+        case 3:  return (void*)bias_act_kernel<T, 3>; // lrelu
+        case 4:  return (void*)bias_act_kernel<T, 4>; // tanh
+        case 5:  return (void*)bias_act_kernel<T, 5>; // sigmoid
+        case 6:  return (void*)bias_act_kernel<T, 6>; // elu
+        case 7:  return (void*)bias_act_kernel<T, 7>; // selu
+        case 8:  return (void*)bias_act_kernel<T, 8>; // softplus
+        case 9:  return (void*)bias_act_kernel<T, 9>; // swish
+        default: return NULL;
+    }
+}
+
+//------------------------------------------------------------------------
+// Explicit template instantiations.
+
+template void* choose_bias_act_kernel<double>       (const bias_act_kernel_params& p);
+template void* choose_bias_act_kernel<float>        (const bias_act_kernel_params& p);
+template void* choose_bias_act_kernel<c10::Half>    (const bias_act_kernel_params& p);
+
+//------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/ops/bias_act.h b/modules/eg3ds/torch_utils/ops/bias_act.h
new file mode 100644
index 0000000000000000000000000000000000000000..8994bfb4e9cae790865348e08de5f685152d3344
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/bias_act.h
@@ -0,0 +1,42 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+ *
+ * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+ * property and proprietary rights in and to this material, related
+ * documentation and any modifications thereto. Any use, reproduction,
+ * disclosure or distribution of this material and related documentation
+ * without an express license agreement from NVIDIA CORPORATION or
+ * its affiliates is strictly prohibited.
+ */
+
+//------------------------------------------------------------------------
+// CUDA kernel parameters.
+
+// Argument bundle handed to bias_act_kernel<T, A> and choose_bias_act_kernel<T>().
+// Data pointers are untyped; the kernel writes its output as ((T*)p.y)[xi], and the
+// input pointers are presumably read through the same element type T (confirm in
+// bias_act.cu).
+struct bias_act_kernel_params
+{
+    const void* x;      // [sizeX]
+    const void* b;      // [sizeB] or NULL
+    const void* xref;   // [sizeX] or NULL
+    const void* yref;   // [sizeX] or NULL
+    const void* dy;     // [sizeX] or NULL
+    void*       y;      // [sizeX]
+
+    int         grad;   // Presumably the derivative order; bias_act.py passes 0, 1 or 2 (confirm in bias_act.cpp).
+    int         act;    // Activation index; choose_bias_act_kernel() accepts 1..9 and returns NULL otherwise.
+    float       alpha;  // Presumably the activation shape parameter (`alpha` in bias_act.py); confirm in bias_act.cpp.
+    float       gain;   // Presumably the output scaling factor (`gain` in bias_act.py); confirm in bias_act.cu.
+    float       clamp;  // Presumably the clamp bound; the kernel clamps only when its `clamp` value is >= 0.
+
+    int         sizeX;  // Element count of the [sizeX] buffers above.
+    int         sizeB;  // Element count of `b`.
+    int         stepB;  // NOTE(review): presumably the stride used to map an x index to a bias index; set by host code not visible here.
+    int         loopX;  // NOTE(review): presumably the number of elements each thread processes; set by host code not visible here.
+};
+
+//------------------------------------------------------------------------
+// CUDA kernel selection.
+
+// Returns a pointer to the bias_act_kernel<T, A> instantiation selected by p.act,
+// or NULL when p.act is not a supported activation index. Defined in bias_act.cu,
+// which provides instantiations for double, float and c10::Half.
+template <class T> void* choose_bias_act_kernel(const bias_act_kernel_params& p);
+
+//------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/ops/bias_act.py b/modules/eg3ds/torch_utils/ops/bias_act.py
new file mode 100644
index 0000000000000000000000000000000000000000..3984639c54faae2233837175ccb210a63016426c
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/bias_act.py
@@ -0,0 +1,211 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Custom PyTorch ops for efficient bias and activation."""
+
+import os
+import numpy as np
+import torch
+from modules.eg3ds import dnnlib
+
+from .. import custom_ops
+from .. import misc
+
+#----------------------------------------------------------------------------
+
+# Table of supported activation functions, keyed by the name accepted as `act`.
+# Each entry provides:
+#   func:         Reference implementation, called as `func(x, alpha=alpha)` by `_bias_act_ref()`.
+#   def_alpha:    Value used for `alpha` when the caller passes `None`.
+#   def_gain:     Value used for `gain` when the caller passes `None`.
+#   cuda_idx:     Activation index handed to the CUDA plugin by `_bias_act_cuda()`.
+#   ref:          Tensors the CUDA backward pass keeps: contains 'x' (input and bias) and/or 'y' (output).
+#   has_2nd_grad: Whether the CUDA path evaluates a second-order gradient term for this activation.
+activation_funcs = {
+    'linear':   dnnlib.EasyDict(func=lambda x, **_:         x,                                          def_alpha=0,    def_gain=1,             cuda_idx=1, ref='',  has_2nd_grad=False),
+    'relu':     dnnlib.EasyDict(func=lambda x, **_:         torch.nn.functional.relu(x),                def_alpha=0,    def_gain=np.sqrt(2),    cuda_idx=2, ref='y', has_2nd_grad=False),
+    'lrelu':    dnnlib.EasyDict(func=lambda x, alpha, **_:  torch.nn.functional.leaky_relu(x, alpha),   def_alpha=0.2,  def_gain=np.sqrt(2),    cuda_idx=3, ref='y', has_2nd_grad=False),
+    'tanh':     dnnlib.EasyDict(func=lambda x, **_:         torch.tanh(x),                              def_alpha=0,    def_gain=1,             cuda_idx=4, ref='y', has_2nd_grad=True),
+    'sigmoid':  dnnlib.EasyDict(func=lambda x, **_:         torch.sigmoid(x),                           def_alpha=0,    def_gain=1,             cuda_idx=5, ref='y', has_2nd_grad=True),
+    'elu':      dnnlib.EasyDict(func=lambda x, **_:         torch.nn.functional.elu(x),                 def_alpha=0,    def_gain=1,             cuda_idx=6, ref='y', has_2nd_grad=True),
+    'selu':     dnnlib.EasyDict(func=lambda x, **_:         torch.nn.functional.selu(x),                def_alpha=0,    def_gain=1,             cuda_idx=7, ref='y', has_2nd_grad=True),
+    'softplus': dnnlib.EasyDict(func=lambda x, **_:         torch.nn.functional.softplus(x),            def_alpha=0,    def_gain=1,             cuda_idx=8, ref='y', has_2nd_grad=True),
+    'swish':    dnnlib.EasyDict(func=lambda x, **_:         torch.sigmoid(x) * x,                       def_alpha=0,    def_gain=np.sqrt(2),    cuda_idx=9, ref='x', has_2nd_grad=True),
+}
+
+#----------------------------------------------------------------------------
+
+# Plugin object returned by `custom_ops.get_plugin()`; populated lazily by `_init()`.
+_plugin = None
+# Zero-element placeholder passed wherever an optional tensor argument is absent.
+_null_tensor = torch.empty([0])
+
+def _init():
+    """Load the `bias_act_plugin` extension on first use.
+
+    Returns:
+        Always `True`, so the call can be chained inside a boolean condition.
+    """
+    global _plugin
+    if _plugin is not None:
+        return True
+    _plugin = custom_ops.get_plugin(
+        module_name='bias_act_plugin',
+        sources=['bias_act.cpp', 'bias_act.cu'],
+        headers=['bias_act.h'],
+        source_dir=os.path.dirname(__file__),
+        extra_cuda_cflags=['--use_fast_math'],
+    )
+    return True
+
+#----------------------------------------------------------------------------
+
+def bias_act(x, b=None, dim=1, act='linear', alpha=None, gain=None, clamp=None, impl='cuda'):
+    r"""Apply bias, activation, gain and clamping to `x` as one fused operation.
+
+    The steps run in this order: add `b` along `dim`, evaluate `act`, multiply by
+    `gain`, clamp to `[-clamp, +clamp]`. Each step can be disabled through its
+    argument. The CUDA path is taken only when `impl == 'cuda'` and `x` lives on a
+    CUDA device; every other case uses the reference implementation. First and
+    second order gradients are supported, third order gradients are not.
+
+    Args:
+        x:      Input activation tensor of any shape.
+        b:      1D bias tensor of the same type as `x` whose length equals
+                `x.shape[dim]`, or `None` for no bias.
+        dim:    Dimension of `x` that `b` is broadcast along. Ignored when `b` is `None`.
+        act:    Key into `activation_funcs`, e.g. `"relu"`, `"lrelu"`, `"tanh"`,
+                `"sigmoid"`, `"swish"`. `"linear"` means no activation. `None` is not allowed.
+        alpha:  Shape parameter for the activation, or `None` for its `def_alpha`.
+        gain:   Scaling factor for the output, or `None` for the activation's `def_gain`.
+                If unsure, consider specifying 1.
+        clamp:  Non-negative clamp bound, or `None` to skip clamping (default).
+        impl:   Implementation to use: `"cuda"` (default) or `"ref"`.
+
+    Returns:
+        Tensor of the same shape and datatype as `x`.
+    """
+    assert isinstance(x, torch.Tensor)
+    assert impl in ['ref', 'cuda']
+
+    # `_init()` is evaluated last so the plugin is only loaded when it will be used.
+    use_cuda = impl == 'cuda' and x.device.type == 'cuda' and _init()
+    if not use_cuda:
+        return _bias_act_ref(x=x, b=b, dim=dim, act=act, alpha=alpha, gain=gain, clamp=clamp)
+
+    fused_op = _bias_act_cuda(dim=dim, act=act, alpha=alpha, gain=gain, clamp=clamp)
+    return fused_op.apply(x, b)
+
+#----------------------------------------------------------------------------
+
+@misc.profiled_function
+def _bias_act_ref(x, b=None, dim=1, act='linear', alpha=None, gain=None, clamp=None):
+    """Slow reference implementation of `bias_act()` using standard PyTorch ops.
+
+    Takes the same arguments as `bias_act()` (minus `impl`) and applies, in order:
+    bias addition along `dim`, the activation from `activation_funcs[act]`, scaling
+    by `gain`, and clamping to `[-clamp, +clamp]`.
+    """
+    assert isinstance(x, torch.Tensor)
+    assert clamp is None or clamp >= 0
+    spec = activation_funcs[act]
+
+    # Resolve defaults; a negative clamp value means "no clamping".
+    if alpha is None:
+        alpha = spec.def_alpha
+    if gain is None:
+        gain = spec.def_gain
+    alpha = float(alpha)
+    gain = float(gain)
+    clamp = float(-1 if clamp is None else clamp)
+
+    # Add bias, reshaped so that it broadcasts along `dim` only.
+    if b is not None:
+        assert isinstance(b, torch.Tensor) and b.ndim == 1
+        assert 0 <= dim < x.ndim
+        assert b.shape[0] == x.shape[dim]
+        bias_shape = [-1 if i == dim else 1 for i in range(x.ndim)]
+        x = x + b.reshape(bias_shape)
+
+    # Evaluate activation function.
+    y = spec.func(x, alpha=alpha)
+
+    # Scale by gain.
+    if gain != 1:
+        y = y * gain
+
+    # Clamp.
+    if clamp >= 0:
+        y = y.clamp(-clamp, clamp) # pylint: disable=invalid-unary-operand-type
+    return y
+
+#----------------------------------------------------------------------------
+
+# Maps (dim, act, alpha, gain, clamp) to the autograd Function class built for that
+# configuration, so each configuration is constructed only once.
+_bias_act_cuda_cache = dict()
+
+def _bias_act_cuda(dim=1, act='linear', alpha=None, gain=None, clamp=None):
+    """Fast CUDA implementation of `bias_act()` using custom ops.
+
+    Builds (or fetches from `_bias_act_cuda_cache`) a `torch.autograd.Function`
+    subclass specialized for the given configuration. The configuration values are
+    captured by closure; the returned class is used as `cls.apply(x, b)`.
+
+    Args:
+        dim:    Dimension of `x` that the bias is applied along.
+        act:    Key into `activation_funcs`.
+        alpha:  Activation shape parameter, or `None` for the activation's default.
+        gain:   Output scaling factor, or `None` for the activation's default.
+        clamp:  Non-negative clamp bound, or `None` to disable (stored as -1).
+
+    Returns:
+        The `BiasActCuda` autograd Function class for this configuration.
+    """
+    # Parse arguments.
+    assert clamp is None or clamp >= 0
+    spec = activation_funcs[act]
+    alpha = float(alpha if alpha is not None else spec.def_alpha)
+    gain = float(gain if gain is not None else spec.def_gain)
+    clamp = float(clamp if clamp is not None else -1)
+
+    # Lookup from cache.
+    key = (dim, act, alpha, gain, clamp)
+    if key in _bias_act_cuda_cache:
+        return _bias_act_cuda_cache[key]
+
+    # Forward op.
+    class BiasActCuda(torch.autograd.Function):
+        @staticmethod
+        def forward(ctx, x, b): # pylint: disable=arguments-differ
+            # Pick channels_last when x has more than 2 dims and unit stride along dim 1;
+            # the same format is reused for the incoming gradient in backward().
+            ctx.memory_format = torch.channels_last if x.ndim > 2 and x.stride(1) == 1 else torch.contiguous_format
+            x = x.contiguous(memory_format=ctx.memory_format)
+            b = b.contiguous() if b is not None else _null_tensor
+            y = x
+            # Skip the plugin call when the op is an identity (linear, unit gain, no clamp, no bias).
+            if act != 'linear' or gain != 1 or clamp >= 0 or b is not _null_tensor:
+                y = _plugin.bias_act(x, b, _null_tensor, _null_tensor, _null_tensor, 0, dim, spec.cuda_idx, alpha, gain, clamp)
+            # Keep only the tensors this activation's backward pass reads; the rest are
+            # replaced by the empty placeholder.
+            ctx.save_for_backward(
+                x if 'x' in spec.ref or spec.has_2nd_grad else _null_tensor,
+                b if 'x' in spec.ref or spec.has_2nd_grad else _null_tensor,
+                y if 'y' in spec.ref else _null_tensor)
+            return y
+
+        @staticmethod
+        def backward(ctx, dy): # pylint: disable=arguments-differ
+            dy = dy.contiguous(memory_format=ctx.memory_format)
+            x, b, y = ctx.saved_tensors
+            dx = None
+            db = None
+
+            # dx is needed for the input gradient and as the source of the bias gradient.
+            if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
+                dx = dy
+                # With no activation, unit gain and no clamp, the gradient passes through unchanged.
+                if act != 'linear' or gain != 1 or clamp >= 0:
+                    dx = BiasActCudaGrad.apply(dy, x, b, y)
+
+            # Bias gradient: reduce dx over every dimension except `dim`.
+            if ctx.needs_input_grad[1]:
+                db = dx.sum([i for i in range(dx.ndim) if i != dim])
+
+            return dx, db
+
+    # Backward op.
+    class BiasActCudaGrad(torch.autograd.Function):
+        @staticmethod
+        def forward(ctx, dy, x, b, y): # pylint: disable=arguments-differ
+            ctx.memory_format = torch.channels_last if dy.ndim > 2 and dy.stride(1) == 1 else torch.contiguous_format
+            # NOTE(review): the sixth argument (1) presumably selects the first-order
+            # gradient in the plugin; confirm against bias_act.cpp.
+            dx = _plugin.bias_act(dy, b, x, y, _null_tensor, 1, dim, spec.cuda_idx, alpha, gain, clamp)
+            # dy is kept only when a second-order term will be evaluated in backward().
+            ctx.save_for_backward(
+                dy if spec.has_2nd_grad else _null_tensor,
+                x, b, y)
+            return dx
+
+        @staticmethod
+        def backward(ctx, d_dx): # pylint: disable=arguments-differ
+            d_dx = d_dx.contiguous(memory_format=ctx.memory_format)
+            dy, x, b, y = ctx.saved_tensors
+            d_dy = None
+            d_x = None
+            d_b = None
+            d_y = None # Never assigned: no gradient is returned for the `y` input.
+
+            # Gradient w.r.t. dy reuses this same op, applied to d_dx.
+            if ctx.needs_input_grad[0]:
+                d_dy = BiasActCudaGrad.apply(d_dx, x, b, y)
+
+            # NOTE(review): the sixth argument (2) presumably selects the second-order
+            # gradient in the plugin; confirm against bias_act.cpp.
+            if spec.has_2nd_grad and (ctx.needs_input_grad[1] or ctx.needs_input_grad[2]):
+                d_x = _plugin.bias_act(d_dx, b, x, y, dy, 2, dim, spec.cuda_idx, alpha, gain, clamp)
+
+            # Bias gradient: reduce d_x over every dimension except `dim`.
+            if spec.has_2nd_grad and ctx.needs_input_grad[2]:
+                d_b = d_x.sum([i for i in range(d_x.ndim) if i != dim])
+
+            return d_dy, d_x, d_b, d_y
+
+    # Add to cache.
+    _bias_act_cuda_cache[key] = BiasActCuda
+    return BiasActCuda
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/ops/conv2d_gradfix.py b/modules/eg3ds/torch_utils/ops/conv2d_gradfix.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a177cc1c0b6eabf16908cf9afaa4387e7716b72
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/conv2d_gradfix.py
@@ -0,0 +1,199 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Custom replacement for `torch.nn.functional.conv2d` that supports
+arbitrarily high order gradients with zero performance penalty."""
+
+import contextlib
+import torch
+
+# pylint: disable=redefined-builtin
+# pylint: disable=arguments-differ
+# pylint: disable=protected-access
+
+#----------------------------------------------------------------------------
+
+enabled = False                     # Enable the custom op by setting this to true.
+weight_gradients_disabled = False   # Forcefully disable computation of gradients with respect to the weights.
+
+@contextlib.contextmanager
+def no_weight_gradients(disable=True):
+    """Temporarily disable weight-gradient computation in the custom conv ops.
+
+    While the `with` block is active and `disable` is truthy, the module-level flag
+    `weight_gradients_disabled` is set to `True`. On exit the previous value is
+    restored, including when the block raises.
+
+    Args:
+        disable: When falsy, the context manager leaves the flag untouched.
+    """
+    global weight_gradients_disabled
+    old = weight_gradients_disabled
+    if disable:
+        weight_gradients_disabled = True
+    try:
+        yield
+    finally:
+        # Restore even if the body raised; otherwise one failed step would leave
+        # weight gradients disabled for the rest of the process.
+        weight_gradients_disabled = old
+
+#----------------------------------------------------------------------------
+
+def conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
+    """Replacement for `torch.nn.functional.conv2d()` with the same arguments.
+
+    Uses the cached custom autograd op from `_conv2d_gradfix()` when
+    `_should_use_custom_op()` accepts `input`; otherwise forwards every argument
+    unchanged to the stock PyTorch function.
+    """
+    if not _should_use_custom_op(input):
+        return torch.nn.functional.conv2d(input=input, weight=weight, bias=bias, stride=stride, padding=padding, dilation=dilation, groups=groups)
+    op = _conv2d_gradfix(transpose=False, weight_shape=weight.shape, stride=stride, padding=padding, output_padding=0, dilation=dilation, groups=groups)
+    return op.apply(input, weight, bias)
+
+def conv_transpose2d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1):
+    """Replacement for `torch.nn.functional.conv_transpose2d()` with the same arguments.
+
+    Uses the cached custom autograd op from `_conv2d_gradfix()` when
+    `_should_use_custom_op()` accepts `input`; otherwise forwards every argument
+    unchanged to the stock PyTorch function.
+    """
+    if not _should_use_custom_op(input):
+        return torch.nn.functional.conv_transpose2d(input=input, weight=weight, bias=bias, stride=stride, padding=padding, output_padding=output_padding, groups=groups, dilation=dilation)
+    op = _conv2d_gradfix(transpose=True, weight_shape=weight.shape, stride=stride, padding=padding, output_padding=output_padding, groups=groups, dilation=dilation)
+    return op.apply(input, weight, bias)
+
+#----------------------------------------------------------------------------
+
+def _should_use_custom_op(input):
+    """Return `True` when the custom conv op should handle `input`.
+
+    That requires the module-level `enabled` flag, an enabled cuDNN backend, and
+    an `input` tensor located on a CUDA device.
+    """
+    assert isinstance(input, torch.Tensor)
+    globally_on = enabled and torch.backends.cudnn.enabled
+    return bool(globally_on) and input.device.type == 'cuda'
+
+def _tuple_of_ints(xs, ndim):
+    """Normalize `xs` to a tuple of exactly `ndim` ints.
+
+    A tuple or list is converted as-is; any other value is repeated `ndim` times.
+    Asserts that the result has `ndim` elements and that each one is an `int`.
+    """
+    if isinstance(xs, (tuple, list)):
+        values = tuple(xs)
+    else:
+        values = (xs,) * ndim
+    assert len(values) == ndim
+    assert all(isinstance(v, int) for v in values)
+    return values
+
+#----------------------------------------------------------------------------
+
+# Maps a full convolution configuration to the autograd Function class built for it.
+_conv2d_gradfix_cache = dict()
+# Zero-element placeholder saved in place of tensors that backward will not need.
+_null_tensor = torch.empty([0])
+
+def _conv2d_gradfix(transpose, weight_shape, stride, padding, output_padding, dilation, groups):
+    """Build (or fetch from cache) the autograd Function for one convolution configuration.
+
+    The configuration is captured by closure, so the returned class is used as
+    `cls.apply(input, weight, bias)`. Arguments are validated only on a cache miss.
+
+    Args:
+        transpose:      `True` for transposed convolution, `False` for regular convolution.
+        weight_shape:   Shape of the weight tensor; must have 4 dimensions.
+        stride, padding, output_padding, dilation:
+                        An int or a 2-element tuple/list of ints each.
+        groups:         Number of groups; must be >= 1.
+
+    Returns:
+        The `Conv2d` autograd Function class specialized for this configuration.
+    """
+    # Parse arguments.
+    ndim = 2
+    weight_shape = tuple(weight_shape)
+    stride = _tuple_of_ints(stride, ndim)
+    padding = _tuple_of_ints(padding, ndim)
+    output_padding = _tuple_of_ints(output_padding, ndim)
+    dilation = _tuple_of_ints(dilation, ndim)
+
+    # Lookup from cache.
+    key = (transpose, weight_shape, stride, padding, output_padding, dilation, groups)
+    if key in _conv2d_gradfix_cache:
+        return _conv2d_gradfix_cache[key]
+
+    # Validate arguments.
+    assert groups >= 1
+    assert len(weight_shape) == ndim + 2
+    assert all(stride[i] >= 1 for i in range(ndim))
+    assert all(padding[i] >= 0 for i in range(ndim))
+    assert all(dilation[i] >= 0 for i in range(ndim))
+    if not transpose:
+        assert all(output_padding[i] == 0 for i in range(ndim))
+    else: # transpose
+        assert all(0 <= output_padding[i] < max(stride[i], dilation[i]) for i in range(ndim))
+
+    # Helpers.
+    common_kwargs = dict(stride=stride, padding=padding, dilation=dilation, groups=groups)
+    def calc_output_padding(input_shape, output_shape):
+        # Output padding for the opposite-direction op used to compute the input gradient.
+        # When this op is itself transposed, the opposite op is a regular convolution,
+        # which takes no output padding.
+        if transpose:
+            return [0, 0]
+        # Per spatial dim: the part of the input size not accounted for by the
+        # stride/padding/dilation/kernel terms below.
+        return [
+            input_shape[i + 2]
+            - (output_shape[i + 2] - 1) * stride[i]
+            - (1 - 2 * padding[i])
+            - dilation[i] * (weight_shape[i + 2] - 1)
+            for i in range(ndim)
+        ]
+
+    # Forward & backward.
+    class Conv2d(torch.autograd.Function):
+        @staticmethod
+        def forward(ctx, input, weight, bias):
+            assert weight.shape == weight_shape
+            # Save each tensor only if the gradient that reads it can be requested:
+            # `input` feeds the weight gradient, `weight` feeds the input gradient.
+            ctx.save_for_backward(
+                input if weight.requires_grad else _null_tensor,
+                weight if input.requires_grad else _null_tensor,
+            )
+            ctx.input_shape = input.shape
+
+            # Simple 1x1 convolution => cuBLAS (only on Volta, not on Ampere).
+            if weight_shape[2:] == stride == dilation == (1, 1) and padding == (0, 0) and torch.cuda.get_device_capability(input.device) < (8, 0):
+                # Express the 1x1 convolution as a per-group matrix product over flattened positions.
+                a = weight.reshape(groups, weight_shape[0] // groups, weight_shape[1])
+                b = input.reshape(input.shape[0], groups, input.shape[1] // groups, -1)
+                c = (a.transpose(1, 2) if transpose else a) @ b.permute(1, 2, 0, 3).flatten(2)
+                c = c.reshape(-1, input.shape[0], *input.shape[2:]).transpose(0, 1)
+                c = c if bias is None else c + bias.unsqueeze(0).unsqueeze(2).unsqueeze(3)
+                # Return the result in the same memory format as the input.
+                return c.contiguous(memory_format=(torch.channels_last if input.stride(1) == 1 else torch.contiguous_format))
+
+            # General case => cuDNN.
+            if transpose:
+                return torch.nn.functional.conv_transpose2d(input=input, weight=weight, bias=bias, output_padding=output_padding, **common_kwargs)
+            return torch.nn.functional.conv2d(input=input, weight=weight, bias=bias, **common_kwargs)
+
+        @staticmethod
+        def backward(ctx, grad_output):
+            input, weight = ctx.saved_tensors
+            input_shape = ctx.input_shape
+            grad_input = None
+            grad_weight = None
+            grad_bias = None
+
+            # Input gradient: apply the opposite-direction op (itself a cached custom op).
+            if ctx.needs_input_grad[0]:
+                p = calc_output_padding(input_shape=input_shape, output_shape=grad_output.shape)
+                op = _conv2d_gradfix(transpose=(not transpose), weight_shape=weight_shape, output_padding=p, **common_kwargs)
+                grad_input = op.apply(grad_output, weight, None)
+                assert grad_input.shape == input_shape
+
+            # Weight gradient: skipped (left as None) while `weight_gradients_disabled` is set.
+            if ctx.needs_input_grad[1] and not weight_gradients_disabled:
+                grad_weight = Conv2dGradWeight.apply(grad_output, input, weight)
+                assert grad_weight.shape == weight_shape
+
+            # Bias gradient: sum over batch and spatial dimensions.
+            if ctx.needs_input_grad[2]:
+                grad_bias = grad_output.sum([0, 2, 3])
+
+            return grad_input, grad_weight, grad_bias
+
+    # Gradient with respect to the weights.
+    class Conv2dGradWeight(torch.autograd.Function):
+        @staticmethod
+        def forward(ctx, grad_output, input, weight):
+            # `grad_output` is read when computing grad2_input, `input` when computing
+            # grad2_grad_output; each is saved only if the other argument requires grad.
+            ctx.save_for_backward(
+                grad_output if input.requires_grad else _null_tensor,
+                input if grad_output.requires_grad else _null_tensor,
+            )
+            ctx.grad_output_shape = grad_output.shape
+            ctx.input_shape = input.shape
+
+            # Simple 1x1 convolution => cuBLAS (on both Volta and Ampere).
+            if weight_shape[2:] == stride == dilation == (1, 1) and padding == (0, 0):
+                a = grad_output.reshape(grad_output.shape[0], groups, grad_output.shape[1] // groups, -1).permute(1, 2, 0, 3).flatten(2)
+                b = input.reshape(input.shape[0], groups, input.shape[1] // groups, -1).permute(1, 2, 0, 3).flatten(2)
+                c = (b @ a.transpose(1, 2) if transpose else a @ b.transpose(1, 2)).reshape(weight_shape)
+                return c.contiguous(memory_format=(torch.channels_last if input.stride(1) == 1 else torch.contiguous_format))
+
+            # General case => cuDNN.
+            # Only the weight gradient is requested (output_mask) and extracted ([1]).
+            return torch.ops.aten.convolution_backward(grad_output=grad_output, input=input, weight=weight, bias_sizes=None, stride=stride, padding=padding, dilation=dilation, transposed=transpose, output_padding=output_padding, groups=groups, output_mask=[False, True, False])[1]
+
+
+        @staticmethod
+        def backward(ctx, grad2_grad_weight):
+            grad_output, input = ctx.saved_tensors
+            grad_output_shape = ctx.grad_output_shape
+            input_shape = ctx.input_shape
+            grad2_grad_output = None
+            grad2_input = None
+
+            # Gradient w.r.t. grad_output: run the forward op with grad2_grad_weight as the weight.
+            if ctx.needs_input_grad[0]:
+                grad2_grad_output = Conv2d.apply(input, grad2_grad_weight, None)
+                assert grad2_grad_output.shape == grad_output_shape
+
+            # Gradient w.r.t. input: opposite-direction op with grad2_grad_weight as the weight.
+            if ctx.needs_input_grad[1]:
+                p = calc_output_padding(input_shape=input_shape, output_shape=grad_output_shape)
+                op = _conv2d_gradfix(transpose=(not transpose), weight_shape=weight_shape, output_padding=p, **common_kwargs)
+                grad2_input = op.apply(grad_output, grad2_grad_weight, None)
+                assert grad2_input.shape == input_shape
+
+            # No gradient is returned for the third forward argument (`weight`).
+            return grad2_grad_output, grad2_input
+
+    _conv2d_gradfix_cache[key] = Conv2d
+    return Conv2d
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/ops/conv2d_resample.py b/modules/eg3ds/torch_utils/ops/conv2d_resample.py
new file mode 100644
index 0000000000000000000000000000000000000000..5daad2efadcd79513aaf8aee9ecb08a5ce04797e
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/conv2d_resample.py
@@ -0,0 +1,147 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""2D convolution with optional up/downsampling."""
+
+import torch
+
+from .. import misc
+from . import conv2d_gradfix
+from . import upfirdn2d
+from .upfirdn2d import _parse_padding
+from .upfirdn2d import _get_filter_size
+
+#----------------------------------------------------------------------------
+
+def _get_weight_shape(w):
+    """Return the shape of `w` as a list of Python ints, checked via `misc.assert_shape()`."""
+    # The int() conversions run under suppress_tracer_warnings(); the resulting
+    # values will be treated as constants.
+    with misc.suppress_tracer_warnings():
+        dims = list(map(int, w.shape))
+    misc.assert_shape(w, dims)
+    return dims
+
+#----------------------------------------------------------------------------
+
+def _conv2d_wrapper(x, w, stride=1, padding=0, groups=1, transpose=False, flip_weight=True):
+    """Run `conv2d_gradfix.conv2d()` or `conv2d_gradfix.conv_transpose2d()` on `x`.
+
+    Args:
+        x:           Input tensor.
+        w:           4D weight tensor.
+        stride, padding, groups:
+                     Forwarded unchanged to the underlying op.
+        transpose:   Selects `conv_transpose2d()` instead of `conv2d()`.
+        flip_weight: `True` = correlation (weights used as given), `False` = convolution
+                     (weights flipped along both spatial axes first).
+    """
+    _, _, kh, kw = _get_weight_shape(w)
+
+    # conv2d() itself performs correlation, so true convolution needs the kernel
+    # flipped. A 1x1 kernel is unaffected by flipping, so the flip is skipped.
+    needs_flip = (not flip_weight) and (kw > 1 or kh > 1)
+    if needs_flip:
+        w = w.flip([2, 3])
+
+    # Execute using conv2d_gradfix.
+    if transpose:
+        conv_op = conv2d_gradfix.conv_transpose2d
+    else:
+        conv_op = conv2d_gradfix.conv2d
+    return conv_op(x, w, stride=stride, padding=padding, groups=groups)
+
+#----------------------------------------------------------------------------
+
+@misc.profiled_function
+def conv2d_resample(x, w, f=None, up=1, down=1, padding=0, groups=1, flip_weight=True, flip_filter=False):
+    r"""2D convolution with optional up/downsampling.
+
+    Padding is performed only once at the beginning, not between the operations.
+    Depending on `up`, `down` and the kernel size, one of several equivalent fast
+    paths is chosen; a generic upsample -> convolve -> downsample sequence is the
+    fallback.
+
+    Args:
+        x:              Input tensor of shape
+                        `[batch_size, in_channels, in_height, in_width]`.
+        w:              Weight tensor of shape
+                        `[out_channels, in_channels//groups, kernel_height, kernel_width]`.
+                        Must have the same dtype as `x`.
+        f:              Low-pass filter for up/downsampling. Must be prepared beforehand by
+                        calling upfirdn2d.setup_filter(). None = identity (default).
+                        A float16 filter is converted to float32 before use.
+        up:             Integer upsampling factor (default: 1).
+        down:           Integer downsampling factor (default: 1).
+        padding:        Padding with respect to the upsampled image. Can be a single number
+                        or a list/tuple `[x, y]` or `[x_before, x_after, y_before, y_after]`
+                        (default: 0).
+        groups:         Split input channels into N groups (default: 1).
+        flip_weight:    False = convolution, True = correlation (default: True).
+        flip_filter:    False = convolution, True = correlation (default: False).
+
+    Returns:
+        Tensor of the shape `[batch_size, num_channels, out_height, out_width]`.
+    """
+    # Validate arguments.
+    assert isinstance(x, torch.Tensor) and (x.ndim == 4)
+    assert isinstance(w, torch.Tensor) and (w.ndim == 4) and (w.dtype == x.dtype)
+    # Promote a half-precision filter so that the float32 assertion below holds.
+    if isinstance(f, torch.Tensor) and f.dtype == torch.float16:
+        f = f.float()
+    assert f is None or (isinstance(f, torch.Tensor) and f.ndim in [1, 2] and f.dtype == torch.float32)
+    assert isinstance(up, int) and (up >= 1)
+    assert isinstance(down, int) and (down >= 1)
+    assert isinstance(groups, int) and (groups >= 1)
+    out_channels, in_channels_per_group, kh, kw = _get_weight_shape(w)
+    fw, fh = _get_filter_size(f)
+    px0, px1, py0, py1 = _parse_padding(padding)
+
+    # Adjust padding to account for up/downsampling.
+    if up > 1:
+        px0 += (fw + up - 1) // 2
+        px1 += (fw - up) // 2
+        py0 += (fh + up - 1) // 2
+        py1 += (fh - up) // 2
+    if down > 1:
+        px0 += (fw - down + 1) // 2
+        px1 += (fw - down) // 2
+        py0 += (fh - down + 1) // 2
+        py1 += (fh - down) // 2
+
+    # Fast path: 1x1 convolution with downsampling only => downsample first, then convolve.
+    if kw == 1 and kh == 1 and (down > 1 and up == 1):
+        x = upfirdn2d.upfirdn2d(x=x, f=f, down=down, padding=[px0,px1,py0,py1], flip_filter=flip_filter)
+        x = _conv2d_wrapper(x=x, w=w, groups=groups, flip_weight=flip_weight)
+        return x
+
+    # Fast path: 1x1 convolution with upsampling only => convolve first, then upsample.
+    if kw == 1 and kh == 1 and (up > 1 and down == 1):
+        x = _conv2d_wrapper(x=x, w=w, groups=groups, flip_weight=flip_weight)
+        x = upfirdn2d.upfirdn2d(x=x, f=f, up=up, padding=[px0,px1,py0,py1], gain=up**2, flip_filter=flip_filter)
+        return x
+
+    # Fast path: downsampling only => use strided convolution.
+    if down > 1 and up == 1:
+        x = upfirdn2d.upfirdn2d(x=x, f=f, padding=[px0,px1,py0,py1], flip_filter=flip_filter)
+        x = _conv2d_wrapper(x=x, w=w, stride=down, groups=groups, flip_weight=flip_weight)
+        return x
+
+    # Fast path: upsampling with optional downsampling => use transpose strided convolution.
+    if up > 1:
+        # Swap the output/input channel axes of the weights (within each group when
+        # groups > 1) for use with the transposed convolution below.
+        if groups == 1:
+            w = w.transpose(0, 1)
+        else:
+            w = w.reshape(groups, out_channels // groups, in_channels_per_group, kh, kw)
+            w = w.transpose(1, 2)
+            w = w.reshape(groups * in_channels_per_group, out_channels // groups, kh, kw)
+        px0 -= kw - 1
+        px1 -= kw - up
+        py0 -= kh - 1
+        py1 -= kh - up
+        # pxt/pyt: the non-negative amount handed to the transposed convolution as its
+        # own `padding`; it is added back to the padding given to upfirdn2d afterwards.
+        pxt = max(min(-px0, -px1), 0)
+        pyt = max(min(-py0, -py1), 0)
+        x = _conv2d_wrapper(x=x, w=w, stride=up, padding=[pyt,pxt], groups=groups, transpose=True, flip_weight=(not flip_weight))
+        x = upfirdn2d.upfirdn2d(x=x, f=f, padding=[px0+pxt,px1+pxt,py0+pyt,py1+pyt], gain=up**2, flip_filter=flip_filter)
+        if down > 1:
+            x = upfirdn2d.upfirdn2d(x=x, f=f, down=down, flip_filter=flip_filter)
+        return x
+
+    # Fast path: no up/downsampling, padding supported by the underlying implementation => use plain conv2d.
+    # (Requires symmetric, non-negative padding along each axis.)
+    if up == 1 and down == 1:
+        if px0 == px1 and py0 == py1 and px0 >= 0 and py0 >= 0:
+            return _conv2d_wrapper(x=x, w=w, padding=[py0,px0], groups=groups, flip_weight=flip_weight)
+
+    # Fallback: Generic reference implementation.
+    x = upfirdn2d.upfirdn2d(x=x, f=(f if up > 1 else None), up=up, padding=[px0,px1,py0,py1], gain=up**2, flip_filter=flip_filter)
+    x = _conv2d_wrapper(x=x, w=w, groups=groups, flip_weight=flip_weight)
+    if down > 1:
+        x = upfirdn2d.upfirdn2d(x=x, f=f, down=down, flip_filter=flip_filter)
+    return x
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/ops/filtered_lrelu.cpp b/modules/eg3ds/torch_utils/ops/filtered_lrelu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4f55466235a020b0f5e150350bfdcd8b2a1e579d
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/filtered_lrelu.cpp
@@ -0,0 +1,304 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+ *
+ * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+ * property and proprietary rights in and to this material, related
+ * documentation and any modifications thereto. Any use, reproduction,
+ * disclosure or distribution of this material and related documentation
+ * without an express license agreement from NVIDIA CORPORATION or
+ * its affiliates is strictly prohibited.
+ */
+
+#include <torch/extension.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include "filtered_lrelu.h"
+
+//------------------------------------------------------------------------
+
+static std::tuple<torch::Tensor, torch::Tensor, int> filtered_lrelu( // Host-side launcher. Returns (y, so, 0) on success, or (empty, empty, -1) when no CUDA kernel matches.
+    torch::Tensor x, torch::Tensor fu, torch::Tensor fd, torch::Tensor b, torch::Tensor si, // x: rank-4 input; fu/fd: float32 filters of rank 1 or 2; b: per-channel bias; si: sign tensor to read (empty = none).
+    int up, int down, int px0, int px1, int py0, int py1, int sx, int sy, float gain, float slope, float clamp, bool flip_filters, bool writeSigns)
+{
+    // Set CUDA device.
+    TORCH_CHECK(x.is_cuda(), "x must reside on CUDA device");
+    const at::cuda::OptionalCUDAGuard device_guard(device_of(x));
+
+    // Validate arguments. The order of the checks determines which message a caller sees first.
+    TORCH_CHECK(fu.device() == x.device() && fd.device() == x.device() && b.device() == x.device(), "all input tensors must reside on the same device");
+    TORCH_CHECK(fu.dtype() == torch::kFloat && fd.dtype() == torch::kFloat, "fu and fd must be float32");
+    TORCH_CHECK(b.dtype() == x.dtype(), "x and b must have the same dtype");
+    TORCH_CHECK(x.dtype() == torch::kHalf || x.dtype() == torch::kFloat, "x and b must be float16 or float32");
+    TORCH_CHECK(x.dim() == 4, "x must be rank 4");
+    TORCH_CHECK(x.size(0) * x.size(1) <= INT_MAX && x.size(2) <= INT_MAX && x.size(3) <= INT_MAX, "x is too large");
+    TORCH_CHECK(x.numel() > 0, "x is empty");
+    TORCH_CHECK((fu.dim() == 1 || fu.dim() == 2) && (fd.dim() == 1 || fd.dim() == 2), "fu and fd must be rank 1 or 2");
+    TORCH_CHECK(fu.size(0) <= INT_MAX && fu.size(-1) <= INT_MAX, "fu is too large");
+    TORCH_CHECK(fd.size(0) <= INT_MAX && fd.size(-1) <= INT_MAX, "fd is too large");
+    TORCH_CHECK(fu.numel() > 0, "fu is empty");
+    TORCH_CHECK(fd.numel() > 0, "fd is empty");
+    TORCH_CHECK(b.dim() == 1 && b.size(0) == x.size(1), "b must be a vector with the same number of channels as x");
+    TORCH_CHECK(up >= 1 && down >= 1, "up and down must be at least 1");
+
+    // Figure out how much shared memory is available on the device.
+    int maxSharedBytes = 0;
+    AT_CUDA_CHECK(cudaDeviceGetAttribute(&maxSharedBytes, cudaDevAttrMaxSharedMemoryPerBlockOptin, x.device().index()));
+    int sharedKB = maxSharedBytes >> 10;
+
+    // Populate enough launch parameters to check if a CUDA kernel exists.
+    filtered_lrelu_kernel_params p;
+    p.up      = up;
+    p.down    = down;
+    p.fuShape = make_int2((int)fu.size(-1), fu.dim() == 2 ? (int)fu.size(0) : 0); // shape [n, 0] indicates separable filter.
+    p.fdShape = make_int2((int)fd.size(-1), fd.dim() == 2 ? (int)fd.size(0) : 0);
+    filtered_lrelu_kernel_spec test_spec = choose_filtered_lrelu_kernel<float, int32_t, false, false>(p, sharedKB); // Probe with one fixed variant; only test_spec.exec is inspected.
+    if (!test_spec.exec)
+    {
+        // No kernel found - return empty tensors and indicate missing kernel with return code of -1.
+        return std::make_tuple(torch::Tensor(), torch::Tensor(), -1);
+    }
+
+    // Input/output element size in bytes (float16 or float32, as validated above).
+    int64_t sz = (x.dtype() == torch::kHalf) ? 2 : 4;
+
+    // Input sizes. The fut_*/fdt_* values are the filter extents minus one.
+    int64_t xw = (int)x.size(3);
+    int64_t xh = (int)x.size(2);
+    int64_t fut_w = (int)fu.size(-1) - 1;
+    int64_t fut_h = (int)fu.size(0)  - 1;
+    int64_t fdt_w = (int)fd.size(-1) - 1;
+    int64_t fdt_h = (int)fd.size(0)  - 1;
+
+    // Logical size of upsampled buffer.
+    int64_t cw = xw * up + (px0 + px1) - fut_w;
+    int64_t ch = xh * up + (py0 + py1) - fut_h;
+    TORCH_CHECK(cw > fdt_w && ch > fdt_h, "upsampled buffer must be at least the size of downsampling filter");
+    TORCH_CHECK(cw <= INT_MAX && ch <= INT_MAX, "upsampled buffer is too large");
+
+    // Compute output size and allocate.
+    int64_t yw = (cw - fdt_w + (down - 1)) / down;
+    int64_t yh = (ch - fdt_h + (down - 1)) / down;
+    TORCH_CHECK(yw > 0 && yh > 0, "output must be at least 1x1");
+    TORCH_CHECK(yw <= INT_MAX && yh <= INT_MAX, "output is too large");
+    torch::Tensor y = torch::empty({x.size(0), x.size(1), yh, yw}, x.options(), x.suggest_memory_format());
+
+    // Allocate sign tensor.
+    torch::Tensor so;
+    torch::Tensor s = si;
+    bool readSigns = !!s.numel(); // A non-empty si requests reading signs from it.
+    int64_t sw_active = 0; // Active width of sign tensor.
+    if (writeSigns)
+    {
+        sw_active = yw * down - (down - 1) + fdt_w;     // Active width in elements.
+        int64_t sh = yh * down - (down - 1) + fdt_h;    // Height = active height.
+        int64_t sw = (sw_active + 15) & ~15;            // Width  = active width in elements, rounded up to multiple of 16.
+        TORCH_CHECK(sh <= INT_MAX && (sw >> 2) <= INT_MAX, "signs is too large");
+        s = so = torch::empty({x.size(0), x.size(1), sh, sw >> 2}, x.options().dtype(torch::kUInt8), at::MemoryFormat::Contiguous); // Stored width is sw / 4 bytes, i.e. four sign elements per byte.
+    }
+    else if (readSigns)
+        sw_active = s.size(3) << 2; // Stored width in bytes -> width in elements.
+
+    // Validate sign tensor if in use.
+    if (readSigns || writeSigns)
+    {
+        TORCH_CHECK(s.is_contiguous(), "signs must be contiguous");
+        TORCH_CHECK(s.dtype() == torch::kUInt8, "signs must be uint8");
+        TORCH_CHECK(s.device() == x.device(), "signs must reside on the same device as x");
+        TORCH_CHECK(s.dim() == 4, "signs must be rank 4");
+        TORCH_CHECK(s.size(0) == x.size(0) && s.size(1) == x.size(1), "signs must have same batch & channels as x");
+        TORCH_CHECK(s.size(2) <= INT_MAX && s.size(3) <= INT_MAX, "signs is too large");
+    }
+
+    // Populate rest of CUDA kernel parameters.
+    p.x         = x.data_ptr();
+    p.y         = y.data_ptr();
+    p.b         = b.data_ptr();
+    p.s         = (readSigns || writeSigns) ? s.data_ptr<unsigned char>() : 0;
+    p.fu        = fu.data_ptr<float>();
+    p.fd        = fd.data_ptr<float>();
+    p.pad0      = make_int2(px0, py0);
+    p.gain      = gain;
+    p.slope     = slope;
+    p.clamp     = clamp;
+    p.flip      = (flip_filters) ? 1 : 0;
+    p.xShape    = make_int4((int)x.size(3), (int)x.size(2), (int)x.size(1), (int)x.size(0));
+    p.yShape    = make_int4((int)y.size(3), (int)y.size(2), (int)y.size(1), (int)y.size(0));
+    p.sShape    = (readSigns || writeSigns) ? make_int2((int)s.size(3), (int)s.size(2)) : make_int2(0, 0); // Width is in bytes. Contiguous.
+    p.sOfs      = make_int2(sx, sy);
+    p.swLimit   = (sw_active + 3) >> 2; // Rounded up to bytes.
+
+    // x, y, b strides are in bytes.
+    p.xStride   = make_longlong4(sz * x.stride(3), sz * x.stride(2), sz * x.stride(1), sz * x.stride(0));
+    p.yStride   = make_longlong4(sz * y.stride(3), sz * y.stride(2), sz * y.stride(1), sz * y.stride(0));
+    p.bStride   = sz * b.stride(0);
+
+    // fu, fd strides are in elements.
+    p.fuStride  = make_longlong3(fu.stride(-1), fu.dim() == 2 ? fu.stride(0) : 0, 0);
+    p.fdStride  = make_longlong3(fd.stride(-1), fd.dim() == 2 ? fd.stride(0) : 0, 0);
+
+    // Determine if indices don't fit in int32. Support negative strides although Torch currently never produces those.
+    bool index64b = false;
+    if (std::abs(p.bStride * x.size(1)) > INT_MAX) index64b = true;
+    if (std::min(x.size(0) * p.xStride.w, 0ll) + std::min(x.size(1) * p.xStride.z, 0ll) + std::min(x.size(2) * p.xStride.y, 0ll) + std::min(x.size(3) * p.xStride.x, 0ll) < -INT_MAX) index64b = true;
+    if (std::max(x.size(0) * p.xStride.w, 0ll) + std::max(x.size(1) * p.xStride.z, 0ll) + std::max(x.size(2) * p.xStride.y, 0ll) + std::max(x.size(3) * p.xStride.x, 0ll) >  INT_MAX) index64b = true;
+    if (std::min(y.size(0) * p.yStride.w, 0ll) + std::min(y.size(1) * p.yStride.z, 0ll) + std::min(y.size(2) * p.yStride.y, 0ll) + std::min(y.size(3) * p.yStride.x, 0ll) < -INT_MAX) index64b = true;
+    if (std::max(y.size(0) * p.yStride.w, 0ll) + std::max(y.size(1) * p.yStride.z, 0ll) + std::max(y.size(2) * p.yStride.y, 0ll) + std::max(y.size(3) * p.yStride.x, 0ll) >  INT_MAX) index64b = true;
+    if (s.numel() > INT_MAX) index64b = true;
+
+    // Choose CUDA kernel.
+    filtered_lrelu_kernel_spec spec = { 0 };
+    AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "filtered_lrelu_cuda", [&]
+    {
+        if constexpr (sizeof(scalar_t) <= 4) // Exclude doubles. constexpr prevents template instantiation.
+        {
+            // Choose kernel based on index type, datatype and sign read/write modes. No variant exists for writeSigns together with readSigns.
+            if      (!index64b &&  writeSigns && !readSigns) spec = choose_filtered_lrelu_kernel<scalar_t, int32_t, true,  false>(p, sharedKB);
+            else if (!index64b && !writeSigns &&  readSigns) spec = choose_filtered_lrelu_kernel<scalar_t, int32_t, false, true >(p, sharedKB);
+            else if (!index64b && !writeSigns && !readSigns) spec = choose_filtered_lrelu_kernel<scalar_t, int32_t, false, false>(p, sharedKB);
+            else if ( index64b &&  writeSigns && !readSigns) spec = choose_filtered_lrelu_kernel<scalar_t, int64_t, true,  false>(p, sharedKB);
+            else if ( index64b && !writeSigns &&  readSigns) spec = choose_filtered_lrelu_kernel<scalar_t, int64_t, false, true >(p, sharedKB);
+            else if ( index64b && !writeSigns && !readSigns) spec = choose_filtered_lrelu_kernel<scalar_t, int64_t, false, false>(p, sharedKB);
+        }
+    });
+    TORCH_CHECK(spec.exec, "internal error - CUDA kernel not found"); // Kernel existence was tested earlier; this is also reached when writeSigns is combined with a non-empty si, since no variant is selected for that above.
+
+    // Launch CUDA kernel.
+    void* args[] = {&p};
+    int bx = spec.numWarps * 32;
+    int gx = (p.yShape.x - 1) / spec.tileOut.x + 1;
+    int gy = (p.yShape.y - 1) / spec.tileOut.y + 1;
+    int gz = p.yShape.z * p.yShape.w;
+
+    // Repeat multiple horizontal tiles in a CTA?
+    if (spec.xrep)
+    {
+        p.tilesXrep = spec.xrep;
+        p.tilesXdim = gx;
+
+        gx = (gx + p.tilesXrep - 1) / p.tilesXrep;
+        std::swap(gx, gy); // Grid x now counts tile rows and grid y counts groups of tilesXrep horizontal tiles.
+    }
+    else
+    {
+        p.tilesXrep = 0;
+        p.tilesXdim = 0;
+    }
+
+    // Launch filter setup kernel.
+    AT_CUDA_CHECK(cudaLaunchKernel(spec.setup, 1, 1024, args, 0, at::cuda::getCurrentCUDAStream()));
+
+    // Copy kernels to constant memory.
+    if      ( writeSigns && !readSigns) AT_CUDA_CHECK((copy_filters<true,  false>(at::cuda::getCurrentCUDAStream())));
+    else if (!writeSigns &&  readSigns) AT_CUDA_CHECK((copy_filters<false, true >(at::cuda::getCurrentCUDAStream())));
+    else if (!writeSigns && !readSigns) AT_CUDA_CHECK((copy_filters<false, false>(at::cuda::getCurrentCUDAStream())));
+
+    // Set cache and shared memory configurations for main kernel.
+    AT_CUDA_CHECK(cudaFuncSetCacheConfig(spec.exec, cudaFuncCachePreferShared));
+    if (spec.dynamicSharedKB) // Need dynamically allocated shared memory?
+        AT_CUDA_CHECK(cudaFuncSetAttribute(spec.exec, cudaFuncAttributeMaxDynamicSharedMemorySize, spec.dynamicSharedKB << 10));
+    AT_CUDA_CHECK(cudaFuncSetSharedMemConfig(spec.exec, cudaSharedMemBankSizeFourByte));
+
+    // Launch main kernel.
+    const int maxSubGz = 65535; // CUDA maximum for grid z dimension.
+    for (int zofs=0; zofs < gz; zofs += maxSubGz) // Do multiple launches if gz is too big.
+    {
+        p.blockZofs = zofs;
+        int subGz = std::min(maxSubGz, gz - zofs);
+        AT_CUDA_CHECK(cudaLaunchKernel(spec.exec, dim3(gx, gy, subGz), bx, args, spec.dynamicSharedKB << 10, at::cuda::getCurrentCUDAStream()));
+    }
+
+    // Done. so stays an empty tensor unless writeSigns allocated it above.
+    return std::make_tuple(y, so, 0);
+}
+
+//------------------------------------------------------------------------
+
+static torch::Tensor filtered_lrelu_act(torch::Tensor x, torch::Tensor si, int sx, int sy, float gain, float slope, float clamp, bool writeSigns) // Activation-only launcher: no output tensor is allocated for x; returns so, which is allocated only when writeSigns is set.
+{
+    // Set CUDA device.
+    TORCH_CHECK(x.is_cuda(), "x must reside on CUDA device");
+    const at::cuda::OptionalCUDAGuard device_guard(device_of(x));
+
+    // Validate arguments.
+    TORCH_CHECK(x.dim() == 4, "x must be rank 4");
+    TORCH_CHECK(x.size(0) * x.size(1) <= INT_MAX && x.size(2) <= INT_MAX && x.size(3) <= INT_MAX, "x is too large");
+    TORCH_CHECK(x.numel() > 0, "x is empty");
+    TORCH_CHECK(x.dtype() == torch::kHalf || x.dtype() == torch::kFloat || x.dtype() == torch::kDouble, "x must be float16, float32 or float64");
+
+    // Output signs if we don't have sign input.
+    torch::Tensor so;
+    torch::Tensor s = si;
+    bool readSigns = !!s.numel(); // A non-empty si requests reading signs from it.
+    if (writeSigns)
+    {
+        int64_t sw = x.size(3);
+        sw = (sw + 15) & ~15; // Round to a multiple of 16 for coalescing.
+        s = so = torch::empty({x.size(0), x.size(1), x.size(2), sw >> 2}, x.options().dtype(torch::kUInt8), at::MemoryFormat::Contiguous); // Stored width is sw / 4 bytes, i.e. four sign elements per byte.
+    }
+
+    // Validate sign tensor if in use.
+    if (readSigns || writeSigns)
+    {
+        TORCH_CHECK(s.is_contiguous(), "signs must be contiguous");
+        TORCH_CHECK(s.dtype() == torch::kUInt8, "signs must be uint8");
+        TORCH_CHECK(s.device() == x.device(), "signs must reside on the same device as x");
+        TORCH_CHECK(s.dim() == 4, "signs must be rank 4");
+        TORCH_CHECK(s.size(0) == x.size(0) && s.size(1) == x.size(1), "signs must have same batch & channels as x");
+        TORCH_CHECK(s.size(2) <= INT_MAX && (s.size(3) << 2) <= INT_MAX, "signs tensor is too large");
+    }
+
+    // Initialize CUDA kernel parameters.
+    filtered_lrelu_act_kernel_params p;
+    p.x         = x.data_ptr();
+    p.s         = (readSigns || writeSigns) ? s.data_ptr<unsigned char>() : 0;
+    p.gain      = gain;
+    p.slope     = slope;
+    p.clamp     = clamp;
+    p.xShape    = make_int4((int)x.size(3), (int)x.size(2), (int)x.size(1), (int)x.size(0));
+    p.xStride   = make_longlong4(x.stride(3), x.stride(2), x.stride(1), x.stride(0)); // Strides are passed as returned by Torch, without scaling by the element size.
+    p.sShape    = (readSigns || writeSigns) ? make_int2((int)s.size(3) << 2, (int)s.size(2)) : make_int2(0, 0); // Width is in elements. Contiguous.
+    p.sOfs      = make_int2(sx, sy);
+
+    // Choose CUDA kernel. writeSigns takes precedence over readSigns when both are set.
+    void* func = 0;
+    AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "filtered_lrelu_act_cuda", [&]
+    {
+        if (writeSigns)
+            func = choose_filtered_lrelu_act_kernel<scalar_t, true, false>();
+        else if (readSigns)
+            func = choose_filtered_lrelu_act_kernel<scalar_t, false, true>();
+        else
+            func = choose_filtered_lrelu_act_kernel<scalar_t, false, false>();
+    });
+    TORCH_CHECK(func, "internal error - CUDA kernel not found");
+
+    // Launch CUDA kernel.
+    void* args[] = {&p};
+    int bx = 128; // 4 warps per block.
+
+    // Logical size of launch = writeSigns ? p.s : p.x
+    uint32_t gx = writeSigns ? p.sShape.x : p.xShape.x;
+    uint32_t gy = writeSigns ? p.sShape.y : p.xShape.y;
+    uint32_t gz = p.xShape.z * p.xShape.w; // Same as in p.sShape if signs are in use.
+    gx = (gx - 1) / bx + 1; // Number of bx-wide blocks needed to cover the launch width.
+
+    // Make sure grid y and z dimensions are within CUDA launch limits. Kernel loops internally to do the rest.
+    const uint32_t gmax = 65535;
+    gy = std::min(gy, gmax);
+    gz = std::min(gz, gmax);
+
+    // Launch.
+    AT_CUDA_CHECK(cudaLaunchKernel(func, dim3(gx, gy, gz), bx, args, 0, at::cuda::getCurrentCUDAStream()));
+    return so; // Empty tensor unless writeSigns allocated it above.
+}
+
+//------------------------------------------------------------------------
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
+{
+    m.def("filtered_lrelu",      &filtered_lrelu);      // Full launcher. Returns (y, so, return code); a return code of -1 means no matching CUDA kernel was found.
+    m.def("filtered_lrelu_act_", &filtered_lrelu_act);  // Activation and sign tensor handling only. Modifies data tensor in-place.
+}
+
+//------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/ops/filtered_lrelu.cu b/modules/eg3ds/torch_utils/ops/filtered_lrelu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..aaac95408365f023ffaa4cb89348d499d3b948f0
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/filtered_lrelu.cu
@@ -0,0 +1,1288 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+ *
+ * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+ * property and proprietary rights in and to this material, related
+ * documentation and any modifications thereto. Any use, reproduction,
+ * disclosure or distribution of this material and related documentation
+ * without an express license agreement from NVIDIA CORPORATION or
+ * its affiliates is strictly prohibited.
+ */
+
+#include <c10/util/Half.h>
+#include "filtered_lrelu.h"
+#include <cstdint>
+
+//------------------------------------------------------------------------
+// Helpers.
+
+enum // Filter modes.
+{
+    MODE_SUSD = 0,  // Separable upsampling, separable downsampling.
+    MODE_FUSD = 1,  // Full upsampling, separable downsampling.
+    MODE_SUFD = 2,  // Separable upsampling, full downsampling.
+    MODE_FUFD = 3,  // Full upsampling, full downsampling.
+};
+
+template <class T> struct InternalType; // Maps a storage type T to the scalar and vector types the kernels compute with.
+template <> struct InternalType<double>
+{
+    using scalar_t = double; using vec2_t = double2; using vec4_t = double4;
+    static __device__ __forceinline__ vec2_t zero_vec2() { return make_double2(0, 0); }
+    static __device__ __forceinline__ vec4_t zero_vec4() { return make_double4(0, 0, 0, 0); }
+    static __device__ __forceinline__ double clamp(double x, double c) { const double lower = fmax(x, -c); return fmin(lower, c); } // Clamp x to [-c, c].
+};
+template <> struct InternalType<float>
+{
+    using scalar_t = float; using vec2_t = float2; using vec4_t = float4;
+    static __device__ __forceinline__ vec2_t zero_vec2() { return make_float2(0, 0); }
+    static __device__ __forceinline__ vec4_t zero_vec4() { return make_float4(0, 0, 0, 0); }
+    static __device__ __forceinline__ float clamp(float x, float c) { const float lower = fmaxf(x, -c); return fminf(lower, c); } // Clamp x to [-c, c].
+};
+template <> struct InternalType<c10::Half> // Half storage uses the same float types as the float specialization.
+{
+    using scalar_t = float; using vec2_t = float2; using vec4_t = float4;
+    static __device__ __forceinline__ vec2_t zero_vec2() { return make_float2(0, 0); }
+    static __device__ __forceinline__ vec4_t zero_vec4() { return make_float4(0, 0, 0, 0); }
+    static __device__ __forceinline__ float clamp(float x, float c) { const float lower = fmaxf(x, -c); return fminf(lower, c); } // Clamp x to [-c, c].
+};
+
+#define MIN(A, B)       ((A) < (B) ? (A) : (B)) // Smaller of A and B. Arguments appear twice in the expansion, so avoid side effects.
+#define MAX(A, B)       ((A) > (B) ? (A) : (B)) // Larger of A and B. Arguments appear twice in the expansion, so avoid side effects.
+#define CEIL_DIV(A, B) (((B)==1) ? (A) : \
+                        ((B)==2) ? ((int)((A)+1) >> 1) : \
+                        ((B)==4) ? ((int)((A)+3) >> 2) : \
+                        (((A) + ((A) > 0 ? (B) - 1 : 0)) / (B))) // A / B rounded up: shifts for B == 2 and B == 4, otherwise B - 1 is added only when A > 0 before dividing. NOTE(review): the shift forms presumably rely on arithmetic right shift for negative A.
+
+// Splits i into y = i / N and x = i - y * N. Non-power-of-two N <= 256 use a reciprocal multiply; every other N uses plain division.
+template <int N> __device__ __forceinline__ void fast_div_mod(int& x, int& y, unsigned int i)
+{
+    const bool useReciprocal = ((N & (N-1)) != 0) && (N <= 256);
+    const unsigned int quotient = useReciprocal
+        ? ((i * ((1<<24)/N + 1)) >> 24) // Multiply by (2^24 / N + 1), then shift. Assumes N <= 256, i < N*256.
+        : (i/N);
+    y = quotient;
+    x = i - y*N;
+}
+
+// Reads the stored 64-bit stride through a pointer to T, i.e. reinterprets its memory as T instead of converting the value.
+template <class T> __device__ __forceinline__ T get_stride(const int64_t& x)
+{
+    return reinterpret_cast<const T*>(&x)[0];
+}
+
+//------------------------------------------------------------------------
+// Filters, setup kernel, copying function.
+
+#define MAX_FILTER_SIZE 32 // Each filter occupies a MAX_FILTER_SIZE x MAX_FILTER_SIZE slot in the buffers below.
+
+// Combined up/down filter buffers so that transfer can be done with one copy.
+__device__              float g_fbuf[2 * MAX_FILTER_SIZE * MAX_FILTER_SIZE]; // Filters in global memory, written by setup kernel.
+__device__ __constant__ float c_fbuf[2 * MAX_FILTER_SIZE * MAX_FILTER_SIZE]; // Filters in constant memory, read by main kernel.
+
+// Accessors to combined buffers to index up/down filters individually.
+#define c_fu (c_fbuf) // Upsampling filter: first slot of the constant buffer.
+#define c_fd (c_fbuf + MAX_FILTER_SIZE * MAX_FILTER_SIZE) // Downsampling filter: second slot of the constant buffer.
+#define g_fu (g_fbuf) // Upsampling filter: first slot of the global buffer.
+#define g_fd (g_fbuf + MAX_FILTER_SIZE * MAX_FILTER_SIZE) // Downsampling filter: second slot of the global buffer.
+
+// Fills the global staging buffers g_fu / g_fd: each filter is zero-padded to MAX_FILTER_SIZE x MAX_FILTER_SIZE and index-reversed unless p.flip is set.
+static __global__ void setup_filters_kernel(filtered_lrelu_kernel_params p)
+{
+    for (int tap = threadIdx.x; tap < MAX_FILTER_SIZE * MAX_FILTER_SIZE; tap += blockDim.x)
+    {
+        int col, row;
+        fast_div_mod<MAX_FILTER_SIZE>(col, row, tap);
+
+        // Upsampling filter. fuShape.y > 0 selects 2D indexing; otherwise only row 0 is filled from the 1D filter.
+        const bool fuIs2D    = (p.fuShape.y > 0);
+        const bool fuInRange = (col < p.fuShape.x) && (fuIs2D ? (row < p.fuShape.y) : (row <= 0));
+        const int  fuCol     = p.flip ? col : (p.fuShape.x - 1 - col);
+        const int  fuRow     = p.flip ? row : (p.fuShape.y - 1 - row);
+        g_fu[tap] = !fuInRange ? 0.0f : (fuIs2D ? p.fu[fuCol * p.fuStride.x + fuRow * p.fuStride.y] : p.fu[fuCol * p.fuStride.x]);
+
+        // Downsampling filter, laid out the same way. The source filter is only read for in-range taps.
+        const bool fdIs2D    = (p.fdShape.y > 0);
+        const bool fdInRange = (col < p.fdShape.x) && (fdIs2D ? (row < p.fdShape.y) : (row <= 0));
+        const int  fdCol     = p.flip ? col : (p.fdShape.x - 1 - col);
+        const int  fdRow     = p.flip ? row : (p.fdShape.y - 1 - row);
+        g_fd[tap] = !fdInRange ? 0.0f : (fdIs2D ? p.fd[fdCol * p.fdStride.x + fdRow * p.fdStride.y] : p.fd[fdCol * p.fdStride.x]);
+    }
+}
+
+// Host-side helper: copies the filters staged in g_fbuf by the setup kernel into the constant buffer c_fbuf on the given stream. The template arguments are unused.
+template <bool, bool> static cudaError_t copy_filters(cudaStream_t stream)
+{
+    void* stagedFilters = nullptr;
+    const cudaError_t lookupStatus = cudaGetSymbolAddress(&stagedFilters, g_fbuf);
+    const size_t numBytes = 2 * MAX_FILTER_SIZE * MAX_FILTER_SIZE * sizeof(float); // Both filter slots in one transfer.
+    return lookupStatus ? lookupStatus : cudaMemcpyToSymbolAsync(c_fbuf, stagedFilters, numBytes, 0, cudaMemcpyDeviceToDevice, stream);
+}
+
+//------------------------------------------------------------------------
+// Coordinate spaces:
+// - Relative to input tensor:      inX, inY, tileInX, tileInY
+// - Relative to input tile:        relInX, relInY, tileInW, tileInH
+// - Relative to upsampled tile:    relUpX, relUpY, tileUpW, tileUpH
+// - Relative to output tile:       relOutX, relOutY, tileOutW, tileOutH
+// - Relative to output tensor:     outX, outY, tileOutX, tileOutY
+//
+// Relationships between coordinate spaces:
+// - inX = tileInX + relInX
+// - inY = tileInY + relInY
+// - relUpX = relInX * up + phaseInX
+// - relUpY = relInY * up + phaseInY
+// - relUpX = relOutX * down
+// - relUpY = relOutY * down
+// - outX = tileOutX + relOutX
+// - outY = tileOutY + relOutY
+
+extern __shared__ char s_buf_raw[]; // When sharedKB <= 48, allocate shared memory statically inside the kernel, otherwise use the externally allocated shared memory buffer.
+
+template <class T, class index_t, int sharedKB, bool signWrite, bool signRead, int filterMode, int up, int fuSize, int down, int fdSize, int tileOutW, int tileOutH, int threadsPerBlock, bool enableXrep, bool enableWriteSkip>
+static __global__ void filtered_lrelu_kernel(filtered_lrelu_kernel_params p)
+{
+    // Check that we don't try to support non-existing filter modes.
+    static_assert(up   == 1 || up   == 2 || up   == 4, "only up=1, up=2, up=4 scales supported");
+    static_assert(down == 1 || down == 2 || down == 4, "only down=1, down=2, down=4 scales supported");
+    static_assert(fuSize >= up,   "upsampling filter size must be at least upsampling factor");
+    static_assert(fdSize >= down, "downsampling filter size must be at least downsampling factor");
+    static_assert(fuSize % up   == 0, "upsampling filter size must be divisible with upsampling factor");
+    static_assert(fdSize % down == 0, "downsampling filter size must be divisible with downsampling factor");
+    static_assert(fuSize <= MAX_FILTER_SIZE && fdSize <= MAX_FILTER_SIZE, "filter size greater than MAX_FILTER_SIZE");
+    static_assert(up   != 1 || (fuSize == 1 && (filterMode == MODE_FUFD || filterMode == MODE_FUSD)), "up=1 supported only for 1x1 full filters");
+    static_assert(down != 1 || (fdSize == 1 && (filterMode == MODE_FUFD || filterMode == MODE_SUFD)), "down=1 supported only for 1x1 full filters");
+    static_assert(!(up   == 4 && (filterMode == MODE_FUFD || filterMode == MODE_FUSD)), "full filters not supported for up=4");
+    static_assert(!(down == 4 && (filterMode == MODE_FUFD || filterMode == MODE_SUFD)), "full filters not supported for down=4");
+
+    // Static definitions.
+    typedef typename InternalType<T>::scalar_t scalar_t;
+    typedef typename InternalType<T>::vec2_t vec2_t;
+    typedef typename InternalType<T>::vec4_t vec4_t;
+    const int tileUpW    = (tileOutW * down + (fdSize - 1) - (down - 1) + 3) & ~3;  // Upsampled tile width, rounded up to multiple of 4.
+    const int tileUpH    = tileOutH * down + (fdSize - 1) - (down - 1);             // Upsampled tile height.
+    const int tileInW    = CEIL_DIV(tileUpW  + (fuSize - 1), up);                   // Input tile width.
+    const int tileInH    = CEIL_DIV(tileUpH  + (fuSize - 1), up);                   // Input tile height.
+    const int tileUpH_up = CEIL_DIV(tileUpH, up) * up;                              // Upsampled tile height rounded up to a multiple of up.
+    const int tileInH_up = CEIL_DIV(tileUpH_up + (fuSize - 1), up);                 // For allocations only, to avoid shared memory read overruns with up=2 and up=4.
+
+    // Merge 1x1 downsampling into last upsampling step for upf1 and ups2.
+    const bool downInline = (down == 1) && ((up == 1 && filterMode == MODE_FUFD) || (up == 2 && filterMode == MODE_SUFD));
+
+    // Sizes of logical buffers.
+    const int szIn    = tileInH_up * tileInW;
+    const int szUpX   = tileInH_up * tileUpW;
+    const int szUpXY  = downInline ? 0 : (tileUpH * tileUpW);
+    const int szDownX = tileUpH * tileOutW;
+
+    // Sizes for shared memory arrays.
+    const int s_buf0_size_base =
+        (filterMode == MODE_SUSD) ? MAX(szIn, szUpXY) :
+        (filterMode == MODE_FUSD) ? MAX(szIn, szDownX) :
+        (filterMode == MODE_SUFD) ? MAX(szIn, szUpXY) :
+        (filterMode == MODE_FUFD) ? szIn :
+        -1;
+    const int s_buf1_size_base =
+        (filterMode == MODE_SUSD) ? MAX(szUpX, szDownX) :
+        (filterMode == MODE_FUSD) ? szUpXY :
+        (filterMode == MODE_SUFD) ? szUpX  :
+        (filterMode == MODE_FUFD) ? szUpXY :
+        -1;
+
+    // Ensure U128 alignment.
+    const int s_buf0_size = (s_buf0_size_base + 3) & ~3;
+    const int s_buf1_size = (s_buf1_size_base + 3) & ~3;
+
+    // Check at compile time that we don't use too much shared memory.
+    static_assert((s_buf0_size + s_buf1_size) * sizeof(scalar_t) <= (sharedKB << 10), "shared memory overflow");
+
+    // Declare shared memory arrays.
+    scalar_t* s_buf0;
+    scalar_t* s_buf1;
+    if (sharedKB <= 48)
+    {
+        // Allocate shared memory arrays here.
+        __shared__ scalar_t s_buf0_st[(sharedKB > 48) ? (1<<24) : (s_buf0_size + s_buf1_size)]; // Prevent launching if this isn't optimized away when unused.
+        s_buf0 = s_buf0_st;
+        s_buf1 = s_buf0 + s_buf0_size;
+    }
+    else
+    {
+        // Use the dynamically allocated shared memory array.
+        s_buf0 = (scalar_t*)s_buf_raw;
+        s_buf1 = s_buf0 + s_buf0_size;
+    }
+
+    // Pointers to the buffers.
+    scalar_t* s_tileIn;       // Input tile:                      [relInX * tileInH + relInY]
+    scalar_t* s_tileUpX;      // After horizontal upsampling:     [relInY * tileUpW + relUpX]
+    scalar_t* s_tileUpXY;     // After upsampling:                [relUpY * tileUpW + relUpX]
+    scalar_t* s_tileDownX;    // After horizontal downsampling:   [relUpY * tileOutW + relOutX]
+    if (filterMode == MODE_SUSD)
+    {
+        s_tileIn    = s_buf0;
+        s_tileUpX   = s_buf1;
+        s_tileUpXY  = s_buf0;
+        s_tileDownX = s_buf1;
+    }
+    else if (filterMode == MODE_FUSD)
+    {
+        s_tileIn    = s_buf0;
+        s_tileUpXY  = s_buf1;
+        s_tileDownX = s_buf0;
+    }
+    else if (filterMode == MODE_SUFD)
+    {
+        s_tileIn    = s_buf0;
+        s_tileUpX   = s_buf1;
+        s_tileUpXY  = s_buf0;
+    }
+    else if (filterMode == MODE_FUFD)
+    {
+        s_tileIn    = s_buf0;
+        s_tileUpXY  = s_buf1;
+    }
+
+    // Allow large grids in z direction via per-launch offset.
+    int channelIdx = blockIdx.z + p.blockZofs;
+    int batchIdx = channelIdx / p.yShape.z;
+    channelIdx -= batchIdx * p.yShape.z;
+
+    // Offset to output feature map. In bytes.
+    index_t mapOfsOut = channelIdx * get_stride<index_t>(p.yStride.z) + batchIdx * get_stride<index_t>(p.yStride.w);
+
+    // Sign shift amount.
+    uint32_t signXo = ((threadIdx.x + p.sOfs.x) << 1) & 6;
+
+    // Inner tile loop.
+    #pragma unroll 1
+    for (int tileIdx = 0; !enableXrep || (tileIdx < MIN(p.tilesXrep, p.tilesXdim - p.tilesXrep * blockIdx.y)); tileIdx++)
+    {
+        // Locate output tile.
+        int tileX = enableXrep ? blockIdx.y * p.tilesXrep + tileIdx : blockIdx.x;
+        int tileOutX = tileX * tileOutW;
+        int tileOutY = (enableXrep ? blockIdx.x : blockIdx.y) * tileOutH;
+
+        // Locate input tile.
+        int tmpX = tileOutX * down - p.pad0.x;
+        int tmpY = tileOutY * down - p.pad0.y;
+        int tileInX = CEIL_DIV(tmpX, up);
+        int tileInY = CEIL_DIV(tmpY, up);
+        const int phaseInX = tileInX * up - tmpX;
+        const int phaseInY = tileInY * up - tmpY;
+
+        // Extra sync if input and output buffers are the same and we are not on first tile.
+        if (enableXrep && tileIdx > 0 && (filterMode == MODE_FUSD || (filterMode == MODE_SUFD && !downInline) || (filterMode == MODE_FUFD && downInline)))
+            __syncthreads();
+
+        // Load input tile & apply bias. Unrolled.
+        scalar_t b = (scalar_t)*(const T*)((const char*)p.b + (channelIdx * get_stride<index_t>(p.bStride)));
+        index_t mapOfsIn = channelIdx * get_stride<index_t>(p.xStride.z) + batchIdx * get_stride<index_t>(p.xStride.w);
+        int idx = threadIdx.x;
+        const int loopCountIN = CEIL_DIV(tileInW * tileInH, threadsPerBlock);
+        #pragma unroll
+        for (int loop = 0; loop < loopCountIN; loop++)
+        {
+            int relInX, relInY;
+            fast_div_mod<tileInW>(relInX, relInY, idx);
+            int inX = tileInX + relInX;
+            int inY = tileInY + relInY;
+            scalar_t v = 0;
+
+            if ((uint32_t)inX < p.xShape.x && (uint32_t)inY < p.xShape.y)
+                v = (scalar_t)*((const T*)((const char*)p.x + (inX * get_stride<index_t>(p.xStride.x) + inY * get_stride<index_t>(p.xStride.y) + mapOfsIn))) + b;
+
+            bool skip = (loop == loopCountIN-1) && (idx >= tileInW * tileInH);
+            if (!skip)
+                s_tileIn[idx] = v;
+
+            idx += threadsPerBlock;
+        }
+
+        if (filterMode == MODE_SUSD || filterMode == MODE_SUFD) // Separable upsampling filter.
+        {
+            // Horizontal upsampling.
+            __syncthreads();
+            if (up == 4)
+            {
+                for (int idx = threadIdx.x*up; idx < tileUpW * tileInH; idx += blockDim.x*up)
+                {
+                    int relUpX0, relInY;
+                    fast_div_mod<tileUpW>(relUpX0, relInY, idx);
+                    int relInX0 = relUpX0 / up;
+                    int src0 = relInX0 + tileInW * relInY;
+                    int dst = relInY * tileUpW + relUpX0;
+                    vec4_t v = InternalType<T>::zero_vec4();
+                    scalar_t a = s_tileIn[src0];
+                    if (phaseInX == 0)
+                    {
+                        #pragma unroll
+                        for (int step = 0; step < fuSize / up; step++)
+                        {
+                            v.x += a * (scalar_t)c_fu[step * up + 0];
+                            a = s_tileIn[src0 + step + 1];
+                            v.y += a * (scalar_t)c_fu[step * up + 3];
+                            v.z += a * (scalar_t)c_fu[step * up + 2];
+                            v.w += a * (scalar_t)c_fu[step * up + 1];
+                        }
+                    }
+                    else if (phaseInX == 1)
+                    {
+                        #pragma unroll
+                        for (int step = 0; step < fuSize / up; step++)
+                        {
+                            v.x += a * (scalar_t)c_fu[step * up + 1];
+                            v.y += a * (scalar_t)c_fu[step * up + 0];
+                            a = s_tileIn[src0 + step + 1];
+                            v.z += a * (scalar_t)c_fu[step * up + 3];
+                            v.w += a * (scalar_t)c_fu[step * up + 2];
+                        }
+                    }
+                    else if (phaseInX == 2)
+                    {
+                        #pragma unroll
+                        for (int step = 0; step < fuSize / up; step++)
+                        {
+                            v.x += a * (scalar_t)c_fu[step * up + 2];
+                            v.y += a * (scalar_t)c_fu[step * up + 1];
+                            v.z += a * (scalar_t)c_fu[step * up + 0];
+                            a = s_tileIn[src0 + step + 1];
+                            v.w += a * (scalar_t)c_fu[step * up + 3];
+                        }
+                    }
+                    else // (phaseInX == 3)
+                    {
+                        #pragma unroll
+                        for (int step = 0; step < fuSize / up; step++)
+                        {
+                            v.x += a * (scalar_t)c_fu[step * up + 3];
+                            v.y += a * (scalar_t)c_fu[step * up + 2];
+                            v.z += a * (scalar_t)c_fu[step * up + 1];
+                            v.w += a * (scalar_t)c_fu[step * up + 0];
+                            a = s_tileIn[src0 + step + 1];
+                        }
+                    }
+                    s_tileUpX[dst+0] = v.x;
+                    s_tileUpX[dst+1] = v.y;
+                    s_tileUpX[dst+2] = v.z;
+                    s_tileUpX[dst+3] = v.w;
+                }
+            }
+            else if (up == 2)
+            {
+                bool p0 = (phaseInX == 0);
+                for (int idx = threadIdx.x*up; idx < tileUpW * tileInH; idx += blockDim.x*up)
+                {
+                    int relUpX0, relInY;
+                    fast_div_mod<tileUpW>(relUpX0, relInY, idx);
+                    int relInX0 = relUpX0 / up;
+                    int src0 = relInX0 + tileInW * relInY;
+                    int dst = relInY * tileUpW + relUpX0;
+                    vec2_t v = InternalType<T>::zero_vec2();
+                    scalar_t a = s_tileIn[src0];
+                    if (p0) // (phaseInX == 0)
+                    {
+                        #pragma unroll
+                        for (int step = 0; step < fuSize / up; step++)
+                        {
+                            v.x += a * (scalar_t)c_fu[step * up + 0];
+                            a = s_tileIn[src0 + step + 1];
+                            v.y += a * (scalar_t)c_fu[step * up + 1];
+                        }
+                    }
+                    else // (phaseInX == 1)
+                    {
+                        #pragma unroll
+                        for (int step = 0; step < fuSize / up; step++)
+                        {
+                            v.x += a * (scalar_t)c_fu[step * up + 1];
+                            v.y += a * (scalar_t)c_fu[step * up + 0];
+                            a = s_tileIn[src0 + step + 1];
+                        }
+                    }
+                    s_tileUpX[dst+0] = v.x;
+                    s_tileUpX[dst+1] = v.y;
+                }
+            }
+
+            // Vertical upsampling & nonlinearity.
+
+            __syncthreads();
+            int groupMask = 15 << ((threadIdx.x & 31) & ~3);
+            int minY = tileOutY ? (tileOutY - tileOutH) * down + tileUpH : 0; // Skip already written signs.
+            int sShapeMaxY = MIN(p.sShape.y, tileOutY * down + tileUpH); // Avoid out-of-tile sign writes.
+            if (up == 4)
+            {
+                minY -= 3; // Adjust according to block height.
+                for (int idx = threadIdx.x; idx < tileUpW * tileUpH_up / up; idx += blockDim.x)
+                {
+                    int relUpX, relInY0;
+                    fast_div_mod<tileUpW>(relUpX, relInY0, idx);
+                    int relUpY0 = relInY0 * up;
+                    int src0 = relInY0 * tileUpW + relUpX;
+                    int dst = relUpY0 * tileUpW + relUpX;
+                    vec4_t v = InternalType<T>::zero_vec4();
+
+                    scalar_t a = s_tileUpX[src0];
+                    if (phaseInY == 0)
+                    {
+                        #pragma unroll
+                        for (int step = 0; step < fuSize / up; step++)
+                        {
+                            v.x += a * (scalar_t)c_fu[step * up + 0];
+                            a = s_tileUpX[src0 + (step + 1) * tileUpW];
+                            v.y += a * (scalar_t)c_fu[step * up + 3];
+                            v.z += a * (scalar_t)c_fu[step * up + 2];
+                            v.w += a * (scalar_t)c_fu[step * up + 1];
+                        }
+                    }
+                    else if (phaseInY == 1)
+                    {
+                        #pragma unroll
+                        for (int step = 0; step < fuSize / up; step++)
+                        {
+                            v.x += a * (scalar_t)c_fu[step * up + 1];
+                            v.y += a * (scalar_t)c_fu[step * up + 0];
+                            a = s_tileUpX[src0 + (step + 1) * tileUpW];
+                            v.z += a * (scalar_t)c_fu[step * up + 3];
+                            v.w += a * (scalar_t)c_fu[step * up + 2];
+                        }
+                    }
+                    else if (phaseInY == 2)
+                    {
+                        #pragma unroll
+                        for (int step = 0; step < fuSize / up; step++)
+                        {
+                            v.x += a * (scalar_t)c_fu[step * up + 2];
+                            v.y += a * (scalar_t)c_fu[step * up + 1];
+                            v.z += a * (scalar_t)c_fu[step * up + 0];
+                            a = s_tileUpX[src0 + (step + 1) * tileUpW];
+                            v.w += a * (scalar_t)c_fu[step * up + 3];
+                        }
+                    }
+                    else // (phaseInY == 3)
+                    {
+                        #pragma unroll
+                        for (int step = 0; step < fuSize / up; step++)
+                        {
+                            v.x += a * (scalar_t)c_fu[step * up + 3];
+                            v.y += a * (scalar_t)c_fu[step * up + 2];
+                            v.z += a * (scalar_t)c_fu[step * up + 1];
+                            v.w += a * (scalar_t)c_fu[step * up + 0];
+                            a = s_tileUpX[src0 + (step + 1) * tileUpW];
+                        }
+                    }
+
+                    int x = tileOutX * down + relUpX;
+                    int y = tileOutY * down + relUpY0;
+                    int signX = x + p.sOfs.x;
+                    int signY = y + p.sOfs.y;
+                    int signZ = blockIdx.z + p.blockZofs;
+                    int signXb = signX >> 2;
+                    index_t si0 = signXb + p.sShape.x * (signY + (index_t)p.sShape.y * signZ);
+                    index_t si1 = si0 + p.sShape.x;
+                    index_t si2 = si0 + p.sShape.x * 2;
+                    index_t si3 = si0 + p.sShape.x * 3;
+
+                    v.x *= (scalar_t)((float)up * (float)up * p.gain);
+                    v.y *= (scalar_t)((float)up * (float)up * p.gain);
+                    v.z *= (scalar_t)((float)up * (float)up * p.gain);
+                    v.w *= (scalar_t)((float)up * (float)up * p.gain);
+
+                    if (signWrite)
+                    {
+                        if (!enableWriteSkip)
+                        {
+                            // Determine and write signs.
+                            int sx = __float_as_uint(v.x) >> 31 <<  0;
+                            int sy = __float_as_uint(v.y) >> 31 <<  8;
+                            int sz = __float_as_uint(v.z) >> 31 << 16;
+                            int sw = __float_as_uint(v.w) >> 31 << 24;
+                            if (sx) v.x *= p.slope;
+                            if (sy) v.y *= p.slope;
+                            if (sz) v.z *= p.slope;
+                            if (sw) v.w *= p.slope;
+                            if (fabsf(v.x) > p.clamp) { sx = 2 <<  0; v.x = InternalType<T>::clamp(v.x, p.clamp); }
+                            if (fabsf(v.y) > p.clamp) { sy = 2 <<  8; v.y = InternalType<T>::clamp(v.y, p.clamp); }
+                            if (fabsf(v.z) > p.clamp) { sz = 2 << 16; v.z = InternalType<T>::clamp(v.z, p.clamp); }
+                            if (fabsf(v.w) > p.clamp) { sw = 2 << 24; v.w = InternalType<T>::clamp(v.w, p.clamp); }
+
+                            if ((uint32_t)signXb < p.swLimit && signY >= minY)
+                            {
+                                // Combine signs.
+                                uint32_t s = sx + sy + sw + sz;
+                                s <<= (signX & 3) << 1;
+                                s |= __shfl_xor_sync(groupMask, s, 1);
+                                s |= __shfl_xor_sync(groupMask, s, 2);
+
+                                // Write signs.
+                                if ((uint32_t)(signY + 0) < sShapeMaxY) { p.s[si0] = (unsigned char)(s >>  0); }
+                                if ((uint32_t)(signY + 1) < sShapeMaxY) { p.s[si1] = (unsigned char)(s >>  8); }
+                                if ((uint32_t)(signY + 2) < sShapeMaxY) { p.s[si2] = (unsigned char)(s >> 16); }
+                                if ((uint32_t)(signY + 3) < sShapeMaxY) { p.s[si3] = (unsigned char)(s >> 24); }
+                            }
+                        }
+                        else
+                        {
+                            // Determine and write signs.
+                            if ((uint32_t)signXb < p.swLimit && signY >= minY)
+                            {
+                                int sx = __float_as_uint(v.x) >> 31 <<  0;
+                                int sy = __float_as_uint(v.y) >> 31 <<  8;
+                                int sz = __float_as_uint(v.z) >> 31 << 16;
+                                int sw = __float_as_uint(v.w) >> 31 << 24;
+                                if (sx) v.x *= p.slope;
+                                if (sy) v.y *= p.slope;
+                                if (sz) v.z *= p.slope;
+                                if (sw) v.w *= p.slope;
+                                if (fabsf(v.x) > p.clamp) { sx = 2 <<  0; v.x = InternalType<T>::clamp(v.x, p.clamp); }
+                                if (fabsf(v.y) > p.clamp) { sy = 2 <<  8; v.y = InternalType<T>::clamp(v.y, p.clamp); }
+                                if (fabsf(v.z) > p.clamp) { sz = 2 << 16; v.z = InternalType<T>::clamp(v.z, p.clamp); }
+                                if (fabsf(v.w) > p.clamp) { sw = 2 << 24; v.w = InternalType<T>::clamp(v.w, p.clamp); }
+
+                                // Combine signs.
+                                uint32_t s = sx + sy + sw + sz;
+                                s <<= (signX & 3) << 1;
+                                s |= __shfl_xor_sync(groupMask, s, 1);
+                                s |= __shfl_xor_sync(groupMask, s, 2);
+
+                                // Write signs.
+                                if ((uint32_t)(signY + 0) < sShapeMaxY) { p.s[si0] = (unsigned char)(s >>  0); }
+                                if ((uint32_t)(signY + 1) < sShapeMaxY) { p.s[si1] = (unsigned char)(s >>  8); }
+                                if ((uint32_t)(signY + 2) < sShapeMaxY) { p.s[si2] = (unsigned char)(s >> 16); }
+                                if ((uint32_t)(signY + 3) < sShapeMaxY) { p.s[si3] = (unsigned char)(s >> 24); }
+                            }
+                            else
+                            {
+                                // Just compute the values.
+                                if (v.x < 0.f) v.x *= p.slope; v.x = InternalType<T>::clamp(v.x, p.clamp);
+                                if (v.y < 0.f) v.y *= p.slope; v.y = InternalType<T>::clamp(v.y, p.clamp);
+                                if (v.z < 0.f) v.z *= p.slope; v.z = InternalType<T>::clamp(v.z, p.clamp);
+                                if (v.w < 0.f) v.w *= p.slope; v.w = InternalType<T>::clamp(v.w, p.clamp);
+                            }
+                        }
+                    }
+                    else if (signRead) // Read signs and apply.
+                    {
+                        if ((uint32_t)signXb < p.swLimit)
+                        {
+                            int ss = (signX & 3) << 1;
+                            if ((uint32_t)(signY + 0) < p.sShape.y) { int s = p.s[si0] >> ss; if (s & 1) v.x *= p.slope; if (s & 2) v.x = 0.f; }
+                            if ((uint32_t)(signY + 1) < p.sShape.y) { int s = p.s[si1] >> ss; if (s & 1) v.y *= p.slope; if (s & 2) v.y = 0.f; }
+                            if ((uint32_t)(signY + 2) < p.sShape.y) { int s = p.s[si2] >> ss; if (s & 1) v.z *= p.slope; if (s & 2) v.z = 0.f; }
+                            if ((uint32_t)(signY + 3) < p.sShape.y) { int s = p.s[si3] >> ss; if (s & 1) v.w *= p.slope; if (s & 2) v.w = 0.f; }
+                        }
+                    }
+                    else // Forward pass with no sign write.
+                    {
+                        if (v.x < 0.f) v.x *= p.slope; v.x = InternalType<T>::clamp(v.x, p.clamp);
+                        if (v.y < 0.f) v.y *= p.slope; v.y = InternalType<T>::clamp(v.y, p.clamp);
+                        if (v.z < 0.f) v.z *= p.slope; v.z = InternalType<T>::clamp(v.z, p.clamp);
+                        if (v.w < 0.f) v.w *= p.slope; v.w = InternalType<T>::clamp(v.w, p.clamp);
+                    }
+
+                    s_tileUpXY[dst + 0 * tileUpW] = v.x;
+                    if (relUpY0 + 1 < tileUpH) s_tileUpXY[dst + 1 * tileUpW] = v.y;
+                    if (relUpY0 + 2 < tileUpH) s_tileUpXY[dst + 2 * tileUpW] = v.z;
+                    if (relUpY0 + 3 < tileUpH) s_tileUpXY[dst + 3 * tileUpW] = v.w;
+                }
+            }
+            else if (up == 2)
+            {
+                minY -= 1; // Adjust according to block height.
+                for (int idx = threadIdx.x; idx < tileUpW * tileUpH_up / up; idx += blockDim.x)
+                {
+                    int relUpX, relInY0;
+                    fast_div_mod<tileUpW>(relUpX, relInY0, idx);
+                    int relUpY0 = relInY0 * up;
+                    int src0 = relInY0 * tileUpW + relUpX;
+                    int dst = relUpY0 * tileUpW + relUpX;
+                    vec2_t v = InternalType<T>::zero_vec2();
+
+                    scalar_t a = s_tileUpX[src0];
+                    if (phaseInY == 0)
+                    {
+                        #pragma unroll
+                        for (int step = 0; step < fuSize / up; step++)
+                        {
+                            v.x += a * (scalar_t)c_fu[step * up + 0];
+                            a = s_tileUpX[src0 + (step + 1) * tileUpW];
+                            v.y += a * (scalar_t)c_fu[step * up + 1];
+                        }
+                    }
+                    else // (phaseInY == 1)
+                    {
+                        #pragma unroll
+                        for (int step = 0; step < fuSize / up; step++)
+                        {
+                            v.x += a * (scalar_t)c_fu[step * up + 1];
+                            v.y += a * (scalar_t)c_fu[step * up + 0];
+                            a = s_tileUpX[src0 + (step + 1) * tileUpW];
+                        }
+                    }
+
+                    int x = tileOutX * down + relUpX;
+                    int y = tileOutY * down + relUpY0;
+                    int signX = x + p.sOfs.x;
+                    int signY = y + p.sOfs.y;
+                    int signZ = blockIdx.z + p.blockZofs;
+                    int signXb = signX >> 2;
+                    index_t si0 = signXb + p.sShape.x * (signY + (index_t)p.sShape.y * signZ);
+                    index_t si1 = si0 + p.sShape.x;
+
+                    v.x *= (scalar_t)((float)up * (float)up * p.gain);
+                    v.y *= (scalar_t)((float)up * (float)up * p.gain);
+
+                    if (signWrite)
+                    {
+                        if (!enableWriteSkip)
+                        {
+                            // Determine and write signs.
+                            int sx = __float_as_uint(v.x) >> 31 << 0;
+                            int sy = __float_as_uint(v.y) >> 31 << 8;
+                            if (sx) v.x *= p.slope;
+                            if (sy) v.y *= p.slope;
+                            if (fabsf(v.x) > p.clamp) { sx = 2 << 0; v.x = InternalType<T>::clamp(v.x, p.clamp); }
+                            if (fabsf(v.y) > p.clamp) { sy = 2 << 8; v.y = InternalType<T>::clamp(v.y, p.clamp); }
+
+                            if ((uint32_t)signXb < p.swLimit && signY >= minY)
+                            {
+                                // Combine signs.
+                                int s = sx + sy;
+                                s <<= signXo;
+                                s |= __shfl_xor_sync(groupMask, s, 1);
+                                s |= __shfl_xor_sync(groupMask, s, 2);
+
+                                // Write signs.
+                                if ((uint32_t)(signY + 0) < sShapeMaxY) { p.s[si0] = (unsigned char)(s >>  0); }
+                                if ((uint32_t)(signY + 1) < sShapeMaxY) { p.s[si1] = (unsigned char)(s >>  8); }
+                            }
+                        }
+                        else
+                        {
+                            // Determine and write signs.
+                            if ((uint32_t)signXb < p.swLimit && signY >= minY)
+                            {
+                                int sx = __float_as_uint(v.x) >> 31 << 0;
+                                int sy = __float_as_uint(v.y) >> 31 << 8;
+                                if (sx) v.x *= p.slope;
+                                if (sy) v.y *= p.slope;
+                                if (fabsf(v.x) > p.clamp) { sx = 2 << 0; v.x = InternalType<T>::clamp(v.x, p.clamp); }
+                                if (fabsf(v.y) > p.clamp) { sy = 2 << 8; v.y = InternalType<T>::clamp(v.y, p.clamp); }
+
+                                // Combine signs.
+                                int s = sx + sy;
+                                s <<= signXo;
+                                s |= __shfl_xor_sync(groupMask, s, 1);
+                                s |= __shfl_xor_sync(groupMask, s, 2);
+
+                                // Write signs.
+                                if ((uint32_t)(signY + 0) < sShapeMaxY) { p.s[si0] = (unsigned char)(s >>  0); }
+                                if ((uint32_t)(signY + 1) < sShapeMaxY) { p.s[si1] = (unsigned char)(s >>  8); }
+                            }
+                            else
+                            {
+                                // Just compute the values.
+                                if (v.x < 0.f) v.x *= p.slope; v.x = InternalType<T>::clamp(v.x, p.clamp);
+                                if (v.y < 0.f) v.y *= p.slope; v.y = InternalType<T>::clamp(v.y, p.clamp);
+                            }
+                        }
+                    }
+                    else if (signRead) // Read signs and apply.
+                    {
+                        if ((uint32_t)signXb < p.swLimit)
+                        {
+                            if ((uint32_t)(signY + 0) < p.sShape.y) { int s = p.s[si0] >> signXo; if (s & 1) v.x *= p.slope; if (s & 2) v.x = 0.f; }
+                            if ((uint32_t)(signY + 1) < p.sShape.y) { int s = p.s[si1] >> signXo; if (s & 1) v.y *= p.slope; if (s & 2) v.y = 0.f; }
+                        }
+                    }
+                    else // Forward pass with no sign write.
+                    {
+                        if (v.x < 0.f) v.x *= p.slope; v.x = InternalType<T>::clamp(v.x, p.clamp);
+                        if (v.y < 0.f) v.y *= p.slope; v.y = InternalType<T>::clamp(v.y, p.clamp);
+                    }
+
+                    if (!downInline)
+                    {
+                        // Write into temporary buffer.
+                        s_tileUpXY[dst] = v.x;
+                        if (relUpY0 < tileUpH - 1)
+                            s_tileUpXY[dst + tileUpW] = v.y;
+                    }
+                    else
+                    {
+                        // Write directly into output buffer.
+                        if ((uint32_t)x < p.yShape.x)
+                        {
+                            int ymax = MIN(p.yShape.y, tileUpH + tileOutY * down);
+                            index_t ofs = x * get_stride<index_t>(p.yStride.x) + y * get_stride<index_t>(p.yStride.y) + mapOfsOut;
+                            if ((uint32_t)y + 0 < p.yShape.y) *((T*)((char*)p.y + ofs)) = (T)(v.x * (scalar_t)c_fd[0]);
+                            if ((uint32_t)y + 1 < ymax) *((T*)((char*)p.y + ofs + get_stride<index_t>(p.yStride.y))) = (T)(v.y * (scalar_t)c_fd[0]);
+                        }
+                    }
+                }
+            }
+        }
+        else if (filterMode == MODE_FUSD || filterMode == MODE_FUFD)
+        {
+            // Full upsampling filter.
+
+            if (up == 2)
+            {
+                // 2 x 2-wide.
+                __syncthreads();
+                int minY = tileOutY ? (tileOutY - tileOutH) * down + tileUpH + p.sOfs.y : 0; // Skip already written signs.
+                for (int idx = threadIdx.x * 4; idx < tileUpW * tileUpH; idx += blockDim.x * 4)
+                {
+                    int relUpX0, relUpY0;
+                    fast_div_mod<tileUpW>(relUpX0, relUpY0, idx);
+                    int relInX0 = CEIL_DIV(relUpX0 - phaseInX, up);
+                    int relInY0 = CEIL_DIV(relUpY0 - phaseInY, up);
+                    int src0 = relInX0 + tileInW * relInY0;
+                    int tap0y = (relInY0 * up + phaseInY - relUpY0);
+
+                    #define X_LOOP(TAPY, PX) /* Inner sx loop of the 2x2-wide accumulation; relies on v, a, b, sy and src0 being in scope where it is expanded. TAPY is added to the c_fu index term scaled by MAX_FILTER_SIZE, PX offsets the unscaled term. */ \
+                        for (int sx = 0; sx < fuSize / up; sx++) \
+                        { \
+                            v.x += a * (scalar_t)c_fu[(sx * up + (((PX) - 0) & (up - 1))) + (sy * up + (TAPY)) * MAX_FILTER_SIZE]; /* v.x and v.z use the same c_fu tap, weighted by a and b respectively. */ \
+                            v.z += b * (scalar_t)c_fu[(sx * up + (((PX) - 0) & (up - 1))) + (sy * up + (TAPY)) * MAX_FILTER_SIZE]; if ((PX) == 0) { a = b; b = s_tileIn[src0 + 2 + sx + sy * tileInW]; } /* PX == 0: a takes the old b and b loads the next s_tileIn sample before the y/w taps. */ \
+                            v.y += a * (scalar_t)c_fu[(sx * up + (((PX) - 1) & (up - 1))) + (sy * up + (TAPY)) * MAX_FILTER_SIZE]; /* v.y and v.w use the tap at offset ((PX) - 1) & (up - 1). */ \
+                            v.w += b * (scalar_t)c_fu[(sx * up + (((PX) - 1) & (up - 1))) + (sy * up + (TAPY)) * MAX_FILTER_SIZE]; if ((PX) == 1) { a = b; b = s_tileIn[src0 + 2 + sx + sy * tileInW]; } /* PX == 1: the same a/b advance happens after the y/w taps instead. */ \
+                        }
+
+                    vec4_t v = InternalType<T>::zero_vec4();
+                    if (tap0y == 0 && phaseInX == 0)
+                        #pragma unroll
+                        for (int sy = 0; sy < fuSize / up; sy++) { scalar_t a = s_tileIn[src0 + sy * tileInW]; scalar_t b = s_tileIn[src0 + sy * tileInW + 1];
+                            #pragma unroll
+                            X_LOOP(0, 0) }
+                    if (tap0y == 0 && phaseInX == 1)
+                        #pragma unroll
+                        for (int sy = 0; sy < fuSize / up; sy++) { scalar_t a = s_tileIn[src0 + sy * tileInW]; scalar_t b = s_tileIn[src0 + sy * tileInW + 1];
+                            #pragma unroll
+                            X_LOOP(0, 1) }
+                    if (tap0y == 1 && phaseInX == 0)
+                        #pragma unroll
+                        for (int sy = 0; sy < fuSize / up; sy++) { scalar_t a = s_tileIn[src0 + sy * tileInW]; scalar_t b = s_tileIn[src0 + sy * tileInW + 1];
+                            #pragma unroll
+                            X_LOOP(1, 0) }
+                    if (tap0y == 1 && phaseInX == 1)
+                        #pragma unroll
+                        for (int sy = 0; sy < fuSize / up; sy++) { scalar_t a = s_tileIn[src0 + sy * tileInW]; scalar_t b = s_tileIn[src0 + sy * tileInW + 1];
+                            #pragma unroll
+                            X_LOOP(1, 1) }
+
+                    #undef X_LOOP
+
+                    int x = tileOutX * down + relUpX0;
+                    int y = tileOutY * down + relUpY0;
+                    int signX = x + p.sOfs.x;
+                    int signY = y + p.sOfs.y;
+                    int signZ = blockIdx.z + p.blockZofs;
+                    int signXb = signX >> 2;
+                    index_t si = signXb + p.sShape.x * (signY + (index_t)p.sShape.y * signZ);
+
+                    v.x *= (scalar_t)((float)up * (float)up * p.gain);
+                    v.y *= (scalar_t)((float)up * (float)up * p.gain);
+                    v.z *= (scalar_t)((float)up * (float)up * p.gain);
+                    v.w *= (scalar_t)((float)up * (float)up * p.gain);
+
+                    if (signWrite)
+                    {
+                        if (!enableWriteSkip)
+                        {
+                            // Determine and write signs.
+                            int sx = __float_as_uint(v.x) >> 31;
+                            int sy = __float_as_uint(v.y) >> 31;
+                            int sz = __float_as_uint(v.z) >> 31;
+                            int sw = __float_as_uint(v.w) >> 31;
+                            if (sx) v.x *= p.slope; if (fabsf(v.x) > p.clamp) { sx = 2; v.x = InternalType<T>::clamp(v.x, p.clamp); }
+                            if (sy) v.y *= p.slope; if (fabsf(v.y) > p.clamp) { sy = 2; v.y = InternalType<T>::clamp(v.y, p.clamp); }
+                            if (sz) v.z *= p.slope; if (fabsf(v.z) > p.clamp) { sz = 2; v.z = InternalType<T>::clamp(v.z, p.clamp); }
+                            if (sw) v.w *= p.slope; if (fabsf(v.w) > p.clamp) { sw = 2; v.w = InternalType<T>::clamp(v.w, p.clamp); }
+
+                            if ((uint32_t)signXb < p.swLimit && (uint32_t)signY < p.sShape.y && signY >= minY)
+                            {
+                                p.s[si] = sx + (sy << 2) + (sz << 4) + (sw << 6);
+                            }
+                        }
+                        else
+                        {
+                            // Determine and write signs.
+                            if ((uint32_t)signXb < p.swLimit && (uint32_t)signY < p.sShape.y && signY >= minY)
+                            {
+                                int sx = __float_as_uint(v.x) >> 31;
+                                int sy = __float_as_uint(v.y) >> 31;
+                                int sz = __float_as_uint(v.z) >> 31;
+                                int sw = __float_as_uint(v.w) >> 31;
+                                if (sx) v.x *= p.slope; if (fabsf(v.x) > p.clamp) { sx = 2; v.x = InternalType<T>::clamp(v.x, p.clamp); }
+                                if (sy) v.y *= p.slope; if (fabsf(v.y) > p.clamp) { sy = 2; v.y = InternalType<T>::clamp(v.y, p.clamp); }
+                                if (sz) v.z *= p.slope; if (fabsf(v.z) > p.clamp) { sz = 2; v.z = InternalType<T>::clamp(v.z, p.clamp); }
+                                if (sw) v.w *= p.slope; if (fabsf(v.w) > p.clamp) { sw = 2; v.w = InternalType<T>::clamp(v.w, p.clamp); }
+
+                                p.s[si] = sx + (sy << 2) + (sz << 4) + (sw << 6);
+                            }
+                            else
+                            {
+                                // Just compute the values.
+                                if (v.x < 0.f) v.x *= p.slope; v.x = InternalType<T>::clamp(v.x, p.clamp);
+                                if (v.y < 0.f) v.y *= p.slope; v.y = InternalType<T>::clamp(v.y, p.clamp);
+                                if (v.z < 0.f) v.z *= p.slope; v.z = InternalType<T>::clamp(v.z, p.clamp);
+                                if (v.w < 0.f) v.w *= p.slope; v.w = InternalType<T>::clamp(v.w, p.clamp);
+                            }
+                        }
+                    }
+                    else if (signRead) // Read sign and apply.
+                    {
+                        if ((uint32_t)signY < p.sShape.y)
+                        {
+                            int s = 0;
+                            if ((uint32_t)signXb     < p.swLimit) s  = p.s[si];
+                            if ((uint32_t)signXb + 1 < p.swLimit) s |= p.s[si + 1] << 8;
+                            s >>= (signX & 3) << 1;
+                            if (s & 0x01) v.x *= p.slope; if (s & 0x02) v.x = 0.f;
+                            if (s & 0x04) v.y *= p.slope; if (s & 0x08) v.y = 0.f;
+                            if (s & 0x10) v.z *= p.slope; if (s & 0x20) v.z = 0.f;
+                            if (s & 0x40) v.w *= p.slope; if (s & 0x80) v.w = 0.f;
+                        }
+                    }
+                    else // Forward pass with no sign write.
+                    {
+                        if (v.x < 0.f) v.x *= p.slope; v.x = InternalType<T>::clamp(v.x, p.clamp);
+                        if (v.y < 0.f) v.y *= p.slope; v.y = InternalType<T>::clamp(v.y, p.clamp);
+                        if (v.z < 0.f) v.z *= p.slope; v.z = InternalType<T>::clamp(v.z, p.clamp);
+                        if (v.w < 0.f) v.w *= p.slope; v.w = InternalType<T>::clamp(v.w, p.clamp);
+                    }
+
+                    s_tileUpXY[idx + 0] = v.x;
+                    s_tileUpXY[idx + 1] = v.y;
+                    s_tileUpXY[idx + 2] = v.z;
+                    s_tileUpXY[idx + 3] = v.w;
+                }
+            }
+            else if (up == 1)
+            {
+                __syncthreads();
+                uint32_t groupMask = 15 << ((threadIdx.x & 31) & ~3);
+                int minY = tileOutY ? (tileOutY - tileOutH) * down + tileUpH : 0; // Skip already written signs.
+                for (int idx = threadIdx.x; idx < tileUpW * tileUpH; idx += blockDim.x)
+                {
+                    int relUpX0, relUpY0;
+                    fast_div_mod<tileUpW>(relUpX0, relUpY0, idx);
+                    scalar_t v = s_tileIn[idx] * (scalar_t)c_fu[0]; // 1x1 filter.
+
+                    int x = tileOutX * down + relUpX0;
+                    int y = tileOutY * down + relUpY0;
+                    int signX = x + p.sOfs.x;
+                    int signY = y + p.sOfs.y;
+                    int signZ = blockIdx.z + p.blockZofs;
+                    int signXb = signX >> 2;
+                    index_t si = signXb + p.sShape.x * (signY + (index_t)p.sShape.y * signZ);
+                    v *= (scalar_t)((float)up * (float)up * p.gain);
+
+                    if (signWrite)
+                    {
+                        if (!enableWriteSkip)
+                        {
+                            // Determine and write sign.
+                            uint32_t s = 0;
+                            uint32_t signXbit = (1u << signXo);
+                            if (v < 0.f)
+                            {
+                                s = signXbit;
+                                v *= p.slope;
+                            }
+                            if (fabsf(v) > p.clamp)
+                            {
+                                s = signXbit * 2;
+                                v = InternalType<T>::clamp(v, p.clamp);
+                            }
+                            if ((uint32_t)signXb < p.swLimit && (uint32_t)signY < p.sShape.y && signY >= minY)
+                            {
+                                s += __shfl_xor_sync(groupMask, s, 1);  // Coalesce.
+                                s += __shfl_xor_sync(groupMask, s, 2);  // Coalesce.
+                                p.s[si] = s;                            // Write.
+                            }
+                        }
+                        else
+                        {
+                            // Determine and write sign.
+                            if ((uint32_t)signXb < p.swLimit && (uint32_t)signY < p.sShape.y && signY >= minY)
+                            {
+                                uint32_t s = 0;
+                                uint32_t signXbit = (1u << signXo);
+                                if (v < 0.f)
+                                {
+                                    s = signXbit;
+                                    v *= p.slope;
+                                }
+                                if (fabsf(v) > p.clamp)
+                                {
+                                    s = signXbit * 2;
+                                    v = InternalType<T>::clamp(v, p.clamp);
+                                }
+                                s += __shfl_xor_sync(groupMask, s, 1);  // Coalesce.
+                                s += __shfl_xor_sync(groupMask, s, 2);  // Coalesce.
+                                p.s[si] = s;                            // Write.
+                            }
+                            else
+                            {
+                                // Just compute the value.
+                                if (v < 0.f) v *= p.slope;
+                                v = InternalType<T>::clamp(v, p.clamp);
+                            }
+                        }
+                    }
+                    else if (signRead)
+                    {
+                        // Read sign and apply if within sign tensor bounds.
+                        if ((uint32_t)signXb < p.swLimit && (uint32_t)signY < p.sShape.y)
+                        {
+                            int s = p.s[si];
+                            s >>= signXo;
+                            if (s & 1) v *= p.slope;
+                            if (s & 2) v = 0.f;
+                        }
+                    }
+                    else // Forward pass with no sign write.
+                    {
+                        if (v < 0.f) v *= p.slope;
+                        v = InternalType<T>::clamp(v, p.clamp);
+                    }
+
+                    if (!downInline) // Write into temporary buffer.
+                        s_tileUpXY[idx] = v;
+                    else if ((uint32_t)x < p.yShape.x && (uint32_t)y < p.yShape.y) // Write directly into output buffer
+                        *((T*)((char*)p.y + (x * get_stride<index_t>(p.yStride.x) + y * get_stride<index_t>(p.yStride.y) + mapOfsOut))) = (T)(v * (scalar_t)c_fd[0]);
+                }
+            }
+        }
+
+        // Downsampling.
+        if (filterMode == MODE_SUSD || filterMode == MODE_FUSD)
+        {
+            // Horizontal downsampling.
+            __syncthreads();
+            if (down == 4 && tileOutW % 4 == 0)
+            {
+                // Calculate 4 pixels at a time.
+                for (int idx = threadIdx.x * 4; idx < tileOutW * tileUpH; idx += blockDim.x * 4)
+                {
+                    int relOutX0, relUpY;
+                    fast_div_mod<tileOutW>(relOutX0, relUpY, idx);
+                    int relUpX0 = relOutX0 * down;
+                    int src0 = relUpY * tileUpW + relUpX0;
+                    vec4_t v = InternalType<T>::zero_vec4();
+                    #pragma unroll
+                    for (int step = 0; step < fdSize; step++)
+                    {
+                        v.x += s_tileUpXY[src0 +  0 + step] * (scalar_t)c_fd[step];
+                        v.y += s_tileUpXY[src0 +  4 + step] * (scalar_t)c_fd[step];
+                        v.z += s_tileUpXY[src0 +  8 + step] * (scalar_t)c_fd[step];
+                        v.w += s_tileUpXY[src0 + 12 + step] * (scalar_t)c_fd[step];
+                    }
+                    s_tileDownX[idx+0] = v.x;
+                    s_tileDownX[idx+1] = v.y;
+                    s_tileDownX[idx+2] = v.z;
+                    s_tileDownX[idx+3] = v.w;
+                }
+            }
+            else if ((down == 2 || down == 4) && (tileOutW % 2 == 0))
+            {
+                // Calculate 2 pixels at a time.
+                for (int idx = threadIdx.x * 2; idx < tileOutW * tileUpH; idx += blockDim.x * 2)
+                {
+                    int relOutX0, relUpY;
+                    fast_div_mod<tileOutW>(relOutX0, relUpY, idx);
+                    int relUpX0 = relOutX0 * down;
+                    int src0 = relUpY * tileUpW + relUpX0;
+                    vec2_t v = InternalType<T>::zero_vec2();
+                    #pragma unroll
+                    for (int step = 0; step < fdSize; step++)
+                    {
+                        v.x += s_tileUpXY[src0 +    0 + step] * (scalar_t)c_fd[step];
+                        v.y += s_tileUpXY[src0 + down + step] * (scalar_t)c_fd[step];
+                    }
+                    s_tileDownX[idx+0] = v.x;
+                    s_tileDownX[idx+1] = v.y;
+                }
+            }
+            else
+            {
+                // Calculate 1 pixel at a time.
+                for (int idx = threadIdx.x; idx < tileOutW * tileUpH; idx += blockDim.x)
+                {
+                    int relOutX0, relUpY;
+                    fast_div_mod<tileOutW>(relOutX0, relUpY, idx);
+                    int relUpX0 = relOutX0 * down;
+                    int src = relUpY * tileUpW + relUpX0;
+                    scalar_t v = 0.f;
+                    #pragma unroll
+                    for (int step = 0; step < fdSize; step++)
+                        v += s_tileUpXY[src + step] * (scalar_t)c_fd[step];
+                    s_tileDownX[idx] = v;
+                }
+            }
+
+            // Vertical downsampling & store output tile.
+            __syncthreads();
+            for (int idx = threadIdx.x; idx < tileOutW * tileOutH; idx += blockDim.x)
+            {
+                int relOutX, relOutY0;
+                fast_div_mod<tileOutW>(relOutX, relOutY0, idx);
+                int relUpY0 = relOutY0 * down;
+                int src0 = relUpY0 * tileOutW + relOutX;
+                scalar_t v = 0;
+                #pragma unroll
+                for (int step = 0; step < fdSize; step++)
+                    v += s_tileDownX[src0 + step * tileOutW] * (scalar_t)c_fd[step];
+
+                int outX = tileOutX + relOutX;
+                int outY = tileOutY + relOutY0;
+
+                if (outX < p.yShape.x & outY < p.yShape.y)
+                    *((T*)((char*)p.y + (outX * get_stride<index_t>(p.yStride.x) + outY * get_stride<index_t>(p.yStride.y) + mapOfsOut))) = (T)v;
+            }
+        }
+        else if (filterMode == MODE_SUFD || filterMode == MODE_FUFD)
+        {
+            // Full downsampling filter.
+            if (down == 2)
+            {
+                // 2-wide.
+                __syncthreads();
+                for (int idx = threadIdx.x * 2; idx < tileOutW * tileOutH; idx += blockDim.x * 2)
+                {
+                    int relOutX0, relOutY0;
+                    fast_div_mod<tileOutW>(relOutX0, relOutY0, idx);
+                    int relUpX0 = relOutX0 * down;
+                    int relUpY0 = relOutY0 * down;
+                    int src0 = relUpY0 * tileUpW + relUpX0;
+                    vec2_t v = InternalType<T>::zero_vec2();
+                    #pragma unroll
+                    for (int sy = 0; sy < fdSize; sy++)
+                    #pragma unroll
+                    for (int sx = 0; sx < fdSize; sx++)
+                    {
+                        v.x += s_tileUpXY[src0 + 0 + sx + sy * tileUpW] * (scalar_t)c_fd[sx + sy * MAX_FILTER_SIZE];
+                        v.y += s_tileUpXY[src0 + 2 + sx + sy * tileUpW] * (scalar_t)c_fd[sx + sy * MAX_FILTER_SIZE];
+                    }
+
+                    int outX = tileOutX + relOutX0;
+                    int outY = tileOutY + relOutY0;
+                    if ((uint32_t)outY < p.yShape.y)
+                    {
+                        index_t ofs = outX * get_stride<index_t>(p.yStride.x) + outY * get_stride<index_t>(p.yStride.y) + mapOfsOut;
+                        if (outX + 0 < p.yShape.x) *((T*)((char*)p.y + ofs)) = (T)v.x;
+                        if (outX + 1 < p.yShape.x) *((T*)((char*)p.y + ofs + get_stride<index_t>(p.yStride.x))) = (T)v.y;
+                    }
+                }
+            }
+            else if (down == 1 && !downInline)
+            {
+                // Thread per pixel.
+                __syncthreads();
+                for (int idx = threadIdx.x; idx < tileOutW * tileOutH; idx += blockDim.x)
+                {
+                    int relOutX0, relOutY0;
+                    fast_div_mod<tileOutW>(relOutX0, relOutY0, idx);
+                    scalar_t v = s_tileUpXY[idx] * (scalar_t)c_fd[0]; // 1x1 filter.
+
+                    int outX = tileOutX + relOutX0;
+                    int outY = tileOutY + relOutY0;
+                    if ((uint32_t)outX < p.yShape.x && (uint32_t)outY < p.yShape.y)
+                        *((T*)((char*)p.y + (outX * get_stride<index_t>(p.yStride.x) + outY * get_stride<index_t>(p.yStride.y) + mapOfsOut))) = (T)v;
+                }
+            }
+        }
+
+        if (!enableXrep)
+            break;
+    }
+}
+
+//------------------------------------------------------------------------
+// Compute activation function and signs for upsampled data tensor, modifying data tensor in-place. Used for accelerating the generic variant.
+// Sign tensor is known to be contiguous, and p.x and p.s have the same z, w dimensions. 64-bit indexing is always used.
+//
+// The template flags select one of three behaviors (signWrite takes precedence if both flags are set):
+//   signWrite - apply gain, leaky ReLU and clamp to p.x and record a 2-bit code per element in p.s:
+//               0 = neither, 1 = negative slope applied, 2 = clamped (the clamp code replaces the sign code).
+//   signRead  - apply gain to p.x, then replay stored codes: bit 0 multiplies by p.slope, bit 1 zeroes the value.
+//   neither   - apply gain, leaky ReLU and clamp to p.x without touching p.s.
+//
+// Each thread handles a single x coordinate; rows (y) and the combined channel*minibatch index (q)
+// are covered by looping in steps of gridDim.y and gridDim.z.
+
+template <class T, bool signWrite, bool signRead>
+static __global__ void filtered_lrelu_act_kernel(filtered_lrelu_act_kernel_params p)
+{
+    typedef typename InternalType<T>::scalar_t scalar_t;
+
+    // Indexing.
+    int32_t x = threadIdx.x + blockIdx.x * blockDim.x;
+    // In sign-write mode the row loop spans the sign tensor height, so every sign row gets written
+    // even where it lies outside p.x; otherwise it spans the data height.
+    int32_t ymax = signWrite ? p.sShape.y : p.xShape.y;
+    int32_t qmax = p.xShape.z * p.xShape.w; // Combined minibatch*channel maximum index.
+
+    // Loop to accommodate oversized tensors.
+    for (int32_t q = blockIdx.z; q < qmax; q += gridDim.z)
+    for (int32_t y = blockIdx.y; y < ymax; y += gridDim.y)
+    {
+        // Extract z and w (channel, minibatch index).
+        int32_t w = q / p.xShape.z;
+        int32_t z = q - w * p.xShape.z;
+
+        // Choose behavior based on sign read/write mode.
+        if (signWrite)
+        {
+            // Process value if in p.x.
+            // Threads outside p.x keep s == 0 but still take part in the shuffles below.
+            uint32_t s = 0;
+            if (x < p.xShape.x && y < p.xShape.y)
+            {
+                // Element offset into p.x; the 64-bit stride fields promote the whole expression to 64 bits.
+                int64_t ix = x * p.xStride.x + y * p.xStride.y + z * p.xStride.z + w * p.xStride.w;
+                T* pv = ((T*)p.x) + ix;
+                scalar_t v = (scalar_t)(*pv);
+
+                // Gain, LReLU, clamp.
+                v *= p.gain;
+                if (v < 0.f)
+                {
+                    v *= p.slope;
+                    s = 1; // Sign.
+                }
+                if (fabsf(v) > p.clamp)
+                {
+                    v = InternalType<T>::clamp(v, p.clamp);
+                    s = 2; // Clamp.
+                }
+
+                *pv = (T)v; // Write value.
+            }
+
+            // Coalesce into threads 0 and 16 of warp.
+            // The mask restricts each shuffle to the thread's own group of 16 lanes.
+            // NOTE(review): assumes (threadIdx.x & 31) equals the lane index within the warp - confirm launch configuration.
+            uint32_t m = (threadIdx.x & 16) ? 0xffff0000u : 0x0000ffffu;
+            s <<= ((threadIdx.x & 15) << 1); // Shift into place.
+            // XOR exchange over distances 1, 2, 4, 8 ORs the sixteen 2-bit codes into one 32-bit word.
+            s |= __shfl_xor_sync(m, s, 1); // Distribute.
+            s |= __shfl_xor_sync(m, s, 2);
+            s |= __shfl_xor_sync(m, s, 4);
+            s |= __shfl_xor_sync(m, s, 8);
+
+            // Write signs if leader and in p.s.
+            if (!(threadIdx.x & 15) && x < p.sShape.x) // y is always in.
+            {
+                // 'is' is an element index; one 32-bit word stores 16 elements, hence the shift by 4.
+                // NOTE(review): presumably relies on leader x and p.sShape.x being multiples of 16 - confirm.
+                uint64_t is = x + p.sShape.x * (y + (int64_t)p.sShape.y * q); // Contiguous.
+                ((uint32_t*)p.s)[is >> 4] = s;
+            }
+        }
+        else if (signRead)
+        {
+            // Process value if in p.x.
+            if (x < p.xShape.x) // y is always in.
+            {
+                int64_t ix = x * p.xStride.x + y * p.xStride.y + z * p.xStride.z + w * p.xStride.w;
+                T* pv = ((T*)p.x) + ix;
+                scalar_t v = (scalar_t)(*pv);
+                v *= p.gain;
+
+                // Apply sign buffer offset.
+                // Unsigned arithmetic makes negative positions wrap to large values,
+                // so the single '<' test below rejects them as well.
+                uint32_t sx = x + p.sOfs.x;
+                uint32_t sy = y + p.sOfs.y;
+
+                // Read and apply signs if we land inside valid region of sign buffer.
+                if (sx < p.sShape.x && sy < p.sShape.y)
+                {
+                    // Byte index: each byte of p.s holds the 2-bit codes of four consecutive x positions.
+                    uint64_t is = (sx >> 2) + (p.sShape.x >> 2) * (sy + (uint64_t)p.sShape.y * q); // Contiguous.
+                    unsigned char s = p.s[is];
+                    s >>= (sx & 3) << 1; // Shift into place.
+                    if (s & 1) // Sign?
+                        v *= p.slope;
+                    if (s & 2) // Clamp?
+                        v = 0.f;
+                }
+
+                *pv = (T)v; // Write value.
+            }
+        }
+        else
+        {
+            // Forward pass with no sign write. Process value if in p.x.
+            if (x < p.xShape.x) // y is always in.
+            {
+                int64_t ix = x * p.xStride.x + y * p.xStride.y + z * p.xStride.z + w * p.xStride.w;
+                T* pv = ((T*)p.x) + ix;
+                scalar_t v = (scalar_t)(*pv);
+                v *= p.gain;
+                if (v < 0.f)
+                    v *= p.slope;
+                if (fabsf(v) > p.clamp)
+                    v = InternalType<T>::clamp(v, p.clamp);
+                *pv = (T)v; // Write value.
+            }
+        }
+    }
+}
+
+// Returns an untyped pointer to the activation kernel instantiated for (T, signWrite, signRead).
+template <class T, bool signWrite, bool signRead> void* choose_filtered_lrelu_act_kernel(void)
+{
+    // Bind the chosen instantiation to a typed function pointer first, then erase the type for the caller.
+    void (*kernel)(filtered_lrelu_act_kernel_params) = filtered_lrelu_act_kernel<T, signWrite, signRead>;
+    return (void*)kernel;
+}
+
+//------------------------------------------------------------------------
+// CUDA kernel selection.
+
+// Picks the kernel specialization to launch for the given parameters.
+//
+// p        - kernel parameters; only up, down, fuShape and fdShape are inspected here.
+// sharedKB - amount of shared memory (in KB) the caller can offer; rows requiring more are skipped.
+//
+// Returns the launch spec of the first matching row of the table below. If no row matches, the
+// zero-initialized spec is returned (setup and exec are null).
+template <class T, class index_t, bool signWrite, bool signRead> filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel(const filtered_lrelu_kernel_params& p, int sharedKB)
+{
+    filtered_lrelu_kernel_spec s = { 0 };
+
+    // Return the first matching kernel.
+    // CASE arguments: SH = shared memory requirement in KB, U/D = up/down factor (must match exactly),
+    // FU/FD = largest supported up/down filter size, MODE = filter mode, TW/TH = output tile width/height,
+    // W = warps per block (the kernel is instantiated with W*32 threads), XR = xrep, WS = write-skip flag.
+    // A row matches when sharedKB >= SH, the mode agrees with the filter shapes (fuShape.y / fdShape.y equal
+    // to 0 selects the SU.. / ..SD modes, greater than 0 selects the FU.. / ..FD modes), and the factors and
+    // filter sizes fit. dynamicSharedKB is reported as 0 for the 48 KB rows and as SH otherwise.
+    // NOTE: do not put '//' comments inside the macro body - the line continuations would pull the
+    // following lines into the comment.
+#define CASE(SH, U, FU, D, FD, MODE, TW, TH, W, XR, WS) \
+    if (sharedKB >= SH) \
+    if ((p.fuShape.y == 0 && (MODE == MODE_SUSD || MODE == MODE_SUFD)) || (p.fuShape.y > 0 && (MODE == MODE_FUSD || MODE == MODE_FUFD))) \
+    if ((p.fdShape.y == 0 && (MODE == MODE_SUSD || MODE == MODE_FUSD)) || (p.fdShape.y > 0 && (MODE == MODE_SUFD || MODE == MODE_FUFD))) \
+    if (p.up == U && p.fuShape.x <= FU && p.fuShape.y <= FU && p.down == D && p.fdShape.x <= FD && p.fdShape.y <= FD) \
+    { \
+        static_assert((D*TW % 4) == 0, "down * tileWidth must be divisible by 4"); \
+        static_assert(FU % U == 0, "upscaling filter size must be multiple of upscaling factor"); \
+        static_assert(FD % D == 0, "downscaling filter size must be multiple of downscaling factor"); \
+        s.setup = (void*)setup_filters_kernel; \
+        s.exec = (void*)filtered_lrelu_kernel<T, index_t, SH, signWrite, signRead, MODE, U, FU, D, FD, TW, TH, W*32, !!XR, !!WS>; \
+        s.tileOut = make_int2(TW, TH); \
+        s.numWarps = W; \
+        s.xrep = XR; \
+        s.dynamicSharedKB = (SH == 48) ? 0 : SH; \
+        return s; \
+    }
+
+    // Launch parameters for various kernel specializations.
+    // Small filters must be listed before large filters, otherwise the kernel for larger filter will always match first.
+    // Kernels that use more shared memory must be listed before those that use less, for the same reason.
+    // The trailing comment on each row is a short label for that specialization.
+
+    CASE(/*sharedKB*/48, /*up,fu*/1,1,  /*down,fd*/1,1,  /*mode*/MODE_FUFD, /*tw,th,warps,xrep,wskip*/64,  178, 32,  0,  0) // 1t-upf1-downf1
+    CASE(/*sharedKB*/48, /*up,fu*/2,8,  /*down,fd*/1,1,  /*mode*/MODE_SUFD, /*tw,th,warps,xrep,wskip*/152, 95,  16,  0,  0) // 4t-ups2-downf1
+    CASE(/*sharedKB*/48, /*up,fu*/1,1,  /*down,fd*/2,8,  /*mode*/MODE_FUSD, /*tw,th,warps,xrep,wskip*/56,  22,  16,  0,  0) // 4t-upf1-downs2
+    CASE(/*sharedKB*/48, /*up,fu*/2,8,  /*down,fd*/2,8,  /*mode*/MODE_SUSD, /*tw,th,warps,xrep,wskip*/56,  29,  16,  11, 0) // 4t-ups2-downs2
+    CASE(/*sharedKB*/48, /*up,fu*/2,8,  /*down,fd*/2,8,  /*mode*/MODE_FUSD, /*tw,th,warps,xrep,wskip*/60,  28,  16,  0,  0) // 4t-upf2-downs2
+    CASE(/*sharedKB*/48, /*up,fu*/2,8,  /*down,fd*/2,8,  /*mode*/MODE_SUFD, /*tw,th,warps,xrep,wskip*/56,  28,  16,  0,  0) // 4t-ups2-downf2
+    CASE(/*sharedKB*/48, /*up,fu*/4,16, /*down,fd*/2,8,  /*mode*/MODE_SUSD, /*tw,th,warps,xrep,wskip*/56,  31,  16,  11, 0) // 4t-ups4-downs2
+    CASE(/*sharedKB*/48, /*up,fu*/4,16, /*down,fd*/2,8,  /*mode*/MODE_SUFD, /*tw,th,warps,xrep,wskip*/56,  36,  16,  0,  0) // 4t-ups4-downf2
+    CASE(/*sharedKB*/48, /*up,fu*/2,8,  /*down,fd*/4,16, /*mode*/MODE_SUSD, /*tw,th,warps,xrep,wskip*/16,  22,  16,  12, 0) // 4t-ups2-downs4
+    CASE(/*sharedKB*/48, /*up,fu*/2,8,  /*down,fd*/4,16, /*mode*/MODE_FUSD, /*tw,th,warps,xrep,wskip*/29,  15,  16,  0,  0) // 4t-upf2-downs4
+    CASE(/*sharedKB*/48, /*up,fu*/2,12, /*down,fd*/1,1,  /*mode*/MODE_SUFD, /*tw,th,warps,xrep,wskip*/96,  150, 28,  0,  0) // 6t-ups2-downf1
+    CASE(/*sharedKB*/48, /*up,fu*/1,1,  /*down,fd*/2,12, /*mode*/MODE_FUSD, /*tw,th,warps,xrep,wskip*/32,  35,  24,  0,  0) // 6t-upf1-downs2
+    CASE(/*sharedKB*/48, /*up,fu*/2,12, /*down,fd*/2,12, /*mode*/MODE_SUSD, /*tw,th,warps,xrep,wskip*/32,  46,  16,  10, 0) // 6t-ups2-downs2
+    CASE(/*sharedKB*/48, /*up,fu*/2,12, /*down,fd*/2,12, /*mode*/MODE_FUSD, /*tw,th,warps,xrep,wskip*/58,  28,  24,  8,  0) // 6t-upf2-downs2
+    CASE(/*sharedKB*/48, /*up,fu*/2,12, /*down,fd*/2,12, /*mode*/MODE_SUFD, /*tw,th,warps,xrep,wskip*/52,  28,  16,  0,  0) // 6t-ups2-downf2
+    CASE(/*sharedKB*/48, /*up,fu*/4,24, /*down,fd*/2,12, /*mode*/MODE_SUSD, /*tw,th,warps,xrep,wskip*/32,  51,  16,  5,  0) // 6t-ups4-downs2
+    CASE(/*sharedKB*/48, /*up,fu*/4,24, /*down,fd*/2,12, /*mode*/MODE_SUFD, /*tw,th,warps,xrep,wskip*/32,  56,  16,  6,  0) // 6t-ups4-downf2
+    CASE(/*sharedKB*/48, /*up,fu*/2,12, /*down,fd*/4,24, /*mode*/MODE_SUSD, /*tw,th,warps,xrep,wskip*/16,  18,  16,  12, 0) // 6t-ups2-downs4
+    CASE(/*sharedKB*/96, /*up,fu*/2,12, /*down,fd*/4,24, /*mode*/MODE_FUSD, /*tw,th,warps,xrep,wskip*/27,  31,  32,  6,  0) // 6t-upf2-downs4 96kB
+    CASE(/*sharedKB*/48, /*up,fu*/2,12, /*down,fd*/4,24, /*mode*/MODE_FUSD, /*tw,th,warps,xrep,wskip*/27,  13,  24,  0,  0) // 6t-upf2-downs4
+    CASE(/*sharedKB*/48, /*up,fu*/2,16, /*down,fd*/1,1,  /*mode*/MODE_SUFD, /*tw,th,warps,xrep,wskip*/148, 89,  24,  0,  0) // 8t-ups2-downf1
+    CASE(/*sharedKB*/48, /*up,fu*/1,1,  /*down,fd*/2,16, /*mode*/MODE_FUSD, /*tw,th,warps,xrep,wskip*/32,  31,  16,  5,  0) // 8t-upf1-downs2
+    CASE(/*sharedKB*/48, /*up,fu*/2,16, /*down,fd*/2,16, /*mode*/MODE_SUSD, /*tw,th,warps,xrep,wskip*/32,  41,  16,  9,  0) // 8t-ups2-downs2
+    CASE(/*sharedKB*/48, /*up,fu*/2,16, /*down,fd*/2,16, /*mode*/MODE_FUSD, /*tw,th,warps,xrep,wskip*/56,  26,  24,  0,  0) // 8t-upf2-downs2
+    CASE(/*sharedKB*/48, /*up,fu*/2,16, /*down,fd*/2,16, /*mode*/MODE_SUFD, /*tw,th,warps,xrep,wskip*/32,  40,  16,  0,  0) // 8t-ups2-downf2
+    CASE(/*sharedKB*/48, /*up,fu*/4,32, /*down,fd*/2,16, /*mode*/MODE_SUSD, /*tw,th,warps,xrep,wskip*/32,  46,  24,  5,  0) // 8t-ups4-downs2
+    CASE(/*sharedKB*/48, /*up,fu*/4,32, /*down,fd*/2,16, /*mode*/MODE_SUFD, /*tw,th,warps,xrep,wskip*/32,  50,  16,  0,  0) // 8t-ups4-downf2
+    CASE(/*sharedKB*/96, /*up,fu*/2,16, /*down,fd*/4,32, /*mode*/MODE_SUSD, /*tw,th,warps,xrep,wskip*/24,  24,  32,  12, 1) // 8t-ups2-downs4 96kB
+    CASE(/*sharedKB*/48, /*up,fu*/2,16, /*down,fd*/4,32, /*mode*/MODE_SUSD, /*tw,th,warps,xrep,wskip*/16,  13,  16,  10, 1) // 8t-ups2-downs4
+    CASE(/*sharedKB*/96, /*up,fu*/2,16, /*down,fd*/4,32, /*mode*/MODE_FUSD, /*tw,th,warps,xrep,wskip*/25,  28,  28,  4,  0) // 8t-upf2-downs4 96kB
+    CASE(/*sharedKB*/48, /*up,fu*/2,16, /*down,fd*/4,32, /*mode*/MODE_FUSD, /*tw,th,warps,xrep,wskip*/25,  10,  24,  0,  0) // 8t-upf2-downs4
+
+    #undef CASE
+    return s; // No kernel found.
+}
+
+//------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/ops/filtered_lrelu.h b/modules/eg3ds/torch_utils/ops/filtered_lrelu.h
new file mode 100644
index 0000000000000000000000000000000000000000..f2bfd1dd537909de9cd3b14765a482056391683b
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/filtered_lrelu.h
@@ -0,0 +1,94 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+ *
+ * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+ * property and proprietary rights in and to this material, related
+ * documentation and any modifications thereto. Any use, reproduction,
+ * disclosure or distribution of this material and related documentation
+ * without an express license agreement from NVIDIA CORPORATION or
+ * its affiliates is strictly prohibited.
+ */
+
+#include <cuda_runtime.h>
+
+//------------------------------------------------------------------------
+// CUDA kernel parameters.
+
+// Parameter block for the main filtered leaky-ReLU kernels; also the input that
+// choose_filtered_lrelu_kernel inspects to pick a specialization.
+struct filtered_lrelu_kernel_params
+{
+    // These parameters decide which kernel to use.
+    int             up;         // upsampling ratio (1, 2, 4)
+    int             down;       // downsampling ratio (1, 2, 4)
+    // NOTE(review): choose_filtered_lrelu_kernel tests fuShape.y == 0 / fdShape.y == 0 to detect the
+    // one-dimensional filter case, so the "[size, 1]" form below may actually be stored as [size, 0] - confirm against the host code.
+    int2            fuShape;    // [size, 1] | [size, size]
+    int2            fdShape;    // [size, 1] | [size, size]
+
+    int             _dummy;     // Alignment.
+
+    // Rest of the parameters.
+    const void*     x;          // Input tensor.
+    void*           y;          // Output tensor.
+    const void*     b;          // Bias tensor.
+    unsigned char*  s;          // Sign tensor in/out. NULL if unused.
+    const float*    fu;         // Upsampling filter.
+    const float*    fd;         // Downsampling filter.
+
+    int2            pad0;       // Left/top padding.
+    float           gain;       // Additional gain factor.
+    float           slope;      // Leaky ReLU slope on negative side.
+    float           clamp;      // Clamp after nonlinearity.
+    int             flip;       // Filter kernel flip for gradient computation.
+
+    int             tilesXdim;  // Original number of horizontal output tiles.
+    int             tilesXrep;  // Number of horizontal tiles per CTA.
+    int             blockZofs;  // Block z offset to support large minibatch, channel dimensions.
+
+    int4            xShape;     // [width, height, channel, batch]
+    int4            yShape;     // [width, height, channel, batch]
+    int2            sShape;     // [width, height] - width is in bytes. Contiguous. Zeros if unused.
+    int2            sOfs;       // [ofs_x, ofs_y] - offset between upsampled data and sign tensor.
+    int             swLimit;    // Active width of sign tensor in bytes.
+
+    longlong4       xStride;    // Strides of all tensors except signs, same component order as shapes.
+    longlong4       yStride;    // Output tensor (y) strides.
+    int64_t         bStride;    // Bias tensor (b) stride.
+    longlong3       fuStride;   // Upsampling filter (fu) strides.
+    longlong3       fdStride;   // Downsampling filter (fd) strides.
+};
+
+// Parameter block for filtered_lrelu_act_kernel, which applies the activation to x in-place
+// and optionally writes or reads the packed sign tensor s.
+struct filtered_lrelu_act_kernel_params
+{
+    void*           x;          // Input/output, modified in-place.
+    unsigned char*  s;          // Sign tensor in/out. NULL if unused.
+
+    float           gain;       // Additional gain factor.
+    float           slope;      // Leaky ReLU slope on negative side.
+    float           clamp;      // Clamp after nonlinearity.
+
+    int4            xShape;     // [width, height, channel, batch]
+    longlong4       xStride;    // Input/output tensor strides, same order as in shape.
+    // Unlike filtered_lrelu_kernel_params::sShape, the width here counts elements, not bytes;
+    // the kernel packs four 2-bit codes into each byte of s.
+    int2            sShape;     // [width, height] - width is in elements. Contiguous. Zeros if unused.
+    int2            sOfs;       // [ofs_x, ofs_y] - offset between upsampled data and sign tensor.
+};
+
+//------------------------------------------------------------------------
+// CUDA kernel specialization.
+
+// Launch description produced by choose_filtered_lrelu_kernel.
+// All fields are zero (null setup/exec) when no kernel specialization matched.
+struct filtered_lrelu_kernel_spec
+{
+    void*   setup;              // Function for filter kernel setup.
+    void*   exec;               // Function for main operation.
+    int2    tileOut;            // Width/height of launch tile.
+    int     numWarps;           // Number of warps per thread block, determines launch block size.
+    int     xrep;               // For processing multiple horizontal tiles per thread block.
+    int     dynamicSharedKB;    // How much dynamic shared memory the exec kernel wants. Reported as 0 for the 48 KB variants.
+};
+
+//------------------------------------------------------------------------
+// CUDA kernel selection.
+
+// Returns the launch spec of the first kernel specialization that fits p and the available shared
+// memory (sharedKB, in KB); the returned spec is all zeros when nothing matches.
+template <class T, class index_t, bool signWrite, bool signRead> filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel(const filtered_lrelu_kernel_params& p, int sharedKB);
+// Returns an untyped pointer to the in-place activation kernel instantiated for (T, signWrite, signRead).
+template <class T, bool signWrite, bool signRead> void* choose_filtered_lrelu_act_kernel(void);
+// NOTE(review): defined outside this header; presumably transfers the filter data for the selected
+// variant on the given stream - confirm against the definition.
+template <bool signWrite, bool signRead> cudaError_t copy_filters(cudaStream_t stream);
+
+//------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/ops/filtered_lrelu.py b/modules/eg3ds/torch_utils/ops/filtered_lrelu.py
new file mode 100644
index 0000000000000000000000000000000000000000..2047b7e19320e8d03e444ca1cb03fe00d0c5e96e
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/filtered_lrelu.py
@@ -0,0 +1,276 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+import os
+import numpy as np
+import torch
+import warnings
+
+from .. import custom_ops
+from .. import misc
+from . import upfirdn2d
+from . import bias_act
+
+#----------------------------------------------------------------------------
+
+_plugin = None
+
+def _init():
+    global _plugin
+    if _plugin is None:
+        _plugin = custom_ops.get_plugin(
+            module_name='filtered_lrelu_plugin',
+            sources=['filtered_lrelu.cpp', 'filtered_lrelu_wr.cu', 'filtered_lrelu_rd.cu', 'filtered_lrelu_ns.cu'],
+            headers=['filtered_lrelu.h', 'filtered_lrelu.cu'],
+            source_dir=os.path.dirname(__file__),
+            extra_cuda_cflags=['--use_fast_math'],
+        )
+    return True
+
+def _get_filter_size(f):
+    if f is None:
+        return 1, 1
+    assert isinstance(f, torch.Tensor)
+    assert 1 <= f.ndim <= 2
+    return f.shape[-1], f.shape[0] # width, height
+
+def _parse_padding(padding):
+    if isinstance(padding, int):
+        padding = [padding, padding]
+    assert isinstance(padding, (list, tuple))
+    assert all(isinstance(x, (int, np.integer)) for x in padding)
+    padding = [int(x) for x in padding]
+    if len(padding) == 2:
+        px, py = padding
+        padding = [px, px, py, py]
+    px0, px1, py0, py1 = padding
+    return px0, px1, py0, py1
+
+#----------------------------------------------------------------------------
+
+def filtered_lrelu(x, fu=None, fd=None, b=None, up=1, down=1, padding=0, gain=np.sqrt(2), slope=0.2, clamp=None, flip_filter=False, impl='cuda'):
+    r"""Filtered leaky ReLU for a batch of 2D images.
+
+    Performs the following sequence of operations for each channel:
+
+    1. Add channel-specific bias if provided (`b`).
+
+    2. Upsample the image by inserting N-1 zeros after each pixel (`up`).
+
+    3. Pad the image with the specified number of zeros on each side (`padding`).
+       Negative padding corresponds to cropping the image.
+
+    4. Convolve the image with the specified upsampling FIR filter (`fu`), shrinking it
+       so that the footprint of all output pixels lies within the input image.
+
+    5. Multiply each value by the provided gain factor (`gain`).
+
+    6. Apply leaky ReLU activation function to each value.
+
+    7. Clamp each value between -clamp and +clamp, if `clamp` parameter is provided.
+
+    8. Convolve the image with the specified downsampling FIR filter (`fd`), shrinking
+       it so that the footprint of all output pixels lies within the input image.
+
+    9. Downsample the image by keeping every Nth pixel (`down`).
+
+    The fused op is considerably more efficient than performing the same calculation
+    using standard PyTorch ops. It supports gradients of arbitrary order.
+
+    Args:
+        x:           Float32/float16/float64 input tensor of the shape
+                     `[batch_size, num_channels, in_height, in_width]`.
+        fu:          Float32 upsampling FIR filter of the shape
+                     `[filter_height, filter_width]` (non-separable),
+                     `[filter_taps]` (separable), or
+                     `None` (identity).
+        fd:          Float32 downsampling FIR filter of the shape
+                     `[filter_height, filter_width]` (non-separable),
+                     `[filter_taps]` (separable), or
+                     `None` (identity).
+        b:           Bias vector, or `None` to disable. Must be a 1D tensor of the same type
+                     as `x`. The length of the vector must match the channel dimension of `x`.
+        up:          Integer upsampling factor (default: 1).
+        down:        Integer downsampling factor (default: 1).
+        padding:     Padding with respect to the upsampled image. Can be a single number
+                     or a list/tuple `[x, y]` or `[x_before, x_after, y_before, y_after]`
+                     (default: 0).
+        gain:        Overall scaling factor for signal magnitude (default: sqrt(2)).
+        slope:       Slope on the negative side of leaky ReLU (default: 0.2).
+        clamp:       Maximum magnitude for leaky ReLU output (default: None).
+        flip_filter: False = convolution, True = correlation (default: False).
+        impl:        Implementation to use. Can be `'ref'` or `'cuda'` (default: `'cuda'`).
+
+    Returns:
+        Tensor of the shape `[batch_size, num_channels, out_height, out_width]`.
+    """
+    assert isinstance(x, torch.Tensor)
+    assert impl in ['ref', 'cuda']
+    if impl == 'cuda' and x.device.type == 'cuda' and _init():
+        return _filtered_lrelu_cuda(up=up, down=down, padding=padding, gain=gain, slope=slope, clamp=clamp, flip_filter=flip_filter).apply(x, fu, fd, b, None, 0, 0)
+    return _filtered_lrelu_ref(x, fu=fu, fd=fd, b=b, up=up, down=down, padding=padding, gain=gain, slope=slope, clamp=clamp, flip_filter=flip_filter)
+
+#----------------------------------------------------------------------------
+
+@misc.profiled_function
+def _filtered_lrelu_ref(x, fu=None, fd=None, b=None, up=1, down=1, padding=0, gain=np.sqrt(2), slope=0.2, clamp=None, flip_filter=False):
+    """Slow and memory-inefficient reference implementation of `filtered_lrelu()` using
+    existing `upfirdn2d()` and `bias_act()` ops.
+    """
+    assert isinstance(x, torch.Tensor) and x.ndim == 4
+    fu_w, fu_h = _get_filter_size(fu)
+    fd_w, fd_h = _get_filter_size(fd)
+    if b is not None:
+        assert isinstance(b, torch.Tensor) and b.dtype == x.dtype
+        misc.assert_shape(b, [x.shape[1]])
+    assert isinstance(up, int) and up >= 1
+    assert isinstance(down, int) and down >= 1
+    px0, px1, py0, py1 = _parse_padding(padding)
+    assert gain == float(gain) and gain > 0
+    assert slope == float(slope) and slope >= 0
+    assert clamp is None or (clamp == float(clamp) and clamp >= 0)
+
+    # Calculate output size.
+    batch_size, channels, in_h, in_w = x.shape
+    in_dtype = x.dtype
+    out_w = (in_w * up + (px0 + px1) - (fu_w - 1) - (fd_w - 1) + (down - 1)) // down
+    out_h = (in_h * up + (py0 + py1) - (fu_h - 1) - (fd_h - 1) + (down - 1)) // down
+
+    # Compute using existing ops.
+    x = bias_act.bias_act(x=x, b=b) # Apply bias.
+    x = upfirdn2d.upfirdn2d(x=x, f=fu, up=up, padding=[px0, px1, py0, py1], gain=up**2, flip_filter=flip_filter) # Upsample.
+    x = bias_act.bias_act(x=x, act='lrelu', alpha=slope, gain=gain, clamp=clamp) # Bias, leaky ReLU, clamp.
+    x = upfirdn2d.upfirdn2d(x=x, f=fd, down=down, flip_filter=flip_filter) # Downsample.
+
+    # Check output shape & dtype.
+    misc.assert_shape(x, [batch_size, channels, out_h, out_w])
+    assert x.dtype == in_dtype
+    return x
+
+#----------------------------------------------------------------------------
+
+_filtered_lrelu_cuda_cache = dict()
+
+def _filtered_lrelu_cuda(up=1, down=1, padding=0, gain=np.sqrt(2), slope=0.2, clamp=None, flip_filter=False):
+    """Fast CUDA implementation of `filtered_lrelu()` using custom ops.
+    """
+    assert isinstance(up, int) and up >= 1
+    assert isinstance(down, int) and down >= 1
+    px0, px1, py0, py1 = _parse_padding(padding)
+    assert gain == float(gain) and gain > 0
+    gain = float(gain)
+    assert slope == float(slope) and slope >= 0
+    slope = float(slope)
+    assert clamp is None or (clamp == float(clamp) and clamp >= 0)
+    clamp = float(clamp if clamp is not None else 'inf')
+
+    # Lookup from cache.
+    key = (up, down, px0, px1, py0, py1, gain, slope, clamp, flip_filter)
+    if key in _filtered_lrelu_cuda_cache:
+        return _filtered_lrelu_cuda_cache[key]
+
+    # Forward op.
+    class FilteredLReluCuda(torch.autograd.Function):
+        @staticmethod
+        def forward(ctx, x, fu, fd, b, si, sx, sy): # pylint: disable=arguments-differ
+            assert isinstance(x, torch.Tensor) and x.ndim == 4
+
+            # Replace empty up/downsample kernels with full 1x1 kernels (faster than separable).
+            if fu is None:
+                fu = torch.ones([1, 1], dtype=torch.float32, device=x.device)
+            if fd is None:
+                fd = torch.ones([1, 1], dtype=torch.float32, device=x.device)
+            assert 1 <= fu.ndim <= 2
+            assert 1 <= fd.ndim <= 2
+
+            # Replace separable 1x1 kernels with full 1x1 kernels when scale factor is 1.
+            if up == 1 and fu.ndim == 1 and fu.shape[0] == 1:
+                fu = fu.square()[None]
+            if down == 1 and fd.ndim == 1 and fd.shape[0] == 1:
+                fd = fd.square()[None]
+
+            # Missing sign input tensor.
+            if si is None:
+                si = torch.empty([0])
+
+            # Missing bias tensor.
+            if b is None:
+                b = torch.zeros([x.shape[1]], dtype=x.dtype, device=x.device)
+
+            # Construct internal sign tensor only if gradients are needed.
+            write_signs = (si.numel() == 0) and (x.requires_grad or b.requires_grad)
+
+            # Warn if input storage strides are not in decreasing order due to e.g. channels-last layout.
+            strides = [x.stride(i) for i in range(x.ndim) if x.size(i) > 1]
+            if any(a < b for a, b in zip(strides[:-1], strides[1:])):
+                warnings.warn("low-performance memory layout detected in filtered_lrelu input", RuntimeWarning)
+
+            # Call C++/Cuda plugin if datatype is supported.
+            if x.dtype in [torch.float16, torch.float32]:
+                if torch.cuda.current_stream(x.device) != torch.cuda.default_stream(x.device):
+                    warnings.warn("filtered_lrelu called with non-default cuda stream but concurrent execution is not supported", RuntimeWarning)
+                y, so, return_code = _plugin.filtered_lrelu(x, fu, fd, b, si, up, down, px0, px1, py0, py1, sx, sy, gain, slope, clamp, flip_filter, write_signs)
+            else:
+                return_code = -1
+
+            # No Cuda kernel found? Fall back to generic implementation. Still more memory efficient than the reference implementation because
+            # only the bit-packed sign tensor is retained for gradient computation.
+            if return_code < 0:
+                warnings.warn("filtered_lrelu called with parameters that have no optimized CUDA kernel, using generic fallback", RuntimeWarning)
+
+                y = x.add(b.unsqueeze(-1).unsqueeze(-1)) # Add bias.
+                y = upfirdn2d.upfirdn2d(x=y, f=fu, up=up, padding=[px0, px1, py0, py1], gain=up**2, flip_filter=flip_filter) # Upsample.
+                so = _plugin.filtered_lrelu_act_(y, si, sx, sy, gain, slope, clamp, write_signs) # Activation function and sign handling. Modifies y in-place.
+                y = upfirdn2d.upfirdn2d(x=y, f=fd, down=down, flip_filter=flip_filter) # Downsample.
+
+            # Prepare for gradient computation.
+            ctx.save_for_backward(fu, fd, (si if si.numel() else so))
+            ctx.x_shape = x.shape
+            ctx.y_shape = y.shape
+            ctx.s_ofs = sx, sy
+            return y
+
+        @staticmethod
+        def backward(ctx, dy): # pylint: disable=arguments-differ
+            fu, fd, si = ctx.saved_tensors
+            _, _, xh, xw = ctx.x_shape
+            _, _, yh, yw = ctx.y_shape
+            sx, sy = ctx.s_ofs
+            dx  = None # 0
+            dfu = None; assert not ctx.needs_input_grad[1]
+            dfd = None; assert not ctx.needs_input_grad[2]
+            db  = None # 3
+            dsi = None; assert not ctx.needs_input_grad[4]
+            dsx = None; assert not ctx.needs_input_grad[5]
+            dsy = None; assert not ctx.needs_input_grad[6]
+
+            if ctx.needs_input_grad[0] or ctx.needs_input_grad[3]:
+                pp = [
+                    (fu.shape[-1] - 1) + (fd.shape[-1] - 1) - px0,
+                    xw * up - yw * down + px0 - (up - 1),
+                    (fu.shape[0] - 1) + (fd.shape[0] - 1) - py0,
+                    xh * up - yh * down + py0 - (up - 1),
+                ]
+                gg = gain * (up ** 2) / (down ** 2)
+                ff = (not flip_filter)
+                sx = sx - (fu.shape[-1] - 1) + px0
+                sy = sy - (fu.shape[0]  - 1) + py0
+                dx = _filtered_lrelu_cuda(up=down, down=up, padding=pp, gain=gg, slope=slope, clamp=None, flip_filter=ff).apply(dy, fd, fu, None, si, sx, sy)
+
+            if ctx.needs_input_grad[3]:
+                db = dx.sum([0, 2, 3])
+
+            return dx, dfu, dfd, db, dsi, dsx, dsy
+
+    # Add to cache.
+    _filtered_lrelu_cuda_cache[key] = FilteredLReluCuda
+    return FilteredLReluCuda
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/ops/filtered_lrelu_ns.cu b/modules/eg3ds/torch_utils/ops/filtered_lrelu_ns.cu
new file mode 100644
index 0000000000000000000000000000000000000000..8a3eae46215c3babea2c54e3ae255b05f4d777af
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/filtered_lrelu_ns.cu
@@ -0,0 +1,31 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+ *
+ * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+ * property and proprietary rights in and to this material, related
+ * documentation and any modifications thereto. Any use, reproduction,
+ * disclosure or distribution of this material and related documentation
+ * without an express license agreement from NVIDIA CORPORATION or
+ * its affiliates is strictly prohibited.
+ */
+
+#include "filtered_lrelu.cu"
+
+// Template/kernel specializations for no signs mode (no gradients required).
+
+// Full op, 32-bit indexing.
+template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<c10::Half, int32_t, false, false>(const filtered_lrelu_kernel_params& p, int sharedKB);
+template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<float,     int32_t, false, false>(const filtered_lrelu_kernel_params& p, int sharedKB);
+
+// Full op, 64-bit indexing.
+template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<c10::Half, int64_t, false, false>(const filtered_lrelu_kernel_params& p, int sharedKB);
+template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<float,     int64_t, false, false>(const filtered_lrelu_kernel_params& p, int sharedKB);
+
+// Activation/signs only for generic variant. 64-bit indexing.
+template void* choose_filtered_lrelu_act_kernel<c10::Half, false, false>(void);
+template void* choose_filtered_lrelu_act_kernel<float,     false, false>(void);
+template void* choose_filtered_lrelu_act_kernel<double,    false, false>(void);
+
+// Copy filters to constant memory.
+template cudaError_t copy_filters<false, false>(cudaStream_t stream);
diff --git a/modules/eg3ds/torch_utils/ops/filtered_lrelu_rd.cu b/modules/eg3ds/torch_utils/ops/filtered_lrelu_rd.cu
new file mode 100644
index 0000000000000000000000000000000000000000..3cd43ec0648d3db05e5808299fc0ee318e5ceaa6
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/filtered_lrelu_rd.cu
@@ -0,0 +1,31 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+ *
+ * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+ * property and proprietary rights in and to this material, related
+ * documentation and any modifications thereto. Any use, reproduction,
+ * disclosure or distribution of this material and related documentation
+ * without an express license agreement from NVIDIA CORPORATION or
+ * its affiliates is strictly prohibited.
+ */
+
+#include "filtered_lrelu.cu"
+
+// Template/kernel specializations for sign read mode.
+
+// Full op, 32-bit indexing.
+template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<c10::Half, int32_t, false, true>(const filtered_lrelu_kernel_params& p, int sharedKB);
+template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<float,     int32_t, false, true>(const filtered_lrelu_kernel_params& p, int sharedKB);
+
+// Full op, 64-bit indexing.
+template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<c10::Half, int64_t, false, true>(const filtered_lrelu_kernel_params& p, int sharedKB);
+template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<float,     int64_t, false, true>(const filtered_lrelu_kernel_params& p, int sharedKB);
+
+// Activation/signs only for generic variant. 64-bit indexing.
+template void* choose_filtered_lrelu_act_kernel<c10::Half, false, true>(void);
+template void* choose_filtered_lrelu_act_kernel<float,     false, true>(void);
+template void* choose_filtered_lrelu_act_kernel<double,    false, true>(void);
+
+// Copy filters to constant memory.
+template cudaError_t copy_filters<false, true>(cudaStream_t stream);
diff --git a/modules/eg3ds/torch_utils/ops/filtered_lrelu_wr.cu b/modules/eg3ds/torch_utils/ops/filtered_lrelu_wr.cu
new file mode 100644
index 0000000000000000000000000000000000000000..bc2fa06912eb703dd77ca64533208428bdf373ac
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/filtered_lrelu_wr.cu
@@ -0,0 +1,31 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+ *
+ * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+ * property and proprietary rights in and to this material, related
+ * documentation and any modifications thereto. Any use, reproduction,
+ * disclosure or distribution of this material and related documentation
+ * without an express license agreement from NVIDIA CORPORATION or
+ * its affiliates is strictly prohibited.
+ */
+
+#include "filtered_lrelu.cu"
+
+// Template/kernel specializations for sign write mode.
+
+// Full op, 32-bit indexing.
+template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<c10::Half, int32_t, true, false>(const filtered_lrelu_kernel_params& p, int sharedKB);
+template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<float,     int32_t, true, false>(const filtered_lrelu_kernel_params& p, int sharedKB);
+
+// Full op, 64-bit indexing.
+template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<c10::Half, int64_t, true, false>(const filtered_lrelu_kernel_params& p, int sharedKB);
+template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<float,     int64_t, true, false>(const filtered_lrelu_kernel_params& p, int sharedKB);
+
+// Activation/signs only for generic variant. 64-bit indexing.
+template void* choose_filtered_lrelu_act_kernel<c10::Half, true, false>(void);
+template void* choose_filtered_lrelu_act_kernel<float,     true, false>(void);
+template void* choose_filtered_lrelu_act_kernel<double,    true, false>(void);
+
+// Copy filters to constant memory.
+template cudaError_t copy_filters<true, false>(cudaStream_t stream);
diff --git a/modules/eg3ds/torch_utils/ops/fma.py b/modules/eg3ds/torch_utils/ops/fma.py
new file mode 100644
index 0000000000000000000000000000000000000000..5458116d0b6f8b133608456bbe9003aa0283ac85
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/fma.py
@@ -0,0 +1,62 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Fused multiply-add, with slightly faster gradients than `torch.addcmul()`."""
+
+import torch
+
+#----------------------------------------------------------------------------
+
+def fma(a, b, c): # => a * b + c
+    return _FusedMultiplyAdd.apply(a, b, c)
+
+#----------------------------------------------------------------------------
+
+class _FusedMultiplyAdd(torch.autograd.Function): # a * b + c
+    @staticmethod
+    def forward(ctx, a, b, c): # pylint: disable=arguments-differ
+        out = torch.addcmul(c, a, b)
+        ctx.save_for_backward(a, b)
+        ctx.c_shape = c.shape
+        return out
+
+    @staticmethod
+    def backward(ctx, dout): # pylint: disable=arguments-differ
+        a, b = ctx.saved_tensors
+        c_shape = ctx.c_shape
+        da = None
+        db = None
+        dc = None
+
+        if ctx.needs_input_grad[0]:
+            da = _unbroadcast(dout * b, a.shape)
+
+        if ctx.needs_input_grad[1]:
+            db = _unbroadcast(dout * a, b.shape)
+
+        if ctx.needs_input_grad[2]:
+            dc = _unbroadcast(dout, c_shape)
+
+        return da, db, dc
+
+#----------------------------------------------------------------------------
+
+def _unbroadcast(x, shape): # Sum `x` over its broadcast dims so that the result has exactly `shape`.
+    extra_dims = x.ndim - len(shape) # Number of leading dims that `x` has but `shape` does not.
+    assert extra_dims >= 0
+    dim = [i for i in range(x.ndim) if x.shape[i] > 1 and (i < extra_dims or shape[i - extra_dims] == 1)]
+    if dim:
+        x = x.sum(dim=dim, keepdim=True)
+    if extra_dims:
+        x = x.reshape(x.shape[extra_dims:]) # Leading dims are all 1 here; dropping them also handles 0-dim and empty targets.
+    assert x.shape == shape
+    return x
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/ops/grid_sample_gradfix.py b/modules/eg3ds/torch_utils/ops/grid_sample_gradfix.py
new file mode 100644
index 0000000000000000000000000000000000000000..35d94724136ba162d8416803b1ad00d6da0db99f
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/grid_sample_gradfix.py
@@ -0,0 +1,79 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Custom replacement for `torch.nn.functional.grid_sample` that
+supports arbitrarily high order gradients between the input and output.
+Only works on 2D images and assumes
+`mode='bilinear'`, `padding_mode='zeros'`, `align_corners=False`."""
+
+import torch
+
+# pylint: disable=redefined-builtin
+# pylint: disable=arguments-differ
+# pylint: disable=protected-access
+
+#----------------------------------------------------------------------------
+
+enabled = False  # Enable the custom op by setting this to true.
+
+#----------------------------------------------------------------------------
+
+def grid_sample(input, grid):
+    if _should_use_custom_op():
+        return _GridSample2dForward.apply(input, grid)
+    return torch.nn.functional.grid_sample(input=input, grid=grid, mode='bilinear', padding_mode='zeros', align_corners=False)
+
+#----------------------------------------------------------------------------
+
+def _should_use_custom_op():
+    return enabled
+
+#----------------------------------------------------------------------------
+
+class _GridSample2dForward(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, input, grid):
+        assert input.ndim == 4
+        assert grid.ndim == 4
+        output = torch.nn.functional.grid_sample(input=input, grid=grid, mode='bilinear', padding_mode='zeros', align_corners=False)
+        ctx.save_for_backward(input, grid)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input, grid = ctx.saved_tensors
+        grad_input, grad_grid = _GridSample2dBackward.apply(grad_output, input, grid)
+        return grad_input, grad_grid
+
+#----------------------------------------------------------------------------
+
+class _GridSample2dBackward(torch.autograd.Function): # Gradient of grid_sample as an autograd op, differentiable w.r.t. grad_output only.
+    @staticmethod
+    def forward(ctx, grad_output, input, grid):
+        op = torch._C._jit_get_operation('aten::grid_sampler_2d_backward') # NOTE(review): private API; return type and arity reportedly vary across PyTorch releases - confirm.
+        grad_input, grad_grid = op(grad_output, input, grid, 0, 0, False) # Presumably 0, 0, False = bilinear, zeros padding, align_corners=False - TODO confirm.
+        ctx.save_for_backward(grid)
+        return grad_input, grad_grid
+
+    @staticmethod
+    def backward(ctx, grad2_grad_input, grad2_grad_grid):
+        _ = grad2_grad_grid # unused
+        grid, = ctx.saved_tensors
+        grad2_grad_output = None
+        grad2_input = None # Never assigned below: no gradient is returned for input.
+        grad2_grid = None # Never assigned below: see the assert on needs_input_grad[2].
+
+        if ctx.needs_input_grad[0]:
+            grad2_grad_output = _GridSample2dForward.apply(grad2_grad_input, grid)
+
+        assert not ctx.needs_input_grad[2] # Second-order gradients w.r.t. grid are not supported.
+        return grad2_grad_output, grad2_input, grad2_grid
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/ops/upfirdn2d.cpp b/modules/eg3ds/torch_utils/ops/upfirdn2d.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c1769c3cbe4dd04f76f9ccef726680720e6f39c8
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/upfirdn2d.cpp
@@ -0,0 +1,111 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+ *
+ * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+ * property and proprietary rights in and to this material, related
+ * documentation and any modifications thereto. Any use, reproduction,
+ * disclosure or distribution of this material and related documentation
+ * without an express license agreement from NVIDIA CORPORATION or
+ * its affiliates is strictly prohibited.
+ */
+
+#include <torch/extension.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include "upfirdn2d.h"
+
+//------------------------------------------------------------------------
+
+static torch::Tensor upfirdn2d(torch::Tensor x, torch::Tensor f, int upx, int upy, int downx, int downy, int padx0, int padx1, int pady0, int pady1, bool flip, float gain)
+{
+    // Validate arguments.
+    TORCH_CHECK(x.is_cuda(), "x must reside on CUDA device");
+    TORCH_CHECK(f.device() == x.device(), "f must reside on the same device as x");
+    TORCH_CHECK(f.dtype() == torch::kFloat, "f must be float32");
+    TORCH_CHECK(x.numel() <= INT_MAX, "x is too large");
+    TORCH_CHECK(f.numel() <= INT_MAX, "f is too large");
+    TORCH_CHECK(x.numel() > 0, "x has zero size");
+    TORCH_CHECK(f.numel() > 0, "f has zero size");
+    TORCH_CHECK(x.dim() == 4, "x must be rank 4");
+    TORCH_CHECK(f.dim() == 2, "f must be rank 2");
+    TORCH_CHECK((x.size(0)-1)*x.stride(0) + (x.size(1)-1)*x.stride(1) + (x.size(2)-1)*x.stride(2) + (x.size(3)-1)*x.stride(3) <= INT_MAX, "x memory footprint is too large");
+    TORCH_CHECK(f.size(0) >= 1 && f.size(1) >= 1, "f must be at least 1x1");
+    TORCH_CHECK(upx >= 1 && upy >= 1, "upsampling factor must be at least 1");
+    TORCH_CHECK(downx >= 1 && downy >= 1, "downsampling factor must be at least 1");
+
+    // Create output tensor.
+    const at::cuda::OptionalCUDAGuard device_guard(device_of(x));
+    int outW = ((int)x.size(3) * upx + padx0 + padx1 - (int)f.size(1) + downx) / downx;
+    int outH = ((int)x.size(2) * upy + pady0 + pady1 - (int)f.size(0) + downy) / downy;
+    TORCH_CHECK(outW >= 1 && outH >= 1, "output must be at least 1x1");
+    torch::Tensor y = torch::empty({x.size(0), x.size(1), outH, outW}, x.options(), x.suggest_memory_format());
+    TORCH_CHECK(y.numel() <= INT_MAX, "output is too large");
+    TORCH_CHECK((y.size(0)-1)*y.stride(0) + (y.size(1)-1)*y.stride(1) + (y.size(2)-1)*y.stride(2) + (y.size(3)-1)*y.stride(3) <= INT_MAX, "output memory footprint is too large");
+
+    // Initialize CUDA kernel parameters.
+    upfirdn2d_kernel_params p;
+    p.x             = x.data_ptr();
+    p.f             = f.data_ptr<float>();
+    p.y             = y.data_ptr();
+    p.up            = make_int2(upx, upy);
+    p.down          = make_int2(downx, downy);
+    p.pad0          = make_int2(padx0, pady0);
+    p.flip          = (flip) ? 1 : 0;
+    p.gain          = gain;
+    p.inSize        = make_int4((int)x.size(3), (int)x.size(2), (int)x.size(1), (int)x.size(0));
+    p.inStride      = make_int4((int)x.stride(3), (int)x.stride(2), (int)x.stride(1), (int)x.stride(0));
+    p.filterSize    = make_int2((int)f.size(1), (int)f.size(0));
+    p.filterStride  = make_int2((int)f.stride(1), (int)f.stride(0));
+    p.outSize       = make_int4((int)y.size(3), (int)y.size(2), (int)y.size(1), (int)y.size(0));
+    p.outStride     = make_int4((int)y.stride(3), (int)y.stride(2), (int)y.stride(1), (int)y.stride(0));
+    p.sizeMajor     = (p.inStride.z == 1) ? p.inSize.w : p.inSize.w * p.inSize.z;
+    p.sizeMinor     = (p.inStride.z == 1) ? p.inSize.z : 1;
+
+    // Choose CUDA kernel.
+    upfirdn2d_kernel_spec spec;
+    AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "upfirdn2d_cuda", [&]
+    {
+        spec = choose_upfirdn2d_kernel<scalar_t>(p);
+    });
+
+    // Set looping options.
+    p.loopMajor     = (p.sizeMajor - 1) / 16384 + 1;
+    p.loopMinor     = spec.loopMinor;
+    p.loopX         = spec.loopX;
+    p.launchMinor   = (p.sizeMinor - 1) / p.loopMinor + 1;
+    p.launchMajor   = (p.sizeMajor - 1) / p.loopMajor + 1;
+
+    // Compute grid size.
+    dim3 blockSize, gridSize;
+    if (spec.tileOutW < 0) // large
+    {
+        blockSize = dim3(4, 32, 1);
+        gridSize = dim3(
+            ((p.outSize.y - 1) / blockSize.x + 1) * p.launchMinor,
+            (p.outSize.x - 1) / (blockSize.y * p.loopX) + 1,
+            p.launchMajor);
+    }
+    else // small
+    {
+        blockSize = dim3(256, 1, 1);
+        gridSize = dim3(
+            ((p.outSize.y - 1) / spec.tileOutH + 1) * p.launchMinor,
+            (p.outSize.x - 1) / (spec.tileOutW * p.loopX) + 1,
+            p.launchMajor);
+    }
+
+    // Launch CUDA kernel.
+    void* args[] = {&p};
+    AT_CUDA_CHECK(cudaLaunchKernel(spec.kernel, gridSize, blockSize, args, 0, at::cuda::getCurrentCUDAStream()));
+    return y;
+}
+
+//------------------------------------------------------------------------
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
+{
+    m.def("upfirdn2d", &upfirdn2d);
+}
+
+//------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/ops/upfirdn2d.cu b/modules/eg3ds/torch_utils/ops/upfirdn2d.cu
new file mode 100644
index 0000000000000000000000000000000000000000..7d182d7b86a9058d0c007b13716d6e7f08207f42
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/upfirdn2d.cu
@@ -0,0 +1,388 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+ *
+ * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+ * property and proprietary rights in and to this material, related
+ * documentation and any modifications thereto. Any use, reproduction,
+ * disclosure or distribution of this material and related documentation
+ * without an express license agreement from NVIDIA CORPORATION or
+ * its affiliates is strictly prohibited.
+ */
+
+#include <c10/util/Half.h>
+#include "upfirdn2d.h"
+
+//------------------------------------------------------------------------
+// Helpers.
+
+// Maps a storage element type T to the type the kernels use for arithmetic:
+// double computes in double, while float and c10::Half both compute in float.
+template <class T> struct InternalType;
+template <> struct InternalType<double>     { using scalar_t = double; };
+template <> struct InternalType<float>      { using scalar_t = float;  };
+template <> struct InternalType<c10::Half>  { using scalar_t = float;  };
+
+// Integer division that rounds toward negative infinity (plain '/' truncates toward zero).
+// The dividend is shifted by a whole multiple of b so that the truncating division runs
+// on a positive value; the same multiple is then subtracted from the quotient.
+// NOTE(review): the derivation assumes b > 0; callers in this file pass an upsampling factor.
+static __device__ __forceinline__ int floor_div(int a, int b)
+{
+    const int shift = 1 - a / b;
+    const int shifted = a + shift * b;
+    return shifted / b - shift;
+}
+
+//------------------------------------------------------------------------
+// Generic CUDA implementation for large filters.
+
+// Generic kernel used when no specialized small-filter variant is selected.
+// Each thread walks the input samples and filter taps for its output pixels directly
+// through p.x and p.f (no __shared__ staging, unlike upfirdn2d_kernel_small), accumulates
+// in InternalType<T>::scalar_t, scales by p.gain and stores the result as T.
+// Work assignment, as established by the index math below:
+//   - grid/block x: output row (outY) combined with the starting minor index,
+//   - grid/block y: starting output column; every loopX step advances by blockDim.y,
+//   - grid z:       a chunk of up to p.loopMajor consecutive major indices.
+template <class T> static __global__ void upfirdn2d_kernel_large(upfirdn2d_kernel_params p)
+{
+    typedef typename InternalType<T>::scalar_t scalar_t;
+
+    // Calculate thread index.
+    // The flat x index is split into outY (quotient) and the starting minor index (remainder).
+    int minorBase = blockIdx.x * blockDim.x + threadIdx.x;
+    int outY = minorBase / p.launchMinor;
+    minorBase -= outY * p.launchMinor;
+    int outXBase = blockIdx.y * p.loopX * blockDim.y + threadIdx.y;
+    int majorBase = blockIdx.z * p.loopMajor;
+    // Bitwise '|' on the comparison results: all three tests are evaluated (no short-circuit).
+    if (outXBase >= p.outSize.x | outY >= p.outSize.y | majorBase >= p.sizeMajor)
+        return;
+
+    // Setup Y receptive field.
+    // inY is the first contributing input row and h the number of contributing rows, both
+    // clamped to [0, p.inSize.y]; filterY is the filter row paired with input row inY.
+    int midY = outY * p.down.y + p.up.y - 1 - p.pad0.y;
+    int inY = min(max(floor_div(midY, p.up.y), 0), p.inSize.y);
+    int h = min(max(floor_div(midY + p.filterSize.y, p.up.y), 0), p.inSize.y) - inY;
+    int filterY = midY + p.filterSize.y - (inY + 1) * p.up.y;
+    if (p.flip)
+        filterY = p.filterSize.y - 1 - filterY;
+
+    // Loop over major, minor, and X.
+    // Major indices are consecutive; minor indices are strided by p.launchMinor.
+    for (int majorIdx = 0, major = majorBase; majorIdx < p.loopMajor & major < p.sizeMajor; majorIdx++, major++)
+    for (int minorIdx = 0, minor = minorBase; minorIdx < p.loopMinor & minor < p.sizeMinor; minorIdx++, minor += p.launchMinor)
+    {
+        // Split the flattened (major, minor) index into batch index n and channel index c
+        // using p.inSize.z (the channel count per the inSize layout comment in upfirdn2d.h).
+        int nc = major * p.sizeMinor + minor;
+        int n = nc / p.inSize.z;
+        int c = nc - n * p.inSize.z;
+        for (int loopX = 0, outX = outXBase; loopX < p.loopX & outX < p.outSize.x; loopX++, outX += blockDim.y)
+        {
+            // Setup X receptive field.
+            // Same construction as for Y: inX/w are the clamped start column and column count.
+            int midX = outX * p.down.x + p.up.x - 1 - p.pad0.x;
+            int inX = min(max(floor_div(midX, p.up.x), 0), p.inSize.x);
+            int w = min(max(floor_div(midX + p.filterSize.x, p.up.x), 0), p.inSize.x) - inX;
+            int filterX = midX + p.filterSize.x - (inX + 1) * p.up.x;
+            if (p.flip)
+                filterX = p.filterSize.x - 1 - filterX;
+
+            // Initialize pointers.
+            // The filter pointer steps by +/- the upsampling factor per input sample;
+            // the sign is positive when p.flip is set and negative otherwise.
+            const T* xp = &((const T*)p.x)[inX * p.inStride.x + inY * p.inStride.y + c * p.inStride.z + n * p.inStride.w];
+            const float* fp = &p.f[filterX * p.filterStride.x + filterY * p.filterStride.y];
+            int filterStepX = ((p.flip) ? p.up.x : -p.up.x) * p.filterStride.x;
+            int filterStepY = ((p.flip) ? p.up.y : -p.up.y) * p.filterStride.y;
+
+            // Inner loop.
+            // Accumulates input * tap over the h-by-w footprint; when h or w is zero the
+            // pointers are never dereferenced and v stays 0.
+            scalar_t v = 0;
+            for (int y = 0; y < h; y++)
+            {
+                for (int x = 0; x < w; x++)
+                {
+                    v += (scalar_t)(*xp) * (scalar_t)(*fp);
+                    xp += p.inStride.x;
+                    fp += filterStepX;
+                }
+                // Rewind the w horizontal steps and move both pointers to the next row.
+                xp += p.inStride.y - w * p.inStride.x;
+                fp += filterStepY - w * filterStepX;
+            }
+
+            // Store result.
+            v *= p.gain;
+            ((T*)p.y)[outX * p.outStride.x + outY * p.outStride.y + c * p.outStride.z + n * p.outStride.w] = (T)v;
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// Specialized CUDA implementation for small filters.
+
+// Tiled kernel specialized at compile time for fixed up/down factors and a maximum filter size.
+// Template parameters:
+//   upx, upy / downx, downy : up- and downsampling factors baked into the index math.
+//   filterW, filterH        : dimensions of the shared filter array sf; taps beyond the actual
+//                             p.filterSize are stored as zero.
+//   tileOutW, tileOutH      : size of the output tile handled per loop iteration of a block.
+//   loopMinor               : number of minor (channel) entries staged per tile (last dim of sx).
+// Each block first copies the filter into sf, then for every tile stages the required input
+// patch into sx and computes the tile's outputs from the two __shared__ arrays.
+template <class T, int upx, int upy, int downx, int downy, int filterW, int filterH, int tileOutW, int tileOutH, int loopMinor>
+static __global__ void upfirdn2d_kernel_small(upfirdn2d_kernel_params p)
+{
+    typedef typename InternalType<T>::scalar_t scalar_t;
+    // Extent of the input patch staged in sx for one output tile.
+    const int tileInW = ((tileOutW - 1) * downx + filterW - 1) / upx + 1;
+    const int tileInH = ((tileOutH - 1) * downy + filterH - 1) / upy + 1;
+    __shared__ volatile scalar_t sf[filterH][filterW];
+    __shared__ volatile scalar_t sx[tileInH][tileInW][loopMinor];
+
+    // Calculate tile index.
+    // blockIdx.x is split into the tile row (quotient) and the minor group (remainder);
+    // both are then scaled to element units.
+    int minorBase = blockIdx.x;
+    int tileOutY = minorBase / p.launchMinor;
+    minorBase -= tileOutY * p.launchMinor;
+    minorBase *= loopMinor;
+    tileOutY *= tileOutH;
+    int tileOutXBase = blockIdx.y * p.loopX * tileOutW;
+    int majorBase = blockIdx.z * p.loopMajor;
+    // Depends only on block indices and p, so the whole block takes the same branch.
+    if (tileOutXBase >= p.outSize.x | tileOutY >= p.outSize.y | majorBase >= p.sizeMajor)
+        return;
+
+    // Load filter (flipped).
+    // Taps are stored in reversed order unless p.flip is set; entries outside the actual
+    // filter size are written as 0.
+    for (int tapIdx = threadIdx.x; tapIdx < filterH * filterW; tapIdx += blockDim.x)
+    {
+        int fy = tapIdx / filterW;
+        int fx = tapIdx - fy * filterW;
+        scalar_t v = 0;
+        if (fx < p.filterSize.x & fy < p.filterSize.y)
+        {
+            int ffx = (p.flip) ? fx : p.filterSize.x - 1 - fx;
+            int ffy = (p.flip) ? fy : p.filterSize.y - 1 - fy;
+            v = (scalar_t)p.f[ffx * p.filterStride.x + ffy * p.filterStride.y];
+        }
+        sf[fy][fx] = v;
+    }
+
+    // Loop over major and X.
+    for (int majorIdx = 0, major = majorBase; majorIdx < p.loopMajor & major < p.sizeMajor; majorIdx++, major++)
+    {
+        // Split the flattened index into batch index n and the first channel of this group.
+        int baseNC = major * p.sizeMinor + minorBase;
+        int n = baseNC / p.inSize.z;
+        int baseC = baseNC - n * p.inSize.z;
+        for (int loopX = 0, tileOutX = tileOutXBase; loopX < p.loopX & tileOutX < p.outSize.x; loopX++, tileOutX += tileOutW)
+        {
+            // Load input pixels.
+            int tileMidX = tileOutX * downx + upx - 1 - p.pad0.x;
+            int tileMidY = tileOutY * downy + upy - 1 - p.pad0.y;
+            int tileInX = floor_div(tileMidX, upx);
+            int tileInY = floor_div(tileMidY, upy);
+            // Barrier before sx is overwritten: earlier reads of sx (previous iteration) and
+            // the sf writes above are complete for every thread of the block.
+            __syncthreads();
+            for (int inIdx = threadIdx.x; inIdx < tileInH * tileInW * loopMinor; inIdx += blockDim.x)
+            {
+                // Decompose the flat index into (relInY, relInX, relC), relC varying fastest.
+                int relC = inIdx;
+                int relInX = relC / loopMinor;
+                int relInY = relInX / tileInW;
+                relC -= relInX * loopMinor;
+                relInX -= relInY * tileInW;
+                int c = baseC + relC;
+                int inX = tileInX + relInX;
+                int inY = tileInY + relInY;
+                // Positions outside the input (or channels past inSize.z) are staged as 0.
+                scalar_t v = 0;
+                if (inX >= 0 & inY >= 0 & inX < p.inSize.x & inY < p.inSize.y & c < p.inSize.z)
+                    v = (scalar_t)((const T*)p.x)[inX * p.inStride.x + inY * p.inStride.y + c * p.inStride.z + n * p.inStride.w];
+                sx[relInY][relInX][relC] = v;
+            }
+
+            // Loop over output pixels.
+            // Barrier: sx is fully populated before any thread reads from it.
+            __syncthreads();
+            for (int outIdx = threadIdx.x; outIdx < tileOutH * tileOutW * loopMinor; outIdx += blockDim.x)
+            {
+                // Decompose the flat index into (relOutY, relOutX, relC), relC varying fastest.
+                int relC = outIdx;
+                int relOutX = relC / loopMinor;
+                int relOutY = relOutX / tileOutW;
+                relC -= relOutX * loopMinor;
+                relOutX -= relOutY * tileOutW;
+                int c = baseC + relC;
+                int outX = tileOutX + relOutX;
+                int outY = tileOutY + relOutY;
+
+                // Setup receptive field.
+                // relInX/relInY index into sx; filterX/filterY are the first taps in sf.
+                int midX = tileMidX + relOutX * downx;
+                int midY = tileMidY + relOutY * downy;
+                int inX = floor_div(midX, upx);
+                int inY = floor_div(midY, upy);
+                int relInX = inX - tileInX;
+                int relInY = inY - tileInY;
+                int filterX = (inX + 1) * upx - midX - 1; // flipped
+                int filterY = (inY + 1) * upy - midY - 1; // flipped
+
+                // Inner loop.
+                // Only in-range outputs are computed and stored; loop bounds are compile-time
+                // constants (filterH / upy by filterW / upx taps), hence the unroll pragmas.
+                if (outX < p.outSize.x & outY < p.outSize.y & c < p.outSize.z)
+                {
+                    scalar_t v = 0;
+                    #pragma unroll
+                    for (int y = 0; y < filterH / upy; y++)
+                        #pragma unroll
+                        for (int x = 0; x < filterW / upx; x++)
+                            v += sx[relInY + y][relInX + x][relC] * sf[filterY + y * upy][filterX + x * upx];
+                    v *= p.gain;
+                    ((T*)p.y)[outX * p.outStride.x + outY * p.outStride.y + c * p.outStride.z + n * p.outStride.w] = (T)v;
+                }
+            }
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// CUDA kernel selection.
+
+// Selects the kernel and launch configuration for element type T.
+// Each brace initializer fills upfirdn2d_kernel_spec in field order:
+//   {kernel, tileOutW, tileOutH, loopMinor, loopX}.
+// The default is the generic large kernel, marked by tileOutW == -1 (the host code tests
+// spec.tileOutW < 0 to pick the matching launch geometry). Every matching `if` below
+// overwrites `spec`, so the LAST matching line wins; within each group the entries are
+// ordered from the largest filter bound to the smallest, so the tightest bound is chosen.
+// `s` is the channel stride of the input: s == 1 selects the channels_last variants.
+template <class T> upfirdn2d_kernel_spec choose_upfirdn2d_kernel(const upfirdn2d_kernel_params& p)
+{
+    int s = p.inStride.z, fx = p.filterSize.x, fy = p.filterSize.y;
+    upfirdn2d_kernel_spec spec = {(void*)upfirdn2d_kernel_large<T>, -1,-1,1, 4}; // contiguous
+    if (s == 1)           spec = {(void*)upfirdn2d_kernel_large<T>, -1,-1,4, 1}; // channels_last
+
+    // No up/downsampling.
+    if (p.up.x == 1 && p.up.y == 1 && p.down.x == 1 && p.down.y == 1)
+    {
+        // contiguous
+        if (s != 1 && fx <= 24 && fy <= 24) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 24,24, 64,32,1>, 64,32,1, 1};
+        if (s != 1 && fx <= 16 && fy <= 16) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 16,16, 64,32,1>, 64,32,1, 1};
+        if (s != 1 && fx <= 7  && fy <= 7 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 7,7,   64,16,1>, 64,16,1, 1};
+        if (s != 1 && fx <= 6  && fy <= 6 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 6,6,   64,16,1>, 64,16,1, 1};
+        if (s != 1 && fx <= 5  && fy <= 5 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 5,5,   64,16,1>, 64,16,1, 1};
+        if (s != 1 && fx <= 4  && fy <= 4 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 4,4,   64,16,1>, 64,16,1, 1};
+        if (s != 1 && fx <= 3  && fy <= 3 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 3,3,   64,16,1>, 64,16,1, 1};
+        if (s != 1 && fx <= 24 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 24,1,  128,8,1>, 128,8,1, 1};
+        if (s != 1 && fx <= 16 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 16,1,  128,8,1>, 128,8,1, 1};
+        if (s != 1 && fx <= 8  && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 8,1,   128,8,1>, 128,8,1, 1};
+        if (s != 1 && fx <= 1  && fy <= 24) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 1,24,  32,32,1>, 32,32,1, 1};
+        if (s != 1 && fx <= 1  && fy <= 16) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 1,16,  32,32,1>, 32,32,1, 1};
+        if (s != 1 && fx <= 1  && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 1,8,   32,32,1>, 32,32,1, 1};
+        // channels_last
+        if (s == 1 && fx <= 24 && fy <= 24) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 24,24, 32,32,1>,  32,32,1,  1};
+        if (s == 1 && fx <= 16 && fy <= 16) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 16,16, 32,32,1>,  32,32,1,  1};
+        if (s == 1 && fx <= 7  && fy <= 7 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 7,7,   16,16,8>,  16,16,8,  1};
+        if (s == 1 && fx <= 6  && fy <= 6 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 6,6,   16,16,8>,  16,16,8,  1};
+        if (s == 1 && fx <= 5  && fy <= 5 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 5,5,   16,16,8>,  16,16,8,  1};
+        if (s == 1 && fx <= 4  && fy <= 4 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 4,4,   16,16,8>,  16,16,8,  1};
+        if (s == 1 && fx <= 3  && fy <= 3 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 3,3,   16,16,8>,  16,16,8,  1};
+        if (s == 1 && fx <= 24 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 24,1,  128,1,16>, 128,1,16, 1};
+        if (s == 1 && fx <= 16 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 16,1,  128,1,16>, 128,1,16, 1};
+        if (s == 1 && fx <= 8  && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 8,1,   128,1,16>, 128,1,16, 1};
+        if (s == 1 && fx <= 1  && fy <= 24) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 1,24,  1,128,16>, 1,128,16, 1};
+        if (s == 1 && fx <= 1  && fy <= 16) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 1,16,  1,128,16>, 1,128,16, 1};
+        if (s == 1 && fx <= 1  && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,1, 1,8,   1,128,16>, 1,128,16, 1};
+    }
+
+    // 2x upsampling.
+    if (p.up.x == 2 && p.up.y == 2 && p.down.x == 1 && p.down.y == 1)
+    {
+        // contiguous
+        if (s != 1 && fx <= 24 && fy <= 24) spec = {(void*)upfirdn2d_kernel_small<T, 2,2, 1,1, 24,24, 64,32,1>, 64,32,1, 1};
+        if (s != 1 && fx <= 16 && fy <= 16) spec = {(void*)upfirdn2d_kernel_small<T, 2,2, 1,1, 16,16, 64,32,1>, 64,32,1, 1};
+        if (s != 1 && fx <= 8  && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small<T, 2,2, 1,1, 8,8,   64,16,1>, 64,16,1, 1};
+        if (s != 1 && fx <= 6  && fy <= 6 ) spec = {(void*)upfirdn2d_kernel_small<T, 2,2, 1,1, 6,6,   64,16,1>, 64,16,1, 1};
+        if (s != 1 && fx <= 4  && fy <= 4 ) spec = {(void*)upfirdn2d_kernel_small<T, 2,2, 1,1, 4,4,   64,16,1>, 64,16,1, 1};
+        if (s != 1 && fx <= 2  && fy <= 2 ) spec = {(void*)upfirdn2d_kernel_small<T, 2,2, 1,1, 2,2,   64,16,1>, 64,16,1, 1};
+        // channels_last
+        if (s == 1 && fx <= 24 && fy <= 24) spec = {(void*)upfirdn2d_kernel_small<T, 2,2, 1,1, 24,24, 32,32,1>, 32,32,1, 1};
+        if (s == 1 && fx <= 16 && fy <= 16) spec = {(void*)upfirdn2d_kernel_small<T, 2,2, 1,1, 16,16, 32,32,1>, 32,32,1, 1};
+        if (s == 1 && fx <= 8  && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small<T, 2,2, 1,1, 8,8,   16,16,8>, 16,16,8, 1};
+        if (s == 1 && fx <= 6  && fy <= 6 ) spec = {(void*)upfirdn2d_kernel_small<T, 2,2, 1,1, 6,6,   16,16,8>, 16,16,8, 1};
+        if (s == 1 && fx <= 4  && fy <= 4 ) spec = {(void*)upfirdn2d_kernel_small<T, 2,2, 1,1, 4,4,   16,16,8>, 16,16,8, 1};
+        if (s == 1 && fx <= 2  && fy <= 2 ) spec = {(void*)upfirdn2d_kernel_small<T, 2,2, 1,1, 2,2,   16,16,8>, 16,16,8, 1};
+    }
+    if (p.up.x == 2 && p.up.y == 1 && p.down.x == 1 && p.down.y == 1)
+    {
+        // contiguous
+        if (s != 1 && fx <= 24 && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 2,1, 1,1, 24,1, 128,8,1>, 128,8,1, 1};
+        if (s != 1 && fx <= 16 && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 2,1, 1,1, 16,1, 128,8,1>, 128,8,1, 1};
+        if (s != 1 && fx <= 8  && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 2,1, 1,1, 8,1,  128,8,1>, 128,8,1, 1};
+        // channels_last
+        if (s == 1 && fx <= 24 && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 2,1, 1,1, 24,1, 128,1,16>, 128,1,16, 1};
+        if (s == 1 && fx <= 16 && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 2,1, 1,1, 16,1, 128,1,16>, 128,1,16, 1};
+        if (s == 1 && fx <= 8  && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 2,1, 1,1, 8,1,  128,1,16>, 128,1,16, 1};
+    }
+    if (p.up.x == 1 && p.up.y == 2 && p.down.x == 1 && p.down.y == 1)
+    {
+        // contiguous
+        if (s != 1 && fx <= 1 && fy <= 24) spec = {(void*)upfirdn2d_kernel_small<T, 1,2, 1,1, 1,24, 32,32,1>, 32,32,1, 1};
+        if (s != 1 && fx <= 1 && fy <= 16) spec = {(void*)upfirdn2d_kernel_small<T, 1,2, 1,1, 1,16, 32,32,1>, 32,32,1, 1};
+        if (s != 1 && fx <= 1 && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,2, 1,1, 1,8,  32,32,1>, 32,32,1, 1};
+        // channels_last
+        if (s == 1 && fx <= 1 && fy <= 24) spec = {(void*)upfirdn2d_kernel_small<T, 1,2, 1,1, 1,24, 1,128,16>, 1,128,16, 1};
+        if (s == 1 && fx <= 1 && fy <= 16) spec = {(void*)upfirdn2d_kernel_small<T, 1,2, 1,1, 1,16, 1,128,16>, 1,128,16, 1};
+        if (s == 1 && fx <= 1 && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,2, 1,1, 1,8,  1,128,16>, 1,128,16, 1};
+    }
+
+    // 2x downsampling.
+    if (p.up.x == 1 && p.up.y == 1 && p.down.x == 2 && p.down.y == 2)
+    {
+        // contiguous
+        if (s != 1 && fx <= 24 && fy <= 24) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,2, 24,24, 32,16,1>, 32,16,1, 1};
+        if (s != 1 && fx <= 16 && fy <= 16) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,2, 16,16, 32,16,1>, 32,16,1, 1};
+        if (s != 1 && fx <= 8  && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,2, 8,8,   32,8,1>,  32,8,1,  1};
+        if (s != 1 && fx <= 6  && fy <= 6 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,2, 6,6,   32,8,1>,  32,8,1,  1};
+        if (s != 1 && fx <= 4  && fy <= 4 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,2, 4,4,   32,8,1>,  32,8,1,  1};
+        if (s != 1 && fx <= 2  && fy <= 2 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,2, 2,2,   32,8,1>,  32,8,1,  1};
+        // channels_last
+        if (s == 1 && fx <= 24 && fy <= 24) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,2, 24,24, 16,16,1>, 16,16,1, 1};
+        if (s == 1 && fx <= 16 && fy <= 16) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,2, 16,16, 16,16,1>, 16,16,1, 1};
+        if (s == 1 && fx <= 8  && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,2, 8,8,   8,8,8>,   8,8,8,   1};
+        if (s == 1 && fx <= 6  && fy <= 6 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,2, 6,6,   8,8,8>,   8,8,8,   1};
+        if (s == 1 && fx <= 4  && fy <= 4 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,2, 4,4,   8,8,8>,   8,8,8,   1};
+        if (s == 1 && fx <= 2  && fy <= 2 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,2, 2,2,   8,8,8>,   8,8,8,   1};
+    }
+    if (p.up.x == 1 && p.up.y == 1 && p.down.x == 2 && p.down.y == 1)
+    {
+        // contiguous
+        if (s != 1 && fx <= 24 && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,1, 24,1, 64,8,1>, 64,8,1, 1};
+        if (s != 1 && fx <= 16 && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,1, 16,1, 64,8,1>, 64,8,1, 1};
+        if (s != 1 && fx <= 8  && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,1, 8,1,  64,8,1>, 64,8,1, 1};
+        // channels_last
+        if (s == 1 && fx <= 24 && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,1, 24,1, 64,1,8>, 64,1,8, 1};
+        if (s == 1 && fx <= 16 && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,1, 16,1, 64,1,8>, 64,1,8, 1};
+        if (s == 1 && fx <= 8  && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 2,1, 8,1,  64,1,8>, 64,1,8, 1};
+    }
+    if (p.up.x == 1 && p.up.y == 1 && p.down.x == 1 && p.down.y == 2)
+    {
+        // contiguous
+        if (s != 1 && fx <= 1 && fy <= 24) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,2, 1,24, 32,16,1>, 32,16,1, 1};
+        if (s != 1 && fx <= 1 && fy <= 16) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,2, 1,16, 32,16,1>, 32,16,1, 1};
+        if (s != 1 && fx <= 1 && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,2, 1,8,  32,16,1>, 32,16,1, 1};
+        // channels_last
+        if (s == 1 && fx <= 1  && fy <= 24) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,2, 1,24, 1,64,8>, 1,64,8, 1};
+        if (s == 1 && fx <= 1  && fy <= 16) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,2, 1,16, 1,64,8>, 1,64,8, 1};
+        if (s == 1 && fx <= 1  && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,2, 1,8,  1,64,8>, 1,64,8, 1};
+    }
+
+    // 4x upsampling.
+    if (p.up.x == 4 && p.up.y == 4 && p.down.x == 1 && p.down.y == 1)
+    {
+        // contiguous
+        if (s != 1 && fx <= 48 && fy <= 48) spec = {(void*)upfirdn2d_kernel_small<T, 4,4, 1,1, 48,48, 64,32,1>, 64,32,1, 1};
+        if (s != 1 && fx <= 32 && fy <= 32) spec = {(void*)upfirdn2d_kernel_small<T, 4,4, 1,1, 32,32, 64,32,1>, 64,32,1, 1};
+        // channels_last
+        if (s == 1 && fx <= 48 && fy <= 48) spec = {(void*)upfirdn2d_kernel_small<T, 4,4, 1,1, 48,48, 32,32,1>, 32,32,1, 1};
+        if (s == 1 && fx <= 32 && fy <= 32) spec = {(void*)upfirdn2d_kernel_small<T, 4,4, 1,1, 32,32, 32,32,1>, 32,32,1, 1};
+    }
+    if (p.up.x == 4 && p.up.y == 1 && p.down.x == 1 && p.down.y == 1)
+    {
+        // contiguous
+        if (s != 1 && fx <= 48 && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 4,1, 1,1, 48,1, 128,8,1>, 128,8,1, 1};
+        if (s != 1 && fx <= 32 && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 4,1, 1,1, 32,1, 128,8,1>, 128,8,1, 1};
+        // channels_last
+        if (s == 1 && fx <= 48 && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 4,1, 1,1, 48,1, 128,1,16>, 128,1,16, 1};
+        if (s == 1 && fx <= 32 && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 4,1, 1,1, 32,1, 128,1,16>, 128,1,16, 1};
+    }
+    if (p.up.x == 1 && p.up.y == 4 && p.down.x == 1 && p.down.y == 1)
+    {
+        // contiguous
+        if (s != 1 && fx <= 1 && fy <= 48) spec = {(void*)upfirdn2d_kernel_small<T, 1,4, 1,1, 1,48, 32,32,1>, 32,32,1, 1};
+        if (s != 1 && fx <= 1 && fy <= 32) spec = {(void*)upfirdn2d_kernel_small<T, 1,4, 1,1, 1,32, 32,32,1>, 32,32,1, 1};
+        // channels_last
+        if (s == 1 && fx <= 1 && fy <= 48) spec = {(void*)upfirdn2d_kernel_small<T, 1,4, 1,1, 1,48, 1,128,16>, 1,128,16, 1};
+        if (s == 1 && fx <= 1 && fy <= 32) spec = {(void*)upfirdn2d_kernel_small<T, 1,4, 1,1, 1,32, 1,128,16>, 1,128,16, 1};
+    }
+
+    // 4x downsampling (inefficient).
+    // Only the separable (one-axis) cases are specialized; 4x4 falls back to the large kernel.
+    if (p.up.x == 1 && p.up.y == 1 && p.down.x == 4 && p.down.y == 1)
+    {
+        // contiguous
+        if (s != 1 && fx <= 48 && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 4,1, 48,1, 32,8,1>, 32,8,1, 1};
+        if (s != 1 && fx <= 32 && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 4,1, 32,1, 32,8,1>, 32,8,1, 1};
+        // channels_last
+        if (s == 1 && fx <= 48 && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 4,1, 48,1, 32,1,8>, 32,1,8, 1};
+        if (s == 1 && fx <= 32 && fy <= 1) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 4,1, 32,1, 32,1,8>, 32,1,8, 1};
+    }
+    if (p.up.x == 1 && p.up.y == 1 && p.down.x == 1 && p.down.y == 4)
+    {
+        // contiguous
+        if (s != 1 && fx <= 1 && fy <= 48) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,4, 1,48, 32,8,1>, 32,8,1, 1};
+        if (s != 1 && fx <= 1 && fy <= 32) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,4, 1,32, 32,8,1>, 32,8,1, 1};
+        // channels_last
+        if (s == 1 && fx <= 1  && fy <= 48) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,4, 1,48, 1,32,8>, 1,32,8, 1};
+        if (s == 1 && fx <= 1  && fy <= 32) spec = {(void*)upfirdn2d_kernel_small<T, 1,1, 1,4, 1,32, 1,32,8>, 1,32,8, 1};
+    }
+    return spec;
+}
+
+//------------------------------------------------------------------------
+// Explicit template instantiations.
+
+// Instantiate choose_upfirdn2d_kernel for double, float and c10::Half so that code which
+// only sees the declaration in upfirdn2d.h can call these three variants.
+template upfirdn2d_kernel_spec choose_upfirdn2d_kernel<double>   (const upfirdn2d_kernel_params& p);
+template upfirdn2d_kernel_spec choose_upfirdn2d_kernel<float>    (const upfirdn2d_kernel_params& p);
+template upfirdn2d_kernel_spec choose_upfirdn2d_kernel<c10::Half>(const upfirdn2d_kernel_params& p);
+
+//------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/ops/upfirdn2d.h b/modules/eg3ds/torch_utils/ops/upfirdn2d.h
new file mode 100644
index 0000000000000000000000000000000000000000..d5de893d6489921d4689ac1e2cdb45da9a253f18
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/upfirdn2d.h
@@ -0,0 +1,63 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+ *
+ * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+ * property and proprietary rights in and to this material, related
+ * documentation and any modifications thereto. Any use, reproduction,
+ * disclosure or distribution of this material and related documentation
+ * without an express license agreement from NVIDIA CORPORATION or
+ * its affiliates is strictly prohibited.
+ */
+
+#include <cuda_runtime.h>
+
+//------------------------------------------------------------------------
+// CUDA kernel parameters.
+
+// Parameter bundle passed by value to the upfirdn2d kernels.
+// Field order and types are part of the kernel argument layout; do not reorder.
+struct upfirdn2d_kernel_params
+{
+    const void*     x;              // Input data; the kernels cast it to const T*.
+    const float*    f;              // Filter taps, always read as float.
+    void*           y;              // Output data; the kernels cast it to T*.
+
+    int2            up;             // Upsampling factors (x, y).
+    int2            down;           // Downsampling factors (x, y).
+    int2            pad0;           // Offset (x, y) subtracted when mapping output to input coordinates.
+    int             flip;           // Nonzero selects the non-reversed filter traversal in the kernels.
+    float           gain;           // Multiplier applied to each accumulated value before it is stored.
+
+    int4            inSize;         // [width, height, channel, batch]
+    int4            inStride;       // Element strides, same component order as inSize.
+    int2            filterSize;     // [width, height]
+    int2            filterStride;   // Element strides, same component order as filterSize.
+    int4            outSize;        // [width, height, channel, batch]
+    int4            outStride;      // Element strides, same component order as outSize.
+    int             sizeMinor;      // Host sets this to inSize.z when inStride.z == 1, otherwise 1.
+    int             sizeMajor;      // Host sets this to inSize.w when inStride.z == 1, otherwise inSize.w * inSize.z.
+
+    int             loopMinor;      // Minor entries handled per thread/tile (copied from the kernel spec).
+    int             loopMajor;      // Major indices handled per grid z slot.
+    int             loopX;          // Output-column steps handled per thread/block (copied from the kernel spec).
+    int             launchMinor;    // Number of minor groups folded into the grid x dimension.
+    int             launchMajor;    // Grid z dimension used for the launch.
+};
+
+//------------------------------------------------------------------------
+// CUDA kernel specialization.
+
+// Result of kernel selection. It is filled with positional brace initializers in
+// upfirdn2d.cu, so the field order below must not change.
+struct upfirdn2d_kernel_spec
+{
+    void*   kernel;     // Kernel entry point handed to cudaLaunchKernel by the host code.
+    int     tileOutW;   // Output tile width; a negative value marks the generic large kernel.
+    int     tileOutH;   // Output tile height (also negative for the large kernel).
+    int     loopMinor;  // Copied into upfirdn2d_kernel_params::loopMinor before launch.
+    int     loopX;      // Copied into upfirdn2d_kernel_params::loopX before launch.
+};
+
+//------------------------------------------------------------------------
+// CUDA kernel selection.
+
+// Returns the kernel and launch configuration for element type T and parameters p.
+// Declaration only: the definition and its explicit instantiations live in upfirdn2d.cu.
+template <class T> upfirdn2d_kernel_spec choose_upfirdn2d_kernel(const upfirdn2d_kernel_params& p);
+
+//------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/ops/upfirdn2d.py b/modules/eg3ds/torch_utils/ops/upfirdn2d.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e11fb4ae3b763599eaeda815c7f98551a68b230
--- /dev/null
+++ b/modules/eg3ds/torch_utils/ops/upfirdn2d.py
@@ -0,0 +1,395 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Custom PyTorch ops for efficient resampling of 2D images."""
+
+import os
+import numpy as np
+import torch
+
+from .. import custom_ops
+from .. import misc
+from . import conv2d_gradfix
+
+#----------------------------------------------------------------------------
+
+_plugin = None
+
+def _init():
+    """Make sure the `upfirdn2d_plugin` extension handle is available.
+
+    On the first call the plugin is requested from `custom_ops.get_plugin()`
+    using the sources located next to this file, and the result is stored in
+    the module-level `_plugin`. Once a handle is stored, later calls return
+    immediately.
+
+    Returns:
+        Always `True`, so the call can be chained inside a boolean condition.
+    """
+    global _plugin
+    if _plugin is not None:
+        return True
+    _plugin = custom_ops.get_plugin(
+        module_name='upfirdn2d_plugin',
+        sources=['upfirdn2d.cpp', 'upfirdn2d.cu'],
+        headers=['upfirdn2d.h'],
+        source_dir=os.path.dirname(__file__),
+        extra_cuda_cflags=['--use_fast_math'],
+    )
+    return True
+
+def _parse_scaling(scaling):
+    """Normalize a scaling factor into an `(sx, sy)` pair.
+
+    Args:
+        scaling: Either a single int (used for both axes) or a list/tuple of
+                 two ints `[x, y]`.
+
+    Returns:
+        Tuple `(sx, sy)`; both values are asserted to be ints >= 1.
+    """
+    factors = [scaling, scaling] if isinstance(scaling, int) else scaling
+    assert isinstance(factors, (list, tuple))
+    assert all(isinstance(factor, int) for factor in factors)
+    sx, sy = factors
+    assert min(sx, sy) >= 1
+    return sx, sy
+
+def _parse_padding(padding):
+    """Normalize a padding spec into `(padx0, padx1, pady0, pady1)`.
+
+    Args:
+        padding: A single int (same on all four sides), a list/tuple `[x, y]`
+                 (same before/after along each axis), or a list/tuple
+                 `[x_before, x_after, y_before, y_after]`.
+
+    Returns:
+        Tuple of four ints `(padx0, padx1, pady0, pady1)`.
+    """
+    pads = [padding, padding] if isinstance(padding, int) else padding
+    assert isinstance(pads, (list, tuple))
+    assert all(isinstance(pad, int) for pad in pads)
+    if len(pads) == 2:
+        pad_x, pad_y = pads
+        pads = [pad_x, pad_x, pad_y, pad_y]
+    x_before, x_after, y_before, y_after = pads
+    return x_before, x_after, y_before, y_after
+
+def _get_filter_size(f):
+    """Return the `(width, height)` of a FIR filter.
+
+    Args:
+        f: `None` (treated as a 1x1 filter) or a 1D/2D torch tensor. For a 1D
+           tensor the single dimension serves as both width and height.
+
+    Returns:
+        Tuple `(fw, fh)` of Python ints, each asserted to be >= 1.
+    """
+    if f is None:
+        return 1, 1
+    assert isinstance(f, torch.Tensor) and f.ndim in [1, 2]
+    # Converting shape entries to int is done with tracer warnings suppressed.
+    with misc.suppress_tracer_warnings():
+        width, height = int(f.shape[-1]), int(f.shape[0])
+    expected_shape = [height, width][:f.ndim]
+    misc.assert_shape(f, expected_shape)
+    assert width >= 1 and height >= 1
+    return width, height
+
+#----------------------------------------------------------------------------
+
+def setup_filter(f, device=torch.device('cpu'), normalize=True, flip_filter=False, gain=1, separable=None):
+    r"""Convenience function to setup 2D FIR filter for `upfirdn2d()`.
+
+    The input is never modified: all adjustments are made on new tensors, even
+    when `f` is already a float32 tensor or NumPy array that
+    `torch.as_tensor()` would alias.
+
+    Args:
+        f:           Torch tensor, numpy array, or python list of the shape
+                     `[filter_height, filter_width]` (non-separable),
+                     `[filter_taps]` (separable),
+                     `[]` (impulse), or
+                     `None` (identity).
+        device:      Result device (default: cpu).
+        normalize:   Normalize the filter so that it retains the magnitude
+                     for constant input signal (DC)? (default: True).
+        flip_filter: Flip the filter? (default: False).
+        gain:        Overall scaling factor for signal magnitude (default: 1).
+        separable:   Return a separable filter? (default: select automatically;
+                     1D inputs with at least 8 taps stay separable).
+
+    Returns:
+        Float32 tensor of the shape
+        `[filter_height, filter_width]` (non-separable) or
+        `[filter_taps]` (separable).
+    """
+    # Validate.
+    if f is None:
+        f = 1
+    f = torch.as_tensor(f, dtype=torch.float32)
+    assert f.ndim in [0, 1, 2]
+    assert f.numel() > 0
+    if f.ndim == 0:
+        f = f[np.newaxis]
+
+    # Separable?
+    if separable is None:
+        separable = (f.ndim == 1 and f.numel() >= 8)
+    if f.ndim == 1 and not separable:
+        f = f.ger(f)
+    assert f.ndim == (1 if separable else 2)
+
+    # Apply normalize, flip, gain, and device.
+    if normalize:
+        # Out-of-place on purpose: `f` may still share memory with the caller's
+        # tensor/array, and an in-place divide would silently rescale it.
+        f = f / f.sum()
+    if flip_filter:
+        f = f.flip(list(range(f.ndim)))
+    # The exponent spreads the gain evenly over the two passes of a separable filter.
+    f = f * (gain ** (f.ndim / 2))
+    f = f.to(device=device)
+    return f
+
+#----------------------------------------------------------------------------
+
+def upfirdn2d(x, f, up=1, down=1, padding=0, flip_filter=False, gain=1, impl='cuda'):
+    r"""Pad, upsample, filter, and downsample a batch of 2D images.
+
+    For every channel the following steps are carried out in order:
+
+    1. Upsample by inserting N-1 zeros after each pixel (`up`).
+
+    2. Pad with the requested number of zeros on each side (`padding`);
+       a negative amount crops instead.
+
+    3. Convolve with the 2D FIR filter `f`. The result shrinks so that the
+       footprint of every output pixel lies inside the padded image.
+
+    4. Downsample by keeping every Nth pixel (`down`).
+
+    The sequence closely resembles scipy.signal.upfirdn(). The fused CUDA op
+    is considerably faster than the equivalent chain of standard PyTorch ops
+    and supports gradients of arbitrary order.
+
+    The CUDA path is taken only when `impl == 'cuda'`, `x` lives on a CUDA
+    device, and the plugin initializes; otherwise the reference
+    implementation runs with the filter cast to float32.
+
+    Args:
+        x:           Float32/float64/float16 input tensor of the shape
+                     `[batch_size, num_channels, in_height, in_width]`.
+        f:           Float32 FIR filter of the shape
+                     `[filter_height, filter_width]` (non-separable),
+                     `[filter_taps]` (separable), or
+                     `None` (identity).
+        up:          Integer upsampling factor, either one int or a list/tuple
+                     `[x, y]` (default: 1).
+        down:        Integer downsampling factor, either one int or a list/tuple
+                     `[x, y]` (default: 1).
+        padding:     Padding relative to the upsampled image: a single number,
+                     a list/tuple `[x, y]`, or
+                     `[x_before, x_after, y_before, y_after]` (default: 0).
+        flip_filter: False = convolution, True = correlation (default: False).
+        gain:        Overall scaling factor for signal magnitude (default: 1).
+        impl:        Implementation to use, `'ref'` or `'cuda'` (default: `'cuda'`).
+
+    Returns:
+        Tensor of the shape `[batch_size, num_channels, out_height, out_width]`.
+    """
+    assert isinstance(x, torch.Tensor)
+    assert impl in ['ref', 'cuda']
+    op_kwargs = dict(up=up, down=down, padding=padding, flip_filter=flip_filter, gain=gain)
+
+    use_plugin = impl == 'cuda' and x.device.type == 'cuda' and _init()
+    if use_plugin:
+        fused_op = _upfirdn2d_cuda(**op_kwargs)
+        return fused_op.apply(x, f)
+
+    filt = None if f is None else f.float()
+    return _upfirdn2d_ref(x, filt, **op_kwargs)
+
+#----------------------------------------------------------------------------
+
+@misc.profiled_function
+def _upfirdn2d_ref(x, f, up=1, down=1, padding=0, flip_filter=False, gain=1):
+    """Reference `upfirdn2d()` assembled from standard PyTorch ops (slow).
+
+    Takes the same arguments as `upfirdn2d()` except `impl`. `x` must be a 4D
+    tensor; `f` must be `None` or a float32 1D/2D tensor that does not
+    require grad.
+    """
+    # Argument checks.
+    assert isinstance(x, torch.Tensor) and x.ndim == 4
+    if f is None:
+        f = torch.ones([1, 1], dtype=torch.float32, device=x.device)
+    assert isinstance(f, torch.Tensor) and f.ndim in [1, 2]
+    assert f.dtype == torch.float32 and not f.requires_grad
+    n, c, h, w = x.shape
+    upx, upy = _parse_scaling(up)
+    downx, downy = _parse_scaling(down)
+    padx0, padx1, pady0, pady1 = _parse_padding(padding)
+
+    # The upsampled-and-padded image must be at least as large as the filter.
+    padded_w = w * upx + padx0 + padx1
+    padded_h = h * upy + pady0 + pady1
+    assert padded_w >= f.shape[-1] and padded_h >= f.shape[0]
+
+    # Zero insertion: add a singleton axis after each spatial axis, zero-pad
+    # those axes to the upsampling factor, then fold them back in.
+    x = x.reshape([n, c, h, 1, w, 1])
+    x = torch.nn.functional.pad(x, [0, upx - 1, 0, 0, 0, upy - 1])
+    x = x.reshape([n, c, h * upy, w * upx])
+
+    # Positive padding grows the image; negative padding is applied as a crop.
+    pads = (padx0, padx1, pady0, pady1)
+    x = torch.nn.functional.pad(x, [max(p, 0) for p in pads])
+    crop_x0, crop_x1, crop_y0, crop_y1 = [max(-p, 0) for p in pads]
+    x = x[:, :, crop_y0 : x.shape[2] - crop_y1, crop_x0 : x.shape[3] - crop_x1]
+
+    # Prepare the filter: gain, dtype, orientation.
+    taps_ndim = f.ndim
+    kernel = f * (gain ** (taps_ndim / 2))
+    kernel = kernel.to(x.dtype)
+    if not flip_filter:
+        kernel = kernel.flip(list(range(taps_ndim)))
+
+    # Grouped convolution with one copy of the filter per channel. A 1D filter
+    # is applied twice, once along each spatial axis.
+    weight = kernel[np.newaxis, np.newaxis].repeat([c, 1] + [1] * taps_ndim)
+    if taps_ndim == 2:
+        x = conv2d_gradfix.conv2d(input=x, weight=weight, groups=c)
+    else:
+        for axis in (2, 3):
+            x = conv2d_gradfix.conv2d(input=x, weight=weight.unsqueeze(axis), groups=c)
+
+    # Downsample by keeping every `down`-th pixel.
+    return x[:, :, ::downy, ::downx]
+
+#----------------------------------------------------------------------------
+
+# Maps the parsed argument tuple to the autograd Function class built for it,
+# so each distinct configuration is only constructed once.
+_upfirdn2d_cuda_cache = dict()
+
+def _upfirdn2d_cuda(up=1, down=1, padding=0, flip_filter=False, gain=1):
+    """Fast CUDA implementation of `upfirdn2d()` using custom ops.
+
+    Returns a `torch.autograd.Function` subclass specialized for the given
+    arguments; call `.apply(x, f)` on it to run the op. Classes are cached in
+    `_upfirdn2d_cuda_cache`, keyed by the parsed arguments.
+
+    The returned class uses the module-level `_plugin`, so `_init()` must have
+    been called before `.apply()`.
+    """
+    # Parse arguments.
+    upx, upy = _parse_scaling(up)
+    downx, downy = _parse_scaling(down)
+    padx0, padx1, pady0, pady1 = _parse_padding(padding)
+
+    # Lookup from cache.
+    key = (upx, upy, downx, downy, padx0, padx1, pady0, pady1, flip_filter, gain)
+    if key in _upfirdn2d_cuda_cache:
+        return _upfirdn2d_cuda_cache[key]
+
+    # Forward op.
+    class Upfirdn2dCuda(torch.autograd.Function):
+        @staticmethod
+        def forward(ctx, x, f): # pylint: disable=arguments-differ
+            assert isinstance(x, torch.Tensor) and x.ndim == 4
+            if f is None:
+                f = torch.ones([1, 1], dtype=torch.float32, device=x.device)
+            if f.ndim == 1 and f.shape[0] == 1:
+                f = f.square().unsqueeze(0) # Convert separable-1 into full-1x1.
+            assert isinstance(f, torch.Tensor) and f.ndim in [1, 2]
+            y = x
+            if f.ndim == 2:
+                # Non-separable filter: a single plugin call handles both axes.
+                y = _plugin.upfirdn2d(y, f, upx, upy, downx, downy, padx0, padx1, pady0, pady1, flip_filter, gain)
+            else:
+                # Separable filter: one pass per axis. The first pass uses unit
+                # gain; the requested gain is applied once, in the second pass.
+                y = _plugin.upfirdn2d(y, f.unsqueeze(0), upx, 1, downx, 1, padx0, padx1, 0, 0, flip_filter, 1.0)
+                y = _plugin.upfirdn2d(y, f.unsqueeze(1), 1, upy, 1, downy, 0, 0, pady0, pady1, flip_filter, gain)
+            # Backward needs the filter and the input's spatial size.
+            ctx.save_for_backward(f)
+            ctx.x_shape = x.shape
+            return y
+
+        @staticmethod
+        def backward(ctx, dy): # pylint: disable=arguments-differ
+            f, = ctx.saved_tensors
+            _, _, ih, iw = ctx.x_shape
+            _, _, oh, ow = dy.shape
+            fw, fh = _get_filter_size(f)
+            # Padding for the gradient op, derived from the filter size, the
+            # forward leading padding, and the forward input/output sizes.
+            p = [
+                fw - padx0 - 1,
+                iw * upx - ow * downx + padx0 - upx + 1,
+                fh - pady0 - 1,
+                ih * upy - oh * downy + pady0 - upy + 1,
+            ]
+            dx = None
+            df = None
+
+            if ctx.needs_input_grad[0]:
+                # The input gradient is the same op with up/down swapped and the
+                # filter orientation inverted. It goes through this factory
+                # again, which is what makes higher-order gradients available.
+                dx = _upfirdn2d_cuda(up=down, down=up, padding=p, flip_filter=(not flip_filter), gain=gain).apply(dy, f)
+
+            # Gradients with respect to the filter are not supported.
+            assert not ctx.needs_input_grad[1]
+            return dx, df
+
+    # Add to cache.
+    _upfirdn2d_cuda_cache[key] = Upfirdn2dCuda
+    return Upfirdn2dCuda
+
+#----------------------------------------------------------------------------
+
+def filter2d(x, f, padding=0, flip_filter=False, gain=1, impl='cuda'):
+    r"""Filter a batch of 2D images using the given 2D FIR filter.
+
+    By default enough padding is added that the output has the same shape as
+    the input. User-specified `padding` is added on top of that; negative
+    values crop. Pixels outside the image are treated as zero.
+
+    Args:
+        x:           Float32/float64/float16 input tensor of the shape
+                     `[batch_size, num_channels, in_height, in_width]`.
+        f:           Float32 FIR filter of the shape
+                     `[filter_height, filter_width]` (non-separable),
+                     `[filter_taps]` (separable), or
+                     `None` (identity).
+        padding:     Padding relative to the output: a single number, a
+                     list/tuple `[x, y]`, or
+                     `[x_before, x_after, y_before, y_after]` (default: 0).
+        flip_filter: False = convolution, True = correlation (default: False).
+        gain:        Overall scaling factor for signal magnitude (default: 1).
+        impl:        Implementation to use, `'ref'` or `'cuda'` (default: `'cuda'`).
+
+    Returns:
+        Tensor of the shape `[batch_size, num_channels, out_height, out_width]`.
+    """
+    padx0, padx1, pady0, pady1 = _parse_padding(padding)
+    fw, fh = _get_filter_size(f)
+
+    # Padding that offsets the shrinkage caused by the filter footprint.
+    same_x0, same_x1 = fw // 2, (fw - 1) // 2
+    same_y0, same_y1 = fh // 2, (fh - 1) // 2
+    total_padding = [
+        padx0 + same_x0,
+        padx1 + same_x1,
+        pady0 + same_y0,
+        pady1 + same_y1,
+    ]
+    return upfirdn2d(x, f, padding=total_padding, flip_filter=flip_filter, gain=gain, impl=impl)
+
+#----------------------------------------------------------------------------
+
+def upsample2d(x, f, up=2, padding=0, flip_filter=False, gain=1, impl='cuda'):
+    r"""Upsample a batch of 2D images using the given 2D FIR filter.
+
+    By default enough padding is added that the output shape is a multiple of
+    the input shape. User-specified `padding` is added on top of that;
+    negative values crop. Pixels outside the image are treated as zero. The
+    gain passed on to `upfirdn2d()` is `gain * upx * upy`.
+
+    Args:
+        x:           Float32/float64/float16 input tensor of the shape
+                     `[batch_size, num_channels, in_height, in_width]`.
+        f:           Float32 FIR filter of the shape
+                     `[filter_height, filter_width]` (non-separable),
+                     `[filter_taps]` (separable), or
+                     `None` (identity). A non-`None` filter is cast to float32.
+        up:          Integer upsampling factor, either one int or a list/tuple
+                     `[x, y]` (default: 2).
+        padding:     Padding relative to the output: a single number, a
+                     list/tuple `[x, y]`, or
+                     `[x_before, x_after, y_before, y_after]` (default: 0).
+        flip_filter: False = convolution, True = correlation (default: False).
+        gain:        Overall scaling factor for signal magnitude (default: 1).
+        impl:        Implementation to use, `'ref'` or `'cuda'` (default: `'cuda'`).
+
+    Returns:
+        Tensor of the shape `[batch_size, num_channels, out_height, out_width]`.
+    """
+    def _axis_padding(taps, factor):
+        # Default (before, after) padding along one axis.
+        return (taps + factor - 1) // 2, (taps - factor) // 2
+
+    upx, upy = _parse_scaling(up)
+    padx0, padx1, pady0, pady1 = _parse_padding(padding)
+    fw, fh = _get_filter_size(f)
+    auto_x0, auto_x1 = _axis_padding(fw, upx)
+    auto_y0, auto_y1 = _axis_padding(fh, upy)
+    total_padding = [padx0 + auto_x0, padx1 + auto_x1, pady0 + auto_y0, pady1 + auto_y1]
+
+    filt = None if f is None else f.float()
+    return upfirdn2d(x, filt, up=up, padding=total_padding, flip_filter=flip_filter, gain=gain*upx*upy, impl=impl)
+
+#----------------------------------------------------------------------------
+
+def downsample2d(x, f, down=2, padding=0, flip_filter=False, gain=1, impl='cuda'):
+    r"""Downsample a batch of 2D images using the given 2D FIR filter.
+
+    By default enough padding is added that the output shape is a fraction of
+    the input shape. User-specified `padding` is added on top of that;
+    negative values crop. Pixels outside the image are treated as zero.
+
+    Args:
+        x:           Float32/float64/float16 input tensor of the shape
+                     `[batch_size, num_channels, in_height, in_width]`.
+        f:           Float32 FIR filter of the shape
+                     `[filter_height, filter_width]` (non-separable),
+                     `[filter_taps]` (separable), or
+                     `None` (identity).
+        down:        Integer downsampling factor, either one int or a list/tuple
+                     `[x, y]` (default: 2).
+        padding:     Padding relative to the input: a single number, a
+                     list/tuple `[x, y]`, or
+                     `[x_before, x_after, y_before, y_after]` (default: 0).
+        flip_filter: False = convolution, True = correlation (default: False).
+        gain:        Overall scaling factor for signal magnitude (default: 1).
+        impl:        Implementation to use, `'ref'` or `'cuda'` (default: `'cuda'`).
+
+    Returns:
+        Tensor of the shape `[batch_size, num_channels, out_height, out_width]`.
+    """
+    def _axis_padding(taps, factor):
+        # Default (before, after) padding along one axis.
+        return (taps - factor + 1) // 2, (taps - factor) // 2
+
+    downx, downy = _parse_scaling(down)
+    padx0, padx1, pady0, pady1 = _parse_padding(padding)
+    fw, fh = _get_filter_size(f)
+    auto_x0, auto_x1 = _axis_padding(fw, downx)
+    auto_y0, auto_y1 = _axis_padding(fh, downy)
+    total_padding = [padx0 + auto_x0, padx1 + auto_x1, pady0 + auto_y0, pady1 + auto_y1]
+    return upfirdn2d(x, f, down=down, padding=total_padding, flip_filter=flip_filter, gain=gain, impl=impl)
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/persistence.py b/modules/eg3ds/torch_utils/persistence.py
new file mode 100644
index 0000000000000000000000000000000000000000..b79ca807091c62765f4d157045048f92512ad59e
--- /dev/null
+++ b/modules/eg3ds/torch_utils/persistence.py
@@ -0,0 +1,253 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Facilities for pickling Python code alongside other data.
+
+The pickled code is automatically imported into a separate Python module
+during unpickling. This way, any previously exported pickles will remain
+usable even if the original code is no longer available, or if the current
+version of the code is not consistent with what was originally pickled."""
+
+import sys
+import pickle
+import io
+import inspect
+import copy
+import uuid
+import types
+from modules.eg3ds import dnnlib
+
+#----------------------------------------------------------------------------
+
+_version            = 6     # Format version written into every pickle and checked on load.
+_decorators         = set() # Every decorator class created by persistent_class().
+_import_hooks       = []    # Hook functions registered via import_hook(), in call order.
+_module_to_src_dict = {}    # module => source code
+_src_to_module_dict = {}    # source code => module
+
+#----------------------------------------------------------------------------
+
+def persistent_class(orig_class):
+    r"""Class decorator that extends a given class to save its source code
+    when pickled.
+
+    Example:
+
+        from modules.eg3ds.torch_utils import persistence
+
+        @persistence.persistent_class
+        class MyNetwork(torch.nn.Module):
+            def __init__(self, num_inputs, num_outputs):
+                super().__init__()
+                self.fc = MyLayer(num_inputs, num_outputs)
+                ...
+
+        @persistence.persistent_class
+        class MyLayer(torch.nn.Module):
+            ...
+
+    When pickled, any instance of `MyNetwork` and `MyLayer` will save its
+    source code alongside other internal state (e.g., parameters, buffers,
+    and submodules). This way, any previously exported pickle will remain
+    usable even if the class definitions have been modified or are no
+    longer available.
+
+    The decorator saves the source code of the entire Python module
+    containing the decorated class. It does *not* save the source code of
+    any imported modules. Thus, the imported modules must be available
+    during unpickling, also including `torch_utils.persistence` itself.
+
+    It is ok to call functions defined in the same module from the
+    decorated class. However, if the decorated class depends on other
+    classes defined in the same module, they must be decorated as well.
+    This is illustrated in the above example in the case of `MyLayer`.
+
+    It is also possible to employ the decorator just-in-time before
+    calling the constructor. For example:
+
+        cls = MyLayer
+        if want_to_make_it_persistent:
+            cls = persistence.persistent_class(cls)
+        layer = cls(num_inputs, num_outputs)
+
+    As an additional feature, the decorator also keeps track of the
+    arguments that were used to construct each instance of the decorated
+    class. The arguments can be queried via `obj.init_args` and
+    `obj.init_kwargs`, and they are automatically pickled alongside other
+    object state. A typical use case is to first unpickle a previous
+    instance of a persistent class, and then upgrade it to use the latest
+    version of the source code:
+
+        with open('old_pickle.pkl', 'rb') as f:
+            old_net = pickle.load(f)
+        new_net = MyNetwork(*old_net.init_args, **old_net.init_kwargs)
+        misc.copy_params_and_buffers(old_net, new_net, require_all=True)
+
+    Args:
+        orig_class: The class to decorate. Its defining module must be
+                    present in `sys.modules`.
+
+    Returns:
+        A subclass of `orig_class` carrying the same `__name__`, or
+        `orig_class` itself if it is already persistent.
+    """
+    assert isinstance(orig_class, type)
+    if is_persistent(orig_class):
+        return orig_class
+
+    # Capture the source of the whole defining module; this is what gets
+    # embedded in every pickle of the decorated class.
+    assert orig_class.__module__ in sys.modules
+    orig_module = sys.modules[orig_class.__module__]
+    orig_module_src = _module_to_src(orig_module)
+
+    class Decorator(orig_class):
+        _orig_module_src = orig_module_src
+        _orig_class_name = orig_class.__name__
+
+        def __init__(self, *args, **kwargs):
+            super().__init__(*args, **kwargs)
+            # Deep copies keep the recorded arguments independent of the caller's objects.
+            self._init_args = copy.deepcopy(args)
+            self._init_kwargs = copy.deepcopy(kwargs)
+            # The class must be reachable by name in its module, because
+            # unpickling looks it up there after re-executing the source.
+            assert orig_class.__name__ in orig_module.__dict__
+            # Check pickleability at construction time rather than at save time.
+            _check_pickleable(self.__reduce__())
+
+        @property
+        def init_args(self):
+            # Returns a copy so that callers cannot alter the recorded arguments.
+            return copy.deepcopy(self._init_args)
+
+        @property
+        def init_kwargs(self):
+            # Returns a copy, wrapped in an EasyDict.
+            return dnnlib.EasyDict(copy.deepcopy(self._init_kwargs))
+
+        def __reduce__(self):
+            # Start from the inherited reduction and pad it to at least
+            # (callable, args, state).
+            fields = list(super().__reduce__())
+            fields += [None] * max(3 - len(fields), 0)
+            # Rewrite only if a base class has not already done so.
+            if fields[0] is not _reconstruct_persistent_obj:
+                meta = dict(type='class', version=_version, module_src=self._orig_module_src, class_name=self._orig_class_name, state=fields[2])
+                fields[0] = _reconstruct_persistent_obj # reconstruct func
+                fields[1] = (meta,) # reconstruct args
+                fields[2] = None # state dict
+            return tuple(fields)
+
+    # Keep the original class name, and register the decorator so that
+    # is_persistent() recognizes it.
+    Decorator.__name__ = orig_class.__name__
+    _decorators.add(Decorator)
+    return Decorator
+
+#----------------------------------------------------------------------------
+
+def is_persistent(obj):
+    r"""Tell whether `obj` is a persistent class or an instance of one,
+    i.e., whether its source code will be saved when it is pickled.
+    """
+    try:
+        registered = obj in _decorators
+    except TypeError:
+        # The membership test failed for this object (e.g., it is unhashable),
+        # so it cannot be one of the registered classes.
+        registered = False
+    return registered or type(obj) in _decorators # pylint: disable=unidiomatic-typecheck
+
+#----------------------------------------------------------------------------
+
+def import_hook(hook):
+    r"""Register an import hook that is called whenever a persistent object
+    is being unpickled. A typical use case is to patch the pickled source
+    code to avoid errors and inconsistencies when the API of some imported
+    module has changed.
+
+    The hook should have the following signature:
+
+        hook(meta) -> modified meta
+
+    `meta` is an instance of `dnnlib.EasyDict` with the following fields:
+
+        type:       Type of the persistent object, e.g. `'class'`.
+        version:    Internal version number of `torch_utils.persistence`.
+        module_src: Original source code of the Python module.
+        class_name: Class name in the original Python module.
+        state:      Internal state of the object.
+
+    Example:
+
+        @persistence.import_hook
+        def wreck_my_network(meta):
+            if meta.class_name == 'MyNetwork':
+                print('MyNetwork is being imported. I will wreck it!')
+                meta.module_src = meta.module_src.replace("True", "False")
+            return meta
+
+    Args:
+        hook: Callable taking and returning `meta`. Hooks run in registration
+              order and must not return `None`.
+
+    Returns:
+        `hook` itself, so that decorator usage (as in the example) leaves the
+        decorated name bound to the function instead of `None`.
+    """
+    assert callable(hook)
+    _import_hooks.append(hook)
+    return hook
+
+#----------------------------------------------------------------------------
+
+def _reconstruct_persistent_obj(meta):
+    r"""Rebuild a persistent object from its pickled `meta` record. The
+    `pickle` module calls this during unpickling.
+
+    NOTE: this executes the module source stored in the pickle (through
+    `_src_to_module()`), so only load pickles from trusted sources.
+    """
+    meta = dnnlib.EasyDict(meta)
+    meta.state = dnnlib.EasyDict(meta.state)
+
+    # Let every registered hook patch the record, in registration order.
+    for patch in _import_hooks:
+        meta = patch(meta)
+        assert meta is not None
+
+    # Materialize the pickled source as a module and look up the class.
+    assert meta.version == _version
+    src_module = _src_to_module(meta.module_src)
+    assert meta.type == 'class'
+    pickled_cls = src_module.__dict__[meta.class_name]
+    persistent_cls = persistent_class(pickled_cls)
+
+    # Create the instance without running __init__, then restore its state.
+    instance = persistent_cls.__new__(persistent_cls)
+    restore = getattr(instance, '__setstate__', None)
+    if not callable(restore):
+        instance.__dict__.update(meta.state)
+        return instance
+    restore(meta.state) # pylint: disable=not-callable
+    return instance
+
+#----------------------------------------------------------------------------
+
+def _module_to_src(module):
+    r"""Return the source code of `module`, reading it with
+    `inspect.getsource()` on first use and caching the module/source pair
+    in both lookup tables.
+    """
+    cached = _module_to_src_dict.get(module, None)
+    if cached is not None:
+        return cached
+    source = inspect.getsource(module)
+    _module_to_src_dict[module] = source
+    _src_to_module_dict[source] = module
+    return source
+
+def _src_to_module(src):
+    r"""Return the module registered for `src`, creating it if necessary.
+
+    A new module gets a unique generated name, is registered in
+    `sys.modules` and in both lookup tables, and only then has `src`
+    executed in its namespace.
+
+    NOTE: `src` is run with `exec()`; callers must only pass trusted source.
+    """
+    known = _src_to_module_dict.get(src, None)
+    if known is not None:
+        return known
+    name = "_imported_module_" + uuid.uuid4().hex
+    fresh = types.ModuleType(name)
+    sys.modules[name] = fresh
+    _module_to_src_dict[fresh] = src
+    _src_to_module_dict[src] = fresh
+    exec(src, fresh.__dict__) # pylint: disable=exec-used
+    return fresh
+
+#----------------------------------------------------------------------------
+
+def _check_pickleable(obj):
+    r"""Raise an exception if `obj` cannot be pickled.
+
+    Instead of pickling `obj` itself, a stripped-down copy is pickled in
+    which values already known to be pickleable are replaced by `None`.
+    This is expected to be considerably cheaper than a real pickle.
+    """
+    trusted_types = ['numpy.ndarray', 'torch.Tensor', 'torch.nn.parameter.Parameter']
+
+    def strip(node):
+        # Containers: descend into the elements.
+        if isinstance(node, (list, tuple, set)):
+            return [strip(item) for item in node]
+        if isinstance(node, dict):
+            return [[strip(key), strip(value)] for key, value in node.items()]
+        # Python primitive types are pickleable.
+        if isinstance(node, (str, int, float, bool, bytes, bytearray)):
+            return None
+        # NumPy arrays and PyTorch tensors are pickleable.
+        if f'{type(node).__module__}.{type(node).__name__}' in trusted_types:
+            return None
+        # Persistent objects were already checked by their constructor.
+        if is_persistent(node):
+            return None
+        # Anything else is left in place for pickle to judge.
+        return node
+
+    with io.BytesIO() as sink:
+        pickle.dump(strip(obj), sink)
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/torch_utils/training_stats.py b/modules/eg3ds/torch_utils/training_stats.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b917e456f59f9aeaf5e279bd6066a15d8c5131b
--- /dev/null
+++ b/modules/eg3ds/torch_utils/training_stats.py
@@ -0,0 +1,270 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""Facilities for reporting and collecting training statistics across
+multiple processes and devices. The interface is designed to minimize
+synchronization overhead as well as the amount of boilerplate in user
+code."""
+
+import re
+import numpy as np
+import torch
+from modules.eg3ds import dnnlib
+
+from . import misc
+
+#----------------------------------------------------------------------------
+
+_num_moments    = 3             # [num_scalars, sum_of_scalars, sum_of_squares]
+_reduce_dtype   = torch.float32 # Data type to use for initial per-tensor reduction.
+_counter_dtype  = torch.float64 # Data type to use for the internal counters.
+_rank           = 0             # Rank of the current process.
+_sync_device    = None          # Device to use for multiprocess communication. None = single-process.
+_sync_called    = False         # Has _sync() been called yet?
+_counters       = dict()        # Running counters on each device, updated by report(): name => device => torch.Tensor
+_cumulative     = dict()        # Cumulative counters on the CPU, updated by _sync(): name => torch.Tensor
+
+#----------------------------------------------------------------------------
+
+def init_multiprocessing(rank, sync_device):
+    r"""Initializes `torch_utils.training_stats` for collecting statistics
+    across multiple processes.
+
+    This function must be called after
+    `torch.distributed.init_process_group()` and before `Collector.update()`.
+    The call is not necessary if multi-process collection is not needed.
+
+    Args:
+        rank:           Rank of the current process.
+        sync_device:    PyTorch device to use for inter-process
+                        communication, or None to disable multi-process
+                        collection. Typically `torch.device('cuda', rank)`.
+    """
+    global _rank, _sync_device
+    assert not _sync_called # the settings may not change once _sync() has run
+    _rank = rank
+    _sync_device = sync_device
+
+#----------------------------------------------------------------------------
+
+@misc.profiled_function
+def report(name, value):
+    r"""Broadcasts the given set of scalars to all interested instances of
+    `Collector`, across device and process boundaries.
+
+    This function is expected to be extremely cheap and can be safely
+    called from anywhere in the training loop, loss function, or inside a
+    `torch.nn.Module`.
+
+    Warning: The current implementation expects the set of unique names to
+    be consistent across processes. Please make sure that `report()` is
+    called at least once for each unique name by each process, and in the
+    same order. If a given process has no scalars to broadcast, it can do
+    `report(name, [])` (empty list).
+
+    Args:
+        name:   Arbitrary string specifying the name of the statistic.
+                Averages are accumulated separately for each unique name.
+        value:  Arbitrary set of scalars. Can be a list, tuple,
+                NumPy array, PyTorch tensor, or Python scalar.
+
+    Returns:
+        The same `value` that was passed in.
+    """
+    if name not in _counters:
+        _counters[name] = dict() # the name is registered even when `value` turns out to be empty
+
+    elems = torch.as_tensor(value)
+    if elems.numel() == 0:
+        return value # nothing to accumulate
+
+    elems = elems.detach().flatten().to(_reduce_dtype) # drop autograd history; reduce in _reduce_dtype
+    moments = torch.stack([
+        torch.ones_like(elems).sum(), # number of scalars
+        elems.sum(), # sum of scalars
+        elems.square().sum(), # sum of squares
+    ])
+    assert moments.ndim == 1 and moments.shape[0] == _num_moments
+    moments = moments.to(_counter_dtype) # running totals are kept in _counter_dtype
+
+    device = moments.device
+    if device not in _counters[name]:
+        _counters[name][device] = torch.zeros_like(moments) # one running counter per device
+    _counters[name][device].add_(moments)
+    return value
+
+#----------------------------------------------------------------------------
+
+def report0(name, value):
+    r"""Broadcasts the given set of scalars by the first process (`rank = 0`),
+    but ignores any scalars provided by the other processes.
+    See `report()` for further details.
+    """
+    report(name, value if _rank == 0 else []) # other ranks still register the name, with no scalars
+    return value
+
+#----------------------------------------------------------------------------
+
+class Collector:
+    r"""Collects the scalars broadcasted by `report()` and `report0()` and
+    computes their long-term averages (mean and standard deviation) over
+    user-defined periods of time.
+
+    The averages are first collected into internal counters that are not
+    directly visible to the user. They are then copied to the user-visible
+    state as a result of calling `update()` and can then be queried using
+    `mean()`, `std()`, `as_dict()`, etc. Calling `update()` also resets the
+    internal counters for the next round, so that the user-visible state
+    effectively reflects averages collected between the last two calls to
+    `update()`.
+
+    Args:
+        regex:          Regular expression defining which statistics to
+                        collect. The default is to collect everything.
+        keep_previous:  Whether to retain the previous averages if no
+                        scalars were collected on a given round
+                        (default: True).
+    """
+    def __init__(self, regex='.*', keep_previous=True):
+        self._regex = re.compile(regex)
+        self._keep_previous = keep_previous
+        self._cumulative = dict() # name => cumulative moments seen at the last update()
+        self._moments = dict() # name => moments accumulated between the last two update() calls
+        self.update() # take the current cumulative totals as the baseline
+        self._moments.clear() # discard whatever was accumulated before this collector existed
+
+    def names(self):
+        r"""Returns the names of all statistics broadcasted so far that
+        match the regular expression specified at construction time.
+        """
+        return [name for name in _counters if self._regex.fullmatch(name)]
+
+    def update(self):
+        r"""Copies current values of the internal counters to the
+        user-visible state and resets them for the next round.
+
+        If `keep_previous=True` was specified at construction time, the
+        operation is skipped for statistics that have received no scalars
+        since the last update, retaining their previous averages.
+
+        This method performs a number of GPU-to-CPU transfers and one
+        `torch.distributed.all_reduce()`. It is intended to be called
+        periodically in the main training loop, typically once every
+        N training steps.
+        """
+        if not self._keep_previous:
+            self._moments.clear()
+        for name, cumulative in _sync(self.names()):
+            if name not in self._cumulative:
+                self._cumulative[name] = torch.zeros([_num_moments], dtype=_counter_dtype)
+            delta = cumulative - self._cumulative[name] # moments gained since the previous update()
+            self._cumulative[name].copy_(cumulative) # copy, because `cumulative` is the shared global tensor
+            if float(delta[0]) != 0: # only overwrite when at least one scalar arrived
+                self._moments[name] = delta
+
+    def _get_delta(self, name):
+        r"""Returns the raw moments that were accumulated for the given
+        statistic between the last two calls to `update()`, or zero if
+        no scalars were collected.
+        """
+        assert self._regex.fullmatch(name)
+        if name not in self._moments:
+            self._moments[name] = torch.zeros([_num_moments], dtype=_counter_dtype) # the zero entry is stored, not just returned
+        return self._moments[name]
+
+    def num(self, name):
+        r"""Returns the number of scalars that were accumulated for the given
+        statistic between the last two calls to `update()`, or zero if
+        no scalars were collected.
+        """
+        delta = self._get_delta(name)
+        return int(delta[0])
+
+    def mean(self, name):
+        r"""Returns the mean of the scalars that were accumulated for the
+        given statistic between the last two calls to `update()`, or NaN if
+        no scalars were collected.
+        """
+        delta = self._get_delta(name)
+        if int(delta[0]) == 0:
+            return float('nan')
+        return float(delta[1] / delta[0]) # sum / count
+
+    def std(self, name):
+        r"""Returns the standard deviation of the scalars that were
+        accumulated for the given statistic between the last two calls to
+        `update()`, or NaN if no scalars were collected.
+        """
+        delta = self._get_delta(name)
+        if int(delta[0]) == 0 or not np.isfinite(float(delta[1])): # a non-finite sum also yields NaN
+            return float('nan')
+        if int(delta[0]) == 1:
+            return float(0)
+        mean = float(delta[1] / delta[0])
+        raw_var = float(delta[2] / delta[0]) # mean of squares
+        return np.sqrt(max(raw_var - np.square(mean), 0)) # clamp at 0 so rounding error cannot give a negative variance
+
+    def as_dict(self):
+        r"""Returns the averages accumulated between the last two calls to
+        `update()` as a `dnnlib.EasyDict`. The contents are as follows:
+
+            dnnlib.EasyDict(
+                NAME = dnnlib.EasyDict(num=INT, mean=FLOAT, std=FLOAT),
+                ...
+            )
+        """
+        stats = dnnlib.EasyDict()
+        for name in self.names():
+            stats[name] = dnnlib.EasyDict(num=self.num(name), mean=self.mean(name), std=self.std(name))
+        return stats
+
+    def __getitem__(self, name):
+        r"""Convenience getter.
+        `collector[name]` is a synonym for `collector.mean(name)`.
+        """
+        return self.mean(name)
+
+#----------------------------------------------------------------------------
+
+def _sync(names):
+    r"""Synchronize the global cumulative counters across devices and
+    processes. Called internally by `Collector.update()`.
+    """
+    if len(names) == 0:
+        return [] # early exit: _sync_called is left untouched and no all_reduce is issued
+    global _sync_called
+    _sync_called = True
+
+    # Collect deltas within current rank.
+    deltas = []
+    device = _sync_device if _sync_device is not None else torch.device('cpu')
+    for name in names:
+        delta = torch.zeros([_num_moments], dtype=_counter_dtype, device=device)
+        for counter in _counters[name].values(): # one counter per device that report() has seen
+            delta.add_(counter.to(device))
+            counter.copy_(torch.zeros_like(counter)) # reset in place so report() keeps adding to the same tensor
+        deltas.append(delta)
+    deltas = torch.stack(deltas) # one row of moments per name, in the order of `names`
+
+    # Sum deltas across ranks.
+    if _sync_device is not None:
+        torch.distributed.all_reduce(deltas)
+
+    # Update cumulative values.
+    deltas = deltas.cpu()
+    for idx, name in enumerate(names):
+        if name not in _cumulative:
+            _cumulative[name] = torch.zeros([_num_moments], dtype=_counter_dtype)
+        _cumulative[name].add_(deltas[idx])
+
+    # Return name-value pairs.
+    return [(name, _cumulative[name]) for name in names] # the tensors are the shared global totals, not copies
+
+#----------------------------------------------------------------------------
diff --git a/modules/eg3ds/volumetric_rendering/__init__.py b/modules/eg3ds/volumetric_rendering/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..daba66567a95beabb103f7996198a9675ab20b4a
--- /dev/null
+++ b/modules/eg3ds/volumetric_rendering/__init__.py
@@ -0,0 +1,11 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+# empty
\ No newline at end of file
diff --git a/modules/eg3ds/volumetric_rendering/math_utils.py b/modules/eg3ds/volumetric_rendering/math_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..afd05d524f70a7420555dc2e9aae6a613f66e476
--- /dev/null
+++ b/modules/eg3ds/volumetric_rendering/math_utils.py
@@ -0,0 +1,118 @@
+# MIT License
+
+# Copyright (c) 2022 Petr Kellnhofer
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import torch
+
+def transform_vectors(matrix: torch.Tensor, vectors4: torch.Tensor) -> torch.Tensor:
+    """
+    Left-multiplies MxM @ NxM. Returns NxM.
+    """
+    res = torch.matmul(vectors4, matrix.T) # applies `matrix` to every row vector: (matrix @ v) == v @ matrix.T
+    return res
+
+
+def normalize_vecs(vectors: torch.Tensor) -> torch.Tensor:
+    """
+    Normalize vector lengths along the last dimension.
+    """
+    return vectors / (torch.norm(vectors, dim=-1, keepdim=True)) # no epsilon: a zero-length vector gives non-finite output
+
+def torch_dot(x: torch.Tensor, y: torch.Tensor):
+    """
+    Dot product of two tensors along the last dimension.
+    """
+    return (x * y).sum(-1)
+
+
+def get_ray_limits_box(rays_o: torch.Tensor, rays_d: torch.Tensor, box_side_length):
+    """
+    Author: Petr Kellnhofer
+    Intersects rays with the axis-aligned cube spanning [-box_side_length/2, box_side_length/2] on every axis.
+    Returns (tmin, tmax): entry and exit distances along each ray, each shaped [*rays_o.shape[:-1], 1]; inputs are detached.
+    Rays that miss the box get tmin = -1 and tmax = -2.
+    https://www.scratchapixel.com/lessons/3d-basic-rendering/minimal-ray-tracer-rendering-simple-shapes/ray-box-intersection
+    """
+    o_shape = rays_o.shape
+    rays_o = rays_o.detach().reshape(-1, 3)
+    rays_d = rays_d.detach().reshape(-1, 3)
+
+
+    bb_min = [-1*(box_side_length/2), -1*(box_side_length/2), -1*(box_side_length/2)] # e.g. [-0.5, 0.5] for side length 1; original note: sample_from_planes later scales coordinates by 2, so box_side_length is the full edge length (TODO confirm, not visible here)
+    bb_max = [1*(box_side_length/2), 1*(box_side_length/2), 1*(box_side_length/2)]
+    bounds = torch.tensor([bb_min, bb_max], dtype=rays_o.dtype, device=rays_o.device) # row 0 = min corner, row 1 = max corner
+    is_valid = torch.ones(rays_o.shape[:-1], dtype=bool, device=rays_o.device)
+
+    # Precompute inverse for stability.
+    invdir = 1 / rays_d # reciprocal of each direction component
+    sign = (invdir < 0).long() # 1 where the component is negative, so the near/far bounds swap
+
+    # Intersect with YZ plane.
+    tmin = (bounds.index_select(0, sign[..., 0])[..., 0] - rays_o[..., 0]) * invdir[..., 0] # entry distance through the x-slab: t = (x_near - o_x) / d_x
+    tmax = (bounds.index_select(0, 1 - sign[..., 0])[..., 0] - rays_o[..., 0]) * invdir[..., 0] # exit distance through the x-slab: t = (x_far - o_x) / d_x
+
+    # Intersect with XZ plane.
+    tymin = (bounds.index_select(0, sign[..., 1])[..., 1] - rays_o[..., 1]) * invdir[..., 1]
+    tymax = (bounds.index_select(0, 1 - sign[..., 1])[..., 1] - rays_o[..., 1]) * invdir[..., 1]
+
+    # Resolve parallel rays.
+    is_valid[torch.logical_or(tmin > tymax, tymin > tmax)] = False
+
+    # Use the shortest intersection.
+    tmin = torch.max(tmin, tymin) # intersect all six planes, drop the invalid cases (opposite direction, parallel); the two nearest planes give tmin and tmax
+    tmax = torch.min(tmax, tymax)
+
+    # Intersect with XY plane.
+    tzmin = (bounds.index_select(0, sign[..., 2])[..., 2] - rays_o[..., 2]) * invdir[..., 2]
+    tzmax = (bounds.index_select(0, 1 - sign[..., 2])[..., 2] - rays_o[..., 2]) * invdir[..., 2]
+
+    # Resolve parallel rays.
+    is_valid[torch.logical_or(tmin > tzmax, tzmin > tmax)] = False
+
+    # Use the shortest intersection.
+    tmin = torch.max(tmin, tzmin)
+    tmax = torch.min(tmax, tzmax)
+
+    # Mark invalid.
+    tmin[torch.logical_not(is_valid)] = -1
+    tmax[torch.logical_not(is_valid)] = -2
+
+    return tmin.reshape(*o_shape[:-1], 1), tmax.reshape(*o_shape[:-1], 1)
+
+
+def linspace(start: torch.Tensor, stop: torch.Tensor, num: int):
+    """
+    Creates a tensor of shape [num, *start.shape] whose values are evenly spaced from start to stop, inclusive.
+    Replicates the multi-dimensional behaviour of numpy.linspace in PyTorch; num == 1 yields just `start`.
+    """
+    # create a tensor of 'num' steps from 0 to 1; max() guards num == 1, where dividing by (num - 1) produced 0/0 = NaN
+    steps = torch.arange(num, dtype=torch.float32, device=start.device) / max(num - 1, 1)
+
+    # reshape the 'steps' tensor to [-1, *([1]*start.ndim)] to allow for broadcasting
+    # - using 'steps.reshape([-1, *([1]*start.ndim)])' would be nice here but torchscript
+    #   "cannot statically infer the expected size of a list in this context", hence the code below
+    for i in range(start.ndim):
+        steps = steps.unsqueeze(-1)
+
+    # the output starts at 'start' and increments until 'stop' in each dimension
+    out = start[None] + steps * (stop - start)[None]
+
+    return out
diff --git a/modules/eg3ds/volumetric_rendering/ray_marcher.py b/modules/eg3ds/volumetric_rendering/ray_marcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..d23d7f52a4d9b9f9f6d8ed7a028b94ce841f6c02
--- /dev/null
+++ b/modules/eg3ds/volumetric_rendering/ray_marcher.py
@@ -0,0 +1,65 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""
+The ray marcher takes the raw output of the implicit representation and uses the volume rendering equation to produce composited colors and depths.
+Based off of the implementation in MipNeRF (this one doesn't do any cone tracing though!)
+"""
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class MipRayMarcher2(nn.Module):
+    def __init__(self):
+        super().__init__()
+
+
+    def run_forward(self, colors, densities, depths, rendering_options):
+        deltas = depths[:, :, 1:] - depths[:, :, :-1] # spacing between consecutive samples along dim 2
+        colors_mid = (colors[:, :, :-1] + colors[:, :, 1:]) / 2 # average of each adjacent sample pair
+        densities_mid = (densities[:, :, :-1] + densities[:, :, 1:]) / 2
+        depths_mid = (depths[:, :, :-1] + depths[:, :, 1:]) / 2
+
+
+        if rendering_options['clamp_mode'] == 'softplus':
+            densities_mid = F.softplus(densities_mid - 1) # activation bias of -1 makes things initialize better
+        else:
+            assert False, "MipRayMarcher only supports `clamp_mode`=`softplus`!"
+
+        density_delta = densities_mid * deltas
+
+        alpha = 1 - torch.exp(-density_delta) # opacity of each interval
+
+        alpha_shifted = torch.cat([torch.ones_like(alpha[:, :, :1]), 1-alpha + 1e-10], -2) # leading 1 so the first interval is unattenuated
+        weights = alpha * torch.cumprod(alpha_shifted, -2)[:, :, :-1] # alpha times the running product of (1 - alpha) over earlier intervals
+
+        composite_rgb = torch.sum(weights * colors_mid, -2) # ([4, 4096, 48, c=32]) ==> ([4, 4096, 32])
+        weight_total = weights.sum(2)
+        composite_depth = torch.sum(weights * depths_mid, -2) / weight_total # weighted mean depth; 0/0 gives NaN when weight_total is 0
+
+        # clip the composite to min/max range of depths (NaNs first become +inf, so they end up at the max depth)
+        composite_depth = torch.nan_to_num(composite_depth, float('inf'))
+        composite_depth = torch.clamp(composite_depth, torch.min(depths), torch.max(depths))
+
+        if rendering_options.get('white_back', False):
+            composite_rgb = composite_rgb + 1 - weight_total # add the unaccumulated fraction (1 - weight_total) as a white background
+
+        composite_rgb = composite_rgb * 2 - 1 # Scale to (-1, 1)
+
+        return composite_rgb, composite_depth, weights
+
+
+    def forward(self, colors, densities, depths, rendering_options):
+        composite_rgb, composite_depth, weights = self.run_forward(colors, densities, depths, rendering_options) # thin wrapper around run_forward
+
+        return composite_rgb, composite_depth, weights
+    
+
diff --git a/modules/eg3ds/volumetric_rendering/ray_sampler.py b/modules/eg3ds/volumetric_rendering/ray_sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..4f11544dccbbad095a353e4f5f5d7dc49a86fcb2
--- /dev/null
+++ b/modules/eg3ds/volumetric_rendering/ray_sampler.py
@@ -0,0 +1,111 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""
+The ray sampler is a module that takes in camera matrices and a resolution and produces batches of rays.
+Expects cam2world matrices that use the OpenCV camera coordinate system conventions.
+"""
+
+import torch
+
+class RaySampler(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.ray_origins_h, self.ray_directions, self.depths, self.image_coords, self.rendering_options = None, None, None, None, None
+
+
+    def forward(self, cam2world_matrix, intrinsics, resolution):
+        """
+        Create batches of rays and return origins and directions.
+
+        cam2world_matrix: (N, 4, 4)
+        intrinsics: (N, 3, 3) -- presumably normalized to the unit image, since pixel coordinates here lie in [0, 1]; TODO confirm
+        resolution: int
+
+        ray_origins: (N, M, 3), where M = resolution**2
+        ray_dirs: (N, M, 3), normalized to unit length
+        """
+        N, M = cam2world_matrix.shape[0], resolution**2
+        cam_locs_world = cam2world_matrix[:, :3, 3] # translation column = camera position
+        fx = intrinsics[:, 0, 0]
+        fy = intrinsics[:, 1, 1]
+        cx = intrinsics[:, 0, 2]
+        cy = intrinsics[:, 1, 2]
+        sk = intrinsics[:, 0, 1] # skew term
+
+        uv = torch.stack(torch.meshgrid(torch.arange(resolution, dtype=torch.float32, device=cam2world_matrix.device), torch.arange(resolution, dtype=torch.float32, device=cam2world_matrix.device), indexing='ij')) * (1./resolution) + (0.5/resolution) # pixel centres: (i + 0.5) / resolution
+        uv = uv.flip(0).reshape(2, -1).transpose(1, 0) # swap the two grid channels, then flatten to (M, 2)
+        uv = uv.unsqueeze(0).repeat(cam2world_matrix.shape[0], 1, 1) # same pixel grid for every camera: (N, M, 2)
+
+        x_cam = uv[:, :, 0].view(N, -1)
+        y_cam = uv[:, :, 1].view(N, -1)
+        z_cam = torch.ones((N, M), device=cam2world_matrix.device) # all points are lifted to depth 1
+
+        x_lift = (x_cam - cx.unsqueeze(-1) + cy.unsqueeze(-1)*sk.unsqueeze(-1)/fy.unsqueeze(-1) - sk.unsqueeze(-1)*y_cam/fy.unsqueeze(-1)) / fx.unsqueeze(-1) * z_cam # un-project x, including the skew correction
+        y_lift = (y_cam - cy.unsqueeze(-1)) / fy.unsqueeze(-1) * z_cam
+
+        cam_rel_points = torch.stack((x_lift, y_lift, z_cam, torch.ones_like(z_cam)), dim=-1) # homogeneous points: (N, M, 4)
+
+        world_rel_points = torch.bmm(cam2world_matrix, cam_rel_points.permute(0, 2, 1)).permute(0, 2, 1)[:, :, :3] # apply cam2world, keep xyz
+
+        ray_dirs = world_rel_points - cam_locs_world[:, None, :]
+        ray_dirs = torch.nn.functional.normalize(ray_dirs, dim=2)
+
+        ray_origins = cam_locs_world.unsqueeze(1).repeat(1, ray_dirs.shape[1], 1) # every ray of a camera shares that camera's position
+
+        return ray_origins, ray_dirs
+    
+    # def forward_with_src_c2w(self, src_cam2word_matrix, cam2world_matrix, intrinsics, resolution):
+    #     """
+    #     Create batches of rays and return origins and directions.
+
+    #     cam2world_matrix: (N, 4, 4)
+    #     intrinsics: (N, 3, 3)
+    #     resolution: int
+
+    #     ray_origins: (N, M, 3)
+    #     ray_dirs: (N, M, 2)
+    #     """
+    #     # src_world2cam_matrix = src_cam2word_matrix.clone()
+    #     # src_world2cam_matrix[:, :3,:3] = src_world2cam_matrix[:, :3,:3].permute(0, 2, 1)
+    #     # src_world2cam_matrix[:, :, 3] = - src_world2cam_matrix[:, :, 3]
+    #     # new_cam2world_matrix = torch.bmm(src_world2cam_matrix, cam2world_matrix)
+    #     # cam2world_matrix = new_cam2world_matrix                                                                     
+           
+    #     N, M = cam2world_matrix.shape[0], resolution**2
+    #     cam_locs_world = cam2world_matrix[:, :3, 3]
+    #     fx = intrinsics[:, 0, 0]
+    #     fy = intrinsics[:, 1, 1]
+    #     cx = intrinsics[:, 0, 2]
+    #     cy = intrinsics[:, 1, 2]
+    #     sk = intrinsics[:, 0, 1]
+
+    #     uv = torch.stack(torch.meshgrid(torch.arange(resolution, dtype=torch.float32, device=cam2world_matrix.device), torch.arange(resolution, dtype=torch.float32, device=cam2world_matrix.device), indexing='ij')) * (1./resolution) + (0.5/resolution)
+    #     uv = uv.flip(0).reshape(2, -1).transpose(1, 0)
+    #     uv = uv.unsqueeze(0).repeat(cam2world_matrix.shape[0], 1, 1)
+
+    #     x_cam = uv[:, :, 0].view(N, -1)
+    #     y_cam = uv[:, :, 1].view(N, -1)
+    #     z_cam = torch.ones((N, M), device=cam2world_matrix.device)
+
+    #     x_lift = (x_cam - cx.unsqueeze(-1) + cy.unsqueeze(-1)*sk.unsqueeze(-1)/fy.unsqueeze(-1) - sk.unsqueeze(-1)*y_cam/fy.unsqueeze(-1)) / fx.unsqueeze(-1) * z_cam
+    #     y_lift = (y_cam - cy.unsqueeze(-1)) / fy.unsqueeze(-1) * z_cam
+
+    #     cam_rel_points = torch.stack((x_lift, y_lift, z_cam, torch.ones_like(z_cam)), dim=-1)
+
+    #     world_rel_points = torch.bmm(cam2world_matrix, cam_rel_points.permute(0, 2, 1)).permute(0, 2, 1)[:, :, :3]
+
+    #     ray_dirs = world_rel_points - cam_locs_world[:, None, :]
+    #     ray_dirs = torch.nn.functional.normalize(ray_dirs, dim=2)
+
+    #     ray_origins = cam_locs_world.unsqueeze(1).repeat(1, ray_dirs.shape[1], 1)
+
+    #     return ray_origins, ray_dirs
+    
\ No newline at end of file
diff --git a/modules/eg3ds/volumetric_rendering/renderer.py b/modules/eg3ds/volumetric_rendering/renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..8aa4ef29cd830bdfa77adb97ab56ab9482068456
--- /dev/null
+++ b/modules/eg3ds/volumetric_rendering/renderer.py
@@ -0,0 +1,297 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+"""
+The renderer is a module that takes in rays, decides where to sample along each
+ray, and computes pixel colors using the volume rendering equation.
+"""
+import copy
+import math
+import random
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+
+from modules.eg3ds.volumetric_rendering.ray_marcher import MipRayMarcher2
+from modules.eg3ds.volumetric_rendering import math_utils
+from utils.commons.tensor_utils import convert_like
+from utils.commons.hparams import hparams
+import copy
+
+
+def generate_planes():
+    """
+    Build the axis matrices of the three feature planes.
+
+    Each plane is described by a 3x3 matrix whose rows are the vectors that
+    form its "axes". project_onto_planes() multiplies coordinates by the
+    inverse of these matrices (the transpose, for the permutation matrices
+    used here), so the axes that are actually applied are the transposed ones.
+
+    Returns:
+        A float32 tensor of shape (3, 3, 3): one 3x3 axis matrix per plane.
+    """
+    xyz_axes = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
+    xzy_axes = [[1, 0, 0], [0, 0, 1], [0, 1, 0]]
+    # After the transpose this one reads as yzx.
+    zxy_axes = [[0, 0, 1], [1, 0, 0], [0, 1, 0]]
+    return torch.tensor([xyz_axes, xzy_axes, zxy_axes], dtype=torch.float32)
+    
+def project_onto_planes(planes, coordinates):
+    """
+    Project a batch of 3D points onto every plane.
+
+    Args:
+        planes: plane axes of shape (n_planes, 3, 3).
+        coordinates: points of shape (N, M, 3).
+
+    Returns:
+        Tensor of shape (N*n_planes, M, 3): every point multiplied by the
+        inverse of every plane matrix, with the planes of one batch element
+        stored consecutively. Callers that need 2D plane coordinates slice
+        the last dimension themselves.
+    """
+    batch, n_points, _ = coordinates.shape
+    n_planes, _, _ = planes.shape
+
+    # Repeat the points once per plane and fold the plane axis into the batch.
+    tiled_points = coordinates.unsqueeze(1).expand(-1, n_planes, -1, -1)
+    tiled_points = tiled_points.reshape(batch * n_planes, n_points, 3)
+
+    # Repeat the inverted plane matrices once per batch element, same layout.
+    plane_inverses = torch.linalg.inv(planes).unsqueeze(0).expand(batch, -1, -1, -1)
+    plane_inverses = plane_inverses.reshape(batch * n_planes, 3, 3)
+
+    return torch.bmm(tiled_points, plane_inverses)
+
+def sample_from_planes(plane_axes, plane_features, coordinates, mode='bilinear', padding_mode='zeros', box_warp=None):
+    """
+    Sample per-plane features at 3D points.
+
+    Args:
+        plane_axes: axis matrices, passed straight to project_onto_planes().
+        plane_features: tensor of shape (N, n_planes, C, H, W).
+        coordinates: tensor of shape (N, M, 3).
+        mode: interpolation mode forwarded to grid_sample.
+        padding_mode: must be 'zeros' (asserted).
+        box_warp: coordinates are multiplied by 2 / box_warp before projection.
+
+    Returns:
+        Tensor of shape (N, n_planes, M, C).
+    """
+    assert padding_mode == 'zeros'
+    batch, n_planes, channels, height, width = plane_features.shape
+    _, n_points, _ = coordinates.shape
+    flat_planes = plane_features.reshape(batch * n_planes, channels, height, width)
+
+    # TODO: add specific box bounds
+    scaled_points = (2/box_warp) * coordinates
+
+    # Only the first two projected components are used as the 2D sampling grid.
+    grid = project_onto_planes(plane_axes, scaled_points).unsqueeze(1)[..., :2]
+    sampled = torch.nn.functional.grid_sample(
+        flat_planes, grid.float(), mode=mode, padding_mode=padding_mode, align_corners=False
+    )
+    # (N*n_planes, C, 1, M) -> (N*n_planes, M, 1, C) -> (N, n_planes, M, C)
+    return sampled.permute(0, 3, 2, 1).reshape(batch, n_planes, n_points, channels)
+
+
+def sample_from_trigrids(plane_axes, plane_features, coordinates, mode='bilinear', padding_mode='zeros', box_warp=None, triplane_depth=1):
+    """
+    Sample per-plane features at 3D points from planes that carry a depth axis.
+
+    Args:
+        plane_axes: axis matrices, passed straight to project_onto_planes().
+        plane_features: tensor of shape (N, n_planes, C*D, H, W) with
+            D = triplane_depth; it is viewed as (N*n_planes, C, D, H, W).
+        coordinates: tensor of shape (N, M, 3).
+        mode: interpolation mode forwarded to grid_sample.
+        padding_mode: must be 'zeros' (asserted).
+        box_warp: coordinates are multiplied by 2 / box_warp before projection.
+        triplane_depth: size D of the depth axis folded into the channel axis.
+
+    Returns:
+        Tensor of shape (N, n_planes, M, C).
+    """
+    assert padding_mode == 'zeros'
+    batch, n_planes, packed_channels, height, width = plane_features.shape
+    _, n_points, _ = coordinates.shape
+    depth = triplane_depth
+    channels = packed_channels // triplane_depth
+    volumes = plane_features.view(batch * n_planes, channels, depth, height, width)
+
+    # TODO: add specific box bounds
+    scaled_points = (2/box_warp) * coordinates
+
+    # All three projected components are kept: (N x n_planes) x 1 x 1 x M x 3
+    grid = project_onto_planes(plane_axes, scaled_points).unsqueeze(1).unsqueeze(2)
+    sampled = torch.nn.functional.grid_sample(
+        volumes, grid.float(), mode=mode, padding_mode=padding_mode, align_corners=False
+    )
+    # (N*n_planes, C, 1, 1, M) -> (N*n_planes, M, 1, 1, C) -> (N, n_planes, M, C)
+    return sampled.permute(0, 4, 3, 2, 1).reshape(batch, n_planes, n_points, channels)
+
+def sample_from_3dgrid(grid, coordinates):
+    """
+    Sample a dense 3D feature grid at a batch of points.
+
+    Args:
+        grid: tensor of shape (1, channels, H, W, D); a grid that already has
+            the batch size also works.
+        coordinates: tensor of shape (batch_size, num_points_per_batch, 3).
+
+    Returns:
+        Sampled features of shape (batch_size, num_points_per_batch, feature_channels).
+    """
+    batch_size, n_coords, n_dims = coordinates.shape
+    batched_grid = grid.expand(batch_size, -1, -1, -1, -1)
+    # grid_sample expects a 5D sampling grid, so the points sit on the last spatial axis.
+    sample_grid = coordinates.reshape(batch_size, 1, 1, -1, n_dims)
+    raw = torch.nn.functional.grid_sample(
+        batched_grid, sample_grid, mode='bilinear', padding_mode='zeros', align_corners=False
+    )
+    n_batch, channels, dim_a, dim_b, dim_c = raw.shape
+    # Move channels last, then flatten the three spatial axes into one point axis.
+    return raw.permute(0, 4, 3, 2, 1).reshape(n_batch, dim_a * dim_b * dim_c, channels)
+
+
+class ImportanceRenderer(torch.nn.Module):
+    """
+    Volume renderer: samples points along rays, queries the decoder on the
+    plane features at those points, and composites the result with the ray
+    marcher.
+
+    A coarse stratified pass is always run. When
+    rendering_options['depth_resolution_importance'] > 0, a second,
+    importance-sampled pass is merged with it before the final compositing.
+    """
+
+    def __init__(self, hp=None):
+        """
+        Args:
+            hp: optional hyper-parameter mapping. When None the module-level
+                `hparams` is used. Either way a shallow copy is stored on the
+                instance as `self.hparams`.
+        """
+        super().__init__()
+        # NOTE(review): this rebinds the module-level `hparams` to the copy held by
+        # this instance, so constructing a renderer changes what later code in this
+        # module sees as `hparams`. Kept unchanged for compatibility.
+        global hparams
+        self.hparams = copy.copy(hparams) if hp is None else copy.copy(hp)
+        hparams = self.hparams
+
+        self.ray_marcher = MipRayMarcher2()
+        self.plane_axes = generate_planes()
+        self.triplane_feature_type = hparams.get("triplane_feature_type", "triplane")
+
+    def forward(self, planes, decoder, ray_origins, ray_directions, rendering_options):
+        """
+        Render a batch of rays.
+
+        Args:
+            planes: feature planes / grid handed to run_model().
+            decoder: callable mapping (sampled_features, sample_coordinates) to a
+                dict with 'rgb' and 'sigma' entries.
+            ray_origins: tensor of shape (batch, num_rays, 3).
+            ray_directions: per-ray directions, last dimension 3
+                (presumably the same shape as ray_origins; confirm against callers).
+            rendering_options: dict read here for 'ray_start', 'ray_end',
+                'box_warp' (auto mode only), 'depth_resolution',
+                'disparity_space_sampling' and 'depth_resolution_importance';
+                it is also passed on to run_model() and the ray marcher.
+
+        Returns:
+            Tuple (rgb_final, depth_final, weights.sum(2), is_ray_valid).
+            is_ray_valid is the boolean `ray_end > ray_start` mask in auto mode
+            and an all-True mask of shape (batch, num_rays, 1) when near/far
+            are supplied explicitly.
+        """
+        self.plane_axes = self.plane_axes.to(ray_origins.device)
+
+        if rendering_options['ray_start'] == rendering_options['ray_end'] == 'auto':
+            # Compute near and far automatically from the size of the world bounding box (box_warp).
+            ray_start, ray_end = math_utils.get_ray_limits_box(ray_origins, ray_directions, box_side_length=rendering_options['box_warp'])
+            is_ray_valid = ray_end > ray_start
+            if torch.any(is_ray_valid).item():
+                # Give invalid rays a usable range taken from the valid ones.
+                # NOTE(review): the far value is the max over ray_start, not ray_end - confirm this is intended.
+                ray_start[~is_ray_valid] = ray_start[is_ray_valid].min()
+                ray_end[~is_ray_valid] = ray_start[is_ray_valid].max()
+        else:
+            # The scene is not assumed to fit the bounding box: use the caller-supplied near / far.
+            ray_start, ray_end = rendering_options['ray_start'], rendering_options['ray_end']
+            # No box test is run on this path, so every ray counts as valid. Previously the
+            # name was left unassigned here and the return below raised UnboundLocalError.
+            is_ray_valid = torch.ones_like(ray_origins[..., :1], dtype=torch.bool)
+
+        # Create stratified depth samples
+        depths_coarse = self.sample_stratified(ray_origins, ray_start, ray_end, rendering_options['depth_resolution'], rendering_options['disparity_space_sampling'])
+        batch_size, num_rays, samples_per_ray, _ = depths_coarse.shape
+
+        # Coarse Pass
+        sample_coordinates = (ray_origins.unsqueeze(-2) + depths_coarse * ray_directions.unsqueeze(-2)).reshape(batch_size, -1, 3)
+        sample_directions = ray_directions.unsqueeze(-2).expand(-1, -1, samples_per_ray, -1).reshape(batch_size, -1, 3)
+
+        out = self.run_model(planes, decoder, sample_coordinates, sample_directions, rendering_options)
+        colors_coarse = out['rgb']
+        densities_coarse = out['sigma']
+        colors_coarse = colors_coarse.reshape(batch_size, num_rays, samples_per_ray, colors_coarse.shape[-1])
+        densities_coarse = densities_coarse.reshape(batch_size, num_rays, samples_per_ray, 1)
+
+        # Fine Pass
+        N_importance = rendering_options['depth_resolution_importance']
+        if N_importance > 0:
+            # Only the weights of the coarse pass are needed to place the fine samples.
+            _, _, weights = self.ray_marcher(colors_coarse, densities_coarse, depths_coarse, rendering_options)
+
+            depths_fine = self.sample_importance(depths_coarse, weights, N_importance)
+            sample_directions = ray_directions.unsqueeze(-2).expand(-1, -1, N_importance, -1).reshape(batch_size, -1, 3)
+            sample_coordinates = (ray_origins.unsqueeze(-2) + depths_fine * ray_directions.unsqueeze(-2)).reshape(batch_size, -1, 3)
+
+            out = self.run_model(planes, decoder, sample_coordinates, sample_directions, rendering_options)
+            colors_fine = out['rgb']
+            densities_fine = out['sigma']
+            colors_fine = colors_fine.reshape(batch_size, num_rays, N_importance, colors_fine.shape[-1])
+            densities_fine = densities_fine.reshape(batch_size, num_rays, N_importance, 1)
+
+            all_depths, all_colors, all_densities = self.unify_samples(depths_coarse, colors_coarse, densities_coarse,
+                                                                  depths_fine, colors_fine, densities_fine)
+
+            # Aggregate
+            rgb_final, depth_final, weights = self.ray_marcher(all_colors, all_densities, all_depths, rendering_options)
+        else:
+            rgb_final, depth_final, weights = self.ray_marcher(colors_coarse, densities_coarse, depths_coarse, rendering_options)
+
+        return rgb_final, depth_final, weights.sum(2), is_ray_valid
+
+    def run_model(self, planes, decoder, sample_coordinates, sample_directions, options):
+        """
+        Sample features at the given points and run the decoder on them.
+
+        Args:
+            planes: feature planes / grid; the expected layout depends on
+                self.triplane_feature_type.
+            decoder: callable taking (sampled_features, sample_coordinates) and
+                returning a dict that contains at least 'sigma'.
+            sample_coordinates: points of shape (batch, n_points, 3).
+            sample_directions: accepted for interface compatibility; not used here.
+            options: dict read for 'box_warp' and the optional 'density_noise'.
+
+        Returns:
+            The decoder output dict; 'sigma' has Gaussian noise added in place
+            when options['density_noise'] > 0.
+
+        Raises:
+            ValueError: if self.triplane_feature_type is not a supported value.
+        """
+        hparams = self.hparams
+        if hparams['enable_rescale_plane_regulation'] and self.training:
+            # Training-time regularisation: resample the planes to a random square size of at most 256.
+            target_size = random.randint(int(256 * hparams.get("min_rescale_factor", 0.5)), 256)
+            planes = rearrange(planes, "n k c h w -> n (k c) h w")
+            planes = F.interpolate(planes, (target_size, target_size), mode='bilinear', align_corners=False, antialias=False)
+            planes = rearrange(planes, "n (k c) h w -> n k c h w", k=3)
+
+        self.plane_axes = self.plane_axes.to(planes.device)
+        if self.triplane_feature_type in ["triplane"]:
+            sampled_features = sample_from_planes(self.plane_axes, planes, sample_coordinates, padding_mode='zeros', box_warp=options['box_warp'])
+        elif self.triplane_feature_type in ["trigrid", 'trigrid_v2']:
+            sampled_features = sample_from_trigrids(self.plane_axes, planes, sample_coordinates, padding_mode='zeros', box_warp=options['box_warp'], triplane_depth=hparams.get("triplane_depth", 1))
+        elif self.triplane_feature_type == "3dgrid":
+            sampled_features = sample_from_3dgrid(planes, sample_coordinates)
+        else:
+            # Fail with a clear message instead of an unbound-variable error further down.
+            raise ValueError(f"Unknown triplane_feature_type: {self.triplane_feature_type}")
+
+        out = decoder(sampled_features, sample_coordinates)
+        if options.get('density_noise', 0) > 0:
+            out['sigma'] += torch.randn_like(out['sigma']) * options['density_noise']
+        return out
+
+    def sort_samples(self, all_depths, all_colors, all_densities):
+        """
+        Sort the samples of every ray by depth (ascending along dim -2) and
+        reorder colors and densities to match.
+
+        Returns:
+            Tuple (all_depths, all_colors, all_densities), all depth-sorted.
+        """
+        _, indices = torch.sort(all_depths, dim=-2)
+        all_depths = torch.gather(all_depths, -2, indices)
+        all_colors = torch.gather(all_colors, -2, indices.expand(-1, -1, -1, all_colors.shape[-1]))
+        all_densities = torch.gather(all_densities, -2, indices.expand(-1, -1, -1, 1))
+        return all_depths, all_colors, all_densities
+
+    def unify_samples(self, depths1, colors1, densities1, depths2, colors2, densities2):
+        """
+        Concatenate two sample sets along the per-ray sample axis (dim -2) and
+        return them sorted by depth.
+
+        Returns:
+            Tuple (all_depths, all_colors, all_densities), all depth-sorted.
+        """
+        all_depths = torch.cat([depths1, depths2], dim = -2)
+        all_colors = torch.cat([colors1, colors2], dim = -2)
+        all_densities = torch.cat([densities1, densities2], dim = -2)
+
+        # Sorting is shared with sort_samples() instead of being repeated here.
+        return self.sort_samples(all_depths, all_colors, all_densities)
+
+    def sample_stratified(self, ray_origins, ray_start, ray_end, depth_resolution, disparity_space_sampling=False):
+        """
+        Return depths of approximately uniformly spaced samples along rays.
+
+        Args:
+            ray_origins: tensor of shape (N, M, 3); only its shape and device are used.
+            ray_start: near bound, a number or a per-ray tensor.
+            ray_end: far bound, a number or a per-ray tensor.
+            depth_resolution: number of samples per ray.
+            disparity_space_sampling: when True, space the samples linearly in
+                inverse depth instead of depth.
+
+        Returns:
+            Tensor of shape (N, M, depth_resolution, 1). Each sample is jittered
+            by a uniform random offset of up to one bin width.
+        """
+        N, M, _ = ray_origins.shape
+        if disparity_space_sampling:
+            depths_coarse = torch.linspace(0,
+                                    1,
+                                    depth_resolution,
+                                    device=ray_origins.device).reshape(1, 1, depth_resolution, 1).repeat(N, M, 1, 1)
+            depth_delta = 1/(depth_resolution - 1)
+            depths_coarse += torch.rand_like(depths_coarse) * depth_delta
+            # Interpolate between 1/near and 1/far, then invert back to depth.
+            depths_coarse = 1./(1./ray_start * (1. - depths_coarse) + 1./ray_end * depths_coarse)
+        else:
+            if isinstance(ray_start, torch.Tensor):
+                # Per-ray bounds: math_utils.linspace puts the sample axis first, so move it to dim 2.
+                depths_coarse = math_utils.linspace(ray_start, ray_end, depth_resolution).permute(1,2,0,3)
+                depth_delta = (ray_end - ray_start) / (depth_resolution - 1)
+                depths_coarse += torch.rand_like(depths_coarse) * depth_delta[..., None]
+            else:
+                depths_coarse = torch.linspace(ray_start, ray_end, depth_resolution, device=ray_origins.device).reshape(1, 1, depth_resolution, 1).repeat(N, M, 1, 1)
+                depth_delta = (ray_end - ray_start)/(depth_resolution - 1)
+                depths_coarse += torch.rand_like(depths_coarse) * depth_delta
+
+        return depths_coarse
+
+    def sample_importance(self, z_vals, weights, N_importance):
+        """
+        Return depths of importance sampled points along rays. See NeRF importance sampling for more.
+
+        Args:
+            z_vals: coarse depths of shape (batch, num_rays, samples_per_ray, 1).
+            weights: per-sample weights from the ray marcher; reshaped to
+                (batch * num_rays, -1).
+            N_importance: number of fine samples to draw per ray.
+
+        Returns:
+            Tensor of shape (batch, num_rays, N_importance, 1), computed without gradients.
+        """
+        with torch.no_grad():
+            batch_size, num_rays, samples_per_ray, _ = z_vals.shape
+
+            z_vals = z_vals.reshape(batch_size * num_rays, samples_per_ray)
+            weights = weights.reshape(batch_size * num_rays, -1) # -1 to account for loss of 1 sample in MipRayMarcher
+
+            # smooth weights
+            weights = torch.nn.functional.max_pool1d(weights.unsqueeze(1).float(), 2, 1, padding=1)
+            # Drop only the channel axis: a bare squeeze() would also drop the ray axis
+            # when there is a single ray and break the 2D indexing below.
+            weights = torch.nn.functional.avg_pool1d(weights, 2, 1).squeeze(1)
+            weights = weights + 0.01
+
+            z_vals_mid = 0.5 * (z_vals[: ,:-1] + z_vals[: ,1:])
+            importance_z_vals = self.sample_pdf(z_vals_mid, weights[:, 1:-1],
+                                             N_importance).detach().reshape(batch_size, num_rays, N_importance, 1)
+        return importance_z_vals
+
+    def sample_pdf(self, bins, weights, N_importance, det=False, eps=1e-5):
+        """
+        Sample @N_importance samples from @bins with distribution defined by @weights.
+        Inputs:
+            bins: (N_rays, N_samples_+1) where N_samples_ is "the number of coarse samples per ray - 2"
+            weights: (N_rays, N_samples_)
+            N_importance: the number of samples to draw from the distribution
+            det: deterministic or not
+            eps: a small number to prevent division by zero
+        Outputs:
+            samples: the sampled samples, shape (N_rays, N_importance)
+        """
+        N_rays, N_samples_ = weights.shape
+        # The shape entries are only tensors in special cases (presumably tracing); keep them on the right device.
+        if isinstance(N_samples_, torch.Tensor):
+            N_samples_ = N_samples_.to(device=weights.device)
+        if isinstance(N_rays, torch.Tensor):
+            N_rays = N_rays.to(device=weights.device)
+        weights = weights + eps # prevent division by zero (don't do inplace op!)
+        pdf = weights / torch.sum(weights, -1, keepdim=True) # (N_rays, N_samples_)
+        cdf = torch.cumsum(pdf, -1) # (N_rays, N_samples), cumulative distribution function
+        cdf = torch.cat([torch.zeros_like(cdf[: ,:1]), cdf], -1)  # (N_rays, N_samples_+1)
+                                                                   # padded to 0~1 inclusive
+
+        if det:
+            u = torch.linspace(0, 1, N_importance, device=bins.device)
+            u = u.expand(N_rays, N_importance)
+        else:
+            u = torch.rand(N_rays, N_importance, device=bins.device)
+        u = u.contiguous()
+
+        # Locate, for every u, the CDF interval [below, above] that contains it.
+        inds = torch.searchsorted(cdf, u, right=True)
+        below = torch.clamp_min(inds-1, 0)
+        above = torch.clamp_max(inds, N_samples_)
+
+        inds_sampled = torch.stack([below, above], -1).view(N_rays, 2*N_importance)
+        cdf_g = torch.gather(cdf, 1, inds_sampled).view(N_rays, N_importance, 2)
+        bins_g = torch.gather(bins, 1, inds_sampled).view(N_rays, N_importance, 2)
+
+        denom = cdf_g[...,1]-cdf_g[...,0]
+        denom[denom<eps] = 1 # denom equals 0 means a bin has weight 0, in which case it will not be sampled
+                             # anyway, therefore any value for it is fine (set to 1 here)
+
+        # Linear interpolation inside the selected bin.
+        samples = bins_g[...,0] + (u-cdf_g[...,0])/denom * (bins_g[...,1]-bins_g[...,0])
+        return samples
\ No newline at end of file
diff --git a/modules/img2plane/deeplabv3/__init__.py b/modules/img2plane/deeplabv3/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a41931947f3c93fd5c00bc451c79b7aee4f16e67
--- /dev/null
+++ b/modules/img2plane/deeplabv3/__init__.py
@@ -0,0 +1 @@
+from .decoders.my_model import DeepLabV3
\ No newline at end of file
diff --git a/modules/img2plane/deeplabv3/base/__init__.py b/modules/img2plane/deeplabv3/base/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2be3ca0a342daa2005040e0e7ef48cc1edaf2ce
--- /dev/null
+++ b/modules/img2plane/deeplabv3/base/__init__.py
@@ -0,0 +1,11 @@
+from .model import SegmentationModel
+
+from .modules import (
+    Conv2dReLU,
+    Attention,
+)
+
+from .heads import (
+    SegmentationHead,
+    ClassificationHead,
+)
diff --git a/modules/img2plane/deeplabv3/base/heads.py b/modules/img2plane/deeplabv3/base/heads.py
new file mode 100644
index 0000000000000000000000000000000000000000..067b09e15e22e52c1c1f2875e8340e9b24615598
--- /dev/null
+++ b/modules/img2plane/deeplabv3/base/heads.py
@@ -0,0 +1,22 @@
+import torch.nn as nn
+from .modules import Activation
+
+
+class SegmentationHead(nn.Sequential):
+    """Convolution, optional bilinear upsampling and an activation, chained in that order."""
+
+    def __init__(self, in_channels, out_channels, kernel_size=3, activation=None, upsampling=1):
+        """
+        Args:
+            in_channels: channels of the incoming feature map.
+            out_channels: channels of the produced mask.
+            kernel_size: convolution kernel size; padding is kernel_size // 2.
+            activation: forwarded to `Activation`.
+            upsampling: bilinear scale factor; values <= 1 disable upsampling.
+        """
+        conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2)
+        if upsampling > 1:
+            resize = nn.UpsamplingBilinear2d(scale_factor=upsampling)
+        else:
+            resize = nn.Identity()
+        super().__init__(conv, resize, Activation(activation))
+
+
+class ClassificationHead(nn.Sequential):
+    """Global pooling, flatten, optional dropout, linear layer and activation, in that order."""
+
+    def __init__(self, in_channels, classes, pooling="avg", dropout=0.2, activation=None):
+        """
+        Args:
+            in_channels: channels of the incoming feature map.
+            classes: number of output logits.
+            pooling: "avg" or "max" global pooling.
+            dropout: dropout probability; a falsy value disables dropout.
+            activation: forwarded to `Activation`.
+
+        Raises:
+            ValueError: if `pooling` is neither "max" nor "avg".
+        """
+        if pooling == "avg":
+            pool = nn.AdaptiveAvgPool2d(1)
+        elif pooling == "max":
+            pool = nn.AdaptiveMaxPool2d(1)
+        else:
+            raise ValueError("Pooling should be one of ('max', 'avg'), got {}.".format(pooling))
+        regularizer = nn.Dropout(p=dropout, inplace=True) if dropout else nn.Identity()
+        classifier = nn.Linear(in_channels, classes, bias=True)
+        super().__init__(pool, nn.Flatten(), regularizer, classifier, Activation(activation))
diff --git a/modules/img2plane/deeplabv3/base/initialization.py b/modules/img2plane/deeplabv3/base/initialization.py
new file mode 100644
index 0000000000000000000000000000000000000000..9622130204a0172d43a5f32f4ade065e100f746e
--- /dev/null
+++ b/modules/img2plane/deeplabv3/base/initialization.py
@@ -0,0 +1,27 @@
+import torch.nn as nn
+
+
+def initialize_decoder(module):
+    """
+    Re-initialise the weights of a decoder in place.
+
+    Conv2d weights get Kaiming-uniform init (fan_in, relu), Linear weights get
+    Xavier-uniform init, and existing biases of both are zeroed. BatchNorm2d
+    layers are reset to weight 1 and bias 0. Other layers are left untouched.
+    """
+    for layer in module.modules():
+        if isinstance(layer, nn.BatchNorm2d):
+            nn.init.constant_(layer.weight, 1)
+            nn.init.constant_(layer.bias, 0)
+            continue
+
+        if isinstance(layer, nn.Conv2d):
+            nn.init.kaiming_uniform_(layer.weight, mode="fan_in", nonlinearity="relu")
+        elif isinstance(layer, nn.Linear):
+            nn.init.xavier_uniform_(layer.weight)
+        else:
+            continue
+
+        # Shared by the Conv2d and Linear cases above.
+        if layer.bias is not None:
+            nn.init.constant_(layer.bias, 0)
+
+
+def initialize_head(module):
+    """
+    Re-initialise a head in place: Xavier-uniform weights and zero biases for
+    every Linear and Conv2d layer. Other layers are left untouched.
+    """
+    for layer in module.modules():
+        if not isinstance(layer, (nn.Linear, nn.Conv2d)):
+            continue
+        nn.init.xavier_uniform_(layer.weight)
+        if layer.bias is not None:
+            nn.init.constant_(layer.bias, 0)
diff --git a/modules/img2plane/deeplabv3/base/model.py b/modules/img2plane/deeplabv3/base/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..806f27409ee0571966f1b3e72288b6a99f44fdbe
--- /dev/null
+++ b/modules/img2plane/deeplabv3/base/model.py
@@ -0,0 +1,56 @@
+import torch
+from . import initialization as init
+
+
+class SegmentationModel(torch.nn.Module):
+    """
+    Base class that chains an encoder, a decoder and the output heads.
+
+    The methods read `self.encoder`, `self.decoder`, `self.segmentation_head`
+    and `self.classification_head` (which may be None); this class does not
+    create them itself.
+    """
+
+    def initialize(self):
+        """Initialise the decoder and head weights with the `initialization` helpers."""
+        init.initialize_decoder(self.decoder)
+        init.initialize_head(self.segmentation_head)
+        aux_head = self.classification_head
+        if aux_head is not None:
+            init.initialize_head(aux_head)
+
+    def check_input_shape(self, x):
+        """
+        Verify that the spatial size of `x` is divisible by the encoder's output stride.
+
+        Raises:
+            RuntimeError: if height or width is not a multiple of
+                `self.encoder.output_stride`; the message suggests a padded shape.
+        """
+        height, width = x.shape[-2:]
+        stride = self.encoder.output_stride
+        if height % stride == 0 and width % stride == 0:
+            return
+
+        def next_multiple(size):
+            # Smallest multiple of `stride` that is not below `size`.
+            return size if size % stride == 0 else (size // stride + 1) * stride
+
+        raise RuntimeError(
+            f"Wrong input shape height={height}, width={width}. Expected image height and width "
+            f"divisible by {stride}. Consider pad your images to shape ({next_multiple(height)}, {next_multiple(width)})."
+        )
+
+    def forward(self, x):
+        """
+        Sequentially pass `x` through the model's encoder, decoder and heads.
+
+        Returns:
+            The masks alone, or a (masks, labels) tuple when a classification
+            head is present.
+        """
+        self.check_input_shape(x)
+
+        features = self.encoder(x)
+        masks = self.segmentation_head(self.decoder(*features))
+
+        if self.classification_head is None:
+            return masks
+
+        # The auxiliary head classifies from the last encoder feature map.
+        labels = self.classification_head(features[-1])
+        return masks, labels
+
+    @torch.no_grad()
+    def predict(self, x):
+        """Inference method. Switch model to `eval` mode, call `.forward(x)` with `torch.no_grad()`
+
+        Args:
+            x: 4D torch tensor with shape (batch_size, channels, height, width)
+
+        Return:
+            prediction: 4D torch tensor with shape (batch_size, classes, height, width)
+
+        """
+        if self.training:
+            self.eval()
+
+        prediction = self.forward(x)
+        return prediction
diff --git a/modules/img2plane/deeplabv3/base/modules.py b/modules/img2plane/deeplabv3/base/modules.py
new file mode 100644
index 0000000000000000000000000000000000000000..096541fc248cfef434e1a9ffc6cfe1ad7f0acbe5
--- /dev/null
+++ b/modules/img2plane/deeplabv3/base/modules.py
@@ -0,0 +1,131 @@
+import torch
+import torch.nn as nn
+
+try:
+    from inplace_abn import InPlaceABN
+except ImportError:
+    InPlaceABN = None
+
+
+class Conv2dReLU(nn.Sequential):
+    """Convolution followed by an optional normalisation layer and a ReLU."""
+
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        padding=0,
+        stride=1,
+        use_batchnorm=True,
+    ):
+        """
+        Args:
+            in_channels: input channels of the convolution.
+            out_channels: output channels of the convolution.
+            kernel_size: convolution kernel size.
+            padding: convolution padding.
+            stride: convolution stride.
+            use_batchnorm: falsy for no normalisation, "inplace" for InPlaceABN
+                (which also replaces the ReLU), any other truthy value for
+                BatchNorm2d. The convolution has a bias only when this is falsy.
+
+        Raises:
+            RuntimeError: if "inplace" is requested but inplace_abn is not installed.
+        """
+        wants_inplace = use_batchnorm == "inplace"
+        if wants_inplace and InPlaceABN is None:
+            raise RuntimeError(
+                "In order to use `use_batchnorm='inplace'` inplace_abn package must be installed. "
+                + "To install see: https://github.com/mapillary/inplace_abn"
+            )
+
+        conv = nn.Conv2d(
+            in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=not (use_batchnorm)
+        )
+
+        if wants_inplace:
+            # InPlaceABN applies its own activation, so no separate ReLU is needed.
+            norm = InPlaceABN(out_channels, activation="leaky_relu", activation_param=0.0)
+            act = nn.Identity()
+        else:
+            norm = nn.BatchNorm2d(out_channels) if use_batchnorm else nn.Identity()
+            act = nn.ReLU(inplace=True)
+
+        super(Conv2dReLU, self).__init__(conv, norm, act)
+
+
+class SCSEModule(nn.Module):
+    """Sum of a channel-gated and a spatially-gated copy of the input."""
+
+    def __init__(self, in_channels, reduction=16):
+        """
+        Args:
+            in_channels: channels of the input feature map.
+            reduction: divisor for the bottleneck width of the channel gate.
+        """
+        super().__init__()
+        squeezed = in_channels // reduction
+        # Channel gate: global average pool -> 1x1 bottleneck -> per-channel sigmoid.
+        channel_gate = [
+            nn.AdaptiveAvgPool2d(1),
+            nn.Conv2d(in_channels, squeezed, 1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(squeezed, in_channels, 1),
+            nn.Sigmoid(),
+        ]
+        self.cSE = nn.Sequential(*channel_gate)
+        # Spatial gate: 1x1 convolution to one channel -> per-pixel sigmoid.
+        self.sSE = nn.Sequential(nn.Conv2d(in_channels, 1, 1), nn.Sigmoid())
+
+    def forward(self, x):
+        """Return x * cSE(x) + x * sSE(x)."""
+        channel_weighted = x * self.cSE(x)
+        spatial_weighted = x * self.sSE(x)
+        return channel_weighted + spatial_weighted
+
+
+class ArgMax(nn.Module):
+    """Module wrapper around torch.argmax with a fixed `dim`."""
+
+    def __init__(self, dim=None):
+        """
+        Args:
+            dim: dimension forwarded to torch.argmax on every call.
+        """
+        super().__init__()
+        self.dim = dim
+
+    def forward(self, x):
+        """Return torch.argmax(x, dim=self.dim)."""
+        indices = torch.argmax(x, dim=self.dim)
+        return indices
+
+
+class Clamp(nn.Module):
+    """Module wrapper around torch.clamp with fixed bounds."""
+
+    def __init__(self, min=0, max=1):
+        """
+        Args:
+            min: lower bound forwarded to torch.clamp.
+            max: upper bound forwarded to torch.clamp.
+        """
+        super().__init__()
+        self.min = min
+        self.max = max
+
+    def forward(self, x):
+        """Return `x` clamped to [self.min, self.max]."""
+        lower, upper = self.min, self.max
+        return torch.clamp(x, lower, upper)
+
+
+class Activation(nn.Module):
+    """Activation selected by name, or built from a callable factory, wrapped as a module."""
+
+    def __init__(self, name, **params):
+        """
+        Args:
+            name: None or one of "identity", "sigmoid", "softmax2d", "softmax",
+                "logsoftmax", "tanh", "argmax", "argmax2d", "clamp"; or a
+                callable that is invoked as name(**params).
+            **params: forwarded to the selected activation's constructor
+                ("sigmoid" and "tanh" take none).
+
+        Raises:
+            ValueError: if `name` is neither a known name nor callable.
+        """
+
+        super().__init__()
+
+        # Factories are evaluated lazily, so only the requested activation is built.
+        builders = {
+            "identity": lambda: nn.Identity(**params),
+            "sigmoid": nn.Sigmoid,
+            "softmax2d": lambda: nn.Softmax(dim=1, **params),
+            "softmax": lambda: nn.Softmax(**params),
+            "logsoftmax": lambda: nn.LogSoftmax(**params),
+            "tanh": nn.Tanh,
+            "argmax": lambda: ArgMax(**params),
+            "argmax2d": lambda: ArgMax(dim=1, **params),
+            "clamp": lambda: Clamp(**params),
+        }
+
+        if name is None:
+            self.activation = nn.Identity(**params)
+        elif isinstance(name, str) and name in builders:
+            self.activation = builders[name]()
+        elif callable(name):
+            self.activation = name(**params)
+        else:
+            raise ValueError(
+                f"Activation should be callable/sigmoid/softmax/logsoftmax/tanh/"
+                f"argmax/argmax2d/clamp/None; got {name}"
+            )
+
+    def forward(self, x):
+        """Apply the wrapped activation to `x`."""
+        return self.activation(x)
+
+
+class Attention(nn.Module):
+    """Attention block selected by name: None for identity, "scse" for SCSEModule."""
+
+    def __init__(self, name, **params):
+        """
+        Args:
+            name: None or "scse".
+            **params: forwarded to the selected module's constructor.
+
+        Raises:
+            ValueError: for any other `name`.
+        """
+        super().__init__()
+
+        if name is not None and name != "scse":
+            raise ValueError("Attention {} is not implemented".format(name))
+
+        self.attention = nn.Identity(**params) if name is None else SCSEModule(**params)
+
+    def forward(self, x):
+        """Apply the wrapped attention module to `x`."""
+        return self.attention(x)
diff --git a/modules/img2plane/deeplabv3/decoders/decoder.py b/modules/img2plane/deeplabv3/decoders/decoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..f9cc05a690516f978b49b9e06d54a9dd7b6ba5ed
--- /dev/null
+++ b/modules/img2plane/deeplabv3/decoders/decoder.py
@@ -0,0 +1,219 @@
+"""
+BSD 3-Clause License
+
+Copyright (c) Soumith Chintala 2016,
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+__all__ = ["DeepLabV3Decoder"]
+
+
+class DeepLabV3Decoder(nn.Sequential):
+    """ASPP followed by a 3x3 convolution and a ReLU, applied to the last encoder feature map."""
+
+    def __init__(self, in_channels, out_channels=256, atrous_rates=(12, 24, 36)):
+        """
+        Args:
+            in_channels: channels of the feature map fed to the ASPP module.
+            out_channels: channels produced by every stage; also stored as
+                `self.out_channels`.
+            atrous_rates: dilation rates forwarded to ASPP.
+        """
+        aspp = ASPP(in_channels, out_channels, atrous_rates)
+        # The BatchNorm2d that would follow this convolution is disabled in this variant.
+        refine = nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False)
+        super().__init__(aspp, refine, nn.ReLU())
+        self.out_channels = out_channels
+
+    def forward(self, *features):
+        """Run the sequential stack on `features[-1]`; the other feature maps are ignored."""
+        deepest = features[-1]
+        return super().forward(deepest)
+
+
+class DeepLabV3PlusDecoder(nn.Module):
+    """
+    Decoder that fuses upsampled ASPP features of the last encoder stage with
+    a projected higher-resolution feature map (`features[-4]`).
+    """
+
+    def __init__(
+        self,
+        encoder_channels,
+        out_channels=256,
+        atrous_rates=(12, 24, 36),
+        output_stride=16,
+    ):
+        """
+        Args:
+            encoder_channels: channel counts of the encoder feature maps;
+                entries [-1] and [-4] are used.
+            out_channels: channels of the ASPP branch and of the fused output.
+            atrous_rates: dilation rates forwarded to ASPP.
+            output_stride: 8 or 16; selects an upsampling factor of 2 or 4.
+
+        Raises:
+            ValueError: if `output_stride` is neither 8 nor 16.
+        """
+        super().__init__()
+        upsample_factors = {8: 2, 16: 4}
+        if output_stride not in upsample_factors:
+            raise ValueError("Output stride should be 8 or 16, got {}.".format(output_stride))
+
+        self.out_channels = out_channels
+        self.output_stride = output_stride
+
+        # The BatchNorm2d layers that would follow the convolutions below are disabled in this variant.
+        aspp = ASPP(encoder_channels[-1], out_channels, atrous_rates, separable=True)
+        aspp_refine = SeparableConv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False)
+        self.aspp = nn.Sequential(aspp, aspp_refine, nn.ReLU())
+
+        self.up = nn.UpsamplingBilinear2d(scale_factor=upsample_factors[output_stride])
+
+        highres_in_channels = encoder_channels[-4]
+        highres_out_channels = 48  # proposed by authors of paper
+        highres_projection = nn.Conv2d(highres_in_channels, highres_out_channels, kernel_size=1, bias=False)
+        self.block1 = nn.Sequential(highres_projection, nn.ReLU())
+
+        fusion = SeparableConv2d(
+            highres_out_channels + out_channels,
+            out_channels,
+            kernel_size=3,
+            padding=1,
+            bias=False,
+        )
+        self.block2 = nn.Sequential(fusion, nn.ReLU())
+
+    def forward(self, *features):
+        """Fuse ASPP(features[-1]), upsampled, with block1(features[-4]) along the channel axis."""
+        low_res = self.up(self.aspp(features[-1]))
+        high_res = self.block1(features[-4])
+        return self.block2(torch.cat([low_res, high_res], dim=1))
+
+
+class ASPPConv(nn.Sequential):
+    """Dilated 3x3 convolution (no bias) followed by BatchNorm2d and ReLU."""
+
+    def __init__(self, in_channels, out_channels, dilation):
+        """
+        Args:
+            in_channels: input channels of the convolution.
+            out_channels: output channels of the convolution.
+            dilation: dilation rate; also used as padding so the spatial size is kept.
+        """
+        atrous = nn.Conv2d(
+            in_channels, out_channels, kernel_size=3, padding=dilation, dilation=dilation, bias=False
+        )
+        super().__init__(atrous, nn.BatchNorm2d(out_channels), nn.ReLU())
+
+
+class ASPPSeparableConv(nn.Sequential):
+    """Dilated 3x3 separable convolution (no bias) followed by ReLU."""
+
+    def __init__(self, in_channels, out_channels, dilation):
+        """
+        Args:
+            in_channels: input channels of the separable convolution.
+            out_channels: output channels of the separable convolution.
+            dilation: dilation rate; also used as padding.
+        """
+        # The BatchNorm2d that would follow the convolution is disabled in this variant.
+        atrous = SeparableConv2d(
+            in_channels, out_channels, kernel_size=3, padding=dilation, dilation=dilation, bias=False
+        )
+        super().__init__(atrous, nn.ReLU())
+
+
+class ASPPPooling(nn.Sequential):
+    """Global average pooling, 1x1 convolution and ReLU, resized back to the input's spatial size."""
+
+    def __init__(self, in_channels, out_channels):
+        """
+        Args:
+            in_channels: input channels of the 1x1 convolution.
+            out_channels: output channels of the 1x1 convolution.
+        """
+        # The BatchNorm2d that would follow the convolution is disabled in this variant.
+        global_pool = nn.AdaptiveAvgPool2d(1)
+        pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
+        super().__init__(global_pool, pointwise, nn.ReLU())
+
+    def forward(self, x):
+        """Run the stack on `x`, then bilinearly resize the result to x's height and width."""
+        target_size = x.shape[-2:]
+        pooled = super().forward(x)
+        return F.interpolate(pooled, size=target_size, mode="bilinear", align_corners=False)
+
+
+class ASPP(nn.Module):
+    """
+    Five parallel branches over the same input (a 1x1 convolution, three
+    dilated convolutions and a global-pooling branch) whose outputs are
+    concatenated along the channel axis and projected back to `out_channels`.
+    """
+
+    def __init__(self, in_channels, out_channels, atrous_rates, separable=False):
+        """
+        Args:
+            in_channels: channels of the input feature map.
+            out_channels: channels of every branch and of the projected output.
+            atrous_rates: exactly three dilation rates, one per dilated branch.
+            separable: use ASPPSeparableConv instead of ASPPConv for the dilated branches.
+        """
+        super().__init__()
+        # The BatchNorm2d layers that would follow the plain convolutions are disabled in this variant.
+        pointwise_branch = nn.Sequential(
+            nn.Conv2d(in_channels, out_channels, 1, bias=False),
+            nn.ReLU(),
+        )
+
+        # Unpacking enforces that exactly three rates were given.
+        rate1, rate2, rate3 = tuple(atrous_rates)
+        branch_cls = ASPPSeparableConv if separable else ASPPConv
+        atrous_branches = [branch_cls(in_channels, out_channels, rate) for rate in (rate1, rate2, rate3)]
+
+        pooling_branch = ASPPPooling(in_channels, out_channels)
+
+        self.convs = nn.ModuleList([pointwise_branch, *atrous_branches, pooling_branch])
+
+        self.project = nn.Sequential(
+            nn.Conv2d(5 * out_channels, out_channels, kernel_size=1, bias=False),
+            nn.ReLU(),
+            nn.Dropout(0.5),
+        )
+
+    def forward(self, x):
+        """Apply every branch to `x`, concatenate on dim 1 and project."""
+        branch_outputs = [branch(x) for branch in self.convs]
+        return self.project(torch.cat(branch_outputs, dim=1))
+
+
+class SeparableConv2d(nn.Sequential):
+    """Depthwise convolution (one group per input channel, no bias) followed by a 1x1 pointwise convolution."""
+
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride=1,
+        padding=0,
+        dilation=1,
+        bias=True,
+    ):
+        """
+        Args:
+            in_channels: channels of the input and of the depthwise stage.
+            out_channels: channels produced by the pointwise stage.
+            kernel_size: kernel size of the depthwise stage.
+            stride: stride of the depthwise stage.
+            padding: padding of the depthwise stage.
+            dilation: dilation of the depthwise stage.
+            bias: whether the pointwise stage has a bias.
+        """
+        spatial_kwargs = dict(stride=stride, padding=padding, dilation=dilation)
+        depthwise = nn.Conv2d(in_channels, in_channels, kernel_size, groups=in_channels, bias=False, **spatial_kwargs)
+        pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias)
+        super().__init__(depthwise, pointwise)
diff --git a/modules/img2plane/deeplabv3/decoders/model.py b/modules/img2plane/deeplabv3/decoders/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..8eda4fc0490956d9cdbb3cf5dbcdb579dc33aa17
--- /dev/null
+++ b/modules/img2plane/deeplabv3/decoders/model.py
@@ -0,0 +1,174 @@
+from torch import nn
+from typing import Optional
+
+from modules.img2plane.deeplabv3.encoders import get_encoder
+from modules.img2plane.deeplabv3.base import (
+    SegmentationModel,
+    SegmentationHead,
+    ClassificationHead,
+)
+
+
+from .decoder import DeepLabV3Decoder, DeepLabV3PlusDecoder
+
+
+class DeepLabV3(SegmentationModel):
+    """DeepLabV3_ model ("Rethinking Atrous Convolution for Semantic Image Segmentation").
+
+    Args:
+        encoder_name: Name of the classification model used as the encoder (a.k.a. backbone)
+            that extracts features at different spatial resolutions
+        encoder_depth: Number of encoder stages, in range [3, 5]. Each stage generates features
+            two times smaller in spatial dimensions than the previous one (e.g. for depth 0 the features
+            have shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on).
+            Default is 5
+        encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and
+            other pretrained weights (see the table of available weights for each encoder_name)
+        decoder_channels: Number of convolution filters in the ASPP module. Default is 256
+        in_channels: Number of input channels for the model, default is 3 (RGB images)
+        classes: Number of classes for the output mask (equivalently, the number of output mask channels)
+        activation: Activation function applied after the final convolution layer.
+            Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**,
+                **callable** and **None**.
+            Default is **None**
+        upsampling: Final upsampling factor. Default is 8 to preserve input-output spatial shape identity
+        aux_params: Dictionary with parameters of the auxiliary output (classification head). The auxiliary
+            output is built on top of the encoder only when **aux_params** is given (default is **None**):
+                - classes (int): Number of classes
+                - pooling (str): One of "max", "avg". Default is "avg"
+                - dropout (float): Dropout factor in [0, 1)
+                - activation (str): Activation function to apply, "sigmoid"/"softmax"
+                    (could be **None** to return logits)
+    Returns:
+        ``torch.nn.Module``: **DeepLabV3**
+
+    .. _DeeplabV3:
+        https://arxiv.org/abs/1706.05587
+
+    """
+
+    def __init__(
+        self,
+        encoder_name: str = "resnet34",
+        encoder_depth: int = 5,
+        encoder_weights: Optional[str] = "imagenet",
+        decoder_channels: int = 256,
+        in_channels: int = 3,
+        classes: int = 1,
+        activation: Optional[str] = None,
+        upsampling: int = 8,
+        aux_params: Optional[dict] = None,
+    ):
+        super().__init__()
+
+        # The backbone is requested with a fixed output stride of 8.
+        self.encoder = get_encoder(
+            encoder_name,
+            in_channels=in_channels,
+            depth=encoder_depth,
+            weights=encoder_weights,
+            output_stride=8,
+        )
+        deepest_channels = self.encoder.out_channels[-1]
+
+        # The decoder is sized from the last entry of the encoder's channel list.
+        self.decoder = DeepLabV3Decoder(in_channels=deepest_channels, out_channels=decoder_channels)
+
+        self.segmentation_head = SegmentationHead(
+            in_channels=self.decoder.out_channels,
+            out_channels=classes,
+            activation=activation,
+            kernel_size=1,
+            upsampling=upsampling,
+        )
+
+        # Optional auxiliary classifier, sized from the same encoder channel count.
+        self.classification_head = (
+            ClassificationHead(in_channels=deepest_channels, **aux_params) if aux_params is not None else None
+        )
+
+
+class DeepLabV3Plus(SegmentationModel):
+    """DeepLabV3+ model ("Encoder-Decoder with Atrous Separable
+    Convolution for Semantic Image Segmentation").
+
+    Args:
+        encoder_name: Name of the classification model used as the encoder (a.k.a. backbone)
+            that extracts features at different spatial resolutions
+        encoder_depth: Number of encoder stages, in range [3, 5]. Each stage generates features
+            two times smaller in spatial dimensions than the previous one (e.g. for depth 0 the features
+            have shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on).
+            Default is 5
+        encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and
+            other pretrained weights (see the table of available weights for each encoder_name)
+        encoder_output_stride: Downsampling factor of the last encoder features; must be 8 or 16 (else ValueError)
+        decoder_atrous_rates: Dilation rates for the ASPP module (should be a tuple of 3 integer values)
+        decoder_channels: Number of convolution filters in the ASPP module. Default is 256
+        in_channels: Number of input channels for the model, default is 3 (RGB images)
+        classes: Number of classes for the output mask (equivalently, the number of output mask channels)
+        activation: Activation function applied after the final convolution layer.
+            Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**,
+                **callable** and **None**.
+            Default is **None**
+        upsampling: Final upsampling factor. Default is 4 to preserve input-output spatial shape identity
+        aux_params: Dictionary with parameters of the auxiliary output (classification head). The auxiliary
+            output is built on top of the encoder only when **aux_params** is given (default is **None**):
+                - classes (int): Number of classes
+                - pooling (str): One of "max", "avg". Default is "avg"
+                - dropout (float): Dropout factor in [0, 1)
+                - activation (str): Activation function to apply, "sigmoid"/"softmax"
+                    (could be **None** to return logits)
+    Returns:
+        ``torch.nn.Module``: **DeepLabV3Plus**
+
+    Reference:
+        https://arxiv.org/abs/1802.02611v3
+
+    """
+
+    def __init__(
+        self,
+        encoder_name: str = "resnet34",
+        encoder_depth: int = 5,
+        encoder_weights: Optional[str] = "imagenet",
+        encoder_output_stride: int = 16,
+        decoder_channels: int = 256,
+        decoder_atrous_rates: tuple = (12, 24, 36),
+        in_channels: int = 3,
+        classes: int = 1,
+        activation: Optional[str] = None,
+        upsampling: int = 4,
+        aux_params: Optional[dict] = None,
+    ):
+        super().__init__()
+
+        if encoder_output_stride not in (8, 16):
+            raise ValueError("Encoder output stride should be 8 or 16, got {}".format(encoder_output_stride))
+
+        self.encoder = get_encoder(
+            encoder_name,
+            in_channels=in_channels,
+            depth=encoder_depth,
+            weights=encoder_weights,
+            output_stride=encoder_output_stride,
+        )
+        encoder_channels = self.encoder.out_channels  # full per-feature channel list
+
+        self.decoder = DeepLabV3PlusDecoder(
+            encoder_channels=encoder_channels,
+            out_channels=decoder_channels,
+            atrous_rates=decoder_atrous_rates,
+            output_stride=encoder_output_stride,
+        )
+
+        self.segmentation_head = SegmentationHead(
+            in_channels=self.decoder.out_channels,
+            out_channels=classes,
+            activation=activation,
+            kernel_size=1,
+            upsampling=upsampling,
+        )
+
+        self.classification_head = (
+            ClassificationHead(in_channels=encoder_channels[-1], **aux_params) if aux_params is not None else None
+        )
\ No newline at end of file
diff --git a/modules/img2plane/deeplabv3/decoders/my_decoder.py b/modules/img2plane/deeplabv3/decoders/my_decoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..1feb935132ec8b657be60c6a876d8279e1d63356
--- /dev/null
+++ b/modules/img2plane/deeplabv3/decoders/my_decoder.py
@@ -0,0 +1,219 @@
+"""
+BSD 3-Clause License
+
+Copyright (c) Soumith Chintala 2016,
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+__all__ = ["DeepLabV3Decoder"]
+
+
+class DeepLabV3Decoder(nn.Sequential):
+    """ASPP followed by a bias-free 3x3 convolution, applied to the last feature map only."""
+
+    def __init__(self, in_channels, out_channels=256, atrous_rates=(12, 24, 36)):
+        aspp = ASPP(in_channels, out_channels, atrous_rates)
+        smooth = nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False)
+        # No normalisation or activation follows the convolution.
+        super().__init__(aspp, smooth)
+        self.out_channels = out_channels
+
+    def forward(self, *features):
+        return super().forward(features[-1])
+
+
+class DeepLabV3PlusDecoder(nn.Module):
+    """DeepLabV3+ decoder.
+
+    Runs ASPP on the last encoder feature, upsamples the result and fuses
+    it with a 1x1-projected ``features[-4]`` through a separable 3x3 convolution.
+    """
+
+    def __init__(
+        self,
+        encoder_channels,
+        out_channels=256,
+        atrous_rates=(12, 24, 36),
+        output_stride=16,
+    ):
+        super().__init__()
+        if output_stride not in {8, 16}:
+            raise ValueError("Output stride should be 8 or 16, got {}.".format(output_stride))
+
+        self.out_channels = out_channels
+        self.output_stride = output_stride
+
+        # Context branch, fed with the last encoder feature.
+        self.aspp = nn.Sequential(
+            ASPP(encoder_channels[-1], out_channels, atrous_rates, separable=True),
+            SeparableConv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False),
+            nn.ReLU(),
+        )
+
+        # Upsample by 2 for output stride 8 and by 4 for output stride 16.
+        self.up = nn.UpsamplingBilinear2d(scale_factor=2 if output_stride == 8 else 4)
+
+        # Skip branch: 1x1 projection of ``features[-4]``.
+        skip_channels = 48  # proposed by authors of paper
+        self.block1 = nn.Sequential(
+            nn.Conv2d(encoder_channels[-4], skip_channels, kernel_size=1, bias=False),
+            nn.ReLU(),
+        )
+        # Fusion of the concatenated context and skip branches.
+        fused_in_channels = skip_channels + out_channels
+        self.block2 = nn.Sequential(
+            SeparableConv2d(fused_in_channels, out_channels, kernel_size=3, padding=1, bias=False),
+            nn.ReLU(),
+        )
+
+    def forward(self, *features):
+        """Fuse the ASPP output of ``features[-1]`` with the projection of ``features[-4]``."""
+        context = self.aspp(features[-1])
+        context = self.up(context)
+        skip = self.block1(features[-4])
+        fused = torch.cat([context, skip], dim=1)
+        return self.block2(fused)
+
+
+class ASPPConv(nn.Sequential):
+    """Dilated 3x3 convolution (no bias) followed by ReLU.
+
+    ``padding`` is set equal to ``dilation``, which keeps the spatial size
+    unchanged for a 3x3 kernel at stride 1.
+    """
+
+    def __init__(self, in_channels, out_channels, dilation):
+        atrous = nn.Conv2d(
+            in_channels, out_channels, kernel_size=3, padding=dilation, dilation=dilation, bias=False
+        )
+        # No normalisation layer between the convolution and the ReLU.
+        activation = nn.ReLU()
+        super().__init__(atrous, activation)
+
+
+class ASPPSeparableConv(nn.Sequential):
+    """Dilated separable 3x3 convolution (no bias) followed by ReLU.
+
+    ``padding`` is set equal to ``dilation``; both are forwarded to
+    ``SeparableConv2d`` together with ``kernel_size=3``.
+    """
+
+    def __init__(self, in_channels, out_channels, dilation):
+        atrous = SeparableConv2d(
+            in_channels, out_channels, kernel_size=3, padding=dilation, dilation=dilation, bias=False
+        )
+        # No normalisation layer between the convolution and the ReLU.
+        activation = nn.ReLU()
+        super().__init__(atrous, activation)
+
+
+class ASPPPooling(nn.Sequential):
+    """Global average pooling, 1x1 convolution and ReLU, resized back to the input's spatial size."""
+
+    def __init__(self, in_channels, out_channels):
+        # Pool to 1x1, mix channels without bias, then activate.
+        pool = nn.AdaptiveAvgPool2d(1)
+        project = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
+        super().__init__(pool, project, nn.ReLU())
+
+    def forward(self, x):
+        """Run the stacked layers, then bilinearly interpolate to ``x``'s last two dims."""
+        height_width = x.shape[-2:]
+        pooled = super().forward(x)
+        return F.interpolate(pooled, size=height_width, mode="bilinear", align_corners=False)
+
+
+class ASPP(nn.Module):
+    """Atrous Spatial Pyramid Pooling with five parallel branches.
+
+    Branches: a 1x1 convolution, three dilated convolutions (one per entry
+    of ``atrous_rates``) and an image-pooling branch. Their outputs are
+    concatenated on the channel axis and projected back to ``out_channels``.
+    """
+
+    def __init__(self, in_channels, out_channels, atrous_rates, separable=False):
+        super(ASPP, self).__init__()
+        pointwise = nn.Sequential(
+            nn.Conv2d(in_channels, out_channels, 1, bias=False),
+            nn.ReLU(),
+        )
+
+        # Exactly three rates are required; any other count fails to unpack.
+        rate1, rate2, rate3 = tuple(atrous_rates)
+        branch_cls = ASPPSeparableConv if separable else ASPPConv
+        dilated = [branch_cls(in_channels, out_channels, rate) for rate in (rate1, rate2, rate3)]
+        pooled = ASPPPooling(in_channels, out_channels)
+
+        # The branch order fixes the channel layout that ``project`` receives.
+        self.convs = nn.ModuleList([pointwise, *dilated, pooled])
+
+        # Five branches, hence 5 * out_channels input channels.
+        self.project = nn.Sequential(
+            nn.Conv2d(5 * out_channels, out_channels, kernel_size=1, bias=False),
+            nn.ReLU(),
+            nn.Dropout(0.5),
+        )
+
+    def forward(self, x):
+        """Apply every branch to ``x``, concatenate on dim 1 and project."""
+        branch_outputs = [branch(x) for branch in self.convs]
+        return self.project(torch.cat(branch_outputs, dim=1))
+
+
+class SeparableConv2d(nn.Sequential):
+    """Depthwise convolution followed by a 1x1 pointwise convolution.
+
+    The depthwise stage is built with ``groups=in_channels`` and without
+    a bias term; it receives ``kernel_size``, ``stride``, ``padding`` and
+    ``dilation``. The pointwise stage maps ``in_channels`` to
+    ``out_channels`` with ``kernel_size=1`` and is the only place where
+    the ``bias`` flag applies.
+    """
+
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride=1,
+        padding=0,
+        dilation=1,
+        bias=True,
+    ):
+        # Spatial filtering, applied to every input channel separately.
+        spatial_kwargs = dict(stride=stride, padding=padding, dilation=dilation)
+        per_channel = nn.Conv2d(
+            in_channels, in_channels, kernel_size, groups=in_channels, bias=False, **spatial_kwargs
+        )
+        # Channel mixing with a 1x1 kernel.
+        channel_mix = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias)
+        super().__init__(per_channel, channel_mix)
diff --git a/modules/img2plane/deeplabv3/decoders/my_model.py b/modules/img2plane/deeplabv3/decoders/my_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed9e337dfd956320b1c1b167772914e6a6db8217
--- /dev/null
+++ b/modules/img2plane/deeplabv3/decoders/my_model.py
@@ -0,0 +1,113 @@
+import torch
+from torch import nn
+
+from typing import Optional
+
+from modules.img2plane.deeplabv3.encoders import get_encoder
+from modules.img2plane.deeplabv3.base import initialization as init
+
+
+from .my_decoder import DeepLabV3Decoder
+
+
+class DeepLabV3(nn.Module):
+    """Headless DeepLabV3_ feature extractor: an encoder followed by ``DeepLabV3Decoder``.
+
+    This class defines no segmentation or classification head; ``forward``
+    returns the decoder output directly.
+
+    Args:
+        encoder_name: Name of the classification model used as the encoder (a.k.a. backbone)
+            that extracts features at different spatial resolutions
+        encoder_depth: Number of encoder stages, in range [3, 5]. Each stage generates features
+            two times smaller in spatial dimensions than the previous one (e.g. for depth 0 the features
+            have shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on).
+            Default is 5
+        encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and
+            other pretrained weights (see the table of available weights for each encoder_name)
+        decoder_channels: Number of convolution filters in the ASPP module. Default is 256
+        in_channels: Number of input channels for the model. Default is 5
+            (3 for RGB and 2 for pixel coordinates)
+
+    Notes:
+        - The encoder is always requested with ``output_stride=8``.
+        - ``__init__`` does not call ``initialize()``; call it explicitly to run
+          ``initialization.initialize_decoder`` on the decoder.
+        - ``forward`` raises ``RuntimeError`` when the input height or width is not
+          divisible by the encoder's output stride.
+
+    Returns:
+        ``torch.nn.Module``: **DeepLabV3**
+
+    .. _DeeplabV3:
+        https://arxiv.org/abs/1706.05587
+
+    """
+
+    def initialize(self):
+        """Initialise the decoder through ``init.initialize_decoder``."""
+        init.initialize_decoder(self.decoder)
+
+    def __init__(
+        self,
+        encoder_name: str = "resnet34",
+        encoder_depth: int = 5,
+        encoder_weights: Optional[str] = "imagenet",
+        decoder_channels: int = 256,
+        in_channels: int = 5,  # 3 for rgb, and 2 for pixel coordinates
+    ):
+        super().__init__()
+
+        # The backbone is requested with a fixed output stride of 8.
+        self.encoder = get_encoder(
+            encoder_name,
+            in_channels=in_channels,
+            depth=encoder_depth,
+            weights=encoder_weights,
+            output_stride=8,
+        )
+
+        # The decoder is sized from the last entry of the encoder's channel list.
+        deepest_channels = self.encoder.out_channels[-1]
+        self.decoder = DeepLabV3Decoder(in_channels=deepest_channels, out_channels=decoder_channels)
+
+    def check_input_shape(self, x):
+        """Raise ``RuntimeError`` unless the last two dims of ``x`` are multiples of the output stride."""
+        h, w = x.shape[-2:]
+        output_stride = self.encoder.output_stride
+        h_rem, w_rem = h % output_stride, w % output_stride
+        if h_rem == 0 and w_rem == 0:
+            return
+
+        # Round each offending dimension up to the next multiple for the hint in the message.
+        new_h = h if h_rem == 0 else (h // output_stride + 1) * output_stride
+        new_w = w if w_rem == 0 else (w // output_stride + 1) * output_stride
+        raise RuntimeError(
+            f"Wrong input shape height={h}, width={w}. Expected image height and width "
+            f"divisible by {output_stride}. Consider pad your images to shape ({new_h}, {new_w})."
+        )
+
+    def forward(self, x):
+        """Sequentially pass `x` through the model's encoder and decoder."""
+        self.check_input_shape(x)
+
+        features = self.encoder(x)
+        return self.decoder(*features)
+
+    @torch.no_grad()
+    def predict(self, x):
+        """Inference method. Switch model to `eval` mode, call `.forward(x)` with `torch.no_grad()`
+
+        Args:
+            x: 4D torch tensor with shape (batch_size, channels, height, width)
+
+        Return:
+            The decoder output produced by ``forward``; no head is applied.
+
+        """
+        if self.training:
+            self.eval()
+
+        output = self.forward(x)
+        return output
+
diff --git a/modules/img2plane/deeplabv3/encoders/__init__.py b/modules/img2plane/deeplabv3/encoders/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d662e7436d279278da98bd602fd1ca380251cd5
--- /dev/null
+++ b/modules/img2plane/deeplabv3/encoders/__init__.py
@@ -0,0 +1,71 @@
+import timm
+import functools
+import torch.utils.model_zoo as model_zoo
+
+from .resnet import resnet_encoders
+
+
+
+encoders = {}
+encoders.update(resnet_encoders)
+
+def get_encoder(name, in_channels=3, depth=5, weights=None, output_stride=32, **kwargs):
+    """Build encoder `name`, optionally load `weights`, patch channels/stride; `**kwargs` are ignored."""
+    try:
+        Encoder = encoders[name]["encoder"]
+    except KeyError:
+        raise KeyError("Wrong encoder name `{}`, supported encoders: {}".format(name, list(encoders.keys())))
+
+    # Copy before overriding depth so the shared registry entry is never mutated.
+    params = dict(encoders[name]["params"], depth=depth)
+    encoder = Encoder(**params)
+
+    if weights is not None:
+        try:
+            settings = encoders[name]["pretrained_settings"][weights]
+        except KeyError:
+            raise KeyError(
+                "Wrong pretrained weights `{}` for encoder `{}`. Available options are: {}".format(
+                    weights,
+                    name,
+                    list(encoders[name]["pretrained_settings"].keys()),
+                )
+            )
+        encoder.load_state_dict(model_zoo.load_url(settings["url"]))
+
+    # `pretrained` is True exactly when weights were requested above.
+    encoder.set_in_channels(in_channels, pretrained=weights is not None)
+    if output_stride != 32:
+        encoder.make_dilated(output_stride)
+    return encoder
+
+
+def get_encoder_names():
+    return [*encoders]  # names of every registered encoder, in registration order
+
+
+def get_preprocessing_params(encoder_name, pretrained="imagenet"):
+    """Return the ``input_space``/``input_range``/``mean``/``std`` settings for an encoder."""
+    if encoder_name.startswith("tu-"):
+        encoder_name = encoder_name[3:]
+        if not timm.models.is_model_pretrained(encoder_name):
+            raise ValueError(f"{encoder_name} does not have pretrained weights and preprocessing parameters")
+        settings = timm.models.get_pretrained_cfg(encoder_name)
+    else:
+        all_settings = encoders[encoder_name]["pretrained_settings"]
+        if pretrained not in all_settings:
+            raise ValueError("Available pretrained options {}".format(all_settings.keys()))
+        settings = all_settings[pretrained]
+
+    # ``mean`` and ``std`` have no fallback value, unlike the first two entries.
+    return {
+        "input_space": settings.get("input_space", "RGB"),
+        "input_range": list(settings.get("input_range", [0, 1])),
+        "mean": list(settings.get("mean")),
+        "std": list(settings.get("std")),
+    }
+
+
+def get_preprocessing_fn(encoder_name, pretrained="imagenet"):
+    params = get_preprocessing_params(encoder_name, pretrained=pretrained)  # input_space / input_range / mean / std
+    return functools.partial(preprocess_input, **params)  # NOTE(review): preprocess_input is never imported or defined in this module -> NameError when called
diff --git a/modules/img2plane/deeplabv3/encoders/_base.py b/modules/img2plane/deeplabv3/encoders/_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..aab838f130f2509a96edbaa1b2c6222e4064a18a
--- /dev/null
+++ b/modules/img2plane/deeplabv3/encoders/_base.py
@@ -0,0 +1,65 @@
+import torch
+import torch.nn as nn
+from typing import List
+from collections import OrderedDict
+
+from . import _utils as utils
+
+
+class EncoderMixin:
+    """Add encoder functionality such as:
+    - output channels specification of feature tensors (produced by encoder)
+    - patching first convolution for arbitrary input channels
+    - replacing strides with dilation to change the output stride
+    """
+
+    _output_stride = 32  # class-level default; make_dilated() stores a per-instance value
+
+    @property
+    def out_channels(self):
+        """Return channels dimensions for each tensor of forward output of encoder"""
+        return self._out_channels[: self._depth + 1]
+
+    @property
+    def output_stride(self):
+        """Return the configured output stride, capped at ``2 ** depth``."""
+        return min(self._output_stride, 2**self._depth)
+
+    def set_in_channels(self, in_channels, pretrained=True):
+        """Change first convolution channels.
+
+        Does nothing when ``in_channels`` is 3.
+        """
+        if in_channels != 3:
+            self._in_channels = in_channels
+            if self._out_channels[0] == 3:
+                # Keep the first ``_out_channels`` entry in sync with the new input width.
+                self._out_channels = (in_channels, *list(self._out_channels)[1:])
+            utils.patch_first_conv(model=self, new_in_channels=in_channels, pretrained=pretrained)
+
+    def get_stages(self):
+        """Override it in your implementation"""
+        raise NotImplementedError
+
+    def make_dilated(self, output_stride):
+        """Patch late stages via ``replace_strides_with_dilation`` for ``output_stride``.
+
+        For 16, stage 5 is patched with dilation 2; for 8, stages 4 and 5 are
+        patched with dilations 2 and 4. Any other value raises ``ValueError``.
+        """
+        # (stage index, dilation rate) pairs to patch
+        if output_stride == 16:
+            plan = [(5, 2)]
+        elif output_stride == 8:
+            plan = [(4, 2), (5, 4)]
+        else:
+            raise ValueError("Output stride should be 16 or 8, got {}.".format(output_stride))
+
+        self._output_stride = output_stride
+
+        stages = self.get_stages()
+        for stage_indx, dilation_rate in plan:
+            utils.replace_strides_with_dilation(
+                module=stages[stage_indx],
+                dilation_rate=dilation_rate,
+            )
diff --git a/modules/img2plane/deeplabv3/encoders/_utils.py b/modules/img2plane/deeplabv3/encoders/_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..46094dcd51afaec8854d79845d7264781fe919db
--- /dev/null
+++ b/modules/img2plane/deeplabv3/encoders/_utils.py
@@ -0,0 +1,51 @@
+import torch
+import torch.nn as nn
+
+
+def patch_first_conv(model, new_in_channels, default_in_channels=3, pretrained=True):
+    """Change the input channel count of the model's first matching convolution.
+
+    not pretrained       -> allocate a new weight and call ``reset_parameters()``
+    new_in_channels == 1 -> sum the original weight over its input-channel axis
+    otherwise            -> cycle through the original input channels and rescale
+
+    Raises ValueError when no Conv2d with ``default_in_channels`` inputs exists.
+    """
+    # get first conv
+    for module in model.modules():
+        if isinstance(module, nn.Conv2d) and module.in_channels == default_in_channels:
+            break
+    else:
+        # Without this guard the code below would patch whatever module came last.
+        raise ValueError("No Conv2d with {} input channels found".format(default_in_channels))
+
+    weight = module.weight.detach()
+    module.in_channels = new_in_channels
+    new_shape = (module.out_channels, new_in_channels // module.groups, *module.kernel_size)
+
+    if not pretrained:
+        module.weight = nn.parameter.Parameter(torch.Tensor(*new_shape))
+        module.reset_parameters()
+    elif new_in_channels == 1:
+        module.weight = nn.parameter.Parameter(weight.sum(1, keepdim=True))
+    else:
+        new_weight = torch.Tensor(*new_shape)
+        for i in range(new_in_channels):
+            new_weight[:, i] = weight[:, i % default_in_channels]
+
+        # Scale the copied weights by default_in_channels / new_in_channels.
+        module.weight = nn.parameter.Parameter(new_weight * (default_in_channels / new_in_channels))
+
+
+def replace_strides_with_dilation(module, dilation_rate):
+    """Patch every Conv2d under `module` in place: stride 1, the given dilation, matching padding."""
+    for mod in module.modules():
+        if isinstance(mod, nn.Conv2d):
+            mod.stride = (1, 1)
+            mod.dilation = (dilation_rate, dilation_rate)
+            kh, kw = mod.kernel_size
+            # Pad each axis from its own kernel extent so non-square kernels are handled.
+            mod.padding = ((kh // 2) * dilation_rate, (kw // 2) * dilation_rate)
+            # Workaround for EfficientNet: neutralise its static_padding module if present
+            if hasattr(mod, "static_padding"):
+                mod.static_padding = nn.Identity()
diff --git a/modules/img2plane/deeplabv3/encoders/resnet.py b/modules/img2plane/deeplabv3/encoders/resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..0cfc532b12f8e8a6f9c9656ee6fa411e8d5c1294
--- /dev/null
+++ b/modules/img2plane/deeplabv3/encoders/resnet.py
@@ -0,0 +1,238 @@
+"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin`
+
+Attributes:
+
+    _out_channels (list of int): specify number of channels for each encoder feature tensor
+    _depth (int): specify number of stages in decoder (in other words number of downsampling operations)
+    _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3)
+
+Methods:
+
+    forward(self, x: torch.Tensor)
+        produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of
+        shape NCHW (features should be sorted in descending order according to spatial resolution, starting
+        with resolution same as input `x` tensor).
+
+        Input: `x` with shape (1, 3, 64, 64)
+        Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes
+                [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8),
+                (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ)
+
+        also should support number of features according to specified depth, e.g. if depth = 5,
+        number of feature tensors = 6 (one with same resolution as input and 5 downsampled),
+        depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled).
+"""
+from copy import deepcopy
+
+import torch.nn as nn
+
+from torchvision.models.resnet import ResNet
+from torchvision.models.resnet import BasicBlock
+from torchvision.models.resnet import Bottleneck
+from pretrainedmodels.models.torchvision_models import pretrained_settings
+
+from ._base import EncoderMixin
+
+
+class ResNetEncoder(ResNet, EncoderMixin):
+    """torchvision ResNet without its classifier head, returning one feature map per stage."""
+    def __init__(self, out_channels, depth=5, **kwargs):
+        super().__init__(**kwargs)
+        self._depth = depth
+        self._out_channels = out_channels
+        self._in_channels = 3
+        del self.fc, self.avgpool  # the classifier head is not used by forward()
+
+    def get_stages(self):
+        """Return the six stages in order; entry 0 is an identity that passes the input through."""
+        return [
+            nn.Identity(),
+            nn.Sequential(self.conv1, self.bn1, self.relu),
+            nn.Sequential(self.maxpool, self.layer1),
+            self.layer2,
+            self.layer3,
+            self.layer4,
+        ]
+
+    def forward(self, x):
+        """Run the first `depth + 1` stages in sequence and return the list of their outputs."""
+        stages = self.get_stages()
+        features = []
+        for i in range(self._depth + 1):
+            x = stages[i](x)
+            features.append(x)
+        return features
+
+    def load_state_dict(self, state_dict, **kwargs):
+        """Load weights minus the removed `fc` head (popped from `state_dict` in place); returns the parent's key report."""
+        state_dict.pop("fc.bias", None)
+        state_dict.pop("fc.weight", None)
+        return super().load_state_dict(state_dict, **kwargs)
+
+
+new_settings = {
+    "resnet18": {
+        "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnet18-d92f0530.pth",  # noqa
+        "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnet18-118f1556.pth",  # noqa
+    },
+    "resnet50": {
+        "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnet50-08389792.pth",  # noqa
+        "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnet50-16a12f1b.pth",  # noqa
+    },
+    "resnext50_32x4d": {
+        "imagenet": "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth",
+        "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext50_32x4-ddb3e555.pth",  # noqa
+        "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext50_32x4-72679e44.pth",  # noqa
+    },
+    "resnext101_32x4d": {
+        "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext101_32x4-dc43570a.pth",  # noqa
+        "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x4-3f87e46b.pth",  # noqa
+    },
+    "resnext101_32x8d": {
+        "imagenet": "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth",
+        "instagram": "https://download.pytorch.org/models/ig_resnext101_32x8-c38310e5.pth",
+        "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext101_32x8-2cfe2f8b.pth",  # noqa
+        "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x8-b4712904.pth",  # noqa
+    },
+    "resnext101_32x16d": {
+        "instagram": "https://download.pytorch.org/models/ig_resnext101_32x16-c6f796b0.pth",
+        "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext101_32x16-15fffa57.pth",  # noqa
+        "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x16-f3559a9c.pth",  # noqa
+    },
+    "resnext101_32x32d": {
+        "instagram": "https://download.pytorch.org/models/ig_resnext101_32x32-e4b90b00.pth",
+    },
+    "resnext101_32x48d": {
+        "instagram": "https://download.pytorch.org/models/ig_resnext101_32x48-3e41cc8a.pth",
+    },
+}
+
+# Extend a private copy so the dict imported from `pretrainedmodels` is not mutated.
+pretrained_settings = deepcopy(pretrained_settings)
+for model_name, sources in new_settings.items():
+    # Make sure the model has an entry, then register one record per weight source.
+    pretrained_settings.setdefault(model_name, {})
+    for source_name, source_url in sources.items():
+        pretrained_settings[model_name][source_name] = {
+            "url": source_url,
+            "input_size": [3, 224, 224],
+            "input_range": [0, 1],
+            "mean": [0.485, 0.456, 0.406],
+            "std": [0.229, 0.224, 0.225],
+            "num_classes": 1000,
+        }
+
+
+resnet_encoders = {
+    "resnet18": {
+        "encoder": ResNetEncoder,
+        "pretrained_settings": pretrained_settings["resnet18"],
+        "params": {
+            "out_channels": (3, 64, 64, 128, 256, 512),
+            "block": BasicBlock,
+            "layers": [2, 2, 2, 2],
+        },
+    },
+    "resnet34": {
+        "encoder": ResNetEncoder,
+        "pretrained_settings": pretrained_settings["resnet34"],
+        "params": {
+            "out_channels": (3, 64, 64, 128, 256, 512),
+            "block": BasicBlock,
+            "layers": [3, 4, 6, 3],
+        },
+    },
+    "resnet50": {
+        "encoder": ResNetEncoder,
+        "pretrained_settings": pretrained_settings["resnet50"],
+        "params": {
+            "out_channels": (3, 64, 256, 512, 1024, 2048),
+            "block": Bottleneck,
+            "layers": [3, 4, 6, 3],
+        },
+    },
+    "resnet101": {
+        "encoder": ResNetEncoder,
+        "pretrained_settings": pretrained_settings["resnet101"],
+        "params": {
+            "out_channels": (3, 64, 256, 512, 1024, 2048),
+            "block": Bottleneck,
+            "layers": [3, 4, 23, 3],
+        },
+    },
+    "resnet152": {
+        "encoder": ResNetEncoder,
+        "pretrained_settings": pretrained_settings["resnet152"],
+        "params": {
+            "out_channels": (3, 64, 256, 512, 1024, 2048),
+            "block": Bottleneck,
+            "layers": [3, 8, 36, 3],
+        },
+    },
+    "resnext50_32x4d": {
+        "encoder": ResNetEncoder,
+        "pretrained_settings": pretrained_settings["resnext50_32x4d"],
+        "params": {
+            "out_channels": (3, 64, 256, 512, 1024, 2048),
+            "block": Bottleneck,
+            "layers": [3, 4, 6, 3],
+            "groups": 32,
+            "width_per_group": 4,
+        },
+    },
+    "resnext101_32x4d": {
+        "encoder": ResNetEncoder,
+        "pretrained_settings": pretrained_settings["resnext101_32x4d"],
+        "params": {
+            "out_channels": (3, 64, 256, 512, 1024, 2048),
+            "block": Bottleneck,
+            "layers": [3, 4, 23, 3],
+            "groups": 32,
+            "width_per_group": 4,
+        },
+    },
+    "resnext101_32x8d": {
+        "encoder": ResNetEncoder,
+        "pretrained_settings": pretrained_settings["resnext101_32x8d"],
+        "params": {
+            "out_channels": (3, 64, 256, 512, 1024, 2048),
+            "block": Bottleneck,
+            "layers": [3, 4, 23, 3],
+            "groups": 32,
+            "width_per_group": 8,
+        },
+    },
+    "resnext101_32x16d": {
+        "encoder": ResNetEncoder,
+        "pretrained_settings": pretrained_settings["resnext101_32x16d"],
+        "params": {
+            "out_channels": (3, 64, 256, 512, 1024, 2048),
+            "block": Bottleneck,
+            "layers": [3, 4, 23, 3],
+            "groups": 32,
+            "width_per_group": 16,
+        },
+    },
+    "resnext101_32x32d": {
+        "encoder": ResNetEncoder,
+        "pretrained_settings": pretrained_settings["resnext101_32x32d"],
+        "params": {
+            "out_channels": (3, 64, 256, 512, 1024, 2048),
+            "block": Bottleneck,
+            "layers": [3, 4, 23, 3],
+            "groups": 32,
+            "width_per_group": 32,
+        },
+    },
+    "resnext101_32x48d": {
+        "encoder": ResNetEncoder,
+        "pretrained_settings": pretrained_settings["resnext101_32x48d"],
+        "params": {
+            "out_channels": (3, 64, 256, 512, 1024, 2048),
+            "block": Bottleneck,
+            "layers": [3, 4, 23, 3],
+            "groups": 32,
+            "width_per_group": 48,
+        },
+    },
+}
diff --git a/modules/img2plane/img2plane_model.py b/modules/img2plane/img2plane_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..7bf131547563930ce387acfbcf9fab85d5e06a00
--- /dev/null
+++ b/modules/img2plane/img2plane_model.py
@@ -0,0 +1,82 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from .deeplabv3 import DeepLabV3
+from .simple_encoders.high_resolution_encoder import HighResoEncoder
+from .segformer import LowResolutionViT, TriplanePredictorViT
+import copy
+from utils.commons.hparams import hparams
+
+
+class Img2PlaneModel(nn.Module):
+    """Image-to-triplane network: a low- and a high-resolution encoder fused by two ViT stages."""
+    def __init__(self, out_channels=96, hp=None):
+        super().__init__()
+        # Config stays on the instance; the old `global hparams` rebinding leaked `hp` into later instances.
+        self.hparams = hp if hp is not None else copy.deepcopy(hparams)
+        self.input_mode = self.hparams.get("img2plane_input_mode", "rgb")
+        if self.input_mode == 'rgb':
+            in_channels = 3
+        elif self.input_mode == 'rgb_alpha':
+            in_channels = 4
+        elif self.input_mode == 'rgb_camera':
+            self.camera_to_channel = nn.Linear(25, 3)
+            in_channels = 3 + 3
+        elif self.input_mode == 'rgb_alpha_camera':
+            self.camera_to_channel = nn.Linear(25, 3)
+            in_channels = 4 + 3
+        else:  # used to surface later as an UnboundLocalError on `in_channels`
+            raise ValueError(f"Unknown img2plane_input_mode: {self.input_mode!r}")
+        in_channels += 2 # add grid_x and grid_y, act as positional encoding
+        self.low_reso_encoder = DeepLabV3(in_channels=in_channels)
+        self.high_reso_encoder = HighResoEncoder(in_dim=in_channels)
+        self.low_reso_vit = LowResolutionViT()  # NOTE(review): appears to read its scale from the global hparams, not `hp` - confirm
+        self.triplane_predictor_vit = TriplanePredictorViT(out_channels=out_channels, img2plane_backbone_scale=self.hparams.get('img2plane_backbone_scale', 'standard'))
+
+    def forward(self, x, cond=None, **synthesis_kwargs):
+        """
+        x: original image, [B, 3, H=512, W=512]
+        cond: optional dict; `ref_cameras` is required by the '*_camera' input modes
+            ref_alphas: 0/1 mask, if img2plane, all ones; if secc2plane, only ones for head, [B, 1, H, W]
+            ref_cameras: camera pose of the input img, [B, 25]
+        return: predicted triplane, [B, 3, C, H', W'] (the three planes are stacked on dim 1)
+        """
+        bs, _, H, W = x.shape
+
+        if self.input_mode in ['rgb_alpha', 'rgb_alpha_camera']:
+            if cond is None or cond.get("ref_alphas") is None:
+                ref_alphas = (x.mean(dim=1, keepdim=True) >= -0.999).float() # set non-black to ones
+            else:
+                ref_alphas = cond["ref_alphas"]
+            x = torch.cat([x, ref_alphas], dim=1)
+        if self.input_mode in ['rgb_camera', 'rgb_alpha_camera']:
+            ref_cameras = cond["ref_cameras"]
+            camera_feat = self.camera_to_channel(ref_cameras).reshape(bs, 3, 1, 1).repeat([1, 1, H, W])
+            x = torch.cat([x, camera_feat], dim=1)
+
+        # concat with pixel position, each axis normalised by its own length
+        grid_x, grid_y = torch.meshgrid(torch.arange(H, device=x.device), torch.arange(W, device=x.device)) # [H, W]
+        grid_x = grid_x / H
+        grid_y = grid_y / W  # was `/ H`, which is only correct for square inputs
+        expanded_x = grid_x[None, None, :, :].repeat(bs, 1, 1, 1) # [B, 1, H, W]
+        expanded_y = grid_y[None, None, :, :].repeat(bs, 1, 1, 1) # [B, 1, H, W]
+        x = torch.cat([x, expanded_x, expanded_y], dim=1) # [B, in_channels, H, W]
+
+        feat_low = self.low_reso_encoder(x)
+        feat_low_after_vit = self.low_reso_vit(feat_low)
+        feat_high = self.high_reso_encoder(x)
+        # Author's note: self.triplane_predictor_vit OUTCHANNEL *4, VIEW 4,3,-1, FLIP - mind the dim index
+        planes = self.triplane_predictor_vit(feat_low_after_vit, feat_high) # [B, C, H, W]
+
+        planes = planes.view(len(planes), 3, -1, planes.shape[-2], planes.shape[-1])
+
+        # borrowed from img2plane and hide-nerf
+        planes_xy = planes[:,0]
+        planes_xy = torch.flip(planes_xy, [2])
+        planes_xz = planes[:,1]
+        planes_xz = torch.flip(planes_xz, [2])
+        planes_zy = planes[:,2]
+        planes_zy = torch.flip(planes_zy, [2, 3])
+        planes = torch.stack([planes_xy, planes_xz, planes_zy], dim=1) # [N, 3, C, H, W]
+        return planes
\ No newline at end of file
diff --git a/modules/img2plane/segformer/__init__.py b/modules/img2plane/segformer/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d1bfd51630dabbdad585a04cb4f3d463a28a04e0
--- /dev/null
+++ b/modules/img2plane/segformer/__init__.py
@@ -0,0 +1 @@
+from .models import LowResolutionViT, TriplanePredictorViT
\ No newline at end of file
diff --git a/modules/img2plane/segformer/base.py b/modules/img2plane/segformer/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..cfec70516b04a786feaba3599df6207fc9d7babb
--- /dev/null
+++ b/modules/img2plane/segformer/base.py
@@ -0,0 +1,210 @@
+import math
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from timm.models.layers import DropPath, to_2tuple, trunc_normal_
+
+
+class DWConv(nn.Module):
+    """Depthwise 3x3 convolution applied to a token sequence viewed as a 2D feature map."""
+    def __init__(self, dim=768):
+        super().__init__()
+        # groups=dim gives one 3x3 filter per channel; padding=1 keeps H and W unchanged
+        self.dwconv = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1, bias=True, groups=dim)
+
+    def forward(self, x, H, W):
+        """Tokens [B, N=H*W, C] -> map [B, C, H, W] -> depthwise conv -> tokens [B, N, C]."""
+        batch, _, channels = x.shape
+        feat_map = self.dwconv(x.transpose(1, 2).view(batch, channels, H, W))
+        return feat_map.flatten(2).transpose(1, 2)
+
+
+class Mlp(nn.Module):
+    """Token MLP with a depthwise conv between the two linear layers: fc1 -> dwconv -> act -> fc2."""
+    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+        super().__init__()
+        # Falsy (None/0) widths fall back to the input width.
+        hidden_features = hidden_features or in_features
+        out_features = out_features or in_features
+        self.fc1 = nn.Linear(in_features, hidden_features)
+        self.dwconv = DWConv(hidden_features)
+        self.act = act_layer()
+        self.fc2 = nn.Linear(hidden_features, out_features)
+        self.drop = nn.Dropout(drop)
+
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        """Re-initialise one submodule; invoked for every submodule through `self.apply`."""
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.weight, 1.0)
+            nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.Conv2d):
+            # Normal init with std = sqrt(2 / fan_out), fan_out counted per group.
+            fan_out = (m.kernel_size[0] * m.kernel_size[1] * m.out_channels) // m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+
+    def forward(self, x, H, W):
+        """x: tokens [B, N, C]; H and W are forwarded to the depthwise conv, which needs N == H * W."""
+        x = self.dwconv(self.fc1(x), H, W)
+        x = self.drop(self.act(x))
+        return self.drop(self.fc2(x))
+
+
+class Attention(nn.Module):
+    """Multi-head self-attention with optional spatial reduction of keys/values (`sr_ratio`)."""
+    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., sr_ratio=1):
+        super().__init__()
+        assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}."
+
+        self.dim = dim
+        self.num_heads = num_heads
+        # Default scale is head_dim ** -0.5; a falsy qk_scale (None or 0) selects the default.
+        self.scale = qk_scale or (dim // num_heads) ** -0.5
+
+        self.q = nn.Linear(dim, dim, bias=qkv_bias)
+        self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias)  # keys and values from one projection
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = nn.Linear(dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+
+        self.sr_ratio = sr_ratio
+        if sr_ratio > 1:
+            # Strided conv shrinks the token grid by sr_ratio per side before k/v are computed.
+            self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
+            self.norm = nn.LayerNorm(dim)
+
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        """Re-initialise one submodule; invoked for every submodule through `self.apply`."""
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.weight, 1.0)
+            nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = (m.kernel_size[0] * m.kernel_size[1] * m.out_channels) // m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+        elif hasattr(m, "reset_parameters"):
+            m.reset_parameters()
+
+    def forward(self, x, H, W):
+        """x: tokens [B, N, C]; H and W are only used when sr_ratio > 1 (then N must equal H * W)."""
+        B, N, C = x.shape
+        head_dim = C // self.num_heads
+        q = self.q(x).reshape(B, N, self.num_heads, head_dim).permute(0, 2, 1, 3)
+
+        # Keys/values come from a spatially reduced copy of the tokens when sr_ratio > 1.
+        kv_src = x
+        if self.sr_ratio > 1:
+            kv_src = x.permute(0, 2, 1).reshape(B, C, H, W)
+            kv_src = self.sr(kv_src).reshape(B, C, -1).permute(0, 2, 1)
+            kv_src = self.norm(kv_src)
+        kv = self.kv(kv_src).reshape(B, -1, 2, self.num_heads, head_dim).permute(2, 0, 3, 1, 4)
+        k, v = kv[0], kv[1]
+
+        attn = ((q @ k.transpose(-2, -1)) * self.scale).softmax(dim=-1)
+        attn = self.attn_drop(attn)
+
+        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
+        return self.proj_drop(self.proj(x))
+
+
+class Block(nn.Module):
+    """Pre-norm transformer block: an attention and an Mlp sub-layer, each added back residually."""
+    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0.,
+                 drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, sr_ratio=1):
+        super().__init__()
+        self.norm1 = norm_layer(dim)
+        self.attn = Attention(
+            dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
+            attn_drop=attn_drop, proj_drop=drop, sr_ratio=sr_ratio,
+        )
+        # Stochastic depth on both residual branches; a plain identity when drop_path is 0.
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+        self.norm2 = norm_layer(dim)
+        self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer, drop=drop)
+
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        """Re-initialise one submodule; invoked for every submodule through `self.apply`."""
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.weight, 1.0)
+            nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = (m.kernel_size[0] * m.kernel_size[1] * m.out_channels) // m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+        elif hasattr(m, "reset_parameters"):
+            m.reset_parameters()
+
+    def forward(self, x, H, W):
+        """x: tokens; H and W are passed through unchanged to the attention and Mlp sub-layers."""
+        attn_out = self.attn(self.norm1(x), H, W)
+        x = x + self.drop_path(attn_out)
+        mlp_out = self.mlp(self.norm2(x), H, W)
+        return x + self.drop_path(mlp_out)
+
+
+class OverlapPatchEmbed(nn.Module):
+    """Overlapping patch embedding: strided conv -> flatten to tokens -> LayerNorm."""
+
+    def __init__(self, img_size=224, patch_size=3, stride=4, in_chans=3, embed_dim=768):
+        super().__init__()
+        self.img_size = to_2tuple(img_size)
+        self.patch_size = to_2tuple(patch_size)
+        ph, pw = self.patch_size
+
+        # Bookkeeping only: derived from img_size // patch_size (stride is not involved);
+        # forward() reports the actual token grid taken from the conv output.
+        self.H, self.W = self.img_size[0] // ph, self.img_size[1] // pw
+        self.num_patches = self.H * self.W
+        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=self.patch_size, stride=stride,
+                              padding=(ph // 2, pw // 2))
+        self.norm = nn.LayerNorm(embed_dim)
+
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        """Re-initialise one submodule; invoked for every submodule through `self.apply`."""
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.weight, 1.0)
+            nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = (m.kernel_size[0] * m.kernel_size[1] * m.out_channels) // m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+        elif hasattr(m, "reset_parameters"):
+            m.reset_parameters()
+
+    def forward(self, x):
+        """x: [B, C, H, W] -> (tokens [B, H'*W', embed_dim], H', W'), H'/W' being the conv output size."""
+        feat = self.proj(x)
+        grid_h, grid_w = feat.shape[2:]
+        tokens = feat.flatten(2).transpose(1, 2)  # [B, C, H', W'] ==> [B, C, H'*W'] ==> [B, H'*W', C]
+        tokens = self.norm(tokens)
+        return tokens, grid_h, grid_w
+
diff --git a/modules/img2plane/segformer/models.py b/modules/img2plane/segformer/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..b384c34d96f0a35b891b850c65e6d1a765a7dc78
--- /dev/null
+++ b/modules/img2plane/segformer/models.py
@@ -0,0 +1,184 @@
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from timm.models.layers import DropPath, to_2tuple, trunc_normal_
+
+from .base import OverlapPatchEmbed, Block
+from utils.commons.hparams import hparams
+
+class LowResolutionViT(nn.Module):
+    """
+    This ViT processes the low-resolution image features produced by DeepLabV3.
+    """
+    def __init__(self, img_size=64, in_chans=256):
+        super().__init__()
+
+        # patch_embed: a stride-2 conv turns the feature map into 1024-dim tokens
+        self.patch_embed = OverlapPatchEmbed(img_size=img_size, patch_size=3, stride=2, in_chans=in_chans, embed_dim=1024)
+
+        # Depth comes from the global hparams; an unknown scale used to leave `num_blocks` unset.
+        scale = hparams.get('img2plane_backbone_scale', 'standard')
+        num_blocks_by_scale = {'small': 2, 'standard': 5, 'large': 10}
+        if scale not in num_blocks_by_scale:
+            raise ValueError(f"Unknown img2plane_backbone_scale: {scale!r}")
+        self.num_blocks = num_blocks_by_scale[scale]
+        for i in range(1, self.num_blocks+1):
+            setattr(self, f'block{i}', Block(dim=1024, num_heads=4, mlp_ratio=2, sr_ratio=1))
+
+        self.pixel_shuffle = nn.PixelShuffle(upscale_factor=2)
+        self.upsampling_bilinear1 = nn.UpsamplingBilinear2d(scale_factor=2.)
+        self.conv_after_upsample1 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=3, stride=1, padding=1)
+        self.activation_conv1 = nn.ReLU()
+        self.upsampling_bilinear2 = nn.UpsamplingBilinear2d(scale_factor=2.)
+        self.conv_after_upsample2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1)
+        self.activation_conv2 = nn.ReLU()
+        self.final_conv = nn.Conv2d(in_channels=128, out_channels=96, kernel_size=3, stride=1, padding=1)
+
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        """Re-initialise one submodule; invoked for every submodule through `self.apply`."""
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = (m.kernel_size[0] * m.kernel_size[1] * m.out_channels) // m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+        elif hasattr(m, "reset_parameters"):
+            m.reset_parameters()
+
+    def freeze_patch_emb(self):
+        self.patch_embed.requires_grad_(False)  # plain `.requires_grad = False` on a Module froze no parameter
+
+    @torch.jit.ignore
+    def no_weight_decay(self):
+        return {'pos_embed'}  # has pos_embed may be better
+
+    def forward(self, x):
+        """
+        x: [B, 256, 64, 64]
+        return [B, C=96, H=256, W=256]
+        """
+        h, H, W = self.patch_embed(x)
+
+        for i in range(1, self.num_blocks+1):
+            block_i = getattr(self, f'block{i}')
+            h = block_i(h, H, W) # [B, N=H*W, C=1024]; was (H, H), wrong for non-square grids
+
+        h = h.permute(0, 2, 1) # [B, C, N=H*W]
+        h = h.view(h.shape[0], h.shape[1], H, W) # [B=2, C=1024, H=32, W=32]
+
+        h = self.pixel_shuffle(h) # [B=2, C=256, H=64, W=64]
+        h = self.upsampling_bilinear1(h) # [B=2, C=256, H=128, W=128]
+        h = self.conv_after_upsample1(h)
+        h = self.activation_conv1(h)
+        h = self.upsampling_bilinear2(h) # [B=2, C, H=256, W=256]
+        h = self.conv_after_upsample2(h)
+        h = self.activation_conv2(h)
+
+        out = self.final_conv(h)
+        return out
+
+
+class TriplanePredictorViT(nn.Module):
+    """
+    This ViT processes the concatenated features of LowResolutionViT and the CNN-based HighResoEncoder.
+    It predicts the final tri-plane.
+    """
+    def __init__(self, img_size=256, out_channels=96, img2plane_backbone_scale='standard'):
+        super().__init__()
+        # the input is concatenated features, 96 from low_reso_vit and 96 from high_resolution encoder
+        self.first_conv = nn.Conv2d(in_channels=192, out_channels=256, kernel_size=3, stride=1, padding=1)
+        self.activation = nn.LeakyReLU(negative_slope=0.01)
+        self.second_conv = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=3, stride=1, padding=1)
+
+        self.patch_embed = OverlapPatchEmbed(img_size=img_size, patch_size=3, stride=2, in_chans=128, embed_dim=1024)
+
+        # An unknown scale used to leave `num_blocks` unset and fail later with AttributeError.
+        num_blocks_by_scale = {'small': 1, 'standard': 1, 'large': 3}
+        if img2plane_backbone_scale not in num_blocks_by_scale:
+            raise ValueError(f"Unknown img2plane_backbone_scale: {img2plane_backbone_scale!r}")
+        self.num_blocks = num_blocks_by_scale[img2plane_backbone_scale]
+
+        for i in range(1, self.num_blocks+1):
+            # every block reduces its keys/values spatially by a factor of 2
+            sr_ratio = 2
+            setattr(self, f'block{i}', Block(dim=1024, num_heads=4, mlp_ratio=2, sr_ratio=sr_ratio))
+
+        self.pixel_shuffle = nn.PixelShuffle(upscale_factor=2)
+
+        # skip concat with low resolution, 256 from pixel_shuffle + 96 from low_reso_vit
+        self.first_conv_after_cat = nn.Conv2d(in_channels=352, out_channels=256, kernel_size=3, stride=1, padding=1)
+        self.second_conv_after_cat = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=3, stride=1, padding=1)
+        self.third_conv_after_cat = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1)
+
+        self.final_conv = nn.Conv2d(in_channels=128, out_channels=out_channels, kernel_size=3, stride=1, padding=1)
+
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        """Re-initialise one submodule; invoked for every submodule through `self.apply`."""
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = (m.kernel_size[0] * m.kernel_size[1] * m.out_channels) // m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+        elif hasattr(m, "reset_parameters"):
+            m.reset_parameters()
+
+    def freeze_patch_emb(self):
+        self.patch_embed.requires_grad_(False)  # plain `.requires_grad = False` on a Module froze no parameter
+
+    @torch.jit.ignore
+    def no_weight_decay(self):
+        return {'pos_embed'}  # has pos_embed may be better
+
+    def forward(self, x_low_reso, x_high_resolu):
+        """
+        x_low_reso: [B, 96, 256, 256]
+        x_high_resolu: [B, 96, 256, 256]
+        return [B, out_channels, 256, 256]
+        """
+        x = torch.cat([x_low_reso, x_high_resolu], dim=1)
+        h = self.first_conv(x)
+        h = self.activation(h)
+        h = self.second_conv(h)
+        h = self.activation(h) # [B=2, C=128, H=256, W=256]
+
+        h, H, W = self.patch_embed(h) # [B, N, C]
+
+        for i in range(1, self.num_blocks+1):
+            block_i = getattr(self, f'block{i}')
+            h = block_i(h, H, W) # [B, N, C]; was (H, H), which breaks the sr reshape for non-square grids
+
+        h = h.permute(0, 2, 1) # [B, C, N=H*W]
+        h = h.view(h.shape[0], h.shape[1], H, W) # [B=2, C=1024, H=128, W=128] (patch_embed has stride 2)
+        h = self.pixel_shuffle(h) # [B=2, C=256, H=256, W=256]
+
+        h = torch.cat([h, x_low_reso], dim=1) #  [B, 256+96, 256, 256]
+
+        h = self.first_conv_after_cat(h)
+        h = self.activation(h)
+        h = self.second_conv_after_cat(h)
+        h = self.activation(h)
+        h = self.third_conv_after_cat(h)
+        h = self.activation(h)
+
+        out = self.final_conv(h)
+        return out
+
diff --git a/modules/img2plane/simple_encoders/high_resolution_encoder.py b/modules/img2plane/simple_encoders/high_resolution_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..62b20812a0322f89372c88077bd769f3e377a59b
--- /dev/null
+++ b/modules/img2plane/simple_encoders/high_resolution_encoder.py
@@ -0,0 +1,37 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class HighResoEncoder(nn.Module):
+    """Plain CNN encoder: a stride-2 7x7 conv, four 3x3 conv + LeakyReLU pairs, and a 3x3 output conv."""
+    def __init__(self,
+                 in_dim=5, # 3 for rgb and 2 for coordinate
+                 out_dim=96,
+                 ):
+        super().__init__()
+        self.first = nn.Conv2d(in_dim, 64, kernel_size=7, stride=2, padding=3)
+        # NOTE: `activation` is registered but never called in forward(), so the output of
+        # `first` reaches `conv_layers` without a non-linearity in between.
+        self.activation = nn.LeakyReLU(negative_slope=0.01)
+
+        # Channel widths 64 -> 96 -> 96 -> 96 -> 96; every conv is followed by LeakyReLU(0.01).
+        widths = (64, 96, 96, 96, 96)
+        stack = []
+        for c_in, c_out in zip(widths[:-1], widths[1:]):
+            stack.append(nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1))
+            stack.append(nn.LeakyReLU(negative_slope=0.01))
+        self.conv_layers = nn.Sequential(*stack)
+
+        self.final = nn.Conv2d(96, out_dim, kernel_size=3, stride=1, padding=1)
+
+    def forward(self, x):
+        """
+        x: [B, C=in_dim, H, W]
+        return: [B, C=out_dim, H/2, W/2] for even H and W (the first conv has stride 2)
+        """
+        h = self.first(x)
+        h = self.conv_layers(h)
+        h = self.final(h)
+        return h
+    
\ No newline at end of file
diff --git a/modules/img2plane/triplane.py b/modules/img2plane/triplane.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae5e64227a3feb82053a17f4a97a29a8bde00acb
--- /dev/null
+++ b/modules/img2plane/triplane.py
@@ -0,0 +1,146 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+import torch
+import torch.nn as nn
+from modules.eg3ds.models.networks_stylegan2 import FullyConnectedLayer
+from modules.eg3ds.volumetric_rendering.renderer import ImportanceRenderer
+from modules.eg3ds.volumetric_rendering.ray_sampler import RaySampler
+from modules.eg3ds.models.superresolution import SuperresolutionHybrid2X, SuperresolutionHybrid4X, SuperresolutionHybrid8X, SuperresolutionHybrid8XDC
+
+from modules.img2plane.img2plane_model import Img2PlaneModel
+from utils.commons.hparams import hparams
+
+
+class Img2TriPlaneGenerator(torch.nn.Module):
+    """Predict tri-plane features from an image and render them from a given camera.
+
+    Pipeline visible in this class: ``Img2PlaneModel`` backbone -> reshape into three
+    32-channel planes -> ``ImportanceRenderer`` volume rendering with ``OSGDecoder`` ->
+    ``SuperresolutionHybrid8XDC`` up-sampling.
+    """
+
+    def __init__(self, hp=None):
+        """
+        hp: optional hyper-parameter mapping. When None the module-level ``hparams`` is used.
+            Either way a shallow copy is stored on ``self.hparams``.
+        """
+        # ``copy`` is not among this module's top-level imports, so import it where it is needed.
+        import copy
+
+        # ``hp`` is an argument of this constructor; nn.Module.__init__ takes no such keyword.
+        super().__init__()
+        global hparams
+        self.hparams = copy.copy(hparams) if hp is None else copy.copy(hp)
+        # Rebind the module-level name so the rest of this module reads the same settings.
+        hparams = self.hparams
+
+        self.z_dim = hparams['z_dim']
+        self.camera_dim = 25  # 16 values of a 4x4 cam2world matrix + 9 values of 3x3 intrinsics
+        self.w_dim = hparams['w_dim']
+
+        self.img_resolution = hparams['final_resolution']
+        self.img_channels = 3
+        self.renderer = ImportanceRenderer(hp=hparams)
+        self.ray_sampler = RaySampler()
+
+        self.neural_rendering_resolution = hparams['neural_rendering_resolution']
+
+        self.img2plane_backbone = Img2PlaneModel()
+
+        self.decoder = OSGDecoder(32, {'decoder_lr_mul': 1, 'decoder_output_dim': 32})
+
+        self.rendering_kwargs = {'image_resolution': hparams['final_resolution'],
+                            'disparity_space_sampling': False,
+                            'clamp_mode': 'softplus',
+                            'gpc_reg_prob': hparams['gpc_reg_prob'],
+                            'c_scale': 1.0,
+                            'superresolution_noise_mode': 'none',
+                            'density_reg': hparams['lambda_density_reg'], 'density_reg_p_dist': hparams['density_reg_p_dist'],
+                            'reg_type': 'l1', 'decoder_lr_mul': 1.0,
+                            'sr_antialias': True,
+                            'depth_resolution': hparams['num_samples_coarse'],
+                            'depth_resolution_importance': hparams['num_samples_fine'],
+                            'ray_start': 'auto', 'ray_end': 'auto',
+                            # 'ray_start': hparams['ray_near'], 'ray_end': hparams['ray_far'],
+                            'box_warp': 1., # 3DMM coordinate system == world coordinate system, and the 3DMM landmark coordinates all lie within [-1, 1]
+                            'avg_camera_radius': 2.7,
+                            'avg_camera_pivot': [0, 0, 0.2],
+                            'white_back': False,
+                            }
+
+        sr_num_fp16_res = hparams['num_fp16_layers_in_super_resolution']
+        sr_kwargs = {'channel_base': hparams['base_channel'], 'channel_max': hparams['max_channel'], 'fused_modconv_default': 'inference_only'}
+        self.superresolution = SuperresolutionHybrid8XDC(channels=32, img_resolution=self.img_resolution, sr_num_fp16_res=sr_num_fp16_res, sr_antialias=True, **sr_kwargs)
+
+        # Backbone output stored by synthesis(cache_backbone=True). Initialised here so that
+        # synthesis(use_cached_backbone=True) before any caching call falls back to the backbone
+        # instead of raising AttributeError.
+        self._last_planes = None
+
+    @staticmethod
+    def _split_planes(planes):
+        # Reshape the backbone output into three 32-channel planes: [B, 3, 32, H, W].
+        return planes.view(len(planes), 3, 32, planes.shape[-2], planes.shape[-1])
+
+    def cal_plane(self, img):
+        """Run the backbone on ``img`` and return the planes reshaped to [B, 3, 32, H, W]."""
+        planes = self.img2plane_backbone.forward(img)
+        return self._split_planes(planes)
+
+    def synthesis(self, img, camera, cond=None, update_emas=False, cache_backbone=False, use_cached_backbone=False, **synthesis_kwargs):
+        """Render ``img``'s tri-plane from ``camera``.
+
+        camera: [B, 25]; the first 16 values are read as a 4x4 cam2world matrix and the next 9
+            as a 3x3 intrinsics matrix.
+        cache_backbone: store the backbone output on the instance for later reuse.
+        use_cached_backbone: reuse the stored backbone output when one exists.
+        cond, update_emas: accepted but not used by this method.
+        synthesis_kwargs: forwarded to the super-resolution module, except 'noise_mode'.
+
+        Returns a dict with keys 'image_raw', 'image_depth', 'image', 'image_feature', 'plane'.
+        """
+        cam2world_matrix = camera[:, :16].view(-1, 4, 4)
+        intrinsics = camera[:, 16:25].view(-1, 3, 3)
+
+        neural_rendering_resolution = self.neural_rendering_resolution
+
+        # Create a batch of rays for volume rendering
+        # ray_origins, ray_directions = self.ray_sampler.forward_with_src_c2w(ref_cam2world_matrix, cam2world_matrix, intrinsics, neural_rendering_resolution)
+        ray_origins, ray_directions = self.ray_sampler(cam2world_matrix, intrinsics, neural_rendering_resolution)
+
+        # Create triplanes by running the image-to-plane backbone (or reuse the cached output)
+        N = ray_origins.shape[0]
+        if use_cached_backbone and self._last_planes is not None:
+            planes = self._last_planes
+        else:
+            planes = self.img2plane_backbone.forward(img)
+        if cache_backbone:
+            self._last_planes = planes
+
+        # Reshape output into three 32-channel planes
+        planes = self._split_planes(planes) # [B, 3, 32, W, H]
+
+        # Perform volume rendering
+        feature_samples, depth_samples, weights_samples, _ = self.renderer(planes, self.decoder, ray_origins, ray_directions, self.rendering_kwargs) # channels last
+
+        # Reshape into 'raw' neural-rendered image
+        H = W = neural_rendering_resolution
+        feature_image = feature_samples.permute(0, 2, 1).reshape(N, feature_samples.shape[-1], H, W).contiguous()
+        depth_image = depth_samples.permute(0, 2, 1).reshape(N, 1, H, W)
+
+        # Run superresolution to get final image
+        rgb_image = feature_image[:, :3]
+        # Constant all-ones latent fed to the super-resolution module; w_dim is read from the
+        # instance so two generators built with different hparams do not interfere.
+        ws_to_sr = torch.ones([feature_image.shape[0], 14, self.w_dim], dtype=feature_image.dtype, device=feature_image.device)
+        sr_kwargs = {k: v for k, v in synthesis_kwargs.items() if k != 'noise_mode'}
+        sr_image = self.superresolution(rgb_image, feature_image, ws_to_sr, noise_mode=self.rendering_kwargs['superresolution_noise_mode'], **sr_kwargs)
+
+        ret = {'image_raw': rgb_image, 'image_depth': depth_image, 'image': sr_image, 'image_feature': feature_image[:, 3:], 'plane': planes}
+        return ret
+
+    def sample(self, coordinates, directions, img, cond=None, truncation_psi=1, truncation_cutoff=None, update_emas=False, **synthesis_kwargs):
+        """Query the renderer's model at arbitrary 3D coordinates (mostly used for extracting shapes)."""
+        # NOTE(review): this is the only call that passes ``cond`` to the backbone; confirm that
+        # Img2PlaneModel.forward accepts it.
+        planes = self.img2plane_backbone.forward(img, cond=cond)
+        planes = self._split_planes(planes)
+        return self.renderer.run_model(planes, self.decoder, coordinates, directions, self.rendering_kwargs)
+
+    def forward(self, img, camera, cond=None, update_emas=False, cache_backbone=False, use_cached_backbone=False, return_all=True, **synthesis_kwargs):
+        """Render a batch of images; thin wrapper around synthesis() (``return_all`` is unused)."""
+        out = self.synthesis(img, camera, cond=cond, update_emas=update_emas, cache_backbone=cache_backbone, use_cached_backbone=use_cached_backbone, **synthesis_kwargs)
+        return out
+
+
+class OSGDecoder(torch.nn.Module):
+    """Two-layer MLP that decodes sampled plane features into colour features and density."""
+
+    def __init__(self, n_features, options):
+        """
+        n_features: channel count of the incoming features.
+        options: dict with 'decoder_lr_mul' (lr multiplier of both layers) and
+            'decoder_output_dim' (number of colour/feature channels; one more output channel
+            is added for the density).
+        """
+        super().__init__()
+        self.hidden_dim = 64
+        lr_mul = options['decoder_lr_mul']
+        out_features = 1 + options['decoder_output_dim']
+
+        self.net = torch.nn.Sequential(
+            FullyConnectedLayer(n_features, self.hidden_dim, lr_multiplier=lr_mul),
+            torch.nn.Softplus(),
+            FullyConnectedLayer(self.hidden_dim, out_features, lr_multiplier=lr_mul),
+        )
+
+    def forward(self, sampled_features, ray_directions=None, **kwargs):
+        """Return {'rgb': [N, M, C_out], 'sigma': [N, M, 1]}; ``ray_directions`` is unused."""
+        # Aggregate features: when dim 1 has size 3 it is averaged away
+        # (presumably the three planes -- confirm against the renderer).
+        if sampled_features.shape[1] == 3:
+            feats = sampled_features.mean(1)
+        else:
+            feats = sampled_features
+
+        n_batch, n_points, n_channels = feats.shape
+        decoded = self.net(feats.reshape(n_batch * n_points, n_channels))
+        decoded = decoded.reshape(n_batch, n_points, -1)
+
+        # Channel 0 is the density; the remaining channels are colour/features.
+        sigma = decoded[..., 0:1]
+        rgb = torch.sigmoid(decoded[..., 1:])*(1 + 2*0.001) - 0.001 # Uses sigmoid clamping from MipNeRF
+        return {'rgb': rgb, 'sigma': sigma}
diff --git a/modules/img2plane/unit_test.ipynb b/modules/img2plane/unit_test.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..e0f42bcd8dfe4760091cc60b0a75debb8f7bb83c
--- /dev/null
+++ b/modules/img2plane/unit_test.ipynb
@@ -0,0 +1,308 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Unit Test for DeepLabv3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "ROOT_DIR = \"/home/tiger/projects/GeneFace_private/\"\n",
+    "os.chdir(ROOT_DIR)\n",
+    "os.environ['PYTHONPATH'] = ROOT_DIR"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.img2plane.deeplabv3 import DeepLabV3\n",
+    "\n",
+    "model = DeepLabV3(decoder_channels=256, in_channels=3+2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "256"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.decoder.out_channels"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([2, 256, 64, 64])"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "x = torch.randn([2,3+2,512,512])\n",
+    "y = model(x)\n",
+    "\n",
+    "y.shape"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Unit Test for High Resolution Image Encoder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "ROOT_DIR = \"/home/tiger/projects/GeneFace_private/\"\n",
+    "os.chdir(ROOT_DIR)\n",
+    "os.environ['PYTHONPATH'] = ROOT_DIR"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.img2plane.simple_encoders.high_resolution_encoder import HighResoEncoder\n",
+    "model = HighResoEncoder()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([2, 96, 256, 256])"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "x = torch.randn([2, 5, 512, 512])\n",
+    "y = model(x)\n",
+    "\n",
+    "y.shape"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Unit Test for ViT"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "ROOT_DIR = \"/home/tiger/projects/GeneFace_private/\"\n",
+    "os.chdir(ROOT_DIR)\n",
+    "os.environ['PYTHONPATH'] = ROOT_DIR"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.9/dist-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "from modules.img2plane.segformer import LowResolutionViT, TriplanePredictorViT\n",
+    "\n",
+    "model1 = LowResolutionViT()\n",
+    "model2 = TriplanePredictorViT()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([2, 96, 256, 256])"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "deeplab_out = torch.randn([2, 256, 64, 64])\n",
+    "y1 = model1(deeplab_out) # [B, C=96, H=256, W=256]\n",
+    "\n",
+    "y1.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([2, 96, 256, 256])"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "high_reso_out = torch.randn([2, 96, 256, 256])\n",
+    "y2 = model2(y1, high_reso_out)\n",
+    "\n",
+    "y2.shape"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Unit Test for the Img2Plane Predictor"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "ROOT_DIR = \"/home/tiger/projects/GeneFace_private/\"\n",
+    "os.chdir(ROOT_DIR)\n",
+    "os.environ['PYTHONPATH'] = ROOT_DIR"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.9/dist-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "from modules.img2plane.img2plane_model import Img2PlaneModel\n",
+    "\n",
+    "model = Img2PlaneModel()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([2, 96, 256, 256])"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "x = torch.randn([2, 3, 512, 512])\n",
+    "\n",
+    "y = model(x)\n",
+    "\n",
+    "y.shape"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.2"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/modules/real3d/facev2v_warp/func_utils.py b/modules/real3d/facev2v_warp/func_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e55bf4536d0b011c716d5fd10e13c563d901bce9
--- /dev/null
+++ b/modules/real3d/facev2v_warp/func_utils.py
@@ -0,0 +1,205 @@
+import torch
+import torch.nn.functional as F
+
+
+def rotation_matrix_x(theta):
+    """Build batched 3x3 matrices [[c, 0, s], [0, 1, 0], [-s, 0, c]] from angles in radians.
+
+    theta: tensor with N elements (any shape; it is flattened to [N, 1, 1]).
+    return: [N, 3, 3]. The matrix leaves the second coordinate unchanged.
+    """
+    angle = theta.reshape(-1, 1, 1)
+    zero, one = torch.zeros_like(angle), torch.ones_like(angle)
+    cos, sin = torch.cos(angle), torch.sin(angle)
+    rows = (
+        (cos, zero, sin),
+        (zero, one, zero),
+        (-sin, zero, cos),
+    )
+    # Concatenate each row along the column axis, then stack the rows.
+    return torch.cat([torch.cat(row, 2) for row in rows], 1)
+
+
+def rotation_matrix_y(theta):
+    """Build batched 3x3 matrices [[1, 0, 0], [0, c, -s], [0, s, c]] from angles in radians.
+
+    theta: tensor with N elements (any shape; it is flattened to [N, 1, 1]).
+    return: [N, 3, 3]. The matrix leaves the first coordinate unchanged.
+    """
+    angle = theta.reshape(-1, 1, 1)
+    zero, one = torch.zeros_like(angle), torch.ones_like(angle)
+    cos, sin = torch.cos(angle), torch.sin(angle)
+    rows = (
+        (one, zero, zero),
+        (zero, cos, -sin),
+        (zero, sin, cos),
+    )
+    # Concatenate each row along the column axis, then stack the rows.
+    return torch.cat([torch.cat(row, 2) for row in rows], 1)
+
+
+def rotation_matrix_z(theta):
+    """Build batched 3x3 matrices [[c, -s, 0], [s, c, 0], [0, 0, 1]] from angles in radians.
+
+    theta: tensor with N elements (any shape; it is flattened to [N, 1, 1]).
+    return: [N, 3, 3]. The matrix leaves the third coordinate unchanged.
+    """
+    angle = theta.reshape(-1, 1, 1)
+    zero, one = torch.zeros_like(angle), torch.ones_like(angle)
+    cos, sin = torch.cos(angle), torch.sin(angle)
+    rows = (
+        (cos, -sin, zero),
+        (sin, cos, zero),
+        (zero, zero, one),
+    )
+    # Concatenate each row along the column axis, then stack the rows.
+    return torch.cat([torch.cat(row, 2) for row in rows], 1)
+
+
+def transform_kp(canonical_kp, yaw, pitch, roll, t, delta):
+    """Rotate canonical keypoints by the pose, then add the translation and per-keypoint offsets.
+
+    Returns (transformed_kp [N,K,3], rot_mat [N,3,3]).
+    """
+    # [N,K,3] [N,] [N,] [N,] [N,3] [N,K,3]
+    # y, x, z
+    # w, h, d
+    rot_mat = torch.matmul(
+        torch.matmul(rotation_matrix_y(pitch), rotation_matrix_x(yaw)),
+        rotation_matrix_z(roll),
+    )
+    # Broadcast the [N,3,3] rotation over the K keypoints, treated as column vectors.
+    rotated = torch.matmul(rot_mat[:, None], canonical_kp[..., None])[..., 0]
+    transformed_kp = rotated + t[:, None] + delta
+    return transformed_kp, rot_mat
+
+
+def transform_kp_with_new_pose(canonical_kp, yaw, pitch, roll, t, delta, new_yaw, new_pitch, new_roll):
+    """Re-pose keypoints: apply the new rotation and carry the offsets over from the old pose.
+
+    The canonical keypoints are rotated by the new pose, translated by ``t``, and the offsets
+    ``delta`` are rotated by ``new_rot @ inverse(old_rot)``. The result is then shifted along
+    the third coordinate so that its mean over the whole batch equals 0.33.
+
+    Returns (transformed_kp [N,K,3], rot_mat [N,3,3]) where ``rot_mat`` is the new rotation.
+    """
+    # [N,K,3] [N,] [N,] [N,] [N,3] [N,K,3]
+    # y, x, z
+    # w, h, d
+    old_rot_mat = rotation_matrix_y(pitch) @ rotation_matrix_x(yaw) @ rotation_matrix_z(roll)
+    rot_mat = rotation_matrix_y(new_pitch) @ rotation_matrix_x(new_yaw) @ rotation_matrix_z(new_roll)
+    # Relative rotation taking the old pose to the new one.
+    R = torch.matmul(rot_mat, torch.inverse(old_rot_mat))
+    transformed_kp = (
+        torch.matmul(rot_mat.unsqueeze(1), canonical_kp.unsqueeze(-1)).squeeze(-1)
+        + t.unsqueeze(1)
+        + torch.matmul(R.unsqueeze(1), delta.unsqueeze(-1)).squeeze(-1)
+    )
+    zt = 0.33 - transformed_kp[:, :, 2].mean()
+    # Build the float32 offset on the keypoints' own device rather than forcing CUDA, so the
+    # function also runs on CPU. It is made from a Python float, so no gradient flows through it.
+    shift = torch.tensor([0.0, 0.0, float(zt)], dtype=torch.float32, device=transformed_kp.device)
+    transformed_kp = transformed_kp + shift
+    return transformed_kp, rot_mat
+
+
+def make_coordinate_grid_2d(spatial_size, device=None):
+    """Return an [h, w, 2] grid of normalised coordinates in [-1, 1].
+
+    spatial_size: (h, w).
+    device: where to create the grid. When None, CUDA is used if available, otherwise the CPU
+        (previously CUDA was required unconditionally).
+
+    The last axis holds (width coordinate, height coordinate).
+    A dimension of size 1 divides by zero in the normalisation and yields NaN.
+    """
+    if device is None:
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    h, w = spatial_size
+    x = torch.arange(h, device=device)
+    y = torch.arange(w, device=device)
+    # Map indices 0..size-1 linearly onto [-1, 1].
+    x = 2 * (x / (h - 1)) - 1
+    y = 2 * (y / (w - 1)) - 1
+    xx = x.reshape(-1, 1).repeat(1, w)
+    yy = y.reshape(1, -1).repeat(h, 1)
+    meshed = torch.cat([yy.unsqueeze(2), xx.unsqueeze(2)], 2)
+    return meshed
+
+
+def make_coordinate_grid_3d(spatial_size, device=None):
+    """Return a [d, h, w, 3] grid of normalised coordinates in [-1, 1].
+
+    spatial_size: (d, h, w).
+    device: where to create the grid. When None, CUDA is used if available, otherwise the CPU
+        (previously CUDA was required unconditionally).
+
+    The last axis holds (width coordinate, height coordinate, depth coordinate).
+    A dimension of size 1 divides by zero in the normalisation and yields NaN.
+    """
+    if device is None:
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    d, h, w = spatial_size
+    z = torch.arange(d, device=device)
+    x = torch.arange(h, device=device)
+    y = torch.arange(w, device=device)
+    # Map indices 0..size-1 linearly onto [-1, 1].
+    z = 2 * (z / (d - 1)) - 1
+    x = 2 * (x / (h - 1)) - 1
+    y = 2 * (y / (w - 1)) - 1
+    zz = z.reshape(-1, 1, 1).repeat(1, h, w)
+    xx = x.reshape(1, -1, 1).repeat(d, 1, w)
+    yy = y.reshape(1, 1, -1).repeat(d, h, 1)
+    meshed = torch.cat([yy.unsqueeze(3), xx.unsqueeze(3), zz.unsqueeze(3)], 3)
+    return meshed
+
+
+def out2heatmap(out, temperature=0.1):
+    """Turn raw scores into probability maps.
+
+    For every (batch, channel) pair, a temperature-scaled softmax is taken over all remaining
+    positions; the result has the same shape as ``out``.
+    """
+    original_shape = out.shape
+    flat = out.reshape(original_shape[0], original_shape[1], -1)
+    probs = torch.softmax(flat / temperature, dim=2)
+    return probs.reshape(original_shape)
+
+
+def heatmap2kp(heatmap):
+    """Reduce each heatmap to a 3D keypoint: the heatmap-weighted sum of the grid coordinates.
+
+    heatmap: [N, K, D, H, W]; return: [N, K, 3].
+    """
+    volume_size = heatmap.shape[2:]
+    # [1, 1, D, H, W, 3] so the grid broadcasts over batch and keypoint axes.
+    grid = make_coordinate_grid_3d(volume_size)[None, None]
+    weighted = heatmap[..., None] * grid
+    return weighted.sum(dim=(2, 3, 4))
+
+
+def kp2gaussian_2d(kp, spatial_size, kp_variance=0.01):
+    """Render each 2D keypoint as an isotropic Gaussian bump on the normalised grid.
+
+    kp: [N, K, 2]; spatial_size: (H, W); return: [N, K, H, W] with value 1 at the keypoint.
+    """
+    n_batch, n_kp = kp.shape[:2]
+    grid = make_coordinate_grid_2d(spatial_size).reshape(1, 1, *spatial_size, 2)
+    grid = grid.expand(n_batch, n_kp, *spatial_size, 2)
+    centers = kp.reshape(n_batch, n_kp, 1, 1, 2)
+    offsets = grid - centers
+    sq_dist = offsets.pow(2).sum(-1)
+    return torch.exp(-0.5 * sq_dist / kp_variance)
+
+
+def kp2gaussian_3d(kp, spatial_size, kp_variance=0.01):
+    """Render each 3D keypoint as an isotropic Gaussian bump on the normalised grid.
+
+    kp: [N, K, 3]; spatial_size: (D, H, W); return: [N, K, D, H, W] with value 1 at the keypoint.
+    """
+    n_batch, n_kp = kp.shape[:2]
+    grid = make_coordinate_grid_3d(spatial_size).reshape(1, 1, *spatial_size, 3)
+    grid = grid.expand(n_batch, n_kp, *spatial_size, 3)
+    centers = kp.reshape(n_batch, n_kp, 1, 1, 1, 3)
+    offsets = grid - centers
+    sq_dist = offsets.pow(2).sum(-1)
+    return torch.exp(-0.5 * sq_dist / kp_variance)
+
+
+def create_heatmap_representations(fs, kp_s, kp_d):
+    """Build the difference-of-Gaussians heatmaps between driving and source keypoints.
+
+    fs: feature volume; only its spatial size ``fs.shape[2:]`` is used.
+    kp_s, kp_d: source / driving keypoints, [N, K, 3].
+    return: [N, K+1, 1, *spatial_size]; channel 0 is an all-zero heatmap.
+    """
+    spatial_size = fs.shape[2:]
+    heatmap_d = kp2gaussian_3d(kp_d, spatial_size)
+    heatmap_s = kp2gaussian_3d(kp_s, spatial_size)
+    heatmap = heatmap_d - heatmap_s
+    # Allocate the zero channel with the heatmap's own device and dtype instead of forcing
+    # CUDA, so the concatenation below never mixes devices.
+    zeros = heatmap.new_zeros(heatmap.shape[0], 1, *spatial_size)
+    # e.g. [N,21,16,64,64]
+    heatmap = torch.cat([zeros, heatmap], dim=1)
+    # e.g. [N,21,1,16,64,64]
+    heatmap = heatmap.unsqueeze(2)
+    return heatmap
+
+
+def create_sparse_motions(fs, kp_s, kp_d, Rs, Rd):
+    """Build one sampling grid per keypoint plus the identity grid.
+
+    For keypoint k the grid is ``(Rs @ inverse(Rd)) @ (identity - kp_d[k]) + kp_s[k]``.
+
+    fs: [N, C, D, H, W] (only the shape is used); kp_s, kp_d: [N, K, 3]; Rs, Rd: [N, 3, 3].
+    return: [N, K+1, D, H, W, 3] with the identity grid first.
+    """
+    n_batch, _, depth, height, width = fs.shape
+    n_kp = kp_s.shape[1]
+
+    identity_grid = make_coordinate_grid_3d((depth, height, width))
+    identity_grid = identity_grid.reshape(1, 1, depth, height, width, 3).repeat(n_batch, 1, 1, 1, 1, 1)
+
+    # e.g. [N,20,16,64,64,3]: grid expressed relative to every driving keypoint.
+    relative_grid = identity_grid - kp_d.reshape(n_batch, n_kp, 1, 1, 1, 3)
+
+    # [N,1,1,1,1,3,3]
+    jacobian = torch.matmul(Rs, torch.inverse(Rd))
+    for _ in range(4):
+        jacobian = jacobian.unsqueeze(-3)
+
+    rotated_grid = torch.matmul(jacobian, relative_grid.unsqueeze(-1)).squeeze(-1)
+    driving_to_source = rotated_grid + kp_s.reshape(n_batch, n_kp, 1, 1, 1, 3)
+
+    # e.g. [N,21,16,64,64,3]
+    return torch.cat([identity_grid, driving_to_source], dim=1)
+
+def create_deformed_source_image2d(fs, sparse_motions):
+    """Warp a 2D feature map with every sampling grid in ``sparse_motions``.
+
+    fs: [N, C, H, W]; sparse_motions: [N, K+1, H, W, 2].
+    return: [N, K+1, C, H, W], one warped copy of ``fs`` per grid.
+    """
+    n_batch, _, height, width = fs.shape
+    n_grids = sparse_motions.shape[1]  # K + 1
+    # [N*(K+1), C, H, W]: one copy of the features per grid
+    tiled = fs[:, None].expand(-1, n_grids, -1, -1, -1).reshape(n_batch * n_grids, -1, height, width)
+    # [N*(K+1), H, W, 2]
+    grids = sparse_motions.reshape((n_batch * n_grids, height, width, -1))
+    # [N*(K+1), C, H, W]
+    warped = F.grid_sample(tiled, grids, align_corners=True)
+    # [N, K+1, C, H, W]
+    return warped.reshape((n_batch, n_grids, -1, height, width))
+
+def create_deformed_source_image(fs, sparse_motions):
+    """Warp a 3D feature volume with every sampling grid in ``sparse_motions``.
+
+    fs: [N, C, D, H, W]; sparse_motions: [N, K+1, D, H, W, 3].
+    return: [N, K+1, C, D, H, W], one warped copy of ``fs`` per grid.
+    """
+    n_batch, _, depth, height, width = fs.shape
+    n_grids = sparse_motions.shape[1]  # K + 1
+    # e.g. [N*21,4,16,64,64]: one copy of the features per grid
+    tiled = fs[:, None].expand(-1, n_grids, -1, -1, -1, -1).reshape(n_batch * n_grids, -1, depth, height, width)
+    # e.g. [N*21,16,64,64,3]
+    grids = sparse_motions.reshape((n_batch * n_grids, depth, height, width, -1))
+    # e.g. [N*21,4,16,64,64]
+    warped = F.grid_sample(tiled, grids, align_corners=True)
+    # e.g. [N,21,4,16,64,64]
+    return warped.reshape((n_batch, n_grids, -1, depth, height, width))
+
+
+def apply_imagenet_normalization(input):
+    """Normalise a [N, 3, H, W] batch with the ImageNet per-channel mean and std."""
+    channel_mean = input.new_tensor([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
+    channel_std = input.new_tensor([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)
+    return (input - channel_mean) / channel_std
+
+
+def apply_vggface_normalization(input):
+    """Scale a [N, 3, H, W] batch by 255 and subtract the per-channel mean (std is 1)."""
+    channel_mean = input.new_tensor([129.186279296875, 104.76238250732422, 93.59396362304688]).reshape(1, 3, 1, 1)
+    channel_std = input.new_tensor([1, 1, 1]).reshape(1, 3, 1, 1)
+    scaled = input * 255
+    return (scaled - channel_mean) / channel_std
diff --git a/modules/real3d/facev2v_warp/layers.py b/modules/real3d/facev2v_warp/layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e9b5e0a7dafba72317fa6bd5405dc7b979be2bc
--- /dev/null
+++ b/modules/real3d/facev2v_warp/layers.py
@@ -0,0 +1,132 @@
+import torch
+import torch.nn.functional as F
+from torch import nn
+from torch.nn.utils import spectral_norm
+
+class _ConvBlock(nn.Module):
+    """Configurable conv / norm / nonlinearity stack shared by the 2D and 3D blocks.
+
+    ``pattern`` is a string over {"C", "N", "A"} that gives the layer order: C for conv,
+    N for activation norm, A for nonlinearity (e.g. "CNA" or "NAC").
+    ``dim == 2`` selects the 2D layers, any other value the 3D ones.
+    """
+
+    def __init__(self, pattern, in_channels, out_channels, kernel_size, stride, padding, use_weight_norm, dim, activation_type, nonlinearity_type):
+        super().__init__()
+        is_2d = dim == 2
+
+        # The norm sees the conv output only when "C" is found before "N" in the pattern;
+        # otherwise (including a pattern without "N") it is sized for the block input.
+        conv_first = pattern.find("C") < pattern.find("N")
+        norm_channels = out_channels if conv_first else in_channels
+
+        # Convolution; the default weight norm is spectral norm.
+        conv_cls = nn.Conv2d if is_2d else nn.Conv3d
+        conv = conv_cls(in_channels, out_channels, kernel_size, stride, padding)
+        if use_weight_norm:
+            conv = spectral_norm(conv)
+
+        # Activation norm; an unrecognised type leaves this slot as None.
+        norm = None
+        if activation_type == "batch":
+            norm = nn.SyncBatchNorm(norm_channels)
+        elif activation_type == "instance":
+            instance_cls = nn.InstanceNorm2d if is_2d else nn.InstanceNorm3d
+            norm = instance_cls(norm_channels, affine=True)
+        elif activation_type == "none":
+            norm = nn.Identity()
+
+        # Nonlinearity; an unrecognised type leaves this slot as None.
+        nonlinearity = None
+        if nonlinearity_type == "relu":
+            nonlinearity = nn.ReLU(inplace=True)
+        elif nonlinearity_type == "leakyrelu":
+            nonlinearity = nn.LeakyReLU(0.2, inplace=True)
+
+        by_letter = {"C": conv, "N": norm, "A": nonlinearity}
+        self.layers = nn.Sequential(*[by_letter[letter] for letter in pattern])
+
+    def forward(self, x):
+        return self.layers(x)
+
+
+class ConvBlock2D(_ConvBlock):
+    """2D variant of ``_ConvBlock`` (batch norm and ReLU by default)."""
+
+    def __init__(
+        self, pattern, in_channels, out_channels, kernel_size, stride, padding, use_weight_norm, activation_type="batch", nonlinearity_type="relu",
+    ):
+        super().__init__(
+            pattern=pattern,
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            use_weight_norm=use_weight_norm,
+            dim=2,
+            activation_type=activation_type,
+            nonlinearity_type=nonlinearity_type,
+        )
+
+
+class ConvBlock3D(_ConvBlock):
+    """3D variant of ``_ConvBlock`` (batch norm and ReLU by default)."""
+
+    def __init__(
+        self, pattern, in_channels, out_channels, kernel_size, stride, padding, use_weight_norm, activation_type="batch", nonlinearity_type="relu",
+    ):
+        super().__init__(
+            pattern=pattern,
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            use_weight_norm=use_weight_norm,
+            dim=3,
+            activation_type=activation_type,
+            nonlinearity_type=nonlinearity_type,
+        )
+
+
+class _DownBlock(nn.Module):
+    """A "CNA" conv block (3x3, stride 1, padding 1) followed by a pooling layer.
+
+    ``kernel_size`` configures the pooling layer only; the convolution always uses 3.
+    """
+
+    def __init__(self, in_channels, out_channels, use_weight_norm, base_conv, base_pooling, kernel_size):
+        super().__init__()
+        conv = base_conv("CNA", in_channels, out_channels, kernel_size=3, stride=1, padding=1, use_weight_norm=use_weight_norm)
+        pool = base_pooling(kernel_size)
+        self.layers = nn.Sequential(conv, pool)
+
+    def forward(self, x):
+        return self.layers(x)
+
+
+class DownBlock2D(_DownBlock):
+    """2D down-sampling block: ``ConvBlock2D`` followed by 2x2 average pooling."""
+
+    def __init__(self, in_channels, out_channels, use_weight_norm):
+        super().__init__(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            use_weight_norm=use_weight_norm,
+            base_conv=ConvBlock2D,
+            base_pooling=nn.AvgPool2d,
+            kernel_size=(2, 2),
+        )
+
+
+class DownBlock3D(_DownBlock):
+    """3D down-sampling block: ``ConvBlock3D`` followed by (1, 2, 2) average pooling.
+
+    The first spatial axis is pooled with size 1, so only the last two are halved.
+    """
+
+    def __init__(self, in_channels, out_channels, use_weight_norm):
+        super().__init__(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            use_weight_norm=use_weight_norm,
+            base_conv=ConvBlock3D,
+            base_pooling=nn.AvgPool3d,
+            kernel_size=(1, 2, 2),
+        )
+
+
+class _UpBlock(nn.Module):
+    """An ``nn.Upsample`` layer followed by a "CNA" conv block (3x3, stride 1, padding 1)."""
+
+    def __init__(self, in_channels, out_channels, use_weight_norm, base_conv, scale_factor):
+        super().__init__()
+        upsample = nn.Upsample(scale_factor=scale_factor)
+        conv = base_conv("CNA", in_channels, out_channels, 3, 1, 1, use_weight_norm)
+        self.layers = nn.Sequential(upsample, conv)
+
+    def forward(self, x):
+        return self.layers(x)
+
+
+class UpBlock2D(_UpBlock):
+    """2D up-sampling block: (2, 2) upsampling followed by ``ConvBlock2D``."""
+
+    def __init__(self, in_channels, out_channels, use_weight_norm):
+        super().__init__(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            use_weight_norm=use_weight_norm,
+            base_conv=ConvBlock2D,
+            scale_factor=(2, 2),
+        )
+
+
+class UpBlock3D(_UpBlock):
+    """3D up-sampling block: (1, 2, 2) upsampling followed by ``ConvBlock3D``.
+
+    The first spatial axis is scaled by 1, so only the last two are doubled.
+    """
+
+    def __init__(self, in_channels, out_channels, use_weight_norm):
+        super().__init__(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            use_weight_norm=use_weight_norm,
+            base_conv=ConvBlock3D,
+            scale_factor=(1, 2, 2),
+        )
+
+
+class _ResBlock(nn.Module):
+    """Residual block: two "NAC" (norm, nonlinearity, conv) blocks plus a skip connection.
+
+    Channel count and spatial size are preserved (3x3 conv, stride 1, padding 1).
+    """
+
+    def __init__(self, in_channels, use_weight_norm, base_block):
+        super().__init__()
+        first = base_block("NAC", in_channels, in_channels, 3, 1, 1, use_weight_norm)
+        second = base_block("NAC", in_channels, in_channels, 3, 1, 1, use_weight_norm)
+        self.layers = nn.Sequential(first, second)
+
+    def forward(self, x):
+        residual = self.layers(x)
+        return x + residual
+
+
+class ResBlock2D(_ResBlock):
+    """Residual block built from ``ConvBlock2D``."""
+
+    def __init__(self, in_channels, use_weight_norm):
+        super().__init__(in_channels=in_channels, use_weight_norm=use_weight_norm, base_block=ConvBlock2D)
+
+
+class ResBlock3D(_ResBlock):
+    """Residual block built from ``ConvBlock3D``."""
+
+    def __init__(self, in_channels, use_weight_norm):
+        super().__init__(in_channels=in_channels, use_weight_norm=use_weight_norm, base_block=ConvBlock3D)
+
+
+class ResBottleneck(nn.Module):
+    """Bottleneck residual block: 1x1 -> 3x3 (strided) -> 1x1 convolutions plus a shortcut.
+
+    The two inner blocks use ``out_channels // 4`` channels. The shortcut is the identity
+    unless the stride or the channel count changes, in which case it is a 1x1 "CN" block.
+    """
+
+    def __init__(self, in_channels, out_channels, stride, use_weight_norm):
+        super().__init__()
+        mid_channels = out_channels // 4
+        needs_projection = stride != 1 or in_channels != out_channels
+
+        if needs_projection:
+            self.down_sample = ConvBlock2D("CN", in_channels, out_channels, 1, stride, 0, use_weight_norm)
+        else:
+            self.down_sample = nn.Identity()
+
+        self.layers = nn.Sequential(
+            ConvBlock2D("CNA", in_channels, mid_channels, 1, 1, 0, use_weight_norm),
+            ConvBlock2D("CNA", mid_channels, mid_channels, 3, stride, 1, use_weight_norm),
+            ConvBlock2D("CN", mid_channels, out_channels, 1, 1, 0, use_weight_norm),
+        )
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        shortcut = self.down_sample(x)
+        residual = self.layers(x)
+        return self.relu(shortcut + residual)
diff --git a/modules/real3d/facev2v_warp/losses.py b/modules/real3d/facev2v_warp/losses.py
new file mode 100644
index 0000000000000000000000000000000000000000..598fe8aa46b71889cd7c34e272904a925f7e0fce
--- /dev/null
+++ b/modules/real3d/facev2v_warp/losses.py
@@ -0,0 +1,270 @@
+import torch
+import torch.nn.functional as F
+import numpy as np
+import torchvision
+from torch import nn
+from modules.real3d.facev2v_warp.func_utils import apply_imagenet_normalization, apply_vggface_normalization
+
+
+@torch.jit.script
+def fuse_math_min_mean_pos(x):
+    r"""Fuse operation min mean for hinge loss computation of positive
+    samples"""
+    minval = torch.min(x - 1, x * 0)  # elementwise min(x - 1, 0); x * 0 serves as the zero tensor
+    loss = -torch.mean(minval)  # same value as mean(max(1 - x, 0))
+    return loss
+
+
+@torch.jit.script
+def fuse_math_min_mean_neg(x):
+    r"""Fuse operation min mean for hinge loss computation of negative
+    samples"""
+    minval = torch.min(-x - 1, x * 0)  # elementwise min(-x - 1, 0); x * 0 serves as the zero tensor
+    loss = -torch.mean(minval)  # same value as mean(max(1 + x, 0))
+    return loss
+
+
+class _PerceptualNetwork(nn.Module):
+    def __init__(self, network, layer_name_mapping, layers):
+        super().__init__()
+        self.network = network.cuda()
+        self.layer_name_mapping = layer_name_mapping
+        self.layers = layers
+        for param in self.parameters():
+            param.requires_grad = False
+
+    def forward(self, x):
+        output = {}
+        for i, layer in enumerate(self.network):
+            x = layer(x)
+            layer_name = self.layer_name_mapping.get(i, None)
+            if layer_name in self.layers:
+                output[layer_name] = x
+        return output
+
+
+def _vgg19(layers):  # build a _PerceptualNetwork over the torchvision VGG-19 feature stack, exposing the outputs named in `layers`
+    network = torchvision.models.vgg19()
+    state_dict = torch.utils.model_zoo.load_url(  # fetches the VGG-19 weights from the URL below; tensors are mapped to CPU
+        "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth", map_location=torch.device("cpu"), progress=True
+    )
+    network.load_state_dict(state_dict)
+    network = network.features  # only the `features` sub-module is kept from here on
+    layer_name_mapping = {  # index inside network.features -> name given to the output at that index
+        1: "relu_1_1",
+        3: "relu_1_2",
+        6: "relu_2_1",
+        8: "relu_2_2",
+        11: "relu_3_1",
+        13: "relu_3_2",
+        15: "relu_3_3",
+        17: "relu_3_4",
+        20: "relu_4_1",
+        22: "relu_4_2",
+        24: "relu_4_3",
+        26: "relu_4_4",
+        29: "relu_5_1",
+    }
+    return _PerceptualNetwork(network, layer_name_mapping, layers)
+
+
+def _vgg_face(layers):  # build a _PerceptualNetwork over a torchvision VGG-16 loaded with the VGG-Face checkpoint below
+    network = torchvision.models.vgg16(num_classes=2622)  # 2622 outputs, so classifier.6 can take the checkpoint's fc8 parameters in the load below
+    state_dict = torch.utils.model_zoo.load_url(  # fetches the checkpoint from the URL below; tensors are mapped to CPU
+        "http://www.robots.ox.ac.uk/~albanie/models/pytorch-mcn/" "vgg_face_dag.pth", map_location=torch.device("cpu"), progress=True
+    )
+    feature_layer_name_mapping = {  # index inside network.features -> parameter prefix used by the checkpoint
+        0: "conv1_1",
+        2: "conv1_2",
+        5: "conv2_1",
+        7: "conv2_2",
+        10: "conv3_1",
+        12: "conv3_2",
+        14: "conv3_3",
+        17: "conv4_1",
+        19: "conv4_2",
+        21: "conv4_3",
+        24: "conv5_1",
+        26: "conv5_2",
+        28: "conv5_3",
+    }
+    new_state_dict = {}  # checkpoint re-keyed to torchvision's "features.N.*" / "classifier.N.*" names
+    for k, v in feature_layer_name_mapping.items():
+        new_state_dict["features." + str(k) + ".weight"] = state_dict[v + ".weight"]
+        new_state_dict["features." + str(k) + ".bias"] = state_dict[v + ".bias"]
+    classifier_layer_name_mapping = {0: "fc6", 3: "fc7", 6: "fc8"}  # index inside network.classifier -> checkpoint prefix
+    for k, v in classifier_layer_name_mapping.items():
+        new_state_dict["classifier." + str(k) + ".weight"] = state_dict[v + ".weight"]
+        new_state_dict["classifier." + str(k) + ".bias"] = state_dict[v + ".bias"]
+    network.load_state_dict(new_state_dict)
+    layer_name_mapping = {  # index inside network.features -> name given to the output at that index
+        1: "relu_1_1",
+        3: "relu_1_2",
+        6: "relu_2_1",
+        8: "relu_2_2",
+        11: "relu_3_1",
+        13: "relu_3_2",
+        15: "relu_3_3",
+        18: "relu_4_1",
+        20: "relu_4_2",
+        22: "relu_4_3",
+        25: "relu_5_1",
+    }
+    return _PerceptualNetwork(network.features, layer_name_mapping, layers)  # only the feature stack is wrapped; the classifier loaded above is not used afterwards
+
+
+class PerceptualLoss(nn.Module):  # weighted L1 distance between VGG-19 and VGG-Face activations of `input` and `target`
+    def __init__(
+        self, 
+        layers_weight={"relu_1_1": 0.03125, "relu_2_1": 0.0625, "relu_3_1": 0.125, "relu_4_1": 0.25, "relu_5_1": 1.0}, 
+        n_scale=3,
+        vgg19_loss_weight=1.0,
+        vggface_loss_weight=1.0,
+    ):
+        super().__init__()
+        self.vgg19 = _vgg19(layers_weight.keys())  # both extractors expose exactly the layers named in layers_weight
+        self.vggface = _vgg_face(layers_weight.keys())
+        self.mse_criterion = nn.MSELoss()  # NOTE(review): not referenced anywhere else in this class
+        self.criterion = nn.L1Loss()
+        self.layers_weight, self.n_scale = layers_weight, n_scale
+        self.vgg19_loss_weight = vgg19_loss_weight
+        self.vggface_loss_weight = vggface_loss_weight
+        self.vgg19.eval()
+        self.vggface.eval()
+
+    def forward(self, input, target):
+        """
+        input: [B, 3, H, W] in 0.~1. scale
+        """
+        if input.shape[-1] != 512:  # only the last dimension is checked; both images are then resized to 512x512
+            assert input.ndim == 4
+            input = F.interpolate(input, mode="bilinear", size=(512,512), antialias=True, align_corners=False)
+            target = F.interpolate(target, mode="bilinear", size=(512,512), antialias=True, align_corners=False)
+
+        self.vgg19.eval()  # re-assert eval mode on every call
+        self.vggface.eval()
+        loss = 0
+        features_vggface_input = self.vggface(apply_vggface_normalization(input))  # VGG-Face features use their own normalisation of the un-normalised images
+        features_vggface_target = self.vggface(apply_vggface_normalization(target))
+        input = apply_imagenet_normalization(input)  # from here on `input` / `target` are the ImageNet-normalised images
+        target = apply_imagenet_normalization(target)
+        features_vgg19_input = self.vgg19(input)
+        features_vgg19_target = self.vgg19(target)
+        for layer, weight in self.layers_weight.items():  # full-resolution terms, one VGG-Face and one VGG-19 term per layer
+            tmp = self.vggface_loss_weight * weight * self.criterion(features_vggface_input[layer], features_vggface_target[layer].detach()) / 255  # NOTE(review): only the VGG-Face term is divided by 255; the reason is not visible here
+            if not torch.any(torch.isnan(tmp)):
+                loss += tmp
+            else:
+                loss += torch.zeros_like(tmp)  # a NaN term contributes zero instead of poisoning the total
+            tmp = self.vgg19_loss_weight * weight * self.criterion(features_vgg19_input[layer], features_vgg19_target[layer].detach())
+            if not torch.any(torch.isnan(tmp)):
+                loss += tmp
+            else:
+                loss += torch.zeros_like(tmp)
+        for i in range(self.n_scale):  # NOTE(review): `layer` and `weight` used below are the values left over from the loop above (the last entry of layers_weight); vgg19_loss_weight is not applied here -- confirm this is intended
+            input = F.interpolate(input, mode="bilinear", scale_factor=0.5, align_corners=False, recompute_scale_factor=True)  # halve the resolution again on every iteration
+            target = F.interpolate(target, mode="bilinear", scale_factor=0.5, align_corners=False, recompute_scale_factor=True)
+            features_vgg19_input = self.vgg19(input)  # only VGG-19 is evaluated on the downscaled images
+            features_vgg19_target = self.vgg19(target)
+            tmp = weight * self.criterion(features_vgg19_input[layer], features_vgg19_target[layer].detach())
+            if not torch.any(torch.isnan(tmp)):
+                loss += tmp
+            else:
+                loss += torch.zeros_like(tmp)
+        return loss
+
+
+class GANLoss(nn.Module):
+    # Update generator: gan_loss(fake_output, True, False) + other losses
+    # Update discriminator: gan_loss(fake_output(detached), False, True) + gan_loss(real_output, True, True)
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, dis_output, t_real, dis_update=True):  # NOTE(review): the code below passes dis_output straight to tensor ops; a list (mentioned in the docstring) is not handled
+        r"""GAN loss computation.
+        Args:
+            dis_output (tensor or list of tensors): Discriminator outputs.
+            t_real (bool): If ``True``, uses the real label as target, otherwise
+                uses the fake label as target.
+            dis_update (bool): If ``True``, the loss will be used to update the
+                discriminator, otherwise the generator.
+        Returns:
+            loss (tensor): Loss value.
+        """
+
+        if dis_update:  # discriminator step: hinge loss, sign chosen by the target label
+            if t_real:
+                loss = fuse_math_min_mean_pos(dis_output)
+            else:
+                loss = fuse_math_min_mean_neg(dis_output)
+        else:  # generator step: t_real is ignored
+            loss = -torch.mean(dis_output)
+        return loss
+
+
+class FeatureMatchingLoss(nn.Module):  # L1 distance between two nested lists of feature tensors
+    def __init__(self):
+        super().__init__()
+        self.criterion = nn.L1Loss()
+
+    def forward(self, fake_features, real_features):  # both arguments are indexed as [i][j]; presumably [discriminator][layer] -- TODO confirm against the caller
+        num_d = len(fake_features)
+        dis_weight = 1.0 / num_d  # every outer entry gets the same weight; an empty fake_features raises ZeroDivisionError here
+        loss = fake_features[0][0].new_tensor(0)  # scalar zero with the dtype/device of the first feature tensor
+        for i in range(num_d):
+            for j in range(len(fake_features[i])):
+                tmp_loss = self.criterion(fake_features[i][j], real_features[i][j].detach())  # no gradient flows into the real features
+                loss += dis_weight * tmp_loss
+        return loss
+
+
+class EquivarianceLoss(nn.Module):  # L1 distance between keypoints and their counterparts mapped back from a transformed image
+    def __init__(self):
+        super().__init__()
+        self.criterion = nn.L1Loss()
+
+    def forward(self, kp_d, reverse_kp):
+        loss = self.criterion(kp_d[:, :, :2], reverse_kp)  # only the first two coordinates of kp_d are compared
+        return loss
+
+
+class KeypointPriorLoss(nn.Module):  # penalises keypoints closer than a threshold and a mean third coordinate away from a target
+    def __init__(self, Dt=0.1, zt=0.33):
+        super().__init__()
+        self.Dt, self.zt = Dt, zt  # Dt: threshold on the squared pairwise distance; zt: target for the mean of coordinate index 2
+
+    def forward(self, kp_d):
+        # use distance matrix to avoid loop
+        dist_mat = torch.cdist(kp_d, kp_d).square()  # squared pairwise distances between the keypoints of each sample
+        loss = (
+            torch.max(0 * dist_mat, self.Dt - dist_mat).sum((1, 2)).mean()  # hinge max(0, Dt - d^2) summed over all pairs, averaged over the batch
+            + torch.abs(kp_d[:, :, 2].mean(1) - self.zt).mean()  # |mean of coordinate 2 over keypoints - zt|, averaged over the batch
+            - kp_d.shape[1] * self.Dt  # removes the constant added by the zero-distance diagonal (one Dt per keypoint)
+        )
+        return loss
+
+
+class HeadPoseLoss(nn.Module):  # mean L1 error of the three predicted angles against reference angles
+    def __init__(self):
+        super().__init__()
+        self.criterion = nn.L1Loss()
+
+    def forward(self, yaw, pitch, roll, real_yaw, real_pitch, real_roll):
+        loss = (self.criterion(yaw, real_yaw.detach()) + self.criterion(pitch, real_pitch.detach()) + self.criterion(roll, real_roll.detach())) / 3  # references are detached, so gradients reach only the predictions
+        return loss / np.pi * 180  # scale by 180 / pi; presumably radians -> degrees -- TODO confirm the units of the inputs
+
+
+class DeformationPriorLoss(nn.Module):  # regulariser on the magnitude of delta_d
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, delta_d):
+        loss = delta_d.abs().mean()  # mean absolute value over every element of delta_d
+        return loss
+
+
+if __name__ == '__main__':  # manual smoke test: the inputs are moved to CUDA and PerceptualLoss() loads both VGG checkpoints from their URLs
+    loss_fn = PerceptualLoss()
+    x1 = torch.randn([4, 3, 512, 512]).cuda()
+    x2 = torch.randn([4, 3, 512, 512]).cuda()
+    loss = loss_fn(x1, x2)  # NOTE(review): the result is computed but neither printed nor checked
\ No newline at end of file
diff --git a/modules/real3d/facev2v_warp/model.py b/modules/real3d/facev2v_warp/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..e773c3312fbcadf7fb76e296c9138b555e6e4f72
--- /dev/null
+++ b/modules/real3d/facev2v_warp/model.py
@@ -0,0 +1,342 @@
+import torch
+from torch import nn
+import torch.nn.functional as F
+import torchvision
+import math
+import numpy as np
+
+from modules.real3d.facev2v_warp.network import AppearanceFeatureExtractor, CanonicalKeypointDetector, PoseExpressionEstimator, MotionFieldEstimator, Generator
+from modules.real3d.facev2v_warp.func_utils import transform_kp, make_coordinate_grid_2d, apply_imagenet_normalization
+from modules.real3d.facev2v_warp.losses import PerceptualLoss, GANLoss, FeatureMatchingLoss, EquivarianceLoss, KeypointPriorLoss, HeadPoseLoss, DeformationPriorLoss
+from utils.commons.image_utils import erode, dilate
+from utils.commons.hparams import hparams
+
+
+class Hopenet(nn.Module):
+    # Hopenet with 3 output layers for yaw, pitch and roll
+    # Predicts Euler angles by binning and regression with the expected value
+    def __init__(self, block, layers, num_bins):
+        self.inplanes = 64
+        super(Hopenet, self).__init__()
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        self.layer1 = self._make_layer(block, 64, layers[0])
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
+        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
+        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
+        self.avgpool = nn.AvgPool2d(7)
+        self.fc_yaw = nn.Linear(512 * block.expansion, num_bins)
+        self.fc_pitch = nn.Linear(512 * block.expansion, num_bins)
+        self.fc_roll = nn.Linear(512 * block.expansion, num_bins)
+
+        # Vestigial layer from previous experiments
+        self.fc_finetune = nn.Linear(512 * block.expansion + 3, 3)
+        self.idx_tensor = torch.FloatTensor(list(range(num_bins))).unsqueeze(0).cuda()
+        self.n_bins = num_bins
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+                m.weight.data.normal_(0, math.sqrt(2.0 / n))
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+
+    def _make_layer(self, block, planes, blocks, stride=1):
+        downsample = None
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(planes * block.expansion),
+            )
+
+        layers = []
+        layers.append(block(self.inplanes, planes, stride, downsample))
+        self.inplanes = planes * block.expansion
+        for i in range(1, blocks):
+            layers.append(block(self.inplanes, planes))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        x = self.avgpool(x)
+        x = x.view(x.size(0), -1)
+        real_yaw = self.fc_yaw(x)
+        real_pitch = self.fc_pitch(x)
+        real_roll = self.fc_roll(x)
+        real_yaw = torch.softmax(real_yaw, dim=1)
+        real_pitch = torch.softmax(real_pitch, dim=1)
+        real_roll = torch.softmax(real_roll, dim=1)
+        real_yaw = (real_yaw * self.idx_tensor).sum(dim=1)
+        real_pitch = (real_pitch * self.idx_tensor).sum(dim=1)
+        real_roll = (real_roll * self.idx_tensor).sum(dim=1)
+        real_yaw = (real_yaw - self.n_bins // 2) * 3 * np.pi / 180
+        real_pitch = (real_pitch - self.n_bins // 2) * 3 * np.pi / 180
+        real_roll = (real_roll - self.n_bins // 2) * 3 * np.pi / 180
+
+        return real_yaw, real_pitch, real_roll
+
+
+class Transform:
+    """
+    Random tps transformation for equivariance constraints.
+    reference: FOMM
+    """
+
+    def __init__(self, bs, sigma_affine=0.05, sigma_tps=0.005, points_tps=5):
+        noise = torch.normal(mean=0, std=sigma_affine * torch.ones([bs, 2, 3]))  # one random perturbation per batch element
+        self.theta = noise + torch.eye(2, 3).view(1, 2, 3)  # identity 2x3 affine plus noise, shape [bs, 2, 3]
+        self.bs = bs
+
+        self.control_points = make_coordinate_grid_2d((points_tps, points_tps))  # grid of control points from the project helper
+        self.control_points = self.control_points.unsqueeze(0)
+        self.control_params = torch.normal(mean=0, std=sigma_tps * torch.ones([bs, 1, points_tps ** 2]))  # one random weight per control point and batch element
+
+    def transform_frame(self, frame):  # resample `frame` through the random warp
+        grid = make_coordinate_grid_2d(frame.shape[2:]).unsqueeze(0)
+        grid = grid.view(1, frame.shape[2] * frame.shape[3], 2)  # flatten the grid to a list of H*W 2-D coordinates
+        grid = self.warp_coordinates(grid).view(self.bs, frame.shape[2], frame.shape[3], 2)  # warped sampling positions, back in [bs, H, W, 2] layout
+        return F.grid_sample(frame, grid, align_corners=True, padding_mode="reflection")
+
+    def warp_coordinates(self, coordinates):  # apply the affine part plus the control-point term to [*, N, 2] coordinates
+        theta = self.theta.type(coordinates.type())  # cast to the tensor type of the input
+        theta = theta.unsqueeze(1)
+        transformed = torch.matmul(theta[:, :, :, :2], coordinates.unsqueeze(-1)) + theta[:, :, :, 2:]  # affine part: A @ p + b
+        transformed = transformed.squeeze(-1)
+
+        control_points = self.control_points.type(coordinates.type())
+        control_params = self.control_params.type(coordinates.type())
+        distances = coordinates.view(coordinates.shape[0], -1, 1, 2) - control_points.view(1, 1, -1, 2)  # offset from every coordinate to every control point
+        distances = torch.abs(distances).sum(-1)  # L1 distance
+
+        result = distances ** 2
+        result = result * torch.log(distances + 1e-6)  # radial term d^2 * log(d + 1e-6); the epsilon keeps log finite at d = 0
+        result = result * control_params  # weight each control point's contribution
+        result = result.sum(dim=2).view(self.bs, coordinates.shape[1], 1)  # sum over control points; the trailing 1 broadcasts over both coordinates
+        transformed = transformed + result
+
+        return transformed
+
+
+class WarpBasedTorsoModel(nn.Module):
+    def __init__(self, model_scale='small'):
+        super().__init__()
+        self.appearance_extractor = AppearanceFeatureExtractor(model_scale)
+        self.canonical_kp_detector = CanonicalKeypointDetector(model_scale)
+        self.pose_exp_estimator = PoseExpressionEstimator(model_scale)
+        self.motion_field_estimator = MotionFieldEstimator(model_scale)
+        self.deform_based_generator = Generator()
+
+        self.pretrained_hopenet = Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], num_bins=66).cuda()
+        pretrained_path = "/home/tiger/nfs/myenv/cache/useful_ckpts/hopenet_robust_alpha1.pkl" # https://drive.google.com/open?id=1m25PrSE7g9D2q2XJVMR6IA7RaCvWSzCR
+        self.pretrained_hopenet.load_state_dict(torch.load(pretrained_path, map_location=torch.device("cpu")))
+        self.pretrained_hopenet.requires_grad_(False)
+
+        self.pose_loss_fn = HeadPoseLoss() # 20
+        self.equivariance_loss_fn = EquivarianceLoss() # 20
+        self.keypoint_prior_loss_fn = KeypointPriorLoss()# 10
+        self.deform_prior_loss_fn = DeformationPriorLoss() # 5
+
+    def forward(self, torso_src_img, src_img, drv_img, cal_loss=False):
+        # predict cano keypoint
+        cano_keypoint = self.canonical_kp_detector(src_img)
+        # predict src_pose and drv_pose
+        transform_fn = Transform(drv_img.shape[0])
+        transformed_drv_img = transform_fn.transform_frame(drv_img)
+        cat_imgs = torch.cat([src_img, drv_img, transformed_drv_img], dim=0)
+        yaw, pitch, roll, t, delta = self.pose_exp_estimator(cat_imgs)
+        [yaw_s, yaw_d, yaw_tran], [pitch_s, pitch_d, pitch_tran], [roll_s, roll_d, roll_tran] = (
+            torch.chunk(yaw, 3, dim=0),
+            torch.chunk(pitch, 3, dim=0),
+            torch.chunk(roll, 3, dim=0),
+        )
+        [t_s, t_d, t_tran], [delta_s, delta_d, delta_tran] = (
+            torch.chunk(t, 3, dim=0),
+            torch.chunk(delta, 3, dim=0),
+        )
+        kp_s, Rs = transform_kp(cano_keypoint, yaw_s, pitch_s, roll_s, t_s, delta_s)
+        kp_d, Rd = transform_kp(cano_keypoint, yaw_d, pitch_d, roll_d, t_d, delta_d)
+        # deform the torso img
+        torso_appearance_feats = self.appearance_extractor(torso_src_img)
+        deformation, occlusion = self.motion_field_estimator(torso_appearance_feats, kp_s, kp_d, Rs, Rd)
+        deformed_torso_img = self.deform_based_generator(torso_appearance_feats, deformation, occlusion)
+        
+        ret = {'kp_src': kp_s, 'kp_drv': kp_d}
+        if cal_loss:
+            losses = {}
+            with torch.no_grad():
+                self.pretrained_hopenet.eval()
+                real_yaw, real_pitch, real_roll = self.pretrained_hopenet(F.interpolate(apply_imagenet_normalization(cat_imgs), size=(224, 224)))
+            pose_loss = self.pose_loss_fn(yaw, pitch, roll, real_yaw, real_pitch, real_roll)
+            losses['facev2v/pose_pred_loss'] = pose_loss
+
+            kp_tran, _ = transform_kp(cano_keypoint, yaw_tran, pitch_tran, roll_tran, t_tran, delta_tran)
+            reverse_kp = transform_fn.warp_coordinates(kp_tran[:, :, :2])
+            equivariance_loss = self.equivariance_loss_fn(kp_d, reverse_kp)
+            losses['facev2v/equivariance_loss'] = equivariance_loss
+
+            keypoint_prior_loss = self.keypoint_prior_loss_fn(kp_d)
+            losses['facev2v/keypoint_prior_loss'] = keypoint_prior_loss
+
+            deform_prior_loss = self.deform_prior_loss_fn(delta_d)
+            losses['facev2v/deform_prior_loss'] = deform_prior_loss
+            ret['losses'] = losses
+
+        return deformed_torso_img, ret
+
+
+class WarpBasedTorsoModelMediaPipe(nn.Module):
+    def __init__(self, model_scale='small'):
+        super().__init__()
+        self.appearance_extractor = AppearanceFeatureExtractor(model_scale)
+        self.motion_field_estimator = MotionFieldEstimator(model_scale, input_channels=32+2, num_keypoints=hparams['torso_kp_num']) # 32 channel appearance channel, and 3 channel for segmap
+        # self.motion_field_estimator = MotionFieldEstimator(model_scale, input_channels=32+2, num_keypoints=9) # 32 channel appearance channel, and 3 channel for segmap
+        self.deform_based_generator = Generator()
+
+        self.occlusion_2_predictor = nn.Sequential(*[
+            nn.Conv2d(64+1, 32, 3, 1, 1),
+            nn.ReLU(),
+            nn.Conv2d(32, 32, 3, 1, 1),
+            nn.ReLU(),
+            nn.Conv2d(32, 1, 3, 1, 1),
+            nn.Sigmoid()
+        ])
+
+    #  V2, 先warp， 再mean
+    def forward(self, torso_src_img, segmap, kp_s, kp_d, tgt_head_img, cal_loss=False, target_torso_mask=None):
+        """
+        kp_s, kp_d, [b, 68, 3], within the range of [-1,1]
+        """
+        torso_appearance_feats = self.appearance_extractor(torso_src_img) # [B, C, D, H, W]
+        torso_segmap = torch.nn.functional.interpolate(segmap[:,[2,4]].float(), size=(64,64), mode='bilinear', align_corners=False, antialias=False) # see tasks/eg3ds/loss_utils/segment_loss/mp_segmenter.py for the segmap convention
+        torso_mask = torso_segmap.sum(dim=1).unsqueeze(1) # [b, 1, ,h, w]
+        torso_mask = dilate(torso_mask, ksize=hparams.get("torso_mask_dilate_ksize", 7))
+        if hparams.get("mul_torso_mask", True):
+            torso_appearance_feats = torso_appearance_feats * torso_mask.unsqueeze(1)
+        motion_inp_appearance_feats = torch.cat([torso_appearance_feats, torso_segmap.unsqueeze(2).repeat([1,1,torso_appearance_feats.shape[2],1,1])], dim=1)
+
+        if hparams['torso_kp_num'] == 4:
+            kp_s = kp_s[:,[0,8,16,27],:]
+            kp_d = kp_d[:,[0,8,16,27],:]
+        elif hparams['torso_kp_num'] == 9:
+            kp_s = kp_s[:,[0, 3, 6, 8, 10, 13, 16, 27, 33],:]
+            kp_d = kp_d[:,[0, 3, 6, 8, 10, 13, 16, 27, 33],:]
+        else:
+            raise NotImplementedError()
+
+        # deform the torso img
+        Rs = torch.eye(3, 3).unsqueeze(0).repeat([kp_s.shape[0], 1, 1]).to(kp_s.device)
+        Rd = torch.eye(3, 3).unsqueeze(0).repeat([kp_d.shape[0], 1, 1]).to(kp_d.device)
+        deformation, occlusion, occlusion_2 = self.motion_field_estimator(motion_inp_appearance_feats, kp_s, kp_d, Rs, Rd)
+        motion_estimator_grad_scale_factor = 0.1
+        # motion_estimator_grad_scale_factor = 1.0
+        deformation = deformation * motion_estimator_grad_scale_factor + deformation.detach() * (1-motion_estimator_grad_scale_factor)
+        # occlusion, a 0~1 mask that predict the segment map of warped torso, used in oclcusion-aware decoder
+        occlusion = occlusion * motion_estimator_grad_scale_factor + occlusion.detach() * (1-motion_estimator_grad_scale_factor)
+        # occlusion_2, a 0~1 mask that predict the segment map of warped torso, but is used in alpha-blending
+        occlusion_2 = occlusion_2 * motion_estimator_grad_scale_factor + occlusion_2.detach() * (1-motion_estimator_grad_scale_factor)
+        ret = {'kp_src': kp_s, 'kp_drv': kp_d, 'occlusion': occlusion, 'occlusion_2': occlusion_2}
+
+        deformed_torso_img, deformed_torso_hid = self.deform_based_generator(torso_appearance_feats, deformation, occlusion, return_hid=True)
+        ret['deformed_torso_hid'] = deformed_torso_hid
+        occlusion_2 = self.occlusion_2_predictor(torch.cat([deformed_torso_hid, F.interpolate(occlusion_2, size=(256,256), mode='bilinear')], dim=1))
+        ret['occlusion_2'] = occlusion_2
+        alphas = occlusion_2.clamp(1e-5, 1 - 1e-5) 
+
+        if target_torso_mask is None:
+            ret['losses'] = {
+                'facev2v/occlusion_reg_l1': occlusion.mean(),
+                'facev2v/occlusion_2_reg_l1': occlusion_2.mean(),
+                'facev2v/occlusion_2_weights_entropy': torch.mean(- alphas * torch.log2(alphas) - (1 - alphas) * torch.log2(1 - alphas)), # you can visualize this fn at https://www.desmos.com/calculator/rwbs7bruvj?lang=zh-TW
+            }
+        else:
+            non_target_torso_mask_1 = torch.nn.functional.interpolate((~target_torso_mask).unsqueeze(1).float(), size=occlusion.shape[-2:])
+            non_target_torso_mask_2 = torch.nn.functional.interpolate((~target_torso_mask).unsqueeze(1).float(), size=occlusion_2.shape[-2:])
+            ret['losses'] = {
+                'facev2v/occlusion_reg_l1': self.masked_l1_reg_loss(occlusion, non_target_torso_mask_1.bool(), masked_weight=1, unmasked_weight=hparams['torso_occlusion_reg_unmask_factor']),
+                'facev2v/occlusion_2_reg_l1': self.masked_l1_reg_loss(occlusion_2, non_target_torso_mask_2.bool(), masked_weight=1, unmasked_weight=hparams['torso_occlusion_reg_unmask_factor']),
+                'facev2v/occlusion_2_weights_entropy': torch.mean(- alphas * torch.log2(alphas) - (1 - alphas) * torch.log2(1 - alphas)), # you can visualize this fn at https://www.desmos.com/calculator/rwbs7bruvj?lang=zh-TW
+            }
+        # if hparams.get("fuse_with_deform_source"):
+        #     B, _, H, W = deformed_torso_img.shape
+        #     deformation_256 = F.interpolate(deformation.mean(dim=1).permute(0,3,1,2), size=256, mode='bilinear',antialias=True).permute(0,2,3,1)[...,:2]
+        #     deformed_source_torso_img = F.grid_sample(torso_src_img, deformation_256, align_corners=True).view(B, -1, H, W)
+        #     occlusion_256 = F.interpolate(occlusion, size=256, antialias=True, mode='bilinear').reshape([B,1,H,W])
+        #     # deformed_torso_img = deformed_torso_img * (1 - occlusion_256[:,0]) + deformed_source_torso_img[:,0] * occlusion_256[:,0]
+        #     deformed_torso_img = deformed_torso_img * (1 - occlusion_256) + deformed_source_torso_img * occlusion_256
+        return deformed_torso_img, ret
+
+    def masked_l1_reg_loss(self, img_pred, mask, masked_weight=0.01, unmasked_weight=0.001, mode='l1'):
+        # 对raw图像，因为deform的原因背景没法全黑，导致这部分mse过高，我们将其mask掉，只计算人脸部分
+        masked_weight = 1.0
+        weight_mask = mask.float() * masked_weight + (~mask).float() * unmasked_weight
+        if mode == 'l1':
+            error = (img_pred).abs().sum(dim=1) * weight_mask
+        else:
+            error = (img_pred).pow(2).sum(dim=1) * weight_mask
+        loss = error.mean()
+        return loss
+
+    @torch.no_grad()
+    def infer_forward_stage1(self, torso_src_img, segmap, kp_s, kp_d, tgt_head_img, cal_loss=False):
+        """
+        kp_s, kp_d, [b, 68, 3], within the range of [-1,1]
+        """
+        kp_s = kp_s[:,[0,8,16,27],:]
+        kp_d = kp_d[:,[0,8,16,27],:]
+
+        torso_segmap = torch.nn.functional.interpolate(segmap[:,[2,4]].float(), size=(64,64), mode='bilinear', align_corners=False, antialias=False) # see tasks/eg3ds/loss_utils/segment_loss/mp_segmenter.py for the segmap convention
+        torso_appearance_feats = self.appearance_extractor(torso_src_img)
+        torso_mask = torso_segmap.sum(dim=1).unsqueeze(1) # [b, 1, ,h, w]
+        torso_mask = dilate(torso_mask, ksize=hparams.get("torso_mask_dilate_ksize", 7))
+        if hparams.get("mul_torso_mask", True):
+            torso_appearance_feats = torso_appearance_feats * torso_mask.unsqueeze(1)
+        motion_inp_appearance_feats = torch.cat([torso_appearance_feats, torso_segmap.unsqueeze(2).repeat([1,1,torso_appearance_feats.shape[2],1,1])], dim=1)
+        # deform the torso img
+        Rs = torch.eye(3, 3).unsqueeze(0).repeat([kp_s.shape[0], 1, 1]).to(kp_s.device)
+        Rd = torch.eye(3, 3).unsqueeze(0).repeat([kp_d.shape[0], 1, 1]).to(kp_d.device)
+        deformation, occlusion, occlusion_2 = self.motion_field_estimator(motion_inp_appearance_feats, kp_s, kp_d, Rs, Rd)
+        motion_estimator_grad_scale_factor = 0.1
+        deformation = deformation * motion_estimator_grad_scale_factor + deformation.detach() * (1-motion_estimator_grad_scale_factor)
+        occlusion = occlusion * motion_estimator_grad_scale_factor + occlusion.detach() * (1-motion_estimator_grad_scale_factor)
+        occlusion_2 = occlusion_2 * motion_estimator_grad_scale_factor + occlusion_2.detach() * (1-motion_estimator_grad_scale_factor)
+        ret = {'kp_src': kp_s, 'kp_drv': kp_d, 'occlusion': occlusion, 'occlusion_2': occlusion_2}
+        ret['torso_appearance_feats'] = torso_appearance_feats
+        ret['deformation'] = deformation
+        ret['occlusion'] = occlusion
+        return ret
+    
+    @torch.no_grad()
+    def infer_forward_stage2(self, ret):
+        torso_appearance_feats = ret['torso_appearance_feats']
+        deformation = ret['deformation']
+        occlusion = ret['occlusion']
+        deformed_torso_img, deformed_torso_hid = self.deform_based_generator(torso_appearance_feats, deformation, occlusion, return_hid=True)
+        ret['deformed_torso_hid'] = deformed_torso_hid
+        return deformed_torso_img
+    
+if __name__ == '__main__':  # manual smoke test: builds WarpBasedTorsoModel (which loads the Hopenet checkpoint) and runs it on random CUDA tensors
+    from utils.nn.model_utils import num_params
+    import tqdm
+    model = WarpBasedTorsoModel('small')
+    model.cuda()
+    num_params(model)  # project helper; presumably reports the parameter count -- TODO confirm
+    for n, m in model.named_children():
+        num_params(m, model_name=n)  # same helper, once per direct sub-module
+    torso_ref_img = torch.randn([2, 3, 256, 256]).cuda()
+    ref_img = torch.randn([2, 3, 256, 256]).cuda()
+    mv_img = torch.randn([2, 3, 256, 256]).cuda()
+    out = model(torso_ref_img, ref_img, mv_img)  # one forward pass without losses
+    for i in tqdm.trange(100):
+        out_img, losses = model(torso_ref_img, ref_img, mv_img, cal_loss=True)  # the second return value is the `ret` dict; the losses sit under its 'losses' key
+    print(" ")
\ No newline at end of file
diff --git a/modules/real3d/facev2v_warp/model2.py b/modules/real3d/facev2v_warp/model2.py
new file mode 100644
index 0000000000000000000000000000000000000000..555e8a2605295e5b75d6e996e2b3ae6d4fd0b1e5
--- /dev/null
+++ b/modules/real3d/facev2v_warp/model2.py
@@ -0,0 +1,344 @@
+import torch
+from torch import nn
+import torch.nn.functional as F
+import torchvision
+import math
+import numpy as np
+import copy 
+
+from modules.real3d.facev2v_warp.network2 import AppearanceFeatureExtractor, CanonicalKeypointDetector, PoseExpressionEstimator, MotionFieldEstimator, Generator
+from modules.real3d.facev2v_warp.func_utils import transform_kp, make_coordinate_grid_2d, apply_imagenet_normalization
+from modules.real3d.facev2v_warp.losses import PerceptualLoss, GANLoss, FeatureMatchingLoss, EquivarianceLoss, KeypointPriorLoss, HeadPoseLoss, DeformationPriorLoss
+from utils.commons.image_utils import erode, dilate
+from utils.commons.hparams import hparams
+
+
+class Hopenet(nn.Module):
+    # Hopenet with 3 output layers for yaw, pitch and roll
+    # Predicts Euler angles by binning and regression with the expected value
+    def __init__(self, block, layers, num_bins):
+        self.inplanes = 64
+        super(Hopenet, self).__init__()
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        self.layer1 = self._make_layer(block, 64, layers[0])
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
+        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
+        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
+        self.avgpool = nn.AvgPool2d(7)
+        self.fc_yaw = nn.Linear(512 * block.expansion, num_bins)
+        self.fc_pitch = nn.Linear(512 * block.expansion, num_bins)
+        self.fc_roll = nn.Linear(512 * block.expansion, num_bins)
+
+        # Vestigial layer from previous experiments
+        self.fc_finetune = nn.Linear(512 * block.expansion + 3, 3)
+        self.idx_tensor = torch.FloatTensor(list(range(num_bins))).unsqueeze(0).cuda()
+        self.n_bins = num_bins
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+                m.weight.data.normal_(0, math.sqrt(2.0 / n))
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+
+    def _make_layer(self, block, planes, blocks, stride=1):
+        downsample = None
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(planes * block.expansion),
+            )
+
+        layers = []
+        layers.append(block(self.inplanes, planes, stride, downsample))
+        self.inplanes = planes * block.expansion
+        for i in range(1, blocks):
+            layers.append(block(self.inplanes, planes))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        x = self.avgpool(x)
+        x = x.view(x.size(0), -1)
+        real_yaw = self.fc_yaw(x)
+        real_pitch = self.fc_pitch(x)
+        real_roll = self.fc_roll(x)
+        real_yaw = torch.softmax(real_yaw, dim=1)
+        real_pitch = torch.softmax(real_pitch, dim=1)
+        real_roll = torch.softmax(real_roll, dim=1)
+        real_yaw = (real_yaw * self.idx_tensor).sum(dim=1)
+        real_pitch = (real_pitch * self.idx_tensor).sum(dim=1)
+        real_roll = (real_roll * self.idx_tensor).sum(dim=1)
+        real_yaw = (real_yaw - self.n_bins // 2) * 3 * np.pi / 180
+        real_pitch = (real_pitch - self.n_bins // 2) * 3 * np.pi / 180
+        real_roll = (real_roll - self.n_bins // 2) * 3 * np.pi / 180
+
+        return real_yaw, real_pitch, real_roll
+
+
+class Transform:
+    """
+    Random tps transformation for equivariance constraints.
+    reference: FOMM
+    """
+
+    def __init__(self, bs, sigma_affine=0.05, sigma_tps=0.005, points_tps=5):
+        noise = torch.normal(mean=0, std=sigma_affine * torch.ones([bs, 2, 3]))
+        self.theta = noise + torch.eye(2, 3).view(1, 2, 3)
+        self.bs = bs
+
+        self.control_points = make_coordinate_grid_2d((points_tps, points_tps))
+        self.control_points = self.control_points.unsqueeze(0)
+        self.control_params = torch.normal(mean=0, std=sigma_tps * torch.ones([bs, 1, points_tps ** 2]))
+
+    def transform_frame(self, frame):
+        grid = make_coordinate_grid_2d(frame.shape[2:]).unsqueeze(0)
+        grid = grid.view(1, frame.shape[2] * frame.shape[3], 2)
+        grid = self.warp_coordinates(grid).view(self.bs, frame.shape[2], frame.shape[3], 2)
+        return F.grid_sample(frame, grid, align_corners=True, padding_mode="reflection")
+
+    def warp_coordinates(self, coordinates):
+        theta = self.theta.type(coordinates.type())
+        theta = theta.unsqueeze(1)
+        transformed = torch.matmul(theta[:, :, :, :2], coordinates.unsqueeze(-1)) + theta[:, :, :, 2:]
+        transformed = transformed.squeeze(-1)
+
+        control_points = self.control_points.type(coordinates.type())
+        control_params = self.control_params.type(coordinates.type())
+        distances = coordinates.view(coordinates.shape[0], -1, 1, 2) - control_points.view(1, 1, -1, 2)
+        distances = torch.abs(distances).sum(-1)
+
+        result = distances ** 2
+        result = result * torch.log(distances + 1e-6)
+        result = result * control_params
+        result = result.sum(dim=2).view(self.bs, coordinates.shape[1], 1)
+        transformed = transformed + result
+
+        return transformed
+
+
+class WarpBasedTorsoModel(nn.Module):
+    def __init__(self, model_scale='small'):
+        super().__init__()
+        self.appearance_extractor = AppearanceFeatureExtractor(model_scale)
+        self.canonical_kp_detector = CanonicalKeypointDetector(model_scale)
+        self.pose_exp_estimator = PoseExpressionEstimator(model_scale)
+        self.motion_field_estimator = MotionFieldEstimator(model_scale)
+        self.deform_based_generator = Generator()
+
+        self.pretrained_hopenet = Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], num_bins=66).cuda()
+        pretrained_path = "/home/tiger/nfs/myenv/cache/useful_ckpts/hopenet_robust_alpha1.pkl" # https://drive.google.com/open?id=1m25PrSE7g9D2q2XJVMR6IA7RaCvWSzCR
+        self.pretrained_hopenet.load_state_dict(torch.load(pretrained_path, map_location=torch.device("cpu")))
+        self.pretrained_hopenet.requires_grad_(False)
+
+        self.pose_loss_fn = HeadPoseLoss() # 20
+        self.equivariance_loss_fn = EquivarianceLoss() # 20
+        self.keypoint_prior_loss_fn = KeypointPriorLoss()# 10
+        self.deform_prior_loss_fn = DeformationPriorLoss() # 5
+
+    def forward(self, torso_src_img, src_img, drv_img, cal_loss=False):
+        # predict cano keypoint
+        cano_keypoint = self.canonical_kp_detector(src_img)
+        # predict src_pose and drv_pose
+        transform_fn = Transform(drv_img.shape[0])
+        transformed_drv_img = transform_fn.transform_frame(drv_img)
+        cat_imgs = torch.cat([src_img, drv_img, transformed_drv_img], dim=0)
+        yaw, pitch, roll, t, delta = self.pose_exp_estimator(cat_imgs)
+        [yaw_s, yaw_d, yaw_tran], [pitch_s, pitch_d, pitch_tran], [roll_s, roll_d, roll_tran] = (
+            torch.chunk(yaw, 3, dim=0),
+            torch.chunk(pitch, 3, dim=0),
+            torch.chunk(roll, 3, dim=0),
+        )
+        [t_s, t_d, t_tran], [delta_s, delta_d, delta_tran] = (
+            torch.chunk(t, 3, dim=0),
+            torch.chunk(delta, 3, dim=0),
+        )
+        kp_s, Rs = transform_kp(cano_keypoint, yaw_s, pitch_s, roll_s, t_s, delta_s)
+        kp_d, Rd = transform_kp(cano_keypoint, yaw_d, pitch_d, roll_d, t_d, delta_d)
+        # deform the torso img
+        torso_appearance_feats = self.appearance_extractor(torso_src_img)
+        deformation, occlusion = self.motion_field_estimator(torso_appearance_feats, kp_s, kp_d, Rs, Rd)
+        deformed_torso_img = self.deform_based_generator(torso_appearance_feats, deformation, occlusion)
+        
+        ret = {'kp_src': kp_s, 'kp_drv': kp_d}
+        if cal_loss:
+            losses = {}
+            with torch.no_grad():
+                self.pretrained_hopenet.eval()
+                real_yaw, real_pitch, real_roll = self.pretrained_hopenet(F.interpolate(apply_imagenet_normalization(cat_imgs), size=(224, 224)))
+            pose_loss = self.pose_loss_fn(yaw, pitch, roll, real_yaw, real_pitch, real_roll)
+            losses['facev2v/pose_pred_loss'] = pose_loss
+
+            kp_tran, _ = transform_kp(cano_keypoint, yaw_tran, pitch_tran, roll_tran, t_tran, delta_tran)
+            reverse_kp = transform_fn.warp_coordinates(kp_tran[:, :, :2])
+            equivariance_loss = self.equivariance_loss_fn(kp_d, reverse_kp)
+            losses['facev2v/equivariance_loss'] = equivariance_loss
+
+            keypoint_prior_loss = self.keypoint_prior_loss_fn(kp_d)
+            losses['facev2v/keypoint_prior_loss'] = keypoint_prior_loss
+
+            deform_prior_loss = self.deform_prior_loss_fn(delta_d)
+            losses['facev2v/deform_prior_loss'] = deform_prior_loss
+            ret['losses'] = losses
+
+        return deformed_torso_img, ret
+
+
+class WarpBasedTorsoModelMediaPipe(nn.Module):
+    def __init__(self, model_scale='small'):
+        super().__init__()
+        self.hparams = copy.deepcopy(hparams)
+        self.appearance_extractor = AppearanceFeatureExtractor(model_scale)
+        self.motion_field_estimator = MotionFieldEstimator(model_scale, input_channels=32+2, num_keypoints=self.hparams['torso_kp_num']) # 32 channel appearance channel, and 3 channel for segmap
+        # self.motion_field_estimator = MotionFieldEstimator(model_scale, input_channels=32+2, num_keypoints=9) # 32 channel appearance channel, and 3 channel for segmap
+        self.deform_based_generator = Generator()
+
+        self.occlusion_2_predictor = nn.Sequential(*[
+            nn.Conv2d(64+1, 32, 3, 1, 1),
+            nn.ReLU(),
+            nn.Conv2d(32, 32, 3, 1, 1),
+            nn.ReLU(),
+            nn.Conv2d(32, 1, 3, 1, 1),
+            nn.Sigmoid()
+        ])
+
+    #  V2, 先warp， 再mean
+    def forward(self, torso_src_img, segmap, kp_s, kp_d, tgt_head_img, tgt_head_weights, cal_loss=False, target_torso_mask=None):
+        """
+        kp_s, kp_d, [b, 68, 3], within the range of [-1,1]
+        """
+        torso_appearance_feats = self.appearance_extractor(torso_src_img) # [B, C, D, H, W]
+        torso_segmap = torch.nn.functional.interpolate(segmap[:,[2,4]].float(), size=(64,64), mode='bilinear', align_corners=False, antialias=False) # see tasks/eg3ds/loss_utils/segment_loss/mp_segmenter.py for the segmap convention
+        torso_mask = torso_segmap.sum(dim=1).unsqueeze(1) # [b, 1, ,h, w]
+        torso_mask = dilate(torso_mask, ksize=self.hparams.get("torso_mask_dilate_ksize", 7))
+        if self.hparams.get("mul_torso_mask", True):
+            torso_appearance_feats = torso_appearance_feats * torso_mask.unsqueeze(1)
+        motion_inp_appearance_feats = torch.cat([torso_appearance_feats, torso_segmap.unsqueeze(2).repeat([1,1,torso_appearance_feats.shape[2],1,1])], dim=1)
+
+        if self.hparams['torso_kp_num'] == 4:
+            kp_s = kp_s[:,[0,8,16,27],:]
+            kp_d = kp_d[:,[0,8,16,27],:]
+        elif self.hparams['torso_kp_num'] == 9:
+            kp_s = kp_s[:,[0, 3, 6, 8, 10, 13, 16, 27, 33],:]
+            kp_d = kp_d[:,[0, 3, 6, 8, 10, 13, 16, 27, 33],:]
+        else:
+            raise NotImplementedError()
+
+        # deform the torso img
+        Rs = torch.eye(3, 3).unsqueeze(0).repeat([kp_s.shape[0], 1, 1]).to(kp_s.device)
+        Rd = torch.eye(3, 3).unsqueeze(0).repeat([kp_d.shape[0], 1, 1]).to(kp_d.device)
+        deformation, occlusion, occlusion_2 = self.motion_field_estimator(motion_inp_appearance_feats, kp_s, kp_d, Rs, Rd, tgt_head_img, tgt_head_weights)
+        motion_estimator_grad_scale_factor = 0.1
+        # motion_estimator_grad_scale_factor = 1.0
+        deformation = deformation * motion_estimator_grad_scale_factor + deformation.detach() * (1-motion_estimator_grad_scale_factor)
+        # occlusion, a 0~1 mask that predict the segment map of warped torso, used in oclcusion-aware decoder
+        occlusion = occlusion * motion_estimator_grad_scale_factor + occlusion.detach() * (1-motion_estimator_grad_scale_factor)
+        # occlusion_2, a 0~1 mask that predict the segment map of warped torso, but is used in alpha-blending
+        occlusion_2 = occlusion_2 * motion_estimator_grad_scale_factor + occlusion_2.detach() * (1-motion_estimator_grad_scale_factor)
+        ret = {'kp_src': kp_s, 'kp_drv': kp_d, 'occlusion': occlusion, 'occlusion_2': occlusion_2}
+
+        deformed_torso_img, deformed_torso_hid = self.deform_based_generator(torso_appearance_feats, deformation, occlusion, return_hid=True)
+        ret['deformed_torso_hid'] = deformed_torso_hid
+        occlusion_2 = self.occlusion_2_predictor(torch.cat([deformed_torso_hid, F.interpolate(occlusion_2, size=(256,256), mode='bilinear')], dim=1))
+        ret['occlusion_2'] = occlusion_2
+        alphas = occlusion_2.clamp(1e-5, 1 - 1e-5) 
+
+        if target_torso_mask is None:
+            ret['losses'] = {
+                'facev2v/occlusion_reg_l1': occlusion.mean(),
+                'facev2v/occlusion_2_reg_l1': occlusion_2.mean(),
+                'facev2v/occlusion_2_weights_entropy': torch.mean(- alphas * torch.log2(alphas) - (1 - alphas) * torch.log2(1 - alphas)), # you can visualize this fn at https://www.desmos.com/calculator/rwbs7bruvj?lang=zh-TW
+            }
+        else:
+            non_target_torso_mask_1 = torch.nn.functional.interpolate((~target_torso_mask).unsqueeze(1).float(), size=occlusion.shape[-2:])
+            non_target_torso_mask_2 = torch.nn.functional.interpolate((~target_torso_mask).unsqueeze(1).float(), size=occlusion_2.shape[-2:])
+            ret['losses'] = {
+                'facev2v/occlusion_reg_l1': self.masked_l1_reg_loss(occlusion, non_target_torso_mask_1.bool(), masked_weight=1, unmasked_weight=self.hparams['torso_occlusion_reg_unmask_factor']),
+                'facev2v/occlusion_2_reg_l1': self.masked_l1_reg_loss(occlusion_2, non_target_torso_mask_2.bool(), masked_weight=1, unmasked_weight=self.hparams['torso_occlusion_reg_unmask_factor']),
+                'facev2v/occlusion_2_weights_entropy': torch.mean(- alphas * torch.log2(alphas) - (1 - alphas) * torch.log2(1 - alphas)), # you can visualize this fn at https://www.desmos.com/calculator/rwbs7bruvj?lang=zh-TW
+            }
+        # if self.hparams.get("fuse_with_deform_source"):
+        #     B, _, H, W = deformed_torso_img.shape
+        #     deformation_256 = F.interpolate(deformation.mean(dim=1).permute(0,3,1,2), size=256, mode='bilinear',antialias=True).permute(0,2,3,1)[...,:2]
+        #     deformed_source_torso_img = F.grid_sample(torso_src_img, deformation_256, align_corners=True).view(B, -1, H, W)
+        #     occlusion_256 = F.interpolate(occlusion, size=256, antialias=True, mode='bilinear').reshape([B,1,H,W])
+        #     # deformed_torso_img = deformed_torso_img * (1 - occlusion_256[:,0]) + deformed_source_torso_img[:,0] * occlusion_256[:,0]
+        #     deformed_torso_img = deformed_torso_img * (1 - occlusion_256) + deformed_source_torso_img * occlusion_256
+        return deformed_torso_img, ret
+
+    def masked_l1_reg_loss(self, img_pred, mask, masked_weight=0.01, unmasked_weight=0.001, mode='l1'):
+        # 对raw图像，因为deform的原因背景没法全黑，导致这部分mse过高，我们将其mask掉，只计算人脸部分
+        masked_weight = 1.0
+        weight_mask = mask.float() * masked_weight + (~mask).float() * unmasked_weight
+        if mode == 'l1':
+            error = (img_pred).abs().sum(dim=1) * weight_mask
+        else:
+            error = (img_pred).pow(2).sum(dim=1) * weight_mask
+        loss = error.mean()
+        return loss
+
+    @torch.no_grad()
+    def infer_forward_stage1(self, torso_src_img, segmap, kp_s, kp_d, tgt_head_img, cal_loss=False):
+        """
+        kp_s, kp_d, [b, 68, 3], within the range of [-1,1]
+        """
+        kp_s = kp_s[:,[0,8,16,27],:]
+        kp_d = kp_d[:,[0,8,16,27],:]
+
+        torso_segmap = torch.nn.functional.interpolate(segmap[:,[2,4]].float(), size=(64,64), mode='bilinear', align_corners=False, antialias=False) # see tasks/eg3ds/loss_utils/segment_loss/mp_segmenter.py for the segmap convention
+        torso_appearance_feats = self.appearance_extractor(torso_src_img)
+        torso_mask = torso_segmap.sum(dim=1).unsqueeze(1) # [b, 1, ,h, w]
+        torso_mask = dilate(torso_mask, ksize=self.hparams.get("torso_mask_dilate_ksize", 7))
+        if self.hparams.get("mul_torso_mask", True):
+            torso_appearance_feats = torso_appearance_feats * torso_mask.unsqueeze(1)
+        motion_inp_appearance_feats = torch.cat([torso_appearance_feats, torso_segmap.unsqueeze(2).repeat([1,1,torso_appearance_feats.shape[2],1,1])], dim=1)
+        # deform the torso img
+        Rs = torch.eye(3, 3).unsqueeze(0).repeat([kp_s.shape[0], 1, 1]).to(kp_s.device)
+        Rd = torch.eye(3, 3).unsqueeze(0).repeat([kp_d.shape[0], 1, 1]).to(kp_d.device)
+        deformation, occlusion, occlusion_2 = self.motion_field_estimator(motion_inp_appearance_feats, kp_s, kp_d, Rs, Rd)
+        motion_estimator_grad_scale_factor = 0.1
+        deformation = deformation * motion_estimator_grad_scale_factor + deformation.detach() * (1-motion_estimator_grad_scale_factor)
+        occlusion = occlusion * motion_estimator_grad_scale_factor + occlusion.detach() * (1-motion_estimator_grad_scale_factor)
+        occlusion_2 = occlusion_2 * motion_estimator_grad_scale_factor + occlusion_2.detach() * (1-motion_estimator_grad_scale_factor)
+        ret = {'kp_src': kp_s, 'kp_drv': kp_d, 'occlusion': occlusion, 'occlusion_2': occlusion_2}
+        ret['torso_appearance_feats'] = torso_appearance_feats
+        ret['deformation'] = deformation
+        ret['occlusion'] = occlusion
+        return ret
+    
+    @torch.no_grad()
+    def infer_forward_stage2(self, ret):
+        torso_appearance_feats = ret['torso_appearance_feats']
+        deformation = ret['deformation']
+        occlusion = ret['occlusion']
+        deformed_torso_img, deformed_torso_hid = self.deform_based_generator(torso_appearance_feats, deformation, occlusion, return_hid=True)
+        ret['deformed_torso_hid'] = deformed_torso_hid
+        return deformed_torso_img
+    
+if __name__ == '__main__':
+    from utils.nn.model_utils import num_params
+    import tqdm
+    model = WarpBasedTorsoModel('small')
+    model.cuda()
+    num_params(model)
+    for n, m in model.named_children():
+        num_params(m, model_name=n)
+    torso_ref_img = torch.randn([2, 3, 256, 256]).cuda()
+    ref_img = torch.randn([2, 3, 256, 256]).cuda()
+    mv_img = torch.randn([2, 3, 256, 256]).cuda()
+    out = model(torso_ref_img, ref_img, mv_img)
+    for i in tqdm.trange(100):
+        out_img, losses = model(torso_ref_img, ref_img, mv_img, cal_loss=True)
+    print(" ")
\ No newline at end of file
diff --git a/modules/real3d/facev2v_warp/network.py b/modules/real3d/facev2v_warp/network.py
new file mode 100644
index 0000000000000000000000000000000000000000..b51fb9adfc74c76195c6870bfa486b5b824921d4
--- /dev/null
+++ b/modules/real3d/facev2v_warp/network.py
@@ -0,0 +1,326 @@
+import torch
+import torch.nn.functional as F
+import numpy as np
+from torch import nn
+from utils.commons.hparams import hparams
+
+from modules.real3d.facev2v_warp.layers import ConvBlock2D, DownBlock2D, DownBlock3D, UpBlock2D, UpBlock3D, ResBlock2D, ResBlock3D, ResBottleneck
+from modules.real3d.facev2v_warp.func_utils import (
+    out2heatmap,
+    heatmap2kp,
+    kp2gaussian_2d,
+    create_heatmap_representations,
+    create_sparse_motions,
+    create_deformed_source_image,
+)
+
+class AppearanceFeatureExtractor(nn.Module):
+    # 3D appearance features extractor
+    # [N,3,256,256]
+    # [N,64,256,256]
+    # [N,128,128,128]
+    # [N,256,64,64]
+    # [N,512,64,64]
+    # [N,32,16,64,64]
+    def __init__(self, model_scale='standard'):
+        super().__init__()
+        use_weight_norm = False
+        down_seq = [64, 128, 256]
+        n_res = 6
+        C = 32
+        D = 16
+        self.in_conv = ConvBlock2D("CNA", 3, down_seq[0], 7, 1, 3, use_weight_norm)
+        self.down = nn.Sequential(*[DownBlock2D(down_seq[i], down_seq[i + 1], use_weight_norm) for i in range(len(down_seq) - 1)])
+        self.mid_conv = nn.Conv2d(down_seq[-1], C * D, 1, 1, 0)
+        self.res = nn.Sequential(*[ResBlock3D(C, use_weight_norm) for _ in range(n_res)])
+
+        self.C, self.D = C, D
+
+    def forward(self, x):
+        x = self.in_conv(x)
+        x = self.down(x)
+        x = self.mid_conv(x)
+        N, _, H, W = x.shape
+        x = x.view(N, self.C, self.D, H, W)
+        x = self.res(x)
+        return x
+
+
+class CanonicalKeypointDetector(nn.Module):
+    # Canonical keypoints detector
+    # [N,3,256,256]
+    # [N,64,128,128]
+    # [N,128,64,64]
+    # [N,256,32,32]
+    # [N,512,16,16]
+    # [N,1024,8,8]
+    # [N,16384,8,8]
+    # [N,1024,16,8,8]
+    # [N,512,16,16,16]
+    # [N,256,16,32,32]
+    # [N,128,16,64,64]
+    # [N,64,16,128,128]
+    # [N,32,16,256,256]
+    # [N,20,16,256,256] (heatmap)
+    # [N,20,3] (key points)
+    def __init__(self, model_scale='standard'):
+        super().__init__()
+        use_weight_norm=False
+
+        if model_scale == 'standard' or model_scale == 'large':
+            down_seq = [3, 64, 128, 256, 512, 1024]
+            up_seq = [1024, 512, 256, 128, 64, 32]
+            D = 16 # depth_channel 
+            K = 15
+            scale_factor=0.25
+        elif model_scale == 'small':
+            down_seq = [3, 32, 64, 128, 256, 512]
+            up_seq = [512, 256, 128, 64, 32, 16]
+            D = 6 # depth_channel 
+            K = 15
+            scale_factor=0.25
+        self.down = nn.Sequential(*[DownBlock2D(down_seq[i], down_seq[i + 1], use_weight_norm) for i in range(len(down_seq) - 1)])
+        self.mid_conv = nn.Conv2d(down_seq[-1], up_seq[0] * D, 1, 1, 0)
+        self.up = nn.Sequential(*[UpBlock3D(up_seq[i], up_seq[i + 1], use_weight_norm) for i in range(len(up_seq) - 1)])
+        self.out_conv = nn.Conv3d(up_seq[-1], K, 3, 1, 1)
+        self.C, self.D = up_seq[0], D
+        self.scale_factor = scale_factor
+
+    def forward(self, x):
+        x = F.interpolate(x, mode="bilinear", scale_factor=self.scale_factor, align_corners=False, recompute_scale_factor=True)
+        # [1, 3, 256, 256] ==> [1, 3, 64, 64]
+        x = self.down(x) # ==> [1, 1024, 2, 2]
+        x = self.mid_conv(x) # ==> [1, 16384, 2, 2]
+        N, _, H, W = x.shape
+        x = x.view(N, self.C, self.D, H, W) # ==> [1, 1024, 16, 2, 2]
+        x = self.up(x) # ==> [1, 32, 16, 64, 64]
+        x = self.out_conv(x) # ==> [1, 15, 16, 64, 64]
+        heatmap = out2heatmap(x)
+        kp = heatmap2kp(heatmap)
+        return kp
+
+
+class PoseExpressionEstimator(nn.Module):
+    # Head pose estimator && expression deformation estimator
+    # [N,3,256,256]
+    # [N,64,64,64]
+    # [N,256,64,64]
+    # [N,512,32,32]
+    # [N,1024,16,16]
+    # [N,2048,8,8]
+    # [N,2048]
+    # [N,66] [N,66] [N,66] [N,3] [N,60]
+    # [N,] [N,] [N,] [N,3] [N,20,3]
+    def __init__(self, model_scale='standard'):
+        super().__init__()
+        use_weight_norm=False
+        n_bins=66
+        K=15
+        if model_scale == 'standard' or model_scale == 'large':
+            n_filters=[64, 256, 512, 1024, 2048]
+            n_blocks=[3, 3, 5, 2]
+        elif model_scale == 'small':
+            n_filters=[32, 128, 256, 512, 512]
+            n_blocks=[2, 2, 4, 2]
+
+        self.pre_layers = nn.Sequential(ConvBlock2D("CNA", 3, n_filters[0], 7, 2, 3, use_weight_norm), nn.MaxPool2d(3, 2, 1))
+        res_layers = []
+        for i in range(len(n_filters) - 1):
+            res_layers.extend(self._make_layer(i, n_filters[i], n_filters[i + 1], n_blocks[i], use_weight_norm))
+        self.res_layers = nn.Sequential(*res_layers)
+        self.fc_yaw = nn.Linear(n_filters[-1], n_bins)
+        self.fc_pitch = nn.Linear(n_filters[-1], n_bins)
+        self.fc_roll = nn.Linear(n_filters[-1], n_bins)
+        self.fc_t = nn.Linear(n_filters[-1], 3)
+        self.fc_delta = nn.Linear(n_filters[-1], 3 * K)
+        self.n_bins = n_bins
+        self.idx_tensor = torch.FloatTensor(list(range(self.n_bins))).unsqueeze(0).cuda()
+
+    def _make_layer(self, i, in_channels, out_channels, n_block, use_weight_norm):
+        stride = 1 if i == 0 else 2
+        return [ResBottleneck(in_channels, out_channels, stride, use_weight_norm)] + [
+            ResBottleneck(out_channels, out_channels, 1, use_weight_norm) for _ in range(n_block)
+        ]
+
+    def forward(self, x):
+        x = self.pre_layers(x)
+        x = self.res_layers(x)
+        x = torch.mean(x, (2, 3))
+        yaw, pitch, roll, t, delta = self.fc_yaw(x), self.fc_pitch(x), self.fc_roll(x), self.fc_t(x), self.fc_delta(x)
+        yaw = torch.softmax(yaw, dim=1)
+        pitch = torch.softmax(pitch, dim=1)
+        roll = torch.softmax(roll, dim=1)
+        yaw = (yaw * self.idx_tensor).sum(dim=1)
+        pitch = (pitch * self.idx_tensor).sum(dim=1)
+        roll = (roll * self.idx_tensor).sum(dim=1)
+        yaw = (yaw - self.n_bins // 2) * 3 * np.pi / 180
+        pitch = (pitch - self.n_bins // 2) * 3 * np.pi / 180
+        roll = (roll - self.n_bins // 2) * 3 * np.pi / 180
+        delta = delta.view(x.shape[0], -1, 3)
+        return yaw, pitch, roll, t, delta
+
+
+class MotionFieldEstimator(nn.Module):
+    # Motion field estimator
+    # (4+1)x(20+1)=105
+    # [N,105,16,64,64]
+    # ...
+    # [N,32,16,64,64]
+    # [N,137,16,64,64]
+    # 1.
+    # [N,21,16,64,64] (mask)
+    # 2.
+    # [N,2192,64,64]
+    # [N,1,64,64] (occlusion)
+    def __init__(self, model_scale='standard', input_channels=32, num_keypoints=15, predict_multiref_occ=True, occ2_on_deformed_source=False):
+        super().__init__()
+        use_weight_norm=False
+        if model_scale == 'standard' or model_scale == 'large':
+            down_seq = [(num_keypoints+1)*5, 64, 128, 256, 512, 1024]
+            up_seq = [1024, 512, 256, 128, 64, 32]
+        elif model_scale == 'small':
+            down_seq = [(num_keypoints+1)*5, 32, 64, 128, 256, 512]
+            up_seq = [512, 256, 128, 64, 32, 16]
+        K = num_keypoints
+        D = 16
+        C1 = input_channels # appearance feats channel
+        C2 = 4
+        self.compress = nn.Conv3d(C1, C2, 1, 1, 0)
+        self.down = nn.Sequential(*[DownBlock3D(down_seq[i], down_seq[i + 1], use_weight_norm) for i in range(len(down_seq) - 1)])
+        self.up = nn.Sequential(*[UpBlock3D(up_seq[i], up_seq[i + 1], use_weight_norm) for i in range(len(up_seq) - 1)])
+        self.mask_conv = nn.Conv3d(down_seq[0] + up_seq[-1], K + 1, 7, 1, 3)
+        self.predict_multiref_occ = predict_multiref_occ
+        self.occ2_on_deformed_source = occ2_on_deformed_source
+        self.occlusion_conv = nn.Conv2d((down_seq[0] + up_seq[-1]) * D, 1, 7, 1, 3)
+        if self.occ2_on_deformed_source:
+            self.occlusion_conv2 = nn.Conv2d(3, 1, 7, 1, 3)
+        else:
+            self.occlusion_conv2 = nn.Conv2d((down_seq[0] + up_seq[-1]) * D, 1, 7, 1, 3)
+        self.C, self.D = down_seq[0] + up_seq[-1], D
+
+    def forward(self, fs, kp_s, kp_d, Rs, Rd):
+        # the original fs is compressed to 4 channels using a 1x1x1 conv
+        fs_compressed = self.compress(fs)
+        N, _, D, H, W = fs.shape
+        # [N,21,1,16,64,64]
+        heatmap_representation = create_heatmap_representations(fs_compressed, kp_s, kp_d)
+        # [N,21,16,64,64,3]
+        sparse_motion = create_sparse_motions(fs_compressed, kp_s, kp_d, Rs, Rd)
+        # [N,21,4,16,64,64]
+        deformed_source = create_deformed_source_image(fs_compressed, sparse_motion)
+        input = torch.cat([heatmap_representation, deformed_source], dim=2).view(N, -1, D, H, W)
+        output = self.down(input)
+        output = self.up(output)
+        x = torch.cat([input, output], dim=1) # [B, C1=25 + C2=32, D, H, W]
+        mask = self.mask_conv(x)
+        # [N,21,16,64,64,1]
+        mask = F.softmax(mask, dim=1).unsqueeze(-1)
+        # [N,16,64,64,3]
+        deformation = (sparse_motion * mask).sum(dim=1)
+        if self.predict_multiref_occ:
+            occlusion, occlusion_2 = self.create_occlusion(x.view(N, -1, H, W))
+            return deformation, occlusion, occlusion_2
+        else:
+            return deformation, x.view(N, -1, H, W)
+        
+    # x: torch.Tensor, N, M, H, W
+    def create_occlusion(self, x, deformed_source=None):
+        occlusion = self.occlusion_conv(x)
+        if self.occ2_on_deformed_source:
+            assert deformed_source is not None
+            occlusion_2 = self.occlusion_conv2(deformed_source)
+        else:
+            occlusion_2 = self.occlusion_conv2(x)
+        occlusion = torch.sigmoid(occlusion)
+        occlusion_2 = torch.sigmoid(occlusion_2)
+        return occlusion, occlusion_2
+    
+
+
+class Generator(nn.Module):
+    # Generator
+    # [N,32,16,64,64]
+    # [N,512,64,64]
+    # [N,256,64,64]
+    # [N,128,128,128]
+    # [N,64,256,256]
+    # [N,3,256,256]
+    def __init__(self, input_channels=32, model_scale='standard', more_res=False):
+        super().__init__()
+        use_weight_norm=True
+        C=input_channels
+        
+        if model_scale == 'large':
+            n_res = 12
+            up_seq = [256, 128, 64]
+            D = 16
+            use_up_res = True
+        elif model_scale in ['standard', 'small']:
+            n_res = 6
+            up_seq = [256, 128, 64]
+            D = 16 
+            use_up_res = False
+        self.in_conv = ConvBlock2D("CNA", C * D, up_seq[0], 3, 1, 1, use_weight_norm, nonlinearity_type="leakyrelu")
+        self.mid_conv = nn.Conv2d(up_seq[0], up_seq[0], 1, 1, 0)
+        self.res = nn.Sequential(*[ResBlock2D(up_seq[0], use_weight_norm) for _ in range(n_res)])
+        ups = []
+        for i in range(len(up_seq) - 1):
+            ups.append(UpBlock2D(up_seq[i], up_seq[i + 1], use_weight_norm))
+            if use_up_res:
+                ups.append(ResBlock2D(up_seq[i + 1], up_seq[i + 1]))
+        self.up = nn.Sequential(*ups)
+        self.out_conv = nn.Conv2d(up_seq[-1], 3, 7, 1, 3)
+               
+    def forward(self, fs, deformation, occlusion, return_hid=False):
+        deformed_fs = self.get_deformed_feature(fs, deformation)
+        return self.forward_with_deformed_feature(deformed_fs, occlusion, return_hid=return_hid)
+    
+    def forward_with_deformed_feature(self, deformed_fs, occlusion, return_hid=False):
+        fs = deformed_fs
+        fs = self.in_conv(fs)
+        fs = self.mid_conv(fs)
+        # if hparams.get("occlusion_fuse", True):
+        #     blank = torch.full_like(fs, 0.)
+        #     fs = fs * occlusion + blank * (1 - occlusion)
+        # else:
+        #     pass
+        fs = self.res(fs)
+        fs = self.up(fs)
+        rgb = self.out_conv(fs)
+        if return_hid:
+            return rgb, fs
+        return rgb
+    
+    @staticmethod
+    def get_deformed_feature(fs, deformation):
+        N, _, D, H, W = fs.shape
+        fs = F.grid_sample(fs, deformation, align_corners=True, padding_mode='border').view(N, -1, H, W)
+        return fs
+
+
+class Discriminator(nn.Module):
+    # Patch Discriminator
+
+    def __init__(self, use_weight_norm=True, down_seq=[64, 128, 256, 512], K=15):
+        super().__init__()
+        layers = []
+        layers.append(ConvBlock2D("CNA", 3 + K, down_seq[0], 3, 2, 1, use_weight_norm, "instance", "leakyrelu"))
+        layers.extend(
+            [
+                ConvBlock2D("CNA", down_seq[i], down_seq[i + 1], 3, 2 if i < len(down_seq) - 2 else 1, 1, use_weight_norm, "instance", "leakyrelu")
+                for i in range(len(down_seq) - 1)
+            ]
+        )
+        layers.append(ConvBlock2D("CN", down_seq[-1], 1, 3, 1, 1, use_weight_norm, activation_type="none"))
+        self.layers = nn.ModuleList(layers)
+
+    def forward(self, x, kp):
+        heatmap = kp2gaussian_2d(kp.detach()[:, :, :2], x.shape[2:])
+        x = torch.cat([x, heatmap], dim=1)
+        res = [x]
+        for layer in self.layers:
+            x = res[-1]
+            res.append(layer(x))
+        output = res[-1]
+        features = res[1:-1]
+        return output, features
diff --git a/modules/real3d/facev2v_warp/network2.py b/modules/real3d/facev2v_warp/network2.py
new file mode 100644
index 0000000000000000000000000000000000000000..51a10b3d559e2aa064be5aa3edc36fa331b2105a
--- /dev/null
+++ b/modules/real3d/facev2v_warp/network2.py
@@ -0,0 +1,329 @@
+import torch
+import torch.nn.functional as F
+import numpy as np
+from torch import nn
+
+from modules.real3d.facev2v_warp.layers import ConvBlock2D, DownBlock2D, DownBlock3D, UpBlock2D, UpBlock3D, ResBlock2D, ResBlock3D, ResBottleneck
+from modules.real3d.facev2v_warp.func_utils import (
+    out2heatmap,
+    heatmap2kp,
+    kp2gaussian_2d,
+    create_heatmap_representations,
+    create_sparse_motions,
+    create_deformed_source_image,
+)
+
+class AppearanceFeatureExtractor(nn.Module):
+    """3D appearance feature extractor.
+
+    Shape trace from the original notes (they assume a 256x256 input -- TODO confirm against the caller):
+    [N,3,256,256] -> [N,64,256,256] -> [N,128,128,128] -> [N,256,64,64]
+    -> [N,512,64,64] (1x1 conv to C*D channels) -> [N,32,16,64,64] (view as C x D).
+    `model_scale` is accepted but not read by this module.
+    """
+    def __init__(self, model_scale='standard'):
+        super().__init__()
+        weight_norm = False
+        widths = [64, 128, 256]
+        feat_channels, depth, num_res_blocks = 32, 16, 6
+        self.in_conv = ConvBlock2D("CNA", 3, widths[0], 7, 1, 3, weight_norm)
+        self.down = nn.Sequential(*[DownBlock2D(c_in, c_out, weight_norm) for c_in, c_out in zip(widths, widths[1:])])
+        # 1x1 convolution to C*D channels; forward() splits them into separate C and D axes
+        self.mid_conv = nn.Conv2d(widths[-1], feat_channels * depth, 1, 1, 0)
+        self.res = nn.Sequential(*[ResBlock3D(feat_channels, weight_norm) for _ in range(num_res_blocks)])
+
+        self.C = feat_channels
+        self.D = depth
+
+    def forward(self, x):
+        """Encode `x` in 2D, reshape the channels to [N, C, D, H, W], then apply the 3D residual stack."""
+        feats = self.mid_conv(self.down(self.in_conv(x)))
+        batch, _, height, width = feats.shape
+        # split the flat C*D channel axis into channel and depth axes
+        volume = feats.view(batch, self.C, self.D, height, width)
+        volume = self.res(volume)
+        return volume
+
+
+class CanonicalKeypointDetector(nn.Module):
+    """Canonical keypoint detector.
+
+    Shape trace for model_scale='standard', taken from the notes in forward() (they assume a
+    256x256 input image -- TODO confirm against the caller):
+      [N,3,256,256]   input
+      [N,3,64,64]     after bilinear rescaling by scale_factor=0.25
+      [N,1024,2,2]    after the 2D down blocks
+      [N,16384,2,2]   after the 1x1 conv to C*D channels
+      [N,1024,16,2,2] after viewing the channels as C x D
+      [N,32,16,64,64] after the 3D up blocks
+      [N,15,16,64,64] after the output conv (K=15 channels)
+    forward() returns heatmap2kp(out2heatmap(.)) of that last tensor.
+
+    Raises:
+        ValueError: if `model_scale` is not 'standard', 'large' or 'small'.
+    """
+    def __init__(self, model_scale='standard'):
+        super().__init__()
+        use_weight_norm = False
+        K = 15  # output channels of out_conv; identical for every scale
+        scale_factor = 0.25  # input rescaling used by forward(); identical for every scale
+        if model_scale in ('standard', 'large'):
+            down_seq = [3, 64, 128, 256, 512, 1024]
+            up_seq = [1024, 512, 256, 128, 64, 32]
+            D = 16  # depth_channel
+        elif model_scale == 'small':
+            down_seq = [3, 32, 64, 128, 256, 512]
+            up_seq = [512, 256, 128, 64, 32, 16]
+            D = 6  # depth_channel
+        else:
+            # an unknown scale used to fall through and fail below with an UnboundLocalError
+            raise ValueError(f"unsupported model_scale: {model_scale!r}")
+        self.down = nn.Sequential(*[DownBlock2D(down_seq[i], down_seq[i + 1], use_weight_norm) for i in range(len(down_seq) - 1)])
+        self.mid_conv = nn.Conv2d(down_seq[-1], up_seq[0] * D, 1, 1, 0)
+        self.up = nn.Sequential(*[UpBlock3D(up_seq[i], up_seq[i + 1], use_weight_norm) for i in range(len(up_seq) - 1)])
+        self.out_conv = nn.Conv3d(up_seq[-1], K, 3, 1, 1)
+        self.C, self.D = up_seq[0], D
+        self.scale_factor = scale_factor
+
+    def forward(self, x):
+        """Return the keypoints for image batch `x`; the shape notes below assume the standard scale."""
+        x = F.interpolate(x, mode="bilinear", scale_factor=self.scale_factor, align_corners=False, recompute_scale_factor=True)
+        # [1, 3, 256, 256] ==> [1, 3, 64, 64]
+        x = self.down(x) # ==> [1, 1024, 2, 2]
+        x = self.mid_conv(x) # ==> [1, 16384, 2, 2]
+        N, _, H, W = x.shape
+        x = x.view(N, self.C, self.D, H, W) # ==> [1, 1024, 16, 2, 2]
+        x = self.up(x) # ==> [1, 32, 16, 64, 64]
+        x = self.out_conv(x) # ==> [1, 15, 16, 64, 64]
+        heatmap = out2heatmap(x)
+        return heatmap2kp(heatmap)
+
+
+class PoseExpressionEstimator(nn.Module):
+    """Head pose estimator and expression deformation estimator.
+
+    A stack of ResBottleneck stages is mean-pooled over the spatial axes and feeds five linear
+    heads: yaw / pitch / roll (n_bins=66 logits each), t (3 values) and delta (3*K values, K=15).
+    `idx_tensor` (bin indices 0..n_bins-1) is a non-persistent buffer, so it is not part of state_dict.
+
+    Raises:
+        ValueError: if `model_scale` is not 'standard', 'large' or 'small'.
+    """
+    def __init__(self, model_scale='standard'):
+        super().__init__()
+        use_weight_norm = False
+        n_bins = 66
+        K = 15
+        if model_scale in ('standard', 'large'):
+            n_filters = [64, 256, 512, 1024, 2048]
+            n_blocks = [3, 3, 5, 2]
+        elif model_scale == 'small':
+            n_filters = [32, 128, 256, 512, 512]
+            n_blocks = [2, 2, 4, 2]
+        else:
+            raise ValueError(f"unsupported model_scale: {model_scale!r}")
+        self.pre_layers = nn.Sequential(ConvBlock2D("CNA", 3, n_filters[0], 7, 2, 3, use_weight_norm), nn.MaxPool2d(3, 2, 1))
+        res_layers = []
+        for i in range(len(n_filters) - 1):
+            res_layers.extend(self._make_layer(i, n_filters[i], n_filters[i + 1], n_blocks[i], use_weight_norm))
+        self.res_layers = nn.Sequential(*res_layers)
+        self.fc_yaw = nn.Linear(n_filters[-1], n_bins)
+        self.fc_pitch = nn.Linear(n_filters[-1], n_bins)
+        self.fc_roll = nn.Linear(n_filters[-1], n_bins)
+        self.fc_t = nn.Linear(n_filters[-1], 3)
+        self.fc_delta = nn.Linear(n_filters[-1], 3 * K)
+        self.n_bins = n_bins
+        # Non-persistent buffer: it moves with the module on .to()/.cuda()/.cpu() (the old hard-coded
+        # .cuda() failed on CPU-only hosts) and stays out of state_dict, so checkpoints are unaffected.
+        self.register_buffer("idx_tensor", torch.arange(n_bins, dtype=torch.float32).unsqueeze(0), persistent=False)
+
+    def _make_layer(self, i, in_channels, out_channels, n_block, use_weight_norm):
+        """Build one stage: a bottleneck (stride 1 for stage 0, else 2) plus `n_block` stride-1 bottlenecks."""
+        stride = 1 if i == 0 else 2
+        return [ResBottleneck(in_channels, out_channels, stride, use_weight_norm)] + [
+            ResBottleneck(out_channels, out_channels, 1, use_weight_norm) for _ in range(n_block)
+        ]
+
+    def forward(self, x):
+        """Return (yaw, pitch, roll, t, delta); the angles are in radians and delta is viewed as [N, K, 3]."""
+        x = self.pre_layers(x)
+        x = self.res_layers(x)
+        x = torch.mean(x, (2, 3))
+        t, delta = self.fc_t(x), self.fc_delta(x)
+        # expected bin index under the softmax, then bin -> radians (3 degrees per bin, centred on n_bins // 2)
+        angles = []
+        for head in (self.fc_yaw, self.fc_pitch, self.fc_roll):
+            expected_bin = (torch.softmax(head(x), dim=1) * self.idx_tensor).sum(dim=1)
+            angles.append((expected_bin - self.n_bins // 2) * 3 * np.pi / 180)
+        delta = delta.view(x.shape[0], -1, 3)
+        return angles[0], angles[1], angles[2], t, delta
+
+
+class MotionFieldEstimator(nn.Module):
+    """Motion field estimator: predicts a dense deformation (plus occlusion maps) used to warp `fs`.
+
+    With K keypoints the hourglass input has (K+1)*(4+1) channels: per the original notes, K+1
+    heatmap channels plus K+1 copies of the 4-channel compressed source feature (K=15 gives 80).
+    The mask has K+1 channels and each occlusion head outputs a single channel.
+
+    Raises:
+        ValueError: if `model_scale` is not 'standard', 'large' or 'small'.
+    """
+    def __init__(self, model_scale='standard', input_channels=32, num_keypoints=15, predict_multiref_occ=True):
+        super().__init__()
+        use_weight_norm = False
+        if model_scale in ('standard', 'large'):
+            down_seq = [(num_keypoints+1)*5, 64, 128, 256, 512, 1024]
+            up_seq = [1024, 512, 256, 128, 64, 32]
+        elif model_scale == 'small':
+            down_seq = [(num_keypoints+1)*5, 32, 64, 128, 256, 512]
+            up_seq = [512, 256, 128, 64, 32, 16]
+        else:
+            raise ValueError(f"unsupported model_scale: {model_scale!r}")
+        K = num_keypoints
+        D = 16
+        C1 = input_channels # appearance feats channel
+        C2 = 4
+        self.compress = nn.Conv3d(C1, C2, 1, 1, 0)
+        self.down = nn.Sequential(*[DownBlock3D(down_seq[i], down_seq[i + 1], use_weight_norm) for i in range(len(down_seq) - 1)])
+        self.up = nn.Sequential(*[UpBlock3D(up_seq[i], up_seq[i + 1], use_weight_norm) for i in range(len(up_seq) - 1)])
+
+        tgt_head_in_dim = 3 + 1
+        tgt_head_hid_dim = 32
+        tgt_head_layers =  [ConvBlock2D("CNA", tgt_head_in_dim, tgt_head_hid_dim, 7, 1, 3, use_weight_norm)] + [ResBlock2D(tgt_head_hid_dim, use_weight_norm) for _ in range(3)]
+        self.tgt_head_encoder = nn.Sequential(*tgt_head_layers)
+        self.tgt_head_fuser = nn.Conv3d(tgt_head_hid_dim + down_seq[0] + up_seq[-1], tgt_head_hid_dim, 7, 1, 3)
+
+        self.mask_conv = nn.Conv3d(tgt_head_hid_dim, K + 1, 7, 1, 3)
+        self.predict_multiref_occ = predict_multiref_occ
+        self.occlusion_conv = nn.Conv2d(tgt_head_hid_dim * D, 1, 7, 1, 3)
+        self.occlusion_conv2 = nn.Conv2d(tgt_head_hid_dim * D, 1, 7, 1, 3)
+
+        self.C, self.D = down_seq[0] + up_seq[-1], D
+
+    def forward(self, fs, kp_s, kp_d, Rs, Rd, tgt_head_img, tgt_head_weights):
+        """Return (deformation, occlusion, occlusion_2), or (deformation, fused features) if not predict_multiref_occ."""
+        # the original fs is compressed to 4 channels using a 1x1x1 conv
+        fs_compressed = self.compress(fs)
+        N, _, D, H, W = fs.shape
+        # [N,K+1,1,D,H,W] (per the original shape notes)
+        heatmap_representation = create_heatmap_representations(fs_compressed, kp_s, kp_d)
+        # [N,K+1,D,H,W,3] (per the original shape notes)
+        sparse_motion = create_sparse_motions(fs_compressed, kp_s, kp_d, Rs, Rd)
+        # [N,K+1,4,D,H,W] (per the original shape notes)
+        deformed_source = create_deformed_source_image(fs_compressed, sparse_motion)
+        # named `hourglass_in` rather than `input`, which shadowed the builtin
+        hourglass_in = torch.cat([heatmap_representation, deformed_source], dim=2).view(N, -1, D, H, W)
+        x = torch.cat([hourglass_in, self.up(self.down(hourglass_in))], dim=1)
+
+        tgt_head_inp = torch.cat([tgt_head_img, tgt_head_weights], dim=1)  # tgt_head_encoder expects 3 + 1 channels
+        tgt_head_inp = torch.nn.functional.interpolate(tgt_head_inp, size=(128,128), mode='bilinear')
+        tgt_head_feats = self.tgt_head_encoder(tgt_head_inp)
+        # resize to the spatial size of `x` (was hard-coded to 64x64) so the concatenation below lines up
+        tgt_head_feats = torch.nn.functional.interpolate(tgt_head_feats, size=(H, W), mode='bilinear')
+
+        fused_x = torch.cat([x, tgt_head_feats.unsqueeze(2).repeat([1,1,x.shape[2],1,1])], dim=1)
+        x = self.tgt_head_fuser(fused_x)
+
+        mask = self.mask_conv(x)
+        # [N,K+1,D,H,W,1]
+        mask = F.softmax(mask, dim=1).unsqueeze(-1)
+        # [N,D,H,W,3]
+        deformation = (sparse_motion * mask).sum(dim=1)
+        if self.predict_multiref_occ:
+            occlusion, occlusion_2 = self.create_occlusion(x.view(N, -1, H, W))
+            return deformation, occlusion, occlusion_2
+        return deformation, x.view(N, -1, H, W)
+
+    # x: torch.Tensor, N, M, H, W
+    def create_occlusion(self, x, deformed_source=None):
+        """Return two sigmoid occlusion maps, one per occlusion head, computed from `x`."""
+        # `deformed_source` is kept for interface compatibility; it is not used.
+        occlusion = torch.sigmoid(self.occlusion_conv(x))
+        occlusion_2 = torch.sigmoid(self.occlusion_conv2(x))
+        return occlusion, occlusion_2
+    
+
+
+class Generator(nn.Module):
+    """Generator: warps the 3D feature volume with a deformation field and decodes it to an RGB image.
+
+    Shape trace from the original notes: [N,32,16,64,64] -> [N,512,64,64] -> [N,256,64,64]
+    -> [N,128,128,128] -> [N,64,256,256] -> [N,3,256,256].
+    `more_res` is accepted but not read by this implementation.
+
+    Raises:
+        ValueError: if `model_scale` is not 'large', 'standard' or 'small'.
+    """
+    def __init__(self, input_channels=32, model_scale='standard', more_res=False):
+        super().__init__()
+        use_weight_norm = True
+        C = input_channels
+        up_seq = [256, 128, 64]  # identical for every supported scale
+        D = 16
+        if model_scale == 'large':
+            n_res, use_up_res = 12, True
+        elif model_scale in ('standard', 'small'):
+            n_res, use_up_res = 6, False
+        else:
+            raise ValueError(f"unsupported model_scale: {model_scale!r}")
+        self.in_conv = ConvBlock2D("CNA", C * D, up_seq[0], 3, 1, 1, use_weight_norm, nonlinearity_type="leakyrelu")
+        self.mid_conv = nn.Conv2d(up_seq[0], up_seq[0], 1, 1, 0)
+        self.res = nn.Sequential(*[ResBlock2D(up_seq[0], use_weight_norm) for _ in range(n_res)])
+        ups = []
+        for i in range(len(up_seq) - 1):
+            ups.append(UpBlock2D(up_seq[i], up_seq[i + 1], use_weight_norm))
+            if use_up_res:
+                # NOTE(review): the ResBlock2D call for self.res above passes use_weight_norm as the
+                # second argument; here the channel count is passed instead -- kept as-is, confirm intent.
+                ups.append(ResBlock2D(up_seq[i + 1], up_seq[i + 1]))
+        self.up = nn.Sequential(*ups)
+        self.out_conv = nn.Conv2d(up_seq[-1], 3, 7, 1, 3)
+
+    def forward(self, fs, deformation, occlusion, return_hid=False):
+        """Warp `fs` with `deformation`, then decode it via forward_with_deformed_feature()."""
+        deformed_fs = self.get_deformed_feature(fs, deformation)
+        return self.forward_with_deformed_feature(deformed_fs, occlusion, return_hid=return_hid)
+
+    def forward_with_deformed_feature(self, deformed_fs, occlusion, return_hid=False):
+        """Decode warped features to RGB; return `(rgb, hidden)` instead of `rgb` when `return_hid` is set.
+
+        `occlusion` is accepted for interface compatibility but is not used by this implementation.
+        """
+        fs = self.mid_conv(self.in_conv(deformed_fs))
+        fs = self.up(self.res(fs))
+        rgb = self.out_conv(fs)
+        return (rgb, fs) if return_hid else rgb
+
+    @staticmethod
+    def get_deformed_feature(fs, deformation):
+        N, _, D, H, W = fs.shape  # sample on the grid, then fold channel and depth into one axis
+        return F.grid_sample(fs, deformation, align_corners=True, padding_mode='border').view(N, -1, H, W)
+
+
+class Discriminator(nn.Module):
+    """Patch discriminator conditioned on 2-D keypoint heatmaps concatenated to the input image."""
+
+    def __init__(self, use_weight_norm=True, down_seq=(64, 128, 256, 512), K=15):
+        # `down_seq` defaults to a tuple: a list default would be one mutable object shared by all calls.
+        super().__init__()
+        last = len(down_seq) - 2  # index of the final intermediate block, the only one with stride 1
+        layers = [ConvBlock2D("CNA", 3 + K, down_seq[0], 3, 2, 1, use_weight_norm, "instance", "leakyrelu")]
+        for i, (c_in, c_out) in enumerate(zip(down_seq[:-1], down_seq[1:])):
+            stride = 2 if i < last else 1
+            layers.append(ConvBlock2D("CNA", c_in, c_out, 3, stride, 1, use_weight_norm, "instance", "leakyrelu"))
+        layers.append(ConvBlock2D("CN", down_seq[-1], 1, 3, 1, 1, use_weight_norm, activation_type="none"))
+        self.layers = nn.ModuleList(layers)
+
+    def forward(self, x, kp):
+        """Return `(output, features)`: the last layer's output and the outputs of every earlier layer.
+
+        The first two coordinates of `kp` are passed (detached) to kp2gaussian_2d at the size of `x`.
+        """
+        heatmap = kp2gaussian_2d(kp.detach()[:, :, :2], x.shape[2:])
+        x = torch.cat([x, heatmap], dim=1)
+        features = []
+        for layer in self.layers:
+            x = layer(x)
+            features.append(x)
+        return features[-1], features[:-1]
diff --git a/modules/real3d/img2plane_baseline.py b/modules/real3d/img2plane_baseline.py
new file mode 100644
index 0000000000000000000000000000000000000000..61299efb1391fcd739a0b1f82a4cbb97485f9615
--- /dev/null
+++ b/modules/real3d/img2plane_baseline.py
@@ -0,0 +1,202 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import numpy as np
+import torch
+import copy
+from modules.real3d.segformer import SegFormerImg2PlaneBackbone
+from modules.img2plane.triplane import OSGDecoder
+from modules.eg3ds.models.superresolution import SuperresolutionHybrid8XDC
+from modules.eg3ds.volumetric_rendering.renderer import ImportanceRenderer
+from modules.eg3ds.volumetric_rendering.ray_sampler import RaySampler
+from modules.img2plane.img2plane_model import Img2PlaneModel
+
+from utils.commons.hparams import hparams
+import torch.nn.functional as F
+import torch.nn as nn
+from modules.real3d.facev2v_warp.layers import *
+from einops import rearrange
+
+
+class SameBlock3d(nn.Module):
+    """
+    3D residual block (GroupNorm -> ReLU -> Conv3d, twice) with a learnable residual scale
+    `alpha` initialised to 0.01; the default kernel_size=3 / padding=1 keep the spatial size.
+    """
+
+    def __init__(self, in_features, kernel_size=3, padding=1):
+        super().__init__()
+        conv_kwargs = dict(in_channels=in_features, out_channels=in_features, kernel_size=kernel_size,
+                           padding=padding, padding_mode='replicate')
+        self.conv1 = nn.Conv3d(**conv_kwargs)
+        self.conv2 = nn.Conv3d(**conv_kwargs)
+        self.norm1 = nn.GroupNorm(4, in_features, affine=True)
+        self.norm2 = nn.GroupNorm(4, in_features, affine=True)
+        self.alpha = nn.Parameter(torch.tensor([0.01]))
+
+    def forward(self, x):
+        """Return `x + alpha * f(x)`, where f is the two norm/ReLU/conv stages applied in sequence."""
+        branch = x
+        stages = ((self.norm1, self.conv1), (self.norm2, self.conv2))
+        for norm, conv in stages:
+            # normalise and activate before each convolution
+            branch = conv(F.relu(norm(branch)))
+        return x + self.alpha * branch
+
+
+class Plane2GridModule(nn.Module):
+    """Apply 3D residual blocks to tri-plane features, treating each of the 3 planes as a batch item."""
+    def __init__(self, triplane_depth=3, in_out_dim=96):
+        super().__init__()
+        self.triplane_depth = triplane_depth
+        self.in_out_dim = in_out_dim
+        # one residual block when triplane_depth <= 3, two otherwise
+        self.num_layers_per_block = 1 if self.triplane_depth <= 3 else 2
+        blocks = [SameBlock3d(in_out_dim//3) for _ in range(self.num_layers_per_block)]
+        self.res_blocks_3d = nn.Sequential(*blocks)
+
+    def forward(self, x):
+        """Map [N, 3*C*D, H, W] to the same layout; in between the data is arranged as [N*3, C, D, H, W]."""
+        _, flat_channels, _, _ = x.shape
+        num_planes, depth = 3, self.triplane_depth
+        channels = flat_channels // depth // 3
+        assert channels == self.in_out_dim // 3
+        grid = rearrange(x, 'n (k c d) h w -> (n k) c d h w', k=num_planes, c=channels, d=depth)
+        grid = self.res_blocks_3d(grid)  # [N*3, C, D, H, W] going in
+        return rearrange(grid, '(n k) c d h w -> n (k c d) h w', k=num_planes)
+
+
+class OSAvatar_Img2plane(torch.nn.Module):
+    """Avatar model: image -> plane features (backbone) -> volume rendering -> super-resolution."""
+    def __init__(self, hp=None):
+        super().__init__()
+        global hparams  # NOTE(review): the module-level `hparams` is rebound to this instance's copy below
+        self.hparams = copy.copy(hparams) if hp is None else copy.copy(hp)
+        hparams = self.hparams
+        self._last_planes = None  # set by synthesis(cache_backbone=True); initialised so the first read cannot fail
+        self.camera_dim = 25 # extrinsic 4x4 + intrinsic 3x3
+        self.neural_rendering_resolution = hparams.get("neural_rendering_resolution", 128)
+        self.w_dim = hparams['w_dim']
+        self.img_resolution = hparams['final_resolution']
+        self.triplane_depth = hparams.get("triplane_depth", 1)
+
+        self.triplane_hid_dim = triplane_hid_dim = hparams.get("triplane_hid_dim", 32)
+        # extract canonical triplane from src img
+        self.img2plane_backbone = Img2PlaneModel(out_channels=3*triplane_hid_dim*self.triplane_depth, hp=hparams)
+        if hparams.get("triplane_feature_type", "triplane") in ['trigrid_v2']:
+            self.plane2grid_module = Plane2GridModule(triplane_depth=self.triplane_depth, in_out_dim=3*triplane_hid_dim) # add depth here
+
+        # decoder applied to the sampled plane features
+        self.decoder = OSGDecoder(triplane_hid_dim, {'decoder_lr_mul': 1, 'decoder_output_dim': triplane_hid_dim})
+        # create super resolution network
+        self.sr_num_fp16_res = 0
+        self.sr_kwargs = {'channel_base': hparams['base_channel'], 'channel_max': hparams['max_channel'], 'fused_modconv_default': 'inference_only'}
+        self.superresolution = SuperresolutionHybrid8XDC(channels=triplane_hid_dim, img_resolution=self.img_resolution, sr_num_fp16_res=self.sr_num_fp16_res, sr_antialias=True, large_sr=hparams.get('large_sr',False), **self.sr_kwargs)
+        # Rendering Options
+        self.renderer = ImportanceRenderer(hp=hparams)
+        self.ray_sampler = RaySampler()
+        self.rendering_kwargs = {'image_resolution': hparams['final_resolution'], 
+                            'disparity_space_sampling': False, 
+                            'clamp_mode': 'softplus',
+                            'gpc_reg_prob': hparams['gpc_reg_prob'], 
+                            'c_scale': 1.0, 
+                            'superresolution_noise_mode': 'none', 
+                            'density_reg': hparams['lambda_density_reg'], 'density_reg_p_dist': hparams['density_reg_p_dist'], 
+                            'reg_type': 'l1', 'decoder_lr_mul': 1.0, 
+                            'sr_antialias': True, 
+                            'depth_resolution': hparams['num_samples_coarse'], 
+                            'depth_resolution_importance': hparams['num_samples_fine'],
+                            'ray_start': 'auto', 'ray_end': 'auto',
+                            'box_warp': hparams.get("box_warp", 1.), # the 3DMM coordinate system == world coordinate system, and all 3DMM landmark coordinates lie within [-1,1]
+                            'avg_camera_radius': 2.7,
+                            'avg_camera_pivot': [0, 0, 0.2],
+                            'white_back': False,
+                            }
+
+    def cal_plane(self, img, cond=None, ret=None, **synthesis_kwargs):
+        """Run the backbone on `img` and return planes laid out as [B, 3, C*D, H, W]."""
+        hparams = self.hparams
+        planes = self.img2plane_backbone(img, cond, **synthesis_kwargs) #  [B, 3, C*D, H, W]
+        if hparams.get("triplane_feature_type", "triplane") in ['triplane', 'trigrid']:
+            planes = planes.view(len(planes), 3, self.triplane_hid_dim*self.triplane_depth, planes.shape[-2], planes.shape[-1])
+        elif hparams.get("triplane_feature_type", "triplane") in ['trigrid_v2']:
+            b, k, cd, h, w = planes.shape
+            planes = planes.reshape([b, k*cd, h, w])
+            planes = self.plane2grid_module(planes)
+            planes = planes.reshape([b, k, cd, h, w])
+        else:
+            raise NotImplementedError()
+        return planes # [B, 3, C*D, H, W]
+
+    def _forward_sr(self, rgb_image, feature_image, cond, ret, **synthesis_kwargs):
+        """Super-resolve the raw render; hparams 'sr_type' is 'vanilla' or 'spade' (segmap = cond['ref_head_img'])."""
+        hparams = self.hparams
+        ones_ws = torch.ones([feature_image.shape[0], 14, hparams['w_dim']], dtype=feature_image.dtype, device=feature_image.device)
+        if hparams.get("sr_type", "vanilla") == 'vanilla':
+            sr_image = self.superresolution(rgb_image, feature_image, ones_ws, noise_mode=self.rendering_kwargs['superresolution_noise_mode'], **{k:synthesis_kwargs[k] for k in synthesis_kwargs.keys() if k != 'noise_mode'})
+        elif hparams.get("sr_type", "vanilla") == 'spade':
+            sr_image = self.superresolution(rgb_image, feature_image, ones_ws, segmap=cond['ref_head_img'], noise_mode=self.rendering_kwargs['superresolution_noise_mode'], **{k:synthesis_kwargs[k] for k in synthesis_kwargs.keys() if k != 'noise_mode'})
+        else:
+            raise NotImplementedError(f"unsupported sr_type: {hparams.get('sr_type')!r}")  # was an UnboundLocalError
+        return sr_image
+
+    def synthesis(self, img, camera, cond=None, ret=None, update_emas=False, cache_backbone=False, use_cached_backbone=False, **synthesis_kwargs):
+        """Render `img` from `camera` (per row: flattened 4x4 cam2world then 3x3 intrinsics); fill and return `ret`."""
+        if ret is None: ret = {}
+        cam2world_matrix = camera[:, :16].view(-1, 4, 4)
+        intrinsics = camera[:, 16:25].view(-1, 3, 3)
+
+        # Create a batch of rays for volume rendering
+        ray_origins, ray_directions = self.ray_sampler(cam2world_matrix, intrinsics, self.neural_rendering_resolution)
+
+        # Create the planes with the image backbone, or reuse the ones cached by an earlier call
+        N, M, _ = ray_origins.shape
+        if use_cached_backbone and self._last_planes is not None:
+            planes = self._last_planes
+        else:
+            planes = self.cal_plane(img, cond, ret, **synthesis_kwargs)
+        if cache_backbone:
+            self._last_planes = planes
+
+        # Perform volume rendering
+        feature_samples, depth_samples, weights_samples, is_ray_valid = self.renderer(planes, self.decoder, ray_origins, ray_directions, self.rendering_kwargs) # channels last
+        # Reshape into 'raw' neural-rendered image
+        H = W = self.neural_rendering_resolution
+        feature_image = feature_samples.permute(0, 2, 1).reshape(N, feature_samples.shape[-1], H, W).contiguous()
+        weights_image = weights_samples.permute(0, 2, 1).reshape(N,1,H,W).contiguous() # [N,1,H,W]
+        depth_image = depth_samples.permute(0, 2, 1).reshape(N, 1, H, W)
+
+        if self.hparams.get("mask_invalid_rays", False):
+            is_ray_valid_mask = is_ray_valid.reshape([feature_samples.shape[0], 1,self.neural_rendering_resolution,self.neural_rendering_resolution]) # [B, 1, H, W]
+            feature_image[~is_ray_valid_mask.repeat([1,feature_image.shape[1],1,1])] = -1
+            depth_image[~is_ray_valid_mask] = depth_image[is_ray_valid_mask].min().item()
+
+        # Run superresolution to get final image
+        rgb_image = feature_image[:, :3]
+        ret['weights_img'] = weights_image
+        sr_image = self._forward_sr(rgb_image, feature_image, cond, ret, **synthesis_kwargs)
+        rgb_image = rgb_image.clamp(-1,1)
+        sr_image = sr_image.clamp(-1,1)
+        ret.update({'image_raw': rgb_image, 'image_depth': depth_image, 'image': sr_image, 'image_feature': feature_image[:, 3:], 'plane': planes})
+        return ret
+
+    def sample(self, coordinates, directions, img, cond=None, truncation_psi=1, truncation_cutoff=None, update_emas=False, ref_camera=None, **synthesis_kwargs):
+        """Compute RGB features and density for arbitrary 3D coordinates. Mostly used for extracting shapes."""
+        planes = self.cal_plane(img, cond, ret={}, ref_camera=ref_camera)
+        return self.renderer.run_model(planes, self.decoder, coordinates, directions, self.rendering_kwargs)
+
+    def forward(self, img, camera, cond=None, ret=None, update_emas=False, cache_backbone=False, use_cached_backbone=False, return_all=True, **synthesis_kwargs):
+        """Render a batch of generated images; thin wrapper around synthesis() (`return_all` is not used)."""
+        out = self.synthesis(img, camera, cond=cond, ret=ret, update_emas=update_emas, cache_backbone=cache_backbone, use_cached_backbone=use_cached_backbone, **synthesis_kwargs)
+        return out
diff --git a/modules/real3d/secc_img2plane.py b/modules/real3d/secc_img2plane.py
new file mode 100644
index 0000000000000000000000000000000000000000..16c3471533837f98a4feaa8928a07c0cdc087762
--- /dev/null
+++ b/modules/real3d/secc_img2plane.py
@@ -0,0 +1,137 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import numpy as np
+import torch
+import math
+from modules.real3d.segformer import SegFormerImg2PlaneBackbone, SegFormerSECC2PlaneBackbone
+from modules.real3d.img2plane_baseline import OSAvatar_Img2plane
+from modules.img2plane.img2plane_model import Img2PlaneModel
+from utils.commons.hparams import hparams
+# Maybe switch to attention instead? Use the plane as the value.
+
+class OSAvatarSECC_Img2plane(OSAvatar_Img2plane):
+    """Img2plane avatar whose planes fuse canonical (image) planes with planes predicted from PNCC conditions."""
+    def __init__(self, hp=None):
+        super().__init__(hp=hp)
+        hparams = self.hparams
+        # extract canonical triplane from src img
+        self.cano_img2plane_backbone = self.img2plane_backbone # rename
+        del self.img2plane_backbone
+        self.secc_img2plane_backbone = SegFormerSECC2PlaneBackbone(mode=hparams['secc_segformer_scale'], out_channels=3*self.triplane_hid_dim*self.triplane_depth, pncc_cond_mode=hparams['pncc_cond_mode'])
+        self.lambda_pertube_blink_secc = torch.nn.Parameter(torch.tensor([0.001]), requires_grad=False)
+        self.lambda_pertube_secc = torch.nn.Parameter(torch.tensor([0.001]), requires_grad=False)
+        self._last_cano_planes = None  # canonical planes cached by synthesis(cache_backbone=True)
+
+    def on_train_full_model(self):
+        self.requires_grad_(True)  # every parameter trainable
+
+    def on_train_nerf(self):
+        self.cano_img2plane_backbone.requires_grad_(True)
+        self.secc_img2plane_backbone.requires_grad_(True)
+        self.decoder.requires_grad_(True)
+        self.superresolution.requires_grad_(False)  # only the super-resolution module is frozen here
+
+    def on_train_superresolution(self):
+        self.cano_img2plane_backbone.requires_grad_(False)
+        self.secc_img2plane_backbone.requires_grad_(False)
+        self.decoder.requires_grad_(False)
+        self.superresolution.requires_grad_(True)  # only the super-resolution module is trained here
+
+    def cal_cano_plane(self, img, cond=None, **kwargs):
+        """Run the canonical backbone on `img` and return planes laid out as [B, 3, C*D, H, W]."""
+        feature_type = self.hparams.get("triplane_feature_type", "triplane")
+        planes = self.cano_img2plane_backbone(img, cond, **kwargs)  # [B, 3, C*D, H, W]
+        if feature_type in ['triplane', 'trigrid']:
+            planes = planes.view(len(planes), 3, self.triplane_hid_dim*self.triplane_depth, planes.shape[-2], planes.shape[-1])
+        elif feature_type in ['trigrid_v2']:
+            b, k, cd, h, w = planes.shape # k = 3
+            planes = planes.reshape([b, k*cd, h, w])
+            planes = self.plane2grid_module(planes)
+            planes = planes.reshape([b, k, cd, h, w])
+        else:
+            raise NotImplementedError()
+        return planes
+
+    def cal_secc_plane(self, cond):
+        """Predict planes from the PNCC maps in `cond` ('cond_cano', 'cond_src', 'cond_tgt'), concatenated on dim 1."""
+        cano_pncc, src_pncc, tgt_pncc = cond['cond_cano'], cond['cond_src'], cond['cond_tgt']
+        if self.hparams.get("pncc_cond_mode", "cano_tgt") == 'cano_src_tgt':
+            inp_pncc = torch.cat([cano_pncc, src_pncc, tgt_pncc], dim=1)
+        else:
+            inp_pncc = torch.cat([cano_pncc, tgt_pncc], dim=1)
+        return self.secc_img2plane_backbone(inp_pncc)
+
+    def cal_plane_given_cano(self, cano_planes, cond=None):
+        """Fuse `cano_planes` ([B, 3, C*D, H, W]) with the SECC planes by 'add' (default) or 'mul'."""
+        secc_planes = self.cal_secc_plane(cond) # [B, 3, C*D, H, W]
+        if self.hparams.get("phase1_plane_fusion_mode", "add") == 'add':
+            planes = cano_planes + secc_planes
+        elif self.hparams.get("phase1_plane_fusion_mode", "add") == 'mul':
+            planes = cano_planes * secc_planes
+        else: raise NotImplementedError()
+        return planes
+
+    def cal_plane(self, img, cond, ret=None, **kwargs):
+        """Return `(planes, cano_planes)`; note that the parent's cal_plane returns the planes only."""
+        cano_planes = self.cal_cano_plane(img, cond, **kwargs) # [B, 3, C*D, H, W]
+        return self.cal_plane_given_cano(cano_planes, cond), cano_planes
+
+    def sample(self, coordinates, directions, img, cond=None, truncation_psi=1, truncation_cutoff=None, update_emas=False, ref_camera=None, **synthesis_kwargs):
+        """Compute RGB features and density for arbitrary 3D coordinates. Mostly used for extracting shapes."""
+        planes, _ = self.cal_plane(img, cond, ret={}, ref_camera=ref_camera)
+        return self.renderer.run_model(planes, self.decoder, coordinates, directions, self.rendering_kwargs)
+
+    def synthesis(self, img, camera, cond=None, ret=None, update_emas=False, cache_backbone=True, use_cached_backbone=False, **synthesis_kwargs):
+        """Render `img` from `camera` (per row: flattened 4x4 cam2world then 3x3 intrinsics); fill and return `ret`."""
+        if ret is None: ret = {}
+        cam2world_matrix = camera[:, :16].view(-1, 4, 4)
+        intrinsics = camera[:, 16:25].view(-1, 3, 3)
+
+        # Create a batch of rays for volume rendering
+        ray_origins, ray_directions = self.ray_sampler(cam2world_matrix, intrinsics, self.neural_rendering_resolution)
+
+        # Build the planes; reuse the canonical planes cached by an earlier call when requested and available
+        N, M, _ = ray_origins.shape
+        cached_cano_planes = self._last_cano_planes if use_cached_backbone else None
+        if cached_cano_planes is not None:
+            cano_planes = cached_cano_planes
+            planes = self.cal_plane_given_cano(cano_planes, cond)
+        else:
+            # nothing cached (or caching not requested): compute from `img` instead of failing on a missing cache
+            planes, cano_planes = self.cal_plane(img, cond, ret, **synthesis_kwargs)
+        if cache_backbone:
+            self._last_cano_planes = cano_planes
+
+        # Perform volume rendering
+        feature_samples, depth_samples, weights_samples, is_ray_valid = self.renderer(planes, self.decoder, ray_origins, ray_directions, self.rendering_kwargs) # channels last
+        # Reshape into 'raw' neural-rendered image
+        H = W = self.neural_rendering_resolution
+        feature_image = feature_samples.permute(0, 2, 1).reshape(N, feature_samples.shape[-1], H, W).contiguous()
+        weights_image = weights_samples.permute(0, 2, 1).reshape(N,1,H,W).contiguous() # [N,1,H,W]
+        depth_image = depth_samples.permute(0, 2, 1).reshape(N, 1, H, W)
+
+        if self.hparams.get("mask_invalid_rays", False):
+            is_ray_valid_mask = is_ray_valid.reshape([feature_samples.shape[0], 1,self.neural_rendering_resolution,self.neural_rendering_resolution]) # [B, 1, H, W]
+            feature_image[~is_ray_valid_mask.repeat([1,feature_image.shape[1],1,1])] = -1
+            depth_image[~is_ray_valid_mask] = depth_image[is_ray_valid_mask].min().item()
+
+        # Run superresolution to get final image
+        rgb_image = feature_image[:, :3]
+        ret['weights_img'] = weights_image
+        sr_image = self._forward_sr(rgb_image, feature_image, cond, ret, **synthesis_kwargs)
+        rgb_image = rgb_image.clamp(-1,1)
+        sr_image = sr_image.clamp(-1,1)
+        ret.update({'image_raw': rgb_image, 'image_depth': depth_image, 'image': sr_image, 'image_feature': feature_image[:, 3:], 'plane': planes})
+        return ret
diff --git a/modules/real3d/secc_img2plane_torso.py b/modules/real3d/secc_img2plane_torso.py
new file mode 100644
index 0000000000000000000000000000000000000000..a98f4e3006b857fadfa0e4c5d750fa7ab1e83924
--- /dev/null
+++ b/modules/real3d/secc_img2plane_torso.py
@@ -0,0 +1,74 @@
+import torch
+from modules.real3d.secc_img2plane import OSAvatarSECC_Img2plane
+from modules.real3d.super_resolution.sr_with_ref import SuperresolutionHybrid8XDC_Warp
+from utils.commons.hparams import hparams
+
+
+class OSAvatarSECC_Img2plane_Torso(OSAvatarSECC_Img2plane):
+    def __init__(self, hp=None):
+        super().__init__(hp=hp)
+        del self.superresolution
+        self.superresolution = SuperresolutionHybrid8XDC_Warp(channels=32, img_resolution=self.img_resolution, sr_num_fp16_res=self.sr_num_fp16_res, sr_antialias=True, **self.sr_kwargs)
+    
+    def _forward_sr(self, rgb_image, feature_image, cond, ret, **synthesis_kwargs):
+        hparams = self.hparams
+        ones_ws = torch.ones([feature_image.shape[0], 14, hparams['w_dim']], dtype=feature_image.dtype, device=feature_image.device)
+        sr_image, facev2v_ret = self.superresolution(rgb_image, feature_image, ones_ws, cond['ref_torso_img'], cond['bg_img'], ret['weights_img'], cond['segmap'], cond['kp_s'], cond['kp_d'], cond.get('target_torso_mask'), noise_mode=self.rendering_kwargs['superresolution_noise_mode'], **{k:synthesis_kwargs[k] for k in synthesis_kwargs.keys() if k != 'noise_mode'})
+        ret.update(facev2v_ret)        
+        return sr_image
+
+    def infer_synthesis_stage1(self, img, camera, cond=None, ret=None, update_emas=False, cache_backbone=False, use_cached_backbone=False, **synthesis_kwargs):
+        hparams = self.hparams
+        if ret is None: ret = {}
+        cam2world_matrix = camera[:, :16].view(-1, 4, 4)
+        intrinsics = camera[:, 16:25].view(-1, 3, 3)
+
+        neural_rendering_resolution = self.neural_rendering_resolution
+
+        # Create a batch of rays for volume rendering
+        ray_origins, ray_directions = self.ray_sampler(cam2world_matrix, intrinsics, neural_rendering_resolution)
+
+        # Create triplanes by running StyleGAN backbone
+        N, M, _ = ray_origins.shape
+        if use_cached_backbone and self._last_planes is not None:
+            planes = self._last_planes
+        else:
+            planes = self.cal_plane(img, cond)
+        if cache_backbone:
+            self._last_planes = planes
+        
+        # Reshape output into three 32-channel planes
+        planes = planes.view(len(planes), 3, 32, planes.shape[-2], planes.shape[-1]) # [B, 3, 32, W, H]
+
+        # Perform volume rendering
+        feature_samples, depth_samples, weights_samples, is_ray_valid = self.renderer(planes, self.decoder, ray_origins, ray_directions, self.rendering_kwargs) # channels last
+
+        # Reshape into 'raw' neural-rendered image
+        H = W = self.neural_rendering_resolution
+        feature_image = feature_samples.permute(0, 2, 1).reshape(N, feature_samples.shape[-1], H, W).contiguous()
+        weights_image = weights_samples.permute(0, 2, 1).reshape(N,1,H,W).contiguous() # [N,1,H,W]
+        depth_image = depth_samples.permute(0, 2, 1).reshape(N, 1, H, W)
+
+        if hparams.get("mask_invalid_rays", False):
+            is_ray_valid_mask = is_ray_valid.reshape([feature_samples.shape[0], 1,self.neural_rendering_resolution,self.neural_rendering_resolution]) # [B, 1, H, W]
+            feature_image[~is_ray_valid_mask.repeat([1,feature_image.shape[1],1,1])] = -1
+            # feature_image[~is_ray_valid_mask.repeat([1,feature_image.shape[1],1,1])] *= 0
+            # feature_image[~is_ray_valid_mask.repeat([1,feature_image.shape[1],1,1])] -= 1
+            depth_image[~is_ray_valid_mask] = depth_image[is_ray_valid_mask].min().item()
+
+        # Run superresolution to get final image
+        rgb_image = feature_image[:, :3]
+        ret['weights_img'] = weights_image
+        ones_ws = torch.ones([feature_image.shape[0], 14, hparams['w_dim']], dtype=feature_image.dtype, device=feature_image.device)
+        facev2v_ret = self.superresolution.infer_forward_stage1(rgb_image, feature_image, ones_ws, cond['ref_torso_img'], cond['bg_img'], ret['weights_img'], cond['segmap'], cond['kp_s'], cond['kp_d'], noise_mode=self.rendering_kwargs['superresolution_noise_mode'], **{k:synthesis_kwargs[k] for k in synthesis_kwargs.keys() if k != 'noise_mode'})
+        rgb_image = rgb_image.clamp(-1,1)
+        facev2v_ret.update({'image_raw': rgb_image, 'image_depth': depth_image, 'image_feature': feature_image[:, 3:], 'plane': planes})
+        return facev2v_ret
+    
+    def infer_synthesis_stage2(self, facev2v_ret, **synthesis_kwargs):
+        hparams = self.hparams
+        ret = facev2v_ret
+        sr_image, facev2v_ret = self.superresolution.infer_forward_stage2(facev2v_ret, noise_mode=self.rendering_kwargs['superresolution_noise_mode'], **{k:synthesis_kwargs[k] for k in synthesis_kwargs.keys() if k != 'noise_mode'})
+        sr_image = sr_image.clamp(-1,1)
+        facev2v_ret['image'] = sr_image
+        return ret
\ No newline at end of file
diff --git a/modules/real3d/segformer.py b/modules/real3d/segformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..3087448fa40e1f19e637e0cfe00db95e8fc3b977
--- /dev/null
+++ b/modules/real3d/segformer.py
@@ -0,0 +1,807 @@
+# ---------------------------------------------------------------
+# Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
+#
+# This work is licensed under the NVIDIA Source Code License
+# ---------------------------------------------------------------
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+import torch.nn.functional as F
+from functools import partial
+import warnings
+from einops import rearrange
+
+from timm.models.layers import DropPath, to_2tuple, trunc_normal_
+from timm.models.registry import register_model
+from timm.models.vision_transformer import _cfg
+import math
+from mmcv.cnn import ConvModule
+
+from utils.commons.hparams import hparams
+
+
+def resize(input,
+           size=None,
+           scale_factor=None,
+           mode='nearest',
+           align_corners=None,
+           warning=True):
+    if warning:
+        if size is not None and align_corners:
+            input_h, input_w = tuple(int(x) for x in input.shape[2:])
+            output_h, output_w = tuple(int(x) for x in size)
+            if output_h > input_h or output_w > output_h:
+                if ((output_h > 1 and output_w > 1 and input_h > 1
+                     and input_w > 1) and (output_h - 1) % (input_h - 1)
+                        and (output_w - 1) % (input_w - 1)):
+                    warnings.warn(
+                        f'When align_corners={align_corners}, '
+                        'the output would more aligned if '
+                        f'input size {(input_h, input_w)} is `x+1` and '
+                        f'out size {(output_h, output_w)} is `nx+1`')
+    if isinstance(size, torch.Size):
+        size = tuple(int(x) for x in size)
+    return F.interpolate(input, size, scale_factor, mode, align_corners)
+
+
+class HeadMLP(nn.Module):
+    """
+    Linear Embedding
+    """
+    def __init__(self, input_dim=2048, embed_dim=768):
+        super().__init__()
+        self.proj = nn.Linear(input_dim, embed_dim)
+
+    def forward(self, x):
+        x = x.flatten(2).transpose(1, 2)
+        x = self.proj(x)
+        return x
+
+
+class Mlp(nn.Module):
+    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Linear(in_features, hidden_features)
+        self.dwconv = DWConv(hidden_features)
+        self.act = act_layer()
+        self.fc2 = nn.Linear(hidden_features, out_features)
+        self.drop = nn.Dropout(drop)
+
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            fan_out //= m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+
+    def forward(self, x, H, W):
+        x = self.fc1(x)
+        x = self.dwconv(x, H, W)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+
+
+class Attention(nn.Module):
+    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., sr_ratio=1):
+        super().__init__()
+        assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}."
+
+        self.dim = dim
+        self.num_heads = num_heads
+        head_dim = dim // num_heads
+        self.scale = qk_scale or head_dim ** -0.5
+
+        self.q = nn.Linear(dim, dim, bias=qkv_bias)
+        self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias)
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = nn.Linear(dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+
+        self.sr_ratio = sr_ratio
+        if sr_ratio > 1:
+            self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
+            self.norm = nn.LayerNorm(dim)
+
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            fan_out //= m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+
+    def forward(self, x, H, W):
+        B, N, C = x.shape
+        q = self.q(x).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)
+
+        if self.sr_ratio > 1:
+            x_ = x.permute(0, 2, 1).reshape(B, C, H, W)
+            x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1)
+            x_ = self.norm(x_)
+            kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
+        else:
+            kv = self.kv(x).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
+        k, v = kv[0], kv[1]
+
+        attn = (q @ k.transpose(-2, -1)) * self.scale
+        attn = attn.softmax(dim=-1)
+        attn = self.attn_drop(attn)
+
+        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
+        x = self.proj(x)
+        x = self.proj_drop(x)
+
+        return x
+
+
+class Block(nn.Module):
+
+    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
+                 drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, sr_ratio=1):
+        super().__init__()
+        self.norm1 = norm_layer(dim)
+        self.attn = Attention(
+            dim,
+            num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
+            attn_drop=attn_drop, proj_drop=drop, sr_ratio=sr_ratio)
+        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+        self.norm2 = norm_layer(dim)
+        mlp_hidden_dim = int(dim * mlp_ratio)
+        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
+
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            fan_out //= m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+
+    def forward(self, x, H, W):
+        x = x + self.drop_path(self.attn(self.norm1(x), H, W))
+        x = x + self.drop_path(self.mlp(self.norm2(x), H, W))
+
+        return x
+
+
+class OverlapPatchEmbed(nn.Module):
+    """ Image to Patch Embedding
+    """
+
+    def __init__(self, img_size=224, patch_size=7, stride=4, in_chans=3, embed_dim=768):
+        super().__init__()
+        img_size = to_2tuple(img_size)
+        patch_size = to_2tuple(patch_size)
+
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.H, self.W = img_size[0] // patch_size[0], img_size[1] // patch_size[1]
+        self.num_patches = self.H * self.W
+        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=stride,
+                              padding=(patch_size[0] // 2, patch_size[1] // 2))
+        self.norm = nn.LayerNorm(embed_dim)
+
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            fan_out //= m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+
+    def forward(self, x):
+        x = self.proj(x)
+        _, _, H, W = x.shape
+        x = x.flatten(2).transpose(1, 2)
+        x = self.norm(x)
+
+        return x, H, W
+
+
+class MixVisionTransformer(nn.Module):
+    def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dims=[64, 128, 256, 512],
+                 num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0.,
+                 attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm,
+                 depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1]):
+        super().__init__()
+        self.num_classes = num_classes
+        self.depths = depths
+
+        # patch_embed
+        self.patch_embed1 = OverlapPatchEmbed(img_size=img_size, patch_size=7, stride=4, in_chans=in_chans,
+                                              embed_dim=embed_dims[0])
+        self.patch_embed2 = OverlapPatchEmbed(img_size=img_size // 4, patch_size=3, stride=2, in_chans=embed_dims[0],
+                                              embed_dim=embed_dims[1])
+        self.patch_embed3 = OverlapPatchEmbed(img_size=img_size // 8, patch_size=3, stride=2, in_chans=embed_dims[1],
+                                              embed_dim=embed_dims[2])
+        self.patch_embed4 = OverlapPatchEmbed(img_size=img_size // 16, patch_size=3, stride=2, in_chans=embed_dims[2],
+                                              embed_dim=embed_dims[3])
+
+        # transformer encoder
+        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule
+        cur = 0
+        self.block1 = nn.ModuleList([Block(
+            dim=embed_dims[0], num_heads=num_heads[0], mlp_ratio=mlp_ratios[0], qkv_bias=qkv_bias, qk_scale=qk_scale,
+            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
+            sr_ratio=sr_ratios[0])
+            for i in range(depths[0])])
+        self.norm1 = norm_layer(embed_dims[0])
+
+        cur += depths[0]
+        self.block2 = nn.ModuleList([Block(
+            dim=embed_dims[1], num_heads=num_heads[1], mlp_ratio=mlp_ratios[1], qkv_bias=qkv_bias, qk_scale=qk_scale,
+            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
+            sr_ratio=sr_ratios[1])
+            for i in range(depths[1])])
+        self.norm2 = norm_layer(embed_dims[1])
+
+        cur += depths[1]
+        self.block3 = nn.ModuleList([Block(
+            dim=embed_dims[2], num_heads=num_heads[2], mlp_ratio=mlp_ratios[2], qkv_bias=qkv_bias, qk_scale=qk_scale,
+            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
+            sr_ratio=sr_ratios[2])
+            for i in range(depths[2])])
+        self.norm3 = norm_layer(embed_dims[2])
+
+        cur += depths[2]
+        self.block4 = nn.ModuleList([Block(
+            dim=embed_dims[3], num_heads=num_heads[3], mlp_ratio=mlp_ratios[3], qkv_bias=qkv_bias, qk_scale=qk_scale,
+            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
+            sr_ratio=sr_ratios[3])
+            for i in range(depths[3])])
+        self.norm4 = norm_layer(embed_dims[3])
+
+        # classification head
+        # self.head = nn.Linear(embed_dims[3], num_classes) if num_classes > 0 else nn.Identity()
+
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            fan_out //= m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+
+    def reset_drop_path(self, drop_path_rate):
+        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(self.depths))]
+        cur = 0
+        for i in range(self.depths[0]):
+            self.block1[i].drop_path.drop_prob = dpr[cur + i]
+
+        cur += self.depths[0]
+        for i in range(self.depths[1]):
+            self.block2[i].drop_path.drop_prob = dpr[cur + i]
+
+        cur += self.depths[1]
+        for i in range(self.depths[2]):
+            self.block3[i].drop_path.drop_prob = dpr[cur + i]
+
+        cur += self.depths[2]
+        for i in range(self.depths[3]):
+            self.block4[i].drop_path.drop_prob = dpr[cur + i]
+
+    def freeze_patch_emb(self):
+        self.patch_embed1.requires_grad = False
+
+    @torch.jit.ignore
+    def no_weight_decay(self):
+        return {'pos_embed1', 'pos_embed2', 'pos_embed3', 'pos_embed4', 'cls_token'}  # has pos_embed may be better
+
+    def get_classifier(self):
+        return self.head
+
+    def reset_classifier(self, num_classes, global_pool=''):
+        self.num_classes = num_classes
+        self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()
+
+    def forward_features(self, x):
+        B = x.shape[0]
+        outs = []
+
+        # stage 1
+        x, H, W = self.patch_embed1(x)
+        for i, blk in enumerate(self.block1):
+            x = blk(x, H, W)
+        x = self.norm1(x)
+        x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+        outs.append(x)
+
+        # stage 2
+        x, H, W = self.patch_embed2(x)
+        for i, blk in enumerate(self.block2):
+            x = blk(x, H, W)
+        x = self.norm2(x)
+        x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+        outs.append(x)
+
+        # stage 3
+        x, H, W = self.patch_embed3(x)
+        for i, blk in enumerate(self.block3):
+            x = blk(x, H, W)
+        x = self.norm3(x)
+        x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+        outs.append(x)
+
+        # stage 4
+        x, H, W = self.patch_embed4(x)
+        for i, blk in enumerate(self.block4):
+            x = blk(x, H, W)
+        x = self.norm4(x)
+        x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+        outs.append(x)
+
+        return outs
+
+    def forward(self, x):
+        x = self.forward_features(x)
+        # x = self.head(x)
+
+        return x
+
+
+class DWConv(nn.Module):
+    def __init__(self, dim=768):
+        super(DWConv, self).__init__()
+        self.dwconv = nn.Conv2d(dim, dim, 3, 1, 1, bias=True, groups=dim)
+
+    def forward(self, x, H, W):
+        B, N, C = x.shape
+        x = x.transpose(1, 2).view(B, C, H, W)
+        x = self.dwconv(x)
+        x = x.flatten(2).transpose(1, 2)
+        return x
+
+
+class mit_b0(MixVisionTransformer): # 3.319M
+    def __init__(self, **kwargs):
+        super(mit_b0, self).__init__(
+            patch_size=4, embed_dims=[32, 64, 160, 256], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1)
+        self.load_state_dict(torch.load('checkpoints/pretrained_ckpts/mit_b0.pth'), strict=False)
+
+
+class mit_b1(MixVisionTransformer): # 13.151M 
+    def __init__(self, **kwargs):
+        super(mit_b1, self).__init__(
+            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1)
+        self.load_state_dict(torch.load('checkpoints/pretrained_ckpts/mit_b1.pth'), strict=False)
+
+
+
+class mit_b2(MixVisionTransformer): # 24.196M
+    def __init__(self, **kwargs):
+        super(mit_b2, self).__init__(
+            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1)
+        self.load_state_dict(torch.load('checkpoints/pretrained_ckpts/mit_b2.pth'), strict=False)
+
+
+
+class mit_b3(MixVisionTransformer): # 44.072M
+    def __init__(self, **kwargs):
+        super(mit_b3, self).__init__(
+            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 18, 3], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1)
+        self.load_state_dict(torch.load('checkpoints/pretrained_ckpts/mit_b3.pth'), strict=False)
+
+
+class mit_b4(MixVisionTransformer): # 60.843M
+    def __init__(self, **kwargs):
+        super(mit_b4, self).__init__(
+            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 8, 27, 3], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1)
+        self.load_state_dict(torch.load('checkpoints/pretrained_ckpts/mit_b4.pth'), strict=False)
+
+
+class mit_b5(MixVisionTransformer): # 81.443M
+    def __init__(self, **kwargs):
+        super(mit_b5, self).__init__(
+            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 6, 40, 3], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1)
+        self.load_state_dict(torch.load('checkpoints/pretrained_ckpts/mit_b5.pth'), strict=False)
+    
+
+class SegFormerHead(nn.Module):
+    """
+    SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers
+    """
+    def __init__(self, segformer_scale='b3'):
+        super().__init__()
+        self.segformer_scale = segformer_scale
+        
+        self.in_channels = [64, 128, 320, 512] if self.segformer_scale != 'b0' else [32, 64, 160, 256]
+        self.feature_strides = [4, 8, 16, 32]
+        self.in_index = [0, 1, 2, 3]
+        self.input_transform='multiple_select'
+        self.dropout = nn.Dropout2d(0.1)
+        c1_in_channels, c2_in_channels, c3_in_channels, c4_in_channels = self.in_channels
+        embedding_dim = self.embedding_dim = 256
+        self.linear_c4 = HeadMLP(input_dim=c4_in_channels, embed_dim=embedding_dim)
+        self.linear_c3 = HeadMLP(input_dim=c3_in_channels, embed_dim=embedding_dim)
+        self.linear_c2 = HeadMLP(input_dim=c2_in_channels, embed_dim=embedding_dim)
+        self.linear_c1 = HeadMLP(input_dim=c1_in_channels, embed_dim=embedding_dim)
+
+        if dist.is_initialized():
+            self.linear_fuse = ConvModule(
+                in_channels=embedding_dim*4,
+                out_channels=embedding_dim,
+                kernel_size=1,
+                norm_cfg=dict(type='SyncBN', requires_grad=True)
+            )
+        else:
+            self.linear_fuse = ConvModule(
+                in_channels=embedding_dim*4,
+                out_channels=embedding_dim,
+                kernel_size=1,
+                norm_cfg=dict(type='BN', requires_grad=True)
+            )
+
+    def _transform_inputs(self, inputs):
+        """Transform inputs for decoder.
+
+        Args:
+            inputs (list[Tensor]): List of multi-level img features.
+
+        Returns:
+            Tensor: The transformed inputs
+        """
+
+        if self.input_transform == 'multiple_select':
+            inputs = [inputs[i] for i in self.in_index]
+        else:
+            inputs = inputs[self.in_index]
+
+        return inputs
+    
+    def forward(self, inputs):
+        x = self._transform_inputs(inputs)  # len=4, 1/4,1/8,1/16,1/32
+        c1, c2, c3, c4 = x
+
+        ############## MLP decoder on C1-C4 ###########
+        n, _, h, w = c4.shape
+
+        _c4 = self.linear_c4(c4).permute(0,2,1).reshape(n, -1, c4.shape[2], c4.shape[3])
+        _c4 = resize(_c4, size=c1.size()[2:],mode='bilinear',align_corners=False)
+
+        _c3 = self.linear_c3(c3).permute(0,2,1).reshape(n, -1, c3.shape[2], c3.shape[3])
+        _c3 = resize(_c3, size=c1.size()[2:],mode='bilinear',align_corners=False)
+
+        _c2 = self.linear_c2(c2).permute(0,2,1).reshape(n, -1, c2.shape[2], c2.shape[3])
+        _c2 = resize(_c2, size=c1.size()[2:],mode='bilinear',align_corners=False)
+
+        _c1 = self.linear_c1(c1).permute(0,2,1).reshape(n, -1, c1.shape[2], c1.shape[3])
+
+        _c = self.linear_fuse(torch.cat([_c4, _c3, _c2, _c1], dim=1))
+
+        x = self.dropout(_c)
+
+        return x
+
+
+# from modules.hidenerf.models.networks_stylegan2 import Conv2dLayer
+from modules.eg3ds.models.networks_stylegan2 import Conv2dLayer
+class conv(nn.Module):
+    def __init__(self, num_in_layers, num_out_layers, kernel_size, up=1, down=1):
+        super(conv, self).__init__()
+        self.conv = Conv2dLayer(num_in_layers, num_out_layers, kernel_size, activation='elu', up=up, down=down)
+        self.bn = nn.InstanceNorm2d(
+            num_out_layers, track_running_stats=False, affine=True
+        )
+
+    def forward(self, x):
+        return self.bn(self.conv(x))
+
+
+class SegFormerImg2PlaneBackbone(nn.Module):
+    def __init__(self, mode='b3'):
+        super().__init__()
+        mode2cls = {
+            'b0': mit_b0,
+            'b1': mit_b1,
+            'b2': mit_b2,
+            'b3': mit_b3,
+            'b4': mit_b4,
+            'b5': mit_b5,
+        }
+        self.mode = mode
+        self.mix_vit = mode2cls[mode]()
+        self.fuse_head = SegFormerHead(mode)
+
+        self.to_plane_cnn = nn.Sequential(*[
+            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
+            nn.LeakyReLU(negative_slope=0.01, inplace=True),
+            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
+            nn.LeakyReLU(negative_slope=0.01, inplace=True),
+            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
+            nn.LeakyReLU(negative_slope=0.01, inplace=True),
+            nn.UpsamplingBilinear2d(scale_factor=2.),
+            nn.Conv2d(in_channels=256, out_channels=96, kernel_size=3, stride=1, padding=1),
+        ])
+
+    def forward(self, x):
+        """
+        x: [B, 3, H=512, W=512]
+        return:
+            plane: [B, 96, H=256, W=256]
+        """
+
+        feats = self.mix_vit(x)
+        fused_feat = self.fuse_head(feats)
+
+        planes = self.to_plane_cnn(fused_feat)
+
+        planes = planes.view(len(planes), 3, -1, planes.shape[-2], planes.shape[-1])
+        planes_xy = planes[:,0]
+        planes_xy = torch.flip(planes_xy, [2])
+        planes_xz = planes[:,1]
+        planes_xz = torch.flip(planes_xz, [2])
+        planes_zy = planes[:,2]
+        planes_zy = torch.flip(planes_zy, [2, 3])
+        planes = torch.stack([planes_xy, planes_xz, planes_zy], dim=1) # [N, 3, C, H, W]
+        
+        return planes
+
+
+class TemporalAttNet(nn.Module):
+    """
+    Used to smooth the secc_plane with a window input
+    """
+    def __init__(self, in_dim=96, seq_len=5):
+        super().__init__()
+        self.seq_len = seq_len
+        self.conv2d_layers = nn.Sequential(*[
+            # [B, C=96, T, H=224, W=224] ==> [B, 64, T, 112, 112]
+            nn.Conv3d(in_dim, 64, kernel_size=(1, 3, 3), stride=1, padding=(0, 1, 1)),
+            nn.LeakyReLU(0.02, True),
+            nn.Conv3d(64, 64, kernel_size=(1, 3, 3), stride=1, padding=(0, 1, 1)),
+            nn.LeakyReLU(0.02, True),
+            nn.AvgPool3d(kernel_size=(1, 2, 2), stride=(1,2,2), count_include_pad=False),
+            # [B, C=64, T, H=112, W=112] ==> [B, 32, T, 56, 56]
+            nn.Conv3d(64, 32, kernel_size=(1, 3, 3), stride=1, padding=(0, 1, 1)),
+            nn.LeakyReLU(0.02, True),
+            nn.Conv3d(32, 32, kernel_size=(1, 3, 3), stride=1, padding=(0, 1, 1)),
+            nn.LeakyReLU(0.02, True),
+            nn.AvgPool3d(kernel_size=(1, 2, 2), stride=(1,2,2), count_include_pad=False),
+            # [B, C=32, T, H=56, W=56] ==> [B, 16, T, 28, 28]
+            nn.Conv3d(32, 16, kernel_size=(1, 3, 3), stride=1, padding=(0, 1, 1)),
+            nn.LeakyReLU(0.02, True),
+            nn.Conv3d(16, 16, kernel_size=(1, 3, 3), stride=1, padding=(0, 1, 1)),
+            nn.LeakyReLU(0.02, True),
+            nn.AvgPool3d(kernel_size=(1, 2, 2), stride=(1,2,2), count_include_pad=False),
+        ])
+
+        self.conv3d_layers = nn.Sequential(*[
+            # [B, C=16, T, H=28, W=28] ==> [B, 8, T, 14, 14]
+            nn.Conv3d(16, 8, kernel_size=3, stride=1, padding=1),
+            nn.LeakyReLU(0.02, True),
+            nn.AvgPool3d(kernel_size=(1, 2, 2), stride=(1,2,2), count_include_pad=False),
+            # [B, C=8, T, H=14, W=14] ==> [B, 8, T, 7, 7]
+            nn.Conv3d(8, 8, kernel_size=3, stride=1, padding=1),
+            nn.LeakyReLU(0.02, True),
+            nn.AvgPool3d(kernel_size=(1, 2, 2), stride=(1,2,2), count_include_pad=False),
+            # [B, C=8, T, H=7, W=7] ==> [B, 4, T, 1, 1]
+            nn.Conv3d(8, 4, kernel_size=3, stride=1, padding=1),
+            nn.LeakyReLU(0.02, True),
+            nn.Conv3d(4, 2, kernel_size=3, stride=1, padding=1),
+            nn.LeakyReLU(0.02, True),
+            nn.Conv3d(2, 1, kernel_size=3, stride=1, padding=1),
+            nn.LeakyReLU(0.02, True),
+            nn.AvgPool3d(kernel_size=(1, 7, 7), stride=1, count_include_pad=False),
+        ])
+
+        self.to_attention_weights = nn.Sequential(
+            nn.Linear(in_features=self.seq_len, out_features=self.seq_len, bias=True),
+            nn.Softmax(dim=1)
+        )
+
+    def forward(self, x):
+        """Collapse the T axis of x using per-frame weights predicted from x itself.
+        x: [B, C, T, H, W]; y: [B, T] per-frame weights; out: [B, C, H, W].
+        NOTE(review): self.to_attention_weights (Linear + Softmax) is never called
+        here, so y is applied un-normalized -- confirm this is intended.
+        """
+        b,c,t,h,w = x.shape
+        y = F.interpolate(x, size=(t, 224, 224), mode='trilinear') # resize H and W to 224, keep T
+        y = self.conv2d_layers(y) # [B, 16, T, 28, 28]
+        y = self.conv3d_layers(y) # [B, 1, T, 1, 1]
+        y = y.squeeze(1, 3, 4) # [B, T]
+        assert y.ndim == 2
+        y = y.reshape([b, 1, t, 1, 1]) # back to a shape that broadcasts against x
+        out = (y * x).sum(dim=2) # weighted sum over the T axis
+        return out
+
+
+class SegFormerSECC2PlaneBackbone(nn.Module):
+    """prenet -> mix_vit -> SegFormerHead -> to_plane_cnn, returning three flipped feature planes."""
+    def __init__(self, mode='b0', out_channels=96, pncc_cond_mode='cano_src_tgt'):
+        super().__init__()
+        # mit_b* constructors keyed by the `mode` string.
+        backbone_factories = dict(
+            b0=mit_b0,
+            b1=mit_b1,
+            b2=mit_b2,
+            b3=mit_b3,
+            b4=mit_b4,
+            b5=mit_b5,
+        )
+        self.mode = mode
+        self.pncc_cond_mode = pncc_cond_mode
+        # 'cano_src_tgt' conditioning passes 9 to the prenet; any other mode passes 6.
+        if pncc_cond_mode == 'cano_src_tgt':
+            prenet_in_dim = 9
+        else:
+            prenet_in_dim = 6
+        self.prenet = Conv2dLayer(prenet_in_dim, 3, 1)
+        self.mix_vit = backbone_factories[mode]()
+        self.fuse_head = SegFormerHead(mode)
+
+        # Three (3x3 conv 256->256, LeakyReLU) stages, a 2x bilinear upsample, then the output conv.
+        plane_layers = []
+        for _ in range(3):
+            plane_layers.append(nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1))
+            plane_layers.append(nn.LeakyReLU(negative_slope=0.01, inplace=True))
+        plane_layers.append(nn.UpsamplingBilinear2d(scale_factor=2.))
+        plane_layers.append(nn.Conv2d(256, out_channels, kernel_size=3, stride=1, padding=1))
+        self.to_plane_cnn = nn.Sequential(*plane_layers)
+        # if hparams['use_motion_smo_net']:
+            # self.motion_smo_win_size = hparams['motion_smo_win_size']
+            # self.smo_net = TemporalAttNet(in_dim=out_channels, seq_len=hparams['motion_smo_win_size'])
+
+    def forward(self, x):
+        """
+        x: [B, C_in, H, W]; C_in is presumably the 9 or 6 handed to the prenet in __init__.
+        return:
+            planes: [N, 3, C, H, W] in (xy, xz, zy) order, C = to_plane_cnn output channels // 3.
+        """
+        # if hparams['use_motion_smo_net']:
+            # assert x.ndim == 5
+            # x = rearrange(x, "n c t h w -> (n t) c h w", t=self.motion_smo_win_size)
+        fused_feat = self.fuse_head(self.mix_vit(self.prenet(x)))
+        planes = self.to_plane_cnn(fused_feat)
+
+        # if hparams['use_motion_smo_net']:
+            # planes = rearrange(planes, "(n t) c h w -> n c t h w", t=self.motion_smo_win_size)
+            # planes = self.smo_net(planes)
+
+        # Split the channel axis into 3 plane groups: [N, 3*C, H, W] -> [N, 3, C, H, W].
+        batch, height, width = len(planes), planes.shape[-2], planes.shape[-1]
+        planes = planes.view(batch, 3, -1, height, width)
+        # Flip dims per plane, indexing the [N, C, H, W] slice: xy and xz flip dim 2, zy flips dims 2 and 3.
+        flip_axes = ([2], [2], [2, 3])
+        flipped = [torch.flip(planes[:, k], axes) for k, axes in enumerate(flip_axes)]
+        return torch.stack(flipped, dim=1)
+
+
+# from modules.hidenerf.new_modules.texture2plane_parser import Texture2PlaneParser
+# class SegFormerTexture2PlaneBackbone(nn.Module):
+#     def __init__(self, mode='b1'):
+#         super().__init__()
+#         mode2cls = {
+#             'b0': mit_b0,
+#             'b1': mit_b1,
+#             'b2': mit_b2,
+#             'b3': mit_b3,
+#             'b4': mit_b4,
+#             'b5': mit_b5,
+#         }
+#         self.mode = mode
+#         self.prenet = Conv2dLayer(5, 3, 1)
+#         self.tex2plane_parser = Texture2PlaneParser()
+#         self.mix_vit = mode2cls[mode]()
+#         self.fuse_head = SegFormerHead(mode)
+
+#         if hparams.get("new_tex_mode", False) is True:
+#             self.to_plane_cnn1 = nn.Sequential(*[
+#                 nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
+#                 nn.LeakyReLU(negative_slope=0.01, inplace=True),
+#                 nn.UpsamplingBilinear2d(scale_factor=2.),
+#             ])
+#             self.to_plane_cnn2 = nn.Sequential(*[
+#                 nn.Conv2d(in_channels=256*3, out_channels=256, kernel_size=3, stride=1, padding=1),
+#                 nn.LeakyReLU(negative_slope=0.01, inplace=True),
+#                 nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
+#                 nn.LeakyReLU(negative_slope=0.01, inplace=True),
+#                 nn.Conv2d(in_channels=256, out_channels=96, kernel_size=3, stride=1, padding=1)
+#             ])
+#         else:
+#             self.to_plane_cnn = nn.Sequential(*[
+#                 nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
+#                 nn.LeakyReLU(negative_slope=0.01, inplace=True),
+#                 nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
+#                 nn.LeakyReLU(negative_slope=0.01, inplace=True),
+#                 nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
+#                 nn.LeakyReLU(negative_slope=0.01, inplace=True),
+#                 nn.UpsamplingBilinear2d(scale_factor=2.),
+#                 nn.Conv2d(in_channels=256, out_channels=32, kernel_size=3, stride=1, padding=1),
+#             ])
+        
+#     def forward(self, x, idx_pixel_to_plane):
+#         """
+#         x: [B, 3, H=512, W=512]
+#         return:
+#             plane: [B, 96, H=256, W=256]
+#         """
+#         feats = self.mix_vit(x)
+#         fused_feat = self.fuse_head(feats) # [B, 256, 128, 128]
+#         if hparams.get("new_tex_mode", False) is True:
+#             fused_feat = self.to_plane_cnn1(fused_feat) # [B, 96, 256, 256]
+#             fused_feat = fused_feat.unsqueeze(1).repeat([1, 3, 1, 1, 1]) # [B, 3, 96, 256, 256]
+#             tex_plane = self.tex2plane_parser(fused_feat, idx_pixel_to_plane) # [B, 3, 96, 256, 256]
+#             tex_plane = rearrange(tex_plane, "n k c h w -> n (k c) h w") # [B, 3*96, 256, 256]
+#             tex_plane = self.to_plane_cnn2(tex_plane) # [B, 96, 256, 256]
+#             tex_plane = rearrange(tex_plane, "n (k c) h w -> n k c h w", k=3, c=32) # [B, 3*96, 256, 256]
+#         else:
+#             fused_feat = self.to_plane_cnn(fused_feat) # [B, 32, 256, 256]
+#             fused_feat = fused_feat.unsqueeze(1).repeat([1, 3, 1, 1, 1]) # [B, 3, 32, 256, 256]
+#             tex_plane = self.tex2plane_parser(fused_feat, idx_pixel_to_plane) # [B, 3, 32, 256, 256]
+#         return tex_plane
+    
+
+if __name__ == '__main__':
+    # Smoke test: SegFormerTexture2PlaneBackbone is commented out above, so run the class that exists.
+    import tqdm
+    img2plane = SegFormerSECC2PlaneBackbone()
+    img2plane.cuda()
+    x = torch.randn([4, 9, 512, 512]).cuda()  # 9 channels: default pncc_cond_mode='cano_src_tgt'
+    for _ in tqdm.trange(100):
+        y = img2plane(x)
+    print(" ")
\ No newline at end of file
diff --git a/modules/real3d/super_resolution/sr_with_ref.py b/modules/real3d/super_resolution/sr_with_ref.py
new file mode 100644
index 0000000000000000000000000000000000000000..914653e24f7205ceb8f7c1e363e97f9a5e86e1f8
--- /dev/null
+++ b/modules/real3d/super_resolution/sr_with_ref.py
@@ -0,0 +1,230 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.nn.utils.spectral_norm as spectral_norm
+from modules.eg3ds.models.networks_stylegan2 import SynthesisBlock
+from modules.eg3ds.models.superresolution import SynthesisBlockNoUp
+
+from modules.eg3ds.models.superresolution import SuperresolutionHybrid8XDC
+from modules.real3d.facev2v_warp.model import WarpBasedTorsoModelMediaPipe as torso_model_v1
+from modules.real3d.facev2v_warp.model2 import WarpBasedTorsoModelMediaPipe as torso_model_v2
+
+from utils.commons.hparams import hparams
+from utils.commons.image_utils import dilate, erode
+
+
+class SuperresolutionHybrid8XDC_Warp(SuperresolutionHybrid8XDC):
+    """SuperresolutionHybrid8XDC variant that blends the head with a warped torso and a reference background."""
+    def __init__(self, channels, img_resolution, sr_num_fp16_res, sr_antialias, **block_kwargs):
+        """Build the torso model, the torso/background encoders and the fusion convs selected by hparams."""
+        super().__init__(channels, img_resolution, sr_num_fp16_res, sr_antialias, **block_kwargs)
+        if hparams.get("torso_model_version", "v1") == 'v1':
+            self.torso_model = torso_model_v1('standard')
+        elif hparams.get("torso_model_version", "v1") == 'v2':
+            self.torso_model = torso_model_v2('standard')
+        else: raise NotImplementedError()
+        self.torso_encoder = nn.Sequential(*[
+            nn.Conv2d(64, 256, 1, 1, padding=0),
+        ])
+        self.bg_encoder = nn.Sequential(*[
+            nn.Conv2d(3, 64, 3, 1, padding=1),
+            nn.LeakyReLU(),
+            nn.Conv2d(64, 256, 3, 1, padding=1),
+            nn.LeakyReLU(),
+            nn.Conv2d(256, 256, 3, 1, padding=1),
+        ])
+        if hparams.get("weight_fuse", True):
+            if hparams['htbsr_head_weight_fuse_mode'] in ['v1']:
+                fuse_in_dim = 512
+            # every other mode additionally builds the head/torso fusion modules used by forward()'s v2/v3 branches
+            else:
+                fuse_in_dim = 512
+                self.head_torso_alpha_predictor = nn.Sequential(*[
+                    nn.Conv2d(3+1+3, 32, 3, 1, padding=1),
+                    nn.LeakyReLU(),
+                    nn.Conv2d(32, 32, 3, 1, padding=1),
+                    nn.LeakyReLU(),
+                    nn.Conv2d(32, 1, 3, 1, padding=1),
+                    nn.Sigmoid(),
+                ])
+                self.fuse_head_torso_convs = nn.Sequential(*[
+                    nn.Conv2d(256+256, 256, 3, 1, padding=1),
+                    nn.LeakyReLU(),
+                    nn.Conv2d(256, 256, 3, 1, padding=1),
+                ])
+                self.head_torso_block = SynthesisBlockNoUp(256, 256, w_dim=512, resolution=256,
+                    img_channels=3, is_last=False, use_fp16=False, conv_clamp=None, **block_kwargs)
+        else:
+            fuse_in_dim = 768
+        self.fuse_fg_bg_convs = nn.Sequential(*[
+            nn.Conv2d(fuse_in_dim, 64, 1, 1, padding=0),
+            nn.LeakyReLU(),
+            nn.Conv2d(64, 256, 3, 1, padding=1),
+            nn.LeakyReLU(),
+            nn.Conv2d(256, 256, 3, 1, padding=1),
+        ])
+
+    def forward(self, rgb, x, ws, ref_torso_rgb, ref_bg_rgb, weights_img, segmap, kp_s, kp_d, target_torso_mask=None, **block_kwargs):
+        """Run block0 on the head, warp the torso, fuse head/torso/background, then run block1.
+        Returns (rgb, facev2v_ret), where facev2v_ret is the dict-like output of the torso model.
+        """
+        weights_img = weights_img.detach()
+        ws = ws[:, -1:, :].expand([rgb.shape[0], 3, -1])
+        if x.shape[-1] != self.input_resolution:
+            x = torch.nn.functional.interpolate(x, size=(self.input_resolution, self.input_resolution),
+                                                  mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+            rgb = torch.nn.functional.interpolate(rgb, size=(self.input_resolution, self.input_resolution),
+                                                  mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+
+        rgb_256 = torch.nn.functional.interpolate(rgb, size=(256, 256), mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+        weights_256 = torch.nn.functional.interpolate(weights_img, size=(256, 256), mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+        ref_torso_rgb_256 = torch.nn.functional.interpolate(ref_torso_rgb, size=(256, 256), mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+        ref_bg_rgb_256 = torch.nn.functional.interpolate(ref_bg_rgb, size=(256, 256), mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+        x, rgb = self.block0(x, rgb, ws, **block_kwargs) # sr branch, 128x128 head img ==> 256x256 head img
+        if hparams.get("torso_model_version", "v1") == 'v1':
+            rgb_torso, facev2v_ret = self.torso_model.forward(ref_torso_rgb_256, segmap, kp_s, kp_d, rgb_256.detach(), cal_loss=True, target_torso_mask=target_torso_mask)
+        elif hparams.get("torso_model_version", "v1") == 'v2':
+            rgb_torso, facev2v_ret = self.torso_model.forward(ref_torso_rgb_256, segmap, kp_s, kp_d, rgb_256.detach(), weights_256.detach(), cal_loss=True, target_torso_mask=target_torso_mask)
+        x_torso = self.torso_encoder(facev2v_ret['deformed_torso_hid'])
+
+        x_bg = self.bg_encoder(ref_bg_rgb_256)
+
+        if hparams.get("weight_fuse", True):
+            if hparams['htbsr_head_weight_fuse_mode'] == 'v1':
+                rgb = rgb * weights_256 + rgb_torso * (1-weights_256) # get person img
+                x = x * weights_256 + x_torso * (1-weights_256) # get person img
+                head_occlusion = weights_256.clone()    
+                htbsr_head_threshold = hparams['htbsr_head_threshold']
+                head_occlusion[head_occlusion > htbsr_head_threshold] = 1.
+                torso_occlusion = torch.nn.functional.interpolate(facev2v_ret['occlusion_2'], size=(256, 256), mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+                person_occlusion = (torso_occlusion + head_occlusion).clamp_(0,1)
+                rgb = rgb * person_occlusion + ref_bg_rgb_256 * (1-person_occlusion) # run6
+                x = torch.cat([x * person_occlusion, x_bg * (1-person_occlusion)], dim=1) # run6
+                x = self.fuse_fg_bg_convs(x)
+                x, rgb = self.block1(x, rgb, ws, **block_kwargs)
+
+            elif hparams['htbsr_head_weight_fuse_mode'] == 'v2':
+                # fuse the head/torso features x with an alpha-weighted concat; replaces the earlier direct alpha-weighted sum
+                head_torso_alpha = weights_256.clone()
+                head_torso_alpha[head_torso_alpha>weights_256] = weights_256[head_torso_alpha>weights_256]
+                rgb = rgb * head_torso_alpha + rgb_torso * (1-head_torso_alpha) # get person img
+                x = torch.cat([x * head_torso_alpha, x_torso * (1-head_torso_alpha)], dim=1) 
+                x = self.fuse_head_torso_convs(x)
+                x, rgb = self.head_torso_block(x, rgb, ws, **block_kwargs)
+
+                head_occlusion = head_torso_alpha.clone()    
+                # once the weights are encouraged to match the mask, the head-weight threshold no longer needs hand-tuning to a tiny value; 0.7 is enough
+                htbsr_head_threshold = hparams['htbsr_head_threshold']
+                head_occlusion[head_occlusion > htbsr_head_threshold] = 1.
+                torso_occlusion = torch.nn.functional.interpolate(facev2v_ret['occlusion_2'], size=(256, 256), mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+                person_occlusion = (torso_occlusion + head_occlusion).clamp_(0,1)
+                rgb = rgb * person_occlusion + ref_bg_rgb_256 * (1-person_occlusion) # run6
+                x = torch.cat([x * person_occlusion, x_bg * (1-person_occlusion)], dim=1) # run6
+                x = self.fuse_fg_bg_convs(x)
+                x, rgb = self.block1(x, rgb, ws, **block_kwargs)
+
+            elif hparams['htbsr_head_weight_fuse_mode'] == 'v3':
+                # v2: fuse the head/torso features x with an alpha-weighted concat; replaces the earlier direct alpha-weighted sum
+                # v3: additionally post-process the head mask with a small network
+                head_torso_alpha_inp = torch.cat([rgb.clamp(-1,1)/2+0.5, weights_256, rgb_torso.clamp(-1,1)/2+0.5], dim=1)
+                head_torso_alpha_ = self.head_torso_alpha_predictor(head_torso_alpha_inp)
+                head_torso_alpha = head_torso_alpha_.clone()
+                head_torso_alpha[head_torso_alpha>weights_256] = weights_256[head_torso_alpha>weights_256]
+                rgb = rgb * head_torso_alpha + rgb_torso * (1-head_torso_alpha) # get person img
+                x = torch.cat([x * head_torso_alpha, x_torso * (1-head_torso_alpha)], dim=1) # run6
+                x = self.fuse_head_torso_convs(x)
+                x, rgb = self.head_torso_block(x, rgb, ws, **block_kwargs)
+
+                head_occlusion = head_torso_alpha.clone()    
+                htbsr_head_threshold = hparams['htbsr_head_threshold']
+                if not self.training:
+                    head_occlusion_ = head_occlusion[head_occlusion>0.05]
+                    if head_occlusion_.numel() > 0: # quantile() raises on an empty tensor (no value above 0.05)
+                        htbsr_head_threshold = max(head_occlusion_.quantile(0.05), htbsr_head_threshold) # filter out the lowest 5% of the voxels that are above 0.05
+                head_occlusion[head_occlusion > htbsr_head_threshold] = 1.
+                torso_occlusion = torch.nn.functional.interpolate(facev2v_ret['occlusion_2'], size=(256, 256), mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+                person_occlusion = (torso_occlusion + head_occlusion).clamp_(0,1)
+                rgb = rgb * person_occlusion + ref_bg_rgb_256 * (1-person_occlusion) # run6
+                # TODO: revisit this -- drop the occlusion from the concat, or truncate the occlusion?
+                x = torch.cat([x * person_occlusion, x_bg * (1-person_occlusion)], dim=1) # run6
+                x = self.fuse_fg_bg_convs(x)
+                x, rgb = self.block1(x, rgb, ws, **block_kwargs)
+
+            else:
+                # v4 tried fusing the head/torso hidden features with a plain concat; it did not work well
+                # v5 try1 also removed the alpha from the concat when processing x, but that drove the occlusion straight to 1, so it was dropped
+                # v5 try2 also thresholded the torso to make its rgb sharper; may cause dark borders around the torso?
+                raise NotImplementedError()
+        else:
+            x = torch.cat([x, x_torso, x_bg], dim=1) # run6
+            x = self.fuse_fg_bg_convs(x)
+            x, rgb = self.block1(x, None, ws, **block_kwargs)
+        return rgb, facev2v_ret
+
+    @torch.no_grad()
+    def infer_forward_stage1(self, rgb, x, ws, ref_torso_rgb, ref_bg_rgb, weights_img, segmap, kp_s, kp_d, **block_kwargs):
+        """Inference stage 1: run block0 and the torso model's stage 1, storing what stage 2 reads in the returned dict."""
+        weights_img = weights_img.detach()
+        ws = ws[:, -1:, :].repeat(1, 3, 1)
+        if x.shape[-1] != self.input_resolution:
+            x = torch.nn.functional.interpolate(x, size=(self.input_resolution, self.input_resolution),
+                                                  mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+            rgb = torch.nn.functional.interpolate(rgb, size=(self.input_resolution, self.input_resolution),
+                                                  mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+
+        rgb_256 = torch.nn.functional.interpolate(rgb, size=(256, 256), mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+        weights_256 = torch.nn.functional.interpolate(weights_img, size=(256, 256), mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+        ref_torso_rgb_256 = torch.nn.functional.interpolate(ref_torso_rgb, size=(256, 256), mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+        ref_bg_rgb_256 = torch.nn.functional.interpolate(ref_bg_rgb, size=(256, 256), mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+
+        x, rgb = self.block0(x, rgb, ws, **block_kwargs)
+
+        facev2v_ret = self.torso_model.infer_forward_stage1(ref_torso_rgb_256, segmap, kp_s, kp_d, rgb_256.detach(), cal_loss=True)
+        facev2v_ret['ref_bg_rgb_256'] = ref_bg_rgb_256
+        facev2v_ret['weights_256'] = weights_256
+        facev2v_ret['x'] = x
+        facev2v_ret['ws'] = ws
+        facev2v_ret['rgb'] = rgb
+        return facev2v_ret
+
+    @torch.no_grad()
+    def infer_forward_stage2(self, facev2v_ret, **block_kwargs):
+        """Inference stage 2: finish the torso model, fuse head/torso/background and run block1; returns (rgb, facev2v_ret)."""
+        x = facev2v_ret['x']
+        ws = facev2v_ret['ws']
+        rgb = facev2v_ret['rgb']
+        ref_bg_rgb_256 = facev2v_ret['ref_bg_rgb_256']
+        weights_256 = facev2v_ret['weights_256']
+        rgb_torso = self.torso_model.infer_forward_stage2(facev2v_ret)
+        x_torso = self.torso_encoder(facev2v_ret['deformed_torso_hid'])
+        x_bg = self.bg_encoder(ref_bg_rgb_256)
+        if hparams.get("weight_fuse", True):
+            # NOTE(review): always the 'v1' blend with a fixed 0.5 threshold; forward() branches on hparams['htbsr_head_weight_fuse_mode'] -- confirm
+            rgb = rgb * weights_256 + rgb_torso * (1-weights_256) # get person img
+            x = x * weights_256 + x_torso * (1-weights_256) # get person img
+            head_occlusion = weights_256.clone()
+            head_occlusion[head_occlusion > 0.5] = 1.
+            torso_occlusion = torch.nn.functional.interpolate(facev2v_ret['occlusion_2'], size=(256, 256), mode='bilinear', align_corners=False, antialias=self.sr_antialias)
+            person_occlusion = (torso_occlusion + head_occlusion).clamp_(0,1)
+
+            rgb = rgb * person_occlusion + ref_bg_rgb_256 * (1-person_occlusion) # run6
+            x = torch.cat([x * person_occlusion, x_bg * (1-person_occlusion)], dim=1) # run6
+            x = self.fuse_fg_bg_convs(x)
+            x, rgb = self.block1(x, rgb, ws, **block_kwargs)
+        else:
+            x = torch.cat([x, x_torso, x_bg], dim=1) # run6
+            x = self.fuse_fg_bg_convs(x)
+            x, rgb = self.block1(x, None, ws, **block_kwargs)
+        return rgb, facev2v_ret
+   
+
+if __name__ == '__main__':  # manual smoke test; needs a CUDA device
+    model = SuperresolutionHybrid8XDC_Warp(32,512,0, False)
+    model.cuda()
+    rgb = torch.randn([4, 3, 128, 128]).cuda()
+    x = torch.randn([4, 32, 128, 128]).cuda()
+    ws = torch.randn([4, 14, 512]).cuda()
+    ref_rgb = torch.randn([4, 3, 128, 128]).cuda()
+    ref_torso_rgb = torch.randn([4, 3, 128, 128]).cuda()
+    y = model(rgb, x, ws, ref_rgb, ref_torso_rgb) # NOTE(review): forward() also requires weights_img, segmap, kp_s, kp_d, so this call raises TypeError; ref_rgb also lands in the ref_torso_rgb slot
+    print(" ")
\ No newline at end of file
diff --git a/pytorch3d/.circleci/build_count.py b/pytorch3d/.circleci/build_count.py
new file mode 100644
index 0000000000000000000000000000000000000000..aecb54d1bfeeecfcc90570a68552f97011780cec
--- /dev/null
+++ b/pytorch3d/.circleci/build_count.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Print the number of nightly builds
+"""
+
+from collections import Counter
+
+import yaml
+
+with open("config.yml") as config_file:  # close the handle instead of leaking it
+    conf = yaml.safe_load(config_file)
+jobs = conf["workflows"]["build_and_test"]["jobs"]
+
+
+def jobtype(job):
+    """Return the job's name: the string itself, or the sole key of a one-entry mapping."""
+    if isinstance(job, str):
+        return job
+    if len(job) != 1:
+        return "MULTIPLE PARTS"
+    return next(iter(job.keys()))
+
+
+counts = Counter(map(jobtype, jobs))  # number of jobs per job type
+for job_name, count in counts.items():
+    print(job_name, count)
+print(f"\n{len(jobs)}")  # blank separator line, then the total number of jobs
diff --git a/pytorch3d/.circleci/check.sh b/pytorch3d/.circleci/check.sh
new file mode 100644
index 0000000000000000000000000000000000000000..bea4614153227769e0ab676ba857a473e32bfc8c
--- /dev/null
+++ b/pytorch3d/.circleci/check.sh
@@ -0,0 +1,13 @@
+#!/bin/bash -e
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Run this script before committing config.yml to verify it is valid yaml.
+
+python -c 'import yaml; yaml.safe_load(open("config.yml"))' && echo OK - valid yaml
+msg="circleci not installed so can't check schema"
+# Print $msg only when circleci is absent; `A && B || C` also printed it when validation itself failed.
+if command -v circleci > /dev/null; then (cd ..; circleci config validate); else echo "$msg"; fi
diff --git a/pytorch3d/.circleci/config.in.yml b/pytorch3d/.circleci/config.in.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b32ba66a68c3a8e3bc232e5d2c36024bca799c5e
--- /dev/null
+++ b/pytorch3d/.circleci/config.in.yml
@@ -0,0 +1,171 @@
+version: 2.1
+
+#examples:
+#https://github.com/facebookresearch/ParlAI/blob/master/.circleci/config.yml
+#https://github.com/facebookresearch/hydra/blob/master/.circleci/config.yml
+#https://github.com/facebookresearch/habitat-api/blob/master/.circleci/config.yml
+
+#drive tests with nox or tox or pytest?
+
+# -------------------------------------------------------------------------------------
+# environments where we run our jobs
+# -------------------------------------------------------------------------------------
+
+
+setupcuda: &setupcuda
+  run:
+    name: Setup CUDA
+    working_directory: ~/
+    command: |
+      # download and install nvidia drivers, cuda, etc
+      wget --no-verbose --no-clobber -P ~/nvidia-downloads https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run
+      sudo sh ~/nvidia-downloads/cuda_11.3.1_465.19.01_linux.run --silent
+      echo "Done installing CUDA."
+      pyenv versions
+      nvidia-smi
+      pyenv global 3.9.1
+
+binary_common: &binary_common
+  parameters:
+    # Edit these defaults to do a release
+    build_version:
+      description: "version number of release binary; by default, build a nightly"
+      type: string
+      default: ""
+    pytorch_version:
+      description: "PyTorch version to build against; by default, use a nightly"
+      type: string
+      default: ""
+    # Don't edit these
+    python_version:
+      description: "Python version to build against (e.g., 3.7)"
+      type: string
+    cu_version:
+      description: "CUDA version to build against, in CU format (e.g., cpu or cu100)"
+      type: string
+    wheel_docker_image:
+      description: "Wheel only: what docker image to use"
+      type: string
+      default: "pytorch/manylinux-cuda101"
+    conda_docker_image:
+      description: "what docker image to use for docker"
+      type: string
+      default: "pytorch/conda-cuda"
+  environment:
+    PYTHON_VERSION: << parameters.python_version >>
+    BUILD_VERSION: << parameters.build_version >>
+    PYTORCH_VERSION: << parameters.pytorch_version >>
+    CU_VERSION: << parameters.cu_version >>
+    TESTRUN_DOCKER_IMAGE: << parameters.conda_docker_image >>
+
+jobs:
+  main:
+    environment:
+      CUDA_VERSION: "11.3"
+    resource_class: gpu.nvidia.small.multi
+    machine:
+      image: linux-cuda-11:default
+    steps:
+      - checkout
+      - <<: *setupcuda
+      - run: pip3 install --progress-bar off imageio wheel matplotlib 'pillow<7'
+      - run: pip3 install --progress-bar off torch==1.10.0+cu113 torchvision==0.11.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
+      # - run: conda create -p ~/conda_env python=3.7 numpy
+      # - run: conda activate ~/conda_env
+      # - run: conda install -c pytorch pytorch torchvision
+
+      - run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/fvcore'
+      - run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/iopath'
+      - run:
+          name: build
+          command: |
+            export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.3/lib64
+            python3 setup.py build_ext --inplace
+      - run: LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.3/lib64 python -m unittest discover -v -s tests -t .
+      - run: python3 setup.py bdist_wheel
+
+  binary_linux_wheel:
+    <<: *binary_common
+    docker:
+      - image: << parameters.wheel_docker_image >>
+        auth:
+          username: $DOCKERHUB_USERNAME
+          password: $DOCKERHUB_TOKEN
+    resource_class: 2xlarge+
+    steps:
+      - checkout
+      - run: MAX_JOBS=15 packaging/build_wheel.sh
+      - store_artifacts:
+          path: dist
+      - persist_to_workspace:
+          root: dist
+          paths:
+            - "*"
+
+  binary_linux_conda:
+    <<: *binary_common
+    docker:
+      - image: "<< parameters.conda_docker_image >>"
+        auth:
+          username: $DOCKERHUB_USERNAME
+          password: $DOCKERHUB_TOKEN
+    resource_class: 2xlarge+
+    steps:
+      - checkout
+      # This is building with cuda but no gpu present,
+      # so we aren't running the tests.
+      - run:
+          name: build
+          no_output_timeout: 40m
+          command: MAX_JOBS=15 TEST_FLAG=--no-test python3 packaging/build_conda.py
+      - store_artifacts:
+          path: /opt/conda/conda-bld/linux-64
+      - persist_to_workspace:
+          root: /opt/conda/conda-bld/linux-64
+          paths:
+            - "*"
+
+  binary_linux_conda_cuda:
+    <<: *binary_common
+    machine:
+      image: linux-cuda-11:default
+    resource_class: gpu.nvidia.small.multi
+    steps:
+    - checkout
+
+    - run:
+        name: Pull docker image
+        command: |
+          nvidia-smi
+          set -e
+
+          { docker login -u="$DOCKERHUB_USERNAME" -p="$DOCKERHUB_TOKEN" ; } 2> /dev/null
+
+          echo Pulling docker image $TESTRUN_DOCKER_IMAGE
+          docker pull $TESTRUN_DOCKER_IMAGE
+    - run:
+        name: Build and run tests
+        no_output_timeout: 40m
+        command: |
+          set -e
+
+          cd ${HOME}/project/
+
+          export JUST_TESTRUN=1
+          VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e CU_VERSION -e JUST_TESTRUN"
+
+          docker run --gpus all  --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${TESTRUN_DOCKER_IMAGE} python3 ./packaging/build_conda.py
+
+workflows:
+  version: 2
+  build_and_test:
+    jobs:
+      # - main:
+      #     context: DOCKERHUB_TOKEN
+      {{workflows()}}
+      - binary_linux_conda_cuda:
+          name: testrun_conda_cuda_py310_cu117_pyt201
+          context: DOCKERHUB_TOKEN
+          python_version: "3.10"
+          pytorch_version: '2.0.1'
+          cu_version: "cu117"
diff --git a/pytorch3d/.circleci/config.yml b/pytorch3d/.circleci/config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..53be30b941dcb497d204a9b1eb1ea36b8e0817b2
--- /dev/null
+++ b/pytorch3d/.circleci/config.yml
@@ -0,0 +1,478 @@
+version: 2.1
+
+#examples:
+#https://github.com/facebookresearch/ParlAI/blob/master/.circleci/config.yml
+#https://github.com/facebookresearch/hydra/blob/master/.circleci/config.yml
+#https://github.com/facebookresearch/habitat-api/blob/master/.circleci/config.yml
+
+#drive tests with nox or tox or pytest?
+
+# -------------------------------------------------------------------------------------
+# environments where we run our jobs
+# -------------------------------------------------------------------------------------
+
+
+setupcuda: &setupcuda
+  run:
+    name: Setup CUDA
+    working_directory: ~/
+    command: |
+      # download and install nvidia drivers, cuda, etc
+      wget --no-verbose --no-clobber -P ~/nvidia-downloads https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run
+      sudo sh ~/nvidia-downloads/cuda_11.3.1_465.19.01_linux.run --silent
+      echo "Done installing CUDA."
+      pyenv versions
+      nvidia-smi
+      pyenv global 3.9.1
+
+binary_common: &binary_common
+  parameters:
+    # Edit these defaults to do a release
+    build_version:
+      description: "version number of release binary; by default, build a nightly"
+      type: string
+      default: ""
+    pytorch_version:
+      description: "PyTorch version to build against; by default, use a nightly"
+      type: string
+      default: ""
+    # Don't edit these
+    python_version:
+      description: "Python version to build against (e.g., 3.7)"
+      type: string
+    cu_version:
+      description: "CUDA version to build against, in CU format (e.g., cpu or cu100)"
+      type: string
+    wheel_docker_image:
+      description: "Wheel only: what docker image to use"
+      type: string
+      default: "pytorch/manylinux-cuda101"
+    conda_docker_image:
+      description: "what docker image to use for docker"
+      type: string
+      default: "pytorch/conda-cuda"
+  environment:
+    PYTHON_VERSION: << parameters.python_version >>
+    BUILD_VERSION: << parameters.build_version >>
+    PYTORCH_VERSION: << parameters.pytorch_version >>
+    CU_VERSION: << parameters.cu_version >>
+    TESTRUN_DOCKER_IMAGE: << parameters.conda_docker_image >>
+
+jobs:
+  main:
+    environment:
+      CUDA_VERSION: "11.3"
+    resource_class: gpu.nvidia.small.multi
+    machine:
+      image: linux-cuda-11:default
+    steps:
+      - checkout
+      - <<: *setupcuda
+      - run: pip3 install --progress-bar off imageio wheel matplotlib 'pillow<7'
+      - run: pip3 install --progress-bar off torch==1.10.0+cu113 torchvision==0.11.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
+      # - run: conda create -p ~/conda_env python=3.7 numpy
+      # - run: conda activate ~/conda_env
+      # - run: conda install -c pytorch pytorch torchvision
+
+      - run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/fvcore'
+      - run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/iopath'
+      - run:
+          name: build
+          command: |
+            export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.3/lib64
+            python3 setup.py build_ext --inplace
+      - run: LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.3/lib64 python -m unittest discover -v -s tests -t .
+      - run: python3 setup.py bdist_wheel
+
+  binary_linux_wheel:
+    <<: *binary_common
+    docker:
+      - image: << parameters.wheel_docker_image >>
+        auth:
+          username: $DOCKERHUB_USERNAME
+          password: $DOCKERHUB_TOKEN
+    resource_class: 2xlarge+
+    steps:
+      - checkout
+      - run: MAX_JOBS=15 packaging/build_wheel.sh
+      - store_artifacts:
+          path: dist
+      - persist_to_workspace:
+          root: dist
+          paths:
+            - "*"
+
+  binary_linux_conda:
+    <<: *binary_common
+    docker:
+      - image: "<< parameters.conda_docker_image >>"
+        auth:
+          username: $DOCKERHUB_USERNAME
+          password: $DOCKERHUB_TOKEN
+    resource_class: 2xlarge+
+    steps:
+      - checkout
+      # This is building with cuda but no gpu present,
+      # so we aren't running the tests.
+      - run:
+          name: build
+          no_output_timeout: 40m
+          command: MAX_JOBS=15 TEST_FLAG=--no-test python3 packaging/build_conda.py
+      - store_artifacts:
+          path: /opt/conda/conda-bld/linux-64
+      - persist_to_workspace:
+          root: /opt/conda/conda-bld/linux-64
+          paths:
+            - "*"
+
+  binary_linux_conda_cuda:
+    <<: *binary_common
+    machine:
+      image: linux-cuda-11:default
+    resource_class: gpu.nvidia.small.multi
+    steps:
+    - checkout
+
+    - run:
+        name: Pull docker image
+        command: |
+          nvidia-smi
+          set -e
+
+          { docker login -u="$DOCKERHUB_USERNAME" -p="$DOCKERHUB_TOKEN" ; } 2> /dev/null
+
+          echo Pulling docker image $TESTRUN_DOCKER_IMAGE
+          docker pull $TESTRUN_DOCKER_IMAGE
+    - run:
+        name: Build and run tests
+        no_output_timeout: 40m
+        command: |
+          set -e
+
+          cd ${HOME}/project/
+
+          export JUST_TESTRUN=1
+          VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e CU_VERSION -e JUST_TESTRUN"
+
+          docker run --gpus all  --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${TESTRUN_DOCKER_IMAGE} python3 ./packaging/build_conda.py
+
+workflows:
+  version: 2
+  build_and_test:
+    jobs:
+      # - main:
+      #     context: DOCKERHUB_TOKEN
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda113
+          context: DOCKERHUB_TOKEN
+          cu_version: cu113
+          name: linux_conda_py38_cu113_pyt1120
+          python_version: '3.8'
+          pytorch_version: 1.12.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda116
+          context: DOCKERHUB_TOKEN
+          cu_version: cu116
+          name: linux_conda_py38_cu116_pyt1120
+          python_version: '3.8'
+          pytorch_version: 1.12.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda113
+          context: DOCKERHUB_TOKEN
+          cu_version: cu113
+          name: linux_conda_py38_cu113_pyt1121
+          python_version: '3.8'
+          pytorch_version: 1.12.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda116
+          context: DOCKERHUB_TOKEN
+          cu_version: cu116
+          name: linux_conda_py38_cu116_pyt1121
+          python_version: '3.8'
+          pytorch_version: 1.12.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda116
+          context: DOCKERHUB_TOKEN
+          cu_version: cu116
+          name: linux_conda_py38_cu116_pyt1130
+          python_version: '3.8'
+          pytorch_version: 1.13.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda117
+          context: DOCKERHUB_TOKEN
+          cu_version: cu117
+          name: linux_conda_py38_cu117_pyt1130
+          python_version: '3.8'
+          pytorch_version: 1.13.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda116
+          context: DOCKERHUB_TOKEN
+          cu_version: cu116
+          name: linux_conda_py38_cu116_pyt1131
+          python_version: '3.8'
+          pytorch_version: 1.13.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda117
+          context: DOCKERHUB_TOKEN
+          cu_version: cu117
+          name: linux_conda_py38_cu117_pyt1131
+          python_version: '3.8'
+          pytorch_version: 1.13.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda117
+          context: DOCKERHUB_TOKEN
+          cu_version: cu117
+          name: linux_conda_py38_cu117_pyt200
+          python_version: '3.8'
+          pytorch_version: 2.0.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda118
+          context: DOCKERHUB_TOKEN
+          cu_version: cu118
+          name: linux_conda_py38_cu118_pyt200
+          python_version: '3.8'
+          pytorch_version: 2.0.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda117
+          context: DOCKERHUB_TOKEN
+          cu_version: cu117
+          name: linux_conda_py38_cu117_pyt201
+          python_version: '3.8'
+          pytorch_version: 2.0.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda118
+          context: DOCKERHUB_TOKEN
+          cu_version: cu118
+          name: linux_conda_py38_cu118_pyt201
+          python_version: '3.8'
+          pytorch_version: 2.0.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda118
+          context: DOCKERHUB_TOKEN
+          cu_version: cu118
+          name: linux_conda_py38_cu118_pyt210
+          python_version: '3.8'
+          pytorch_version: 2.1.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda121
+          context: DOCKERHUB_TOKEN
+          cu_version: cu121
+          name: linux_conda_py38_cu121_pyt210
+          python_version: '3.8'
+          pytorch_version: 2.1.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda113
+          context: DOCKERHUB_TOKEN
+          cu_version: cu113
+          name: linux_conda_py39_cu113_pyt1120
+          python_version: '3.9'
+          pytorch_version: 1.12.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda116
+          context: DOCKERHUB_TOKEN
+          cu_version: cu116
+          name: linux_conda_py39_cu116_pyt1120
+          python_version: '3.9'
+          pytorch_version: 1.12.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda113
+          context: DOCKERHUB_TOKEN
+          cu_version: cu113
+          name: linux_conda_py39_cu113_pyt1121
+          python_version: '3.9'
+          pytorch_version: 1.12.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda116
+          context: DOCKERHUB_TOKEN
+          cu_version: cu116
+          name: linux_conda_py39_cu116_pyt1121
+          python_version: '3.9'
+          pytorch_version: 1.12.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda116
+          context: DOCKERHUB_TOKEN
+          cu_version: cu116
+          name: linux_conda_py39_cu116_pyt1130
+          python_version: '3.9'
+          pytorch_version: 1.13.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda117
+          context: DOCKERHUB_TOKEN
+          cu_version: cu117
+          name: linux_conda_py39_cu117_pyt1130
+          python_version: '3.9'
+          pytorch_version: 1.13.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda116
+          context: DOCKERHUB_TOKEN
+          cu_version: cu116
+          name: linux_conda_py39_cu116_pyt1131
+          python_version: '3.9'
+          pytorch_version: 1.13.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda117
+          context: DOCKERHUB_TOKEN
+          cu_version: cu117
+          name: linux_conda_py39_cu117_pyt1131
+          python_version: '3.9'
+          pytorch_version: 1.13.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda117
+          context: DOCKERHUB_TOKEN
+          cu_version: cu117
+          name: linux_conda_py39_cu117_pyt200
+          python_version: '3.9'
+          pytorch_version: 2.0.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda118
+          context: DOCKERHUB_TOKEN
+          cu_version: cu118
+          name: linux_conda_py39_cu118_pyt200
+          python_version: '3.9'
+          pytorch_version: 2.0.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda117
+          context: DOCKERHUB_TOKEN
+          cu_version: cu117
+          name: linux_conda_py39_cu117_pyt201
+          python_version: '3.9'
+          pytorch_version: 2.0.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda118
+          context: DOCKERHUB_TOKEN
+          cu_version: cu118
+          name: linux_conda_py39_cu118_pyt201
+          python_version: '3.9'
+          pytorch_version: 2.0.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda118
+          context: DOCKERHUB_TOKEN
+          cu_version: cu118
+          name: linux_conda_py39_cu118_pyt210
+          python_version: '3.9'
+          pytorch_version: 2.1.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda121
+          context: DOCKERHUB_TOKEN
+          cu_version: cu121
+          name: linux_conda_py39_cu121_pyt210
+          python_version: '3.9'
+          pytorch_version: 2.1.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda113
+          context: DOCKERHUB_TOKEN
+          cu_version: cu113
+          name: linux_conda_py310_cu113_pyt1120
+          python_version: '3.10'
+          pytorch_version: 1.12.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda116
+          context: DOCKERHUB_TOKEN
+          cu_version: cu116
+          name: linux_conda_py310_cu116_pyt1120
+          python_version: '3.10'
+          pytorch_version: 1.12.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda113
+          context: DOCKERHUB_TOKEN
+          cu_version: cu113
+          name: linux_conda_py310_cu113_pyt1121
+          python_version: '3.10'
+          pytorch_version: 1.12.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda116
+          context: DOCKERHUB_TOKEN
+          cu_version: cu116
+          name: linux_conda_py310_cu116_pyt1121
+          python_version: '3.10'
+          pytorch_version: 1.12.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda116
+          context: DOCKERHUB_TOKEN
+          cu_version: cu116
+          name: linux_conda_py310_cu116_pyt1130
+          python_version: '3.10'
+          pytorch_version: 1.13.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda117
+          context: DOCKERHUB_TOKEN
+          cu_version: cu117
+          name: linux_conda_py310_cu117_pyt1130
+          python_version: '3.10'
+          pytorch_version: 1.13.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda116
+          context: DOCKERHUB_TOKEN
+          cu_version: cu116
+          name: linux_conda_py310_cu116_pyt1131
+          python_version: '3.10'
+          pytorch_version: 1.13.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda117
+          context: DOCKERHUB_TOKEN
+          cu_version: cu117
+          name: linux_conda_py310_cu117_pyt1131
+          python_version: '3.10'
+          pytorch_version: 1.13.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda117
+          context: DOCKERHUB_TOKEN
+          cu_version: cu117
+          name: linux_conda_py310_cu117_pyt200
+          python_version: '3.10'
+          pytorch_version: 2.0.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda118
+          context: DOCKERHUB_TOKEN
+          cu_version: cu118
+          name: linux_conda_py310_cu118_pyt200
+          python_version: '3.10'
+          pytorch_version: 2.0.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda117
+          context: DOCKERHUB_TOKEN
+          cu_version: cu117
+          name: linux_conda_py310_cu117_pyt201
+          python_version: '3.10'
+          pytorch_version: 2.0.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda118
+          context: DOCKERHUB_TOKEN
+          cu_version: cu118
+          name: linux_conda_py310_cu118_pyt201
+          python_version: '3.10'
+          pytorch_version: 2.0.1
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda118
+          context: DOCKERHUB_TOKEN
+          cu_version: cu118
+          name: linux_conda_py310_cu118_pyt210
+          python_version: '3.10'
+          pytorch_version: 2.1.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda121
+          context: DOCKERHUB_TOKEN
+          cu_version: cu121
+          name: linux_conda_py310_cu121_pyt210
+          python_version: '3.10'
+          pytorch_version: 2.1.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda118
+          context: DOCKERHUB_TOKEN
+          cu_version: cu118
+          name: linux_conda_py311_cu118_pyt210
+          python_version: '3.11'
+          pytorch_version: 2.1.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda121
+          context: DOCKERHUB_TOKEN
+          cu_version: cu121
+          name: linux_conda_py311_cu121_pyt210
+          python_version: '3.11'
+          pytorch_version: 2.1.0
+      - binary_linux_conda_cuda:
+          name: testrun_conda_cuda_py310_cu117_pyt201
+          context: DOCKERHUB_TOKEN
+          python_version: "3.10"
+          pytorch_version: '2.0.1'
+          cu_version: "cu117"
diff --git a/pytorch3d/.circleci/regenerate.py b/pytorch3d/.circleci/regenerate.py
new file mode 100644
index 0000000000000000000000000000000000000000..f335d5d38e4b80797e6c163b39842ea4a8eda9dd
--- /dev/null
+++ b/pytorch3d/.circleci/regenerate.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+This script is adapted from the torchvision one.
+"""
+
+import os.path
+
+import jinja2
+import yaml
+from packaging import version
+
+
+# Maps each PyTorch version to the CUDA versions ("cuNNN" tags) which have
+# pytorch conda packages available for linux.
+CONDA_CUDA_VERSIONS = {
+    "1.12.0": ["cu113", "cu116"],
+    "1.12.1": ["cu113", "cu116"],
+    "1.13.0": ["cu116", "cu117"],
+    "1.13.1": ["cu116", "cu117"],
+    "2.0.0": ["cu117", "cu118"],
+    "2.0.1": ["cu117", "cu118"],
+    "2.1.0": ["cu118", "cu121"],
+}
+
+
+def conda_docker_image_for_cuda(cuda_version):
+    """Return the conda-builder image for a "cuNNN" tag (None for cu101/cu102/cu111)."""
+    has_no_image = cuda_version in ("cu101", "cu102", "cu111")
+    if not has_no_image and len(cuda_version) != 5:  # tags must be 5 characters
+        raise ValueError("Unknown cuda version")
+    return None if has_no_image else "pytorch/conda-builder:cuda" + cuda_version[2:]
+
+
+def pytorch_versions_for_python(python_version):
+    """
+    Return the PyTorch versions (keys of CONDA_CUDA_VERSIONS) to build for
+    `python_version`; raise ValueError if that Python version is not handled.
+    """
+    # Oldest PyTorch version built for each Python; None means no lower bound.
+    oldest_pytorch = {"3.8": None, "3.9": None, "3.10": "1.11.0", "3.11": "2.1.0"}
+    if python_version not in oldest_pytorch:
+        # Fail loudly rather than implicitly returning None to the caller.
+        raise ValueError(f"Unknown python version: {python_version!r}")
+    oldest = oldest_pytorch[python_version]
+    if oldest is None:
+        return list(CONDA_CUDA_VERSIONS)
+    floor = version.Version(oldest)
+    return [i for i in CONDA_CUDA_VERSIONS if version.Version(i) >= floor]
+
+
+def workflows(prefix="", filter_branch=None, upload=False, indentation=6):
+    """
+    Return the job entries for every Python / PyTorch / CUDA combination as
+    YAML text, formatted by indent() with `indentation` spaces.
+    """
+    # Arguments which are the same for every combination.
+    shared = dict(
+        btype="conda", prefix=prefix, upload=upload, filter_branch=filter_branch
+    )
+    # One list of jobs per combination, in matrix order.
+    pairs = [
+        workflow_pair(python_version=py, pytorch_version=pyt, cu_version=cu, **shared)
+        for py in ["3.8", "3.9", "3.10", "3.11"]
+        for pyt in pytorch_versions_for_python(py)
+        for cu in CONDA_CUDA_VERSIONS[pyt]
+    ]
+    return indent(indentation, [job for pair in pairs for job in pair])
+
+
+def workflow_pair(
+    *,
+    btype,
+    python_version,
+    pytorch_version,
+    cu_version,
+    prefix="",
+    upload=False,
+    filter_branch,
+):
+    """
+    Return the list of workflow entries for one build configuration: the
+    build job, followed by its upload job when `upload` is true.
+
+    All arguments are keyword-only. `prefix` is put at the front of the job
+    name; `filter_branch` is passed through to both jobs unchanged.
+    """
+    # Dots are dropped from the versions, e.g. "3.10" appears as py310.
+    py = python_version.replace(".", "")
+    pyt = pytorch_version.replace(".", "")
+    job_name = f"{prefix}linux_{btype}_py{py}_{cu_version}_pyt{pyt}"
+    build_job = generate_base_workflow(
+        base_workflow_name=job_name,
+        python_version=python_version,
+        pytorch_version=pytorch_version,
+        cu_version=cu_version,
+        btype=btype,
+        filter_branch=filter_branch,
+    )
+    if not upload:
+        return [build_job]
+    upload_job = generate_upload_workflow(
+        base_workflow_name=job_name,
+        btype=btype,
+        cu_version=cu_version,
+        filter_branch=filter_branch,
+    )
+    return [build_job, upload_job]
+
+
+def generate_base_workflow(
+    *,
+    base_workflow_name,
+    python_version,
+    cu_version,
+    pytorch_version,
+    btype,
+    filter_branch=None,
+):
+    """
+    Return {"binary_linux_<btype>": parameters} for one build job; the image
+    and the branch filter are only included when they are not None.
+    """
+    image = conda_docker_image_for_cuda(cu_version)
+    params = {
+        "name": base_workflow_name,
+        "python_version": python_version,
+        "cu_version": cu_version,
+        "pytorch_version": pytorch_version,
+        "context": "DOCKERHUB_TOKEN",
+    }
+    if image is not None:
+        params["conda_docker_image"] = image
+    if filter_branch is not None:
+        params["filters"] = {"branches": {"only": filter_branch}}
+    return {f"binary_linux_{btype}": params}
+
+
+def generate_upload_workflow(*, base_workflow_name, btype, cu_version, filter_branch):
+    """Return the upload job entry, which requires `base_workflow_name`.
+
+    Wheel uploads also get a "subfolder" named after `cu_version`."""
+    job = {
+        "name": f"{base_workflow_name}_upload",
+        "context": "org-member",
+        "requires": [base_workflow_name],
+    }
+    if btype == "wheel":
+        job.update({"subfolder": cu_version + "/"})
+    if filter_branch is not None:
+        job.update({"filters": {"branches": {"only": filter_branch}}})
+    return {f"binary_{btype}_upload": job}
+
+
+def indent(indentation, data_list):
+    """Dump `data_list` as YAML, indenting every line after the first ("" if empty)."""
+    if not len(data_list):
+        return ""
+    separator = "\n" + " " * indentation
+    return separator.join(yaml.dump(data_list, default_flow_style=False).splitlines())
+
+
+if __name__ == "__main__":
+    d = os.path.dirname(__file__)
+    loader = jinja2.FileSystemLoader(d)
+    env = jinja2.Environment(
+        loader=loader, lstrip_blocks=True, autoescape=False, keep_trailing_newline=True
+    )
+    # Render config.in.yml before opening config.yml for writing, so that a
+    # template error cannot leave the existing config.yml truncated.
+    rendered = env.get_template("config.in.yml").render(workflows=workflows)
+    with open(os.path.join(d, "config.yml"), "w") as f:
+        f.write(rendered)
diff --git a/pytorch3d/.clang-format b/pytorch3d/.clang-format
new file mode 100644
index 0000000000000000000000000000000000000000..39b1b3d603ed0cf6b7f94c9c08067f148f35613f
--- /dev/null
+++ b/pytorch3d/.clang-format
@@ -0,0 +1,85 @@
+AccessModifierOffset: -1
+AlignAfterOpenBracket: AlwaysBreak
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlinesLeft: true
+AlignOperands:   false
+AlignTrailingComments: false
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: false
+BinPackParameters: false
+BraceWrapping:
+  AfterClass:      false
+  AfterControlStatement: false
+  AfterEnum:       false
+  AfterFunction:   false
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  BeforeCatch:     false
+  BeforeElse:      false
+  IndentBraces:    false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: false
+ColumnLimit:     80
+CommentPragmas:  '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat:   false
+ForEachMacros:   [ FOR_EACH, FOR_EACH_R, FOR_EACH_RANGE, ]
+IncludeCategories:
+  - Regex:           '^<.*\.h(pp)?>'
+    Priority:        1
+  - Regex:           '^<.*'
+    Priority:        2
+  - Regex:           '.*'
+    Priority:        3
+IndentCaseLabels: true
+IndentWidth:     2
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: false
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Left
+ReflowComments:  true
+SortIncludes:    true
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles:  false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard:        Cpp11
+TabWidth:        8
+UseTab:          Never
diff --git a/pytorch3d/.flake8 b/pytorch3d/.flake8
new file mode 100644
index 0000000000000000000000000000000000000000..7c9b9bd73cf4fc16b2454ee7c04640533453a7df
--- /dev/null
+++ b/pytorch3d/.flake8
@@ -0,0 +1,9 @@
+[flake8]
+# B028 No explicit stacklevel argument found.
+# B907 'foo' is manually surrounded by quotes, consider using the `!r` conversion flag.
+# B905 `zip()` without an explicit `strict=` parameter.
+ignore = E203, E266, E501, W503, E221, B028, B905, B907
+max-line-length = 88
+max-complexity = 18
+select = B,C,E,F,W,T4,B9
+exclude = build,__init__.py
diff --git a/pytorch3d/.github/CODE_OF_CONDUCT.md b/pytorch3d/.github/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000000000000000000000000000000000000..f049d4c53173cc44e0d0755b874d108891a5bfc5
--- /dev/null
+++ b/pytorch3d/.github/CODE_OF_CONDUCT.md
@@ -0,0 +1,76 @@
+# Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to make participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, sex characteristics, gender identity and expression,
+level of experience, education, socio-economic status, nationality, personal
+appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+  advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+  address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies within all project spaces, and it also applies when
+an individual is representing the project or its community in public spaces.
+Examples of representing a project or community include using an official
+project e-mail address, posting via an official social media account, or acting
+as an appointed representative at an online or offline event. Representation of
+a project may be further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team at <opensource-conduct@fb.com>. All
+complaints will be reviewed and investigated and will result in a response that
+is deemed necessary and appropriate to the circumstances. The project team is
+obligated to maintain confidentiality with regard to the reporter of an incident.
+Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
+available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+
+[homepage]: https://www.contributor-covenant.org
+
+For answers to common questions about this code of conduct, see
+https://www.contributor-covenant.org/faq
diff --git a/pytorch3d/.github/CONTRIBUTING.md b/pytorch3d/.github/CONTRIBUTING.md
new file mode 100644
index 0000000000000000000000000000000000000000..b43d2141dcb817182e060190e18fb62cebf19e29
--- /dev/null
+++ b/pytorch3d/.github/CONTRIBUTING.md
@@ -0,0 +1,53 @@
+# Contributing to PyTorch3D
+We want to make contributing to this project as easy and transparent as
+possible.
+
+## Pull Requests
+We actively welcome your pull requests.
+
+However, if you're adding any significant features, please make sure to have a corresponding issue to outline your proposal and motivation and allow time for us to give feedback, *before* you send a PR.
+We do not always accept new features, and we take the following factors into consideration:
+
+- Whether the same feature can be achieved without modifying PyTorch3D directly. If any aspect of the API is not extensible, please highlight this in an issue so we can work on making this more extensible.
+- Whether the feature is potentially useful to a large audience, or only to a small portion of users.
+- Whether the proposed solution has a good design and interface.
+- Whether the proposed solution adds extra mental/practical overhead to users who don't need such a feature.
+- Whether the proposed solution breaks existing APIs.
+
+When sending a PR, please ensure you complete the following steps:
+
+1. Fork the repo and create your branch from `main`. Follow the instructions
+   in [INSTALL.md](../INSTALL.md) to build the repo.
+2. If you've added code that should be tested, add tests.
+3. If you've changed any APIs, please update the documentation.
+4. Ensure the test suite passes, by running this from the project root:
+    ```
+    python -m unittest discover -v -s tests -t .
+    ```
+5. Make sure your code lints by running `dev/linter.sh` from the project root.
+6. If a PR contains multiple orthogonal changes, split it into multiple separate PRs.
+7. If you haven't already, complete the Contributor License Agreement ("CLA").
+
+## Contributor License Agreement ("CLA")
+In order to accept your pull request, we need you to submit a CLA. You only need
+to do this once to work on any of Facebook's open source projects.
+
+Complete your CLA here: <https://code.facebook.com/cla>
+
+## Issues
+We use GitHub issues to track public bugs. Please ensure your description is
+clear and has sufficient instructions to be able to reproduce the issue.
+
+Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
+disclosure of security bugs. In those cases, please go through the process
+outlined on that page and do not file a public issue.
+
+## Coding Style
+We follow these [python](http://google.github.io/styleguide/pyguide.html) and [C++](https://google.github.io/styleguide/cppguide.html) style guides.
+
+For the linter to work, you will need to install `black`, `flake8`, `usort` and `clang-format`, and
+they need to be fairly up to date.
+
+## License
+By contributing to PyTorch3D, you agree that your contributions will be licensed
+under the LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/.github/ISSUE_TEMPLATE/bugs.md b/pytorch3d/.github/ISSUE_TEMPLATE/bugs.md
new file mode 100644
index 0000000000000000000000000000000000000000..bbba16affdb8efa3dee61ede84e5d406350a613a
--- /dev/null
+++ b/pytorch3d/.github/ISSUE_TEMPLATE/bugs.md
@@ -0,0 +1,30 @@
+---
+name: "🐛 Bugs / Unexpected behaviors"
+about: Please report unexpected behaviors or bugs in PyTorch3D.
+
+---
+
+If you do not know the root cause of the problem / bug, and wish someone to help you, please
+post according to this template:
+
+## 🐛 Bugs / Unexpected behaviors
+<!-- A clear and concise description of the issue -->
+
+NOTE: Please look at the existing list of Issues tagged with the label [`bug`](https://github.com/facebookresearch/pytorch3d/issues?q=label%3Abug). **Only open a new issue if this bug has not already been reported. If an issue already exists, please comment there instead.**
+
+## Instructions To Reproduce the Issue:
+
+Please include the following (depending on what the issue is):
+
+1. Any changes you made (`git diff`) or code you wrote
+```
+<put diff or code here>
+```
+2. The exact command(s) you ran:
+3. What you observed (including the full logs):
+```
+<put logs here>
+```
+
+Please also simplify the steps as much as possible so they do not require additional resources to
+   run, such as a private dataset.
diff --git a/pytorch3d/.github/ISSUE_TEMPLATE/config.yml b/pytorch3d/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3ba13e0cec6cbbfd462e9ebf529dd2093148cd69
--- /dev/null
+++ b/pytorch3d/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1 @@
+blank_issues_enabled: false
diff --git a/pytorch3d/.github/ISSUE_TEMPLATE/feature_request.md b/pytorch3d/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000000000000000000000000000000000000..6eaf025dfdb1bdd0944c65f1ab25b34f438a05de
--- /dev/null
+++ b/pytorch3d/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,21 @@
+---
+name: "\U0001F680 Feature Request"
+about: Submit a proposal/request for a new PyTorch3D feature
+
+---
+
+## 🚀 Feature
+<!-- A clear and concise description of the feature proposal -->
+
+NOTE: Please look at the existing list of Issues tagged with the label [`enhancement`](https://github.com/facebookresearch/pytorch3d/issues?q=label%3Aenhancement). **Only open a new issue if you do not see your feature request there**.
+
+## Motivation
+
+<!-- Please outline the motivation for the proposal.
+e.g. It would be great if I could do [...], I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too -->
+
+## Pitch
+
+<!-- A clear and concise description, optionally with code examples showing the functionality you want. -->
+
+NOTE: we only consider adding new features if they are useful for many users.
diff --git a/pytorch3d/.github/ISSUE_TEMPLATE/questions-help.md b/pytorch3d/.github/ISSUE_TEMPLATE/questions-help.md
new file mode 100644
index 0000000000000000000000000000000000000000..639d0ff823abd7555d987fd586797b457ff43f3b
--- /dev/null
+++ b/pytorch3d/.github/ISSUE_TEMPLATE/questions-help.md
@@ -0,0 +1,21 @@
+---
+name: "❓ Questions"
+about: How do I do X with PyTorch3D? How does PyTorch3D do X?
+
+---
+
+## ❓ Questions on how to use PyTorch3D
+
+<!-- A clear and concise description of the question you need help with. -->
+
+
+NOTE: Please look at the existing list of Issues tagged with the label [`question`](https://github.com/facebookresearch/pytorch3d/issues?q=label%3Aquestion) or [`how-to`](https://github.com/facebookresearch/pytorch3d/issues?q=label%3A%22how+to%22). **Only open a new issue if you cannot find an answer there**.
+
+Also note the following:
+
+1. If you encountered any errors or unexpected issues while using PyTorch3D and need help resolving them,
+   please use the "Bugs / Unexpected behaviors" issue template.
+
+2. We do not answer general machine learning / computer vision questions that are not specific to
+   PyTorch3D, such as how a model works or what algorithm/methods can be
+   used to achieve X.
diff --git a/pytorch3d/.github/bundle_adjust.gif b/pytorch3d/.github/bundle_adjust.gif
new file mode 100644
index 0000000000000000000000000000000000000000..f8a836828aecdc96a857f3f1ed3bdd3af1163bd6
--- /dev/null
+++ b/pytorch3d/.github/bundle_adjust.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e5e5c6e098cd231271cfb63a81a08c82a5213df90f487b77f79d3a69342a56a
+size 2108614
diff --git a/pytorch3d/.github/camera_position_teapot.gif b/pytorch3d/.github/camera_position_teapot.gif
new file mode 100644
index 0000000000000000000000000000000000000000..51435a9b3b1bde86dc928da3bafa1015dc6c3702
--- /dev/null
+++ b/pytorch3d/.github/camera_position_teapot.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6eda9ca8e86552e57f299ae305742f99e224ad3a0fdb3434e11fb528bd8a89c1
+size 2092047
diff --git a/pytorch3d/.github/cow_deform.gif b/pytorch3d/.github/cow_deform.gif
new file mode 100644
index 0000000000000000000000000000000000000000..c73c44d73532cca65c17dd506f20f6dc599f9a7e
Binary files /dev/null and b/pytorch3d/.github/cow_deform.gif differ
diff --git a/pytorch3d/.github/dolphin_deform.gif b/pytorch3d/.github/dolphin_deform.gif
new file mode 100644
index 0000000000000000000000000000000000000000..c3912ba22aec851318531bcf0708a3dddb6d7107
Binary files /dev/null and b/pytorch3d/.github/dolphin_deform.gif differ
diff --git a/pytorch3d/.github/fit_nerf.gif b/pytorch3d/.github/fit_nerf.gif
new file mode 100644
index 0000000000000000000000000000000000000000..edd5a66ffa044d10f3fa11629b28ab6bb1d70eb6
--- /dev/null
+++ b/pytorch3d/.github/fit_nerf.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:862ea112d530937aab5255deeae2426efe96b3e0774bbaa95604a8421042d92b
+size 4569104
diff --git a/pytorch3d/.github/fit_textured_volume.gif b/pytorch3d/.github/fit_textured_volume.gif
new file mode 100644
index 0000000000000000000000000000000000000000..675de0cad1140134177c44f2fff9f4674b0d8c16
--- /dev/null
+++ b/pytorch3d/.github/fit_textured_volume.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b929b43e60a9321b9179bbf572d67fb9ea3007bdebe3c9dd97e784047478668
+size 3713321
diff --git a/pytorch3d/.github/implicitron_config.gif b/pytorch3d/.github/implicitron_config.gif
new file mode 100644
index 0000000000000000000000000000000000000000..e6b2fc941d42cbc2adf09d4592760b001f6ce599
--- /dev/null
+++ b/pytorch3d/.github/implicitron_config.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e92aa5ace402dc9dcb0bf0aa7792a9c74e05f30814804b2ee4aa2ea91528e20c
+size 5220742
diff --git a/pytorch3d/.github/nerf_project_logo.gif b/pytorch3d/.github/nerf_project_logo.gif
new file mode 100644
index 0000000000000000000000000000000000000000..4aa2ff6908400f8ce39f8f9f458f8eba6eef9b72
--- /dev/null
+++ b/pytorch3d/.github/nerf_project_logo.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4bd77976d9412a8ca5730555ac4d5979cd21f1bbe8253fcaf66e645a05017cc9
+size 5128345
diff --git a/pytorch3d/.github/render_textured_mesh.gif b/pytorch3d/.github/render_textured_mesh.gif
new file mode 100644
index 0000000000000000000000000000000000000000..164651e081212b9ae40e9ae504f1a5057923ff1b
Binary files /dev/null and b/pytorch3d/.github/render_textured_mesh.gif differ
diff --git a/pytorch3d/.gitignore b/pytorch3d/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..30afb0d15147d3d6fadafd55d509916a0afe2d3a
--- /dev/null
+++ b/pytorch3d/.gitignore
@@ -0,0 +1,21 @@
+build/
+dist/
+*.egg-info/
+**/__pycache__/
+*-checkpoint.ipynb
+**/.ipynb_checkpoints
+**/.ipynb_checkpoints/**
+
+
+# Docusaurus site
+website/yarn.lock
+website/build/
+website/i18n/
+website/node_modules/*
+website/npm-debug.log
+
+## Generated for tutorials
+website/_tutorials/
+website/static/files/
+website/pages/tutorials/*
+!website/pages/tutorials/index.js
diff --git a/pytorch3d/INSTALL.md b/pytorch3d/INSTALL.md
new file mode 100644
index 0000000000000000000000000000000000000000..f1d48f290cd55e632a914691bb4affd798c987f5
--- /dev/null
+++ b/pytorch3d/INSTALL.md
@@ -0,0 +1,156 @@
+# Installation
+
+
+## Requirements
+
+### Core library
+
+The core library is written in PyTorch. Several components have underlying implementation in CUDA for improved performance. A subset of these components have CPU implementations in C++/PyTorch. It is advised to use PyTorch3D with GPU support in order to use all the features.
+
+- Linux or macOS or Windows
+- Python 3.8, 3.9 or 3.10
+- PyTorch 1.12.0, 1.12.1, 1.13.0, 2.0.0, 2.0.1 or 2.1.0.
+- torchvision that matches the PyTorch installation. You can install them together as explained at pytorch.org to make sure of this.
+- gcc & g++ ≥ 4.9
+- [fvcore](https://github.com/facebookresearch/fvcore)
+- [ioPath](https://github.com/facebookresearch/iopath)
+- If CUDA is to be used, use a version which is supported by the corresponding pytorch version and at least version 9.2.
+- If CUDA older than 11.7 is to be used and you are building from source, the CUB library must be available. We recommend version 1.10.0.
+
+The runtime dependencies can be installed by running:
+```
+conda create -n pytorch3d python=3.9
+conda activate pytorch3d
+conda install pytorch=1.13.0 torchvision pytorch-cuda=11.6 -c pytorch -c nvidia
+conda install -c fvcore -c iopath -c conda-forge fvcore iopath
+```
+
+For the CUB build time dependency, which you only need if you have CUDA older than 11.7, if you are using conda, you can continue with
+```
+conda install -c bottler nvidiacub
+```
+Otherwise download the CUB library from https://github.com/NVIDIA/cub/releases and unpack it to a folder of your choice.
+Define the environment variable CUB_HOME before building and point it to the directory that contains `CMakeLists.txt` for CUB.
+For example on Linux/Mac,
+```
+curl -LO https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz
+tar xzf 1.10.0.tar.gz
+export CUB_HOME=$PWD/cub-1.10.0
+```
+
+### Tests/Linting and Demos
+
+For developing on top of PyTorch3D or contributing, you will need to run the linter and tests. If you want to run any of the notebook tutorials in `docs/tutorials` or the examples in `docs/examples` you will also need matplotlib and OpenCV.
+- scikit-image
+- black
+- usort
+- flake8
+- matplotlib
+- tqdm
+- jupyter
+- imageio
+- plotly
+- opencv-python
+
+These can be installed by running:
+```
+# Demos and examples
+conda install jupyter
+pip install scikit-image matplotlib imageio plotly opencv-python
+
+# Tests/Linting
+pip install black usort flake8 flake8-bugbear flake8-comprehensions
+```
+
+## Installing prebuilt binaries for PyTorch3D
+After installing the above dependencies, run one of the following commands:
+
+### 1. Install with CUDA support from Anaconda Cloud, on Linux only
+
+```
+# Anaconda Cloud
+conda install pytorch3d -c pytorch3d
+```
+
+Or, to install a nightly (non-official, alpha) build:
+```
+# Anaconda Cloud
+conda install pytorch3d -c pytorch3d-nightly
+```
+
+### 2. Install wheels for Linux
+We have prebuilt wheels with CUDA for Linux for PyTorch 1.11.0, for each of the supported CUDA versions,
+for Python 3.8 and 3.9. This is for ease of use on Google Colab.
+These are installed in a special way.
+For example, to install for Python 3.8, PyTorch 1.11.0 and CUDA 11.3
+```
+pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py38_cu113_pyt1110/download.html
+```
+
+In general, from inside IPython, or in Google Colab or a jupyter notebook, you can install with
+```
+import sys
+import torch
+pyt_version_str=torch.__version__.split("+")[0].replace(".", "")
+version_str="".join([
+    f"py3{sys.version_info.minor}_cu",
+    torch.version.cuda.replace(".",""),
+    f"_pyt{pyt_version_str}"
+])
+!pip install fvcore iopath
+!pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html
+```
+
+## Building / installing from source.
+CUDA support will be included if CUDA is available in pytorch or if the environment variable
+`FORCE_CUDA` is set to `1`.
+
+### 1. Install from GitHub
+```
+pip install "git+https://github.com/facebookresearch/pytorch3d.git"
+```
+To install using the code of the released version instead of from the main branch, use the following instead.
+```
+pip install "git+https://github.com/facebookresearch/pytorch3d.git@stable"
+```
+
+For CUDA builds with versions earlier than CUDA 11, set `CUB_HOME` before building as described above.
+
+**Install from GitHub on macOS:**
+Some environment variables should be provided, like this.
+```
+MACOSX_DEPLOYMENT_TARGET=10.14 CC=clang CXX=clang++ pip install "git+https://github.com/facebookresearch/pytorch3d.git"
+```
+
+### 2. Install from a local clone
+```
+git clone https://github.com/facebookresearch/pytorch3d.git
+cd pytorch3d && pip install -e .
+```
+To rebuild after installing from a local clone, run `rm -rf build/ **/*.so` then `pip install -e .`. You often need to rebuild pytorch3d after reinstalling PyTorch. For CUDA builds with versions earlier than CUDA 11, set `CUB_HOME` before building as described above.
+
+**Install from local clone on macOS:**
+```
+MACOSX_DEPLOYMENT_TARGET=10.14 CC=clang CXX=clang++ pip install -e .
+```
+
+**Install from local clone on Windows:**
+
+Depending on the version of PyTorch, changes to some PyTorch headers may be needed before compilation. These are often discussed in issues in this repository.
+
+After any necessary patching, you can go to "x64 Native Tools Command Prompt for VS 2019" to compile and install
+```
+cd pytorch3d
+python3 setup.py install
+```
+
+After installing, you can run **unit tests**
+```
+python3 -m unittest discover -v -s tests -t .
+```
+
+# FAQ
+
+### Can I use Docker?
+
+We don't provide a docker file but see [#113](https://github.com/facebookresearch/pytorch3d/issues/113) for a docker file shared by a user (NOTE: this has not been tested by the PyTorch3D team).
diff --git a/pytorch3d/LICENSE b/pytorch3d/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..c55382ff0992d90ae5ecb2cd9ac624ccd20bda4d
--- /dev/null
+++ b/pytorch3d/LICENSE
@@ -0,0 +1,30 @@
+BSD License
+
+For PyTorch3D software
+
+Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+ * Neither the name Meta nor the names of its contributors may be used to
+   endorse or promote products derived from this software without specific
+   prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/pytorch3d/LICENSE-3RD-PARTY b/pytorch3d/LICENSE-3RD-PARTY
new file mode 100644
index 0000000000000000000000000000000000000000..f55b7dce68b9ba07e56d43a1d5e7134aa5fd4b6f
--- /dev/null
+++ b/pytorch3d/LICENSE-3RD-PARTY
@@ -0,0 +1,71 @@
+SRN license ( https://github.com/vsitzmann/scene-representation-networks/ ):
+
+MIT License
+
+Copyright (c) 2019 Vincent Sitzmann
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+
+IDR license ( github.com/lioryariv/idr ):
+
+MIT License
+
+Copyright (c) 2020 Lior Yariv
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+
+NeRF https://github.com/bmild/nerf/
+
+Copyright (c) 2020 bmild
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/pytorch3d/README.md b/pytorch3d/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2ab5b7fae10d90b9461cd9cc207d0779e631a338
--- /dev/null
+++ b/pytorch3d/README.md
@@ -0,0 +1,177 @@
+<img src="https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/.github/pytorch3dlogo.png" width="900"/>
+
+[![CircleCI](https://circleci.com/gh/facebookresearch/pytorch3d.svg?style=svg)](https://circleci.com/gh/facebookresearch/pytorch3d)
+[![Anaconda-Server Badge](https://anaconda.org/pytorch3d/pytorch3d/badges/version.svg)](https://anaconda.org/pytorch3d/pytorch3d)
+
+# Introduction
+
+PyTorch3D provides efficient, reusable components for 3D Computer Vision research with [PyTorch](https://pytorch.org).
+
+Key features include:
+
+- Data structure for storing and manipulating triangle meshes
+- Efficient operations on triangle meshes (projective transformations, graph convolution, sampling, loss functions)
+- A differentiable mesh renderer
+- Implicitron, see [its README](projects/implicitron_trainer), a framework for new-view synthesis via implicit representations. ([blog post](https://ai.facebook.com/blog/implicitron-a-new-modular-extensible-framework-for-neural-implicit-representations-in-pytorch3d/))
+
+PyTorch3D is designed to integrate smoothly with deep learning methods for predicting and manipulating 3D data.
+For this reason, all operators in PyTorch3D:
+
+- Are implemented using PyTorch tensors
+- Can handle minibatches of heterogeneous data
+- Can be differentiated
+- Can utilize GPUs for acceleration
+
+Within FAIR, PyTorch3D has been used to power research projects such as [Mesh R-CNN](https://arxiv.org/abs/1906.02739).
+
+See our [blog post](https://ai.facebook.com/blog/-introducing-pytorch3d-an-open-source-library-for-3d-deep-learning/) to see more demos and learn about PyTorch3D.
+
+## Installation
+
+For detailed instructions refer to [INSTALL.md](INSTALL.md).
+
+## License
+
+PyTorch3D is released under the [BSD License](LICENSE).
+
+## Tutorials
+
+Get started with PyTorch3D by trying one of the tutorial notebooks.
+
+|<img src="https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/.github/dolphin_deform.gif" width="310"/>|<img src="https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/.github/bundle_adjust.gif" width="310"/>|
+|:-----------------------------------------------------------------------------------------------------------:|:--------------------------------------------------:|
+| [Deform a sphere mesh to dolphin](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/deform_source_mesh_to_target_mesh.ipynb)| [Bundle adjustment](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/bundle_adjustment.ipynb) |
+
+| <img src="https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/.github/render_textured_mesh.gif" width="310"/> | <img src="https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/.github/camera_position_teapot.gif" width="310" height="310"/>
+|:------------------------------------------------------------:|:--------------------------------------------------:|
+| [Render textured meshes](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/render_textured_meshes.ipynb)| [Camera position optimization](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/camera_position_optimization_with_differentiable_rendering.ipynb)|
+
+| <img src="https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/.github/pointcloud_render.png" width="310"/> | <img src="https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/.github/cow_deform.gif" width="310" height="310"/>
+|:------------------------------------------------------------:|:--------------------------------------------------:|
+| [Render textured pointclouds](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/render_colored_points.ipynb)| [Fit a mesh with texture](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/fit_textured_mesh.ipynb)|
+
+| <img src="https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/.github/densepose_render.png" width="310"/> | <img src="https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/.github/shapenet_render.png" width="310" height="310"/>
+|:------------------------------------------------------------:|:--------------------------------------------------:|
+| [Render DensePose data](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/render_densepose.ipynb)| [Load & Render ShapeNet data](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/dataloaders_ShapeNetCore_R2N2.ipynb)|
+
+| <img src="https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/.github/fit_textured_volume.gif" width="310"/> | <img src="https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/.github/fit_nerf.gif" width="310" height="310"/>
+|:------------------------------------------------------------:|:--------------------------------------------------:|
+| [Fit Textured Volume](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/fit_textured_volume.ipynb)| [Fit A Simple Neural Radiance Field](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/fit_simple_neural_radiance_field.ipynb)|
+
+| <img src="https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/.github/fit_textured_volume.gif" width="310"/> | <img src="https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/.github/implicitron_config.gif" width="310" height="310"/>
+|:------------------------------------------------------------:|:--------------------------------------------------:|
+| [Fit Textured Volume in Implicitron](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/implicitron_volumes.ipynb)| [Implicitron Config System](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/implicitron_config_system.ipynb)|
+
+
+
+
+
+## Documentation
+
+Learn more about the API by reading the PyTorch3D [documentation](https://pytorch3d.readthedocs.org/).
+
+We also have deep dive notes on several API components:
+
+- [Heterogeneous Batching](https://github.com/facebookresearch/pytorch3d/tree/main/docs/notes/batching.md)
+- [Mesh IO](https://github.com/facebookresearch/pytorch3d/tree/main/docs/notes/meshes_io.md)
+- [Differentiable Rendering](https://github.com/facebookresearch/pytorch3d/tree/main/docs/notes/renderer_getting_started.md)
+
+### Overview Video
+
+We have created a short (~14 min) video tutorial providing an overview of the PyTorch3D codebase including several code examples. Click on the image below to watch the video on YouTube:
+
+<a href="http://www.youtube.com/watch?v=Pph1r-x9nyY"><img src="http://img.youtube.com/vi/Pph1r-x9nyY/0.jpg" height="225" ></a>
+
+## Development
+
+We welcome new contributions to PyTorch3D and we will be actively maintaining this library! Please refer to [CONTRIBUTING.md](./.github/CONTRIBUTING.md) for full instructions on how to run the code, tests and linter, and submit your pull requests.
+
+## Development and Compatibility
+
+- `main` branch: actively developed, without any guarantee. Anything can be broken at any time
+  - REMARK: this includes nightly builds which are built from `main`
+  - HINT: the commit history can help locate regressions or changes
+- backward-compatibility between releases: no guarantee. Best efforts to communicate breaking changes and facilitate migration of code or data (incl. models).
+
+## Contributors
+
+PyTorch3D is written and maintained by the Facebook AI Research Computer Vision Team.
+
+In alphabetical order:
+
+* Amitav Baruah
+* Steve Branson
+* Krzysztof Chalupka
+* Jiali Duan
+* Luya Gao
+* Georgia Gkioxari
+* Taylor Gordon
+* Justin Johnson
+* Patrick Labatut
+* Christoph Lassner
+* Wan-Yen Lo
+* David Novotny
+* Nikhila Ravi
+* Jeremy Reizenstein
+* Dave Schnizlein
+* Roman Shapovalov
+* Olivia Wiles
+
+## Citation
+
+If you find PyTorch3D useful in your research, please cite our tech report:
+
+```bibtex
+@article{ravi2020pytorch3d,
+    author = {Nikhila Ravi and Jeremy Reizenstein and David Novotny and Taylor Gordon
+                  and Wan-Yen Lo and Justin Johnson and Georgia Gkioxari},
+    title = {Accelerating 3D Deep Learning with PyTorch3D},
+    journal = {arXiv:2007.08501},
+    year = {2020},
+}
+```
+
+If you are using the pulsar backend for sphere-rendering (the `PulsarPointRenderer` or `pytorch3d.renderer.points.pulsar.Renderer`), please cite the tech report:
+
+```bibtex
+@article{lassner2020pulsar,
+    author = {Christoph Lassner and Michael Zollh\"ofer},
+    title = {Pulsar: Efficient Sphere-based Neural Rendering},
+    journal = {arXiv:2004.07484},
+    year = {2020},
+}
+```
+
+## News
+
+Please see below for a timeline of the codebase updates in reverse chronological order. We are sharing updates on the releases as well as research projects which are built with PyTorch3D. The changelogs for the releases are available under [`Releases`](https://github.com/facebookresearch/pytorch3d/releases),  and the builds can be installed using `conda` as per the instructions in [INSTALL.md](INSTALL.md).
+
+**[Dec 19th 2022]:**   PyTorch3D [v0.7.2](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.7.2) released.
+
+**[Oct 23rd 2022]:**   PyTorch3D [v0.7.1](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.7.1) released.
+
+**[Aug 10th 2022]:**   PyTorch3D [v0.7.0](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.7.0) released with Implicitron and MeshRasterizerOpenGL.
+
+**[Apr 28th 2022]:**   PyTorch3D [v0.6.2](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.6.2) released
+
+**[Dec 16th 2021]:**   PyTorch3D [v0.6.1](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.6.1) released
+
+**[Oct 6th 2021]:**   PyTorch3D [v0.6.0](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.6.0) released
+
+**[Aug 5th 2021]:**   PyTorch3D [v0.5.0](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.5.0) released
+
+**[Feb 9th 2021]:** PyTorch3D [v0.4.0](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.4.0) released with support for implicit functions, volume rendering and a [reimplementation of NeRF](https://github.com/facebookresearch/pytorch3d/tree/main/projects/nerf).
+
+**[November 2nd 2020]:** PyTorch3D [v0.3.0](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.3.0) released, integrating the pulsar backend.
+
+**[Aug 28th 2020]:**   PyTorch3D [v0.2.5](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.2.5) released
+
+**[July 17th 2020]:**   PyTorch3D tech report published on ArXiv: https://arxiv.org/abs/2007.08501
+
+**[April 24th 2020]:**   PyTorch3D [v0.2.0](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.2.0) released
+
+**[March 25th 2020]:**   [SynSin](https://arxiv.org/abs/1912.08804) codebase released using PyTorch3D: https://github.com/facebookresearch/synsin
+
+**[March 8th 2020]:**   PyTorch3D [v0.1.1](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.1.1) bug fix release
+
+**[Jan 23rd 2020]:**   PyTorch3D [v0.1.0](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.1.0) released. [Mesh R-CNN](https://arxiv.org/abs/1906.02739) codebase released: https://github.com/facebookresearch/meshrcnn
diff --git a/pytorch3d/dev/linter.sh b/pytorch3d/dev/linter.sh
new file mode 100644
index 0000000000000000000000000000000000000000..43c64084d31229ba18560997735637030cdd2a2f
--- /dev/null
+++ b/pytorch3d/dev/linter.sh
@@ -0,0 +1,40 @@
+#!/bin/bash -e
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Run this script at project root by "./dev/linter.sh" before you commit
+
+# Resolve the directory that contains this script, then take its parent, so
+# that DIR is the directory one level above the script's own directory.
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+DIR=$(dirname "${DIR}")
+
+# The presence of a TARGETS file in DIR selects the internal tool chain
+# (pyfmt here, arc and pyre further down); otherwise usort is run.
+if [[ -f "${DIR}/TARGETS" ]]
+then
+  pyfmt "${DIR}"
+else
+# run usort externally only
+  echo "Running usort..."
+  usort  "${DIR}"
+fi
+
+echo "Running black..."
+black "${DIR}"
+
+# The script runs under "bash -e"; "|| true" keeps a non-zero flake8 exit
+# status from aborting the remaining steps.
+echo "Running flake..."
+flake8 "${DIR}" || true
+
+echo "Running clang-format ..."
+# Use clang-format-8 when it is on PATH, otherwise fall back to clang-format.
+clangformat=$(command -v clang-format-8 || echo clang-format)
+# Format, in place, every file under DIR whose extension is in the list below.
+find "${DIR}" -regex ".*\.\(cpp\|c\|cc\|cu\|cuh\|cxx\|h\|hh\|hpp\|hxx\|tcc\|mm\|m\)" -print0 | xargs -0 "${clangformat}" -i
+
+# Run arc and pyre internally only.
+if [[ -f "${DIR}/TARGETS" ]]
+then
+  # arc lint is attempted only when arc is on PATH; its failure is ignored.
+  (cd "${DIR}"; command -v arc > /dev/null && arc lint) || true
+
+  echo "Running pyre..."
+  echo "To restart/kill pyre server, run 'pyre restart' or 'pyre kill' in fbcode/"
+  # Run in a subshell so the directory change does not leak into this script.
+  ( cd ~/fbsource/fbcode; pyre -l vision/fair/pytorch3d/ )
+fi
diff --git a/pytorch3d/dev/run_tutorials.sh b/pytorch3d/dev/run_tutorials.sh
new file mode 100644
index 0000000000000000000000000000000000000000..304e471b252cd707ba77e87cfab4fbc0afb1ac56
--- /dev/null
+++ b/pytorch3d/dev/run_tutorials.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# This script is for running some of the tutorials using the nightly build in
+# an isolated environment. It is designed to be run in docker.
+
+# If you run this script in this directory with
+#   sudo docker run --runtime=nvidia -it --rm -v $PWD/../docs/tutorials:/notebooks -v $PWD:/loc pytorch/conda-cuda bash /loc/run_tutorials.sh | tee log.txt
+# it should execute some tutorials with the nightly build and resave them, and
+# save a log in the current directory.
+
+# We use nbconvert. runipy would be an alternative but it currently doesn't
+# work well with plotly.
+
+set -e
+
+conda init bash
+# shellcheck source=/dev/null
+source ~/.bashrc
+conda create -y -n myenv python=3.8 matplotlib ipython ipywidgets nbconvert
+conda activate myenv
+conda install -y -c fvcore -c iopath -c conda-forge fvcore iopath
+conda install -y -c pytorch pytorch=1.6.0 cudatoolkit=10.1 torchvision
+conda install -y -c pytorch3d-nightly pytorch3d
+pip install plotly scikit-image
+
+for notebook in /notebooks/*.ipynb
+do
+    name=$(basename "$notebook")
+
+    if [[ "$name" == "dataloaders_ShapeNetCore_R2N2.ipynb" ]]
+    then
+        #skip as data not easily available
+        continue
+    fi
+    if [[ "$name" == "render_densepose.ipynb" ]]
+    then
+        #skip as data not easily available
+        continue
+    fi
+
+    #comment the lines which install torch, torchvision and pytorch3d
+    sed -Ei '/(torchvision)|(pytorch3d)/ s/!pip/!#pip/' "$notebook"
+    #Don't let tqdm use widgets
+    sed -i 's/from tqdm.notebook import tqdm/from tqdm import tqdm/' "$notebook"
+
+    echo
+    echo "###   ###   ###"
+    echo "starting $name"
+    time jupyter nbconvert --to notebook --inplace --ExecutePreprocessor.kernel_name=python3 --execute "$notebook" || true
+    echo "ending $name"
+done
diff --git a/pytorch3d/dev/test_list.py b/pytorch3d/dev/test_list.py
new file mode 100644
index 0000000000000000000000000000000000000000..f00facd8ddae2414c431cbae9ba5f9a810a0e15a
--- /dev/null
+++ b/pytorch3d/dev/test_list.py
@@ -0,0 +1,65 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import ast
+from pathlib import Path
+from typing import List
+
+
+"""
+This module outputs a list of tests for completion.
+It has no dependencies.
+"""
+
+
+def get_test_files() -> List[Path]:
+    root = Path(__file__).parent.parent
+    dirs = ["tests", "projects/implicitron_trainer"]
+    return [i for dir in dirs for i in (root / dir).glob("**/test*.py")]
+
+
+def tests_from_file(path: Path, base: str) -> List[str]:
+    """
+    Returns all the tests in the given file, in format
+    expected as arguments when running the tests.
+    e.g.
+        file_stem
+        file_stem.TestFunctionality
+        file_stem.TestFunctionality.test_f
+        file_stem.TestFunctionality.test_g
+    """
+    with open(path) as f:
+        node = ast.parse(f.read())
+    out = [base]
+    for cls in node.body:
+        if not isinstance(cls, ast.ClassDef):
+            continue
+        if not cls.name.startswith("Test"):
+            continue
+        class_base = base + "." + cls.name
+        out.append(class_base)
+        for method in cls.body:
+            if not isinstance(method, ast.FunctionDef):
+                continue
+            if not method.name.startswith("test"):
+                continue
+            out.append(class_base + "." + method.name)
+    return out
+
+
+def main() -> None:
+    files = get_test_files()
+    test_root = Path(__file__).parent.parent
+    all_tests = []
+    for f in files:
+        file_base = str(f.relative_to(test_root))[:-3].replace("/", ".")
+        all_tests.extend(tests_from_file(f, file_base))
+    for test in sorted(all_tests):
+        print(test)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pytorch3d/docs/.gitignore b/pytorch3d/docs/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..edcd0a80383dbc7d866c26633dcd0a80f5423fbd
--- /dev/null
+++ b/pytorch3d/docs/.gitignore
@@ -0,0 +1,7 @@
+source
+_build
+_static
+_template
+*-checkpoint.ipynb
+.ipynb_checkpoints
+.ipynb_checkpoints/**
diff --git a/pytorch3d/docs/.readthedocs.yaml b/pytorch3d/docs/.readthedocs.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e446f0af546d804e8e6742c064d2eb4023de2691
--- /dev/null
+++ b/pytorch3d/docs/.readthedocs.yaml
@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Set the version of Python and other tools you might need
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.11"
+
+# Build documentation in the docs/ directory with Sphinx
+sphinx:
+  configuration: docs/conf.py
+
+# We recommend specifying your dependencies to enable reproducible builds:
+# https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
+python:
+  install:
+  - requirements: docs/requirements.txt
diff --git a/pytorch3d/docs/Makefile b/pytorch3d/docs/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..3d3d13d52320668c607e18611ff8bfe6fe50f411
--- /dev/null
+++ b/pytorch3d/docs/Makefile
@@ -0,0 +1,25 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Minimal makefile for Sphinx documentation
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/pytorch3d/docs/README.md b/pytorch3d/docs/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..62649528962e563a3b0beea96089fcb8d6411bce
--- /dev/null
+++ b/pytorch3d/docs/README.md
@@ -0,0 +1,77 @@
+## Setup
+
+### Install dependencies
+
+```
+pip install -U recommonmark sphinx sphinx_rtd_theme sphinx_markdown_tables
+```
+
+### Add symlink to the root README.md
+
+We want to include the root readme as an overview. Before generating the docs create a symlink to the root readme.
+
+```
+cd docs
+ln -s ../README.md  overview.md
+```
+
+In `conf.py` for deployment this is done using `subprocess.call`.
+
+### Add a new file
+
+Add a new `.md` or `.rst` file and add its name to the doc tree in `index.rst`, e.g.:
+
+```
+.. toctree::
+   :maxdepth: 1
+   :caption: Intro Documentation
+
+   overview
+```
+
+To autogenerate docs from docstrings in the source code, add the import path for the function e.g.
+
+```
+Chamfer Loss
+--------------------
+
+.. autoclass:: loss.chamfer.chamfer_distance
+    :members:
+    :undoc-members:
+
+    .. automethod:: __init__
+
+```
+
+### Build
+
+From `pytorch3d/docs` run:
+
+```
+> make html
+```
+
+The website is generated in `_build/html`.
+
+### Common Issues
+
+Sphinx can be fussy, and sometimes about things you weren’t expecting. For example, you might encounter something like:
+
+```
+WARNING: toctree contains reference to nonexisting document u'overview'
+...
+checking consistency...
+<pytorch3d>/docs/overview.rst::
+WARNING: document isn't included in any toctree
+```
+
+You might have indented overview in the .. toctree:: in index.rst with four spaces, when Sphinx is expecting three.
+
+
+### View
+
+Start a python simple server:
+
+```
+> python -m http.server
+```
+
+Navigate to: `http://localhost:8000/`
diff --git a/pytorch3d/docs/conf.py b/pytorch3d/docs/conf.py
new file mode 100644
index 0000000000000000000000000000000000000000..28f5760cd7f6eb1dc7ddb185909c956912bcd11c
--- /dev/null
+++ b/pytorch3d/docs/conf.py
@@ -0,0 +1,198 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# flake8: noqa
+
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+
+import unittest.mock as mock
+
+from recommonmark.parser import CommonMarkParser
+from recommonmark.states import DummyStateMachine
+from sphinx.builders.html import StandaloneHTMLBuilder
+from sphinx.ext.autodoc import between
+
+
+# Monkey patch to fix recommonmark 0.4 doc reference issues.
+orig_run_role = DummyStateMachine.run_role
+
+
+def run_role(self, name, options=None, content=None):
+    if name == "doc":
+        name = "any"
+    return orig_run_role(self, name, options, content)
+
+
+DummyStateMachine.run_role = run_role
+
+
+StandaloneHTMLBuilder.supported_image_types = [
+    "image/svg+xml",
+    "image/gif",
+    "image/png",
+    "image/jpeg",
+]
+
+# -- Path setup --------------------------------------------------------------
+
+
+sys.path.insert(0, os.path.abspath("../"))
+sys.path.insert(0, os.path.abspath("../pytorch3d"))
+sys.path.insert(0, os.path.abspath("../../"))
+
+DEPLOY = os.environ.get("READTHEDOCS") == "True"
+needs_sphinx = "1.7"
+
+
+# When torch cannot be imported, register mock objects under the torch and
+# torchvision module names so that later imports of those names succeed.
+try:
+    import torch  # noqa
+except ImportError:
+    for m in [
+        "torch",
+        "torchvision",
+        "torch.nn",
+        "torch.autograd",
+        "torch.autograd.function",
+        "torch.nn.modules",
+        "torch.nn.modules.utils",
+        "torch.utils",
+        "torch.utils.data",
+        "torchvision",
+        "torchvision.ops",
+    ]:
+        sys.modules[m] = mock.Mock(name=m)
+
+# These names are always replaced by mocks, whether or not the real modules
+# are installed.
+for m in ["cv2", "scipy", "numpy", "pytorch3d._C", "np.eye", "np.zeros"]:
+    sys.modules[m] = mock.Mock(name=m)
+
+# -- Project information -----------------------------------------------------
+
+project = "PyTorch3D"
+copyright = "Meta Platforms, Inc"
+author = "facebookresearch"
+
+# The short X.Y version
+version = ""
+
+# The full version, including alpha/beta/rc tags
+release = version
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+
+extensions = [
+    "sphinx_markdown_tables",
+    "sphinx.ext.autodoc",
+    "sphinx.ext.mathjax",
+    "sphinx.ext.napoleon",
+    "sphinx.ext.intersphinx",
+    "sphinx.ext.todo",
+    "sphinx.ext.coverage",
+    "sphinx.ext.viewcode",
+    "sphinx.ext.githubpages",
+]
+
+# -- Configurations for plugins ------------
+napoleon_google_docstring = True
+napoleon_include_init_with_doc = True
+napoleon_include_special_with_doc = True
+napoleon_numpy_docstring = False
+# napoleon_use_param = False
+napoleon_use_rtype = False
+autodoc_inherit_docstrings = False
+autodoc_member_order = "bysource"
+
+source_parsers = {".md": CommonMarkParser}
+
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+source_suffix = [".rst", ".md"]
+
+# The master toctree document.
+master_doc = "index"
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "build", "README.md"]
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = "sphinx"
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = "sphinx_rtd_theme"
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ["_static"]
+
+html_theme_options = {"collapse_navigation": True}
+
+
+def url_resolver(url):
+    if ".html" not in url:
+        url = url.replace("../", "")
+        return "https://github.com/facebookresearch/pytorch3d/blob/main/" + url
+    else:
+        if DEPLOY:
+            return "http://pytorch3d.readthedocs.io/" + url
+        else:
+            return "/" + url
+
+
+def setup(app):
+    # Add symlink to root README
+    if DEPLOY:
+        import subprocess
+
+        subprocess.call(["ln", "-s", "../README.md", "overview.md"])
+
+    from recommonmark.transform import AutoStructify
+
+    app.add_config_value(
+        "recommonmark_config",
+        {
+            "url_resolver": url_resolver,
+            "auto_toc_tree_section": "Contents",
+            "enable_math": True,
+            "enable_inline_math": True,
+            "enable_eval_rst": True,
+            "enable_auto_toc_tree": True,
+        },
+        True,
+    )
+
+    # Register a sphinx.ext.autodoc.between listener to ignore everything
+    # between lines that contain the word IGNORE
+    app.connect("autodoc-process-docstring", between("^.*IGNORE.*$", exclude=True))
+    app.add_transform(AutoStructify)
+
+    return app
diff --git a/pytorch3d/docs/examples/pulsar_basic.py b/pytorch3d/docs/examples/pulsar_basic.py
new file mode 100644
index 0000000000000000000000000000000000000000..be547e340ec4f7fe1b9348d022d36673fe575bd8
--- /dev/null
+++ b/pytorch3d/docs/examples/pulsar_basic.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+This example demonstrates the most trivial, direct interface of the pulsar
+sphere renderer. It renders and saves an image with 10 random spheres.
+Output: basic.png.
+"""
+import logging
+import math
+from os import path
+
+import imageio
+import torch
+from pytorch3d.renderer.points.pulsar import Renderer
+
+
+LOGGER = logging.getLogger(__name__)
+
+
+def cli():
+    """
+    Basic example for the pulsar sphere renderer.
+
+    Seeds the random generator, draws `n_points` random spheres (positions,
+    colors and radii) on the CUDA device, renders them with a fixed camera
+    and saves the result.
+
+    Writes to `basic.png`.
+    """
+    LOGGER.info("Rendering on GPU...")
+    # Fixed seed so that the same spheres are drawn on every run.
+    torch.manual_seed(1)
+    n_points = 10
+    width = 1_000
+    height = 1_000
+    device = torch.device("cuda")
+    # The PyTorch3D system is right handed; in pulsar you can choose the handedness.
+    # For easy reproducibility we use a right handed coordinate system here.
+    renderer = Renderer(width, height, n_points, right_handed_system=True).to(device)
+    # Generate sample data.
+    # Positions: x and y end up in [-5, 5), z in [25, 35).
+    vert_pos = torch.rand(n_points, 3, dtype=torch.float32, device=device) * 10.0
+    vert_pos[:, 2] += 25.0
+    vert_pos[:, :2] -= 5.0
+    vert_col = torch.rand(n_points, 3, dtype=torch.float32, device=device)
+    vert_rad = torch.rand(n_points, dtype=torch.float32, device=device)
+    cam_params = torch.tensor(
+        [
+            0.0,
+            0.0,
+            0.0,  # Position 0, 0, 0 (x, y, z).
+            0.0,
+            math.pi,  # Because of the right handed system, the camera must look 'back'.
+            0.0,  # Rotation 0, pi, 0 (in axis-angle format).
+            5.0,  # Focal length in world size.
+            2.0,  # Sensor size in world size.
+        ],
+        dtype=torch.float32,
+        device=device,
+    )
+    # Render.
+    image = renderer(
+        vert_pos,
+        vert_col,
+        vert_rad,
+        cam_params,
+        1.0e-1,  # Renderer blending parameter gamma, in [1., 1e-5].
+        45.0,  # Maximum depth.
+    )
+    LOGGER.info("Writing image to `%s`.", path.abspath("basic.png"))
+    # Scale by 255 and convert to uint8 on the CPU before saving.
+    imageio.imsave("basic.png", (image.cpu().detach() * 255.0).to(torch.uint8).numpy())
+    LOGGER.info("Done.")
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    cli()
diff --git a/pytorch3d/docs/examples/pulsar_basic_unified.py b/pytorch3d/docs/examples/pulsar_basic_unified.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae93f49d928adb46eda2857f3cf919a4f364882e
--- /dev/null
+++ b/pytorch3d/docs/examples/pulsar_basic_unified.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+This example demonstrates the most trivial use of the pulsar PyTorch3D
+interface for sphere rendering. It renders and saves an image with
+10 random spheres.
+Output: basic-pt3d.png.
+"""
+import logging
+from os import path
+
+import imageio
+import torch
+from pytorch3d.renderer import (
+    PerspectiveCameras,
+    PointsRasterizationSettings,
+    PointsRasterizer,
+    PulsarPointsRenderer,
+)
+from pytorch3d.structures import Pointclouds
+
+
+LOGGER = logging.getLogger(__name__)
+
+
+def cli():
+    """
+    Basic example for the pulsar sphere renderer using the PyTorch3D interface.
+
+    Seeds the random generator, draws `n_points` random points with colors
+    and radii on the CUDA device, renders them through
+    `PulsarPointsRenderer` on a white background and saves the result.
+
+    Writes to `basic-pt3d.png`.
+    """
+    LOGGER.info("Rendering on GPU...")
+    # Fixed seed so that the same points are drawn on every run.
+    torch.manual_seed(1)
+    n_points = 10
+    width = 1_000
+    height = 1_000
+    device = torch.device("cuda")
+    # Generate sample data.
+    # Positions: x and y end up in [-5, 5), z in [25, 35).
+    vert_pos = torch.rand(n_points, 3, dtype=torch.float32, device=device) * 10.0
+    vert_pos[:, 2] += 25.0
+    vert_pos[:, :2] -= 5.0
+    vert_col = torch.rand(n_points, 3, dtype=torch.float32, device=device)
+    # A leading dimension of size one is added to both tensors.
+    pcl = Pointclouds(points=vert_pos[None, ...], features=vert_col[None, ...])
+    # Alternatively, you can also use the look_at_view_transform to get R and T:
+    # R, T = look_at_view_transform(
+    #     dist=30.0, elev=0.0, azim=180.0, at=((0.0, 0.0, 30.0),), up=((0, 1, 0),),
+    # )
+    cameras = PerspectiveCameras(
+        # The focal length must be double the size for PyTorch3D because of the NDC
+        # coordinates spanning a range of two - and they must be normalized by the
+        # sensor width (see the pulsar example). This means we need here
+        # 5.0 * 2.0 / 2.0 to get the equivalent results as in pulsar.
+        focal_length=(5.0 * 2.0 / 2.0,),
+        R=torch.eye(3, dtype=torch.float32, device=device)[None, ...],
+        T=torch.zeros((1, 3), dtype=torch.float32, device=device),
+        image_size=((height, width),),
+        device=device,
+    )
+    # The radii are drawn after the positions and colors; keep this order so
+    # that the random values stay the same.
+    vert_rad = torch.rand(n_points, dtype=torch.float32, device=device)
+    raster_settings = PointsRasterizationSettings(
+        image_size=(height, width),
+        radius=vert_rad,
+    )
+    rasterizer = PointsRasterizer(cameras=cameras, raster_settings=raster_settings)
+    renderer = PulsarPointsRenderer(rasterizer=rasterizer).to(device)
+    # Render.
+    image = renderer(
+        pcl,
+        gamma=(1.0e-1,),  # Renderer blending parameter gamma, in [1., 1e-5].
+        znear=(1.0,),
+        zfar=(45.0,),
+        radius_world=True,
+        bg_col=torch.ones((3,), dtype=torch.float32, device=device),
+    )[0]
+    LOGGER.info("Writing image to `%s`.", path.abspath("basic-pt3d.png"))
+    # Scale by 255 and convert to uint8 on the CPU before saving.
+    imageio.imsave(
+        "basic-pt3d.png", (image.cpu().detach() * 255.0).to(torch.uint8).numpy()
+    )
+    LOGGER.info("Done.")
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    cli()
diff --git a/pytorch3d/docs/examples/pulsar_cam.py b/pytorch3d/docs/examples/pulsar_cam.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd1dfabc60a414d27a6743e77b1f984002edc46a
--- /dev/null
+++ b/pytorch3d/docs/examples/pulsar_cam.py
@@ -0,0 +1,180 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+This example demonstrates camera parameter optimization with the plain
+pulsar interface. For this, a reference image has been pre-generated
+(you can find it at `../../tests/pulsar/reference/examples_TestRenderer_test_cam.png`).
+The same scene parameterization is loaded and the camera parameters
+distorted. Gradient-based optimization is used to converge towards the
+original camera parameters.
+Output: cam.gif.
+"""
+import logging
+import math
+from os import path
+
+import cv2
+import imageio
+import numpy as np
+import torch
+from pytorch3d.renderer.points.pulsar import Renderer
+from pytorch3d.transforms import axis_angle_to_matrix, matrix_to_rotation_6d
+from torch import nn, optim
+
+
+LOGGER = logging.getLogger(__name__)
+N_POINTS = 20
+WIDTH = 1_000
+HEIGHT = 1_000
+DEVICE = torch.device("cuda")
+
+
+class SceneModel(nn.Module):
+    """
+    A simple scene model to demonstrate use of pulsar in PyTorch modules.
+
+    The scene model is parameterized with sphere locations (vert_pos),
+    channel content (vert_col), radiuses (vert_rad), camera position (cam_pos),
+    camera rotation (cam_rot) and sensor focal length and width (cam_sensor).
+
+    The forward method of the model renders this scene description. Any
+    of these parameters could instead be passed as inputs to the forward
+    method and come from a different model.
+    """
+
+    def __init__(self):
+        super(SceneModel, self).__init__()
+        self.gamma = 0.1
+        # Points.
+        torch.manual_seed(1)
+        vert_pos = torch.rand(N_POINTS, 3, dtype=torch.float32) * 10.0
+        vert_pos[:, 2] += 25.0
+        vert_pos[:, :2] -= 5.0
+        self.register_parameter("vert_pos", nn.Parameter(vert_pos, requires_grad=False))
+        self.register_parameter(
+            "vert_col",
+            nn.Parameter(
+                torch.rand(N_POINTS, 3, dtype=torch.float32), requires_grad=False
+            ),
+        )
+        self.register_parameter(
+            "vert_rad",
+            nn.Parameter(
+                torch.rand(N_POINTS, dtype=torch.float32), requires_grad=False
+            ),
+        )
+        self.register_parameter(
+            "cam_pos",
+            nn.Parameter(
+                torch.tensor([0.1, 0.1, 0.0], dtype=torch.float32), requires_grad=True
+            ),
+        )
+        self.register_parameter(
+            "cam_rot",
+            # We're using the 6D rot. representation for better gradients.
+            nn.Parameter(
+                matrix_to_rotation_6d(
+                    axis_angle_to_matrix(
+                        torch.tensor(
+                            [
+                                [0.02, math.pi + 0.02, 0.01],
+                            ],
+                            dtype=torch.float32,
+                        )
+                    )
+                )[0],
+                requires_grad=True,
+            ),
+        )
+        self.register_parameter(
+            "cam_sensor",
+            nn.Parameter(
+                torch.tensor([4.8, 1.8], dtype=torch.float32), requires_grad=True
+            ),
+        )
+        self.renderer = Renderer(WIDTH, HEIGHT, N_POINTS, right_handed_system=True)
+
+    def forward(self):
+        return self.renderer.forward(
+            self.vert_pos,
+            self.vert_col,
+            self.vert_rad,
+            torch.cat([self.cam_pos, self.cam_rot, self.cam_sensor]),
+            self.gamma,
+            45.0,
+        )
+
+
+def cli():
+    """
+    Camera optimization example using pulsar.
+
+    Writes to `cam.gif`.
+    """
+    LOGGER.info("Loading reference...")
+    # Load reference.
+    ref = (
+        torch.from_numpy(
+            imageio.imread(
+                "../../tests/pulsar/reference/examples_TestRenderer_test_cam.png"
+            )[:, ::-1, :].copy()
+        ).to(torch.float32)
+        / 255.0
+    ).to(DEVICE)
+    # Set up model.
+    model = SceneModel().to(DEVICE)
+    # Optimizer.
+    optimizer = optim.SGD(
+        [
+            {"params": [model.cam_pos], "lr": 1e-4},  # 1e-3
+            {"params": [model.cam_rot], "lr": 5e-6},
+            {"params": [model.cam_sensor], "lr": 1e-4},
+        ]
+    )
+
+    LOGGER.info("Writing video to `%s`.", path.abspath("cam.gif"))
+    writer = imageio.get_writer("cam.gif", format="gif", fps=25)
+
+    # Optimize.
+    for i in range(300):
+        optimizer.zero_grad()
+        result = model()
+        # Visualize.
+        result_im = (result.cpu().detach().numpy() * 255).astype(np.uint8)
+        cv2.imshow("opt", result_im[:, :, ::-1])
+        writer.append_data(result_im)
+        overlay_img = np.ascontiguousarray(
+            ((result * 0.5 + ref * 0.5).cpu().detach().numpy() * 255).astype(np.uint8)[
+                :, :, ::-1
+            ]
+        )
+        overlay_img = cv2.putText(
+            overlay_img,
+            "Step %d" % (i),
+            (10, 40),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            1,
+            (0, 0, 0),
+            2,
+            cv2.LINE_AA,
+            False,
+        )
+        cv2.imshow("overlay", overlay_img)
+        cv2.waitKey(1)
+        # Update.
+        loss = ((result - ref) ** 2).sum()
+        LOGGER.info("loss %d: %f", i, loss.item())
+        loss.backward()
+        optimizer.step()
+    writer.close()
+    LOGGER.info("Done.")
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    cli()
diff --git a/pytorch3d/docs/examples/pulsar_cam_unified.py b/pytorch3d/docs/examples/pulsar_cam_unified.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ef3a8df408a8750acc4fc415174f1456d25836b
--- /dev/null
+++ b/pytorch3d/docs/examples/pulsar_cam_unified.py
@@ -0,0 +1,230 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+This example demonstrates camera parameter optimization with the pulsar
+PyTorch3D interface. For this, a reference image has been pre-generated
+(you can find it at `../../tests/pulsar/reference/examples_TestRenderer_test_cam.png`).
+The same scene parameterization is loaded and the camera parameters
+distorted. Gradient-based optimization is used to converge towards the
+original camera parameters.
+Output: cam-pt3d.gif
+"""
+import logging
+from os import path
+
+import cv2
+import imageio
+import numpy as np
+import torch
+from pytorch3d.renderer.cameras import PerspectiveCameras
+from pytorch3d.renderer.points import (
+    PointsRasterizationSettings,
+    PointsRasterizer,
+    PulsarPointsRenderer,
+)
+from pytorch3d.structures.pointclouds import Pointclouds
+from pytorch3d.transforms import axis_angle_to_matrix
+from torch import nn, optim
+
+
+LOGGER = logging.getLogger(__name__)
+N_POINTS = 20
+WIDTH = 1_000
+HEIGHT = 1_000
+DEVICE = torch.device("cuda")
+
+
+class SceneModel(nn.Module):
+    """
+    A simple scene model to demonstrate use of pulsar in PyTorch modules.
+
+    The scene model is parameterized with sphere locations (vert_pos),
+    channel content (vert_col), radiuses (vert_rad), camera position (cam_pos),
+    camera rotation (cam_rot) and the camera focal length (focal_length).
+
+    The forward method of the model renders this scene description. Only the
+    camera parameters require gradients here. Any of these parameters could
+    instead be passed as inputs to forward and come from a different model.
+    """
+
+    def __init__(self):
+        super(SceneModel, self).__init__()
+        self.gamma = 0.1
+        # Points.
+        torch.manual_seed(1)
+        vert_pos = torch.rand(N_POINTS, 3, dtype=torch.float32) * 10.0
+        vert_pos[:, 2] += 25.0
+        vert_pos[:, :2] -= 5.0
+        self.register_parameter("vert_pos", nn.Parameter(vert_pos, requires_grad=False))
+        self.register_parameter(
+            "vert_col",
+            nn.Parameter(
+                torch.rand(N_POINTS, 3, dtype=torch.float32),
+                requires_grad=False,
+            ),
+        )
+        self.register_parameter(
+            "vert_rad",
+            nn.Parameter(
+                torch.rand(N_POINTS, dtype=torch.float32),
+                requires_grad=False,
+            ),
+        )
+        self.register_parameter(
+            "cam_pos",
+            nn.Parameter(
+                torch.tensor([0.1, 0.1, 0.0], dtype=torch.float32),
+                requires_grad=True,
+            ),
+        )
+        self.register_parameter(
+            "cam_rot",
+            # Built with axis_angle_to_matrix (NOTE(review): not a 6D representation).
+            nn.Parameter(
+                axis_angle_to_matrix(
+                    torch.tensor(
+                        [
+                            [0.02, 0.02, 0.01],
+                        ],
+                        dtype=torch.float32,
+                    )
+                )[0],
+                requires_grad=True,
+            ),
+        )
+        self.register_parameter(
+            "focal_length",
+            nn.Parameter(
+                torch.tensor(
+                    [
+                        4.8 * 2.0 / 2.0,
+                    ],
+                    dtype=torch.float32,
+                ),
+                requires_grad=True,
+            ),
+        )
+        self.cameras = PerspectiveCameras(
+            # The focal length must be double the size for PyTorch3D because of the NDC
+            # coordinates spanning a range of two - and they must be normalized by the
+            # sensor width (see the pulsar example). 5.0 * 2.0 / 2.0 would match pulsar;
+            # focal_length starts at 4.8 * 2.0 / 2.0 and is a trainable parameter.
+            #
+            # R, T and f are provided here, but will be provided again
+            # at every call to the forward method. The reason is a PyTorch
+            # limitation that makes device placement for gradients problematic
+            # for tensors which are themselves on a 'gradient path' but not
+            # leaves in the calculation tree. This will be addressed by an architectural
+            # change in PyTorch3D in the future. Until then, this workaround is
+            # recommended.
+            focal_length=self.focal_length,
+            R=self.cam_rot[None, ...],
+            T=self.cam_pos[None, ...],
+            image_size=((HEIGHT, WIDTH),),
+            device=DEVICE,
+        )
+        raster_settings = PointsRasterizationSettings(
+            image_size=(HEIGHT, WIDTH),
+            radius=self.vert_rad,
+        )
+        rasterizer = PointsRasterizer(
+            cameras=self.cameras, raster_settings=raster_settings
+        )
+        self.renderer = PulsarPointsRenderer(rasterizer=rasterizer)
+
+    def forward(self):
+        # The Pointclouds object creates copies of its arguments - that's why
+        # we have to create a new object in every forward step.
+        pcl = Pointclouds(
+            points=self.vert_pos[None, ...], features=self.vert_col[None, ...]
+        )
+        return self.renderer(
+            pcl,
+            gamma=(self.gamma,),
+            zfar=(45.0,),
+            znear=(1.0,),
+            radius_world=True,
+            bg_col=torch.ones((3,), dtype=torch.float32, device=DEVICE),
+            # As mentioned above: workaround for device placement of gradients for
+            # camera parameters.
+            focal_length=self.focal_length,
+            R=self.cam_rot[None, ...],
+            T=self.cam_pos[None, ...],
+        )[0]
+
+
+def cli():
+    """
+    Camera optimization example using pulsar.
+
+    Writes to `cam-pt3d.gif`.
+    """
+    LOGGER.info("Loading reference...")
+    # Load reference (second axis reversed, values divided by 255).
+    ref = (
+        torch.from_numpy(
+            imageio.imread(
+                "../../tests/pulsar/reference/examples_TestRenderer_test_cam.png"
+            )[:, ::-1, :].copy()
+        ).to(torch.float32)
+        / 255.0
+    ).to(DEVICE)
+    # Set up model.
+    model = SceneModel().to(DEVICE)
+    # Optimizer (one learning rate per camera parameter).
+    optimizer = optim.SGD(
+        [
+            {"params": [model.cam_pos], "lr": 1e-4},
+            {"params": [model.cam_rot], "lr": 5e-6},
+            # Using a higher lr for the focal length here, because
+            # the sensor width can not be optimized directly.
+            {"params": [model.focal_length], "lr": 1e-3},
+        ]
+    )
+
+    LOGGER.info("Writing video to `%s`.", path.abspath("cam-pt3d.gif"))
+    writer = imageio.get_writer("cam-pt3d.gif", format="gif", fps=25)
+
+    # Optimize.
+    for i in range(300):
+        optimizer.zero_grad()
+        result = model()
+        # Visualize (last axis reversed for the cv2 windows, not for the gif).
+        result_im = (result.cpu().detach().numpy() * 255).astype(np.uint8)
+        cv2.imshow("opt", result_im[:, :, ::-1])
+        writer.append_data(result_im)
+        overlay_img = np.ascontiguousarray(
+            ((result * 0.5 + ref * 0.5).cpu().detach().numpy() * 255).astype(np.uint8)[
+                :, :, ::-1
+            ]
+        )
+        overlay_img = cv2.putText(
+            overlay_img,
+            "Step %d" % (i),
+            (10, 40),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            1,
+            (0, 0, 0),
+            2,
+            cv2.LINE_AA,
+            False,
+        )
+        cv2.imshow("overlay", overlay_img)
+        cv2.waitKey(1)
+        # Update (sum of squared differences to the reference image).
+        loss = ((result - ref) ** 2).sum()
+        LOGGER.info("loss %d: %f", i, loss.item())
+        loss.backward()
+        optimizer.step()
+    writer.close()
+    LOGGER.info("Done.")
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    cli()
diff --git a/pytorch3d/docs/examples/pulsar_multiview.py b/pytorch3d/docs/examples/pulsar_multiview.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6a05b4a6ed895f1a2f465d073559b3dad73f6dd
--- /dev/null
+++ b/pytorch3d/docs/examples/pulsar_multiview.py
@@ -0,0 +1,230 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+This example demonstrates multiview 3D reconstruction using the plain
+pulsar interface. For this, reference images have been pre-generated
+(you can find them at
+`../../tests/pulsar/reference/examples_TestRenderer_test_multiview_%d.png`).
+The camera parameters are assumed given. The scene is initialized with
+random spheres. Gradient-based optimization is used to optimize sphere
+parameters and prune spheres to converge to a 3D representation.
+
+This example is not available yet through the 'unified' interface,
+because opacity support has not landed in PyTorch3D for general data
+structures yet.
+"""
+import logging
+import math
+from os import path
+
+import cv2
+import imageio
+import numpy as np
+import torch
+from pytorch3d.renderer.points.pulsar import Renderer
+from torch import nn, optim
+
+
+LOGGER = logging.getLogger(__name__)
+N_POINTS = 400_000
+WIDTH = 1_000
+HEIGHT = 1_000
+VISUALIZE_IDS = [0, 1]
+DEVICE = torch.device("cuda")
+
+
+class SceneModel(nn.Module):
+    """
+    A simple scene model to demonstrate use of pulsar in PyTorch modules.
+
+    The scene model is parameterized with sphere locations (vert_pos),
+    channel content (vert_col), radiuses (vert_rad), opacities (vert_opy) and a
+    buffer with one row of camera parameters per view (cam_params).
+
+    The forward method of the model renders this scene description. Any
+    of these parameters could instead be passed as inputs to the forward
+    method and come from a different model. Optionally, camera parameters can
+    be provided to the forward method in which case the scene is rendered
+    using those parameters.
+    """
+
+    def __init__(self):
+        super(SceneModel, self).__init__()
+        self.gamma = 1.0
+        # Points.
+        torch.manual_seed(1)
+        vert_pos = torch.rand((1, N_POINTS, 3), dtype=torch.float32) * 10.0
+        vert_pos[:, :, 2] += 25.0
+        vert_pos[:, :, :2] -= 5.0
+        self.register_parameter("vert_pos", nn.Parameter(vert_pos, requires_grad=True))
+        self.register_parameter(
+            "vert_col",
+            nn.Parameter(
+                torch.ones(1, N_POINTS, 3, dtype=torch.float32) * 0.5,
+                requires_grad=True,
+            ),
+        )
+        self.register_parameter(
+            "vert_rad",
+            nn.Parameter(
+                torch.ones(1, N_POINTS, dtype=torch.float32) * 0.05, requires_grad=True
+            ),
+        )
+        self.register_parameter(
+            "vert_opy",  # NOTE(review): forward() does not pass this to the renderer
+            nn.Parameter(
+                torch.ones(1, N_POINTS, dtype=torch.float32), requires_grad=True
+            ),
+        )
+        self.register_buffer(
+            "cam_params",
+            torch.tensor(
+                [
+                    [
+                        np.sin(angle) * 35.0,
+                        0.0,
+                        30.0 - np.cos(angle) * 35.0,
+                        0.0,
+                        -angle + math.pi,
+                        0.0,
+                        5.0,
+                        2.0,
+                    ]
+                    for angle in [-1.5, -0.8, -0.4, -0.1, 0.1, 0.4, 0.8, 1.5]
+                ],
+                dtype=torch.float32,
+            ),
+        )
+        self.renderer = Renderer(WIDTH, HEIGHT, N_POINTS, right_handed_system=True)
+
+    def forward(self, cam=None):
+        if cam is None:
+            cam = self.cam_params
+            n_views = 8  # one view per row of self.cam_params
+        else:
+            n_views = 1  # a caller-provided cam is treated as a single view
+        return self.renderer.forward(
+            self.vert_pos.expand(n_views, -1, -1),
+            self.vert_col.expand(n_views, -1, -1),
+            self.vert_rad.expand(n_views, -1),
+            cam,
+            self.gamma,
+            45.0,
+        )
+
+
+def cli():
+    """
+    Simple demonstration for a multi-view 3D reconstruction using pulsar.
+
+    This example makes use of opacity, which is not yet supported through
+    the unified PyTorch3D interface.
+
+    Writes to `multiview.gif`.
+    """
+    LOGGER.info("Loading reference...")
+    # Load reference (one image per view, values divided by 255).
+    ref = torch.stack(
+        [
+            torch.from_numpy(
+                imageio.imread(
+                    "../../tests/pulsar/reference/examples_TestRenderer_test_multiview_%d.png"
+                    % idx
+                )
+            ).to(torch.float32)
+            / 255.0
+            for idx in range(8)
+        ]
+    ).to(DEVICE)
+    # Set up model.
+    model = SceneModel().to(DEVICE)
+    # Optimizer.
+    optimizer = optim.SGD(
+        [
+            {"params": [model.vert_col], "lr": 1e-1},
+            {"params": [model.vert_rad], "lr": 1e-3},
+            {"params": [model.vert_pos], "lr": 1e-3},
+        ]
+    )
+
+    # For visualization. The logged path must name the file the writer creates.
+    angle = 0.0
+    LOGGER.info("Writing video to `%s`.", path.abspath("multiview.gif"))
+    writer = imageio.get_writer("multiview.gif", format="gif", fps=25)
+
+    # Optimize.
+    for i in range(300):
+        optimizer.zero_grad()
+        result = model()
+        # Visualize (only the first view is shown in the windows).
+        result_im = (result.cpu().detach().numpy() * 255).astype(np.uint8)
+        cv2.imshow("opt", result_im[0, :, :, ::-1])
+        overlay_img = np.ascontiguousarray(
+            ((result * 0.5 + ref * 0.5).cpu().detach().numpy() * 255).astype(np.uint8)[
+                0, :, :, ::-1
+            ]
+        )
+        overlay_img = cv2.putText(
+            overlay_img,
+            "Step %d" % (i),
+            (10, 40),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            1,
+            (0, 0, 0),
+            2,
+            cv2.LINE_AA,
+            False,
+        )
+        cv2.imshow("overlay", overlay_img)
+        cv2.waitKey(1)
+        # Update.
+        loss = ((result - ref) ** 2).sum()
+        LOGGER.info("loss %d: %f", i, loss.item())
+        loss.backward()
+        optimizer.step()
+        # Cleanup.
+        with torch.no_grad():
+            model.vert_col.data = torch.clamp(model.vert_col.data, 0.0, 1.0)
+            # Remove points (moved to -1000) if tiny or colored within 0.2 of all-ones.
+            model.vert_pos.data[model.vert_rad < 0.001, :] = -1000.0
+            model.vert_rad.data[model.vert_rad < 0.001] = 0.0001
+            vd = (
+                (model.vert_col - torch.ones(1, 1, 3, dtype=torch.float32).to(DEVICE))
+                .abs()
+                .sum(dim=2)
+            )
+            model.vert_pos.data[vd <= 0.2] = -1000.0
+        # Rotating visualization.
+        cam_control = torch.tensor(
+            [
+                [
+                    np.sin(angle) * 35.0,
+                    0.0,
+                    30.0 - np.cos(angle) * 35.0,
+                    0.0,
+                    -angle + math.pi,
+                    0.0,
+                    5.0,
+                    2.0,
+                ]
+            ],
+            dtype=torch.float32,
+        ).to(DEVICE)
+        with torch.no_grad():
+            result = model.forward(cam=cam_control)[0]
+            result_im = (result.cpu().detach().numpy() * 255).astype(np.uint8)
+            cv2.imshow("vis", result_im[:, :, ::-1])
+            writer.append_data(result_im)
+            angle += 0.05
+    writer.close()
+    LOGGER.info("Done.")
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    cli()
diff --git a/pytorch3d/docs/examples/pulsar_optimization.py b/pytorch3d/docs/examples/pulsar_optimization.py
new file mode 100644
index 0000000000000000000000000000000000000000..530233f3f11f47ad78fb9a04bbc22004ac8c620f
--- /dev/null
+++ b/pytorch3d/docs/examples/pulsar_optimization.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+This example demonstrates scene optimization with the plain
+pulsar interface. For this, a reference image has been pre-generated
+(you can find it at `../../tests/pulsar/reference/examples_TestRenderer_test_smallopt.png`).
+The scene is initialized with random spheres. Gradient-based
+optimization is used to converge towards a faithful
+scene representation.
+"""
+import logging
+import math
+
+import cv2
+import imageio
+import numpy as np
+import torch
+from pytorch3d.renderer.points.pulsar import Renderer
+from torch import nn, optim
+
+
+LOGGER = logging.getLogger(__name__)
+N_POINTS = 10_000
+WIDTH = 1_000
+HEIGHT = 1_000
+DEVICE = torch.device("cuda")
+
+
+class SceneModel(nn.Module):
+    """
+    A simple scene model to demonstrate use of pulsar in PyTorch modules.
+
+    The scene model is parameterized with sphere locations (vert_pos),
+    channel content (vert_col) and radiuses (vert_rad), all requiring
+    gradients. The camera parameters are a fixed buffer (cam_params).
+
+    The forward method of the model renders this scene description. Any
+    of these parameters could instead be passed as inputs to the forward
+    method and come from a different model.
+    """
+
+    def __init__(self):
+        super(SceneModel, self).__init__()
+        self.gamma = 1.0
+        # Points.
+        torch.manual_seed(1)
+        vert_pos = torch.rand(N_POINTS, 3, dtype=torch.float32) * 10.0
+        vert_pos[:, 2] += 25.0
+        vert_pos[:, :2] -= 5.0
+        self.register_parameter("vert_pos", nn.Parameter(vert_pos, requires_grad=True))
+        self.register_parameter(
+            "vert_col",
+            nn.Parameter(
+                torch.ones(N_POINTS, 3, dtype=torch.float32) * 0.5, requires_grad=True
+            ),
+        )
+        self.register_parameter(
+            "vert_rad",
+            nn.Parameter(
+                torch.ones(N_POINTS, dtype=torch.float32) * 0.3, requires_grad=True
+            ),
+        )
+        self.register_buffer(
+            "cam_params",
+            torch.tensor(
+                [0.0, 0.0, 0.0, 0.0, math.pi, 0.0, 5.0, 2.0], dtype=torch.float32
+            ),
+        )
+        # The volumetric optimization works better with a higher number of tracked
+        # intersections per ray.
+        self.renderer = Renderer(
+            WIDTH, HEIGHT, N_POINTS, n_track=32, right_handed_system=True
+        )
+
+    def forward(self):
+        return self.renderer.forward(
+            self.vert_pos,
+            self.vert_col,
+            self.vert_rad,
+            self.cam_params,
+            self.gamma,
+            45.0,
+            return_forward_info=True,  # cli() unpacks the result as two values
+        )
+
+
+def cli():
+    """
+    Scene optimization example using pulsar.
+    """
+    LOGGER.info("Loading reference...")
+    # Load reference (second axis reversed, values divided by 255).
+    ref = (
+        torch.from_numpy(
+            imageio.imread(
+                "../../tests/pulsar/reference/examples_TestRenderer_test_smallopt.png"
+            )[:, ::-1, :].copy()
+        ).to(torch.float32)
+        / 255.0
+    ).to(DEVICE)
+    # Set up model.
+    model = SceneModel().to(DEVICE)
+    # Optimizer (one learning rate per sphere parameter).
+    optimizer = optim.SGD(
+        [
+            {"params": [model.vert_col], "lr": 1e0},
+            {"params": [model.vert_rad], "lr": 5e-3},
+            {"params": [model.vert_pos], "lr": 1e-2},
+        ]
+    )
+    LOGGER.info("Optimizing...")
+    # Optimize.
+    for i in range(500):
+        optimizer.zero_grad()
+        result, result_info = model()  # result_info is not used below
+        # Visualize.
+        result_im = (result.cpu().detach().numpy() * 255).astype(np.uint8)
+        cv2.imshow("opt", result_im[:, :, ::-1])
+        overlay_img = np.ascontiguousarray(
+            ((result * 0.5 + ref * 0.5).cpu().detach().numpy() * 255).astype(np.uint8)[
+                :, :, ::-1
+            ]
+        )
+        overlay_img = cv2.putText(
+            overlay_img,
+            "Step %d" % (i),
+            (10, 40),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            1,
+            (0, 0, 0),
+            2,
+            cv2.LINE_AA,
+            False,
+        )
+        cv2.imshow("overlay", overlay_img)
+        cv2.waitKey(1)
+        # Update (sum of squared differences to the reference image).
+        loss = ((result - ref) ** 2).sum()
+        LOGGER.info("loss %d: %f", i, loss.item())
+        loss.backward()
+        optimizer.step()
+        # Cleanup.
+        with torch.no_grad():
+            model.vert_col.data = torch.clamp(model.vert_col.data, 0.0, 1.0)
+            # Remove points (moved to -1000) if tiny or colored within 0.2 of all-ones.
+            model.vert_pos.data[model.vert_rad < 0.001, :] = -1000.0
+            model.vert_rad.data[model.vert_rad < 0.001] = 0.0001
+            vd = (
+                (model.vert_col - torch.ones(3, dtype=torch.float32).to(DEVICE))
+                .abs()
+                .sum(dim=1)
+            )
+            model.vert_pos.data[vd <= 0.2] = -1000.0
+    LOGGER.info("Done.")
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    cli()
diff --git a/pytorch3d/docs/examples/pulsar_optimization_unified.py b/pytorch3d/docs/examples/pulsar_optimization_unified.py
new file mode 100644
index 0000000000000000000000000000000000000000..be4d92998231bd3aeb7c0e3bfc1ad08d97d8bca9
--- /dev/null
+++ b/pytorch3d/docs/examples/pulsar_optimization_unified.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+This example demonstrates scene optimization with the PyTorch3D
+pulsar interface. For this, a reference image has been pre-generated
+(you can find it at `../../tests/pulsar/reference/examples_TestRenderer_test_smallopt.png`).
+The scene is initialized with random spheres. Gradient-based
+optimization is used to converge towards a faithful
+scene representation.
+"""
+import logging
+import math
+
+import cv2
+import imageio
+import numpy as np
+import torch
+from pytorch3d.renderer.cameras import PerspectiveCameras
+from pytorch3d.renderer.points import (
+    PointsRasterizationSettings,
+    PointsRasterizer,
+    PulsarPointsRenderer,
+)
+from pytorch3d.structures.pointclouds import Pointclouds
+from torch import nn, optim
+
+
+LOGGER = logging.getLogger(__name__)
+N_POINTS = 10_000
+WIDTH = 1_000
+HEIGHT = 1_000
+DEVICE = torch.device("cuda")
+
+
+class SceneModel(nn.Module):
+    """
+    A simple scene model to demonstrate use of pulsar in PyTorch modules.
+
+    The scene model is parameterized with sphere locations (vert_pos),
+    channel content (vert_col) and radiuses (vert_rad), all requiring
+    gradients. The camera is a fixed PerspectiveCameras object (cameras).
+
+    The forward method of the model renders this scene description. Any
+    of these parameters could instead be passed as inputs to the forward
+    method and come from a different model.
+    """
+
+    def __init__(self):
+        super(SceneModel, self).__init__()
+        self.gamma = 1.0
+        # Points.
+        torch.manual_seed(1)
+        vert_pos = torch.rand(N_POINTS, 3, dtype=torch.float32, device=DEVICE) * 10.0
+        vert_pos[:, 2] += 25.0
+        vert_pos[:, :2] -= 5.0
+        self.register_parameter("vert_pos", nn.Parameter(vert_pos, requires_grad=True))
+        self.register_parameter(
+            "vert_col",
+            nn.Parameter(
+                torch.ones(N_POINTS, 3, dtype=torch.float32, device=DEVICE) * 0.5,
+                requires_grad=True,
+            ),
+        )
+        self.register_parameter(
+            "vert_rad",  # NOTE(review): created without device=DEVICE, unlike vert_pos/vert_col
+            nn.Parameter(
+                torch.ones(N_POINTS, dtype=torch.float32) * 0.3, requires_grad=True
+            ),
+        )
+        self.register_buffer(
+            "cam_params",  # NOTE(review): not referenced anywhere else in this class
+            torch.tensor(
+                [0.0, 0.0, 0.0, 0.0, math.pi, 0.0, 5.0, 2.0], dtype=torch.float32
+            ),
+        )
+        self.cameras = PerspectiveCameras(
+            # The focal length must be double the size for PyTorch3D because of the NDC
+            # coordinates spanning a range of two - and they must be normalized by the
+            # sensor width (see the pulsar example). This means we need here
+            # 5.0 * 2.0 / 2.0 to get the equivalent results as in pulsar.
+            focal_length=5.0,
+            R=torch.eye(3, dtype=torch.float32, device=DEVICE)[None, ...],
+            T=torch.zeros((1, 3), dtype=torch.float32, device=DEVICE),
+            image_size=((HEIGHT, WIDTH),),
+            device=DEVICE,
+        )
+        raster_settings = PointsRasterizationSettings(
+            image_size=(HEIGHT, WIDTH),
+            radius=self.vert_rad,
+        )
+        rasterizer = PointsRasterizer(
+            cameras=self.cameras, raster_settings=raster_settings
+        )
+        self.renderer = PulsarPointsRenderer(rasterizer=rasterizer, n_track=32)
+
+    def forward(self):
+        # The Pointclouds object creates copies of its arguments - that's why
+        # we have to create a new object in every forward step.
+        pcl = Pointclouds(
+            points=self.vert_pos[None, ...], features=self.vert_col[None, ...]
+        )
+        return self.renderer(
+            pcl,
+            gamma=(self.gamma,),
+            zfar=(45.0,),
+            znear=(1.0,),
+            radius_world=True,
+            bg_col=torch.ones((3,), dtype=torch.float32, device=DEVICE),
+        )[0]
+
+
+def cli():
+    """
+    Scene optimization example using pulsar and the unified PyTorch3D interface.
+    """
+    LOGGER.info("Loading reference...")
+    # Load reference (second axis reversed, values divided by 255).
+    ref = (
+        torch.from_numpy(
+            imageio.imread(
+                "../../tests/pulsar/reference/examples_TestRenderer_test_smallopt.png"
+            )[:, ::-1, :].copy()
+        ).to(torch.float32)
+        / 255.0
+    ).to(DEVICE)
+    # Set up model.
+    model = SceneModel().to(DEVICE)
+    # Optimizer (one learning rate per sphere parameter).
+    optimizer = optim.SGD(
+        [
+            {"params": [model.vert_col], "lr": 1e0},
+            {"params": [model.vert_rad], "lr": 5e-3},
+            {"params": [model.vert_pos], "lr": 1e-2},
+        ]
+    )
+    LOGGER.info("Optimizing...")
+    # Optimize.
+    for i in range(500):
+        optimizer.zero_grad()
+        result = model()
+        # Visualize (last axis reversed for the cv2 windows).
+        result_im = (result.cpu().detach().numpy() * 255).astype(np.uint8)
+        cv2.imshow("opt", result_im[:, :, ::-1])
+        overlay_img = np.ascontiguousarray(
+            ((result * 0.5 + ref * 0.5).cpu().detach().numpy() * 255).astype(np.uint8)[
+                :, :, ::-1
+            ]
+        )
+        overlay_img = cv2.putText(
+            overlay_img,
+            "Step %d" % (i),
+            (10, 40),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            1,
+            (0, 0, 0),
+            2,
+            cv2.LINE_AA,
+            False,
+        )
+        cv2.imshow("overlay", overlay_img)
+        cv2.waitKey(1)
+        # Update (sum of squared differences to the reference image).
+        loss = ((result - ref) ** 2).sum()
+        LOGGER.info("loss %d: %f", i, loss.item())
+        loss.backward()
+        optimizer.step()
+        # Cleanup.
+        with torch.no_grad():
+            model.vert_col.data = torch.clamp(model.vert_col.data, 0.0, 1.0)
+            # Remove points (moved to -1000) if tiny or colored within 0.2 of all-ones.
+            model.vert_pos.data[model.vert_rad < 0.001, :] = -1000.0
+            model.vert_rad.data[model.vert_rad < 0.001] = 0.0001
+            vd = (
+                (model.vert_col - torch.ones(3, dtype=torch.float32).to(DEVICE))
+                .abs()
+                .sum(dim=1)
+            )
+            model.vert_pos.data[vd <= 0.2] = -1000.0
+    LOGGER.info("Done.")
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    cli()
diff --git a/pytorch3d/docs/generate_stubs.py b/pytorch3d/docs/generate_stubs.py
new file mode 100644
index 0000000000000000000000000000000000000000..e31519f944a8b3d94bb4aa976637b6c5872afdcf
--- /dev/null
+++ b/pytorch3d/docs/generate_stubs.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+This script makes the stubs for implicitron, renderer and vis in docs/modules.
+"""
+
+from pathlib import Path
+
+ROOT_DIR = Path(__file__).resolve().parent.parent
+
+
+def paths_to_modules(paths):
+    """
+    Map paths below ROOT_DIR to dotted module names; skips __pycache__ paths.
+    """
+    return [
+        ".".join(i.relative_to(ROOT_DIR).with_suffix("").parts)
+        for i in paths
+        if "__pycache__" not in str(i)
+    ]
+
+
+def create_one_file(title, description, sources, dest_file):
+    """
+    Write an rst page to dest_file: the underlined title, the description and
+    one automodule directive for every entry of sources.
+    """
+    options = (":members:", ":undoc-members:", ":show-inheritance:")
+    with open(dest_file, "w") as out:
+        print(title, "=" * len(title), "", description, sep="\n", file=out)
+        # ignore internal modules including __init__.py
+        public = (name for name in sources if "._" not in name)
+        for module_name in public:
+            print(f"\n.. automodule:: {module_name}", file=out)
+            for option in options:
+                print(f"    {option}", file=out)
+
+
+def iterate_directory(directory_path, dest):
+    """
+    Write one rst stub per public module in directory_path into dest, recurse
+    into public subdirectories, then write dest/index.rst listing all of them.
+    Names starting with "_" and subdirectories named "fb" are skipped.
+    """
+    toc = []
+    # The destination may be nested below directories that do not exist yet.
+    dest.mkdir(parents=True, exist_ok=True)
+    for file in sorted(directory_path.glob("*.py")):
+        if file.stem.startswith("_"):
+            continue
+        module = paths_to_modules([file])
+        create_one_file(module[0], file.stem, module, dest / f"{file.stem}.rst")
+        toc.append(file.stem)
+
+    # Sorted so the generated toctree does not depend on filesystem order.
+    for subdir in sorted(directory_path.iterdir()):
+        if not subdir.is_dir() or subdir.name == "fb" or subdir.name.startswith("_"):
+            continue
+        iterate_directory(subdir, dest / subdir.name)
+        toc.append(f"{subdir.name}/index")
+
+    # The throwaway ".XX" suffix is stripped again by paths_to_modules.
+    paths_to_modules_ = paths_to_modules([directory_path.with_suffix(".XX")])
+    if not paths_to_modules_:
+        return
+    title = paths_to_modules_[0]
+
+    with open(dest / "index.rst", "w") as f:
+        print(title, file=f)
+        print("=" * len(title), file=f)
+        print("\n.. toctree::\n", file=f)
+        for item in toc:
+            print(f"    {item}", file=f)
+
+
+def make_directory_index(title: str, directory_path: Path):
+    """
+    Write directory_path/index.rst: a toctree with the index of every
+    subdirectory followed by every other rst file, each group sorted.
+    """
+    # Both listings are taken before index.rst is opened for writing.
+    pages = sorted(directory_path.glob("*.rst"))
+    folders = sorted(entry for entry in directory_path.iterdir() if entry.is_dir())
+    entries = [f"{folder.stem}/index.rst" for folder in folders]
+    entries += [page.stem for page in pages if page.stem != "index"]
+    with open(directory_path / "index.rst", "w") as out:
+        out.write(f"{title}\n{'=' * len(title)}\n")
+        out.write("\n.. toctree::\n\n")
+        out.writelines(f"    {entry}\n" for entry in entries)
+
+
+def do_implicitron():
+    """
+    Generate the implicitron stubs in modules/implicitron next to this script:
+    per-module pages for models, then tools.rst, data_basics.rst, datasets.rst,
+    evaluation.rst and finally the index.rst tying them together.
+    """
+    dest_dir = Path(__file__).resolve().parent / "modules/implicitron"
+
+    iterate_directory(ROOT_DIR / "pytorch3d/implicitron/models", dest_dir / "models")
+
+    unwanted_tools = ["configurable", "depth_cleanup", "utils"]
+    tools_sources = sorted(ROOT_DIR.glob("pytorch3d/implicitron/tools/*.py"))
+    create_one_file(
+        "pytorch3d.implicitron.tools",
+        "Tools for implicitron",
+        # Same path-to-module mapping as everywhere else in this script.
+        paths_to_modules(i for i in tools_sources if i.stem not in unwanted_tools),
+        dest_dir / "tools.rst",
+    )
+
+    dataset_files = sorted(ROOT_DIR.glob("pytorch3d/implicitron/dataset/*.py"))
+    basic_dataset = [
+        "dataset_base",
+        "dataset_map_provider",
+        "data_loader_map_provider",
+        "data_source",
+        "scene_batch_sampler",
+    ]
+    create_one_file(
+        "pytorch3d.implicitron.dataset in general",
+        "Basics of data for implicitron",
+        [f"pytorch3d.implicitron.dataset.{i}" for i in basic_dataset],
+        dest_dir / "data_basics.rst",
+    )
+
+    # Dataset-specific pages: every module with _dataset_map_provider in its name.
+    specific_dataset_files = [
+        i for i in dataset_files if "_dataset_map_provider" in i.stem
+    ]
+    create_one_file(
+        "pytorch3d.implicitron.dataset specific datasets",
+        "specific datasets",
+        paths_to_modules(specific_dataset_files),
+        dest_dir / "datasets.rst",
+    )
+
+    evaluation_files = sorted(ROOT_DIR.glob("pytorch3d/implicitron/evaluation/*.py"))
+    create_one_file(
+        "pytorch3d.implicitron.evaluation",
+        "evaluation",
+        paths_to_modules(evaluation_files),
+        dest_dir / "evaluation.rst",
+    )
+
+    # Must run last: it lists the rst files and subdirectories created above.
+    make_directory_index("pytorch3d.implicitron", dest_dir)
+
+
+def iterate_toplevel_module(name: str) -> None:
+    dest_dir = Path(__file__).resolve().parent / "modules" / name  # next to this script
+    iterate_directory(ROOT_DIR / "pytorch3d" / name, dest_dir)  # stubs for pytorch3d/<name>
+
+
+do_implicitron()
+iterate_toplevel_module("renderer")
+iterate_toplevel_module("vis")
diff --git a/pytorch3d/docs/index.rst b/pytorch3d/docs/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f7cd28f6b515ad14726b8313067c674ad725d065
--- /dev/null
+++ b/pytorch3d/docs/index.rst
@@ -0,0 +1,17 @@
+Welcome to PyTorch3D's documentation!
+=====================================
+
+PyTorch3D is a library of reusable components for Deep Learning with 3D data.
+
+Table of Contents
+=================
+
+.. toctree::
+   :maxdepth: 2
+
+   overview
+
+.. toctree::
+   :maxdepth: 2
+
+   modules/index
diff --git a/pytorch3d/docs/modules/common.rst b/pytorch3d/docs/modules/common.rst
new file mode 100644
index 0000000000000000000000000000000000000000..7ca686904e085fcbbc847fb633c025958be3f3ce
--- /dev/null
+++ b/pytorch3d/docs/modules/common.rst
@@ -0,0 +1,6 @@
+pytorch3d.common
+===========================
+
+.. automodule:: pytorch3d.common
+    :members:
+    :undoc-members:
diff --git a/pytorch3d/docs/modules/datasets.rst b/pytorch3d/docs/modules/datasets.rst
new file mode 100644
index 0000000000000000000000000000000000000000..296a560d7aa9ec3a13c8da763977c96650610e62
--- /dev/null
+++ b/pytorch3d/docs/modules/datasets.rst
@@ -0,0 +1,9 @@
+pytorch3d.datasets
+===========================
+
+Dataset loaders for datasets including ShapeNetCore.
+
+.. automodule:: pytorch3d.datasets
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/data_basics.rst b/pytorch3d/docs/modules/implicitron/data_basics.rst
new file mode 100644
index 0000000000000000000000000000000000000000..3d0bdd8723d27919cfaea511f3dacd79acab34a4
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/data_basics.rst
@@ -0,0 +1,29 @@
+pytorch3d.implicitron.dataset in general
+========================================
+
+Basics of data for implicitron
+
+.. automodule:: pytorch3d.implicitron.dataset.dataset_base
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.dataset.dataset_map_provider
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.dataset.data_loader_map_provider
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.dataset.data_source
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.dataset.scene_batch_sampler
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/datasets.rst b/pytorch3d/docs/modules/implicitron/datasets.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f012fe8b351045fd15ad52673d9e4d258a6ef6d9
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/datasets.rst
@@ -0,0 +1,29 @@
+pytorch3d.implicitron.dataset specific datasets
+===============================================
+
+specific datasets
+
+.. automodule:: pytorch3d.implicitron.dataset.blender_dataset_map_provider
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.dataset.json_index_dataset_map_provider
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.dataset.json_index_dataset_map_provider_v2
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.dataset.llff_dataset_map_provider
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.dataset.rendered_mesh_dataset_map_provider
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/evaluation.rst b/pytorch3d/docs/modules/implicitron/evaluation.rst
new file mode 100644
index 0000000000000000000000000000000000000000..094d22e37b52a1bdeb2ad67a05279c5448c58e73
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/evaluation.rst
@@ -0,0 +1,14 @@
+pytorch3d.implicitron.evaluation
+================================
+
+evaluation
+
+.. automodule:: pytorch3d.implicitron.evaluation.evaluate_new_view_synthesis
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.evaluation.evaluator
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/index.rst b/pytorch3d/docs/modules/implicitron/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..1fc4fa86f79f5fb4f1d2f941d4b40fa068859a69
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/index.rst
@@ -0,0 +1,10 @@
+pytorch3d.implicitron
+=====================
+
+.. toctree::
+
+    models/index.rst
+    data_basics
+    datasets
+    evaluation
+    tools
diff --git a/pytorch3d/docs/modules/implicitron/models/base_model.rst b/pytorch3d/docs/modules/implicitron/models/base_model.rst
new file mode 100644
index 0000000000000000000000000000000000000000..9208d491d631a07fea6c8c5bee8d65b2e3707f19
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/base_model.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.base_model
+=======================================
+
+base_model
+
+.. automodule:: pytorch3d.implicitron.models.base_model
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/feature_extractor/feature_extractor.rst b/pytorch3d/docs/modules/implicitron/models/feature_extractor/feature_extractor.rst
new file mode 100644
index 0000000000000000000000000000000000000000..1121a2014ad25d2b4beffee748307472940ac098
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/feature_extractor/feature_extractor.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.feature_extractor.feature_extractor
+================================================================
+
+feature_extractor
+
+.. automodule:: pytorch3d.implicitron.models.feature_extractor.feature_extractor
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/feature_extractor/index.rst b/pytorch3d/docs/modules/implicitron/models/feature_extractor/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..51a22ac07e6b8cb309ac8090f91999ac06708bb6
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/feature_extractor/index.rst
@@ -0,0 +1,7 @@
+pytorch3d.implicitron.models.feature_extractor
+==============================================
+
+.. toctree::
+
+    feature_extractor
+    resnet_feature_extractor
diff --git a/pytorch3d/docs/modules/implicitron/models/feature_extractor/resnet_feature_extractor.rst b/pytorch3d/docs/modules/implicitron/models/feature_extractor/resnet_feature_extractor.rst
new file mode 100644
index 0000000000000000000000000000000000000000..13d37ed4ea8c827a9c46fdc5f51f1435ad75b7fe
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/feature_extractor/resnet_feature_extractor.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.feature_extractor.resnet_feature_extractor
+=======================================================================
+
+resnet_feature_extractor
+
+.. automodule:: pytorch3d.implicitron.models.feature_extractor.resnet_feature_extractor
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/generic_model.rst b/pytorch3d/docs/modules/implicitron/models/generic_model.rst
new file mode 100644
index 0000000000000000000000000000000000000000..41ccc89ac4d8c3c678989225029674a9f1f47253
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/generic_model.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.generic_model
+==========================================
+
+generic_model
+
+.. automodule:: pytorch3d.implicitron.models.generic_model
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/global_encoder/autodecoder.rst b/pytorch3d/docs/modules/implicitron/models/global_encoder/autodecoder.rst
new file mode 100644
index 0000000000000000000000000000000000000000..ee7e36c95d0ddd3148bdf4aeae570fbd60ccc8da
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/global_encoder/autodecoder.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.global_encoder.autodecoder
+=======================================================
+
+autodecoder
+
+.. automodule:: pytorch3d.implicitron.models.global_encoder.autodecoder
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/global_encoder/global_encoder.rst b/pytorch3d/docs/modules/implicitron/models/global_encoder/global_encoder.rst
new file mode 100644
index 0000000000000000000000000000000000000000..d8c3fb2f06dbefef5022b1c66e919ae7140a36ad
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/global_encoder/global_encoder.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.global_encoder.global_encoder
+==========================================================
+
+global_encoder
+
+.. automodule:: pytorch3d.implicitron.models.global_encoder.global_encoder
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/global_encoder/index.rst b/pytorch3d/docs/modules/implicitron/models/global_encoder/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..b861de01b36ade75b10b78cb721072ec3ca6cd81
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/global_encoder/index.rst
@@ -0,0 +1,7 @@
+pytorch3d.implicitron.models.global_encoder
+===========================================
+
+.. toctree::
+
+    autodecoder
+    global_encoder
diff --git a/pytorch3d/docs/modules/implicitron/models/implicit_function/base.rst b/pytorch3d/docs/modules/implicitron/models/implicit_function/base.rst
new file mode 100644
index 0000000000000000000000000000000000000000..1161b18157d9eb03cef8c4c28a72f6342fe8df8a
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/implicit_function/base.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.implicit_function.base
+===================================================
+
+base
+
+.. automodule:: pytorch3d.implicitron.models.implicit_function.base
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/implicit_function/decoding_functions.rst b/pytorch3d/docs/modules/implicitron/models/implicit_function/decoding_functions.rst
new file mode 100644
index 0000000000000000000000000000000000000000..7a1f135fd894906a0cd05f1fe484c1b20a764e53
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/implicit_function/decoding_functions.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.implicit_function.decoding_functions
+=================================================================
+
+decoding_functions
+
+.. automodule:: pytorch3d.implicitron.models.implicit_function.decoding_functions
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/implicit_function/idr_feature_field.rst b/pytorch3d/docs/modules/implicitron/models/implicit_function/idr_feature_field.rst
new file mode 100644
index 0000000000000000000000000000000000000000..8a6d9b5b3c1bfb923b86a3999625b42e955b5dd2
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/implicit_function/idr_feature_field.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.implicit_function.idr_feature_field
+================================================================
+
+idr_feature_field
+
+.. automodule:: pytorch3d.implicitron.models.implicit_function.idr_feature_field
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/implicit_function/index.rst b/pytorch3d/docs/modules/implicitron/models/implicit_function/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..60fdcd7add50ede8a831479212ae2d340f37194f
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/implicit_function/index.rst
@@ -0,0 +1,13 @@
+pytorch3d.implicitron.models.implicit_function
+==============================================
+
+.. toctree::
+
+    base
+    decoding_functions
+    idr_feature_field
+    neural_radiance_field
+    scene_representation_networks
+    utils
+    voxel_grid
+    voxel_grid_implicit_function
diff --git a/pytorch3d/docs/modules/implicitron/models/implicit_function/neural_radiance_field.rst b/pytorch3d/docs/modules/implicitron/models/implicit_function/neural_radiance_field.rst
new file mode 100644
index 0000000000000000000000000000000000000000..7d81e4f81b75f53c14a15950da9c7259fb634024
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/implicit_function/neural_radiance_field.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.implicit_function.neural_radiance_field
+====================================================================
+
+neural_radiance_field
+
+.. automodule:: pytorch3d.implicitron.models.implicit_function.neural_radiance_field
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/implicit_function/scene_representation_networks.rst b/pytorch3d/docs/modules/implicitron/models/implicit_function/scene_representation_networks.rst
new file mode 100644
index 0000000000000000000000000000000000000000..8c9eba8018a50ab3b5ca8b0e3357f1c486f88d86
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/implicit_function/scene_representation_networks.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.implicit_function.scene_representation_networks
+============================================================================
+
+scene_representation_networks
+
+.. automodule:: pytorch3d.implicitron.models.implicit_function.scene_representation_networks
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/implicit_function/utils.rst b/pytorch3d/docs/modules/implicitron/models/implicit_function/utils.rst
new file mode 100644
index 0000000000000000000000000000000000000000..5c8dddb9c4321d798d0261056b9b7996a351a685
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/implicit_function/utils.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.implicit_function.utils
+====================================================
+
+utils
+
+.. automodule:: pytorch3d.implicitron.models.implicit_function.utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/implicit_function/voxel_grid.rst b/pytorch3d/docs/modules/implicitron/models/implicit_function/voxel_grid.rst
new file mode 100644
index 0000000000000000000000000000000000000000..137dea9cf4c5b3c473a06bc961db2a836ff6ec92
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/implicit_function/voxel_grid.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.implicit_function.voxel_grid
+=========================================================
+
+voxel_grid
+
+.. automodule:: pytorch3d.implicitron.models.implicit_function.voxel_grid
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/implicit_function/voxel_grid_implicit_function.rst b/pytorch3d/docs/modules/implicitron/models/implicit_function/voxel_grid_implicit_function.rst
new file mode 100644
index 0000000000000000000000000000000000000000..5687bc401d83e2e3ed72c02673a6c4e62cbedae3
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/implicit_function/voxel_grid_implicit_function.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.implicit_function.voxel_grid_implicit_function
+===========================================================================
+
+voxel_grid_implicit_function
+
+.. automodule:: pytorch3d.implicitron.models.implicit_function.voxel_grid_implicit_function
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/index.rst b/pytorch3d/docs/modules/implicitron/models/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..4bd70a446c519d92017a5c1fcbbd3f15ebe0a04b
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/index.rst
@@ -0,0 +1,15 @@
+pytorch3d.implicitron.models
+============================
+
+.. toctree::
+
+    base_model
+    generic_model
+    metrics
+    model_dbir
+    feature_extractor/index
+    global_encoder/index
+    implicit_function/index
+    renderer/index
+    view_pooler/index
+    visualization/index
diff --git a/pytorch3d/docs/modules/implicitron/models/metrics.rst b/pytorch3d/docs/modules/implicitron/models/metrics.rst
new file mode 100644
index 0000000000000000000000000000000000000000..6895e1ea5dd0759e4e01c1cbd1956159f9faaaea
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/metrics.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.metrics
+====================================
+
+metrics
+
+.. automodule:: pytorch3d.implicitron.models.metrics
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/model_dbir.rst b/pytorch3d/docs/modules/implicitron/models/model_dbir.rst
new file mode 100644
index 0000000000000000000000000000000000000000..c47c784ee9510cf0e6e725eb55d3882be8f77706
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/model_dbir.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.model_dbir
+=======================================
+
+model_dbir
+
+.. automodule:: pytorch3d.implicitron.models.model_dbir
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/renderer/base.rst b/pytorch3d/docs/modules/implicitron/models/renderer/base.rst
new file mode 100644
index 0000000000000000000000000000000000000000..67ad3661ee5253d2d19c22cf46c149af0de10764
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/renderer/base.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.renderer.base
+==========================================
+
+base
+
+.. automodule:: pytorch3d.implicitron.models.renderer.base
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/renderer/index.rst b/pytorch3d/docs/modules/implicitron/models/renderer/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..24e7dde7a356123711bf53ce92da87c905d4254e
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/renderer/index.rst
@@ -0,0 +1,14 @@
+pytorch3d.implicitron.models.renderer
+=====================================
+
+.. toctree::
+
+    base
+    lstm_renderer
+    multipass_ea
+    ray_point_refiner
+    ray_sampler
+    ray_tracing
+    raymarcher
+    rgb_net
+    sdf_renderer
diff --git a/pytorch3d/docs/modules/implicitron/models/renderer/lstm_renderer.rst b/pytorch3d/docs/modules/implicitron/models/renderer/lstm_renderer.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f23d985c48e968c275184e289b7483fe4b84ed4d
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/renderer/lstm_renderer.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.renderer.lstm_renderer
+===================================================
+
+lstm_renderer
+
+.. automodule:: pytorch3d.implicitron.models.renderer.lstm_renderer
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/renderer/multipass_ea.rst b/pytorch3d/docs/modules/implicitron/models/renderer/multipass_ea.rst
new file mode 100644
index 0000000000000000000000000000000000000000..745c3249e389e96a45133b1aab5e41cb30654dc5
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/renderer/multipass_ea.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.renderer.multipass_ea
+==================================================
+
+multipass_ea
+
+.. automodule:: pytorch3d.implicitron.models.renderer.multipass_ea
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/renderer/ray_point_refiner.rst b/pytorch3d/docs/modules/implicitron/models/renderer/ray_point_refiner.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f5387ce8ff0d18970c483e56b4a8d340017b9cfb
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/renderer/ray_point_refiner.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.renderer.ray_point_refiner
+=======================================================
+
+ray_point_refiner
+
+.. automodule:: pytorch3d.implicitron.models.renderer.ray_point_refiner
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/renderer/ray_sampler.rst b/pytorch3d/docs/modules/implicitron/models/renderer/ray_sampler.rst
new file mode 100644
index 0000000000000000000000000000000000000000..5c638abe0e82ad5d6b5b76b6f5353c057bd2bd00
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/renderer/ray_sampler.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.renderer.ray_sampler
+=================================================
+
+ray_sampler
+
+.. automodule:: pytorch3d.implicitron.models.renderer.ray_sampler
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/renderer/ray_tracing.rst b/pytorch3d/docs/modules/implicitron/models/renderer/ray_tracing.rst
new file mode 100644
index 0000000000000000000000000000000000000000..fe2033df3199bf6af429d3c6f60f3126f0d31b55
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/renderer/ray_tracing.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.renderer.ray_tracing
+=================================================
+
+ray_tracing
+
+.. automodule:: pytorch3d.implicitron.models.renderer.ray_tracing
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/renderer/raymarcher.rst b/pytorch3d/docs/modules/implicitron/models/renderer/raymarcher.rst
new file mode 100644
index 0000000000000000000000000000000000000000..687202e98b5d4cfe1a5ec4351cf808af4feba636
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/renderer/raymarcher.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.renderer.raymarcher
+================================================
+
+raymarcher
+
+.. automodule:: pytorch3d.implicitron.models.renderer.raymarcher
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/renderer/rgb_net.rst b/pytorch3d/docs/modules/implicitron/models/renderer/rgb_net.rst
new file mode 100644
index 0000000000000000000000000000000000000000..dc40dc0799f31d6a22be60d7682d03d13cc37b65
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/renderer/rgb_net.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.renderer.rgb_net
+=============================================
+
+rgb_net
+
+.. automodule:: pytorch3d.implicitron.models.renderer.rgb_net
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/renderer/sdf_renderer.rst b/pytorch3d/docs/modules/implicitron/models/renderer/sdf_renderer.rst
new file mode 100644
index 0000000000000000000000000000000000000000..ca0e48df8cfe982f2d304aa6fdb3c6f29e5ded02
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/renderer/sdf_renderer.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.renderer.sdf_renderer
+==================================================
+
+sdf_renderer
+
+.. automodule:: pytorch3d.implicitron.models.renderer.sdf_renderer
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/view_pooler/feature_aggregator.rst b/pytorch3d/docs/modules/implicitron/models/view_pooler/feature_aggregator.rst
new file mode 100644
index 0000000000000000000000000000000000000000..fb874f03240d5150be6713170a776a9757527639
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/view_pooler/feature_aggregator.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.view_pooler.feature_aggregator
+===========================================================
+
+feature_aggregator
+
+.. automodule:: pytorch3d.implicitron.models.view_pooler.feature_aggregator
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/view_pooler/index.rst b/pytorch3d/docs/modules/implicitron/models/view_pooler/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..b682259801059263ccbf2d8979950b17a84de6fd
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/view_pooler/index.rst
@@ -0,0 +1,8 @@
+pytorch3d.implicitron.models.view_pooler
+========================================
+
+.. toctree::
+
+    feature_aggregator
+    view_pooler
+    view_sampler
diff --git a/pytorch3d/docs/modules/implicitron/models/view_pooler/view_pooler.rst b/pytorch3d/docs/modules/implicitron/models/view_pooler/view_pooler.rst
new file mode 100644
index 0000000000000000000000000000000000000000..bfd90434d2a589ea18a047350c6cf55e253e85f7
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/view_pooler/view_pooler.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.view_pooler.view_pooler
+====================================================
+
+view_pooler
+
+.. automodule:: pytorch3d.implicitron.models.view_pooler.view_pooler
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/view_pooler/view_sampler.rst b/pytorch3d/docs/modules/implicitron/models/view_pooler/view_sampler.rst
new file mode 100644
index 0000000000000000000000000000000000000000..eb02e8566fb0f51064b381ddb2c39f6fec1081e3
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/view_pooler/view_sampler.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.view_pooler.view_sampler
+=====================================================
+
+view_sampler
+
+.. automodule:: pytorch3d.implicitron.models.view_pooler.view_sampler
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/models/visualization/index.rst b/pytorch3d/docs/modules/implicitron/models/visualization/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..ebc041f29773580c929665cb904afef416f53d7c
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/visualization/index.rst
@@ -0,0 +1,6 @@
+pytorch3d.implicitron.models.visualization
+==========================================
+
+.. toctree::
+
+    render_flyaround
diff --git a/pytorch3d/docs/modules/implicitron/models/visualization/render_flyaround.rst b/pytorch3d/docs/modules/implicitron/models/visualization/render_flyaround.rst
new file mode 100644
index 0000000000000000000000000000000000000000..49779a125357af228e83ebce2bd4d4edd3718fdc
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/models/visualization/render_flyaround.rst
@@ -0,0 +1,9 @@
+pytorch3d.implicitron.models.visualization.render_flyaround
+===========================================================
+
+render_flyaround
+
+.. automodule:: pytorch3d.implicitron.models.visualization.render_flyaround
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/implicitron/tools.rst b/pytorch3d/docs/modules/implicitron/tools.rst
new file mode 100644
index 0000000000000000000000000000000000000000..a694ed3e4cc282cf7884f2d12870737ffae4d1d9
--- /dev/null
+++ b/pytorch3d/docs/modules/implicitron/tools.rst
@@ -0,0 +1,64 @@
+pytorch3d.implicitron.tools
+===========================
+
+Tools for implicitron
+
+.. automodule:: pytorch3d.implicitron.tools.camera_utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.tools.circle_fitting
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.tools.config
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.tools.eval_video_trajectory
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.tools.image_utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.tools.metric_utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.tools.model_io
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.tools.point_cloud_utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.tools.rasterize_mc
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.tools.stats
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.tools.video_writer
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: pytorch3d.implicitron.tools.vis_utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/index.rst b/pytorch3d/docs/modules/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..54550d8aca317d6b6950e8a63ef9a74091623027
--- /dev/null
+++ b/pytorch3d/docs/modules/index.rst
@@ -0,0 +1,16 @@
+API Documentation
+==================
+
+.. toctree::
+
+    structures
+    io
+    loss
+    ops
+    renderer/index
+    transforms
+    utils
+    datasets
+    common
+    vis/index
+    implicitron/index
diff --git a/pytorch3d/docs/modules/io.rst b/pytorch3d/docs/modules/io.rst
new file mode 100644
index 0000000000000000000000000000000000000000..a30c6dd5a4838b899fbc12cdbfbc57e674b83a8e
--- /dev/null
+++ b/pytorch3d/docs/modules/io.rst
@@ -0,0 +1,7 @@
+pytorch3d.io
+===========================
+
+.. automodule:: pytorch3d.io
+    :members:
+    :undoc-members:
+    :show-inheritance:
\ No newline at end of file
diff --git a/pytorch3d/docs/modules/loss.rst b/pytorch3d/docs/modules/loss.rst
new file mode 100644
index 0000000000000000000000000000000000000000..3ae9fdcbdbaed5f9935893953b1bc930fe69d1aa
--- /dev/null
+++ b/pytorch3d/docs/modules/loss.rst
@@ -0,0 +1,9 @@
+pytorch3d.loss
+====================
+
+Loss functions for meshes and point clouds.
+
+.. automodule:: pytorch3d.loss
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/ops.rst b/pytorch3d/docs/modules/ops.rst
new file mode 100644
index 0000000000000000000000000000000000000000..148b6b3ddd951e88d9cc1c7d8ae6a4459787101c
--- /dev/null
+++ b/pytorch3d/docs/modules/ops.rst
@@ -0,0 +1,6 @@
+pytorch3d.ops
+===========================
+
+.. automodule:: pytorch3d.ops
+    :members:
+    :undoc-members:
\ No newline at end of file
diff --git a/pytorch3d/docs/modules/renderer/blending.rst b/pytorch3d/docs/modules/renderer/blending.rst
new file mode 100644
index 0000000000000000000000000000000000000000..6f6a19105e8d678fd35872da264173b5b85a1fe5
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/blending.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.blending
+===========================
+
+blending
+
+.. automodule:: pytorch3d.renderer.blending
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/camera_conversions.rst b/pytorch3d/docs/modules/renderer/camera_conversions.rst
new file mode 100644
index 0000000000000000000000000000000000000000..c6144e0c02899f3c68b79021677f312086306a6c
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/camera_conversions.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.camera_conversions
+=====================================
+
+camera_conversions
+
+.. automodule:: pytorch3d.renderer.camera_conversions
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/camera_utils.rst b/pytorch3d/docs/modules/renderer/camera_utils.rst
new file mode 100644
index 0000000000000000000000000000000000000000..2cd3c5ac5f502b4e5576a09dbb820dc6638ed044
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/camera_utils.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.camera_utils
+===============================
+
+camera_utils
+
+.. automodule:: pytorch3d.renderer.camera_utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/cameras.rst b/pytorch3d/docs/modules/renderer/cameras.rst
new file mode 100644
index 0000000000000000000000000000000000000000..201ca0446309040e8e96402eafc74cad207e1283
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/cameras.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.cameras
+==========================
+
+cameras
+
+.. automodule:: pytorch3d.renderer.cameras
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/compositing.rst b/pytorch3d/docs/modules/renderer/compositing.rst
new file mode 100644
index 0000000000000000000000000000000000000000..7b2c5ea90a920137056ddb25715d8f50f66fd4b5
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/compositing.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.compositing
+==============================
+
+compositing
+
+.. automodule:: pytorch3d.renderer.compositing
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/fisheyecameras.rst b/pytorch3d/docs/modules/renderer/fisheyecameras.rst
new file mode 100644
index 0000000000000000000000000000000000000000..039b1646773a7f113a65efcc7ed22399d5c4297b
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/fisheyecameras.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.fisheyecameras
+=================================
+
+fisheyecameras
+
+.. automodule:: pytorch3d.renderer.fisheyecameras
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/implicit/harmonic_embedding.rst b/pytorch3d/docs/modules/renderer/implicit/harmonic_embedding.rst
new file mode 100644
index 0000000000000000000000000000000000000000..460381313e37e7dbc5f37ad385d228b544c37544
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/implicit/harmonic_embedding.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.implicit.harmonic_embedding
+==============================================
+
+harmonic_embedding
+
+.. automodule:: pytorch3d.renderer.implicit.harmonic_embedding
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/implicit/index.rst b/pytorch3d/docs/modules/renderer/implicit/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..e62048b623b1cd760680b2c4f94e7f3ddba30ce9
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/implicit/index.rst
@@ -0,0 +1,11 @@
+pytorch3d.renderer.implicit
+===========================
+
+.. toctree::
+
+    harmonic_embedding
+    raymarching
+    raysampling
+    renderer
+    sample_pdf
+    utils
diff --git a/pytorch3d/docs/modules/renderer/implicit/raymarching.rst b/pytorch3d/docs/modules/renderer/implicit/raymarching.rst
new file mode 100644
index 0000000000000000000000000000000000000000..c44c607d60fadbe606bca5a83752fcfc5badc31c
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/implicit/raymarching.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.implicit.raymarching
+=======================================
+
+raymarching
+
+.. automodule:: pytorch3d.renderer.implicit.raymarching
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/implicit/raysampling.rst b/pytorch3d/docs/modules/renderer/implicit/raysampling.rst
new file mode 100644
index 0000000000000000000000000000000000000000..23f9e409d2332740ab958ae16f81881400341c91
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/implicit/raysampling.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.implicit.raysampling
+=======================================
+
+raysampling
+
+.. automodule:: pytorch3d.renderer.implicit.raysampling
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/implicit/renderer.rst b/pytorch3d/docs/modules/renderer/implicit/renderer.rst
new file mode 100644
index 0000000000000000000000000000000000000000..2e27e86f26173b5252bea8bec21d640a5b4c2585
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/implicit/renderer.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.implicit.renderer
+====================================
+
+renderer
+
+.. automodule:: pytorch3d.renderer.implicit.renderer
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/implicit/sample_pdf.rst b/pytorch3d/docs/modules/renderer/implicit/sample_pdf.rst
new file mode 100644
index 0000000000000000000000000000000000000000..5c35f789f9286b4d9e434763f77953fc282fc495
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/implicit/sample_pdf.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.implicit.sample_pdf
+======================================
+
+sample_pdf
+
+.. automodule:: pytorch3d.renderer.implicit.sample_pdf
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/implicit/utils.rst b/pytorch3d/docs/modules/renderer/implicit/utils.rst
new file mode 100644
index 0000000000000000000000000000000000000000..1d7f976f41613a24ad7ee5e1a92d30ecf7b66011
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/implicit/utils.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.implicit.utils
+=================================
+
+utils
+
+.. automodule:: pytorch3d.renderer.implicit.utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/index.rst b/pytorch3d/docs/modules/renderer/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..23459149a756d598f58acf47e4c78266710d2f13
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/index.rst
@@ -0,0 +1,19 @@
+pytorch3d.renderer
+==================
+
+.. toctree::
+
+    blending
+    camera_conversions
+    camera_utils
+    cameras
+    compositing
+    fisheyecameras
+    lighting
+    materials
+    splatter_blend
+    utils
+    implicit/index
+    mesh/index
+    opengl/index
+    points/index
diff --git a/pytorch3d/docs/modules/renderer/lighting.rst b/pytorch3d/docs/modules/renderer/lighting.rst
new file mode 100644
index 0000000000000000000000000000000000000000..314824ded61747a63bd23f32ba4fcab9044d1995
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/lighting.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.lighting
+===========================
+
+lighting
+
+.. automodule:: pytorch3d.renderer.lighting
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/materials.rst b/pytorch3d/docs/modules/renderer/materials.rst
new file mode 100644
index 0000000000000000000000000000000000000000..c759e57362efc39c843d969ddecd88c166aebc23
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/materials.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.materials
+============================
+
+materials
+
+.. automodule:: pytorch3d.renderer.materials
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/mesh/clip.rst b/pytorch3d/docs/modules/renderer/mesh/clip.rst
new file mode 100644
index 0000000000000000000000000000000000000000..57d9f78f24c52e3d3094f4641a121024b8d4d40b
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/mesh/clip.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.mesh.clip
+============================
+
+clip
+
+.. automodule:: pytorch3d.renderer.mesh.clip
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/mesh/index.rst b/pytorch3d/docs/modules/renderer/mesh/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..9fccba0152c990b7f5ea0958019994c1bb8006b9
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/mesh/index.rst
@@ -0,0 +1,13 @@
+pytorch3d.renderer.mesh
+=======================
+
+.. toctree::
+
+    clip
+    rasterize_meshes
+    rasterizer
+    renderer
+    shader
+    shading
+    textures
+    utils
diff --git a/pytorch3d/docs/modules/renderer/mesh/rasterize_meshes.rst b/pytorch3d/docs/modules/renderer/mesh/rasterize_meshes.rst
new file mode 100644
index 0000000000000000000000000000000000000000..3f46d3a7bae20f5bf4fed5df69e751af64e66107
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/mesh/rasterize_meshes.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.mesh.rasterize_meshes
+========================================
+
+rasterize_meshes
+
+.. automodule:: pytorch3d.renderer.mesh.rasterize_meshes
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/mesh/rasterizer.rst b/pytorch3d/docs/modules/renderer/mesh/rasterizer.rst
new file mode 100644
index 0000000000000000000000000000000000000000..28cb526e656dea293b12803fc83ae4af42a008e0
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/mesh/rasterizer.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.mesh.rasterizer
+==================================
+
+rasterizer
+
+.. automodule:: pytorch3d.renderer.mesh.rasterizer
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/mesh/renderer.rst b/pytorch3d/docs/modules/renderer/mesh/renderer.rst
new file mode 100644
index 0000000000000000000000000000000000000000..54b5682c97e88774b95f343005bd95f80d8a4462
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/mesh/renderer.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.mesh.renderer
+================================
+
+renderer
+
+.. automodule:: pytorch3d.renderer.mesh.renderer
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/mesh/shader.rst b/pytorch3d/docs/modules/renderer/mesh/shader.rst
new file mode 100644
index 0000000000000000000000000000000000000000..694e3df26de1bdbc82aae29d559d8b074d667547
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/mesh/shader.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.mesh.shader
+==============================
+
+shader
+
+.. automodule:: pytorch3d.renderer.mesh.shader
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/mesh/shading.rst b/pytorch3d/docs/modules/renderer/mesh/shading.rst
new file mode 100644
index 0000000000000000000000000000000000000000..90aa3b602407842ba40d438ad27aa880f712e624
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/mesh/shading.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.mesh.shading
+===============================
+
+shading
+
+.. automodule:: pytorch3d.renderer.mesh.shading
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/mesh/textures.rst b/pytorch3d/docs/modules/renderer/mesh/textures.rst
new file mode 100644
index 0000000000000000000000000000000000000000..7ca173c422b0abfeecfb68e7ec468a243e769a7a
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/mesh/textures.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.mesh.textures
+================================
+
+textures
+
+.. automodule:: pytorch3d.renderer.mesh.textures
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/mesh/utils.rst b/pytorch3d/docs/modules/renderer/mesh/utils.rst
new file mode 100644
index 0000000000000000000000000000000000000000..2944ba84eb6a9dc2a9f33a612305c45a73056e2f
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/mesh/utils.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.mesh.utils
+=============================
+
+utils
+
+.. automodule:: pytorch3d.renderer.mesh.utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/opengl/index.rst b/pytorch3d/docs/modules/renderer/opengl/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..5bc1f306401d3377a99c3a6c06c53828f6753da2
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/opengl/index.rst
@@ -0,0 +1,7 @@
+pytorch3d.renderer.opengl
+=========================
+
+.. toctree::
+
+    opengl_utils
+    rasterizer_opengl
diff --git a/pytorch3d/docs/modules/renderer/opengl/opengl_utils.rst b/pytorch3d/docs/modules/renderer/opengl/opengl_utils.rst
new file mode 100644
index 0000000000000000000000000000000000000000..64a8660912f1215b0ad545b65278f02a266bd7b5
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/opengl/opengl_utils.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.opengl.opengl_utils
+======================================
+
+opengl_utils
+
+.. automodule:: pytorch3d.renderer.opengl.opengl_utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/opengl/rasterizer_opengl.rst b/pytorch3d/docs/modules/renderer/opengl/rasterizer_opengl.rst
new file mode 100644
index 0000000000000000000000000000000000000000..28cf96462f6cc23fb869d8f30ed09a4f9e684018
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/opengl/rasterizer_opengl.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.opengl.rasterizer_opengl
+===========================================
+
+rasterizer_opengl
+
+.. automodule:: pytorch3d.renderer.opengl.rasterizer_opengl
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/points/compositor.rst b/pytorch3d/docs/modules/renderer/points/compositor.rst
new file mode 100644
index 0000000000000000000000000000000000000000..4f97015ad07f2e1116572e5af07ed864b418fbd2
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/points/compositor.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.points.compositor
+====================================
+
+compositor
+
+.. automodule:: pytorch3d.renderer.points.compositor
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/points/index.rst b/pytorch3d/docs/modules/renderer/points/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..2eb454d2f407b6113d7b02cb9c7a6a5baba14722
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/points/index.rst
@@ -0,0 +1,10 @@
+pytorch3d.renderer.points
+=========================
+
+.. toctree::
+
+    compositor
+    rasterize_points
+    rasterizer
+    renderer
+    pulsar/index
diff --git a/pytorch3d/docs/modules/renderer/points/pulsar/index.rst b/pytorch3d/docs/modules/renderer/points/pulsar/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..064c518ff317227eca4fd54f16752dc143246422
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/points/pulsar/index.rst
@@ -0,0 +1,7 @@
+pytorch3d.renderer.points.pulsar
+================================
+
+.. toctree::
+
+    renderer
+    unified
diff --git a/pytorch3d/docs/modules/renderer/points/pulsar/renderer.rst b/pytorch3d/docs/modules/renderer/points/pulsar/renderer.rst
new file mode 100644
index 0000000000000000000000000000000000000000..219566128185aac8ca8bd51786703671878223ce
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/points/pulsar/renderer.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.points.pulsar.renderer
+=========================================
+
+renderer
+
+.. automodule:: pytorch3d.renderer.points.pulsar.renderer
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/points/pulsar/unified.rst b/pytorch3d/docs/modules/renderer/points/pulsar/unified.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f5cc49387f95bfe289d21dd702c50ead5b2404f0
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/points/pulsar/unified.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.points.pulsar.unified
+========================================
+
+unified
+
+.. automodule:: pytorch3d.renderer.points.pulsar.unified
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/points/rasterize_points.rst b/pytorch3d/docs/modules/renderer/points/rasterize_points.rst
new file mode 100644
index 0000000000000000000000000000000000000000..1e61f9d02a5a2fd871e1d73f162c4be419cc9712
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/points/rasterize_points.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.points.rasterize_points
+==========================================
+
+rasterize_points
+
+.. automodule:: pytorch3d.renderer.points.rasterize_points
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/points/rasterizer.rst b/pytorch3d/docs/modules/renderer/points/rasterizer.rst
new file mode 100644
index 0000000000000000000000000000000000000000..1b5e2aa9b7c04a25b2af636e22977cc99e6cdb4f
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/points/rasterizer.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.points.rasterizer
+====================================
+
+rasterizer
+
+.. automodule:: pytorch3d.renderer.points.rasterizer
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/points/renderer.rst b/pytorch3d/docs/modules/renderer/points/renderer.rst
new file mode 100644
index 0000000000000000000000000000000000000000..57e9fd30613c58ece2d1c17bba0ecac042429ea7
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/points/renderer.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.points.renderer
+==================================
+
+renderer
+
+.. automodule:: pytorch3d.renderer.points.renderer
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/splatter_blend.rst b/pytorch3d/docs/modules/renderer/splatter_blend.rst
new file mode 100644
index 0000000000000000000000000000000000000000..d092dc2b712efcf0b5f1ef8712618b40c971e68e
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/splatter_blend.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.splatter_blend
+=================================
+
+splatter_blend
+
+.. automodule:: pytorch3d.renderer.splatter_blend
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/renderer/utils.rst b/pytorch3d/docs/modules/renderer/utils.rst
new file mode 100644
index 0000000000000000000000000000000000000000..a196433c8bd675569fde769a2ed48c59392350e2
--- /dev/null
+++ b/pytorch3d/docs/modules/renderer/utils.rst
@@ -0,0 +1,9 @@
+pytorch3d.renderer.utils
+========================
+
+utils
+
+.. automodule:: pytorch3d.renderer.utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/structures.rst b/pytorch3d/docs/modules/structures.rst
new file mode 100644
index 0000000000000000000000000000000000000000..1f73a5595ef5dc32d67a6902a2daeae9ebfc7dce
--- /dev/null
+++ b/pytorch3d/docs/modules/structures.rst
@@ -0,0 +1,8 @@
+pytorch3d.structures 
+====================
+
+.. automodule:: pytorch3d.structures
+    :members:
+    :undoc-members:
+
+
diff --git a/pytorch3d/docs/modules/transforms.rst b/pytorch3d/docs/modules/transforms.rst
new file mode 100644
index 0000000000000000000000000000000000000000..3962f7f19690054d9eb0488e310973cd252ca9ef
--- /dev/null
+++ b/pytorch3d/docs/modules/transforms.rst
@@ -0,0 +1,7 @@
+pytorch3d.transforms 
+===========================
+
+.. automodule:: pytorch3d.transforms
+    :members:
+    :undoc-members:
+    :show-inheritance:
\ No newline at end of file
diff --git a/pytorch3d/docs/modules/utils.rst b/pytorch3d/docs/modules/utils.rst
new file mode 100644
index 0000000000000000000000000000000000000000..3aaf1df154cc9b5b0eb3d3d0daae0661b7f26433
--- /dev/null
+++ b/pytorch3d/docs/modules/utils.rst
@@ -0,0 +1,7 @@
+pytorch3d.utils
+====================
+
+.. automodule:: pytorch3d.utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/vis/index.rst b/pytorch3d/docs/modules/vis/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..79e1bda1723efd81431c53ccbc7aa904e9847166
--- /dev/null
+++ b/pytorch3d/docs/modules/vis/index.rst
@@ -0,0 +1,7 @@
+pytorch3d.vis
+=============
+
+.. toctree::
+
+    plotly_vis
+    texture_vis
diff --git a/pytorch3d/docs/modules/vis/plotly_vis.rst b/pytorch3d/docs/modules/vis/plotly_vis.rst
new file mode 100644
index 0000000000000000000000000000000000000000..2380a7b1879d9b0619743e1dc4d9e6ccff0d2ded
--- /dev/null
+++ b/pytorch3d/docs/modules/vis/plotly_vis.rst
@@ -0,0 +1,9 @@
+pytorch3d.vis.plotly_vis
+========================
+
+plotly_vis
+
+.. automodule:: pytorch3d.vis.plotly_vis
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/modules/vis/texture_vis.rst b/pytorch3d/docs/modules/vis/texture_vis.rst
new file mode 100644
index 0000000000000000000000000000000000000000..5e59744ed150be134884467ff05b18297b5781bb
--- /dev/null
+++ b/pytorch3d/docs/modules/vis/texture_vis.rst
@@ -0,0 +1,9 @@
+pytorch3d.vis.texture_vis
+=========================
+
+texture_vis
+
+.. automodule:: pytorch3d.vis.texture_vis
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pytorch3d/docs/notes/assets/batch_modes.gif b/pytorch3d/docs/notes/assets/batch_modes.gif
new file mode 100644
index 0000000000000000000000000000000000000000..57412693f3be961c0ab6db581d41db88e2dab6b6
--- /dev/null
+++ b/pytorch3d/docs/notes/assets/batch_modes.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d87ed3c62980c4fb1d3b849b02623c8fa18e351a3f40ecd4880fd68cb2d0eb6
+size 1525658
diff --git a/pytorch3d/docs/notes/assets/iou3d.gif b/pytorch3d/docs/notes/assets/iou3d.gif
new file mode 100644
index 0000000000000000000000000000000000000000..9225ebf71b9974de28105201cd41e5c2fae5fa36
Binary files /dev/null and b/pytorch3d/docs/notes/assets/iou3d.gif differ
diff --git a/pytorch3d/docs/notes/batching.md b/pytorch3d/docs/notes/batching.md
new file mode 100644
index 0000000000000000000000000000000000000000..9aa9943e4408d952a466f653b4342c46161f01b7
--- /dev/null
+++ b/pytorch3d/docs/notes/batching.md
@@ -0,0 +1,32 @@
+---
+hide_title: true
+sidebar_label: Batching
+---
+
+# Batching
+
+In deep learning, every optimization step operates on multiple input examples for robust training. Thus, efficient batching is crucial. For image inputs, batching is straightforward; N images are resized to the same height and width and stacked as a 4 dimensional tensor of shape `N x 3 x H x W`. For meshes, batching is less straightforward.
+
+<img src="assets/batch_intro.png" alt="batch_intro" align="middle"/>
+
+## Batch modes for meshes
+
+Assume you want to construct a batch containing two meshes, with `mesh1 = (v1: V1 x 3, f1: F1 x 3)` containing `V1` vertices and `F1` faces, and `mesh2 = (v2: V2 x 3, f2: F2 x 3)` with `V2 (!= V1)` vertices and `F2 (!= F1)` faces. The [Meshes][meshes] data structure provides three different ways to batch *heterogeneous* meshes. If `meshes = Meshes(verts = [v1, v2], faces = [f1, f2])` is an instantiation of the data structure, then
+
+* List: Returns the examples in the batch as a list of tensors. Specifically, `meshes.verts_list()` returns the list of vertices `[v1, v2]`. Similarly, `meshes.faces_list()` returns the list of faces `[f1, f2]`.
+* Padded: The padded representation constructs a tensor by padding the extra values. Specifically, `meshes.verts_padded()` returns a tensor of shape `2 x max(V1, V2) x 3` and pads the extra vertices with `0`s. Similarly, `meshes.faces_padded()` returns a tensor of shape `2 x max(F1, F2) x 3` and pads the extra faces with `-1`s.
+* Packed: The packed representation concatenates the examples in the batch into a tensor. In particular, `meshes.verts_packed()` returns a tensor of shape `(V1 + V2) x 3`. Similarly, `meshes.faces_packed()` returns a tensor of shape `(F1 + F2) x 3` for the faces. In the packed mode, auxiliary variables are computed that enable efficient conversion between packed and padded or list modes.
+
+<img src="assets/batch_modes.gif" alt="batch_modes" height="450" align="middle" />
+
+## Use cases for batch modes
+
+The need for different mesh batch modes is inherent to the way PyTorch operators are implemented. To fully utilize the optimized PyTorch ops, the [Meshes][meshes] data structure allows for efficient conversion between the different batch modes. This is crucial when aiming for a fast and efficient training cycle. An example of this is [Mesh R-CNN][meshrcnn]. Here, in the same forward pass different parts of the network assume different inputs, which are computed by converting between the different batch modes. In particular, [vert_align][vert_align] assumes a *padded* input tensor while immediately after [graph_conv][graphconv] assumes a *packed* input tensor.
+
+<img src="assets/meshrcnn.png" alt="meshrcnn" width="700" align="middle" />
+
+
+[meshes]: https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/structures/meshes.py
+[graphconv]: https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/ops/graph_conv.py
+[vert_align]: https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/ops/vert_align.py
+[meshrcnn]: https://github.com/facebookresearch/meshrcnn
diff --git a/pytorch3d/docs/notes/cameras.md b/pytorch3d/docs/notes/cameras.md
new file mode 100644
index 0000000000000000000000000000000000000000..84e52dba03d16b54251d3fe0e68ed6daa2e62e71
--- /dev/null
+++ b/pytorch3d/docs/notes/cameras.md
@@ -0,0 +1,101 @@
+---
+hide_title: true
+sidebar_label: Cameras
+---
+
+# Cameras
+
+## Camera Coordinate Systems
+
+When working with 3D data, there are 4 coordinate systems users need to know:
+* **World coordinate system**
+This is the system in which the object/scene lives - the world.
+* **Camera view coordinate system**
+This is the system that has its origin on the image plane and the `Z`-axis perpendicular to the image plane. In PyTorch3D, we assume that `+X` points left, `+Y` points up, and `+Z` points out from the image plane. The transformation from world to view happens after applying a rotation (`R`) and translation (`T`).
+* **NDC coordinate system**
+This is the normalized coordinate system that confines in a volume the rendered part of the object/scene. Also known as view volume. For square images, under the PyTorch3D convention, `(+1, +1, znear)` is the top left near corner, and `(-1, -1, zfar)` is the bottom right far corner of the volume. For non-square images, the side of the volume in `XY` with the smallest length ranges from `[-1, 1]` while the larger side from `[-s, s]`, where `s` is the aspect ratio and `s > 1` (larger divided by smaller side).
+The transformation from view to NDC happens after applying the camera projection matrix (`P`).
+* **Screen coordinate system**
+This is another representation of the view volume with the `XY` coordinates defined in pixel space instead of a normalized space. (0,0) is the top left corner of the top left pixel
+and (W,H) is the bottom right corner of the bottom right pixel.
+
+An illustration of the 4 coordinate systems is shown below
+![cameras](https://user-images.githubusercontent.com/669761/145090051-67b506d7-6d73-4826-a677-5873b7cb92ba.png)
+
+## Defining Cameras in PyTorch3D
+
+Cameras in PyTorch3D transform an object/scene from world coordinates to a normalized projection space by first transforming the object/scene to view coordinates (via transforms `R` and `T`) and then projecting the 3D object/scene to a normalized space via the projection matrix `P = K[R | T]`, where `K` is the intrinsic matrix. The camera parameters in `K` define the normalized space. If users define the camera parameters in NDC space, then the transform projects points to NDC. If the camera parameters are defined in screen space, the transformed points are in screen space.
+
+Note that the base `CamerasBase` class makes no assumptions about the coordinate systems. All the above transforms are geometric transforms defined purely by `R`, `T` and `K`. This means that users can define cameras in any coordinate system and for any transforms. The method `transform_points` will apply `K`, `R` and `T` to the input points as a simple matrix transformation. However, if users wish to use cameras with the PyTorch3D renderer, they need to abide by PyTorch3D's coordinate system assumptions (read below).
+
+We provide instantiations of common camera types in PyTorch3D and how users can flexibly define the projection space below.
+
+## Interfacing with the PyTorch3D Renderer
+
+The PyTorch3D renderer for both meshes and point clouds assumes that the camera transformed points, meaning the points passed as input to the rasterizer, are in PyTorch3D's NDC space. So to get the expected rendering outcome, users need to make sure that their 3D input data and cameras abide by these PyTorch3D coordinate system assumptions. The PyTorch3D coordinate system assumes `+X:left`, `+Y: up` and `+Z: from us to scene` (right-handed). Confusion regarding coordinate systems is common, so we advise that you spend some time understanding your data and the coordinate system they live in and transform them accordingly before using the PyTorch3D renderer.
+
+Examples of cameras and how they interface with the PyTorch3D renderer can be found in our tutorials.
+
+### Camera Types
+
+All cameras inherit from `CamerasBase` which is a base class for all cameras. PyTorch3D provides four different camera types. The `CamerasBase` defines methods that are common to all camera models:
+* `get_camera_center` that returns the optical center of the camera in world coordinates
+* `get_world_to_view_transform` which returns a 3D transform from world coordinates to the camera view coordinates `(R, T)`
+* `get_full_projection_transform` which composes the projection transform (`K`) with the world-to-view transform `(R, T)`
+* `transform_points` which takes a set of input points in world coordinates and projects to NDC coordinates ranging from [-1, -1, znear] to [+1, +1, zfar].
+* `get_ndc_camera_transform` which defines the conversion to PyTorch3D's NDC space and is called when interfacing with the PyTorch3D renderer. If the camera is defined in NDC space, then the identity transform is returned. If the camera is defined in screen space, the conversion from screen to NDC is returned. If users define their own camera in screen space, they need to think of the screen to NDC conversion. We provide examples for the `PerspectiveCameras` and `OrthographicCameras`.
+* `transform_points_ndc` which takes a set of points in world coordinates and projects them to PyTorch3D's NDC space
+* `transform_points_screen` which takes a set of input points in world coordinates and projects them to the screen coordinates ranging from [0, 0, znear] to [W, H, zfar]
+
+Users can easily customize their own cameras. For each new camera, users should implement the `get_projection_transform` routine that returns the mapping `P` from camera view coordinates to NDC coordinates.
+
+#### FoVPerspectiveCameras, FoVOrthographicCameras
+These two cameras follow the OpenGL convention for perspective and orthographic cameras respectively. The user provides the near `znear` and far `zfar` fields, which confine the view volume in the `Z` axis. The view volume in the `XY` plane is defined by field of view angle (`fov`) in the case of `FoVPerspectiveCameras` and by `min_x, min_y, max_x, max_y` in the case of `FoVOrthographicCameras`.
+These cameras are by default in NDC space.
+
+#### PerspectiveCameras, OrthographicCameras
+These two cameras follow the Multi-View Geometry convention for cameras. The user provides the focal length (`fx`, `fy`) and the principal point (`px`, `py`). For example, `camera = PerspectiveCameras(focal_length=((fx, fy),), principal_point=((px, py),))`
+
+The camera projection of a 3D point `(X, Y, Z)` in view coordinates to a point `(x, y, z)` in projection space (either NDC or screen) is
+
+```
+# for perspective camera
+x = fx * X / Z + px
+y = fy * Y / Z + py
+z = 1 / Z
+
+# for orthographic camera
+x = fx * X + px
+y = fy * Y + py
+z = Z
+```
+
+The user can define the camera parameters in NDC or in screen space. Screen space camera parameters are common and for that case the user needs to set `in_ndc` to `False` and also provide the `image_size=(height, width)` of the screen, aka the image.
+
+The `get_ndc_camera_transform` provides the transform from screen to NDC space in PyTorch3D. Note that the screen space assumes that the principal point is provided in the space with `+X right`, `+Y down` and origin at the top left corner of the image. To convert to NDC we need to account for the scaling of the normalized space as well as the change in `XY` direction.
+
+Below are examples of equivalent `PerspectiveCameras` instantiations in NDC and screen space, respectively.
+
+```python
+# NDC space camera
+fcl_ndc = (1.2,)
+prp_ndc = ((0.2, 0.5),)
+cameras_ndc = PerspectiveCameras(focal_length=fcl_ndc, principal_point=prp_ndc)
+
+# Screen space camera
+image_size = ((128, 256),)    # (h, w)
+fcl_screen = (76.8,)          # fcl_ndc * min(image_size) / 2
+prp_screen = ((115.2, 32), )  # w / 2 - px_ndc * min(image_size) / 2, h / 2 - py_ndc * min(image_size) / 2
+cameras_screen = PerspectiveCameras(focal_length=fcl_screen, principal_point=prp_screen, in_ndc=False, image_size=image_size)
+```
+
+The relationship between screen and NDC specifications of a camera's `focal_length` and `principal_point` is given by the following equations, where `s = min(image_width, image_height)`.
+The transformation of x and y coordinates between screen and NDC is exactly the same as for px and py.
+
+```
+fx_ndc = fx_screen * 2.0 / s
+fy_ndc = fy_screen * 2.0 / s
+
+px_ndc = - (px_screen - image_width / 2.0) * 2.0 / s
+py_ndc = - (py_screen - image_height / 2.0) * 2.0 / s
+```
diff --git a/pytorch3d/docs/notes/cubify.md b/pytorch3d/docs/notes/cubify.md
new file mode 100644
index 0000000000000000000000000000000000000000..41f8ecdf008ad72e30ebf7f99bb8db3fdb1edcdb
--- /dev/null
+++ b/pytorch3d/docs/notes/cubify.md
@@ -0,0 +1,12 @@
+---
+hide_title: true
+sidebar_label: Cubify
+---
+
+# Cubify
+
+The [cubify operator](https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/ops/cubify.py) converts a 3D occupancy grid of shape `BxDxHxW`, where `B` is the batch size, into a mesh instantiated as a [Meshes](https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/structures/meshes.py) data structure of `B` elements. The operator replaces every occupied voxel (if its occupancy probability is greater than a user defined threshold) with a cuboid of 12 faces and 8 vertices. Shared vertices are merged, and internal faces are removed resulting in a **watertight** mesh.
+
+The operator provides three alignment modes {*topleft*, *corner*, *center*} which define the span of the mesh vertices with respect to the voxel grid. The alignment modes are described in the figure below for a 2D grid.
+
+![input](https://user-images.githubusercontent.com/4369065/81032959-af697380-8e46-11ea-91a8-fae89597f988.png)
diff --git a/pytorch3d/docs/notes/datasets.md b/pytorch3d/docs/notes/datasets.md
new file mode 100644
index 0000000000000000000000000000000000000000..ee3588f2ae444567c9aa8787300add255e42e49d
--- /dev/null
+++ b/pytorch3d/docs/notes/datasets.md
@@ -0,0 +1,20 @@
+---
+hide_title: true
+sidebar_label: Data loaders
+---
+
+# Data loaders for common 3D Datasets
+
+### ShapeNetCore
+
+ShapeNet is a dataset of 3D CAD models. ShapeNetCore is a subset of the ShapeNet dataset and can be downloaded from https://www.shapenet.org/. There are two versions of ShapeNetCore: v1 (55 categories) and v2 (57 categories).
+
+The PyTorch3D [ShapeNetCore data loader](https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/datasets/shapenet/shapenet_core.py) inherits from `torch.utils.data.Dataset`. It takes the path where the ShapeNetCore dataset is stored locally and loads models in the dataset. The ShapeNetCore class loads and returns models with their `categories`, `model_ids`, `vertices` and `faces`. The `ShapeNetCore` data loader also has a customized `render` function that renders models by the specified `model_ids (List[int])`, `categories (List[str])` or `indices (List[int])` with PyTorch3D's differentiable renderer.
+
+The loaded dataset can be passed to `torch.utils.data.DataLoader` with PyTorch3D's customized collate_fn: `collate_batched_meshes` from the `pytorch3d.datasets.utils` module. The `vertices` and `faces` of the models are used to construct a [Meshes](https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/structures/meshes.py) object representing the batched meshes. This `Meshes` representation can be easily used with other ops and rendering in PyTorch3D.
+
+### R2N2
+
+The R2N2 dataset contains 13 categories that are a subset of the ShapeNetCore v.1 dataset. The R2N2 dataset also contains its own 24 renderings of each object and voxelized models. The R2N2 Dataset can be downloaded following the instructions [here](http://3d-r2n2.stanford.edu/).
+
+The PyTorch3D [R2N2 data loader](https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/datasets/r2n2/r2n2.py) is initialized with the paths to the ShapeNet dataset, the R2N2 dataset and the splits file for R2N2. Just like `ShapeNetCore`, it can be passed to `torch.utils.data.DataLoader` with a customized collate_fn: `collate_batched_R2N2` from the `pytorch3d.datasets.r2n2.utils` module. It returns all the data that `ShapeNetCore` returns, and in addition, it returns the R2N2 renderings (24 views for each model) along with the camera calibration matrices and a voxel representation for each model. Similar to `ShapeNetCore`, it has a customized `render` function that supports rendering specified models with the PyTorch3D differentiable renderer. In addition, it supports rendering models with the same orientations as R2N2's original renderings.
diff --git a/pytorch3d/docs/notes/io.md b/pytorch3d/docs/notes/io.md
new file mode 100644
index 0000000000000000000000000000000000000000..a75842716cb12e87069889255f5fcdce60b6b55a
--- /dev/null
+++ b/pytorch3d/docs/notes/io.md
@@ -0,0 +1,34 @@
+---
+hide_title: true
+sidebar_label: File IO
+---
+
+# File IO
+There is a flexible interface for loading and saving point clouds and meshes from different formats.
+
+The main usage is via the `pytorch3d.io.IO` object, and its methods
+`load_mesh`, `save_mesh`, `load_pointcloud` and `save_pointcloud`.
+
+For example, to load a mesh you might do
+```
+from pytorch3d.io import IO
+
+device=torch.device("cuda:0")
+mesh = IO().load_mesh("mymesh.obj", device=device)
+```
+
+and to save a pointcloud you might do
+```
+pcl = Pointclouds(...)
+IO().save_pointcloud(pcl, "output_pointcloud.ply")
+```
+
+For meshes, this supports OBJ, PLY and OFF files.
+
+For pointclouds, this supports PLY files.
+
+In addition, there is experimental support for loading meshes from
+[glTF 2 assets](https://github.com/KhronosGroup/glTF/tree/master/specification/2.0)
+stored either in a GLB container file or a glTF JSON file with embedded binary data.
+This must be enabled explicitly, as described in
+`pytorch3d/io/experimental_gltf_io.py`.
diff --git a/pytorch3d/docs/notes/iou3d.md b/pytorch3d/docs/notes/iou3d.md
new file mode 100644
index 0000000000000000000000000000000000000000..548d67811cdcc91c30ef12858a6ccda4952c3b87
--- /dev/null
+++ b/pytorch3d/docs/notes/iou3d.md
@@ -0,0 +1,93 @@
+---
+hide_title: true
+sidebar_label: IoU3D
+---
+
+# Intersection Over Union of Oriented 3D Boxes: A New Algorithm
+
+Author: Georgia Gkioxari
+
+Implementation: Georgia Gkioxari and Nikhila Ravi
+
+## Description
+
+Intersection over union (IoU) of boxes is widely used as an evaluation metric in object detection ([1][pascalvoc], [2][coco]).
+In 2D, IoU is commonly applied to axis-aligned boxes, namely boxes with edges parallel to the image axis.
+In 3D, boxes are usually not axis aligned and can be oriented in any way in the world.
+We introduce a new algorithm which computes the *exact* IoU of two *oriented 3D boxes*.
+
+Our algorithm is based on the simple observation that the intersection of two oriented 3D boxes, `box1` and `box2`, is a convex polyhedron (convex n-gon in 2D) with `n > 2` comprised of connected *planar units*.
+In 3D, these planar units are 3D triangular faces.
+In 2D, they are 2D edges.
+Each planar unit belongs strictly to either `box1` or `box2`.
+Our algorithm finds these units by iterating through the sides of each box.
+
+1. For each 3D triangular face `e` in `box1` we check whether `e` is *inside* `box2`.
+2. If `e` is not *inside*, then we discard it.
+3. If `e` is *inside* or *partially inside*, then the part of `e` *inside* `box2` is added to the units that comprise the final intersection shape.
+4. We repeat for `box2`.
+
+Below, we show a visualization of our algorithm for the case of 2D oriented boxes.
+
+<p align="center">
+<img src="assets/iou3d.gif" alt="drawing" width="400"/>
+</p>
+
+Note that when a box's unit `e` is *partially inside* a `box` then `e` breaks into smaller units. In 2D, `e` is an edge and breaks into smaller edges. In 3D, `e` is a 3D triangular face and is clipped to more and smaller faces by the plane of the `box` it intersects with.
+This is the sole fundamental difference between the algorithms for 2D and 3D.
+
+## Comparison With Other Algorithms
+
+Current algorithms for 3D box IoU rely on crude approximations or make box assumptions, for example they restrict the orientation of the 3D boxes.
+[Objectron][objectron] provides a nice discussion on the limitations of prior works.
+[Objectron][objectron] introduces a great algorithm for exact IoU computation of oriented 3D boxes.
+Objectron's algorithm computes the intersection points of two boxes using the [Sutherland-Hodgman algorithm][clipalgo].
+The intersection shape is formed by the convex hull from the intersection points, using the [Qhull library][qhull].
+
+Our algorithm has several advantages over Objectron's:
+
+* Our algorithm also computes the points of intersection, similar to Objectron, but in addition stores the *planar units* the points belong to. This eliminates the need for convex hull computation which is `O(nlogn)` and relies on a third party library which often crashes with nondescript error messages.
+* Objectron's implementation assumes that boxes are a rotation away from axis aligned. Our algorithm and implementation make no such assumption and work for any 3D boxes.
+* Our implementation supports batching, unlike Objectron which assumes single element inputs for `box1` and `box2`.
+* Our implementation is easily parallelizable and in fact we provide a custom C++/CUDA implementation which is **450 times faster than Objectron**.
+
+Below we compare the performance for Objectron (in C++) and our algorithm, in C++ and CUDA. We benchmark for a common use case in object detection where `boxes1` hold M predictions and `boxes2` hold N ground truth 3D boxes in an image and compute the `MxN` IoU matrix. We report the time in ms for `M=N=16`.
+
+<p align="center">
+<img src="assets/iou3d_comp.png" alt="drawing" width="400"/>
+</p>
+
+## Usage and Code
+
+```python
+from pytorch3d.ops import box3d_overlap
+# Assume inputs: boxes1 (M, 8, 3) and boxes2 (N, 8, 3)
+intersection_vol, iou_3d = box3d_overlap(boxes1, boxes2)
+```
+
+For more details, read [iou_box3d.py](https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/ops/iou_box3d.py).
+
+Note that our implementation is not differentiable as of now. We plan to add gradient support soon.
+
+We also have extensive [tests](https://github.com/facebookresearch/pytorch3d/blob/main/tests/test_iou_box3d.py) comparing our implementation with Objectron and MeshLab.
+
+
+## Cite
+
+If you use our 3D IoU algorithm, please cite PyTorch3D
+
+```bibtex
+@article{ravi2020pytorch3d,
+    author = {Nikhila Ravi and Jeremy Reizenstein and David Novotny and Taylor Gordon
+                  and Wan-Yen Lo and Justin Johnson and Georgia Gkioxari},
+    title = {Accelerating 3D Deep Learning with PyTorch3D},
+    journal = {arXiv:2007.08501},
+    year = {2020},
+}
+```
+
+[pascalvoc]: http://host.robots.ox.ac.uk/pascal/VOC/
+[coco]: https://cocodataset.org/
+[objectron]: https://arxiv.org/abs/2012.09988
+[qhull]: http://www.qhull.org/
+[clipalgo]: https://en.wikipedia.org/wiki/Sutherland%E2%80%93Hodgman_algorithm
diff --git a/pytorch3d/docs/notes/meshes_io.md b/pytorch3d/docs/notes/meshes_io.md
new file mode 100644
index 0000000000000000000000000000000000000000..330c2abff7ed1a8a5c9489d5db9e7bb2f2433e6c
--- /dev/null
+++ b/pytorch3d/docs/notes/meshes_io.md
@@ -0,0 +1,75 @@
+---
+sidebar_label: Loading from file
+hide_title: true
+---
+
+# Meshes and IO
+
+The Meshes object represents a batch of triangulated meshes, and is central to
+much of the functionality of PyTorch3D. There is no insistence that each mesh in
+the batch has the same number of vertices or faces. When available, it can store
+other data which pertains to the mesh, for example face normals, face areas
+and textures.
+
+Two common file formats for storing single meshes are ".obj" and ".ply" files,
+and PyTorch3D has functions for reading these.
+
+## OBJ
+
+Obj files have a standard way to store extra information about a mesh. Given an
+obj file, it can be read with
+
+```
+  verts, faces, aux = load_obj(filename)
+```
+
+which sets `verts` to be a (V,3)-tensor of vertices and `faces.verts_idx` to be
+an (F,3)- tensor of the vertex-indices of each of the corners of the faces.
+Faces which are not triangles will be split into triangles. `aux` is an object
+which may contain normals, uv coordinates, material colors and textures if they
+are present, and `faces` may additionally contain indices into these normals,
+textures and materials in its NamedTuple structure. A Meshes object containing a
+single mesh can be created from just the vertices and faces using
+```
+    meshes = Meshes(verts=[verts], faces=[faces.verts_idx])
+```
+
+If there is texture information in the `.obj` it can be used to initialize a
+`Textures` class which is passed into the `Meshes` constructor.  Currently we
+support loading of texture maps for meshes which have one texture map for the
+entire mesh e.g.
+
+```
+verts_uvs = aux.verts_uvs[None, ...]  # (1, V, 2)
+faces_uvs = faces.textures_idx[None, ...]  # (1, F, 3)
+tex_maps = aux.texture_images
+
+# tex_maps is a dictionary of {material name: texture image}.
+# Take the first image:
+texture_image = list(tex_maps.values())[0]
+texture_image = texture_image[None, ...]  # (1, H, W, 3)
+
+# Create a textures object
+tex = Textures(verts_uvs=verts_uvs, faces_uvs=faces_uvs, maps=texture_image)
+
+# Initialise the mesh with textures
+meshes = Meshes(verts=[verts], faces=[faces.verts_idx], textures=tex)
+```
+
+The `load_objs_as_meshes` function provides this procedure.
+
+## PLY
+
+Ply files are flexible in the way they store additional information. PyTorch3D
+provides a function just to read the vertices and faces from a ply file.
+The call
+```
+    verts, faces = load_ply(filename)
+```
+sets `verts` to be a (V,3)-tensor of vertices and `faces` to be an (F,3)-
+tensor of the vertex-indices of each of the corners of the faces. Faces which
+are not triangles will be split into triangles. A Meshes object containing a
+single mesh can be created from this data using
+```
+    meshes = Meshes(verts=[verts], faces=[faces])
+```
diff --git a/pytorch3d/docs/notes/renderer.md b/pytorch3d/docs/notes/renderer.md
new file mode 100644
index 0000000000000000000000000000000000000000..463dd2f0702c040fdb34a051cc789b14422528a0
--- /dev/null
+++ b/pytorch3d/docs/notes/renderer.md
@@ -0,0 +1,76 @@
+---
+hide_title: true
+sidebar_label:  Overview
+---
+
+# Rendering Overview
+
+Differentiable rendering is a relatively new and exciting research area in computer vision, bridging the gap between 2D and 3D by allowing 2D image pixels to be related back to 3D properties of a scene.
+
+For example, by rendering an image from a 3D shape predicted by a neural network, it is possible to compute a 2D loss with a reference image. Inverting the rendering step means we can relate the 2D loss from the pixels back to the 3D properties of the shape such as the positions of mesh vertices, enabling 3D shapes to be learnt without any explicit 3D supervision.
+
+We extensively researched existing codebases for differentiable rendering and found that:
+- the rendering pipeline is complex with more than 7 separate components which need to interoperate and be differentiable
+- popular existing approaches [[1](#1), [2](#2)] are based on the same core implementation which bundles many of the key components into large CUDA kernels which require significant expertise to understand, and has limited scope for extensions
+- existing methods either do not support batching or assume that meshes in a batch have the same number of vertices and faces
+- existing projects only provide CUDA implementations so they cannot be used without GPUs
+
+In order to experiment with different approaches, we wanted a modular implementation that is easy to use and extend, and supports [heterogeneous batching](batching.md). Taking inspiration from existing work [[1](#1), [2](#2)], we have created a new, modular, differentiable renderer with **parallel implementations in PyTorch, C++ and CUDA**, as well as comprehensive documentation and tests, with the aim of helping to further research in this field.
+
+Our implementation decouples the rasterization and shading steps of rendering. The core rasterization step (based on [[2]](#2)) returns several intermediate variables and has an optimized implementation in CUDA. The rest of the pipeline is implemented purely in PyTorch, and is designed to be customized and extended. With this approach, the PyTorch3D differentiable renderer can be imported as a library.
+
+## <u>Get started</u>
+
+To learn more about the implementation and start using the renderer, refer to [getting started with renderer](renderer_getting_started.md), which also contains the [architecture overview](assets/architecture_renderer.jpg) and [coordinate transformation conventions](assets/transforms_overview.jpg).
+
+## <u>Tech Report</u>
+
+For an in depth explanation of the renderer design, key features and benchmarks please refer to the PyTorch3D Technical Report on ArXiv: [Accelerating 3D Deep Learning with PyTorch3D](https://arxiv.org/abs/2007.08501), for the pulsar backend see here: [Fast Differentiable Raycasting for Neural Rendering using Sphere-based Representations](https://arxiv.org/abs/2004.07484).
+
+---
+
+**NOTE: CUDA Memory usage**
+
+The main comparison in the Technical Report is with SoftRasterizer [[2](#2)]. The SoftRasterizer forward CUDA kernel only outputs one `(N, H, W, 4)` FloatTensor compared with the PyTorch3D rasterizer forward CUDA kernel which outputs 4 tensors:
+
+  - `pix_to_face`, LongTensor `(N, H, W, K)`
+  - `zbuf`, FloatTensor `(N, H, W, K)`
+  - `dist`, FloatTensor `(N, H, W, K)`
+  - `bary_coords`, FloatTensor `(N, H, W, K, 3)`
+
+where **N** = batch size, **H/W** are image height/width, **K** is the faces per pixel. The PyTorch3D backward pass returns gradients for `zbuf`, `dist` and `bary_coords`.
+
+Returning intermediate variables from rasterization has an associated memory cost. We can calculate the theoretical lower bound on the memory usage for the forward and backward pass as follows:
+
+```
+# Assume 4 bytes per float, and 8 bytes for long
+
+memory_forward_pass = ((N * H * W * K) * 2 + (N * H * W * K * 3)) * 4 + (N * H * W * K) * 8
+memory_backward_pass = ((N * H * W * K) * 2 + (N * H * W * K * 3)) * 4
+
+total_memory = memory_forward_pass + memory_backward_pass
+             = (N * H * W * K) * (5 * 4 * 2 + 8)
+             = (N * H * W * K) * 48
+```
+
+We need 48 bytes per face per pixel of the rasterized output. In order to remain within bounds for memory usage we can vary the batch size (**N**), image size (**H/W**) and faces per pixel (**K**).  For example, for a fixed batch size, if using a larger image size, try reducing the faces per pixel.
+
+---
+
+### References
+
+<a id="1">[1]</a> Kato et al, 'Neural 3D Mesh Renderer', CVPR 2018
+
+<a id="2">[2]</a> Liu et al, 'Soft Rasterizer: A Differentiable Renderer for Image-based 3D Reasoning', ICCV 2019
+
+<a id="3">[3]</a> Loper et al, 'OpenDR: An Approximate Differentiable Renderer', ECCV 2014
+
+<a id="4">[4]</a> De La Gorce et al, 'Model-based 3D Hand Pose Estimation from Monocular Video', PAMI 2011
+
+<a id="5">[5]</a> Li et al, 'Differentiable Monte Carlo Ray Tracing through Edge Sampling', SIGGRAPH Asia 2018
+
+<a id="6">[6]</a> Yifan et al, 'Differentiable Surface Splatting for Point-based Geometry Processing', SIGGRAPH Asia 2019
+
+<a id="7">[7]</a> Loubet et al, 'Reparameterizing Discontinuous Integrands for Differentiable Rendering', SIGGRAPH Asia 2019
+
+<a id="8">[8]</a> Chen et al, 'Learning to Predict 3D Objects with an Interpolation-based Differentiable Renderer', NeurIPS 2019
diff --git a/pytorch3d/docs/notes/renderer_getting_started.md b/pytorch3d/docs/notes/renderer_getting_started.md
new file mode 100644
index 0000000000000000000000000000000000000000..21a480a9444b8f1312a79c73a7289d108aeef358
--- /dev/null
+++ b/pytorch3d/docs/notes/renderer_getting_started.md
@@ -0,0 +1,147 @@
+---
+hide_title: true
+sidebar_label: Getting Started
+---
+
+# Getting Started With Renderer
+
+### Architecture Overview
+
+The renderer is designed to be modular, extensible and support batching and gradients for all inputs. The following figure describes all the components of the rendering pipeline.
+
+<img src="assets/architecture_renderer.jpg" width="1000">
+
+##### Fragments
+
+The **rasterizer** returns 4 output tensors in a named tuple.
+
+- **`pix_to_face`**: LongTensor of shape `(N, image_size, image_size, faces_per_pixel)` specifying the indices of the faces (in the packed faces) which overlap each pixel in the image.
+- **`zbuf`**: FloatTensor of shape `(N, image_size, image_size, faces_per_pixel)` giving the z-coordinates of the nearest faces at each pixel in world coordinates, sorted in ascending z-order.
+- **`bary_coords`**: FloatTensor of shape `(N, image_size, image_size, faces_per_pixel, 3)`
+  giving the barycentric coordinates in NDC units of the nearest faces at each pixel, sorted in ascending z-order.
+- **`pix_dists`**: FloatTensor of shape `(N, image_size, image_size, faces_per_pixel)` giving the signed Euclidean distance (in NDC units) in the x/y plane of each point closest to the pixel.
+
+
+See the renderer API reference for more details about each component in the pipeline.
+
+---
+
+**NOTE:**
+
+The differentiable renderer API is experimental and subject to change!
+
+---
+
+### Coordinate transformation conventions
+
+Rendering requires transformations between several different coordinate frames: world space, view/camera space, NDC space and screen space. At each step it is important to know where the camera is located, how the +X, +Y, +Z axes are aligned and the possible range of values. The following figure outlines the conventions used in PyTorch3D.
+
+<img src="assets/transforms_overview.jpg" width="1000">
+
+
+For example, given a teapot mesh, the world coordinate frame, camera coordinate frame and image are shown in the figure below. Note that the world and camera coordinate frames have the +z direction pointing in to the page.
+
+<img src="assets/world_camera_image.jpg" width="1000">
+
+---
+
+**NOTE: PyTorch3D vs OpenGL**
+
+While we tried to emulate several aspects of OpenGL, there are differences in the coordinate frame conventions.
+- The default world coordinate frame in PyTorch3D has +Z pointing in to the screen whereas in OpenGL, +Z is pointing out of the screen.  Both are right handed.
+- The NDC coordinate system in PyTorch3D is **right-handed** compared with a **left-handed** NDC coordinate system in OpenGL (the projection matrix switches the handedness).
+
+<img align="center" src="assets/opengl_coordframes.png" width="300">
+
+---
+
+### Rasterizing Non Square Images
+
+To rasterize an image where H != W, you can specify the `image_size` in the `RasterizationSettings` as a tuple of (H, W).
+
+The aspect ratio needs special consideration. There are two aspect ratios to be aware of:
+    - the aspect ratio of each pixel
+    - the aspect ratio of the output image
+In the cameras e.g. `FoVPerspectiveCameras`, the `aspect_ratio` argument can be used to set the pixel aspect ratio. In the rasterizer, we assume square pixels, but variable image aspect ratio (i.e. rectangular images).
+
+In most cases you will want to set the camera aspect ratio to 1.0 (i.e. square pixels) and only vary the `image_size` in the `RasterizationSettings` (i.e. the output image dimensions in pixels).
+
+---
+
+### The pulsar backend
+
+Since v0.3, [pulsar](https://arxiv.org/abs/2004.07484) can be used as a backend for point-rendering. It has a focus on efficiency, which comes with pros and cons: it is highly optimized and all rendering stages are integrated in the CUDA kernels. This leads to significantly higher speed and better scaling behavior. We use it at Facebook Reality Labs to render and optimize scenes with millions of spheres in resolutions up to 4K. You can find a runtime comparison plot below (settings: `bin_size=None`, `points_per_pixel=5`, `image_size=1024`, `radius=1e-2`, `composite_params.radius=1e-4`; benchmarked on an RTX 2070 GPU).
+
+<img align="center" src="assets/pulsar_bm.png" width="300">
+
+Pulsar's processing steps are tightly integrated CUDA kernels and do not work with custom `rasterizer` and `compositor` components. We provide two ways to use Pulsar: (1) there is a unified interface to match the PyTorch3D calling convention seamlessly. This is, for example, illustrated in the [point cloud tutorial](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/render_colored_points.ipynb). (2) There is a direct interface available to the pulsar backend, which exposes the full functionality of the backend (including opacity, which is not yet available in PyTorch3D). Examples showing its use as well as the matching PyTorch3D interface code are available in [this folder](https://github.com/facebookresearch/pytorch3d/tree/master/docs/examples).
+
+---
+
+### Texturing options
+
+For mesh texturing we offer several options (in `pytorch3d/renderer/mesh/texturing.py`):
+
+1. **Vertex Textures**: D dimensional textures for each vertex (for example an RGB color) which can be interpolated across the face. This can be represented as an `(N, V, D)` tensor. This is a fairly simple representation though and cannot model complex textures if the mesh faces are large.
+2. **UV Textures**: vertex UV coordinates and **one** texture map for the whole mesh. For a point on a face with given barycentric coordinates, the face color can be computed by interpolating the vertex uv coordinates and then sampling from the texture map. This representation requires two tensors (UVs: `(N, V, 2)`, Texture map: `(N, H, W, 3)`), and is limited to only support one texture map per mesh.
+3. **Face Textures**: In more complex cases such as ShapeNet meshes, there are multiple texture maps per mesh and some faces have texture while others do not. For these cases, a more flexible representation is a texture atlas, where each face is represented as an `(RxR)` texture map where R is the texture resolution. For a given point on the face, the texture value can be sampled from the per face texture map using the barycentric coordinates of the point. This representation requires one tensor of shape `(N, F, R, R, 3)`. This texturing method is inspired by the SoftRasterizer implementation. For more details refer to the [`make_material_atlas`](https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/io/mtl_io.py#L123) and [`sample_textures`](https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/renderer/mesh/textures.py#L452) functions. **NOTE:** The `TexturesAtlas` texture sampling is only differentiable with respect to the texture atlas but not differentiable with respect to the barycentric coordinates.
+
+
+<img src="assets/texturing.jpg" width="1000">
+
+---
+
+### A simple renderer
+
+A renderer in PyTorch3D is composed of a **rasterizer** and a **shader**. Create a renderer in a few simple steps:
+
+```
+# Imports
+from pytorch3d.renderer import (
+    FoVPerspectiveCameras, look_at_view_transform,
+    RasterizationSettings, BlendParams,
+    MeshRenderer, MeshRasterizer, HardPhongShader
+)
+
+# Initialize an OpenGL perspective camera.
+R, T = look_at_view_transform(2.7, 10, 20)
+cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+
+# Define the settings for rasterization and shading. Here we set the output image to be of size
+# 512x512. As we are rendering images for visualization purposes only we will set faces_per_pixel=1
+# and blur_radius=0.0. Refer to rasterize_meshes.py for explanations of these parameters.
+raster_settings = RasterizationSettings(
+    image_size=512,
+    blur_radius=0.0,
+    faces_per_pixel=1,
+)
+
+# Create a Phong renderer by composing a rasterizer and a shader. Here we can use a predefined
+# PhongShader, passing in the device on which to initialize the default parameters
+renderer = MeshRenderer(
+    rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
+    shader=HardPhongShader(device=device, cameras=cameras)
+)
+```
+
+---
+
+### A custom shader
+
+Shaders are the most flexible part of the PyTorch3D rendering API. We have created some examples of shaders in `shaders.py` but this is a non exhaustive set.
+
+A shader can incorporate several steps:
+- **texturing** (e.g. interpolation of vertex RGB colors or interpolation of vertex UV coordinates followed by sampling from a texture map (interpolation uses barycentric coordinates output from rasterization))
+- **lighting/shading** (e.g. ambient, diffuse, specular lighting, Phong, Gouraud, Flat)
+- **blending** (e.g. hard blending using only the closest face for each pixel, or soft blending using a weighted sum of the top K faces per pixel)
+
+ We have examples of several combinations of these functions based on the texturing/shading/blending support we have currently. These are summarised in this table below. Many other combinations are possible and we plan to expand the options available for texturing, shading and blending.
+
+|Example Shaders  | Vertex Textures| UV Textures | Textures Atlas | Flat Shading| Gouraud Shading| Phong Shading | Hard blending | Soft Blending |
+| ------------- |:-------------:  | :--------------:| :--------------:| :--------------:| :--------------:| :--------------:|:--------------:|:--------------:|
+| HardPhongShader |  ✔️ |✔️|✔️|||  ✔️ |  ✔️||
+| SoftPhongShader |  ✔️ |✔️|✔️|||  ✔️ | | ✔️|
+| HardGouraudShader |  ✔️ |✔️|✔️||  ✔️ ||  ✔️||
+| SoftGouraudShader |  ✔️ |✔️|✔️||  ✔️ |||  ✔️|
+| HardFlatShader |  ✔️ |✔️|✔️|  ✔️ |||  ✔️||
+| SoftSilhouetteShader ||||||||  ✔️|
diff --git a/pytorch3d/docs/notes/visualization.md b/pytorch3d/docs/notes/visualization.md
new file mode 100644
index 0000000000000000000000000000000000000000..71aa51654f85882d70bcc8eaf90f1920e145c1e1
--- /dev/null
+++ b/pytorch3d/docs/notes/visualization.md
@@ -0,0 +1,27 @@
+---
+hide_title: true
+sidebar_label: Plotly Visualization
+---
+
+# Overview
+
+PyTorch3D provides a modular differentiable renderer, but for instances where we want interactive plots or are not concerned with the differentiability of the rendering process, we provide [functions to render meshes and pointclouds in plotly](https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/vis/plotly_vis.py). These plotly figures allow you to rotate and zoom the rendered images and support plotting batched data as multiple traces in a singular plot or divided into individual subplots.
+
+
+# Examples
+
+These rendering functions accept plotly x,y, and z axis arguments as `kwargs`, allowing us to customize the plots. Here are two plots with colored axes, a [Pointclouds plot](assets/plotly_pointclouds.png), a [batched Meshes plot in subplots](assets/plotly_meshes_batch.png), and a [batched Meshes plot with multiple traces](assets/plotly_meshes_trace.png). Refer to the [render textured meshes](https://pytorch3d.org/tutorials/render_textured_meshes) and [render colored pointclouds](https://pytorch3d.org/tutorials/render_colored_points) tutorials for code examples.
+
+# Saving plots to images
+
+If you want to save these plotly plots, you will need to install a separate library such as [Kaleido](https://plotly.com/python/static-image-export/).
+
+Install Kaleido
+```
+$ pip install Kaleido
+```
+Export a figure as a .png image. The image will be saved in the current working directory.
+```
+fig = ...
+fig.write_image("image_name.png")
+```
diff --git a/pytorch3d/docs/notes/why_pytorch3d.md b/pytorch3d/docs/notes/why_pytorch3d.md
new file mode 100644
index 0000000000000000000000000000000000000000..6d11e65987e58966ae08fd0edd5367081c391298
--- /dev/null
+++ b/pytorch3d/docs/notes/why_pytorch3d.md
@@ -0,0 +1,12 @@
+---
+hide_title: true
+sidebar_label: Why PyTorch3D
+---
+
+
+# Why PyTorch3D
+
+
+Our goal with PyTorch3D is to help accelerate research at the intersection of deep learning and 3D. 3D data is more complex than 2D images and while working on projects such as [Mesh R-CNN](https://github.com/facebookresearch/meshrcnn) and [C3DPO](https://github.com/facebookresearch/c3dpo_nrsfm), we encountered several challenges including 3D data representation, batching, and speed. We have developed many useful operators and abstractions for working on 3D deep learning and want to share this with the community to drive novel research in this area.
+
+In PyTorch3D we have included efficient 3D operators, heterogeneous batching capabilities, and a modular differentiable rendering API, to equip researchers in this field with a much needed toolkit to implement cutting-edge research with complex 3D inputs.
diff --git a/pytorch3d/docs/requirements.txt b/pytorch3d/docs/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..577a18a94937e3dd23aed9663a3f0cd3dcce8681
--- /dev/null
+++ b/pytorch3d/docs/requirements.txt
@@ -0,0 +1,11 @@
+docutils>=0.14
+Sphinx>=1.7
+recommonmark
+sphinx_rtd_theme
+sphinx_markdown_tables
+numpy
+iopath
+fvcore
+https://download.pytorch.org/whl/cpu/torchvision-0.15.2%2Bcpu-cp311-cp311-linux_x86_64.whl
+https://download.pytorch.org/whl/cpu/torch-2.0.1%2Bcpu-cp311-cp311-linux_x86_64.whl
+omegaconf
diff --git a/pytorch3d/docs/tutorials/README.md b/pytorch3d/docs/tutorials/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1f3f3649184a00a5711a661a77b722174618d71d
--- /dev/null
+++ b/pytorch3d/docs/tutorials/README.md
@@ -0,0 +1,23 @@
+# Tutorial notebooks
+
+For current versions of the tutorials, which correspond to the latest release,
+please look at this directory at the `stable` tag, namely at
+https://github.com/facebookresearch/pytorch3d/tree/stable/docs/tutorials .
+
+There are links at the project homepage for opening these directly in colab.
+
+They install PyTorch3D from pip, which should work inside a GPU colab notebook.
+If you need to install PyTorch3D from source inside colab, you can use
+```
+import os
+!curl -LO https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz
+!tar xzf 1.10.0.tar.gz
+os.environ["CUB_HOME"] = os.getcwd() + "/cub-1.10.0"
+!pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'
+```
+instead.
+
+The versions of these tutorials on the main branch may need to use the latest
+PyTorch3D from the main branch. You may be able to install this from source
+with the same commands as above, but replacing the last line with
+`!pip install 'git+https://github.com/facebookresearch/pytorch3d.git'`.
diff --git a/pytorch3d/docs/tutorials/bundle_adjustment.ipynb b/pytorch3d/docs/tutorials/bundle_adjustment.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..1002b457902fb0a9229909d50ccbfe18b4b85219
--- /dev/null
+++ b/pytorch3d/docs/tutorials/bundle_adjustment.ipynb
@@ -0,0 +1,466 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "bD6DUkgzmFoR"
+   },
+   "outputs": [],
+   "source": [
+    "# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "Jj6j6__ZmFoW"
+   },
+   "source": [
+    "# Absolute camera orientation given set of relative camera pairs\n",
+    "\n",
+    "This tutorial showcases the `cameras`, `transforms` and `so3` API.\n",
+    "\n",
+    "The problem we deal with is defined as follows:\n",
+    "\n",
+    "Given an optical system of $N$ cameras with extrinsics $\\{g_1, ..., g_N | g_i \\in SE(3)\\}$, and a set of relative camera positions $\\{g_{ij} | g_{ij}\\in SE(3)\\}$ that map between coordinate frames of randomly selected pairs of cameras $(i, j)$, we search for the absolute extrinsic parameters $\\{g_1, ..., g_N\\}$ that are consistent with the relative camera motions.\n",
+    "\n",
+    "More formally:\n",
+    "$$\n",
+    "g_1, ..., g_N = \n",
+    "{\\arg \\min}_{g_1, ..., g_N} \\sum_{g_{ij}} d(g_{ij}, g_i^{-1} g_j),\n",
+    "$$\n",
+    "where $d(g_i, g_j)$ is a suitable metric that compares the extrinsics of cameras $g_i$ and $g_j$. \n",
+    "\n",
+    "Visually, the problem can be described as follows. The picture below depicts the situation at the beginning of our optimization. The ground truth cameras are plotted in purple while the randomly initialized estimated cameras are plotted in orange:\n",
+    "![Initialization](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/data/bundle_adjustment_initialization.png?raw=1)\n",
+    "\n",
+    "Our optimization seeks to align the estimated (orange) cameras with the ground truth (purple) cameras, by minimizing the discrepancies between pairs of relative cameras. Thus, the solution to the problem should look as follows:\n",
+    "![Solution](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/data/bundle_adjustment_final.png?raw=1)\n",
+    "\n",
+    "In practice, the camera extrinsics $g_{ij}$ and $g_i$ are represented using objects from the `SfMPerspectiveCameras` class initialized with the corresponding rotation and translation matrices `R_absolute` and `T_absolute` that define the extrinsic parameters $g = (R, T); R \\in SO(3); T \\in \\mathbb{R}^3$. In order to ensure that `R_absolute` is a valid rotation matrix, we represent it using an exponential map (implemented with `so3_exp_map`) of the axis-angle representation of the rotation `log_R_absolute`.\n",
+    "\n",
+    "Note that the solution to this problem can only be recovered up to an unknown global rigid transformation $g_{glob} \\in SE(3)$. Thus, for simplicity, we assume knowledge of the absolute extrinsics of the first camera $g_0$. We set $g_0$ as a trivial camera $g_0 = (I, \\vec{0})$.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "nAQY4EnHmFoX"
+   },
+   "source": [
+    "## 0. Install and Import Modules"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "WAHR1LMJmP-h"
+   },
+   "source": [
+    "Ensure `torch` and `torchvision` are installed. If `pytorch3d` is not installed, install it using the following cell:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 717
+    },
+    "colab_type": "code",
+    "id": "uo7a3gdImMZx",
+    "outputId": "bf07fd03-dec0-4294-b2ba-9cf5b7333672"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import torch\n",
+    "need_pytorch3d=False\n",
+    "try:\n",
+    "    import pytorch3d\n",
+    "except ModuleNotFoundError:\n",
+    "    need_pytorch3d=True\n",
+    "if need_pytorch3d:\n",
+    "    if torch.__version__.startswith(\"2.1.\") and sys.platform.startswith(\"linux\"):\n",
+    "        # We try to install PyTorch3D via a released wheel.\n",
+    "        pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
+    "        version_str=\"\".join([\n",
+    "            f\"py3{sys.version_info.minor}_cu\",\n",
+    "            torch.version.cuda.replace(\".\",\"\"),\n",
+    "            f\"_pyt{pyt_version_str}\"\n",
+    "        ])\n",
+    "        !pip install fvcore iopath\n",
+    "        !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",
+    "    else:\n",
+    "        # We try to install PyTorch3D from source.\n",
+    "        !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 34
+    },
+    "colab_type": "code",
+    "id": "UgLa7XQimFoY",
+    "outputId": "16404f4f-4c7c-4f3f-b96a-e9a876def4c1"
+   },
+   "outputs": [],
+   "source": [
+    "# imports\n",
+    "import torch\n",
+    "from pytorch3d.transforms.so3 import (\n",
+    "    so3_exp_map,\n",
+    "    so3_relative_angle,\n",
+    ")\n",
+    "from pytorch3d.renderer.cameras import (\n",
+    "    SfMPerspectiveCameras,\n",
+    ")\n",
+    "\n",
+    "# add path for demo utils\n",
+    "import sys\n",
+    "import os\n",
+    "sys.path.append(os.path.abspath(''))\n",
+    "\n",
+    "# set for reproducibility\n",
+    "torch.manual_seed(42)\n",
+    "if torch.cuda.is_available():\n",
+    "    device = torch.device(\"cuda:0\")\n",
+    "else:\n",
+    "    device = torch.device(\"cpu\")\n",
+    "    print(\"WARNING: CPU only, this will be slow!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "u4emnRuzmpRB"
+   },
+   "source": [
+    "If using **Google Colab**, fetch the utils file for plotting the camera scene, and the ground truth camera positions:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 391
+    },
+    "colab_type": "code",
+    "id": "kOvMPYJdmd15",
+    "outputId": "9f2a601b-891b-4cb6-d8f6-a444f7829132"
+   },
+   "outputs": [],
+   "source": [
+    "!wget https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/docs/tutorials/utils/camera_visualization.py\n",
+    "from camera_visualization import plot_camera_scene\n",
+    "\n",
+    "!mkdir data\n",
+    "!wget -P data https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/docs/tutorials/data/camera_graph.pth"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "L9WD5vaimw3K"
+   },
+   "source": [
+    "OR if running **locally** uncomment and run the following cell:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "ucGlQj5EmmJ5"
+   },
+   "outputs": [],
+   "source": [
+    "# from utils import plot_camera_scene"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "7WeEi7IgmFoc"
+   },
+   "source": [
+    "## 1. Set up Cameras and load ground truth positions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "D_Wm0zikmFod"
+   },
+   "outputs": [],
+   "source": [
+    "# load the SE3 graph of relative/absolute camera positions\n",
+    "camera_graph_file = './data/camera_graph.pth'\n",
+    "(R_absolute_gt, T_absolute_gt), \\\n",
+    "    (R_relative, T_relative), \\\n",
+    "    relative_edges = \\\n",
+    "        torch.load(camera_graph_file)\n",
+    "\n",
+    "# create the relative cameras\n",
+    "cameras_relative = SfMPerspectiveCameras(\n",
+    "    R = R_relative.to(device),\n",
+    "    T = T_relative.to(device),\n",
+    "    device = device,\n",
+    ")\n",
+    "\n",
+    "# create the absolute ground truth cameras\n",
+    "cameras_absolute_gt = SfMPerspectiveCameras(\n",
+    "    R = R_absolute_gt.to(device),\n",
+    "    T = T_absolute_gt.to(device),\n",
+    "    device = device,\n",
+    ")\n",
+    "\n",
+    "# the number of absolute camera positions\n",
+    "N = R_absolute_gt.shape[0]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "-f-RNlGemFog"
+   },
+   "source": [
+    "## 2. Define optimization functions\n",
+    "\n",
+    "### Relative cameras and camera distance\n",
+    "We now define two functions crucial for the optimization.\n",
+    "\n",
+    "**`calc_camera_distance`** compares a pair of cameras. This function is important as it defines the loss that we are minimizing. The method utilizes the `so3_relative_angle` function from the SO3 API.\n",
+    "\n",
+    "**`get_relative_camera`** computes the parameters of a relative camera that maps between a pair of absolute cameras. Here we utilize the `compose` and `inverse` class methods from the PyTorch3D Transforms API."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "xzzk88RHmFoh"
+   },
+   "outputs": [],
+   "source": [
+    "def calc_camera_distance(cam_1, cam_2):\n",
+    "    \"\"\"\n",
+    "    Calculates the divergence of a batch of pairs of cameras cam_1, cam_2.\n",
+    "    The distance is the sum of one minus the cosine of the relative angle\n",
+    "    between the rotation components of the camera extrinsics and the squared\n",
+    "    l2 distance between the translation vectors.\n",
+    "    \"\"\"\n",
+    "    # rotation distance\n",
+    "    R_distance = (1.-so3_relative_angle(cam_1.R, cam_2.R, cos_angle=True)).mean()\n",
+    "    # translation distance\n",
+    "    T_distance = ((cam_1.T - cam_2.T)**2).sum(1).mean()\n",
+    "    # the final distance is the sum\n",
+    "    return R_distance + T_distance\n",
+    "\n",
+    "def get_relative_camera(cams, edges):\n",
+    "    \"\"\"\n",
+    "    For each pair of indices (i,j) in \"edges\" generate a camera\n",
+    "    that maps from the coordinates of the camera cams[i] to \n",
+    "    the coordinates of the camera cams[j]\n",
+    "    \"\"\"\n",
+    "\n",
+    "    # first generate the world-to-view Transform3d objects of each \n",
+    "    # camera pair (i, j) according to the edges argument\n",
+    "    trans_i, trans_j = [\n",
+    "        SfMPerspectiveCameras(\n",
+    "            R = cams.R[edges[:, i]],\n",
+    "            T = cams.T[edges[:, i]],\n",
+    "            device = device,\n",
+    "        ).get_world_to_view_transform()\n",
+    "         for i in (0, 1)\n",
+    "    ]\n",
+    "    \n",
+    "    # compose the relative transformation as g_i^{-1} g_j\n",
+    "    trans_rel = trans_i.inverse().compose(trans_j)\n",
+    "    \n",
+    "    # generate a camera from the relative transform\n",
+    "    matrix_rel = trans_rel.get_matrix()\n",
+    "    cams_relative = SfMPerspectiveCameras(\n",
+    "                        R = matrix_rel[:, :3, :3],\n",
+    "                        T = matrix_rel[:, 3, :3],\n",
+    "                        device = device,\n",
+    "                    )\n",
+    "    return cams_relative"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "Ys9J7MbMmFol"
+   },
+   "source": [
+    "## 3. Optimization\n",
+    "Finally, we start the optimization of the absolute cameras.\n",
+    "\n",
+    "We use SGD with momentum and optimize over `log_R_absolute` and `T_absolute`. \n",
+    "\n",
+    "As mentioned earlier, `log_R_absolute` is the axis angle representation of the rotation part of our absolute cameras. We can obtain the 3x3 rotation matrix `R_absolute` that corresponds to `log_R_absolute` with:\n",
+    "\n",
+    "`R_absolute = so3_exp_map(log_R_absolute)`\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 1000
+    },
+    "colab_type": "code",
+    "id": "iOK_DUzVmFom",
+    "outputId": "4195bc36-7b84-4070-dcc1-d3abb1e12031"
+   },
+   "outputs": [],
+   "source": [
+    "# initialize the absolute log-rotations/translations with random entries\n",
+    "log_R_absolute_init = torch.randn(N, 3, dtype=torch.float32, device=device)\n",
+    "T_absolute_init = torch.randn(N, 3, dtype=torch.float32, device=device)\n",
+    "\n",
+    "# furthermore, we know that the first camera is a trivial one \n",
+    "#    (see the description above)\n",
+    "log_R_absolute_init[0, :] = 0.\n",
+    "T_absolute_init[0, :] = 0.\n",
+    "\n",
+    "# instantiate a copy of the initialization of log_R / T\n",
+    "log_R_absolute = log_R_absolute_init.clone().detach()\n",
+    "log_R_absolute.requires_grad = True\n",
+    "T_absolute = T_absolute_init.clone().detach()\n",
+    "T_absolute.requires_grad = True\n",
+    "\n",
+    "# the mask that specifies which cameras are going to be optimized\n",
+    "#     (since we know the first camera is already correct, \n",
+    "#      we only optimize over the 2nd-to-last cameras)\n",
+    "camera_mask = torch.ones(N, 1, dtype=torch.float32, device=device)\n",
+    "camera_mask[0] = 0.\n",
+    "\n",
+    "# init the optimizer\n",
+    "optimizer = torch.optim.SGD([log_R_absolute, T_absolute], lr=.1, momentum=0.9)\n",
+    "\n",
+    "# run the optimization\n",
+    "n_iter = 2000  # fix the number of iterations\n",
+    "for it in range(n_iter):\n",
+    "    # re-init the optimizer gradients\n",
+    "    optimizer.zero_grad()\n",
+    "\n",
+    "    # compute the absolute camera rotations as \n",
+    "    # an exponential map of the logarithms (=axis-angles)\n",
+    "    # of the absolute rotations\n",
+    "    R_absolute = so3_exp_map(log_R_absolute * camera_mask)\n",
+    "\n",
+    "    # get the current absolute cameras\n",
+    "    cameras_absolute = SfMPerspectiveCameras(\n",
+    "        R = R_absolute,\n",
+    "        T = T_absolute * camera_mask,\n",
+    "        device = device,\n",
+    "    )\n",
+    "\n",
+    "    # compute the relative cameras as a composition of the absolute cameras\n",
+    "    cameras_relative_composed = \\\n",
+    "        get_relative_camera(cameras_absolute, relative_edges)\n",
+    "\n",
+    "    # compare the composed cameras with the ground truth relative cameras\n",
+    "    # camera_distance corresponds to $d$ from the description\n",
+    "    camera_distance = \\\n",
+    "        calc_camera_distance(cameras_relative_composed, cameras_relative)\n",
+    "\n",
+    "    # our loss function is the camera_distance\n",
+    "    camera_distance.backward()\n",
+    "    \n",
+    "    # apply the gradients\n",
+    "    optimizer.step()\n",
+    "\n",
+    "    # plot and print status message\n",
+    "    if it % 200==0 or it==n_iter-1:\n",
+    "        status = 'iteration=%3d; camera_distance=%1.3e' % (it, camera_distance)\n",
+    "        plot_camera_scene(cameras_absolute, cameras_absolute_gt, status)\n",
+    "\n",
+    "print('Optimization finished.')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "vncLMvxWnhmO"
+   },
+   "source": [
+    "## 4. Conclusion \n",
+    "\n",
+    "In this tutorial we learnt how to initialize a batch of SfM Cameras, set up loss functions for bundle adjustment, and run an optimization loop. "
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "bento_stylesheets": {
+   "bento/extensions/flow/main.css": true,
+   "bento/extensions/kernel_selector/main.css": true,
+   "bento/extensions/kernel_ui/main.css": true,
+   "bento/extensions/new_kernel/main.css": true,
+   "bento/extensions/system_usage/main.css": true,
+   "bento/extensions/theme/main.css": true
+  },
+  "colab": {
+   "name": "bundle_adjustment.ipynb",
+   "provenance": [],
+   "toc_visible": true
+  },
+  "file_extension": ".py",
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.5+"
+  },
+  "mimetype": "text/x-python",
+  "name": "python",
+  "npconvert_exporter": "python",
+  "pygments_lexer": "ipython3",
+  "version": 3
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/pytorch3d/docs/tutorials/camera_position_optimization_with_differentiable_rendering.ipynb b/pytorch3d/docs/tutorials/camera_position_optimization_with_differentiable_rendering.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..79163a2c22c12807a06f73382be71e06ae069628
--- /dev/null
+++ b/pytorch3d/docs/tutorials/camera_position_optimization_with_differentiable_rendering.ipynb
@@ -0,0 +1,581 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "-P3OUvJirQdR"
+   },
+   "outputs": [],
+   "source": [
+    "# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "44lB2sH-rQdW"
+   },
+   "source": [
+    "# Camera position optimization using differentiable rendering\n",
+    "\n",
+    "In this tutorial we will learn the [x, y, z] position of a camera given a reference image using differentiable rendering. \n",
+    "\n",
+    "We will first initialize a renderer with a starting position for the camera. We will then use this to generate an image, compute a loss with the reference image, and finally backpropagate through the entire pipeline to update the position of the camera. \n",
+    "\n",
+    "This tutorial shows how to:\n",
+    "- load a mesh from an `.obj` file\n",
+    "- initialize a `Camera`, `Shader` and `Renderer`,\n",
+    "- render a mesh\n",
+    "- set up an optimization loop with a loss function and optimizer\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "AZGmIlmWrQdX"
+   },
+   "source": [
+    "##  0. Install and import modules"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "qkX7DiM6rmeM"
+   },
+   "source": [
+    "Ensure `torch` and `torchvision` are installed. If `pytorch3d` is not installed, install it using the following cell:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 717
+    },
+    "colab_type": "code",
+    "id": "sEVdNGFwripM",
+    "outputId": "27047061-a29b-4562-c164-c1288e24c266"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import torch\n",
+    "need_pytorch3d=False\n",
+    "try:\n",
+    "    import pytorch3d\n",
+    "except ModuleNotFoundError:\n",
+    "    need_pytorch3d=True\n",
+    "if need_pytorch3d:\n",
+    "    if torch.__version__.startswith(\"2.1.\") and sys.platform.startswith(\"linux\"):\n",
+    "        # We try to install PyTorch3D via a released wheel.\n",
+    "        pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
+    "        version_str=\"\".join([\n",
+    "            f\"py3{sys.version_info.minor}_cu\",\n",
+    "            torch.version.cuda.replace(\".\",\"\"),\n",
+    "            f\"_pyt{pyt_version_str}\"\n",
+    "        ])\n",
+    "        !pip install fvcore iopath\n",
+    "        !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",
+    "    else:\n",
+    "        # We try to install PyTorch3D from source.\n",
+    "        !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "w9mH5iVprQdZ"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import torch\n",
+    "import numpy as np\n",
+    "from tqdm.notebook import tqdm\n",
+    "import imageio\n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
+    "import matplotlib.pyplot as plt\n",
+    "from skimage import img_as_ubyte\n",
+    "\n",
+    "# io utils\n",
+    "from pytorch3d.io import load_obj\n",
+    "\n",
+    "# datastructures\n",
+    "from pytorch3d.structures import Meshes\n",
+    "\n",
+    "# 3D transformations functions\n",
+    "from pytorch3d.transforms import Rotate, Translate\n",
+    "\n",
+    "# rendering components\n",
+    "from pytorch3d.renderer import (\n",
+    "    FoVPerspectiveCameras, look_at_view_transform, look_at_rotation, \n",
+    "    RasterizationSettings, MeshRenderer, MeshRasterizer, BlendParams,\n",
+    "    SoftSilhouetteShader, HardPhongShader, PointLights, TexturesVertex,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "cpUf2UvirQdc"
+   },
+   "source": [
+    "## 1. Load the Obj\n",
+    "\n",
+    "We will load an obj file and create a **Meshes** object. **Meshes** is a unique datastructure provided in PyTorch3D for working with **batches of meshes of different sizes**. It has several useful class methods which are used in the rendering pipeline. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "8d-oREfkrt_Z"
+   },
+   "source": [
+    "If you are running this notebook locally after cloning the PyTorch3D repository, the mesh will already be available. **If using Google Colab, fetch the mesh and save it at the path `data/`**:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 204
+    },
+    "colab_type": "code",
+    "id": "sD5KcLuJr0PL",
+    "outputId": "e65061fa-dbd5-4c06-b559-3592632983ee"
+   },
+   "outputs": [],
+   "source": [
+    "!mkdir -p data\n",
+    "!wget -P data https://dl.fbaipublicfiles.com/pytorch3d/data/teapot/teapot.obj"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "VWiPKnEIrQdd"
+   },
+   "outputs": [],
+   "source": [
+    "# Set the cuda device \n",
+    "if torch.cuda.is_available():\n",
+    "    device = torch.device(\"cuda:0\")\n",
+    "    torch.cuda.set_device(device)\n",
+    "else:\n",
+    "    device = torch.device(\"cpu\")\n",
+    "\n",
+    "# Load the obj and ignore the textures and materials.\n",
+    "verts, faces_idx, _ = load_obj(\"./data/teapot.obj\")\n",
+    "faces = faces_idx.verts_idx\n",
+    "\n",
+    "# Initialize each vertex to be white in color.\n",
+    "verts_rgb = torch.ones_like(verts)[None]  # (1, V, 3)\n",
+    "textures = TexturesVertex(verts_features=verts_rgb.to(device))\n",
+    "\n",
+    "# Create a Meshes object for the teapot. Here we have only one mesh in the batch.\n",
+    "teapot_mesh = Meshes(\n",
+    "    verts=[verts.to(device)],   \n",
+    "    faces=[faces.to(device)], \n",
+    "    textures=textures\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "mgtGbQktrQdh"
+   },
+   "source": [
+    "\n",
+    "\n",
+    "## 2. Optimization setup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "Q6PzKD_NrQdi"
+   },
+   "source": [
+    "### Create a renderer\n",
+    "\n",
+    "A **renderer** in PyTorch3D is composed of a **rasterizer** and a **shader** which each have a number of subcomponents such as a **camera** (orthographic/perspective). Here we initialize some of these components and use default values for the rest. \n",
+    "\n",
+    "For optimizing the camera position we will use a renderer which produces a **silhouette** of the object only and does not apply any **lighting** or **shading**. We will also initialize another renderer which applies full **Phong shading** and use this for visualizing the outputs. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "KPlby75GrQdj"
+   },
+   "outputs": [],
+   "source": [
+    "# Initialize a perspective camera.\n",
+    "cameras = FoVPerspectiveCameras(device=device)\n",
+    "\n",
+    "# To blend the 100 faces we set a few parameters which control the opacity and the sharpness of \n",
+    "# edges. Refer to blending.py for more details. \n",
+    "blend_params = BlendParams(sigma=1e-4, gamma=1e-4)\n",
+    "\n",
+    "# Define the settings for rasterization and shading. Here we set the output image to be of size\n",
+    "# 256x256. To form the blended image we use 100 faces for each pixel. We also leave bin_size and max_faces_per_bin at their default values, which ensures that \n",
+    "# the faster coarse-to-fine rasterization method is used. Refer to rasterize_meshes.py for \n",
+    "# explanations of these parameters. Refer to docs/notes/renderer.md for an explanation of \n",
+    "# the difference between naive and coarse-to-fine rasterization. \n",
+    "raster_settings = RasterizationSettings(\n",
+    "    image_size=256, \n",
+    "    blur_radius=np.log(1. / 1e-4 - 1.) * blend_params.sigma, \n",
+    "    faces_per_pixel=100, \n",
+    ")\n",
+    "\n",
+    "# Create a silhouette mesh renderer by composing a rasterizer and a shader. \n",
+    "silhouette_renderer = MeshRenderer(\n",
+    "    rasterizer=MeshRasterizer(\n",
+    "        cameras=cameras, \n",
+    "        raster_settings=raster_settings\n",
+    "    ),\n",
+    "    shader=SoftSilhouetteShader(blend_params=blend_params)\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# We will also create a Phong renderer. This is simpler and only needs to render one face per pixel.\n",
+    "raster_settings = RasterizationSettings(\n",
+    "    image_size=256, \n",
+    "    blur_radius=0.0, \n",
+    "    faces_per_pixel=1, \n",
+    ")\n",
+    "# We can add a point light in front of the object. \n",
+    "lights = PointLights(device=device, location=((2.0, 2.0, -2.0),))\n",
+    "phong_renderer = MeshRenderer(\n",
+    "    rasterizer=MeshRasterizer(\n",
+    "        cameras=cameras, \n",
+    "        raster_settings=raster_settings\n",
+    "    ),\n",
+    "    shader=HardPhongShader(device=device, cameras=cameras, lights=lights)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "osOy2OIJrQdn"
+   },
+   "source": [
+    "### Create a reference image\n",
+    "\n",
+    "We will first position the teapot and generate an image. We use helper functions to rotate the teapot to a desired viewpoint. Then we can use the renderers to produce an image. Here we will use both renderers and visualize the silhouette and full shaded image. \n",
+    "\n",
+    "The world coordinate system is defined as +Y up, +X left and +Z in. The teapot in world coordinates has the spout pointing to the left. \n",
+    "\n",
+    "We defined a camera which is positioned on the positive z axis hence sees the spout to the right. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 305
+    },
+    "colab_type": "code",
+    "id": "EjJrW7qerQdo",
+    "outputId": "93545b65-269e-4719-f4a2-52cbc6c9c974"
+   },
+   "outputs": [],
+   "source": [
+    "# Select the viewpoint using spherical angles  \n",
+    "distance = 3   # distance from camera to the object\n",
+    "elevation = 50.0   # angle of elevation in degrees\n",
+    "azimuth = 0.0  # No rotation so the camera is positioned on the +Z axis. \n",
+    "\n",
+    "# Get the position of the camera based on the spherical angles\n",
+    "R, T = look_at_view_transform(distance, elevation, azimuth, device=device)\n",
+    "\n",
+    "# Render the teapot providing the values of R and T. \n",
+    "silhouette = silhouette_renderer(meshes_world=teapot_mesh, R=R, T=T)\n",
+    "image_ref = phong_renderer(meshes_world=teapot_mesh, R=R, T=T)\n",
+    "\n",
+    "silhouette = silhouette.cpu().numpy()\n",
+    "image_ref = image_ref.cpu().numpy()\n",
+    "\n",
+    "plt.figure(figsize=(10, 10))\n",
+    "plt.subplot(1, 2, 1)\n",
+    "plt.imshow(silhouette.squeeze()[..., 3])  # only plot the alpha channel of the RGBA image\n",
+    "plt.grid(False)\n",
+    "plt.subplot(1, 2, 2)\n",
+    "plt.imshow(image_ref.squeeze())\n",
+    "plt.grid(False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "plBJwEslrQdt"
+   },
+   "source": [
+    "### Set up a basic model \n",
+    "\n",
+    "Here we create a simple model class and initialize a parameter for the camera position. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "YBbP1-EDrQdu"
+   },
+   "outputs": [],
+   "source": [
+    "class Model(nn.Module):\n",
+    "    def __init__(self, meshes, renderer, image_ref):\n",
+    "        super().__init__()\n",
+    "        self.meshes = meshes\n",
+    "        self.device = meshes.device\n",
+    "        self.renderer = renderer\n",
+    "        \n",
+    "        # Get the silhouette of the reference RGB image by finding all non-white pixel values. \n",
+    "        image_ref = torch.from_numpy((image_ref[..., :3].max(-1) != 1).astype(np.float32))\n",
+    "        self.register_buffer('image_ref', image_ref)\n",
+    "        \n",
+    "        # Create an optimizable parameter for the x, y, z position of the camera. \n",
+    "        self.camera_position = nn.Parameter(\n",
+    "            torch.from_numpy(np.array([3.0,  6.9, +2.5], dtype=np.float32)).to(meshes.device))\n",
+    "\n",
+    "    def forward(self):\n",
+    "        \n",
+    "        # Render the image using the updated camera position. Based on the new position of the \n",
+    "        # camera we calculate the rotation and translation matrices\n",
+    "        R = look_at_rotation(self.camera_position[None, :], device=self.device)  # (1, 3, 3)\n",
+    "        T = -torch.bmm(R.transpose(1, 2), self.camera_position[None, :, None])[:, :, 0]   # (1, 3)\n",
+    "        \n",
+    "        image = self.renderer(meshes_world=self.meshes.clone(), R=R, T=T)\n",
+    "        \n",
+    "        # Calculate the silhouette loss\n",
+    "        loss = torch.sum((image[..., 3] - self.image_ref) ** 2)\n",
+    "        return loss, image\n",
+    "  "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "qCGLSJtfrQdy"
+   },
+   "source": [
+    "## 3. Initialize the model and optimizer\n",
+    "\n",
+    "Now we can create an instance of the **model** above and set up an **optimizer** for the camera position parameter. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "srZPBU7_rQdz"
+   },
+   "outputs": [],
+   "source": [
+    "# We will save images periodically and compose them into a GIF.\n",
+    "filename_output = \"./teapot_optimization_demo.gif\"\n",
+    "writer = imageio.get_writer(filename_output, mode='I', duration=0.3)\n",
+    "\n",
+    "# Initialize a model using the renderer, mesh and reference image\n",
+    "model = Model(meshes=teapot_mesh, renderer=silhouette_renderer, image_ref=image_ref).to(device)\n",
+    "\n",
+    "# Create an optimizer. Here we are using Adam and we pass in the parameters of the model\n",
+    "optimizer = torch.optim.Adam(model.parameters(), lr=0.05)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "dvTLnrWorQd2"
+   },
+   "source": [
+    "### Visualize the starting position and the reference position"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 335
+    },
+    "colab_type": "code",
+    "id": "qyRXpP3mrQd3",
+    "outputId": "47ecb12a-e68c-47f5-92fc-821a7a9bd661"
+   },
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(10, 10))\n",
+    "\n",
+    "_, image_init = model()\n",
+    "plt.subplot(1, 2, 1)\n",
+    "plt.imshow(image_init.detach().squeeze().cpu().numpy()[..., 3])\n",
+    "plt.grid(False)\n",
+    "plt.title(\"Starting position\")\n",
+    "\n",
+    "plt.subplot(1, 2, 2)\n",
+    "plt.imshow(model.image_ref.cpu().numpy().squeeze())\n",
+    "plt.grid(False)\n",
+    "plt.title(\"Reference silhouette\");\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "aGJu7h-lrQd5"
+   },
+   "source": [
+    "## 4. Run the optimization \n",
+    "\n",
+    "We run up to 200 iterations of the forward and backward pass, stopping early once the loss drops below 200, and save outputs every 10 iterations. When this has finished, take a look at `./teapot_optimization_demo.gif` for a cool gif of the optimization process!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 1000,
+     "referenced_widgets": [
+      "79d7fc84b5564206ab64b2759474da04",
+      "02acadb61c3949fcaeab177fd184c388",
+      "efd9860908c64bfe9d47118be4734648",
+      "f8df7c6efb7d47f5be760a39b4bdbcf8",
+      "d8a109658c364a00ab4d298112dac6db",
+      "2d05db82cc99482bb3d62b6d4e5b1a98",
+      "c621d425e2c8426c8cd4f9136d392af1",
+      "3df8063f307040ebb8ff8e2f26ccf729"
+     ]
+    },
+    "colab_type": "code",
+    "id": "HvnK5VI5rQd6",
+    "outputId": "4019c697-3fc6-4c7b-cdfe-225633cc0d60"
+   },
+   "outputs": [],
+   "source": [
+    "loop = tqdm(range(200))\n",
+    "for i in loop:\n",
+    "    optimizer.zero_grad()\n",
+    "    loss, _ = model()\n",
+    "    loss.backward()\n",
+    "    optimizer.step()\n",
+    "    \n",
+    "    loop.set_description('Optimizing (loss %.4f)' % loss.data)\n",
+    "    \n",
+    "    if loss.item() < 200:\n",
+    "        break\n",
+    "    \n",
+    "    # Save outputs to create a GIF. \n",
+    "    if i % 10 == 0:\n",
+    "        R = look_at_rotation(model.camera_position[None, :], device=model.device)\n",
+    "        T = -torch.bmm(R.transpose(1, 2), model.camera_position[None, :, None])[:, :, 0]   # (1, 3)\n",
+    "        image = phong_renderer(meshes_world=model.meshes.clone(), R=R, T=T)\n",
+    "        image = image[0, ..., :3].detach().squeeze().cpu().numpy()\n",
+    "        image = img_as_ubyte(image)\n",
+    "        writer.append_data(image)\n",
+    "        \n",
+    "        plt.figure()\n",
+    "        plt.imshow(image[..., :3])\n",
+    "        plt.title(\"iter: %d, loss: %0.2f\" % (i, loss.data))\n",
+    "        plt.axis(\"off\")\n",
+    "    \n",
+    "writer.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "mWj80P_SsPTN"
+   },
+   "source": [
+    "## 5. Conclusion \n",
+    "\n",
+    "In this tutorial we learnt how to **load** a mesh from an obj file, initialize a PyTorch3D data structure called **Meshes**, set up a **Renderer** consisting of a **Rasterizer** and a **Shader**, set up an optimization loop including a **Model** and a **loss function**, and run the optimization. "
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "anp_metadata": {
+   "path": "fbsource/fbcode/vision/fair/pytorch3d/docs/tutorials/camera_position_optimization_with_differentiable_rendering.ipynb"
+  },
+  "bento_stylesheets": {
+   "bento/extensions/flow/main.css": true,
+   "bento/extensions/kernel_selector/main.css": true,
+   "bento/extensions/kernel_ui/main.css": true,
+   "bento/extensions/new_kernel/main.css": true,
+   "bento/extensions/system_usage/main.css": true,
+   "bento/extensions/theme/main.css": true
+  },
+  "colab": {
+   "name": "camera_position_optimization_with_differentiable_rendering.ipynb",
+   "provenance": [],
+   "toc_visible": true
+  },
+  "disseminate_notebook_info": {
+   "backup_notebook_id": "1062179640844868"
+  },
+  "kernelspec": {
+   "display_name": "pytorch3d (local)",
+   "language": "python",
+   "name": "pytorch3d_local"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.5+"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/pytorch3d/docs/tutorials/data/cow_mesh/README.md b/pytorch3d/docs/tutorials/data/cow_mesh/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..3e5cefc78cbca14945740e135f3f5cdcd17dd291
--- /dev/null
+++ b/pytorch3d/docs/tutorials/data/cow_mesh/README.md
@@ -0,0 +1,5 @@
+# Acknowledgements
+
+Thank you to Keenan Crane for allowing the cow mesh model to be used freely in the public domain.
+
+###### Source: http://www.cs.cmu.edu/~kmcrane/Projects/ModelRepository/
diff --git a/pytorch3d/docs/tutorials/data/cow_mesh/cow.mtl b/pytorch3d/docs/tutorials/data/cow_mesh/cow.mtl
new file mode 100644
index 0000000000000000000000000000000000000000..c3bc054358c3db2526520c29f4c30ac812f7c370
--- /dev/null
+++ b/pytorch3d/docs/tutorials/data/cow_mesh/cow.mtl
@@ -0,0 +1,9 @@
+newmtl material_1
+map_Kd cow_texture.png
+
+# Test colors
+
+Ka 1.000 1.000 1.000  # white
+Kd 1.000 1.000 1.000  # white
+Ks 0.000 0.000 0.000  # black
+Ns 10.0
diff --git a/pytorch3d/docs/tutorials/data/cow_mesh/cow.obj b/pytorch3d/docs/tutorials/data/cow_mesh/cow.obj
new file mode 100644
index 0000000000000000000000000000000000000000..6fb82827d385d94bbe54e05dfeb8ab016904bda9
--- /dev/null
+++ b/pytorch3d/docs/tutorials/data/cow_mesh/cow.obj
@@ -0,0 +1,12015 @@
+
+mtllib cow.mtl
+
+v 0.348799 -0.334989 -0.0832331
+v 0.313132 -0.399051 0.881192
+v 0.266758 0.181628 0.122726
+v 0.229555 0.0663178 0.828702
+v 0.353623 -0.0486456 0.443969
+v 0.335878 -0.384495 0.425693
+v 0.305362 0.0307983 -0.00655663
+v 0.279611 -0.0552387 0.858547
+v 0.266719 0.10578 0.46681
+v 0.149341 -0.451522 0.166423
+v 0.12606 -0.162036 -0.185668
+v 0.114009 -0.358339 -0.0759521
+v 0.101546 -0.475382 0.663157
+v 0.240757 -0.435635 0.421981
+v 0.385825 -0.174501 0.180766
+v 0.358602 -0.37797 0.17897
+v 0.345953 -0.229705 0.664126
+v 0.316102 -0.419396 0.649153
+v 0.131961 -0.201382 0.951731
+v 0.0876766 -0.391182 0.901606
+v 0.161859 0.285356 0.303098
+v 0.177264 0.282839 0.0875537
+v 0.158786 0.0751889 -0.141343
+v 0.310974 -0.0974099 -0.0697762
+v 0.138236 0.191142 0.700393
+v 0.13061 0.109881 0.873657
+v 0.287544 -0.214549 0.864938
+v 0.25445 0.101336 0.680112
+v 0.34304 -0.0541165 0.672286
+v 0.369388 -0.0242775 0.205838
+v 0.273782 0.160332 0.273861
+v 0.36907 -0.228332 0.435279
+v 0.146074 0.206063 0.488052
+v 0.15689 0.317253 -0.0179554
+v 0.184529 0.181235 -0.238574
+v 0.229877 0.446038 -0.0428095
+v 0.326584 0.303676 -0.375494
+v 0.257915 0.488969 -0.517721
+v 0.187711 0.730295 -0.437134
+v 0.190442 0.122856 -0.437696
+v 0.271501 0.201979 -0.579853
+v 0.227961 0.337016 -0.641468
+v 0.323288 -0.645898 -0.0650069
+v 0.277559 -0.660659 0.872426
+v 0.15321 -0.671553 0.116597
+v 0.153343 -0.652512 -0.0628489
+v 0.129752 -0.680413 0.714233
+v 0.310744 -0.658159 0.12474
+v 0.270197 -0.670204 0.705429
+v 0.125665 -0.664371 0.877017
+v 0.264169 0.646946 -0.369897
+v 0.180359 0.68646 -0.148279
+v 0.263204 0.560375 -0.137191
+v 0.371765 0.683869 -0.321196
+v 0.432195 0.751246 -0.233457
+v 0.424574 0.680439 -0.150855
+v 0.379346 0.604049 -0.213075
+v 0 -0.192084 -0.196407
+v 0 0.505678 -0.563016
+v 0 0.340641 -0.665066
+v 0 0.0414775 -0.241591
+v 0 0.117615 -0.305384
+v 0 0.100384 -0.450564
+v 0 0.182587 -0.616155
+v -4.33681e-19 0.765067 -0.449072
+v -4.33681e-19 0.834757 -0.307668
+v 0 -0.434774 0.0615192
+v 0 -0.354126 -0.0677048
+v 0 0.326124 0.139538
+v 0 0.352207 0.0388631
+v 0 0.499175 0.0586613
+v -4.33681e-19 0.723906 -0.0597107
+v 0 0.816061 -0.189369
+v 0 -0.435249 0.726548
+v 0 -0.533724 0.415511
+v 0 0.301194 0.312719
+v 0 0.233342 0.49566
+v 0 -0.340037 0.891691
+v 0 -0.194995 0.971379
+v 0 0.220553 0.711025
+v 0 0.130478 0.891171
+v 0.164015 0.929339 -0.232094
+v 0.205905 0.915741 -0.313374
+v 0.15338 0.935554 -0.282029
+v 0.221786 0.905461 -0.256021
+v 0.115236 0.837896 -0.222054
+v 0.086327 0.812209 -0.211465
+v 0.172231 0.819312 -0.334104
+v 0.162464 0.791078 -0.351846
+v 0.0787362 0.822102 -0.294366
+v 0.102999 0.844736 -0.288229
+v 0.219399 0.754552 -0.28348
+v 0.199751 0.790382 -0.244881
+v 0.184684 0.762009 -0.196152
+v 0.279703 0.762439 -0.233186
+v 0.120311 -0.456517 0.243094
+v 0.111282 -0.477316 0.58188
+v 0.186164 -0.487701 0.417533
+v 0 -0.462404 0.208739
+v 0 -0.483948 0.606421
+v 0.17185 -0.0410977 0.94527
+v 0.106249 -0.0142167 0.968533
+v 0.107395 -0.0653463 0.975253
+v 0 0.00228925 0.987177
+v 0 -0.0756163 0.995507
+v 0.0517509 -0.0608359 1.02473
+v 0.0517951 -0.0798093 1.0112
+v 0 -0.0688251 1.04807
+v 0 -0.0952192 1.02642
+v -0.348799 -0.334989 -0.0832331
+v -0.313132 -0.399051 0.881192
+v -0.266758 0.181628 0.122726
+v -0.229555 0.0663178 0.828702
+v -0.353623 -0.0486456 0.443969
+v -0.335878 -0.384495 0.425693
+v -0.305362 0.0307983 -0.00655663
+v -0.279611 -0.0552387 0.858547
+v -0.266719 0.10578 0.46681
+v -0.149341 -0.451522 0.166423
+v -0.12606 -0.162036 -0.185668
+v -0.114009 -0.358339 -0.0759521
+v -0.101546 -0.475382 0.663157
+v -0.240757 -0.435635 0.421981
+v -0.385825 -0.174501 0.180766
+v -0.358602 -0.37797 0.17897
+v -0.345953 -0.229705 0.664126
+v -0.316102 -0.419396 0.649153
+v -0.131961 -0.201382 0.951731
+v -0.0876766 -0.391182 0.901606
+v -0.161859 0.285356 0.303098
+v -0.177264 0.282839 0.0875537
+v -0.158786 0.0751889 -0.141343
+v -0.310974 -0.0974099 -0.0697762
+v -0.138236 0.191142 0.700393
+v -0.13061 0.109881 0.873657
+v -0.287544 -0.214549 0.864938
+v -0.25445 0.101336 0.680112
+v -0.34304 -0.0541165 0.672286
+v -0.369388 -0.0242775 0.205838
+v -0.273782 0.160332 0.273861
+v -0.36907 -0.228332 0.435279
+v -0.146074 0.206063 0.488052
+v -0.15689 0.317253 -0.0179554
+v -0.184529 0.181235 -0.238574
+v -0.229877 0.446038 -0.0428095
+v -0.326584 0.303676 -0.375494
+v -0.257915 0.488969 -0.517721
+v -0.187711 0.730295 -0.437134
+v -0.190442 0.122856 -0.437696
+v -0.271501 0.201979 -0.579853
+v -0.227961 0.337016 -0.641468
+v -0.323288 -0.645898 -0.0650069
+v -0.277559 -0.660659 0.872426
+v -0.15321 -0.671553 0.116597
+v -0.153343 -0.652512 -0.0628489
+v -0.129752 -0.680413 0.714233
+v -0.310744 -0.658159 0.12474
+v -0.270197 -0.670204 0.705429
+v -0.125665 -0.664371 0.877017
+v -0.264169 0.646946 -0.369897
+v -0.180359 0.68646 -0.148279
+v -0.263204 0.560375 -0.137191
+v -0.371765 0.683869 -0.321196
+v -0.432195 0.751246 -0.233457
+v -0.424574 0.680439 -0.150855
+v -0.379346 0.604049 -0.213075
+v -0.164015 0.929339 -0.232094
+v -0.205905 0.915741 -0.313374
+v -0.15338 0.935554 -0.282029
+v -0.221786 0.905461 -0.256021
+v -0.115236 0.837896 -0.222054
+v -0.086327 0.812209 -0.211465
+v -0.172231 0.819312 -0.334104
+v -0.162464 0.791078 -0.351846
+v -0.0787362 0.822102 -0.294366
+v -0.102999 0.844736 -0.288229
+v -0.219399 0.754552 -0.28348
+v -0.199751 0.790382 -0.244881
+v -0.184684 0.762009 -0.196152
+v -0.279703 0.762439 -0.233186
+v -0.120311 -0.456517 0.243094
+v -0.111282 -0.477316 0.58188
+v -0.186164 -0.487701 0.417533
+v -0.17185 -0.0410977 0.94527
+v -0.106249 -0.0142167 0.968533
+v -0.107395 -0.0653463 0.975253
+v -0.0517509 -0.0608359 1.02473
+v -0.0517951 -0.0798093 1.0112
+v 0.287063 -0.417912 0.42339
+v 0.223323 -0.431749 0.300839
+v 0.262599 -0.446574 0.214165
+v 0.345683 -0.367458 0.305622
+v 0.284862 -0.410239 0.304857
+v 0.320467 -0.390454 0.538527
+v 0.218649 -0.474853 0.619945
+v 0.19621 -0.447285 0.538732
+v 0.257025 -0.429064 0.536591
+v 0.240602 -0.341214 -0.117452
+v 0.12147 -0.249274 -0.145022
+v 0.221157 -0.120393 -0.146058
+v 0.328715 -0.192761 -0.0778193
+v 0.229478 -0.212011 -0.133147
+v 0.307987 -0.0331195 -0.049691
+v 0.139893 -0.0548182 -0.181103
+v 0.237075 0.0521949 -0.0759725
+v 0.227173 -0.0372356 -0.128413
+v 0.301492 -0.292885 0.870674
+v 0.213279 -0.204742 0.922518
+v 0.117499 -0.281642 0.935739
+v 0.211816 -0.394732 0.922409
+v 0.215547 -0.286415 0.919674
+v 0.364968 -0.12541 0.046235
+v 0.382458 -0.25968 0.176197
+v 0.385194 -0.351205 0.0306159
+v 0.375418 -0.216313 0.0359591
+v 0.382575 -0.209764 0.311653
+v 0.361147 -0.316624 0.430338
+v 0.374384 -0.294532 0.308248
+v 0.368668 -0.137134 0.438729
+v 0.385132 -0.101185 0.188168
+v 0.365146 -0.0406368 0.324829
+v 0.381949 -0.125584 0.315412
+v 0.350232 0.00253885 0.0908221
+v 0.361458 -0.0619596 0.0609153
+v 0.343331 -0.409171 0.778577
+v 0.341862 -0.313907 0.657949
+v 0.328019 -0.223318 0.773747
+v 0.336544 -0.303107 0.775339
+v 0.283052 -0.132631 0.863571
+v 0.351001 -0.142438 0.668828
+v 0.322487 -0.0565049 0.774019
+v 0.329 -0.140489 0.774308
+v 0.349301 -0.0521301 0.560904
+v 0.357572 -0.233122 0.551664
+v 0.360647 -0.141814 0.556564
+v 0.349272 -0.319879 0.544579
+v 0.23281 0.229543 0.110217
+v 0.170184 0.299829 0.20555
+v 0.221341 0.236481 0.293418
+v 0.272682 0.181352 0.189589
+v 0.227226 0.252859 0.204008
+v 0.271155 0.129527 0.366223
+v 0.153276 0.244216 0.39384
+v 0.20969 0.164574 0.478658
+v 0.215375 0.198113 0.382954
+v 0.24282 0.0875955 0.770725
+v 0.198521 0.154682 0.689709
+v 0.134533 0.161682 0.796578
+v 0.187262 0.086887 0.854887
+v 0.191558 0.132026 0.78245
+v 0.141545 0.196582 0.593509
+v 0.26179 0.101674 0.574769
+v 0.204392 0.157257 0.584394
+v 0.327755 0.0676272 0.239516
+v 0.317321 0.0335117 0.454173
+v 0.324726 0.0469262 0.344192
+v 0.307072 0.0296092 0.675079
+v 0.253941 0.0157408 0.848168
+v 0.289863 0.0225221 0.77107
+v 0.190901 0.188016 -0.0300482
+v 0.288174 0.116771 0.0639197
+v 0.239299 0.151564 0.0118342
+v 0.313467 0.030566 0.566851
+v 0.317033 0.0909783 0.142973
+v 0.150018 0.148066 -0.170355
+v 0.192003 0.257627 -0.114007
+v 0.150923 0.290686 0.0162651
+v 0.172619 0.221245 -0.0722104
+v 0.274031 0.23831 -0.301318
+v 0.294876 0.383503 -0.203773
+v 0.194068 0.37243 -0.0269037
+v 0.248629 0.311098 -0.153862
+v 0.196806 0.141274 -0.362924
+v 0.23911 0.14497 -0.502892
+v 0.324701 0.24108 -0.502727
+v 0.278147 0.181024 -0.423508
+v 0.263786 0.27805 -0.630217
+v 0.250979 0.388662 -0.601302
+v 0.318198 0.390558 -0.446528
+v 0.314593 0.32071 -0.563292
+v 0.338046 -0.680095 0.0309463
+v 0.228165 -0.692422 0.147407
+v 0.12828 -0.689708 0.0245746
+v 0.238161 -0.677382 -0.0918856
+v 0.229681 -0.733842 0.0266899
+v 0.20056 -0.687393 0.898863
+v 0.107919 -0.696884 0.796995
+v 0.199176 -0.699686 0.68654
+v 0.291818 -0.690195 0.788408
+v 0.198244 -0.736784 0.793448
+v 0.307855 -0.55258 0.89062
+v 0.294592 -0.571581 0.678753
+v 0.335563 -0.561971 0.78431
+v 0.0593616 -0.458076 0.795932
+v 0.110927 -0.595142 0.691599
+v 0.102733 -0.558625 0.896274
+v 0.0716769 -0.583028 0.796447
+v 0.203007 -0.591569 0.647127
+v 0.206666 -0.553636 0.930804
+v 0.339121 -0.546516 0.150832
+v 0.352933 -0.520557 -0.0885122
+v 0.383244 -0.532332 0.0297109
+v 0.132569 -0.534652 -0.0767316
+v 0.24402 -0.524662 -0.124285
+v 0.143499 -0.577512 0.141914
+v 0.241073 -0.571789 0.188527
+v 0.0964942 -0.434904 0.0342915
+v 0.0999951 -0.56397 0.0317575
+v 0.271576 0.561647 -0.416318
+v 0.282894 0.56989 -0.257283
+v 0.23851 0.520387 -0.0907886
+v 0.293195 0.482084 -0.276654
+v 0.213045 0.609177 -0.109989
+v 0.321376 0.576724 -0.185683
+v 0.422878 0.632073 -0.168661
+v 0.32688 0.680784 -0.145207
+v 0.326345 0.612341 -0.141546
+v 0.312594 0.674309 -0.341185
+v 0.379053 0.63587 -0.283895
+v 0.3154 0.606871 -0.278418
+v 0.417773 0.731266 -0.288914
+v 0.460287 0.727236 -0.177367
+v 0.454185 0.684351 -0.230461
+v 0 0.320051 0.223298
+v 0.0857686 0.301184 0.309872
+v 0.0922474 0.320203 0.127906
+v 0.0910385 0.318251 0.216664
+v 0 0.227033 0.602086
+v 0.0710686 0.213115 0.708099
+v 0.0750596 0.22755 0.493766
+v 0.0724773 0.219588 0.599773
+v 0 0.186603 0.810651
+v 0.0670456 0.125156 0.886255
+v 0.0695062 0.18012 0.806775
+v 0 -0.40432 -0.00187701
+v 0.0502685 -0.34467 -0.0665397
+v 0.0661753 -0.432919 0.0879789
+v 0.056363 -0.399912 0.0126968
+v 0 -0.395546 0.810009
+v 0.0398588 -0.442711 0.714357
+v 0.0361117 -0.34665 0.891624
+v 0.0321101 -0.404726 0.804765
+v 0 0.264692 0.401284
+v 0.0798727 0.262384 0.399618
+v 0 0.330325 0.0716557
+v 0.0874752 0.345901 0.0284499
+v 0.0858416 0.32349 0.0605597
+v 0 0.409384 0.0515977
+v 0.124296 0.487548 0.0340513
+v 0.105997 0.402715 0.0360884
+v 0 0.641579 -0.507325
+v 0.104733 0.755088 -0.444831
+v 0.214286 0.629383 -0.484754
+v 0.136392 0.50382 -0.556974
+v 0.121817 0.636846 -0.502061
+v 0 0.412943 -0.623842
+v 0.139278 0.340909 -0.661549
+v 0.138945 0.411476 -0.620153
+v 0 -0.271137 0.943676
+v 0.0622199 -0.197393 0.966789
+v 0.0501277 -0.274014 0.941915
+v 0 -0.286073 -0.138523
+v 0.0553298 -0.185801 -0.197022
+v 0.050202 -0.275405 -0.142323
+v 0 -0.0695404 -0.225378
+v 0.0766662 0.0457919 -0.217551
+v 0.0655056 -0.0670069 -0.214493
+v 0 0.101519 -0.264918
+v 0.0933959 0.130309 -0.285229
+v 0.0838463 0.11062 -0.238883
+v 0 0.106874 -0.369301
+v 0.114603 0.104637 -0.445501
+v 0.102596 0.115199 -0.359834
+v 0 0.126441 -0.539369
+v 0.153665 0.185489 -0.611191
+v 0.137484 0.129002 -0.534091
+v 0 0.259825 -0.661348
+v 0.150371 0.262453 -0.657355
+v 0.0819636 0.707011 -0.0978848
+v 0 0.613137 0.0118372
+v 0.128934 0.593549 -0.0276065
+v 0.230899 0.696193 -0.415496
+v 0.2462 0.632058 -0.451998
+v 0.2255 0.918412 -0.287909
+v 0.182289 0.937359 -0.304334
+v 0.155831 0.94592 -0.253016
+v 0.199042 0.926972 -0.236591
+v 0.198381 0.950853 -0.26973
+v 0.195536 0.768708 -0.323732
+v 0.162591 0.796641 -0.337706
+v 0.198135 0.804412 -0.300184
+v 0.204856 0.767832 -0.258627
+v 0.190344 0.780419 -0.305321
+v 0.158595 0.817014 -0.220186
+v 0.102838 0.815304 -0.21903
+v 0.12548 0.786281 -0.190531
+v 0.18859 0.771476 -0.215131
+v 0.14521 0.793415 -0.208712
+v 0.113814 0.810725 -0.339255
+v 0.0911506 0.822276 -0.28966
+v 0.134346 0.834135 -0.324328
+v 0.12321 0.811864 -0.327105
+v 0.0699464 0.823757 -0.248502
+v 0.0956349 0.846841 -0.248877
+v 0.0836698 0.824545 -0.249204
+v 0.24364 0.757848 -0.207433
+v 0.246443 0.756953 -0.257453
+v 0.21289 0.762245 -0.22883
+v 0.355751 0.762428 -0.238497
+v 0.249966 0.723503 -0.332259
+v 0.32003 0.739642 -0.304714
+v 0.181848 0.741315 -0.180043
+v 0.328516 0.741977 -0.181175
+v 0.0499681 0.812176 -0.199395
+v 0 0.785709 -0.129116
+v 0.0788354 0.774284 -0.158189
+v 0.0505691 0.828853 -0.304368
+v 0.168022 0.778755 -0.386065
+v 0 0.821114 -0.378707
+v 0.0897195 0.811796 -0.373587
+v 0 0.831803 -0.246303
+v 0.0404745 0.828353 -0.247459
+v 0.211302 0.744371 -0.365196
+v 0.193702 0.87269 -0.329392
+v 0.216553 0.856661 -0.251136
+v 0.218551 0.860755 -0.297102
+v 0.126841 0.897388 -0.285836
+v 0.157513 0.887986 -0.320358
+v 0.138938 0.890583 -0.222912
+v 0.120161 0.900194 -0.247801
+v 0.181137 0.872948 -0.224154
+v 0.106299 -0.467348 0.612794
+v 0.156751 -0.485651 0.517662
+v 0.218865 -0.458568 0.419816
+v 0.177713 -0.464461 0.527871
+v 0 -0.459742 0.666605
+v 0.0640756 -0.481225 0.60015
+v 0.0486808 -0.462533 0.655449
+v 0.129769 -0.445891 0.210992
+v 0.0674919 -0.459184 0.218582
+v 0 -0.445956 0.12884
+v 0.0638357 -0.444618 0.15006
+v 0.160168 -0.471758 0.313909
+v 0.192938 -0.45045 0.307624
+v 0 -0.518477 0.519852
+v 0.106971 -0.518553 0.416032
+v 0.0913755 -0.507594 0.518132
+v 0 -0.503509 0.307563
+v 0.0906862 -0.492986 0.310407
+v 0.148938 -0.0188178 0.950827
+v 0.109744 -0.0412763 0.976341
+v 0.152295 -0.0598695 0.957881
+v 0.133131 -0.0374762 0.965091
+v 0.129144 -0.123186 0.961976
+v 0.214922 -0.0472768 0.917853
+v 0.202946 -0.113321 0.930645
+v 0.124777 0.0448234 0.926767
+v 0.189431 0.0176502 0.907717
+v 0.0541294 -0.000582154 0.980957
+v 0 0.0636729 0.946137
+v 0.0627521 0.0592134 0.940397
+v 0.0544734 -0.0758718 0.989219
+v 0 -0.121134 0.98554
+v 0.0631528 -0.123094 0.979468
+v 0.0716307 -0.0432 1.00541
+v 0.052795 -0.0725924 1.01945
+v 0.0717681 -0.0690106 0.994894
+v 0.0809297 -0.0558501 0.997263
+v 0 -0.0400118 1.02513
+v 0.0297219 -0.0653048 1.04059
+v 0.0406958 -0.0395224 1.01839
+v 0.0297587 -0.0908237 1.02001
+v 0 -0.0782912 1.00911
+v 0.0407812 -0.0765567 1.00299
+v 0 -0.0903561 1.04431
+v 0.0264068 -0.0851806 1.03686
+v -0.345683 -0.367458 0.305622
+v -0.262599 -0.446574 0.214165
+v -0.223323 -0.431749 0.300839
+v -0.287063 -0.417912 0.42339
+v -0.284862 -0.410239 0.304857
+v -0.19621 -0.447285 0.538732
+v -0.218649 -0.474853 0.619945
+v -0.320467 -0.390454 0.538527
+v -0.257025 -0.429064 0.536591
+v -0.328715 -0.192761 -0.0778193
+v -0.221157 -0.120393 -0.146058
+v -0.12147 -0.249274 -0.145022
+v -0.240602 -0.341214 -0.117452
+v -0.229478 -0.212011 -0.133147
+v -0.237075 0.0521949 -0.0759725
+v -0.139893 -0.0548182 -0.181103
+v -0.307987 -0.0331195 -0.049691
+v -0.227173 -0.0372356 -0.128413
+v -0.211816 -0.394732 0.922409
+v -0.117499 -0.281642 0.935739
+v -0.213279 -0.204742 0.922518
+v -0.301492 -0.292885 0.870674
+v -0.215547 -0.286415 0.919674
+v -0.385194 -0.351205 0.0306159
+v -0.382458 -0.25968 0.176197
+v -0.364968 -0.12541 0.046235
+v -0.375418 -0.216313 0.0359591
+v -0.361147 -0.316624 0.430338
+v -0.382575 -0.209764 0.311653
+v -0.374384 -0.294532 0.308248
+v -0.365146 -0.0406368 0.324829
+v -0.385132 -0.101185 0.188168
+v -0.368668 -0.137134 0.438729
+v -0.381949 -0.125584 0.315412
+v -0.350232 0.00253885 0.0908221
+v -0.361458 -0.0619596 0.0609153
+v -0.328019 -0.223318 0.773747
+v -0.341862 -0.313907 0.657949
+v -0.343331 -0.409171 0.778577
+v -0.336544 -0.303107 0.775339
+v -0.322487 -0.0565049 0.774019
+v -0.351001 -0.142438 0.668828
+v -0.283052 -0.132631 0.863571
+v -0.329 -0.140489 0.774308
+v -0.357572 -0.233122 0.551664
+v -0.349301 -0.0521301 0.560904
+v -0.360647 -0.141814 0.556564
+v -0.349272 -0.319879 0.544579
+v -0.272682 0.181352 0.189589
+v -0.221341 0.236481 0.293418
+v -0.170184 0.299829 0.20555
+v -0.23281 0.229543 0.110217
+v -0.227226 0.252859 0.204008
+v -0.20969 0.164574 0.478658
+v -0.153276 0.244216 0.39384
+v -0.271155 0.129527 0.366223
+v -0.215375 0.198113 0.382954
+v -0.187262 0.086887 0.854887
+v -0.134533 0.161682 0.796578
+v -0.198521 0.154682 0.689709
+v -0.24282 0.0875955 0.770725
+v -0.191558 0.132026 0.78245
+v -0.26179 0.101674 0.574769
+v -0.141545 0.196582 0.593509
+v -0.204392 0.157257 0.584394
+v -0.317321 0.0335117 0.454173
+v -0.327755 0.0676272 0.239516
+v -0.324726 0.0469262 0.344192
+v -0.253941 0.0157408 0.848168
+v -0.307072 0.0296092 0.675079
+v -0.289863 0.0225221 0.77107
+v -0.288174 0.116771 0.0639197
+v -0.190901 0.188016 -0.0300482
+v -0.239299 0.151564 0.0118342
+v -0.313467 0.030566 0.566851
+v -0.317033 0.0909783 0.142973
+v -0.150923 0.290686 0.0162651
+v -0.192003 0.257627 -0.114007
+v -0.150018 0.148066 -0.170355
+v -0.172619 0.221245 -0.0722104
+v -0.194068 0.37243 -0.0269037
+v -0.294876 0.383503 -0.203773
+v -0.274031 0.23831 -0.301318
+v -0.248629 0.311098 -0.153862
+v -0.324701 0.24108 -0.502727
+v -0.23911 0.14497 -0.502892
+v -0.196806 0.141274 -0.362924
+v -0.278147 0.181024 -0.423508
+v -0.318198 0.390558 -0.446528
+v -0.250979 0.388662 -0.601302
+v -0.263786 0.27805 -0.630217
+v -0.314593 0.32071 -0.563292
+v -0.238161 -0.677382 -0.0918856
+v -0.12828 -0.689708 0.0245746
+v -0.228165 -0.692422 0.147407
+v -0.338046 -0.680095 0.0309463
+v -0.229681 -0.733842 0.0266899
+v -0.291818 -0.690195 0.788408
+v -0.199176 -0.699686 0.68654
+v -0.107919 -0.696884 0.796995
+v -0.20056 -0.687393 0.898863
+v -0.198244 -0.736784 0.793448
+v -0.294592 -0.571581 0.678753
+v -0.307855 -0.55258 0.89062
+v -0.335563 -0.561971 0.78431
+v -0.102733 -0.558625 0.896274
+v -0.110927 -0.595142 0.691599
+v -0.0593616 -0.458076 0.795932
+v -0.0716769 -0.583028 0.796447
+v -0.203007 -0.591569 0.647127
+v -0.206666 -0.553636 0.930804
+v -0.352933 -0.520557 -0.0885122
+v -0.339121 -0.546516 0.150832
+v -0.383244 -0.532332 0.0297109
+v -0.132569 -0.534652 -0.0767316
+v -0.24402 -0.524662 -0.124285
+v -0.143499 -0.577512 0.141914
+v -0.241073 -0.571789 0.188527
+v -0.0964942 -0.434904 0.0342915
+v -0.0999951 -0.56397 0.0317575
+v -0.23851 0.520387 -0.0907886
+v -0.282894 0.56989 -0.257283
+v -0.271576 0.561647 -0.416318
+v -0.293195 0.482084 -0.276654
+v -0.32688 0.680784 -0.145207
+v -0.422878 0.632073 -0.168661
+v -0.321376 0.576724 -0.185683
+v -0.213045 0.609177 -0.109989
+v -0.326345 0.612341 -0.141546
+v -0.379053 0.63587 -0.283895
+v -0.312594 0.674309 -0.341185
+v -0.3154 0.606871 -0.278418
+v -0.460287 0.727236 -0.177367
+v -0.417773 0.731266 -0.288914
+v -0.454185 0.684351 -0.230461
+v -0.0922474 0.320203 0.127906
+v -0.0857686 0.301184 0.309872
+v -0.0910385 0.318251 0.216664
+v -0.0750596 0.22755 0.493766
+v -0.0710686 0.213115 0.708099
+v -0.0724773 0.219588 0.599773
+v -0.0670456 0.125156 0.886255
+v -0.0695062 0.18012 0.806775
+v -0.0661753 -0.432919 0.0879789
+v -0.0502685 -0.34467 -0.0665397
+v -0.056363 -0.399912 0.0126968
+v -0.0361117 -0.34665 0.891624
+v -0.0398588 -0.442711 0.714357
+v -0.0321101 -0.404726 0.804765
+v -0.0798727 0.262384 0.399618
+v -0.0874752 0.345901 0.0284499
+v -0.0858416 0.32349 0.0605597
+v -0.124296 0.487548 0.0340513
+v -0.105997 0.402715 0.0360884
+v -0.136392 0.50382 -0.556974
+v -0.214286 0.629383 -0.484754
+v -0.104733 0.755088 -0.444831
+v -0.121817 0.636846 -0.502061
+v -0.139278 0.340909 -0.661549
+v -0.138945 0.411476 -0.620153
+v -0.0622199 -0.197393 0.966789
+v -0.0501277 -0.274014 0.941915
+v -0.0553298 -0.185801 -0.197022
+v -0.050202 -0.275405 -0.142323
+v -0.0766662 0.0457919 -0.217551
+v -0.0655056 -0.0670069 -0.214493
+v -0.0933959 0.130309 -0.285229
+v -0.0838463 0.11062 -0.238883
+v -0.114603 0.104637 -0.445501
+v -0.102596 0.115199 -0.359834
+v -0.153665 0.185489 -0.611191
+v -0.137484 0.129002 -0.534091
+v -0.150371 0.262453 -0.657355
+v -0.0819636 0.707011 -0.0978848
+v -0.128934 0.593549 -0.0276065
+v -0.230899 0.696193 -0.415496
+v -0.2462 0.632058 -0.451998
+v -0.199042 0.926972 -0.236591
+v -0.155831 0.94592 -0.253016
+v -0.182289 0.937359 -0.304334
+v -0.2255 0.918412 -0.287909
+v -0.198381 0.950853 -0.26973
+v -0.204856 0.767832 -0.258627
+v -0.198135 0.804412 -0.300184
+v -0.162591 0.796641 -0.337706
+v -0.195536 0.768708 -0.323732
+v -0.190344 0.780419 -0.305321
+v -0.18859 0.771476 -0.215131
+v -0.12548 0.786281 -0.190531
+v -0.102838 0.815304 -0.21903
+v -0.158595 0.817014 -0.220186
+v -0.14521 0.793415 -0.208712
+v -0.134346 0.834135 -0.324328
+v -0.0911506 0.822276 -0.28966
+v -0.113814 0.810725 -0.339255
+v -0.12321 0.811864 -0.327105
+v -0.0956349 0.846841 -0.248877
+v -0.0699464 0.823757 -0.248502
+v -0.0836698 0.824545 -0.249204
+v -0.246443 0.756953 -0.257453
+v -0.24364 0.757848 -0.207433
+v -0.21289 0.762245 -0.22883
+v -0.249966 0.723503 -0.332259
+v -0.355751 0.762428 -0.238497
+v -0.32003 0.739642 -0.304714
+v -0.181848 0.741315 -0.180043
+v -0.328516 0.741977 -0.181175
+v -0.0499681 0.812176 -0.199395
+v -0.0788354 0.774284 -0.158189
+v -0.168022 0.778755 -0.386065
+v -0.0505691 0.828853 -0.304368
+v -0.0897195 0.811796 -0.373587
+v -0.0404745 0.828353 -0.247459
+v -0.211302 0.744371 -0.365196
+v -0.216553 0.856661 -0.251136
+v -0.193702 0.87269 -0.329392
+v -0.218551 0.860755 -0.297102
+v -0.126841 0.897388 -0.285836
+v -0.157513 0.887986 -0.320358
+v -0.138938 0.890583 -0.222912
+v -0.120161 0.900194 -0.247801
+v -0.181137 0.872948 -0.224154
+v -0.218865 -0.458568 0.419816
+v -0.156751 -0.485651 0.517662
+v -0.106299 -0.467348 0.612794
+v -0.177713 -0.464461 0.527871
+v -0.0640756 -0.481225 0.60015
+v -0.0486808 -0.462533 0.655449
+v -0.0674919 -0.459184 0.218582
+v -0.129769 -0.445891 0.210992
+v -0.0638357 -0.444618 0.15006
+v -0.160168 -0.471758 0.313909
+v -0.192938 -0.45045 0.307624
+v -0.106971 -0.518553 0.416032
+v -0.0913755 -0.507594 0.518132
+v -0.0906862 -0.492986 0.310407
+v -0.152295 -0.0598695 0.957881
+v -0.109744 -0.0412763 0.976341
+v -0.148938 -0.0188178 0.950827
+v -0.133131 -0.0374762 0.965091
+v -0.214922 -0.0472768 0.917853
+v -0.129144 -0.123186 0.961976
+v -0.202946 -0.113321 0.930645
+v -0.124777 0.0448234 0.926767
+v -0.189431 0.0176502 0.907717
+v -0.0541294 -0.000582154 0.980957
+v -0.0627521 0.0592134 0.940397
+v -0.0544734 -0.0758718 0.989219
+v -0.0631528 -0.123094 0.979468
+v -0.0717681 -0.0690106 0.994894
+v -0.052795 -0.0725924 1.01945
+v -0.0716307 -0.0432 1.00541
+v -0.0809297 -0.0558501 0.997263
+v -0.0297219 -0.0653048 1.04059
+v -0.0406958 -0.0395224 1.01839
+v -0.0297587 -0.0908237 1.02001
+v -0.0407812 -0.0765567 1.00299
+v -0.0264068 -0.0851806 1.03686
+v 0.313121 -0.40468 0.424303
+v 0.289638 -0.411984 0.363044
+v 0.317818 -0.392423 0.305485
+v 0.341299 -0.375944 0.366232
+v 0.317288 -0.397295 0.364448
+v 0.23935 -0.43199 0.361842
+v 0.251544 -0.422435 0.302332
+v 0.261632 -0.427221 0.422701
+v 0.262332 -0.422664 0.362137
+v 0.2099 -0.457622 0.202318
+v 0.274846 -0.418754 0.252658
+v 0.194817 -0.437402 0.239358
+v 0.231211 -0.430966 0.245486
+v 0.350284 -0.364453 0.244783
+v 0.312495 -0.42258 0.208415
+v 0.316601 -0.396416 0.250199
+v 0.328162 -0.387696 0.483083
+v 0.291362 -0.413379 0.536815
+v 0.275025 -0.421949 0.482067
+v 0.303426 -0.408222 0.482265
+v 0.26979 -0.455493 0.624003
+v 0.236564 -0.444228 0.584476
+v 0.315111 -0.397679 0.592146
+v 0.280089 -0.425608 0.586435
+v 0.156119 -0.458526 0.595789
+v 0.222989 -0.439421 0.53835
+v 0.164011 -0.483033 0.631406
+v 0.191143 -0.453861 0.591118
+v 0.225399 -0.440193 0.480871
+v 0.247502 -0.431674 0.481825
+v 0.300873 -0.335808 -0.11045
+v 0.23546 -0.269794 -0.12322
+v 0.283535 -0.198083 -0.11242
+v 0.339519 -0.257182 -0.0802831
+v 0.292915 -0.260195 -0.110308
+v 0.119478 -0.295776 -0.113872
+v 0.173106 -0.230478 -0.142628
+v 0.176383 -0.34915 -0.10631
+v 0.174532 -0.282877 -0.121531
+v 0.172014 -0.141551 -0.170002
+v 0.224047 -0.163495 -0.142015
+v 0.123113 -0.206388 -0.170045
+v 0.171621 -0.184999 -0.160037
+v 0.318497 -0.140059 -0.0746981
+v 0.26898 -0.103901 -0.112945
+v 0.274851 -0.146856 -0.114171
+v 0.307808 -0.00318692 -0.0317095
+v 0.269878 -0.0318881 -0.0928838
+v 0.232462 0.00608204 -0.106821
+v 0.273281 0.0420733 -0.0438498
+v 0.272342 0.00331308 -0.0724007
+v 0.222802 -0.0788504 -0.141507
+v 0.308256 -0.0631453 -0.0619102
+v 0.26804 -0.0666021 -0.106124
+v 0.131964 -0.111136 -0.188613
+v 0.182883 -0.0457092 -0.157469
+v 0.176218 -0.0952439 -0.168768
+v 0.199052 0.0623656 -0.105989
+v 0.148915 0.00578811 -0.165362
+v 0.190658 0.0074224 -0.137241
+v 0.30851 -0.340088 0.875405
+v 0.263105 -0.289222 0.900577
+v 0.214141 -0.334681 0.919903
+v 0.26887 -0.396352 0.909752
+v 0.266995 -0.336923 0.904292
+v 0.253262 -0.209214 0.897882
+v 0.215216 -0.244729 0.920604
+v 0.293898 -0.252639 0.867139
+v 0.258195 -0.248301 0.898482
+v 0.126656 -0.240414 0.945024
+v 0.164801 -0.284207 0.93026
+v 0.172003 -0.202637 0.93974
+v 0.169955 -0.242358 0.935335
+v 0.149733 -0.3934 0.920339
+v 0.104904 -0.328487 0.922781
+v 0.15673 -0.332639 0.924128
+v 0.358457 -0.200112 -0.0267162
+v 0.380901 -0.277243 0.032671
+v 0.375987 -0.340814 -0.0337268
+v 0.367895 -0.263853 -0.0306343
+v 0.342627 -0.106257 -0.015664
+v 0.369603 -0.166291 0.0406057
+v 0.349444 -0.148083 -0.0217878
+v 0.385185 -0.214712 0.178025
+v 0.382253 -0.237542 0.105277
+v 0.379025 -0.149836 0.112764
+v 0.38088 -0.190046 0.108528
+v 0.379176 -0.364086 0.102343
+v 0.375596 -0.311145 0.175939
+v 0.381399 -0.294389 0.103665
+v 0.363729 -0.333817 0.306674
+v 0.367965 -0.307651 0.370431
+v 0.351404 -0.354331 0.427814
+v 0.35769 -0.345276 0.36828
+v 0.379525 -0.278609 0.243679
+v 0.369702 -0.323364 0.243084
+v 0.386398 -0.194374 0.247084
+v 0.380079 -0.252636 0.309953
+v 0.384425 -0.235652 0.245143
+v 0.366553 -0.273812 0.432936
+v 0.376188 -0.22098 0.374406
+v 0.373629 -0.265433 0.372527
+v 0.363531 -0.0923241 0.440867
+v 0.375379 -0.132559 0.377674
+v 0.376221 -0.0834201 0.319025
+v 0.359143 -0.0454376 0.384517
+v 0.369754 -0.0887861 0.380331
+v 0.383633 -0.167556 0.313214
+v 0.370145 -0.182621 0.437038
+v 0.377148 -0.176656 0.375911
+v 0.386429 -0.137312 0.183762
+v 0.385994 -0.115397 0.252169
+v 0.387444 -0.154662 0.24911
+v 0.369328 -0.0337485 0.265179
+v 0.380073 -0.0641595 0.195141
+v 0.380479 -0.0754649 0.257209
+v 0.331012 0.0171674 0.0389716
+v 0.357964 -0.0317511 0.0731655
+v 0.338487 -0.0443826 0.00235778
+v 0.336366 -0.0157204 0.0175237
+v 0.37698 -0.0821329 0.123634
+v 0.363023 -0.0117289 0.147082
+v 0.372551 -0.0487428 0.133083
+v 0.363023 -0.0919062 0.0524717
+v 0.378431 -0.114848 0.117331
+v 0.339774 -0.073126 -0.00816016
+v 0.336842 -0.40362 0.835552
+v 0.340911 -0.350598 0.77666
+v 0.324728 -0.29766 0.82767
+v 0.331904 -0.344898 0.831349
+v 0.334716 -0.360298 0.654176
+v 0.340676 -0.308781 0.717453
+v 0.335933 -0.414813 0.716014
+v 0.339263 -0.356428 0.715872
+v 0.338685 -0.226798 0.720092
+v 0.331546 -0.262105 0.774295
+v 0.344609 -0.271269 0.661246
+v 0.339619 -0.266826 0.718796
+v 0.311718 -0.219243 0.822794
+v 0.317327 -0.257308 0.824743
+v 0.282982 -0.0932074 0.86163
+v 0.309753 -0.137557 0.821642
+v 0.32815 -0.0982774 0.774453
+v 0.304296 -0.0566442 0.818942
+v 0.309064 -0.0966912 0.82069
+v 0.328063 -0.182474 0.773912
+v 0.284251 -0.173813 0.864211
+v 0.309915 -0.178867 0.822052
+v 0.348891 -0.186539 0.666645
+v 0.342261 -0.141944 0.72292
+v 0.340465 -0.184932 0.72153
+v 0.335097 -0.0554519 0.724795
+v 0.349859 -0.0980743 0.670701
+v 0.341372 -0.0985454 0.724072
+v 0.350887 -0.0507559 0.502911
+v 0.3576 -0.096426 0.55865
+v 0.364199 -0.140124 0.498352
+v 0.360004 -0.0947951 0.500374
+v 0.356689 -0.14249 0.613383
+v 0.34723 -0.0531297 0.617509
+v 0.354744 -0.0974434 0.615492
+v 0.35206 -0.232064 0.608144
+v 0.360088 -0.187609 0.554338
+v 0.355112 -0.187591 0.610995
+v 0.363053 -0.232129 0.494204
+v 0.364779 -0.186112 0.496433
+v 0.354812 -0.320109 0.488079
+v 0.338808 -0.358381 0.541229
+v 0.344689 -0.357576 0.485189
+v 0.354746 -0.277669 0.548235
+v 0.360332 -0.277544 0.491251
+v 0.344841 -0.317709 0.600763
+v 0.349578 -0.27523 0.604646
+v 0.335024 -0.360119 0.596759
+v 0.253104 0.203435 0.118078
+v 0.22988 0.247545 0.157829
+v 0.250535 0.219935 0.200112
+v 0.270021 0.183796 0.151405
+v 0.251096 0.217673 0.158942
+v 0.173937 0.295795 0.151795
+v 0.201028 0.279793 0.204606
+v 0.208445 0.255016 0.0997486
+v 0.204742 0.274117 0.153286
+v 0.19325 0.264928 0.298974
+v 0.224342 0.248556 0.249012
+v 0.166122 0.2964 0.255682
+v 0.197174 0.276415 0.252836
+v 0.273837 0.172969 0.230557
+v 0.247672 0.201215 0.285394
+v 0.249328 0.213762 0.242242
+v 0.269047 0.115689 0.415447
+v 0.243641 0.166107 0.375362
+v 0.21248 0.178956 0.429644
+v 0.238934 0.137098 0.472953
+v 0.241351 0.149381 0.423
+v 0.218341 0.218477 0.337805
+v 0.272811 0.14513 0.319056
+v 0.245762 0.184358 0.32963
+v 0.157504 0.266542 0.348704
+v 0.185529 0.224498 0.389084
+v 0.189327 0.246276 0.344018
+v 0.178799 0.187723 0.48375
+v 0.149393 0.222637 0.439843
+v 0.181979 0.203634 0.435289
+v 0.235856 0.0765799 0.805621
+v 0.217701 0.11212 0.775527
+v 0.188626 0.112308 0.821548
+v 0.212012 0.0755801 0.841201
+v 0.213324 0.0965225 0.811853
+v 0.226814 0.130295 0.684497
+v 0.195015 0.146226 0.738197
+v 0.249108 0.0961373 0.728207
+v 0.222367 0.12354 0.732548
+v 0.136442 0.179619 0.750387
+v 0.163968 0.1484 0.789706
+v 0.169138 0.174847 0.695193
+v 0.166543 0.164739 0.744396
+v 0.159999 0.0987976 0.865373
+v 0.13257 0.138159 0.837992
+v 0.161693 0.126272 0.830304
+v 0.207042 0.158534 0.530777
+v 0.233686 0.131486 0.579487
+v 0.264402 0.101968 0.520397
+v 0.236433 0.132176 0.525634
+v 0.143538 0.198754 0.539805
+v 0.173735 0.178964 0.589175
+v 0.176115 0.180806 0.535582
+v 0.201599 0.157166 0.637907
+v 0.139858 0.195421 0.647569
+v 0.171473 0.178284 0.642915
+v 0.258575 0.102323 0.628487
+v 0.230534 0.131902 0.632959
+v 0.34742 0.00319315 0.333602
+v 0.320929 0.0389878 0.398635
+v 0.337836 -0.00655256 0.448549
+v 0.342419 -0.00261151 0.390867
+v 0.351215 0.0204208 0.221415
+v 0.327429 0.0566863 0.291075
+v 0.350974 0.0108683 0.277027
+v 0.298744 0.0894187 0.355449
+v 0.30121 0.115034 0.257783
+v 0.300703 0.102007 0.305644
+v 0.293232 0.0711038 0.460405
+v 0.296022 0.0786559 0.407077
+v 0.328122 -0.0112347 0.673608
+v 0.30029 0.0266191 0.725054
+v 0.308985 -0.0158364 0.772679
+v 0.320738 -0.0133772 0.724898
+v 0.267341 0.0573644 0.769829
+v 0.281859 0.0674534 0.67711
+v 0.275748 0.0634772 0.725933
+v 0.24017 0.0457536 0.838346
+v 0.274888 0.0183198 0.812257
+v 0.256292 0.0501447 0.807253
+v 0.268509 -0.018423 0.854481
+v 0.291897 -0.0177918 0.816142
+v 0.181753 0.135303 -0.0848536
+v 0.213035 0.170022 -0.00953204
+v 0.239418 0.102195 -0.0351077
+v 0.20757 0.117335 -0.0616182
+v 0.236523 0.195781 0.0609139
+v 0.190293 0.237055 0.0258931
+v 0.212268 0.218116 0.0448664
+v 0.276588 0.155574 0.0981021
+v 0.265278 0.133372 0.0353559
+v 0.258648 0.173682 0.0768666
+v 0.298292 0.0729821 0.0271766
+v 0.270541 0.0874645 -0.00640695
+v 0.311113 0.0304905 0.622016
+v 0.288873 0.0678482 0.57056
+v 0.286019 0.0682635 0.624906
+v 0.334099 -0.00961306 0.563635
+v 0.332102 -0.0102295 0.619617
+v 0.315185 0.0311388 0.510574
+v 0.335537 -0.00863692 0.506376
+v 0.291063 0.0681481 0.515313
+v 0.33595 0.0447295 0.115485
+v 0.324423 0.0791082 0.189745
+v 0.346277 0.0318576 0.16704
+v 0.305188 0.103086 0.100412
+v 0.320025 0.0585873 0.0683354
+v 0.295396 0.137443 0.169109
+v 0.288286 0.145212 0.129998
+v 0.299573 0.127115 0.211987
+v 0.149034 0.119255 -0.152767
+v 0.167034 0.18223 -0.122798
+v 0.177314 0.204818 -0.0510199
+v 0.168725 0.158519 -0.103729
+v 0.194921 0.221357 -0.173014
+v 0.177164 0.238112 -0.093383
+v 0.160058 0.168229 -0.195838
+v 0.175228 0.201681 -0.146613
+v 0.149721 0.3008 -0.00470029
+v 0.167426 0.259011 -0.0241111
+v 0.17918 0.290497 -0.0609743
+v 0.168557 0.27236 -0.0447148
+v 0.159665 0.285267 0.0452783
+v 0.175009 0.248965 0.00116269
+v 0.233098 0.21084 -0.267301
+v 0.26422 0.274293 -0.228207
+v 0.21819 0.281995 -0.133552
+v 0.227725 0.245742 -0.199837
+v 0.314639 0.343639 -0.292523
+v 0.27622 0.344814 -0.176785
+v 0.306638 0.268641 -0.338166
+v 0.294985 0.306469 -0.259249
+v 0.21453 0.407775 -0.0317089
+v 0.225521 0.344757 -0.0845003
+v 0.267222 0.417414 -0.116444
+v 0.249809 0.379204 -0.0978893
+v 0.173263 0.341693 -0.0238379
+v 0.200073 0.314908 -0.0732157
+v 0.192666 0.159829 -0.308601
+v 0.240251 0.15911 -0.387649
+v 0.279797 0.207117 -0.366981
+v 0.238726 0.181983 -0.331175
+v 0.213461 0.12942 -0.465108
+v 0.265715 0.160342 -0.469212
+v 0.195451 0.128497 -0.407372
+v 0.233311 0.142158 -0.432916
+v 0.305787 0.218563 -0.547441
+v 0.307399 0.208213 -0.46356
+v 0.259412 0.1698 -0.542722
+v 0.290901 0.186126 -0.509143
+v 0.330786 0.26947 -0.445487
+v 0.31171 0.235804 -0.406418
+v 0.32675 0.280027 -0.535933
+v 0.321008 0.352655 -0.511417
+v 0.329541 0.345256 -0.410956
+v 0.333345 0.309542 -0.480189
+v 0.272516 0.239582 -0.609537
+v 0.296456 0.295786 -0.60247
+v 0.307295 0.256707 -0.578835
+v 0.240548 0.35766 -0.62796
+v 0.289278 0.358786 -0.584918
+v 0.246645 0.312827 -0.640337
+v 0.274305 0.331026 -0.617491
+v 0.295245 0.436762 -0.484187
+v 0.255976 0.429549 -0.563622
+v 0.295399 0.395443 -0.540067
+v 0.335047 -0.66747 -0.026194
+v 0.288313 -0.71953 0.0292002
+v 0.233667 -0.718383 -0.0394682
+v 0.288912 -0.666279 -0.0831981
+v 0.288523 -0.704927 -0.0329901
+v 0.276152 -0.679477 0.14116
+v 0.227442 -0.724741 0.0933852
+v 0.327044 -0.674066 0.0872825
+v 0.281161 -0.710704 0.0910758
+v 0.136631 -0.686121 0.0787634
+v 0.172796 -0.723594 0.0247169
+v 0.182766 -0.687312 0.135767
+v 0.176191 -0.716151 0.0857864
+v 0.187443 -0.669698 -0.0818819
+v 0.136552 -0.67511 -0.0280042
+v 0.179633 -0.708233 -0.0350267
+v 0.2461 -0.678205 0.889508
+v 0.198896 -0.723335 0.85193
+v 0.248642 -0.724519 0.790895
+v 0.288217 -0.679435 0.838379
+v 0.246831 -0.711912 0.845432
+v 0.113033 -0.684171 0.844965
+v 0.148661 -0.727347 0.795626
+v 0.15574 -0.680004 0.892121
+v 0.151829 -0.713932 0.849146
+v 0.157976 -0.694713 0.696986
+v 0.198408 -0.728532 0.734591
+v 0.115859 -0.693483 0.748115
+v 0.153047 -0.720555 0.741401
+v 0.283587 -0.684515 0.738785
+v 0.24088 -0.688682 0.691628
+v 0.244434 -0.716483 0.736358
+v 0.342489 -0.483419 0.78141
+v 0.32282 -0.565952 0.726198
+v 0.30391 -0.498066 0.668956
+v 0.331237 -0.488983 0.719836
+v 0.31354 -0.474577 0.887895
+v 0.331331 -0.556659 0.842236
+v 0.337524 -0.478425 0.840051
+v 0.319732 -0.633454 0.786428
+v 0.294197 -0.618974 0.884469
+v 0.315639 -0.625673 0.839496
+v 0.28283 -0.632563 0.691009
+v 0.308326 -0.632734 0.733956
+v 0.0634711 -0.431765 0.851899
+v 0.0654567 -0.515878 0.79543
+v 0.0774719 -0.570173 0.85133
+v 0.0969194 -0.478778 0.897495
+v 0.0707234 -0.496321 0.852392
+v 0.106684 -0.534636 0.682874
+v 0.0832655 -0.591144 0.740087
+v 0.069956 -0.47422 0.735292
+v 0.0773661 -0.528459 0.735056
+v 0.0840444 -0.64607 0.797118
+v 0.119063 -0.647679 0.702012
+v 0.0943475 -0.648249 0.746058
+v 0.112805 -0.624088 0.888986
+v 0.0899111 -0.634521 0.847274
+v 0.252371 -0.581833 0.652832
+v 0.208287 -0.52835 0.637223
+v 0.260651 -0.513618 0.642398
+v 0.20034 -0.651356 0.660475
+v 0.244653 -0.641994 0.666514
+v 0.15335 -0.595644 0.66033
+v 0.156436 -0.650983 0.672896
+v 0.153645 -0.534978 0.650489
+v 0.149543 -0.554429 0.922812
+v 0.209392 -0.47177 0.928314
+v 0.147798 -0.472236 0.922294
+v 0.203436 -0.628169 0.923071
+v 0.153584 -0.625261 0.914716
+v 0.262868 -0.552296 0.920108
+v 0.253551 -0.622939 0.912658
+v 0.267734 -0.472455 0.917082
+v 0.348561 -0.464054 0.158716
+v 0.369537 -0.538419 0.0961211
+v 0.387436 -0.440321 0.0296682
+v 0.376444 -0.45013 0.0999599
+v 0.368236 -0.614976 0.0306267
+v 0.325558 -0.615423 0.139592
+v 0.354786 -0.614851 0.0904298
+v 0.340742 -0.597899 -0.0798229
+v 0.378455 -0.525308 -0.0356201
+v 0.364087 -0.605614 -0.0296813
+v 0.354445 -0.427851 -0.0878125
+v 0.380964 -0.432504 -0.0361787
+v 0.243518 -0.430642 -0.121062
+v 0.182701 -0.527846 -0.109937
+v 0.12704 -0.448099 -0.0735785
+v 0.179139 -0.436248 -0.106528
+v 0.304443 -0.521287 -0.117606
+v 0.305291 -0.427537 -0.115465
+v 0.241923 -0.609859 -0.117359
+v 0.296601 -0.602967 -0.109127
+v 0.141316 -0.607217 -0.0730372
+v 0.186967 -0.607615 -0.104287
+v 0.148115 -0.512563 0.149929
+v 0.188536 -0.577394 0.174934
+v 0.25113 -0.502745 0.197094
+v 0.196177 -0.511769 0.183546
+v 0.233064 -0.638226 0.175543
+v 0.145237 -0.635178 0.13052
+v 0.185474 -0.638484 0.162245
+v 0.293784 -0.559568 0.181063
+v 0.282371 -0.626877 0.167977
+v 0.304357 -0.484216 0.189956
+v 0.105712 -0.548895 -0.0268432
+v 0.0997946 -0.492468 0.0352712
+v 0.0953701 -0.405611 -0.0214782
+v 0.102107 -0.469936 -0.0239275
+v 0.106751 -0.633108 0.0275567
+v 0.114552 -0.619641 -0.0267392
+v 0.113288 -0.573273 0.0910993
+v 0.117988 -0.635837 0.0828
+v 0.112515 -0.451919 0.0935688
+v 0.115559 -0.506404 0.0968639
+v 0.271184 0.525759 -0.460079
+v 0.285353 0.520853 -0.361551
+v 0.308658 0.43336 -0.368305
+v 0.291391 0.478507 -0.418527
+v 0.278055 0.600765 -0.320108
+v 0.282577 0.535883 -0.259681
+v 0.267 0.601931 -0.388185
+v 0.278645 0.563976 -0.33172
+v 0.247008 0.54474 -0.114114
+v 0.271214 0.498067 -0.167977
+v 0.277875 0.55689 -0.190965
+v 0.271665 0.531425 -0.180446
+v 0.298509 0.427522 -0.236282
+v 0.235335 0.485531 -0.0647615
+v 0.272045 0.45855 -0.143225
+v 0.317802 0.38717 -0.32759
+v 0.192626 0.647576 -0.125774
+v 0.267679 0.610284 -0.129549
+v 0.326452 0.645309 -0.138504
+v 0.252363 0.682167 -0.15033
+v 0.258116 0.645451 -0.13607
+v 0.289471 0.569074 -0.162469
+v 0.324346 0.587714 -0.15672
+v 0.239082 0.578957 -0.111996
+v 0.279361 0.583208 -0.137022
+v 0.401583 0.612716 -0.19176
+v 0.380321 0.61857 -0.152413
+v 0.35448 0.589213 -0.202564
+v 0.367842 0.597467 -0.17194
+v 0.388682 0.680406 -0.144893
+v 0.430349 0.658277 -0.154382
+v 0.384792 0.647908 -0.143704
+v 0.294439 0.591849 -0.266039
+v 0.318359 0.584666 -0.230094
+v 0.294318 0.572729 -0.209871
+v 0.28588 0.662049 -0.349089
+v 0.312606 0.638668 -0.319124
+v 0.290372 0.624108 -0.317788
+v 0.375881 0.662505 -0.310821
+v 0.343967 0.620068 -0.286887
+v 0.345079 0.681327 -0.333156
+v 0.34177 0.649107 -0.318477
+v 0.380591 0.614582 -0.245535
+v 0.347363 0.598425 -0.244565
+v 0.393743 0.706765 -0.3123
+v 0.444197 0.711394 -0.263256
+v 0.419572 0.658706 -0.261892
+v 0.412958 0.685943 -0.291128
+v 0.452589 0.744444 -0.206816
+v 0.471552 0.708579 -0.199184
+v 0.43241 0.746455 -0.258303
+v 0.460792 0.730831 -0.231063
+v 0.447289 0.655956 -0.196803
+v 0.447397 0.702392 -0.15815
+v 0.460337 0.681448 -0.17325
+v 0.417303 0.633713 -0.225256
+v 0 0.324094 0.180165
+v 0.0462436 0.320164 0.221337
+v 0.0925539 0.320139 0.170699
+v 0.0467382 0.325284 0.136739
+v 0.0468991 0.323707 0.17766
+v 0.0434892 0.30185 0.311934
+v 0.0886491 0.312396 0.263435
+v 0 0.312812 0.267846
+v 0.0450231 0.313323 0.266515
+v 0.132936 0.312061 0.210485
+v 0.125628 0.296564 0.306829
+v 0.129481 0.307575 0.259519
+v 0.135299 0.30832 0.112598
+v 0.13572 0.310986 0.159838
+v 0 0.228164 0.547798
+v 0.0364517 0.2252 0.601479
+v 0.0735327 0.221293 0.545706
+v 0.0377827 0.232065 0.495188
+v 0.0369868 0.226526 0.54726
+v 0.0357781 0.218679 0.710249
+v 0.0717153 0.218255 0.65442
+v 0 0.22586 0.657001
+v 0.0360809 0.22396 0.656319
+v 0.107651 0.210086 0.597079
+v 0.105384 0.203917 0.704755
+v 0.106456 0.208743 0.651454
+v 0.111325 0.219111 0.491389
+v 0.109197 0.212143 0.543201
+v 0 0.20702 0.762635
+v 0.035033 0.18494 0.809623
+v 0.0703592 0.199987 0.759265
+v 0.0354469 0.205232 0.761742
+v 0.0337403 0.129119 0.889852
+v 0.0684286 0.154761 0.849486
+v 0 0.160642 0.853891
+v 0.0344822 0.159128 0.852718
+v 0.10286 0.172311 0.802347
+v 0.0994805 0.118705 0.880737
+v 0.101303 0.147713 0.844481
+v 0.104202 0.191409 0.755412
+v 0 -0.422236 0.0297518
+v 0.0290027 -0.401336 0.00260959
+v 0.0619855 -0.419454 0.0516405
+v 0.032544 -0.433336 0.0684456
+v 0.0312142 -0.420036 0.03584
+v 0.024587 -0.350255 -0.0669927
+v 0.0524989 -0.374486 -0.0268658
+v 0 -0.38147 -0.0341137
+v 0.0265726 -0.377866 -0.0315624
+v 0.0804386 -0.407577 0.025012
+v 0.0781392 -0.343402 -0.0680291
+v 0.0771327 -0.378765 -0.022981
+v 0.101981 -0.43742 0.118873
+v 0.0918712 -0.426509 0.0746889
+v 0 -0.369655 0.853165
+v 0.0162142 -0.396858 0.808469
+v 0.032518 -0.377955 0.851217
+v 0.0173499 -0.340897 0.891348
+v 0.0161103 -0.370733 0.852319
+v 0.0190571 -0.436735 0.723869
+v 0.0352263 -0.426311 0.757202
+v 0 -0.417485 0.766409
+v 0.0173127 -0.418962 0.764189
+v 0.0473693 -0.423079 0.799815
+v 0.0641499 -0.454693 0.696538
+v 0.0543417 -0.44245 0.745122
+v 0.0576974 -0.360469 0.893822
+v 0.0495204 -0.395304 0.851292
+v 0 0.284012 0.356829
+v 0.0403407 0.264584 0.4009
+v 0.08278 0.283223 0.354838
+v 0.0418935 0.284411 0.356338
+v 0.0772361 0.242537 0.445498
+v 0 0.24666 0.447191
+v 0.0389355 0.245943 0.446792
+v 0.117787 0.25622 0.397308
+v 0.114267 0.235186 0.443214
+v 0.121652 0.278062 0.352302
+v 0 0.337744 0.0495788
+v 0.0441574 0.329119 0.0693112
+v 0.084777 0.331134 0.0392602
+v 0.0448664 0.351036 0.0368917
+v 0.0436281 0.336529 0.047541
+v 0.0891709 0.320583 0.0906503
+v 0 0.327327 0.102505
+v 0.0455092 0.326229 0.0998335
+v 0.122579 0.311429 0.0436831
+v 0.129137 0.308034 0.0737864
+v 0.125569 0.33518 0.0110101
+v 0.120968 0.319807 0.0225689
+v 0 0.450581 0.0596803
+v 0.0541098 0.40816 0.0483714
+v 0.116503 0.442256 0.0399308
+v 0.0632284 0.496693 0.0522852
+v 0.0593708 0.448955 0.0551193
+v 0.0954346 0.370179 0.029827
+v 0 0.37634 0.0420973
+v 0.0488177 0.375217 0.039732
+v 0.153438 0.391277 0.0121446
+v 0.13765 0.35956 0.0095734
+v 0.18104 0.470039 0.00485434
+v 0.169159 0.42866 0.0126096
+v 0 0.570891 -0.534672
+v 0.0626485 0.640189 -0.506046
+v 0.130136 0.568004 -0.528764
+v 0.0692761 0.505582 -0.562042
+v 0.0664738 0.570216 -0.533525
+v 0.0536794 0.762368 -0.447664
+v 0.113201 0.701726 -0.474505
+v 0 0.709164 -0.479338
+v 0.0582379 0.707055 -0.477971
+v 0.199905 0.685497 -0.462496
+v 0.174025 0.632377 -0.495223
+v 0.150533 0.744043 -0.44196
+v 0.161614 0.694175 -0.469576
+v 0.199187 0.49892 -0.545668
+v 0.232047 0.565216 -0.501918
+v 0.188173 0.564066 -0.51907
+v 0 0.376475 -0.648788
+v 0.0711811 0.413275 -0.623292
+v 0.138282 0.375681 -0.645424
+v 0.0726958 0.340957 -0.664799
+v 0.0716465 0.376815 -0.648416
+v 0.138821 0.452917 -0.589054
+v 0 0.45452 -0.593995
+v 0.0706181 0.454733 -0.593233
+v 0.199873 0.404748 -0.612935
+v 0.202195 0.446614 -0.579567
+v 0.193634 0.339499 -0.652866
+v 0.194895 0.370921 -0.637938
+v 0 -0.233549 0.959877
+v 0.0240468 -0.271835 0.943264
+v 0.057123 -0.235922 0.956621
+v 0.0306399 -0.195728 0.970285
+v 0.0277858 -0.234252 0.959139
+v 0.0425527 -0.311463 0.921053
+v 0 -0.306916 0.921405
+v 0.0202817 -0.307657 0.921165
+v 0.0802769 -0.277764 0.939515
+v 0.0688023 -0.319918 0.921674
+v 0.0956803 -0.199455 0.960676
+v 0.0895627 -0.238122 0.952018
+v 0 -0.322733 -0.103397
+v 0.0238367 -0.283103 -0.139843
+v 0.049547 -0.311956 -0.105817
+v 0.023709 -0.319134 -0.103998
+v 0.0267024 -0.190718 -0.197111
+v 0.0521008 -0.23408 -0.173683
+v 0 -0.242929 -0.170415
+v 0.0248684 -0.240765 -0.171792
+v 0.0816243 -0.264189 -0.144485
+v 0.0878074 -0.176513 -0.193941
+v 0.0840611 -0.222685 -0.173847
+v 0.0796433 -0.304819 -0.108871
+v 0 -0.132321 -0.213832
+v 0.0321871 -0.0691596 -0.222731
+v 0.0599751 -0.128388 -0.209467
+v 0.0292367 -0.131565 -0.213064
+v 0.0381981 0.0419961 -0.235526
+v 0.0713902 -0.00682142 -0.215572
+v 0 -0.00964083 -0.233734
+v 0.035269 -0.00935459 -0.229192
+v 0.101087 -0.0620719 -0.200364
+v 0.115674 0.055105 -0.187883
+v 0.109216 -0.00036684 -0.192884
+v 0.0937169 -0.121882 -0.201748
+v 0 0.0779149 -0.251636
+v 0.0428926 0.103386 -0.257977
+v 0.0803711 0.0844567 -0.225271
+v 0.0406901 0.0790388 -0.244814
+v 0.0470212 0.120443 -0.299626
+v 0.0884331 0.12573 -0.258533
+v 0 0.114136 -0.282485
+v 0.0449537 0.11665 -0.27594
+v 0.120922 0.124855 -0.209363
+v 0.138478 0.148595 -0.265069
+v 0.128964 0.142918 -0.232488
+v 0.118034 0.096215 -0.193935
+v 0 0.113801 -0.334664
+v 0.0518947 0.108853 -0.365753
+v 0.097999 0.124878 -0.319976
+v 0.0492429 0.116377 -0.329956
+v 0.0595919 0.101024 -0.449026
+v 0.107541 0.107031 -0.401861
+v 0 0.101009 -0.408276
+v 0.0552525 0.102268 -0.405836
+v 0.150911 0.126322 -0.356271
+v 0.16045 0.112915 -0.441079
+v 0.153901 0.116281 -0.399697
+v 0.145782 0.140078 -0.308863
+v 0 0.109177 -0.495145
+v 0.0709982 0.126709 -0.538642
+v 0.125551 0.112279 -0.490196
+v 0.065189 0.10952 -0.494143
+v 0.0790718 0.182988 -0.615599
+v 0.1475 0.153854 -0.575332
+v 0 0.151226 -0.580589
+v 0.0759743 0.151547 -0.579976
+v 0.194944 0.134806 -0.523346
+v 0.219301 0.191387 -0.600189
+v 0.210129 0.15949 -0.56385
+v 0.176258 0.119186 -0.482359
+v 0 0.219576 -0.64342
+v 0.0775425 0.260206 -0.661002
+v 0.154044 0.222564 -0.638941
+v 0.0792455 0.219987 -0.642965
+v 0.144378 0.302534 -0.665208
+v 0 0.300969 -0.668909
+v 0.0750102 0.301309 -0.668637
+v 0.213772 0.26767 -0.647803
+v 0.202463 0.304851 -0.656009
+v 0.21995 0.228648 -0.628685
+v 0.126715 0.537337 0.0111468
+v 0.0606884 0.60785 0.00104436
+v 0 0.554408 0.0408567
+v 0.0643107 0.550463 0.0321785
+v 0.191637 0.550233 -0.0477524
+v 0.185308 0.513743 -0.0172354
+v 0.171165 0.605799 -0.0764289
+v 0.209701 0.569968 -0.0807895
+v 0.126096 0.69557 -0.127071
+v 0.0987466 0.656786 -0.062447
+v 0.142824 0.650926 -0.0969906
+v 0 0.672219 -0.0228265
+v 0.0403517 0.7183 -0.0719016
+v 0.0504326 0.666442 -0.034872
+v 0.237614 0.630468 -0.469873
+v 0.258514 0.602197 -0.445199
+v 0.253217 0.578269 -0.474783
+v 0.212896 0.714905 -0.428435
+v 0.240943 0.662224 -0.43894
+v 0.224501 0.676789 -0.452578
+v 0.246529 0.67248 -0.397952
+v 0.254771 0.635976 -0.4207
+v 0.226436 0.912723 -0.26973
+v 0.21544 0.938681 -0.279423
+v 0.201014 0.943338 -0.25145
+v 0.214224 0.916672 -0.246046
+v 0.216965 0.931825 -0.261167
+v 0.196917 0.927676 -0.311546
+v 0.191876 0.948989 -0.288378
+v 0.216862 0.918936 -0.303965
+v 0.207634 0.937732 -0.29671
+v 0.1537 0.943252 -0.269054
+v 0.17745 0.953646 -0.260405
+v 0.165912 0.939303 -0.292739
+v 0.173109 0.951035 -0.277835
+v 0.179801 0.930945 -0.23207
+v 0.159856 0.939686 -0.239651
+v 0.180783 0.946411 -0.244635
+v 0.207814 0.759869 -0.30299
+v 0.191438 0.774734 -0.312022
+v 0.199084 0.77337 -0.281405
+v 0.209996 0.761876 -0.268358
+v 0.201734 0.767127 -0.28955
+v 0.161743 0.793141 -0.342758
+v 0.178221 0.788374 -0.325575
+v 0.180956 0.7796 -0.34144
+v 0.178357 0.783728 -0.331262
+v 0.18742 0.811743 -0.321532
+v 0.192387 0.789092 -0.301842
+v 0.165819 0.804418 -0.335288
+v 0.181068 0.796668 -0.322844
+v 0.202175 0.775482 -0.251997
+v 0.203233 0.797472 -0.273932
+v 0.199221 0.78193 -0.2764
+v 0.181336 0.804317 -0.228606
+v 0.151415 0.801648 -0.215482
+v 0.168503 0.781447 -0.210541
+v 0.192516 0.777938 -0.227759
+v 0.174268 0.789156 -0.220565
+v 0.108345 0.822919 -0.221075
+v 0.12229 0.805309 -0.211386
+v 0.135327 0.828572 -0.217894
+v 0.128262 0.813355 -0.215464
+v 0.103071 0.800257 -0.198013
+v 0.137434 0.789176 -0.20042
+v 0.0963023 0.812563 -0.21585
+v 0.114773 0.801816 -0.205592
+v 0.186175 0.766882 -0.204739
+v 0.153638 0.772539 -0.191376
+v 0.162292 0.776737 -0.200861
+v 0.13845 0.801677 -0.350685
+v 0.119159 0.809804 -0.331351
+v 0.143329 0.804627 -0.337252
+v 0.140844 0.802002 -0.341998
+v 0.086003 0.820458 -0.291304
+v 0.10501 0.817882 -0.310157
+v 0.0934554 0.817549 -0.318853
+v 0.100185 0.816042 -0.313185
+v 0.116614 0.840248 -0.308051
+v 0.127696 0.819206 -0.325202
+v 0.0961567 0.829666 -0.288835
+v 0.109848 0.825231 -0.308777
+v 0.153713 0.826966 -0.334024
+v 0.147191 0.8121 -0.335056
+v 0.0710179 0.824335 -0.270682
+v 0.0780919 0.822576 -0.248928
+v 0.084054 0.824638 -0.268864
+v 0.0786961 0.822795 -0.269513
+v 0.0899479 0.821575 -0.232115
+v 0.0751675 0.819879 -0.228529
+v 0.084018 0.819324 -0.230668
+v 0.102029 0.843892 -0.232989
+v 0.0888081 0.831892 -0.249223
+v 0.095203 0.828981 -0.232883
+v 0.0959818 0.847028 -0.267898
+v 0.0891348 0.832003 -0.268429
+v 0.208076 0.764415 -0.241789
+v 0.226075 0.759994 -0.240858
+v 0.231902 0.754223 -0.26947
+v 0.216972 0.759952 -0.252956
+v 0.20189 0.765574 -0.22074
+v 0.201854 0.770051 -0.234112
+v 0.215759 0.757673 -0.200462
+v 0.223444 0.760451 -0.217689
+v 0.205912 0.761992 -0.209879
+v 0.264144 0.760816 -0.243923
+v 0.265103 0.760852 -0.220189
+v 0.240714 0.760641 -0.229576
+v 0.278995 0.751255 -0.279238
+v 0.277858 0.727498 -0.319523
+v 0.234872 0.742799 -0.306286
+v 0.256224 0.744786 -0.293184
+v 0.311167 0.763685 -0.236173
+v 0.343965 0.756617 -0.269721
+v 0.300161 0.759231 -0.259113
+v 0.37536 0.738148 -0.30116
+v 0.402685 0.758643 -0.236201
+v 0.390758 0.753705 -0.27032
+v 0.317986 0.708047 -0.333577
+v 0.358205 0.712515 -0.324745
+v 0.258236 0.693552 -0.356883
+v 0.284726 0.698216 -0.343913
+v 0.183529 0.754714 -0.188935
+v 0.248651 0.737773 -0.18066
+v 0.277376 0.753251 -0.197126
+v 0.23327 0.750984 -0.191422
+v 0.329845 0.712932 -0.159268
+v 0.178779 0.718135 -0.16643
+v 0.252112 0.713887 -0.166078
+v 0.409003 0.736167 -0.174314
+v 0.39942 0.710994 -0.155057
+v 0.352617 0.756796 -0.208565
+v 0.409429 0.752424 -0.202781
+v 0.303456 0.759588 -0.213776
+v 0.0705006 0.811753 -0.205807
+v 0.0602661 0.797425 -0.177797
+v 0.106741 0.782082 -0.178969
+v 0.0845451 0.798015 -0.188581
+v 0 0.80352 -0.16013
+v 0.0359638 0.781745 -0.13792
+v 0.0258744 0.814456 -0.19275
+v 0.0313039 0.800824 -0.16616
+v 0.0771688 0.744134 -0.128582
+v 0 0.760454 -0.0953915
+v 0.0372542 0.755486 -0.106376
+v 0.128352 0.75368 -0.16956
+v 0.122404 0.729951 -0.151266
+v 0.140544 0.766364 -0.181936
+v 0.0269807 0.832763 -0.306956
+v 0.0647564 0.824008 -0.337021
+v 0.0438017 0.818098 -0.377412
+v 0 0.831312 -0.342444
+v 0.0352238 0.828719 -0.341387
+v 0.105446 0.812324 -0.352131
+v 0.0673729 0.8251 -0.299446
+v 0.0829065 0.820245 -0.328154
+v 0.163948 0.787613 -0.366372
+v 0.134148 0.794627 -0.38492
+v 0.134862 0.801108 -0.364702
+v 0.0968568 0.789376 -0.411772
+v 0.176513 0.760515 -0.410655
+v 0.140375 0.776566 -0.412207
+v 0 0.800708 -0.414889
+v 0.0494105 0.797681 -0.413491
+v 0.0431316 0.822381 -0.222498
+v 0.0209585 0.83068 -0.246717
+v 0 0.825505 -0.217769
+v 0.0222752 0.824379 -0.219341
+v 0.0571055 0.825864 -0.248034
+v 0.0611506 0.820889 -0.225684
+v 0.0427143 0.830406 -0.274631
+v 0.0589761 0.827121 -0.27268
+v 0 0.834904 -0.275945
+v 0.0223763 0.833445 -0.275661
+v 0.192333 0.761236 -0.378584
+v 0.220893 0.723503 -0.390265
+v 0.20182 0.742487 -0.402976
+v 0.202503 0.759013 -0.342238
+v 0.185497 0.772855 -0.357643
+v 0.229147 0.73046 -0.348619
+v 0.217964 0.748268 -0.323878
+v 0.238328 0.704586 -0.373617
+v 0.182638 0.844163 -0.332751
+v 0.20855 0.866089 -0.317157
+v 0.208154 0.830042 -0.298948
+v 0.197796 0.836731 -0.320106
+v 0.225435 0.891324 -0.293612
+v 0.202088 0.898609 -0.32219
+v 0.216036 0.894273 -0.311279
+v 0.221713 0.885753 -0.253374
+v 0.22169 0.856724 -0.273536
+v 0.227222 0.886884 -0.273083
+v 0.207473 0.823979 -0.249259
+v 0.212209 0.824662 -0.273398
+v 0.113654 0.869597 -0.287244
+v 0.140188 0.893495 -0.304854
+v 0.144889 0.85895 -0.323167
+v 0.127225 0.865094 -0.306986
+v 0.170407 0.915574 -0.314537
+v 0.141278 0.921251 -0.283964
+v 0.154011 0.919177 -0.300925
+v 0.17602 0.88052 -0.329499
+v 0.186655 0.907214 -0.322451
+v 0.164181 0.851772 -0.332764
+v 0.125921 0.862724 -0.222037
+v 0.126235 0.896792 -0.232757
+v 0.106278 0.87162 -0.248059
+v 0.112672 0.868658 -0.232446
+v 0.136706 0.926532 -0.249134
+v 0.152825 0.914652 -0.226568
+v 0.141818 0.921807 -0.235425
+v 0.120264 0.900007 -0.266157
+v 0.136018 0.925025 -0.266007
+v 0.106638 0.871852 -0.266963
+v 0.201725 0.863227 -0.234578
+v 0.169297 0.842653 -0.222278
+v 0.191458 0.831381 -0.232334
+v 0.191735 0.902282 -0.228209
+v 0.209229 0.892731 -0.238166
+v 0.158724 0.882351 -0.220155
+v 0.171199 0.908866 -0.224253
+v 0.146124 0.853579 -0.218747
+v 0.183641 -0.455489 0.534379
+v 0.20511 -0.461837 0.475502
+v 0.228365 -0.446181 0.420977
+v 0.21326 -0.450303 0.478539
+v 0.102981 -0.465723 0.632974
+v 0.142559 -0.466669 0.57469
+v 0.143787 -0.462243 0.587914
+v 0.132033 -0.481167 0.556781
+v 0.17086 -0.474631 0.521789
+v 0.109552 -0.472057 0.595485
+v 0.140849 -0.473458 0.563316
+v 0.206663 -0.472505 0.418626
+v 0.176881 -0.48852 0.469991
+v 0.194975 -0.474631 0.472428
+v 0.0439995 -0.453929 0.682063
+v 0.0755892 -0.465387 0.639018
+v 0.0705061 -0.461083 0.663389
+v 0 -0.448613 0.69461
+v 0.0238268 -0.46053 0.664171
+v 0.0210984 -0.44987 0.691901
+v 0.0329053 -0.483094 0.604968
+v 0.0549351 -0.471091 0.629642
+v 0 -0.470799 0.638539
+v 0.0276324 -0.470883 0.636565
+v 0.0917758 -0.479034 0.591505
+v 0.0815781 -0.471375 0.616771
+v 0.065741 -0.440111 0.119726
+v 0.0317646 -0.445352 0.134276
+v 0 -0.441491 0.0941719
+v 0.0323291 -0.440606 0.100743
+v 0.138015 -0.443895 0.192281
+v 0.0965198 -0.444834 0.175662
+v 0.101319 -0.442167 0.150389
+v 0.0981278 -0.457279 0.230677
+v 0.0636123 -0.450028 0.182161
+v 0.124102 -0.450719 0.228684
+v 0.0942201 -0.449501 0.201118
+v 0 -0.451738 0.166652
+v 0.034368 -0.461296 0.211229
+v 0.0320458 -0.45108 0.17059
+v 0.205661 -0.440923 0.30331
+v 0.163536 -0.447685 0.255959
+v 0.174785 -0.442414 0.24495
+v 0.213095 -0.454425 0.36305
+v 0.224677 -0.442616 0.362277
+v 0.178344 -0.481123 0.364052
+v 0.179537 -0.460826 0.31166
+v 0.199389 -0.4673 0.363772
+v 0.138385 -0.462583 0.271465
+v 0.153491 -0.454897 0.266081
+v 0 -0.501354 0.566258
+v 0.0472884 -0.515301 0.519274
+v 0.0774159 -0.49456 0.56248
+v 0.0400356 -0.499329 0.565268
+v 0.0552219 -0.529321 0.415644
+v 0.102374 -0.516827 0.468605
+v 0 -0.53078 0.469003
+v 0.0529288 -0.526727 0.468796
+v 0.12906 -0.497175 0.517019
+v 0.151776 -0.503865 0.416665
+v 0.144853 -0.503338 0.468859
+v 0.109485 -0.488354 0.558077
+v 0 -0.522771 0.361177
+v 0.046641 -0.500432 0.308431
+v 0.101727 -0.509066 0.362546
+v 0.0524328 -0.518796 0.361628
+v 0.0778956 -0.474931 0.261875
+v 0 -0.481524 0.256229
+v 0.0399262 -0.479506 0.257737
+v 0.12954 -0.482953 0.312862
+v 0.111951 -0.469275 0.268257
+v 0.144743 -0.495777 0.363499
+v 0.16267 -0.0297463 0.947597
+v 0.137897 -0.0298265 0.961061
+v 0.139042 -0.0451633 0.963659
+v 0.164631 -0.0504489 0.951176
+v 0.150007 -0.038161 0.957034
+v 0.109331 -0.0304408 0.974293
+v 0.122462 -0.037914 0.969963
+v 0.130968 -0.0135459 0.956851
+v 0.124172 -0.0273895 0.966259
+v 0.133714 -0.0642499 0.965416
+v 0.109851 -0.0516794 0.976283
+v 0.125245 -0.0487781 0.969619
+v 0.172782 -0.0812671 0.947651
+v 0.213918 -0.078022 0.922841
+v 0.18873 -0.0434748 0.935764
+v 0.185903 -0.064746 0.940486
+v 0.119758 -0.0911109 0.968067
+v 0.163882 -0.120989 0.949271
+v 0.149065 -0.0885975 0.957545
+v 0.209867 -0.161573 0.925244
+v 0.133001 -0.161124 0.956956
+v 0.170754 -0.160591 0.943861
+v 0.24743 -0.125194 0.898745
+v 0.2493 -0.167014 0.898651
+v 0.247517 -0.0516572 0.891815
+v 0.248772 -0.0872467 0.896118
+v 0.206641 -0.0168625 0.913183
+v 0.165953 -0.00373407 0.934179
+v 0.181563 -0.0218941 0.9332
+v 0.225597 0.0165132 0.878354
+v 0.237857 -0.0169093 0.886885
+v 0.188508 0.055577 0.881861
+v 0.216542 0.0481267 0.863247
+v 0.128713 0.0776757 0.9026
+v 0.155037 0.0342159 0.918365
+v 0.159167 0.0664178 0.894094
+v 0.116698 0.0146029 0.948102
+v 0.143447 0.00727528 0.940085
+v 0.0272441 0.00174637 0.985368
+v 0.0591053 0.0273947 0.96148
+v 0.0314129 0.062618 0.944549
+v 0 0.031249 0.967483
+v 0.0296124 0.0304081 0.965777
+v 0.0939443 0.0532189 0.93429
+v 0.0802969 -0.0053962 0.974961
+v 0.0883591 0.0217178 0.955411
+v 0.0652762 0.0925539 0.91594
+v 0.0973395 0.0863776 0.910036
+v 0 0.0974522 0.921309
+v 0.032753 0.0962353 0.919852
+v 0.0640997 -0.158632 0.974036
+v 0.0313918 -0.121887 0.983992
+v 0 -0.156323 0.979553
+v 0.0317497 -0.157085 0.978196
+v 0.0956523 -0.123707 0.972085
+v 0.0976501 -0.160225 0.966985
+v 0.0810019 -0.0733797 0.982855
+v 0.05977 -0.0942344 0.98419
+v 0.0898671 -0.0931811 0.977114
+v 0 -0.0930316 0.990478
+v 0.0273784 -0.0760818 0.993721
+v 0.0298425 -0.0936976 0.988793
+v 0.0955999 -0.0476973 0.985969
+v 0.0789269 -0.06284 0.994959
+v 0.0864529 -0.0640166 0.98579
+v 0.0940951 -0.0565412 0.98502
+v 0.0861068 -0.0306433 0.989153
+v 0.0788425 -0.0488271 1.00064
+v 0.0938769 -0.0385712 0.986828
+v 0.0525009 -0.0662129 1.0231
+v 0.0666347 -0.0644079 1.00864
+v 0.0598411 -0.0536198 1.01768
+v 0.0652608 -0.0582549 1.01251
+v 0.0598989 -0.0753391 1.00401
+v 0.0525276 -0.0775374 1.01483
+v 0.0652946 -0.0700523 1.00523
+v 0.0476184 -0.0223971 1.0006
+v 0.0582598 -0.0403976 1.0119
+v 0.0694706 -0.025394 0.994502
+v 0 -0.0210975 1.00705
+v 0.0209047 -0.039677 1.02317
+v 0.0242006 -0.0212264 1.00515
+v 0.0152462 -0.0676801 1.04602
+v 0.0344846 -0.0534302 1.03232
+v 0 -0.0555541 1.03946
+v 0.0177789 -0.0547726 1.03745
+v 0.0426567 -0.0627588 1.03251
+v 0.0490439 -0.0525287 1.02501
+v 0.0477856 -0.0718253 0.995571
+v 0.0209476 -0.0779737 1.00716
+v 0 -0.0724905 1.00177
+v 0.0242757 -0.0726037 0.999895
+v 0.0583871 -0.073576 0.998233
+v 0.0697639 -0.0690371 0.990088
+v 0.0427012 -0.0854758 1.01505
+v 0.0345351 -0.0845283 1.01121
+v 0.0491057 -0.0804193 1.00671
+v 0 -0.0876552 1.01738
+v 0.0152667 -0.0940994 1.02438
+v 0.0178069 -0.0869021 1.01538
+v 0.0271444 -0.0763309 1.04139
+v 0.0394909 -0.0796254 1.02906
+v 0.0399318 -0.0720589 1.03309
+v 0 -0.0809251 1.049
+v 0.0132272 -0.0889316 1.04223
+v 0.0137291 -0.0795664 1.04692
+v 0.0271637 -0.0905263 1.02914
+v 0 -0.0956202 1.03609
+v 0.01374 -0.0942759 1.03402
+v 0.0399551 -0.0846544 1.02277
+v -0.341299 -0.375944 0.366232
+v -0.317818 -0.392423 0.305485
+v -0.289638 -0.411984 0.363044
+v -0.313121 -0.40468 0.424303
+v -0.317288 -0.397295 0.364448
+v -0.312495 -0.42258 0.208415
+v -0.274846 -0.418754 0.252658
+v -0.350284 -0.364453 0.244783
+v -0.316601 -0.396416 0.250199
+v -0.194817 -0.437402 0.239358
+v -0.251544 -0.422435 0.302332
+v -0.2099 -0.457622 0.202318
+v -0.231211 -0.430966 0.245486
+v -0.261632 -0.427221 0.422701
+v -0.23935 -0.43199 0.361842
+v -0.262332 -0.422664 0.362137
+v -0.275025 -0.421949 0.482067
+v -0.291362 -0.413379 0.536815
+v -0.328162 -0.387696 0.483083
+v -0.303426 -0.408222 0.482265
+v -0.225399 -0.440193 0.480871
+v -0.222989 -0.439421 0.53835
+v -0.247502 -0.431674 0.481825
+v -0.164011 -0.483033 0.631406
+v -0.236564 -0.444228 0.584476
+v -0.156119 -0.458526 0.595789
+v -0.191143 -0.453861 0.591118
+v -0.315111 -0.397679 0.592146
+v -0.26979 -0.455493 0.624003
+v -0.280089 -0.425608 0.586435
+v -0.339519 -0.257182 -0.0802831
+v -0.283535 -0.198083 -0.11242
+v -0.23546 -0.269794 -0.12322
+v -0.300873 -0.335808 -0.11045
+v -0.292915 -0.260195 -0.110308
+v -0.26898 -0.103901 -0.112945
+v -0.224047 -0.163495 -0.142015
+v -0.318497 -0.140059 -0.0746981
+v -0.274851 -0.146856 -0.114171
+v -0.123113 -0.206388 -0.170045
+v -0.173106 -0.230478 -0.142628
+v -0.172014 -0.141551 -0.170002
+v -0.171621 -0.184999 -0.160037
+v -0.176383 -0.34915 -0.10631
+v -0.119478 -0.295776 -0.113872
+v -0.174532 -0.282877 -0.121531
+v -0.273281 0.0420733 -0.0438498
+v -0.232462 0.00608204 -0.106821
+v -0.269878 -0.0318881 -0.0928838
+v -0.307808 -0.00318692 -0.0317095
+v -0.272342 0.00331308 -0.0724007
+v -0.148915 0.00578811 -0.165362
+v -0.182883 -0.0457092 -0.157469
+v -0.199052 0.0623656 -0.105989
+v -0.190658 0.0074224 -0.137241
+v -0.222802 -0.0788504 -0.141507
+v -0.131964 -0.111136 -0.188613
+v -0.176218 -0.0952439 -0.168768
+v -0.308256 -0.0631453 -0.0619102
+v -0.26804 -0.0666021 -0.106124
+v -0.26887 -0.396352 0.909752
+v -0.214141 -0.334681 0.919903
+v -0.263105 -0.289222 0.900577
+v -0.30851 -0.340088 0.875405
+v -0.266995 -0.336923 0.904292
+v -0.104904 -0.328487 0.922781
+v -0.164801 -0.284207 0.93026
+v -0.149733 -0.3934 0.920339
+v -0.15673 -0.332639 0.924128
+v -0.172003 -0.202637 0.93974
+v -0.215216 -0.244729 0.920604
+v -0.126656 -0.240414 0.945024
+v -0.169955 -0.242358 0.935335
+v -0.293898 -0.252639 0.867139
+v -0.253262 -0.209214 0.897882
+v -0.258195 -0.248301 0.898482
+v -0.375987 -0.340814 -0.0337268
+v -0.380901 -0.277243 0.032671
+v -0.358457 -0.200112 -0.0267162
+v -0.367895 -0.263853 -0.0306343
+v -0.375596 -0.311145 0.175939
+v -0.382253 -0.237542 0.105277
+v -0.379176 -0.364086 0.102343
+v -0.381399 -0.294389 0.103665
+v -0.379025 -0.149836 0.112764
+v -0.369603 -0.166291 0.0406057
+v -0.385185 -0.214712 0.178025
+v -0.38088 -0.190046 0.108528
+v -0.342627 -0.106257 -0.015664
+v -0.349444 -0.148083 -0.0217878
+v -0.351404 -0.354331 0.427814
+v -0.367965 -0.307651 0.370431
+v -0.363729 -0.333817 0.306674
+v -0.35769 -0.345276 0.36828
+v -0.376188 -0.22098 0.374406
+v -0.380079 -0.252636 0.309953
+v -0.366553 -0.273812 0.432936
+v -0.373629 -0.265433 0.372527
+v -0.379525 -0.278609 0.243679
+v -0.386398 -0.194374 0.247084
+v -0.384425 -0.235652 0.245143
+v -0.369702 -0.323364 0.243084
+v -0.359143 -0.0454376 0.384517
+v -0.376221 -0.0834201 0.319025
+v -0.375379 -0.132559 0.377674
+v -0.363531 -0.0923241 0.440867
+v -0.369754 -0.0887861 0.380331
+v -0.380073 -0.0641595 0.195141
+v -0.385994 -0.115397 0.252169
+v -0.369328 -0.0337485 0.265179
+v -0.380479 -0.0754649 0.257209
+v -0.383633 -0.167556 0.313214
+v -0.386429 -0.137312 0.183762
+v -0.387444 -0.154662 0.24911
+v -0.370145 -0.182621 0.437038
+v -0.377148 -0.176656 0.375911
+v -0.338487 -0.0443826 0.00235778
+v -0.357964 -0.0317511 0.0731655
+v -0.331012 0.0171674 0.0389716
+v -0.336366 -0.0157204 0.0175237
+v -0.363023 -0.0919062 0.0524717
+v -0.339774 -0.073126 -0.00816016
+v -0.37698 -0.0821329 0.123634
+v -0.378431 -0.114848 0.117331
+v -0.363023 -0.0117289 0.147082
+v -0.372551 -0.0487428 0.133083
+v -0.324728 -0.29766 0.82767
+v -0.340911 -0.350598 0.77666
+v -0.336842 -0.40362 0.835552
+v -0.331904 -0.344898 0.831349
+v -0.311718 -0.219243 0.822794
+v -0.331546 -0.262105 0.774295
+v -0.317327 -0.257308 0.824743
+v -0.344609 -0.271269 0.661246
+v -0.340676 -0.308781 0.717453
+v -0.338685 -0.226798 0.720092
+v -0.339619 -0.266826 0.718796
+v -0.335933 -0.414813 0.716014
+v -0.334716 -0.360298 0.654176
+v -0.339263 -0.356428 0.715872
+v -0.304296 -0.0566442 0.818942
+v -0.32815 -0.0982774 0.774453
+v -0.309753 -0.137557 0.821642
+v -0.282982 -0.0932074 0.86163
+v -0.309064 -0.0966912 0.82069
+v -0.349859 -0.0980743 0.670701
+v -0.342261 -0.141944 0.72292
+v -0.335097 -0.0554519 0.724795
+v -0.341372 -0.0985454 0.724072
+v -0.328063 -0.182474 0.773912
+v -0.348891 -0.186539 0.666645
+v -0.340465 -0.184932 0.72153
+v -0.284251 -0.173813 0.864211
+v -0.309915 -0.178867 0.822052
+v -0.364199 -0.140124 0.498352
+v -0.3576 -0.096426 0.55865
+v -0.350887 -0.0507559 0.502911
+v -0.360004 -0.0947951 0.500374
+v -0.363053 -0.232129 0.494204
+v -0.360088 -0.187609 0.554338
+v -0.364779 -0.186112 0.496433
+v -0.356689 -0.14249 0.613383
+v -0.35206 -0.232064 0.608144
+v -0.355112 -0.187591 0.610995
+v -0.34723 -0.0531297 0.617509
+v -0.354744 -0.0974434 0.615492
+v -0.338808 -0.358381 0.541229
+v -0.354812 -0.320109 0.488079
+v -0.344689 -0.357576 0.485189
+v -0.344841 -0.317709 0.600763
+v -0.335024 -0.360119 0.596759
+v -0.354746 -0.277669 0.548235
+v -0.349578 -0.27523 0.604646
+v -0.360332 -0.277544 0.491251
+v -0.270021 0.183796 0.151405
+v -0.250535 0.219935 0.200112
+v -0.22988 0.247545 0.157829
+v -0.253104 0.203435 0.118078
+v -0.251096 0.217673 0.158942
+v -0.247672 0.201215 0.285394
+v -0.224342 0.248556 0.249012
+v -0.273837 0.172969 0.230557
+v -0.249328 0.213762 0.242242
+v -0.166122 0.2964 0.255682
+v -0.201028 0.279793 0.204606
+v -0.19325 0.264928 0.298974
+v -0.197174 0.276415 0.252836
+v -0.208445 0.255016 0.0997486
+v -0.173937 0.295795 0.151795
+v -0.204742 0.274117 0.153286
+v -0.238934 0.137098 0.472953
+v -0.21248 0.178956 0.429644
+v -0.243641 0.166107 0.375362
+v -0.269047 0.115689 0.415447
+v -0.241351 0.149381 0.423
+v -0.149393 0.222637 0.439843
+v -0.185529 0.224498 0.389084
+v -0.178799 0.187723 0.48375
+v -0.181979 0.203634 0.435289
+v -0.218341 0.218477 0.337805
+v -0.157504 0.266542 0.348704
+v -0.189327 0.246276 0.344018
+v -0.272811 0.14513 0.319056
+v -0.245762 0.184358 0.32963
+v -0.212012 0.0755801 0.841201
+v -0.188626 0.112308 0.821548
+v -0.217701 0.11212 0.775527
+v -0.235856 0.0765799 0.805621
+v -0.213324 0.0965225 0.811853
+v -0.13257 0.138159 0.837992
+v -0.163968 0.1484 0.789706
+v -0.159999 0.0987976 0.865373
+v -0.161693 0.126272 0.830304
+v -0.169138 0.174847 0.695193
+v -0.195015 0.146226 0.738197
+v -0.136442 0.179619 0.750387
+v -0.166543 0.164739 0.744396
+v -0.249108 0.0961373 0.728207
+v -0.226814 0.130295 0.684497
+v -0.222367 0.12354 0.732548
+v -0.264402 0.101968 0.520397
+v -0.233686 0.131486 0.579487
+v -0.207042 0.158534 0.530777
+v -0.236433 0.132176 0.525634
+v -0.201599 0.157166 0.637907
+v -0.258575 0.102323 0.628487
+v -0.230534 0.131902 0.632959
+v -0.139858 0.195421 0.647569
+v -0.173735 0.178964 0.589175
+v -0.171473 0.178284 0.642915
+v -0.143538 0.198754 0.539805
+v -0.176115 0.180806 0.535582
+v -0.337836 -0.00655256 0.448549
+v -0.320929 0.0389878 0.398635
+v -0.34742 0.00319315 0.333602
+v -0.342419 -0.00261151 0.390867
+v -0.298744 0.0894187 0.355449
+v -0.293232 0.0711038 0.460405
+v -0.296022 0.0786559 0.407077
+v -0.30121 0.115034 0.257783
+v -0.327429 0.0566863 0.291075
+v -0.300703 0.102007 0.305644
+v -0.351215 0.0204208 0.221415
+v -0.350974 0.0108683 0.277027
+v -0.308985 -0.0158364 0.772679
+v -0.30029 0.0266191 0.725054
+v -0.328122 -0.0112347 0.673608
+v -0.320738 -0.0133772 0.724898
+v -0.268509 -0.018423 0.854481
+v -0.274888 0.0183198 0.812257
+v -0.291897 -0.0177918 0.816142
+v -0.267341 0.0573644 0.769829
+v -0.24017 0.0457536 0.838346
+v -0.256292 0.0501447 0.807253
+v -0.281859 0.0674534 0.67711
+v -0.275748 0.0634772 0.725933
+v -0.239418 0.102195 -0.0351077
+v -0.213035 0.170022 -0.00953204
+v -0.181753 0.135303 -0.0848536
+v -0.20757 0.117335 -0.0616182
+v -0.298292 0.0729821 0.0271766
+v -0.265278 0.133372 0.0353559
+v -0.270541 0.0874645 -0.00640695
+v -0.236523 0.195781 0.0609139
+v -0.276588 0.155574 0.0981021
+v -0.258648 0.173682 0.0768666
+v -0.190293 0.237055 0.0258931
+v -0.212268 0.218116 0.0448664
+v -0.288873 0.0678482 0.57056
+v -0.311113 0.0304905 0.622016
+v -0.286019 0.0682635 0.624906
+v -0.315185 0.0311388 0.510574
+v -0.291063 0.0681481 0.515313
+v -0.334099 -0.00961306 0.563635
+v -0.335537 -0.00863692 0.506376
+v -0.332102 -0.0102295 0.619617
+v -0.324423 0.0791082 0.189745
+v -0.33595 0.0447295 0.115485
+v -0.346277 0.0318576 0.16704
+v -0.295396 0.137443 0.169109
+v -0.299573 0.127115 0.211987
+v -0.305188 0.103086 0.100412
+v -0.288286 0.145212 0.129998
+v -0.320025 0.0585873 0.0683354
+v -0.177314 0.204818 -0.0510199
+v -0.167034 0.18223 -0.122798
+v -0.149034 0.119255 -0.152767
+v -0.168725 0.158519 -0.103729
+v -0.159665 0.285267 0.0452783
+v -0.167426 0.259011 -0.0241111
+v -0.175009 0.248965 0.00116269
+v -0.17918 0.290497 -0.0609743
+v -0.177164 0.238112 -0.093383
+v -0.149721 0.3008 -0.00470029
+v -0.168557 0.27236 -0.0447148
+v -0.160058 0.168229 -0.195838
+v -0.194921 0.221357 -0.173014
+v -0.175228 0.201681 -0.146613
+v -0.21819 0.281995 -0.133552
+v -0.26422 0.274293 -0.228207
+v -0.233098 0.21084 -0.267301
+v -0.227725 0.245742 -0.199837
+v -0.173263 0.341693 -0.0238379
+v -0.225521 0.344757 -0.0845003
+v -0.200073 0.314908 -0.0732157
+v -0.267222 0.417414 -0.116444
+v -0.27622 0.344814 -0.176785
+v -0.21453 0.407775 -0.0317089
+v -0.249809 0.379204 -0.0978893
+v -0.306638 0.268641 -0.338166
+v -0.314639 0.343639 -0.292523
+v -0.294985 0.306469 -0.259249
+v -0.279797 0.207117 -0.366981
+v -0.240251 0.15911 -0.387649
+v -0.192666 0.159829 -0.308601
+v -0.238726 0.181983 -0.331175
+v -0.330786 0.26947 -0.445487
+v -0.307399 0.208213 -0.46356
+v -0.31171 0.235804 -0.406418
+v -0.259412 0.1698 -0.542722
+v -0.265715 0.160342 -0.469212
+v -0.305787 0.218563 -0.547441
+v -0.290901 0.186126 -0.509143
+v -0.195451 0.128497 -0.407372
+v -0.213461 0.12942 -0.465108
+v -0.233311 0.142158 -0.432916
+v -0.329541 0.345256 -0.410956
+v -0.321008 0.352655 -0.511417
+v -0.32675 0.280027 -0.535933
+v -0.333345 0.309542 -0.480189
+v -0.255976 0.429549 -0.563622
+v -0.289278 0.358786 -0.584918
+v -0.295245 0.436762 -0.484187
+v -0.295399 0.395443 -0.540067
+v -0.246645 0.312827 -0.640337
+v -0.296456 0.295786 -0.60247
+v -0.240548 0.35766 -0.62796
+v -0.274305 0.331026 -0.617491
+v -0.272516 0.239582 -0.609537
+v -0.307295 0.256707 -0.578835
+v -0.288912 -0.666279 -0.0831981
+v -0.233667 -0.718383 -0.0394682
+v -0.288313 -0.71953 0.0292002
+v -0.335047 -0.66747 -0.026194
+v -0.288523 -0.704927 -0.0329901
+v -0.136552 -0.67511 -0.0280042
+v -0.172796 -0.723594 0.0247169
+v -0.187443 -0.669698 -0.0818819
+v -0.179633 -0.708233 -0.0350267
+v -0.182766 -0.687312 0.135767
+v -0.227442 -0.724741 0.0933852
+v -0.136631 -0.686121 0.0787634
+v -0.176191 -0.716151 0.0857864
+v -0.327044 -0.674066 0.0872825
+v -0.276152 -0.679477 0.14116
+v -0.281161 -0.710704 0.0910758
+v -0.288217 -0.679435 0.838379
+v -0.248642 -0.724519 0.790895
+v -0.198896 -0.723335 0.85193
+v -0.2461 -0.678205 0.889508
+v -0.246831 -0.711912 0.845432
+v -0.24088 -0.688682 0.691628
+v -0.198408 -0.728532 0.734591
+v -0.283587 -0.684515 0.738785
+v -0.244434 -0.716483 0.736358
+v -0.115859 -0.693483 0.748115
+v -0.148661 -0.727347 0.795626
+v -0.157976 -0.694713 0.696986
+v -0.153047 -0.720555 0.741401
+v -0.15574 -0.680004 0.892121
+v -0.113033 -0.684171 0.844965
+v -0.151829 -0.713932 0.849146
+v -0.30391 -0.498066 0.668956
+v -0.32282 -0.565952 0.726198
+v -0.342489 -0.483419 0.78141
+v -0.331237 -0.488983 0.719836
+v -0.319732 -0.633454 0.786428
+v -0.28283 -0.632563 0.691009
+v -0.308326 -0.632734 0.733956
+v -0.294197 -0.618974 0.884469
+v -0.331331 -0.556659 0.842236
+v -0.315639 -0.625673 0.839496
+v -0.31354 -0.474577 0.887895
+v -0.337524 -0.478425 0.840051
+v -0.0969194 -0.478778 0.897495
+v -0.0774719 -0.570173 0.85133
+v -0.0654567 -0.515878 0.79543
+v -0.0634711 -0.431765 0.851899
+v -0.0707234 -0.496321 0.852392
+v -0.0840444 -0.64607 0.797118
+v -0.112805 -0.624088 0.888986
+v -0.0899111 -0.634521 0.847274
+v -0.119063 -0.647679 0.702012
+v -0.0832655 -0.591144 0.740087
+v -0.0943475 -0.648249 0.746058
+v -0.069956 -0.47422 0.735292
+v -0.106684 -0.534636 0.682874
+v -0.0773661 -0.528459 0.735056
+v -0.208287 -0.52835 0.637223
+v -0.252371 -0.581833 0.652832
+v -0.260651 -0.513618 0.642398
+v -0.15335 -0.595644 0.66033
+v -0.153645 -0.534978 0.650489
+v -0.20034 -0.651356 0.660475
+v -0.156436 -0.650983 0.672896
+v -0.244653 -0.641994 0.666514
+v -0.209392 -0.47177 0.928314
+v -0.149543 -0.554429 0.922812
+v -0.147798 -0.472236 0.922294
+v -0.262868 -0.552296 0.920108
+v -0.267734 -0.472455 0.917082
+v -0.203436 -0.628169 0.923071
+v -0.253551 -0.622939 0.912658
+v -0.153584 -0.625261 0.914716
+v -0.387436 -0.440321 0.0296682
+v -0.369537 -0.538419 0.0961211
+v -0.348561 -0.464054 0.158716
+v -0.376444 -0.45013 0.0999599
+v -0.354445 -0.427851 -0.0878125
+v -0.378455 -0.525308 -0.0356201
+v -0.380964 -0.432504 -0.0361787
+v -0.368236 -0.614976 0.0306267
+v -0.340742 -0.597899 -0.0798229
+v -0.364087 -0.605614 -0.0296813
+v -0.325558 -0.615423 0.139592
+v -0.354786 -0.614851 0.0904298
+v -0.12704 -0.448099 -0.0735785
+v -0.182701 -0.527846 -0.109937
+v -0.243518 -0.430642 -0.121062
+v -0.179139 -0.436248 -0.106528
+v -0.241923 -0.609859 -0.117359
+v -0.141316 -0.607217 -0.0730372
+v -0.186967 -0.607615 -0.104287
+v -0.304443 -0.521287 -0.117606
+v -0.296601 -0.602967 -0.109127
+v -0.305291 -0.427537 -0.115465
+v -0.25113 -0.502745 0.197094
+v -0.188536 -0.577394 0.174934
+v -0.148115 -0.512563 0.149929
+v -0.196177 -0.511769 0.183546
+v -0.293784 -0.559568 0.181063
+v -0.304357 -0.484216 0.189956
+v -0.233064 -0.638226 0.175543
+v -0.282371 -0.626877 0.167977
+v -0.145237 -0.635178 0.13052
+v -0.185474 -0.638484 0.162245
+v -0.0953701 -0.405611 -0.0214782
+v -0.0997946 -0.492468 0.0352712
+v -0.105712 -0.548895 -0.0268432
+v -0.102107 -0.469936 -0.0239275
+v -0.113288 -0.573273 0.0910993
+v -0.112515 -0.451919 0.0935688
+v -0.115559 -0.506404 0.0968639
+v -0.106751 -0.633108 0.0275567
+v -0.117988 -0.635837 0.0828
+v -0.114552 -0.619641 -0.0267392
+v -0.308658 0.43336 -0.368305
+v -0.285353 0.520853 -0.361551
+v -0.271184 0.525759 -0.460079
+v -0.291391 0.478507 -0.418527
+v -0.298509 0.427522 -0.236282
+v -0.317802 0.38717 -0.32759
+v -0.235335 0.485531 -0.0647615
+v -0.271214 0.498067 -0.167977
+v -0.272045 0.45855 -0.143225
+v -0.277875 0.55689 -0.190965
+v -0.282577 0.535883 -0.259681
+v -0.247008 0.54474 -0.114114
+v -0.271665 0.531425 -0.180446
+v -0.267 0.601931 -0.388185
+v -0.278055 0.600765 -0.320108
+v -0.278645 0.563976 -0.33172
+v -0.252363 0.682167 -0.15033
+v -0.326452 0.645309 -0.138504
+v -0.267679 0.610284 -0.129549
+v -0.192626 0.647576 -0.125774
+v -0.258116 0.645451 -0.13607
+v -0.430349 0.658277 -0.154382
+v -0.380321 0.61857 -0.152413
+v -0.388682 0.680406 -0.144893
+v -0.384792 0.647908 -0.143704
+v -0.35448 0.589213 -0.202564
+v -0.324346 0.587714 -0.15672
+v -0.401583 0.612716 -0.19176
+v -0.367842 0.597467 -0.17194
+v -0.239082 0.578957 -0.111996
+v -0.289471 0.569074 -0.162469
+v -0.279361 0.583208 -0.137022
+v -0.318359 0.584666 -0.230094
+v -0.294439 0.591849 -0.266039
+v -0.294318 0.572729 -0.209871
+v -0.380591 0.614582 -0.245535
+v -0.343967 0.620068 -0.286887
+v -0.347363 0.598425 -0.244565
+v -0.345079 0.681327 -0.333156
+v -0.312606 0.638668 -0.319124
+v -0.375881 0.662505 -0.310821
+v -0.34177 0.649107 -0.318477
+v -0.28588 0.662049 -0.349089
+v -0.290372 0.624108 -0.317788
+v -0.419572 0.658706 -0.261892
+v -0.444197 0.711394 -0.263256
+v -0.393743 0.706765 -0.3123
+v -0.412958 0.685943 -0.291128
+v -0.447289 0.655956 -0.196803
+v -0.417303 0.633713 -0.225256
+v -0.447397 0.702392 -0.15815
+v -0.471552 0.708579 -0.199184
+v -0.460337 0.681448 -0.17325
+v -0.43241 0.746455 -0.258303
+v -0.452589 0.744444 -0.206816
+v -0.460792 0.730831 -0.231063
+v -0.0467382 0.325284 0.136739
+v -0.0925539 0.320139 0.170699
+v -0.0462436 0.320164 0.221337
+v -0.0468991 0.323707 0.17766
+v -0.132936 0.312061 0.210485
+v -0.135299 0.30832 0.112598
+v -0.13572 0.310986 0.159838
+v -0.125628 0.296564 0.306829
+v -0.0886491 0.312396 0.263435
+v -0.129481 0.307575 0.259519
+v -0.0434892 0.30185 0.311934
+v -0.0450231 0.313323 0.266515
+v -0.0377827 0.232065 0.495188
+v -0.0735327 0.221293 0.545706
+v -0.0364517 0.2252 0.601479
+v -0.0369868 0.226526 0.54726
+v -0.107651 0.210086 0.597079
+v -0.111325 0.219111 0.491389
+v -0.109197 0.212143 0.543201
+v -0.105384 0.203917 0.704755
+v -0.0717153 0.218255 0.65442
+v -0.106456 0.208743 0.651454
+v -0.0357781 0.218679 0.710249
+v -0.0360809 0.22396 0.656319
+v -0.0703592 0.199987 0.759265
+v -0.035033 0.18494 0.809623
+v -0.0354469 0.205232 0.761742
+v -0.10286 0.172311 0.802347
+v -0.104202 0.191409 0.755412
+v -0.0994805 0.118705 0.880737
+v -0.0684286 0.154761 0.849486
+v -0.101303 0.147713 0.844481
+v -0.0337403 0.129119 0.889852
+v -0.0344822 0.159128 0.852718
+v -0.032544 -0.433336 0.0684456
+v -0.0619855 -0.419454 0.0516405
+v -0.0290027 -0.401336 0.00260959
+v -0.0312142 -0.420036 0.03584
+v -0.0804386 -0.407577 0.025012
+v -0.101981 -0.43742 0.118873
+v -0.0918712 -0.426509 0.0746889
+v -0.0781392 -0.343402 -0.0680291
+v -0.0524989 -0.374486 -0.0268658
+v -0.0771327 -0.378765 -0.022981
+v -0.024587 -0.350255 -0.0669927
+v -0.0265726 -0.377866 -0.0315624
+v -0.0173499 -0.340897 0.891348
+v -0.032518 -0.377955 0.851217
+v -0.0162142 -0.396858 0.808469
+v -0.0161103 -0.370733 0.852319
+v -0.0473693 -0.423079 0.799815
+v -0.0576974 -0.360469 0.893822
+v -0.0495204 -0.395304 0.851292
+v -0.0641499 -0.454693 0.696538
+v -0.0352263 -0.426311 0.757202
+v -0.0543417 -0.44245 0.745122
+v -0.0190571 -0.436735 0.723869
+v -0.0173127 -0.418962 0.764189
+v -0.08278 0.283223 0.354838
+v -0.0403407 0.264584 0.4009
+v -0.0418935 0.284411 0.356338
+v -0.117787 0.25622 0.397308
+v -0.121652 0.278062 0.352302
+v -0.0772361 0.242537 0.445498
+v -0.114267 0.235186 0.443214
+v -0.0389355 0.245943 0.446792
+v -0.0448664 0.351036 0.0368917
+v -0.084777 0.331134 0.0392602
+v -0.0441574 0.329119 0.0693112
+v -0.0436281 0.336529 0.047541
+v -0.122579 0.311429 0.0436831
+v -0.125569 0.33518 0.0110101
+v -0.120968 0.319807 0.0225689
+v -0.0891709 0.320583 0.0906503
+v -0.129137 0.308034 0.0737864
+v -0.0455092 0.326229 0.0998335
+v -0.0632284 0.496693 0.0522852
+v -0.116503 0.442256 0.0399308
+v -0.0541098 0.40816 0.0483714
+v -0.0593708 0.448955 0.0551193
+v -0.153438 0.391277 0.0121446
+v -0.18104 0.470039 0.00485434
+v -0.169159 0.42866 0.0126096
+v -0.0954346 0.370179 0.029827
+v -0.13765 0.35956 0.0095734
+v -0.0488177 0.375217 0.039732
+v -0.0692761 0.505582 -0.562042
+v -0.130136 0.568004 -0.528764
+v -0.0626485 0.640189 -0.506046
+v -0.0664738 0.570216 -0.533525
+v -0.232047 0.565216 -0.501918
+v -0.174025 0.632377 -0.495223
+v -0.199187 0.49892 -0.545668
+v -0.188173 0.564066 -0.51907
+v -0.150533 0.744043 -0.44196
+v -0.113201 0.701726 -0.474505
+v -0.199905 0.685497 -0.462496
+v -0.161614 0.694175 -0.469576
+v -0.0536794 0.762368 -0.447664
+v -0.0582379 0.707055 -0.477971
+v -0.0726958 0.340957 -0.664799
+v -0.138282 0.375681 -0.645424
+v -0.0711811 0.413275 -0.623292
+v -0.0716465 0.376815 -0.648416
+v -0.199873 0.404748 -0.612935
+v -0.193634 0.339499 -0.652866
+v -0.194895 0.370921 -0.637938
+v -0.138821 0.452917 -0.589054
+v -0.202195 0.446614 -0.579567
+v -0.0706181 0.454733 -0.593233
+v -0.0306399 -0.195728 0.970285
+v -0.057123 -0.235922 0.956621
+v -0.0240468 -0.271835 0.943264
+v -0.0277858 -0.234252 0.959139
+v -0.0802769 -0.277764 0.939515
+v -0.0956803 -0.199455 0.960676
+v -0.0895627 -0.238122 0.952018
+v -0.0425527 -0.311463 0.921053
+v -0.0688023 -0.319918 0.921674
+v -0.0202817 -0.307657 0.921165
+v -0.049547 -0.311956 -0.105817
+v -0.0238367 -0.283103 -0.139843
+v -0.023709 -0.319134 -0.103998
+v -0.0816243 -0.264189 -0.144485
+v -0.0796433 -0.304819 -0.108871
+v -0.0878074 -0.176513 -0.193941
+v -0.0521008 -0.23408 -0.173683
+v -0.0840611 -0.222685 -0.173847
+v -0.0267024 -0.190718 -0.197111
+v -0.0248684 -0.240765 -0.171792
+v -0.0599751 -0.128388 -0.209467
+v -0.0321871 -0.0691596 -0.222731
+v -0.0292367 -0.131565 -0.213064
+v -0.101087 -0.0620719 -0.200364
+v -0.0937169 -0.121882 -0.201748
+v -0.115674 0.055105 -0.187883
+v -0.0713902 -0.00682142 -0.215572
+v -0.109216 -0.00036684 -0.192884
+v -0.0381981 0.0419961 -0.235526
+v -0.035269 -0.00935459 -0.229192
+v -0.0803711 0.0844567 -0.225271
+v -0.0428926 0.103386 -0.257977
+v -0.0406901 0.0790388 -0.244814
+v -0.120922 0.124855 -0.209363
+v -0.118034 0.096215 -0.193935
+v -0.138478 0.148595 -0.265069
+v -0.0884331 0.12573 -0.258533
+v -0.128964 0.142918 -0.232488
+v -0.0470212 0.120443 -0.299626
+v -0.0449537 0.11665 -0.27594
+v -0.097999 0.124878 -0.319976
+v -0.0518947 0.108853 -0.365753
+v -0.0492429 0.116377 -0.329956
+v -0.150911 0.126322 -0.356271
+v -0.145782 0.140078 -0.308863
+v -0.16045 0.112915 -0.441079
+v -0.107541 0.107031 -0.401861
+v -0.153901 0.116281 -0.399697
+v -0.0595919 0.101024 -0.449026
+v -0.0552525 0.102268 -0.405836
+v -0.125551 0.112279 -0.490196
+v -0.0709982 0.126709 -0.538642
+v -0.065189 0.10952 -0.494143
+v -0.194944 0.134806 -0.523346
+v -0.176258 0.119186 -0.482359
+v -0.219301 0.191387 -0.600189
+v -0.1475 0.153854 -0.575332
+v -0.210129 0.15949 -0.56385
+v -0.0790718 0.182988 -0.615599
+v -0.0759743 0.151547 -0.579976
+v -0.154044 0.222564 -0.638941
+v -0.0775425 0.260206 -0.661002
+v -0.0792455 0.219987 -0.642965
+v -0.213772 0.26767 -0.647803
+v -0.21995 0.228648 -0.628685
+v -0.144378 0.302534 -0.665208
+v -0.202463 0.304851 -0.656009
+v -0.0750102 0.301309 -0.668637
+v -0.0606884 0.60785 0.00104436
+v -0.126715 0.537337 0.0111468
+v -0.0643107 0.550463 0.0321785
+v -0.0403517 0.7183 -0.0719016
+v -0.0987466 0.656786 -0.062447
+v -0.0504326 0.666442 -0.034872
+v -0.171165 0.605799 -0.0764289
+v -0.126096 0.69557 -0.127071
+v -0.142824 0.650926 -0.0969906
+v -0.191637 0.550233 -0.0477524
+v -0.209701 0.569968 -0.0807895
+v -0.185308 0.513743 -0.0172354
+v -0.258514 0.602197 -0.445199
+v -0.237614 0.630468 -0.469873
+v -0.253217 0.578269 -0.474783
+v -0.246529 0.67248 -0.397952
+v -0.240943 0.662224 -0.43894
+v -0.254771 0.635976 -0.4207
+v -0.212896 0.714905 -0.428435
+v -0.224501 0.676789 -0.452578
+v -0.214224 0.916672 -0.246046
+v -0.201014 0.943338 -0.25145
+v -0.21544 0.938681 -0.279423
+v -0.226436 0.912723 -0.26973
+v -0.216965 0.931825 -0.261167
+v -0.159856 0.939686 -0.239651
+v -0.17745 0.953646 -0.260405
+v -0.179801 0.930945 -0.23207
+v -0.180783 0.946411 -0.244635
+v -0.165912 0.939303 -0.292739
+v -0.191876 0.948989 -0.288378
+v -0.1537 0.943252 -0.269054
+v -0.173109 0.951035 -0.277835
+v -0.216862 0.918936 -0.303965
+v -0.196917 0.927676 -0.311546
+v -0.207634 0.937732 -0.29671
+v -0.209996 0.761876 -0.268358
+v -0.199084 0.77337 -0.281405
+v -0.191438 0.774734 -0.312022
+v -0.207814 0.759869 -0.30299
+v -0.201734 0.767127 -0.28955
+v -0.203233 0.797472 -0.273932
+v -0.192387 0.789092 -0.301842
+v -0.202175 0.775482 -0.251997
+v -0.199221 0.78193 -0.2764
+v -0.165819 0.804418 -0.335288
+v -0.178221 0.788374 -0.325575
+v -0.18742 0.811743 -0.321532
+v -0.181068 0.796668 -0.322844
+v -0.180956 0.7796 -0.34144
+v -0.161743 0.793141 -0.342758
+v -0.178357 0.783728 -0.331262
+v -0.192516 0.777938 -0.227759
+v -0.168503 0.781447 -0.210541
+v -0.151415 0.801648 -0.215482
+v -0.181336 0.804317 -0.228606
+v -0.174268 0.789156 -0.220565
+v -0.153638 0.772539 -0.191376
+v -0.137434 0.789176 -0.20042
+v -0.186175 0.766882 -0.204739
+v -0.162292 0.776737 -0.200861
+v -0.0963023 0.812563 -0.21585
+v -0.12229 0.805309 -0.211386
+v -0.103071 0.800257 -0.198013
+v -0.114773 0.801816 -0.205592
+v -0.135327 0.828572 -0.217894
+v -0.108345 0.822919 -0.221075
+v -0.128262 0.813355 -0.215464
+v -0.143329 0.804627 -0.337252
+v -0.119159 0.809804 -0.331351
+v -0.13845 0.801677 -0.350685
+v -0.140844 0.802002 -0.341998
+v -0.153713 0.826966 -0.334024
+v -0.127696 0.819206 -0.325202
+v -0.147191 0.8121 -0.335056
+v -0.0961567 0.829666 -0.288835
+v -0.10501 0.817882 -0.310157
+v -0.116614 0.840248 -0.308051
+v -0.109848 0.825231 -0.308777
+v -0.0934554 0.817549 -0.318853
+v -0.086003 0.820458 -0.291304
+v -0.100185 0.816042 -0.313185
+v -0.084054 0.824638 -0.268864
+v -0.0780919 0.822576 -0.248928
+v -0.0710179 0.824335 -0.270682
+v -0.0786961 0.822795 -0.269513
+v -0.0959818 0.847028 -0.267898
+v -0.0888081 0.831892 -0.249223
+v -0.0891348 0.832003 -0.268429
+v -0.0899479 0.821575 -0.232115
+v -0.102029 0.843892 -0.232989
+v -0.095203 0.828981 -0.232883
+v -0.0751675 0.819879 -0.228529
+v -0.084018 0.819324 -0.230668
+v -0.231902 0.754223 -0.26947
+v -0.226075 0.759994 -0.240858
+v -0.208076 0.764415 -0.241789
+v -0.216972 0.759952 -0.252956
+v -0.265103 0.760852 -0.220189
+v -0.223444 0.760451 -0.217689
+v -0.264144 0.760816 -0.243923
+v -0.240714 0.760641 -0.229576
+v -0.20189 0.765574 -0.22074
+v -0.215759 0.757673 -0.200462
+v -0.205912 0.761992 -0.209879
+v -0.201854 0.770051 -0.234112
+v -0.234872 0.742799 -0.306286
+v -0.277858 0.727498 -0.319523
+v -0.278995 0.751255 -0.279238
+v -0.256224 0.744786 -0.293184
+v -0.317986 0.708047 -0.333577
+v -0.258236 0.693552 -0.356883
+v -0.284726 0.698216 -0.343913
+v -0.37536 0.738148 -0.30116
+v -0.358205 0.712515 -0.324745
+v -0.402685 0.758643 -0.236201
+v -0.343965 0.756617 -0.269721
+v -0.390758 0.753705 -0.27032
+v -0.311167 0.763685 -0.236173
+v -0.300161 0.759231 -0.259113
+v -0.277376 0.753251 -0.197126
+v -0.248651 0.737773 -0.18066
+v -0.183529 0.754714 -0.188935
+v -0.23327 0.750984 -0.191422
+v -0.352617 0.756796 -0.208565
+v -0.303456 0.759588 -0.213776
+v -0.409003 0.736167 -0.174314
+v -0.409429 0.752424 -0.202781
+v -0.329845 0.712932 -0.159268
+v -0.39942 0.710994 -0.155057
+v -0.178779 0.718135 -0.16643
+v -0.252112 0.713887 -0.166078
+v -0.106741 0.782082 -0.178969
+v -0.0602661 0.797425 -0.177797
+v -0.0705006 0.811753 -0.205807
+v -0.0845451 0.798015 -0.188581
+v -0.128352 0.75368 -0.16956
+v -0.140544 0.766364 -0.181936
+v -0.0771688 0.744134 -0.128582
+v -0.122404 0.729951 -0.151266
+v -0.0359638 0.781745 -0.13792
+v -0.0372542 0.755486 -0.106376
+v -0.0258744 0.814456 -0.19275
+v -0.0313039 0.800824 -0.16616
+v -0.0438017 0.818098 -0.377412
+v -0.0647564 0.824008 -0.337021
+v -0.0269807 0.832763 -0.306956
+v -0.0352238 0.828719 -0.341387
+v -0.0968568 0.789376 -0.411772
+v -0.0494105 0.797681 -0.413491
+v -0.176513 0.760515 -0.410655
+v -0.134148 0.794627 -0.38492
+v -0.140375 0.776566 -0.412207
+v -0.105446 0.812324 -0.352131
+v -0.163948 0.787613 -0.366372
+v -0.134862 0.801108 -0.364702
+v -0.0673729 0.8251 -0.299446
+v -0.0829065 0.820245 -0.328154
+v -0.0209585 0.83068 -0.246717
+v -0.0431316 0.822381 -0.222498
+v -0.0222752 0.824379 -0.219341
+v -0.0427143 0.830406 -0.274631
+v -0.0223763 0.833445 -0.275661
+v -0.0571055 0.825864 -0.248034
+v -0.0589761 0.827121 -0.27268
+v -0.0611506 0.820889 -0.225684
+v -0.220893 0.723503 -0.390265
+v -0.192333 0.761236 -0.378584
+v -0.20182 0.742487 -0.402976
+v -0.229147 0.73046 -0.348619
+v -0.238328 0.704586 -0.373617
+v -0.202503 0.759013 -0.342238
+v -0.217964 0.748268 -0.323878
+v -0.185497 0.772855 -0.357643
+v -0.208154 0.830042 -0.298948
+v -0.20855 0.866089 -0.317157
+v -0.182638 0.844163 -0.332751
+v -0.197796 0.836731 -0.320106
+v -0.207473 0.823979 -0.249259
+v -0.22169 0.856724 -0.273536
+v -0.212209 0.824662 -0.273398
+v -0.225435 0.891324 -0.293612
+v -0.221713 0.885753 -0.253374
+v -0.227222 0.886884 -0.273083
+v -0.202088 0.898609 -0.32219
+v -0.216036 0.894273 -0.311279
+v -0.144889 0.85895 -0.323167
+v -0.140188 0.893495 -0.304854
+v -0.113654 0.869597 -0.287244
+v -0.127225 0.865094 -0.306986
+v -0.17602 0.88052 -0.329499
+v -0.164181 0.851772 -0.332764
+v -0.170407 0.915574 -0.314537
+v -0.186655 0.907214 -0.322451
+v -0.141278 0.921251 -0.283964
+v -0.154011 0.919177 -0.300925
+v -0.106278 0.87162 -0.248059
+v -0.126235 0.896792 -0.232757
+v -0.125921 0.862724 -0.222037
+v -0.112672 0.868658 -0.232446
+v -0.120264 0.900007 -0.266157
+v -0.106638 0.871852 -0.266963
+v -0.136706 0.926532 -0.249134
+v -0.136018 0.925025 -0.266007
+v -0.152825 0.914652 -0.226568
+v -0.141818 0.921807 -0.235425
+v -0.169297 0.842653 -0.222278
+v -0.201725 0.863227 -0.234578
+v -0.191458 0.831381 -0.232334
+v -0.158724 0.882351 -0.220155
+v -0.146124 0.853579 -0.218747
+v -0.191735 0.902282 -0.228209
+v -0.171199 0.908866 -0.224253
+v -0.209229 0.892731 -0.238166
+v -0.228365 -0.446181 0.420977
+v -0.20511 -0.461837 0.475502
+v -0.183641 -0.455489 0.534379
+v -0.21326 -0.450303 0.478539
+v -0.176881 -0.48852 0.469991
+v -0.17086 -0.474631 0.521789
+v -0.206663 -0.472505 0.418626
+v -0.194975 -0.474631 0.472428
+v -0.109552 -0.472057 0.595485
+v -0.142559 -0.466669 0.57469
+v -0.132033 -0.481167 0.556781
+v -0.140849 -0.473458 0.563316
+v -0.102981 -0.465723 0.632974
+v -0.143787 -0.462243 0.587914
+v -0.0755892 -0.465387 0.639018
+v -0.0439995 -0.453929 0.682063
+v -0.0705061 -0.461083 0.663389
+v -0.0917758 -0.479034 0.591505
+v -0.0549351 -0.471091 0.629642
+v -0.0815781 -0.471375 0.616771
+v -0.0238268 -0.46053 0.664171
+v -0.0329053 -0.483094 0.604968
+v -0.0276324 -0.470883 0.636565
+v -0.0210984 -0.44987 0.691901
+v -0.0317646 -0.445352 0.134276
+v -0.065741 -0.440111 0.119726
+v -0.0323291 -0.440606 0.100743
+v -0.034368 -0.461296 0.211229
+v -0.0636123 -0.450028 0.182161
+v -0.0320458 -0.45108 0.17059
+v -0.124102 -0.450719 0.228684
+v -0.0965198 -0.444834 0.175662
+v -0.0981278 -0.457279 0.230677
+v -0.0942201 -0.449501 0.201118
+v -0.138015 -0.443895 0.192281
+v -0.101319 -0.442167 0.150389
+v -0.163536 -0.447685 0.255959
+v -0.205661 -0.440923 0.30331
+v -0.174785 -0.442414 0.24495
+v -0.138385 -0.462583 0.271465
+v -0.179537 -0.460826 0.31166
+v -0.153491 -0.454897 0.266081
+v -0.213095 -0.454425 0.36305
+v -0.178344 -0.481123 0.364052
+v -0.199389 -0.4673 0.363772
+v -0.224677 -0.442616 0.362277
+v -0.0774159 -0.49456 0.56248
+v -0.0472884 -0.515301 0.519274
+v -0.0400356 -0.499329 0.565268
+v -0.12906 -0.497175 0.517019
+v -0.109485 -0.488354 0.558077
+v -0.151776 -0.503865 0.416665
+v -0.102374 -0.516827 0.468605
+v -0.144853 -0.503338 0.468859
+v -0.0552219 -0.529321 0.415644
+v -0.0529288 -0.526727 0.468796
+v -0.101727 -0.509066 0.362546
+v -0.046641 -0.500432 0.308431
+v -0.0524328 -0.518796 0.361628
+v -0.12954 -0.482953 0.312862
+v -0.144743 -0.495777 0.363499
+v -0.0778956 -0.474931 0.261875
+v -0.111951 -0.469275 0.268257
+v -0.0399262 -0.479506 0.257737
+v -0.164631 -0.0504489 0.951176
+v -0.139042 -0.0451633 0.963659
+v -0.137897 -0.0298265 0.961061
+v -0.16267 -0.0297463 0.947597
+v -0.150007 -0.038161 0.957034
+v -0.109851 -0.0516794 0.976283
+v -0.122462 -0.037914 0.969963
+v -0.133714 -0.0642499 0.965416
+v -0.125245 -0.0487781 0.969619
+v -0.130968 -0.0135459 0.956851
+v -0.109331 -0.0304408 0.974293
+v -0.124172 -0.0273895 0.966259
+v -0.18873 -0.0434748 0.935764
+v -0.213918 -0.078022 0.922841
+v -0.172782 -0.0812671 0.947651
+v -0.185903 -0.064746 0.940486
+v -0.24743 -0.125194 0.898745
+v -0.247517 -0.0516572 0.891815
+v -0.248772 -0.0872467 0.896118
+v -0.209867 -0.161573 0.925244
+v -0.2493 -0.167014 0.898651
+v -0.133001 -0.161124 0.956956
+v -0.163882 -0.120989 0.949271
+v -0.170754 -0.160591 0.943861
+v -0.119758 -0.0911109 0.968067
+v -0.149065 -0.0885975 0.957545
+v -0.165953 -0.00373407 0.934179
+v -0.206641 -0.0168625 0.913183
+v -0.181563 -0.0218941 0.9332
+v -0.116698 0.0146029 0.948102
+v -0.155037 0.0342159 0.918365
+v -0.143447 0.00727528 0.940085
+v -0.188508 0.055577 0.881861
+v -0.128713 0.0776757 0.9026
+v -0.159167 0.0664178 0.894094
+v -0.225597 0.0165132 0.878354
+v -0.216542 0.0481267 0.863247
+v -0.237857 -0.0169093 0.886885
+v -0.0314129 0.062618 0.944549
+v -0.0591053 0.0273947 0.96148
+v -0.0272441 0.00174637 0.985368
+v -0.0296124 0.0304081 0.965777
+v -0.0652762 0.0925539 0.91594
+v -0.032753 0.0962353 0.919852
+v -0.0939443 0.0532189 0.93429
+v -0.0973395 0.0863776 0.910036
+v -0.0802969 -0.0053962 0.974961
+v -0.0883591 0.0217178 0.955411
+v -0.0313918 -0.121887 0.983992
+v -0.0640997 -0.158632 0.974036
+v -0.0317497 -0.157085 0.978196
+v -0.0273784 -0.0760818 0.993721
+v -0.05977 -0.0942344 0.98419
+v -0.0298425 -0.0936976 0.988793
+v -0.0956523 -0.123707 0.972085
+v -0.0810019 -0.0733797 0.982855
+v -0.0898671 -0.0931811 0.977114
+v -0.0976501 -0.160225 0.966985
+v -0.0864529 -0.0640166 0.98579
+v -0.0789269 -0.06284 0.994959
+v -0.0955999 -0.0476973 0.985969
+v -0.0940951 -0.0565412 0.98502
+v -0.0525276 -0.0775374 1.01483
+v -0.0666347 -0.0644079 1.00864
+v -0.0598989 -0.0753391 1.00401
+v -0.0652946 -0.0700523 1.00523
+v -0.0598411 -0.0536198 1.01768
+v -0.0788425 -0.0488271 1.00064
+v -0.0525009 -0.0662129 1.0231
+v -0.0652608 -0.0582549 1.01251
+v -0.0861068 -0.0306433 0.989153
+v -0.0938769 -0.0385712 0.986828
+v -0.0582598 -0.0403976 1.0119
+v -0.0476184 -0.0223971 1.0006
+v -0.0694706 -0.025394 0.994502
+v -0.0426567 -0.0627588 1.03251
+v -0.0344846 -0.0534302 1.03232
+v -0.0490439 -0.0525287 1.02501
+v -0.0209047 -0.039677 1.02317
+v -0.0152462 -0.0676801 1.04602
+v -0.0177789 -0.0547726 1.03745
+v -0.0242006 -0.0212264 1.00515
+v -0.0209476 -0.0779737 1.00716
+v -0.0477856 -0.0718253 0.995571
+v -0.0242757 -0.0726037 0.999895
+v -0.0152667 -0.0940994 1.02438
+v -0.0345351 -0.0845283 1.01121
+v -0.0178069 -0.0869021 1.01538
+v -0.0583871 -0.073576 0.998233
+v -0.0427012 -0.0854758 1.01505
+v -0.0491057 -0.0804193 1.00671
+v -0.0697639 -0.0690371 0.990088
+v -0.0394909 -0.0796254 1.02906
+v -0.0271444 -0.0763309 1.04139
+v -0.0399318 -0.0720589 1.03309
+v -0.0271637 -0.0905263 1.02914
+v -0.0399551 -0.0846544 1.02277
+v -0.0132272 -0.0889316 1.04223
+v -0.01374 -0.0942759 1.03402
+v -0.0137291 -0.0795664 1.04692
+vt 0.800375 0.667457
+vt 0.789584 0.668215
+vt 0.799923 0.663933
+vt 0.789057 0.664897
+vt 0.811842 0.670848
+vt 0.80103 0.671718
+vt 0.811217 0.666381
+vt 0.810346 0.655189
+vt 0.810548 0.658683
+vt 0.799442 0.658809
+vt 0.799641 0.661178
+vt 0.788653 0.662549
+vt 0.821717 0.660492
+vt 0.821466 0.655998
+vt 0.832622 0.658487
+vt 0.832389 0.653218
+vt 0.821251 0.651569
+vt 0.822539 0.669852
+vt 0.822041 0.665116
+vt 0.832855 0.663756
+vt 0.768389 0.668485
+vt 0.778915 0.668524
+vt 0.769427 0.673433
+vt 0.779732 0.673035
+vt 0.778165 0.664791
+vt 0.74577 0.661865
+vt 0.756532 0.663039
+vt 0.747747 0.667793
+vt 0.758027 0.6682
+vt 0.759367 0.673605
+vt 0.753496 0.653114
+vt 0.765177 0.656219
+vt 0.755019 0.658037
+vt 0.76625 0.660042
+vt 0.743793 0.655937
+vt 0.777492 0.661858
+vt 0.776857 0.659324
+vt 0.860743 0.66105
+vt 0.85437 0.658994
+vt 0.861885 0.655612
+vt 0.85471 0.654338
+vt 0.858253 0.665619
+vt 0.86356 0.668573
+vt 0.867109 0.663417
+vt 0.870052 0.650753
+vt 0.862482 0.650015
+vt 0.870421 0.644717
+vt 0.862589 0.644612
+vt 0.85505 0.649682
+vt 0.883125 0.662497
+vt 0.876218 0.659429
+vt 0.885992 0.655087
+vt 0.877897 0.652301
+vt 0.878895 0.645534
+vt 0.869196 0.672347
+vt 0.87346 0.666407
+vt 0.879453 0.669777
+vt 0.889305 0.677736
+vt 0.893116 0.683944
+vt 0.881403 0.682863
+vt 0.884277 0.688027
+vt 0.896144 0.692562
+vt 0.90593 0.689794
+vt 0.901798 0.679591
+vt 0.889995 0.666526
+vt 0.884742 0.673283
+vt 0.878268 0.679148
+vt 0.902596 0.665971
+vt 0.89431 0.659538
+vt 0.908133 0.659549
+vt 0.89794 0.652608
+vt 0.916609 0.686107
+vt 0.910596 0.674701
+vt 0.918591 0.668435
+vt 0.713434 0.678889
+vt 0.718232 0.673865
+vt 0.719971 0.68226
+vt 0.723135 0.677582
+vt 0.720227 0.663887
+vt 0.722594 0.669482
+vt 0.714094 0.668877
+vt 0.690854 0.689174
+vt 0.699881 0.681004
+vt 0.701094 0.693026
+vt 0.707762 0.685198
+vt 0.716169 0.688193
+vt 0.697269 0.662673
+vt 0.702107 0.668949
+vt 0.686646 0.668347
+vt 0.692886 0.67536
+vt 0.6822 0.683174
+vt 0.710252 0.663404
+vt 0.717861 0.658291
+vt 0.706788 0.657767
+vt 0.858365 0.672357
+vt 0.853611 0.668611
+vt 0.843559 0.666338
+vt 0.848795 0.664994
+vt 0.847566 0.670615
+vt 0.855227 0.679121
+vt 0.862995 0.676364
+vt 0.858343 0.683022
+vt 0.867102 0.680206
+vt 0.834415 0.678265
+vt 0.843484 0.67676
+vt 0.836434 0.683129
+vt 0.846261 0.68128
+vt 0.848914 0.685681
+vt 0.840865 0.672225
+vt 0.838324 0.667682
+vt 0.832962 0.673545
+vt 0.812703 0.675707
+vt 0.801925 0.676679
+vt 0.791163 0.677558
+vt 0.792268 0.683394
+vt 0.803066 0.682261
+vt 0.82446 0.679693
+vt 0.823312 0.674685
+vt 0.815369 0.686723
+vt 0.826081 0.684957
+vt 0.817113 0.692704
+vt 0.827981 0.690382
+vt 0.79352 0.689825
+vt 0.804461 0.688386
+vt 0.806115 0.694921
+vt 0.798864 0.71184
+vt 0.810186 0.708981
+vt 0.801046 0.72017
+vt 0.812549 0.716812
+vt 0.823667 0.712859
+vt 0.826337 0.721153
+vt 0.81509 0.725328
+vt 0.819076 0.698906
+vt 0.808032 0.701735
+vt 0.796857 0.704106
+vt 0.829963 0.69587
+vt 0.840668 0.692798
+vt 0.832092 0.701818
+vt 0.842717 0.698135
+vt 0.834432 0.708622
+vt 0.837107 0.716573
+vt 0.844937 0.704332
+vt 0.867471 0.703655
+vt 0.86533 0.696176
+vt 0.877095 0.699305
+vt 0.875007 0.692109
+vt 0.872828 0.687102
+vt 0.853212 0.694487
+vt 0.855272 0.700217
+vt 0.857607 0.707609
+vt 0.851162 0.689863
+vt 0.861137 0.686828
+vt 0.870286 0.68346
+vt 0.737342 0.674399
+vt 0.736161 0.679693
+vt 0.731151 0.674738
+vt 0.729268 0.679242
+vt 0.727195 0.684244
+vt 0.743049 0.685744
+vt 0.743345 0.679784
+vt 0.751789 0.685777
+vt 0.751276 0.679628
+vt 0.743533 0.67406
+vt 0.73204 0.698591
+vt 0.733516 0.691513
+vt 0.741945 0.699272
+vt 0.742539 0.692176
+vt 0.751932 0.692359
+vt 0.72474 0.690242
+vt 0.722029 0.697328
+vt 0.716708 0.714828
+vt 0.715092 0.72481
+vt 0.702617 0.714132
+vt 0.700538 0.724407
+vt 0.728191 0.725265
+vt 0.727285 0.735883
+vt 0.713954 0.735509
+vt 0.730575 0.706575
+vt 0.719186 0.705595
+vt 0.70688 0.704276
+vt 0.740902 0.715992
+vt 0.741399 0.707225
+vt 0.75229 0.716217
+vt 0.752093 0.707508
+vt 0.740453 0.725533
+vt 0.740092 0.735897
+vt 0.752425 0.725529
+vt 0.789126 0.722623
+vt 0.790991 0.731868
+vt 0.776943 0.72426
+vt 0.778298 0.733889
+vt 0.787299 0.713921
+vt 0.76386 0.71596
+vt 0.764649 0.725167
+vt 0.765442 0.735096
+vt 0.773153 0.698992
+vt 0.774288 0.706826
+vt 0.762394 0.699466
+vt 0.763091 0.707391
+vt 0.785576 0.705767
+vt 0.784024 0.698116
+vt 0.781583 0.684283
+vt 0.780606 0.678305
+vt 0.77033 0.678903
+vt 0.772112 0.691699
+vt 0.782707 0.69092
+vt 0.761214 0.685501
+vt 0.761821 0.692189
+vt 0.760412 0.679339
+vt 0.899727 0.753469
+vt 0.893091 0.747999
+vt 0.905106 0.743418
+vt 0.897762 0.739947
+vt 0.887783 0.741206
+vt 0.880731 0.745814
+vt 0.886161 0.754214
+vt 0.904716 0.775474
+vt 0.898651 0.768079
+vt 0.914323 0.765617
+vt 0.907202 0.758849
+vt 0.913707 0.74723
+vt 0.877978 0.781668
+vt 0.872582 0.772759
+vt 0.888929 0.775502
+vt 0.883177 0.767576
+vt 0.894526 0.783756
+vt 0.872228 0.750111
+vt 0.877492 0.759308
+vt 0.867387 0.763696
+vt 0.843778 0.771729
+vt 0.83028 0.775596
+vt 0.839713 0.76143
+vt 0.826842 0.765165
+vt 0.83386 0.786091
+vt 0.815809 0.779609
+vt 0.818643 0.790465
+vt 0.860927 0.77752
+vt 0.856148 0.767799
+vt 0.851602 0.757722
+vt 0.852231 0.792074
+vt 0.865736 0.787127
+vt 0.856409 0.802551
+vt 0.870501 0.796949
+vt 0.837491 0.796705
+vt 0.821654 0.801351
+vt 0.841215 0.807639
+vt 0.724356 0.780932
+vt 0.721897 0.791331
+vt 0.712133 0.77893
+vt 0.708432 0.787628
+vt 0.688991 0.788336
+vt 0.696924 0.781352
+vt 0.7032 0.796344
+vt 0.734947 0.804179
+vt 0.736823 0.793158
+vt 0.751835 0.804371
+vt 0.752608 0.793134
+vt 0.738282 0.782037
+vt 0.732844 0.815397
+vt 0.730608 0.827229
+vt 0.715066 0.812163
+vt 0.711339 0.823597
+vt 0.751007 0.816099
+vt 0.681689 0.797702
+vt 0.697958 0.806263
+vt 0.692594 0.817566
+vt 0.802671 0.795049
+vt 0.800517 0.783977
+vt 0.798447 0.773092
+vt 0.783398 0.776846
+vt 0.784699 0.788076
+vt 0.788691 0.822864
+vt 0.787323 0.810943
+vt 0.807157 0.818048
+vt 0.804858 0.806345
+vt 0.768947 0.80275
+vt 0.769269 0.814405
+vt 0.769629 0.82649
+vt 0.768262 0.779889
+vt 0.768648 0.791284
+vt 0.829319 0.730383
+vt 0.817782 0.734631
+vt 0.807922 0.748352
+vt 0.80562 0.738467
+vt 0.820629 0.744508
+vt 0.843731 0.736281
+vt 0.840238 0.725959
+vt 0.854152 0.732184
+vt 0.850604 0.721518
+vt 0.847491 0.747042
+vt 0.836031 0.750919
+vt 0.858049 0.743181
+vt 0.823637 0.754744
+vt 0.810425 0.758547
+vt 0.739591 0.758816
+vt 0.739858 0.747133
+vt 0.753132 0.758368
+vt 0.752889 0.746823
+vt 0.726681 0.747058
+vt 0.739135 0.77052
+vt 0.725558 0.770057
+vt 0.753262 0.7701
+vt 0.69786 0.758579
+vt 0.712375 0.758296
+vt 0.700367 0.76912
+vt 0.712782 0.769064
+vt 0.712907 0.746891
+vt 0.698005 0.747078
+vt 0.929639 0.699595
+vt 0.928443 0.716654
+vt 0.918934 0.700571
+vt 0.918764 0.716584
+vt 0.908376 0.702434
+vt 0.917291 0.732639
+vt 0.908091 0.730817
+vt 0.926839 0.733706
+vt 0.899583 0.717133
+vt 0.899655 0.729345
+vt 0.890467 0.718513
+vt 0.891789 0.729325
+vt 0.898356 0.704249
+vt 0.888732 0.70646
+vt 0.766981 0.757058
+vt 0.767683 0.768445
+vt 0.782175 0.765786
+vt 0.779622 0.744124
+vt 0.766227 0.745834
+vt 0.794659 0.751851
+vt 0.792827 0.741649
+vt 0.796513 0.762357
+vt 0.870023 0.713177
+vt 0.860485 0.717218
+vt 0.863872 0.728122
+vt 0.881776 0.720957
+vt 0.879363 0.709514
+vt 0.876505 0.735646
+vt 0.884297 0.731857
+vt 0.867737 0.739401
+vt 0.942885 0.685053
+vt 0.953367 0.687709
+vt 0.940102 0.70044
+vt 0.950147 0.702062
+vt 0.938165 0.716742
+vt 0.968754 0.704629
+vt 0.964644 0.717556
+vt 0.959599 0.703413
+vt 0.956139 0.717201
+vt 0.963128 0.689798
+vt 0.953145 0.730828
+vt 0.951463 0.743182
+vt 0.945243 0.731521
+vt 0.944286 0.744982
+vt 0.960533 0.730482
+vt 0.936315 0.732906
+vt 0.935707 0.747908
+vt 0.168776 0.186925
+vt 0.153188 0.186399
+vt 0.169355 0.173108
+vt 0.15372 0.172537
+vt 0.137511 0.174005
+vt 0.154886 0.15836
+vt 0.139434 0.160894
+vt 0.135878 0.200202
+vt 0.153921 0.199631
+vt 0.13741 0.213278
+vt 0.156324 0.212569
+vt 0.169774 0.199674
+vt 0.115769 0.200908
+vt 0.0946273 0.201684
+vt 0.11798 0.188318
+vt 0.0992616 0.189776
+vt 0.115489 0.212947
+vt 0.120823 0.175844
+vt 0.123982 0.163427
+vt 0.103896 0.177868
+vt 0.197705 0.18287
+vt 0.184318 0.177386
+vt 0.204286 0.174321
+vt 0.191177 0.166862
+vt 0.182759 0.189357
+vt 0.218051 0.196712
+vt 0.206672 0.195261
+vt 0.218754 0.189245
+vt 0.208611 0.187062
+vt 0.213838 0.179979
+vt 0.195322 0.2024
+vt 0.206572 0.20383
+vt 0.197221 0.212029
+vt 0.207879 0.212331
+vt 0.217851 0.204542
+vt 0.183316 0.200806
+vt 0.185616 0.211745
+vt 0.200177 0.221191
+vt 0.189289 0.222187
+vt 0.176497 0.223558
+vt 0.183072 0.235218
+vt 0.194814 0.232099
+vt 0.222696 0.225794
+vt 0.213531 0.227462
+vt 0.220244 0.219432
+vt 0.210159 0.220329
+vt 0.219048 0.242324
+vt 0.210793 0.236729
+vt 0.224444 0.237674
+vt 0.218111 0.23337
+vt 0.225701 0.231231
+vt 0.192704 0.246651
+vt 0.20267 0.241446
+vt 0.21291 0.248905
+vt 0.147552 0.942756
+vt 0.13896 0.937482
+vt 0.15442 0.936036
+vt 0.145004 0.930904
+vt 0.131991 0.943023
+vt 0.122815 0.936756
+vt 0.130641 0.931218
+vt 0.148962 0.952945
+vt 0.155644 0.947301
+vt 0.157149 0.955791
+vt 0.164222 0.950166
+vt 0.162965 0.941085
+vt 0.125551 0.956525
+vt 0.133402 0.953212
+vt 0.134656 0.9604
+vt 0.141677 0.957166
+vt 0.14902 0.960085
+vt 0.115375 0.942445
+vt 0.124894 0.947972
+vt 0.116632 0.951525
+vt 0.871579 0.942636
+vt 0.878596 0.946725
+vt 0.86416 0.948207
+vt 0.872055 0.95214
+vt 0.886187 0.942108
+vt 0.885478 0.951547
+vt 0.893652 0.946846
+vt 0.872605 0.934139
+vt 0.865133 0.939046
+vt 0.865671 0.931509
+vt 0.858017 0.93669
+vt 0.857275 0.944342
+vt 0.895145 0.930139
+vt 0.887213 0.93361
+vt 0.887365 0.926738
+vt 0.880339 0.930202
+vt 0.873942 0.927331
+vt 0.901581 0.942011
+vt 0.893918 0.937827
+vt 0.902324 0.934359
+vt 0.937778 0.969427
+vt 0.924742 0.963135
+vt 0.963417 0.953814
+vt 0.94277 0.951002
+vt 0.929636 0.938971
+vt 0.950985 0.939207
+vt 0.924587 0.948175
+vt 0.897606 0.964725
+vt 0.904637 0.974329
+vt 0.880983 0.968761
+vt 0.883219 0.980673
+vt 0.91214 0.985041
+vt 0.902437 0.951808
+vt 0.891517 0.95734
+vt 0.87954 0.960235
+vt 0.911331 0.94532
+vt 0.915423 0.938551
+vt 0.837802 0.927994
+vt 0.819804 0.940196
+vt 0.826204 0.927352
+vt 0.800609 0.943494
+vt 0.811605 0.952025
+vt 0.831685 0.94681
+vt 0.836697 0.937574
+vt 0.857877 0.916732
+vt 0.87928 0.910354
+vt 0.863614 0.921492
+vt 0.880267 0.917735
+vt 0.851798 0.911211
+vt 0.857703 0.929735
+vt 0.868668 0.924726
+vt 0.880724 0.922242
+vt 0.845039 0.94326
+vt 0.848987 0.936302
+vt 0.924475 0.930625
+vt 0.941648 0.929031
+vt 0.96114 0.927417
+vt 0.933224 0.916635
+vt 0.922496 0.92029
+vt 0.903495 0.926861
+vt 0.911938 0.93218
+vt 0.897103 0.91889
+vt 0.892712 0.923251
+vt 0.905308 0.905852
+vt 0.901263 0.912802
+vt 0.820926 0.962167
+vt 0.836809 0.955124
+vt 0.840048 0.970839
+vt 0.802886 0.96989
+vt 0.830758 0.980145
+vt 0.856645 0.954682
+vt 0.848379 0.949442
+vt 0.864116 0.967327
+vt 0.867473 0.958814
+vt 0.86125 0.978259
+vt 0.85863 0.990399
+vt 0.191842 0.938528
+vt 0.18483 0.928669
+vt 0.214023 0.932819
+vt 0.202519 0.920832
+vt 0.190621 0.899112
+vt 0.222723 0.912315
+vt 0.180131 0.909732
+vt 0.161958 0.928403
+vt 0.17217 0.935147
+vt 0.177196 0.942628
+vt 0.133862 0.910183
+vt 0.152639 0.912726
+vt 0.135915 0.920575
+vt 0.149326 0.922843
+vt 0.155653 0.899975
+vt 0.158518 0.885909
+vt 0.131 0.896051
+vt 0.0727151 0.905945
+vt 0.0872616 0.914579
+vt 0.045865 0.922565
+vt 0.0688679 0.927864
+vt 0.0891699 0.932953
+vt 0.0611106 0.940896
+vt 0.0850445 0.943365
+vt 0.116168 0.914322
+vt 0.108088 0.902447
+vt 0.0995652 0.889326
+vt 0.112481 0.9301
+vt 0.123364 0.923705
+vt 0.10407 0.937626
+vt 0.101008 0.945485
+vt 0.141749 0.967835
+vt 0.159735 0.964585
+vt 0.141615 0.973986
+vt 0.164947 0.968203
+vt 0.198452 0.956803
+vt 0.170264 0.972446
+vt 0.186612 0.957297
+vt 0.165935 0.957311
+vt 0.154732 0.962219
+vt 0.141924 0.964387
+vt 0.187555 0.948567
+vt 0.17427 0.950399
+vt 0.205846 0.945238
+vt 0.226639 0.94116
+vt 0.0918948 0.952851
+vt 0.072194 0.95227
+vt 0.0497805 0.951411
+vt 0.080546 0.963637
+vt 0.0937428 0.962144
+vt 0.116457 0.959008
+vt 0.10617 0.952878
+vt 0.123264 0.966181
+vt 0.12877 0.963081
+vt 0.111312 0.975863
+vt 0.117382 0.970674
+vt 0.168188 0.257701
+vt 0.179853 0.251475
+vt 0.181861 0.270696
+vt 0.19145 0.264485
+vt 0.168322 0.238524
+vt 0.157989 0.278114
+vt 0.139623 0.26474
+vt 0.161775 0.267111
+vt 0.143271 0.254169
+vt 0.176773 0.279775
+vt 0.122129 0.24267
+vt 0.0996677 0.231893
+vt 0.121641 0.233595
+vt 0.0964428 0.225792
+vt 0.121258 0.251367
+vt 0.141637 0.226246
+vt 0.118437 0.223765
+vt 0.0932179 0.219692
+vt 0.160802 0.225544
+vt 0.0419152 0.53618
+vt 0.0472563 0.523503
+vt 0.0332681 0.539408
+vt 0.038828 0.522754
+vt 0.0547796 0.5128
+vt 0.0629257 0.517019
+vt 0.0550589 0.52441
+vt 0.0478604 0.554178
+vt 0.0476885 0.543737
+vt 0.0376346 0.561691
+vt 0.0380519 0.549348
+vt 0.0277081 0.556063
+vt 0.0662616 0.533766
+vt 0.0581 0.532485
+vt 0.0632249 0.540131
+vt 0.0559107 0.540335
+vt 0.055828 0.548702
+vt 0.0616102 0.525629
+vt 0.0679204 0.520553
+vt 0.0687181 0.527585
+vt 0.0509533 0.580146
+vt 0.0426311 0.57189
+vt 0.0442882 0.588789
+vt 0.0332182 0.580753
+vt 0.0513392 0.563763
+vt 0.0757862 0.577556
+vt 0.0655669 0.57722
+vt 0.0718228 0.588438
+vt 0.060513 0.586655
+vt 0.0553583 0.596825
+vt 0.0637107 0.564356
+vt 0.0704192 0.569255
+vt 0.070071 0.55722
+vt 0.0760258 0.562179
+vt 0.0789464 0.569999
+vt 0.0586375 0.557107
+vt 0.0648092 0.550898
+vt 0.0904211 0.548013
+vt 0.0833422 0.555408
+vt 0.0957205 0.55254
+vt 0.0881221 0.560358
+vt 0.0767944 0.549978
+vt 0.090315 0.526886
+vt 0.0877993 0.53464
+vt 0.0982366 0.53011
+vt 0.0953147 0.539062
+vt 0.100791 0.542278
+vt 0.0746039 0.537227
+vt 0.0781908 0.530701
+vt 0.0798442 0.524364
+vt 0.0701373 0.544116
+vt 0.925171 0.815649
+vt 0.920243 0.805051
+vt 0.934775 0.80669
+vt 0.929739 0.796503
+vt 0.93322 0.778201
+vt 0.938087 0.788562
+vt 0.924761 0.786096
+vt 0.904862 0.803051
+vt 0.910168 0.813683
+vt 0.893079 0.810992
+vt 0.898686 0.821929
+vt 0.915568 0.824608
+vt 0.910263 0.784583
+vt 0.899742 0.793007
+vt 0.888113 0.80057
+vt 0.919899 0.77525
+vt 0.928896 0.766724
+vt 0.815787 0.855921
+vt 0.819336 0.868912
+vt 0.793818 0.862058
+vt 0.795798 0.875795
+vt 0.831679 0.83698
+vt 0.836161 0.849417
+vt 0.812478 0.843031
+vt 0.770591 0.852857
+vt 0.771369 0.867508
+vt 0.748599 0.854921
+vt 0.748212 0.870509
+vt 0.772261 0.882678
+vt 0.790259 0.835472
+vt 0.77004 0.839246
+vt 0.749297 0.841006
+vt 0.80965 0.830342
+vt 0.827993 0.824626
+vt 0.724119 0.869298
+vt 0.722193 0.885121
+vt 0.69961 0.865356
+vt 0.695663 0.88068
+vt 0.726136 0.854086
+vt 0.681189 0.844692
+vt 0.675206 0.860163
+vt 0.659402 0.836491
+vt 0.651382 0.852648
+vt 0.669133 0.87624
+vt 0.686995 0.830434
+vt 0.707465 0.836555
+vt 0.667117 0.821916
+vt 0.728335 0.840094
+vt 0.844791 0.630193
+vt 0.844724 0.634228
+vt 0.843982 0.630547
+vt 0.842478 0.634715
+vt 0.837664 0.639425
+vt 0.840419 0.635163
+vt 0.840973 0.638882
+vt 0.848717 0.637567
+vt 0.847346 0.633664
+vt 0.854083 0.636646
+vt 0.851791 0.632782
+vt 0.845599 0.629839
+vt 0.849337 0.641623
+vt 0.849581 0.645756
+vt 0.844215 0.642375
+vt 0.843773 0.646487
+vt 0.855223 0.640762
+vt 0.839469 0.64305
+vt 0.83491 0.643687
+vt 0.837964 0.647218
+vt 0.725656 0.643083
+vt 0.719643 0.64232
+vt 0.724939 0.640251
+vt 0.71996 0.638782
+vt 0.711679 0.641543
+vt 0.711659 0.645946
+vt 0.719749 0.645744
+vt 0.738356 0.646598
+vt 0.732577 0.6462
+vt 0.736627 0.644893
+vt 0.731247 0.643959
+vt 0.729918 0.641719
+vt 0.727515 0.648634
+vt 0.733906 0.64844
+vt 0.728655 0.651352
+vt 0.735235 0.65068
+vt 0.740086 0.648303
+vt 0.712958 0.649959
+vt 0.7207 0.648941
+vt 0.722074 0.652024
+vt 0.884966 0.82932
+vt 0.890192 0.840682
+vt 0.86976 0.836188
+vt 0.87442 0.847798
+vt 0.879899 0.818138
+vt 0.849222 0.830896
+vt 0.853822 0.842863
+vt 0.858647 0.854913
+vt 0.860816 0.813432
+vt 0.845073 0.819095
+vt 0.87515 0.807317
+vt 0.953522 0.784398
+vt 0.950371 0.775653
+vt 0.958094 0.777732
+vt 0.955796 0.769832
+vt 0.959544 0.757237
+vt 0.961105 0.764151
+vt 0.953683 0.761713
+vt 0.940794 0.771842
+vt 0.944716 0.781755
+vt 0.94895 0.791065
+vt 0.945308 0.756461
+vt 0.937496 0.760723
+vt 0.957984 0.750323
+vt 0.951941 0.753153
+vt 0.237985 0.738297
+vt 0.268705 0.737229
+vt 0.241633 0.762787
+vt 0.268705 0.761697
+vt 0.200475 0.715676
+vt 0.234693 0.711537
+vt 0.207029 0.741466
+vt 0.222333 0.788147
+vt 0.245519 0.78554
+vt 0.230341 0.810102
+vt 0.249523 0.807426
+vt 0.268705 0.784364
+vt 0.159946 0.773803
+vt 0.187254 0.769363
+vt 0.175961 0.794629
+vt 0.199147 0.791261
+vt 0.211159 0.812779
+vt 0.165843 0.72393
+vt 0.175598 0.746705
+vt 0.14393 0.752977
+vt 0.253345 0.280308
+vt 0.268706 0.280765
+vt 0.253464 0.295744
+vt 0.268576 0.296249
+vt 0.238317 0.265747
+vt 0.253558 0.266767
+vt 0.238168 0.279086
+vt 0.239577 0.31028
+vt 0.253859 0.311911
+vt 0.240382 0.325349
+vt 0.254334 0.327593
+vt 0.268615 0.312525
+vt 0.21323 0.291052
+vt 0.225134 0.292637
+vt 0.214544 0.305067
+vt 0.22632 0.307825
+vt 0.227134 0.322083
+vt 0.223441 0.277203
+vt 0.223004 0.263413
+vt 0.210121 0.276206
+vt 0.268934 0.241592
+vt 0.268768 0.248379
+vt 0.255927 0.241503
+vt 0.254998 0.248112
+vt 0.243677 0.241038
+vt 0.256594 0.23557
+vt 0.244966 0.235422
+vt 0.239676 0.25526
+vt 0.254158 0.256284
+vt 0.268799 0.256538
+vt 0.225586 0.253155
+vt 0.229504 0.24562
+vt 0.233074 0.239996
+vt 0.235164 0.235206
+vt 0.672084 0.639787
+vt 0.66845 0.628475
+vt 0.686608 0.639919
+vt 0.68396 0.631421
+vt 0.660815 0.654442
+vt 0.655577 0.64149
+vt 0.676137 0.650509
+vt 0.700237 0.640623
+vt 0.701506 0.646666
+vt 0.69947 0.634368
+vt 0.693146 0.655714
+vt 0.703781 0.652284
+vt 0.681027 0.660046
+vt 0.66725 0.665722
+vt 0.869213 0.629853
+vt 0.859507 0.631337
+vt 0.868187 0.625508
+vt 0.857298 0.627496
+vt 0.861301 0.635363
+vt 0.891113 0.635857
+vt 0.879896 0.634217
+vt 0.89156 0.629458
+vt 0.879626 0.628853
+vt 0.879077 0.62352
+vt 0.870406 0.639191
+vt 0.879607 0.639641
+vt 0.889853 0.641715
+vt 0.862262 0.639758
+vt 0.92121 0.637592
+vt 0.905828 0.63279
+vt 0.925118 0.630408
+vt 0.907542 0.62597
+vt 0.903798 0.639508
+vt 0.942567 0.656273
+vt 0.930472 0.650822
+vt 0.949711 0.646823
+vt 0.936482 0.642606
+vt 0.942694 0.634846
+vt 0.912967 0.652217
+vt 0.924862 0.659954
+vt 0.936428 0.667127
+vt 0.901136 0.646022
+vt 0.966001 0.650564
+vt 0.958961 0.649232
+vt 0.970644 0.639264
+vt 0.965457 0.639274
+vt 0.952791 0.65981
+vt 0.969418 0.663982
+vt 0.972601 0.651548
+vt 0.976942 0.665473
+vt 0.97898 0.652359
+vt 0.975831 0.639254
+vt 0.957281 0.67448
+vt 0.966331 0.676693
+vt 0.974903 0.678588
+vt 0.947274 0.67163
+vt 0.269766 0.158525
+vt 0.250658 0.159766
+vt 0.270029 0.149052
+vt 0.248065 0.150831
+vt 0.220307 0.146351
+vt 0.245291 0.141613
+vt 0.226699 0.154829
+vt 0.24059 0.178843
+vt 0.237237 0.170956
+vt 0.254532 0.176342
+vt 0.25289 0.168134
+vt 0.269586 0.166899
+vt 0.216438 0.167843
+vt 0.223799 0.174757
+vt 0.228354 0.181409
+vt 0.20652 0.159758
+vt 0.195322 0.151089
+vt 0.255674 0.193252
+vt 0.255353 0.184799
+vt 0.26904 0.192481
+vt 0.269203 0.183716
+vt 0.242017 0.186776
+vt 0.242969 0.210229
+vt 0.242663 0.202753
+vt 0.255922 0.209205
+vt 0.255817 0.201447
+vt 0.269103 0.200716
+vt 0.229862 0.196333
+vt 0.229944 0.203984
+vt 0.230377 0.211402
+vt 0.229853 0.188704
+vt 0.256133 0.216346
+vt 0.269011 0.215922
+vt 0.256393 0.223033
+vt 0.26887 0.222737
+vt 0.243486 0.217235
+vt 0.244951 0.229727
+vt 0.256648 0.229427
+vt 0.268973 0.2293
+vt 0.232926 0.224635
+vt 0.234642 0.230172
+vt 0.231438 0.218332
+vt 0.231875 0.681974
+vt 0.195405 0.687835
+vt 0.268705 0.681041
+vt 0.268707 0.649795
+vt 0.231298 0.650222
+vt 0.159651 0.701684
+vt 0.124255 0.719528
+vt 0.152896 0.681504
+vt 0.120595 0.706905
+vt 0.162996 0.634435
+vt 0.141452 0.664926
+vt 0.142052 0.61864
+vt 0.127664 0.650149
+vt 0.116935 0.694282
+vt 0.211524 0.580428
+vt 0.203702 0.612749
+vt 0.187831 0.570739
+vt 0.178593 0.601672
+vt 0.15644 0.58713
+vt 0.268705 0.617234
+vt 0.234725 0.616894
+vt 0.239135 0.585332
+vt 0.199167 0.27764
+vt 0.203439 0.290476
+vt 0.191338 0.282164
+vt 0.195938 0.298294
+vt 0.204702 0.30253
+vt 0.194745 0.309006
+vt 0.204397 0.314025
+vt 0.187394 0.290439
+vt 0.185378 0.30188
+vt 0.158413 0.418134
+vt 0.159054 0.422514
+vt 0.151701 0.418126
+vt 0.153357 0.422511
+vt 0.148767 0.427196
+vt 0.147324 0.422254
+vt 0.154331 0.42702
+vt 0.164378 0.426932
+vt 0.164461 0.422716
+vt 0.169254 0.42687
+vt 0.169733 0.422548
+vt 0.164807 0.418553
+vt 0.159617 0.435925
+vt 0.159655 0.431437
+vt 0.163693 0.43525
+vt 0.1639 0.431123
+vt 0.16807 0.431122
+vt 0.15511 0.43158
+vt 0.150273 0.432153
+vt 0.155198 0.436148
+vt 0.137897 0.391939
+vt 0.139469 0.389504
+vt 0.17063 0.392906
+vt 0.177579 0.390684
+vt 0.108525 0.396264
+vt 0.10962 0.392167
+vt 0.136029 0.394644
+vt 0.189135 0.398093
+vt 0.201386 0.395105
+vt 0.205359 0.404085
+vt 0.221392 0.400547
+vt 0.215689 0.391863
+vt 0.159402 0.397834
+vt 0.180988 0.400577
+vt 0.156251 0.401361
+vt 0.175531 0.40322
+vt 0.1956 0.40625
+vt 0.133566 0.39789
+vt 0.106177 0.400376
+vt 0.13333 0.401583
+vt 0.105778 0.419582
+vt 0.112909 0.431692
+vt 0.0857539 0.42372
+vt 0.0978159 0.435407
+vt 0.0792543 0.412786
+vt 0.0493589 0.420043
+vt 0.0604704 0.429958
+vt 0.113824 0.445537
+vt 0.130697 0.452864
+vt 0.104492 0.449389
+vt 0.127195 0.455721
+vt 0.123421 0.442151
+vt 0.0620736 0.445754
+vt 0.0967788 0.453971
+vt 0.042503 0.451596
+vt 0.0898755 0.458919
+vt 0.12693 0.459685
+vt 0.0109524 0.427572
+vt 0.0292777 0.436932
+vt -0.00486957 0.444274
+vt 0.218498 0.42425
+vt 0.211205 0.424626
+vt 0.236149 0.408646
+vt 0.221871 0.411246
+vt 0.209393 0.413479
+vt 0.194343 0.436667
+vt 0.197771 0.438115
+vt 0.182271 0.446693
+vt 0.18375 0.449328
+vt 0.200848 0.439853
+vt 0.190212 0.435797
+vt 0.185733 0.435099
+vt 0.197971 0.425316
+vt 0.191608 0.425692
+vt 0.179819 0.445093
+vt 0.200514 0.414982
+vt 0.193566 0.416303
+vt 0.160223 0.460849
+vt 0.159192 0.459271
+vt 0.17171 0.458153
+vt 0.171322 0.455881
+vt 0.17064 0.453743
+vt 0.14455 0.458507
+vt 0.146064 0.460995
+vt 0.148735 0.463545
+vt 0.158128 0.455703
+vt 0.14535 0.456142
+vt 0.158194 0.452773
+vt 0.147456 0.453335
+vt 0.169371 0.451874
+vt 0.167924 0.449711
+vt 0.0851413 0.39891
+vt 0.0922294 0.393554
+vt 0.0991561 0.388722
+vt 0.100257 0.388524
+vt 0.0806903 0.395723
+vt 0.0520286 0.41091
+vt 0.0799332 0.404918
+vt 0.0223556 0.413036
+vt 0.0433475 0.407848
+vt -0.0149432 0.424994
+vt 0.019659 0.417742
+vt 0.0699681 0.398298
+vt 0.0980548 0.388921
+vt 0.0596545 0.401078
+vt 0.112325 0.545469
+vt 0.112227 0.562147
+vt 0.121292 0.533311
+vt 0.124355 0.551553
+vt 0.0994555 0.572536
+vt 0.0969232 0.587328
+vt 0.112171 0.578561
+vt 0.106779 0.524557
+vt 0.107589 0.538443
+vt 0.110423 0.513527
+vt 0.112506 0.528264
+vt 0.118228 0.515069
+vt 0.0989423 0.555133
+vt 0.104283 0.542593
+vt 0.104701 0.530629
+vt 0.0882754 0.571292
+vt 0.0894754 0.56478
+vt 0.0853955 0.582515
+vt 0.0816758 0.596094
+vt 0.0753139 0.485928
+vt 0.0650052 0.492652
+vt 0.0850253 0.491819
+vt 0.076075 0.499356
+vt 0.0930081 0.48361
+vt 0.100394 0.488016
+vt 0.0945577 0.497873
+vt 0.0651895 0.505551
+vt 0.073766 0.51176
+vt 0.0546966 0.499376
+vt 0.0900973 0.518593
+vt 0.0785101 0.518041
+vt 0.0994002 0.522709
+vt 0.100625 0.514782
+vt 0.107779 0.492421
+vt 0.103732 0.504253
+vt 0.23946 0.508287
+vt 0.230571 0.517622
+vt 0.228964 0.503271
+vt 0.217261 0.507781
+vt 0.188396 0.491269
+vt 0.206255 0.496224
+vt 0.200655 0.50824
+vt 0.268702 0.540749
+vt 0.243925 0.538213
+vt 0.268704 0.526377
+vt 0.248486 0.524031
+vt 0.253257 0.51249
+vt 0.215104 0.553361
+vt 0.241505 0.558876
+vt 0.268704 0.561105
+vt 0.190298 0.545736
+vt 0.166291 0.537055
+vt 0.187254 0.524533
+vt 0.161754 0.51849
+vt 0.170538 0.486314
+vt 0.179959 0.505005
+vt 0.157216 0.499924
+vt 0.24946 0.367438
+vt 0.258636 0.367175
+vt 0.255765 0.379731
+vt 0.262283 0.37965
+vt 0.255826 0.354536
+vt 0.268674 0.354878
+vt 0.268728 0.367451
+vt 0.234227 0.359499
+vt 0.242109 0.369275
+vt 0.228611 0.363909
+vt 0.23567 0.3719
+vt 0.249247 0.379811
+vt 0.215158 0.338968
+vt 0.227222 0.344776
+vt 0.214279 0.345817
+vt 0.224857 0.351242
+vt 0.221552 0.355917
+vt 0.240937 0.339238
+vt 0.227708 0.334728
+vt 0.215289 0.329543
+vt 0.268674 0.342201
+vt 0.254691 0.341571
+vt 0.244771 0.497304
+vt 0.256308 0.499747
+vt 0.268704 0.500841
+vt 0.268703 0.486337
+vt 0.258069 0.48605
+vt 0.225998 0.491141
+vt 0.227882 0.481103
+vt 0.234955 0.49435
+vt 0.237658 0.483058
+vt 0.249236 0.471672
+vt 0.239501 0.471434
+vt 0.250178 0.458265
+vt 0.240915 0.459645
+vt 0.229767 0.471064
+vt 0.25897 0.471644
+vt 0.268704 0.471564
+vt 0.259441 0.456884
+vt 0.203423 0.33552
+vt 0.203968 0.325186
+vt 0.193422 0.320894
+vt 0.18965 0.357864
+vt 0.190908 0.345555
+vt 0.202072 0.352895
+vt 0.202774 0.344537
+vt 0.18128 0.331343
+vt 0.179249 0.347026
+vt 0.17058 0.329645
+vt 0.167693 0.348723
+vt 0.177228 0.362833
+vt 0.183331 0.315904
+vt 0.173467 0.310566
+vt 0.17135 0.406685
+vt 0.182441 0.411169
+vt 0.16826 0.410583
+vt 0.176559 0.414437
+vt 0.155839 0.404764
+vt 0.157521 0.413533
+vt 0.166076 0.414524
+vt 0.172571 0.417098
+vt 0.135724 0.413665
+vt 0.144491 0.409813
+vt 0.142544 0.416326
+vt 0.148879 0.413937
+vt 0.13814 0.405626
+vt 0.124506 0.410074
+vt 0.174252 0.441387
+vt 0.171559 0.438762
+vt 0.181259 0.434168
+vt 0.176807 0.433082
+vt 0.186036 0.426037
+vt 0.174588 0.426678
+vt 0.172391 0.431921
+vt 0.168862 0.436383
+vt 0.180644 0.419795
+vt 0.175028 0.421443
+vt 0.186883 0.417995
+vt 0.149859 0.449516
+vt 0.140851 0.446115
+vt 0.152195 0.445145
+vt 0.145413 0.441926
+vt 0.158597 0.449574
+vt 0.159394 0.440679
+vt 0.154101 0.440679
+vt 0.14915 0.438598
+vt 0.165593 0.443116
+vt 0.164422 0.439274
+vt 0.166713 0.446693
+vt 0.121193 0.418911
+vt 0.132677 0.420037
+vt 0.1248 0.429441
+vt 0.142327 0.427533
+vt 0.140909 0.42129
+vt 0.13921 0.436123
+vt 0.145149 0.43376
+vt 0.131925 0.438966
+vt 0.718331 0.590305
+vt 0.746935 0.609408
+vt 0.714187 0.592962
+vt 0.74538 0.618837
+vt 0.776045 0.619539
+vt 0.775836 0.606046
+vt 0.748316 0.600181
+vt 0.698297 0.570265
+vt 0.691016 0.56935
+vt 0.678793 0.553513
+vt 0.668258 0.550016
+vt 0.682993 0.567088
+vt 0.724973 0.581367
+vt 0.704095 0.568485
+vt 0.726629 0.571953
+vt 0.707052 0.562689
+vt 0.687534 0.554647
+vt 0.775571 0.595734
+vt 0.74935 0.591359
+vt 0.749786 0.580241
+vt 0.631527 0.521919
+vt 0.643349 0.524282
+vt 0.641663 0.531566
+vt 0.653323 0.535777
+vt 0.66485 0.539385
+vt 0.649849 0.514184
+vt 0.635476 0.513181
+vt 0.647883 0.501604
+vt 0.631937 0.501954
+vt 0.62139 0.512271
+vt 0.684619 0.531929
+vt 0.668859 0.529
+vt 0.681508 0.516892
+vt 0.664797 0.515372
+vt 0.663468 0.501153
+vt 0.676113 0.541788
+vt 0.689104 0.545131
+vt 0.919101 0.529512
+vt 0.932277 0.513107
+vt 0.933894 0.53169
+vt 0.952549 0.514234
+vt 0.938271 0.4968
+vt 0.915407 0.496828
+vt 0.912378 0.512315
+vt 0.874997 0.559455
+vt 0.89162 0.543518
+vt 0.884334 0.563198
+vt 0.903546 0.546094
+vt 0.915238 0.549145
+vt 0.888008 0.52727
+vt 0.879227 0.541892
+vt 0.86975 0.528566
+vt 0.864702 0.54298
+vt 0.866281 0.556104
+vt 0.894945 0.496872
+vt 0.893224 0.512192
+vt 0.87312 0.513029
+vt 0.860186 0.581167
+vt 0.86658 0.586129
+vt 0.833052 0.598471
+vt 0.836578 0.605656
+vt 0.854086 0.575883
+vt 0.804878 0.613577
+vt 0.803304 0.602321
+vt 0.806576 0.625184
+vt 0.801979 0.591764
+vt 0.80095 0.579355
+vt 0.826879 0.582241
+vt 0.824828 0.570521
+vt 0.848572 0.569954
+vt 0.845415 0.561476
+vt 0.723056 0.499144
+vt 0.723365 0.520768
+vt 0.700461 0.499917
+vt 0.701169 0.518891
+vt 0.702985 0.536491
+vt 0.773996 0.544472
+vt 0.748527 0.544271
+vt 0.773366 0.521907
+vt 0.747679 0.521923
+vt 0.747248 0.498271
+vt 0.725988 0.558685
+vt 0.74937 0.564127
+vt 0.774586 0.566031
+vt 0.705814 0.551558
+vt 0.799676 0.520274
+vt 0.799659 0.497042
+vt 0.825661 0.517765
+vt 0.825711 0.49681
+vt 0.799857 0.542467
+vt 0.848896 0.532613
+vt 0.850371 0.515118
+vt 0.850862 0.496842
+vt 0.824745 0.55595
+vt 0.846383 0.548548
+vt 0.800262 0.562542
+vt 0.622835 0.741898
+vt 0.629822 0.737341
+vt 0.630937 0.748106
+vt 0.637348 0.741916
+vt 0.638168 0.732456
+vt 0.632743 0.72671
+vt 0.623594 0.732979
+vt 0.601364 0.736825
+vt 0.612351 0.737387
+vt 0.601727 0.746549
+vt 0.614012 0.74515
+vt 0.621199 0.753622
+vt 0.623692 0.721651
+vt 0.615298 0.729693
+vt 0.603694 0.727299
+vt 0.64705 0.720202
+vt 0.651064 0.728223
+vt 0.666861 0.725286
+vt 0.666356 0.736499
+vt 0.651523 0.736804
+vt 0.655097 0.70327
+vt 0.638367 0.712843
+vt 0.644545 0.693632
+vt 0.627328 0.703786
+vt 0.680292 0.699272
+vt 0.669656 0.692929
+vt 0.66043 0.68407
+vt 0.686784 0.713263
+vt 0.693307 0.702612
+vt 0.683906 0.72429
+vt 0.682763 0.73584
+vt 0.664776 0.7482
+vt 0.649538 0.745843
+vt 0.644256 0.754878
+vt 0.68235 0.760278
+vt 0.681446 0.748063
+vt 0.67624 0.777475
+vt 0.68787 0.771805
+vt 0.657375 0.798004
+vt 0.64028 0.785894
+vt 0.666427 0.786487
+vt 0.651148 0.774455
+vt 0.634824 0.763445
+vt 0.622928 0.773335
+vt 0.597165 0.801047
+vt 0.584037 0.816073
+vt 0.575655 0.789046
+vt 0.561282 0.802854
+vt 0.606349 0.829025
+vt 0.594793 0.84477
+vt 0.570888 0.831255
+vt 0.629233 0.799056
+vt 0.610251 0.786335
+vt 0.590115 0.774943
+vt 0.648026 0.811186
+vt 0.638424 0.825833
+vt 0.628613 0.841743
+vt 0.618698 0.858285
+vt 0.643447 0.660513
+vt 0.635997 0.646287
+vt 0.6049 0.637043
+vt 0.628919 0.631286
+vt 0.614692 0.652678
+vt 0.634531 0.681735
+vt 0.651642 0.673186
+vt 0.584936 0.681759
+vt 0.604883 0.675219
+vt 0.597105 0.697898
+vt 0.616244 0.690669
+vt 0.593009 0.659162
+vt 0.58088 0.642801
+vt 0.571868 0.665802
+vt 0.586302 0.735665
+vt 0.587541 0.725613
+vt 0.566532 0.713946
+vt 0.585456 0.714475
+vt 0.569532 0.724249
+vt 0.584349 0.745549
+vt 0.579071 0.755761
+vt 0.565094 0.743135
+vt 0.557994 0.751956
+vt 0.550572 0.731836
+vt 0.547179 0.740294
+vt 0.532926 0.729581
+vt 0.530226 0.737883
+vt 0.540511 0.747664
+vt 0.550413 0.713789
+vt 0.552359 0.722818
+vt 0.536163 0.720718
+vt 0.553225 0.778158
+vt 0.567858 0.766798
+vt 0.546459 0.760192
+vt 0.496408 0.78312
+vt 0.5145 0.775693
+vt 0.521696 0.80043
+vt 0.537687 0.789339
+vt 0.512875 0.758052
+vt 0.492969 0.762083
+vt 0.500324 0.747435
+vt 0.478432 0.748437
+vt 0.471121 0.76581
+vt 0.5289 0.753414
+vt 0.51732 0.74582
+vt 0.55219 0.672662
+vt 0.56563 0.687095
+vt 0.519008 0.667707
+vt 0.537934 0.658133
+vt 0.533662 0.679776
+vt 0.558759 0.703094
+vt 0.577435 0.701334
+vt 0.516448 0.703502
+vt 0.530573 0.696655
+vt 0.529774 0.709521
+vt 0.542602 0.705297
+vt 0.515971 0.687177
+vt 0.500082 0.677282
+vt 0.500703 0.696106
+vt 0.496419 0.736002
+vt 0.513862 0.73676
+vt 0.515969 0.727011
+vt 0.476228 0.734685
+vt 0.454664 0.733089
+vt 0.481514 0.721061
+vt 0.463494 0.717678
+vt 0.505915 0.712954
+vt 0.489445 0.707801
+vt 0.472325 0.702267
+vt 0.521085 0.717345
+vt 0.817772 0.32972
+vt 0.817268 0.334177
+vt 0.807098 0.329266
+vt 0.806518 0.333516
+vt 0.806012 0.337004
+vt 0.795796 0.333142
+vt 0.795184 0.336418
+vt 0.838686 0.341177
+vt 0.827785 0.339607
+vt 0.838685 0.335915
+vt 0.827985 0.335004
+vt 0.82832 0.330282
+vt 0.827498 0.348469
+vt 0.816308 0.345239
+vt 0.82763 0.344073
+vt 0.816582 0.341797
+vt 0.838688 0.346438
+vt 0.805553 0.339703
+vt 0.794562 0.338721
+vt 0.805117 0.342008
+vt 0.784417 0.336886
+vt 0.785167 0.333173
+vt 0.774651 0.33352
+vt 0.775476 0.328538
+vt 0.785864 0.328653
+vt 0.771718 0.345756
+vt 0.772735 0.341953
+vt 0.782822 0.342267
+vt 0.783629 0.339781
+vt 0.763085 0.339283
+vt 0.761859 0.344306
+vt 0.752475 0.340764
+vt 0.750993 0.34675
+vt 0.760614 0.349246
+vt 0.765302 0.328633
+vt 0.764271 0.334092
+vt 0.753958 0.334778
+vt 0.872696 0.335583
+vt 0.866407 0.338084
+vt 0.86882 0.330441
+vt 0.863658 0.333516
+vt 0.867867 0.343569
+vt 0.860064 0.340257
+vt 0.860616 0.34494
+vt 0.882136 0.339457
+vt 0.878875 0.332422
+vt 0.888775 0.336165
+vt 0.88462 0.328842
+vt 0.874176 0.326498
+vt 0.876903 0.354499
+vt 0.876407 0.348394
+vt 0.885368 0.353577
+vt 0.88421 0.346688
+vt 0.892061 0.343631
+vt 0.868751 0.349227
+vt 0.861168 0.349623
+vt 0.869036 0.354668
+vt 0.905832 0.318045
+vt 0.897126 0.314006
+vt 0.909273 0.307792
+vt 0.899606 0.305356
+vt 0.888216 0.310241
+vt 0.885668 0.315488
+vt 0.89379 0.320385
+vt 0.912856 0.338167
+vt 0.907418 0.331814
+vt 0.922353 0.328655
+vt 0.914539 0.322618
+vt 0.91968 0.310994
+vt 0.895263 0.331765
+vt 0.899861 0.338745
+vt 0.903538 0.34573
+vt 0.882771 0.319356
+vt 0.889606 0.32508
+vt 0.726761 0.338632
+vt 0.720269 0.333665
+vt 0.728661 0.332789
+vt 0.724048 0.328479
+vt 0.728685 0.324646
+vt 0.725283 0.320091
+vt 0.718943 0.323554
+vt 0.708141 0.33392
+vt 0.716815 0.339387
+vt 0.703619 0.340611
+vt 0.713734 0.345346
+vt 0.724861 0.344474
+vt 0.705092 0.321759
+vt 0.698212 0.327693
+vt 0.695328 0.313899
+vt 0.68654 0.320218
+vt 0.692159 0.335172
+vt 0.721158 0.3143
+vt 0.712853 0.317398
+vt 0.705649 0.309794
+vt 0.849098 0.333114
+vt 0.852849 0.328742
+vt 0.854305 0.334344
+vt 0.858906 0.330607
+vt 0.863427 0.326736
+vt 0.848559 0.322731
+vt 0.846187 0.327308
+vt 0.839608 0.321509
+vt 0.838379 0.326237
+vt 0.84389 0.331884
+vt 0.862887 0.316059
+vt 0.859996 0.320044
+vt 0.853481 0.313684
+vt 0.851078 0.31815
+vt 0.841341 0.316617
+vt 0.867789 0.322599
+vt 0.871643 0.31863
+vt 0.808609 0.31864
+vt 0.807782 0.324278
+vt 0.797841 0.317858
+vt 0.797082 0.323766
+vt 0.818436 0.324849
+vt 0.821851 0.307693
+vt 0.820468 0.313742
+vt 0.810856 0.305827
+vt 0.809616 0.312441
+vt 0.79869 0.311344
+vt 0.829771 0.320444
+vt 0.831091 0.315141
+vt 0.832672 0.309663
+vt 0.828882 0.32546
+vt 0.827295 0.287378
+vt 0.81609 0.283741
+vt 0.829537 0.279056
+vt 0.81819 0.275174
+vt 0.802762 0.289081
+vt 0.80451 0.280694
+vt 0.814142 0.291631
+vt 0.836158 0.298133
+vt 0.838143 0.291297
+vt 0.84681 0.301473
+vt 0.848714 0.295264
+vt 0.84041 0.283327
+vt 0.823454 0.301435
+vt 0.834348 0.304125
+vt 0.84504 0.30685
+vt 0.801169 0.296892
+vt 0.812381 0.298942
+vt 0.876986 0.311536
+vt 0.878898 0.306488
+vt 0.870963 0.295312
+vt 0.88062 0.29932
+vt 0.869186 0.302754
+vt 0.865464 0.312169
+vt 0.874638 0.315263
+vt 0.855505 0.309448
+vt 0.857319 0.304788
+vt 0.859093 0.299048
+vt 0.861066 0.291675
+vt 0.732418 0.318062
+vt 0.734712 0.322982
+vt 0.743001 0.327869
+vt 0.736781 0.327408
+vt 0.741594 0.322579
+vt 0.738426 0.310793
+vt 0.729673 0.312154
+vt 0.736628 0.303749
+vt 0.726615 0.305165
+vt 0.757036 0.316401
+vt 0.748245 0.31651
+vt 0.75687 0.309754
+vt 0.747447 0.310045
+vt 0.746533 0.302937
+vt 0.748795 0.322531
+vt 0.749221 0.328331
+vt 0.756792 0.322664
+vt 0.731654 0.277237
+vt 0.718591 0.277987
+vt 0.730298 0.26671
+vt 0.717089 0.267402
+vt 0.704046 0.278768
+vt 0.706282 0.288897
+vt 0.720525 0.287868
+vt 0.756105 0.285792
+vt 0.744751 0.286232
+vt 0.755817 0.276484
+vt 0.743878 0.276719
+vt 0.743048 0.266393
+vt 0.734816 0.295809
+vt 0.745635 0.294985
+vt 0.756313 0.294527
+vt 0.710968 0.298566
+vt 0.723383 0.296998
+vt 0.792536 0.278545
+vt 0.791154 0.28731
+vt 0.780323 0.277204
+vt 0.793935 0.269245
+vt 0.781204 0.267531
+vt 0.78873 0.303265
+vt 0.777827 0.302657
+vt 0.789858 0.295534
+vt 0.778545 0.294752
+vt 0.767676 0.285819
+vt 0.767323 0.294432
+vt 0.767022 0.302415
+vt 0.768024 0.276581
+vt 0.768346 0.266629
+vt 0.776124 0.323009
+vt 0.786496 0.323337
+vt 0.787134 0.317284
+vt 0.766566 0.316536
+vt 0.766062 0.322804
+vt 0.777196 0.310038
+vt 0.766828 0.309761
+vt 0.78785 0.310554
+vt 0.887192 0.245173
+vt 0.894254 0.251226
+vt 0.882204 0.253513
+vt 0.889316 0.257984
+vt 0.899158 0.259059
+vt 0.906216 0.25556
+vt 0.900579 0.245749
+vt 0.873068 0.226854
+vt 0.883747 0.231913
+vt 0.868263 0.235954
+vt 0.878436 0.240212
+vt 0.873612 0.249405
+vt 0.899039 0.231269
+vt 0.889175 0.22393
+vt 0.904851 0.223823
+vt 0.894483 0.215603
+vt 0.878092 0.21784
+vt 0.907771 0.240362
+vt 0.914548 0.251706
+vt 0.914646 0.233613
+vt 0.83088 0.224634
+vt 0.816277 0.220886
+vt 0.83396 0.214024
+vt 0.818586 0.209908
+vt 0.827947 0.235154
+vt 0.840898 0.238644
+vt 0.844486 0.228275
+vt 0.855747 0.197071
+vt 0.852027 0.207701
+vt 0.840323 0.192176
+vt 0.837098 0.203273
+vt 0.821041 0.19888
+vt 0.861296 0.22221
+vt 0.8657 0.212494
+vt 0.870075 0.20254
+vt 0.852868 0.242144
+vt 0.856947 0.232012
+vt 0.689623 0.215775
+vt 0.70332 0.207275
+vt 0.697939 0.222493
+vt 0.709017 0.215819
+vt 0.725203 0.22199
+vt 0.713143 0.224363
+vt 0.722188 0.211648
+vt 0.691318 0.186238
+vt 0.709695 0.179461
+vt 0.697458 0.197483
+vt 0.714155 0.190926
+vt 0.68169 0.206684
+vt 0.751207 0.197567
+vt 0.734425 0.198302
+vt 0.74968 0.185753
+vt 0.731649 0.18705
+vt 0.728678 0.175143
+vt 0.738996 0.220423
+vt 0.736932 0.209325
+vt 0.752641 0.20886
+vt 0.784858 0.21308
+vt 0.800826 0.216826
+vt 0.784162 0.2244
+vt 0.799325 0.227809
+vt 0.802394 0.205632
+vt 0.768345 0.19871
+vt 0.768707 0.210268
+vt 0.768953 0.22173
+vt 0.786206 0.189958
+vt 0.767985 0.186934
+vt 0.786915 0.177877
+vt 0.767639 0.174701
+vt 0.803984 0.194191
+vt 0.805673 0.182317
+vt 0.809955 0.252364
+vt 0.822756 0.255918
+vt 0.80815 0.262293
+vt 0.820407 0.265831
+vt 0.832047 0.269793
+vt 0.837735 0.249207
+vt 0.825254 0.245637
+vt 0.811949 0.242115
+vt 0.856542 0.267463
+vt 0.846044 0.263615
+vt 0.859897 0.256492
+vt 0.849272 0.252848
+vt 0.843066 0.273938
+vt 0.853522 0.278101
+vt 0.742291 0.255201
+vt 0.729185 0.255607
+vt 0.755289 0.255203
+vt 0.754976 0.243665
+vt 0.741468 0.243564
+vt 0.715531 0.256156
+vt 0.700801 0.256437
+vt 0.714427 0.244869
+vt 0.700028 0.24508
+vt 0.72694 0.232765
+vt 0.714289 0.234159
+vt 0.701971 0.234531
+vt 0.754532 0.231935
+vt 0.740439 0.231904
+vt 0.911041 0.2953
+vt 0.921301 0.296781
+vt 0.93161 0.297423
+vt 0.929903 0.281016
+vt 0.920542 0.281282
+vt 0.901221 0.293843
+vt 0.891715 0.291968
+vt 0.901867 0.281252
+vt 0.892897 0.280142
+vt 0.909625 0.267746
+vt 0.901437 0.269362
+vt 0.893739 0.269552
+vt 0.927824 0.264632
+vt 0.918548 0.265802
+vt 0.783513 0.23552
+vt 0.768967 0.233217
+vt 0.768829 0.24463
+vt 0.796609 0.249173
+vt 0.797938 0.238614
+vt 0.782032 0.257262
+vt 0.795284 0.259419
+vt 0.768618 0.255873
+vt 0.866329 0.271283
+vt 0.863474 0.282119
+vt 0.873054 0.28587
+vt 0.878455 0.263629
+vt 0.869642 0.26007
+vt 0.884271 0.277953
+vt 0.886283 0.267215
+vt 0.882397 0.289232
+vt 0.941581 0.296368
+vt 0.939264 0.280761
+vt 0.944658 0.31122
+vt 0.954503 0.308374
+vt 0.951091 0.294591
+vt 0.945764 0.266671
+vt 0.937066 0.26534
+vt 0.944666 0.253926
+vt 0.936188 0.251002
+vt 0.964594 0.279716
+vt 0.956479 0.280128
+vt 0.960523 0.267756
+vt 0.953407 0.267371
+vt 0.951694 0.255808
+vt 0.963661 0.306017
+vt 0.960023 0.293069
+vt 0.968665 0.291676
+vt 0.386764 0.173365
+vt 0.386251 0.159166
+vt 0.403134 0.175347
+vt 0.40173 0.162164
+vt 0.370756 0.187276
+vt 0.370795 0.173446
+vt 0.38669 0.187253
+vt 0.422389 0.190328
+vt 0.419955 0.177717
+vt 0.441316 0.192402
+vt 0.437002 0.180281
+vt 0.417209 0.165162
+vt 0.401826 0.214817
+vt 0.403793 0.201683
+vt 0.424104 0.215186
+vt 0.424198 0.203052
+vt 0.44563 0.204522
+vt 0.385442 0.200518
+vt 0.36925 0.200049
+vt 0.382596 0.213492
+vt 0.356262 0.189263
+vt 0.355175 0.177265
+vt 0.348714 0.166736
+vt 0.334762 0.173858
+vt 0.341079 0.182428
+vt 0.340863 0.211835
+vt 0.342946 0.202073
+vt 0.352708 0.211778
+vt 0.355322 0.200767
+vt 0.331414 0.194664
+vt 0.331412 0.203371
+vt 0.319782 0.196009
+vt 0.319946 0.204047
+vt 0.330031 0.212063
+vt 0.324674 0.179566
+vt 0.32968 0.18654
+vt 0.319256 0.188731
+vt 0.34302 0.232291
+vt 0.34877 0.222299
+vt 0.354926 0.235741
+vt 0.361782 0.224028
+vt 0.337711 0.221083
+vt 0.32682 0.236795
+vt 0.334971 0.241716
+vt 0.31849 0.242434
+vt 0.324587 0.249202
+vt 0.345001 0.247217
+vt 0.32414 0.227342
+vt 0.319507 0.233375
+vt 0.314915 0.225618
+vt 0.311922 0.231227
+vt 0.313143 0.237776
+vt 0.327627 0.220146
+vt 0.317481 0.219239
+vt 0.63785 0.953205
+vt 0.630881 0.947663
+vt 0.647016 0.946972
+vt 0.6392 0.9414
+vt 0.622289 0.952938
+vt 0.615431 0.946184
+vt 0.624837 0.941085
+vt 0.636439 0.963394
+vt 0.644947 0.958154
+vt 0.644288 0.966733
+vt 0.653209 0.961707
+vt 0.654465 0.952626
+vt 0.612697 0.965958
+vt 0.620878 0.963127
+vt 0.620821 0.970267
+vt 0.628164 0.967348
+vt 0.635184 0.970582
+vt 0.606875 0.951267
+vt 0.614197 0.957483
+vt 0.605619 0.960347
+vt 0.367121 0.917394
+vt 0.374142 0.923099
+vt 0.360165 0.922438
+vt 0.367724 0.92812
+vt 0.381201 0.91911
+vt 0.388569 0.925813
+vt 0.380653 0.929651
+vt 0.367048 0.90811
+vt 0.360477 0.912329
+vt 0.360243 0.904335
+vt 0.353385 0.908573
+vt 0.353375 0.916883
+vt 0.388646 0.907697
+vt 0.381128 0.909826
+vt 0.380927 0.902848
+vt 0.374115 0.905116
+vt 0.367999 0.901318
+vt 0.396044 0.921905
+vt 0.388216 0.915684
+vt 0.396055 0.913595
+vt 0.328818 0.915038
+vt 0.333156 0.924854
+vt 0.309844 0.916921
+vt 0.317341 0.929308
+vt 0.32345 0.950038
+vt 0.299401 0.934005
+vt 0.334074 0.942122
+vt 0.352513 0.92816
+vt 0.344718 0.920885
+vt 0.341477 0.913527
+vt 0.373801 0.947808
+vt 0.357974 0.943112
+vt 0.374058 0.938334
+vt 0.362633 0.93464
+vt 0.352852 0.95399
+vt 0.347499 0.966072
+vt 0.373169 0.960769
+vt 0.418321 0.915362
+vt 0.435775 0.918439
+vt 0.422947 0.925132
+vt 0.443681 0.930756
+vt 0.455589 0.922184
+vt 0.430406 0.905691
+vt 0.418226 0.905767
+vt 0.397194 0.906411
+vt 0.405585 0.913617
+vt 0.409188 0.921144
+vt 0.376316 0.893935
+vt 0.392361 0.898152
+vt 0.375205 0.897753
+vt 0.386735 0.900808
+vt 0.405223 0.889198
+vt 0.398631 0.894039
+vt 0.377906 0.887331
+vt 0.336894 0.897334
+vt 0.318775 0.906333
+vt 0.327696 0.895339
+vt 0.301524 0.906655
+vt 0.333962 0.906288
+vt 0.360174 0.89468
+vt 0.356966 0.889714
+vt 0.353868 0.884022
+vt 0.353137 0.901438
+vt 0.363602 0.898194
+vt 0.345023 0.906797
+vt 0.434342 0.941415
+vt 0.453466 0.949534
+vt 0.415406 0.950555
+vt 0.42616 0.960391
+vt 0.417515 0.933928
+vt 0.390161 0.946583
+vt 0.394517 0.958315
+vt 0.398853 0.971248
+vt 0.39657 0.933133
+vt 0.385766 0.937253
+vt 0.40528 0.927706
+vt 0.57922 0.909293
+vt 0.58971 0.919914
+vt 0.547117 0.922496
+vt 0.567321 0.931013
+vt 0.577999 0.948709
+vt 0.555818 0.943001
+vt 0.585011 0.938851
+vt 0.617202 0.922908
+vt 0.614188 0.910157
+vt 0.635979 0.920365
+vt 0.638841 0.906233
+vt 0.611323 0.89609
+vt 0.607883 0.938585
+vt 0.620514 0.933025
+vt 0.633926 0.930757
+vt 0.597671 0.945329
+vt 0.592645 0.95281
+vt 0.700973 0.938045
+vt 0.70873 0.951078
+vt 0.680671 0.943135
+vt 0.684796 0.953548
+vt 0.697126 0.916127
+vt 0.723976 0.932747
+vt 0.682579 0.924761
+vt 0.65736 0.940282
+vt 0.665771 0.947808
+vt 0.668833 0.955667
+vt 0.653673 0.924504
+vt 0.646476 0.933887
+vt 0.670275 0.899507
+vt 0.661753 0.912628
+vt 0.571389 0.966985
+vt 0.583228 0.967478
+vt 0.599576 0.982628
+vt 0.604894 0.978385
+vt 0.628092 0.978017
+vt 0.628226 0.984167
+vt 0.610106 0.974767
+vt 0.582286 0.958749
+vt 0.563995 0.95542
+vt 0.543202 0.951342
+vt 0.603906 0.967493
+vt 0.595571 0.96058
+vt 0.615108 0.972401
+vt 0.627917 0.974569
+vt 0.676098 0.972325
+vt 0.697647 0.962452
+vt 0.689295 0.973819
+vt 0.72006 0.961593
+vt 0.677946 0.963033
+vt 0.646577 0.976363
+vt 0.652459 0.980856
+vt 0.658529 0.986044
+vt 0.653384 0.96919
+vt 0.64107 0.973262
+vt 0.663671 0.963059
+vt 0.36984 0.23949
+vt 0.357948 0.252377
+vt 0.346028 0.26526
+vt 0.355607 0.271672
+vt 0.369662 0.258935
+vt 0.397205 0.227812
+vt 0.377725 0.226506
+vt 0.417354 0.23588
+vt 0.420828 0.226027
+vt 0.4431 0.22875
+vt 0.446522 0.222697
+vt 0.398497 0.266882
+vt 0.394993 0.256063
+vt 0.417376 0.253869
+vt 0.416668 0.245046
+vt 0.439677 0.234803
+vt 0.375987 0.26863
+vt 0.360587 0.280948
+vt 0.379618 0.279895
+vt 0.470993 0.595329
+vt 0.466212 0.581429
+vt 0.482679 0.589544
+vt 0.476881 0.579741
+vt 0.489311 0.600458
+vt 0.499356 0.579702
+vt 0.49224 0.574945
+vt 0.479771 0.564752
+vt 0.472749 0.572159
+vt 0.474256 0.558067
+vt 0.468458 0.56471
+vt 0.46434 0.573191
+vt 0.492648 0.557082
+vt 0.486003 0.554296
+vt 0.497062 0.544746
+vt 0.490231 0.543792
+vt 0.480758 0.549772
+vt 0.509401 0.558946
+vt 0.500678 0.558385
+vt 0.505852 0.543851
+vt 0.497996 0.536049
+vt 0.505621 0.535332
+vt 0.501948 0.53012
+vt 0.507915 0.530515
+vt 0.51368 0.534353
+vt 0.482316 0.532549
+vt 0.489579 0.530568
+vt 0.484501 0.537962
+vt 0.491239 0.536243
+vt 0.491373 0.524968
+vt 0.485801 0.52569
+vt 0.484285 0.519868
+vt 0.479547 0.521219
+vt 0.479193 0.527051
+vt 0.496798 0.525508
+vt 0.50215 0.526678
+vt 0.490005 0.520085
+vt 0.473801 0.535793
+vt 0.470659 0.528429
+vt 0.469034 0.521857
+vt 0.460157 0.521751
+vt 0.461514 0.530505
+vt 0.469248 0.549809
+vt 0.476929 0.543255
+vt 0.456268 0.544777
+vt 0.462181 0.555305
+vt 0.449568 0.548141
+vt 0.456741 0.560093
+vt 0.451562 0.524114
+vt 0.453071 0.533958
+vt 0.446261 0.536501
+vt 0.933404 0.221309
+vt 0.924739 0.213284
+vt 0.938258 0.211078
+vt 0.929637 0.202912
+vt 0.924585 0.183534
+vt 0.934625 0.192739
+vt 0.919804 0.19418
+vt 0.910176 0.214711
+vt 0.920022 0.224053
+vt 0.929173 0.232598
+vt 0.892208 0.188152
+vt 0.904301 0.196133
+vt 0.88756 0.198715
+vt 0.899431 0.206272
+vt 0.909372 0.185384
+vt 0.914544 0.174329
+vt 0.89751 0.177035
+vt 0.829357 0.162643
+vt 0.809729 0.156901
+vt 0.833272 0.14992
+vt 0.812406 0.143745
+vt 0.789947 0.13803
+vt 0.815325 0.130471
+vt 0.791208 0.124039
+vt 0.787797 0.165074
+vt 0.807536 0.169823
+vt 0.826253 0.175235
+vt 0.767318 0.161769
+vt 0.746459 0.160576
+vt 0.767097 0.147949
+vt 0.744921 0.146466
+vt 0.767051 0.133048
+vt 0.767091 0.117607
+vt 0.743596 0.130619
+vt 0.722473 0.147996
+vt 0.719464 0.132562
+vt 0.695065 0.137329
+vt 0.716504 0.116494
+vt 0.690034 0.121812
+vt 0.725582 0.162157
+vt 0.70494 0.166437
+vt 0.656395 0.167991
+vt 0.677929 0.158979
+vt 0.665245 0.182639
+vt 0.684786 0.173336
+vt 0.670811 0.143368
+vt 0.663563 0.127131
+vt 0.647183 0.151721
+vt 0.844502 0.359871
+vt 0.847714 0.360296
+vt 0.847408 0.363957
+vt 0.849348 0.364269
+vt 0.851648 0.36844
+vt 0.850981 0.368241
+vt 0.851475 0.364632
+vt 0.850753 0.356913
+vt 0.846081 0.356324
+vt 0.850205 0.353003
+vt 0.844447 0.352351
+vt 0.841595 0.355786
+vt 0.855799 0.357604
+vt 0.861641 0.358449
+vt 0.855262 0.361451
+vt 0.860512 0.36239
+vt 0.855963 0.353655
+vt 0.853976 0.365096
+vt 0.852315 0.368639
+vt 0.858224 0.366005
+vt 0.720036 0.362311
+vt 0.71959 0.357661
+vt 0.728015 0.361123
+vt 0.727779 0.35756
+vt 0.73391 0.360061
+vt 0.73344 0.362971
+vt 0.728708 0.364784
+vt 0.736235 0.351526
+vt 0.735307 0.354339
+vt 0.729598 0.350921
+vt 0.728459 0.354192
+vt 0.720575 0.353357
+vt 0.740522 0.356645
+vt 0.741697 0.354388
+vt 0.746207 0.356041
+vt 0.747858 0.354388
+vt 0.742872 0.352131
+vt 0.739347 0.358902
+vt 0.738172 0.361159
+vt 0.744555 0.357694
+vt 0.878699 0.181029
+vt 0.883387 0.169647
+vt 0.867761 0.162851
+vt 0.888238 0.158069
+vt 0.871973 0.151014
+vt 0.874335 0.192023
+vt 0.859716 0.186019
+vt 0.843668 0.180528
+vt 0.847301 0.168505
+vt 0.85139 0.156282
+vt 0.855707 0.143958
+vt 0.959959 0.243101
+vt 0.954077 0.238396
+vt 0.961713 0.236753
+vt 0.956323 0.230743
+vt 0.954067 0.216174
+vt 0.958767 0.22329
+vt 0.950811 0.224595
+vt 0.952226 0.24645
+vt 0.945638 0.243001
+vt 0.958206 0.249448
+vt 0.941091 0.227849
+vt 0.937833 0.238648
+vt 0.949366 0.209059
+vt 0.945053 0.218173
+vt 0.336934 0.71568
+vt 0.33038 0.741466
+vt 0.302716 0.711537
+vt 0.299424 0.738297
+vt 0.295776 0.762787
+vt 0.393479 0.752977
+vt 0.377463 0.773803
+vt 0.361811 0.746705
+vt 0.350155 0.769363
+vt 0.371566 0.72393
+vt 0.338262 0.791261
+vt 0.32625 0.812779
+vt 0.315076 0.788147
+vt 0.307068 0.810102
+vt 0.361448 0.794629
+vt 0.29189 0.78554
+vt 0.287886 0.807426
+vt 0.299023 0.2659
+vt 0.299047 0.279209
+vt 0.283956 0.266822
+vt 0.283986 0.280354
+vt 0.283688 0.295775
+vt 0.327087 0.27663
+vt 0.323816 0.291325
+vt 0.313749 0.277461
+vt 0.311916 0.292761
+vt 0.31433 0.263721
+vt 0.296663 0.325331
+vt 0.297456 0.310292
+vt 0.309865 0.321929
+vt 0.31066 0.307783
+vt 0.322425 0.305028
+vt 0.283294 0.311929
+vt 0.282898 0.327607
+vt 0.281812 0.241506
+vt 0.281311 0.235544
+vt 0.29388 0.241063
+vt 0.292617 0.235389
+vt 0.282538 0.248141
+vt 0.307969 0.245729
+vt 0.304463 0.240079
+vt 0.302421 0.235265
+vt 0.297743 0.255375
+vt 0.311831 0.253377
+vt 0.283363 0.256332
+vt 0.665914 0.351378
+vt 0.682493 0.354562
+vt 0.661557 0.365446
+vt 0.679123 0.366239
+vt 0.694271 0.365301
+vt 0.676262 0.378559
+vt 0.692167 0.374571
+vt 0.699871 0.348099
+vt 0.686879 0.344167
+vt 0.6718 0.339046
+vt 0.709168 0.357329
+vt 0.711074 0.351244
+vt 0.708348 0.363848
+vt 0.708071 0.370584
+vt 0.867597 0.363815
+vt 0.865591 0.367703
+vt 0.874845 0.369526
+vt 0.873284 0.373919
+vt 0.863133 0.371378
+vt 0.876827 0.360099
+vt 0.868698 0.3595
+vt 0.885694 0.365176
+vt 0.885881 0.359586
+vt 0.896427 0.363349
+vt 0.895778 0.357262
+vt 0.884753 0.370807
+vt 0.883435 0.376459
+vt 0.896131 0.370165
+vt 0.908527 0.359153
+vt 0.909799 0.366215
+vt 0.924545 0.360491
+vt 0.927731 0.367892
+vt 0.910658 0.373446
+vt 0.917439 0.345549
+vt 0.906434 0.352429
+vt 0.933647 0.34626
+vt 0.928384 0.337086
+vt 0.944982 0.340124
+vt 0.93903 0.329282
+vt 0.939163 0.354526
+vt 0.944804 0.362339
+vt 0.951802 0.349505
+vt 0.954599 0.336095
+vt 0.960613 0.346611
+vt 0.967306 0.344981
+vt 0.971931 0.35627
+vt 0.966904 0.356527
+vt 0.949139 0.32438
+vt 0.958528 0.321259
+vt 0.967014 0.318797
+vt 0.975048 0.316665
+vt 0.970264 0.331384
+vt 0.97736 0.329695
+vt 0.976958 0.356012
+vt 0.973591 0.343753
+vt 0.979672 0.342725
+vt 0.320524 0.146521
+vt 0.313373 0.154734
+vt 0.2954 0.141698
+vt 0.292003 0.150779
+vt 0.28884 0.159589
+vt 0.322801 0.167455
+vt 0.333528 0.159639
+vt 0.345648 0.151344
+vt 0.301572 0.170503
+vt 0.31486 0.174313
+vt 0.297761 0.178302
+vt 0.309891 0.180957
+vt 0.286147 0.16786
+vt 0.284108 0.176013
+vt 0.296019 0.186221
+vt 0.282908 0.18447
+vt 0.282346 0.192959
+vt 0.307895 0.195694
+vt 0.308085 0.188134
+vt 0.295125 0.202337
+vt 0.307775 0.203492
+vt 0.294766 0.209933
+vt 0.307347 0.211107
+vt 0.28222 0.201212
+vt 0.28215 0.209032
+vt 0.294211 0.217025
+vt 0.281754 0.216221
+vt 0.281328 0.222946
+vt 0.304681 0.22447
+vt 0.306231 0.218115
+vt 0.292675 0.229646
+vt 0.302952 0.230124
+vt 0.281165 0.229372
+vt 0.305534 0.681974
+vt 0.306111 0.650222
+vt 0.342004 0.687835
+vt 0.333707 0.612749
+vt 0.302684 0.616894
+vt 0.325885 0.580428
+vt 0.298274 0.585332
+vt 0.395357 0.618639
+vt 0.374414 0.634434
+vt 0.380969 0.58713
+vt 0.358816 0.601672
+vt 0.349578 0.570739
+vt 0.384513 0.681504
+vt 0.395957 0.664926
+vt 0.416814 0.706905
+vt 0.420474 0.694282
+vt 0.409745 0.650149
+vt 0.377758 0.701684
+vt 0.413154 0.719528
+vt 0.338053 0.278233
+vt 0.345863 0.28289
+vt 0.333606 0.290848
+vt 0.34226 0.309133
+vt 0.341074 0.298701
+vt 0.351677 0.302345
+vt 0.349731 0.291219
+vt 0.332267 0.302652
+vt 0.332584 0.313884
+vt 0.387723 0.422423
+vt 0.382162 0.422373
+vt 0.388869 0.417427
+vt 0.382859 0.417822
+vt 0.377549 0.413541
+vt 0.384244 0.41338
+vt 0.377169 0.417955
+vt 0.377116 0.426931
+vt 0.381664 0.426972
+vt 0.377432 0.431442
+vt 0.381856 0.431562
+vt 0.386531 0.427438
+vt 0.367249 0.422554
+vt 0.372123 0.422513
+vt 0.368692 0.426806
+vt 0.372857 0.426712
+vt 0.373317 0.430853
+vt 0.371782 0.41828
+vt 0.37118 0.414106
+vt 0.366505 0.41823
+vt 0.426004 0.390403
+vt 0.398456 0.389437
+vt 0.424679 0.386349
+vt 0.396424 0.386762
+vt 0.363793 0.388475
+vt 0.394705 0.384351
+vt 0.356716 0.386401
+vt 0.378598 0.396645
+vt 0.375308 0.393171
+vt 0.401577 0.396345
+vt 0.401115 0.392641
+vt 0.428621 0.394516
+vt 0.353918 0.396416
+vt 0.339672 0.402446
+vt 0.345629 0.394106
+vt 0.329812 0.40053
+vt 0.35953 0.398947
+vt 0.33321 0.391382
+vt 0.318726 0.38845
+vt 0.313563 0.397303
+vt 0.476084 0.423195
+vt 0.45045 0.417502
+vt 0.486588 0.412986
+vt 0.456271 0.406372
+vt 0.430198 0.413801
+vt 0.423791 0.426121
+vt 0.43912 0.429517
+vt 0.507665 0.429493
+vt 0.542219 0.436093
+vt 0.475452 0.439099
+vt 0.495356 0.444524
+vt 0.525393 0.419675
+vt 0.43331 0.443713
+vt 0.441295 0.448141
+vt 0.411039 0.45059
+vt 0.411532 0.454564
+vt 0.448493 0.452954
+vt 0.423753 0.440055
+vt 0.413961 0.436871
+vt 0.407352 0.447798
+vt 0.31374 0.40806
+vt 0.32634 0.410021
+vt 0.317907 0.421046
+vt 0.299322 0.405773
+vt 0.325214 0.421259
+vt 0.338474 0.421652
+vt 0.3353 0.411329
+vt 0.344846 0.421904
+vt 0.34232 0.412499
+vt 0.355509 0.44277
+vt 0.342793 0.432971
+vt 0.357815 0.441106
+vt 0.346865 0.432004
+vt 0.351296 0.431202
+vt 0.336492 0.43632
+vt 0.339458 0.434504
+vt 0.354149 0.44545
+vt 0.366963 0.45175
+vt 0.367513 0.449587
+vt 0.378353 0.456489
+vt 0.366715 0.454041
+vt 0.379286 0.45488
+vt 0.368666 0.447681
+vt 0.369978 0.445476
+vt 0.38013 0.451272
+vt 0.379885 0.448331
+vt 0.392918 0.451424
+vt 0.393862 0.45378
+vt 0.390643 0.448651
+vt 0.392502 0.456314
+vt 0.389991 0.458936
+vt 0.434919 0.382651
+vt 0.453791 0.389264
+vt 0.433807 0.382477
+vt 0.442133 0.387347
+vt 0.449541 0.392567
+vt 0.49183 0.400597
+vt 0.464657 0.391608
+vt 0.513113 0.405332
+vt 0.475128 0.394166
+vt 0.436031 0.382826
+vt 0.516095 0.409998
+vt 0.483347 0.40387
+vt 0.551098 0.416497
+vt 0.455111 0.398484
+vt 0.472738 0.503366
+vt 0.465757 0.493643
+vt 0.482409 0.502085
+vt 0.475421 0.491708
+vt 0.456009 0.49597
+vt 0.450804 0.485866
+vt 0.459618 0.483598
+vt 0.474669 0.513513
+vt 0.481402 0.512442
+vt 0.489397 0.512463
+vt 0.46027 0.514483
+vt 0.470459 0.516768
+vt 0.449301 0.508912
+vt 0.450715 0.51651
+vt 0.441643 0.516565
+vt 0.44392 0.523436
+vt 0.44199 0.488134
+vt 0.446094 0.499081
+vt 0.437618 0.505763
+vt 0.409641 0.53125
+vt 0.417486 0.517634
+vt 0.424725 0.549489
+vt 0.428372 0.534587
+vt 0.421164 0.563953
+vt 0.440531 0.58304
+vt 0.440131 0.567599
+vt 0.438076 0.530458
+vt 0.43219 0.518805
+vt 0.42533 0.504018
+vt 0.445266 0.55059
+vt 0.442015 0.535846
+vt 0.453943 0.564519
+vt 0.453765 0.571874
+vt 0.456182 0.585446
+vt 0.459898 0.602127
+vt 0.349013 0.491269
+vt 0.336754 0.508239
+vt 0.331154 0.496224
+vt 0.320148 0.507781
+vt 0.297949 0.508264
+vt 0.308444 0.503271
+vt 0.306838 0.517622
+vt 0.350155 0.524533
+vt 0.35745 0.505005
+vt 0.375656 0.518489
+vt 0.380193 0.499924
+vt 0.366871 0.486313
+vt 0.322305 0.553361
+vt 0.347111 0.545735
+vt 0.371118 0.537055
+vt 0.293484 0.538213
+vt 0.295904 0.558876
+vt 0.288923 0.524031
+vt 0.284152 0.512489
+vt 0.281519 0.354585
+vt 0.27883 0.367232
+vt 0.275331 0.37971
+vt 0.281861 0.37985
+vt 0.288051 0.367559
+vt 0.296239 0.339242
+vt 0.282591 0.341601
+vt 0.310112 0.344766
+vt 0.309416 0.334597
+vt 0.322149 0.338741
+vt 0.321809 0.329248
+vt 0.30337 0.359703
+vt 0.312751 0.351414
+vt 0.309216 0.364244
+vt 0.316363 0.356301
+vt 0.323307 0.345678
+vt 0.295502 0.369475
+vt 0.288392 0.37999
+vt 0.302069 0.372187
+vt 0.281101 0.499747
+vt 0.27934 0.48605
+vt 0.292638 0.497304
+vt 0.28723 0.458265
+vt 0.288173 0.471672
+vt 0.277967 0.456884
+vt 0.278439 0.471644
+vt 0.299751 0.483057
+vt 0.297907 0.471434
+vt 0.309526 0.481102
+vt 0.307642 0.471064
+vt 0.296494 0.459645
+vt 0.302454 0.49435
+vt 0.311411 0.491141
+vt 0.343632 0.320416
+vt 0.333105 0.324778
+vt 0.333814 0.334838
+vt 0.363585 0.310039
+vt 0.366431 0.327171
+vt 0.353718 0.315393
+vt 0.355806 0.329495
+vt 0.346397 0.343469
+vt 0.357889 0.343783
+vt 0.347817 0.354895
+vt 0.35997 0.358164
+vt 0.369277 0.344302
+vt 0.334695 0.343565
+vt 0.335664 0.351626
+vt 0.379291 0.400052
+vt 0.363918 0.402333
+vt 0.367243 0.406178
+vt 0.353116 0.407088
+vt 0.35919 0.410239
+vt 0.41091 0.404675
+vt 0.399926 0.408536
+vt 0.397021 0.400516
+vt 0.390935 0.404866
+vt 0.37815 0.408898
+vt 0.386805 0.409108
+vt 0.393279 0.411364
+vt 0.363338 0.412821
+vt 0.369666 0.410088
+vt 0.350438 0.422105
+vt 0.355707 0.430165
+vt 0.363148 0.437257
+vt 0.365676 0.434559
+vt 0.360087 0.428972
+vt 0.349098 0.414046
+vt 0.35544 0.415713
+vt 0.361911 0.422489
+vt 0.36115 0.417241
+vt 0.368224 0.432108
+vt 0.364426 0.427706
+vt 0.379285 0.445126
+vt 0.388009 0.44487
+vt 0.385408 0.440532
+vt 0.396797 0.441249
+vt 0.391983 0.437144
+vt 0.371003 0.442416
+vt 0.371902 0.438797
+vt 0.377944 0.436209
+vt 0.372836 0.434912
+vt 0.388047 0.433887
+vt 0.38323 0.436089
+vt 0.411804 0.424136
+vt 0.414761 0.413476
+vt 0.40336 0.414868
+vt 0.405273 0.433865
+vt 0.397822 0.431174
+vt 0.394183 0.422617
+vt 0.391746 0.428935
+vt 0.395216 0.416313
+vt 0.775924 0.375151
+vt 0.775728 0.388743
+vt 0.746453 0.386032
+vt 0.747928 0.395308
+vt 0.717395 0.406114
+vt 0.713072 0.403611
+vt 0.744823 0.376573
+vt 0.749092 0.40422
+vt 0.749645 0.415483
+vt 0.72449 0.414957
+vt 0.72621 0.424666
+vt 0.7756 0.399248
+vt 0.70368 0.428763
+vt 0.687208 0.443737
+vt 0.697322 0.42707
+vt 0.677635 0.445158
+vt 0.707038 0.434775
+vt 0.68965 0.428163
+vt 0.681322 0.43065
+vt 0.666675 0.448773
+vt 0.663815 0.460707
+vt 0.651924 0.464538
+vt 0.630412 0.480156
+vt 0.639991 0.468922
+vt 0.642366 0.477595
+vt 0.66825 0.472074
+vt 0.675624 0.457981
+vt 0.68415 0.468328
+vt 0.689031 0.45406
+vt 0.649396 0.488552
+vt 0.664408 0.486592
+vt 0.681197 0.484039
+vt 0.634969 0.490079
+vt 0.620833 0.49139
+vt 0.912385 0.481335
+vt 0.932278 0.480511
+vt 0.952544 0.479369
+vt 0.933883 0.461959
+vt 0.919103 0.464149
+vt 0.869741 0.465154
+vt 0.887953 0.466437
+vt 0.873151 0.480724
+vt 0.893241 0.481526
+vt 0.865824 0.437779
+vt 0.874945 0.43437
+vt 0.878921 0.451884
+vt 0.891561 0.450226
+vt 0.864248 0.450831
+vt 0.915223 0.44455
+vt 0.903554 0.447624
+vt 0.884353 0.430602
+vt 0.854087 0.418075
+vt 0.860227 0.412749
+vt 0.833101 0.395604
+vt 0.866565 0.407739
+vt 0.836568 0.388337
+vt 0.826949 0.411924
+vt 0.848341 0.424031
+vt 0.824944 0.4236
+vt 0.84502 0.432484
+vt 0.803376 0.3921
+vt 0.802127 0.402739
+vt 0.801153 0.415145
+vt 0.804904 0.380706
+vt 0.806571 0.368936
+vt 0.702688 0.462561
+vt 0.700952 0.48073
+vt 0.723247 0.47733
+vt 0.725766 0.438352
+vt 0.705712 0.446642
+vt 0.74847 0.451919
+vt 0.749286 0.431814
+vt 0.774044 0.450536
+vt 0.774704 0.42899
+vt 0.747651 0.474504
+vt 0.773403 0.473071
+vt 0.799984 0.45176
+vt 0.799741 0.47385
+vt 0.825727 0.475971
+vt 0.824826 0.438046
+vt 0.800446 0.431839
+vt 0.848903 0.461112
+vt 0.846154 0.445298
+vt 0.850422 0.478567
+vt 0.62364 0.273336
+vt 0.631303 0.268742
+vt 0.633284 0.279074
+vt 0.640023 0.273125
+vt 0.639784 0.263935
+vt 0.632769 0.258173
+vt 0.623739 0.264647
+vt 0.600029 0.280268
+vt 0.598688 0.270903
+vt 0.613376 0.277111
+vt 0.610984 0.269667
+vt 0.621969 0.284647
+vt 0.621719 0.253201
+vt 0.613611 0.261924
+vt 0.599928 0.261159
+vt 0.670072 0.278979
+vt 0.653616 0.276716
+vt 0.669832 0.267969
+vt 0.654614 0.268283
+vt 0.648422 0.284838
+vt 0.690213 0.290189
+vt 0.687377 0.279381
+vt 0.68627 0.268024
+vt 0.683842 0.304224
+vt 0.697203 0.300525
+vt 0.645124 0.311795
+vt 0.656458 0.301386
+vt 0.663002 0.320505
+vt 0.672639 0.311009
+vt 0.637949 0.292669
+vt 0.625515 0.302527
+vt 0.64648 0.25088
+vt 0.652498 0.259521
+vt 0.667905 0.256541
+vt 0.65209 0.231125
+vt 0.635774 0.242815
+vt 0.639961 0.22002
+vt 0.62248 0.233347
+vt 0.677556 0.227123
+vt 0.666981 0.218489
+vt 0.656893 0.207226
+vt 0.684642 0.243963
+vt 0.689474 0.232327
+vt 0.684466 0.25604
+vt 0.602642 0.176918
+vt 0.580529 0.190632
+vt 0.589953 0.161017
+vt 0.566382 0.1753
+vt 0.557833 0.204588
+vt 0.573039 0.21849
+vt 0.594645 0.205765
+vt 0.635672 0.179448
+vt 0.62466 0.163426
+vt 0.613523 0.146733
+vt 0.646437 0.19413
+vt 0.627782 0.207015
+vt 0.588272 0.232651
+vt 0.608699 0.2205
+vt 0.608211 0.373618
+vt 0.616832 0.356454
+vt 0.634284 0.378082
+vt 0.640225 0.361705
+vt 0.646724 0.346174
+vt 0.603899 0.333518
+vt 0.593087 0.35113
+vt 0.582299 0.32801
+vt 0.570201 0.34546
+vt 0.582138 0.369154
+vt 0.635039 0.324678
+vt 0.614437 0.31673
+vt 0.593411 0.310569
+vt 0.65434 0.332333
+vt 0.561644 0.296106
+vt 0.564966 0.285457
+vt 0.580542 0.294418
+vt 0.583116 0.283009
+vt 0.582766 0.272899
+vt 0.546607 0.278466
+vt 0.547908 0.287734
+vt 0.528899 0.281331
+vt 0.53186 0.290461
+vt 0.545646 0.297066
+vt 0.561955 0.266155
+vt 0.543727 0.269714
+vt 0.555322 0.257166
+vt 0.537387 0.262094
+vt 0.526554 0.27266
+vt 0.581699 0.262937
+vt 0.576957 0.252641
+vt 0.565582 0.241532
+vt 0.543702 0.248829
+vt 0.550357 0.230094
+vt 0.496387 0.262785
+vt 0.509284 0.25153
+vt 0.513745 0.264403
+vt 0.525705 0.256225
+vt 0.51058 0.233141
+vt 0.488727 0.247432
+vt 0.491672 0.225616
+vt 0.466102 0.243633
+vt 0.473932 0.261753
+vt 0.534066 0.218808
+vt 0.517242 0.2076
+vt 0.516335 0.345283
+vt 0.530115 0.332372
+vt 0.5362 0.354987
+vt 0.549386 0.339171
+vt 0.561842 0.32354
+vt 0.526052 0.315178
+vt 0.511862 0.325172
+vt 0.511906 0.308506
+vt 0.496234 0.31623
+vt 0.49647 0.33558
+vt 0.537837 0.306011
+vt 0.553916 0.307529
+vt 0.525234 0.302122
+vt 0.572839 0.308275
+vt 0.511827 0.28428
+vt 0.509974 0.274041
+vt 0.492244 0.274874
+vt 0.501435 0.298913
+vt 0.516739 0.294259
+vt 0.458569 0.293763
+vt 0.476889 0.290416
+vt 0.467587 0.30982
+vt 0.484835 0.304206
+vt 0.471621 0.276173
+vt 0.449551 0.277706
+vt 0.79029 0.672484
+vt 0.810842 0.6624
+vt 0.788538 0.66243
+vt 0.832155 0.647948
+vt 0.833088 0.669026
+vt 0.767347 0.664111
+vt 0.749725 0.673721
+vt 0.741816 0.650008
+vt 0.85403 0.66365
+vt 0.868881 0.657298
+vt 0.85539 0.645025
+vt 0.888074 0.648027
+vt 0.874212 0.676013
+vt 0.886607 0.695639
+vt 0.89598 0.672443
+vt 0.929571 0.681021
+vt 0.72496 0.675077
+vt 0.707382 0.674426
+vt 0.711754 0.695525
+vt 0.674442 0.675309
+vt 0.715494 0.652696
+vt 0.851241 0.674754
+vt 0.83861 0.688008
+vt 0.813905 0.681083
+vt 0.795054 0.69679
+vt 0.803314 0.729031
+vt 0.821237 0.705648
+vt 0.847489 0.712022
+vt 0.863108 0.691149
+vt 0.734908 0.685353
+vt 0.751974 0.699565
+vt 0.699285 0.735409
+vt 0.729177 0.71566
+vt 0.752628 0.735708
+vt 0.775609 0.715256
+vt 0.771295 0.685081
+vt 0.892477 0.736968
+vt 0.891992 0.760942
+vt 0.924257 0.752441
+vt 0.883161 0.790753
+vt 0.862406 0.753942
+vt 0.813012 0.768991
+vt 0.847704 0.781855
+vt 0.824711 0.812689
+vt 0.703179 0.776568
+vt 0.71878 0.801121
+vt 0.75314 0.781706
+vt 0.750148 0.828139
+vt 0.674567 0.808938
+vt 0.785894 0.799205
+vt 0.832437 0.740517
+vt 0.726143 0.758511
+vt 0.908808 0.716733
+vt 0.780855 0.754707
+vt 0.872961 0.724451
+vt 0.947275 0.716911
+vt 0.972864 0.691703
+vt 0.956423 0.743409
+vt 0.170337 0.155827
+vt 0.136076 0.187112
+vt 0.172299 0.21177
+vt 0.0899929 0.213591
+vt 0.10853 0.16596
+vt 0.195445 0.192426
+vt 0.220169 0.183556
+vt 0.218642 0.212212
+vt 0.204829 0.229447
+vt 0.228379 0.234767
+vt 0.205687 0.259413
+vt 0.137176 0.927417
+vt 0.140528 0.948093
+vt 0.168383 0.944741
+vt 0.141913 0.961891
+vt 0.11077 0.946594
+vt 0.878967 0.954757
+vt 0.879367 0.938048
+vt 0.853203 0.941375
+vt 0.880712 0.92523
+vt 0.906681 0.938262
+vt 0.989055 0.9382
+vt 0.912651 0.957127
+vt 0.886501 1.00065
+vt 0.775015 0.959635
+vt 0.84864 0.929044
+vt 0.877392 0.895069
+vt 0.91222 0.923978
+vt 0.849069 0.962192
+vt 0.254826 0.925518
+vt 0.170213 0.919654
+vt 0.126415 0.872706
+vt 0.0190149 0.939185
+vt 0.100899 0.922774
+vt 0.142077 0.988088
+vt 0.17534 0.957124
+vt 0.106027 0.960245
+vt 0.148988 0.242126
+vt 0.176354 0.291487
+vt 0.102893 0.237993
+vt 0.044388 0.5061
+vt 0.0504472 0.534124
+vt 0.0221481 0.572717
+vt 0.0611958 0.546374
+vt 0.0719578 0.52396
+vt 0.0576334 0.571748
+vt 0.0664284 0.60486
+vt 0.0810156 0.564522
+vt 0.0825732 0.542567
+vt 0.101769 0.534189
+vt 0.944378 0.797731
+vt 0.915109 0.794589
+vt 0.905965 0.833567
+vt 0.842874 0.862029
+vt 0.791851 0.848437
+vt 0.748723 0.889561
+vt 0.703598 0.850293
+vt 0.642603 0.8718
+vt 0.843173 0.630902
+vt 0.84458 0.638283
+vt 0.846408 0.629484
+vt 0.71498 0.637314
+vt 0.726461 0.64589
+vt 0.734897 0.643187
+vt 0.865004 0.824698
+vt 0.962666 0.771065
+vt 0.94749 0.766441
+vt 0.268705 0.710477
+vt 0.214528 0.765549
+vt 0.268705 0.80475
+vt 0.191977 0.815455
+vt 0.127915 0.732152
+vt 0.268814 0.267129
+vt 0.238957 0.294344
+vt 0.268669 0.328244
+vt 0.215045 0.317962
+vt 0.269051 0.235581
+vt 0.241893 0.247542
+vt 0.652939 0.625528
+vt 0.689461 0.648127
+vt 0.870091 0.634597
+vt 0.889967 0.621532
+vt 0.917129 0.645104
+vt 0.960269 0.639284
+vt 0.9614 0.662237
+vt 0.981019 0.639244
+vt 0.270276 0.136874
+vt 0.232631 0.163206
+vt 0.269409 0.175133
+vt 0.242681 0.194855
+vt 0.269141 0.208639
+vt 0.244466 0.223601
+vt 0.189521 0.653078
+vt 0.113276 0.681659
+vt 0.170828 0.555621
+vt 0.268705 0.586687
+vt 0.197069 0.290309
+vt 0.147015 0.418787
+vt 0.159401 0.426978
+vt 0.169209 0.419346
+vt 0.166865 0.4343
+vt 0.151569 0.435731
+vt 0.101359 0.388325
+vt 0.164076 0.395797
+vt 0.253799 0.393043
+vt 0.188648 0.408382
+vt 0.10719 0.406598
+vt 0.0810976 0.440196
+vt 0.135792 0.449792
+vt 0.137248 0.466241
+vt -0.0522421 0.436952
+vt 0.203585 0.425014
+vt 0.183198 0.455456
+vt 0.176955 0.4435
+vt 0.158284 0.457002
+vt 0.0646922 0.403325
+vt 0.0969534 0.389119
+vt 0.127418 0.569795
+vt 0.101838 0.557019
+vt 0.115165 0.496827
+vt 0.0856225 0.479205
+vt 0.0876161 0.508163
+vt 0.224113 0.501179
+vt 0.216094 0.528531
+vt 0.268704 0.514179
+vt 0.152679 0.481359
+vt 0.268801 0.37957
+vt 0.241478 0.353126
+vt 0.242729 0.379892
+vt 0.214493 0.347926
+vt 0.247642 0.484503
+vt 0.231652 0.461026
+vt 0.268704 0.455503
+vt 0.192219 0.333104
+vt 0.164806 0.367802
+vt 0.156354 0.409665
+vt 0.179537 0.42641
+vt 0.158914 0.444672
+vt 0.1348 0.428233
+vt 0.776574 0.644712
+vt 0.722394 0.585817
+vt 0.651799 0.541213
+vt 0.694305 0.554226
+vt 0.775203 0.583316
+vt 0.656336 0.526524
+vt 0.611254 0.502624
+vt 0.680604 0.50058
+vt 0.971205 0.496778
+vt 0.903217 0.52798
+vt 0.896582 0.566601
+vt 0.859045 0.553539
+vt 0.874064 0.496896
+vt 0.829393 0.590056
+vt 0.725299 0.540094
+vt 0.773136 0.497495
+vt 0.8247 0.537119
+vt 0.64197 0.737033
+vt 0.61964 0.737363
+vt 0.604328 0.759732
+vt 0.608299 0.714718
+vt 0.667128 0.712276
+vt 0.663413 0.762979
+vt 0.546984 0.81774
+vt 0.617742 0.813781
+vt 0.624356 0.66752
+vt 0.55686 0.648558
+vt 0.568421 0.733816
+vt 0.529143 0.743889
+vt 0.538817 0.714517
+vt 0.531598 0.768055
+vt 0.445833 0.7485
+vt 0.547123 0.691597
+vt 0.481156 0.686856
+vt 0.499408 0.724217
+vt 0.796428 0.328877
+vt 0.8169 0.338119
+vt 0.838683 0.330654
+vt 0.838689 0.3517
+vt 0.793926 0.338777
+vt 0.773732 0.337889
+vt 0.74951 0.352736
+vt 0.755441 0.328792
+vt 0.859512 0.335574
+vt 0.874845 0.341757
+vt 0.878929 0.322664
+vt 0.894264 0.350808
+vt 0.86172 0.354306
+vt 0.890121 0.302638
+vt 0.900633 0.325513
+vt 0.932192 0.315548
+vt 0.730562 0.326947
+vt 0.713106 0.328223
+vt 0.72296 0.350316
+vt 0.678736 0.328632
+vt 0.716325 0.307131
+vt 0.856285 0.324517
+vt 0.84324 0.311687
+vt 0.81933 0.31943
+vt 0.799782 0.304286
+vt 0.806322 0.271776
+vt 0.825244 0.294637
+vt 0.850879 0.287575
+vt 0.86722 0.30781
+vt 0.740091 0.316932
+vt 0.756566 0.302501
+vt 0.702583 0.267919
+vt 0.73303 0.286789
+vt 0.75555 0.26631
+vt 0.779434 0.286256
+vt 0.776741 0.316735
+vt 0.894098 0.262064
+vt 0.892695 0.238422
+vt 0.863735 0.245736
+vt 0.882937 0.208649
+vt 0.924796 0.246563
+vt 0.814013 0.231601
+vt 0.847948 0.218052
+vt 0.823544 0.187373
+vt 0.704382 0.227027
+vt 0.718515 0.201921
+vt 0.673705 0.195595
+vt 0.748091 0.173595
+vt 0.753811 0.220319
+vt 0.785435 0.201844
+vt 0.834654 0.259639
+vt 0.728096 0.244245
+vt 0.910872 0.281409
+vt 0.782742 0.246644
+vt 0.87545 0.274707
+vt 0.948004 0.28051
+vt 0.956452 0.255796
+vt 0.972736 0.303635
+vt 0.370772 0.156168
+vt 0.404072 0.188535
+vt 0.432688 0.16816
+vt 0.449944 0.216643
+vt 0.366322 0.212187
+vt 0.343097 0.19204
+vt 0.319164 0.211948
+vt 0.318073 0.183115
+vt 0.332918 0.229442
+vt 0.33175 0.259955
+vt 0.309225 0.234833
+vt 0.632664 0.937599
+vt 0.629313 0.958275
+vt 0.659071 0.956775
+vt 0.627927 0.972073
+vt 0.601457 0.954923
+vt 0.374133 0.932158
+vt 0.374091 0.913509
+vt 0.349193 0.91288
+vt 0.374675 0.900396
+vt 0.400674 0.918841
+vt 0.275352 0.917971
+vt 0.344112 0.934726
+vt 0.371547 0.982105
+vt 0.480772 0.938676
+vt 0.406833 0.906276
+vt 0.38004 0.872706
+vt 0.345731 0.899719
+vt 0.405214 0.941284
+vt 0.515015 0.935699
+vt 0.599629 0.929832
+vt 0.643425 0.882887
+vt 0.750826 0.949367
+vt 0.668941 0.932959
+vt 0.627764 0.99827
+vt 0.594501 0.967304
+vt 0.663814 0.97043
+vt 0.389421 0.243699
+vt 0.436255 0.240856
+vt 0.360739 0.292908
+vt 0.479266 0.621214
+vt 0.485524 0.569542
+vt 0.463254 0.566666
+vt 0.48531 0.54359
+vt 0.519446 0.53819
+vt 0.495549 0.530351
+vt 0.47602 0.523291
+vt 0.496385 0.522841
+vt 0.465093 0.54022
+vt 0.446797 0.527765
+vt 0.944666 0.201943
+vt 0.914837 0.204675
+vt 0.904503 0.165124
+vt 0.839442 0.136903
+vt 0.788702 0.151899
+vt 0.742974 0.111175
+vt 0.700099 0.152559
+vt 0.637093 0.13245
+vt 0.850315 0.368043
+vt 0.851223 0.360798
+vt 0.852982 0.368837
+vt 0.723976 0.366596
+vt 0.734474 0.357174
+vt 0.742904 0.359347
+vt 0.863449 0.174562
+vt 0.963467 0.230405
+vt 0.947856 0.233441
+vt 0.322881 0.76555
+vt 0.409494 0.732152
+vt 0.345432 0.815455
+vt 0.298143 0.294421
+vt 0.321942 0.31772
+vt 0.295585 0.24761
+vt 0.660357 0.382546
+vt 0.69659 0.356392
+vt 0.876132 0.364721
+vt 0.893585 0.379
+vt 0.921054 0.3528
+vt 0.961877 0.356785
+vt 0.962698 0.33339
+vt 0.981984 0.355755
+vt 0.306768 0.162912
+vt 0.295194 0.194359
+vt 0.29317 0.223456
+vt 0.347888 0.653078
+vt 0.366581 0.55562
+vt 0.424133 0.681659
+vt 0.340033 0.290817
+vt 0.388965 0.413938
+vt 0.377097 0.422446
+vt 0.385455 0.431061
+vt 0.370091 0.429971
+vt 0.366833 0.415002
+vt 0.432694 0.382302
+vt 0.370508 0.391231
+vt 0.427991 0.40079
+vt 0.346746 0.40443
+vt 0.280737 0.3905
+vt 0.456111 0.433947
+vt 0.589083 0.427663
+vt 0.401629 0.461384
+vt 0.402075 0.444828
+vt 0.332854 0.421481
+vt 0.360577 0.439441
+vt 0.355077 0.451593
+vt 0.380059 0.45258
+vt 0.470234 0.396534
+vt 0.437143 0.383
+vt 0.468433 0.48133
+vt 0.462264 0.506664
+vt 0.433175 0.490402
+vt 0.401796 0.544866
+vt 0.440576 0.55139
+vt 0.313296 0.501179
+vt 0.321314 0.528529
+vt 0.38473 0.481358
+vt 0.295899 0.353213
+vt 0.32351 0.348357
+vt 0.294922 0.38013
+vt 0.289767 0.484501
+vt 0.305757 0.461026
+vt 0.344952 0.331857
+vt 0.372123 0.361433
+vt 0.379071 0.404986
+vt 0.356951 0.422334
+vt 0.378668 0.44021
+vt 0.40174 0.423148
+vt 0.776574 0.349534
+vt 0.721657 0.410555
+vt 0.77532 0.411729
+vt 0.694232 0.4441
+vt 0.649571 0.457689
+vt 0.655529 0.474973
+vt 0.903209 0.465704
+vt 0.85855 0.440351
+vt 0.896562 0.42714
+vt 0.829462 0.404087
+vt 0.725094 0.457517
+vt 0.824792 0.456782
+vt 0.644474 0.268488
+vt 0.619495 0.269287
+vt 0.604207 0.292645
+vt 0.603235 0.247789
+vt 0.669742 0.291872
+vt 0.665491 0.242026
+vt 0.542812 0.189584
+vt 0.615129 0.192251
+vt 0.625589 0.340224
+vt 0.556065 0.36469
+vt 0.56464 0.275636
+vt 0.534275 0.296732
+vt 0.525704 0.266294
+vt 0.528338 0.24086
+vt 0.440532 0.261649
+vt 0.542934 0.319725
+vt 0.476605 0.325877
+vt 0.495044 0.287182
+usemtl material_1
+f 739/1 735/2 736/3
+f 189/4 736/3 735/2
+f 192/5 738/6 737/7
+f 739/1 737/7 738/6
+f 190/8 741/9 740/10
+f 743/11 740/10 741/9
+f 736/3 189/4 743/11
+f 742/12 743/11 189/4
+f 745/13 747/14 191/15
+f 744/16 191/15 747/14
+f 746/17 747/14 190/8
+f 741/9 190/8 747/14
+f 748/18 192/5 750/19
+f 737/7 750/19 192/5
+f 191/15 749/20 745/13
+f 750/19 745/13 749/20
+f 752/21 754/22 194/23
+f 751/24 194/23 754/22
+f 753/25 189/4 754/22
+f 735/2 754/22 189/4
+f 195/26 756/27 755/28
+f 758/29 755/28 756/27
+f 194/23 757/30 752/21
+f 758/29 752/21 757/30
+f 759/31 196/32 762/33
+f 760/34 762/33 196/32
+f 761/35 762/33 195/26
+f 756/27 195/26 762/33
+f 764/36 742/12 753/25
+f 189/4 753/25 742/12
+f 196/32 763/37 760/34
+f 764/36 760/34 763/37
+f 769/38 765/39 766/40
+f 198/41 766/40 765/39
+f 768/42 769/38 201/43
+f 767/44 201/43 769/38
+f 771/45 773/46 199/47
+f 770/48 199/47 773/46
+f 772/49 773/46 198/41
+f 766/40 198/41 773/46
+f 200/50 775/51 774/52
+f 777/53 774/52 775/51
+f 199/47 776/54 771/45
+f 777/53 771/45 776/54
+f 201/43 767/44 778/55
+f 780/56 778/55 767/44
+f 779/57 780/56 200/50
+f 775/51 200/50 780/56
+f 782/58 785/59 203/60
+f 781/61 203/60 785/59
+f 784/62 785/59 205/63
+f 783/64 205/63 785/59
+f 200/50 786/65 779/57
+f 788/66 779/57 786/65
+f 203/60 787/67 782/58
+f 788/66 782/58 787/67
+f 790/68 791/69 204/70
+f 789/71 204/70 791/69
+f 774/52 791/69 200/50
+f 786/65 200/50 791/69
+f 205/63 783/64 792/72
+f 794/73 792/72 783/64
+f 204/70 793/74 790/68
+f 794/73 790/68 793/74
+f 796/75 799/76 207/77
+f 795/78 207/77 799/76
+f 210/79 798/80 797/81
+f 799/76 797/81 798/80
+f 208/82 801/83 800/84
+f 803/85 800/84 801/83
+f 207/77 802/86 796/75
+f 803/85 796/75 802/86
+f 209/87 805/88 804/89
+f 807/90 804/89 805/88
+f 806/91 807/90 208/82
+f 801/83 208/82 807/90
+f 797/81 810/92 210/79
+f 808/93 210/79 810/92
+f 809/94 810/92 209/87
+f 805/88 209/87 810/92
+f 201/43 811/95 768/42
+f 814/96 768/42 811/95
+f 214/97 813/98 812/99
+f 814/96 812/99 813/98
+f 816/100 817/101 212/102
+f 815/103 212/102 817/101
+f 778/55 817/101 201/43
+f 811/95 201/43 817/101
+f 213/104 819/105 818/106
+f 821/107 818/106 819/105
+f 212/102 820/108 816/100
+f 821/107 816/100 820/108
+f 812/99 824/109 214/97
+f 822/110 214/97 824/109
+f 823/111 824/109 213/104
+f 819/105 213/104 824/109
+f 825/112 828/113 192/5
+f 738/6 192/5 828/113
+f 827/114 828/113 217/115
+f 826/116 217/115 828/113
+f 213/104 829/117 823/111
+f 830/118 823/111 829/117
+f 830/118 825/112 748/18
+f 192/5 748/18 825/112
+f 832/119 833/120 216/121
+f 831/122 216/121 833/120
+f 818/106 833/120 213/104
+f 829/117 213/104 833/120
+f 217/115 826/116 834/123
+f 836/124 834/123 826/116
+f 216/121 835/125 832/119
+f 836/124 832/119 835/125
+f 219/126 838/127 837/128
+f 841/129 837/128 838/127
+f 839/130 221/131 841/129
+f 840/132 841/129 221/131
+f 842/133 844/134 216/121
+f 835/125 216/121 844/134
+f 843/135 844/134 219/126
+f 838/127 219/126 844/134
+f 847/136 845/137 846/138
+f 220/139 846/138 845/137
+f 216/121 831/122 842/133
+f 847/136 842/133 831/122
+f 850/140 848/141 839/130
+f 221/131 839/130 848/141
+f 846/138 220/139 850/140
+f 849/142 850/140 220/139
+f 223/143 852/144 851/145
+f 854/146 851/145 852/144
+f 203/60 781/61 853/147
+f 854/146 853/147 781/61
+f 220/139 855/148 849/142
+f 857/149 849/142 855/148
+f 852/144 223/143 857/149
+f 856/150 857/149 223/143
+f 820/108 212/102 859/151
+f 858/152 859/151 212/102
+f 845/137 859/151 220/139
+f 855/148 220/139 859/151
+f 787/67 203/60 860/153
+f 853/147 860/153 203/60
+f 212/102 815/103 858/152
+f 860/153 858/152 815/103
+f 225/154 862/155 861/156
+f 864/157 861/156 862/155
+f 207/77 795/78 863/158
+f 864/157 863/158 795/78
+f 866/159 868/160 226/161
+f 865/162 226/161 868/160
+f 867/163 868/160 225/154
+f 862/155 225/154 868/160
+f 227/164 870/165 869/166
+f 872/167 869/166 870/165
+f 226/161 871/168 866/159
+f 872/167 866/159 871/168
+f 863/158 874/169 207/77
+f 802/86 207/77 874/169
+f 873/170 874/169 227/164
+f 870/165 227/164 874/169
+f 876/171 879/172 229/173
+f 875/174 229/173 879/172
+f 877/175 231/176 879/172
+f 878/177 879/172 231/176
+f 227/164 880/178 873/170
+f 882/179 873/170 880/178
+f 229/173 881/180 876/171
+f 882/179 876/171 881/180
+f 884/181 885/182 230/183
+f 883/184 230/183 885/182
+f 869/166 885/182 227/164
+f 880/178 227/164 885/182
+f 888/185 886/186 877/175
+f 231/176 877/175 886/186
+f 230/183 887/187 884/181
+f 888/185 884/181 887/187
+f 892/188 889/189 890/190
+f 233/191 890/190 889/189
+f 892/188 891/192 837/128
+f 219/126 837/128 891/192
+f 893/193 895/194 230/183
+f 887/187 230/183 895/194
+f 894/195 895/194 233/191
+f 890/190 233/191 895/194
+f 234/196 897/197 896/198
+f 898/199 896/198 897/197
+f 230/183 883/184 893/193
+f 898/199 893/193 883/184
+f 900/200 843/135 891/192
+f 219/126 891/192 843/135
+f 899/201 900/200 234/196
+f 897/197 234/196 900/200
+f 217/115 901/202 827/114
+f 903/203 827/114 901/202
+f 194/23 751/24 902/204
+f 903/203 902/204 751/24
+f 234/196 904/205 899/201
+f 905/206 899/201 904/205
+f 834/123 905/206 217/115
+f 901/202 217/115 905/206
+f 906/207 907/208 226/161
+f 871/168 226/161 907/208
+f 896/198 907/208 234/196
+f 904/205 234/196 907/208
+f 908/209 757/30 902/204
+f 194/23 902/204 757/30
+f 226/161 865/162 906/207
+f 908/209 906/207 865/162
+f 910/210 913/211 237/212
+f 909/213 237/212 913/211
+f 912/214 913/211 240/215
+f 911/216 240/215 913/211
+f 238/217 915/218 914/219
+f 917/220 914/219 915/218
+f 917/220 910/210 916/221
+f 237/212 916/221 910/210
+f 918/222 239/223 921/224
+f 919/225 921/224 239/223
+f 920/226 921/224 238/217
+f 915/218 238/217 921/224
+f 240/215 911/216 922/227
+f 924/228 922/227 911/216
+f 239/223 923/229 919/225
+f 924/228 919/225 923/229
+f 926/230 929/231 242/232
+f 925/233 242/232 929/231
+f 929/231 927/234 928/235
+f 244/236 928/235 927/234
+f 239/223 930/237 923/229
+f 932/238 923/229 930/237
+f 242/232 931/239 926/230
+f 932/238 926/230 931/239
+f 934/240 935/241 243/242
+f 933/243 243/242 935/241
+f 918/222 935/241 239/223
+f 930/237 239/223 935/241
+f 938/244 936/245 927/234
+f 244/236 927/234 936/245
+f 243/242 937/246 934/240
+f 938/244 934/240 937/246
+f 246/247 940/248 939/249
+f 943/250 939/249 940/248
+f 249/251 942/252 941/253
+f 943/250 941/253 942/252
+f 945/254 947/255 247/256
+f 944/257 247/256 947/255
+f 946/258 947/255 246/247
+f 940/248 246/247 947/255
+f 951/259 948/260 949/261
+f 248/262 949/261 948/260
+f 247/256 950/263 945/254
+f 951/259 945/254 950/263
+f 952/264 249/251 954/265
+f 941/253 954/265 249/251
+f 953/266 954/265 248/262
+f 949/261 248/262 954/265
+f 955/267 958/268 244/236
+f 928/235 244/236 958/268
+f 957/269 958/268 252/270
+f 956/271 252/270 958/268
+f 251/272 960/273 959/274
+f 961/275 959/274 960/273
+f 244/236 936/245 955/267
+f 961/275 955/267 936/245
+f 962/276 964/277 247/256
+f 950/263 247/256 964/277
+f 963/278 964/277 251/272
+f 960/273 251/272 964/277
+f 252/270 956/271 965/279
+f 966/280 965/279 956/271
+f 247/256 944/257 962/276
+f 966/280 962/276 944/257
+f 221/131 967/281 840/132
+f 970/282 840/132 967/281
+f 255/283 969/284 968/285
+f 970/282 968/285 969/284
+f 972/286 973/287 254/288
+f 971/289 254/288 973/287
+f 848/141 973/287 221/131
+f 967/281 221/131 973/287
+f 976/290 931/239 974/291
+f 242/232 974/291 931/239
+f 254/288 975/292 972/286
+f 976/290 972/286 975/292
+f 968/285 978/293 255/283
+f 977/294 255/283 978/293
+f 974/291 242/232 978/293
+f 925/233 978/293 242/232
+f 980/295 982/296 257/297
+f 979/298 257/297 982/296
+f 886/186 982/296 231/176
+f 981/299 231/176 982/296
+f 985/300 946/258 983/301
+f 246/247 983/301 946/258
+f 257/297 984/302 980/295
+f 985/300 980/295 984/302
+f 258/303 987/304 986/305
+f 988/306 986/305 987/304
+f 246/247 939/249 983/301
+f 988/306 983/301 939/249
+f 231/176 981/299 878/177
+f 990/307 878/177 981/299
+f 990/307 987/304 989/308
+f 258/303 989/308 987/304
+f 991/309 260/310 994/311
+f 992/312 994/311 260/310
+f 792/72 994/311 205/63
+f 993/313 205/63 994/311
+f 916/221 237/212 997/314
+f 995/315 997/314 237/212
+f 260/310 996/316 992/312
+f 997/314 992/312 996/316
+f 999/317 1000/318 261/319
+f 998/320 261/319 1000/318
+f 237/212 909/213 995/315
+f 1000/318 995/315 909/213
+f 205/63 993/313 784/62
+f 1002/321 784/62 993/313
+f 1002/321 999/317 1001/322
+f 261/319 1001/322 999/317
+f 1003/323 1005/324 257/297
+f 984/302 257/297 1005/324
+f 965/279 1005/324 252/270
+f 1004/325 252/270 1005/324
+f 233/191 1006/326 894/195
+f 1007/327 894/195 1006/326
+f 257/297 979/298 1003/323
+f 1007/327 1003/323 979/298
+f 1008/328 1009/329 255/283
+f 969/284 255/283 1009/329
+f 889/189 1009/329 233/191
+f 1006/326 233/191 1009/329
+f 252/270 1004/325 957/269
+f 1010/330 957/269 1004/325
+f 255/283 977/294 1008/328
+f 1010/330 1008/328 977/294
+f 223/143 1011/331 856/150
+f 1013/332 856/150 1011/331
+f 254/288 971/289 1012/333
+f 1013/332 1012/333 971/289
+f 1014/334 1015/335 261/319
+f 1001/322 261/319 1015/335
+f 851/145 1015/335 223/143
+f 1011/331 223/143 1015/335
+f 240/215 1016/336 912/214
+f 1017/337 912/214 1016/336
+f 261/319 998/320 1014/334
+f 1017/337 1014/334 998/320
+f 1012/333 1018/338 254/288
+f 975/292 254/288 1018/338
+f 922/227 1018/338 240/215
+f 1016/336 240/215 1018/338
+f 1019/339 265/340 1022/341
+f 1020/342 1022/341 265/340
+f 1022/341 1021/343 991/309
+f 260/310 991/309 1021/343
+f 1023/344 266/345 1026/346
+f 1024/347 1026/346 266/345
+f 1025/348 1026/346 265/340
+f 1020/342 265/340 1026/346
+f 1030/349 1027/350 1028/351
+f 267/352 1028/351 1027/350
+f 266/345 1029/353 1024/347
+f 1030/349 1024/347 1029/353
+f 1021/343 1032/354 260/310
+f 996/316 260/310 1032/354
+f 1028/351 267/352 1032/354
+f 1031/355 1032/354 267/352
+f 269/356 1034/357 1033/358
+f 1036/359 1033/358 1034/357
+f 1036/359 1035/360 1023/361
+f 266/362 1023/361 1035/360
+f 1038/363 1040/364 270/365
+f 1037/366 270/365 1040/364
+f 1039/367 1040/364 269/356
+f 1034/357 269/356 1040/364
+f 1044/368 1041/369 1042/370
+f 271/371 1042/370 1041/369
+f 270/365 1043/372 1038/363
+f 1044/368 1038/363 1043/372
+f 1035/360 1046/373 266/362
+f 1029/374 266/362 1046/373
+f 1042/370 271/371 1046/373
+f 1045/375 1046/373 271/371
+f 1048/376 1050/377 273/378
+f 1047/379 273/378 1050/377
+f 1033/358 1050/377 269/356
+f 1049/380 269/356 1050/377
+f 274/381 1052/382 1051/383
+f 1054/384 1051/383 1052/382
+f 273/378 1053/385 1048/376
+f 1054/384 1048/376 1053/385
+f 1056/386 1058/387 275/388
+f 1055/389 275/388 1058/387
+f 1057/390 1058/387 274/381
+f 1052/382 274/381 1058/387
+f 269/356 1049/380 1039/367
+f 1060/391 1039/367 1049/380
+f 275/388 1059/392 1056/386
+f 1060/391 1056/386 1059/392
+f 1061/393 1064/394 275/388
+f 1059/392 275/388 1064/394
+f 1063/395 1064/394 279/396
+f 1062/397 279/396 1064/394
+f 277/398 1066/399 1065/400
+f 1067/401 1065/400 1066/399
+f 275/388 1055/389 1061/393
+f 1067/401 1061/393 1055/389
+f 278/402 1069/403 1068/404
+f 1071/405 1068/404 1069/403
+f 1070/406 1071/405 277/398
+f 1066/399 277/398 1071/405
+f 279/396 1062/397 1072/407
+f 1074/408 1072/407 1062/397
+f 1073/409 1074/408 278/402
+f 1069/403 278/402 1074/408
+f 1076/410 1079/411 281/412
+f 1075/413 281/412 1079/411
+f 1077/414 284/415 1079/411
+f 1078/416 1079/411 284/415
+f 1081/417 1083/418 282/419
+f 1080/420 282/419 1083/418
+f 281/412 1082/421 1076/410
+f 1083/418 1076/410 1082/421
+f 283/422 1085/423 1084/424
+f 1087/425 1084/424 1085/423
+f 282/419 1086/426 1081/417
+f 1087/425 1081/417 1086/426
+f 284/415 1077/414 1088/427
+f 1090/428 1088/427 1077/414
+f 1090/428 1085/423 1089/429
+f 283/422 1089/429 1085/423
+f 1092/430 1095/431 286/432
+f 1091/433 286/432 1095/431
+f 1095/431 1093/434 1094/435
+f 289/436 1094/435 1093/434
+f 1097/437 1099/438 287/439
+f 1096/440 287/439 1099/438
+f 286/432 1098/441 1092/430
+f 1099/438 1092/430 1098/441
+f 288/442 1101/443 1100/444
+f 1103/445 1100/444 1101/443
+f 287/439 1102/446 1097/437
+f 1103/445 1097/437 1102/446
+f 289/436 1093/434 1104/447
+f 1106/448 1104/447 1093/434
+f 1101/443 288/442 1106/448
+f 1105/449 1106/448 288/442
+f 225/450 1107/451 867/452
+f 1110/453 867/452 1107/451
+f 292/454 1109/455 1108/456
+f 1110/453 1108/456 1109/455
+f 1112/457 1113/458 291/459
+f 1111/460 291/459 1113/458
+f 861/461 1113/458 225/450
+f 1107/451 225/450 1113/458
+f 289/436 1114/462 1094/435
+f 1116/463 1094/435 1114/462
+f 291/459 1115/464 1112/457
+f 1116/463 1112/457 1115/464
+f 1108/456 1118/465 292/454
+f 1117/466 292/454 1118/465
+f 289/436 1104/447 1114/462
+f 1118/465 1114/462 1104/447
+f 1120/467 1123/468 294/469
+f 1119/470 294/469 1123/468
+f 1122/471 1123/468 296/472
+f 1121/473 296/472 1123/468
+f 1127/474 1124/475 1125/476
+f 295/477 1125/476 1124/475
+f 294/469 1126/478 1120/467
+f 1127/474 1120/467 1126/478
+f 1128/479 1130/480 287/439
+f 1102/446 287/439 1130/480
+f 295/477 1129/481 1125/476
+f 1130/480 1125/476 1129/481
+f 296/472 1121/473 1131/482
+f 1132/483 1131/482 1121/473
+f 287/439 1096/440 1128/479
+f 1132/483 1128/479 1096/440
+f 1133/484 1135/485 292/454
+f 1109/455 292/454 1135/485
+f 755/486 1135/485 195/487
+f 1134/488 195/487 1135/485
+f 288/442 1136/489 1105/449
+f 1137/490 1105/449 1136/489
+f 292/454 1117/466 1133/484
+f 1137/490 1133/484 1117/466
+f 295/477 1138/491 1129/481
+f 1139/492 1129/481 1138/491
+f 1136/489 288/442 1139/492
+f 1100/444 1139/492 288/442
+f 195/487 1134/488 761/493
+f 1140/494 761/493 1134/488
+f 295/477 1124/475 1138/491
+f 1140/494 1138/491 1124/475
+f 1143/495 1122/471 1141/496
+f 296/472 1141/496 1122/471
+f 1143/495 1142/497 808/498
+f 210/499 808/498 1142/497
+f 1144/500 1145/501 286/432
+f 1098/441 286/432 1145/501
+f 1141/496 296/472 1145/501
+f 1131/482 1145/501 296/472
+f 291/459 1146/502 1115/464
+f 1147/503 1115/464 1146/502
+f 286/432 1091/433 1144/500
+f 1147/503 1144/500 1091/433
+f 1142/497 1148/504 210/499
+f 798/505 210/499 1148/504
+f 1146/502 291/459 1148/504
+f 1111/460 1148/504 291/459
+f 300/506 1150/507 1149/508
+f 1152/509 1149/508 1150/507
+f 214/510 822/511 1151/512
+f 1152/509 1151/512 822/511
+f 1153/513 1155/514 281/412
+f 1082/421 281/412 1155/514
+f 1150/507 300/506 1155/514
+f 1154/515 1155/514 300/506
+f 301/516 1157/517 1156/518
+f 1158/519 1156/518 1157/517
+f 281/412 1075/413 1153/513
+f 1158/519 1153/513 1075/413
+f 1151/512 1160/520 214/510
+f 813/521 214/510 1160/520
+f 1157/517 301/516 1160/520
+f 1159/522 1160/520 301/516
+f 198/523 1161/524 772/525
+f 1164/526 772/525 1161/524
+f 1164/526 1162/527 1163/528
+f 303/529 1163/528 1162/527
+f 1165/530 1166/531 301/516
+f 1159/522 301/516 1166/531
+f 765/532 1166/531 198/523
+f 1161/524 198/523 1166/531
+f 284/415 1167/533 1078/416
+f 1168/534 1078/416 1167/533
+f 301/516 1156/518 1165/530
+f 1168/534 1165/530 1156/518
+f 1162/527 1170/535 303/529
+f 1169/536 303/529 1170/535
+f 1167/533 284/415 1170/535
+f 1088/427 1170/535 284/415
+f 305/537 1172/538 1171/539
+f 1174/540 1171/539 1172/538
+f 191/541 744/542 1173/543
+f 1174/540 1173/543 744/542
+f 1175/544 1177/545 282/419
+f 1086/426 282/419 1177/545
+f 1176/546 1177/545 305/537
+f 1172/538 305/537 1177/545
+f 300/506 1178/547 1154/515
+f 1179/548 1154/515 1178/547
+f 282/419 1080/420 1175/544
+f 1179/548 1175/544 1080/420
+f 1173/543 1180/549 191/541
+f 749/550 191/541 1180/549
+f 1149/508 1180/549 300/506
+f 1178/547 300/506 1180/549
+f 1181/551 1184/552 303/529
+f 1163/528 303/529 1184/552
+f 1183/553 1184/552 307/554
+f 1182/555 307/554 1184/552
+f 283/422 1185/556 1089/429
+f 1186/557 1089/429 1185/556
+f 303/529 1169/536 1181/551
+f 1186/557 1181/551 1169/536
+f 305/537 1187/558 1176/546
+f 1188/559 1176/546 1187/558
+f 283/422 1084/424 1185/556
+f 1188/559 1185/556 1084/424
+f 307/554 1182/555 1189/560
+f 1190/561 1189/560 1182/555
+f 1171/539 1190/561 305/537
+f 1187/558 305/537 1190/561
+f 1192/562 1194/563 309/564
+f 1191/565 309/564 1194/563
+f 1072/407 1194/563 279/396
+f 1193/566 279/396 1194/563
+f 1195/567 310/568 1198/569
+f 1196/570 1198/569 310/568
+f 1192/562 309/564 1198/569
+f 1197/571 1198/569 309/564
+f 1202/572 1199/573 1200/574
+f 311/575 1200/574 1199/573
+f 310/568 1201/576 1196/570
+f 1202/572 1196/570 1201/576
+f 1203/577 1205/578 270/365
+f 1043/372 270/365 1205/578
+f 1200/574 311/575 1205/578
+f 1204/579 1205/578 311/575
+f 279/396 1193/566 1063/395
+f 1206/580 1063/395 1193/566
+f 270/365 1037/366 1203/577
+f 1206/580 1203/577 1037/366
+f 1208/581 1211/582 313/583
+f 1207/584 313/583 1211/582
+f 1210/585 1211/582 316/586
+f 1209/587 316/586 1211/582
+f 314/588 1213/589 1212/590
+f 1215/591 1212/590 1213/589
+f 313/583 1214/592 1208/581
+f 1215/591 1208/581 1214/592
+f 315/593 1217/594 1216/595
+f 1219/596 1216/595 1217/594
+f 1219/596 1213/589 1218/597
+f 314/588 1218/597 1213/589
+f 1209/587 1222/598 316/586
+f 1220/599 316/586 1222/598
+f 1221/600 1222/598 315/593
+f 1217/594 315/593 1222/598
+f 1223/601 1225/602 310/603
+f 1201/604 310/603 1225/602
+f 1212/590 1225/602 314/588
+f 1224/605 314/588 1225/602
+f 318/606 1227/607 1226/608
+f 1228/609 1226/608 1227/607
+f 1228/609 1223/601 1195/610
+f 310/603 1195/610 1223/601
+f 1230/611 1232/612 319/613
+f 1229/614 319/613 1232/612
+f 1231/615 1232/612 318/606
+f 1227/607 318/606 1232/612
+f 314/588 1224/605 1218/597
+f 1234/616 1218/597 1224/605
+f 319/613 1233/617 1230/611
+f 1234/616 1230/611 1233/617
+f 1236/618 1238/619 321/620
+f 1235/621 321/620 1238/619
+f 1237/622 319/613 1238/619
+f 1229/614 1238/619 319/613
+f 322/623 1240/624 1239/625
+f 1242/626 1239/625 1240/624
+f 321/620 1241/627 1236/618
+f 1242/626 1236/618 1241/627
+f 315/593 1243/628 1221/600
+f 1245/629 1221/600 1243/628
+f 1240/624 322/623 1245/629
+f 1244/630 1245/629 322/623
+f 319/613 1237/622 1233/617
+f 1246/631 1233/617 1237/622
+f 1246/631 1243/628 1216/595
+f 315/593 1216/595 1243/628
+f 324/632 1248/633 1247/634
+f 1251/635 1247/634 1248/633
+f 326/636 1250/637 1249/638
+f 1251/635 1249/638 1250/637
+f 1253/639 1255/640 325/641
+f 1252/642 325/641 1255/640
+f 1254/643 1255/640 324/632
+f 1248/633 324/632 1255/640
+f 238/217 1256/644 920/226
+f 1258/645 920/226 1256/644
+f 325/641 1257/646 1253/639
+f 1258/645 1253/639 1257/646
+f 1249/638 1260/647 326/636
+f 1259/648 326/636 1260/647
+f 914/219 1260/647 238/217
+f 1256/644 238/217 1260/647
+f 1265/649 1261/650 1262/651
+f 328/652 1262/651 1261/650
+f 330/653 1264/654 1263/655
+f 1265/649 1263/655 1264/654
+f 1267/656 1269/657 329/658
+f 1266/659 329/658 1269/657
+f 1262/651 328/652 1269/657
+f 1268/660 1269/657 328/652
+f 251/272 1270/661 963/278
+f 1272/662 963/278 1270/661
+f 1272/662 1267/656 1271/663
+f 329/658 1271/663 1267/656
+f 1263/655 1274/664 330/653
+f 1273/665 330/653 1274/664
+f 959/274 1274/664 251/272
+f 1270/661 251/272 1274/664
+f 1278/666 1275/667 1276/668
+f 332/669 1276/668 1275/667
+f 329/658 1266/659 1277/670
+f 1278/666 1277/670 1266/659
+f 1280/671 1282/672 333/673
+f 1279/674 333/673 1282/672
+f 1276/668 332/669 1282/672
+f 1281/675 1282/672 332/669
+f 1285/676 953/266 1283/677
+f 248/262 1283/677 953/266
+f 333/673 1284/678 1280/671
+f 1285/676 1280/671 1284/678
+f 1271/663 329/658 1286/679
+f 1277/670 1286/679 329/658
+f 948/260 1286/679 248/262
+f 1283/677 248/262 1286/679
+f 335/680 1288/681 1287/682
+f 1291/683 1287/682 1288/681
+f 337/684 1290/685 1289/686
+f 1291/683 1289/686 1290/685
+f 1293/687 1295/688 336/689
+f 1292/690 336/689 1295/688
+f 1288/681 335/680 1295/688
+f 1294/691 1295/688 335/680
+f 1298/692 1183/693 1296/694
+f 307/695 1296/694 1183/693
+f 1298/692 1293/687 1297/696
+f 336/689 1297/696 1293/687
+f 1289/686 1300/697 337/684
+f 1299/698 337/684 1300/697
+f 1296/694 307/695 1300/697
+f 1189/699 1300/697 307/695
+f 1302/700 1305/701 339/702
+f 1301/703 339/702 1305/701
+f 1304/704 1305/701 341/705
+f 1303/706 341/705 1305/701
+f 340/707 1307/708 1306/709
+f 1309/710 1306/709 1307/708
+f 1309/710 1302/700 1308/711
+f 339/702 1308/711 1302/700
+f 1310/712 1312/713 294/714
+f 1126/715 294/714 1312/713
+f 1307/708 340/707 1312/713
+f 1311/716 1312/713 340/707
+f 341/705 1303/706 1313/717
+f 1314/718 1313/717 1303/706
+f 294/714 1119/719 1310/712
+f 1314/718 1310/712 1119/719
+f 1318/720 1315/721 1316/722
+f 343/723 1316/722 1315/721
+f 325/641 1252/642 1317/724
+f 1318/720 1317/724 1252/642
+f 1319/725 1321/726 330/653
+f 1264/654 330/653 1321/726
+f 1316/722 343/723 1321/726
+f 1320/727 1321/726 343/723
+f 243/242 1322/728 937/246
+f 1323/729 937/246 1322/728
+f 1323/729 1319/725 1273/665
+f 330/653 1273/665 1319/725
+f 1317/724 1324/730 325/641
+f 1257/646 325/641 1324/730
+f 933/243 1324/730 243/242
+f 1322/728 243/242 1324/730
+f 345/731 1326/732 1325/733
+f 1329/734 1325/733 1326/732
+f 346/735 1328/736 1327/737
+f 1329/734 1327/737 1328/736
+f 1330/738 1332/739 326/636
+f 1250/637 326/636 1332/739
+f 1331/740 1332/739 345/731
+f 1326/732 345/731 1332/739
+f 267/352 1333/741 1031/355
+f 1334/742 1031/355 1333/741
+f 326/636 1259/648 1330/738
+f 1334/742 1330/738 1259/648
+f 1335/743 346/735 1336/744
+f 1327/737 1336/744 346/735
+f 1027/350 1336/744 267/352
+f 1333/741 267/352 1336/744
+f 1341/745 1337/746 1338/747
+f 348/748 1338/747 1337/746
+f 349/749 1340/750 1339/751
+f 1341/745 1339/751 1340/750
+f 1342/752 1344/753 346/754
+f 1328/755 346/754 1344/753
+f 1338/747 348/748 1344/753
+f 1343/756 1344/753 348/748
+f 271/757 1345/758 1045/759
+f 1346/760 1045/759 1345/758
+f 1346/760 1342/752 1335/761
+f 346/754 1335/761 1342/752
+f 349/749 1339/751 1347/762
+f 1348/763 1347/762 1339/751
+f 1041/764 1348/763 271/757
+f 1345/758 271/757 1348/763
+f 1353/765 1349/766 1350/767
+f 351/768 1350/767 1349/766
+f 354/769 1352/770 1351/771
+f 1353/765 1351/771 1352/770
+f 1355/772 1357/773 352/774
+f 1354/775 352/774 1357/773
+f 1350/767 351/768 1357/773
+f 1356/776 1357/773 351/768
+f 353/777 1359/778 1358/779
+f 1361/780 1358/779 1359/778
+f 352/774 1360/781 1355/772
+f 1361/780 1355/772 1360/781
+f 1351/771 1364/782 354/769
+f 1362/783 354/769 1364/782
+f 1363/784 1364/782 353/777
+f 1359/778 353/777 1364/782
+f 1365/785 356/786 1369/787
+f 1366/788 1369/787 356/786
+f 1369/787 1367/789 1368/790
+f 357/791 1368/790 1367/789
+f 1370/792 1372/793 354/769
+f 1352/770 354/769 1372/793
+f 356/786 1371/794 1366/788
+f 1372/793 1366/788 1371/794
+f 1374/795 1073/409 1373/796
+f 278/402 1373/796 1073/409
+f 354/769 1362/783 1370/792
+f 1374/795 1370/792 1362/783
+f 1367/789 1376/797 357/791
+f 1375/798 357/791 1376/797
+f 278/402 1068/404 1373/796
+f 1376/797 1373/796 1068/404
+f 1381/799 1377/800 1378/801
+f 359/802 1378/801 1377/800
+f 360/803 1380/804 1379/805
+f 1381/799 1379/805 1380/804
+f 1384/806 1304/704 1382/807
+f 341/705 1382/807 1304/704
+f 1378/801 359/802 1384/806
+f 1383/808 1384/806 359/802
+f 209/87 1385/809 809/94
+f 1386/810 809/94 1385/809
+f 1382/807 341/705 1386/810
+f 1313/717 1386/810 341/705
+f 1379/805 1388/811 360/803
+f 1387/812 360/803 1388/811
+f 804/89 1388/811 209/87
+f 1385/809 209/87 1388/811
+f 1390/813 1392/814 362/815
+f 1389/816 362/815 1392/814
+f 1391/817 336/689 1392/814
+f 1292/690 1392/814 336/689
+f 363/818 1394/819 1393/820
+f 1396/821 1393/820 1394/819
+f 1396/821 1390/813 1395/822
+f 362/815 1395/822 1390/813
+f 1397/823 1399/824 199/47
+f 776/54 199/47 1399/824
+f 1398/825 1399/824 363/818
+f 1394/819 363/818 1399/824
+f 1400/826 1297/696 1391/817
+f 336/689 1391/817 1297/696
+f 199/47 770/48 1397/823
+f 1400/826 1397/823 770/48
+f 1402/827 1404/828 365/829
+f 1401/830 365/829 1404/828
+f 1403/831 363/818 1404/828
+f 1393/820 1404/828 363/818
+f 366/832 1406/833 1405/834
+f 1408/835 1405/834 1406/833
+f 1408/835 1402/827 1407/836
+f 365/829 1407/836 1402/827
+f 1409/837 1411/838 204/70
+f 793/74 204/70 1411/838
+f 1410/839 1411/838 366/832
+f 1406/833 366/832 1411/838
+f 363/818 1403/831 1398/825
+f 1412/840 1398/825 1403/831
+f 204/70 789/71 1409/837
+f 1412/840 1409/837 789/71
+f 1414/841 1416/842 368/843
+f 1413/844 368/843 1416/842
+f 1405/834 1416/842 366/832
+f 1415/845 366/832 1416/842
+f 1418/846 1420/847 369/848
+f 1417/849 369/848 1420/847
+f 1419/850 1420/847 368/843
+f 1414/841 368/843 1420/847
+f 265/340 1421/851 1025/348
+f 1423/852 1025/348 1421/851
+f 1423/852 1418/846 1422/853
+f 369/848 1422/853 1418/846
+f 366/832 1415/845 1410/839
+f 1424/854 1410/839 1415/845
+f 1424/854 1421/851 1019/339
+f 265/340 1019/339 1421/851
+f 371/855 1426/856 1425/857
+f 1428/858 1425/857 1426/856
+f 369/859 1417/860 1427/861
+f 1428/858 1427/861 1417/860
+f 372/862 1430/863 1429/864
+f 1432/865 1429/864 1430/863
+f 1431/866 1432/865 371/855
+f 1426/856 371/855 1432/865
+f 1433/867 1435/868 273/378
+f 1053/385 273/378 1435/868
+f 1434/869 1435/868 372/862
+f 1430/863 372/862 1435/868
+f 1427/861 1436/870 369/859
+f 1422/871 369/859 1436/870
+f 273/378 1047/379 1433/867
+f 1436/870 1433/867 1047/379
+f 1438/872 1440/873 374/874
+f 1437/875 374/874 1440/873
+f 1429/864 1440/873 372/862
+f 1439/876 372/862 1440/873
+f 375/877 1442/878 1441/879
+f 1444/880 1441/879 1442/878
+f 1444/880 1438/872 1443/881
+f 374/874 1443/881 1438/872
+f 1445/882 1447/883 274/381
+f 1057/390 274/381 1447/883
+f 1446/884 1447/883 375/877
+f 1442/878 375/877 1447/883
+f 372/862 1439/876 1434/869
+f 1448/885 1434/869 1439/876
+f 274/381 1051/383 1445/882
+f 1448/885 1445/882 1051/383
+f 1452/886 1449/887 1450/888
+f 377/889 1450/888 1449/887
+f 1451/890 375/877 1452/886
+f 1441/879 1452/886 375/877
+f 1453/891 1455/892 357/791
+f 1368/790 357/791 1455/892
+f 1450/888 377/889 1455/892
+f 1454/893 1455/892 377/889
+f 277/398 1456/894 1070/406
+f 1457/895 1070/406 1456/894
+f 357/791 1375/798 1453/891
+f 1457/895 1453/891 1375/798
+f 1446/884 375/877 1458/896
+f 1451/890 1458/896 375/877
+f 1065/400 1458/896 277/398
+f 1456/894 277/398 1458/896
+f 1462/897 1340/750 1459/898
+f 349/749 1459/898 1340/750
+f 1461/899 1462/897 380/900
+f 1460/901 380/900 1462/897
+f 1464/902 1204/903 1463/904
+f 311/905 1463/904 1204/903
+f 1459/898 349/749 1464/902
+f 1347/762 1464/902 349/749
+f 1465/906 1466/907 313/908
+f 1214/909 313/908 1466/907
+f 1199/910 1466/907 311/905
+f 1463/904 311/905 1466/907
+f 379/911 1468/912 1467/913
+f 1469/914 1467/913 1468/912
+f 313/908 1207/915 1465/906
+f 1469/914 1465/906 1207/915
+f 380/900 1460/901 1470/916
+f 1472/917 1470/916 1460/901
+f 1472/917 1468/912 1471/918
+f 379/911 1471/918 1468/912
+f 1363/784 353/777 1475/919
+f 1473/920 1475/919 353/777
+f 1191/565 1475/919 309/564
+f 1474/921 309/564 1475/919
+f 1477/922 1478/923 382/924
+f 1476/925 382/924 1478/923
+f 1358/779 1478/923 353/777
+f 1473/920 353/777 1478/923
+f 309/564 1474/921 1197/571
+f 1480/926 1197/571 1474/921
+f 382/924 1479/927 1477/922
+f 1480/926 1477/922 1479/927
+f 384/928 1482/929 1481/930
+f 1485/931 1481/930 1482/929
+f 387/932 1484/933 1483/934
+f 1485/931 1483/934 1484/933
+f 1487/935 1489/936 385/937
+f 1486/938 385/937 1489/936
+f 1488/939 1489/936 384/928
+f 1482/929 384/928 1489/936
+f 386/940 1491/941 1490/942
+f 1493/943 1490/942 1491/941
+f 385/937 1492/944 1487/935
+f 1493/943 1487/935 1492/944
+f 1483/934 1496/945 387/932
+f 1494/946 387/932 1496/945
+f 1491/941 386/940 1496/945
+f 1495/947 1496/945 386/940
+f 1501/948 1497/949 1498/950
+f 389/951 1498/950 1497/949
+f 392/952 1500/953 1499/954
+f 1501/948 1499/954 1500/953
+f 1503/955 1505/956 390/957
+f 1502/958 390/957 1505/956
+f 1498/950 389/951 1505/956
+f 1504/959 1505/956 389/951
+f 1507/960 1509/961 391/962
+f 1506/963 391/962 1509/961
+f 1509/961 1503/955 1508/964
+f 390/957 1508/964 1503/955
+f 1499/954 1512/965 392/952
+f 1510/966 392/952 1512/965
+f 391/962 1511/967 1507/960
+f 1512/965 1507/960 1511/967
+f 1513/968 394/969 1517/970
+f 1514/971 1517/970 394/969
+f 1516/972 1517/970 397/973
+f 1515/974 397/973 1517/970
+f 1521/975 1518/976 1519/977
+f 395/978 1519/977 1518/976
+f 1514/971 394/969 1521/975
+f 1520/979 1521/975 394/969
+f 1523/980 1525/981 396/982
+f 1522/983 396/982 1525/981
+f 1519/977 395/978 1525/981
+f 1524/984 1525/981 395/978
+f 397/973 1515/974 1526/985
+f 1528/986 1526/985 1515/974
+f 1528/986 1523/980 1527/987
+f 396/982 1527/987 1523/980
+f 399/988 1530/989 1529/990
+f 1532/991 1529/990 1530/989
+f 1502/958 1532/991 390/957
+f 1531/992 390/957 1532/991
+f 1534/993 1536/994 400/995
+f 1533/996 400/995 1536/994
+f 399/988 1535/997 1530/989
+f 1536/994 1530/989 1535/997
+f 1540/998 1537/999 1538/1000
+f 401/1001 1538/1000 1537/999
+f 400/995 1539/1002 1534/993
+f 1540/998 1534/993 1539/1002
+f 1531/992 1542/1003 390/957
+f 1508/964 390/957 1542/1003
+f 1538/1000 401/1001 1542/1003
+f 1541/1004 1542/1003 401/1001
+f 403/1005 1544/1006 1543/1007
+f 1546/1008 1543/1007 1544/1006
+f 1546/1008 1545/1009 1533/996
+f 400/995 1533/996 1545/1009
+f 395/978 1547/1010 1524/984
+f 1549/1011 1524/984 1547/1010
+f 1549/1011 1544/1006 1548/1012
+f 403/1005 1548/1012 1544/1006
+f 1551/1013 1552/1014 404/1015
+f 1550/1016 404/1015 1552/1014
+f 1518/976 1552/1014 395/978
+f 1547/1010 395/978 1552/1014
+f 1545/1009 1554/1017 400/995
+f 1539/1002 400/995 1554/1017
+f 1554/1017 1551/1013 1553/1018
+f 404/1015 1553/1018 1551/1013
+f 1555/1019 1558/1020 392/952
+f 1500/953 392/952 1558/1020
+f 407/1021 1557/1022 1556/1023
+f 1558/1020 1556/1023 1557/1022
+f 397/973 1559/1024 1516/972
+f 1560/1025 1516/972 1559/1024
+f 392/952 1510/966 1555/1019
+f 1560/1025 1555/1019 1510/966
+f 406/1026 1562/1027 1561/1028
+f 1563/1029 1561/1028 1562/1027
+f 1526/985 1563/1029 397/973
+f 1559/1024 397/973 1563/1029
+f 1556/1023 1566/1030 407/1021
+f 1564/1031 407/1021 1566/1030
+f 1565/1032 1566/1030 406/1026
+f 1562/1027 406/1026 1566/1030
+f 1567/1033 1570/1034 407/1035
+f 1557/1036 407/1035 1570/1034
+f 1568/1037 410/1038 1570/1034
+f 1569/1039 1570/1034 410/1038
+f 409/1040 1572/1041 1571/1042
+f 1573/1043 1571/1042 1572/1041
+f 407/1035 1564/1044 1567/1033
+f 1573/1043 1567/1033 1564/1044
+f 1574/1045 1576/1046 321/620
+f 1241/627 321/620 1576/1046
+f 1575/1047 1576/1046 409/1040
+f 1572/1041 409/1040 1576/1046
+f 318/606 1577/1048 1231/615
+f 1578/1049 1231/615 1577/1048
+f 321/620 1235/621 1574/1045
+f 1578/1049 1574/1045 1235/621
+f 1568/1037 1580/1050 410/1038
+f 1579/1051 410/1038 1580/1050
+f 1226/608 1580/1050 318/606
+f 1577/1048 318/606 1580/1050
+f 1581/1052 412/1053 1584/1054
+f 1582/1055 1584/1054 412/1053
+f 1561/1056 1584/1054 406/1057
+f 1583/1058 406/1057 1584/1054
+f 1587/1059 1210/585 1585/1060
+f 316/586 1585/1060 1210/585
+f 1587/1059 1582/1055 1586/1061
+f 412/1053 1586/1061 1582/1055
+f 322/623 1588/1062 1244/630
+f 1589/1063 1244/630 1588/1062
+f 1589/1063 1585/1060 1220/599
+f 316/586 1220/599 1585/1060
+f 1591/1064 1575/1047 1590/1065
+f 409/1040 1590/1065 1575/1047
+f 322/623 1239/625 1588/1062
+f 1591/1064 1588/1062 1239/625
+f 406/1057 1583/1058 1565/1066
+f 1592/1067 1565/1066 1583/1058
+f 409/1040 1571/1042 1590/1065
+f 1592/1067 1590/1065 1571/1042
+f 414/1068 1594/1069 1593/1070
+f 1596/1071 1593/1070 1594/1069
+f 396/1072 1522/1073 1595/1074
+f 1596/1071 1595/1074 1522/1073
+f 415/1075 1598/1076 1597/1077
+f 1600/1078 1597/1077 1598/1076
+f 1600/1078 1594/1069 1599/1079
+f 414/1068 1599/1079 1594/1069
+f 1601/1080 1603/1081 379/911
+f 1471/918 379/911 1603/1081
+f 1602/1082 1603/1081 415/1075
+f 1598/1076 415/1075 1603/1081
+f 1605/1083 1586/1084 1604/1085
+f 412/1086 1604/1085 1586/1084
+f 379/911 1467/913 1601/1080
+f 1605/1083 1601/1080 1467/913
+f 1527/1087 396/1072 1606/1088
+f 1595/1074 1606/1088 396/1072
+f 1604/1085 412/1086 1606/1088
+f 1581/1089 1606/1088 412/1086
+f 1608/1090 1611/1091 417/1092
+f 1607/1093 417/1092 1611/1091
+f 1609/1094 419/1095 1611/1091
+f 1610/1096 1611/1091 419/1095
+f 1612/1097 1614/1098 399/1099
+f 1535/1100 399/1099 1614/1098
+f 1614/1098 1608/1090 1613/1101
+f 417/1092 1613/1101 1608/1090
+f 418/1102 1616/1103 1615/1104
+f 1617/1105 1615/1104 1616/1103
+f 1612/1097 399/1099 1617/1105
+f 1529/1106 1617/1105 399/1099
+f 1618/1107 1620/1108 352/774
+f 1360/781 352/774 1620/1108
+f 1619/1109 1620/1108 418/1102
+f 1616/1103 418/1102 1620/1108
+f 419/1095 1609/1094 1621/1110
+f 1622/1111 1621/1110 1609/1094
+f 352/774 1354/775 1618/1107
+f 1622/1111 1618/1107 1354/775
+f 414/1068 1623/1112 1599/1079
+f 1626/1113 1599/1079 1623/1112
+f 1625/1114 1626/1113 421/1115
+f 1624/1116 421/1115 1626/1113
+f 1548/1117 403/1118 1628/1119
+f 1627/1120 1628/1119 403/1118
+f 1593/1070 1628/1119 414/1068
+f 1623/1112 414/1068 1628/1119
+f 1629/1121 1630/1122 417/1123
+f 1613/1124 417/1123 1630/1122
+f 1627/1120 403/1118 1630/1122
+f 1543/1125 1630/1122 403/1118
+f 1632/1126 1631/1127 1624/1116
+f 421/1115 1624/1116 1631/1127
+f 417/1123 1607/1128 1629/1121
+f 1632/1126 1629/1121 1607/1128
+f 418/1102 1633/1129 1619/1109
+f 1635/1130 1619/1109 1633/1129
+f 382/924 1476/925 1634/1131
+f 1635/1130 1634/1131 1476/925
+f 389/1132 1636/1133 1504/1134
+f 1637/1135 1504/1134 1636/1133
+f 1615/1104 1637/1135 418/1102
+f 1633/1129 418/1102 1637/1135
+f 1638/1136 1639/1137 410/1138
+f 1569/1139 410/1138 1639/1137
+f 1497/1140 1639/1137 389/1132
+f 1636/1133 389/1132 1639/1137
+f 1634/1131 1640/1141 382/924
+f 1479/927 382/924 1640/1141
+f 1579/1142 1640/1141 410/1138
+f 1638/1136 410/1138 1640/1141
+f 1644/1143 1641/1144 1642/1145
+f 424/1146 1642/1145 1641/1144
+f 391/962 1506/963 1643/1147
+f 1644/1143 1643/1147 1506/963
+f 384/928 1645/1148 1488/939
+f 1647/1149 1488/939 1645/1148
+f 424/1146 1646/1150 1642/1145
+f 1647/1149 1642/1145 1646/1150
+f 425/1151 1649/1152 1648/1153
+f 1650/1154 1648/1153 1649/1152
+f 384/928 1481/930 1645/1148
+f 1650/1154 1645/1148 1481/930
+f 1643/1147 1652/1155 391/962
+f 1511/967 391/962 1652/1155
+f 1651/1156 1652/1155 425/1151
+f 1649/1152 425/1151 1652/1155
+f 1653/1157 427/1158 1656/1159
+f 1654/1160 1656/1159 427/1158
+f 1537/999 1656/1159 401/1001
+f 1655/1161 401/1001 1656/1159
+f 385/937 1657/1162 1492/944
+f 1659/1163 1492/944 1657/1162
+f 427/1158 1658/1164 1654/1160
+f 1659/1163 1654/1160 1658/1164
+f 424/1146 1660/1165 1646/1150
+f 1661/1166 1646/1150 1660/1165
+f 385/937 1486/938 1657/1162
+f 1661/1166 1657/1162 1486/938
+f 401/1001 1655/1161 1541/1004
+f 1662/1167 1541/1004 1655/1161
+f 1641/1144 1662/1167 424/1146
+f 1660/1165 424/1146 1662/1167
+f 1666/1168 1663/1169 1664/1170
+f 429/1171 1664/1170 1663/1169
+f 404/1015 1550/1016 1665/1172
+f 1666/1168 1665/1172 1550/1016
+f 386/940 1667/1173 1495/947
+f 1669/1174 1495/947 1667/1173
+f 429/1171 1668/1175 1664/1170
+f 1669/1174 1664/1170 1668/1175
+f 1670/1176 1671/1177 427/1158
+f 1658/1164 427/1158 1671/1177
+f 1667/1173 386/940 1671/1177
+f 1490/942 1671/1177 386/940
+f 1553/1018 404/1015 1672/1178
+f 1665/1172 1672/1178 404/1015
+f 1672/1178 1670/1176 1653/1157
+f 427/1158 1653/1157 1670/1176
+f 1651/1156 425/1151 1675/1179
+f 1673/1180 1675/1179 425/1151
+f 1513/968 1675/1179 394/969
+f 1674/1181 394/969 1675/1179
+f 387/932 1676/1182 1484/933
+f 1677/1183 1484/933 1676/1182
+f 425/1151 1648/1153 1673/1180
+f 1677/1183 1673/1180 1648/1153
+f 429/1171 1678/1184 1668/1175
+f 1679/1185 1668/1175 1678/1184
+f 387/932 1494/946 1676/1182
+f 1679/1185 1676/1182 1494/946
+f 394/969 1674/1181 1520/979
+f 1680/1186 1520/979 1674/1181
+f 1680/1186 1678/1184 1663/1169
+f 429/1171 1663/1169 1678/1184
+f 1681/1187 1684/1188 196/1189
+f 763/1190 196/1189 1684/1188
+f 1683/1191 1684/1188 434/1192
+f 1682/1193 434/1192 1684/1188
+f 1686/1194 1687/1195 432/1196
+f 1685/1197 432/1196 1687/1195
+f 1687/1195 1681/1187 759/1198
+f 196/1189 759/1198 1681/1187
+f 1689/1199 1691/1200 433/1201
+f 1688/1202 433/1201 1691/1200
+f 1691/1200 1686/1194 1690/1203
+f 432/1196 1690/1203 1686/1194
+f 434/1192 1682/1193 1692/1204
+f 1694/1205 1692/1204 1682/1193
+f 1694/1205 1689/1199 1693/1206
+f 433/1201 1693/1206 1689/1199
+f 340/1207 1695/1208 1311/1209
+f 1697/1210 1311/1209 1695/1208
+f 432/1196 1685/1197 1696/1211
+f 1697/1210 1696/1211 1685/1197
+f 1699/1212 1700/1213 436/1214
+f 1698/1215 436/1214 1700/1213
+f 1306/1216 1700/1213 340/1207
+f 1695/1208 340/1207 1700/1213
+f 437/1217 1702/1218 1701/1219
+f 1704/1220 1701/1219 1702/1218
+f 436/1214 1703/1221 1699/1212
+f 1704/1220 1699/1212 1703/1221
+f 1690/1203 432/1196 1706/1222
+f 1696/1211 1706/1222 432/1196
+f 1705/1223 1706/1222 437/1217
+f 1702/1218 437/1217 1706/1222
+f 1707/1224 1710/1225 337/1226
+f 1290/1227 337/1226 1710/1225
+f 1709/1228 1710/1225 441/1229
+f 1708/1230 441/1229 1710/1225
+f 439/1231 1712/1232 1711/1233
+f 1713/1234 1711/1233 1712/1232
+f 337/1226 1299/1235 1707/1224
+f 1713/1234 1707/1224 1299/1235
+f 1715/1236 1717/1237 440/1238
+f 1714/1239 440/1238 1717/1237
+f 1716/1240 1717/1237 439/1231
+f 1712/1232 439/1231 1717/1237
+f 441/1229 1708/1230 1718/1241
+f 1720/1242 1718/1241 1708/1230
+f 440/1238 1719/1243 1715/1236
+f 1720/1242 1715/1236 1719/1243
+f 1723/1244 746/1245 1721/1246
+f 190/1247 1721/1246 746/1245
+f 1722/1248 439/1231 1723/1244
+f 1711/1233 1723/1244 439/1231
+f 1725/1249 1683/1191 1724/1250
+f 434/1192 1724/1250 1683/1191
+f 1721/1246 190/1247 1725/1249
+f 740/1251 1725/1249 190/1247
+f 1728/1252 1726/1253 1727/1254
+f 443/1255 1727/1254 1726/1253
+f 434/1192 1692/1204 1724/1250
+f 1728/1252 1724/1250 1692/1204
+f 1730/1256 1716/1240 1722/1248
+f 439/1231 1722/1248 1716/1240
+f 1727/1254 443/1255 1730/1256
+f 1729/1257 1730/1256 443/1255
+f 445/1258 1732/1259 1731/1260
+f 1734/1261 1731/1260 1732/1259
+f 1701/1219 1734/1261 437/1217
+f 1733/1262 437/1217 1734/1261
+f 446/1263 1736/1264 1735/1265
+f 1738/1266 1735/1265 1736/1264
+f 1737/1267 1738/1266 445/1258
+f 1732/1259 445/1258 1738/1266
+f 1739/1268 1741/1269 433/1201
+f 1693/1206 433/1201 1741/1269
+f 1740/1270 1741/1269 446/1263
+f 1736/1264 446/1263 1741/1269
+f 437/1217 1733/1262 1705/1223
+f 1742/1271 1705/1223 1733/1262
+f 433/1201 1688/1202 1739/1268
+f 1742/1271 1739/1268 1688/1202
+f 1746/1272 1743/1273 1744/1274
+f 448/1275 1744/1274 1743/1273
+f 446/1263 1735/1265 1745/1276
+f 1746/1272 1745/1276 1735/1265
+f 1747/1277 1749/1278 440/1238
+f 1719/1243 440/1238 1749/1278
+f 1744/1274 448/1275 1749/1278
+f 1748/1279 1749/1278 448/1275
+f 443/1255 1750/1280 1729/1257
+f 1751/1281 1729/1257 1750/1280
+f 440/1238 1714/1239 1747/1277
+f 1751/1281 1747/1277 1714/1239
+f 1745/1276 1752/1282 446/1263
+f 1740/1270 446/1263 1752/1282
+f 1726/1253 1752/1282 443/1255
+f 1750/1280 443/1255 1752/1282
+f 1754/1283 1757/1284 450/1285
+f 1753/1286 450/1285 1757/1284
+f 1756/1287 1757/1284 452/1288
+f 1755/1289 452/1288 1757/1284
+f 451/1290 1759/1291 1758/1292
+f 1761/1293 1758/1292 1759/1291
+f 450/1285 1760/1294 1754/1283
+f 1761/1293 1754/1283 1760/1294
+f 452/1288 1755/1289 1762/1295
+f 1764/1296 1762/1295 1755/1289
+f 1763/1297 1764/1296 451/1290
+f 1759/1291 451/1290 1764/1296
+f 452/1288 1765/1298 1756/1287
+f 1768/1299 1756/1287 1765/1298
+f 1766/1300 455/1301 1768/1299
+f 1767/1302 1768/1299 455/1301
+f 1770/1303 1771/1304 454/1305
+f 1769/1306 454/1305 1771/1304
+f 1762/1295 1771/1304 452/1288
+f 1765/1298 452/1288 1771/1304
+f 208/82 1772/1307 806/91
+f 1774/1308 806/91 1772/1307
+f 454/1305 1773/1309 1770/1303
+f 1774/1308 1770/1303 1773/1309
+f 1775/1310 1776/1311 229/173
+f 881/180 229/173 1776/1311
+f 800/84 1776/1311 208/82
+f 1772/1307 208/82 1776/1311
+f 1778/1312 1777/1313 1766/1300
+f 455/1301 1766/1300 1777/1313
+f 229/173 875/174 1775/1310
+f 1778/1312 1775/1310 875/174
+f 455/1301 1779/1314 1767/1302
+f 1781/1315 1767/1302 1779/1314
+f 450/1285 1753/1286 1780/1316
+f 1781/1315 1780/1316 1753/1286
+f 1782/1317 1783/1318 258/303
+f 989/308 258/303 1783/1318
+f 1777/1313 1783/1318 455/1301
+f 1779/1314 455/1301 1783/1318
+f 1784/1319 1785/1320 249/251
+f 942/252 249/251 1785/1320
+f 258/303 986/305 1782/1317
+f 1785/1320 1782/1317 986/305
+f 1786/1321 457/1322 1788/1323
+f 1787/1324 1788/1323 457/1322
+f 1784/1319 249/251 1788/1323
+f 952/264 1788/1323 249/251
+f 1780/1316 1790/1325 450/1285
+f 1760/1294 450/1285 1790/1325
+f 1789/1326 1790/1325 457/1322
+f 1787/1324 457/1322 1790/1325
+f 1792/1327 1795/1328 459/1329
+f 1791/1330 459/1329 1795/1328
+f 1793/1331 460/1332 1795/1328
+f 1794/1333 1795/1328 460/1332
+f 457/1322 1796/1334 1789/1326
+f 1798/1335 1789/1326 1796/1334
+f 459/1329 1797/1336 1792/1327
+f 1798/1335 1792/1327 1797/1336
+f 1284/678 333/673 1800/1337
+f 1799/1338 1800/1337 333/673
+f 1786/1321 1800/1337 457/1322
+f 1796/1334 457/1322 1800/1337
+f 1802/1339 1801/1340 1793/1331
+f 460/1332 1793/1331 1801/1340
+f 333/673 1279/674 1799/1338
+f 1802/1339 1799/1338 1279/674
+f 360/803 1803/1341 1380/804
+f 1806/1342 1380/804 1803/1341
+f 463/1343 1805/1344 1804/1345
+f 1806/1342 1804/1345 1805/1344
+f 1807/1346 1808/1347 454/1305
+f 1773/1309 454/1305 1808/1347
+f 1387/812 1808/1347 360/803
+f 1803/1341 360/803 1808/1347
+f 462/1348 1810/1349 1809/1350
+f 1811/1351 1809/1350 1810/1349
+f 454/1305 1769/1306 1807/1346
+f 1811/1351 1807/1346 1769/1306
+f 1804/1345 1814/1352 463/1343
+f 1812/1353 463/1343 1814/1352
+f 1813/1354 1814/1352 462/1348
+f 1810/1349 462/1348 1814/1352
+f 451/1290 1815/1355 1763/1297
+f 1818/1356 1763/1297 1815/1355
+f 467/1357 1817/1358 1816/1359
+f 1818/1356 1816/1359 1817/1358
+f 1821/1360 1819/1361 1820/1362
+f 465/1363 1820/1362 1819/1361
+f 1758/1292 1821/1360 451/1290
+f 1815/1355 451/1290 1821/1360
+f 1823/1364 1825/1365 466/1366
+f 1822/1367 466/1366 1825/1365
+f 1820/1362 465/1363 1825/1365
+f 1824/1368 1825/1365 465/1363
+f 1826/1369 467/1357 1828/1370
+f 1816/1359 1828/1370 467/1357
+f 1823/1364 466/1366 1828/1370
+f 1827/1371 1828/1370 466/1366
+f 1829/1372 1831/1373 459/1329
+f 1797/1336 459/1329 1831/1373
+f 1819/1361 1831/1373 465/1363
+f 1830/1374 465/1363 1831/1373
+f 469/1375 1833/1376 1832/1377
+f 1834/1378 1832/1377 1833/1376
+f 459/1329 1791/1330 1829/1372
+f 1834/1378 1829/1372 1791/1330
+f 1836/1379 1838/1380 470/1381
+f 1835/1382 470/1381 1838/1380
+f 1837/1383 1838/1380 469/1375
+f 1833/1376 469/1375 1838/1380
+f 465/1363 1830/1374 1824/1368
+f 1840/1384 1824/1368 1830/1374
+f 470/1381 1839/1385 1836/1379
+f 1840/1384 1836/1379 1839/1385
+f 1844/1386 1813/1354 1841/1387
+f 462/1348 1841/1387 1813/1354
+f 473/1388 1843/1389 1842/1390
+f 1844/1386 1842/1390 1843/1389
+f 1845/1391 1846/1392 467/1357
+f 1817/1358 467/1357 1846/1392
+f 1841/1387 462/1348 1846/1392
+f 1809/1350 1846/1392 462/1348
+f 472/1393 1848/1394 1847/1395
+f 1849/1396 1847/1395 1848/1394
+f 1849/1396 1845/1391 1826/1369
+f 467/1357 1826/1369 1845/1391
+f 1842/1390 1852/1397 473/1388
+f 1850/1398 473/1388 1852/1397
+f 1851/1399 1852/1397 472/1393
+f 1848/1394 472/1393 1852/1397
+f 1853/1400 1855/1401 470/1381
+f 1839/1385 470/1381 1855/1401
+f 1854/1402 466/1366 1855/1401
+f 1822/1367 1855/1401 466/1366
+f 1858/1403 1856/1404 1857/1405
+f 475/1406 1857/1405 1856/1404
+f 470/1381 1835/1382 1853/1400
+f 1858/1403 1853/1400 1835/1382
+f 472/1393 1859/1407 1851/1399
+f 1861/1408 1851/1399 1859/1407
+f 475/1406 1860/1409 1857/1405
+f 1861/1408 1857/1405 1860/1409
+f 1854/1402 1862/1410 466/1366
+f 1827/1371 466/1366 1862/1410
+f 1859/1407 472/1393 1862/1410
+f 1847/1395 1862/1410 472/1393
+f 477/1411 1864/1412 1863/1413
+f 1867/1414 1863/1413 1864/1412
+f 1867/1414 1865/1415 1866/1416
+f 480/1417 1866/1416 1865/1415
+f 478/1418 1869/1419 1868/1420
+f 1871/1421 1868/1420 1869/1419
+f 1870/1422 1871/1421 477/1411
+f 1864/1412 477/1411 1871/1421
+f 1872/1423 479/1424 1875/1425
+f 1873/1426 1875/1425 479/1424
+f 1874/1427 1875/1425 478/1418
+f 1869/1419 478/1418 1875/1425
+f 1865/1415 1878/1428 480/1417
+f 1876/1429 480/1417 1878/1428
+f 479/1424 1877/1430 1873/1426
+f 1878/1428 1873/1426 1877/1430
+f 1879/1431 1882/1432 480/1417
+f 1866/1416 480/1417 1882/1432
+f 1880/1433 484/1434 1882/1432
+f 1881/1435 1882/1432 484/1434
+f 482/1436 1884/1437 1883/1438
+f 1885/1439 1883/1438 1884/1437
+f 1885/1439 1879/1431 1876/1429
+f 480/1417 1876/1429 1879/1431
+f 1887/1440 1889/1441 483/1442
+f 1886/1443 483/1442 1889/1441
+f 1888/1444 1889/1441 482/1436
+f 1884/1437 482/1436 1889/1441
+f 484/1434 1880/1433 1890/1445
+f 1892/1446 1890/1445 1880/1433
+f 483/1442 1891/1447 1887/1440
+f 1892/1446 1887/1440 1891/1447
+f 1894/1448 1897/1449 486/1450
+f 1893/1451 486/1450 1897/1449
+f 1897/1449 1895/1452 1896/1453
+f 489/1454 1896/1453 1895/1452
+f 1899/1455 1901/1456 487/1457
+f 1898/1458 487/1457 1901/1456
+f 486/1450 1900/1459 1894/1448
+f 1901/1456 1894/1448 1900/1459
+f 488/1460 1903/1461 1902/1462
+f 1905/1463 1902/1462 1903/1461
+f 487/1457 1904/1464 1899/1455
+f 1905/1463 1899/1455 1904/1464
+f 1895/1452 1908/1465 489/1454
+f 1906/1466 489/1454 1908/1465
+f 1907/1467 1908/1465 488/1460
+f 1903/1461 488/1460 1908/1465
+f 1910/1468 1913/1469 491/1470
+f 1909/1471 491/1470 1913/1469
+f 1912/1472 1913/1469 493/1473
+f 1911/1474 493/1473 1913/1469
+f 492/1475 1915/1476 1914/1477
+f 1917/1478 1914/1477 1915/1476
+f 491/1470 1916/1479 1910/1468
+f 1917/1478 1910/1468 1916/1479
+f 1918/1480 1920/1481 487/1457
+f 1904/1464 487/1457 1920/1481
+f 1919/1482 1920/1481 492/1475
+f 1915/1476 492/1475 1920/1481
+f 493/1473 1911/1474 1921/1483
+f 1922/1484 1921/1483 1911/1474
+f 487/1457 1898/1458 1918/1480
+f 1922/1484 1918/1480 1898/1458
+f 495/1485 1924/1486 1923/1487
+f 1927/1488 1923/1487 1924/1486
+f 1926/1489 1927/1488 498/1490
+f 1925/1491 498/1490 1927/1488
+f 1929/1492 1931/1493 496/1494
+f 1928/1495 496/1494 1931/1493
+f 1930/1496 1931/1493 495/1485
+f 1924/1486 495/1485 1931/1493
+f 1933/1497 1935/1498 497/1499
+f 1932/1500 497/1499 1935/1498
+f 496/1494 1934/1501 1929/1492
+f 1935/1498 1929/1492 1934/1501
+f 498/1490 1925/1491 1936/1502
+f 1938/1503 1936/1502 1925/1491
+f 497/1499 1937/1504 1933/1497
+f 1938/1503 1933/1497 1937/1504
+f 500/1505 1940/1506 1939/1507
+f 1942/1508 1939/1507 1940/1506
+f 486/1450 1893/1451 1941/1509
+f 1942/1508 1941/1509 1893/1451
+f 1944/1510 1946/1511 501/1512
+f 1943/1513 501/1512 1946/1511
+f 1945/1514 1946/1511 500/1505
+f 1940/1506 500/1505 1946/1511
+f 502/1515 1948/1516 1947/1517
+f 1950/1518 1947/1517 1948/1516
+f 501/1512 1949/1519 1944/1510
+f 1950/1518 1944/1510 1949/1519
+f 1941/1509 1952/1520 486/1450
+f 1900/1459 486/1450 1952/1520
+f 1951/1521 1952/1520 502/1515
+f 1948/1516 502/1515 1952/1520
+f 1954/1522 1956/1523 504/1524
+f 1953/1525 504/1524 1956/1523
+f 1955/1526 477/1411 1956/1523
+f 1863/1413 1956/1523 477/1411
+f 505/1527 1958/1528 1957/1529
+f 1960/1530 1957/1529 1958/1528
+f 504/1524 1959/1531 1954/1522
+f 1960/1530 1954/1522 1959/1531
+f 1961/1532 1963/1533 501/1512
+f 1949/1519 501/1512 1963/1533
+f 1962/1534 1963/1533 505/1527
+f 1958/1528 505/1527 1963/1533
+f 1964/1535 1870/1422 1955/1526
+f 477/1411 1955/1526 1870/1422
+f 501/1512 1943/1513 1961/1532
+f 1964/1535 1961/1532 1943/1513
+f 1966/1536 1969/1537 507/1538
+f 1965/1539 507/1538 1969/1537
+f 509/1540 1968/1541 1967/1542
+f 1969/1537 1967/1542 1968/1541
+f 1971/1543 1973/1544 508/1545
+f 1970/1546 508/1545 1973/1544
+f 1973/1544 1966/1536 1972/1547
+f 507/1538 1972/1547 1966/1536
+f 505/1527 1974/1548 1962/1534
+f 1976/1549 1962/1534 1974/1548
+f 1976/1549 1971/1543 1975/1550
+f 508/1545 1975/1550 1971/1543
+f 1977/1551 509/1540 1978/1552
+f 1967/1542 1978/1552 509/1540
+f 1957/1529 1978/1552 505/1527
+f 1974/1548 505/1527 1978/1552
+f 493/1473 1979/1553 1912/1472
+f 1982/1554 1912/1472 1979/1553
+f 511/1555 1981/1556 1980/1557
+f 1982/1554 1980/1557 1981/1556
+f 502/1515 1983/1558 1951/1521
+f 1984/1559 1951/1521 1983/1558
+f 1921/1483 1984/1559 493/1473
+f 1979/1553 493/1473 1984/1559
+f 1975/1550 508/1545 1986/1560
+f 1985/1561 1986/1560 508/1545
+f 1947/1517 1986/1560 502/1515
+f 1983/1558 502/1515 1986/1560
+f 1980/1557 1988/1562 511/1555
+f 1987/1563 511/1555 1988/1562
+f 508/1545 1970/1546 1985/1561
+f 1988/1562 1985/1561 1970/1546
+f 498/1490 1989/1564 1926/1489
+f 1992/1565 1926/1489 1989/1564
+f 515/1566 1991/1567 1990/1568
+f 1992/1565 1990/1568 1991/1567
+f 1994/1569 1995/1570 513/1571
+f 1993/1572 513/1571 1995/1570
+f 1936/1502 1995/1570 498/1490
+f 1989/1564 498/1490 1995/1570
+f 514/1573 1997/1574 1996/1575
+f 1999/1576 1996/1575 1997/1574
+f 513/1571 1998/1577 1994/1569
+f 1999/1576 1994/1569 1998/1577
+f 1990/1568 2002/1578 515/1566
+f 2000/1579 515/1566 2002/1578
+f 2001/1580 2002/1578 514/1573
+f 1997/1574 514/1573 2002/1578
+f 2004/1581 2007/1582 517/1583
+f 2003/1584 517/1583 2007/1582
+f 2006/1585 2007/1582 519/1586
+f 2005/1587 519/1586 2007/1582
+f 518/1588 2009/1589 2008/1590
+f 2011/1591 2008/1590 2009/1589
+f 2011/1591 2004/1581 2010/1592
+f 517/1583 2010/1592 2004/1581
+f 2012/1593 2014/1594 513/1571
+f 1998/1577 513/1571 2014/1594
+f 2013/1595 2014/1594 518/1588
+f 2009/1589 518/1588 2014/1594
+f 519/1586 2005/1587 2015/1596
+f 2016/1597 2015/1596 2005/1587
+f 513/1571 1993/1572 2012/1593
+f 2016/1597 2012/1593 1993/1572
+f 2020/1598 1968/1541 2017/1599
+f 509/1540 2017/1599 1968/1541
+f 2020/1598 2018/1600 2019/1601
+f 522/1602 2019/1601 2018/1600
+f 2021/1603 521/1604 2023/1605
+f 2022/1606 2023/1605 521/1604
+f 2023/1605 2017/1599 1977/1551
+f 509/1540 1977/1551 2017/1599
+f 518/1588 2024/1607 2013/1595
+f 2026/1608 2013/1595 2024/1607
+f 521/1604 2025/1609 2022/1606
+f 2026/1608 2022/1606 2025/1609
+f 2018/1600 2028/1610 522/1602
+f 2027/1611 522/1602 2028/1610
+f 2008/1590 2028/1610 518/1588
+f 2024/1607 518/1588 2028/1610
+f 484/1434 2029/1612 1881/1435
+f 2031/1613 1881/1435 2029/1612
+f 504/1524 1953/1525 2030/1614
+f 2031/1613 2030/1614 1953/1525
+f 514/1573 2032/1615 2001/1580
+f 2033/1616 2001/1580 2032/1615
+f 1890/1445 2033/1616 484/1434
+f 2029/1612 484/1434 2033/1616
+f 2034/1617 2035/1618 521/1604
+f 2025/1609 521/1604 2035/1618
+f 1996/1575 2035/1618 514/1573
+f 2032/1615 514/1573 2035/1618
+f 1959/1531 504/1524 2036/1619
+f 2030/1614 2036/1619 504/1524
+f 521/1604 2021/1603 2034/1617
+f 2036/1619 2034/1617 2021/1603
+f 2038/1620 2041/1621 525/1622
+f 2037/1623 525/1622 2041/1621
+f 2040/1624 2041/1621 528/1625
+f 2039/1626 528/1625 2041/1621
+f 526/1627 2043/1628 2042/1629
+f 2045/1630 2042/1629 2043/1628
+f 525/1622 2044/1631 2038/1620
+f 2045/1630 2038/1620 2044/1631
+f 2047/1632 2049/1633 527/1634
+f 2046/1635 527/1634 2049/1633
+f 2048/1636 2049/1633 526/1627
+f 2043/1628 526/1627 2049/1633
+f 2052/1637 2050/1638 2039/1626
+f 528/1625 2039/1626 2050/1638
+f 527/1634 2051/1639 2047/1632
+f 2052/1637 2047/1632 2051/1639
+f 2057/1640 2053/1641 2054/1642
+f 530/1643 2054/1642 2053/1641
+f 2056/1644 2057/1640 532/1645
+f 2055/1646 532/1645 2057/1640
+f 531/1647 2059/1648 2058/1649
+f 2061/1650 2058/1649 2059/1648
+f 2061/1650 2054/1642 2060/1651
+f 530/1643 2060/1651 2054/1642
+f 2062/1652 2064/1653 526/1627
+f 2048/1636 526/1627 2064/1653
+f 2063/1654 2064/1653 531/1647
+f 2059/1648 531/1647 2064/1653
+f 532/1645 2055/1646 2065/1655
+f 2066/1656 2065/1655 2055/1646
+f 526/1627 2042/1629 2062/1652
+f 2066/1656 2062/1652 2042/1629
+f 534/1657 2068/1658 2067/1659
+f 2071/1660 2067/1659 2068/1658
+f 537/1661 2070/1662 2069/1663
+f 2071/1660 2069/1663 2070/1662
+f 2072/1664 535/1665 2075/1666
+f 2073/1667 2075/1666 535/1665
+f 2074/1668 2075/1666 534/1657
+f 2068/1658 534/1657 2075/1666
+f 536/1669 2077/1670 2076/1671
+f 2079/1672 2076/1671 2077/1670
+f 2079/1672 2073/1667 2078/1673
+f 535/1665 2078/1673 2073/1667
+f 2080/1674 537/1661 2082/1675
+f 2069/1663 2082/1675 537/1661
+f 2081/1676 2082/1675 536/1669
+f 2077/1670 536/1669 2082/1675
+f 2084/1677 2086/1678 539/1679
+f 2083/1680 539/1679 2086/1678
+f 2053/1641 2086/1678 530/1643
+f 2085/1681 530/1643 2086/1678
+f 536/1669 2087/1682 2081/1676
+f 2089/1683 2081/1676 2087/1682
+f 539/1679 2088/1684 2084/1677
+f 2089/1683 2084/1677 2088/1684
+f 2091/1685 2092/1686 540/1687
+f 2090/1688 540/1687 2092/1686
+f 2076/1671 2092/1686 536/1669
+f 2087/1682 536/1669 2092/1686
+f 530/1643 2085/1681 2060/1651
+f 2094/1689 2060/1651 2085/1681
+f 540/1687 2093/1690 2091/1685
+f 2094/1689 2091/1685 2093/1690
+f 542/1691 2096/1692 2095/1693
+f 2098/1694 2095/1693 2096/1692
+f 507/1538 1965/1539 2097/1695
+f 2098/1694 2097/1695 1965/1539
+f 2099/1696 2101/1697 532/1645
+f 2056/1644 532/1645 2101/1697
+f 2100/1698 2101/1697 542/1691
+f 2096/1692 542/1691 2101/1697
+f 543/1699 2103/1700 2102/1701
+f 2104/1702 2102/1701 2103/1700
+f 2104/1702 2099/1696 2065/1655
+f 532/1645 2065/1655 2099/1696
+f 2097/1695 2106/1703 507/1538
+f 1972/1547 507/1538 2106/1703
+f 2105/1704 2106/1703 543/1699
+f 2103/1700 543/1699 2106/1703
+f 2010/1592 517/1583 2110/1705
+f 2107/1706 2110/1705 517/1583
+f 2109/1707 2110/1705 546/1708
+f 2108/1709 546/1708 2110/1705
+f 2113/1710 2111/1711 2112/1712
+f 545/1713 2112/1712 2111/1711
+f 517/1583 2003/1584 2107/1706
+f 2113/1710 2107/1706 2003/1584
+f 537/1661 2114/1714 2070/1662
+f 2116/1715 2070/1662 2114/1714
+f 545/1713 2115/1716 2112/1712
+f 2116/1715 2112/1712 2115/1716
+f 546/1708 2108/1709 2117/1717
+f 2118/1718 2117/1717 2108/1709
+f 2080/1674 2118/1718 537/1661
+f 2114/1714 537/1661 2118/1718
+f 2119/1719 2122/1720 491/1470
+f 1916/1479 491/1470 2122/1720
+f 2121/1721 2122/1720 549/1722
+f 2120/1723 549/1722 2122/1720
+f 2125/1724 2123/1725 2124/1726
+f 548/1727 2124/1726 2123/1725
+f 491/1470 1909/1471 2119/1719
+f 2125/1724 2119/1719 1909/1471
+f 528/1625 2126/1728 2040/1624
+f 2128/1729 2040/1624 2126/1728
+f 2124/1726 548/1727 2128/1729
+f 2127/1730 2128/1729 548/1727
+f 549/1722 2120/1723 2129/1731
+f 2130/1732 2129/1731 2120/1723
+f 2050/1638 2130/1732 528/1625
+f 2126/1728 528/1625 2130/1732
+f 2131/1733 2133/1734 539/1679
+f 2088/1684 539/1679 2133/1734
+f 2117/1717 2133/1734 546/1708
+f 2132/1735 546/1708 2133/1734
+f 542/1691 2134/1736 2100/1698
+f 2135/1737 2100/1698 2134/1736
+f 539/1679 2083/1680 2131/1733
+f 2135/1737 2131/1733 2083/1680
+f 2136/1738 2137/1739 522/1602
+f 2019/1601 522/1602 2137/1739
+f 2095/1693 2137/1739 542/1691
+f 2134/1736 542/1691 2137/1739
+f 546/1708 2132/1735 2109/1707
+f 2138/1740 2109/1707 2132/1735
+f 522/1602 2027/1611 2136/1738
+f 2138/1740 2136/1738 2027/1611
+f 543/1699 2139/1741 2105/1704
+f 2141/1742 2105/1704 2139/1741
+f 511/1555 1987/1563 2140/1743
+f 2141/1742 2140/1743 1987/1563
+f 2142/1744 2143/1745 525/1622
+f 2044/1631 525/1622 2143/1745
+f 2102/1701 2143/1745 543/1699
+f 2139/1741 543/1699 2143/1745
+f 548/1727 2144/1746 2127/1730
+f 2145/1747 2127/1730 2144/1746
+f 525/1622 2037/1623 2142/1744
+f 2145/1747 2142/1744 2037/1623
+f 2140/1743 2146/1748 511/1555
+f 1981/1556 511/1555 2146/1748
+f 2123/1725 2146/1748 548/1727
+f 2144/1746 548/1727 2146/1748
+f 2150/1749 2121/1721 2147/1750
+f 549/1722 2147/1750 2121/1721
+f 2149/1751 2150/1749 555/1752
+f 2148/1753 555/1752 2150/1749
+f 2152/1754 2153/1755 553/1756
+f 2151/1757 553/1756 2153/1755
+f 2147/1750 549/1722 2153/1755
+f 2129/1731 2153/1755 549/1722
+f 554/1758 2155/1759 2154/1760
+f 2157/1761 2154/1760 2155/1759
+f 2157/1761 2152/1754 2156/1762
+f 553/1756 2156/1762 2152/1754
+f 2158/1763 555/1752 2160/1764
+f 2148/1753 2160/1764 555/1752
+f 2159/1765 2160/1764 554/1758
+f 2155/1759 554/1758 2160/1764
+f 2164/1766 2159/1767 2161/1768
+f 554/1769 2161/1768 2159/1767
+f 559/1770 2163/1771 2162/1772
+f 2164/1766 2162/1772 2163/1771
+f 2166/1773 2167/1774 557/1775
+f 2165/1776 557/1775 2167/1774
+f 2161/1768 554/1769 2167/1774
+f 2154/1777 2167/1774 554/1769
+f 558/1778 2169/1779 2168/1780
+f 2171/1781 2168/1780 2169/1779
+f 2171/1781 2166/1773 2170/1782
+f 557/1775 2170/1782 2166/1773
+f 2162/1772 2174/1783 559/1770
+f 2172/1784 559/1770 2174/1783
+f 2173/1785 2174/1783 558/1778
+f 2169/1779 558/1778 2174/1783
+f 2175/1786 2178/1787 559/1770
+f 2163/1771 559/1770 2178/1787
+f 2177/1788 2178/1787 563/1789
+f 2176/1790 563/1789 2178/1787
+f 561/1791 2180/1792 2179/1793
+f 2181/1794 2179/1793 2180/1792
+f 559/1770 2172/1784 2175/1786
+f 2181/1794 2175/1786 2172/1784
+f 2183/1795 2185/1796 562/1797
+f 2182/1798 562/1797 2185/1796
+f 2184/1799 2185/1796 561/1791
+f 2180/1792 561/1791 2185/1796
+f 563/1789 2176/1790 2186/1800
+f 2188/1801 2186/1800 2176/1790
+f 562/1797 2187/1802 2183/1795
+f 2188/1801 2183/1795 2187/1802
+f 2190/1803 2192/1804 565/1805
+f 2189/1806 565/1805 2192/1804
+f 2179/1793 2192/1804 561/1791
+f 2191/1807 561/1791 2192/1804
+f 2194/1808 2196/1809 566/1810
+f 2193/1811 566/1810 2196/1809
+f 565/1805 2195/1812 2190/1803
+f 2196/1809 2190/1803 2195/1812
+f 2198/1813 2200/1814 567/1815
+f 2197/1816 567/1815 2200/1814
+f 566/1810 2199/1817 2194/1808
+f 2200/1814 2194/1808 2199/1817
+f 561/1791 2191/1807 2184/1799
+f 2202/1818 2184/1799 2191/1807
+f 567/1815 2201/1819 2198/1813
+f 2202/1818 2198/1813 2201/1819
+f 2204/1820 2207/1821 569/1822
+f 2203/1823 569/1822 2207/1821
+f 2205/1824 572/1825 2207/1821
+f 2206/1826 2207/1821 572/1825
+f 2209/1827 2211/1828 570/1829
+f 2208/1830 570/1829 2211/1828
+f 569/1822 2210/1831 2204/1820
+f 2211/1828 2204/1820 2210/1831
+f 571/1832 2213/1833 2212/1834
+f 2215/1835 2212/1834 2213/1833
+f 570/1829 2214/1836 2209/1827
+f 2215/1835 2209/1827 2214/1836
+f 572/1825 2205/1824 2216/1837
+f 2218/1838 2216/1837 2205/1824
+f 2213/1833 571/1832 2218/1838
+f 2217/1839 2218/1838 571/1832
+f 2220/1840 2223/1841 574/1842
+f 2219/1843 574/1842 2223/1841
+f 2221/1844 577/1845 2223/1841
+f 2222/1846 2223/1841 577/1845
+f 2225/1847 2227/1848 575/1849
+f 2224/1850 575/1849 2227/1848
+f 574/1842 2226/1851 2220/1840
+f 2227/1848 2220/1840 2226/1851
+f 576/1852 2229/1853 2228/1854
+f 2231/1855 2228/1854 2229/1853
+f 575/1849 2230/1856 2225/1847
+f 2231/1855 2225/1847 2230/1856
+f 577/1845 2221/1844 2232/1857
+f 2234/1858 2232/1857 2221/1844
+f 2229/1853 576/1852 2234/1858
+f 2233/1859 2234/1858 576/1852
+f 579/1860 2236/1861 2235/1862
+f 2238/1863 2235/1862 2236/1861
+f 515/1864 2000/1865 2237/1866
+f 2238/1863 2237/1866 2000/1865
+f 574/1842 2239/1867 2226/1851
+f 2241/1868 2226/1851 2239/1867
+f 2236/1861 579/1860 2241/1868
+f 2240/1869 2241/1868 579/1860
+f 580/1870 2243/1871 2242/1872
+f 2244/1873 2242/1872 2243/1871
+f 574/1842 2219/1843 2239/1867
+f 2244/1873 2239/1867 2219/1843
+f 2237/1866 2246/1874 515/1864
+f 1991/1875 515/1864 2246/1874
+f 2243/1871 580/1870 2246/1874
+f 2245/1876 2246/1874 580/1870
+f 2248/1877 2251/1878 582/1879
+f 2247/1880 582/1879 2251/1878
+f 2250/1881 2251/1878 584/1882
+f 2249/1883 584/1882 2251/1878
+f 576/1852 2252/1884 2233/1859
+f 2254/1885 2233/1859 2252/1884
+f 582/1879 2253/1886 2248/1877
+f 2254/1885 2248/1877 2253/1886
+f 583/1887 2256/1888 2255/1889
+f 2257/1890 2255/1889 2256/1888
+f 2252/1884 576/1852 2257/1890
+f 2228/1854 2257/1890 576/1852
+f 584/1882 2249/1883 2258/1891
+f 2260/1892 2258/1891 2249/1883
+f 2260/1892 2256/1888 2259/1893
+f 583/1887 2259/1893 2256/1888
+f 2261/1894 2263/1895 483/1896
+f 1891/1897 483/1896 2263/1895
+f 2235/1862 2263/1895 579/1860
+f 2262/1898 579/1860 2263/1895
+f 583/1887 2264/1899 2259/1893
+f 2265/1900 2259/1893 2264/1899
+f 483/1896 1886/1901 2261/1894
+f 2265/1900 2261/1894 1886/1901
+f 2266/1902 2267/1903 575/1849
+f 2230/1856 575/1849 2267/1903
+f 583/1887 2255/1889 2264/1899
+f 2267/1903 2264/1899 2255/1889
+f 579/1860 2262/1898 2240/1869
+f 2268/1904 2240/1869 2262/1898
+f 575/1849 2224/1850 2266/1902
+f 2268/1904 2266/1902 2224/1850
+f 2271/1905 1930/1906 2269/1907
+f 495/1908 2269/1907 1930/1906
+f 2271/1905 2270/1909 2247/1880
+f 582/1879 2247/1880 2270/1909
+f 2272/1910 2273/1911 580/1870
+f 2245/1876 580/1870 2273/1911
+f 2269/1907 495/1908 2273/1911
+f 1923/1912 2273/1911 495/1908
+f 577/1845 2274/1913 2222/1846
+f 2275/1914 2222/1846 2274/1913
+f 580/1870 2242/1872 2272/1910
+f 2275/1914 2272/1910 2242/1872
+f 2270/1909 2276/1915 582/1879
+f 2253/1886 582/1879 2276/1915
+f 2276/1915 2274/1913 2232/1857
+f 577/1845 2232/1857 2274/1913
+f 500/1916 2277/1917 1945/1918
+f 2280/1919 1945/1918 2277/1917
+f 589/1920 2279/1921 2278/1922
+f 2280/1919 2278/1922 2279/1921
+f 2282/1923 2283/1924 588/1925
+f 2281/1926 588/1925 2283/1924
+f 1939/1927 2283/1924 500/1916
+f 2277/1917 500/1916 2283/1924
+f 572/1825 2284/1928 2206/1826
+f 2286/1929 2206/1826 2284/1928
+f 588/1925 2285/1930 2282/1923
+f 2286/1929 2282/1923 2285/1930
+f 2278/1922 2288/1931 589/1920
+f 2287/1932 589/1920 2288/1931
+f 2284/1928 572/1825 2288/1931
+f 2216/1837 2288/1931 572/1825
+f 2292/1933 2289/1934 2290/1935
+f 591/1936 2290/1935 2289/1934
+f 489/1937 1906/1938 2291/1939
+f 2292/1933 2291/1939 1906/1938
+f 2293/1940 2295/1941 569/1822
+f 2210/1831 569/1822 2295/1941
+f 2290/1935 591/1936 2295/1941
+f 2294/1942 2295/1941 591/1936
+f 588/1925 2296/1943 2285/1930
+f 2297/1944 2285/1930 2296/1943
+f 569/1822 2203/1823 2293/1940
+f 2297/1944 2293/1940 2203/1823
+f 1896/1945 489/1937 2298/1946
+f 2291/1939 2298/1946 489/1937
+f 2296/1943 588/1925 2298/1946
+f 2281/1926 2298/1946 588/1925
+f 478/1947 2299/1948 1874/1949
+f 2302/1950 1874/1949 2299/1948
+f 593/1951 2301/1952 2300/1953
+f 2302/1950 2300/1953 2301/1952
+f 2303/1954 2304/1955 589/1920
+f 2279/1921 589/1920 2304/1955
+f 1868/1956 2304/1955 478/1947
+f 2299/1948 478/1947 2304/1955
+f 571/1832 2305/1957 2217/1839
+f 2306/1958 2217/1839 2305/1957
+f 589/1920 2287/1932 2303/1954
+f 2306/1958 2303/1954 2287/1932
+f 2300/1953 2308/1959 593/1951
+f 2307/1960 593/1951 2308/1959
+f 2212/1834 2308/1959 571/1832
+f 2305/1957 571/1832 2308/1959
+f 2310/1961 2312/1962 595/1963
+f 2309/1964 595/1963 2312/1962
+f 2289/1934 2312/1962 591/1936
+f 2311/1965 591/1936 2312/1962
+f 2313/1966 2315/1967 593/1951
+f 2301/1952 593/1951 2315/1967
+f 595/1963 2314/1968 2310/1961
+f 2315/1967 2310/1961 2314/1968
+f 570/1829 2316/1969 2214/1836
+f 2317/1970 2214/1836 2316/1969
+f 593/1951 2307/1960 2313/1966
+f 2317/1970 2313/1966 2307/1960
+f 591/1936 2311/1965 2294/1942
+f 2318/1971 2294/1942 2311/1965
+f 570/1829 2208/1830 2316/1969
+f 2318/1971 2316/1969 2208/1830
+f 2319/1972 2322/1973 565/1805
+f 2195/1812 565/1805 2322/1973
+f 2321/1974 2322/1973 599/1975
+f 2320/1976 599/1975 2322/1973
+f 558/1778 2323/1977 2173/1785
+f 2324/1978 2173/1785 2323/1977
+f 565/1805 2189/1806 2319/1972
+f 2324/1978 2319/1972 2189/1806
+f 2326/1979 2327/1980 597/1981
+f 2325/1982 597/1981 2327/1980
+f 2323/1977 558/1778 2327/1980
+f 2168/1780 2327/1980 558/1778
+f 598/1983 2329/1984 2328/1985
+f 2331/1986 2328/1985 2329/1984
+f 2331/1986 2326/1979 2330/1987
+f 597/1981 2330/1987 2326/1979
+f 2320/1976 2334/1988 599/1975
+f 2332/1989 599/1975 2334/1988
+f 2333/1990 2334/1988 598/1983
+f 2329/1984 598/1983 2334/1988
+f 2335/1991 601/1992 2339/1993
+f 2336/1994 2339/1993 601/1992
+f 2338/1995 2339/1993 604/1996
+f 2337/1997 604/1996 2339/1993
+f 2341/1998 2343/1999 602/2000
+f 2340/2001 602/2000 2343/1999
+f 2336/1994 601/1992 2343/1999
+f 2342/2002 2343/1999 601/1992
+f 2345/2003 2347/2004 603/2005
+f 2344/2006 603/2005 2347/2004
+f 602/2000 2346/2007 2341/1998
+f 2347/2004 2341/1998 2346/2007
+f 604/1996 2337/1997 2348/2008
+f 2350/2009 2348/2008 2337/1997
+f 603/2005 2349/2010 2345/2003
+f 2350/2009 2345/2003 2349/2010
+f 2351/2011 2353/2012 603/2005
+f 2349/2010 603/2005 2353/2012
+f 2352/2013 598/2014 2353/2012
+f 2328/2015 2353/2012 598/2014
+f 606/2016 2355/2017 2354/2018
+f 2356/2019 2354/2018 2355/2017
+f 603/2005 2344/2006 2351/2011
+f 2356/2019 2351/2011 2344/2006
+f 2358/2020 2360/2021 607/2022
+f 2357/2023 607/2022 2360/2021
+f 2359/2024 2360/2021 606/2016
+f 2355/2017 606/2016 2360/2021
+f 2362/2025 2333/2026 2352/2013
+f 598/2014 2352/2013 2333/2026
+f 607/2022 2361/2027 2358/2020
+f 2362/2025 2358/2020 2361/2027
+f 2363/2028 2366/2029 606/2016
+f 2359/2024 606/2016 2366/2029
+f 2365/2030 2366/2029 610/2031
+f 2364/2032 610/2031 2366/2029
+f 2367/2033 2368/2034 602/2000
+f 2346/2007 602/2000 2368/2034
+f 606/2016 2354/2018 2363/2028
+f 2368/2034 2363/2028 2354/2018
+f 2370/2035 2371/2036 609/2037
+f 2369/2038 609/2037 2371/2036
+f 602/2000 2340/2001 2367/2033
+f 2371/2036 2367/2033 2340/2001
+f 610/2031 2364/2032 2372/2039
+f 2374/2040 2372/2039 2364/2032
+f 609/2037 2373/2041 2370/2035
+f 2374/2040 2370/2035 2373/2041
+f 612/2042 2376/2043 2375/2044
+f 2378/2045 2375/2044 2376/2043
+f 324/2046 1247/2047 2377/2048
+f 2378/2045 2377/2048 1247/2047
+f 2379/2049 2381/2050 527/1634
+f 2051/1639 527/1634 2381/2050
+f 2380/2051 2381/2050 612/2042
+f 2376/2043 612/2042 2381/2050
+f 613/2052 2383/2053 2382/2054
+f 2384/2055 2382/2054 2383/2053
+f 527/1634 2046/1635 2379/2049
+f 2384/2055 2379/2049 2046/1635
+f 2377/2048 2386/2056 324/2046
+f 1254/2057 324/2046 2386/2056
+f 2385/2058 2386/2056 613/2052
+f 2383/2053 613/2052 2386/2056
+f 615/2059 2388/2060 2387/2061
+f 2390/2062 2387/2061 2388/2060
+f 2390/2062 2389/2063 1261/2064
+f 328/2065 1261/2064 2389/2063
+f 2391/2066 2393/2067 540/1687
+f 2093/1690 540/1687 2393/2067
+f 2392/2068 2393/2067 615/2059
+f 2388/2060 615/2059 2393/2067
+f 2396/2069 2394/2070 2395/2071
+f 616/2072 2395/2071 2394/2070
+f 540/1687 2090/1688 2391/2066
+f 2396/2069 2391/2066 2090/1688
+f 2389/2063 2398/2073 328/2065
+f 1268/2074 328/2065 2398/2073
+f 2395/2071 616/2072 2398/2073
+f 2397/2075 2398/2073 616/2072
+f 616/2072 2399/2076 2397/2075
+f 2401/2077 2397/2075 2399/2076
+f 2401/2077 2400/2078 1275/2079
+f 332/2080 1275/2079 2400/2078
+f 2078/1673 535/1665 2403/2081
+f 2402/2082 2403/2081 535/1665
+f 2394/2070 2403/2081 616/2072
+f 2399/2076 616/2072 2403/2081
+f 618/2083 2405/2084 2404/2085
+f 2406/2086 2404/2085 2405/2084
+f 2406/2086 2402/2082 2072/1664
+f 535/1665 2072/1664 2402/2082
+f 2400/2078 2408/2087 332/2080
+f 1281/2088 332/2080 2408/2087
+f 2405/2084 618/2083 2408/2087
+f 2407/2089 2408/2087 618/2083
+f 620/2090 2410/2091 2409/2092
+f 2412/2093 2409/2092 2410/2091
+f 335/2094 1287/2095 2411/2096
+f 2412/2093 2411/2096 1287/2095
+f 2413/2097 2415/2098 595/2099
+f 2314/2100 595/2099 2415/2098
+f 2414/2101 2415/2098 620/2090
+f 2410/2091 620/2090 2415/2098
+f 2418/2102 2416/2103 2417/2104
+f 621/2105 2417/2104 2416/2103
+f 2418/2102 2413/2097 2309/2106
+f 595/2099 2309/2106 2413/2097
+f 2411/2096 2420/2107 335/2094
+f 1294/2108 335/2094 2420/2107
+f 2417/2104 621/2105 2420/2107
+f 2419/2109 2420/2107 621/2105
+f 2421/2110 623/2111 2424/2112
+f 2422/2113 2424/2112 623/2111
+f 2423/2114 339/2115 2424/2112
+f 1301/2116 2424/2112 339/2115
+f 584/2117 2425/2118 2250/2119
+f 2427/2120 2250/2119 2425/2118
+f 623/2111 2426/2121 2422/2113
+f 2427/2120 2422/2113 2426/2121
+f 2429/2122 2430/2123 624/2124
+f 2428/2125 624/2124 2430/2123
+f 2425/2118 584/2117 2430/2123
+f 2258/2126 2430/2123 584/2117
+f 2432/2127 1308/2128 2423/2114
+f 339/2115 2423/2114 1308/2128
+f 624/2124 2431/2129 2429/2122
+f 2432/2127 2429/2122 2431/2129
+f 613/2052 2433/2130 2385/2058
+f 2435/2131 2385/2058 2433/2130
+f 2435/2131 2434/2132 1315/2133
+f 343/2134 1315/2133 2434/2132
+f 2063/1654 531/1647 2437/2135
+f 2436/2136 2437/2135 531/1647
+f 2382/2054 2437/2135 613/2052
+f 2433/2130 613/2052 2437/2135
+f 2439/2137 2392/2068 2438/2138
+f 615/2059 2438/2138 2392/2068
+f 531/1647 2058/1649 2436/2136
+f 2439/2137 2436/2136 2058/1649
+f 2434/2132 2440/2139 343/2134
+f 1320/2140 343/2134 2440/2139
+f 2438/2138 615/2059 2440/2139
+f 2387/2061 2440/2139 615/2059
+f 627/2141 2442/2142 2441/2143
+f 2444/2144 2441/2143 2442/2142
+f 345/2145 1325/2146 2443/2147
+f 2444/2144 2443/2147 1325/2146
+f 2156/1762 553/1756 2447/2148
+f 2445/2149 2447/2148 553/1756
+f 2446/2150 2447/2148 627/2141
+f 2442/2142 627/2141 2447/2148
+f 612/2042 2448/2151 2380/2051
+f 2449/2152 2380/2051 2448/2151
+f 553/1756 2151/1757 2445/2149
+f 2449/2152 2445/2149 2151/1757
+f 1331/2153 345/2145 2450/2154
+f 2443/2147 2450/2154 345/2145
+f 2375/2044 2450/2154 612/2042
+f 2448/2151 612/2042 2450/2154
+f 629/2155 2452/2156 2451/2157
+f 2454/2158 2451/2157 2452/2156
+f 2454/2158 2453/2159 1337/746
+f 348/748 1337/746 2453/2159
+f 2170/2160 557/2161 2457/2162
+f 2455/2163 2457/2162 557/2161
+f 629/2155 2456/2164 2452/2156
+f 2457/2162 2452/2156 2456/2164
+f 2459/2165 2446/2166 2458/2167
+f 627/2168 2458/2167 2446/2166
+f 557/2161 2165/2169 2455/2163
+f 2459/2165 2455/2163 2165/2169
+f 2453/2159 2460/2170 348/748
+f 1343/756 348/748 2460/2170
+f 2458/2167 627/2168 2460/2170
+f 2441/2171 2460/2170 627/2168
+f 631/2172 2462/2173 2461/2174
+f 2464/2175 2461/2174 2462/2173
+f 2464/2175 2463/2176 1349/766
+f 351/768 1349/766 2463/2176
+f 2465/2177 632/2178 2468/2179
+f 2466/2180 2468/2179 632/2178
+f 2467/2181 2468/2179 631/2172
+f 2462/2173 631/2172 2468/2179
+f 633/2182 2470/2183 2469/2184
+f 2472/2185 2469/2184 2470/2183
+f 632/2178 2471/2186 2466/2180
+f 2472/2185 2466/2180 2471/2186
+f 2463/2176 2474/2187 351/768
+f 1356/776 351/768 2474/2187
+f 2473/2188 2474/2187 633/2182
+f 2470/2183 633/2182 2474/2187
+f 2478/2189 2475/2190 2476/2191
+f 635/2192 2476/2191 2475/2190
+f 1365/785 2478/2189 356/786
+f 2477/2193 356/786 2478/2189
+f 566/1810 2479/2194 2199/1817
+f 2481/2195 2199/1817 2479/2194
+f 2476/2191 635/2192 2481/2195
+f 2480/2196 2481/2195 635/2192
+f 631/2172 2482/2197 2467/2181
+f 2483/2198 2467/2181 2482/2197
+f 2193/1811 2483/2198 566/1810
+f 2479/2194 566/1810 2483/2198
+f 356/786 2477/2193 1371/794
+f 2484/2199 1371/794 2477/2193
+f 2482/2197 631/2172 2484/2199
+f 2461/2174 2484/2199 631/2172
+f 637/2200 2486/2201 2485/2202
+f 2488/2203 2485/2202 2486/2201
+f 2488/2203 2487/2204 1377/2205
+f 359/2206 1377/2205 2487/2204
+f 2489/2207 2491/2208 496/1494
+f 1934/1501 496/1494 2491/2208
+f 2490/2209 2491/2208 637/2200
+f 2486/2201 637/2200 2491/2208
+f 2492/2210 2493/2211 623/2111
+f 2426/2121 623/2111 2493/2211
+f 496/1494 1928/1495 2489/2207
+f 2493/2211 2489/2207 1928/1495
+f 2487/2204 2494/2212 359/2206
+f 1383/2213 359/2206 2494/2212
+f 2494/2212 2492/2210 2421/2110
+f 623/2111 2421/2110 2492/2210
+f 2495/2214 2497/2215 621/2105
+f 2419/2109 621/2105 2497/2215
+f 2496/2216 362/2217 2497/2215
+f 1389/2218 2497/2215 362/2217
+f 488/1460 2498/2219 1907/1467
+f 2499/2220 1907/1467 2498/2219
+f 2499/2220 2495/2214 2416/2103
+f 621/2105 2416/2103 2495/2214
+f 2501/2221 2502/2222 639/2223
+f 2500/2224 639/2223 2502/2222
+f 1902/1462 2502/2222 488/1460
+f 2498/2219 488/1460 2502/2222
+f 2504/2225 1395/2226 2496/2216
+f 362/2217 2496/2216 1395/2226
+f 639/2223 2503/2227 2501/2221
+f 2504/2225 2501/2221 2503/2227
+f 2505/2228 2507/2229 639/2223
+f 2503/2227 639/2223 2507/2229
+f 2506/2230 365/2231 2507/2229
+f 1401/2232 2507/2229 365/2231
+f 492/1475 2508/2233 1919/1482
+f 2509/2234 1919/1482 2508/2233
+f 639/2223 2500/2224 2505/2228
+f 2509/2234 2505/2228 2500/2224
+f 2511/2235 2512/2236 641/2237
+f 2510/2238 641/2237 2512/2236
+f 1914/1477 2512/2236 492/1475
+f 2508/2233 492/1475 2512/2236
+f 2514/2239 1407/2240 2506/2230
+f 365/2231 2506/2230 1407/2240
+f 641/2237 2513/2241 2511/2235
+f 2514/2239 2511/2235 2513/2241
+f 2515/2242 2517/2243 641/2237
+f 2513/2241 641/2237 2517/2243
+f 2516/2244 368/2245 2517/2243
+f 1413/2246 2517/2243 368/2245
+f 2519/2247 2149/1751 2518/2248
+f 555/1752 2518/2248 2149/1751
+f 641/2237 2510/2238 2515/2242
+f 2519/2247 2515/2242 2510/2238
+f 2522/2249 2520/2250 2521/2251
+f 643/2252 2521/2251 2520/2250
+f 555/1752 2158/1763 2518/2248
+f 2522/2249 2518/2248 2158/1763
+f 1419/2253 368/2245 2524/2254
+f 2516/2244 2524/2254 368/2245
+f 2521/2251 643/2252 2524/2254
+f 2523/2255 2524/2254 643/2252
+f 643/2256 2525/2257 2523/2258
+f 2527/2259 2523/2258 2525/2257
+f 371/855 1425/857 2526/2260
+f 2527/2259 2526/2260 1425/857
+f 563/1789 2528/2261 2177/1788
+f 2529/2262 2177/1788 2528/2261
+f 2520/2263 2529/2262 643/2256
+f 2525/2257 643/2256 2529/2262
+f 2531/2264 2532/2265 645/2266
+f 2530/2267 645/2266 2532/2265
+f 2186/1800 2532/2265 563/1789
+f 2528/2261 563/1789 2532/2265
+f 1431/866 371/855 2534/2268
+f 2526/2260 2534/2268 371/855
+f 645/2266 2533/2269 2531/2264
+f 2534/2268 2531/2264 2533/2269
+f 2535/2270 2537/2271 645/2266
+f 2533/2269 645/2266 2537/2271
+f 2536/2272 374/874 2537/2271
+f 1437/875 2537/2271 374/874
+f 562/1797 2538/2273 2187/1802
+f 2539/2274 2187/1802 2538/2273
+f 645/2266 2530/2267 2535/2270
+f 2539/2274 2535/2270 2530/2267
+f 2541/2275 2542/2276 647/2277
+f 2540/2278 647/2277 2542/2276
+f 2182/1798 2542/2276 562/1797
+f 2538/2273 562/1797 2542/2276
+f 2544/2279 1443/881 2536/2272
+f 374/874 2536/2272 1443/881
+f 647/2277 2543/2280 2541/2275
+f 2544/2279 2541/2275 2543/2280
+f 2545/2281 2547/2282 647/2277
+f 2543/2280 647/2277 2547/2282
+f 2547/2282 2546/2283 1449/887
+f 377/889 1449/887 2546/2283
+f 2548/2284 2549/2285 567/1815
+f 2201/1819 567/1815 2549/2285
+f 2540/2278 2549/2285 647/2277
+f 2545/2281 647/2277 2549/2285
+f 635/2192 2550/2286 2480/2196
+f 2551/2287 2480/2196 2550/2286
+f 567/1815 2197/1816 2548/2284
+f 2551/2287 2548/2284 2197/1816
+f 2546/2283 2552/2288 377/889
+f 1454/893 377/889 2552/2288
+f 2550/2286 635/2192 2552/2288
+f 2475/2190 2552/2288 635/2192
+f 2555/2289 1461/899 2553/2290
+f 380/900 2553/2290 1461/899
+f 2555/2289 2554/2291 2451/2157
+f 629/2155 2451/2157 2554/2291
+f 2557/2292 2558/2293 650/2294
+f 2556/2295 650/2294 2558/2293
+f 380/900 1470/916 2553/2290
+f 2558/2293 2553/2290 1470/916
+f 604/2296 2559/2297 2338/2298
+f 2561/2299 2338/2298 2559/2297
+f 650/2294 2560/2300 2557/2292
+f 2561/2299 2557/2292 2560/2300
+f 2562/2301 2563/2302 597/2303
+f 2330/2304 597/2303 2563/2302
+f 2348/2305 2563/2302 604/2296
+f 2559/2297 604/2296 2563/2302
+f 2554/2291 2564/2306 629/2155
+f 2456/2164 629/2155 2564/2306
+f 2564/2306 2562/2301 2325/2307
+f 597/2303 2325/2307 2562/2301
+f 2321/1974 599/1975 2567/2308
+f 2565/2309 2567/2308 599/1975
+f 2465/2177 2567/2308 632/2178
+f 2566/2310 632/2178 2567/2308
+f 652/2311 2569/2312 2568/2313
+f 2570/2314 2568/2313 2569/2312
+f 599/1975 2332/1989 2565/2309
+f 2570/2314 2565/2309 2332/1989
+f 2471/2186 632/2178 2572/2315
+f 2566/2310 2572/2315 632/2178
+f 2571/2316 2572/2315 652/2311
+f 2569/2312 652/2311 2572/2315
+f 654/2317 2574/2318 2573/2319
+f 2577/2320 2573/2319 2574/2318
+f 657/2321 2576/2322 2575/2323
+f 2577/2320 2575/2323 2576/2322
+f 2579/2324 2581/2325 655/2326
+f 2578/2327 655/2326 2581/2325
+f 2580/2328 2581/2325 654/2317
+f 2574/2318 654/2317 2581/2325
+f 656/2329 2583/2330 2582/2331
+f 2585/2332 2582/2331 2583/2330
+f 655/2326 2584/2333 2579/2324
+f 2585/2332 2579/2324 2584/2333
+f 2575/2323 2588/2334 657/2321
+f 2586/2335 657/2321 2588/2334
+f 2587/2336 2588/2334 656/2329
+f 2583/2330 656/2329 2588/2334
+f 659/2337 2590/2338 2589/2339
+f 2593/2340 2589/2339 2590/2338
+f 2593/2340 2591/2341 2592/2342
+f 662/2343 2592/2342 2591/2341
+f 660/2344 2595/2345 2594/2346
+f 2597/2347 2594/2346 2595/2345
+f 2590/2338 659/2337 2597/2347
+f 2596/2348 2597/2347 659/2337
+f 2601/2349 2598/2350 2599/2351
+f 661/2352 2599/2351 2598/2350
+f 2595/2345 660/2344 2601/2349
+f 2600/2353 2601/2349 660/2344
+f 2591/2341 2604/2354 662/2343
+f 2602/2355 662/2343 2604/2354
+f 2599/2351 661/2352 2604/2354
+f 2603/2356 2604/2354 661/2352
+f 2606/2357 2609/2358 664/2359
+f 2605/2360 664/2359 2609/2358
+f 2608/2361 2609/2358 667/2362
+f 2607/2363 667/2362 2609/2358
+f 2613/2364 2610/2365 2611/2366
+f 665/2367 2611/2366 2610/2365
+f 664/2359 2612/2368 2606/2357
+f 2613/2364 2606/2357 2612/2368
+f 2615/2369 2617/2370 666/2371
+f 2614/2372 666/2371 2617/2370
+f 2611/2366 665/2367 2617/2370
+f 2616/2373 2617/2370 665/2367
+f 2607/2363 2620/2374 667/2362
+f 2618/2375 667/2362 2620/2374
+f 2620/2374 2615/2369 2619/2376
+f 666/2371 2619/2376 2615/2369
+f 2603/2356 661/2352 2624/2377
+f 2621/2378 2624/2377 661/2352
+f 671/2379 2623/2380 2622/2381
+f 2624/2377 2622/2381 2623/2380
+f 2626/2382 2627/2383 669/2384
+f 2625/2385 669/2384 2627/2383
+f 2621/2378 661/2352 2627/2383
+f 2598/2350 2627/2383 661/2352
+f 670/2386 2629/2387 2628/2388
+f 2631/2389 2628/2388 2629/2387
+f 2631/2389 2626/2382 2630/2390
+f 669/2384 2630/2390 2626/2382
+f 671/2379 2622/2381 2632/2391
+f 2634/2392 2632/2391 2622/2381
+f 2629/2387 670/2386 2634/2392
+f 2633/2393 2634/2392 670/2386
+f 2638/2394 2633/2393 2635/2395
+f 670/2386 2635/2395 2633/2393
+f 674/2396 2637/2397 2636/2398
+f 2638/2394 2636/2398 2637/2397
+f 2641/2399 2639/2400 2640/2401
+f 673/2402 2640/2401 2639/2400
+f 2635/2395 670/2386 2641/2399
+f 2628/2388 2641/2399 670/2386
+f 2619/2376 666/2371 2644/2403
+f 2642/2404 2644/2403 666/2371
+f 2644/2403 2640/2401 2643/2405
+f 673/2402 2643/2405 2640/2401
+f 2646/2406 2645/2407 2636/2398
+f 674/2396 2636/2398 2645/2407
+f 666/2371 2614/2372 2642/2404
+f 2646/2406 2642/2404 2614/2372
+f 676/2408 2648/2409 2647/2410
+f 2650/2411 2647/2410 2648/2409
+f 2589/2339 2650/2411 659/2337
+f 2649/2412 659/2337 2650/2411
+f 2652/2413 2654/2414 677/2415
+f 2651/2416 677/2415 2654/2414
+f 2653/2417 2654/2414 676/2408
+f 2648/2409 676/2408 2654/2414
+f 2612/2368 664/2359 2657/2418
+f 2655/2419 2657/2418 664/2359
+f 677/2415 2656/2420 2652/2413
+f 2657/2418 2652/2413 2656/2420
+f 659/2337 2649/2412 2596/2348
+f 2658/2421 2596/2348 2649/2412
+f 664/2359 2605/2360 2655/2419
+f 2658/2421 2655/2419 2605/2360
+f 2660/2422 2662/2423 679/2424
+f 2659/2425 679/2424 2662/2423
+f 2661/2426 676/2427 2662/2423
+f 2647/2428 2662/2423 676/2427
+f 2663/2429 2665/2430 607/2022
+f 2361/2027 607/2022 2665/2430
+f 2664/2431 2665/2430 679/2424
+f 2660/2422 679/2424 2665/2430
+f 610/2031 2666/2432 2365/2030
+f 2667/2433 2365/2030 2666/2432
+f 607/2022 2357/2023 2663/2429
+f 2667/2433 2663/2429 2357/2023
+f 2669/2434 2670/2435 680/2436
+f 2668/2437 680/2436 2670/2435
+f 2372/2039 2670/2435 610/2031
+f 2666/2432 610/2031 2670/2435
+f 676/2427 2661/2426 2653/2438
+f 2672/2439 2653/2438 2661/2426
+f 680/2436 2671/2440 2669/2434
+f 2672/2439 2669/2434 2671/2440
+f 2656/2441 677/2442 2676/2443
+f 2673/2444 2676/2443 677/2442
+f 2675/2445 2676/2443 682/2446
+f 2674/2447 682/2446 2676/2443
+f 680/2436 2677/2448 2671/2440
+f 2678/2449 2671/2440 2677/2448
+f 677/2442 2651/2450 2673/2444
+f 2678/2449 2673/2444 2651/2450
+f 609/2037 2679/2451 2373/2041
+f 2680/2452 2373/2041 2679/2451
+f 2680/2452 2677/2448 2668/2437
+f 680/2436 2668/2437 2677/2448
+f 2682/2453 2342/2002 2681/2454
+f 601/1992 2681/2454 2342/2002
+f 609/2037 2369/2038 2679/2451
+f 2682/2453 2679/2451 2369/2038
+f 2684/2455 2683/2456 2674/2447
+f 682/2446 2674/2447 2683/2456
+f 2684/2455 2681/2454 2335/1991
+f 601/1992 2335/1991 2681/2454
+f 665/2457 2685/2458 2616/2459
+f 2688/2460 2616/2459 2685/2458
+f 684/2461 2687/2462 2686/2463
+f 2688/2460 2686/2463 2687/2462
+f 2689/2464 2690/2465 682/2466
+f 2675/2467 682/2466 2690/2465
+f 2610/2468 2690/2465 665/2457
+f 2685/2458 665/2457 2690/2465
+f 650/2294 2691/2469 2560/2300
+f 2692/2470 2560/2300 2691/2469
+f 2692/2470 2689/2464 2683/2471
+f 682/2466 2683/2471 2689/2464
+f 2693/2472 2694/2473 415/1075
+f 1602/1082 415/1075 2694/2473
+f 2556/2295 2694/2473 650/2294
+f 2691/2469 650/2294 2694/2473
+f 2686/2463 2696/2474 684/2461
+f 2695/2475 684/2461 2696/2474
+f 415/1075 1597/1077 2693/2472
+f 2696/2474 2693/2472 1597/1077
+f 2697/2476 2700/2477 419/1095
+f 1610/1096 419/1095 2700/2477
+f 2699/2478 2700/2477 687/2479
+f 2698/2480 687/2479 2700/2477
+f 633/2182 2701/2481 2473/2188
+f 2702/2482 2473/2188 2701/2481
+f 419/1095 1621/1110 2697/2476
+f 2702/2482 2697/2476 1621/1110
+f 2704/2483 2705/2484 686/2485
+f 2703/2486 686/2485 2705/2484
+f 2469/2184 2705/2484 633/2182
+f 2701/2481 633/2182 2705/2484
+f 2706/2487 2708/2488 671/2489
+f 2623/2490 671/2489 2708/2488
+f 686/2485 2707/2491 2704/2483
+f 2708/2488 2704/2483 2707/2491
+f 2710/2492 2709/2493 2698/2480
+f 687/2479 2698/2480 2709/2493
+f 2706/2487 671/2489 2710/2492
+f 2632/2494 2710/2492 671/2489
+f 1625/1114 421/1115 2713/2495
+f 2711/2496 2713/2495 421/1115
+f 684/2461 2695/2475 2712/2497
+f 2713/2495 2712/2497 2695/2475
+f 687/2498 2714/2499 2699/2500
+f 2715/2501 2699/2500 2714/2499
+f 2715/2501 2711/2496 1631/1127
+f 421/1115 1631/1127 2711/2496
+f 2716/2502 2717/2503 674/2504
+f 2637/2505 674/2504 2717/2503
+f 2714/2499 687/2498 2717/2503
+f 2709/2506 2717/2503 687/2498
+f 2712/2497 2718/2507 684/2461
+f 2687/2462 684/2461 2718/2507
+f 2645/2508 2718/2507 674/2504
+f 2716/2502 674/2504 2718/2507
+f 652/2311 2719/2509 2571/2316
+f 2721/2510 2571/2316 2719/2509
+f 686/2485 2703/2486 2720/2511
+f 2721/2510 2720/2511 2703/2486
+f 2664/2512 679/2513 2723/2514
+f 2722/2515 2723/2514 679/2513
+f 2568/2313 2723/2514 652/2311
+f 2719/2509 652/2311 2723/2514
+f 2724/2516 2725/2517 662/2518
+f 2592/2519 662/2518 2725/2517
+f 2659/2520 2725/2517 679/2513
+f 2722/2515 679/2513 2725/2517
+f 2720/2511 2726/2521 686/2485
+f 2707/2491 686/2485 2726/2521
+f 662/2518 2602/2522 2724/2516
+f 2726/2521 2724/2516 2602/2522
+f 660/2344 2727/2523 2600/2353
+f 2730/2524 2600/2353 2727/2523
+f 2730/2524 2728/2525 2729/2526
+f 692/2527 2729/2526 2728/2525
+f 2731/2528 691/2529 2733/2530
+f 2732/2531 2733/2530 691/2529
+f 2594/2346 2733/2530 660/2344
+f 2727/2523 660/2344 2733/2530
+f 657/2321 2734/2532 2576/2322
+f 2736/2533 2576/2322 2734/2532
+f 691/2529 2735/2534 2732/2531
+f 2736/2533 2732/2531 2735/2534
+f 692/2527 2728/2525 2737/2535
+f 2738/2536 2737/2535 2728/2525
+f 657/2321 2586/2335 2734/2532
+f 2738/2536 2734/2532 2586/2335
+f 2739/2537 2742/2538 669/2384
+f 2630/2390 669/2384 2742/2538
+f 2741/2539 2742/2538 694/2540
+f 2740/2541 694/2540 2742/2538
+f 2729/2526 692/2527 2744/2542
+f 2743/2543 2744/2542 692/2527
+f 669/2384 2625/2385 2739/2537
+f 2744/2542 2739/2537 2625/2385
+f 656/2329 2745/2544 2587/2336
+f 2746/2545 2587/2336 2745/2544
+f 692/2527 2737/2535 2743/2543
+f 2746/2545 2743/2543 2737/2535
+f 694/2540 2740/2541 2747/2546
+f 2748/2547 2747/2546 2740/2541
+f 656/2329 2582/2331 2745/2544
+f 2748/2547 2745/2544 2582/2331
+f 673/2402 2749/2548 2643/2405
+f 2752/2549 2643/2405 2749/2548
+f 2752/2549 2750/2550 2751/2551
+f 696/2552 2751/2551 2750/2550
+f 2754/2553 2741/2539 2753/2554
+f 694/2540 2753/2554 2741/2539
+f 2639/2400 2754/2553 673/2402
+f 2749/2548 673/2402 2754/2553
+f 2755/2555 2756/2556 655/2326
+f 2584/2333 655/2326 2756/2556
+f 2753/2554 694/2540 2756/2556
+f 2747/2546 2756/2556 694/2540
+f 696/2552 2750/2550 2757/2557
+f 2758/2558 2757/2557 2750/2550
+f 655/2326 2578/2327 2755/2555
+f 2758/2558 2755/2555 2578/2327
+f 2759/2559 2761/2560 667/2362
+f 2608/2361 667/2362 2761/2560
+f 2731/2528 2761/2560 691/2529
+f 2760/2561 691/2529 2761/2560
+f 2763/2562 2751/2551 2762/2563
+f 696/2552 2762/2563 2751/2551
+f 667/2362 2618/2375 2759/2559
+f 2763/2562 2759/2559 2618/2375
+f 654/2317 2764/2564 2580/2328
+f 2765/2565 2580/2328 2764/2564
+f 696/2552 2757/2557 2762/2563
+f 2765/2565 2762/2563 2757/2557
+f 691/2529 2760/2561 2735/2534
+f 2766/2566 2735/2534 2760/2561
+f 654/2317 2573/2319 2764/2564
+f 2766/2566 2764/2564 2573/2319
+f 2767/2567 699/2568 2770/2569
+f 2768/2570 2770/2569 699/2568
+f 2769/2571 482/2572 2770/2569
+f 1883/2573 2770/2569 482/2572
+f 2774/2574 2771/2575 2772/2576
+f 700/2577 2772/2576 2771/2575
+f 699/2568 2773/2578 2768/2570
+f 2774/2574 2768/2570 2773/2578
+f 2778/2579 2775/2580 2776/2581
+f 701/2582 2776/2581 2775/2580
+f 2772/2576 700/2577 2778/2579
+f 2777/2583 2778/2579 700/2577
+f 2780/2584 1888/2585 2769/2571
+f 482/2572 2769/2571 1888/2585
+f 2776/2581 701/2582 2780/2584
+f 2779/2586 2780/2584 701/2582
+f 701/2582 2781/2587 2779/2586
+f 2783/2588 2779/2586 2781/2587
+f 624/2589 2428/2590 2782/2591
+f 2783/2588 2782/2591 2428/2590
+f 2785/2592 2786/2593 703/2594
+f 2784/2595 703/2594 2786/2593
+f 2775/2580 2786/2593 701/2582
+f 2781/2587 701/2582 2786/2593
+f 436/1214 2787/2596 1703/1221
+f 2789/2597 1703/1221 2787/2596
+f 703/2594 2788/2598 2785/2592
+f 2789/2597 2785/2592 2788/2598
+f 2782/2591 2790/2599 624/2589
+f 2431/2600 624/2589 2790/2599
+f 1698/1215 2790/2599 436/1214
+f 2787/2596 436/1214 2790/2599
+f 2791/2601 2793/2602 441/1229
+f 1709/1228 441/1229 2793/2602
+f 2409/2603 2793/2602 620/2604
+f 2792/2605 620/2604 2793/2602
+f 705/2606 2795/2607 2794/2608
+f 2796/2609 2794/2608 2795/2607
+f 441/1229 1718/1241 2791/2601
+f 2796/2609 2791/2601 1718/1241
+f 2797/2610 706/2611 2800/2612
+f 2798/2613 2800/2612 706/2611
+f 2799/2614 2800/2612 705/2606
+f 2795/2607 705/2606 2800/2612
+f 620/2604 2792/2605 2414/2615
+f 2802/2616 2414/2615 2792/2605
+f 706/2611 2801/2617 2798/2613
+f 2802/2616 2798/2613 2801/2617
+f 2803/2618 2805/2619 706/2611
+f 2801/2617 706/2611 2805/2619
+f 2805/2619 2804/2620 1872/2621
+f 479/2622 1872/2621 2804/2620
+f 2807/2623 2808/2624 708/2625
+f 2806/2626 708/2625 2808/2624
+f 2808/2624 2803/2618 2797/2610
+f 706/2611 2797/2610 2803/2618
+f 699/2568 2809/2627 2773/2578
+f 2811/2628 2773/2578 2809/2627
+f 2811/2628 2807/2623 2810/2629
+f 708/2625 2810/2629 2807/2623
+f 2804/2620 2812/2630 479/2622
+f 1877/2631 479/2622 2812/2630
+f 2812/2630 2809/2627 2767/2567
+f 699/2568 2767/2567 2809/2627
+f 2813/2632 2815/2633 703/2594
+f 2788/2598 703/2594 2815/2633
+f 445/1258 1731/1260 2814/2634
+f 2815/2633 2814/2634 1731/1260
+f 700/2577 2816/2635 2777/2583
+f 2817/2636 2777/2583 2816/2635
+f 703/2594 2784/2595 2813/2632
+f 2817/2636 2813/2632 2784/2595
+f 2819/2637 2820/2638 710/2639
+f 2818/2640 710/2639 2820/2638
+f 2771/2575 2820/2638 700/2577
+f 2816/2635 700/2577 2820/2638
+f 2822/2641 1737/1267 2814/2634
+f 445/1258 2814/2634 1737/1267
+f 710/2639 2821/2642 2819/2637
+f 2822/2641 2819/2637 2821/2642
+f 710/2639 2823/2643 2821/2642
+f 2825/2644 2821/2642 2823/2643
+f 1743/1273 2825/2644 448/1275
+f 2824/2645 448/1275 2825/2644
+f 2826/2646 2827/2647 708/2625
+f 2810/2629 708/2625 2827/2647
+f 2818/2640 2827/2647 710/2639
+f 2823/2643 710/2639 2827/2647
+f 705/2606 2828/2648 2799/2614
+f 2829/2649 2799/2614 2828/2648
+f 708/2625 2806/2626 2826/2646
+f 2829/2649 2826/2646 2806/2626
+f 2824/2645 2830/2650 448/1275
+f 1748/1279 448/1275 2830/2650
+f 2794/2608 2830/2650 705/2606
+f 2828/2648 705/2606 2830/2650
+f 2832/2651 2835/2652 713/2653
+f 2831/2654 713/2653 2835/2652
+f 2834/2655 2835/2652 715/2656
+f 2833/2657 715/2656 2835/2652
+f 2836/2658 714/2659 2839/2660
+f 2837/2661 2839/2660 714/2659
+f 713/2653 2838/2662 2832/2651
+f 2839/2660 2832/2651 2838/2662
+f 715/2656 2833/2657 2840/2663
+f 2842/2664 2840/2663 2833/2657
+f 714/2659 2841/2665 2837/2661
+f 2842/2664 2837/2661 2841/2665
+f 2844/2666 2846/2667 717/2668
+f 2843/2669 717/2668 2846/2667
+f 713/2653 2831/2654 2845/2670
+f 2846/2667 2845/2670 2831/2654
+f 519/1586 2847/2671 2006/1585
+f 2849/2672 2006/1585 2847/2671
+f 2849/2672 2844/2666 2848/2673
+f 717/2668 2848/2673 2844/2666
+f 2850/2674 2851/2675 497/1499
+f 1937/1504 497/1499 2851/2675
+f 2015/1596 2851/2675 519/1586
+f 2847/2671 519/1586 2851/2675
+f 718/2676 2853/2677 2852/2678
+f 2854/2679 2852/2678 2853/2677
+f 497/1499 1932/1500 2850/2674
+f 2854/2679 2850/2674 1932/1500
+f 2845/2670 2856/2680 713/2653
+f 2838/2662 713/2653 2856/2680
+f 2855/2681 2856/2680 718/2676
+f 2853/2677 718/2676 2856/2680
+f 715/2656 2857/2682 2834/2655
+f 2859/2683 2834/2655 2857/2682
+f 717/2668 2843/2669 2858/2684
+f 2859/2683 2858/2684 2843/2669
+f 2861/2685 2862/2686 720/2687
+f 2860/2688 720/2687 2862/2686
+f 2840/2663 2862/2686 715/2656
+f 2857/2682 715/2656 2862/2686
+f 2863/2689 2865/2690 534/1657
+f 2074/1668 534/1657 2865/2690
+f 2864/2691 2865/2690 720/2687
+f 2861/2685 720/2687 2865/2690
+f 545/1713 2866/2692 2115/1716
+f 2867/2693 2115/1716 2866/2692
+f 2863/2689 534/1657 2867/2693
+f 2067/1659 2867/2693 534/1657
+f 2858/2684 2868/2694 717/2668
+f 2848/2673 717/2668 2868/2694
+f 2111/1711 2868/2694 545/1713
+f 2866/2692 545/1713 2868/2694
+f 2869/2695 2872/2696 460/2697
+f 1794/2698 460/2697 2872/2696
+f 2871/2699 2872/2696 722/2700
+f 2870/2701 722/2700 2872/2696
+f 618/2083 2873/2702 2407/2089
+f 2874/2703 2407/2089 2873/2702
+f 2874/2703 2869/2695 1801/2704
+f 460/2697 1801/2704 2869/2695
+f 2864/2691 720/2687 2876/2705
+f 2875/2706 2876/2705 720/2687
+f 2404/2085 2876/2705 618/2083
+f 2873/2702 618/2083 2876/2705
+f 722/2700 2870/2701 2877/2707
+f 2878/2708 2877/2707 2870/2701
+f 720/2687 2860/2688 2875/2706
+f 2878/2708 2875/2706 2860/2688
+f 463/2709 2879/2710 1805/2711
+f 2881/2712 1805/2711 2879/2710
+f 637/2200 2485/2202 2880/2713
+f 2881/2712 2880/2713 2485/2202
+f 2883/2714 2884/2715 724/2716
+f 2882/2717 724/2716 2884/2715
+f 1812/2718 2884/2715 463/2709
+f 2879/2710 463/2709 2884/2715
+f 718/2676 2885/2719 2855/2681
+f 2887/2720 2855/2681 2885/2719
+f 724/2716 2886/2721 2883/2714
+f 2887/2720 2883/2714 2886/2721
+f 2880/2713 2888/2722 637/2200
+f 2490/2209 637/2200 2888/2722
+f 2852/2678 2888/2722 718/2676
+f 2885/2719 718/2676 2888/2722
+f 726/2723 2890/2724 2889/2725
+f 2892/2726 2889/2725 2890/2724
+f 714/2659 2836/2658 2891/2727
+f 2892/2726 2891/2727 2836/2658
+f 2894/2728 2896/2729 727/2730
+f 2893/2731 727/2730 2896/2729
+f 2895/2732 2896/2729 726/2723
+f 2890/2724 726/2723 2896/2729
+f 2898/2733 2900/2734 728/2735
+f 2897/2736 728/2735 2900/2734
+f 2894/2728 727/2730 2900/2734
+f 2899/2737 2900/2734 727/2730
+f 2841/2665 714/2659 2902/2738
+f 2891/2727 2902/2738 714/2659
+f 2902/2738 2898/2733 2901/2739
+f 728/2735 2901/2739 2898/2733
+f 2901/2739 728/2735 2905/2740
+f 2903/2741 2905/2740 728/2735
+f 2877/2707 2905/2740 722/2700
+f 2904/2742 722/2700 2905/2740
+f 730/2743 2907/2744 2906/2745
+f 2908/2746 2906/2745 2907/2744
+f 728/2735 2897/2736 2903/2741
+f 2908/2746 2903/2741 2897/2736
+f 2909/2747 2911/2748 469/2749
+f 1837/2750 469/2749 2911/2748
+f 2910/2751 2911/2748 730/2743
+f 2907/2744 730/2743 2911/2748
+f 722/2700 2904/2742 2871/2699
+f 2912/2752 2871/2699 2904/2742
+f 469/2749 1832/2753 2909/2747
+f 2912/2752 2909/2747 1832/2753
+f 473/2754 2913/2755 1843/2756
+f 2915/2757 1843/2756 2913/2755
+f 2915/2757 2914/2758 2882/2717
+f 724/2716 2882/2717 2914/2758
+f 2917/2759 2918/2760 732/2761
+f 2916/2762 732/2761 2918/2760
+f 1850/2763 2918/2760 473/2754
+f 2913/2755 473/2754 2918/2760
+f 2921/2764 2895/2732 2919/2765
+f 726/2723 2919/2765 2895/2732
+f 732/2761 2920/2766 2917/2759
+f 2921/2764 2917/2759 2920/2766
+f 2914/2758 2922/2767 724/2716
+f 2886/2721 724/2716 2922/2767
+f 2919/2765 726/2723 2922/2767
+f 2889/2725 2922/2767 726/2723
+f 2923/2768 2925/2769 727/2730
+f 2899/2737 727/2730 2925/2769
+f 2906/2745 2925/2769 730/2743
+f 2924/2770 730/2743 2925/2769
+f 2926/2771 2927/2772 732/2761
+f 2920/2766 732/2761 2927/2772
+f 2923/2768 727/2730 2927/2772
+f 2893/2731 2927/2772 727/2730
+f 475/2773 2928/2774 1860/2775
+f 2929/2776 1860/2775 2928/2774
+f 732/2761 2916/2762 2926/2771
+f 2929/2776 2926/2771 2916/2762
+f 730/2743 2924/2770 2910/2751
+f 2930/2777 2910/2751 2924/2770
+f 2930/2777 2928/2774 1856/2778
+f 475/2773 1856/2778 2928/2774
+f 738/6 6/2779 739/1
+f 735/2 739/1 6/2779
+f 193/2780 737/7 736/3
+f 739/1 736/3 737/7
+f 743/11 742/12 740/10
+f 14/2781 740/10 742/12
+f 741/9 193/2780 743/11
+f 736/3 743/11 193/2780
+f 747/14 746/17 744/16
+f 10/2782 744/16 746/17
+f 193/2780 741/9 745/13
+f 747/14 745/13 741/9
+f 16/2783 748/18 749/20
+f 750/19 749/20 748/18
+f 745/13 750/19 193/2780
+f 737/7 193/2780 750/19
+f 754/22 735/2 751/24
+f 6/2779 751/24 735/2
+f 197/2784 753/25 752/21
+f 754/22 752/21 753/25
+f 18/2785 755/28 757/30
+f 758/29 757/30 755/28
+f 752/21 758/29 197/2784
+f 756/27 197/2784 758/29
+f 762/33 761/35 759/31
+f 13/2786 759/31 761/35
+f 197/2784 756/27 760/34
+f 762/33 760/34 756/27
+f 763/37 14/2781 764/36
+f 742/12 764/36 14/2781
+f 760/34 764/36 197/2784
+f 753/25 197/2784 764/36
+f 1/2787 765/39 768/42
+f 769/38 768/42 765/39
+f 767/44 769/38 202/2788
+f 766/40 202/2788 769/38
+f 12/2789 770/48 772/49
+f 773/46 772/49 770/48
+f 202/2788 766/40 771/45
+f 773/46 771/45 766/40
+f 774/52 777/53 11/2790
+f 776/54 11/2790 777/53
+f 771/45 777/53 202/2788
+f 775/51 202/2788 777/53
+f 24/2791 778/55 779/57
+f 780/56 779/57 778/55
+f 202/2788 775/51 767/44
+f 780/56 767/44 775/51
+f 7/2792 781/61 784/62
+f 785/59 784/62 781/61
+f 206/2793 783/64 782/58
+f 785/59 782/58 783/64
+f 779/57 788/66 24/2791
+f 787/67 24/2791 788/66
+f 782/58 788/66 206/2793
+f 786/65 206/2793 788/66
+f 11/2790 789/71 774/52
+f 791/69 774/52 789/71
+f 206/2793 786/65 790/68
+f 791/69 790/68 786/65
+f 792/72 794/73 23/2794
+f 793/74 23/2794 794/73
+f 790/68 794/73 206/2793
+f 783/64 206/2793 794/73
+f 2/2795 795/78 798/80
+f 799/76 798/80 795/78
+f 211/2796 797/81 796/75
+f 799/76 796/75 797/81
+f 800/84 803/85 27/2797
+f 802/86 27/2797 803/85
+f 796/75 803/85 211/2796
+f 801/83 211/2796 803/85
+f 804/89 807/90 19/2798
+f 806/91 19/2798 807/90
+f 211/2796 801/83 805/88
+f 807/90 805/88 801/83
+f 20/2799 808/93 809/94
+f 810/92 809/94 808/93
+f 810/92 797/81 805/88
+f 211/2796 805/88 797/81
+f 768/42 814/96 1/2787
+f 813/98 1/2787 814/96
+f 812/99 814/96 215/2800
+f 811/95 215/2800 814/96
+f 24/2791 815/103 778/55
+f 817/101 778/55 815/103
+f 215/2800 811/95 816/100
+f 817/101 816/100 811/95
+f 818/106 821/107 15/2801
+f 820/108 15/2801 821/107
+f 816/100 821/107 215/2800
+f 819/105 215/2800 821/107
+f 16/2783 822/110 823/111
+f 824/109 823/111 822/110
+f 215/2800 819/105 812/99
+f 824/109 812/99 819/105
+f 828/113 827/114 738/6
+f 6/2779 738/6 827/114
+f 218/2802 826/116 825/112
+f 828/113 825/112 826/116
+f 16/2783 823/111 748/18
+f 830/118 748/18 823/111
+f 825/112 830/118 218/2802
+f 829/117 218/2802 830/118
+f 15/2801 831/122 818/106
+f 833/120 818/106 831/122
+f 218/2802 829/117 832/119
+f 833/120 832/119 829/117
+f 834/123 836/124 32/2803
+f 835/125 32/2803 836/124
+f 832/119 836/124 218/2802
+f 826/116 218/2802 836/124
+f 841/129 840/132 837/128
+f 5/2804 837/128 840/132
+f 838/127 222/2805 841/129
+f 839/130 841/129 222/2805
+f 32/2803 835/125 843/135
+f 844/134 843/135 835/125
+f 844/134 842/133 838/127
+f 222/2805 838/127 842/133
+f 831/122 15/2801 847/136
+f 845/137 847/136 15/2801
+f 842/133 847/136 222/2805
+f 846/138 222/2805 847/136
+f 849/142 30/2806 850/140
+f 848/141 850/140 30/2806
+f 222/2805 846/138 839/130
+f 850/140 839/130 846/138
+f 851/145 854/146 7/2792
+f 781/61 7/2792 854/146
+f 853/147 854/146 224/2807
+f 852/144 224/2807 854/146
+f 857/149 856/150 849/142
+f 30/2806 849/142 856/150
+f 855/148 224/2807 857/149
+f 852/144 857/149 224/2807
+f 15/2801 820/108 845/137
+f 859/151 845/137 820/108
+f 859/151 858/152 855/148
+f 224/2807 855/148 858/152
+f 24/2791 787/67 815/103
+f 860/153 815/103 787/67
+f 858/152 860/153 224/2807
+f 853/147 224/2807 860/153
+f 861/156 864/157 2/2795
+f 795/78 2/2795 864/157
+f 863/158 864/157 228/2808
+f 862/155 228/2808 864/157
+f 18/2785 865/162 867/163
+f 868/160 867/163 865/162
+f 228/2808 862/155 866/159
+f 868/160 866/159 862/155
+f 869/166 872/167 17/2809
+f 871/168 17/2809 872/167
+f 866/159 872/167 228/2808
+f 870/165 228/2808 872/167
+f 27/2797 802/86 873/170
+f 874/169 873/170 802/86
+f 228/2808 870/165 863/158
+f 874/169 863/158 870/165
+f 879/172 878/177 875/174
+f 8/2810 875/174 878/177
+f 232/2811 877/175 876/171
+f 879/172 876/171 877/175
+f 873/170 882/179 27/2797
+f 881/180 27/2797 882/179
+f 876/171 882/179 232/2811
+f 880/178 232/2811 882/179
+f 17/2809 883/184 869/166
+f 885/182 869/166 883/184
+f 232/2811 880/178 884/181
+f 885/182 884/181 880/178
+f 887/187 29/2812 888/185
+f 886/186 888/185 29/2812
+f 884/181 888/185 232/2811
+f 877/175 232/2811 888/185
+f 837/128 5/2804 892/188
+f 889/189 892/188 5/2804
+f 891/192 892/188 235/2813
+f 890/190 235/2813 892/188
+f 29/2812 887/187 894/195
+f 895/194 894/195 887/187
+f 235/2813 890/190 893/193
+f 895/194 893/193 890/190
+f 896/198 898/199 17/2809
+f 883/184 17/2809 898/199
+f 893/193 898/199 235/2813
+f 897/197 235/2813 898/199
+f 899/201 32/2803 900/200
+f 843/135 900/200 32/2803
+f 235/2813 897/197 891/192
+f 900/200 891/192 897/197
+f 751/24 6/2779 903/203
+f 827/114 903/203 6/2779
+f 236/2814 902/204 901/202
+f 903/203 901/202 902/204
+f 32/2803 899/201 834/123
+f 905/206 834/123 899/201
+f 901/202 905/206 236/2814
+f 904/205 236/2814 905/206
+f 17/2809 871/168 896/198
+f 907/208 896/198 871/168
+f 236/2814 904/205 906/207
+f 907/208 906/207 904/205
+f 18/2785 757/30 865/162
+f 908/209 865/162 757/30
+f 906/207 908/209 236/2814
+f 902/204 236/2814 908/209
+f 913/211 912/214 909/213
+f 3/2815 909/213 912/214
+f 241/2816 911/216 910/210
+f 913/211 910/210 911/216
+f 22/2817 914/219 916/221
+f 917/220 916/221 914/219
+f 915/218 241/2816 917/220
+f 910/210 917/220 241/2816
+f 21/2818 918/222 920/226
+f 921/224 920/226 918/222
+f 921/224 919/225 915/218
+f 241/2816 915/218 919/225
+f 31/2819 922/227 923/229
+f 924/228 923/229 922/227
+f 919/225 924/228 241/2816
+f 911/216 241/2816 924/228
+f 925/233 929/231 9/2820
+f 928/235 9/2820 929/231
+f 245/2821 927/234 926/230
+f 929/231 926/230 927/234
+f 923/229 932/238 31/2819
+f 931/239 31/2819 932/238
+f 926/230 932/238 245/2821
+f 930/237 245/2821 932/238
+f 21/2818 933/243 918/222
+f 935/241 918/222 933/243
+f 245/2821 930/237 934/240
+f 935/241 934/240 930/237
+f 938/244 937/246 936/245
+f 33/2822 936/245 937/246
+f 934/240 938/244 245/2821
+f 927/234 245/2821 938/244
+f 939/249 943/250 4/2823
+f 942/252 4/2823 943/250
+f 941/253 943/250 250/2824
+f 940/248 250/2824 943/250
+f 28/2825 944/257 946/258
+f 947/255 946/258 944/257
+f 250/2824 940/248 945/254
+f 947/255 945/254 940/248
+f 948/260 951/259 25/2826
+f 950/263 25/2826 951/259
+f 945/254 951/259 250/2824
+f 949/261 250/2824 951/259
+f 26/2827 952/264 953/266
+f 954/265 953/266 952/264
+f 954/265 941/253 949/261
+f 250/2824 949/261 941/253
+f 9/2820 928/235 957/269
+f 958/268 957/269 928/235
+f 253/2828 956/271 955/267
+f 958/268 955/267 956/271
+f 959/274 961/275 33/2822
+f 936/245 33/2822 961/275
+f 955/267 961/275 253/2828
+f 960/273 253/2828 961/275
+f 25/2826 950/263 963/278
+f 964/277 963/278 950/263
+f 253/2828 960/273 962/276
+f 964/277 962/276 960/273
+f 965/279 966/280 28/2825
+f 944/257 28/2825 966/280
+f 962/276 966/280 253/2828
+f 956/271 253/2828 966/280
+f 840/132 970/282 5/2804
+f 969/284 5/2804 970/282
+f 968/285 970/282 256/2829
+f 967/281 256/2829 970/282
+f 30/2806 971/289 848/141
+f 973/287 848/141 971/289
+f 256/2829 967/281 972/286
+f 973/287 972/286 967/281
+f 975/292 31/2819 976/290
+f 931/239 976/290 31/2819
+f 972/286 976/290 256/2829
+f 974/291 256/2829 976/290
+f 9/2820 977/294 925/233
+f 978/293 925/233 977/294
+f 256/2829 974/291 968/285
+f 978/293 968/285 974/291
+f 29/2812 979/298 886/186
+f 982/296 886/186 979/298
+f 982/296 980/295 981/299
+f 259/2830 981/299 980/295
+f 946/258 985/300 28/2825
+f 984/302 28/2825 985/300
+f 980/295 985/300 259/2830
+f 983/301 259/2830 985/300
+f 986/305 988/306 4/2823
+f 939/249 4/2823 988/306
+f 259/2830 983/301 987/304
+f 988/306 987/304 983/301
+f 878/177 990/307 8/2810
+f 989/308 8/2810 990/307
+f 981/299 259/2830 990/307
+f 987/304 990/307 259/2830
+f 23/2794 991/309 792/72
+f 994/311 792/72 991/309
+f 994/311 992/312 993/313
+f 262/2831 993/313 992/312
+f 22/2817 916/221 996/316
+f 997/314 996/316 916/221
+f 992/312 997/314 262/2831
+f 995/315 262/2831 997/314
+f 998/320 1000/318 3/2815
+f 909/213 3/2815 1000/318
+f 262/2831 995/315 999/317
+f 1000/318 999/317 995/315
+f 784/62 1002/321 7/2792
+f 1001/322 7/2792 1002/321
+f 993/313 262/2831 1002/321
+f 999/317 1002/321 262/2831
+f 28/2825 984/302 965/279
+f 1005/324 965/279 984/302
+f 263/2832 1004/325 1003/323
+f 1005/324 1003/323 1004/325
+f 894/195 1007/327 29/2812
+f 979/298 29/2812 1007/327
+f 1003/323 1007/327 263/2832
+f 1006/326 263/2832 1007/327
+f 5/2804 969/284 889/189
+f 1009/329 889/189 969/284
+f 263/2832 1006/326 1008/328
+f 1009/329 1008/328 1006/326
+f 957/269 1010/330 9/2820
+f 977/294 9/2820 1010/330
+f 1008/328 1010/330 263/2832
+f 1004/325 263/2832 1010/330
+f 856/150 1013/332 30/2806
+f 971/289 30/2806 1013/332
+f 1012/333 1013/332 264/2833
+f 1011/331 264/2833 1013/332
+f 7/2792 1001/322 851/145
+f 1015/335 851/145 1001/322
+f 264/2833 1011/331 1014/334
+f 1015/335 1014/334 1011/331
+f 912/214 1017/337 3/2815
+f 998/320 3/2815 1017/337
+f 1014/334 1017/337 264/2833
+f 1016/336 264/2833 1017/337
+f 31/2819 975/292 922/227
+f 1018/338 922/227 975/292
+f 264/2833 1016/336 1012/333
+f 1018/338 1012/333 1016/336
+f 23/2794 1019/339 991/309
+f 1022/341 991/309 1019/339
+f 1020/342 268/2834 1022/341
+f 1021/343 1022/341 268/2834
+f 35/2835 1023/344 1025/348
+f 1026/346 1025/348 1023/344
+f 1026/346 1024/347 1020/342
+f 268/2834 1020/342 1024/347
+f 1029/353 34/2836 1030/349
+f 1027/350 1030/349 34/2836
+f 1024/347 1030/349 268/2834
+f 1028/351 268/2834 1030/349
+f 1032/354 1031/355 996/316
+f 22/2817 996/316 1031/355
+f 268/2834 1028/351 1021/343
+f 1032/354 1021/343 1028/351
+f 1033/358 1036/359 35/2837
+f 1023/361 35/2837 1036/359
+f 1035/360 1036/359 272/2838
+f 1034/357 272/2838 1036/359
+f 37/2839 1037/366 1039/367
+f 1040/364 1039/367 1037/366
+f 272/2838 1034/357 1038/363
+f 1040/364 1038/363 1034/357
+f 1043/372 36/2840 1044/368
+f 1041/369 1044/368 36/2840
+f 1038/363 1044/368 272/2838
+f 1042/370 272/2838 1044/368
+f 1046/373 1045/375 1029/374
+f 34/2841 1029/374 1045/375
+f 272/2838 1042/370 1035/360
+f 1046/373 1035/360 1042/370
+f 35/2837 1047/379 1033/358
+f 1050/377 1033/358 1047/379
+f 276/2842 1049/380 1048/376
+f 1050/377 1048/376 1049/380
+f 1051/383 1054/384 40/2843
+f 1053/385 40/2843 1054/384
+f 1048/376 1054/384 276/2842
+f 1052/382 276/2842 1054/384
+f 41/2844 1055/389 1057/390
+f 1058/387 1057/390 1055/389
+f 276/2842 1052/382 1056/386
+f 1058/387 1056/386 1052/382
+f 1039/367 1060/391 37/2839
+f 1059/392 37/2839 1060/391
+f 1056/386 1060/391 276/2842
+f 1049/380 276/2842 1060/391
+f 37/2839 1059/392 1063/395
+f 1064/394 1063/395 1059/392
+f 280/2845 1062/397 1061/393
+f 1064/394 1061/393 1062/397
+f 1065/400 1067/401 41/2844
+f 1055/389 41/2844 1067/401
+f 1061/393 1067/401 280/2845
+f 1066/399 280/2845 1067/401
+f 1071/405 1070/406 1068/404
+f 42/2846 1068/404 1070/406
+f 280/2845 1066/399 1069/403
+f 1071/405 1069/403 1066/399
+f 38/2847 1072/407 1073/409
+f 1074/408 1073/409 1072/407
+f 1074/408 1062/397 1069/403
+f 280/2845 1069/403 1062/397
+f 1079/411 1078/416 1075/413
+f 43/2848 1075/413 1078/416
+f 285/2849 1077/414 1076/410
+f 1079/411 1076/410 1077/414
+f 1080/420 1083/418 48/2850
+f 1082/421 48/2850 1083/418
+f 285/2849 1076/410 1081/417
+f 1083/418 1081/417 1076/410
+f 1084/424 1087/425 45/2851
+f 1086/426 45/2851 1087/425
+f 285/2849 1081/417 1085/423
+f 1087/425 1085/423 1081/417
+f 1088/427 1090/428 46/2852
+f 1089/429 46/2852 1090/428
+f 285/2849 1085/423 1077/414
+f 1090/428 1077/414 1085/423
+f 1091/433 1095/431 44/2853
+f 1094/435 44/2853 1095/431
+f 290/2854 1093/434 1092/430
+f 1095/431 1092/430 1093/434
+f 1099/438 1098/441 1096/440
+f 50/2855 1096/440 1098/441
+f 290/2854 1092/430 1097/437
+f 1099/438 1097/437 1092/430
+f 1100/444 1103/445 47/2856
+f 1102/446 47/2856 1103/445
+f 1097/437 1103/445 290/2854
+f 1101/443 290/2854 1103/445
+f 1104/447 1106/448 49/2857
+f 1105/449 49/2857 1106/448
+f 290/2854 1101/443 1093/434
+f 1106/448 1093/434 1101/443
+f 18/2858 867/452 1109/455
+f 1110/453 1109/455 867/452
+f 1107/451 293/2859 1110/453
+f 1108/456 1110/453 293/2859
+f 1113/458 861/461 1111/460
+f 2/2860 1111/460 861/461
+f 293/2859 1107/451 1112/457
+f 1113/458 1112/457 1107/451
+f 1094/435 1116/463 44/2853
+f 1115/464 44/2853 1116/463
+f 1112/457 1116/463 293/2859
+f 1114/462 293/2859 1116/463
+f 1117/466 1118/465 49/2857
+f 1104/447 49/2857 1118/465
+f 293/2859 1114/462 1108/456
+f 1118/465 1108/456 1114/462
+f 20/2861 1119/470 1122/471
+f 1123/468 1122/471 1119/470
+f 297/2862 1121/473 1120/467
+f 1123/468 1120/467 1121/473
+f 13/2863 1124/475 1126/478
+f 1127/474 1126/478 1124/475
+f 1120/467 1127/474 297/2862
+f 1125/476 297/2862 1127/474
+f 1102/446 1130/480 47/2856
+f 1129/481 47/2856 1130/480
+f 297/2862 1125/476 1128/479
+f 1130/480 1128/479 1125/476
+f 1131/482 1132/483 50/2855
+f 1096/440 50/2855 1132/483
+f 1128/479 1132/483 297/2862
+f 1121/473 297/2862 1132/483
+f 18/2858 1109/455 755/486
+f 1135/485 755/486 1109/455
+f 298/2864 1134/488 1133/484
+f 1135/485 1133/484 1134/488
+f 1105/449 1137/490 49/2857
+f 1117/466 49/2857 1137/490
+f 1133/484 1137/490 298/2864
+f 1136/489 298/2864 1137/490
+f 1129/481 1139/492 47/2856
+f 1100/444 47/2856 1139/492
+f 298/2864 1136/489 1138/491
+f 1139/492 1138/491 1136/489
+f 13/2863 761/493 1124/475
+f 1140/494 1124/475 761/493
+f 1138/491 1140/494 298/2864
+f 1134/488 298/2864 1140/494
+f 20/2861 1122/471 808/498
+f 1143/495 808/498 1122/471
+f 1142/497 1143/495 299/2865
+f 1141/496 299/2865 1143/495
+f 1098/441 1145/501 50/2855
+f 1131/482 50/2855 1145/501
+f 299/2865 1141/496 1144/500
+f 1145/501 1144/500 1141/496
+f 1115/464 1147/503 44/2853
+f 1091/433 44/2853 1147/503
+f 1144/500 1147/503 299/2865
+f 1146/502 299/2865 1147/503
+f 1148/504 1111/460 798/505
+f 2/2860 798/505 1111/460
+f 1148/504 1142/497 1146/502
+f 299/2865 1146/502 1142/497
+f 16/2866 1149/508 822/511
+f 1152/509 822/511 1149/508
+f 1151/512 1152/509 302/2867
+f 1150/507 302/2867 1152/509
+f 1082/421 1155/514 48/2850
+f 1154/515 48/2850 1155/514
+f 302/2867 1150/507 1153/513
+f 1155/514 1153/513 1150/507
+f 1156/518 1158/519 43/2848
+f 1075/413 43/2848 1158/519
+f 1153/513 1158/519 302/2867
+f 1157/517 302/2867 1158/519
+f 1160/520 1159/522 813/521
+f 1/2868 813/521 1159/522
+f 302/2867 1157/517 1151/512
+f 1160/520 1151/512 1157/517
+f 772/525 1164/526 12/2869
+f 1163/528 12/2869 1164/526
+f 1161/524 304/2870 1164/526
+f 1162/527 1164/526 304/2870
+f 1159/522 1166/531 1/2868
+f 765/532 1/2868 1166/531
+f 1166/531 1165/530 1161/524
+f 304/2870 1161/524 1165/530
+f 1078/416 1168/534 43/2848
+f 1156/518 43/2848 1168/534
+f 1165/530 1168/534 304/2870
+f 1167/533 304/2870 1168/534
+f 1169/536 1170/535 46/2852
+f 1088/427 46/2852 1170/535
+f 304/2870 1167/533 1162/527
+f 1170/535 1162/527 1167/533
+f 1171/539 1174/540 10/2871
+f 744/542 10/2871 1174/540
+f 1173/543 1174/540 306/2872
+f 1172/538 306/2872 1174/540
+f 1086/426 1177/545 45/2851
+f 1176/546 45/2851 1177/545
+f 306/2872 1172/538 1175/544
+f 1177/545 1175/544 1172/538
+f 1154/515 1179/548 48/2850
+f 1080/420 48/2850 1179/548
+f 1175/544 1179/548 306/2872
+f 1178/547 306/2872 1179/548
+f 16/2866 749/550 1149/508
+f 1180/549 1149/508 749/550
+f 306/2872 1178/547 1173/543
+f 1180/549 1173/543 1178/547
+f 12/2869 1163/528 1183/553
+f 1184/552 1183/553 1163/528
+f 308/2873 1182/555 1181/551
+f 1184/552 1181/551 1182/555
+f 1089/429 1186/557 46/2852
+f 1169/536 46/2852 1186/557
+f 1181/551 1186/557 308/2873
+f 1185/556 308/2873 1186/557
+f 1176/546 1188/559 45/2851
+f 1084/424 45/2851 1188/559
+f 308/2873 1185/556 1187/558
+f 1188/559 1187/558 1185/556
+f 10/2871 1189/560 1171/539
+f 1190/561 1171/539 1189/560
+f 1187/558 1190/561 308/2873
+f 1182/555 308/2873 1190/561
+f 38/2847 1191/565 1072/407
+f 1194/563 1072/407 1191/565
+f 312/2874 1193/566 1192/562
+f 1194/563 1192/562 1193/566
+f 51/2875 1195/567 1197/571
+f 1198/569 1197/571 1195/567
+f 312/2874 1192/562 1196/570
+f 1198/569 1196/570 1192/562
+f 1201/576 53/2876 1202/572
+f 1199/573 1202/572 53/2876
+f 312/2874 1196/570 1200/574
+f 1202/572 1200/574 1196/570
+f 1205/578 1204/579 1043/372
+f 36/2840 1043/372 1204/579
+f 312/2874 1200/574 1203/577
+f 1205/578 1203/577 1200/574
+f 1063/395 1206/580 37/2839
+f 1037/366 37/2839 1206/580
+f 1203/577 1206/580 312/2874
+f 1193/566 312/2874 1206/580
+f 52/2877 1207/584 1210/585
+f 1211/582 1210/585 1207/584
+f 1211/582 1208/581 1209/587
+f 317/2878 1209/587 1208/581
+f 1212/590 1215/591 53/2879
+f 1214/592 53/2879 1215/591
+f 1208/581 1215/591 317/2878
+f 1213/589 317/2878 1215/591
+f 1216/595 1219/596 57/2880
+f 1218/597 57/2880 1219/596
+f 1217/594 317/2878 1219/596
+f 1213/589 1219/596 317/2878
+f 1222/598 1221/600 1220/599
+f 56/2881 1220/599 1221/600
+f 317/2878 1217/594 1209/587
+f 1222/598 1209/587 1217/594
+f 1225/602 1212/590 1201/604
+f 53/2879 1201/604 1212/590
+f 320/2882 1224/605 1223/601
+f 1225/602 1223/601 1224/605
+f 51/2883 1226/608 1195/610
+f 1228/609 1195/610 1226/608
+f 1227/607 320/2882 1228/609
+f 1223/601 1228/609 320/2882
+f 1232/612 1231/615 1229/614
+f 54/2884 1229/614 1231/615
+f 320/2882 1227/607 1230/611
+f 1232/612 1230/611 1227/607
+f 1218/597 1234/616 57/2880
+f 1233/617 57/2880 1234/616
+f 1230/611 1234/616 320/2882
+f 1224/605 320/2882 1234/616
+f 1238/619 1229/614 1235/621
+f 54/2884 1235/621 1229/614
+f 323/2885 1237/622 1236/618
+f 1238/619 1236/618 1237/622
+f 1239/625 1242/626 55/2886
+f 1241/627 55/2886 1242/626
+f 1236/618 1242/626 323/2885
+f 1240/624 323/2885 1242/626
+f 1221/600 1245/629 56/2881
+f 1244/630 56/2881 1245/629
+f 323/2885 1240/624 1243/628
+f 1245/629 1243/628 1240/624
+f 1233/617 1246/631 57/2880
+f 1216/595 57/2880 1246/631
+f 1243/628 1246/631 323/2885
+f 1237/622 323/2885 1246/631
+f 1247/634 1251/635 69/2887
+f 1250/637 69/2887 1251/635
+f 1249/638 1251/635 327/2888
+f 1248/633 327/2888 1251/635
+f 76/2889 1252/642 1254/643
+f 1255/640 1254/643 1252/642
+f 327/2888 1248/633 1253/639
+f 1255/640 1253/639 1248/633
+f 920/226 1258/645 21/2818
+f 1257/646 21/2818 1258/645
+f 1253/639 1258/645 327/2888
+f 1256/644 327/2888 1258/645
+f 22/2817 1259/648 914/219
+f 1260/647 914/219 1259/648
+f 327/2888 1256/644 1249/638
+f 1260/647 1249/638 1256/644
+f 1264/654 77/2890 1265/649
+f 1261/650 1265/649 77/2890
+f 1263/655 1265/649 331/2891
+f 1262/651 331/2891 1265/649
+f 1269/657 1268/660 1266/659
+f 80/2892 1266/659 1268/660
+f 331/2891 1262/651 1267/656
+f 1269/657 1267/656 1262/651
+f 963/278 1272/662 25/2826
+f 1271/663 25/2826 1272/662
+f 1270/661 331/2891 1272/662
+f 1267/656 1272/662 331/2891
+f 33/2822 1273/665 959/274
+f 1274/664 959/274 1273/665
+f 331/2891 1270/661 1263/655
+f 1274/664 1263/655 1270/661
+f 1266/659 80/2892 1278/666
+f 1275/667 1278/666 80/2892
+f 1277/670 1278/666 334/2893
+f 1276/668 334/2893 1278/666
+f 1282/672 1281/675 1279/674
+f 81/2894 1279/674 1281/675
+f 334/2893 1276/668 1280/671
+f 1282/672 1280/671 1276/668
+f 1284/678 26/2827 1285/676
+f 953/266 1285/676 26/2827
+f 1280/671 1285/676 334/2893
+f 1283/677 334/2893 1285/676
+f 25/2826 1271/663 948/260
+f 1286/679 948/260 1271/663
+f 1286/679 1277/670 1283/677
+f 334/2893 1283/677 1277/670
+f 1287/682 1291/683 67/2895
+f 1290/685 67/2895 1291/683
+f 1289/686 1291/683 338/2896
+f 1288/681 338/2896 1291/683
+f 1295/688 1294/691 1292/690
+f 68/2897 1292/690 1294/691
+f 338/2896 1288/681 1293/687
+f 1295/688 1293/687 1288/681
+f 1297/696 12/2789 1298/692
+f 1183/693 1298/692 12/2789
+f 1293/687 1298/692 338/2896
+f 1296/694 338/2896 1298/692
+f 1300/697 1189/699 1299/698
+f 10/2782 1299/698 1189/699
+f 338/2896 1296/694 1289/686
+f 1300/697 1289/686 1296/694
+f 78/2898 1301/703 1304/704
+f 1305/701 1304/704 1301/703
+f 1305/701 1302/700 1303/706
+f 342/2899 1303/706 1302/700
+f 1306/709 1309/710 74/2900
+f 1308/711 74/2900 1309/710
+f 1307/708 342/2899 1309/710
+f 1302/700 1309/710 342/2899
+f 1312/713 1311/716 1126/715
+f 13/2786 1126/715 1311/716
+f 342/2899 1307/708 1310/712
+f 1312/713 1310/712 1307/708
+f 1313/717 1314/718 20/2799
+f 1119/719 20/2799 1314/718
+f 1303/706 342/2899 1314/718
+f 1310/712 1314/718 342/2899
+f 1315/721 1318/720 76/2889
+f 1252/642 76/2889 1318/720
+f 1317/724 1318/720 344/2901
+f 1316/722 344/2901 1318/720
+f 1321/726 1320/727 1264/654
+f 77/2890 1264/654 1320/727
+f 344/2901 1316/722 1319/725
+f 1321/726 1319/725 1316/722
+f 937/246 1323/729 33/2822
+f 1273/665 33/2822 1323/729
+f 1322/728 344/2901 1323/729
+f 1319/725 1323/729 344/2901
+f 21/2818 1257/646 933/243
+f 1324/730 933/243 1257/646
+f 344/2901 1322/728 1317/724
+f 1324/730 1317/724 1322/728
+f 1328/736 70/2902 1329/734
+f 1325/733 1329/734 70/2902
+f 1326/732 347/2903 1329/734
+f 1327/737 1329/734 347/2903
+f 69/2887 1250/637 1331/740
+f 1332/739 1331/740 1250/637
+f 347/2903 1326/732 1330/738
+f 1332/739 1330/738 1326/732
+f 1031/355 1334/742 22/2817
+f 1259/648 22/2817 1334/742
+f 1330/738 1334/742 347/2903
+f 1333/741 347/2903 1334/742
+f 34/2836 1335/743 1027/350
+f 1336/744 1027/350 1335/743
+f 347/2903 1333/741 1327/737
+f 1336/744 1327/737 1333/741
+f 1340/750 71/2904 1341/745
+f 1337/746 1341/745 71/2904
+f 1339/751 1341/745 350/2905
+f 1338/747 350/2905 1341/745
+f 1344/753 1343/756 1328/755
+f 70/2906 1328/755 1343/756
+f 350/2905 1338/747 1342/752
+f 1344/753 1342/752 1338/747
+f 1045/759 1346/760 34/2907
+f 1335/761 34/2907 1346/760
+f 1345/758 350/2905 1346/760
+f 1342/752 1346/760 350/2905
+f 36/2908 1347/762 1041/764
+f 1348/763 1041/764 1347/762
+f 1348/763 1339/751 1345/758
+f 350/2905 1345/758 1339/751
+f 59/2909 1349/766 1352/770
+f 1353/765 1352/770 1349/766
+f 1351/771 1353/765 355/2910
+f 1350/767 355/2910 1353/765
+f 65/2911 1354/775 1356/776
+f 1357/773 1356/776 1354/775
+f 355/2910 1350/767 1355/772
+f 1357/773 1355/772 1350/767
+f 1358/779 1361/780 39/2912
+f 1360/781 39/2912 1361/780
+f 1355/772 1361/780 355/2910
+f 1359/778 355/2910 1361/780
+f 38/2847 1362/783 1363/784
+f 1364/782 1363/784 1362/783
+f 355/2910 1359/778 1351/771
+f 1364/782 1351/771 1359/778
+f 1368/790 60/2913 1369/787
+f 1365/785 1369/787 60/2913
+f 358/2914 1367/789 1366/788
+f 1369/787 1366/788 1367/789
+f 1372/793 1371/794 1352/770
+f 59/2909 1352/770 1371/794
+f 1370/792 358/2914 1372/793
+f 1366/788 1372/793 358/2914
+f 38/2847 1073/409 1362/783
+f 1374/795 1362/783 1073/409
+f 1374/795 1373/796 1370/792
+f 358/2914 1370/792 1373/796
+f 1375/798 1376/797 42/2846
+f 1068/404 42/2846 1376/797
+f 1373/796 1376/797 358/2914
+f 1367/789 358/2914 1376/797
+f 1380/804 79/2915 1381/799
+f 1377/800 1381/799 79/2915
+f 1379/805 1381/799 361/2916
+f 1378/801 361/2916 1381/799
+f 1383/808 78/2898 1384/806
+f 1304/704 1384/806 78/2898
+f 361/2916 1378/801 1382/807
+f 1384/806 1382/807 1378/801
+f 20/2799 809/94 1313/717
+f 1386/810 1313/717 809/94
+f 1385/809 361/2916 1386/810
+f 1382/807 1386/810 361/2916
+f 19/2798 1387/812 804/89
+f 1388/811 804/89 1387/812
+f 1388/811 1379/805 1385/809
+f 361/2916 1385/809 1379/805
+f 68/2897 1389/816 1292/690
+f 1392/814 1292/690 1389/816
+f 364/2917 1391/817 1390/813
+f 1392/814 1390/813 1391/817
+f 1393/820 1396/821 58/2918
+f 1395/822 58/2918 1396/821
+f 1394/819 364/2917 1396/821
+f 1390/813 1396/821 364/2917
+f 11/2790 776/54 1398/825
+f 1399/824 1398/825 776/54
+f 364/2917 1394/819 1397/823
+f 1399/824 1397/823 1394/819
+f 770/48 12/2789 1400/826
+f 1297/696 1400/826 12/2789
+f 1397/823 1400/826 364/2917
+f 1391/817 364/2917 1400/826
+f 1404/828 1393/820 1401/830
+f 58/2918 1401/830 1393/820
+f 367/2919 1403/831 1402/827
+f 1404/828 1402/827 1403/831
+f 1405/834 1408/835 61/2920
+f 1407/836 61/2920 1408/835
+f 1406/833 367/2919 1408/835
+f 1402/827 1408/835 367/2919
+f 23/2794 793/74 1410/839
+f 1411/838 1410/839 793/74
+f 367/2919 1406/833 1409/837
+f 1411/838 1409/837 1406/833
+f 1398/825 1412/840 11/2790
+f 789/71 11/2790 1412/840
+f 1409/837 1412/840 367/2919
+f 1403/831 367/2919 1412/840
+f 1413/844 1416/842 61/2920
+f 1405/834 61/2920 1416/842
+f 370/2921 1415/845 1414/841
+f 1416/842 1414/841 1415/845
+f 1420/847 1419/850 1417/849
+f 62/2922 1417/849 1419/850
+f 370/2921 1414/841 1418/846
+f 1420/847 1418/846 1414/841
+f 35/2835 1025/348 1422/853
+f 1423/852 1422/853 1025/348
+f 1421/851 370/2921 1423/852
+f 1418/846 1423/852 370/2921
+f 1410/839 1424/854 23/2794
+f 1019/339 23/2794 1424/854
+f 1421/851 1424/854 370/2921
+f 1415/845 370/2921 1424/854
+f 1425/857 1428/858 62/2923
+f 1417/860 62/2923 1428/858
+f 1427/861 1428/858 373/2924
+f 1426/856 373/2924 1428/858
+f 1429/864 1432/865 63/2925
+f 1431/866 63/2925 1432/865
+f 373/2924 1426/856 1430/863
+f 1432/865 1430/863 1426/856
+f 1435/868 1434/869 1053/385
+f 40/2843 1053/385 1434/869
+f 1433/867 373/2924 1435/868
+f 1430/863 1435/868 373/2924
+f 35/2837 1422/871 1047/379
+f 1436/870 1047/379 1422/871
+f 1436/870 1427/861 1433/867
+f 373/2924 1433/867 1427/861
+f 1440/873 1429/864 1437/875
+f 63/2925 1437/875 1429/864
+f 376/2926 1439/876 1438/872
+f 1440/873 1438/872 1439/876
+f 1441/879 1444/880 64/2927
+f 1443/881 64/2927 1444/880
+f 1438/872 1444/880 376/2926
+f 1442/878 376/2926 1444/880
+f 41/2844 1057/390 1446/884
+f 1447/883 1446/884 1057/390
+f 376/2926 1442/878 1445/882
+f 1447/883 1445/882 1442/878
+f 1434/869 1448/885 40/2843
+f 1051/383 40/2843 1448/885
+f 1445/882 1448/885 376/2926
+f 1439/876 376/2926 1448/885
+f 1452/886 1441/879 1449/887
+f 64/2927 1449/887 1441/879
+f 1451/890 1452/886 378/2928
+f 1450/888 378/2928 1452/886
+f 1455/892 1454/893 1368/790
+f 60/2913 1368/790 1454/893
+f 378/2928 1450/888 1453/891
+f 1455/892 1453/891 1450/888
+f 1070/406 1457/895 42/2846
+f 1375/798 42/2846 1457/895
+f 1453/891 1457/895 378/2928
+f 1456/894 378/2928 1457/895
+f 41/2844 1446/884 1065/400
+f 1458/896 1065/400 1446/884
+f 1458/896 1451/890 1456/894
+f 378/2928 1456/894 1451/890
+f 1462/897 1461/899 1340/750
+f 71/2904 1340/750 1461/899
+f 381/2929 1460/901 1459/898
+f 1462/897 1459/898 1460/901
+f 1347/762 36/2908 1464/902
+f 1204/903 1464/902 36/2908
+f 381/2929 1459/898 1463/904
+f 1464/902 1463/904 1459/898
+f 53/2930 1214/909 1199/910
+f 1466/907 1199/910 1214/909
+f 381/2929 1463/904 1465/906
+f 1466/907 1465/906 1463/904
+f 1467/913 1469/914 52/2931
+f 1207/915 52/2931 1469/914
+f 381/2929 1465/906 1468/912
+f 1469/914 1468/912 1465/906
+f 1470/916 1472/917 72/2932
+f 1471/918 72/2932 1472/917
+f 381/2929 1468/912 1460/901
+f 1472/917 1460/901 1468/912
+f 38/2847 1363/784 1191/565
+f 1475/919 1191/565 1363/784
+f 1475/919 1473/920 1474/921
+f 383/2933 1474/921 1473/920
+f 39/2912 1476/925 1358/779
+f 1478/923 1358/779 1476/925
+f 1478/923 1477/922 1473/920
+f 383/2933 1473/920 1477/922
+f 1197/571 1480/926 51/2875
+f 1479/927 51/2875 1480/926
+f 1477/922 1480/926 383/2933
+f 1474/921 383/2933 1480/926
+f 1485/931 1484/933 1481/930
+f 85/2934 1481/930 1484/933
+f 1483/934 1485/931 388/2935
+f 1482/929 388/2935 1485/931
+f 1489/936 1488/939 1486/938
+f 83/2936 1486/938 1488/939
+f 388/2935 1482/929 1487/935
+f 1489/936 1487/935 1482/929
+f 1490/942 1493/943 84/2937
+f 1492/944 84/2937 1493/943
+f 1487/935 1493/943 388/2935
+f 1491/941 388/2935 1493/943
+f 1496/945 1495/947 1494/946
+f 82/2938 1494/946 1495/947
+f 388/2935 1491/941 1483/934
+f 1496/945 1483/934 1491/941
+f 1497/949 1501/948 92/2939
+f 1500/953 92/2939 1501/948
+f 1499/954 1501/948 393/2940
+f 1498/950 393/2940 1501/948
+f 1505/956 1504/959 1502/958
+f 89/2941 1502/958 1504/959
+f 393/2940 1498/950 1503/955
+f 1505/956 1503/955 1498/950
+f 1509/961 1508/964 1506/963
+f 88/2942 1506/963 1508/964
+f 1507/960 393/2940 1509/961
+f 1503/955 1509/961 393/2940
+f 93/2943 1510/966 1511/967
+f 1512/965 1511/967 1510/966
+f 1512/965 1499/954 1507/960
+f 393/2940 1507/960 1499/954
+f 93/2943 1513/968 1516/972
+f 1517/970 1516/972 1513/968
+f 1517/970 1514/971 1515/974
+f 398/2944 1515/974 1514/971
+f 1521/975 1520/979 1518/976
+f 86/2945 1518/976 1520/979
+f 1514/971 1521/975 398/2944
+f 1519/977 398/2944 1521/975
+f 1525/981 1524/984 1522/983
+f 87/2946 1522/983 1524/984
+f 398/2944 1519/977 1523/980
+f 1525/981 1523/980 1519/977
+f 1526/985 1528/986 94/2947
+f 1527/987 94/2947 1528/986
+f 1515/974 398/2944 1528/986
+f 1523/980 1528/986 398/2944
+f 89/2941 1529/990 1502/958
+f 1532/991 1502/958 1529/990
+f 1530/989 402/2948 1532/991
+f 1531/992 1532/991 402/2948
+f 1535/997 90/2949 1536/994
+f 1533/996 1536/994 90/2949
+f 1530/989 1536/994 402/2948
+f 1534/993 402/2948 1536/994
+f 1539/1002 91/2950 1540/998
+f 1537/999 1540/998 91/2950
+f 1534/993 1540/998 402/2948
+f 1538/1000 402/2948 1540/998
+f 1542/1003 1541/1004 1508/964
+f 88/2942 1508/964 1541/1004
+f 402/2948 1538/1000 1531/992
+f 1542/1003 1531/992 1538/1000
+f 1543/1007 1546/1008 90/2949
+f 1533/996 90/2949 1546/1008
+f 1544/1006 405/2951 1546/1008
+f 1545/1009 1546/1008 405/2951
+f 1524/984 1549/1011 87/2946
+f 1548/1012 87/2946 1549/1011
+f 1547/1010 405/2951 1549/1011
+f 1544/1006 1549/1011 405/2951
+f 1518/976 86/2945 1552/1014
+f 1550/1016 1552/1014 86/2945
+f 405/2951 1547/1010 1551/1013
+f 1552/1014 1551/1013 1547/1010
+f 1554/1017 1553/1018 1539/1002
+f 91/2950 1539/1002 1553/1018
+f 1545/1009 405/2951 1554/1017
+f 1551/1013 1554/1017 405/2951
+f 92/2939 1500/953 1557/1022
+f 1558/1020 1557/1022 1500/953
+f 408/2952 1556/1023 1555/1019
+f 1558/1020 1555/1019 1556/1023
+f 93/2943 1516/972 1510/966
+f 1560/1025 1510/966 1516/972
+f 1555/1019 1560/1025 408/2952
+f 1559/1024 408/2952 1560/1025
+f 1561/1028 1563/1029 94/2947
+f 1526/985 94/2947 1563/1029
+f 1562/1027 408/2952 1563/1029
+f 1559/1024 1563/1029 408/2952
+f 95/2953 1564/1031 1565/1032
+f 1566/1030 1565/1032 1564/1031
+f 408/2952 1562/1027 1556/1023
+f 1566/1030 1556/1023 1562/1027
+f 1570/1034 1569/1039 1557/1036
+f 92/2954 1557/1036 1569/1039
+f 411/2955 1568/1037 1567/1033
+f 1570/1034 1567/1033 1568/1037
+f 1571/1042 1573/1043 95/2956
+f 1564/1044 95/2956 1573/1043
+f 411/2955 1567/1033 1572/1041
+f 1573/1043 1572/1041 1567/1033
+f 55/2886 1241/627 1575/1047
+f 1576/1046 1575/1047 1241/627
+f 411/2955 1572/1041 1574/1045
+f 1576/1046 1574/1045 1572/1041
+f 1231/615 1578/1049 54/2884
+f 1235/621 54/2884 1578/1049
+f 1574/1045 1578/1049 411/2955
+f 1577/1048 411/2955 1578/1049
+f 51/2883 1579/1051 1226/608
+f 1580/1050 1226/608 1579/1051
+f 411/2955 1577/1048 1568/1037
+f 1580/1050 1568/1037 1577/1048
+f 94/2957 1581/1052 1561/1056
+f 1584/1054 1561/1056 1581/1052
+f 1584/1054 1582/1055 1583/1058
+f 413/2958 1583/1058 1582/1055
+f 1586/1061 52/2877 1587/1059
+f 1210/585 1587/1059 52/2877
+f 1585/1060 413/2958 1587/1059
+f 1582/1055 1587/1059 413/2958
+f 1244/630 1589/1063 56/2881
+f 1220/599 56/2881 1589/1063
+f 413/2958 1585/1060 1588/1062
+f 1589/1063 1588/1062 1585/1060
+f 1575/1047 1591/1064 55/2886
+f 1239/625 55/2886 1591/1064
+f 413/2958 1588/1062 1590/1065
+f 1591/1064 1590/1065 1588/1062
+f 1571/1042 95/2956 1592/1067
+f 1565/1066 1592/1067 95/2956
+f 1590/1065 1592/1067 413/2958
+f 1583/1058 413/2958 1592/1067
+f 1593/1070 1596/1071 87/2959
+f 1522/1073 87/2959 1596/1071
+f 416/2960 1595/1074 1594/1069
+f 1596/1071 1594/1069 1595/1074
+f 1597/1077 1600/1078 73/2961
+f 1599/1079 73/2961 1600/1078
+f 416/2960 1594/1069 1598/1076
+f 1600/1078 1598/1076 1594/1069
+f 72/2932 1471/918 1602/1082
+f 1603/1081 1602/1082 1471/918
+f 416/2960 1598/1076 1601/1080
+f 1603/1081 1601/1080 1598/1076
+f 1605/1083 1467/913 1586/1084
+f 52/2931 1586/1084 1467/913
+f 416/2960 1601/1080 1604/1085
+f 1605/1083 1604/1085 1601/1080
+f 94/2962 1527/1087 1581/1089
+f 1606/1088 1581/1089 1527/1087
+f 1606/1088 1595/1074 1604/1085
+f 416/2960 1604/1085 1595/1074
+f 1611/1091 1610/1096 1607/1093
+f 66/2963 1607/1093 1610/1096
+f 420/2964 1609/1094 1608/1090
+f 1611/1091 1608/1090 1609/1094
+f 1614/1098 1613/1101 1535/1100
+f 90/2965 1535/1100 1613/1101
+f 420/2964 1608/1090 1612/1097
+f 1614/1098 1612/1097 1608/1090
+f 1615/1104 1617/1105 89/2966
+f 1529/1106 89/2966 1617/1105
+f 420/2964 1612/1097 1616/1103
+f 1617/1105 1616/1103 1612/1097
+f 39/2912 1360/781 1619/1109
+f 1620/1108 1619/1109 1360/781
+f 420/2964 1616/1103 1618/1107
+f 1620/1108 1618/1107 1616/1103
+f 1621/1110 1622/1111 65/2911
+f 1354/775 65/2911 1622/1111
+f 1618/1107 1622/1111 420/2964
+f 1609/1094 420/2964 1622/1111
+f 73/2961 1599/1079 1625/1114
+f 1626/1113 1625/1114 1599/1079
+f 1626/1113 1623/1112 1624/1116
+f 422/2967 1624/1116 1623/1112
+f 87/2959 1548/1117 1593/1070
+f 1628/1119 1593/1070 1548/1117
+f 422/2967 1623/1112 1627/1120
+f 1628/1119 1627/1120 1623/1112
+f 1630/1122 1543/1125 1613/1124
+f 90/2968 1613/1124 1543/1125
+f 422/2967 1627/1120 1629/1121
+f 1630/1122 1629/1121 1627/1120
+f 66/2969 1631/1127 1607/1128
+f 1632/1126 1607/1128 1631/1127
+f 1624/1116 422/2967 1632/1126
+f 1629/1121 1632/1126 422/2967
+f 1619/1109 1635/1130 39/2912
+f 1476/925 39/2912 1635/1130
+f 1634/1131 1635/1130 423/2970
+f 1633/1129 423/2970 1635/1130
+f 1637/1135 1615/1104 1504/1134
+f 89/2966 1504/1134 1615/1104
+f 423/2970 1633/1129 1636/1133
+f 1637/1135 1636/1133 1633/1129
+f 92/2971 1569/1139 1497/1140
+f 1639/1137 1497/1140 1569/1139
+f 423/2970 1636/1133 1638/1136
+f 1639/1137 1638/1136 1636/1133
+f 51/2875 1479/927 1579/1142
+f 1640/1141 1579/1142 1479/927
+f 423/2970 1638/1136 1634/1131
+f 1640/1141 1634/1131 1638/1136
+f 1506/963 88/2942 1644/1143
+f 1641/1144 1644/1143 88/2942
+f 1643/1147 1644/1143 426/2972
+f 1642/1145 426/2972 1644/1143
+f 1488/939 1647/1149 83/2936
+f 1646/1150 83/2936 1647/1149
+f 1642/1145 1647/1149 426/2972
+f 1645/1148 426/2972 1647/1149
+f 1648/1153 1650/1154 85/2934
+f 1481/930 85/2934 1650/1154
+f 1649/1152 426/2972 1650/1154
+f 1645/1148 1650/1154 426/2972
+f 93/2943 1511/967 1651/1156
+f 1652/1155 1651/1156 1511/967
+f 426/2972 1649/1152 1643/1147
+f 1652/1155 1643/1147 1649/1152
+f 91/2950 1653/1157 1537/999
+f 1656/1159 1537/999 1653/1157
+f 1656/1159 1654/1160 1655/1161
+f 428/2973 1655/1161 1654/1160
+f 1492/944 1659/1163 84/2937
+f 1658/1164 84/2937 1659/1163
+f 1654/1160 1659/1163 428/2973
+f 1657/1162 428/2973 1659/1163
+f 1646/1150 1661/1166 83/2936
+f 1486/938 83/2936 1661/1166
+f 1657/1162 1661/1166 428/2973
+f 1660/1165 428/2973 1661/1166
+f 1541/1004 1662/1167 88/2942
+f 1641/1144 88/2942 1662/1167
+f 1662/1167 1655/1161 1660/1165
+f 428/2973 1660/1165 1655/1161
+f 86/2945 1663/1169 1550/1016
+f 1666/1168 1550/1016 1663/1169
+f 1665/1172 1666/1168 430/2974
+f 1664/1170 430/2974 1666/1168
+f 1668/1175 82/2938 1669/1174
+f 1495/947 1669/1174 82/2938
+f 1664/1170 1669/1174 430/2974
+f 1667/1173 430/2974 1669/1174
+f 1658/1164 1671/1177 84/2937
+f 1490/942 84/2937 1671/1177
+f 1670/1176 430/2974 1671/1177
+f 1667/1173 1671/1177 430/2974
+f 91/2950 1553/1018 1653/1157
+f 1672/1178 1653/1157 1553/1018
+f 1672/1178 1665/1172 1670/1176
+f 430/2974 1670/1176 1665/1172
+f 93/2943 1651/1156 1513/968
+f 1675/1179 1513/968 1651/1156
+f 431/2975 1674/1181 1673/1180
+f 1675/1179 1673/1180 1674/1181
+f 1484/933 1677/1183 85/2934
+f 1648/1153 85/2934 1677/1183
+f 1673/1180 1677/1183 431/2975
+f 1676/1182 431/2975 1677/1183
+f 1668/1175 1679/1185 82/2938
+f 1494/946 82/2938 1679/1185
+f 1678/1184 431/2975 1679/1185
+f 1676/1182 1679/1185 431/2975
+f 1520/979 1680/1186 86/2945
+f 1663/1169 86/2945 1680/1186
+f 1674/1181 431/2975 1680/1186
+f 1678/1184 1680/1186 431/2975
+f 14/2976 763/1190 1683/1191
+f 1684/1188 1683/1191 763/1190
+f 1684/1188 1681/1187 1682/1193
+f 435/2977 1682/1193 1681/1187
+f 1687/1195 759/1198 1685/1197
+f 13/2978 1685/1197 759/1198
+f 435/2977 1681/1187 1686/1194
+f 1687/1195 1686/1194 1681/1187
+f 1688/1202 1691/1200 97/2979
+f 1690/1203 97/2979 1691/1200
+f 1689/1199 435/2977 1691/1200
+f 1686/1194 1691/1200 435/2977
+f 1692/1204 1694/1205 98/2980
+f 1693/1206 98/2980 1694/1205
+f 1682/1193 435/2977 1694/1205
+f 1689/1199 1694/1205 435/2977
+f 1685/1197 13/2978 1697/1210
+f 1311/1209 1697/1210 13/2978
+f 1696/1211 1697/1210 438/2981
+f 1695/1208 438/2981 1697/1210
+f 1700/1213 1306/1216 1698/1215
+f 74/2982 1698/1215 1306/1216
+f 438/2981 1695/1208 1699/1212
+f 1700/1213 1699/1212 1695/1208
+f 1701/1219 1704/1220 100/2983
+f 1703/1221 100/2983 1704/1220
+f 1699/1212 1704/1220 438/2981
+f 1702/1218 438/2981 1704/1220
+f 97/2979 1690/1203 1705/1223
+f 1706/1222 1705/1223 1690/1203
+f 438/2981 1702/1218 1696/1211
+f 1706/1222 1696/1211 1702/1218
+f 67/2984 1290/1227 1709/1228
+f 1710/1225 1709/1228 1290/1227
+f 442/2985 1708/1230 1707/1224
+f 1710/1225 1707/1224 1708/1230
+f 1711/1233 1713/1234 10/2986
+f 1299/1235 10/2986 1713/1234
+f 1707/1224 1713/1234 442/2985
+f 1712/1232 442/2985 1713/1234
+f 96/2987 1714/1239 1716/1240
+f 1717/1237 1716/1240 1714/1239
+f 442/2985 1712/1232 1715/1236
+f 1717/1237 1715/1236 1712/1232
+f 1718/1241 1720/1242 99/2988
+f 1719/1243 99/2988 1720/1242
+f 1715/1236 1720/1242 442/2985
+f 1708/1230 442/2985 1720/1242
+f 10/2986 746/1245 1711/1233
+f 1723/1244 1711/1233 746/1245
+f 1721/1246 444/2989 1723/1244
+f 1722/1248 1723/1244 444/2989
+f 740/1251 14/2976 1725/1249
+f 1683/1191 1725/1249 14/2976
+f 1725/1249 1724/1250 1721/1246
+f 444/2989 1721/1246 1724/1250
+f 1692/1204 98/2980 1728/1252
+f 1726/1253 1728/1252 98/2980
+f 1724/1250 1728/1252 444/2989
+f 1727/1254 444/2989 1728/1252
+f 1729/1257 96/2987 1730/1256
+f 1716/1240 1730/1256 96/2987
+f 444/2989 1727/1254 1722/1248
+f 1730/1256 1722/1248 1727/1254
+f 1734/1261 1701/1219 1731/1260
+f 100/2983 1731/1260 1701/1219
+f 447/2990 1733/1262 1732/1259
+f 1734/1261 1732/1259 1733/1262
+f 75/2991 1735/1265 1737/1267
+f 1738/1266 1737/1267 1735/1265
+f 1732/1259 1738/1266 447/2990
+f 1736/1264 447/2990 1738/1266
+f 98/2980 1693/1206 1740/1270
+f 1741/1269 1740/1270 1693/1206
+f 447/2990 1736/1264 1739/1268
+f 1741/1269 1739/1268 1736/1264
+f 1688/1202 97/2979 1742/1271
+f 1705/1223 1742/1271 97/2979
+f 1739/1268 1742/1271 447/2990
+f 1733/1262 447/2990 1742/1271
+f 75/2991 1743/1273 1735/1265
+f 1746/1272 1735/1265 1743/1273
+f 1745/1276 1746/1272 449/2992
+f 1744/1274 449/2992 1746/1272
+f 1749/1278 1748/1279 1719/1243
+f 99/2988 1719/1243 1748/1279
+f 449/2992 1744/1274 1747/1277
+f 1749/1278 1747/1277 1744/1274
+f 1729/1257 1751/1281 96/2987
+f 1714/1239 96/2987 1751/1281
+f 1747/1277 1751/1281 449/2992
+f 1750/1280 449/2992 1751/1281
+f 98/2980 1740/1270 1726/1253
+f 1752/1282 1726/1253 1740/1270
+f 449/2992 1750/1280 1745/1276
+f 1752/1282 1745/1276 1750/1280
+f 1757/1284 1756/1287 1753/1286
+f 101/2993 1753/1286 1756/1287
+f 1757/1284 1754/1283 1755/1289
+f 453/2994 1755/1289 1754/1283
+f 1758/1292 1761/1293 102/2995
+f 1760/1294 102/2995 1761/1293
+f 1754/1283 1761/1293 453/2994
+f 1759/1291 453/2994 1761/1293
+f 1762/1295 1764/1296 103/2996
+f 1763/1297 103/2996 1764/1296
+f 1759/1291 1764/1296 453/2994
+f 1755/1289 453/2994 1764/1296
+f 1768/1299 1767/1302 1756/1287
+f 101/2993 1756/1287 1767/1302
+f 456/2997 1766/1300 1765/1298
+f 1768/1299 1765/1298 1766/1300
+f 103/2996 1769/1306 1762/1295
+f 1771/1304 1762/1295 1769/1306
+f 456/2997 1765/1298 1770/1303
+f 1771/1304 1770/1303 1765/1298
+f 806/91 1774/1308 19/2798
+f 1773/1309 19/2798 1774/1308
+f 1770/1303 1774/1308 456/2997
+f 1772/1307 456/2997 1774/1308
+f 27/2797 881/180 800/84
+f 1776/1311 800/84 881/180
+f 456/2997 1772/1307 1775/1310
+f 1776/1311 1775/1310 1772/1307
+f 875/174 8/2810 1778/1312
+f 1777/1313 1778/1312 8/2810
+f 456/2997 1775/1310 1766/1300
+f 1778/1312 1766/1300 1775/1310
+f 1767/1302 1781/1315 101/2993
+f 1753/1286 101/2993 1781/1315
+f 458/2998 1780/1316 1779/1314
+f 1781/1315 1779/1314 1780/1316
+f 8/2810 989/308 1777/1313
+f 1783/1318 1777/1313 989/308
+f 458/2998 1779/1314 1782/1317
+f 1783/1318 1782/1317 1779/1314
+f 1785/1320 986/305 942/252
+f 4/2823 942/252 986/305
+f 458/2998 1782/1317 1784/1319
+f 1785/1320 1784/1319 1782/1317
+f 26/2827 1786/1321 952/264
+f 1788/1323 952/264 1786/1321
+f 458/2998 1784/1319 1787/1324
+f 1788/1323 1787/1324 1784/1319
+f 102/2995 1760/1294 1789/1326
+f 1790/1325 1789/1326 1760/1294
+f 458/2998 1787/1324 1780/1316
+f 1790/1325 1780/1316 1787/1324
+f 1795/1328 1794/1333 1791/1330
+f 104/2999 1791/1330 1794/1333
+f 461/3000 1793/1331 1792/1327
+f 1795/1328 1792/1327 1793/1331
+f 1789/1326 1798/1335 102/2995
+f 1797/1336 102/2995 1798/1335
+f 1792/1327 1798/1335 461/3000
+f 1796/1334 461/3000 1798/1335
+f 1786/1321 26/2827 1800/1337
+f 1284/678 1800/1337 26/2827
+f 1800/1337 1799/1338 1796/1334
+f 461/3000 1796/1334 1799/1338
+f 1279/674 81/2894 1802/1339
+f 1801/1340 1802/1339 81/2894
+f 1799/1338 1802/1339 461/3000
+f 1793/1331 461/3000 1802/1339
+f 1805/1344 79/2915 1806/1342
+f 1380/804 1806/1342 79/2915
+f 1804/1345 1806/1342 464/3001
+f 1803/1341 464/3001 1806/1342
+f 19/2798 1773/1309 1387/812
+f 1808/1347 1387/812 1773/1309
+f 464/3001 1803/1341 1807/1346
+f 1808/1347 1807/1346 1803/1341
+f 1809/1350 1811/1351 103/2996
+f 1769/1306 103/2996 1811/1351
+f 1807/1346 1811/1351 464/3001
+f 1810/1349 464/3001 1811/1351
+f 1814/1352 1813/1354 1812/1353
+f 105/3002 1812/1353 1813/1354
+f 464/3001 1810/1349 1804/1345
+f 1814/1352 1804/1345 1810/1349
+f 1763/1297 1818/1356 103/2996
+f 1817/1358 103/2996 1818/1356
+f 1815/1355 468/3003 1818/1356
+f 1816/1359 1818/1356 468/3003
+f 102/2995 1819/1361 1758/1292
+f 1821/1360 1758/1292 1819/1361
+f 1821/1360 1820/1362 1815/1355
+f 468/3003 1815/1355 1820/1362
+f 1825/1365 1824/1368 1822/1367
+f 106/3004 1822/1367 1824/1368
+f 468/3003 1820/1362 1823/1364
+f 1825/1365 1823/1364 1820/1362
+f 1828/1370 1827/1371 1826/1369
+f 107/3005 1826/1369 1827/1371
+f 1828/1370 1816/1359 1823/1364
+f 468/3003 1823/1364 1816/1359
+f 102/2995 1797/1336 1819/1361
+f 1831/1373 1819/1361 1797/1336
+f 471/3006 1830/1374 1829/1372
+f 1831/1373 1829/1372 1830/1374
+f 1832/1377 1834/1378 104/2999
+f 1791/1330 104/2999 1834/1378
+f 1829/1372 1834/1378 471/3006
+f 1833/1376 471/3006 1834/1378
+f 108/3007 1835/1382 1837/1383
+f 1838/1380 1837/1383 1835/1382
+f 471/3006 1833/1376 1836/1379
+f 1838/1380 1836/1379 1833/1376
+f 1824/1368 1840/1384 106/3004
+f 1839/1385 106/3004 1840/1384
+f 1836/1379 1840/1384 471/3006
+f 1830/1374 471/3006 1840/1384
+f 1843/1389 105/3002 1844/1386
+f 1813/1354 1844/1386 105/3002
+f 1842/1390 1844/1386 474/3008
+f 1841/1387 474/3008 1844/1386
+f 1846/1392 1809/1350 1817/1358
+f 103/2996 1817/1358 1809/1350
+f 474/3008 1841/1387 1845/1391
+f 1846/1392 1845/1391 1841/1387
+f 1847/1395 1849/1396 107/3005
+f 1826/1369 107/3005 1849/1396
+f 1848/1394 474/3008 1849/1396
+f 1845/1391 1849/1396 474/3008
+f 109/3009 1850/1398 1851/1399
+f 1852/1397 1851/1399 1850/1398
+f 1852/1397 1842/1390 1848/1394
+f 474/3008 1848/1394 1842/1390
+f 1855/1401 1822/1367 1839/1385
+f 106/3004 1839/1385 1822/1367
+f 476/3010 1854/1402 1853/1400
+f 1855/1401 1853/1400 1854/1402
+f 1856/1404 1858/1403 108/3007
+f 1835/1382 108/3007 1858/1403
+f 1853/1400 1858/1403 476/3010
+f 1857/1405 476/3010 1858/1403
+f 1851/1399 1861/1408 109/3009
+f 1860/1409 109/3009 1861/1408
+f 1857/1405 1861/1408 476/3010
+f 1859/1407 476/3010 1861/1408
+f 1862/1410 1847/1395 1827/1371
+f 107/3005 1827/1371 1847/1395
+f 476/3010 1859/1407 1854/1402
+f 1862/1410 1854/1402 1859/1407
+f 1863/1413 1867/1414 115/3011
+f 1866/1416 115/3011 1867/1414
+f 481/3012 1865/1415 1864/1412
+f 1867/1414 1864/1412 1865/1415
+f 125/3013 1868/1420 1870/1422
+f 1871/1421 1870/1422 1868/1420
+f 1869/1419 481/3012 1871/1421
+f 1864/1412 1871/1421 481/3012
+f 1875/1425 1874/1427 1872/1423
+f 119/3014 1872/1423 1874/1427
+f 481/3012 1869/1419 1873/1426
+f 1875/1425 1873/1426 1869/1419
+f 1878/1428 1877/1430 1876/1429
+f 123/3015 1876/1429 1877/1430
+f 1873/1426 1878/1428 481/3012
+f 1865/1415 481/3012 1878/1428
+f 1882/1432 1881/1435 1866/1416
+f 115/3011 1866/1416 1881/1435
+f 485/3016 1880/1433 1879/1431
+f 1882/1432 1879/1431 1880/1433
+f 1883/1438 1885/1439 123/3015
+f 1876/1429 123/3015 1885/1439
+f 1884/1437 485/3016 1885/1439
+f 1879/1431 1885/1439 485/3016
+f 1889/1441 1888/1444 1886/1443
+f 122/3017 1886/1443 1888/1444
+f 485/3016 1884/1437 1887/1440
+f 1889/1441 1887/1440 1884/1437
+f 127/3018 1890/1445 1891/1447
+f 1892/1446 1891/1447 1890/1445
+f 1887/1440 1892/1446 485/3016
+f 1880/1433 485/3016 1892/1446
+f 110/3019 1893/1451 1896/1453
+f 1897/1449 1896/1453 1893/1451
+f 1895/1452 1897/1449 490/3020
+f 1894/1448 490/3020 1897/1449
+f 133/3021 1898/1458 1900/1459
+f 1901/1456 1900/1459 1898/1458
+f 490/3020 1894/1448 1899/1455
+f 1901/1456 1899/1455 1894/1448
+f 1902/1462 1905/1463 120/3022
+f 1904/1464 120/3022 1905/1463
+f 1899/1455 1905/1463 490/3020
+f 1903/1461 490/3020 1905/1463
+f 121/3023 1906/1466 1907/1467
+f 1908/1465 1907/1467 1906/1466
+f 490/3020 1903/1461 1895/1452
+f 1908/1465 1895/1452 1903/1461
+f 116/3024 1909/1471 1912/1472
+f 1913/1469 1912/1472 1909/1471
+f 494/3025 1911/1474 1910/1468
+f 1913/1469 1910/1468 1911/1474
+f 1914/1477 1917/1478 132/3026
+f 1916/1479 132/3026 1917/1478
+f 1910/1468 1917/1478 494/3025
+f 1915/1476 494/3025 1917/1478
+f 120/3022 1904/1464 1919/1482
+f 1920/1481 1919/1482 1904/1464
+f 494/3025 1915/1476 1918/1480
+f 1920/1481 1918/1480 1915/1476
+f 1921/1483 1922/1484 133/3021
+f 1898/1458 133/3021 1922/1484
+f 1918/1480 1922/1484 494/3025
+f 1911/1474 494/3025 1922/1484
+f 111/3027 1923/1487 1926/1489
+f 1927/1488 1926/1489 1923/1487
+f 499/3028 1925/1491 1924/1486
+f 1927/1488 1924/1486 1925/1491
+f 129/3029 1928/1495 1930/1496
+f 1931/1493 1930/1496 1928/1495
+f 1931/1493 1929/1492 1924/1486
+f 499/3028 1924/1486 1929/1492
+f 1935/1498 1934/1501 1932/1500
+f 128/3030 1932/1500 1934/1501
+f 499/3028 1929/1492 1933/1497
+f 1935/1498 1933/1497 1929/1492
+f 1936/1502 1938/1503 136/3031
+f 1937/1504 136/3031 1938/1503
+f 1933/1497 1938/1503 499/3028
+f 1925/1491 499/3028 1938/1503
+f 1939/1507 1942/1508 110/3019
+f 1893/1451 110/3019 1942/1508
+f 1941/1509 1942/1508 503/3032
+f 1940/1506 503/3032 1942/1508
+f 125/3013 1943/1513 1945/1514
+f 1946/1511 1945/1514 1943/1513
+f 503/3032 1940/1506 1944/1510
+f 1946/1511 1944/1510 1940/1506
+f 1947/1517 1950/1518 124/3033
+f 1949/1519 124/3033 1950/1518
+f 1944/1510 1950/1518 503/3032
+f 1948/1516 503/3032 1950/1518
+f 133/3021 1900/1459 1951/1521
+f 1952/1520 1951/1521 1900/1459
+f 503/3032 1948/1516 1941/1509
+f 1952/1520 1941/1509 1948/1516
+f 1956/1523 1863/1413 1953/1525
+f 115/3011 1953/1525 1863/1413
+f 506/3034 1955/1526 1954/1522
+f 1956/1523 1954/1522 1955/1526
+f 1957/1529 1960/1530 141/3035
+f 1959/1531 141/3035 1960/1530
+f 1954/1522 1960/1530 506/3034
+f 1958/1528 506/3034 1960/1530
+f 124/3033 1949/1519 1962/1534
+f 1963/1533 1962/1534 1949/1519
+f 506/3034 1958/1528 1961/1532
+f 1963/1533 1961/1532 1958/1528
+f 125/3013 1870/1422 1943/1513
+f 1964/1535 1943/1513 1870/1422
+f 1961/1532 1964/1535 506/3034
+f 1955/1526 506/3034 1964/1535
+f 1969/1537 1968/1541 1965/1539
+f 114/3036 1965/1539 1968/1541
+f 1967/1542 1969/1537 510/3037
+f 1966/1536 510/3037 1969/1537
+f 1970/1546 1973/1544 139/3038
+f 1972/1547 139/3038 1973/1544
+f 510/3037 1966/1536 1971/1543
+f 1973/1544 1971/1543 1966/1536
+f 1962/1534 1976/1549 124/3033
+f 1975/1550 124/3033 1976/1549
+f 1974/1548 510/3037 1976/1549
+f 1971/1543 1976/1549 510/3037
+f 141/3035 1977/1551 1957/1529
+f 1978/1552 1957/1529 1977/1551
+f 1978/1552 1967/1542 1974/1548
+f 510/3037 1974/1548 1967/1542
+f 1912/1472 1982/1554 116/3024
+f 1981/1556 116/3024 1982/1554
+f 1979/1553 512/3039 1982/1554
+f 1980/1557 1982/1554 512/3039
+f 133/3021 1951/1521 1921/1483
+f 1984/1559 1921/1483 1951/1521
+f 1983/1558 512/3039 1984/1559
+f 1979/1553 1984/1559 512/3039
+f 124/3033 1975/1550 1947/1517
+f 1986/1560 1947/1517 1975/1550
+f 1986/1560 1985/1561 1983/1558
+f 512/3039 1983/1558 1985/1561
+f 1988/1562 1970/1546 1987/1563
+f 139/3038 1987/1563 1970/1546
+f 1985/1561 1988/1562 512/3039
+f 1980/1557 512/3039 1988/1562
+f 1926/1489 1992/1565 111/3027
+f 1991/1567 111/3027 1992/1565
+f 1990/1568 1992/1565 516/3040
+f 1989/1564 516/3040 1992/1565
+f 136/3031 1993/1572 1936/1502
+f 1995/1570 1936/1502 1993/1572
+f 516/3040 1989/1564 1994/1569
+f 1995/1570 1994/1569 1989/1564
+f 1996/1575 1999/1576 126/3041
+f 1998/1577 126/3041 1999/1576
+f 1994/1569 1999/1576 516/3040
+f 1997/1574 516/3040 1999/1576
+f 127/3018 2000/1579 2001/1580
+f 2002/1578 2001/1580 2000/1579
+f 516/3040 1997/1574 1990/1568
+f 2002/1578 1990/1568 1997/1574
+f 2007/1582 2006/1585 2003/1584
+f 117/3042 2003/1584 2006/1585
+f 520/3043 2005/1587 2004/1581
+f 2007/1582 2004/1581 2005/1587
+f 2008/1590 2011/1591 138/3044
+f 2010/1592 138/3044 2011/1591
+f 2009/1589 520/3043 2011/1591
+f 2004/1581 2011/1591 520/3043
+f 126/3041 1998/1577 2013/1595
+f 2014/1594 2013/1595 1998/1577
+f 520/3043 2009/1589 2012/1593
+f 2014/1594 2012/1593 2009/1589
+f 2015/1596 2016/1597 136/3031
+f 1993/1572 136/3031 2016/1597
+f 2012/1593 2016/1597 520/3043
+f 2005/1587 520/3043 2016/1597
+f 1968/1541 2020/1598 114/3036
+f 2019/1601 114/3036 2020/1598
+f 2017/1599 523/3045 2020/1598
+f 2018/1600 2020/1598 523/3045
+f 2021/1603 2023/1605 141/3035
+f 1977/1551 141/3035 2023/1605
+f 523/3045 2017/1599 2022/1606
+f 2023/1605 2022/1606 2017/1599
+f 2013/1595 2026/1608 126/3041
+f 2025/1609 126/3041 2026/1608
+f 2022/1606 2026/1608 523/3045
+f 2024/1607 523/3045 2026/1608
+f 138/3044 2027/1611 2008/1590
+f 2028/1610 2008/1590 2027/1611
+f 523/3045 2024/1607 2018/1600
+f 2028/1610 2018/1600 2024/1607
+f 1881/1435 2031/1613 115/3011
+f 1953/1525 115/3011 2031/1613
+f 524/3046 2030/1614 2029/1612
+f 2031/1613 2029/1612 2030/1614
+f 127/3018 2001/1580 1890/1445
+f 2033/1616 1890/1445 2001/1580
+f 2032/1615 524/3046 2033/1616
+f 2029/1612 2033/1616 524/3046
+f 126/3041 2025/1609 1996/1575
+f 2035/1618 1996/1575 2025/1609
+f 524/3046 2032/1615 2034/1617
+f 2035/1618 2034/1617 2032/1615
+f 141/3035 1959/1531 2021/1603
+f 2036/1619 2021/1603 1959/1531
+f 2034/1617 2036/1619 524/3046
+f 2030/1614 524/3046 2036/1619
+f 2041/1621 2040/1624 2037/1623
+f 112/3047 2037/1623 2040/1624
+f 529/3048 2039/1626 2038/1620
+f 2041/1621 2038/1620 2039/1626
+f 140/3049 2042/1629 2044/1631
+f 2045/1630 2044/1631 2042/1629
+f 2043/1628 529/3048 2045/1630
+f 2038/1620 2045/1630 529/3048
+f 130/3050 2046/1635 2048/1636
+f 2049/1633 2048/1636 2046/1635
+f 2049/1633 2047/1632 2043/1628
+f 529/3048 2043/1628 2047/1632
+f 131/3051 2050/1638 2051/1639
+f 2052/1637 2051/1639 2050/1638
+f 2047/1632 2052/1637 529/3048
+f 2039/1626 529/3048 2052/1637
+f 2056/1644 118/3052 2057/1640
+f 2053/1641 2057/1640 118/3052
+f 533/3053 2055/1646 2054/1642
+f 2057/1640 2054/1642 2055/1646
+f 2061/1650 2060/1651 2058/1649
+f 142/3054 2058/1649 2060/1651
+f 2054/1642 2061/1650 533/3053
+f 2059/1648 533/3053 2061/1650
+f 130/3050 2048/1636 2063/1654
+f 2064/1653 2063/1654 2048/1636
+f 533/3053 2059/1648 2062/1652
+f 2064/1653 2062/1652 2059/1648
+f 2065/1655 2066/1656 140/3049
+f 2042/1629 140/3049 2066/1656
+f 2062/1652 2066/1656 533/3053
+f 2055/1646 533/3053 2066/1656
+f 2067/1659 2071/1660 113/3055
+f 2070/1662 113/3055 2071/1660
+f 2069/1663 2071/1660 538/3056
+f 2068/1658 538/3056 2071/1660
+f 135/3057 2072/1664 2074/1668
+f 2075/1666 2074/1668 2072/1664
+f 2075/1666 2073/1667 2068/1658
+f 538/3056 2068/1658 2073/1667
+f 2076/1671 2079/1672 134/3058
+f 2078/1673 134/3058 2079/1672
+f 2073/1667 2079/1672 538/3056
+f 2077/1670 538/3056 2079/1672
+f 137/3059 2080/1674 2081/1676
+f 2082/1675 2081/1676 2080/1674
+f 538/3056 2077/1670 2069/1663
+f 2082/1675 2069/1663 2077/1670
+f 118/3052 2083/1680 2053/1641
+f 2086/1678 2053/1641 2083/1680
+f 541/3060 2085/1681 2084/1677
+f 2086/1678 2084/1677 2085/1681
+f 2081/1676 2089/1683 137/3059
+f 2088/1684 137/3059 2089/1683
+f 2084/1677 2089/1683 541/3060
+f 2087/1682 541/3060 2089/1683
+f 134/3058 2090/1688 2076/1671
+f 2092/1686 2076/1671 2090/1688
+f 541/3060 2087/1682 2091/1685
+f 2092/1686 2091/1685 2087/1682
+f 2060/1651 2094/1689 142/3054
+f 2093/1690 142/3054 2094/1689
+f 2091/1685 2094/1689 541/3060
+f 2085/1681 541/3060 2094/1689
+f 2095/1693 2098/1694 114/3036
+f 1965/1539 114/3036 2098/1694
+f 2097/1695 2098/1694 544/3061
+f 2096/1692 544/3061 2098/1694
+f 118/3052 2056/1644 2100/1698
+f 2101/1697 2100/1698 2056/1644
+f 544/3061 2096/1692 2099/1696
+f 2101/1697 2099/1696 2096/1692
+f 2102/1701 2104/1702 140/3049
+f 2065/1655 140/3049 2104/1702
+f 2099/1696 2104/1702 544/3061
+f 2103/1700 544/3061 2104/1702
+f 139/3038 1972/1547 2105/1704
+f 2106/1703 2105/1704 1972/1547
+f 544/3061 2103/1700 2097/1695
+f 2106/1703 2097/1695 2103/1700
+f 138/3044 2010/1592 2109/1707
+f 2110/1705 2109/1707 2010/1592
+f 2110/1705 2107/1706 2108/1709
+f 547/3062 2108/1709 2107/1706
+f 2111/1711 2113/1710 117/3042
+f 2003/1584 117/3042 2113/1710
+f 2107/1706 2113/1710 547/3062
+f 2112/1712 547/3062 2113/1710
+f 2070/1662 2116/1715 113/3055
+f 2115/1716 113/3055 2116/1715
+f 547/3062 2112/1712 2114/1714
+f 2116/1715 2114/1714 2112/1712
+f 2117/1717 2118/1718 137/3059
+f 2080/1674 137/3059 2118/1718
+f 2108/1709 547/3062 2118/1718
+f 2114/1714 2118/1718 547/3062
+f 132/3026 1916/1479 2121/1721
+f 2122/1720 2121/1721 1916/1479
+f 2122/1720 2119/1719 2120/1723
+f 550/3063 2120/1723 2119/1719
+f 2123/1725 2125/1724 116/3024
+f 1909/1471 116/3024 2125/1724
+f 2119/1719 2125/1724 550/3063
+f 2124/1726 550/3063 2125/1724
+f 2128/1729 2127/1730 2040/1624
+f 112/3047 2040/1624 2127/1730
+f 550/3063 2124/1726 2126/1728
+f 2128/1729 2126/1728 2124/1726
+f 131/3051 2129/1731 2050/1638
+f 2130/1732 2050/1638 2129/1731
+f 2120/1723 550/3063 2130/1732
+f 2126/1728 2130/1732 550/3063
+f 137/3059 2088/1684 2117/1717
+f 2133/1734 2117/1717 2088/1684
+f 551/3064 2132/1735 2131/1733
+f 2133/1734 2131/1733 2132/1735
+f 2100/1698 2135/1737 118/3052
+f 2083/1680 118/3052 2135/1737
+f 2131/1733 2135/1737 551/3064
+f 2134/1736 551/3064 2135/1737
+f 114/3036 2019/1601 2095/1693
+f 2137/1739 2095/1693 2019/1601
+f 551/3064 2134/1736 2136/1738
+f 2137/1739 2136/1738 2134/1736
+f 2109/1707 2138/1740 138/3044
+f 2027/1611 138/3044 2138/1740
+f 2136/1738 2138/1740 551/3064
+f 2132/1735 551/3064 2138/1740
+f 2105/1704 2141/1742 139/3038
+f 1987/1563 139/3038 2141/1742
+f 2140/1743 2141/1742 552/3065
+f 2139/1741 552/3065 2141/1742
+f 140/3049 2044/1631 2102/1701
+f 2143/1745 2102/1701 2044/1631
+f 552/3065 2139/1741 2142/1744
+f 2143/1745 2142/1744 2139/1741
+f 2127/1730 2145/1747 112/3047
+f 2037/1623 112/3047 2145/1747
+f 2142/1744 2145/1747 552/3065
+f 2144/1746 552/3065 2145/1747
+f 116/3024 1981/1556 2123/1725
+f 2146/1748 2123/1725 1981/1556
+f 552/3065 2144/1746 2140/1743
+f 2146/1748 2140/1743 2144/1746
+f 132/3026 2121/1721 2149/1751
+f 2150/1749 2149/1751 2121/1721
+f 2148/1753 2150/1749 556/3066
+f 2147/1750 556/3066 2150/1749
+f 2153/1755 2129/1731 2151/1757
+f 131/3051 2151/1757 2129/1731
+f 556/3066 2147/1750 2152/1754
+f 2153/1755 2152/1754 2147/1750
+f 2154/1760 2157/1761 143/3067
+f 2156/1762 143/3067 2157/1761
+f 2155/1759 556/3066 2157/1761
+f 2152/1754 2157/1761 556/3066
+f 144/3068 2158/1763 2159/1765
+f 2160/1764 2159/1765 2158/1763
+f 2160/1764 2148/1753 2155/1759
+f 556/3066 2155/1759 2148/1753
+f 2159/1767 2164/1766 144/3069
+f 2163/1771 144/3069 2164/1766
+f 2162/1772 2164/1766 560/3070
+f 2161/1768 560/3070 2164/1766
+f 2167/1774 2154/1777 2165/1776
+f 143/3071 2165/1776 2154/1777
+f 560/3070 2161/1768 2166/1773
+f 2167/1774 2166/1773 2161/1768
+f 2168/1780 2171/1781 145/3072
+f 2170/1782 145/3072 2171/1781
+f 2169/1779 560/3070 2171/1781
+f 2166/1773 2171/1781 560/3070
+f 146/3073 2172/1784 2173/1785
+f 2174/1783 2173/1785 2172/1784
+f 560/3070 2169/1779 2162/1772
+f 2174/1783 2162/1772 2169/1779
+f 144/3069 2163/1771 2177/1788
+f 2178/1787 2177/1788 2163/1771
+f 564/3074 2176/1790 2175/1786
+f 2178/1787 2175/1786 2176/1790
+f 2179/1793 2181/1794 146/3073
+f 2172/1784 146/3073 2181/1794
+f 2175/1786 2181/1794 564/3074
+f 2180/1792 564/3074 2181/1794
+f 150/3075 2182/1798 2184/1799
+f 2185/1796 2184/1799 2182/1798
+f 564/3074 2180/1792 2183/1795
+f 2185/1796 2183/1795 2180/1792
+f 2186/1800 2188/1801 149/3076
+f 2187/1802 149/3076 2188/1801
+f 2183/1795 2188/1801 564/3074
+f 2176/1790 564/3074 2188/1801
+f 146/3073 2189/1806 2179/1793
+f 2192/1804 2179/1793 2189/1806
+f 568/3077 2191/1807 2190/1803
+f 2192/1804 2190/1803 2191/1807
+f 147/3078 2193/1811 2195/1812
+f 2196/1809 2195/1812 2193/1811
+f 2190/1803 2196/1809 568/3077
+f 2194/1808 568/3077 2196/1809
+f 2200/1814 2199/1817 2197/1816
+f 151/3079 2197/1816 2199/1817
+f 568/3077 2194/1808 2198/1813
+f 2200/1814 2198/1813 2194/1808
+f 2184/1799 2202/1818 150/3075
+f 2201/1819 150/3075 2202/1818
+f 2198/1813 2202/1818 568/3077
+f 2191/1807 568/3077 2202/1818
+f 2207/1821 2206/1826 2203/1823
+f 152/3080 2203/1823 2206/1826
+f 573/3081 2205/1824 2204/1820
+f 2207/1821 2204/1820 2205/1824
+f 2208/1830 2211/1828 155/3082
+f 2210/1831 155/3082 2211/1828
+f 573/3081 2204/1820 2209/1827
+f 2211/1828 2209/1827 2204/1820
+f 2212/1834 2215/1835 154/3083
+f 2214/1836 154/3083 2215/1835
+f 573/3081 2209/1827 2213/1833
+f 2215/1835 2213/1833 2209/1827
+f 2216/1837 2218/1838 157/3084
+f 2217/1839 157/3084 2218/1838
+f 573/3081 2213/1833 2205/1824
+f 2218/1838 2205/1824 2213/1833
+f 2223/1841 2222/1846 2219/1843
+f 153/3085 2219/1843 2222/1846
+f 578/3086 2221/1844 2220/1840
+f 2223/1841 2220/1840 2221/1844
+f 2224/1850 2227/1848 158/3087
+f 2226/1851 158/3087 2227/1848
+f 578/3086 2220/1840 2225/1847
+f 2227/1848 2225/1847 2220/1840
+f 2228/1854 2231/1855 156/3088
+f 2230/1856 156/3088 2231/1855
+f 2225/1847 2231/1855 578/3086
+f 2229/1853 578/3086 2231/1855
+f 2232/1857 2234/1858 159/3089
+f 2233/1859 159/3089 2234/1858
+f 578/3086 2229/1853 2221/1844
+f 2234/1858 2221/1844 2229/1853
+f 127/3090 2235/1862 2000/1865
+f 2238/1863 2000/1865 2235/1862
+f 2237/1866 2238/1863 581/3091
+f 2236/1861 581/3091 2238/1863
+f 2226/1851 2241/1868 158/3087
+f 2240/1869 158/3087 2241/1868
+f 581/3091 2236/1861 2239/1867
+f 2241/1868 2239/1867 2236/1861
+f 2242/1872 2244/1873 153/3085
+f 2219/1843 153/3085 2244/1873
+f 2239/1867 2244/1873 581/3091
+f 2243/1871 581/3091 2244/1873
+f 2246/1874 2245/1876 1991/1875
+f 111/3092 1991/1875 2245/1876
+f 581/3091 2243/1871 2237/1866
+f 2246/1874 2237/1866 2243/1871
+f 129/3093 2247/1880 2250/1881
+f 2251/1878 2250/1881 2247/1880
+f 585/3094 2249/1883 2248/1877
+f 2251/1878 2248/1877 2249/1883
+f 2233/1859 2254/1885 159/3089
+f 2253/1886 159/3089 2254/1885
+f 2248/1877 2254/1885 585/3094
+f 2252/1884 585/3094 2254/1885
+f 2255/1889 2257/1890 156/3088
+f 2228/1854 156/3088 2257/1890
+f 585/3094 2252/1884 2256/1888
+f 2257/1890 2256/1888 2252/1884
+f 122/3095 2258/1891 2259/1893
+f 2260/1892 2259/1893 2258/1891
+f 2256/1888 2260/1892 585/3094
+f 2249/1883 585/3094 2260/1892
+f 127/3090 1891/1897 2235/1862
+f 2263/1895 2235/1862 1891/1897
+f 586/3096 2262/1898 2261/1894
+f 2263/1895 2261/1894 2262/1898
+f 122/3095 2259/1893 1886/1901
+f 2265/1900 1886/1901 2259/1893
+f 2261/1894 2265/1900 586/3096
+f 2264/1899 586/3096 2265/1900
+f 2230/1856 2267/1903 156/3088
+f 2255/1889 156/3088 2267/1903
+f 586/3096 2264/1899 2266/1902
+f 2267/1903 2266/1902 2264/1899
+f 2240/1869 2268/1904 158/3087
+f 2224/1850 158/3087 2268/1904
+f 2266/1902 2268/1904 586/3096
+f 2262/1898 586/3096 2268/1904
+f 129/3093 1930/1906 2247/1880
+f 2271/1905 2247/1880 1930/1906
+f 2269/1907 587/3097 2271/1905
+f 2270/1909 2271/1905 587/3097
+f 2273/1911 1923/1912 2245/1876
+f 111/3092 2245/1876 1923/1912
+f 2273/1911 2272/1910 2269/1907
+f 587/3097 2269/1907 2272/1910
+f 2222/1846 2275/1914 153/3085
+f 2242/1872 153/3085 2275/1914
+f 2272/1910 2275/1914 587/3097
+f 2274/1913 587/3097 2275/1914
+f 2253/1886 2276/1915 159/3089
+f 2232/1857 159/3089 2276/1915
+f 587/3097 2274/1913 2270/1909
+f 2276/1915 2270/1909 2274/1913
+f 125/3098 1945/1918 2279/1921
+f 2280/1919 2279/1921 1945/1918
+f 2278/1922 2280/1919 590/3099
+f 2277/1917 590/3099 2280/1919
+f 2283/1924 1939/1927 2281/1926
+f 110/3100 2281/1926 1939/1927
+f 590/3099 2277/1917 2282/1923
+f 2283/1924 2282/1923 2277/1917
+f 2206/1826 2286/1929 152/3080
+f 2285/1930 152/3080 2286/1929
+f 2282/1923 2286/1929 590/3099
+f 2284/1928 590/3099 2286/1929
+f 2287/1932 2288/1931 157/3084
+f 2216/1837 157/3084 2288/1931
+f 590/3099 2284/1928 2278/1922
+f 2288/1931 2278/1922 2284/1928
+f 2289/1934 2292/1933 121/3101
+f 1906/1938 121/3101 2292/1933
+f 2291/1939 2292/1933 592/3102
+f 2290/1935 592/3102 2292/1933
+f 2210/1831 2295/1941 155/3082
+f 2294/1942 155/3082 2295/1941
+f 592/3102 2290/1935 2293/1940
+f 2295/1941 2293/1940 2290/1935
+f 2285/1930 2297/1944 152/3080
+f 2203/1823 152/3080 2297/1944
+f 2293/1940 2297/1944 592/3102
+f 2296/1943 592/3102 2297/1944
+f 2281/1926 110/3100 2298/1946
+f 1896/1945 2298/1946 110/3100
+f 2298/1946 2291/1939 2296/1943
+f 592/3102 2296/1943 2291/1939
+f 1874/1949 2302/1950 119/3103
+f 2301/1952 119/3103 2302/1950
+f 2300/1953 2302/1950 594/3104
+f 2299/1948 594/3104 2302/1950
+f 125/3098 2279/1921 1868/1956
+f 2304/1955 1868/1956 2279/1921
+f 594/3104 2299/1948 2303/1954
+f 2304/1955 2303/1954 2299/1948
+f 2217/1839 2306/1958 157/3084
+f 2287/1932 157/3084 2306/1958
+f 2303/1954 2306/1958 594/3104
+f 2305/1957 594/3104 2306/1958
+f 2308/1959 2212/1834 2307/1960
+f 154/3083 2307/1960 2212/1834
+f 594/3104 2305/1957 2300/1953
+f 2308/1959 2300/1953 2305/1957
+f 121/3101 2309/1964 2289/1934
+f 2312/1962 2289/1934 2309/1964
+f 596/3105 2311/1965 2310/1961
+f 2312/1962 2310/1961 2311/1965
+f 119/3103 2301/1952 2314/1968
+f 2315/1967 2314/1968 2301/1952
+f 2310/1961 2315/1967 596/3105
+f 2313/1966 596/3105 2315/1967
+f 2214/1836 2317/1970 154/3083
+f 2307/1960 154/3083 2317/1970
+f 596/3105 2313/1966 2316/1969
+f 2317/1970 2316/1969 2313/1966
+f 2294/1942 2318/1971 155/3082
+f 2208/1830 155/3082 2318/1971
+f 2316/1969 2318/1971 596/3105
+f 2311/1965 596/3105 2318/1971
+f 147/3078 2195/1812 2321/1974
+f 2322/1973 2321/1974 2195/1812
+f 600/3106 2320/1976 2319/1972
+f 2322/1973 2319/1972 2320/1976
+f 2173/1785 2324/1978 146/3073
+f 2189/1806 146/3073 2324/1978
+f 2319/1972 2324/1978 600/3106
+f 2323/1977 600/3106 2324/1978
+f 2327/1980 2168/1780 2325/1982
+f 145/3072 2325/1982 2168/1780
+f 600/3106 2323/1977 2326/1979
+f 2327/1980 2326/1979 2323/1977
+f 2328/1985 2331/1986 162/3107
+f 2330/1987 162/3107 2331/1986
+f 600/3106 2326/1979 2329/1984
+f 2331/1986 2329/1984 2326/1979
+f 160/3108 2332/1989 2333/1990
+f 2334/1988 2333/1990 2332/1989
+f 600/3106 2329/1984 2320/1976
+f 2334/1988 2320/1976 2329/1984
+f 161/3109 2335/1991 2338/1995
+f 2339/1993 2338/1995 2335/1991
+f 2339/1993 2336/1994 2337/1997
+f 605/3110 2337/1997 2336/1994
+f 2343/1999 2342/2002 2340/2001
+f 165/3111 2340/2001 2342/2002
+f 605/3110 2336/1994 2341/1998
+f 2343/1999 2341/1998 2336/1994
+f 2344/2006 2347/2004 166/3112
+f 2346/2007 166/3112 2347/2004
+f 2341/1998 2347/2004 605/3110
+f 2345/2003 605/3110 2347/2004
+f 2349/2010 162/3113 2350/2009
+f 2348/2008 2350/2009 162/3113
+f 2337/1997 605/3110 2350/2009
+f 2345/2003 2350/2009 605/3110
+f 2353/2012 2328/2015 2349/2010
+f 162/3113 2349/2010 2328/2015
+f 608/3114 2352/2013 2351/2011
+f 2353/2012 2351/2011 2352/2013
+f 2354/2018 2356/2019 166/3112
+f 2344/2006 166/3112 2356/2019
+f 2351/2011 2356/2019 608/3114
+f 2355/2017 608/3114 2356/2019
+f 2360/2021 2359/2024 2357/2023
+f 163/3115 2357/2023 2359/2024
+f 608/3114 2355/2017 2358/2020
+f 2360/2021 2358/2020 2355/2017
+f 160/3116 2333/2026 2361/2027
+f 2362/2025 2361/2027 2333/2026
+f 2358/2020 2362/2025 608/3114
+f 2352/2013 608/3114 2362/2025
+f 2366/2029 2365/2030 2359/2024
+f 163/3115 2359/2024 2365/2030
+f 611/3117 2364/2032 2363/2028
+f 2366/2029 2363/2028 2364/2032
+f 2346/2007 2368/2034 166/3112
+f 2354/2018 166/3112 2368/2034
+f 2363/2028 2368/2034 611/3117
+f 2367/2033 611/3117 2368/2034
+f 2369/2038 2371/2036 165/3111
+f 2340/2001 165/3111 2371/2036
+f 611/3117 2367/2033 2370/2035
+f 2371/2036 2370/2035 2367/2033
+f 2373/2041 164/3118 2374/2040
+f 2372/2039 2374/2040 164/3118
+f 2370/2035 2374/2040 611/3117
+f 2364/2032 611/3117 2374/2040
+f 2375/2044 2378/2045 69/3119
+f 1247/2047 69/3119 2378/2045
+f 2377/2048 2378/2045 614/3120
+f 2376/2043 614/3120 2378/2045
+f 131/3051 2051/1639 2380/2051
+f 2381/2050 2380/2051 2051/1639
+f 614/3120 2376/2043 2379/2049
+f 2381/2050 2379/2049 2376/2043
+f 2382/2054 2384/2055 130/3050
+f 2046/1635 130/3050 2384/2055
+f 2379/2049 2384/2055 614/3120
+f 2383/2053 614/3120 2384/2055
+f 76/3121 1254/2057 2385/2058
+f 2386/2056 2385/2058 1254/2057
+f 614/3120 2383/2053 2377/2048
+f 2386/2056 2377/2048 2383/2053
+f 2387/2061 2390/2062 77/3122
+f 1261/2064 77/3122 2390/2062
+f 2388/2060 617/3123 2390/2062
+f 2389/2063 2390/2062 617/3123
+f 142/3054 2093/1690 2392/2068
+f 2393/2067 2392/2068 2093/1690
+f 617/3123 2388/2060 2391/2066
+f 2393/2067 2391/2066 2388/2060
+f 2394/2070 2396/2069 134/3058
+f 2090/1688 134/3058 2396/2069
+f 2391/2066 2396/2069 617/3123
+f 2395/2071 617/3123 2396/2069
+f 2398/2073 2397/2075 1268/2074
+f 80/3124 1268/2074 2397/2075
+f 617/3123 2395/2071 2389/2063
+f 2398/2073 2389/2063 2395/2071
+f 2397/2075 2401/2077 80/3124
+f 1275/2079 80/3124 2401/2077
+f 2399/2076 619/3125 2401/2077
+f 2400/2078 2401/2077 619/3125
+f 134/3058 2078/1673 2394/2070
+f 2403/2081 2394/2070 2078/1673
+f 2403/2081 2402/2082 2399/2076
+f 619/3125 2399/2076 2402/2082
+f 2404/2085 2406/2086 135/3057
+f 2072/1664 135/3057 2406/2086
+f 2405/2084 619/3125 2406/2086
+f 2402/2082 2406/2086 619/3125
+f 2408/2087 2407/2089 1281/2088
+f 81/3126 1281/2088 2407/2089
+f 619/3125 2405/2084 2400/2078
+f 2408/2087 2400/2078 2405/2084
+f 2409/2092 2412/2093 67/3127
+f 1287/2095 67/3127 2412/2093
+f 2411/2096 2412/2093 622/3128
+f 2410/2091 622/3128 2412/2093
+f 2415/2098 2414/2101 2314/2100
+f 119/3014 2314/2100 2414/2101
+f 622/3128 2410/2091 2413/2097
+f 2415/2098 2413/2097 2410/2091
+f 2416/2103 2418/2102 121/3023
+f 2309/2106 121/3023 2418/2102
+f 2417/2104 622/3128 2418/2102
+f 2413/2097 2418/2102 622/3128
+f 2420/2107 2419/2109 1294/2108
+f 68/3129 1294/2108 2419/2109
+f 622/3128 2417/2104 2411/2096
+f 2420/2107 2411/2096 2417/2104
+f 78/3130 2421/2110 1301/2116
+f 2424/2112 1301/2116 2421/2110
+f 2424/2112 2422/2113 2423/2114
+f 625/3131 2423/2114 2422/2113
+f 2250/2119 2427/2120 129/3029
+f 2426/2121 129/3029 2427/2120
+f 2422/2113 2427/2120 625/3131
+f 2425/2118 625/3131 2427/2120
+f 2430/2123 2258/2126 2428/2125
+f 122/3017 2428/2125 2258/2126
+f 625/3131 2425/2118 2429/2122
+f 2430/2123 2429/2122 2425/2118
+f 2431/2129 74/3132 2432/2127
+f 1308/2128 2432/2127 74/3132
+f 2429/2122 2432/2127 625/3131
+f 2423/2114 625/3131 2432/2127
+f 2385/2058 2435/2131 76/3121
+f 1315/2133 76/3121 2435/2131
+f 2434/2132 2435/2131 626/3133
+f 2433/2130 626/3133 2435/2131
+f 130/3050 2063/1654 2382/2054
+f 2437/2135 2382/2054 2063/1654
+f 626/3133 2433/2130 2436/2136
+f 2437/2135 2436/2136 2433/2130
+f 2058/1649 142/3054 2439/2137
+f 2392/2068 2439/2137 142/3054
+f 2436/2136 2439/2137 626/3133
+f 2438/2138 626/3133 2439/2137
+f 2440/2139 2387/2061 1320/2140
+f 77/3122 1320/2140 2387/2061
+f 626/3133 2438/2138 2434/2132
+f 2440/2139 2434/2132 2438/2138
+f 2441/2143 2444/2144 70/3134
+f 1325/2146 70/3134 2444/2144
+f 2443/2147 2444/2144 628/3135
+f 2442/2142 628/3135 2444/2144
+f 143/3067 2156/1762 2446/2150
+f 2447/2148 2446/2150 2156/1762
+f 628/3135 2442/2142 2445/2149
+f 2447/2148 2445/2149 2442/2142
+f 2380/2051 2449/2152 131/3051
+f 2151/1757 131/3051 2449/2152
+f 2445/2149 2449/2152 628/3135
+f 2448/2151 628/3135 2449/2152
+f 69/3119 1331/2153 2375/2044
+f 2450/2154 2375/2044 1331/2153
+f 628/3135 2448/2151 2443/2147
+f 2450/2154 2443/2147 2448/2151
+f 2451/2157 2454/2158 71/2904
+f 1337/746 71/2904 2454/2158
+f 2452/2156 630/3136 2454/2158
+f 2453/2159 2454/2158 630/3136
+f 145/3137 2170/2160 2456/2164
+f 2457/2162 2456/2164 2170/2160
+f 2457/2162 2455/2163 2452/2156
+f 630/3136 2452/2156 2455/2163
+f 2165/2169 143/3138 2459/2165
+f 2446/2166 2459/2165 143/3138
+f 2455/2163 2459/2165 630/3136
+f 2458/2167 630/3136 2459/2165
+f 2460/2170 2441/2171 1343/756
+f 70/2906 1343/756 2441/2171
+f 630/3136 2458/2167 2453/2159
+f 2460/2170 2453/2159 2458/2167
+f 59/2909 2461/2174 1349/766
+f 2464/2175 1349/766 2461/2174
+f 2463/2176 2464/2175 634/3139
+f 2462/2173 634/3139 2464/2175
+f 147/3078 2465/2177 2467/2181
+f 2468/2179 2467/2181 2465/2177
+f 634/3139 2462/2173 2466/2180
+f 2468/2179 2466/2180 2462/2173
+f 2469/2184 2472/2185 148/3140
+f 2471/2186 148/3140 2472/2185
+f 2466/2180 2472/2185 634/3139
+f 2470/2183 634/3139 2472/2185
+f 65/2911 1356/776 2473/2188
+f 2474/2187 2473/2188 1356/776
+f 634/3139 2470/2183 2463/2176
+f 2474/2187 2463/2176 2470/2183
+f 2475/2190 2478/2189 60/2913
+f 1365/785 60/2913 2478/2189
+f 636/3141 2477/2193 2476/2191
+f 2478/2189 2476/2191 2477/2193
+f 2199/1817 2481/2195 151/3079
+f 2480/2196 151/3079 2481/2195
+f 2479/2194 636/3141 2481/2195
+f 2476/2191 2481/2195 636/3141
+f 147/3078 2467/2181 2193/1811
+f 2483/2198 2193/1811 2467/2181
+f 2483/2198 2482/2197 2479/2194
+f 636/3141 2479/2194 2482/2197
+f 2484/2199 2461/2174 1371/794
+f 59/2909 1371/794 2461/2174
+f 2482/2197 2484/2199 636/3141
+f 2477/2193 636/3141 2484/2199
+f 2485/2202 2488/2203 79/3142
+f 1377/2205 79/3142 2488/2203
+f 2486/2201 638/3143 2488/2203
+f 2487/2204 2488/2203 638/3143
+f 128/3030 1934/1501 2490/2209
+f 2491/2208 2490/2209 1934/1501
+f 2491/2208 2489/2207 2486/2201
+f 638/3143 2486/2201 2489/2207
+f 129/3029 2426/2121 1928/1495
+f 2493/2211 1928/1495 2426/2121
+f 2489/2207 2493/2211 638/3143
+f 2492/2210 638/3143 2493/2211
+f 1383/2213 2494/2212 78/3130
+f 2421/2110 78/3130 2494/2212
+f 638/3143 2492/2210 2487/2204
+f 2494/2212 2487/2204 2492/2210
+f 68/3129 2419/2109 1389/2218
+f 2497/2215 1389/2218 2419/2109
+f 640/3144 2496/2216 2495/2214
+f 2497/2215 2495/2214 2496/2216
+f 1907/1467 2499/2220 121/3023
+f 2416/2103 121/3023 2499/2220
+f 2495/2214 2499/2220 640/3144
+f 2498/2219 640/3144 2499/2220
+f 120/3022 2500/2224 1902/1462
+f 2502/2222 1902/1462 2500/2224
+f 640/3144 2498/2219 2501/2221
+f 2502/2222 2501/2221 2498/2219
+f 2503/2227 58/3145 2504/2225
+f 1395/2226 2504/2225 58/3145
+f 2501/2221 2504/2225 640/3144
+f 2496/2216 640/3144 2504/2225
+f 2507/2229 1401/2232 2503/2227
+f 58/3145 2503/2227 1401/2232
+f 642/3146 2506/2230 2505/2228
+f 2507/2229 2505/2228 2506/2230
+f 1919/1482 2509/2234 120/3022
+f 2500/2224 120/3022 2509/2234
+f 2505/2228 2509/2234 642/3146
+f 2508/2233 642/3146 2509/2234
+f 132/3026 2510/2238 1914/1477
+f 2512/2236 1914/1477 2510/2238
+f 642/3146 2508/2233 2511/2235
+f 2512/2236 2511/2235 2508/2233
+f 2513/2241 61/3147 2514/2239
+f 1407/2240 2514/2239 61/3147
+f 2511/2235 2514/2239 642/3146
+f 2506/2230 642/3146 2514/2239
+f 2517/2243 1413/2246 2513/2241
+f 61/3147 2513/2241 1413/2246
+f 644/3148 2516/2244 2515/2242
+f 2517/2243 2515/2242 2516/2244
+f 2149/1751 2519/2247 132/3026
+f 2510/2238 132/3026 2519/2247
+f 2518/2248 644/3148 2519/2247
+f 2515/2242 2519/2247 644/3148
+f 144/3068 2520/2250 2158/1763
+f 2522/2249 2158/1763 2520/2250
+f 2518/2248 2522/2249 644/3148
+f 2521/2251 644/3148 2522/2249
+f 2524/2254 2523/2255 1419/2253
+f 62/3149 1419/2253 2523/2255
+f 644/3148 2521/2251 2516/2244
+f 2524/2254 2516/2244 2521/2251
+f 1425/857 62/2923 2527/2259
+f 2523/2258 2527/2259 62/2923
+f 2526/2260 2527/2259 646/3150
+f 2525/2257 646/3150 2527/2259
+f 144/3069 2177/1788 2520/2263
+f 2529/2262 2520/2263 2177/1788
+f 2529/2262 2528/2261 2525/2257
+f 646/3150 2525/2257 2528/2261
+f 2532/2265 2186/1800 2530/2267
+f 149/3076 2530/2267 2186/1800
+f 2528/2261 2532/2265 646/3150
+f 2531/2264 646/3150 2532/2265
+f 2533/2269 63/2925 2534/2268
+f 1431/866 2534/2268 63/2925
+f 646/3150 2531/2264 2526/2260
+f 2534/2268 2526/2260 2531/2264
+f 2537/2271 1437/875 2533/2269
+f 63/2925 2533/2269 1437/875
+f 648/3151 2536/2272 2535/2270
+f 2537/2271 2535/2270 2536/2272
+f 2187/1802 2539/2274 149/3076
+f 2530/2267 149/3076 2539/2274
+f 2535/2270 2539/2274 648/3151
+f 2538/2273 648/3151 2539/2274
+f 150/3075 2540/2278 2182/1798
+f 2542/2276 2182/1798 2540/2278
+f 648/3151 2538/2273 2541/2275
+f 2542/2276 2541/2275 2538/2273
+f 2543/2280 64/2927 2544/2279
+f 1443/881 2544/2279 64/2927
+f 2541/2275 2544/2279 648/3151
+f 2536/2272 648/3151 2544/2279
+f 2547/2282 1449/887 2543/2280
+f 64/2927 2543/2280 1449/887
+f 2545/2281 649/3152 2547/2282
+f 2546/2283 2547/2282 649/3152
+f 150/3075 2201/1819 2540/2278
+f 2549/2285 2540/2278 2201/1819
+f 2549/2285 2548/2284 2545/2281
+f 649/3152 2545/2281 2548/2284
+f 2480/2196 2551/2287 151/3079
+f 2197/1816 151/3079 2551/2287
+f 2548/2284 2551/2287 649/3152
+f 2550/2286 649/3152 2551/2287
+f 2552/2288 2475/2190 1454/893
+f 60/2913 1454/893 2475/2190
+f 649/3152 2550/2286 2546/2283
+f 2552/2288 2546/2283 2550/2286
+f 1461/899 2555/2289 71/2904
+f 2451/2157 71/2904 2555/2289
+f 651/3153 2554/2291 2553/2290
+f 2555/2289 2553/2290 2554/2291
+f 2558/2293 1470/916 2556/2295
+f 72/2932 2556/2295 1470/916
+f 651/3153 2553/2290 2557/2292
+f 2558/2293 2557/2292 2553/2290
+f 2338/2298 2561/2299 161/3154
+f 2560/2300 161/3154 2561/2299
+f 651/3153 2557/2292 2559/2297
+f 2561/2299 2559/2297 2557/2292
+f 162/3155 2330/2304 2348/2305
+f 2563/2302 2348/2305 2330/2304
+f 651/3153 2559/2297 2562/2301
+f 2563/2302 2562/2301 2559/2297
+f 2456/2164 2564/2306 145/3137
+f 2325/2307 145/3137 2564/2306
+f 651/3153 2562/2301 2554/2291
+f 2564/2306 2554/2291 2562/2301
+f 147/3078 2321/1974 2465/2177
+f 2567/2308 2465/2177 2321/1974
+f 2567/2308 2565/2309 2566/2310
+f 653/3156 2566/2310 2565/2309
+f 2568/2313 2570/2314 160/3108
+f 2332/1989 160/3108 2570/2314
+f 2565/2309 2570/2314 653/3156
+f 2569/2312 653/3156 2570/2314
+f 148/3140 2471/2186 2571/2316
+f 2572/2315 2571/2316 2471/2186
+f 2572/2315 2566/2310 2569/2312
+f 653/3156 2569/2312 2566/2310
+f 2577/2320 2576/2322 2573/2319
+f 170/3157 2573/2319 2576/2322
+f 2575/2323 2577/2320 658/3158
+f 2574/2318 658/3158 2577/2320
+f 2581/2325 2580/2328 2578/2327
+f 167/3159 2578/2327 2580/2328
+f 658/3158 2574/2318 2579/2324
+f 2581/2325 2579/2324 2574/2318
+f 2582/2331 2585/2332 169/3160
+f 2584/2333 169/3160 2585/2332
+f 2579/2324 2585/2332 658/3158
+f 2583/2330 658/3158 2585/2332
+f 2588/2334 2587/2336 2586/2335
+f 168/3161 2586/2335 2587/2336
+f 658/3158 2583/2330 2575/2323
+f 2588/2334 2575/2323 2583/2330
+f 2589/2339 2593/2340 177/3162
+f 2592/2342 177/3162 2593/2340
+f 2590/2338 663/3163 2593/2340
+f 2591/2341 2593/2340 663/3163
+f 178/3164 2594/2346 2596/2348
+f 2597/2347 2596/2348 2594/2346
+f 2597/2347 2595/2345 2590/2338
+f 663/3163 2590/2338 2595/2345
+f 2601/2349 2600/2353 2598/2350
+f 173/3165 2598/2350 2600/2353
+f 2595/2345 2601/2349 663/3163
+f 2599/2351 663/3163 2601/2349
+f 2604/2354 2603/2356 2602/2355
+f 174/3166 2602/2355 2603/2356
+f 663/3163 2599/2351 2591/2341
+f 2604/2354 2591/2341 2599/2351
+f 178/3164 2605/2360 2608/2361
+f 2609/2358 2608/2361 2605/2360
+f 2609/2358 2606/2357 2607/2363
+f 668/3167 2607/2363 2606/2357
+f 2612/2368 179/3168 2613/2364
+f 2610/2365 2613/2364 179/3168
+f 2606/2357 2613/2364 668/3167
+f 2611/2366 668/3167 2613/2364
+f 2617/2370 2616/2373 2614/2372
+f 172/3169 2614/2372 2616/2373
+f 668/3167 2611/2366 2615/2369
+f 2617/2370 2615/2369 2611/2366
+f 2620/2374 2619/2376 2618/2375
+f 171/3170 2618/2375 2619/2376
+f 2607/2363 668/3167 2620/2374
+f 2615/2369 2620/2374 668/3167
+f 174/3166 2603/2356 2623/2380
+f 2624/2377 2623/2380 2603/2356
+f 2622/2381 2624/2377 672/3171
+f 2621/2378 672/3171 2624/2377
+f 2627/2383 2598/2350 2625/2385
+f 173/3165 2625/2385 2598/2350
+f 672/3171 2621/2378 2626/2382
+f 2627/2383 2626/2382 2621/2378
+f 2628/2388 2631/2389 176/3172
+f 2630/2390 176/3172 2631/2389
+f 2629/2387 672/3171 2631/2389
+f 2626/2382 2631/2389 672/3171
+f 2632/2391 2634/2392 175/3173
+f 2633/2393 175/3173 2634/2392
+f 2622/2381 672/3171 2634/2392
+f 2629/2387 2634/2392 672/3171
+f 2637/2397 175/3173 2638/2394
+f 2633/2393 2638/2394 175/3173
+f 2636/2398 2638/2394 675/3174
+f 2635/2395 675/3174 2638/2394
+f 2641/2399 2628/2388 2639/2400
+f 176/3172 2639/2400 2628/2388
+f 2635/2395 2641/2399 675/3174
+f 2640/2401 675/3174 2641/2399
+f 2619/2376 2644/2403 171/3170
+f 2643/2405 171/3170 2644/2403
+f 675/3174 2640/2401 2642/2404
+f 2644/2403 2642/2404 2640/2401
+f 2614/2372 172/3169 2646/2406
+f 2645/2407 2646/2406 172/3169
+f 2642/2404 2646/2406 675/3174
+f 2636/2398 675/3174 2646/2406
+f 177/3162 2647/2410 2589/2339
+f 2650/2411 2589/2339 2647/2410
+f 678/3175 2649/2412 2648/2409
+f 2650/2411 2648/2409 2649/2412
+f 180/3176 2651/2416 2653/2417
+f 2654/2414 2653/2417 2651/2416
+f 678/3175 2648/2409 2652/2413
+f 2654/2414 2652/2413 2648/2409
+f 2656/2420 179/3168 2657/2418
+f 2612/2368 2657/2418 179/3168
+f 2652/2413 2657/2418 678/3175
+f 2655/2419 678/3175 2657/2418
+f 178/3164 2596/2348 2605/2360
+f 2658/2421 2605/2360 2596/2348
+f 2655/2419 2658/2421 678/3175
+f 2649/2412 678/3175 2658/2421
+f 2662/2423 2647/2428 2659/2425
+f 177/3177 2659/2425 2647/2428
+f 681/3178 2661/2426 2660/2422
+f 2662/2423 2660/2422 2661/2426
+f 160/3116 2361/2027 2664/2431
+f 2665/2430 2664/2431 2361/2027
+f 681/3178 2660/2422 2663/2429
+f 2665/2430 2663/2429 2660/2422
+f 2365/2030 2667/2433 163/3115
+f 2357/2023 163/3115 2667/2433
+f 2663/2429 2667/2433 681/3178
+f 2666/2432 681/3178 2667/2433
+f 164/3118 2668/2437 2372/2039
+f 2670/2435 2372/2039 2668/2437
+f 681/3178 2666/2432 2669/2434
+f 2670/2435 2669/2434 2666/2432
+f 2671/2440 180/3179 2672/2439
+f 2653/2438 2672/2439 180/3179
+f 681/3178 2669/2434 2661/2426
+f 2672/2439 2661/2426 2669/2434
+f 179/3180 2656/2441 2675/2445
+f 2676/2443 2675/2445 2656/2441
+f 2676/2443 2673/2444 2674/2447
+f 683/3181 2674/2447 2673/2444
+f 2671/2440 2678/2449 180/3179
+f 2651/2450 180/3179 2678/2449
+f 2677/2448 683/3181 2678/2449
+f 2673/2444 2678/2449 683/3181
+f 2373/2041 2680/2452 164/3118
+f 2668/2437 164/3118 2680/2452
+f 683/3181 2677/2448 2679/2451
+f 2680/2452 2679/2451 2677/2448
+f 2342/2002 2682/2453 165/3111
+f 2369/2038 165/3111 2682/2453
+f 683/3181 2679/2451 2681/2454
+f 2682/2453 2681/2454 2679/2451
+f 2683/2456 2684/2455 161/3109
+f 2335/1991 161/3109 2684/2455
+f 2681/2454 2684/2455 683/3181
+f 2674/2447 683/3181 2684/2455
+f 2616/2459 2688/2460 172/3182
+f 2687/2462 172/3182 2688/2460
+f 685/3183 2686/2463 2685/2458
+f 2688/2460 2685/2458 2686/2463
+f 179/3184 2675/2467 2610/2468
+f 2690/2465 2610/2468 2675/2467
+f 2690/2465 2689/2464 2685/2458
+f 685/3183 2685/2458 2689/2464
+f 2692/2470 2683/2471 2560/2300
+f 161/3154 2560/2300 2683/2471
+f 685/3183 2689/2464 2691/2469
+f 2692/2470 2691/2469 2689/2464
+f 72/2932 1602/1082 2556/2295
+f 2694/2473 2556/2295 1602/1082
+f 685/3183 2691/2469 2693/2472
+f 2694/2473 2693/2472 2691/2469
+f 2696/2474 1597/1077 2695/2475
+f 73/2961 2695/2475 1597/1077
+f 685/3183 2693/2472 2686/2463
+f 2696/2474 2686/2463 2693/2472
+f 1610/1096 2700/2477 66/2963
+f 2699/2478 66/2963 2700/2477
+f 688/3185 2698/2480 2697/2476
+f 2700/2477 2697/2476 2698/2480
+f 2473/2188 2702/2482 65/2911
+f 1621/1110 65/2911 2702/2482
+f 2697/2476 2702/2482 688/3185
+f 2701/2481 688/3185 2702/2482
+f 148/3140 2703/2486 2469/2184
+f 2705/2484 2469/2184 2703/2486
+f 688/3185 2701/2481 2704/2483
+f 2705/2484 2704/2483 2701/2481
+f 2707/2491 174/3186 2708/2488
+f 2623/2490 2708/2488 174/3186
+f 688/3185 2704/2483 2706/2487
+f 2708/2488 2706/2487 2704/2483
+f 2710/2492 2632/2494 2709/2493
+f 175/3187 2709/2493 2632/2494
+f 688/3185 2706/2487 2698/2480
+f 2710/2492 2698/2480 2706/2487
+f 73/2961 1625/1114 2695/2475
+f 2713/2495 2695/2475 1625/1114
+f 2713/2495 2711/2496 2712/2497
+f 689/3188 2712/2497 2711/2496
+f 66/2969 2699/2500 1631/1127
+f 2715/2501 1631/1127 2699/2500
+f 2711/2496 2715/2501 689/3188
+f 2714/2499 689/3188 2715/2501
+f 2717/2503 2709/2506 2637/2505
+f 175/3189 2637/2505 2709/2506
+f 689/3188 2714/2499 2716/2502
+f 2717/2503 2716/2502 2714/2499
+f 172/3182 2687/2462 2645/2508
+f 2718/2507 2645/2508 2687/2462
+f 689/3188 2716/2502 2712/2497
+f 2718/2507 2712/2497 2716/2502
+f 2571/2316 2721/2510 148/3140
+f 2703/2486 148/3140 2721/2510
+f 2720/2511 2721/2510 690/3190
+f 2719/2509 690/3190 2721/2510
+f 160/3108 2664/2512 2568/2313
+f 2723/2514 2568/2313 2664/2512
+f 690/3190 2719/2509 2722/2515
+f 2723/2514 2722/2515 2719/2509
+f 177/3191 2592/2519 2659/2520
+f 2725/2517 2659/2520 2592/2519
+f 690/3190 2722/2515 2724/2516
+f 2725/2517 2724/2516 2722/2515
+f 2726/2521 2602/2522 2707/2491
+f 174/3186 2707/2491 2602/2522
+f 690/3190 2724/2516 2720/2511
+f 2726/2521 2720/2511 2724/2516
+f 2600/2353 2730/2524 173/3165
+f 2729/2526 173/3165 2730/2524
+f 2727/2523 693/3192 2730/2524
+f 2728/2525 2730/2524 693/3192
+f 178/3164 2731/2528 2594/2346
+f 2733/2530 2594/2346 2731/2528
+f 693/3192 2727/2523 2732/2531
+f 2733/2530 2732/2531 2727/2523
+f 2576/2322 2736/2533 170/3157
+f 2735/2534 170/3157 2736/2533
+f 2732/2531 2736/2533 693/3192
+f 2734/2532 693/3192 2736/2533
+f 2737/2535 2738/2536 168/3161
+f 2586/2335 168/3161 2738/2536
+f 2728/2525 693/3192 2738/2536
+f 2734/2532 2738/2536 693/3192
+f 176/3172 2630/2390 2741/2539
+f 2742/2538 2741/2539 2630/2390
+f 2742/2538 2739/2537 2740/2541
+f 695/3193 2740/2541 2739/2537
+f 2625/2385 173/3165 2744/2542
+f 2729/2526 2744/2542 173/3165
+f 2744/2542 2743/2543 2739/2537
+f 695/3193 2739/2537 2743/2543
+f 2587/2336 2746/2545 168/3161
+f 2737/2535 168/3161 2746/2545
+f 2743/2543 2746/2545 695/3193
+f 2745/2544 695/3193 2746/2545
+f 2747/2546 2748/2547 169/3160
+f 2582/2331 169/3160 2748/2547
+f 2740/2541 695/3193 2748/2547
+f 2745/2544 2748/2547 695/3193
+f 171/3170 2643/2405 2751/2551
+f 2752/2549 2751/2551 2643/2405
+f 2749/2548 697/3194 2752/2549
+f 2750/2550 2752/2549 697/3194
+f 176/3172 2741/2539 2639/2400
+f 2754/2553 2639/2400 2741/2539
+f 2754/2553 2753/2554 2749/2548
+f 697/3194 2749/2548 2753/2554
+f 2584/2333 2756/2556 169/3160
+f 2747/2546 169/3160 2756/2556
+f 2753/2554 2756/2556 697/3194
+f 2755/2555 697/3194 2756/2556
+f 2757/2557 2758/2558 167/3159
+f 2578/2327 167/3159 2758/2558
+f 2750/2550 697/3194 2758/2558
+f 2755/2555 2758/2558 697/3194
+f 178/3164 2608/2361 2731/2528
+f 2761/2560 2731/2528 2608/2361
+f 698/3195 2760/2561 2759/2559
+f 2761/2560 2759/2559 2760/2561
+f 2751/2551 2763/2562 171/3170
+f 2618/2375 171/3170 2763/2562
+f 2759/2559 2763/2562 698/3195
+f 2762/2563 698/3195 2763/2562
+f 2580/2328 2765/2565 167/3159
+f 2757/2557 167/3159 2765/2565
+f 2762/2563 2765/2565 698/3195
+f 2764/2564 698/3195 2765/2565
+f 2735/2534 2766/2566 170/3157
+f 2573/2319 170/3157 2766/2566
+f 2764/2564 2766/2566 698/3195
+f 2760/2561 698/3195 2766/2566
+f 123/3196 2767/2567 1883/2573
+f 2770/2569 1883/2573 2767/2567
+f 2770/2569 2768/2570 2769/2571
+f 702/3197 2769/2571 2768/2570
+f 2773/2578 183/3198 2774/2574
+f 2771/2575 2774/2574 183/3198
+f 2768/2570 2774/2574 702/3197
+f 2772/2576 702/3197 2774/2574
+f 2777/2583 182/3199 2778/2579
+f 2775/2580 2778/2579 182/3199
+f 2772/2576 2778/2579 702/3197
+f 2776/2581 702/3197 2778/2579
+f 2780/2584 2779/2586 1888/2585
+f 122/3200 1888/2585 2779/2586
+f 702/3197 2776/2581 2769/2571
+f 2780/2584 2769/2571 2776/2581
+f 2779/2586 2783/2588 122/3200
+f 2428/2590 122/3200 2783/2588
+f 2782/2591 2783/2588 704/3201
+f 2781/2587 704/3201 2783/2588
+f 182/3199 2784/2595 2775/2580
+f 2786/2593 2775/2580 2784/2595
+f 704/3201 2781/2587 2785/2592
+f 2786/2593 2785/2592 2781/2587
+f 1703/1221 2789/2597 100/2983
+f 2788/2598 100/2983 2789/2597
+f 2785/2592 2789/2597 704/3201
+f 2787/2596 704/3201 2789/2597
+f 2790/2599 1698/1215 2431/2600
+f 74/2982 2431/2600 1698/1215
+f 704/3201 2787/2596 2782/2591
+f 2790/2599 2782/2591 2787/2596
+f 67/2984 1709/1228 2409/2603
+f 2793/2602 2409/2603 1709/1228
+f 707/3202 2792/2605 2791/2601
+f 2793/2602 2791/2601 2792/2605
+f 2794/2608 2796/2609 99/2988
+f 1718/1241 99/2988 2796/2609
+f 2791/2601 2796/2609 707/3202
+f 2795/2607 707/3202 2796/2609
+f 181/3203 2797/2610 2799/2614
+f 2800/2612 2799/2614 2797/2610
+f 707/3202 2795/2607 2798/2613
+f 2800/2612 2798/2613 2795/2607
+f 2801/2617 119/3204 2802/2616
+f 2414/2615 2802/2616 119/3204
+f 2798/2613 2802/2616 707/3202
+f 2792/2605 707/3202 2802/2616
+f 119/3204 2801/2617 1872/2621
+f 2805/2619 1872/2621 2801/2617
+f 2804/2620 2805/2619 709/3205
+f 2803/2618 709/3205 2805/2619
+f 2806/2626 2808/2624 181/3203
+f 2797/2610 181/3203 2808/2624
+f 709/3205 2803/2618 2807/2623
+f 2808/2624 2807/2623 2803/2618
+f 2773/2578 2811/2628 183/3198
+f 2810/2629 183/3198 2811/2628
+f 2809/2627 709/3205 2811/2628
+f 2807/2623 2811/2628 709/3205
+f 1877/2631 2812/2630 123/3196
+f 2767/2567 123/3196 2812/2630
+f 2812/2630 2804/2620 2809/2627
+f 709/3205 2809/2627 2804/2620
+f 2815/2633 1731/1260 2788/2598
+f 100/2983 2788/2598 1731/1260
+f 711/3206 2814/2634 2813/2632
+f 2815/2633 2813/2632 2814/2634
+f 2777/2583 2817/2636 182/3199
+f 2784/2595 182/3199 2817/2636
+f 2813/2632 2817/2636 711/3206
+f 2816/2635 711/3206 2817/2636
+f 183/3198 2818/2640 2771/2575
+f 2820/2638 2771/2575 2818/2640
+f 711/3206 2816/2635 2819/2637
+f 2820/2638 2819/2637 2816/2635
+f 75/2991 1737/1267 2821/2642
+f 2822/2641 2821/2642 1737/1267
+f 2819/2637 2822/2641 711/3206
+f 2814/2634 711/3206 2822/2641
+f 75/2991 2821/2642 1743/1273
+f 2825/2644 1743/1273 2821/2642
+f 2824/2645 2825/2644 712/3207
+f 2823/2643 712/3207 2825/2644
+f 183/3198 2810/2629 2818/2640
+f 2827/2647 2818/2640 2810/2629
+f 712/3207 2823/2643 2826/2646
+f 2827/2647 2826/2646 2823/2643
+f 2806/2626 181/3203 2829/2649
+f 2799/2614 2829/2649 181/3203
+f 2826/2646 2829/2649 712/3207
+f 2828/2648 712/3207 2829/2649
+f 2830/2650 2794/2608 1748/1279
+f 99/2988 1748/1279 2794/2608
+f 712/3207 2828/2648 2824/2645
+f 2830/2650 2824/2645 2828/2648
+f 2835/2652 2834/2655 2831/2654
+f 184/3208 2831/2654 2834/2655
+f 2835/2652 2832/2651 2833/2657
+f 716/3209 2833/2657 2832/2651
+f 2838/2662 186/3210 2839/2660
+f 2836/2658 2839/2660 186/3210
+f 2832/2651 2839/2660 716/3209
+f 2837/2661 716/3209 2839/2660
+f 2840/2663 2842/2664 185/3211
+f 2841/2665 185/3211 2842/2664
+f 2837/2661 2842/2664 716/3209
+f 2833/2657 716/3209 2842/2664
+f 2843/2669 2846/2667 184/3208
+f 2831/2654 184/3208 2846/2667
+f 719/3212 2845/2670 2844/2666
+f 2846/2667 2844/2666 2845/2670
+f 2006/1585 2849/2672 117/3042
+f 2848/2673 117/3042 2849/2672
+f 719/3212 2844/2666 2847/2671
+f 2849/2672 2847/2671 2844/2666
+f 136/3031 1937/1504 2015/1596
+f 2851/2675 2015/1596 1937/1504
+f 719/3212 2847/2671 2850/2674
+f 2851/2675 2850/2674 2847/2671
+f 2852/2678 2854/2679 128/3030
+f 1932/1500 128/3030 2854/2679
+f 2850/2674 2854/2679 719/3212
+f 2853/2677 719/3212 2854/2679
+f 186/3210 2838/2662 2855/2681
+f 2856/2680 2855/2681 2838/2662
+f 719/3212 2853/2677 2845/2670
+f 2856/2680 2845/2670 2853/2677
+f 2834/2655 2859/2683 184/3208
+f 2843/2669 184/3208 2859/2683
+f 721/3213 2858/2684 2857/2682
+f 2859/2683 2857/2682 2858/2684
+f 185/3211 2860/2688 2840/2663
+f 2862/2686 2840/2663 2860/2688
+f 721/3213 2857/2682 2861/2685
+f 2862/2686 2861/2685 2857/2682
+f 135/3057 2074/1668 2864/2691
+f 2865/2690 2864/2691 2074/1668
+f 721/3213 2861/2685 2863/2689
+f 2865/2690 2863/2689 2861/2685
+f 2115/1716 2867/2693 113/3055
+f 2067/1659 113/3055 2867/2693
+f 721/3213 2863/2689 2866/2692
+f 2867/2693 2866/2692 2863/2689
+f 117/3042 2848/2673 2111/1711
+f 2868/2694 2111/1711 2848/2673
+f 721/3213 2866/2692 2858/2684
+f 2868/2694 2858/2684 2866/2692
+f 2872/2696 2871/2699 1794/2698
+f 104/3214 1794/2698 2871/2699
+f 723/3215 2870/2701 2869/2695
+f 2872/2696 2869/2695 2870/2701
+f 2407/2089 2874/2703 81/3126
+f 1801/2704 81/3126 2874/2703
+f 2873/2702 723/3215 2874/2703
+f 2869/2695 2874/2703 723/3215
+f 2864/2691 2876/2705 135/3057
+f 2404/2085 135/3057 2876/2705
+f 2876/2705 2875/2706 2873/2702
+f 723/3215 2873/2702 2875/2706
+f 2877/2707 2878/2708 185/3211
+f 2860/2688 185/3211 2878/2708
+f 2875/2706 2878/2708 723/3215
+f 2870/2701 723/3215 2878/2708
+f 1805/2711 2881/2712 79/3142
+f 2485/2202 79/3142 2881/2712
+f 2880/2713 2881/2712 725/3216
+f 2879/2710 725/3216 2881/2712
+f 2884/2715 1812/2718 2882/2717
+f 105/3217 2882/2717 1812/2718
+f 725/3216 2879/2710 2883/2714
+f 2884/2715 2883/2714 2879/2710
+f 2855/2681 2887/2720 186/3210
+f 2886/2721 186/3210 2887/2720
+f 2883/2714 2887/2720 725/3216
+f 2885/2719 725/3216 2887/2720
+f 128/3030 2490/2209 2852/2678
+f 2888/2722 2852/2678 2490/2209
+f 725/3216 2885/2719 2880/2713
+f 2888/2722 2880/2713 2885/2719
+f 2836/2658 186/3210 2892/2726
+f 2889/2725 2892/2726 186/3210
+f 2891/2727 2892/2726 729/3218
+f 2890/2724 729/3218 2892/2726
+f 2896/2729 2895/2732 2893/2731
+f 188/3219 2893/2731 2895/2732
+f 2896/2729 2894/2728 2890/2724
+f 729/3218 2890/2724 2894/2728
+f 2900/2734 2899/2737 2897/2736
+f 187/3220 2897/2736 2899/2737
+f 729/3218 2894/2728 2898/2733
+f 2900/2734 2898/2733 2894/2728
+f 185/3211 2841/2665 2901/2739
+f 2902/2738 2901/2739 2841/2665
+f 2902/2738 2891/2727 2898/2733
+f 729/3218 2898/2733 2891/2727
+f 185/3211 2901/2739 2877/2707
+f 2905/2740 2877/2707 2901/2739
+f 731/3221 2904/2742 2903/2741
+f 2905/2740 2903/2741 2904/2742
+f 2906/2745 2908/2746 187/3220
+f 2897/2736 187/3220 2908/2746
+f 2903/2741 2908/2746 731/3221
+f 2907/2744 731/3221 2908/2746
+f 108/3222 1837/2750 2910/2751
+f 2911/2748 2910/2751 1837/2750
+f 731/3221 2907/2744 2909/2747
+f 2911/2748 2909/2747 2907/2744
+f 2871/2699 2912/2752 104/3214
+f 1832/2753 104/3214 2912/2752
+f 2909/2747 2912/2752 731/3221
+f 2904/2742 731/3221 2912/2752
+f 1843/2756 2915/2757 105/3217
+f 2882/2717 105/3217 2915/2757
+f 2913/2755 733/3223 2915/2757
+f 2914/2758 2915/2757 733/3223
+f 109/3224 2916/2762 1850/2763
+f 2918/2760 1850/2763 2916/2762
+f 2918/2760 2917/2759 2913/2755
+f 733/3223 2913/2755 2917/2759
+f 2920/2766 188/3219 2921/2764
+f 2895/2732 2921/2764 188/3219
+f 2917/2759 2921/2764 733/3223
+f 2919/2765 733/3223 2921/2764
+f 2922/2767 2889/2725 2886/2721
+f 186/3210 2886/2721 2889/2725
+f 733/3223 2919/2765 2914/2758
+f 2922/2767 2914/2758 2919/2765
+f 2925/2769 2906/2745 2899/2737
+f 187/3220 2899/2737 2906/2745
+f 734/3225 2924/2770 2923/2768
+f 2925/2769 2923/2768 2924/2770
+f 2927/2772 2893/2731 2920/2766
+f 188/3219 2920/2766 2893/2731
+f 734/3225 2923/2768 2926/2771
+f 2927/2772 2926/2771 2923/2768
+f 1860/2775 2929/2776 109/3224
+f 2916/2762 109/3224 2929/2776
+f 2926/2771 2929/2776 734/3225
+f 2928/2774 734/3225 2929/2776
+f 2910/2751 2930/2777 108/3222
+f 1856/2778 108/3222 2930/2777
+f 2928/2774 2930/2777 734/3225
+f 2924/2770 734/3225 2930/2777
diff --git a/pytorch3d/docs/tutorials/data/teapot.obj b/pytorch3d/docs/tutorials/data/teapot.obj
new file mode 100644
index 0000000000000000000000000000000000000000..c861018b21985d29a3e11576d8c9b32a7135d207
--- /dev/null
+++ b/pytorch3d/docs/tutorials/data/teapot.obj
@@ -0,0 +1,5046 @@
+v -0.498530 0.712498 -0.039883
+v -0.501666 0.699221 -0.063813
+v -0.501255 0.717792 0.000000
+v -0.624036 0.711938 -0.039883
+v -0.526706 0.651362 -0.039883
+v -0.508714 0.682112 -0.071712
+v -0.622039 0.698704 -0.063813
+v -0.624834 0.717232 0.000000
+v -0.498530 0.712498 0.039883
+v -0.638129 0.287158 0.000000
+v -0.517593 0.664661 -0.063813
+v -0.534329 0.646030 0.000000
+v -0.614850 0.651067 -0.039883
+v -0.616848 0.664299 -0.063813
+v -0.619445 0.681503 -0.071790
+v -0.741245 0.707456 -0.039883
+v -0.744483 0.712577 0.000000
+v -0.624036 0.711938 0.039883
+v -0.501667 0.699221 0.063813
+v -0.622039 0.698704 0.063813
+v -0.712095 0.661370 -0.063813
+v -0.733150 0.694655 -0.063813
+v -0.741245 0.707456 0.039883
+v -0.733150 0.694655 0.063813
+v -0.631184 0.277569 -0.039883
+v -0.526706 0.651362 0.039883
+v -0.614053 0.645774 0.000000
+v -0.704000 0.648569 -0.039883
+v -0.722621 0.678012 -0.071790
+v -0.832523 0.695296 -0.039883
+v -0.837545 0.699948 0.000000
+v -0.832523 0.695296 0.039883
+v -0.619445 0.681503 0.071790
+v -0.508714 0.682112 0.071712
+v -0.722621 0.678012 0.071790
+v -0.517593 0.664661 0.063813
+v -0.619922 0.238069 -0.071790
+v -0.624826 0.259599 -0.063813
+v -0.710066 0.328372 0.000000
+v -0.614850 0.651067 0.039883
+v -0.787321 0.653419 -0.063813
+v -0.803644 0.668539 -0.071790
+v -0.819967 0.683663 -0.063813
+v -0.819967 0.683663 0.063813
+v -0.803644 0.668539 0.071790
+v -0.711425 0.307332 -0.063813
+v -0.615553 0.216807 -0.063813
+v -0.712688 0.287795 -0.071790
+v -0.631184 0.277569 0.039883
+v -0.710455 0.322361 -0.039883
+v -0.710455 0.322361 0.039883
+v -0.700762 0.643448 0.000000
+v -0.774766 0.641786 -0.039883
+v -0.897800 0.671612 -0.039883
+v -0.904015 0.675354 0.000000
+v -0.897800 0.671612 0.039883
+v -0.882265 0.662257 0.063813
+v -0.712095 0.661370 0.063813
+v -0.787321 0.653419 0.063813
+v -0.608884 0.198682 -0.039883
+v -0.624828 0.259599 0.063813
+v -0.766936 0.377559 0.000000
+v -0.769651 0.372307 0.039883
+v -0.616848 0.664299 0.063813
+v -0.704000 0.648569 0.039883
+v -0.841868 0.637931 -0.063813
+v -0.862065 0.650094 -0.071790
+v -0.882265 0.662257 -0.063813
+v -0.862065 0.650094 0.071790
+v -0.841868 0.637931 0.063813
+v -0.611709 0.194244 0.000000
+v -0.776434 0.359177 -0.063813
+v -0.769651 0.372307 -0.039883
+v -0.713952 0.268259 -0.063813
+v -0.711425 0.307332 0.063813
+v -0.776434 0.359177 0.063813
+v -0.769743 0.637131 0.000000
+v -0.826329 0.628576 -0.039883
+v -0.937016 0.632565 -0.039883
+v -0.943899 0.634805 0.000000
+v -0.937016 0.632565 0.039883
+v -0.919812 0.626965 0.063813
+v -0.897443 0.619684 0.071790
+v -0.774766 0.641786 0.039883
+v -0.826329 0.628576 0.039883
+v -0.714922 0.253231 -0.039883
+v -0.608883 0.198681 0.039883
+v -0.715311 0.247220 0.000000
+v -0.785253 0.342107 -0.071790
+v -0.619922 0.238069 0.071790
+v -0.712688 0.287795 0.071790
+v -0.809626 0.430737 0.000000
+v -0.814205 0.426194 0.039883
+v -0.825653 0.414838 0.063813
+v -0.875076 0.612403 -0.063813
+v -0.897443 0.619684 -0.071790
+v -0.919812 0.626965 -0.063813
+v -0.875076 0.612403 0.063813
+v -0.857869 0.606800 0.039883
+v -0.794072 0.325038 -0.063813
+v -0.800855 0.311909 -0.039883
+v -0.825653 0.414838 -0.063813
+v -0.814205 0.426194 -0.039883
+v -0.615480 0.216617 0.063578
+v -0.785253 0.342107 0.071790
+v -0.840534 0.400078 0.071790
+v -0.820114 0.624834 0.000000
+v -0.857869 0.606800 -0.039883
+v -0.950104 0.574316 -0.039883
+v -0.957194 0.574316 0.000000
+v -0.950104 0.574316 0.039883
+v -0.932377 0.574316 0.063813
+v -0.909334 0.574316 0.071790
+v -0.886292 0.574316 0.063813
+v -0.850987 0.604560 0.000000
+v -0.714922 0.253231 0.039883
+v -0.803571 0.306656 0.000000
+v -0.840534 0.400078 -0.071790
+v -0.713952 0.268259 0.063813
+v -0.794072 0.325038 0.063813
+v -0.839022 0.483916 0.000000
+v -0.844976 0.480304 0.039883
+v -0.859854 0.471278 0.063813
+v -0.879202 0.459542 0.071790
+v -0.886292 0.574316 -0.063813
+v -0.909334 0.574316 -0.071790
+v -0.932377 0.574316 -0.063813
+v -0.868564 0.574316 0.039883
+v -0.861474 0.574316 0.000000
+v -0.855419 0.385315 -0.063813
+v -0.866867 0.373960 -0.039883
+v -0.859854 0.471278 -0.063813
+v -0.844976 0.480304 -0.039883
+v -0.855419 0.385315 0.063813
+v -0.898547 0.447807 0.063813
+v -0.868564 0.574316 -0.039883
+v -0.941014 0.505765 -0.039883
+v -0.947813 0.503580 0.000000
+v -0.941014 0.505765 0.039883
+v -0.924011 0.511234 0.063813
+v -0.901913 0.518343 0.071790
+v -0.879811 0.525448 0.063813
+v -0.862808 0.530917 0.039883
+v -0.800855 0.311909 0.039883
+v -0.871445 0.369416 0.000000
+v -0.879202 0.459542 -0.071790
+v -0.866867 0.373960 0.039883
+v -0.856009 0.533103 0.000000
+v -0.879811 0.525448 -0.063813
+v -0.901913 0.518343 -0.071790
+v -0.924011 0.511234 -0.063813
+v -0.862808 0.530917 -0.039883
+v -0.898547 0.447807 -0.063813
+v -0.913428 0.438781 -0.039883
+v -0.913428 0.438781 0.039883
+v -0.919378 0.435169 0.000000
+v 0.600960 0.444810 0.085753
+v 0.605956 0.463769 0.000000
+v 0.600959 0.444810 -0.085753
+v 0.656890 0.471064 0.000000
+v 0.661223 0.454734 -0.083705
+v 0.730696 0.501576 -0.073611
+v 0.661223 0.454734 0.083705
+v 0.605101 0.399712 -0.137265
+v 0.746455 0.470391 -0.117778
+v 0.724395 0.514048 0.000000
+v 0.605100 0.399712 0.137265
+v 0.672055 0.413907 -0.133928
+v 0.613258 0.341675 -0.154354
+v 0.786583 0.544847 -0.096783
+v 0.768856 0.565896 -0.060489
+v 0.672055 0.413907 0.133928
+v 0.730696 0.501576 0.073611
+v 0.686135 0.360830 -0.150669
+v 0.809626 0.517481 -0.108881
+v 0.766935 0.429850 -0.132501
+v 0.761767 0.574316 0.000000
+v 0.613258 0.341675 0.154354
+v 0.813417 0.626247 -0.075788
+v 0.839021 0.611098 -0.085261
+v 0.793721 0.637899 -0.047367
+v 0.686135 0.360830 0.150669
+v 0.768856 0.565896 0.060489
+v 0.746455 0.470391 0.117778
+v 0.619427 0.283145 -0.137236
+v 0.864627 0.595949 -0.075788
+v 0.832669 0.490118 -0.096783
+v 0.787419 0.389310 -0.117778
+v 0.785843 0.642561 0.000000
+v 0.619427 0.283145 0.137236
+v 0.700219 0.307756 -0.133928
+v 0.847933 0.703560 -0.059638
+v 0.879938 0.698065 -0.067092
+v 0.911944 0.692571 -0.059638
+v 0.823314 0.707784 -0.037273
+v 0.766935 0.429850 0.132501
+v 0.793721 0.637899 0.047367
+v 0.786583 0.544847 0.096783
+v 0.700219 0.307756 0.133928
+v 0.617684 0.235930 -0.085941
+v 0.936563 0.688344 -0.037273
+v 0.884319 0.584297 -0.047367
+v 0.850396 0.469070 -0.060489
+v 0.803175 0.358128 -0.073611
+v 0.813468 0.709475 0.000000
+v 0.617684 0.235930 0.085941
+v 0.625577 0.219883 0.000000
+v 0.711051 0.266929 -0.083705
+v 0.911107 0.765755 -0.053178
+v 0.957193 0.765755 -0.059825
+v 1.003279 0.765755 -0.053178
+v 1.038733 0.765755 -0.033236
+v 0.875654 0.765755 -0.033236
+v 0.809626 0.517481 0.108881
+v 0.787419 0.389310 0.117778
+v 0.823314 0.707784 0.037273
+v 0.813417 0.626247 0.075788
+v 0.711051 0.266929 0.083705
+v 0.715384 0.250599 0.000000
+v 1.052913 0.765755 0.000000
+v 0.946409 0.686653 0.000000
+v 0.892200 0.579635 0.000000
+v 0.857486 0.460650 0.000000
+v 0.809479 0.345652 0.000000
+v 0.861474 0.765755 0.000000
+v 0.929990 0.776479 -0.051602
+v 0.979075 0.777181 -0.058052
+v 1.028157 0.777879 -0.051602
+v 1.065915 0.778419 -0.032251
+v 1.081016 0.778632 0.000000
+v 0.892235 0.775943 -0.032251
+v 0.839021 0.611098 0.085261
+v 0.832669 0.490118 0.096783
+v 0.803175 0.358128 0.073611
+v 0.875654 0.765755 0.033236
+v 0.847933 0.703560 0.059638
+v 1.065915 0.778419 0.032174
+v 1.038733 0.765755 0.033236
+v 0.936563 0.688344 0.037273
+v 0.884319 0.584297 0.047367
+v 0.850396 0.469070 0.060489
+v 0.877131 0.775726 0.000000
+v 0.943713 0.783087 -0.047663
+v 0.992645 0.784366 -0.053621
+v 1.041577 0.785649 -0.047663
+v 1.079216 0.786631 -0.029789
+v 1.094273 0.787027 0.000000
+v 1.079216 0.786631 0.029174
+v 0.906073 0.782101 -0.029789
+v 0.879938 0.698065 0.067092
+v 0.864627 0.595949 0.075788
+v 0.892235 0.775943 0.032236
+v 0.911107 0.765755 0.053178
+v 1.041577 0.785649 0.046875
+v 1.028157 0.777879 0.051503
+v 1.003279 0.765755 0.053178
+v 0.911944 0.692571 0.059638
+v 0.891016 0.781708 0.000000
+v 0.951249 0.785448 -0.042542
+v 0.997575 0.787068 -0.047860
+v 1.043903 0.788686 -0.042542
+v 1.079539 0.789934 -0.026589
+v 1.093795 0.790431 0.000000
+v 1.079539 0.789934 0.024511
+v 1.043903 0.788686 0.039883
+v 0.915613 0.784200 -0.026589
+v 0.957193 0.765755 0.059825
+v 0.906073 0.782101 0.029666
+v 0.929990 0.776479 0.051553
+v 0.997575 0.787068 0.045616
+v 0.992645 0.784366 0.052956
+v 0.979075 0.777181 0.057969
+v 0.901357 0.783702 0.000000
+v 0.951569 0.783431 -0.037421
+v 0.993532 0.785033 -0.042099
+v 1.035492 0.786631 -0.037421
+v 1.067772 0.787863 -0.023388
+v 1.080684 0.788354 0.000000
+v 1.067772 0.787863 0.018464
+v 1.035492 0.786631 0.031119
+v 0.993532 0.785033 0.036781
+v 0.919292 0.782200 -0.023388
+v 0.915613 0.784200 0.026173
+v 0.943713 0.783087 0.047269
+v 0.951569 0.783431 0.034270
+v 0.951249 0.785448 0.041213
+v 0.906379 0.781708 0.000000
+v 0.943653 0.776909 -0.033482
+v 0.980182 0.778010 -0.037667
+v 1.016712 0.779111 -0.033482
+v 1.044812 0.779957 -0.020926
+v 1.056052 0.780295 0.000000
+v 1.044812 0.779957 0.011310
+v 1.016712 0.779111 0.021172
+v 0.980182 0.778010 0.027281
+v 0.943653 0.776909 0.027327
+v 0.915553 0.776064 -0.020926
+v 0.919292 0.782200 0.022403
+v 0.915553 0.776064 0.019003
+v 0.904312 0.775726 0.000000
+v 0.926468 0.765755 -0.031906
+v 0.957193 0.765755 -0.035895
+v 0.987920 0.765755 -0.031906
+v 1.011552 0.765755 -0.019942
+v 1.021006 0.765755 0.000000
+v 1.011552 0.765755 0.003324
+v 0.987920 0.765755 0.010635
+v 0.957193 0.765755 0.017947
+v 0.926468 0.765755 0.021271
+v 0.902834 0.765755 0.016618
+v 0.902834 0.765755 -0.019942
+v 0.893380 0.765755 0.000000
+v 0.886428 0.750924 -0.019014
+v 0.908324 0.750924 -0.030099
+v 0.936793 0.750924 -0.033795
+v 0.965261 0.750924 -0.030099
+v 0.987158 0.750924 -0.019014
+v 0.995918 0.750924 -0.000537
+v 0.987158 0.750924 0.002542
+v 0.965261 0.750924 0.009317
+v 0.936793 0.750924 0.016092
+v 0.908324 0.750924 0.019171
+v 0.886428 0.750924 0.014860
+v 0.877668 0.750924 -0.000537
+v 0.936793 0.750924 -0.007312
+v 0.440746 0.783205 0.000000
+v 0.446690 0.765755 0.000000
+v 0.430973 0.765755 0.119945
+v 0.425236 0.783205 0.118348
+v 0.425236 0.783205 -0.118348
+v 0.453011 0.750009 0.000000
+v 0.437073 0.750009 0.121642
+v 0.441668 0.793673 0.000000
+v 0.386470 0.765755 0.226985
+v 0.430973 0.765755 -0.119945
+v 0.426127 0.793673 -0.118596
+v 0.437073 0.750009 -0.121642
+v 0.426127 0.793673 0.118596
+v 0.381327 0.783205 0.223964
+v 0.381327 0.783205 -0.223964
+v 0.382124 0.793673 -0.224433
+v 0.317150 0.765755 0.317150
+v 0.391939 0.750009 0.230197
+v 0.321638 0.750009 0.321639
+v 0.386470 0.765755 -0.226985
+v 0.391939 0.750009 -0.230197
+v 0.447686 0.797164 0.000000
+v 0.431936 0.797164 -0.120212
+v 0.387332 0.797164 -0.227491
+v 0.230197 0.750009 0.391940
+v 0.226984 0.765755 0.386470
+v 0.317150 0.765755 -0.317150
+v 0.321638 0.750009 -0.321639
+v 0.431936 0.797164 0.120212
+v 0.382124 0.793673 0.224433
+v 0.312929 0.783205 0.312929
+v 0.313584 0.793673 -0.313584
+v 0.312929 0.783205 -0.312929
+v 0.317858 0.797164 -0.317858
+v 0.121642 0.750009 0.437072
+v 0.119944 0.765755 0.430973
+v 0.226984 0.765755 -0.386470
+v 0.230197 0.750009 -0.391940
+v 0.457031 0.793673 0.000000
+v 0.440950 0.793673 -0.122721
+v 0.395416 0.793673 -0.232239
+v 0.324491 0.793673 -0.324492
+v -0.000000 0.750009 0.453012
+v -0.000000 0.765755 0.446690
+v 0.223963 0.783205 0.381327
+v 0.223963 0.783205 -0.381327
+v 0.119944 0.765755 -0.430973
+v 0.121642 0.750009 -0.437072
+v 0.440950 0.793673 0.122721
+v 0.387332 0.797164 0.227491
+v 0.313584 0.793673 0.313584
+v 0.227491 0.797164 -0.387332
+v 0.224433 0.793673 -0.382125
+v 0.232239 0.793673 -0.395417
+v -0.119945 0.765755 0.430973
+v -0.121642 0.750009 0.437072
+v 0.118348 0.783205 0.425237
+v 0.118348 0.783205 -0.425237
+v -0.000000 0.750009 -0.453012
+v -0.000000 0.765755 -0.446690
+v 0.467924 0.783205 0.000000
+v 0.451460 0.783205 -0.125646
+v 0.404842 0.783205 -0.237775
+v 0.332226 0.783205 -0.332226
+v 0.237775 0.783205 -0.404842
+v -0.226985 0.765755 0.386470
+v -0.000000 0.783205 0.440746
+v 0.224433 0.793673 0.382125
+v 0.118596 0.793673 -0.426127
+v -0.000000 0.783205 -0.440746
+v -0.119945 0.765755 -0.430973
+v -0.121642 0.750009 -0.437072
+v 0.451460 0.783205 0.125646
+v 0.395416 0.793673 0.232239
+v 0.317858 0.797164 0.317858
+v 0.122721 0.793673 -0.440950
+v 0.120212 0.797164 -0.431937
+v 0.125646 0.783205 -0.451460
+v -0.317150 0.765755 0.317150
+v -0.230198 0.750009 0.391939
+v -0.321639 0.750009 0.321639
+v -0.118348 0.783205 0.425237
+v 0.118596 0.793673 0.426127
+v -0.000000 0.793673 -0.441668
+v -0.118348 0.783205 -0.425237
+v -0.226985 0.765755 -0.386470
+v 0.478596 0.765755 0.000000
+v 0.461756 0.765755 -0.128512
+v 0.414076 0.765755 -0.243198
+v 0.339803 0.765755 -0.339804
+v 0.243198 0.765755 -0.414076
+v 0.128512 0.765755 -0.461757
+v -0.391940 0.750009 0.230197
+v -0.386470 0.765755 0.226985
+v -0.223964 0.783205 0.381327
+v -0.000000 0.793673 0.441668
+v 0.227491 0.797164 0.387332
+v -0.000000 0.797164 -0.447686
+v -0.118596 0.793673 -0.426127
+v -0.223964 0.783205 -0.381327
+v -0.317150 0.765755 -0.317150
+v -0.230198 0.750009 -0.391939
+v -0.321639 0.750009 -0.321639
+v 0.461756 0.765755 0.128512
+v 0.404842 0.783205 0.237775
+v 0.324491 0.793673 0.324492
+v -0.000000 0.783205 -0.467924
+v -0.000000 0.793673 -0.457031
+v -0.000000 0.765755 -0.478597
+v -0.437073 0.750009 0.121642
+v -0.430974 0.765755 0.119945
+v -0.312929 0.783205 0.312929
+v -0.118596 0.793673 0.426127
+v 0.120212 0.797164 0.431937
+v -0.120212 0.797164 -0.431937
+v -0.224433 0.793673 -0.382125
+v -0.312929 0.783205 -0.312929
+v -0.386470 0.765755 -0.226985
+v -0.391940 0.750009 -0.230197
+v 0.518110 0.682112 0.000000
+v 0.499881 0.682112 -0.139122
+v 0.448260 0.682112 -0.263277
+v 0.367859 0.682112 -0.367859
+v 0.263277 0.682112 -0.448260
+v 0.139122 0.682112 -0.499882
+v -0.000000 0.682112 -0.518110
+v -0.453012 0.750009 0.000000
+v -0.446690 0.765755 0.000000
+v -0.381327 0.783205 0.223964
+v -0.224433 0.793673 0.382125
+v -0.000000 0.797164 0.447686
+v 0.232239 0.793673 0.395417
+v -0.122721 0.793673 -0.440950
+v -0.227491 0.797164 -0.387332
+v -0.313584 0.793673 -0.313584
+v -0.381327 0.783205 -0.223964
+v -0.430974 0.765755 -0.119945
+v 0.499881 0.682112 0.139122
+v 0.414076 0.765755 0.243198
+v 0.332226 0.783205 0.332226
+v -0.128513 0.765755 -0.461757
+v -0.125646 0.783205 -0.451460
+v -0.139123 0.682112 -0.499882
+v -0.437073 0.750009 -0.121642
+v -0.425237 0.783205 0.118348
+v -0.313584 0.793673 0.313584
+v -0.120212 0.797164 0.431937
+v 0.122721 0.793673 0.440950
+v -0.232240 0.793673 -0.395417
+v -0.317859 0.797164 -0.317858
+v -0.382125 0.793673 -0.224433
+v -0.425237 0.783205 -0.118348
+v 0.555408 0.599133 0.000000
+v 0.535865 0.599133 -0.149137
+v 0.480530 0.599133 -0.282230
+v 0.394341 0.599133 -0.394341
+v 0.282230 0.599133 -0.480530
+v 0.149137 0.599133 -0.535866
+v -0.000000 0.599133 -0.555408
+v -0.149138 0.599133 -0.535866
+v -0.440746 0.783205 0.000000
+v -0.382125 0.793673 0.224433
+v -0.227491 0.797164 0.387332
+v -0.000000 0.793673 0.457031
+v 0.237775 0.783205 0.404842
+v -0.237775 0.783205 -0.404842
+v -0.324492 0.793673 -0.324492
+v -0.387332 0.797164 -0.227491
+v -0.426127 0.793673 -0.118596
+v 0.535865 0.599133 0.149137
+v 0.448260 0.682112 0.263277
+v 0.339803 0.765755 0.339804
+v -0.263278 0.682112 -0.448260
+v -0.243198 0.765755 -0.414076
+v -0.282230 0.599133 -0.480530
+v -0.426127 0.793673 0.118596
+v -0.317859 0.797164 0.317858
+v -0.122721 0.793673 0.440950
+v 0.125646 0.783205 0.451460
+v -0.332226 0.783205 -0.332226
+v -0.395417 0.793673 -0.232239
+v -0.431937 0.797164 -0.120212
+v -0.441668 0.793673 0.000000
+v 0.588275 0.517481 0.000000
+v 0.567578 0.517481 -0.157963
+v 0.508969 0.517485 -0.298931
+v 0.417675 0.517481 -0.417675
+v 0.298931 0.517485 -0.508969
+v 0.157963 0.517485 -0.567578
+v -0.000000 0.517481 -0.588275
+v -0.157963 0.517481 -0.567578
+v -0.298931 0.517485 -0.508969
+v -0.387332 0.797164 0.227491
+v -0.232240 0.793673 0.395417
+v -0.000000 0.783205 0.467924
+v 0.243198 0.765755 0.414076
+v -0.339804 0.765755 -0.339804
+v -0.404842 0.783205 -0.237775
+v -0.440950 0.793673 -0.122721
+v -0.447686 0.797164 0.000000
+v 0.567578 0.517485 0.157963
+v 0.480530 0.599133 0.282230
+v 0.367859 0.682112 0.367859
+v -0.394341 0.599133 -0.394341
+v -0.367859 0.682112 -0.367859
+v -0.417675 0.517481 -0.417675
+v -0.431937 0.797164 0.120212
+v -0.324492 0.793673 0.324492
+v -0.125646 0.783205 0.451460
+v 0.128512 0.765755 0.461757
+v -0.414076 0.765755 -0.243198
+v -0.451461 0.783205 -0.125646
+v -0.457031 0.793673 0.000000
+v 0.592873 0.437827 -0.165003
+v 0.531651 0.437827 -0.312254
+v 0.436292 0.437827 -0.436292
+v 0.312254 0.437827 -0.531651
+v 0.165003 0.437827 -0.592873
+v -0.000000 0.437827 -0.614496
+v -0.165004 0.437827 -0.592873
+v -0.312255 0.437827 -0.531651
+v -0.436292 0.437827 -0.436292
+v -0.395417 0.793673 0.232239
+v -0.237775 0.783205 0.404842
+v -0.000000 0.765755 0.478597
+v 0.263277 0.682112 0.448260
+v -0.448260 0.682112 -0.263277
+v -0.461757 0.765755 -0.128512
+v -0.467924 0.783205 0.000000
+v -0.440950 0.793673 0.122721
+v 0.592873 0.437827 0.165003
+v 0.508969 0.517485 0.298931
+v 0.394341 0.599133 0.394341
+v -0.508969 0.517485 -0.298931
+v -0.480530 0.599133 -0.282230
+v -0.531651 0.437827 -0.312254
+v -0.332226 0.783205 0.332226
+v -0.128513 0.765755 0.461757
+v 0.139122 0.682112 0.499882
+v -0.499882 0.682112 -0.139122
+v -0.478597 0.765755 0.000000
+v -0.451461 0.783205 0.125646
+v 0.546669 0.360830 -0.321075
+v 0.448614 0.360830 -0.448614
+v 0.321074 0.360830 -0.546669
+v 0.169664 0.360830 -0.609621
+v -0.000000 0.360830 -0.631850
+v -0.169664 0.360830 -0.609621
+v -0.321075 0.360830 -0.546669
+v -0.448615 0.360830 -0.448614
+v -0.546669 0.360830 -0.321075
+v -0.404842 0.783205 0.237775
+v -0.243198 0.765755 0.414076
+v -0.000000 0.682112 0.518110
+v 0.282230 0.599133 0.480530
+v -0.535866 0.599133 -0.149137
+v -0.461757 0.765755 0.128512
+v 0.531651 0.437827 0.312254
+v 0.417675 0.517481 0.417675
+v 0.609621 0.360830 -0.169664
+v -0.592873 0.437827 -0.165003
+v -0.567578 0.517485 -0.157963
+v -0.609621 0.360830 -0.169664
+v -0.339804 0.765755 0.339804
+v -0.139123 0.682112 0.499882
+v 0.149137 0.599133 0.535866
+v -0.555408 0.599133 0.000000
+v -0.499882 0.682112 0.139122
+v -0.414076 0.765755 0.243198
+v 0.609621 0.360830 0.169664
+v 0.552100 0.287158 -0.324265
+v 0.453072 0.287158 -0.453072
+v 0.324265 0.287158 -0.552100
+v 0.171349 0.287158 -0.615677
+v -0.000000 0.287158 -0.638129
+v -0.171350 0.287158 -0.615677
+v -0.324265 0.287158 -0.552100
+v -0.453072 0.287158 -0.453072
+v -0.552100 0.287158 -0.324265
+v -0.615677 0.287158 -0.171349
+v -0.263278 0.682112 0.448260
+v -0.000000 0.599133 0.555408
+v 0.298931 0.517485 0.508969
+v -0.588275 0.517481 0.000000
+v -0.448260 0.682112 0.263277
+v 0.546669 0.360830 0.321075
+v 0.436292 0.437827 0.436292
+v 0.615677 0.287158 -0.171349
+v -0.631850 0.360830 0.000000
+v -0.614496 0.437827 0.000000
+v -0.367859 0.682112 0.367859
+v -0.149138 0.599133 0.535866
+v 0.157963 0.517481 0.567578
+v -0.567578 0.517481 0.157963
+v -0.480530 0.599133 0.282230
+v 0.615677 0.287158 0.171349
+v 0.541877 0.221240 -0.318259
+v 0.444680 0.221240 -0.444680
+v 0.318259 0.221240 -0.541877
+v 0.168176 0.221240 -0.604276
+v -0.000000 0.221240 -0.626311
+v -0.168177 0.221240 -0.604276
+v -0.318259 0.221240 -0.541877
+v -0.444680 0.221240 -0.444680
+v -0.541877 0.221240 -0.318259
+v -0.604277 0.221240 -0.168176
+v -0.282230 0.599133 0.480530
+v -0.000000 0.517481 0.588275
+v 0.312254 0.437827 0.531651
+v -0.592873 0.437827 0.165003
+v -0.535866 0.599133 0.149137
+v -0.394341 0.599133 0.394341
+v 0.552100 0.287158 0.324265
+v 0.448614 0.360830 0.448614
+v 0.604276 0.221240 -0.168176
+v -0.615677 0.287158 0.171349
+v -0.609621 0.360830 0.169664
+v -0.157963 0.517485 0.567578
+v 0.165003 0.437827 0.592873
+v -0.531651 0.437827 0.312254
+v -0.508969 0.517485 0.298931
+v -0.417675 0.517481 0.417675
+v 0.604276 0.221240 0.168176
+v 0.516317 0.166623 -0.303247
+v 0.423705 0.166623 -0.423705
+v 0.303247 0.166623 -0.516317
+v 0.160243 0.166623 -0.575771
+v -0.000000 0.166623 -0.596769
+v -0.160244 0.166623 -0.575771
+v -0.303247 0.166623 -0.516317
+v -0.423705 0.166623 -0.423705
+v -0.516317 0.166623 -0.303247
+v -0.575771 0.166623 -0.160243
+v -0.298931 0.517485 0.508969
+v -0.000000 0.437827 0.614496
+v 0.321074 0.360830 0.546669
+v -0.546669 0.360830 0.321075
+v 0.541877 0.221240 0.318259
+v 0.453072 0.287158 0.453072
+v 0.575771 0.166623 -0.160243
+v -0.596769 0.166623 0.000000
+v -0.604277 0.221240 0.168176
+v -0.552100 0.287158 0.324265
+v -0.165004 0.437827 0.592873
+v 0.169664 0.360830 0.609621
+v -0.448615 0.360830 0.448614
+v -0.436292 0.437827 0.436292
+v -0.312255 0.437827 0.531651
+v 0.575771 0.166623 0.160243
+v 0.483086 0.122640 -0.283731
+v 0.396438 0.122640 -0.396438
+v 0.283731 0.122640 -0.483086
+v 0.149931 0.122640 -0.538718
+v -0.000000 0.122640 -0.558363
+v -0.149931 0.122640 -0.538718
+v -0.283731 0.122640 -0.483086
+v -0.396438 0.122640 -0.396438
+v -0.483087 0.122640 -0.283731
+v -0.538718 0.122640 -0.149931
+v -0.558363 0.122640 0.000000
+v -0.541877 0.221240 0.318259
+v -0.000000 0.360830 0.631850
+v 0.324265 0.287158 0.552100
+v -0.453072 0.287158 0.453072
+v 0.516317 0.166623 0.303247
+v 0.596768 0.166623 0.000000
+v 0.444680 0.221240 0.444680
+v 0.538718 0.122640 -0.149931
+v -0.538718 0.122640 0.149931
+v -0.516317 0.166623 0.303247
+v -0.444680 0.221240 0.444680
+v -0.169664 0.360830 0.609621
+v 0.171349 0.287158 0.615677
+v -0.324265 0.287158 0.552100
+v -0.321075 0.360830 0.546669
+v 0.538718 0.122640 0.149931
+v 0.558363 0.122640 0.000000
+v 0.449858 0.088629 -0.264215
+v 0.369171 0.088629 -0.369171
+v 0.264215 0.088629 -0.449859
+v 0.139618 0.088629 -0.501662
+v -0.000000 0.088629 -0.519957
+v -0.139618 0.088629 -0.501662
+v -0.264215 0.088629 -0.449859
+v -0.369171 0.088629 -0.369171
+v -0.449859 0.088629 -0.264215
+v -0.501662 0.088629 -0.139618
+v -0.519957 0.088629 0.000000
+v -0.501662 0.088629 0.139618
+v -0.575771 0.166623 0.160243
+v -0.423705 0.166623 0.423705
+v -0.000000 0.287158 0.638129
+v 0.318259 0.221240 0.541877
+v -0.318259 0.221240 0.541877
+v 0.483086 0.122640 0.283731
+v 0.423705 0.166623 0.423705
+v 0.501662 0.088629 -0.139618
+v -0.449859 0.088629 0.264215
+v -0.483087 0.122640 0.283731
+v -0.396438 0.122640 0.396438
+v -0.303247 0.166623 0.516317
+v -0.171350 0.287158 0.615677
+v 0.168176 0.221240 0.604276
+v -0.168177 0.221240 0.604276
+v 0.501662 0.088629 0.139618
+v 0.519957 0.088629 0.000000
+v 0.424299 0.063924 -0.249203
+v 0.348195 0.063924 -0.348195
+v 0.249203 0.063924 -0.424298
+v 0.131685 0.063924 -0.473160
+v -0.000000 0.063924 -0.490415
+v -0.131686 0.063924 -0.473160
+v -0.249203 0.063924 -0.424298
+v -0.348196 0.063924 -0.348195
+v -0.424299 0.063924 -0.249203
+v -0.473160 0.063924 -0.131685
+v -0.490415 0.063924 0.000000
+v -0.473160 0.063924 0.131685
+v -0.424299 0.063924 0.249203
+v -0.283731 0.122640 0.483086
+v -0.000000 0.221240 0.626311
+v 0.303247 0.166623 0.516317
+v -0.160244 0.166623 0.575771
+v 0.449858 0.088629 0.264215
+v 0.396438 0.122640 0.396438
+v 0.473160 0.063924 -0.131685
+v -0.348196 0.063924 0.348195
+v -0.369171 0.088629 0.369171
+v -0.264215 0.088629 0.449859
+v -0.149931 0.122640 0.538718
+v 0.160243 0.166623 0.575771
+v -0.000000 0.166623 0.596769
+v 0.473160 0.063924 0.131685
+v 0.490415 0.063924 0.000000
+v 0.414076 0.047860 -0.243198
+v 0.339803 0.047860 -0.339804
+v 0.243198 0.047860 -0.414076
+v 0.128512 0.047860 -0.461757
+v -0.000000 0.047860 -0.478597
+v -0.128513 0.047860 -0.461757
+v -0.243198 0.047860 -0.414076
+v -0.339804 0.047860 -0.339804
+v -0.414076 0.047860 -0.243198
+v -0.461757 0.047860 -0.128512
+v -0.478597 0.047860 0.000000
+v -0.461757 0.047860 0.128512
+v -0.414076 0.047860 0.243198
+v -0.339804 0.047860 0.339804
+v -0.139618 0.088629 0.501662
+v 0.283731 0.122640 0.483086
+v -0.000000 0.122640 0.558363
+v 0.424299 0.063924 0.249203
+v 0.369171 0.088629 0.369171
+v 0.461756 0.047860 -0.128512
+v -0.243198 0.047860 0.414076
+v -0.249203 0.063924 0.424298
+v -0.131686 0.063924 0.473160
+v -0.000000 0.088629 0.519957
+v 0.149931 0.122640 0.538718
+v 0.461756 0.047860 0.128512
+v 0.478596 0.047860 0.000000
+v 0.410719 0.036005 -0.241228
+v 0.337050 0.036005 -0.337050
+v 0.241227 0.036005 -0.410719
+v 0.127471 0.036005 -0.458017
+v -0.000000 0.036005 -0.474720
+v -0.127471 0.036005 -0.458017
+v -0.241228 0.036005 -0.410719
+v -0.337051 0.036005 -0.337050
+v -0.410719 0.036005 -0.241228
+v -0.458017 0.036005 -0.127471
+v -0.474721 0.036005 0.000000
+v -0.458017 0.036005 0.127471
+v -0.410719 0.036005 0.241228
+v -0.337051 0.036005 0.337050
+v -0.241228 0.036005 0.410719
+v -0.000000 0.063924 0.490415
+v 0.264215 0.088629 0.449859
+v 0.139618 0.088629 0.501662
+v 0.414076 0.047860 0.243198
+v 0.348195 0.063924 0.348195
+v 0.458017 0.036005 -0.127471
+v -0.127471 0.036005 0.458017
+v -0.128513 0.047860 0.461757
+v -0.000000 0.047860 0.478597
+v 0.131685 0.063924 0.473160
+v 0.458017 0.036005 0.127471
+v 0.474720 0.036005 0.000000
+v 0.394137 0.024816 -0.231489
+v 0.323442 0.024816 -0.323442
+v 0.231489 0.024816 -0.394137
+v 0.122324 0.024816 -0.439524
+v -0.000000 0.024816 -0.455554
+v -0.122325 0.024816 -0.439524
+v -0.231489 0.024816 -0.394137
+v -0.323442 0.024816 -0.323442
+v -0.394137 0.024816 -0.231489
+v -0.439524 0.024816 -0.122325
+v -0.455554 0.024816 0.000000
+v -0.439524 0.024816 0.122325
+v -0.394137 0.024816 0.231489
+v -0.323442 0.024816 0.323442
+v -0.231489 0.024816 0.394137
+v -0.122325 0.024816 0.439524
+v 0.128512 0.047860 0.461757
+v 0.249203 0.063924 0.424298
+v 0.410719 0.036005 0.241228
+v 0.339803 0.047860 0.339804
+v 0.439524 0.024816 -0.122325
+v -0.000000 0.036005 0.474720
+v -0.000000 0.024816 0.455554
+v 0.127471 0.036005 0.458017
+v 0.243198 0.047860 0.414076
+v 0.439524 0.024816 0.122325
+v 0.455554 0.024816 0.000000
+v 0.354551 0.014956 -0.208238
+v 0.290957 0.014956 -0.290957
+v 0.208238 0.014956 -0.354551
+v 0.110038 0.014956 -0.395378
+v -0.000000 0.014956 -0.409797
+v -0.110038 0.014956 -0.395378
+v -0.208239 0.014956 -0.354551
+v -0.290957 0.014956 -0.290957
+v -0.354551 0.014956 -0.208238
+v -0.395378 0.014956 -0.110038
+v -0.409797 0.014956 0.000000
+v -0.395378 0.014956 0.110038
+v -0.354551 0.014956 0.208238
+v -0.290957 0.014956 0.290957
+v -0.208239 0.014956 0.354551
+v -0.110038 0.014956 0.395378
+v -0.000000 0.014956 0.409797
+v 0.241227 0.036005 0.410719
+v 0.337050 0.036005 0.337050
+v 0.394137 0.024816 0.231489
+v 0.395378 0.014956 -0.110038
+v 0.122324 0.024816 0.439524
+v 0.110038 0.014956 0.395378
+v 0.231489 0.024816 0.394137
+v 0.395378 0.014956 0.110038
+v 0.409797 0.014956 0.000000
+v 0.282184 0.007090 -0.165735
+v 0.231570 0.007090 -0.231570
+v 0.165735 0.007090 -0.282185
+v 0.087579 0.007090 -0.314679
+v -0.000000 0.007090 -0.326154
+v -0.087579 0.007090 -0.314679
+v -0.165735 0.007090 -0.282185
+v -0.231570 0.007090 -0.231570
+v -0.282184 0.007090 -0.165735
+v -0.314679 0.007090 -0.087579
+v -0.326155 0.007090 0.000000
+v -0.314679 0.007090 0.087579
+v -0.282184 0.007090 0.165735
+v -0.231570 0.007090 0.231570
+v -0.165735 0.007090 0.282185
+v -0.087579 0.007090 0.314679
+v -0.000000 0.007090 0.326154
+v 0.087579 0.007090 0.314679
+v 0.323442 0.024816 0.323442
+v 0.354551 0.014956 0.208238
+v 0.314679 0.007090 -0.087579
+v 0.208238 0.014956 0.354551
+v 0.165735 0.007090 0.282185
+v 0.290957 0.014956 0.290957
+v 0.314679 0.007090 0.087579
+v 0.326154 0.007090 0.000000
+v 0.167259 0.001883 -0.098236
+v 0.137258 0.001883 -0.137259
+v 0.098236 0.001883 -0.167259
+v 0.051910 0.001883 -0.186520
+v -0.000000 0.001883 -0.193322
+v -0.051911 0.001883 -0.186520
+v -0.098237 0.001883 -0.167259
+v -0.137259 0.001883 -0.137259
+v -0.167259 0.001883 -0.098236
+v -0.186520 0.001883 -0.051911
+v -0.193323 0.001883 0.000000
+v -0.186520 0.001883 0.051911
+v -0.167259 0.001883 0.098236
+v -0.137259 0.001883 0.137259
+v -0.098237 0.001883 0.167259
+v -0.051911 0.001883 0.186520
+v -0.000000 0.001883 0.193322
+v 0.051910 0.001883 0.186520
+v 0.098236 0.001883 0.167259
+v 0.282184 0.007090 0.165735
+v 0.186520 0.001883 -0.051911
+v 0.231570 0.007090 0.231570
+v 0.137258 0.001883 0.137259
+v 0.186520 0.001883 0.051911
+v 0.193322 0.001883 0.000000
+v -0.000000 0.000000 0.000000
+v 0.167259 0.001883 0.098236
+v 0.063813 0.861474 0.000000
+v 0.054654 0.888729 0.000000
+v 0.052734 0.888729 0.014691
+v 0.061568 0.861474 0.017135
+v 0.061568 0.861474 -0.017135
+v 0.072979 0.919969 0.020357
+v 0.111968 0.841089 0.000000
+v 0.047296 0.888729 0.027792
+v 0.052734 0.888729 -0.014691
+v 0.108028 0.841089 -0.030065
+v 0.075630 0.919969 0.000000
+v 0.065466 0.919969 0.038494
+v 0.108028 0.841089 0.030065
+v 0.055210 0.861474 0.032427
+v 0.055210 0.861474 -0.032427
+v 0.096873 0.841089 -0.056896
+v 0.100064 0.951211 0.027927
+v 0.089769 0.951211 0.052799
+v 0.183167 0.826023 0.000000
+v 0.176722 0.826023 -0.049184
+v 0.038821 0.888729 0.038821
+v 0.053751 0.919969 0.053751
+v 0.047296 0.888729 -0.027792
+v 0.072979 0.919969 -0.020357
+v 0.158473 0.826023 -0.093076
+v 0.103696 0.951211 0.000000
+v 0.073714 0.951211 0.073714
+v 0.176722 0.826023 0.049184
+v 0.096873 0.841089 0.056896
+v 0.045307 0.861474 0.045307
+v 0.079497 0.841089 -0.079497
+v 0.045307 0.861474 -0.045307
+v 0.130048 0.826023 -0.130048
+v 0.111754 0.978466 0.031195
+v 0.100259 0.978466 0.058974
+v 0.082330 0.978466 0.082330
+v 0.263228 0.813615 0.000000
+v 0.253966 0.813615 -0.070682
+v 0.227741 0.813615 -0.133759
+v 0.027792 0.888729 0.047296
+v 0.038494 0.919969 0.065466
+v 0.052799 0.951211 0.089769
+v 0.038821 0.888729 -0.038821
+v 0.065466 0.919969 -0.038494
+v 0.100064 0.951211 -0.027927
+v 0.186892 0.813615 -0.186892
+v 0.115809 0.978466 0.000000
+v 0.058974 0.978466 0.100259
+v 0.253966 0.813615 0.070682
+v 0.158473 0.826023 0.093076
+v 0.079497 0.841089 0.079497
+v 0.032426 0.861474 0.055210
+v 0.093076 0.826023 -0.158473
+v 0.056896 0.841089 -0.096873
+v 0.032426 0.861474 -0.055210
+v 0.133759 0.813615 -0.227741
+v 0.085811 0.997741 0.023955
+v 0.076985 0.997741 0.045285
+v 0.063219 0.997741 0.063219
+v 0.045285 0.997741 0.076986
+v 0.337972 0.801206 0.000000
+v 0.326081 0.801206 -0.090752
+v 0.292408 0.801206 -0.171740
+v 0.239960 0.801206 -0.239960
+v 0.014691 0.888729 0.052735
+v 0.020357 0.919969 0.072979
+v 0.027927 0.951211 0.100064
+v 0.031195 0.978466 0.111754
+v 0.027792 0.888729 -0.047296
+v 0.053751 0.919969 -0.053751
+v 0.089769 0.951211 -0.052799
+v 0.111754 0.978466 -0.031195
+v 0.171740 0.801206 -0.292408
+v 0.088924 0.997741 0.000000
+v 0.023955 0.997741 0.085811
+v 0.326081 0.801206 0.090752
+v 0.227741 0.813615 0.133759
+v 0.130048 0.826023 0.130048
+v 0.056896 0.841089 0.096873
+v 0.017135 0.861474 0.061568
+v 0.070682 0.813615 -0.253966
+v 0.049184 0.826023 -0.176722
+v 0.030065 0.841089 -0.108029
+v 0.017135 0.861474 -0.061568
+v 0.090752 0.801206 -0.326081
+v -0.000000 1.005054 0.000000
+v 0.393218 0.786140 0.000000
+v 0.379380 0.786140 -0.105586
+v 0.340206 0.786140 -0.199813
+v 0.279184 0.786140 -0.279184
+v 0.199813 0.786140 -0.340206
+v -0.000000 0.888729 0.054654
+v -0.000000 0.919969 0.075630
+v -0.000000 0.951211 0.103696
+v -0.000000 0.978466 0.115809
+v -0.000000 0.997741 0.088925
+v 0.014691 0.888729 -0.052735
+v 0.038494 0.919969 -0.065466
+v 0.073714 0.951211 -0.073714
+v 0.100259 0.978466 -0.058974
+v 0.085811 0.997741 -0.023955
+v 0.105586 0.786140 -0.379381
+v 0.379380 0.786140 0.105586
+v 0.292408 0.801206 0.171740
+v 0.186892 0.813615 0.186892
+v 0.093076 0.826023 0.158473
+v 0.030065 0.841089 0.108029
+v -0.000000 0.861474 0.063813
+v -0.000000 0.801206 -0.337972
+v -0.000000 0.813615 -0.263228
+v -0.000000 0.826023 -0.183167
+v -0.000000 0.841089 -0.111968
+v -0.000000 0.861474 -0.063813
+v -0.000000 0.786140 -0.393218
+v 0.076985 0.997741 -0.045285
+v -0.023955 0.997741 0.085811
+v 0.414784 0.765755 0.000000
+v 0.400190 0.765755 -0.111377
+v 0.358865 0.765755 -0.210772
+v 0.294497 0.765755 -0.294497
+v 0.210772 0.765755 -0.358865
+v 0.111377 0.765755 -0.400190
+v -0.014691 0.888729 0.052735
+v -0.020357 0.919969 0.072979
+v -0.027927 0.951211 0.100064
+v -0.031195 0.978466 0.111754
+v -0.000000 0.888729 -0.054654
+v 0.020357 0.919969 -0.072979
+v 0.052799 0.951211 -0.089769
+v 0.082330 0.978466 -0.082330
+v -0.000000 0.765755 -0.414784
+v 0.063219 0.997741 -0.063219
+v -0.045285 0.997741 0.076986
+v 0.400190 0.765755 0.111377
+v 0.340206 0.786140 0.199813
+v 0.239960 0.801206 0.239960
+v 0.133759 0.813615 0.227741
+v 0.049184 0.826023 0.176722
+v -0.000000 0.841089 0.111968
+v -0.017135 0.861474 0.061568
+v -0.105586 0.786140 -0.379381
+v -0.090752 0.801206 -0.326081
+v -0.070682 0.813615 -0.253966
+v -0.049184 0.826023 -0.176722
+v -0.030066 0.841089 -0.108029
+v -0.017135 0.861474 -0.061568
+v -0.111377 0.765755 -0.400190
+v 0.045285 0.997741 -0.076986
+v -0.063220 0.997741 0.063219
+v 0.414952 0.750806 0.115486
+v 0.430085 0.750806 0.000000
+v 0.414952 0.750806 -0.115486
+v 0.372103 0.750806 -0.218547
+v 0.305360 0.750806 -0.305360
+v 0.218547 0.750806 -0.372103
+v 0.115486 0.750806 -0.414952
+v -0.000000 0.750806 -0.430085
+v -0.027793 0.888729 0.047296
+v -0.038494 0.919969 0.065466
+v -0.052799 0.951211 0.089769
+v -0.058974 0.978466 0.100259
+v -0.014691 0.888729 -0.052735
+v -0.000000 0.919969 -0.075630
+v 0.027927 0.951211 -0.100064
+v 0.058974 0.978466 -0.100259
+v -0.115486 0.750806 -0.414952
+v 0.023955 0.997741 -0.085811
+v -0.076986 0.997741 0.045285
+v 0.372103 0.750806 0.218547
+v 0.358865 0.765755 0.210772
+v 0.279184 0.786140 0.279184
+v 0.171740 0.801206 0.292408
+v 0.070682 0.813615 0.253966
+v -0.000000 0.826023 0.183167
+v -0.030066 0.841089 0.108029
+v -0.032427 0.861474 0.055210
+v -0.210772 0.765755 -0.358865
+v -0.199813 0.786140 -0.340206
+v -0.171740 0.801206 -0.292408
+v -0.133759 0.813615 -0.227741
+v -0.093076 0.826023 -0.158473
+v -0.056896 0.841089 -0.096873
+v -0.032427 0.861474 -0.055210
+v -0.218547 0.750806 -0.372103
+v 0.031195 0.978466 -0.111754
+v -0.000000 0.997741 -0.088925
+v -0.082331 0.978466 0.082330
+v -0.085811 0.997741 0.023955
+v 0.305360 0.750806 0.305360
+v 0.294497 0.765755 0.294497
+v -0.038821 0.888729 0.038821
+v -0.053751 0.919969 0.053751
+v -0.073714 0.951211 0.073714
+v -0.027793 0.888729 -0.047296
+v -0.020357 0.919969 -0.072979
+v -0.000000 0.951211 -0.103696
+v -0.305360 0.750806 -0.305360
+v -0.294497 0.765755 -0.294497
+v -0.000000 0.978466 -0.115809
+v -0.023955 0.997741 -0.085811
+v -0.100259 0.978466 0.058974
+v -0.088925 0.997741 0.000000
+v 0.210772 0.765755 0.358865
+v 0.218547 0.750806 0.372103
+v 0.199813 0.786140 0.340206
+v 0.090752 0.801206 0.326081
+v -0.000000 0.813615 0.263228
+v -0.049184 0.826023 0.176722
+v -0.056896 0.841089 0.096873
+v -0.045307 0.861474 0.045307
+v -0.279185 0.786140 -0.279184
+v -0.239960 0.801206 -0.239960
+v -0.186892 0.813615 -0.186892
+v -0.130049 0.826023 -0.130048
+v -0.079497 0.841089 -0.079497
+v -0.045307 0.861474 -0.045307
+v -0.372103 0.750806 -0.218547
+v -0.358865 0.765755 -0.210772
+v -0.031195 0.978466 -0.111754
+v -0.045285 0.997741 -0.076986
+v -0.111754 0.978466 0.031195
+v -0.085811 0.997741 -0.023955
+v 0.111377 0.765755 0.400190
+v 0.115486 0.750806 0.414952
+v -0.047296 0.888729 0.027792
+v -0.065466 0.919969 0.038494
+v -0.089770 0.951211 0.052799
+v -0.038821 0.888729 -0.038821
+v -0.038494 0.919969 -0.065466
+v -0.027927 0.951211 -0.100064
+v -0.414952 0.750806 -0.115486
+v -0.400190 0.765755 -0.111377
+v -0.058974 0.978466 -0.100259
+v -0.063220 0.997741 -0.063219
+v -0.115809 0.978466 0.000000
+v -0.076986 0.997741 -0.045285
+v 0.105586 0.786140 0.379381
+v -0.000000 0.765755 0.414784
+v -0.000000 0.750806 0.430085
+v -0.000000 0.801206 0.337972
+v -0.070682 0.813615 0.253966
+v -0.093076 0.826023 0.158473
+v -0.079497 0.841089 0.079497
+v -0.055210 0.861474 0.032427
+v -0.340206 0.786140 -0.199813
+v -0.292408 0.801206 -0.171740
+v -0.227741 0.813615 -0.133759
+v -0.158473 0.826023 -0.093076
+v -0.096873 0.841089 -0.056896
+v -0.055210 0.861474 -0.032427
+v -0.430085 0.750806 0.000000
+v -0.414784 0.765755 0.000000
+v -0.052799 0.951211 -0.089769
+v -0.082331 0.978466 -0.082330
+v -0.100064 0.951211 0.027927
+v -0.111754 0.978466 -0.031195
+v -0.000000 0.786140 0.393218
+v -0.115486 0.750806 0.414952
+v -0.111377 0.765755 0.400190
+v -0.052735 0.888729 0.014691
+v -0.072979 0.919969 0.020357
+v -0.047296 0.888729 -0.027792
+v -0.053751 0.919969 -0.053751
+v -0.414952 0.750806 0.115486
+v -0.400190 0.765755 0.111377
+v -0.379381 0.786140 -0.105586
+v -0.073714 0.951211 -0.073714
+v -0.100259 0.978466 -0.058974
+v -0.103696 0.951211 0.000000
+v -0.105586 0.786140 0.379381
+v -0.218547 0.750806 0.372103
+v -0.210772 0.765755 0.358865
+v -0.090752 0.801206 0.326081
+v -0.133759 0.813615 0.227741
+v -0.130049 0.826023 0.130048
+v -0.096873 0.841089 0.056896
+v -0.061568 0.861474 0.017135
+v -0.326081 0.801206 -0.090752
+v -0.253966 0.813615 -0.070682
+v -0.176722 0.826023 -0.049184
+v -0.108029 0.841089 -0.030065
+v -0.061568 0.861474 -0.017135
+v -0.372103 0.750806 0.218547
+v -0.358865 0.765755 0.210772
+v -0.393219 0.786140 0.000000
+v -0.089770 0.951211 -0.052799
+v -0.100064 0.951211 -0.027927
+v -0.199813 0.786140 0.340206
+v -0.305360 0.750806 0.305360
+v -0.294497 0.765755 0.294497
+v -0.054655 0.888729 0.000000
+v -0.075630 0.919969 0.000000
+v -0.052735 0.888729 -0.014691
+v -0.065466 0.919969 -0.038494
+v -0.379381 0.786140 0.105586
+v -0.171740 0.801206 0.292408
+v -0.279185 0.786140 0.279184
+v -0.186892 0.813615 0.186892
+v -0.158473 0.826023 0.093076
+v -0.108029 0.841089 0.030065
+v -0.063813 0.861474 0.000000
+v -0.337972 0.801206 0.000000
+v -0.263228 0.813615 0.000000
+v -0.183167 0.826023 0.000000
+v -0.111968 0.841089 0.000000
+v -0.340206 0.786140 0.199813
+v -0.072979 0.919969 -0.020357
+v -0.239960 0.801206 0.239960
+v -0.326081 0.801206 0.090752
+v -0.292408 0.801206 0.171740
+v -0.227741 0.813615 0.133759
+v -0.176722 0.826023 0.049184
+v -0.253966 0.813615 0.070682
+v -0.526706 0.651362 -0.039883
+v -0.534329 0.646030 0.000000
+v -0.619922 0.238069 -0.071790
+v -0.624826 0.259599 -0.063813
+v -0.638129 0.287158 0.000000
+v -0.631184 0.277569 0.039883
+v -0.501666 0.699221 -0.063813
+v -0.508714 0.682112 -0.071712
+v -0.611709 0.194244 0.000000
+v -0.608883 0.198681 0.039883
+v -0.517593 0.664661 0.063813
+v -0.508714 0.682112 0.071712
+v -0.631184 0.277569 -0.039883
+v -0.624828 0.259599 0.063813
+v -0.615480 0.216617 0.063578
+v -0.615553 0.216807 -0.063813
+v -0.517593 0.664661 -0.063813
+v -0.498530 0.712498 -0.039883
+v -0.619922 0.238069 0.071790
+v -0.526706 0.651362 0.039883
+v -0.608884 0.198682 -0.039883
+v 0.605100 0.399712 0.137265
+v 0.613258 0.341675 0.154354
+v 0.605956 0.463769 0.000000
+v 0.600959 0.444810 -0.085753
+v 0.613258 0.341675 -0.154354
+v 0.605101 0.399712 -0.137265
+v 0.600960 0.444810 0.085753
+v 0.121642 0.750009 -0.437072
+v -0.000000 0.750009 -0.453012
+v 0.453011 0.750009 0.000000
+v 0.437073 0.750009 -0.121642
+v -0.453012 0.750009 0.000000
+v -0.437073 0.750009 -0.121642
+v -0.230198 0.750009 0.391939
+v -0.321639 0.750009 0.321639
+v -0.391940 0.750009 0.230197
+v -0.437073 0.750009 0.121642
+v 0.121642 0.750009 0.437072
+v -0.000000 0.750009 0.453012
+v -0.121642 0.750009 0.437072
+v 0.437073 0.750009 0.121642
+v 0.391939 0.750009 0.230197
+v 0.321638 0.750009 -0.321639
+v 0.230197 0.750009 -0.391940
+v -0.121642 0.750009 -0.437072
+v 0.391939 0.750009 -0.230197
+v 0.321638 0.750009 0.321639
+v 0.230197 0.750009 0.391940
+v -0.230198 0.750009 -0.391939
+v -0.501255 0.717792 0.000000
+v 0.617684 0.235930 0.085941
+v 0.625577 0.219883 0.000000
+v -0.321639 0.750009 -0.321639
+v -0.391940 0.750009 -0.230197
+v -0.498530 0.712498 0.039883
+v -0.501667 0.699221 0.063813
+v 0.617684 0.235930 -0.085941
+v 0.619427 0.283145 -0.137236
+v 0.619427 0.283145 0.137236
+vn -0.901883 0.415418 0.118168
+vn -0.905637 0.407056 0.118656
+vn -0.877041 0.418744 0.235298
+vn 0.058443 -0.998260 0.000732
+vn 0.015107 -0.999878 0.000183
+vn 0.014557 -0.949278 0.314035
+vn 0.056703 -0.947539 0.314524
+vn 0.162053 -0.986755 0.002014
+vn 0.157933 -0.933592 0.321604
+vn 0.392376 -0.919767 0.004334
+vn 0.378307 -0.856655 0.350688
+vn 0.783776 -0.620991 0.005249
+vn 0.726829 -0.553880 0.406079
+vn 0.994812 -0.101627 0.001984
+vn 0.908139 -0.082766 0.410321
+vn 0.003082 -0.939787 0.341685
+vn 0.002167 -0.619495 0.784967
+vn 0.011536 -0.679403 0.733634
+vn 0.044679 -0.675588 0.735923
+vn 0.123325 -0.652272 0.747856
+vn 0.275399 -0.556871 0.783593
+vn 0.460067 -0.316263 0.829615
+vn 0.563036 -0.041200 0.825373
+vn -0.000427 0.122166 0.992492
+vn 0.000397 0.003632 0.999969
+vn 0.002869 0.011841 0.999908
+vn 0.004852 0.029298 0.999542
+vn -0.008179 0.053499 0.998505
+vn -0.046510 0.041536 0.998047
+vn -0.039155 0.003113 0.999207
+vn -0.850551 0.473769 -0.228217
+vn -0.897885 0.424177 -0.117649
+vn -0.880886 0.473281 0.000000
+vn -0.013611 0.682394 0.730827
+vn -0.053896 0.680441 0.730796
+vn -0.147557 0.656789 0.739464
+vn -0.325968 0.560564 0.761223
+vn -0.537645 0.315806 0.781762
+vn -0.611530 0.029939 0.790613
+vn -0.904172 0.427137 0.000000
+vn -0.897885 0.424146 0.117618
+vn -0.020112 0.949461 0.313150
+vn -0.081820 0.945433 0.315287
+vn -0.227699 0.916379 0.329173
+vn -0.504196 0.785302 0.359203
+vn -0.810633 0.443220 0.382611
+vn -0.921232 0.039705 0.386944
+vn -0.020569 0.949400 -0.313334
+vn -0.021729 0.999756 -0.000092
+vn -0.004242 0.950468 -0.310770
+vn -0.088260 0.996094 -0.000488
+vn -0.246895 0.969024 -0.001343
+vn -0.549730 0.835322 -0.002350
+vn -0.880673 0.473647 -0.001984
+vn -0.999084 0.042146 -0.000610
+vn -0.877041 0.418744 -0.235298
+vn -0.920286 0.391156 0.000000
+vn -0.905637 0.407056 -0.118656
+vn -0.083132 0.945006 -0.316202
+vn -0.230201 0.914823 -0.331797
+vn -0.505570 0.782800 -0.362743
+vn -0.808710 0.444960 -0.384625
+vn -0.920835 0.042055 -0.387646
+vn -0.897885 0.424146 -0.117618
+vn -0.901883 0.415448 -0.118168
+vn -0.014161 0.682394 -0.730796
+vn -0.055361 0.680074 -0.731010
+vn -0.150029 0.655660 -0.739982
+vn -0.327616 0.560594 -0.760491
+vn -0.537431 0.320933 -0.779809
+vn -0.611988 0.033387 -0.790155
+vn 0.015168 -0.949339 -0.313852
+vn 0.011902 -0.679403 -0.733634
+vn 0.003265 -0.939817 -0.341594
+vn 0.000183 0.004212 -0.999969
+vn 0.003510 0.014008 -0.999878
+vn 0.005921 0.035951 -0.999329
+vn -0.010132 0.064333 -0.997864
+vn -0.051576 0.048463 -0.997467
+vn -0.041597 0.003998 -0.999115
+vn 0.003082 -0.620106 -0.784478
+vn -0.000031 0.122440 -0.992462
+vn -0.897885 0.424177 0.117649
+vn 0.046449 -0.674398 -0.736869
+vn 0.125980 -0.648946 -0.750298
+vn 0.275430 -0.552477 -0.786676
+vn 0.455519 -0.320536 -0.830500
+vn 0.561693 -0.046480 -0.826014
+vn -0.888668 0.391644 0.238441
+vn 0.058046 -0.947630 -0.314005
+vn 0.159948 -0.933836 -0.319865
+vn 0.380169 -0.857753 -0.345927
+vn 0.725547 -0.560930 -0.398602
+vn 0.908597 -0.089236 -0.407971
+vn 0.003235 -0.999969 0.000031
+vn 0.973144 0.230110 0.000824
+vn 0.890896 0.211737 0.401776
+vn 0.912900 0.408094 0.002533
+vn 0.836970 0.380932 0.392834
+vn 0.829035 0.559160 0.003784
+vn 0.764519 0.528550 0.368969
+vn 0.718650 0.695334 0.003937
+vn 0.668294 0.663717 0.335917
+vn 0.579577 0.814905 0.002838
+vn 0.542650 0.779687 0.312357
+vn 0.495163 0.868770 0.002258
+vn 0.458052 0.820643 0.341624
+vn 0.561205 0.137028 0.816218
+vn 0.532029 0.253456 0.807886
+vn 0.497543 0.363445 0.787591
+vn 0.449538 0.472060 0.758293
+vn 0.373669 0.563555 0.736686
+vn 0.289041 0.531114 0.796442
+vn -0.023225 -0.005249 0.999695
+vn -0.016785 -0.010254 0.999786
+vn -0.011444 -0.012940 0.999847
+vn -0.009796 -0.013276 0.999847
+vn -0.014801 -0.013916 0.999786
+vn -0.089755 -0.176122 0.980255
+vn -0.585772 -0.152379 0.795984
+vn -0.538896 -0.288766 0.791314
+vn -0.484146 -0.407910 0.774071
+vn -0.424635 -0.509781 0.748161
+vn -0.355907 -0.584765 0.728935
+vn -0.889828 -0.237159 0.389782
+vn -0.808740 -0.446852 0.382366
+vn -0.702475 -0.613269 0.361095
+vn -0.590625 -0.734855 0.333293
+vn -0.483291 -0.816767 0.315104
+vn -0.912076 0.409955 0.000000
+vn -0.965606 -0.259987 -0.000458
+vn -0.872433 -0.488693 -0.001465
+vn -0.748436 -0.663167 -0.002197
+vn -0.621601 -0.783288 -0.002136
+vn -0.507065 -0.861873 -0.001251
+vn -0.438215 -0.854366 0.279183
+vn -0.456130 -0.889889 -0.000732
+vn -0.889126 -0.238868 -0.390332
+vn -0.807001 -0.448531 -0.384075
+vn -0.700980 -0.613392 -0.363750
+vn -0.590442 -0.733757 -0.336039
+vn -0.484787 -0.815332 -0.316477
+vn -0.440962 -0.852931 -0.279305
+vn -0.359691 -0.584185 -0.727531
+vn -0.358074 -0.682241 -0.637410
+vn -0.585467 -0.154668 -0.795770
+vn -0.538499 -0.291696 -0.790490
+vn -0.484512 -0.409772 -0.772851
+vn -0.426496 -0.510056 -0.746910
+vn -0.909543 -0.399274 -0.115207
+vn -0.971191 -0.204688 -0.121891
+vn -0.912931 -0.326609 -0.244606
+vn -0.020478 -0.017853 -0.999603
+vn -0.024537 -0.005737 -0.999664
+vn -0.020844 -0.012207 -0.999695
+vn -0.017548 -0.016846 -0.999695
+vn -0.016724 -0.018097 -0.999695
+vn -0.909116 -0.400311 0.115055
+vn -0.873775 -0.472610 0.114475
+vn -0.795892 -0.566485 0.213538
+vn -0.353069 -0.684103 0.638203
+vn 0.559679 0.139714 -0.816828
+vn 0.528581 0.255501 -0.809473
+vn 0.494217 0.362987 -0.789911
+vn 0.449049 0.469283 -0.760308
+vn 0.378246 0.560869 -0.736412
+vn -0.091983 -0.174383 -0.980346
+vn 0.295267 0.530625 -0.794488
+vn 0.890500 0.214759 -0.401044
+vn 0.836634 0.384075 -0.390515
+vn 0.765191 0.530198 -0.365123
+vn 0.671041 0.663228 -0.331339
+vn 0.547929 0.777642 -0.308206
+vn 0.464522 0.818842 -0.337199
+vn 0.931486 0.265572 -0.248543
+vn 0.939543 0.342357 0.000000
+vn 0.947539 0.295114 -0.122684
+vn -0.351421 0.936186 0.001953
+vn -0.144444 0.989502 0.003174
+vn -0.126743 0.878811 0.459975
+vn -0.716758 0.697287 -0.000946
+vn -0.299997 0.838313 0.455214
+vn -0.621876 0.660207 0.421155
+vn -0.901822 0.432081 -0.004517
+vn -0.807031 0.443434 0.389904
+vn -0.930204 0.366863 -0.008484
+vn -0.824549 0.383312 0.416059
+vn -0.850673 0.525529 -0.011628
+vn -0.722465 0.508988 0.467910
+vn -0.668447 0.743645 -0.011139
+vn -0.531449 0.686514 0.496170
+vn -0.116459 0.505448 0.854946
+vn -0.258400 0.470656 0.843593
+vn -0.407605 0.396985 0.822321
+vn -0.450270 0.352367 0.820399
+vn -0.385876 0.395734 0.833338
+vn -0.270669 0.487838 0.829890
+vn 0.141606 -0.001190 0.989898
+vn -0.067690 0.525346 0.848170
+vn 0.989593 0.100253 0.103122
+vn 0.970244 0.213324 0.114475
+vn 0.960418 0.152654 0.232917
+vn 0.241829 0.092502 0.965880
+vn 0.209296 0.170660 0.962828
+vn 0.096194 0.178625 0.979186
+vn 0.009552 0.154332 0.987945
+vn -0.000122 0.151952 0.988372
+vn 0.361248 -0.477279 0.801019
+vn 0.607929 -0.282540 0.741997
+vn 0.679220 -0.106754 0.726096
+vn 0.583911 -0.078524 0.807978
+vn 0.402722 -0.205237 0.891995
+vn 0.279519 -0.338694 0.898404
+vn 0.488601 -0.768700 0.412671
+vn 0.784570 -0.501511 0.364544
+vn 0.893918 -0.279611 0.350291
+vn 0.861415 -0.285287 0.420179
+vn 0.679373 -0.540452 0.496323
+vn 0.458327 -0.754540 0.469588
+vn 0.524155 -0.851588 -0.000153
+vn 0.827143 -0.561968 0.001679
+vn 0.943205 -0.332133 0.003479
+vn 0.933256 -0.359081 0.005737
+vn 0.756859 -0.653493 0.006470
+vn 0.492843 -0.870083 0.004120
+vn 0.322489 -0.946562 -0.001129
+vn 0.912839 -0.326609 0.244942
+vn 0.824396 -0.565996 0.000000
+vn 0.894162 -0.447676 0.000000
+vn 0.486557 -0.770501 -0.411756
+vn 0.783990 -0.504074 -0.362285
+vn 0.895199 -0.281899 -0.345134
+vn 0.863735 -0.289010 -0.412824
+vn 0.682119 -0.544267 -0.488296
+vn 0.461928 -0.758202 -0.460128
+vn 0.357463 -0.479141 -0.801599
+vn 0.605884 -0.285867 -0.742393
+vn 0.679739 -0.108646 -0.725333
+vn 0.583636 -0.077883 -0.808222
+vn 0.401440 -0.199225 -0.893918
+vn 0.281961 -0.330638 -0.900632
+vn 0.135228 -0.002380 -0.990783
+vn 0.235755 0.091128 -0.967498
+vn 0.200720 0.170629 -0.964660
+vn 0.086123 0.184576 -0.979003
+vn 0.000671 0.174322 -0.984680
+vn -0.007141 0.169012 -0.985565
+vn 0.966613 -0.042848 -0.252602
+vn 0.960418 0.152654 -0.232917
+vn 0.989593 0.100223 -0.103092
+vn -0.119297 0.503342 -0.855770
+vn -0.261269 0.472793 -0.841517
+vn -0.414075 0.399792 -0.817713
+vn -0.459700 0.357036 -0.813105
+vn -0.394024 0.408490 -0.823298
+vn -0.277871 0.487381 -0.827754
+vn -0.072207 0.520127 -0.851009
+vn -0.298654 0.840297 -0.452376
+vn -0.617512 0.663961 -0.421613
+vn -0.801324 0.450209 -0.393872
+vn -0.819422 0.389691 -0.420270
+vn -0.721274 0.506912 -0.471969
+vn -0.538347 0.670644 -0.510239
+vn -0.482009 0.876125 -0.005127
+vn -0.394635 0.815363 0.423536
+vn -0.321909 0.946745 -0.002594
+vn -0.255287 0.921995 0.291086
+vn 0.004242 0.999969 0.000397
+vn -0.002960 0.999939 -0.010102
+vn 0.853450 0.521073 -0.007050
+vn 0.392041 0.688986 -0.609546
+vn 0.805170 -0.592517 -0.023621
+vn 0.588763 -0.206122 -0.781579
+vn 0.681478 -0.731040 -0.033296
+vn 0.485031 -0.441237 -0.754967
+vn -0.206824 0.638203 0.741539
+vn -0.129490 0.862056 0.489944
+vn -0.033418 0.999176 0.022340
+vn 0.047700 0.864040 -0.501114
+vn 0.099307 0.538743 -0.836573
+vn 0.053560 0.251839 -0.966277
+vn 0.020112 0.322611 0.946287
+vn 0.021943 0.748894 0.662282
+vn -0.025941 0.995605 0.089908
+vn -0.059175 0.930876 -0.360424
+vn -0.080172 0.784448 -0.614948
+vn -0.142582 0.557604 -0.817743
+vn 0.281747 -0.174993 0.943388
+vn 0.303903 0.444136 0.842830
+vn 0.034333 0.983764 0.175970
+vn -0.113865 0.956420 -0.268777
+vn -0.155339 0.858852 -0.488021
+vn -0.207404 0.641865 -0.738182
+vn 0.467238 -0.683187 0.561144
+vn 0.699515 0.004364 0.714560
+vn 0.355296 0.891934 0.279641
+vn -0.170354 0.971465 -0.164861
+vn -0.245552 0.901181 -0.357097
+vn -0.248726 0.651082 -0.717063
+vn 0.494430 -0.869167 0.006317
+vn 0.933134 -0.358898 0.019868
+vn 0.703146 0.710685 0.021790
+vn -0.203650 0.978942 -0.012574
+vn -0.326456 0.942869 -0.066073
+vn -0.397595 0.857082 -0.327525
+vn 0.460616 -0.712546 -0.529221
+vn 0.695486 -0.098544 -0.711722
+vn 0.397534 0.853816 -0.336070
+vn -0.198248 0.963439 0.180151
+vn -0.306833 0.888516 0.341075
+vn -0.393689 0.807672 0.438887
+vn 0.276559 -0.211951 -0.937315
+vn 0.294626 0.366161 -0.882656
+vn 0.046632 0.973235 -0.224982
+vn -0.146733 0.912168 0.382611
+vn -0.202643 0.700797 0.683950
+vn -0.232673 0.506485 0.830226
+vn 0.011689 0.309397 -0.950835
+vn 0.013245 0.708640 -0.705435
+vn -0.027589 0.995758 -0.087680
+vn -0.047395 0.829371 0.556658
+vn -0.015259 0.386608 0.922086
+vn 0.004639 0.119510 0.992798
+vn -0.211035 0.631001 -0.746513
+vn -0.138768 0.848781 -0.510147
+vn -0.024995 0.999664 -0.001190
+vn 0.120426 0.724570 0.678579
+vn 0.260750 0.006531 0.965361
+vn 0.272500 -0.255898 0.927488
+vn -0.395581 0.809961 -0.432905
+vn -0.258827 0.918851 -0.297800
+vn 0.004730 0.999878 0.013031
+vn 0.466628 0.599780 0.649983
+vn 0.621937 -0.400861 0.672628
+vn 0.551042 -0.592181 0.587878
+vn 0.649068 0.384991 -0.656087
+vn 0.055971 0.691214 0.720450
+vn -0.115940 0.804590 0.582354
+vn -0.262185 0.897946 0.353435
+vn -0.341655 0.885006 -0.316263
+vn -0.081423 0.793085 -0.603626
+vn -0.155675 0.857753 -0.489883
+vn 0.728690 0.365978 0.578784
+vn 0.318674 0.539384 0.779382
+vn 0.953551 0.300150 -0.024415
+vn -0.133976 0.836818 -0.530808
+vn 0.095401 0.667959 -0.738029
+vn 0.000000 0.999969 0.000000
+vn -0.992523 -0.122013 0.000000
+vn -0.937346 -0.348338 0.000000
+vn -0.905148 -0.348827 -0.242836
+vn -0.958617 -0.122227 -0.257057
+vn -0.832057 0.554674 0.000000
+vn -0.803217 0.555376 -0.215339
+vn -0.048616 0.998810 0.000000
+vn -0.046236 0.998840 -0.012726
+vn 0.544267 0.838893 0.000000
+vn 0.525376 0.839106 0.140843
+vn 0.783471 0.621387 0.000000
+vn 0.756371 0.621845 0.202918
+vn 0.880886 0.473281 0.000000
+vn 0.850551 0.473769 0.228217
+vn -0.810907 -0.349376 -0.469375
+vn -0.859004 -0.122410 -0.497085
+vn -0.719657 0.555559 -0.416425
+vn -0.041749 0.998810 -0.024415
+vn 0.470077 0.839625 0.272011
+vn 0.677236 0.622608 0.391980
+vn 0.761803 0.474471 0.440962
+vn -0.662465 -0.349620 -0.662465
+vn -0.701773 -0.122440 -0.701773
+vn -0.587878 0.555650 -0.587878
+vn -0.034272 0.998810 -0.034272
+vn 0.383831 0.839808 0.383831
+vn 0.553148 0.622913 0.553148
+vn 0.622303 0.474776 0.622303
+vn -0.469375 -0.349376 -0.810907
+vn -0.497085 -0.122379 -0.859004
+vn -0.416425 0.555559 -0.719657
+vn -0.024415 0.998810 -0.041749
+vn 0.272011 0.839625 0.470077
+vn 0.391980 0.622608 0.677236
+vn 0.440962 0.474471 0.761834
+vn -0.242836 -0.348827 -0.905148
+vn -0.257057 -0.122227 -0.958617
+vn -0.215339 0.555376 -0.803217
+vn -0.012726 0.998840 -0.046205
+vn 0.140843 0.839106 0.525376
+vn 0.202918 0.621845 0.756371
+vn 0.228217 0.473769 0.850551
+vn 0.000000 -0.348338 -0.937346
+vn 0.000000 -0.122013 -0.992523
+vn 0.000000 0.554674 -0.832057
+vn 0.000000 0.998810 -0.048616
+vn 0.000000 0.838893 0.544267
+vn 0.000000 0.621387 0.783471
+vn 0.000000 0.473281 0.880886
+vn 0.242836 -0.348827 -0.905148
+vn 0.257057 -0.122227 -0.958617
+vn 0.215308 0.555376 -0.803217
+vn 0.012726 0.998840 -0.046205
+vn -0.140843 0.839106 0.525376
+vn -0.202918 0.621845 0.756340
+vn -0.228217 0.473769 0.850551
+vn 0.469375 -0.349376 -0.810907
+vn 0.497085 -0.122379 -0.859004
+vn 0.416425 0.555559 -0.719657
+vn 0.024415 0.998810 -0.041749
+vn -0.272011 0.839625 0.470077
+vn -0.391980 0.622608 0.677236
+vn -0.440962 0.474502 0.761803
+vn 0.662465 -0.349620 -0.662465
+vn 0.701773 -0.122471 -0.701773
+vn 0.587878 0.555650 -0.587878
+vn 0.034272 0.998810 -0.034272
+vn -0.383831 0.839808 0.383831
+vn -0.553148 0.622913 0.553148
+vn -0.622303 0.474776 0.622303
+vn 0.810907 -0.349406 -0.469375
+vn 0.859004 -0.122379 -0.497085
+vn 0.719657 0.555559 -0.416425
+vn 0.041749 0.998810 -0.024415
+vn -0.470077 0.839625 0.272011
+vn -0.677236 0.622608 0.391980
+vn -0.761803 0.474471 0.440962
+vn 0.905148 -0.348827 -0.242836
+vn 0.958617 -0.122227 -0.257057
+vn 0.803217 0.555376 -0.215339
+vn 0.046205 0.998840 -0.012726
+vn -0.525376 0.839106 0.140843
+vn -0.756340 0.621876 0.202918
+vn -0.850551 0.473769 0.228217
+vn 0.937346 -0.348338 0.000000
+vn 0.992523 -0.122013 0.000000
+vn 0.832026 0.554674 0.000000
+vn 0.048616 0.998810 0.000000
+vn -0.544267 0.838893 0.000000
+vn -0.783471 0.621387 0.000000
+vn 0.905148 -0.348827 0.242836
+vn 0.958617 -0.122227 0.257057
+vn 0.803217 0.555376 0.215308
+vn 0.046205 0.998840 0.012726
+vn -0.525376 0.839106 -0.140843
+vn -0.756340 0.621876 -0.202918
+vn 0.810907 -0.349406 0.469375
+vn 0.859004 -0.122379 0.497085
+vn 0.719657 0.555559 0.416425
+vn 0.041749 0.998810 0.024415
+vn -0.470077 0.839625 -0.272011
+vn -0.677236 0.622608 -0.391980
+vn -0.761803 0.474471 -0.440962
+vn 0.662465 -0.349620 0.662465
+vn 0.701773 -0.122471 0.701773
+vn 0.587878 0.555650 0.587878
+vn 0.034272 0.998810 0.034272
+vn -0.383831 0.839808 -0.383831
+vn -0.553148 0.622913 -0.553148
+vn -0.622303 0.474776 -0.622303
+vn 0.469375 -0.349376 0.810907
+vn 0.497085 -0.122379 0.859004
+vn 0.416425 0.555559 0.719657
+vn 0.024415 0.998810 0.041749
+vn -0.272011 0.839625 -0.470077
+vn -0.391980 0.622608 -0.677236
+vn -0.440962 0.474471 -0.761803
+vn 0.242836 -0.348827 0.905148
+vn 0.257057 -0.122227 0.958617
+vn 0.215339 0.555376 0.803217
+vn 0.012726 0.998840 0.046205
+vn -0.140843 0.839106 -0.525376
+vn -0.202918 0.621845 -0.756371
+vn -0.228217 0.473769 -0.850551
+vn 0.000000 -0.348338 0.937346
+vn 0.000000 -0.122013 0.992523
+vn 0.000000 0.554674 0.832057
+vn 0.000000 0.998810 0.048616
+vn 0.000000 0.838893 -0.544267
+vn 0.000000 0.621387 -0.783471
+vn 0.000000 0.473281 -0.880886
+vn -0.242836 -0.348827 0.905148
+vn -0.257057 -0.122227 0.958617
+vn -0.215308 0.555376 0.803217
+vn -0.012726 0.998840 0.046205
+vn 0.140843 0.839106 -0.525376
+vn 0.202918 0.621845 -0.756371
+vn 0.228217 0.473769 -0.850551
+vn -0.469375 -0.349376 0.810907
+vn -0.497085 -0.122379 0.859004
+vn -0.416425 0.555559 0.719657
+vn -0.024415 0.998810 0.041749
+vn 0.272011 0.839625 -0.470077
+vn 0.391980 0.622608 -0.677236
+vn 0.440962 0.474471 -0.761803
+vn -0.662465 -0.349620 0.662465
+vn -0.701773 -0.122440 0.701773
+vn -0.587878 0.555650 0.587878
+vn -0.034272 0.998810 0.034272
+vn 0.383831 0.839808 -0.383831
+vn 0.553148 0.622913 -0.553148
+vn 0.622303 0.474776 -0.622303
+vn -0.810907 -0.349376 0.469375
+vn -0.859004 -0.122410 0.497085
+vn -0.719657 0.555528 0.416425
+vn -0.041749 0.998810 0.024415
+vn 0.470077 0.839625 -0.272011
+vn 0.677236 0.622639 -0.391980
+vn 0.761803 0.474471 -0.440962
+vn -0.905148 -0.348827 0.242836
+vn -0.958617 -0.122227 0.257057
+vn -0.803217 0.555376 0.215339
+vn -0.046236 0.998840 0.012726
+vn 0.525376 0.839106 -0.140843
+vn 0.756371 0.621845 -0.202918
+vn 0.850551 0.473769 -0.228217
+vn 0.908292 0.418256 0.000000
+vn 0.877041 0.418744 0.235298
+vn 0.920286 0.391156 0.000000
+vn 0.888668 0.391644 0.238441
+vn 0.907315 0.342753 0.243446
+vn 0.785638 0.419416 0.454756
+vn 0.796075 0.392285 0.460799
+vn 0.812830 0.343333 0.470504
+vn 0.931486 0.265542 0.248543
+vn 0.834162 0.266366 0.482864
+vn 0.855312 0.152379 0.495132
+vn 0.966613 -0.042848 0.252602
+vn 0.864498 -0.045808 0.500504
+vn 0.641804 0.419691 0.641804
+vn 0.650349 0.392499 0.650349
+vn 0.664052 0.343577 0.664052
+vn 0.681509 0.266579 0.681509
+vn 0.698813 0.152501 0.698813
+vn 0.706351 -0.045869 0.706351
+vn 0.454756 0.419416 0.785638
+vn 0.460799 0.392285 0.796075
+vn 0.470504 0.343333 0.812830
+vn 0.482864 0.266366 0.834162
+vn 0.495132 0.152409 0.855312
+vn 0.500504 -0.045808 0.864498
+vn 0.235298 0.418744 0.877041
+vn 0.238441 0.391644 0.888668
+vn 0.243446 0.342753 0.907315
+vn 0.249855 0.265908 0.931028
+vn 0.256172 0.152104 0.954558
+vn 0.258980 -0.045717 0.964782
+vn 0.000000 0.418256 0.908292
+vn 0.000000 0.391156 0.920286
+vn 0.000000 0.342357 0.939543
+vn 0.000000 0.265542 0.964080
+vn 0.000000 0.151891 0.988372
+vn 0.000000 -0.045656 0.998932
+vn -0.235298 0.418744 0.877041
+vn -0.238441 0.391644 0.888668
+vn -0.243446 0.342753 0.907315
+vn -0.249855 0.265877 0.931028
+vn -0.256172 0.152104 0.954558
+vn -0.258980 -0.045717 0.964782
+vn -0.454756 0.419416 0.785638
+vn -0.460799 0.392285 0.796075
+vn -0.470504 0.343333 0.812830
+vn -0.482864 0.266366 0.834162
+vn -0.495132 0.152379 0.855312
+vn -0.500504 -0.045808 0.864498
+vn -0.641804 0.419691 0.641804
+vn -0.650349 0.392499 0.650349
+vn -0.664052 0.343577 0.664052
+vn -0.681509 0.266579 0.681509
+vn -0.698813 0.152501 0.698813
+vn -0.706351 -0.045869 0.706351
+vn -0.785638 0.419416 0.454756
+vn -0.796075 0.392285 0.460799
+vn -0.812830 0.343364 0.470504
+vn -0.834162 0.266366 0.482864
+vn -0.855312 0.152379 0.495132
+vn -0.864498 -0.045808 0.500504
+vn -0.907315 0.342753 0.243446
+vn -0.931028 0.265908 0.249855
+vn -0.954558 0.152104 0.256172
+vn -0.964782 -0.045717 0.258980
+vn -0.939543 0.342357 0.000000
+vn -0.964080 0.265542 0.000000
+vn -0.988372 0.151891 0.000000
+vn -0.888668 0.391644 -0.238441
+vn -0.907315 0.342753 -0.243446
+vn -0.931028 0.265877 -0.249855
+vn -0.954558 0.152104 -0.256172
+vn -0.785638 0.419416 -0.454756
+vn -0.796075 0.392285 -0.460799
+vn -0.812830 0.343333 -0.470504
+vn -0.834162 0.266366 -0.482864
+vn -0.855312 0.152379 -0.495132
+vn -0.964782 -0.045717 -0.258980
+vn -0.864498 -0.045808 -0.500504
+vn -0.641804 0.419691 -0.641804
+vn -0.650349 0.392499 -0.650349
+vn -0.664052 0.343577 -0.664052
+vn -0.681509 0.266579 -0.681509
+vn -0.698813 0.152501 -0.698813
+vn -0.706351 -0.045869 -0.706351
+vn -0.454756 0.419416 -0.785638
+vn -0.460799 0.392285 -0.796075
+vn -0.470504 0.343333 -0.812830
+vn -0.482864 0.266366 -0.834162
+vn -0.495132 0.152379 -0.855312
+vn -0.500504 -0.045808 -0.864498
+vn -0.235298 0.418744 -0.877041
+vn -0.238441 0.391644 -0.888668
+vn -0.243446 0.342753 -0.907315
+vn -0.249855 0.265908 -0.931028
+vn -0.256172 0.152104 -0.954558
+vn -0.258980 -0.045717 -0.964782
+vn 0.000000 0.418256 -0.908292
+vn 0.000000 0.391156 -0.920286
+vn 0.000000 0.342357 -0.939543
+vn 0.000000 0.265542 -0.964080
+vn 0.000000 0.151891 -0.988372
+vn 0.000000 -0.045656 -0.998932
+vn 0.235298 0.418744 -0.877041
+vn 0.238441 0.391644 -0.888668
+vn 0.243446 0.342753 -0.907315
+vn 0.249855 0.265877 -0.931028
+vn 0.256172 0.152104 -0.954558
+vn 0.258980 -0.045717 -0.964782
+vn 0.454756 0.419416 -0.785638
+vn 0.460799 0.392285 -0.796075
+vn 0.470504 0.343333 -0.812830
+vn 0.482864 0.266366 -0.834162
+vn 0.495132 0.152379 -0.855312
+vn 0.500504 -0.045808 -0.864498
+vn 0.641804 0.419691 -0.641804
+vn 0.650349 0.392499 -0.650349
+vn 0.664052 0.343577 -0.664052
+vn 0.681509 0.266579 -0.681509
+vn 0.698813 0.152501 -0.698813
+vn 0.706351 -0.045869 -0.706351
+vn 0.785638 0.419416 -0.454756
+vn 0.796075 0.392285 -0.460799
+vn 0.812830 0.343364 -0.470504
+vn 0.834162 0.266366 -0.482864
+vn 0.855312 0.152379 -0.495132
+vn 0.864498 -0.045808 -0.500504
+vn 0.877041 0.418744 -0.235298
+vn 0.888668 0.391644 -0.238441
+vn 0.907315 0.342753 -0.243446
+vn 0.795892 -0.566485 0.213538
+vn 0.712180 -0.701987 0.000000
+vn 0.687399 -0.702445 0.184393
+vn 0.652974 -0.757347 0.000000
+vn 0.630146 -0.757805 0.169012
+vn 0.724021 -0.689749 0.000000
+vn 0.698752 -0.690329 0.187414
+vn 0.886410 -0.462874 0.000000
+vn 0.855861 -0.463454 0.229530
+vn 0.817774 -0.327158 0.473434
+vn 0.712729 -0.567248 0.412549
+vn 0.615345 -0.703146 0.356151
+vn 0.564043 -0.758446 0.326456
+vn 0.625660 -0.690939 0.362102
+vn 0.766625 -0.464125 0.443678
+vn 0.668111 -0.327403 0.668111
+vn 0.582171 -0.567522 0.582171
+vn 0.502579 -0.703421 0.502579
+vn 0.460646 -0.758660 0.460646
+vn 0.510971 -0.691183 0.510971
+vn 0.626209 -0.464370 0.626240
+vn 0.473434 -0.327158 0.817774
+vn 0.412549 -0.567248 0.712729
+vn 0.356151 -0.703146 0.615375
+vn 0.326456 -0.758446 0.564043
+vn 0.362102 -0.690939 0.625660
+vn 0.443678 -0.464125 0.766625
+vn 0.245003 -0.326609 0.912839
+vn 0.213538 -0.566485 0.795892
+vn 0.184393 -0.702445 0.687399
+vn 0.169012 -0.757805 0.630146
+vn 0.187414 -0.690329 0.698752
+vn 0.229530 -0.463454 0.855831
+vn 0.000000 -0.326243 0.945250
+vn 0.000000 -0.565996 0.824396
+vn 0.000000 -0.701987 0.712180
+vn 0.000000 -0.757347 0.652974
+vn 0.000000 -0.689749 0.724021
+vn 0.000000 -0.462905 0.886380
+vn -0.245003 -0.326609 0.912839
+vn -0.213538 -0.566485 0.795892
+vn -0.184393 -0.702445 0.687399
+vn -0.169012 -0.757805 0.630146
+vn -0.187414 -0.690329 0.698752
+vn -0.229530 -0.463454 0.855861
+vn -0.473434 -0.327158 0.817774
+vn -0.412549 -0.567248 0.712729
+vn -0.356151 -0.703146 0.615375
+vn -0.326456 -0.758446 0.564043
+vn -0.362102 -0.690939 0.625660
+vn -0.443678 -0.464125 0.766625
+vn -0.668111 -0.327403 0.668111
+vn -0.582171 -0.567522 0.582171
+vn -0.502579 -0.703421 0.502579
+vn -0.460646 -0.758660 0.460646
+vn -0.510971 -0.691183 0.510971
+vn -0.626209 -0.464370 0.626209
+vn -0.817774 -0.327158 0.473434
+vn -0.712729 -0.567248 0.412549
+vn -0.615375 -0.703146 0.356151
+vn -0.564043 -0.758446 0.326456
+vn -0.625660 -0.690939 0.362102
+vn -0.766625 -0.464125 0.443678
+vn -0.912931 -0.326609 0.244575
+vn -0.687399 -0.702445 0.184393
+vn -0.630146 -0.757805 0.169012
+vn -0.698752 -0.690329 0.187414
+vn -0.855831 -0.463485 0.229530
+vn -0.824396 -0.565996 0.000000
+vn -0.712180 -0.701987 0.000000
+vn -0.652974 -0.757347 0.000000
+vn -0.724021 -0.689749 0.000000
+vn -0.886410 -0.462874 0.000000
+vn -0.795892 -0.566485 -0.213538
+vn -0.687399 -0.702445 -0.184393
+vn -0.630146 -0.757805 -0.169012
+vn -0.698752 -0.690329 -0.187414
+vn -0.855831 -0.463454 -0.229530
+vn -0.817774 -0.327158 -0.473434
+vn -0.712729 -0.567217 -0.412549
+vn -0.615375 -0.703146 -0.356151
+vn -0.564043 -0.758446 -0.326456
+vn -0.625660 -0.690939 -0.362102
+vn -0.766625 -0.464125 -0.443678
+vn -0.668111 -0.327403 -0.668111
+vn -0.582171 -0.567522 -0.582171
+vn -0.502579 -0.703421 -0.502579
+vn -0.460646 -0.758660 -0.460646
+vn -0.510971 -0.691183 -0.510971
+vn -0.626209 -0.464370 -0.626209
+vn -0.473434 -0.327158 -0.817774
+vn -0.412549 -0.567248 -0.712729
+vn -0.356151 -0.703146 -0.615375
+vn -0.326456 -0.758446 -0.564043
+vn -0.362102 -0.690939 -0.625660
+vn -0.443678 -0.464125 -0.766625
+vn -0.245003 -0.326609 -0.912839
+vn -0.213538 -0.566485 -0.795892
+vn -0.184393 -0.702445 -0.687399
+vn -0.169012 -0.757805 -0.630146
+vn -0.187414 -0.690329 -0.698752
+vn -0.229530 -0.463454 -0.855831
+vn 0.000000 -0.326243 -0.945250
+vn 0.000000 -0.565996 -0.824396
+vn 0.000000 -0.701987 -0.712180
+vn 0.000000 -0.757347 -0.652974
+vn 0.000000 -0.689749 -0.724021
+vn 0.000000 -0.462905 -0.886380
+vn 0.245003 -0.326609 -0.912839
+vn 0.213538 -0.566485 -0.795892
+vn 0.184393 -0.702445 -0.687399
+vn 0.169012 -0.757805 -0.630146
+vn 0.187445 -0.690329 -0.698752
+vn 0.229530 -0.463454 -0.855861
+vn 0.473434 -0.327158 -0.817774
+vn 0.412549 -0.567248 -0.712729
+vn 0.356151 -0.703146 -0.615375
+vn 0.326456 -0.758446 -0.564043
+vn 0.362102 -0.690939 -0.625660
+vn 0.443678 -0.464125 -0.766625
+vn 0.668111 -0.327403 -0.668111
+vn 0.582171 -0.567522 -0.582171
+vn 0.502579 -0.703421 -0.502579
+vn 0.460646 -0.758660 -0.460646
+vn 0.510971 -0.691183 -0.510971
+vn 0.626209 -0.464370 -0.626209
+vn 0.817774 -0.327158 -0.473434
+vn 0.712729 -0.567248 -0.412549
+vn 0.615375 -0.703146 -0.356151
+vn 0.564043 -0.758446 -0.326456
+vn 0.625660 -0.690939 -0.362102
+vn 0.766625 -0.464125 -0.443678
+vn 0.912839 -0.326609 -0.244942
+vn 0.795892 -0.566485 -0.213538
+vn 0.687399 -0.702445 -0.184393
+vn 0.630146 -0.757805 -0.169012
+vn 0.698752 -0.690329 -0.187414
+vn 0.855861 -0.463454 -0.229530
+vn 0.025666 -0.999664 0.000000
+vn 0.000000 -1.000000 0.000000
+vn 0.024781 -0.999664 -0.006623
+vn 0.068667 -0.997620 0.000000
+vn 0.066256 -0.997620 -0.017731
+vn 0.157170 -0.987548 0.000000
+vn 0.151677 -0.987579 -0.040620
+vn 0.373150 -0.927763 0.000000
+vn 0.360118 -0.927885 -0.096469
+vn 0.789148 -0.614154 0.000000
+vn 0.762017 -0.614399 -0.204505
+vn 0.022156 -0.999664 -0.012787
+vn 0.059236 -0.997650 -0.034272
+vn 0.135624 -0.987640 -0.078463
+vn 0.322153 -0.928129 -0.186377
+vn 0.682333 -0.615131 -0.394971
+vn 0.018067 -0.999664 -0.018067
+vn 0.048341 -0.997650 -0.048341
+vn 0.110691 -0.987640 -0.110691
+vn 0.262947 -0.928251 -0.262947
+vn 0.557329 -0.615375 -0.557329
+vn 0.012787 -0.999664 -0.022156
+vn 0.034272 -0.997650 -0.059236
+vn 0.078463 -0.987640 -0.135624
+vn 0.186377 -0.928129 -0.322153
+vn 0.394971 -0.615131 -0.682302
+vn 0.006623 -0.999664 -0.024781
+vn 0.017731 -0.997620 -0.066256
+vn 0.040620 -0.987579 -0.151677
+vn 0.096469 -0.927885 -0.360118
+vn 0.204474 -0.614399 -0.762017
+vn 0.000000 -0.999664 -0.025666
+vn 0.000000 -0.997620 -0.068667
+vn 0.000000 -0.987548 -0.157170
+vn 0.000000 -0.927763 -0.373150
+vn 0.000000 -0.614154 -0.789148
+vn -0.006623 -0.999664 -0.024781
+vn -0.017731 -0.997620 -0.066256
+vn -0.040620 -0.987579 -0.151677
+vn -0.096469 -0.927885 -0.360118
+vn -0.204474 -0.614399 -0.762017
+vn -0.012787 -0.999664 -0.022156
+vn -0.034272 -0.997650 -0.059236
+vn -0.078463 -0.987640 -0.135624
+vn -0.186377 -0.928129 -0.322153
+vn -0.394971 -0.615131 -0.682333
+vn -0.018067 -0.999664 -0.018067
+vn -0.048341 -0.997650 -0.048341
+vn -0.110691 -0.987640 -0.110691
+vn -0.262947 -0.928251 -0.262947
+vn -0.557329 -0.615375 -0.557329
+vn -0.022156 -0.999664 -0.012787
+vn -0.059236 -0.997650 -0.034272
+vn -0.135624 -0.987640 -0.078463
+vn -0.322153 -0.928129 -0.186377
+vn -0.682302 -0.615131 -0.394971
+vn -0.024781 -0.999664 -0.006623
+vn -0.066256 -0.997620 -0.017731
+vn -0.151677 -0.987579 -0.040620
+vn -0.360118 -0.927885 -0.096469
+vn -0.762017 -0.614399 -0.204474
+vn -0.025666 -0.999664 0.000000
+vn -0.068667 -0.997620 0.000000
+vn -0.157170 -0.987548 0.000000
+vn -0.373150 -0.927763 0.000000
+vn -0.789148 -0.614154 0.000000
+vn -0.024781 -0.999664 0.006623
+vn -0.066256 -0.997620 0.017731
+vn -0.151677 -0.987579 0.040620
+vn -0.360149 -0.927885 0.096469
+vn -0.762017 -0.614399 0.204474
+vn -0.022156 -0.999664 0.012787
+vn -0.059236 -0.997650 0.034272
+vn -0.135624 -0.987640 0.078463
+vn -0.322153 -0.928129 0.186377
+vn -0.682333 -0.615131 0.394971
+vn -0.018067 -0.999664 0.018067
+vn -0.048341 -0.997650 0.048341
+vn -0.110691 -0.987640 0.110691
+vn -0.262947 -0.928251 0.262947
+vn -0.557329 -0.615375 0.557329
+vn -0.012787 -0.999664 0.022156
+vn -0.034272 -0.997650 0.059236
+vn -0.078463 -0.987640 0.135624
+vn -0.186377 -0.928129 0.322153
+vn -0.394971 -0.615131 0.682302
+vn -0.006623 -0.999664 0.024781
+vn -0.017731 -0.997620 0.066256
+vn -0.040620 -0.987579 0.151677
+vn -0.096469 -0.927885 0.360118
+vn -0.204474 -0.614399 0.762017
+vn 0.000000 -0.999664 0.025666
+vn 0.000000 -0.997620 0.068667
+vn 0.000000 -0.987548 0.157170
+vn 0.000000 -0.927763 0.373150
+vn 0.000000 -0.614154 0.789148
+vn 0.006623 -0.999664 0.024781
+vn 0.017731 -0.997620 0.066256
+vn 0.040620 -0.987579 0.151677
+vn 0.096469 -0.927885 0.360149
+vn 0.204474 -0.614399 0.762017
+vn 0.012787 -0.999664 0.022156
+vn 0.034272 -0.997650 0.059236
+vn 0.078463 -0.987640 0.135624
+vn 0.186377 -0.928129 0.322153
+vn 0.394971 -0.615131 0.682333
+vn 0.018067 -0.999664 0.018067
+vn 0.048341 -0.997650 0.048341
+vn 0.110691 -0.987640 0.110691
+vn 0.262947 -0.928251 0.262947
+vn 0.557329 -0.615375 0.557329
+vn 0.022156 -0.999664 0.012787
+vn 0.059236 -0.997650 0.034272
+vn 0.135624 -0.987640 0.078463
+vn 0.322153 -0.928129 0.186346
+vn 0.682302 -0.615131 0.394971
+vn 0.024781 -0.999664 0.006623
+vn 0.066256 -0.997620 0.017731
+vn 0.151677 -0.987579 0.040620
+vn 0.360118 -0.927885 0.096469
+vn 0.762017 -0.614399 0.204474
+vn 0.464827 -0.373638 -0.802667
+vn 0.655812 -0.373882 -0.655812
+vn 0.000000 -0.372539 0.927976
+vn -0.240699 -0.373028 0.896023
+vn -0.802667 -0.373608 0.464827
+vn 0.896023 -0.372997 -0.240699
+vn -0.927976 -0.372539 0.000000
+vn 0.927976 -0.372539 0.000000
+vn 0.717063 0.696982 0.000000
+vn 0.990387 -0.138310 -0.000061
+vn 0.956694 -0.138737 0.255806
+vn 0.692129 0.697470 0.185583
+vn 0.857326 -0.139317 0.495529
+vn 0.620045 0.697653 0.358837
+vn 0.700125 -0.139531 0.700217
+vn 0.506516 0.697714 0.506546
+vn 0.495468 -0.139286 0.857356
+vn 0.358776 0.697714 0.620014
+vn 0.255867 -0.138737 0.956694
+vn 0.185583 0.697531 0.692068
+vn 0.000061 -0.138310 0.990387
+vn 0.000000 0.696982 0.717063
+vn -0.255806 -0.138737 0.956694
+vn -0.185583 0.697470 0.692129
+vn -0.495529 -0.139317 0.857326
+vn -0.358837 0.697653 0.620045
+vn -0.700217 -0.139531 0.700156
+vn -0.506546 0.697714 0.506516
+vn -0.857356 -0.139286 0.495468
+vn -0.620014 0.697714 0.358776
+vn -0.956694 -0.138737 0.255867
+vn -0.692068 0.697531 0.185583
+vn -0.990387 -0.138310 0.000061
+vn -0.717063 0.696982 0.000000
+vn -0.956694 -0.138737 -0.255806
+vn -0.692129 0.697470 -0.185583
+vn -0.857326 -0.139317 -0.495529
+vn -0.620045 0.697653 -0.358837
+vn -0.700125 -0.139531 -0.700217
+vn -0.506516 0.697714 -0.506546
+vn -0.495468 -0.139286 -0.857356
+vn -0.358776 0.697714 -0.620014
+vn -0.255867 -0.138737 -0.956694
+vn -0.185583 0.697531 -0.692068
+vn -0.000061 -0.138310 -0.990387
+vn 0.000000 0.696982 -0.717063
+vn 0.255806 -0.138737 -0.956694
+vn 0.185583 0.697470 -0.692129
+vn 0.495529 -0.139317 -0.857326
+vn 0.358837 0.697653 -0.620045
+vn 0.700217 -0.139531 -0.700156
+vn 0.506546 0.697714 -0.506516
+vn 0.857356 -0.139286 -0.495468
+vn 0.620014 0.697714 -0.358776
+vn 0.956694 -0.138737 -0.255867
+vn 0.692068 0.697531 -0.185583
+vn 0.292520 0.956236 0.000000
+vn 0.282083 0.956389 0.075686
+vn 0.177953 0.984008 0.000000
+vn 0.171606 0.984069 0.046022
+vn 0.158879 0.987274 0.000000
+vn 0.153264 0.987304 0.041078
+vn 0.217719 0.975982 0.000000
+vn 0.210059 0.976043 0.056276
+vn 0.504715 0.863277 0.000000
+vn 0.487197 0.863460 0.130558
+vn 0.693258 0.720664 0.000000
+vn 0.669057 0.721183 0.179449
+vn 0.252388 0.956511 0.146092
+vn 0.153508 0.984130 0.088839
+vn 0.137059 0.987365 0.079318
+vn 0.187872 0.976135 0.108676
+vn 0.435926 0.863887 0.252205
+vn 0.598956 0.721824 0.346660
+vn 0.206091 0.956572 0.206091
+vn 0.125340 0.984161 0.125340
+vn 0.111911 0.987396 0.111911
+vn 0.153356 0.976196 0.153356
+vn 0.355907 0.864071 0.355907
+vn 0.489151 0.722098 0.489151
+vn 0.146092 0.956511 0.252388
+vn 0.088839 0.984130 0.153508
+vn 0.079318 0.987365 0.137059
+vn 0.108676 0.976135 0.187872
+vn 0.252205 0.863887 0.435926
+vn 0.346660 0.721824 0.598956
+vn 0.075686 0.956389 0.282083
+vn 0.046022 0.984069 0.171606
+vn 0.041078 0.987304 0.153264
+vn 0.056276 0.976043 0.210059
+vn 0.130558 0.863460 0.487197
+vn 0.179449 0.721183 0.669057
+vn 0.000000 0.956236 0.292520
+vn 0.000000 0.984008 0.177953
+vn 0.000000 0.987274 0.158879
+vn 0.000000 0.975982 0.217719
+vn 0.000000 0.863277 0.504715
+vn 0.000000 0.720664 0.693258
+vn -0.075686 0.956389 0.282083
+vn -0.046022 0.984069 0.171606
+vn -0.041078 0.987304 0.153264
+vn -0.056276 0.976043 0.210059
+vn -0.130558 0.863460 0.487197
+vn -0.179449 0.721183 0.669057
+vn -0.146092 0.956511 0.252388
+vn -0.088839 0.984130 0.153508
+vn -0.079318 0.987365 0.137059
+vn -0.108676 0.976135 0.187872
+vn -0.252205 0.863887 0.435926
+vn -0.346660 0.721824 0.598956
+vn -0.206091 0.956572 0.206091
+vn -0.125340 0.984161 0.125340
+vn -0.111911 0.987396 0.111911
+vn -0.153356 0.976196 0.153356
+vn -0.355907 0.864071 0.355907
+vn -0.489151 0.722098 0.489151
+vn -0.252388 0.956511 0.146092
+vn -0.153508 0.984130 0.088839
+vn -0.137059 0.987365 0.079318
+vn -0.187872 0.976135 0.108676
+vn -0.435926 0.863887 0.252205
+vn -0.598956 0.721824 0.346660
+vn -0.282083 0.956389 0.075686
+vn -0.171606 0.984069 0.046022
+vn -0.153264 0.987304 0.041078
+vn -0.210059 0.976043 0.056276
+vn -0.487197 0.863460 0.130558
+vn -0.669057 0.721183 0.179449
+vn -0.292520 0.956236 0.000000
+vn -0.177953 0.984008 0.000000
+vn -0.158879 0.987274 0.000000
+vn -0.217719 0.975982 0.000000
+vn -0.504715 0.863277 0.000000
+vn -0.693258 0.720664 0.000000
+vn -0.282083 0.956389 -0.075686
+vn -0.171606 0.984069 -0.046022
+vn -0.153264 0.987304 -0.041078
+vn -0.210059 0.976043 -0.056276
+vn -0.487197 0.863460 -0.130558
+vn -0.669057 0.721183 -0.179449
+vn -0.252388 0.956511 -0.146092
+vn -0.153508 0.984130 -0.088839
+vn -0.137059 0.987365 -0.079318
+vn -0.187872 0.976135 -0.108676
+vn -0.435926 0.863887 -0.252205
+vn -0.598956 0.721824 -0.346660
+vn -0.206091 0.956572 -0.206091
+vn -0.125340 0.984161 -0.125340
+vn -0.111911 0.987396 -0.111911
+vn -0.153356 0.976196 -0.153356
+vn -0.355907 0.864071 -0.355907
+vn -0.489151 0.722098 -0.489151
+vn -0.146092 0.956511 -0.252388
+vn -0.088839 0.984130 -0.153508
+vn -0.079318 0.987365 -0.137059
+vn -0.108676 0.976135 -0.187872
+vn -0.252205 0.863887 -0.435926
+vn -0.346660 0.721824 -0.598956
+vn -0.075686 0.956389 -0.282083
+vn -0.046022 0.984069 -0.171606
+vn -0.041078 0.987304 -0.153264
+vn -0.056276 0.976043 -0.210059
+vn -0.130558 0.863460 -0.487197
+vn -0.179449 0.721183 -0.669057
+vn 0.000000 0.956236 -0.292520
+vn 0.000000 0.984008 -0.177953
+vn 0.000000 0.987274 -0.158879
+vn 0.000000 0.975982 -0.217719
+vn 0.000000 0.863277 -0.504715
+vn 0.000000 0.720664 -0.693258
+vn 0.075686 0.956389 -0.282083
+vn 0.046022 0.984069 -0.171606
+vn 0.041078 0.987304 -0.153264
+vn 0.056276 0.976043 -0.210059
+vn 0.130558 0.863460 -0.487197
+vn 0.179449 0.721183 -0.669057
+vn 0.146092 0.956511 -0.252388
+vn 0.088839 0.984130 -0.153508
+vn 0.079318 0.987365 -0.137059
+vn 0.108676 0.976135 -0.187872
+vn 0.252205 0.863887 -0.435926
+vn 0.346660 0.721824 -0.598956
+vn 0.206091 0.956572 -0.206091
+vn 0.125340 0.984161 -0.125340
+vn 0.111911 0.987396 -0.111911
+vn 0.153356 0.976196 -0.153356
+vn 0.355907 0.864071 -0.355907
+vn 0.489151 0.722098 -0.489151
+vn 0.252388 0.956511 -0.146092
+vn 0.153508 0.984130 -0.088839
+vn 0.137059 0.987365 -0.079318
+vn 0.187872 0.976135 -0.108676
+vn 0.435926 0.863887 -0.252205
+vn 0.598956 0.721824 -0.346660
+vn 0.282083 0.956389 -0.075686
+vn 0.171606 0.984069 -0.046022
+vn 0.153264 0.987304 -0.041078
+vn 0.210059 0.976043 -0.056276
+vn 0.487197 0.863460 -0.130558
+vn 0.669057 0.721183 -0.179449
+vn 0.363842 0.931455 0.000000
+vn 0.000000 1.000000 0.000000
+vn 0.351451 0.931516 0.093509
+vn 0.968261 0.249916 0.000000
+vn 0.935423 0.249763 0.250130
+vn 0.842860 -0.538102 0.000000
+vn 0.813959 -0.538713 0.217292
+vn 0.786767 -0.617206 -0.000031
+vn 0.759514 -0.618000 0.202857
+vn 0.314432 0.931791 0.181280
+vn 0.838404 0.249855 0.484359
+vn 0.729026 -0.539720 0.420911
+vn 0.680013 -0.619068 0.392743
+vn 0.256386 0.931913 0.256417
+vn 0.684652 0.249886 0.684652
+vn 0.595050 -0.540147 0.595050
+vn 0.555010 -0.619526 0.555040
+vn 0.181280 0.931791 0.314432
+vn 0.484359 0.249825 0.838404
+vn 0.420881 -0.539720 0.729026
+vn 0.392712 -0.619098 0.680013
+vn 0.093509 0.931516 0.351451
+vn 0.250160 0.249763 0.935423
+vn 0.217322 -0.538713 0.813959
+vn 0.202887 -0.618030 0.759484
+vn 0.000000 0.931455 0.363842
+vn 0.000000 0.249916 0.968261
+vn 0.000000 -0.538102 0.842860
+vn 0.000031 -0.617206 0.786767
+vn -0.093509 0.931516 0.351451
+vn -0.250130 0.249763 0.935423
+vn -0.217292 -0.538682 0.813959
+vn -0.202857 -0.618000 0.759514
+vn -0.181280 0.931791 0.314432
+vn -0.484359 0.249855 0.838404
+vn -0.420911 -0.539720 0.729026
+vn -0.392743 -0.619068 0.680013
+vn -0.256417 0.931913 0.256386
+vn -0.684652 0.249886 0.684652
+vn -0.595050 -0.540147 0.595050
+vn -0.555040 -0.619526 0.555010
+vn -0.314432 0.931791 0.181280
+vn -0.838404 0.249825 0.484359
+vn -0.729026 -0.539720 0.420881
+vn -0.680013 -0.619098 0.392712
+vn -0.351451 0.931516 0.093509
+vn -0.935423 0.249763 0.250160
+vn -0.813959 -0.538713 0.217292
+vn -0.759484 -0.618030 0.202887
+vn -0.363842 0.931455 0.000000
+vn -0.968261 0.249916 0.000000
+vn -0.842860 -0.538102 0.000000
+vn -0.786767 -0.617206 0.000031
+vn -0.351451 0.931516 -0.093509
+vn -0.935423 0.249763 -0.250130
+vn -0.813959 -0.538713 -0.217292
+vn -0.759514 -0.618000 -0.202857
+vn -0.314432 0.931791 -0.181280
+vn -0.838404 0.249855 -0.484359
+vn -0.729026 -0.539720 -0.420911
+vn -0.680013 -0.619068 -0.392743
+vn -0.256386 0.931913 -0.256417
+vn -0.684652 0.249886 -0.684652
+vn -0.595050 -0.540147 -0.595050
+vn -0.555010 -0.619526 -0.555040
+vn -0.181280 0.931791 -0.314432
+vn -0.484359 0.249825 -0.838404
+vn -0.420881 -0.539720 -0.729026
+vn -0.392712 -0.619098 -0.680013
+vn -0.093509 0.931516 -0.351451
+vn -0.250160 0.249763 -0.935423
+vn -0.217322 -0.538713 -0.813959
+vn -0.202887 -0.618030 -0.759484
+vn 0.000000 0.931455 -0.363842
+vn 0.000000 0.249916 -0.968261
+vn 0.000000 -0.538102 -0.842860
+vn -0.000031 -0.617206 -0.786767
+vn 0.093509 0.931516 -0.351451
+vn 0.250130 0.249763 -0.935423
+vn 0.217292 -0.538682 -0.813959
+vn 0.202857 -0.618000 -0.759514
+vn 0.181280 0.931791 -0.314432
+vn 0.484359 0.249855 -0.838404
+vn 0.420911 -0.539720 -0.729026
+vn 0.392743 -0.619068 -0.680013
+vn 0.256417 0.931913 -0.256386
+vn 0.684652 0.249886 -0.684652
+vn 0.595050 -0.540147 -0.595050
+vn 0.555040 -0.619526 -0.555010
+vn 0.314432 0.931791 -0.181280
+vn 0.838404 0.249825 -0.484359
+vn 0.729026 -0.539720 -0.420881
+vn 0.680013 -0.619098 -0.392712
+vn 0.351451 0.931516 -0.093509
+vn 0.935423 0.249763 -0.250160
+vn 0.813959 -0.538713 -0.217292
+vn 0.759484 -0.618030 -0.202887
+vn -0.354198 0.930296 -0.095187
+vn 0.095187 0.930296 0.354198
+vn 0.354198 0.930296 0.095187
+vn 0.183721 0.930387 0.317179
+vn -0.183721 0.930387 -0.317179
+vn -0.367443 0.930021 0.000000
+vn -0.183721 0.930387 0.317179
+vn 0.367412 0.930021 0.000000
+vn -0.317179 0.930387 0.183721
+vn -0.095187 0.930296 -0.354198
+vn 0.000000 0.930021 -0.367443
+vn -0.354198 0.930296 0.095187
+vn 0.095187 0.930296 -0.354198
+vn 0.000000 0.930021 0.367443
+vn -0.317179 0.930387 -0.183721
+vn 0.317179 0.930387 -0.183721
+vn -0.095187 0.930296 0.354198
+vn 0.317179 0.930387 0.183721
+vn 0.354198 0.930296 -0.095187
+vn 0.183721 0.930387 -0.317179
+vn -0.034730 0.999390 0.000000
+vn 0.033479 0.999390 -0.009003
+vn 0.034730 0.999390 0.000000
+vn 0.009003 0.999390 -0.033479
+vn 0.000000 0.999390 -0.034730
+vn -0.259163 0.930387 0.259163
+vn -0.017335 0.999390 0.029939
+vn 0.947539 0.295083 0.122654
+vn -0.004486 0.999969 0.000000
+vn -0.004151 0.950468 0.310739
+vn -0.003021 0.719291 0.694662
+vn -0.998688 0.050722 0.000000
+vn -0.003143 0.719321 -0.694632
+vn 0.970214 0.213324 -0.114505
+vn -0.136235 0.879482 -0.455947
+vn 0.949858 0.312662 0.000000
+vn 0.055757 -0.017579 -0.998260
+vn 0.201300 -0.540880 -0.816645
+vn 0.974456 -0.186071 -0.125645
+vn 0.988098 -0.096286 0.119938
+vn 0.974456 -0.186041 0.125614
+vn -0.879574 -0.475723 0.000000
+vn -0.873775 -0.472610 -0.114475
+vn 0.988067 -0.096286 -0.119938
+vn 0.295480 -0.855464 -0.425214
+vn -0.976196 -0.174993 -0.127903
+vn -0.971007 -0.205725 0.121677
+vn -0.976196 -0.174963 0.127903
+vn -0.976196 -0.174993 0.127903
+vn 0.896054 -0.372997 0.240699
+vn -0.802667 -0.373638 -0.464827
+vn -0.655812 -0.373852 -0.655812
+vn -0.240699 -0.373028 -0.896023
+vn -0.464827 -0.373638 -0.802667
+vn -0.896023 -0.373028 0.240699
+vn 0.000000 -0.372539 -0.927976
+vn -0.655812 -0.373852 0.655812
+vn 0.240699 -0.373028 0.896023
+vn -0.896023 -0.373028 -0.240699
+vn 0.802667 -0.373638 -0.464827
+vn 0.655812 -0.373882 0.655812
+vn 0.464827 -0.373638 0.802667
+vn -0.464827 -0.373638 0.802667
+vn 0.240699 -0.373028 -0.896023
+vn 0.802667 -0.373638 0.464827
+vn -0.033479 0.999390 0.009003
+vn -0.259163 0.930387 -0.259163
+vn -0.024445 0.999390 -0.024445
+vn -0.029939 0.999390 -0.017335
+vn 0.259163 0.930387 -0.259163
+vn 0.024445 0.999390 -0.024445
+vn 0.017335 0.999390 -0.029939
+vn 0.029939 0.999390 -0.017335
+vn 0.033479 0.999390 0.009003
+vn -0.009003 0.999390 0.033479
+vn 0.000000 0.999390 0.034730
+vn -0.017335 0.999390 -0.029939
+vn 0.259163 0.930387 0.259163
+vn 0.029939 0.999390 0.017335
+vn -0.009003 0.999390 -0.033479
+vn 0.024445 0.999390 0.024445
+vn 0.017335 0.999390 0.029939
+vn 0.009003 0.999390 0.033479
+vn -0.033479 0.999390 -0.009003
+vn -0.024445 0.999390 0.024445
+vn -0.029939 0.999390 0.017335
+vn 0.055757 -0.017579 0.998260
+vn 0.294198 -0.855403 0.426252
+vn 0.201300 -0.540880 0.816614
+s 1
+f 34//1 1243//2 593//3
+f 52//4 27//5 40//6
+f 52//4 40//6 65//7
+f 77//8 52//4 65//7
+f 77//8 65//7 84//9
+f 107//10 77//8 84//9
+f 107//10 84//9 85//11
+f 115//12 107//10 85//11
+f 115//12 85//11 99//13
+f 129//14 115//12 99//13
+f 129//14 99//13 128//15
+f 1252//16 36//17 40//6
+f 65//7 40//6 64//18
+f 65//7 64//18 58//19
+f 84//9 65//7 58//19
+f 84//9 58//19 59//20
+f 85//11 84//9 59//20
+f 85//11 59//20 70//21
+f 99//13 85//11 70//21
+f 99//13 70//21 98//22
+f 128//15 99//13 98//22
+f 128//15 98//22 114//23
+f 1244//24 33//25 64//18
+f 58//19 64//18 33//25
+f 58//19 33//25 35//26
+f 59//20 58//19 35//26
+f 59//20 35//26 45//27
+f 70//21 59//20 45//27
+f 70//21 45//27 69//28
+f 98//22 70//21 69//28
+f 98//22 69//28 83//29
+f 114//23 98//22 83//29
+f 114//23 83//29 113//30
+f 553//31 1//32 566//33
+f 35//26 33//25 20//34
+f 35//26 20//34 24//35
+f 45//27 35//26 24//35
+f 45//27 24//35 44//36
+f 69//28 45//27 44//36
+f 69//28 44//36 57//37
+f 83//29 69//28 57//37
+f 83//29 57//37 82//38
+f 113//30 83//29 82//38
+f 113//30 82//38 112//39
+f 566//33 1283//40 9//41
+f 24//35 20//34 18//42
+f 24//35 18//42 23//43
+f 44//36 24//35 23//43
+f 44//36 23//43 32//44
+f 57//37 44//36 32//44
+f 57//37 32//44 56//45
+f 82//38 57//37 56//45
+f 82//38 56//45 81//46
+f 112//39 82//38 81//46
+f 112//39 81//46 111//47
+f 4//48 8//49 1250//50
+f 23//43 18//42 8//49
+f 23//43 8//49 17//51
+f 32//44 23//43 17//51
+f 32//44 17//51 31//52
+f 56//45 32//44 31//52
+f 56//45 31//52 55//53
+f 81//46 56//45 55//53
+f 81//46 55//53 80//54
+f 111//47 81//46 80//54
+f 111//47 80//54 110//55
+f 565//56 592//57 1233//58
+f 17//51 8//49 4//48
+f 17//51 4//48 16//59
+f 31//52 17//51 16//59
+f 31//52 16//59 30//60
+f 55//53 31//52 30//60
+f 55//53 30//60 54//61
+f 80//54 55//53 54//61
+f 80//54 54//61 79//62
+f 110//55 80//54 79//62
+f 110//55 79//62 109//63
+f 1//32 565//56 2//64
+f 6//65 2//64 565//56
+f 16//59 4//48 7//66
+f 16//59 7//66 22//67
+f 30//60 16//59 22//67
+f 30//60 22//67 43//68
+f 54//61 30//60 43//68
+f 54//61 43//68 68//69
+f 79//62 54//61 68//69
+f 79//62 68//69 97//70
+f 109//63 79//62 97//70
+f 109//63 97//70 127//71
+f 565//56 1233//58 1249//58
+f 13//72 14//73 5//74
+f 22//67 7//66 15//75
+f 22//67 15//75 29//76
+f 43//68 22//67 29//76
+f 43//68 29//76 42//77
+f 68//69 43//68 42//77
+f 68//69 42//77 67//78
+f 97//70 68//69 67//78
+f 97//70 67//78 96//79
+f 127//71 97//70 96//79
+f 127//71 96//79 126//80
+f 11//81 15//75 1240//82
+f 1289//83 34//1 593//3
+f 29//76 15//75 14//73
+f 29//76 14//73 21//84
+f 42//77 29//76 21//84
+f 42//77 21//84 41//85
+f 67//78 42//77 41//85
+f 67//78 41//85 66//86
+f 96//79 67//78 66//86
+f 96//79 66//86 95//87
+f 126//80 96//79 95//87
+f 126//80 95//87 125//88
+f 636//89 1243//2 26//2
+f 636//89 26//2 592//57
+f 21//84 14//73 13//72
+f 21//84 13//72 28//90
+f 41//85 21//84 28//90
+f 41//85 28//90 53//91
+f 66//86 41//85 53//91
+f 66//86 53//91 78//92
+f 95//87 66//86 78//92
+f 95//87 78//92 108//93
+f 125//88 95//87 108//93
+f 125//88 108//93 136//94
+f 12//95 13//72 5//74
+f 28//90 13//72 27//5
+f 28//90 27//5 52//4
+f 53//91 28//90 52//4
+f 53//91 52//4 77//8
+f 78//92 53//91 77//8
+f 78//92 77//8 107//10
+f 108//93 78//92 107//10
+f 108//93 107//10 115//12
+f 136//94 108//93 115//12
+f 136//94 115//12 129//14
+f 148//96 129//14 128//15
+f 148//96 128//15 143//97
+f 121//98 148//96 143//97
+f 121//98 143//97 122//99
+f 92//100 121//98 122//99
+f 92//100 122//99 93//101
+f 62//102 92//100 93//101
+f 62//102 93//101 63//103
+f 39//104 62//102 63//103
+f 39//104 63//103 51//105
+f 10//106 39//104 51//105
+f 10//106 51//105 49//107
+f 143//97 128//15 114//23
+f 143//97 114//23 142//108
+f 122//99 143//97 142//108
+f 122//99 142//108 123//109
+f 93//101 122//99 123//109
+f 93//101 123//109 94//110
+f 63//103 93//101 94//110
+f 63//103 94//110 76//111
+f 51//105 63//103 76//111
+f 51//105 76//111 75//112
+f 49//107 51//105 75//112
+f 49//107 75//112 61//113
+f 142//108 114//23 113//30
+f 142//108 113//30 141//114
+f 123//109 142//108 141//114
+f 123//109 141//114 124//115
+f 94//110 123//109 124//115
+f 94//110 124//115 106//116
+f 76//111 94//110 106//116
+f 76//111 106//116 105//117
+f 75//112 76//111 105//117
+f 75//112 105//117 91//118
+f 61//113 75//112 91//118
+f 61//113 91//118 90//119
+f 141//114 113//30 112//39
+f 141//114 112//39 140//120
+f 124//115 141//114 140//120
+f 124//115 140//120 135//121
+f 106//116 124//115 135//121
+f 106//116 135//121 134//122
+f 105//117 106//116 134//122
+f 105//117 134//122 120//123
+f 91//118 105//117 120//123
+f 91//118 120//123 119//124
+f 90//119 91//118 119//124
+f 140//120 112//39 111//47
+f 140//120 111//47 139//125
+f 135//121 140//120 139//125
+f 135//121 139//125 155//126
+f 134//122 135//121 155//126
+f 134//122 155//126 147//127
+f 120//123 134//122 147//127
+f 120//123 147//127 144//128
+f 119//124 120//123 144//128
+f 119//124 144//128 116//129
+f 26//2 1234//130 592//57
+f 139//125 111//47 110//55
+f 139//125 110//55 138//131
+f 155//126 139//125 138//131
+f 155//126 138//131 156//132
+f 147//127 155//126 156//132
+f 147//127 156//132 145//133
+f 144//128 147//127 145//133
+f 144//128 145//133 117//134
+f 116//129 144//128 117//134
+f 116//129 117//134 88//135
+f 27//5 13//72 12//95
+f 1242//136 88//135 71//137
+f 138//131 110//55 109//63
+f 138//131 109//63 137//138
+f 156//132 138//131 137//138
+f 156//132 137//138 154//139
+f 145//133 156//132 154//139
+f 145//133 154//139 131//140
+f 117//134 145//133 131//140
+f 117//134 131//140 101//141
+f 88//135 117//134 101//141
+f 88//135 101//141 86//142
+f 60//143 74//144 1248//145
+f 137//138 109//63 127//71
+f 137//138 127//71 151//146
+f 154//139 137//138 151//146
+f 154//139 151//146 153//147
+f 131//140 154//139 153//147
+f 131//140 153//147 130//148
+f 101//141 131//140 130//148
+f 101//141 130//148 100//149
+f 86//142 101//141 100//149
+f 86//142 100//149 74//144
+f 47//150 1235//151 631//152
+f 74//144 48//153 1248//145
+f 151//146 127//71 126//80
+f 151//146 126//80 150//154
+f 153//147 151//146 150//154
+f 153//147 150//154 146//155
+f 130//148 153//147 146//155
+f 130//148 146//155 118//156
+f 100//149 130//148 118//156
+f 100//149 118//156 89//157
+f 74//144 100//149 89//157
+f 74//144 89//157 48//153
+f 1247//158 87//159 715//160
+f 104//161 116//129 1242//136
+f 150//154 126//80 125//88
+f 150//154 125//88 149//162
+f 146//155 150//154 149//162
+f 146//155 149//162 132//163
+f 118//156 146//155 132//163
+f 118//156 132//163 102//164
+f 89//157 118//156 102//164
+f 89//157 102//164 72//165
+f 48//153 89//157 72//165
+f 48//153 72//165 46//166
+f 37//167 48//153 46//166
+f 37//167 46//166 38//168
+f 149//162 125//88 136//94
+f 149//162 136//94 152//169
+f 132//163 149//162 152//169
+f 132//163 152//169 133//170
+f 102//164 132//163 133//170
+f 102//164 133//170 103//171
+f 72//165 102//164 103//171
+f 72//165 103//171 73//172
+f 46//166 72//165 73//172
+f 46//166 73//172 50//173
+f 38//168 46//166 50//173
+f 38//168 50//173 25//174
+f 152//169 136//94 129//14
+f 152//169 129//14 148//96
+f 133//170 152//169 148//96
+f 133//170 148//96 121//98
+f 103//171 133//170 121//98
+f 103//171 121//98 92//100
+f 73//172 103//171 92//100
+f 73//172 92//100 62//102
+f 50//173 73//172 62//102
+f 50//173 62//102 39//104
+f 25//174 50//173 39//104
+f 39//104 10//106 25//174
+f 539//175 509//176 159//177
+f 160//178 158//179 157//180
+f 166//181 160//178 163//182
+f 166//181 163//182 173//183
+f 177//184 166//181 173//183
+f 177//184 173//183 183//185
+f 189//186 177//184 183//185
+f 189//186 183//185 197//187
+f 205//188 189//186 197//187
+f 205//188 197//187 216//189
+f 225//190 205//188 216//189
+f 225//190 216//189 235//191
+f 173//183 163//182 172//192
+f 173//183 172//192 184//193
+f 183//185 173//183 184//193
+f 183//185 184//193 198//194
+f 197//187 183//185 198//194
+f 197//187 198//194 217//195
+f 216//189 197//187 217//195
+f 216//189 217//195 236//196
+f 235//191 216//189 236//196
+f 235//191 236//196 253//197
+f 182//198 172//192 1254//199
+f 1255//200 167//201 595//202
+f 184//193 172//192 182//198
+f 184//193 182//198 196//203
+f 198//194 184//193 196//203
+f 198//194 196//203 214//204
+f 217//195 198//194 214//204
+f 217//195 214//204 232//205
+f 236//196 217//195 232//205
+f 236//196 232//205 250//206
+f 253//197 236//196 250//206
+f 253//197 250//206 267//207
+f 196//203 182//198 199//208
+f 196//203 199//208 215//209
+f 214//204 196//203 215//209
+f 214//204 215//209 233//210
+f 232//205 214//204 233//210
+f 232//205 233//210 251//211
+f 250//206 232//205 251//211
+f 250//206 251//211 257//212
+f 267//207 250//206 257//212
+f 267//207 257//212 256//213
+f 215//209 199//208 218//214
+f 215//209 218//214 234//215
+f 233//210 215//209 234//215
+f 233//210 234//215 241//216
+f 251//211 233//210 241//216
+f 251//211 241//216 240//217
+f 257//212 251//211 240//217
+f 257//212 240//217 239//218
+f 256//213 257//212 239//218
+f 256//213 239//218 238//219
+f 234//215 218//214 219//220
+f 234//215 219//220 224//221
+f 241//216 234//215 224//221
+f 241//216 224//221 223//222
+f 240//217 241//216 223//222
+f 240//217 223//222 222//223
+f 239//218 240//217 222//223
+f 239//218 222//223 221//224
+f 238//219 239//218 221//224
+f 238//219 221//224 220//225
+f 219//220 218//214 1285//226
+f 648//227 691//228 207//229
+f 224//221 219//220 208//230
+f 224//221 208//230 204//231
+f 223//222 224//221 204//231
+f 223//222 204//231 203//232
+f 222//223 223//222 203//232
+f 222//223 203//232 202//233
+f 221//224 222//223 202//233
+f 221//224 202//233 201//234
+f 220//225 221//224 201//234
+f 220//225 201//234 212//235
+f 204//231 208//230 191//236
+f 204//231 191//236 188//237
+f 203//232 204//231 188//237
+f 203//232 188//237 187//238
+f 202//233 203//232 187//238
+f 202//233 187//238 186//239
+f 201//234 202//233 186//239
+f 201//234 186//239 194//240
+f 212//235 201//234 194//240
+f 212//235 194//240 211//241
+f 188//237 191//236 174//242
+f 188//237 174//242 176//243
+f 187//238 188//237 176//243
+f 187//238 176//243 175//244
+f 186//239 187//238 175//244
+f 186//239 175//244 180//245
+f 194//240 186//239 180//245
+f 194//240 180//245 193//246
+f 211//241 194//240 193//246
+f 211//241 193//246 210//247
+f 613//248 585//249 1258//250
+f 176//243 174//242 168//251
+f 176//243 168//251 165//252
+f 175//244 176//243 165//252
+f 175//244 165//252 170//253
+f 180//245 175//244 170//253
+f 180//245 170//253 179//254
+f 193//246 180//245 179//254
+f 193//246 179//254 192//255
+f 210//247 193//246 192//255
+f 210//247 192//255 209//256
+f 168//251 174//242 164//257
+f 165//252 168//251 161//258
+f 165//252 161//258 162//259
+f 170//253 165//252 162//259
+f 170//253 162//259 171//260
+f 179//254 170//253 171//260
+f 179//254 171//260 181//261
+f 192//255 179//254 181//261
+f 192//255 181//261 195//262
+f 209//256 192//255 195//262
+f 209//256 195//262 213//263
+f 160//178 161//258 158//179
+f 161//258 160//178 162//259
+f 162//259 160//178 166//181
+f 171//260 162//259 166//181
+f 171//260 166//181 177//184
+f 181//261 171//260 177//184
+f 181//261 177//184 189//186
+f 195//262 181//261 189//186
+f 195//262 189//186 205//188
+f 213//263 195//262 205//188
+f 213//263 205//188 225//190
+f 242//264 225//190 235//191
+f 242//264 235//191 252//265
+f 258//266 242//264 252//265
+f 258//266 252//265 268//267
+f 273//268 258//266 268//267
+f 273//268 268//267 283//269
+f 287//270 273//268 283//269
+f 287//270 283//269 298//271
+f 300//272 287//270 298//271
+f 300//272 298//271 299//273
+f 312//274 300//272 299//273
+f 312//274 299//273 310//275
+f 252//265 235//191 253//197
+f 252//265 253//197 269//276
+f 268//267 252//265 269//276
+f 268//267 269//276 284//277
+f 283//269 268//267 284//277
+f 283//269 284//277 286//278
+f 298//271 283//269 286//278
+f 298//271 286//278 285//279
+f 299//273 298//271 285//279
+f 299//273 285//279 296//280
+f 310//275 299//273 296//280
+f 310//275 296//280 309//281
+f 269//276 253//197 267//207
+f 269//276 267//207 272//282
+f 284//277 269//276 272//282
+f 284//277 272//282 271//283
+f 286//278 284//277 271//283
+f 286//278 271//283 270//284
+f 285//279 286//278 270//284
+f 285//279 270//284 281//285
+f 296//280 285//279 281//285
+f 296//280 281//285 295//286
+f 309//281 296//280 295//286
+f 309//281 295//286 308//287
+f 272//282 267//207 256//213
+f 272//282 256//213 255//288
+f 271//283 272//282 255//288
+f 271//283 255//288 254//289
+f 270//284 271//283 254//289
+f 270//284 254//289 265//290
+f 281//285 270//284 265//290
+f 281//285 265//290 280//291
+f 295//286 281//285 280//291
+f 295//286 280//291 294//292
+f 308//287 295//286 294//292
+f 308//287 294//292 307//293
+f 255//288 256//213 238//219
+f 255//288 238//219 237//294
+f 254//289 255//288 237//294
+f 254//289 237//294 248//295
+f 265//290 254//289 248//295
+f 265//290 248//295 264//296
+f 280//291 265//290 264//296
+f 280//291 264//296 279//297
+f 294//292 280//291 279//297
+f 294//292 279//297 293//298
+f 307//293 294//292 293//298
+f 307//293 293//298 306//299
+f 237//294 238//219 220//225
+f 237//294 220//225 230//300
+f 248//295 237//294 230//300
+f 248//295 230//300 247//301
+f 264//296 248//295 247//301
+f 264//296 247//301 263//302
+f 279//297 264//296 263//302
+f 279//297 263//302 278//303
+f 293//298 279//297 278//303
+f 293//298 278//303 292//304
+f 306//299 293//298 292//304
+f 306//299 292//304 305//305
+f 230//300 220//225 212//235
+f 230//300 212//235 229//306
+f 247//301 230//300 229//306
+f 247//301 229//306 246//307
+f 263//302 247//301 246//307
+f 263//302 246//307 262//308
+f 278//303 263//302 262//308
+f 278//303 262//308 277//309
+f 292//304 278//303 277//309
+f 292//304 277//309 291//310
+f 305//305 292//304 291//310
+f 305//305 291//310 304//311
+f 229//306 212//235 211//241
+f 229//306 211//241 228//312
+f 246//307 229//306 228//312
+f 246//307 228//312 245//313
+f 262//308 246//307 245//313
+f 262//308 245//313 261//314
+f 277//309 262//308 261//314
+f 277//309 261//314 276//315
+f 291//310 277//309 276//315
+f 291//310 276//315 290//316
+f 304//311 291//310 290//316
+f 304//311 290//316 303//317
+f 228//312 211//241 210//247
+f 228//312 210//247 227//318
+f 245//313 228//312 227//318
+f 245//313 227//318 244//319
+f 261//314 245//313 244//319
+f 261//314 244//319 260//320
+f 276//315 261//314 260//320
+f 276//315 260//320 275//321
+f 290//316 276//315 275//321
+f 290//316 275//321 289//322
+f 303//317 290//316 289//322
+f 303//317 289//322 302//323
+f 227//318 210//247 209//256
+f 227//318 209//256 226//324
+f 244//319 227//318 226//324
+f 244//319 226//324 243//325
+f 260//320 244//319 243//325
+f 260//320 243//325 259//326
+f 275//321 260//320 259//326
+f 275//321 259//326 274//327
+f 289//322 275//321 274//327
+f 289//322 274//327 288//328
+f 302//323 289//322 288//328
+f 302//323 288//328 301//329
+f 226//324 209//256 213//263
+f 226//324 213//263 231//330
+f 243//325 226//324 231//330
+f 243//325 231//330 249//331
+f 259//326 243//325 249//331
+f 259//326 249//331 266//332
+f 274//327 259//326 266//332
+f 274//327 266//332 282//333
+f 288//328 274//327 282//333
+f 288//328 282//333 297//334
+f 301//329 288//328 297//334
+f 301//329 297//334 311//335
+f 231//330 213//263 225//190
+f 231//330 225//190 242//264
+f 249//331 231//330 242//264
+f 249//331 242//264 258//266
+f 266//332 249//331 258//266
+f 266//332 258//266 273//268
+f 282//333 266//332 273//268
+f 282//333 273//268 287//270
+f 297//334 282//333 287//270
+f 297//334 287//270 300//272
+f 311//335 297//334 300//272
+f 311//335 300//272 312//274
+f 312//274 310//275 323//336
+f 302//323 315//337 316//338
+f 304//311 317//339 318//340
+f 303//317 316//338 317//339
+f 309//281 308//287 321//341
+f 307//293 306//299 319//342
+f 311//335 313//343 314//344
+f 301//329 314//344 315//337
+f 312//274 324//345 313//343
+f 306//299 305//305 318//340
+f 308//287 307//293 320//346
+f 310//275 309//281 322//347
+f 322//347 321//341 325//348
+f 314//344 313//343 325//348
+f 323//336 322//347 325//348
+f 319//342 318//340 325//348
+f 315//337 314//344 325//348
+f 321//341 320//346 325//348
+f 324//345 323//336 325//348
+f 320//346 319//342 325//348
+f 317//339 316//338 325//348
+f 316//338 315//337 325//348
+f 313//343 324//345 325//348
+f 318//340 317//339 325//348
+f 326//349 327//350 328//351
+f 326//349 328//351 329//352
+f 333//353 326//349 329//352
+f 333//353 329//352 338//354
+f 347//355 333//353 338//354
+f 347//355 338//354 354//356
+f 364//357 347//355 354//356
+f 364//357 354//356 374//358
+f 386//359 364//357 374//358
+f 386//359 374//358 398//360
+f 412//361 386//359 398//360
+f 412//361 398//360 429//362
+f 329//352 328//351 334//363
+f 329//352 334//363 339//364
+f 338//354 329//352 339//364
+f 338//354 339//364 355//365
+f 354//356 338//354 355//365
+f 354//356 355//365 375//366
+f 374//358 354//356 375//366
+f 374//358 375//366 399//367
+f 398//360 374//358 399//367
+f 398//360 399//367 430//368
+f 429//362 398//360 430//368
+f 429//362 430//368 464//369
+f 339//364 334//363 342//370
+f 339//364 342//370 356//371
+f 355//365 339//364 356//371
+f 355//365 356//371 376//372
+f 375//366 355//365 376//372
+f 375//366 376//372 400//373
+f 399//367 375//366 400//373
+f 399//367 400//373 431//374
+f 430//368 399//367 431//374
+f 430//368 431//374 465//375
+f 464//369 430//368 465//375
+f 464//369 465//375 497//376
+f 356//371 342//370 351//377
+f 356//371 351//377 370//378
+f 376//372 356//371 370//378
+f 376//372 370//378 393//379
+f 400//373 376//372 393//379
+f 400//373 393//379 422//380
+f 431//374 400//373 422//380
+f 431//374 422//380 457//381
+f 465//375 431//374 457//381
+f 465//375 457//381 490//382
+f 497//376 465//375 490//382
+f 497//376 490//382 521//383
+f 370//378 351//377 361//384
+f 370//378 361//384 382//385
+f 393//379 370//378 382//385
+f 393//379 382//385 408//386
+f 422//380 393//379 408//386
+f 422//380 408//386 439//387
+f 457//381 422//380 439//387
+f 457//381 439//387 473//388
+f 490//382 457//381 473//388
+f 490//382 473//388 504//389
+f 521//383 490//382 504//389
+f 521//383 504//389 535//390
+f 382//385 361//384 369//391
+f 382//385 369//391 392//392
+f 408//386 382//385 392//392
+f 408//386 392//392 421//393
+f 439//387 408//386 421//393
+f 439//387 421//393 456//394
+f 473//388 439//387 456//394
+f 473//388 456//394 489//395
+f 504//389 473//388 489//395
+f 504//389 489//395 520//396
+f 535//390 504//389 520//396
+f 535//390 520//396 550//397
+f 392//392 369//391 380//398
+f 392//392 380//398 407//399
+f 421//393 392//392 407//399
+f 421//393 407//399 438//400
+f 456//394 421//393 438//400
+f 456//394 438//400 472//401
+f 489//395 456//394 472//401
+f 489//395 472//401 503//402
+f 520//396 489//395 503//402
+f 520//396 503//402 534//403
+f 550//397 520//396 534//403
+f 550//397 534//403 563//404
+f 407//399 380//398 391//405
+f 407//399 391//405 420//406
+f 438//400 407//399 420//406
+f 438//400 420//406 455//407
+f 472//401 438//400 455//407
+f 472//401 455//407 488//408
+f 503//402 472//401 488//408
+f 503//402 488//408 519//409
+f 534//403 503//402 519//409
+f 534//403 519//409 549//410
+f 563//404 534//403 549//410
+f 563//404 549//410 578//411
+f 420//406 391//405 404//412
+f 420//406 404//412 437//413
+f 455//407 420//406 437//413
+f 455//407 437//413 471//414
+f 488//408 455//407 471//414
+f 488//408 471//414 502//415
+f 519//409 488//408 502//415
+f 519//409 502//415 533//416
+f 549//410 519//409 533//416
+f 549//410 533//416 562//417
+f 578//411 549//410 562//417
+f 578//411 562//417 589//418
+f 437//413 404//412 419//419
+f 437//413 419//419 454//420
+f 471//414 437//413 454//420
+f 471//414 454//420 487//421
+f 502//415 471//414 487//421
+f 502//415 487//421 518//422
+f 533//416 502//415 518//422
+f 533//416 518//422 548//423
+f 562//417 533//416 548//423
+f 562//417 548//423 577//424
+f 589//418 562//417 577//424
+f 589//418 577//424 594//425
+f 454//420 419//419 436//426
+f 454//420 436//426 470//427
+f 487//421 454//420 470//427
+f 487//421 470//427 501//428
+f 518//422 487//421 501//428
+f 518//422 501//428 532//429
+f 548//423 518//422 532//429
+f 548//423 532//429 555//430
+f 577//424 548//423 555//430
+f 577//424 555//430 567//431
+f 594//425 577//424 567//431
+f 594//425 567//431 582//432
+f 470//427 436//426 453//433
+f 470//427 453//433 486//434
+f 501//428 470//427 486//434
+f 501//428 486//434 508//435
+f 532//429 501//428 508//435
+f 532//429 508//435 525//436
+f 555//430 532//429 525//436
+f 555//430 525//436 538//437
+f 567//431 555//430 538//437
+f 567//431 538//437 554//438
+f 582//432 567//431 554//438
+f 582//432 554//438 566//33
+f 486//434 453//433 462//439
+f 486//434 462//439 477//440
+f 508//435 486//434 477//440
+f 508//435 477//440 494//441
+f 525//436 508//435 494//441
+f 525//436 494//441 507//442
+f 538//437 525//436 507//442
+f 538//437 507//442 524//443
+f 554//438 538//437 524//443
+f 554//438 524//443 537//444
+f 566//33 554//438 537//444
+f 566//33 537//444 553//31
+f 477//440 462//439 443//445
+f 477//440 443//445 461//446
+f 494//441 477//440 461//446
+f 494//441 461//446 476//447
+f 507//442 494//441 476//447
+f 507//442 476//447 493//448
+f 524//443 507//442 493//448
+f 524//443 493//448 506//449
+f 537//444 524//443 506//449
+f 537//444 506//449 523//450
+f 553//31 537//444 523//450
+f 553//31 523//450 536//451
+f 461//446 443//445 426//452
+f 461//446 426//452 442//453
+f 476//447 461//446 442//453
+f 476//447 442//453 460//454
+f 493//448 476//447 460//454
+f 493//448 460//454 475//455
+f 506//449 493//448 475//455
+f 506//449 475//455 492//456
+f 523//450 506//449 492//456
+f 523//450 492//456 505//457
+f 536//451 523//450 505//457
+f 536//451 505//457 522//458
+f 442//453 426//452 411//459
+f 442//453 411//459 425//460
+f 460//454 442//453 425//460
+f 460//454 425//460 441//461
+f 475//455 460//454 441//461
+f 475//455 441//461 459//462
+f 492//456 475//455 459//462
+f 492//456 459//462 474//463
+f 505//457 492//456 474//463
+f 505//457 474//463 491//464
+f 522//458 505//457 491//464
+f 522//458 491//464 499//465
+f 425//460 411//459 396//466
+f 425//460 396//466 410//467
+f 441//461 425//460 410//467
+f 441//461 410//467 424//468
+f 459//462 441//461 424//468
+f 459//462 424//468 440//469
+f 474//463 459//462 440//469
+f 474//463 440//469 458//470
+f 491//464 474//463 458//470
+f 491//464 458//470 467//471
+f 499//465 491//464 467//471
+f 499//465 467//471 466//472
+f 410//467 396//466 385//473
+f 410//467 385//473 395//474
+f 424//468 410//467 395//474
+f 424//468 395//474 409//475
+f 440//469 424//468 409//475
+f 440//469 409//475 423//476
+f 458//470 440//469 423//476
+f 458//470 423//476 433//477
+f 467//471 458//470 433//477
+f 467//471 433//477 432//478
+f 466//472 467//471 432//478
+f 466//472 432//478 434//479
+f 395//474 385//473 372//480
+f 395//474 372//480 383//481
+f 409//475 395//474 383//481
+f 409//475 383//481 394//482
+f 423//476 409//475 394//482
+f 423//476 394//482 402//483
+f 433//477 423//476 402//483
+f 433//477 402//483 401//484
+f 432//478 433//477 401//484
+f 432//478 401//484 403//485
+f 434//479 432//478 403//485
+f 434//479 403//485 417//486
+f 383//481 372//480 362//487
+f 383//481 362//487 371//488
+f 394//482 383//481 371//488
+f 394//482 371//488 378//489
+f 402//483 394//482 378//489
+f 402//483 378//489 377//490
+f 401//484 402//483 377//490
+f 401//484 377//490 379//491
+f 403//485 401//484 379//491
+f 403//485 379//491 390//492
+f 417//486 403//485 390//492
+f 417//486 390//492 416//493
+f 371//488 362//487 352//494
+f 371//488 352//494 358//495
+f 378//489 371//488 358//495
+f 378//489 358//495 357//496
+f 377//490 378//489 357//496
+f 377//490 357//496 359//497
+f 379//491 377//490 359//497
+f 379//491 359//497 367//498
+f 390//492 379//491 367//498
+f 390//492 367//498 389//499
+f 416//493 390//492 389//499
+f 416//493 389//499 415//500
+f 358//495 352//494 345//501
+f 358//495 345//501 340//502
+f 357//496 358//495 340//502
+f 357//496 340//502 341//503
+f 359//497 357//496 341//503
+f 359//497 341//503 349//504
+f 367//498 359//497 349//504
+f 367//498 349//504 366//505
+f 389//499 367//498 366//505
+f 389//499 366//505 388//506
+f 415//500 389//499 388//506
+f 415//500 388//506 414//507
+f 340//502 345//501 335//508
+f 340//502 335//508 330//509
+f 341//503 340//502 330//509
+f 341//503 330//509 336//510
+f 349//504 341//503 336//510
+f 349//504 336//510 348//511
+f 366//505 349//504 348//511
+f 366//505 348//511 365//512
+f 388//506 366//505 365//512
+f 388//506 365//512 387//513
+f 414//507 388//506 387//513
+f 414//507 387//513 413//514
+f 330//509 335//508 327//350
+f 327//350 326//349 330//509
+f 330//509 326//349 336//510
+f 336//510 326//349 333//353
+f 348//511 336//510 333//353
+f 348//511 333//353 347//355
+f 365//512 348//511 347//355
+f 365//512 347//355 364//357
+f 387//513 365//512 364//357
+f 387//513 364//357 386//359
+f 413//514 387//513 386//359
+f 413//514 386//359 412//361
+f 445//515 412//361 429//362
+f 445//515 429//362 463//516
+f 478//517 445//515 463//516
+f 478//517 463//516 495//518
+f 509//176 478//517 495//518
+f 509//176 495//518 526//519
+f 463//516 429//362 464//369
+f 463//516 464//369 496//520
+f 495//518 463//516 496//520
+f 495//518 496//520 527//521
+f 526//519 495//518 527//521
+f 526//519 527//521 557//522
+f 556//523 526//519 557//522
+f 556//523 557//522 583//524
+f 595//202 556//523 583//524
+f 595//202 583//524 611//525
+f 621//526 595//202 611//525
+f 621//526 611//525 638//527
+f 496//520 464//369 497//376
+f 496//520 497//376 528//528
+f 527//521 496//520 528//528
+f 527//521 528//528 558//529
+f 557//522 527//521 558//529
+f 557//522 558//529 584//530
+f 583//524 557//522 584//530
+f 583//524 584//530 612//531
+f 611//525 583//524 612//531
+f 611//525 612//531 639//532
+f 638//527 611//525 639//532
+f 638//527 639//532 664//533
+f 528//528 497//376 521//383
+f 528//528 521//383 551//534
+f 558//529 528//528 551//534
+f 558//529 551//534 580//535
+f 584//530 558//529 580//535
+f 584//530 580//535 608//536
+f 612//531 584//530 608//536
+f 612//531 608//536 634//537
+f 639//532 612//531 634//537
+f 639//532 634//537 661//538
+f 664//533 639//532 661//538
+f 664//533 661//538 688//539
+f 551//534 521//383 535//390
+f 551//534 535//390 564//540
+f 580//535 551//534 564//540
+f 580//535 564//540 591//541
+f 608//536 580//535 591//541
+f 608//536 591//541 618//542
+f 634//537 608//536 618//542
+f 634//537 618//542 644//543
+f 661//538 634//537 644//543
+f 661//538 644//543 670//544
+f 688//539 661//538 670//544
+f 688//539 670//544 698//545
+f 564//540 535//390 550//397
+f 564//540 550//397 579//546
+f 591//541 564//540 579//546
+f 591//541 579//546 607//547
+f 618//542 591//541 607//547
+f 618//542 607//547 633//548
+f 644//543 618//542 633//548
+f 644//543 633//548 660//549
+f 670//544 644//543 660//549
+f 670//544 660//549 687//550
+f 698//545 670//544 687//550
+f 698//545 687//550 717//551
+f 579//546 550//397 563//404
+f 579//546 563//404 590//552
+f 607//547 579//546 590//552
+f 607//547 590//552 617//553
+f 633//548 607//547 617//553
+f 633//548 617//553 643//554
+f 660//549 633//548 643//554
+f 660//549 643//554 669//555
+f 687//550 660//549 669//555
+f 687//550 669//555 697//556
+f 717//551 687//550 697//556
+f 717//551 697//556 727//557
+f 590//552 563//404 578//411
+f 590//552 578//411 606//558
+f 617//553 590//552 606//558
+f 617//553 606//558 632//559
+f 643//554 617//553 632//559
+f 643//554 632//559 659//560
+f 669//555 643//554 659//560
+f 669//555 659//560 673//561
+f 697//556 669//555 673//561
+f 697//556 673//561 700//562
+f 727//557 697//556 700//562
+f 727//557 700//562 699//563
+f 606//558 578//411 589//418
+f 606//558 589//418 616//564
+f 632//559 606//558 616//564
+f 632//559 616//564 637//565
+f 659//560 632//559 637//565
+f 659//560 637//565 647//566
+f 673//561 659//560 647//566
+f 673//561 647//566 672//567
+f 700//562 673//561 672//567
+f 700//562 672//567 671//568
+f 699//563 700//562 671//568
+f 699//563 671//568 689//569
+f 616//564 589//418 594//425
+f 616//564 594//425 610//570
+f 637//565 616//564 610//570
+f 637//565 610//570 620//571
+f 647//566 637//565 620//571
+f 647//566 620//571 646//572
+f 672//567 647//566 646//572
+f 672//567 646//572 645//573
+f 671//568 672//567 645//573
+f 671//568 645//573 662//574
+f 689//569 671//568 662//574
+f 689//569 662//574 668//575
+f 610//570 594//425 582//432
+f 610//570 582//432 593//3
+f 620//571 610//570 593//3
+f 620//571 593//3 636//89
+f 646//572 620//571 636//89
+f 646//572 636//89 619//576
+f 645//573 646//572 619//576
+f 645//573 619//576 635//577
+f 662//574 645//573 635//577
+f 662//574 635//577 642//578
+f 668//575 662//574 642//578
+f 668//575 642//578 641//579
+f 593//3 582//432 566//33
+f 619//576 636//89 592//57
+f 619//576 592//57 609//580
+f 635//577 619//576 609//580
+f 635//577 609//580 615//581
+f 642//578 635//577 615//581
+f 642//578 615//581 614//582
+f 641//579 642//578 614//582
+f 592//57 565//56 581//583
+f 609//580 592//57 581//583
+f 609//580 581//583 587//584
+f 615//581 609//580 587//584
+f 615//581 587//584 586//585
+f 614//582 615//581 586//585
+f 614//582 586//585 588//586
+f 565//56 553//31 536//451
+f 565//56 536//451 552//587
+f 581//583 565//56 552//587
+f 581//583 552//587 560//588
+f 587//584 581//583 560//588
+f 587//584 560//588 559//589
+f 586//585 587//584 559//589
+f 586//585 559//589 561//590
+f 588//586 586//585 561//590
+f 588//586 561//590 576//591
+f 605//592 588//586 576//591
+f 605//592 576//591 604//593
+f 552//587 536//451 522//458
+f 552//587 522//458 530//594
+f 560//588 552//587 530//594
+f 560//588 530//594 529//595
+f 559//589 560//588 529//595
+f 559//589 529//595 531//596
+f 561//590 559//589 531//596
+f 561//590 531//596 547//597
+f 576//591 561//590 547//597
+f 576//591 547//597 575//598
+f 604//593 576//591 575//598
+f 604//593 575//598 603//599
+f 530//594 522//458 499//465
+f 530//594 499//465 498//600
+f 529//595 530//594 498//600
+f 529//595 498//600 500//601
+f 531//596 529//595 500//601
+f 531//596 500//601 517//602
+f 547//597 531//596 517//602
+f 547//597 517//602 546//603
+f 575//598 547//597 546//603
+f 575//598 546//603 574//604
+f 603//599 575//598 574//604
+f 603//599 574//604 602//605
+f 498//600 499//465 466//472
+f 498//600 466//472 468//606
+f 500//601 498//600 468//606
+f 500//601 468//606 485//607
+f 517//602 500//601 485//607
+f 517//602 485//607 516//608
+f 546//603 517//602 516//608
+f 546//603 516//608 545//609
+f 574//604 546//603 545//609
+f 574//604 545//609 573//610
+f 602//605 574//604 573//610
+f 602//605 573//610 601//611
+f 468//606 466//472 434//479
+f 468//606 434//479 451//612
+f 485//607 468//606 451//612
+f 485//607 451//612 484//613
+f 516//608 485//607 484//613
+f 516//608 484//613 515//614
+f 545//609 516//608 515//614
+f 545//609 515//614 544//615
+f 573//610 545//609 544//615
+f 573//610 544//615 572//616
+f 601//611 573//610 572//616
+f 601//611 572//616 600//617
+f 451//612 434//479 417//486
+f 451//612 417//486 450//618
+f 484//613 451//612 450//618
+f 484//613 450//618 483//619
+f 515//614 484//613 483//619
+f 515//614 483//619 514//620
+f 544//615 515//614 514//620
+f 544//615 514//620 543//621
+f 572//616 544//615 543//621
+f 572//616 543//621 571//622
+f 600//617 572//616 571//622
+f 600//617 571//622 599//623
+f 450//618 417//486 416//493
+f 450//618 416//493 449//624
+f 483//619 450//618 449//624
+f 483//619 449//624 482//625
+f 514//620 483//619 482//625
+f 514//620 482//625 513//626
+f 543//621 514//620 513//626
+f 543//621 513//626 542//627
+f 571//622 543//621 542//627
+f 571//622 542//627 570//628
+f 599//623 571//622 570//628
+f 599//623 570//628 598//629
+f 449//624 416//493 415//500
+f 449//624 415//500 448//630
+f 482//625 449//624 448//630
+f 482//625 448//630 481//631
+f 513//626 482//625 481//631
+f 513//626 481//631 512//632
+f 542//627 513//626 512//632
+f 542//627 512//632 541//633
+f 570//628 542//627 541//633
+f 570//628 541//633 569//634
+f 598//629 570//628 569//634
+f 598//629 569//634 597//635
+f 448//630 415//500 414//507
+f 448//630 414//507 447//636
+f 481//631 448//630 447//636
+f 481//631 447//636 480//637
+f 512//632 481//631 480//637
+f 512//632 480//637 511//638
+f 541//633 512//632 511//638
+f 541//633 511//638 540//639
+f 569//634 541//633 540//639
+f 569//634 540//639 568//640
+f 597//635 569//634 568//640
+f 597//635 568//640 596//641
+f 447//636 414//507 413//514
+f 447//636 413//514 446//642
+f 480//637 447//636 446//642
+f 480//637 446//642 479//643
+f 511//638 480//637 479//643
+f 511//638 479//643 510//644
+f 540//639 511//638 510//644
+f 540//639 510//644 539//175
+f 568//640 540//639 539//175
+f 568//640 539//175 585//249
+f 596//641 568//640 585//249
+f 596//641 585//249 613//248
+f 446//642 413//514 412//361
+f 446//642 412//361 445//515
+f 479//643 446//642 445//515
+f 479//643 445//515 478//517
+f 510//644 479//643 478//517
+f 510//644 478//517 509//176
+f 539//175 510//644 509//176
+f 691//228 648//227 674//645
+f 702//646 691//228 674//645
+f 702//646 674//645 701//647
+f 731//648 702//646 701//647
+f 731//648 701//647 730//649
+f 759//650 731//648 730//649
+f 759//650 730//649 758//651
+f 786//652 759//650 758//651
+f 786//652 758//651 785//653
+f 648//227 621//526 638//527
+f 648//227 638//527 663//654
+f 674//645 648//227 663//654
+f 674//645 663//654 690//655
+f 701//647 674//645 690//655
+f 701//647 690//655 720//656
+f 730//649 701//647 720//656
+f 730//649 720//656 749//657
+f 758//651 730//649 749//657
+f 758//651 749//657 777//658
+f 785//653 758//651 777//658
+f 785//653 777//658 805//659
+f 663//654 638//527 664//533
+f 663//654 664//533 692//660
+f 690//655 663//654 692//660
+f 690//655 692//660 721//661
+f 720//656 690//655 721//661
+f 720//656 721//661 750//662
+f 749//657 720//656 750//662
+f 749//657 750//662 778//663
+f 777//658 749//657 778//663
+f 777//658 778//663 806//664
+f 805//659 777//658 806//664
+f 805//659 806//664 833//665
+f 692//660 664//533 688//539
+f 692//660 688//539 718//666
+f 721//661 692//660 718//666
+f 721//661 718//666 747//667
+f 750//662 721//661 747//667
+f 750//662 747//667 775//668
+f 778//663 750//662 775//668
+f 778//663 775//668 803//669
+f 806//664 778//663 803//669
+f 806//664 803//669 831//670
+f 833//665 806//664 831//670
+f 833//665 831//670 838//671
+f 718//666 688//539 698//545
+f 718//666 698//545 728//672
+f 747//667 718//666 728//672
+f 747//667 728//672 756//673
+f 775//668 747//667 756//673
+f 775//668 756//673 784//674
+f 803//669 775//668 784//674
+f 803//669 784//674 804//675
+f 831//670 803//669 804//675
+f 831//670 804//675 811//676
+f 838//671 831//670 811//676
+f 838//671 811//676 830//677
+f 728//672 698//545 717//551
+f 728//672 717//551 746//678
+f 756//673 728//672 746//678
+f 756//673 746//678 757//679
+f 784//674 756//673 757//679
+f 784//674 757//679 776//680
+f 804//675 784//674 776//680
+f 804//675 776//680 783//681
+f 811//676 804//675 783//681
+f 811//676 783//681 802//682
+f 830//677 811//676 802//682
+f 830//677 802//682 810//683
+f 746//678 717//551 727//557
+f 746//678 727//557 729//684
+f 757//679 746//678 729//684
+f 757//679 729//684 748//685
+f 776//680 757//679 748//685
+f 776//680 748//685 755//686
+f 783//681 776//680 755//686
+f 783//681 755//686 774//687
+f 802//682 783//681 774//687
+f 802//682 774//687 782//688
+f 810//683 802//682 782//688
+f 810//683 782//688 809//689
+f 729//684 727//557 699//563
+f 729//684 699//563 719//690
+f 748//685 729//684 719//690
+f 748//685 719//690 726//691
+f 755//686 748//685 726//691
+f 755//686 726//691 745//692
+f 774//687 755//686 745//692
+f 774//687 745//692 754//693
+f 782//688 774//687 754//693
+f 782//688 754//693 781//694
+f 809//689 782//688 781//694
+f 809//689 781//694 780//695
+f 719//690 699//563 689//569
+f 719//690 689//569 696//696
+f 726//691 719//690 696//696
+f 726//691 696//696 716//697
+f 745//692 726//691 716//697
+f 745//692 716//697 725//698
+f 754//693 745//692 725//698
+f 754//693 725//698 753//699
+f 781//694 754//693 753//699
+f 781//694 753//699 752//700
+f 780//695 781//694 752//700
+f 780//695 752//700 773//701
+f 696//696 689//569 668//575
+f 696//696 668//575 686//702
+f 716//697 696//696 686//702
+f 716//697 686//702 695//703
+f 725//698 716//697 695//703
+f 725//698 695//703 724//704
+f 753//699 725//698 724//704
+f 753//699 724//704 723//705
+f 752//700 753//699 723//705
+f 752//700 723//705 744//706
+f 773//701 752//700 744//706
+f 773//701 744//706 772//707
+f 686//702 668//575 641//579
+f 686//702 641//579 667//708
+f 695//703 686//702 667//708
+f 695//703 667//708 715//160
+f 724//704 695//703 715//160
+f 724//704 715//160 694//709
+f 723//705 724//704 694//709
+f 723//705 694//709 714//710
+f 744//706 723//705 714//710
+f 744//706 714//710 743//711
+f 772//707 744//706 743//711
+f 772//707 743//711 771//712
+f 694//709 715//160 666//713
+f 694//709 666//713 685//714
+f 714//710 694//709 685//714
+f 714//710 685//714 713//715
+f 743//711 714//710 713//715
+f 743//711 713//715 742//716
+f 771//712 743//711 742//716
+f 771//712 742//716 770//717
+f 666//713 631//152 658//718
+f 685//714 666//713 658//718
+f 685//714 658//718 684//719
+f 713//715 685//714 684//719
+f 713//715 684//719 712//720
+f 742//716 713//715 712//720
+f 742//716 712//720 741//721
+f 770//717 742//716 741//721
+f 770//717 741//721 769//722
+f 631//152 605//592 604//593
+f 631//152 604//593 630//723
+f 658//718 631//152 630//723
+f 658//718 630//723 657//724
+f 684//719 658//718 657//724
+f 684//719 657//724 683//725
+f 712//720 684//719 683//725
+f 712//720 683//725 711//726
+f 741//721 712//720 711//726
+f 741//721 711//726 740//727
+f 769//722 741//721 740//727
+f 769//722 740//727 768//728
+f 630//723 604//593 603//599
+f 630//723 603//599 629//729
+f 657//724 630//723 629//729
+f 657//724 629//729 656//730
+f 683//725 657//724 656//730
+f 683//725 656//730 682//731
+f 711//726 683//725 682//731
+f 711//726 682//731 710//732
+f 740//727 711//726 710//732
+f 740//727 710//732 739//733
+f 768//728 740//727 739//733
+f 768//728 739//733 767//734
+f 629//729 603//599 602//605
+f 629//729 602//605 628//735
+f 656//730 629//729 628//735
+f 656//730 628//735 655//736
+f 682//731 656//730 655//736
+f 682//731 655//736 681//737
+f 710//732 682//731 681//737
+f 710//732 681//737 709//738
+f 739//733 710//732 709//738
+f 739//733 709//738 738//739
+f 767//734 739//733 738//739
+f 767//734 738//739 766//740
+f 628//735 602//605 601//611
+f 628//735 601//611 627//741
+f 655//736 628//735 627//741
+f 655//736 627//741 654//742
+f 681//737 655//736 654//742
+f 681//737 654//742 680//743
+f 709//738 681//737 680//743
+f 709//738 680//743 708//744
+f 738//739 709//738 708//744
+f 738//739 708//744 737//745
+f 766//740 738//739 737//745
+f 766//740 737//745 765//746
+f 627//741 601//611 600//617
+f 627//741 600//617 626//747
+f 654//742 627//741 626//747
+f 654//742 626//747 653//748
+f 680//743 654//742 653//748
+f 680//743 653//748 679//749
+f 708//744 680//743 679//749
+f 708//744 679//749 707//750
+f 737//745 708//744 707//750
+f 737//745 707//750 736//751
+f 765//746 737//745 736//751
+f 765//746 736//751 764//752
+f 626//747 600//617 599//623
+f 626//747 599//623 625//753
+f 653//748 626//747 625//753
+f 653//748 625//753 652//754
+f 679//749 653//748 652//754
+f 679//749 652//754 678//755
+f 707//750 679//749 678//755
+f 707//750 678//755 706//756
+f 736//751 707//750 706//756
+f 736//751 706//756 735//757
+f 764//752 736//751 735//757
+f 764//752 735//757 763//758
+f 625//753 599//623 598//629
+f 625//753 598//629 624//759
+f 652//754 625//753 624//759
+f 652//754 624//759 651//760
+f 678//755 652//754 651//760
+f 678//755 651//760 677//761
+f 706//756 678//755 677//761
+f 706//756 677//761 705//762
+f 735//757 706//756 705//762
+f 735//757 705//762 734//763
+f 763//758 735//757 734//763
+f 763//758 734//763 762//764
+f 624//759 598//629 597//635
+f 624//759 597//635 623//765
+f 651//760 624//759 623//765
+f 651//760 623//765 650//766
+f 677//761 651//760 650//766
+f 677//761 650//766 676//767
+f 705//762 677//761 676//767
+f 705//762 676//767 704//768
+f 734//763 705//762 704//768
+f 734//763 704//768 733//769
+f 762//764 734//763 733//769
+f 762//764 733//769 761//770
+f 623//765 597//635 596//641
+f 623//765 596//641 622//771
+f 650//766 623//765 622//771
+f 650//766 622//771 649//772
+f 676//767 650//766 649//772
+f 676//767 649//772 675//773
+f 704//768 676//767 675//773
+f 704//768 675//773 703//774
+f 733//769 704//768 703//774
+f 733//769 703//774 732//775
+f 761//770 733//769 732//775
+f 761//770 732//775 760//776
+f 622//771 596//641 613//248
+f 622//771 613//248 640//777
+f 649//772 622//771 640//777
+f 649//772 640//777 665//778
+f 675//773 649//772 665//778
+f 675//773 665//778 693//779
+f 703//774 675//773 693//779
+f 703//774 693//779 722//780
+f 732//775 703//774 722//780
+f 732//775 722//780 751//781
+f 760//776 732//775 751//781
+f 760//776 751//781 779//782
+f 693//779 665//778 691//228
+f 693//779 691//228 702//646
+f 722//780 693//779 702//646
+f 722//780 702//646 731//648
+f 751//781 722//780 731//648
+f 751//781 731//648 759//650
+f 779//782 751//781 759//650
+f 779//782 759//650 786//652
+f 917//783 918//784 913//785
+f 892//786 917//783 913//785
+f 892//786 913//785 887//787
+f 866//788 892//786 887//787
+f 866//788 887//787 861//789
+f 840//790 866//788 861//789
+f 840//790 861//789 834//791
+f 813//792 840//790 834//791
+f 813//792 834//791 807//793
+f 786//652 813//792 807//793
+f 786//652 807//793 779//782
+f 913//785 918//784 893//794
+f 887//787 913//785 893//794
+f 887//787 893//794 867//795
+f 861//789 887//787 867//795
+f 861//789 867//795 841//796
+f 834//791 861//789 841//796
+f 834//791 841//796 814//797
+f 807//793 834//791 814//797
+f 807//793 814//797 787//798
+f 779//782 807//793 787//798
+f 779//782 787//798 760//776
+f 893//794 918//784 894//799
+f 867//795 893//794 894//799
+f 867//795 894//799 868//800
+f 841//796 867//795 868//800
+f 841//796 868//800 842//801
+f 814//797 841//796 842//801
+f 814//797 842//801 815//802
+f 787//798 814//797 815//802
+f 787//798 815//802 788//803
+f 760//776 787//798 788//803
+f 760//776 788//803 761//770
+f 894//799 918//784 895//804
+f 868//800 894//799 895//804
+f 868//800 895//804 869//805
+f 842//801 868//800 869//805
+f 842//801 869//805 843//806
+f 815//802 842//801 843//806
+f 815//802 843//806 816//807
+f 788//803 815//802 816//807
+f 788//803 816//807 789//808
+f 761//770 788//803 789//808
+f 761//770 789//808 762//764
+f 895//804 918//784 896//809
+f 869//805 895//804 896//809
+f 869//805 896//809 870//810
+f 843//806 869//805 870//810
+f 843//806 870//810 844//811
+f 816//807 843//806 844//811
+f 816//807 844//811 817//812
+f 789//808 816//807 817//812
+f 789//808 817//812 790//813
+f 762//764 789//808 790//813
+f 762//764 790//813 763//758
+f 896//809 918//784 897//814
+f 870//810 896//809 897//814
+f 870//810 897//814 871//815
+f 844//811 870//810 871//815
+f 844//811 871//815 845//816
+f 817//812 844//811 845//816
+f 817//812 845//816 818//817
+f 790//813 817//812 818//817
+f 790//813 818//817 791//818
+f 763//758 790//813 791//818
+f 763//758 791//818 764//752
+f 897//814 918//784 898//819
+f 871//815 897//814 898//819
+f 871//815 898//819 872//820
+f 845//816 871//815 872//820
+f 845//816 872//820 846//821
+f 818//817 845//816 846//821
+f 818//817 846//821 819//822
+f 791//818 818//817 819//822
+f 791//818 819//822 792//823
+f 764//752 791//818 792//823
+f 764//752 792//823 765//746
+f 898//819 918//784 899//824
+f 872//820 898//819 899//824
+f 872//820 899//824 873//825
+f 846//821 872//820 873//825
+f 846//821 873//825 847//826
+f 819//822 846//821 847//826
+f 819//822 847//826 820//827
+f 792//823 819//822 820//827
+f 792//823 820//827 793//828
+f 765//746 792//823 793//828
+f 765//746 793//828 766//740
+f 899//824 918//784 900//829
+f 873//825 899//824 900//829
+f 873//825 900//829 874//830
+f 847//826 873//825 874//830
+f 847//826 874//830 848//831
+f 820//827 847//826 848//831
+f 820//827 848//831 821//832
+f 793//828 820//827 821//832
+f 793//828 821//832 794//833
+f 766//740 793//828 794//833
+f 766//740 794//833 767//734
+f 900//829 918//784 901//834
+f 874//830 900//829 901//834
+f 874//830 901//834 875//835
+f 848//831 874//830 875//835
+f 848//831 875//835 849//836
+f 821//832 848//831 849//836
+f 821//832 849//836 822//837
+f 794//833 821//832 822//837
+f 794//833 822//837 795//838
+f 767//734 794//833 795//838
+f 767//734 795//838 768//728
+f 901//834 918//784 902//839
+f 875//835 901//834 902//839
+f 875//835 902//839 876//840
+f 849//836 875//835 876//840
+f 849//836 876//840 850//841
+f 822//837 849//836 850//841
+f 822//837 850//841 823//842
+f 795//838 822//837 823//842
+f 795//838 823//842 796//843
+f 768//728 795//838 796//843
+f 768//728 796//843 769//722
+f 902//839 918//784 903//844
+f 876//840 902//839 903//844
+f 876//840 903//844 877//845
+f 850//841 876//840 877//845
+f 850//841 877//845 851//846
+f 823//842 850//841 851//846
+f 823//842 851//846 824//847
+f 796//843 823//842 824//847
+f 796//843 824//847 797//848
+f 769//722 796//843 797//848
+f 769//722 797//848 770//717
+f 903//844 918//784 904//849
+f 877//845 903//844 904//849
+f 877//845 904//849 878//850
+f 851//846 877//845 878//850
+f 851//846 878//850 852//851
+f 824//847 851//846 852//851
+f 824//847 852//851 825//852
+f 797//848 824//847 825//852
+f 797//848 825//852 798//853
+f 770//717 797//848 798//853
+f 770//717 798//853 771//712
+f 904//849 918//784 905//854
+f 878//850 904//849 905//854
+f 878//850 905//854 879//855
+f 852//851 878//850 879//855
+f 852//851 879//855 853//856
+f 825//852 852//851 853//856
+f 825//852 853//856 826//857
+f 798//853 825//852 826//857
+f 798//853 826//857 799//858
+f 771//712 798//853 799//858
+f 771//712 799//858 772//707
+f 905//854 918//784 906//859
+f 879//855 905//854 906//859
+f 879//855 906//859 880//860
+f 853//856 879//855 880//860
+f 853//856 880//860 854//861
+f 826//857 853//856 854//861
+f 826//857 854//861 827//862
+f 799//858 826//857 827//862
+f 799//858 827//862 800//863
+f 772//707 799//858 800//863
+f 772//707 800//863 773//701
+f 906//859 918//784 907//864
+f 880//860 906//859 907//864
+f 880//860 907//864 881//865
+f 854//861 880//860 881//865
+f 854//861 881//865 855//866
+f 827//862 854//861 855//866
+f 827//862 855//866 828//867
+f 800//863 827//862 828//867
+f 800//863 828//867 801//868
+f 773//701 800//863 801//868
+f 773//701 801//868 780//695
+f 907//864 918//784 908//869
+f 881//865 907//864 908//869
+f 881//865 908//869 882//870
+f 855//866 881//865 882//870
+f 855//866 882//870 856//871
+f 828//867 855//866 856//871
+f 828//867 856//871 829//872
+f 801//868 828//867 829//872
+f 801//868 829//872 808//873
+f 780//695 801//868 808//873
+f 780//695 808//873 809//689
+f 908//869 918//784 909//874
+f 882//870 908//869 909//874
+f 882//870 909//874 883//875
+f 856//871 882//870 883//875
+f 856//871 883//875 857//876
+f 829//872 856//871 857//876
+f 829//872 857//876 836//877
+f 808//873 829//872 836//877
+f 808//873 836//877 835//878
+f 809//689 808//873 835//878
+f 809//689 835//878 810//683
+f 909//874 918//784 910//879
+f 883//875 909//874 910//879
+f 883//875 910//879 884//880
+f 857//876 883//875 884//880
+f 857//876 884//880 863//881
+f 836//877 857//876 863//881
+f 836//877 863//881 862//882
+f 835//878 836//877 862//882
+f 835//878 862//882 837//883
+f 810//683 835//878 837//883
+f 810//683 837//883 830//677
+f 910//879 918//784 911//884
+f 884//880 910//879 911//884
+f 884//880 911//884 889//885
+f 863//881 884//880 889//885
+f 863//881 889//885 888//886
+f 862//882 863//881 888//886
+f 862//882 888//886 864//887
+f 837//883 862//882 864//887
+f 837//883 864//887 858//888
+f 830//677 837//883 858//888
+f 830//677 858//888 838//671
+f 911//884 918//784 915//889
+f 889//885 911//884 915//889
+f 889//885 915//889 914//890
+f 888//886 889//885 914//890
+f 888//886 914//890 890//891
+f 864//887 888//886 890//891
+f 864//887 890//891 885//892
+f 858//888 864//887 885//892
+f 858//888 885//892 859//893
+f 838//671 858//888 859//893
+f 838//671 859//893 833//665
+f 915//889 918//784 919//894
+f 914//890 915//889 919//894
+f 914//890 919//894 912//895
+f 890//891 914//890 912//895
+f 890//891 912//895 886//896
+f 885//892 890//891 886//896
+f 885//892 886//896 860//897
+f 859//893 885//892 860//897
+f 859//893 860//897 832//898
+f 833//665 859//893 832//898
+f 833//665 832//898 805//659
+f 919//894 918//784 916//899
+f 912//895 919//894 916//899
+f 912//895 916//899 891//900
+f 886//896 912//895 891//900
+f 886//896 891//900 865//901
+f 860//897 886//896 865//901
+f 860//897 865//901 839//902
+f 832//898 860//897 839//902
+f 832//898 839//902 812//903
+f 805//659 832//898 812//903
+f 805//659 812//903 785//653
+f 916//899 918//784 917//783
+f 891//900 916//899 917//783
+f 891//900 917//783 892//786
+f 865//901 891//900 892//786
+f 865//901 892//786 866//788
+f 839//902 865//901 866//788
+f 839//902 866//788 840//790
+f 812//903 839//902 840//790
+f 812//903 840//790 813//792
+f 785//653 812//903 813//792
+f 785//653 813//792 786//652
+f 404//412 1267//904 1268//905
+f 384//906 373//907 372//480
+f 346//908 345//501 352//494
+f 435//909 436//426 419//419
+f 331//910 327//350 335//508
+f 462//439 453//433 1265//911
+f 920//912 921//913 922//914
+f 920//912 922//914 923//915
+f 923//915 922//914 927//916
+f 923//915 927//916 933//917
+f 933//917 927//916 940//918
+f 933//917 940//918 949//919
+f 949//919 940//918 959//920
+f 949//919 959//920 971//921
+f 971//921 959//920 984//922
+f 971//921 984//922 999//923
+f 999//923 984//922 1011//924
+f 999//923 1011//924 1027//925
+f 1027//925 1011//924 1042//926
+f 1027//925 1042//926 1059//927
+f 1059//927 1042//926 1077//928
+f 1059//927 1077//928 1095//929
+f 1095//929 1077//928 1110//930
+f 1095//929 1110//930 1129//931
+f 1129//931 1110//930 1144//932
+f 1129//931 1144//932 1163//933
+f 1163//933 1144//932 1179//934
+f 1163//933 1179//934 1196//935
+f 1196//935 1179//934 1210//936
+f 1196//935 1210//936 1220//937
+f 1220//937 1210//936 1212//938
+f 1220//937 1212//938 1201//939
+f 1201//939 1212//938 1181//940
+f 1201//939 1181//940 1169//941
+f 1169//941 1181//940 1147//942
+f 1169//941 1147//942 1135//943
+f 1135//943 1147//942 1113//944
+f 1135//943 1113//944 1102//945
+f 1102//945 1113//944 1081//946
+f 1102//945 1081//946 1065//947
+f 1065//947 1081//946 1046//948
+f 1065//947 1046//948 1032//949
+f 1032//949 1046//948 1016//950
+f 1032//949 1016//950 1003//951
+f 1003//951 1016//950 988//952
+f 1003//951 988//952 974//953
+f 974//953 988//952 962//954
+f 974//953 962//954 951//955
+f 951//955 962//954 942//956
+f 951//955 942//956 934//957
+f 934//957 942//956 928//958
+f 934//957 928//958 924//959
+f 924//959 928//958 921//913
+f 921//913 920//912 924//959
+f 926//960 920//912 923//915
+f 926//960 923//915 932//961
+f 938//962 926//960 932//961
+f 938//962 932//961 947//963
+f 956//964 938//962 947//963
+f 956//964 947//963 968//965
+f 980//966 956//964 968//965
+f 980//966 968//965 995//967
+f 1006//968 980//966 995//967
+f 1006//968 995//967 1022//969
+f 1036//970 1006//968 1022//969
+f 1036//970 1022//969 1053//971
+f 932//961 923//915 933//917
+f 932//961 933//917 948//972
+f 947//963 932//961 948//972
+f 947//963 948//972 969//973
+f 968//965 947//963 969//973
+f 968//965 969//973 996//974
+f 995//967 968//965 996//974
+f 995//967 996//974 1023//975
+f 1022//969 995//967 1023//975
+f 1022//969 1023//975 1054//976
+f 1053//971 1022//969 1054//976
+f 1053//971 1054//976 1089//977
+f 948//972 933//917 949//919
+f 948//972 949//919 970//978
+f 969//973 948//972 970//978
+f 969//973 970//978 997//979
+f 996//974 969//973 997//979
+f 996//974 997//979 1024//980
+f 1023//975 996//974 1024//980
+f 1023//975 1024//980 1055//981
+f 1054//976 1023//975 1055//981
+f 1054//976 1055//981 1090//982
+f 1089//977 1054//976 1090//982
+f 1089//977 1090//982 1109//983
+f 970//978 949//919 971//921
+f 970//978 971//921 998//984
+f 997//979 970//978 998//984
+f 997//979 998//984 1025//985
+f 1024//980 997//979 1025//985
+f 1024//980 1025//985 1056//986
+f 1055//981 1024//980 1056//986
+f 1055//981 1056//986 1091//987
+f 1090//982 1055//981 1091//987
+f 1090//982 1091//987 1124//988
+f 1109//983 1090//982 1124//988
+f 1109//983 1124//988 1122//989
+f 998//984 971//921 999//923
+f 998//984 999//923 1026//990
+f 1025//985 998//984 1026//990
+f 1025//985 1026//990 1057//991
+f 1056//986 1025//985 1057//991
+f 1056//986 1057//991 1092//992
+f 1091//987 1056//986 1092//992
+f 1091//987 1092//992 1125//993
+f 1124//988 1091//987 1125//993
+f 1124//988 1125//993 1156//994
+f 1122//989 1124//988 1156//994
+f 1122//989 1156//994 1142//995
+f 1026//990 999//923 1027//925
+f 1026//990 1027//925 1058//996
+f 1057//991 1026//990 1058//996
+f 1057//991 1058//996 1093//997
+f 1092//992 1057//991 1093//997
+f 1092//992 1093//997 1126//998
+f 1125//993 1092//992 1126//998
+f 1125//993 1126//998 1159//999
+f 1156//994 1125//993 1159//999
+f 1156//994 1159//999 1176//1000
+f 1142//995 1156//994 1176//1000
+f 1142//995 1176//1000 1157//1001
+f 1058//996 1027//925 1059//927
+f 1058//996 1059//927 1094//1002
+f 1093//997 1058//996 1094//1002
+f 1093//997 1094//1002 1127//1003
+f 1126//998 1093//997 1127//1003
+f 1126//998 1127//1003 1160//1004
+f 1159//999 1126//998 1160//1004
+f 1159//999 1160//1004 1192//1005
+f 1176//1000 1159//999 1192//1005
+f 1176//1000 1192//1005 1189//1006
+f 1157//1001 1176//1000 1189//1006
+f 1157//1001 1189//1006 1178//1007
+f 1094//1002 1059//927 1095//929
+f 1094//1002 1095//929 1128//1008
+f 1127//1003 1094//1002 1128//1008
+f 1127//1003 1128//1008 1161//1009
+f 1160//1004 1127//1003 1161//1009
+f 1160//1004 1161//1009 1193//1010
+f 1192//1005 1160//1004 1193//1010
+f 1192//1005 1193//1010 1215//1011
+f 1189//1006 1192//1005 1215//1011
+f 1189//1006 1215//1011 1207//1012
+f 1178//1007 1189//1006 1207//1012
+f 1178//1007 1207//1012 1191//1013
+f 1128//1008 1095//929 1129//931
+f 1128//1008 1129//931 1162//1014
+f 1161//1009 1128//1008 1162//1014
+f 1161//1009 1162//1014 1194//1015
+f 1193//1010 1161//1009 1194//1015
+f 1193//1010 1194//1015 1217//1016
+f 1215//1011 1193//1010 1217//1016
+f 1215//1011 1217//1016 1227//1017
+f 1207//1012 1215//1011 1227//1017
+f 1207//1012 1227//1017 1216//1018
+f 1191//1013 1207//1012 1216//1018
+f 1191//1013 1216//1018 1209//1019
+f 1162//1014 1129//931 1163//933
+f 1162//1014 1163//933 1195//1020
+f 1194//1015 1162//1014 1195//1020
+f 1194//1015 1195//1020 1218//1021
+f 1217//1016 1194//1015 1218//1021
+f 1217//1016 1218//1021 1230//1022
+f 1227//1017 1217//1016 1230//1022
+f 1227//1017 1230//1022 1229//1023
+f 1216//1018 1227//1017 1229//1023
+f 1216//1018 1229//1023 1225//1024
+f 1209//1019 1216//1018 1225//1024
+f 1209//1019 1225//1024 1203//1025
+f 1195//1020 1163//933 1196//935
+f 1195//1020 1196//935 1219//1026
+f 1218//1021 1195//1020 1219//1026
+f 1218//1021 1219//1026 1231//1027
+f 1230//1022 1218//1021 1231//1027
+f 1230//1022 1231//1027 1232//1028
+f 1229//1023 1230//1022 1232//1028
+f 1229//1023 1232//1028 1228//1029
+f 1225//1024 1229//1023 1228//1029
+f 1225//1024 1228//1029 1214//1030
+f 1203//1025 1225//1024 1214//1030
+f 1203//1025 1214//1030 1184//1031
+f 1219//1026 1196//935 1220//937
+f 1219//1026 1220//937 1224//1032
+f 1231//1027 1219//1026 1224//1032
+f 1231//1027 1224//1032 1223//1033
+f 1232//1028 1231//1027 1223//1033
+f 1232//1028 1223//1033 1222//1034
+f 1228//1029 1232//1028 1222//1034
+f 1228//1029 1222//1034 1221//1035
+f 1214//1030 1228//1029 1221//1035
+f 1214//1030 1221//1035 1204//1036
+f 1184//1031 1214//1030 1204//1036
+f 1184//1031 1204//1036 1171//1037
+f 1224//1032 1220//937 1201//939
+f 1224//1032 1201//939 1200//1038
+f 1223//1033 1224//1032 1200//1038
+f 1223//1033 1200//1038 1199//1039
+f 1222//1034 1223//1033 1199//1039
+f 1222//1034 1199//1039 1198//1040
+f 1221//1035 1222//1034 1198//1040
+f 1221//1035 1198//1040 1197//1041
+f 1204//1036 1221//1035 1197//1041
+f 1204//1036 1197//1041 1185//1042
+f 1171//1037 1204//1036 1185//1042
+f 1171//1037 1185//1042 1151//1043
+f 1200//1038 1201//939 1169//941
+f 1200//1038 1169//941 1168//1044
+f 1199//1039 1200//1038 1168//1044
+f 1199//1039 1168//1044 1167//1045
+f 1198//1040 1199//1039 1167//1045
+f 1198//1040 1167//1045 1166//1046
+f 1197//1041 1198//1040 1166//1046
+f 1197//1041 1166//1046 1165//1047
+f 1185//1042 1197//1041 1165//1047
+f 1185//1042 1165//1047 1164//1048
+f 1151//1043 1185//1042 1164//1048
+f 1151//1043 1164//1048 1137//1049
+f 1168//1044 1169//941 1135//943
+f 1168//1044 1135//943 1134//1050
+f 1167//1045 1168//1044 1134//1050
+f 1167//1045 1134//1050 1133//1051
+f 1166//1046 1167//1045 1133//1051
+f 1166//1046 1133//1051 1132//1052
+f 1165//1047 1166//1046 1132//1052
+f 1165//1047 1132//1052 1131//1053
+f 1164//1048 1165//1047 1131//1053
+f 1164//1048 1131//1053 1130//1054
+f 1137//1049 1164//1048 1130//1054
+f 1137//1049 1130//1054 1117//1055
+f 1134//1050 1135//943 1102//945
+f 1134//1050 1102//945 1101//1056
+f 1133//1051 1134//1050 1101//1056
+f 1133//1051 1101//1056 1100//1057
+f 1132//1052 1133//1051 1100//1057
+f 1132//1052 1100//1057 1099//1058
+f 1131//1053 1132//1052 1099//1058
+f 1131//1053 1099//1058 1098//1059
+f 1130//1054 1131//1053 1098//1059
+f 1130//1054 1098//1059 1097//1060
+f 1117//1055 1130//1054 1097//1060
+f 1117//1055 1097//1060 1096//1061
+f 1101//1056 1102//945 1065//947
+f 1101//1056 1065//947 1064//1062
+f 1100//1057 1101//1056 1064//1062
+f 1100//1057 1064//1062 1063//1063
+f 1099//1058 1100//1057 1063//1063
+f 1099//1058 1063//1063 1062//1064
+f 1098//1059 1099//1058 1062//1064
+f 1098//1059 1062//1064 1061//1065
+f 1097//1060 1098//1059 1061//1065
+f 1097//1060 1061//1065 1060//1066
+f 1096//1061 1097//1060 1060//1066
+f 1096//1061 1060//1066 1066//1067
+f 1064//1062 1065//947 1032//949
+f 1064//1062 1032//949 1031//1068
+f 1063//1063 1064//1062 1031//1068
+f 1063//1063 1031//1068 1030//1069
+f 1062//1064 1063//1063 1030//1069
+f 1062//1064 1030//1069 1029//1070
+f 1061//1065 1062//1064 1029//1070
+f 1061//1065 1029//1070 1028//1071
+f 1060//1066 1061//1065 1028//1071
+f 1060//1066 1028//1071 1033//1072
+f 1066//1067 1060//1066 1033//1072
+f 1066//1067 1033//1072 1050//1073
+f 1031//1068 1032//949 1003//951
+f 1031//1068 1003//951 1002//1074
+f 1030//1069 1031//1068 1002//1074
+f 1030//1069 1002//1074 1001//1075
+f 1029//1070 1030//1069 1001//1075
+f 1029//1070 1001//1075 1000//1076
+f 1028//1071 1029//1070 1000//1076
+f 1028//1071 1000//1076 1004//1077
+f 1033//1072 1028//1071 1004//1077
+f 1033//1072 1004//1077 1021//1078
+f 1050//1073 1033//1072 1021//1078
+f 1050//1073 1021//1078 1041//1079
+f 1002//1074 1003//951 974//953
+f 1002//1074 974//953 973//1080
+f 1001//1075 1002//1074 973//1080
+f 1001//1075 973//1080 972//1081
+f 1000//1076 1001//1075 972//1081
+f 1000//1076 972//1081 975//1082
+f 1004//1077 1000//1076 975//1082
+f 1004//1077 975//1082 992//1083
+f 1021//1078 1004//1077 992//1083
+f 1021//1078 992//1083 1010//1084
+f 1041//1079 1021//1078 1010//1084
+f 1041//1079 1010//1084 1040//1085
+f 973//1080 974//953 951//955
+f 973//1080 951//955 950//1086
+f 972//1081 973//1080 950//1086
+f 972//1081 950//1086 952//1087
+f 975//1082 972//1081 952//1087
+f 975//1082 952//1087 965//1088
+f 992//1083 975//1082 965//1088
+f 992//1083 965//1088 983//1089
+f 1010//1084 992//1083 983//1089
+f 1010//1084 983//1089 1009//1090
+f 1040//1085 1010//1084 1009//1090
+f 1040//1085 1009//1090 1039//1091
+f 950//1086 951//955 934//957
+f 950//1086 934//957 935//1092
+f 952//1087 950//1086 935//1092
+f 952//1087 935//1092 944//1093
+f 965//1088 952//1087 944//1093
+f 965//1088 944//1093 958//1094
+f 983//1089 965//1088 958//1094
+f 983//1089 958//1094 982//1095
+f 1009//1090 983//1089 982//1095
+f 1009//1090 982//1095 1008//1096
+f 1039//1091 1009//1090 1008//1096
+f 1039//1091 1008//1096 1038//1097
+f 935//1092 934//957 924//959
+f 935//1092 924//959 929//1098
+f 944//1093 935//1092 929//1098
+f 944//1093 929//1098 939//1099
+f 958//1094 944//1093 939//1099
+f 958//1094 939//1099 957//1100
+f 982//1095 958//1094 957//1100
+f 982//1095 957//1100 981//1101
+f 1008//1096 982//1095 981//1101
+f 1008//1096 981//1101 1007//1102
+f 1038//1097 1008//1096 1007//1102
+f 1038//1097 1007//1102 1037//1103
+f 924//959 920//912 929//1098
+f 929//1098 920//912 926//960
+f 939//1099 929//1098 926//960
+f 939//1099 926//960 938//962
+f 957//1100 939//1099 938//962
+f 957//1100 938//962 956//964
+f 981//1101 957//1100 956//964
+f 981//1101 956//964 980//966
+f 1007//1102 981//1101 980//966
+f 1007//1102 980//966 1006//968
+f 1037//1103 1007//1102 1006//968
+f 1037//1103 1006//968 1036//970
+f 993//1104 1005//1105 976//1106
+f 966//1107 993//1104 976//1106
+f 966//1107 976//1106 953//1108
+f 945//1109 966//1107 953//1108
+f 945//1109 953//1108 936//1110
+f 930//1111 945//1109 936//1110
+f 930//1111 936//1110 925//1112
+f 921//913 930//1111 925//1112
+f 921//913 925//1112 922//914
+f 976//1106 1005//1105 977//1113
+f 953//1108 976//1106 977//1113
+f 953//1108 977//1113 954//1114
+f 936//1110 953//1108 954//1114
+f 936//1110 954//1114 937//1115
+f 925//1112 936//1110 937//1115
+f 925//1112 937//1115 931//1116
+f 922//914 925//1112 931//1116
+f 922//914 931//1116 927//916
+f 977//1113 1005//1105 978//1117
+f 954//1114 977//1113 978//1117
+f 954//1114 978//1117 955//1118
+f 937//1115 954//1114 955//1118
+f 937//1115 955//1118 946//1119
+f 931//1116 937//1115 946//1119
+f 931//1116 946//1119 941//1120
+f 927//916 931//1116 941//1120
+f 927//916 941//1120 940//918
+f 978//1117 1005//1105 979//1121
+f 955//1118 978//1117 979//1121
+f 955//1118 979//1121 967//1122
+f 946//1119 955//1118 967//1122
+f 946//1119 967//1122 961//1123
+f 941//1120 946//1119 961//1123
+f 941//1120 961//1123 960//1124
+f 940//918 941//1120 960//1124
+f 940//918 960//1124 959//920
+f 979//1121 1005//1105 994//1125
+f 967//1122 979//1121 994//1125
+f 967//1122 994//1125 987//1126
+f 961//1123 967//1122 987//1126
+f 961//1123 987//1126 986//1127
+f 960//1124 961//1123 986//1127
+f 960//1124 986//1127 985//1128
+f 959//920 960//1124 985//1128
+f 959//920 985//1128 984//922
+f 994//1125 1005//1105 1015//1129
+f 987//1126 994//1125 1015//1129
+f 987//1126 1015//1129 1014//1130
+f 986//1127 987//1126 1014//1130
+f 986//1127 1014//1130 1013//1131
+f 985//1128 986//1127 1013//1131
+f 985//1128 1013//1131 1012//1132
+f 984//922 985//1128 1012//1132
+f 984//922 1012//1132 1011//924
+f 1015//1129 1005//1105 1035//1133
+f 1014//1130 1015//1129 1035//1133
+f 1014//1130 1035//1133 1045//1134
+f 1013//1131 1014//1130 1045//1134
+f 1013//1131 1045//1134 1044//1135
+f 1012//1132 1013//1131 1044//1135
+f 1012//1132 1044//1135 1043//1136
+f 1011//924 1012//1132 1043//1136
+f 1011//924 1043//1136 1042//926
+f 1035//1133 1005//1105 1052//1137
+f 1045//1134 1035//1133 1052//1137
+f 1045//1134 1052//1137 1080//1138
+f 1044//1135 1045//1134 1080//1138
+f 1044//1135 1080//1138 1079//1139
+f 1043//1136 1044//1135 1079//1139
+f 1043//1136 1079//1139 1078//1140
+f 1042//926 1043//1136 1078//1140
+f 1042//926 1078//1140 1077//928
+f 1052//1137 1005//1105 1068//1141
+f 1080//1138 1052//1137 1068//1141
+f 1080//1138 1068//1141 1106//1142
+f 1079//1139 1080//1138 1106//1142
+f 1079//1139 1106//1142 1112//1143
+f 1078//1140 1079//1139 1112//1143
+f 1078//1140 1112//1143 1111//1144
+f 1077//928 1078//1140 1111//1144
+f 1077//928 1111//1144 1110//930
+f 1068//1141 1005//1105 1087//1145
+f 1106//1142 1068//1141 1087//1145
+f 1106//1142 1087//1145 1120//1146
+f 1112//1143 1106//1142 1120//1146
+f 1112//1143 1120//1146 1146//1147
+f 1111//1144 1112//1143 1146//1147
+f 1111//1144 1146//1147 1145//1148
+f 1110//930 1111//1144 1145//1148
+f 1110//930 1145//1148 1144//932
+f 1087//1145 1005//1105 1107//1149
+f 1120//1146 1087//1145 1107//1149
+f 1120//1146 1107//1149 1140//1150
+f 1146//1147 1120//1146 1140//1150
+f 1146//1147 1140//1150 1174//1151
+f 1145//1148 1146//1147 1174//1151
+f 1145//1148 1174//1151 1180//1152
+f 1144//932 1145//1148 1180//1152
+f 1144//932 1180//1152 1179//934
+f 1107//1149 1005//1105 1121//1153
+f 1140//1150 1107//1149 1121//1153
+f 1140//1150 1121//1153 1154//1154
+f 1174//1151 1140//1150 1154//1154
+f 1174//1151 1154//1154 1188//1155
+f 1180//1152 1174//1151 1188//1155
+f 1180//1152 1188//1155 1211//1156
+f 1179//934 1180//1152 1211//1156
+f 1179//934 1211//1156 1210//936
+f 1121//1153 1005//1105 1141//1157
+f 1154//1154 1121//1153 1141//1157
+f 1154//1154 1141//1157 1175//1158
+f 1188//1155 1154//1154 1175//1158
+f 1188//1155 1175//1158 1206//1159
+f 1211//1156 1188//1155 1206//1159
+f 1211//1156 1206//1159 1226//1160
+f 1210//936 1211//1156 1226//1160
+f 1210//936 1226//1160 1212//938
+f 1141//1157 1005//1105 1155//1161
+f 1175//1158 1141//1157 1155//1161
+f 1175//1158 1155//1161 1187//1162
+f 1206//1159 1175//1158 1187//1162
+f 1206//1159 1187//1162 1205//1163
+f 1226//1160 1206//1159 1205//1163
+f 1226//1160 1205//1163 1213//1164
+f 1212//938 1226//1160 1213//1164
+f 1212//938 1213//1164 1181//940
+f 1155//1161 1005//1105 1153//1165
+f 1187//1162 1155//1161 1153//1165
+f 1187//1162 1153//1165 1173//1166
+f 1205//1163 1187//1162 1173//1166
+f 1205//1163 1173//1166 1186//1167
+f 1213//1164 1205//1163 1186//1167
+f 1213//1164 1186//1167 1182//1168
+f 1181//940 1213//1164 1182//1168
+f 1181//940 1182//1168 1147//942
+f 1153//1165 1005//1105 1139//1169
+f 1173//1166 1153//1165 1139//1169
+f 1173//1166 1139//1169 1152//1170
+f 1186//1167 1173//1166 1152//1170
+f 1186//1167 1152//1170 1172//1171
+f 1182//1168 1186//1167 1172//1171
+f 1182//1168 1172//1171 1148//1172
+f 1147//942 1182//1168 1148//1172
+f 1147//942 1148//1172 1113//944
+f 1139//1169 1005//1105 1119//1173
+f 1152//1170 1139//1169 1119//1173
+f 1152//1170 1119//1173 1138//1174
+f 1172//1171 1152//1170 1138//1174
+f 1172//1171 1138//1174 1149//1175
+f 1148//1172 1172//1171 1149//1175
+f 1148//1172 1149//1175 1114//1176
+f 1113//944 1148//1172 1114//1176
+f 1113//944 1114//1176 1081//946
+f 1119//1173 1005//1105 1105//1177
+f 1138//1174 1119//1173 1105//1177
+f 1138//1174 1105//1177 1118//1178
+f 1149//1175 1138//1174 1118//1178
+f 1149//1175 1118//1178 1115//1179
+f 1114//1176 1149//1175 1115//1179
+f 1114//1176 1115//1179 1082//1180
+f 1081//946 1114//1176 1082//1180
+f 1081//946 1082//1180 1046//948
+f 1105//1177 1005//1105 1086//1181
+f 1118//1178 1105//1177 1086//1181
+f 1118//1178 1086//1181 1104//1182
+f 1115//1179 1118//1178 1104//1182
+f 1115//1179 1104//1182 1083//1183
+f 1082//1180 1115//1179 1083//1183
+f 1082//1180 1083//1183 1047//1184
+f 1046//948 1082//1180 1047//1184
+f 1046//948 1047//1184 1016//950
+f 1086//1181 1005//1105 1067//1185
+f 1104//1182 1086//1181 1067//1185
+f 1104//1182 1067//1185 1084//1186
+f 1083//1183 1104//1182 1084//1186
+f 1083//1183 1084//1186 1048//1187
+f 1047//1184 1083//1183 1048//1187
+f 1047//1184 1048//1187 1017//1188
+f 1016//950 1047//1184 1017//1188
+f 1016//950 1017//1188 988//952
+f 1067//1185 1005//1105 1051//1189
+f 1084//1186 1067//1185 1051//1189
+f 1084//1186 1051//1189 1049//1190
+f 1048//1187 1084//1186 1049//1190
+f 1048//1187 1049//1190 1018//1191
+f 1017//1188 1048//1187 1018//1191
+f 1017//1188 1018//1191 989//1192
+f 988//952 1017//1188 989//1192
+f 988//952 989//1192 962//954
+f 1051//1189 1005//1105 1034//1193
+f 1049//1190 1051//1189 1034//1193
+f 1049//1190 1034//1193 1019//1194
+f 1018//1191 1049//1190 1019//1194
+f 1018//1191 1019//1194 990//1195
+f 989//1192 1018//1191 990//1195
+f 989//1192 990//1195 963//1196
+f 962//954 989//1192 963//1196
+f 962//954 963//1196 942//956
+f 1034//1193 1005//1105 1020//1197
+f 1019//1194 1034//1193 1020//1197
+f 1019//1194 1020//1197 991//1198
+f 990//1195 1019//1194 991//1198
+f 990//1195 991//1198 964//1199
+f 963//1196 990//1195 964//1199
+f 963//1196 964//1199 943//1200
+f 942//956 963//1196 943//1200
+f 942//956 943//1200 928//958
+f 1020//1197 1005//1105 993//1104
+f 991//1198 1020//1197 993//1104
+f 991//1198 993//1104 966//1107
+f 964//1199 991//1198 966//1107
+f 964//1199 966//1107 945//1109
+f 943//1200 964//1199 945//1109
+f 943//1200 945//1109 930//1111
+f 928//958 943//1200 930//1111
+f 928//958 930//1111 921//913
+f 1151//1043 1137//1049 1150//1201
+f 1142//995 1143//1202 1122//989
+f 1053//971 1089//977 1069//1203
+f 1122//989 1123//1204 1109//983
+f 1096//1061 1103//1205 1117//1055
+f 1171//1037 1170//1206 1184//1031
+f 1191//1013 1209//1019 1190//1207
+f 1036//970 1053//971 1070//1208
+f 1036//970 1070//1208 1037//1103
+f 1203//1025 1202//1209 1209//1019
+f 1066//1067 1085//1210 1096//1061
+f 1050//1073 1076//1211 1066//1067
+f 1184//1031 1183//1212 1203//1025
+f 1041//1079 1040//1085 1075//1213
+f 1171//1037 1151//1043 1170//1206
+f 1157//1001 1158//1214 1142//995
+f 1137//1049 1117//1055 1136//1215
+f 1038//1097 1072//1216 1039//1091
+f 1178//1007 1191//1013 1177//1217
+f 1089//977 1109//983 1088//1218
+f 1037//1103 1071//1219 1038//1097
+f 1050//1073 1041//1079 1076//1211
+f 1157//1001 1178//1007 1158//1214
+f 1040//1085 1039//1091 1074//1220
+f 1170//1206 1150//1201 452//1221
+f 1264//1222 1071//1219 1263//1223
+f 1261//1224 1262//1225 1076//1211
+f 1085//1210 1076//1211 1262//1225
+f 1190//1207 1208//1226 405//1227
+f 509//176 526//519 1260//1228
+f 1260//1228 526//519 556//523
+f 167//201 556//523 595//202
+f 595//202 621//526 1255//200
+f 172//192 163//182 1254//199
+f 8//49 18//42 3//1229
+f 3//1229 18//42 1288//1230
+f 15//75 7//66 1240//82
+f 1250//50 8//49 3//1229
+f 1//32 1283//40 566//33
+f 593//3 566//33 9//41
+f 19//1231 20//34 33//25
+f 1243//2 636//89 593//3
+f 64//18 40//6 36//17
+f 1244//24 64//18 36//17
+f 641//579 614//582 1237//1232
+f 1250//50 1239//1233 4//48
+f 7//66 4//48 1239//1233
+f 5//74 14//73 11//81
+f 1240//82 7//66 1239//1233
+f 1//32 553//31 565//56
+f 593//3 9//41 1289//83
+f 18//42 20//34 1288//1230
+f 40//6 27//5 1252//16
+f 1249//58 6//65 565//56
+f 14//73 15//75 11//81
+f 592//57 1234//130 1233//58
+f 1237//1232 614//582 588//586
+f 1237//1232 588//586 605//592
+f 539//175 159//177 1259//1234
+f 164//257 1257//1235 168//251
+f 509//176 1256//1236 159//177
+f 1258//250 585//249 1259//1234
+f 169//1237 164//257 174//242
+f 191//236 208//230 185//1238
+f 1290//1239 207//229 640//777
+f 621//526 1292//1240 1255//200
+f 163//182 160//178 157//180
+f 1284//1241 1292//1240 648//227
+f 1252//16 27//5 12//95
+f 119//124 116//129 104//161
+f 116//129 88//135 1242//136
+f 87//159 666//713 715//160
+f 87//159 1241//1242 666//713
+f 1253//1243 47//150 631//152
+f 631//152 666//713 1253//1243
+f 86//142 74//144 60//143
+f 640//777 1291//1244 1290//1239
+f 1291//1244 640//777 613//248
+f 208//230 219//220 200//1245
+f 665//778 640//777 207//229
+f 1285//226 200//1245 219//220
+f 648//227 207//229 1284//1241
+f 691//228 665//778 207//229
+f 605//592 1236//1246 1245//1246
+f 1235//151 605//592 631//152
+f 1248//145 48//153 37//167
+f 667//708 1251//1247 1247//158
+f 605//592 1235//151 1236//1246
+f 641//579 1238//1248 1246//1249
+f 641//579 1237//1232 1238//1248
+f 404//412 391//405 1267//904
+f 1265//911 453//433 436//426
+f 328//351 327//350 331//910
+f 443//445 462//439 469//1250
+f 342//370 1275//1251 344//1252
+f 342//370 334//363 1275//1251
+f 372//480 385//473 384//906
+f 351//377 1271//1253 361//384
+f 351//377 350//1254 1271//1253
+f 337//1255 335//508 345//501
+f 384//906 385//473 396//466
+f 361//384 368//1256 369//391
+f 361//384 1271//1253 368//1256
+f 352//494 1276//1257 346//908
+f 397//1258 396//466 411//459
+f 342//370 350//1254 351//377
+f 342//370 344//1252 350//1254
+f 380//398 369//391 368//1256
+f 334//363 328//351 332//1259
+f 404//412 418//1260 419//419
+f 404//412 1268//905 418//1260
+f 426//452 1286//1261 1282//1262
+f 1282//1262 411//459 426//452
+f 352//494 362//487 1277//1263
+f 1277//1263 1276//1257 352//494
+f 391//405 380//398 381//1264
+f 426//452 443//445 444//1265
+f 444//1265 1286//1261 426//452
+f 362//487 372//480 373//907
+f 373//907 1277//1263 362//487
+f 1202//1209 1183//1212 1270//1266
+f 1116//1267 428//1268 1136//1215
+f 428//1268 1287//1269 1136//1215
+f 1073//1270 353//1271 1074//1220
+f 353//1271 363//1272 1074//1220
+f 353//1271 1073//1270 1279//1273
+f 1070//1208 1069//1203 1263//1223
+f 1274//1274 1069//1203 1088//1218
+f 1177//1217 1273//1275 1272//1276
+f 1272//1276 1158//1214 1177//1217
+f 428//1268 1116//1267 427//1277
+f 1088//1218 1108//1278 343//1279
+f 1103//1205 1085//1210 1278//1280
+f 1108//1278 1123//1204 1280//1281
+f 1123//1204 1281//1282 1280//1281
+f 1143//1202 1158//1214 360//1283
+f 1158//1214 1272//1276 360//1283
+f 1075//1213 1261//1224 1076//1211
+f 1085//1210 1262//1225 1278//1280
+f 1123//1204 1143//1202 1281//1282
+f 1143//1202 360//1283 1281//1282
+f 1074//1220 363//1272 1075//1213
+f 363//1272 1261//1224 1075//1213
+f 1270//1266 1183//1212 452//1221
+f 1266//1284 1150//1201 1136//1215
+f 1273//1275 1177//1217 1190//1207
+f 1208//1226 1202//1209 406//1285
+f 1202//1209 1269//1286 406//1285
+f 1072//1216 1071//1219 1264//1222
+f 1241//1242 1253//1243 666//713
+f 71//137 86//142 60//143
+f 88//135 86//142 71//137
+f 1245//1246 1237//1232 605//592
+f 119//124 104//161 90//119
+f 667//708 1246//1249 1251//1247
+f 621//526 648//227 1292//1240
+f 208//230 200//1245 185//1238
+f 174//242 191//236 169//1237
+f 1258//250 1291//1244 613//248
+f 1244//24 19//1231 33//25
+f 1288//1230 20//34 19//1231
+f 199//208 182//198 178//1287
+f 206//1288 218//214 190//1289
+f 1260//1228 556//523 167//201
+f 157//180 1254//199 163//182
+f 178//1287 182//198 1254//199
+f 218//214 206//1288 1285//226
+f 161//258 1257//1235 158//179
+f 324//345 312//274 323//336
+f 303//317 302//323 316//338
+f 305//305 304//311 318//340
+f 304//311 303//317 317//339
+f 322//347 309//281 321//341
+f 320//346 307//293 319//342
+f 301//329 311//335 314//344
+f 302//323 301//329 315//337
+f 311//335 312//274 313//343
+f 319//342 306//299 318//340
+f 321//341 308//287 320//346
+f 323//336 310//275 322//347
+f 418//1260 435//909 419//419
+f 337//1255 331//910 335//508
+f 469//1250 462//439 1265//911
+f 1137//1049 1136//1215 1150//1201
+f 1143//1202 1123//1204 1122//989
+f 1089//977 1088//1218 1069//1203
+f 1123//1204 1108//1278 1109//983
+f 1103//1205 1116//1267 1117//1055
+f 1170//1206 1183//1212 1184//1031
+f 1209//1019 1208//1226 1190//1207
+f 1053//971 1069//1203 1070//1208
+f 1070//1208 1071//1219 1037//1103
+f 1202//1209 1208//1226 1209//1019
+f 1085//1210 1103//1205 1096//1061
+f 1076//1211 1085//1210 1066//1067
+f 1183//1212 1202//1209 1203//1025
+f 1040//1085 1074//1220 1075//1213
+f 1151//1043 1150//1201 1170//1206
+f 1158//1214 1143//1202 1142//995
+f 1117//1055 1116//1267 1136//1215
+f 1072//1216 1073//1270 1039//1091
+f 1191//1013 1190//1207 1177//1217
+f 1109//983 1108//1278 1088//1218
+f 1071//1219 1072//1216 1038//1097
+f 1041//1079 1075//1213 1076//1211
+f 1178//1007 1177//1217 1158//1214
+f 1039//1091 1073//1270 1074//1220
+f 1150//1201 1266//1284 452//1221
+f 1071//1219 1070//1208 1263//1223
+f 1208//1226 406//1285 405//1227
+f 1256//1236 509//176 1260//1228
+f 1257//1235 161//258 168//251
+f 585//249 539//175 1259//1234
+f 715//160 667//708 1247//158
+f 667//708 641//579 1246//1249
+f 435//909 1265//911 436//426
+f 332//1259 328//351 331//910
+f 444//1265 443//445 469//1250
+f 346//908 337//1255 345//501
+f 397//1258 384//906 396//466
+f 1282//1262 397//1258 411//459
+f 381//1264 380//398 368//1256
+f 1275//1251 334//363 332//1259
+f 1267//904 391//405 381//1264
+f 1269//1286 1202//1209 1270//1266
+f 1073//1270 1072//1216 1279//1273
+f 1069//1203 1274//1274 1263//1223
+f 343//1279 1274//1274 1088//1218
+f 1116//1267 1103//1205 427//1277
+f 1108//1278 1280//1281 343//1279
+f 427//1277 1103//1205 1278//1280
+f 1183//1212 1170//1206 452//1221
+f 1287//1269 1266//1284 1136//1215
+f 405//1227 1273//1275 1190//1207
+f 1279//1273 1072//1216 1264//1222
+f 191//236 185//1238 169//1237
+f 190//1289 199//208 178//1287
+f 218//214 199//208 190//1289
diff --git a/pytorch3d/docs/tutorials/dataloaders_ShapeNetCore_R2N2.ipynb b/pytorch3d/docs/tutorials/dataloaders_ShapeNetCore_R2N2.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..6986da2888655622a61c1145e9b59fea9d7387a8
--- /dev/null
+++ b/pytorch3d/docs/tutorials/dataloaders_ShapeNetCore_R2N2.ipynb
@@ -0,0 +1,556 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Dataloaders for ShapeNetCore and R2N2\n",
+    "This tutorial shows how to:\n",
+    "- Load models from ShapeNetCore and R2N2 using PyTorch3D's data loaders.\n",
+    "- Pass the loaded datasets to `torch.utils.data.DataLoader`.\n",
+    "- Render ShapeNetCore models with PyTorch3D's renderer.\n",
+    "- Render R2N2 models with the same orientations as the original renderings in the dataset.\n",
+    "- Visualize R2N2 model voxels."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 0. Install and import modules"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Ensure `torch` and `torchvision` are installed. If `pytorch3d` is not installed, install it using the following cell:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import torch\n",
+    "need_pytorch3d=False\n",
+    "try:\n",
+    "    import pytorch3d\n",
+    "except ModuleNotFoundError:\n",
+    "    need_pytorch3d=True\n",
+    "if need_pytorch3d:\n",
+    "    if torch.__version__.startswith(\"2.1.\") and sys.platform.startswith(\"linux\"):\n",
+    "        # We try to install PyTorch3D via a released wheel.\n",
+    "        pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
+    "        version_str=\"\".join([\n",
+    "            f\"py3{sys.version_info.minor}_cu\",\n",
+    "            torch.version.cuda.replace(\".\",\"\"),\n",
+    "            f\"_pyt{pyt_version_str}\"\n",
+    "        ])\n",
+    "        !pip install fvcore iopath\n",
+    "        !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",
+    "    else:\n",
+    "        # We try to install PyTorch3D from source.\n",
+    "        !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "\n",
+    "from pytorch3d.datasets import (\n",
+    "    R2N2,\n",
+    "    ShapeNetCore,\n",
+    "    collate_batched_meshes,\n",
+    "    render_cubified_voxels,\n",
+    ")\n",
+    "from pytorch3d.renderer import (\n",
+    "    OpenGLPerspectiveCameras,\n",
+    "    PointLights,\n",
+    "    RasterizationSettings,\n",
+    "    TexturesVertex,\n",
+    "    look_at_view_transform,\n",
+    ")\n",
+    "\n",
+    "from pytorch3d.structures import Meshes\n",
+    "from torch.utils.data import DataLoader\n",
+    "\n",
+    "# add path for demo utils functions \n",
+    "import sys\n",
+    "import os\n",
+    "sys.path.append(os.path.abspath(''))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If using **Google Colab**, fetch the utils file for plotting image grids:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!wget https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/docs/tutorials/utils/plot_image_grid.py\n",
+    "from plot_image_grid import image_grid"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "OR if running locally uncomment and run the following cell:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# from utils import image_grid"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Load the datasets"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you haven't already downloaded the ShapeNetCore dataset, first do that following the instructions here: https://www.shapenet.org/. ShapeNetCore is a subset of the ShapeNet dataset. In PyTorch3D we support both version 1 (57 categories) and version 2 (55 categories).\n",
+    "\n",
+    "Then modify `SHAPENET_PATH` below to your local path to the ShapeNetCore dataset folder. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Setup\n",
+    "if torch.cuda.is_available():\n",
+    "    device = torch.device(\"cuda:0\")\n",
+    "    torch.cuda.set_device(device)\n",
+    "else:\n",
+    "    device = torch.device(\"cpu\")\n",
+    "    \n",
+    "SHAPENET_PATH = \"\"\n",
+    "shapenet_dataset = ShapeNetCore(SHAPENET_PATH)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The R2N2 dataset can be downloaded using the instructions here: http://3d-r2n2.stanford.edu/. Look at the links for `ShapeNetRendering` and `ShapeNetVox32`. The R2N2 dataset contains 13 categories that are a subset of the ShapeNetCore v.1\n",
+    "dataset. The R2N2 dataset also contains its own 24 renderings of each object and voxelized models.\n",
+    "\n",
+    "Then modify `R2N2_PATH` and `SPLITS_PATH` below to your local R2N2 dataset folder path and splits file path respectively. Here we will load the `train` split of R2N2 and ask the voxels of each model to be returned."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "R2N2_PATH = \"\"\n",
+    "SPLITS_PATH = \"None\"\n",
+    "r2n2_dataset = R2N2(\"train\", SHAPENET_PATH, R2N2_PATH, SPLITS_PATH, return_voxels=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can retrieve a model by indexing into the loaded dataset. For both ShapeNetCore and R2N2, we can examine the category this model belongs to (in the form of a synset id, equivalent to wnid described in ImageNet's API: http://image-net.org/download-API), its model id, and its vertices and faces."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "shapenet_model = shapenet_dataset[6]\n",
+    "print(\"This model belongs to the category \" + shapenet_model[\"synset_id\"] + \".\")\n",
+    "print(\"This model has model id \" + shapenet_model[\"model_id\"] + \".\")\n",
+    "model_verts, model_faces = shapenet_model[\"verts\"], shapenet_model[\"faces\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can use its vertices and faces to form a `Meshes` object which is a PyTorch3D datastructure for working with batched meshes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_textures = TexturesVertex(verts_features=torch.ones_like(model_verts, device=device)[None])\n",
+    "shapenet_model_mesh = Meshes(\n",
+    "    verts=[model_verts.to(device)],   \n",
+    "    faces=[model_faces.to(device)],\n",
+    "    textures=model_textures\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "With R2N2, we can further examine R2N2's original renderings. For instance, if we would like to see the second and third views of the eleventh object in the R2N2 dataset, we can do the following:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "r2n2_renderings = r2n2_dataset[10,[1,2]]\n",
+    "image_grid(r2n2_renderings.numpy(), rows=1, cols=2, rgb=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Use the datasets with `torch.utils.data.DataLoader`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Training deep learning models usually requires passing in batches of inputs. The `torch.utils.data.DataLoader` from PyTorch helps us do this. PyTorch3D provides a function `collate_batched_meshes` to group the input meshes into a single `Meshes` object which represents the batch. The `Meshes` datastructure can then be used directly by other PyTorch3D ops which might be part of the deep learning model (e.g. `graph_conv`).\n",
+    "\n",
+    "For R2N2, if all the models in the batch have the same number of views, the views, rotation matrices, translation matrices, intrinsic matrices and voxels will also be stacked into batched tensors.\n",
+    "\n",
+    "**NOTE**: All models in the `val` split of R2N2 have 24 views, but there are 8 models that split their 24 views between `train` and `test` splits, in which case `collate_batched_meshes` will only be able to join the matrices, views and voxels as lists. However, this can be avoided by loading only one view of each model by setting `return_all_views = False`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_size = 12\n",
+    "r2n2_single_view = R2N2(\"train\", SHAPENET_PATH, R2N2_PATH, SPLITS_PATH, return_all_views=False, return_voxels=True)\n",
+    "r2n2_loader = DataLoader(r2n2_single_view, batch_size=batch_size, collate_fn=collate_batched_meshes)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's visualize all the views (one for each model) in the batch:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "it = iter(r2n2_loader)\n",
+    "r2n2_batch = next(it)\n",
+    "batch_renderings = r2n2_batch[\"images\"] # (N, V, H, W, 3), and in this case V is 1.\n",
+    "image_grid(batch_renderings.squeeze().numpy(), rows=3, cols=4, rgb=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Render ShapeNetCore models with PyTorch3D's differentiable renderer"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Both `ShapeNetCore` and `R2N2` dataloaders have customized `render` functions that support rendering models by specifying their model ids, categories or indices using PyTorch3D's differentiable renderer implementation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Rendering settings.\n",
+    "R, T = look_at_view_transform(1.0, 1.0, 90)\n",
+    "cameras = OpenGLPerspectiveCameras(R=R, T=T, device=device)\n",
+    "raster_settings = RasterizationSettings(image_size=512)\n",
+    "lights = PointLights(location=torch.tensor([0.0, 1.0, -2.0], device=device)[None],device=device)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "First we will try to render three models by their model ids:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "images_by_model_ids = shapenet_dataset.render(\n",
+    "    model_ids=[\n",
+    "        \"13394ca47c89f91525a3aaf903a41c90\",\n",
+    "        \"14755c2ee8e693aba508f621166382b0\",\n",
+    "        \"156c4207af6d2c8f1fdc97905708b8ea\",\n",
+    "    ],\n",
+    "    device=device,\n",
+    "    cameras=cameras,\n",
+    "    raster_settings=raster_settings,\n",
+    "    lights=lights,\n",
+    ")\n",
+    "image_grid(images_by_model_ids.cpu().numpy(), rows=1, cols=3, rgb=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Suppose we would like to render the first three models in the dataset, we can render models by their indices:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "images_by_idxs = shapenet_dataset.render(\n",
+    "    idxs=list(range(3)),\n",
+    "    device=device,\n",
+    "    cameras=cameras,\n",
+    "    raster_settings=raster_settings,\n",
+    "    lights=lights,\n",
+    ")\n",
+    "image_grid(images_by_idxs.cpu().numpy(), rows=1, cols=3, rgb=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Alternatively, if we are not interested in any particular models but would like to see random models from some specific categories, we can do that by specifying `categories` and `sample_nums`. For example, if we would like to render 2 models from the category \"faucet\" and 3 models from the category \"chair\", we can do the following:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "images_by_categories = shapenet_dataset.render(\n",
+    "    categories=[\"faucet\", \"chair\"],\n",
+    "    sample_nums=[2, 3],\n",
+    "    device=device,\n",
+    "    cameras=cameras,\n",
+    "    raster_settings=raster_settings,\n",
+    "    lights=lights,\n",
+    ")\n",
+    "image_grid(images_by_categories.cpu().numpy(), rows=1, cols=5, rgb=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If we are not interested in any particular categories and just would like to render some random models from the whole dataset, we can set the number of models to be rendered in `sample_nums` and not specify any `categories`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "random_model_images = shapenet_dataset.render(\n",
+    "    sample_nums=[3],\n",
+    "    device=device,\n",
+    "    cameras=cameras,\n",
+    "    raster_settings=raster_settings,\n",
+    "    lights=lights,\n",
+    ")\n",
+    "image_grid(random_model_images.cpu().numpy(), rows=1, cols=5, rgb=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Render R2N2 models with the same orientations as the original renderings in the dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can render R2N2 models the same way as we rendered ShapeNetCore models above. In addition, we can also render R2N2 models with the same orientations as the original renderings in the dataset. For this we will use R2N2's customized `render` function and a different type of PyTorch3D camera called `BlenderCamera`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In this example, we will render the seventh model with the same orientations as its second and third views. First we will retrieve R2N2's original renderings to compare with the result."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "original_rendering = r2n2_dataset[6,[1,2]][\"images\"]\n",
+    "image_grid(original_rendering.numpy(), rows=1, cols=2, rgb=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Next, we will visualize PyTorch3D's renderings:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "r2n2_oriented_images = r2n2_dataset.render(\n",
+    "    idxs=[6],\n",
+    "    view_idxs=[1,2],\n",
+    "    device=device,\n",
+    "    raster_settings=raster_settings,\n",
+    "    lights=lights,\n",
+    ")\n",
+    "image_grid(r2n2_oriented_images.cpu().numpy(), rows=1, cols=2, rgb=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Visualize R2N2 models' voxels"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "R2N2 dataloader also returns models' voxels. We can visualize them by utilizing the `render_cubified_voxels` function. This will cubify the voxels to a Meshes object, which will then be rendered."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In this example we will visualize the tenth model in the dataset with the same orientation of its second and third views. First we will retrieve R2N2's original renderings to compare with the result."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "r2n2_model = r2n2_dataset[9,[1,2]]\n",
+    "original_rendering = r2n2_model[\"images\"]\n",
+    "image_grid(original_rendering.numpy(), rows=1, cols=2, rgb=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Next, we will pass the voxels to `render_cubified_voxels`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "vox_render = render_cubified_voxels(r2n2_model[\"voxels\"], device=device)\n",
+    "image_grid(vox_render.cpu().numpy(), rows=1, cols=2, rgb=True)"
+   ]
+  }
+ ],
+ "metadata": {
+  "anp_metadata": {
+   "path": "fbsource/fbcode/vision/fair/pytorch3d/docs/tutorials/Dataloaders_ShapeNetCore_R2N2.ipynb"
+  },
+  "bento_stylesheets": {
+   "bento/extensions/flow/main.css": true,
+   "bento/extensions/kernel_selector/main.css": true,
+   "bento/extensions/kernel_ui/main.css": true,
+   "bento/extensions/new_kernel/main.css": true,
+   "bento/extensions/system_usage/main.css": true,
+   "bento/extensions/theme/main.css": true
+  },
+  "disseminate_notebook_info": {
+   "backup_notebook_id": "669429066983805"
+  },
+  "kernelspec": {
+   "display_name": "intro_to_cv",
+   "language": "python",
+   "name": "bento_kernel_intro_to_cv"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.5+"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pytorch3d/docs/tutorials/deform_source_mesh_to_target_mesh.ipynb b/pytorch3d/docs/tutorials/deform_source_mesh_to_target_mesh.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..1b7d04552beb443604f5036d7e3fac1ae05dfe5d
--- /dev/null
+++ b/pytorch3d/docs/tutorials/deform_source_mesh_to_target_mesh.ipynb
@@ -0,0 +1,536 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "nF07sDgJWHQy"
+   },
+   "outputs": [],
+   "source": [
+    "# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "BRcOouCyWHQ2"
+   },
+   "source": [
+    "# Deform a source mesh to form a target mesh using 3D loss functions"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "HfwwW9HqtuvQ"
+   },
+   "source": [
+    "In this tutorial, we learn to deform an initial generic shape (e.g. sphere) to fit a target shape.\n",
+    "\n",
+    "We will cover: \n",
+    "\n",
+    "- How to **load a mesh** from an `.obj` file\n",
+    "- How to use the PyTorch3D **Meshes** datastructure\n",
+    "- How to use 4 different PyTorch3D **mesh loss functions**\n",
+    "- How to set up an **optimization loop**\n",
+    "\n",
+    "\n",
+    "Starting from a sphere mesh, we learn the offset to each vertex in the mesh such that\n",
+    "the predicted mesh is closer to the target mesh at each optimization step. To achieve this we minimize:\n",
+    "\n",
+    "+ `chamfer_distance`, the distance between the predicted (deformed) and target mesh, defined as the chamfer distance between the set of pointclouds resulting from **differentiably sampling points** from their surfaces. \n",
+    "\n",
+    "However, solely minimizing the chamfer distance between the predicted and the target mesh will lead to a non-smooth shape (verify this by setting  `w_chamfer=1.0` and all other weights to `0.0`). \n",
+    "\n",
+    "We enforce smoothness by adding **shape regularizers** to the objective. Namely, we add:\n",
+    "\n",
+    "+ `mesh_edge_loss`, which minimizes the length of the edges in the predicted mesh.\n",
+    "+ `mesh_normal_consistency`, which enforces consistency across the normals of neighboring faces.\n",
+    "+ `mesh_laplacian_smoothing`, which is the laplacian regularizer."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "P-h1ji4dWHQ5"
+   },
+   "source": [
+    "## 0. Install and Import modules"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Ensure `torch` and `torchvision` are installed. If `pytorch3d` is not installed, install it using the following cell:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "_qkuyhyTeRyM"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import torch\n",
+    "need_pytorch3d=False\n",
+    "try:\n",
+    "    import pytorch3d\n",
+    "except ModuleNotFoundError:\n",
+    "    need_pytorch3d=True\n",
+    "if need_pytorch3d:\n",
+    "    if torch.__version__.startswith(\"2.1.\") and sys.platform.startswith(\"linux\"):\n",
+    "        # We try to install PyTorch3D via a released wheel.\n",
+    "        pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
+    "        version_str=\"\".join([\n",
+    "            f\"py3{sys.version_info.minor}_cu\",\n",
+    "            torch.version.cuda.replace(\".\",\"\"),\n",
+    "            f\"_pyt{pyt_version_str}\"\n",
+    "        ])\n",
+    "        !pip install fvcore iopath\n",
+    "        !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",
+    "    else:\n",
+    "        # We try to install PyTorch3D from source.\n",
+    "        !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "ylbZGXYBtuvB"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import torch\n",
+    "from pytorch3d.io import load_obj, save_obj\n",
+    "from pytorch3d.structures import Meshes\n",
+    "from pytorch3d.utils import ico_sphere\n",
+    "from pytorch3d.ops import sample_points_from_meshes\n",
+    "from pytorch3d.loss import (\n",
+    "    chamfer_distance, \n",
+    "    mesh_edge_loss, \n",
+    "    mesh_laplacian_smoothing, \n",
+    "    mesh_normal_consistency,\n",
+    ")\n",
+    "import numpy as np\n",
+    "from tqdm.notebook import tqdm\n",
+    "%matplotlib notebook \n",
+    "from mpl_toolkits.mplot3d import Axes3D\n",
+    "import matplotlib.pyplot as plt\n",
+    "import matplotlib as mpl\n",
+    "mpl.rcParams['savefig.dpi'] = 80\n",
+    "mpl.rcParams['figure.dpi'] = 80\n",
+    "\n",
+    "# Set the device\n",
+    "if torch.cuda.is_available():\n",
+    "    device = torch.device(\"cuda:0\")\n",
+    "else:\n",
+    "    device = torch.device(\"cpu\")\n",
+    "    print(\"WARNING: CPU only, this will be slow!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "yT1JTXu1WHQ_"
+   },
+   "source": [
+    "## 1. Load an obj file and create a Meshes object"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Download the target 3D model of a dolphin. It will be saved locally as a file called `dolphin.obj`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 204
+    },
+    "colab_type": "code",
+    "id": "oFNkB6nQWZSw",
+    "outputId": "c1bbe6e2-a4ea-4113-d53d-1cb1ece130f1"
+   },
+   "outputs": [],
+   "source": [
+    "!wget https://dl.fbaipublicfiles.com/pytorch3d/data/dolphin/dolphin.obj"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "dz0imH-ltuvS"
+   },
+   "outputs": [],
+   "source": [
+    "# Load the dolphin mesh.\n",
+    "trg_obj = 'dolphin.obj'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "rbyRhI8ituvW"
+   },
+   "outputs": [],
+   "source": [
+    "# We read the target 3D model using load_obj\n",
+    "verts, faces, aux = load_obj(trg_obj)\n",
+    "\n",
+    "# verts is a FloatTensor of shape (V, 3) where V is the number of vertices in the mesh\n",
+    "# faces is an object which contains the following LongTensors: verts_idx, normals_idx and textures_idx\n",
+    "# For this tutorial, normals and textures are ignored.\n",
+    "faces_idx = faces.verts_idx.to(device)\n",
+    "verts = verts.to(device)\n",
+    "\n",
+    "# We scale normalize and center the target mesh to fit in a sphere of radius 1 centered at (0,0,0). \n",
+    "# (scale, center) will be used to bring the predicted mesh to its original center and scale\n",
+    "# Note that normalizing the target mesh, speeds up the optimization but is not necessary!\n",
+    "center = verts.mean(0)\n",
+    "verts = verts - center\n",
+    "scale = max(verts.abs().max(0)[0])\n",
+    "verts = verts / scale\n",
+    "\n",
+    "# We construct a Meshes structure for the target mesh\n",
+    "trg_mesh = Meshes(verts=[verts], faces=[faces_idx])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "6BxDTpB2WHRH"
+   },
+   "outputs": [],
+   "source": [
+    "# We initialize the source shape to be a sphere of radius 1\n",
+    "src_mesh = ico_sphere(4, device)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "dYWDl4VGWHRK"
+   },
+   "source": [
+    "## 2. Visualize the source and target meshes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "482YycLHWHRL"
+   },
+   "outputs": [],
+   "source": [
+    "def plot_pointcloud(mesh, title=\"\"):\n",
+    "    # Sample points uniformly from the surface of the mesh.\n",
+    "    points = sample_points_from_meshes(mesh, 5000)\n",
+    "    x, y, z = points.clone().detach().cpu().squeeze().unbind(1)    \n",
+    "    fig = plt.figure(figsize=(5, 5))\n",
+    "    ax = fig.add_subplot(111, projection='3d')\n",
+    "    ax.scatter3D(x, z, -y)\n",
+    "    ax.set_xlabel('x')\n",
+    "    ax.set_ylabel('z')\n",
+    "    ax.set_zlabel('y')\n",
+    "    ax.set_title(title)\n",
+    "    ax.view_init(190, 30)\n",
+    "    plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 765
+    },
+    "colab_type": "code",
+    "id": "UoGcflJ_WHRO",
+    "outputId": "b9a2d699-2c68-4696-9dff-d30eea7a0fb0"
+   },
+   "outputs": [],
+   "source": [
+    "# %matplotlib notebook\n",
+    "plot_pointcloud(trg_mesh, \"Target mesh\")\n",
+    "plot_pointcloud(src_mesh, \"Source mesh\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "8uzMiTUSWHRS"
+   },
+   "source": [
+    "## 3. Optimization loop "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "Sc-3M17Ltuvh"
+   },
+   "outputs": [],
+   "source": [
+    "# We will learn to deform the source mesh by offsetting its vertices\n",
+    "# The shape of the deform parameters is equal to the total number of vertices in src_mesh\n",
+    "deform_verts = torch.full(src_mesh.verts_packed().shape, 0.0, device=device, requires_grad=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "0BtSUfMYtuvl"
+   },
+   "outputs": [],
+   "source": [
+    "# The optimizer\n",
+    "optimizer = torch.optim.SGD([deform_verts], lr=1.0, momentum=0.9)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 1000,
+     "referenced_widgets": [
+      "12fdcbc799cc4da899d889d0399616c2",
+      "0bd231c2134e4127a3756807317d6aae",
+      "23804ad243d44cecbff89ab0b7f40c7e",
+      "be25dd06faf04bf29733cc16deefb189",
+      "283601ac2fe54ecc8716aed8842a5dd2",
+      "6e2ff75105a74afbb4ed3fafd414e16f",
+      "5462de8f68be408d98a6a495e630f448",
+      "6e1e9eb164434a06b7b1bc73e4eb4fcd"
+     ]
+    },
+    "colab_type": "code",
+    "id": "9DAjqI9Atuvp",
+    "outputId": "d59e959b-8616-40fe-aec4-5b09b27e325f"
+   },
+   "outputs": [],
+   "source": [
+    "# Number of optimization steps\n",
+    "Niter = 2000\n",
+    "# Weight for the chamfer loss\n",
+    "w_chamfer = 1.0 \n",
+    "# Weight for mesh edge loss\n",
+    "w_edge = 1.0 \n",
+    "# Weight for mesh normal consistency\n",
+    "w_normal = 0.01 \n",
+    "# Weight for mesh laplacian smoothing\n",
+    "w_laplacian = 0.1 \n",
+    "# Plot period for the losses\n",
+    "plot_period = 250\n",
+    "loop = tqdm(range(Niter))\n",
+    "\n",
+    "chamfer_losses = []\n",
+    "laplacian_losses = []\n",
+    "edge_losses = []\n",
+    "normal_losses = []\n",
+    "\n",
+    "%matplotlib inline\n",
+    "\n",
+    "for i in loop:\n",
+    "    # Initialize optimizer\n",
+    "    optimizer.zero_grad()\n",
+    "    \n",
+    "    # Deform the mesh\n",
+    "    new_src_mesh = src_mesh.offset_verts(deform_verts)\n",
+    "    \n",
+    "    # We sample 5k points from the surface of each mesh \n",
+    "    sample_trg = sample_points_from_meshes(trg_mesh, 5000)\n",
+    "    sample_src = sample_points_from_meshes(new_src_mesh, 5000)\n",
+    "    \n",
+    "    # We compare the two sets of pointclouds by computing (a) the chamfer loss\n",
+    "    loss_chamfer, _ = chamfer_distance(sample_trg, sample_src)\n",
+    "    \n",
+    "    # and (b) the edge length of the predicted mesh\n",
+    "    loss_edge = mesh_edge_loss(new_src_mesh)\n",
+    "    \n",
+    "    # mesh normal consistency\n",
+    "    loss_normal = mesh_normal_consistency(new_src_mesh)\n",
+    "    \n",
+    "    # mesh laplacian smoothing\n",
+    "    loss_laplacian = mesh_laplacian_smoothing(new_src_mesh, method=\"uniform\")\n",
+    "    \n",
+    "    # Weighted sum of the losses\n",
+    "    loss = loss_chamfer * w_chamfer + loss_edge * w_edge + loss_normal * w_normal + loss_laplacian * w_laplacian\n",
+    "    \n",
+    "    # Print the losses\n",
+    "    loop.set_description('total_loss = %.6f' % loss)\n",
+    "    \n",
+    "    # Save the losses for plotting\n",
+    "    chamfer_losses.append(float(loss_chamfer.detach().cpu()))\n",
+    "    edge_losses.append(float(loss_edge.detach().cpu()))\n",
+    "    normal_losses.append(float(loss_normal.detach().cpu()))\n",
+    "    laplacian_losses.append(float(loss_laplacian.detach().cpu()))\n",
+    "    \n",
+    "    # Plot mesh\n",
+    "    if i % plot_period == 0:\n",
+    "        plot_pointcloud(new_src_mesh, title=\"iter: %d\" % i)\n",
+    "        \n",
+    "    # Optimization step\n",
+    "    loss.backward()\n",
+    "    optimizer.step()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "VGcZsvWBWHRc"
+   },
+   "source": [
+    "## 4. Visualize the loss"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 374
+    },
+    "colab_type": "code",
+    "id": "baXvAo1yWHRd",
+    "outputId": "11ebe2ad-4352-4492-bd67-e6a3c95adc85"
+   },
+   "outputs": [],
+   "source": [
+    "fig = plt.figure(figsize=(13, 5))\n",
+    "ax = fig.gca()\n",
+    "ax.plot(chamfer_losses, label=\"chamfer loss\")\n",
+    "ax.plot(edge_losses, label=\"edge loss\")\n",
+    "ax.plot(normal_losses, label=\"normal loss\")\n",
+    "ax.plot(laplacian_losses, label=\"laplacian loss\")\n",
+    "ax.legend(fontsize=\"16\")\n",
+    "ax.set_xlabel(\"Iteration\", fontsize=\"16\")\n",
+    "ax.set_ylabel(\"Loss\", fontsize=\"16\")\n",
+    "ax.set_title(\"Loss vs iterations\", fontsize=\"16\");"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "Y9vSKErDWHRg"
+   },
+   "source": [
+    "## 5. Save the predicted mesh"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "krikJzrLtuvw"
+   },
+   "outputs": [],
+   "source": [
+    "# Fetch the verts and faces of the final predicted mesh\n",
+    "final_verts, final_faces = new_src_mesh.get_mesh_verts_faces(0)\n",
+    "\n",
+    "# Scale normalize back to the original target size\n",
+    "final_verts = final_verts * scale + center\n",
+    "\n",
+    "# Store the predicted mesh using save_obj\n",
+    "final_obj = 'final_model.obj'\n",
+    "save_obj(final_obj, final_verts, final_faces)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "quR1DVAcWHRk"
+   },
+   "source": [
+    "## 6. Conclusion \n",
+    "\n",
+    "In this tutorial we learnt how to load a mesh from an obj file, initialize a PyTorch3D datastructure called **Meshes**, set up an optimization loop and use four different PyTorch3D mesh loss functions. "
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "bento_stylesheets": {
+   "bento/extensions/flow/main.css": true,
+   "bento/extensions/kernel_selector/main.css": true,
+   "bento/extensions/kernel_ui/main.css": true,
+   "bento/extensions/new_kernel/main.css": true,
+   "bento/extensions/system_usage/main.css": true,
+   "bento/extensions/theme/main.css": true
+  },
+  "colab": {
+   "name": "deform_source_mesh_to_target_mesh.ipynb",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/pytorch3d/docs/tutorials/fit_simple_neural_radiance_field.ipynb b/pytorch3d/docs/tutorials/fit_simple_neural_radiance_field.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..0ad532befe842d38899a149c1ee440756008a967
--- /dev/null
+++ b/pytorch3d/docs/tutorials/fit_simple_neural_radiance_field.ipynb
@@ -0,0 +1,900 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Fit a simple Neural Radiance Field via raymarching\n",
+    "\n",
+    "This tutorial shows how to fit a Neural Radiance Field given a set of views of a scene using differentiable implicit function rendering.\n",
+    "\n",
+    "More specifically, this tutorial will explain how to:\n",
+    "1. Create a differentiable implicit function renderer with either image-grid or Monte Carlo ray sampling.\n",
+    "2. Create an Implicit model of a scene.\n",
+    "3. Fit the implicit function (Neural Radiance Field) based on input images using the differentiable implicit renderer. \n",
+    "4. Visualize the learnt implicit function.\n",
+    "\n",
+    "Note that the presented implicit model is a simplified version of NeRF:<br>\n",
+    "_Ben Mildenhall, Pratul P. Srinivasan, Matthew Tancik, Jonathan T. Barron, Ravi Ramamoorthi, Ren Ng: NeRF: Representing Scenes as Neural Radiance Fields for View Synthesis, ECCV 2020._\n",
+    "\n",
+    "The simplifications include:\n",
+    "* *Ray sampling*: This notebook does not perform stratified ray sampling but rather ray sampling at equidistant depths.\n",
+    "* *Rendering*: We do a single rendering pass, as opposed to the original implementation that does a coarse and fine rendering pass.\n",
+    "* *Architecture*: Our network is shallower which allows for faster optimization possibly at the cost of surface details.\n",
+    "* *Mask loss*: Since our observations include segmentation masks, we also optimize a silhouette loss that forces rays to either get fully absorbed inside the volume, or to completely pass through it.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 0. Install and Import modules\n",
+    "Ensure `torch` and `torchvision` are installed. If `pytorch3d` is not installed, install it using the following cell:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import torch\n",
+    "need_pytorch3d=False\n",
+    "try:\n",
+    "    import pytorch3d\n",
+    "except ModuleNotFoundError:\n",
+    "    need_pytorch3d=True\n",
+    "if need_pytorch3d:\n",
+    "    if torch.__version__.startswith(\"2.1.\") and sys.platform.startswith(\"linux\"):\n",
+    "        # We try to install PyTorch3D via a released wheel.\n",
+    "        pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
+    "        version_str=\"\".join([\n",
+    "            f\"py3{sys.version_info.minor}_cu\",\n",
+    "            torch.version.cuda.replace(\".\",\"\"),\n",
+    "            f\"_pyt{pyt_version_str}\"\n",
+    "        ])\n",
+    "        !pip install fvcore iopath\n",
+    "        !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",
+    "    else:\n",
+    "        # We try to install PyTorch3D from source.\n",
+    "        !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# %matplotlib inline\n",
+    "# %matplotlib notebook\n",
+    "import os\n",
+    "import sys\n",
+    "import time\n",
+    "import json\n",
+    "import glob\n",
+    "import torch\n",
+    "import math\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "from PIL import Image\n",
+    "from IPython import display\n",
+    "from tqdm.notebook import tqdm\n",
+    "\n",
+    "# Data structures and functions for rendering\n",
+    "from pytorch3d.structures import Volumes\n",
+    "from pytorch3d.transforms import so3_exp_map\n",
+    "from pytorch3d.renderer import (\n",
+    "    FoVPerspectiveCameras, \n",
+    "    NDCMultinomialRaysampler,\n",
+    "    MonteCarloRaysampler,\n",
+    "    EmissionAbsorptionRaymarcher,\n",
+    "    ImplicitRenderer,\n",
+    "    RayBundle,\n",
+    "    ray_bundle_to_ray_points,\n",
+    ")\n",
+    "\n",
+    "# obtain the utilized device\n",
+    "if torch.cuda.is_available():\n",
+    "    device = torch.device(\"cuda:0\")\n",
+    "    torch.cuda.set_device(device)\n",
+    "else:\n",
+    "    print(\n",
+    "        'Please note that NeRF is a resource-demanding method.'\n",
+    "        + ' Running this notebook on CPU will be extremely slow.'\n",
+    "        + ' We recommend running the example on a GPU'\n",
+    "        + ' with at least 10 GB of memory.'\n",
+    "    )\n",
+    "    device = torch.device(\"cpu\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!wget https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/docs/tutorials/utils/plot_image_grid.py\n",
+    "!wget https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/docs/tutorials/utils/generate_cow_renders.py\n",
+    "from plot_image_grid import image_grid\n",
+    "from generate_cow_renders import generate_cow_renders"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "OR if running locally uncomment and run the following cell:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# from utils.generate_cow_renders import generate_cow_renders\n",
+    "# from utils import image_grid"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Generate images of the scene and masks\n",
+    "\n",
+    "The following cell generates our training data.\n",
+    "It renders the cow mesh from the `fit_textured_mesh.ipynb` tutorial from several viewpoints and returns:\n",
+    "1. A batch of image and silhouette tensors that are produced by the cow mesh renderer.\n",
+    "2. A set of cameras corresponding to each render.\n",
+    "\n",
+    "Note: For the purpose of this tutorial, which aims at explaining the details of implicit rendering, we do not explain how the mesh rendering, implemented in the `generate_cow_renders` function, works. Please refer to `fit_textured_mesh.ipynb` for a detailed explanation of mesh rendering."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "target_cameras, target_images, target_silhouettes = generate_cow_renders(num_views=40, azimuth_range=180)\n",
+    "print(f'Generated {len(target_images)} images/silhouettes/cameras.')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Initialize the implicit renderer\n",
+    "\n",
+    "The following initializes an implicit renderer that emits a ray from each pixel of a target image and samples a set of uniformly-spaced points along the ray. At each ray-point, the corresponding density and color value is obtained by querying the corresponding location in the neural model of the scene (the model is described & instantiated in a later cell).\n",
+    "\n",
+    "The renderer is composed of a *raymarcher* and a *raysampler*.\n",
+    "- The *raysampler* is responsible for emitting rays from image pixels and sampling the points along them. Here, we use two different raysamplers:\n",
+    "    - `MonteCarloRaysampler` is used to generate rays from a random subset of pixels of the image plane. The random subsampling of pixels is carried out during **training** to decrease the memory consumption of the implicit model.\n",
+    "    - `NDCMultinomialRaysampler` which follows the standard PyTorch3D coordinate grid convention (+X from right to left; +Y from bottom to top; +Z away from the user). In combination with the implicit model of the scene, `NDCMultinomialRaysampler` consumes a large amount of memory and, hence, is only used for visualizing the results of the training at **test** time.\n",
+    "- The *raymarcher* takes the densities and colors sampled along each ray and renders each ray into a color and an opacity value of the ray's source pixel. Here we use the `EmissionAbsorptionRaymarcher` which implements the standard Emission-Absorption raymarching algorithm."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# render_size describes the size of both sides of the \n",
+    "# rendered images in pixels. Since an advantage of \n",
+    "# Neural Radiance Fields is high quality renders\n",
+    "# with a significant amount of detail, we render\n",
+    "# the implicit function at double the size of \n",
+    "# target images.\n",
+    "render_size = target_images.shape[1] * 2\n",
+    "\n",
+    "# Our rendered scene is centered around (0,0,0) \n",
+    "# and is enclosed inside a bounding box\n",
+    "# whose side is roughly equal to 3.0 (world units).\n",
+    "volume_extent_world = 3.0\n",
+    "\n",
+    "# 1) Instantiate the raysamplers.\n",
+    "\n",
+    "# Here, NDCMultinomialRaysampler generates a rectangular image\n",
+    "# grid of rays whose coordinates follow the PyTorch3D\n",
+    "# coordinate conventions.\n",
+    "raysampler_grid = NDCMultinomialRaysampler(\n",
+    "    image_height=render_size,\n",
+    "    image_width=render_size,\n",
+    "    n_pts_per_ray=128,\n",
+    "    min_depth=0.1,\n",
+    "    max_depth=volume_extent_world,\n",
+    ")\n",
+    "\n",
+    "# MonteCarloRaysampler generates a random subset \n",
+    "# of `n_rays_per_image` rays emitted from the image plane.\n",
+    "raysampler_mc = MonteCarloRaysampler(\n",
+    "    min_x = -1.0,\n",
+    "    max_x = 1.0,\n",
+    "    min_y = -1.0,\n",
+    "    max_y = 1.0,\n",
+    "    n_rays_per_image=750,\n",
+    "    n_pts_per_ray=128,\n",
+    "    min_depth=0.1,\n",
+    "    max_depth=volume_extent_world,\n",
+    ")\n",
+    "\n",
+    "# 2) Instantiate the raymarcher.\n",
+    "# Here, we use the standard EmissionAbsorptionRaymarcher \n",
+    "# which marches along each ray in order to render\n",
+    "# the ray into a single 3D color vector \n",
+    "# and an opacity scalar.\n",
+    "raymarcher = EmissionAbsorptionRaymarcher()\n",
+    "\n",
+    "# Finally, instantiate the implicit renderers\n",
+    "# for both raysamplers.\n",
+    "renderer_grid = ImplicitRenderer(\n",
+    "    raysampler=raysampler_grid, raymarcher=raymarcher,\n",
+    ")\n",
+    "renderer_mc = ImplicitRenderer(\n",
+    "    raysampler=raysampler_mc, raymarcher=raymarcher,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Define the neural radiance field model\n",
+    "\n",
+    "In this cell we define the `NeuralRadianceField` module, which specifies a continuous field of colors and opacities over the 3D domain of the scene.\n",
+    "\n",
+    "The `forward` function of `NeuralRadianceField` (NeRF) receives as input a set of tensors that parametrize a bundle of rendering rays. The ray bundle is later converted to 3D ray points in the world coordinates of the scene. Each 3D point is then mapped to a harmonic representation using the `HarmonicEmbedding` layer (defined in the next cell). The harmonic embeddings then enter the _color_ and _opacity_ branches of the NeRF model in order to label each ray point with a 3D vector and a 1D scalar ranging in [0-1] which define the point's RGB color and opacity respectively.\n",
+    "\n",
+    "Since NeRF has a large memory footprint, we also implement the `NeuralRadianceField.batched_forward` method. The method splits the input rays into batches and executes the `forward` function for each batch separately in a for loop. This lets us render a large set of rays without running out of GPU memory. Typically, `batched_forward` would be used to render rays emitted from all pixels of an image in order to produce a full-sized render of a scene.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class HarmonicEmbedding(torch.nn.Module):\n",
+    "    def __init__(self, n_harmonic_functions=60, omega0=0.1):\n",
+    "        \"\"\"\n",
+    "        Given an input tensor `x` of shape [minibatch, ... , dim],\n",
+    "        the harmonic embedding layer converts each feature\n",
+    "        in `x` into a series of harmonic features `embedding`\n",
+    "        as follows:\n",
+    "            embedding[..., i*dim:(i+1)*dim] = [\n",
+    "                sin(x[..., i]),\n",
+    "                sin(2*x[..., i]),\n",
+    "                sin(4*x[..., i]),\n",
+    "                ...\n",
+    "                sin(2**(self.n_harmonic_functions-1) * x[..., i]),\n",
+    "                cos(x[..., i]),\n",
+    "                cos(2*x[..., i]),\n",
+    "                cos(4*x[..., i]),\n",
+    "                ...\n",
+    "                cos(2**(self.n_harmonic_functions-1) * x[..., i])\n",
+    "            ]\n",
+    "            \n",
+    "        Note that `x` is also premultiplied by `omega0` before\n",
+    "        evaluating the harmonic functions.\n",
+    "        \"\"\"\n",
+    "        super().__init__()\n",
+    "        self.register_buffer(\n",
+    "            'frequencies',\n",
+    "            omega0 * (2.0 ** torch.arange(n_harmonic_functions)),\n",
+    "        )\n",
+    "    def forward(self, x):\n",
+    "        \"\"\"\n",
+    "        Args:\n",
+    "            x: tensor of shape [..., dim]\n",
+    "        Returns:\n",
+    "            embedding: a harmonic embedding of `x`\n",
+    "                of shape [..., n_harmonic_functions * dim * 2]\n",
+    "        \"\"\"\n",
+    "        embed = (x[..., None] * self.frequencies).view(*x.shape[:-1], -1)\n",
+    "        return torch.cat((embed.sin(), embed.cos()), dim=-1)\n",
+    "\n",
+    "\n",
+    "class NeuralRadianceField(torch.nn.Module):\n",
+    "    def __init__(self, n_harmonic_functions=60, n_hidden_neurons=256):\n",
+    "        super().__init__()\n",
+    "        \"\"\"\n",
+    "        Args:\n",
+    "            n_harmonic_functions: The number of harmonic functions\n",
+    "                used to form the harmonic embedding of each point.\n",
+    "            n_hidden_neurons: The number of hidden units in the\n",
+    "                fully connected layers of the MLPs of the model.\n",
+    "        \"\"\"\n",
+    "        \n",
+    "        # The harmonic embedding layer converts input 3D coordinates\n",
+    "        # to a representation that is more suitable for\n",
+    "        # processing with a deep neural network.\n",
+    "        self.harmonic_embedding = HarmonicEmbedding(n_harmonic_functions)\n",
+    "        \n",
+    "        # The dimension of the harmonic embedding.\n",
+    "        embedding_dim = n_harmonic_functions * 2 * 3\n",
+    "        \n",
+    "        # self.mlp is a simple 2-layer multi-layer perceptron\n",
+    "        # which converts the input per-point harmonic embeddings\n",
+    "        # to a latent representation.\n",
+    "        # Note that we use Softplus activations instead of ReLU.\n",
+    "        self.mlp = torch.nn.Sequential(\n",
+    "            torch.nn.Linear(embedding_dim, n_hidden_neurons),\n",
+    "            torch.nn.Softplus(beta=10.0),\n",
+    "            torch.nn.Linear(n_hidden_neurons, n_hidden_neurons),\n",
+    "            torch.nn.Softplus(beta=10.0),\n",
+    "        )        \n",
+    "        \n",
+    "        # Given features predicted by self.mlp, self.color_layer\n",
+    "        # is responsible for predicting a 3-D per-point vector\n",
+    "        # that represents the RGB color of the point.\n",
+    "        self.color_layer = torch.nn.Sequential(\n",
+    "            torch.nn.Linear(n_hidden_neurons + embedding_dim, n_hidden_neurons),\n",
+    "            torch.nn.Softplus(beta=10.0),\n",
+    "            torch.nn.Linear(n_hidden_neurons, 3),\n",
+    "            torch.nn.Sigmoid(),\n",
+    "            # To ensure that the colors correctly range between [0-1],\n",
+    "            # the layer is terminated with a sigmoid layer.\n",
+    "        )  \n",
+    "        \n",
+    "        # The density layer converts the features of self.mlp\n",
+    "        # to a 1D density value representing the raw opacity\n",
+    "        # of each point.\n",
+    "        self.density_layer = torch.nn.Sequential(\n",
+    "            torch.nn.Linear(n_hidden_neurons, 1),\n",
+    "            torch.nn.Softplus(beta=10.0),\n",
+    "            # Softplus activation ensures that the raw opacity\n",
+    "            # is a non-negative number.\n",
+    "        )\n",
+    "        \n",
+    "        # We set the bias of the density layer to -1.5\n",
+    "        # in order to initialize the opacities of the\n",
+    "        # ray points to values close to 0. \n",
+    "        # This is a crucial detail for ensuring convergence\n",
+    "        # of the model.\n",
+    "        self.density_layer[0].bias.data[0] = -1.5        \n",
+    "                \n",
+    "    def _get_densities(self, features):\n",
+    "        \"\"\"\n",
+    "        This function takes `features` predicted by `self.mlp`\n",
+    "        and converts them to `raw_densities` with `self.density_layer`.\n",
+    "        `raw_densities` are later mapped to [0-1] range with\n",
+    "        1 - inverse exponential of `raw_densities`.\n",
+    "        \"\"\"\n",
+    "        raw_densities = self.density_layer(features)\n",
+    "        return 1 - (-raw_densities).exp()\n",
+    "    \n",
+    "    def _get_colors(self, features, rays_directions):\n",
+    "        \"\"\"\n",
+    "        This function takes per-point `features` predicted by `self.mlp`\n",
+    "        and evaluates the color model in order to attach to each\n",
+    "        point a 3D vector of its RGB color.\n",
+    "        \n",
+    "        In order to represent viewpoint dependent effects,\n",
+    "        before evaluating `self.color_layer`, `NeuralRadianceField`\n",
+    "        concatenates to the `features` a harmonic embedding\n",
+    "        of `rays_directions`, which are per-point directions \n",
+    "        of point rays expressed as 3D l2-normalized vectors\n",
+    "        in world coordinates.\n",
+    "        \"\"\"\n",
+    "        spatial_size = features.shape[:-1]\n",
+    "        \n",
+    "        # Normalize the ray_directions to unit l2 norm.\n",
+    "        rays_directions_normed = torch.nn.functional.normalize(\n",
+    "            rays_directions, dim=-1\n",
+    "        )\n",
+    "        \n",
+    "        # Obtain the harmonic embedding of the normalized ray directions.\n",
+    "        rays_embedding = self.harmonic_embedding(\n",
+    "            rays_directions_normed\n",
+    "        )\n",
+    "        \n",
+    "        # Expand the ray directions tensor so that its spatial size\n",
+    "        # is equal to the size of features.\n",
+    "        rays_embedding_expand = rays_embedding[..., None, :].expand(\n",
+    "            *spatial_size, rays_embedding.shape[-1]\n",
+    "        )\n",
+    "        \n",
+    "        # Concatenate ray direction embeddings with \n",
+    "        # features and evaluate the color model.\n",
+    "        color_layer_input = torch.cat(\n",
+    "            (features, rays_embedding_expand),\n",
+    "            dim=-1\n",
+    "        )\n",
+    "        return self.color_layer(color_layer_input)\n",
+    "    \n",
+    "  \n",
+    "    def forward(\n",
+    "        self, \n",
+    "        ray_bundle: RayBundle,\n",
+    "        **kwargs,\n",
+    "    ):\n",
+    "        \"\"\"\n",
+    "        The forward function accepts the parametrizations of\n",
+    "        3D points sampled along projection rays. The forward\n",
+    "        pass is responsible for attaching a 3D vector\n",
+    "        and a 1D scalar representing the point's \n",
+    "        RGB color and opacity respectively.\n",
+    "        \n",
+    "        Args:\n",
+    "            ray_bundle: A RayBundle object containing the following variables:\n",
+    "                origins: A tensor of shape `(minibatch, ..., 3)` denoting the\n",
+    "                    origins of the sampling rays in world coords.\n",
+    "                directions: A tensor of shape `(minibatch, ..., 3)`\n",
+    "                    containing the direction vectors of sampling rays in world coords.\n",
+    "                lengths: A tensor of shape `(minibatch, ..., num_points_per_ray)`\n",
+    "                    containing the lengths at which the rays are sampled.\n",
+    "\n",
+    "        Returns:\n",
+    "            rays_densities: A tensor of shape `(minibatch, ..., num_points_per_ray, 1)`\n",
+    "                denoting the opacity of each ray point.\n",
+    "            rays_colors: A tensor of shape `(minibatch, ..., num_points_per_ray, 3)`\n",
+    "                denoting the color of each ray point.\n",
+    "        \"\"\"\n",
+    "        # We first convert the ray parametrizations to world\n",
+    "        # coordinates with `ray_bundle_to_ray_points`.\n",
+    "        rays_points_world = ray_bundle_to_ray_points(ray_bundle)\n",
+    "        # rays_points_world.shape = [minibatch x ... x 3]\n",
+    "        \n",
+    "        # For each 3D world coordinate, we obtain its harmonic embedding.\n",
+    "        embeds = self.harmonic_embedding(\n",
+    "            rays_points_world\n",
+    "        )\n",
+    "        # embeds.shape = [minibatch x ... x self.n_harmonic_functions*6]\n",
+    "        \n",
+    "        # self.mlp maps each harmonic embedding to a latent feature space.\n",
+    "        features = self.mlp(embeds)\n",
+    "        # features.shape = [minibatch x ... x n_hidden_neurons]\n",
+    "        \n",
+    "        # Finally, given the per-point features, \n",
+    "        # execute the density and color branches.\n",
+    "        \n",
+    "        rays_densities = self._get_densities(features)\n",
+    "        # rays_densities.shape = [minibatch x ... x 1]\n",
+    "\n",
+    "        rays_colors = self._get_colors(features, ray_bundle.directions)\n",
+    "        # rays_colors.shape = [minibatch x ... x 3]\n",
+    "        \n",
+    "        return rays_densities, rays_colors\n",
+    "    \n",
+    "    def batched_forward(\n",
+    "        self, \n",
+    "        ray_bundle: RayBundle,\n",
+    "        n_batches: int = 16,\n",
+    "        **kwargs,        \n",
+    "    ):\n",
+    "        \"\"\"\n",
+    "        This function is used to allow for memory efficient processing\n",
+    "        of input rays. The input rays are first split to `n_batches`\n",
+    "        chunks and passed through the `self.forward` function one at a time\n",
+    "        in a for loop. Combined with disabling PyTorch gradient caching\n",
+    "        (`torch.no_grad()`), this allows for rendering large batches\n",
+    "        of rays that do not all fit into GPU memory in a single forward pass.\n",
+    "        In our case, batched_forward is used to export a fully-sized render\n",
+    "        of the radiance field for visualization purposes.\n",
+    "        \n",
+    "        Args:\n",
+    "            ray_bundle: A RayBundle object containing the following variables:\n",
+    "                origins: A tensor of shape `(minibatch, ..., 3)` denoting the\n",
+    "                    origins of the sampling rays in world coords.\n",
+    "                directions: A tensor of shape `(minibatch, ..., 3)`\n",
+    "                    containing the direction vectors of sampling rays in world coords.\n",
+    "                lengths: A tensor of shape `(minibatch, ..., num_points_per_ray)`\n",
+    "                    containing the lengths at which the rays are sampled.\n",
+    "            n_batches: Specifies the number of batches the input rays are split into.\n",
+    "                The larger the number of batches, the smaller the memory footprint\n",
+    "                and the lower the processing speed.\n",
+    "\n",
+    "        Returns:\n",
+    "            rays_densities: A tensor of shape `(minibatch, ..., num_points_per_ray, 1)`\n",
+    "                denoting the opacity of each ray point.\n",
+    "            rays_colors: A tensor of shape `(minibatch, ..., num_points_per_ray, 3)`\n",
+    "                denoting the color of each ray point.\n",
+    "\n",
+    "        \"\"\"\n",
+    "\n",
+    "        # Parse out shapes needed for tensor reshaping in this function.\n",
+    "        n_pts_per_ray = ray_bundle.lengths.shape[-1]  \n",
+    "        spatial_size = [*ray_bundle.origins.shape[:-1], n_pts_per_ray]\n",
+    "\n",
+    "        # Split the rays to `n_batches` batches.\n",
+    "        tot_samples = ray_bundle.origins.shape[:-1].numel()\n",
+    "        batches = torch.chunk(torch.arange(tot_samples), n_batches)\n",
+    "\n",
+    "        # For each batch, execute the standard forward pass.\n",
+    "        batch_outputs = [\n",
+    "            self.forward(\n",
+    "                RayBundle(\n",
+    "                    origins=ray_bundle.origins.view(-1, 3)[batch_idx],\n",
+    "                    directions=ray_bundle.directions.view(-1, 3)[batch_idx],\n",
+    "                    lengths=ray_bundle.lengths.view(-1, n_pts_per_ray)[batch_idx],\n",
+    "                    xys=None,\n",
+    "                )\n",
+    "            ) for batch_idx in batches\n",
+    "        ]\n",
+    "        \n",
+    "        # Concatenate the per-batch rays_densities and rays_colors\n",
+    "        # and reshape according to the sizes of the inputs.\n",
+    "        rays_densities, rays_colors = [\n",
+    "            torch.cat(\n",
+    "                [batch_output[output_i] for batch_output in batch_outputs], dim=0\n",
+    "            ).view(*spatial_size, -1) for output_i in (0, 1)\n",
+    "        ]\n",
+    "        return rays_densities, rays_colors"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Helper functions\n",
+    "\n",
+    "In this cell we define functions that help with the Neural Radiance Field optimization."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def huber(x, y, scaling=0.1):\n",
+    "    \"\"\"\n",
+    "    A helper function for evaluating the smooth L1 (huber) loss\n",
+    "    between the rendered silhouettes and colors.\n",
+    "    \"\"\"\n",
+    "    diff_sq = (x - y) ** 2\n",
+    "    loss = ((1 + diff_sq / (scaling**2)).clamp(1e-4).sqrt() - 1) * float(scaling)\n",
+    "    return loss\n",
+    "\n",
+    "def sample_images_at_mc_locs(target_images, sampled_rays_xy):\n",
+    "    \"\"\"\n",
+    "    Given a set of Monte Carlo pixel locations `sampled_rays_xy`,\n",
+    "    this method samples the tensor `target_images` at the\n",
+    "    respective 2D locations.\n",
+    "    \n",
+    "    This function is used in order to extract the colors from\n",
+    "    ground truth images that correspond to the colors\n",
+    "    rendered using `MonteCarloRaysampler`.\n",
+    "    \"\"\"\n",
+    "    ba = target_images.shape[0]\n",
+    "    dim = target_images.shape[-1]\n",
+    "    spatial_size = sampled_rays_xy.shape[1:-1]\n",
+    "    # In order to sample target_images, we utilize\n",
+    "    # the grid_sample function which implements a\n",
+    "    # bilinear image sampler.\n",
+    "    # Note that we have to invert the sign of the \n",
+    "    # sampled ray positions to convert the NDC xy locations\n",
+    "    # of the MonteCarloRaysampler to the coordinate\n",
+    "    # convention of grid_sample.\n",
+    "    images_sampled = torch.nn.functional.grid_sample(\n",
+    "        target_images.permute(0, 3, 1, 2), \n",
+    "        -sampled_rays_xy.view(ba, -1, 1, 2),  # note the sign inversion\n",
+    "        align_corners=True\n",
+    "    )\n",
+    "    return images_sampled.permute(0, 2, 3, 1).view(\n",
+    "        ba, *spatial_size, dim\n",
+    "    )\n",
+    "\n",
+    "def show_full_render(\n",
+    "    neural_radiance_field, camera,\n",
+    "    target_image, target_silhouette,\n",
+    "    loss_history_color, loss_history_sil,\n",
+    "):\n",
+    "    \"\"\"\n",
+    "    This is a helper function for visualizing the\n",
+    "    intermediate results of the learning. \n",
+    "    \n",
+    "    Since the `NeuralRadianceField` suffers from\n",
+    "    a large memory footprint, which does not let us\n",
+    "    render the full image grid in a single forward pass,\n",
+    "    we utilize the `NeuralRadianceField.batched_forward`\n",
+    "    function in combination with disabling the gradient caching.\n",
+    "    This chunks the set of emitted rays to batches and \n",
+    "    evaluates the implicit function on one batch at a time\n",
+    "    to prevent GPU memory overflow.\n",
+    "    \"\"\"\n",
+    "    \n",
+    "    # Prevent gradient caching.\n",
+    "    with torch.no_grad():\n",
+    "        # Render using the grid renderer and the\n",
+    "        # batched_forward function of neural_radiance_field.\n",
+    "        rendered_image_silhouette, _ = renderer_grid(\n",
+    "            cameras=camera, \n",
+    "            volumetric_function=neural_radiance_field.batched_forward\n",
+    "        )\n",
+    "        # Split the rendering result to a silhouette render\n",
+    "        # and the image render.\n",
+    "        rendered_image, rendered_silhouette = (\n",
+    "            rendered_image_silhouette[0].split([3, 1], dim=-1)\n",
+    "        )\n",
+    "        \n",
+    "    # Generate plots.\n",
+    "    fig, ax = plt.subplots(2, 3, figsize=(15, 10))\n",
+    "    ax = ax.ravel()\n",
+    "    clamp_and_detach = lambda x: x.clamp(0.0, 1.0).cpu().detach().numpy()\n",
+    "    ax[0].plot(list(range(len(loss_history_color))), loss_history_color, linewidth=1)\n",
+    "    ax[1].imshow(clamp_and_detach(rendered_image))\n",
+    "    ax[2].imshow(clamp_and_detach(rendered_silhouette[..., 0]))\n",
+    "    ax[3].plot(list(range(len(loss_history_sil))), loss_history_sil, linewidth=1)\n",
+    "    ax[4].imshow(clamp_and_detach(target_image))\n",
+    "    ax[5].imshow(clamp_and_detach(target_silhouette))\n",
+    "    for ax_, title_ in zip(\n",
+    "        ax,\n",
+    "        (\n",
+    "            \"loss color\", \"rendered image\", \"rendered silhouette\",\n",
+    "            \"loss silhouette\", \"target image\",  \"target silhouette\",\n",
+    "        )\n",
+    "    ):\n",
+    "        if not title_.startswith('loss'):\n",
+    "            ax_.grid(\"off\")\n",
+    "            ax_.axis(\"off\")\n",
+    "        ax_.set_title(title_)\n",
+    "    fig.canvas.draw(); fig.show()\n",
+    "    display.clear_output(wait=True)\n",
+    "    display.display(fig)\n",
+    "    return fig\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Fit the radiance field\n",
+    "\n",
+    "Here we carry out the radiance field fitting with differentiable rendering.\n",
+    "\n",
+    "In order to fit the radiance field, we render it from the viewpoints of the `target_cameras`\n",
+    "and compare the resulting renders with the observed `target_images` and `target_silhouettes`.\n",
+    "\n",
+    "The comparison is done by evaluating the mean huber (smooth-l1) error between corresponding\n",
+    "pairs of `target_images`/`rendered_images` and `target_silhouettes`/`rendered_silhouettes`.\n",
+    "\n",
+    "Since we use the `MonteCarloRaysampler`, the outputs of the training renderer `renderer_mc`\n",
+    "are colors of pixels that are randomly sampled from the image plane, not a lattice of pixels forming\n",
+    "a valid image. Thus, in order to compare the rendered colors with the ground truth, we \n",
+    "utilize the random MonteCarlo pixel locations to sample the ground truth images/silhouettes\n",
+    "`target_images`/`target_silhouettes` at the xy locations corresponding to the render\n",
+    "locations. This is done with the helper function `sample_images_at_mc_locs`, which is\n",
+    "described in the previous cell."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# First move all relevant variables to the correct device.\n",
+    "renderer_grid = renderer_grid.to(device)\n",
+    "renderer_mc = renderer_mc.to(device)\n",
+    "target_cameras = target_cameras.to(device)\n",
+    "target_images = target_images.to(device)\n",
+    "target_silhouettes = target_silhouettes.to(device)\n",
+    "\n",
+    "# Set the seed for reproducibility\n",
+    "torch.manual_seed(1)\n",
+    "\n",
+    "# Instantiate the radiance field model.\n",
+    "neural_radiance_field = NeuralRadianceField().to(device)\n",
+    "\n",
+    "# Instantiate the Adam optimizer. We set its master learning rate to 1e-3.\n",
+    "lr = 1e-3\n",
+    "optimizer = torch.optim.Adam(neural_radiance_field.parameters(), lr=lr)\n",
+    "\n",
+    "# We sample 6 random cameras in a minibatch. Each camera\n",
+    "# emits raysampler_mc.n_pts_per_image rays.\n",
+    "batch_size = 6\n",
+    "\n",
+    "# 3000 iterations take ~20 min on a Tesla M40 and lead to\n",
+    "# reasonably sharp results. However, for the best possible\n",
+    "# results, we recommend setting n_iter=20000.\n",
+    "n_iter = 3000\n",
+    "\n",
+    "# Init the loss history buffers.\n",
+    "loss_history_color, loss_history_sil = [], []\n",
+    "\n",
+    "# The main optimization loop.\n",
+    "for iteration in range(n_iter):      \n",
+    "    # Once 75% of the iterations have been completed,\n",
+    "    # decrease the learning rate of the optimizer 10-fold.\n",
+    "    if iteration == round(n_iter * 0.75):\n",
+    "        print('Decreasing LR 10-fold ...')\n",
+    "        optimizer = torch.optim.Adam(\n",
+    "            neural_radiance_field.parameters(), lr=lr * 0.1\n",
+    "        )\n",
+    "    \n",
+    "    # Zero the optimizer gradient.\n",
+    "    optimizer.zero_grad()\n",
+    "    \n",
+    "    # Sample random batch indices.\n",
+    "    batch_idx = torch.randperm(len(target_cameras))[:batch_size]\n",
+    "    \n",
+    "    # Sample the minibatch of cameras.\n",
+    "    batch_cameras = FoVPerspectiveCameras(\n",
+    "        R = target_cameras.R[batch_idx], \n",
+    "        T = target_cameras.T[batch_idx], \n",
+    "        znear = target_cameras.znear[batch_idx],\n",
+    "        zfar = target_cameras.zfar[batch_idx],\n",
+    "        aspect_ratio = target_cameras.aspect_ratio[batch_idx],\n",
+    "        fov = target_cameras.fov[batch_idx],\n",
+    "        device = device,\n",
+    "    )\n",
+    "    \n",
+    "    # Evaluate the nerf model.\n",
+    "    rendered_images_silhouettes, sampled_rays = renderer_mc(\n",
+    "        cameras=batch_cameras, \n",
+    "        volumetric_function=neural_radiance_field\n",
+    "    )\n",
+    "    rendered_images, rendered_silhouettes = (\n",
+    "        rendered_images_silhouettes.split([3, 1], dim=-1)\n",
+    "    )\n",
+    "    \n",
+    "    # Compute the silhouette error as the mean huber\n",
+    "    # loss between the predicted masks and the\n",
+    "    # sampled target silhouettes.\n",
+    "    silhouettes_at_rays = sample_images_at_mc_locs(\n",
+    "        target_silhouettes[batch_idx, ..., None], \n",
+    "        sampled_rays.xys\n",
+    "    )\n",
+    "    sil_err = huber(\n",
+    "        rendered_silhouettes, \n",
+    "        silhouettes_at_rays,\n",
+    "    ).abs().mean()\n",
+    "\n",
+    "    # Compute the color error as the mean huber\n",
+    "    # loss between the rendered colors and the\n",
+    "    # sampled target images.\n",
+    "    colors_at_rays = sample_images_at_mc_locs(\n",
+    "        target_images[batch_idx], \n",
+    "        sampled_rays.xys\n",
+    "    )\n",
+    "    color_err = huber(\n",
+    "        rendered_images, \n",
+    "        colors_at_rays,\n",
+    "    ).abs().mean()\n",
+    "    \n",
+    "    # The optimization loss is a simple\n",
+    "    # sum of the color and silhouette errors.\n",
+    "    loss = color_err + sil_err\n",
+    "    \n",
+    "    # Log the loss history.\n",
+    "    loss_history_color.append(float(color_err))\n",
+    "    loss_history_sil.append(float(sil_err))\n",
+    "    \n",
+    "    # Every 10 iterations, print the current values of the losses.\n",
+    "    if iteration % 10 == 0:\n",
+    "        print(\n",
+    "            f'Iteration {iteration:05d}:'\n",
+    "            + f' loss color = {float(color_err):1.2e}'\n",
+    "            + f' loss silhouette = {float(sil_err):1.2e}'\n",
+    "        )\n",
+    "    \n",
+    "    # Take the optimization step.\n",
+    "    loss.backward()\n",
+    "    optimizer.step()\n",
+    "    \n",
+    "    # Visualize the full renders every 100 iterations.\n",
+    "    if iteration % 100 == 0:\n",
+    "        show_idx = torch.randperm(len(target_cameras))[:1]\n",
+    "        show_full_render(\n",
+    "            neural_radiance_field,\n",
+    "            FoVPerspectiveCameras(\n",
+    "                R = target_cameras.R[show_idx], \n",
+    "                T = target_cameras.T[show_idx], \n",
+    "                znear = target_cameras.znear[show_idx],\n",
+    "                zfar = target_cameras.zfar[show_idx],\n",
+    "                aspect_ratio = target_cameras.aspect_ratio[show_idx],\n",
+    "                fov = target_cameras.fov[show_idx],\n",
+    "                device = device,\n",
+    "            ), \n",
+    "            target_images[show_idx][0],\n",
+    "            target_silhouettes[show_idx][0],\n",
+    "            loss_history_color,\n",
+    "            loss_history_sil,\n",
+    "        )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 6. Visualizing the optimized neural radiance field\n",
+    "\n",
+    "Finally, we visualize the neural radiance field by rendering from multiple viewpoints that rotate around the volume's y-axis."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def generate_rotating_nerf(neural_radiance_field, n_frames = 50):\n",
+    "    logRs = torch.zeros(n_frames, 3, device=device)\n",
+    "    logRs[:, 1] = torch.linspace(-3.14, 3.14, n_frames, device=device)\n",
+    "    Rs = so3_exp_map(logRs)\n",
+    "    Ts = torch.zeros(n_frames, 3, device=device)\n",
+    "    Ts[:, 2] = 2.7\n",
+    "    frames = []\n",
+    "    print('Rendering rotating NeRF ...')\n",
+    "    for R, T in zip(tqdm(Rs), Ts):\n",
+    "        camera = FoVPerspectiveCameras(\n",
+    "            R=R[None], \n",
+    "            T=T[None], \n",
+    "            znear=target_cameras.znear[0],\n",
+    "            zfar=target_cameras.zfar[0],\n",
+    "            aspect_ratio=target_cameras.aspect_ratio[0],\n",
+    "            fov=target_cameras.fov[0],\n",
+    "            device=device,\n",
+    "        )\n",
+    "        # Note that we again render with `NDCMultinomialRaysampler`\n",
+    "        # and the batched_forward function of neural_radiance_field.\n",
+    "        frames.append(\n",
+    "            renderer_grid(\n",
+    "                cameras=camera, \n",
+    "                volumetric_function=neural_radiance_field.batched_forward,\n",
+    "            )[0][..., :3]\n",
+    "        )\n",
+    "    return torch.cat(frames)\n",
+    "    \n",
+    "with torch.no_grad():\n",
+    "    rotating_nerf_frames = generate_rotating_nerf(neural_radiance_field, n_frames=3*5)\n",
+    "    \n",
+    "image_grid(rotating_nerf_frames.clamp(0., 1.).cpu().numpy(), rows=3, cols=5, rgb=True, fill=True)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 7. Conclusion\n",
+    "\n",
+    "In this tutorial, we have shown how to optimize an implicit representation of a scene such that the renders of the scene from known viewpoints match the observed images for each viewpoint. The rendering was carried out using PyTorch3D's implicit function renderer composed of either a `MonteCarloRaysampler` or `NDCMultinomialRaysampler`, and an `EmissionAbsorptionRaymarcher`."
+   ]
+  }
+ ],
+ "metadata": {
+  "bento_stylesheets": {
+   "bento/extensions/flow/main.css": true,
+   "bento/extensions/kernel_selector/main.css": true,
+   "bento/extensions/kernel_ui/main.css": true,
+   "bento/extensions/new_kernel/main.css": true,
+   "bento/extensions/system_usage/main.css": true,
+   "bento/extensions/theme/main.css": true
+  },
+  "kernelspec": {
+   "display_name": "pytorch3d_etc (local)",
+   "language": "python",
+   "name": "pytorch3d_etc_local"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.5+"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pytorch3d/docs/tutorials/fit_textured_mesh.ipynb b/pytorch3d/docs/tutorials/fit_textured_mesh.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..4a18359397d09ef768a4b80ce7ca5bf53a3ee188
--- /dev/null
+++ b/pytorch3d/docs/tutorials/fit_textured_mesh.ipynb
@@ -0,0 +1,956 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "_Ip8kp4TfBLZ"
+   },
+   "outputs": [],
+   "source": [
+    "# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "kuXHJv44fBLe"
+   },
+   "source": [
+    "# Fit a mesh via rendering\n",
+    "\n",
+    "This tutorial shows how to:\n",
+    "- Load a mesh and textures from an `.obj` file. \n",
+    "- Create a synthetic dataset by rendering a textured mesh from multiple viewpoints\n",
+    "- Fit a mesh to the observed synthetic images using differentiable silhouette rendering\n",
+    "- Fit a mesh and its textures using differentiable textured rendering"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "Bnj3THhzfBLf"
+   },
+   "source": [
+    "## 0. Install and Import modules"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "okLalbR_g7NS"
+   },
+   "source": [
+    "Ensure `torch` and `torchvision` are installed. If `pytorch3d` is not installed, install it using the following cell:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "musUWTglgxSB"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import torch\n",
+    "need_pytorch3d=False\n",
+    "try:\n",
+    "    import pytorch3d\n",
+    "except ModuleNotFoundError:\n",
+    "    need_pytorch3d=True\n",
+    "if need_pytorch3d:\n",
+    "    if torch.__version__.startswith(\"2.1.\") and sys.platform.startswith(\"linux\"):\n",
+    "        # We try to install PyTorch3D via a released wheel.\n",
+    "        pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
+    "        version_str=\"\".join([\n",
+    "            f\"py3{sys.version_info.minor}_cu\",\n",
+    "            torch.version.cuda.replace(\".\",\"\"),\n",
+    "            f\"_pyt{pyt_version_str}\"\n",
+    "        ])\n",
+    "        !pip install fvcore iopath\n",
+    "        !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",
+    "    else:\n",
+    "        # We try to install PyTorch3D from source.\n",
+    "        !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "nX99zdoffBLg"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import torch\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "from pytorch3d.utils import ico_sphere\n",
+    "import numpy as np\n",
+    "from tqdm.notebook import tqdm\n",
+    "\n",
+    "# Util function for loading meshes\n",
+    "from pytorch3d.io import load_objs_as_meshes, save_obj\n",
+    "\n",
+    "from pytorch3d.loss import (\n",
+    "    chamfer_distance, \n",
+    "    mesh_edge_loss, \n",
+    "    mesh_laplacian_smoothing, \n",
+    "    mesh_normal_consistency,\n",
+    ")\n",
+    "\n",
+    "# Data structures and functions for rendering\n",
+    "from pytorch3d.structures import Meshes\n",
+    "from pytorch3d.renderer import (\n",
+    "    look_at_view_transform,\n",
+    "    FoVPerspectiveCameras, \n",
+    "    PointLights, \n",
+    "    DirectionalLights, \n",
+    "    Materials, \n",
+    "    RasterizationSettings, \n",
+    "    MeshRenderer, \n",
+    "    MeshRasterizer,  \n",
+    "    SoftPhongShader,\n",
+    "    SoftSilhouetteShader,\n",
+    "    SoftPhongShader,\n",
+    "    TexturesVertex\n",
+    ")\n",
+    "\n",
+    "# add path for demo utils functions \n",
+    "import sys\n",
+    "import os\n",
+    "sys.path.append(os.path.abspath(''))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "Lxmehq6Zhrzv"
+   },
+   "source": [
+    "If using **Google Colab**, fetch the utils file for plotting image grids:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "HZozr3Pmho-5"
+   },
+   "outputs": [],
+   "source": [
+    "!wget https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/docs/tutorials/utils/plot_image_grid.py\n",
+    "from plot_image_grid import image_grid"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "g4B62MzYiJUM"
+   },
+   "source": [
+    "OR if running **locally** uncomment and run the following cell:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "paJ4Im8ahl7O"
+   },
+   "outputs": [],
+   "source": [
+    "#  from utils.plot_image_grid import image_grid"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "collapsed": true,
+    "id": "5jGq772XfBLk"
+   },
+   "source": [
+    "## 1. Load a mesh and texture file\n",
+    "\n",
+    "Load an `.obj` file and its associated `.mtl` file and create a **Textures** and **Meshes** object. \n",
+    "\n",
+    "**Meshes** is a unique datastructure provided in PyTorch3D for working with batches of meshes of different sizes. \n",
+    "\n",
+    "**TexturesVertex** is an auxiliary datastructure for storing vertex rgb texture information about meshes. \n",
+    "\n",
+    "**Meshes** has several class methods which are used throughout the rendering pipeline."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "a8eU4zo5jd_H"
+   },
+   "source": [
+    "If running this notebook using **Google Colab**, run the following cell to fetch the mesh obj and texture files and save them at the path `data/cow_mesh`.\n",
+    "If running locally, the data is already available at the correct path. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "tTm0cVuOjb1W"
+   },
+   "outputs": [],
+   "source": [
+    "!mkdir -p data/cow_mesh\n",
+    "!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.obj\n",
+    "!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.mtl\n",
+    "!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow_texture.png"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "gi5Kd0GafBLl"
+   },
+   "outputs": [],
+   "source": [
+    "# Setup\n",
+    "if torch.cuda.is_available():\n",
+    "    device = torch.device(\"cuda:0\")\n",
+    "    torch.cuda.set_device(device)\n",
+    "else:\n",
+    "    device = torch.device(\"cpu\")\n",
+    "\n",
+    "# Set paths\n",
+    "DATA_DIR = \"./data\"\n",
+    "obj_filename = os.path.join(DATA_DIR, \"cow_mesh/cow.obj\")\n",
+    "\n",
+    "# Load obj file\n",
+    "mesh = load_objs_as_meshes([obj_filename], device=device)\n",
+    "\n",
+    "# We scale normalize and center the target mesh to fit in a sphere of radius 1 \n",
+    "# centered at (0,0,0). (scale, center) will be used to bring the predicted mesh \n",
+    "# to its original center and scale.  Note that normalizing the target mesh, \n",
+    "# speeds up the optimization but is not necessary!\n",
+    "verts = mesh.verts_packed()\n",
+    "N = verts.shape[0]\n",
+    "center = verts.mean(0)\n",
+    "scale = max((verts - center).abs().max(0)[0])\n",
+    "mesh.offset_verts_(-center)\n",
+    "mesh.scale_verts_((1.0 / float(scale)));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "17c4xmtyfBMH"
+   },
+   "source": [
+    "## 2. Dataset Creation\n",
+    "\n",
+    "We sample different camera positions that encode multiple viewpoints of the cow.  We create a renderer with a shader that performs texture map interpolation.  We render a synthetic dataset of images of the textured cow mesh from multiple viewpoints.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "CDQKebNNfBMI"
+   },
+   "outputs": [],
+   "source": [
+    "# the number of different viewpoints from which we want to render the mesh.\n",
+    "num_views = 20\n",
+    "\n",
+    "# Get a batch of viewing angles. \n",
+    "elev = torch.linspace(0, 360, num_views)\n",
+    "azim = torch.linspace(-180, 180, num_views)\n",
+    "\n",
+    "# Place a point light in front of the object. As mentioned above, the front of \n",
+    "# the cow is facing the -z direction. \n",
+    "lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])\n",
+    "\n",
+    "# Initialize an OpenGL perspective camera that represents a batch of different \n",
+    "# viewing angles. All the cameras helper methods support mixed type inputs and \n",
+    "# broadcasting. So we can place the cameras at a distance of dist=2.7, and \n",
+    "# then specify elevation and azimuth angles for each viewpoint as tensors. \n",
+    "R, T = look_at_view_transform(dist=2.7, elev=elev, azim=azim)\n",
+    "cameras = FoVPerspectiveCameras(device=device, R=R, T=T)\n",
+    "\n",
+    "# We arbitrarily choose one particular view that will be used to visualize \n",
+    "# results\n",
+    "camera = FoVPerspectiveCameras(device=device, R=R[None, 1, ...], \n",
+    "                                  T=T[None, 1, ...]) \n",
+    "\n",
+    "# Define the settings for rasterization and shading. Here we set the output \n",
+    "# image to be of size 128X128. As we are rendering images for visualization \n",
+    "# purposes only we will set faces_per_pixel=1 and blur_radius=0.0. Refer to \n",
+    "# rasterize_meshes.py for explanations of these parameters.  We also leave \n",
+    "# bin_size and max_faces_per_bin to their default values of None, which sets \n",
+    "# their values using heuristics and ensures that the faster coarse-to-fine \n",
+    "# rasterization method is used.  Refer to docs/notes/renderer.md for an \n",
+    "# explanation of the difference between naive and coarse-to-fine rasterization. \n",
+    "raster_settings = RasterizationSettings(\n",
+    "    image_size=128, \n",
+    "    blur_radius=0.0, \n",
+    "    faces_per_pixel=1, \n",
+    ")\n",
+    "\n",
+    "# Create a Phong renderer by composing a rasterizer and a shader. The textured \n",
+    "# Phong shader will interpolate the texture uv coordinates for each vertex, \n",
+    "# sample from a texture image and apply the Phong lighting model\n",
+    "renderer = MeshRenderer(\n",
+    "    rasterizer=MeshRasterizer(\n",
+    "        cameras=camera, \n",
+    "        raster_settings=raster_settings\n",
+    "    ),\n",
+    "    shader=SoftPhongShader(\n",
+    "        device=device, \n",
+    "        cameras=camera,\n",
+    "        lights=lights\n",
+    "    )\n",
+    ")\n",
+    "\n",
+    "# Create a batch of meshes by repeating the cow mesh and associated textures. \n",
+    "# Meshes has a useful `extend` method which allows us do this very easily. \n",
+    "# This also extends the textures. \n",
+    "meshes = mesh.extend(num_views)\n",
+    "\n",
+    "# Render the cow mesh from each viewing angle\n",
+    "target_images = renderer(meshes, cameras=cameras, lights=lights)\n",
+    "\n",
+    "# Our multi-view cow dataset will be represented by these 2 lists of tensors,\n",
+    "# each of length num_views.\n",
+    "target_rgb = [target_images[i, ..., :3] for i in range(num_views)]\n",
+    "target_cameras = [FoVPerspectiveCameras(device=device, R=R[None, i, ...], \n",
+    "                                           T=T[None, i, ...]) for i in range(num_views)]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "TppB4PVmR1Rc"
+   },
+   "source": [
+    "Visualize the dataset:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "HHE0CnbVR1Rd"
+   },
+   "outputs": [],
+   "source": [
+    "# RGB images\n",
+    "image_grid(target_images.cpu().numpy(), rows=4, cols=5, rgb=True)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "gOb4rYx65E8z"
+   },
+   "source": [
+    "Later in this tutorial, we will fit a mesh to the rendered RGB images, as well as to images of just the cow silhouette.  For the latter case, we will render a dataset of silhouette images.  Most shaders in PyTorch3D will output an alpha channel along with the RGB image as a 4th channel in an RGBA image.  The alpha channel encodes the probability that each pixel belongs to the foreground of the object.  We construct a soft silhouette shader to render this alpha channel."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "iP_g-nwX4exM"
+   },
+   "outputs": [],
+   "source": [
+    "# Rasterization settings for silhouette rendering  \n",
+    "sigma = 1e-4\n",
+    "raster_settings_silhouette = RasterizationSettings(\n",
+    "    image_size=128, \n",
+    "    blur_radius=np.log(1. / 1e-4 - 1.)*sigma, \n",
+    "    faces_per_pixel=50, \n",
+    ")\n",
+    "\n",
+    "# Silhouette renderer \n",
+    "renderer_silhouette = MeshRenderer(\n",
+    "    rasterizer=MeshRasterizer(\n",
+    "        cameras=camera, \n",
+    "        raster_settings=raster_settings_silhouette\n",
+    "    ),\n",
+    "    shader=SoftSilhouetteShader()\n",
+    ")\n",
+    "\n",
+    "# Render silhouette images.  The 4th channel (index 3) of the rendering output is \n",
+    "# the alpha/silhouette channel\n",
+    "silhouette_images = renderer_silhouette(meshes, cameras=cameras, lights=lights)\n",
+    "target_silhouette = [silhouette_images[i, ..., 3] for i in range(num_views)]\n",
+    "\n",
+    "# Visualize silhouette images\n",
+    "image_grid(silhouette_images.cpu().numpy(), rows=4, cols=5, rgb=False)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "t3qphI1ElUb5"
+   },
+   "source": [
+    "## 3. Mesh prediction via silhouette rendering\n",
+    "In the previous section, we created a dataset of images of multiple viewpoints of a cow.  In this section, we predict a mesh by observing those target images without any knowledge of the ground truth cow mesh.  We assume we know the position of the cameras and lighting.\n",
+    "\n",
+    "We first define some helper functions to visualize the results of our mesh prediction:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "eeWYHROrR1Rh"
+   },
+   "outputs": [],
+   "source": [
+    "# Show a visualization comparing the rendered predicted mesh to the ground truth \n",
+    "# mesh\n",
+    "def visualize_prediction(predicted_mesh, renderer=renderer_silhouette, \n",
+    "                         target_image=target_rgb[1], title='', \n",
+    "                         silhouette=False):\n",
+    "    inds = 3 if silhouette else range(3)\n",
+    "    with torch.no_grad():\n",
+    "        predicted_images = renderer(predicted_mesh)\n",
+    "    plt.figure(figsize=(20, 10))\n",
+    "    plt.subplot(1, 2, 1)\n",
+    "    plt.imshow(predicted_images[0, ..., inds].cpu().detach().numpy())\n",
+    "\n",
+    "    plt.subplot(1, 2, 2)\n",
+    "    plt.imshow(target_image.cpu().detach().numpy())\n",
+    "    plt.title(title)\n",
+    "    plt.axis(\"off\")\n",
+    "\n",
+    "# Plot losses as a function of optimization iteration\n",
+    "def plot_losses(losses):\n",
+    "    fig = plt.figure(figsize=(13, 5))\n",
+    "    ax = fig.gca()\n",
+    "    for k, l in losses.items():\n",
+    "        ax.plot(l['values'], label=k + \" loss\")\n",
+    "    ax.legend(fontsize=\"16\")\n",
+    "    ax.set_xlabel(\"Iteration\", fontsize=\"16\")\n",
+    "    ax.set_ylabel(\"Loss\", fontsize=\"16\")\n",
+    "    ax.set_title(\"Loss vs iterations\", fontsize=\"16\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "PpsvBpuMR1Ri"
+   },
+   "source": [
+    "Starting from a sphere mesh, we will learn offsets of each vertex such that the predicted mesh silhouette is more similar to the target silhouette image at each optimization step.  We begin by loading our initial sphere mesh:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "i989ARH1R1Rj"
+   },
+   "outputs": [],
+   "source": [
+    "# We initialize the source shape to be a sphere of radius 1.  \n",
+    "src_mesh = ico_sphere(4, device)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "f5xVtgLNDvC5"
+   },
+   "source": [
+    "We create a new differentiable renderer for rendering the silhouette of our predicted mesh:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "sXfjzgG4DsDJ"
+   },
+   "outputs": [],
+   "source": [
+    "# Rasterization settings for differentiable rendering, where the blur_radius\n",
+    "# initialization is based on Liu et al, 'Soft Rasterizer: A Differentiable \n",
+    "# Renderer for Image-based 3D Reasoning', ICCV 2019\n",
+    "sigma = 1e-4\n",
+    "raster_settings_soft = RasterizationSettings(\n",
+    "    image_size=128, \n",
+    "    blur_radius=np.log(1. / 1e-4 - 1.)*sigma, \n",
+    "    faces_per_pixel=50, \n",
+    ")\n",
+    "\n",
+    "# Silhouette renderer \n",
+    "renderer_silhouette = MeshRenderer(\n",
+    "    rasterizer=MeshRasterizer(\n",
+    "        cameras=camera, \n",
+    "        raster_settings=raster_settings_soft\n",
+    "    ),\n",
+    "    shader=SoftSilhouetteShader()\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "SGJKbCB6R1Rk"
+   },
+   "source": [
+    "We initialize settings, losses, and the optimizer that will be used to iteratively fit our mesh to the target silhouettes:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "0sLrKv_MEULh"
+   },
+   "outputs": [],
+   "source": [
+    "# Number of views to optimize over in each SGD iteration\n",
+    "num_views_per_iteration = 2\n",
+    "# Number of optimization steps\n",
+    "Niter = 2000\n",
+    "# Plot period for the losses\n",
+    "plot_period = 250\n",
+    "\n",
+    "%matplotlib inline\n",
+    "\n",
+    "# Optimize using rendered silhouette image loss, mesh edge loss, mesh normal \n",
+    "# consistency, and mesh laplacian smoothing\n",
+    "losses = {\"silhouette\": {\"weight\": 1.0, \"values\": []},\n",
+    "          \"edge\": {\"weight\": 1.0, \"values\": []},\n",
+    "          \"normal\": {\"weight\": 0.01, \"values\": []},\n",
+    "          \"laplacian\": {\"weight\": 1.0, \"values\": []},\n",
+    "         }\n",
+    "\n",
+    "# Losses to smooth / regularize the mesh shape\n",
+    "def update_mesh_shape_prior_losses(mesh, loss):\n",
+    "    # the edge length of the predicted mesh\n",
+    "    loss[\"edge\"] = mesh_edge_loss(mesh)\n",
+    "    \n",
+    "    # mesh normal consistency\n",
+    "    loss[\"normal\"] = mesh_normal_consistency(mesh)\n",
+    "    \n",
+    "    # mesh laplacian smoothing\n",
+    "    loss[\"laplacian\"] = mesh_laplacian_smoothing(mesh, method=\"uniform\")\n",
+    "\n",
+    "# We will learn to deform the source mesh by offsetting its vertices\n",
+    "# The shape of the deform parameters is equal to the total number of vertices in\n",
+    "# src_mesh\n",
+    "verts_shape = src_mesh.verts_packed().shape\n",
+    "deform_verts = torch.full(verts_shape, 0.0, device=device, requires_grad=True)\n",
+    "\n",
+    "# The optimizer\n",
+    "optimizer = torch.optim.SGD([deform_verts], lr=1.0, momentum=0.9)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "QLc9zK8lEqFS"
+   },
+   "source": [
+    "We write an optimization loop to iteratively refine our predicted mesh from the sphere mesh into a mesh that matches the silhouettes of the target images:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "gCfepfOoR1Rl"
+   },
+   "outputs": [],
+   "source": [
+    "loop = tqdm(range(Niter))\n",
+    "\n",
+    "for i in loop:\n",
+    "    # Initialize optimizer\n",
+    "    optimizer.zero_grad()\n",
+    "    \n",
+    "    # Deform the mesh\n",
+    "    new_src_mesh = src_mesh.offset_verts(deform_verts)\n",
+    "    \n",
+    "    # Losses to smooth /regularize the mesh shape\n",
+    "    loss = {k: torch.tensor(0.0, device=device) for k in losses}\n",
+    "    update_mesh_shape_prior_losses(new_src_mesh, loss)\n",
+    "    \n",
+    "    # Compute the average silhouette loss over two random views, as the average \n",
+    "    # squared L2 distance between the predicted silhouette and the target \n",
+    "    # silhouette from our dataset\n",
+    "    for j in np.random.permutation(num_views).tolist()[:num_views_per_iteration]:\n",
+    "        images_predicted = renderer_silhouette(new_src_mesh, cameras=target_cameras[j], lights=lights)\n",
+    "        predicted_silhouette = images_predicted[..., 3]\n",
+    "        loss_silhouette = ((predicted_silhouette - target_silhouette[j]) ** 2).mean()\n",
+    "        loss[\"silhouette\"] += loss_silhouette / num_views_per_iteration\n",
+    "    \n",
+    "    # Weighted sum of the losses\n",
+    "    sum_loss = torch.tensor(0.0, device=device)\n",
+    "    for k, l in loss.items():\n",
+    "        sum_loss += l * losses[k][\"weight\"]\n",
+    "        losses[k][\"values\"].append(float(l.detach().cpu()))\n",
+    "\n",
+    "    \n",
+    "    # Print the losses\n",
+    "    loop.set_description(\"total_loss = %.6f\" % sum_loss)\n",
+    "    \n",
+    "    # Plot mesh\n",
+    "    if i % plot_period == 0:\n",
+    "        visualize_prediction(new_src_mesh, title=\"iter: %d\" % i, silhouette=True,\n",
+    "                             target_image=target_silhouette[1])\n",
+    "        \n",
+    "    # Optimization step\n",
+    "    sum_loss.backward()\n",
+    "    optimizer.step()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "CX4huayKR1Rm",
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "visualize_prediction(new_src_mesh, silhouette=True, \n",
+    "                     target_image=target_silhouette[1])\n",
+    "plot_losses(losses)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "XJDsJQmrR1Ro"
+   },
+   "source": [
+    "## 3. Mesh and texture prediction via textured rendering\n",
+    "We can predict both the mesh and its texture if we add an additional loss based on comparing a predicted rendered RGB image to the target image. As before, we start with a sphere mesh.  We learn both translational offsets and RGB texture colors for each vertex in the sphere mesh.  Since our loss is based on rendered RGB pixel values instead of just the silhouette, we use a **SoftPhongShader** instead of a **SoftSilhouetteShader**."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "aZObyIt9R1Ro"
+   },
+   "outputs": [],
+   "source": [
+    "# Rasterization settings for differentiable rendering, where the blur_radius\n",
+    "# initialization is based on Liu et al, 'Soft Rasterizer: A Differentiable \n",
+    "# Renderer for Image-based 3D Reasoning', ICCV 2019\n",
+    "sigma = 1e-4\n",
+    "raster_settings_soft = RasterizationSettings(\n",
+    "    image_size=128, \n",
+    "    blur_radius=np.log(1. / 1e-4 - 1.)*sigma, \n",
+    "    faces_per_pixel=50, \n",
+    "    perspective_correct=False, \n",
+    ")\n",
+    "\n",
+    "# Differentiable soft renderer using per vertex RGB colors for texture\n",
+    "renderer_textured = MeshRenderer(\n",
+    "    rasterizer=MeshRasterizer(\n",
+    "        cameras=camera, \n",
+    "        raster_settings=raster_settings_soft\n",
+    "    ),\n",
+    "    shader=SoftPhongShader(device=device, \n",
+    "        cameras=camera,\n",
+    "        lights=lights)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "NM7gJux8GMQX"
+   },
+   "source": [
+    "We initialize settings, losses, and the optimizer that will be used to iteratively fit our mesh to the target RGB images:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "BS6LAQquF3wq"
+   },
+   "outputs": [],
+   "source": [
+    "# Number of views to optimize over in each SGD iteration\n",
+    "num_views_per_iteration = 2\n",
+    "# Number of optimization steps\n",
+    "Niter = 2000\n",
+    "# Plot period for the losses\n",
+    "plot_period = 250\n",
+    "\n",
+    "%matplotlib inline\n",
+    "\n",
+    "# Optimize using rendered RGB image loss, rendered silhouette image loss, mesh \n",
+    "# edge loss, mesh normal consistency, and mesh laplacian smoothing\n",
+    "losses = {\"rgb\": {\"weight\": 1.0, \"values\": []},\n",
+    "          \"silhouette\": {\"weight\": 1.0, \"values\": []},\n",
+    "          \"edge\": {\"weight\": 1.0, \"values\": []},\n",
+    "          \"normal\": {\"weight\": 0.01, \"values\": []},\n",
+    "          \"laplacian\": {\"weight\": 1.0, \"values\": []},\n",
+    "         }\n",
+    "\n",
+    "# We will learn to deform the source mesh by offsetting its vertices\n",
+    "# The shape of the deform parameters is equal to the total number of vertices in \n",
+    "# src_mesh\n",
+    "verts_shape = src_mesh.verts_packed().shape\n",
+    "deform_verts = torch.full(verts_shape, 0.0, device=device, requires_grad=True)\n",
+    "\n",
+    "# We will also learn per vertex colors for our sphere mesh that define texture \n",
+    "# of the mesh\n",
+    "sphere_verts_rgb = torch.full([1, verts_shape[0], 3], 0.5, device=device, requires_grad=True)\n",
+    "\n",
+    "# The optimizer\n",
+    "optimizer = torch.optim.SGD([deform_verts, sphere_verts_rgb], lr=1.0, momentum=0.9)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "tzIAycuUR1Rq"
+   },
+   "source": [
+    "We write an optimization loop to iteratively refine our predicted mesh and its vertex colors from the sphere mesh into a mesh that matches the target images:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "EKEH2p8-R1Rr"
+   },
+   "outputs": [],
+   "source": [
+    "loop = tqdm(range(Niter))\n",
+    "\n",
+    "for i in loop:\n",
+    "    # Reset the gradients accumulated in the previous iteration\n",
+    "    optimizer.zero_grad()\n",
+    "    \n",
+    "    # Deform the mesh\n",
+    "    new_src_mesh = src_mesh.offset_verts(deform_verts)\n",
+    "    \n",
+    "    # Add per vertex colors to texture the mesh\n",
+    "    new_src_mesh.textures = TexturesVertex(verts_features=sphere_verts_rgb) \n",
+    "    \n",
+    "    # Losses to smooth /regularize the mesh shape\n",
+    "    loss = {k: torch.tensor(0.0, device=device) for k in losses}\n",
+    "    update_mesh_shape_prior_losses(new_src_mesh, loss)\n",
+    "    \n",
+    "    # Randomly select two views to optimize over in this iteration.  Compared\n",
+    "    # to using just one view, this helps resolve ambiguities between updating\n",
+    "    # mesh shape vs. updating mesh texture\n",
+    "    for j in np.random.permutation(num_views).tolist()[:num_views_per_iteration]:\n",
+    "        images_predicted = renderer_textured(new_src_mesh, cameras=target_cameras[j], lights=lights)\n",
+    "\n",
+    "        # Squared L2 distance between the predicted silhouette and the target \n",
+    "        # silhouette from our dataset\n",
+    "        predicted_silhouette = images_predicted[..., 3]\n",
+    "        loss_silhouette = ((predicted_silhouette - target_silhouette[j]) ** 2).mean()\n",
+    "        loss[\"silhouette\"] += loss_silhouette / num_views_per_iteration\n",
+    "        \n",
+    "        # Squared L2 distance between the predicted RGB image and the target \n",
+    "        # image from our dataset\n",
+    "        predicted_rgb = images_predicted[..., :3]\n",
+    "        loss_rgb = ((predicted_rgb - target_rgb[j]) ** 2).mean()\n",
+    "        loss[\"rgb\"] += loss_rgb / num_views_per_iteration\n",
+    "    \n",
+    "    # Weighted sum of the losses\n",
+    "    sum_loss = torch.tensor(0.0, device=device)\n",
+    "    for k, l in loss.items():\n",
+    "        sum_loss += l * losses[k][\"weight\"]\n",
+    "        losses[k][\"values\"].append(float(l.detach().cpu()))\n",
+    "    \n",
+    "    # Print the losses\n",
+    "    loop.set_description(\"total_loss = %.6f\" % sum_loss)\n",
+    "    \n",
+    "    # Plot mesh\n",
+    "    if i % plot_period == 0:\n",
+    "        visualize_prediction(new_src_mesh, renderer=renderer_textured, title=\"iter: %d\" % i, silhouette=False)\n",
+    "        \n",
+    "    # Optimization step\n",
+    "    sum_loss.backward()\n",
+    "    optimizer.step()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "2qTcHO4rR1Rs",
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "visualize_prediction(new_src_mesh, renderer=renderer_textured, silhouette=False)\n",
+    "plot_losses(losses)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "akBOm_xcNUms"
+   },
+   "source": [
+    "Save the final predicted mesh:"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "dXoIsGyhxRyK"
+   },
+   "source": [
+    "## 4. Save the final predicted mesh"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "OQGhV-psKna8"
+   },
+   "outputs": [],
+   "source": [
+    "# Fetch the verts and faces of the final predicted mesh\n",
+    "final_verts, final_faces = new_src_mesh.get_mesh_verts_faces(0)\n",
+    "\n",
+    "# Scale normalize back to the original target size\n",
+    "final_verts = final_verts * scale + center\n",
+    "\n",
+    "# Store the predicted mesh using save_obj\n",
+    "final_obj = os.path.join('./', 'final_model.obj')\n",
+    "save_obj(final_obj, final_verts, final_faces)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "MtKYp0B6R1Ru"
+   },
+   "source": [
+    "## 5. Conclusion\n",
+    "In this tutorial, we learned how to load a textured mesh from an obj file and create a synthetic dataset by rendering the mesh from multiple viewpoints.  We showed how to set up an optimization loop to fit a mesh to the observed dataset images based on a rendered silhouette loss.  We then augmented this optimization loop with an additional loss based on rendered RGB images, which allowed us to predict both a mesh and its texture."
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "anp_metadata": {
+   "path": "fbsource/fbcode/vision/fair/pytorch3d/docs/tutorials/fit_textured_mesh.ipynb"
+  },
+  "bento_stylesheets": {
+   "bento/extensions/flow/main.css": true,
+   "bento/extensions/kernel_selector/main.css": true,
+   "bento/extensions/kernel_ui/main.css": true,
+   "bento/extensions/new_kernel/main.css": true,
+   "bento/extensions/system_usage/main.css": true,
+   "bento/extensions/theme/main.css": true
+  },
+  "colab": {
+   "name": "fit_textured_mesh.ipynb",
+   "provenance": [],
+   "toc_visible": true
+  },
+  "disseminate_notebook_info": {
+   "backup_notebook_id": "781874812352022"
+  },
+  "kernelspec": {
+   "display_name": "intro_to_cv",
+   "language": "python",
+   "name": "bento_kernel_intro_to_cv"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.5+"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/pytorch3d/docs/tutorials/fit_textured_volume.ipynb b/pytorch3d/docs/tutorials/fit_textured_volume.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..e74bf9e90ba6bc2e7dd36064b23b558e2235eb40
--- /dev/null
+++ b/pytorch3d/docs/tutorials/fit_textured_volume.ipynb
@@ -0,0 +1,495 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Fit a volume via raymarching\n",
+    "\n",
+    "This tutorial shows how to fit a volume given a set of views of a scene using differentiable volumetric rendering.\n",
+    "\n",
+    "More specifically, this tutorial will explain how to:\n",
+    "1. Create a differentiable volumetric renderer.\n",
+    "2. Create a Volumetric model (including how to use the `Volumes` class).\n",
+    "3. Fit the volume based on the images using the differentiable volumetric renderer. \n",
+    "4. Visualize the predicted volume."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 0. Install and Import modules\n",
+    "Ensure `torch` and `torchvision` are installed. If `pytorch3d` is not installed, install it using the following cell:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import torch\n",
+    "need_pytorch3d=False\n",
+    "try:\n",
+    "    import pytorch3d\n",
+    "except ModuleNotFoundError:\n",
+    "    need_pytorch3d=True\n",
+    "if need_pytorch3d:\n",
+    "    if torch.__version__.startswith(\"2.1.\") and sys.platform.startswith(\"linux\"):\n",
+    "        # We try to install PyTorch3D via a released wheel.\n",
+    "        pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
+    "        version_str=\"\".join([\n",
+    "            f\"py3{sys.version_info.minor}_cu\",\n",
+    "            torch.version.cuda.replace(\".\",\"\"),\n",
+    "            f\"_pyt{pyt_version_str}\"\n",
+    "        ])\n",
+    "        !pip install fvcore iopath\n",
+    "        !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",
+    "    else:\n",
+    "        # We try to install PyTorch3D from source.\n",
+    "        !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import time\n",
+    "import json\n",
+    "import glob\n",
+    "import torch\n",
+    "import math\n",
+    "from tqdm.notebook import tqdm\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "from PIL import Image\n",
+    "from IPython import display\n",
+    "\n",
+    "# Data structures and functions for rendering\n",
+    "from pytorch3d.structures import Volumes\n",
+    "from pytorch3d.renderer import (\n",
+    "    FoVPerspectiveCameras, \n",
+    "    VolumeRenderer,\n",
+    "    NDCMultinomialRaysampler,\n",
+    "    EmissionAbsorptionRaymarcher\n",
+    ")\n",
+    "from pytorch3d.transforms import so3_exp_map\n",
+    "\n",
+    "# obtain the utilized device\n",
+    "if torch.cuda.is_available():\n",
+    "    device = torch.device(\"cuda:0\")\n",
+    "    torch.cuda.set_device(device)\n",
+    "else:\n",
+    "    device = torch.device(\"cpu\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!wget https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/docs/tutorials/utils/plot_image_grid.py\n",
+    "!wget https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/docs/tutorials/utils/generate_cow_renders.py\n",
+    "from plot_image_grid import image_grid\n",
+    "from generate_cow_renders import generate_cow_renders"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Or, if running locally, uncomment and run the following cell:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# from utils.generate_cow_renders import generate_cow_renders\n",
+    "# from utils import image_grid"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Generate images of the scene and masks\n",
+    "\n",
+    "The following cell generates our training data.\n",
+    "It renders the cow mesh from the `fit_textured_mesh.ipynb` tutorial from several viewpoints and returns:\n",
+    "1. A batch of image and silhouette tensors that are produced by the cow mesh renderer.\n",
+    "2. A set of cameras corresponding to each render.\n",
+    "\n",
+    "Note: For the purpose of this tutorial, which aims at explaining the details of volumetric rendering, we do not explain how the mesh rendering, implemented in the `generate_cow_renders` function, works. Please refer to `fit_textured_mesh.ipynb` for a detailed explanation of mesh rendering."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "target_cameras, target_images, target_silhouettes = generate_cow_renders(num_views=40)\n",
+    "print(f'Generated {len(target_images)} images/silhouettes/cameras.')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Initialize the volumetric renderer\n",
+    "\n",
+    "The following initializes a volumetric renderer that emits a ray from each pixel of a target image and samples a set of uniformly-spaced points along the ray. At each ray-point, the corresponding density and color value is obtained by querying the corresponding location in the volumetric model of the scene (the model is described & instantiated in a later cell).\n",
+    "\n",
+    "The renderer is composed of a *raymarcher* and a *raysampler*.\n",
+    "- The *raysampler* is responsible for emitting rays from image pixels and sampling the points along them. Here, we use the `NDCMultinomialRaysampler` which follows the standard PyTorch3D coordinate grid convention (+X from right to left; +Y from bottom to top; +Z away from the user).\n",
+    "- The *raymarcher* takes the densities and colors sampled along each ray and renders each ray into a color and an opacity value of the ray's source pixel. Here we use the `EmissionAbsorptionRaymarcher` which implements the standard Emission-Absorption raymarching algorithm."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# render_size describes the size of both sides of the \n",
+    "# rendered images in pixels. We set this to the same size\n",
+    "# as the target images. I.e. we render at the same\n",
+    "# size as the ground truth images.\n",
+    "render_size = target_images.shape[1]\n",
+    "\n",
+    "# Our rendered scene is centered around (0,0,0) \n",
+    "# and is enclosed inside a bounding box\n",
+    "# whose side is roughly equal to 3.0 (world units).\n",
+    "volume_extent_world = 3.0\n",
+    "\n",
+    "# 1) Instantiate the raysampler.\n",
+    "# Here, NDCMultinomialRaysampler generates a rectangular image\n",
+    "# grid of rays whose coordinates follow the PyTorch3D\n",
+    "# coordinate conventions.\n",
+    "# Since we use a volume of size 128^3, we sample n_pts_per_ray=150,\n",
+    "# which roughly corresponds to one ray-point per voxel.\n",
+    "# We further set the min_depth=0.1 since there is no surface within\n",
+    "# 0.1 units of any camera plane.\n",
+    "raysampler = NDCMultinomialRaysampler(\n",
+    "    image_width=render_size,\n",
+    "    image_height=render_size,\n",
+    "    n_pts_per_ray=150,\n",
+    "    min_depth=0.1,\n",
+    "    max_depth=volume_extent_world,\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# 2) Instantiate the raymarcher.\n",
+    "# Here, we use the standard EmissionAbsorptionRaymarcher \n",
+    "# which marches along each ray in order to render\n",
+    "# each ray into a single 3D color vector \n",
+    "# and an opacity scalar.\n",
+    "raymarcher = EmissionAbsorptionRaymarcher()\n",
+    "\n",
+    "# Finally, instantiate the volumetric renderer\n",
+    "# with the raysampler and raymarcher objects.\n",
+    "renderer = VolumeRenderer(\n",
+    "    raysampler=raysampler, raymarcher=raymarcher,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Initialize the volumetric model\n",
+    "\n",
+    "Next we instantiate a volumetric model of the scene. This quantizes the 3D space to cubical voxels, where each voxel is described with a 3D vector representing the voxel's RGB color and a density scalar which describes the opacity of the voxel (ranging between [0-1], the higher the more opaque).\n",
+    "\n",
+    "In order to ensure the range of densities and colors is between [0-1], we represent both volume colors and densities in the logarithmic space. During the forward function of the model, the log-space values are passed through the sigmoid function to bring the log-space values to the correct range.\n",
+    "\n",
+    "Additionally, `VolumeModel` contains the renderer object. This object stays unaltered throughout the optimization.\n",
+    "\n",
+    "In this cell we also define the `huber` loss function, which is used to compute the discrepancy between the rendered colors/masks and their ground-truth targets."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class VolumeModel(torch.nn.Module):\n",
+    "    def __init__(self, renderer, volume_size=[64] * 3, voxel_size=0.1):\n",
+    "        super().__init__()\n",
+    "        # After evaluating torch.sigmoid(self.log_densities), we get \n",
+    "        # densities close to zero.\n",
+    "        self.log_densities = torch.nn.Parameter(-4.0 * torch.ones(1, *volume_size))\n",
+    "        # After evaluating torch.sigmoid(self.log_colors), we get \n",
+    "        # a neutral gray color everywhere.\n",
+    "        self.log_colors = torch.nn.Parameter(torch.zeros(3, *volume_size))\n",
+    "        self._voxel_size = voxel_size\n",
+    "        # Store the renderer module as well.\n",
+    "        self._renderer = renderer\n",
+    "        \n",
+    "    def forward(self, cameras):\n",
+    "        batch_size = cameras.R.shape[0]\n",
+    "\n",
+    "        # Convert the log-space values to the densities/colors\n",
+    "        densities = torch.sigmoid(self.log_densities)\n",
+    "        colors = torch.sigmoid(self.log_colors)\n",
+    "        \n",
+    "        # Instantiate the Volumes object, making sure\n",
+    "        # the densities and colors are correctly\n",
+    "        # expanded batch_size-times.\n",
+    "        volumes = Volumes(\n",
+    "            densities = densities[None].expand(\n",
+    "                batch_size, *self.log_densities.shape),\n",
+    "            features = colors[None].expand(\n",
+    "                batch_size, *self.log_colors.shape),\n",
+    "            voxel_size=self._voxel_size,\n",
+    "        )\n",
+    "        \n",
+    "        # Given cameras and volumes, run the renderer\n",
+    "        # and return only the first output value \n",
+    "        # (the 2nd output is a representation of the sampled\n",
+    "        # rays which can be omitted for our purpose).\n",
+    "        return self._renderer(cameras=cameras, volumes=volumes)[0]\n",
+    "    \n",
+    "# A helper function for evaluating the smooth L1 (huber) loss\n",
+    "# between the rendered and target silhouettes/colors.\n",
+    "def huber(x, y, scaling=0.1):\n",
+    "    diff_sq = (x - y) ** 2\n",
+    "    loss = ((1 + diff_sq / (scaling**2)).clamp(1e-4).sqrt() - 1) * float(scaling)\n",
+    "    return loss"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Fit the volume\n",
+    "\n",
+    "Here we carry out the volume fitting with differentiable rendering.\n",
+    "\n",
+    "In order to fit the volume, we render it from the viewpoints of the `target_cameras`\n",
+    "and compare the resulting renders with the observed `target_images` and `target_silhouettes`.\n",
+    "\n",
+    "The comparison is done by evaluating the mean huber (smooth-l1) error between corresponding\n",
+    "pairs of `target_images`/`rendered_images` and `target_silhouettes`/`rendered_silhouettes`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# First move all relevant variables to the correct device.\n",
+    "target_cameras = target_cameras.to(device)\n",
+    "target_images = target_images.to(device)\n",
+    "target_silhouettes = target_silhouettes.to(device)\n",
+    "\n",
+    "# Instantiate the volumetric model.\n",
+    "# We use a cubical volume with the size of \n",
+    "# one side = 128. The size of each voxel of the volume \n",
+    "# is set to volume_extent_world / volume_size s.t. the\n",
+    "# volume represents the space enclosed in a 3D bounding box\n",
+    "# centered at (0, 0, 0) with the size of each side equal to 3.\n",
+    "volume_size = 128\n",
+    "volume_model = VolumeModel(\n",
+    "    renderer,\n",
+    "    volume_size=[volume_size] * 3, \n",
+    "    voxel_size = volume_extent_world / volume_size,\n",
+    ").to(device)\n",
+    "\n",
+    "# Instantiate the Adam optimizer. We set its master learning rate to 0.1.\n",
+    "lr = 0.1\n",
+    "optimizer = torch.optim.Adam(volume_model.parameters(), lr=lr)\n",
+    "\n",
+    "# We do 300 Adam iterations and sample 10 random images in each minibatch.\n",
+    "batch_size = 10\n",
+    "n_iter = 300\n",
+    "for iteration in range(n_iter):\n",
+    "\n",
+    "    # Once 75% of the iterations have been completed,\n",
+    "    # decrease the learning rate of the optimizer 10-fold.\n",
+    "    if iteration == round(n_iter * 0.75):\n",
+    "        print('Decreasing LR 10-fold ...')\n",
+    "        optimizer = torch.optim.Adam(\n",
+    "            volume_model.parameters(), lr=lr * 0.1\n",
+    "        )\n",
+    "    \n",
+    "    # Zero the optimizer gradient.\n",
+    "    optimizer.zero_grad()\n",
+    "    \n",
+    "    # Sample random batch indices.\n",
+    "    batch_idx = torch.randperm(len(target_cameras))[:batch_size]\n",
+    "    \n",
+    "    # Sample the minibatch of cameras.\n",
+    "    batch_cameras = FoVPerspectiveCameras(\n",
+    "        R = target_cameras.R[batch_idx], \n",
+    "        T = target_cameras.T[batch_idx], \n",
+    "        znear = target_cameras.znear[batch_idx],\n",
+    "        zfar = target_cameras.zfar[batch_idx],\n",
+    "        aspect_ratio = target_cameras.aspect_ratio[batch_idx],\n",
+    "        fov = target_cameras.fov[batch_idx],\n",
+    "        device = device,\n",
+    "    )\n",
+    "    \n",
+    "    # Evaluate the volumetric model.\n",
+    "    rendered_images, rendered_silhouettes = volume_model(\n",
+    "        batch_cameras\n",
+    "    ).split([3, 1], dim=-1)\n",
+    "    \n",
+    "    # Compute the silhouette error as the mean huber\n",
+    "    # loss between the predicted masks and the\n",
+    "    # target silhouettes.\n",
+    "    sil_err = huber(\n",
+    "        rendered_silhouettes[..., 0], target_silhouettes[batch_idx],\n",
+    "    ).abs().mean()\n",
+    "\n",
+    "    # Compute the color error as the mean huber\n",
+    "    # loss between the rendered colors and the\n",
+    "    # target ground truth images.\n",
+    "    color_err = huber(\n",
+    "        rendered_images, target_images[batch_idx],\n",
+    "    ).abs().mean()\n",
+    "    \n",
+    "    # The optimization loss is a simple\n",
+    "    # sum of the color and silhouette errors.\n",
+    "    loss = color_err + sil_err \n",
+    "    \n",
+    "    # Print the current values of the losses.\n",
+    "    if iteration % 10 == 0:\n",
+    "        print(\n",
+    "            f'Iteration {iteration:05d}:'\n",
+    "            + f' color_err = {float(color_err):1.2e}'\n",
+    "            + f' mask_err = {float(sil_err):1.2e}'\n",
+    "        )\n",
+    "    \n",
+    "    # Take the optimization step.\n",
+    "    loss.backward()\n",
+    "    optimizer.step()\n",
+    "    \n",
+    "    # Visualize the renders every 40 iterations.\n",
+    "    if iteration % 40 == 0:\n",
+    "        # Visualize only a single randomly selected element of the batch.\n",
+    "        im_show_idx = int(torch.randint(low=0, high=batch_size, size=(1,)))\n",
+    "        fig, ax = plt.subplots(2, 2, figsize=(10, 10))\n",
+    "        ax = ax.ravel()\n",
+    "        clamp_and_detach = lambda x: x.clamp(0.0, 1.0).cpu().detach().numpy()\n",
+    "        ax[0].imshow(clamp_and_detach(rendered_images[im_show_idx]))\n",
+    "        ax[1].imshow(clamp_and_detach(target_images[batch_idx[im_show_idx], ..., :3]))\n",
+    "        ax[2].imshow(clamp_and_detach(rendered_silhouettes[im_show_idx, ..., 0]))\n",
+    "        ax[3].imshow(clamp_and_detach(target_silhouettes[batch_idx[im_show_idx]]))\n",
+    "        for ax_, title_ in zip(\n",
+    "            ax, \n",
+    "            (\"rendered image\", \"target image\", \"rendered silhouette\", \"target silhouette\")\n",
+    "        ):\n",
+    "            ax_.grid(\"off\")\n",
+    "            ax_.axis(\"off\")\n",
+    "            ax_.set_title(title_)\n",
+    "        fig.canvas.draw(); fig.show()\n",
+    "        display.clear_output(wait=True)\n",
+    "        display.display(fig)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Visualizing the optimized volume\n",
+    "\n",
+    "Finally, we visualize the optimized volume by rendering from multiple viewpoints that rotate around the volume's y-axis."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def generate_rotating_volume(volume_model, n_frames = 50):\n",
+    "    logRs = torch.zeros(n_frames, 3, device=device)\n",
+    "    logRs[:, 1] = torch.linspace(0.0, 2.0 * 3.14, n_frames, device=device)\n",
+    "    Rs = so3_exp_map(logRs)\n",
+    "    Ts = torch.zeros(n_frames, 3, device=device)\n",
+    "    Ts[:, 2] = 2.7\n",
+    "    frames = []\n",
+    "    print('Generating rotating volume ...')\n",
+    "    for R, T in zip(tqdm(Rs), Ts):\n",
+    "        camera = FoVPerspectiveCameras(\n",
+    "            R=R[None], \n",
+    "            T=T[None], \n",
+    "            znear = target_cameras.znear[0],\n",
+    "            zfar = target_cameras.zfar[0],\n",
+    "            aspect_ratio = target_cameras.aspect_ratio[0],\n",
+    "            fov = target_cameras.fov[0],\n",
+    "            device=device,\n",
+    "        )\n",
+    "        frames.append(volume_model(camera)[..., :3].clamp(0.0, 1.0))\n",
+    "    return torch.cat(frames)\n",
+    "    \n",
+    "with torch.no_grad():\n",
+    "    rotating_volume_frames = generate_rotating_volume(volume_model, n_frames=7*4)\n",
+    "\n",
+    "image_grid(rotating_volume_frames.clamp(0., 1.).cpu().numpy(), rows=4, cols=7, rgb=True, fill=True)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 6. Conclusion\n",
+    "\n",
+    "In this tutorial, we have shown how to optimize a 3D volumetric representation of a scene such that the renders of the volume from known viewpoints match the observed images for each viewpoint. The rendering was carried out using PyTorch3D's volumetric renderer composed of an `NDCMultinomialRaysampler` and an `EmissionAbsorptionRaymarcher`."
+   ]
+  }
+ ],
+ "metadata": {
+  "bento_stylesheets": {
+   "bento/extensions/flow/main.css": true,
+   "bento/extensions/kernel_selector/main.css": true,
+   "bento/extensions/kernel_ui/main.css": true,
+   "bento/extensions/new_kernel/main.css": true,
+   "bento/extensions/system_usage/main.css": true,
+   "bento/extensions/theme/main.css": true
+  },
+  "kernelspec": {
+   "display_name": "pytorch3d_etc (local)",
+   "language": "python",
+   "name": "pytorch3d_etc_local"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.5+"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pytorch3d/docs/tutorials/implicitron_config_system.ipynb b/pytorch3d/docs/tutorials/implicitron_config_system.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..8d0102f8fdbd449a98713bc407d5d5a87897442b
--- /dev/null
+++ b/pytorch3d/docs/tutorials/implicitron_config_system.ipynb
@@ -0,0 +1,1241 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "customInput": null,
+        "customOutput": null,
+        "originalKey": "f0af2d90-cb21-4ab4-b4cb-0fd00dbfb77b",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved."
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "4e15bfa2-5404-40d0-98b6-eb2732c8b72b",
+        "showInput": false
+      },
+      "source": [
+        "# Implicitron's config system"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "287be985-423d-42e0-a2af-1e8c585e723c",
+        "showInput": false
+      },
+      "source": [
+        "Implicitron's components are all based on a unified hierarchical configuration system. \n",
+        "This allows configurable variables and all defaults to be defined separately for each new component.\n",
+        "All configs relevant to an experiment are then automatically composed into a single configuration file that fully specifies the experiment.\n",
+        "An especially important feature is extension points where users can insert their own sub-classes of Implicitron's base components.\n",
+        "\n",
+        "The file which defines this system is [here](https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/implicitron/tools/config.py) in the PyTorch3D repo.\n",
+        "The Implicitron volumes tutorial contains a simple example of using the config system.\n",
+        "This tutorial provides detailed hands-on experience in using and modifying Implicitron's configurable components.\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "fde300a2-99cb-4d52-9d5b-4464a2083e0b",
+        "showInput": false
+      },
+      "source": [
+        "## 0. Install and import modules\n",
+        "\n",
+        "Ensure `torch` and `torchvision` are installed. If `pytorch3d` is not installed, install it using the following cell:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "customInput": null,
+        "customOutput": null,
+        "originalKey": "ad6e94a7-e114-43d3-b038-a5210c7d34c9",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "import sys\n",
+        "import torch\n",
+        "need_pytorch3d=False\n",
+        "try:\n",
+        "    import pytorch3d\n",
+        "except ModuleNotFoundError:\n",
+        "    need_pytorch3d=True\n",
+        "if need_pytorch3d:\n",
+        "    if torch.__version__.startswith(\"2.1.\") and sys.platform.startswith(\"linux\"):\n",
+        "        # We try to install PyTorch3D via a released wheel.\n",
+        "        pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
+        "        version_str=\"\".join([\n",
+        "            f\"py3{sys.version_info.minor}_cu\",\n",
+        "            torch.version.cuda.replace(\".\",\"\"),\n",
+        "            f\"_pyt{pyt_version_str}\"\n",
+        "        ])\n",
+        "        !pip install fvcore iopath\n",
+        "        !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",
+        "    else:\n",
+        "        # We try to install PyTorch3D from source.\n",
+        "        !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "609896c0-9e2e-4716-b074-b565f0170e32",
+        "showInput": false
+      },
+      "source": [
+        "Ensure omegaconf is installed. If not, run this cell. (It should not be necessary to restart the runtime.)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "customInput": null,
+        "customOutput": null,
+        "originalKey": "d1c1851e-b9f2-4236-93c3-19aa4d63041c",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "!pip install omegaconf"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customOutput": null,
+        "executionStartTime": 1659465468717,
+        "executionStopTime": 1659465468738,
+        "hidden_ranges": [],
+        "originalKey": "5ac7ef23-b74c-46b2-b8d3-799524d7ba4f",
+        "requestMsgId": "5ac7ef23-b74c-46b2-b8d3-799524d7ba4f"
+      },
+      "outputs": [],
+      "source": [
+        "from dataclasses import dataclass\n",
+        "from typing import Optional, Tuple\n",
+        "\n",
+        "import torch\n",
+        "from omegaconf import DictConfig, OmegaConf\n",
+        "from pytorch3d.implicitron.tools.config import (\n",
+        "    Configurable,\n",
+        "    ReplaceableBase,\n",
+        "    expand_args_fields,\n",
+        "    get_default_args,\n",
+        "    registry,\n",
+        "    run_auto_creation,\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "a638bf90-eb6b-424d-b53d-eae11954a717",
+        "showInput": false
+      },
+      "source": [
+        "## 1. Introducing dataclasses \n",
+        "\n",
+        "[Type hints](https://docs.python.org/3/library/typing.html) give a taxonomy of types in Python. [Dataclasses](https://docs.python.org/3/library/dataclasses.html) let you create a class based on a list of members which have names, types and possibly default values. The `__init__` function is created automatically, and calls a `__post_init__` function if present as a final step. For example:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659454972732,
+        "executionStopTime": 1659454972739,
+        "originalKey": "71eaad5e-e198-492e-8610-24b0da9dd4ae",
+        "requestMsgId": "71eaad5e-e198-492e-8610-24b0da9dd4ae",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "@dataclass\n",
+        "class MyDataclass:\n",
+        "    a: int\n",
+        "    b: int = 8\n",
+        "    c: Optional[Tuple[int, ...]] = None\n",
+        "\n",
+        "    def __post_init__(self):\n",
+        "        print(f\"created with a = {self.a}\")\n",
+        "        self.d = 2 * self.b"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659454973051,
+        "executionStopTime": 1659454973077,
+        "originalKey": "83202a18-a3d3-44ec-a62d-b3360a302645",
+        "requestMsgId": "83202a18-a3d3-44ec-a62d-b3360a302645",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "my_dataclass_instance = MyDataclass(a=18)\n",
+        "assert my_dataclass_instance.d == 16"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "b67ccb9f-dc6c-4994-9b99-b5a1bcfebd70",
+        "showInput": false
+      },
+      "source": [
+        "👷 Note that the `dataclass` decorator here is a function which modifies the definition of the class itself.\n",
+        "It runs immediately after the definition.\n",
+        "Our config system requires that implicitron library code contains classes whose modified versions need to be aware of user-defined implementations.\n",
+        "Therefore we need the modification of the class to be delayed. We don't use a decorator.\n"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "3e90f664-99df-4387-9c45-a1ad7939ef3a",
+        "showInput": false
+      },
+      "source": [
+        "## 2. Introducing omegaconf and OmegaConf.structured\n",
+        "\n",
+        "The [omegaconf](https://github.com/omry/omegaconf/) library provides a DictConfig class which is like a `dict` with str keys, but with extra features for ease-of-use as a configuration system."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659451341683,
+        "executionStopTime": 1659451341690,
+        "originalKey": "81c73c9b-27ee-4aab-b55e-fb0dd67fe174",
+        "requestMsgId": "81c73c9b-27ee-4aab-b55e-fb0dd67fe174",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "dc = DictConfig({\"a\": 2, \"b\": True, \"c\": None, \"d\": \"hello\"})\n",
+        "assert dc.a == dc[\"a\"] == 2"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "3b5b76a9-4b76-4784-96ff-2a1212e48e48",
+        "showInput": false
+      },
+      "source": [
+        "OmegaConf has a serialization to and from yaml. The [Hydra](https://hydra.cc/) library relies on this for its configuration files."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659451411835,
+        "executionStopTime": 1659451411936,
+        "originalKey": "d7a25ec1-caea-46bc-a1da-4b1f040c4b61",
+        "requestMsgId": "d7a25ec1-caea-46bc-a1da-4b1f040c4b61",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "print(OmegaConf.to_yaml(dc))\n",
+        "assert OmegaConf.create(OmegaConf.to_yaml(dc)) == dc"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "777fecdd-8bf6-4fd8-827b-cb8af5477fa8",
+        "showInput": false
+      },
+      "source": [
+        "OmegaConf.structured provides a DictConfig from a dataclass or instance of a dataclass. Unlike a normal DictConfig, it is type-checked and only known keys can be added."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659455098879,
+        "executionStopTime": 1659455098900,
+        "originalKey": "de36efb4-0b08-4fb8-bb3a-be1b2c0cd162",
+        "requestMsgId": "de36efb4-0b08-4fb8-bb3a-be1b2c0cd162",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "structured = OmegaConf.structured(MyDataclass)\n",
+        "assert isinstance(structured, DictConfig)\n",
+        "print(structured)\n",
+        "print()\n",
+        "print(OmegaConf.to_yaml(structured))"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "be4446da-e536-4139-9ba3-37669a5b5e61",
+        "showInput": false
+      },
+      "source": [
+        "`structured` knows it is missing a value for `a`."
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "864811e8-1a75-4932-a85e-f681b0541ae9",
+        "showInput": false
+      },
+      "source": [
+        "Such an object has members compatible with the dataclass, so an initialisation can be performed as follows."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659455580491,
+        "executionStopTime": 1659455580501,
+        "originalKey": "eb88aaa0-c22f-4ffb-813a-ca957b490acb",
+        "requestMsgId": "eb88aaa0-c22f-4ffb-813a-ca957b490acb",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "structured.a = 21\n",
+        "my_dataclass_instance2 = MyDataclass(**structured)\n",
+        "print(my_dataclass_instance2)"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "2d08c81c-9d18-4de9-8464-0da2d89f94f3",
+        "showInput": false
+      },
+      "source": [
+        "You can also call OmegaConf.structured on an instance."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659455594700,
+        "executionStopTime": 1659455594737,
+        "originalKey": "5e469bac-32a4-475d-9c09-8b64ba3f2155",
+        "requestMsgId": "5e469bac-32a4-475d-9c09-8b64ba3f2155",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "structured_from_instance = OmegaConf.structured(my_dataclass_instance)\n",
+        "my_dataclass_instance3 = MyDataclass(**structured_from_instance)\n",
+        "print(my_dataclass_instance3)"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659452594203,
+        "executionStopTime": 1659452594333,
+        "originalKey": "2ed559e3-8552-465a-938f-30c72a321184",
+        "requestMsgId": "2ed559e3-8552-465a-938f-30c72a321184",
+        "showInput": false
+      },
+      "source": [
+        "## 3. Our approach to OmegaConf.structured\n",
+        "\n",
+        "We provide functions which are equivalent to `OmegaConf.structured` but support more features. \n",
+        "To achieve the above using our functions, the following is used.\n",
+        "Note that we indicate configurable classes using a special base class `Configurable`, not a decorator."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659454053323,
+        "executionStopTime": 1659454061629,
+        "originalKey": "9888afbd-e617-4596-ab7a-fc1073f58656",
+        "requestMsgId": "9888afbd-e617-4596-ab7a-fc1073f58656",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "class MyConfigurable(Configurable):\n",
+        "    a: int\n",
+        "    b: int = 8\n",
+        "    c: Optional[Tuple[int, ...]] = None\n",
+        "\n",
+        "    def __post_init__(self):\n",
+        "        print(f\"created with a = {self.a}\")\n",
+        "        self.d = 2 * self.b"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659454784912,
+        "executionStopTime": 1659454784928,
+        "originalKey": "e43155b4-3da5-4df1-a2f5-da1d0369eec9",
+        "requestMsgId": "e43155b4-3da5-4df1-a2f5-da1d0369eec9",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "# The expand_args_fields function modifies the class like @dataclasses.dataclass.\n",
+        "# If it has not been called on a Configurable class before the class is instantiated, it will\n",
+        "# be called automatically.\n",
+        "expand_args_fields(MyConfigurable)\n",
+        "my_configurable_instance = MyConfigurable(a=18)\n",
+        "assert my_configurable_instance.d == 16"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659460669541,
+        "executionStopTime": 1659460669566,
+        "originalKey": "96eaae18-dce4-4ee1-b451-1466fea51b9f",
+        "requestMsgId": "96eaae18-dce4-4ee1-b451-1466fea51b9f",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "# get_default_args also calls expand_args_fields automatically\n",
+        "our_structured = get_default_args(MyConfigurable)\n",
+        "assert isinstance(our_structured, DictConfig)\n",
+        "print(OmegaConf.to_yaml(our_structured))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659460454020,
+        "executionStopTime": 1659460454032,
+        "originalKey": "359f7925-68de-42cd-bd34-79a099b1c210",
+        "requestMsgId": "359f7925-68de-42cd-bd34-79a099b1c210",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "our_structured.a = 21\n",
+        "print(MyConfigurable(**our_structured))"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659460599142,
+        "executionStopTime": 1659460599149,
+        "originalKey": "eac7d385-9365-4098-acf9-4f0a0dbdcb85",
+        "requestMsgId": "eac7d385-9365-4098-acf9-4f0a0dbdcb85",
+        "showInput": false
+      },
+      "source": [
+        "## 4. First enhancement: nested types 🪺\n",
+        "\n",
+        "Our system allows Configurable classes to contain each other. \n",
+        "One thing to remember: add a call to `run_auto_creation` in `__post_init__`."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659465752418,
+        "executionStopTime": 1659465752976,
+        "originalKey": "9bd70ee5-4ec1-4021-bce5-9638b5088c0a",
+        "requestMsgId": "9bd70ee5-4ec1-4021-bce5-9638b5088c0a",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "class Inner(Configurable):\n",
+        "    a: int = 8\n",
+        "    b: bool = True\n",
+        "    c: Tuple[int, ...] = (2, 3, 4, 6)\n",
+        "\n",
+        "\n",
+        "class Outer(Configurable):\n",
+        "    inner: Inner\n",
+        "    x: str = \"hello\"\n",
+        "    xx: bool = False\n",
+        "\n",
+        "    def __post_init__(self):\n",
+        "        run_auto_creation(self)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659465762326,
+        "executionStopTime": 1659465762339,
+        "originalKey": "9f2b9f98-b54b-46cc-9b02-9e902cb279e7",
+        "requestMsgId": "9f2b9f98-b54b-46cc-9b02-9e902cb279e7",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "outer_dc = get_default_args(Outer)\n",
+        "print(OmegaConf.to_yaml(outer_dc))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659465772894,
+        "executionStopTime": 1659465772911,
+        "originalKey": "0254204b-8c7a-4d40-bba6-5132185f63d7",
+        "requestMsgId": "0254204b-8c7a-4d40-bba6-5132185f63d7",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "outer = Outer(**outer_dc)\n",
+        "assert isinstance(outer, Outer)\n",
+        "assert isinstance(outer.inner, Inner)\n",
+        "print(vars(outer))\n",
+        "print(outer.inner)"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "44a78c13-ec92-4a87-808a-c4674b320c22",
+        "showInput": false
+      },
+      "source": [
+        "Note how `inner_args` is an extra member of `outer`. `run_auto_creation(self)` is equivalent to\n",
+        "```\n",
+        "    self.inner = Inner(**self.inner_args)\n",
+        "```"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659461071129,
+        "executionStopTime": 1659461071137,
+        "originalKey": "af0ec78b-7888-4b0d-9346-63d970d43293",
+        "requestMsgId": "af0ec78b-7888-4b0d-9346-63d970d43293",
+        "showInput": false
+      },
+      "source": [
+        "## 5. Second enhancement: pluggable/replaceable components 🔌\n",
+        "\n",
+        "If a class uses `ReplaceableBase` as a base class instead of `Configurable`, we call it a replaceable.\n",
+        "This indicates that it is designed for child classes to be used in its place.\n",
+        "We might use `NotImplementedError` to indicate functionality which subclasses are expected to implement.\n",
+        "The system maintains a global `registry` containing subclasses of each ReplaceableBase.\n",
+        "The subclasses register themselves with it with a decorator.\n",
+        "\n",
+        "A configurable class (i.e. a class which uses our system, i.e. a child of `Configurable` or `ReplaceableBase`) which contains a ReplaceableBase must also \n",
+        "contain a corresponding class_type field of type `str` which indicates which concrete child class to use."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659463453457,
+        "executionStopTime": 1659463453467,
+        "originalKey": "f2898703-d147-4394-978e-fc7f1f559395",
+        "requestMsgId": "f2898703-d147-4394-978e-fc7f1f559395",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "class InnerBase(ReplaceableBase):\n",
+        "    def say_something(self):\n",
+        "        raise NotImplementedError\n",
+        "\n",
+        "\n",
+        "@registry.register\n",
+        "class Inner1(InnerBase):\n",
+        "    a: int = 1\n",
+        "    b: str = \"h\"\n",
+        "\n",
+        "    def say_something(self):\n",
+        "        print(\"hello from an Inner1\")\n",
+        "\n",
+        "\n",
+        "@registry.register\n",
+        "class Inner2(InnerBase):\n",
+        "    a: int = 2\n",
+        "\n",
+        "    def say_something(self):\n",
+        "        print(\"hello from an Inner2\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659463453514,
+        "executionStopTime": 1659463453592,
+        "originalKey": "6f171599-51ee-440f-82d7-a59f84d24624",
+        "requestMsgId": "6f171599-51ee-440f-82d7-a59f84d24624",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "class Out(Configurable):\n",
+        "    inner: InnerBase\n",
+        "    inner_class_type: str = \"Inner1\"\n",
+        "    x: int = 19\n",
+        "\n",
+        "    def __post_init__(self):\n",
+        "        run_auto_creation(self)\n",
+        "\n",
+        "    def talk(self):\n",
+        "        self.inner.say_something()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659463191360,
+        "executionStopTime": 1659463191428,
+        "originalKey": "7abaecec-96e6-44df-8c8d-69c36a14b913",
+        "requestMsgId": "7abaecec-96e6-44df-8c8d-69c36a14b913",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "Out_dc = get_default_args(Out)\n",
+        "print(OmegaConf.to_yaml(Out_dc))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659463192717,
+        "executionStopTime": 1659463192754,
+        "originalKey": "c82dc2ca-ba8f-4a44-aed3-43f6b52ec28c",
+        "requestMsgId": "c82dc2ca-ba8f-4a44-aed3-43f6b52ec28c",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "Out_dc.inner_class_type = \"Inner2\"\n",
+        "out = Out(**Out_dc)\n",
+        "print(out.inner)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659463193751,
+        "executionStopTime": 1659463193791,
+        "originalKey": "aa0e1b04-963a-4724-81b7-5748b598b541",
+        "requestMsgId": "aa0e1b04-963a-4724-81b7-5748b598b541",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "out.talk()"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "4f78a56c-39cd-4563-a97e-041e5f360f6b",
+        "showInput": false
+      },
+      "source": [
+        "Note in this case there are many `args` members. It is usually fine to ignore them in the code. They are needed for the config."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659462145294,
+        "executionStopTime": 1659462145307,
+        "originalKey": "ce7069d5-a813-4286-a7cd-6ff40362105a",
+        "requestMsgId": "ce7069d5-a813-4286-a7cd-6ff40362105a",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "print(vars(out))"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659462231114,
+        "executionStopTime": 1659462231130,
+        "originalKey": "c7f051ff-c264-4b89-80dc-36cf179aafaf",
+        "requestMsgId": "c7f051ff-c264-4b89-80dc-36cf179aafaf",
+        "showInput": false
+      },
+      "source": [
+        "## 6. Example with torch.nn.Module  🔥\n",
+        "Typically in implicitron, we use this system in combination with [`Module`](https://pytorch.org/docs/stable/generated/torch.nn.Module.html)s. \n",
+        "Note in this case it is necessary to call `Module.__init__` explicitly in `__post_init__`."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659462645018,
+        "executionStopTime": 1659462645037,
+        "originalKey": "42d210d6-09e0-4daf-8ccb-411d30f268f4",
+        "requestMsgId": "42d210d6-09e0-4daf-8ccb-411d30f268f4",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "class MyLinear(torch.nn.Module, Configurable):\n",
+        "    d_in: int = 2\n",
+        "    d_out: int = 200\n",
+        "\n",
+        "    def __post_init__(self):\n",
+        "        super().__init__()\n",
+        "        self.linear = torch.nn.Linear(in_features=self.d_in, out_features=self.d_out)\n",
+        "\n",
+        "    def forward(self, x):\n",
+        "        return self.linear.forward(x)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659462692309,
+        "executionStopTime": 1659462692346,
+        "originalKey": "546781fe-5b95-4e48-9cb5-34a634a31313",
+        "requestMsgId": "546781fe-5b95-4e48-9cb5-34a634a31313",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "my_linear = MyLinear()\n",
+        "input = torch.zeros(2)\n",
+        "output = my_linear(input)\n",
+        "print(\"output shape:\", output.shape)"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659462738302,
+        "executionStopTime": 1659462738419,
+        "originalKey": "b6cb71e1-1d54-4e89-a422-0a70772c5c03",
+        "requestMsgId": "b6cb71e1-1d54-4e89-a422-0a70772c5c03",
+        "showInput": false
+      },
+      "source": [
+        "`my_linear` has all the usual features of a Module.\n",
+        "E.g. it can be saved and loaded with `torch.save` and `torch.load`.\n",
+        "It has parameters:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659462821485,
+        "executionStopTime": 1659462821501,
+        "originalKey": "47e8c53e-2d2c-4b41-8aa3-65aa3ea8a7d3",
+        "requestMsgId": "47e8c53e-2d2c-4b41-8aa3-65aa3ea8a7d3",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "for name, value in my_linear.named_parameters():\n",
+        "    print(name, value.shape)"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659463222379,
+        "executionStopTime": 1659463222409,
+        "originalKey": "a01f0ea7-55f2-4af9-8e81-45dddf40f13b",
+        "requestMsgId": "a01f0ea7-55f2-4af9-8e81-45dddf40f13b",
+        "showInput": false
+      },
+      "source": [
+        "## 7. Example of implementing your own pluggable component \n",
+        "Let's say I am using a library with `Out` like in section **5** but I want to implement my own child of InnerBase. \n",
+        "All I need to do is register its definition, but I need to do this before expand_args_fields is explicitly or implicitly called on Out."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659463694644,
+        "executionStopTime": 1659463694653,
+        "originalKey": "d9635511-a52b-43d5-8dae-d5c1a3dd9157",
+        "requestMsgId": "d9635511-a52b-43d5-8dae-d5c1a3dd9157",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "@registry.register\n",
+        "class UserImplementedInner(InnerBase):\n",
+        "    a: int = 200\n",
+        "\n",
+        "    def say_something(self):\n",
+        "        print(\"hello from the user\")"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "f1511aa2-56b8-4ed0-a453-17e2bbfeefe7",
+        "showInput": false
+      },
+      "source": [
+        "At this point, we need to redefine the class Out. \n",
+        "Otherwise if it has already been expanded without UserImplementedInner, then the following would not work,\n",
+        "because the implementations known to a class are fixed when it is expanded.\n",
+        "\n",
+        "If you are running experiments from a script, the thing to remember here is that you must import your own modules, which register your own implementations,\n",
+        "before you *use* the library classes."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659463745967,
+        "executionStopTime": 1659463745986,
+        "originalKey": "c7bb5a6e-682b-4eb0-a214-e0f5990b9406",
+        "requestMsgId": "c7bb5a6e-682b-4eb0-a214-e0f5990b9406",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "class Out(Configurable):\n",
+        "    inner: InnerBase\n",
+        "    inner_class_type: str = \"Inner1\"\n",
+        "    x: int = 19\n",
+        "\n",
+        "    def __post_init__(self):\n",
+        "        run_auto_creation(self)\n",
+        "\n",
+        "    def talk(self):\n",
+        "        self.inner.say_something()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659463747398,
+        "executionStopTime": 1659463747431,
+        "originalKey": "b6ecdc86-4b7b-47c6-9f45-a7e557c94979",
+        "requestMsgId": "b6ecdc86-4b7b-47c6-9f45-a7e557c94979",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "out2 = Out(inner_class_type=\"UserImplementedInner\")\n",
+        "print(out2.inner)"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659464033633,
+        "executionStopTime": 1659464033643,
+        "originalKey": "c7fe0df3-da13-40b8-9b06-6b1f37f37bb9",
+        "requestMsgId": "c7fe0df3-da13-40b8-9b06-6b1f37f37bb9",
+        "showInput": false
+      },
+      "source": [
+        "## 8. Example of making a subcomponent pluggable\n",
+        "\n",
+        "Let's look at what needs to happen if we have a subcomponent which we make pluggable, to allow users to supply their own."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659464709922,
+        "executionStopTime": 1659464709933,
+        "originalKey": "e37227b2-6897-4033-8560-9f2040abdeeb",
+        "requestMsgId": "e37227b2-6897-4033-8560-9f2040abdeeb",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "class SubComponent(Configurable):\n",
+        "    x: float = 0.25\n",
+        "\n",
+        "    def apply(self, a: float) -> float:\n",
+        "        return a + self.x\n",
+        "\n",
+        "\n",
+        "class LargeComponent(Configurable):\n",
+        "    repeats: int = 4\n",
+        "    subcomponent: SubComponent\n",
+        "\n",
+        "    def __post_init__(self):\n",
+        "        run_auto_creation(self)\n",
+        "\n",
+        "    def apply(self, a: float) -> float:\n",
+        "        for _ in range(self.repeats):\n",
+        "            a = self.subcomponent.apply(a)\n",
+        "        return a"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659464710339,
+        "executionStopTime": 1659464710459,
+        "originalKey": "cab4c121-350e-443f-9a49-bd542a9735a2",
+        "requestMsgId": "cab4c121-350e-443f-9a49-bd542a9735a2",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "large_component = LargeComponent()\n",
+        "assert large_component.apply(3) == 4\n",
+        "print(OmegaConf.to_yaml(LargeComponent))"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "be60323a-badf-46e4-a259-72cae1391028",
+        "showInput": false
+      },
+      "source": [
+        "Made generic:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659464717226,
+        "executionStopTime": 1659464717261,
+        "originalKey": "fc0d8cdb-4627-4427-b92a-17ac1c1b37b8",
+        "requestMsgId": "fc0d8cdb-4627-4427-b92a-17ac1c1b37b8",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "class SubComponentBase(ReplaceableBase):\n",
+        "    def apply(self, a: float) -> float:\n",
+        "        raise NotImplementedError\n",
+        "\n",
+        "\n",
+        "@registry.register\n",
+        "class SubComponent(SubComponentBase):\n",
+        "    x: float = 0.25\n",
+        "\n",
+        "    def apply(self, a: float) -> float:\n",
+        "        return a + self.x\n",
+        "\n",
+        "\n",
+        "class LargeComponent(Configurable):\n",
+        "    repeats: int = 4\n",
+        "    subcomponent: SubComponentBase\n",
+        "    subcomponent_class_type: str = \"SubComponent\"\n",
+        "\n",
+        "    def __post_init__(self):\n",
+        "        run_auto_creation(self)\n",
+        "\n",
+        "    def apply(self, a: float) -> float:\n",
+        "        for _ in range(self.repeats):\n",
+        "            a = self.subcomponent.apply(a)\n",
+        "        return a"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659464725473,
+        "executionStopTime": 1659464725587,
+        "originalKey": "bbc3d321-6b49-4356-be75-1a173b1fc3a5",
+        "requestMsgId": "bbc3d321-6b49-4356-be75-1a173b1fc3a5",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "large_component = LargeComponent()\n",
+        "assert large_component.apply(3) == 4\n",
+        "print(OmegaConf.to_yaml(LargeComponent))"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659464672680,
+        "executionStopTime": 1659464673231,
+        "originalKey": "5115453a-1d96-4022-97e7-46433e6dcf60",
+        "requestMsgId": "5115453a-1d96-4022-97e7-46433e6dcf60",
+        "showInput": false
+      },
+      "source": [
+        "The following things had to change:\n",
+        "* The base class SubComponentBase was defined.\n",
+        "* SubComponent gained a `@registry.register` decoration and had its base class changed to the new one.\n",
+        "* `subcomponent_class_type` was added as a member of the outer class.\n",
+        "* In any saved configuration yaml files, the key `subcomponent_args` had to be changed to `subcomponent_SubComponent_args`."
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659462041307,
+        "executionStopTime": 1659462041637,
+        "originalKey": "0739269e-5c0e-4551-b06f-f4aab386ba54",
+        "requestMsgId": "0739269e-5c0e-4551-b06f-f4aab386ba54",
+        "showInput": false
+      },
+      "source": [
+        "## Appendix: gotchas ⚠️\n",
+        "\n",
+        "* Omitting to define `__post_init__` or not calling `run_auto_creation` in it.\n",
+        "* Omitting a type annotation on a field. For example, writing \n",
+        "```\n",
+        "    subcomponent_class_type = \"SubComponent\"\n",
+        "```\n",
+        "instead of \n",
+        "```\n",
+        "    subcomponent_class_type: str = \"SubComponent\"\n",
+        "```\n",
+        "\n"
+      ]
+    }
+  ],
+  "metadata": {
+    "bento_stylesheets": {
+      "bento/extensions/flow/main.css": true,
+      "bento/extensions/kernel_selector/main.css": true,
+      "bento/extensions/kernel_ui/main.css": true,
+      "bento/extensions/new_kernel/main.css": true,
+      "bento/extensions/system_usage/main.css": true,
+      "bento/extensions/theme/main.css": true
+    },
+    "captumWidgetMessage": {},
+    "dataExplorerConfig": {},
+    "kernelspec": {
+      "display_name": "pytorch3d",
+      "language": "python",
+      "metadata": {
+        "cinder_runtime": false,
+        "fbpkg_supported": true,
+        "is_prebuilt": true,
+        "kernel_name": "bento_kernel_pytorch3d",
+        "nightly_builds": true
+      },
+      "name": "bento_kernel_pytorch3d"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3"
+    },
+    "last_base_url": "https://9177.od.fbinfra.net:443/",
+    "last_kernel_id": "90755407-3729-46f4-ab67-ff2cb1daa5cb",
+    "last_msg_id": "f61034eb-826226915ad9548ffbe495ba_6317",
+    "last_server_session_id": "d6b46f14-cee7-44c1-8c51-39a38a4ea4c2",
+    "outputWidgetContext": {}
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+}
diff --git a/pytorch3d/docs/tutorials/implicitron_volumes.ipynb b/pytorch3d/docs/tutorials/implicitron_volumes.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..bacf7cbd8f959eec7451326c9577fbe790aba56c
--- /dev/null
+++ b/pytorch3d/docs/tutorials/implicitron_volumes.ipynb
@@ -0,0 +1,896 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659619824914,
+        "executionStopTime": 1659619825485,
+        "originalKey": "d38652e8-200a-413c-a36a-f4d349b78a9d",
+        "requestMsgId": "641de8aa-0e42-4446-9304-c160a2d226bf",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved."
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "a48a9dcf-e80f-474b-a0c4-2c9a765b15c5",
+        "showInput": false
+      },
+      "source": [
+        "# A simple model using Implicitron\n",
+        "\n",
+        "In this demo, we use the VolumeRenderer from PyTorch3D as a custom implicit function in Implicitron. We will see\n",
+        "* some of the main objects in Implicitron\n",
+        "* how to plug in a custom part of a model"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "51337c0e-ad27-4b75-ad6a-737dca5d7b95",
+        "showInput": false
+      },
+      "source": [
+        "## 0. Install and import modules\n",
+        "\n",
+        "Ensure `torch` and `torchvision` are installed. If `pytorch3d` is not installed, install it using the following cell:\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659619898147,
+        "executionStopTime": 1659619898274,
+        "originalKey": "76f1ecd4-6b73-4214-81b0-118ef8d86872",
+        "requestMsgId": "deb6a860-6923-4227-abef-d31388b5142d",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "import sys\n",
+        "import torch\n",
+        "need_pytorch3d=False\n",
+        "try:\n",
+        "    import pytorch3d\n",
+        "except ModuleNotFoundError:\n",
+        "    need_pytorch3d=True\n",
+        "if need_pytorch3d:\n",
+        "    if torch.__version__.startswith(\"2.1.\") and sys.platform.startswith(\"linux\"):\n",
+        "        # We try to install PyTorch3D via a released wheel.\n",
+        "        pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
+        "        version_str=\"\".join([\n",
+        "            f\"py3{sys.version_info.minor}_cu\",\n",
+        "            torch.version.cuda.replace(\".\",\"\"),\n",
+        "            f\"_pyt{pyt_version_str}\"\n",
+        "        ])\n",
+        "        !pip install fvcore iopath\n",
+        "        !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",
+        "    else:\n",
+        "        # We try to install PyTorch3D from source.\n",
+        "        !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "2c1020e6-eb4a-4644-9719-9147500d8e4f",
+        "showInput": false
+      },
+      "source": [
+        "Ensure omegaconf and visdom are installed. If not, run this cell. (It should not be necessary to restart the runtime.)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "customInput": null,
+        "customOutput": null,
+        "originalKey": "9e751931-a38d-44c9-9ff1-ac2f7d3a3f99",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "!pip install omegaconf visdom"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customOutput": null,
+        "executionStartTime": 1659612480556,
+        "executionStopTime": 1659612480644,
+        "hidden_ranges": [],
+        "originalKey": "86807e4a-1675-4520-a033-c7af85b233ec",
+        "requestMsgId": "880a7e20-4a90-4b37-a5eb-bccc0b23cac6"
+      },
+      "outputs": [],
+      "source": [
+        "import logging\n",
+        "from typing import Tuple\n",
+        "\n",
+        "import matplotlib.animation as animation\n",
+        "import matplotlib.pyplot as plt\n",
+        "import numpy as np\n",
+        "import torch\n",
+        "import tqdm\n",
+        "from IPython.display import HTML\n",
+        "from omegaconf import OmegaConf\n",
+        "from PIL import Image\n",
+        "from pytorch3d.implicitron.dataset.dataset_base import FrameData\n",
+        "from pytorch3d.implicitron.dataset.rendered_mesh_dataset_map_provider import RenderedMeshDatasetMapProvider\n",
+        "from pytorch3d.implicitron.models.generic_model import GenericModel\n",
+        "from pytorch3d.implicitron.models.implicit_function.base import ImplicitFunctionBase, ImplicitronRayBundle\n",
+        "from pytorch3d.implicitron.models.renderer.base import EvaluationMode\n",
+        "from pytorch3d.implicitron.tools.config import get_default_args, registry, remove_unused_components\n",
+        "from pytorch3d.renderer.implicit.renderer import VolumeSampler\n",
+        "from pytorch3d.structures import Volumes\n",
+        "from pytorch3d.vis.plotly_vis import plot_batch_individually, plot_scene"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659610929375,
+        "executionStopTime": 1659610929383,
+        "hidden_ranges": [],
+        "originalKey": "b2d9f5bd-a9d4-4f78-b21e-92f2658e0fe9",
+        "requestMsgId": "7e43e623-4030-438b-af4e-b96170c9a052",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "output_resolution = 80"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659610930042,
+        "executionStopTime": 1659610930050,
+        "hidden_ranges": [],
+        "originalKey": "0b0c2087-4c86-4c57-b0ee-6f48a70a9c78",
+        "requestMsgId": "46883aad-f00b-4fd4-ac17-eec0b2ac272a",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "torch.set_printoptions(sci_mode=False)"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "37809d0d-b02e-42df-85b6-cdd038373653",
+        "showInput": false
+      },
+      "source": [
+        "## 1. Load renders of a mesh (the cow mesh) as a dataset\n",
+        "\n",
+        "A dataset's train, val and test parts in Implicitron are represented as a `dataset_map`, and provided by an implementation of `DatasetMapProvider`. \n",
+        "`RenderedMeshDatasetMapProvider` is one which generates a single-scene dataset with only a train component by taking a mesh and rendering it.\n",
+        "We use it with the cow mesh."
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659620739780,
+        "executionStopTime": 1659620739914,
+        "originalKey": "cc68cb9c-b8bf-4e9e-bef1-2cfafdf6caa2",
+        "requestMsgId": "398cfcae-5d43-4b6f-9c75-db3d297364d4",
+        "showInput": false
+      },
+      "source": [
+        "If running this notebook using **Google Colab**, run the following cell to fetch the mesh obj and texture files and save them at the path data/cow_mesh.\n",
+        "If running locally, the data is already available at the correct path."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "customInput": null,
+        "customOutput": null,
+        "originalKey": "2c55e002-a885-4169-8fdc-af9078b05968",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "!mkdir -p data/cow_mesh\n",
+        "!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.obj\n",
+        "!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.mtl\n",
+        "!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow_texture.png"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customOutput": null,
+        "executionStartTime": 1659621652237,
+        "executionStopTime": 1659621652903,
+        "hidden_ranges": [],
+        "originalKey": "eb77aaec-048c-40bd-bd69-0e66b6ab60b1",
+        "requestMsgId": "09b9975c-ff86-41c9-b4a9-975d23afc562",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "cow_provider = RenderedMeshDatasetMapProvider(\n",
+        "    data_file=\"data/cow_mesh/cow.obj\",\n",
+        "    use_point_light=False,\n",
+        "    resolution=output_resolution,\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659610966145,
+        "executionStopTime": 1659610966255,
+        "hidden_ranges": [],
+        "originalKey": "8210e15b-da48-4306-a49a-41c4e7e7d42f",
+        "requestMsgId": "c243edd2-a106-4fba-8471-dfa4f99a2088",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "dataset_map = cow_provider.get_dataset_map()\n",
+        "tr_cameras = [training_frame.camera for training_frame in dataset_map.train]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659610967703,
+        "executionStopTime": 1659610967848,
+        "hidden_ranges": [],
+        "originalKey": "458d72ad-d9a7-4f13-b5b7-90d2aec61c16",
+        "requestMsgId": "7f9431f3-8717-4d89-a7fe-1420dd0e00c4",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "# The cameras are all in the XZ plane, in a circle about 2.7 from the origin\n",
+        "centers = torch.cat([i.get_camera_center() for i in tr_cameras])\n",
+        "print(centers.min(0).values)\n",
+        "print(centers.max(0).values)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659552920194,
+        "executionStopTime": 1659552923122,
+        "hidden_ranges": [],
+        "originalKey": "931e712b-b141-437a-97fb-dc2a07ce3458",
+        "requestMsgId": "931e712b-b141-437a-97fb-dc2a07ce3458",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "# visualization of the cameras\n",
+        "plot = plot_scene({\"k\": {i: camera for i, camera in enumerate(tr_cameras)}}, camera_scale=0.25)\n",
+        "plot.layout.scene.aspectmode = \"data\"\n",
+        "plot"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "afa9c02d-f76b-4f68-83e9-9733c615406b",
+        "showInput": false
+      },
+      "source": [
+        "## 2. Custom implicit function 🧊\n",
+        "\n",
+        "At the core of neural rendering methods are functions of spatial coordinates called implicit functions, which are used in some kind of rendering process.\n",
+        "(Often those functions can additionally take other data as well, such as view direction.)\n",
+        "A common rendering process is ray marching over densities and colors provided by an implicit function.\n",
+        "In our case, taking samples from a 3D volume grid is a very simple function of spatial coordinates. \n",
+        "\n",
+        "Here we define our own implicit function, which uses PyTorch3D's existing functionality for sampling from a volume grid.\n",
+        "We do this by subclassing `ImplicitFunctionBase`.\n",
+        "We need to register our subclass with a special decorator.\n",
+        "We use Python's dataclass annotations for configuring the module."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659613575850,
+        "executionStopTime": 1659613575940,
+        "hidden_ranges": [],
+        "originalKey": "61b55043-dc52-4de7-992e-e2195edd2123",
+        "requestMsgId": "dfaace3c-098c-4ffe-9240-6a7ae0ff271e",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "@registry.register\n",
+        "class MyVolumes(ImplicitFunctionBase, torch.nn.Module):\n",
+        "    grid_resolution: int = 50  # common HWD of volumes, the number of voxels in each direction\n",
+        "    extent: float = 1.0  # In world coordinates, the volume occupies [-extent, extent] along each axis\n",
+        "\n",
+        "    def __post_init__(self):\n",
+        "        # We have to call this explicitly if there are other base classes like Module\n",
+        "        super().__init__()\n",
+        "\n",
+        "        # We define parameters like other torch.nn.Module objects.\n",
+        "        # In this case, both our parameter tensors are trainable; they govern the contents of the volume grid.\n",
+        "        density = torch.full((self.grid_resolution, self.grid_resolution, self.grid_resolution), -2.0)\n",
+        "        self.density = torch.nn.Parameter(density)\n",
+        "        color = torch.full((3, self.grid_resolution, self.grid_resolution, self.grid_resolution), 0.0)\n",
+        "        self.color = torch.nn.Parameter(color)\n",
+        "        self.density_activation = torch.nn.Softplus()\n",
+        "\n",
+        "    def forward(\n",
+        "        self,\n",
+        "        ray_bundle: ImplicitronRayBundle,\n",
+        "        fun_viewpool=None,\n",
+        "        global_code=None,\n",
+        "    ):\n",
+        "        densities = self.density_activation(self.density[None, None])\n",
+        "        voxel_size = 2.0 * float(self.extent) / self.grid_resolution\n",
+        "        features = self.color.sigmoid()[None]\n",
+        "\n",
+        "        # Like other PyTorch3D structures, the actual Volumes object should only exist as long\n",
+        "        # as one iteration of training. It is local to this function.\n",
+        "\n",
+        "        volume = Volumes(densities=densities, features=features, voxel_size=voxel_size)\n",
+        "        sampler = VolumeSampler(volumes=volume)\n",
+        "        densities, features = sampler(ray_bundle)\n",
+        "\n",
+        "        # When an implicit function is used for raymarching, i.e. for MultiPassEmissionAbsorptionRenderer,\n",
+        "        # it must return (densities, features, an auxiliary dict)\n",
+        "        return densities, features, {}\n"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "abaf2cd6-1b68-400e-a142-8fb9f49953f3",
+        "showInput": false
+      },
+      "source": [
+        "## 3. Construct the model object.\n",
+        "\n",
+        "The main model object in PyTorch3D is `GenericModel`, which has pluggable components for the major steps, including the renderer and the implicit function(s).\n",
+        "There are two ways to construct it which are equivalent here."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659621267561,
+        "executionStopTime": 1659621267938,
+        "originalKey": "f26c3dce-fbae-4592-bd0e-e4a8abc57c2c",
+        "requestMsgId": "9213687e-1caf-46a8-a4e5-a9c531530092",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "CONSTRUCT_MODEL_FROM_CONFIG = True\n",
+        "if CONSTRUCT_MODEL_FROM_CONFIG:\n",
+        "    # Via a DictConfig - this is how our training loop with hydra works\n",
+        "    cfg = get_default_args(GenericModel)\n",
+        "    cfg.implicit_function_class_type = \"MyVolumes\"\n",
+        "    cfg.render_image_height=output_resolution\n",
+        "    cfg.render_image_width=output_resolution\n",
+        "    cfg.loss_weights={\"loss_rgb_huber\": 1.0}\n",
+        "    cfg.tqdm_trigger_threshold=19000\n",
+        "    cfg.raysampler_AdaptiveRaySampler_args.scene_extent= 4.0\n",
+        "    gm = GenericModel(**cfg)\n",
+        "else:\n",
+        "    # constructing GenericModel directly\n",
+        "    gm = GenericModel(\n",
+        "        implicit_function_class_type=\"MyVolumes\",\n",
+        "        render_image_height=output_resolution,\n",
+        "        render_image_width=output_resolution,\n",
+        "        loss_weights={\"loss_rgb_huber\": 1.0},\n",
+        "        tqdm_trigger_threshold=19000,\n",
+        "        raysampler_AdaptiveRaySampler_args = {\"scene_extent\": 4.0}\n",
+        "    )\n",
+        "\n",
+        "    # In this case we can get the equivalent DictConfig cfg object to the way gm is configured as follows\n",
+        "    cfg = OmegaConf.structured(gm)\n"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659611214689,
+        "executionStopTime": 1659611214748,
+        "hidden_ranges": [],
+        "originalKey": "4e659f7d-ce66-4999-83de-005eb09d7705",
+        "requestMsgId": "7b815b2b-cf19-44d0-ae89-76fde6df35ec",
+        "showInput": false
+      },
+      "source": [
+        "The default renderer is an emission-absorption raymarcher. We keep that default."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659621268007,
+        "executionStopTime": 1659621268190,
+        "hidden_ranges": [],
+        "originalKey": "d37ae488-c57c-44d3-9def-825dc1a6495b",
+        "requestMsgId": "71143ec1-730f-4876-8a14-e46eea9d6dd1",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "# We can display the configuration in use as follows.\n",
+        "remove_unused_components(cfg)\n",
+        "yaml = OmegaConf.to_yaml(cfg, sort_keys=False)\n",
+        "%page -r yaml"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659621268727,
+        "executionStopTime": 1659621268776,
+        "hidden_ranges": [],
+        "originalKey": "52e53179-3c6e-4c1f-a38a-3a6d803687bb",
+        "requestMsgId": "05de9bc3-3f74-4a6f-851c-9ec919b59506",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "device = torch.device(\"cuda:0\")\n",
+        "gm.to(device)\n",
+        "assert next(gm.parameters()).is_cuda"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "customInput": null,
+        "originalKey": "528a7d53-c645-49c2-9021-09adbb18cd23",
+        "showInput": false
+      },
+      "source": [
+        "## 4. Train the model"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659621270236,
+        "executionStopTime": 1659621270446,
+        "hidden_ranges": [],
+        "originalKey": "953280bd-3161-42ba-8dcb-0c8ef2d5cc25",
+        "requestMsgId": "9bba424b-7bfd-4e5a-9d79-ae316e20bab0",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "train_data_collated = [FrameData.collate([frame.to(device)]) for frame in dataset_map.train]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659621270815,
+        "executionStopTime": 1659621270948,
+        "hidden_ranges": [],
+        "originalKey": "2fcf07f0-0c28-49c7-8c76-1c9a9d810167",
+        "requestMsgId": "821deb43-6084-4ece-83c3-dee214562c47",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "gm.train()\n",
+        "optimizer = torch.optim.Adam(gm.parameters(), lr=0.1)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customOutput": null,
+        "executionStartTime": 1659621271875,
+        "executionStopTime": 1659621298146,
+        "hidden_ranges": [],
+        "originalKey": "105099f7-ed0c-4e7f-a976-61a93fd0a8fe",
+        "requestMsgId": "0c87c108-83e3-4129-ad02-85e0140f1368",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "iterator = tqdm.tqdm(range(2000))\n",
+        "for n_batch in iterator:\n",
+        "    optimizer.zero_grad()\n",
+        "\n",
+        "    frame = train_data_collated[n_batch % len(dataset_map.train)]\n",
+        "    out = gm(**frame, evaluation_mode=EvaluationMode.TRAINING)\n",
+        "    out[\"objective\"].backward()\n",
+        "    if n_batch % 100 == 0:\n",
+        "        iterator.set_postfix_str(f\"loss: {float(out['objective']):.5f}\")\n",
+        "    optimizer.step()"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659535024768,
+        "executionStopTime": 1659535024906,
+        "originalKey": "e3cd494a-536b-48bc-8290-c048118c82eb",
+        "requestMsgId": "e3cd494a-536b-48bc-8290-c048118c82eb",
+        "showInput": false
+      },
+      "source": [
+        "## 5. Evaluate the module\n",
+        "\n",
+        "We generate complete images from all the viewpoints to see how they look."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659621299859,
+        "executionStopTime": 1659621311133,
+        "hidden_ranges": [],
+        "originalKey": "fbe1b2ea-cc24-4b20-a2d7-0249185e34a5",
+        "requestMsgId": "771ef1f8-5eee-4932-9e81-33604bf0512a",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "def to_numpy_image(image):\n",
+        "    # Takes an image of shape (C, H, W) in [0,1], where C=3 or 1\n",
+        "    # to a numpy uint8 image of shape (H, W, 3)\n",
+        "    return (image * 255).to(torch.uint8).permute(1, 2, 0).detach().cpu().expand(-1, -1, 3).numpy()\n",
+        "def resize_image(image):\n",
+        "    # Takes images of shape (B, C, H, W) to (B, C, output_resolution, output_resolution)\n",
+        "    return torch.nn.functional.interpolate(image, size=(output_resolution, output_resolution))\n",
+        "\n",
+        "gm.eval()\n",
+        "images = []\n",
+        "expected = []\n",
+        "masks = []\n",
+        "masks_expected = []\n",
+        "for frame in tqdm.tqdm(train_data_collated):\n",
+        "    with torch.no_grad():\n",
+        "        out = gm(**frame, evaluation_mode=EvaluationMode.EVALUATION)\n",
+        "\n",
+        "    image_rgb = to_numpy_image(out[\"images_render\"][0])\n",
+        "    mask = to_numpy_image(out[\"masks_render\"][0])\n",
+        "    expd = to_numpy_image(resize_image(frame.image_rgb)[0])\n",
+        "    mask_expected = to_numpy_image(resize_image(frame.fg_probability)[0])\n",
+        "\n",
+        "    images.append(image_rgb)\n",
+        "    masks.append(mask)\n",
+        "    expected.append(expd)\n",
+        "    masks_expected.append(mask_expected)"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659614622542,
+        "executionStopTime": 1659614622757,
+        "originalKey": "24953039-9780-40fd-bd81-5d63e9f40069",
+        "requestMsgId": "7af895a3-dfe4-4c28-ac3b-4ff0fbb40c7f",
+        "showInput": false
+      },
+      "source": [
+        "We draw a grid showing predicted image and expected image, followed by predicted mask and expected mask, from each viewpoint. \n",
+        "This is a grid of four rows of images, wrapped into several large rows, i.e.\n",
+        "<small><center>\n",
+        "```\n",
+        "┌────────┬────────┐           ┌────────┐\n",
+        "│pred    │pred    │           │pred    │\n",
+        "│image   │image   │           │image   │\n",
+        "│1       │2       │           │n       │\n",
+        "├────────┼────────┤           ├────────┤\n",
+        "│expected│expected│           │expected│\n",
+        "│image   │image   │  ...      │image   │\n",
+        "│1       │2       │           │n       │\n",
+        "├────────┼────────┤           ├────────┤\n",
+        "│pred    │pred    │           │pred    │\n",
+        "│mask    │mask    │           │mask    │\n",
+        "│1       │2       │           │n       │\n",
+        "├────────┼────────┤           ├────────┤\n",
+        "│expected│expected│           │expected│\n",
+        "│mask    │mask    │           │mask    │\n",
+        "│1       │2       │           │n       │\n",
+        "├────────┼────────┤           ├────────┤\n",
+        "│pred    │pred    │           │pred    │\n",
+        "│image   │image   │           │image   │\n",
+        "│n+1     │n+2     │           │2n      │\n",
+        "├────────┼────────┤           ├────────┤\n",
+        "│expected│expected│           │expected│\n",
+        "│image   │image   │  ...      │image   │\n",
+        "│n+1     │n+2     │           │2n      │\n",
+        "├────────┼────────┤           ├────────┤\n",
+        "│pred    │pred    │           │pred    │\n",
+        "│mask    │mask    │           │mask    │\n",
+        "│n+1     │n+2     │           │2n      │\n",
+        "├────────┼────────┤           ├────────┤\n",
+        "│expected│expected│           │expected│\n",
+        "│mask    │mask    │           │mask    │\n",
+        "│n+1     │n+2     │           │2n      │\n",
+        "└────────┴────────┘           └────────┘\n",
+        "           ...\n",
+        "```\n",
+        "</center></small>"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659621313894,
+        "executionStopTime": 1659621314042,
+        "hidden_ranges": [],
+        "originalKey": "c488a34a-e46d-4649-93fb-4b1bb5a0e439",
+        "requestMsgId": "4221e632-fca1-4fe5-b2e3-f92c37aa40e4",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "images_to_display = [images.copy(), expected.copy(), masks.copy(), masks_expected.copy()]\n",
+        "n_rows = 4\n",
+        "n_images = len(images)\n",
+        "blank_image = images[0] * 0\n",
+        "n_per_row = 1+(n_images-1)//n_rows\n",
+        "for _ in range(n_per_row*n_rows - n_images):\n",
+        "    for group in images_to_display:\n",
+        "        group.append(blank_image)\n",
+        "\n",
+        "images_to_display_listed = [[[i] for i in j] for j in images_to_display]\n",
+        "split = []\n",
+        "for row in range(n_rows):\n",
+        "    for group in images_to_display_listed:\n",
+        "        split.append(group[row*n_per_row:(row+1)*n_per_row])  \n",
+        "\n",
+        "Image.fromarray(np.block(split))\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659621323795,
+        "executionStopTime": 1659621323820,
+        "hidden_ranges": [],
+        "originalKey": "49eab9e1-4fe2-4fbe-b4f3-7b6953340170",
+        "requestMsgId": "85b402ad-f903-431f-a13e-c2d697e869bb",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "# Print the maximum channel intensity in the second image (index 1).\n",
+        "print(images[1].max()/255)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "code_folding": [],
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659621408642,
+        "executionStopTime": 1659621409559,
+        "hidden_ranges": [],
+        "originalKey": "137d2c43-d39d-4266-ac5e-2b714da5e0ee",
+        "requestMsgId": "8e27ec57-c2d6-4ae0-be69-b63b6af929ff",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "plt.ioff()\n",
+        "fig, ax = plt.subplots(figsize=(3,3))\n",
+        "\n",
+        "ax.grid(None)\n",
+        "ims = [[ax.imshow(im, animated=True)] for im in images]\n",
+        "ani = animation.ArtistAnimation(fig, ims, interval=80, blit=True)\n",
+        "ani_html = ani.to_jshtml()\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659621409620,
+        "executionStopTime": 1659621409725,
+        "originalKey": "783e70d6-7cf1-4d76-a126-ba11ffc2f5be",
+        "requestMsgId": "b6843506-c5fa-4508-80fc-8ecae51a934a",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "HTML(ani_html)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false,
+        "customInput": null,
+        "customOutput": null,
+        "executionStartTime": 1659614670081,
+        "executionStopTime": 1659614670168,
+        "originalKey": "0286c350-2362-4f47-8181-2fc2ba51cfcf",
+        "requestMsgId": "976f4db9-d4c7-466c-bcfd-218234400226",
+        "showInput": true
+      },
+      "outputs": [],
+      "source": [
+        "# If you want to see the output of the model with the volume forced to opaque white, run this and re-evaluate\n",
+        "# with torch.no_grad():\n",
+        "#      gm._implicit_functions[0]._fn.density.fill_(9.0)\n",
+        "#      gm._implicit_functions[0]._fn.color.fill_(9.0)\n"
+      ]
+    }
+  ],
+  "metadata": {
+    "bento_stylesheets": {
+      "bento/extensions/flow/main.css": true,
+      "bento/extensions/kernel_selector/main.css": true,
+      "bento/extensions/kernel_ui/main.css": true,
+      "bento/extensions/new_kernel/main.css": true,
+      "bento/extensions/system_usage/main.css": true,
+      "bento/extensions/theme/main.css": true
+    },
+    "captumWidgetMessage": {},
+    "dataExplorerConfig": {},
+    "kernelspec": {
+      "display_name": "pytorch3d",
+      "language": "python",
+      "metadata": {
+        "cinder_runtime": false,
+        "fbpkg_supported": true,
+        "is_prebuilt": true,
+        "kernel_name": "bento_kernel_pytorch3d",
+        "nightly_builds": true
+      },
+      "name": "bento_kernel_pytorch3d"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3"
+    },
+    "last_base_url": "https://9177.od.fbinfra.net:443/",
+    "last_kernel_id": "bb33cd83-7924-489a-8bd8-2d9d62eb0126",
+    "last_msg_id": "99f7088e-d22b355b859660479ef0574e_5743",
+    "last_server_session_id": "2944b203-9ea8-4c0e-9634-645dfea5f26b",
+    "outputWidgetContext": {}
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+}
diff --git a/pytorch3d/docs/tutorials/render_colored_points.ipynb b/pytorch3d/docs/tutorials/render_colored_points.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..0982a9b554a5d7ec7037b56f87db468298ef5ebc
--- /dev/null
+++ b/pytorch3d/docs/tutorials/render_colored_points.ipynb
@@ -0,0 +1,479 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Render a colored point cloud\n",
+    "\n",
+    "This tutorial shows how to:\n",
+    "- set up a renderer \n",
+    "- render the point cloud \n",
+    "- vary the rendering settings such as compositing and camera position"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Import modules"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Ensure `torch` and `torchvision` are installed. If `pytorch3d` is not installed, install it using the following cell:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import torch\n",
+    "need_pytorch3d=False\n",
+    "try:\n",
+    "    import pytorch3d\n",
+    "except ModuleNotFoundError:\n",
+    "    need_pytorch3d=True\n",
+    "if need_pytorch3d:\n",
+    "    if torch.__version__.startswith(\"2.1.\") and sys.platform.startswith(\"linux\"):\n",
+    "        # We try to install PyTorch3D via a released wheel.\n",
+    "        pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
+    "        version_str=\"\".join([\n",
+    "            f\"py3{sys.version_info.minor}_cu\",\n",
+    "            torch.version.cuda.replace(\".\",\"\"),\n",
+    "            f\"_pyt{pyt_version_str}\"\n",
+    "        ])\n",
+    "        !pip install fvcore iopath\n",
+    "        !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",
+    "    else:\n",
+    "        # We try to install PyTorch3D from source.\n",
+    "        !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import torch\n",
+    "import torch.nn.functional as F\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Util function for loading point clouds\n",
+    "import numpy as np\n",
+    "\n",
+    "# Data structures and functions for rendering\n",
+    "from pytorch3d.structures import Pointclouds\n",
+    "from pytorch3d.vis.plotly_vis import AxisArgs, plot_batch_individually, plot_scene\n",
+    "from pytorch3d.renderer import (\n",
+    "    look_at_view_transform,\n",
+    "    FoVOrthographicCameras, \n",
+    "    PointsRasterizationSettings,\n",
+    "    PointsRenderer,\n",
+    "    PulsarPointsRenderer,\n",
+    "    PointsRasterizer,\n",
+    "    AlphaCompositor,\n",
+    "    NormWeightedCompositor\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Load a point cloud and corresponding colors\n",
+    "\n",
+    "Load and create a **Point Cloud** object. \n",
+    "\n",
+    "**Pointclouds** is a unique data structure provided in PyTorch3D for working with batches of point clouds of different sizes. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If running this notebook using **Google Colab**, run the following cell to fetch the pointcloud data and save it at the path `data/PittsburghBridge`:\n",
+    "If running locally, the data is already available at the correct path. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!mkdir -p data/PittsburghBridge\n",
+    "!wget -P data/PittsburghBridge https://dl.fbaipublicfiles.com/pytorch3d/data/PittsburghBridge/pointcloud.npz"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Setup\n",
+    "if torch.cuda.is_available():\n",
+    "    device = torch.device(\"cuda:0\")\n",
+    "    torch.cuda.set_device(device)\n",
+    "else:\n",
+    "    device = torch.device(\"cpu\")\n",
+    "\n",
+    "# Set paths\n",
+    "DATA_DIR = \"./data\"\n",
+    "obj_filename = os.path.join(DATA_DIR, \"PittsburghBridge/pointcloud.npz\")\n",
+    "\n",
+    "# Load point cloud\n",
+    "pointcloud = np.load(obj_filename)\n",
+    "verts = torch.Tensor(pointcloud['verts']).to(device)\n",
+    "        \n",
+    "rgb = torch.Tensor(pointcloud['rgb']).to(device)\n",
+    "\n",
+    "point_cloud = Pointclouds(points=[verts], features=[rgb])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create a renderer\n",
+    "\n",
+    "A renderer in PyTorch3D is composed of a **rasterizer** and a **shader** which each have a number of subcomponents such as a **camera** (orthographic/perspective). Here we initialize some of these components and use default values for the rest.\n",
+    "\n",
+    "In this example we will first create a **renderer** which uses an **orthographic camera**, and applies **alpha compositing**. Then we learn how to vary different components using the modular API.  \n",
+    "\n",
+    "[1] <a href=\"https://arxiv.org/abs/1912.08804\">SynSin: End to end View Synthesis from a Single Image.</a> Olivia Wiles, Georgia Gkioxari, Richard Szeliski, Justin Johnson. CVPR 2020."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize a camera.\n",
+    "R, T = look_at_view_transform(20, 10, 0)\n",
+    "cameras = FoVOrthographicCameras(device=device, R=R, T=T, znear=0.01)\n",
+    "\n",
+    "# Define the settings for rasterization and shading. Here we set the output image to be of size\n",
+    "# 512x512. For this point cloud rendering we set radius=0.003 and points_per_pixel=10.\n",
+    "# Refer to rasterize_points.py for explanations of these parameters. \n",
+    "raster_settings = PointsRasterizationSettings(\n",
+    "    image_size=512, \n",
+    "    radius = 0.003,\n",
+    "    points_per_pixel = 10\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# Create a points renderer by compositing points using an alpha compositor (nearer points\n",
+    "# are weighted more heavily). See [1] for an explanation.\n",
+    "rasterizer = PointsRasterizer(cameras=cameras, raster_settings=raster_settings)\n",
+    "renderer = PointsRenderer(\n",
+    "    rasterizer=rasterizer,\n",
+    "    compositor=AlphaCompositor()\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "images = renderer(point_cloud)\n",
+    "plt.figure(figsize=(10, 10))\n",
+    "plt.imshow(images[0, ..., :3].cpu().numpy())\n",
+    "plt.axis(\"off\");"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We will now modify the **renderer** to use **alpha compositing** with a set background color. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "renderer = PointsRenderer(\n",
+    "    rasterizer=rasterizer,\n",
+    "    # Pass in background_color to the alpha compositor, setting the background color \n",
+    "    # to the 3 item tuple, representing rgb on a scale of 0 -> 1, in this case blue\n",
+    "    compositor=AlphaCompositor(background_color=(0, 0, 1))\n",
+    ")\n",
+    "images = renderer(point_cloud)\n",
+    "\n",
+    "plt.figure(figsize=(10, 10))\n",
+    "plt.imshow(images[0, ..., :3].cpu().numpy())\n",
+    "plt.axis(\"off\");"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In this example we will first create a **renderer** which uses an **orthographic camera**, and applies **weighted compositing**. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize a camera.\n",
+    "R, T = look_at_view_transform(20, 10, 0)\n",
+    "cameras = FoVOrthographicCameras(device=device, R=R, T=T, znear=0.01)\n",
+    "\n",
+    "# Define the settings for rasterization and shading. Here we set the output image to be of size\n",
+    "# 512x512. For this point cloud rendering we set radius=0.003 and points_per_pixel=10.\n",
+    "# Refer to rasterize_points.py for explanations of these parameters. \n",
+    "raster_settings = PointsRasterizationSettings(\n",
+    "    image_size=512, \n",
+    "    radius = 0.003,\n",
+    "    points_per_pixel = 10\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# Create a points renderer by compositing points using a weighted compositor (3D points are\n",
+    "# weighted according to their distance to a pixel and accumulated using a weighted sum)\n",
+    "renderer = PointsRenderer(\n",
+    "    rasterizer=PointsRasterizer(cameras=cameras, raster_settings=raster_settings),\n",
+    "    compositor=NormWeightedCompositor()\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "images = renderer(point_cloud)\n",
+    "plt.figure(figsize=(10, 10))\n",
+    "plt.imshow(images[0, ..., :3].cpu().numpy())\n",
+    "plt.axis(\"off\");"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We will now modify the **renderer** to use **weighted compositing** with a set background color. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "renderer = PointsRenderer(\n",
+    "    rasterizer=PointsRasterizer(cameras=cameras, raster_settings=raster_settings),\n",
+    "    # Pass in background_color to the norm weighted compositor, setting the background color \n",
+    "    # to the 3 item tuple, representing rgb on a scale of 0 -> 1, in this case red\n",
+    "    compositor=NormWeightedCompositor(background_color=(1,0,0))\n",
+    ")\n",
+    "images = renderer(point_cloud)\n",
+    "plt.figure(figsize=(10, 10))\n",
+    "plt.imshow(images[0, ..., :3].cpu().numpy())\n",
+    "plt.axis(\"off\");"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Using the pulsar backend\n",
+    "\n",
+    "Switching to the pulsar backend is easy! The pulsar backend has a compositor built-in, so the `compositor` argument is not required when creating it (a warning will be displayed if you provide it nevertheless). It pre-allocates memory on the rendering device, that's why it needs the `n_channels` at construction time.\n",
+    "\n",
+    "All parameters for the renderer forward function are batch-wise (in this example, `gamma`), except the background color, so you have to provide as many values as you have examples in your batch. The background color is optional and by default set to all zeros. You can find a detailed explanation of how gamma influences the rendering function in the paper [Fast Differentiable Raycasting for Neural Rendering using\n",
+    "Sphere-based Representations](https://arxiv.org/pdf/2004.07484.pdf).\n",
+    "\n",
+    "You can also use the `native` backend for the pulsar backend which already provides access to point opacity. The native backend can be imported from `pytorch3d.renderer.points.pulsar`; you can find examples for this in the folder `docs/examples`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "renderer = PulsarPointsRenderer(\n",
+    "    rasterizer=PointsRasterizer(cameras=cameras, raster_settings=raster_settings),\n",
+    "    n_channels=4\n",
+    ").to(device)\n",
+    "\n",
+    "images = renderer(point_cloud, gamma=(1e-4,),\n",
+    "                  bg_col=torch.tensor([0.0, 1.0, 0.0, 1.0], dtype=torch.float32, device=device))\n",
+    "plt.figure(figsize=(10, 10))\n",
+    "plt.imshow(images[0, ..., :3].cpu().numpy())\n",
+    "plt.axis(\"off\");"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### View pointclouds in Plotly figures\n",
+    "\n",
+    "Here we use the PyTorch3D function `plot_scene` to render the pointcloud in a Plotly figure. `plot_scene` returns a plotly figure with trace and subplots defined by the input."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_scene({\n",
+    "    \"Pointcloud\": {\n",
+    "        \"person\": point_cloud\n",
+    "    }\n",
+    "})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We will now render a batch of pointclouds. The first pointcloud is the same as above, and the second is all-black and offset by 2 in all dimensions so we can see them on the same plot. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "point_cloud_batch = Pointclouds(points=[verts, verts + 2], features=[rgb, torch.zeros_like(rgb)])\n",
+    "# render both in the same plot in different traces\n",
+    "fig = plot_scene({\n",
+    "    \"Pointcloud\": {\n",
+    "        \"person\": point_cloud_batch[0],\n",
+    "        \"person2\": point_cloud_batch[1]\n",
+    "    }\n",
+    "})\n",
+    "fig.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# render both in the same plot in one trace\n",
+    "fig = plot_scene({\n",
+    "    \"Pointcloud\": {\n",
+    "        \"2 people\": point_cloud_batch\n",
+    "    }\n",
+    "})\n",
+    "fig.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For batches, we can also use `plot_batch_individually` to avoid constructing the scene dictionary ourselves."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# render both in 1 row in different subplots\n",
+    "fig2 = plot_batch_individually(point_cloud_batch, ncols=2)\n",
+    "fig2.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# modify the plotly figure height and width\n",
+    "fig2.update_layout(height=500, width=500)\n",
+    "fig2.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can also modify the axis arguments and axis backgrounds for either function, and title our plots in `plot_batch_individually`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig3 = plot_batch_individually(\n",
+    "    point_cloud_batch, \n",
+    "    xaxis={\"backgroundcolor\":\"rgb(200, 200, 230)\"},\n",
+    "    yaxis={\"backgroundcolor\":\"rgb(230, 200, 200)\"},\n",
+    "    zaxis={\"backgroundcolor\":\"rgb(200, 230, 200)\"}, \n",
+    "    subplot_titles=[\"Pointcloud1\", \"Pointcloud2\"], # this should have a title for each subplot, titles can be \"\"\n",
+    "    axis_args=AxisArgs(showgrid=True))\n",
+    "fig3.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "bento_stylesheets": {
+   "bento/extensions/flow/main.css": true,
+   "bento/extensions/kernel_selector/main.css": true,
+   "bento/extensions/kernel_ui/main.css": true,
+   "bento/extensions/new_kernel/main.css": true,
+   "bento/extensions/system_usage/main.css": true,
+   "bento/extensions/theme/main.css": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pytorch3d/docs/tutorials/render_densepose.ipynb b/pytorch3d/docs/tutorials/render_densepose.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..2307165bcbfae270bb3da89d29efd822ccf667eb
--- /dev/null
+++ b/pytorch3d/docs/tutorials/render_densepose.ipynb
@@ -0,0 +1,435 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Render DensePose \n",
+    "\n",
+    "DensePose refers to dense human pose representation: https://github.com/facebookresearch/DensePose. \n",
+    "In this tutorial, we provide an example of using DensePose data in PyTorch3D.\n",
+    "\n",
+    "This tutorial shows how to:\n",
+    "- load a mesh and textures from densepose `.mat` and `.pkl` files\n",
+    "- set up a renderer \n",
+    "- render the mesh \n",
+    "- vary the rendering settings such as lighting and camera position"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "Bnj3THhzfBLf"
+   },
+   "source": [
+    "## Import modules"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Ensure `torch` and `torchvision` are installed. If `pytorch3d` is not installed, install it using the following cell:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import torch\n",
+    "need_pytorch3d=False\n",
+    "try:\n",
+    "    import pytorch3d\n",
+    "except ModuleNotFoundError:\n",
+    "    need_pytorch3d=True\n",
+    "if need_pytorch3d:\n",
+    "    if torch.__version__.startswith(\"2.1.\") and sys.platform.startswith(\"linux\"):\n",
+    "        # We try to install PyTorch3D via a released wheel.\n",
+    "        pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
+    "        version_str=\"\".join([\n",
+    "            f\"py3{sys.version_info.minor}_cu\",\n",
+    "            torch.version.cuda.replace(\".\",\"\"),\n",
+    "            f\"_pyt{pyt_version_str}\"\n",
+    "        ])\n",
+    "        !pip install fvcore iopath\n",
+    "        !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",
+    "    else:\n",
+    "        # We try to install PyTorch3D from source.\n",
+    "        !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We also install chumpy as it is needed to load the SMPL model pickle file.\n",
+    "!pip install chumpy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import torch\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "\n",
+    "# libraries for reading data from files\n",
+    "from scipy.io import loadmat\n",
+    "from PIL import Image\n",
+    "import pickle\n",
+    "\n",
+    "# Data structures and functions for rendering\n",
+    "from pytorch3d.structures import Meshes\n",
+    "from pytorch3d.renderer import (\n",
+    "    look_at_view_transform,\n",
+    "    FoVPerspectiveCameras, \n",
+    "    PointLights, \n",
+    "    DirectionalLights, \n",
+    "    Materials, \n",
+    "    RasterizationSettings, \n",
+    "    MeshRenderer, \n",
+    "    MeshRasterizer,  \n",
+    "    SoftPhongShader,\n",
+    "    TexturesUV\n",
+    ")\n",
+    "\n",
+    "# add path for demo utils functions \n",
+    "import sys\n",
+    "import os\n",
+    "sys.path.append(os.path.abspath(''))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load the SMPL model\n",
+    "\n",
+    "#### Download the SMPL model\n",
+    "- Go to https://smpl.is.tue.mpg.de/download.php and sign up.\n",
+    "- Download SMPL for Python Users and unzip.\n",
+    "- Copy the male template file **'models/basicModel_m_lbs_10_207_0_v1.0.0.pkl'** to the data/DensePose/ folder.\n",
+    "   - rename the file to **'smpl_model.pkl'** or rename the string where it's commented below\n",
+    "   \n",
+    "If running this notebook using Google Colab, run the following cell to fetch the texture and UV values and save them at the correct path."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Texture image\n",
+    "!wget -P data/DensePose https://raw.githubusercontent.com/facebookresearch/DensePose/master/DensePoseData/demo_data/texture_from_SURREAL.png\n",
+    "\n",
+    "# UV_Processed.mat\n",
+    "!wget https://dl.fbaipublicfiles.com/densepose/densepose_uv_data.tar.gz\n",
+    "!tar xvf densepose_uv_data.tar.gz -C data/DensePose\n",
+    "!rm densepose_uv_data.tar.gz"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Load our texture UV data and our SMPL data, with some processing to correct data values and format."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Setup\n",
+    "if torch.cuda.is_available():\n",
+    "    device = torch.device(\"cuda:0\")\n",
+    "    torch.cuda.set_device(device)\n",
+    "else:\n",
+    "    device = torch.device(\"cpu\")\n",
+    "    \n",
+    "# Set paths\n",
+    "DATA_DIR = \"./data\"\n",
+    "data_filename = os.path.join(DATA_DIR, \"DensePose/UV_Processed.mat\")\n",
+    "tex_filename = os.path.join(DATA_DIR,\"DensePose/texture_from_SURREAL.png\")\n",
+    "# rename your .pkl file or change this string\n",
+    "verts_filename = os.path.join(DATA_DIR, \"DensePose/smpl_model.pkl\")\n",
+    "\n",
+    "\n",
+    "# Load SMPL and texture data\n",
+    "with open(verts_filename, 'rb') as f:\n",
+    "    data = pickle.load(f, encoding='latin1') \n",
+    "    v_template = torch.Tensor(data['v_template']).to(device) # (6890, 3)\n",
+    "ALP_UV = loadmat(data_filename)\n",
+    "with Image.open(tex_filename) as image:\n",
+    "    np_image = np.asarray(image.convert(\"RGB\")).astype(np.float32)\n",
+    "tex = torch.from_numpy(np_image / 255.)[None].to(device)\n",
+    "\n",
+    "verts = torch.from_numpy((ALP_UV[\"All_vertices\"]).astype(int)).squeeze().to(device) # (7829,)\n",
+    "U = torch.Tensor(ALP_UV['All_U_norm']).to(device) # (7829, 1)\n",
+    "V = torch.Tensor(ALP_UV['All_V_norm']).to(device) # (7829, 1)\n",
+    "faces = torch.from_numpy((ALP_UV['All_Faces'] - 1).astype(int)).to(device)  # (13774, 3)\n",
+    "face_indices = torch.Tensor(ALP_UV['All_FaceIndices']).squeeze()  # (13774,)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Display the texture image\n",
+    "plt.figure(figsize=(10, 10))\n",
+    "plt.imshow(tex.squeeze(0).cpu())\n",
+    "plt.axis(\"off\");"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In DensePose, the body mesh is split into 24 parts. In the texture image, we can see the 24 parts are separated out into individual (200, 200) images per body part.  The convention in DensePose is that each face in the mesh is associated with a body part (given by the face_indices tensor above). The vertex UV values (in the range [0, 1]) for each face are specific to the (200, 200) size texture map for the part of the body that the mesh face corresponds to. We cannot use them directly with the entire texture map. We have to offset the vertex UV values depending on what body part the associated face corresponds to."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Map each face to a (u, v) offset\n",
+    "offset_per_part = {}\n",
+    "already_offset = set()\n",
+    "cols, rows = 4, 6\n",
+    "for i, u in enumerate(np.linspace(0, 1, cols, endpoint=False)):\n",
+    "    for j, v in enumerate(np.linspace(0, 1, rows, endpoint=False)):\n",
+    "        part = rows * i + j + 1  # parts are 1-indexed in face_indices\n",
+    "        offset_per_part[part] = (u, v)\n",
+    "\n",
+    "U_norm = U.clone()\n",
+    "V_norm = V.clone()\n",
+    "\n",
+    "# iterate over faces and offset the corresponding vertex u and v values\n",
+    "for i in range(len(faces)):\n",
+    "    face_vert_idxs = faces[i]\n",
+    "    part = face_indices[i]\n",
+    "    offset_u, offset_v = offset_per_part[int(part.item())]\n",
+    "    \n",
+    "    for vert_idx in face_vert_idxs:   \n",
+    "        # vertices are reused, but we don't want to offset multiple times\n",
+    "        if vert_idx.item() not in already_offset:\n",
+    "            # offset u value\n",
+    "            U_norm[vert_idx] = U[vert_idx] / cols + offset_u\n",
+    "            # offset v value\n",
+    "            # this also flips each part locally, as each part is upside down\n",
+    "            V_norm[vert_idx] = (1 - V[vert_idx]) / rows + offset_v\n",
+    "            # add vertex to our set tracking already-offset vertices\n",
+    "            already_offset.add(vert_idx.item())\n",
+    "\n",
+    "# invert V values\n",
+    "V_norm = 1 - V_norm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create our verts_uv values\n",
+    "verts_uv = torch.cat([U_norm[None],V_norm[None]], dim=2) # (1, 7829, 2)\n",
+    "\n",
+    "# There are 6890 xyz vertex coordinates but 7829 vertex uv coordinates. \n",
+    "# This is because the same vertex can be shared by multiple faces where each face may correspond to a different body part.  \n",
+    "# Therefore when initializing the Meshes class,\n",
+    "# we need to map each of the vertices referenced by the DensePose faces (in verts, which is the \"All_vertices\" field)\n",
+    "# to the correct xyz coordinate in the SMPL template mesh.\n",
+    "v_template_extended = v_template[verts-1][None] # (1, 7829, 3)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create our textured mesh \n",
+    "\n",
+    "**Meshes** is a unique datastructure provided in PyTorch3D for working with batches of meshes of different sizes.\n",
+    "\n",
+    "**TexturesUV** is an auxiliary datastructure for storing vertex uv and texture maps for meshes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "texture = TexturesUV(maps=tex, faces_uvs=faces[None], verts_uvs=verts_uv)\n",
+    "mesh = Meshes(v_template_extended, faces[None], texture)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create a renderer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize a camera.\n",
+    "# World coordinates +Y up, +X left and +Z in.\n",
+    "R, T = look_at_view_transform(2.7, 0, 0) \n",
+    "cameras = FoVPerspectiveCameras(device=device, R=R, T=T)\n",
+    "\n",
+    "# Define the settings for rasterization and shading. Here we set the output image to be of size\n",
+    "# 512x512. As we are rendering images for visualization purposes only we will set faces_per_pixel=1\n",
+    "# and blur_radius=0.0. \n",
+    "raster_settings = RasterizationSettings(\n",
+    "    image_size=512, \n",
+    "    blur_radius=0.0, \n",
+    "    faces_per_pixel=1, \n",
+    ")\n",
+    "\n",
+    "# Place a point light in front of the person. \n",
+    "lights = PointLights(device=device, location=[[0.0, 0.0, 2.0]])\n",
+    "\n",
+    "# Create a Phong renderer by composing a rasterizer and a shader. The textured Phong shader will \n",
+    "# interpolate the texture uv coordinates for each vertex, sample from a texture image and \n",
+    "# apply the Phong lighting model\n",
+    "renderer = MeshRenderer(\n",
+    "    rasterizer=MeshRasterizer(\n",
+    "        cameras=cameras, \n",
+    "        raster_settings=raster_settings\n",
+    "    ),\n",
+    "    shader=SoftPhongShader(\n",
+    "        device=device, \n",
+    "        cameras=cameras,\n",
+    "        lights=lights\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Render the textured mesh we created from the SMPL model and texture map."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "images = renderer(mesh)\n",
+    "plt.figure(figsize=(10, 10))\n",
+    "plt.imshow(images[0, ..., :3].cpu().numpy())\n",
+    "plt.axis(\"off\");"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Different view and lighting of the body\n",
+    "\n",
+    "We can also change many other settings in the rendering pipeline. Here we:\n",
+    "\n",
+    "- change the **viewing angle** of the camera\n",
+    "- change the **position** of the point light"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Rotate the person by increasing the elevation and azimuth angles to view the back of the person from above. \n",
+    "R, T = look_at_view_transform(2.7, 10, 180)\n",
+    "cameras = FoVPerspectiveCameras(device=device, R=R, T=T)\n",
+    "\n",
+    "# Move the light location so the light is shining on the person's back.  \n",
+    "lights.location = torch.tensor([[2.0, 2.0, -2.0]], device=device)\n",
+    "\n",
+    "# Re-render the mesh, passing in keyword arguments for the modified components.\n",
+    "images = renderer(mesh, lights=lights, cameras=cameras)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(10, 10))\n",
+    "plt.imshow(images[0, ..., :3].cpu().numpy())\n",
+    "plt.axis(\"off\");"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Conclusion\n",
+    "In this tutorial, we've learned how to construct a **textured mesh** from **DensePose model and uv data**, as well as initialize a **Renderer** and change the viewing angle and lighting of our rendered mesh."
+   ]
+  }
+ ],
+ "metadata": {
+  "bento_stylesheets": {
+   "bento/extensions/flow/main.css": true,
+   "bento/extensions/kernel_selector/main.css": true,
+   "bento/extensions/kernel_ui/main.css": true,
+   "bento/extensions/new_kernel/main.css": true,
+   "bento/extensions/system_usage/main.css": true,
+   "bento/extensions/theme/main.css": true
+  },
+  "kernelspec": {
+   "display_name": "pytorch3d_etc (local)",
+   "language": "python",
+   "name": "pytorch3d_etc_local"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.5+"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pytorch3d/docs/tutorials/render_textured_meshes.ipynb b/pytorch3d/docs/tutorials/render_textured_meshes.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..8c5adc48352557db353889f94fb860c87ff06c38
--- /dev/null
+++ b/pytorch3d/docs/tutorials/render_textured_meshes.ipynb
@@ -0,0 +1,820 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "_Ip8kp4TfBLZ"
+   },
+   "outputs": [],
+   "source": [
+    "# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "kuXHJv44fBLe"
+   },
+   "source": [
+    "# Render a textured mesh\n",
+    "\n",
+    "This tutorial shows how to:\n",
+    "- load a mesh and textures from an `.obj` file. \n",
+    "- set up a renderer \n",
+    "- render the mesh \n",
+    "- vary the rendering settings such as lighting and camera position\n",
+    "- use the batching features of the pytorch3d API to render the mesh from different viewpoints"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "Bnj3THhzfBLf"
+   },
+   "source": [
+    "## 0. Install and Import modules"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "okLalbR_g7NS"
+   },
+   "source": [
+    "Ensure `torch` and `torchvision` are installed. If `pytorch3d` is not installed, install it using the following cell:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 717
+    },
+    "colab_type": "code",
+    "id": "musUWTglgxSB",
+    "outputId": "16d1a1b2-3f7f-43ed-ca28-a4d236cc0572"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import torch\n",
+    "need_pytorch3d=False\n",
+    "try:\n",
+    "    import pytorch3d\n",
+    "except ModuleNotFoundError:\n",
+    "    need_pytorch3d=True\n",
+    "if need_pytorch3d:\n",
+    "    if torch.__version__.startswith(\"2.1.\") and sys.platform.startswith(\"linux\"):\n",
+    "        # We try to install PyTorch3D via a released wheel.\n",
+    "        pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
+    "        version_str=\"\".join([\n",
+    "            f\"py3{sys.version_info.minor}_cu\",\n",
+    "            torch.version.cuda.replace(\".\",\"\"),\n",
+    "            f\"_pyt{pyt_version_str}\"\n",
+    "        ])\n",
+    "        !pip install fvcore iopath\n",
+    "        !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",
+    "    else:\n",
+    "        # We try to install PyTorch3D from source.\n",
+    "        !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "nX99zdoffBLg"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import torch\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Util function for loading meshes\n",
+    "from pytorch3d.io import load_objs_as_meshes, load_obj\n",
+    "\n",
+    "# Data structures and functions for rendering\n",
+    "from pytorch3d.structures import Meshes\n",
+    "from pytorch3d.vis.plotly_vis import AxisArgs, plot_batch_individually, plot_scene\n",
+    "from pytorch3d.vis.texture_vis import texturesuv_image_matplotlib\n",
+    "from pytorch3d.renderer import (\n",
+    "    look_at_view_transform,\n",
+    "    FoVPerspectiveCameras, \n",
+    "    PointLights, \n",
+    "    DirectionalLights, \n",
+    "    Materials, \n",
+    "    RasterizationSettings, \n",
+    "    MeshRenderer, \n",
+    "    MeshRasterizer,  \n",
+    "    SoftPhongShader,\n",
+    "    TexturesUV,\n",
+    "    TexturesVertex\n",
+    ")\n",
+    "\n",
+    "# add path for demo utils functions \n",
+    "import sys\n",
+    "import os\n",
+    "sys.path.append(os.path.abspath(''))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "Lxmehq6Zhrzv"
+   },
+   "source": [
+    "If using **Google Colab**, fetch the utils file for plotting image grids:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 204
+    },
+    "colab_type": "code",
+    "id": "HZozr3Pmho-5",
+    "outputId": "be5eb60d-5f65-4db1-cca0-44ee68c8f5fd"
+   },
+   "outputs": [],
+   "source": [
+    "!wget https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/docs/tutorials/utils/plot_image_grid.py\n",
+    "from plot_image_grid import image_grid"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "g4B62MzYiJUM"
+   },
+   "source": [
+    "OR if running **locally** uncomment and run the following cell:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "paJ4Im8ahl7O"
+   },
+   "outputs": [],
+   "source": [
+    "# from utils import image_grid"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "5jGq772XfBLk"
+   },
+   "source": [
+    "### 1. Load a mesh and texture file\n",
+    "\n",
+    "Load an `.obj` file and its associated `.mtl` file and create a **TexturesUV** and **Meshes** object. \n",
+    "\n",
+    "**Meshes** is a unique datastructure provided in PyTorch3D for working with batches of meshes of different sizes. \n",
+    "\n",
+    "**TexturesUV** is an auxiliary datastructure for storing vertex uv and texture maps for meshes. \n",
+    "\n",
+    "**Meshes** has several class methods which are used throughout the rendering pipeline."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "a8eU4zo5jd_H"
+   },
+   "source": [
+    "If running this notebook using **Google Colab**, run the following cell to fetch the mesh obj and texture files and save them at the path `data/cow_mesh`.\n",
+    "If running locally, the data is already available at the correct path. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 578
+    },
+    "colab_type": "code",
+    "id": "tTm0cVuOjb1W",
+    "outputId": "6cd7e2ec-65e1-4dcc-99e8-c347bc504f0a"
+   },
+   "outputs": [],
+   "source": [
+    "!mkdir -p data/cow_mesh\n",
+    "!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.obj\n",
+    "!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.mtl\n",
+    "!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow_texture.png"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "gi5Kd0GafBLl"
+   },
+   "outputs": [],
+   "source": [
+    "# Setup\n",
+    "if torch.cuda.is_available():\n",
+    "    device = torch.device(\"cuda:0\")\n",
+    "    torch.cuda.set_device(device)\n",
+    "else:\n",
+    "    device = torch.device(\"cpu\")\n",
+    "\n",
+    "# Set paths\n",
+    "DATA_DIR = \"./data\"\n",
+    "obj_filename = os.path.join(DATA_DIR, \"cow_mesh/cow.obj\")\n",
+    "\n",
+    "# Load obj file\n",
+    "mesh = load_objs_as_meshes([obj_filename], device=device)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "5APAQs6-fBLp"
+   },
+   "source": [
+    "#### Let's visualize the texture map"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 428
+    },
+    "colab_type": "code",
+    "id": "YipUhrIHfBLq",
+    "outputId": "48987b1d-5cc1-4c2a-cb3c-713d64f6a38d"
+   },
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(7,7))\n",
+    "texture_image=mesh.textures.maps_padded()\n",
+    "plt.imshow(texture_image.squeeze().cpu().numpy())\n",
+    "plt.axis(\"off\");"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "PyTorch3D has a built-in way to view the texture map with matplotlib along with the points on the map corresponding to vertices. There is also a method, texturesuv_image_PIL, to get a similar image which can be saved to a file."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(7,7))\n",
+    "texturesuv_image_matplotlib(mesh.textures, subsample=None)\n",
+    "plt.axis(\"off\");"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "GcnG6XJ6fBLu"
+   },
+   "source": [
+    "## 2. Create a renderer\n",
+    "\n",
+    "A renderer in PyTorch3D is composed of a **rasterizer** and a **shader** which each have a number of subcomponents such as a **camera** (orthographic/perspective). Here we initialize some of these components and use default values for the rest.\n",
+    "\n",
+    "In this example we will first create a **renderer** which uses a **perspective camera**, a **point light** and applies **Phong shading**. Then we learn how to vary different components using the modular API.  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "dX466mWnfBLv"
+   },
+   "outputs": [],
+   "source": [
+    "# Initialize a camera.\n",
+    "# With world coordinates +Y up, +X left and +Z in, the front of the cow is facing the -Z direction. \n",
+    "# So we move the camera by 180 in the azimuth direction so it is facing the front of the cow. \n",
+    "R, T = look_at_view_transform(2.7, 0, 180) \n",
+    "cameras = FoVPerspectiveCameras(device=device, R=R, T=T)\n",
+    "\n",
+    "# Define the settings for rasterization and shading. Here we set the output image to be of size\n",
+    "# 512x512. As we are rendering images for visualization purposes only we will set faces_per_pixel=1\n",
+    "# and blur_radius=0.0. We leave bin_size and max_faces_per_bin at their default of None, which \n",
+    "# ensures that the faster coarse-to-fine rasterization method is used. Refer to rasterize_meshes.py for \n",
+    "# explanations of these parameters. Refer to docs/notes/renderer.md for an explanation of \n",
+    "# the difference between naive and coarse-to-fine rasterization. \n",
+    "raster_settings = RasterizationSettings(\n",
+    "    image_size=512, \n",
+    "    blur_radius=0.0, \n",
+    "    faces_per_pixel=1, \n",
+    ")\n",
+    "\n",
+    "# Place a point light in front of the object. As mentioned above, the front of the cow is facing the \n",
+    "# -z direction. \n",
+    "lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])\n",
+    "\n",
+    "# Create a Phong renderer by composing a rasterizer and a shader. The textured Phong shader will \n",
+    "# interpolate the texture uv coordinates for each vertex, sample from a texture image and \n",
+    "# apply the Phong lighting model\n",
+    "renderer = MeshRenderer(\n",
+    "    rasterizer=MeshRasterizer(\n",
+    "        cameras=cameras, \n",
+    "        raster_settings=raster_settings\n",
+    "    ),\n",
+    "    shader=SoftPhongShader(\n",
+    "        device=device, \n",
+    "        cameras=cameras,\n",
+    "        lights=lights\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "KyOY5qXvfBLz"
+   },
+   "source": [
+    "## 3. Render the mesh"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "8VkRA4qJfBL0"
+   },
+   "source": [
+    "The light is in front of the object so it is bright and the image has specular highlights."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 592
+    },
+    "colab_type": "code",
+    "id": "gBLZH8iUfBL1",
+    "outputId": "cc3cd3f0-189e-4497-ce47-e64b4da542e8"
+   },
+   "outputs": [],
+   "source": [
+    "images = renderer(mesh)\n",
+    "plt.figure(figsize=(10, 10))\n",
+    "plt.imshow(images[0, ..., :3].cpu().numpy())\n",
+    "plt.axis(\"off\");"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "k161XF3sfBL5"
+   },
+   "source": [
+    "## 4. Move the light behind the object and re-render\n",
+    "\n",
+    "We can pass arbitrary keyword arguments to the `rasterizer`/`shader` via the call to the `renderer` so the renderer does not need to be reinitialized if any of the settings change.\n",
+    "\n",
+    "In this case, we can simply update the location of the lights and pass them into the call to the renderer. \n",
+    "\n",
+    "The image is now dark as there is only ambient lighting, and there are no specular highlights."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "BdWkkeibfBL6"
+   },
+   "outputs": [],
+   "source": [
+    "# Now move the light so it is on the +Z axis which will be behind the cow. \n",
+    "lights.location = torch.tensor([0.0, 0.0, +1.0], device=device)[None]\n",
+    "images = renderer(mesh, lights=lights)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 592
+    },
+    "colab_type": "code",
+    "id": "UmV3j1YffBL9",
+    "outputId": "2e8edca0-5bd8-4a2f-a160-83c4b0520123"
+   },
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(10, 10))\n",
+    "plt.imshow(images[0, ..., :3].cpu().numpy())\n",
+    "plt.axis(\"off\");"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "t93aVotMfBMB"
+   },
+   "source": [
+    "## 5. Rotate the object, modify the material properties or light properties\n",
+    "\n",
+    "We can also change many other settings in the rendering pipeline. Here we:\n",
+    "\n",
+    "- change the **viewing angle** of the camera\n",
+    "- change the **position** of the point light\n",
+    "- change the **material reflectance** properties of the mesh"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "4mYXYziefBMB"
+   },
+   "outputs": [],
+   "source": [
+    "# Rotate the object by increasing the elevation and azimuth angles\n",
+    "R, T = look_at_view_transform(dist=2.7, elev=10, azim=-150)\n",
+    "cameras = FoVPerspectiveCameras(device=device, R=R, T=T)\n",
+    "\n",
+    "# Move the light location so the light is shining on the cow's face.  \n",
+    "lights.location = torch.tensor([[2.0, 2.0, -2.0]], device=device)\n",
+    "\n",
+    "# Change specular color to green and change material shininess \n",
+    "materials = Materials(\n",
+    "    device=device,\n",
+    "    specular_color=[[0.0, 1.0, 0.0]],\n",
+    "    shininess=10.0\n",
+    ")\n",
+    "\n",
+    "# Re-render the mesh, passing in keyword arguments for the modified components.\n",
+    "images = renderer(mesh, lights=lights, materials=materials, cameras=cameras)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 592
+    },
+    "colab_type": "code",
+    "id": "rHIxIfh5fBME",
+    "outputId": "1ca2d337-2983-478f-b3c9-d64b84ba1a31"
+   },
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(10, 10))\n",
+    "plt.imshow(images[0, ..., :3].cpu().numpy())\n",
+    "plt.axis(\"off\");"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "17c4xmtyfBMH"
+   },
+   "source": [
+    "## 6. Batched Rendering\n",
+    "\n",
+    "One of the core design choices of the PyTorch3D API is to support **batched inputs for all components**. \n",
+    "The renderer and associated components can take batched inputs and **render a batch of output images in one forward pass**. We will now use this feature to render the mesh from many different viewpoints.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "CDQKebNNfBMI"
+   },
+   "outputs": [],
+   "source": [
+    "# Set batch size - this is the number of different viewpoints from which we want to render the mesh.\n",
+    "batch_size = 20\n",
+    "\n",
+    "# Create a batch of meshes by repeating the cow mesh and associated textures. \n",
+    "# Meshes has a useful `extend` method which allows us to do this very easily. \n",
+    "# This also extends the textures. \n",
+    "meshes = mesh.extend(batch_size)\n",
+    "\n",
+    "# Get a batch of viewing angles. \n",
+    "elev = torch.linspace(0, 180, batch_size)\n",
+    "azim = torch.linspace(-180, 180, batch_size)\n",
+    "\n",
+    "# All the cameras helper methods support mixed type inputs and broadcasting. So we can \n",
+    "# view the camera from the same distance and specify dist=2.7 as a float,\n",
+    "# and then specify elevation and azimuth angles for each viewpoint as tensors. \n",
+    "R, T = look_at_view_transform(dist=2.7, elev=elev, azim=azim)\n",
+    "cameras = FoVPerspectiveCameras(device=device, R=R, T=T)\n",
+    "\n",
+    "# Move the light back in front of the cow which is facing the -z direction.\n",
+    "lights.location = torch.tensor([[0.0, 0.0, -3.0]], device=device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "gyYJCwEDfBML"
+   },
+   "outputs": [],
+   "source": [
+    "# We can pass arbitrary keyword arguments to the rasterizer/shader via the renderer\n",
+    "# so the renderer does not need to be reinitialized if any of the settings change.\n",
+    "images = renderer(meshes, cameras=cameras, lights=lights)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "image_grid(images.cpu().numpy(), rows=4, cols=5, rgb=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 7. Plotly visualization \n",
+    "If you only want to visualize a mesh, you don't really need to use a differentiable renderer - instead we support plotting of Meshes with plotly. For these Meshes, we use TexturesVertex to define a texture for the rendering.\n",
+    "`plot_scene` creates a Plotly figure with a trace for each Meshes object. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "verts, faces_idx, _ = load_obj(obj_filename)\n",
+    "faces = faces_idx.verts_idx\n",
+    "\n",
+    "# Initialize each vertex to be white in color.\n",
+    "verts_rgb = torch.ones_like(verts)[None]  # (1, V, 3)\n",
+    "textures = TexturesVertex(verts_features=verts_rgb.to(device))\n",
+    "\n",
+    "# Create a Meshes object\n",
+    "mesh = Meshes(\n",
+    "    verts=[verts.to(device)],   \n",
+    "    faces=[faces.to(device)],\n",
+    "    textures=textures\n",
+    ")\n",
+    "\n",
+    "# Render the plotly figure\n",
+    "fig = plot_scene({\n",
+    "    \"subplot1\": {\n",
+    "        \"cow_mesh\": mesh\n",
+    "    }\n",
+    "})\n",
+    "fig.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# use Plotly's default colors (no texture)\n",
+    "mesh = Meshes(\n",
+    "    verts=[verts.to(device)],   \n",
+    "    faces=[faces.to(device)]\n",
+    ")\n",
+    "\n",
+    "# Render the plotly figure\n",
+    "fig = plot_scene({\n",
+    "    \"subplot1\": {\n",
+    "        \"cow_mesh\": mesh\n",
+    "    }\n",
+    "})\n",
+    "fig.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a batch of meshes, and offset one to prevent overlap\n",
+    "mesh_batch = Meshes(\n",
+    "    verts=[verts.to(device), (verts + 2).to(device)],   \n",
+    "    faces=[faces.to(device), faces.to(device)]\n",
+    ")\n",
+    "\n",
+    "# plot mesh batch in the same trace\n",
+    "fig = plot_scene({\n",
+    "    \"subplot1\": {\n",
+    "        \"cow_mesh_batch\": mesh_batch\n",
+    "    }\n",
+    "})\n",
+    "fig.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# plot batch of meshes in different traces\n",
+    "fig = plot_scene({\n",
+    "    \"subplot1\": {\n",
+    "        \"cow_mesh1\": mesh_batch[0],\n",
+    "        \"cow_mesh2\": mesh_batch[1]\n",
+    "    }\n",
+    "})\n",
+    "fig.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# plot batch of meshes in different subplots\n",
+    "fig = plot_scene({\n",
+    "    \"subplot1\": {\n",
+    "        \"cow_mesh1\": mesh_batch[0]\n",
+    "    },\n",
+    "    \"subplot2\":{\n",
+    "        \"cow_mesh2\": mesh_batch[1]\n",
+    "    }\n",
+    "})\n",
+    "fig.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For batches, we can also use `plot_batch_individually` to avoid constructing the scene dictionary ourselves."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# extend the batch to have 4 meshes\n",
+    "mesh_4 = mesh_batch.extend(2)\n",
+    "\n",
+    "# visualize the batch in different subplots, 2 per row\n",
+    "fig = plot_batch_individually(mesh_4)\n",
+    "# we can update the figure height and width\n",
+    "fig.update_layout(height=1000, width=500)\n",
+    "fig.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can also modify the axis arguments and axis backgrounds in both functions. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig2 = plot_scene({\n",
+    "    \"cow_plot1\": {\n",
+    "        \"cows\": mesh_batch\n",
+    "    }\n",
+    "},\n",
+    "    xaxis={\"backgroundcolor\":\"rgb(200, 200, 230)\"},\n",
+    "    yaxis={\"backgroundcolor\":\"rgb(230, 200, 200)\"},\n",
+    "    zaxis={\"backgroundcolor\":\"rgb(200, 230, 200)\"}, \n",
+    "    axis_args=AxisArgs(showgrid=True))\n",
+    "fig2.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig3 = plot_batch_individually(\n",
+    "    mesh_4, \n",
+    "    ncols=2,\n",
+    "    subplot_titles = [\"cow1\", \"cow2\", \"cow3\", \"cow4\"], # customize subplot titles\n",
+    "    xaxis={\"backgroundcolor\":\"rgb(200, 200, 230)\"},\n",
+    "    yaxis={\"backgroundcolor\":\"rgb(230, 200, 200)\"},\n",
+    "    zaxis={\"backgroundcolor\":\"rgb(200, 230, 200)\"}, \n",
+    "    axis_args=AxisArgs(showgrid=True))\n",
+    "fig3.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "t3qphI1ElUb5"
+   },
+   "source": [
+    "## 8. Conclusion\n",
+    "In this tutorial we learned how to **load** a textured mesh from an obj file, initialize a PyTorch3D data structure called **Meshes**, set up a **Renderer** consisting of a **Rasterizer** and a **Shader**, and modify several components of the rendering pipeline. We also learned how to render Meshes in Plotly figures."
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "anp_metadata": {
+   "path": "notebooks/render_textured_meshes.ipynb"
+  },
+  "bento_stylesheets": {
+   "bento/extensions/flow/main.css": true,
+   "bento/extensions/kernel_selector/main.css": true,
+   "bento/extensions/kernel_ui/main.css": true,
+   "bento/extensions/new_kernel/main.css": true,
+   "bento/extensions/system_usage/main.css": true,
+   "bento/extensions/theme/main.css": true
+  },
+  "colab": {
+   "name": "render_textured_meshes.ipynb",
+   "provenance": []
+  },
+  "disseminate_notebook_info": {
+   "backup_notebook_id": "569222367081034"
+  },
+  "kernelspec": {
+   "display_name": "pytorch3d_etc (local)",
+   "language": "python",
+   "name": "pytorch3d_etc_local"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.5+"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/pytorch3d/docs/tutorials/utils/__init__.py b/pytorch3d/docs/tutorials/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad18d359e67570517c87fb96d78f9655c9901de6
--- /dev/null
+++ b/pytorch3d/docs/tutorials/utils/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .camera_visualization import get_camera_wireframe, plot_camera_scene, plot_cameras
+from .plot_image_grid import image_grid
diff --git a/pytorch3d/docs/tutorials/utils/camera_visualization.py b/pytorch3d/docs/tutorials/utils/camera_visualization.py
new file mode 100644
index 0000000000000000000000000000000000000000..6bb4df2474201e1ab0a9c436db278ab1e688b425
--- /dev/null
+++ b/pytorch3d/docs/tutorials/utils/camera_visualization.py
@@ -0,0 +1,59 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import
+from pytorch3d.vis.plotly_vis import get_camera_wireframe
+
+
+def plot_cameras(ax, cameras, color: str = "blue"):
+    """
+    Plots a set of `cameras` objects into the maplotlib axis `ax` with
+    color `color`.
+    """
+    cam_wires_canonical = get_camera_wireframe().cuda()[None]
+    cam_trans = cameras.get_world_to_view_transform().inverse()
+    cam_wires_trans = cam_trans.transform_points(cam_wires_canonical)
+    plot_handles = []
+    for wire in cam_wires_trans:
+        # the Z and Y axes are flipped intentionally here!
+        x_, z_, y_ = wire.detach().cpu().numpy().T.astype(float)
+        (h,) = ax.plot(x_, y_, z_, color=color, linewidth=0.3)
+        plot_handles.append(h)
+    return plot_handles
+
+
+def plot_camera_scene(cameras, cameras_gt, status: str):
+    """
+    Plots a set of predicted cameras `cameras` and their corresponding
+    ground truth locations `cameras_gt`. The plot is named with
+    a string passed inside the `status` argument.
+    """
+    fig = plt.figure()
+    ax = fig.add_subplot(projection="3d")
+    ax.clear()
+    ax.set_title(status)
+    handle_cam = plot_cameras(ax, cameras, color="#FF7D1E")
+    handle_cam_gt = plot_cameras(ax, cameras_gt, color="#812CE5")
+    plot_radius = 3
+    ax.set_xlim3d([-plot_radius, plot_radius])
+    ax.set_ylim3d([3 - plot_radius, 3 + plot_radius])
+    ax.set_zlim3d([-plot_radius, plot_radius])
+    ax.set_xlabel("x")
+    ax.set_ylabel("z")
+    ax.set_zlabel("y")
+    labels_handles = {
+        "Estimated cameras": handle_cam[0],
+        "GT cameras": handle_cam_gt[0],
+    }
+    ax.legend(
+        labels_handles.values(),
+        labels_handles.keys(),
+        loc="upper center",
+        bbox_to_anchor=(0.5, 0),
+    )
+    plt.show()
+    return fig
diff --git a/pytorch3d/docs/tutorials/utils/generate_cow_renders.py b/pytorch3d/docs/tutorials/utils/generate_cow_renders.py
new file mode 100644
index 0000000000000000000000000000000000000000..89f9ba4fb04b60bff29f3f5f1808510a1713156f
--- /dev/null
+++ b/pytorch3d/docs/tutorials/utils/generate_cow_renders.py
@@ -0,0 +1,169 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+
+import numpy as np
+import torch
+from pytorch3d.io import load_objs_as_meshes
+from pytorch3d.renderer import (
+    BlendParams,
+    FoVPerspectiveCameras,
+    look_at_view_transform,
+    MeshRasterizer,
+    MeshRenderer,
+    PointLights,
+    RasterizationSettings,
+    SoftPhongShader,
+    SoftSilhouetteShader,
+)
+
+
+# create the default data directory
+current_dir = os.path.dirname(os.path.realpath(__file__))
+DATA_DIR = os.path.join(current_dir, "..", "data", "cow_mesh")
+
+
+def generate_cow_renders(
+    num_views: int = 40, data_dir: str = DATA_DIR, azimuth_range: float = 180
+):
+    """
+    This function generates `num_views` renders of a cow mesh.
+    The renders are generated from viewpoints sampled at uniformly distributed
+    azimuth intervals. The elevation is kept constant so that the camera's
+    vertical position coincides with the equator.
+
+    For a more detailed explanation of this code, please refer to the
+    docs/tutorials/fit_textured_mesh.ipynb notebook.
+
+    Args:
+        num_views: The number of generated renders.
+        data_dir: The folder that contains the cow mesh files. If the cow mesh
+            files do not exist in the folder, this function will automatically
+            download them.
+        azimuth_range: number of degrees on each side of the start position to
+            take samples
+
+    Returns:
+        cameras: A batch of `num_views` `FoVPerspectiveCameras` from which the
+            images are rendered.
+        images: A tensor of shape `(num_views, height, width, 3)` containing
+            the rendered images.
+        silhouettes: A tensor of shape `(num_views, height, width)` containing
+            the rendered silhouettes.
+    """
+
+    # set the paths
+
+    # download the cow mesh if not done before
+    cow_mesh_files = [
+        os.path.join(data_dir, fl) for fl in ("cow.obj", "cow.mtl", "cow_texture.png")
+    ]
+    if any(not os.path.isfile(f) for f in cow_mesh_files):
+        os.makedirs(data_dir, exist_ok=True)
+        os.system(
+            f"wget -P {data_dir} "
+            + "https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.obj"
+        )
+        os.system(
+            f"wget -P {data_dir} "
+            + "https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.mtl"
+        )
+        os.system(
+            f"wget -P {data_dir} "
+            + "https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow_texture.png"
+        )
+
+    # Setup
+    if torch.cuda.is_available():
+        device = torch.device("cuda:0")
+        torch.cuda.set_device(device)
+    else:
+        device = torch.device("cpu")
+
+    # Load obj file
+    obj_filename = os.path.join(data_dir, "cow.obj")
+    mesh = load_objs_as_meshes([obj_filename], device=device)
+
+    # We scale normalize and center the target mesh to fit in a sphere of radius 1
+    # centered at (0,0,0). (scale, center) will be used to bring the predicted mesh
+    # to its original center and scale.  Note that normalizing the target mesh,
+    # speeds up the optimization but is not necessary!
+    verts = mesh.verts_packed()
+    N = verts.shape[0]
+    center = verts.mean(0)
+    scale = max((verts - center).abs().max(0)[0])
+    mesh.offset_verts_(-(center.expand(N, 3)))
+    mesh.scale_verts_((1.0 / float(scale)))
+
+    # Get a batch of viewing angles.
+    elev = torch.linspace(0, 0, num_views)  # keep constant
+    azim = torch.linspace(-azimuth_range, azimuth_range, num_views) + 180.0
+
+    # Place a point light in front of the object. As mentioned above, the front of
+    # the cow is facing the -z direction.
+    lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])
+
+    # Initialize an OpenGL perspective camera that represents a batch of different
+    # viewing angles. All the cameras helper methods support mixed type inputs and
+    # broadcasting. So we can view the camera from the a distance of dist=2.7, and
+    # then specify elevation and azimuth angles for each viewpoint as tensors.
+    R, T = look_at_view_transform(dist=2.7, elev=elev, azim=azim)
+    cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+
+    # Define the settings for rasterization and shading. Here we set the output
+    # image to be of size 128X128. As we are rendering images for visualization
+    # purposes only we will set faces_per_pixel=1 and blur_radius=0.0. Refer to
+    # rasterize_meshes.py for explanations of these parameters.  We also leave
+    # bin_size and max_faces_per_bin to their default values of None, which sets
+    # their values using heuristics and ensures that the faster coarse-to-fine
+    # rasterization method is used.  Refer to docs/notes/renderer.md for an
+    # explanation of the difference between naive and coarse-to-fine rasterization.
+    raster_settings = RasterizationSettings(
+        image_size=128, blur_radius=0.0, faces_per_pixel=1
+    )
+
+    # Create a Phong renderer by composing a rasterizer and a shader. The textured
+    # Phong shader will interpolate the texture uv coordinates for each vertex,
+    # sample from a texture image and apply the Phong lighting model
+    blend_params = BlendParams(sigma=1e-4, gamma=1e-4, background_color=(0.0, 0.0, 0.0))
+    renderer = MeshRenderer(
+        rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
+        shader=SoftPhongShader(
+            device=device, cameras=cameras, lights=lights, blend_params=blend_params
+        ),
+    )
+
+    # Create a batch of meshes by repeating the cow mesh and associated textures.
+    # Meshes has a useful `extend` method which allows us do this very easily.
+    # This also extends the textures.
+    meshes = mesh.extend(num_views)
+
+    # Render the cow mesh from each viewing angle
+    target_images = renderer(meshes, cameras=cameras, lights=lights)
+
+    # Rasterization settings for silhouette rendering
+    sigma = 1e-4
+    raster_settings_silhouette = RasterizationSettings(
+        image_size=128, blur_radius=np.log(1.0 / 1e-4 - 1.0) * sigma, faces_per_pixel=50
+    )
+
+    # Silhouette renderer
+    renderer_silhouette = MeshRenderer(
+        rasterizer=MeshRasterizer(
+            cameras=cameras, raster_settings=raster_settings_silhouette
+        ),
+        shader=SoftSilhouetteShader(),
+    )
+
+    # Render silhouette images.  The 3rd channel of the rendering output is
+    # the alpha/silhouette channel
+    silhouette_images = renderer_silhouette(meshes, cameras=cameras, lights=lights)
+
+    # binary silhouettes
+    silhouette_binary = (silhouette_images[..., 3] > 1e-4).float()
+
+    return cameras, target_images[..., :3], silhouette_binary
diff --git a/pytorch3d/docs/tutorials/utils/plot_image_grid.py b/pytorch3d/docs/tutorials/utils/plot_image_grid.py
new file mode 100644
index 0000000000000000000000000000000000000000..59bba0de107a9c720681df5abb26fc6f7eed9203
--- /dev/null
+++ b/pytorch3d/docs/tutorials/utils/plot_image_grid.py
@@ -0,0 +1,53 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import matplotlib.pyplot as plt
+
+
+def image_grid(
+    images,
+    rows=None,
+    cols=None,
+    fill: bool = True,
+    show_axes: bool = False,
+    rgb: bool = True,
+):
+    """
+    A util function for plotting a grid of images.
+
+    Args:
+        images: (N, H, W, 4) array of RGBA images
+        rows: number of rows in the grid
+        cols: number of columns in the grid
+        fill: boolean indicating if the space between images should be filled
+        show_axes: boolean indicating if the axes of the plots should be visible
+        rgb: boolean, If True, only RGB channels are plotted.
+            If False, only the alpha channel is plotted.
+
+    Returns:
+        None
+    """
+    if (rows is None) != (cols is None):
+        raise ValueError("Specify either both rows and cols or neither.")
+
+    if rows is None:
+        rows = len(images)
+        cols = 1
+
+    gridspec_kw = {"wspace": 0.0, "hspace": 0.0} if fill else {}
+    fig, axarr = plt.subplots(rows, cols, gridspec_kw=gridspec_kw, figsize=(15, 9))
+    bleed = 0
+    fig.subplots_adjust(left=bleed, bottom=bleed, right=(1 - bleed), top=(1 - bleed))
+
+    for ax, im in zip(axarr.ravel(), images):
+        if rgb:
+            # only render RGB channels
+            ax.imshow(im[..., :3])
+        else:
+            # only render Alpha channel
+            ax.imshow(im[..., 3])
+        if not show_axes:
+            ax.set_axis_off()
diff --git a/pytorch3d/packaging/build_conda.py b/pytorch3d/packaging/build_conda.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ef730b239e6352997fae8cf187704f874896533
--- /dev/null
+++ b/pytorch3d/packaging/build_conda.py
@@ -0,0 +1,139 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os.path
+import runpy
+import subprocess
+from typing import List
+
+# required env vars:
+# CU_VERSION: E.g. cu112, or cpu
+# PYTORCH_VERSION: e.g. 1.12.0
+# PYTHON_VERSION: e.g. 3.9
+# optional: JUST_TESTRUN (1 to not set nvcc flags), TEST_FLAG (extra build arg)
+
+# should be run from pytorch3d root
+
+CU_VERSION = os.environ["CU_VERSION"]
+PYTORCH_VERSION = os.environ["PYTORCH_VERSION"]
+pytorch_major_minor = tuple(int(i) for i in PYTORCH_VERSION.split(".")[:2])  # e.g. (1, 12)
+source_root_dir = os.environ["PWD"]  # taken from the PWD environment variable
+
+
+def version_constraint(version):
+    """
+    Given version "11.3" returns " >=11.3,<11.4"
+    """
+    last_part = version.rindex(".") + 1
+    upper = version[:last_part] + str(1 + int(version[last_part:]))
+    return f" >={version},<{upper}"
+
+
+def get_cuda_major_minor():
+    if CU_VERSION == "cpu":
+        raise ValueError("fn only for cuda builds")
+    if len(CU_VERSION) != 5 or CU_VERSION[:2] != "cu":
+        raise ValueError(f"Bad CU_VERSION {CU_VERSION}")
+    major = CU_VERSION[2:4]
+    minor = CU_VERSION[4]
+    return major, minor
+
+
+def setup_cuda():
+    if CU_VERSION == "cpu":
+        return
+    major, minor = get_cuda_major_minor()
+    os.environ["CUDA_HOME"] = f"/usr/local/cuda-{major}.{minor}/"
+    os.environ["FORCE_CUDA"] = "1"
+
+    basic_nvcc_flags = (
+        "-gencode=arch=compute_50,code=sm_50 "
+        "-gencode=arch=compute_60,code=sm_60 "
+        "-gencode=arch=compute_70,code=sm_70 "
+        "-gencode=arch=compute_75,code=sm_75 "
+        "-gencode=arch=compute_50,code=compute_50"
+    )
+    if CU_VERSION == "cu102":
+        nvcc_flags = "-gencode=arch=compute_35,code=sm_35 " + basic_nvcc_flags
+    elif CU_VERSION < ("cu118"):
+        nvcc_flags = (
+            "-gencode=arch=compute_35,code=sm_35 "
+            + "-gencode=arch=compute_80,code=sm_80 "
+            + "-gencode=arch=compute_86,code=sm_86 "
+            + basic_nvcc_flags
+        )
+    else:
+        nvcc_flags = (
+            "-gencode=arch=compute_80,code=sm_80 "
+            + "-gencode=arch=compute_86,code=sm_86 "
+            + "-gencode=arch=compute_90,code=sm_90 "
+            + basic_nvcc_flags
+        )
+
+    if os.environ.get("JUST_TESTRUN", "0") != "1":
+        os.environ["NVCC_FLAGS"] = nvcc_flags
+
+
+def setup_conda_pytorch_constraint() -> List[str]:
+    pytorch_constraint = f"- pytorch=={PYTORCH_VERSION}"
+    os.environ["CONDA_PYTORCH_CONSTRAINT"] = pytorch_constraint
+    os.environ["CONDA_PYTORCH_BUILD_CONSTRAINT"] = pytorch_constraint
+    os.environ["PYTORCH_VERSION_NODOT"] = PYTORCH_VERSION.replace(".", "")
+
+    if pytorch_major_minor < (1, 13):
+        return ["-c", "pytorch"]
+    else:
+        return ["-c", "pytorch", "-c", "nvidia"]
+
+
+def setup_conda_cudatoolkit_constraint():
+    if CU_VERSION == "cpu":
+        os.environ["CONDA_CPUONLY_FEATURE"] = "- cpuonly"
+        os.environ["CONDA_CUDATOOLKIT_CONSTRAINT"] = ""
+        return
+    os.environ["CONDA_CPUONLY_FEATURE"] = ""
+
+    if CU_VERSION in ("cu102", "cu110"):
+        os.environ["CONDA_CUB_CONSTRAINT"] = "- nvidiacub"
+    else:
+        os.environ["CONDA_CUB_CONSTRAINT"] = ""
+
+    major, minor = get_cuda_major_minor()
+    version_clause = version_constraint(f"{major}.{minor}")
+    if pytorch_major_minor < (1, 13):
+        toolkit = f"- cudatoolkit {version_clause}"
+    else:
+        toolkit = f"- pytorch-cuda {version_clause}"
+    os.environ["CONDA_CUDATOOLKIT_CONSTRAINT"] = toolkit
+
+
+def do_build(start_args: List[str]):
+    args = start_args.copy()
+
+    test_flag = os.environ.get("TEST_FLAG")
+    if test_flag is not None:
+        args.append(test_flag)
+
+    args.extend(["-c", "bottler", "-c", "fvcore", "-c", "iopath", "-c", "conda-forge"])
+    args.append("--no-anaconda-upload")
+    args.extend(["--python", os.environ["PYTHON_VERSION"]])
+    args.append("packaging/pytorch3d")
+    print(args)
+    subprocess.check_call(args)
+
+
+if __name__ == "__main__":
+    args = ["conda", "build"]
+    setup_cuda()
+
+    init_path = source_root_dir + "/pytorch3d/__init__.py"
+    build_version = runpy.run_path(init_path)["__version__"]
+    os.environ["BUILD_VERSION"] = build_version
+
+    os.environ["SOURCE_ROOT_DIR"] = source_root_dir
+    args += setup_conda_pytorch_constraint()
+    setup_conda_cudatoolkit_constraint()
+    do_build(args)
diff --git a/pytorch3d/packaging/build_wheel.sh b/pytorch3d/packaging/build_wheel.sh
new file mode 100644
index 0000000000000000000000000000000000000000..afe5a0e81359f681302628f670fb16f44cf65851
--- /dev/null
+++ b/pytorch3d/packaging/build_wheel.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -ex
+
+script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+. "$script_dir/pkg_helpers.bash"
+
+VERSION=$(python -c "exec(open('${script_dir}/../pytorch3d/__init__.py').read()); print(__version__)")
+
+export BUILD_TYPE=wheel
+setup_env "$VERSION"
+setup_wheel_python
+pip_install numpy
+setup_pip_pytorch_version
+download_nvidiacub_if_needed
+python setup.py clean
+IS_WHEEL=1 python setup.py bdist_wheel
diff --git a/pytorch3d/packaging/conda/build_pytorch3d.sh b/pytorch3d/packaging/conda/build_pytorch3d.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6be532fb84b3d36561cf015268b8e8f43c9228c8
--- /dev/null
+++ b/pytorch3d/packaging/conda/build_pytorch3d.sh
@@ -0,0 +1,218 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+if [[ -x "/remote/anaconda_token" ]]; then
+    . /remote/anaconda_token || true
+fi
+
+set -ex
+
+# Retry a flaky command: up to 5 attempts, sleeping 1, 2, 4 and 8 s in between
+retry () {
+    "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@") || (sleep 4 && "$@") || (sleep 8 && "$@")
+}
+
+# Parse arguments and determmine version
+###########################################################
+
+if [ "$#" -ne 3 ]; then
+    echo "Illegal number of parameters. Pass cuda version, pytorch3d version, pytorch3d build number"
+    echo "CUDA version should be Mm with no dot, e.g. '80'"
+    echo "DESIRED_PYTHON should be M.m, e.g. '2.7'"
+    exit 1
+fi
+
+desired_cuda="$1"
+build_version="$2"
+build_number="$3"
+
+if [[ "$desired_cuda" != cpu ]]; then
+  desired_cuda="$(echo $desired_cuda | tr -d cuda. )"
+fi
+echo "Building cuda version $desired_cuda and pytorch3d version: $build_version build_number: $build_number"
+
+if [[ "$desired_cuda" == 'cpu' ]]; then
+    cpu_only=1
+    cuver="cpu"
+else
+    # Switch desired_cuda to be M.m to be consistent with other scripts in
+    # pytorch/builder
+    export FORCE_CUDA=1
+    cuda_nodot="$desired_cuda"
+
+    if [[ ${#cuda_nodot} -eq 2 ]]; then
+        desired_cuda="${desired_cuda:0:1}.${desired_cuda:1:1}"
+    elif [[ ${#cuda_nodot} -eq 3 ]]; then
+        desired_cuda="${desired_cuda:0:2}.${desired_cuda:2:1}"
+    else
+        echo "unknown cuda version $cuda_nodot"
+        exit 1
+    fi
+
+    cuver="cu$cuda_nodot"
+fi
+
+export PYTORCH3D_BUILD_VERSION=$build_version
+export PYTORCH3D_BUILD_NUMBER=$build_number
+
+if [[ -z "$DESIRED_PYTHON" ]]; then
+    DESIRED_PYTHON=('3.5' '3.6' '3.7')
+fi
+
+SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
+
+if [[ -z "$WIN_PACKAGE_WORK_DIR" ]]; then
+    WIN_PACKAGE_WORK_DIR="$(echo $(pwd -W) | tr '/' '\\')\\tmp_conda_$(date +%H%M%S)"
+fi
+
+mkdir -p "$WIN_PACKAGE_WORK_DIR" || true
+pytorch3d_rootdir="$(realpath ${WIN_PACKAGE_WORK_DIR})/pytorch3d-src"
+git config --system core.longpaths true
+
+if [[ ! -d "$pytorch3d_rootdir" ]]; then
+    rm -rf "$pytorch3d_rootdir"
+    # clone the checkout found two levels above this script's directory
+    git clone "$SOURCE_DIR/../.." "$pytorch3d_rootdir"
+fi
+
+cd "$SOURCE_DIR"
+
+export tmp_conda="${WIN_PACKAGE_WORK_DIR}\\conda"
+export miniconda_exe="${WIN_PACKAGE_WORK_DIR}\\miniconda.exe"
+rm -rf "$tmp_conda"
+rm -f "$miniconda_exe"
+curl -sSk https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "$miniconda_exe"
+"$SOURCE_DIR/install_conda.bat" && rm "$miniconda_exe"
+pushd $tmp_conda
+export PATH="$(pwd):$(pwd)/Library/usr/bin:$(pwd)/Library/bin:$(pwd)/Scripts:$(pwd)/bin:$PATH"
+popd
+retry conda install -yq conda-build
+
+ANACONDA_USER=pytorch-nightly
+conda config --set anaconda_upload no
+
+
+if [[ "$desired_cuda" == 'cpu' ]]; then
+    export CONDA_CUDATOOLKIT_CONSTRAINT=""
+    export CONDA_CPUONLY_FEATURE="- cpuonly # [not osx]"
+    export CUDA_VERSION="None"
+else
+    export CONDA_CPUONLY_FEATURE=""
+    . ./switch_cuda_version.sh $desired_cuda
+    if [[ "$desired_cuda" == "10.1" ]]; then
+        export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.1,<10.2 # [not osx]"
+    elif [[ "$desired_cuda" == "10.0" ]]; then
+        export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.0,<10.1 # [not osx]"
+    elif [[ "$desired_cuda" == "9.2" ]]; then
+        export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.2,<9.3 # [not osx]"
+    elif [[ "$desired_cuda" == "9.0" ]]; then
+        export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.0,<9.1 # [not osx]"
+    elif [[ "$desired_cuda" == "8.0" ]]; then
+        export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=8.0,<8.1 # [not osx]"
+    else
+        echo "unhandled desired_cuda: $desired_cuda"
+        exit 1
+    fi
+fi
+
+if [[ -z "$PYTORCH_VERSION" ]]; then
+    export CONDA_CHANNEL_FLAGS="-c pytorch-nightly"
+    export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \
+                                python -c "import os, sys, json, re; cuver = '$cuver'; \
+                                cuver = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \
+                                print(re.sub(r'\\+.*$', '', \
+                                [x['version'] for x in json.load(sys.stdin)['pytorch'] \
+                                    if (x['platform'] == 'darwin' or cuver in x['fn']) \
+                                    and 'py' + os.environ['DESIRED_PYTHON'] in x['fn']][-1]))")"
+    if [[ -z "$PYTORCH_VERSION" ]]; then
+        echo "PyTorch version auto detection failed"
+        echo "No package found for desired_cuda=$desired_cuda and DESIRED_PYTHON=$DESIRED_PYTHON"
+        exit 1
+    fi
+else
+    export CONDA_CHANNEL_FLAGS="-c pytorch -c pytorch-nightly"
+fi
+if [[ "$desired_cuda" == 'cpu' ]]; then
+    export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==$PYTORCH_VERSION"
+    export CONDA_PYTORCH_CONSTRAINT="- pytorch==$PYTORCH_VERSION"
+else
+    export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==${PYTORCH_VERSION}"
+    export CONDA_PYTORCH_CONSTRAINT="- pytorch==${PYTORCH_VERSION}"
+fi
+
+export PYTORCH_VERSION_NODOT=${PYTORCH_VERSION//./}
+
+# Loop through all Python versions to build a package for each
+for py_ver in "${DESIRED_PYTHON[@]}"; do
+    build_string="py${py_ver}_${build_string_suffix}"
+    folder_tag="${build_string}_$(date +'%Y%m%d')"
+
+    # Create the conda package into this temporary folder. This is so we can find
+    # the package afterwards, as there's no easy way to extract the final filename
+    # from conda-build
+    output_folder="out_$folder_tag"
+    rm -rf "$output_folder"
+    mkdir "$output_folder"
+
+    export VSTOOLCHAIN_PACKAGE=vs2017
+
+    # We need to build the compiler activation scripts first on Windows
+    time VSDEVCMD_ARGS=${VSDEVCMD_ARGS[@]} \
+        conda build -c "$ANACONDA_USER" \
+                    --no-anaconda-upload \
+                    --output-folder "$output_folder" \
+                    ../$VSTOOLCHAIN_PACKAGE
+
+    cp ../$VSTOOLCHAIN_PACKAGE/conda_build_config.yaml ../pytorch3d/conda_build_config.yaml
+
+    conda config --set anaconda_upload no
+    echo "Calling conda-build at $(date)"
+    if [[ "$desired_cuda" == "9.2" ]]; then
+        time CMAKE_ARGS=${CMAKE_ARGS[@]} \
+            BUILD_VERSION="$PYTORCH3D_BUILD_VERSION" \
+            CU_VERSION="$cuver" \
+            SOURCE_ROOT_DIR="$pytorch3d_rootdir" \
+            conda build -c "$ANACONDA_USER" \
+                        -c defaults \
+                        -c conda-forge \
+                        -c "numba/label/dev" \
+                        --no-anaconda-upload \
+                        --python "$py_ver" \
+                        --output-folder "$output_folder" \
+                        --no-verify \
+                        --no-test \
+                        ../pytorch3d
+    else
+        time CMAKE_ARGS=${CMAKE_ARGS[@]} \
+            BUILD_VERSION="$PYTORCH3D_BUILD_VERSION" \
+            CU_VERSION="$cuver" \
+            SOURCE_ROOT_DIR="$pytorch3d_rootdir" \
+            conda build -c "$ANACONDA_USER" \
+                        -c defaults \
+                        -c conda-forge \
+                        --no-anaconda-upload \
+                        --python "$py_ver" \
+                        --output-folder "$output_folder" \
+                        --no-verify \
+                        --no-test \
+                        ../pytorch3d
+    fi
+    echo "Finished conda-build at $(date)"
+
+    # Extract the package for testing
+    ls -lah "$output_folder"
+    built_package="$(find $output_folder/ -name '*pytorch3d*.tar.bz2')"
+
+    # Copy the built package to the host machine for persistence before testing
+    if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
+        mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true
+        cp "$built_package" "$PYTORCH_FINAL_PACKAGE_DIR/"
+    fi
+done
+
+
+set +e
diff --git a/pytorch3d/packaging/conda/install_conda.bat b/pytorch3d/packaging/conda/install_conda.bat
new file mode 100644
index 0000000000000000000000000000000000000000..c9aebe988f4dac55189ac7ae81e390ba9388684f
--- /dev/null
+++ b/pytorch3d/packaging/conda/install_conda.bat
@@ -0,0 +1,7 @@
+@REM Copyright (c) Meta Platforms, Inc. and affiliates.
+@REM All rights reserved.
+@REM
+@REM This source code is licensed under the BSD-style license found in the
+@REM LICENSE file in the root directory of this source tree.
+
+@REM Run the Miniconda installer silently and wait for it to finish.
+@REM Reads miniconda_exe (path to the installer) and tmp_conda (destination
+@REM directory) from the environment; both must be set by the caller.
+@REM NOTE(review): /D is last on the line and unquoted, presumably because the
+@REM installer requires the destination flag in that form - confirm.
+start /wait "" "%miniconda_exe%" /S /InstallationType=JustMe /RegisterPython=0 /AddToPath=0 /D=%tmp_conda%
diff --git a/pytorch3d/packaging/conda/switch_cuda_version.sh b/pytorch3d/packaging/conda/switch_cuda_version.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e30f2c7b7b765f8f7966d7c42f887c37c210a208
--- /dev/null
+++ b/pytorch3d/packaging/conda/switch_cuda_version.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+if [[ "$OSTYPE" == "msys" ]]; then
+    CUDA_DIR="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v$1"
+else
+    CUDA_DIR="/usr/local/cuda-$1"
+fi
+
+if ! ls "$CUDA_DIR"
+then
+    echo "folder $CUDA_DIR not found to switch"
+fi
+
+echo "Switching symlink to $CUDA_DIR"
+mkdir -p /usr/local
+rm -fr /usr/local/cuda
+ln -s "$CUDA_DIR" /usr/local/cuda
+
+if [[ "$OSTYPE" == "msys" ]]; then
+    export CUDA_VERSION=`ls /usr/local/cuda/bin/cudart64*.dll | head -1 | tr '._' ' ' | cut -d ' ' -f2`
+    export CUDNN_VERSION=`ls /usr/local/cuda/bin/cudnn64*.dll | head -1 | tr '._' ' ' | cut -d ' ' -f2`
+else
+    export CUDA_VERSION=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev)
+    export CUDNN_VERSION=$(ls /usr/local/cuda/lib64/libcudnn.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev)
+fi
+
+ls -alh /usr/local/cuda
+
+echo "CUDA_VERSION=$CUDA_VERSION"
+echo "CUDNN_VERSION=$CUDNN_VERSION"
diff --git a/pytorch3d/packaging/cub_conda/README.md b/pytorch3d/packaging/cub_conda/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbf71eb4fb126dde214ebf60f99aa80738155983
--- /dev/null
+++ b/pytorch3d/packaging/cub_conda/README.md
@@ -0,0 +1,26 @@
+## For building conda package for NVIDIA CUB
+
+CUB is required for building PyTorch3D, so it makes sense
+to provide a conda package to make its header files available.
+This directory is used to do that; it is independent of the rest
+of this repo.
+
+Make sure you are in a conda environment with
+anaconda-client and conda-build installed.
+
+From this directory, build the package with the following.
+```
+mkdir -p ./out
+conda build --no-anaconda-upload --output-folder ./out cub
+```
+
+You can then upload the package with the following.
+```
+retry () {
+    # run a command, and try again if it fails
+    $*  || (echo && sleep 8 && echo retrying && $*)
+}
+
+file=out/linux-64/nvidiacub-1.10.0-0.tar.bz2
+retry anaconda --verbose -t ${TOKEN} upload -u pytorch3d --force ${file} --no-progress
+```
diff --git a/pytorch3d/packaging/cub_conda/cub/meta.yaml b/pytorch3d/packaging/cub_conda/cub/meta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7ebb038a58deeecfcabb662ad32d77129336604a
--- /dev/null
+++ b/pytorch3d/packaging/cub_conda/cub/meta.yaml
@@ -0,0 +1,12 @@
+# Conda recipe that packages the NVIDIA CUB 1.10.0 sources by copying the
+# cub directory into the include directory of the environment prefix.
+package:
+  name: nvidiacub
+  version: 1.10.0
+source:
+  url: https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz
+  folder: source
+build:
+  # -p keeps the build from failing when the include directory already exists.
+  script: mkdir -p $PREFIX/include && cp -r source/cub $PREFIX/include/cub
+
+about:
+  home: https://github.com/NVIDIA/cub
+  summary: CUB provides state-of-the-art, reusable software components for every layer of the CUDA programming model.
diff --git a/pytorch3d/packaging/linux_wheels/README.md b/pytorch3d/packaging/linux_wheels/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d8871ffb517e877eccaff6ff5a13cc087513d30d
--- /dev/null
+++ b/pytorch3d/packaging/linux_wheels/README.md
@@ -0,0 +1,31 @@
+## Building Linux pip Packages
+
+1. Make sure this directory is on a filesystem which docker can
+use - e.g. not NFS. If you are using a local hard drive there is
+nothing to do here.
+
+2. You may want to `docker pull` the `pytorch/conda-builder:cuda*` images which `go.sh` runs (e.g. `pytorch/conda-builder:cuda118`).
+
+3. Run `bash go.sh` in this directory. This takes a long time
+and writes each wheel to its own subdirectory of `output` in this directory.
+
+4. You can upload the packages to s3, along with basic html files
+which enable them to be used, with `bash after.sh`.
+
+
+In particular, if you are in a jupyter/colab notebook you can
+then install using these wheels with the following series of
+commands.
+
+```
+import sys
+import torch
+pyt_version_str=torch.__version__.split("+")[0].replace(".", "")
+version_str="".join([
+    f"py3{sys.version_info.minor}_cu",
+    torch.version.cuda.replace(".",""),
+    f"_pyt{pyt_version_str}"
+])
+!pip install fvcore iopath
+!pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html
+```
diff --git a/pytorch3d/packaging/linux_wheels/after.sh b/pytorch3d/packaging/linux_wheels/after.sh
new file mode 100644
index 0000000000000000000000000000000000000000..ccfce6473e1f819244b2f3cc77bb45f66e6735f8
--- /dev/null
+++ b/pytorch3d/packaging/linux_wheels/after.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -ex
+sudo chown -R "$USER" output
+python publish.py
diff --git a/pytorch3d/packaging/linux_wheels/go.sh b/pytorch3d/packaging/linux_wheels/go.sh
new file mode 100644
index 0000000000000000000000000000000000000000..1929a4671ad4f2abe28bf8f2fbc725d5b0d98cc9
--- /dev/null
+++ b/pytorch3d/packaging/linux_wheels/go.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Host directory mounted into every container as /conda_cache so that
+# downloaded conda packages persist between runs.
+conda_cache=/raid/$USER/building_conda_cache
+
+mkdir -p "$conda_cache"
+
+# Run inside.sh once per CUDA builder image. The repository root is mounted
+# at /inside and SELECTED_CUDA tells inside.sh which CUDA builds to perform.
+for cuda_digits in 113 115 116 117 118
+do
+    sudo docker run --rm \
+        -v "$conda_cache:/conda_cache" \
+        -v "$PWD/../../:/inside" \
+        -e "SELECTED_CUDA=cu${cuda_digits}" \
+        "pytorch/conda-builder:cuda${cuda_digits}" \
+        bash inside/packaging/linux_wheels/inside.sh
+done
diff --git a/pytorch3d/packaging/linux_wheels/inside.sh b/pytorch3d/packaging/linux_wheels/inside.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a7b8951a00ad60b3ebce16ba08993d8fd3abcf37
--- /dev/null
+++ b/pytorch3d/packaging/linux_wheels/inside.sh
@@ -0,0 +1,163 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Abort on the first failing command and echo each command as it runs.
+set -ex
+
+# NOTE(review): presumably `conda init bash` writes the shell hook to ~/.bashrc
+# and sourcing it is what makes `conda activate` usable below - confirm.
+conda init bash
+# shellcheck source=/dev/null
+source ~/.bashrc
+
+cd /inside
+# Obtain the package version by executing pytorch3d/__init__.py and printing
+# the __version__ it defines.
+VERSION=$(python -c "exec(open('pytorch3d/__init__.py').read()); print(__version__)")
+
+export BUILD_VERSION=$VERSION
+export FORCE_CUDA=1
+export MAX_JOBS=8
+# Keep downloaded conda packages in /conda_cache.
+export CONDA_PKGS_DIRS=/conda_cache
+
+# Deliberately disabled block, kept for reference only: it never runs.
+if false
+then
+    # We used to have to do this for old versions of CUDA
+    wget --no-verbose https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz
+    tar xzf 1.10.0.tar.gz
+    CUB_HOME=$(realpath ./cub-1.10.0)
+    export CUB_HOME
+    echo "CUB_HOME is now $CUB_HOME"
+fi
+
+# As a rule, we want to build for any combination of dependencies which is supported by
+# PyTorch3D and not older than the current Google Colab set up.
+
+# Space-separated list of Python versions to build for.
+PYTHON_VERSIONS="3.8 3.9 3.10"
+# the keys are pytorch versions
+# and each value is a space-separated list of CUDA tags to build for that version.
+declare -A CONDA_CUDA_VERSIONS=(
+#    ["1.11.0"]="cu113"
+#    ["1.12.0"]="cu113"
+#    ["1.12.1"]="cu113"
+#    ["1.13.0"]="cu116"
+#    ["1.13.1"]="cu116 cu117"
+#    ["2.0.0"]="cu117 cu118"
+    ["2.0.1"]="cu117 cu118"
+)
+
+
+
+for python_version in $PYTHON_VERSIONS
+do
+    for pytorch_version in "${!CONDA_CUDA_VERSIONS[@]}"
+    do
+        if [[ "3.7 3.8 3.9" != *$python_version* ]] && [[ "1.7.0 1.7.1 1.8.0 1.8.1 1.9.0 1.9.1 1.10.0 1.10.1 1.10.2" == *$pytorch_version* ]]
+        then
+            #python 3.10 and later not supported by pytorch 1.10.2 and before
+            continue
+        fi
+
+        extra_channel="-c nvidia"
+        cudatools="pytorch-cuda"
+        if [[ "1.11.0" == "$pytorch_version" ]]
+        then
+            extra_channel=""
+            cudatools="cudatoolkit"
+        fi
+        if [[ "1.12.0" == "$pytorch_version" ]] || [[ "1.12.1" == "$pytorch_version" ]]
+        then
+            extra_channel="-c conda-forge"
+            cudatools="cudatoolkit"
+        fi
+
+        for cu_version in ${CONDA_CUDA_VERSIONS[$pytorch_version]}
+        do
+            if [[ $SELECTED_CUDA != "$cu_version" ]]
+            then
+                continue
+            fi
+
+            case "$cu_version" in
+                cu118)
+                    export CUDA_HOME=/usr/local/cuda-11.8/
+                    export CUDA_TAG=11.8
+                    export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
+                ;;
+                cu117)
+                    export CUDA_HOME=/usr/local/cuda-11.7/
+                    export CUDA_TAG=11.7
+                    export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
+                ;;
+                cu116)
+                    export CUDA_HOME=/usr/local/cuda-11.6/
+                    export CUDA_TAG=11.6
+                    export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
+                ;;
+                cu115)
+                    export CUDA_HOME=/usr/local/cuda-11.5/
+                    export CUDA_TAG=11.5
+                    export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
+                ;;
+                cu113)
+                    export CUDA_HOME=/usr/local/cuda-11.3/
+                    export CUDA_TAG=11.3
+                    export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
+                ;;
+                cu112)
+                    export CUDA_HOME=/usr/local/cuda-11.2/
+                    export CUDA_TAG=11.2
+                    export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
+                ;;
+                cu111)
+                    export CUDA_HOME=/usr/local/cuda-11.1/
+                    export CUDA_TAG=11.1
+                    export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
+                ;;
+                cu110)
+                    export CUDA_HOME=/usr/local/cuda-11.0/
+                    export CUDA_TAG=11.0
+                    export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_50,code=compute_50"
+                ;;
+                cu102)
+                    export CUDA_HOME=/usr/local/cuda-10.2/
+                    export CUDA_TAG=10.2
+                    export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50"
+                ;;
+                cu101)
+                    export CUDA_HOME=/usr/local/cuda-10.1/
+                    export CUDA_TAG=10.1
+                    export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50"
+                ;;
+                *)
+                    echo "Unrecognized cu_version=$cu_version"
+                    exit 1
+                ;;
+            esac
+            tag=py"${python_version//./}"_"${cu_version}"_pyt"${pytorch_version//./}"
+
+            outdir="/inside/packaging/linux_wheels/output/$tag"
+            if [[ -d "$outdir" ]]
+            then
+                continue
+            fi
+
+            conda create -y -n "$tag" "python=$python_version"
+            conda activate "$tag"
+            # shellcheck disable=SC2086
+            conda install -y -c pytorch $extra_channel "pytorch=$pytorch_version" "$cudatools=$CUDA_TAG"
+            pip install fvcore iopath
+            echo "python version" "$python_version" "pytorch version" "$pytorch_version" "cuda version" "$cu_version" "tag" "$tag"
+
+            rm -rf dist
+
+            python setup.py clean
+            python setup.py bdist_wheel
+
+            rm -rf "$outdir"
+            mkdir -p "$outdir"
+            cp dist/*whl "$outdir"
+
+            conda deactivate
+        done
+    done
+done
+echo "DONE"
diff --git a/pytorch3d/packaging/linux_wheels/publish.py b/pytorch3d/packaging/linux_wheels/publish.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d0da93c740bf7c2ae63b8c659c7d217346b5a76
--- /dev/null
+++ b/pytorch3d/packaging/linux_wheels/publish.py
@@ -0,0 +1,87 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import subprocess
+from pathlib import Path
+from typing import List
+
+
+dest = "s3://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/"
+
+output = Path("output")
+
+
+def aws_s3_cmd(args) -> List[str]:
+    """
+    This function returns the full args for subprocess to do a command
+    with aws.
+    """
+    cmd_args = ["aws", "s3", "--profile", "saml"] + args
+    return cmd_args
+
+
+def fs3_exists(path) -> bool:
+    """
+    Returns True if the path exists inside dest on S3.
+    In fact, will also return True if there is a file which has the given
+    path as a prefix, but we are careful about this.
+    """
+    out = subprocess.check_output(aws_s3_cmd(["ls", path]))
+    return len(out) != 0
+
+
+def get_html_wrappers() -> None:
+    for directory in sorted(output.iterdir()):
+        output_wrapper = directory / "download.html"
+        assert not output_wrapper.exists()
+        dest_wrapper = dest + directory.name + "/download.html"
+        if fs3_exists(dest_wrapper):
+            subprocess.check_call(aws_s3_cmd(["cp", dest_wrapper, str(output_wrapper)]))
+
+
+def write_html_wrappers() -> None:
+    html = """
+    <a href="$">$</a><br>
+    """
+
+    for directory in sorted(output.iterdir()):
+        files = list(directory.glob("*.whl"))
+        assert len(files) == 1, files
+        [wheel] = files
+
+        this_html = html.replace("$", wheel.name)
+        output_wrapper = directory / "download.html"
+        if output_wrapper.exists():
+            contents = output_wrapper.read_text()
+            if this_html not in contents:
+                with open(output_wrapper, "a") as f:
+                    f.write(this_html)
+        else:
+            output_wrapper.write_text(this_html)
+
+
+def to_aws() -> None:
+    for directory in output.iterdir():
+        for file in directory.iterdir():
+            print(file)
+            subprocess.check_call(
+                aws_s3_cmd(["cp", str(file), dest + str(file.relative_to(output))])
+            )
+
+
+# Script entry point: write the download.html wrappers, then upload everything.
+if __name__ == "__main__":
+    # Uncomment this for subsequent releases.
+    # It fetches the download.html files already on S3, so that
+    # write_html_wrappers appends to them instead of starting new ones.
+    # get_html_wrappers()
+    write_html_wrappers()
+    to_aws()
+
+
+# see all files with
+#  aws s3 --profile saml ls --recursive s3://dl.fbaipublicfiles.com/pytorch3d/
+
+# empty current with
+#  aws s3 --profile saml rm --recursive
+#                 s3://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/
diff --git a/pytorch3d/packaging/pkg_helpers.bash b/pytorch3d/packaging/pkg_helpers.bash
new file mode 100644
index 0000000000000000000000000000000000000000..e22643ecc59c721edb72a19186d242d77781bdbc
--- /dev/null
+++ b/pytorch3d/packaging/pkg_helpers.bash
@@ -0,0 +1,390 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# shellcheck shell=bash
+# A set of useful bash functions for common functionality we need to do in
+# many build scripts
+
+# Setup CUDA environment variables, based on CU_VERSION
+#
+# Inputs:
+#   CU_VERSION (cpu, cu92, cu100, cu101, cu102, cu110, cu111, cu112, cu113, cu115, cu116)
+#   NO_CUDA_PACKAGE (bool)
+#   BUILD_TYPE (conda, wheel)
+#
+# Outputs:
+#   VERSION_SUFFIX (e.g., "")
+#   PYTORCH_VERSION_SUFFIX (e.g., +cpu)
+#   WHEEL_DIR (e.g., cu100/)
+#   CUDA_HOME (e.g., /usr/local/cuda-9.2, respected by torch.utils.cpp_extension)
+#   FORCE_CUDA (respected by pytorch3d setup.py)
+#   NVCC_FLAGS (respected by pytorch3d setup.py)
+#
+# Precondition: CUDA versions are installed in their conventional locations in
+# /usr/local/cuda-*
+#
+# NOTE: Why VERSION_SUFFIX versus PYTORCH_VERSION_SUFFIX?  If you're building
+# a package with CUDA on a platform we support CUDA on, VERSION_SUFFIX ==
+# PYTORCH_VERSION_SUFFIX and everyone is happy.  However, if you are building a
+# package with only CPU bits (e.g., torchaudio), then VERSION_SUFFIX is always
+# empty, but PYTORCH_VERSION_SUFFIX is +cpu (because that's how you get a CPU
+# version of a Python package).  But that doesn't apply if you're on OS X,
+# since the default CU_VERSION on OS X is cpu.
+setup_cuda() {
+
+  # Version suffixes default to empty; only wheel builds off macOS get one.
+  export VERSION_SUFFIX=""
+  export PYTORCH_VERSION_SUFFIX=""
+  export WHEEL_DIR=""
+  if [[ "$BUILD_TYPE" == "wheel" ]] && [[ "$(uname)" != Darwin ]]; then
+    # cu102 counts as the default CUDA and therefore carries no suffix
+    if [[ "$CU_VERSION" != "cu102" ]]; then
+      export PYTORCH_VERSION_SUFFIX="+$CU_VERSION"
+    fi
+    # Follow pytorch's suffix scheme, unless this package has no CUDA builds
+    # (NO_CUDA_PACKAGE set), in which case the empty defaults stay.
+    if [[ -z "$NO_CUDA_PACKAGE" ]]; then
+      export VERSION_SUFFIX="$PYTORCH_VERSION_SUFFIX"
+      export WHEEL_DIR="$CU_VERSION/"
+    fi
+  fi
+
+  # Map CU_VERSION to the dotted toolkit version and to the GPU architectures
+  # which get compiled code.
+  # Hard-coding the architectures is a temporary situation until
+  # https://github.com/pytorch/pytorch/pull/23408 lands
+  local cuda_release
+  local -a sm_archs
+  case "$CU_VERSION" in
+    cu116) cuda_release=11.6; sm_archs=(35 50 60 70 75 80 86) ;;
+    cu115) cuda_release=11.5; sm_archs=(35 50 60 70 75 80 86) ;;
+    cu113) cuda_release=11.3; sm_archs=(35 50 60 70 75 80 86) ;;
+    cu112) cuda_release=11.2; sm_archs=(35 50 60 70 75 80 86) ;;
+    cu111) cuda_release=11.1; sm_archs=(35 50 60 70 75 80 86) ;;
+    cu110) cuda_release=11.0; sm_archs=(35 50 60 70 75 80) ;;
+    cu102) cuda_release=10.2; sm_archs=(35 50 60 70 75) ;;
+    cu101) cuda_release=10.1; sm_archs=(35 50 60 70 75) ;;
+    cu100) cuda_release=10.0; sm_archs=(35 50 60 70 75) ;;
+    cu92)  cuda_release=9.2;  sm_archs=(35 50 60 70) ;;
+    cpu)
+      # CPU builds leave CUDA_HOME, FORCE_CUDA and NVCC_FLAGS untouched.
+      return 0
+      ;;
+    *)
+      echo "Unrecognized CU_VERSION=$CU_VERSION"
+      exit 1
+      ;;
+  esac
+
+  if [[ "$OSTYPE" == "msys" ]]; then
+    export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v${cuda_release}"
+  else
+    export CUDA_HOME="/usr/local/cuda-${cuda_release}/"
+  fi
+  export FORCE_CUDA=1
+
+  # One -gencode entry per architecture, followed by the compute_50 entry
+  # which closes every list.
+  local sm gencode_flags=""
+  for sm in "${sm_archs[@]}"; do
+    gencode_flags+="-gencode=arch=compute_${sm},code=sm_${sm} "
+  done
+  export NVCC_FLAGS="${gencode_flags}-gencode=arch=compute_50,code=compute_50"
+}
+
+# Populate build version if necessary, and add version suffix
+#
+# Inputs:
+#   BUILD_VERSION (e.g., 0.2.0 or empty)
+#   VERSION_SUFFIX (e.g., +cpu)
+#
+# Outputs:
+#   BUILD_VERSION (e.g., 0.2.0.dev20190807+cpu)
+#
+# Fill BUILD_VERSION if it doesn't exist already with a nightly string
+# Usage: setup_build_version 0.2.0
+setup_build_version() {
+  # Use the BUILD_VERSION supplied by the caller, or make up a nightly one of
+  # the form <$1>.dev<YYYYMMDD>; VERSION_SUFFIX is appended in both cases.
+  local stem="$BUILD_VERSION"
+  if [[ -z "$stem" ]]; then
+    stem="$1.dev$(date "+%Y%m%d")"
+  fi
+  export BUILD_VERSION="${stem}${VERSION_SUFFIX}"
+}
+
+# Set some useful variables for OS X, if applicable
+setup_macos() {
+  if [[ "$(uname)" == Darwin ]]; then
+    export MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++
+  fi
+}
+
+# Top-level entry point for things every package will need to do
+#
+# Usage: setup_env 0.2.0
+setup_env() {
+  # setup_cuda runs first because it exports the VERSION_SUFFIX which
+  # setup_build_version appends to BUILD_VERSION.
+  setup_cuda
+  # $1 is the base version, used only when BUILD_VERSION is empty.
+  setup_build_version "$1"
+  setup_macos
+}
+
+# Function to retry functions that sometimes timeout or have flaky failures
+#
+# Usage: retry <command> [args...]
+# Runs the command up to five times, sleeping 1, 2, 4 and 8 seconds before the
+# successive retries, and stops at the first success. The exit status is that
+# of the last attempt made.
+retry () {
+    # "$@" rather than unquoted $* keeps every argument intact, so arguments
+    # containing spaces or glob characters reach the command unchanged.
+    "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@") || (sleep 4 && "$@") || (sleep 8 && "$@")
+}
+
+# Inputs:
+#   PYTHON_VERSION (2.7, 3.5, 3.6, 3.7, 3.8)
+#   UNICODE_ABI (bool)
+#
+# Outputs:
+#   PATH modified to put correct Python version in PATH
+#
+# Precondition: If Linux, you are in a soumith/manylinux-cuda* Docker image
+setup_wheel_python() {
+  if [[ "$(uname)" == Darwin ]]; then
+    # On macOS, build in a freshly created conda environment named after the
+    # Python version; a failure to remove an earlier one is ignored.
+    eval "$(conda shell.bash hook)"
+    conda env remove -n "env$PYTHON_VERSION" || true
+    conda create -yn "env$PYTHON_VERSION" python="$PYTHON_VERSION"
+    conda activate "env$PYTHON_VERSION"
+  else
+    # Elsewhere, choose the interpreter directory under /opt/python whose name
+    # is the ABI tag for the requested version. python_abi is not declared
+    # local, so it stays set in the calling shell.
+    case "$PYTHON_VERSION" in
+      2.7)
+        # Python 2.7 has two ABI tags; UNICODE_ABI selects the "mu" one.
+        if [[ -n "$UNICODE_ABI" ]]; then
+          python_abi=cp27-cp27mu
+        else
+          python_abi=cp27-cp27m
+        fi
+        ;;
+      3.5) python_abi=cp35-cp35m ;;
+      3.6) python_abi=cp36-cp36m ;;
+      3.7) python_abi=cp37-cp37m ;;
+      3.8) python_abi=cp38-cp38 ;;
+      *)
+        echo "Unrecognized PYTHON_VERSION=$PYTHON_VERSION"
+        exit 1
+        ;;
+    esac
+    export PATH="/opt/python/$python_abi/bin:$PATH"
+  fi
+}
+
+# Install with pip a bit more robustly than the default
+pip_install() {
+  # Forward all arguments to `pip install` through retry, with the progress
+  # bar switched off.
+  retry pip install --progress-bar off "$@"
+}
+
+# Install torch with pip, respecting PYTORCH_VERSION, and record the installed
+# version into PYTORCH_VERSION, if applicable
+setup_pip_pytorch_version() {
+  if [[ -z "$PYTORCH_VERSION" ]]; then
+    # Install latest prerelease version of torch, per our nightlies, consistent
+    # with the requested cuda version
+    # (WHEEL_DIR, as exported by setup_cuda, selects the index page).
+    pip_install --pre torch -f "https://download.pytorch.org/whl/nightly/${WHEEL_DIR}torch_nightly.html"
+    # NOTE(review): this tests CUDA_VERSION, whereas the rest of this file
+    # keys on CU_VERSION; confirm which variable is meant to hold "cpu".
+    if [[ "$CUDA_VERSION" == "cpu" ]]; then
+      # CUDA and CPU are ABI compatible on the CPU-only parts, so strip
+      # in this case
+      # (the final sed removes everything from the first "+" onwards).
+      export PYTORCH_VERSION="$(pip show torch | grep ^Version: | sed 's/Version:  *//' | sed 's/+.\+//')"
+    else
+      export PYTORCH_VERSION="$(pip show torch | grep ^Version: | sed 's/Version:  *//')"
+    fi
+  else
+    # An explicit version was requested: install exactly that one, searching
+    # both the stable and the nightly index pages.
+    # NOTE(review): CUDA_SUFFIX is not set anywhere in this file; it must come
+    # from the caller, otherwise it expands to nothing - confirm.
+    pip_install "torch==$PYTORCH_VERSION$CUDA_SUFFIX" \
+      -f https://download.pytorch.org/whl/torch_stable.html \
+      -f https://download.pytorch.org/whl/nightly/torch_nightly.html
+  fi
+}
+
+# Fill PYTORCH_VERSION with the latest conda nightly version, and
+# CONDA_CHANNEL_FLAGS with appropriate flags to retrieve these versions
+#
+# You MUST have populated CUDA_SUFFIX beforehand.
+setup_conda_pytorch_constraint() {
+  if [[ -z "$PYTORCH_VERSION" ]]; then
+    # No version requested: take one from the pytorch-nightly channel.
+    # The embedded Python reads the `conda search --json` output and keeps the
+    # packages whose file name contains "py" + PYTHON_VERSION and which are
+    # either for darwin or mention the CUDA version in file name form
+    # (e.g. cu102 is looked for as "cuda102" or "cuda10.2"). It prints the
+    # version of the last such package with anything from "+" on removed.
+    export CONDA_CHANNEL_FLAGS="-c pytorch-nightly"
+    export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \
+                              python -c "import os, sys, json, re; cuver = os.environ.get('CU_VERSION'); \
+                               cuver_1 = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \
+                               cuver_2 = (cuver[:-1] + '.' + cuver[-1]).replace('cu', 'cuda') if cuver != 'cpu' else cuver; \
+                               print(re.sub(r'\\+.*$', '', \
+                                [x['version'] for x in json.load(sys.stdin)['pytorch'] \
+                                  if (x['platform'] == 'darwin' or cuver_1 in x['fn'] or cuver_2 in x['fn']) \
+                                    and 'py' + os.environ['PYTHON_VERSION'] in x['fn']][-1]))")"
+    if [[ -z "$PYTORCH_VERSION" ]]; then
+      echo "PyTorch version auto detection failed"
+      echo "No package found for CU_VERSION=$CU_VERSION and PYTHON_VERSION=$PYTHON_VERSION"
+      exit 1
+    fi
+  else
+    export CONDA_CHANNEL_FLAGS="-c pytorch"
+  fi
+  # Both branches set the same build constraint; they differ only in that the
+  # cpu run constraint omits PYTORCH_VERSION_SUFFIX.
+  if [[ "$CU_VERSION" == cpu ]]; then
+    export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==$PYTORCH_VERSION${PYTORCH_VERSION_SUFFIX}"
+    export CONDA_PYTORCH_CONSTRAINT="- pytorch==$PYTORCH_VERSION"
+  else
+    export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==${PYTORCH_VERSION}${PYTORCH_VERSION_SUFFIX}"
+    export CONDA_PYTORCH_CONSTRAINT="- pytorch==${PYTORCH_VERSION}${PYTORCH_VERSION_SUFFIX}"
+  fi
+  # PYTORCH_VERSION with every dot removed.
+  export PYTORCH_VERSION_NODOT=${PYTORCH_VERSION//./}
+}
+
+# Translate CU_VERSION into CONDA_CUDATOOLKIT_CONSTRAINT, CONDA_CUB_CONSTRAINT
+# and CONDA_CPUONLY_FEATURE
+setup_conda_cudatoolkit_constraint() {
+  export CONDA_CPUONLY_FEATURE=""
+  export CONDA_CUB_CONSTRAINT=""
+  if [[ "$(uname)" == Darwin ]]; then
+    # No cudatoolkit requirement when uname reports Darwin.
+    export CONDA_CUDATOOLKIT_CONSTRAINT=""
+    return 0
+  fi
+
+  # For each CUDA build: the inclusive lower and exclusive upper cudatoolkit
+  # bound, and whether the separate nvidiacub package is required.
+  local lower upper needs_cub=""
+  case "$CU_VERSION" in
+    cu116) lower=11.6; upper=11.7 ;;
+    cu115) lower=11.5; upper=11.6 ;;
+    cu113) lower=11.3; upper=11.4 ;;
+    cu112) lower=11.2; upper=11.3 ;;
+    cu111) lower=11.1; upper=11.2 ;;
+    cu110)
+      # Even though cudatoolkit 11.0 provides CUB we need our own, to control the
+      # version, because the built-in 1.9.9 in the cudatoolkit causes problems.
+      lower=11.0; upper=11.1; needs_cub=1
+      ;;
+    cu102) lower=10.2; upper=10.3; needs_cub=1 ;;
+    cu101) lower=10.1; upper=10.2; needs_cub=1 ;;
+    cu100) lower=10.0; upper=10.1; needs_cub=1 ;;
+    cu92)  lower=9.2;  upper=9.3;  needs_cub=1 ;;
+    cpu)
+      export CONDA_CUDATOOLKIT_CONSTRAINT=""
+      export CONDA_CPUONLY_FEATURE="- cpuonly"
+      return 0
+      ;;
+    *)
+      echo "Unrecognized CU_VERSION=$CU_VERSION"
+      exit 1
+      ;;
+  esac
+
+  export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=${lower},<${upper} # [not osx]"
+  if [[ -n "$needs_cub" ]]; then
+    export CONDA_CUB_CONSTRAINT="- nvidiacub"
+  fi
+}
+
+# Build the proper compiler package before building the final package
+setup_visual_studio_constraint() {
+  # Only acts when OSTYPE is msys; otherwise this is a no-op.
+  if [[ "$OSTYPE" == "msys" ]]; then
+      export VSTOOLCHAIN_PACKAGE=vs2019
+      export VSDEVCMD_ARGS=''
+      # Build the toolchain package from packaging/vs2019. CONDA_CHANNEL_FLAGS
+      # is left unquoted on purpose so that it splits into separate arguments.
+      # shellcheck disable=SC2086
+      conda build $CONDA_CHANNEL_FLAGS --no-anaconda-upload packaging/$VSTOOLCHAIN_PACKAGE
+      # Reuse the toolchain's conda_build_config.yaml for the pytorch3d recipe.
+      cp packaging/$VSTOOLCHAIN_PACKAGE/conda_build_config.yaml packaging/pytorch3d/conda_build_config.yaml
+  fi
+}
+
+download_nvidiacub_if_needed() {
+  # Download and unpack CUB 1.10.0 into the current directory and export
+  # CUB_HOME pointing at it, but only for the CUDA versions listed here.
+  case "$CU_VERSION" in
+    cu110|cu102|cu101|cu100|cu92)
+      ;;
+    *)
+      # We don't need CUB for a cpu build or if cuda is 11.1 or higher
+      return 0
+      ;;
+  esac
+
+  echo "Downloading cub"
+  wget --no-verbose https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz
+  tar xzf 1.10.0.tar.gz
+  CUB_HOME=$(realpath ./cub-1.10.0)
+  export CUB_HOME
+  echo "CUB_HOME is now $CUB_HOME"
+}
diff --git a/pytorch3d/packaging/pytorch3d/meta.yaml b/pytorch3d/packaging/pytorch3d/meta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8604127d4104be38fa5d4153f15ee05e6bf6bb91
--- /dev/null
+++ b/pytorch3d/packaging/pytorch3d/meta.yaml
@@ -0,0 +1,59 @@
+package:
+  name: pytorch3d
+  version: "{{ environ.get('BUILD_VERSION') }}"
+
+source:
+ path: "{{ environ.get('SOURCE_ROOT_DIR') }}"
+
+requirements:  # bare Jinja expressions below expand to whole list entries taken from environment variables
+  build:
+    - {{ compiler('c') }} # [win]
+    {{ environ.get('CONDA_CUB_CONSTRAINT') }}
+
+  host:
+    - python
+    - setuptools
+    {{ environ.get('CONDA_PYTORCH_BUILD_CONSTRAINT') }}
+    {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }}
+    {{ environ.get('CONDA_CPUONLY_FEATURE') }}
+
+  run:
+    - python
+    - numpy >=1.11
+    - torchvision >=0.5
+    - fvcore
+    - iopath
+    {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }}
+    {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }}
+
+build:
+  string: py{{py}}_{{ environ['CU_VERSION'] }}_pyt{{ environ['PYTORCH_VERSION_NODOT']}}
+  script: python setup.py install --single-version-externally-managed --record=record.txt # [not win]
+  script_env:  # environment variables made visible to the build script
+    - CUDA_HOME
+    - FORCE_CUDA
+    - NVCC_FLAGS
+    - MAX_JOBS
+  features:
+    {{ environ.get('CONDA_CPUONLY_FEATURE') }}
+
+test:
+  imports:
+    - pytorch3d
+  source_files:
+    - tests
+    - docs
+  requires:
+    - imageio
+    - hydra-core
+    - accelerate
+  commands:
+    # Alternative runner, left disabled: pytest .
+    python -m unittest discover -v -s tests -t .
+
+
+about:
+  home: https://github.com/facebookresearch/pytorch3d
+  license: BSD
+  license_file: LICENSE
+  summary: '3d Geometry for pytorch'
diff --git a/pytorch3d/packaging/vs2017/activate.bat b/pytorch3d/packaging/vs2017/activate.bat
new file mode 100644
index 0000000000000000000000000000000000000000..55928c1e141f12753275a8c7f1768fdc0432780a
--- /dev/null
+++ b/pytorch3d/packaging/vs2017/activate.bat
@@ -0,0 +1,50 @@
+@REM Copyright (c) Meta Platforms, Inc. and affiliates.
+@REM All rights reserved.
+@REM
+@REM This source code is licensed under the BSD-style license found in the
+@REM LICENSE file in the root directory of this source tree.
+
+:: Set env vars that tell distutils to use the compiler that we put on path
+SET DISTUTILS_USE_SDK=1
+SET MSSdk=1
+:: Version identifiers of the Visual Studio release this activation script targets.
+SET "VS_VERSION=15.0"
+SET "VS_MAJOR=15"
+SET "VS_YEAR=2017"
+:: Argument prefixes and the CL variable that MSYS2 should leave alone instead of path-converting.
+set "MSYS2_ARG_CONV_EXCL=/AI;/AL;/OUT;/out"
+set "MSYS2_ENV_CONV_EXCL=CL"
+
+:: For Python 3.5+, ensure that we link with the dynamic runtime.  See
+:: http://stevedower.id.au/blog/building-for-python-3-5-part-two/ for more info
+set "PY_VCRUNTIME_REDIST=%PREFIX%\\bin\\vcruntime140.dll"
+:: Query vswhere for installs in the 15.x version range and keep the first one that ships vcvarsall.bat.
+for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do (
+    if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (
+        set "VSINSTALLDIR=%%i\"
+        goto :vswhere
+    )
+)
+:: Reached both after a match and when nothing matched; in the latter case VSINSTALLDIR is not set here.
+:vswhere
+
+:: Shorten PATH to avoid the `input line too long` error.
+SET MyPath=%PATH%
+
+setlocal EnableDelayedExpansion
+:: Quote each semicolon-separated PATH entry, then rebuild the list from the short-name form of entries that exist.
+SET TempPath="%MyPath:;=";"%"
+SET var=
+FOR %%a IN (%TempPath%) DO (
+    IF EXIST %%~sa (
+        SET "var=!var!;%%~sa"
+    )
+)
+:: Strip the leading semicolon and carry the shortened list across endlocal into PATH.
+set "TempPath=!var:~1!"
+endlocal & set "PATH=%TempPath%"
+
+:: Shorten current directory too
+FOR %%A IN (.) DO CD "%%~sA"
+
+:: other things added by install_activate.bat at package build time
diff --git a/pytorch3d/packaging/vs2017/conda_build_config.yaml b/pytorch3d/packaging/vs2017/conda_build_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5188bb0ebecf72aefb1c2e779458998216e4d479
--- /dev/null
+++ b/pytorch3d/packaging/vs2017/conda_build_config.yaml
@@ -0,0 +1,24 @@
+blas_impl:
+  - mkl                        # [x86_64]
+c_compiler:
+  - vs2017                     # [win]
+cxx_compiler:
+  - vs2017                     # [win]
+python:  # NOTE(review): only 3.5 and 3.6 are listed - confirm this is still the intended matrix
+  - 3.5
+  - 3.6
+# cross_compiler_target_platform differs from target_platform in that it determines what subdir
+#    the compiler will target, not what subdir the compiler package will be itself.
+#    For example, a win-64 package of a compiler that targets win-32 lets us compile win-32
+#    code on win-64 miniconda.
+cross_compiler_target_platform:
+  - win-64                     # [win]
+target_platform:
+  - win-64                     # [win]
+vc:
+  - 14
+zip_keys:  # vc, c_compiler and cxx_compiler are grouped so that they vary together
+  -                             # [win]
+    - vc                        # [win]
+    - c_compiler                # [win]
+    - cxx_compiler              # [win]
diff --git a/pytorch3d/packaging/vs2017/install_activate.bat b/pytorch3d/packaging/vs2017/install_activate.bat
new file mode 100644
index 0000000000000000000000000000000000000000..7d4e4cc31bac3cf0b168706b37c1faed703f6e57
--- /dev/null
+++ b/pytorch3d/packaging/vs2017/install_activate.bat
@@ -0,0 +1,35 @@
+@REM Copyright (c) Meta Platforms, Inc. and affiliates.
+@REM All rights reserved.
+@REM
+@REM This source code is licensed under the BSD-style license found in the
+@REM LICENSE file in the root directory of this source tree.
+
+set YEAR=2017
+set VER=15
+:: Seed the conda activation script with activate.bat, then append the target-specific commands below.
+mkdir "%PREFIX%\etc\conda\activate.d"
+COPY "%RECIPE_DIR%\activate.bat" "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+:: Each pushd written to the script needs a matching popd written after it, in both branches.
+IF "%cross_compiler_target_platform%" == "win-64" (
+  set "target_platform=amd64"
+  echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR% Win64" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  IF "%VSDEVCMD_ARGS%" == "" (
+    echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  ) ELSE (
+    echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  )
+  echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  ) else (
+  set "target_platform=x86"
+  echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  )
diff --git a/pytorch3d/packaging/vs2017/install_runtime.bat b/pytorch3d/packaging/vs2017/install_runtime.bat
new file mode 100644
index 0000000000000000000000000000000000000000..9e7806657fb926fabdc501754cf26e1862678f70
--- /dev/null
+++ b/pytorch3d/packaging/vs2017/install_runtime.bat
@@ -0,0 +1,55 @@
+@REM Copyright (c) Meta Platforms, Inc. and affiliates.
+@REM All rights reserved.
+@REM
+@REM This source code is licensed under the BSD-style license found in the
+@REM LICENSE file in the root directory of this source tree.
+
+set VC_PATH=x86
+if "%ARCH%"=="64" (
+   set VC_PATH=x64
+)
+
+set MSC_VER=2017
+
+rem :: This should always be present for VC installed with VS.  Not sure about VC installed with Visual C++ Build Tools 2015
+rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO (
+rem     set SP=%%A
+rem     )
+
+rem if not "%SP%" == "%PKG_VERSION%" (
+rem    echo "Version detected from registry: %SP%"
+rem    echo    "does not match version of package being built (%PKG_VERSION%)"
+rem    echo "Do you have current updates for VS 2015 installed?"
+rem    exit 1
+rem )
+
+
+REM ========== REQUIRES Win 10 SDK be installed, or files otherwise copied to location below!
+robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%"  "%LIBRARY_BIN%" *.dll /E
+robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%"  "%PREFIX%" *.dll /E
+if %ERRORLEVEL% GEQ 8 exit 1
+
+REM ========== This one comes from visual studio 2017
+set "VC_VER=141"
+REM Locate a VS 2017 install that ships vcvarsall.bat, then load its environment to obtain VCToolsRedistDir.
+for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do (
+    if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (
+        set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat"
+        goto :vcvars_found
+    )
+)
+echo ERROR: no Visual Studio 2017 installation with vcvarsall.bat was found & exit 1
+:vcvars_found
+@setlocal
+call "%VS15VCVARSALL%" x64
+set "REDIST_ROOT=%VCToolsRedistDir%%VC_PATH%"
+REM robocopy exit codes below 8 indicate success, so only 8 and above abort; every copy must run before exiting.
+robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%LIBRARY_BIN%" *.dll /E
+if %ERRORLEVEL% GEQ 8 exit 1
+robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%PREFIX%" *.dll /E
+if %ERRORLEVEL% GEQ 8 exit 1
+robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%LIBRARY_BIN%" *.dll /E
+if %ERRORLEVEL% GEQ 8 exit 1
+robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%PREFIX%" *.dll /E
+if %ERRORLEVEL% GEQ 8 exit 1
+@endlocal & exit 0
diff --git a/pytorch3d/packaging/vs2017/meta.yaml b/pytorch3d/packaging/vs2017/meta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..34f4860ba850120f59f3f499e21e2a4b429e03cc
--- /dev/null
+++ b/pytorch3d/packaging/vs2017/meta.yaml
@@ -0,0 +1,45 @@
+{% set vcver="14.1" %}
+{% set vcfeature="14" %}
+{% set vsyear="2017" %}
+{% set fullver="15.4.27004.2010" %}
+# vcver is the vc version exported to dependents, vcfeature the tracked feature, vsyear the year used in package names.
+package:
+  name: vs{{ vsyear }}
+  version: {{ fullver }}
+
+build:
+  skip: True  [not win]
+  script_env:
+    - VSDEVCMD_ARGS # [win]
+
+outputs:
+  - name: vs{{ vsyear }}_{{ cross_compiler_target_platform }}
+    script: install_activate.bat
+    track_features:
+      # VS 2017 is binary-compatible with VS 2015/vc14.  Tools are "v141".
+      strong:
+        - vc{{ vcfeature }}
+    run_exports:
+      - vc {{ vcver }}
+    about:
+      summary: Activation and version verification of MSVC {{ vcver }} (VS {{ vsyear }}) compiler
+      license: BSD 3-clause
+  - name: vs{{ vsyear }}_runtime
+    script: install_runtime.bat
+  - name: vc
+    version: {{ vcver }}
+    track_features:
+      - vc{{ vcfeature }}
+    requirements:
+      run:
+        - {{ pin_subpackage('vs' ~ vsyear ~ '_runtime') }}
+    about:
+      home: https://github.com/conda/conda/wiki/VC-features
+      license: Modified BSD License (3-clause)
+      license_family: BSD
+      summary: A meta-package to track VC features.
+      description: |
+          This metapackage is used to activate vc features without
+          depending on Python.
+      doc_url: https://github.com/conda/conda/wiki/VC-features
+      dev_url: https://github.com/conda/conda/wiki/VC-features
diff --git a/pytorch3d/packaging/vs2019/activate.bat b/pytorch3d/packaging/vs2019/activate.bat
new file mode 100644
index 0000000000000000000000000000000000000000..fd4f5706e339bb50442c6c3d63d484bf7628db72
--- /dev/null
+++ b/pytorch3d/packaging/vs2019/activate.bat
@@ -0,0 +1,50 @@
+@REM Copyright (c) Meta Platforms, Inc. and affiliates.
+@REM All rights reserved.
+@REM
+@REM This source code is licensed under the BSD-style license found in the
+@REM LICENSE file in the root directory of this source tree.
+
+:: Set env vars that tell distutils to use the compiler that we put on path
+SET DISTUTILS_USE_SDK=1
+SET MSSdk=1
+:: Version identifiers of the Visual Studio release this activation script targets.
+SET "VS_VERSION=16.0"
+SET "VS_MAJOR=16"
+SET "VS_YEAR=2019"
+:: Argument prefixes and the CL variable that MSYS2 should leave alone instead of path-converting.
+set "MSYS2_ARG_CONV_EXCL=/AI;/AL;/OUT;/out"
+set "MSYS2_ENV_CONV_EXCL=CL"
+
+:: For Python 3.5+, ensure that we link with the dynamic runtime.  See
+:: http://stevedower.id.au/blog/building-for-python-3-5-part-two/ for more info
+set "PY_VCRUNTIME_REDIST=%PREFIX%\\bin\\vcruntime140.dll"
+:: Query vswhere for installs in the 16.x version range and keep the first one that ships vcvarsall.bat.
+for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [16^,17^) -property installationPath`) do (
+    if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (
+        set "VSINSTALLDIR=%%i\"
+        goto :vswhere
+    )
+)
+:: Reached both after a match and when nothing matched; in the latter case VSINSTALLDIR is not set here.
+:vswhere
+
+:: Shorten PATH to avoid the `input line too long` error.
+SET MyPath=%PATH%
+
+setlocal EnableDelayedExpansion
+:: Quote each semicolon-separated PATH entry, then rebuild the list from the short-name form of entries that exist.
+SET TempPath="%MyPath:;=";"%"
+SET var=
+FOR %%a IN (%TempPath%) DO (
+    IF EXIST %%~sa (
+        SET "var=!var!;%%~sa"
+    )
+)
+:: Strip the leading semicolon and carry the shortened list across endlocal into PATH.
+set "TempPath=!var:~1!"
+endlocal & set "PATH=%TempPath%"
+
+:: Shorten current directory too
+FOR %%A IN (.) DO CD "%%~sA"
+
+:: other things added by install_activate.bat at package build time
diff --git a/pytorch3d/packaging/vs2019/conda_build_config.yaml b/pytorch3d/packaging/vs2019/conda_build_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..358052ec012940bb56778d167bcd69302d255846
--- /dev/null
+++ b/pytorch3d/packaging/vs2019/conda_build_config.yaml
@@ -0,0 +1,24 @@
+blas_impl:
+  - mkl                        # [x86_64]
+c_compiler:
+  - vs2019                     # [win]
+cxx_compiler:
+  - vs2019                     # [win]
+python:  # NOTE(review): only 3.5 and 3.6 are listed - confirm this is still the intended matrix
+  - 3.5
+  - 3.6
+# cross_compiler_target_platform differs from target_platform in that it determines what subdir
+#    the compiler will target, not what subdir the compiler package will be itself.
+#    For example, a win-64 package of a compiler that targets win-32 lets us compile win-32
+#    code on win-64 miniconda.
+cross_compiler_target_platform:
+  - win-64                     # [win]
+target_platform:
+  - win-64                     # [win]
+vc:
+  - 14
+zip_keys:  # vc, c_compiler and cxx_compiler are grouped so that they vary together
+  -                             # [win]
+    - vc                        # [win]
+    - c_compiler                # [win]
+    - cxx_compiler              # [win]
diff --git a/pytorch3d/packaging/vs2019/install_activate.bat b/pytorch3d/packaging/vs2019/install_activate.bat
new file mode 100644
index 0000000000000000000000000000000000000000..ee7ccdc679777691de2ac9c883e4a01118a29ec3
--- /dev/null
+++ b/pytorch3d/packaging/vs2019/install_activate.bat
@@ -0,0 +1,35 @@
+@REM Copyright (c) Meta Platforms, Inc. and affiliates.
+@REM All rights reserved.
+@REM
+@REM This source code is licensed under the BSD-style license found in the
+@REM LICENSE file in the root directory of this source tree.
+
+set YEAR=2019
+set VER=16
+:: Seed the conda activation script with activate.bat, then append target-specific commands; each pushd needs its popd.
+mkdir "%PREFIX%\etc\conda\activate.d"
+COPY "%RECIPE_DIR%\activate.bat" "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+:: NOTE(review): CMake documents no Win64-suffixed name for its Visual Studio 16 2019 generator - confirm the value below.
+IF "%cross_compiler_target_platform%" == "win-64" (
+  set "target_platform=amd64"
+  echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR% Win64" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  IF "%VSDEVCMD_ARGS%" == "" (
+    echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  ) ELSE (
+    echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  )
+  echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  ) else (
+  set "target_platform=x86"
+  echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  )
diff --git a/pytorch3d/packaging/vs2019/install_runtime.bat b/pytorch3d/packaging/vs2019/install_runtime.bat
new file mode 100644
index 0000000000000000000000000000000000000000..1c842cfe350db46f09c9ac79dcd3bc22965d3315
--- /dev/null
+++ b/pytorch3d/packaging/vs2019/install_runtime.bat
@@ -0,0 +1,55 @@
+@REM Copyright (c) Meta Platforms, Inc. and affiliates.
+@REM All rights reserved.
+@REM
+@REM This source code is licensed under the BSD-style license found in the
+@REM LICENSE file in the root directory of this source tree.
+
+set VC_PATH=x86
+if "%ARCH%"=="64" (
+   set VC_PATH=x64
+)
+
+set MSC_VER=2019
+
+rem :: This should always be present for VC installed with VS.  Not sure about VC installed with Visual C++ Build Tools 2015
+rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO (
+rem     set SP=%%A
+rem     )
+
+rem if not "%SP%" == "%PKG_VERSION%" (
+rem    echo "Version detected from registry: %SP%"
+rem    echo    "does not match version of package being built (%PKG_VERSION%)"
+rem    echo "Do you have current updates for VS 2015 installed?"
+rem    exit 1
+rem )
+
+
+REM ========== REQUIRES Win 10 SDK be installed, or files otherwise copied to location below!
+robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%"  "%LIBRARY_BIN%" *.dll /E
+robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%"  "%PREFIX%" *.dll /E
+if %ERRORLEVEL% GEQ 8 exit 1
+
+REM ========== This one comes from visual studio 2019
+set "VC_VER=142"
+REM Locate a VS 2019 install that ships vcvarsall.bat, then load its environment to obtain VCToolsRedistDir.
+for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [16^,17^) -property installationPath`) do (
+    if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (
+        set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat"
+        goto :vcvars_found
+    )
+)
+echo ERROR: no Visual Studio 2019 installation with vcvarsall.bat was found & exit 1
+:vcvars_found
+@setlocal
+call "%VS15VCVARSALL%" x64
+set "REDIST_ROOT=%VCToolsRedistDir%%VC_PATH%"
+REM robocopy exit codes below 8 indicate success, so only 8 and above abort; every copy must run before exiting.
+robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%LIBRARY_BIN%" *.dll /E
+if %ERRORLEVEL% GEQ 8 exit 1
+robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%PREFIX%" *.dll /E
+if %ERRORLEVEL% GEQ 8 exit 1
+robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%LIBRARY_BIN%" *.dll /E
+if %ERRORLEVEL% GEQ 8 exit 1
+robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%PREFIX%" *.dll /E
+if %ERRORLEVEL% GEQ 8 exit 1
+@endlocal & exit 0
diff --git a/pytorch3d/packaging/vs2019/meta.yaml b/pytorch3d/packaging/vs2019/meta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e3f8b4714818e1fe5754a30ceb2070ff000991fd
--- /dev/null
+++ b/pytorch3d/packaging/vs2019/meta.yaml
@@ -0,0 +1,45 @@
+{% set vcver="14.2" %}
+{% set vcfeature="14" %}
+{% set vsyear="2019" %}
+{% set fullver="15.4.27004.2010" %}
+# NOTE(review): fullver is identical to the value in the vs2017 recipe - confirm it is the intended VS 2019 version.
+package:
+  name: vs{{ vsyear }}
+  version: {{ fullver }}
+
+build:
+  skip: True  [not win]
+  script_env:
+    - VSDEVCMD_ARGS # [win]
+
+outputs:
+  - name: vs{{ vsyear }}_{{ cross_compiler_target_platform }}
+    script: install_activate.bat
+    track_features:
+      # VS 2019 is binary-compatible with VS 2017/vc 14.1 and 2015/vc14.  Tools are "v142".
+      strong:
+        - vc{{ vcfeature }}
+    run_exports:
+      - vc {{ vcver }}
+    about:
+      summary: Activation and version verification of MSVC {{ vcver }} (VS {{ vsyear }}) compiler
+      license: BSD 3-clause
+  - name: vs{{ vsyear }}_runtime
+    script: install_runtime.bat
+  - name: vc
+    version: {{ vcver }}
+    track_features:
+      - vc{{ vcfeature }}
+    requirements:
+      run:
+        - {{ pin_subpackage('vs' ~ vsyear ~ '_runtime') }}
+    about:
+      home: https://github.com/conda/conda/wiki/VC-features
+      license: Modified BSD License (3-clause)
+      license_family: BSD
+      summary: A meta-package to track VC features.
+      description: |
+          This metapackage is used to activate vc features without
+          depending on Python.
+      doc_url: https://github.com/conda/conda/wiki/VC-features
+      dev_url: https://github.com/conda/conda/wiki/VC-features
diff --git a/pytorch3d/projects/__init__.py b/pytorch3d/projects/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/projects/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/projects/implicitron_trainer/README.md b/pytorch3d/projects/implicitron_trainer/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f8f875eb00ef2bdfd6504173c5e62f0c2d06151d
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/README.md
@@ -0,0 +1,455 @@
+# Introduction
+
+Implicitron is a PyTorch3D-based framework for new-view synthesis using neural-network-based representations.
+
+# License
+
+Implicitron is distributed as part of PyTorch3D under the [BSD license](https://github.com/facebookresearch/pytorch3d/blob/main/LICENSE).
+It includes code from the [NeRF](https://github.com/bmild/nerf), [SRN](http://github.com/vsitzmann/scene-representation-networks) and [IDR](http://github.com/lioryariv/idr) repos.
+See [LICENSE-3RD-PARTY](https://github.com/facebookresearch/pytorch3d/blob/main/LICENSE-3RD-PARTY) for their licenses.
+
+
+# Installation
+
+There are three ways to set up Implicitron, depending on the flexibility level required.
+If you only want to train or evaluate models as they are implemented, changing only the parameters, you can just install the package.
+Implicitron also provides a flexible API that supports user-defined plug-ins;
+if you want to re-implement some of the components without changing the high-level pipeline, you need to create a custom launcher script.
+The most flexible option, though, is cloning PyTorch3D repo and building it from sources, which allows changing the code in arbitrary ways.
+Below, we describe all three options in more detail.
+
+
+## [Option 1] Running an executable from the package
+
+This option allows you to use the code as is without changing the implementations.
+Only configuration can be changed (see [Configuration system](#configuration-system)).
+
+For this setup, install the dependencies and PyTorch3D from conda following [the guide](https://github.com/facebookresearch/pytorch3d/blob/master/INSTALL.md#1-install-with-cuda-support-from-anaconda-cloud-on-linux-only). Then, install implicitron-specific dependencies:
+
+```shell
+pip install "hydra-core>=1.1" visdom lpips matplotlib accelerate
+```
+
+Runner executable is available as `pytorch3d_implicitron_runner` shell command.
+See [Running](#running) section below for examples of training and evaluation commands.
+
+
+## [Option 2] Supporting custom implementations
+
+To plug in custom implementations, for example, of renderer or implicit-function protocols, you need to create your own runner script and import the plug-in implementations there.
+First, install PyTorch3D and Implicitron dependencies as described in the previous section.
+Then, implement the custom script; copying `pytorch3d/projects/implicitron_trainer` is a good place to start.
+See [Custom plugins](#custom-plugins) for more information on how to import implementations and enable them in the configs.
+
+
+## [Option 3] Cloning PyTorch3D repo
+
+This is the most flexible way to set up Implicitron as it allows changing the code directly.
+It allows modifying the high-level rendering pipeline or implementing yet-unsupported loss functions.
+Please follow the instructions to [install PyTorch3D from a local clone](https://github.com/facebookresearch/pytorch3d/blob/main/INSTALL.md#2-install-from-a-local-clone).
+Then, install Implicitron-specific dependencies:
+
+```shell
+pip install "hydra-core>=1.1" visdom lpips matplotlib accelerate
+```
+
+You are still encouraged to implement custom plugins as above where possible as it makes reusing the code easier.
+The executable is located in `pytorch3d/projects/implicitron_trainer`.
+
+> **_NOTE:_**  Both `pytorch3d_implicitron_runner` and `pytorch3d_implicitron_visualizer`
+executables (mentioned below) are not available when using a local clone.
+Instead, users should use the python scripts `experiment.py` and `visualize_reconstruction.py` (see the [Running](#running) section below).
+
+
+# Running
+
+This section assumes that you use the executable provided by the installed package
+(Option 1 / Option 2 in [Installation](#installation) above),
+i.e. `pytorch3d_implicitron_runner` and `pytorch3d_implicitron_visualizer` are available.
+
+> **_NOTE:_**  If the executables are not available (e.g. when using a local clone - Option 3 in [Installation](#installation)),
+users should directly use the `experiment.py` and `visualize_reconstruction.py` python scripts
+which correspond to the executables as follows:
+- `pytorch3d_implicitron_runner` corresponds to `<pytorch3d_root>/projects/implicitron_trainer/experiment.py`
+- `pytorch3d_implicitron_visualizer` corresponds to `<pytorch3d_root>/projects/implicitron_trainer/visualize_reconstruction.py`
+
+For instance, in order to directly execute training with the python script, users can call:
+```shell
+cd <pytorch3d_root>/projects/
+python -m implicitron_trainer.experiment <args>
+```
+
+If you have a custom `experiment.py` or `visualize_reconstruction.py` script
+(as in Option 2 [above](#installation)), replace the executable with the path to your script.
+
+## Training
+
+To run training, pass a yaml config file, followed by a list of overridden arguments.
+For example, to train NeRF on the first skateboard sequence from CO3D dataset, you can run:
+```shell
+dataset_args=data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
+pytorch3d_implicitron_runner --config-path ./configs/ --config-name repro_singleseq_nerf \
+    $dataset_args.dataset_root=<DATASET_ROOT> $dataset_args.category='skateboard' \
+    $dataset_args.test_restrict_sequence_id=0 test_when_finished=True exp_dir=<CHECKPOINT_DIR>
+```
+
+Here, `--config-path` points to the config path relative to `pytorch3d_implicitron_runner` location;
+`--config-name` picks the config (in this case, `repro_singleseq_nerf.yaml`);
+`test_when_finished` will launch evaluation script once training is finished.
+Replace `<DATASET_ROOT>` with the location where the dataset in Implicitron format is stored
+and `<CHECKPOINT_DIR>` with a directory where checkpoints will be dumped during training.
+Other configuration parameters can be overridden in the same way.
+See [Configuration system](#configuration-system) section for more information on this.
+
+### Visdom logging
+
+Note that the training script logs its progress to Visdom. Make sure to start a visdom server before the training commences:
+```
+python -m visdom.server
+```
+> In case a Visdom server is not started, the console will get flooded with `requests.exceptions.ConnectionError` errors signalling that a Visdom server is not available. Note that these errors <b>will NOT interrupt</b> the program and the training will still continue without issues.
+
+## Evaluation
+
+To run evaluation on the latest checkpoint after (or during) training, simply add `eval_only=True` to your training command.
+
+E.g. for executing the evaluation on the NeRF skateboard sequence, you can run:
+```shell
+dataset_args=data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
+pytorch3d_implicitron_runner --config-path ./configs/ --config-name repro_singleseq_nerf \
+    $dataset_args.dataset_root=<CO3D_DATASET_ROOT> $dataset_args.category='skateboard' \
+    $dataset_args.test_restrict_sequence_id=0 exp_dir=<CHECKPOINT_DIR> eval_only=True
+```
+Evaluation prints the metrics to `stdout` and dumps them to a json file in `exp_dir`.
+
+## Visualisation
+
+The script produces a video of renders by a trained model assuming a pre-defined camera trajectory.
+In order for it to work, `ffmpeg` needs to be installed:
+
+```shell
+conda install ffmpeg
+```
+
+Here is an example of calling the script:
+```shell
+pytorch3d_implicitron_visualizer exp_dir=<CHECKPOINT_DIR> \
+    visdom_show_preds=True n_eval_cameras=40 render_size="[64,64]" video_size="[256,256]"
+```
+
+The argument `n_eval_cameras` sets the number of rendering viewpoints sampled on a trajectory, which defaults to a circular fly-around;
+`render_size` sets the size of a render passed to the model, which can be resized to `video_size` before writing.
+
+Rendered videos of images, masks, and depth maps will be saved to `<CHECKPOINT_DIR>/video`.
+
+
+# Configuration system
+
+We use hydra and OmegaConf to parse the configs.
+The config schema and default values are defined by the dataclasses implementing the modules.
+More specifically, if a class derives from `Configurable`, its fields can be set in config yaml files or overridden in CLI.
+For example, `GenericModel` has a field `render_image_width` with the default value 400.
+If it is specified in the yaml config file or in CLI command, the new value will be used.
+
+Configurables can form hierarchies.
+For example, `GenericModel` has a field `raysampler: RaySampler`, which is also Configurable.
+In the config, inner parameters can be propagated using `_args` postfix, e.g. to change `raysampler.n_pts_per_ray_training` (the number of sampled points per ray), the node `raysampler_args.n_pts_per_ray_training` should be specified.
+
+### Top-level configuration class: `Experiment`
+
+<b>The root of the hierarchy is defined by `Experiment` Configurable in `<pytorch3d_root>/projects/implicitron_trainer/experiment.py`.</b>
+
+It has top-level fields like `seed`, which seeds the random number generator.
+Additionally, it has non-leaf nodes like `model_factory_ImplicitronModelFactory_args.model_GenericModel_args`, which dispatches the config parameters to `GenericModel`.
+Thus, changing the model parameters may be achieved in two ways: either by editing the config file, e.g.
+```yaml
+model_factory_ImplicitronModelFactory_args:
+    model_GenericModel_args:
+        render_image_width: 800
+        raysampler_args:
+            n_pts_per_ray_training: 128
+```
+
+or, equivalently, by adding the following to `pytorch3d_implicitron_runner` arguments:
+
+```shell
+model_args=model_factory_ImplicitronModelFactory_args.model_GenericModel_args
+$model_args.render_image_width=800 $model_args.raysampler_args.n_pts_per_ray_training=128
+```
+
+See the documentation in `pytorch3d/implicitron/tools/config.py` for more details.
+
+## Replaceable implementations
+
+Sometimes changing the model parameters does not provide enough flexibility, and you want to provide a new implementation for a building block.
+The configuration system also supports it!
+Abstract classes like `BaseRenderer` derive from `ReplaceableBase` instead of `Configurable`.
+This means that other Configurables can refer to them using the base type, while the specific implementation is chosen in the config using `_class_type`-postfixed node.
+In that case, `_args` node name has to include the implementation type.
+More specifically, to change renderer settings, the config will look like this:
+```yaml
+model_factory_ImplicitronModelFactory_args:
+    model_GenericModel_args:
+        renderer_class_type: LSTMRenderer
+        renderer_LSTMRenderer_args:
+            num_raymarch_steps: 10
+            hidden_size: 16
+```
+
+See the documentation in `pytorch3d/implicitron/tools/config.py` for more details on the configuration system.
+
+## Custom plugins
+
+If you have an idea for another implementation of a replaceable component, it can be plugged in without changing the core code.
+For that, you need to set up Implicitron through option 2 or 3 above.
+Let's say you want to implement a renderer that accumulates opacities similar to an X-ray machine.
+First, create a module `x_ray_renderer.py` with a class deriving from `BaseRenderer`:
+
+```python
+from pytorch3d.implicitron.tools.config import registry
+
+@registry.register
+class XRayRenderer(BaseRenderer, torch.nn.Module):
+    n_pts_per_ray: int = 64
+
+    def __post_init__(self):
+        ...  # custom initialization
+
+    def forward(
+        self,
+        ray_bundle,
+        implicit_functions=[],
+        evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION,
+        **kwargs,
+    ) -> RendererOutput:
+        ...
+```
+
+Please note the `@registry.register` decorator, which registers the plug-in as an implementation of `BaseRenderer`.
+IMPORTANT: In order for it to run, the class (or its enclosing module) has to be imported in your launch script.
+Additionally, this has to be done before parsing the root configuration class `ExperimentConfig`.
+Simply add `from . import x_ray_renderer` at the beginning of `experiment.py`.
+
+After that, you should be able to change the config with:
+```yaml
+model_factory_ImplicitronModelFactory_args:
+    model_GenericModel_args:
+        renderer_class_type: XRayRenderer
+        renderer_XRayRenderer_args:
+            n_pts_per_ray: 128
+```
+
+to replace the implementation and potentially override the parameters.
+
+# Code and config structure
+
+The main object for this trainer loop is `Experiment`. It has four top-level replaceable components.
+
+* `data_source`: This is a `DataSourceBase` which defaults to `ImplicitronDataSource`.
+It constructs the data sets and dataloaders.
+* `model_factory`: This is a `ModelFactoryBase` which defaults to `ImplicitronModelFactory`.
+It constructs the model, which is usually an instance of `OverfitModel` (for NeRF-style training with overfitting to one scene) or `GenericModel` (that is able to generalize to multiple scenes by NeRFormer-style conditioning on other scene views), and can load its weights from a checkpoint.
+* `optimizer_factory`: This is an `OptimizerFactoryBase` which defaults to `ImplicitronOptimizerFactory`.
+It constructs the optimizer and can load its weights from a checkpoint.
+* `training_loop`: This is a `TrainingLoopBase` which defaults to `ImplicitronTrainingLoop` and defines the main training loop.
+
+As per above, the config structure is parsed automatically from the module hierarchy.
+In particular, for ImplicitronModelFactory with generic model, model parameters are contained in the `model_factory_ImplicitronModelFactory_args.model_GenericModel_args` node, and dataset parameters in `data_source_ImplicitronDataSource_args` node.
+
+Here is the class structure of GenericModel (single-line edges show aggregation, while double lines show available implementations):
+```
+model_GenericModel_args: GenericModel
+└-- global_encoder_*_args: GlobalEncoderBase
+    ╘== SequenceAutodecoder
+        └-- autodecoder_args: Autodecoder
+    ╘== HarmonicTimeEncoder
+└-- raysampler_*_args: RaySampler
+    ╘== AdaptiveRaySampler
+    ╘== NearFarRaySampler
+└-- renderer_*_args: BaseRenderer
+    ╘== MultiPassEmissionAbsorptionRenderer
+    ╘== LSTMRenderer
+    ╘== SignedDistanceFunctionRenderer
+        └-- ray_tracer_args: RayTracing
+        └-- ray_normal_coloring_network_args: RayNormalColoringNetwork
+└-- implicit_function_*_args: ImplicitFunctionBase
+    ╘== NeuralRadianceFieldImplicitFunction
+    ╘== SRNImplicitFunction
+        └-- raymarch_function_args: SRNRaymarchFunction
+        └-- pixel_generator_args: SRNPixelGenerator
+    ╘== SRNHyperNetImplicitFunction
+        └-- hypernet_args: SRNRaymarchHyperNet
+        └-- pixel_generator_args: SRNPixelGenerator
+    ╘== IdrFeatureField
+└-- image_feature_extractor_*_args: FeatureExtractorBase
+    ╘== ResNetFeatureExtractor
+└-- view_pooler_args: ViewPooler
+    └-- view_sampler_args: ViewSampler
+    └-- feature_aggregator_*_args: FeatureAggregatorBase
+        ╘== IdentityFeatureAggregator
+        ╘== AngleWeightedIdentityFeatureAggregator
+        ╘== AngleWeightedReductionFeatureAggregator
+        ╘== ReductionFeatureAggregator
+```
+
+Here is the class structure of OverfitModel:
+
+```
+model_OverfitModel_args: OverfitModel
+└-- raysampler_*_args: RaySampler
+    ╘== AdaptiveRaySampler
+    ╘== NearFarRaySampler
+└-- renderer_*_args: BaseRenderer
+    ╘== MultiPassEmissionAbsorptionRenderer
+    ╘== LSTMRenderer
+    ╘== SignedDistanceFunctionRenderer
+        └-- ray_tracer_args: RayTracing
+        └-- ray_normal_coloring_network_args: RayNormalColoringNetwork
+└-- implicit_function_*_args: ImplicitFunctionBase
+    ╘== NeuralRadianceFieldImplicitFunction
+    ╘== SRNImplicitFunction
+        └-- raymarch_function_args: SRNRaymarchFunction
+        └-- pixel_generator_args: SRNPixelGenerator
+    ╘== SRNHyperNetImplicitFunction
+        └-- hypernet_args: SRNRaymarchHyperNet
+        └-- pixel_generator_args: SRNPixelGenerator
+    ╘== IdrFeatureField
+└-- coarse_implicit_function_*_args: ImplicitFunctionBase
+    ╘== NeuralRadianceFieldImplicitFunction
+    ╘== SRNImplicitFunction
+        └-- raymarch_function_args: SRNRaymarchFunction
+        └-- pixel_generator_args: SRNPixelGenerator
+    ╘== SRNHyperNetImplicitFunction
+        └-- hypernet_args: SRNRaymarchHyperNet
+        └-- pixel_generator_args: SRNPixelGenerator
+    ╘== IdrFeatureField
+```
+
+OverfitModel has been introduced as a simple class that disentangles NeRF-style models, which follow the overfit pattern,
+from the GenericModel.
+
+
+Please look at the annotations of the respective classes or functions for the lists of hyperparameters.
+`tests/experiment.yaml` shows every possible option if you have no user-defined classes.
+
+
+# Implementations of existing methods
+
+We provide configuration files that implement several existing works.
+
+<b>The configuration files live in `pytorch3d/projects/implicitron_trainer/configs`.</b>
+
+
+## NeRF
+
+The following config file corresponds to training of a vanilla NeRF on the Blender Synthetic dataset
+(see https://arxiv.org/abs/2003.08934 for details of the method):
+
+`./configs/repro_singleseq_nerf_blender.yaml`
+
+
+### Downloading Blender-Synthetic
+Training requires the Blender Synthetic dataset.
+To download the dataset, visit the [gdrive bucket](https://drive.google.com/file/d/18JxhpWD-4ZmuFKLzKlAw-w5PpzZxXOcG/view?usp=share_link)
+and click Download.
+Then unpack the downloaded .zip file to a folder which we call `<BLENDER_DATASET_ROOT_FOLDER>`.
+
+
+### Launching NeRF training
+In order to train NeRF on the "drums" scene, execute the following command:
+```shell
+export BLENDER_DATASET_ROOT="<BLENDER_DATASET_ROOT_FOLDER>"
+export BLENDER_SINGLESEQ_CLASS="drums"
+pytorch3d_implicitron_runner --config-path ./configs/ --config-name repro_singleseq_nerf_blender
+```
+
+Note that the training scene is selected by setting the environment variable `BLENDER_SINGLESEQ_CLASS`
+appropriately (one of `"chair"`, `"drums"`, `"ficus"`, `"hotdog"`, `"lego"`, `"materials"`, `"mic"`, `"ship"`).
+
+By default, the training outputs will be stored to `"./data/nerf_blender_repro/$BLENDER_SINGLESEQ_CLASS/"`
+
+
+### Visualizing trained NeRF
+```shell
+pytorch3d_implicitron_visualizer exp_dir=<CHECKPOINT_DIR> \
+    visdom_show_preds=True n_eval_cameras=40 render_size="[64,64]" video_size="[256,256]"
+```
+where `<CHECKPOINT_DIR>` corresponds to the directory with the training outputs (defaults to `"./data/nerf_blender_repro/$BLENDER_SINGLESEQ_CLASS/"`).
+
+The script will output a rendered video of the learned radiance field to `"./data/nerf_blender_repro/$BLENDER_SINGLESEQ_CLASS/"` (requires `ffmpeg`).
+
+> **_NOTE:_** Recall that, if `pytorch3d_implicitron_runner`/`pytorch3d_implicitron_visualizer` are not available, replace the calls
+with `cd <pytorch3d_root>/projects/; python -m implicitron_trainer.[experiment|visualize_reconstruction]`
+
+
+## CO3D experiments
+
+Common Objects in 3D (CO3D) is a large-scale dataset of videos of rigid objects grouped into 50 common categories.
+Implicitron provides implementations and config files to reproduce the results from [the paper](https://arxiv.org/abs/2109.00512).
+Please follow [the link](https://github.com/facebookresearch/co3d#automatic-batch-download) for the instructions to download the dataset.
+In training and evaluation scripts, use the download location as `<DATASET_ROOT>`.
+It is also possible to define environment variable `CO3D_DATASET_ROOT` instead of specifying it.
+To reproduce the experiments from the paper, use the following configs.
+
+For single-sequence experiments:
+
+| Method          |   config file                       |
+|-----------------|-------------------------------------|
+| NeRF            | repro_singleseq_nerf.yaml           |
+| NeRF + WCE      | repro_singleseq_nerf_wce.yaml       |
+| NerFormer       | repro_singleseq_nerformer.yaml      |
+| IDR             | repro_singleseq_idr.yaml            |
+| SRN             | repro_singleseq_srn_noharm.yaml     |
+| SRN + γ         | repro_singleseq_srn.yaml            |
+| SRN + WCE       | repro_singleseq_srn_wce_noharm.yaml |
+| SRN + WCE + γ   | repro_singleseq_srn_wce.yaml        |
+
+For multi-sequence autodecoder experiments (without generalization to new sequences):
+
+| Method          |   config file                              |
+|-----------------|--------------------------------------------|
+| NeRF + AD       | repro_multiseq_nerf_ad.yaml                |
+| SRN + AD        | repro_multiseq_srn_ad_hypernet_noharm.yaml |
+| SRN + γ + AD    | repro_multiseq_srn_ad_hypernet.yaml        |
+
+For multi-sequence experiments (with generalization to new sequences):
+
+| Method          |   config file                        |
+|-----------------|--------------------------------------|
+| NeRF + WCE      | repro_multiseq_nerf_wce.yaml         |
+| NerFormer       | repro_multiseq_nerformer.yaml        |
+| SRN + WCE       | repro_multiseq_srn_wce_noharm.yaml   |
+| SRN + WCE + γ   | repro_multiseq_srn_wce.yaml          |
+
+
+## CO3Dv2 experiments
+
+The following config files implement training on the second version of CO3D, `CO3Dv2`.
+
+In order to launch trainings, set the `CO3DV2_DATASET_ROOT` environment variable
+to the root folder of the dataset (note that the name of the env. variable differs from the CO3Dv1 version).
+
+Single-sequence experiments:
+
+| Method          |   config file                         |
+|-----------------|-------------------------------------|
+| NeRF            | repro_singleseq_v2_nerf.yaml        |
+| NerFormer       | repro_singleseq_v2_nerformer.yaml   |
+| IDR             | repro_singleseq_v2_idr.yaml         |
+| SRN             | repro_singleseq_v2_srn_noharm.yaml  |
+
+Multi-sequence autodecoder experiments (without generalization to new sequences):
+
+| Method          |   config file                                |
+|-----------------|--------------------------------------------|
+| NeRF + AD       | repro_multiseq_v2_nerf_ad.yaml             |
+| SRN + γ + AD    | repro_multiseq_v2_srn_ad_hypernet.yaml     |
+
+Multi-sequence experiments (with generalization to new sequences):
+
+| Method          |   config file                            |
+|-----------------|----------------------------------------|
+| NeRF + WCE      | repro_multiseq_v2_nerf_wce.yaml        |
+| NerFormer       | repro_multiseq_v2_nerformer.yaml       |
+| SRN + WCE       | repro_multiseq_v2_srn_wce_noharm.yaml  |
+| SRN + WCE + γ   | repro_multiseq_v2_srn_wce.yaml         |
diff --git a/pytorch3d/projects/implicitron_trainer/__init__.py b/pytorch3d/projects/implicitron_trainer/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/projects/implicitron_trainer/configs/overfit_base.yaml b/pytorch3d/projects/implicitron_trainer/configs/overfit_base.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d5cc0cccbad843c750a4653c3b72fbe90a57547e
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/overfit_base.yaml
@@ -0,0 +1,79 @@
+defaults:
+- default_config
+- _self_
+exp_dir: ./data/exps/overfit_base/
+training_loop_ImplicitronTrainingLoop_args:
+  visdom_port: 8097
+  visualize_interval: 0
+  max_epochs: 1000
+data_source_ImplicitronDataSource_args:
+  data_loader_map_provider_class_type: SequenceDataLoaderMapProvider
+  dataset_map_provider_class_type: JsonIndexDatasetMapProvider
+  data_loader_map_provider_SequenceDataLoaderMapProvider_args:
+    dataset_length_train: 1000
+    dataset_length_val: 1
+    num_workers: 8
+  dataset_map_provider_JsonIndexDatasetMapProvider_args:
+    dataset_root: ${oc.env:CO3D_DATASET_ROOT}
+    n_frames_per_sequence: -1
+    test_on_train: true
+    test_restrict_sequence_id: 0
+    dataset_JsonIndexDataset_args:
+      load_point_clouds: false
+      mask_depths: false
+      mask_images: false
+model_factory_ImplicitronModelFactory_args:
+  model_class_type: "OverfitModel"
+  model_OverfitModel_args:
+    loss_weights:
+      loss_mask_bce: 1.0
+      loss_prev_stage_mask_bce: 1.0
+      loss_autodecoder_norm: 0.01
+      loss_rgb_mse: 1.0
+      loss_prev_stage_rgb_mse: 1.0
+    output_rasterized_mc: false
+    chunk_size_grid: 102400
+    render_image_height: 400
+    render_image_width: 400
+    share_implicit_function_across_passes: false
+    implicit_function_class_type: "NeuralRadianceFieldImplicitFunction"
+    implicit_function_NeuralRadianceFieldImplicitFunction_args:
+      n_harmonic_functions_xyz: 10
+      n_harmonic_functions_dir: 4
+      n_hidden_neurons_xyz: 256
+      n_hidden_neurons_dir: 128
+      n_layers_xyz: 8
+      append_xyz:
+      - 5
+    coarse_implicit_function_class_type: "NeuralRadianceFieldImplicitFunction"
+    coarse_implicit_function_NeuralRadianceFieldImplicitFunction_args:
+      n_harmonic_functions_xyz: 10
+      n_harmonic_functions_dir: 4
+      n_hidden_neurons_xyz: 256
+      n_hidden_neurons_dir: 128
+      n_layers_xyz: 8
+      append_xyz:
+      - 5
+    raysampler_AdaptiveRaySampler_args:
+      n_rays_per_image_sampled_from_mask: 1024
+      scene_extent: 8.0
+      n_pts_per_ray_training: 64
+      n_pts_per_ray_evaluation: 64
+      stratified_point_sampling_training: true
+      stratified_point_sampling_evaluation: false
+    renderer_MultiPassEmissionAbsorptionRenderer_args:
+      n_pts_per_ray_fine_training: 64
+      n_pts_per_ray_fine_evaluation: 64
+      append_coarse_samples_to_fine: true
+      density_noise_std_train: 1.0
+optimizer_factory_ImplicitronOptimizerFactory_args:
+  breed: Adam
+  weight_decay: 0.0
+  lr_policy: MultiStepLR
+  multistep_lr_milestones: []
+  lr: 0.0005
+  gamma: 0.1
+  momentum: 0.9
+  betas:
+  - 0.9
+  - 0.999
diff --git a/pytorch3d/projects/implicitron_trainer/configs/overfit_singleseq_base.yaml b/pytorch3d/projects/implicitron_trainer/configs/overfit_singleseq_base.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0349fd27a1ab25d7155f1d05c6258545acd6a5f7
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/overfit_singleseq_base.yaml
@@ -0,0 +1,42 @@
+defaults:
+- overfit_base
+- _self_
+data_source_ImplicitronDataSource_args:
+  data_loader_map_provider_SequenceDataLoaderMapProvider_args:
+    batch_size: 1
+    dataset_length_train: 1000
+    dataset_length_val: 1
+    num_workers: 8
+  dataset_map_provider_JsonIndexDatasetMapProvider_args:
+    assert_single_seq: true
+    n_frames_per_sequence: -1
+    test_restrict_sequence_id: 0
+    test_on_train: false
+model_factory_ImplicitronModelFactory_args:
+  model_class_type: "OverfitModel"
+  model_OverfitModel_args:
+    render_image_height: 800
+    render_image_width: 800
+    log_vars:
+    - loss_rgb_psnr_fg
+    - loss_rgb_psnr
+    - loss_eikonal
+    - loss_prev_stage_rgb_psnr
+    - loss_mask_bce
+    - loss_prev_stage_mask_bce
+    - loss_rgb_mse
+    - loss_prev_stage_rgb_mse
+    - loss_depth_abs
+    - loss_depth_abs_fg
+    - loss_kl
+    - loss_mask_neg_iou
+    - objective
+    - epoch
+    - sec/it
+optimizer_factory_ImplicitronOptimizerFactory_args:
+  lr: 0.0005
+  multistep_lr_milestones:
+  - 200
+  - 300
+training_loop_ImplicitronTrainingLoop_args:
+  max_epochs: 400
diff --git a/pytorch3d/projects/implicitron_trainer/configs/overfit_singleseq_nerf_blender.yaml b/pytorch3d/projects/implicitron_trainer/configs/overfit_singleseq_nerf_blender.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c61d759f382beb27da12d8e9655599f367161fd9
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/overfit_singleseq_nerf_blender.yaml
@@ -0,0 +1,56 @@
+defaults:
+- overfit_singleseq_base
+- _self_
+exp_dir: "./data/overfit_nerf_blender_repro/${oc.env:BLENDER_SINGLESEQ_CLASS}"
+data_source_ImplicitronDataSource_args:
+  data_loader_map_provider_SequenceDataLoaderMapProvider_args:
+    dataset_length_train: 100
+  dataset_map_provider_class_type: BlenderDatasetMapProvider
+  dataset_map_provider_BlenderDatasetMapProvider_args:
+    base_dir: ${oc.env:BLENDER_DATASET_ROOT}/${oc.env:BLENDER_SINGLESEQ_CLASS}
+    n_known_frames_for_test: null
+    object_name: ${oc.env:BLENDER_SINGLESEQ_CLASS}
+    path_manager_factory_class_type: PathManagerFactory
+    path_manager_factory_PathManagerFactory_args:
+      silence_logs: true
+
+model_factory_ImplicitronModelFactory_args:
+  model_class_type: "OverfitModel"
+  model_OverfitModel_args:
+    mask_images: false
+    raysampler_class_type: AdaptiveRaySampler
+    raysampler_AdaptiveRaySampler_args:
+      n_pts_per_ray_training: 64
+      n_pts_per_ray_evaluation: 64
+      n_rays_per_image_sampled_from_mask: 4096
+      stratified_point_sampling_training: true
+      stratified_point_sampling_evaluation: false
+      scene_extent: 2.0
+      scene_center:
+      - 0.0
+      - 0.0
+      - 0.0
+    renderer_MultiPassEmissionAbsorptionRenderer_args:
+      density_noise_std_train: 0.0
+      n_pts_per_ray_fine_training: 128
+      n_pts_per_ray_fine_evaluation: 128
+      raymarcher_EmissionAbsorptionRaymarcher_args:
+        blend_output: false
+    loss_weights:
+      loss_rgb_mse: 1.0
+      loss_prev_stage_rgb_mse: 1.0
+      loss_mask_bce: 0.0
+      loss_prev_stage_mask_bce: 0.0
+      loss_autodecoder_norm: 0.00
+
+optimizer_factory_ImplicitronOptimizerFactory_args:
+  exponential_lr_step_size: 3001
+  lr_policy: LinearExponential
+  linear_exponential_lr_milestone: 200
+
+training_loop_ImplicitronTrainingLoop_args:
+  max_epochs: 6000
+  metric_print_interval: 10
+  store_checkpoints_purge: 3
+  test_when_finished: true
+  validation_interval: 100
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_base.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_base.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9d6af2608fe23be8924a354e3cf5f20d690bdac9
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_base.yaml
@@ -0,0 +1,80 @@
+defaults:
+- default_config
+- _self_
+exp_dir: ./data/exps/base/
+training_loop_ImplicitronTrainingLoop_args:
+  visdom_port: 8097
+  visualize_interval: 0
+  max_epochs: 1000
+data_source_ImplicitronDataSource_args:
+  data_loader_map_provider_class_type: SequenceDataLoaderMapProvider
+  dataset_map_provider_class_type: JsonIndexDatasetMapProvider
+  data_loader_map_provider_SequenceDataLoaderMapProvider_args:
+    dataset_length_train: 1000
+    dataset_length_val: 1
+    num_workers: 8
+  dataset_map_provider_JsonIndexDatasetMapProvider_args:
+    dataset_root: ${oc.env:CO3D_DATASET_ROOT}
+    n_frames_per_sequence: -1
+    test_on_train: true
+    test_restrict_sequence_id: 0
+    dataset_JsonIndexDataset_args:
+      load_point_clouds: false
+      mask_depths: false
+      mask_images: false
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    loss_weights:
+      loss_mask_bce: 1.0
+      loss_prev_stage_mask_bce: 1.0
+      loss_autodecoder_norm: 0.01
+      loss_rgb_mse: 1.0
+      loss_prev_stage_rgb_mse: 1.0
+    output_rasterized_mc: false
+    chunk_size_grid: 102400
+    render_image_height: 400
+    render_image_width: 400
+    num_passes: 2
+    implicit_function_NeuralRadianceFieldImplicitFunction_args:
+      n_harmonic_functions_xyz: 10
+      n_harmonic_functions_dir: 4
+      n_hidden_neurons_xyz: 256
+      n_hidden_neurons_dir: 128
+      n_layers_xyz: 8
+      append_xyz:
+      - 5
+    raysampler_AdaptiveRaySampler_args:
+      n_rays_per_image_sampled_from_mask: 1024
+      scene_extent: 8.0
+      n_pts_per_ray_training: 64
+      n_pts_per_ray_evaluation: 64
+      stratified_point_sampling_training: true
+      stratified_point_sampling_evaluation: false
+    renderer_MultiPassEmissionAbsorptionRenderer_args:
+      n_pts_per_ray_fine_training: 64
+      n_pts_per_ray_fine_evaluation: 64
+      append_coarse_samples_to_fine: true
+      density_noise_std_train: 1.0
+    view_pooler_args:
+      view_sampler_args:
+        masked_sampling: false
+    image_feature_extractor_ResNetFeatureExtractor_args:
+      stages:
+      - 1
+      - 2
+      - 3
+      - 4
+      proj_dim: 16
+      image_rescale: 0.32
+      first_max_pool: false
+optimizer_factory_ImplicitronOptimizerFactory_args:
+  breed: Adam
+  weight_decay: 0.0
+  lr_policy: MultiStepLR
+  multistep_lr_milestones: []
+  lr: 0.0005
+  gamma: 0.1
+  momentum: 0.9
+  betas:
+  - 0.9
+  - 0.999
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_normed.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_normed.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b2154c8bfa130d90073f70b7d54ac540a9e557ef
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_normed.yaml
@@ -0,0 +1,18 @@
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    image_feature_extractor_class_type: ResNetFeatureExtractor
+    image_feature_extractor_ResNetFeatureExtractor_args:
+      add_images: true
+      add_masks: true
+      first_max_pool: true
+      image_rescale: 0.375
+      l2_norm: true
+      name: resnet34
+      normalize_image: true
+      pretrained: true
+      stages:
+      - 1
+      - 2
+      - 3
+      - 4
+      proj_dim: 32
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_transformer.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_transformer.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8d24495bbb15ad8d8770dadf5147ec49d2706b08
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_transformer.yaml
@@ -0,0 +1,18 @@
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    image_feature_extractor_class_type: ResNetFeatureExtractor
+    image_feature_extractor_ResNetFeatureExtractor_args:
+      add_images: true
+      add_masks: true
+      first_max_pool: false
+      image_rescale: 0.375
+      l2_norm: true
+      name: resnet34
+      normalize_image: true
+      pretrained: true
+      stages:
+      - 1
+      - 2
+      - 3
+      - 4
+      proj_dim: 16
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_unnormed.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_unnormed.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d4eb3f861089e96bf63b9b0bced5bed7943f134
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_unnormed.yaml
@@ -0,0 +1,19 @@
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    image_feature_extractor_class_type: ResNetFeatureExtractor
+    image_feature_extractor_ResNetFeatureExtractor_args:
+      stages:
+      - 1
+      - 2
+      - 3
+      first_max_pool: false
+      proj_dim: -1
+      l2_norm: false
+      image_rescale: 0.375
+      name: resnet34
+      normalize_image: true
+      pretrained: true
+    view_pooler_args:
+      feature_aggregator_AngleWeightedReductionFeatureAggregator_args:
+        reduction_functions:
+        - AVG
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_base.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_base.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..578fe1a2ccfef253ed268fc84eaf202a1c88c91c
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_base.yaml
@@ -0,0 +1,38 @@
+defaults:
+- repro_base.yaml
+- _self_
+data_source_ImplicitronDataSource_args:
+  data_loader_map_provider_SequenceDataLoaderMapProvider_args:
+    batch_size: 10
+    dataset_length_train: 1000
+    dataset_length_val: 1
+    num_workers: 8
+    train_conditioning_type: SAME
+    val_conditioning_type: SAME
+    test_conditioning_type: SAME
+    images_per_seq_options:
+    - 2
+    - 3
+    - 4
+    - 5
+    - 6
+    - 7
+    - 8
+    - 9
+    - 10
+  dataset_map_provider_JsonIndexDatasetMapProvider_args:
+    assert_single_seq: false
+    task_str: multisequence
+    n_frames_per_sequence: -1
+    test_on_train: true
+    test_restrict_sequence_id: 0
+optimizer_factory_ImplicitronOptimizerFactory_args:
+  multistep_lr_milestones:
+  - 1000
+training_loop_ImplicitronTrainingLoop_args:
+  max_epochs: 3000
+  evaluator_ImplicitronEvaluator_args:
+    camera_difficulty_bin_breaks:
+      - 0.666667
+      - 0.833334
+    is_multisequence: true
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_co3dv2_base.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_co3dv2_base.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9eb9bd9030a5fbc0b48006416137762d89ac2757
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_co3dv2_base.yaml
@@ -0,0 +1,8 @@
+data_source_ImplicitronDataSource_args:
+  dataset_map_provider_class_type: JsonIndexDatasetMapProviderV2
+  dataset_map_provider_JsonIndexDatasetMapProviderV2_args:
+    category: teddybear
+    subset_name: fewview_dev
+training_loop_ImplicitronTrainingLoop_args:
+  evaluator_ImplicitronEvaluator_args:
+    is_multisequence: true
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_idr_ad.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_idr_ad.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f6bb1fe40ca47fb9456b74932e380b43a97e8d43
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_idr_ad.yaml
@@ -0,0 +1,65 @@
+defaults:
+- repro_multiseq_base.yaml
+- _self_
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    loss_weights:
+      loss_mask_bce: 100.0
+      loss_kl: 0.0
+      loss_rgb_mse: 1.0
+      loss_eikonal: 0.1
+    chunk_size_grid: 65536
+    num_passes: 1
+    output_rasterized_mc: true
+    sampling_mode_training: mask_sample
+    global_encoder_class_type: SequenceAutodecoder
+    global_encoder_SequenceAutodecoder_args:
+      autodecoder_args:
+        n_instances: 20000
+        init_scale: 1.0
+        encoding_dim: 256
+    implicit_function_IdrFeatureField_args:
+      n_harmonic_functions_xyz: 6
+      bias: 0.6
+      d_in: 3
+      d_out: 1
+      dims:
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      geometric_init: true
+      pooled_feature_dim: 0
+      skip_in:
+      - 6
+      weight_norm: true
+    renderer_SignedDistanceFunctionRenderer_args:
+      ray_tracer_args:
+        line_search_step: 0.5
+        line_step_iters: 3
+        n_secant_steps: 8
+        n_steps: 100
+        sdf_threshold: 5.0e-05
+      ray_normal_coloring_network_args:
+        d_in: 9
+        d_out: 3
+        dims:
+        - 512
+        - 512
+        - 512
+        - 512
+        mode: idr
+        n_harmonic_functions_dir: 4
+        pooled_feature_dim: 0
+        weight_norm: true
+    raysampler_AdaptiveRaySampler_args:
+      n_rays_per_image_sampled_from_mask: 1024
+      n_pts_per_ray_training: 0
+      n_pts_per_ray_evaluation: 0
+      scene_extent: 8.0
+    renderer_class_type: SignedDistanceFunctionRenderer
+    implicit_function_class_type: IdrFeatureField
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerf_ad.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerf_ad.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aa4291d3503cd731255a364db19f82b6f707f729
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerf_ad.yaml
@@ -0,0 +1,12 @@
+defaults:
+- repro_multiseq_base.yaml
+- _self_
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    chunk_size_grid: 16000
+    view_pooler_enabled: false
+    global_encoder_class_type: SequenceAutodecoder
+    global_encoder_SequenceAutodecoder_args:
+      autodecoder_args:
+        n_instances: 20000
+        encoding_dim: 256
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerf_wce.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerf_wce.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fa366d46ac4a2f09a437cf2632e5735aee34d5fa
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerf_wce.yaml
@@ -0,0 +1,12 @@
+defaults:
+- repro_multiseq_base.yaml
+- repro_feat_extractor_unnormed.yaml
+- _self_
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    chunk_size_grid: 16000
+    view_pooler_enabled: true
+    raysampler_AdaptiveRaySampler_args:
+      n_rays_per_image_sampled_from_mask: 850
+training_loop_ImplicitronTrainingLoop_args:
+  clip_grad: 1.0
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerformer.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerformer.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9aa9f4c5fd0839bc4e3c6fc74f3db3190d559fb5
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerformer.yaml
@@ -0,0 +1,18 @@
+defaults:
+- repro_multiseq_base.yaml
+- repro_feat_extractor_transformer.yaml
+- _self_
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    chunk_size_grid: 16000
+    raysampler_AdaptiveRaySampler_args:
+      n_rays_per_image_sampled_from_mask: 800
+      n_pts_per_ray_training: 32
+      n_pts_per_ray_evaluation: 32
+    renderer_MultiPassEmissionAbsorptionRenderer_args:
+      n_pts_per_ray_fine_training: 16
+      n_pts_per_ray_fine_evaluation: 16
+    implicit_function_class_type: NeRFormerImplicitFunction
+    view_pooler_enabled: true
+    view_pooler_args:
+      feature_aggregator_class_type: IdentityFeatureAggregator
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerformer_angle_w.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerformer_angle_w.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9c9a30fe79dd25afded6cffb80c29610a45803c0
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerformer_angle_w.yaml
@@ -0,0 +1,7 @@
+defaults:
+- repro_multiseq_nerformer.yaml
+- _self_
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    view_pooler_args:
+      feature_aggregator_class_type: AngleWeightedIdentityFeatureAggregator
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_ad_hypernet.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_ad_hypernet.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1b4a2ef2d17d5a7a2d868b1603c996e2fb3ad7b2
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_ad_hypernet.yaml
@@ -0,0 +1,35 @@
+defaults:
+- repro_multiseq_base.yaml
+- _self_
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    chunk_size_grid: 16000
+    view_pooler_enabled: false
+    n_train_target_views: -1
+    num_passes: 1
+    loss_weights:
+      loss_rgb_mse: 200.0
+      loss_prev_stage_rgb_mse: 0.0
+      loss_mask_bce: 1.0
+      loss_prev_stage_mask_bce: 0.0
+      loss_autodecoder_norm: 0.001
+      depth_neg_penalty: 10000.0
+    global_encoder_class_type: SequenceAutodecoder
+    global_encoder_SequenceAutodecoder_args:
+      autodecoder_args:
+        encoding_dim: 256
+        n_instances: 20000
+    raysampler_class_type: NearFarRaySampler
+    raysampler_NearFarRaySampler_args:
+      n_rays_per_image_sampled_from_mask: 2048
+      min_depth: 0.05
+      max_depth: 0.05
+      n_pts_per_ray_training: 1
+      n_pts_per_ray_evaluation: 1
+      stratified_point_sampling_training: false
+      stratified_point_sampling_evaluation: false
+    renderer_class_type: LSTMRenderer
+    implicit_function_class_type: SRNHyperNetImplicitFunction
+optimizer_factory_ImplicitronOptimizerFactory_args:
+  breed: Adam
+  lr: 5.0e-05
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_ad_hypernet_noharm.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_ad_hypernet_noharm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9f29cbbe82ede4f4610949849433a67f91aff07f
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_ad_hypernet_noharm.yaml
@@ -0,0 +1,11 @@
+defaults:
+- repro_multiseq_srn_ad_hypernet.yaml
+- _self_
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    num_passes: 1
+    implicit_function_SRNHyperNetImplicitFunction_args:
+      pixel_generator_args:
+        n_harmonic_functions: 0
+      hypernet_args:
+        n_harmonic_functions: 0
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_wce.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_wce.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4a72c32621d063276a2b765d34e1edd707c87eac
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_wce.yaml
@@ -0,0 +1,31 @@
+defaults:
+- repro_multiseq_base.yaml
+- repro_feat_extractor_normed.yaml
+- _self_
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    chunk_size_grid: 32000
+    num_passes: 1
+    n_train_target_views: -1
+    loss_weights:
+      loss_rgb_mse: 200.0
+      loss_prev_stage_rgb_mse: 0.0
+      loss_mask_bce: 1.0
+      loss_prev_stage_mask_bce: 0.0
+      loss_autodecoder_norm: 0.0
+      depth_neg_penalty: 10000.0
+    raysampler_class_type: NearFarRaySampler
+    raysampler_NearFarRaySampler_args:
+      n_rays_per_image_sampled_from_mask: 2048
+      min_depth: 0.05
+      max_depth: 0.05
+      n_pts_per_ray_training: 1
+      n_pts_per_ray_evaluation: 1
+      stratified_point_sampling_training: false
+      stratified_point_sampling_evaluation: false
+    renderer_class_type: LSTMRenderer
+    implicit_function_class_type: SRNImplicitFunction
+    view_pooler_enabled: true
+optimizer_factory_ImplicitronOptimizerFactory_args:
+  breed: Adam
+  lr: 5.0e-05
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_wce_noharm.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_wce_noharm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d2ea11e367e6b169895546286c80c939724a4754
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_wce_noharm.yaml
@@ -0,0 +1,11 @@
+defaults:
+- repro_multiseq_srn_wce.yaml
+- _self_
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    num_passes: 1
+    implicit_function_SRNImplicitFunction_args:
+      pixel_generator_args:
+        n_harmonic_functions: 0
+      raymarch_function_args:
+        n_harmonic_functions: 0
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_nerf_wce.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_nerf_wce.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0f3ac0553a9a05574626c1228873cd8ac370ec5a
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_nerf_wce.yaml
@@ -0,0 +1,4 @@
+defaults:
+- repro_multiseq_nerf_wce.yaml
+- repro_multiseq_co3dv2_base.yaml
+- _self_
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_nerformer.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_nerformer.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ee7ef332310d444b377798faaf7b67e8575d5b0f
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_nerformer.yaml
@@ -0,0 +1,4 @@
+defaults:
+- repro_multiseq_nerformer.yaml
+- repro_multiseq_co3dv2_base.yaml
+- _self_
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_srn_ad_hypernet.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_srn_ad_hypernet.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bdb544f3217e329a8940b117ceb2f47cdc501692
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_srn_ad_hypernet.yaml
@@ -0,0 +1,4 @@
+defaults:
+- repro_multiseq_srn_ad_hypernet.yaml
+- repro_multiseq_co3dv2_base.yaml
+- _self_
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_srn_wce.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_srn_wce.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b8ae36746035bc35c93867fc01399c61476e14a6
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_srn_wce.yaml
@@ -0,0 +1,4 @@
+defaults:
+- repro_multiseq_srn_wce.yaml
+- repro_multiseq_co3dv2_base.yaml
+- _self_
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_base.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_base.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..572fc7d5e71323f61c9b099c56b7f7aeb900b614
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_base.yaml
@@ -0,0 +1,41 @@
+defaults:
+- repro_base
+- _self_
+data_source_ImplicitronDataSource_args:
+  data_loader_map_provider_SequenceDataLoaderMapProvider_args:
+    batch_size: 1
+    dataset_length_train: 1000
+    dataset_length_val: 1
+    num_workers: 8
+  dataset_map_provider_JsonIndexDatasetMapProvider_args:
+    assert_single_seq: true
+    n_frames_per_sequence: -1
+    test_restrict_sequence_id: 0
+    test_on_train: false
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    render_image_height: 800
+    render_image_width: 800
+    log_vars:
+    - loss_rgb_psnr_fg
+    - loss_rgb_psnr
+    - loss_eikonal
+    - loss_prev_stage_rgb_psnr
+    - loss_mask_bce
+    - loss_prev_stage_mask_bce
+    - loss_rgb_mse
+    - loss_prev_stage_rgb_mse
+    - loss_depth_abs
+    - loss_depth_abs_fg
+    - loss_kl
+    - loss_mask_neg_iou
+    - objective
+    - epoch
+    - sec/it
+optimizer_factory_ImplicitronOptimizerFactory_args:
+  lr: 0.0005
+  multistep_lr_milestones:
+  - 200
+  - 300
+training_loop_ImplicitronTrainingLoop_args:
+  max_epochs: 400
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_co3dv2_base.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_co3dv2_base.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..54e1e2a42037013e0a55f8ad13ca11973d68d6b7
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_co3dv2_base.yaml
@@ -0,0 +1,8 @@
+data_source_ImplicitronDataSource_args:
+  dataset_map_provider_class_type: JsonIndexDatasetMapProviderV2
+  dataset_map_provider_JsonIndexDatasetMapProviderV2_args:
+    category: teddybear
+    subset_name: manyview_dev_0
+training_loop_ImplicitronTrainingLoop_args:
+  evaluator_ImplicitronEvaluator_args:
+    is_multisequence: false
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_idr.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_idr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7224b9d5d9cecd791262a50dde5432cac0d7ed88
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_idr.yaml
@@ -0,0 +1,57 @@
+defaults:
+- repro_singleseq_base
+- _self_
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    loss_weights:
+      loss_mask_bce: 100.0
+      loss_kl: 0.0
+      loss_rgb_mse: 1.0
+      loss_eikonal: 0.1
+    chunk_size_grid: 65536
+    num_passes: 1
+    view_pooler_enabled: false
+    implicit_function_IdrFeatureField_args:
+      n_harmonic_functions_xyz: 6
+      bias: 0.6
+      d_in: 3
+      d_out: 1
+      dims:
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      geometric_init: true
+      pooled_feature_dim: 0
+      skip_in:
+      - 6
+      weight_norm: true
+    renderer_SignedDistanceFunctionRenderer_args:
+      ray_tracer_args:
+        line_search_step: 0.5
+        line_step_iters: 3
+        n_secant_steps: 8
+        n_steps: 100
+        sdf_threshold: 5.0e-05
+      ray_normal_coloring_network_args:
+        d_in: 9
+        d_out: 3
+        dims:
+        - 512
+        - 512
+        - 512
+        - 512
+        mode: idr
+        n_harmonic_functions_dir: 4
+        pooled_feature_dim: 0
+        weight_norm: true
+    raysampler_AdaptiveRaySampler_args:
+      n_rays_per_image_sampled_from_mask: 1024
+      n_pts_per_ray_training: 0
+      n_pts_per_ray_evaluation: 0
+    renderer_class_type: SignedDistanceFunctionRenderer
+    implicit_function_class_type: IdrFeatureField
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fd85af5e7af23f5acd2abec6dae3255e7087cd7c
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf.yaml
@@ -0,0 +1,3 @@
+defaults:
+- repro_singleseq_base
+- _self_
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf_blender.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf_blender.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2a92a92c1f20ea48a2b655211655dafa4e894c23
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf_blender.yaml
@@ -0,0 +1,55 @@
+defaults:
+- repro_singleseq_base
+- _self_
+exp_dir: "./data/nerf_blender_repro/${oc.env:BLENDER_SINGLESEQ_CLASS}"
+data_source_ImplicitronDataSource_args:
+  data_loader_map_provider_SequenceDataLoaderMapProvider_args:
+    dataset_length_train: 100
+  dataset_map_provider_class_type: BlenderDatasetMapProvider
+  dataset_map_provider_BlenderDatasetMapProvider_args:
+    base_dir: ${oc.env:BLENDER_DATASET_ROOT}/${oc.env:BLENDER_SINGLESEQ_CLASS}
+    n_known_frames_for_test: null
+    object_name: ${oc.env:BLENDER_SINGLESEQ_CLASS}
+    path_manager_factory_class_type: PathManagerFactory
+    path_manager_factory_PathManagerFactory_args:
+      silence_logs: true
+
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    mask_images: false
+    raysampler_class_type: AdaptiveRaySampler
+    raysampler_AdaptiveRaySampler_args:
+      n_pts_per_ray_training: 64
+      n_pts_per_ray_evaluation: 64
+      n_rays_per_image_sampled_from_mask: 4096
+      stratified_point_sampling_training: true
+      stratified_point_sampling_evaluation: false
+      scene_extent: 2.0
+      scene_center:
+      - 0.0
+      - 0.0
+      - 0.0
+    renderer_MultiPassEmissionAbsorptionRenderer_args:
+      density_noise_std_train: 0.0
+      n_pts_per_ray_fine_training: 128
+      n_pts_per_ray_fine_evaluation: 128
+      raymarcher_EmissionAbsorptionRaymarcher_args:
+        blend_output: false
+    loss_weights:
+      loss_rgb_mse: 1.0
+      loss_prev_stage_rgb_mse: 1.0
+      loss_mask_bce: 0.0
+      loss_prev_stage_mask_bce: 0.0
+      loss_autodecoder_norm: 0.00
+
+optimizer_factory_ImplicitronOptimizerFactory_args:
+  exponential_lr_step_size: 3001
+  lr_policy: LinearExponential
+  linear_exponential_lr_milestone: 200
+
+training_loop_ImplicitronTrainingLoop_args:
+  max_epochs: 6000
+  metric_print_interval: 10
+  store_checkpoints_purge: 3
+  test_when_finished: true
+  validation_interval: 100
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf_wce.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf_wce.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..38212e35707e2c26b93d3aa593e76579c483ca91
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf_wce.yaml
@@ -0,0 +1,10 @@
+defaults:
+- repro_singleseq_wce_base.yaml
+- repro_feat_extractor_unnormed.yaml
+- _self_
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    chunk_size_grid: 16000
+    view_pooler_enabled: true
+    raysampler_AdaptiveRaySampler_args:
+      n_rays_per_image_sampled_from_mask: 850
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerformer.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerformer.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8983c26f34309fe35d41d43a87f53ddd564db3a5
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerformer.yaml
@@ -0,0 +1,18 @@
+defaults:
+- repro_singleseq_wce_base.yaml
+- repro_feat_extractor_transformer.yaml
+- _self_
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    chunk_size_grid: 16000
+    view_pooler_enabled: true
+    implicit_function_class_type: NeRFormerImplicitFunction
+    raysampler_AdaptiveRaySampler_args:
+      n_rays_per_image_sampled_from_mask: 800
+      n_pts_per_ray_training: 32
+      n_pts_per_ray_evaluation: 32
+    renderer_MultiPassEmissionAbsorptionRenderer_args:
+      n_pts_per_ray_fine_training: 16
+      n_pts_per_ray_fine_evaluation: 16
+    view_pooler_args:
+      feature_aggregator_class_type: IdentityFeatureAggregator
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1f60f0b9480348a6660b90244600e7d59622470a
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn.yaml
@@ -0,0 +1,29 @@
+defaults:
+- repro_singleseq_base.yaml
+- _self_
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    num_passes: 1
+    chunk_size_grid: 32000
+    view_pooler_enabled: false
+    loss_weights:
+      loss_rgb_mse: 200.0
+      loss_prev_stage_rgb_mse: 0.0
+      loss_mask_bce: 1.0
+      loss_prev_stage_mask_bce: 0.0
+      loss_autodecoder_norm: 0.0
+      depth_neg_penalty: 10000.0
+    raysampler_class_type: NearFarRaySampler
+    raysampler_NearFarRaySampler_args:
+      n_rays_per_image_sampled_from_mask: 2048
+      min_depth: 0.05
+      max_depth: 0.05
+      n_pts_per_ray_training: 1
+      n_pts_per_ray_evaluation: 1
+      stratified_point_sampling_training: false
+      stratified_point_sampling_evaluation: false
+    renderer_class_type: LSTMRenderer
+    implicit_function_class_type: SRNImplicitFunction
+optimizer_factory_ImplicitronOptimizerFactory_args:
+  breed: Adam
+  lr: 5.0e-05
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_noharm.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_noharm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..28b7570c8c9f49f3ecc5a45056c1467b3b3b2130
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_noharm.yaml
@@ -0,0 +1,11 @@
+defaults:
+- repro_singleseq_srn.yaml
+- _self_
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    num_passes: 1
+    implicit_function_SRNImplicitFunction_args:
+      pixel_generator_args:
+        n_harmonic_functions: 0
+      raymarch_function_args:
+        n_harmonic_functions: 0
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_wce.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_wce.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d190c28084f905a08d106976b45de7eb8560b3a0
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_wce.yaml
@@ -0,0 +1,30 @@
+defaults:
+- repro_singleseq_wce_base
+- repro_feat_extractor_normed.yaml
+- _self_
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    num_passes: 1
+    chunk_size_grid: 32000
+    view_pooler_enabled: true
+    loss_weights:
+      loss_rgb_mse: 200.0
+      loss_prev_stage_rgb_mse: 0.0
+      loss_mask_bce: 1.0
+      loss_prev_stage_mask_bce: 0.0
+      loss_autodecoder_norm: 0.0
+      depth_neg_penalty: 10000.0
+    raysampler_class_type: NearFarRaySampler
+    raysampler_NearFarRaySampler_args:
+      n_rays_per_image_sampled_from_mask: 2048
+      min_depth: 0.05
+      max_depth: 0.05
+      n_pts_per_ray_training: 1
+      n_pts_per_ray_evaluation: 1
+      stratified_point_sampling_training: false
+      stratified_point_sampling_evaluation: false
+    renderer_class_type: LSTMRenderer
+    implicit_function_class_type: SRNImplicitFunction
+optimizer_factory_ImplicitronOptimizerFactory_args:
+  breed: Adam
+  lr: 5.0e-05
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_wce_noharm.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_wce_noharm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3fc1254bd14e42266a1b8894d19bf081edced575
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_wce_noharm.yaml
@@ -0,0 +1,11 @@
+defaults:
+- repro_singleseq_srn_wce.yaml
+- _self_
+model_factory_ImplicitronModelFactory_args:
+  model_GenericModel_args:
+    num_passes: 1
+    implicit_function_SRNImplicitFunction_args:
+      pixel_generator_args:
+        n_harmonic_functions: 0
+      raymarch_function_args:
+        n_harmonic_functions: 0
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_idr.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_idr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4b73e40797d30f70420e213588fa46f110895cde
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_idr.yaml
@@ -0,0 +1,4 @@
+defaults:
+- repro_singleseq_idr.yaml
+- repro_singleseq_co3dv2_base.yaml
+- _self_
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_nerf.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_nerf.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..89999cde6b2869bb4ba773e6f09819bdc4554cd4
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_nerf.yaml
@@ -0,0 +1,4 @@
+defaults:
+- repro_singleseq_nerf.yaml
+- repro_singleseq_co3dv2_base.yaml
+- _self_
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_nerformer.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_nerformer.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..510589a0c048f1f915da6b0e4c57dfbc3f8f29b5
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_nerformer.yaml
@@ -0,0 +1,4 @@
+defaults:
+- repro_singleseq_nerformer.yaml
+- repro_singleseq_co3dv2_base.yaml
+- _self_
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_srn_noharm.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_srn_noharm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8964a4a21e41286e9587cc2209a786b54482ab44
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_srn_noharm.yaml
@@ -0,0 +1,4 @@
+defaults:
+- repro_singleseq_srn_noharm.yaml
+- repro_singleseq_co3dv2_base.yaml
+- _self_
diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_wce_base.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_wce_base.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f5b174c04a9b48646151509bdd22db24bc495702
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_wce_base.yaml
@@ -0,0 +1,22 @@
+defaults:
+- repro_singleseq_base
+- _self_
+data_source_ImplicitronDataSource_args:
+  data_loader_map_provider_SequenceDataLoaderMapProvider_args:
+    batch_size: 10
+    dataset_length_train: 1000
+    dataset_length_val: 1
+    num_workers: 8
+    train_conditioning_type: SAME
+    val_conditioning_type: SAME
+    test_conditioning_type: SAME
+    images_per_seq_options:
+    - 2
+    - 3
+    - 4
+    - 5
+    - 6
+    - 7
+    - 8
+    - 9
+    - 10
diff --git a/pytorch3d/projects/implicitron_trainer/experiment.py b/pytorch3d/projects/implicitron_trainer/experiment.py
new file mode 100644
index 0000000000000000000000000000000000000000..797660c8007bd9ac4446b3716375e7dac9028c60
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/experiment.py
@@ -0,0 +1,297 @@
+#!/usr/bin/env python
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+This file is the entry point for launching experiments with Implicitron.
+
+Launch Training
+---------------
+Experiment config .yaml files are located in the
+`projects/implicitron_trainer/configs` folder. To launch an experiment,
+specify the name of the file. Specific config values can also be overridden
+from the command line, for example:
+
+```
+./experiment.py --config-name base_config.yaml override.param.one=42 override.param.two=84
+```
+
+Main functions
+---------------
+- The Experiment class defines `run` which creates the model, optimizer, and other
+  objects used in training, then starts TrainingLoop's `run` function.
+- TrainingLoop takes care of the actual training logic: forward and backward passes,
+  evaluation and testing, as well as model checkpointing, visualization, and metric
+  printing.
+
+Outputs
+--------
+The outputs of the experiment are saved and logged in multiple ways:
+  - Checkpoints:
+        Model, optimizer and stats are stored in the directory
+        named by the `exp_dir` key from the config file / CLI parameters.
+  - Stats
+        Stats are logged and plotted to the file "train_stats.pdf" in the
+        same directory. The stats are also saved as part of the checkpoint file.
+  - Visualizations
+        Predictions are plotted to a visdom server running at the address
+        and port specified by the `visdom_server` and `visdom_port` keys in
+        the config file.
+
+"""
+import logging
+import os
+import warnings
+
+from dataclasses import field
+
+import hydra
+
+import torch
+from accelerate import Accelerator
+from omegaconf import DictConfig, OmegaConf
+from packaging import version
+
+from pytorch3d.implicitron.dataset.data_source import (
+    DataSourceBase,
+    ImplicitronDataSource,
+)
+from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
+
+from pytorch3d.implicitron.models.renderer.multipass_ea import (
+    MultiPassEmissionAbsorptionRenderer,
+)
+from pytorch3d.implicitron.models.renderer.ray_sampler import AdaptiveRaySampler
+from pytorch3d.implicitron.tools.config import (
+    Configurable,
+    expand_args_fields,
+    remove_unused_components,
+    run_auto_creation,
+)
+
+from .impl.model_factory import ModelFactoryBase
+from .impl.optimizer_factory import OptimizerFactoryBase
+from .impl.training_loop import TrainingLoopBase
+from .impl.utils import seed_all_random_engines
+
+logger = logging.getLogger(__name__)
+
+# workaround for https://github.com/facebookresearch/hydra/issues/2262
+_RUN = hydra.types.RunMode.RUN
+
+if version.parse(hydra.__version__) < version.Version("1.1"):  # checked at import
+    raise ValueError(
+        f"Hydra version {hydra.__version__} is too old."
+        " (Implicitron requires version 1.1 or later.)"
+    )
+
+try:
+    # Optional import which only makes sense in FAIR cluster; skipped if missing.
+    import pytorch3d.implicitron.fair_cluster.slurm  # noqa: F401
+except ModuleNotFoundError:
+    pass
+
+no_accelerate = os.environ.get("PYTORCH3D_NO_ACCELERATE") is not None  # any value
+
+
+class Experiment(Configurable):  # pyre-ignore: 13
+    """
+    This class is at the top level of Implicitron's config hierarchy. Its
+    members are the high-level components necessary for training an implicit
+    rendering network.
+
+    Members:
+        data_source: An object that produces datasets and dataloaders.
+        model_factory: An object that produces an implicit rendering model.
+        optimizer_factory: An object that produces the optimizer and lr
+            scheduler.
+        training_loop: An object that loads the Stats object and runs training
+            given the outputs of the data_source, model_factory and
+            optimizer_factory.
+        seed: A random seed to ensure reproducibility.
+        detect_anomaly: Whether torch.autograd should detect anomalies. Useful
+            for debugging, but might slow down the training.
+        exp_dir: Root experiment directory for checkpoints and training stats.
+        hydra: Hydra settings: keep the working dir, no .hydra logs, RUN mode.
+    """
+
+    data_source: DataSourceBase
+    data_source_class_type: str = "ImplicitronDataSource"
+    model_factory: ModelFactoryBase
+    model_factory_class_type: str = "ImplicitronModelFactory"
+    optimizer_factory: OptimizerFactoryBase
+    optimizer_factory_class_type: str = "ImplicitronOptimizerFactory"
+    training_loop: TrainingLoopBase
+    training_loop_class_type: str = "ImplicitronTrainingLoop"
+
+    seed: int = 42
+    detect_anomaly: bool = False
+    exp_dir: str = "./data/default_experiment/"
+
+    hydra: dict = field(
+        default_factory=lambda: {
+            "run": {"dir": "."},  # Make hydra not change the working dir.
+            "output_subdir": None,  # disable storing the .hydra logs
+            "mode": _RUN,
+        }
+    )
+
+    def __post_init__(self):
+        seed_all_random_engines(
+            self.seed
+        )  # Set all random engine seeds for reproducibility
+
+        run_auto_creation(self)
+
+    def run(self) -> None:
+        """Build the data, model, stats and optimizer, then run the training loop."""
+        # Initialize the accelerator if desired.
+        if no_accelerate:
+            accelerator = None
+            device = torch.device("cuda:0")  # without accelerate: first CUDA device
+        else:
+            accelerator = Accelerator(device_placement=False)
+            logger.info(accelerator.state)
+            device = accelerator.device
+
+        logger.info(f"Running experiment on device: {device}")
+        os.makedirs(self.exp_dir, exist_ok=True)
+
+        # set the debug mode
+        if self.detect_anomaly:
+            logger.info("Anomaly detection!")
+        torch.autograd.set_detect_anomaly(self.detect_anomaly)
+
+        # Initialize the datasets and dataloaders.
+        datasets, dataloaders = self.data_source.get_datasets_and_dataloaders()
+
+        # Init the model and the corresponding Stats object.
+        model = self.model_factory(
+            accelerator=accelerator,
+            exp_dir=self.exp_dir,
+        )
+
+        stats = self.training_loop.load_stats(
+            log_vars=model.log_vars,
+            exp_dir=self.exp_dir,
+            resume=self.model_factory.resume,
+            resume_epoch=self.model_factory.resume_epoch,  # pyre-ignore [16]
+        )
+        start_epoch = stats.epoch + 1  # epoch following the one recorded in stats
+
+        model.to(device)
+
+        # Init the optimizer and LR scheduler.
+        optimizer, scheduler = self.optimizer_factory(
+            accelerator=accelerator,
+            exp_dir=self.exp_dir,
+            last_epoch=start_epoch,
+            model=model,
+            resume=self.model_factory.resume,
+            resume_epoch=self.model_factory.resume_epoch,
+        )
+
+        # Wrap the modules with accelerate. Neither the scheduler (no need to
+        # step it at each optimizer step) nor test_loader is passed to prepare.
+        train_loader = dataloaders.train
+        val_loader = dataloaders.val
+        test_loader = dataloaders.test
+        if accelerator is not None:
+            (
+                model,
+                optimizer,
+                train_loader,
+                val_loader,
+            ) = accelerator.prepare(model, optimizer, train_loader, val_loader)
+
+        # Enter the main training loop.
+        self.training_loop.run(
+            train_loader=train_loader,
+            val_loader=val_loader,
+            test_loader=test_loader,
+            # pyre-ignore[6]
+            train_dataset=datasets.train,
+            model=model,
+            optimizer=optimizer,
+            scheduler=scheduler,
+            accelerator=accelerator,
+            device=device,
+            exp_dir=self.exp_dir,
+            stats=stats,
+            seed=self.seed,
+        )
+
+
+def _setup_envvars_for_cluster() -> bool:
+    """
+    Prepares to run on cluster if relevant.
+    Returns whether FAIR cluster in use.
+    """
+    # TODO: How much of this is needed in general?
+
+    try:
+        import submitit
+    except ImportError:
+        return False
+
+    try:
+        # Only needed when launching on cluster with slurm and submitit
+        job_env = submitit.JobEnvironment()
+    except RuntimeError:
+        return False
+
+    os.environ["LOCAL_RANK"] = str(job_env.local_rank)
+    os.environ["RANK"] = str(job_env.global_rank)
+    os.environ["WORLD_SIZE"] = str(job_env.num_tasks)
+    os.environ["MASTER_ADDR"] = "localhost"
+    os.environ["MASTER_PORT"] = "42918"
+    logger.info(
+        "Num tasks %s, global_rank %s"
+        % (str(job_env.num_tasks), str(job_env.global_rank))
+    )
+
+    return True
+
+
+def dump_cfg(cfg: DictConfig) -> None:
+    remove_unused_components(cfg)
+    # dump the exp config to the exp dir
+    os.makedirs(cfg.exp_dir, exist_ok=True)
+    try:
+        cfg_filename = os.path.join(cfg.exp_dir, "expconfig.yaml")
+        OmegaConf.save(config=cfg, f=cfg_filename)
+    except PermissionError:
+        warnings.warn("Can't dump config due to insufficient permissions!")
+
+
+expand_args_fields(Experiment)  # must precede storing Experiment as a config node
+cs = hydra.core.config_store.ConfigStore.instance()
+cs.store(name="default_config", node=Experiment)  # name used by @hydra.main below
+
+
+@hydra.main(config_path="./configs/", config_name="default_config")
+def experiment(cfg: DictConfig) -> None:
+    # CUDA_VISIBLE_DEVICES must have been set.
+
+    if "CUDA_DEVICE_ORDER" not in os.environ:
+        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+
+    if not _setup_envvars_for_cluster():
+        logger.info("Running locally")
+
+    # TODO: The following may be needed for hydra/submitit it to work
+    expand_args_fields(ImplicitronModelBase)
+    expand_args_fields(AdaptiveRaySampler)
+    expand_args_fields(MultiPassEmissionAbsorptionRenderer)
+    expand_args_fields(ImplicitronDataSource)
+
+    experiment = Experiment(**cfg)
+    dump_cfg(cfg)
+    experiment.run()
+
+
+if __name__ == "__main__":
+    experiment()
diff --git a/pytorch3d/projects/implicitron_trainer/impl/__init__.py b/pytorch3d/projects/implicitron_trainer/impl/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/impl/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/projects/implicitron_trainer/impl/model_factory.py b/pytorch3d/projects/implicitron_trainer/impl/model_factory.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c8ea9da3026dd63f0cfbdfe9352a777d591db3c
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/impl/model_factory.py
@@ -0,0 +1,133 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+import os
+from typing import Optional
+
+import torch.optim
+
+from accelerate import Accelerator
+from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
+from pytorch3d.implicitron.tools import model_io
+from pytorch3d.implicitron.tools.config import (
+    registry,
+    ReplaceableBase,
+    run_auto_creation,
+)
+from pytorch3d.implicitron.tools.stats import Stats
+
+logger = logging.getLogger(__name__)
+
+
+class ModelFactoryBase(ReplaceableBase):
+
+    resume: bool = True  # resume from the last checkpoint
+
+    def __call__(self, **kwargs) -> ImplicitronModelBase:
+        """
+        Initialize the model (possibly from a previously saved state).
+
+        Returns: An instance of ImplicitronModelBase.
+        """
+        raise NotImplementedError()
+
+    def load_stats(self, **kwargs) -> Stats:
+        """
+        Initialize or load a Stats object.
+        """
+        raise NotImplementedError()
+
+
+@registry.register
+class ImplicitronModelFactory(ModelFactoryBase):  # pyre-ignore [13]
+    """
+    A factory class that initializes an implicit rendering model.
+
+    Members:
+        model: An ImplicitronModelBase object.
+        resume: If True, attempt to load the last checkpoint from `exp_dir`
+            passed to __call__. Failure to do so will return a model with ini-
+            tial weights unless `force_resume` is True.
+        resume_epoch: If `resume` is True: Resume a model at this epoch, or if
+            `resume_epoch` <= 0, then resume from the latest checkpoint.
+        force_resume: If True, throw a FileNotFoundError if `resume` is True but
+            a model checkpoint cannot be found.
+
+    """
+
+    model: ImplicitronModelBase
+    model_class_type: str = "GenericModel"
+    resume: bool = True
+    resume_epoch: int = -1
+    force_resume: bool = False
+
+    def __post_init__(self):
+        run_auto_creation(self)
+
+    def __call__(
+        self,
+        exp_dir: str,
+        accelerator: Optional[Accelerator] = None,
+    ) -> ImplicitronModelBase:
+        """
+        Returns an instance of `ImplicitronModelBase`, possibly loaded from a
+        checkpoint (if self.resume, self.resume_epoch specify so).
+
+        Args:
+            exp_dir: Root experiment directory.
+            accelerator: An Accelerator object.
+
+        Returns:
+            model: The model with optionally loaded weights from checkpoint
+
+        Raise:
+            FileNotFoundError if `force_resume` is True but checkpoint not found.
+        """
+        # Determine the network outputs that should be logged
+        if hasattr(self.model, "log_vars"):
+            log_vars = list(self.model.log_vars)
+        else:
+            log_vars = ["objective"]
+
+        if self.resume_epoch > 0:
+            # Resume from a certain epoch
+            model_path = model_io.get_checkpoint(exp_dir, self.resume_epoch)
+            if not os.path.isfile(model_path):
+                raise ValueError(f"Cannot find model from epoch {self.resume_epoch}.")
+        else:
+            # Retrieve the last checkpoint
+            model_path = model_io.find_last_checkpoint(exp_dir)
+
+        if model_path is not None:
+            logger.info(f"Found previous model {model_path}")
+            if self.force_resume or self.resume:
+                logger.info("Resuming.")
+
+                map_location = None
+                if accelerator is not None and not accelerator.is_local_main_process:
+                    map_location = {
+                        "cuda:%d" % 0: "cuda:%d" % accelerator.local_process_index
+                    }
+                model_state_dict = torch.load(
+                    model_io.get_model_path(model_path), map_location=map_location
+                )
+
+                try:
+                    self.model.load_state_dict(model_state_dict, strict=True)
+                except RuntimeError as e:
+                    logger.error(e)
+                    logger.info(
+                        "Cannot load state dict in strict mode! -> trying non-strict"
+                    )
+                    self.model.load_state_dict(model_state_dict, strict=False)
+                self.model.log_vars = log_vars
+            else:
+                logger.info("Not resuming -> starting from scratch.")
+        elif self.force_resume:
+            raise FileNotFoundError(f"Cannot find a checkpoint in {exp_dir}!")
+
+        return self.model
diff --git a/pytorch3d/projects/implicitron_trainer/impl/optimizer_factory.py b/pytorch3d/projects/implicitron_trainer/impl/optimizer_factory.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ec3165384dcd12c25429fba9f449f306181ff59
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/impl/optimizer_factory.py
@@ -0,0 +1,337 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import inspect
+import logging
+import os
+from collections import defaultdict
+from dataclasses import field
+from typing import Any, Dict, List, Optional, Tuple
+
+import torch.optim
+
+from accelerate import Accelerator
+
+from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
+from pytorch3d.implicitron.tools import model_io
+from pytorch3d.implicitron.tools.config import (
+    registry,
+    ReplaceableBase,
+    run_auto_creation,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class OptimizerFactoryBase(ReplaceableBase):
+    def __call__(
+        self, model: ImplicitronModelBase, **kwargs
+    ) -> Tuple[torch.optim.Optimizer, Any]:
+        """
+        Initialize the optimizer and lr scheduler.
+
+        Args:
+            model: The model with optionally loaded weights.
+
+        Returns:
+            An optimizer module (optionally loaded from a checkpoint) and
+            a learning rate scheduler module (should be a subclass of torch.optim's
+            lr_scheduler._LRScheduler).
+        """
+        raise NotImplementedError()
+
+
+@registry.register
+class ImplicitronOptimizerFactory(OptimizerFactoryBase):
+    """
+    A factory that initializes the optimizer and lr scheduler.
+
+    Members:
+        betas: Beta parameters for the Adam optimizer.
+        breed: The type of optimizer to use. We currently support SGD, Adagrad
+            and Adam.
+        exponential_lr_step_size: With Exponential policy,
+            lr = lr * gamma ** (epoch/step_size). Also used as the step size
+            of the exponential phase of the LinearExponential policy.
+        gamma: Multiplicative factor of learning rate decay.
+        lr: The value for the initial learning rate.
+        lr_policy: The policy to use for learning rate. We currently support
+            MultiStepLR, Exponential and LinearExponential policies
+            (the name is matched case-insensitively).
+        momentum: Momentum factor (for the SGD optimizer only).
+        multistep_lr_milestones: With MultiStepLR policy only: list of
+            increasing epoch indices at which the learning rate is modified.
+        weight_decay: The optimizer weight_decay (L2 penalty on model weights).
+        linear_exponential_lr_milestone: With LinearExponential policy only:
+            the epoch at which the linear phase ends and the exponential
+            decay starts.
+        linear_exponential_start_gamma: With LinearExponential policy only:
+            the learning rate multiplier at epoch 0, which is linearly
+            interpolated towards 1 at `linear_exponential_lr_milestone`.
+        foreach: Whether to use new "foreach" implementation of optimizer where
+            available (e.g. requires PyTorch 1.12.0 for Adam)
+        group_learning_rates: Parameters or modules can be assigned to parameter
+            groups. This dictionary has names of those parameter groups as keys
+            and learning rates as values. All parameter group names have to be
+            defined in this dictionary. Parameters which do not have predefined
+            parameter group are put into "default" parameter group which has
+            `lr` as its learning rate.
+    """
+
+    betas: Tuple[float, ...] = (0.9, 0.999)
+    breed: str = "Adam"
+    exponential_lr_step_size: int = 250
+    gamma: float = 0.1
+    lr: float = 0.0005
+    lr_policy: str = "MultiStepLR"
+    momentum: float = 0.9
+    multistep_lr_milestones: tuple = ()
+    weight_decay: float = 0.0
+    linear_exponential_lr_milestone: int = 200
+    linear_exponential_start_gamma: float = 0.1
+    foreach: Optional[bool] = True
+    # A default_factory is used so that instances do not share one dict.
+    group_learning_rates: Dict[str, float] = field(default_factory=lambda: {})
+
+    def __post_init__(self):
+        run_auto_creation(self)
+
+    def __call__(
+        self,
+        last_epoch: int,
+        model: ImplicitronModelBase,
+        accelerator: Optional[Accelerator] = None,
+        exp_dir: Optional[str] = None,
+        resume: bool = True,
+        resume_epoch: int = -1,
+        **kwargs,
+    ) -> Tuple[torch.optim.Optimizer, Any]:
+        """
+        Initialize the optimizer (optionally from a checkpoint) and the lr scheduluer.
+
+        Args:
+            last_epoch: If the model was loaded from checkpoint this will be the
+                number of the last epoch that was saved.
+            model: The model with optionally loaded weights.
+            accelerator: An optional Accelerator instance.
+            exp_dir: Root experiment directory.
+            resume: If True, attempt to load optimizer checkpoint from exp_dir.
+                Failure to do so will return a newly initialized optimizer.
+            resume_epoch: If `resume` is True: Resume optimizer at this epoch. If
+                `resume_epoch` <= 0, then resume from the latest checkpoint.
+        Returns:
+            An optimizer module (optionally loaded from a checkpoint) and
+            a learning rate scheduler module (should be a subclass of torch.optim's
+            lr_scheduler._LRScheduler).
+        """
+        # Get the parameters to optimize
+        if hasattr(model, "_get_param_groups"):  # use the model function
+            p_groups = model._get_param_groups(self.lr, wd=self.weight_decay)
+        else:
+            p_groups = [
+                {"params": params, "lr": self._get_group_learning_rate(group)}
+                for group, params in self._get_param_groups(model).items()
+            ]
+
+        # Intialize the optimizer
+        optimizer_kwargs: Dict[str, Any] = {
+            "lr": self.lr,
+            "weight_decay": self.weight_decay,
+        }
+        if self.breed == "SGD":
+            optimizer_class = torch.optim.SGD
+            optimizer_kwargs["momentum"] = self.momentum
+        elif self.breed == "Adagrad":
+            optimizer_class = torch.optim.Adagrad
+        elif self.breed == "Adam":
+            optimizer_class = torch.optim.Adam
+            optimizer_kwargs["betas"] = self.betas
+        else:
+            raise ValueError(f"No such solver type {self.breed}")
+
+        if "foreach" in inspect.signature(optimizer_class.__init__).parameters:
+            optimizer_kwargs["foreach"] = self.foreach
+        optimizer = optimizer_class(p_groups, **optimizer_kwargs)
+        logger.info(f"Solver type = {self.breed}")
+
+        # Load state from checkpoint
+        optimizer_state = self._get_optimizer_state(
+            exp_dir,
+            accelerator,
+            resume_epoch=resume_epoch,
+            resume=resume,
+        )
+        if optimizer_state is not None:
+            logger.info("Setting loaded optimizer state.")
+            optimizer.load_state_dict(optimizer_state)
+
+        # Initialize the learning rate scheduler
+        if self.lr_policy.casefold() == "MultiStepLR".casefold():
+            scheduler = torch.optim.lr_scheduler.MultiStepLR(
+                optimizer,
+                milestones=self.multistep_lr_milestones,
+                gamma=self.gamma,
+            )
+        elif self.lr_policy.casefold() == "Exponential".casefold():
+            scheduler = torch.optim.lr_scheduler.LambdaLR(
+                optimizer,
+                lambda epoch: self.gamma ** (epoch / self.exponential_lr_step_size),
+                verbose=False,
+            )
+        elif self.lr_policy.casefold() == "LinearExponential".casefold():
+            # linear learning rate progression between epochs 0 to
+            # self.linear_exponential_lr_milestone, followed by exponential
+            # lr decay for the rest of the epochs
+            def _get_lr(epoch: int):
+                m = self.linear_exponential_lr_milestone
+                if epoch < m:
+                    w = (m - epoch) / m
+                    gamma = w * self.linear_exponential_start_gamma + (1 - w)
+                else:
+                    epoch_rest = epoch - m
+                    gamma = self.gamma ** (epoch_rest / self.exponential_lr_step_size)
+                return gamma
+
+            scheduler = torch.optim.lr_scheduler.LambdaLR(
+                optimizer, _get_lr, verbose=False
+            )
+        else:
+            raise ValueError("no such lr policy %s" % self.lr_policy)
+
+        # When loading from checkpoint, this will make sure that the
+        # lr is correctly set even after returning.
+        for _ in range(last_epoch):
+            scheduler.step()
+
+        optimizer.zero_grad()
+
+        return optimizer, scheduler
+
+    def _get_optimizer_state(
+        self,
+        exp_dir: Optional[str],
+        accelerator: Optional[Accelerator] = None,
+        resume: bool = True,
+        resume_epoch: int = -1,
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Load an optimizer state from a checkpoint.
+
+        resume: If True, attempt to load the last checkpoint from `exp_dir`
+            passed to __call__. Failure to do so will return a newly initialized
+            optimizer.
+        resume_epoch: If `resume` is True: Resume optimizer at this epoch. If
+            `resume_epoch` <= 0, then resume from the latest checkpoint.
+        """
+        if exp_dir is None or not resume:
+            return None
+        if resume_epoch > 0:
+            save_path = model_io.get_checkpoint(exp_dir, resume_epoch)
+            if not os.path.isfile(save_path):
+                raise FileNotFoundError(
+                    f"Cannot find optimizer from epoch {resume_epoch}."
+                )
+        else:
+            save_path = model_io.find_last_checkpoint(exp_dir)
+        optimizer_state = None
+        if save_path is not None:
+            logger.info(f"Found previous optimizer state {save_path} -> resuming.")
+            opt_path = model_io.get_optimizer_path(save_path)
+
+            if os.path.isfile(opt_path):
+                map_location = None
+                if accelerator is not None and not accelerator.is_local_main_process:
+                    map_location = {
+                        "cuda:%d" % 0: "cuda:%d" % accelerator.local_process_index
+                    }
+                optimizer_state = torch.load(opt_path, map_location)
+            else:
+                raise FileNotFoundError(f"Optimizer state {opt_path} does not exist.")
+        return optimizer_state
+
+    def _get_param_groups(
+        self, module: torch.nn.Module
+    ) -> Dict[str, List[torch.nn.Parameter]]:
+        """
+        Recursively visits all the modules inside the `module` and sorts all the
+        parameters in parameter groups.
+
+        Uses `param_groups` dictionary member, where keys are names of individual
+        parameters or module members and values are the names of the parameter groups
+        for those parameters or members. "self" key is used to denote the parameter groups
+        at the module level. Possible keys, including the "self" key do not have to
+        be defined. By default all parameters have the learning rate defined in the
+        optimizer. This can be overridden by setting the parameter group in `param_groups`
+        member of a specific module. Values are a parameter group name. The keys
+        specify what parameters will be affected as follows:
+            - “self”: All the parameters of the module and its child modules
+            - name of a parameter: A parameter with that name.
+            - name of a module member: All the parameters of the module and its
+                child modules.
+                This is useful if members do not have `param_groups`, for
+                example torch.nn.Linear.
+            - <name of module member>.<something>: recursive. Same as if <something>
+                was used in param_groups of that submodule/member.
+
+        Args:
+            module: module from which to extract the parameters and their parameter
+                groups
+        Returns:
+            dictionary with parameter groups as keys and lists of parameters as values
+        """
+
+        param_groups = defaultdict(list)
+
+        def traverse(module, default_group: str, mapping: Dict[str, str]) -> None:
+            """
+            Visitor for module to assign its parameters to the relevant member of
+            param_groups.
+
+            Args:
+                module: the module being visited in a depth-first search
+                default_group: the param group to assign parameters to unless
+                                otherwise overriden.
+                mapping: known mappings of parameters to groups for this module,
+                    destructively modified by this function.
+            """
+            # If key self is defined in param_groups then chenge the default param
+            # group for all parameters and children in the module.
+            if hasattr(module, "param_groups") and "self" in module.param_groups:
+                default_group = module.param_groups["self"]
+
+            # Collect all the parameters that are directly inside the `module`,
+            # they will be in the default param group if they don't have
+            # defined group.
+            if hasattr(module, "param_groups"):
+                mapping.update(module.param_groups)
+
+            for name, param in module.named_parameters(recurse=False):
+                if param.requires_grad:
+                    group_name = mapping.get(name, default_group)
+                    logger.debug(f"Assigning {name} to param_group {group_name}")
+                    param_groups[group_name].append(param)
+
+            # If children have defined default param group then use it else pass
+            # own default.
+            for child_name, child in module.named_children():
+                mapping_to_add = {
+                    name[len(child_name) + 1 :]: group
+                    for name, group in mapping.items()
+                    if name.startswith(child_name + ".")
+                }
+                traverse(child, mapping.get(child_name, default_group), mapping_to_add)
+
+        traverse(module, "default", {})
+        return param_groups
+
+    def _get_group_learning_rate(self, group_name: str) -> float:
+        """
+        Wraps the `group_learning_rates` dictionary providing errors and returns
+        `self.lr` for "default" group_name.
+
+        Args:
+            group_name: a string representing the name of the group
+        Returns:
+            learning rate for a specific group
+        """
+        if group_name == "default":
+            return self.lr
+        lr = self.group_learning_rates.get(group_name, None)
+        if lr is None:
+            raise ValueError(f"no learning rate given for group {group_name}")
+        return lr
diff --git a/pytorch3d/projects/implicitron_trainer/impl/training_loop.py b/pytorch3d/projects/implicitron_trainer/impl/training_loop.py
new file mode 100644
index 0000000000000000000000000000000000000000..57917cc8183e95b91080663dec06e6e7f4dbad37
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/impl/training_loop.py
@@ -0,0 +1,452 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+import os
+import time
+from typing import Any, List, Optional
+
+import torch
+from accelerate import Accelerator
+from pytorch3d.implicitron.evaluation.evaluator import EvaluatorBase
+from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
+from pytorch3d.implicitron.models.generic_model import EvaluationMode
+from pytorch3d.implicitron.tools import model_io, vis_utils
+from pytorch3d.implicitron.tools.config import (
+    registry,
+    ReplaceableBase,
+    run_auto_creation,
+)
+from pytorch3d.implicitron.tools.stats import Stats
+from torch.utils.data import DataLoader, Dataset
+
+from .utils import seed_all_random_engines
+
+logger = logging.getLogger(__name__)
+
+
+# pyre-fixme[13]: Attribute `evaluator` is never initialized.
+class TrainingLoopBase(ReplaceableBase):
+    """
+    Members:
+        evaluator: An EvaluatorBase instance, used to evaluate training results.
+    """
+
+    evaluator: Optional[EvaluatorBase]
+    evaluator_class_type: Optional[str] = "ImplicitronEvaluator"
+
+    def run(
+        self,
+        train_loader: DataLoader,
+        val_loader: Optional[DataLoader],
+        test_loader: Optional[DataLoader],
+        train_dataset: Dataset,
+        model: ImplicitronModelBase,
+        optimizer: torch.optim.Optimizer,
+        scheduler: Any,
+        **kwargs,
+    ) -> None:
+        raise NotImplementedError()
+
+    def load_stats(
+        self,
+        log_vars: List[str],
+        exp_dir: str,
+        resume: bool = True,
+        resume_epoch: int = -1,
+        **kwargs,
+    ) -> Stats:
+        raise NotImplementedError()
+
+
+@registry.register
+class ImplicitronTrainingLoop(TrainingLoopBase):
+    """
+    Members:
+        eval_only: If True, only run evaluation using the test dataloader.
+        max_epochs: Train for this many epochs. Note that if the model was
+            loaded from a checkpoint, we will restart training at the appropriate
+            epoch and run for (max_epochs - checkpoint_epoch) epochs.
+        store_checkpoints: If True, store model and optimizer state checkpoints.
+        store_checkpoints_purge: If >= 0, remove any checkpoints older or equal
+            to this many epochs.
+        test_interval: Evaluate on a test dataloader each `test_interval` epochs.
+        test_when_finished: If True, evaluate on a test dataloader when training
+            completes.
+        validation_interval: Validate each `validation_interval` epochs.
+        clip_grad: Optionally clip the gradient norms.
+            If set to a value <=0.0, no clipping
+        metric_print_interval: The batch interval at which the stats should be
+            logged.
+        visualize_interval: The batch interval at which the visualizations
+            should be plotted
+        visdom_env: The name of the Visdom environment to use for plotting.
+        visdom_port: The Visdom port.
+        visdom_server: Address of the Visdom server.
+    """
+
+    # Parameters of the outer training loop.
+    eval_only: bool = False
+    max_epochs: int = 1000
+    store_checkpoints: bool = True
+    store_checkpoints_purge: int = 1
+    # `run` only evaluates periodically when test_interval > 0.
+    test_interval: int = -1
+    test_when_finished: bool = False
+    validation_interval: int = 1
+
+    # Gradient clipping.
+    clip_grad: float = 0.0
+
+    # Visualization/logging parameters.
+    metric_print_interval: int = 5
+    visualize_interval: int = 1000
+    visdom_env: str = ""
+    # NOTE: the VISDOM_PORT environment variable is read once, when this class
+    # body is executed; 8097 is used if the variable is not set.
+    visdom_port: int = int(os.environ.get("VISDOM_PORT", 8097))
+    visdom_server: str = "http://127.0.0.1"
+
+    def __post_init__(self):
+        # presumably creates the configured `evaluator` member -- TODO confirm
+        run_auto_creation(self)
+
+    # pyre-fixme[14]: `run` overrides method defined in `TrainingLoopBase`
+    #  inconsistently.
+    def run(
+        self,
+        *,
+        train_loader: DataLoader,
+        val_loader: Optional[DataLoader],
+        test_loader: Optional[DataLoader],
+        train_dataset: Dataset,
+        model: ImplicitronModelBase,
+        optimizer: torch.optim.Optimizer,
+        scheduler: Any,
+        accelerator: Optional[Accelerator],
+        device: torch.device,
+        exp_dir: str,
+        stats: Stats,
+        seed: int,
+        **kwargs,
+    ):
+        """
+        Entry point to run the training and validation loops
+        based on the specified config file.
+        """
+        start_epoch = stats.epoch + 1
+        assert scheduler.last_epoch == stats.epoch + 1
+        assert scheduler.last_epoch == start_epoch
+
+        # only run evaluation on the test dataloader
+        if self.eval_only:
+            if test_loader is not None:
+                # pyre-fixme[16]: `Optional` has no attribute `run`.
+                self.evaluator.run(
+                    dataloader=test_loader,
+                    device=device,
+                    dump_to_json=True,
+                    epoch=stats.epoch,
+                    exp_dir=exp_dir,
+                    model=model,
+                )
+                return
+            else:
+                raise ValueError(
+                    "Cannot evaluate and dump results to json, no test data provided."
+                )
+
+        # loop through epochs
+        for epoch in range(start_epoch, self.max_epochs):
+            # automatic new_epoch and plotting of stats at every epoch start
+            with stats:
+
+                # Make sure to re-seed random generators to ensure reproducibility
+                # even after restart.
+                seed_all_random_engines(seed + epoch)
+
+                cur_lr = float(scheduler.get_last_lr()[-1])
+                logger.debug(f"scheduler lr = {cur_lr:1.2e}")
+
+                # train loop
+                self._training_or_validation_epoch(
+                    accelerator=accelerator,
+                    device=device,
+                    epoch=epoch,
+                    loader=train_loader,
+                    model=model,
+                    optimizer=optimizer,
+                    stats=stats,
+                    validation=False,
+                )
+
+                # val loop (optional)
+                if val_loader is not None and epoch % self.validation_interval == 0:
+                    self._training_or_validation_epoch(
+                        accelerator=accelerator,
+                        device=device,
+                        epoch=epoch,
+                        loader=val_loader,
+                        model=model,
+                        optimizer=optimizer,
+                        stats=stats,
+                        validation=True,
+                    )
+
+                # eval loop (optional)
+                if (
+                    test_loader is not None
+                    and self.test_interval > 0
+                    and epoch % self.test_interval == 0
+                ):
+                    self.evaluator.run(
+                        device=device,
+                        dataloader=test_loader,
+                        model=model,
+                    )
+
+                assert stats.epoch == epoch, "inconsistent stats!"
+                self._checkpoint(accelerator, epoch, exp_dir, model, optimizer, stats)
+
+                scheduler.step()
+                new_lr = float(scheduler.get_last_lr()[-1])
+                if new_lr != cur_lr:
+                    logger.info(f"LR change! {cur_lr} -> {new_lr}")
+
+        if self.test_when_finished:
+            if test_loader is not None:
+                self.evaluator.run(
+                    device=device,
+                    dump_to_json=True,
+                    epoch=stats.epoch,
+                    exp_dir=exp_dir,
+                    dataloader=test_loader,
+                    model=model,
+                )
+            else:
+                raise ValueError(
+                    "Cannot evaluate and dump results to json, no test data provided."
+                )
+
+    def load_stats(
+        self,
+        log_vars: List[str],
+        exp_dir: str,
+        resume: bool = True,
+        resume_epoch: int = -1,
+        **kwargs,
+    ) -> Stats:
+        """
+        Load Stats that correspond to the model's log_vars and resume_epoch.
+
+        Args:
+            log_vars: A list of variable names to log. Should be a subset of the
+                `preds` returned by the forward function of the corresponding
+                ImplicitronModelBase instance.
+            exp_dir: Root experiment directory.
+            resume: If False, do not load stats from the checkpoint speci-
+                fied by resume and resume_epoch; instead, create a fresh stats object.
+
+        stats: The stats structure (optionally loaded from checkpoint)
+        """
+        # Init the stats struct
+        visdom_env_charts = (
+            vis_utils.get_visdom_env(self.visdom_env, exp_dir) + "_charts"
+        )
+        stats = Stats(
+            # log_vars should be a list, but OmegaConf might load them as ListConfig
+            list(log_vars),
+            plot_file=os.path.join(exp_dir, "train_stats.pdf"),
+            visdom_env=visdom_env_charts,
+            visdom_server=self.visdom_server,
+            visdom_port=self.visdom_port,
+        )
+
+        model_path = None
+        if resume:
+            if resume_epoch > 0:
+                model_path = model_io.get_checkpoint(exp_dir, resume_epoch)
+                if not os.path.isfile(model_path):
+                    raise FileNotFoundError(
+                        f"Cannot find stats from epoch {resume_epoch}."
+                    )
+            else:
+                model_path = model_io.find_last_checkpoint(exp_dir)
+
+        if model_path is not None:
+            stats_path = model_io.get_stats_path(model_path)
+            stats_load = model_io.load_stats(stats_path)
+
+            # Determine if stats should be reset
+            if resume:
+                if stats_load is None:
+                    logger.warning("\n\n\n\nCORRUPT STATS -> clearing stats\n\n\n\n")
+                    last_epoch = model_io.parse_epoch_from_model_path(model_path)
+                    logger.info(f"Estimated resume epoch = {last_epoch}")
+
+                    # Reset the stats struct
+                    for _ in range(last_epoch + 1):
+                        stats.new_epoch()
+                    assert last_epoch == stats.epoch
+                else:
+                    logger.info(f"Found previous stats in {stats_path} -> resuming.")
+                    stats = stats_load
+
+                # Update stats properties incase it was reset on load
+                stats.visdom_env = visdom_env_charts
+                stats.visdom_server = self.visdom_server
+                stats.visdom_port = self.visdom_port
+                stats.plot_file = os.path.join(exp_dir, "train_stats.pdf")
+                stats.synchronize_logged_vars(log_vars)
+            else:
+                logger.info("Clearing stats")
+
+        return stats
+
+    def _training_or_validation_epoch(
+        self,
+        epoch: int,
+        loader: DataLoader,
+        model: ImplicitronModelBase,
+        optimizer: torch.optim.Optimizer,
+        stats: Stats,
+        validation: bool,
+        *,
+        accelerator: Optional[Accelerator],
+        bp_var: str = "objective",
+        device: torch.device,
+        **kwargs,
+    ) -> None:
+        """
+        Run one pass over `loader`: model forward pass, stats update, optional
+        visualization and, unless `validation` is set, backward pass and
+        optimizer step.
+
+        Args:
+            epoch: The index of the current epoch (not read in this method).
+            loader: The dataloader to use for the loop
+            model: The model module optionally loaded from checkpoint
+            optimizer: The optimizer module optionally loaded from checkpoint
+            stats: The stats struct, also optionally loaded from checkpoint
+            validation: If true, run the loop with the model in eval mode
+                and skip the backward pass
+            accelerator: An optional Accelerator instance.
+            bp_var: The name of the key in the model output `preds` dict which
+                should be used as the loss for the backward pass.
+            device: The device on which to run the model.
+            **kwargs: Not used by this method.
+        """
+        if validation:
+            model.eval()
+            trainmode = "val"
+        else:
+            model.train()
+            trainmode = "train"
+
+        t_start = time.time()
+        # get the visdom env name
+        visdom_env_imgs = stats.visdom_env + "_images_" + trainmode
+        viz = vis_utils.get_visdom_connection(
+            server=stats.visdom_server,
+            port=stats.visdom_port,
+        )
+
+        # Iterate through the batches
+        n_batches = len(loader)
+        for it, net_input in enumerate(loader):
+            last_iter = it == n_batches - 1
+
+            # move the batch to the target device
+            net_input = net_input.to(device)
+
+            # run the forward pass
+            if not validation:
+                optimizer.zero_grad()
+                preds = model(
+                    **{**net_input, "evaluation_mode": EvaluationMode.TRAINING}
+                )
+            else:
+                with torch.no_grad():
+                    preds = model(
+                        **{**net_input, "evaluation_mode": EvaluationMode.EVALUATION}
+                    )
+
+            # make sure we don't overwrite something
+            assert all(k not in preds for k in net_input.keys())
+            # merge everything into one big dict
+            preds.update(net_input)
+
+            # update the stats logger
+            stats.update(preds, time_start=t_start, stat_set=trainmode)
+            # pyre-ignore [16]
+            assert stats.it[trainmode] == it, "inconsistent stat iteration number!"
+
+            # print textual status update
+            if it % self.metric_print_interval == 0 or last_iter:
+                std_out = stats.get_status_string(stat_set=trainmode, max_it=n_batches)
+                logger.info(std_out)
+
+            # visualize results
+            if (
+                (accelerator is None or accelerator.is_local_main_process)
+                and self.visualize_interval > 0
+                and it % self.visualize_interval == 0
+            ):
+                prefix = f"e{stats.epoch}_it{stats.it[trainmode]}"
+                if hasattr(model, "visualize"):
+                    model.visualize(
+                        viz,
+                        visdom_env_imgs,
+                        preds,
+                        prefix,
+                    )
+
+            # optimizer step
+            if not validation:
+                loss = preds[bp_var]
+                assert torch.isfinite(loss).all(), "Non-finite loss!"
+                # backprop
+                if accelerator is None:
+                    loss.backward()
+                else:
+                    accelerator.backward(loss)
+                if self.clip_grad > 0.0:
+                    # Clip gradients in place; the pre-clip total norm is returned.
+                    total_norm = torch.nn.utils.clip_grad_norm_(
+                        model.parameters(), self.clip_grad
+                    )
+                    if total_norm > self.clip_grad:
+                        logger.debug(
+                            f"Clipping gradient: {total_norm}"
+                            + f" with coef {self.clip_grad / float(total_norm)}."
+                        )
+
+                optimizer.step()
+
+    def _checkpoint(
+        self,
+        accelerator: Optional[Accelerator],
+        epoch: int,
+        exp_dir: str,
+        model: ImplicitronModelBase,
+        optimizer: torch.optim.Optimizer,
+        stats: Stats,
+    ):
+        """
+        Checkpoint `model`, `stats` and `optimizer` for `epoch` under `exp_dir`
+        via `model_io.safe_save_model`. Does nothing unless
+        `self.store_checkpoints` is set and this is the local main process (or
+        no accelerator is used). If `self.store_checkpoints_purge` > 0, epochs
+        below `epoch - self.store_checkpoints_purge` are purged beforehand.
+        """
+        if not self.store_checkpoints:
+            return
+        if accelerator is not None and not accelerator.is_local_main_process:
+            return
+        n_keep = self.store_checkpoints_purge
+        if n_keep > 0:
+            for old_epoch in range(epoch - n_keep):
+                model_io.purge_epoch(exp_dir, old_epoch)
+        ckpt_path = model_io.get_checkpoint(exp_dir, epoch)
+        # Save the underlying module, not the accelerator-wrapped one.
+        bare_model = model if accelerator is None else accelerator.unwrap_model(model)
+        model_io.safe_save_model(bare_model, stats, ckpt_path, optimizer=optimizer)
diff --git a/pytorch3d/projects/implicitron_trainer/impl/utils.py b/pytorch3d/projects/implicitron_trainer/impl/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..4fac4463857f319127c9b38b496173c2ac05fd13
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/impl/utils.py
@@ -0,0 +1,17 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import random
+
+import numpy as np
+import torch
+
+
+def seed_all_random_engines(seed: int) -> None:
+    """Seed NumPy, PyTorch and Python's `random` (in that order) with `seed`."""
+    for seed_rng in (np.random.seed, torch.manual_seed, random.seed):
+        seed_rng(seed)
diff --git a/pytorch3d/projects/implicitron_trainer/tests/__init__.py b/pytorch3d/projects/implicitron_trainer/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/tests/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/projects/implicitron_trainer/tests/experiment.yaml b/pytorch3d/projects/implicitron_trainer/tests/experiment.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e0394f2207ee4c01dc0a8179d0a0ceeb4ddcbbda
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/tests/experiment.yaml
@@ -0,0 +1,1243 @@
+data_source_class_type: ImplicitronDataSource
+model_factory_class_type: ImplicitronModelFactory
+optimizer_factory_class_type: ImplicitronOptimizerFactory
+training_loop_class_type: ImplicitronTrainingLoop
+seed: 42
+detect_anomaly: false
+exp_dir: ./data/default_experiment/
+hydra:
+  run:
+    dir: .
+  output_subdir: null
+  mode: RUN
+data_source_ImplicitronDataSource_args:
+  dataset_map_provider_class_type: ???
+  data_loader_map_provider_class_type: SequenceDataLoaderMapProvider
+  dataset_map_provider_BlenderDatasetMapProvider_args:
+    base_dir: ???
+    object_name: ???
+    path_manager_factory_class_type: PathManagerFactory
+    n_known_frames_for_test: null
+    path_manager_factory_PathManagerFactory_args:
+      silence_logs: true
+  dataset_map_provider_JsonIndexDatasetMapProvider_args:
+    category: ???
+    task_str: singlesequence
+    dataset_root: ''
+    n_frames_per_sequence: -1
+    test_on_train: false
+    restrict_sequence_name: []
+    test_restrict_sequence_id: -1
+    assert_single_seq: false
+    only_test_set: false
+    dataset_class_type: JsonIndexDataset
+    path_manager_factory_class_type: PathManagerFactory
+    dataset_JsonIndexDataset_args:
+      limit_to: 0
+      limit_sequences_to: 0
+      exclude_sequence: []
+      limit_category_to: []
+      load_images: true
+      load_depths: true
+      load_depth_masks: true
+      load_masks: true
+      load_point_clouds: false
+      max_points: 0
+      mask_images: false
+      mask_depths: false
+      image_height: 800
+      image_width: 800
+      box_crop: true
+      box_crop_mask_thr: 0.4
+      box_crop_context: 0.3
+      remove_empty_masks: true
+      seed: 0
+      sort_frames: false
+    path_manager_factory_PathManagerFactory_args:
+      silence_logs: true
+  dataset_map_provider_JsonIndexDatasetMapProviderV2_args:
+    category: ???
+    subset_name: ???
+    dataset_root: ''
+    test_on_train: false
+    only_test_set: false
+    load_eval_batches: true
+    num_load_workers: 4
+    n_known_frames_for_test: 0
+    dataset_class_type: JsonIndexDataset
+    path_manager_factory_class_type: PathManagerFactory
+    dataset_JsonIndexDataset_args:
+      limit_to: 0
+      limit_sequences_to: 0
+      pick_sequence: []
+      exclude_sequence: []
+      limit_category_to: []
+      load_images: true
+      load_depths: true
+      load_depth_masks: true
+      load_masks: true
+      load_point_clouds: false
+      max_points: 0
+      mask_images: false
+      mask_depths: false
+      image_height: 800
+      image_width: 800
+      box_crop: true
+      box_crop_mask_thr: 0.4
+      box_crop_context: 0.3
+      remove_empty_masks: true
+      n_frames_per_sequence: -1
+      seed: 0
+      sort_frames: false
+    path_manager_factory_PathManagerFactory_args:
+      silence_logs: true
+  dataset_map_provider_LlffDatasetMapProvider_args:
+    base_dir: ???
+    object_name: ???
+    path_manager_factory_class_type: PathManagerFactory
+    n_known_frames_for_test: null
+    path_manager_factory_PathManagerFactory_args:
+      silence_logs: true
+    downscale_factor: 4
+  dataset_map_provider_RenderedMeshDatasetMapProvider_args:
+    num_views: 40
+    data_file: null
+    azimuth_range: 180.0
+    distance: 2.7
+    resolution: 128
+    use_point_light: true
+    gpu_idx: 0
+    path_manager_factory_class_type: PathManagerFactory
+    path_manager_factory_PathManagerFactory_args:
+      silence_logs: true
+  data_loader_map_provider_SequenceDataLoaderMapProvider_args:
+    batch_size: 1
+    num_workers: 0
+    dataset_length_train: 0
+    dataset_length_val: 0
+    dataset_length_test: 0
+    train_conditioning_type: SAME
+    val_conditioning_type: SAME
+    test_conditioning_type: KNOWN
+    images_per_seq_options: []
+    sample_consecutive_frames: false
+    consecutive_frames_max_gap: 0
+    consecutive_frames_max_gap_seconds: 0.1
+  data_loader_map_provider_SimpleDataLoaderMapProvider_args:
+    batch_size: 1
+    num_workers: 0
+    dataset_length_train: 0
+    dataset_length_val: 0
+    dataset_length_test: 0
+  data_loader_map_provider_TrainEvalDataLoaderMapProvider_args:
+    batch_size: 1
+    num_workers: 0
+    dataset_length_train: 0
+    dataset_length_val: 0
+    dataset_length_test: 0
+    train_conditioning_type: SAME
+    val_conditioning_type: SAME
+    test_conditioning_type: KNOWN
+    images_per_seq_options: []
+    sample_consecutive_frames: false
+    consecutive_frames_max_gap: 0
+    consecutive_frames_max_gap_seconds: 0.1
+model_factory_ImplicitronModelFactory_args:
+  resume: true
+  model_class_type: GenericModel
+  resume_epoch: -1
+  force_resume: false
+  model_GenericModel_args:
+    log_vars:
+    - loss_rgb_psnr_fg
+    - loss_rgb_psnr
+    - loss_rgb_mse
+    - loss_rgb_huber
+    - loss_depth_abs
+    - loss_depth_abs_fg
+    - loss_mask_neg_iou
+    - loss_mask_bce
+    - loss_mask_beta_prior
+    - loss_eikonal
+    - loss_density_tv
+    - loss_depth_neg_penalty
+    - loss_autodecoder_norm
+    - loss_prev_stage_rgb_mse
+    - loss_prev_stage_rgb_psnr_fg
+    - loss_prev_stage_rgb_psnr
+    - loss_prev_stage_mask_bce
+    - objective
+    - epoch
+    - sec/it
+    mask_images: true
+    mask_depths: true
+    render_image_width: 400
+    render_image_height: 400
+    mask_threshold: 0.5
+    output_rasterized_mc: false
+    bg_color:
+    - 0.0
+    - 0.0
+    - 0.0
+    num_passes: 1
+    chunk_size_grid: 4096
+    render_features_dimensions: 3
+    tqdm_trigger_threshold: 16
+    n_train_target_views: 1
+    sampling_mode_training: mask_sample
+    sampling_mode_evaluation: full_grid
+    global_encoder_class_type: null
+    raysampler_class_type: AdaptiveRaySampler
+    renderer_class_type: MultiPassEmissionAbsorptionRenderer
+    image_feature_extractor_class_type: null
+    view_pooler_enabled: false
+    implicit_function_class_type: NeuralRadianceFieldImplicitFunction
+    view_metrics_class_type: ViewMetrics
+    regularization_metrics_class_type: RegularizationMetrics
+    loss_weights:
+      loss_rgb_mse: 1.0
+      loss_prev_stage_rgb_mse: 1.0
+      loss_mask_bce: 0.0
+      loss_prev_stage_mask_bce: 0.0
+    global_encoder_HarmonicTimeEncoder_args:
+      n_harmonic_functions: 10
+      append_input: true
+      time_divisor: 1.0
+    global_encoder_SequenceAutodecoder_args:
+      autodecoder_args:
+        encoding_dim: 0
+        n_instances: 1
+        init_scale: 1.0
+        ignore_input: false
+    raysampler_AdaptiveRaySampler_args:
+      n_pts_per_ray_training: 64
+      n_pts_per_ray_evaluation: 64
+      n_rays_per_image_sampled_from_mask: 1024
+      n_rays_total_training: null
+      stratified_point_sampling_training: true
+      stratified_point_sampling_evaluation: false
+      cast_ray_bundle_as_cone: false
+      scene_extent: 8.0
+      scene_center:
+      - 0.0
+      - 0.0
+      - 0.0
+    raysampler_NearFarRaySampler_args:
+      n_pts_per_ray_training: 64
+      n_pts_per_ray_evaluation: 64
+      n_rays_per_image_sampled_from_mask: 1024
+      n_rays_total_training: null
+      stratified_point_sampling_training: true
+      stratified_point_sampling_evaluation: false
+      cast_ray_bundle_as_cone: false
+      min_depth: 0.1
+      max_depth: 8.0
+    renderer_LSTMRenderer_args:
+      num_raymarch_steps: 10
+      init_depth: 17.0
+      init_depth_noise_std: 0.0005
+      hidden_size: 16
+      n_feature_channels: 256
+      bg_color: null
+      verbose: false
+    renderer_MultiPassEmissionAbsorptionRenderer_args:
+      raymarcher_class_type: EmissionAbsorptionRaymarcher
+      n_pts_per_ray_fine_training: 64
+      n_pts_per_ray_fine_evaluation: 64
+      stratified_sampling_coarse_training: true
+      stratified_sampling_coarse_evaluation: false
+      append_coarse_samples_to_fine: true
+      density_noise_std_train: 0.0
+      return_weights: false
+      blurpool_weights: false
+      sample_pdf_eps: 1.0e-05
+      raymarcher_CumsumRaymarcher_args:
+        surface_thickness: 1
+        bg_color:
+        - 0.0
+        replicate_last_interval: false
+        background_opacity: 0.0
+        density_relu: true
+        blend_output: false
+      raymarcher_EmissionAbsorptionRaymarcher_args:
+        surface_thickness: 1
+        bg_color:
+        - 0.0
+        replicate_last_interval: false
+        background_opacity: 10000000000.0
+        density_relu: true
+        blend_output: false
+    renderer_SignedDistanceFunctionRenderer_args:
+      ray_normal_coloring_network_args:
+        feature_vector_size: 3
+        mode: idr
+        d_in: 9
+        d_out: 3
+        dims:
+        - 512
+        - 512
+        - 512
+        - 512
+        weight_norm: true
+        n_harmonic_functions_dir: 0
+        pooled_feature_dim: 0
+      bg_color:
+      - 0.0
+      soft_mask_alpha: 50.0
+      ray_tracer_args:
+        sdf_threshold: 5.0e-05
+        line_search_step: 0.5
+        line_step_iters: 1
+        sphere_tracing_iters: 10
+        n_steps: 100
+        n_secant_steps: 8
+    image_feature_extractor_ResNetFeatureExtractor_args:
+      name: resnet34
+      pretrained: true
+      stages:
+      - 1
+      - 2
+      - 3
+      - 4
+      normalize_image: true
+      image_rescale: 0.16
+      first_max_pool: true
+      proj_dim: 32
+      l2_norm: true
+      add_masks: true
+      add_images: true
+      global_average_pool: false
+      feature_rescale: 1.0
+    view_pooler_args:
+      feature_aggregator_class_type: AngleWeightedReductionFeatureAggregator
+      view_sampler_args:
+        masked_sampling: false
+        sampling_mode: bilinear
+      feature_aggregator_AngleWeightedIdentityFeatureAggregator_args:
+        exclude_target_view: true
+        exclude_target_view_mask_features: true
+        concatenate_output: true
+        weight_by_ray_angle_gamma: 1.0
+        min_ray_angle_weight: 0.1
+      feature_aggregator_AngleWeightedReductionFeatureAggregator_args:
+        exclude_target_view: true
+        exclude_target_view_mask_features: true
+        concatenate_output: true
+        reduction_functions:
+        - AVG
+        - STD
+        weight_by_ray_angle_gamma: 1.0
+        min_ray_angle_weight: 0.1
+      feature_aggregator_IdentityFeatureAggregator_args:
+        exclude_target_view: true
+        exclude_target_view_mask_features: true
+        concatenate_output: true
+      feature_aggregator_ReductionFeatureAggregator_args:
+        exclude_target_view: true
+        exclude_target_view_mask_features: true
+        concatenate_output: true
+        reduction_functions:
+        - AVG
+        - STD
+    implicit_function_IdrFeatureField_args:
+      d_in: 3
+      d_out: 1
+      dims:
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      geometric_init: true
+      bias: 1.0
+      skip_in: []
+      weight_norm: true
+      n_harmonic_functions_xyz: 0
+      pooled_feature_dim: 0
+    implicit_function_NeRFormerImplicitFunction_args:
+      n_harmonic_functions_xyz: 10
+      n_harmonic_functions_dir: 4
+      n_hidden_neurons_dir: 128
+      input_xyz: true
+      xyz_ray_dir_in_camera_coords: false
+      use_integrated_positional_encoding: false
+      transformer_dim_down_factor: 2.0
+      n_hidden_neurons_xyz: 80
+      n_layers_xyz: 2
+      append_xyz:
+      - 1
+    implicit_function_NeuralRadianceFieldImplicitFunction_args:
+      n_harmonic_functions_xyz: 10
+      n_harmonic_functions_dir: 4
+      n_hidden_neurons_dir: 128
+      input_xyz: true
+      xyz_ray_dir_in_camera_coords: false
+      use_integrated_positional_encoding: false
+      transformer_dim_down_factor: 1.0
+      n_hidden_neurons_xyz: 256
+      n_layers_xyz: 8
+      append_xyz:
+      - 5
+    implicit_function_SRNHyperNetImplicitFunction_args:
+      hypernet_args:
+        n_harmonic_functions: 3
+        n_hidden_units: 256
+        n_layers: 2
+        n_hidden_units_hypernet: 256
+        n_layers_hypernet: 1
+        in_features: 3
+        out_features: 256
+        xyz_in_camera_coords: false
+      pixel_generator_args:
+        n_harmonic_functions: 4
+        n_hidden_units: 256
+        n_hidden_units_color: 128
+        n_layers: 2
+        in_features: 256
+        out_features: 3
+        ray_dir_in_camera_coords: false
+    implicit_function_SRNImplicitFunction_args:
+      raymarch_function_args:
+        n_harmonic_functions: 3
+        n_hidden_units: 256
+        n_layers: 2
+        in_features: 3
+        out_features: 256
+        xyz_in_camera_coords: false
+        raymarch_function: null
+      pixel_generator_args:
+        n_harmonic_functions: 4
+        n_hidden_units: 256
+        n_hidden_units_color: 128
+        n_layers: 2
+        in_features: 256
+        out_features: 3
+        ray_dir_in_camera_coords: false
+    implicit_function_VoxelGridImplicitFunction_args:
+      harmonic_embedder_xyz_density_args:
+        n_harmonic_functions: 6
+        omega_0: 1.0
+        logspace: true
+        append_input: true
+      harmonic_embedder_xyz_color_args:
+        n_harmonic_functions: 6
+        omega_0: 1.0
+        logspace: true
+        append_input: true
+      harmonic_embedder_dir_color_args:
+        n_harmonic_functions: 6
+        omega_0: 1.0
+        logspace: true
+        append_input: true
+      decoder_density_class_type: MLPDecoder
+      decoder_color_class_type: MLPDecoder
+      use_multiple_streams: true
+      xyz_ray_dir_in_camera_coords: false
+      scaffold_calculating_epochs: []
+      scaffold_resolution:
+      - 128
+      - 128
+      - 128
+      scaffold_empty_space_threshold: 0.001
+      scaffold_occupancy_chunk_size: -1
+      scaffold_max_pool_kernel_size: 3
+      scaffold_filter_points: true
+      volume_cropping_epochs: []
+      voxel_grid_density_args:
+        voxel_grid_class_type: FullResolutionVoxelGrid
+        extents:
+        - 2.0
+        - 2.0
+        - 2.0
+        translation:
+        - 0.0
+        - 0.0
+        - 0.0
+        init_std: 0.1
+        init_mean: 0.0
+        hold_voxel_grid_as_parameters: true
+        param_groups: {}
+        voxel_grid_CPFactorizedVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+          n_components: 24
+          basis_matrix: true
+        voxel_grid_FullResolutionVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+        voxel_grid_VMFactorizedVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+          n_components: null
+          distribution_of_components: null
+          basis_matrix: true
+      voxel_grid_color_args:
+        voxel_grid_class_type: FullResolutionVoxelGrid
+        extents:
+        - 2.0
+        - 2.0
+        - 2.0
+        translation:
+        - 0.0
+        - 0.0
+        - 0.0
+        init_std: 0.1
+        init_mean: 0.0
+        hold_voxel_grid_as_parameters: true
+        param_groups: {}
+        voxel_grid_CPFactorizedVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+          n_components: 24
+          basis_matrix: true
+        voxel_grid_FullResolutionVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+        voxel_grid_VMFactorizedVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+          n_components: null
+          distribution_of_components: null
+          basis_matrix: true
+      decoder_density_ElementwiseDecoder_args:
+        scale: 1.0
+        shift: 0.0
+        operation: IDENTITY
+      decoder_density_MLPDecoder_args:
+        param_groups: {}
+        network_args:
+          n_layers: 8
+          output_dim: 256
+          skip_dim: 39
+          hidden_dim: 256
+          input_skips:
+          - 5
+          skip_affine_trans: false
+          last_layer_bias_init: null
+          last_activation: RELU
+          use_xavier_init: true
+      decoder_color_ElementwiseDecoder_args:
+        scale: 1.0
+        shift: 0.0
+        operation: IDENTITY
+      decoder_color_MLPDecoder_args:
+        param_groups: {}
+        network_args:
+          n_layers: 8
+          output_dim: 256
+          skip_dim: 39
+          hidden_dim: 256
+          input_skips:
+          - 5
+          skip_affine_trans: false
+          last_layer_bias_init: null
+          last_activation: RELU
+          use_xavier_init: true
+    view_metrics_ViewMetrics_args: {}
+    regularization_metrics_RegularizationMetrics_args: {}
+  model_OverfitModel_args:
+    log_vars:
+    - loss_rgb_psnr_fg
+    - loss_rgb_psnr
+    - loss_rgb_mse
+    - loss_rgb_huber
+    - loss_depth_abs
+    - loss_depth_abs_fg
+    - loss_mask_neg_iou
+    - loss_mask_bce
+    - loss_mask_beta_prior
+    - loss_eikonal
+    - loss_density_tv
+    - loss_depth_neg_penalty
+    - loss_autodecoder_norm
+    - loss_prev_stage_rgb_mse
+    - loss_prev_stage_rgb_psnr_fg
+    - loss_prev_stage_rgb_psnr
+    - loss_prev_stage_mask_bce
+    - objective
+    - epoch
+    - sec/it
+    mask_images: true
+    mask_depths: true
+    render_image_width: 400
+    render_image_height: 400
+    mask_threshold: 0.5
+    output_rasterized_mc: false
+    bg_color:
+    - 0.0
+    - 0.0
+    - 0.0
+    chunk_size_grid: 4096
+    render_features_dimensions: 3
+    tqdm_trigger_threshold: 16
+    n_train_target_views: 1
+    sampling_mode_training: mask_sample
+    sampling_mode_evaluation: full_grid
+    global_encoder_class_type: null
+    raysampler_class_type: AdaptiveRaySampler
+    renderer_class_type: MultiPassEmissionAbsorptionRenderer
+    share_implicit_function_across_passes: false
+    implicit_function_class_type: NeuralRadianceFieldImplicitFunction
+    coarse_implicit_function_class_type: null
+    view_metrics_class_type: ViewMetrics
+    regularization_metrics_class_type: RegularizationMetrics
+    loss_weights:
+      loss_rgb_mse: 1.0
+      loss_prev_stage_rgb_mse: 1.0
+      loss_mask_bce: 0.0
+      loss_prev_stage_mask_bce: 0.0
+    global_encoder_HarmonicTimeEncoder_args:
+      n_harmonic_functions: 10
+      append_input: true
+      time_divisor: 1.0
+    global_encoder_SequenceAutodecoder_args:
+      autodecoder_args:
+        encoding_dim: 0
+        n_instances: 1
+        init_scale: 1.0
+        ignore_input: false
+    raysampler_AdaptiveRaySampler_args:
+      n_pts_per_ray_training: 64
+      n_pts_per_ray_evaluation: 64
+      n_rays_per_image_sampled_from_mask: 1024
+      n_rays_total_training: null
+      stratified_point_sampling_training: true
+      stratified_point_sampling_evaluation: false
+      cast_ray_bundle_as_cone: false
+      scene_extent: 8.0
+      scene_center:
+      - 0.0
+      - 0.0
+      - 0.0
+    raysampler_NearFarRaySampler_args:
+      n_pts_per_ray_training: 64
+      n_pts_per_ray_evaluation: 64
+      n_rays_per_image_sampled_from_mask: 1024
+      n_rays_total_training: null
+      stratified_point_sampling_training: true
+      stratified_point_sampling_evaluation: false
+      cast_ray_bundle_as_cone: false
+      min_depth: 0.1
+      max_depth: 8.0
+    renderer_LSTMRenderer_args:
+      num_raymarch_steps: 10
+      init_depth: 17.0
+      init_depth_noise_std: 0.0005
+      hidden_size: 16
+      n_feature_channels: 256
+      bg_color: null
+      verbose: false
+    renderer_MultiPassEmissionAbsorptionRenderer_args:
+      raymarcher_class_type: EmissionAbsorptionRaymarcher
+      n_pts_per_ray_fine_training: 64
+      n_pts_per_ray_fine_evaluation: 64
+      stratified_sampling_coarse_training: true
+      stratified_sampling_coarse_evaluation: false
+      append_coarse_samples_to_fine: true
+      density_noise_std_train: 0.0
+      return_weights: false
+      blurpool_weights: false
+      sample_pdf_eps: 1.0e-05
+      raymarcher_CumsumRaymarcher_args:
+        surface_thickness: 1
+        bg_color:
+        - 0.0
+        replicate_last_interval: false
+        background_opacity: 0.0
+        density_relu: true
+        blend_output: false
+      raymarcher_EmissionAbsorptionRaymarcher_args:
+        surface_thickness: 1
+        bg_color:
+        - 0.0
+        replicate_last_interval: false
+        background_opacity: 10000000000.0
+        density_relu: true
+        blend_output: false
+    renderer_SignedDistanceFunctionRenderer_args:
+      ray_normal_coloring_network_args:
+        feature_vector_size: 3
+        mode: idr
+        d_in: 9
+        d_out: 3
+        dims:
+        - 512
+        - 512
+        - 512
+        - 512
+        weight_norm: true
+        n_harmonic_functions_dir: 0
+        pooled_feature_dim: 0
+      bg_color:
+      - 0.0
+      soft_mask_alpha: 50.0
+      ray_tracer_args:
+        sdf_threshold: 5.0e-05
+        line_search_step: 0.5
+        line_step_iters: 1
+        sphere_tracing_iters: 10
+        n_steps: 100
+        n_secant_steps: 8
+    implicit_function_IdrFeatureField_args:
+      d_in: 3
+      d_out: 1
+      dims:
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      geometric_init: true
+      bias: 1.0
+      skip_in: []
+      weight_norm: true
+      n_harmonic_functions_xyz: 0
+      pooled_feature_dim: 0
+    implicit_function_NeRFormerImplicitFunction_args:
+      n_harmonic_functions_xyz: 10
+      n_harmonic_functions_dir: 4
+      n_hidden_neurons_dir: 128
+      input_xyz: true
+      xyz_ray_dir_in_camera_coords: false
+      use_integrated_positional_encoding: false
+      transformer_dim_down_factor: 2.0
+      n_hidden_neurons_xyz: 80
+      n_layers_xyz: 2
+      append_xyz:
+      - 1
+    implicit_function_NeuralRadianceFieldImplicitFunction_args:
+      n_harmonic_functions_xyz: 10
+      n_harmonic_functions_dir: 4
+      n_hidden_neurons_dir: 128
+      input_xyz: true
+      xyz_ray_dir_in_camera_coords: false
+      use_integrated_positional_encoding: false
+      transformer_dim_down_factor: 1.0
+      n_hidden_neurons_xyz: 256
+      n_layers_xyz: 8
+      append_xyz:
+      - 5
+    implicit_function_SRNHyperNetImplicitFunction_args:
+      latent_dim_hypernet: 0
+      hypernet_args:
+        n_harmonic_functions: 3
+        n_hidden_units: 256
+        n_layers: 2
+        n_hidden_units_hypernet: 256
+        n_layers_hypernet: 1
+        in_features: 3
+        out_features: 256
+        xyz_in_camera_coords: false
+      pixel_generator_args:
+        n_harmonic_functions: 4
+        n_hidden_units: 256
+        n_hidden_units_color: 128
+        n_layers: 2
+        in_features: 256
+        out_features: 3
+        ray_dir_in_camera_coords: false
+    implicit_function_SRNImplicitFunction_args:
+      raymarch_function_args:
+        n_harmonic_functions: 3
+        n_hidden_units: 256
+        n_layers: 2
+        in_features: 3
+        out_features: 256
+        xyz_in_camera_coords: false
+        raymarch_function: null
+      pixel_generator_args:
+        n_harmonic_functions: 4
+        n_hidden_units: 256
+        n_hidden_units_color: 128
+        n_layers: 2
+        in_features: 256
+        out_features: 3
+        ray_dir_in_camera_coords: false
+    implicit_function_VoxelGridImplicitFunction_args:
+      harmonic_embedder_xyz_density_args:
+        n_harmonic_functions: 6
+        omega_0: 1.0
+        logspace: true
+        append_input: true
+      harmonic_embedder_xyz_color_args:
+        n_harmonic_functions: 6
+        omega_0: 1.0
+        logspace: true
+        append_input: true
+      harmonic_embedder_dir_color_args:
+        n_harmonic_functions: 6
+        omega_0: 1.0
+        logspace: true
+        append_input: true
+      decoder_density_class_type: MLPDecoder
+      decoder_color_class_type: MLPDecoder
+      use_multiple_streams: true
+      xyz_ray_dir_in_camera_coords: false
+      scaffold_calculating_epochs: []
+      scaffold_resolution:
+      - 128
+      - 128
+      - 128
+      scaffold_empty_space_threshold: 0.001
+      scaffold_occupancy_chunk_size: -1
+      scaffold_max_pool_kernel_size: 3
+      scaffold_filter_points: true
+      volume_cropping_epochs: []
+      voxel_grid_density_args:
+        voxel_grid_class_type: FullResolutionVoxelGrid
+        extents:
+        - 2.0
+        - 2.0
+        - 2.0
+        translation:
+        - 0.0
+        - 0.0
+        - 0.0
+        init_std: 0.1
+        init_mean: 0.0
+        hold_voxel_grid_as_parameters: true
+        param_groups: {}
+        voxel_grid_CPFactorizedVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+          n_components: 24
+          basis_matrix: true
+        voxel_grid_FullResolutionVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+        voxel_grid_VMFactorizedVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+          n_components: null
+          distribution_of_components: null
+          basis_matrix: true
+      voxel_grid_color_args:
+        voxel_grid_class_type: FullResolutionVoxelGrid
+        extents:
+        - 2.0
+        - 2.0
+        - 2.0
+        translation:
+        - 0.0
+        - 0.0
+        - 0.0
+        init_std: 0.1
+        init_mean: 0.0
+        hold_voxel_grid_as_parameters: true
+        param_groups: {}
+        voxel_grid_CPFactorizedVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+          n_components: 24
+          basis_matrix: true
+        voxel_grid_FullResolutionVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+        voxel_grid_VMFactorizedVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+          n_components: null
+          distribution_of_components: null
+          basis_matrix: true
+      decoder_density_ElementwiseDecoder_args:
+        scale: 1.0
+        shift: 0.0
+        operation: IDENTITY
+      decoder_density_MLPDecoder_args:
+        param_groups: {}
+        network_args:
+          n_layers: 8
+          output_dim: 256
+          skip_dim: 39
+          hidden_dim: 256
+          input_skips:
+          - 5
+          skip_affine_trans: false
+          last_layer_bias_init: null
+          last_activation: RELU
+          use_xavier_init: true
+      decoder_color_ElementwiseDecoder_args:
+        scale: 1.0
+        shift: 0.0
+        operation: IDENTITY
+      decoder_color_MLPDecoder_args:
+        param_groups: {}
+        network_args:
+          n_layers: 8
+          output_dim: 256
+          skip_dim: 39
+          hidden_dim: 256
+          input_skips:
+          - 5
+          skip_affine_trans: false
+          last_layer_bias_init: null
+          last_activation: RELU
+          use_xavier_init: true
+    coarse_implicit_function_IdrFeatureField_args:
+      d_in: 3
+      d_out: 1
+      dims:
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      geometric_init: true
+      bias: 1.0
+      skip_in: []
+      weight_norm: true
+      n_harmonic_functions_xyz: 0
+      pooled_feature_dim: 0
+    coarse_implicit_function_NeRFormerImplicitFunction_args:
+      n_harmonic_functions_xyz: 10
+      n_harmonic_functions_dir: 4
+      n_hidden_neurons_dir: 128
+      input_xyz: true
+      xyz_ray_dir_in_camera_coords: false
+      use_integrated_positional_encoding: false
+      transformer_dim_down_factor: 2.0
+      n_hidden_neurons_xyz: 80
+      n_layers_xyz: 2
+      append_xyz:
+      - 1
+    coarse_implicit_function_NeuralRadianceFieldImplicitFunction_args:
+      n_harmonic_functions_xyz: 10
+      n_harmonic_functions_dir: 4
+      n_hidden_neurons_dir: 128
+      input_xyz: true
+      xyz_ray_dir_in_camera_coords: false
+      use_integrated_positional_encoding: false
+      transformer_dim_down_factor: 1.0
+      n_hidden_neurons_xyz: 256
+      n_layers_xyz: 8
+      append_xyz:
+      - 5
+    coarse_implicit_function_SRNHyperNetImplicitFunction_args:
+      latent_dim_hypernet: 0
+      hypernet_args:
+        n_harmonic_functions: 3
+        n_hidden_units: 256
+        n_layers: 2
+        n_hidden_units_hypernet: 256
+        n_layers_hypernet: 1
+        in_features: 3
+        out_features: 256
+        xyz_in_camera_coords: false
+      pixel_generator_args:
+        n_harmonic_functions: 4
+        n_hidden_units: 256
+        n_hidden_units_color: 128
+        n_layers: 2
+        in_features: 256
+        out_features: 3
+        ray_dir_in_camera_coords: false
+    coarse_implicit_function_SRNImplicitFunction_args:
+      raymarch_function_args:
+        n_harmonic_functions: 3
+        n_hidden_units: 256
+        n_layers: 2
+        in_features: 3
+        out_features: 256
+        xyz_in_camera_coords: false
+        raymarch_function: null
+      pixel_generator_args:
+        n_harmonic_functions: 4
+        n_hidden_units: 256
+        n_hidden_units_color: 128
+        n_layers: 2
+        in_features: 256
+        out_features: 3
+        ray_dir_in_camera_coords: false
+    coarse_implicit_function_VoxelGridImplicitFunction_args:
+      harmonic_embedder_xyz_density_args:
+        n_harmonic_functions: 6
+        omega_0: 1.0
+        logspace: true
+        append_input: true
+      harmonic_embedder_xyz_color_args:
+        n_harmonic_functions: 6
+        omega_0: 1.0
+        logspace: true
+        append_input: true
+      harmonic_embedder_dir_color_args:
+        n_harmonic_functions: 6
+        omega_0: 1.0
+        logspace: true
+        append_input: true
+      decoder_density_class_type: MLPDecoder
+      decoder_color_class_type: MLPDecoder
+      use_multiple_streams: true
+      xyz_ray_dir_in_camera_coords: false
+      scaffold_calculating_epochs: []
+      scaffold_resolution:
+      - 128
+      - 128
+      - 128
+      scaffold_empty_space_threshold: 0.001
+      scaffold_occupancy_chunk_size: -1
+      scaffold_max_pool_kernel_size: 3
+      scaffold_filter_points: true
+      volume_cropping_epochs: []
+      voxel_grid_density_args:
+        voxel_grid_class_type: FullResolutionVoxelGrid
+        extents:
+        - 2.0
+        - 2.0
+        - 2.0
+        translation:
+        - 0.0
+        - 0.0
+        - 0.0
+        init_std: 0.1
+        init_mean: 0.0
+        hold_voxel_grid_as_parameters: true
+        param_groups: {}
+        voxel_grid_CPFactorizedVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+          n_components: 24
+          basis_matrix: true
+        voxel_grid_FullResolutionVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+        voxel_grid_VMFactorizedVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+          n_components: null
+          distribution_of_components: null
+          basis_matrix: true
+      voxel_grid_color_args:
+        voxel_grid_class_type: FullResolutionVoxelGrid
+        extents:
+        - 2.0
+        - 2.0
+        - 2.0
+        translation:
+        - 0.0
+        - 0.0
+        - 0.0
+        init_std: 0.1
+        init_mean: 0.0
+        hold_voxel_grid_as_parameters: true
+        param_groups: {}
+        voxel_grid_CPFactorizedVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+          n_components: 24
+          basis_matrix: true
+        voxel_grid_FullResolutionVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+        voxel_grid_VMFactorizedVoxelGrid_args:
+          align_corners: true
+          padding: zeros
+          mode: bilinear
+          n_features: 1
+          resolution_changes:
+            0:
+            - 128
+            - 128
+            - 128
+          n_components: null
+          distribution_of_components: null
+          basis_matrix: true
+      decoder_density_ElementwiseDecoder_args:
+        scale: 1.0
+        shift: 0.0
+        operation: IDENTITY
+      decoder_density_MLPDecoder_args:
+        param_groups: {}
+        network_args:
+          n_layers: 8
+          output_dim: 256
+          skip_dim: 39
+          hidden_dim: 256
+          input_skips:
+          - 5
+          skip_affine_trans: false
+          last_layer_bias_init: null
+          last_activation: RELU
+          use_xavier_init: true
+      decoder_color_ElementwiseDecoder_args:
+        scale: 1.0
+        shift: 0.0
+        operation: IDENTITY
+      decoder_color_MLPDecoder_args:
+        param_groups: {}
+        network_args:
+          n_layers: 8
+          output_dim: 256
+          skip_dim: 39
+          hidden_dim: 256
+          input_skips:
+          - 5
+          skip_affine_trans: false
+          last_layer_bias_init: null
+          last_activation: RELU
+          use_xavier_init: true
+    view_metrics_ViewMetrics_args: {}
+    regularization_metrics_RegularizationMetrics_args: {}
+optimizer_factory_ImplicitronOptimizerFactory_args:
+  betas:
+  - 0.9
+  - 0.999
+  breed: Adam
+  exponential_lr_step_size: 250
+  gamma: 0.1
+  lr: 0.0005
+  lr_policy: MultiStepLR
+  momentum: 0.9
+  multistep_lr_milestones: []
+  weight_decay: 0.0
+  linear_exponential_lr_milestone: 200
+  linear_exponential_start_gamma: 0.1
+  foreach: true
+  group_learning_rates: {}
+training_loop_ImplicitronTrainingLoop_args:
+  evaluator_class_type: ImplicitronEvaluator
+  evaluator_ImplicitronEvaluator_args:
+    is_multisequence: false
+    camera_difficulty_bin_breaks:
+    - 0.97
+    - 0.98
+  eval_only: false
+  max_epochs: 1000
+  store_checkpoints: true
+  store_checkpoints_purge: 1
+  test_interval: -1
+  test_when_finished: false
+  validation_interval: 1
+  clip_grad: 0.0
+  metric_print_interval: 5
+  visualize_interval: 1000
+  visdom_env: ''
+  visdom_port: 8097
+  visdom_server: http://127.0.0.1
diff --git a/pytorch3d/projects/implicitron_trainer/tests/test_experiment.py b/pytorch3d/projects/implicitron_trainer/tests/test_experiment.py
new file mode 100644
index 0000000000000000000000000000000000000000..486d2134aa3d8b664bf3ed00efa1df0a812aafa1
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/tests/test_experiment.py
@@ -0,0 +1,280 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import tempfile
+import unittest
+from pathlib import Path
+
+import torch
+
+from hydra import compose, initialize_config_dir
+from omegaconf import OmegaConf
+from projects.implicitron_trainer.impl.optimizer_factory import (
+    ImplicitronOptimizerFactory,
+)
+
+from .. import experiment
+from .utils import interactive_testing_requested, intercept_logs
+
+internal = os.environ.get("FB_TEST", False)  # any non-empty FB_TEST value is truthy
+
+
+DATA_DIR = Path(__file__).resolve().parent  # directory containing this test file
+IMPLICITRON_CONFIGS_DIR = Path(__file__).resolve().parent.parent / "configs"
+DEBUG: bool = False  # when True, tests dump configs and rewrite experiment.yaml
+
+# TODO:
+# - add enough files to skateboard_first_5 that this works on RE.
+# - share common code with PyTorch3D tests?
+
+
+def _parse_float_from_log(line: str) -> float:
+    return float(line.split()[-1])  # last whitespace-separated token of the line
+
+
+class TestExperiment(unittest.TestCase):
+    def setUp(self) -> None:
+        self.maxDiff = None  # do not truncate assertEqual diffs
+
+    def test_from_defaults(self) -> None:
+        # Test making minimal changes to the dataclass defaults.
+        if not interactive_testing_requested() or not internal:
+            return  # no-op unless interactive testing is requested and FB_TEST is set
+
+        # Manually override config values. Note that this is not necessary
+        # outside of the tests!
+        cfg = OmegaConf.structured(experiment.Experiment)
+        cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_class_type = (
+            "JsonIndexDatasetMapProvider"
+        )
+        dataset_args = (
+            cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
+        )
+        dataloader_args = (
+            cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
+        )
+        dataset_args.category = "skateboard"
+        dataset_args.test_restrict_sequence_id = 0
+        dataset_args.dataset_root = "manifold://co3d/tree/extracted"
+        dataset_args.dataset_JsonIndexDataset_args.limit_sequences_to = 5
+        dataset_args.dataset_JsonIndexDataset_args.image_height = 80
+        dataset_args.dataset_JsonIndexDataset_args.image_width = 80
+        dataloader_args.dataset_length_train = 1
+        dataloader_args.dataset_length_val = 1
+        cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 2
+        cfg.training_loop_ImplicitronTrainingLoop_args.store_checkpoints = False
+        cfg.optimizer_factory_ImplicitronOptimizerFactory_args.multistep_lr_milestones = [
+            0,
+            1,
+        ]
+
+        if DEBUG:
+            experiment.dump_cfg(cfg)
+        with intercept_logs(
+            logger_name="projects.implicitron_trainer.impl.training_loop",
+            regexp="LR change!",
+        ) as intercepted_logs:
+            experiment_runner = experiment.Experiment(**cfg)
+            experiment_runner.run()
+
+            # Make sure the LR decreased 10-fold at both milestones (epochs 0 and 1).
+            self.assertEqual(intercepted_logs[0].split()[-1], "5e-06")
+
+    def test_exponential_lr(self) -> None:
+        # Test the exponential LR policy with minimal changes to the defaults.
+        if not interactive_testing_requested():
+            return
+        cfg = OmegaConf.structured(experiment.Experiment)
+        cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_class_type = (
+            "JsonIndexDatasetMapProvider"
+        )
+        dataset_args = (
+            cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
+        )
+        dataloader_args = (
+            cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
+        )
+        dataset_args.category = "skateboard"
+        dataset_args.test_restrict_sequence_id = 0
+        dataset_args.dataset_root = "manifold://co3d/tree/extracted"
+        dataset_args.dataset_JsonIndexDataset_args.limit_sequences_to = 5
+        dataset_args.dataset_JsonIndexDataset_args.image_height = 80
+        dataset_args.dataset_JsonIndexDataset_args.image_width = 80
+        dataloader_args.dataset_length_train = 1
+        dataloader_args.dataset_length_val = 1
+        cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 2
+        cfg.training_loop_ImplicitronTrainingLoop_args.store_checkpoints = False
+        cfg.optimizer_factory_ImplicitronOptimizerFactory_args.lr_policy = "Exponential"
+        cfg.optimizer_factory_ImplicitronOptimizerFactory_args.exponential_lr_step_size = (
+            2
+        )
+
+        if DEBUG:
+            experiment.dump_cfg(cfg)
+        with intercept_logs(
+            logger_name="projects.implicitron_trainer.impl.training_loop",
+            regexp="LR change!",
+        ) as intercepted_logs:
+            experiment_runner = experiment.Experiment(**cfg)
+            experiment_runner.run()
+
+            # Make sure we followed the exponential lr schedule with gamma=0.1,
+            # exponential_lr_step_size=2: first 0.0005 * 0.1 ** 0.5, then a full
+            # 10x decrease to 5e-5 after two epochs.
+            self.assertEqual(intercepted_logs[0].split()[-1], "0.00015811388300841897")
+            self.assertEqual(intercepted_logs[1].split()[-1], "5e-05")
+
+    def test_yaml_contents(self) -> None:
+        # Check that the default config values, defined by Experiment and its
+        # members, are what we expect them to be.
+        cfg = OmegaConf.structured(experiment.Experiment)
+        # the following removes the possible effect of env variables
+        ds_arg = cfg.data_source_ImplicitronDataSource_args
+        ds_arg.dataset_map_provider_JsonIndexDatasetMapProvider_args.dataset_root = ""
+        ds_arg.dataset_map_provider_JsonIndexDatasetMapProviderV2_args.dataset_root = ""
+        if "dataset_map_provider_SqlIndexDatasetMapProvider_args" in ds_arg:
+            del ds_arg.dataset_map_provider_SqlIndexDatasetMapProvider_args
+        cfg.training_loop_ImplicitronTrainingLoop_args.visdom_port = 8097
+        yaml = OmegaConf.to_yaml(cfg, sort_keys=False)
+        if DEBUG:  # overwrite the reference file with the current defaults
+            (DATA_DIR / "experiment.yaml").write_text(yaml)
+        self.assertEqual(yaml, (DATA_DIR / "experiment.yaml").read_text())
+
+    def test_load_configs(self) -> None:
+        # Check that all the pre-prepared (non-base) configs can be composed.
+        config_files = []
+
+        for pattern in (
+            "repro_singleseq*.yaml",
+            "repro_multiseq*.yaml",
+            "overfit_singleseq*.yaml",
+        ):
+            config_files.extend(
+                [
+                    f
+                    for f in IMPLICITRON_CONFIGS_DIR.glob(pattern)
+                    if not f.name.endswith("_base.yaml")
+                ]
+            )
+
+        for file in config_files:
+            with self.subTest(file.name):
+                with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
+                    compose(file.name)
+
+    def test_optimizer_factory(self) -> None:
+        model = torch.nn.Linear(2, 2)  # tiny model; only the optimizer type is checked
+
+        adam, sched = ImplicitronOptimizerFactory(breed="Adam")(0, model)
+        self.assertIsInstance(adam, torch.optim.Adam)
+        sgd, sched = ImplicitronOptimizerFactory(breed="SGD")(0, model)
+        self.assertIsInstance(sgd, torch.optim.SGD)
+        adagrad, sched = ImplicitronOptimizerFactory(breed="Adagrad")(0, model)
+        self.assertIsInstance(adagrad, torch.optim.Adagrad)
+
+
+class TestNerfRepro(unittest.TestCase):
+    @unittest.skip("This test runs full blender training.")
+    def test_nerf_blender(self) -> None:
+        # Train vanilla NERF.
+        # Set env vars BLENDER_DATASET_ROOT and BLENDER_SINGLESEQ_CLASS first!
+        if not interactive_testing_requested():
+            return
+        with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
+            cfg = compose(config_name="repro_singleseq_nerf_blender", overrides=[])
+            experiment_runner = experiment.Experiment(**cfg)
+            experiment.dump_cfg(cfg)
+            experiment_runner.run()
+
+    @unittest.skip("This test runs full llff training.")
+    def test_nerf_llff(self) -> None:
+        # Train vanilla NERF.
+        # Set env vars LLFF_DATASET_ROOT and LLFF_SINGLESEQ_CLASS first!
+        if not interactive_testing_requested():
+            return
+        LLFF_SINGLESEQ_CLASS = os.environ["LLFF_SINGLESEQ_CLASS"]  # read after the guard
+        with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
+            cfg = compose(
+                config_name=f"repro_singleseq_nerf_llff_{LLFF_SINGLESEQ_CLASS}",
+                overrides=[],
+            )
+            experiment_runner = experiment.Experiment(**cfg)
+            experiment.dump_cfg(cfg)
+            experiment_runner.run()
+
+    @unittest.skip("This test runs nerf training on co3d v2 - manyview.")
+    def test_nerf_co3dv2_manyview(self) -> None:
+        # Train NERF
+        if not interactive_testing_requested():
+            return
+        with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
+            cfg = compose(
+                config_name="repro_singleseq_v2_nerf",
+                overrides=[],
+            )
+            experiment_runner = experiment.Experiment(**cfg)
+            experiment.dump_cfg(cfg)
+            experiment_runner.run()
+
+    @unittest.skip("This test runs nerformer training on co3d v2 - fewview.")
+    def test_nerformer_co3dv2_fewview(self) -> None:
+        # Train NeRFormer
+        if not interactive_testing_requested():
+            return
+        with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
+            cfg = compose(
+                config_name="repro_multiseq_v2_nerformer",
+                overrides=[],
+            )
+            experiment_runner = experiment.Experiment(**cfg)
+            experiment.dump_cfg(cfg)
+            experiment_runner.run()
+
+    @unittest.skip("This test checks resuming of the NeRF training.")
+    def test_nerf_blender_resume(self) -> None:
+        # Train one train batch of NeRF, then resume for one more batch.
+        # Set env vars BLENDER_DATASET_ROOT and BLENDER_SINGLESEQ_CLASS first!
+        if not interactive_testing_requested():
+            return
+        with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
+            with tempfile.TemporaryDirectory() as exp_dir:
+                cfg = compose(config_name="repro_singleseq_nerf_blender", overrides=[])
+                cfg.exp_dir = exp_dir
+
+                # set the training dataset length to 1
+
+                # fmt: off
+                (
+                    cfg
+                    .data_source_ImplicitronDataSource_args
+                    .data_loader_map_provider_SequenceDataLoaderMapProvider_args
+                    .dataset_length_train
+                ) = 1
+                # fmt: on
+
+                # run for one epoch
+                cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 1
+                experiment_runner = experiment.Experiment(**cfg)
+                experiment.dump_cfg(cfg)
+                experiment_runner.run()
+
+                # raise max_epochs by 2 (1 -> 3), let the optimizer resume
+                cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 3
+                experiment_runner = experiment.Experiment(**cfg)
+                experiment_runner.run()
+
+                # start from scratch (resume disabled)
+                cfg.model_factory_ImplicitronModelFactory_args.resume = False
+                experiment_runner = experiment.Experiment(**cfg)
+                experiment_runner.run()
+
+                # force resume from epoch 1
+                cfg.model_factory_ImplicitronModelFactory_args.resume = True
+                cfg.model_factory_ImplicitronModelFactory_args.force_resume = True
+                cfg.model_factory_ImplicitronModelFactory_args.resume_epoch = 1
+                experiment_runner = experiment.Experiment(**cfg)
+                experiment_runner.run()
diff --git a/pytorch3d/projects/implicitron_trainer/tests/test_optimizer_factory.py b/pytorch3d/projects/implicitron_trainer/tests/test_optimizer_factory.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef7517fe7269cbbc31701b12dcf838114148366f
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/tests/test_optimizer_factory.py
@@ -0,0 +1,183 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+import os
+import unittest
+
+import torch
+from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args
+
+from ..impl.optimizer_factory import (
+    ImplicitronOptimizerFactory,
+    logger as factory_logger,
+)
+
+internal = os.environ.get("FB_TEST", False)
+
+
+class TestOptimizerFactory(unittest.TestCase):
+    def setUp(self) -> None:
+        torch.manual_seed(42)
+        expand_args_fields(ImplicitronOptimizerFactory)
+
+    def _get_param_groups(self, model):
+        # Return the default factory's param groups for `model`, with factory logs
+        # below ERROR silenced; addCleanup restores the level when the test ends.
+        default_cfg = get_default_args(ImplicitronOptimizerFactory)
+        factory = ImplicitronOptimizerFactory(default_cfg)
+        self.addCleanup(factory_logger.setLevel, factory_logger.level)
+        factory_logger.setLevel(logging.ERROR)
+        return factory._get_param_groups(model)
+
+    def _assert_allin(self, a, param_groups, key):
+        """
+        Asserts that the group named by key holds exactly the
+        parameters in a (membership is checked in both directions).
+        """
+        with self.subTest(f"Testing key {key}"):
+            b = param_groups[key]
+            for el in a:
+                if el not in b:
+                    raise ValueError(
+                        f"Element {el}\n\n from:\n\n {a}\n\n not in:\n\n {b}\n\n."
+                        + f" Full param groups = \n\n{param_groups}"
+                    )
+            for el in b:
+                if el not in a:
+                    raise ValueError(
+                        f"Element {el}\n\n from:\n\n {b}\n\n not in:\n\n {a}\n\n."
+                        + f" Full param groups = \n\n{param_groups}"
+                    )
+
+    def test_default_param_group_assignment(self) -> None:
+        pa, pb, pc = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(3)]
+        na, nb = Node(params=[pa]), Node(params=[pb])
+        root = Node(children=[na, nb], params=[pc])
+        param_groups = self._get_param_groups(root)
+        self._assert_allin([pa, pb, pc], param_groups, "default")
+
+    def test_member_overrides_default_param_group_assignment(self) -> None:
+        pa, pb, pc = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(3)]
+        na, nb = Node(params=[pa]), Node(params=[pb])
+        root = Node(children=[na, nb], params=[pc], param_groups={"m1": "pb"})
+        param_groups = self._get_param_groups(root)
+        self._assert_allin([pa, pc], param_groups, "default")
+        self._assert_allin([pb], param_groups, "pb")
+
+    def test_self_overrides_member_param_group_assignment(self) -> None:
+        pa, pb, pc = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(3)]
+        na, nb = Node(params=[pa]), Node(params=[pb], param_groups={"self": "pb_self"})
+        root = Node(children=[na, nb], params=[pc], param_groups={"m1": "pb_member"})
+        param_groups = self._get_param_groups(root)
+        self._assert_allin([pa, pc], param_groups, "default")
+        self._assert_allin([pb], param_groups, "pb_self")
+        assert len(param_groups["pb_member"]) == 0, param_groups
+
+    def test_param_overrides_self_param_group_assignment(self) -> None:
+        pa, pb, pc = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(3)]
+        na, nb = Node(params=[pa]), Node(
+            params=[pb], param_groups={"self": "pb_self", "p1": "pb_param"}
+        )  # NOTE(review): nb only defines p0, so the "p1" key likely matches nothing
+        root = Node(children=[na, nb], params=[pc], param_groups={"m1": "pb_member"})
+        param_groups = self._get_param_groups(root)
+        self._assert_allin([pa, pc], param_groups, "default")
+        self._assert_allin([pb], param_groups, "pb_self")
+        assert len(param_groups["pb_member"]) == 0, param_groups
+
+    def test_no_param_groups_defined(self) -> None:
+        pa, pb, pc = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(3)]
+        na, nb = Node(params=[pa]), Node(params=[pb])
+        root = Node(children=[na, nb], params=[pc])
+        param_groups = self._get_param_groups(root)
+        self._assert_allin([pa, pb, pc], param_groups, "default")
+
+    def test_double_dotted(self) -> None:
+        pa, pb = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(2)]
+        na = Node(params=[pa, pb])
+        nb = Node(children=[na])
+        root = Node(children=[nb], param_groups={"m0.m0.p0": "X", "m0.m0": "Y"})
+        param_groups = self._get_param_groups(root)
+        self._assert_allin([pa], param_groups, "X")
+        self._assert_allin([pb], param_groups, "Y")
+
+    def test_tree_param_groups_defined(self) -> None:
+        """
+        Test generic tree assignment.
+
+        A0
+        |---------------------------
+        |              |           |
+        Bb             M           J-
+        |-----                     |-------
+        |     |                    |      |
+        C     Ddg                  K      Ll
+              |--------------
+              |    |    |    |
+              E4   Ff   G    H-
+
+        All nodes have one parameter. Character next to the capital
+        letter means they have added something to their `param_groups`:
+            - small letter same as capital means self is set to that letter
+            - small letter different than capital means that member is set
+                (the one that is named like that)
+            - number means parameter's param group is set like that
+            - "-" means it does not have `param_groups` member
+        """
+        p = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(12)]
+        L = Node(params=[p[11]], param_groups={"self": "l"})
+        K = Node(params=[p[10]], param_groups={})
+        J = Node(params=[p[9]], param_groups=None, children=[K, L])
+        M = Node(params=[p[8]], param_groups={})
+
+        E = Node(params=[p[4]], param_groups={"p0": "4"})
+        F = Node(params=[p[5]], param_groups={"self": "f"})
+        G = Node(params=[p[6]], param_groups={})
+        H = Node(params=[p[7]], param_groups=None)
+
+        D = Node(
+            params=[p[3]], param_groups={"self": "d", "m2": "g"}, children=[E, F, G, H]
+        )
+        C = Node(params=[p[2]], param_groups={})
+
+        B = Node(params=[p[1]], param_groups={"self": "b"}, children=[C, D])
+
+        A = Node(params=[p[0]], param_groups={"p0": "0"}, children=[B, M, J])
+
+        param_groups = self._get_param_groups(A)
+
+        # if parts of the group belong to two different categories assert is repeated
+        # parameter level
+        self._assert_allin([p[0]], param_groups, "0")
+        self._assert_allin([p[4]], param_groups, "4")
+        # self level
+        self._assert_allin([p[5]], param_groups, "f")
+        self._assert_allin([p[11]], param_groups, "l")
+        self._assert_allin([p[2], p[1]], param_groups, "b")
+        self._assert_allin([p[7], p[3]], param_groups, "d")
+        # member level
+        self._assert_allin([p[6]], param_groups, "g")
+        # inherit level
+        self._assert_allin([p[7], p[3]], param_groups, "d")
+        self._assert_allin([p[2], p[1]], param_groups, "b")
+        # default level
+        self._assert_allin([p[8], p[9], p[10]], param_groups, "default")
+
+
+class Node(torch.nn.Module):
+    def __init__(self, children=(), params=(), param_groups=None) -> None:
+        super().__init__()
+        for i, child in enumerate(children):
+            self.add_module("m" + str(i), child)  # submodules are named m0, m1, ...
+        for i, param in enumerate(params):
+            setattr(self, "p" + str(i), param)  # parameters are named p0, p1, ...
+        if param_groups is not None:
+            self.param_groups = param_groups  # attribute stays unset when None
+
+    def __str__(self) -> str:
+        return (
+            "modules:\n" + str(self._modules) + "\nparameters\n" + str(self._parameters)
+        )
diff --git a/pytorch3d/projects/implicitron_trainer/tests/test_visualize.py b/pytorch3d/projects/implicitron_trainer/tests/test_visualize.py
new file mode 100644
index 0000000000000000000000000000000000000000..d414a05d95af2870de7de103969ebbd266939fa1
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/tests/test_visualize.py
@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import unittest
+
+from .. import visualize_reconstruction
+from .utils import interactive_testing_requested
+
+internal = os.environ.get("FB_TEST", False)
+
+
+class TestVisualize(unittest.TestCase):
+    def test_from_defaults(self):
+        # Interactive-only smoke test; reads the `exp_dir` environment variable.
+        if not interactive_testing_requested():
+            return
+        overrides = [
+            "exp_dir=" + os.environ["exp_dir"],
+            "n_eval_cameras=40",
+            "render_size=[64,64]",
+            "video_size=[256,256]",
+        ]
+        visualize_reconstruction.main(overrides)
diff --git a/pytorch3d/projects/implicitron_trainer/tests/utils.py b/pytorch3d/projects/implicitron_trainer/tests/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b6d84ae575aeb6cf8b0411600b7e55e51fce3a9
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/tests/utils.py
@@ -0,0 +1,40 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import contextlib
+import logging
+import os
+import re
+
+
+@contextlib.contextmanager
+def intercept_logs(logger_name: str, regexp: str):
+    """Yield a list collecting the messages of logger `logger_name` matching `regexp`."""
+    captured = []
+
+    class _Recorder(logging.Filter):
+        def filter(self, record):
+            text = record.getMessage()
+            if re.search(regexp, text) is not None:
+                captured.append(text)
+            return True  # only observe; never drop a record
+
+    recorder = _Recorder()
+    target = logging.getLogger(logger_name)
+    target.addFilter(recorder)
+    try:
+        yield captured
+    finally:
+        target.removeFilter(recorder)
+
+
+def interactive_testing_requested() -> bool:
+    """
+    Tell whether the interactive-only tests should run. They are skipped in regular
+    runs and enabled by this function returning True, which the user requests by
+    setting the environment variable `PYTORCH3D_INTERACTIVE_TESTING` to 1.
+    """
+    return os.getenv("PYTORCH3D_INTERACTIVE_TESTING") == "1"
diff --git a/pytorch3d/projects/implicitron_trainer/visualize_reconstruction.py b/pytorch3d/projects/implicitron_trainer/visualize_reconstruction.py
new file mode 100644
index 0000000000000000000000000000000000000000..618d1aa61e312594f7ebf89a4bb4a82e8a8e21df
--- /dev/null
+++ b/pytorch3d/projects/implicitron_trainer/visualize_reconstruction.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Script to visualize a previously trained model. Example call:
+
+    pytorch3d_implicitron_visualizer \
+    exp_dir='./exps/checkpoint_dir' visdom_show_preds=True visdom_port=8097 \
+    n_eval_cameras=40 render_size="[64,64]" video_size="[256,256]"
+"""
+
+import os
+import sys
+from typing import Optional, Tuple
+
+import numpy as np
+import torch
+from omegaconf import DictConfig, OmegaConf
+from pytorch3d.implicitron.models.visualization.render_flyaround import render_flyaround
+from pytorch3d.implicitron.tools.config import enable_get_default_args, get_default_args
+
+from .experiment import Experiment
+
+
+def visualize_reconstruction(
+    exp_dir: str = "",
+    restrict_sequence_name: Optional[str] = None,
+    output_directory: Optional[str] = None,
+    render_size: Tuple[int, int] = (512, 512),
+    video_size: Optional[Tuple[int, int]] = None,
+    split: str = "train",
+    n_source_views: int = 9,
+    n_eval_cameras: int = 40,
+    visdom_show_preds: bool = False,
+    visdom_server: str = "http://127.0.0.1",
+    visdom_port: int = 8097,
+    visdom_env: Optional[str] = None,
+    **render_flyaround_kwargs,
+) -> None:
+    """
+    Given an `exp_dir` containing a trained Implicitron model, generates videos consisting
+    of renders of sequences from the dataset used to train and evaluate the trained
+    Implicitron model.
+
+    Args:
+        exp_dir: Implicitron experiment directory.
+        restrict_sequence_name: If set, defines the list of sequences to visualize.
+        output_directory: If set, defines a custom directory to output visualizations to.
+        render_size: The size (HxW) of the generated renders.
+        video_size: The size (HxW) of the output video.
+        split: The dataset split to use for visualization.
+            Can be "train" / "val" / "test".
+        n_source_views: The number of source views added to each rendered batch. These
+            views are required inputs for models such as NeRFormer / NeRF-WCE.
+        n_eval_cameras: The number of cameras in each fly-around trajectory.
+        visdom_show_preds: If `True`, outputs visualizations to visdom.
+        visdom_server: The address of the visdom server.
+        visdom_port: The port of the visdom server.
+        visdom_env: If set, defines a custom name for the visdom environment.
+        render_flyaround_kwargs: Keyword arguments passed to the invoked `render_flyaround`
+            function (see `pytorch3d.implicitron.models.visualization.render_flyaround`).
+    """
+
+    # In case an output directory is specified use it. If no output_directory
+    # is specified create a vis folder inside the experiment directory
+    if output_directory is None:
+        output_directory = os.path.join(exp_dir, "vis")
+    os.makedirs(output_directory, exist_ok=True)
+
+    # Set the random seeds
+    torch.manual_seed(0)
+    np.random.seed(0)
+
+    # Get the config from the experiment_directory,
+    # and overwrite relevant fields
+    config = _get_config_from_experiment_directory(exp_dir)
+    config.exp_dir = exp_dir
+    # important so that the CO3D dataset gets loaded in full
+    data_source_args = config.data_source_ImplicitronDataSource_args
+    if "dataset_map_provider_JsonIndexDatasetMapProvider_args" in data_source_args:
+        dataset_args = (
+            data_source_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
+        )
+        dataset_args.test_on_train = False
+        if restrict_sequence_name is not None:
+            dataset_args.restrict_sequence_name = restrict_sequence_name
+
+    # Set the rendering image size. NOTE(review): [0]->width although doc says HxW.
+    model_factory_args = config.model_factory_ImplicitronModelFactory_args
+    model_factory_args.force_resume = True
+    model_args = model_factory_args.model_GenericModel_args
+    model_args.render_image_width = render_size[0]
+    model_args.render_image_height = render_size[1]
+
+    # Load the previously trained model
+    experiment = Experiment(**config)
+    model = experiment.model_factory(exp_dir=exp_dir)
+    device = torch.device("cuda")
+    model.to(device)
+    model.eval()
+
+    # Setup the dataset
+    data_source = experiment.data_source
+    dataset_map, _ = data_source.get_datasets_and_dataloaders()
+    dataset = dataset_map[split]
+    if dataset is None:
+        raise ValueError(f"{split} dataset not provided")
+
+    if visdom_env is None:
+        visdom_env = (
+            "visualizer_" + config.training_loop_ImplicitronTrainingLoop_args.visdom_env
+        )
+
+    # iterate over the sequences in the dataset
+    for sequence_name in dataset.sequence_names():
+        with torch.no_grad():
+            render_kwargs = {
+                "dataset": dataset,
+                "sequence_name": sequence_name,
+                "model": model,
+                "output_video_path": os.path.join(output_directory, "video"),
+                "n_source_views": n_source_views,
+                "visdom_show_preds": visdom_show_preds,
+                "n_flyaround_poses": n_eval_cameras,
+                "visdom_server": visdom_server,
+                "visdom_port": visdom_port,
+                "visdom_environment": visdom_env,
+                "video_resize": video_size,
+                "device": device,
+                **render_flyaround_kwargs,
+            }
+            render_flyaround(**render_kwargs)
+
+
+enable_get_default_args(visualize_reconstruction)
+
+
+def _get_config_from_experiment_directory(experiment_directory) -> DictConfig:
+    """Merge the directory's `expconfig.yaml` over the default Experiment args."""
+    stored = OmegaConf.load(os.path.join(experiment_directory, "expconfig.yaml"))
+    # pyre-ignore[7]
+    return OmegaConf.merge(get_default_args(Experiment), stored)
+
+
+def main(argv=sys.argv) -> None:
+    # parses `key=value` overrides from argv (NOTE(review): default includes argv[0])
+    cfg = OmegaConf.create(get_default_args(visualize_reconstruction))
+    cfg.update(OmegaConf.from_cli(argv))
+    with torch.no_grad():
+        visualize_reconstruction(**cfg)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pytorch3d/projects/nerf/.gitignore b/pytorch3d/projects/nerf/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..07f207f0f7bb99be4841fd6b3a7818d251ccc6d3
--- /dev/null
+++ b/pytorch3d/projects/nerf/.gitignore
@@ -0,0 +1,5 @@
+checkpoints
+outputs
+data/*.png
+data/*.pth
+data/*_license.txt
diff --git a/pytorch3d/projects/nerf/README.md b/pytorch3d/projects/nerf/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a103aab47d21ebfa7bfd65387a68f83f0a1c8fbc
--- /dev/null
+++ b/pytorch3d/projects/nerf/README.md
@@ -0,0 +1,91 @@
+Neural Radiance Fields in PyTorch3D
+===================================
+
+This project implements the Neural Radiance Fields (NeRF) from [1].
+
+<img src="https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/.github/nerf_project_logo.gif" width="600" height="338"/>
+
+
+Installation
+------------
+1) [Install PyTorch3D](https://github.com/facebookresearch/pytorch3d/blob/main/INSTALL.md)
+
+2) Install other dependencies:
+    - [`visdom`](https://github.com/facebookresearch/visdom)
+    - [`hydra`](https://github.com/facebookresearch/hydra)
+    - [`Pillow`](https://python-pillow.org/)
+    - [`requests`](https://pypi.org/project/requests/)
+
+    E.g. using `pip`:
+    ```
+    pip install visdom
+    pip install hydra-core --upgrade
+    pip install Pillow
+    pip install requests
+    ```
+
+    Exporting videos further requires a working `ffmpeg`.
+
+Training NeRF
+-------------
+```
+python ./train_nerf.py --config-name lego
+```
+will train the model from [1] on the Lego dataset.
+
+Note that the script outputs visualizations to `Visdom`. In order to enable this, make sure to start the visdom server (before launching the training) with the following command:
+```
+python -m visdom.server
+```
+Note that training on the "lego" scene takes roughly 24 hours on a single Tesla V100.
+
+#### Training data
+Note that the `train_nerf.py` script will automatically download the relevant dataset in case it is missing.
+
+Testing NeRF
+------------
+```
+python ./test_nerf.py --config-name lego
+```
+will load a trained model from the `./checkpoints` directory and evaluate it on the test split of the corresponding dataset (Lego in the case above).
+
+### Exporting multi-view video of the radiance field
+Furthermore, the codebase supports generating videos of the neural radiance field.
+The following generates a turntable video of the Lego scene:
+```
+python ./test_nerf.py --config-name=lego test.mode='export_video'
+```
+Note that this requires a working `ffmpeg` for generating the video from exported frames.
+
+Additionally, note that generation of the video in the original resolution is quite slow. In order to speed up the process, one can decrease the resolution of the output video by setting the `data.image_size` flag:
+```
+python ./test_nerf.py --config-name=lego test.mode='export_video' data.image_size="[128,128]"
+```
+This will generate the video in a lower `128 x 128` resolution.
+
+
+Training & testing on other datasets
+------------------------------------
+Currently we support the following datasets:
+- lego `python ./train_nerf.py --config-name lego`
+- fern `python ./train_nerf.py --config-name fern`
+- pt3logo `python ./train_nerf.py --config-name pt3logo`
+
+The dataset files are located in the following public S3 bucket:
+https://dl.fbaipublicfiles.com/pytorch3d_nerf_data
+
+Attribution: `lego` and `fern` are data from the original code release of [1] in https://drive.google.com/drive/folders/128yBriW1IG_3NJ5Rp7APSTZsJqdJdfc1, which are hosted under the CC-BY license (https://creativecommons.org/licenses/by/4.0/). The S3 bucket files contain the same images while the camera matrices have been adjusted to follow the PyTorch3D convention.
+
+#### Quantitative results
+Below are the comparisons between our implementation and the official [`TensorFlow code`](https://github.com/bmild/nerf). The speed is measured on an NVIDIA Quadro GP100.
+```
++----------------+------------------+------------------+-----------------+
+| Implementation |  Lego: test PSNR |  Fern: test PSNR |  training speed |
++----------------+------------------+------------------+-----------------+
+| TF (official)  |             31.0 |             27.5 |  0.24 sec/it    |
+| PyTorch3D      |             32.7 |             27.9 |  0.18 sec/it    |
++----------------+------------------+------------------+-----------------+
+```
+
+#### References
+[1] Ben Mildenhall and Pratul P. Srinivasan and Matthew Tancik and Jonathan T. Barron and Ravi Ramamoorthi and Ren Ng, NeRF: Representing Scenes as Neural Radiance Fields for View Synthesis, ECCV2020
diff --git a/pytorch3d/projects/nerf/__init__.py b/pytorch3d/projects/nerf/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/projects/nerf/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/projects/nerf/configs/fern.yaml b/pytorch3d/projects/nerf/configs/fern.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1beb67211ca43d44f3b92f2ed0c0698205456136
--- /dev/null
+++ b/pytorch3d/projects/nerf/configs/fern.yaml
@@ -0,0 +1,45 @@
+seed: 3
+resume: True
+stats_print_interval: 10
+validation_epoch_interval: 150
+checkpoint_epoch_interval: 150
+checkpoint_path: 'checkpoints/fern_pt3d.pth'
+data:
+  dataset_name: 'fern'
+  image_size: [378, 504] # [height, width]
+  precache_rays: True
+test:
+  mode: 'evaluation'
+  trajectory_type: 'figure_eight'
+  up: [0.0, 1.0, 0.0]
+  scene_center: [0.0, 0.0, -2.0]
+  n_frames: 100
+  fps: 20
+  trajectory_scale: 1.0
+optimizer:
+  max_epochs: 37500
+  lr: 0.0005
+  lr_scheduler_step_size: 12500
+  lr_scheduler_gamma: 0.1
+visualization:
+  history_size: 10
+  visdom: True
+  visdom_server: 'localhost'
+  visdom_port: 8097
+  visdom_env: 'nerf_pytorch3d'
+raysampler:
+  n_pts_per_ray: 64
+  n_pts_per_ray_fine: 64
+  n_rays_per_image: 1024
+  min_depth: 1.2
+  max_depth: 6.28
+  stratified: True
+  stratified_test: False
+  chunk_size_test: 6000
+implicit_function:
+  n_harmonic_functions_xyz: 10
+  n_harmonic_functions_dir: 4
+  n_hidden_neurons_xyz: 256
+  n_hidden_neurons_dir: 128
+  density_noise_std: 0.0
+  n_layers_xyz: 8
diff --git a/pytorch3d/projects/nerf/configs/lego.yaml b/pytorch3d/projects/nerf/configs/lego.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5a8870a7f6e7754843706f79663c8187f227b7f4
--- /dev/null
+++ b/pytorch3d/projects/nerf/configs/lego.yaml
@@ -0,0 +1,45 @@
+seed: 3
+resume: True
+stats_print_interval: 10
+validation_epoch_interval: 30
+checkpoint_epoch_interval: 30
+checkpoint_path: 'checkpoints/lego_pt3d.pth'
+data:
+  dataset_name: 'lego'
+  image_size: [800, 800] # [height, width]
+  precache_rays: True
+test:
+  mode: 'evaluation'
+  trajectory_type: 'circular'
+  up: [0.0, 0.0, 1.0]
+  scene_center: [0.0, 0.0, 0.0]
+  n_frames: 100
+  fps: 20
+  trajectory_scale: 0.2
+optimizer:
+  max_epochs: 20000
+  lr: 0.0005
+  lr_scheduler_step_size: 5000
+  lr_scheduler_gamma: 0.1
+visualization:
+  history_size: 10
+  visdom: True
+  visdom_server: 'localhost'
+  visdom_port: 8097
+  visdom_env: 'nerf_pytorch3d'
+raysampler:
+  n_pts_per_ray: 64
+  n_pts_per_ray_fine: 64
+  n_rays_per_image: 1024
+  min_depth: 2.0
+  max_depth: 6.0
+  stratified: True
+  stratified_test: False
+  chunk_size_test: 6000
+implicit_function:
+  n_harmonic_functions_xyz: 10
+  n_harmonic_functions_dir: 4
+  n_hidden_neurons_xyz: 256
+  n_hidden_neurons_dir: 128
+  density_noise_std: 0.0
+  n_layers_xyz: 8
diff --git a/pytorch3d/projects/nerf/configs/pt3logo.yaml b/pytorch3d/projects/nerf/configs/pt3logo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..db4a640ab2c10329a29acf7bd33f34bebb46ede7
--- /dev/null
+++ b/pytorch3d/projects/nerf/configs/pt3logo.yaml
@@ -0,0 +1,45 @@
+seed: 3
+resume: True
+stats_print_interval: 10
+validation_epoch_interval: 30
+checkpoint_epoch_interval: 30
+checkpoint_path: 'checkpoints/pt3logo_pt3d.pth'
+data:
+  dataset_name: 'pt3logo'
+  image_size: [512, 1024] # [height, width]
+  precache_rays: True
+test:
+  mode: 'export_video'
+  trajectory_type: 'figure_eight'
+  up: [0.0, -1.0, 0.0]
+  scene_center: [0.0, 0.0, 0.0]
+  n_frames: 100
+  fps: 20
+  trajectory_scale: 0.2
+optimizer:
+  max_epochs: 100000
+  lr: 0.0005
+  lr_scheduler_step_size: 10000
+  lr_scheduler_gamma: 0.1
+visualization:
+  history_size: 20
+  visdom: True
+  visdom_server: 'localhost'
+  visdom_port: 8097
+  visdom_env: 'nerf_pytorch3d'
+raysampler:
+  n_pts_per_ray: 64
+  n_pts_per_ray_fine: 64
+  n_rays_per_image: 1024
+  min_depth: 8.0
+  max_depth: 23.0
+  stratified: True
+  stratified_test: False
+  chunk_size_test: 6000
+implicit_function:
+  n_harmonic_functions_xyz: 10
+  n_harmonic_functions_dir: 4
+  n_hidden_neurons_xyz: 256
+  n_hidden_neurons_dir: 128
+  density_noise_std: 0.0
+  n_layers_xyz: 8
diff --git a/pytorch3d/projects/nerf/nerf/__init__.py b/pytorch3d/projects/nerf/nerf/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/projects/nerf/nerf/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/projects/nerf/nerf/dataset.py b/pytorch3d/projects/nerf/nerf/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..d13a896a23aa8d0672616f8898f071262a69bdf7
--- /dev/null
+++ b/pytorch3d/projects/nerf/nerf/dataset.py
@@ -0,0 +1,166 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+from typing import List, Optional, Tuple
+
+import numpy as np
+import requests
+import torch
+from PIL import Image
+from pytorch3d.renderer import PerspectiveCameras
+from torch.utils.data import Dataset
+
+
+DEFAULT_DATA_ROOT = os.path.join(
+    os.path.dirname(os.path.realpath(__file__)), "..", "data"
+)
+
+DEFAULT_URL_ROOT = "https://dl.fbaipublicfiles.com/pytorch3d_nerf_data"
+
+ALL_DATASETS = ("lego", "fern", "pt3logo")
+
+
+def trivial_collate(batch):
+    """
+    Identity collate function: hands the batch back as it is, without collating.
+    """
+    return batch
+
+
+class ListDataset(Dataset):
+    """
+    A map-style dataset backed by an in-memory list of entries.
+    """
+
+    def __init__(self, entries: List) -> None:
+        """
+        Args:
+            entries: The entries of the dataset; the list is stored, not copied.
+        """
+        self._entries = entries
+
+    def __len__(self) -> int:
+        # The dataset is as long as the wrapped list.
+        return len(self._entries)
+
+    def __getitem__(self, index):
+        # Indexing is delegated to the wrapped list.
+        return self._entries[index]
+
+
+def get_nerf_datasets(
+    dataset_name: str,  # 'lego' | 'fern' | 'pt3logo'
+    image_size: Tuple[int, int],
+    data_root: str = DEFAULT_DATA_ROOT,
+    autodownload: bool = True,
+) -> Tuple[Dataset, Dataset, Dataset]:
+    """
+    Obtains the training, validation and testing dataset objects for a dataset
+    specified with the `dataset_name` argument.
+
+    Args:
+        dataset_name: The name of the dataset to load.
+        image_size: A tuple (height, width) denoting the sizes of the loaded dataset images.
+        data_root: The root folder at which the data is stored.
+        autodownload: Auto-download the dataset files in case they are missing.
+
+    Returns:
+        train_dataset: The training dataset object.
+        val_dataset: The validation dataset object.
+        test_dataset: The testing dataset object.
+    """
+    if dataset_name not in ALL_DATASETS:
+        raise ValueError(f"'{dataset_name}' does not refer to a known dataset.")
+
+    print(f"Loading dataset {dataset_name}, image size={str(image_size)} ...")
+    cameras_path = os.path.join(data_root, dataset_name + ".pth")
+    image_path = cameras_path.replace(".pth", ".png")
+
+    if autodownload and any(not os.path.isfile(p) for p in (cameras_path, image_path)):
+        # Automatically download the data files if missing.
+        download_data((dataset_name,), data_root=data_root)
+
+    train_data = torch.load(cameras_path)
+    n_cameras = train_data["cameras"]["R"].shape[0]
+
+    _image_max_image_pixels = Image.MAX_IMAGE_PIXELS
+    Image.MAX_IMAGE_PIXELS = None  # The dataset image is very large ...
+    try:
+        # Restore the global PIL limit and close the file even if decoding fails.
+        with Image.open(image_path) as image_file:
+            images = torch.FloatTensor(np.array(image_file)) / 255.0
+    finally:
+        Image.MAX_IMAGE_PIXELS = _image_max_image_pixels
+    images = torch.stack(torch.chunk(images, n_cameras, dim=0))[..., :3]
+
+    scale_factors = [s_new / s for s, s_new in zip(images.shape[1:3], image_size)]
+    if abs(scale_factors[0] - scale_factors[1]) > 1e-3:
+        raise ValueError(
+            "Non-isotropic scaling is not allowed. Consider changing the 'image_size' argument."
+        )
+    scale_factor = sum(scale_factors) * 0.5
+
+    if scale_factor != 1.0:
+        print(f"Rescaling dataset (factor={scale_factor})")
+        images = torch.nn.functional.interpolate(
+            images.permute(0, 3, 1, 2),
+            size=tuple(image_size),
+            mode="bilinear",
+        ).permute(0, 2, 3, 1)
+
+    cameras = [
+        PerspectiveCameras(
+            **{k: v[cami][None] for k, v in train_data["cameras"].items()}
+        ).to("cpu")
+        for cami in range(n_cameras)
+    ]
+
+    train_dataset, val_dataset, test_dataset = [
+        ListDataset(
+            [
+                {"image": images[i], "camera": cameras[i], "camera_idx": int(i)}
+                for i in idx
+            ]
+        )
+        for idx in train_data["split"]
+    ]
+
+    return train_dataset, val_dataset, test_dataset
+
+
+def download_data(
+    dataset_names: Optional[List[str]] = None,
+    data_root: str = DEFAULT_DATA_ROOT,
+    url_root: str = DEFAULT_URL_ROOT,
+) -> None:
+    """
+    Downloads the camera, image and license files of the requested datasets.
+
+    Args:
+        dataset_names: A list of the names of datasets to download. If `None`,
+            downloads all available datasets.
+        data_root: The local folder the files are written to (created if missing).
+        url_root: The base URL the files are fetched from.
+
+    Raises:
+        requests.HTTPError: If the server answers a request with an error status.
+    """
+    if dataset_names is None:
+        dataset_names = ALL_DATASETS
+    os.makedirs(data_root, exist_ok=True)
+
+    for dataset_name in dataset_names:
+        for suffix in (".pth", ".png", "_license.txt"):
+            fl = dataset_name + suffix
+            local_fl = os.path.join(data_root, fl)
+            # URLs always use "/", whereas os.path.join uses "\\" on Windows.
+            remote_fl = url_root.rstrip("/") + "/" + fl
+            print(f"Downloading dataset {dataset_name} from {remote_fl} to {local_fl}.")
+            r = requests.get(remote_fl)
+            r.raise_for_status()  # never store an HTTP error page as a dataset file
+            with open(local_fl, "wb") as f:
+                f.write(r.content)
diff --git a/pytorch3d/projects/nerf/nerf/eval_video_utils.py b/pytorch3d/projects/nerf/nerf/eval_video_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0dca5f848227cf3c6f1499edd5209af513e34c1f
--- /dev/null
+++ b/pytorch3d/projects/nerf/nerf/eval_video_utils.py
@@ -0,0 +1,158 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+from typing import Tuple
+
+import torch
+from pytorch3d.renderer import look_at_view_transform, PerspectiveCameras
+from torch.utils.data.dataset import Dataset
+
+
+def generate_eval_video_cameras(
+    train_dataset,
+    n_eval_cams: int = 100,
+    trajectory_type: str = "figure_eight",
+    trajectory_scale: float = 0.2,
+    scene_center: Tuple[float, float, float] = (0.0, 0.0, 0.0),
+    up: Tuple[float, float, float] = (0.0, 0.0, 1.0),
+) -> Dataset[torch.Tensor]:
+    """
+    Generate a camera trajectory for visualizing a NeRF model.
+
+    Args:
+        train_dataset: The training dataset object.
+        n_eval_cams: Number of cameras in the trajectory.
+        trajectory_type: The type of the camera trajectory. Can be one of:
+            circular: Rotating around the center of the scene at a fixed radius.
+            figure_eight: Figure-of-8 trajectory around the center of the
+                central camera of the training dataset.
+            trefoil_knot: Same as 'figure_eight', but the trajectory has a shape
+                of a trefoil knot (https://en.wikipedia.org/wiki/Trefoil_knot).
+            figure_eight_knot: Same as 'figure_eight', but the trajectory has a shape
+                of a figure-eight knot
+                (https://en.wikipedia.org/wiki/Figure-eight_knot_(mathematics)).
+        trajectory_scale: The extent of the trajectory.
+        up: The "up" vector of the scene (=the normal of the scene floor). Given to
+            all cameras; for `trajectory_type="circular"` also the plane normal.
+        scene_center: The center of the scene in world coordinates which all
+            the cameras from the generated trajectory look at.
+    Returns:
+        List of per-camera dictionaries which can be used as the test dataset
+    """
+    if trajectory_type in ("figure_eight", "trefoil_knot", "figure_eight_knot"):
+        cam_centers = torch.cat(
+            [e["camera"].get_camera_center() for e in train_dataset]
+        )
+        # get the nearest camera center to the mean of centers
+        mean_camera_idx = (
+            ((cam_centers - cam_centers.mean(dim=0)[None]) ** 2)
+            .sum(dim=1)
+            .min(dim=0)
+            .indices
+        )
+        # generate the knot trajectory in canonical coords
+        time = torch.linspace(0, 2 * math.pi, n_eval_cams + 1)[:n_eval_cams]
+        if trajectory_type == "trefoil_knot":
+            traj = _trefoil_knot(time)
+        elif trajectory_type == "figure_eight_knot":
+            traj = _figure_eight_knot(time)
+        elif trajectory_type == "figure_eight":
+            traj = _figure_eight(time)
+        traj[:, 2] -= traj[:, 2].max()
+
+        # transform the canonical knot to the coord frame of the mean camera
+        traj_trans = (
+            train_dataset[mean_camera_idx]["camera"]
+            .get_world_to_view_transform()
+            .inverse()
+        )
+        traj_trans = traj_trans.scale(cam_centers.std(dim=0).mean() * trajectory_scale)
+        traj = traj_trans.transform_points(traj)
+
+    elif trajectory_type == "circular":
+        cam_centers = torch.cat(
+            [e["camera"].get_camera_center() for e in train_dataset]
+        )
+
+        # fit plane to the camera centers
+        plane_mean = cam_centers.mean(dim=0)
+        cam_centers_c = cam_centers - plane_mean[None]
+
+        if up is not None:
+            # use the up vector instead of the plane through the camera centers
+            plane_normal = torch.FloatTensor(up)
+        else:
+            cov = (cam_centers_c.t() @ cam_centers_c) / cam_centers_c.shape[0]
+            _, e_vec = torch.linalg.eigh(cov, UPLO="U")
+            plane_normal = e_vec[:, 0]
+
+        plane_dist = (plane_normal[None] * cam_centers_c).sum(dim=-1)
+        cam_centers_on_plane = cam_centers_c - plane_dist[:, None] * plane_normal[None]
+
+        cov = (
+            cam_centers_on_plane.t() @ cam_centers_on_plane
+        ) / cam_centers_on_plane.shape[0]
+        _, e_vec = torch.linalg.eigh(cov, UPLO="U")
+        traj_radius = (cam_centers_on_plane**2).sum(dim=1).sqrt().mean()
+        angle = torch.linspace(0, 2.0 * math.pi, n_eval_cams)
+        traj = traj_radius * torch.stack(
+            (torch.zeros_like(angle), angle.cos(), angle.sin()), dim=-1
+        )
+        traj = traj @ e_vec.t() + plane_mean[None]
+
+    else:
+        raise ValueError(f"Unknown trajectory_type {trajectory_type}.")
+
+    # point all cameras towards the center of the scene
+    R, T = look_at_view_transform(
+        eye=traj,
+        at=(scene_center,),  # (1, 3)
+        up=(up,),  # (1, 3)
+        device=traj.device,
+    )
+
+    # get the average focal length and principal point
+    focal = torch.cat([e["camera"].focal_length for e in train_dataset]).mean(dim=0)
+    p0 = torch.cat([e["camera"].principal_point for e in train_dataset]).mean(dim=0)
+
+    # assemble the dataset
+    test_dataset = [
+        {
+            "image": None,
+            "camera": PerspectiveCameras(
+                focal_length=focal[None],
+                principal_point=p0[None],
+                R=R_[None],
+                T=T_[None],
+            ),
+            "camera_idx": i,
+        }
+        for i, (R_, T_) in enumerate(zip(R, T))
+    ]
+
+    return test_dataset
+
+
+def _figure_eight_knot(t: torch.Tensor, z_scale: float = 0.5):
+    """Points of a figure-eight knot at curve parameters `t`, stacked on a new last dim."""
+    radius, angle = 2 + torch.cos(2 * t), 3 * t
+    xy = (radius * angle.cos(), radius * angle.sin())
+    return torch.stack((*xy, (4 * t).sin() * z_scale), dim=-1)
+
+
+def _trefoil_knot(t: torch.Tensor, z_scale: float = 0.5):
+    """Points of a trefoil knot at curve parameters `t`, stacked on a new last dim."""
+    double = 2 * t
+    xy = (torch.sin(t) + 2 * torch.sin(double), torch.cos(t) - 2 * torch.cos(double))
+    return torch.stack((*xy, -torch.sin(3 * t) * z_scale), dim=-1)
+
+
+def _figure_eight(t: torch.Tensor, z_scale: float = 0.5):
+    """Points of a figure-of-eight curve at parameters `t`, stacked on a new last dim."""
+    sin_t = torch.sin(t)
+    coords = (torch.cos(t), torch.sin(2 * t) / 2, sin_t * z_scale)
+    return torch.stack(coords, dim=-1)
diff --git a/pytorch3d/projects/nerf/nerf/implicit_function.py b/pytorch3d/projects/nerf/nerf/implicit_function.py
new file mode 100644
index 0000000000000000000000000000000000000000..4209e53c91ed5a5a97f90bd7b2fa9b950f3ae502
--- /dev/null
+++ b/pytorch3d/projects/nerf/nerf/implicit_function.py
@@ -0,0 +1,301 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+from pytorch3d.common.linear_with_repeat import LinearWithRepeat
+from pytorch3d.renderer import HarmonicEmbedding, ray_bundle_to_ray_points, RayBundle
+
+
+def _xavier_init(linear):
+    """Re-initialize the weight of the layer `linear` in place (Xavier uniform)."""
+    # Only the weight tensor is touched here; the bias is left as it is.
+    weight = linear.weight.data
+    torch.nn.init.xavier_uniform_(weight)
+
+
+class NeuralRadianceField(torch.nn.Module):
+    """MLP mapping ray points to per-point densities and RGB colors (see `forward`)."""
+
+    def __init__(
+        self,
+        n_harmonic_functions_xyz: int = 6,
+        n_harmonic_functions_dir: int = 4,
+        n_hidden_neurons_xyz: int = 256,
+        n_hidden_neurons_dir: int = 128,
+        n_layers_xyz: int = 8,
+        append_xyz: Tuple[int, ...] = (5,),
+        use_multiple_streams: bool = True,
+        **kwargs,  # not used by this constructor
+    ):
+        """
+        Args:
+            n_harmonic_functions_xyz: The number of harmonic functions
+                used to form the harmonic embedding of 3D point locations.
+            n_harmonic_functions_dir: The number of harmonic functions
+                used to form the harmonic embedding of the ray directions.
+            n_hidden_neurons_xyz: The number of hidden units in the
+                fully connected layers of the MLP that accepts the 3D point
+                locations and outputs the occupancy field with the intermediate
+                features.
+            n_hidden_neurons_dir: The number of hidden units in the
+                fully connected layers of the MLP that accepts the intermediate
+                features and ray directions and outputs the radiance field
+                (per-point colors).
+            n_layers_xyz: The number of layers of the occupancy MLP.
+            append_xyz: The list of indices of the skip layers of the occupancy MLP.
+            use_multiple_streams: Whether density and color should be calculated on
+                separate CUDA streams (only takes effect for CUDA `features`).
+        """
+        super().__init__()
+
+        # The harmonic embedding layers convert the input 3D coordinates to a
+        # representation that is more suitable for processing with a deep network.
+        self.harmonic_embedding_xyz = HarmonicEmbedding(n_harmonic_functions_xyz)
+        self.harmonic_embedding_dir = HarmonicEmbedding(n_harmonic_functions_dir)
+        embedding_dim_xyz = n_harmonic_functions_xyz * 2 * 3 + 3
+        embedding_dim_dir = n_harmonic_functions_dir * 2 * 3 + 3
+
+        self.mlp_xyz = MLPWithInputSkips(
+            n_layers_xyz,
+            embedding_dim_xyz,
+            n_hidden_neurons_xyz,
+            embedding_dim_xyz,
+            n_hidden_neurons_xyz,
+            input_skips=append_xyz,
+        )
+
+        self.intermediate_linear = torch.nn.Linear(
+            n_hidden_neurons_xyz, n_hidden_neurons_xyz
+        )
+        _xavier_init(self.intermediate_linear)
+
+        self.density_layer = torch.nn.Linear(n_hidden_neurons_xyz, 1)
+        _xavier_init(self.density_layer)
+
+        # Zero the bias of the density layer to avoid
+        # a completely transparent initialization.
+        self.density_layer.bias.data[:] = 0.0  # fixme: Sometimes this is not enough
+
+        self.color_layer = torch.nn.Sequential(
+            LinearWithRepeat(
+                n_hidden_neurons_xyz + embedding_dim_dir, n_hidden_neurons_dir
+            ),
+            torch.nn.ReLU(True),
+            torch.nn.Linear(n_hidden_neurons_dir, 3),
+            torch.nn.Sigmoid(),
+        )
+        self.use_multiple_streams = use_multiple_streams
+
+    def _get_densities(
+        self,
+        features: torch.Tensor,
+        depth_values: torch.Tensor,
+        density_noise_std: float,
+    ) -> torch.Tensor:
+        """
+        This function takes `features` predicted by `self.mlp_xyz`
+        and converts them to `raw_densities` with `self.density_layer`.
+        The returned opacities are `1 - exp(-deltas * relu(raw_densities))`, where
+        `deltas` are the depth steps along each ray (`1e10` for the last point).
+        """
+        raw_densities = self.density_layer(features)
+        deltas = torch.cat(
+            (
+                depth_values[..., 1:] - depth_values[..., :-1],
+                1e10 * torch.ones_like(depth_values[..., :1]),
+            ),
+            dim=-1,
+        )[..., None]
+        if density_noise_std > 0.0:
+            raw_densities = (
+                raw_densities + torch.randn_like(raw_densities) * density_noise_std
+            )
+        densities = 1 - (-deltas * torch.relu(raw_densities)).exp()
+        return densities
+
+    def _get_colors(
+        self, features: torch.Tensor, rays_directions: torch.Tensor
+    ) -> torch.Tensor:
+        """
+        This function takes per-point `features` predicted by `self.mlp_xyz`
+        and evaluates the color model in order to attach to each
+        point a 3D vector of its RGB color.
+        """
+        # Normalize the ray_directions to unit l2 norm.
+        rays_directions_normed = torch.nn.functional.normalize(rays_directions, dim=-1)
+
+        # Obtain the harmonic embedding of the normalized ray directions.
+        rays_embedding = self.harmonic_embedding_dir(rays_directions_normed)
+
+        return self.color_layer((self.intermediate_linear(features), rays_embedding))
+
+    def _get_densities_and_colors(
+        self, features: torch.Tensor, ray_bundle: RayBundle, density_noise_std: float
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """
+        The second part of the forward calculation.
+
+        Args:
+            features: the output of the common mlp (the prior part of the
+                calculation), shape
+                (minibatch x ... x self.n_hidden_neurons_xyz).
+            ray_bundle: As for forward().
+            density_noise_std:  As for forward().
+
+        Returns:
+            rays_densities: A tensor of shape `(minibatch, ..., num_points_per_ray, 1)`
+                denoting the opacity of each ray point.
+            rays_colors: A tensor of shape `(minibatch, ..., num_points_per_ray, 3)`
+                denoting the color of each ray point.
+        """
+        if self.use_multiple_streams and features.is_cuda:
+            current_stream = torch.cuda.current_stream(features.device)
+            other_stream = torch.cuda.Stream(features.device)
+            other_stream.wait_stream(current_stream)
+
+            with torch.cuda.stream(other_stream):
+                rays_densities = self._get_densities(
+                    features, ray_bundle.lengths, density_noise_std
+                )
+                # rays_densities.shape = [minibatch x ... x 1] in [0-1]
+
+            rays_colors = self._get_colors(features, ray_bundle.directions)
+            # rays_colors.shape = [minibatch x ... x 3] in [0-1]
+
+            current_stream.wait_stream(other_stream)
+        else:
+            # Same calculation as above, just serial.
+            rays_densities = self._get_densities(
+                features, ray_bundle.lengths, density_noise_std
+            )
+            rays_colors = self._get_colors(features, ray_bundle.directions)
+        return rays_densities, rays_colors
+
+    def forward(
+        self,
+        ray_bundle: RayBundle,
+        density_noise_std: float = 0.0,
+        **kwargs,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """
+        The forward function accepts the parametrizations of
+        3D points sampled along projection rays. The forward
+        pass is responsible for attaching a 3D vector
+        and a 1D scalar representing the point's
+        RGB color and opacity respectively.
+
+        Args:
+            ray_bundle: A RayBundle object containing the following variables:
+                origins: A tensor of shape `(minibatch, ..., 3)` denoting the
+                    origins of the sampling rays in world coords.
+                directions: A tensor of shape `(minibatch, ..., 3)`
+                    containing the direction vectors of sampling rays in world coords.
+                lengths: A tensor of shape `(minibatch, ..., num_points_per_ray)`
+                    containing the lengths at which the rays are sampled.
+            density_noise_std: A floating point value representing the
+                standard deviation of the random normal noise added to the raw
+                densities (before the ReLU). This can prevent floating artifacts.
+
+        Returns:
+            rays_densities: A tensor of shape `(minibatch, ..., num_points_per_ray, 1)`
+                denoting the opacity of each ray point.
+            rays_colors: A tensor of shape `(minibatch, ..., num_points_per_ray, 3)`
+                denoting the color of each ray point.
+        """
+        # We first convert the ray parametrizations to world
+        # coordinates with `ray_bundle_to_ray_points`.
+        rays_points_world = ray_bundle_to_ray_points(ray_bundle)
+        # rays_points_world.shape = [minibatch x ... x 3]
+
+        # For each 3D world coordinate, we obtain its harmonic embedding.
+        embeds_xyz = self.harmonic_embedding_xyz(rays_points_world)
+        # embeds_xyz.shape = [minibatch x ... x n_harmonic_functions_xyz*6 + 3]
+
+        # self.mlp_xyz maps each harmonic embedding to a latent feature space.
+        features = self.mlp_xyz(embeds_xyz, embeds_xyz)
+        # features.shape = [minibatch x ... x self.n_hidden_neurons_xyz]
+
+        rays_densities, rays_colors = self._get_densities_and_colors(
+            features, ray_bundle, density_noise_std
+        )
+        return rays_densities, rays_colors
+
+
+class MLPWithInputSkips(torch.nn.Module):
+    """
+    Implements the multi-layer perceptron architecture of the Neural Radiance Field.
+
+    As such, `MLPWithInputSkips` is a multi layer perceptron consisting
+    of a sequence of linear layers with ReLU activations.
+
+    Additionally, for a set of predefined layers `input_skips`, the forward pass
+    appends a skip tensor `z` to the output of the preceding layer.
+
+    Note that this follows the architecture described in the Supplementary
+    Material (Fig. 7) of [1].
+
+    References:
+        [1] Ben Mildenhall and Pratul P. Srinivasan and Matthew Tancik
+            and Jonathan T. Barron and Ravi Ramamoorthi and Ren Ng:
+            NeRF: Representing Scenes as Neural Radiance Fields for View
+            Synthesis, ECCV2020
+    """
+
+    def __init__(
+        self,
+        n_layers: int,
+        input_dim: int,
+        output_dim: int,
+        skip_dim: int,
+        hidden_dim: int,
+        input_skips: Tuple[int, ...] = (),
+    ):
+        """
+        Args:
+            n_layers: The number of linear layers of the MLP.
+            input_dim: The number of channels of the input tensor.
+            output_dim: The number of channels of the output. NOTE: currently unused,
+                every layer (including the last one) outputs `hidden_dim` channels.
+            skip_dim: The number of channels of the tensor `z` appended when
+                evaluating the skip layers.
+            hidden_dim: The number of hidden units of the MLP.
+            input_skips: The list of layer indices at which we append the skip
+                tensor `z`. The layers at these indices (layer 0 included) accept
+                `skip_dim` additional input channels.
+        """
+        super().__init__()
+        layers = []
+        for layeri in range(n_layers):
+            # The first layer consumes the raw input, every other layer consumes
+            # the `hidden_dim`-sized output of its predecessor.
+            dimin = input_dim if layeri == 0 else hidden_dim
+            if layeri in input_skips:
+                # `forward` appends `z` to the input of every skip layer, layer 0
+                # included, so the linear layer has to be widened accordingly.
+                dimin += skip_dim
+            linear = torch.nn.Linear(dimin, hidden_dim)
+            _xavier_init(linear)
+            layers.append(torch.nn.Sequential(linear, torch.nn.ReLU(True)))
+        self.mlp = torch.nn.ModuleList(layers)
+        self._input_skips = set(input_skips)
+
+    def forward(self, x: torch.Tensor, z: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            x: The input tensor of shape `(..., input_dim)`.
+            z: The input skip tensor of shape `(..., skip_dim)` which is appended
+                to layers whose indices are specified by `input_skips`.
+        Returns:
+            y: The output tensor of shape `(..., hidden_dim)`.
+        """
+        y = x
+        for li, layer in enumerate(self.mlp):
+            if li in self._input_skips:
+                y = torch.cat((y, z), dim=-1)
+            y = layer(y)
+        return y
diff --git a/pytorch3d/projects/nerf/nerf/nerf_renderer.py b/pytorch3d/projects/nerf/nerf/nerf_renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..d72089734d580ebf7fa00dd1c7cbd9faba85bf35
--- /dev/null
+++ b/pytorch3d/projects/nerf/nerf/nerf_renderer.py
@@ -0,0 +1,434 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Optional, Tuple
+
+import torch
+from pytorch3d.renderer import ImplicitRenderer, ray_bundle_to_ray_points
+from pytorch3d.renderer.cameras import CamerasBase
+from pytorch3d.structures import Pointclouds
+from pytorch3d.vis.plotly_vis import plot_scene
+from visdom import Visdom
+
+from .implicit_function import NeuralRadianceField
+from .raymarcher import EmissionAbsorptionNeRFRaymarcher
+from .raysampler import NeRFRaysampler, ProbabilisticRaysampler
+from .utils import calc_mse, calc_psnr, sample_images_at_mc_locs
+
+
+class RadianceFieldRenderer(torch.nn.Module):
+    """
+    Implements a renderer of a Neural Radiance Field.
+
+    This class holds pointers to the fine and coarse renderer objects, which are
+    instances of `pytorch3d.renderer.ImplicitRenderer`, and pointers to the
+    neural networks representing the fine and coarse Neural Radiance Fields,
+    which are instances of `NeuralRadianceField`.
+
+    The rendering forward pass proceeds as follows:
+        1) For a given input camera, rendering rays are generated with the
+            `NeRFRaysampler` object of `self._renderer['coarse']`.
+            In the training mode (`self.training==True`), the rays are a set
+                of `n_rays_per_image` random 2D locations of the image grid.
+            In the evaluation mode (`self.training==False`), the rays correspond
+                to the full image grid. The rays are further split to
+                `chunk_size_test`-sized chunks to prevent out-of-memory errors.
+        2) For each ray point, the coarse `NeuralRadianceField` MLP is evaluated.
+            The pointer to this MLP is stored in `self._implicit_function['coarse']`
+        3) The coarse radiance field is rendered with the
+            `EmissionAbsorptionNeRFRaymarcher` object of `self._renderer['coarse']`.
+        4) The coarse raymarcher outputs a probability distribution that guides
+            the importance raysampling of the fine rendering pass. The
+            `ProbabilisticRaysampler` stored in `self._renderer['fine'].raysampler`
+            implements the importance ray-sampling.
+        5) Similar to 2) the fine MLP in `self._implicit_function['fine']`
+            labels the ray points with occupancies and colors.
+        6) `self._renderer['fine'].raymarcher` generates the final fine render.
+        7) The fine and coarse renders are compared to the ground truth input image
+            with PSNR and MSE metrics.
+    """
+
+    def __init__(
+        self,
+        image_size: Tuple[int, int],
+        n_pts_per_ray: int,
+        n_pts_per_ray_fine: int,
+        n_rays_per_image: int,
+        min_depth: float,
+        max_depth: float,
+        stratified: bool,
+        stratified_test: bool,
+        chunk_size_test: int,
+        n_harmonic_functions_xyz: int = 6,
+        n_harmonic_functions_dir: int = 4,
+        n_hidden_neurons_xyz: int = 256,
+        n_hidden_neurons_dir: int = 128,
+        n_layers_xyz: int = 8,
+        append_xyz: Tuple[int, ...] = (5,),
+        density_noise_std: float = 0.0,
+        visualization: bool = False,
+    ):
+        """
+        Args:
+            image_size: The size of the rendered image (`[height, width]`).
+            n_pts_per_ray: The number of points sampled along each ray for the
+                coarse rendering pass.
+            n_pts_per_ray_fine: The number of points sampled along each ray for the
+                fine rendering pass.
+            n_rays_per_image: Number of Monte Carlo ray samples when training
+                (`self.training==True`).
+            min_depth: The minimum depth of a sampled ray-point for the coarse rendering.
+            max_depth: The maximum depth of a sampled ray-point for the coarse rendering.
+            stratified: If `True`, stratifies (=randomly offsets) the depths
+                of each ray point during training (`self.training==True`).
+            stratified_test: If `True`, stratifies (=randomly offsets) the depths
+                of each ray point during evaluation (`self.training==False`).
+            chunk_size_test: The number of rays in each chunk of image rays.
+                Active only when `self.training==False`.
+            n_harmonic_functions_xyz: The number of harmonic functions
+                used to form the harmonic embedding of 3D point locations.
+            n_harmonic_functions_dir: The number of harmonic functions
+                used to form the harmonic embedding of the ray directions.
+            n_hidden_neurons_xyz: The number of hidden units in the
+                fully connected layers of the MLP that accepts the 3D point
+                locations and outputs the occupancy field with the intermediate
+                features.
+            n_hidden_neurons_dir: The number of hidden units in the
+                fully connected layers of the MLP that accepts the intermediate
+                features and ray directions and outputs the radiance field
+                (per-point colors).
+            n_layers_xyz: The number of layers of the MLP that outputs the
+                occupancy field.
+            append_xyz: The list of indices of the skip layers of the occupancy MLP.
+                Prior to evaluating the skip layers, the tensor which was input to MLP
+                is appended to the skip layer input.
+            density_noise_std: The standard deviation of the random normal noise
+                added to the output of the occupancy MLP.
+                Active only when `self.training==True`.
+            visualization: whether to store extra output for visualization.
+        """
+
+        super().__init__()
+
+        # The renderers and implicit functions are stored under the fine/coarse
+        # keys in ModuleDict PyTorch modules.
+        self._renderer = torch.nn.ModuleDict()
+        self._implicit_function = torch.nn.ModuleDict()
+
+        # Init the EA raymarcher used by both passes.
+        raymarcher = EmissionAbsorptionNeRFRaymarcher()
+
+        # Parse out image dimensions.
+        image_height, image_width = image_size
+
+        for render_pass in ("coarse", "fine"):
+            if render_pass == "coarse":
+                # Initialize the coarse raysampler.
+                raysampler = NeRFRaysampler(
+                    n_pts_per_ray=n_pts_per_ray,
+                    min_depth=min_depth,
+                    max_depth=max_depth,
+                    stratified=stratified,
+                    stratified_test=stratified_test,
+                    n_rays_per_image=n_rays_per_image,
+                    image_height=image_height,
+                    image_width=image_width,
+                )
+            elif render_pass == "fine":
+                # Initialize the fine raysampler.
+                raysampler = ProbabilisticRaysampler(
+                    n_pts_per_ray=n_pts_per_ray_fine,
+                    stratified=stratified,
+                    stratified_test=stratified_test,
+                )
+            else:
+                raise ValueError(f"No such rendering pass {render_pass}")
+
+            # Initialize the fine/coarse renderer.
+            self._renderer[render_pass] = ImplicitRenderer(
+                raysampler=raysampler,
+                raymarcher=raymarcher,
+            )
+
+            # Instantiate the fine/coarse NeuralRadianceField module.
+            self._implicit_function[render_pass] = NeuralRadianceField(
+                n_harmonic_functions_xyz=n_harmonic_functions_xyz,
+                n_harmonic_functions_dir=n_harmonic_functions_dir,
+                n_hidden_neurons_xyz=n_hidden_neurons_xyz,
+                n_hidden_neurons_dir=n_hidden_neurons_dir,
+                n_layers_xyz=n_layers_xyz,
+                append_xyz=append_xyz,
+            )
+
+        self._density_noise_std = density_noise_std
+        self._chunk_size_test = chunk_size_test
+        self._image_size = image_size
+        self.visualization = visualization
+
+    def precache_rays(
+        self,
+        cache_cameras: List[CamerasBase],
+        cache_camera_hashes: List[str],
+    ):
+        """
+        Precaches the rays emitted from the list of cameras `cache_cameras`,
+        where each camera is uniquely identified with the corresponding hash
+        from `cache_camera_hashes`.
+
+        The caching itself is done by `self._renderer['coarse'].raysampler`, which
+        is expected to move the rays to cpu and to store them in its `_ray_cache`.
+
+        Raises `ValueError` when caching two cameras with the same hash.
+
+        Args:
+            cache_cameras: A list of `N` cameras for which the rays are pre-cached.
+            cache_camera_hashes: A list of `N` unique identifiers for each
+                camera from `cache_cameras`.
+        """
+        self._renderer["coarse"].raysampler.precache_rays(
+            cache_cameras,
+            cache_camera_hashes,
+        )
+
+    def _process_ray_chunk(
+        self,
+        camera_hash: Optional[str],
+        camera: CamerasBase,
+        image: torch.Tensor,
+        chunk_idx: int,
+    ) -> dict:
+        """
+        Samples and renders a chunk of rays.
+
+        Args:
+            camera_hash: A unique identifier of a pre-cached camera.
+                If `None`, the cache is not searched and the sampled rays are
+                calculated from scratch.
+            camera: A batch of cameras from which the scene is rendered.
+            image: A batch of corresponding ground truth images of shape
+                ('batch_size', ·, ·, 3). If `None`, `rgb_gt` is returned as `None`.
+            chunk_idx: The index of the currently rendered ray chunk.
+        Returns:
+            out: `dict` containing the outputs of the rendering:
+                `rgb_coarse`: The result of the coarse rendering pass.
+                `rgb_fine`: The result of the fine rendering pass.
+                `rgb_gt`: The corresponding ground-truth RGB values.
+        """
+        # Initialize the outputs of the coarse rendering to None.
+        coarse_ray_bundle = None
+        coarse_weights = None
+
+        # First evaluate the coarse rendering pass, then the fine one.
+        for renderer_pass in ("coarse", "fine"):
+            (rgb, weights), ray_bundle_out = self._renderer[renderer_pass](
+                cameras=camera,
+                volumetric_function=self._implicit_function[renderer_pass],
+                chunksize=self._chunk_size_test,
+                chunk_idx=chunk_idx,
+                density_noise_std=(self._density_noise_std if self.training else 0.0),
+                input_ray_bundle=coarse_ray_bundle,
+                ray_weights=coarse_weights,
+                camera_hash=camera_hash,
+            )
+
+            if renderer_pass == "coarse":
+                rgb_coarse = rgb
+                # Store the weights and the rays of the first rendering pass
+                # for the ensuing importance ray-sampling of the fine render.
+                coarse_ray_bundle = ray_bundle_out
+                coarse_weights = weights
+                if image is not None:
+                    # Sample the ground truth images at the xy locations of the
+                    # rendering ray pixels.
+                    rgb_gt = sample_images_at_mc_locs(
+                        image[..., :3][None],
+                        ray_bundle_out.xys,
+                    )
+                else:
+                    rgb_gt = None
+
+            elif renderer_pass == "fine":
+                rgb_fine = rgb
+
+            else:
+                raise ValueError(f"No such rendering pass {renderer_pass}")
+
+        out = {"rgb_fine": rgb_fine, "rgb_coarse": rgb_coarse, "rgb_gt": rgb_gt}
+        if self.visualization:
+            # Store the coarse rays/weights only for visualization purposes.
+            out["coarse_ray_bundle"] = type(coarse_ray_bundle)(
+                *[v.detach().cpu() for k, v in coarse_ray_bundle._asdict().items()]
+            )
+            out["coarse_weights"] = coarse_weights.detach().cpu()
+
+        return out
+
+    def forward(
+        self,
+        camera_hash: Optional[str],
+        camera: CamerasBase,
+        image: torch.Tensor,
+    ) -> Tuple[dict, dict]:
+        """
+        Performs the coarse and fine rendering passes of the radiance field
+        from the viewpoint of the input `camera`.
+        Afterwards, both renders are compared to the input ground truth `image`
+        by evaluating the peak signal-to-noise ratio and the mean-squared error.
+
+        The rendering result depends on the `self.training` flag:
+            - In the training mode (`self.training==True`), the function renders
+              a random subset of image rays (Monte Carlo rendering).
+            - In evaluation mode (`self.training==False`), the function renders
+              the full image. In order to prevent out-of-memory errors,
+              when `self.training==False`, the rays are sampled and rendered
+              in batches of size `chunk_size_test`.
+
+        Args:
+            camera_hash: A unique identifier of a pre-cached camera.
+                If `None`, the cache is not searched and the sampled rays are
+                calculated from scratch.
+            camera: A batch of cameras from which the scene is rendered.
+            image: A batch of corresponding ground truth images of shape
+                ('batch_size', ·, ·, 3). If `None`, no error metrics are computed.
+        Returns:
+            out: `dict` containing the outputs of the rendering:
+                `rgb_coarse`: The result of the coarse rendering pass.
+                `rgb_fine`: The result of the fine rendering pass.
+                `rgb_gt`: The corresponding ground-truth RGB values.
+
+                The shape of `rgb_coarse`, `rgb_fine`, `rgb_gt` depends on the
+                `self.training` flag:
+                    If `==True`, all 3 tensors are of shape
+                    `(batch_size, n_rays_per_image, 3)` and contain the result
+                    of the Monte Carlo training rendering pass.
+                    If `==False`, all 3 tensors are of shape
+                    `(batch_size, image_size[0], image_size[1], 3)` and contain
+                    the result of the full image rendering pass.
+            metrics: `dict` containing the error metrics comparing the fine and
+                coarse renders to the ground truth:
+                `mse_coarse`: Mean-squared error between the coarse render and
+                    the input `image`
+                `mse_fine`: Mean-squared error between the fine render and
+                    the input `image`
+                `psnr_coarse`: Peak signal-to-noise ratio between the coarse render and
+                    the input `image`
+                `psnr_fine`: Peak signal-to-noise ratio between the fine render and
+                    the input `image`
+        """
+        if not self.training:
+            # Full evaluation pass.
+            n_chunks = self._renderer["coarse"].raysampler.get_n_chunks(
+                self._chunk_size_test,
+                camera.R.shape[0],
+            )
+        else:
+            # MonteCarlo ray sampling.
+            n_chunks = 1
+
+        # Process the chunks of rays.
+        chunk_outputs = [
+            self._process_ray_chunk(
+                camera_hash,
+                camera,
+                image,
+                chunk_idx,
+            )
+            for chunk_idx in range(n_chunks)
+        ]
+
+        if not self.training:
+            # For a full render pass concatenate the output chunks,
+            # and reshape to image size.
+            out = {
+                k: torch.cat(
+                    [ch_o[k] for ch_o in chunk_outputs],
+                    dim=1,
+                ).view(-1, *self._image_size, 3)
+                if chunk_outputs[0][k] is not None
+                else None
+                for k in ("rgb_fine", "rgb_coarse", "rgb_gt")
+            }
+        else:
+            out = chunk_outputs[0]
+
+        # Calc the error metrics.
+        metrics = {}
+        if image is not None:
+            for render_pass in ("coarse", "fine"):
+                for metric_name, metric_fun in zip(
+                    ("mse", "psnr"), (calc_mse, calc_psnr)
+                ):
+                    metrics[f"{metric_name}_{render_pass}"] = metric_fun(
+                        out["rgb_" + render_pass][..., :3],
+                        out["rgb_gt"][..., :3],
+                    )
+
+        return out, metrics
+
+
+def visualize_nerf_outputs(
+    nerf_out: dict, output_cache: List, viz: Visdom, visdom_env: str
+):
+    """
+    Plots the outputs of the `RadianceFieldRenderer` in Visdom.
+
+    The following windows of the environment `visdom_env` are updated:
+        `images`: The `"image"` entries of `output_cache` concatenated along dim 1.
+        `images_full`: The first element of the coarse render, the fine render
+            and the ground truth of `nerf_out`, concatenated along the last dim.
+        `scenes`: A 3D plot of the cached cameras and their coarse ray points.
+
+    Args:
+        nerf_out: An output of the validation rendering pass.
+            Has to contain the keys `rgb_coarse`, `rgb_fine` and `rgb_gt`.
+        output_cache: A list with outputs of several training render passes.
+            Each element has to contain `image`, `camera` and `coarse_ray_bundle`.
+        viz: A visdom connection object.
+        visdom_env: The name of visdom environment for visualization.
+    """
+
+    # 1) The cached training images, concatenated along dim 1 into one strip.
+    # NOTE(review): the permute below assumes channel-last images - confirm.
+    train_ims = torch.stack([cached["image"] for cached in output_cache])
+    train_strip = torch.cat(list(train_ims), dim=1).permute(2, 0, 1)
+    viz.image(
+        train_strip,
+        env=visdom_env,
+        win="images",
+        opts={"title": "train_images"},
+    )
+
+    # 2) The coarse render, the fine render and the ground truth, each clamped
+    # to [0, 1] and concatenated along the last dim after the permute.
+    panels = []
+    for key in ("rgb_coarse", "rgb_fine", "rgb_gt"):
+        panel = nerf_out[key][0].permute(2, 0, 1)
+        panels.append(panel.detach().cpu().clamp(0.0, 1.0))
+    viz.image(
+        torch.cat(panels, dim=2),
+        env=visdom_env,
+        win="images_full",
+        opts={"title": "coarse | fine | target"},
+    )
+
+    # 3) A 3D plot of the training cameras and of the points of their rays.
+    traces = {}
+    for cam_idx, cached in enumerate(output_cache):
+        traces[f"camera_{cam_idx:03d}"] = cached["camera"].cpu()
+    # The ray points of each cached output are flattened to a `(1, P, 3)` cloud.
+    for cam_idx, cached in enumerate(output_cache):
+        ray_pts = ray_bundle_to_ray_points(cached["coarse_ray_bundle"])
+        traces[f"ray_pts_{cam_idx:03d}"] = Pointclouds(
+            ray_pts.detach().cpu().view(1, -1, 3)
+        )
+
+    scene_plot = plot_scene(
+        {"training_scene": traces},
+        pointcloud_max_points=5000,
+        pointcloud_marker_size=1,
+        camera_scale=0.3,
+    )
+    viz.plotlyplot(scene_plot, env=visdom_env, win="scenes")
diff --git a/pytorch3d/projects/nerf/nerf/raymarcher.py b/pytorch3d/projects/nerf/nerf/raymarcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5a80c2a40f250e0874b29c2a288012870fcac5e
--- /dev/null
+++ b/pytorch3d/projects/nerf/nerf/raymarcher.py
@@ -0,0 +1,73 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from pytorch3d.renderer import EmissionAbsorptionRaymarcher
+from pytorch3d.renderer.implicit.raymarching import (
+    _check_density_bounds,
+    _check_raymarcher_inputs,
+    _shifted_cumprod,
+)
+
+
+class EmissionAbsorptionNeRFRaymarcher(EmissionAbsorptionRaymarcher):
+    """
+    This is essentially the `pytorch3d.renderer.EmissionAbsorptionRaymarcher`
+    which additionally returns the rendering weights. It also skips computing
+    and returning the alpha-mask which is, in case of NeRF, equal to 1
+    everywhere.
+
+    The weights are later used in the NeRF pipeline to carry out the importance
+    ray-sampling for the fine rendering pass.
+
+    For more details about the EmissionAbsorptionRaymarcher please refer to
+    the documentation of `pytorch3d.renderer.EmissionAbsorptionRaymarcher`.
+    """
+
+    def forward(
+        self,
+        rays_densities: torch.Tensor,
+        rays_features: torch.Tensor,
+        eps: float = 1e-10,
+        **kwargs,
+    ) -> torch.Tensor:
+        """
+        Args:
+            rays_densities: Per-ray density values represented with a tensor
+                of shape `(..., n_points_per_ray, 1)` whose values range in [0, 1].
+            rays_features: Per-ray feature values represented with a tensor
+                of shape `(..., n_points_per_ray, feature_dim)`.
+            eps: A small constant added to `1-rays_densities` before computing
+                the absorption function (cumprod of `1-rays_densities` along
+                each ray). This prevents the cumprod from yielding an exact 0
+                which would inhibit any gradient-based learning.
+
+        Returns (a `(features, weights)` tuple, despite the annotation):
+            features: A tensor of shape `(..., feature_dim)` containing
+                the rendered features for each ray.
+            weights: A tensor of shape `(..., n_points_per_ray)` containing
+                the ray-specific emission-absorption weights, computed as
+                `rays_densities * absorption`.
+                NOTE(review): no normalization is applied here, so whether
+                `(weights.sum(dim=-1)==1).all()` holds depends on the inputs.
+        """
+        _check_raymarcher_inputs(
+            rays_densities,
+            rays_features,
+            None,
+            z_can_be_none=True,
+            features_can_be_none=False,
+            density_1d=True,
+        )
+        _check_density_bounds(rays_densities)
+        rays_densities = rays_densities[..., 0]
+        absorption = _shifted_cumprod(
+            (1.0 + eps) - rays_densities, shift=self.surface_thickness
+        )
+        weights = rays_densities * absorption
+        features = (weights[..., None] * rays_features).sum(dim=-2)
+
+        return features, weights
diff --git a/pytorch3d/projects/nerf/nerf/raysampler.py b/pytorch3d/projects/nerf/nerf/raysampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..69e99b9ad9340207bc9bb72e6cc7bbb36b1c2d67
--- /dev/null
+++ b/pytorch3d/projects/nerf/nerf/raysampler.py
@@ -0,0 +1,365 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+from typing import List
+
+import torch
+from pytorch3d.renderer import MonteCarloRaysampler, NDCMultinomialRaysampler, RayBundle
+from pytorch3d.renderer.cameras import CamerasBase
+from pytorch3d.renderer.implicit.sample_pdf import sample_pdf
+
+
+class ProbabilisticRaysampler(torch.nn.Module):
+    """
+    Implements the importance sampling of points along rays.
+    The input is a `RayBundle` object together with a `ray_weights` tensor
+    which specifies the probabilities of sampling a point along each ray.
+
+    This raysampler is used for the fine rendering pass of NeRF.
+    As such, the forward pass accepts the RayBundle output by the
+    raysampling of the coarse rendering pass. Hence, it does not
+    take cameras as input.
+    """
+
+    def __init__(
+        self,
+        n_pts_per_ray: int,
+        stratified: bool,
+        stratified_test: bool,
+        add_input_samples: bool = True,
+    ):
+        """
+        Args:
+            n_pts_per_ray: The number of points to sample along each ray.
+            stratified: If `True`, `sample_pdf` is called with `det=False`
+                when the module is in the training mode (`self.training==True`).
+            stratified_test: Same as `stratified` with the difference that this
+                setting is applied when the module is in the `eval` mode
+                (`self.training==False`).
+            add_input_samples: If `True`, the sampled depths are concatenated
+                with the input depths; otherwise only the samples are returned.
+        """
+        super().__init__()
+        self._n_pts_per_ray = n_pts_per_ray
+        self._stratified = stratified
+        self._stratified_test = stratified_test
+        self._add_input_samples = add_input_samples
+
+    def forward(
+        self,
+        input_ray_bundle: RayBundle,
+        ray_weights: torch.Tensor,
+        **kwargs,
+    ) -> RayBundle:
+        """
+        Args:
+            input_ray_bundle: An instance of `RayBundle` specifying the
+                source rays for sampling of the probability distribution.
+            ray_weights: A tensor of shape
+                `(..., input_ray_bundle.lengths.shape[-1])` with non-negative
+                elements defining the probability distribution to sample
+                ray points from.
+
+        Returns:
+            ray_bundle: A new `RayBundle` instance whose `lengths` are sorted
+                and hold `n_pts_per_ray` sampled depths per ray, merged with
+                the input depths if `add_input_samples` was set.
+        """
+
+        # The depths of the input ray points.
+        z_vals = input_ray_bundle.lengths
+        batch_size = z_vals.shape[0]
+
+        # Importance-sample new depths between the mid-points of the input depths.
+        with torch.no_grad():
+            z_vals_mid = 0.5 * (z_vals[..., 1:] + z_vals[..., :-1])
+            z_samples = sample_pdf(
+                z_vals_mid.view(-1, z_vals_mid.shape[-1]),
+                ray_weights.view(-1, ray_weights.shape[-1])[..., 1:-1],
+                self._n_pts_per_ray,
+                det=not (
+                    (self._stratified and self.training)
+                    or (self._stratified_test and not self.training)
+                ),
+            ).view(batch_size, z_vals.shape[1], self._n_pts_per_ray)
+
+        if self._add_input_samples:
+            # Add the new samples to the input ones.
+            z_vals = torch.cat((z_vals, z_samples), dim=-1)
+        else:
+            z_vals = z_samples
+        # Resort by depth.
+        z_vals, _ = torch.sort(z_vals, dim=-1)
+
+        return RayBundle(
+            origins=input_ray_bundle.origins,
+            directions=input_ray_bundle.directions,
+            lengths=z_vals,
+            xys=input_ray_bundle.xys,
+        )
+
+
+class NeRFRaysampler(torch.nn.Module):
+    """
+    Implements the raysampler of NeRF.
+
+    Depending on the `self.training` flag, the raysampler either samples
+    a chunk of random rays (`self.training==True`), or returns a subset of rays
+    of the full image grid (`self.training==False`).
+    The chunking of rays allows for efficient evaluation of the NeRF implicit
+    surface function without encountering out-of-GPU-memory errors.
+
+    Additionally, this raysampler supports pre-caching of the ray bundles
+    for a set of input cameras (`self.precache_rays`).
+    Pre-caching the rays before training greatly speeds-up the ensuing
+    raysampling step of the training NeRF iterations.
+    """
+
+    def __init__(
+        self,
+        n_pts_per_ray: int,
+        min_depth: float,
+        max_depth: float,
+        n_rays_per_image: int,
+        image_width: int,
+        image_height: int,
+        stratified: bool = False,
+        stratified_test: bool = False,
+    ):
+        """
+        Args:
+            n_pts_per_ray: The number of points sampled along each ray.
+            min_depth: The minimum depth of a ray-point.
+            max_depth: The maximum depth of a ray-point.
+            n_rays_per_image: Number of Monte Carlo ray samples when training
+                (`self.training==True`).
+            image_width: The horizontal size of the image grid.
+            image_height: The vertical size of the image grid.
+            stratified: If `True`, stratifies (=randomly offsets) the depths
+                of each ray point during training (`self.training==True`).
+            stratified_test: If `True`, stratifies (=randomly offsets) the depths
+                of each ray point during evaluation (`self.training==False`).
+        """
+
+        super().__init__()
+        self._stratified = stratified
+        self._stratified_test = stratified_test
+
+        # Initialize the grid ray sampler (used for full-grid rays and for caching).
+        self._grid_raysampler = NDCMultinomialRaysampler(
+            image_width=image_width,
+            image_height=image_height,
+            n_pts_per_ray=n_pts_per_ray,
+            min_depth=min_depth,
+            max_depth=max_depth,
+        )
+
+        # Initialize the Monte Carlo ray sampler (x and y limited to [-1, 1]).
+        self._mc_raysampler = MonteCarloRaysampler(
+            min_x=-1.0,
+            max_x=1.0,
+            min_y=-1.0,
+            max_y=1.0,
+            n_rays_per_image=n_rays_per_image,
+            n_pts_per_ray=n_pts_per_ray,
+            min_depth=min_depth,
+            max_depth=max_depth,
+        )
+
+        # Ray bundles stored by `precache_rays`, keyed by the camera hash.
+        self._ray_cache = {}
+
+    def get_n_chunks(self, chunksize: int, batch_size: int):
+        """
+        Counts how many chunks of `chunksize` rays are needed to cover
+        all rays of the full image grid for a batch of cameras.
+
+        Args:
+            chunksize: The number of rays per chunk.
+            batch_size: The size of the batch of the raysampler.
+
+        Returns:
+            n_chunks: The number of chunks, rounded up to an integer.
+        """
+        # Presumably `_xy_grid` holds two (x, y) values per ray, hence the halving.
+        grid_numel = self._grid_raysampler._xy_grid.numel()
+        total_rays = grid_numel * 0.5 * batch_size
+        n_chunks = math.ceil(total_rays / chunksize)
+        return int(n_chunks)
+
+    def _print_precaching_progress(self, i, total, bar_len=30):
+        """
+        Draw a text progress bar for item `i` (0-based) out of `total`.
+        """
+        n_filled = round((i + 1) / total * bar_len)
+        n_empty = bar_len - n_filled
+        print("[" + "█" * n_filled + " " * n_empty + "]", end="\r")
+
+    def precache_rays(self, cameras: List[CamerasBase], camera_hashes: List):
+        """
+        Precaches the rays emitted from the list of cameras `cameras`,
+        where each camera is uniquely identified with the corresponding hash
+        from `camera_hashes`.
+
+        The cached rays are moved to cpu and stored in `self._ray_cache`.
+        Raises `ValueError` for an already cached hash or mismatched list lengths.
+
+        Args:
+            cameras: A list of `N` cameras for which the rays are pre-cached.
+            camera_hashes: A list of `N` unique identifiers of each
+                camera from `cameras`.
+        """
+        if len(cameras) != len(camera_hashes):
+            raise ValueError("Each camera has to come with exactly one hash!")
+        print(f"Precaching {len(cameras)} ray bundles ...")
+        full_chunksize = (
+            self._grid_raysampler._xy_grid.numel()
+            // 2
+            * self._grid_raysampler._n_pts_per_ray
+        )
+        if self.get_n_chunks(full_chunksize, 1) != 1:
+            raise ValueError("There has to be one chunk for precaching rays!")
+        for camera_i, (camera, camera_hash) in enumerate(zip(cameras, camera_hashes)):
+            # Reject duplicates before paying for the ray generation.
+            if camera_hash in self._ray_cache:
+                raise ValueError("There are redundant cameras!")
+            ray_bundle = self.forward(camera, caching=True, chunksize=full_chunksize)
+            # Store detached CPU copies of all fields of the ray bundle.
+            self._ray_cache[camera_hash] = RayBundle(
+                *[v.to("cpu").detach() for v in ray_bundle]
+            )
+            self._print_precaching_progress(camera_i, len(cameras))
+        print("")
+
+    def _stratify_ray_bundle(self, ray_bundle: RayBundle):
+        """
+        Stratifies the lengths of the input `ray_bundle`.
+
+        More specifically, the stratification replaces each ray point's depth `z`
+        with a sample from a uniform random distribution on `[lower, upper]`,
+        where `lower` / `upper` are the mid-points between `z` and its previous /
+        next depth (the first `lower` and the last `upper` equal `z` itself).
+
+        Args:
+            `ray_bundle`: The input `RayBundle`.
+
+        Returns:
+            `stratified_ray_bundle`: `ray_bundle` whose `lengths` field is replaced
+                with the stratified samples.
+        """
+        z_vals = ray_bundle.lengths
+        # Get intervals between samples.
+        mids = 0.5 * (z_vals[..., 1:] + z_vals[..., :-1])
+        upper = torch.cat((mids, z_vals[..., -1:]), dim=-1)
+        lower = torch.cat((z_vals[..., :1], mids), dim=-1)
+        # Stratified samples in those intervals.
+        z_vals = lower + (upper - lower) * torch.rand_like(lower)
+        return ray_bundle._replace(lengths=z_vals)
+
+    def _normalize_raybundle(self, ray_bundle: RayBundle):
+        """
+        Returns a copy of `ray_bundle` whose `directions` are normalized
+        along the last dimension.
+        """
+        unit_directions = torch.nn.functional.normalize(ray_bundle.directions, dim=-1)
+        # `_replace` leaves all the other fields of the bundle untouched.
+        return ray_bundle._replace(directions=unit_directions)
+
+    def forward(
+        self,
+        cameras: CamerasBase,
+        chunksize: int = None,
+        chunk_idx: int = 0,
+        camera_hash: str = None,
+        caching: bool = False,
+        **kwargs,
+    ) -> RayBundle:
+        """
+        Args:
+            cameras: A batch of `batch_size` cameras from which the rays are emitted.
+            chunksize: The number of rays per chunk.
+                Active only when `self.training==False`.
+            chunk_idx: The index of the ray chunk. The number has to be in
+                `[0, self.get_n_chunks(chunksize, batch_size)-1]`.
+                Active only when `self.training==False`.
+            camera_hash: Key of a pre-cached ray bundle (single-camera batches
+                only). If `None`, the rays are calculated from scratch.
+            caching: If `True`, activates the caching mode that returns the `RayBundle`
+                that should be stored into the cache.
+        Returns:
+            A named tuple `RayBundle` with the following fields:
+                origins: A tensor of shape
+                    `(batch_size, n_rays_per_image, 3)`
+                    denoting the locations of ray origins in the world coordinates.
+                directions: A tensor of shape
+                    `(batch_size, n_rays_per_image, 3)`
+                    denoting the directions of each ray in the world coordinates.
+                lengths: A tensor of shape
+                    `(batch_size, n_rays_per_image, n_pts_per_ray)`
+                    containing the z-coordinate (=depth) of each ray in world units.
+                xys: A tensor of shape
+                    `(batch_size, n_rays_per_image, 2)`
+                    containing the 2D image coordinates of each ray.
+        """
+
+        batch_size = cameras.R.shape[0]  # pyre-ignore
+        device = cameras.device
+
+        if (camera_hash is None) and (not caching) and self.training:
+            # Sample random rays from scratch.
+            ray_bundle = self._mc_raysampler(cameras)
+            ray_bundle = self._normalize_raybundle(ray_bundle)
+        else:
+            if camera_hash is not None:
+                # The case where we retrieve a camera from cache.
+                if batch_size != 1:
+                    raise NotImplementedError(
+                        "Ray caching works only for batches with a single camera!"
+                    )
+                full_ray_bundle = self._ray_cache[camera_hash]
+            else:
+                # We generate a full ray grid from scratch.
+                full_ray_bundle = self._grid_raysampler(cameras)
+                full_ray_bundle = self._normalize_raybundle(full_ray_bundle)
+
+            n_pixels = full_ray_bundle.directions.shape[:-1].numel()
+
+            if self.training:
+                # During training we randomly subsample rays.
+                sel_rays = torch.randperm(
+                    n_pixels, device=full_ray_bundle.lengths.device
+                )[: self._mc_raysampler._n_rays_per_image]
+            else:
+                # Eval: take only the requested chunk. NOTE(review): verify batch_size > 1.
+                if chunksize is None:
+                    chunksize = n_pixels * batch_size
+                start = chunk_idx * chunksize * batch_size
+                end = min(start + chunksize, n_pixels)
+                sel_rays = torch.arange(
+                    start,
+                    end,
+                    dtype=torch.long,
+                    device=full_ray_bundle.lengths.device,
+                )
+
+            # Take the "sel_rays" rays from the full ray bundle.
+            ray_bundle = RayBundle(
+                *[
+                    v.view(n_pixels, -1)[sel_rays]
+                    .view(batch_size, sel_rays.numel() // batch_size, -1)
+                    .to(device)
+                    for v in full_ray_bundle
+                ]
+            )
+
+        if (
+            (self._stratified and self.training)
+            or (self._stratified_test and not self.training)
+        ) and not caching:  # Make sure not to stratify when caching!
+            ray_bundle = self._stratify_ray_bundle(ray_bundle)
+
+        return ray_bundle
diff --git a/pytorch3d/projects/nerf/nerf/stats.py b/pytorch3d/projects/nerf/nerf/stats.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb02472bcf990ea8d6dfb3b4ea1484d739b754df
--- /dev/null
+++ b/pytorch3d/projects/nerf/nerf/stats.py
@@ -0,0 +1,346 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import time
+import warnings
+from itertools import cycle
+from typing import List, Optional
+
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib import colors as mcolors
+from visdom import Visdom
+
+
+class AverageMeter:
+    """
+    Computes and stores the average and current value.
+    Tracks the exact history of the added values in every epoch.
+    """
+
+    def __init__(self) -> None:
+        """
+        Initialize the structure with empty history and zero-ed moving average.
+        """
+        self.history = []
+        self.reset()
+
+    def reset(self) -> None:
+        """
+        Reset the running average meter (the per-epoch history is kept).
+        """
+        self.val = 0
+        self.avg = 0
+        self.sum = 0
+        self.count = 0
+
+    def update(self, val: float, n: int = 1, epoch: int = 0) -> None:
+        """
+        Updates the average meter with a value `val`.
+
+        Args:
+            val: A float to be added to the meter.
+            n: Weight of `val`. NOTE(review): history gets `val / n`, sum `val * n`.
+            epoch: The epoch to which the number should be added.
+        """
+        # Grow the history so that it has an entry (a list) for `epoch`.
+        while len(self.history) <= epoch:
+            self.history.append([])
+        self.history[epoch].append(val / n)
+        self.val = val
+        self.sum += val * n
+        self.count += n
+        self.avg = self.sum / self.count
+
+    def get_epoch_averages(self):
+        """
+        Returns:
+            averages: A list with the mean of the values recorded in each epoch
+                (NaN for an epoch without values), or `None` for empty history.
+        """
+        if len(self.history) == 0:
+            return None
+        return [
+            (float(np.array(h).mean()) if len(h) > 0 else float("NaN"))
+            for h in self.history
+        ]
+
+
+class Stats:
+    """
+    Stats logging object useful for gathering statistics of training
+    a deep network in PyTorch.
+
+    Example:
+        ```
+        # Init stats structure that logs statistics 'objective' and 'top1e'.
+        stats = Stats( ('objective','top1e') )
+
+        network = init_net()  # init a pytorch module (=neural network)
+        dataloader = init_dataloader()  # init a dataloader
+
+        for epoch in range(10):
+
+            # start of epoch -> call new_epoch
+            stats.new_epoch()
+
+            # Iterate over batches.
+            for batch in dataloader:
+                # Run a model and save into a dict of output variables "output"
+                output = network(batch)
+
+                # stats.update() automatically parses the 'objective' and 'top1e'
+                # from the "output" dict and stores this into the db.
+                stats.update(output)
+                stats.print() # prints the averages over given epoch
+
+            # Stores the training plots into '/tmp/epoch_stats.pdf'
+            # and plots into a visdom server running at localhost (if running).
+            stats.plot_stats(plot_file='/tmp/epoch_stats.pdf')
+        ```
+    """
+
+    def __init__(
+        self,
+        log_vars: List[str],
+        verbose: bool = False,
+        epoch: int = -1,
+        plot_file: Optional[str] = None,
+    ) -> None:
+        """
+        Args:
+            log_vars: Names of the variables to be tracked.
+            verbose: If `True`, status messages are printed.
+            epoch: The epoch index the object starts from.
+            plot_file: Default path of the file receiving the training plots.
+        """
+        self.log_vars = log_vars
+        self.plot_file = plot_file
+        self.verbose = verbose
+        self.hard_reset(epoch=epoch)
+
+    def reset(self) -> None:
+        """
+        Clears the running averages and iteration counters of all stat sets
+        and restarts the epoch timer.
+        """
+        if self.verbose:
+            print("stats: epoch %d - reset" % self.epoch)
+        self.it = dict.fromkeys(self.stats, -1)
+        for meters in self.stats.values():
+            for meter in meters.values():
+                meter.reset()
+
+        # Remember when the epoch started (read by the "sec/it" statistic).
+        self._epoch_start = time.time()
+
+    def hard_reset(self, epoch: int = -1) -> None:
+        """
+        Drops every stat set together with its history and restarts at `epoch`.
+        """
+        self.epoch = epoch
+        self.stats = {}
+        self._epoch_start = None
+        if self.verbose:
+            print("stats: epoch %d - hard reset" % self.epoch)
+        self.reset()
+
+    def new_epoch(self) -> None:
+        """
+        Advances the epoch counter by one and resets the running averages.
+        """
+        self.epoch += 1
+        if self.verbose:
+            print("stats: new epoch %d" % self.epoch)
+        self.reset()
+
+    def _gather_value(self, val):
+        """Convert a logged value (plain number or tensor) to a Python float."""
+        if isinstance(val, (int, float)):
+            # Plain numbers (incl. ints, which have no `.data`) are only cast.
+            return float(val)
+        # Tensor-like value: sum all of its elements into a single scalar.
+        return float(val.data.cpu().numpy().sum())
+
+    def update(self, preds: dict, stat_set: str = "train") -> None:
+        """
+        Logs the metrics of a single step into the stat set `stat_set`.
+
+        Every name in `self.log_vars` gets its own `AverageMeter`; the special
+        name "sec/it" is filled with the average time per iteration of the
+        current epoch instead of a value from `preds`.
+
+        Args:
+            preds: Dict of values to be added to the logs. Names from
+                `self.log_vars` that are missing in `preds` are skipped.
+            stat_set: The set of statistics to be updated (e.g. "train", "val").
+        """
+        if self.epoch == -1:
+            # The structure was never initialized with `new_epoch()`.
+            warnings.warn(
+                "self.epoch==-1 means uninitialized stats structure"
+                " -> new_epoch() called"
+            )
+            self.new_epoch()
+
+        # Lazily create the stat set together with its iteration counter.
+        if stat_set not in self.stats:
+            self.stats[stat_set] = {}
+            self.it[stat_set] = -1
+        self.it[stat_set] += 1
+
+        # Number of iterations of this stat set seen so far in the epoch.
+        n_iters = self.it[stat_set] + 1
+        meters = self.stats[stat_set]
+        for name in self.log_vars:
+            # Each logged variable is tracked by its own meter.
+            if name not in meters:
+                meters[name] = AverageMeter()
+
+            if name == "sec/it":
+                # Average wall-clock time per iteration since the epoch start.
+                value = float(time.time() - self._epoch_start) / float(n_iters)
+            elif name in preds:
+                value = self._gather_value(preds[name])
+            else:
+                # Nothing to log for this variable in the current step.
+                continue
+            meters[name].update(value, epoch=self.epoch, n=1)
+
+    def print(self, max_it: Optional[int] = None, stat_set: str = "train") -> None:
+        """
+        Prints one line with the running averages of the stat set `stat_set`.
+
+        Statistics that received no value since the last reset are left out;
+        the remaining ones are listed sorted by their name.
+
+        Args:
+            max_it: Maximum iteration number to be displayed.
+                If None (or 0), the maximum iteration number is not displayed.
+            stat_set: The set of statistics to be printed.
+        """
+
+        epoch = self.epoch
+        it = self.it[stat_set]
+        meters = self.stats[stat_set]
+
+        # Header with the stat set name, the epoch and the iteration counter.
+        head_str = f"[{stat_set}] | epoch {epoch} | it {it}"
+        if max_it:
+            head_str += f"/ {max_it}"
+
+        # One " name: average |" entry per statistic with at least one value.
+        stat_str = "".join(
+            " {0:.12}: {1:1.3f} |".format(name, meters[name].avg)
+            for name in sorted(meters.keys())
+            if meters[name].count != 0
+        )
+
+        print(f"{head_str} | {stat_str}")
+
+    def plot_stats(
+        self,
+        viz: Visdom = None,
+        visdom_env: Optional[str] = None,
+        plot_file: Optional[str] = None,
+    ) -> None:
+        """
+        Plot the line charts of the per-epoch averages of the logged stats.
+
+        Args:
+            viz: The Visdom connection; if `None` or disconnected, visdom is skipped.
+            visdom_env: The visdom environment for storing the graphs.
+            plot_file: The path of the exported matplotlib figure. Defaults to
+                `self.plot_file`; if both are `None`, no figure is exported.
+        """
+
+        stat_sets = list(self.stats.keys())
+
+        if viz is None:
+            withvisdom = False
+        elif not viz.check_connection():
+            warnings.warn("Cannot connect to the visdom server! Skipping visdom plots.")
+            withvisdom = False
+        else:
+            withvisdom = True
+
+        lines = []
+        for stat in self.log_vars:
+            vals = []
+            stat_sets_now = []
+            for stat_set in stat_sets:
+                val = self.stats[stat_set][stat].get_epoch_averages()
+                if val is None:
+                    continue
+                vals.append(np.array(val).reshape(-1))
+                stat_sets_now.append(stat_set)
+
+            if len(vals) == 0:
+                continue
+
+            vals = np.stack(vals, axis=1)
+            x = np.arange(vals.shape[0])
+
+            lines.append((stat_sets_now, stat, x, vals))
+
+        if withvisdom:
+            for tmodes, stat, x, vals in lines:
+                title = "%s" % stat
+                opts = {"title": title, "legend": list(tmodes)}
+                first_trace = True  # The first plotted trace (re)creates the window.
+                for tmode, val in zip(tmodes, vals.T):
+                    valid = np.where(np.isfinite(val))[0]  # indices of finite values
+                    if valid.size == 0:
+                        continue
+                    viz.line(
+                        Y=val[valid],
+                        X=x[valid],
+                        env=visdom_env,
+                        opts=opts,
+                        win=f"stat_plot_{title}",
+                        name=tmode,
+                        update=None if first_trace else "append",
+                    )
+                    first_trace = False
+
+        if plot_file is None:
+            plot_file = self.plot_file
+
+        if plot_file is not None:
+            print("Exporting stats to %s" % plot_file)
+            ncol = 3
+            nrow = int(np.ceil(float(len(lines)) / ncol))
+            matplotlib.rcParams.update({"font.size": 5})
+            color = cycle(plt.cm.tab10(np.linspace(0, 1, 10)))
+            fig = plt.figure(1)
+            plt.clf()
+            for idx, (tmodes, stat, x, vals) in enumerate(lines):
+                c = next(color)
+                plt.subplot(nrow, ncol, idx + 1)
+                for vali, (tmode, vals_) in enumerate(zip(tmodes, vals.T)):
+                    c_ = c * (1.0 - float(vali) * 0.3)
+                    valid = np.where(np.isfinite(vals_))[0]
+                    if valid.size == 0:
+                        continue
+                    plt.plot(x[valid], vals_[valid], c=c_, linewidth=1, label=tmode)
+                plt.ylabel(stat)
+                plt.xlabel("epoch")
+                plt.gca().yaxis.label.set_color(c[0:3] * 0.75)
+                plt.legend()
+                gcolor = np.array(mcolors.to_rgba("lightgray"))
+                # Positional flag: Matplotlib 3.5 renamed keyword `b` to `visible`.
+                plt.grid(
+                    True, which="major", color=gcolor, linestyle="-", linewidth=0.4
+                )
+                plt.grid(
+                    True, which="minor", color=gcolor, linestyle="--", linewidth=0.2
+                )
+                plt.minorticks_on()
+
+            plt.tight_layout()
+            plt.show()
+            fig.savefig(plot_file)
diff --git a/pytorch3d/projects/nerf/nerf/utils.py b/pytorch3d/projects/nerf/nerf/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..dbe5e91285a3f70ab62848a7f8927f425feb3513
--- /dev/null
+++ b/pytorch3d/projects/nerf/nerf/utils.py
@@ -0,0 +1,59 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+
+
+def calc_mse(x: torch.Tensor, y: torch.Tensor):
+    """
+    Returns the mean of the squared element-wise difference of `x` and `y`.
+    """
+    return (x - y).pow(2).mean()
+
+
+def calc_psnr(x: torch.Tensor, y: torch.Tensor):
+    """
+    Calculates the Peak-signal-to-noise ratio between tensors `x` and `y`
+    as `-10 * log10(mse)`, i.e. with respect to a peak signal value of 1.
+    """
+    mse = calc_mse(x, y)
+    return -10.0 * torch.log10(mse)
+
+
+def sample_images_at_mc_locs(
+    target_images: torch.Tensor,
+    sampled_rays_xy: torch.Tensor,
+):
+    """
+    Samples the tensor `target_images` at the 2D pixel locations
+    `sampled_rays_xy` with bilinear interpolation.
+
+    This function is used in order to extract the colors from ground truth images
+    that correspond to the colors rendered using a Monte Carlo rendering.
+
+    Args:
+        target_images: A 4-dimensional tensor of shape `(batch_size, H, W, dim)`.
+        sampled_rays_xy: A tensor of shape `(batch_size, S_1, ..., S_N, 2)`.
+
+    Returns:
+        images_sampled: A tensor of shape `(batch_size, S_1, ..., S_N, dim)`
+            containing `target_images` sampled at `sampled_rays_xy`.
+    """
+    batch_size, n_channels = target_images.shape[0], target_images.shape[-1]
+    out_shape = (batch_size, *sampled_rays_xy.shape[1:-1], n_channels)
+
+    # The coordinate grid convention for grid_sample has both x and y
+    # directions inverted, hence the sign flip of the sampling locations.
+    flipped_xy = -sampled_rays_xy.view(batch_size, -1, 1, 2).clone()
+
+    sampled = torch.nn.functional.grid_sample(
+        target_images.permute(0, 3, 1, 2),
+        flipped_xy,
+        align_corners=True,
+        mode="bilinear",
+    )
+    return sampled.permute(0, 2, 3, 1).view(out_shape)
diff --git a/pytorch3d/projects/nerf/test_nerf.py b/pytorch3d/projects/nerf/test_nerf.py
new file mode 100644
index 0000000000000000000000000000000000000000..2d7bafc0b5d9e4e9b1101920fbe6a979faafa300
--- /dev/null
+++ b/pytorch3d/projects/nerf/test_nerf.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import warnings
+
+import hydra
+import numpy as np
+import torch
+from nerf.dataset import get_nerf_datasets, trivial_collate
+from nerf.eval_video_utils import generate_eval_video_cameras
+from nerf.nerf_renderer import RadianceFieldRenderer
+from nerf.stats import Stats
+from omegaconf import DictConfig
+from PIL import Image
+
+# Directory named "configs" next to this script; passed to hydra as config path.
+CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs")
+
+
+@hydra.main(config_path=CONFIG_DIR, config_name="lego")
+def main(cfg: DictConfig):
+
+    # Device on which to run.
+    if torch.cuda.is_available():
+        device = "cuda"
+    else:
+        warnings.warn(
+            "Please note that although executing on CPU is supported,"
+            + "the testing is unlikely to finish in reasonable time."
+        )
+        device = "cpu"
+
+    # Initialize the Radiance Field model.
+    model = RadianceFieldRenderer(
+        image_size=cfg.data.image_size,
+        n_pts_per_ray=cfg.raysampler.n_pts_per_ray,
+        n_pts_per_ray_fine=cfg.raysampler.n_pts_per_ray,
+        n_rays_per_image=cfg.raysampler.n_rays_per_image,
+        min_depth=cfg.raysampler.min_depth,
+        max_depth=cfg.raysampler.max_depth,
+        stratified=cfg.raysampler.stratified,
+        stratified_test=cfg.raysampler.stratified_test,
+        chunk_size_test=cfg.raysampler.chunk_size_test,
+        n_harmonic_functions_xyz=cfg.implicit_function.n_harmonic_functions_xyz,
+        n_harmonic_functions_dir=cfg.implicit_function.n_harmonic_functions_dir,
+        n_hidden_neurons_xyz=cfg.implicit_function.n_hidden_neurons_xyz,
+        n_hidden_neurons_dir=cfg.implicit_function.n_hidden_neurons_dir,
+        n_layers_xyz=cfg.implicit_function.n_layers_xyz,
+        density_noise_std=cfg.implicit_function.density_noise_std,
+    )
+
+    # Move the model to the relevant device.
+    model.to(device)
+
+    # Resume from the checkpoint.
+    checkpoint_path = os.path.join(hydra.utils.get_original_cwd(), cfg.checkpoint_path)
+    if not os.path.isfile(checkpoint_path):
+        raise ValueError(f"Model checkpoint {checkpoint_path} does not exist!")
+
+    print(f"Loading checkpoint {checkpoint_path}.")
+    loaded_data = torch.load(checkpoint_path)
+    # Do not load the cached xy grid.
+    # - this allows setting an arbitrary evaluation image size.
+    state_dict = {
+        k: v
+        for k, v in loaded_data["model"].items()
+        if "_grid_raysampler._xy_grid" not in k
+    }
+    model.load_state_dict(state_dict, strict=False)
+
+    # Load the test data.
+    if cfg.test.mode == "evaluation":
+        _, _, test_dataset = get_nerf_datasets(
+            dataset_name=cfg.data.dataset_name,
+            image_size=cfg.data.image_size,
+        )
+    elif cfg.test.mode == "export_video":
+        train_dataset, _, _ = get_nerf_datasets(
+            dataset_name=cfg.data.dataset_name,
+            image_size=cfg.data.image_size,
+        )
+        test_dataset = generate_eval_video_cameras(
+            train_dataset,
+            trajectory_type=cfg.test.trajectory_type,
+            up=cfg.test.up,
+            scene_center=cfg.test.scene_center,
+            n_eval_cams=cfg.test.n_frames,
+            trajectory_scale=cfg.test.trajectory_scale,
+        )
+        # store the video in directory (checkpoint_file - extension + '_video')
+        export_dir = os.path.splitext(checkpoint_path)[0] + "_video"
+        os.makedirs(export_dir, exist_ok=True)
+    else:
+        raise ValueError(f"Unknown test mode {cfg.test_mode}.")
+
+    # Init the test dataloader.
+    test_dataloader = torch.utils.data.DataLoader(
+        test_dataset,
+        batch_size=1,
+        shuffle=False,
+        num_workers=0,
+        collate_fn=trivial_collate,
+    )
+
+    if cfg.test.mode == "evaluation":
+        # Init the test stats object.
+        eval_stats = ["mse_coarse", "mse_fine", "psnr_coarse", "psnr_fine", "sec/it"]
+        stats = Stats(eval_stats)
+        stats.new_epoch()
+    elif cfg.test.mode == "export_video":
+        # Init the frame buffer.
+        frame_paths = []
+
+    # Set the model to the eval mode.
+    model.eval()
+
+    # Run the main testing loop.
+    for batch_idx, test_batch in enumerate(test_dataloader):
+        test_image, test_camera, camera_idx = test_batch[0].values()
+        if test_image is not None:
+            test_image = test_image.to(device)
+        test_camera = test_camera.to(device)
+
+        # Activate eval mode of the model (lets us do a full rendering pass).
+        model.eval()
+        with torch.no_grad():
+            test_nerf_out, test_metrics = model(
+                None,  # we do not use pre-cached cameras
+                test_camera,
+                test_image,
+            )
+
+        if cfg.test.mode == "evaluation":
+            # Update stats with the validation metrics.
+            stats.update(test_metrics, stat_set="test")
+            stats.print(stat_set="test")
+
+        elif cfg.test.mode == "export_video":
+            # Store the video frame.
+            frame = test_nerf_out["rgb_fine"][0].detach().cpu()
+            frame_path = os.path.join(export_dir, f"frame_{batch_idx:05d}.png")
+            print(f"Writing {frame_path}.")
+            Image.fromarray((frame.numpy() * 255.0).astype(np.uint8)).save(frame_path)
+            frame_paths.append(frame_path)
+
+    if cfg.test.mode == "evaluation":
+        print(f"Final evaluation metrics on '{cfg.data.dataset_name}':")
+        for stat in eval_stats:
+            stat_value = stats.stats["test"][stat].get_epoch_averages()[0]
+            print(f"{stat:15s}: {stat_value:1.4f}")
+
+    elif cfg.test.mode == "export_video":
+        # Convert the exported frames to a video.
+        video_path = os.path.join(export_dir, "video.mp4")
+        ffmpeg_bin = "ffmpeg"
+        frame_regexp = os.path.join(export_dir, "frame_%05d.png")
+        ffmcmd = (
+            "%s -r %d -i %s -vcodec h264 -f mp4 -y -b 2000k -pix_fmt yuv420p %s"
+            % (ffmpeg_bin, cfg.test.fps, frame_regexp, video_path)
+        )
+        ret = os.system(ffmcmd)
+        if ret != 0:
+            raise RuntimeError("ffmpeg failed!")
+
+
+if __name__ == "__main__":
+    main()  # called without arguments; `main` is wrapped by `@hydra.main`
diff --git a/pytorch3d/projects/nerf/tests/__init__.py b/pytorch3d/projects/nerf/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/projects/nerf/tests/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/projects/nerf/tests/test_raymarcher.py b/pytorch3d/projects/nerf/tests/test_raymarcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9cfb74c4f7d6262100c1c9fdcf520fc369c98b6
--- /dev/null
+++ b/pytorch3d/projects/nerf/tests/test_raymarcher.py
@@ -0,0 +1,38 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from nerf.raymarcher import EmissionAbsorptionNeRFRaymarcher
+from pytorch3d.renderer import EmissionAbsorptionRaymarcher
+
+
+class TestRaymarcher(unittest.TestCase):
+    def setUp(self) -> None:
+        torch.manual_seed(42)
+
+    def test_raymarcher(self):
+        """
+        Checks that the nerf raymarcher outputs are identical to the
+        EmissionAbsorptionRaymarcher.
+        """
+
+        feat_dim = 3
+        rays_densities = torch.rand(100, 10, 1)
+        rays_features = torch.randn(100, 10, feat_dim)
+
+        out, out_nerf = [
+            raymarcher(rays_densities, rays_features)
+            for raymarcher in (
+                EmissionAbsorptionRaymarcher(),
+                EmissionAbsorptionNeRFRaymarcher(),
+            )
+        ]
+
+        self.assertTrue(
+            torch.allclose(out[..., :feat_dim], out_nerf[0][..., :feat_dim])
+        )
diff --git a/pytorch3d/projects/nerf/tests/test_raysampler.py b/pytorch3d/projects/nerf/tests/test_raysampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..ba53713a819a9c255edec3f606acf106b94a7145
--- /dev/null
+++ b/pytorch3d/projects/nerf/tests/test_raysampler.py
@@ -0,0 +1,126 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from nerf.raysampler import NeRFRaysampler, ProbabilisticRaysampler
+from pytorch3d.renderer import PerspectiveCameras
+from pytorch3d.transforms.rotation_conversions import random_rotations
+
+
+class TestRaysampler(unittest.TestCase):
+    def setUp(self) -> None:
+        torch.manual_seed(42)
+
+    def test_raysampler_caching(self, batch_size=10):
+        """
+        Tests the consistency of the NeRF raysampler caching: the rays
+        returned after `precache_rays` must match those sampled before it.
+        """
+        raysampler = NeRFRaysampler(
+            min_x=0.0,
+            max_x=10.0,
+            min_y=0.0,
+            max_y=10.0,
+            n_pts_per_ray=10,
+            min_depth=0.1,
+            max_depth=10.0,
+            n_rays_per_image=12,
+            image_width=10,
+            image_height=10,
+            stratified=False,
+            stratified_test=False,
+            invert_directions=True,
+        )
+
+        raysampler.eval()
+
+        cameras, rays = [], []
+
+        for _ in range(batch_size):
+            # A random camera; its rays are sampled before any precaching.
+            R = random_rotations(1)
+            T = torch.randn(1, 3)
+            focal_length = torch.rand(1, 2) + 0.5
+            principal_point = torch.randn(1, 2)
+
+            camera = PerspectiveCameras(
+                focal_length=focal_length,
+                principal_point=principal_point,
+                R=R,
+                T=T,
+            )
+
+            cameras.append(camera)
+            rays.append(raysampler(camera))
+
+        raysampler.precache_rays(cameras, list(range(batch_size)))
+        # Sample again, now passing each camera's list index as `camera_hash`.
+        for cam_index, rays_ in enumerate(rays):
+            rays_cached_ = raysampler(
+                cameras=cameras[cam_index],
+                chunksize=None,
+                chunk_idx=0,
+                camera_hash=cam_index,
+                caching=False,
+            )
+
+            for v, v_cached in zip(rays_, rays_cached_):
+                self.assertTrue(torch.allclose(v, v_cached))
+
+    def test_probabilistic_raysampler(self, batch_size=1, n_pts_per_ray=60):
+        """
+        Check that the probabilistic ray sampler does not crash for various
+        settings.
+        """
+
+        raysampler_grid = NeRFRaysampler(
+            min_x=0.0,
+            max_x=10.0,
+            min_y=0.0,
+            max_y=10.0,
+            n_pts_per_ray=n_pts_per_ray,
+            min_depth=1.0,
+            max_depth=10.0,
+            n_rays_per_image=12,
+            image_width=10,
+            image_height=10,
+            stratified=False,
+            stratified_test=False,
+            invert_directions=True,
+        )
+
+        R = random_rotations(batch_size)
+        T = torch.randn(batch_size, 3)
+        focal_length = torch.rand(batch_size, 2) + 0.5
+        principal_point = torch.randn(batch_size, 2)
+        camera = PerspectiveCameras(
+            focal_length=focal_length,
+            principal_point=principal_point,
+            R=R,
+            T=T,
+        )
+
+        raysampler_grid.eval()
+
+        ray_bundle = raysampler_grid(cameras=camera)
+        # Random weights with the same shape as the lengths of the ray bundle.
+        ray_weights = torch.rand_like(ray_bundle.lengths)
+
+        # Just check that we do not crash for all possible settings.
+        for stratified_test in (True, False):
+            for stratified in (True, False):
+                raysampler_prob = ProbabilisticRaysampler(
+                    n_pts_per_ray=n_pts_per_ray,
+                    stratified=stratified,
+                    stratified_test=stratified_test,
+                    add_input_samples=True,
+                )
+                for mode in ("train", "eval"):
+                    getattr(raysampler_prob, mode)()  # calls .train() or .eval()
+                    for _ in range(10):
+                        raysampler_prob(ray_bundle, ray_weights)
diff --git a/pytorch3d/projects/nerf/train_nerf.py b/pytorch3d/projects/nerf/train_nerf.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b079bb3650d4c4569a55cdd3f6ada6501660ee9
--- /dev/null
+++ b/pytorch3d/projects/nerf/train_nerf.py
@@ -0,0 +1,273 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import collections
+import os
+import pickle
+import warnings
+
+import hydra
+import numpy as np
+import torch
+from nerf.dataset import get_nerf_datasets, trivial_collate
+from nerf.nerf_renderer import RadianceFieldRenderer, visualize_nerf_outputs
+from nerf.stats import Stats
+from omegaconf import DictConfig
+from visdom import Visdom
+
+# Directory named "configs" next to this script; passed to hydra as config path.
+CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs")
+
+
+@hydra.main(config_path=CONFIG_DIR, config_name="lego")
+def main(cfg: DictConfig):
+
+    # Set the relevant seeds for reproducibility.
+    np.random.seed(cfg.seed)
+    torch.manual_seed(cfg.seed)
+
+    # Device on which to run.
+    if torch.cuda.is_available():
+        device = "cuda"
+    else:
+        warnings.warn(
+            "Please note that although executing on CPU is supported,"
+            + "the training is unlikely to finish in reasonable time."
+        )
+        device = "cpu"
+
+    # Initialize the Radiance Field model.
+    model = RadianceFieldRenderer(
+        image_size=cfg.data.image_size,
+        n_pts_per_ray=cfg.raysampler.n_pts_per_ray,
+        n_pts_per_ray_fine=cfg.raysampler.n_pts_per_ray,
+        n_rays_per_image=cfg.raysampler.n_rays_per_image,
+        min_depth=cfg.raysampler.min_depth,
+        max_depth=cfg.raysampler.max_depth,
+        stratified=cfg.raysampler.stratified,
+        stratified_test=cfg.raysampler.stratified_test,
+        chunk_size_test=cfg.raysampler.chunk_size_test,
+        n_harmonic_functions_xyz=cfg.implicit_function.n_harmonic_functions_xyz,
+        n_harmonic_functions_dir=cfg.implicit_function.n_harmonic_functions_dir,
+        n_hidden_neurons_xyz=cfg.implicit_function.n_hidden_neurons_xyz,
+        n_hidden_neurons_dir=cfg.implicit_function.n_hidden_neurons_dir,
+        n_layers_xyz=cfg.implicit_function.n_layers_xyz,
+        density_noise_std=cfg.implicit_function.density_noise_std,
+        visualization=cfg.visualization.visdom,
+    )
+
+    # Move the model to the relevant device.
+    model.to(device)
+
+    # Init stats to None before loading.
+    stats = None
+    optimizer_state_dict = None
+    start_epoch = 0
+
+    checkpoint_path = os.path.join(hydra.utils.get_original_cwd(), cfg.checkpoint_path)
+    if len(cfg.checkpoint_path) > 0:
+        # Make the root of the experiment directory.
+        checkpoint_dir = os.path.split(checkpoint_path)[0]
+        os.makedirs(checkpoint_dir, exist_ok=True)
+
+        # Resume training if requested.
+        if cfg.resume and os.path.isfile(checkpoint_path):
+            print(f"Resuming from checkpoint {checkpoint_path}.")
+            loaded_data = torch.load(checkpoint_path)
+            model.load_state_dict(loaded_data["model"])
+            stats = pickle.loads(loaded_data["stats"])
+            print(f"   => resuming from epoch {stats.epoch}.")
+            optimizer_state_dict = loaded_data["optimizer"]
+            start_epoch = stats.epoch
+
+    # Initialize the optimizer.
+    optimizer = torch.optim.Adam(
+        model.parameters(),
+        lr=cfg.optimizer.lr,
+    )
+
+    # Load the optimizer state dict in case we are resuming.
+    if optimizer_state_dict is not None:
+        optimizer.load_state_dict(optimizer_state_dict)
+        optimizer.last_epoch = start_epoch
+
+    # Init the stats object.
+    if stats is None:
+        stats = Stats(
+            ["loss", "mse_coarse", "mse_fine", "psnr_coarse", "psnr_fine", "sec/it"],
+        )
+
+    # Learning rate scheduler setup.
+
+    # Following the original code, we use exponential decay of the
+    # learning rate: current_lr = base_lr * gamma ** (epoch / step_size)
+    def lr_lambda(epoch):
+        return cfg.optimizer.lr_scheduler_gamma ** (
+            epoch / cfg.optimizer.lr_scheduler_step_size
+        )
+
+    # The learning rate scheduling is implemented with LambdaLR PyTorch scheduler.
+    lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
+        optimizer, lr_lambda, last_epoch=start_epoch - 1, verbose=False
+    )
+
+    # Initialize the cache for storing variables needed for visualization.
+    visuals_cache = collections.deque(maxlen=cfg.visualization.history_size)
+
+    # Init the visualization visdom env.
+    if cfg.visualization.visdom:
+        viz = Visdom(
+            server=cfg.visualization.visdom_server,
+            port=cfg.visualization.visdom_port,
+            use_incoming_socket=False,
+        )
+    else:
+        viz = None
+
+    # Load the training/validation data.
+    train_dataset, val_dataset, _ = get_nerf_datasets(
+        dataset_name=cfg.data.dataset_name,
+        image_size=cfg.data.image_size,
+    )
+
+    if cfg.data.precache_rays:
+        # Precache the projection rays.
+        model.eval()
+        with torch.no_grad():
+            for dataset in (train_dataset, val_dataset):
+                cache_cameras = [e["camera"].to(device) for e in dataset]
+                cache_camera_hashes = [e["camera_idx"] for e in dataset]
+                model.precache_rays(cache_cameras, cache_camera_hashes)
+
+    train_dataloader = torch.utils.data.DataLoader(
+        train_dataset,
+        batch_size=1,
+        shuffle=True,
+        num_workers=0,
+        collate_fn=trivial_collate,
+    )
+
+    # The validation dataloader is just an endless stream of random samples.
+    val_dataloader = torch.utils.data.DataLoader(
+        val_dataset,
+        batch_size=1,
+        num_workers=0,
+        collate_fn=trivial_collate,
+        sampler=torch.utils.data.RandomSampler(
+            val_dataset,
+            replacement=True,
+            num_samples=cfg.optimizer.max_epochs,
+        ),
+    )
+
+    # Set the model to the training mode.
+    model.train()
+
+    # Run the main training loop.
+    for epoch in range(start_epoch, cfg.optimizer.max_epochs):
+        stats.new_epoch()  # Init a new epoch.
+        for iteration, batch in enumerate(train_dataloader):
+            image, camera, camera_idx = batch[0].values()
+            image = image.to(device)
+            camera = camera.to(device)
+
+            optimizer.zero_grad()
+
+            # Run the forward pass of the model.
+            nerf_out, metrics = model(
+                camera_idx if cfg.data.precache_rays else None,
+                camera,
+                image,
+            )
+
+            # The loss is a sum of coarse and fine MSEs
+            loss = metrics["mse_coarse"] + metrics["mse_fine"]
+
+            # Take the training step.
+            loss.backward()
+            optimizer.step()
+
+            # Update stats with the current metrics.
+            stats.update(
+                {"loss": float(loss), **metrics},
+                stat_set="train",
+            )
+
+            if iteration % cfg.stats_print_interval == 0:
+                stats.print(stat_set="train")
+
+            # Update the visualization cache.
+            if viz is not None:
+                visuals_cache.append(
+                    {
+                        "camera": camera.cpu(),
+                        "camera_idx": camera_idx,
+                        "image": image.cpu().detach(),
+                        "rgb_fine": nerf_out["rgb_fine"].cpu().detach(),
+                        "rgb_coarse": nerf_out["rgb_coarse"].cpu().detach(),
+                        "rgb_gt": nerf_out["rgb_gt"].cpu().detach(),
+                        "coarse_ray_bundle": nerf_out["coarse_ray_bundle"],
+                    }
+                )
+
+        # Adjust the learning rate.
+        lr_scheduler.step()
+
+        # Validation
+        if epoch % cfg.validation_epoch_interval == 0 and epoch > 0:
+
+            # Sample a validation camera/image.
+            val_batch = next(val_dataloader.__iter__())
+            val_image, val_camera, camera_idx = val_batch[0].values()
+            val_image = val_image.to(device)
+            val_camera = val_camera.to(device)
+
+            # Activate eval mode of the model (lets us do a full rendering pass).
+            model.eval()
+            with torch.no_grad():
+                val_nerf_out, val_metrics = model(
+                    camera_idx if cfg.data.precache_rays else None,
+                    val_camera,
+                    val_image,
+                )
+
+            # Update stats with the validation metrics.
+            stats.update(val_metrics, stat_set="val")
+            stats.print(stat_set="val")
+
+            if viz is not None:
+                # Plot that loss curves into visdom.
+                stats.plot_stats(
+                    viz=viz,
+                    visdom_env=cfg.visualization.visdom_env,
+                    plot_file=None,
+                )
+                # Visualize the intermediate results.
+                visualize_nerf_outputs(
+                    val_nerf_out, visuals_cache, viz, cfg.visualization.visdom_env
+                )
+
+            # Set the model back to train mode.
+            model.train()
+
+        # Checkpoint.
+        if (
+            epoch % cfg.checkpoint_epoch_interval == 0
+            and len(cfg.checkpoint_path) > 0
+            and epoch > 0
+        ):
+            print(f"Storing checkpoint {checkpoint_path}.")
+            data_to_store = {
+                "model": model.state_dict(),
+                "optimizer": optimizer.state_dict(),
+                "stats": pickle.dumps(stats),
+            }
+            torch.save(data_to_store, checkpoint_path)
+
+
+if __name__ == "__main__":
+    main()  # called without arguments; `main` is wrapped by `@hydra.main`
diff --git a/pytorch3d/pytorch3d/__init__.py b/pytorch3d/pytorch3d/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ba85c540ae192c8d0d5e14c3d3d39344c33decc
--- /dev/null
+++ b/pytorch3d/pytorch3d/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+__version__ = "0.7.5"  # Version string of the pytorch3d package.
diff --git a/pytorch3d/pytorch3d/common/__init__.py b/pytorch3d/pytorch3d/common/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f5d84e961ca7f04618ee01cb3fa2fa658c5bd97
--- /dev/null
+++ b/pytorch3d/pytorch3d/common/__init__.py
@@ -0,0 +1,10 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .datatypes import Device, get_device, make_device
+
+# Export every module-level name that does not start with an underscore.
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
diff --git a/pytorch3d/pytorch3d/common/compat.py b/pytorch3d/pytorch3d/common/compat.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c155f12f4157e2d74da642a61ec5a4f180d3357
--- /dev/null
+++ b/pytorch3d/pytorch3d/common/compat.py
@@ -0,0 +1,43 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Sequence, Tuple, Union
+
+import torch
+
+
+"""
+Some functions which depend on PyTorch or Python versions.
+"""
+
+
+def meshgrid_ij(
+    *A: Union[torch.Tensor, Sequence[torch.Tensor]]
+) -> Tuple[torch.Tensor, ...]:  # pragma: no cover
+    """
+    Like torch.meshgrid was before PyTorch 1.10.0, i.e. with indexing set to ij
+    """
+    if (
+        # pyre-fixme[16]: Callable `meshgrid` has no attribute `__kwdefaults__`.
+        torch.meshgrid.__kwdefaults__ is not None
+        and "indexing" in torch.meshgrid.__kwdefaults__
+    ):
+        # PyTorch >= 1.10.0
+        # pyre-fixme[6]: For 1st param expected `Union[List[Tensor], Tensor]` but
+        #  got `Union[Sequence[Tensor], Tensor]`.
+        return torch.meshgrid(*A, indexing="ij")
+    # pyre-fixme[6]: For 1st param expected `Union[List[Tensor], Tensor]` but got
+    #  `Union[Sequence[Tensor], Tensor]`.
+    return torch.meshgrid(*A)
+
+
+def prod(iterable, *, start=1):
+    """
+    Like math.prod in Python 3.8 and later.
+    """
+    for i in iterable:
+        start *= i
+    return start
diff --git a/pytorch3d/pytorch3d/common/datatypes.py b/pytorch3d/pytorch3d/common/datatypes.py
new file mode 100644
index 0000000000000000000000000000000000000000..03fe3efc54dd81044ee579ee0aba8641eaa6b834
--- /dev/null
+++ b/pytorch3d/pytorch3d/common/datatypes.py
@@ -0,0 +1,58 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Optional, Union
+
+import torch
+
+
+Device = Union[str, torch.device]  # a device given as a str or a torch.device
+
+
+def make_device(device: Device) -> torch.device:
+    """
+    Makes an actual torch.device object from the device specified as
+    either a string or torch.device object. If the device is `cuda` without
+    a specific index, the index of the current device is assigned.
+
+    Args:
+        device: Device (as str or torch.device)
+
+    Returns:
+        A matching torch.device object
+    """
+    device = torch.device(device) if isinstance(device, str) else device
+    if device.type == "cuda" and device.index is None:
+        # If cuda but with no index, then the current cuda device is indicated.
+        # In that case, we fix to that device
+        device = torch.device(f"cuda:{torch.cuda.current_device()}")
+    return device
+
+
+def get_device(x, device: Optional[Device] = None) -> torch.device:
+    """
+    Gets the device of the specified variable x if it is a tensor, or
+    falls back to a default CPU device otherwise. Allows overriding by
+    providing an explicit device.
+
+    Args:
+        x: a torch.Tensor to get the device from or another type
+        device: Device (as str or torch.device) to fall back to
+
+    Returns:
+        A matching torch.device object
+    """
+
+    # User overrides device
+    if device is not None:
+        return make_device(device)
+
+    # Set device based on input tensor
+    if torch.is_tensor(x):
+        return x.device
+
+    # Default device is cpu
+    return torch.device("cpu")
diff --git a/pytorch3d/pytorch3d/common/linear_with_repeat.py b/pytorch3d/pytorch3d/common/linear_with_repeat.py
new file mode 100644
index 0000000000000000000000000000000000000000..2dd477be3f1045386bdfc5d588101b8c7be7ab31
--- /dev/null
+++ b/pytorch3d/pytorch3d/common/linear_with_repeat.py
@@ -0,0 +1,93 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+from typing import Tuple
+
+import torch
+import torch.nn.functional as F
+from torch.nn import init, Parameter
+
+
+class LinearWithRepeat(torch.nn.Module):
+    """
+    if x has shape (..., k, n1)
+    and y has shape (..., n2)
+    then
+    LinearWithRepeat(n1 + n2, out_features).forward((x,y))
+    is equivalent to
+    Linear(n1 + n2, out_features).forward(
+        torch.cat([x, y.unsqueeze(-2).expand(..., k, n2)], dim=-1)
+    )
+
+    Or visually:
+    Given the following, for each ray,
+
+                feature   ->
+
+    ray         xxxxxxxx
+    position    xxxxxxxx
+      |         xxxxxxxx
+      v         xxxxxxxx
+
+
+    and
+                            yyyyyyyy
+
+    where the y's do not depend on the position
+    but only on the ray,
+    we want to evaluate a Linear layer on both
+    types of data at every position.
+
+    It's as if we constructed
+
+                xxxxxxxxyyyyyyyy
+                xxxxxxxxyyyyyyyy
+                xxxxxxxxyyyyyyyy
+                xxxxxxxxyyyyyyyy
+
+    and sent that through the Linear.
+    """
+
+    def __init__(
+        self,
+        in_features: int,
+        out_features: int,
+        bias: bool = True,
+        device=None,
+        dtype=None,
+    ) -> None:
+        """
+        Copied from torch.nn.Linear.
+        """
+        factory_kwargs = {"device": device, "dtype": dtype}
+        super().__init__()
+        self.in_features = in_features
+        self.out_features = out_features
+        self.weight = Parameter(
+            torch.empty((out_features, in_features), **factory_kwargs)
+        )
+        if bias:
+            self.bias = Parameter(torch.empty(out_features, **factory_kwargs))
+        else:
+            self.register_parameter("bias", None)
+        self.reset_parameters()
+
+    def reset_parameters(self) -> None:
+        """
+        Copied from torch.nn.Linear.
+        """
+        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
+        if self.bias is not None:
+            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
+            bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
+            init.uniform_(self.bias, -bound, bound)
+
+    def forward(self, input: Tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
+        n1 = input[0].shape[-1]
+        output1 = F.linear(input[0], self.weight[:, :n1], self.bias)
+        output2 = F.linear(input[1], self.weight[:, n1:], None)
+        return output1 + output2.unsqueeze(-2)
diff --git a/pytorch3d/pytorch3d/common/workaround/__init__.py b/pytorch3d/pytorch3d/common/workaround/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..64c5d3fab285ba8b52c945a65f0d0a3996a5a581
--- /dev/null
+++ b/pytorch3d/pytorch3d/common/workaround/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .symeig3x3 import symeig3x3
+from .utils import _safe_det_3x3
diff --git a/pytorch3d/pytorch3d/common/workaround/symeig3x3.py b/pytorch3d/pytorch3d/common/workaround/symeig3x3.py
new file mode 100644
index 0000000000000000000000000000000000000000..479f8b000a81d8a6b35e389e4db5f0cd7dbc769b
--- /dev/null
+++ b/pytorch3d/pytorch3d/common/workaround/symeig3x3.py
@@ -0,0 +1,317 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+from typing import Optional, Tuple
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+
+class _SymEig3x3(nn.Module):
+    """
+    Optimized implementation of eigenvalues and eigenvectors computation for symmetric 3x3
+     matrices.
+
+    Eigenvalues are obtained in closed form (trigonometric solution of the
+    characteristic cubic); eigenvectors are then built from cross products of
+    the rows of (A - eigenvalue * I).
+
+    Please see https://en.wikipedia.org/wiki/Eigenvalue_algorithm#3.C3.973_matrices
+     and https://www.geometrictools.com/Documentation/RobustEigenSymmetric3x3.pdf
+    """
+
+    def __init__(self, eps: Optional[float] = None) -> None:
+        """
+        Args:
+            eps: small positive constant used for clamping and for the sign-based
+                regularization offsets. If None (or any other falsy value such
+                as 0.0, because the fallback uses `or`) the machine epsilon of
+                torch.float is used.
+        """
+        super().__init__()
+
+        # Constant helper tensors are registered as buffers so that they follow
+        # the module in `.to(device)` calls.
+        self.register_buffer("_identity", torch.eye(3))
+        self.register_buffer("_rotation_2d", torch.tensor([[0.0, -1.0], [1.0, 0.0]]))
+        self.register_buffer(
+            "_rotations_3d", self._create_rotation_matrices(self._rotation_2d)
+        )
+
+        self._eps = eps or torch.finfo(torch.float).eps
+
+    @staticmethod
+    def _create_rotation_matrices(rotation_2d: torch.Tensor) -> torch.Tensor:
+        """
+        Compute rotations for later use in U V computation
+
+        Args:
+            rotation_2d: a π/2 rotation matrix of shape (2, 2).
+
+        Returns:
+            a (3, 3, 3) tensor. Entry [k] holds `rotation_2d` embedded in the
+            plane spanned by the two axes other than k; the row and the column
+            of axis k itself are left at zero.
+        """
+
+        rotations_3d = torch.zeros((3, 3, 3))
+        rotation_axes = set(range(3))
+        for rotation_axis in rotation_axes:
+            # The two remaining axes span the plane in which we rotate.
+            rest = list(rotation_axes - {rotation_axis})
+            rotations_3d[rotation_axis][rest[0], rest] = rotation_2d[0]
+            rotations_3d[rotation_axis][rest[1], rest] = rotation_2d[1]
+
+        return rotations_3d
+
+    def forward(
+        self, inputs: torch.Tensor, eigenvectors: bool = True
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+        """
+        Compute eigenvalues and (optionally) eigenvectors
+
+        Args:
+            inputs: symmetric matrices with shape of (..., 3, 3)
+            eigenvectors: whether should we compute only eigenvalues or eigenvectors as well
+
+        Returns:
+            A tuple (eigenvalues, eigenvectors). Eigenvalues are of shape (..., 3).
+            Eigenvectors are of shape (..., 3, 3), stacked along the last
+            dimension, or None when `eigenvectors` is False.
+
+        Raises:
+            ValueError: if the last two dimensions of `inputs` are not (3, 3).
+        """
+        if inputs.shape[-2:] != (3, 3):
+            raise ValueError("Only inputs of shape (..., 3, 3) are supported.")
+
+        inputs_diag = inputs.diagonal(dim1=-2, dim2=-1)
+        inputs_trace = inputs_diag.sum(-1)
+        # q = trace / 3 is subtracted from the diagonal below to form B.
+        q = inputs_trace / 3.0
+
+        # Calculate squared sum of elements outside the main diagonal / 2
+        # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+        p1 = ((inputs**2).sum(dim=(-1, -2)) - (inputs_diag**2).sum(-1)) / 2
+        # p1 is clamped from below by eps so that p2 (and hence p) stays
+        # strictly positive and the division by p below is safe.
+        # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+        p2 = ((inputs_diag - q[..., None]) ** 2).sum(dim=-1) + 2.0 * p1.clamp(self._eps)
+
+        p = torch.sqrt(p2 / 6.0)
+        # B is the input shifted by q on the diagonal and scaled by 1 / p.
+        B = (inputs - q[..., None, None] * self._identity) / p[..., None, None]
+
+        r = torch.det(B) / 2.0
+        # Keep r within (-1.0, 1.0) boundaries with a margin to prevent exploding gradients.
+        r = r.clamp(-1.0 + self._eps, 1.0 - self._eps)
+
+        phi = torch.acos(r) / 3.0
+        eig1 = q + 2 * p * torch.cos(phi)
+        eig2 = q + 2 * p * torch.cos(phi + 2 * math.pi / 3)
+        # The three eigenvalues sum to the trace (= 3 * q).
+        eig3 = 3 * q - eig1 - eig2
+        # eigenvals[..., i] is the i-th eigenvalue of the input, α0 ≤ α1 ≤ α2.
+        eigenvals = torch.stack((eig2, eig3, eig1), dim=-1)
+
+        # Soft dispatch between the degenerate case (diagonal A) and general.
+        # diag_soft_cond -> 1.0 when p1 < 6 * eps and diag_soft_cond -> 0.0 otherwise.
+        # We use 6 * eps to take into account the error accumulated during the p1 summation
+        # The blending weight is detached, so no gradient flows through it.
+        diag_soft_cond = torch.exp(-((p1 / (6 * self._eps)) ** 2)).detach()[..., None]
+
+        # Eigenvalues are the ordered elements of main diagonal in the degenerate case
+        diag_eigenvals, _ = torch.sort(inputs_diag, dim=-1)
+        eigenvals = diag_soft_cond * diag_eigenvals + (1.0 - diag_soft_cond) * eigenvals
+
+        if eigenvectors:
+            eigenvecs = self._construct_eigenvecs_set(inputs, eigenvals)
+        else:
+            eigenvecs = None
+
+        return eigenvals, eigenvecs
+
+    def _construct_eigenvecs_set(
+        self, inputs: torch.Tensor, eigenvals: torch.Tensor
+    ) -> torch.Tensor:
+        """
+        Construct orthonormal set of eigenvectors by given inputs and pre-computed eigenvalues
+
+        Two candidate sets are built, one starting from the smallest eigenvalue
+        and one starting from the largest; for each matrix the set that starts
+        from the eigenvalue with the larger gap to the middle one is selected.
+
+        Args:
+            inputs: tensor of symmetric matrices of shape (..., 3, 3)
+            eigenvals: tensor of pre-computed eigenvalues of shape (..., 3)
+
+        Returns:
+            Tensor of shape (..., 3, 3) whose slices [..., :, i] are the
+            eigenvectors matching eigenvals[..., i], composing an orthonormal set
+        """
+        eigenvecs_tuple_for_01 = self._construct_eigenvecs(
+            inputs, eigenvals[..., 0], eigenvals[..., 1]
+        )
+        eigenvecs_for_01 = torch.stack(eigenvecs_tuple_for_01, dim=-1)
+
+        eigenvecs_tuple_for_21 = self._construct_eigenvecs(
+            inputs, eigenvals[..., 2], eigenvals[..., 1]
+        )
+        # Reverse the tuple so that the vectors are again ordered as
+        # (eigenvalue 0, eigenvalue 1, eigenvalue 2) along the last dimension.
+        eigenvecs_for_21 = torch.stack(eigenvecs_tuple_for_21[::-1], dim=-1)
+
+        # The result will be smooth here even if both parts of comparison
+        # are close, because eigenvecs_01 and eigenvecs_21 would be mostly equal as well
+        eigenvecs_cond = (
+            eigenvals[..., 1] - eigenvals[..., 0]
+            > eigenvals[..., 2] - eigenvals[..., 1]
+        ).detach()
+        eigenvecs = torch.where(
+            eigenvecs_cond[..., None, None], eigenvecs_for_01, eigenvecs_for_21
+        )
+
+        return eigenvecs
+
+    def _construct_eigenvecs(
+        self, inputs: torch.Tensor, alpha0: torch.Tensor, alpha1: torch.Tensor
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        """
+        Construct an orthonormal set of eigenvectors by given pair of eigenvalues.
+
+        Args:
+            inputs: tensor of symmetric matrices of shape (..., 3, 3)
+            alpha0: first eigenvalues of shape (...)
+            alpha1: second eigenvalues of shape (...)
+
+        Returns:
+            Tuple (ev0, ev1, ev2) of eigenvector tensors, each of shape (..., 3),
+             composing an orthonormal set. ev0 belongs to alpha0, ev1 to alpha1
+             and ev2 is their cross product.
+        """
+
+        # Find the eigenvector corresponding to alpha0, its eigenvalue is distinct
+        ev0 = self._get_ev0(inputs - alpha0[..., None, None] * self._identity)
+        u, v = self._get_uv(ev0)
+        ev1 = self._get_ev1(inputs - alpha1[..., None, None] * self._identity, u, v)
+        # Third eigenvector is computed as the cross-product of the other two
+        ev2 = torch.cross(ev0, ev1, dim=-1)
+
+        return ev0, ev1, ev2
+
+    def _get_ev0(self, char_poly: torch.Tensor) -> torch.Tensor:
+        """
+        Construct the first normalized eigenvector given a characteristic polynomial
+
+        Args:
+            char_poly: the matrices (A - alpha * I) of shape (..., 3, 3), as
+                passed in by `_construct_eigenvecs`
+
+        Returns:
+            Tensor of first eigenvectors of shape (..., 3)
+        """
+
+        # Cross products of every pair of rows of (A - alpha * I).
+        r01 = torch.cross(char_poly[..., 0, :], char_poly[..., 1, :], dim=-1)
+        r12 = torch.cross(char_poly[..., 1, :], char_poly[..., 2, :], dim=-1)
+        r02 = torch.cross(char_poly[..., 0, :], char_poly[..., 2, :], dim=-1)
+
+        cross_products = torch.stack((r01, r12, r02), dim=-2)
+        # Regularize it with + or -eps depending on the sign of the first vector
+        cross_products += self._eps * self._sign_without_zero(
+            cross_products[..., :1, :]
+        )
+
+        # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+        norms_sq = (cross_products**2).sum(dim=-1)
+        max_norms_index = norms_sq.argmax(dim=-1)
+
+        # Pick only the cross-product with highest squared norm for each input
+        max_cross_products = self._gather_by_index(
+            cross_products, max_norms_index[..., None, None], -2
+        )
+        # Pick corresponding squared norms for each cross-product
+        max_norms_sq = self._gather_by_index(norms_sq, max_norms_index[..., None], -1)
+
+        # Normalize cross-product vectors by their norms
+        return max_cross_products / torch.sqrt(max_norms_sq[..., None])
+
+    def _gather_by_index(
+        self, source: torch.Tensor, index: torch.Tensor, dim: int
+    ) -> torch.Tensor:
+        """
+        Selects elements from the given source tensor by provided index tensor.
+        Number of dimensions should be the same for source and index tensors.
+
+        Args:
+            source: input tensor to gather from
+            index: index tensor with indices to gather from source; it is
+                expanded to the shape of source with size 1 along `dim`
+            dim: dimension to gather across
+
+        Returns:
+            Tensor with the shape of the source but with the specified
+            dimension removed (one element is picked along it and the
+            dimension is squeezed).
+        """
+
+        index_shape = list(source.shape)
+        index_shape[dim] = 1
+
+        return source.gather(dim, index.expand(index_shape)).squeeze(dim)
+
+    def _get_uv(self, w: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+        """
+        Computes unit-length vectors U and V such that {U, V, W} is a right-handed
+        orthonormal set.
+
+        Args:
+            w: eigenvector tensor of shape (..., 3)
+
+        Returns:
+            Tuple of U and V unit-length vector tensors of shape (..., 3)
+        """
+
+        # Rotate within the plane of the two largest-magnitude components of w
+        # (the axis of the smallest component selects the rotation matrix).
+        min_idx = w.abs().argmin(dim=-1)
+        rotation_2d = self._rotations_3d[min_idx].to(w)
+
+        u = F.normalize((rotation_2d @ w[..., None])[..., 0], dim=-1)
+        v = torch.cross(w, u, dim=-1)
+        return u, v
+
+    def _get_ev1(
+        self, char_poly: torch.Tensor, u: torch.Tensor, v: torch.Tensor
+    ) -> torch.Tensor:
+        """
+        Computes the second normalized eigenvector given a characteristic polynomial
+        and U and V vectors
+
+        Args:
+            char_poly: the matrices (A - alpha * I) of shape (..., 3, 3), as
+                passed in by `_construct_eigenvecs`
+            u: unit-length vectors from _get_uv method
+            v: unit-length vectors from _get_uv method
+
+        Returns:
+            Tensor of second eigenvectors of shape (..., 3)
+        """
+
+        # j has shape (..., 3, 2); m = j^T @ char_poly @ j has shape (..., 2, 2).
+        j = torch.stack((u, v), dim=-1)
+        m = j.transpose(-1, -2) @ char_poly @ j
+
+        # If angle between those vectors is acute, take their sum = m[..., 0, :] + m[..., 1, :],
+        # otherwise take the difference = m[..., 0, :] - m[..., 1, :]
+        # m is in theory of rank 1 (or 0), so it snaps only when one of the rows is close to 0
+        is_acute_sign = self._sign_without_zero(
+            (m[..., 0, :] * m[..., 1, :]).sum(dim=-1)
+        ).detach()
+
+        rowspace = m[..., 0, :] + is_acute_sign[..., None] * m[..., 1, :]
+        # rowspace will be near zero for second-order eigenvalues
+        # this regularization guarantees abs(rowspace[0]) >= eps in a smooth'ish way
+        rowspace += self._eps * self._sign_without_zero(rowspace[..., :1])
+
+        # Multiply rowspace by the 2D π/2 rotation, normalize the result and map
+        # it back to 3D through j.
+        return (
+            j
+            @ F.normalize(rowspace @ self._rotation_2d.to(rowspace), dim=-1)[..., None]
+        )[..., 0]
+
+    @staticmethod
+    def _sign_without_zero(tensor: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            tensor: an arbitrary shaped tensor
+
+        Returns:
+            Tensor of the same shape as an input, but with 1.0 if tensor > 0.0 and -1.0
+             otherwise
+        """
+        return 2.0 * (tensor > 0.0).to(tensor.dtype) - 1.0
+
+
+def symeig3x3(
+    inputs: torch.Tensor, eigenvectors: bool = True
+) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+    """
+    Compute eigenvalues and (optionally) eigenvectors
+
+    Args:
+        inputs: symmetric matrices with shape of (..., 3, 3)
+        eigenvectors: whether should we compute only eigenvalues or eigenvectors as well
+
+    Returns:
+        Either a tuple of (eigenvalues, eigenvectors) or eigenvalues only, depending on
+         given params. Eigenvalues are of shape (..., 3) and eigenvectors (..., 3, 3)
+    """
+    return _SymEig3x3().to(inputs.device)(inputs, eigenvectors=eigenvectors)
diff --git a/pytorch3d/pytorch3d/common/workaround/utils.py b/pytorch3d/pytorch3d/common/workaround/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..6cd694129a154551a986f30e1f3b88c772a44237
--- /dev/null
+++ b/pytorch3d/pytorch3d/common/workaround/utils.py
@@ -0,0 +1,31 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import torch
+
+
+def _safe_det_3x3(t: torch.Tensor):
+    """
+    Fast determinant calculation for a batch of 3x3 matrices.
+
+    Note, result of this function might not be the same as `torch.det()`.
+    The differences might be in the last significant digit.
+
+    Args:
+        t: Tensor of shape (N, 3, 3).
+
+    Returns:
+        Tensor of shape (N) with determinants.
+    """
+
+    det = (
+        t[..., 0, 0] * (t[..., 1, 1] * t[..., 2, 2] - t[..., 1, 2] * t[..., 2, 1])
+        - t[..., 0, 1] * (t[..., 1, 0] * t[..., 2, 2] - t[..., 2, 0] * t[..., 1, 2])
+        + t[..., 0, 2] * (t[..., 1, 0] * t[..., 2, 1] - t[..., 2, 0] * t[..., 1, 1])
+    )
+
+    return det
diff --git a/pytorch3d/pytorch3d/csrc/ball_query/ball_query.cu b/pytorch3d/pytorch3d/csrc/ball_query/ball_query.cu
new file mode 100644
index 0000000000000000000000000000000000000000..586701c18150b2fbd91c7b48989d9b96b1cfd55f
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/ball_query/ball_query.cu
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// A chunk of work is blocksize-many points of P1.
+// The number of potential chunks to do is N*(1+(P1-1)/blocksize)
+// call (1+(P1-1)/blocksize) chunks_per_cloud
+// These chunks are divided among the gridSize-many blocks.
+// In block b, we work on chunks b, b+gridSize, b+2*gridSize etc .
+// In chunk i, we work on cloud i/chunks_per_cloud on points starting from
+// blocksize*(i%chunks_per_cloud).
+
+// For every valid point p1[n][i], scans the points of p2[n] in index order and
+// records up to K of them whose squared distance to p1[n][i] is strictly
+// below radius2.
+//
+// idxs[n][i][k] and dists[n][i][k] receive the p2 index and the squared
+// distance of the k-th accepted point. Slots that are never written keep the
+// value the caller initialized them with.
+template <typename scalar_t>
+__global__ void BallQueryKernel(
+    const at::PackedTensorAccessor64<scalar_t, 3, at::RestrictPtrTraits> p1,
+    const at::PackedTensorAccessor64<scalar_t, 3, at::RestrictPtrTraits> p2,
+    const at::PackedTensorAccessor64<int64_t, 1, at::RestrictPtrTraits>
+        lengths1,
+    const at::PackedTensorAccessor64<int64_t, 1, at::RestrictPtrTraits>
+        lengths2,
+    at::PackedTensorAccessor64<int64_t, 3, at::RestrictPtrTraits> idxs,
+    at::PackedTensorAccessor64<scalar_t, 3, at::RestrictPtrTraits> dists,
+    const int64_t K,
+    const float radius2) {
+  const int64_t N = p1.size(0);
+  // Number of blockDim.x-sized chunks needed to cover the P1 points of a cloud.
+  const int64_t chunks_per_cloud = (1 + (p1.size(1) - 1) / blockDim.x);
+  const int64_t chunks_to_do = N * chunks_per_cloud;
+  const int D = p1.size(2);
+
+  // Block b processes chunks b, b + gridDim.x, b + 2 * gridDim.x, ...
+  for (int64_t chunk = blockIdx.x; chunk < chunks_to_do; chunk += gridDim.x) {
+    const int64_t n = chunk / chunks_per_cloud; // batch_index
+    const int64_t start_point = blockDim.x * (chunk % chunks_per_cloud);
+    // Each thread of the block handles one point of the chunk.
+    int64_t i = start_point + threadIdx.x;
+
+    // Check if point is valid in heterogeneous tensor
+    if (i >= lengths1[n]) {
+      continue;
+    }
+
+    // Iterate over points in p2 until desired count is reached or
+    // all points have been considered
+    for (int64_t j = 0, count = 0; j < lengths2[n] && count < K; ++j) {
+      // Calculate the distance between the points
+      scalar_t dist2 = 0.0;
+      for (int d = 0; d < D; ++d) {
+        scalar_t diff = p1[n][i][d] - p2[n][j][d];
+        dist2 += (diff * diff);
+      }
+
+      // Strict comparison: points exactly on the sphere are not selected.
+      if (dist2 < radius2) {
+        // If the point is within the radius
+        // Set the value of the index to the point index
+        idxs[n][i][count] = j;
+        dists[n][i][count] = dist2;
+
+        // increment the number of selected samples for the point i
+        ++count;
+      }
+    }
+  }
+}
+
+// Host-side launcher for BallQueryKernel.
+//
+// Args:
+//    p1, p2: floating point tensors of shape (N, P1, D) and (N, P2, D) with
+//        the same dtype, on the same GPU.
+//    lengths1, lengths2: int64 tensors of shape (N,) on the same GPU, giving
+//        the number of valid points per cloud.
+//    K: maximum number of neighbors recorded per point of p1.
+//    radius: neighbors must lie strictly within this distance.
+//
+// Returns:
+//    (idxs, dists): idxs is an int64 tensor of shape (N, P1, K) initialized to
+//    -1, dists has the dtype of p1 and shape (N, P1, K) initialized to 0; the
+//    kernel fills the leading slots with neighbor indices / squared distances.
+std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
+    const at::Tensor& p1, // (N, P1, D)
+    const at::Tensor& p2, // (N, P2, D)
+    const at::Tensor& lengths1, // (N,)
+    const at::Tensor& lengths2, // (N,)
+    int K,
+    float radius) {
+  // Check inputs are on the same device
+  at::TensorArg p1_t{p1, "p1", 1}, p2_t{p2, "p2", 2},
+      lengths1_t{lengths1, "lengths1", 3}, lengths2_t{lengths2, "lengths2", 4};
+  at::CheckedFrom c = "BallQueryCuda";
+  at::checkAllSameGPU(c, {p1_t, p2_t, lengths1_t, lengths2_t});
+  at::checkAllSameType(c, {p1_t, p2_t});
+
+  // Set the device for the kernel launch based on the device of p1
+  at::cuda::CUDAGuard device_guard(p1.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  TORCH_CHECK(
+      p2.size(2) == p1.size(2), "Point sets must have the same last dimension");
+
+  const int N = p1.size(0);
+  const int P1 = p1.size(1);
+  const int64_t K_64 = K;
+  // The kernel compares squared distances, so square the radius once here.
+  const float radius2 = radius * radius;
+
+  // Output tensor with indices of neighbors for each point in p1
+  auto long_dtype = lengths1.options().dtype(at::kLong);
+  auto idxs = at::full({N, P1, K}, -1, long_dtype);
+  auto dists = at::zeros({N, P1, K}, p1.options());
+
+  // Nothing to compute: skip the launch.
+  if (idxs.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return std::make_tuple(idxs, dists);
+  }
+
+  const size_t blocks = 256;
+  const size_t threads = 256;
+
+  // The accessors must use the dispatched scalar_t: hard-coding float makes
+  // the accessor dtype check fail for double inputs even though the dispatch
+  // macro accepts them.
+  AT_DISPATCH_FLOATING_TYPES(
+      p1.scalar_type(), "ball_query_kernel_cuda", ([&] {
+        BallQueryKernel<scalar_t><<<blocks, threads, 0, stream>>>(
+            p1.packed_accessor64<scalar_t, 3, at::RestrictPtrTraits>(),
+            p2.packed_accessor64<scalar_t, 3, at::RestrictPtrTraits>(),
+            lengths1.packed_accessor64<int64_t, 1, at::RestrictPtrTraits>(),
+            lengths2.packed_accessor64<int64_t, 1, at::RestrictPtrTraits>(),
+            idxs.packed_accessor64<int64_t, 3, at::RestrictPtrTraits>(),
+            dists.packed_accessor64<scalar_t, 3, at::RestrictPtrTraits>(),
+            K_64,
+            radius2);
+      }));
+
+  AT_CUDA_CHECK(cudaGetLastError());
+
+  return std::make_tuple(idxs, dists);
+}
diff --git a/pytorch3d/pytorch3d/csrc/ball_query/ball_query.h b/pytorch3d/pytorch3d/csrc/ball_query/ball_query.h
new file mode 100644
index 0000000000000000000000000000000000000000..059cad8b88b5362304135984827bac8db375a548
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/ball_query/ball_query.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/extension.h>
+#include <tuple>
+#include "utils/pytorch3d_cutils.h"
+
+// Compute indices of K neighbors in pointcloud p2 to points
+// in pointcloud p1 which fall within a specified radius
+//
+// Args:
+//    p1: FloatTensor of shape (N, P1, D) giving a batch of pointclouds each
+//        containing P1 points of dimension D.
+//    p2: FloatTensor of shape (N, P2, D) giving a batch of pointclouds each
+//        containing P2 points of dimension D.
+//    lengths1: LongTensor, shape (N,), giving actual length of each P1 cloud.
+//    lengths2: LongTensor, shape (N,), giving actual length of each P2 cloud.
+//    K: Integer giving the upper bound on the number of samples to take
+//      within the radius
+//    radius: the radius around each point within which the neighbors need to be
+//      located
+//
+// Returns:
+//    p1_neighbor_idx: LongTensor of shape (N, P1, K), where
+//        p1_neighbor_idx[n, i, k] = j means that the kth
+//        neighbor to p1[n, i] in the cloud p2[n] is p2[n, j].
+//        This is padded with -1s both where a cloud in p2 has fewer than
+//        K points and where a cloud in p1 has fewer than P1 points and
+//        also if there are fewer than K points which satisfy the radius
+//        threshold.
+//
+//    p1_neighbor_dists: FloatTensor of shape (N, P1, K) containing the squared
+//        distance from each point p1[n, p, :] to its K neighbors
+//        p2[n, p1_neighbor_idx[n, p, k], :].
+
+// CPU implementation (declaration only). Arguments and return values are as
+// described in the comment block above.
+std::tuple<at::Tensor, at::Tensor> BallQueryCpu(
+    const at::Tensor& p1,
+    const at::Tensor& p2,
+    const at::Tensor& lengths1,
+    const at::Tensor& lengths2,
+    const int K,
+    const float radius);
+
+// CUDA implementation (declaration only). Arguments and return values are as
+// described in the comment block above. It is only called from BallQuery
+// below when WITH_CUDA is defined.
+std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
+    const at::Tensor& p1,
+    const at::Tensor& p2,
+    const at::Tensor& lengths1,
+    const at::Tensor& lengths2,
+    const int K,
+    const float radius);
+
+// Implementation which is exposed
+// Note: the backward pass reuses the KNearestNeighborBackward kernel
+//
+// Dispatches to the CUDA implementation when either point tensor is on a GPU
+// and to the CPU implementation otherwise. All tensors are made contiguous
+// before being handed to the implementation.
+inline std::tuple<at::Tensor, at::Tensor> BallQuery(
+    const at::Tensor& p1,
+    const at::Tensor& p2,
+    const at::Tensor& lengths1,
+    const at::Tensor& lengths2,
+    int K,
+    float radius) {
+  if (p1.is_cuda() || p2.is_cuda()) {
+#ifdef WITH_CUDA
+    // Both point tensors have to be CUDA tensors; a CPU/GPU mix is rejected.
+    CHECK_CUDA(p1);
+    CHECK_CUDA(p2);
+    return BallQueryCuda(
+        p1.contiguous(),
+        p2.contiguous(),
+        lengths1.contiguous(),
+        lengths2.contiguous(),
+        K,
+        radius);
+#else
+    // Built without CUDA support: GPU tensors cannot be processed.
+    AT_ERROR("Not compiled with GPU support.");
+#endif
+  }
+  return BallQueryCpu(
+      p1.contiguous(),
+      p2.contiguous(),
+      lengths1.contiguous(),
+      lengths2.contiguous(),
+      K,
+      radius);
+}
diff --git a/pytorch3d/pytorch3d/csrc/ball_query/ball_query_cpu.cpp b/pytorch3d/pytorch3d/csrc/ball_query/ball_query_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a38447175ef7d5eeb60061a1ff1ea363bc24d77c
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/ball_query/ball_query_cpu.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/extension.h>
+#include <queue>
+#include <tuple>
+
+// CPU ball query: for every valid point p1[n][i], scans the valid points of
+// p2[n] in index order and records up to K of them whose squared distance to
+// p1[n][i] is strictly below radius * radius.
+//
+// Args:
+//    p1, p2: tensors of shape (N, P1, D) and (N, P2, D); they are read through
+//        float accessors, so both must be float32.
+//    lengths1, lengths2: int64 tensors of shape (N,) with the number of valid
+//        points per cloud.
+//    K: maximum number of neighbors recorded per point of p1.
+//    radius: neighbors must lie strictly within this distance.
+//
+// Returns:
+//    (idxs, dists): idxs is an int64 tensor of shape (N, P1, K) padded with
+//    -1, dists is a tensor of shape (N, P1, K) padded with 0 that holds the
+//    squared distances of the selected neighbors.
+std::tuple<at::Tensor, at::Tensor> BallQueryCpu(
+    const at::Tensor& p1,
+    const at::Tensor& p2,
+    const at::Tensor& lengths1,
+    const at::Tensor& lengths2,
+    int K,
+    float radius) {
+  // Same precondition as the CUDA path: without it the distance loop below
+  // would index p2 with d up to p1.size(2) - 1 and read out of bounds.
+  TORCH_CHECK(
+      p2.size(2) == p1.size(2), "Point sets must have the same last dimension");
+
+  const int N = p1.size(0);
+  const int P1 = p1.size(1);
+  const int D = p1.size(2);
+
+  auto long_opts = lengths1.options().dtype(torch::kInt64);
+  torch::Tensor idxs = torch::full({N, P1, K}, -1, long_opts);
+  torch::Tensor dists = torch::full({N, P1, K}, 0, p1.options());
+  // Distances are compared in squared form, so square the radius once.
+  const float radius2 = radius * radius;
+
+  auto p1_a = p1.accessor<float, 3>();
+  auto p2_a = p2.accessor<float, 3>();
+  auto lengths1_a = lengths1.accessor<int64_t, 1>();
+  auto lengths2_a = lengths2.accessor<int64_t, 1>();
+  auto idxs_a = idxs.accessor<int64_t, 3>();
+  auto dists_a = dists.accessor<float, 3>();
+
+  for (int n = 0; n < N; ++n) {
+    const int64_t length1 = lengths1_a[n];
+    const int64_t length2 = lengths2_a[n];
+    for (int64_t i = 0; i < length1; ++i) {
+      // Stop scanning p2 as soon as K neighbors have been collected.
+      for (int64_t j = 0, count = 0; j < length2 && count < K; ++j) {
+        float dist2 = 0;
+        for (int d = 0; d < D; ++d) {
+          float diff = p1_a[n][i][d] - p2_a[n][j][d];
+          dist2 += diff * diff;
+        }
+        if (dist2 < radius2) {
+          dists_a[n][i][count] = dist2;
+          idxs_a[n][i][count] = j;
+          ++count;
+        }
+      }
+    }
+  }
+  return std::make_tuple(idxs, dists);
+}
diff --git a/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend.cu b/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend.cu
new file mode 100644
index 0000000000000000000000000000000000000000..76912c441b155e03e2470144835850cd567cb060
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend.cu
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <cmath>
+#include <vector>
+
+// Forward kernel: for every pixel computes
+//   alphas[n][y][x] = 1 - prod_k (1 - sigmoid(-distances[n][y][x][k] / sigma))
+// where the product runs over the K slots whose pix_to_face entry is >= 0.
+// Pixels without any valid face get alpha 0.
+template <typename scalar_t>
+__global__ void SigmoidAlphaBlendForwardKernel(
+    // clang-format off
+    const at::PackedTensorAccessor64<scalar_t, 4, at::RestrictPtrTraits> distances, // (N, H, W, K)
+    const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> pix_to_face, // (N, H, W, K)
+    at::PackedTensorAccessor64<scalar_t, 3, at::RestrictPtrTraits> alphas, // (N, H, W)
+    // clang-format on
+    const scalar_t sigma,
+    const int N,
+    const int H,
+    const int W,
+    const int K) {
+  // Parallelize over each pixel in images of
+  // size H * W, for each image in the batch of size N.
+  const int num_threads = gridDim.x * blockDim.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+  // TODO: revisit performance of this kernel with shared memory usage
+
+  // Each thread handles pixels tid, tid + num_threads, tid + 2 * num_threads...
+  for (int t_i = tid; t_i < N * H * W; t_i += num_threads) {
+    // Convert linear index to 3D index
+    const int n = t_i / (H * W); // batch index.
+    const int pix_idx = t_i % (H * W);
+
+    // TODO: fix index calculation for non square images.
+    // NOTE(review): the row/column split below uses W for both operations,
+    // which looks valid for H != W as well -- confirm whether the TODO above
+    // is stale.
+    const int yi = pix_idx / W;
+    const int xi = pix_idx % W;
+    scalar_t alpha = 1.0;
+
+    // Loop over all the faces for this pixel.
+    for (int k = 0; k < K; k++) {
+      // Index into (N, H, W, K) tensors
+      // NOTE(review): the int64 face index is narrowed to int here; only its
+      // sign is used below.
+      const int f = pix_to_face[n][yi][xi][k];
+      if (f < 0) {
+        // Sentinel value is -1 indicating no face overlaps the pixel.
+        continue;
+      }
+      // The distance is negative if a pixel is inside a face and positive
+      // outside the face. Therefore use -1.0 * the distance to get the
+      // correct sign.
+      scalar_t dist = -1.0 * distances[n][yi][xi][k];
+
+      // Calculate the sigmoid probability.
+      scalar_t prob = 1. / (1. + exp(-dist / sigma));
+
+      // The cumulative product ensures that alpha will be 0.0 if at least 1
+      // face fully covers the pixel as for that face, prob will be 1.0.
+      // This results in a multiplication by 0.0 because of the (1.0 - prob)
+      // term. Therefore the final result of (1.0 - alpha) will be 1.0.
+      alpha *= (1.0 - prob);
+    }
+    alphas[n][yi][xi] = 1.0 - alpha;
+  }
+}
+
+// Host-side launcher for SigmoidAlphaBlendForwardKernel.
+//
+// Args:
+//    distances: floating point tensor of shape (N, H, W, K).
+//    pix_to_face: int64 tensor of shape (N, H, W, K) on the same GPU as
+//        distances; negative entries mark empty slots.
+//    sigma: width parameter of the sigmoid.
+//
+// Returns:
+//    alphas: tensor of shape (N, H, W) with the options (dtype/device) of
+//    distances. It is all zeros when distances is empty.
+at::Tensor SigmoidAlphaBlendForwardCuda(
+    const at::Tensor& distances, // (N, H, W, K)
+    const at::Tensor& pix_to_face, // (N, H, W, K)
+    const float sigma) {
+  const int N = distances.size(0);
+  const int H = distances.size(1);
+  const int W = distances.size(2);
+  const int K = distances.size(3);
+
+  at::Tensor alphas = at::zeros({N, H, W}, distances.options());
+  // Fixed launch configuration; the kernel loops until all pixels are covered.
+  const size_t blocks = 1024;
+  const size_t threads = 128;
+
+  // Check inputs are on the same device
+  at::TensorArg distances_t{distances, "distances", 1},
+      pix_to_face_t{pix_to_face, "pix_to_face", 2};
+  at::CheckedFrom c = "SigmoidAlphaBlendForwardCuda";
+  at::checkAllSameGPU(c, {distances_t, pix_to_face_t});
+
+  // Set the device for the kernel launch based on the device of distances
+  at::cuda::CUDAGuard device_guard(distances.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  // Nothing to blend: return the zero-initialized output without launching.
+  if (distances.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return alphas;
+  }
+
+  AT_DISPATCH_FLOATING_TYPES(
+      distances.scalar_type(), "sigmoid_alpha_blend_kernel", ([&] {
+        // clang-format off
+      SigmoidAlphaBlendForwardKernel<scalar_t><<<blocks, threads, 0, stream>>>(
+      distances.packed_accessor64<scalar_t, 4, at::RestrictPtrTraits>(),
+      pix_to_face.packed_accessor64<int64_t, 4, at::RestrictPtrTraits>(),
+      alphas.packed_accessor64<scalar_t, 3, at::RestrictPtrTraits>(),
+      sigma,
+      N,
+      H,
+      W,
+      K);
+        // clang-format on
+      }));
+
+  AT_CUDA_CHECK(cudaGetLastError());
+  return alphas;
+}
+
+// Backward kernel: for every (pixel, face slot) pair with a valid face writes
+//   grad_distances[n][y][x][k] =
+//       grad_alphas[n][y][x] * (-1 / sigma) * prob_k * (1 - alphas[n][y][x])
+// with prob_k = sigmoid(-distances[n][y][x][k] / sigma). Entries whose
+// pix_to_face value is negative are not written.
+template <typename scalar_t>
+__global__ void SigmoidAlphaBlendBackwardKernel(
+    // clang-format off
+    const at::PackedTensorAccessor64<scalar_t, 3, at::RestrictPtrTraits> grad_alphas, // (N, H, W)
+    const at::PackedTensorAccessor64<scalar_t, 3, at::RestrictPtrTraits> alphas, // (N, H, W)
+    const at::PackedTensorAccessor64<scalar_t, 4, at::RestrictPtrTraits> distances, // (N, H, W, K)
+    const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> pix_to_face, // (N, H, W, K)
+    at::PackedTensorAccessor64<scalar_t, 4, at::RestrictPtrTraits> grad_distances, // (N, H, W, K)
+    // clang-format on
+    const scalar_t sigma,
+    const int N,
+    const int H,
+    const int W,
+    const int K) {
+  // Parallelize over each of the top K faces for each pixel in images of
+  // size H * W * K, for each image in the batch of size N.
+
+  // Get block and thread index.
+  // blockIdx.x selects the batch element; the threads of all blocks along
+  // grid dimension y share the H * W * K entries of that element.
+  const int n = blockIdx.x;
+  const int num_pixels = H * W * K;
+  const int num_threads = gridDim.y * blockDim.x;
+  const int tid = blockIdx.y * blockDim.x + threadIdx.x;
+
+  for (int t_i = tid; t_i < num_pixels; t_i += num_threads) {
+    // Convert linear index to 3D index.
+    int yi = t_i / (W * K);
+    int xi = (t_i % (W * K)) / K;
+    int k = (t_i % (W * K)) % K;
+
+    // alpha here is (1 - forward output), i.e. the product of (1 - prob)
+    // over the valid faces of this pixel.
+    const scalar_t alpha = 1.0 - alphas[n][yi][xi];
+    const scalar_t grad_alpha = grad_alphas[n][yi][xi];
+    const int f = pix_to_face[n][yi][xi][k];
+
+    // Sentinel value is -1 indicating no face overlaps the pixel.
+    if (f >= 0) {
+      // The distance is negative if a pixel is inside a face and positive
+      // outside the face. Therefore use -1.0 * the distance to get the
+      // correct sign.
+      scalar_t dist = -1.0 * distances[n][yi][xi][k];
+
+      // Calculate the sigmoid probability.
+      scalar_t prob = 1. / (1. + exp(-dist / sigma));
+
+      grad_distances[n][yi][xi][k] = grad_alpha * (-1.0 / sigma) * prob * alpha;
+    }
+  }
+}
+
+// Host-side launcher for SigmoidAlphaBlendBackwardKernel.
+//
+// Args:
+//    grad_alphas: upstream gradient of shape (N, H, W).
+//    alphas: output of the forward pass, shape (N, H, W).
+//    distances: floating point tensor of shape (N, H, W, K).
+//    pix_to_face: int64 tensor of shape (N, H, W, K); negative entries mark
+//        empty slots.
+//    sigma: width parameter of the sigmoid.
+//    All tensors must be on the same GPU.
+//
+// Returns:
+//    grad_distances: tensor of shape (N, H, W, K) with the options of
+//    distances; zero for empty face slots and for empty inputs.
+at::Tensor SigmoidAlphaBlendBackwardCuda(
+    const at::Tensor& grad_alphas, // (N, H, W)
+    const at::Tensor& alphas, // (N, H, W)
+    const at::Tensor& distances, // (N, H, W, K)
+    const at::Tensor& pix_to_face, // (N, H, W, K)
+    float sigma) {
+  const int N = distances.size(0);
+  const int H = distances.size(1);
+  const int W = distances.size(2);
+  const int K = distances.size(3);
+
+  at::Tensor grad_distances = at::zeros({N, H, W, K}, distances.options());
+
+  at::TensorArg grad_alphas_t{grad_alphas, "grad_alphas", 1},
+      alphas_t{alphas, "alphas", 2}, distances_t{distances, "distances", 3},
+      pix_to_face_t{pix_to_face, "pix_to_face", 4};
+  at::CheckedFrom c = "SigmoidAlphaBlendBackwardCuda";
+  at::checkAllSameGPU(c, {grad_alphas_t, alphas_t, distances_t, pix_to_face_t});
+
+  // Set the device for the kernel launch based on the device of alphas
+  at::cuda::CUDAGuard device_guard(alphas.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  // Empty input: return the (empty or all-zero) gradient for `distances`.
+  // The gradient must have the shape of `distances`, not of `grad_alphas`.
+  if (alphas.numel() == 0 || grad_distances.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return grad_distances;
+  }
+
+  // The launch configuration is computed only after the empty check above,
+  // which guarantees N > 0 and therefore a well-defined 1024 / N.
+  const dim3 threads(512);
+  const dim3 blocks(N, 1024 / N + 1);
+
+  AT_DISPATCH_FLOATING_TYPES(
+      distances.scalar_type(), "sigmoid_alpha_blend_backward_kernel", ([&] {
+        SigmoidAlphaBlendBackwardKernel<
+            scalar_t><<<blocks, threads, 0, stream>>>(
+            // clang-format off
+            grad_alphas.packed_accessor64<scalar_t, 3,at::RestrictPtrTraits>(),
+            alphas.packed_accessor64<scalar_t, 3, at::RestrictPtrTraits>(),
+            distances.packed_accessor64<scalar_t, 4, at::RestrictPtrTraits>(),
+            pix_to_face.packed_accessor64<int64_t, 4, at::RestrictPtrTraits>(),
+            grad_distances.packed_accessor64<scalar_t, 4, at::RestrictPtrTraits>(),
+            // clang-format on
+            sigma,
+            N,
+            H,
+            W,
+            K);
+      }));
+
+  AT_CUDA_CHECK(cudaGetLastError());
+  return grad_distances;
+}
diff --git a/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend.h b/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend.h
new file mode 100644
index 0000000000000000000000000000000000000000..d424c769c03c7df8b9bd32d6ac1d52b25befb2de
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/extension.h>
+#include <tuple>
+
+// clang-format off
+// Blend the top K faces per pixel into one alpha value, based on the signed 2d
+// euclidean distance from the center of the pixel to each face. Adapted from [1].
+// The output can be used to set the alpha value in an RGBA image.
+// Args:
+//      distances: FloatTensor of shape (N, H, W, K), 2d euclidean distance of each pixel
+//          to the faces in pix_to_face; negative inside a face, positive outside.
+//      pix_to_face: LongTensor of shape (N, H, W, K), indices of faces overlapping each
+//          pixel (negative entries mean no face), where N is the batch size, H, W are
+//          the dimensions of the image and K is the number of faces rasterized per pixel.
+//      sigma: float, parameter which controls the width of the sigmoid for blending
+// Returns:
+//      alphas: FloatTensor of shape (N, H, W), the blended values for each pixel
+//          in the image: 1 - prod_k (1 - sigmoid(-distances_k / sigma)) over valid faces.
+//
+// [1] Shichen Liu et al, 'Soft Rasterizer: A Differentiable Renderer for
+// Image-based 3D Reasoning'
+// clang-format on
+at::Tensor SigmoidAlphaBlendForwardCpu(
+    const at::Tensor& distances,
+    const at::Tensor& pix_to_face,
+    const float sigma);
+
+#ifdef WITH_CUDA
+at::Tensor SigmoidAlphaBlendForwardCuda(
+    const at::Tensor& distances,
+    const at::Tensor& pix_to_face,
+    const float sigma);
+#endif
+
+// clang-format off
+// Backward pass of the sigmoid alpha blend.
+// Args:
+//      grad_alphas: FloatTensor of shape (N, H, W), upstream gradients for alphas
+//      alphas: FloatTensor of shape (N, H, W), the alpha values from the forward pass
+//      distances: FloatTensor of shape (N, H, W, K), 2d euclidean distance of each pixel
+//          to the corresponding faces in pix_to_face
+//      pix_to_face: LongTensor of shape (N, H, W, K), indices of faces overlapping each
+//          pixel (N: batch size, H, W: image dimensions, K: faces rasterized per pixel)
+//      sigma: float, parameter which controls the width of the sigmoid for blending
+// Returns:
+//      grad_distances: FloatTensor of shape (N, H, W, K), gradients for distances
+// clang-format on
+at::Tensor SigmoidAlphaBlendBackwardCpu(
+    const at::Tensor& grad_alphas,
+    const at::Tensor& alphas,
+    const at::Tensor& distances,
+    const at::Tensor& pix_to_face,
+    const float sigma);
+
+#ifdef WITH_CUDA
+at::Tensor SigmoidAlphaBlendBackwardCuda(
+    const at::Tensor& grad_alphas,
+    const at::Tensor& alphas,
+    const at::Tensor& distances,
+    const at::Tensor& pix_to_face,
+    const float sigma);
+#endif
+
+// Exposed entry point; if any input is on CUDA the CUDA path is taken.
+inline at::Tensor
+SigmoidAlphaBlend(at::Tensor& distances, at::Tensor& pix_to_face, float sigma) {
+  if (distances.is_cuda() || pix_to_face.is_cuda()) {
+#ifdef WITH_CUDA
+    return SigmoidAlphaBlendForwardCuda(distances, pix_to_face, sigma);
+#else
+    AT_ERROR("Not compiled with GPU support.");
+#endif
+  }
+  return SigmoidAlphaBlendForwardCpu(distances, pix_to_face, sigma);
+}
+
+// Exposed entry point; if any input is on CUDA the CUDA path is taken.
+inline at::Tensor SigmoidAlphaBlendBackward(
+    const at::Tensor& grad_alphas,
+    const at::Tensor& alphas,
+    const at::Tensor& distances,
+    const at::Tensor& pix_to_face,
+    const float sigma) {
+  if (distances.is_cuda() || pix_to_face.is_cuda() || alphas.is_cuda() ||
+      grad_alphas.is_cuda()) {
+#ifdef WITH_CUDA
+    return SigmoidAlphaBlendBackwardCuda(
+        grad_alphas, alphas, distances, pix_to_face, sigma);
+#else
+    AT_ERROR("Not compiled with GPU support.");
+#endif
+  }
+  return SigmoidAlphaBlendBackwardCpu(
+      grad_alphas, alphas, distances, pix_to_face, sigma);
+}
diff --git a/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend_cpu.cpp b/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8a19516726f320e206402f7e78a37661603be76b
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend_cpu.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/extension.h>
+#include <cmath>
+#include <vector>
+
+// CPU forward pass: out[n][h][w] = 1 - prod_k (1 - sigmoid(-d_k / sigma)),
+// where d_k ranges over the distances of the valid faces at pixel (n, h, w).
+at::Tensor SigmoidAlphaBlendForwardCpu(
+    const at::Tensor& distances, // (N, H, W, K)
+    const at::Tensor& pix_to_face, // (N, H, W, K)
+    const float sigma) {
+  const int N = distances.size(0);
+  const int H = distances.size(1);
+  const int W = distances.size(2);
+  const int K = distances.size(3);
+
+  torch::Tensor out = torch::empty({N, H, W}, distances.options());
+
+  auto distances_a = distances.accessor<float, 4>();
+  auto pix_to_face_a = pix_to_face.accessor<int64_t, 4>();
+  auto out_a = out.accessor<float, 3>();
+
+  // Iterate over the images in the batch.
+  for (int n = 0; n < N; ++n) {
+    // Iterate through the horizontal lines of the image from top to bottom.
+    for (int h = 0; h < H; ++h) {
+      // Iterate over the pixels on this horizontal line, left to right.
+      for (int w = 0; w < W; ++w) {
+        float alpha = 1.0;
+        // Loop through the top K faces for each pixel.
+        for (int k = 0; k < K; ++k) {
+          // int64_t keeps large face indices from wrapping to a negative int.
+          const int64_t f = pix_to_face_a[n][h][w][k];
+          if (f < 0) {
+            // Sentinel value is -1 indicating no face overlaps the pixel.
+            continue;
+          }
+          // The distance is negative if a pixel is inside a face and positive
+          // outside the face. Therefore use -1.0 * the distance to get the
+          // correct sign.
+          float dist = -1.0 * distances_a[n][h][w][k];
+          // Calculate the sigmoid probability.
+          float prob = 1. / (1. + exp(-dist / sigma));
+
+          // The product makes alpha 0.0 as soon as one face fully covers the
+          // pixel (prob is 1.0 for that face, so (1.0 - prob) multiplies by
+          // 0.0); the stored value 1.0 - alpha is then 1.0.
+          alpha *= 1.0 - prob;
+        }
+        out_a[n][h][w] = 1.0 - alpha;
+      }
+    }
+  }
+  return out;
+}
+
+// CPU backward pass: gradient of the blended alphas w.r.t. the distances.
+at::Tensor SigmoidAlphaBlendBackwardCpu(
+    const at::Tensor& grad_alphas, // (N, H, W)
+    const at::Tensor& alphas, // (N, H, W)
+    const at::Tensor& distances, // (N, H, W, K)
+    const at::Tensor& pix_to_face, // (N, H, W, K)
+    const float sigma) {
+  const int N = distances.size(0);
+  const int H = distances.size(1);
+  const int W = distances.size(2);
+  const int K = distances.size(3);
+
+  auto distances_a = distances.accessor<float, 4>();
+  auto pix_to_face_a = pix_to_face.accessor<int64_t, 4>();
+  auto alphas_a = alphas.accessor<float, 3>();
+  auto grad_alphas_a = grad_alphas.accessor<float, 3>();
+
+  torch::Tensor grad_distances =
+      torch::zeros({N, H, W, K}, distances.options());
+  auto grad_distances_a = grad_distances.accessor<float, 4>();
+
+  // Iterate over the images in the batch.
+  for (int n = 0; n < N; ++n) {
+    // Iterate through the horizontal lines of the image from top to bottom.
+    for (int h = 0; h < H; ++h) {
+      // Iterate over the pixels on this horizontal line, left to right.
+      for (int w = 0; w < W; ++w) {
+        // Recover the product of the (1.0 - prob) terms from the forward
+        // output and read the upstream gradient.
+        const float alpha = 1.0 - alphas_a[n][h][w];
+        const float grad_alpha = grad_alphas_a[n][h][w];
+        // Loop through the top K faces for each pixel.
+        for (int k = 0; k < K; ++k) {
+          // int64_t keeps large face indices from wrapping to a negative int.
+          const int64_t f = pix_to_face_a[n][h][w][k];
+          if (f < 0) {
+            // Sentinel value is -1 indicating no face overlaps the pixel
+            continue;
+          }
+          // The distance is negative if a pixel is inside a face and positive
+          // outside the face. Therefore use -1.0 * distance to get the
+          // correct sign.
+          float dist = -1.0 * distances_a[n][h][w][k];
+          // Calculate the sigmoid probability.
+          float prob = 1. / (1. + exp(-dist / sigma));
+
+          // clang-format off
+          // We need to take the derivative of alpha w.r.t. the distance.
+          // alpha = 1.0 - (1.0 - sigmoid(-x)) * (1.0 - sigmoid(-x2)) * ... * (1.0 - sigmoid(-xn))
+          //
+          // Note that d/dx sigmoid(x) = sigmoid(x) * (1.0 - sigmoid(x))
+          // (below, `alpha` is the local variable, i.e. the product of all (1.0 - prob) terms)
+          // This gives:
+          // d_alpha/d_dist = -1.0 * -1.0 * sigmoid(-x) * (1. - sigmoid(-x)) * (-1.0/sigma)
+          //        * (1.0 - sigmoid(-x2)) * ... * (1.0 - sigmoid(-xn))
+          //    = (-1.0/sigma) * prob * (1.0 - prob) * alpha/(1.0 - prob)
+          //    = (-1.0/sigma) * prob * alpha
+          // clang-format on
+          grad_distances_a[n][h][w][k] =
+              grad_alpha * (-1.0 / sigma) * prob * alpha;
+        }
+      }
+    }
+  }
+  return grad_distances;
+}
diff --git a/pytorch3d/pytorch3d/csrc/compositing/alpha_composite.cu b/pytorch3d/pytorch3d/csrc/compositing/alpha_composite.cu
new file mode 100644
index 0000000000000000000000000000000000000000..679d8a8231b45678c6ed95a1705e6c72edef454d
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/compositing/alpha_composite.cu
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/core/TensorAccessor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <stdio.h>
+#include <vector>
+
+__constant__ const float kEpsilon = 1e-9; // added to (1 - alpha) divisors
+
+// Forward alpha compositing for floats. blockIdx.x selects the batch element;
+// threads loop over the C * H * W outputs with stride gridDim.y * blockDim.x.
+// TODO(gkioxari) support all data types once AtomicAdd supports doubles.
+// Currently, support is for floats only.
+__global__ void alphaCompositeCudaForwardKernel(
+    // clang-format off
+    at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> result,
+    const at::PackedTensorAccessor64<float, 2, at::RestrictPtrTraits> features,
+    const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
+    const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
+  // clang-format on
+  const int64_t K = points_idx.size(1);
+  const int64_t C = features.size(0);
+  const int64_t H = points_idx.size(2);
+  const int64_t W = points_idx.size(3);
+
+  // Get the batch and index
+  const int batch = blockIdx.x;
+  // Kept in 64 bits so that C * H * W is not narrowed to int.
+  const int64_t num_pixels = C * H * W;
+  const int num_threads = gridDim.y * blockDim.x;
+  const int tid = blockIdx.y * blockDim.x + threadIdx.x;
+
+  // Iterate over each feature in each pixel
+  for (int64_t pid = tid; pid < num_pixels; pid += num_threads) {
+    const int64_t ch = pid / (H * W);
+    const int64_t j = (pid % (H * W)) / W;
+    const int64_t i = (pid % (H * W)) % W;
+    // alphacomposite the different values
+    float cum_alpha = 1.;
+    // Iterate through the closest K points for this pixel
+    for (int64_t k = 0; k < K; ++k) {
+      // Point indices are stored as int64_t; do not narrow them to int.
+      const int64_t n_idx = points_idx[batch][k][j][i];
+      // Sentinel value is -1 indicating no point overlaps the pixel
+      if (n_idx < 0) {
+        continue;
+      }
+      const float alpha = alphas[batch][k][j][i];
+      // TODO(gkioxari) It might be more efficient to have threads write in a
+      // local variable, and move atomicAdd outside of the loop such that
+      // atomicAdd is executed once per thread.
+      atomicAdd(
+          &result[batch][ch][j][i], features[ch][n_idx] * cum_alpha * alpha);
+      cum_alpha = cum_alpha * (1 - alpha);
+    }
+  }
+}
+
+// Backward alpha compositing for floats. blockIdx.x selects the batch element;
+// threads loop over the C * H * W entries with stride gridDim.y * blockDim.x.
+// TODO(gkioxari) support all data types once AtomicAdd supports doubles.
+// Currently, support is for floats only.
+__global__ void alphaCompositeCudaBackwardKernel(
+    // clang-format off
+    at::PackedTensorAccessor64<float, 2, at::RestrictPtrTraits> grad_features,
+    at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> grad_alphas,
+    const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> grad_outputs,
+    const at::PackedTensorAccessor64<float, 2, at::RestrictPtrTraits> features,
+    const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
+    const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
+  // clang-format on
+  const int64_t K = points_idx.size(1);
+  const int64_t C = features.size(0);
+  const int64_t H = points_idx.size(2);
+  const int64_t W = points_idx.size(3);
+
+  // Get the batch and index
+  const int batch = blockIdx.x;
+  // Kept in 64 bits so that C * H * W is not narrowed to int.
+  const int64_t num_pixels = C * H * W;
+  const int num_threads = gridDim.y * blockDim.x;
+  const int tid = blockIdx.y * blockDim.x + threadIdx.x;
+
+  // Parallelize over each feature in each pixel in images of size H * W,
+  // for each image in the batch of size batch_size
+  for (int64_t pid = tid; pid < num_pixels; pid += num_threads) {
+    const int64_t ch = pid / (H * W);
+    const int64_t j = (pid % (H * W)) / W;
+    const int64_t i = (pid % (H * W)) % W;
+    // alphacomposite the different values
+    float cum_alpha = 1.;
+    // Iterate through the closest K points for this pixel
+    for (int64_t k = 0; k < K; ++k) {
+      // Point indices are stored as int64_t; do not narrow them to int.
+      const int64_t n_idx = points_idx[batch][k][j][i];
+      // Sentinel value is -1 indicating no point overlaps the pixel
+      if (n_idx < 0) {
+        continue;
+      }
+      const float alpha = alphas[batch][k][j][i];
+      // TODO(gkioxari) It might be more efficient to have threads write in a
+      // local variable, and move atomicAdd outside of the loop such that
+      // atomicAdd is executed once per thread.
+      atomicAdd(
+          &grad_alphas[batch][k][j][i],
+          cum_alpha * features[ch][n_idx] * grad_outputs[batch][ch][j][i]);
+      atomicAdd(
+          &grad_features[ch][n_idx],
+          cum_alpha * alpha * grad_outputs[batch][ch][j][i]);
+
+      // Iterate over the points in front of point k (t < k) to update gradient
+      for (int64_t t = 0; t < k; ++t) {
+        const int64_t t_idx = points_idx[batch][t][j][i];
+        // Sentinel value is -1, indicating no point overlaps this pixel
+        if (t_idx < 0) {
+          continue;
+        }
+        const float alpha_tvalue = alphas[batch][t][j][i];
+        // d(term_k)/d(alpha_t) = -features * cum_alpha * alpha / (1 - alpha_t),
+        // with kEpsilon added to the divisor. Threads of all channels of this
+        // pixel add into the same grad_alphas entry, hence the atomicAdd.
+        atomicAdd(
+            &grad_alphas[batch][t][j][i],
+            -grad_outputs[batch][ch][j][i] * features[ch][n_idx] * cum_alpha *
+                alpha / (1 - alpha_tvalue + kEpsilon));
+      }
+
+      cum_alpha = cum_alpha * (1 - alpha);
+    }
+  }
+}
+
+// Host-side launcher of alphaCompositeCudaForwardKernel (float tensors only).
+at::Tensor alphaCompositeCudaForward(
+    const at::Tensor& features,
+    const at::Tensor& alphas,
+    const at::Tensor& points_idx) {
+  // Require a common GPU for all inputs and a common type for the float ones
+  at::TensorArg features_arg{features, "features", 1};
+  at::TensorArg alphas_arg{alphas, "alphas", 2};
+  at::TensorArg points_idx_arg{points_idx, "points_idx", 3};
+  at::CheckedFrom fn = "alphaCompositeCudaForward";
+  at::checkAllSameGPU(fn, {features_arg, alphas_arg, points_idx_arg});
+  at::checkAllSameType(fn, {features_arg, alphas_arg});
+
+  // Make the device of features current before querying the current stream
+  at::cuda::CUDAGuard device_guard(features.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const int64_t N = points_idx.size(0);
+  const int64_t C = features.size(0);
+  const int64_t H = points_idx.size(2);
+  const int64_t W = points_idx.size(3);
+  auto weighted_fs = at::zeros({N, C, H, W}, features.options());
+  if (weighted_fs.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return weighted_fs;
+  }
+
+  // Grid: N x (1024 / N + 1) blocks, 64 threads per block
+  const dim3 threads(64);
+  const dim3 blocks(N, 1024 / N + 1);
+  // TODO(gkioxari) add AT_DISPATCH_FLOATING_TYPES once atomicAdd supports
+  // doubles. Currently, support is for floats only.
+  alphaCompositeCudaForwardKernel<<<blocks, threads, 0, stream>>>(
+      // clang-format off
+      // The tensors are handed over as packed accessors, so they
+      // do not have to be made contiguous first.
+      weighted_fs.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
+      features.packed_accessor64<float, 2, at::RestrictPtrTraits>(),
+      alphas.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
+      points_idx.packed_accessor64<int64_t, 4, at::RestrictPtrTraits>());
+  // clang-format on
+  AT_CUDA_CHECK(cudaGetLastError());
+  return weighted_fs;
+}
+
+// Host-side launcher of alphaCompositeCudaBackwardKernel (float tensors only).
+std::tuple<at::Tensor, at::Tensor> alphaCompositeCudaBackward(
+    const at::Tensor& grad_outputs,
+    const at::Tensor& features,
+    const at::Tensor& alphas,
+    const at::Tensor& points_idx) {
+  // Require a common GPU for all inputs and a common type for the float ones
+  at::TensorArg grad_outputs_arg{grad_outputs, "grad_outputs", 1};
+  at::TensorArg features_arg{features, "features", 2};
+  at::TensorArg alphas_arg{alphas, "alphas", 3};
+  at::TensorArg points_idx_arg{points_idx, "points_idx", 4};
+  at::CheckedFrom fn = "alphaCompositeCudaBackward";
+  at::checkAllSameGPU(
+      fn, {grad_outputs_arg, features_arg, alphas_arg, points_idx_arg});
+  at::checkAllSameType(fn, {grad_outputs_arg, features_arg, alphas_arg});
+
+  // Make the device of features current before querying the current stream
+  at::cuda::CUDAGuard device_guard(features.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  auto d_features = at::zeros_like(features);
+  auto d_alphas = at::zeros_like(alphas);
+  if (d_features.numel() == 0 || d_alphas.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return std::make_tuple(d_features, d_alphas);
+  }
+
+  const int64_t N = alphas.size(0);
+  const dim3 threads(64);
+  const dim3 blocks(N, 1024 / N + 1);
+  // TODO(gkioxari) add AT_DISPATCH_FLOATING_TYPES once atomicAdd supports
+  // doubles. Currently, support is for floats only.
+  alphaCompositeCudaBackwardKernel<<<blocks, threads, 0, stream>>>(
+      // clang-format off
+      // The tensors are handed over as packed accessors, so they
+      // do not have to be made contiguous first.
+      d_features.packed_accessor64<float, 2, at::RestrictPtrTraits>(),
+      d_alphas.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
+      grad_outputs.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
+      features.packed_accessor64<float, 2, at::RestrictPtrTraits>(),
+      alphas.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
+      points_idx.packed_accessor64<int64_t, 4, at::RestrictPtrTraits>());
+  // clang-format on
+  AT_CUDA_CHECK(cudaGetLastError());
+  return std::make_tuple(d_features, d_alphas);
+}
diff --git a/pytorch3d/pytorch3d/csrc/compositing/alpha_composite.h b/pytorch3d/pytorch3d/csrc/compositing/alpha_composite.h
new file mode 100644
index 0000000000000000000000000000000000000000..44ba1bf0fd2e848e682db4a46c0badeefb3c02e5
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/compositing/alpha_composite.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/extension.h>
+#include "utils/pytorch3d_cutils.h"
+
+#include <vector>
+
+// Perform alpha compositing of points in a z-buffer.
+//
+// Inputs:
+//    features: FloatTensor of shape (C, P) giving the features of each point,
+//            where C is the size of the feature and P the number of points.
+//    alphas: FloatTensor of shape (N, points_per_pixel, H, W) where
+//            points_per_pixel is the number of points in the z-buffer
+//            sorted in z-order, and (H, W) is the image size.
+//    points_idx: LongTensor of shape (N, points_per_pixel, H, W) giving the
+//            indices of the nearest points at each pixel, sorted in z-order.
+//            Negative indices mark slots with no point and are skipped.
+// Returns:
+//    weighted_fs: FloatTensor of shape (N, C, H, W) giving the accumulated
+//            feature for each pixel. Concretely, it gives:
+//                 weighted_fs[b,c,i,j] = sum_k cum_alpha_k *
+//                   features[c,points_idx[b,k,i,j]]
+//                 where cum_alpha_k =
+//                    alphas[b,k,i,j] * prod_l=0..k-1 (1 - alphas[b,l,i,j])
+
+// CUDA declarations
+#ifdef WITH_CUDA
+torch::Tensor alphaCompositeCudaForward(
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx);
+
+std::tuple<torch::Tensor, torch::Tensor> alphaCompositeCudaBackward(
+    const torch::Tensor& grad_outputs,
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx);
+#endif
+
+// CPU (C++) declarations
+torch::Tensor alphaCompositeCpuForward(
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx);
+
+std::tuple<torch::Tensor, torch::Tensor> alphaCompositeCpuBackward(
+    const torch::Tensor& grad_outputs,
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx);
+
+// Forward dispatcher; the inputs are rebound to their contiguous versions.
+inline torch::Tensor alphaCompositeForward(
+    torch::Tensor& features,
+    torch::Tensor& alphas,
+    torch::Tensor& points_idx) {
+  features = features.contiguous();
+  alphas = alphas.contiguous();
+  points_idx = points_idx.contiguous();
+  // Any CUDA input selects the CUDA path, whose checks reject mixed devices.
+  if (features.is_cuda() || alphas.is_cuda() || points_idx.is_cuda()) {
+#ifdef WITH_CUDA
+    CHECK_CUDA(features);
+    CHECK_CUDA(alphas);
+    CHECK_CUDA(points_idx);
+    return alphaCompositeCudaForward(features, alphas, points_idx);
+#else
+    AT_ERROR("Not compiled with GPU support");
+#endif
+  }
+  return alphaCompositeCpuForward(features, alphas, points_idx);
+}
+
+// Backward dispatcher; the inputs are rebound to their contiguous versions.
+inline std::tuple<torch::Tensor, torch::Tensor> alphaCompositeBackward(
+    torch::Tensor& grad_outputs,
+    torch::Tensor& features,
+    torch::Tensor& alphas,
+    torch::Tensor& points_idx) {
+  grad_outputs = grad_outputs.contiguous();
+  features = features.contiguous();
+  alphas = alphas.contiguous();
+  points_idx = points_idx.contiguous();
+  // Any CUDA input selects the CUDA path, whose checks reject mixed devices.
+  if (grad_outputs.is_cuda() || features.is_cuda() || alphas.is_cuda() ||
+      points_idx.is_cuda()) {
+#ifdef WITH_CUDA
+    CHECK_CUDA(grad_outputs);
+    CHECK_CUDA(features);
+    CHECK_CUDA(alphas);
+    CHECK_CUDA(points_idx);
+    return alphaCompositeCudaBackward(
+        grad_outputs, features, alphas, points_idx);
+#else
+    AT_ERROR("Not compiled with GPU support");
+#endif
+  }
+  return alphaCompositeCpuBackward(
+      grad_outputs, features, alphas, points_idx);
+}
diff --git a/pytorch3d/pytorch3d/csrc/compositing/alpha_composite_cpu.cpp b/pytorch3d/pytorch3d/csrc/compositing/alpha_composite_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..41bc0ec76794228e7f770f0436453306ffc8aec0
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/compositing/alpha_composite_cpu.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/extension.h>
+
+#include <cmath>
+#include <vector>
+
+// Small float added to (1 - alpha) before dividing in the backward pass
+const float kEps = 1e-9;
+
+// CPU implementation of the alpha compositing forward pass: composites the K
+// z-buffer slots of every pixel, in slot order, into a (B, C, H, W) tensor.
+torch::Tensor alphaCompositeCpuForward(
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx) {
+  const int64_t n_batch = points_idx.size(0);
+  const int64_t n_slots = points_idx.size(1);
+  const int64_t rows = points_idx.size(2);
+  const int64_t cols = points_idx.size(3);
+  const int64_t n_feat = features.size(0);
+  auto out = torch::zeros({n_batch, n_feat, rows, cols}, features.options());
+
+  auto features_a = features.accessor<float, 2>();
+  auto alphas_a = alphas.accessor<float, 4>();
+  auto points_idx_a = points_idx.accessor<int64_t, 4>();
+  auto out_a = out.accessor<float, 4>();
+
+  // Walk the batch ...
+  for (int n = 0; n < n_batch; ++n) {
+    // ... every feature channel ...
+    for (int ch = 0; ch < n_feat; ++ch) {
+      // ... every image row, top to bottom ...
+      for (int y = 0; y < rows; ++y) {
+        // ... and every pixel of that row, left to right.
+        for (int x = 0; x < cols; ++x) {
+          float through = 1.;
+          // Visit the K z-buffer slots of this pixel in order
+          for (int k = 0; k < n_slots; ++k) {
+            const int64_t point = points_idx_a[n][k][y][x];
+            // A negative index is the sentinel for "no point in this slot"
+            if (point >= 0) {
+              const float a = alphas_a[n][k][y][x];
+              out_a[n][ch][y][x] += through * a * features_a[ch][point];
+              through = through * (1 - a);
+            }
+          }
+        }
+      }
+    }
+  }
+  return out;
+}
+
+// CPU implementation of the alpha compositing backward pass: returns the
+// gradients w.r.t. features and alphas as a (grad_features, grad_alphas) tuple.
+std::tuple<torch::Tensor, torch::Tensor> alphaCompositeCpuBackward(
+    const torch::Tensor& grad_outputs,
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx) {
+  torch::Tensor d_features = torch::zeros_like(features);
+  torch::Tensor d_alphas = torch::zeros_like(alphas);
+
+  const int64_t n_batch = points_idx.size(0);
+  const int64_t n_slots = points_idx.size(1);
+  const int64_t rows = points_idx.size(2);
+  const int64_t cols = points_idx.size(3);
+  const int64_t n_feat = features.size(0);
+
+  auto grad_outputs_a = grad_outputs.accessor<float, 4>();
+  auto features_a = features.accessor<float, 2>();
+  auto alphas_a = alphas.accessor<float, 4>();
+  auto points_idx_a = points_idx.accessor<int64_t, 4>();
+  auto d_features_a = d_features.accessor<float, 2>();
+  auto d_alphas_a = d_alphas.accessor<float, 4>();
+
+  // Walk the batch ...
+  for (int n = 0; n < n_batch; ++n) {
+    // ... every feature channel ...
+    for (int ch = 0; ch < n_feat; ++ch) {
+      // ... every image row, top to bottom ...
+      for (int y = 0; y < rows; ++y) {
+        // ... and every pixel of that row, left to right.
+        for (int x = 0; x < cols; ++x) {
+          float through = 1.;
+          // Visit the K z-buffer slots of this pixel in order
+          for (int k = 0; k < n_slots; ++k) {
+            const int64_t point = points_idx_a[n][k][y][x];
+            // A negative index is the sentinel for "no point in this slot"
+            if (point < 0) {
+              continue;
+            }
+            const float a = alphas_a[n][k][y][x];
+            const float g = grad_outputs_a[n][ch][y][x];
+            // Direct gradients of slot k's own term
+            d_alphas_a[n][k][y][x] += g * features_a[ch][point] * through;
+            d_features_a[ch][point] += g * through * a;
+
+            // Slot k is attenuated by (1 - alpha_t) of every earlier valid
+            // slot t, so it also contributes to the gradients of those alphas
+            for (int t = 0; t < k; t++) {
+              // Earlier slots holding the sentinel are skipped as well
+              if (points_idx_a[n][t][y][x] >= 0) {
+                const float a_t = alphas_a[n][t][y][x];
+                d_alphas_a[n][t][y][x] -= g * features_a[ch][point] *
+                    through * a / (1 - a_t + kEps);
+              }
+            }
+
+            through = through * (1 - a);
+          }
+        }
+      }
+    }
+  }
+  return std::make_tuple(d_features, d_alphas);
+}
diff --git a/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum.cu b/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum.cu
new file mode 100644
index 0000000000000000000000000000000000000000..984647172f9a15277eab70b15158a9441355490d
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum.cu
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/core/TensorAccessor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <stdio.h>
+#include <vector>
+
+__constant__ const float kEpsilon = 1e-4; // floor for the alpha sum before dividing
+
+// TODO(gkioxari) support all data types once AtomicAdd supports doubles.
+// Currently, support is for floats only.
+__global__ void weightedSumNormCudaForwardKernel(
+    // clang-format off
+    at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> result,
+    const at::PackedTensorAccessor64<float, 2, at::RestrictPtrTraits> features,
+    const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
+    const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
+  // clang-format on
+  const int64_t K = points_idx.size(1);
+  const int64_t C = features.size(0);
+  const int64_t H = points_idx.size(2);
+  const int64_t W = points_idx.size(3);
+  // Normalized weighted sum: result = sum_k(alpha_k * f_k) / sum_k(alpha_k).
+  // blockIdx.x selects the batch element this block works on.
+  const int batch = blockIdx.x;
+  // 64-bit counters: C * H * W and the strided index must not wrap an int.
+  const int64_t num_pixels = C * H * W;
+  const int64_t num_threads = gridDim.y * blockDim.x;
+  const int64_t tid = blockIdx.y * blockDim.x + threadIdx.x;
+
+  // Each thread handles (channel, row, column) entries of this batch element,
+  // advancing by the number of threads that share the batch element.
+  for (int64_t pid = tid; pid < num_pixels; pid += num_threads) {
+    const int64_t ch = pid / (H * W);
+    const int64_t j = (pid % (H * W)) / W;
+    const int64_t i = (pid % (H * W)) % W;
+
+    // Sum of the alphas of the valid points at this pixel (the normalizer)
+    float cum_alpha = 0.;
+    // Iterate through the closest K points for this pixel
+    for (int64_t k = 0; k < K; ++k) {
+      const int64_t n_idx = points_idx[batch][k][j][i];
+      // Sentinel value is -1 indicating no point overlaps the pixel
+      if (n_idx < 0) {
+        continue;
+      }
+
+      cum_alpha += alphas[batch][k][j][i];
+    }
+
+    if (cum_alpha < kEpsilon) {
+      cum_alpha = kEpsilon;
+    }
+
+    // Iterate through the closest K points for this pixel
+    for (int64_t k = 0; k < K; ++k) {
+      const int64_t n_idx = points_idx[batch][k][j][i];
+      // Index stays 64-bit like the accessor; -1 means no point overlaps
+      if (n_idx < 0) {
+        continue;
+      }
+      const float alpha = alphas[batch][k][j][i];
+      // TODO(gkioxari) It might be more efficient to have threads write in a
+      // local variable, and move atomicAdd outside of the loop such that
+      // atomicAdd is executed once per thread.
+      atomicAdd(
+          &result[batch][ch][j][i], features[ch][n_idx] * alpha / cum_alpha);
+    }
+  }
+}
+
+// TODO(gkioxari) support all data types once AtomicAdd supports doubles.
+// Currently, support is for floats only.
+__global__ void weightedSumNormCudaBackwardKernel(
+    // clang-format off
+    at::PackedTensorAccessor64<float, 2, at::RestrictPtrTraits> grad_features,
+    at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> grad_alphas,
+    const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> grad_outputs,
+    const at::PackedTensorAccessor64<float, 2, at::RestrictPtrTraits> features,
+    const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
+    const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
+  // clang-format on
+  const int64_t K = points_idx.size(1);
+  const int64_t C = features.size(0);
+  const int64_t H = points_idx.size(2);
+  const int64_t W = points_idx.size(3);
+  // Gradient of the normalized weighted sum w.r.t. features and alphas.
+  // blockIdx.x selects the batch element this block works on.
+  const int batch = blockIdx.x;
+  // 64-bit counters: C * H * W and the strided index must not wrap an int.
+  const int64_t num_pixels = C * H * W;
+  const int64_t num_threads = gridDim.y * blockDim.x;
+  const int64_t tid = blockIdx.y * blockDim.x + threadIdx.x;
+
+  // Each thread handles (channel, row, column) entries of this batch element,
+  // advancing by the number of threads that share the batch element.
+  for (int64_t pid = tid; pid < num_pixels; pid += num_threads) {
+    const int64_t ch = pid / (H * W);
+    const int64_t j = (pid % (H * W)) / W;
+    const int64_t i = (pid % (H * W)) % W;
+
+    float sum_alpha = 0.;
+    float sum_alphafs = 0.;
+    // First pass over the closest K points for this pixel: total alpha and
+    // total alpha * feature of the valid points, for this channel
+    for (int64_t k = 0; k < K; ++k) {
+      const int64_t n_idx = points_idx[batch][k][j][i];
+      // Sentinel value is -1 indicating no point overlaps the pixel
+      if (n_idx < 0) {
+        continue;
+      }
+
+      sum_alpha += alphas[batch][k][j][i];
+      sum_alphafs += alphas[batch][k][j][i] * features[ch][n_idx];
+    }
+
+    if (sum_alpha < kEpsilon) {
+      sum_alpha = kEpsilon;
+    }
+
+    // Iterate again through the closest K points for this pixel to calculate
+    // the gradient.
+    for (int64_t k = 0; k < K; ++k) {
+      const int64_t n_idx = points_idx[batch][k][j][i];
+      // Index stays 64-bit like the accessor, so it cannot be truncated.
+      // Sentinel value is -1 indicating no point overlaps the pixel
+      if (n_idx < 0) {
+        continue;
+      }
+      const float alpha = alphas[batch][k][j][i];
+      // Several threads may update the same gradient entry, hence atomicAdd.
+      // TODO(gkioxari) It might be more efficient to have threads write in a
+      // local variable, and move atomicAdd outside of the loop such that
+      // atomicAdd is executed once per thread.
+      atomicAdd(
+          &grad_alphas[batch][k][j][i],
+          (features[ch][n_idx] * sum_alpha - sum_alphafs) /
+              (sum_alpha * sum_alpha) * grad_outputs[batch][ch][j][i]);
+      atomicAdd(
+          &grad_features[ch][n_idx],
+          alpha * grad_outputs[batch][ch][j][i] / sum_alpha);
+    }
+  }
+}
+
+at::Tensor weightedSumNormCudaForward(
+    const at::Tensor& features,
+    const at::Tensor& alphas,
+    const at::Tensor& points_idx) {
+  // Host-side launcher: verify inputs share one GPU, then run the kernel.
+  at::TensorArg features_t{features, "features", 1},
+      alphas_t{alphas, "alphas", 2}, points_idx_t{points_idx, "points_idx", 3};
+  at::CheckedFrom c = "weightedSumNormCudaForward";
+  at::checkAllSameGPU(c, {features_t, alphas_t, points_idx_t});
+  at::checkAllSameType(c, {features_t, alphas_t});
+  // (points_idx is not part of the dtype check; it is read as int64 below.)
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(features.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const int64_t batch_size = points_idx.size(0);
+  const int64_t C = features.size(0);
+  const int64_t H = points_idx.size(2);
+  const int64_t W = points_idx.size(3);
+  // Zero-initialized output of shape (batch_size, C, H, W).
+  auto result = at::zeros({batch_size, C, H, W}, features.options());
+  // An empty output needs no launch; this also rules out batch_size == 0 below.
+  if (result.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return result;
+  }
+  // Grid is (batch_size, 1024 / batch_size + 1) blocks of 64 threads each.
+  const dim3 threadsPerBlock(64);
+  const dim3 numBlocks(batch_size, 1024 / batch_size + 1);
+
+  // TODO(gkioxari) add AT_DISPATCH_FLOATING_TYPES once atomicAdd supports
+  // doubles. Currently, support is for floats only.
+  // clang-format off
+  weightedSumNormCudaForwardKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
+      // As we are using packed accessors here the tensors
+      // do not need to be made contiguous.
+      result.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
+      features.packed_accessor64<float, 2, at::RestrictPtrTraits>(),
+      alphas.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
+      points_idx.packed_accessor64<int64_t, 4, at::RestrictPtrTraits>());
+  // clang-format on
+  // Surface any error recorded by the launch above.
+  AT_CUDA_CHECK(cudaGetLastError());
+  return result;
+}
+
+std::tuple<at::Tensor, at::Tensor> weightedSumNormCudaBackward(
+    const at::Tensor& grad_outputs,
+    const at::Tensor& features,
+    const at::Tensor& alphas,
+    const at::Tensor& points_idx) {
+  // Host-side launcher: verify inputs share one GPU, then run the kernel.
+  at::TensorArg grad_outputs_t{grad_outputs, "grad_outputs", 1},
+      features_t{features, "features", 2}, alphas_t{alphas, "alphas", 3},
+      points_idx_t{points_idx, "points_idx", 4};
+  at::CheckedFrom c = "weightedSumNormCudaBackward";
+  at::checkAllSameGPU(c, {grad_outputs_t, features_t, alphas_t, points_idx_t});
+  at::checkAllSameType(c, {grad_outputs_t, features_t, alphas_t});
+  // (points_idx is not part of the dtype check; it is read as int64 below.)
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(features.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  // Both gradients start at zero; the kernel adds into them with atomicAdd.
+  auto grad_features = at::zeros_like(features);
+  auto grad_alphas = at::zeros_like(alphas);
+  // If either gradient is empty there is nothing to launch.
+  if (grad_features.numel() == 0 || grad_alphas.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return std::make_tuple(grad_features, grad_alphas);
+  }
+  // NOTE(review): bs > 0 below assumes alphas and points_idx share dim 0.
+  const int64_t bs = points_idx.size(0);
+  // Grid is (bs, 1024 / bs + 1) blocks of 64 threads each.
+  const dim3 threadsPerBlock(64);
+  const dim3 numBlocks(bs, 1024 / bs + 1);
+
+  // TODO(gkioxari) add AT_DISPATCH_FLOATING_TYPES once atomicAdd supports
+  // doubles. Currently, support is for floats only.
+  weightedSumNormCudaBackwardKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
+      // clang-format off
+      // As we are using packed accessors here the tensors
+      // do not need to be made contiguous.
+      grad_features.packed_accessor64<float, 2, at::RestrictPtrTraits>(),
+      grad_alphas.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
+      grad_outputs.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
+      features.packed_accessor64<float, 2, at::RestrictPtrTraits>(),
+      alphas.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
+      points_idx.packed_accessor64<int64_t, 4, at::RestrictPtrTraits>());
+  // clang-format on
+  AT_CUDA_CHECK(cudaGetLastError());
+  return std::make_tuple(grad_features, grad_alphas);
+}
diff --git a/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum.h b/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum.h
new file mode 100644
index 0000000000000000000000000000000000000000..5d0a5f5be08267ad33da8b704814c6c6c333930d
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/extension.h>
+#include "utils/pytorch3d_cutils.h"
+
+#include <vector>
+
+// Perform normalized weighted sum compositing of points in a z-buffer.
+//
+// Inputs:
+//    features: FloatTensor of shape (C, P) which gives the features
+//            of each point where C is the size of the feature and
+//            P the number of points.
+//    alphas: FloatTensor of shape (N, points_per_pixel, H, W) where
+//            points_per_pixel is the number of points in the z-buffer
+//            sorted in z-order, and (H, W) is the image size.
+//    points_idx: LongTensor of shape (N, points_per_pixel, H, W) giving the
+//            indices of the nearest points at each pixel, sorted in z-order.
+// Returns:
+//    weighted_fs: FloatTensor of shape (N, C, H, W) giving the accumulated
+//            feature at each pixel. Concretely, it gives:
+//                 weighted_fs[b,c,i,j] = sum_k alphas[b,k,i,j] *
+//                   features[c,points_idx[b,k,i,j]] / sum_k alphas[b,k,i,j]
+
+// CUDA entry points (norm_weighted_sum.cu); declared only when WITH_CUDA is set
+#ifdef WITH_CUDA
+torch::Tensor weightedSumNormCudaForward(
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx);
+
+std::tuple<torch::Tensor, torch::Tensor> weightedSumNormCudaBackward(
+    const torch::Tensor& grad_outputs,
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx);
+#endif
+
+// CPU entry points (norm_weighted_sum_cpu.cpp); always declared
+torch::Tensor weightedSumNormCpuForward(
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx);
+
+std::tuple<torch::Tensor, torch::Tensor> weightedSumNormCpuBackward(
+    const torch::Tensor& grad_outputs,
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx);
+
+torch::Tensor weightedSumNormForward(
+    torch::Tensor& features,
+    torch::Tensor& alphas,
+    torch::Tensor& points_idx) {
+  // Rebind the caller's tensors to contiguous versions before dispatching.
+  features = features.contiguous();
+  alphas = alphas.contiguous();
+  points_idx = points_idx.contiguous();
+  // The device of `features` alone decides which backend runs.
+  if (!features.is_cuda()) {
+    return weightedSumNormCpuForward(features, alphas, points_idx);
+  }
+#ifdef WITH_CUDA
+  // CUDA path: every input is checked with CHECK_CUDA first.
+  CHECK_CUDA(features);
+  CHECK_CUDA(alphas);
+  CHECK_CUDA(points_idx);
+  return weightedSumNormCudaForward(features, alphas, points_idx);
+#else
+  AT_ERROR("Not compiled with GPU support");
+#endif
+}
+
+std::tuple<torch::Tensor, torch::Tensor> weightedSumNormBackward(
+    torch::Tensor& grad_outputs,
+    torch::Tensor& features,
+    torch::Tensor& alphas,
+    torch::Tensor& points_idx) {
+  // Rebind the caller's tensors to contiguous versions before dispatching.
+  grad_outputs = grad_outputs.contiguous();
+  features = features.contiguous();
+  alphas = alphas.contiguous();
+  points_idx = points_idx.contiguous();
+  // The device of `grad_outputs` alone decides which backend runs.
+  if (!grad_outputs.is_cuda()) {
+    return weightedSumNormCpuBackward(
+        grad_outputs, features, alphas, points_idx);
+  }
+#ifdef WITH_CUDA
+  // CUDA path: every input is checked with CHECK_CUDA first.
+  CHECK_CUDA(grad_outputs);
+  CHECK_CUDA(features);
+  CHECK_CUDA(alphas);
+  CHECK_CUDA(points_idx);
+  return weightedSumNormCudaBackward(
+      grad_outputs, features, alphas, points_idx);
+#else
+  AT_ERROR("Not compiled with GPU support");
+#endif
+}
diff --git a/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum_cpu.cpp b/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..840ef3d24ae652fb42384afc755c0a889543e649
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum_cpu.cpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/extension.h>
+
+#include <cmath>
+#include <vector>
+
+// Lower bound for the per-pixel alpha sum before it is used as a divisor
+const float kEps = 1e-4;
+
+torch::Tensor weightedSumNormCpuForward(
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx) {
+  // Normalized alpha compositing on the CPU: every output pixel is the
+  // alpha-weighted mean of the features of the valid points covering it.
+  const int64_t batch = points_idx.size(0);
+  const int64_t num_points = points_idx.size(1);
+  const int64_t height = points_idx.size(2);
+  const int64_t width = points_idx.size(3);
+  const int64_t num_channels = features.size(0);
+
+  // Output starts at zero, so pixels without any valid point stay zero.
+  torch::Tensor result =
+      torch::zeros({batch, num_channels, height, width}, features.options());
+
+  auto feat = features.accessor<float, 2>();
+  auto weight = alphas.accessor<float, 4>();
+  auto idx = points_idx.accessor<int64_t, 4>();
+  auto out = result.accessor<float, 4>();
+
+  // Iterate over the batch
+  for (int n = 0; n < batch; ++n) {
+    // Iterate over the feature channels
+    for (int ch = 0; ch < num_channels; ++ch) {
+      // Rows top to bottom, then pixels left to right within a row
+      for (int row = 0; row < height; ++row) {
+        for (int col = 0; col < width; ++col) {
+          // First pass: total alpha of the valid entries (index >= 0)
+          float alpha_sum = 0.;
+          for (int k = 0; k < num_points; ++k) {
+            if (idx[n][k][row][col] >= 0) {
+              alpha_sum += weight[n][k][row][col];
+            }
+          }
+
+          // Clamp the divisor from below by kEps
+          const float denom = (alpha_sum < kEps) ? kEps : alpha_sum;
+
+          // Second pass: add each valid point's normalized contribution
+          for (int k = 0; k < num_points; ++k) {
+            const int64_t point = idx[n][k][row][col];
+            // Negative indices mark z-buffer slots with no point
+            if (point >= 0) {
+              out[n][ch][row][col] +=
+                  weight[n][k][row][col] * feat[ch][point] / denom;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return result;
+}
+
+std::tuple<torch::Tensor, torch::Tensor> weightedSumNormCpuBackward(
+    const torch::Tensor& grad_outputs,
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx) {
+  // CPU gradient of the normalized weighted sum with respect to `features`
+  // and `alphas`; both gradients start at zero and are accumulated in place.
+  torch::Tensor grad_features = torch::zeros_like(features);
+  torch::Tensor grad_alphas = torch::zeros_like(alphas);
+
+  const int64_t batch = points_idx.size(0);
+  const int64_t num_points = points_idx.size(1);
+  const int64_t height = points_idx.size(2);
+  const int64_t width = points_idx.size(3);
+  const int64_t num_channels = features.size(0);
+
+  auto grad_out = grad_outputs.accessor<float, 4>();
+  auto feat = features.accessor<float, 2>();
+  auto weight = alphas.accessor<float, 4>();
+  auto idx = points_idx.accessor<int64_t, 4>();
+  auto grad_feat = grad_features.accessor<float, 2>();
+  auto grad_weight = grad_alphas.accessor<float, 4>();
+
+  // Iterate over the batch
+  for (int n = 0; n < batch; ++n) {
+    // Iterate over the feature channels
+    for (int ch = 0; ch < num_channels; ++ch) {
+      // Rows top to bottom, then pixels left to right within a row
+      for (int row = 0; row < height; ++row) {
+        for (int col = 0; col < width; ++col) {
+          // First pass over the valid points: sum of alphas and sum of
+          // alpha * feature for this pixel and channel
+          float alpha_sum = 0.;
+          float weighted_sum = 0.;
+          for (int k = 0; k < num_points; ++k) {
+            const int64_t point = idx[n][k][row][col];
+            // Negative indices mark z-buffer slots with no point
+            if (point >= 0) {
+              alpha_sum += weight[n][k][row][col];
+              weighted_sum += weight[n][k][row][col] * feat[ch][point];
+            }
+          }
+
+          // Clamp the divisor from below by kEps
+          const float denom = (alpha_sum < kEps) ? kEps : alpha_sum;
+          const float upstream = grad_out[n][ch][row][col];
+
+          // Second pass: accumulate both gradients for each valid point
+          for (int k = 0; k < num_points; ++k) {
+            const int64_t point = idx[n][k][row][col];
+            if (point < 0) {
+              continue;
+            }
+            // d(out)/d(alpha_k) = (f_k * denom - weighted_sum) / denom^2
+            grad_weight[n][k][row][col] +=
+                upstream * (feat[ch][point] * denom - weighted_sum) /
+                (denom * denom);
+            // d(out)/d(f_k) = alpha_k / denom
+            grad_feat[ch][point] +=
+                upstream * weight[n][k][row][col] / denom;
+          }
+        }
+      }
+    }
+  }
+
+  return std::make_tuple(grad_features, grad_alphas);
+}
diff --git a/pytorch3d/pytorch3d/csrc/compositing/weighted_sum.cu b/pytorch3d/pytorch3d/csrc/compositing/weighted_sum.cu
new file mode 100644
index 0000000000000000000000000000000000000000..17a35bf924438408a0f039daac4c719b4496c716
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/compositing/weighted_sum.cu
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/core/TensorAccessor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <stdio.h>
+#include <vector>
+
+// TODO(gkioxari) support all data types once AtomicAdd supports doubles.
+// Currently, support is for floats only.
+__global__ void weightedSumCudaForwardKernel(
+    // clang-format off
+    at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> result,
+    const at::PackedTensorAccessor64<float, 2, at::RestrictPtrTraits> features,
+    const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
+    const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
+  // clang-format on
+  const int64_t K = points_idx.size(1);
+  const int64_t C = features.size(0);
+  const int64_t H = points_idx.size(2);
+  const int64_t W = points_idx.size(3);
+  // Weighted sum: result[b][c][j][i] = sum_k alpha_k * features[c][idx_k].
+  // blockIdx.x selects the batch element this block works on.
+  const int batch = blockIdx.x;
+  // 64-bit counters: C * H * W and the strided index must not wrap an int.
+  const int64_t num_pixels = C * H * W;
+  const int64_t num_threads = gridDim.y * blockDim.x;
+  const int64_t tid = blockIdx.y * blockDim.x + threadIdx.x;
+
+  // Each thread handles (channel, row, column) entries of this batch element,
+  // advancing by the number of threads that share the batch element.
+  for (int64_t pid = tid; pid < num_pixels; pid += num_threads) {
+    const int64_t ch = pid / (H * W);
+    const int64_t j = (pid % (H * W)) / W;
+    const int64_t i = (pid % (H * W)) % W;
+
+    // Iterate through the closest K points for this pixel
+    for (int64_t k = 0; k < K; ++k) {
+      const int64_t n_idx = points_idx[batch][k][j][i];
+      // Index stays 64-bit like the accessor; -1 means no point overlaps
+      if (n_idx < 0) {
+        continue;
+      }
+
+      // Accumulate the values
+      const float alpha = alphas[batch][k][j][i];
+      // TODO(gkioxari) It might be more efficient to have threads write in a
+      // local variable, and move atomicAdd outside of the loop such that
+      // atomicAdd is executed once per thread.
+      atomicAdd(&result[batch][ch][j][i], features[ch][n_idx] * alpha);
+    }
+  }
+}
+
+// TODO(gkioxari) support all data types once AtomicAdd supports doubles.
+// Currently, support is for floats only.
+__global__ void weightedSumCudaBackwardKernel(
+    // clang-format off
+    at::PackedTensorAccessor64<float, 2, at::RestrictPtrTraits> grad_features,
+    at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> grad_alphas,
+    const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> grad_outputs,
+    const at::PackedTensorAccessor64<float, 2, at::RestrictPtrTraits> features,
+    const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
+    const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
+  // clang-format on
+  const int64_t K = points_idx.size(1);
+  const int64_t C = features.size(0);
+  const int64_t H = points_idx.size(2);
+  const int64_t W = points_idx.size(3);
+  // Gradient of the (unnormalized) weighted sum w.r.t. features and alphas.
+  // blockIdx.x selects the batch element this block works on.
+  const int batch = blockIdx.x;
+  // 64-bit counters: C * H * W and the strided index must not wrap an int.
+  const int64_t num_pixels = C * H * W;
+  const int64_t num_threads = gridDim.y * blockDim.x;
+  const int64_t tid = blockIdx.y * blockDim.x + threadIdx.x;
+
+  // Iterate over each pixel to compute the contribution to the
+  // gradient for the features and weights
+  for (int64_t pid = tid; pid < num_pixels; pid += num_threads) {
+    const int64_t ch = pid / (H * W);
+    const int64_t j = (pid % (H * W)) / W;
+    const int64_t i = (pid % (H * W)) % W;
+
+    // Iterate through the closest K points for this pixel
+    for (int64_t k = 0; k < K; ++k) {
+      const int64_t n_idx = points_idx[batch][k][j][i];
+      // Index stays 64-bit like the accessor; -1 means no point overlaps
+      if (n_idx < 0) {
+        continue;
+      }
+      const float alpha = alphas[batch][k][j][i];
+      // Several threads may update the same gradient entry, hence atomicAdd.
+      // TODO(gkioxari) It might be more efficient to have threads write in a
+      // local variable, and move atomicAdd outside of the loop such that
+      // atomicAdd is executed once per thread.
+      atomicAdd(
+          &grad_alphas[batch][k][j][i],
+          features[ch][n_idx] * grad_outputs[batch][ch][j][i]);
+      atomicAdd(
+          &grad_features[ch][n_idx], alpha * grad_outputs[batch][ch][j][i]);
+    }
+  }
+}
+
+at::Tensor weightedSumCudaForward(
+    const at::Tensor& features,
+    const at::Tensor& alphas,
+    const at::Tensor& points_idx) {
+  // Host-side launcher: verify inputs share one GPU, then run the kernel.
+  at::TensorArg features_t{features, "features", 1},
+      alphas_t{alphas, "alphas", 2}, points_idx_t{points_idx, "points_idx", 3};
+  at::CheckedFrom c = "weightedSumCudaForward";
+  at::checkAllSameGPU(c, {features_t, alphas_t, points_idx_t});
+  at::checkAllSameType(c, {features_t, alphas_t});
+  // (points_idx is not part of the dtype check; it is read as int64 below.)
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(features.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const int64_t batch_size = points_idx.size(0);
+  const int64_t C = features.size(0);
+  const int64_t H = points_idx.size(2);
+  const int64_t W = points_idx.size(3);
+  // Zero-initialized output of shape (batch_size, C, H, W).
+  auto result = at::zeros({batch_size, C, H, W}, features.options());
+  // An empty output needs no launch; this also rules out batch_size == 0 below.
+  if (result.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return result;
+  }
+  // Grid is (batch_size, 1024 / batch_size + 1) blocks of 64 threads each.
+  const dim3 threadsPerBlock(64);
+  const dim3 numBlocks(batch_size, 1024 / batch_size + 1);
+
+  // TODO(gkioxari) add AT_DISPATCH_FLOATING_TYPES once atomicAdd supports
+  // doubles. Currently, support is for floats only.
+  weightedSumCudaForwardKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
+      // clang-format off
+      // As we are using packed accessors here the tensors
+      // do not need to be made contiguous.
+      result.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
+      features.packed_accessor64<float, 2, at::RestrictPtrTraits>(),
+      alphas.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
+      points_idx.packed_accessor64<int64_t, 4, at::RestrictPtrTraits>());
+  // clang-format on
+  AT_CUDA_CHECK(cudaGetLastError());
+  return result;
+}
+
+std::tuple<at::Tensor, at::Tensor> weightedSumCudaBackward(
+    const at::Tensor& grad_outputs,
+    const at::Tensor& features,
+    const at::Tensor& alphas,
+    const at::Tensor& points_idx) {
+  // Host-side launcher: verify inputs share one GPU, then run the kernel.
+  at::TensorArg grad_outputs_t{grad_outputs, "grad_outputs", 1},
+      features_t{features, "features", 2}, alphas_t{alphas, "alphas", 3},
+      points_idx_t{points_idx, "points_idx", 4};
+  at::CheckedFrom c = "weightedSumCudaBackward";
+  at::checkAllSameGPU(c, {grad_outputs_t, features_t, alphas_t, points_idx_t});
+  at::checkAllSameType(c, {grad_outputs_t, features_t, alphas_t});
+  // (points_idx is not part of the dtype check; it is read as int64 below.)
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(features.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  // Both gradients start at zero; the kernel adds into them with atomicAdd.
+  auto grad_features = at::zeros_like(features);
+  auto grad_alphas = at::zeros_like(alphas);
+  // If either gradient is empty there is nothing to launch.
+  if (grad_features.numel() == 0 || grad_alphas.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return std::make_tuple(grad_features, grad_alphas);
+  }
+  // NOTE(review): bs > 0 below assumes alphas and points_idx share dim 0.
+  const int64_t bs = points_idx.size(0);
+  // Grid is (bs, 1024 / bs + 1) blocks of 64 threads each.
+  const dim3 threadsPerBlock(64);
+  const dim3 numBlocks(bs, 1024 / bs + 1);
+
+  // TODO(gkioxari) add AT_DISPATCH_FLOATING_TYPES once atomicAdd supports
+  // doubles. Currently, support is for floats only.
+  weightedSumCudaBackwardKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
+      // clang-format off
+      // As we are using packed accessors here the tensors
+      // do not need to be made contiguous.
+      grad_features.packed_accessor64<float, 2, at::RestrictPtrTraits>(),
+      grad_alphas.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
+      grad_outputs.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
+      features.packed_accessor64<float, 2, at::RestrictPtrTraits>(),
+      alphas.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
+      points_idx.packed_accessor64<int64_t, 4, at::RestrictPtrTraits>());
+  // clang-format on
+  AT_CUDA_CHECK(cudaGetLastError());
+  return std::make_tuple(grad_features, grad_alphas);
+}
diff --git a/pytorch3d/pytorch3d/csrc/compositing/weighted_sum.h b/pytorch3d/pytorch3d/csrc/compositing/weighted_sum.h
new file mode 100644
index 0000000000000000000000000000000000000000..0be6e4988419da7710b8f9c412caf0f8699cf35f
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/compositing/weighted_sum.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/extension.h>
+#include "utils/pytorch3d_cutils.h"
+
+#include <vector>
+
+// Perform weighted sum compositing of points in a z-buffer.
+//
+// Inputs:
+//    features: FloatTensor of shape (C, P) which gives the features
+//            of each point where C is the size of the feature and
+//            P the number of points.
+//    alphas: FloatTensor of shape (N, points_per_pixel, H, W) where
+//            points_per_pixel is the number of points in the z-buffer
+//            sorted in z-order, and (H, W) is the image size.
+//    points_idx: LongTensor of shape (N, points_per_pixel, H, W) giving the
+//            indices of the nearest points at each pixel, sorted in z-order.
+// Returns:
+//    weighted_fs: FloatTensor of shape (N, C, H, W) giving the accumulated
+//            feature at each pixel. Concretely, it gives:
+//                 weighted_fs[b,c,i,j] = sum_k alphas[b,k,i,j] *
+//                   features[c,points_idx[b,k,i,j]]
+
+// CUDA entry points (weighted_sum.cu); declared only when WITH_CUDA is set
+#ifdef WITH_CUDA
+torch::Tensor weightedSumCudaForward(
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx);
+
+std::tuple<torch::Tensor, torch::Tensor> weightedSumCudaBackward(
+    const torch::Tensor& grad_outputs,
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx);
+#endif
+
+// CPU entry points (weighted_sum_cpu.cpp); always declared
+torch::Tensor weightedSumCpuForward(
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx);
+
+std::tuple<torch::Tensor, torch::Tensor> weightedSumCpuBackward(
+    const torch::Tensor& grad_outputs,
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx);
+
+torch::Tensor weightedSumForward(
+    torch::Tensor& features,
+    torch::Tensor& alphas,
+    torch::Tensor& points_idx) {
+  // Rebind the caller's tensors to contiguous versions before dispatching.
+  features = features.contiguous();
+  alphas = alphas.contiguous();
+  points_idx = points_idx.contiguous();
+  // The device of `features` alone decides which backend runs.
+  if (!features.is_cuda()) {
+    return weightedSumCpuForward(features, alphas, points_idx);
+  }
+#ifdef WITH_CUDA
+  CHECK_CUDA(features);
+  CHECK_CUDA(alphas);
+  CHECK_CUDA(points_idx);
+  return weightedSumCudaForward(features, alphas, points_idx);
+#else
+  AT_ERROR("Not compiled with GPU support");
+#endif
+}
+
+std::tuple<torch::Tensor, torch::Tensor> weightedSumBackward(
+    torch::Tensor& grad_outputs,
+    torch::Tensor& features,
+    torch::Tensor& alphas,
+    torch::Tensor& points_idx) {
+  // Rebind the caller's tensors to contiguous versions before dispatching.
+  grad_outputs = grad_outputs.contiguous();
+  features = features.contiguous();
+  alphas = alphas.contiguous();
+  points_idx = points_idx.contiguous();
+  // The device of `grad_outputs` alone decides which backend runs.
+  if (!grad_outputs.is_cuda()) {
+    return weightedSumCpuBackward(grad_outputs, features, alphas, points_idx);
+  }
+#ifdef WITH_CUDA
+  // CUDA path: every input is checked with CHECK_CUDA first.
+  CHECK_CUDA(grad_outputs);
+  CHECK_CUDA(features);
+  CHECK_CUDA(alphas);
+  CHECK_CUDA(points_idx);
+  return weightedSumCudaBackward(grad_outputs, features, alphas, points_idx);
+#else
+  AT_ERROR("Not compiled with GPU support");
+#endif
+}
diff --git a/pytorch3d/pytorch3d/csrc/compositing/weighted_sum_cpu.cpp b/pytorch3d/pytorch3d/csrc/compositing/weighted_sum_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b7bddee3c9791647352b686d368dd2e6adccf27f
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/compositing/weighted_sum_cpu.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/extension.h>
+
+#include <cmath>
+#include <vector>
+
+torch::Tensor weightedSumCpuForward(
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx) {
+  // Loop bounds: dims 0-3 of points_idx and dim 0 of features.
+  const int64_t batch_size = points_idx.size(0);
+  const int64_t num_layers = points_idx.size(1);
+  const int64_t height = points_idx.size(2);
+  const int64_t width = points_idx.size(3);
+  const int64_t channels = features.size(0);
+
+  // Zero-initialised output, indexed below as [batch][channel][row][col],
+  // created with the same tensor options as features.
+  torch::Tensor result =
+      torch::zeros({batch_size, channels, height, width}, features.options());
+
+  // The accessors fix the expected element types and ranks of the tensors.
+  auto feat = features.accessor<float, 2>();
+  auto weight = alphas.accessor<float, 4>();
+  auto index = points_idx.accessor<int64_t, 4>();
+  auto out = result.accessor<float, 4>();
+
+  // Visit every output element (b, c, y, x) ...
+  for (int b = 0; b < batch_size; ++b) {
+    for (int c = 0; c < channels; ++c) {
+      for (int y = 0; y < height; ++y) {
+        for (int x = 0; x < width; ++x) {
+          // ... and add up the weighted features of its K candidate points.
+          for (int k = 0; k < num_layers; ++k) {
+            const int64_t point = index[b][k][y][x];
+            // A negative index (the sentinel is -1) marks an empty slot.
+            if (point >= 0) {
+              const float alpha = weight[b][k][y][x];
+              out[b][c][y][x] += alpha * feat[c][point];
+            }
+          }
+        }
+      }
+    }
+  }
+  return result;
+}
+
+std::tuple<torch::Tensor, torch::Tensor> weightedSumCpuBackward(
+    const torch::Tensor& grad_outputs,
+    const torch::Tensor& features,
+    const torch::Tensor& alphas,
+    const torch::Tensor& points_idx) {
+  // Loop bounds: dims 0-3 of points_idx and dim 0 of features.
+  const int64_t batch_size = points_idx.size(0);
+  const int64_t num_layers = points_idx.size(1);
+  const int64_t height = points_idx.size(2);
+  const int64_t width = points_idx.size(3);
+  const int64_t channels = features.size(0);
+
+  // Both gradients start at zero and are accumulated in place below.
+  torch::Tensor grad_features = torch::zeros_like(features);
+  torch::Tensor grad_alphas = torch::zeros_like(alphas);
+
+  // The accessors fix the expected element types and ranks of the tensors.
+  auto upstream = grad_outputs.accessor<float, 4>();
+  auto feat = features.accessor<float, 2>();
+  auto weight = alphas.accessor<float, 4>();
+  auto index = points_idx.accessor<int64_t, 4>();
+  auto d_feat = grad_features.accessor<float, 2>();
+  auto d_weight = grad_alphas.accessor<float, 4>();
+
+  // Visit every output element (b, c, y, x) ...
+  for (int b = 0; b < batch_size; ++b) {
+    for (int c = 0; c < channels; ++c) {
+      for (int y = 0; y < height; ++y) {
+        for (int x = 0; x < width; ++x) {
+          // ... and route its gradient to the K candidate points.
+          for (int k = 0; k < num_layers; ++k) {
+            const int64_t point = index[b][k][y][x];
+            // A negative index (the sentinel is -1) marks an empty slot.
+            if (point >= 0) {
+              const float alpha = weight[b][k][y][x];
+              const float grad = upstream[b][c][y][x];
+              // Forward adds alpha * feature, so d/d alpha is the feature ...
+              d_weight[b][k][y][x] += grad * feat[c][point];
+              // ... and d/d feature is alpha.
+              d_feat[c][point] += grad * alpha;
+            }
+          }
+        }
+      }
+    }
+  }
+  return std::make_tuple(grad_features, grad_alphas);
+}
diff --git a/pytorch3d/pytorch3d/csrc/ext.cpp b/pytorch3d/pytorch3d/csrc/ext.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6a17dbb0ce77bc13d90003889c0abff853b09be6
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/ext.cpp
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// clang-format off
+#include "./pulsar/global.h" // Include before <torch/extension.h>.
+#include <torch/extension.h>
+// clang-format on
+#include "./pulsar/pytorch/renderer.h"
+#include "./pulsar/pytorch/tensor_util.h"
+#include "ball_query/ball_query.h"
+#include "blending/sigmoid_alpha_blend.h"
+#include "compositing/alpha_composite.h"
+#include "compositing/norm_weighted_sum.h"
+#include "compositing/weighted_sum.h"
+#include "face_areas_normals/face_areas_normals.h"
+#include "gather_scatter/gather_scatter.h"
+#include "interp_face_attrs/interp_face_attrs.h"
+#include "iou_box3d/iou_box3d.h"
+#include "knn/knn.h"
+#include "marching_cubes/marching_cubes.h"
+#include "mesh_normal_consistency/mesh_normal_consistency.h"
+#include "packed_to_padded_tensor/packed_to_padded_tensor.h"
+#include "point_mesh/point_mesh_cuda.h"
+#include "points_to_volumes/points_to_volumes.h"
+#include "rasterize_meshes/rasterize_meshes.h"
+#include "rasterize_points/rasterize_points.h"
+#include "sample_farthest_points/sample_farthest_points.h"
+#include "sample_pdf/sample_pdf.h"
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { // Python bindings of module `m`
+  m.def("face_areas_normals_forward", &FaceAreasNormalsForward);
+  m.def("face_areas_normals_backward", &FaceAreasNormalsBackward);
+  m.def("packed_to_padded", &PackedToPadded);
+  m.def("padded_to_packed", &PaddedToPacked);
+  m.def("interp_face_attrs_forward", &InterpFaceAttrsForward);
+  m.def("interp_face_attrs_backward", &InterpFaceAttrsBackward);
+#ifdef WITH_CUDA // knn_check_version is bound only in CUDA builds
+  m.def("knn_check_version", &KnnCheckVersion);
+#endif
+  m.def("knn_points_idx", &KNearestNeighborIdx);
+  m.def("knn_points_backward", &KNearestNeighborBackward);
+  m.def("ball_query", &BallQuery);
+  m.def("sample_farthest_points", &FarthestPointSampling);
+  m.def(
+      "mesh_normal_consistency_find_verts", &MeshNormalConsistencyFindVertices);
+  m.def("gather_scatter", &GatherScatter);
+  m.def("points_to_volumes_forward", PointsToVolumesForward);
+  m.def("points_to_volumes_backward", PointsToVolumesBackward);
+  m.def("rasterize_points", &RasterizePoints);
+  m.def("rasterize_points_backward", &RasterizePointsBackward);
+  m.def("rasterize_meshes_backward", &RasterizeMeshesBackward);
+  m.def("rasterize_meshes", &RasterizeMeshes);
+  m.def("sigmoid_alpha_blend", &SigmoidAlphaBlend);
+  m.def("sigmoid_alpha_blend_backward", &SigmoidAlphaBlendBackward);
+
+  // Accumulation functions (forward passes first, then their backward passes)
+  m.def("accum_weightedsumnorm", &weightedSumNormForward);
+  m.def("accum_weightedsum", &weightedSumForward);
+  m.def("accum_alphacomposite", &alphaCompositeForward);
+  m.def("accum_weightedsumnorm_backward", &weightedSumNormBackward);
+  m.def("accum_weightedsum_backward", &weightedSumBackward);
+  m.def("accum_alphacomposite_backward", &alphaCompositeBackward);
+
+  // These are only visible for testing; users should not call them directly
+  m.def("_rasterize_points_coarse", &RasterizePointsCoarse);
+  m.def("_rasterize_points_naive", &RasterizePointsNaive);
+  m.def("_rasterize_meshes_naive", &RasterizeMeshesNaive);
+  m.def("_rasterize_meshes_coarse", &RasterizeMeshesCoarse);
+  m.def("_rasterize_meshes_fine", &RasterizeMeshesFine);
+
+  // PointEdge distance functions
+  m.def("point_edge_dist_forward", &PointEdgeDistanceForward);
+  m.def("point_edge_dist_backward", &PointEdgeDistanceBackward);
+  m.def("edge_point_dist_forward", &EdgePointDistanceForward);
+  m.def("edge_point_dist_backward", &EdgePointDistanceBackward);
+  m.def("point_edge_array_dist_forward", &PointEdgeArrayDistanceForward);
+  m.def("point_edge_array_dist_backward", &PointEdgeArrayDistanceBackward);
+
+  // PointFace distance functions
+  m.def("point_face_dist_forward", &PointFaceDistanceForward);
+  m.def("point_face_dist_backward", &PointFaceDistanceBackward);
+  m.def("face_point_dist_forward", &FacePointDistanceForward);
+  m.def("face_point_dist_backward", &FacePointDistanceBackward);
+  m.def("point_face_array_dist_forward", &PointFaceArrayDistanceForward);
+  m.def("point_face_array_dist_backward", &PointFaceArrayDistanceBackward);
+
+  // Sample PDF
+  m.def("sample_pdf", &SamplePdf);
+
+  // 3D IoU
+  m.def("iou_box3d", &IoUBox3D);
+
+  // Marching cubes
+  m.def("marching_cubes", &MarchingCubes);
+
+  // Pulsar: optional log setup, the PulsarRenderer class, a helper, constants.
+#ifdef PULSAR_LOGGING_ENABLED
+  c10::ShowLogInfoToStderr();
+#endif
+  py::class_<
+      pulsar::pytorch::Renderer,
+      std::shared_ptr<pulsar::pytorch::Renderer>>(m, "PulsarRenderer")
+      .def(py::init<
+           const uint&,
+           const uint&,
+           const uint&,
+           const bool&,
+           const bool&,
+           const float&,
+           const uint&,
+           const uint&>())
+      .def(
+          "__eq__",
+          [](const pulsar::pytorch::Renderer& a,
+             const pulsar::pytorch::Renderer& b) { return a == b; },
+          py::is_operator())
+      .def(
+          "__ne__",
+          [](const pulsar::pytorch::Renderer& a,
+             const pulsar::pytorch::Renderer& b) { return !(a == b); },
+          py::is_operator())
+      .def(
+          "__repr__",
+          [](const pulsar::pytorch::Renderer& self) {
+            std::stringstream ss;
+            ss << self;
+            return ss.str();
+          })
+      .def(
+          "forward",
+          &pulsar::pytorch::Renderer::forward,
+          py::arg("vert_pos"),
+          py::arg("vert_col"),
+          py::arg("vert_radii"),
+
+          py::arg("cam_pos"),
+          py::arg("pixel_0_0_center"),
+          py::arg("pixel_vec_x"),
+          py::arg("pixel_vec_y"),
+          py::arg("focal_length"),
+          py::arg("principal_point_offsets"),
+
+          py::arg("gamma"),
+          py::arg("max_depth"),
+          py::arg("min_depth") /* = 0.f*/,
+          py::arg(
+              "bg_col") /* = at::nullopt not exposed properly in pytorch 1.1. */
+          ,
+          py::arg("opacity") /* = at::nullopt ... */,
+          py::arg("percent_allowed_difference") = 0.01f, // first real default
+          py::arg("max_n_hits") = MAX_UINT,
+          py::arg("mode") = 0)
+      .def("backward", &pulsar::pytorch::Renderer::backward)
+      .def_property(
+          "device_tracker",
+          [](const pulsar::pytorch::Renderer& self) {
+            return self.device_tracker;
+          },
+          [](pulsar::pytorch::Renderer& self, const torch::Tensor& val) {
+            self.device_tracker = val;
+          })
+      .def_property_readonly("width", &pulsar::pytorch::Renderer::width)
+      .def_property_readonly("height", &pulsar::pytorch::Renderer::height)
+      .def_property_readonly(
+          "max_num_balls", &pulsar::pytorch::Renderer::max_num_balls)
+      .def_property_readonly(
+          "orthogonal", &pulsar::pytorch::Renderer::orthogonal)
+      .def_property_readonly(
+          "right_handed", &pulsar::pytorch::Renderer::right_handed)
+      .def_property_readonly("n_track", &pulsar::pytorch::Renderer::n_track);
+  m.def(
+      "pulsar_sphere_ids_from_result_info_nograd",
+      &pulsar::pytorch::sphere_ids_from_result_info_nograd);
+  // Constants, exposed as attributes of the module.
+  m.attr("EPS") = py::float_(EPS);
+  m.attr("MAX_FLOAT") = py::float_(MAX_FLOAT);
+  m.attr("MAX_INT") = py::int_(MAX_INT);
+  m.attr("MAX_UINT") = py::int_(MAX_UINT);
+  m.attr("MAX_USHORT") = py::int_(MAX_USHORT);
+  m.attr("PULSAR_MAX_GRAD_SPHERES") = py::int_(MAX_GRAD_SPHERES);
+}
diff --git a/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals.cu b/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals.cu
new file mode 100644
index 0000000000000000000000000000000000000000..58aeb20fcfd2e5d51ab93054f176f9a2e4962ca4
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals.cu
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <tuple>
+
+template <typename scalar_t>
+__global__ void FaceAreasNormalsForwardKernel(
+    const scalar_t* __restrict__ verts,
+    const int64_t* __restrict__ faces,
+    scalar_t* __restrict__ face_areas,
+    scalar_t* __restrict__ face_normals,
+    const size_t V,
+    const size_t F) {
+  // Grid-stride loop: this thread handles faces first, first + step, ...
+  const size_t first = blockIdx.x * blockDim.x + threadIdx.x;
+  const size_t step = gridDim.x * blockDim.x;
+
+  // For each of its faces the thread stores the area in face_areas[f] and
+  // the normal in face_normals[3 * f + {0, 1, 2}]. V is not used here.
+  for (size_t f = first; f < F; f += step) {
+    // xyz triplets of the three corners of face f.
+    const scalar_t* p0 = verts + 3 * faces[3 * f + 0];
+    const scalar_t* p1 = verts + 3 * faces[3 * f + 1];
+    const scalar_t* p2 = verts + 3 * faces[3 * f + 2];
+
+    // First edge: corner 1 minus corner 0.
+    const scalar_t ux = p1[0] - p0[0];
+    const scalar_t uy = p1[1] - p0[1];
+    const scalar_t uz = p1[2] - p0[2];
+
+    // Second edge: corner 2 minus corner 0.
+    const scalar_t wx = p2[0] - p0[0];
+    const scalar_t wy = p2[1] - p0[1];
+    const scalar_t wz = p2[2] - p0[2];
+
+    // Cross product of the two edges.
+    const scalar_t nx = uy * wz - uz * wy;
+    const scalar_t ny = uz * wx - ux * wz;
+    const scalar_t nz = ux * wy - uy * wx;
+
+    // The stored area is half the length of the cross product.
+    scalar_t len = sqrt(nx * nx + ny * ny + nz * nz);
+    face_areas[f] = len / 2.0;
+
+    // Clamp the length from below (max(len, 1e-6)) to avoid dividing by zero.
+    if (len < 1e-6) {
+      len = 1e-6;
+    }
+
+    // Normalised cross product, written as three consecutive values.
+    scalar_t* normal = face_normals + 3 * f;
+    normal[0] = nx / len;
+    normal[1] = ny / len;
+    normal[2] = nz / len;
+  }
+}
+
+// Backward pass: accumulates the gradient w.r.t. verts into grad_verts.
+// TODO(gkioxari) support doubles once AtomicAdd does; floats only for now.
+__global__ void FaceAreasNormalsBackwardKernel(
+    const float* __restrict__ grad_areas,
+    const float* __restrict__ grad_normals,
+    const float* __restrict__ verts,
+    const int64_t* __restrict__ faces,
+    float* __restrict__ grad_verts,
+    const size_t V,
+    const size_t F) {
+  const size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const size_t stride = gridDim.x * blockDim.x;
+
+  // Faces split evenly over the number of threads in the grid.
+  // Each thread computes the gradients of its respective faces and adds them
+  // to grad_verts with atomicAdd, as different faces may share a vertex.
+  for (size_t f = tid; f < F; f += stride) {
+    const int64_t i0 = faces[3 * f + 0];
+    const int64_t i1 = faces[3 * f + 1];
+    const int64_t i2 = faces[3 * f + 2];
+
+    const float v0_x = verts[3 * i0 + 0];
+    const float v0_y = verts[3 * i0 + 1];
+    const float v0_z = verts[3 * i0 + 2];
+
+    const float v1_x = verts[3 * i1 + 0];
+    const float v1_y = verts[3 * i1 + 1];
+    const float v1_z = verts[3 * i1 + 2];
+
+    const float v2_x = verts[3 * i2 + 0];
+    const float v2_y = verts[3 * i2 + 1];
+    const float v2_z = verts[3 * i2 + 2];
+
+    const float ax = v1_x - v0_x;
+    const float ay = v1_y - v0_y;
+    const float az = v1_z - v0_z;
+
+    const float bx = v2_x - v0_x;
+    const float by = v2_y - v0_y;
+    const float bz = v2_z - v0_z;
+
+    const float cx = ay * bz - az * by;
+    const float cy = az * bx - ax * bz;
+    const float cz = ax * by - ay * bx;
+
+    float norm = sqrt(cx * cx + cy * cy + cz * cz);
+    norm = (norm < 1e-6) ? 1e-6 : norm; // max(norm, 1e-6)
+    float inv_norm = 1. / norm;
+    float inv_norm_2 = pow(inv_norm, 2.0f);
+    float inv_norm_3 = pow(inv_norm, 3.0f);
+
+    // We compute gradients with respect to the input vertices.
+    // For each vertex, gradients come from grad_areas and grad_normals.
+    // eg, grad_v0_x = (d / d v0_x)
+    //       = \sum_f (d / d areas[f]) * (d areas[f] / d v0_x)
+    //              + (d / d normals[f, 0]) * (d normals[f, 0] / d v0_x)
+    //              + (d / d normals[f, 1]) * (d normals[f, 1] / d v0_x)
+    //              + (d / d normals[f, 2]) * (d normals[f, 2] / d v0_x)
+    // with (d / d areas[f]) = grad_areas[f] and
+    //      (d / d normals[f, j]) = grad_normals[f][j].
+    // The equations below are derived after taking
+    // derivatives wrt to the vertices (fun times!).
+
+    // grad v0 coming from grad areas and grad normals
+    const float grad_v0_x =
+        ((-az + bz) * cy + (-by + ay) * cz) / 2.0 * inv_norm * grad_areas[f] +
+        -cx * ((-az + bz) * cy + (-by + ay) * cz) * inv_norm_3 *
+            grad_normals[3 * f + 0] +
+        ((-az + bz) - cy * ((-az + bz) * cy + (-by + ay) * cz) * inv_norm_2) *
+            inv_norm * grad_normals[3 * f + 1] +
+        ((-by + ay) - cz * ((-az + bz) * cy + (-by + ay) * cz) * inv_norm_2) *
+            inv_norm * grad_normals[3 * f + 2];
+    atomicAdd(grad_verts + 3 * i0 + 0, grad_v0_x);
+
+    const float grad_v0_y =
+        ((-bz + az) * cx + (-ax + bx) * cz) / 2.0 * inv_norm * grad_areas[f] +
+        ((-bz + az) - cx * ((-bz + az) * cx + (-ax + bx) * cz) * inv_norm_2) *
+            inv_norm * grad_normals[3 * f + 0] +
+        -cy * ((-bz + az) * cx + (-ax + bx) * cz) * inv_norm_3 *
+            grad_normals[3 * f + 1] +
+        ((-ax + bx) - cz * ((-bz + az) * cx + (-ax + bx) * cz) * inv_norm_2) *
+            inv_norm * grad_normals[3 * f + 2];
+    atomicAdd(grad_verts + 3 * i0 + 1, grad_v0_y);
+
+    const float grad_v0_z =
+        ((-ay + by) * cx + (-bx + ax) * cy) / 2.0 * inv_norm * grad_areas[f] +
+        ((-ay + by) - cx * ((-ay + by) * cx + (-bx + ax) * cy) * inv_norm_2) *
+            inv_norm * grad_normals[3 * f + 0] +
+        ((-bx + ax) - cy * ((-ay + by) * cx + (-bx + ax) * cy) * inv_norm_2) *
+            inv_norm * grad_normals[3 * f + 1] +
+        -cz * ((-ay + by) * cx + (-bx + ax) * cy) * inv_norm_3 *
+            grad_normals[3 * f + 2];
+    atomicAdd(grad_verts + 3 * i0 + 2, grad_v0_z);
+
+    // grad v1 coming from grad areas and grad normals
+    const float grad_v1_x =
+        (by * cz - bz * cy) / 2.0 * inv_norm * grad_areas[f] +
+        -cx * (by * cz - bz * cy) * inv_norm_3 * grad_normals[3 * f + 0] +
+        (-bz - cy * (by * cz - bz * cy) * inv_norm_2) * inv_norm *
+            grad_normals[3 * f + 1] +
+        (by - cz * (by * cz - bz * cy) * inv_norm_2) * inv_norm *
+            grad_normals[3 * f + 2];
+    atomicAdd(grad_verts + 3 * i1 + 0, grad_v1_x);
+
+    const float grad_v1_y =
+        (bz * cx - bx * cz) / 2.0 * inv_norm * grad_areas[f] +
+        (bz - cx * (bz * cx - bx * cz) * inv_norm_2) * inv_norm *
+            grad_normals[3 * f + 0] +
+        -cy * (bz * cx - bx * cz) * inv_norm_3 * grad_normals[3 * f + 1] +
+        (-bx - cz * (bz * cx - bx * cz) * inv_norm_2) * inv_norm *
+            grad_normals[3 * f + 2];
+    atomicAdd(grad_verts + 3 * i1 + 1, grad_v1_y);
+
+    const float grad_v1_z =
+        (bx * cy - by * cx) / 2.0 * inv_norm * grad_areas[f] +
+        (-by - cx * (bx * cy - by * cx) * inv_norm_2) * inv_norm *
+            grad_normals[3 * f + 0] +
+        (bx - cy * (bx * cy - by * cx) * inv_norm_2) * inv_norm *
+            grad_normals[3 * f + 1] +
+        -cz * (bx * cy - by * cx) * inv_norm_3 * grad_normals[3 * f + 2];
+    atomicAdd(grad_verts + 3 * i1 + 2, grad_v1_z);
+
+    // grad v2 coming from grad areas and grad normals
+    const float grad_v2_x =
+        (az * cy - ay * cz) / 2.0 * inv_norm * grad_areas[f] +
+        -cx * (az * cy - ay * cz) * inv_norm_3 * grad_normals[3 * f + 0] +
+        (az - cy * (az * cy - ay * cz) * inv_norm_2) * inv_norm *
+            grad_normals[3 * f + 1] +
+        (-ay - cz * (az * cy - ay * cz) * inv_norm_2) * inv_norm *
+            grad_normals[3 * f + 2];
+    atomicAdd(grad_verts + 3 * i2 + 0, grad_v2_x);
+
+    const float grad_v2_y =
+        (ax * cz - az * cx) / 2.0 * inv_norm * grad_areas[f] +
+        (-az - cx * (ax * cz - az * cx) * inv_norm_2) * inv_norm *
+            grad_normals[3 * f + 0] +
+        -cy * (ax * cz - az * cx) * inv_norm_3 * grad_normals[3 * f + 1] +
+        (ax - cz * (ax * cz - az * cx) * inv_norm_2) * inv_norm *
+            grad_normals[3 * f + 2];
+    atomicAdd(grad_verts + 3 * i2 + 1, grad_v2_y);
+
+    const float grad_v2_z =
+        (ay * cx - ax * cy) / 2.0 * inv_norm * grad_areas[f] +
+        (ay - cx * (ay * cx - ax * cy) * inv_norm_2) * inv_norm *
+            grad_normals[3 * f + 0] +
+        (-ax - cy * (ay * cx - ax * cy) * inv_norm_2) * inv_norm *
+            grad_normals[3 * f + 1] +
+        -cz * (ay * cx - ax * cy) * inv_norm_3 * grad_normals[3 * f + 2];
+    atomicAdd(grad_verts + 3 * i2 + 2, grad_v2_z);
+  }
+}
+
+std::tuple<at::Tensor, at::Tensor> FaceAreasNormalsForwardCuda(
+    const at::Tensor verts,
+    const at::Tensor faces) {
+  const auto num_verts = verts.size(0);
+  const auto num_faces = faces.size(0);
+
+  // Both inputs must live on the same GPU.
+  at::TensorArg verts_arg{verts, "verts", 1};
+  at::TensorArg faces_arg{faces, "faces", 2};
+  at::CheckedFrom caller = "FaceAreasNormalsForwardCuda";
+  at::checkAllSameGPU(caller, {verts_arg, faces_arg});
+
+  // Launch on the device that holds verts, on the current stream.
+  at::cuda::CUDAGuard device_guard(verts.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  // Outputs: one area and one 3-vector normal per face.
+  at::Tensor areas = at::empty({num_faces}, verts.options());
+  at::Tensor normals = at::empty({num_faces, 3}, verts.options());
+  const int grid_size = 64;
+  const int block_size = 512;
+
+  // With zero faces there is nothing to compute and the launch is skipped.
+  if (areas.numel() != 0) {
+    AT_DISPATCH_FLOATING_TYPES(
+        verts.scalar_type(), "face_areas_normals_forward_cuda", ([&] {
+          FaceAreasNormalsForwardKernel<scalar_t>
+              <<<grid_size, block_size, 0, stream>>>(
+                  verts.contiguous().data_ptr<scalar_t>(),
+                  faces.contiguous().data_ptr<int64_t>(),
+                  areas.data_ptr<scalar_t>(),
+                  normals.data_ptr<scalar_t>(),
+                  num_verts,
+                  num_faces);
+        }));
+  }
+  AT_CUDA_CHECK(cudaGetLastError());
+  return std::make_tuple(areas, normals);
+}
+
+at::Tensor FaceAreasNormalsBackwardCuda(
+    const at::Tensor grad_areas,
+    const at::Tensor grad_normals,
+    const at::Tensor verts,
+    const at::Tensor faces) {
+  // All four inputs must live on the same GPU.
+  at::TensorArg verts_arg{verts, "verts", 1};
+  at::TensorArg faces_arg{faces, "faces", 2};
+  at::TensorArg grad_areas_arg{grad_areas, "grad_areas", 3};
+  at::TensorArg grad_normals_arg{grad_normals, "grad_normals", 4};
+  at::CheckedFrom caller = "FaceAreasNormalsBackwardCuda";
+  at::checkAllSameGPU(
+      caller, {verts_arg, faces_arg, grad_areas_arg, grad_normals_arg});
+
+  // The kernel accumulates with atomicAdd, which makes it nondeterministic.
+  at::globalContext().alertNotDeterministic("FaceAreasNormalsBackwardCuda");
+
+  // Launch on the device that holds verts, on the current stream.
+  at::cuda::CUDAGuard device_guard(verts.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const auto num_verts = verts.size(0);
+  const auto num_faces = faces.size(0);
+  const int grid_size = 64;
+  const int block_size = 512;
+
+  // Zero-initialised because the kernel only ever adds to it.
+  at::Tensor grad_verts = at::zeros({num_verts, 3}, grad_areas.options());
+
+  // TODO(gkioxari) add AT_DISPATCH_FLOATING_TYPES once atomicAdd supports
+  // doubles. Currently, support is for floats only.
+  if (grad_verts.numel() != 0) {
+    FaceAreasNormalsBackwardKernel<<<grid_size, block_size, 0, stream>>>(
+        grad_areas.contiguous().data_ptr<float>(),
+        grad_normals.contiguous().data_ptr<float>(),
+        verts.contiguous().data_ptr<float>(),
+        faces.contiguous().data_ptr<int64_t>(),
+        grad_verts.data_ptr<float>(),
+        num_verts,
+        num_faces);
+  }
+  AT_CUDA_CHECK(cudaGetLastError());
+  return grad_verts;
+}
diff --git a/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals.h b/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals.h
new file mode 100644
index 0000000000000000000000000000000000000000..6df37c12e4c81cc9c03375bad3751baafeb473aa
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/extension.h>
+#include <tuple>
+#include "utils/pytorch3d_cutils.h"
+
+// Compute areas of mesh faces using packed representation.
+//
+// Inputs:
+//    verts: FloatTensor of shape (V, 3) giving vertex positions.
+//    faces: LongTensor of shape (F, 3) giving faces.
+//
+// Returns:
+//    areas: FloatTensor of shape (F,) where areas[f] is the area of faces[f].
+//    normals: FloatTensor of shape (F, 3) where normals[f] is the normal of
+//    faces[f]
+//
+
+// CPU implementation of the forward pass (see face_areas_normals_cpu.cpp).
+std::tuple<at::Tensor, at::Tensor> FaceAreasNormalsForwardCpu(
+    const at::Tensor verts,
+    const at::Tensor faces);
+// CPU implementation of the backward pass; returns the gradient w.r.t. verts.
+at::Tensor FaceAreasNormalsBackwardCpu(
+    const at::Tensor grad_areas,
+    const at::Tensor grad_normals,
+    const at::Tensor verts,
+    const at::Tensor faces);
+
+#ifdef WITH_CUDA
+// CUDA implementation of the forward pass (see face_areas_normals.cu).
+std::tuple<at::Tensor, at::Tensor> FaceAreasNormalsForwardCuda(
+    const at::Tensor verts,
+    const at::Tensor faces);
+// CUDA implementation of the backward pass; returns the gradient w.r.t. verts.
+at::Tensor FaceAreasNormalsBackwardCuda(
+    const at::Tensor grad_areas,
+    const at::Tensor grad_normals,
+    const at::Tensor verts,
+    const at::Tensor faces);
+#endif
+
+// Exposed entry point: picks the CUDA or the CPU forward implementation.
+std::tuple<at::Tensor, at::Tensor> FaceAreasNormalsForward(
+    const at::Tensor verts,
+    const at::Tensor faces) {
+  if (!verts.is_cuda() || !faces.is_cuda()) {
+    return FaceAreasNormalsForwardCpu(verts, faces);
+  }
+#ifdef WITH_CUDA
+  CHECK_CUDA(verts);
+  CHECK_CUDA(faces);
+  return FaceAreasNormalsForwardCuda(verts, faces);
+#else
+  AT_ERROR("Not compiled with GPU support.");
+#endif
+}
+
+// Exposed entry point: picks the CUDA or the CPU backward implementation.
+at::Tensor FaceAreasNormalsBackward(
+    const at::Tensor grad_areas,
+    const at::Tensor grad_normals,
+    const at::Tensor verts,
+    const at::Tensor faces) {
+  if (!verts.is_cuda() || !faces.is_cuda()) {
+    return FaceAreasNormalsBackwardCpu(grad_areas, grad_normals, verts, faces);
+  }
+#ifdef WITH_CUDA
+  CHECK_CUDA(verts);
+  CHECK_CUDA(faces);
+  CHECK_CUDA(grad_areas);
+  CHECK_CUDA(grad_normals);
+  return FaceAreasNormalsBackwardCuda(grad_areas, grad_normals, verts, faces);
+#else
+  AT_ERROR("Not compiled with GPU support.");
+#endif
+}
diff --git a/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals_cpu.cpp b/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1871ac7d4044467d1322ba32e300d513c1d5118e
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals_cpu.cpp
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/extension.h>
+#include <tuple>
+
+std::tuple<at::Tensor, at::Tensor> FaceAreasNormalsForwardCpu(
+    const at::Tensor verts,
+    const at::Tensor faces) {
+  const int num_faces = faces.size(0);
+
+  // Outputs: one area and one 3-vector normal per face, same options as verts.
+  at::Tensor areas = at::empty({num_faces}, verts.options());
+  at::Tensor normals = at::empty({num_faces, 3}, verts.options());
+
+  // The accessors require float vertices and int64 face indices.
+  auto verts_a = verts.accessor<float, 2>();
+  auto faces_a = faces.accessor<int64_t, 2>();
+  auto areas_a = areas.accessor<float, 1>();
+  auto normals_a = normals.accessor<float, 2>();
+
+  for (int f = 0; f < num_faces; ++f) {
+    // Vertex indices of the three corners of face f.
+    const int64_t c0 = faces_a[f][0];
+    const int64_t c1 = faces_a[f][1];
+    const int64_t c2 = faces_a[f][2];
+
+    // First edge: corner 1 minus corner 0.
+    const float ux = verts_a[c1][0] - verts_a[c0][0];
+    const float uy = verts_a[c1][1] - verts_a[c0][1];
+    const float uz = verts_a[c1][2] - verts_a[c0][2];
+
+    // Second edge: corner 2 minus corner 0.
+    const float wx = verts_a[c2][0] - verts_a[c0][0];
+    const float wy = verts_a[c2][1] - verts_a[c0][1];
+    const float wz = verts_a[c2][2] - verts_a[c0][2];
+
+    // Cross product of the two edges.
+    const float nx = uy * wz - uz * wy;
+    const float ny = uz * wx - ux * wz;
+    const float nz = ux * wy - uy * wx;
+
+    // The stored area is half the length of the cross product.
+    float len = sqrt(nx * nx + ny * ny + nz * nz);
+    areas_a[f] = len / 2.0;
+
+    // Clamp the length from below (max(len, 1e-6)) to avoid dividing by zero.
+    if (len < 1e-6) {
+      len = 1e-6;
+    }
+
+    normals_a[f][0] = nx / len;
+    normals_a[f][1] = ny / len;
+    normals_a[f][2] = nz / len;
+  }
+  return std::make_tuple(areas, normals);
+}
+
+at::Tensor FaceAreasNormalsBackwardCpu(
+    const at::Tensor grad_areas,
+    const at::Tensor grad_normals,
+    const at::Tensor verts,
+    const at::Tensor faces) {
+  const int V = verts.size(0);
+  const int F = faces.size(0);
+  // Gradient w.r.t. verts; zero-initialised because faces accumulate into it.
+  at::Tensor grad_verts = at::zeros({V, 3}, grad_areas.options());
+
+  auto grad_areas_a = grad_areas.accessor<float, 1>();
+  auto grad_normals_a = grad_normals.accessor<float, 2>();
+  auto verts_a = verts.accessor<float, 2>();
+  auto faces_a = faces.accessor<int64_t, 2>();
+  auto grad_verts_a = grad_verts.accessor<float, 2>();
+
+  for (int f = 0; f < F; ++f) {
+    const int64_t i0 = faces_a[f][0];
+    const int64_t i1 = faces_a[f][1];
+    const int64_t i2 = faces_a[f][2];
+
+    const float v0_x = verts_a[i0][0];
+    const float v0_y = verts_a[i0][1];
+    const float v0_z = verts_a[i0][2];
+
+    const float v1_x = verts_a[i1][0];
+    const float v1_y = verts_a[i1][1];
+    const float v1_z = verts_a[i1][2];
+
+    const float v2_x = verts_a[i2][0];
+    const float v2_y = verts_a[i2][1];
+    const float v2_z = verts_a[i2][2];
+
+    const float ax = v1_x - v0_x;
+    const float ay = v1_y - v0_y;
+    const float az = v1_z - v0_z;
+
+    const float bx = v2_x - v0_x;
+    const float by = v2_y - v0_y;
+    const float bz = v2_z - v0_z;
+
+    const float cx = ay * bz - az * by;
+    const float cy = az * bx - ax * bz;
+    const float cz = ax * by - ay * bx;
+
+    float norm = sqrt(cx * cx + cy * cy + cz * cz);
+    norm = (norm < 1e-6) ? 1e-6 : norm; // max(norm, 1e-6)
+    float inv_norm = 1. / norm;
+    float inv_norm_2 = pow(inv_norm, 2.0f);
+    float inv_norm_3 = pow(inv_norm, 3.0f);
+
+    // We compute gradients with respect to the input vertices.
+    // For each vertex, gradients come from grad_areas and grad_normals.
+    // eg, grad_v0_x = (d / d v0_x)
+    //       = \sum_f (d / d areas[f]) * (d areas[f] / d v0_x)
+    //              + (d / d normals[f, 0]) * (d normals[f, 0] / d v0_x)
+    //              + (d / d normals[f, 1]) * (d normals[f, 1] / d v0_x)
+    //              + (d / d normals[f, 2]) * (d normals[f, 2] / d v0_x)
+    // with (d / d areas[f]) = grad_areas[f] and
+    //      (d / d normals[f, j]) = grad_normals[f][j].
+    // The equations below are derived after taking
+    // derivatives wrt to the vertices (fun times!).
+
+    // grad v0 coming from grad areas and grad normals
+    const float grad_v0_x =
+        ((-az + bz) * cy + (-by + ay) * cz) / 2.0 * inv_norm * grad_areas_a[f] +
+        -cx * ((-az + bz) * cy + (-by + ay) * cz) * inv_norm_3 *
+            grad_normals_a[f][0] +
+        ((-az + bz) - cy * ((-az + bz) * cy + (-by + ay) * cz) * inv_norm_2) *
+            inv_norm * grad_normals_a[f][1] +
+        ((-by + ay) - cz * ((-az + bz) * cy + (-by + ay) * cz) * inv_norm_2) *
+            inv_norm * grad_normals_a[f][2];
+    grad_verts_a[i0][0] += grad_v0_x;
+
+    const float grad_v0_y =
+        ((-bz + az) * cx + (-ax + bx) * cz) / 2.0 * inv_norm * grad_areas_a[f] +
+        ((-bz + az) - cx * ((-bz + az) * cx + (-ax + bx) * cz) * inv_norm_2) *
+            inv_norm * grad_normals_a[f][0] +
+        -cy * ((-bz + az) * cx + (-ax + bx) * cz) * inv_norm_3 *
+            grad_normals_a[f][1] +
+        ((-ax + bx) - cz * ((-bz + az) * cx + (-ax + bx) * cz) * inv_norm_2) *
+            inv_norm * grad_normals_a[f][2];
+    grad_verts_a[i0][1] += grad_v0_y;
+
+    const float grad_v0_z =
+        ((-ay + by) * cx + (-bx + ax) * cy) / 2.0 * inv_norm * grad_areas_a[f] +
+        ((-ay + by) - cx * ((-ay + by) * cx + (-bx + ax) * cy) * inv_norm_2) *
+            inv_norm * grad_normals_a[f][0] +
+        ((-bx + ax) - cy * ((-ay + by) * cx + (-bx + ax) * cy) * inv_norm_2) *
+            inv_norm * grad_normals_a[f][1] +
+        -cz * ((-ay + by) * cx + (-bx + ax) * cy) * inv_norm_3 *
+            grad_normals_a[f][2];
+    grad_verts_a[i0][2] += grad_v0_z;
+
+    // grad v1 coming from grad areas and grad normals
+    const float grad_v1_x =
+        (by * cz - bz * cy) / 2.0 * inv_norm * grad_areas_a[f] +
+        -cx * (by * cz - bz * cy) * inv_norm_3 * grad_normals_a[f][0] +
+        (-bz - cy * (by * cz - bz * cy) * inv_norm_2) * inv_norm *
+            grad_normals_a[f][1] +
+        (by - cz * (by * cz - bz * cy) * inv_norm_2) * inv_norm *
+            grad_normals_a[f][2];
+    grad_verts_a[i1][0] += grad_v1_x;
+
+    const float grad_v1_y =
+        (bz * cx - bx * cz) / 2.0 * inv_norm * grad_areas_a[f] +
+        (bz - cx * (bz * cx - bx * cz) * inv_norm_2) * inv_norm *
+            grad_normals_a[f][0] +
+        -cy * (bz * cx - bx * cz) * inv_norm_3 * grad_normals_a[f][1] +
+        (-bx - cz * (bz * cx - bx * cz) * inv_norm_2) * inv_norm *
+            grad_normals_a[f][2];
+    grad_verts_a[i1][1] += grad_v1_y;
+
+    const float grad_v1_z =
+        (bx * cy - by * cx) / 2.0 * inv_norm * grad_areas_a[f] +
+        (-by - cx * (bx * cy - by * cx) * inv_norm_2) * inv_norm *
+            grad_normals_a[f][0] +
+        (bx - cy * (bx * cy - by * cx) * inv_norm_2) * inv_norm *
+            grad_normals_a[f][1] +
+        -cz * (bx * cy - by * cx) * inv_norm_3 * grad_normals_a[f][2];
+    grad_verts_a[i1][2] += grad_v1_z;
+
+    // grad v2 coming from grad areas and grad normals
+    const float grad_v2_x =
+        (az * cy - ay * cz) / 2.0 * inv_norm * grad_areas_a[f] +
+        -cx * (az * cy - ay * cz) * inv_norm_3 * grad_normals_a[f][0] +
+        (az - cy * (az * cy - ay * cz) * inv_norm_2) * inv_norm *
+            grad_normals_a[f][1] +
+        (-ay - cz * (az * cy - ay * cz) * inv_norm_2) * inv_norm *
+            grad_normals_a[f][2];
+    grad_verts_a[i2][0] += grad_v2_x;
+
+    const float grad_v2_y =
+        (ax * cz - az * cx) / 2.0 * inv_norm * grad_areas_a[f] +
+        (-az - cx * (ax * cz - az * cx) * inv_norm_2) * inv_norm *
+            grad_normals_a[f][0] +
+        -cy * (ax * cz - az * cx) * inv_norm_3 * grad_normals_a[f][1] +
+        (ax - cz * (ax * cz - az * cx) * inv_norm_2) * inv_norm *
+            grad_normals_a[f][2];
+    grad_verts_a[i2][1] += grad_v2_y;
+
+    const float grad_v2_z =
+        (ay * cx - ax * cy) / 2.0 * inv_norm * grad_areas_a[f] +
+        (ay - cx * (ay * cx - ax * cy) * inv_norm_2) * inv_norm *
+            grad_normals_a[f][0] +
+        (-ax - cy * (ay * cx - ax * cy) * inv_norm_2) * inv_norm *
+            grad_normals_a[f][1] +
+        -cz * (ay * cx - ax * cy) * inv_norm_3 * grad_normals_a[f][2];
+    grad_verts_a[i2][2] += grad_v2_z;
+  }
+  return grad_verts;
+}
diff --git a/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter.cu b/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter.cu
new file mode 100644
index 0000000000000000000000000000000000000000..1ec1a6f27a2476375f3d140c3c8fb440fb92c04f
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter.cu
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+
+// TODO(T47953967) to make this cuda kernel support all datatypes.
+// Adds, for every edge, the feature row of one endpoint onto the output row of
+// the other endpoint. `input` and `output` are indexed as flat row-major
+// (V, D) buffers and `edges` as a flat (E, 2) buffer of vertex indices.
+__global__ void GatherScatterCudaKernel(
+    const float* __restrict__ input,
+    const int64_t* __restrict__ edges,
+    float* __restrict__ output,
+    bool directed,
+    bool backward,
+    const size_t V,
+    const size_t D,
+    const size_t E) {
+  const int lane = threadIdx.x;
+
+  // In the backward pass the two endpoints of each edge swap roles.
+  const int dst_slot = backward ? 1 : 0;
+  const int src_slot = backward ? 0 : 1;
+
+  // Each block walks the edge list starting at its own block index and
+  // advancing by the number of blocks in the grid.
+  for (int e = blockIdx.x; e < E; e += gridDim.x) {
+    const int64_t dst = edges[2 * e + dst_slot];
+    const int64_t src = edges[2 * e + src_slot];
+
+    // Threads of the block share the D feature channels of this edge. When D
+    // is smaller than the block size the surplus threads have nothing to do.
+    for (int d = lane; d < D; d += blockDim.x) {
+      // Several edges may target the same vertex, hence the atomic add.
+      atomicAdd(output + dst * D + d, input[src * D + d]);
+      if (directed) {
+        continue;
+      }
+      // Undirected edges also contribute in the opposite direction.
+      atomicAdd(output + src * D + d, input[dst * D + d]);
+    }
+    __syncthreads();
+  }
+}
+
+// Host-side launcher for GatherScatterCudaKernel.
+//
+// Args:
+//   input: float32 CUDA tensor of shape (V, D).
+//   edges: int64 CUDA tensor of shape (E, 2) holding vertex indices.
+//   directed: if false, every edge also accumulates in the reverse direction.
+//   backward: if true, the two endpoints of every edge swap roles.
+//
+// Returns:
+//   Tensor of shape (V, D) with the same options as `input`; rows that no
+//   edge contributes to stay zero.
+at::Tensor GatherScatterCuda(
+    const at::Tensor& input,
+    const at::Tensor& edges,
+    bool directed,
+    bool backward) {
+  // Check inputs are on the same device
+  at::TensorArg input_t{input, "input", 1}, edges_t{edges, "edges", 2};
+  at::CheckedFrom c = "GatherScatterCuda";
+  at::checkAllSameGPU(c, {input_t, edges_t});
+
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(input.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const auto num_vertices = input.size(0);
+  const auto input_feature_dim = input.size(1);
+  const auto num_edges = edges.size(0);
+
+  auto output = at::zeros({num_vertices, input_feature_dim}, input.options());
+
+  // Nothing to accumulate when the output is empty or there are no edges.
+  // The no-edge case must return here: the grid size below is derived from
+  // num_edges, and launching a kernel with zero blocks is an invalid
+  // configuration that would surface as a CUDA error.
+  if (output.numel() == 0 || num_edges == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return output;
+  }
+
+  // The kernel reads `edges` as a flat buffer of index pairs, so any other
+  // layout would be misinterpreted or read out of bounds.
+  TORCH_CHECK(
+      edges.dim() == 2 && edges.size(1) == 2, "edges must have shape (E, 2)");
+
+  const size_t threads = 128;
+  const size_t max_blocks = 1920;
+  const size_t blocks = static_cast<size_t>(num_edges) < max_blocks
+      ? static_cast<size_t>(num_edges)
+      : max_blocks;
+
+  // Keep the contiguous views alive in named locals for the duration of the
+  // launch call instead of relying on expression temporaries.
+  const at::Tensor input_c = input.contiguous();
+  const at::Tensor edges_c = edges.contiguous();
+
+  GatherScatterCudaKernel<<<blocks, threads, 0, stream>>>(
+      input_c.data_ptr<float>(),
+      edges_c.data_ptr<int64_t>(),
+      output.data_ptr<float>(),
+      directed,
+      backward,
+      num_vertices,
+      input_feature_dim,
+      num_edges);
+  AT_CUDA_CHECK(cudaGetLastError());
+  return output;
+}
diff --git a/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter.h b/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter.h
new file mode 100644
index 0000000000000000000000000000000000000000..9ab9574f2df2861a9d57162e0c5f0ccc746ce206
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/extension.h>
+#include "utils/pytorch3d_cutils.h"
+
+// Fused gather scatter operation for aggregating features of neighbor nodes
+// in a graph. This gather scatter operation is specific to graphs as edge
+// indices are used as input.
+//
+// Args:
+//   input: float32 Tensor of shape (V, D) where V is the number of vertices
+//          and D is the feature dimension.
+//   edges: int64 Tensor of shape (E, 2) giving the indices of the vertices that
+//          make up the edge. E is the number of edges.
+//  directed: Bool indicating if edges in the graph are directed. For a
+//            directed graph v0 -> v1 the updated feature for v0 depends on v1.
+//  backward: Bool indicating if the operation is the backward pass.
+//
+// Returns:
+//   output: float32 Tensor of same shape as input.
+
+at::Tensor GatherScatterCuda(
+    const at::Tensor& input,
+    const at::Tensor& edges,
+    bool directed,
+    bool backward);
+
+at::Tensor GatherScatterCpu(
+    const at::Tensor& input,
+    const at::Tensor& edges,
+    bool directed,
+    bool backward);
+
+// Exposed implementation.
+// Dispatches to the CUDA or the CPU implementation depending on where the
+// tensors live. If either tensor is on a CUDA device, both are required to
+// be, so a mixed CPU/CUDA call fails with a clear error instead of handing
+// device memory to the CPU implementation.
+//
+// Declared inline because this definition lives in a header.
+inline at::Tensor GatherScatter(
+    const at::Tensor& input,
+    const at::Tensor& edges,
+    bool directed,
+    bool backward) {
+  if (input.is_cuda() || edges.is_cuda()) {
+#ifdef WITH_CUDA
+    CHECK_CUDA(input);
+    CHECK_CUDA(edges);
+    return GatherScatterCuda(input, edges, directed, backward);
+#else
+    AT_ERROR("Not compiled with GPU support.");
+#endif
+  }
+  return GatherScatterCpu(input, edges, directed, backward);
+}
diff --git a/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter_cpu.cpp b/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8511e125519cf50f6b538da1adc33b39e4b16171
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter_cpu.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+
+// CPU implementation of the fused gather/scatter over graph edges.
+//
+// Args:
+//   input: float32 tensor of shape (V, D).
+//   edges: int64 tensor of shape (E, 2) holding vertex indices in [0, V).
+//   directed: if false, every edge also accumulates in the reverse direction.
+//   backward: if true, the two endpoints of every edge swap roles.
+//
+// Returns:
+//   Tensor of shape (V, D) with the same options as `input`.
+//
+// Raises (via TORCH_CHECK) if `edges` does not have two columns or contains
+// an index outside [0, V).
+at::Tensor GatherScatterCpu(
+    const at::Tensor& input,
+    const at::Tensor& edges,
+    bool directed,
+    bool backward) {
+  const int64_t num_vertices = input.size(0);
+  const int64_t input_feature_dim = input.size(1);
+  const int64_t num_edges = edges.size(0);
+
+  auto output = at::zeros({num_vertices, input_feature_dim}, input.options());
+
+  auto input_a = input.accessor<float, 2>();
+  auto edges_a = edges.accessor<int64_t, 2>();
+  auto output_a = output.accessor<float, 2>();
+
+  // Columns 0 and 1 are read from every edge row below.
+  TORCH_CHECK(
+      num_edges == 0 || edges.size(1) == 2, "edges must have shape (E, 2)");
+
+  // Reverse the vertex order if backward.
+  const int v0_idx = backward ? 1 : 0;
+  const int v1_idx = backward ? 0 : 1;
+
+  // 64-bit counters: the extents come from int64_t tensor sizes.
+  for (int64_t e = 0; e < num_edges; ++e) {
+    // Get indices of vertices which form the edge.
+    const int64_t v0 = edges_a[e][v0_idx];
+    const int64_t v1 = edges_a[e][v1_idx];
+
+    // Accessors do no bounds checking; reject bad indices before they turn
+    // into out-of-bounds reads and writes.
+    TORCH_CHECK(
+        v0 >= 0 && v0 < num_vertices && v1 >= 0 && v1 < num_vertices,
+        "edges contains a vertex index outside [0, V)");
+
+    for (int64_t d = 0; d < input_feature_dim; ++d) {
+      output_a[v0][d] += input_a[v1][d];
+      if (!directed) {
+        output_a[v1][d] += input_a[v0][d];
+      }
+    }
+  }
+  return output;
+}
diff --git a/pytorch3d/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.cu b/pytorch3d/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.cu
new file mode 100644
index 0000000000000000000000000000000000000000..6bd2a80d972f2f4f7d76c0cf7d97d534ab3c55fe
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.cu
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <tuple>
+
+// Forward pass: every loop iteration produces one (pixel, channel) entry of
+// pix_attrs as the barycentric-weighted sum of that channel over the three
+// vertices of the face assigned to the pixel. Pixels whose face index is
+// negative are left untouched.
+template <typename scalar_t>
+__global__ void InterpFaceAttrsForwardKernel(
+    const int64_t* __restrict__ pix_to_face, // (P,)
+    const scalar_t* __restrict__ barycentric_coords, // (P, 3)
+    const scalar_t* __restrict__ face_attrs, // (F, 3, D)
+    scalar_t* pix_attrs, // (P, D)
+    const size_t P,
+    const size_t F,
+    const size_t D) {
+  const int first = threadIdx.x + blockIdx.x * blockDim.x;
+  const int step = blockDim.x * gridDim.x;
+  for (int idx = first; idx < P * D; idx += step) {
+    // Split the flat index into pixel and channel.
+    const int pix = idx / D;
+    const int chan = idx % D;
+    const int64_t face = pix_to_face[pix];
+    if (face >= 0) {
+      scalar_t acc = 0.0;
+      for (int k = 0; k < 3; ++k) {
+        const scalar_t w = barycentric_coords[pix * 3 + k];
+        const scalar_t a = face_attrs[face * 3 * D + k * D + chan];
+        acc += w * a;
+      }
+      pix_attrs[pix * D + chan] = acc;
+    }
+  }
+}
+
+// Host-side launcher for InterpFaceAttrsForwardKernel. Validates device,
+// dtype and shapes, allocates a zero-filled (P, D) result and dispatches the
+// kernel on the floating dtype of face_attrs.
+at::Tensor InterpFaceAttrsForwardCuda(
+    const at::Tensor& pix_to_face,
+    const at::Tensor& barycentric_coords,
+    const at::Tensor& face_attrs) {
+  // Everything must sit on one GPU, and the two floating inputs must share a
+  // dtype.
+  at::CheckedFrom c = "InterpFaceAttrsForwardCuda";
+  at::TensorArg pix_to_face_t{pix_to_face, "pix_to_face", 1};
+  at::TensorArg barycentric_coords_t{
+      barycentric_coords, "barycentric_coords", 2};
+  at::TensorArg face_attrs_t{face_attrs, "face_attributes", 3};
+  at::checkAllSameGPU(c, {pix_to_face_t, barycentric_coords_t, face_attrs_t});
+  at::checkAllSameType(c, {barycentric_coords_t, face_attrs_t});
+
+  // Launch on the device of the inputs, using the current stream.
+  at::cuda::CUDAGuard device_guard(pix_to_face.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const auto P = pix_to_face.size(0);
+  const auto F = face_attrs.size(0);
+  const auto D = face_attrs.size(2);
+
+  const bool bary_shape_ok =
+      barycentric_coords.size(0) == P && barycentric_coords.size(1) == 3;
+  TORCH_CHECK(bary_shape_ok, "barycentric_coords must have size (P, 3)");
+  const bool attrs_shape_ok = face_attrs.size(1) == 3;
+  TORCH_CHECK(attrs_shape_ok, "face_attrs must have size (F, 3, D)");
+
+  auto pix_attrs = at::zeros({P, D}, face_attrs.options());
+  const int threads = 1024;
+  const int blocks = 512;
+  AT_DISPATCH_FLOATING_TYPES(
+      face_attrs.scalar_type(), "interp_face_attrs_cuda", ([&] {
+        // The kernel indexes flat buffers, so hand it contiguous storage.
+        const auto pix_to_face_c = pix_to_face.contiguous();
+        const auto barycentric_coords_c = barycentric_coords.contiguous();
+        const auto face_attrs_c = face_attrs.contiguous();
+        const auto pix_attrs_c = pix_attrs.contiguous();
+        InterpFaceAttrsForwardKernel<<<blocks, threads, 0, stream>>>(
+            pix_to_face_c.data_ptr<int64_t>(),
+            barycentric_coords_c.data_ptr<scalar_t>(),
+            face_attrs_c.data_ptr<scalar_t>(),
+            pix_attrs_c.data_ptr<scalar_t>(),
+            P,
+            F,
+            D);
+      }));
+  AT_CUDA_CHECK(cudaGetLastError());
+  return pix_attrs;
+}
+
+// Backward pass: for every (pixel, channel) entry with a non-negative face
+// index, propagates the upstream gradient to the three barycentric weights
+// of the pixel and to the three vertex attributes of its face. Atomic adds
+// are used for both accumulations.
+template <typename scalar_t>
+__global__ void InterpFaceAttrsBackwardKernel(
+    const int64_t* __restrict__ pix_to_face, // (P,)
+    const scalar_t* __restrict__ barycentric_coords, // (P, 3)
+    const scalar_t* __restrict__ face_attrs, // (F, 3, D)
+    const scalar_t* __restrict__ grad_pix_attrs, // (P, D)
+    scalar_t* __restrict__ grad_barycentric_coords, // (P, 3)
+    scalar_t* __restrict__ grad_face_attrs, // (F, 3, D)
+    const size_t P,
+    const size_t F,
+    const size_t D) {
+  const int first = threadIdx.x + blockIdx.x * blockDim.x;
+  const int step = blockDim.x * gridDim.x;
+  for (int idx = first; idx < P * D; idx += step) {
+    // Split the flat index into pixel and channel.
+    const int pix = idx / D;
+    const int chan = idx % D;
+    const int64_t face = pix_to_face[pix];
+    if (face >= 0) {
+      const scalar_t g_out = grad_pix_attrs[pix * D + chan];
+      for (int k = 0; k < 3; ++k) {
+        const scalar_t w = barycentric_coords[pix * 3 + k];
+        const scalar_t a = face_attrs[face * 3 * D + k * D + chan];
+        // d(out)/d(weight) = attribute, d(out)/d(attribute) = weight.
+        const scalar_t g_w = a * g_out;
+        const scalar_t g_a = w * g_out;
+        atomicAdd(grad_barycentric_coords + pix * 3 + k, g_w);
+        atomicAdd(grad_face_attrs + face * 3 * D + k * D + chan, g_a);
+      }
+    }
+  }
+}
+
+// Host-side launcher for InterpFaceAttrsBackwardKernel.
+//
+// Args:
+//   pix_to_face: int64 tensor of shape (P,).
+//   barycentric_coords: float32 tensor of shape (P, 3).
+//   face_attrs: float32 tensor of shape (F, 3, D).
+//   grad_pix_attrs: float32 upstream gradient of shape (P, D).
+//
+// Returns a tuple of:
+//   grad_barycentric_coords: contiguous tensor of shape (P, 3).
+//   grad_face_attrs: contiguous tensor of shape (F, 3, D).
+//
+// Only float32 is supported; the kernel is instantiated for float only.
+std::tuple<at::Tensor, at::Tensor> InterpFaceAttrsBackwardCuda(
+    const at::Tensor& pix_to_face,
+    const at::Tensor& barycentric_coords,
+    const at::Tensor& face_attrs,
+    const at::Tensor& grad_pix_attrs) {
+  // Make sure all inputs are on the same device
+  at::TensorArg pix_to_face_t{pix_to_face, "pix_to_face", 1},
+      barycentric_coords_t{barycentric_coords, "barycentric_coords", 2},
+      face_attrs_t{face_attrs, "face_attributes", 3},
+      grad_pix_attrs_t{grad_pix_attrs, "grad_pix_attrs", 4};
+  at::CheckedFrom c = "InterpFaceAttrsBackwardCuda";
+  at::checkAllSameGPU(
+      c, {pix_to_face_t, barycentric_coords_t, face_attrs_t, grad_pix_attrs_t});
+  at::checkAllSameType(
+      c, {barycentric_coords_t, face_attrs_t, grad_pix_attrs_t});
+
+  // This is nondeterministic because atomicAdd
+  at::globalContext().alertNotDeterministic("InterpFaceAttrsBackwardCuda");
+
+  // Set the device for the kernel launch based on the input
+  at::cuda::CUDAGuard device_guard(pix_to_face.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const auto P = pix_to_face.size(0);
+  const auto F = face_attrs.size(0);
+  const auto D = face_attrs.size(2);
+
+  TORCH_CHECK(
+      barycentric_coords.size(0) == P && barycentric_coords.size(1) == 3,
+      "barycentric_coords must have size (P, 3)");
+  TORCH_CHECK(face_attrs.size(1) == 3, "face_attrs must have size (F, 3, D)");
+  TORCH_CHECK(
+      grad_pix_attrs.size(0) == P && grad_pix_attrs.size(1) == D,
+      "grad_pix_attrs must have size (P, D)");
+
+  // Allocate the gradients as fresh contiguous buffers. zeros_like may keep
+  // the strides of a non-contiguous input; calling .contiguous() on such a
+  // result at launch time would hand the kernel a temporary copy and leave
+  // the tensors returned below filled with zeros.
+  auto grad_barycentric_coords =
+      at::zeros(barycentric_coords.sizes(), barycentric_coords.options());
+  auto grad_face_attrs = at::zeros(face_attrs.sizes(), face_attrs.options());
+
+  // The kernel indexes flat buffers, so give it contiguous inputs. Named
+  // locals keep any copies alive for the duration of the launch call.
+  const at::Tensor pix_to_face_c = pix_to_face.contiguous();
+  const at::Tensor barycentric_coords_c = barycentric_coords.contiguous();
+  const at::Tensor face_attrs_c = face_attrs.contiguous();
+  const at::Tensor grad_pix_attrs_c = grad_pix_attrs.contiguous();
+
+  const int threads = 1024;
+  const int blocks = 512;
+  // Only allow float for now.
+  // TODO: Add support for double once we fix atomicAdd
+  // clang-format off
+  InterpFaceAttrsBackwardKernel<<<blocks, threads, 0, stream>>>(
+    pix_to_face_c.data_ptr<int64_t>(),
+    barycentric_coords_c.data_ptr<float>(),
+    face_attrs_c.data_ptr<float>(),
+    grad_pix_attrs_c.data_ptr<float>(),
+    grad_barycentric_coords.data_ptr<float>(),
+    grad_face_attrs.data_ptr<float>(),
+    P, F, D);
+  AT_CUDA_CHECK(cudaGetLastError());
+  // clang-format on
+  return std::make_tuple(grad_barycentric_coords, grad_face_attrs);
+}
diff --git a/pytorch3d/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.h b/pytorch3d/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.h
new file mode 100644
index 0000000000000000000000000000000000000000..5ba144621777eed15759f95f196466734bcaf077
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/extension.h>
+#include <tuple>
+#include "utils/pytorch3d_cutils.h"
+
+// Interpolates per-face attributes (forward pass)
+//
+// Inputs:
+//    pix_to_face: LongTensor of shape (P,) giving a face index for each pixel.
+//        Each element should be < F, the total number of faces.
+//        Face indices < 0 indicate that the pixel is not covered by a face.
+//    barycentric_coords: FloatTensor of shape (P, 3) giving barycentric coords.
+//    face_attrs: FloatTensor of shape (F, 3, D) giving a D-dimensional
+//        value for each vertex of each face.
+//
+// Returns:
+//    pix_attributes: FloatTensor of shape (P, D) giving an interpolated value
+//    for each pixel.
+
+// CPU implementation
+// Placeholder: the forward pass has no CPU implementation, so calling this
+// always raises an error. Declared inline because the definition lives in a
+// header and would otherwise be emitted by every translation unit that
+// includes it.
+inline at::Tensor InterpFaceAttrsForwardCpu(
+    const at::Tensor& pix_to_face,
+    const at::Tensor& barycentric_coords,
+    const at::Tensor& face_attrs) {
+  AT_ERROR("Not Implemented");
+  // Not reached; present only so the function has a return statement.
+  return pix_to_face;
+}
+
+#ifdef WITH_CUDA
+// Cuda implementation.
+at::Tensor InterpFaceAttrsForwardCuda(
+    const at::Tensor& pix_to_face,
+    const at::Tensor& barycentric_coords,
+    const at::Tensor& face_attrs);
+#endif
+
+// General implementation
+// Dispatches the forward pass on the device of pix_to_face: the CUDA
+// implementation when it is a CUDA tensor (the other inputs must then be
+// CUDA tensors as well), otherwise the CPU implementation.
+//
+// Declared inline because this definition lives in a header.
+inline at::Tensor InterpFaceAttrsForward(
+    const at::Tensor& pix_to_face,
+    const at::Tensor& barycentric_coords,
+    const at::Tensor& face_attrs) {
+  if (pix_to_face.is_cuda()) {
+#ifdef WITH_CUDA
+    CHECK_CUDA(face_attrs);
+    CHECK_CUDA(barycentric_coords);
+    return InterpFaceAttrsForwardCuda(
+        pix_to_face, barycentric_coords, face_attrs);
+#else
+    AT_ERROR("Not compiled with GPU support.");
+#endif
+  }
+  return InterpFaceAttrsForwardCpu(pix_to_face, barycentric_coords, face_attrs);
+}
+
+// Interpolates per-face attributes (backward pass)
+//
+// Inputs:
+//    pix_to_face: LongTensor of shape (P,) giving a face index for each pixel.
+//        Each element should be < F, the total number of faces.
+//        Face indices < 0 indicate that the pixel is not covered by a face.
+//    barycentric_coords: FloatTensor of shape (P, 3) giving barycentric coords.
+//    face_attrs: FloatTensor of shape (F, 3, D) giving a D-dimensional
+//        value for each vertex of each face.
+//    grad_pix_attrs: Upstream gradients of shape (P, D)
+//
+// Returns a tuple of:
+//    grad_barycentric_coords: FloatTensor of shape (P, 3)
+//    grad_face_attrs: FloatTensor of shape (F, 3, D)
+
+// Placeholder: the backward pass has no CPU implementation, so calling this
+// always raises an error. Declared inline because the definition lives in a
+// header and would otherwise be emitted by every translation unit that
+// includes it.
+inline std::tuple<at::Tensor, at::Tensor> InterpFaceAttrsBackwardCpu(
+    const at::Tensor& pix_to_face,
+    const at::Tensor& barycentric_coords,
+    const at::Tensor& face_attrs,
+    const at::Tensor& grad_pix_attrs) {
+  AT_ERROR("Not Implemented");
+  // Not reached; present only so the function has a return statement.
+  return std::make_tuple(pix_to_face, pix_to_face);
+}
+
+std::tuple<at::Tensor, at::Tensor> InterpFaceAttrsBackwardCuda(
+    const at::Tensor& pix_to_face,
+    const at::Tensor& barycentric_coords,
+    const at::Tensor& face_attrs,
+    const at::Tensor& grad_pix_attrs);
+
+// Dispatches the backward pass on the device of pix_to_face: the CUDA
+// implementation when it is a CUDA tensor (the other inputs must then be
+// CUDA tensors as well), otherwise the CPU implementation.
+//
+// Declared inline because this definition lives in a header.
+inline std::tuple<at::Tensor, at::Tensor> InterpFaceAttrsBackward(
+    const at::Tensor& pix_to_face,
+    const at::Tensor& barycentric_coords,
+    const at::Tensor& face_attrs,
+    const at::Tensor& grad_pix_attrs) {
+  if (pix_to_face.is_cuda()) {
+#ifdef WITH_CUDA
+    CHECK_CUDA(face_attrs);
+    CHECK_CUDA(barycentric_coords);
+    CHECK_CUDA(grad_pix_attrs);
+    return InterpFaceAttrsBackwardCuda(
+        pix_to_face, barycentric_coords, face_attrs, grad_pix_attrs);
+#else
+    AT_ERROR("Not compiled with GPU support.");
+#endif
+  }
+  return InterpFaceAttrsBackwardCpu(
+      pix_to_face, barycentric_coords, face_attrs, grad_pix_attrs);
+}
diff --git a/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d.cu b/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d.cu
new file mode 100644
index 0000000000000000000000000000000000000000..a315550f639ba9353016d8012db453f6d952a5b0
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d.cu
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "iou_box3d/iou_utils.cuh"
+
+// Parallelize over N*M computations which can each be done
+// independently
+// Computes the intersection volume and IoU for every pair (n, m) of a box
+// from boxes1 and a box from boxes2.
+//
+// Args:
+//   boxes1: accessor over a 3-dim float tensor; dimension 0 has size N.
+//   boxes2: accessor over a 3-dim float tensor; dimension 0 has size M.
+//   vols: output accessor, written at [n][m] with the intersection volume.
+//   ious: output accessor, written at [n][m] with vol / (vol1 + vol2 - vol).
+//
+// Each thread starts at its global thread index and advances by the total
+// number of threads in the grid until all N * M pairs are processed.
+__global__ void IoUBox3DKernel(
+    const at::PackedTensorAccessor64<float, 3, at::RestrictPtrTraits> boxes1,
+    const at::PackedTensorAccessor64<float, 3, at::RestrictPtrTraits> boxes2,
+    at::PackedTensorAccessor64<float, 2, at::RestrictPtrTraits> vols,
+    at::PackedTensorAccessor64<float, 2, at::RestrictPtrTraits> ious) {
+  const size_t N = boxes1.size(0);
+  const size_t M = boxes2.size(0);
+
+  const size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const size_t stride = gridDim.x * blockDim.x;
+
+  // Per-thread scratch for the triangle and quad faces of the two boxes;
+  // refilled on every iteration of the loop below.
+  FaceVerts box1_tris[NUM_TRIS];
+  FaceVerts box2_tris[NUM_TRIS];
+  FaceVerts box1_planes[NUM_PLANES];
+  FaceVerts box2_planes[NUM_PLANES];
+
+  for (size_t i = tid; i < N * M; i += stride) {
+    const size_t n = i / M; // box1 index
+    const size_t m = i % M; // box2 index
+
+    // Convert to array of structs of face vertices i.e. effectively (F, 3, 3)
+    // FaceVerts is a data type defined in iou_utils.cuh
+    GetBoxTris(boxes1[n], box1_tris);
+    GetBoxTris(boxes2[m], box2_tris);
+
+    // Calculate the position of the center of the box which is used in
+    // several calculations. This requires a tensor as input.
+    const float3 box1_center = BoxCenter(boxes1[n]);
+    const float3 box2_center = BoxCenter(boxes2[m]);
+
+    // Convert to an array of face vertices
+    GetBoxPlanes(boxes1[n], box1_planes);
+    GetBoxPlanes(boxes2[m], box2_planes);
+
+    // Get Box Volumes
+    const float box1_vol = BoxVolume(box1_tris, box1_center, NUM_TRIS);
+    const float box2_vol = BoxVolume(box2_tris, box2_center, NUM_TRIS);
+
+    // Tris in Box1 intersection with Planes in Box2
+    // Initialize box1 intersecting faces. MAX_TRIS is the
+    // max faces possible in the intersecting shape.
+    // TODO: determine if the value of MAX_TRIS is sufficient or
+    // if we should store the max tris for each NxM computation
+    // and throw an error if any exceeds the max.
+    FaceVerts box1_intersect[MAX_TRIS];
+    for (int j = 0; j < NUM_TRIS; ++j) {
+      // Initialize the faces from the box
+      box1_intersect[j] = box1_tris[j];
+    }
+    // Get the count of the actual number of faces in the intersecting shape
+    int box1_count = BoxIntersections(box2_planes, box2_center, box1_intersect);
+
+    // Tris in Box2 intersection with Planes in Box1
+    FaceVerts box2_intersect[MAX_TRIS];
+    for (int j = 0; j < NUM_TRIS; ++j) {
+      box2_intersect[j] = box2_tris[j];
+    }
+    const int box2_count =
+        BoxIntersections(box1_planes, box1_center, box2_intersect);
+
+    // If there are overlapping regions in Box2, remove any coplanar faces
+    if (box2_count > 0) {
+      // Identify if any triangles in Box2 are coplanar with Box1
+      Keep tri2_keep[MAX_TRIS];
+      for (int j = 0; j < MAX_TRIS; ++j) {
+        // Initialize the valid faces to be true
+        tri2_keep[j].keep = j < box2_count ? true : false;
+      }
+      // A Box2 triangle is dropped when it is coplanar with any Box1
+      // triangle whose area exceeds aEpsilon.
+      for (int b1 = 0; b1 < box1_count; ++b1) {
+        for (int b2 = 0; b2 < box2_count; ++b2) {
+          const bool is_coplanar =
+              IsCoplanarTriTri(box1_intersect[b1], box2_intersect[b2]);
+          const float area = FaceArea(box1_intersect[b1]);
+          if ((is_coplanar) && (area > aEpsilon)) {
+            tri2_keep[b2].keep = false;
+          }
+        }
+      }
+
+      // Keep only the non coplanar triangles in Box2 - add them to the
+      // Box1 triangles.
+      // NOTE(review): the append below is not checked against MAX_TRIS, the
+      // capacity of box1_intersect - confirm that box1_count + box2_count
+      // cannot exceed it (see the TODO above).
+      for (int b2 = 0; b2 < box2_count; ++b2) {
+        if (tri2_keep[b2].keep) {
+          box1_intersect[box1_count] = box2_intersect[b2];
+          // box1_count will determine the total faces in the
+          // intersecting shape
+          box1_count++;
+        }
+      }
+    }
+
+    // Initialize the vol and iou to 0.0 in case there are no triangles
+    // in the intersecting shape.
+    float vol = 0.0;
+    float iou = 0.0;
+
+    // If there are triangles in the intersecting shape
+    if (box1_count > 0) {
+      // The intersecting shape is a polyhedron made up of the
+      // triangular faces that are all now in box1_intersect.
+      // Calculate the polyhedron center
+      const float3 poly_center = PolyhedronCenter(box1_intersect, box1_count);
+      // Compute intersecting polyhedron volume
+      vol = BoxVolume(box1_intersect, poly_center, box1_count);
+      // Compute IoU
+      iou = vol / (box1_vol + box2_vol - vol);
+    }
+
+    // Write the volume and IoU to global memory
+    vols[n][m] = vol;
+    ious[n][m] = iou;
+  }
+}
+
+// Host-side launcher for IoUBox3DKernel.
+//
+// Args:
+//   boxes1: float32 CUDA tensor of shape (N, 8, 3).
+//   boxes2: float32 CUDA tensor of shape (M, 8, 3), same device and dtype.
+//
+// Returns a tuple of:
+//   vols: (N, M) tensor of intersection volumes.
+//   ious: (N, M) tensor of intersection-over-union values.
+//
+// Raises (via TORCH_CHECK) if either input is not of shape (*, 8, 3).
+std::tuple<at::Tensor, at::Tensor> IoUBox3DCuda(
+    const at::Tensor& boxes1, // (N, 8, 3)
+    const at::Tensor& boxes2) { // (M, 8, 3)
+  // Check inputs are on the same device
+  at::TensorArg boxes1_t{boxes1, "boxes1", 1}, boxes2_t{boxes2, "boxes2", 2};
+  at::CheckedFrom c = "IoUBox3DCuda";
+  at::checkAllSameGPU(c, {boxes1_t, boxes2_t});
+  at::checkAllSameType(c, {boxes1_t, boxes2_t});
+
+  // Set the device for the kernel launch based on the device of boxes1
+  at::cuda::CUDAGuard device_guard(boxes1.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  // Require exactly three coordinates per corner on both inputs. Comparing
+  // the two sizes only with each other would accept any matching width.
+  TORCH_CHECK(
+      (boxes1.size(2) == 3) && (boxes2.size(2) == 3),
+      "Boxes must have shape (8, 3)");
+
+  TORCH_CHECK(
+      (boxes2.size(1) == 8) && (boxes1.size(1) == 8),
+      "Boxes must have shape (8, 3)");
+
+  const int64_t N = boxes1.size(0);
+  const int64_t M = boxes2.size(0);
+
+  auto vols = at::zeros({N, M}, boxes1.options());
+  auto ious = at::zeros({N, M}, boxes1.options());
+
+  // No pairs to evaluate: skip the launch entirely.
+  if (vols.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return std::make_tuple(vols, ious);
+  }
+
+  const size_t blocks = 512;
+  const size_t threads = 256;
+
+  IoUBox3DKernel<<<blocks, threads, 0, stream>>>(
+      boxes1.packed_accessor64<float, 3, at::RestrictPtrTraits>(),
+      boxes2.packed_accessor64<float, 3, at::RestrictPtrTraits>(),
+      vols.packed_accessor64<float, 2, at::RestrictPtrTraits>(),
+      ious.packed_accessor64<float, 2, at::RestrictPtrTraits>());
+
+  AT_CUDA_CHECK(cudaGetLastError());
+
+  return std::make_tuple(vols, ious);
+}
diff --git a/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d.h b/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d.h
new file mode 100644
index 0000000000000000000000000000000000000000..84f752b0718ab1bd495315358e0fa976d6fe8b22
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/extension.h>
+#include <tuple>
+#include "utils/pytorch3d_cutils.h"
+
+// Calculate the intersection volume and IoU metric for two batches of boxes
+//
+// Args:
+//     boxes1: tensor of shape (N, 8, 3) of the coordinates of the 1st boxes
+//     boxes2: tensor of shape (M, 8, 3) of the coordinates of the 2nd boxes
+// Returns:
+//     vol: (N, M) tensor of the volume of the intersecting convex shapes
+//     iou: (N, M) tensor of the intersection over union which is
+//          defined as: `iou = vol / (vol1 + vol2 - vol)`
+
+// CPU implementation
+std::tuple<at::Tensor, at::Tensor> IoUBox3DCpu(
+    const at::Tensor& boxes1,
+    const at::Tensor& boxes2);
+
+// CUDA implementation
+std::tuple<at::Tensor, at::Tensor> IoUBox3DCuda(
+    const at::Tensor& boxes1,
+    const at::Tensor& boxes2);
+
+// Implementation which is exposed
+// Public entry point: routes to the CUDA implementation when either input
+// is a CUDA tensor and to the CPU implementation otherwise. Both
+// implementations receive contiguous tensors.
+inline std::tuple<at::Tensor, at::Tensor> IoUBox3D(
+    const at::Tensor& boxes1,
+    const at::Tensor& boxes2) {
+  const bool any_on_gpu = boxes1.is_cuda() || boxes2.is_cuda();
+  if (any_on_gpu) {
+#ifdef WITH_CUDA
+    // Once the GPU path is chosen both operands are checked with CHECK_CUDA.
+    CHECK_CUDA(boxes1);
+    CHECK_CUDA(boxes2);
+    const at::Tensor boxes1_gpu = boxes1.contiguous();
+    const at::Tensor boxes2_gpu = boxes2.contiguous();
+    return IoUBox3DCuda(boxes1_gpu, boxes2_gpu);
+#else
+    AT_ERROR("Not compiled with GPU support.");
+#endif
+  }
+  const at::Tensor boxes1_cpu = boxes1.contiguous();
+  const at::Tensor boxes2_cpu = boxes2.contiguous();
+  return IoUBox3DCpu(boxes1_cpu, boxes2_cpu);
+}
diff --git a/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d_cpu.cpp b/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3bc66de4e2db720984ea0917517a60e5ce601c7d
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d_cpu.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/extension.h>
+#include <torch/torch.h>
+#include <list>
+#include <numeric>
+#include <queue>
+#include <tuple>
+#include "iou_box3d/iou_utils.h"
+
+// CPU implementation: computes the intersection volume and IoU for every
+// pair of a box from boxes1 and a box from boxes2.
+//
+// Args:
+//   boxes1: float32 tensor with 3 dims; dimension 0 has size N.
+//   boxes2: float32 tensor with 3 dims; dimension 0 has size M.
+//
+// Returns a tuple of:
+//   vols: (N, M) float32 tensor of intersection volumes.
+//   ious: (N, M) float32 tensor of vol / (vol1 + vol2 - vol).
+std::tuple<at::Tensor, at::Tensor> IoUBox3DCpu(
+    const at::Tensor& boxes1,
+    const at::Tensor& boxes2) {
+  const int N = boxes1.size(0);
+  const int M = boxes2.size(0);
+  auto float_opts = boxes1.options().dtype(torch::kFloat32);
+  torch::Tensor vols = torch::zeros({N, M}, float_opts);
+  torch::Tensor ious = torch::zeros({N, M}, float_opts);
+
+  // Create tensor accessors
+  auto boxes1_a = boxes1.accessor<float, 3>();
+  auto boxes2_a = boxes2.accessor<float, 3>();
+  auto vols_a = vols.accessor<float, 2>();
+  auto ious_a = ious.accessor<float, 2>();
+
+  // Iterate through the N boxes in boxes1
+  for (int n = 0; n < N; ++n) {
+    const auto& box1 = boxes1_a[n];
+    // Convert to vector of face vertices i.e. effectively (F, 3, 3)
+    // face_verts is a data type defined in iou_utils.h
+    const face_verts box1_tris = GetBoxTris(box1);
+
+    // Calculate the position of the center of the box which is used in
+    // several calculations. This requires a tensor as input.
+    const vec3<float> box1_center = BoxCenter(boxes1[n]);
+
+    // Convert to vector of face vertices i.e. effectively (P, 4, 3)
+    const face_verts box1_planes = GetBoxPlanes(box1);
+
+    // Get Box Volumes
+    const float box1_vol = BoxVolume(box1_tris, box1_center);
+
+    // Iterate through the M boxes in boxes2
+    for (int m = 0; m < M; ++m) {
+      // Repeat above steps for box2
+      // TODO: check if caching these values helps performance.
+      const auto& box2 = boxes2_a[m];
+      const face_verts box2_tris = GetBoxTris(box2);
+      const vec3<float> box2_center = BoxCenter(boxes2[m]);
+      const face_verts box2_planes = GetBoxPlanes(box2);
+      const float box2_vol = BoxVolume(box2_tris, box2_center);
+
+      // Every triangle in one box will be compared to each plane in the other
+      // box. There are 3 possible outcomes:
+      // 1. If the triangle is fully inside, then it will
+      //    remain as is.
+      // 2. If the triangle is fully outside, it will be removed.
+      // 3. If the triangle intersects with the (infinite) plane, it
+      //    will be broken into subtriangles such that each subtriangle is
+      //    fully inside the plane and part of the intersecting polyhedron.
+
+      // Tris in Box1 -> Planes in Box2
+      face_verts box1_intersect =
+          BoxIntersections(box1_tris, box2_planes, box2_center);
+      // Tris in Box2 -> Planes in Box1
+      face_verts box2_intersect =
+          BoxIntersections(box2_tris, box1_planes, box1_center);
+
+      // If there are overlapping regions in Box2, remove any coplanar faces
+      if (box2_intersect.size() > 0) {
+        // Identify if any triangles in Box2 are coplanar with Box1
+        // tri2_keep[b2] is 1 while the Box2 triangle b2 is still kept.
+        std::vector<int> tri2_keep(box2_intersect.size());
+        std::fill(tri2_keep.begin(), tri2_keep.end(), 1);
+        // A Box2 triangle is dropped when it is coplanar with any Box1
+        // triangle whose area exceeds aEpsilon.
+        for (int b1 = 0; b1 < box1_intersect.size(); ++b1) {
+          for (int b2 = 0; b2 < box2_intersect.size(); ++b2) {
+            const bool is_coplanar =
+                IsCoplanarTriTri(box1_intersect[b1], box2_intersect[b2]);
+            const float area = FaceArea(box1_intersect[b1]);
+            if ((is_coplanar) && (area > aEpsilon)) {
+              tri2_keep[b2] = 0;
+            }
+          }
+        }
+
+        // Keep only the non coplanar triangles in Box2 - add them to the
+        // Box1 triangles.
+        for (int b2 = 0; b2 < box2_intersect.size(); ++b2) {
+          if (tri2_keep[b2] == 1) {
+            box1_intersect.push_back((box2_intersect[b2]));
+          }
+        }
+      }
+
+      // Initialize the vol and iou to 0.0 in case there are no triangles
+      // in the intersecting shape.
+      float vol = 0.0;
+      float iou = 0.0;
+
+      // If there are triangles in the intersecting shape
+      if (box1_intersect.size() > 0) {
+        // The intersecting shape is a polyhedron made up of the
+        // triangular faces that are all now in box1_intersect.
+        // Calculate the polyhedron center
+        const vec3<float> polyhedron_center = PolyhedronCenter(box1_intersect);
+        // Compute intersecting polyhedron volume
+        vol = BoxVolume(box1_intersect, polyhedron_center);
+        // Compute IoU
+        iou = vol / (box1_vol + box2_vol - vol);
+      }
+      // Save out volume and IoU
+      vols_a[n][m] = vol;
+      ious_a[n][m] = iou;
+    }
+  }
+  return std::make_tuple(vols, ious);
+}
diff --git a/pytorch3d/pytorch3d/csrc/iou_box3d/iou_utils.cuh b/pytorch3d/pytorch3d/csrc/iou_box3d/iou_utils.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..5ad5b165dfdc0b76df4d4d2d23bdff7b19e482ec
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/iou_box3d/iou_utils.cuh
@@ -0,0 +1,737 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <float.h>
+#include <math.h>
+#include <cstdio>
+#include "utils/float_math.cuh"
+
+// dEpsilon: Tolerance on cos(θ) in dot products of unit vectors, used to
+// decide whether two unit vectors are orthogonal or parallel (coplanar faces).
+// With dEpsilon = 0.001, two unit normals are treated as parallel when the
+// angle θ between them is below roughly 2.5 deg (cos(θ) > 1 - dEpsilon).
+__constant__ const float dEpsilon = 1e-3;
+// aEpsilon: Used once in main function to check for small face areas
+__constant__ const float aEpsilon = 1e-4;
+// kEpsilon: Used only for norm(u) = u/max(||u||, kEpsilon)
+__constant__ const float kEpsilon = 1e-8;
+
+/*
+_PLANES and _TRIS define the 4- and 3-connectivity
+of the 8 box corners.
+_PLANES gives the quad faces of the 3D box
+_TRIS gives the triangle faces of the 3D box
+*/
+const int NUM_PLANES = 6;
+const int NUM_TRIS = 12;
+// This is required for initializing the faces
+// in the intersecting shape
+const int MAX_TRIS = 100;
+
+// Data types for the vertex coordinates of a face
+// (FaceVerts) and for its corner indices (FaceVertsIdx).
+// We will use struct arrays for representing
+// the data for each box and intersecting
+// triangles
+struct FaceVerts {
+  float3 v0;
+  float3 v1;
+  float3 v2;
+  float3 v3; // Only meaningful for quad faces; unused for triangles
+};
+
+struct FaceVertsIdx {
+  int v0; // Each index selects one of the 8 box corners
+  int v1;
+  int v2;
+  int v3; // Only meaningful for quad faces; unused for triangles
+};
+
+// This is used when deciding which faces to
+// keep that are not coplanar
+struct Keep {
+  bool keep;
+};
+
+__device__ FaceVertsIdx _PLANES[] = { // quad faces as box-corner indices
+    {0, 1, 2, 3},
+    {3, 2, 6, 7},
+    {0, 1, 5, 4},
+    {0, 3, 7, 4},
+    {1, 5, 6, 2},
+    {4, 5, 6, 7},
+};
+__device__ FaceVertsIdx _TRIS[] = { // two triangles per quad in _PLANES
+    {0, 1, 2}, // this and the next triangle split quad {0, 1, 2, 3}
+    {0, 3, 2},
+    {4, 5, 6}, // this and the next triangle split quad {4, 5, 6, 7}
+    {4, 6, 7},
+    {1, 5, 6}, // this and the next triangle split quad {1, 5, 6, 2}
+    {1, 6, 2},
+    {0, 4, 7}, // this and the next triangle split quad {0, 3, 7, 4}
+    {0, 7, 3},
+    {3, 2, 6}, // this and the next triangle split quad {3, 2, 6, 7}
+    {3, 6, 7},
+    {0, 1, 5}, // this and the next triangle split quad {0, 1, 5, 4}
+    {0, 4, 5},
+};
+
+// Gather the vertex coordinates of the NUM_TRIS triangle faces of a box.
+//
+// Args
+//    box: (8, 3) tensor accessor for the box vertices
+//    box_tris: Array of structs of type FaceVerts, effectively (F, 3, 3),
+//      that receives the coordinates of the verts of each face.
+//
+// Returns: None (output saved to box_tris)
+//
+template <typename Box, typename BoxTris>
+__device__ inline void GetBoxTris(const Box& box, BoxTris& box_tris) {
+  for (int t = 0; t < NUM_TRIS; ++t) {
+    // Box-corner indices of the t-th triangle
+    const FaceVertsIdx f = _TRIS[t];
+    const float3 a = make_float3(box[f.v0][0], box[f.v0][1], box[f.v0][2]);
+    const float3 b = make_float3(box[f.v1][0], box[f.v1][1], box[f.v1][2]);
+    const float3 c = make_float3(box[f.v2][0], box[f.v2][1], box[f.v2][2]);
+    box_tris[t] = {a, b, c};
+  }
+}
+
+// Gather the corner coordinates of the NUM_PLANES quad faces of a box.
+//
+// Args
+//    box: (8, 3) tensor accessor for the box vertices
+//    box_planes: Array of structs of type FaceVerts, effectively (P, 4, 3),
+//      that receives the coordinates of the four corners of each plane
+//
+// Returns: None (output saved to box_planes)
+//
+template <typename Box, typename FaceVertsBoxPlanes>
+__device__ inline void GetBoxPlanes(
+    const Box& box,
+    FaceVertsBoxPlanes& box_planes) {
+  for (int p = 0; p < NUM_PLANES; ++p) {
+    // Box-corner indices of the p-th quad face
+    const FaceVertsIdx f = _PLANES[p];
+    // Read the (x, y, z) coordinates of each of its four corners
+    const float3 a = make_float3(box[f.v0][0], box[f.v0][1], box[f.v0][2]);
+    const float3 b = make_float3(box[f.v1][0], box[f.v1][1], box[f.v1][2]);
+    const float3 c = make_float3(box[f.v2][0], box[f.v2][1], box[f.v2][2]);
+    const float3 d = make_float3(box[f.v3][0], box[f.v3][1], box[f.v3][2]);
+    box_planes[p] = {a, b, c, d};
+  }
+}
+
+// The geometric center (mean) of a list of vertices.
+//
+// Args
+//    vertices: A list of float3 vertices {v0, ..., vN}.
+//
+// Returns
+//    float3: Sum of the vertices divided by their count.
+//
+__device__ inline float3 FaceCenter(
+    std::initializer_list<const float3> vertices) {
+  float3 total = float3{};
+  for (auto it = vertices.begin(); it != vertices.end(); ++it) {
+    total = total + *it;
+  }
+  return total / vertices.size();
+}
+
+// The normalized normal of a plane spanned by vectors e0 and e1
+//
+// Args
+//    e0, e1: float3 vectors defining a plane
+//
+// Returns
+//    float3: cross(e0, e1) divided by max(||cross(e0, e1)||, kEpsilon)
+//
+__device__ inline float3 GetNormal(const float3 e0, const float3 e1) {
+  const float3 perp = cross(e0, e1);
+  // Clamp the length from below so a zero cross product is not divided by 0
+  return perp / std::fmaxf(norm(perp), kEpsilon);
+}
+
+// The normal of a face with vertices (v0, v1, v2) or (v0, ..., v3).
+// We find the "best" edges connecting the face center to the vertices,
+// such that the cross product between the edges is maximized.
+//
+// Args
+//    vertices: a list of float3 coordinates of the vertices.
+//
+// Returns
+//    float3: normalized normal of the face
+//
+__device__ inline float3 FaceNormal(
+    std::initializer_list<const float3> vertices) {
+  const auto faceCenter = FaceCenter(vertices);
+  auto normal = float3();
+  float maxDist = -1.0f; // float, so the best norm is stored untruncated
+  for (auto v1 = vertices.begin(); v1 != vertices.end() - 1; ++v1) {
+    for (auto v2 = v1 + 1; v2 != vertices.end(); ++v2) {
+      const auto v1ToCenter = *v1 - faceCenter;
+      const auto v2ToCenter = *v2 - faceCenter;
+      const auto dist = norm(cross(v1ToCenter, v2ToCenter));
+      if (dist > maxDist) {
+        normal = GetNormal(v1ToCenter, v2ToCenter);
+        maxDist = dist;
+      }
+    }
+  }
+  return normal;
+}
+
+// The area of the triangular face defined by vertices (v0, v1, v2)
+// Let e0 be the edge from v0 to v1
+// Let e1 be the edge from v0 to v2
+// The area is half the norm of the cross product of e0 and e1
+//
+// Args
+//    tri: FaceVerts of float3 coordinates of the vertices of the face
+//
+// Returns
+//    float: area for the face
+//
+__device__ inline float FaceArea(const FaceVerts& tri) {
+  const float3 e0 = tri.v1 - tri.v0;
+  const float3 e1 = tri.v2 - tri.v0;
+  return norm(cross(e0, e1)) / 2.0;
+}
+
+// The normal of a box plane defined by the verts in `plane`, oriented so
+// that it points toward the centroid of the box given by `center`.
+//
+// Args
+//    plane: float3 coordinates of the vertices of the plane
+//    center: float3 coordinates of the center of the box from
+//        which the plane originated
+//
+// Returns
+//    float3: normal for the plane such that it points towards
+//      the center of the box
+//
+template <typename FaceVertsPlane>
+__device__ inline float3 PlaneNormalDirection(
+    const FaceVertsPlane& plane,
+    const float3& center) {
+  // Center of the quad face
+  const float3 face_ctr =
+      FaceCenter({plane.v0, plane.v1, plane.v2, plane.v3});
+
+  // Normal of the quad face; its sign is arbitrary at this point
+  const float3 face_n =
+      FaceNormal({plane.v0, plane.v1, plane.v2, plane.v3});
+
+  // Express the box center in the frame of the face:
+  //   center = face_ctr + a * e0 + b * e1 + c * n
+  // We know that <e0, n> = 0 and <e1, n> = 0
+  // (<a, b> is the dot product between a and b).
+  // Taking the dot product with n isolates c:
+  //   c = <center - face_ctr - a * e0 - b * e1, n>
+  //     = <center - face_ctr, n>
+  const float c = dot((center - face_ctr), face_n);
+
+  // A negative c means n points away from the box center,
+  // so flip it to make it point "inside"
+  if (c < 0.0f) {
+    return -1.0f * face_n;
+  }
+  return face_n;
+}
+
+// Calculate the volume of the box by summing the volume of
+// each of the tetrahedrons formed with a triangle face and
+// the box centroid.
+//
+// Args
+//    box_tris: array of FaceVerts with the float3 coordinates of the
+//       vertices of each of the triangles in the box
+//    box_center: float3 coordinates of the center of the box
+//    num_tris: number of triangles to read from the start of
+//       box_tris
+//
+// Returns
+//    float: volume of the box
+//
+template <typename BoxTris>
+__device__ inline float BoxVolume(
+    const BoxTris& box_tris,
+    const float3& box_center,
+    const int num_tris) {
+  float total = 0.0;
+
+  // Sum the volume of the tetrahedron formed by each triangle
+  // and the box center
+  for (int t = 0; t < num_tris; ++t) {
+    // Triangle vertices expressed relative to the center
+    const float3 a = box_tris[t].v0 - box_center;
+    const float3 b = box_tris[t].v1 - box_center;
+    const float3 c = box_tris[t].v2 - box_center;
+
+    // The scalar triple product is 6x the signed volume of the
+    // tetrahedron; abs() discards the orientation of the triangle
+    const float triple = dot(a, cross(b, c));
+    const float vol = abs(triple) / 6.0;
+    total = total + vol;
+  }
+  return total;
+}
+
+// Compute the box center as the mean of the verts
+//
+// Args
+//    box_verts: (8, 3) tensor of the corner vertices of the box
+//
+// Returns
+//    float3: coordinates of the center of the box
+//
+template <typename Box>
+__device__ inline float3 BoxCenter(const Box box_verts) {
+  const int num_verts = box_verts.size(0); // Should be 8
+  // Accumulate each coordinate over all the vertices
+  float sum_x = 0.0;
+  float sum_y = 0.0;
+  float sum_z = 0.0;
+  for (int v = 0; v < num_verts; ++v) {
+    sum_x = sum_x + box_verts[v][0];
+    sum_y = sum_y + box_verts[v][1];
+    sum_z = sum_z + box_verts[v][2];
+  }
+
+  // The center is the per-coordinate mean
+  const float mean_x = sum_x / num_verts;
+  const float mean_y = sum_y / num_verts;
+  const float mean_z = sum_z / num_verts;
+  return make_float3(mean_x, mean_y, mean_z);
+}
+
+// Compute the polyhedron center as the mean of the face centers
+// of the triangle faces
+//
+// Args
+//    tris: array of FaceVerts with the float3 coordinates of the
+//       vertices of each of the triangles in the polyhedron
+//    num_tris: number of triangles to read from the start of tris
+//
+// Returns
+//    float3: coordinates of the center of the polyhedron
+//
+template <typename Tris>
+__device__ inline float3 PolyhedronCenter(
+    const Tris& tris,
+    const int num_tris) {
+  // Running sum of the face centers
+  float sum_x = 0.0;
+  float sum_y = 0.0;
+  float sum_z = 0.0;
+
+  for (int t = 0; t < num_tris; ++t) {
+    const float3 a = tris[t].v0;
+    const float3 b = tris[t].v1;
+    const float3 c = tris[t].v2;
+    // Center of this face: mean of its three vertices
+    const float ctr_x = (a.x + b.x + c.x) / 3.0;
+    const float ctr_y = (a.y + b.y + c.y) / 3.0;
+    const float ctr_z = (a.z + b.z + c.z) / 3.0;
+    sum_x = sum_x + ctr_x;
+    sum_y = sum_y + ctr_y;
+    sum_z = sum_z + ctr_z;
+  }
+
+  // Mean of the face centers (this is 0 / 0 when num_tris is 0)
+  const float mean_x = sum_x / num_tris;
+  const float mean_y = sum_y / num_tris;
+  const float mean_z = sum_z / num_tris;
+  return make_float3(mean_x, mean_y, mean_z);
+}
+
+// Compute a boolean indicator for whether a point
+// is inside a plane, where inside refers to whether
+// or not the point has a component in the
+// normal direction of the plane.
+//
+// Args
+//    plane: FaceVerts with the float3 coordinates of the
+//       four corners of the planar face
+//    normal: float3 of the direction of the plane normal
+//    point: float3 of the position of the point of interest
+//
+// Returns
+//    bool: whether or not the point is inside the plane
+//
+__device__ inline bool
+IsInside(const FaceVerts& plane, const float3& normal, const float3& point) {
+  // Reference point on the plane: the center of its four corners
+  const float3 ctr = FaceCenter({plane.v0, plane.v1, plane.v2, plane.v3});
+
+  // Any point p decomposes as p = ctr + a e0 + b e1 + c n, with
+  // <e0, n> = 0 and <e1, n> = 0, so the component along n is
+  //   c = (point - ctr - a * e0 - b * e1).dot(n)
+  //     = (point - ctr).dot(n)
+  const float c = dot((point - ctr), normal);
+
+  // Points lying exactly on the plane (c == 0) count as inside
+  return c >= 0.0f;
+}
+
+// Find the point of intersection between a plane
+// and a line given by the end points (p0, p1)
+//
+// Args
+//    plane: FaceVerts with the float3 coordinates of the
+//       four corners of the planar face
+//    normal: float3 of the direction of the plane normal
+//    p0, p1: float3 of the start and end point of the line
+//
+// Returns
+//    float3: intersection point; midpoint of (p0, p1) if nearly parallel
+//
+__device__ inline float3 PlaneEdgeIntersection(
+    const FaceVerts& plane,
+    const float3& normal,
+    const float3& p0,
+    const float3& p1) {
+  // The center of the plane
+  const float3 plane_ctr = FaceCenter({plane.v0, plane.v1, plane.v2, plane.v3});
+
+  // The point of intersection can be parametrized
+  // p = p0 + a (p1 - p0) where a in [0, 1]
+  // We want to find a such that p is on plane
+  // <p - plane_ctr, n> = 0
+  // i.e. a = -<p0 - plane_ctr, n> / <p1 - p0, n>
+  float3 direc = p1 - p0;
+  direc = direc / fmaxf(norm(direc), kEpsilon);
+  // Fallback when the line is (nearly) parallel to the plane: the midpoint
+  float3 p = (p1 + p0) / 2.0f;
+  // Only solve for a when the unit direction has a component along n
+  if (abs(dot(direc, normal)) >= dEpsilon) {
+    const float top = -1.0f * dot(p0 - plane_ctr, normal);
+    const float bot = dot(p1 - p0, normal);
+    const float a = top / bot;
+    p = p0 + a * (p1 - p0);
+  }
+
+  return p;
+}
+
+// Find the pair of vertices, one from each set, that are furthest apart
+//
+// Args
+//    verts1, verts2: list of float3 defining the list of vertices
+//
+// Returns
+//    v1m, v2m: float3 vectors of the most distant points
+//          in verts1 and verts2 respectively
+//
+__device__ inline std::tuple<float3, float3> ArgMaxVerts(
+    std::initializer_list<float3> verts1,
+    std::initializer_list<float3> verts2) {
+  float3 far1 = float3();
+  float3 far2 = float3();
+  float best = -1.0f;
+  for (const float3& a : verts1) {
+    for (const float3& b : verts2) {
+      const float gap = norm(a - b);
+      if (gap > best) {
+        best = gap;
+        far1 = a;
+        far2 = b;
+      }
+    }
+  }
+  return std::make_tuple(far1, far2);
+}
+
+// Compute a boolean indicator for whether or not two triangular
+// faces are coplanar
+//
+// Args
+//    tri1, tri2: FaceVerts struct of the vertex coordinates of
+//       the triangle face
+//
+// Returns
+//    bool: whether or not the two faces are coplanar
+//
+__device__ inline bool IsCoplanarTriTri(
+    const FaceVerts& tri1,
+    const FaceVerts& tri2) {
+  // Normals of the two triangles
+  const float3 tri1_n = FaceNormal({tri1.v0, tri1.v1, tri1.v2});
+  const float3 tri2_n = FaceNormal({tri2.v0, tri2.v1, tri2.v2});
+
+  // Check if parallel
+  const bool check1 = abs(dot(tri1_n, tri2_n)) > 1 - dEpsilon;
+
+  // Compute most distant points
+  const auto v1mAndv2m =
+      ArgMaxVerts({tri1.v0, tri1.v1, tri1.v2}, {tri2.v0, tri2.v1, tri2.v2});
+  const auto v1m = std::get<0>(v1mAndv2m);
+  const auto v2m = std::get<1>(v1mAndv2m);
+
+  // Unit vector joining the two most distant points
+  float3 n12m = v1m - v2m;
+  n12m = n12m / fmaxf(norm(n12m), kEpsilon);
+
+  // Check if that vector is orthogonal to the normal of either triangle
+  const bool check2 = (abs(dot(n12m, tri1_n)) < dEpsilon) ||
+      (abs(dot(n12m, tri2_n)) < dEpsilon);
+
+  return (check1 && check2);
+}
+
+// Compute a boolean indicator for whether or not a triangular and a planar
+// face are coplanar
+//
+// Args
+//    tri, plane: FaceVerts struct of the vertex coordinates of
+//       the triangle and planar face
+//    normal: the normal direction of the plane pointing "inside"
+//
+// Returns
+//    bool: whether or not the two faces are coplanar
+//
+__device__ inline bool IsCoplanarTriPlane(
+    const FaceVerts& tri,
+    const FaceVerts& plane,
+    const float3& normal) {
+  // Normal of the triangle
+  const float3 nt = FaceNormal({tri.v0, tri.v1, tri.v2});
+
+  // check if parallel
+  const bool check1 = abs(dot(nt, normal)) > 1 - dEpsilon;
+
+  // Compute most distant points
+  const auto v1mAndv2m = ArgMaxVerts(
+      {tri.v0, tri.v1, tri.v2}, {plane.v0, plane.v1, plane.v2, plane.v3});
+  const auto v1m = std::get<0>(v1mAndv2m);
+  const auto v2m = std::get<1>(v1mAndv2m);
+
+  float3 n12m = v1m - v2m;
+  n12m = n12m / fmaxf(norm(n12m), kEpsilon);
+
+  const bool check2 = abs(dot(n12m, normal)) < dEpsilon;
+
+  return (check1 && check2);
+}
+
+// Clip a triangle that has exactly one vertex outside the plane.
+// The part that is kept is a quadrilateral, which is then
+// divided into two triangles.
+//
+// Args
+//    plane: FaceVerts with the float3 coordinates of the
+//        four corners of the planar face
+//    normal: float3 of the direction of the plane normal
+//    vout: float3 of the point in the triangle which is outside
+//       the plane
+//    vin1, vin2: float3 of the points in the triangle which are
+//        inside the plane
+//    face_verts_out: Array of structs of type FaceVerts,
+//       with the coordinates of the new triangle faces
+//       formed after clipping.
+//       All triangles are now "inside" the plane.
+//
+// Returns:
+//    count: (int) number of new faces after clipping the triangle
+//      i.e. the valid faces which have been saved
+//      to face_verts_out (always 2 here)
+//
+template <typename FaceVertsBox>
+__device__ inline int ClipTriByPlaneOneOut(
+    const FaceVerts& plane,
+    const float3& normal,
+    const float3& vout,
+    const float3& vin1,
+    const float3& vin2,
+    FaceVertsBox& face_verts_out) {
+  // Where the edges (vin1, vout) and (vin2, vout) cross the plane
+  const float3 cut1 = PlaneEdgeIntersection(plane, normal, vin1, vout);
+  const float3 cut2 = PlaneEdgeIntersection(plane, normal, vin2, vout);
+
+  // The kept region is the quad (vin1, cut1, cut2, vin2);
+  // split it along the diagonal (vin1, cut2)
+  face_verts_out[0] = {vin1, cut1, cut2};
+  face_verts_out[1] = {vin1, cut2, vin2};
+  return 2;
+}
+
+// Clip a triangle that has exactly two vertices outside the plane.
+// The part that is kept is a smaller triangle.
+//
+// Args
+//    plane: FaceVerts with the float3 coordinates of the
+//       four corners of the planar face
+//    normal: float3 of the direction of the plane normal
+//    vout1, vout2: float3 of the points in the triangle which are
+//       outside the plane
+//    vin: float3 of the point in the triangle which is inside
+//        the plane
+//    face_verts_out: Array of structs of type FaceVerts,
+//       with the coordinates of the new triangle faces
+//       formed after clipping.
+//       All triangles are now "inside" the plane.
+//
+// Returns:
+//    count: (int) number of new faces after clipping the triangle
+//      i.e. the valid faces which have been saved
+//      to face_verts_out (always 1 here)
+//
+template <typename FaceVertsBox>
+__device__ inline int ClipTriByPlaneTwoOut(
+    const FaceVerts& plane,
+    const float3& normal,
+    const float3& vout1,
+    const float3& vout2,
+    const float3& vin,
+    FaceVertsBox& face_verts_out) {
+  // Where the edges (vin, vout1) and (vin, vout2) cross the plane
+  const float3 cut1 = PlaneEdgeIntersection(plane, normal, vin, vout1);
+  const float3 cut2 = PlaneEdgeIntersection(plane, normal, vin, vout2);
+
+  // Only the corner of the triangle around vin is kept
+  face_verts_out[0] = {vin, cut1, cut2};
+
+  return 1;
+}
+
+// Clip a single triangle face so that it lies within the
+// plane, creating new triangle faces where necessary.
+//
+// Args
+//    plane: FaceVerts with the coordinates of the four corners
+//       of the planar face to clip against
+//    tri: FaceVerts with the vertex coordinates
+//       of the triangle face to clip
+//    normal: float3 of the direction of the plane normal
+//    face_verts_out: Array of structs of type FaceVerts,
+//       with the coordinates of the new triangle faces
+//       formed after clipping.
+//       All triangles are now "inside" the plane.
+//
+// Returns:
+//    count: (int) number of new faces after clipping the triangle
+//      (0, 1 or 2) i.e. the valid faces which have been saved
+//      to face_verts_out
+//
+template <typename FaceVertsBox>
+__device__ inline int ClipTriByPlane(
+    const FaceVerts& plane,
+    const FaceVerts& tri,
+    const float3& normal,
+    FaceVertsBox& face_verts_out) {
+  // Get Triangle vertices
+  const float3 v0 = tri.v0;
+  const float3 v1 = tri.v1;
+  const float3 v2 = tri.v2;
+
+  // Check each of the triangle vertices to see if it is inside the plane
+  const bool isin0 = IsInside(plane, normal, v0);
+  const bool isin1 = IsInside(plane, normal, v1);
+  const bool isin2 = IsInside(plane, normal, v2);
+
+  // A coplanar triangle is kept whole, whatever the inside tests returned
+  const bool iscoplanar = IsCoplanarTriPlane(tri, plane, normal);
+  if (iscoplanar) {
+    // Return input vertices
+    face_verts_out[0] = {v0, v1, v2};
+    return 1;
+  }
+
+  // All in
+  if (isin0 && isin1 && isin2) {
+    // Return input vertices
+    face_verts_out[0] = {v0, v1, v2};
+    return 1;
+  }
+
+  // All out
+  if (!isin0 && !isin1 && !isin2) {
+    return 0;
+  }
+
+  // One vert out: pass (vout, vin1, vin2)
+  if (isin0 && isin1 && !isin2) {
+    return ClipTriByPlaneOneOut(plane, normal, v2, v0, v1, face_verts_out);
+  }
+  if (isin0 && !isin1 && isin2) {
+    return ClipTriByPlaneOneOut(plane, normal, v1, v0, v2, face_verts_out);
+  }
+  if (!isin0 && isin1 && isin2) {
+    return ClipTriByPlaneOneOut(plane, normal, v0, v1, v2, face_verts_out);
+  }
+
+  // Two verts out: pass (vout1, vout2, vin)
+  if (isin0 && !isin1 && !isin2) {
+    return ClipTriByPlaneTwoOut(plane, normal, v1, v2, v0, face_verts_out);
+  }
+  if (!isin0 && !isin1 && isin2) {
+    return ClipTriByPlaneTwoOut(plane, normal, v0, v1, v2, face_verts_out);
+  }
+  if (!isin0 && isin1 && !isin2) {
+    return ClipTriByPlaneTwoOut(plane, normal, v0, v2, v1, face_verts_out);
+  }
+
+  // Else return empty (should not be reached)
+  return 0;
+}
+
+// Get the triangles from each box which are part of the
+// intersecting polyhedron by computing the intersection
+// points with each of the planes.
+//
+// Args
+//    planes: Array of structs of type FaceVerts with the coordinates
+//       of the four corners of each of the NUM_PLANES box planes
+//    center: float3 coordinates of the center of the box from which
+//        the planes originate
+//    face_verts_out: Array of structs of type FaceVerts. On input its
+//       first NUM_TRIS entries are the triangles to clip; on output it
+//       holds the triangle faces left after clipping (at most MAX_TRIS).
+//       All triangles are now "inside" the plane.
+//
+// Returns:
+//    count: (int) number of faces in the intersecting shape
+//      i.e. the valid faces which have been saved
+//      to face_verts_out
+//
+template <typename FaceVertsPlane, typename FaceVertsBox>
+__device__ inline int BoxIntersections(
+    const FaceVertsPlane& planes,
+    const float3& center,
+    FaceVertsBox& face_verts_out) {
+  // Initialize num tris to 12
+  int num_tris = NUM_TRIS;
+  for (int p = 0; p < NUM_PLANES; ++p) {
+    // Get plane normal direction
+    const float3 n2 = PlaneNormalDirection(planes[p], center);
+    // Create intermediate vector to store the updated tris
+    FaceVerts tri_verts_updated[MAX_TRIS];
+    int offset = 0;
+
+    // Iterate through triangles in face_verts_out
+    // for the valid tris given by num_tris
+    for (int t = 0; t < num_tris; ++t) {
+      // Clip tri by plane, can max be split into 2 triangles
+      FaceVerts tri_updated[2];
+      const int count =
+          ClipTriByPlane(planes[p], face_verts_out[t], n2, tri_updated);
+      // Append the pieces, never writing past the MAX_TRIS-sized buffer
+      for (int v = 0; v < count && offset < MAX_TRIS; ++v) {
+        tri_verts_updated[offset] = tri_updated[v];
+        offset++;
+      }
+    }
+    // Update the face_verts_out tris
+    num_tris = offset;
+    for (int j = 0; j < num_tris; ++j) {
+      face_verts_out[j] = tri_verts_updated[j];
+    }
+  }
+  return num_tris;
+}
diff --git a/pytorch3d/pytorch3d/csrc/iou_box3d/iou_utils.h b/pytorch3d/pytorch3d/csrc/iou_box3d/iou_utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..283822a112daa9bec5e2e2fe083fa983210273ca
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/iou_box3d/iou_utils.h
@@ -0,0 +1,733 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <assert.h>
+#include <torch/extension.h>
+#include <torch/torch.h>
+#include <algorithm>
+#include <list>
+#include <numeric>
+#include <queue>
+#include <tuple>
+#include <type_traits>
+#include "utils/vec3.h"
+
+// dEpsilon: Tolerance on cos(θ) in dot products of unit vectors, used to
+// decide whether two unit vectors are orthogonal or parallel (coplanar faces).
+// With dEpsilon = 0.001, two unit normals are treated as parallel when the
+// angle θ between them is below roughly 2.5 deg (cos(θ) > 1 - dEpsilon).
+const auto dEpsilon = 1e-3;
+// aEpsilon: Used once in main function to check for small face areas
+const auto aEpsilon = 1e-4;
+// kEpsilon: Used only for norm(u) = u/max(||u||, kEpsilon)
+const auto kEpsilon = 1e-8;
+
+/*
+_PLANES and _TRIS define the 4- and 3-connectivity
+of the 8 box corners.
+_PLANES gives the quad faces of the 3D box
+_TRIS gives the triangle faces of the 3D box
+*/
+const int NUM_PLANES = 6;
+const int NUM_TRIS = 12;
+const int _PLANES[6][4] = { // quad faces as box-corner indices
+    {0, 1, 2, 3},
+    {3, 2, 6, 7},
+    {0, 1, 5, 4},
+    {0, 3, 7, 4},
+    {1, 5, 6, 2},
+    {4, 5, 6, 7},
+};
+const int _TRIS[12][3] = { // two triangles per quad in _PLANES
+    {0, 1, 2}, // this and the next triangle split quad {0, 1, 2, 3}
+    {0, 3, 2},
+    {4, 5, 6}, // this and the next triangle split quad {4, 5, 6, 7}
+    {4, 6, 7},
+    {1, 5, 6}, // this and the next triangle split quad {1, 5, 6, 2}
+    {1, 6, 2},
+    {0, 4, 7}, // this and the next triangle split quad {0, 3, 7, 4}
+    {0, 7, 3},
+    {3, 2, 6}, // this and the next triangle split quad {3, 2, 6, 7}
+    {3, 6, 7},
+    {0, 1, 5}, // this and the next triangle split quad {0, 1, 5, 4}
+    {0, 4, 5},
+};
+
+// Create a new data type for representing the
+// verts for each face which can be triangle or plane.
+// This helps make the code more readable.
+using face_verts = std::vector<std::vector<vec3<float>>>;
+
+// Coordinates of one corner of one quad face of the box.
+//
+// Args
+//    box: (8, 3) tensor accessor for the box vertices
+//    plane_idx: index of the plane in the box
+//    vert_idx: index of the vertex in the plane
+//
+// Returns
+//    vec3<float> (x, y, z) vertex coordinates
+//
+template <typename Box>
+inline vec3<float>
+ExtractVertsPlane(const Box& box, const int plane_idx, const int vert_idx) {
+  const int corner = _PLANES[plane_idx][vert_idx];
+  return vec3<float>(box[corner][0], box[corner][1], box[corner][2]);
+}
+
+// Coordinates of one vertex of one triangle face of the box.
+//
+// Args
+//    box: (8, 3) tensor accessor for the box vertices
+//    tri_idx: index of the triangle face in the box
+//    vert_idx: index of the vertex in the triangle
+//
+// Returns
+//    vec3<float> (x, y, z) vertex coordinates
+//
+template <typename Box>
+inline vec3<float>
+ExtractVertsTri(const Box& box, const int tri_idx, const int vert_idx) {
+  const int corner = _TRIS[tri_idx][vert_idx];
+  return vec3<float>(box[corner][0], box[corner][1], box[corner][2]);
+}
+
+// Args
+//    box: (8, 3) tensor accessor for the box vertices
+//
+// Returns
+//    std::vector<std::vector<vec3<float>>> effectively (F, 3, 3)
+//      coordinates of the 3 verts of each of the NUM_TRIS triangle faces
+//
+template <typename Box>
+inline face_verts GetBoxTris(const Box& box) {
+  face_verts tris;
+  for (int t = 0; t < NUM_TRIS; ++t) {
+    tris.push_back(
+        {ExtractVertsTri(box, t, 0),
+         ExtractVertsTri(box, t, 1),
+         ExtractVertsTri(box, t, 2)});
+  }
+  return tris;
+}
+
+// Args
+//    box: (8, 3) tensor accessor for the box vertices
+//
+// Returns
+//    std::vector<std::vector<vec3<float>>> effectively (P, 4, 3)
+//      coordinates of the 4 verts for each plane
+//
+template <typename Box>
+inline face_verts GetBoxPlanes(const Box& box) {
+  face_verts planes;
+  for (int p = 0; p < NUM_PLANES; ++p) {
+    planes.push_back(
+        {ExtractVertsPlane(box, p, 0),
+         ExtractVertsPlane(box, p, 1),
+         ExtractVertsPlane(box, p, 2),
+         ExtractVertsPlane(box, p, 3)});
+  }
+  return planes;
+}
+
+// The normalized normal of a plane spanned by vectors e0 and e1
+//
+// Args
+//    e0, e1: vec3 vectors defining a plane
+//
+// Returns
+//    vec3: cross(e0, e1) divided by max(||cross(e0, e1)||, kEpsilon)
+//
+inline vec3<float> GetNormal(const vec3<float> e0, const vec3<float> e1) {
+  const vec3<float> perp = cross(e0, e1);
+  // Clamp the length from below so a zero cross product is not divided by 0
+  return perp / std::fmaxf(norm(perp), kEpsilon);
+}
+
+// The center of a triangle tri, i.e. the mean of its
+// three vertices
+//
+// Args
+//    tri: vec3 coordinates of the vertices of the triangle
+//
+// Returns
+//    vec3: center of the triangle
+//
+inline vec3<float> TriCenter(const std::vector<vec3<float>>& tri) {
+  // Add up the three vertices, then average
+  vec3<float> total = tri[0];
+  total = total + tri[1];
+  total = total + tri[2];
+  return total / 3.0f;
+}
+
+// The normal of the triangle defined by vertices (v0, v1, v2)
+// We find the "best" edges connecting the face center to the vertices,
+// such that the cross product between the edges is maximized.
+//
+// Args
+//    tri: vec3 coordinates of the vertices of the face
+//
+// Returns
+//    vec3: normalized normal for the face
+//
+inline vec3<float> TriNormal(const std::vector<vec3<float>>& tri) {
+  const vec3<float> ctr = TriCenter(tri);
+
+  // find the "best" normal as cross product of edges from center
+  float max_dist = -1.0f;
+  vec3<float> n = {0.0f, 0.0f, 0.0f};
+  for (int i = 0; i < 2; ++i) {
+    for (int j = i + 1; j < 3; ++j) {
+      const float dist = norm(cross(tri[i] - ctr, tri[j] - ctr));
+      if (dist > max_dist) {
+        n = GetNormal(tri[i] - ctr, tri[j] - ctr);
+        max_dist = dist; // remember the best so smaller later pairs lose
+      }
+    }
+  }
+  return n;
+}
+
+// The center of a plane, i.e. the mean of its
+// four vertices
+//
+// Args
+//    plane: vec3 coordinates of the vertices of the plane
+//
+// Returns
+//    vec3: center of the plane
+//
+inline vec3<float> PlaneCenter(const std::vector<vec3<float>>& plane) {
+  // Add up the four vertices, then average
+  vec3<float> total = plane[0];
+  total = total + plane[1];
+  total = total + plane[2];
+  total = total + plane[3];
+  return total / 4.0f;
+}
+
+// The normal of a planar face with vertices (v0, v1, v2, v3)
+// We find the "best" edges connecting the face center to the vertices,
+// such that the cross product between the edges is maximized.
+//
+// Args
+//    plane: vec3 coordinates of the vertices of the planar face
+//
+// Returns
+//    vec3: normalized normal of the planar face
+//
+inline vec3<float> PlaneNormal(const std::vector<vec3<float>>& plane) {
+  const vec3<float> ctr = PlaneCenter(plane);
+
+  // find the "best" normal as cross product of edges from center
+  float max_dist = -1.0f;
+  vec3<float> n = {0.0f, 0.0f, 0.0f};
+  for (int i = 0; i < 3; ++i) {
+    for (int j = i + 1; j < 4; ++j) {
+      const float dist = norm(cross(plane[i] - ctr, plane[j] - ctr));
+      if (dist > max_dist) {
+        n = GetNormal(plane[i] - ctr, plane[j] - ctr);
+        max_dist = dist; // remember the best so smaller later pairs lose
+      }
+    }
+  }
+  return n;
+}
+
+// The area of the triangular face defined by vertices (v0, v1, v2)
+// Let e0 be the edge from v0 to v1
+// Let e1 be the edge from v0 to v2
+// The area is half the norm of the cross product of e0 and e1
+//
+// Args
+//    tri: vec3 coordinates of the vertices of the face
+//
+// Returns
+//    float: area for the face
+//
+inline float FaceArea(const std::vector<vec3<float>>& tri) {
+  // Two edges sharing the vertex tri[0]
+  const vec3<float> e0 = tri[1] - tri[0];
+  const vec3<float> e1 = tri[2] - tri[0];
+
+  // Half the norm of their cross product
+  return norm(cross(e0, e1)) / 2.0;
+}
+
+// The normal of a box plane defined by the verts in `plane`, oriented so
+// that it points toward the centroid of the box given by `center`.
+//
+// Args
+//    plane: vec3 coordinates of the vertices of the plane
+//    center: vec3 coordinates of the center of the box from
+//        which the plane originated
+//
+// Returns
+//    vec3: normal for the plane such that it points towards
+//      the center of the box
+//
+inline vec3<float> PlaneNormalDirection(
+    const std::vector<vec3<float>>& plane,
+    const vec3<float>& center) {
+  // Center and normal (of arbitrary sign) of the quad face
+  const vec3<float> face_ctr = PlaneCenter(plane);
+  const vec3<float> face_n = PlaneNormal(plane);
+
+  // Express the box center in the frame of the face:
+  //   center = face_ctr + a * e0 + b * e1 + c * n
+  // We know that <e0, n> = 0 and <e1, n> = 0
+  // (<a, b> is the dot product between a and b).
+  // Taking the dot product with n isolates c:
+  //   c = <center - face_ctr - a * e0 - b * e1, n>
+  //     = <center - face_ctr, n>
+  const float c = dot((center - face_ctr), face_n);
+
+  // A negative c means n points away from the box center,
+  // so flip it to make it point "inside"
+  if (c < 0.0f) {
+    return -1.0f * face_n;
+  }
+  // Otherwise n already points towards the center
+  return face_n;
+}
+
+// Calculate the volume of the box by summing the volumes of
+// the tetrahedra formed by each triangle face together with
+// the box centroid.
+//
+// Args
+//    box_tris: vector of vec3 coordinates of the vertices of each
+//       of the triangles in the box
+//    box_center: vec3 coordinates of the center of the box
+//
+// Returns
+//    float: volume of the box
+//
+inline float BoxVolume(
+    const face_verts& box_tris,
+    const vec3<float>& box_center) {
+  float total = 0.0;
+  // Each triangle spans a tetrahedron with box_center as apex;
+  // accumulate the volumes of all of them
+  for (const auto& tri : box_tris) {
+    // Express the triangle relative to the center:
+    const vec3<float> a = tri[0] - box_center;
+    const vec3<float> b = tri[1] - box_center;
+    const vec3<float> c = tri[2] - box_center;
+
+    // |a . (b x c)| is six times the tetrahedron volume
+    const float triple = dot(a, cross(b, c));
+    const float tet_vol = std::abs(triple) / 6.0;
+    total += tet_vol;
+  }
+  return total;
+}
+
+// Compute the box center as the mean of the verts
+//
+// Args
+//    box_verts: (8, 3) tensor of the corner vertices of the box
+//
+// Returns
+//    vec3: coordinates of the center of the box
+//
+inline vec3<float> BoxCenter(const at::Tensor& box_verts) {
+  const at::Tensor mean_t = at::mean(box_verts, 0);
+  const float cx = mean_t[0].item<float>();
+  const float cy = mean_t[1].item<float>();
+  const float cz = mean_t[2].item<float>();
+  const vec3<float> box_center(cx, cy, cz);
+  return box_center;
+}
+
+// Compute the polyhedron center as the mean of the centroids
+// of its triangle faces
+//
+// Args
+//    tris: vector of vec3 coordinates of the
+//       vertices of each of the triangles in the polyhedron
+//
+// Returns
+//    vec3: coordinates of the center of the polyhedron
+//
+inline vec3<float> PolyhedronCenter(const face_verts& tris) {
+  const int face_count = tris.size();
+  float sum_x = 0.0;
+  float sum_y = 0.0;
+  float sum_z = 0.0;
+
+  // Accumulate the centroid of every face
+  for (const auto& tri : tris) {
+    const vec3<float>& a = tri[0];
+    const vec3<float>& b = tri[1];
+    const vec3<float>& c = tri[2];
+    const float cx = (a.x + b.x + c.x) / 3.0;
+    const float cy = (a.y + b.y + c.y) / 3.0;
+    const float cz = (a.z + b.z + c.z) / 3.0;
+    sum_x += cx;
+    sum_y += cy;
+    sum_z += cz;
+  }
+
+  // Average the accumulated centroids over the number of faces
+  const vec3<float> center(
+      sum_x / face_count,
+      sum_y / face_count,
+      sum_z / face_count);
+
+  return center;
+}
+
+// Compute a boolean indicator for whether a point lies on the
+// inner side of a plane, i.e. whether its offset from the plane
+// has a non-negative component along the plane normal.
+// Points exactly on the plane count as inside.
+//
+// Args
+//    plane: vector of vec3 coordinates of the
+//       vertices of the plane
+//    normal: vec3 of the direction of the plane normal
+//    point: vec3 of the position of the point of interest
+//
+// Returns
+//    bool: whether or not the point is inside the plane
+//
+inline bool IsInside(
+    const std::vector<vec3<float>>& plane,
+    const vec3<float>& normal,
+    const vec3<float>& point) {
+  // Reference point on the plane
+  const vec3<float> origin = PlaneCenter(plane);
+
+  // Writing point = origin + a * e0 + b * e1 + c * normal with
+  // e0, e1 spanning the plane, the in-plane terms vanish under
+  // the dot product with the normal because <e0, normal> = 0
+  // and <e1, normal> = 0. The sign of c is therefore the sign
+  // of <point - origin, normal>:
+  const vec3<float> offset = point - origin;
+  const float along_normal = dot(offset, normal);
+  return along_normal >= 0.0f;
+}
+
+// Find the point where the line through (p0, p1) meets a plane
+//
+// Args
+//    plane: vector of vec3 coordinates of the
+//       vertices of the plane
+//    normal: vec3 of the direction of the plane normal
+//    p0, p1: vec3 of the start and end point of the line
+//
+// Returns
+//    vec3: position of the intersection point, or the midpoint
+//      of (p0, p1) when the line is parallel to the plane
+//
+inline vec3<float> PlaneEdgeIntersection(
+    const std::vector<vec3<float>>& plane,
+    const vec3<float>& normal,
+    const vec3<float>& p0,
+    const vec3<float>& p1) {
+  // Reference point on the plane
+  const vec3<float> origin = PlaneCenter(plane);
+
+  // Points on the line are parametrized as
+  // p(a) = p0 + a * (p1 - p0), with a in [0, 1] on the segment.
+  // The intersection is the p(a) satisfying the plane equation
+  // <p(a) - origin, normal> = 0
+
+  const vec3<float> edge = p1 - p0;
+  const vec3<float> unit_dir = edge / std::fmaxf(norm(edge), kEpsilon);
+
+  // Edge (numerically) parallel to the plane: use the midpoint
+  if (!(std::abs(dot(unit_dir, normal)) >= dEpsilon)) {
+    return (p1 + p0) / 2.0f;
+  }
+
+  const float numer = -1.0f * dot(p0 - origin, normal);
+  const float denom = dot(edge, normal);
+  const float a = numer / denom;
+  return p0 + a * edge;
+}
+
+// Find the farthest-apart pair of vertices across two vertex sets
+//
+// Args
+//    verts1, verts2: vec3 defining the list of vertices
+//
+// Returns
+//    v1m, v2m: the vertex of verts1 and the vertex of verts2 that
+//          are farthest from each other (zeros if a set is empty)
+//
+inline std::tuple<vec3<float>, vec3<float>> ArgMaxVerts(
+    const std::vector<vec3<float>>& verts1,
+    const std::vector<vec3<float>>& verts2) {
+  auto farthest = std::make_tuple(
+      vec3<float>{0.0f, 0.0f, 0.0f}, vec3<float>{0.0f, 0.0f, 0.0f});
+  float best = -1.0f;
+
+  for (const auto& a : verts1) {
+    for (const auto& b : verts2) {
+      const float dist = norm(a - b);
+      if (dist > best) {
+        farthest = std::make_tuple(a, b);
+        best = dist;
+      }
+    }
+  }
+  return farthest;
+}
+
+// Compute a boolean indicator for whether or not two triangular
+// faces are coplanar
+//
+// Args
+//    tri1, tri2: std::vector<vec3> of the vertex coordinates of
+//        triangle faces
+//
+// Returns
+//    bool: whether or not the two faces are coplanar
+//
+inline bool IsCoplanarTriTri(
+    const std::vector<vec3<float>>& tri1,
+    const std::vector<vec3<float>>& tri2) {
+  // Normals of both triangles
+  const vec3<float> normal1 = TriNormal(tri1);
+  const vec3<float> normal2 = TriNormal(tri2);
+
+  // The faces can only be coplanar if their normals are parallel
+  const bool parallel = std::abs(dot(normal1, normal2)) > 1 - dEpsilon;
+
+  // Normalized direction joining the two most distant vertices,
+  // one taken from each triangle
+  const auto farthest = ArgMaxVerts(tri1, tri2);
+  const vec3<float> span = std::get<0>(farthest) - std::get<1>(farthest);
+  const vec3<float> span_dir = span / std::fmaxf(norm(span), kEpsilon);
+
+  // That direction must be perpendicular to at least one of the
+  // two normals, i.e. lie within the plane of one of the
+  // triangles
+  const auto off1 = std::abs(dot(span_dir, normal1));
+  const auto off2 = std::abs(dot(span_dir, normal2));
+  const bool in_plane = (off1 < dEpsilon) || (off2 < dEpsilon);
+
+  return (parallel && in_plane);
+}
+
+// Compute a boolean indicator for whether or not a triangular face
+// lies in the plane of a planar face
+//
+// Args
+//    tri, plane: std::vector<vec3> of the vertex coordinates of
+//        triangular face and planar face
+//    normal: the normal direction of the plane pointing "inside"
+//
+// Returns
+//    bool: whether or not the two faces are coplanar
+//
+inline bool IsCoplanarTriPlane(
+    const std::vector<vec3<float>>& tri,
+    const std::vector<vec3<float>>& plane,
+    const vec3<float>& normal) {
+  // Normal of the triangle
+  const vec3<float> tri_normal = TriNormal(tri);
+
+  // The faces can only be coplanar if their normals are parallel
+  const bool parallel = std::abs(dot(tri_normal, normal)) > 1 - dEpsilon;
+
+  // Normalized direction joining the two most distant vertices,
+  // one taken from each face
+  const auto farthest = ArgMaxVerts(tri, plane);
+  const vec3<float> span = std::get<0>(farthest) - std::get<1>(farthest);
+  const vec3<float> span_dir = span / std::fmaxf(norm(span), kEpsilon);
+
+  // That direction must be perpendicular to the plane normal,
+  // i.e. lie within the plane
+  const bool in_plane = std::abs(dot(span_dir, normal)) < dEpsilon;
+
+  return (parallel && in_plane);
+}
+
+// Clip a triangle that has exactly one vertex outside the plane.
+// Cutting that corner off leaves a quadrilateral, which is
+// returned split into two triangles.
+//
+// Args
+//    plane: vector of vec3 coordinates of the
+//        vertices of the plane
+//    normal: vec3 of the direction of the plane normal
+//    vout: vec3 of the point in the triangle which is outside
+//       the plane
+//    vin1, vin2: vec3 of the points in the triangle which are
+//        inside the plane
+//
+// Returns
+//    std::vector<std::vector<vec3>>: vertex coordinates of the
+//      two new triangle faces
+//
+inline face_verts ClipTriByPlaneOneOut(
+    const std::vector<vec3<float>>& plane,
+    const vec3<float>& normal,
+    const vec3<float>& vout,
+    const vec3<float>& vin1,
+    const vec3<float>& vin2) {
+  // Where the edge (vin1, vout) meets the plane
+  const vec3<float> hit1 = PlaneEdgeIntersection(plane, normal, vin1, vout);
+  // Where the edge (vin2, vout) meets the plane
+  const vec3<float> hit2 = PlaneEdgeIntersection(plane, normal, vin2, vout);
+  // Split the quad (vin1, hit1, hit2, vin2) along (vin1, hit2)
+  return {{vin1, hit1, hit2}, {vin1, hit2, vin2}};
+}
+
+// Clip a triangle that has exactly two vertices outside the plane.
+// Only the corner around the inside vertex remains.
+//
+// Args
+//    plane: vector of vec3 coordinates of the
+//       vertices of the plane
+//    normal: vec3 of the direction of the plane normal
+//    vout1, vout2: vec3 of the points in the triangle which are
+//       outside the plane
+//    vin: vec3 of the point in the triangle which is inside
+//        the plane
+// Returns
+//    std::vector<std::vector<vec3>>: vertex coordinates of the
+//      single new triangle face
+//
+inline face_verts ClipTriByPlaneTwoOut(
+    const std::vector<vec3<float>>& plane,
+    const vec3<float>& normal,
+    const vec3<float>& vout1,
+    const vec3<float>& vout2,
+    const vec3<float>& vin) {
+  // Where the edge (vin, vout1) meets the plane
+  const vec3<float> hit1 = PlaneEdgeIntersection(plane, normal, vin, vout1);
+  // Where the edge (vin, vout2) meets the plane
+  const vec3<float> hit2 = PlaneEdgeIntersection(plane, normal, vin, vout2);
+  // The inside vertex plus the two crossing points
+  return {{vin, hit1, hit2}};
+}
+
+// Clip a triangle face against a plane, keeping only the part
+// on the inner side and creating new triangle faces as needed.
+//
+// Args
+//    plane: vector of vec3 coordinates of the
+//       vertices of the plane
+//    tri: std::vector<vec3> of the vertex coordinates of the
+//       triangle face
+//    normal: vec3 of the direction of the plane normal
+//
+// Returns
+//    std::vector<std::vector<vec3>>: vector of vertex coordinates
+//      of the zero, one or two triangle faces left after clipping.
+//      All triangles are now "inside" the plane.
+//
+inline face_verts ClipTriByPlane(
+    const std::vector<vec3<float>>& plane,
+    const std::vector<vec3<float>>& tri,
+    const vec3<float>& normal) {
+  // Triangle vertices
+  const vec3<float>& v0 = tri[0];
+  const vec3<float>& v1 = tri[1];
+  const vec3<float>& v2 = tri[2];
+
+  // A triangle that already lies in the plane needs no clipping
+  // and is returned unchanged
+  if (IsCoplanarTriPlane(tri, plane, normal)) {
+    return {{v0, v1, v2}};
+  }
+
+  // Classify every vertex as inside / outside of the plane and
+  // count how many of them are inside
+  const bool in0 = IsInside(plane, normal, v0);
+  const bool in1 = IsInside(plane, normal, v1);
+  const bool in2 = IsInside(plane, normal, v2);
+  const int num_in =
+      static_cast<int>(in0) + static_cast<int>(in1) + static_cast<int>(in2);
+
+  // All in: the triangle is kept as is
+  if (num_in == 3) {
+    return {{v0, v1, v2}};
+  }
+
+  // All out: the triangle is discarded
+  if (num_in == 0) {
+    return {};
+  }
+
+  // One vert out: the part inside is a quadrilateral, which is
+  // returned as two triangles
+  if (num_in == 2) {
+    // v2 is the vertex outside
+    if (!in2) {
+      return ClipTriByPlaneOneOut(plane, normal, v2, v0, v1);
+    }
+    // v1 is the vertex outside
+    if (!in1) {
+      return ClipTriByPlaneOneOut(plane, normal, v1, v0, v2);
+    }
+    // v0 is the vertex outside
+    return ClipTriByPlaneOneOut(plane, normal, v0, v1, v2);
+  }
+
+  // Two verts out: the part inside is a smaller triangle
+  // v0 is the only vertex inside
+  if (in0) {
+    return ClipTriByPlaneTwoOut(plane, normal, v1, v2, v0);
+  }
+  // v2 is the only vertex inside
+  if (in2) {
+    return ClipTriByPlaneTwoOut(plane, normal, v0, v1, v2);
+  }
+  // v1 is the only vertex inside
+  return ClipTriByPlaneTwoOut(plane, normal, v0, v2, v1);
+}
+
+// Clip the triangle faces of one box successively by every plane
+// of the other box, which leaves the triangles that are part of
+// the intersecting polyhedron.
+//
+// Args
+//    tris: vertex coordinates of all the triangle faces
+//       in the box
+//    planes: vertex coordinates of all the planes in the box
+//    center: vec3 coordinates of the center of the box from which
+//        the planes originate
+//
+// Returns
+//    std::vector<std::vector<vec3>>: vector of vertex coordinates
+//      of the new triangle faces formed after clipping.
+//      All triangles are now "inside" the planes.
+//
+inline face_verts BoxIntersections(
+    const face_verts& tris,
+    const face_verts& planes,
+    const vec3<float>& center) {
+  // Work on a copy so that the input triangles stay untouched
+  face_verts clipped = tris;
+  for (int p = 0; p < NUM_PLANES; ++p) {
+    const auto& plane = planes[p];
+    // Normal of the plane, oriented towards the box center
+    const vec3<float> inward = PlaneNormalDirection(plane, center);
+    // Clip every surviving triangle by this plane and gather
+    // the resulting pieces
+    face_verts next;
+    for (const auto& tri : clipped) {
+      // Zero, one or two triangles come back from the clipping
+      const face_verts pieces = ClipTriByPlane(plane, tri, inward);
+      for (const auto& piece : pieces) {
+        next.push_back(piece);
+      }
+    }
+    // The pieces are the input for the next plane
+    clipped = next;
+  }
+  return clipped;
+}
diff --git a/pytorch3d/pytorch3d/csrc/knn/knn.cu b/pytorch3d/pytorch3d/csrc/knn/knn.cu
new file mode 100644
index 0000000000000000000000000000000000000000..93a3060b2294af481e1df2a829910a199d47f533
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/knn/knn.cu
@@ -0,0 +1,587 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <float.h>
+#include <iostream>
+#include <tuple>
+
+#include "utils/dispatch.cuh"
+#include "utils/mink.cuh"
+
+// Brute-force K nearest neighbors, version 0: D and K are runtime values.
+// Work split used by the forward kernels in this file: a chunk is
+// blockDim.x consecutive points of one cloud of points1, so there are
+// chunks_per_cloud = 1 + (P1 - 1) / blockDim.x chunks per cloud and
+// N * chunks_per_cloud chunks overall. Block b works on chunks
+// b, b + gridDim.x, b + 2 * gridDim.x, ...; chunk i belongs to cloud
+// i / chunks_per_cloud and starts at point blockDim.x * (i % chunks_per_cloud)
+
+template <typename scalar_t>
+__global__ void KNearestNeighborKernelV0(
+    const scalar_t* __restrict__ points1, // indexed as (N, P1, D)
+    const scalar_t* __restrict__ points2, // indexed as (N, P2, D)
+    const int64_t* __restrict__ lengths1, // (N,) valid points of points1
+    const int64_t* __restrict__ lengths2, // (N,) valid points of points2
+    scalar_t* __restrict__ dists, // indexed as (N, P1, K)
+    int64_t* __restrict__ idxs, // indexed as (N, P1, K)
+    const size_t N,
+    const size_t P1,
+    const size_t P2,
+    const size_t D,
+    const size_t K,
+    const size_t norm) { // 2: sum of squares, otherwise sum of |diff|
+  // MinK operates directly on this thread's slice of dists and idxs.
+  const int64_t chunks_per_cloud = (1 + (P1 - 1) / blockDim.x);
+  const int64_t chunks_to_do = N * chunks_per_cloud;
+  for (int64_t chunk = blockIdx.x; chunk < chunks_to_do; chunk += gridDim.x) {
+    const int64_t n = chunk / chunks_per_cloud;
+    const int64_t start_point = blockDim.x * (chunk % chunks_per_cloud);
+    int64_t p1 = start_point + threadIdx.x;
+    if (p1 >= lengths1[n])
+      continue; // beyond the valid points of cloud n
+    const int64_t offset = n * P1 * K + p1 * K; // int overflows when large
+    int64_t length2 = lengths2[n];
+    MinK<scalar_t, int64_t> mink(dists + offset, idxs + offset, K);
+    for (int64_t p2 = 0; p2 < length2; ++p2) {
+      // Accumulate the distance between points1[n, p1] and points2[n, p2]
+      scalar_t dist = 0;
+      for (int d = 0; d < D; ++d) {
+        scalar_t coord1 = points1[n * P1 * D + p1 * D + d];
+        scalar_t coord2 = points2[n * P2 * D + p2 * D + d];
+        scalar_t diff = coord1 - coord2;
+        scalar_t norm_diff = (norm == 2) ? (diff * diff) : abs(diff);
+        dist += norm_diff;
+      }
+      mink.add(dist, p2);
+    }
+  }
+}
+
+template <typename scalar_t, int64_t D>
+__global__ void KNearestNeighborKernelV1(
+    const scalar_t* __restrict__ points1, // indexed as (N, P1, D)
+    const scalar_t* __restrict__ points2, // indexed as (N, P2, D)
+    const int64_t* __restrict__ lengths1, // (N,) valid points of points1
+    const int64_t* __restrict__ lengths2, // (N,) valid points of points2
+    scalar_t* __restrict__ dists, // indexed as (N, P1, K)
+    int64_t* __restrict__ idxs, // indexed as (N, P1, K)
+    const size_t N,
+    const size_t P1,
+    const size_t P2,
+    const size_t K,
+    const size_t norm) { // 2: sum of squares, otherwise sum of |diff|
+  // Same idea as version 0, but D is hoisted into a template argument
+  // so the current point can be cached in a thread-local array. The
+  // current best K dists and indices are still kept in the dists / idxs
+  // arrays, so this should work for very large K and fairly large D.
+  scalar_t cur_point[D];
+  const int64_t chunks_per_cloud = (1 + (P1 - 1) / blockDim.x);
+  const int64_t chunks_to_do = N * chunks_per_cloud;
+  for (int64_t chunk = blockIdx.x; chunk < chunks_to_do; chunk += gridDim.x) {
+    const int64_t n = chunk / chunks_per_cloud;
+    const int64_t start_point = blockDim.x * (chunk % chunks_per_cloud);
+    int64_t p1 = start_point + threadIdx.x;
+    if (p1 >= lengths1[n])
+      continue; // beyond the valid points of cloud n
+    for (int d = 0; d < D; ++d) {
+      cur_point[d] = points1[n * P1 * D + p1 * D + d];
+    }
+    const int64_t offset = n * P1 * K + p1 * K; // int overflows when large
+    int64_t length2 = lengths2[n];
+    MinK<scalar_t, int64_t> mink(dists + offset, idxs + offset, K);
+    for (int64_t p2 = 0; p2 < length2; ++p2) {
+      // Accumulate the distance between cur_point and points2[n, p2]
+      scalar_t dist = 0;
+      for (int d = 0; d < D; ++d) {
+        scalar_t diff = cur_point[d] - points2[n * P2 * D + p2 * D + d];
+        scalar_t norm_diff = (norm == 2) ? (diff * diff) : abs(diff);
+        dist += norm_diff;
+      }
+      mink.add(dist, p2);
+    }
+  }
+}
+
+// Shim functor for DispatchKernel1D: run() launches KNearestNeighborKernelV1
+template <typename scalar_t, int64_t D>
+struct KNearestNeighborV1Functor {
+  static void run(
+      size_t blocks, // grid size of the launch
+      size_t threads, // block size of the launch
+      const scalar_t* __restrict__ points1,
+      const scalar_t* __restrict__ points2,
+      const int64_t* __restrict__ lengths1,
+      const int64_t* __restrict__ lengths2,
+      scalar_t* __restrict__ dists,
+      int64_t* __restrict__ idxs,
+      const size_t N,
+      const size_t P1,
+      const size_t P2,
+      const size_t K,
+      const size_t norm) {
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream(); // launch stream
+    KNearestNeighborKernelV1<scalar_t, D><<<blocks, threads, 0, stream>>>(
+        points1, points2, lengths1, lengths2, dists, idxs, N, P1, P2, K, norm);
+  }
+};
+
+template <typename scalar_t, int64_t D, int64_t K>
+__global__ void KNearestNeighborKernelV2(
+    const scalar_t* __restrict__ points1, // indexed as (N, P1, D)
+    const scalar_t* __restrict__ points2, // indexed as (N, P2, D)
+    const int64_t* __restrict__ lengths1, // (N,) valid points of points1
+    const int64_t* __restrict__ lengths2, // (N,) valid points of points2
+    scalar_t* __restrict__ dists, // indexed as (N, P1, K)
+    int64_t* __restrict__ idxs, // indexed as (N, P1, K)
+    const int64_t N,
+    const int64_t P1,
+    const int64_t P2,
+    const size_t norm) { // 2: sum of squares, otherwise sum of |diff|
+  // Same general implementation as V1, but also hoist K into a template arg.
+  scalar_t cur_point[D];
+  scalar_t min_dists[K]; // thread-local best-K buffers, copied out at the end
+  int min_idxs[K];
+  const int64_t chunks_per_cloud = (1 + (P1 - 1) / blockDim.x);
+  const int64_t chunks_to_do = N * chunks_per_cloud;
+  for (int64_t chunk = blockIdx.x; chunk < chunks_to_do; chunk += gridDim.x) {
+    const int64_t n = chunk / chunks_per_cloud;
+    const int64_t start_point = blockDim.x * (chunk % chunks_per_cloud);
+    int64_t p1 = start_point + threadIdx.x;
+    if (p1 >= lengths1[n])
+      continue; // beyond the valid points of cloud n
+    for (int d = 0; d < D; ++d) {
+      cur_point[d] = points1[n * P1 * D + p1 * D + d];
+    }
+    int64_t length2 = lengths2[n];
+    MinK<scalar_t, int> mink(min_dists, min_idxs, K);
+    for (int p2 = 0; p2 < length2; ++p2) {
+      scalar_t dist = 0;
+      for (int d = 0; d < D; ++d) {
+        const int64_t offset = n * P2 * D + p2 * D + d; // int would overflow
+        scalar_t diff = cur_point[d] - points2[offset];
+        scalar_t norm_diff = (norm == 2) ? (diff * diff) : abs(diff);
+        dist += norm_diff;
+      }
+      mink.add(dist, p2);
+    }
+    for (int k = 0; k < mink.size(); ++k) { // only the entries MinK holds
+      idxs[n * P1 * K + p1 * K + k] = min_idxs[k];
+      dists[n * P1 * K + p1 * K + k] = min_dists[k];
+    }
+  }
+}
+
+// Shim functor for DispatchKernel2D: run() launches KNearestNeighborKernelV2
+template <typename scalar_t, int64_t D, int64_t K>
+struct KNearestNeighborKernelV2Functor {
+  static void run(
+      size_t blocks, // grid size of the launch
+      size_t threads, // block size of the launch
+      const scalar_t* __restrict__ points1,
+      const scalar_t* __restrict__ points2,
+      const int64_t* __restrict__ lengths1,
+      const int64_t* __restrict__ lengths2,
+      scalar_t* __restrict__ dists,
+      int64_t* __restrict__ idxs,
+      const int64_t N,
+      const int64_t P1,
+      const int64_t P2,
+      const size_t norm) {
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream(); // launch stream
+    KNearestNeighborKernelV2<scalar_t, D, K><<<blocks, threads, 0, stream>>>(
+        points1, points2, lengths1, lengths2, dists, idxs, N, P1, P2, norm);
+  }
+};
+
+template <typename scalar_t, int D, int K>
+__global__ void KNearestNeighborKernelV3(
+    const scalar_t* __restrict__ points1, // indexed as (N, P1, D)
+    const scalar_t* __restrict__ points2, // indexed as (N, P2, D)
+    const int64_t* __restrict__ lengths1, // (N,) valid points of points1
+    const int64_t* __restrict__ lengths2, // (N,) valid points of points2
+    scalar_t* __restrict__ dists, // indexed as (N, P1, K)
+    int64_t* __restrict__ idxs, // indexed as (N, P1, K)
+    const size_t N,
+    const size_t P1,
+    const size_t P2,
+    const size_t norm) { // 2: sum of squares, otherwise sum of |diff|
+  // Same idea as V2, but RegisterMinK is used for the thread-local arrays.
+  // Enabling sorting for this version leads to huge slowdowns; the original
+  // author suspects that it forces min_dists into local memory rather than
+  // registers. As a result this version is always unsorted.
+  scalar_t cur_point[D];
+  scalar_t min_dists[K];
+  int min_idxs[K];
+  const int64_t chunks_per_cloud = (1 + (P1 - 1) / blockDim.x);
+  const int64_t chunks_to_do = N * chunks_per_cloud;
+  for (int64_t chunk = blockIdx.x; chunk < chunks_to_do; chunk += gridDim.x) {
+    const int64_t n = chunk / chunks_per_cloud;
+    const int64_t start_point = blockDim.x * (chunk % chunks_per_cloud);
+    int64_t p1 = start_point + threadIdx.x;
+    if (p1 >= lengths1[n])
+      continue; // beyond the valid points of cloud n
+    for (int d = 0; d < D; ++d) {
+      cur_point[d] = points1[n * P1 * D + p1 * D + d];
+    }
+    int64_t length2 = lengths2[n];
+    RegisterMinK<scalar_t, int, K> mink(min_dists, min_idxs);
+    for (int p2 = 0; p2 < length2; ++p2) {
+      scalar_t dist = 0;
+      for (int d = 0; d < D; ++d) {
+        const int64_t offset = n * P2 * D + static_cast<int64_t>(p2) * D + d;
+        scalar_t diff = cur_point[d] - points2[offset];
+        scalar_t norm_diff = (norm == 2) ? (diff * diff) : abs(diff);
+        dist += norm_diff;
+      }
+      mink.add(dist, p2);
+    }
+    for (int k = 0; k < mink.size(); ++k) { // only the entries mink holds
+      idxs[n * P1 * K + p1 * K + k] = min_idxs[k];
+      dists[n * P1 * K + p1 * K + k] = min_dists[k];
+    }
+  }
+}
+
+// Shim functor for DispatchKernel2D: run() launches KNearestNeighborKernelV3
+template <typename scalar_t, int64_t D, int64_t K>
+struct KNearestNeighborKernelV3Functor {
+  static void run(
+      size_t blocks, // grid size of the launch
+      size_t threads, // block size of the launch
+      const scalar_t* __restrict__ points1,
+      const scalar_t* __restrict__ points2,
+      const int64_t* __restrict__ lengths1,
+      const int64_t* __restrict__ lengths2,
+      scalar_t* __restrict__ dists,
+      int64_t* __restrict__ idxs,
+      const size_t N,
+      const size_t P1,
+      const size_t P2,
+      const size_t norm) {
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream(); // launch stream
+    KNearestNeighborKernelV3<scalar_t, D, K><<<blocks, threads, 0, stream>>>(
+        points1, points2, lengths1, lengths2, dists, idxs, N, P1, P2, norm);
+  }
+};
+
+constexpr int V1_MIN_D = 1; // version 1: accepted D range (inclusive)
+constexpr int V1_MAX_D = 32;
+
+constexpr int V2_MIN_D = 1; // version 2: accepted D and K ranges (inclusive)
+constexpr int V2_MAX_D = 8;
+constexpr int V2_MIN_K = 1;
+constexpr int V2_MAX_K = 32;
+
+constexpr int V3_MIN_D = 1; // version 3: accepted D and K ranges (inclusive)
+constexpr int V3_MAX_D = 8;
+constexpr int V3_MIN_K = 1;
+constexpr int V3_MAX_K = 4;
+
+bool InBounds(const int64_t min, const int64_t x, const int64_t max) {
+  return !(x < min || max < x); // true iff x lies in [min, max]
+}
+
+bool KnnCheckVersion(int version, const int64_t D, const int64_t K) {
+  switch (version) {
+    case 1:
+      return InBounds(V1_MIN_D, D, V1_MAX_D);
+    case 2:
+      return InBounds(V2_MIN_D, D, V2_MAX_D) && InBounds(V2_MIN_K, K, V2_MAX_K);
+    case 3:
+      return InBounds(V3_MIN_D, D, V3_MAX_D) && InBounds(V3_MIN_K, K, V3_MAX_K);
+  }
+  // Version 0 accepts every (D, K); any other version is unknown
+  return version == 0;
+}
+
+int ChooseVersion(const int64_t D, const int64_t K) {
+  // Try versions 3, 2, 1 in that order; 0 is what remains otherwise
+  int candidate = 3;
+  while (candidate >= 1 && !KnnCheckVersion(candidate, D, K)) {
+    --candidate;
+  }
+  return candidate;
+}
+
+std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCuda(
+    const at::Tensor& p1,
+    const at::Tensor& p2,
+    const at::Tensor& lengths1,
+    const at::Tensor& lengths2,
+    const int norm,
+    const int K,
+    int version) {
+  // Forward KNN on CUDA, returns (idxs, dists). Inputs must share one GPU.
+  at::TensorArg p1_t{p1, "p1", 1}, p2_t{p2, "p2", 2},
+      lengths1_t{lengths1, "lengths1", 3}, lengths2_t{lengths2, "lengths2", 4};
+  at::CheckedFrom c = "KNearestNeighborIdxCuda";
+  at::checkAllSameGPU(c, {p1_t, p2_t, lengths1_t, lengths2_t});
+  at::checkAllSameType(c, {p1_t, p2_t});
+
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(p1.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const auto N = p1.size(0);
+  const auto P1 = p1.size(1);
+  const auto P2 = p2.size(1);
+  const auto D = p2.size(2); // feature size, taken from p2
+  const int64_t K_64 = K;
+
+  TORCH_CHECK((norm == 1) || (norm == 2), "Norm must be 1 or 2.");
+
+  TORCH_CHECK(p1.size(2) == D, "Point sets must have the same last dimension");
+  auto long_dtype = lengths1.options().dtype(at::kLong);
+  auto idxs = at::zeros({N, P1, K}, long_dtype); // (N, P1, K), int64
+  auto dists = at::zeros({N, P1, K}, p1.options()); // (N, P1, K), p1's dtype
+
+  if (idxs.numel() == 0) { // nothing to compute: skip the kernel launch
+    AT_CUDA_CHECK(cudaGetLastError());
+    return std::make_tuple(idxs, dists);
+  }
+
+  if (version < 0) { // negative version: choose automatically
+    version = ChooseVersion(D, K);
+  } else if (!KnnCheckVersion(version, D, K)) {
+    int new_version = ChooseVersion(D, K);
+    std::cout << "WARNING: Requested KNN version " << version
+              << " is not compatible with D = " << D << "; K = " << K
+              << ". Falling back to version = " << new_version << std::endl;
+    version = new_version;
+  }
+
+  // At this point the version should be valid no matter what data the
+  // user gave us. It is checked once more to be sure; this time as an
+  // assertion, since failing here means there is a bug in the version
+  // selection or checking code above.
+  AT_ASSERTM(KnnCheckVersion(version, D, K), "Invalid version");
+
+  const size_t threads = 256;
+  const size_t blocks = 256;
+  if (version == 0) {
+    AT_DISPATCH_FLOATING_TYPES(
+        p1.scalar_type(), "knn_kernel_cuda", ([&] {
+          KNearestNeighborKernelV0<scalar_t><<<blocks, threads, 0, stream>>>(
+              p1.contiguous().data_ptr<scalar_t>(),
+              p2.contiguous().data_ptr<scalar_t>(),
+              lengths1.contiguous().data_ptr<int64_t>(),
+              lengths2.contiguous().data_ptr<int64_t>(),
+              dists.data_ptr<scalar_t>(),
+              idxs.data_ptr<int64_t>(),
+              N,
+              P1,
+              P2,
+              D,
+              K,
+              norm);
+        }));
+  } else if (version == 1) {
+    AT_DISPATCH_FLOATING_TYPES(p1.scalar_type(), "knn_kernel_cuda", ([&] {
+                                 DispatchKernel1D<
+                                     KNearestNeighborV1Functor,
+                                     scalar_t,
+                                     V1_MIN_D,
+                                     V1_MAX_D>(
+                                     D,
+                                     blocks,
+                                     threads,
+                                     p1.contiguous().data_ptr<scalar_t>(),
+                                     p2.contiguous().data_ptr<scalar_t>(),
+                                     lengths1.contiguous().data_ptr<int64_t>(),
+                                     lengths2.contiguous().data_ptr<int64_t>(),
+                                     dists.data_ptr<scalar_t>(),
+                                     idxs.data_ptr<int64_t>(),
+                                     N,
+                                     P1,
+                                     P2,
+                                     K,
+                                     norm);
+                               }));
+  } else if (version == 2) {
+    AT_DISPATCH_FLOATING_TYPES(p1.scalar_type(), "knn_kernel_cuda", ([&] {
+                                 DispatchKernel2D<
+                                     KNearestNeighborKernelV2Functor,
+                                     scalar_t,
+                                     V2_MIN_D,
+                                     V2_MAX_D,
+                                     V2_MIN_K,
+                                     V2_MAX_K>(
+                                     D,
+                                     K_64,
+                                     blocks,
+                                     threads,
+                                     p1.contiguous().data_ptr<scalar_t>(),
+                                     p2.contiguous().data_ptr<scalar_t>(),
+                                     lengths1.contiguous().data_ptr<int64_t>(),
+                                     lengths2.contiguous().data_ptr<int64_t>(),
+                                     dists.data_ptr<scalar_t>(),
+                                     idxs.data_ptr<int64_t>(),
+                                     N,
+                                     P1,
+                                     P2,
+                                     norm);
+                               }));
+  } else if (version == 3) {
+    AT_DISPATCH_FLOATING_TYPES(p1.scalar_type(), "knn_kernel_cuda", ([&] {
+                                 DispatchKernel2D<
+                                     KNearestNeighborKernelV3Functor,
+                                     scalar_t,
+                                     V3_MIN_D,
+                                     V3_MAX_D,
+                                     V3_MIN_K,
+                                     V3_MAX_K>(
+                                     D,
+                                     K_64,
+                                     blocks,
+                                     threads,
+                                     p1.contiguous().data_ptr<scalar_t>(),
+                                     p2.contiguous().data_ptr<scalar_t>(),
+                                     lengths1.contiguous().data_ptr<int64_t>(),
+                                     lengths2.contiguous().data_ptr<int64_t>(),
+                                     dists.data_ptr<scalar_t>(),
+                                     idxs.data_ptr<int64_t>(),
+                                     N,
+                                     P1,
+                                     P2,
+                                     norm);
+                               }));
+  }
+  AT_CUDA_CHECK(cudaGetLastError()); // surfaces kernel launch errors
+  return std::make_tuple(idxs, dists);
+}
+
+// ------------------------------------------------------------- //
+//                   Backward Operators                          //
+// ------------------------------------------------------------- //
+
+// TODO(gkioxari) support all data types once AtomicAdd supports doubles.
+// Currently, support is for floats only.
+__global__ void KNearestNeighborBackwardKernel(
+    const float* __restrict__ p1, // (N, P1, D)
+    const float* __restrict__ p2, // (N, P2, D)
+    const int64_t* __restrict__ lengths1, // (N,)
+    const int64_t* __restrict__ lengths2, // (N,)
+    const int64_t* __restrict__ idxs, // (N, P1, K)
+    const float* __restrict__ grad_dists, // (N, P1, K)
+    float* __restrict__ grad_p1, // (N, P1, D)
+    float* __restrict__ grad_p2, // (N, P2, D)
+    const size_t N,
+    const size_t P1,
+    const size_t P2,
+    const size_t K,
+    const size_t D,
+    const size_t norm) {
+  const size_t first = blockIdx.x * blockDim.x + threadIdx.x;
+  const size_t step = gridDim.x * blockDim.x;
+  const size_t total = N * P1 * K * D; // one work item per (n, p1, k, d)
+  for (size_t item = first; item < total; item += step) {
+    // Unravel the flat work index into (n, p1_idx, k, d).
+    const size_t d = item % D; // d-th dimension in the feature vector
+    const size_t k = (item / D) % K; // k-th nearest neighbor
+    const size_t p1_idx = (item / (K * D)) % P1; // index of point in p1
+    const size_t n = item / (P1 * K * D); // batch index
+
+    // Skip padded p1 points and neighbor slots beyond the valid p2 count.
+    const size_t num1 = lengths1[n];
+    const size_t num2 = lengths2[n];
+    if (p1_idx >= num1 || k >= num2) {
+      continue;
+    }
+
+    const size_t nbr = n * P1 * K + p1_idx * K + k; // offset in (N, P1, K)
+    const float grad_dist = grad_dists[nbr];
+    const int64_t p2_idx = idxs[nbr];
+    if (p2_idx == -1) { // pad value: no neighbor to accumulate into
+      continue;
+    }
+    const size_t p1_off = n * P1 * D + p1_idx * D + d; // offset in (N, P1, D)
+    const size_t p2_off = n * P2 * D + p2_idx * D + d; // offset in (N, P2, D)
+    float diff = 0.0;
+    if (norm == 1) {
+      const float sign = (p1[p1_off] > p2[p2_off]) ? 1.0 : -1.0;
+      diff = grad_dist * sign;
+    } else { // norm is 2
+      diff = 2.0 * grad_dist * (p1[p1_off] - p2[p2_off]);
+    }
+    // Several work items can target the same output element, hence atomics.
+    atomicAdd(grad_p1 + p1_off, diff);
+    atomicAdd(grad_p2 + p2_off, -1.0f * diff);
+  }
+}
+
+std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackwardCuda(
+    const at::Tensor& p1,
+    const at::Tensor& p2,
+    const at::Tensor& lengths1,
+    const at::Tensor& lengths2,
+    const at::Tensor& idxs,
+    int norm,
+    const at::Tensor& grad_dists) {
+  // Validate inputs, launch the backward kernel, return (grad_p1, grad_p2).
+  // Check inputs are on the same device
+  at::TensorArg p1_t{p1, "p1", 1}, p2_t{p2, "p2", 2},
+      lengths1_t{lengths1, "lengths1", 3}, lengths2_t{lengths2, "lengths2", 4},
+      idxs_t{idxs, "idxs", 5}, grad_dists_t{grad_dists, "grad_dists", 6};
+  at::CheckedFrom c = "KNearestNeighborBackwardCuda";
+  at::checkAllSameGPU(
+      c, {p1_t, p2_t, lengths1_t, lengths2_t, idxs_t, grad_dists_t});
+  at::checkAllSameType(c, {p1_t, p2_t, grad_dists_t});
+  // This is nondeterministic because atomicAdd
+  at::globalContext().alertNotDeterministic("KNearestNeighborBackwardCuda");
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(p1.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const auto N = p1.size(0);
+  const auto P1 = p1.size(1);
+  const auto P2 = p2.size(1);
+  const auto D = p2.size(2);
+  const auto K = idxs.size(2);
+
+  // The kernel indexes raw pointers with these sizes, so reject any mismatch.
+  TORCH_CHECK(p2.size(0) == N, "p1 and p2 must have the same batch dimension");
+  TORCH_CHECK(p1.size(2) == D, "Point sets must have the same last dimension");
+  TORCH_CHECK(lengths1.size(0) == N, "lengths1 must have one entry per batch");
+  TORCH_CHECK(lengths2.size(0) == N, "lengths2 must have one entry per batch");
+  TORCH_CHECK(idxs.size(0) == N, "KNN idxs must have the same batch dimension");
+  TORCH_CHECK(
+      idxs.size(1) == P1, "KNN idxs must have the same point dimension as p1");
+  TORCH_CHECK(grad_dists.size(0) == N, "grad_dists must be (N, P1, K)");
+  TORCH_CHECK(grad_dists.size(1) == P1, "grad_dists must be (N, P1, K)");
+  TORCH_CHECK(grad_dists.size(2) == K, "grad_dists must be (N, P1, K)");
+
+  auto grad_p1 = at::zeros({N, P1, D}, p1.options());
+  auto grad_p2 = at::zeros({N, P2, D}, p2.options());
+  if (grad_p1.numel() == 0 || grad_p2.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return std::make_tuple(grad_p1, grad_p2);
+  }
+
+  const int blocks = 64;
+  const int threads = 512;
+  KNearestNeighborBackwardKernel<<<blocks, threads, 0, stream>>>(
+      p1.contiguous().data_ptr<float>(),
+      p2.contiguous().data_ptr<float>(),
+      lengths1.contiguous().data_ptr<int64_t>(),
+      lengths2.contiguous().data_ptr<int64_t>(),
+      idxs.contiguous().data_ptr<int64_t>(),
+      grad_dists.contiguous().data_ptr<float>(),
+      grad_p1.data_ptr<float>(),
+      grad_p2.data_ptr<float>(),
+      N,
+      P1,
+      P2,
+      K,
+      D,
+      norm);
+  AT_CUDA_CHECK(cudaGetLastError());
+  return std::make_tuple(grad_p1, grad_p2);
+}
diff --git a/pytorch3d/pytorch3d/csrc/knn/knn.h b/pytorch3d/pytorch3d/csrc/knn/knn.h
new file mode 100644
index 0000000000000000000000000000000000000000..7fc8d48870758ccdeb6f36b74ba9afd916b2b8d0
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/knn/knn.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/extension.h>
+#include <tuple>
+#include "utils/pytorch3d_cutils.h"
+
+// Compute indices of K nearest neighbors in pointcloud p2 to points
+// in pointcloud p1.
+//
+// Args:
+//    p1: FloatTensor of shape (N, P1, D) giving a batch of pointclouds each
+//        containing P1 points of dimension D.
+//    p2: FloatTensor of shape (N, P2, D) giving a batch of pointclouds each
+//        containing P2 points of dimension D.
+//    lengths1: LongTensor, shape (N,), giving actual length of each P1 cloud.
+//    lengths2: LongTensor, shape (N,), giving actual length of each P2 cloud.
+//    norm: int specifying the norm for the distance (1 for L1, 2 for L2)
+//    K: int giving the number of nearest points to return.
+//    version: Integer telling which implementation to use.
+//
+// Returns:
+//    p1_neighbor_idx: LongTensor of shape (N, P1, K), where
+//        p1_neighbor_idx[n, i, k] = j means that the kth nearest
+//        neighbor to p1[n, i] in the cloud p2[n] is p2[n, j].
+//        It is padded with zeros so that it can be used easily in a later
+//        gather() operation.
+//
+//    p1_neighbor_dists: FloatTensor of shape (N, P1, K) containing the squared
+//        distance from each point p1[n, p, :] to its K neighbors
+//        p2[n, p1_neighbor_idx[n, p, k], :].
+
+// CPU implementation.
+std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCpu(
+    const at::Tensor& p1,
+    const at::Tensor& p2,
+    const at::Tensor& lengths1,
+    const at::Tensor& lengths2,
+    const int norm,
+    const int K);
+
+// CUDA implementation
+std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCuda(
+    const at::Tensor& p1,
+    const at::Tensor& p2,
+    const at::Tensor& lengths1,
+    const at::Tensor& lengths2,
+    const int norm,
+    const int K,
+    const int version);
+
+// Exposed dispatcher (CUDA or CPU); inline because it is defined in a header.
+inline std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdx(
+    const at::Tensor& p1,
+    const at::Tensor& p2,
+    const at::Tensor& lengths1,
+    const at::Tensor& lengths2,
+    const int norm,
+    const int K,
+    const int version) {
+  if (p1.is_cuda() || p2.is_cuda()) { // either cloud on GPU -> CUDA path
+#ifdef WITH_CUDA
+    CHECK_CUDA(p1); // presumably from utils/pytorch3d_cutils.h (not shown)
+    CHECK_CUDA(p2);
+    return KNearestNeighborIdxCuda(
+        p1, p2, lengths1, lengths2, norm, K, version);
+#else
+    AT_ERROR("Not compiled with GPU support.");
+#endif
+  }
+  return KNearestNeighborIdxCpu(p1, p2, lengths1, lengths2, norm, K);
+}
+
+// Compute gradients with respect to p1 and p2
+//
+// Args:
+//    p1: FloatTensor of shape (N, P1, D) giving a batch of pointclouds each
+//        containing P1 points of dimension D.
+//    p2: FloatTensor of shape (N, P2, D) giving a batch of pointclouds each
+//        containing P2 points of dimension D.
+//    lengths1: LongTensor, shape (N,), giving actual length of each P1 cloud.
+//    lengths2: LongTensor, shape (N,), giving actual length of each P2 cloud.
+//    p1_neighbor_idx: LongTensor of shape (N, P1, K), where
+//        p1_neighbor_idx[n, i, k] = j means that the kth nearest
+//        neighbor to p1[n, i] in the cloud p2[n] is p2[n, j].
+//        It is padded with zeros so that it can be used easily in a later
+//        gather() operation. This is computed from the forward pass.
+//    norm: int specifying the norm for the distance (1 for L1, 2 for L2)
+//    grad_dists: FloatTensor of shape (N, P1, K) which contains the input
+//        gradients.
+//
+// Returns:
+//    grad_p1: FloatTensor of shape (N, P1, D) containing the output gradients
+//        wrt p1.
+//    grad_p2: FloatTensor of shape (N, P2, D) containing the output gradients
+//        wrt p2.
+
+// CPU implementation.
+std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackwardCpu(
+    const at::Tensor& p1,
+    const at::Tensor& p2,
+    const at::Tensor& lengths1,
+    const at::Tensor& lengths2,
+    const at::Tensor& idxs,
+    const int norm,
+    const at::Tensor& grad_dists);
+
+// CUDA implementation
+std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackwardCuda(
+    const at::Tensor& p1,
+    const at::Tensor& p2,
+    const at::Tensor& lengths1,
+    const at::Tensor& lengths2,
+    const at::Tensor& idxs,
+    const int norm,
+    const at::Tensor& grad_dists);
+
+// Exposed dispatcher (CUDA or CPU); inline because it is defined in a header.
+inline std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackward(
+    const at::Tensor& p1,
+    const at::Tensor& p2,
+    const at::Tensor& lengths1,
+    const at::Tensor& lengths2,
+    const at::Tensor& idxs,
+    const int norm,
+    const at::Tensor& grad_dists) {
+  if (p1.is_cuda() || p2.is_cuda()) { // either cloud on GPU -> CUDA path
+#ifdef WITH_CUDA
+    CHECK_CUDA(p1); // presumably from utils/pytorch3d_cutils.h (not shown)
+    CHECK_CUDA(p2);
+    return KNearestNeighborBackwardCuda(
+        p1, p2, lengths1, lengths2, idxs, norm, grad_dists);
+#else
+    AT_ERROR("Not compiled with GPU support.");
+#endif
+  }
+  return KNearestNeighborBackwardCpu(
+      p1, p2, lengths1, lengths2, idxs, norm, grad_dists);
+}
+
+// Utility to check whether a KNN version can be used.
+//
+// Args:
+//    version: Integer in the range 0 <= version <= 3 indicating one of our
+//        KNN implementations.
+//    D: Number of dimensions for the input and query point clouds
+//    K: Number of neighbors to be found
+//
+// Returns:
+//    Whether the indicated KNN version can be used.
+bool KnnCheckVersion(int version, const int64_t D, const int64_t K);
diff --git a/pytorch3d/pytorch3d/csrc/knn/knn_cpu.cpp b/pytorch3d/pytorch3d/csrc/knn/knn_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9e3153a6669721240c36084a3a7a563dee250a42
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/knn/knn_cpu.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/extension.h>
+#include <queue>
+#include <tuple>
+
+std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCpu(
+    const at::Tensor& p1,
+    const at::Tensor& p2,
+    const at::Tensor& lengths1,
+    const at::Tensor& lengths2,
+    const int norm,
+    const int K) {
+  // Brute-force CPU search; returns (idxs, dists), both of shape (N, P1, K).
+  const int N = p1.size(0);
+  const int P1 = p1.size(1);
+  const int D = p1.size(2);
+  // Outputs start zero-filled; slots that never receive a neighbor stay 0.
+  auto long_opts = lengths1.options().dtype(torch::kInt64);
+  torch::Tensor idxs = torch::full({N, P1, K}, 0, long_opts);
+  torch::Tensor dists = torch::full({N, P1, K}, 0, p1.options());
+  auto p1_a = p1.accessor<float, 3>();
+  auto p2_a = p2.accessor<float, 3>();
+  auto lengths1_a = lengths1.accessor<int64_t, 1>();
+  auto lengths2_a = lengths2.accessor<int64_t, 1>();
+  auto idxs_a = idxs.accessor<int64_t, 3>();
+  auto dists_a = dists.accessor<float, 3>();
+
+  for (int n = 0; n < N; ++n) {
+    const int64_t length1 = lengths1_a[n];
+    const int64_t length2 = lengths2_a[n];
+    for (int64_t i1 = 0; i1 < length1; ++i1) {
+      // Max-heap of (distance, index) tuples holding the best candidates so
+      // far; its top is the worst of them and is evicted first.
+      std::priority_queue<std::tuple<float, int>> q;
+      for (int64_t i2 = 0; i2 < length2; ++i2) {
+        float dist = 0;
+        for (int d = 0; d < D; ++d) {
+          float diff = p1_a[n][i1][d] - p2_a[n][i2][d];
+          // |diff| spelled out: unqualified abs() may bind to the int ::abs.
+          dist += (norm == 1) ? ((diff < 0) ? -diff : diff) : diff * diff;
+        }
+        int size = static_cast<int>(q.size());
+        // size > 0 keeps q.top() off an empty heap (reachable when K == 0).
+        if (size < K || (size > 0 && dist < std::get<0>(q.top()))) {
+          q.emplace(dist, i2);
+          if (size >= K) {
+            q.pop();
+          }
+        }
+      }
+      // Popping yields the largest distance first, so fill slots back to front.
+      while (!q.empty()) {
+        auto t = q.top();
+        q.pop();
+        const int k = q.size();
+        dists_a[n][i1][k] = std::get<0>(t);
+        idxs_a[n][i1][k] = std::get<1>(t);
+      }
+    }
+  }
+  return std::make_tuple(idxs, dists);
+}
+
+// ------------------------------------------------------------- //
+//                   Backward Operators                          //
+// ------------------------------------------------------------- //
+
+std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackwardCpu(
+    const at::Tensor& p1,
+    const at::Tensor& p2,
+    const at::Tensor& lengths1,
+    const at::Tensor& lengths2,
+    const at::Tensor& idxs,
+    const int norm,
+    const at::Tensor& grad_dists) {
+  // Scatter grad_dists back onto p1 and p2 through the neighbor indices.
+  const int N = p1.size(0);
+  const int P1 = p1.size(1);
+  const int D = p1.size(2);
+  const int P2 = p2.size(1);
+  const int K = idxs.size(2);
+  torch::Tensor grad_p1 = torch::full({N, P1, D}, 0, p1.options());
+  torch::Tensor grad_p2 = torch::full({N, P2, D}, 0, p2.options());
+
+  auto p1_a = p1.accessor<float, 3>();
+  auto p2_a = p2.accessor<float, 3>();
+  auto lengths1_a = lengths1.accessor<int64_t, 1>();
+  auto lengths2_a = lengths2.accessor<int64_t, 1>();
+  auto idxs_a = idxs.accessor<int64_t, 3>();
+  auto grad_dists_a = grad_dists.accessor<float, 3>();
+  auto grad_p1_a = grad_p1.accessor<float, 3>();
+  auto grad_p2_a = grad_p2.accessor<float, 3>();
+  for (int n = 0; n < N; ++n) {
+    const int64_t num_p1 = lengths1_a[n];
+    // Only the first min(lengths2[n], K) neighbor slots are visited.
+    const int64_t num_nbrs = (lengths2_a[n] < K) ? lengths2_a[n] : K;
+    for (int64_t i1 = 0; i1 < num_p1; ++i1) {
+      for (int64_t k = 0; k < num_nbrs; ++k) {
+        const int64_t i2 = idxs_a[n][i1][k];
+        if (i2 == -1) { // pad value: no neighbor stored in this slot
+          continue;
+        }
+        const float grad_k = grad_dists_a[n][i1][k];
+        for (int64_t d = 0; d < D; ++d) {
+          const float a = p1_a[n][i1][d];
+          const float b = p2_a[n][i2][d];
+          float diff = 0.0;
+          if (norm == 1) {
+            const float sign = (a > b) ? 1.0 : -1.0;
+            diff = grad_k * sign;
+          } else { // norm is 2 (default)
+            diff = 2.0f * grad_k * (a - b);
+          }
+          grad_p1_a[n][i1][d] += diff;
+          grad_p2_a[n][i2][d] -= diff;
+        }
+      }
+    }
+  }
+  return std::make_tuple(grad_p1, grad_p2);
+}
diff --git a/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes.cu b/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes.cu
new file mode 100644
index 0000000000000000000000000000000000000000..44d50934762939c1be67948621a1c27bf1d3d732
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes.cu
@@ -0,0 +1,535 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <cstdio>
+#include "marching_cubes/tables.h"
+
+/*
+Parallelized marching cubes for pytorch extension
+referenced and adapted from CUDA-Samples:
+(https://github.com/NVIDIA/cuda-samples/tree/master/Samples/5_Domain_Specific/marchingCubes)
+We divide the algorithm into two forward-passes:
+(1) The first forward-pass executes "ClassifyVoxelKernel" to
+evaluate volume scalar field for each cube and pre-compute
+two arrays -- number of vertices per cube (d_voxelVerts) and
+occupied or not per cube (d_voxelOccupied).
+
+Some preparation steps:
+With d_voxelOccupied, an exclusive scan is performed to compute
+the number of activeVoxels, which can be used to accelerate
+computation. With d_voxelVerts, another exclusive scan
+is performed to compute the accumulated sum of vertices in the 3d
+grid and totalVerts.
+
+(2) The second forward-pass calls "GenerateFacesKernel" to
+generate interpolated vertex positions and face indices by "marching
+through" each cube in the grid.
+
+*/
+
+// EPS: Used to indicate if two float values are close
+__constant__ const float EPSILON = 1e-5;
+
+// Linearly interpolate the position where an isosurface cuts an edge
+// between two vertices, based on their scalar values
+//
+// Args:
+//    isolevel: float value used as threshold
+//    p1: position of point1
+//    p2: position of point2
+//    valp1: field value for p1
+//    valp2: field value for p2
+//
+// Returns:
+//    point: interpolated vertex
+//
+__device__ float3
+vertexInterp(float isolevel, float3 p1, float3 p2, float valp1, float valp2) {
+  // Snap to an endpoint when the isolevel coincides with its field value.
+  if (abs(isolevel - valp1) < EPSILON) {
+    return p1;
+  }
+  if (abs(isolevel - valp2) < EPSILON) {
+    return p2;
+  }
+  // Nearly equal endpoint values: avoid dividing by a tiny difference below.
+  if (abs(valp1 - valp2) < EPSILON) {
+    return p1;
+  }
+  const float t = (isolevel - valp1) / (valp2 - valp1);
+  const float s = 1 - t;
+  float3 out;
+  out.x = p1.x * s + p2.x * t;
+  out.y = p1.y * s + p2.y * t;
+  out.z = p1.z * s + p2.z * t;
+  return out;
+}
+
+// Determine if the triangle is degenerate
+// A triangle is degenerate when at least two of the vertices
+// share the same position.
+//
+// Args:
+//    p1: position of vertex p1
+//    p2: position of vertex p2
+//    p3: position of vertex p3
+//
+// Returns:
+//    boolean indicator if the triangle is degenerate
+__device__ bool isDegenerate(float3 p1, float3 p2, float3 p3) {
+  // Two vertices coincide when all three coordinates differ by < EPSILON.
+  const bool same_p1_p2 = abs(p1.x - p2.x) < EPSILON &&
+      abs(p1.y - p2.y) < EPSILON && abs(p1.z - p2.z) < EPSILON;
+  const bool same_p2_p3 = abs(p2.x - p3.x) < EPSILON &&
+      abs(p2.y - p3.y) < EPSILON && abs(p2.z - p3.z) < EPSILON;
+  const bool same_p3_p1 = abs(p3.x - p1.x) < EPSILON &&
+      abs(p3.y - p1.y) < EPSILON && abs(p3.z - p1.z) < EPSILON;
+
+  // Degenerate as soon as any pair of vertices coincides.
+  return same_p1_p2 || same_p2_p3 || same_p3_p1;
+}
+
+// Convert from local vertex id to global vertex id, given position
+// of the cube where the vertex resides. The function ensures vertices
+// shared from adjacent cubes are mapped to the same global id.
+
+// Args:
+//     v: local vertex id
+//     x: x position of the cube where the vertex belongs
+//     y: y position of the cube where the vertex belongs
+//     z: z position of the cube where the vertex belongs
+//     W: width of x dimension
+//     H: height of y dimension
+
+// Returns:
+//     global vertex id represented by its x/y/z offsets
+__device__ uint localToGlobal(int v, int x, int y, int z, int W, int H) {
+  const int gx = x + (v & 1); // bit 0 of v: offset along x
+  const int gy = y + ((v >> 1) & 1); // bit 1 of v: offset along y
+  const int gz = z + ((v >> 2) & 1); // bit 2 of v: offset along z
+  return gx + gy * W + gz * W * H;
+}
+
+// Hash_combine a pair of global vertex ids into a single integer.
+//
+// Args:
+//    v1_id: global id of vertex 1
+//    v2_id: global id of vertex 2
+//    W: width of the 3d grid
+//    H: height of the 3d grid
+//    D: depth of the 3d grid
+//
+// Returns:
+//    hash of the pair of vertex ids, computed entirely in 64-bit arithmetic
+__device__ int64_t hashVpair(uint v1_id, uint v2_id, int W, int H, int D) {
+  const int64_t w = W; // widen first so the multiplier cannot overflow int
+  return (int64_t)v1_id * (w + w * H + w * H * D) + (int64_t)v2_id;
+}
+
+// precompute number of vertices and occupancy
+// for each voxel in the grid.
+//
+// Args:
+//    voxelVerts: pointer to device array to store number
+//          of verts per voxel
+//    voxelOccupied: pointer to device array to store
+//          occupancy state per voxel
+//    vol: torch tensor stored with 3D scalar field
+//    isolevel: threshold to determine isosurface intersection
+//
+__global__ void ClassifyVoxelKernel(
+    at::PackedTensorAccessor32<int, 1, at::RestrictPtrTraits> voxelVerts,
+    at::PackedTensorAccessor32<int, 1, at::RestrictPtrTraits> voxelOccupied,
+    const at::PackedTensorAccessor32<float, 3, at::RestrictPtrTraits> vol,
+    // NOTE: numVertsTable is not a kernel argument; the body below declares
+    // its own local copy of the table.
+    float isolevel) {
+  const int indexTable[8]{0, 1, 4, 5, 3, 2, 7, 6}; // corner i -> cubeindex bit
+  const uint D = vol.size(0) - 1; // voxel counts: one less than the number
+  const uint H = vol.size(1) - 1; // of samples along each axis of vol
+  const uint W = vol.size(2) - 1;
+
+  // 1-d grid; each thread strides over voxels by the total thread count
+  uint id = blockIdx.x * blockDim.x + threadIdx.x;
+  uint num_threads = gridDim.x * blockDim.x;
+
+  // Table mapping from cubeindex to number of vertices in the configuration
+  const unsigned char numVertsTable[256] = {
+      0,  3,  3,  6,  3,  6,  6,  9,  3,  6,  6,  9,  6,  9,  9,  6,  3,  6,
+      6,  9,  6,  9,  9,  12, 6,  9,  9,  12, 9,  12, 12, 9,  3,  6,  6,  9,
+      6,  9,  9,  12, 6,  9,  9,  12, 9,  12, 12, 9,  6,  9,  9,  6,  9,  12,
+      12, 9,  9,  12, 12, 9,  12, 15, 15, 6,  3,  6,  6,  9,  6,  9,  9,  12,
+      6,  9,  9,  12, 9,  12, 12, 9,  6,  9,  9,  12, 9,  12, 12, 15, 9,  12,
+      12, 15, 12, 15, 15, 12, 6,  9,  9,  12, 9,  12, 6,  9,  9,  12, 12, 15,
+      12, 15, 9,  6,  9,  12, 12, 9,  12, 15, 9,  6,  12, 15, 15, 12, 15, 6,
+      12, 3,  3,  6,  6,  9,  6,  9,  9,  12, 6,  9,  9,  12, 9,  12, 12, 9,
+      6,  9,  9,  12, 9,  12, 12, 15, 9,  6,  12, 9,  12, 9,  15, 6,  6,  9,
+      9,  12, 9,  12, 12, 15, 9,  12, 12, 15, 12, 15, 15, 12, 9,  12, 12, 9,
+      12, 15, 15, 12, 12, 9,  15, 6,  15, 12, 6,  3,  6,  9,  9,  12, 9,  12,
+      12, 15, 9,  12, 12, 15, 6,  9,  9,  6,  9,  12, 12, 15, 12, 15, 15, 6,
+      12, 9,  15, 12, 9,  6,  12, 3,  9,  12, 12, 15, 12, 15, 9,  12, 12, 15,
+      15, 6,  9,  12, 6,  3,  6,  9,  9,  6,  9,  12, 6,  3,  9,  6,  12, 3,
+      6,  3,  3,  0,
+  };
+
+  for (uint tid = id; tid < D * H * W; tid += num_threads) {
+    // compute global location of the voxel (x varies fastest, then y, then z)
+    const int gx = tid % W;
+    const int gy = tid / W % H;
+    const int gz = tid / (W * H);
+
+    int cubeindex = 0; // bit mask: one bit per corner that is below isolevel
+    for (int i = 0; i < 8; i++) {
+      const int dx = i & 1; // bit 0 of i: corner offset along x
+      const int dy = i >> 1 & 1; // bit 1 of i: corner offset along y
+      const int dz = i >> 2 & 1; // bit 2 of i: corner offset along z
+
+      const int x = gx + dx;
+      const int y = gy + dy;
+      const int z = gz + dz;
+
+      if (vol[z][y][x] < isolevel) {
+        cubeindex |= 1 << indexTable[i];
+      }
+    }
+    // collect number of vertices for each voxel
+    unsigned char numVerts = numVertsTable[cubeindex];
+    voxelVerts[tid] = numVerts;
+    voxelOccupied[tid] = (numVerts > 0); // 1 when the voxel emits any vertex
+  }
+}
+
+// extract compact voxel array for acceleration
+//
+// Args:
+//    compactedVoxelArray: tensor of shape (activeVoxels,) which maps
+//          from accumulated non-empty voxel index to original 3d grid index
+//    voxelOccupied: tensor of shape (numVoxels,) which stores
+//          the occupancy state per voxel
+//    voxelOccupiedScan: tensor of shape (numVoxels,) which
+//          stores the accumulated occupied voxel counts
+//    numVoxels: number of total voxels in the grid
+//
+__global__ void CompactVoxelsKernel(
+    at::PackedTensorAccessor32<int, 1, at::RestrictPtrTraits>
+        compactedVoxelArray,
+    const at::PackedTensorAccessor32<int, 1, at::RestrictPtrTraits>
+        voxelOccupied,
+    const at::PackedTensorAccessor32<int64_t, 1, at::RestrictPtrTraits>
+        voxelOccupiedScan,
+    uint numVoxels) {
+  const uint step = gridDim.x * blockDim.x;
+  uint v = blockIdx.x * blockDim.x + threadIdx.x;
+  for (; v < numVoxels; v += step) {
+    if (voxelOccupied[v]) { // occupied voxel: its scan value is its slot
+      compactedVoxelArray[voxelOccupiedScan[v]] = v;
+    }
+  }
+}
+
+// generate triangles for each voxel using marching cubes
+//
+// Args:
+//    verts: torch tensor of shape (V, 3) to store interpolated mesh vertices
+//    faces: torch tensor of shape (F, 3) to store indices for mesh faces
+//    ids: torch tensor of shape (V) to store id of each vertex
+//    compactedVoxelArray: tensor of shape (activeVoxels,) which stores
+//          non-empty voxel index.
+//    numVertsScanned: tensor of shape (numVoxels,) which stores accumulated
+//          vertices count in the voxel
+//    activeVoxels: number of active voxels used for acceleration
+//    vol: torch tensor stored with 3D scalar field
+//    isolevel: threshold to determine isosurface intersection
+//
+__global__ void GenerateFacesKernel(
+    at::PackedTensorAccessor32<float, 2, at::RestrictPtrTraits> verts,
+    at::PackedTensorAccessor<int64_t, 2, at::RestrictPtrTraits> faces,
+    at::PackedTensorAccessor<int64_t, 1, at::RestrictPtrTraits> ids,
+    at::PackedTensorAccessor32<int, 1, at::RestrictPtrTraits>
+        compactedVoxelArray,
+    at::PackedTensorAccessor32<int64_t, 1, at::RestrictPtrTraits>
+        numVertsScanned,
+    const uint activeVoxels,
+    const at::PackedTensorAccessor32<float, 3, at::RestrictPtrTraits> vol,
+    const at::PackedTensorAccessor32<int, 2, at::RestrictPtrTraits> faceTable,
+    // NOTE: numVertsTable is not a kernel argument; the body below declares
+    // its own local copy of the table.
+    const float isolevel) {
+  uint id = blockIdx.x * blockDim.x + threadIdx.x;
+  uint num_threads = gridDim.x * blockDim.x;
+  const int faces_size = faces.size(0); // rows in faces; bounds writes below
+  // Table mapping each edge to the corresponding cube vertices offsets
+  const int edgeToVertsTable[12][2] = {
+      {0, 1},
+      {1, 5},
+      {4, 5},
+      {0, 4},
+      {2, 3},
+      {3, 7},
+      {6, 7},
+      {2, 6},
+      {0, 2},
+      {1, 3},
+      {5, 7},
+      {4, 6},
+  };
+
+  // Table mapping from cubeindex to number of vertices in the configuration
+  const unsigned char numVertsTable[256] = {
+      0,  3,  3,  6,  3,  6,  6,  9,  3,  6,  6,  9,  6,  9,  9,  6,  3,  6,
+      6,  9,  6,  9,  9,  12, 6,  9,  9,  12, 9,  12, 12, 9,  3,  6,  6,  9,
+      6,  9,  9,  12, 6,  9,  9,  12, 9,  12, 12, 9,  6,  9,  9,  6,  9,  12,
+      12, 9,  9,  12, 12, 9,  12, 15, 15, 6,  3,  6,  6,  9,  6,  9,  9,  12,
+      6,  9,  9,  12, 9,  12, 12, 9,  6,  9,  9,  12, 9,  12, 12, 15, 9,  12,
+      12, 15, 12, 15, 15, 12, 6,  9,  9,  12, 9,  12, 6,  9,  9,  12, 12, 15,
+      12, 15, 9,  6,  9,  12, 12, 9,  12, 15, 9,  6,  12, 15, 15, 12, 15, 6,
+      12, 3,  3,  6,  6,  9,  6,  9,  9,  12, 6,  9,  9,  12, 9,  12, 12, 9,
+      6,  9,  9,  12, 9,  12, 12, 15, 9,  6,  12, 9,  12, 9,  15, 6,  6,  9,
+      9,  12, 9,  12, 12, 15, 9,  12, 12, 15, 12, 15, 15, 12, 9,  12, 12, 9,
+      12, 15, 15, 12, 12, 9,  15, 6,  15, 12, 6,  3,  6,  9,  9,  12, 9,  12,
+      12, 15, 9,  12, 12, 15, 6,  9,  9,  6,  9,  12, 12, 15, 12, 15, 15, 6,
+      12, 9,  15, 12, 9,  6,  12, 3,  9,  12, 12, 15, 12, 15, 9,  12, 12, 15,
+      15, 6,  9,  12, 6,  3,  6,  9,  9,  6,  9,  12, 6,  3,  9,  6,  12, 3,
+      6,  3,  3,  0,
+  };
+
+  for (uint tid = id; tid < activeVoxels; tid += num_threads) {
+    uint voxel = compactedVoxelArray[tid]; // maps from accumulated id to
+                                           // original 3d voxel id
+    // mapping from offsets to vi index
+    int indexTable[8]{0, 1, 4, 5, 3, 2, 7, 6};
+    // field value for each vertex
+    float val[8];
+    // position for each vertex
+    float3 p[8];
+    // 3d address: voxel counts per axis, then this voxel's (gx, gy, gz)
+    const uint D = vol.size(0) - 1;
+    const uint H = vol.size(1) - 1;
+    const uint W = vol.size(2) - 1;
+
+    const int gx = voxel % W;
+    const int gy = voxel / W % H;
+    const int gz = voxel / (W * H);
+
+    // recalculate cubeindex (one bit per corner that is below isolevel)
+    uint cubeindex = 0;
+    for (int i = 0; i < 8; i++) {
+      const int dx = i & 1; // bit 0 of i: corner offset along x
+      const int dy = i >> 1 & 1; // bit 1 of i: corner offset along y
+      const int dz = i >> 2 & 1; // bit 2 of i: corner offset along z
+
+      const int x = gx + dx;
+      const int y = gy + dy;
+      const int z = gz + dz;
+
+      if (vol[z][y][x] < isolevel) {
+        cubeindex |= 1 << indexTable[i];
+      }
+      val[indexTable[i]] = vol[z][y][x]; // maps from vi to volume
+      p[indexTable[i]] = make_float3(x, y, z); // maps from vi to position
+    }
+
+    // Interpolate vertices where the surface intersects the cube
+    // (one candidate per cube edge; only those named by faceTable are used)
+    float3 vertlist[12];
+    vertlist[0] = vertexInterp(isolevel, p[0], p[1], val[0], val[1]);
+    vertlist[1] = vertexInterp(isolevel, p[1], p[2], val[1], val[2]);
+    vertlist[2] = vertexInterp(isolevel, p[3], p[2], val[3], val[2]);
+    vertlist[3] = vertexInterp(isolevel, p[0], p[3], val[0], val[3]);
+
+    vertlist[4] = vertexInterp(isolevel, p[4], p[5], val[4], val[5]);
+    vertlist[5] = vertexInterp(isolevel, p[5], p[6], val[5], val[6]);
+    vertlist[6] = vertexInterp(isolevel, p[7], p[6], val[7], val[6]);
+    vertlist[7] = vertexInterp(isolevel, p[4], p[7], val[4], val[7]);
+
+    vertlist[8] = vertexInterp(isolevel, p[0], p[4], val[0], val[4]);
+    vertlist[9] = vertexInterp(isolevel, p[1], p[5], val[1], val[5]);
+    vertlist[10] = vertexInterp(isolevel, p[2], p[6], val[2], val[6]);
+    vertlist[11] = vertexInterp(isolevel, p[3], p[7], val[3], val[7]);
+
+    // output triangle faces
+    uint numVerts = numVertsTable[cubeindex];
+
+    for (int i = 0; i < numVerts; i++) {
+      int index = numVertsScanned[voxel] + i; // output row for this vertex
+      unsigned char edge = faceTable[cubeindex][i]; // cube edge it lies on
+
+      uint v1 = edgeToVertsTable[edge][0];
+      uint v2 = edgeToVertsTable[edge][1];
+      uint v1_id = localToGlobal(v1, gx, gy, gz, W + 1, H + 1);
+      uint v2_id = localToGlobal(v2, gx, gy, gz, W + 1, H + 1);
+      int64_t edge_id = hashVpair(v1_id, v2_id, W + 1, H + 1, D + 1);
+
+      verts[index][0] = vertlist[edge].x;
+      verts[index][1] = vertlist[edge].y;
+      verts[index][2] = vertlist[edge].z;
+
+      if (index < faces_size) { // skip rows beyond the end of faces
+        faces[index][0] = index * 3 + 0;
+        faces[index][1] = index * 3 + 1;
+        faces[index][2] = index * 3 + 2;
+      }
+
+      ids[index] = edge_id; // id built from the edge's two global vertex ids
+    }
+  } // end for grid-strided kernel
+}
+
+// Entrance for marching cubes cuda extension. Marching Cubes is an algorithm to
+// create triangle meshes from an implicit function (one of the form f(x, y, z)
+// = 0). It works by iteratively checking a grid of cubes superimposed over a
+// region of the function. The number of faces and positions of the vertices in
+// each cube are determined by the isolevel as well as the volume values
+// from the eight vertices of the cube.
+//
+// We implement this algorithm with two forward passes where the first pass
+// checks the occupancy and collects number of vertices for each cube. The
+// second pass will skip empty voxels and generate vertices as well as faces for
+// each cube through table lookup. The vertex positions, faces and identifiers
+// for each vertex will be returned.
+//
+//
+// Args:
+//    vol: torch tensor of shape (D, H, W) for volume scalar field
+//    isolevel: threshold to determine isosurface intersection
+//
+// Returns:
+//     tuple of <verts, faces, ids>: which stores vertex positions, face
+//         indices and integer identifiers for each vertex.
+//    verts: (N_verts, 3) FloatTensor for vertex positions
+//    faces: (N_faces, 3) LongTensor of face indices
+//    ids: (N_verts,) LongTensor used to identify each vertex. Vertices from
+//         adjacent edges can share the same 3d position. To reduce memory
+//         redundancy, we tag each vertex with a unique id for deduplication. In
+//         contrast to deduping on vertices, this has the benefit to avoid
+//         floating point precision issues.
+//
+std::tuple<at::Tensor, at::Tensor, at::Tensor> MarchingCubesCuda(
+    const at::Tensor& vol,
+    const float isolevel) {
+  // Set the device for the kernel launch based on the device of vol
+  at::cuda::CUDAGuard device_guard(vol.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  // transfer _FACE_TABLE data to device
+  at::Tensor face_table_tensor = at::zeros(
+      {256, 16}, at::TensorOptions().dtype(at::kInt).device(at::kCPU));
+  auto face_table_a = face_table_tensor.accessor<int, 2>();
+  for (int i = 0; i < 256; i++) {
+    for (int j = 0; j < 16; j++) {
+      face_table_a[i][j] = _FACE_TABLE[i][j];
+    }
+  }
+  at::Tensor faceTable = face_table_tensor.to(vol.device());
+
+  // get numVoxels
+  int threads = 128;
+  const uint D = vol.size(0);
+  const uint H = vol.size(1);
+  const uint W = vol.size(2);
+  const int numVoxels = (D - 1) * (H - 1) * (W - 1);
+  dim3 grid((numVoxels + threads - 1) / threads, 1, 1);
+  if (grid.x > 65535) {
+    grid.x = 65535;
+  }
+
+  using at::indexing::None;
+  using at::indexing::Slice;
+
+  auto d_voxelVerts =
+      at::zeros({numVoxels + 1}, at::TensorOptions().dtype(at::kInt))
+          .to(vol.device());
+  auto d_voxelVerts_ = d_voxelVerts.index({Slice(1, None)});
+  auto d_voxelOccupied =
+      at::zeros({numVoxels + 1}, at::TensorOptions().dtype(at::kInt))
+          .to(vol.device());
+  auto d_voxelOccupied_ = d_voxelOccupied.index({Slice(1, None)});
+
+  // Execute "ClassifyVoxelKernel" kernel to precompute
+  // two arrays - d_voxelOccupied and d_voxelVertices to global memory,
+  // which stores the occupancy state and number of voxel vertices per voxel.
+  ClassifyVoxelKernel<<<grid, threads, 0, stream>>>(
+      d_voxelVerts_.packed_accessor32<int, 1, at::RestrictPtrTraits>(),
+      d_voxelOccupied_.packed_accessor32<int, 1, at::RestrictPtrTraits>(),
+      vol.packed_accessor32<float, 3, at::RestrictPtrTraits>(),
+      isolevel);
+  AT_CUDA_CHECK(cudaGetLastError());
+  cudaDeviceSynchronize();
+
+  // Scan "d_voxelOccupied" array to generate accumulated voxel occupancy
+  // count for voxels in the grid and compute the number of active voxels.
+  // If the number of active voxels is 0, return zero tensor for verts and
+  // faces.
+
+  auto d_voxelOccupiedScan = at::cumsum(d_voxelOccupied, 0);
+  auto d_voxelOccupiedScan_ = d_voxelOccupiedScan.index({Slice(1, None)});
+
+  // number of active voxels
+  int64_t activeVoxels = d_voxelOccupiedScan[numVoxels].cpu().item<int64_t>();
+
+  const int device_id = vol.device().index();
+  auto opt = at::TensorOptions().dtype(at::kInt).device(at::kCUDA, device_id);
+  auto opt_long =
+      at::TensorOptions().dtype(at::kLong).device(at::kCUDA, device_id);
+
+  if (activeVoxels == 0) {
+    int ntris = 0;
+    at::Tensor verts = at::zeros({ntris * 3, 3}, vol.options());
+    at::Tensor faces = at::zeros({ntris, 3}, opt_long);
+    at::Tensor ids = at::zeros({ntris}, opt_long);
+    return std::make_tuple(verts, faces, ids);
+  }
+
+  // Execute "CompactVoxelsKernel" kernel to compress voxels for accleration.
+  // This allows us to run triangle generation on only the occupied voxels.
+  auto d_compVoxelArray = at::zeros({activeVoxels}, opt);
+  CompactVoxelsKernel<<<grid, threads, 0, stream>>>(
+      d_compVoxelArray.packed_accessor32<int, 1, at::RestrictPtrTraits>(),
+      d_voxelOccupied.packed_accessor32<int, 1, at::RestrictPtrTraits>(),
+      d_voxelOccupiedScan_
+          .packed_accessor32<int64_t, 1, at::RestrictPtrTraits>(),
+      numVoxels);
+  AT_CUDA_CHECK(cudaGetLastError());
+  cudaDeviceSynchronize();
+
+  // Scan d_voxelVerts array to generate offsets of vertices for each voxel
+  auto d_voxelVertsScan = at::cumsum(d_voxelVerts, 0);
+  auto d_voxelVertsScan_ = d_voxelVertsScan.index({Slice(1, None)});
+
+  // total number of vertices
+  int64_t totalVerts = d_voxelVertsScan[numVoxels].cpu().item<int64_t>();
+
+  // Execute "GenerateFacesKernel" kernel
+  // This runs only on the occupied voxels.
+  // It looks up the field values and generates the triangle data.
+  at::Tensor verts = at::zeros({totalVerts, 3}, vol.options());
+  at::Tensor faces = at::zeros({totalVerts / 3, 3}, opt_long);
+
+  at::Tensor ids = at::zeros({totalVerts}, opt_long);
+
+  dim3 grid2((activeVoxels + threads - 1) / threads, 1, 1);
+  if (grid2.x > 65535) {
+    grid2.x = 65535;
+  }
+
+  GenerateFacesKernel<<<grid2, threads, 0, stream>>>(
+      verts.packed_accessor32<float, 2, at::RestrictPtrTraits>(),
+      faces.packed_accessor<int64_t, 2, at::RestrictPtrTraits>(),
+      ids.packed_accessor<int64_t, 1, at::RestrictPtrTraits>(),
+      d_compVoxelArray.packed_accessor32<int, 1, at::RestrictPtrTraits>(),
+      d_voxelVertsScan_.packed_accessor32<int64_t, 1, at::RestrictPtrTraits>(),
+      activeVoxels,
+      vol.packed_accessor32<float, 3, at::RestrictPtrTraits>(),
+      faceTable.packed_accessor32<int, 2, at::RestrictPtrTraits>(),
+      isolevel);
+  AT_CUDA_CHECK(cudaGetLastError());
+
+  return std::make_tuple(verts, faces, ids);
+}
diff --git a/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes.h b/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes.h
new file mode 100644
index 0000000000000000000000000000000000000000..51c660b18076014fd8717ef57eb93af328785f56
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/extension.h>
+#include <tuple>
+#include <vector>
+#include "utils/pytorch3d_cutils.h"
+
+// Run Marching Cubes algorithm over a volume scalar field
+// with a pre-defined threshold and return a mesh composed of vertices
+// and faces for the mesh.
+//
+// Args:
+//    vol: FloatTensor of shape (D, H, W) giving a volume
+//    scalar grid.
+//    isolevel: isosurface value to use as the threshold to determine whether
+//    the points are within a volume.
+//
+// Returns:
+//    vertices: (N_verts, 3) FloatTensor of vertices
+//    faces: (N_faces, 3) LongTensor of faces
+//    ids: (N_verts,) LongTensor used to identify each vertex and deduplication
+//         to avoid floating point precision issues.
+//         For Cuda, will be used to dedupe redundant vertices.
+//         For cpp implementation, this tensor is just a placeholder.
+
+// CPU implementation; MarchingCubes calls it with vol.contiguous()
+std::tuple<at::Tensor, at::Tensor, at::Tensor> MarchingCubesCpu(
+    const at::Tensor& vol,
+    const float isolevel);
+
+// CUDA implementation; MarchingCubes only calls it when built WITH_CUDA
+std::tuple<at::Tensor, at::Tensor, at::Tensor> MarchingCubesCuda(
+    const at::Tensor& vol,
+    const float isolevel);
+
+// Exposed entry point: picks the CUDA or CPU implementation from vol's device
+inline std::tuple<at::Tensor, at::Tensor, at::Tensor> MarchingCubes(
+    const at::Tensor& vol,
+    const float isolevel) {
+  if (vol.is_cuda()) {
+#ifdef WITH_CUDA
+    CHECK_CUDA(vol);
+    // The CUDA path only accepts grids of at most 1024 samples per axis
+    const int depth = vol.size(0), height = vol.size(1), width = vol.size(2);
+    const bool fits = depth <= 1024 && height <= 1024 && width <= 1024;
+    if (!fits) {
+      AT_ERROR("Maximum volume size allowed 1K x 1K x 1K");
+    }
+    return MarchingCubesCuda(vol.contiguous(), isolevel);
+#else
+    AT_ERROR("Not compiled with GPU support.");
+#endif
+  }
+  return MarchingCubesCpu(vol.contiguous(), isolevel);
+}
diff --git a/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes_cpu.cpp b/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5d68db5070c4ff3b24bbba61aea20ada95b7f9ca
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes_cpu.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/extension.h>
+#include <algorithm>
+#include <array>
+#include <cstring>
+#include <unordered_map>
+#include <vector>
+#include "marching_cubes/marching_cubes_utils.h"
+#include "marching_cubes/tables.h"
+
+// Cpu implementation for Marching Cubes. Every cell of the grid is visited
+// once; a vertex shared by neighbouring cells is emitted only once.
+// Args:
+//    vol: a Tensor of size (D, H, W) corresponding to a 3D scalar field
+//    isolevel: the isosurface value to use as the threshold to determine
+//          whether points are within a volume.
+//
+// Returns:
+//    vertices: a float tensor of shape (N_verts, 3) for positions of the mesh
+//    faces: a long tensor of shape (N_faces, 3) for indices of the face
+//    ids: a long tensor of shape (N_verts) as placeholder
+//
+std::tuple<at::Tensor, at::Tensor, at::Tensor> MarchingCubesCpu(
+    const at::Tensor& vol,
+    const float isolevel) {
+  // volume shapes
+  const int D = vol.size(0);
+  const int H = vol.size(1);
+  const int W = vol.size(2);
+
+  // Create tensor accessors
+  auto vol_a = vol.accessor<float, 3>();
+  // uniq_edge_id maps a 64-bit edge id to the index of its vertex in verts,
+  // which is how vertices on edges shared between cells get deduplicated
+  std::unordered_map<int64_t, int64_t> uniq_edge_id;
+  std::vector<int64_t> faces; // store face indices
+  std::vector<Vertex> verts; // store vertex positions
+  // enumerate each cell in the 3d grid
+  for (int z = 0; z < D - 1; z++) {
+    for (int y = 0; y < H - 1; y++) {
+      for (int x = 0; x < W - 1; x++) {
+        Cube cube(x, y, z, vol_a, isolevel);
+        // Cube is entirely in/out of the surface
+        if (_FACE_TABLE[cube.cubeindex][0] == 255) {
+          continue;
+        }
+        // edge ids and positions of the triangle currently being assembled
+        std::vector<int64_t> tri;
+        std::vector<Vertex> ps;
+
+        // Interpolate the vertices where the surface intersects with the cube
+        for (int j = 0; _FACE_TABLE[cube.cubeindex][j] != 255; j++) {
+          const int e = _FACE_TABLE[cube.cubeindex][j];
+          ps.push_back(cube.VertexInterp(isolevel, e, vol_a));
+          tri.push_back(cube.HashVpair(e, W, H, D));
+          if ((j + 1) % 3 != 0) {
+            continue; // the current triangle is not complete yet
+          }
+
+          // Check if the triangle face is degenerate. A triangle face
+          // is degenerate if any two of its vertices share the same 3D position
+          if (ps[0] != ps[1] && ps[1] != ps[2] && ps[2] != ps[0]) {
+            for (int k = 0; k < 3; k++) {
+              // Edge ids are 64-bit hashes; holding one in an int would
+              // truncate it and make distinct edges share a vertex.
+              const int64_t v = tri.at(k);
+              if (!uniq_edge_id.count(v)) {
+                uniq_edge_id[v] = verts.size();
+                verts.push_back(ps.at(k));
+              }
+              faces.push_back(uniq_edge_id[v]);
+            }
+          } // endif
+          // Reset for the next triangle even when this one was skipped, so a
+          // degenerate face cannot suppress the remaining faces of the cube.
+          tri.clear();
+          ps.clear();
+        } // endfor edge enumeration
+      } // endfor x
+    } // endfor y
+  } // endfor z
+  // Collect returning tensor
+  const int64_t n_vertices = (int64_t)verts.size();
+  const int64_t n_faces = (int64_t)faces.size() / 3;
+  auto vert_tensor = torch::zeros({n_vertices, 3}, torch::kFloat);
+  auto id_tensor = torch::zeros({n_vertices}, torch::kInt64); // placeholder
+  auto face_tensor = torch::zeros({n_faces, 3}, torch::kInt64);
+
+  auto vert_a = vert_tensor.accessor<float, 2>();
+  for (int64_t i = 0; i < n_vertices; i++) {
+    vert_a[i][0] = verts.at(i).x;
+    vert_a[i][1] = verts.at(i).y;
+    vert_a[i][2] = verts.at(i).z;
+  }
+
+  auto face_a = face_tensor.accessor<int64_t, 2>();
+  for (int64_t i = 0; i < n_faces; i++) {
+    face_a[i][0] = faces.at(i * 3 + 0);
+    face_a[i][1] = faces.at(i * 3 + 1);
+    face_a[i][2] = faces.at(i * 3 + 2);
+  }
+
+  return std::make_tuple(vert_tensor, face_tensor, id_tensor);
+}
diff --git a/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes_utils.h b/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes_utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..486e0339eda613f2886bb3165a0fde1d0a5d6bf7
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes_utils.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/extension.h>
+#include <cmath>
+#include <cstdint>
+#include <vector>
+#include "ATen/core/TensorAccessor.h"
+#include "marching_cubes/tables.h"
+
+// EPS: tolerance under which two float values are considered equal
+const float EPS = 1e-5;
+
+// Data structures for the marching cubes
+struct Vertex {
+  // Builds a vertex from its coordinates; every coordinate defaults to 0
+  explicit Vertex(float x = 0.0f, float y = 0.0f, float z = 0.0f)
+      : x(x), y(y), z(z) {}
+
+  // Scaling and addition are the building blocks of vertex interpolation
+  Vertex operator*(float s) const {
+    return Vertex(x * s, y * s, z * s);
+  }
+  Vertex operator+(const Vertex& rhs) const {
+    return Vertex(x + rhs.x, y + rhs.y, z + rhs.z);
+  }
+  // Per-axis comparison within EPS, used for checking degenerate triangles
+  bool operator==(const Vertex& rhs) const {
+    const bool same_x = std::abs(x - rhs.x) < EPS;
+    const bool same_y = std::abs(y - rhs.y) < EPS;
+    return same_x && same_y && std::abs(z - rhs.z) < EPS;
+  }
+  bool operator!=(const Vertex& rhs) const {
+    const bool diff_x = std::abs(x - rhs.x) >= EPS;
+    const bool diff_y = std::abs(y - rhs.y) >= EPS;
+    return diff_x || diff_y || std::abs(z - rhs.z) >= EPS;
+  }
+  // vertex position
+  float x, y, z;
+};
+
+struct Cube {
+  // Edge and vertex convention:
+  //                     v4_______e4____________v5
+  //                     /|                    /|
+  //                    / |                   / |
+  //                 e7/  |                e5/  |
+  //                  /___|______e6_________/   |
+  //               v7|    |                 |v6 |e9
+  //                 |    |                 |   |
+  //                 |    |e8               |e10|
+  //              e11|    |                 |   |
+  //                 |    |_________________|___|
+  //                 |   / v0      e0       |   /v1
+  //                 |  /                   |  /
+  //                 | /e3                  | /e1
+  //                 |/_____________________|/
+  //                 v3         e2          v2
+
+  Vertex p[8];
+  int x, y, z;
+  int cubeindex = 0;
+  Cube(
+      int x,
+      int y,
+      int z,
+      const at::TensorAccessor<float, 3>& vol_a,
+      const float isolevel)
+      : x(x), y(y), z(z) {
+    // vertex position (x, y, z) for v0-v1-v4-v5-v3-v2-v7-v6
+    for (int v = 0; v < 8; v++) {
+      p[v] = Vertex(x + (v & 1), y + (v >> 1 & 1), z + (v >> 2 & 1));
+    }
+    // Calculates cube configuration index given values of the cube vertices
+    for (int i = 0; i < 8; i++) {
+      const int idx = _INDEX_TABLE[i];
+      Vertex v = p[idx];
+      if (vol_a[v.z][v.y][v.x] < isolevel) {
+        cubeindex |= (1 << i);
+      }
+    }
+  }
+
+  // Linearly interpolate the position where an isosurface cuts an edge
+  // between two vertices, based on their scalar values
+  //
+  // Args:
+  //    isolevel: float value used as threshold
+  //    edge: edge (ID) to interpolate
+  //    vol_a: 3D scalar field
+  //
+  // Returns:
+  //    point: interpolated vertex
+  Vertex VertexInterp(
+      float isolevel,
+      const int edge,
+      const at::TensorAccessor<float, 3>& vol_a) {
+    const int v1 = _EDGE_TO_VERTICES[edge][0];
+    const int v2 = _EDGE_TO_VERTICES[edge][1];
+    Vertex p1 = p[v1];
+    Vertex p2 = p[v2];
+    float val1 = vol_a[p1.z][p1.y][p1.x];
+    float val2 = vol_a[p2.z][p2.y][p2.x];
+
+    if (std::abs(isolevel - val1) < EPS) {
+      return p1;
+    } else if (std::abs(isolevel - val2) < EPS) {
+      return p2;
+    } else if (std::abs(val1 - val2) < EPS) {
+      return p1;
+    }
+    // interpolate vertex p based on two vertices on the edge
+    const float ratio = (isolevel - val1) / (val2 - val1);
+    return p1 * (1 - ratio) + p2 * ratio;
+  }
+
+  // Hash an edge into a global edge_id. The function binds an
+  // edge with an integer to address floating point precision issue.
+  // Ids are computed from the integer cell origin in 64-bit arithmetic, as
+  // float positions (exact only up to 2^24) or int products would corrupt them.
+  //
+  // Args:
+  //    edge: local edge (0-11) of this cube, see _EDGE_TO_VERTICES
+  //    W, H, D: width, height and depth of the 3d grid
+  //
+  // Returns:
+  //    v1_id * (W + W*H + W*H*D) + v2_id, where v1_id and v2_id are the
+  //    row-major grid ids of the two end points of the edge
+  //
+  int64_t HashVpair(const int edge, int W, int H, int D) {
+    const int64_t w = W, wh = w * H;
+    auto corner_id = [&](int v) {
+      return (x + (v & 1)) + (y + (v >> 1 & 1)) * w + (z + (v >> 2 & 1)) * wh;
+    };
+    const int64_t v1_id = corner_id(_EDGE_TO_VERTICES[edge][0]);
+    const int64_t v2_id = corner_id(_EDGE_TO_VERTICES[edge][1]);
+    return v1_id * (w + wh + wh * D) + v2_id;
+  }
+};
diff --git a/pytorch3d/pytorch3d/csrc/marching_cubes/tables.h b/pytorch3d/pytorch3d/csrc/marching_cubes/tables.h
new file mode 100644
index 0000000000000000000000000000000000000000..3aff617c53e5ba963c014cabd7beb1c2cd61a053
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/marching_cubes/tables.h
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+using uint = unsigned int;
+
+// A table mapping from cubeindex to the list of faces for that configuration.
+// Each list contains at most 5 faces, where each face is represented with
+// 3 consecutive edge ids; unused trailing slots hold the sentinel X (255).
+// Table adapted from http://paulbourke.net/geometry/polygonise/
+//
+#define X 255
+const unsigned char _FACE_TABLE[256][16] = {
+    {X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X},
+    {0, 8, 3, X, X, X, X, X, X, X, X, X, X, X, X, X},
+    {0, 1, 9, X, X, X, X, X, X, X, X, X, X, X, X, X},
+    {1, 8, 3, 9, 8, 1, X, X, X, X, X, X, X, X, X, X},
+    {1, 2, 10, X, X, X, X, X, X, X, X, X, X, X, X, X},
+    {0, 8, 3, 1, 2, 10, X, X, X, X, X, X, X, X, X, X},
+    {9, 2, 10, 0, 2, 9, X, X, X, X, X, X, X, X, X, X},
+    {2, 8, 3, 2, 10, 8, 10, 9, 8, X, X, X, X, X, X, X},
+    {3, 11, 2, X, X, X, X, X, X, X, X, X, X, X, X, X},
+    {0, 11, 2, 8, 11, 0, X, X, X, X, X, X, X, X, X, X},
+    {1, 9, 0, 2, 3, 11, X, X, X, X, X, X, X, X, X, X},
+    {1, 11, 2, 1, 9, 11, 9, 8, 11, X, X, X, X, X, X, X},
+    {3, 10, 1, 11, 10, 3, X, X, X, X, X, X, X, X, X, X},
+    {0, 10, 1, 0, 8, 10, 8, 11, 10, X, X, X, X, X, X, X},
+    {3, 9, 0, 3, 11, 9, 11, 10, 9, X, X, X, X, X, X, X},
+    {9, 8, 10, 10, 8, 11, X, X, X, X, X, X, X, X, X, X},
+    {4, 7, 8, X, X, X, X, X, X, X, X, X, X, X, X, X},
+    {4, 3, 0, 7, 3, 4, X, X, X, X, X, X, X, X, X, X},
+    {0, 1, 9, 8, 4, 7, X, X, X, X, X, X, X, X, X, X},
+    {4, 1, 9, 4, 7, 1, 7, 3, 1, X, X, X, X, X, X, X},
+    {1, 2, 10, 8, 4, 7, X, X, X, X, X, X, X, X, X, X},
+    {3, 4, 7, 3, 0, 4, 1, 2, 10, X, X, X, X, X, X, X},
+    {9, 2, 10, 9, 0, 2, 8, 4, 7, X, X, X, X, X, X, X},
+    {2, 10, 9, 2, 9, 7, 2, 7, 3, 7, 9, 4, X, X, X, X},
+    {8, 4, 7, 3, 11, 2, X, X, X, X, X, X, X, X, X, X},
+    {11, 4, 7, 11, 2, 4, 2, 0, 4, X, X, X, X, X, X, X},
+    {9, 0, 1, 8, 4, 7, 2, 3, 11, X, X, X, X, X, X, X},
+    {4, 7, 11, 9, 4, 11, 9, 11, 2, 9, 2, 1, X, X, X, X},
+    {3, 10, 1, 3, 11, 10, 7, 8, 4, X, X, X, X, X, X, X},
+    {1, 11, 10, 1, 4, 11, 1, 0, 4, 7, 11, 4, X, X, X, X},
+    {4, 7, 8, 9, 0, 11, 9, 11, 10, 11, 0, 3, X, X, X, X},
+    {4, 7, 11, 4, 11, 9, 9, 11, 10, X, X, X, X, X, X, X},
+    {9, 5, 4, X, X, X, X, X, X, X, X, X, X, X, X, X},
+    {9, 5, 4, 0, 8, 3, X, X, X, X, X, X, X, X, X, X},
+    {0, 5, 4, 1, 5, 0, X, X, X, X, X, X, X, X, X, X},
+    {8, 5, 4, 8, 3, 5, 3, 1, 5, X, X, X, X, X, X, X},
+    {1, 2, 10, 9, 5, 4, X, X, X, X, X, X, X, X, X, X},
+    {3, 0, 8, 1, 2, 10, 4, 9, 5, X, X, X, X, X, X, X},
+    {5, 2, 10, 5, 4, 2, 4, 0, 2, X, X, X, X, X, X, X},
+    {2, 10, 5, 3, 2, 5, 3, 5, 4, 3, 4, 8, X, X, X, X},
+    {9, 5, 4, 2, 3, 11, X, X, X, X, X, X, X, X, X, X},
+    {0, 11, 2, 0, 8, 11, 4, 9, 5, X, X, X, X, X, X, X},
+    {0, 5, 4, 0, 1, 5, 2, 3, 11, X, X, X, X, X, X, X},
+    {2, 1, 5, 2, 5, 8, 2, 8, 11, 4, 8, 5, X, X, X, X},
+    {10, 3, 11, 10, 1, 3, 9, 5, 4, X, X, X, X, X, X, X},
+    {4, 9, 5, 0, 8, 1, 8, 10, 1, 8, 11, 10, X, X, X, X},
+    {5, 4, 0, 5, 0, 11, 5, 11, 10, 11, 0, 3, X, X, X, X},
+    {5, 4, 8, 5, 8, 10, 10, 8, 11, X, X, X, X, X, X, X},
+    {9, 7, 8, 5, 7, 9, X, X, X, X, X, X, X, X, X, X},
+    {9, 3, 0, 9, 5, 3, 5, 7, 3, X, X, X, X, X, X, X},
+    {0, 7, 8, 0, 1, 7, 1, 5, 7, X, X, X, X, X, X, X},
+    {1, 5, 3, 3, 5, 7, X, X, X, X, X, X, X, X, X, X},
+    {9, 7, 8, 9, 5, 7, 10, 1, 2, X, X, X, X, X, X, X},
+    {10, 1, 2, 9, 5, 0, 5, 3, 0, 5, 7, 3, X, X, X, X},
+    {8, 0, 2, 8, 2, 5, 8, 5, 7, 10, 5, 2, X, X, X, X},
+    {2, 10, 5, 2, 5, 3, 3, 5, 7, X, X, X, X, X, X, X},
+    {7, 9, 5, 7, 8, 9, 3, 11, 2, X, X, X, X, X, X, X},
+    {9, 5, 7, 9, 7, 2, 9, 2, 0, 2, 7, 11, X, X, X, X},
+    {2, 3, 11, 0, 1, 8, 1, 7, 8, 1, 5, 7, X, X, X, X},
+    {11, 2, 1, 11, 1, 7, 7, 1, 5, X, X, X, X, X, X, X},
+    {9, 5, 8, 8, 5, 7, 10, 1, 3, 10, 3, 11, X, X, X, X},
+    {5, 7, 0, 5, 0, 9, 7, 11, 0, 1, 0, 10, 11, 10, 0, X},
+    {11, 10, 0, 11, 0, 3, 10, 5, 0, 8, 0, 7, 5, 7, 0, X},
+    {11, 10, 5, 7, 11, 5, X, X, X, X, X, X, X, X, X, X},
+    {10, 6, 5, X, X, X, X, X, X, X, X, X, X, X, X, X},
+    {0, 8, 3, 5, 10, 6, X, X, X, X, X, X, X, X, X, X},
+    {9, 0, 1, 5, 10, 6, X, X, X, X, X, X, X, X, X, X},
+    {1, 8, 3, 1, 9, 8, 5, 10, 6, X, X, X, X, X, X, X},
+    {1, 6, 5, 2, 6, 1, X, X, X, X, X, X, X, X, X, X},
+    {1, 6, 5, 1, 2, 6, 3, 0, 8, X, X, X, X, X, X, X},
+    {9, 6, 5, 9, 0, 6, 0, 2, 6, X, X, X, X, X, X, X},
+    {5, 9, 8, 5, 8, 2, 5, 2, 6, 3, 2, 8, X, X, X, X},
+    {2, 3, 11, 10, 6, 5, X, X, X, X, X, X, X, X, X, X},
+    {11, 0, 8, 11, 2, 0, 10, 6, 5, X, X, X, X, X, X, X},
+    {0, 1, 9, 2, 3, 11, 5, 10, 6, X, X, X, X, X, X, X},
+    {5, 10, 6, 1, 9, 2, 9, 11, 2, 9, 8, 11, X, X, X, X},
+    {6, 3, 11, 6, 5, 3, 5, 1, 3, X, X, X, X, X, X, X},
+    {0, 8, 11, 0, 11, 5, 0, 5, 1, 5, 11, 6, X, X, X, X},
+    {3, 11, 6, 0, 3, 6, 0, 6, 5, 0, 5, 9, X, X, X, X},
+    {6, 5, 9, 6, 9, 11, 11, 9, 8, X, X, X, X, X, X, X},
+    {5, 10, 6, 4, 7, 8, X, X, X, X, X, X, X, X, X, X},
+    {4, 3, 0, 4, 7, 3, 6, 5, 10, X, X, X, X, X, X, X},
+    {1, 9, 0, 5, 10, 6, 8, 4, 7, X, X, X, X, X, X, X},
+    {10, 6, 5, 1, 9, 7, 1, 7, 3, 7, 9, 4, X, X, X, X},
+    {6, 1, 2, 6, 5, 1, 4, 7, 8, X, X, X, X, X, X, X},
+    {1, 2, 5, 5, 2, 6, 3, 0, 4, 3, 4, 7, X, X, X, X},
+    {8, 4, 7, 9, 0, 5, 0, 6, 5, 0, 2, 6, X, X, X, X},
+    {7, 3, 9, 7, 9, 4, 3, 2, 9, 5, 9, 6, 2, 6, 9, X},
+    {3, 11, 2, 7, 8, 4, 10, 6, 5, X, X, X, X, X, X, X},
+    {5, 10, 6, 4, 7, 2, 4, 2, 0, 2, 7, 11, X, X, X, X},
+    {0, 1, 9, 4, 7, 8, 2, 3, 11, 5, 10, 6, X, X, X, X},
+    {9, 2, 1, 9, 11, 2, 9, 4, 11, 7, 11, 4, 5, 10, 6, X},
+    {8, 4, 7, 3, 11, 5, 3, 5, 1, 5, 11, 6, X, X, X, X},
+    {5, 1, 11, 5, 11, 6, 1, 0, 11, 7, 11, 4, 0, 4, 11, X},
+    {0, 5, 9, 0, 6, 5, 0, 3, 6, 11, 6, 3, 8, 4, 7, X},
+    {6, 5, 9, 6, 9, 11, 4, 7, 9, 7, 11, 9, X, X, X, X},
+    {10, 4, 9, 6, 4, 10, X, X, X, X, X, X, X, X, X, X},
+    {4, 10, 6, 4, 9, 10, 0, 8, 3, X, X, X, X, X, X, X},
+    {10, 0, 1, 10, 6, 0, 6, 4, 0, X, X, X, X, X, X, X},
+    {8, 3, 1, 8, 1, 6, 8, 6, 4, 6, 1, 10, X, X, X, X},
+    {1, 4, 9, 1, 2, 4, 2, 6, 4, X, X, X, X, X, X, X},
+    {3, 0, 8, 1, 2, 9, 2, 4, 9, 2, 6, 4, X, X, X, X},
+    {0, 2, 4, 4, 2, 6, X, X, X, X, X, X, X, X, X, X},
+    {8, 3, 2, 8, 2, 4, 4, 2, 6, X, X, X, X, X, X, X},
+    {10, 4, 9, 10, 6, 4, 11, 2, 3, X, X, X, X, X, X, X},
+    {0, 8, 2, 2, 8, 11, 4, 9, 10, 4, 10, 6, X, X, X, X},
+    {3, 11, 2, 0, 1, 6, 0, 6, 4, 6, 1, 10, X, X, X, X},
+    {6, 4, 1, 6, 1, 10, 4, 8, 1, 2, 1, 11, 8, 11, 1, X},
+    {9, 6, 4, 9, 3, 6, 9, 1, 3, 11, 6, 3, X, X, X, X},
+    {8, 11, 1, 8, 1, 0, 11, 6, 1, 9, 1, 4, 6, 4, 1, X},
+    {3, 11, 6, 3, 6, 0, 0, 6, 4, X, X, X, X, X, X, X},
+    {6, 4, 8, 11, 6, 8, X, X, X, X, X, X, X, X, X, X},
+    {7, 10, 6, 7, 8, 10, 8, 9, 10, X, X, X, X, X, X, X},
+    {0, 7, 3, 0, 10, 7, 0, 9, 10, 6, 7, 10, X, X, X, X},
+    {10, 6, 7, 1, 10, 7, 1, 7, 8, 1, 8, 0, X, X, X, X},
+    {10, 6, 7, 10, 7, 1, 1, 7, 3, X, X, X, X, X, X, X},
+    {1, 2, 6, 1, 6, 8, 1, 8, 9, 8, 6, 7, X, X, X, X},
+    {2, 6, 9, 2, 9, 1, 6, 7, 9, 0, 9, 3, 7, 3, 9, X},
+    {7, 8, 0, 7, 0, 6, 6, 0, 2, X, X, X, X, X, X, X},
+    {7, 3, 2, 6, 7, 2, X, X, X, X, X, X, X, X, X, X},
+    {2, 3, 11, 10, 6, 8, 10, 8, 9, 8, 6, 7, X, X, X, X},
+    {2, 0, 7, 2, 7, 11, 0, 9, 7, 6, 7, 10, 9, 10, 7, X},
+    {1, 8, 0, 1, 7, 8, 1, 10, 7, 6, 7, 10, 2, 3, 11, X},
+    {11, 2, 1, 11, 1, 7, 10, 6, 1, 6, 7, 1, X, X, X, X},
+    {8, 9, 6, 8, 6, 7, 9, 1, 6, 11, 6, 3, 1, 3, 6, X},
+    {0, 9, 1, 11, 6, 7, X, X, X, X, X, X, X, X, X, X},
+    {7, 8, 0, 7, 0, 6, 3, 11, 0, 11, 6, 0, X, X, X, X},
+    {7, 11, 6, X, X, X, X, X, X, X, X, X, X, X, X, X},
+    {7, 6, 11, X, X, X, X, X, X, X, X, X, X, X, X, X},
+    {3, 0, 8, 11, 7, 6, X, X, X, X, X, X, X, X, X, X},
+    {0, 1, 9, 11, 7, 6, X, X, X, X, X, X, X, X, X, X},
+    {8, 1, 9, 8, 3, 1, 11, 7, 6, X, X, X, X, X, X, X},
+    {10, 1, 2, 6, 11, 7, X, X, X, X, X, X, X, X, X, X},
+    {1, 2, 10, 3, 0, 8, 6, 11, 7, X, X, X, X, X, X, X},
+    {2, 9, 0, 2, 10, 9, 6, 11, 7, X, X, X, X, X, X, X},
+    {6, 11, 7, 2, 10, 3, 10, 8, 3, 10, 9, 8, X, X, X, X},
+    {7, 2, 3, 6, 2, 7, X, X, X, X, X, X, X, X, X, X},
+    {7, 0, 8, 7, 6, 0, 6, 2, 0, X, X, X, X, X, X, X},
+    {2, 7, 6, 2, 3, 7, 0, 1, 9, X, X, X, X, X, X, X},
+    {1, 6, 2, 1, 8, 6, 1, 9, 8, 8, 7, 6, X, X, X, X},
+    {10, 7, 6, 10, 1, 7, 1, 3, 7, X, X, X, X, X, X, X},
+    {10, 7, 6, 1, 7, 10, 1, 8, 7, 1, 0, 8, X, X, X, X},
+    {0, 3, 7, 0, 7, 10, 0, 10, 9, 6, 10, 7, X, X, X, X},
+    {7, 6, 10, 7, 10, 8, 8, 10, 9, X, X, X, X, X, X, X},
+    {6, 8, 4, 11, 8, 6, X, X, X, X, X, X, X, X, X, X},
+    {3, 6, 11, 3, 0, 6, 0, 4, 6, X, X, X, X, X, X, X},
+    {8, 6, 11, 8, 4, 6, 9, 0, 1, X, X, X, X, X, X, X},
+    {9, 4, 6, 9, 6, 3, 9, 3, 1, 11, 3, 6, X, X, X, X},
+    {6, 8, 4, 6, 11, 8, 2, 10, 1, X, X, X, X, X, X, X},
+    {1, 2, 10, 3, 0, 11, 0, 6, 11, 0, 4, 6, X, X, X, X},
+    {4, 11, 8, 4, 6, 11, 0, 2, 9, 2, 10, 9, X, X, X, X},
+    {10, 9, 3, 10, 3, 2, 9, 4, 3, 11, 3, 6, 4, 6, 3, X},
+    {8, 2, 3, 8, 4, 2, 4, 6, 2, X, X, X, X, X, X, X},
+    {0, 4, 2, 4, 6, 2, X, X, X, X, X, X, X, X, X, X},
+    {1, 9, 0, 2, 3, 4, 2, 4, 6, 4, 3, 8, X, X, X, X},
+    {1, 9, 4, 1, 4, 2, 2, 4, 6, X, X, X, X, X, X, X},
+    {8, 1, 3, 8, 6, 1, 8, 4, 6, 6, 10, 1, X, X, X, X},
+    {10, 1, 0, 10, 0, 6, 6, 0, 4, X, X, X, X, X, X, X},
+    {4, 6, 3, 4, 3, 8, 6, 10, 3, 0, 3, 9, 10, 9, 3, X},
+    {10, 9, 4, 6, 10, 4, X, X, X, X, X, X, X, X, X, X},
+    {4, 9, 5, 7, 6, 11, X, X, X, X, X, X, X, X, X, X},
+    {0, 8, 3, 4, 9, 5, 11, 7, 6, X, X, X, X, X, X, X},
+    {5, 0, 1, 5, 4, 0, 7, 6, 11, X, X, X, X, X, X, X},
+    {11, 7, 6, 8, 3, 4, 3, 5, 4, 3, 1, 5, X, X, X, X},
+    {9, 5, 4, 10, 1, 2, 7, 6, 11, X, X, X, X, X, X, X},
+    {6, 11, 7, 1, 2, 10, 0, 8, 3, 4, 9, 5, X, X, X, X},
+    {7, 6, 11, 5, 4, 10, 4, 2, 10, 4, 0, 2, X, X, X, X},
+    {3, 4, 8, 3, 5, 4, 3, 2, 5, 10, 5, 2, 11, 7, 6, X},
+    {7, 2, 3, 7, 6, 2, 5, 4, 9, X, X, X, X, X, X, X},
+    {9, 5, 4, 0, 8, 6, 0, 6, 2, 6, 8, 7, X, X, X, X},
+    {3, 6, 2, 3, 7, 6, 1, 5, 0, 5, 4, 0, X, X, X, X},
+    {6, 2, 8, 6, 8, 7, 2, 1, 8, 4, 8, 5, 1, 5, 8, X},
+    {9, 5, 4, 10, 1, 6, 1, 7, 6, 1, 3, 7, X, X, X, X},
+    {1, 6, 10, 1, 7, 6, 1, 0, 7, 8, 7, 0, 9, 5, 4, X},
+    {4, 0, 10, 4, 10, 5, 0, 3, 10, 6, 10, 7, 3, 7, 10, X},
+    {7, 6, 10, 7, 10, 8, 5, 4, 10, 4, 8, 10, X, X, X, X},
+    {6, 9, 5, 6, 11, 9, 11, 8, 9, X, X, X, X, X, X, X},
+    {3, 6, 11, 0, 6, 3, 0, 5, 6, 0, 9, 5, X, X, X, X},
+    {0, 11, 8, 0, 5, 11, 0, 1, 5, 5, 6, 11, X, X, X, X},
+    {6, 11, 3, 6, 3, 5, 5, 3, 1, X, X, X, X, X, X, X},
+    {1, 2, 10, 9, 5, 11, 9, 11, 8, 11, 5, 6, X, X, X, X},
+    {0, 11, 3, 0, 6, 11, 0, 9, 6, 5, 6, 9, 1, 2, 10, X},
+    {11, 8, 5, 11, 5, 6, 8, 0, 5, 10, 5, 2, 0, 2, 5, X},
+    {6, 11, 3, 6, 3, 5, 2, 10, 3, 10, 5, 3, X, X, X, X},
+    {5, 8, 9, 5, 2, 8, 5, 6, 2, 3, 8, 2, X, X, X, X},
+    {9, 5, 6, 9, 6, 0, 0, 6, 2, X, X, X, X, X, X, X},
+    {1, 5, 8, 1, 8, 0, 5, 6, 8, 3, 8, 2, 6, 2, 8, X},
+    {1, 5, 6, 2, 1, 6, X, X, X, X, X, X, X, X, X, X},
+    {1, 3, 6, 1, 6, 10, 3, 8, 6, 5, 6, 9, 8, 9, 6, X},
+    {10, 1, 0, 10, 0, 6, 9, 5, 0, 5, 6, 0, X, X, X, X},
+    {0, 3, 8, 5, 6, 10, X, X, X, X, X, X, X, X, X, X},
+    {10, 5, 6, X, X, X, X, X, X, X, X, X, X, X, X, X},
+    {11, 5, 10, 7, 5, 11, X, X, X, X, X, X, X, X, X, X},
+    {11, 5, 10, 11, 7, 5, 8, 3, 0, X, X, X, X, X, X, X},
+    {5, 11, 7, 5, 10, 11, 1, 9, 0, X, X, X, X, X, X, X},
+    {10, 7, 5, 10, 11, 7, 9, 8, 1, 8, 3, 1, X, X, X, X},
+    {11, 1, 2, 11, 7, 1, 7, 5, 1, X, X, X, X, X, X, X},
+    {0, 8, 3, 1, 2, 7, 1, 7, 5, 7, 2, 11, X, X, X, X},
+    {9, 7, 5, 9, 2, 7, 9, 0, 2, 2, 11, 7, X, X, X, X},
+    {7, 5, 2, 7, 2, 11, 5, 9, 2, 3, 2, 8, 9, 8, 2, X},
+    {2, 5, 10, 2, 3, 5, 3, 7, 5, X, X, X, X, X, X, X},
+    {8, 2, 0, 8, 5, 2, 8, 7, 5, 10, 2, 5, X, X, X, X},
+    {9, 0, 1, 5, 10, 3, 5, 3, 7, 3, 10, 2, X, X, X, X},
+    {9, 8, 2, 9, 2, 1, 8, 7, 2, 10, 2, 5, 7, 5, 2, X},
+    {1, 3, 5, 3, 7, 5, X, X, X, X, X, X, X, X, X, X},
+    {0, 8, 7, 0, 7, 1, 1, 7, 5, X, X, X, X, X, X, X},
+    {9, 0, 3, 9, 3, 5, 5, 3, 7, X, X, X, X, X, X, X},
+    {9, 8, 7, 5, 9, 7, X, X, X, X, X, X, X, X, X, X},
+    {5, 8, 4, 5, 10, 8, 10, 11, 8, X, X, X, X, X, X, X},
+    {5, 0, 4, 5, 11, 0, 5, 10, 11, 11, 3, 0, X, X, X, X},
+    {0, 1, 9, 8, 4, 10, 8, 10, 11, 10, 4, 5, X, X, X, X},
+    {10, 11, 4, 10, 4, 5, 11, 3, 4, 9, 4, 1, 3, 1, 4, X},
+    {2, 5, 1, 2, 8, 5, 2, 11, 8, 4, 5, 8, X, X, X, X},
+    {0, 4, 11, 0, 11, 3, 4, 5, 11, 2, 11, 1, 5, 1, 11, X},
+    {0, 2, 5, 0, 5, 9, 2, 11, 5, 4, 5, 8, 11, 8, 5, X},
+    {9, 4, 5, 2, 11, 3, X, X, X, X, X, X, X, X, X, X},
+    {2, 5, 10, 3, 5, 2, 3, 4, 5, 3, 8, 4, X, X, X, X},
+    {5, 10, 2, 5, 2, 4, 4, 2, 0, X, X, X, X, X, X, X},
+    {3, 10, 2, 3, 5, 10, 3, 8, 5, 4, 5, 8, 0, 1, 9, X},
+    {5, 10, 2, 5, 2, 4, 1, 9, 2, 9, 4, 2, X, X, X, X},
+    {8, 4, 5, 8, 5, 3, 3, 5, 1, X, X, X, X, X, X, X},
+    {0, 4, 5, 1, 0, 5, X, X, X, X, X, X, X, X, X, X},
+    {8, 4, 5, 8, 5, 3, 9, 0, 5, 0, 3, 5, X, X, X, X},
+    {9, 4, 5, X, X, X, X, X, X, X, X, X, X, X, X, X},
+    {4, 11, 7, 4, 9, 11, 9, 10, 11, X, X, X, X, X, X, X},
+    {0, 8, 3, 4, 9, 7, 9, 11, 7, 9, 10, 11, X, X, X, X},
+    {1, 10, 11, 1, 11, 4, 1, 4, 0, 7, 4, 11, X, X, X, X},
+    {3, 1, 4, 3, 4, 8, 1, 10, 4, 7, 4, 11, 10, 11, 4, X},
+    {4, 11, 7, 9, 11, 4, 9, 2, 11, 9, 1, 2, X, X, X, X},
+    {9, 7, 4, 9, 11, 7, 9, 1, 11, 2, 11, 1, 0, 8, 3, X},
+    {11, 7, 4, 11, 4, 2, 2, 4, 0, X, X, X, X, X, X, X},
+    {11, 7, 4, 11, 4, 2, 8, 3, 4, 3, 2, 4, X, X, X, X},
+    {2, 9, 10, 2, 7, 9, 2, 3, 7, 7, 4, 9, X, X, X, X},
+    {9, 10, 7, 9, 7, 4, 10, 2, 7, 8, 7, 0, 2, 0, 7, X},
+    {3, 7, 10, 3, 10, 2, 7, 4, 10, 1, 10, 0, 4, 0, 10, X},
+    {1, 10, 2, 8, 7, 4, X, X, X, X, X, X, X, X, X, X},
+    {4, 9, 1, 4, 1, 7, 7, 1, 3, X, X, X, X, X, X, X},
+    {4, 9, 1, 4, 1, 7, 0, 8, 1, 8, 7, 1, X, X, X, X},
+    {4, 0, 3, 7, 4, 3, X, X, X, X, X, X, X, X, X, X},
+    {4, 8, 7, X, X, X, X, X, X, X, X, X, X, X, X, X},
+    {9, 10, 8, 10, 11, 8, X, X, X, X, X, X, X, X, X, X},
+    {3, 0, 9, 3, 9, 11, 11, 9, 10, X, X, X, X, X, X, X},
+    {0, 1, 10, 0, 10, 8, 8, 10, 11, X, X, X, X, X, X, X},
+    {3, 1, 10, 11, 3, 10, X, X, X, X, X, X, X, X, X, X},
+    {1, 2, 11, 1, 11, 9, 9, 11, 8, X, X, X, X, X, X, X},
+    {3, 0, 9, 3, 9, 11, 1, 2, 9, 2, 11, 9, X, X, X, X},
+    {0, 2, 11, 8, 0, 11, X, X, X, X, X, X, X, X, X, X},
+    {3, 2, 11, X, X, X, X, X, X, X, X, X, X, X, X, X},
+    {2, 3, 8, 2, 8, 10, 10, 8, 9, X, X, X, X, X, X, X},
+    {9, 10, 2, 0, 9, 2, X, X, X, X, X, X, X, X, X, X},
+    {2, 3, 8, 2, 8, 10, 0, 1, 8, 1, 10, 8, X, X, X, X},
+    {1, 10, 2, X, X, X, X, X, X, X, X, X, X, X, X, X},
+    {1, 3, 8, 9, 1, 8, X, X, X, X, X, X, X, X, X, X},
+    {0, 9, 1, X, X, X, X, X, X, X, X, X, X, X, X, X},
+    {0, 3, 8, X, X, X, X, X, X, X, X, X, X, X, X, X},
+    {X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X}};
+#undef X
+
+// Table mapping each edge (row index) to the Cube::p indices of its end points
+const uint _EDGE_TO_VERTICES[12][2] = {
+    {0, 1}, // e0
+    {1, 5}, // e1
+    {4, 5}, // e2
+    {0, 4}, // e3
+    {2, 3}, // e4
+    {3, 7}, // e5
+    {6, 7}, // e6
+    {2, 6}, // e7
+    {0, 2}, // e8
+    {1, 3}, // e9
+    {5, 7}, // e10
+    {4, 6}, // e11
+};
+
+// Table mapping bit i of cubeindex (corner v_i, i in 0-7) to its Cube::p index
+const int _INDEX_TABLE[8] = {0, 1, 5, 4, 2, 3, 7, 6};
diff --git a/pytorch3d/pytorch3d/csrc/mesh_normal_consistency/mesh_normal_consistency.h b/pytorch3d/pytorch3d/csrc/mesh_normal_consistency/mesh_normal_consistency.h
new file mode 100644
index 0000000000000000000000000000000000000000..17795ae1eb4b6bceb8a9960bc6d7523eb3c2acb6
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/mesh_normal_consistency/mesh_normal_consistency.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/extension.h>
+#include "utils/pytorch3d_cutils.h"
+
+// For mesh_normal_consistency, find pairs of vertices opposite the same edge.
+//
+// Args:
+//   edge_num: int64 Tensor of shape (E,) giving the number of vertices
+//              corresponding to each edge.
+//
+// Returns:
+//    pairs: int64 Tensor of shape (N,2)
+
+at::Tensor MeshNormalConsistencyFindVerticesCpu(const at::Tensor& edge_num);
+
+// Exposed implementation (CPU only). Marked inline because the definition
+// lives in a header and must stay ODR-safe if included from several files.
+inline at::Tensor MeshNormalConsistencyFindVertices(
+    const at::Tensor& edge_num) {
+  TORCH_CHECK(!edge_num.is_cuda(), "This function needs a CPU tensor.");
+  return MeshNormalConsistencyFindVerticesCpu(edge_num);
+}
diff --git a/pytorch3d/pytorch3d/csrc/mesh_normal_consistency/mesh_normal_consistency_cpu.cpp b/pytorch3d/pytorch3d/csrc/mesh_normal_consistency/mesh_normal_consistency_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1b0d5e6ea0a76cb2aac5e3f6ddb8eca77635da97
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/mesh_normal_consistency/mesh_normal_consistency_cpu.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <utility>
+#include <vector>
+
+at::Tensor MeshNormalConsistencyFindVerticesCpu(const at::Tensor& edge_num) {
+  // edge_num is a LongTensor of shape (E,) giving the number of things
+  // intersecting each edge. The things are taken to be numbered in order, so
+  // edge i owns a contiguous run of edge_num[i] consecutive numbers.
+  // (In fact, the "things" are opposite vertices to edges, renumbered).
+  // We return a tensor of shape (P, 2) with one row [a, b], a < b, for every
+  // pair of things which intersect the same edge.
+  // Example possible inputs and outputs (order of output is not specified):
+  //  [1,0,1,1,0] => [[]]
+  //          [3] => [[0,1], [0,2], [1,2]]
+  //        [0,3] => [[0,1], [0,2], [1,2]]
+  //        [1,3] => [[1,2], [1,3], [2,3]]
+  //[1,0,2,1,0,2] => [[1,2], [4,5]]
+
+  const int64_t num_edges = edge_num.size(0);
+  auto edges_a = edge_num.accessor<int64_t, 1>();
+
+  // Pass 1: an edge touched by e things yields e * (e - 1) / 2 pairs, so the
+  // output can be sized exactly and filled in place (no intermediate vector).
+  int64_t num_pairs = 0;
+  for (int64_t i_edge = 0; i_edge < num_edges; ++i_edge) {
+    const int64_t e = edges_a[i_edge];
+    num_pairs += e > 1 ? e * (e - 1) / 2 : 0;
+  }
+
+  // Pass 2: emit the pairs grouped by edge, then by j, then by i.
+  auto pairs_tensor = at::empty({num_pairs, 2}, edge_num.options());
+  auto pairs_a = pairs_tensor.accessor<int64_t, 2>();
+  int64_t vert_idx = 0, i_pair = 0;
+  for (int64_t i_edge = 0; i_edge < num_edges; ++i_edge) {
+    const int64_t e = edges_a[i_edge];
+    for (int64_t j = 0; j < e; ++j) {
+      for (int64_t i = 0; i < j; ++i, ++i_pair) {
+        pairs_a[i_pair][0] = vert_idx + i;
+        pairs_a[i_pair][1] = vert_idx + j;
+      }
+    }
+    vert_idx += e; // the next edge's things start right after this edge's
+  }
+  return pairs_tensor;
+}
diff --git a/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.cu b/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.cu
new file mode 100644
index 0000000000000000000000000000000000000000..94f22c18431bb8bc4557584acdd5894155a17e37
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.cu
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+
+// Kernel for inputs_packed of shape (F, D), where D > 1
+template <typename scalar_t>
+__global__ void PackedToPaddedKernel(
+    const scalar_t* __restrict__ inputs_packed,
+    const int64_t* __restrict__ first_idxs,
+    scalar_t* __restrict__ inputs_padded,
+    const size_t batch_size,
+    const size_t max_size,
+    const size_t num_inputs,
+    const size_t D) {
+  // Batch elements split evenly across blocks (num blocks = batch_size) and
+  // rows of each element split across threads in the block. Each thread copies
+  // its rows (tid, tid + blockDim.x, ...) from inputs_packed into row batch_idx
+  // of inputs_padded. `num` is 64-bit and clamped at zero (see below).
+  const size_t tid = threadIdx.x;
+  const size_t batch_idx = blockIdx.x;
+
+  const int64_t start = first_idxs[batch_idx];
+  const int64_t end =
+      batch_idx + 1 < batch_size ? first_idxs[batch_idx + 1] : num_inputs;
+  const size_t num = end > start ? static_cast<size_t>(end - start) : 0;
+  for (size_t f = tid; f < num; f += blockDim.x) { // no int truncation/wrap
+    for (size_t j = 0; j < D; ++j) {
+      inputs_padded[batch_idx * max_size * D + f * D + j] =
+          inputs_packed[(start + f) * D + j];
+    }
+  }
+}
+
+// Kernel for inputs of shape (F, 1)
+template <typename scalar_t>
+__global__ void PackedToPaddedKernelD1(
+    const scalar_t* __restrict__ inputs_packed,
+    const int64_t* __restrict__ first_idxs,
+    scalar_t* __restrict__ inputs_padded,
+    const size_t batch_size,
+    const size_t max_size,
+    const size_t num_inputs) {
+  // Batch elements split evenly across blocks (num blocks = batch_size) and
+  // values of each element split across threads in the block. Each thread
+  // copies its values (tid, tid + blockDim.x, ...) from inputs_packed into row
+  // batch_idx of inputs_padded. `num` is 64-bit and clamped at zero.
+  const size_t tid = threadIdx.x;
+  const size_t batch_idx = blockIdx.x;
+
+  const int64_t start = first_idxs[batch_idx];
+  const int64_t end =
+      batch_idx + 1 < batch_size ? first_idxs[batch_idx + 1] : num_inputs;
+  const size_t num = end > start ? static_cast<size_t>(end - start) : 0;
+  for (size_t f = tid; f < num; f += blockDim.x) { // no int truncation/wrap
+    inputs_padded[batch_idx * max_size + f] = inputs_packed[start + f];
+  }
+}
+
+// Kernel for inputs_padded of shape (B, F, D), where D > 1
+template <typename scalar_t>
+__global__ void PaddedToPackedKernel(
+    const scalar_t* __restrict__ inputs_padded,
+    const int64_t* __restrict__ first_idxs,
+    scalar_t* __restrict__ inputs_packed,
+    const size_t batch_size,
+    const size_t max_size,
+    const size_t num_inputs,
+    const size_t D) {
+  // Batch elements split evenly across blocks (num blocks = batch_size) and
+  // rows of each element split across threads in the block. Each thread copies
+  // its rows (tid, tid + blockDim.x, ...) from row batch_idx of inputs_padded
+  // into inputs_packed. `num` is 64-bit and clamped at zero (see below).
+  const size_t tid = threadIdx.x;
+  const size_t batch_idx = blockIdx.x;
+
+  const int64_t start = first_idxs[batch_idx];
+  const int64_t end =
+      batch_idx + 1 < batch_size ? first_idxs[batch_idx + 1] : num_inputs;
+  const size_t num = end > start ? static_cast<size_t>(end - start) : 0;
+  for (size_t f = tid; f < num; f += blockDim.x) { // no int truncation/wrap
+    for (size_t j = 0; j < D; ++j) {
+      inputs_packed[(start + f) * D + j] =
+          inputs_padded[batch_idx * max_size * D + f * D + j];
+    }
+  }
+}
+
+// Kernel for inputs_padded of shape (B, F, 1)
+template <typename scalar_t>
+__global__ void PaddedToPackedKernelD1(
+    const scalar_t* __restrict__ inputs_padded,
+    const int64_t* __restrict__ first_idxs,
+    scalar_t* __restrict__ inputs_packed,
+    const size_t batch_size,
+    const size_t max_size,
+    const size_t num_inputs) {
+  // Batch elements split evenly across blocks (num blocks = batch_size) and
+  // values of each element split across threads in the block. Each thread
+  // copies its values (tid, tid + blockDim.x, ...) from row batch_idx of
+  // inputs_padded into inputs_packed. `num` is 64-bit and clamped at zero.
+  const size_t tid = threadIdx.x;
+  const size_t batch_idx = blockIdx.x;
+
+  const int64_t start = first_idxs[batch_idx];
+  const int64_t end =
+      batch_idx + 1 < batch_size ? first_idxs[batch_idx + 1] : num_inputs;
+  const size_t num = end > start ? static_cast<size_t>(end - start) : 0;
+  for (size_t f = tid; f < num; f += blockDim.x) { // no int truncation/wrap
+    inputs_packed[start + f] = inputs_padded[batch_idx * max_size + f];
+  }
+}
+
+at::Tensor PackedToPaddedCuda( // packed (F, D) -> zero-padded (B, max_size, D)
+    const at::Tensor inputs_packed,
+    const at::Tensor first_idxs,
+    const int64_t max_size) {
+  // Check inputs are on the same device
+  at::TensorArg inputs_packed_t{inputs_packed, "inputs_packed", 1},
+      first_idxs_t{first_idxs, "first_idxs", 2};
+  at::CheckedFrom c = "PackedToPaddedCuda";
+  at::checkAllSameGPU(c, {inputs_packed_t, first_idxs_t});
+
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(inputs_packed.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  // F = num_inputs packed rows; B = batch_size entries in first_idxs.
+  const int64_t num_inputs = inputs_packed.size(0);
+  const int64_t batch_size = first_idxs.size(0);
+
+  TORCH_CHECK(
+      inputs_packed.dim() == 2, "inputs_packed must be a 2-dimensional tensor");
+  const int64_t D = inputs_packed.size(1);
+  at::Tensor inputs_padded =
+      at::zeros({batch_size, max_size, D}, inputs_packed.options());
+  // Empty output: nothing to copy, so return without launching a kernel.
+  if (inputs_padded.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return inputs_padded;
+  }
+  // One block per batch element; 512 threads share that element's rows.
+  const int threads = 512;
+  const int blocks = batch_size;
+  if (D == 1) { // dedicated kernel without the inner loop over D
+    AT_DISPATCH_FLOATING_TYPES(
+        inputs_packed.scalar_type(), "packed_to_padded_d1_kernel", ([&] {
+          PackedToPaddedKernelD1<scalar_t><<<blocks, threads, 0, stream>>>(
+              inputs_packed.contiguous().data_ptr<scalar_t>(),
+              first_idxs.contiguous().data_ptr<int64_t>(),
+              inputs_padded.data_ptr<scalar_t>(),
+              batch_size,
+              max_size,
+              num_inputs);
+        }));
+  } else {
+    AT_DISPATCH_FLOATING_TYPES(
+        inputs_packed.scalar_type(), "packed_to_padded_kernel", ([&] {
+          PackedToPaddedKernel<scalar_t><<<blocks, threads, 0, stream>>>(
+              inputs_packed.contiguous().data_ptr<scalar_t>(),
+              first_idxs.contiguous().data_ptr<int64_t>(),
+              inputs_padded.data_ptr<scalar_t>(),
+              batch_size,
+              max_size,
+              num_inputs,
+              D);
+        }));
+  }
+  // Check for an error left by the kernel launch above.
+  AT_CUDA_CHECK(cudaGetLastError());
+  return inputs_padded;
+}
+
+at::Tensor PaddedToPackedCuda( // padded (B, max_size, D) -> packed (F, D)
+    const at::Tensor inputs_padded,
+    const at::Tensor first_idxs,
+    const int64_t num_inputs) {
+  // Check inputs are on the same device
+  at::TensorArg inputs_padded_t{inputs_padded, "inputs_padded", 1},
+      first_idxs_t{first_idxs, "first_idxs", 2};
+  at::CheckedFrom c = "PaddedToPackedCuda";
+  at::checkAllSameGPU(c, {inputs_padded_t, first_idxs_t});
+
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(inputs_padded.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  // Validate the rank first so a malformed input yields the message below
+  // instead of an out-of-range error from size().
+  TORCH_CHECK(
+      inputs_padded.dim() == 3,
+      "inputs_padded  must be a 3-dimensional tensor");
+  const int64_t batch_size = inputs_padded.size(0);
+  const int64_t max_size = inputs_padded.size(1);
+  const int64_t D = inputs_padded.size(2);
+  TORCH_CHECK(batch_size == first_idxs.size(0), "sizes mismatch");
+  at::Tensor inputs_packed =
+      at::zeros({num_inputs, D}, inputs_padded.options());
+  // Empty output: nothing to copy, so return without launching a kernel.
+  if (inputs_packed.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return inputs_packed;
+  }
+  // One block per batch element; 512 threads share that element's rows.
+  const int threads = 512;
+  const int blocks = batch_size;
+
+  if (D == 1) { // dedicated kernel without the inner loop over D
+    AT_DISPATCH_FLOATING_TYPES(
+        inputs_padded.scalar_type(), "padded_to_packed_d1_kernel", ([&] {
+          PaddedToPackedKernelD1<scalar_t><<<blocks, threads, 0, stream>>>(
+              inputs_padded.contiguous().data_ptr<scalar_t>(),
+              first_idxs.contiguous().data_ptr<int64_t>(),
+              inputs_packed.data_ptr<scalar_t>(),
+              batch_size,
+              max_size,
+              num_inputs);
+        }));
+  } else {
+    AT_DISPATCH_FLOATING_TYPES(
+        inputs_padded.scalar_type(), "padded_to_packed_kernel", ([&] {
+          PaddedToPackedKernel<scalar_t><<<blocks, threads, 0, stream>>>(
+              inputs_padded.contiguous().data_ptr<scalar_t>(),
+              first_idxs.contiguous().data_ptr<int64_t>(),
+              inputs_packed.data_ptr<scalar_t>(),
+              batch_size,
+              max_size,
+              num_inputs,
+              D);
+        }));
+  }
+  // Check for an error left by the kernel launch above.
+  AT_CUDA_CHECK(cudaGetLastError());
+  return inputs_packed;
+}
diff --git a/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.h b/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.h
new file mode 100644
index 0000000000000000000000000000000000000000..27bbc3bf03d1e195f44d36662c93d9db85adae68
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/extension.h>
+#include "utils/pytorch3d_cutils.h"
+
+// PackedToPadded
+// Converts a packed tensor into a padded tensor, restoring the batch dimension.
+// Refer to pytorch3d/structures/meshes.py for details on packed/padded tensors.
+//
+// Inputs:
+//    inputs_packed: FloatTensor of shape (F, D), representing the packed batch
+//                      tensor, e.g. areas for faces in a batch of meshes.
+//    first_idxs: LongTensor of shape (N,) where N is the number of
+//                       elements in the batch and `first_idxs[i] = f`
+//                       means that the inputs for batch element i begin at
+//                       `inputs_packed[f]`.
+//    max_size: Max length of an element in the batch.
+// Returns:
+//   inputs_padded: FloatTensor of shape (N, max_size, D). The values for
+//                 batch element i, which start at
+//                 `inputs_packed[first_idxs[i]]`, will be copied to
+//                 `inputs_padded[i, :]`, with zeros padding out the extra
+//                  inputs.
+//
+
+// PaddedToPacked
+// Converts a padded tensor into a packed tensor.
+// Refer to pytorch3d/structures/meshes.py for details on packed/padded tensors.
+//
+// Inputs:
+//    inputs_padded: FloatTensor of shape (N, max_size, D), representing the
+//                padded tensor, e.g. areas for faces in a batch of meshes.
+//    first_idxs: LongTensor of shape (N,) where N is the number of
+//                       elements in the batch and `first_idxs[i] = f`
+//                       means that the inputs for batch element i begin at
+//                       `inputs_packed[f]`.
+//    num_inputs: Number of packed entries (= F)
+// Returns:
+//   inputs_packed: FloatTensor of shape (F, D), where
+//                      `inputs_packed[first_idxs[i]:] = inputs_padded[i, :]`.
+//
+//
+
+// Cpu implementation.
+at::Tensor PackedToPaddedCpu(
+    const at::Tensor inputs_packed,
+    const at::Tensor first_idxs,
+    const int64_t max_size);
+
+// Cpu implementation.
+at::Tensor PaddedToPackedCpu(
+    const at::Tensor inputs_padded,
+    const at::Tensor first_idxs,
+    const int64_t num_inputs);
+
+#ifdef WITH_CUDA
+// Cuda implementation.
+at::Tensor PackedToPaddedCuda(
+    const at::Tensor inputs_packed,
+    const at::Tensor first_idxs,
+    const int64_t max_size);
+
+// Cuda implementation.
+at::Tensor PaddedToPackedCuda(
+    const at::Tensor inputs_padded,
+    const at::Tensor first_idxs,
+    const int64_t num_inputs);
+#endif
+
+// Implementation which is exposed: dispatches on inputs_packed's device.
+inline at::Tensor PackedToPadded( // inline: defined in a header (ODR-safe)
+    const at::Tensor inputs_packed,
+    const at::Tensor first_idxs,
+    const int64_t max_size) {
+  if (inputs_packed.is_cuda()) {
+#ifdef WITH_CUDA
+    CHECK_CUDA(inputs_packed);
+    CHECK_CUDA(first_idxs);
+    return PackedToPaddedCuda(inputs_packed, first_idxs, max_size);
+#else
+    TORCH_CHECK(false, "Not compiled with GPU support.");
+#endif
+  }
+  return PackedToPaddedCpu(inputs_packed, first_idxs, max_size);
+}
+
+// Implementation which is exposed: dispatches on inputs_padded's device.
+inline at::Tensor PaddedToPacked( // inline: defined in a header (ODR-safe)
+    const at::Tensor inputs_padded,
+    const at::Tensor first_idxs,
+    const int64_t num_inputs) {
+  if (inputs_padded.is_cuda()) {
+#ifdef WITH_CUDA
+    CHECK_CUDA(inputs_padded);
+    CHECK_CUDA(first_idxs);
+    return PaddedToPackedCuda(inputs_padded, first_idxs, num_inputs);
+#else
+    TORCH_CHECK(false, "Not compiled with GPU support.");
+#endif
+  }
+  return PaddedToPackedCpu(inputs_padded, first_idxs, num_inputs);
+}
diff --git a/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor_cpu.cpp b/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c9176a1afd5e6736f938dc938dfc1d62c1052ddc
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor_cpu.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/extension.h>
+
+at::Tensor PackedToPaddedCpu(
+    const at::Tensor inputs_packed,
+    const at::Tensor first_idxs,
+    const int64_t max_size) {
+  // Copies packed rows (F, D) into a zero-padded (batch_size, max_size, D).
+  TORCH_CHECK(
+      inputs_packed.dim() == 2, "inputs_packed must be a 2-dimensional tensor");
+  const int64_t num_inputs = inputs_packed.size(0);
+  const int64_t batch_size = first_idxs.size(0);
+  const int64_t D = inputs_packed.size(1);
+  torch::Tensor inputs_padded =
+      torch::zeros({batch_size, max_size, D}, inputs_packed.options());
+  auto inputs_packed_a = inputs_packed.accessor<float, 2>(); // float32 only
+  auto first_idxs_a = first_idxs.accessor<int64_t, 1>();
+  auto inputs_padded_a = inputs_padded.accessor<float, 3>();
+  for (int64_t b = 0; b < batch_size; ++b) {
+    const int64_t start = first_idxs_a[b];
+    const int64_t end = b + 1 < batch_size ? first_idxs_a[b + 1] : num_inputs;
+    const int64_t num = end - start; // rows owned by batch element b
+    TORCH_CHECK( // accessor indexing is unchecked, so validate the range
+        num <= 0 || (start >= 0 && end <= num_inputs && num <= max_size),
+        "first_idxs is inconsistent with inputs_packed and max_size");
+    for (int64_t i = 0; i < num; ++i) {
+      for (int64_t j = 0; j < D; ++j) {
+        inputs_padded_a[b][i][j] = inputs_packed_a[start + i][j];
+      }
+    }
+  }
+  return inputs_padded;
+}
+
+at::Tensor PaddedToPackedCpu( // padded (B, max_size, D) -> packed (F, D)
+    const at::Tensor inputs_padded,
+    const at::Tensor first_idxs,
+    const int64_t num_inputs) {
+  TORCH_CHECK(
+      inputs_padded.dim() == 3, "inputs_padded must be a 3-dimensional tensor");
+  const int64_t batch_size = inputs_padded.size(0);
+  const int64_t max_size = inputs_padded.size(1);
+  const int64_t D = inputs_padded.size(2);
+  torch::Tensor inputs_packed =
+      torch::zeros({num_inputs, D}, inputs_padded.options());
+  auto inputs_padded_a = inputs_padded.accessor<float, 3>(); // float32 only
+  auto first_idxs_a = first_idxs.accessor<int64_t, 1>();
+  auto inputs_packed_a = inputs_packed.accessor<float, 2>();
+  for (int64_t b = 0; b < batch_size; ++b) {
+    const int64_t start = first_idxs_a[b];
+    const int64_t end = b + 1 < batch_size ? first_idxs_a[b + 1] : num_inputs;
+    const int64_t num = end - start; // rows owned by batch element b
+    TORCH_CHECK( // accessor indexing is unchecked, so validate the range
+        num <= 0 || (start >= 0 && end <= num_inputs && num <= max_size),
+        "first_idxs is inconsistent with inputs_padded and num_inputs");
+    for (int64_t i = 0; i < num; ++i) {
+      for (int64_t j = 0; j < D; ++j) {
+        inputs_packed_a[start + i][j] = inputs_padded_a[b][i][j];
+      }
+    }
+  }
+  return inputs_packed;
+}
diff --git a/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cpu.cpp b/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e059409c40330005e76f88792bf7037cb72e4000
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cpu.cpp
@@ -0,0 +1,428 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/extension.h>
+#include <array>
+#include <limits>
+#include "utils/geometry_utils.h"
+#include "utils/vec3.h"
+
+// - We start with implementations of simple operations on points, edges and
+// faces. The hull of H points is a point if H=1, an edge if H=2, a face if H=3.
+
+template <typename T>
+vec3<T> ExtractPoint(const at::TensorAccessor<T, 1>& t) {
+  return vec3<T>(t[0], t[1], t[2]); // first three entries of t, in order
+}
+
+template <typename Accessor>
+static std::array<vec3<std::remove_pointer_t<typename Accessor::PtrType>>, 1>
+ExtractHullHelper(const Accessor& t, std::array<char, 1> /*tag*/) {
+  return {ExtractPoint(t)}; // H = 1: t itself is the single point
+}
+// The unused tag's array size is what selects between these overloads.
+template <typename Accessor>
+static std::array<vec3<std::remove_pointer_t<typename Accessor::PtrType>>, 2>
+ExtractHullHelper(const Accessor& t, std::array<char, 2> /*tag*/) {
+  return {ExtractPoint(t[0]), ExtractPoint(t[1])}; // H = 2: rows 0 and 1
+}
+
+template <typename Accessor>
+static std::array<vec3<std::remove_pointer_t<typename Accessor::PtrType>>, 3>
+ExtractHullHelper(const Accessor& t, std::array<char, 3> /*tag*/) {
+  return {ExtractPoint(t[0]), ExtractPoint(t[1]), ExtractPoint(t[2])}; // H = 3
+}
+
+template <int H, typename Accessor>
+std::array<vec3<std::remove_pointer_t<typename Accessor::PtrType>>, H>
+ExtractHull(const Accessor& t) {
+  // Only the tag's type matters: it picks the H-point helper overload.
+  return ExtractHullHelper(t, std::array<char, H>{});
+}
+
+template <typename T>
+void IncrementPoint(at::TensorAccessor<T, 1>&& t, const vec3<T>& point) {
+  t[0] += point.x; // accumulates into (does not overwrite) entries 0..2 of t
+  t[1] += point.y;
+  t[2] += point.z;
+}
+
+// distance between the convex hull of A points and B points (in every
+// overload one side is a single point); c++17 tuple_cat and invoke could merge
+template <typename T>
+T HullDistance(
+    const std::array<vec3<T>, 1>& a,
+    const std::array<vec3<T>, 2>& b,
+    const double /*min_triangle_area*/) { // unused: no triangle involved
+  using std::get;
+  return PointLine3DistanceForward(get<0>(a), get<0>(b), get<1>(b));
+}
+template <typename T>
+T HullDistance(
+    const std::array<vec3<T>, 1>& a,
+    const std::array<vec3<T>, 3>& b,
+    const double min_triangle_area) {
+  using std::get; // point a[0] against triangle (b[0], b[1], b[2])
+  return PointTriangle3DistanceForward(
+      get<0>(a), get<0>(b), get<1>(b), get<2>(b), min_triangle_area);
+}
+template <typename T>
+T HullDistance(
+    const std::array<vec3<T>, 2>& a,
+    const std::array<vec3<T>, 1>& b,
+    const double /*min_triangle_area*/) {
+  return HullDistance(b, a, 1); // swap to (point, edge); the 1 is ignored there
+}
+template <typename T>
+T HullDistance(
+    const std::array<vec3<T>, 3>& a,
+    const std::array<vec3<T>, 1>& b,
+    const double min_triangle_area) {
+  return HullDistance(b, a, min_triangle_area); // swap to (point, triangle)
+}
+
+template <typename T>
+void HullHullDistanceBackward(
+    const std::array<vec3<T>, 1>& a,
+    const std::array<vec3<T>, 2>& b,
+    T grad_dist,
+    at::TensorAccessor<T, 1>&& grad_a,
+    at::TensorAccessor<T, 2>&& grad_b,
+    const double /*min_triangle_area*/) { // unused: no triangle involved
+  using std::get;
+  auto res =
+      PointLine3DistanceBackward(get<0>(a), get<0>(b), get<1>(b), grad_dist);
+  IncrementPoint(std::move(grad_a), get<0>(res)); // added to the point's grad
+  IncrementPoint(grad_b[0], get<1>(res)); // added to edge vertex 0's grad
+  IncrementPoint(grad_b[1], get<2>(res)); // added to edge vertex 1's grad
+}
+template <typename T>
+void HullHullDistanceBackward(
+    const std::array<vec3<T>, 1>& a,
+    const std::array<vec3<T>, 3>& b,
+    T grad_dist,
+    at::TensorAccessor<T, 1>&& grad_a,
+    at::TensorAccessor<T, 2>&& grad_b,
+    const double min_triangle_area) {
+  using std::get;
+  auto res = PointTriangle3DistanceBackward(
+      get<0>(a), get<0>(b), get<1>(b), get<2>(b), grad_dist, min_triangle_area);
+  IncrementPoint(std::move(grad_a), get<0>(res)); // added to the point's grad
+  IncrementPoint(grad_b[0], get<1>(res)); // added to triangle vertex 0's grad
+  IncrementPoint(grad_b[1], get<2>(res)); // added to triangle vertex 1's grad
+  IncrementPoint(grad_b[2], get<3>(res)); // added to triangle vertex 2's grad
+}
+template <typename T>
+void HullHullDistanceBackward(
+    const std::array<vec3<T>, 3>& a,
+    const std::array<vec3<T>, 1>& b,
+    T grad_dist,
+    at::TensorAccessor<T, 2>&& grad_a,
+    at::TensorAccessor<T, 1>&& grad_b,
+    const double min_triangle_area) {
+  return HullHullDistanceBackward( // swap to the (point, triangle) overload
+      b, a, grad_dist, std::move(grad_b), std::move(grad_a), min_triangle_area);
+}
+template <typename T>
+void HullHullDistanceBackward(
+    const std::array<vec3<T>, 2>& a,
+    const std::array<vec3<T>, 1>& b,
+    T grad_dist,
+    at::TensorAccessor<T, 2>&& grad_a,
+    at::TensorAccessor<T, 1>&& grad_b,
+    const double /*min_triangle_area*/) {
+  return HullHullDistanceBackward( // swap to (point, edge); the 1 is ignored
+      b, a, grad_dist, std::move(grad_b), std::move(grad_a), 1);
+}
+
+template <int H>
+void ValidateShape(const at::Tensor& as) {
+  if (H != 1) {
+    TORCH_CHECK(as.size(2) == 3 && as.size(1) == H); // dims 1, 2 must be (H, 3)
+    return;
+  }
+  TORCH_CHECK(as.size(1) == 3); // H == 1: dim 1 must be 3
+}
+
+// ----------- Here begins the implementation of each top-level
+//             function using non-type template parameters to
+//             implement all the cases in one go. ----------- //
+
+template <int H1, int H2>
+std::tuple<at::Tensor, at::Tensor> HullHullDistanceForwardCpu(
+    const at::Tensor& as,
+    const at::Tensor& as_first_idx,
+    const at::Tensor& bs,
+    const at::Tensor& bs_first_idx,
+    const double min_triangle_area) {
+  const int64_t A_N = as.size(0);
+  const int64_t B_N = bs.size(0);
+  const int64_t BATCHES = as_first_idx.size(0);
+  // For each hull in `as`, find the nearest hull in `bs` of the same batch.
+  ValidateShape<H1>(as);
+  ValidateShape<H2>(bs);
+
+  TORCH_CHECK(bs_first_idx.size(0) == BATCHES);
+
+  // clang-format off
+  at::Tensor dists = at::zeros({A_N,}, as.options());
+  at::Tensor idxs = at::zeros({A_N,}, as_first_idx.options());
+  // clang-format on
+  // a_batch_end and [b_batch_start, b_batch_end) track the current batch.
+  auto as_a = as.accessor < float, H1 == 1 ? 2 : 3 > ();
+  auto bs_a = bs.accessor < float, H2 == 1 ? 2 : 3 > ();
+  auto as_first_idx_a = as_first_idx.accessor<int64_t, 1>();
+  auto bs_first_idx_a = bs_first_idx.accessor<int64_t, 1>();
+  auto dists_a = dists.accessor<float, 1>();
+  auto idxs_a = idxs.accessor<int64_t, 1>();
+  int64_t a_batch_end = 0;
+  int64_t b_batch_start = 0, b_batch_end = 0;
+  int64_t batch_idx = 0;
+  for (int64_t a_n = 0; a_n < A_N; ++a_n) {
+    while (a_n == a_batch_end) { // loop: skips batch elements with no `as`
+      ++batch_idx;
+      b_batch_start = b_batch_end;
+      if (batch_idx >= BATCHES) { // last element: runs to the end of both
+        a_batch_end = std::numeric_limits<int64_t>::max();
+        b_batch_end = B_N;
+      } else {
+        a_batch_end = as_first_idx_a[batch_idx];
+        b_batch_end = bs_first_idx_a[batch_idx];
+      }
+    }
+    float min_dist = std::numeric_limits<float>::max();
+    size_t min_idx = 0; // stays 0 when the batch element has no `bs`
+    auto a = ExtractHull<H1>(as_a[a_n]);
+    for (int64_t b_n = b_batch_start; b_n < b_batch_end; ++b_n) {
+      float dist =
+          HullDistance(a, ExtractHull<H2>(bs_a[b_n]), min_triangle_area);
+      if (dist <= min_dist) { // on ties the later b wins
+        min_dist = dist;
+        min_idx = b_n;
+      }
+    }
+    dists_a[a_n] = min_dist;
+    idxs_a[a_n] = min_idx;
+  }
+
+  return std::make_tuple(dists, idxs);
+}
+
+template <int H1, int H2>
+std::tuple<at::Tensor, at::Tensor> HullHullDistanceBackwardCpu(
+    const at::Tensor& as,
+    const at::Tensor& bs,
+    const at::Tensor& idx_bs,
+    const at::Tensor& grad_dists,
+    const double min_triangle_area) {
+  const int64_t A_N = as.size(0);
+  // idx_bs[a_n] is the `bs` row paired with as[a_n]; one grad per `as` row.
+  TORCH_CHECK(idx_bs.size(0) == A_N);
+  TORCH_CHECK(grad_dists.size(0) == A_N);
+  ValidateShape<H1>(as);
+  ValidateShape<H2>(bs);
+  // Gradients start at zero and are accumulated pair by pair below.
+  at::Tensor grad_as = at::zeros_like(as);
+  at::Tensor grad_bs = at::zeros_like(bs);
+
+  auto as_a = as.accessor < float, H1 == 1 ? 2 : 3 > ();
+  auto bs_a = bs.accessor < float, H2 == 1 ? 2 : 3 > ();
+  auto grad_as_a = grad_as.accessor < float, H1 == 1 ? 2 : 3 > ();
+  auto grad_bs_a = grad_bs.accessor < float, H2 == 1 ? 2 : 3 > ();
+  auto idx_bs_a = idx_bs.accessor<int64_t, 1>();
+  auto grad_dists_a = grad_dists.accessor<float, 1>();
+
+  for (int64_t a_n = 0; a_n < A_N; ++a_n) {
+    auto a = ExtractHull<H1>(as_a[a_n]);
+    auto b = ExtractHull<H2>(bs_a[idx_bs_a[a_n]]);
+    HullHullDistanceBackward(
+        a,
+        b,
+        grad_dists_a[a_n],
+        grad_as_a[a_n],
+        grad_bs_a[idx_bs_a[a_n]], // the same `bs` row may be hit by several a_n
+        min_triangle_area);
+  }
+  return std::make_tuple(grad_as, grad_bs);
+}
+
+template <int H>
+torch::Tensor PointHullArrayDistanceForwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& bs,
+    const double min_triangle_area) {
+  const int64_t P = points.size(0);
+  const int64_t B_N = bs.size(0);
+  // Dense version: distance from every point to every hull in `bs`.
+  TORCH_CHECK(points.size(1) == 3, "points must be of shape Px3");
+  ValidateShape<H>(bs);
+
+  at::Tensor dists = at::zeros({P, B_N}, points.options());
+  auto points_a = points.accessor<float, 2>();
+  auto bs_a = bs.accessor<float, 3>();
+  auto dists_a = dists.accessor<float, 2>();
+  for (int64_t p = 0; p < P; ++p) {
+    auto point = ExtractHull<1>(points_a[p]);
+    auto dest = dists_a[p]; // output row for point p
+    for (int64_t b_n = 0; b_n < B_N; ++b_n) {
+      auto b = ExtractHull<H>(bs_a[b_n]);
+      dest[b_n] = HullDistance(point, b, min_triangle_area);
+    }
+  }
+  return dists;
+}
+
+template <int H>
+std::tuple<at::Tensor, at::Tensor> PointHullArrayDistanceBackwardCpu(
+    const at::Tensor& points,
+    const at::Tensor& bs,
+    const at::Tensor& grad_dists,
+    const double min_triangle_area) {
+  const int64_t P = points.size(0);
+  const int64_t B_N = bs.size(0);
+  // Backward of the dense (P, B_N) distances: grad_dists has one entry per pair.
+  TORCH_CHECK(points.size(1) == 3, "points must be of shape Px3");
+  ValidateShape<H>(bs);
+  TORCH_CHECK((grad_dists.size(0) == P) && (grad_dists.size(1) == B_N));
+
+  at::Tensor grad_points = at::zeros({P, 3}, points.options());
+  at::Tensor grad_bs = at::zeros({B_N, H, 3}, bs.options());
+
+  auto points_a = points.accessor<float, 2>();
+  auto bs_a = bs.accessor<float, 3>();
+  auto grad_dists_a = grad_dists.accessor<float, 2>();
+  auto grad_points_a = grad_points.accessor<float, 2>();
+  auto grad_bs_a = grad_bs.accessor<float, 3>();
+  for (int64_t p = 0; p < P; ++p) {
+    auto point = ExtractHull<1>(points_a[p]);
+    auto grad_point = grad_points_a[p];
+    auto grad_dist = grad_dists_a[p];
+    for (int64_t b_n = 0; b_n < B_N; ++b_n) {
+      auto b = ExtractHull<H>(bs_a[b_n]);
+      HullHullDistanceBackward(
+          point,
+          b,
+          grad_dist[b_n],
+          std::move(grad_point), // cast for the && parameter; reused each b_n
+          grad_bs_a[b_n],
+          min_triangle_area);
+    }
+  }
+  return std::make_tuple(grad_points, grad_bs);
+}
+
+// ---------- Here begin the exported functions ------------ //
+
+std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceForwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& points_first_idx,
+    const torch::Tensor& tris,
+    const torch::Tensor& tris_first_idx,
+    const double min_triangle_area) {
+  return HullHullDistanceForwardCpu<1, 3>( // per point: nearest triangle
+      points, points_first_idx, tris, tris_first_idx, min_triangle_area);
+}
+
+std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceBackwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& tris,
+    const torch::Tensor& idx_points,
+    const torch::Tensor& grad_dists,
+    const double min_triangle_area) {
+  return HullHullDistanceBackwardCpu<1, 3>( // returns (grad_points, grad_tris)
+      points, tris, idx_points, grad_dists, min_triangle_area);
+}
+
+std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceForwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& points_first_idx,
+    const torch::Tensor& tris,
+    const torch::Tensor& tris_first_idx,
+    const double min_triangle_area) {
+  return HullHullDistanceForwardCpu<3, 1>( // per triangle: nearest point
+      tris, tris_first_idx, points, points_first_idx, min_triangle_area);
+}
+
+std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceBackwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& tris,
+    const torch::Tensor& idx_tris,
+    const torch::Tensor& grad_dists,
+    const double min_triangle_area) {
+  auto res = HullHullDistanceBackwardCpu<3, 1>( // res = (grad_tris, grad_points)
+      tris, points, idx_tris, grad_dists, min_triangle_area);
+  return std::make_tuple(std::get<1>(res), std::get<0>(res)); // points first
+}
+
+torch::Tensor PointEdgeArrayDistanceForwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& segms) {
+  return PointHullArrayDistanceForwardCpu<2>(points, segms, 1); // 1: unused area
+}
+
+std::tuple<at::Tensor, at::Tensor> PointFaceArrayDistanceBackwardCpu(
+    const at::Tensor& points,
+    const at::Tensor& tris,
+    const at::Tensor& grad_dists,
+    const double min_triangle_area) {
+  return PointHullArrayDistanceBackwardCpu<3>( // returns (grad_points, grad_tris)
+      points, tris, grad_dists, min_triangle_area);
+}
+
+torch::Tensor PointFaceArrayDistanceForwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& tris,
+    const double min_triangle_area) { // result: one distance per (point, tri)
+  return PointHullArrayDistanceForwardCpu<3>(points, tris, min_triangle_area);
+}
+
+std::tuple<at::Tensor, at::Tensor> PointEdgeArrayDistanceBackwardCpu(
+    const at::Tensor& points,
+    const at::Tensor& segms,
+    const at::Tensor& grad_dists) { // returns (grad_points, grad_segms)
+  return PointHullArrayDistanceBackwardCpu<2>(points, segms, grad_dists, 1);
+}
+
+std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& points_first_idx,
+    const torch::Tensor& segms,
+    const torch::Tensor& segms_first_idx,
+    const int64_t /*max_points*/) { // accepted but not used by this CPU path
+  return HullHullDistanceForwardCpu<1, 2>( // per point: nearest edge
+      points, points_first_idx, segms, segms_first_idx, 1);
+}
+
+std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& segms,
+    const torch::Tensor& idx_points,
+    const torch::Tensor& grad_dists) {
+  return HullHullDistanceBackwardCpu<1, 2>( // returns (grad_points, grad_segms)
+      points, segms, idx_points, grad_dists, 1);
+}
+
+std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& points_first_idx,
+    const torch::Tensor& segms,
+    const torch::Tensor& segms_first_idx,
+    const int64_t /*max_segms*/) { // accepted but not used by this CPU path
+  return HullHullDistanceForwardCpu<2, 1>( // per edge: nearest point
+      segms, segms_first_idx, points, points_first_idx, 1);
+}
+
+std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& segms,
+    const torch::Tensor& idx_segms,
+    const torch::Tensor& grad_dists) {
+  auto res = HullHullDistanceBackwardCpu<2, 1>( // res = (grad_segms, grad_points)
+      segms, points, idx_segms, grad_dists, 1);
+  return std::make_tuple(std::get<1>(res), std::get<0>(res)); // points first
+}
diff --git a/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cuda.cu b/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cuda.cu
new file mode 100644
index 0000000000000000000000000000000000000000..3788d4055136e854f9415fef4e1233cfe23cfc86
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cuda.cu
@@ -0,0 +1,833 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <algorithm>
+#include <list>
+#include <queue>
+#include <tuple>
+#include "utils/float_math.cuh"
+#include "utils/geometry_utils.cuh"
+#include "utils/warp_reduce.cuh"
+
+// ****************************************************************************
+// *                   Generic Forward/Backward Kernels                       *
+// ****************************************************************************
+
+// Generic forward kernel: for every "object" it finds the closest "target" in
+// the same batch element and writes the distance and the target's index.
+//
+// Indexing used below: blockIdx.y selects the batch element, blockIdx.x
+// selects the object within that batch element, and the threads of a block
+// share the targets between them. The dynamic shared memory buffer is used as
+// blockDim.x floats followed by blockDim.x int64_t values.
+//
+// One of objects_dim / targets_dim is expected to be 1 (points); the other
+// identifies edges or triangles (3 selects the triangle code path).
+__global__ void DistanceForwardKernel(
+    const float* __restrict__ objects, // (O * oD * 3)
+    const size_t objects_size, // O
+    const size_t objects_dim, // oD
+    const float* __restrict__ targets, // (T * tD * 3)
+    const size_t targets_size, // T
+    const size_t targets_dim, // tD
+    const int64_t* __restrict__ objects_first_idx, // (B,)
+    const int64_t* __restrict__ targets_first_idx, // (B,)
+    const size_t batch_size, // B
+    float* __restrict__ dist_objects, // (O,)
+    int64_t* __restrict__ idx_objects, // (O,)
+    const double min_triangle_area) {
+  // This kernel is used interchangeably to compute bi-directional distances
+  // between points and triangles/lines. The direction of the distance computed,
+  // i.e. point to triangle/line or triangle/line to point, depends on the order
+  // of the input arguments and is inferred based on their shape. The shape is
+  // used to distinguish between triangles and lines
+
+  // Single shared memory buffer which is split and cast to different types.
+  extern __shared__ char shared_buf[];
+  float* min_dists = (float*)shared_buf; // float[NUM_THREADS]
+  int64_t* min_idxs = (int64_t*)&min_dists[blockDim.x]; // int64_t[NUM_THREADS]
+
+  const size_t batch_idx = blockIdx.y; // index of batch element.
+
+  // start and end for objects in batch_idx
+  // (the last batch element ends at the total object count)
+  const int64_t starto = objects_first_idx[batch_idx];
+  const int64_t endo = batch_idx + 1 < batch_size
+      ? objects_first_idx[batch_idx + 1]
+      : objects_size;
+
+  // start and end for targets in batch_idx
+  // (the last batch element ends at the total target count)
+  const int64_t startt = targets_first_idx[batch_idx];
+  const int64_t endt = batch_idx + 1 < batch_size
+      ? targets_first_idx[batch_idx + 1]
+      : targets_size;
+
+  const size_t i = blockIdx.x; // index within batch element.
+  const size_t tid = threadIdx.x; // thread index
+
+  // Set references to points/face based on which of objects/targets refer to
+  // points/faces
+  float3* points_f3 = objects_dim == 1 ? (float3*)objects : (float3*)targets;
+  float3* face_f3 = objects_dim == 1 ? (float3*)targets : (float3*)objects;
+  // Distinguishes whether we're computing distance against triangle vs edge
+  bool isTriangle = objects_dim == 3 || targets_dim == 3;
+
+  // Each block will compute one element of the output idx_objects[starto + i],
+  // dist_objects[starto + i]. Within the block we will use threads to compute
+  // the distances between objects[starto + i] and targets[j] for all j
+  // belonging in the same batch as i, i.e. j in [startt, endt]. Then use a
+  // block reduction to take an argmin of the distances.
+
+  // If i exceeds the number of objects in batch_idx, then do nothing
+  // (i depends only on blockIdx.x, so the whole block takes the same branch
+  // and the __syncthreads() calls inside are reached by every thread).
+  if (i < (endo - starto)) {
+    // Compute the distances between objects[starto + i] and targets[j] for
+    // all j belonging in the same batch as i, i.e. j in [startt, endt].
+    // Here each thread will reduce over (endt-startt) / blockDim.x in serial,
+    // and store its result to shared memory
+    // A thread that sees no target keeps (FLT_MAX, 0) as its candidate.
+    float min_dist = FLT_MAX;
+    size_t min_idx = 0;
+    for (size_t j = tid; j < (endt - startt); j += blockDim.x) {
+      // point_idx indexes float3 points directly; face_idx is the index of
+      // the face's first vertex (faces occupy *_dim consecutive float3s).
+      size_t point_idx = objects_dim == 1 ? starto + i : startt + j;
+      size_t face_idx = objects_dim == 1 ? (startt + j) * targets_dim
+                                         : (starto + i) * objects_dim;
+
+      float dist;
+      if (isTriangle) {
+        dist = PointTriangle3DistanceForward(
+            points_f3[point_idx],
+            face_f3[face_idx],
+            face_f3[face_idx + 1],
+            face_f3[face_idx + 2],
+            min_triangle_area);
+      } else {
+        dist = PointLine3DistanceForward(
+            points_f3[point_idx], face_f3[face_idx], face_f3[face_idx + 1]);
+      }
+
+      // On this thread's first iteration (j == tid) seed min_dist with the
+      // current distance so the <= comparisons below always accept it.
+      min_dist = (j == tid) ? dist : min_dist;
+      min_idx = (dist <= min_dist) ? (startt + j) : min_idx;
+      min_dist = (dist <= min_dist) ? dist : min_dist;
+    }
+    min_dists[tid] = min_dist;
+    min_idxs[tid] = min_idx;
+    __syncthreads();
+
+    // Perform reduction in shared memory.
+    // Halves the active range each step and stops while more than 32
+    // candidates remain; the remainder is handled by WarpReduceMin.
+    for (int s = blockDim.x / 2; s > 32; s >>= 1) {
+      if (tid < s) {
+        if (min_dists[tid] > min_dists[tid + s]) {
+          min_dists[tid] = min_dists[tid + s];
+          min_idxs[tid] = min_idxs[tid + s];
+        }
+      }
+      __syncthreads();
+    }
+
+    // Unroll the last 6 iterations of the loop since they will happen
+    // synchronized within a single warp.
+    // NOTE(review): presumably WarpReduceMin reads entries up to tid + 32, so
+    // blockDim.x is assumed to be at least 64 -- confirm against
+    // utils/warp_reduce.cuh.
+    if (tid < 32)
+      WarpReduceMin<float>(min_dists, min_idxs, tid);
+
+    // Finally thread 0 writes the result to the output buffer.
+    if (tid == 0) {
+      idx_objects[starto + i] = min_idxs[0];
+      dist_objects[starto + i] = min_dists[0];
+    }
+  }
+}
+
+// Host launcher for DistanceForwardKernel.
+//
+// Args:
+//    objects: float tensor, (O, 3) for points or (O, oD, 3) for edges/faces
+//    objects_dim: vertices per object (1 = point, 2 = edge, 3 = triangle)
+//    objects_first_idx: int64 tensor of shape (B,) with the first object
+//        index of each batch element
+//    targets, targets_dim, targets_first_idx: same, for the targets
+//    max_objects: size of the grid's x dimension, i.e. the number of object
+//        slots processed per batch element
+//    min_triangle_area: forwarded to the kernel (only used for triangles)
+//
+// Returns:
+//    dists: tensor of shape (O,) created with objects' options
+//    idxs: tensor of shape (O,) created with objects_first_idx's options
+//    Both are zero-initialized and returned untouched when O == 0.
+std::tuple<at::Tensor, at::Tensor> DistanceForwardCuda(
+    const at::Tensor& objects,
+    const size_t objects_dim,
+    const at::Tensor& objects_first_idx,
+    const at::Tensor& targets,
+    const size_t targets_dim,
+    const at::Tensor& targets_first_idx,
+    const int64_t max_objects,
+    const double min_triangle_area) {
+  // Check inputs are on the same device
+  at::TensorArg objects_t{objects, "objects", 1},
+      objects_first_idx_t{objects_first_idx, "objects_first_idx", 2},
+      targets_t{targets, "targets", 3},
+      targets_first_idx_t{targets_first_idx, "targets_first_idx", 4};
+  at::CheckedFrom c = "DistanceForwardCuda";
+  at::checkAllSameGPU(
+      c, {objects_t, objects_first_idx_t, targets_t, targets_first_idx_t});
+  at::checkAllSameType(c, {objects_t, targets_t});
+
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(objects.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const int64_t objects_size = objects.size(0);
+  const int64_t targets_size = targets.size(0);
+  const int64_t batch_size = objects_first_idx.size(0);
+
+  TORCH_CHECK(targets_first_idx.size(0) == batch_size);
+  // Exactly one side must be points (dim 1); the other edges or faces.
+  if (objects_dim == 1) {
+    TORCH_CHECK(
+        targets_dim >= 2 && targets_dim <= 3,
+        "either object or target must be edge or face");
+    TORCH_CHECK(objects.size(1) == 3, "points must be of shape Px3");
+    TORCH_CHECK(
+        targets.size(2) == 3,
+        "face must be of shape Tx3x3, lines must be of shape Tx2x3");
+  } else {
+    TORCH_CHECK(targets_dim == 1, "either object or target must be point");
+    TORCH_CHECK(
+        objects_dim >= 2 && objects_dim <= 3,
+        "either object or target must be edge or face");
+    TORCH_CHECK(targets.size(1) == 3, "points must be of shape Px3");
+    TORCH_CHECK(
+        objects.size(2) == 3,
+        "face must be of shape Tx3x3, lines must be of shape Tx2x3");
+  }
+
+  // clang-format off
+  at::Tensor dists = at::zeros({objects_size,}, objects.options());
+  at::Tensor idxs = at::zeros({objects_size,}, objects_first_idx.options());
+  // clang-format on
+
+  // Nothing to compute: skip the launch (a zero-sized grid is invalid).
+  if (dists.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return std::make_tuple(dists, idxs);
+  }
+
+  // One block per (object slot, batch element); the block's threads split
+  // the targets of that batch element between them.
+  const int threads = 128;
+  const dim3 blocks(max_objects, batch_size);
+  // The kernel carves the dynamic shared buffer into float[threads] followed
+  // by int64_t[threads], so request exactly that many bytes. (The float part
+  // was previously sized with sizeof(size_t), which over-allocated.)
+  const size_t shared_size =
+      threads * sizeof(float) + threads * sizeof(int64_t);
+
+  DistanceForwardKernel<<<blocks, threads, shared_size, stream>>>(
+      objects.contiguous().data_ptr<float>(),
+      objects_size,
+      objects_dim,
+      targets.contiguous().data_ptr<float>(),
+      targets_size,
+      targets_dim,
+      objects_first_idx.contiguous().data_ptr<int64_t>(),
+      targets_first_idx.contiguous().data_ptr<int64_t>(),
+      batch_size,
+      dists.data_ptr<float>(),
+      idxs.data_ptr<int64_t>(),
+      min_triangle_area);
+
+  AT_CUDA_CHECK(cudaGetLastError());
+  return std::make_tuple(dists, idxs);
+}
+
+// Generic backward kernel: for every object o, takes the target selected by
+// idx_objects[o], computes the gradients of that pair's distance scaled by
+// grad_dists[o], and accumulates them into grad_points / grad_face.
+//
+// Threads iterate over objects with a stride of the total number of launched
+// threads, so any 1D launch configuration covers all objects_size objects.
+__global__ void DistanceBackwardKernel(
+    const float* __restrict__ objects, // (O * oD * 3)
+    const size_t objects_size, // O
+    const size_t objects_dim, // oD
+    const float* __restrict__ targets, // (T * tD * 3)
+    const size_t targets_dim, // tD
+    const int64_t* __restrict__ idx_objects, // (O,)
+    const float* __restrict__ grad_dists, // (O,)
+    float* __restrict__ grad_points, // ((O or T) * 3)
+    float* __restrict__ grad_face, // ((O or T) * max(oD, tD) * 3)
+    const double min_triangle_area) {
+  // This kernel is used interchangeably to compute bi-directional backward
+  // distances between points and triangles/lines. The direction of the distance
+  // computed, i.e. point to triangle/line or triangle/line to point, depends on
+  // the order of the input arguments and is inferred based on their shape. The
+  // shape is used to distinguish between triangles and lines. Note that
+  // grad_points will always be used for the point data and grad_face for the
+  // edge/triangle
+
+  // Set references to points/face based on whether objects/targets are which
+  float3* points_f3 = objects_dim == 1 ? (float3*)objects : (float3*)targets;
+  float3* face_f3 = objects_dim == 1 ? (float3*)targets : (float3*)objects;
+
+  const size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const size_t stride = gridDim.x * blockDim.x;
+
+  for (size_t o = tid; o < objects_size; o += stride) {
+    // Index of the target paired with object o.
+    const int64_t tidx = idx_objects[o];
+
+    // point_index addresses float3 points; face_index is the index of the
+    // face's first vertex (faces occupy *_dim consecutive float3s).
+    size_t point_index = objects_dim == 1 ? o : tidx;
+    size_t face_index = objects_dim == 1 ? tidx * targets_dim : o * objects_dim;
+    bool isTriangle = objects_dim == 3 || targets_dim == 3;
+
+    // grad_v2 is only assigned, and only read, in the triangle case.
+    float3 grad_point, grad_v0, grad_v1, grad_v2;
+    if (isTriangle) {
+      const auto grads = PointTriangle3DistanceBackward(
+          points_f3[point_index],
+          face_f3[face_index],
+          face_f3[face_index + 1],
+          face_f3[face_index + 2],
+          grad_dists[o],
+          min_triangle_area);
+      grad_point = thrust::get<0>(grads);
+      grad_v0 = thrust::get<1>(grads);
+      grad_v1 = thrust::get<2>(grads);
+      grad_v2 = thrust::get<3>(grads);
+    } else {
+      const auto grads = PointLine3DistanceBackward(
+          points_f3[point_index],
+          face_f3[face_index],
+          face_f3[face_index + 1],
+          grad_dists[o]);
+      grad_point = thrust::get<0>(grads);
+      grad_v0 = thrust::get<1>(grads);
+      grad_v1 = thrust::get<2>(grads);
+    }
+
+    // Accumulate atomically: different objects can reference the same target
+    // through idx_objects, so several threads may update the same entry.
+    atomicAdd(grad_points + point_index * 3 + 0, grad_point.x);
+    atomicAdd(grad_points + point_index * 3 + 1, grad_point.y);
+    atomicAdd(grad_points + point_index * 3 + 2, grad_point.z);
+
+    // Vertex k of the face lives at float offset face_index * 3 + k * 3.
+    atomicAdd(grad_face + face_index * 3 + 0 * 3 + 0, grad_v0.x);
+    atomicAdd(grad_face + face_index * 3 + 0 * 3 + 1, grad_v0.y);
+    atomicAdd(grad_face + face_index * 3 + 0 * 3 + 2, grad_v0.z);
+
+    atomicAdd(grad_face + face_index * 3 + 1 * 3 + 0, grad_v1.x);
+    atomicAdd(grad_face + face_index * 3 + 1 * 3 + 1, grad_v1.y);
+    atomicAdd(grad_face + face_index * 3 + 1 * 3 + 2, grad_v1.z);
+
+    if (isTriangle) {
+      atomicAdd(grad_face + face_index * 3 + 2 * 3 + 0, grad_v2.x);
+      atomicAdd(grad_face + face_index * 3 + 2 * 3 + 1, grad_v2.y);
+      atomicAdd(grad_face + face_index * 3 + 2 * 3 + 2, grad_v2.z);
+    }
+  }
+}
+
+// Host launcher for DistanceBackwardKernel.
+//
+// Args:
+//    objects: float tensor, (O, 3) for points or (O, oD, 3) for edges/faces
+//    objects_dim: vertices per object (1 = point, 2 = edge, 3 = triangle)
+//    targets, targets_dim: same, for the targets
+//    idx_objects: int64 tensor of shape (O,); index of the target paired with
+//        each object
+//    grad_dists: float tensor of shape (O,); upstream gradient per object
+//    min_triangle_area: forwarded to the kernel (only used for triangles)
+//
+// Returns:
+//    (grad_points, grad_tris): the first tensor always holds the gradient for
+//    whichever input is the points, the second for the edges/faces,
+//    regardless of which of them was passed as objects. Both are
+//    zero-initialized and returned untouched if either is empty.
+std::tuple<at::Tensor, at::Tensor> DistanceBackwardCuda(
+    const at::Tensor& objects,
+    const size_t objects_dim,
+    const at::Tensor& targets,
+    const size_t targets_dim,
+    const at::Tensor& idx_objects,
+    const at::Tensor& grad_dists,
+    const double min_triangle_area) {
+  // Check inputs are on the same device
+  at::TensorArg objects_t{objects, "objects", 1},
+      targets_t{targets, "targets", 2},
+      idx_objects_t{idx_objects, "idx_objects", 3},
+      grad_dists_t{grad_dists, "grad_dists", 4};
+  at::CheckedFrom c = "DistanceBackwardCuda";
+  at::checkAllSameGPU(c, {objects_t, targets_t, idx_objects_t, grad_dists_t});
+  at::checkAllSameType(c, {objects_t, targets_t, grad_dists_t});
+  // This is nondeterministic because atomicAdd
+  at::globalContext().alertNotDeterministic("DistanceBackwardCuda");
+
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(objects.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const int64_t objects_size = objects.size(0);
+  const int64_t targets_size = targets.size(0);
+
+  at::Tensor grad_points;
+  at::Tensor grad_tris;
+
+  TORCH_CHECK(idx_objects.size(0) == objects_size);
+  TORCH_CHECK(grad_dists.size(0) == objects_size);
+  if (objects_dim == 1) {
+    // Objects are points, targets are edges/faces.
+    TORCH_CHECK(
+        targets_dim >= 2 && targets_dim <= 3,
+        "either object or target must be edge or face");
+    TORCH_CHECK(objects.size(1) == 3, "points must be of shape Px3");
+    TORCH_CHECK(
+        targets.size(2) == 3,
+        "face must be of shape Tx3x3, lines must be of shape Tx2x3");
+    // clang-format off
+    grad_points = at::zeros({objects_size, 3}, objects.options());
+    grad_tris = at::zeros({targets_size, int64_t(targets_dim), 3}, targets.options());
+    // clang-format on
+  } else {
+    // Objects are edges/faces, targets are points.
+    TORCH_CHECK(targets_dim == 1, "either object or target must be point");
+    TORCH_CHECK(
+        objects_dim >= 2 && objects_dim <= 3,
+        "either object or target must be edge or face");
+    TORCH_CHECK(targets.size(1) == 3, "points must be of shape Px3");
+    TORCH_CHECK(
+        objects.size(2) == 3,
+        "face must be of shape Tx3x3, lines must be of shape Tx2x3");
+    // clang-format off
+    grad_points = at::zeros({targets_size, 3}, targets.options());
+    grad_tris = at::zeros({objects_size, int64_t(objects_dim), 3}, objects.options());
+    // clang-format on
+  }
+
+  // Nothing to accumulate when either side is empty: skip the launch.
+  if (grad_points.numel() == 0 || grad_tris.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return std::make_tuple(grad_points, grad_tris);
+  }
+
+  // Fixed launch size; the kernel strides over the objects, so the grid does
+  // not need to match objects_size.
+  const int blocks = 64;
+  const int threads = 512;
+
+  DistanceBackwardKernel<<<blocks, threads, 0, stream>>>(
+      objects.contiguous().data_ptr<float>(),
+      objects_size,
+      objects_dim,
+      targets.contiguous().data_ptr<float>(),
+      targets_dim,
+      idx_objects.contiguous().data_ptr<int64_t>(),
+      grad_dists.contiguous().data_ptr<float>(),
+      grad_points.data_ptr<float>(),
+      grad_tris.data_ptr<float>(),
+      min_triangle_area);
+
+  AT_CUDA_CHECK(cudaGetLastError());
+  return std::make_tuple(grad_points, grad_tris);
+}
+
+// ****************************************************************************
+// *                          PointFaceDistance                               *
+// ****************************************************************************
+
+// Point-to-face forward pass: the points are passed as the objects (one
+// vertex each) and the triangles as the targets (three vertices each) of the
+// generic DistanceForwardCuda.
+std::tuple<at::Tensor, at::Tensor> PointFaceDistanceForwardCuda(
+    const at::Tensor& points,
+    const at::Tensor& points_first_idx,
+    const at::Tensor& tris,
+    const at::Tensor& tris_first_idx,
+    const int64_t max_points,
+    const double min_triangle_area) {
+  constexpr size_t kPointDim = 1; // vertices per point
+  constexpr size_t kTriDim = 3; // vertices per triangle
+  return DistanceForwardCuda(
+      /*objects=*/points,
+      kPointDim,
+      points_first_idx,
+      /*targets=*/tris,
+      kTriDim,
+      tris_first_idx,
+      max_points,
+      min_triangle_area);
+}
+
+// Point-to-face backward pass: the points are the objects (one vertex each)
+// and the triangles the targets (three vertices each) of the generic
+// DistanceBackwardCuda.
+std::tuple<at::Tensor, at::Tensor> PointFaceDistanceBackwardCuda(
+    const at::Tensor& points,
+    const at::Tensor& tris,
+    const at::Tensor& idx_points,
+    const at::Tensor& grad_dists,
+    const double min_triangle_area) {
+  constexpr size_t kPointDim = 1; // vertices per point
+  constexpr size_t kTriDim = 3; // vertices per triangle
+  return DistanceBackwardCuda(
+      /*objects=*/points,
+      kPointDim,
+      /*targets=*/tris,
+      kTriDim,
+      idx_points,
+      grad_dists,
+      min_triangle_area);
+}
+
+// ****************************************************************************
+// *                          FacePointDistance                               *
+// ****************************************************************************
+
+// Face-to-point forward pass: the triangles are passed as the objects (three
+// vertices each) and the points as the targets (one vertex each) of the
+// generic DistanceForwardCuda, so the outputs are per triangle.
+std::tuple<at::Tensor, at::Tensor> FacePointDistanceForwardCuda(
+    const at::Tensor& points,
+    const at::Tensor& points_first_idx,
+    const at::Tensor& tris,
+    const at::Tensor& tris_first_idx,
+    const int64_t max_tris,
+    const double min_triangle_area) {
+  constexpr size_t kTriDim = 3; // vertices per triangle
+  constexpr size_t kPointDim = 1; // vertices per point
+  return DistanceForwardCuda(
+      /*objects=*/tris,
+      kTriDim,
+      tris_first_idx,
+      /*targets=*/points,
+      kPointDim,
+      points_first_idx,
+      max_tris,
+      min_triangle_area);
+}
+
+// Face-to-point backward pass: the triangles are the objects (three vertices
+// each) and the points the targets (one vertex each) of the generic
+// DistanceBackwardCuda.
+std::tuple<at::Tensor, at::Tensor> FacePointDistanceBackwardCuda(
+    const at::Tensor& points,
+    const at::Tensor& tris,
+    const at::Tensor& idx_tris,
+    const at::Tensor& grad_dists,
+    const double min_triangle_area) {
+  constexpr size_t kTriDim = 3; // vertices per triangle
+  constexpr size_t kPointDim = 1; // vertices per point
+  return DistanceBackwardCuda(
+      /*objects=*/tris,
+      kTriDim,
+      /*targets=*/points,
+      kPointDim,
+      idx_tris,
+      grad_dists,
+      min_triangle_area);
+}
+
+// ****************************************************************************
+// *                          PointEdgeDistance                               *
+// ****************************************************************************
+
+// Point-to-edge forward pass: the points are passed as the objects (one
+// vertex each) and the segments as the targets (two vertices each) of the
+// generic DistanceForwardCuda.
+std::tuple<at::Tensor, at::Tensor> PointEdgeDistanceForwardCuda(
+    const at::Tensor& points,
+    const at::Tensor& points_first_idx,
+    const at::Tensor& segms,
+    const at::Tensor& segms_first_idx,
+    const int64_t max_points) {
+  constexpr size_t kPointDim = 1; // vertices per point
+  constexpr size_t kEdgeDim = 2; // vertices per segment
+  // todo: unused parameter handling for min_triangle_area
+  // (the edge code path of the kernel never reads it, so the value is a
+  // placeholder).
+  constexpr double kPlaceholderMinTriangleArea = 1;
+  return DistanceForwardCuda(
+      /*objects=*/points,
+      kPointDim,
+      points_first_idx,
+      /*targets=*/segms,
+      kEdgeDim,
+      segms_first_idx,
+      max_points,
+      kPlaceholderMinTriangleArea);
+}
+
+// Point-to-edge backward pass: the points are the objects (one vertex each)
+// and the segments the targets (two vertices each) of the generic
+// DistanceBackwardCuda.
+std::tuple<at::Tensor, at::Tensor> PointEdgeDistanceBackwardCuda(
+    const at::Tensor& points,
+    const at::Tensor& segms,
+    const at::Tensor& idx_points,
+    const at::Tensor& grad_dists) {
+  constexpr size_t kPointDim = 1; // vertices per point
+  constexpr size_t kEdgeDim = 2; // vertices per segment
+  // min_triangle_area is not read on the kernel's edge code path; 1 is a
+  // placeholder.
+  constexpr double kPlaceholderMinTriangleArea = 1;
+  return DistanceBackwardCuda(
+      /*objects=*/points,
+      kPointDim,
+      /*targets=*/segms,
+      kEdgeDim,
+      idx_points,
+      grad_dists,
+      kPlaceholderMinTriangleArea);
+}
+
+// ****************************************************************************
+// *                          EdgePointDistance                               *
+// ****************************************************************************
+
+// Edge-to-point forward pass: the segments are passed as the objects (two
+// vertices each) and the points as the targets (one vertex each) of the
+// generic DistanceForwardCuda, so the outputs are per segment.
+std::tuple<at::Tensor, at::Tensor> EdgePointDistanceForwardCuda(
+    const at::Tensor& points,
+    const at::Tensor& points_first_idx,
+    const at::Tensor& segms,
+    const at::Tensor& segms_first_idx,
+    const int64_t max_segms) {
+  constexpr size_t kEdgeDim = 2; // vertices per segment
+  constexpr size_t kPointDim = 1; // vertices per point
+  // min_triangle_area is not read on the kernel's edge code path; 1 is a
+  // placeholder.
+  constexpr double kPlaceholderMinTriangleArea = 1;
+  return DistanceForwardCuda(
+      /*objects=*/segms,
+      kEdgeDim,
+      segms_first_idx,
+      /*targets=*/points,
+      kPointDim,
+      points_first_idx,
+      max_segms,
+      kPlaceholderMinTriangleArea);
+}
+
+// Edge-to-point backward pass: the segments are the objects (two vertices
+// each) and the points the targets (one vertex each) of the generic
+// DistanceBackwardCuda.
+std::tuple<at::Tensor, at::Tensor> EdgePointDistanceBackwardCuda(
+    const at::Tensor& points,
+    const at::Tensor& segms,
+    const at::Tensor& idx_segms,
+    const at::Tensor& grad_dists) {
+  constexpr size_t kEdgeDim = 2; // vertices per segment
+  constexpr size_t kPointDim = 1; // vertices per point
+  // min_triangle_area is not read on the kernel's edge code path; 1 is a
+  // placeholder.
+  constexpr double kPlaceholderMinTriangleArea = 1;
+  return DistanceBackwardCuda(
+      /*objects=*/segms,
+      kEdgeDim,
+      /*targets=*/points,
+      kPointDim,
+      idx_segms,
+      grad_dists,
+      kPlaceholderMinTriangleArea);
+}
+
+// ****************************************************************************
+// *                     PointFaceArrayDistance                               *
+// ****************************************************************************
+// TODO: Create wrapper function and merge kernel with other array kernel
+
+// Computes the point-to-triangle distance for every (point, triangle) pair.
+//
+// Args:
+//    points: float buffer read as P float3 values, (P, 3)
+//    tris: float buffer read as T * 3 float3 values, (T, 3, 3)
+//    dists: output of P * T floats; dists[p * T + t] receives the value
+//        returned by PointTriangle3DistanceForward for point p and triangle t
+//    P, T: number of points / triangles
+//    min_triangle_area: forwarded unchanged to PointTriangle3DistanceForward
+__global__ void PointFaceArrayForwardKernel(
+    const float* __restrict__ points, // (P, 3)
+    const float* __restrict__ tris, // (T, 3, 3)
+    float* __restrict__ dists, // (P, T)
+    const size_t P,
+    const size_t T,
+    const double min_triangle_area) {
+  const float3* points_f3 = (float3*)points;
+  const float3* tris_f3 = (float3*)tris;
+
+  // Parallelize over the P * T (point, triangle) pairs. All index arithmetic
+  // is done in size_t: an int loop counter overflows once P * T exceeds
+  // INT_MAX.
+  const size_t num_threads = (size_t)gridDim.x * blockDim.x;
+  const size_t tid = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
+  const size_t num_pairs = P * T;
+
+  for (size_t t_i = tid; t_i < num_pairs; t_i += num_threads) {
+    const size_t t = t_i / P; // triangle index.
+    const size_t p = t_i % P; // point index
+    const float3 v0 = tris_f3[t * 3 + 0];
+    const float3 v1 = tris_f3[t * 3 + 1];
+    const float3 v2 = tris_f3[t * 3 + 2];
+
+    const float3 point = points_f3[p];
+    float dist =
+        PointTriangle3DistanceForward(point, v0, v1, v2, min_triangle_area);
+    dists[p * T + t] = dist;
+  }
+}
+
+// Host launcher for PointFaceArrayForwardKernel.
+//
+// Args:
+//    points: float tensor of shape (P, 3)
+//    tris: float tensor of shape (T, 3, 3)
+//    min_triangle_area: forwarded to the kernel
+//
+// Returns:
+//    dists: tensor of shape (P, T) created with points' options. It is
+//        zero-initialized and returned without a kernel launch when it has
+//        no elements.
+at::Tensor PointFaceArrayDistanceForwardCuda(
+    const at::Tensor& points,
+    const at::Tensor& tris,
+    const double min_triangle_area) {
+  // Check inputs are on the same device
+  at::TensorArg points_t{points, "points", 1}, tris_t{tris, "tris", 2};
+  at::CheckedFrom c = "PointFaceArrayDistanceForwardCuda";
+  at::checkAllSameGPU(c, {points_t, tris_t});
+  at::checkAllSameType(c, {points_t, tris_t});
+
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(points.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const int64_t P = points.size(0);
+  const int64_t T = tris.size(0);
+
+  TORCH_CHECK(points.size(1) == 3, "points must be of shape Px3");
+  TORCH_CHECK(
+      (tris.size(1) == 3) && (tris.size(2) == 3),
+      "tris must be of shape Tx3x3");
+
+  at::Tensor dists = at::zeros({P, T}, points.options());
+
+  // Nothing to compute: skip the launch.
+  if (dists.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return dists;
+  }
+
+  // Fixed launch size; the kernel loops until all P * T pairs are covered.
+  const size_t blocks = 1024;
+  const size_t threads = 64;
+
+  PointFaceArrayForwardKernel<<<blocks, threads, 0, stream>>>(
+      points.contiguous().data_ptr<float>(),
+      tris.contiguous().data_ptr<float>(),
+      dists.data_ptr<float>(),
+      P,
+      T,
+      min_triangle_area);
+
+  AT_CUDA_CHECK(cudaGetLastError());
+  return dists;
+}
+
+// Backward of the all-pairs point-to-triangle distance: for every
+// (point, triangle) pair the gradients returned by
+// PointTriangle3DistanceBackward are accumulated into grad_points and
+// grad_tris.
+//
+// Args:
+//    points: float buffer read as P float3 values, (P, 3)
+//    tris: float buffer read as T * 3 float3 values, (T, 3, 3)
+//    grad_dists: upstream gradients, read at [p * T + t], (P, T)
+//    grad_points: accumulation buffer of P * 3 floats
+//    grad_tris: accumulation buffer of T * 3 * 3 floats
+//    P, T: number of points / triangles
+//    min_triangle_area: forwarded unchanged to PointTriangle3DistanceBackward
+__global__ void PointFaceArrayBackwardKernel(
+    const float* __restrict__ points, // (P, 3)
+    const float* __restrict__ tris, // (T, 3, 3)
+    const float* __restrict__ grad_dists, // (P, T)
+    float* __restrict__ grad_points, // (P, 3)
+    float* __restrict__ grad_tris, // (T, 3, 3)
+    const size_t P,
+    const size_t T,
+    const double min_triangle_area) {
+  const float3* points_f3 = (float3*)points;
+  const float3* tris_f3 = (float3*)tris;
+
+  // Parallelize over the P * T (point, triangle) pairs. All index arithmetic
+  // is done in size_t: an int loop counter overflows once P * T exceeds
+  // INT_MAX.
+  const size_t num_threads = (size_t)gridDim.x * blockDim.x;
+  const size_t tid = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
+  const size_t num_pairs = P * T;
+
+  for (size_t t_i = tid; t_i < num_pairs; t_i += num_threads) {
+    const size_t t = t_i / P; // triangle index.
+    const size_t p = t_i % P; // point index
+    const float3 v0 = tris_f3[t * 3 + 0];
+    const float3 v1 = tris_f3[t * 3 + 1];
+    const float3 v2 = tris_f3[t * 3 + 2];
+
+    const float3 point = points_f3[p];
+
+    const float grad_dist = grad_dists[p * T + t];
+    const auto grad = PointTriangle3DistanceBackward(
+        point, v0, v1, v2, grad_dist, min_triangle_area);
+
+    const float3 grad_point = thrust::get<0>(grad);
+    const float3 grad_v0 = thrust::get<1>(grad);
+    const float3 grad_v1 = thrust::get<2>(grad);
+    const float3 grad_v2 = thrust::get<3>(grad);
+
+    // Every point takes part in T pairs and every triangle in P pairs, so
+    // several threads can hit the same entry: accumulate atomically.
+    atomicAdd(grad_points + 3 * p + 0, grad_point.x);
+    atomicAdd(grad_points + 3 * p + 1, grad_point.y);
+    atomicAdd(grad_points + 3 * p + 2, grad_point.z);
+
+    atomicAdd(grad_tris + t * 3 * 3 + 0 * 3 + 0, grad_v0.x);
+    atomicAdd(grad_tris + t * 3 * 3 + 0 * 3 + 1, grad_v0.y);
+    atomicAdd(grad_tris + t * 3 * 3 + 0 * 3 + 2, grad_v0.z);
+
+    atomicAdd(grad_tris + t * 3 * 3 + 1 * 3 + 0, grad_v1.x);
+    atomicAdd(grad_tris + t * 3 * 3 + 1 * 3 + 1, grad_v1.y);
+    atomicAdd(grad_tris + t * 3 * 3 + 1 * 3 + 2, grad_v1.z);
+
+    atomicAdd(grad_tris + t * 3 * 3 + 2 * 3 + 0, grad_v2.x);
+    atomicAdd(grad_tris + t * 3 * 3 + 2 * 3 + 1, grad_v2.y);
+    atomicAdd(grad_tris + t * 3 * 3 + 2 * 3 + 2, grad_v2.z);
+  }
+}
+
+// Host launcher for PointFaceArrayBackwardKernel.
+//
+// Args:
+//    points: float tensor of shape (P, 3)
+//    tris: float tensor of shape (T, 3, 3)
+//    grad_dists: float tensor of shape (P, T) with the upstream gradients
+//    min_triangle_area: forwarded to the kernel
+//
+// Returns:
+//    grad_points: tensor of shape (P, 3) created with points' options
+//    grad_tris: tensor of shape (T, 3, 3) created with tris' options
+//    Both are zero-initialized and returned without a kernel launch if
+//    either is empty.
+std::tuple<at::Tensor, at::Tensor> PointFaceArrayDistanceBackwardCuda(
+    const at::Tensor& points,
+    const at::Tensor& tris,
+    const at::Tensor& grad_dists,
+    const double min_triangle_area) {
+  // Check inputs are on the same device
+  at::TensorArg points_t{points, "points", 1}, tris_t{tris, "tris", 2},
+      grad_dists_t{grad_dists, "grad_dists", 3};
+  at::CheckedFrom c = "PointFaceArrayDistanceBackwardCuda";
+  at::checkAllSameGPU(c, {points_t, tris_t, grad_dists_t});
+  at::checkAllSameType(c, {points_t, tris_t, grad_dists_t});
+  // This is nondeterministic because atomicAdd
+  at::globalContext().alertNotDeterministic(
+      "PointFaceArrayDistanceBackwardCuda");
+
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(points.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const int64_t P = points.size(0);
+  const int64_t T = tris.size(0);
+
+  TORCH_CHECK(points.size(1) == 3, "points must be of shape Px3");
+  TORCH_CHECK(
+      (tris.size(1) == 3) && (tris.size(2) == 3),
+      "tris must be of shape Tx3x3");
+  TORCH_CHECK((grad_dists.size(0) == P) && (grad_dists.size(1) == T));
+
+  at::Tensor grad_points = at::zeros({P, 3}, points.options());
+  at::Tensor grad_tris = at::zeros({T, 3, 3}, tris.options());
+
+  // Nothing to accumulate when either side is empty: skip the launch.
+  if (grad_points.numel() == 0 || grad_tris.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return std::make_tuple(grad_points, grad_tris);
+  }
+
+  // Fixed launch size; the kernel loops until all P * T pairs are covered.
+  const size_t blocks = 1024;
+  const size_t threads = 64;
+
+  PointFaceArrayBackwardKernel<<<blocks, threads, 0, stream>>>(
+      points.contiguous().data_ptr<float>(),
+      tris.contiguous().data_ptr<float>(),
+      grad_dists.contiguous().data_ptr<float>(),
+      grad_points.data_ptr<float>(),
+      grad_tris.data_ptr<float>(),
+      P,
+      T,
+      min_triangle_area);
+
+  AT_CUDA_CHECK(cudaGetLastError());
+  return std::make_tuple(grad_points, grad_tris);
+}
+
+// ****************************************************************************
+// *                     PointEdgeArrayDistance                               *
+// ****************************************************************************
+// TODO: Create wrapper function and merge kernel with other array kernel
+
+// Computes the point-to-segment distance for every (point, segment) pair.
+//
+// Args:
+//    points: float buffer read as P float3 values, (P, 3)
+//    segms: float buffer read as S * 2 float3 values, (S, 2, 3)
+//    dists: output of P * S floats; dists[p * S + s] receives the value
+//        returned by PointLine3DistanceForward for point p and segment s
+//    P, S: number of points / segments
+__global__ void PointEdgeArrayForwardKernel(
+    const float* __restrict__ points, // (P, 3)
+    const float* __restrict__ segms, // (S, 2, 3)
+    float* __restrict__ dists, // (P, S)
+    const size_t P,
+    const size_t S) {
+  float3* points_f3 = (float3*)points;
+  float3* segms_f3 = (float3*)segms;
+
+  // Parallelize over the P * S (point, segment) pairs. All index arithmetic
+  // is done in size_t: an int loop counter overflows once P * S exceeds
+  // INT_MAX.
+  const size_t num_threads = (size_t)gridDim.x * blockDim.x;
+  const size_t tid = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
+  const size_t num_pairs = P * S;
+
+  for (size_t t_i = tid; t_i < num_pairs; t_i += num_threads) {
+    const size_t s = t_i / P; // segment index.
+    const size_t p = t_i % P; // point index
+    float3 a = segms_f3[s * 2 + 0];
+    float3 b = segms_f3[s * 2 + 1];
+
+    float3 point = points_f3[p];
+    float dist = PointLine3DistanceForward(point, a, b);
+    dists[p * S + s] = dist;
+  }
+}
+
+// Host launcher for PointEdgeArrayForwardKernel.
+//
+// Args:
+//    points: float tensor of shape (P, 3)
+//    segms: float tensor of shape (S, 2, 3)
+//
+// Returns:
+//    dists: tensor of shape (P, S) created with points' options. It is
+//        zero-initialized and returned without a kernel launch when it has
+//        no elements.
+at::Tensor PointEdgeArrayDistanceForwardCuda(
+    const at::Tensor& points,
+    const at::Tensor& segms) {
+  // Check inputs are on the same device
+  at::TensorArg points_t{points, "points", 1}, segms_t{segms, "segms", 2};
+  at::CheckedFrom c = "PointEdgeArrayDistanceForwardCuda";
+  at::checkAllSameGPU(c, {points_t, segms_t});
+  at::checkAllSameType(c, {points_t, segms_t});
+
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(points.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const int64_t P = points.size(0);
+  const int64_t S = segms.size(0);
+
+  TORCH_CHECK(points.size(1) == 3, "points must be of shape Px3");
+  TORCH_CHECK(
+      (segms.size(1) == 2) && (segms.size(2) == 3),
+      "segms must be of shape Sx2x3");
+
+  at::Tensor dists = at::zeros({P, S}, points.options());
+
+  // Nothing to compute: skip the launch.
+  if (dists.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return dists;
+  }
+
+  // Fixed launch size; the kernel loops until all P * S pairs are covered.
+  const size_t blocks = 1024;
+  const size_t threads = 64;
+
+  PointEdgeArrayForwardKernel<<<blocks, threads, 0, stream>>>(
+      points.contiguous().data_ptr<float>(),
+      segms.contiguous().data_ptr<float>(),
+      dists.data_ptr<float>(),
+      P,
+      S);
+
+  AT_CUDA_CHECK(cudaGetLastError());
+  return dists;
+}
+
+// Backward of the all-pairs point-to-segment distance: for every
+// (point, segment) pair the gradients returned by PointLine3DistanceBackward
+// are accumulated into grad_points and grad_segms.
+//
+// Args:
+//    points: float buffer read as P float3 values, (P, 3)
+//    segms: float buffer read as S * 2 float3 values, (S, 2, 3)
+//    grad_dists: upstream gradients, read at [p * S + s], (P, S)
+//    grad_points: accumulation buffer of P * 3 floats
+//    grad_segms: accumulation buffer of S * 2 * 3 floats
+//    P, S: number of points / segments
+__global__ void PointEdgeArrayBackwardKernel(
+    const float* __restrict__ points, // (P, 3)
+    const float* __restrict__ segms, // (S, 2, 3)
+    const float* __restrict__ grad_dists, // (P, S)
+    float* __restrict__ grad_points, // (P, 3)
+    float* __restrict__ grad_segms, // (S, 2, 3)
+    const size_t P,
+    const size_t S) {
+  float3* points_f3 = (float3*)points;
+  float3* segms_f3 = (float3*)segms;
+
+  // Parallelize over the P * S (point, segment) pairs. All index arithmetic
+  // is done in size_t: an int loop counter overflows once P * S exceeds
+  // INT_MAX.
+  const size_t num_threads = (size_t)gridDim.x * blockDim.x;
+  const size_t tid = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
+  const size_t num_pairs = P * S;
+
+  for (size_t t_i = tid; t_i < num_pairs; t_i += num_threads) {
+    const size_t s = t_i / P; // segment index.
+    const size_t p = t_i % P; // point index
+    const float3 a = segms_f3[s * 2 + 0];
+    const float3 b = segms_f3[s * 2 + 1];
+
+    const float3 point = points_f3[p];
+    const float grad_dist = grad_dists[p * S + s];
+    const auto grads = PointLine3DistanceBackward(point, a, b, grad_dist);
+    const float3 grad_point = thrust::get<0>(grads);
+    const float3 grad_a = thrust::get<1>(grads);
+    const float3 grad_b = thrust::get<2>(grads);
+
+    // Every point takes part in S pairs and every segment in P pairs, so
+    // several threads can hit the same entry: accumulate atomically.
+    atomicAdd(grad_points + p * 3 + 0, grad_point.x);
+    atomicAdd(grad_points + p * 3 + 1, grad_point.y);
+    atomicAdd(grad_points + p * 3 + 2, grad_point.z);
+
+    atomicAdd(grad_segms + s * 2 * 3 + 0 * 3 + 0, grad_a.x);
+    atomicAdd(grad_segms + s * 2 * 3 + 0 * 3 + 1, grad_a.y);
+    atomicAdd(grad_segms + s * 2 * 3 + 0 * 3 + 2, grad_a.z);
+
+    atomicAdd(grad_segms + s * 2 * 3 + 1 * 3 + 0, grad_b.x);
+    atomicAdd(grad_segms + s * 2 * 3 + 1 * 3 + 1, grad_b.y);
+    atomicAdd(grad_segms + s * 2 * 3 + 1 * 3 + 2, grad_b.z);
+  }
+}
+
+// Host launcher for PointEdgeArrayBackwardKernel.
+//
+// Args:
+//    points: float tensor of shape (P, 3)
+//    segms: float tensor of shape (S, 2, 3)
+//    grad_dists: float tensor of shape (P, S) with the upstream gradients
+//
+// Returns:
+//    grad_points: tensor of shape (P, 3) created with points' options
+//    grad_segms: tensor of shape (S, 2, 3) created with segms' options
+//    Both are zero-initialized and returned without a kernel launch if
+//    either is empty.
+std::tuple<at::Tensor, at::Tensor> PointEdgeArrayDistanceBackwardCuda(
+    const at::Tensor& points,
+    const at::Tensor& segms,
+    const at::Tensor& grad_dists) {
+  // Check inputs are on the same device
+  at::TensorArg points_t{points, "points", 1}, segms_t{segms, "segms", 2},
+      grad_dists_t{grad_dists, "grad_dists", 3};
+  at::CheckedFrom c = "PointEdgeArrayDistanceBackwardCuda";
+  at::checkAllSameGPU(c, {points_t, segms_t, grad_dists_t});
+  at::checkAllSameType(c, {points_t, segms_t, grad_dists_t});
+  // This is nondeterministic because atomicAdd
+  at::globalContext().alertNotDeterministic(
+      "PointEdgeArrayDistanceBackwardCuda");
+
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(points.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const int64_t P = points.size(0);
+  const int64_t S = segms.size(0);
+
+  TORCH_CHECK(points.size(1) == 3, "points must be of shape Px3");
+  TORCH_CHECK(
+      (segms.size(1) == 2) && (segms.size(2) == 3),
+      "segms must be of shape Sx2x3");
+  TORCH_CHECK((grad_dists.size(0) == P) && (grad_dists.size(1) == S));
+
+  at::Tensor grad_points = at::zeros({P, 3}, points.options());
+  at::Tensor grad_segms = at::zeros({S, 2, 3}, segms.options());
+
+  // Nothing to accumulate when either side is empty: skip the launch.
+  if (grad_points.numel() == 0 || grad_segms.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return std::make_tuple(grad_points, grad_segms);
+  }
+
+  // Fixed launch size; the kernel loops until all P * S pairs are covered.
+  const size_t blocks = 1024;
+  const size_t threads = 64;
+
+  PointEdgeArrayBackwardKernel<<<blocks, threads, 0, stream>>>(
+      points.contiguous().data_ptr<float>(),
+      segms.contiguous().data_ptr<float>(),
+      grad_dists.contiguous().data_ptr<float>(),
+      grad_points.data_ptr<float>(),
+      grad_segms.data_ptr<float>(),
+      P,
+      S);
+  AT_CUDA_CHECK(cudaGetLastError());
+  return std::make_tuple(grad_points, grad_segms);
+}
diff --git a/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cuda.h b/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cuda.h
new file mode 100644
index 0000000000000000000000000000000000000000..529dd5604c2ff4a4b84c590611adc4ef83edce4d
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cuda.h
@@ -0,0 +1,707 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/extension.h>
+#include <cstdio>
+#include <tuple>
+#include "utils/pytorch3d_cutils.h"
+
+// ****************************************************************************
+// *                      PointFaceDistance                                   *
+// ****************************************************************************
+
+// Computes the squared euclidean distance of each p in points to its closest
+// triangular face belonging to the corresponding mesh example in the batch of
+// size N.
+//
+// Args:
+//    points: FloatTensor of shape (P, 3)
+//    points_first_idx: LongTensor of shape (N,) indicating the first point
+//        index for each example in the batch
+//    tris: FloatTensor of shape (T, 3, 3) of the triangular faces. The t-th
+//        triangular face is spanned by (tris[t, 0], tris[t, 1], tris[t, 2])
+//    tris_first_idx: LongTensor of shape (N,) indicating the first face
+//        index for each example in the batch
+//    max_points: Scalar equal to max(P_i) for i in [0, N - 1] containing
+//        the maximum number of points in the batch and is used to set
+//        the block dimensions in the CUDA implementation.
+//    min_triangle_area: triangles with an area smaller than this are
+//        treated as points/lines.
+//
+// Returns:
+//    dists: FloatTensor of shape (P,), where dists[p] is the minimum
+//        squared euclidean distance of points[p] to the faces in the same
+//        example in the batch.
+//    idxs: LongTensor of shape (P,), where idxs[p] is the index of the closest
+//        face in the batch.
+//        So, dists[p] = d(points[p], tris[idxs[p], 0], tris[idxs[p], 1],
+//        tris[idxs[p], 2]) where d(u, v0, v1, v2) is the distance of u from the
+//        face spanned by (v0, v1, v2)
+//
+//
+
+#ifdef WITH_CUDA
+
+std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceForwardCuda(
+    const torch::Tensor& points,
+    const torch::Tensor& points_first_idx,
+    const torch::Tensor& tris,
+    const torch::Tensor& tris_first_idx,
+    const int64_t max_points,
+    const double min_triangle_area);
+#endif
+
+std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceForwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& points_first_idx,
+    const torch::Tensor& tris,
+    const torch::Tensor& tris_first_idx,
+    const double min_triangle_area);
+
+std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceForward(
+    const torch::Tensor& points,
+    const torch::Tensor& points_first_idx,
+    const torch::Tensor& tris,
+    const torch::Tensor& tris_first_idx,
+    const int64_t max_points,  // forwarded to the CUDA implementation only
+    const double min_triangle_area) {
+  if (points.is_cuda()) {  // backend is chosen from the device of `points` alone
+#ifdef WITH_CUDA
+    CHECK_CUDA(points);  // macro defined elsewhere; presumably rejects non-CUDA tensors
+    CHECK_CUDA(points_first_idx);
+    CHECK_CUDA(tris);
+    CHECK_CUDA(tris_first_idx);
+    return PointFaceDistanceForwardCuda(
+        points,
+        points_first_idx,
+        tris,
+        tris_first_idx,
+        max_points,
+        min_triangle_area);
+#else  // built without WITH_CUDA: a CUDA tensor cannot be processed
+    AT_ERROR("Not compiled with GPU support.");
+#endif  // WITH_CUDA
+  }
+  return PointFaceDistanceForwardCpu(  // CPU path: `points` is not a CUDA tensor
+      points, points_first_idx, tris, tris_first_idx, min_triangle_area);
+}
+
+// Backward pass for PointFaceDistance.
+//
+// Args:
+//    points: FloatTensor of shape (P, 3)
+//    tris: FloatTensor of shape (T, 3, 3)
+//    idx_points: LongTensor of shape (P,) containing the indices
+//        of the closest face in the example in the batch.
+//        This is computed by the forward pass
+//    grad_dists: FloatTensor of shape (P,)
+//    min_triangle_area: triangles with an area smaller than this are
+//        treated as points/lines.
+//
+// Returns:
+//    grad_points: FloatTensor of shape (P, 3)
+//    grad_tris: FloatTensor of shape (T, 3, 3)
+//
+
+#ifdef WITH_CUDA
+
+std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceBackwardCuda(
+    const torch::Tensor& points,
+    const torch::Tensor& tris,
+    const torch::Tensor& idx_points,
+    const torch::Tensor& grad_dists,
+    const double min_triangle_area);
+#endif
+std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceBackwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& tris,
+    const torch::Tensor& idx_points,
+    const torch::Tensor& grad_dists,
+    const double min_triangle_area);
+
+std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceBackward(
+    const torch::Tensor& points,
+    const torch::Tensor& tris,
+    const torch::Tensor& idx_points,
+    const torch::Tensor& grad_dists,
+    const double min_triangle_area) {
+  if (points.is_cuda()) {  // backend is chosen from the device of `points` alone
+#ifdef WITH_CUDA
+    CHECK_CUDA(points);  // macro defined elsewhere; presumably rejects non-CUDA tensors
+    CHECK_CUDA(tris);
+    CHECK_CUDA(idx_points);
+    CHECK_CUDA(grad_dists);
+    return PointFaceDistanceBackwardCuda(
+        points, tris, idx_points, grad_dists, min_triangle_area);
+#else  // built without WITH_CUDA: a CUDA tensor cannot be processed
+    AT_ERROR("Not compiled with GPU support.");
+#endif  // WITH_CUDA
+  }
+  return PointFaceDistanceBackwardCpu(  // CPU path: same arguments as the CUDA call
+      points, tris, idx_points, grad_dists, min_triangle_area);
+}
+
+// ****************************************************************************
+// *                      FacePointDistance                                   *
+// ****************************************************************************
+
+// Computes the squared euclidean distance of each triangular face to its
+// closest point belonging to the corresponding example in the batch of size N.
+//
+// Args:
+//    points: FloatTensor of shape (P, 3)
+//    points_first_idx: LongTensor of shape (N,) indicating the first point
+//        index for each example in the batch
+//    tris: FloatTensor of shape (T, 3, 3) of the triangular faces. The t-th
+//        triangular face is spanned by (tris[t, 0], tris[t, 1], tris[t, 2])
+//    tris_first_idx: LongTensor of shape (N,) indicating the first face
+//        index for each example in the batch
+//    max_tris: Scalar equal to max(T_i) for i in [0, N - 1] containing
+//        the maximum number of faces in the batch and is used to set
+//        the block dimensions in the CUDA implementation.
+//    min_triangle_area: triangles with an area smaller than this are
+//        treated as points/lines.
+//
+// Returns:
+//    dists: FloatTensor of shape (T,), where dists[t] is the minimum squared
+//        euclidean distance of t-th triangular face from the closest point in
+//        the batch.
+//    idxs: LongTensor of shape (T,), where idxs[t] is the index of the closest
+//        point in the batch.
+//        So, dists[t] = d(points[idxs[t]], tris[t, 0], tris[t, 1], tris[t, 2])
+//        where d(u, v0, v1, v2) is the distance of u from the triangular face
+//        spanned by (v0, v1, v2)
+//
+
+#ifdef WITH_CUDA
+
+std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceForwardCuda(
+    const torch::Tensor& points,
+    const torch::Tensor& points_first_idx,
+    const torch::Tensor& tris,
+    const torch::Tensor& tris_first_idx,
+    const int64_t max_tris,
+    const double min_triangle_area);
+#endif
+
+std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceForwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& points_first_idx,
+    const torch::Tensor& tris,
+    const torch::Tensor& tris_first_idx,
+    const double min_triangle_area);
+
+std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceForward(
+    const torch::Tensor& points,
+    const torch::Tensor& points_first_idx,
+    const torch::Tensor& tris,
+    const torch::Tensor& tris_first_idx,
+    const int64_t max_tris,  // forwarded to the CUDA implementation only
+    const double min_triangle_area) {
+  if (points.is_cuda()) {  // backend is chosen from the device of `points` alone
+#ifdef WITH_CUDA
+    CHECK_CUDA(points);  // macro defined elsewhere; presumably rejects non-CUDA tensors
+    CHECK_CUDA(points_first_idx);
+    CHECK_CUDA(tris);
+    CHECK_CUDA(tris_first_idx);
+    return FacePointDistanceForwardCuda(
+        points,
+        points_first_idx,
+        tris,
+        tris_first_idx,
+        max_tris,
+        min_triangle_area);
+#else  // built without WITH_CUDA: a CUDA tensor cannot be processed
+    AT_ERROR("Not compiled with GPU support.");
+#endif  // WITH_CUDA
+  }
+  return FacePointDistanceForwardCpu(  // CPU path: `points` is not a CUDA tensor
+      points, points_first_idx, tris, tris_first_idx, min_triangle_area);
+}
+
+// Backward pass for FacePointDistance.
+//
+// Args:
+//    points: FloatTensor of shape (P, 3)
+//    tris: FloatTensor of shape (T, 3, 3)
+//    idx_tris: LongTensor of shape (T,) containing the indices
+//        of the closest point in the example in the batch.
+//        This is computed by the forward pass
+//    grad_dists: FloatTensor of shape (T,)
+//    min_triangle_area: triangles with an area smaller than this are
+//        treated as points/lines.
+//
+// Returns:
+//    grad_points: FloatTensor of shape (P, 3)
+//    grad_tris: FloatTensor of shape (T, 3, 3)
+//
+
+#ifdef WITH_CUDA
+
+std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceBackwardCuda(
+    const torch::Tensor& points,
+    const torch::Tensor& tris,
+    const torch::Tensor& idx_tris,
+    const torch::Tensor& grad_dists,
+    const double min_triangle_area);
+#endif
+
+std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceBackwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& tris,
+    const torch::Tensor& idx_tris,
+    const torch::Tensor& grad_dists,
+    const double min_triangle_area);
+
+std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceBackward(
+    const torch::Tensor& points,
+    const torch::Tensor& tris,
+    const torch::Tensor& idx_tris,
+    const torch::Tensor& grad_dists,
+    const double min_triangle_area) {
+  if (points.is_cuda()) {  // backend is chosen from the device of `points` alone
+#ifdef WITH_CUDA
+    CHECK_CUDA(points);  // macro defined elsewhere; presumably rejects non-CUDA tensors
+    CHECK_CUDA(tris);
+    CHECK_CUDA(idx_tris);
+    CHECK_CUDA(grad_dists);
+    return FacePointDistanceBackwardCuda(
+        points, tris, idx_tris, grad_dists, min_triangle_area);
+#else  // built without WITH_CUDA: a CUDA tensor cannot be processed
+    AT_ERROR("Not compiled with GPU support.");
+#endif  // WITH_CUDA
+  }
+  return FacePointDistanceBackwardCpu(  // CPU path: same arguments as the CUDA call
+      points, tris, idx_tris, grad_dists, min_triangle_area);
+}
+
+// ****************************************************************************
+// *                      PointEdgeDistance                                   *
+// ****************************************************************************
+
+// Computes the squared euclidean distance of each p in points to the closest
+// mesh edge belonging to the corresponding example in the batch of size N.
+//
+// Args:
+//    points: FloatTensor of shape (P, 3)
+//    points_first_idx: LongTensor of shape (N,) indicating the first point
+//         index for each example in the batch
+//    segms: FloatTensor of shape (S, 2, 3) of edge segments. The s-th edge
+//        segment is spanned by (segms[s, 0], segms[s, 1])
+//    segms_first_idx: LongTensor of shape (N,) indicating the first edge
+//        index for each example in the batch
+//    max_points: Scalar equal to max(P_i) for i in [0, N - 1] containing
+//        the maximum number of points in the batch and is used to set
+//        the grid dimensions in the CUDA implementation.
+//
+// Returns:
+//    dists: FloatTensor of shape (P,), where dists[p] is the squared euclidean
+//        distance of points[p] to the closest edge in the same example in the
+//        batch.
+//    idxs: LongTensor of shape (P,), where idxs[p] is the index of the closest
+//        edge in the batch.
+//        So, dists[p] = d(points[p], segms[idxs[p], 0], segms[idxs[p], 1]),
+//        where d(u, v0, v1) is the distance of u from the segment spanned by
+//        (v0, v1).
+//
+
+#ifdef WITH_CUDA
+
+std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForwardCuda(
+    const torch::Tensor& points,
+    const torch::Tensor& points_first_idx,
+    const torch::Tensor& segms,
+    const torch::Tensor& segms_first_idx,
+    const int64_t max_points);
+#endif
+
+std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& points_first_idx,
+    const torch::Tensor& segms,
+    const torch::Tensor& segms_first_idx,
+    const int64_t max_points);
+
+std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForward(
+    const torch::Tensor& points,
+    const torch::Tensor& points_first_idx,
+    const torch::Tensor& segms,
+    const torch::Tensor& segms_first_idx,
+    const int64_t max_points) {  // passed to both the CUDA and CPU implementations
+  if (points.is_cuda()) {  // backend is chosen from the device of `points` alone
+#ifdef WITH_CUDA
+    CHECK_CUDA(points);  // macro defined elsewhere; presumably rejects non-CUDA tensors
+    CHECK_CUDA(points_first_idx);
+    CHECK_CUDA(segms);
+    CHECK_CUDA(segms_first_idx);
+    return PointEdgeDistanceForwardCuda(
+        points, points_first_idx, segms, segms_first_idx, max_points);
+#else  // built without WITH_CUDA: a CUDA tensor cannot be processed
+    AT_ERROR("Not compiled with GPU support.");
+#endif  // WITH_CUDA
+  }
+  return PointEdgeDistanceForwardCpu(  // CPU path: `points` is not a CUDA tensor
+      points, points_first_idx, segms, segms_first_idx, max_points);
+}
+
+// Backward pass for PointEdgeDistance.
+//
+// Args:
+//    points: FloatTensor of shape (P, 3)
+//    segms: FloatTensor of shape (S, 2, 3)
+//    idx_points: LongTensor of shape (P,) containing the indices
+//        of the closest edge in the example in the batch.
+//        This is computed by the forward pass.
+//    grad_dists: FloatTensor of shape (P,)
+//
+// Returns:
+//    grad_points: FloatTensor of shape (P, 3)
+//    grad_segms: FloatTensor of shape (S, 2, 3)
+//
+
+#ifdef WITH_CUDA
+
+std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackwardCuda(
+    const torch::Tensor& points,
+    const torch::Tensor& segms,
+    const torch::Tensor& idx_points,
+    const torch::Tensor& grad_dists);
+#endif
+
+std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& segms,
+    const torch::Tensor& idx_points,
+    const torch::Tensor& grad_dists);
+
+std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackward(
+    const torch::Tensor& points,
+    const torch::Tensor& segms,
+    const torch::Tensor& idx_points,
+    const torch::Tensor& grad_dists) {
+  if (points.is_cuda()) {  // backend is chosen from the device of `points` alone
+#ifdef WITH_CUDA
+    CHECK_CUDA(points);  // macro defined elsewhere; presumably rejects non-CUDA tensors
+    CHECK_CUDA(segms);
+    CHECK_CUDA(idx_points);
+    CHECK_CUDA(grad_dists);
+    return PointEdgeDistanceBackwardCuda(points, segms, idx_points, grad_dists);
+#else  // built without WITH_CUDA: a CUDA tensor cannot be processed
+    AT_ERROR("Not compiled with GPU support.");
+#endif  // WITH_CUDA
+  }
+  return PointEdgeDistanceBackwardCpu(points, segms, idx_points, grad_dists);  // CPU path
+}
+
+// ****************************************************************************
+// *                      EdgePointDistance                                   *
+// ****************************************************************************
+
+// Computes the squared euclidean distance of each edge segment to the closest
+// point belonging to the corresponding example in the batch of size N.
+//
+// Args:
+//    points: FloatTensor of shape (P, 3)
+//    points_first_idx: LongTensor of shape (N,) indicating the first point
+//         index for each example in the batch
+//    segms: FloatTensor of shape (S, 2, 3) of edge segments. The s-th edge
+//        segment is spanned by (segms[s, 0], segms[s, 1])
+//    segms_first_idx: LongTensor of shape (N,) indicating the first edge
+//        index for each example in the batch
+//    max_segms: Scalar equal to max(S_i) for i in [0, N - 1] containing
+//        the maximum number of edges in the batch and is used to set
+//        the block dimensions in the CUDA implementation.
+//
+// Returns:
+//    dists: FloatTensor of shape (S,), where dists[s] is the squared
+//        euclidean distance of s-th edge to the closest point in the
+//        corresponding example in the batch.
+//    idxs: LongTensor of shape (S,), where idxs[s] is the index of the closest
+//        point in the example in the batch.
+//        So, dists[s] = d(points[idxs[s]], segms[s, 0], segms[s, 1]), where
+//        d(u, v0, v1) is the distance of u from the segment spanned by (v0, v1)
+//
+//
+
+#ifdef WITH_CUDA
+
+std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForwardCuda(
+    const torch::Tensor& points,
+    const torch::Tensor& points_first_idx,
+    const torch::Tensor& segms,
+    const torch::Tensor& segms_first_idx,
+    const int64_t max_segms);
+#endif
+
+std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& points_first_idx,
+    const torch::Tensor& segms,
+    const torch::Tensor& segms_first_idx,
+    const int64_t max_segms);
+
+std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForward(
+    const torch::Tensor& points,
+    const torch::Tensor& points_first_idx,
+    const torch::Tensor& segms,
+    const torch::Tensor& segms_first_idx,
+    const int64_t max_segms) {  // passed to both the CUDA and CPU implementations
+  if (points.is_cuda()) {  // backend is chosen from the device of `points` alone
+#ifdef WITH_CUDA
+    CHECK_CUDA(points);  // macro defined elsewhere; presumably rejects non-CUDA tensors
+    CHECK_CUDA(points_first_idx);
+    CHECK_CUDA(segms);
+    CHECK_CUDA(segms_first_idx);
+    return EdgePointDistanceForwardCuda(
+        points, points_first_idx, segms, segms_first_idx, max_segms);
+#else  // built without WITH_CUDA: a CUDA tensor cannot be processed
+    AT_ERROR("Not compiled with GPU support.");
+#endif  // WITH_CUDA
+  }
+  return EdgePointDistanceForwardCpu(  // CPU path: `points` is not a CUDA tensor
+      points, points_first_idx, segms, segms_first_idx, max_segms);
+}
+
+// Backward pass for EdgePointDistance.
+//
+// Args:
+//    points: FloatTensor of shape (P, 3)
+//    segms: FloatTensor of shape (S, 2, 3)
+//    idx_segms: LongTensor of shape (S,) containing the indices
+//        of the closest point in the example in the batch.
+//        This is computed by the forward pass
+//    grad_dists: FloatTensor of shape (S,)
+//
+// Returns:
+//    grad_points: FloatTensor of shape (P, 3)
+//    grad_segms: FloatTensor of shape (S, 2, 3)
+//
+
+#ifdef WITH_CUDA
+
+std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackwardCuda(
+    const torch::Tensor& points,
+    const torch::Tensor& segms,
+    const torch::Tensor& idx_segms,
+    const torch::Tensor& grad_dists);
+#endif
+
+std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& segms,
+    const torch::Tensor& idx_segms,
+    const torch::Tensor& grad_dists);
+
+std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackward(
+    const torch::Tensor& points,
+    const torch::Tensor& segms,
+    const torch::Tensor& idx_segms,
+    const torch::Tensor& grad_dists) {
+  if (points.is_cuda()) {  // backend is chosen from the device of `points` alone
+#ifdef WITH_CUDA
+    CHECK_CUDA(points);  // macro defined elsewhere; presumably rejects non-CUDA tensors
+    CHECK_CUDA(segms);
+    CHECK_CUDA(idx_segms);
+    CHECK_CUDA(grad_dists);
+    return EdgePointDistanceBackwardCuda(points, segms, idx_segms, grad_dists);
+#else  // built without WITH_CUDA: a CUDA tensor cannot be processed
+    AT_ERROR("Not compiled with GPU support.");
+#endif  // WITH_CUDA
+  }
+  return EdgePointDistanceBackwardCpu(points, segms, idx_segms, grad_dists);  // CPU path
+}
+
+// ****************************************************************************
+// *                       PointFaceArrayDistance                             *
+// ****************************************************************************
+
+// Computes the squared euclidean distance of each p in points to each
+// triangular face spanned by (v0, v1, v2) in tris.
+//
+// Args:
+//    points: FloatTensor of shape (P, 3)
+//    tris: FloatTensor of shape (T, 3, 3) of the triangular faces. The t-th
+//        triangular face is spanned by (tris[t, 0], tris[t, 1], tris[t, 2])
+//    min_triangle_area: triangles with an area smaller than this are
+//        treated as points/lines.
+//
+// Returns:
+//    dists: FloatTensor of shape (P, T), where dists[p, t] is the squared
+//        euclidean distance of points[p] to the face spanned by (v0, v1, v2)
+//        where v0 = tris[t, 0], v1 = tris[t, 1] and v2 = tris[t, 2]
+//
+// For pointclouds and meshes of batch size N, this function requires N
+// computations. The memory occupied is O(NPT), which can become quite large.
+// For example, a medium-sized batch with N = 32, P = 10000 and T = 5000
+// needs roughly 6 GB (4-byte floats) in the forward pass just to store dists.
+
+#ifdef WITH_CUDA
+
+torch::Tensor PointFaceArrayDistanceForwardCuda(
+    const torch::Tensor& points,
+    const torch::Tensor& tris,
+    const double min_triangle_area);
+#endif
+
+torch::Tensor PointFaceArrayDistanceForwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& tris,
+    const double min_triangle_area);
+
+torch::Tensor PointFaceArrayDistanceForward(
+    const torch::Tensor& points,
+    const torch::Tensor& tris,
+    const double min_triangle_area) {
+  if (points.is_cuda()) {  // backend is chosen from the device of `points` alone
+#ifdef WITH_CUDA
+    CHECK_CUDA(points);  // macro defined elsewhere; presumably rejects non-CUDA tensors
+    CHECK_CUDA(tris);
+    return PointFaceArrayDistanceForwardCuda(points, tris, min_triangle_area);
+#else  // built without WITH_CUDA: a CUDA tensor cannot be processed
+    AT_ERROR("Not compiled with GPU support.");
+#endif  // WITH_CUDA
+  }
+  return PointFaceArrayDistanceForwardCpu(points, tris, min_triangle_area);  // CPU path
+}
+
+// Backward pass for PointFaceArrayDistance.
+//
+// Args:
+//    points: FloatTensor of shape (P, 3)
+//    tris: FloatTensor of shape (T, 3, 3)
+//    grad_dists: FloatTensor of shape (P, T)
+//    min_triangle_area: triangles with an area smaller than this are
+//        treated as points/lines.
+//
+// Returns:
+//    grad_points: FloatTensor of shape (P, 3)
+//    grad_tris: FloatTensor of shape (T, 3, 3)
+//
+
+#ifdef WITH_CUDA
+std::tuple<torch::Tensor, torch::Tensor> PointFaceArrayDistanceBackwardCuda(
+    const torch::Tensor& points,
+    const torch::Tensor& tris,
+    const torch::Tensor& grad_dists,
+    const double min_triangle_area);
+#endif
+std::tuple<torch::Tensor, torch::Tensor> PointFaceArrayDistanceBackwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& tris,
+    const torch::Tensor& grad_dists,
+    const double min_triangle_area);
+
+std::tuple<torch::Tensor, torch::Tensor> PointFaceArrayDistanceBackward(
+    const torch::Tensor& points,
+    const torch::Tensor& tris,
+    const torch::Tensor& grad_dists,
+    const double min_triangle_area) {
+  if (points.is_cuda()) {  // backend is chosen from the device of `points` alone
+#ifdef WITH_CUDA
+    CHECK_CUDA(points);  // macro defined elsewhere; presumably rejects non-CUDA tensors
+    CHECK_CUDA(tris);
+    CHECK_CUDA(grad_dists);
+    return PointFaceArrayDistanceBackwardCuda(
+        points, tris, grad_dists, min_triangle_area);
+#else  // built without WITH_CUDA: a CUDA tensor cannot be processed
+    AT_ERROR("Not compiled with GPU support.");
+#endif  // WITH_CUDA
+  }
+  return PointFaceArrayDistanceBackwardCpu(  // CPU path: same arguments as the CUDA call
+      points, tris, grad_dists, min_triangle_area);
+}
+
+// ****************************************************************************
+// *                          PointEdgeArrayDistance                          *
+// ****************************************************************************
+
+// Computes the squared euclidean distance of each p in points to each edge
+// segment in segms.
+//
+// Args:
+//    points: FloatTensor of shape (P, 3)
+//    segms: FloatTensor of shape (S, 2, 3) of edge segments. The s-th
+//        edge segment is spanned by (segms[s, 0], segms[s, 1])
+//
+// Returns:
+//    dists: FloatTensor of shape (P, S), where dists[p, s] is the squared
+//        euclidean distance of points[p] to the segment spanned by
+//        (segms[s, 0], segms[s, 1])
+//
+// For pointclouds and meshes of batch size N, this function requires N
+// computations. The memory occupied is O(NPS), which can become quite large.
+// For example, a medium-sized batch with N = 32, P = 10000 and S = 5000
+// needs roughly 6 GB (4-byte floats) in the forward pass just to store dists.
+
+#ifdef WITH_CUDA
+torch::Tensor PointEdgeArrayDistanceForwardCuda(
+    const torch::Tensor& points,
+    const torch::Tensor& segms);
+#endif
+
+torch::Tensor PointEdgeArrayDistanceForwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& segms);
+
+torch::Tensor PointEdgeArrayDistanceForward(
+    const torch::Tensor& points,
+    const torch::Tensor& segms) {
+  if (points.is_cuda()) {  // backend is chosen from the device of `points` alone
+#ifdef WITH_CUDA
+    CHECK_CUDA(points);  // macro defined elsewhere; presumably rejects non-CUDA tensors
+    CHECK_CUDA(segms);
+    return PointEdgeArrayDistanceForwardCuda(points, segms);
+#else  // built without WITH_CUDA: a CUDA tensor cannot be processed
+    AT_ERROR("Not compiled with GPU support.");
+#endif  // WITH_CUDA
+  }
+  return PointEdgeArrayDistanceForwardCpu(points, segms);  // CPU path
+}
+
+// Backward pass for PointEdgeArrayDistance.
+//
+// Args:
+//    points: FloatTensor of shape (P, 3)
+//    segms: FloatTensor of shape (S, 2, 3)
+//    grad_dists: FloatTensor of shape (P, S)
+//
+// Returns:
+//   grad_points: FloatTensor of shape (P, 3)
+//   grad_segms: FloatTensor of shape (S, 2, 3)
+//
+
+#ifdef WITH_CUDA
+
+std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackwardCuda(
+    const torch::Tensor& points,
+    const torch::Tensor& segms,
+    const torch::Tensor& grad_dists);
+#endif
+
+std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& segms,
+    const torch::Tensor& grad_dists);
+
+std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackward(
+    const torch::Tensor& points,
+    const torch::Tensor& segms,
+    const torch::Tensor& grad_dists) {
+  if (points.is_cuda()) {  // backend is chosen from the device of `points` alone
+#ifdef WITH_CUDA
+    CHECK_CUDA(points);  // macro defined elsewhere; presumably rejects non-CUDA tensors
+    CHECK_CUDA(segms);
+    CHECK_CUDA(grad_dists);
+    return PointEdgeArrayDistanceBackwardCuda(points, segms, grad_dists);
+#else  // built without WITH_CUDA: a CUDA tensor cannot be processed
+    AT_ERROR("Not compiled with GPU support.");
+#endif  // WITH_CUDA
+  }
+  return PointEdgeArrayDistanceBackwardCpu(points, segms, grad_dists);  // CPU path
+}
diff --git a/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes.cu b/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes.cu
new file mode 100644
index 0000000000000000000000000000000000000000..43d4ed55a3b0c0bb6e5b0256ef62084ccebfe660
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes.cu
@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+
+using at::PackedTensorAccessor64;
+using at::RestrictPtrTraits;
+
+// A chunk of work is blocksize-many points.
+// There are N clouds in the batch, and P points in each cloud.
+// The number of potential chunks to do per cloud is (1+(P-1)/blocksize),
+// which we call chunks_per_cloud.
+// These (N*chunks_per_cloud) chunks are divided among the gridSize-many blocks.
+// In block b, we work on chunks b, b+gridSize, b+2*gridSize, etc.
+// In chunk i, we work on cloud (i/chunks_per_cloud) on points starting from
+// blocksize*(i%chunks_per_cloud).
+
+// Explanation of the calculation is in the cpp file.
+
+// EightDirections(t) calls t(a, b, c) once for each of the 8 boolean triples.
+template <class T>
+static __device__ void EightDirections(T&& t) {
+  t(false, false, false);  // call order: the last argument varies fastest
+  t(false, false, true);
+  t(false, true, false);
+  t(false, true, true);
+  t(true, false, false);
+  t(true, false, true);
+  t(true, true, false);
+  t(true, true, true);
+}
+
+__global__ void PointsToVolumesForwardKernel(
+    const PackedTensorAccessor64<float, 3, RestrictPtrTraits> points_3d,
+    const PackedTensorAccessor64<float, 3, RestrictPtrTraits> points_features,
+    PackedTensorAccessor64<float, 5, RestrictPtrTraits> volume_densities,
+    PackedTensorAccessor64<float, 5, RestrictPtrTraits> volume_features,
+    PackedTensorAccessor64<int64_t, 2, RestrictPtrTraits> grid_sizes,
+    PackedTensorAccessor64<float, 2, RestrictPtrTraits> mask,
+    const float point_weight,
+    const bool align_corners,
+    const bool splat,
+    const int64_t batch_size,  // number of clouds in the batch
+    const int64_t P,  // number of points in each cloud
+    const int64_t n_features) {
+  const int64_t chunks_per_cloud = (1 + (P - 1) / blockDim.x);
+  const int64_t chunks_to_do = batch_size * chunks_per_cloud;
+  const int scale_offset = align_corners ? 1 : 0;
+  const float offset = align_corners ? 0 : 0.5;
+  for (int64_t chunk = blockIdx.x; chunk < chunks_to_do; chunk += gridDim.x) {
+    const int64_t batch_index = chunk / chunks_per_cloud;
+    const int64_t start_point = blockDim.x * (chunk % chunks_per_cloud);
+    int64_t point_idx = start_point + threadIdx.x;  // one point per thread
+    if (point_idx >= P) {  // thread is past the end of the cloud
+      continue;
+    }
+    if (mask[batch_index][point_idx] == 0) {  // masked-out point
+      continue;
+    }
+    auto volume_densities_aa = volume_densities[batch_index][0];
+    auto volume_features_aa = volume_features[batch_index];
+    auto point = points_3d[batch_index][point_idx];
+    auto point_features = points_features[batch_index][point_idx];
+    const int64_t grid_size_x = grid_sizes[batch_index][2];  // row is z, y, x
+    const int64_t grid_size_y = grid_sizes[batch_index][1];
+    const int64_t grid_size_z = grid_sizes[batch_index][0];
+    auto increment_location =
+        [&](int64_t x, int64_t y, int64_t z, float weight) {
+          if (x >= grid_size_x || y >= grid_size_y || z >= grid_size_z) {
+            return;  // outside the grid: the contribution is dropped
+          }
+          if (x < 0 || y < 0 || z < 0) {
+            return;  // outside the grid: the contribution is dropped
+          }
+          // Atomic adds: other threads may update the same voxel concurrently.
+          atomicAdd(&volume_densities_aa[z][y][x], weight * point_weight);
+
+          for (int64_t feature_idx = 0; feature_idx < n_features;
+               ++feature_idx) {
+            atomicAdd(
+                &volume_features_aa[feature_idx][z][y][x],
+                point_features[feature_idx] * weight * point_weight);
+          }
+        };
+    if (!splat) {  // nearest: all the weight goes to the rounded location
+      long x = std::lround(
+          (point[0] + 1) * 0.5 * (grid_size_x - scale_offset) - offset);
+      long y = std::lround(
+          (point[1] + 1) * 0.5 * (grid_size_y - scale_offset) - offset);
+      long z = std::lround(
+          (point[2] + 1) * 0.5 * (grid_size_z - scale_offset) - offset);
+      increment_location(x, y, z, 1);
+    } else {  // splat: weight is shared between the 8 surrounding locations
+      float x = 0, y = 0, z = 0;  // integral parts; rx, ry, rz are fractions
+      float rx = std::modf(
+          (point[0] + 1) * 0.5 * (grid_size_x - scale_offset) - offset, &x);
+      float ry = std::modf(
+          (point[1] + 1) * 0.5 * (grid_size_y - scale_offset) - offset, &y);
+      float rz = std::modf(
+          (point[2] + 1) * 0.5 * (grid_size_z - scale_offset) - offset, &z);
+      auto handle_point = [&](bool up_x, bool up_y, bool up_z) {
+        float weight =
+            (up_x ? rx : 1 - rx) * (up_y ? ry : 1 - ry) * (up_z ? rz : 1 - rz);
+        increment_location(x + up_x, y + up_y, z + up_z, weight);
+      };
+      EightDirections(handle_point);
+    }
+  }
+}
+
+void PointsToVolumesForwardCuda(
+    const at::Tensor& points_3d,
+    const at::Tensor& points_features,
+    const at::Tensor& volume_densities,  // written to by the kernel
+    const at::Tensor& volume_features,  // written to by the kernel
+    const at::Tensor& grid_sizes,
+    const at::Tensor& mask,
+    const float point_weight,
+    const bool align_corners,
+    const bool splat) {
+  // Host-side launcher. First check that all tensor inputs share one GPU.
+  at::TensorArg points_3d_t{points_3d, "points_3d", 1},
+      points_features_t{points_features, "points_features", 2},
+      volume_densities_t{volume_densities, "volume_densities", 3},
+      volume_features_t{volume_features, "volume_features", 4},
+      grid_sizes_t{grid_sizes, "grid_sizes", 5}, mask_t{mask, "mask", 6};
+  at::CheckedFrom c = "PointsToVolumesForwardCuda";
+  at::checkAllSameGPU(
+      c,
+      {points_3d_t,
+       points_features_t,
+       volume_densities_t,
+       volume_features_t,
+       grid_sizes_t,
+       mask_t});
+
+  // Nondeterministic: the kernel accumulates with atomicAdd.
+  at::globalContext().alertNotDeterministic("PointsToVolumesForwardCuda");
+
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(points_3d.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const int blocks = 1024;  // fixed grid; the kernel loops over extra chunks
+  const int threads = 32;  // block size, i.e. the number of points per chunk
+
+  const int64_t batch_size = points_3d.size(0);
+  const int64_t P = points_3d.size(1);
+  const int64_t n_features = points_features.size(2);
+
+  PointsToVolumesForwardKernel<<<blocks, threads, 0, stream>>>(
+      points_3d.packed_accessor64<float, 3, RestrictPtrTraits>(),
+      points_features.packed_accessor64<float, 3, RestrictPtrTraits>(),
+      volume_densities.packed_accessor64<float, 5, RestrictPtrTraits>(),
+      volume_features.packed_accessor64<float, 5, RestrictPtrTraits>(),
+      grid_sizes.packed_accessor64<int64_t, 2, RestrictPtrTraits>(),
+      mask.packed_accessor64<float, 2, RestrictPtrTraits>(),
+      point_weight,
+      align_corners,
+      splat,
+      batch_size,
+      P,
+      n_features);
+}
+
+__global__ void PointsToVolumesBackwardKernel(
+    const PackedTensorAccessor64<float, 3, RestrictPtrTraits> points_3d,
+    const PackedTensorAccessor64<float, 3, RestrictPtrTraits> points_features,
+    const PackedTensorAccessor64<int64_t, 2, RestrictPtrTraits> grid_sizes,
+    const PackedTensorAccessor64<float, 2, RestrictPtrTraits> mask,
+    PackedTensorAccessor64<float, 5, RestrictPtrTraits> grad_volume_densities,
+    PackedTensorAccessor64<float, 5, RestrictPtrTraits> grad_volume_features,
+    PackedTensorAccessor64<float, 3, RestrictPtrTraits> grad_points_3d,
+    PackedTensorAccessor64<float, 3, RestrictPtrTraits> grad_points_features,
+    const float point_weight,
+    const bool align_corners,
+    const bool splat,
+    const int64_t batch_size,  // number of clouds in the batch
+    const int64_t P,  // number of points in each cloud
+    const int64_t n_features) {
+  const int64_t chunks_per_cloud = (1 + (P - 1) / blockDim.x);
+  const int64_t chunks_to_do = batch_size * chunks_per_cloud;
+  const int scale_offset = align_corners ? 1 : 0;
+  const float offset = align_corners ? 0 : 0.5;
+  // Note that the gradients belonging to each point are only touched by
+  // a single thread in one of our "chunks", which is in a single block.
+  // So unlike in the forward pass, there's no need for atomics here.
+  for (int64_t chunk = blockIdx.x; chunk < chunks_to_do; chunk += gridDim.x) {
+    const int64_t batch_index = chunk / chunks_per_cloud;
+    const int64_t start_point = blockDim.x * (chunk % chunks_per_cloud);
+    int64_t point_idx = start_point + threadIdx.x;  // one point per thread
+    if (point_idx >= P) {  // thread is past the end of the cloud
+      continue;
+    }
+    if (mask[batch_index][point_idx] == 0) {  // masked-out point
+      continue;
+    }
+    auto point = points_3d[batch_index][point_idx];
+    auto point_features = points_features[batch_index][point_idx];
+    auto grad_point = grad_points_3d[batch_index][point_idx];
+    auto grad_point_features = grad_points_features[batch_index][point_idx];
+    auto grad_volume_densities_a = grad_volume_densities[batch_index][0];
+    auto grad_volume_features_a = grad_volume_features[batch_index];
+    const int64_t grid_size_x = grid_sizes[batch_index][2];  // row is z, y, x
+    const int64_t grid_size_y = grid_sizes[batch_index][1];
+    const int64_t grid_size_z = grid_sizes[batch_index][0];
+
+    auto increment_location =
+        [&](int64_t x, int64_t y, int64_t z, float weight) {
+          if (x >= grid_size_x || y >= grid_size_y || z >= grid_size_z) {
+            return false;  // outside the grid: nothing to propagate
+          }
+          if (x < 0 || y < 0 || z < 0) {
+            return false;  // outside the grid: nothing to propagate
+          }
+
+          // This is a forward line, for comparison
+          // volume_densities_aa[z][y][x] += weight * point_weight;
+
+          for (int64_t feature_idx = 0; feature_idx < n_features;
+               ++feature_idx) {
+            // This is a forward line, for comparison
+            // volume_features_aa[feature_idx][z][y][x] +=
+            //    point_features[feature_idx] * weight * point_weight;
+            grad_point_features[feature_idx] +=
+                grad_volume_features_a[feature_idx][z][y][x] * weight *
+                point_weight;
+          }
+          return true;
+        };
+
+    if (!splat) {  // nearest: only grad_point_features is updated
+      long x = std::lround(
+          (point[0] + 1) * 0.5 * (grid_size_x - scale_offset) - offset);
+      long y = std::lround(
+          (point[1] + 1) * 0.5 * (grid_size_y - scale_offset) - offset);
+      long z = std::lround(
+          (point[2] + 1) * 0.5 * (grid_size_z - scale_offset) - offset);
+      increment_location(x, y, z, 1);
+    } else {  // splat: also accumulate the gradient w.r.t. the point position
+      float x = 0, y = 0, z = 0;  // integral parts; rx, ry, rz are fractions
+      float rx = std::modf(
+          (point[0] + 1) * 0.5 * (grid_size_x - scale_offset) - offset, &x);
+      float ry = std::modf(
+          (point[1] + 1) * 0.5 * (grid_size_y - scale_offset) - offset, &y);
+      float rz = std::modf(
+          (point[2] + 1) * 0.5 * (grid_size_z - scale_offset) - offset, &z);
+      auto handle_point = [&](bool up_x, bool up_y, bool up_z) {
+        float weight_x = (up_x ? rx : 1 - rx);
+        float weight_y = (up_y ? ry : 1 - ry);
+        float weight_z = (up_z ? rz : 1 - rz);
+        float weight = weight_x * weight_y * weight_z;
+        if (increment_location(x + up_x, y + up_y, z + up_z, weight)) {
+          // weight * point_weight has been added to
+          // volume_densities_aa[z+up_z][y+up_y][x+up_x]
+          // Also for each feature_idx,
+          //   point_features[feature_idx] * weight * point_weight
+          // has been added to
+          // volume_features_aa[feature_idx][z+up_z][y+up_y][x+up_x]
+          // d(weight)/d(location x) = +-weight_y * weight_z; ditto y and z.
+          double source_gradient =
+              grad_volume_densities_a[z + up_z][y + up_y][x + up_x];
+          for (int64_t feature_idx = 0; feature_idx < n_features;
+               ++feature_idx) {
+            source_gradient += point_features[feature_idx] *
+                grad_volume_features_a[feature_idx][z + up_z][y + up_y]
+                                      [x + up_x];
+          }
+          grad_point[0] += source_gradient * (up_x ? 1 : -1) * weight_y *
+              weight_z * 0.5 * (grid_size_x - scale_offset) * point_weight;
+          grad_point[1] += source_gradient * (up_y ? 1 : -1) * weight_x *
+              weight_z * 0.5 * (grid_size_y - scale_offset) * point_weight;
+          grad_point[2] += source_gradient * (up_z ? 1 : -1) * weight_x *
+              weight_y * 0.5 * (grid_size_z - scale_offset) * point_weight;
+        }
+      };
+      EightDirections(handle_point);
+    }
+  }
+}
+
+void PointsToVolumesBackwardCuda(
+    const at::Tensor& points_3d,
+    const at::Tensor& points_features,
+    const at::Tensor& grid_sizes,
+    const at::Tensor& mask,
+    const float point_weight,
+    const bool align_corners,
+    const bool splat,
+    const at::Tensor& grad_volume_densities,
+    const at::Tensor& grad_volume_features,
+    const at::Tensor& grad_points_3d,  // accumulated into by the kernel
+    const at::Tensor& grad_points_features) {  // accumulated into by the kernel
+  // Host-side launcher. First check that all tensor inputs share one GPU.
+  at::TensorArg points_3d_t{points_3d, "points_3d", 1},
+      points_features_t{points_features, "points_features", 2},
+      grid_sizes_t{grid_sizes, "grid_sizes", 3}, mask_t{mask, "mask", 4},
+      grad_volume_densities_t{
+          grad_volume_densities, "grad_volume_densities", 8},  // 5-7: scalars
+      grad_volume_features_t{grad_volume_features, "grad_volume_features", 9},
+      grad_points_3d_t{grad_points_3d, "grad_points_3d", 10},
+      grad_points_features_t{grad_points_features, "grad_points_features", 11};
+
+  at::CheckedFrom c = "PointsToVolumesBackwardCuda";
+  at::checkAllSameGPU(
+      c,
+      {points_3d_t,
+       points_features_t,
+       grid_sizes_t,
+       mask_t,
+       grad_volume_densities_t,
+       grad_volume_features_t,
+       grad_points_3d_t,
+       grad_points_features_t});
+
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(points_3d.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const int blocks = 1024;  // fixed grid; the kernel loops over extra chunks
+  const int threads = 32;  // block size, i.e. the number of points per chunk
+
+  const int64_t batch_size = points_3d.size(0);
+  const int64_t P = points_3d.size(1);
+  const int64_t n_features = points_features.size(2);
+
+  PointsToVolumesBackwardKernel<<<blocks, threads, 0, stream>>>(
+      points_3d.packed_accessor64<float, 3, RestrictPtrTraits>(),
+      points_features.packed_accessor64<float, 3, RestrictPtrTraits>(),
+      grid_sizes.packed_accessor64<int64_t, 2, RestrictPtrTraits>(),
+      mask.packed_accessor64<float, 2, RestrictPtrTraits>(),
+      grad_volume_densities.packed_accessor64<float, 5, RestrictPtrTraits>(),
+      grad_volume_features.packed_accessor64<float, 5, RestrictPtrTraits>(),
+      grad_points_3d.packed_accessor64<float, 3, RestrictPtrTraits>(),
+      grad_points_features.packed_accessor64<float, 3, RestrictPtrTraits>(),
+      point_weight,
+      align_corners,
+      splat,
+      batch_size,
+      P,
+      n_features);
+}
diff --git a/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes.h b/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes.h
new file mode 100644
index 0000000000000000000000000000000000000000..4c5eba3c9e55f7dc81f1df8a7e6698abdec33d70
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/csrc/autograd/VariableTypeUtils.h>
+#include <torch/extension.h>
+#include <cstdio>
+#include <tuple>
+#include "utils/pytorch3d_cutils.h"
+
+/*
+    volume_features and volume_densities are modified in place.
+
+    Args:
+        points_3d: Batch of 3D point cloud coordinates of shape
+            `(minibatch, N, 3)` where N is the number of points
+            in each point cloud. Coordinates have to be specified in the
+            local volume coordinates (ranging in [-1, 1]).
+        points_features: Features of shape `(minibatch, N, feature_dim)`
+            corresponding to the points of the input point cloud `points_3d`.
+        volume_features: Batch of input feature volumes
+            of shape `(minibatch, feature_dim, D, H, W)`
+        volume_densities: Batch of input feature volume densities
+            of shape `(minibatch, 1, D, H, W)`. Each voxel should
+            contain a non-negative number corresponding to its
+            opaqueness (the higher, the less transparent).
+
+        grid_sizes: `LongTensor` of shape (minibatch, 3) representing the
+            spatial resolutions of each of the non-flattened `volumes`
+            tensors. Note that the following has to hold:
+                `torch.prod(grid_sizes, dim=1)==N_voxels`.
+
+        point_weight: A scalar controlling how much weight a single point has.
+
+        mask: A binary mask of shape `(minibatch, N)` determining
+            which 3D points are going to be converted to the resulting
+            volume. Set to `None` if all points are valid.
+
+        align_corners: as for grid_sample.
+
+        splat: if true, trilinear interpolation. If false all the weight goes in
+            the nearest voxel.
+*/
+
+void PointsToVolumesForwardCpu(
+    const torch::Tensor& points_3d,
+    const torch::Tensor& points_features,
+    const torch::Tensor& volume_densities,
+    const torch::Tensor& volume_features,
+    const torch::Tensor& grid_sizes,
+    const torch::Tensor& mask,
+    float point_weight,
+    bool align_corners,
+    bool splat);
+
+void PointsToVolumesForwardCuda(
+    const torch::Tensor& points_3d,
+    const torch::Tensor& points_features,
+    const torch::Tensor& volume_densities,
+    const torch::Tensor& volume_features,
+    const torch::Tensor& grid_sizes,
+    const torch::Tensor& mask,
+    float point_weight,
+    bool align_corners,
+    bool splat);
+
+inline void PointsToVolumesForward(  // CPU/CUDA dispatcher
+    const torch::Tensor& points_3d,
+    const torch::Tensor& points_features,
+    const torch::Tensor& volume_densities,
+    const torch::Tensor& volume_features,
+    const torch::Tensor& grid_sizes,
+    const torch::Tensor& mask,
+    float point_weight,
+    bool align_corners,
+    bool splat) {
+  if (points_3d.is_cuda()) {  // the device of points_3d selects the backend
+#ifdef WITH_CUDA
+    CHECK_CUDA(points_3d);
+    CHECK_CUDA(points_features);
+    CHECK_CUDA(volume_densities);
+    CHECK_CUDA(volume_features);
+    CHECK_CUDA(grid_sizes);
+    CHECK_CUDA(mask);
+    PointsToVolumesForwardCuda(
+        points_3d,
+        points_features,
+        volume_densities,
+        volume_features,
+        grid_sizes,
+        mask,
+        point_weight,
+        align_corners,
+        splat);
+    torch::autograd::increment_version(volume_features);  // changed in place
+    torch::autograd::increment_version(volume_densities);  // changed in place
+    return;
+#else
+    AT_ERROR("Not compiled with GPU support.");  // CUDA tensor, CPU-only build
+#endif
+  }
+  PointsToVolumesForwardCpu(  // CPU path
+      points_3d,
+      points_features,
+      volume_densities,
+      volume_features,
+      grid_sizes,
+      mask,
+      point_weight,
+      align_corners,
+      splat);
+}
+
+// grad_points_3d and grad_points_features are modified in place.
+
+void PointsToVolumesBackwardCpu(
+    const torch::Tensor& points_3d,
+    const torch::Tensor& points_features,
+    const torch::Tensor& grid_sizes,
+    const torch::Tensor& mask,
+    float point_weight,
+    bool align_corners,
+    bool splat,
+    const torch::Tensor& grad_volume_densities,
+    const torch::Tensor& grad_volume_features,
+    const torch::Tensor& grad_points_3d,
+    const torch::Tensor& grad_points_features);
+
+void PointsToVolumesBackwardCuda(
+    const torch::Tensor& points_3d,
+    const torch::Tensor& points_features,
+    const torch::Tensor& grid_sizes,
+    const torch::Tensor& mask,
+    float point_weight,
+    bool align_corners,
+    bool splat,
+    const torch::Tensor& grad_volume_densities,
+    const torch::Tensor& grad_volume_features,
+    const torch::Tensor& grad_points_3d,
+    const torch::Tensor& grad_points_features);
+
+inline void PointsToVolumesBackward(  // CPU/CUDA dispatcher
+    const torch::Tensor& points_3d,
+    const torch::Tensor& points_features,
+    const torch::Tensor& grid_sizes,
+    const torch::Tensor& mask,
+    float point_weight,
+    bool align_corners,
+    bool splat,
+    const torch::Tensor& grad_volume_densities,
+    const torch::Tensor& grad_volume_features,
+    const torch::Tensor& grad_points_3d,
+    const torch::Tensor& grad_points_features) {
+  if (points_3d.is_cuda()) {  // the device of points_3d selects the backend
+#ifdef WITH_CUDA
+    CHECK_CUDA(points_3d);
+    CHECK_CUDA(points_features);
+    CHECK_CUDA(grid_sizes);
+    CHECK_CUDA(mask);
+    CHECK_CUDA(grad_volume_densities);
+    CHECK_CUDA(grad_volume_features);
+    CHECK_CUDA(grad_points_3d);
+    CHECK_CUDA(grad_points_features);
+    PointsToVolumesBackwardCuda(
+        points_3d,
+        points_features,
+        grid_sizes,
+        mask,
+        point_weight,
+        align_corners,
+        splat,
+        grad_volume_densities,
+        grad_volume_features,
+        grad_points_3d,
+        grad_points_features);
+    return;
+#else
+    AT_ERROR("Not compiled with GPU support.");  // CUDA tensor, CPU-only build
+#endif
+  }
+  PointsToVolumesBackwardCpu(  // CPU path
+      points_3d,
+      points_features,
+      grid_sizes,
+      mask,
+      point_weight,
+      align_corners,
+      splat,
+      grad_volume_densities,
+      grad_volume_features,
+      grad_points_3d,
+      grad_points_features);
+}
diff --git a/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes_cpu.cpp b/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..900ea097a9a6e734c694305ca8f78b3e4d6eccbf
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes_cpu.cpp
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/csrc/autograd/VariableTypeUtils.h>
+#include <torch/extension.h>
+#include <algorithm>
+#include <cmath>
+#include <thread>
+#include <vector>
+
+// In the x direction, the location {0, ..., grid_size_x - 1} correspond to
+// points px in [-1, 1]. There are two ways to do this.
+
+// If align_corners=True, px=-1 is the exact location 0 and px=1 is the exact
+// location grid_size_x - 1.
+// So the location of px is {(px + 1) * 0.5} * (grid_size_x - 1).
+// Note that if you generate random points within the bounds you are less likely
+// to hit the edge locations than other locations.
+// This can be thought of as saying "location i" means a specific point.
+
+// If align_corners=False, px=-1 is half way between the exact location 0 and
+// the non-existent location -1, i.e. location -0.5.
+// Similarly px=1 is half way between the exact location grid_size_x-1 and
+// the non-existent location grid_size_x, i.e. the location grid_size_x - 0.5.
+// So the location of px is ({(px + 1) * 0.5} * grid_size_x) - 0.5.
+// Note that if you generate random points within the bounds you are equally
+// likely to hit any location.
+// This can be thought of as saying "location i" means the whole box from
+// (i-0.5) to (i+0.5)
+
+// EightDirections(t) calls t(a, b, c) once for each of the 8 boolean triples.
+template <class T>
+static void EightDirections(T&& t) {
+  t(false, false, false);  // call order: the last argument varies fastest
+  t(false, false, true);
+  t(false, true, false);
+  t(false, true, true);
+  t(true, false, false);
+  t(true, false, true);
+  t(true, true, false);
+  t(true, true, true);
+}
+
+void PointsToVolumesForwardCpu(  // serial loop over clouds and points
+    const torch::Tensor& points_3d,
+    const torch::Tensor& points_features,
+    const torch::Tensor& volume_densities,  // accumulated into in place
+    const torch::Tensor& volume_features,  // accumulated into in place
+    const torch::Tensor& grid_sizes,
+    const torch::Tensor& mask,
+    const float point_weight,
+    const bool align_corners,
+    const bool splat) {
+  const int64_t batch_size = points_3d.size(0);
+  const int64_t P = points_3d.size(1);
+  const int64_t n_features = points_features.size(2);
+
+  // We unify the formula for the location of px in the comment above as
+  // ({(px + 1) * 0.5} * (grid_size_x-scale_offset)) - offset.
+  const int scale_offset = align_corners ? 1 : 0;
+  const float offset = align_corners ? 0 : 0.5;
+
+  auto points_3d_a = points_3d.accessor<float, 3>();
+  auto points_features_a = points_features.accessor<float, 3>();
+  auto volume_densities_a = volume_densities.accessor<float, 5>();
+  auto volume_features_a = volume_features.accessor<float, 5>();
+  auto grid_sizes_a = grid_sizes.accessor<int64_t, 2>();
+  auto mask_a = mask.accessor<float, 2>();
+
+  // For each batch element
+  for (int64_t batch_idx = 0; batch_idx < batch_size; ++batch_idx) {
+    auto points_3d_aa = points_3d_a[batch_idx];
+    auto points_features_aa = points_features_a[batch_idx];
+    auto volume_densities_aa = volume_densities_a[batch_idx][0];
+    auto volume_features_aa = volume_features_a[batch_idx];
+    auto grid_sizes_aa = grid_sizes_a[batch_idx];
+    auto mask_aa = mask_a[batch_idx];
+
+    const int64_t grid_size_x = grid_sizes_aa[2];  // row is read as z, y, x
+    const int64_t grid_size_y = grid_sizes_aa[1];
+    const int64_t grid_size_z = grid_sizes_aa[0];
+
+    // For each point
+    for (int64_t point_idx = 0; point_idx < P; ++point_idx) {
+      // Ignore point if mask is 0
+      if (mask_aa[point_idx] == 0) {
+        continue;
+      }
+      auto point = points_3d_aa[point_idx];
+      auto point_features = points_features_aa[point_idx];
+
+      // Define how to increment a location in the volume by an amount. The need
+      // for this depends on the interpolation method:
+      // once per point for nearest, eight times for splat.
+      auto increment_location =
+          [&](int64_t x, int64_t y, int64_t z, float weight) {
+            if (x >= grid_size_x || y >= grid_size_y || z >= grid_size_z) {
+              return;  // outside the grid: the contribution is dropped
+            }
+            if (x < 0 || y < 0 || z < 0) {
+              return;  // outside the grid: the contribution is dropped
+            }
+
+            volume_densities_aa[z][y][x] += weight * point_weight;
+
+            for (int64_t feature_idx = 0; feature_idx < n_features;
+                 ++feature_idx) {
+              volume_features_aa[feature_idx][z][y][x] +=
+                  point_features[feature_idx] * weight * point_weight;
+            }
+          };
+
+      if (!splat) {
+        // Increment the location nearest the point.
+        long x = std::lround(
+            (point[0] + 1) * 0.5 * (grid_size_x - scale_offset) - offset);
+        long y = std::lround(
+            (point[1] + 1) * 0.5 * (grid_size_y - scale_offset) - offset);
+        long z = std::lround(
+            (point[2] + 1) * 0.5 * (grid_size_z - scale_offset) - offset);
+        increment_location(x, y, z, 1);
+      } else {
+        // The 8 locations around the point are (x or x+1, y or y+1, z or z+1).
+        // rx is the fractional part in the x direction: rx==0 puts all the
+        // weight on the lower bound x, rx=1-eps most of it on x+1. Ditto for
+        // ry and rz. NOTE(review): std::modf truncates toward zero, so rx < 0
+        // for a location in (-1, 0) -- confirm that case is intended.
+        float x = 0, y = 0, z = 0;
+        float rx = std::modf(
+            (point[0] + 1) * 0.5 * (grid_size_x - scale_offset) - offset, &x);
+        float ry = std::modf(
+            (point[1] + 1) * 0.5 * (grid_size_y - scale_offset) - offset, &y);
+        float rz = std::modf(
+            (point[2] + 1) * 0.5 * (grid_size_z - scale_offset) - offset, &z);
+        // Define how to fractionally increment one of the 8 locations around
+        // the point.
+        auto handle_point = [&](bool up_x, bool up_y, bool up_z) {
+          float weight = (up_x ? rx : 1 - rx) * (up_y ? ry : 1 - ry) *
+              (up_z ? rz : 1 - rz);
+          increment_location(x + up_x, y + up_y, z + up_z, weight);
+        };
+        // and do so.
+        EightDirections(handle_point);
+      }
+    }
+  }
+  torch::autograd::increment_version(volume_features);  // changed in place
+  torch::autograd::increment_version(volume_densities);  // changed in place
+}
+
+// With nearest, the only smooth dependence is that volume features
+// depend on points features.
+//
+// With splat, the dependencies are as follows, with gradients passing
+// in the opposite direction.
+//
+//    points_3d         points_features
+//         │  │                  │
+//         │  │                  │
+//         │  └───────────┐      │
+//         │              │      │
+//         │              │      │
+//         ▼              ▼      ▼
+// volume_densities    volume_features
+
+// It is also the case that the input volume_densities and
+// volume_features affect the corresponding outputs (they are
+// modified in place).
+// But the forward pass just increments these by a value which
+// does not depend on them. So our autograd backwards pass needs
+// to copy the gradient for each of those outputs to the
+// corresponding input. We just do that in the Python layer.
+
+void PointsToVolumesBackwardCpu(
+    const torch::Tensor& points_3d,
+    const torch::Tensor& points_features,
+    const torch::Tensor& grid_sizes,
+    const torch::Tensor& mask,
+    const float point_weight,
+    const bool align_corners,
+    const bool splat,
+    const torch::Tensor& grad_volume_densities,
+    const torch::Tensor& grad_volume_features,
+    const torch::Tensor& grad_points_3d,
+    const torch::Tensor& grad_points_features) {
+  const int64_t batch_size = points_3d.size(0);
+  const int64_t P = points_3d.size(1);
+  const int64_t n_features = grad_points_features.size(2);
+  const int scale_offset = align_corners ? 1 : 0;
+  const float offset = align_corners ? 0 : 0.5;
+
+  auto points_3d_a = points_3d.accessor<float, 3>();
+  auto points_features_a = points_features.accessor<float, 3>();
+  auto grid_sizes_a = grid_sizes.accessor<int64_t, 2>();
+  auto mask_a = mask.accessor<float, 2>();
+  auto grad_volume_densities_a = grad_volume_densities.accessor<float, 5>();
+  auto grad_volume_features_a = grad_volume_features.accessor<float, 5>();
+  auto grad_points_3d_a = grad_points_3d.accessor<float, 3>();
+  auto grad_points_features_a = grad_points_features.accessor<float, 3>();
+
+  // For each batch element
+  for (int64_t batch_idx = 0; batch_idx < batch_size; ++batch_idx) {
+    auto points_3d_aa = points_3d_a[batch_idx];
+    auto points_features_aa = points_features_a[batch_idx];
+    auto grid_sizes_aa = grid_sizes_a[batch_idx];
+    auto mask_aa = mask_a[batch_idx];
+    auto grad_volume_densities_aa = grad_volume_densities_a[batch_idx][0];
+    auto grad_volume_features_aa = grad_volume_features_a[batch_idx];
+    auto grad_points_3d_aa = grad_points_3d_a[batch_idx];
+    auto grad_points_features_aa = grad_points_features_a[batch_idx];
+
+    const int64_t grid_size_x = grid_sizes_aa[2];
+    const int64_t grid_size_y = grid_sizes_aa[1];
+    const int64_t grid_size_z = grid_sizes_aa[0];
+
+    // For each point
+    for (int64_t point_idx = 0; point_idx < P; ++point_idx) {
+      if (mask_aa[point_idx] == 0) {
+        continue;
+      }
+      auto point = points_3d_aa[point_idx];
+      auto point_features = points_features_aa[point_idx];
+      auto grad_point_features = grad_points_features_aa[point_idx];
+      auto grad_point = grad_points_3d_aa[point_idx];
+
+      // Define how to (backwards) increment a location in the point cloud,
+      // to take gradients to the features.
+      // We return false if the location does not really exist, so there was
+      // nothing to do.
+      // This happens once per point for nearest, eight times for splat.
+      auto increment_location =
+          [&](int64_t x, int64_t y, int64_t z, float weight) {
+            if (x >= grid_size_x || y >= grid_size_y || z >= grid_size_z) {
+              return false;
+            }
+            if (x < 0 || y < 0 || z < 0) {
+              return false;
+            }
+
+            for (int64_t feature_idx = 0; feature_idx < n_features;
+                 ++feature_idx) {
+              // This is a forward line, for comparison
+              // volume_features_aa[feature_idx][z][y][x] +=
+              //    point_features[feature_idx] * weight * point_weight;
+              grad_point_features[feature_idx] +=
+                  grad_volume_features_aa[feature_idx][z][y][x] * weight *
+                  point_weight;
+            }
+            return true;
+          };
+
+      if (!splat) {
+        long x = std::lround(
+            (point[0] + 1) * 0.5 * (grid_size_x - scale_offset) - offset);
+        long y = std::lround(
+            (point[1] + 1) * 0.5 * (grid_size_y - scale_offset) - offset);
+        long z = std::lround(
+            (point[2] + 1) * 0.5 * (grid_size_z - scale_offset) - offset);
+        increment_location(x, y, z, 1);
+      } else {
+        float x = 0, y = 0, z = 0;
+        float rx = std::modf(
+            (point[0] + 1) * 0.5 * (grid_size_x - scale_offset) - offset, &x);
+        float ry = std::modf(
+            (point[1] + 1) * 0.5 * (grid_size_y - scale_offset) - offset, &y);
+        float rz = std::modf(
+            (point[2] + 1) * 0.5 * (grid_size_z - scale_offset) - offset, &z);
+        auto handle_point = [&](bool up_x, bool up_y, bool up_z) {
+          float weight_x = (up_x ? rx : 1 - rx);
+          float weight_y = (up_y ? ry : 1 - ry);
+          float weight_z = (up_z ? rz : 1 - rz);
+          float weight = weight_x * weight_y * weight_z;
+          // For each of the eight locations, we first increment the feature
+          // gradient.
+          if (increment_location(x + up_x, y + up_y, z + up_z, weight)) {
+            // If the location is a real location, we also (in this splat
+            // case) need to update the gradient w.r.t. the point position.
+            // - the amount in this location is controlled by the weight.
+            // There are two contributions:
+            //  (1) The point position affects how much density we added
+            //      to the location's density, so we have a contribution
+            //      from grad_volume_density. Specifically,
+            //      weight * point_weight has been added to
+            //      volume_densities_aa[z+up_z][y+up_y][x+up_x]
+            //
+            //  (2) The point position affects how much of each of the
+            //      point's features were added to the corresponding feature
+            //      of this location, so we have a contribution from
+            //      grad_volume_features. Specifically, for each feature_idx,
+            //      point_features[feature_idx] * weight * point_weight
+            //      has been added to
+            //      volume_features_aa[feature_idx][z+up_z][y+up_y][x+up_x]
+
+            float source_gradient =
+                grad_volume_densities_aa[z + up_z][y + up_y][x + up_x];
+            for (int64_t feature_idx = 0; feature_idx < n_features;
+                 ++feature_idx) {
+              source_gradient += point_features[feature_idx] *
+                  grad_volume_features_aa[feature_idx][z + up_z][y + up_y]
+                                         [x + up_x];
+            }
+            grad_point[0] += source_gradient * (up_x ? 1 : -1) * weight_y *
+                weight_z * 0.5 * (grid_size_x - scale_offset) * point_weight;
+            grad_point[1] += source_gradient * (up_y ? 1 : -1) * weight_x *
+                weight_z * 0.5 * (grid_size_y - scale_offset) * point_weight;
+            grad_point[2] += source_gradient * (up_z ? 1 : -1) * weight_x *
+                weight_y * 0.5 * (grid_size_z - scale_offset) * point_weight;
+          }
+        };
+        EightDirections(handle_point);
+      }
+    }
+  }
+}
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/constants.h b/pytorch3d/pytorch3d/csrc/pulsar/constants.h
new file mode 100644
index 0000000000000000000000000000000000000000..a2eee6217158d3a2e7a3e92a52e5afa4107494ab
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/constants.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_CONSTANTS_H_
+#define PULSAR_NATIVE_CONSTANTS_H_
+
+#define EPS 1E-6
+#define FEPS 1E-6f
+#define MAX_FLOAT 3.4E38f
+#define MAX_INT 2147483647
+#define MAX_UINT 4294967295u
+#define MAX_USHORT 65535u
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/README.md b/pytorch3d/pytorch3d/csrc/pulsar/cuda/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..60c5d07cba3b8d403693e9aa3db2a0b74f66c472
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/cuda/README.md
@@ -0,0 +1,5 @@
+# CUDA device compilation units
+
+This folder contains `.cu` files to create compilation units
+for device-specific functions. See `../include/README.md` for
+more information.
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/commands.h b/pytorch3d/pytorch3d/csrc/pulsar/cuda/commands.h
new file mode 100644
index 0000000000000000000000000000000000000000..00e6f37852169c6dd3ccaaf02d0381039fe2edbc
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/cuda/commands.h
@@ -0,0 +1,505 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_CUDA_COMMANDS_H_
+#define PULSAR_NATIVE_CUDA_COMMANDS_H_
+
+// Definitions for GPU commands.
+#include <cooperative_groups.h>
+#include <cub/cub.cuh>
+namespace cg = cooperative_groups;
+
+#ifdef __DRIVER_TYPES_H__
+#ifndef DEVICE_RESET
+#define DEVICE_RESET cudaDeviceReset();
+#endif
+#else
+#ifndef DEVICE_RESET
+#define DEVICE_RESET
+#endif
+#endif
+
+#define HANDLECUDA(CMD) CMD
+// handleCudaError((CMD), __FILE__, __LINE__)
+inline void
+handleCudaError(const cudaError_t err, const char* file, const int line) {
+  if (err != cudaSuccess) {
+#ifndef __NVCC__
+    fprintf(
+        stderr,
+        "%s(%i) : getLastCudaError() CUDA error :"
+        " (%d) %s.\n",
+        file,
+        line,
+        static_cast<int>(err),
+        cudaGetErrorString(err));
+    DEVICE_RESET
+    exit(1);
+#endif
+  }
+}
+inline void
+getLastCudaError(const char* errorMessage, const char* file, const int line) {
+  cudaError_t err = cudaGetLastError();
+  if (cudaSuccess != err) {
+    fprintf(stderr, "Error: %s.", errorMessage);
+    handleCudaError(err, file, line);
+  }
+}
+
+#define ALIGN(VAL) __align__(VAL)
+#define SYNC() HANDLECUDA(cudaDeviceSynchronize())
+#define THREADFENCE_B() __threadfence_block()
+#define SHFL_SYNC(a, b, c) __shfl_sync((a), (b), (c))
+#define SHARED __shared__
+#define ACTIVEMASK() __activemask()
+#define BALLOT(mask, val) __ballot_sync((mask), val)
+/**
+ * Running sum of `base` via shuffle-down steps: a lane ends with its own value
+ * plus those of all higher lanes (assumes a full 32-lane group; TODO confirm).
+ */
+template <typename T>
+DEVICE T
+WARP_CUMSUM(const cg::coalesced_group& group, const uint& mask, const T& base) {
+  T ret = base;
+  T shfl_val;
+  shfl_val = __shfl_down_sync(mask, ret, 1u); // Deactivate the rightmost lane.
+  ret += (group.thread_rank() < 31) * shfl_val;
+  shfl_val = __shfl_down_sync(mask, ret, 2u);
+  ret += (group.thread_rank() < 30) * shfl_val;
+  shfl_val = __shfl_down_sync(mask, ret, 4u); // ...4
+  ret += (group.thread_rank() < 28) * shfl_val;
+  shfl_val = __shfl_down_sync(mask, ret, 8u); // ...8
+  ret += (group.thread_rank() < 24) * shfl_val;
+  shfl_val = __shfl_down_sync(mask, ret, 16u); // ...16
+  ret += (group.thread_rank() < 16) * shfl_val;
+  return ret;
+}
+
+template <typename T>
+DEVICE T
+WARP_MAX(const cg::coalesced_group& group, const uint& mask, const T& base) {
+  T ret = base;
+  ret = max(ret, __shfl_down_sync(mask, ret, 16u));
+  ret = max(ret, __shfl_down_sync(mask, ret, 8u));
+  ret = max(ret, __shfl_down_sync(mask, ret, 4u));
+  ret = max(ret, __shfl_down_sync(mask, ret, 2u));
+  ret = max(ret, __shfl_down_sync(mask, ret, 1u));
+  return ret;
+}
+
+template <typename T>
+DEVICE T
+WARP_SUM(const cg::coalesced_group& group, const uint& mask, const T& base) {
+  T ret = base;
+  ret = ret + __shfl_down_sync(mask, ret, 16u);
+  ret = ret + __shfl_down_sync(mask, ret, 8u);
+  ret = ret + __shfl_down_sync(mask, ret, 4u);
+  ret = ret + __shfl_down_sync(mask, ret, 2u);
+  ret = ret + __shfl_down_sync(mask, ret, 1u);
+  return ret;
+}
+
+INLINE DEVICE float3 WARP_SUM_FLOAT3(
+    const cg::coalesced_group& group,
+    const uint& mask,
+    const float3& base) {
+  float3 ret = base;
+  ret.x = WARP_SUM(group, mask, base.x);
+  ret.y = WARP_SUM(group, mask, base.y);
+  ret.z = WARP_SUM(group, mask, base.z);
+  return ret;
+}
+
+// Floating point.
+// #define FMUL(a, b) __fmul_rn((a), (b))
+#define FMUL(a, b) ((a) * (b))
+#define FDIV(a, b) __fdiv_rn((a), (b))
+// #define FSUB(a, b) __fsub_rn((a), (b))
+#define FSUB(a, b) ((a) - (b))
+#define FADD(a, b) __fadd_rn((a), (b))
+#define FSQRT(a) __fsqrt_rn(a)
+#define FEXP(a) fasterexp(a)
+#define FLN(a) fasterlog(a)
+#define FPOW(a, b) __powf((a), (b))
+#define FMAX(a, b) fmax((a), (b))
+#define FMIN(a, b) fmin((a), (b))
+#define FCEIL(a) ceilf(a)
+#define FFLOOR(a) floorf(a)
+#define FROUND(x) nearbyintf(x)
+#define FSATURATE(x) __saturatef(x)
+#define FABS(a) abs(a)
+#define IASF(a, loc) (loc) = __int_as_float(a)
+#define FASI(a, loc) (loc) = __float_as_int(a)
+#define FABSLEQAS(a, b, c) \
+  ((a) <= (b) ? FSUB((b), (a)) <= (c) : FSUB((a), (b)) < (c))
+/** Calculates x*y+z. */
+#define FMA(x, y, z) __fmaf_rn((x), (y), (z))
+#define I2F(a) __int2float_rn(a)
+#define FRCP(x) __frcp_rn(x)
+__device__ static float atomicMax(float* address, float val) {
+  int* address_as_i = (int*)address;
+  int old = *address_as_i, assumed;
+  do {
+    assumed = old;
+    old = ::atomicCAS(
+        address_as_i,
+        assumed,
+        __float_as_int(::fmaxf(val, __int_as_float(assumed))));
+  } while (assumed != old);
+  return __int_as_float(old);
+}
+__device__ static float atomicMin(float* address, float val) {
+  int* address_as_i = (int*)address;
+  int old = *address_as_i, assumed;
+  do {
+    assumed = old;
+    old = ::atomicCAS(
+        address_as_i,
+        assumed,
+        __float_as_int(::fminf(val, __int_as_float(assumed))));
+  } while (assumed != old);
+  return __int_as_float(old);
+}
+#define DMAX(a, b) FMAX(a, b)
+#define DMIN(a, b) FMIN(a, b)
+#define DSQRT(a) sqrt(a)
+#define DSATURATE(a) DMIN(1., DMAX(0., (a)))
+// half
+#define HADD(a, b) __hadd((a), (b))
+#define HSUB2(a, b) __hsub2((a), (b))
+#define HMUL2(a, b) __hmul2((a), (b))
+#define HSQRT(a) hsqrt(a)
+
+// uint.
+#define CLZ(VAL) __clz(VAL)
+#define POPC(a) __popc(a)
+//
+//
+//
+//
+//
+//
+//
+//
+//
+#define ATOMICADD(PTR, VAL) atomicAdd((PTR), (VAL))
+#define ATOMICADD_F3(PTR, VAL)   \
+  ATOMICADD(&((PTR)->x), VAL.x); \
+  ATOMICADD(&((PTR)->y), VAL.y); \
+  ATOMICADD(&((PTR)->z), VAL.z);
+#if (CUDART_VERSION >= 10000) && (__CUDA_ARCH__ >= 600)
+#define ATOMICADD_B(PTR, VAL) atomicAdd_block((PTR), (VAL))
+#else
+#define ATOMICADD_B(PTR, VAL) ATOMICADD(PTR, VAL)
+#endif
+//
+//
+//
+//
+// int.
+#define IMIN(a, b) min((a), (b))
+#define IMAX(a, b) max((a), (b))
+#define IABS(a) abs(a)
+
+// Checks.
+// like TORCH_CHECK_ARG in PyTorch > 1.10
+#define ARGCHECK(cond, argN, ...) \
+  TORCH_CHECK(cond, "invalid argument ", argN, ": ", __VA_ARGS__)
+
+// Math.
+#define NORM3DF(x, y, z) norm3df(x, y, z)
+#define RNORM3DF(x, y, z) rnorm3df(x, y, z)
+
+// High level.
+#define GET_SORT_WS_SIZE(RES_PTR, KEY_TYPE, VAL_TYPE, NUM_OBJECTS) \
+  cub::DeviceRadixSort::SortPairsDescending(                       \
+      (void*)NULL,                                                 \
+      *(RES_PTR),                                                  \
+      reinterpret_cast<KEY_TYPE*>(NULL),                           \
+      reinterpret_cast<KEY_TYPE*>(NULL),                           \
+      reinterpret_cast<VAL_TYPE*>(NULL),                           \
+      reinterpret_cast<VAL_TYPE*>(NULL),                           \
+      (NUM_OBJECTS));
+#define GET_REDUCE_WS_SIZE(RES_PTR, TYPE, REDUCE_OP, NUM_OBJECTS) \
+  {                                                               \
+    TYPE init = TYPE();                                           \
+    cub::DeviceReduce::Reduce(                                    \
+        (void*)NULL,                                              \
+        *(RES_PTR),                                               \
+        (TYPE*)NULL,                                              \
+        (TYPE*)NULL,                                              \
+        (NUM_OBJECTS),                                            \
+        (REDUCE_OP),                                              \
+        init);                                                    \
+  }
+#define GET_SELECT_WS_SIZE(                              \
+    RES_PTR, TYPE_SELECTOR, TYPE_SELECTION, NUM_OBJECTS) \
+  {                                                      \
+    cub::DeviceSelect::Flagged(                          \
+        (void*)NULL,                                     \
+        *(RES_PTR),                                      \
+        (TYPE_SELECTION*)NULL,                           \
+        (TYPE_SELECTOR*)NULL,                            \
+        (TYPE_SELECTION*)NULL,                           \
+        (int*)NULL,                                      \
+        (NUM_OBJECTS));                                  \
+  }
+#define GET_SUM_WS_SIZE(RES_PTR, TYPE_SUM, NUM_OBJECTS) \
+  {                                                     \
+    cub::DeviceReduce::Sum(                             \
+        (void*)NULL,                                    \
+        *(RES_PTR),                                     \
+        (TYPE_SUM*)NULL,                                \
+        (TYPE_SUM*)NULL,                                \
+        NUM_OBJECTS);                                   \
+  }
+#define GET_MM_WS_SIZE(RES_PTR, TYPE, NUM_OBJECTS)                         \
+  {                                                                        \
+    TYPE init = TYPE();                                                    \
+    cub::DeviceReduce::Max(                                                \
+        (void*)NULL, *(RES_PTR), (TYPE*)NULL, (TYPE*)NULL, (NUM_OBJECTS)); \
+  }
+#define SORT_DESCENDING(                                               \
+    TMPN1, SORT_PTR, SORTED_PTR, VAL_PTR, VAL_SORTED_PTR, NUM_OBJECTS) \
+  void* TMPN1 = NULL;                                                  \
+  size_t TMPN1##_bytes = 0;                                            \
+  cub::DeviceRadixSort::SortPairsDescending(                           \
+      TMPN1,                                                           \
+      TMPN1##_bytes,                                                   \
+      (SORT_PTR),                                                      \
+      (SORTED_PTR),                                                    \
+      (VAL_PTR),                                                       \
+      (VAL_SORTED_PTR),                                                \
+      (NUM_OBJECTS));                                                  \
+  HANDLECUDA(cudaMalloc(&TMPN1, TMPN1##_bytes));                       \
+  cub::DeviceRadixSort::SortPairsDescending(                           \
+      TMPN1,                                                           \
+      TMPN1##_bytes,                                                   \
+      (SORT_PTR),                                                      \
+      (SORTED_PTR),                                                    \
+      (VAL_PTR),                                                       \
+      (VAL_SORTED_PTR),                                                \
+      (NUM_OBJECTS));                                                  \
+  HANDLECUDA(cudaFree(TMPN1));
+#define SORT_DESCENDING_WS(                  \
+    TMPN1,                                   \
+    SORT_PTR,                                \
+    SORTED_PTR,                              \
+    VAL_PTR,                                 \
+    VAL_SORTED_PTR,                          \
+    NUM_OBJECTS,                             \
+    WORKSPACE_PTR,                           \
+    WORKSPACE_BYTES)                         \
+  cub::DeviceRadixSort::SortPairsDescending( \
+      (WORKSPACE_PTR),                       \
+      (WORKSPACE_BYTES),                     \
+      (SORT_PTR),                            \
+      (SORTED_PTR),                          \
+      (VAL_PTR),                             \
+      (VAL_SORTED_PTR),                      \
+      (NUM_OBJECTS));
+#define SORT_ASCENDING_WS(         \
+    SORT_PTR,                      \
+    SORTED_PTR,                    \
+    VAL_PTR,                       \
+    VAL_SORTED_PTR,                \
+    NUM_OBJECTS,                   \
+    WORKSPACE_PTR,                 \
+    WORKSPACE_BYTES,               \
+    STREAM)                        \
+  cub::DeviceRadixSort::SortPairs( \
+      (WORKSPACE_PTR),             \
+      (WORKSPACE_BYTES),           \
+      (SORT_PTR),                  \
+      (SORTED_PTR),                \
+      (VAL_PTR),                   \
+      (VAL_SORTED_PTR),            \
+      (NUM_OBJECTS),               \
+      0,                           \
+      sizeof(*(SORT_PTR)) * 8,     \
+      (STREAM));
+#define SUM_WS(                                                            \
+    SUM_PTR, OUT_PTR, NUM_OBJECTS, WORKSPACE_PTR, WORKSPACE_BYTES, STREAM) \
+  cub::DeviceReduce::Sum(                                                  \
+      (WORKSPACE_PTR),                                                     \
+      (WORKSPACE_BYTES),                                                   \
+      (SUM_PTR),                                                           \
+      (OUT_PTR),                                                           \
+      (NUM_OBJECTS),                                                       \
+      (STREAM));
+#define MIN_WS(                                                            \
+    MIN_PTR, OUT_PTR, NUM_OBJECTS, WORKSPACE_PTR, WORKSPACE_BYTES, STREAM) \
+  cub::DeviceReduce::Min(                                                  \
+      (WORKSPACE_PTR),                                                     \
+      (WORKSPACE_BYTES),                                                   \
+      (MIN_PTR),                                                           \
+      (OUT_PTR),                                                           \
+      (NUM_OBJECTS),                                                       \
+      (STREAM));
+#define MAX_WS(                                                            \
+    MAX_PTR, OUT_PTR, NUM_OBJECTS, WORKSPACE_PTR, WORKSPACE_BYTES, STREAM) \
+  cub::DeviceReduce::Max(                                                  \
+      (WORKSPACE_PTR),                                                     \
+      (WORKSPACE_BYTES),                                                   \
+      (MAX_PTR),                                                           \
+      (OUT_PTR),                                                           \
+      (NUM_OBJECTS),                                                       \
+      (STREAM));
+//
+//
+//
+// TODO: rewrite using nested contexts instead of temporary names.
+#define REDUCE(REDUCE_PTR, RESULT_PTR, NUM_ITEMS, REDUCE_OP, REDUCE_INIT) \
+  cub::DeviceReduce::Reduce(                                              \
+      TMPN1,                                                              \
+      TMPN1##_bytes,                                                      \
+      (REDUCE_PTR),                                                       \
+      (RESULT_PTR),                                                       \
+      (NUM_ITEMS),                                                        \
+      (REDUCE_OP),                                                        \
+      (REDUCE_INIT));                                                     \
+  HANDLECUDA(cudaMalloc(&TMPN1, TMPN1##_bytes));                          \
+  cub::DeviceReduce::Reduce(                                              \
+      TMPN1,                                                              \
+      TMPN1##_bytes,                                                      \
+      (REDUCE_PTR),                                                       \
+      (RESULT_PTR),                                                       \
+      (NUM_ITEMS),                                                        \
+      (REDUCE_OP),                                                        \
+      (REDUCE_INIT));                                                     \
+  HANDLECUDA(cudaFree(TMPN1));
+#define REDUCE_WS(           \
+    REDUCE_PTR,              \
+    RESULT_PTR,              \
+    NUM_ITEMS,               \
+    REDUCE_OP,               \
+    REDUCE_INIT,             \
+    WORKSPACE_PTR,           \
+    WORSPACE_BYTES,          \
+    STREAM)                  \
+  cub::DeviceReduce::Reduce( \
+      (WORKSPACE_PTR),       \
+      (WORSPACE_BYTES),      \
+      (REDUCE_PTR),          \
+      (RESULT_PTR),          \
+      (NUM_ITEMS),           \
+      (REDUCE_OP),           \
+      (REDUCE_INIT),         \
+      (STREAM));
+#define SELECT_FLAGS_WS(      \
+    FLAGS_PTR,                \
+    ITEM_PTR,                 \
+    OUT_PTR,                  \
+    NUM_SELECTED_PTR,         \
+    NUM_ITEMS,                \
+    WORKSPACE_PTR,            \
+    WORKSPACE_BYTES,          \
+    STREAM)                   \
+  cub::DeviceSelect::Flagged( \
+      (WORKSPACE_PTR),        \
+      (WORKSPACE_BYTES),      \
+      (ITEM_PTR),             \
+      (FLAGS_PTR),            \
+      (OUT_PTR),              \
+      (NUM_SELECTED_PTR),     \
+      (NUM_ITEMS),            \
+      (STREAM));
+
+#define COPY_HOST_DEV(PTR_D, PTR_H, TYPE, SIZE) \
+  HANDLECUDA(cudaMemcpy(                        \
+      (PTR_D), (PTR_H), sizeof(TYPE) * (SIZE), cudaMemcpyHostToDevice))
+#define COPY_DEV_HOST(PTR_H, PTR_D, TYPE, SIZE) \
+  HANDLECUDA(cudaMemcpy(                        \
+      (PTR_H), (PTR_D), sizeof(TYPE) * (SIZE), cudaMemcpyDeviceToHost))
+#define COPY_DEV_DEV(PTR_T, PTR_S, TYPE, SIZE) \
+  HANDLECUDA(cudaMemcpy(                       \
+      (PTR_T), (PTR_S), sizeof(TYPE) * (SIZE), cudaMemcpyDeviceToDevice))
+//
+// We *must* use cudaMallocManaged for pointers on device that should
+// interact with pytorch. However, this comes at a significant speed penalty.
+// We're using plain CUDA pointers for the rendering operations and
+// explicitly copy results to managed pointers wrapped for pytorch (see
+// pytorch/util.h).
+#define MALLOC(VAR, TYPE, SIZE) cudaMalloc(&(VAR), sizeof(TYPE) * (SIZE))
+#define FREE(PTR) HANDLECUDA(cudaFree(PTR))
+#define MEMSET(VAR, VAL, TYPE, SIZE, STREAM) \
+  HANDLECUDA(cudaMemsetAsync((VAR), (VAL), sizeof(TYPE) * (SIZE), (STREAM)))
+
+#define LAUNCH_MAX_PARALLEL_1D(FUNC, N, STREAM, ...)                \
+  {                                                                 \
+    int64_t max_threads =                                           \
+        at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock; \
+    uint num_threads = min((N), max_threads);                       \
+    uint num_blocks = iDivCeil((N), num_threads);                   \
+    FUNC<<<num_blocks, num_threads, 0, (STREAM)>>>(__VA_ARGS__);    \
+  }
+#define LAUNCH_PARALLEL_1D(FUNC, N, TN, STREAM, ...)                   \
+  {                                                                    \
+    uint num_threads = min(static_cast<int>(N), static_cast<int>(TN)); \
+    uint num_blocks = iDivCeil((N), num_threads);                      \
+    FUNC<<<num_blocks, num_threads, 0, (STREAM)>>>(__VA_ARGS__);       \
+  }
+#define LAUNCH_MAX_PARALLEL_2D(FUNC, NX, NY, STREAM, ...)               \
+  {                                                                     \
+    int64_t max_threads =                                               \
+        at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock;     \
+    int64_t max_threads_sqrt = static_cast<int64_t>(sqrt(max_threads)); \
+    dim3 num_threads, num_blocks;                                       \
+    num_threads.x = min((NX), max_threads_sqrt);                        \
+    num_blocks.x = iDivCeil((NX), num_threads.x);                       \
+    num_threads.y = min((NY), max_threads_sqrt);                        \
+    num_blocks.y = iDivCeil((NY), num_threads.y);                       \
+    num_threads.z = 1;                                                  \
+    num_blocks.z = 1;                                                   \
+    FUNC<<<num_blocks, num_threads, 0, (STREAM)>>>(__VA_ARGS__);        \
+  }
+#define LAUNCH_PARALLEL_2D(FUNC, NX, NY, TX, TY, STREAM, ...)    \
+  {                                                              \
+    dim3 num_threads, num_blocks;                                \
+    num_threads.x = min((NX), (TX));                             \
+    num_blocks.x = iDivCeil((NX), num_threads.x);                \
+    num_threads.y = min((NY), (TY));                             \
+    num_blocks.y = iDivCeil((NY), num_threads.y);                \
+    num_threads.z = 1;                                           \
+    num_blocks.z = 1;                                            \
+    FUNC<<<num_blocks, num_threads, 0, (STREAM)>>>(__VA_ARGS__); \
+  }
+
+#define GET_PARALLEL_IDX_1D(VARNAME, N)                               \
+  const uint VARNAME = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; \
+  if (VARNAME >= (N)) {                                               \
+    return;                                                           \
+  }
+#define GET_PARALLEL_IDS_2D(VAR_X, VAR_Y, WIDTH, HEIGHT)            \
+  const uint VAR_X = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; \
+  const uint VAR_Y = __mul24(blockIdx.y, blockDim.y) + threadIdx.y; \
+  if (VAR_X >= (WIDTH) || VAR_Y >= (HEIGHT))                        \
+    return;
+#define END_PARALLEL()
+#define END_PARALLEL_NORET()
+#define END_PARALLEL_2D_NORET()
+#define END_PARALLEL_2D()
+#define RETURN_PARALLEL() return
+#define CHECKLAUNCH() C10_CUDA_CHECK(cudaGetLastError());
+#define ISONDEVICE true
+#define SYNCDEVICE() HANDLECUDA(cudaDeviceSynchronize())
+#define START_TIME(TN)                             \
+  cudaEvent_t __time_start_##TN, __time_stop_##TN; \
+  cudaEventCreate(&__time_start_##TN);             \
+  cudaEventCreate(&__time_stop_##TN);              \
+  cudaEventRecord(__time_start_##TN);
+#define STOP_TIME(TN) cudaEventRecord(__time_stop_##TN);
+#define GET_TIME(TN, TOPTR)               \
+  cudaEventSynchronize(__time_stop_##TN); \
+  cudaEventElapsedTime((TOPTR), __time_start_##TN, __time_stop_##TN);
+#define START_TIME_CU(TN) START_TIME(TN)
+#define STOP_TIME_CU(TN) STOP_TIME(TN)
+#define GET_TIME_CU(TN, TOPTR) GET_TIME(TN, TOPTR)
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.backward.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.backward.gpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..e0da7b7020c0a3f5ae0647030282adf0e0103d39
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.backward.gpu.cu
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.backward.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.backward_dbg.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.backward_dbg.gpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..a95bb421d2d9b6bfec1a9286e035b042b0d9842c
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.backward_dbg.gpu.cu
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.backward_dbg.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.calc_gradients.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.calc_gradients.gpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..ff38b08e0dfe46e65a94039c8dec7da721d0421a
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.calc_gradients.gpu.cu
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.calc_gradients.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.calc_signature.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.calc_signature.gpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..81c72192eaa877038d9383cfdd0adf9a91e06f97
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.calc_signature.gpu.cu
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.calc_signature.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.construct.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.construct.gpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..67583511aec2a6bd4dd8670aeb809939a3d2e19c
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.construct.gpu.cu
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.construct.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.create_selector.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.create_selector.gpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..52e265bcb2ab8ca9e4d08d90d1dc4fef75294520
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.create_selector.gpu.cu
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.create_selector.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.destruct.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.destruct.gpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..e61be93fa4c4893e6c4800f71cf49ef81c717ff0
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.destruct.gpu.cu
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.destruct.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.fill_bg.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.fill_bg.gpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..6c7b1a48b675b1dbe69992c81a8cbb8c8861911e
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.fill_bg.gpu.cu
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.fill_bg.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.forward.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.forward.gpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..bfb42debeeaa7444daec94a88830c39825239170
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.forward.gpu.cu
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.forward.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.norm_cam_gradients.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.norm_cam_gradients.gpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..93d666324a4973f44ad4becbeecaf34e0c7b96e5
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.norm_cam_gradients.gpu.cu
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.norm_cam_gradients.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.norm_sphere_gradients.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.norm_sphere_gradients.gpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..65339caea11645e4b7ba99a0af77c21b4ae2f738
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.norm_sphere_gradients.gpu.cu
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.norm_sphere_gradients.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.render.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.render.gpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..eb46adbafbc1c2a60dfb21fa9ce222828e53e31b
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.render.gpu.cu
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.render.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/global.h b/pytorch3d/pytorch3d/csrc/pulsar/global.h
new file mode 100644
index 0000000000000000000000000000000000000000..3cea957e1f09d32494bc6e644e8929a22534270d
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/global.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_GLOBAL_H
+#define PULSAR_GLOBAL_H
+
+#include "./constants.h"
+#ifndef WIN32
+#include <csignal>
+#endif
+
+#if defined(_WIN64) || defined(_WIN32) // Windows targets only.
+#define uint unsigned int // Macro alias; `uint` is used as a type name throughout pulsar.
+#define ushort unsigned short // Likewise for `ushort`.
+#endif
+
+#include "./logging.h" // <- include before torch/extension.h
+
+#define MAX_GRAD_SPHERES 128 // NOTE(review): meaning not visible in this header; presumably a cap on spheres per gradient computation — confirm at the use sites.
+
+#ifdef __CUDACC__ // CUDA compiler: map the portability macros to CUDA qualifiers.
+#define INLINE __forceinline__
+#define HOST __host__
+#define DEVICE __device__
+#define GLOBAL __global__
+#define RESTRICT __restrict__
+#define DEBUGBREAK() // Expands to nothing in CUDA compilation units.
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ // Use the `nv_diag_suppress` pragma spelling.
+#pragma nv_diag_suppress 1866
+#pragma nv_diag_suppress 2941
+#pragma nv_diag_suppress 2951
+#pragma nv_diag_suppress 2967
+#else // Fallback spelling; additionally silences `attribute_not_allowed`.
+#pragma diag_suppress = attribute_not_allowed
+#pragma diag_suppress = 1866
+#pragma diag_suppress = 2941
+#pragma diag_suppress = 2951
+#pragma diag_suppress = 2967
+#endif
+#else // __CUDACC__
+#define INLINE inline
+#define HOST // HOST, DEVICE, GLOBAL and RESTRICT expand to nothing for host compilers.
+#define DEVICE
+#define GLOBAL
+#define RESTRICT
+#define DEBUGBREAK() std::raise(SIGINT) // NOTE(review): <csignal> is only included above when WIN32 is undefined — confirm Windows builds.
+// Don't care about pytorch warnings; they shouldn't clutter our warnings.
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Weverything"
+#include <torch/extension.h>
+#pragma clang diagnostic pop
+#ifdef WITH_CUDA
+#include <ATen/cuda/CUDAContext.h>
+#include <vector_functions.h>
+#else
+#ifndef cudaStream_t // NOTE(review): this tests for a macro of that name; an existing typedef would not be detected.
+typedef void* cudaStream_t; // Opaque placeholder so signatures taking a stream compile without CUDA.
+#endif
+// Plain-struct stand-ins for the CUDA vector types used by pulsar. This
+// branch is only compiled when WITH_CUDA is not defined, i.e. when
+// <vector_functions.h> is not included above. Each type is an aggregate
+// whose members are declared in x, y(, z) order.
+//
+// 2-component integer vector.
+struct int2 { int x; int y; };
+// 2-component unsigned short vector.
+struct ushort2 { unsigned short x; unsigned short y; };
+// 2- and 3-component float vectors.
+struct float2 { float x; float y; };
+struct float3 { float x; float y; float z; };
+// Host replacement for CUDA's `make_float3` (only compiled without
+// WITH_CUDA): returns a float3 whose components are x, y and z.
+inline float3 make_float3(const float& x, const float& y, const float& z) {
+  // float3 is a plain aggregate here, so brace-initialize it in one go.
+  const float3 packed = {x, y, z};
+  return packed;
+}
+#endif
+namespace py = pybind11;
+
+inline bool operator==(const float3& a, const float3& b) { // Exact, per-component.
+  return !(a.x != b.x || a.y != b.y || a.z != b.z);
+}
+#endif // __CUDACC__
+#define IHD INLINE HOST DEVICE // Shorthand for an inline function usable on host and device.
+
+// An assertion command that can be used on host and device.
+#ifdef PULSAR_ASSERTIONS // Assertions are opt-in; without this define PASSERT expands to nothing.
+#ifdef __CUDACC__ // CUDA compilers: report the failure with printf only.
+#define PASSERT(VAL)                                     \
+  if (!(VAL)) {                                          \
+    printf(                                              \
+        "Pulsar assertion failed in %s, line %d: %s.\n", \
+        __FILE__,                                        \
+        __LINE__,                                        \
+        #VAL);                                           \
+  } // Execution continues after the message.
+#else // Host compilers: report the failure, then raise SIGINT.
+#define PASSERT(VAL)                                     \
+  if (!(VAL)) {                                          \
+    printf(                                              \
+        "Pulsar assertion failed in %s, line %d: %s.\n", \
+        __FILE__,                                        \
+        __LINE__,                                        \
+        #VAL);                                           \
+    std::raise(SIGINT);                                  \
+  } // NOTE(review): both variants expand to a bare `if`; an `else` written after PASSERT(...) would bind to it.
+#endif
+#else // PULSAR_ASSERTIONS is not defined.
+#define PASSERT(VAL) // No-op: VAL is not evaluated.
+#endif
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/README.md b/pytorch3d/pytorch3d/csrc/pulsar/host/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..34f1bade9134da24f4038425c4b50fe1fffc45dc
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/host/README.md
@@ -0,0 +1,5 @@
+# Device-specific host compilation units
+
+This folder contains the `.cpp` files that create the host (CPU)
+compilation units for device-specific functions. See
+`../include/README.md` for more information.
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/commands.h b/pytorch3d/pytorch3d/csrc/pulsar/host/commands.h
new file mode 100644
index 0000000000000000000000000000000000000000..4378303bbc310b879bb25329cc35c29e40ef0367
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/host/commands.h
@@ -0,0 +1,391 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_COMMANDS_H_
+#define PULSAR_NATIVE_COMMANDS_H_
+
+#ifdef _MSC_VER // MSVC has no __builtin_popcount.
+#include <intrin.h>
+#define __builtin_popcount (int)__popcnt // Shim for the builtin that POPC (below) expands to.
+#endif
+
+// Definitions for CPU commands.
+// #include <execution>
+// #include <numeric>
+
+// Host stand-ins for the subset of CUDA's cooperative-groups (`cg`) interface
+// that pulsar codes against. On the CPU every group consists of exactly one
+// thread, so ranks are 0, sizes are 1 and synchronization has nothing to
+// wait for. All types are stateless.
+namespace cg {
+// One-thread replacement for a coalesced group.
+struct coalesced_group {
+  INLINE uint thread_rank() const { return 0u; }
+  INLINE uint size() const { return 1u; }
+  // Vote of the single participating thread: 1 if `val` is non-zero, else 0.
+  INLINE uint ballot(uint val) const {
+    return val > 0 ? 1u : 0u;
+  }
+};
+
+// One-thread replacement for a thread block.
+struct thread_block {
+  INLINE uint thread_rank() const { return 0u; }
+  INLINE uint size() const { return 1u; }
+  // Nothing to synchronize with on the host.
+  INLINE void sync() const {}
+};
+
+// Returns the one-thread coalesced group.
+INLINE coalesced_group coalesced_threads() {
+  return coalesced_group();
+}
+
+// Returns the one-thread block.
+INLINE thread_block this_thread_block() {
+  return thread_block();
+}
+} // namespace cg
+#define SHFL_SYNC(a, b, c) (b) // Host shuffle: yields the second argument unchanged; `a` and `c` are ignored.
+template <typename T> // Host version of the warp cumulative sum.
+T WARP_CUMSUM(
+    const cg::coalesced_group& group,
+    const uint& mask,
+    const T& base) {
+  return base; // `group` and `mask` are ignored; the input is returned as is.
+}
+
+template <typename T> // Host version of the warp maximum.
+DEVICE T
+WARP_MAX(const cg::coalesced_group& group, const uint& mask, const T& base) {
+  return base; // `group` and `mask` are ignored; the input is returned as is.
+}
+
+template <typename T> // Host version of the warp sum.
+DEVICE T
+WARP_SUM(const cg::coalesced_group& group, const uint& mask, const T& base) {
+  return base; // `group` and `mask` are ignored; the input is returned as is.
+}
+
+INLINE DEVICE float3 WARP_SUM_FLOAT3( // Host version of the warp sum for float3 values.
+    const cg::coalesced_group& group,
+    const uint& mask,
+    const float3& base) {
+  return base; // `group` and `mask` are ignored; the input is returned as is.
+}
+
+#define ACTIVEMASK() (1u << 31) // Constant mask with only bit 31 set.
+#define ALIGN(VAL) // ALIGN, SYNC, THREADFENCE_B and SHARED expand to nothing on the host.
+#define SYNC()
+#define THREADFENCE_B()
+#define BALLOT(mask, val) ((val) != 0) // `mask` is ignored; `val` is parenthesized so `a | b` is tested as a whole.
+#define SHARED
+// Floating point.
+#define FMAX(a, b) std::fmax((a), (b)) // Forwards to std::fmax.
+#define FMIN(a, b) std::fmin((a), (b)) // Forwards to std::fmin.
+INLINE float atomicMax(float* address, float val) {
+  // Plain read-modify-write (no real atomics); yields the value now stored.
+  return *address = std::max(*address, val);
+}
+INLINE float atomicMin(float* address, float val) {
+  // Plain read-modify-write (no real atomics); yields the value now stored.
+  return *address = std::min(*address, val);
+}
+#define FMUL(a, b) ((a) * (b)) // FMUL/FDIV/FSUB/FADD are the plain operators on the host.
+#define FDIV(a, b) ((a) / (b))
+#define FSUB(a, b) ((a) - (b))
+#define FABSLEQAS(a, b, c) \
+  ((a) <= (b) ? FSUB((b), (a)) <= (c) : FSUB((a), (b)) < (c)) // Is |a - b| within c? Note `<=` when a <= b, but `<` otherwise.
+#define FADD(a, b) ((a) + (b))
+#define FSQRT(a) sqrtf(a)
+#define FEXP(a) fasterexp(a) // NOTE(review): fasterexp/fasterlog are not defined in this header; presumably fast approximations — confirm.
+#define FLN(a) fasterlog(a)
+#define FPOW(a, b) powf((a), (b))
+#define FROUND(x) roundf(x)
+#define FCEIL(a) ceilf(a)
+#define FFLOOR(a) floorf(a)
+#define FSATURATE(x) std::max(0.f, std::min(1.f, x)) // Clamp to [0, 1].
+#define FABS(a) abs(a) // NOTE(review): unqualified `abs`; confirm a floating-point overload is visible wherever this is used.
+#define FMA(x, y, z) ((x) * (y) + (z)) // Written as separate multiply and add.
+#define I2F(a) static_cast<float>(a)
+#define FRCP(x) (1.f / (x))
+#define IASF(x, loc) memcpy(&(loc), &(x), sizeof(x)) // Copies the bytes of `x` into `loc` (bit reinterpretation, no conversion).
+#define FASI(x, loc) memcpy(&(loc), &(x), sizeof(x)) // Same expansion as IASF.
+#define DMAX(a, b) std::max((a), (b))
+#define DMIN(a, b) std::min((a), (b))
+#define DSATURATE(a) DMIN(1., DMAX(0., (a))) // Clamp to [0, 1] using double literals.
+#define DSQRT(a) sqrt(a)
+//
+//
+//
+//
+//
+//
+//
+//
+//
+//
+//
+//
+// uint.
+#define CLZ(VAL) _clz(VAL) // NOTE(review): `_clz` is not defined in this header — confirm where it comes from.
+template <typename T> // Host fetch-and-add; plain memory operations, no real atomics.
+INLINE T ATOMICADD(T* address, T val) {
+  T previous = *address; // Value before the addition; this is what is returned.
+  *address += val;
+  return previous;
+}
+template <typename T> // Component-wise add for types with x/y/z members.
+INLINE void ATOMICADD_F3(T* address, T val) {
+  address->x += val.x; // Same effect as calling ATOMICADD per component on
+  address->y += val.y; // the host: its returned old value was discarded.
+  address->z += val.z;
+}
+#define ATOMICADD_B(a, b) ATOMICADD((a), (b)) // Same as ATOMICADD on the host.
+#define POPC(a) __builtin_popcount(a) // Population count (mapped to __popcnt under MSVC above).
+
+// int.
+#define IMIN(a, b) std::min((a), (b))
+#define IMAX(a, b) std::max((a), (b))
+#define IABS(a) abs(a) // Unqualified `abs`, listed under the int helpers.
+
+// Checks.
+// Like TORCH_CHECK_ARG in PyTorch > 1.10.
+#define ARGCHECK(cond, argN, ...) \
+  TORCH_CHECK(cond, "invalid argument ", argN, ": ", __VA_ARGS__) // Message parts: "invalid argument ", argN, ": ", then the extra arguments.
+
+// Math. Arguments are parenthesized so that expressions such as `a - b` expand correctly.
+#define NORM3DF(x, y, z) sqrtf((x) * (x) + (y) * (y) + (z) * (z)) // Euclidean length of (x, y, z).
+#define RNORM3DF(x, y, z) (1.f / sqrtf((x) * (x) + (y) * (y) + (z) * (z))) // Reciprocal of that length.
+
+// High level. The host algorithms below ignore their workspace arguments, so every size query reports 0.
+#define PREFETCH(PTR) // No-op on the host.
+#define GET_SORT_WS_SIZE(RES_PTR, KEY_TYPE, VAL_TYPE, NUM_OBJECTS) \
+  *(RES_PTR) = 0; // Only RES_PTR is used; the expansion already ends in `;`.
+#define GET_REDUCE_WS_SIZE(RES_PTR, TYPE, REDUCE_OP, NUM_OBJECTS) \
+  *(RES_PTR) = 0;
+#define GET_SELECT_WS_SIZE(                              \
+    RES_PTR, TYPE_SELECTOR, TYPE_SELECTION, NUM_OBJECTS) \
+  *(RES_PTR) = 0;
+#define GET_SUM_WS_SIZE(RES_PTR, TYPE_SUM, NUM_OBJECTS) *(RES_PTR) = 0;
+#define GET_MM_WS_SIZE(RES_PTR, TYPE, NUM_OBJECTS) *(RES_PTR) = 0;
+// SORT_DESCENDING: index-sorts SORT_PTR[0..NUM_OBJECTS) in descending order with std::sort, then gathers keys into SORTED_PTR and values into VAL_SORTED_PTR.
+#define SORT_DESCENDING(                                                     \
+    TMPN1, SORT_PTR, SORTED_PTR, VAL_PTR, VAL_SORTED_PTR, NUM_OBJECTS)       \
+  std::vector<size_t> TMPN1(NUM_OBJECTS);                                    \
+  std::iota(TMPN1.begin(), TMPN1.end(), 0);                                  \
+  const auto TMPN1##_val_ptr = (SORT_PTR);                                   \
+  std::sort(                                                                 \
+      TMPN1.begin(), TMPN1.end(), [&TMPN1##_val_ptr](size_t i1, size_t i2) { \
+        return TMPN1##_val_ptr[i1] > TMPN1##_val_ptr[i2];                    \
+      });                                                                    \
+  for (int i = 0; i < (NUM_OBJECTS); ++i) {                                  \
+    (SORTED_PTR)[i] = (SORT_PTR)[TMPN1[i]];                                  \
+  }                                                                          \
+  for (int i = 0; i < (NUM_OBJECTS); ++i) {                                  \
+    (VAL_SORTED_PTR)[i] = (VAL_PTR)[TMPN1[i]];                               \
+  } // Not brace-wrapped: the index vector TMPN1 and TMPN1_val_ptr are declared in the caller's scope.
+// SORT_ASCENDING: index-sorts SORT_PTR[0..NUM_OBJECTS) in ascending order with std::sort, then gathers keys into SORTED_PTR and values into VAL_SORTED_PTR.
+#define SORT_ASCENDING(                                                 \
+    SORT_PTR, SORTED_PTR, VAL_PTR, VAL_SORTED_PTR, NUM_OBJECTS, STREAM) \
+  {                                                                     \
+    std::vector<size_t> TMPN1(NUM_OBJECTS);                             \
+    std::iota(TMPN1.begin(), TMPN1.end(), 0);                           \
+    const auto TMPN1_val_ptr = (SORT_PTR);                              \
+    std::sort(                                                          \
+        TMPN1.begin(),                                                  \
+        TMPN1.end(),                                                    \
+        [&TMPN1_val_ptr](size_t i1, size_t i2) -> bool {                \
+          return TMPN1_val_ptr[i1] < TMPN1_val_ptr[i2];                 \
+        });                                                             \
+    for (int i = 0; i < (NUM_OBJECTS); ++i) {                           \
+      (SORTED_PTR)[i] = (SORT_PTR)[TMPN1[i]];                           \
+    }                                                                   \
+    for (int i = 0; i < (NUM_OBJECTS); ++i) {                           \
+      (VAL_SORTED_PTR)[i] = (VAL_PTR)[TMPN1[i]];                        \
+    }                                                                   \
+  } // Brace-wrapped, so the temporaries stay local; STREAM is not used.
+// Workspace variant of SORT_DESCENDING: the two workspace arguments are accepted and ignored.
+#define SORT_DESCENDING_WS( \
+    TMPN1,                  \
+    SORT_PTR,               \
+    SORTED_PTR,             \
+    VAL_PTR,                \
+    VAL_SORTED_PTR,         \
+    NUM_OBJECTS,            \
+    WORSPACE_PTR,           \
+    WORKSPACE_SIZE)         \
+  SORT_DESCENDING(          \
+      TMPN1, SORT_PTR, SORTED_PTR, VAL_PTR, VAL_SORTED_PTR, NUM_OBJECTS) // Plain forward to SORT_DESCENDING.
+// Workspace variant of SORT_ASCENDING: the two workspace arguments are accepted and ignored.
+#define SORT_ASCENDING_WS( \
+    SORT_PTR,              \
+    SORTED_PTR,            \
+    VAL_PTR,               \
+    VAL_SORTED_PTR,        \
+    NUM_OBJECTS,           \
+    WORSPACE_PTR,          \
+    WORKSPACE_SIZE,        \
+    STREAM)                \
+  SORT_ASCENDING(          \
+      SORT_PTR, SORTED_PTR, VAL_PTR, VAL_SORTED_PTR, NUM_OBJECTS, STREAM) // Plain forward to SORT_ASCENDING.
+// REDUCE: left fold of REDUCE_PTR[0..NUM_ITEMS) with REDUCE_OP, starting from REDUCE_INIT; the result is written to *RESULT_PTR.
+#define REDUCE(REDUCE_PTR, RESULT_PTR, NUM_ITEMS, REDUCE_OP, REDUCE_INIT) \
+  {                                                                       \
+    *(RESULT_PTR) = (REDUCE_INIT);                                        \
+    for (int i = 0; i < (NUM_ITEMS); ++i) {                               \
+      *(RESULT_PTR) = REDUCE_OP(*(RESULT_PTR), (REDUCE_PTR)[i]);          \
+    }                                                                     \
+  } // With NUM_ITEMS == 0 the result is REDUCE_INIT.
+#define REDUCE_WS(  \
+    REDUCE_PTR,     \
+    RESULT_PTR,     \
+    NUM_ITEMS,      \
+    REDUCE_OP,      \
+    REDUCE_INIT,    \
+    WORKSPACE_PTR,  \
+    WORKSPACE_SIZE, \
+    STREAM)         \
+  REDUCE(REDUCE_PTR, RESULT_PTR, NUM_ITEMS, REDUCE_OP, REDUCE_INIT) // Workspace and stream arguments are ignored.
+// SELECT_FLAGS_WS: copies every ITEM_PTR[i] whose FLAGS_PTR[i] is set to OUT_PTR (input order kept) and stores the count in *NUM_SELECTED_PTR; workspace and stream arguments are ignored. All arguments are parenthesized in the expansion.
+#define SELECT_FLAGS_WS(                        \
+    FLAGS_PTR,                                  \
+    ITEM_PTR,                                   \
+    OUT_PTR,                                    \
+    NUM_SELECTED_PTR,                           \
+    NUM_ITEMS,                                  \
+    WORKSPACE_PTR,                              \
+    WORKSPACE_BYTES,                            \
+    STREAM)                                     \
+  {                                             \
+    *(NUM_SELECTED_PTR) = 0;                    \
+    ptrdiff_t write_pos = 0;                    \
+    for (int i = 0; i < (NUM_ITEMS); ++i) {     \
+      if ((FLAGS_PTR)[i]) {                     \
+        (OUT_PTR)[write_pos++] = (ITEM_PTR)[i]; \
+        *(NUM_SELECTED_PTR) += 1;               \
+      }                                         \
+    }                                           \
+  }
+// Host sum of SUM_PTR[0..NUM_OBJECTS) into *OUT_PTR (T() if empty); the workspace and stream arguments are unused.
+template <typename T>
+void SUM_WS(
+    T* SUM_PTR,
+    T* OUT_PTR,
+    size_t NUM_OBJECTS,
+    char* WORKSPACE_PTR,
+    size_t WORKSPACE_BYTES,
+    cudaStream_t STREAM) {
+  *(OUT_PTR) = T();
+  for (size_t i = 0; i < NUM_OBJECTS; ++i) { // size_t index: same type as the count.
+    *(OUT_PTR) = *(OUT_PTR) + (SUM_PTR)[i];
+  }
+}
+// Host minimum of MIN_PTR[0..NUM_OBJECTS) into *OUT_PTR (T() if empty); the workspace and stream arguments are unused.
+template <typename T>
+void MIN_WS(
+    T* MIN_PTR,
+    T* OUT_PTR,
+    size_t NUM_OBJECTS,
+    char* WORKSPACE_PTR,
+    size_t WORKSPACE_BYTES,
+    cudaStream_t STREAM) {
+  *(OUT_PTR) = NUM_OBJECTS > 0 ? (MIN_PTR)[0] : T(); // Seed with the first element: a T() seed would cap the result at T().
+  for (size_t i = 1; i < NUM_OBJECTS; ++i) {
+    *(OUT_PTR) = std::min<T>(*(OUT_PTR), (MIN_PTR)[i]);
+  }
+}
+// Host maximum of MAX_PTR[0..NUM_OBJECTS) into *OUT_PTR (T() if empty); the workspace and stream arguments are unused.
+template <typename T>
+void MAX_WS(
+    T* MAX_PTR,
+    T* OUT_PTR,
+    size_t NUM_OBJECTS,
+    char* WORKSPACE_PTR,
+    size_t WORKSPACE_BYTES,
+    cudaStream_t STREAM) {
+  *(OUT_PTR) = NUM_OBJECTS > 0 ? (MAX_PTR)[0] : T(); // Seed with the first element: a T() seed would floor the result at T().
+  for (size_t i = 1; i < NUM_OBJECTS; ++i) {
+    *(OUT_PTR) = std::max<T>(*(OUT_PTR), (MAX_PTR)[i]);
+  }
+}
+//
+//
+//
+// All three copies expand to std::memcpy on the host; the destination pointer comes first.
+#define COPY_HOST_DEV(PTR_D, PTR_H, TYPE, SIZE) \
+  std::memcpy((PTR_D), (PTR_H), sizeof(TYPE) * (SIZE))
+// SIZE counts elements of TYPE, not bytes.
+#define COPY_DEV_HOST(PTR_H, PTR_D, TYPE, SIZE) \
+  std::memcpy((PTR_H), (PTR_D), sizeof(TYPE) * (SIZE))
+// SIZE is parenthesized so that expressions such as `n + 1` are scaled as a whole.
+#define COPY_DEV_DEV(PTR_T, PTR_S, TYPE, SIZE) \
+  std::memcpy((PTR_T), (PTR_S), sizeof(TYPE) * (SIZE))
+//
+
+#define MALLOC(VAR, TYPE, SIZE) MALLOC_HOST(VAR, TYPE, SIZE) // Forwards to MALLOC_HOST (defined outside this header).
+#define FREE(PTR) FREE_HOST(PTR) // Forwards to FREE_HOST (defined outside this header).
+#define MEMSET(VAR, VAL, TYPE, SIZE, STREAM) \
+  memset((VAR), (VAL), sizeof(TYPE) * (SIZE)) // SIZE counts elements of TYPE; STREAM is ignored.
+//
+// Host "launches" expand to one direct call FUNC(args); the size, thread-count and STREAM arguments are ignored. Each expansion already ends in `;`.
+#define LAUNCH_MAX_PARALLEL_1D(FUNC, N, STREAM, ...) FUNC(__VA_ARGS__);
+#define LAUNCH_PARALLEL_1D(FUNC, N, TN, STREAM, ...) FUNC(__VA_ARGS__);
+#define LAUNCH_MAX_PARALLEL_2D(FUNC, NX, NY, STREAM, ...) FUNC(__VA_ARGS__);
+#define LAUNCH_PARALLEL_2D(FUNC, NX, NY, TX, TY, STREAM, ...) FUNC(__VA_ARGS__);
+//
+//
+//
+//
+// On the host the parallel-index macros open plain loops over the index range; each must be closed by a matching END_PARALLEL* macro.
+#define GET_PARALLEL_IDX_1D(VARNAME, N) \
+  for (uint VARNAME = 0; VARNAME < (N); ++VARNAME) { // Loop body is left open on purpose.
+#define GET_PARALLEL_IDS_2D(VAR_X, VAR_Y, WIDTH, HEIGHT)          \
+  int2 blockDim;                                                  \
+  blockDim.x = 1;                                                 \
+  blockDim.y = 1;                                                 \
+  uint __parallel_2d_width = WIDTH;                               \
+  uint __parallel_2d_height = HEIGHT;                             \
+  for (uint VAR_Y = 0; VAR_Y < __parallel_2d_height; ++(VAR_Y)) { \
+    for (uint VAR_X = 0; VAR_X < __parallel_2d_width; ++(VAR_X)) { // y is the outer loop, x the inner; a local `blockDim` of (1, 1) is also declared.
+//
+//
+// END_PARALLEL* close the loops opened by GET_PARALLEL_*; the non-NORET variants also place the `end_parallel` label at the end of the loop body.
+#define END_PARALLEL() \
+  end_parallel:;       \
+  }
+#define END_PARALLEL_NORET() } // Closes the 1D loop without defining the label.
+#define END_PARALLEL_2D() \
+  end_parallel:;          \
+  }                       \
+  }
+#define END_PARALLEL_2D_NORET() \
+  }                             \
+  }
+#define RETURN_PARALLEL() goto end_parallel; // Jumps to the label, i.e. skips the rest of the current iteration.
+#define CHECKLAUNCH() // No-op on the host.
+#define ISONDEVICE false // This header provides the host implementations.
+#define SYNCDEVICE() // No-op on the host.
+#define START_TIME(TN) \
+  auto __time_start_##TN = std::chrono::steady_clock::now(); // Declares the start stamp for timer TN in the current scope.
+#define STOP_TIME(TN) auto __time_stop_##TN = std::chrono::steady_clock::now();
+#define GET_TIME(TN, TOPTR)                                         \
+  *(TOPTR) = std::chrono::duration_cast<std::chrono::milliseconds>( \
+                 __time_stop_##TN - __time_start_##TN)              \
+                 .count() // Whole milliseconds between the two stamps; TOPTR is parenthesized as in GET_TIME_CU.
+#define START_TIME_CU(TN)                          \
+  cudaEvent_t __time_start_##TN, __time_stop_##TN; \
+  cudaEventCreate(&__time_start_##TN);             \
+  cudaEventCreate(&__time_stop_##TN);              \
+  cudaEventRecord(__time_start_##TN); // Creates both events and records the start event.
+#define STOP_TIME_CU(TN) cudaEventRecord(__time_stop_##TN);
+#define GET_TIME_CU(TN, TOPTR)            \
+  cudaEventSynchronize(__time_stop_##TN); \
+  cudaEventElapsedTime((TOPTR), __time_start_##TN, __time_stop_##TN); // NOTE(review): these macros never call cudaEventDestroy — confirm the events are released elsewhere.
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.backward.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.backward.cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e0da7b7020c0a3f5ae0647030282adf0e0103d39
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.backward.cpu.cpp
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.backward.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.backward_dbg.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.backward_dbg.cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a95bb421d2d9b6bfec1a9286e035b042b0d9842c
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.backward_dbg.cpu.cpp
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.backward_dbg.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.calc_gradients.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.calc_gradients.cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ff38b08e0dfe46e65a94039c8dec7da721d0421a
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.calc_gradients.cpu.cpp
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.calc_gradients.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.calc_signature.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.calc_signature.cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..81c72192eaa877038d9383cfdd0adf9a91e06f97
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.calc_signature.cpu.cpp
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.calc_signature.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.construct.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.construct.cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..67583511aec2a6bd4dd8670aeb809939a3d2e19c
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.construct.cpu.cpp
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.construct.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.create_selector.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.create_selector.cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..52e265bcb2ab8ca9e4d08d90d1dc4fef75294520
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.create_selector.cpu.cpp
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.create_selector.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.destruct.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.destruct.cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e61be93fa4c4893e6c4800f71cf49ef81c717ff0
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.destruct.cpu.cpp
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.destruct.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.fill_bg.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.fill_bg.cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6c7b1a48b675b1dbe69992c81a8cbb8c8861911e
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.fill_bg.cpu.cpp
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.fill_bg.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.forward.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.forward.cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bfb42debeeaa7444daec94a88830c39825239170
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.forward.cpu.cpp
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.forward.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.norm_cam_gradients.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.norm_cam_gradients.cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..93d666324a4973f44ad4becbeecaf34e0c7b96e5
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.norm_cam_gradients.cpu.cpp
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.norm_cam_gradients.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.norm_sphere_gradients.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.norm_sphere_gradients.cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..65339caea11645e4b7ba99a0af77c21b4ae2f738
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.norm_sphere_gradients.cpu.cpp
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.norm_sphere_gradients.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.render.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.render.cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..eb46adbafbc1c2a60dfb21fa9ce222828e53e31b
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.render.cpu.cpp
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "../include/renderer.render.instantiate.h"
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/README.md b/pytorch3d/pytorch3d/csrc/pulsar/include/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e963ff043abdbbf88af350512f60fb70a02a4774
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/README.md
@@ -0,0 +1,16 @@
+# The `include` folder
+
+This folder contains header files with implementations of several useful
+algorithms. These implementations usually live in files called `x.device.h`
+and use macros that route every device-specific command to the right
+implementation (see `commands.h`).
+
+If you're using a device-specific implementation, include `x.device.h`.
+This gives you the high-speed, device-specific implementation that lets
+you work with all the details of the data structure. All function calls are
+inlined. If you need to work with the high-level interface and be able to
+dynamically pick a device, only include `x.h`. The functions there are
+templated with a boolean `DEV` flag and are instantiated in device-specific
+compilation units. You will not be able to use any other functions, but can
+use `func<true>(params)` to work on a CUDA device, or `func<false>(params)`
+to work on the host.
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/camera.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/camera.device.h
new file mode 100644
index 0000000000000000000000000000000000000000..f003db31ba09e177f0119083fc00cb27fb019c0d
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/camera.device.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_CAMERA_DEVICE_H_
+#define PULSAR_NATIVE_INCLUDE_CAMERA_DEVICE_H_
+
+#include "../global.h"
+#include "./camera.h"
+#include "./commands.h"
+
+namespace pulsar {
+// Default constructor: all four gradient vectors start out as zero.
+IHD CamGradInfo::CamGradInfo() {
+  const float3 zero = make_float3(0.f, 0.f, 0.f);
+  cam_pos = pixel_0_0_center = zero;
+  pixel_dir_x = pixel_dir_y = zero;
+}
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/camera.h b/pytorch3d/pytorch3d/csrc/pulsar/include/camera.h
new file mode 100644
index 0000000000000000000000000000000000000000..cbb583a14e7ee4349d11fea9faf8a6f09cb5e66c
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/camera.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_CAMERA_H_
+#define PULSAR_NATIVE_INCLUDE_CAMERA_H_
+
+#include <stdint.h>
+#include "../global.h"
+
+namespace pulsar {
+/**
+ * Everything that's needed to raycast with our camera model (plain data).
+ */
+struct CamInfo {
+  float3 eye; /** Position in world coordinates. */
+  float3 pixel_0_0_center; /** Center of pixel (0, 0) in world coordinates
+                              (LUC; presumably "left upper corner"). */
+  float3 pixel_dir_x; /** Direction for increasing x for one pixel to the next,
+                       * in world coordinates. */
+  float3 pixel_dir_y; /** Direction for increasing y for one pixel to the next,
+                       * in world coordinates. */
+  float3 sensor_dir_z; /** Normalized direction vector from eye through the
+                        * sensor in z direction (optical axis). */
+  float half_pixel_size; /** Half size of a pixel, in world coordinates. This
+                          * must be consistent with pixel_dir_x and pixel_dir_y!
+                          */
+  float focal_length; /** The focal length, if applicable. */
+  uint aperture_width; /** Full image width in px, possibly not fully used
+                        * in case of a shifted principal point. */
+  uint aperture_height; /** Full image height in px, possibly not fully used
+                         * in case of a shifted principal point. */
+  uint film_width; /** Resulting image width. */
+  uint film_height; /** Resulting image height. */
+  /** The top left coordinates (inclusive) of the film in the full aperture. */
+  uint film_border_left, film_border_top;
+  int32_t principal_point_offset_x; /** Horizontal principal point offset. */
+  int32_t principal_point_offset_y; /** Vertical principal point offset. */
+  float min_dist; /** Minimum distance for a ball to be rendered. */
+  float max_dist; /** Maximum distance for a ball to be rendered. */
+  float norm_fac; /** 1 / (max_dist - min_dist), pre-computed. */
+  /** The depth where to place the background, in normalized coordinates where
+   * 0. is the backmost depth and 1. the frontmost. */
+  float background_normalization_depth;
+  /** The number of image content channels to use. Usually three. */
+  uint n_channels;
+  /** Whether to use an orthogonal instead of a perspective projection. */
+  bool orthogonal_projection;
+  /** Whether to use a right-handed system (inverts the z axis). */
+  bool right_handed;
+};
+
+// Compares film size, background depth, channel count and projection flags
+inline bool operator==(const CamInfo& a, const CamInfo& b) {
+  return a.film_width == b.film_width && a.film_height == b.film_height &&
+      a.background_normalization_depth == b.background_normalization_depth &&
+      a.n_channels == b.n_channels && a.right_handed == b.right_handed &&
+      a.orthogonal_projection == b.orthogonal_projection;
+}
+
+struct CamGradInfo { // presumably camera gradient terms -- TODO confirm
+  HOST DEVICE CamGradInfo(); // defined in camera.device.h; zeroes all members
+  float3 cam_pos; // NOTE(review): looks like the counterpart of CamInfo::eye
+  float3 pixel_0_0_center;
+  float3 pixel_dir_x;
+  float3 pixel_dir_y;
+};
+
+// TODO: remove once https://github.com/NVlabs/cub/issues/172 is resolved.
+struct IntWrapper { // wraps one int; summed through operator+ in math.h
+  int val;
+};
+
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/closest_sphere_tracker.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/closest_sphere_tracker.device.h
new file mode 100644
index 0000000000000000000000000000000000000000..a533dd0048e7f624af7c14a4017b19fde3accff5
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/closest_sphere_tracker.device.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_CLOSEST_SPHERE_TRACKER_DEVICE_H_
+#define PULSAR_NATIVE_INCLUDE_CLOSEST_SPHERE_TRACKER_DEVICE_H_
+
+#include "../global.h"
+
+namespace pulsar {
+namespace Renderer {
+
+/**
+ * A facility to track the closest spheres to the camera.
+ *
+ * Their max number is defined by MAX_GRAD_SPHERES (this is defined in
+ * `pulsar/native/global.h`). This is done to keep the performance as high as
+ * possible because this struct needs to do updates continuously on the GPU.
+ */
+struct ClosestSphereTracker {
+ public:
+  IHD ClosestSphereTracker(const int& n_track) : n_hits(0), n_track(n_track) {
+    PASSERT(n_track < MAX_GRAD_SPHERES);
+    // Initialize the sphere IDs to -1 and the depths to MAX_FLOAT.
+    for (int i = 0; i < n_track; ++i) {
+      this->most_important_sphere_ids[i] = -1;
+      this->closest_sphere_intersection_depths[i] = MAX_FLOAT;
+    }
+  }
+
+  IHD void track( // register one hit; entries stay sorted by ascending depth
+      const uint& sphere_idx,
+      const float& intersection_depth,
+      const uint& coord_x,
+      const uint& coord_y) {
+    PULSAR_LOG_DEV_PIX(
+        PULSAR_LOG_TRACKER_PIX,
+        "tracker|tracking sphere %u (depth: %f).\n",
+        sphere_idx,
+        intersection_depth);
+    for (int i = IMIN(this->n_hits, n_track) - 1; i >= -1; --i) {
+      if (i < 0 ||
+          this->closest_sphere_intersection_depths[i] < intersection_depth) {
+        // Entry i is the last strictly closer one: write position is i+1.
+        PULSAR_LOG_DEV_PIX(
+            PULSAR_LOG_TRACKER_PIX,
+            "tracker|determined writing position: %d.\n",
+            i + 1);
+        if (i + 1 < n_track) {
+          // Shift every later sphere back by one; the last one drops out.
+          for (int j = n_track - 1; j > i + 1; --j) {
+            this->closest_sphere_intersection_depths[j] =
+                this->closest_sphere_intersection_depths[j - 1];
+            this->most_important_sphere_ids[j] =
+                this->most_important_sphere_ids[j - 1];
+          }
+          this->closest_sphere_intersection_depths[i + 1] = intersection_depth;
+          this->most_important_sphere_ids[i + 1] = sphere_idx;
+        }
+        break;
+      }
+    }
+#if PULSAR_LOG_TRACKER_PIX
+    PULSAR_LOG_DEV_PIX(
+        PULSAR_LOG_TRACKER_PIX,
+        "tracker|sphere list after adding sphere %u:\n",
+        sphere_idx);
+    for (int i = 0; i < n_track; ++i) {
+      PULSAR_LOG_DEV_PIX(
+          PULSAR_LOG_TRACKER_PIX,
+          "tracker|sphere %d: %d (depth: %f).\n",
+          i,
+          this->most_important_sphere_ids[i],
+          this->closest_sphere_intersection_depths[i]);
+    }
+#endif // PULSAR_LOG_TRACKER_PIX
+    this->n_hits += 1; // counts every call, whether the hit was stored or not
+  }
+
+  /**
+   * Get the number of hits registered.
+   */
+  IHD int get_n_hits() const {
+    return this->n_hits;
+  }
+
+  /**
+   * Get the idx closest sphere ID.
+   *
+   * For example, get_closest_sphere_id(0) gives the overall closest
+   * sphere id.
+   *
+   * This method is implemented for highly optimized scenarios and will *not*
+   * perform an index check at runtime if assertions are disabled. idx must be
+   * >=0 and < IMIN(n_hits, n_track) for a valid result, if it is >=
+   * n_hits (but still < n_track) it will return -1.
+   */
+  IHD int get_closest_sphere_id(const int& idx) const {
+    PASSERT(idx >= 0 && idx < n_track);
+    return this->most_important_sphere_ids[idx];
+  }
+
+  /**
+   * Get the idx closest sphere normalized_depth.
+   *
+   * For example, get_closest_sphere_depth(0) gives the overall closest
+   * sphere depth (normalized).
+   *
+   * This method is implemented for highly optimized scenarios and will *not*
+   * perform an index check at runtime if assertions are disabled. idx must be
+   * >=0 and < IMIN(n_hits, n_track) for a valid result, if it is >=
+   * n_hits (but still < n_track) it will return MAX_FLOAT.
+   */
+  IHD float get_closest_sphere_depth(const int& idx) const {
+    PASSERT(idx >= 0 && idx < n_track);
+    return this->closest_sphere_intersection_depths[idx];
+  }
+
+ private:
+  /** The number of registered hits so far. */
+  int n_hits;
+  /** The number of intersections to track. Must be <MAX_GRAD_SPHERES. */
+  int n_track;
+  /** The sphere ids of the n_track closest spheres, ordered like the depths
+   * below (ascending intersection depth); -1 marks an unused slot. */
+  int most_important_sphere_ids[MAX_GRAD_SPHERES];
+  /** The normalized depths of the closest n_track spheres. */
+  float closest_sphere_intersection_depths[MAX_GRAD_SPHERES];
+};
+
+} // namespace Renderer
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/commands.h b/pytorch3d/pytorch3d/csrc/pulsar/include/commands.h
new file mode 100644
index 0000000000000000000000000000000000000000..c0b17f40a7c2d63e796b322cca3c2cdae739f34e
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/commands.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_COMMANDS_ROUTING_H_
+#define PULSAR_NATIVE_COMMANDS_ROUTING_H_
+
+#include "../global.h"
+
+// Commands available everywhere (host heap; the malloc result is unchecked).
+#define MALLOC_HOST(VAR, TYPE, SIZE) \
+  VAR = static_cast<TYPE*>(malloc(sizeof(TYPE) * (SIZE)))
+#define FREE_HOST(PTR) free(PTR)
+
+/* Include command definitions depending on CPU or GPU use. */
+
+#ifdef __CUDACC__
+// TODO: find out which compiler we're using here and use the suppression.
+// #pragma push
+// #pragma diag_suppress = 68
+#include <ATen/cuda/CUDAContext.h>
+// #pragma pop
+#include "../cuda/commands.h"
+#else
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Weverything"
+#pragma clang diagnostic pop
+#include "../host/commands.h"
+#endif
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/fastermath.h b/pytorch3d/pytorch3d/csrc/pulsar/include/fastermath.h
new file mode 100644
index 0000000000000000000000000000000000000000..cae598f9c0a7f903b502702dcb62173c8841a3b8
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/fastermath.h
@@ -0,0 +1,88 @@
+#ifndef PULSAR_NATIVE_INCLUDE_FASTERMATH_H_
+#define PULSAR_NATIVE_INCLUDE_FASTERMATH_H_
+
+// @lint-ignore-every LICENSELINT
+/*=====================================================================*
+ *                   Copyright (C) 2011 Paul Mineiro                   *
+ * All rights reserved.                                                *
+ *                                                                     *
+ * Redistribution and use in source and binary forms, with             *
+ * or without modification, are permitted provided that the            *
+ * following conditions are met:                                       *
+ *                                                                     *
+ *     * Redistributions of source code must retain the                *
+ *     above copyright notice, this list of conditions and             *
+ *     the following disclaimer.                                       *
+ *                                                                     *
+ *     * Redistributions in binary form must reproduce the             *
+ *     above copyright notice, this list of conditions and             *
+ *     the following disclaimer in the documentation and/or            *
+ *     other materials provided with the distribution.                 *
+ *                                                                     *
+ *     * Neither the name of Paul Mineiro nor the names                *
+ *     of other contributors may be used to endorse or promote         *
+ *     products derived from this software without specific            *
+ *     prior written permission.                                       *
+ *                                                                     *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND              *
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,         *
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES               *
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE             *
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER               *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,                 *
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES            *
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE           *
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR                *
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF          *
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT           *
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY              *
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE             *
+ * POSSIBILITY OF SUCH DAMAGE.                                         *
+ *                                                                     *
+ * Contact: Paul Mineiro <paul@mineiro.com>                            *
+ *=====================================================================*/
+
+#include <stdint.h>
+#include "./commands.h"
+
+#ifdef __cplusplus // pick the uint32_t cast spelling valid for the language
+#define cast_uint32_t static_cast<uint32_t>
+#else
+#define cast_uint32_t (uint32_t)
+#endif
+
+// Fast approximate log2: float bits read as an integer, scaled and offset.
+IHD float fasterlog2(float x) {
+  union {
+    float f;
+    uint32_t i;
+  } bits = {x};
+  const float scaled = bits.i * 1.1920928955078125e-7f;
+  return scaled - 126.94269504f;
+}
+
+// Fast approximate natural log; same scheme as fasterlog2 with both constants
+// pre-multiplied by ln(2), i.e. 0.69314718f * fasterlog2(x) folded together.
+IHD float fasterlog(float x) {
+  union {
+    float f;
+    uint32_t i;
+  } bits = {x};
+  const float scaled = bits.i * 8.2629582881927490e-8f;
+  return scaled - 87.989971088f;
+}
+
+IHD float fasterpow2(float p) { // fast approximate 2^p
+  float clipp = (p < -126) ? -126.0f : p; // clamp the exponent from below
+  union {
+    uint32_t i;
+    float f;
+  } v = {cast_uint32_t((1 << 23) * (clipp + 126.94269504f))}; // build bits
+  return v.f; // read the integer bit pattern back as a float
+}
+
+IHD float fasterexp(float p) {
+  return fasterpow2(1.442695040f * p); // exp(p) = 2^(p * log2(e))
+}
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/math.h b/pytorch3d/pytorch3d/csrc/pulsar/include/math.h
new file mode 100644
index 0000000000000000000000000000000000000000..d77e2ee1aabb8607c706a7faaee052eb6531b557
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/math.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_IMPL_MATH_H_
+#define PULSAR_NATIVE_IMPL_MATH_H_
+
+#include "./camera.h"
+#include "./commands.h"
+#include "./fastermath.h"
+
+/**
+ * Direction of an integer value.
+ *
+ * Yields +1 for strictly positive val, and -1 when val is zero or negative.
+ */
+IHD int sign_dir(const int& val) {
+  return (val <= 0) ? -1 : 1;
+}
+
+/**
+ * Direction of a floating point value.
+ *
+ * Yields +1.f unless val <= 0 holds, in which case the result is -1.f.
+ */
+IHD float sign_dir(const float& val) {
+  return (val <= 0) ? -1.f : 1.f;
+}
+
+// Integer ceil division.
+// b is used as a divisor and must therefore be non-zero.
+IHD uint iDivCeil(uint a, uint b) {
+  const uint quot = a / b;
+  return (a % b != 0) ? quot + 1 : quot;
+}
+
+IHD float3 outer_product_sum(const float3& a) { // row sums of a * a^T
+  return make_float3(
+      a.x * a.x + a.x * a.y + a.x * a.z,
+      a.x * a.y + a.y * a.y + a.y * a.z,
+      a.x * a.z + a.y * a.z + a.z * a.z);
+}
+
+// Component-wise float3 arithmetic. TODO: put intrinsics here.
+IHD float3 operator+(const float3& a, const float3& b) {
+  return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
+}
+
+IHD void operator+=(float3& a, const float3& b) { // returns void: no chaining
+  a.x += b.x;
+  a.y += b.y;
+  a.z += b.z;
+}
+
+IHD void operator-=(float3& a, const float3& b) {
+  a.x -= b.x;
+  a.y -= b.y;
+  a.z -= b.z;
+}
+
+IHD void operator/=(float3& a, const float& b) { // no check for b == 0
+  a.x /= b;
+  a.y /= b;
+  a.z /= b;
+}
+
+IHD void operator*=(float3& a, const float& b) {
+  a.x *= b;
+  a.y *= b;
+  a.z *= b;
+}
+
+IHD float3 operator/(const float3& a, const float& b) { // no check for b == 0
+  return make_float3(a.x / b, a.y / b, a.z / b);
+}
+
+IHD float3 operator-(const float3& a, const float3& b) {
+  return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
+}
+
+IHD float3 operator*(const float3& a, const float& b) {
+  return make_float3(a.x * b, a.y * b, a.z * b);
+}
+
+IHD float3 operator*(const float3& a, const float3& b) {
+  return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); // not dot, not cross
+}
+
+IHD float3 operator*(const float& a, const float3& b) {
+  return b * a; // forwards to the float3 * float overload
+}
+
+INLINE DEVICE float length(const float3& v) {
+  // TODO: benchmark what's faster: NORM3DF or the explicit sqrt form below.
+  return NORM3DF(v.x, v.y, v.z);
+  // return __fsqrt_rn(v.x * v.x + v.y * v.y + v.z * v.z);
+}
+
+/**
+ * Left-multiplies v by the matrix whose rows are dir_x, dir_y and dir_z.
+ */
+IHD float3 rotate(
+    const float3& v,
+    const float3& dir_x,
+    const float3& dir_y,
+    const float3& dir_z) {
+  return make_float3(
+      dir_x.x * v.x + dir_x.y * v.y + dir_x.z * v.z,
+      dir_y.x * v.x + dir_y.y * v.y + dir_y.z * v.z,
+      dir_z.x * v.x + dir_z.y * v.y + dir_z.z * v.z);
+}
+
+INLINE DEVICE float3 normalize(const float3& v) {
+  return v * RNORM3DF(v.x, v.y, v.z); // NOTE(review): zero v is not guarded
+}
+
+INLINE DEVICE float dot(const float3& a, const float3& b) {
+  return FADD(FADD(FMUL(a.x, b.x), FMUL(a.y, b.y)), FMUL(a.z, b.z)); // a . b
+}
+
+INLINE DEVICE float3 cross(const float3& a, const float3& b) {
+  // Cross product a x b. TODO: faster
+  return make_float3(
+      a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
+}
+
+namespace pulsar {
+IHD CamGradInfo operator+(const CamGradInfo& a, const CamGradInfo& b) {
+  CamGradInfo sum(a); // start from a, then add b member by member
+  sum.cam_pos += b.cam_pos;
+  sum.pixel_0_0_center += b.pixel_0_0_center;
+  sum.pixel_dir_x += b.pixel_dir_x;
+  sum.pixel_dir_y += b.pixel_dir_y;
+  return sum;
+}
+
+IHD CamGradInfo operator*(const CamGradInfo& a, const float& b) {
+  CamGradInfo scaled(a); // copy a, then scale every member by b
+  scaled.cam_pos *= b;
+  scaled.pixel_0_0_center *= b;
+  scaled.pixel_dir_x *= b;
+  scaled.pixel_dir_y *= b;
+  return scaled;
+}
+
+IHD IntWrapper operator+(const IntWrapper& a, const IntWrapper& b) {
+  // Sum of the two wrapped integers.
+  const IntWrapper total = {a.val + b.val};
+  return total;
+}
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward.device.h
new file mode 100644
index 0000000000000000000000000000000000000000..dcd9dd50fd1e83229073fc8e86b815d4da4c99f9
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward.device.h
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_RENDERER_BACKWARD_DEVICE_H_
+#define PULSAR_NATIVE_RENDERER_BACKWARD_DEVICE_H_
+
+#include "./camera.device.h"
+#include "./math.h"
+#include "./renderer.h"
+
+namespace pulsar {
+namespace Renderer {
+
+template <bool DEV> // README: true runs on a CUDA device, false on the host
+void backward(
+    Renderer* self, // renderer state; its camera and grad buffers are written
+    const float* grad_im, // forwarded to calc_gradients
+    const float* image, // forwarded to calc_gradients
+    const float* forw_info, // forwarded to calc_gradients
+    const float* vert_pos, // reinterpreted as an array of float3
+    const float* vert_col,
+    const float* vert_rad,
+    const CamInfo& cam, // film size must match self->cam
+    const float& gamma, // must be in (0, 1]
+    float percent_allowed_difference, // only validated and clamped here
+    const uint& max_n_hits, // only validated (>= 1) here
+    const float* vert_opy_d,
+    const size_t& num_balls, // must be in [1, self->max_num_balls]
+    const uint& mode, // only validated (<= 1) here
+    const bool& dif_pos,
+    const bool& dif_col,
+    const bool& dif_rad,
+    const bool& dif_cam, // also gates the camera gradient reduction below
+    const bool& dif_opy,
+    cudaStream_t stream) {
+  ARGCHECK(gamma > 0.f && gamma <= 1.f, 6, "gamma must be in (0., 1.]");
+  ARGCHECK(
+      percent_allowed_difference >= 0.f && percent_allowed_difference <= 1.f,
+      7,
+      "percent_allowed_difference must be in [0., 1.]");
+  ARGCHECK(max_n_hits >= 1u, 8, "max_n_hits must be >= 1");
+  ARGCHECK(
+      num_balls > 0 && num_balls <= self->max_num_balls,
+      9,
+      "num_balls must be >0 and less than max num balls!");
+  ARGCHECK(
+      cam.film_width == self->cam.film_width &&
+          cam.film_height == self->cam.film_height,
+      5,
+      "cam film size must agree");
+  ARGCHECK(mode <= 1, 10, "mode must be <= 1!");
+  if (percent_allowed_difference < FEPS) { // same bound as reported below
+    LOG(WARNING) << "percent_allowed_difference < " << FEPS << "! Clamping to "
+                 << FEPS << ".";
+    percent_allowed_difference = FEPS;
+  }
+  if (percent_allowed_difference > 1.f - FEPS) {
+    LOG(WARNING) << "percent_allowed_difference > " << (1.f - FEPS)
+                 << "! Clamping to " << (1.f - FEPS) << ".";
+    percent_allowed_difference = 1.f - FEPS;
+  }
+  LOG_IF(INFO, PULSAR_LOG_RENDER) << "Rendering backward pass...";
+  // Update camera; pixel_0_0_center is stored relative to the eye position.
+  self->cam.eye = cam.eye;
+  self->cam.pixel_0_0_center = cam.pixel_0_0_center - cam.eye;
+  self->cam.pixel_dir_x = cam.pixel_dir_x;
+  self->cam.pixel_dir_y = cam.pixel_dir_y;
+  self->cam.sensor_dir_z = cam.sensor_dir_z;
+  self->cam.half_pixel_size = cam.half_pixel_size;
+  self->cam.focal_length = cam.focal_length;
+  self->cam.aperture_width = cam.aperture_width;
+  self->cam.aperture_height = cam.aperture_height;
+  self->cam.min_dist = cam.min_dist;
+  self->cam.max_dist = cam.max_dist;
+  self->cam.norm_fac = cam.norm_fac;
+  self->cam.principal_point_offset_x = cam.principal_point_offset_x;
+  self->cam.principal_point_offset_y = cam.principal_point_offset_y;
+  self->cam.film_border_left = cam.film_border_left;
+  self->cam.film_border_top = cam.film_border_top;
+#ifdef PULSAR_TIMINGS_ENABLED
+  START_TIME(calc_signature);
+#endif
+  LAUNCH_MAX_PARALLEL_1D(
+      calc_signature<DEV>,
+      num_balls,
+      stream,
+      *self,
+      reinterpret_cast<const float3*>(vert_pos),
+      vert_col,
+      vert_rad,
+      num_balls);
+  CHECKLAUNCH();
+#ifdef PULSAR_TIMINGS_ENABLED
+  STOP_TIME(calc_signature);
+  START_TIME(calc_gradients);
+#endif
+  MEMSET(self->grad_pos_d, 0, float3, num_balls, stream);
+  MEMSET(self->grad_col_d, 0, float, num_balls * self->cam.n_channels, stream);
+  MEMSET(self->grad_rad_d, 0, float, num_balls, stream);
+  MEMSET(self->grad_cam_d, 0, float, 12, stream); // later used as CamGradInfo
+  MEMSET(self->grad_cam_buf_d, 0, CamGradInfo, num_balls, stream);
+  MEMSET(self->grad_opy_d, 0, float, num_balls, stream);
+  MEMSET(self->ids_sorted_d, 0, int, num_balls, stream);
+  LAUNCH_PARALLEL_2D(
+      calc_gradients<DEV>,
+      self->cam.film_width,
+      self->cam.film_height,
+      GRAD_BLOCK_SIZE,
+      GRAD_BLOCK_SIZE,
+      stream,
+      self->cam,
+      grad_im,
+      gamma,
+      reinterpret_cast<const float3*>(vert_pos),
+      vert_col,
+      vert_rad,
+      vert_opy_d,
+      num_balls,
+      image,
+      forw_info,
+      self->di_d,
+      self->ii_d,
+      dif_pos,
+      dif_col,
+      dif_rad,
+      dif_cam,
+      dif_opy,
+      self->grad_rad_d,
+      self->grad_col_d,
+      self->grad_pos_d,
+      self->grad_cam_buf_d,
+      self->grad_opy_d,
+      self->ids_sorted_d,
+      self->n_track);
+  CHECKLAUNCH();
+#ifdef PULSAR_TIMINGS_ENABLED
+  STOP_TIME(calc_gradients);
+  START_TIME(normalize);
+#endif
+  LAUNCH_MAX_PARALLEL_1D(
+      norm_sphere_gradients<DEV>, num_balls, stream, *self, num_balls);
+  CHECKLAUNCH();
+  if (dif_cam) { // reduce the per-ball camera terms, then normalize them
+    SUM_WS(
+        self->grad_cam_buf_d,
+        reinterpret_cast<CamGradInfo*>(self->grad_cam_d),
+        static_cast<int>(num_balls),
+        self->workspace_d,
+        self->workspace_size,
+        stream);
+    CHECKLAUNCH();
+    SUM_WS( // presumably sums per-ball contribution counts -- TODO confirm
+        (IntWrapper*)(self->ids_sorted_d),
+        (IntWrapper*)(self->n_grad_contributions_d),
+        static_cast<int>(num_balls),
+        self->workspace_d,
+        self->workspace_size,
+        stream);
+    CHECKLAUNCH();
+    LAUNCH_MAX_PARALLEL_1D(
+        norm_cam_gradients<DEV>, static_cast<int64_t>(1), stream, *self);
+    CHECKLAUNCH();
+  }
+#ifdef PULSAR_TIMINGS_ENABLED
+  STOP_TIME(normalize);
+  float time_ms;
+  // This blocks the result and prevents batch-processing from parallelizing.
+  GET_TIME(calc_signature, &time_ms);
+  std::cout << "Time for signature calculation: " << time_ms << " ms"
+            << std::endl;
+  GET_TIME(calc_gradients, &time_ms);
+  std::cout << "Time for gradient calculation: " << time_ms << " ms"
+            << std::endl;
+  GET_TIME(normalize, &time_ms);
+  std::cout << "Time for aggregation and normalization: " << time_ms << " ms"
+            << std::endl;
+#endif
+  LOG_IF(INFO, PULSAR_LOG_RENDER) << "Backward pass complete.";
+}
+
+} // namespace Renderer
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward.instantiate.h
new file mode 100644
index 0000000000000000000000000000000000000000..75e85fd4dc88befc63e84ed4891f8ecb4b659bc4
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward.instantiate.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "./renderer.backward.device.h"
+
+namespace pulsar {
+namespace Renderer {
+
+template void backward<ISONDEVICE>( // explicit instantiation of backward<DEV>
+    Renderer* self,
+    const float* grad_im,
+    const float* image,
+    const float* forw_info,
+    const float* vert_pos,
+    const float* vert_col,
+    const float* vert_rad,
+    const CamInfo& cam,
+    const float& gamma,
+    float percent_allowed_difference,
+    const uint& max_n_hits,
+    const float* vert_opy, // called vert_opy_d in the template definition
+    const size_t& num_balls,
+    const uint& mode,
+    const bool& dif_pos,
+    const bool& dif_col,
+    const bool& dif_rad,
+    const bool& dif_cam,
+    const bool& dif_opy,
+    cudaStream_t stream); // ISONDEVICE: presumably set by the including unit
+
+} // namespace Renderer
+} // namespace pulsar
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward_dbg.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward_dbg.device.h
new file mode 100644
index 0000000000000000000000000000000000000000..f2bdc7e69027d29a8442a14b08d677cc22dc51c9
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward_dbg.device.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_RENDERER_BACKWARD_DBG_DEVICE_H_
+#define PULSAR_NATIVE_RENDERER_BACKWARD_DBG_DEVICE_H_
+
+#include "./camera.device.h"
+#include "./math.h"
+#include "./renderer.h"
+
+namespace pulsar {
+namespace Renderer {
+
+template <bool DEV> // README: true runs on a CUDA device, false on the host
+void backward_dbg(
+    Renderer* self, // renderer state; its camera and grad buffers are written
+    const float* grad_im, // forwarded to calc_gradients
+    const float* image, // forwarded to calc_gradients
+    const float* forw_info, // forwarded to calc_gradients
+    const float* vert_pos, // reinterpreted as an array of float3
+    const float* vert_col,
+    const float* vert_rad,
+    const CamInfo& cam, // film size must match self->cam
+    const float& gamma, // must be in (0, 1]
+    float percent_allowed_difference, // only validated and clamped here
+    const uint& max_n_hits, // only validated (>= 1) here
+    const float* vert_opy_d,
+    const size_t& num_balls, // must be in [1, self->max_num_balls]
+    const uint& mode, // only validated (<= 1) here
+    const bool& dif_pos,
+    const bool& dif_col,
+    const bool& dif_rad,
+    const bool& dif_cam,
+    const bool& dif_opy,
+    const uint& pos_x, // must be < cam.film_width
+    const uint& pos_y, // must be < cam.film_height
+    cudaStream_t stream) {
+  ARGCHECK(gamma > 0.f && gamma <= 1.f, 6, "gamma must be in (0., 1.]");
+  ARGCHECK(
+      percent_allowed_difference >= 0.f && percent_allowed_difference <= 1.f,
+      7,
+      "percent_allowed_difference must be in [0., 1.]");
+  ARGCHECK(max_n_hits >= 1u, 8, "max_n_hits must be >= 1");
+  ARGCHECK(
+      num_balls > 0 && num_balls <= self->max_num_balls,
+      9,
+      "num_balls must be >0 and less than max num balls!");
+  ARGCHECK(
+      cam.film_width == self->cam.film_width &&
+          cam.film_height == self->cam.film_height,
+      5,
+      "cam film size must agree");
+  ARGCHECK(mode <= 1, 10, "mode must be <= 1!");
+  if (percent_allowed_difference < FEPS) { // same bound as reported below
+    LOG(WARNING) << "percent_allowed_difference < " << FEPS << "! Clamping to "
+                 << FEPS << ".";
+    percent_allowed_difference = FEPS;
+  }
+  ARGCHECK(
+      pos_x < cam.film_width && pos_y < cam.film_height,
+      15,
+      "pos_x must be < width and pos_y < height.");
+  if (percent_allowed_difference > 1.f - FEPS) {
+    LOG(WARNING) << "percent_allowed_difference > " << (1.f - FEPS)
+                 << "! Clamping to " << (1.f - FEPS) << ".";
+    percent_allowed_difference = 1.f - FEPS;
+  }
+  LOG_IF(INFO, PULSAR_LOG_RENDER)
+      << "Rendering debug backward pass for x: " << pos_x << ", y: " << pos_y;
+  // Update camera; pixel_0_0_center is stored relative to the eye position.
+  self->cam.eye = cam.eye;
+  self->cam.pixel_0_0_center = cam.pixel_0_0_center - cam.eye;
+  self->cam.pixel_dir_x = cam.pixel_dir_x;
+  self->cam.pixel_dir_y = cam.pixel_dir_y;
+  self->cam.sensor_dir_z = cam.sensor_dir_z;
+  self->cam.half_pixel_size = cam.half_pixel_size;
+  self->cam.focal_length = cam.focal_length;
+  self->cam.aperture_width = cam.aperture_width;
+  self->cam.aperture_height = cam.aperture_height;
+  self->cam.min_dist = cam.min_dist;
+  self->cam.max_dist = cam.max_dist;
+  self->cam.norm_fac = cam.norm_fac;
+  self->cam.principal_point_offset_x = cam.principal_point_offset_x;
+  self->cam.principal_point_offset_y = cam.principal_point_offset_y;
+  self->cam.film_border_left = cam.film_border_left;
+  self->cam.film_border_top = cam.film_border_top;
+  LAUNCH_MAX_PARALLEL_1D(
+      calc_signature<DEV>,
+      num_balls,
+      stream,
+      *self,
+      reinterpret_cast<const float3*>(vert_pos),
+      vert_col,
+      vert_rad,
+      num_balls);
+  CHECKLAUNCH();
+  MEMSET(self->grad_pos_d, 0, float3, num_balls, stream);
+  MEMSET(self->grad_col_d, 0, float, num_balls * self->cam.n_channels, stream);
+  MEMSET(self->grad_rad_d, 0, float, num_balls, stream);
+  MEMSET(self->grad_cam_d, 0, float, 12, stream); // later used as CamGradInfo
+  MEMSET(self->grad_cam_buf_d, 0, CamGradInfo, num_balls, stream);
+  MEMSET(self->grad_opy_d, 0, float, num_balls, stream);
+  MEMSET(self->ids_sorted_d, 0, int, num_balls, stream);
+  LAUNCH_MAX_PARALLEL_2D( // 1 x 1 launch: only pixel (pos_x, pos_y) is handled
+      calc_gradients<DEV>,
+      (int64_t)1,
+      (int64_t)1,
+      stream,
+      self->cam,
+      grad_im,
+      gamma,
+      reinterpret_cast<const float3*>(vert_pos),
+      vert_col,
+      vert_rad,
+      vert_opy_d,
+      num_balls,
+      image,
+      forw_info,
+      self->di_d,
+      self->ii_d,
+      dif_pos,
+      dif_col,
+      dif_rad,
+      dif_cam,
+      dif_opy,
+      self->grad_rad_d,
+      self->grad_col_d,
+      self->grad_pos_d,
+      self->grad_cam_buf_d,
+      self->grad_opy_d,
+      self->ids_sorted_d,
+      self->n_track,
+      pos_x,
+      pos_y);
+  CHECKLAUNCH();
+  // We're not doing sphere gradient normalization here.
+  SUM_WS( // NOTE(review): element count is 1, unlike num_balls in backward()
+      self->grad_cam_buf_d,
+      reinterpret_cast<CamGradInfo*>(self->grad_cam_d),
+      static_cast<int>(1),
+      self->workspace_d,
+      self->workspace_size,
+      stream);
+  CHECKLAUNCH();
+  // We're not doing camera gradient normalization here.
+  LOG_IF(INFO, PULSAR_LOG_RENDER) << "Debug backward pass complete.";
+}
+
+} // namespace Renderer
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward_dbg.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward_dbg.instantiate.h
new file mode 100644
index 0000000000000000000000000000000000000000..5a7a1ba1f8e56df0a5ff212e7eb769a0564e7f60
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward_dbg.instantiate.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "./renderer.backward_dbg.device.h"
+
+namespace pulsar {
+namespace Renderer {
+
+template void backward_dbg<ISONDEVICE>( // Explicit instantiation for the ISONDEVICE template argument.
+    Renderer* self,
+    const float* grad_im,
+    const float* image,
+    const float* forw_info,
+    const float* vert_pos,
+    const float* vert_col,
+    const float* vert_rad,
+    const CamInfo& cam,
+    const float& gamma,
+    float percent_allowed_difference,
+    const uint& max_n_hits,
+    const float* vert_opy,
+    const size_t& num_balls,
+    const uint& mode,
+    const bool& dif_pos,
+    const bool& dif_col,
+    const bool& dif_rad,
+    const bool& dif_cam,
+    const bool& dif_opy,
+    const uint& pos_x,
+    const uint& pos_y,
+    cudaStream_t stream);
+
+} // namespace Renderer
+} // namespace pulsar
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_gradients.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_gradients.device.h
new file mode 100644
index 0000000000000000000000000000000000000000..90b3872e9606c8830b039f18c4d465c3f8c23c1f
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_gradients.device.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_CALC_GRADIENTS_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_CALC_GRADIENTS_H_
+
+#include "../global.h"
+#include "./commands.h"
+#include "./renderer.h"
+
+#include "./renderer.draw.device.h"
+
+namespace pulsar {
+namespace Renderer {
+
+template <bool DEV>
+GLOBAL void calc_gradients( // Backward pass for one pixel: adds the gradients of its tracked spheres to the output buffers.
+    const CamInfo cam, /** Camera in world coordinates. */
+    float const* const RESTRICT grad_im, /** The gradient image. */
+    const float
+        gamma, /** The transparency parameter used in the forward pass. */
+    float3 const* const RESTRICT vert_poss, /** Vertex position vector (not read in this kernel). */
+    float const* const RESTRICT vert_cols, /** Vertex color vector (not read in this kernel). */
+    float const* const RESTRICT vert_rads, /** Vertex radius vector (not read in this kernel). */
+    float const* const RESTRICT opacity, /** Vertex opacity; NULL means 1.f for every sphere. */
+    const uint num_balls, /** Number of balls (only used to validate sphere ids). */
+    float const* const RESTRICT result_d, /** Result image. */
+    float const* const RESTRICT forw_info_d, /** Forward pass info, (3 + 2 * n_track) floats per pixel. */
+    DrawInfo const* const RESTRICT di_d, /** Draw information. */
+    IntersectInfo const* const RESTRICT ii_d, /** Intersect information. */
+    // Mode switches.
+    const bool calc_grad_pos,
+    const bool calc_grad_col,
+    const bool calc_grad_rad,
+    const bool calc_grad_cam,
+    const bool calc_grad_opy,
+    // Out variables.
+    float* const RESTRICT grad_rad_d, /** Radius gradients. */
+    float* const RESTRICT grad_col_d, /** Color gradients. */
+    float3* const RESTRICT grad_pos_d, /** Position gradients. */
+    CamGradInfo* const RESTRICT grad_cam_buf_d, /** Camera gradient buffer. */
+    float* const RESTRICT grad_opy_d, /** Opacity gradient buffer. */
+    int* const RESTRICT
+        grad_contributed_d, /** Per-sphere count of pixels that contributed a gradient. */
+    // Infrastructure.
+    const int n_track,
+    const uint offs_x,
+    const uint offs_y /** Debug offsets. */
+) {
+  uint limit_x = cam.film_width, limit_y = cam.film_height;
+  if (offs_x != 0) {
+    // We're in debug mode: process a single pixel (only offs_x is checked).
+    limit_x = 1;
+    limit_y = 1;
+  }
+  GET_PARALLEL_IDS_2D(coord_x_base, coord_y_base, limit_x, limit_y);
+  // coord_x_base and coord_y_base are in the film coordinate system.
+  // We now need to translate to the aperture coordinate system. If
+  // the principal point was shifted left/up nothing has to be
+  // subtracted - only shift needs to be added in case it has been
+  // shifted down/right.
+  const uint film_coord_x = coord_x_base + offs_x;
+  const uint ap_coord_x = film_coord_x +
+      2 * static_cast<uint>(std::max(0, cam.principal_point_offset_x));
+  const uint film_coord_y = coord_y_base + offs_y;
+  const uint ap_coord_y = film_coord_y +
+      2 * static_cast<uint>(std::max(0, cam.principal_point_offset_y));
+  const float3 ray_dir = /** Ray cast through the pixel, not normalized. */
+      cam.pixel_0_0_center + ap_coord_x * cam.pixel_dir_x +
+      ap_coord_y * cam.pixel_dir_y;
+  const float norm_ray_dir = length(ray_dir);
+  // ray_dir_norm *must* be calculated here in the same way as in the draw
+  // function to have the same values with no other numerical instabilities
+  // (for example, ray_dir * FRCP(norm_ray_dir) does not work)!
+  float3 ray_dir_norm; /** Ray cast through the pixel, normalized. */
+  float2 projected_ray; /** Ray intersection with the sensor. */
+  if (cam.orthogonal_projection) {
+    ray_dir_norm = cam.sensor_dir_z;
+    projected_ray.x = static_cast<float>(ap_coord_x);
+    projected_ray.y = static_cast<float>(ap_coord_y);
+  } else {
+    ray_dir_norm = normalize(
+        cam.pixel_0_0_center + ap_coord_x * cam.pixel_dir_x +
+        ap_coord_y * cam.pixel_dir_y);
+    // This is a reasonable assumption for normal focal lengths and image sizes.
+    PASSERT(FABS(ray_dir_norm.z) > FEPS);
+    projected_ray.x = ray_dir_norm.x / ray_dir_norm.z * cam.focal_length;
+    projected_ray.y = ray_dir_norm.y / ray_dir_norm.z * cam.focal_length;
+  }
+  float* result = const_cast<float*>( // Pointer to this pixel's channels in the result image.
+      result_d + film_coord_y * cam.film_width * cam.n_channels +
+      film_coord_x * cam.n_channels);
+  const float* grad_im_l = grad_im + // Pointer to this pixel's channels in the gradient image.
+      film_coord_y * cam.film_width * cam.n_channels +
+      film_coord_x * cam.n_channels;
+  // For writing...
+  float3 grad_pos;
+  float grad_rad, grad_opy;
+  CamGradInfo grad_cam_local = CamGradInfo();
+  // Set up shared infrastructure: locate this pixel's record in forw_info_d.
+  const int fwi_loc = film_coord_y * cam.film_width * (3 + 2 * n_track) +
+      film_coord_x * (3 + 2 * n_track);
+  float sm_m = forw_info_d[fwi_loc];
+  float sm_d = forw_info_d[fwi_loc + 1];
+  PULSAR_LOG_DEV_APIX(
+      PULSAR_LOG_GRAD,
+      "grad|sm_m: %f, sm_d: %f, result: "
+      "%f, %f, %f; grad_im: %f, %f, %f.\n",
+      sm_m,
+      sm_d,
+      result[0],
+      result[1],
+      result[2],
+      grad_im_l[0],
+      grad_im_l[1],
+      grad_im_l[2]);
+  // Start processing: visit every tracked sphere slot of this pixel.
+  for (int grad_idx = 0; grad_idx < n_track; ++grad_idx) {
+    int sphere_idx;
+    FASI(forw_info_d[fwi_loc + 3 + 2 * grad_idx], sphere_idx); // NOTE(review): FASI presumably reinterprets the float as an int id - confirm.
+    PASSERT(
+        sphere_idx == -1 ||
+        sphere_idx >= 0 && static_cast<uint>(sphere_idx) < num_balls);
+    if (sphere_idx >= 0) { // Negative ids (-1) are skipped.
+      // TODO: make more efficient.
+      grad_pos = make_float3(0.f, 0.f, 0.f);
+      grad_rad = 0.f;
+      grad_cam_local = CamGradInfo();
+      const DrawInfo di = di_d[sphere_idx];
+      grad_opy = 0.f;
+      draw(
+          di,
+          opacity == NULL ? 1.f : opacity[sphere_idx],
+          cam,
+          gamma,
+          ray_dir_norm,
+          projected_ray,
+          // Mode switches.
+          false, // draw only
+          calc_grad_pos,
+          calc_grad_col,
+          calc_grad_rad,
+          calc_grad_cam,
+          calc_grad_opy,
+          // Position info.
+          ap_coord_x,
+          ap_coord_y,
+          sphere_idx,
+          // Optional in.
+          &ii_d[sphere_idx],
+          &ray_dir,
+          &norm_ray_dir,
+          grad_im_l,
+          NULL,
+          // In/out
+          &sm_d,
+          &sm_m,
+          result,
+          // Optional out.
+          NULL,
+          NULL,
+          &grad_pos,
+          grad_col_d + sphere_idx * cam.n_channels,
+          &grad_rad,
+          &grad_cam_local,
+          &grad_opy);
+      ATOMICADD(&(grad_rad_d[sphere_idx]), grad_rad);
+      // Color has been added directly.
+      ATOMICADD_F3(&(grad_pos_d[sphere_idx]), grad_pos);
+      ATOMICADD_F3(
+          &(grad_cam_buf_d[sphere_idx].cam_pos), grad_cam_local.cam_pos);
+      if (!cam.orthogonal_projection) { // pixel_0_0_center gradient is not accumulated for orthogonal cameras.
+        ATOMICADD_F3(
+            &(grad_cam_buf_d[sphere_idx].pixel_0_0_center),
+            grad_cam_local.pixel_0_0_center);
+      }
+      ATOMICADD_F3(
+          &(grad_cam_buf_d[sphere_idx].pixel_dir_x),
+          grad_cam_local.pixel_dir_x);
+      ATOMICADD_F3(
+          &(grad_cam_buf_d[sphere_idx].pixel_dir_y),
+          grad_cam_local.pixel_dir_y);
+      ATOMICADD(&(grad_opy_d[sphere_idx]), grad_opy);
+      ATOMICADD(&(grad_contributed_d[sphere_idx]), 1); // One more pixel contributed to this sphere.
+    }
+  }
+  END_PARALLEL_2D_NORET();
+};
+
+} // namespace Renderer
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_gradients.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_gradients.instantiate.h
new file mode 100644
index 0000000000000000000000000000000000000000..596c322b28eef850d5466037770cef53caf51cff
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_gradients.instantiate.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "./renderer.calc_gradients.device.h"
+
+namespace pulsar {
+namespace Renderer {
+
+template GLOBAL void calc_gradients<ISONDEVICE>( // Explicit instantiation for the ISONDEVICE template argument.
+    const CamInfo cam, /** Camera in world coordinates. */
+    float const* const RESTRICT grad_im, /** The gradient image. */
+    const float
+        gamma, /** The transparency parameter used in the forward pass. */
+    float3 const* const RESTRICT vert_poss, /** Vertex position vector. */
+    float const* const RESTRICT vert_cols, /** Vertex color vector. */
+    float const* const RESTRICT vert_rads, /** Vertex radius vector. */
+    float const* const RESTRICT opacity, /** Vertex opacity. */
+    const uint num_balls, /** Number of balls. */
+    float const* const RESTRICT result_d, /** Result image. */
+    float const* const RESTRICT forw_info_d, /** Forward pass info. */
+    DrawInfo const* const RESTRICT di_d, /** Draw information. */
+    IntersectInfo const* const RESTRICT ii_d, /** Intersect information. */
+    // Mode switches.
+    const bool calc_grad_pos,
+    const bool calc_grad_col,
+    const bool calc_grad_rad,
+    const bool calc_grad_cam,
+    const bool calc_grad_opy,
+    // Out variables.
+    float* const RESTRICT grad_rad_d, /** Radius gradients. */
+    float* const RESTRICT grad_col_d, /** Color gradients. */
+    float3* const RESTRICT grad_pos_d, /** Position gradients. */
+    CamGradInfo* const RESTRICT grad_cam_buf_d, /** Camera gradient buffer. */
+    float* const RESTRICT grad_opy_d, /** Opacity gradient buffer. */
+    int* const RESTRICT
+        grad_contributed_d, /** Gradient contribution counter. */
+    // Infrastructure.
+    const int n_track,
+    const uint offs_x,
+    const uint offs_y);
+
+} // namespace Renderer
+} // namespace pulsar
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_signature.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_signature.device.h
new file mode 100644
index 0000000000000000000000000000000000000000..bd687fee63d1ee9869ab5beb454a910ff387914c
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_signature.device.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_CALC_SIGNATURE_DEVICE_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_CALC_SIGNATURE_DEVICE_H_
+
+#include "../global.h"
+#include "./camera.device.h"
+#include "./commands.h"
+#include "./math.h"
+#include "./renderer.get_screen_area.device.h"
+#include "./renderer.h"
+
+namespace pulsar {
+namespace Renderer {
+
+template <bool DEV>
+GLOBAL void calc_signature( // Per-ball setup: writes id, minimum depth, screen bounds and draw info for ball `idx`.
+    Renderer renderer,
+    float3 const* const RESTRICT vert_poss,
+    float const* const RESTRICT vert_cols,
+    float const* const RESTRICT vert_rads,
+    const uint num_balls) {
+  /* We're not using RESTRICT here for the pointers within `renderer`. Just one
+     value is being read from each of the pointers, so the effect would be
+     negligible or non-existent. */
+  GET_PARALLEL_IDX_1D(idx, num_balls);
+  // Create aliases.
+  // For reading...
+  const float3& vert_pos = vert_poss[idx]; /** Vertex position. */
+  const float* vert_col =
+      vert_cols + idx * renderer.cam.n_channels; /** Vertex color. */
+  const float& vert_rad = vert_rads[idx]; /** Vertex radius. */
+  const CamInfo& cam = renderer.cam; /** Camera in world coordinates. */
+  // For writing...
+  /** Ball ID (either original index of the ball or -1 if not visible). */
+  int& id_out = renderer.ids_d[idx];
+  /** Intersection helper structure for the ball. */
+  IntersectInfo& intersect_helper_out = renderer.ii_d[idx];
+  /** Draw helper structure for this ball. */
+  DrawInfo& draw_helper_out = renderer.di_d[idx];
+  /** Minimum possible intersection depth for this ball. */
+  float& closest_possible_intersect_out = renderer.min_depth_d[idx];
+  PULSAR_LOG_DEV(
+      PULSAR_LOG_CALC_SIGNATURE,
+      "signature %d|vert_pos: %.9f, %.9f, %.9f, vert_col (first three): "
+      "%.9f, %.9f, %.9f.\n",
+      idx,
+      vert_pos.x,
+      vert_pos.y,
+      vert_pos.z,
+      vert_col[0],
+      vert_col[1],
+      vert_col[2]);
+  // Set flags to invalid for a potential early return.
+  id_out = -1; // Invalid ID.
+  closest_possible_intersect_out =
+      MAX_FLOAT; // These spheres are sorted to the very end.
+  intersect_helper_out.max.x = MAX_USHORT; // No intersection possible.
+  intersect_helper_out.min.x = MAX_USHORT;
+  intersect_helper_out.max.y = MAX_USHORT;
+  intersect_helper_out.min.y = MAX_USHORT;
+  // Start processing.
+  /** Ball center in the camera coordinate system. */
+  const float3 ball_center_cam = vert_pos - cam.eye;
+  /** Distance to the ball center in the camera coordinate system. */
+  const float t_center = length(ball_center_cam);
+  /** Closest possible intersection with this ball from the camera. */
+  float closest_possible_intersect;
+  if (cam.orthogonal_projection) {
+    const float3 ball_center_cam_rot = rotate(
+        ball_center_cam,
+        cam.pixel_dir_x / length(cam.pixel_dir_x),
+        cam.pixel_dir_y / length(cam.pixel_dir_y),
+        cam.sensor_dir_z);
+    closest_possible_intersect = ball_center_cam_rot.z - vert_rad; // Depth along the sensor z axis.
+  } else {
+    closest_possible_intersect = t_center - vert_rad; // Distance from the eye to the near side of the ball.
+  }
+  PULSAR_LOG_DEV(
+      PULSAR_LOG_CALC_SIGNATURE,
+      "signature %d|t_center: %f. vert_rad: %f. "
+      "closest_possible_intersect: %f.\n",
+      idx,
+      t_center,
+      vert_rad,
+      closest_possible_intersect);
+  /**
+   * Corner points of the enclosing projected rectangle of the ball.
+   * They are first calculated in the camera coordinate system, then
+   * converted to the pixel coordinate system.
+   */
+  float x_1, x_2, y_1, y_2;
+  bool hits_screen_plane;
+  float3 ray_center_norm = ball_center_cam / t_center; // NOTE(review): no guard against t_center == 0 here - confirm.
+  PASSERT(vert_rad >= 0.f);
+  if (closest_possible_intersect < cam.min_dist ||
+      closest_possible_intersect > cam.max_dist) {
+    PULSAR_LOG_DEV(
+        PULSAR_LOG_CALC_SIGNATURE,
+        "signature %d|ignoring sphere out of min/max bounds: %.9f, "
+        "min: %.9f, max: %.9f.\n",
+        idx,
+        closest_possible_intersect,
+        cam.min_dist,
+        cam.max_dist);
+    RETURN_PARALLEL();
+  }
+  // Find the relevant region on the screen plane.
+  hits_screen_plane = get_screen_area(
+      ball_center_cam,
+      ray_center_norm,
+      vert_rad,
+      cam,
+      idx,
+      &x_1,
+      &x_2,
+      &y_1,
+      &y_2);
+  if (!hits_screen_plane)
+    RETURN_PARALLEL();
+  PULSAR_LOG_DEV(
+      PULSAR_LOG_CALC_SIGNATURE,
+      "signature %d|in pixels: x_1: %f, x_2: %f, y_1: %f, y_2: %f.\n",
+      idx,
+      x_1,
+      x_2,
+      y_1,
+      y_2);
+  // Check whether the pixel coordinates are on screen. NOTE(review): the upper x test uses >=, the upper y test > - confirm.
+  if (FMAX(x_1, x_2) <= static_cast<float>(cam.film_border_left) ||
+      FMIN(x_1, x_2) >=
+          static_cast<float>(cam.film_border_left + cam.film_width) - 0.5f ||
+      FMAX(y_1, y_2) <= static_cast<float>(cam.film_border_top) ||
+      FMIN(y_1, y_2) >
+          static_cast<float>(cam.film_border_top + cam.film_height) - 0.5f)
+    RETURN_PARALLEL();
+  // Write results: bounds are clamped to the film rectangle.
+  id_out = idx;
+  intersect_helper_out.min.x = static_cast<ushort>(
+      FMAX(FMIN(x_1, x_2), static_cast<float>(cam.film_border_left)));
+  intersect_helper_out.min.y = static_cast<ushort>(
+      FMAX(FMIN(y_1, y_2), static_cast<float>(cam.film_border_top)));
+  // In the following calculations, the max that needs to be stored is
+  // exclusive.
+  // That means that the calculated value needs to be `ceil`ed and incremented
+  // to find the correct value.
+  intersect_helper_out.max.x = static_cast<ushort>(FMIN(
+      FCEIL(FMAX(x_1, x_2)) + 1,
+      static_cast<float>(cam.film_border_left + cam.film_width)));
+  intersect_helper_out.max.y = static_cast<ushort>(FMIN(
+      FCEIL(FMAX(y_1, y_2)) + 1,
+      static_cast<float>(cam.film_border_top + cam.film_height)));
+  PULSAR_LOG_DEV(
+      PULSAR_LOG_CALC_SIGNATURE,
+      "signature %d|limits after refining: x_1: %u, x_2: %u, "
+      "y_1: %u, y_2: %u.\n",
+      idx,
+      intersect_helper_out.min.x,
+      intersect_helper_out.max.x,
+      intersect_helper_out.min.y,
+      intersect_helper_out.max.y);
+  if (intersect_helper_out.min.x == MAX_USHORT) { // min.x equals the "no intersection" marker: treat as invisible.
+    id_out = -1;
+    RETURN_PARALLEL();
+  }
+  PULSAR_LOG_DEV(
+      PULSAR_LOG_CALC_SIGNATURE,
+      "signature %d|writing info. closest_possible_intersect: %.9f. "
+      "ray_center_norm: %.9f, %.9f, %.9f. t_center: %.9f. radius: %.9f.\n",
+      idx,
+      closest_possible_intersect,
+      ray_center_norm.x,
+      ray_center_norm.y,
+      ray_center_norm.z,
+      t_center,
+      vert_rad);
+  closest_possible_intersect_out = closest_possible_intersect;
+  draw_helper_out.ray_center_norm = ray_center_norm;
+  draw_helper_out.t_center = t_center;
+  draw_helper_out.radius = vert_rad;
+  if (cam.n_channels <= 3) { // Up to three channels are copied into the DrawInfo itself.
+    draw_helper_out.first_color = vert_col[0];
+    for (uint c_id = 1; c_id < cam.n_channels; ++c_id) {
+      draw_helper_out.color_union.color[c_id - 1] = vert_col[c_id];
+    }
+  } else { // More channels: only a pointer to the caller's color data is stored.
+    draw_helper_out.color_union.ptr = const_cast<float*>(vert_col);
+  }
+  END_PARALLEL();
+};
+
+} // namespace Renderer
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_signature.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_signature.instantiate.h
new file mode 100644
index 0000000000000000000000000000000000000000..6afa95b44b161d8881b79b22e119c89aad522cc6
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_signature.instantiate.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_CALC_SIGNATURE_INSTANTIATE_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_CALC_SIGNATURE_INSTANTIATE_H_
+
+#include "./renderer.calc_signature.device.h"
+
+namespace pulsar {
+namespace Renderer {
+template GLOBAL void calc_signature<ISONDEVICE>( // Explicit instantiation for the ISONDEVICE template argument.
+    Renderer renderer,
+    float3 const* const RESTRICT vert_poss,
+    float const* const RESTRICT vert_cols,
+    float const* const RESTRICT vert_rads,
+    const uint num_balls);
+}
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.construct.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.construct.device.h
new file mode 100644
index 0000000000000000000000000000000000000000..8e9ada4b2d62f9ea5d0ed003bd13d016d80b6e9d
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.construct.device.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_CONSTRUCT_DEVICE_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_CONSTRUCT_DEVICE_H_
+
+#include "../global.h"
+#include "./camera.device.h"
+#include "./commands.h"
+#include "./math.h"
+#include "./renderer.h"
+
+namespace pulsar {
+namespace Renderer {
+
+template <bool DEV>
+HOST void construct( // Validates the arguments, stores the camera settings and allocates all renderer buffers.
+    Renderer* self,
+    const size_t& max_num_balls,
+    const int& width,
+    const int& height,
+    const bool& orthogonal_projection,
+    const bool& right_handed_system,
+    const float& background_normalization_depth,
+    const uint& n_channels,
+    const uint& n_track) {
+  ARGCHECK(
+      (max_num_balls > 0 && max_num_balls < MAX_INT),
+      2,
+      ("the maximum number of balls must be >0 and <" +
+       std::to_string(MAX_INT) + ". Is " + std::to_string(max_num_balls) + ".")
+          .c_str());
+  ARGCHECK(width > 1, 3, "the image width must be > 1");
+  ARGCHECK(height > 1, 4, "the image height must be > 1");
+  ARGCHECK(
+      background_normalization_depth > 0.f &&
+          background_normalization_depth < 1.f,
+      6,
+      "background_normalization_depth must be in ]0., 1.[.");
+  ARGCHECK(n_channels > 0, 7, "n_channels must be >0!");
+  ARGCHECK(
+      n_track > 0 && n_track <= MAX_GRAD_SPHERES,
+      8,
+      ("n_track must be >0 and <=" + std::to_string(MAX_GRAD_SPHERES) + ". Is " +
+       std::to_string(n_track) + ".")
+          .c_str());
+  self->cam.film_width = width;
+  self->cam.film_height = height;
+  self->max_num_balls = max_num_balls;
+  MALLOC(self->result_d, float, static_cast<size_t>(width) * height * n_channels); // size_t math: no 32-bit wrap-around.
+  self->cam.orthogonal_projection = orthogonal_projection;
+  self->cam.right_handed = right_handed_system;
+  self->cam.background_normalization_depth = background_normalization_depth;
+  self->cam.n_channels = n_channels;
+  MALLOC(self->min_depth_d, float, max_num_balls);
+  MALLOC(self->min_depth_sorted_d, float, max_num_balls);
+  MALLOC(self->ii_d, IntersectInfo, max_num_balls);
+  MALLOC(self->ii_sorted_d, IntersectInfo, max_num_balls);
+  MALLOC(self->ids_d, int, max_num_balls);
+  MALLOC(self->ids_sorted_d, int, max_num_balls);
+  size_t sort_id_size = 0; // Workspace requirements of the sort/select/sum/reduce helpers follow.
+  GET_SORT_WS_SIZE(&sort_id_size, float, int, max_num_balls);
+  CHECKLAUNCH();
+  size_t sort_ii_size = 0;
+  GET_SORT_WS_SIZE(&sort_ii_size, float, IntersectInfo, max_num_balls);
+  CHECKLAUNCH();
+  size_t sort_di_size = 0;
+  GET_SORT_WS_SIZE(&sort_di_size, float, DrawInfo, max_num_balls);
+  CHECKLAUNCH();
+  size_t select_ii_size = 0;
+  GET_SELECT_WS_SIZE(&select_ii_size, char, IntersectInfo, max_num_balls);
+  size_t select_di_size = 0;
+  GET_SELECT_WS_SIZE(&select_di_size, char, DrawInfo, max_num_balls);
+  size_t sum_size = 0;
+  GET_SUM_WS_SIZE(&sum_size, CamGradInfo, max_num_balls);
+  size_t sum_cont_size = 0;
+  GET_SUM_WS_SIZE(&sum_cont_size, int, max_num_balls);
+  size_t reduce_size = 0;
+  GET_REDUCE_WS_SIZE(
+      &reduce_size, IntersectInfo, IntersectInfoMinMax(), max_num_balls);
+  self->workspace_size = IMAX( // A single workspace sized for the largest requirement above.
+      IMAX(IMAX(sort_id_size, sort_ii_size), sort_di_size),
+      IMAX(
+          IMAX(select_di_size, select_ii_size),
+          IMAX(IMAX(sum_size, sum_cont_size), reduce_size)));
+  MALLOC(self->workspace_d, char, self->workspace_size);
+  MALLOC(self->di_d, DrawInfo, max_num_balls);
+  MALLOC(self->di_sorted_d, DrawInfo, max_num_balls);
+  MALLOC(self->region_flags_d, char, max_num_balls);
+  MALLOC(self->num_selected_d, size_t, 1);
+  MALLOC(self->forw_info_d, float, static_cast<size_t>(width) * height * (3 + 2 * n_track)); // (3 + 2 * n_track) floats per pixel.
+  MALLOC(self->min_max_pixels_d, IntersectInfo, 1);
+  MALLOC(self->grad_pos_d, float3, max_num_balls);
+  MALLOC(self->grad_col_d, float, max_num_balls* n_channels);
+  MALLOC(self->grad_rad_d, float, max_num_balls);
+  MALLOC(self->grad_cam_d, float, 12);
+  MALLOC(self->grad_cam_buf_d, CamGradInfo, max_num_balls);
+  MALLOC(self->grad_opy_d, float, max_num_balls);
+  MALLOC(self->n_grad_contributions_d, int, 1);
+  self->n_track = static_cast<int>(n_track);
+}
+
+} // namespace Renderer
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.construct.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.construct.instantiate.h
new file mode 100644
index 0000000000000000000000000000000000000000..e5ce722e29b063b04cb8efc0e880d9332bd35f23
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.construct.instantiate.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_CONSTRUCT_INSTANTIATE_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_CONSTRUCT_INSTANTIATE_H_
+
+#include "./renderer.construct.device.h"
+
+namespace pulsar {
+namespace Renderer {
+template void construct<ISONDEVICE>( // Explicit instantiation for the ISONDEVICE template argument.
+    Renderer* self,
+    const size_t& max_num_balls,
+    const int& width,
+    const int& height,
+    const bool& orthogonal_projection,
+    const bool& right_handed_system,
+    const float& background_normalization_depth,
+    const uint& n_channels,
+    const uint& n_track);
+}
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.create_selector.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.create_selector.device.h
new file mode 100644
index 0000000000000000000000000000000000000000..747ad03cd3a3a49c34d81485a1780d81a332a215
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.create_selector.device.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_CREATE_SELECTOR_DEVICE_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_CREATE_SELECTOR_DEVICE_H_
+
+#include "../global.h"
+#include "./commands.h"
+#include "./renderer.h"
+
+namespace pulsar {
+namespace Renderer {
+
+template <bool DEV>
+GLOBAL void create_selector( // Flags every ball whose pixel bounds overlap the given region.
+    IntersectInfo const* const RESTRICT ii_sorted_d,
+    const uint num_balls,
+    const int min_x,
+    const int max_x,
+    const int min_y,
+    const int max_y,
+    char* RESTRICT region_flags_d /* Out: one flag per ball. */) {
+  GET_PARALLEL_IDX_1D(idx, num_balls);
+  const IntersectInfo& ball = ii_sorted_d[idx]; // Pixel bounds of this ball.
+  const bool in_x = static_cast<int>(ball.min.x) <= max_x &&
+      static_cast<int>(ball.max.x) > min_x;
+  const bool in_y = static_cast<int>(ball.min.y) <= max_y &&
+      static_cast<int>(ball.max.y) > min_y;
+  region_flags_d[idx] = in_x && in_y;
+  END_PARALLEL_NORET();
+}
+
+} // namespace Renderer
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.create_selector.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.create_selector.instantiate.h
new file mode 100644
index 0000000000000000000000000000000000000000..8e91a8bfb8e9b0f03db39c001e9363920b2eb35f
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.create_selector.instantiate.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_CREATE_SELECTOR_INSTANTIATE_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_CREATE_SELECTOR_INSTANTIATE_H_
+
+#include "./renderer.create_selector.device.h"
+
+namespace pulsar {
+namespace Renderer {
+
+template GLOBAL void create_selector<ISONDEVICE>( // Explicit instantiation for the ISONDEVICE template argument.
+    IntersectInfo const* const RESTRICT ii_sorted_d,
+    const uint num_balls,
+    const int min_x,
+    const int max_x,
+    const int min_y,
+    const int max_y,
+    /* Out variables. */
+    char* RESTRICT region_flags_d);
+
+}
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.destruct.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.destruct.device.h
new file mode 100644
index 0000000000000000000000000000000000000000..8520233c59be062fa72376158a9935afa50c3950
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.destruct.device.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_DESTRUCT_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_DESTRUCT_H_
+
+#include "../global.h"
+#include "./commands.h"
+#include "./renderer.h"
+
+namespace pulsar {
+namespace Renderer {
+
+template <bool DEV>
+HOST void destruct(Renderer* self) { // Frees every non-NULL buffer held by `self` and resets each pointer to NULL.
+  if (self->result_d != NULL)
+    FREE(self->result_d);
+  self->result_d = NULL; // Reset so that a later call does not free this buffer again.
+  if (self->min_depth_d != NULL)
+    FREE(self->min_depth_d);
+  self->min_depth_d = NULL;
+  if (self->min_depth_sorted_d != NULL)
+    FREE(self->min_depth_sorted_d);
+  self->min_depth_sorted_d = NULL;
+  if (self->ii_d != NULL)
+    FREE(self->ii_d);
+  self->ii_d = NULL;
+  if (self->ii_sorted_d != NULL)
+    FREE(self->ii_sorted_d);
+  self->ii_sorted_d = NULL;
+  if (self->ids_d != NULL)
+    FREE(self->ids_d);
+  self->ids_d = NULL;
+  if (self->ids_sorted_d != NULL)
+    FREE(self->ids_sorted_d);
+  self->ids_sorted_d = NULL;
+  if (self->workspace_d != NULL)
+    FREE(self->workspace_d);
+  self->workspace_d = NULL;
+  if (self->di_d != NULL)
+    FREE(self->di_d);
+  self->di_d = NULL;
+  if (self->di_sorted_d != NULL)
+    FREE(self->di_sorted_d);
+  self->di_sorted_d = NULL;
+  if (self->region_flags_d != NULL)
+    FREE(self->region_flags_d);
+  self->region_flags_d = NULL;
+  if (self->num_selected_d != NULL)
+    FREE(self->num_selected_d);
+  self->num_selected_d = NULL;
+  if (self->forw_info_d != NULL)
+    FREE(self->forw_info_d);
+  self->forw_info_d = NULL;
+  if (self->min_max_pixels_d != NULL)
+    FREE(self->min_max_pixels_d);
+  self->min_max_pixels_d = NULL;
+  if (self->grad_pos_d != NULL) // Gradient buffers follow.
+    FREE(self->grad_pos_d);
+  self->grad_pos_d = NULL;
+  if (self->grad_col_d != NULL)
+    FREE(self->grad_col_d);
+  self->grad_col_d = NULL;
+  if (self->grad_rad_d != NULL)
+    FREE(self->grad_rad_d);
+  self->grad_rad_d = NULL;
+  if (self->grad_cam_d != NULL)
+    FREE(self->grad_cam_d);
+  self->grad_cam_d = NULL;
+  if (self->grad_cam_buf_d != NULL)
+    FREE(self->grad_cam_buf_d);
+  self->grad_cam_buf_d = NULL;
+  if (self->grad_opy_d != NULL)
+    FREE(self->grad_opy_d);
+  self->grad_opy_d = NULL;
+  if (self->n_grad_contributions_d != NULL)
+    FREE(self->n_grad_contributions_d);
+  self->n_grad_contributions_d = NULL;
+}
+
+} // namespace Renderer
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.destruct.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.destruct.instantiate.h
new file mode 100644
index 0000000000000000000000000000000000000000..d41ba5a323d0bed9196dc804fab87929b2a726af
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.destruct.instantiate.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_DESTRUCT_INSTANTIATE_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_DESTRUCT_INSTANTIATE_H_
+
+#include "./renderer.destruct.device.h"
+
+namespace pulsar {
+namespace Renderer {
+template void destruct<ISONDEVICE>(Renderer* self); // explicit instantiation
+} // namespace Renderer
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.draw.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.draw.device.h
new file mode 100644
index 0000000000000000000000000000000000000000..cb8ecabed3eefce77f7120d234fad15b0bed064c
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.draw.device.h
@@ -0,0 +1,846 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_CALC_SIGNATURE_DEVICE_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_CALC_SIGNATURE_DEVICE_H_
+
+#include "../global.h"
+#include "./camera.device.h"
+#include "./commands.h"
+#include "./math.h"
+#include "./renderer.h"
+
+namespace pulsar {
+namespace Renderer {
+
+/**
+ * Draw a ball into the `result`.
+ *
+ * Returns whether a hit was noticed. See README for an explanation of sphere
+ * points and variable notation.
+ */
+INLINE DEVICE bool draw(
+    /* In variables. */
+    const DrawInfo& draw_info, /** The draw information for this ball. */
+    const float& opacity, /** The sphere opacity. */
+    const CamInfo&
+        cam, /** Camera information. Doesn't have to be normalized. */
+    const float& gamma, /** 'Transparency' indicator (see paper for details). */
+    const float3& ray_dir_norm, /** The direction of the ray, normalized. */
+    const float2& projected_ray, /** The intersection of the ray with the image
+                                    in pixel space. */
+    /** Mode switches. */
+    const bool& draw_only, /** Whether we are in draw vs. grad mode. */
+    const bool& calc_grad_pos, /** Calculate position gradients. */
+    const bool& calc_grad_col, /** Calculate color gradients. */
+    const bool& calc_grad_rad, /** Calculate radius gradients. */
+    const bool& calc_grad_cam, /** Calculate camera gradients. */
+    const bool& calc_grad_opy, /** Calculate opacity gradients. */
+    /** Position info. */
+    const uint& coord_x, /** The pixel position x to draw at. */
+    const uint& coord_y, /** The pixel position y to draw at. */
+    const uint& idx, /** The id of the sphere to process. */
+    /* Optional in variables. */
+    IntersectInfo const* const RESTRICT
+        intersect_info, /** The intersect information for this ball. */
+    float3 const* const RESTRICT ray_dir, /** The ray direction (not normalized)
+                             to draw at. Only used for grad computation. */
+    float const* const RESTRICT norm_ray_dir, /** The length of the direction
+                                 vector. Only used for grad computation. */
+    float const* const RESTRICT grad_pix, /** The gradient for this pixel. Only
+                              used for grad computation. */
+    float const* const RESTRICT
+        ln_pad_over_1minuspad, /** Allowed percentage indicator. */
+    /* In or out variables, depending on mode. */
+    float* const RESTRICT sm_d, /** Normalization denominator. */
+    float* const RESTRICT
+        sm_m, /** Maximum of normalization weight factors observed. */
+    float* const RESTRICT
+        result, /** Result pixel color. Must be zeros initially. */
+    /* Optional out variables. */
+    float* const RESTRICT depth_threshold, /** The depth threshold to use. Only
+                                              used for rendering. */
+    float* const RESTRICT intersection_depth_norm_out, /** The intersection
+                                             depth. Only set when rendering. */
+    float3* const RESTRICT grad_pos, /** Gradient w.r.t. position. */
+    float* const RESTRICT grad_col, /** Gradient w.r.t. color. */
+    float* const RESTRICT grad_rad, /** Gradient w.r.t. radius. */
+    CamGradInfo* const RESTRICT grad_cam, /** Gradient w.r.t. camera. */
+    float* const RESTRICT grad_opy /** Gradient w.r.t. opacity. */
+) {
+  // TODO: variable reuse?
+  PASSERT(
+      isfinite(draw_info.ray_center_norm.x) &&
+      isfinite(draw_info.ray_center_norm.y) &&
+      isfinite(draw_info.ray_center_norm.z));
+  PASSERT(isfinite(draw_info.t_center) && draw_info.t_center >= 0.f);
+  PASSERT(
+      isfinite(draw_info.radius) && draw_info.radius >= 0.f &&
+      draw_info.radius <= draw_info.t_center);
+  PASSERT(isfinite(ray_dir_norm.x));
+  PASSERT(isfinite(ray_dir_norm.y));
+  PASSERT(isfinite(ray_dir_norm.z));
+  PASSERT(isfinite(*sm_d));
+  PASSERT(
+      cam.orthogonal_projection && cam.focal_length == 0.f ||
+      cam.focal_length > 0.f);
+  PASSERT(gamma <= 1.f && gamma >= 1e-5f);
+  /** The ball center in the camera coordinate system. */
+  float3 center = draw_info.ray_center_norm * draw_info.t_center;
+  /** The vector from the reference point to the ball center. */
+  float3 raydiff;
+  if (cam.orthogonal_projection) {
+    center = rotate(
+        center,
+        cam.pixel_dir_x / length(cam.pixel_dir_x),
+        cam.pixel_dir_y / length(cam.pixel_dir_y),
+        cam.sensor_dir_z);
+    raydiff =
+        make_float3( // TODO: make offset consistent with `get_screen_area`.
+            center.x -
+                (projected_ray.x -
+                 static_cast<float>(cam.aperture_width) * .5f) *
+                    (2.f * cam.half_pixel_size),
+            center.y -
+                (projected_ray.y -
+                 static_cast<float>(cam.aperture_height) * .5f) *
+                    (2.f * cam.half_pixel_size),
+            0.f);
+  } else {
+    /** The reference point on the ray; the point in the same distance
+     * from the camera as the ball center, but along the ray.
+     */
+    const float3 rayref = ray_dir_norm * draw_info.t_center;
+    raydiff = center - rayref;
+  }
+  /** The closeness of the reference point to ball center in world coords.
+   *
+   * In [0., radius].
+   */
+  const float closeness_world = length(raydiff);
+  /** The reciprocal radius. */
+  const float radius_rcp = FRCP(draw_info.radius);
+  /** The closeness factor normalized with the ball radius.
+   *
+   * In [0., 1.].
+   */
+  float closeness = FSATURATE(FMA(-closeness_world, radius_rcp, 1.f));
+  PULSAR_LOG_DEV_PIX(
+      PULSAR_LOG_DRAW_PIX,
+      "drawprep %u|center: %.9f, %.9f, %.9f. raydiff: %.9f, "
+      "%.9f, %.9f. closeness_world: %.9f. closeness: %.9f\n",
+      idx,
+      center.x,
+      center.y,
+      center.z,
+      raydiff.x,
+      raydiff.y,
+      raydiff.z,
+      closeness_world,
+      closeness);
+  /** Whether this is the 'center pixel' for this ball, the pixel that
+   * is closest to its projected center. This information is used to
+   * make sure to draw 'tiny' spheres with less than one pixel in
+   * projected size.
+   */
+  bool ray_through_center_pixel;
+  float projected_radius, projected_x, projected_y;
+  if (cam.orthogonal_projection) {
+    projected_x = center.x / (2.f * cam.half_pixel_size) +
+        (static_cast<float>(cam.aperture_width) - 1.f) / 2.f;
+    projected_y = center.y / (2.f * cam.half_pixel_size) +
+        (static_cast<float>(cam.aperture_height) - 1.f) / 2.f;
+    projected_radius = draw_info.radius / (2.f * cam.half_pixel_size);
+    ray_through_center_pixel =
+        (FABS(FSUB(projected_x, projected_ray.x)) < 0.5f + FEPS &&
+         FABS(FSUB(projected_y, projected_ray.y)) < 0.5f + FEPS);
+    PULSAR_LOG_DEV_PIX(
+        PULSAR_LOG_DRAW_PIX,
+        "drawprep %u|closeness_world: %.9f. closeness: %.9f. "
+        "projected (x, y): %.9f, %.9f. projected_ray (x, y): "
+        "%.9f, %.9f. ray_through_center_pixel: %d.\n",
+        idx,
+        closeness_world,
+        closeness,
+        projected_x,
+        projected_y,
+        projected_ray.x,
+        projected_ray.y,
+        ray_through_center_pixel);
+  } else {
+    // Misusing this variable for half pixel size projected to the depth
+    // at which the sphere resides. Leave some slack for numerical
+    // inaccuracy (factor 1.5).
+    projected_x = FMUL(cam.half_pixel_size * 1.5, draw_info.t_center) *
+        FRCP(cam.focal_length);
+    projected_radius = FMUL(draw_info.radius, cam.focal_length) *
+        FRCP(draw_info.t_center) / (2.f * cam.half_pixel_size);
+    ray_through_center_pixel = projected_x > closeness_world;
+    PULSAR_LOG_DEV_PIX(
+        PULSAR_LOG_DRAW_PIX,
+        "drawprep %u|closeness_world: %.9f. closeness: %.9f. "
+        "projected half pixel size: %.9f. "
+        "ray_through_center_pixel: %d.\n",
+        idx,
+        closeness_world,
+        closeness,
+        projected_x,
+        ray_through_center_pixel);
+  }
+  if (draw_only && draw_info.radius < closeness_world &&
+      !ray_through_center_pixel) {
+    PULSAR_LOG_DEV_PIX(
+        PULSAR_LOG_DRAW_PIX,
+        "drawprep %u|Abandoning since no hit has been detected.\n",
+        idx);
+    return false;
+  } else {
+    // This is always a hit since we are following the forward execution pass.
+    // p2 is the closest intersection point with the sphere.
+  }
+  if (ray_through_center_pixel && projected_radius < 1.f) {
+    // Make a tiny sphere visible.
+    PULSAR_LOG_DEV_PIX(
+        PULSAR_LOG_DRAW_PIX,
+        "drawprep %u|Setting closeness to 1 (projected radius: %.9f).\n",
+        idx,
+        projected_radius);
+    closeness = 1.;
+  }
+  PASSERT(closeness >= 0.f && closeness <= 1.f);
+  /** Distance between the camera (`o`) and `p1`, the closest point to the
+   * ball center along the casted ray.
+   *
+   * In [t_center - radius, t_center].
+   */
+  float o__p1_;
+  /** The distance from ball center to p1.
+   *
+   * In [0., sqrt(t_center ^ 2 - (t_center - radius) ^ 2)].
+   */
+  float c__p1_;
+  if (cam.orthogonal_projection) {
+    o__p1_ = FABS(center.z);
+    c__p1_ = length(raydiff);
+  } else {
+    o__p1_ = dot(center, ray_dir_norm);
+    /**
+     * This is being calculated as sqrt(t_center^2 - o__p1_^2) =
+     * sqrt((t_center + o__p1_) * (t_center - o__p1_)) to avoid
+     * catastrophic cancellation in floating point representations.
+     */
+    c__p1_ = FSQRT(
+        (draw_info.t_center + o__p1_) * FMAX(draw_info.t_center - o__p1_, 0.f));
+    // PASSERT(o__p1_ >= draw_info.t_center - draw_info.radius);
+    // Numerical errors lead to too large values.
+    o__p1_ = FMIN(o__p1_, draw_info.t_center);
+    // PASSERT(o__p1_ <= draw_info.t_center);
+  }
+  /** The distance from the closest point to the sphere center (p1)
+   * to the closest intersection point (p2).
+   *
+   * In [0., radius].
+   */
+  const float p1__p2_ =
+      FSQRT((draw_info.radius + c__p1_) * FMAX(draw_info.radius - c__p1_, 0.f));
+  PASSERT(p1__p2_ >= 0.f && p1__p2_ <= draw_info.radius);
+  PULSAR_LOG_DEV_PIX(
+      PULSAR_LOG_DRAW_PIX,
+      "drawprep %u|o__p1_: %.9f, c__p1_: %.9f, p1__p2_: %.9f.\n",
+      idx,
+      o__p1_,
+      c__p1_,
+      p1__p2_);
+  /** The intersection depth of the ray with this ball.
+   *
+   * In [t_center - radius, t_center].
+   */
+  const float intersection_depth = (o__p1_ - p1__p2_);
+  PASSERT(
+      cam.orthogonal_projection &&
+          (intersection_depth >= center.z - draw_info.radius &&
+           intersection_depth <= center.z) ||
+      intersection_depth >= draw_info.t_center - draw_info.radius &&
+          intersection_depth <= draw_info.t_center);
+  /** Normalized distance of the closest intersection point; in [0., 1.]. */
+  const float norm_dist =
+      FMUL(FSUB(intersection_depth, cam.min_dist), cam.norm_fac);
+  PASSERT(norm_dist >= 0.f && norm_dist <= 1.f);
+  /** Scaled, normalized distance in [1., 0.] (closest, farthest). */
+  const float norm_dist_scaled = FSUB(1.f, norm_dist) / gamma * opacity;
+  PASSERT(norm_dist_scaled >= 0.f && norm_dist_scaled <= 1.f / gamma);
+  PULSAR_LOG_DEV_PIX(
+      PULSAR_LOG_DRAW_PIX,
+      "drawprep %u|intersection_depth: %.9f, norm_dist: %.9f, "
+      "norm_dist_scaled: %.9f.\n",
+      idx,
+      intersection_depth,
+      norm_dist,
+      norm_dist_scaled);
+  float const* const col_ptr =
+      cam.n_channels > 3 ? draw_info.color_union.ptr : &draw_info.first_color;
+  // The implementation for the numerically stable weighted softmax is based
+  // on https://arxiv.org/pdf/1805.02867.pdf .
+  if (draw_only) {
+    /** The old maximum observed value. */
+    const float sm_m_old = *sm_m;
+    *sm_m = FMAX(*sm_m, norm_dist_scaled);
+    const float coeff_exp = FEXP(norm_dist_scaled - *sm_m);
+    PASSERT(isfinite(coeff_exp));
+    /** The color coefficient for the ball color; in [0., 1.]. */
+    const float coeff = closeness * coeff_exp * opacity;
+    PULSAR_LOG_DEV_PIX(
+        PULSAR_LOG_DRAW_PIX,
+        "draw %u|coeff: %.9f. closeness: %.9f. coeff_exp: %.9f. "
+        "opacity: %.9f.\n",
+        idx,
+        coeff,
+        closeness,
+        coeff_exp,
+        opacity);
+    // Rendering.
+    if (sm_m_old == *sm_m) {
+      // Use the fact that exp(0) = 1 to avoid the exp calculation for
+      // the case that the maximum remains the same (which it should
+      // most of the time).
+      *sm_d = FADD(*sm_d, coeff);
+      for (uint c_id = 0; c_id < cam.n_channels; ++c_id) {
+        PASSERT(isfinite(result[c_id]));
+        result[c_id] = FMA(coeff, col_ptr[c_id], result[c_id]);
+      }
+    } else {
+      const float exp_correction = FEXP(sm_m_old - *sm_m);
+      *sm_d = FMA(*sm_d, exp_correction, coeff);
+      for (uint c_id = 0; c_id < cam.n_channels; ++c_id) {
+        PASSERT(isfinite(result[c_id]));
+        result[c_id] =
+            FMA(coeff, col_ptr[c_id], FMUL(result[c_id], exp_correction));
+      }
+    }
+    PASSERT(isfinite(*sm_d));
+    *intersection_depth_norm_out = intersection_depth;
+    // Update the depth threshold.
+    *depth_threshold =
+        1.f - (FLN(*sm_d + FEPS) + *ln_pad_over_1minuspad + *sm_m) * gamma;
+    *depth_threshold =
+        FMA(*depth_threshold, FSUB(cam.max_dist, cam.min_dist), cam.min_dist);
+  } else {
+    // Gradient computation.
+    const float coeff_exp = FEXP(norm_dist_scaled - *sm_m);
+    const float gamma_rcp = FRCP(gamma);
+    const float radius_sq = FMUL(draw_info.radius, draw_info.radius);
+    const float coeff = FMAX(
+        FMIN(closeness * coeff_exp * opacity, *sm_d - FEPS),
+        0.f); // in [0., sm_d - FEPS].
+    PASSERT(coeff >= 0.f && coeff <= *sm_d);
+    const float otherw = *sm_d - coeff; // in [FEPS, sm_d].
+    const float p1__p2_safe = FMAX(p1__p2_, FEPS); // in [eps, t_center].
+    const float cam_range = FSUB(cam.max_dist, cam.min_dist); // in ]0, inf[
+    PULSAR_LOG_DEV_PIX(
+        PULSAR_LOG_GRAD,
+        "grad %u|pos: %.9f, %.9f, %.9f. pixeldirx: %.9f, %.9f, %.9f. "
+        "pixeldiry: %.9f, %.9f, %.9f. pixel00center: %.9f, %.9f, %.9f.\n",
+        idx,
+        draw_info.ray_center_norm.x * draw_info.t_center,
+        draw_info.ray_center_norm.y * draw_info.t_center,
+        draw_info.ray_center_norm.z * draw_info.t_center,
+        cam.pixel_dir_x.x,
+        cam.pixel_dir_x.y,
+        cam.pixel_dir_x.z,
+        cam.pixel_dir_y.x,
+        cam.pixel_dir_y.y,
+        cam.pixel_dir_y.z,
+        cam.pixel_0_0_center.x,
+        cam.pixel_0_0_center.y,
+        cam.pixel_0_0_center.z);
+    PULSAR_LOG_DEV_PIX(
+        PULSAR_LOG_GRAD,
+        "grad %u|ray_dir: %.9f, %.9f, %.9f. "
+        "ray_dir_norm: %.9f, %.9f, %.9f. "
+        "draw_info.ray_center_norm: %.9f, %.9f, %.9f.\n",
+        idx,
+        ray_dir->x,
+        ray_dir->y,
+        ray_dir->z,
+        ray_dir_norm.x,
+        ray_dir_norm.y,
+        ray_dir_norm.z,
+        draw_info.ray_center_norm.x,
+        draw_info.ray_center_norm.y,
+        draw_info.ray_center_norm.z);
+    PULSAR_LOG_DEV_PIX(
+        PULSAR_LOG_GRAD,
+        "grad %u|coeff_exp: %.9f. "
+        "norm_dist_scaled: %.9f. cam.norm_fac: %f.\n",
+        idx,
+        coeff_exp,
+        norm_dist_scaled,
+        cam.norm_fac);
+    PULSAR_LOG_DEV_PIX(
+        PULSAR_LOG_GRAD,
+        "grad %u|p1__p2_: %.9f. p1__p2_safe: %.9f.\n",
+        idx,
+        p1__p2_,
+        p1__p2_safe);
+    PULSAR_LOG_DEV_PIX(
+        PULSAR_LOG_GRAD,
+        "grad %u|o__p1_: %.9f. c__p1_: %.9f.\n",
+        idx,
+        o__p1_,
+        c__p1_);
+    PULSAR_LOG_DEV_PIX(
+        PULSAR_LOG_GRAD,
+        "grad %u|intersection_depth: %f. norm_dist: %f. "
+        "coeff: %.9f. closeness: %f. coeff_exp: %f. opacity: "
+        "%f. color: %f, %f, %f.\n",
+        idx,
+        intersection_depth,
+        norm_dist,
+        coeff,
+        closeness,
+        coeff_exp,
+        opacity,
+        draw_info.first_color,
+        draw_info.color_union.color[0],
+        draw_info.color_union.color[1]);
+    PULSAR_LOG_DEV_PIX(
+        PULSAR_LOG_GRAD,
+        "grad %u|t_center: %.9f. "
+        "radius: %.9f. max_dist: %f. min_dist: %f. gamma: %f.\n",
+        idx,
+        draw_info.t_center,
+        draw_info.radius,
+        cam.max_dist,
+        cam.min_dist,
+        gamma);
+    PULSAR_LOG_DEV_PIX(
+        PULSAR_LOG_GRAD,
+        "grad %u|sm_d: %f. sm_m: %f. grad_pix (first three): %f, %f, %f.\n",
+        idx,
+        *sm_d,
+        *sm_m,
+        grad_pix[0],
+        grad_pix[1],
+        grad_pix[2]);
+    PULSAR_LOG_DEV_PIX(PULSAR_LOG_GRAD, "grad %u|otherw: %f.\n", idx, otherw);
+    if (calc_grad_col) {
+      const float sm_d_norm = FRCP(FMAX(*sm_d, FEPS));
+      // First do the multiplication of coeff (in [0., sm_d]) and 1/sm_d. The
+      // result is a factor in [0., 1.] to be multiplied with the incoming
+      // gradient.
+      for (uint c_id = 0; c_id < cam.n_channels; ++c_id) {
+        ATOMICADD(grad_col + c_id, grad_pix[c_id] * FMUL(coeff, sm_d_norm));
+      }
+      PULSAR_LOG_DEV_PIX(
+          PULSAR_LOG_GRAD,
+          "grad %u|dimDdcol.x: %f. dresDdcol.x: %f.\n",
+          idx,
+          FMUL(coeff, sm_d_norm) * grad_pix[0],
+          coeff * sm_d_norm);
+    }
+    // We disable the computation for too small spheres.
+    // The comparison is made this way to avoid subtraction of unsigned types.
+    if (calc_grad_cam || calc_grad_pos || calc_grad_rad || calc_grad_opy) {
+      //! First find dimDdcoeff.
+      const float n0 =
+          otherw * FRCP(FMAX(*sm_d * *sm_d, FEPS)); // in [0., 1. / sm_d].
+      PASSERT(isfinite(n0) && n0 >= 0. && n0 <= 1. / *sm_d + 1e2f * FEPS);
+      // We'll aggregate dimDdcoeff over all the 'color' channels.
+      float dimDdcoeff = 0.f;
+      const float otherw_safe_rcp = FRCP(FMAX(otherw, FEPS));
+      float othercol;
+      for (uint c_id = 0; c_id < cam.n_channels; ++c_id) {
+        othercol =
+            (result[c_id] * *sm_d - col_ptr[c_id] * coeff) * otherw_safe_rcp;
+        PULSAR_LOG_DEV_PIX(
+            PULSAR_LOG_GRAD,
+            "grad %u|othercol[%u]: %.9f.\n",
+            idx,
+            c_id,
+            othercol);
+        dimDdcoeff +=
+            FMUL(FMUL(grad_pix[c_id], FSUB(col_ptr[c_id], othercol)), n0);
+      }
+      PASSERT(isfinite(dimDdcoeff));
+      PULSAR_LOG_DEV_PIX(
+          PULSAR_LOG_GRAD,
+          "grad %u|dimDdcoeff: %.9f, n0: %f.\n",
+          idx,
+          dimDdcoeff,
+          n0);
+      if (calc_grad_opy) {
+        //! dimDdopacity.
+        *grad_opy += dimDdcoeff * coeff_exp * closeness *
+            (1.f + opacity * (1.f - norm_dist) * gamma_rcp);
+        PULSAR_LOG_DEV_PIX(
+            PULSAR_LOG_GRAD,
+            "grad %u|dcoeffDdopacity: %.9f, dimDdopacity: %.9f.\n",
+            idx,
+            coeff_exp * closeness,
+            dimDdcoeff * coeff_exp * closeness);
+      }
+      if (intersect_info->max.x >= intersect_info->min.x + 3 &&
+          intersect_info->max.y >= intersect_info->min.y + 3) {
+        //! Now find dcoeffDdintersection_depth and dcoeffDdcloseness.
+        const float dcoeffDdintersection_depth =
+            -closeness * coeff_exp * opacity * opacity / (gamma * cam_range);
+        const float dcoeffDdcloseness = coeff_exp * opacity;
+        PULSAR_LOG_DEV_PIX(
+            PULSAR_LOG_GRAD,
+            "grad %u|dcoeffDdintersection_depth: %.9f. "
+            "dimDdintersection_depth: %.9f. "
+            "dcoeffDdcloseness: %.9f. dimDdcloseness: %.9f.\n",
+            idx,
+            dcoeffDdintersection_depth,
+            dimDdcoeff * dcoeffDdintersection_depth,
+            dcoeffDdcloseness,
+            dimDdcoeff * dcoeffDdcloseness);
+        //! Here, the execution paths for orthogonal and pinhole camera split.
+        if (cam.orthogonal_projection) {
+          if (calc_grad_rad) {
+            //! Find dcoeffDdrad.
+            float dcoeffDdrad =
+                dcoeffDdcloseness * (closeness_world / radius_sq) -
+                dcoeffDdintersection_depth * draw_info.radius / p1__p2_safe;
+            PASSERT(isfinite(dcoeffDdrad));
+            *grad_rad += FMUL(dimDdcoeff, dcoeffDdrad);
+            PULSAR_LOG_DEV_PIX(
+                PULSAR_LOG_GRAD,
+                "grad %u|dimDdrad: %.9f. dcoeffDdrad: %.9f.\n",
+                idx,
+                FMUL(dimDdcoeff, dcoeffDdrad),
+                dcoeffDdrad);
+          }
+          if (calc_grad_pos || calc_grad_cam) {
+            float3 dimDdcenter = raydiff /
+                p1__p2_safe; /* making it dintersection_depthDdcenter. */
+            dimDdcenter.z = sign_dir(center.z);
+            PASSERT(FABS(center.z) >= cam.min_dist && cam.min_dist >= FEPS);
+            dimDdcenter *= dcoeffDdintersection_depth; // dcoeffDdcenter
+            dimDdcenter -= dcoeffDdcloseness * /* dclosenessDdcenter. */
+                raydiff * FRCP(FMAX(length(raydiff) * draw_info.radius, FEPS));
+            PULSAR_LOG_DEV_PIX(
+                PULSAR_LOG_GRAD,
+                "grad %u|dcoeffDdcenter: %.9f, %.9f, %.9f.\n",
+                idx,
+                dimDdcenter.x,
+                dimDdcenter.y,
+                dimDdcenter.z);
+            // Now dcoeffDdcenter is stored in dimDdcenter.
+            dimDdcenter *= dimDdcoeff;
+            PULSAR_LOG_DEV_PIX(
+                PULSAR_LOG_GRAD,
+                "grad %u|dimDdcenter: %.9f, %.9f, %.9f.\n",
+                idx,
+                dimDdcenter.x,
+                dimDdcenter.y,
+                dimDdcenter.z);
+            // Prepare for posglob and cam pos.
+            const float pixel_size = length(cam.pixel_dir_x);
+            // pixel_size is the same as length(pixeldiry)!
+            const float pixel_size_rcp = FRCP(pixel_size);
+            float3 dcenterDdposglob =
+                (cam.pixel_dir_x + cam.pixel_dir_y) * pixel_size_rcp +
+                cam.sensor_dir_z;
+            PULSAR_LOG_DEV_PIX(
+                PULSAR_LOG_GRAD,
+                "grad %u|dcenterDdposglob: %.9f, %.9f, %.9f.\n",
+                idx,
+                dcenterDdposglob.x,
+                dcenterDdposglob.y,
+                dcenterDdposglob.z);
+            if (calc_grad_pos) {
+              //! dcenterDdposglob.
+              *grad_pos += dimDdcenter * dcenterDdposglob;
+              PULSAR_LOG_DEV_PIX(
+                  PULSAR_LOG_GRAD,
+                  "grad %u|dimDdpos: %.9f, %.9f, %.9f.\n",
+                  idx,
+                  dimDdcenter.x * dcenterDdposglob.x,
+                  dimDdcenter.y * dcenterDdposglob.y,
+                  dimDdcenter.z * dcenterDdposglob.z);
+            }
+            if (calc_grad_cam) {
+              //! Camera.
+              grad_cam->cam_pos -= dimDdcenter * dcenterDdposglob;
+              PULSAR_LOG_DEV_PIX(
+                  PULSAR_LOG_GRAD,
+                  "grad %u|dimDdeye: %.9f, %.9f, %.9f.\n",
+                  idx,
+                  -dimDdcenter.x * dcenterDdposglob.x,
+                  -dimDdcenter.y * dcenterDdposglob.y,
+                  -dimDdcenter.z * dcenterDdposglob.z);
+              // coord_world
+              /*
+              float3 dclosenessDdcoord_world =
+                raydiff * FRCP(FMAX(draw_info.radius * length(raydiff), FEPS));
+              float3 dintersection_depthDdcoord_world = -2.f * raydiff;
+              */
+              float3 dimDdcoord_world = /* dcoeffDdcoord_world */
+                  dcoeffDdcloseness * raydiff *
+                      FRCP(FMAX(draw_info.radius * length(raydiff), FEPS)) -
+                  dcoeffDdintersection_depth * raydiff / p1__p2_safe;
+              PULSAR_LOG_DEV_PIX(
+                  PULSAR_LOG_GRAD,
+                  "grad %u|dcoeffDdcoord_world: %.9f, %.9f, %.9f.\n",
+                  idx,
+                  dimDdcoord_world.x,
+                  dimDdcoord_world.y,
+                  dimDdcoord_world.z);
+              dimDdcoord_world *= dimDdcoeff;
+              PULSAR_LOG_DEV_PIX(
+                  PULSAR_LOG_GRAD,
+                  "grad %u|dimDdcoord_world: %.9f, %.9f, %.9f.\n",
+                  idx,
+                  dimDdcoord_world.x,
+                  dimDdcoord_world.y,
+                  dimDdcoord_world.z);
+              // The third component of dimDdcoord_world is 0!
+              PASSERT(dimDdcoord_world.z == 0.f);
+              float3 coord_world = center - raydiff;
+              coord_world.z = 0.f;
+              PULSAR_LOG_DEV_PIX(
+                  PULSAR_LOG_GRAD,
+                  "grad %u|coord_world: %.9f, %.9f, %.9f.\n",
+                  idx,
+                  coord_world.x,
+                  coord_world.y,
+                  coord_world.z);
+              // Do this component-wise to save unnecessary matmul steps.
+              grad_cam->pixel_dir_x += dimDdcoord_world.x * cam.pixel_dir_x *
+                  coord_world.x * pixel_size_rcp * pixel_size_rcp;
+              grad_cam->pixel_dir_x += dimDdcoord_world.y * cam.pixel_dir_x *
+                  coord_world.y * pixel_size_rcp * pixel_size_rcp;
+              PULSAR_LOG_DEV_PIX(
+                  PULSAR_LOG_GRAD,
+                  "grad %u|dimDdpixel_dir_x|coord_world: %.9f, %.9f, %.9f.\n",
+                  idx,
+                  grad_cam->pixel_dir_x.x,
+                  grad_cam->pixel_dir_x.y,
+                  grad_cam->pixel_dir_x.z);
+              // dcenterkDdpixel_dir_k.
+              float3 center_in_pixels = draw_info.ray_center_norm *
+                  draw_info.t_center * pixel_size_rcp;
+              grad_cam->pixel_dir_x += dimDdcenter.x *
+                  (center_in_pixels -
+                   outer_product_sum(cam.pixel_dir_x) * center_in_pixels *
+                       pixel_size_rcp * pixel_size_rcp);
+              PULSAR_LOG_DEV_PIX(
+                  PULSAR_LOG_GRAD,
+                  "grad %u|dcenter0dpixel_dir_x: %.9f, %.9f, %.9f.\n",
+                  idx,
+                  (center_in_pixels -
+                   outer_product_sum(cam.pixel_dir_x) * center_in_pixels *
+                       pixel_size_rcp * pixel_size_rcp)
+                      .x,
+                  (center_in_pixels -
+                   outer_product_sum(cam.pixel_dir_x) * center_in_pixels *
+                       pixel_size_rcp * pixel_size_rcp)
+                      .y,
+                  (center_in_pixels -
+                   outer_product_sum(cam.pixel_dir_x) * center_in_pixels *
+                       pixel_size_rcp * pixel_size_rcp)
+                      .z);
+              grad_cam->pixel_dir_y += dimDdcenter.y *
+                  (center_in_pixels -
+                   outer_product_sum(cam.pixel_dir_y) * center_in_pixels *
+                       pixel_size_rcp * pixel_size_rcp);
+              PULSAR_LOG_DEV_PIX(
+                  PULSAR_LOG_GRAD,
+                  "grad %u|dcenter1dpixel_dir_y: %.9f, %.9f, %.9f.\n",
+                  idx,
+                  (center_in_pixels -
+                   outer_product_sum(cam.pixel_dir_y) * center_in_pixels *
+                       pixel_size_rcp * pixel_size_rcp)
+                      .x,
+                  (center_in_pixels -
+                   outer_product_sum(cam.pixel_dir_y) * center_in_pixels *
+                       pixel_size_rcp * pixel_size_rcp)
+                      .y,
+                  (center_in_pixels -
+                   outer_product_sum(cam.pixel_dir_y) * center_in_pixels *
+                       pixel_size_rcp * pixel_size_rcp)
+                      .z);
+              // dcenterzDdpixel_dir_k.
+              float sensordirz_norm_rcp = FRCP(
+                  FMAX(length(cross(cam.pixel_dir_y, cam.pixel_dir_x)), FEPS));
+              grad_cam->pixel_dir_x += dimDdcenter.z *
+                  (dot(center, cam.sensor_dir_z) *
+                       cross(cam.pixel_dir_y, cam.sensor_dir_z) -
+                   cross(cam.pixel_dir_y, center)) *
+                  sensordirz_norm_rcp;
+              PULSAR_LOG_DEV_PIX(
+                  PULSAR_LOG_GRAD,
+                  "grad %u|dcenterzDdpixel_dir_x: %.9f, %.9f, %.9f.\n",
+                  idx,
+                  ((dot(center, cam.sensor_dir_z) *
+                        cross(cam.pixel_dir_y, cam.sensor_dir_z) -
+                    cross(cam.pixel_dir_y, center)) *
+                   sensordirz_norm_rcp)
+                      .x,
+                  ((dot(center, cam.sensor_dir_z) *
+                        cross(cam.pixel_dir_y, cam.sensor_dir_z) -
+                    cross(cam.pixel_dir_y, center)) *
+                   sensordirz_norm_rcp)
+                      .y,
+                  ((dot(center, cam.sensor_dir_z) *
+                        cross(cam.pixel_dir_y, cam.sensor_dir_z) -
+                    cross(cam.pixel_dir_y, center)) *
+                   sensordirz_norm_rcp)
+                      .z);
+              grad_cam->pixel_dir_y += dimDdcenter.z *
+                  (dot(center, cam.sensor_dir_z) *
+                       cross(cam.pixel_dir_x, cam.sensor_dir_z) -
+                   cross(cam.pixel_dir_x, center)) *
+                  sensordirz_norm_rcp;
+              PULSAR_LOG_DEV_PIX(
+                  PULSAR_LOG_GRAD,
+                  "grad %u|dcenterzDdpixel_dir_y: %.9f, %.9f, %.9f.\n",
+                  idx,
+                  ((dot(center, cam.sensor_dir_z) *
+                        cross(cam.pixel_dir_x, cam.sensor_dir_z) -
+                    cross(cam.pixel_dir_x, center)) *
+                   sensordirz_norm_rcp)
+                      .x,
+                  ((dot(center, cam.sensor_dir_z) *
+                        cross(cam.pixel_dir_x, cam.sensor_dir_z) -
+                    cross(cam.pixel_dir_x, center)) *
+                   sensordirz_norm_rcp)
+                      .y,
+                  ((dot(center, cam.sensor_dir_z) *
+                        cross(cam.pixel_dir_x, cam.sensor_dir_z) -
+                    cross(cam.pixel_dir_x, center)) *
+                   sensordirz_norm_rcp)
+                      .z);
+              PULSAR_LOG_DEV_PIX(
+                  PULSAR_LOG_GRAD,
+                  "grad %u|dimDdpixel_dir_x: %.9f, %.9f, %.9f.\n",
+                  idx,
+                  grad_cam->pixel_dir_x.x,
+                  grad_cam->pixel_dir_x.y,
+                  grad_cam->pixel_dir_x.z);
+              PULSAR_LOG_DEV_PIX(
+                  PULSAR_LOG_GRAD,
+                  "grad %u|dimDdpixel_dir_y: %.9f, %.9f, %.9f.\n",
+                  idx,
+                  grad_cam->pixel_dir_y.x,
+                  grad_cam->pixel_dir_y.y,
+                  grad_cam->pixel_dir_y.z);
+            }
+          }
+        } else {
+          if (calc_grad_rad) {
+            //! Find dcoeffDdrad.
+            float dcoeffDdrad =
+                dcoeffDdcloseness * (closeness_world / radius_sq) -
+                dcoeffDdintersection_depth * draw_info.radius / p1__p2_safe;
+            PASSERT(isfinite(dcoeffDdrad));
+            *grad_rad += FMUL(dimDdcoeff, dcoeffDdrad);
+            PULSAR_LOG_DEV_PIX(
+                PULSAR_LOG_GRAD,
+                "grad %u|dimDdrad: %.9f. dcoeffDdrad: %.9f.\n",
+                idx,
+                FMUL(dimDdcoeff, dcoeffDdrad),
+                dcoeffDdrad);
+          }
+          if (calc_grad_pos || calc_grad_cam) {
+            const float3 tmp1 = center - ray_dir_norm * o__p1_;
+            const float3 tmp1n = tmp1 / p1__p2_safe;
+            const float ray_dir_normDotRaydiff = dot(ray_dir_norm, raydiff);
+            const float3 dcoeffDdray = dcoeffDdintersection_depth *
+                    (tmp1 - o__p1_ * tmp1n) / *norm_ray_dir +
+                dcoeffDdcloseness *
+                    (ray_dir_norm * -ray_dir_normDotRaydiff + raydiff) /
+                    (closeness_world * draw_info.radius) *
+                    (draw_info.t_center / *norm_ray_dir);
+            PULSAR_LOG_DEV_PIX(
+                PULSAR_LOG_GRAD,
+                "grad %u|dcoeffDdray: %.9f, %.9f, %.9f. dimDdray: "
+                "%.9f, %.9f, %.9f.\n",
+                idx,
+                dcoeffDdray.x,
+                dcoeffDdray.y,
+                dcoeffDdray.z,
+                dimDdcoeff * dcoeffDdray.x,
+                dimDdcoeff * dcoeffDdray.y,
+                dimDdcoeff * dcoeffDdray.z);
+            const float3 dcoeffDdcenter =
+                dcoeffDdintersection_depth * (ray_dir_norm + tmp1n) +
+                dcoeffDdcloseness *
+                    (draw_info.ray_center_norm * ray_dir_normDotRaydiff -
+                     raydiff) /
+                    (closeness_world * draw_info.radius);
+            PULSAR_LOG_DEV_PIX(
+                PULSAR_LOG_GRAD,
+                "grad %u|dcoeffDdcenter: %.9f, %.9f, %.9f. "
+                "dimDdcenter: %.9f, %.9f, %.9f.\n",
+                idx,
+                dcoeffDdcenter.x,
+                dcoeffDdcenter.y,
+                dcoeffDdcenter.z,
+                dimDdcoeff * dcoeffDdcenter.x,
+                dimDdcoeff * dcoeffDdcenter.y,
+                dimDdcoeff * dcoeffDdcenter.z);
+            if (calc_grad_pos) {
+              *grad_pos += dimDdcoeff * dcoeffDdcenter;
+              PULSAR_LOG_DEV_PIX(
+                  PULSAR_LOG_GRAD,
+                  "grad %u|dimDdposglob: %.9f, %.9f, %.9f.\n",
+                  idx,
+                  dimDdcoeff * dcoeffDdcenter.x,
+                  dimDdcoeff * dcoeffDdcenter.y,
+                  dimDdcoeff * dcoeffDdcenter.z);
+            }
+            if (calc_grad_cam) {
+              PULSAR_LOG_DEV_PIX(
+                  PULSAR_LOG_GRAD,
+                  "grad %u|dimDdeye: %.9f, %.9f, %.9f.\n",
+                  idx,
+                  -dimDdcoeff * (dcoeffDdcenter.x + dcoeffDdray.x),
+                  -dimDdcoeff * (dcoeffDdcenter.y + dcoeffDdray.y),
+                  -dimDdcoeff * (dcoeffDdcenter.z + dcoeffDdray.z));
+              grad_cam->cam_pos += -dimDdcoeff * (dcoeffDdcenter + dcoeffDdray);
+              grad_cam->pixel_0_0_center += dimDdcoeff * dcoeffDdray;
+              PULSAR_LOG_DEV_PIX(
+                  PULSAR_LOG_GRAD,
+                  "grad %u|dimDdpixel00centerglob: %.9f, %.9f, %.9f.\n",
+                  idx,
+                  dimDdcoeff * dcoeffDdray.x,
+                  dimDdcoeff * dcoeffDdray.y,
+                  dimDdcoeff * dcoeffDdray.z);
+              grad_cam->pixel_dir_x +=
+                  (dimDdcoeff * static_cast<float>(coord_x)) * dcoeffDdray;
+              PULSAR_LOG_DEV_PIX(
+                  PULSAR_LOG_GRAD,
+                  "grad %u|dimDdpixel_dir_x: %.9f, %.9f, %.9f.\n",
+                  idx,
+                  (dimDdcoeff * static_cast<float>(coord_x)) * dcoeffDdray.x,
+                  (dimDdcoeff * static_cast<float>(coord_x)) * dcoeffDdray.y,
+                  (dimDdcoeff * static_cast<float>(coord_x)) * dcoeffDdray.z);
+              grad_cam->pixel_dir_y +=
+                  (dimDdcoeff * static_cast<float>(coord_y)) * dcoeffDdray;
+              PULSAR_LOG_DEV_PIX(
+                  PULSAR_LOG_GRAD,
+                  "grad %u|dimDdpixel_dir_y: %.9f, %.9f, %.9f.\n",
+                  idx,
+                  (dimDdcoeff * static_cast<float>(coord_y)) * dcoeffDdray.x,
+                  (dimDdcoeff * static_cast<float>(coord_y)) * dcoeffDdray.y,
+                  (dimDdcoeff * static_cast<float>(coord_y)) * dcoeffDdray.z);
+            }
+          }
+        }
+      }
+    }
+  }
+  return true;
+};
+
+} // namespace Renderer
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.fill_bg.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.fill_bg.device.h
new file mode 100644
index 0000000000000000000000000000000000000000..2a737d3eb609781f08120eb734982987866637f4
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.fill_bg.device.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_FILL_BG_DEVICE_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_FILL_BG_DEVICE_H_
+
+#include "../global.h"
+#include "./camera.h"
+#include "./commands.h"
+#include "./renderer.h"
+
+namespace pulsar {
+namespace Renderer {
+
+/**
+ * Fill every pixel that still carries a zero `sm_d` entry in the forward
+ * info buffer with background values.
+ *
+ * For such a pixel the forward info is set to
+ * (background_normalization_depth / gamma, 1, -1), every tracked sphere slot
+ * is reset to id -1 / depth -1 and, for mode 0, the pixel's channels in
+ * `result_d` are set to the background color `bg_col_d`.
+ */
+template <bool DEV>
+GLOBAL void fill_bg(
+    Renderer renderer,
+    const CamInfo cam,
+    float const* const bg_col_d,
+    const float gamma,
+    const uint mode) {
+  GET_PARALLEL_IDS_2D(coord_x, coord_y, cam.film_width, cam.film_height);
+  // Every pixel owns `3 + 2 * n_track` consecutive floats in forw_info_d.
+  const int floats_per_pixel = 3 + 2 * renderer.n_track;
+  const int write_loc = coord_y * cam.film_width * floats_per_pixel +
+      coord_x * floats_per_pixel;
+  float* const pixel_info = renderer.forw_info_d + write_loc;
+  // A zero in the sm_d slot means this location has not been processed yet.
+  const bool unprocessed = pixel_info[1] == 0.f;
+  if (unprocessed) {
+    // sm_m
+    pixel_info[0] = cam.background_normalization_depth / gamma;
+    // sm_d
+    pixel_info[1] = 1.f;
+    // max_closest_possible_intersection_hit
+    pixel_info[2] = -1.f;
+    // Tracked spheres: one (sphere ID, intersection depth) pair per slot.
+    for (int track = 0; track < renderer.n_track; ++track) {
+      float* const slot = pixel_info + 3 + track * 2;
+      int sphere_id = -1;
+      IASF(sphere_id, slot[0]);
+      slot[1] = -1.f;
+    }
+    if (mode == 0) {
+      // Image background: copy the background color into this pixel.
+      const auto pixel_base = coord_y * cam.film_width * cam.n_channels +
+          coord_x * cam.n_channels;
+      for (int channel = 0; channel < cam.n_channels; ++channel) {
+        renderer.result_d[pixel_base + channel] = bg_col_d[channel];
+      }
+    }
+  }
+  END_PARALLEL_2D_NORET();
+};
+
+} // namespace Renderer
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.fill_bg.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.fill_bg.instantiate.h
new file mode 100644
index 0000000000000000000000000000000000000000..02830204a6874b8223bde1615fa9ef8ffa4d318c
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.fill_bg.instantiate.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "./renderer.fill_bg.device.h"
+
+namespace pulsar {
+namespace Renderer {
+
+// Explicit instantiation of the `fill_bg` kernel for the ISONDEVICE variant.
+// Parameter names mirror the definition in renderer.fill_bg.device.h.
+template GLOBAL void fill_bg<ISONDEVICE>(
+    Renderer renderer,
+    const CamInfo cam,
+    float const* const bg_col_d,
+    const float gamma,
+    const uint mode);
+
+} // namespace Renderer
+} // namespace pulsar
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.forward.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.forward.device.h
new file mode 100644
index 0000000000000000000000000000000000000000..3f0412f576de4dd77b3f2be6a27ff8ddb144ca74
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.forward.device.h
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_FORWARD_DEVICE_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_FORWARD_DEVICE_H_
+
+#include "../global.h"
+#include "./camera.device.h"
+#include "./commands.h"
+#include "./math.h"
+#include "./renderer.h"
+
+namespace pulsar {
+namespace Renderer {
+
+/**
+ * Run the forward rendering pass.
+ *
+ * Steps, in order: validate the arguments, copy the camera parameters into
+ * `self->cam`, launch `calc_signature`, sort the sphere ids, intersect infos
+ * and draw infos by `min_depth_d`, reduce the intersect infos to the pixel
+ * region that has to be rendered, clear the result and forward info buffers,
+ * render that region slice by slice and, for mode 0, launch `fill_bg`.
+ *
+ * \param self the renderer whose buffers are used and overwritten.
+ * \param vert_pos sphere positions; reinterpreted as an array of float3.
+ * \param vert_col sphere colors (forwarded to `calc_signature`).
+ * \param vert_rad sphere radii (forwarded to `calc_signature`).
+ * \param cam camera to render with; film width and height must match
+ *   `self->cam`.
+ * \param gamma must be in (0., 1.].
+ * \param percent_allowed_difference must be in [0., 1.]; values above
+ *   1 - FEPS are clamped to 1 - FEPS with a warning.
+ * \param max_n_hits must be >= 1.
+ * \param bg_col_d background color (forwarded to `render` and `fill_bg`).
+ * \param opacity_d opacity values (forwarded to `render`).
+ * \param num_balls must be > 0 and <= `self->max_num_balls`.
+ * \param mode must be <= 1; `fill_bg` is only launched for mode 0.
+ * \param stream stream handed to all launches, sorts, reductions and memsets.
+ */
+template <bool DEV>
+void forward(
+    Renderer* self,
+    const float* vert_pos,
+    const float* vert_col,
+    const float* vert_rad,
+    const CamInfo& cam,
+    const float& gamma,
+    float percent_allowed_difference,
+    const uint& max_n_hits,
+    const float* bg_col_d,
+    const float* opacity_d,
+    const size_t& num_balls,
+    const uint& mode,
+    cudaStream_t stream) {
+  // The check excludes 0, hence the half-open interval in the message.
+  ARGCHECK(gamma > 0.f && gamma <= 1.f, 6, "gamma must be in (0., 1.]");
+  ARGCHECK(
+      percent_allowed_difference >= 0.f && percent_allowed_difference <= 1.f,
+      7,
+      "percent_allowed_difference must be in [0., 1.]");
+  ARGCHECK(max_n_hits >= 1u, 8, "max_n_hits must be >= 1");
+  ARGCHECK(
+      num_balls > 0 && num_balls <= self->max_num_balls,
+      9,
+      ("num_balls must be >0 and <= max num balls! (" +
+       std::to_string(num_balls) + " vs. " +
+       std::to_string(self->max_num_balls) + ")")
+          .c_str());
+  ARGCHECK(
+      cam.film_width == self->cam.film_width &&
+          cam.film_height == self->cam.film_height,
+      5,
+      "cam result width and height must agree");
+  ARGCHECK(mode <= 1, 10, "mode must be <= 1!");
+  // Values in (1 - FEPS, 1] pass the check above but are clamped here.
+  if (percent_allowed_difference > 1.f - FEPS) {
+    LOG(WARNING) << "percent_allowed_difference > " << (1.f - FEPS)
+                 << "! Clamping to " << (1.f - FEPS) << ".";
+    percent_allowed_difference = 1.f - FEPS;
+  }
+  LOG_IF(INFO, PULSAR_LOG_RENDER) << "Rendering forward pass...";
+  // Update camera and transform into a new virtual camera system with
+  // centered principal point and subsection rendering.
+  self->cam.eye = cam.eye;
+  // Stored relative to the eye position, unlike the other vectors, which are
+  // copied unchanged.
+  self->cam.pixel_0_0_center = cam.pixel_0_0_center - cam.eye;
+  self->cam.pixel_dir_x = cam.pixel_dir_x;
+  self->cam.pixel_dir_y = cam.pixel_dir_y;
+  self->cam.sensor_dir_z = cam.sensor_dir_z;
+  self->cam.half_pixel_size = cam.half_pixel_size;
+  self->cam.focal_length = cam.focal_length;
+  self->cam.aperture_width = cam.aperture_width;
+  self->cam.aperture_height = cam.aperture_height;
+  self->cam.min_dist = cam.min_dist;
+  self->cam.max_dist = cam.max_dist;
+  self->cam.norm_fac = cam.norm_fac;
+  self->cam.principal_point_offset_x = cam.principal_point_offset_x;
+  self->cam.principal_point_offset_y = cam.principal_point_offset_y;
+  self->cam.film_border_left = cam.film_border_left;
+  self->cam.film_border_top = cam.film_border_top;
+#ifdef PULSAR_TIMINGS_ENABLED
+  START_TIME(calc_signature);
+#endif
+  // Per-sphere preprocessing; the renderer is handed over by value.
+  LAUNCH_MAX_PARALLEL_1D(
+      calc_signature<DEV>,
+      num_balls,
+      stream,
+      *self,
+      reinterpret_cast<const float3*>(vert_pos),
+      vert_col,
+      vert_rad,
+      num_balls);
+  CHECKLAUNCH();
+#ifdef PULSAR_TIMINGS_ENABLED
+  STOP_TIME(calc_signature);
+  START_TIME(sort);
+#endif
+  // Three sorts with the same key (min_depth_d): one each for the sphere ids,
+  // the intersect infos and the draw infos.
+  SORT_ASCENDING_WS(
+      self->min_depth_d,
+      self->min_depth_sorted_d,
+      self->ids_d,
+      self->ids_sorted_d,
+      num_balls,
+      self->workspace_d,
+      self->workspace_size,
+      stream);
+  CHECKLAUNCH();
+  SORT_ASCENDING_WS(
+      self->min_depth_d,
+      self->min_depth_sorted_d,
+      self->ii_d,
+      self->ii_sorted_d,
+      num_balls,
+      self->workspace_d,
+      self->workspace_size,
+      stream);
+  CHECKLAUNCH();
+  SORT_ASCENDING_WS(
+      self->min_depth_d,
+      self->min_depth_sorted_d,
+      self->di_d,
+      self->di_sorted_d,
+      num_balls,
+      self->workspace_d,
+      self->workspace_size,
+      stream);
+  CHECKLAUNCH();
+#ifdef PULSAR_TIMINGS_ENABLED
+  STOP_TIME(sort);
+  START_TIME(minmax);
+#endif
+  // Start value for the reduction: min at MAX_USHORT and max at 0, so any
+  // real pixel range replaces it.
+  IntersectInfo pixel_minmax;
+  pixel_minmax.min.x = MAX_USHORT;
+  pixel_minmax.min.y = MAX_USHORT;
+  pixel_minmax.max.x = 0;
+  pixel_minmax.max.y = 0;
+  REDUCE_WS(
+      self->ii_sorted_d,
+      self->min_max_pixels_d,
+      num_balls,
+      IntersectInfoMinMax(),
+      pixel_minmax,
+      self->workspace_d,
+      self->workspace_size,
+      stream);
+  // The reduced region is needed on the host to set up the render slices.
+  COPY_DEV_HOST(&pixel_minmax, self->min_max_pixels_d, IntersectInfo, 1);
+  LOG_IF(INFO, PULSAR_LOG_RENDER)
+      << "Region with pixels to render: " << pixel_minmax.min.x << ":"
+      << pixel_minmax.max.x << " (x), " << pixel_minmax.min.y << ":"
+      << pixel_minmax.max.y << " (y).";
+#ifdef PULSAR_TIMINGS_ENABLED
+  STOP_TIME(minmax);
+  START_TIME(render);
+#endif
+  // Clear the image and the forward info; `fill_bg` relies on zeroed forward
+  // info to recognize pixels that were not processed.
+  MEMSET(
+      self->result_d,
+      0,
+      float,
+      self->cam.film_width * self->cam.film_height * self->cam.n_channels,
+      stream);
+  MEMSET(
+      self->forw_info_d,
+      0,
+      float,
+      self->cam.film_width * self->cam.film_height * (3 + 2 * self->n_track),
+      stream);
+  // Only render if the reduced region is non-empty in both dimensions.
+  if (pixel_minmax.max.y > pixel_minmax.min.y &&
+      pixel_minmax.max.x > pixel_minmax.min.x) {
+    PASSERT(
+        pixel_minmax.min.x >= static_cast<ushort>(self->cam.film_border_left) &&
+        pixel_minmax.min.x <
+            static_cast<ushort>(
+                self->cam.film_border_left + self->cam.film_width) &&
+        pixel_minmax.max.x <=
+            static_cast<ushort>(
+                self->cam.film_border_left + self->cam.film_width) &&
+        pixel_minmax.min.y >= static_cast<ushort>(self->cam.film_border_top) &&
+        pixel_minmax.min.y <
+            static_cast<ushort>(
+                self->cam.film_border_top + self->cam.film_height) &&
+        pixel_minmax.max.y <=
+            static_cast<ushort>(
+                self->cam.film_border_top + self->cam.film_height));
+    // Cut the image in 3x3 regions.
+    // Each step is a multiple of RENDER_BLOCK_SIZE, so the region is covered
+    // by at most three slices per dimension.
+    int y_step = RENDER_BLOCK_SIZE *
+        iDivCeil(pixel_minmax.max.y - pixel_minmax.min.y,
+                 3u * RENDER_BLOCK_SIZE);
+    int x_step = RENDER_BLOCK_SIZE *
+        iDivCeil(pixel_minmax.max.x - pixel_minmax.min.x,
+                 3u * RENDER_BLOCK_SIZE);
+    LOG_IF(INFO, PULSAR_LOG_RENDER) << "Using image slices of size " << x_step
+                                    << ", " << y_step << " (W, H).";
+    for (int y_min = pixel_minmax.min.y; y_min < pixel_minmax.max.y;
+         y_min += y_step) {
+      for (int x_min = pixel_minmax.min.x; x_min < pixel_minmax.max.x;
+           x_min += x_step) {
+        // Create region selection.
+        LAUNCH_MAX_PARALLEL_1D(
+            create_selector<DEV>,
+            num_balls,
+            stream,
+            self->ii_sorted_d,
+            num_balls,
+            x_min,
+            x_min + x_step,
+            y_min,
+            y_min + y_step,
+            self->region_flags_d);
+        CHECKLAUNCH();
+        // Extract the flagged entries of the sorted arrays into the unsorted
+        // buffers (ii_d, di_d, ids_d), which are reused as per-slice input.
+        SELECT_FLAGS_WS(
+            self->region_flags_d,
+            self->ii_sorted_d,
+            self->ii_d,
+            self->num_selected_d,
+            num_balls,
+            self->workspace_d,
+            self->workspace_size,
+            stream);
+        CHECKLAUNCH();
+        SELECT_FLAGS_WS(
+            self->region_flags_d,
+            self->di_sorted_d,
+            self->di_d,
+            self->num_selected_d,
+            num_balls,
+            self->workspace_d,
+            self->workspace_size,
+            stream);
+        CHECKLAUNCH();
+        SELECT_FLAGS_WS(
+            self->region_flags_d,
+            self->ids_sorted_d,
+            self->ids_d,
+            self->num_selected_d,
+            num_balls,
+            self->workspace_d,
+            self->workspace_size,
+            stream);
+        CHECKLAUNCH();
+        // Render this slice with RENDER_BLOCK_SIZE x RENDER_BLOCK_SIZE blocks.
+        LAUNCH_PARALLEL_2D(
+            render<DEV>,
+            x_step,
+            y_step,
+            RENDER_BLOCK_SIZE,
+            RENDER_BLOCK_SIZE,
+            stream,
+            self->num_selected_d,
+            self->ii_d,
+            self->di_d,
+            self->min_depth_d,
+            self->ids_d,
+            opacity_d,
+            self->cam,
+            gamma,
+            percent_allowed_difference,
+            max_n_hits,
+            bg_col_d,
+            mode,
+            x_min,
+            y_min,
+            x_step,
+            y_step,
+            self->result_d,
+            self->forw_info_d,
+            self->n_track);
+        CHECKLAUNCH();
+      }
+    }
+  }
+  // Background fill over the whole film; only done for mode 0.
+  if (mode == 0) {
+    LAUNCH_MAX_PARALLEL_2D(
+        fill_bg<DEV>,
+        static_cast<int64_t>(self->cam.film_width),
+        static_cast<int64_t>(self->cam.film_height),
+        stream,
+        *self,
+        self->cam,
+        bg_col_d,
+        gamma,
+        mode);
+    CHECKLAUNCH();
+  }
+#ifdef PULSAR_TIMINGS_ENABLED
+  STOP_TIME(render);
+  float time_ms;
+  // This blocks the result and prevents batch-processing from parallelizing.
+  GET_TIME(calc_signature, &time_ms);
+  std::cout << "Time for signature calculation: " << time_ms << " ms"
+            << std::endl;
+  GET_TIME(sort, &time_ms);
+  std::cout << "Time for sorting: " << time_ms << " ms" << std::endl;
+  GET_TIME(minmax, &time_ms);
+  std::cout << "Time for minmax pixel calculation: " << time_ms << " ms"
+            << std::endl;
+  GET_TIME(render, &time_ms);
+  std::cout << "Time for rendering: " << time_ms << " ms" << std::endl;
+#endif
+  LOG_IF(INFO, PULSAR_LOG_RENDER) << "Forward pass complete.";
+}
+
+} // namespace Renderer
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.forward.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.forward.instantiate.h
new file mode 100644
index 0000000000000000000000000000000000000000..7f57bc8681b7c7f1356f3c3e134595ab2d1955f0
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.forward.instantiate.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "./renderer.forward.device.h"
+
+namespace pulsar {
+namespace Renderer {
+
+// Explicit instantiation of `forward` for the ISONDEVICE variant; the
+// signature has to match the template in renderer.forward.device.h.
+template void forward<ISONDEVICE>(
+    Renderer* self,
+    const float* vert_pos,
+    const float* vert_col,
+    const float* vert_rad,
+    const CamInfo& cam,
+    const float& gamma,
+    float percent_allowed_difference,
+    const uint& max_n_hits,
+    const float* bg_col_d,
+    const float* opacity_d,
+    const size_t& num_balls,
+    const uint& mode,
+    cudaStream_t stream);
+
+} // namespace Renderer
+} // namespace pulsar
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.get_screen_area.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.get_screen_area.device.h
new file mode 100644
index 0000000000000000000000000000000000000000..1a85a1bd20cfa0773e395163871ea5a7a8b39347
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.get_screen_area.device.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_GET_SCREEN_AREA_DEVICE_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_GET_SCREEN_AREA_DEVICE_H_
+
+#include "../global.h"
+#include "./camera.device.h"
+#include "./commands.h"
+#include "./math.h"
+
+namespace pulsar {
+namespace Renderer {
+
+/**
+ * Find the closest enclosing screen area rectangle in pixels that encloses a
+ * ball.
+ *
+ * The method returns the two x and the two y values of the boundaries. They
+ * are not ordered yet and you need to find min and max for the left/right and
+ * lower/upper boundary.
+ *
+ * The return values are floats and need to be rounded appropriately.
+ */
+INLINE DEVICE bool get_screen_area(
+    const float3& ball_center_cam,
+    const float3& ray_center_norm,
+    const float& vert_rad,
+    const CamInfo& cam,
+    const uint& idx,
+    /* Out variables. */
+    float* x_1,
+    float* x_2,
+    float* y_1,
+    float* y_2) {
+  // Returns false (leaving the out variables untouched) if the ball is
+  // considered not visible, i.e. if the cosine between the optical axis and
+  // the ray to the ball center is below EPS; returns true otherwise.
+  // `idx` is only used to tag the log output.
+  float cos_alpha = dot(cam.sensor_dir_z, ray_center_norm);
+  float2 o__c_, alpha, theta;
+  if (cos_alpha < EPS) {
+    PULSAR_LOG_DEV(
+        PULSAR_LOG_CALC_SIGNATURE,
+        "signature %d|ball not visible. cos_alpha: %.9f.\n",
+        idx,
+        cos_alpha);
+    // No intersection, ball won't be visible.
+    return false;
+  }
+  // Multiply the direction vector with the camera rotation matrix
+  // to have the optical axis being the canonical z vector (0, 0, 1).
+  // TODO: optimize.
+  const float3 ball_center_cam_rot = rotate(
+      ball_center_cam,
+      cam.pixel_dir_x / length(cam.pixel_dir_x),
+      cam.pixel_dir_y / length(cam.pixel_dir_y),
+      cam.sensor_dir_z);
+  // Log the rotated center, matching the label in the message.
+  PULSAR_LOG_DEV(
+      PULSAR_LOG_CALC_SIGNATURE,
+      "signature %d|ball_center_cam_rot: %f, %f, %f.\n",
+      idx,
+      ball_center_cam_rot.x,
+      ball_center_cam_rot.y,
+      ball_center_cam_rot.z);
+  // Factor to convert sensor plane units into pixels (1 / pixel size).
+  const float pixel_size_norm_fac = FRCP(2.f * cam.half_pixel_size);
+  // Pixel coordinates of the aperture center.
+  const float optical_offset_x =
+      (static_cast<float>(cam.aperture_width) - 1.f) * .5f;
+  const float optical_offset_y =
+      (static_cast<float>(cam.aperture_height) - 1.f) * .5f;
+  if (cam.orthogonal_projection) {
+    // Orthogonal projection: the extent is center -/+ radius along x and y,
+    // scaled to pixels and shifted by the optical offset.
+    *x_1 =
+        FMA(ball_center_cam_rot.x - vert_rad,
+            pixel_size_norm_fac,
+            optical_offset_x);
+    *x_2 =
+        FMA(ball_center_cam_rot.x + vert_rad,
+            pixel_size_norm_fac,
+            optical_offset_x);
+    *y_1 =
+        FMA(ball_center_cam_rot.y - vert_rad,
+            pixel_size_norm_fac,
+            optical_offset_y);
+    *y_2 =
+        FMA(ball_center_cam_rot.y + vert_rad,
+            pixel_size_norm_fac,
+            optical_offset_y);
+    return true;
+  } else {
+    // Distance to the ball center within the x-z and the y-z plane, clamped
+    // to FEPS to keep the divisions below well-defined.
+    o__c_.x = FMAX(
+        FSQRT(
+            ball_center_cam_rot.x * ball_center_cam_rot.x +
+            ball_center_cam_rot.z * ball_center_cam_rot.z),
+        FEPS);
+    o__c_.y = FMAX(
+        FSQRT(
+            ball_center_cam_rot.y * ball_center_cam_rot.y +
+            ball_center_cam_rot.z * ball_center_cam_rot.z),
+        FEPS);
+    PULSAR_LOG_DEV(
+        PULSAR_LOG_CALC_SIGNATURE,
+        "signature %d|o__c_: %f, %f.\n",
+        idx,
+        o__c_.x,
+        o__c_.y);
+    // alpha: signed angle between the optical axis and the ball center in
+    // the respective plane (note the flipped sign for y). The arguments of
+    // acos / asin are clamped to [-1, 1].
+    alpha.x = sign_dir(ball_center_cam_rot.x) *
+        acos(FMIN(FMAX(ball_center_cam_rot.z / o__c_.x, -1.f), 1.f));
+    alpha.y = -sign_dir(ball_center_cam_rot.y) *
+        acos(FMIN(FMAX(ball_center_cam_rot.z / o__c_.y, -1.f), 1.f));
+    // theta: angle derived from radius / distance in the respective plane.
+    theta.x = asin(FMIN(FMAX(vert_rad / o__c_.x, -1.f), 1.f));
+    theta.y = asin(FMIN(FMAX(vert_rad / o__c_.y, -1.f), 1.f));
+    PULSAR_LOG_DEV(
+        PULSAR_LOG_CALC_SIGNATURE,
+        "signature %d|alpha.x: %f, alpha.y: %f, theta.x: %f, theta.y: %f.\n",
+        idx,
+        alpha.x,
+        alpha.y,
+        theta.x,
+        theta.y);
+    // Boundaries in the sensor plane: tan(alpha -/+ theta) * focal_length.
+    *x_1 = tan(alpha.x - theta.x) * cam.focal_length;
+    *x_2 = tan(alpha.x + theta.x) * cam.focal_length;
+    *y_1 = tan(alpha.y - theta.y) * cam.focal_length;
+    *y_2 = tan(alpha.y + theta.y) * cam.focal_length;
+    PULSAR_LOG_DEV(
+        PULSAR_LOG_CALC_SIGNATURE,
+        "signature %d|in sensor plane: x_1: %f, x_2: %f, y_1: %f, y_2: %f.\n",
+        idx,
+        *x_1,
+        *x_2,
+        *y_1,
+        *y_2);
+    // Convert to pixel coordinates; y is scaled with a negative factor.
+    *x_1 = FMA(*x_1, pixel_size_norm_fac, optical_offset_x);
+    *x_2 = FMA(*x_2, pixel_size_norm_fac, optical_offset_x);
+    *y_1 = FMA(*y_1, -pixel_size_norm_fac, optical_offset_y);
+    *y_2 = FMA(*y_2, -pixel_size_norm_fac, optical_offset_y);
+    return true;
+  }
+};
+
+} // namespace Renderer
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.h
new file mode 100644
index 0000000000000000000000000000000000000000..d6755ee91887b8f6316563b03cee9c524a6f7315
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.h
@@ -0,0 +1,468 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_H_
+
+#include <algorithm>
+
+#include "../global.h"
+#include "./camera.h"
+
+namespace pulsar {
+namespace Renderer {
+
+//! Remember to order struct members from larger size to smaller size
+//! to avoid padding (for more info, see for example here:
+//! http://www.catb.org/esr/structure-packing/).
+
+/**
+ * This is the information that's needed to do a fast screen point
+ * intersection with one of the balls.
+ *
+ * Aim to keep this at no more than 8 bytes (256 bytes per cache-line / 32
+ * threads in a warp = 8 bytes per thread).
+ */
+// Pixel-space bounding rectangle of a ball. An object with all four values
+// set to MAX_USHORT is treated as invalid / out of bounds by
+// `IntersectInfoMinMax`.
+struct IntersectInfo {
+  ushort2 min; /** minimum x, y in pixel coordinates. */
+  ushort2 max; /** maximum x, y in pixel coordinates. */
+};
+// Compile-time guard: the struct must be exactly 8 bytes.
+static_assert(
+    sizeof(IntersectInfo) == 8,
+    "The compiled size of `IntersectInfo` is wrong.");
+
+/**
+ * Reduction operation to find the limits of multiple IntersectInfo objects.
+ */
+struct IntersectInfoMinMax {
+  /**
+   * Merge two IntersectInfo objects into their common bounding rectangle.
+   *
+   * An operand with all four coordinates equal to MAX_USHORT stands for an
+   * invalid object or a ball out of bounds; in that case the other operand
+   * is returned unchanged. `b` is tested first, so `a` is returned if both
+   * operands are of that kind.
+   */
+  IHD IntersectInfo
+  operator()(const IntersectInfo& a, const IntersectInfo& b) const {
+    const bool b_carries_no_info = b.min.x == MAX_USHORT &&
+        b.min.y == MAX_USHORT && b.max.x == MAX_USHORT &&
+        b.max.y == MAX_USHORT;
+    if (b_carries_no_info) {
+      return a;
+    }
+    const bool a_carries_no_info = a.min.x == MAX_USHORT &&
+        a.min.y == MAX_USHORT && a.max.x == MAX_USHORT &&
+        a.max.y == MAX_USHORT;
+    if (a_carries_no_info) {
+      return b;
+    }
+    // Both operands are valid: component-wise min of the lower corner and
+    // component-wise max of the upper corner.
+    IntersectInfo merged;
+    merged.max.y = std::max<ushort>(a.max.y, b.max.y);
+    merged.max.x = std::max<ushort>(a.max.x, b.max.x);
+    merged.min.y = std::min<ushort>(a.min.y, b.min.y);
+    merged.min.x = std::min<ushort>(a.min.x, b.min.x);
+    return merged;
+  }
+};
+
+/**
+ * All information that's needed to draw a ball.
+ *
+ * It's necessary to keep this information in float (not half) format,
+ * because the loss in accuracy would be too high and lead to artifacts.
+ */
+struct DrawInfo {
+  float3 ray_center_norm; /** Ray to the ball center, normalized. */
+  /** Ball color.
+   *
+   * This might be the full color in the case of n_channels <= 3. Otherwise,
+   * a pointer to the original 'color' data is stored in the following union.
+   */
+  float first_color;
+  // Either two further color values stored inline or a pointer to the color
+  // data; which member is active follows the n_channels rule described above.
+  union {
+    float color[2];
+    float* ptr;
+  } color_union;
+  float t_center; /** Distance from the camera to the ball center. */
+  float radius; /** Ball radius. */
+};
+// Compile-time guard: the struct must be exactly 8 * 4 = 32 bytes.
+static_assert(
+    sizeof(DrawInfo) == 8 * 4,
+    "The compiled size of `DrawInfo` is wrong.");
+
+/**
+ * An object to collect all associated data with the renderer.
+ *
+ * The `_d` suffixed pointers point to memory 'on-device', potentially on the
+ * GPU. All other variables are expected to point to CPU memory.
+ */
+struct Renderer {
+  /** Dummy initializer to make sure all pointers are set to NULL to
+   * be safe for the device-specific 'construct' and 'destruct' methods.
+   *
+   * The size and count members are set to 0 as well, so that a freshly
+   * created object never carries indeterminate values.
+   */
+  inline Renderer() {
+    max_num_balls = 0;
+    result_d = NULL;
+    min_depth_d = NULL;
+    min_depth_sorted_d = NULL;
+    ii_d = NULL;
+    ii_sorted_d = NULL;
+    ids_d = NULL;
+    ids_sorted_d = NULL;
+    workspace_d = NULL;
+    workspace_size = 0;
+    di_d = NULL;
+    di_sorted_d = NULL;
+    region_flags_d = NULL;
+    num_selected_d = NULL;
+    forw_info_d = NULL;
+    // This pointer was previously left uninitialized, in contradiction to the
+    // guarantee stated above.
+    min_max_pixels_d = NULL;
+    grad_pos_d = NULL;
+    grad_col_d = NULL;
+    grad_rad_d = NULL;
+    grad_cam_d = NULL;
+    grad_opy_d = NULL;
+    grad_cam_buf_d = NULL;
+    n_grad_contributions_d = NULL;
+    n_track = 0;
+  };
+  /** The camera for this renderer. In world-coordinates. */
+  CamInfo cam;
+  /**
+   * The maximum amount of balls the renderer can handle. Resources are
+   * pre-allocated to account for this size. Less than this amount of balls
+   * can be rendered, but not more.
+   */
+  int max_num_balls;
+  /** The result buffer. */
+  float* result_d;
+  /** Closest possible intersection depth per sphere w.r.t. the camera. */
+  float* min_depth_d;
+  /** Closest possible intersection depth per sphere, ordered ascending. */
+  float* min_depth_sorted_d;
+  /** The intersect infos per sphere. */
+  IntersectInfo* ii_d;
+  /** The intersect infos per sphere, ordered by their closest possible
+   * intersection depth (asc.). */
+  IntersectInfo* ii_sorted_d;
+  /** Original sphere IDs. */
+  int* ids_d;
+  /** Original sphere IDs, ordered by their closest possible intersection depth
+   * (asc.). */
+  int* ids_sorted_d;
+  /** Workspace for CUB routines. */
+  char* workspace_d;
+  /** Workspace size for CUB routines. */
+  size_t workspace_size;
+  /** The draw information structures for each sphere. */
+  DrawInfo* di_d;
+  /** The draw information structures sorted by closest possible intersection
+   * depth (asc.). */
+  DrawInfo* di_sorted_d;
+  /** Region association buffer. */
+  char* region_flags_d;
+  /** Num spheres in the current region. */
+  size_t* num_selected_d;
+  /** Pointer to information from the forward pass. */
+  float* forw_info_d;
+  /** Struct containing information about the min max pixels that contain
+   * rendered information in the image. */
+  IntersectInfo* min_max_pixels_d;
+  /** Gradients w.r.t. position. */
+  float3* grad_pos_d;
+  /** Gradients w.r.t. color. */
+  float* grad_col_d;
+  /** Gradients w.r.t. radius. */
+  float* grad_rad_d;
+  /** Gradients w.r.t. camera parameters. */
+  float* grad_cam_d;
+  /** Gradients w.r.t. opacity. */
+  float* grad_opy_d;
+  /** Camera gradient information by sphere.
+   *
+   * Here, every sphere's contribution to the camera gradients is stored. It is
+   * aggregated and written to grad_cam_d in a separate step. This avoids write
+   * conflicts when processing the spheres.
+   */
+  CamGradInfo* grad_cam_buf_d;
+  /** Total of all gradient contributions for this image. */
+  int* n_grad_contributions_d;
+  /** The number of spheres to track for backpropagation. */
+  int n_track;
+};
+
+// Two renderers compare equal if their cameras and their maximum number of
+// balls agree; buffers and all other members are not compared.
+inline bool operator==(const Renderer& a, const Renderer& b) {
+  if (!(a.cam == b.cam)) {
+    return false;
+  }
+  return a.max_num_balls == b.max_num_balls;
+}
+
+/**
+ * Construct a renderer.
+ */
+// Declaration only; the definition is not part of this header.
+// NOTE(review): the parameter notes below are inferred from the names and
+// from the members of `Renderer` / `CamInfo` used elsewhere; confirm them
+// against the definition.
+template <bool DEV>
+void construct(
+    Renderer* self, // the renderer object to set up.
+    const size_t& max_num_balls, // presumably ends up in `max_num_balls`.
+    const int& width, // presumably the film width in pixels.
+    const int& height, // presumably the film height in pixels.
+    const bool& orthogonal_projection,
+    const bool& right_handed_system,
+    const float& background_normalization_depth,
+    const uint& n_channels, // presumably the number of color channels.
+    const uint& n_track); // presumably ends up in `n_track`.
+
+/**
+ * Destruct the renderer and free the associated memory.
+ */
+// Declaration only; the definition is not part of this header.
+template <bool DEV>
+void destruct(Renderer* self);
+
+/**
+ * Create a selection of points inside a rectangle.
+ *
+ * This writes boolean values into `region_flags_d`, which can
+ * for example be used by a CUB function to extract the selection.
+ */
+// Declaration only; the definition is not part of this header.
+// NOTE(review): whether the max bounds are inclusive or exclusive cannot be
+// seen here; `forward` passes `x_min + x_step` / `y_min + y_step` for them.
+template <bool DEV>
+GLOBAL void create_selector(
+    IntersectInfo const* const RESTRICT ii_sorted_d,
+    const uint num_balls,
+    const int min_x,
+    const int max_x,
+    const int min_y,
+    const int max_y,
+    /* Out variables. */
+    char* RESTRICT region_flags_d);
+
+/**
+ * Calculate a signature for a ball.
+ *
+ * Populate the `ids_d`, `ii_d`, `di_d` and `min_depth_d` fields of the
+ * renderer. For spheres not visible in the image, sets the id field to -1,
+ * min_depth_d to MAX_FLOAT and the ii_d.min.x fields to MAX_USHORT.
+ */
+template <bool DEV>
+GLOBAL void calc_signature(
+    Renderer renderer, // Passed by value; results go through its pointers.
+    float3 const* const RESTRICT vert_poss, // Vertex position vector.
+    float const* const RESTRICT vert_cols, // Vertex color vector.
+    float const* const RESTRICT vert_rads, // Vertex radius vector.
+    const uint num_balls); // Number of balls.
+
+/**
+ * The block size for rendering.
+ *
+ * This should be as large as possible, but is limited due to the amount
+ * of variables we use and the memory required per thread.
+ */
+#define RENDER_BLOCK_SIZE 16
+/**
+ * The buffer size of spheres to be loaded and analyzed for relevance.
+ *
+ * This must be at least RENDER_BLOCK_SIZE * RENDER_BLOCK_SIZE so that
+ * for every iteration through the loading loop every thread could add a
+ * 'hit' to the buffer. Parenthesized so it expands safely in expressions.
+ */
+#define RENDER_BUFFER_SIZE (RENDER_BLOCK_SIZE * RENDER_BLOCK_SIZE * 2)
+/**
+ * The threshold after which the spheres that are in the render buffer
+ * are rendered and the buffer is flushed.
+ *
+ * Must be less than RENDER_BUFFER_SIZE. Parenthesized for safe expansion.
+ */
+#define RENDER_BUFFER_LOAD_THRESH (16 * 4)
+
+/**
+ * The render function.
+ *
+ * Assumptions:
+ *   * the focal length is appropriately chosen,
+ *   * |ray_dir_norm.z| is > FEPS (asserted for perspective cameras),
+ *   * to be completed...
+ */
+template <bool DEV>
+GLOBAL void render(
+    size_t const* const RESTRICT
+        num_balls, /** Number of balls relevant for this pass. */
+    IntersectInfo const* const RESTRICT ii_d, /** Intersect information. */
+    DrawInfo const* const RESTRICT di_d, /** Draw information. */
+    float const* const RESTRICT min_depth_d, /** Minimum depth per sphere. */
+    int const* const RESTRICT id_d, /** Original sphere id per entry. */
+    float const* const RESTRICT op_d, /** Opacity; NULL means 1.f. */
+    const CamInfo cam_norm, /** Camera normalized with all vectors to be in the
+                             * camera coordinate system.
+                             */
+    const float gamma, /** Transparency parameter. **/
+    const float percent_allowed_difference, /** Maximum allowed
+                                               error in color. */
+    const uint max_n_hits, /** A pixel is finished after this many hits. */
+    const float* bg_col_d, /** Background color; initial value in mode 0. */
+    const uint mode, /** 0: color image; 1: per-pixel hit count instead. */
+    const int x_min, /** Added to the thread's x offset: ray coordinate x. */
+    const int y_min, /** Added to the thread's y offset: ray coordinate y. */
+    const int x_step, /** Forwarded to GET_PARALLEL_IDS_2D. */
+    const int y_step, /** Forwarded to GET_PARALLEL_IDS_2D. */
+    // Out variables.
+    float* const RESTRICT result_d, /** The result image. */
+    float* const RESTRICT forw_info_d, /** Additional information needed for the
+                                           grad computation. */
+    // Infrastructure.
+    const int n_track /** The number of spheres to track. */
+);
+
+/**
+ * Makes sure to paint background information.
+ *
+ * This is required as a separate post-processing step because certain
+ * pixels may not be processed during the forward pass if there is no
+ * possibility for a sphere to be present at their location.
+ */
+template <bool DEV>
+GLOBAL void fill_bg(
+    Renderer renderer,
+    const CamInfo norm, // NOTE(review): presumably the normalized camera.
+    float const* const bg_col_d, // Background color.
+    const float gamma, // Transparency parameter.
+    const uint mode); // Render mode; see the `mode` parameter of `render`.
+
+/**
+ * Rendering forward pass.
+ *
+ * Takes a renderer and sphere data as inputs and creates a rendering.
+ */
+template <bool DEV>
+void forward(
+    Renderer* self,
+    const float* vert_pos, // Sphere positions (memory layout not visible here).
+    const float* vert_col, // Sphere colors.
+    const float* vert_rad, // Sphere radii.
+    const CamInfo& cam,
+    const float& gamma, // Transparency parameter.
+    float percent_allowed_difference, // Maximum allowed error in color.
+    const uint& max_n_hits,
+    const float* bg_col_d, // Background color.
+    const float* opacity_d, // Opacity; NOTE(review): `render` accepts NULL.
+    const size_t& num_balls, // Number of balls.
+    const uint& mode,
+    cudaStream_t stream); // Presumably the stream the work runs on - confirm.
+
+/**
+ * Normalize the camera gradients by the number of spheres that contributed.
+ */
+template <bool DEV>
+GLOBAL void norm_cam_gradients(Renderer renderer); // Rescales the data behind renderer.grad_cam_d in place.
+
+/**
+ * Normalize the sphere gradients (and the per-sphere camera gradient buffer).
+ *
+ * We're assuming that the samples originate from a Monte Carlo
+ * sampling process and normalize by number and sphere area.
+ */
+template <bool DEV>
+GLOBAL void norm_sphere_gradients(Renderer renderer, const int num_balls);
+
+#define GRAD_BLOCK_SIZE 16 // NOTE(review): presumably the block edge length of the gradient kernel - confirm.
+/** Calculate the gradients.
+ */
+template <bool DEV>
+GLOBAL void calc_gradients(
+    const CamInfo cam, /** Camera in world coordinates. */
+    float const* const RESTRICT grad_im, /** The gradient image. */
+    const float
+        gamma, /** The transparency parameter used in the forward pass. */
+    float3 const* const RESTRICT vert_poss, /** Vertex position vector. */
+    float const* const RESTRICT vert_cols, /** Vertex color vector. */
+    float const* const RESTRICT vert_rads, /** Vertex radius vector. */
+    float const* const RESTRICT opacity, /** Vertex opacity. */
+    const uint num_balls, /** Number of balls. */
+    float const* const RESTRICT result_d, /** Result image. */
+    float const* const RESTRICT forw_info_d, /** Forward pass info. */
+    DrawInfo const* const RESTRICT di_d, /** Draw information. */
+    IntersectInfo const* const RESTRICT ii_d, /** Intersect information. */
+    // Mode switches (by name: one enable flag per gradient kind - confirm).
+    const bool calc_grad_pos,
+    const bool calc_grad_col,
+    const bool calc_grad_rad,
+    const bool calc_grad_cam,
+    const bool calc_grad_opy,
+    // Out variables.
+    float* const RESTRICT grad_rad_d, /** Radius gradients. */
+    float* const RESTRICT grad_col_d, /** Color gradients. */
+    float3* const RESTRICT grad_pos_d, /** Position gradients. */
+    CamGradInfo* const RESTRICT grad_cam_buf_d, /** Camera gradient buffer. */
+    float* const RESTRICT grad_opy_d, /** Opacity gradient buffer. */
+    int* const RESTRICT
+        grad_contributed_d, /** Gradient contribution counter. */
+    // Infrastructure.
+    const int n_track, /** The number of tracked spheres per pixel. */
+    const uint offs_x = 0, /** Presumably a pixel offset in x - confirm. */
+    const uint offs_y = 0); /** Presumably a pixel offset in y - confirm. */
+
+/**
+ * A full backward pass.
+ *
+ * Creates the gradients for the given gradient image `grad_im` and the spheres.
+ */
+template <bool DEV>
+void backward(
+    Renderer* self,
+    const float* grad_im, // The gradient image.
+    const float* image, // Presumably the forward result image - confirm.
+    const float* forw_info, // Forward pass info.
+    const float* vert_pos, // Sphere positions.
+    const float* vert_col, // Sphere colors.
+    const float* vert_rad, // Sphere radii.
+    const CamInfo& cam,
+    const float& gamma, // Transparency parameter used in the forward pass.
+    float percent_allowed_difference, // Maximum allowed error in color.
+    const uint& max_n_hits,
+    const float* vert_opy, // Sphere opacity.
+    const size_t& num_balls, // Number of balls.
+    const uint& mode,
+    const bool& dif_pos, // dif_*: presumably select which gradients are
+    const bool& dif_col, // computed (position, color, radius, camera,
+    const bool& dif_rad, // opacity) - TODO confirm against the
+    const bool& dif_cam, // implementation.
+    const bool& dif_opy,
+    cudaStream_t stream);
+
+/**
+ * A debug backward pass.
+ *
+ * This is a function to debug the gradient calculation. It calculates the
+ * gradients for exactly one pixel (set with pos_x and pos_y) without averaging.
+ *
+ * *Uses only the first sphere for camera gradient calculation!*
+ */
+template <bool DEV>
+void backward_dbg(
+    Renderer* self,
+    const float* grad_im, // The gradient image.
+    const float* image, // Presumably the forward result image - confirm.
+    const float* forw_info, // Forward pass info.
+    const float* vert_pos, // Sphere positions.
+    const float* vert_col, // Sphere colors.
+    const float* vert_rad, // Sphere radii.
+    const CamInfo& cam,
+    const float& gamma, // Transparency parameter used in the forward pass.
+    float percent_allowed_difference, // Maximum allowed error in color.
+    const uint& max_n_hits,
+    const float* vert_opy, // Sphere opacity.
+    const size_t& num_balls, // Number of balls.
+    const uint& mode,
+    const bool& dif_pos, // dif_*: presumably select which gradients are
+    const bool& dif_col, // computed - TODO confirm against the
+    const bool& dif_rad, // implementation.
+    const bool& dif_cam,
+    const bool& dif_opy,
+    const uint& pos_x, // x coordinate of the single pixel to evaluate.
+    const uint& pos_y, // y coordinate of the single pixel to evaluate.
+    cudaStream_t stream);
+
+template <bool DEV>
+void nn( // NOTE(review): undocumented; presumably a nearest-neighbor search.
+    const float* ref_ptr, // presumably the reference points - TODO confirm
+    const float* tar_ptr, // presumably the target/query points - TODO confirm
+    const uint& k, // presumably the number of neighbors - TODO confirm
+    const uint& d, // presumably the point dimensionality - TODO confirm
+    const uint& n, // presumably the number of points - TODO confirm
+    float* dist_ptr, // output distances, judging by the name - confirm
+    int32_t* inds_ptr, // output indices, judging by the name - confirm
+    cudaStream_t stream);
+
+} // namespace Renderer
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_cam_gradients.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_cam_gradients.device.h
new file mode 100644
index 0000000000000000000000000000000000000000..e1dfd55d0b1363c1d8d38709460e00a75efeef5a
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_cam_gradients.device.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_NORM_CAM_GRADIENTS_DEVICE_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_NORM_CAM_GRADIENTS_DEVICE_H_
+
+#include "../global.h"
+#include "./camera.device.h"
+#include "./commands.h"
+#include "./math.h"
+#include "./renderer.h"
+
+namespace pulsar {
+namespace Renderer {
+
+/** Average the camera gradients over the contributing spheres; a count of
+ * zero leaves them untouched instead of scaling by the reciprocal of 0. */
+template <bool DEV>
+GLOBAL void norm_cam_gradients(Renderer renderer) {
+  GET_PARALLEL_IDX_1D(idx, 1);
+  CamGradInfo* cgi = reinterpret_cast<CamGradInfo*>(renderer.grad_cam_d);
+  const int n_contrib = *renderer.n_grad_contributions_d;
+  if (n_contrib > 0) *cgi = *cgi * FRCP(static_cast<float>(n_contrib));
+  END_PARALLEL_NORET();
+}
+
+} // namespace Renderer
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_cam_gradients.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_cam_gradients.instantiate.h
new file mode 100644
index 0000000000000000000000000000000000000000..98e05a67e470237a9328d7a441e7b700a7ce675d
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_cam_gradients.instantiate.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "./renderer.norm_cam_gradients.device.h"
+
+namespace pulsar {
+namespace Renderer {
+
+template GLOBAL void norm_cam_gradients<ISONDEVICE>(Renderer renderer); // Explicit instantiation for the ISONDEVICE flavor.
+
+} // namespace Renderer
+} // namespace pulsar
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.device.h
new file mode 100644
index 0000000000000000000000000000000000000000..37e0eb00a5179911216a5d2827feb83ade487755
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.device.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_NORM_SPHERE_GRADIENTS_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_NORM_SPHERE_GRADIENTS_H_
+
+#include "../global.h"
+#include "./commands.h"
+#include "./math.h"
+#include "./renderer.h"
+
+namespace pulsar {
+namespace Renderer {
+
+/**
+ * Turn the accumulated per-sphere gradients into averages.
+ *
+ * Sphere gradients are scaled by the reciprocal of the sphere's count; the
+ * per-sphere camera gradients are additionally weighted by screen area.
+ */
+template <bool DEV>
+GLOBAL void norm_sphere_gradients(Renderer renderer, const int num_balls) {
+  GET_PARALLEL_IDX_1D(idx, num_balls);
+  // Count stored for this sphere; anything <= 0 zeroes all its gradients.
+  const auto n_contrib = renderer.ids_sorted_d[idx];
+  const bool contributed = n_contrib > 0;
+  IntersectInfo ii;
+  float sphere_fac = 0.f; // Scale for radius/color/position/opacity.
+  float cam_fac = 0.f; // Scale for this sphere's camera gradient entry.
+  if (contributed) {
+    ii = renderer.ii_d[idx];
+    // Plain average: keeps small spheres from lagging behind big ones that
+    // would otherwise see a 'higher' integrated learning rate.
+    sphere_fac = FRCP(static_cast<float>(n_contrib));
+    // Camera: weight by the covered bounding-box area, much like in Monte
+    // Carlo integration; the trailing 1e-3f is for better numerics.
+    const float extent_x =
+        static_cast<float>(ii.max.x) - static_cast<float>(ii.min.x);
+    const float extent_y =
+        static_cast<float>(ii.max.y) - static_cast<float>(ii.min.y);
+    cam_fac = sphere_fac * extent_x * extent_y * 1e-3f;
+  }
+  PULSAR_LOG_DEV_NODE(
+      PULSAR_LOG_NORMALIZE,
+      "ids_sorted_d[idx]: %d, norm_fac: %.9f.\n",
+      n_contrib,
+      sphere_fac);
+  const auto n_channels = renderer.cam.n_channels;
+  renderer.grad_rad_d[idx] *= sphere_fac;
+  for (uint c_idx = 0; c_idx < n_channels; ++c_idx) {
+    renderer.grad_col_d[idx * n_channels + c_idx] *= sphere_fac;
+  }
+  renderer.grad_pos_d[idx] *= sphere_fac;
+  renderer.grad_opy_d[idx] *= sphere_fac;
+  CamGradInfo& cam_grad = renderer.grad_cam_buf_d[idx];
+  cam_grad.cam_pos *= cam_fac;
+  cam_grad.pixel_0_0_center *= cam_fac;
+  cam_grad.pixel_dir_x *= cam_fac;
+  cam_grad.pixel_dir_y *= cam_fac;
+  // A sphere that contributed and whose bounding box spans at least 3 in
+  // both x and y gets its ids_sorted_d entry set to 1.
+  if (contributed && ii.max.x >= ii.min.x + 3 && ii.max.y >= ii.min.y + 3)
+    renderer.ids_sorted_d[idx] = 1;
+  END_PARALLEL_NORET();
+};
+
+} // namespace Renderer
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.instantiate.h
new file mode 100644
index 0000000000000000000000000000000000000000..bedcf81611cb20b2b404776f477cb3fe174608d2
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.instantiate.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "./renderer.norm_sphere_gradients.device.h"
+
+namespace pulsar {
+namespace Renderer {
+
+template GLOBAL void norm_sphere_gradients<ISONDEVICE>( // Explicit instantiation for the ISONDEVICE flavor.
+    Renderer renderer,
+    const int num_balls);
+
+} // namespace Renderer
+} // namespace pulsar
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.render.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.render.device.h
new file mode 100644
index 0000000000000000000000000000000000000000..66a62c31be9ad5e6106bc24ce23dd60a901329f1
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.render.device.h
@@ -0,0 +1,416 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_RENDER_DEVICE_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_RENDER_DEVICE_H_
+
+#include "../global.h"
+#include "./camera.device.h"
+#include "./commands.h"
+#include "./math.h"
+#include "./renderer.h"
+
+#include "./closest_sphere_tracker.device.h"
+#include "./renderer.draw.device.h"
+
+namespace pulsar {
+namespace Renderer {
+
+template <bool DEV> // Kernel body for the `render` declaration in renderer.h.
+GLOBAL void render(
+    size_t const* const RESTRICT
+        num_balls, /** Number of balls relevant for this pass. */
+    IntersectInfo const* const RESTRICT ii_d, /** Intersect information. */
+    DrawInfo const* const RESTRICT di_d, /** Draw information. */
+    float const* const RESTRICT min_depth_d, /** Minimum depth per sphere. */
+    int const* const RESTRICT ids_d, /** Original sphere id per entry. */
+    float const* const RESTRICT op_d, /** Opacity; NULL means 1.f. */
+    const CamInfo cam_norm, /** Camera normalized with all vectors to be in the
+                             * camera coordinate system.
+                             */
+    const float gamma, /** Transparency parameter. **/
+    const float percent_allowed_difference, /** Maximum allowed
+                                               error in color. */
+    const uint max_n_hits, /** A pixel is finished after this many hits. */
+    const float* bg_col, /** Background color; initial value in mode 0. */
+    const uint mode, /** 0: color image; 1: per-pixel hit count instead. */
+    const int x_min, /** Added to the thread's x offset: ray coordinate x. */
+    const int y_min, /** Added to the thread's y offset: ray coordinate y. */
+    const int x_step, /** Forwarded to GET_PARALLEL_IDS_2D. */
+    const int y_step, /** Forwarded to GET_PARALLEL_IDS_2D. */
+    // Out variables.
+    float* const RESTRICT result_d, /** The result image. */
+    float* const RESTRICT forw_info_d, /** Additional information needed for the
+                                           grad computation. */
+    const int n_track /** The number of spheres to track for backprop. */
+) {
+  // Do not early stop threads in this block here. They can all contribute to
+  // the scanning process, we just have to prevent them from writing a result.
+  GET_PARALLEL_IDS_2D(offs_x, offs_y, x_step, y_step);
+  // Variable declarations and const initializations.
+  const float ln_pad_over_1minuspad =
+      FLN(percent_allowed_difference / (1.f - percent_allowed_difference));
+  /** A facility to track the closest spheres to the camera
+      (in preparation for gradient calculation). */
+  ClosestSphereTracker tracker(n_track);
+  const uint coord_x = x_min + offs_x; /** Ray coordinate x. */
+  const uint coord_y = y_min + offs_y; /** Ray coordinate y. */
+  float3 ray_dir_norm; /** Ray cast through the pixel, normalized. */
+  float2 projected_ray; /** Ray intersection with the sensor. */
+  if (cam_norm.orthogonal_projection) {
+    ray_dir_norm = cam_norm.sensor_dir_z;
+    projected_ray.x = static_cast<float>(coord_x);
+    projected_ray.y = static_cast<float>(coord_y);
+  } else {
+    ray_dir_norm = normalize(
+        cam_norm.pixel_0_0_center + coord_x * cam_norm.pixel_dir_x +
+        coord_y * cam_norm.pixel_dir_y);
+    // This is a reasonable assumption for normal focal lengths and image sizes.
+    PASSERT(FABS(ray_dir_norm.z) > FEPS);
+    projected_ray.x = ray_dir_norm.x / ray_dir_norm.z * cam_norm.focal_length;
+    projected_ray.y = ray_dir_norm.y / ray_dir_norm.z * cam_norm.focal_length;
+  }
+  PULSAR_LOG_DEV_PIX(
+      PULSAR_LOG_RENDER_PIX,
+      "render|ray_dir_norm: %.9f, %.9f, %.9f. projected_ray: %.9f, %.9f.\n",
+      ray_dir_norm.x,
+      ray_dir_norm.y,
+      ray_dir_norm.z,
+      projected_ray.x,
+      projected_ray.y);
+  // Set up shared infrastructure.
+  /** This entire thread block. */
+  cg::thread_block thread_block = cg::this_thread_block();
+  /** The collaborators within a warp. */
+  cg::coalesced_group thread_warp = cg::coalesced_threads();
+  /** The number of loaded balls in the load buffer di_l. */
+  SHARED uint n_loaded;
+  /** Draw information buffer. */
+  SHARED DrawInfo di_l[RENDER_BUFFER_SIZE];
+  /** The original sphere id of each loaded sphere. */
+  SHARED uint sphere_id_l[RENDER_BUFFER_SIZE];
+  /** The number of pixels in this block that are done. */
+  SHARED int n_pixels_done;
+  /** Whether loading of balls is completed. */
+  SHARED bool loading_done;
+  /** The number of balls loaded overall (just for statistics). */
+  SHARED int n_balls_loaded;
+  /** The area this thread block covers. */
+  SHARED IntersectInfo block_area;
+  if (thread_block.thread_rank() == 0) {
+    // Initialize the shared variables.
+    n_loaded = 0;
+    block_area.min.x = static_cast<ushort>(coord_x);
+    block_area.max.x = static_cast<ushort>(IMIN(
+        coord_x + blockDim.x, cam_norm.film_border_left + cam_norm.film_width));
+    block_area.min.y = static_cast<ushort>(coord_y);
+    block_area.max.y = static_cast<ushort>(IMIN(
+        coord_y + blockDim.y, cam_norm.film_border_top + cam_norm.film_height));
+    n_pixels_done = 0;
+    loading_done = false;
+    n_balls_loaded = 0;
+  }
+  PULSAR_LOG_DEV_PIX(
+      PULSAR_LOG_RENDER_PIX,
+      "render|block_area.min: %d, %d. block_area.max: %d, %d.\n",
+      block_area.min.x,
+      block_area.min.y,
+      block_area.max.x,
+      block_area.max.y);
+  // Initialization of the pixel with the background color.
+  /**
+   * The result of this very pixel.
+   * The offset calculation might overflow if this thread is out of
+   * bounds of the film. However, in this case result is not
+   * accessed, so this is fine.
+   */
+  float* result = result_d +
+      (coord_y - cam_norm.film_border_top) * cam_norm.film_width *
+          cam_norm.n_channels +
+      (coord_x - cam_norm.film_border_left) * cam_norm.n_channels;
+  if (coord_x >= cam_norm.film_border_left &&
+      coord_x < cam_norm.film_border_left + cam_norm.film_width &&
+      coord_y >= cam_norm.film_border_top &&
+      coord_y < cam_norm.film_border_top + cam_norm.film_height) {
+    // Initialize the result.
+    if (mode == 0u) {
+      for (uint c_id = 0; c_id < cam_norm.n_channels; ++c_id)
+        result[c_id] = bg_col[c_id];
+    } else {
+      result[0] = 0.f;
+    }
+  }
+  /** Normalization denominator. */
+  float sm_d = 1.f;
+  /** Normalization tracker for stable softmax. The maximum observed value. */
+  float sm_m = cam_norm.background_normalization_depth / gamma;
+  /** Whether this pixel has had all information needed for drawing. */
+  bool done =
+      (coord_x < cam_norm.film_border_left ||
+       coord_x >= cam_norm.film_border_left + cam_norm.film_width ||
+       coord_y < cam_norm.film_border_top ||
+       coord_y >= cam_norm.film_border_top + cam_norm.film_height);
+  /** The depth threshold for a new point to have at least
+   * `percent_allowed_difference` influence on the result color. All points that
+   * are further away than this are ignored.
+   */
+  float depth_threshold = done ? -1.f : MAX_FLOAT;
+  /** The closest intersection possible of a ball that was hit by this pixel
+   * ray. */
+  float max_closest_possible_intersection_hit = -1.f;
+  bool hit; /** Whether a sphere was hit. */
+  float intersection_depth; /** The intersection_depth for a sphere at this
+                               pixel. */
+  float closest_possible_intersection; /** The closest possible intersection
+    for this sphere. */
+  float max_closest_possible_intersection;
+  // Sync up threads so that everyone is similarly initialized.
+  thread_block.sync();
+  //! Coalesced loading and intersection analysis of balls.
+  for (uint ball_idx = thread_block.thread_rank();
+       ball_idx < iDivCeil(static_cast<uint>(*num_balls), thread_block.size()) *
+               thread_block.size() &&
+       !loading_done && n_pixels_done < thread_block.size();
+       ball_idx += thread_block.size()) {
+    if (ball_idx < static_cast<uint>(*num_balls)) { // Account for overflow.
+      const IntersectInfo& ii = ii_d[ball_idx];
+      hit = (ii.min.x <= block_area.max.x) && (ii.max.x > block_area.min.x) &&
+          (ii.min.y <= block_area.max.y) && (ii.max.y > block_area.min.y);
+      if (hit) {
+        uint write_idx = ATOMICADD_B(&n_loaded, 1u);
+        di_l[write_idx] = di_d[ball_idx];
+        sphere_id_l[write_idx] = static_cast<uint>(ids_d[ball_idx]);
+        PULSAR_LOG_DEV_PIXB(
+            PULSAR_LOG_RENDER_PIX,
+            "render|found intersection with sphere %u.\n",
+            sphere_id_l[write_idx]);
+      }
+      if (ii.min.x == MAX_USHORT)
+        // This is an invalid sphere (out of image). These spheres have
+        // maximum depth. Since we ordered the spheres by earliest possible
+        // intersection depth we're certain that there will be no other sphere
+        // that is relevant after this one.
+        loading_done = true;
+    }
+    // Reset n_pixels_done.
+    n_pixels_done = 0;
+    thread_block.sync(); // Make sure n_loaded is updated.
+    if (n_loaded > RENDER_BUFFER_LOAD_THRESH) {
+      // The load buffer is full enough. Draw.
+      if (thread_block.thread_rank() == 0)
+        n_balls_loaded += n_loaded;
+      max_closest_possible_intersection = 0.f;
+      // This excludes threads outside of the image boundary. Also, it reduces
+      // block artifacts.
+      if (!done) {
+        for (uint draw_idx = 0; draw_idx < n_loaded; ++draw_idx) {
+          intersection_depth = 0.f;
+          if (cam_norm.orthogonal_projection) {
+            // The closest possible intersection is the distance to the camera
+            // plane.
+            closest_possible_intersection = min_depth_d[sphere_id_l[draw_idx]];
+          } else {
+            closest_possible_intersection =
+                di_l[draw_idx].t_center - di_l[draw_idx].radius;
+          }
+          PULSAR_LOG_DEV_PIX(
+              PULSAR_LOG_RENDER_PIX,
+              "render|drawing sphere %u (depth: %f, "
+              "closest possible intersection: %f).\n",
+              sphere_id_l[draw_idx],
+              di_l[draw_idx].t_center,
+              closest_possible_intersection);
+          hit = draw(
+              di_l[draw_idx], // Sphere to draw.
+              op_d == NULL ? 1.f : op_d[sphere_id_l[draw_idx]], // Opacity.
+              cam_norm, // Cam.
+              gamma, // Gamma.
+              ray_dir_norm, // Ray direction.
+              projected_ray, // Ray intersection with the image.
+              // Mode switches.
+              true, // Draw.
+              false,
+              false,
+              false,
+              false,
+              false, // No gradients.
+              // Position info.
+              coord_x,
+              coord_y,
+              sphere_id_l[draw_idx],
+              // Optional in variables.
+              NULL, // intersect information.
+              NULL, // ray_dir.
+              NULL, // norm_ray_dir.
+              NULL, // grad_pix.
+              &ln_pad_over_1minuspad,
+              // in/out variables
+              &sm_d,
+              &sm_m,
+              result,
+              // Optional out.
+              &depth_threshold,
+              &intersection_depth,
+              NULL,
+              NULL,
+              NULL,
+              NULL,
+              NULL // gradients.
+          );
+          if (hit) {
+            max_closest_possible_intersection_hit = FMAX(
+                max_closest_possible_intersection_hit,
+                closest_possible_intersection);
+            tracker.track(
+                sphere_id_l[draw_idx], intersection_depth, coord_x, coord_y);
+          }
+          max_closest_possible_intersection = FMAX(
+              max_closest_possible_intersection, closest_possible_intersection);
+        }
+        PULSAR_LOG_DEV_PIX(
+            PULSAR_LOG_RENDER_PIX,
+            "render|max_closest_possible_intersection: %f, "
+            "depth_threshold: %f.\n",
+            max_closest_possible_intersection,
+            depth_threshold);
+      }
+      done = done || // Finished: out of film, beyond the threshold or hit cap.
+          (percent_allowed_difference > 0.f &&
+           max_closest_possible_intersection > depth_threshold) ||
+          tracker.get_n_hits() >= max_n_hits;
+      uint warp_done = thread_warp.ballot(done);
+      if (thread_warp.thread_rank() == 0)
+        ATOMICADD_B(&n_pixels_done, POPC(warp_done));
+      // This sync is necessary to keep n_loaded until all threads are done with
+      // painting.
+      thread_block.sync();
+      n_loaded = 0; // Flush the load buffer.
+    }
+    thread_block.sync();
+  }
+  if (thread_block.thread_rank() == 0)
+    n_balls_loaded += n_loaded; // Leftovers that never triggered a flush.
+  PULSAR_LOG_DEV_PIX(
+      PULSAR_LOG_RENDER_PIX,
+      "render|loaded %d balls in total.\n",
+      n_balls_loaded);
+  if (!done) { // Draw what is still left in the load buffer.
+    for (uint draw_idx = 0; draw_idx < n_loaded; ++draw_idx) {
+      intersection_depth = 0.f;
+      if (cam_norm.orthogonal_projection) {
+        // The closest possible intersection is the distance to the camera
+        // plane.
+        closest_possible_intersection = min_depth_d[sphere_id_l[draw_idx]];
+      } else {
+        closest_possible_intersection =
+            di_l[draw_idx].t_center - di_l[draw_idx].radius;
+      }
+      PULSAR_LOG_DEV_PIX(
+          PULSAR_LOG_RENDER_PIX,
+          "render|drawing sphere %u (depth: %f, "
+          "closest possible intersection: %f).\n",
+          sphere_id_l[draw_idx],
+          di_l[draw_idx].t_center,
+          closest_possible_intersection);
+      hit = draw(
+          di_l[draw_idx], // Sphere to draw.
+          op_d == NULL ? 1.f : op_d[sphere_id_l[draw_idx]], // Opacity.
+          cam_norm, // Cam.
+          gamma, // Gamma.
+          ray_dir_norm, // Ray direction.
+          projected_ray, // Ray intersection with the image.
+          // Mode switches.
+          true, // Draw.
+          false,
+          false,
+          false,
+          false,
+          false, // No gradients.
+          // Logging info.
+          coord_x,
+          coord_y,
+          sphere_id_l[draw_idx],
+          // Optional in variables.
+          NULL, // intersect information.
+          NULL, // ray_dir.
+          NULL, // norm_ray_dir.
+          NULL, // grad_pix.
+          &ln_pad_over_1minuspad,
+          // in/out variables
+          &sm_d,
+          &sm_m,
+          result,
+          // Optional out.
+          &depth_threshold,
+          &intersection_depth,
+          NULL,
+          NULL,
+          NULL,
+          NULL,
+          NULL // gradients.
+      );
+      if (hit) {
+        max_closest_possible_intersection_hit = FMAX(
+            max_closest_possible_intersection_hit,
+            closest_possible_intersection);
+        tracker.track(
+            sphere_id_l[draw_idx], intersection_depth, coord_x, coord_y);
+      }
+    }
+  }
+  if (coord_x < cam_norm.film_border_left ||
+      coord_y < cam_norm.film_border_top ||
+      coord_x >= cam_norm.film_border_left + cam_norm.film_width ||
+      coord_y >= cam_norm.film_border_top + cam_norm.film_height) {
+    RETURN_PARALLEL();
+  }
+  if (mode == 1u) {
+    // The subtractions, for example coord_y - cam_norm.film_border_top, are
+    // safe even though both components are uints. We checked their relation
+    // just above.
+    result_d
+        [(coord_y - cam_norm.film_border_top) * cam_norm.film_width *
+             cam_norm.n_channels +
+         (coord_x - cam_norm.film_border_left) * cam_norm.n_channels] =
+            static_cast<float>(tracker.get_n_hits());
+  } else {
+    float sm_d_normfac = FRCP(FMAX(sm_d, FEPS)); // Clamp the denominator.
+    for (uint c_id = 0; c_id < cam_norm.n_channels; ++c_id)
+      result[c_id] *= sm_d_normfac;
+    int write_loc = (coord_y - cam_norm.film_border_top) * cam_norm.film_width *
+            (3 + 2 * n_track) + // Per pixel: 3 floats + (id, depth) pairs.
+        (coord_x - cam_norm.film_border_left) * (3 + 2 * n_track);
+    forw_info_d[write_loc] = sm_m;
+    forw_info_d[write_loc + 1] = sm_d;
+    forw_info_d[write_loc + 2] = max_closest_possible_intersection_hit;
+    PULSAR_LOG_DEV_PIX(
+        PULSAR_LOG_RENDER_PIX,
+        "render|writing the %d most important ball infos.\n",
+        IMIN(n_track, tracker.get_n_hits()));
+    for (int i = 0; i < n_track; ++i) {
+      int sphere_id = tracker.get_closest_sphere_id(i);
+      IASF(sphere_id, forw_info_d[write_loc + 3 + i * 2]); // Id slot of pair i.
+      forw_info_d[write_loc + 3 + i * 2 + 1] = // Depth slot; -1.f if unset.
+          tracker.get_closest_sphere_depth(i) == MAX_FLOAT
+          ? -1.f
+          : tracker.get_closest_sphere_depth(i);
+      PULSAR_LOG_DEV_PIX(
+          PULSAR_LOG_RENDER_PIX,
+          "render|writing %d most important: id: %d, normalized depth: %f.\n",
+          i,
+          tracker.get_closest_sphere_id(i),
+          tracker.get_closest_sphere_depth(i));
+    }
+  }
+  END_PARALLEL_2D();
+}
+
+} // namespace Renderer
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.render.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.render.instantiate.h
new file mode 100644
index 0000000000000000000000000000000000000000..9c1f326e63b8b4860137d9f0d0f440896adb2a88
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.render.instantiate.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_RENDER_INSTANTIATE_H_
+#define PULSAR_NATIVE_INCLUDE_RENDERER_RENDER_INSTANTIATE_H_
+
+#include "./renderer.render.device.h"
+
+namespace pulsar {
+namespace Renderer {
+template GLOBAL void render<ISONDEVICE>( // Explicit instantiation.
+    size_t const* const RESTRICT
+        num_balls, /** Number of balls relevant for this pass. */
+    IntersectInfo const* const RESTRICT ii_d, /** Intersect information. */
+    DrawInfo const* const RESTRICT di_d, /** Draw information. */
+    float const* const RESTRICT min_depth_d, /** Minimum depth per sphere. */
+    int const* const RESTRICT id_d, /** IDs. */
+    float const* const RESTRICT op_d, /** Opacity. */
+    const CamInfo cam_norm, /** Camera normalized with all vectors to be in the
+                             * camera coordinate system.
+                             */
+    const float gamma, /** Transparency parameter. */
+    const float percent_allowed_difference, /** Maximum allowed
+                                               error in color. */
+    const uint max_n_hits,
+    const float* bg_col_d, /** Presumably the background color; TODO confirm. */
+    const uint mode,
+    const int x_min,
+    const int y_min,
+    const int x_step,
+    const int y_step,
+    // Out variables.
+    float* const RESTRICT result_d, /** The result image. */
+    float* const RESTRICT forw_info_d, /** Info needed for the grad computation;
+                                            3 + 2 * n_track floats per pixel. */
+    const int n_track /** The number of spheres to track for backprop. */
+);
+}
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/logging.h b/pytorch3d/pytorch3d/csrc/pulsar/logging.h
new file mode 100644
index 0000000000000000000000000000000000000000..63d472257671287156ccf77531c6897beff1fcd2
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/logging.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_LOGGING_H_
+#define PULSAR_LOGGING_H_
+
+// #define PULSAR_LOGGING_ENABLED
+/**
+ * Enable detailed per-operation timings.
+ *
+ * This timing scheme is not appropriate to measure batched calculations.
+ * Use `PULSAR_TIMINGS_BATCHED_ENABLED` for that.
+ */
+// #define PULSAR_TIMINGS_ENABLED
+/**
+ * Time batched operations.
+ */
+// #define PULSAR_TIMINGS_BATCHED_ENABLED
+#if defined(PULSAR_TIMINGS_BATCHED_ENABLED) && defined(PULSAR_TIMINGS_ENABLED)
+#pragma message("Pulsar|batched and unbatched timings enabled. This will not")
+#pragma message("Pulsar|create meaningful results.")
+#endif
+
+#ifdef PULSAR_LOGGING_ENABLED
+// Per-category switches. *_X/*_Y are the coordinates the helper macros match;
+// in APIX/PIX/NODE a true *_ALL logs everywhere, even if the category is false.
+// CAFFE2_LOG_THRESHOLD: 0: INFO, 1: WARNING, 2: ERROR, 3: FATAL (Abort after logging).
+#define CAFFE2_LOG_THRESHOLD 0
+#define PULSAR_LOG_INIT false
+#define PULSAR_LOG_FORWARD false
+#define PULSAR_LOG_CALC_SIGNATURE false
+#define PULSAR_LOG_RENDER false
+#define PULSAR_LOG_RENDER_PIX false
+#define PULSAR_LOG_RENDER_PIX_X 428
+#define PULSAR_LOG_RENDER_PIX_Y 669
+#define PULSAR_LOG_RENDER_PIX_ALL false
+#define PULSAR_LOG_TRACKER_PIX false
+#define PULSAR_LOG_TRACKER_PIX_X 428
+#define PULSAR_LOG_TRACKER_PIX_Y 669
+#define PULSAR_LOG_TRACKER_PIX_ALL false
+#define PULSAR_LOG_DRAW_PIX false
+#define PULSAR_LOG_DRAW_PIX_X 428
+#define PULSAR_LOG_DRAW_PIX_Y 669
+#define PULSAR_LOG_DRAW_PIX_ALL false
+#define PULSAR_LOG_BACKWARD false
+#define PULSAR_LOG_GRAD false
+#define PULSAR_LOG_GRAD_X 509
+#define PULSAR_LOG_GRAD_Y 489
+#define PULSAR_LOG_GRAD_ALL false
+#define PULSAR_LOG_NORMALIZE false
+#define PULSAR_LOG_NORMALIZE_X 0
+#define PULSAR_LOG_NORMALIZE_ALL false
+// Helpers expand to a bare `if` + printf and use names (coord_x, idx, ...)
+#define PULSAR_LOG_DEV(ID, ...) \
+  if ((ID)) {                   \
+    printf(__VA_ARGS__);        \
+  }
+#define PULSAR_LOG_DEV_APIX(ID, MSG, ...)                               \
+  if ((ID) && (film_coord_x == (ID##_X) && film_coord_y == (ID##_Y)) || \
+      ID##_ALL) {                                                       \
+    printf(                                                             \
+        "%u %u (ap %u %u)|" MSG,                                        \
+        film_coord_x,                                                   \
+        film_coord_y,                                                   \
+        ap_coord_x,                                                     \
+        ap_coord_y,                                                     \
+        __VA_ARGS__);                                                   \
+  }
+#define PULSAR_LOG_DEV_PIX(ID, MSG, ...)                                  \
+  if ((ID) && (coord_x == (ID##_X) && coord_y == (ID##_Y)) || ID##_ALL) { \
+    printf("%u %u|" MSG, coord_x, coord_y, __VA_ARGS__);                  \
+  }
+#ifdef __CUDACC__
+#define PULSAR_LOG_DEV_PIXB(ID, MSG, ...)                       \
+  if ((ID) && static_cast<int>(block_area.min.x) <= (ID##_X) && \
+      static_cast<int>(block_area.max.x) > (ID##_X) &&          \
+      static_cast<int>(block_area.min.y) <= (ID##_Y) &&         \
+      static_cast<int>(block_area.max.y) > (ID##_Y)) {          \
+    printf("%u %u|" MSG, coord_x, coord_y, __VA_ARGS__);        \
+  }
+#else
+#define PULSAR_LOG_DEV_PIXB(ID, MSG, ...)                   \
+  if ((ID) && coord_x == (ID##_X) && coord_y == (ID##_Y)) { \
+    printf("%u %u|" MSG, coord_x, coord_y, __VA_ARGS__);    \
+  }
+#endif
+#define PULSAR_LOG_DEV_NODE(ID, MSG, ...)      \
+  if ((ID) && idx == (ID##_X) || (ID##_ALL)) { \
+    printf("%u|" MSG, idx, __VA_ARGS__);       \
+  }
+
+#else
+// Logging disabled: only these category switches are defined in this branch.
+#define CAFFE2_LOG_THRESHOLD 2
+
+#define PULSAR_LOG_RENDER false
+#define PULSAR_LOG_INIT false
+#define PULSAR_LOG_FORWARD false
+#define PULSAR_LOG_BACKWARD false
+#define PULSAR_LOG_TRACKER_PIX false
+// No-op stubs: the helper macros expand to nothing and drop their arguments.
+#define PULSAR_LOG_DEV(...)
+#define PULSAR_LOG_DEV_APIX(...)
+#define PULSAR_LOG_DEV_PIX(...)
+#define PULSAR_LOG_DEV_PIXB(...)
+#define PULSAR_LOG_DEV_NODE(...)
+
+#endif
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/camera.cpp b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/camera.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c3794e7edf90f4af50632ea91bc131bd87fd751f
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/camera.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "./camera.h"
+#include "../include/math.h"
+
+namespace pulsar {
+namespace pytorch {
+
+// Builds a CamInfo from camera tensors (copied to the CPU) and scalar settings.
+// Throws std::runtime_error if the x and y pixel sizes differ by more than EPS.
+CamInfo cam_info_from_params(
+    const torch::Tensor& cam_pos,
+    const torch::Tensor& pixel_0_0_center,
+    const torch::Tensor& pixel_vec_x,
+    const torch::Tensor& pixel_vec_y,
+    const torch::Tensor& principal_point_offset,
+    const float& focal_length,
+    const uint& width,
+    const uint& height,
+    const float& min_dist,
+    const float& max_dist,
+    const bool& right_handed) {
+  CamInfo res;
+  fill_cam_vecs(
+      cam_pos.detach().cpu(), pixel_0_0_center.detach().cpu(),
+      pixel_vec_x.detach().cpu(), pixel_vec_y.detach().cpu(),
+      principal_point_offset.detach().cpu(), right_handed, &res);
+  res.half_pixel_size = 0.5f * length(res.pixel_dir_x);
+  // Compare in both directions: a y pixel that is smaller than the x pixel is
+  // just as inconsistent as one that is larger.
+  const auto size_diff = length(res.pixel_dir_y) * 0.5f - res.half_pixel_size;
+  if (size_diff > EPS || size_diff < -(EPS)) {
+    throw std::runtime_error("Pixel sizes must agree in x and y direction!");
+  }
+  res.focal_length = focal_length;
+  // Widen the aperture by twice the absolute offset and shift pixel (0, 0).
+  const auto abs_offset_x = abs(res.principal_point_offset_x);
+  const auto abs_offset_y = abs(res.principal_point_offset_y);
+  res.aperture_width = width + 2u * static_cast<uint>(abs_offset_x);
+  res.aperture_height = height + 2u * static_cast<uint>(abs_offset_y);
+  res.pixel_0_0_center -= res.pixel_dir_x * static_cast<float>(abs_offset_x);
+  res.pixel_0_0_center -= res.pixel_dir_y * static_cast<float>(abs_offset_y);
+  res.film_width = width;
+  res.film_height = height;
+  res.film_border_left =
+      static_cast<uint>(std::max(0, 2 * res.principal_point_offset_x));
+  res.film_border_top =
+      static_cast<uint>(std::max(0, 2 * res.principal_point_offset_y));
+  LOG_IF(INFO, PULSAR_LOG_INIT)
+      << "Aperture width, height: " << res.aperture_width << ", "
+      << res.aperture_height;
+  LOG_IF(INFO, PULSAR_LOG_INIT)
+      << "Film width, height: " << res.film_width << ", " << res.film_height;
+  LOG_IF(INFO, PULSAR_LOG_INIT)
+      << "Film border left, top: " << res.film_border_left << ", "
+      << res.film_border_top;
+  res.min_dist = min_dist;
+  res.max_dist = max_dist;
+  res.norm_fac = 1.f / (max_dist - min_dist);
+  return res;
+};
+
+} // namespace pytorch
+} // namespace pulsar
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/camera.h b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/camera.h
new file mode 100644
index 0000000000000000000000000000000000000000..9ecd95353ad76efd2760a4a634493917fda7b468
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/camera.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_CAMERA_H_
+#define PULSAR_NATIVE_CAMERA_H_
+
+#include <tuple>
+#include "../global.h"
+
+#include "../include/camera.h"
+
+namespace pulsar {
+namespace pytorch {
+
+// Fills the vector fields and the principal point offset of `res` by reading
+// the tensors through raw host pointers (float data, int32 for the offset).
+inline void fill_cam_vecs(
+    const torch::Tensor& pos_vec,
+    const torch::Tensor& pixel_0_0_center,
+    const torch::Tensor& pixel_dir_x,
+    const torch::Tensor& pixel_dir_y,
+    const torch::Tensor& principal_point_offset,
+    const bool& right_handed,
+    CamInfo* res) {
+  // Copies the first three floats of `src` into the x, y, z members of `dst`.
+  const auto copy_xyz = [](const torch::Tensor& src, auto& dst) {
+    const float* const vals = src.data_ptr<float>();
+    dst.x = vals[0];
+    dst.y = vals[1];
+    dst.z = vals[2];
+  };
+  copy_xyz(pos_vec, res->eye);
+  copy_xyz(pixel_0_0_center, res->pixel_0_0_center);
+  copy_xyz(pixel_dir_x, res->pixel_dir_x);
+  copy_xyz(pixel_dir_y, res->pixel_dir_y);
+  auto dir_z = pixel_dir_y.cross(pixel_dir_x, -1);
+  dir_z /= dir_z.norm();
+  if (right_handed) {
+    dir_z *= -1.f;
+  }
+  copy_xyz(dir_z, res->sensor_dir_z);
+  const int32_t* const offsets = principal_point_offset.data_ptr<int32_t>();
+  res->principal_point_offset_x = offsets[0];
+  res->principal_point_offset_y = offsets[1];
+}
+
+CamInfo cam_info_from_params( /** Builds a CamInfo; defined in camera.cpp. */
+    const torch::Tensor& cam_pos, /** Copied into CamInfo::eye. */
+    const torch::Tensor& pixel_0_0_center, /** Shifted by the offset later. */
+    const torch::Tensor& pixel_vec_x, /** Copied into CamInfo::pixel_dir_x. */
+    const torch::Tensor& pixel_vec_y, /** Copied into CamInfo::pixel_dir_y. */
+    const torch::Tensor& principal_point_offset, /** Two int32 values: x, y. */
+    const float& focal_length, /** Stored as CamInfo::focal_length. */
+    const uint& width, /** Stored as CamInfo::film_width. */
+    const uint& height, /** Stored as CamInfo::film_height. */
+    const float& min_dist, /** Stored; norm_fac = 1 / (max_dist - min_dist). */
+    const float& max_dist, /** Stored as CamInfo::max_dist. */
+    const bool& right_handed); /** If true, sensor_dir_z is negated. */
+
+} // namespace pytorch
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/renderer.cpp b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/renderer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4349aea796cfea0a63a5f76f7669816993fe3f2b
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/renderer.cpp
@@ -0,0 +1,1599 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "./renderer.h"
+#include "../include/commands.h"
+#include "./camera.h"
+#include "./util.h"
+
+#include <ATen/ATen.h>
+#ifdef WITH_CUDA
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#endif
+
+#ifndef TORCH_CHECK_ARG
+// torch <= 1.10: fall back to TORCH_CHECK with an "invalid argument" prefix.
+#define TORCH_CHECK_ARG(cond, argN, ...) \
+  TORCH_CHECK(cond, "invalid argument ", argN, ": ", __VA_ARGS__)
+#endif
+
+namespace PRE = ::pulsar::Renderer;
+
+namespace pulsar {
+namespace pytorch {
+
+// Validates the settings and constructs a single renderer on the CPU. The
+// number passed to each TORCH_CHECK_ARG is the 1-based position of the checked
+// constructor parameter.
+Renderer::Renderer(
+    const unsigned int& width,
+    const unsigned int& height,
+    const unsigned int& max_n_balls,
+    const bool& orthogonal_projection,
+    const bool& right_handed_system,
+    const float& background_normalization_depth,
+    const uint& n_channels,
+    const uint& n_track) {
+  LOG_IF(INFO, PULSAR_LOG_INIT) << "Initializing renderer.";
+  TORCH_CHECK_ARG(width > 0, 1, "image width must be > 0!");
+  TORCH_CHECK_ARG(height > 0, 2, "image height must be > 0!");
+  TORCH_CHECK_ARG(max_n_balls > 0, 3, "max_n_balls must be > 0!");
+  TORCH_CHECK_ARG(
+      background_normalization_depth > 0.f &&
+          background_normalization_depth < 1.f,
+      6,
+      "background_normalization_depth must be in ]0., 1.[");
+  TORCH_CHECK_ARG(n_channels > 0, 7, "n_channels must be > 0");
+  // MAX_GRAD_SPHERES itself is accepted, so the message has to say "<=".
+  TORCH_CHECK_ARG(
+      n_track > 0 && n_track <= MAX_GRAD_SPHERES,
+      8,
+      ("n_track must be > 0 and <= " + std::to_string(MAX_GRAD_SPHERES) +
+       ". Is " + std::to_string(n_track) + ".")
+          .c_str());
+  LOG_IF(INFO, PULSAR_LOG_INIT)
+      << "Image width: " << width << ", height: " << height;
+  // Buffers start on the CPU; ensure_on_device can rebuild them on CUDA.
+  this->renderer_vec.emplace_back();
+  this->device_type = c10::DeviceType::CPU;
+  this->device_index = -1;
+  PRE::construct<false>(
+      this->renderer_vec.data(), max_n_balls, width, height,
+      orthogonal_projection, right_handed_system,
+      background_normalization_depth, n_channels, n_track);
+  // Its device() is what operator== and the destructor query.
+  this->device_tracker = torch::zeros(1);
+};
+
+// Destroys every renderer in renderer_vec with the variant for device_type.
+Renderer::~Renderer() {
+  if (this->device_type == c10::DeviceType::CUDA) {
+#ifdef WITH_CUDA // Can't happen in the case that not compiled with CUDA.
+    at::cuda::CUDAGuard device_guard(this->device_tracker.device());
+    for (auto& nrend : this->renderer_vec) { // In place, without copies.
+      PRE::destruct<true>(&nrend);
+    }
+#endif
+  } else {
+    for (auto& nrend : this->renderer_vec) { // In place, without copies.
+      PRE::destruct<false>(&nrend);
+    }
+  }
+}
+
+// Equal when the first native renderers and the tracked devices both match.
+bool Renderer::operator==(const Renderer& rhs) const {
+  LOG_IF(INFO, PULSAR_LOG_INIT) << "Equality check.";
+  const bool same_rend = renderer_vec.front() == rhs.renderer_vec.front();
+  LOG_IF(INFO, PULSAR_LOG_INIT) << "  Renderer agrees: " << same_rend;
+  const bool same_dev = device_tracker.device() == rhs.device_tracker.device();
+  LOG_IF(INFO, PULSAR_LOG_INIT) << "  Device agrees: " << same_dev;
+  return same_rend && same_dev;
+};
+
+// Rebuilds every renderer on `device` if it differs from the current device.
+void Renderer::ensure_on_device(torch::Device device, bool /*non_blocking*/) {
+  TORCH_CHECK_ARG(
+      device.type() == c10::DeviceType::CUDA ||
+          device.type() == c10::DeviceType::CPU,
+      1,
+      "Only CPU and CUDA device types are supported.");
+  if (device.type() != this->device_type ||
+      device.index() != this->device_index) {
+#ifdef WITH_CUDA
+    LOG_IF(INFO, PULSAR_LOG_INIT)
+        << "Transferring render buffers between devices.";
+    // RAII guard: restores the caller's CUDA device even if a call throws.
+    at::cuda::OptionalCUDAGuard device_guard;
+    if (this->device_type == c10::DeviceType::CUDA) {
+      LOG_IF(INFO, PULSAR_LOG_INIT) << "  Destructing on CUDA.";
+      device_guard.set_index(this->device_index);
+      for (auto& nrend : this->renderer_vec) {
+        PRE::destruct<true>(&nrend);
+      }
+    } else {
+      LOG_IF(INFO, PULSAR_LOG_INIT) << "  Destructing on CPU.";
+      for (auto& nrend : this->renderer_vec) {
+        PRE::destruct<false>(&nrend);
+      }
+    }
+    if (device.type() == c10::DeviceType::CUDA) {
+      LOG_IF(INFO, PULSAR_LOG_INIT) << "  Constructing on CUDA.";
+      device_guard.set_index(device.index());
+      for (auto& nrend : this->renderer_vec) {
+        PRE::construct<true>(
+            &nrend,
+            this->renderer_vec[0].max_num_balls,
+            this->renderer_vec[0].cam.film_width,
+            this->renderer_vec[0].cam.film_height,
+            this->renderer_vec[0].cam.orthogonal_projection,
+            this->renderer_vec[0].cam.right_handed,
+            this->renderer_vec[0].cam.background_normalization_depth,
+            this->renderer_vec[0].cam.n_channels,
+            this->n_track());
+      }
+    } else {
+      LOG_IF(INFO, PULSAR_LOG_INIT) << "  Constructing on CPU.";
+      for (auto& nrend : this->renderer_vec) {
+        PRE::construct<false>(
+            &nrend,
+            this->renderer_vec[0].max_num_balls,
+            this->renderer_vec[0].cam.film_width,
+            this->renderer_vec[0].cam.film_height,
+            this->renderer_vec[0].cam.orthogonal_projection,
+            this->renderer_vec[0].cam.right_handed,
+            this->renderer_vec[0].cam.background_normalization_depth,
+            this->renderer_vec[0].cam.n_channels,
+            this->n_track());
+      }
+    }
+    this->device_type = device.type();
+    this->device_index = device.index();
+#else
+    throw std::runtime_error(
+        "pulsar was built without CUDA "
+        "but a device move to a CUDA device was initiated.");
+#endif
+  }
+};
+
+// Appends renderers until at least `batch_size` exist, building each one for
+// the current device type.
+void Renderer::ensure_n_renderers_gte(const size_t& batch_size) {
+  if (this->renderer_vec.size() < batch_size) {
+    ptrdiff_t diff = batch_size - this->renderer_vec.size();
+    LOG_IF(INFO, PULSAR_LOG_INIT)
+        << "Increasing render buffers by " << diff
+        << " to account for batch size " << batch_size;
+    for (ptrdiff_t i = 0; i < diff; ++i) {
+      this->renderer_vec.emplace_back();
+      // Taken after emplace_back, which may reallocate the vector.
+      const auto& cam = this->renderer_vec[0].cam;
+      if (this->device_type == c10::DeviceType::CUDA) {
+#ifdef WITH_CUDA
+        // Build on the renderer's device, not on whichever one is current;
+        // ensure_on_device selects the device before constructing as well.
+        at::cuda::CUDAGuard device_guard(
+            static_cast<c10::DeviceIndex>(this->device_index));
+        PRE::construct<true>(
+            &this->renderer_vec.back(),
+            this->max_num_balls(), this->width(), this->height(),
+            cam.orthogonal_projection, cam.right_handed,
+            cam.background_normalization_depth, cam.n_channels,
+            this->n_track());
+#endif
+      } else {
+        PRE::construct<false>(
+            &this->renderer_vec.back(),
+            this->max_num_balls(), this->width(), this->height(),
+            cam.orthogonal_projection, cam.right_handed,
+            cam.background_normalization_depth, cam.n_channels,
+            this->n_track());
+      }
+    }
+  }
+}
+
+std::tuple<size_t, size_t, bool, torch::Tensor> Renderer::arg_check(
+    const torch::Tensor& vert_pos,
+    const torch::Tensor& vert_col,
+    const torch::Tensor& vert_radii,
+    const torch::Tensor& cam_pos,
+    const torch::Tensor& pixel_0_0_center,
+    const torch::Tensor& pixel_vec_x,
+    const torch::Tensor& pixel_vec_y,
+    const torch::Tensor& focal_length,
+    const torch::Tensor& principal_point_offsets,
+    const float& gamma,
+    const float& max_depth,
+    float& min_depth,
+    const c10::optional<torch::Tensor>& bg_col,
+    const c10::optional<torch::Tensor>& opacity,
+    const float& percent_allowed_difference,
+    const uint& max_n_hits,
+    const uint& mode) {
+  LOG_IF(INFO, PULSAR_LOG_FORWARD || PULSAR_LOG_BACKWARD) << "Arg check.";
+  size_t batch_size = 1;
+  size_t n_points;
+  bool batch_processing = false;
+  if (vert_pos.ndimension() == 3) {
+    // Check all parameters adhere batch size.
+    batch_processing = true;
+    batch_size = vert_pos.size(0);
+    TORCH_CHECK_ARG(
+        vert_col.ndimension() == 3 &&
+            vert_col.size(0) == static_cast<int64_t>(batch_size),
+        2,
+        "vert_col needs to have batch size.");
+    TORCH_CHECK_ARG(
+        vert_radii.ndimension() == 2 &&
+            vert_radii.size(0) == static_cast<int64_t>(batch_size),
+        3,
+        "vert_radii must be specified per batch.");
+    TORCH_CHECK_ARG(
+        cam_pos.ndimension() == 2 &&
+            cam_pos.size(0) == static_cast<int64_t>(batch_size),
+        4,
+        "cam_pos must be specified per batch and have the correct batch size.");
+    TORCH_CHECK_ARG(
+        pixel_0_0_center.ndimension() == 2 &&
+            pixel_0_0_center.size(0) == static_cast<int64_t>(batch_size),
+        5,
+        "pixel_0_0_center must be specified per batch.");
+    TORCH_CHECK_ARG(
+        pixel_vec_x.ndimension() == 2 &&
+            pixel_vec_x.size(0) == static_cast<int64_t>(batch_size),
+        6,
+        "pixel_vec_x must be specified per batch.");
+    TORCH_CHECK_ARG(
+        pixel_vec_y.ndimension() == 2 &&
+            pixel_vec_y.size(0) == static_cast<int64_t>(batch_size),
+        7,
+        "pixel_vec_y must be specified per batch.");
+    TORCH_CHECK_ARG(
+        focal_length.ndimension() == 1 &&
+            focal_length.size(0) == static_cast<int64_t>(batch_size),
+        8,
+        "focal_length must be specified per batch.");
+    TORCH_CHECK_ARG(
+        principal_point_offsets.ndimension() == 2 &&
+            principal_point_offsets.size(0) == static_cast<int64_t>(batch_size),
+        9,
+        "principal_point_offsets must be specified per batch.");
+    if (opacity.has_value()) {
+      TORCH_CHECK_ARG(
+          opacity.value().ndimension() == 2 &&
+              opacity.value().size(0) == static_cast<int64_t>(batch_size),
+          13,
+          "Opacity needs to be specified batch-wise.");
+    }
+    // Check all parameters are for a matching number of points.
+    n_points = vert_pos.size(1);
+    TORCH_CHECK_ARG(
+        vert_col.size(1) == static_cast<int64_t>(n_points),
+        2,
+        ("The number of points for vertex positions (" +
+         std::to_string(n_points) + ") and vertex colors (" +
+         std::to_string(vert_col.size(1)) + ") doesn't agree.")
+            .c_str());
+    TORCH_CHECK_ARG(
+        vert_radii.size(1) == static_cast<int64_t>(n_points),
+        3,
+        ("The number of points for vertex positions (" +
+         std::to_string(n_points) + ") and vertex radii (" +
+         std::to_string(vert_col.size(1)) + ") doesn't agree.")
+            .c_str());
+    if (opacity.has_value()) {
+      TORCH_CHECK_ARG(
+          opacity.value().size(1) == static_cast<int64_t>(n_points),
+          13,
+          "Opacity needs to be specified per point.");
+    }
+    // Check all parameters have the correct last dimension size.
+    TORCH_CHECK_ARG(
+        vert_pos.size(2) == 3,
+        1,
+        ("Vertex positions must be 3D (have shape " +
+         std::to_string(vert_pos.size(2)) + ")!")
+            .c_str());
+    TORCH_CHECK_ARG(
+        vert_col.size(2) == this->renderer_vec[0].cam.n_channels,
+        2,
+        ("Vertex colors must have the right number of channels (have shape " +
+         std::to_string(vert_col.size(2)) + ", need " +
+         std::to_string(this->renderer_vec[0].cam.n_channels) + ")!")
+            .c_str());
+    TORCH_CHECK_ARG(
+        cam_pos.size(1) == 3,
+        4,
+        ("Camera position must be 3D (has shape " +
+         std::to_string(cam_pos.size(1)) + ")!")
+            .c_str());
+    TORCH_CHECK_ARG(
+        pixel_0_0_center.size(1) == 3,
+        5,
+        ("pixel_0_0_center must be 3D (has shape " +
+         std::to_string(pixel_0_0_center.size(1)) + ")!")
+            .c_str());
+    TORCH_CHECK_ARG(
+        pixel_vec_x.size(1) == 3,
+        6,
+        ("pixel_vec_x must be 3D (has shape " +
+         std::to_string(pixel_vec_x.size(1)) + ")!")
+            .c_str());
+    TORCH_CHECK_ARG(
+        pixel_vec_y.size(1) == 3,
+        7,
+        ("pixel_vec_y must be 3D (has shape " +
+         std::to_string(pixel_vec_y.size(1)) + ")!")
+            .c_str());
+    TORCH_CHECK_ARG(
+        principal_point_offsets.size(1) == 2,
+        9,
+        "principal_point_offsets must contain x and y offsets.");
+    // Ensure enough renderers are available for the batch.
+    ensure_n_renderers_gte(batch_size);
+  } else {
+    // Check all parameters are of correct dimension.
+    TORCH_CHECK_ARG(
+        vert_col.ndimension() == 2, 2, "vert_col needs to have dimension 2.");
+    TORCH_CHECK_ARG(
+        vert_radii.ndimension() == 1, 3, "vert_radii must have dimension 1.");
+    TORCH_CHECK_ARG(
+        cam_pos.ndimension() == 1, 4, "cam_pos must have dimension 1.");
+    TORCH_CHECK_ARG(
+        pixel_0_0_center.ndimension() == 1,
+        5,
+        "pixel_0_0_center must have dimension 1.");
+    TORCH_CHECK_ARG(
+        pixel_vec_x.ndimension() == 1, 6, "pixel_vec_x must have dimension 1.");
+    TORCH_CHECK_ARG(
+        pixel_vec_y.ndimension() == 1, 7, "pixel_vec_y must have dimension 1.");
+    TORCH_CHECK_ARG(
+        focal_length.ndimension() == 0,
+        8,
+        "focal_length must have dimension 0.");
+    TORCH_CHECK_ARG(
+        principal_point_offsets.ndimension() == 1,
+        9,
+        "principal_point_offsets must have dimension 1.");
+    if (opacity.has_value()) {
+      TORCH_CHECK_ARG(
+          opacity.value().ndimension() == 1,
+          13,
+          "Opacity needs to be specified per sample.");
+    }
+    // Check each.
+    n_points = vert_pos.size(0);
+    TORCH_CHECK_ARG(
+        vert_col.size(0) == static_cast<int64_t>(n_points),
+        2,
+        ("The number of points for vertex positions (" +
+         std::to_string(n_points) + ") and vertex colors (" +
+         std::to_string(vert_col.size(0)) + ") doesn't agree.")
+            .c_str());
+    TORCH_CHECK_ARG(
+        vert_radii.size(0) == static_cast<int64_t>(n_points),
+        3,
+        ("The number of points for vertex positions (" +
+         std::to_string(n_points) + ") and vertex radii (" +
+         std::to_string(vert_col.size(0)) + ") doesn't agree.")
+            .c_str());
+    if (opacity.has_value()) {
+      TORCH_CHECK_ARG(
+          opacity.value().size(0) == static_cast<int64_t>(n_points),
+          12,
+          "Opacity needs to be specified per point.");
+    }
+    // Check all parameters have the correct last dimension size.
+    TORCH_CHECK_ARG(
+        vert_pos.size(1) == 3,
+        1,
+        ("Vertex positions must be 3D (have shape " +
+         std::to_string(vert_pos.size(1)) + ")!")
+            .c_str());
+    TORCH_CHECK_ARG(
+        vert_col.size(1) == this->renderer_vec[0].cam.n_channels,
+        2,
+        ("Vertex colors must have the right number of channels (have shape " +
+         std::to_string(vert_col.size(1)) + ", need " +
+         std::to_string(this->renderer_vec[0].cam.n_channels) + ")!")
+            .c_str());
+    TORCH_CHECK_ARG(
+        cam_pos.size(0) == 3,
+        4,
+        ("Camera position must be 3D (has shape " +
+         std::to_string(cam_pos.size(0)) + ")!")
+            .c_str());
+    TORCH_CHECK_ARG(
+        pixel_0_0_center.size(0) == 3,
+        5,
+        ("pixel_0_0_center must be 3D (has shape " +
+         std::to_string(pixel_0_0_center.size(0)) + ")!")
+            .c_str());
+    TORCH_CHECK_ARG(
+        pixel_vec_x.size(0) == 3,
+        6,
+        ("pixel_vec_x must be 3D (has shape " +
+         std::to_string(pixel_vec_x.size(0)) + ")!")
+            .c_str());
+    TORCH_CHECK_ARG(
+        pixel_vec_y.size(0) == 3,
+        7,
+        ("pixel_vec_y must be 3D (has shape " +
+         std::to_string(pixel_vec_y.size(0)) + ")!")
+            .c_str());
+    TORCH_CHECK_ARG(
+        principal_point_offsets.size(0) == 2,
+        9,
+        "principal_point_offsets must have x and y component.");
+  }
+  // Check device placement.
+  auto dev = torch::device_of(vert_pos).value();
+  TORCH_CHECK_ARG(
+      dev.type() == this->device_type && dev.index() == this->device_index,
+      1,
+      ("Vertex positions must be stored on device " +
+       c10::DeviceTypeName(this->device_type) + ", index " +
+       std::to_string(this->device_index) + "! Are stored on " +
+       c10::DeviceTypeName(dev.type()) + ", index " +
+       std::to_string(dev.index()) + ".")
+          .c_str());
+  dev = torch::device_of(vert_col).value();
+  TORCH_CHECK_ARG(
+      dev.type() == this->device_type && dev.index() == this->device_index,
+      2,
+      ("Vertex colors must be stored on device " +
+       c10::DeviceTypeName(this->device_type) + ", index " +
+       std::to_string(this->device_index) + "! Are stored on " +
+       c10::DeviceTypeName(dev.type()) + ", index " +
+       std::to_string(dev.index()) + ".")
+          .c_str());
+  dev = torch::device_of(vert_radii).value();
+  TORCH_CHECK_ARG(
+      dev.type() == this->device_type && dev.index() == this->device_index,
+      3,
+      ("Vertex radii must be stored on device " +
+       c10::DeviceTypeName(this->device_type) + ", index " +
+       std::to_string(this->device_index) + "! Are stored on " +
+       c10::DeviceTypeName(dev.type()) + ", index " +
+       std::to_string(dev.index()) + ".")
+          .c_str());
+  dev = torch::device_of(cam_pos).value();
+  TORCH_CHECK_ARG(
+      dev.type() == this->device_type && dev.index() == this->device_index,
+      4,
+      ("Camera position must be stored on device " +
+       c10::DeviceTypeName(this->device_type) + ", index " +
+       std::to_string(this->device_index) + "! Are stored on " +
+       c10::DeviceTypeName(dev.type()) + ", index " +
+       std::to_string(dev.index()) + ".")
+          .c_str());
+  dev = torch::device_of(pixel_0_0_center).value();
+  TORCH_CHECK_ARG(
+      dev.type() == this->device_type && dev.index() == this->device_index,
+      5,
+      ("pixel_0_0_center must be stored on device " +
+       c10::DeviceTypeName(this->device_type) + ", index " +
+       std::to_string(this->device_index) + "! Are stored on " +
+       c10::DeviceTypeName(dev.type()) + ", index " +
+       std::to_string(dev.index()) + ".")
+          .c_str());
+  dev = torch::device_of(pixel_vec_x).value();
+  TORCH_CHECK_ARG(
+      dev.type() == this->device_type && dev.index() == this->device_index,
+      6,
+      ("pixel_vec_x must be stored on device " +
+       c10::DeviceTypeName(this->device_type) + ", index " +
+       std::to_string(this->device_index) + "! Are stored on " +
+       c10::DeviceTypeName(dev.type()) + ", index " +
+       std::to_string(dev.index()) + ".")
+          .c_str());
+  dev = torch::device_of(pixel_vec_y).value();
+  TORCH_CHECK_ARG(
+      dev.type() == this->device_type && dev.index() == this->device_index,
+      7,
+      ("pixel_vec_y must be stored on device " +
+       c10::DeviceTypeName(this->device_type) + ", index " +
+       std::to_string(this->device_index) + "! Are stored on " +
+       c10::DeviceTypeName(dev.type()) + ", index " +
+       std::to_string(dev.index()) + ".")
+          .c_str());
+  dev = torch::device_of(principal_point_offsets).value();
+  TORCH_CHECK_ARG(
+      dev.type() == this->device_type && dev.index() == this->device_index,
+      9,
+      ("principal_point_offsets must be stored on device " +
+       c10::DeviceTypeName(this->device_type) + ", index " +
+       std::to_string(this->device_index) + "! Are stored on " +
+       c10::DeviceTypeName(dev.type()) + ", index " +
+       std::to_string(dev.index()) + ".")
+          .c_str());
+  if (opacity.has_value()) {
+    dev = torch::device_of(opacity.value()).value();
+    TORCH_CHECK_ARG(
+        dev.type() == this->device_type && dev.index() == this->device_index,
+        13,
+        ("opacity must be stored on device " +
+         c10::DeviceTypeName(this->device_type) + ", index " +
+         std::to_string(this->device_index) + "! Is stored on " +
+         c10::DeviceTypeName(dev.type()) + ", index " +
+         std::to_string(dev.index()) + ".")
+            .c_str());
+  }
+  // Type checks.
+  TORCH_CHECK_ARG(
+      vert_pos.scalar_type() == c10::kFloat, 1, "pulsar requires float types.");
+  TORCH_CHECK_ARG(
+      vert_col.scalar_type() == c10::kFloat, 2, "pulsar requires float types.");
+  TORCH_CHECK_ARG(
+      vert_radii.scalar_type() == c10::kFloat,
+      3,
+      "pulsar requires float types.");
+  TORCH_CHECK_ARG(
+      cam_pos.scalar_type() == c10::kFloat, 4, "pulsar requires float types.");
+  TORCH_CHECK_ARG(
+      pixel_0_0_center.scalar_type() == c10::kFloat,
+      5,
+      "pulsar requires float types.");
+  TORCH_CHECK_ARG(
+      pixel_vec_x.scalar_type() == c10::kFloat,
+      6,
+      "pulsar requires float types.");
+  TORCH_CHECK_ARG(
+      pixel_vec_y.scalar_type() == c10::kFloat,
+      7,
+      "pulsar requires float types.");
+  TORCH_CHECK_ARG(
+      focal_length.scalar_type() == c10::kFloat,
+      8,
+      "pulsar requires float types.");
+  TORCH_CHECK_ARG(
+      // Unfortunately, the PyTorch interface is inconsistent for
+      // Int32: in Python, there exists an explicit int32 type, in
+      // C++ this is currently `c10::kInt`.
+      principal_point_offsets.scalar_type() == c10::kInt,
+      9,
+      "principal_point_offsets must be provided as int32.");
+  if (opacity.has_value()) {
+    TORCH_CHECK_ARG(
+        opacity.value().scalar_type() == c10::kFloat,
+        13,
+        "opacity must be a float type.");
+  }
+  // Content checks.
+  TORCH_CHECK_ARG(
+      (vert_radii > FEPS).all().item<bool>(),
+      3,
+      ("Vertex radii must be > FEPS (min is " +
+       std::to_string(vert_radii.min().item<float>()) + ").")
+          .c_str());
+  if (this->orthogonal()) {
+    TORCH_CHECK_ARG(
+        (focal_length == 0.f).all().item<bool>(),
+        8,
+        ("for an orthogonal projection focal length must be zero (abs max: " +
+         std::to_string(focal_length.abs().max().item<float>()) + ").")
+            .c_str());
+  } else {
+    TORCH_CHECK_ARG(
+        (focal_length > FEPS).all().item<bool>(),
+        8,
+        ("for a perspective projection focal length must be > FEPS (min " +
+         std::to_string(focal_length.min().item<float>()) + ").")
+            .c_str());
+  }
+  TORCH_CHECK_ARG(
+      gamma <= 1.f && gamma >= 1E-5f,
+      10,
+      ("gamma must be in [1E-5, 1] (" + std::to_string(gamma) + ").").c_str());
+  if (min_depth == 0.f) {
+    min_depth = focal_length.max().item<float>() + 2.f * FEPS;
+  }
+  TORCH_CHECK_ARG(
+      min_depth > focal_length.max().item<float>(),
+      12,
+      ("min_depth must be > focal_length (" + std::to_string(min_depth) +
+       " vs. " + std::to_string(focal_length.max().item<float>()) + ").")
+          .c_str());
+  TORCH_CHECK_ARG(
+      max_depth > min_depth + FEPS,
+      11,
+      ("max_depth must be > min_depth + FEPS (" + std::to_string(max_depth) +
+       " vs. " + std::to_string(min_depth + FEPS) + ").")
+          .c_str());
+  TORCH_CHECK_ARG(
+      percent_allowed_difference >= 0.f && percent_allowed_difference < 1.f,
+      14,
+      ("percent_allowed_difference must be in [0., 1.[ (" +
+       std::to_string(percent_allowed_difference) + ").")
+          .c_str());
+  TORCH_CHECK_ARG(max_n_hits > 0, 14, "max_n_hits must be > 0!");
+  TORCH_CHECK_ARG(mode < 2, 15, "mode must be in {0, 1}.");
+  torch::Tensor real_bg_col;
+  if (bg_col.has_value()) {
+    TORCH_CHECK_ARG(
+        bg_col.value().device().type() == this->device_type &&
+            bg_col.value().device().index() == this->device_index,
+        13,
+        "bg_col must be stored on the renderer device!");
+    TORCH_CHECK_ARG(
+        bg_col.value().ndimension() == 1 &&
+            bg_col.value().size(0) == renderer_vec[0].cam.n_channels,
+        13,
+        "bg_col must have the same number of channels as the image,).");
+    real_bg_col = bg_col.value();
+  } else {
+    real_bg_col = torch::ones(
+                      {renderer_vec[0].cam.n_channels},
+                      c10::Device(this->device_type, this->device_index))
+                      .to(c10::kFloat);
+  }
+  if (opacity.has_value()) {
+    TORCH_CHECK_ARG(
+        (opacity.value() >= 0.f).all().item<bool>(),
+        13,
+        "opacity must be >= 0.");
+    TORCH_CHECK_ARG(
+        (opacity.value() <= 1.f).all().item<bool>(),
+        13,
+        "opacity must be <= 1.");
+  }
+  LOG_IF(INFO, PULSAR_LOG_FORWARD || PULSAR_LOG_BACKWARD)
+      << "  batch_size: " << batch_size;
+  LOG_IF(INFO, PULSAR_LOG_FORWARD || PULSAR_LOG_BACKWARD)
+      << "  n_points: " << n_points;
+  LOG_IF(INFO, PULSAR_LOG_FORWARD || PULSAR_LOG_BACKWARD)
+      << "  batch_processing: " << batch_processing;
+  return std::tuple<size_t, size_t, bool, torch::Tensor>(
+      batch_size, n_points, batch_processing, real_bg_col);
+}
+
+// Render one example or a whole batch of examples with this renderer.
+//
+// All arguments are validated through `arg_check`, which also reports the
+// batch size, the number of points, whether batch processing applies and
+// the background color tensor to use. Afterwards one `CamInfo` is created
+// per example, `PRE::forward` is invoked once per example on the renderer
+// device (CUDA or host) and the `result_d` / `forw_info_d` buffers of the
+// used renderers are turned into tensors via `from_blob`.
+//
+// Returns the tuple (image, forward info). With batch processing both
+// entries are stacked over the examples; otherwise the tensors created
+// from the first renderer are returned as they are. For `mode == 1` the
+// image is sliced down to its first channel.
+std::tuple<torch::Tensor, torch::Tensor> Renderer::forward(
+    const torch::Tensor& vert_pos,
+    const torch::Tensor& vert_col,
+    const torch::Tensor& vert_radii,
+    const torch::Tensor& cam_pos,
+    const torch::Tensor& pixel_0_0_center,
+    const torch::Tensor& pixel_vec_x,
+    const torch::Tensor& pixel_vec_y,
+    const torch::Tensor& focal_length,
+    const torch::Tensor& principal_point_offsets,
+    const float& gamma,
+    const float& max_depth,
+    float min_depth,
+    const c10::optional<torch::Tensor>& bg_col,
+    const c10::optional<torch::Tensor>& opacity,
+    const float& percent_allowed_difference,
+    const uint& max_n_hits,
+    const uint& mode) {
+  this->ensure_on_device(this->device_tracker.device());
+  // Validate the arguments; the returned tuple carries the batch layout
+  // and the background color.
+  const auto checked = this->arg_check(
+      vert_pos, vert_col, vert_radii, cam_pos, pixel_0_0_center, pixel_vec_x,
+      pixel_vec_y, focal_length, principal_point_offsets, gamma, max_depth,
+      min_depth, bg_col, opacity, percent_allowed_difference, max_n_hits, mode);
+  const size_t batch_size = std::get<0>(checked);
+  size_t n_points = std::get<1>(checked);
+  const bool batch_processing = std::get<2>(checked);
+  const bool on_cuda = this->device_type == c10::DeviceType::CUDA;
+  // Film geometry and handedness are always read from the first renderer.
+  auto& first_cam = this->renderer_vec[0].cam;
+  LOG_IF(INFO, PULSAR_LOG_FORWARD) << "Extracting camera objects...";
+  // Builds the CamInfo for one example from its camera tensors.
+  const auto make_cam_info = [&](const torch::Tensor& position,
+                                 const torch::Tensor& origin_pixel,
+                                 const torch::Tensor& step_x,
+                                 const torch::Tensor& step_y,
+                                 const torch::Tensor& pp_offsets,
+                                 const torch::Tensor& focal) {
+    return cam_info_from_params(
+        position,
+        origin_pixel,
+        step_x,
+        step_y,
+        pp_offsets,
+        focal.item<float>(),
+        first_cam.film_width,
+        first_cam.film_height,
+        min_depth,
+        max_depth,
+        first_cam.right_handed);
+  };
+  std::vector<CamInfo> cam_infos(batch_size);
+  if (!batch_processing) {
+    cam_infos[0] = make_cam_info(
+        cam_pos,
+        pixel_0_0_center,
+        pixel_vec_x,
+        pixel_vec_y,
+        principal_point_offsets,
+        focal_length);
+  } else {
+    for (size_t example = 0; example < batch_size; ++example) {
+      cam_infos[example] = make_cam_info(
+          cam_pos[example],
+          pixel_0_0_center[example],
+          pixel_vec_x[example],
+          pixel_vec_y[example],
+          principal_point_offsets[example],
+          focal_length[example]);
+    }
+  }
+  LOG_IF(INFO, PULSAR_LOG_FORWARD) << "Processing...";
+  // Contiguous versions of the opacity and the background color. They are
+  // bound to named tensors so the raw pointers used below stay valid.
+  torch::Tensor opacity_contiguous;
+  float const* opacity_ptr = nullptr;
+  if (opacity.has_value()) {
+    opacity_contiguous = opacity.value().contiguous();
+    opacity_ptr = opacity_contiguous.data_ptr<float>();
+  }
+  const torch::Tensor bg_contiguous = std::get<3>(checked).contiguous();
+  if (on_cuda) {
+// No #else is needed: a build without CUDA can't move the renderer to a
+// CUDA device, so this code is not reachable there.
+#ifdef WITH_CUDA
+    int prev_active;
+    cudaGetDevice(&prev_active);
+    cudaSetDevice(this->device_index);
+    // Starts the forward pass of the renderer in `slot` on the GPU.
+    const auto launch = [&](const size_t slot,
+                            const torch::Tensor& positions,
+                            const torch::Tensor& colors,
+                            const torch::Tensor& radii) {
+      const torch::Tensor pos_c = positions.contiguous();
+      const torch::Tensor col_c = colors.contiguous();
+      const torch::Tensor rad_c = radii.contiguous();
+      PRE::forward<true>(
+          &this->renderer_vec[slot],
+          pos_c.data_ptr<float>(),
+          col_c.data_ptr<float>(),
+          rad_c.data_ptr<float>(),
+          cam_infos[slot],
+          gamma,
+          percent_allowed_difference,
+          max_n_hits,
+          bg_contiguous.data_ptr<float>(),
+          opacity_ptr,
+          n_points,
+          mode,
+          at::cuda::getCurrentCUDAStream());
+    };
+#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
+    START_TIME_CU(batch_forward);
+#endif
+    if (batch_processing) {
+      // These calls are non-blocking and just kick off the computations.
+      for (size_t example = 0; example < batch_size; ++example) {
+        launch(
+            example, vert_pos[example], vert_col[example], vert_radii[example]);
+      }
+    } else {
+      launch(0, vert_pos, vert_col, vert_radii);
+    }
+#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
+    STOP_TIME_CU(batch_forward);
+    float time_ms;
+    GET_TIME_CU(batch_forward, &time_ms);
+    std::cout << "Forward render batched time per example: "
+              << time_ms / static_cast<float>(batch_size) << "ms" << std::endl;
+#endif
+    cudaSetDevice(prev_active);
+#endif
+  } else {
+    // Runs the forward pass of the renderer in `slot` on the host.
+    const auto launch = [&](const size_t slot,
+                            const torch::Tensor& positions,
+                            const torch::Tensor& colors,
+                            const torch::Tensor& radii) {
+      const torch::Tensor pos_c = positions.contiguous();
+      const torch::Tensor col_c = colors.contiguous();
+      const torch::Tensor rad_c = radii.contiguous();
+      PRE::forward<false>(
+          &this->renderer_vec[slot],
+          pos_c.data_ptr<float>(),
+          col_c.data_ptr<float>(),
+          rad_c.data_ptr<float>(),
+          cam_infos[slot],
+          gamma,
+          percent_allowed_difference,
+          max_n_hits,
+          bg_contiguous.data_ptr<float>(),
+          opacity_ptr,
+          n_points,
+          mode,
+          nullptr);
+    };
+#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
+    START_TIME(batch_forward);
+#endif
+    if (batch_processing) {
+      for (size_t example = 0; example < batch_size; ++example) {
+        launch(
+            example, vert_pos[example], vert_col[example], vert_radii[example]);
+      }
+    } else {
+      launch(0, vert_pos, vert_col, vert_radii);
+    }
+#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
+    STOP_TIME(batch_forward);
+    float time_ms;
+    GET_TIME(batch_forward, &time_ms);
+    std::cout << "Forward render batched time per example: "
+              << time_ms / static_cast<float>(batch_size) << "ms" << std::endl;
+#endif
+  }
+  LOG_IF(INFO, PULSAR_LOG_FORWARD) << "Extracting results...";
+  // Stream argument for from_blob: the current CUDA stream for a CUDA
+  // renderer in a CUDA build, a null stream in every other case.
+  const auto result_stream = [&]() -> cudaStream_t {
+#ifdef WITH_CUDA
+    if (on_cuda) {
+      return at::cuda::getCurrentCUDAStream();
+    }
+#endif
+    return (cudaStream_t) nullptr;
+  };
+  std::vector<torch::Tensor> results;
+  std::vector<torch::Tensor> forw_infos;
+  results.reserve(batch_size);
+  forw_infos.reserve(batch_size);
+  for (size_t example = 0; example < batch_size; ++example) {
+    torch::Tensor image = from_blob(
+        this->renderer_vec[example].result_d,
+        {first_cam.film_height, first_cam.film_width, first_cam.n_channels},
+        this->device_type,
+        this->device_index,
+        torch::kFloat,
+        result_stream());
+    if (mode == 1) {
+      image = image.slice(2, 0, 1, 1);
+    }
+    results.push_back(image);
+    forw_infos.push_back(from_blob(
+        this->renderer_vec[example].forw_info_d,
+        {first_cam.film_height, first_cam.film_width, 3 + 2 * this->n_track()},
+        this->device_type,
+        this->device_index,
+        torch::kFloat,
+        result_stream()));
+  }
+  LOG_IF(INFO, PULSAR_LOG_FORWARD) << "Forward render complete.";
+  if (!batch_processing) {
+    return std::tuple<torch::Tensor, torch::Tensor>(results[0], forw_infos[0]);
+  }
+  return std::tuple<torch::Tensor, torch::Tensor>(
+      torch::stack(results), torch::stack(forw_infos));
+}
+
+std::tuple<
+    at::optional<torch::Tensor>,
+    at::optional<torch::Tensor>,
+    at::optional<torch::Tensor>,
+    at::optional<torch::Tensor>,
+    at::optional<torch::Tensor>,
+    at::optional<torch::Tensor>,
+    at::optional<torch::Tensor>,
+    at::optional<torch::Tensor>>
+Renderer::backward(
+    const torch::Tensor& grad_im,
+    const torch::Tensor& image,
+    const torch::Tensor& forw_info,
+    const torch::Tensor& vert_pos,
+    const torch::Tensor& vert_col,
+    const torch::Tensor& vert_radii,
+    const torch::Tensor& cam_pos,
+    const torch::Tensor& pixel_0_0_center,
+    const torch::Tensor& pixel_vec_x,
+    const torch::Tensor& pixel_vec_y,
+    const torch::Tensor& focal_length,
+    const torch::Tensor& principal_point_offsets,
+    const float& gamma,
+    const float& max_depth,
+    float min_depth,
+    const c10::optional<torch::Tensor>& bg_col,
+    const c10::optional<torch::Tensor>& opacity,
+    const float& percent_allowed_difference,
+    const uint& max_n_hits,
+    const uint& mode,
+    const bool& dif_pos,
+    const bool& dif_col,
+    const bool& dif_rad,
+    const bool& dif_cam,
+    const bool& dif_opy,
+    const at::optional<std::pair<uint, uint>>& dbg_pos) {
+  this->ensure_on_device(this->device_tracker.device());
+  size_t batch_size;
+  size_t n_points;
+  bool batch_processing;
+  torch::Tensor real_bg_col;
+  std::tie(batch_size, n_points, batch_processing, real_bg_col) =
+      this->arg_check(
+          vert_pos,
+          vert_col,
+          vert_radii,
+          cam_pos,
+          pixel_0_0_center,
+          pixel_vec_x,
+          pixel_vec_y,
+          focal_length,
+          principal_point_offsets,
+          gamma,
+          max_depth,
+          min_depth,
+          bg_col,
+          opacity,
+          percent_allowed_difference,
+          max_n_hits,
+          mode);
+  // Additional checks for the gradient computation.
+  TORCH_CHECK_ARG(
+      (grad_im.ndimension() == 3 + batch_processing &&
+       static_cast<uint>(grad_im.size(0 + batch_processing)) ==
+           this->height() &&
+       static_cast<uint>(grad_im.size(1 + batch_processing)) == this->width() &&
+       static_cast<uint>(grad_im.size(2 + batch_processing)) ==
+           this->renderer_vec[0].cam.n_channels),
+      1,
+      "The gradient image size is not correct.");
+  TORCH_CHECK_ARG(
+      (image.ndimension() == 3 + batch_processing &&
+       static_cast<uint>(image.size(0 + batch_processing)) == this->height() &&
+       static_cast<uint>(image.size(1 + batch_processing)) == this->width() &&
+       static_cast<uint>(image.size(2 + batch_processing)) ==
+           this->renderer_vec[0].cam.n_channels),
+      2,
+      "The result image size is not correct.");
+  TORCH_CHECK_ARG(
+      grad_im.scalar_type() == c10::kFloat,
+      1,
+      "The gradient image must be of float type.");
+  TORCH_CHECK_ARG(
+      image.scalar_type() == c10::kFloat,
+      2,
+      "The image must be of float type.");
+  if (dif_opy) {
+    TORCH_CHECK_ARG(
+        opacity.has_value(), 13, "dif_opy set requires opacity values.");
+  }
+  if (batch_processing) {
+    TORCH_CHECK_ARG(
+        grad_im.size(0) == static_cast<int64_t>(batch_size),
+        1,
+        "Gradient image batch size must agree.");
+    TORCH_CHECK_ARG(
+        image.size(0) == static_cast<int64_t>(batch_size),
+        2,
+        "Image batch size must agree.");
+    TORCH_CHECK_ARG(
+        forw_info.size(0) == static_cast<int64_t>(batch_size),
+        3,
+        "forward info must have batch size.");
+  }
+  TORCH_CHECK_ARG(
+      (forw_info.ndimension() == 3 + batch_processing &&
+       static_cast<uint>(forw_info.size(0 + batch_processing)) ==
+           this->height() &&
+       static_cast<uint>(forw_info.size(1 + batch_processing)) ==
+           this->width() &&
+       static_cast<uint>(forw_info.size(2 + batch_processing)) ==
+           3 + 2 * this->n_track()),
+      3,
+      "The forward info image size is not correct.");
+  TORCH_CHECK_ARG(
+      forw_info.scalar_type() == c10::kFloat,
+      3,
+      "The forward info must be of float type.");
+  // Check device.
+  auto dev = torch::device_of(grad_im).value();
+  TORCH_CHECK_ARG(
+      dev.type() == this->device_type && dev.index() == this->device_index,
+      1,
+      ("grad_im must be stored on device " +
+       c10::DeviceTypeName(this->device_type) + ", index " +
+       std::to_string(this->device_index) + "! Are stored on " +
+       c10::DeviceTypeName(dev.type()) + ", index " +
+       std::to_string(dev.index()) + ".")
+          .c_str());
+  dev = torch::device_of(image).value();
+  TORCH_CHECK_ARG(
+      dev.type() == this->device_type && dev.index() == this->device_index,
+      2,
+      ("image must be stored on device " +
+       c10::DeviceTypeName(this->device_type) + ", index " +
+       std::to_string(this->device_index) + "! Are stored on " +
+       c10::DeviceTypeName(dev.type()) + ", index " +
+       std::to_string(dev.index()) + ".")
+          .c_str());
+  dev = torch::device_of(forw_info).value();
+  TORCH_CHECK_ARG(
+      dev.type() == this->device_type && dev.index() == this->device_index,
+      3,
+      ("forw_info must be stored on device " +
+       c10::DeviceTypeName(this->device_type) + ", index " +
+       std::to_string(this->device_index) + "! Are stored on " +
+       c10::DeviceTypeName(dev.type()) + ", index " +
+       std::to_string(dev.index()) + ".")
+          .c_str());
+  if (dbg_pos.has_value()) {
+    TORCH_CHECK_ARG(
+        dbg_pos.value().first < this->width() &&
+            dbg_pos.value().second < this->height(),
+        23,
+        "The debug position must be within image bounds.");
+  }
+  // Prepare the return value.
+  std::tuple<
+      at::optional<torch::Tensor>,
+      at::optional<torch::Tensor>,
+      at::optional<torch::Tensor>,
+      at::optional<torch::Tensor>,
+      at::optional<torch::Tensor>,
+      at::optional<torch::Tensor>,
+      at::optional<torch::Tensor>,
+      at::optional<torch::Tensor>>
+      ret;
+  if (mode == 1 || (!dif_pos && !dif_col && !dif_rad && !dif_cam && !dif_opy)) {
+    return ret;
+  }
+  // Create the camera information.
+  std::vector<CamInfo> cam_infos(batch_size);
+  if (batch_processing) {
+    for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
+      cam_infos[batch_i] = cam_info_from_params(
+          cam_pos[batch_i],
+          pixel_0_0_center[batch_i],
+          pixel_vec_x[batch_i],
+          pixel_vec_y[batch_i],
+          principal_point_offsets[batch_i],
+          focal_length[batch_i].item<float>(),
+          this->renderer_vec[0].cam.film_width,
+          this->renderer_vec[0].cam.film_height,
+          min_depth,
+          max_depth,
+          this->renderer_vec[0].cam.right_handed);
+    }
+  } else {
+    cam_infos[0] = cam_info_from_params(
+        cam_pos,
+        pixel_0_0_center,
+        pixel_vec_x,
+        pixel_vec_y,
+        principal_point_offsets,
+        focal_length.item<float>(),
+        this->renderer_vec[0].cam.film_width,
+        this->renderer_vec[0].cam.film_height,
+        min_depth,
+        max_depth,
+        this->renderer_vec[0].cam.right_handed);
+  }
+  // Let's go!
+  // Contiguous version of opacity, if available. We need to create this object
+  // in scope to keep it alive.
+  torch::Tensor opacity_contiguous;
+  float const* opacity_ptr = nullptr;
+  if (opacity.has_value()) {
+    opacity_contiguous = opacity.value().contiguous();
+    opacity_ptr = opacity_contiguous.data_ptr<float>();
+  }
+  if (this->device_type == c10::DeviceType::CUDA) {
+// No else check necessary - it's not possible to move
+// the renderer to a CUDA device if not built with CUDA.
+#ifdef WITH_CUDA
+    int prev_active;
+    cudaGetDevice(&prev_active);
+    cudaSetDevice(this->device_index);
+#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
+    START_TIME_CU(batch_backward);
+#endif
+    if (batch_processing) {
+      for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
+        // These calls are non-blocking and just kick off the computations.
+        if (dbg_pos.has_value()) {
+          PRE::backward_dbg<true>(
+              &this->renderer_vec[batch_i],
+              grad_im[batch_i].contiguous().data_ptr<float>(),
+              image[batch_i].contiguous().data_ptr<float>(),
+              forw_info[batch_i].contiguous().data_ptr<float>(),
+              vert_pos[batch_i].contiguous().data_ptr<float>(),
+              vert_col[batch_i].contiguous().data_ptr<float>(),
+              vert_radii[batch_i].contiguous().data_ptr<float>(),
+              cam_infos[batch_i],
+              gamma,
+              percent_allowed_difference,
+              max_n_hits,
+              opacity_ptr,
+              n_points,
+              mode,
+              dif_pos,
+              dif_col,
+              dif_rad,
+              dif_cam,
+              dif_opy,
+              dbg_pos.value().first,
+              dbg_pos.value().second,
+              at::cuda::getCurrentCUDAStream());
+        } else {
+          PRE::backward<true>(
+              &this->renderer_vec[batch_i],
+              grad_im[batch_i].contiguous().data_ptr<float>(),
+              image[batch_i].contiguous().data_ptr<float>(),
+              forw_info[batch_i].contiguous().data_ptr<float>(),
+              vert_pos[batch_i].contiguous().data_ptr<float>(),
+              vert_col[batch_i].contiguous().data_ptr<float>(),
+              vert_radii[batch_i].contiguous().data_ptr<float>(),
+              cam_infos[batch_i],
+              gamma,
+              percent_allowed_difference,
+              max_n_hits,
+              opacity_ptr,
+              n_points,
+              mode,
+              dif_pos,
+              dif_col,
+              dif_rad,
+              dif_cam,
+              dif_opy,
+              at::cuda::getCurrentCUDAStream());
+        }
+      }
+    } else {
+      if (dbg_pos.has_value()) {
+        PRE::backward_dbg<true>(
+            this->renderer_vec.data(),
+            grad_im.contiguous().data_ptr<float>(),
+            image.contiguous().data_ptr<float>(),
+            forw_info.contiguous().data_ptr<float>(),
+            vert_pos.contiguous().data_ptr<float>(),
+            vert_col.contiguous().data_ptr<float>(),
+            vert_radii.contiguous().data_ptr<float>(),
+            cam_infos[0],
+            gamma,
+            percent_allowed_difference,
+            max_n_hits,
+            opacity_ptr,
+            n_points,
+            mode,
+            dif_pos,
+            dif_col,
+            dif_rad,
+            dif_cam,
+            dif_opy,
+            dbg_pos.value().first,
+            dbg_pos.value().second,
+            at::cuda::getCurrentCUDAStream());
+      } else {
+        PRE::backward<true>(
+            this->renderer_vec.data(),
+            grad_im.contiguous().data_ptr<float>(),
+            image.contiguous().data_ptr<float>(),
+            forw_info.contiguous().data_ptr<float>(),
+            vert_pos.contiguous().data_ptr<float>(),
+            vert_col.contiguous().data_ptr<float>(),
+            vert_radii.contiguous().data_ptr<float>(),
+            cam_infos[0],
+            gamma,
+            percent_allowed_difference,
+            max_n_hits,
+            opacity_ptr,
+            n_points,
+            mode,
+            dif_pos,
+            dif_col,
+            dif_rad,
+            dif_cam,
+            dif_opy,
+            at::cuda::getCurrentCUDAStream());
+      }
+    }
+    cudaSetDevice(prev_active);
+#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
+    STOP_TIME_CU(batch_backward);
+    float time_ms;
+    GET_TIME_CU(batch_backward, &time_ms);
+    std::cout << "Backward render batched time per example: "
+              << time_ms / static_cast<float>(batch_size) << "ms" << std::endl;
+#endif
+#endif // WITH_CUDA
+  } else {
+#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
+    START_TIME(batch_backward);
+#endif
+    if (batch_processing) {
+      for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
+        // These calls are non-blocking and just kick off the computations.
+        if (dbg_pos.has_value()) {
+          PRE::backward_dbg<false>(
+              &this->renderer_vec[batch_i],
+              grad_im[batch_i].contiguous().data_ptr<float>(),
+              image[batch_i].contiguous().data_ptr<float>(),
+              forw_info[batch_i].contiguous().data_ptr<float>(),
+              vert_pos[batch_i].contiguous().data_ptr<float>(),
+              vert_col[batch_i].contiguous().data_ptr<float>(),
+              vert_radii[batch_i].contiguous().data_ptr<float>(),
+              cam_infos[batch_i],
+              gamma,
+              percent_allowed_difference,
+              max_n_hits,
+              opacity_ptr,
+              n_points,
+              mode,
+              dif_pos,
+              dif_col,
+              dif_rad,
+              dif_cam,
+              dif_opy,
+              dbg_pos.value().first,
+              dbg_pos.value().second,
+              nullptr);
+        } else {
+          PRE::backward<false>(
+              &this->renderer_vec[batch_i],
+              grad_im[batch_i].contiguous().data_ptr<float>(),
+              image[batch_i].contiguous().data_ptr<float>(),
+              forw_info[batch_i].contiguous().data_ptr<float>(),
+              vert_pos[batch_i].contiguous().data_ptr<float>(),
+              vert_col[batch_i].contiguous().data_ptr<float>(),
+              vert_radii[batch_i].contiguous().data_ptr<float>(),
+              cam_infos[batch_i],
+              gamma,
+              percent_allowed_difference,
+              max_n_hits,
+              opacity_ptr,
+              n_points,
+              mode,
+              dif_pos,
+              dif_col,
+              dif_rad,
+              dif_cam,
+              dif_opy,
+              nullptr);
+        }
+      }
+    } else {
+      if (dbg_pos.has_value()) {
+        PRE::backward_dbg<false>(
+            this->renderer_vec.data(),
+            grad_im.contiguous().data_ptr<float>(),
+            image.contiguous().data_ptr<float>(),
+            forw_info.contiguous().data_ptr<float>(),
+            vert_pos.contiguous().data_ptr<float>(),
+            vert_col.contiguous().data_ptr<float>(),
+            vert_radii.contiguous().data_ptr<float>(),
+            cam_infos[0],
+            gamma,
+            percent_allowed_difference,
+            max_n_hits,
+            opacity_ptr,
+            n_points,
+            mode,
+            dif_pos,
+            dif_col,
+            dif_rad,
+            dif_cam,
+            dif_opy,
+            dbg_pos.value().first,
+            dbg_pos.value().second,
+            nullptr);
+      } else {
+        PRE::backward<false>(
+            this->renderer_vec.data(),
+            grad_im.contiguous().data_ptr<float>(),
+            image.contiguous().data_ptr<float>(),
+            forw_info.contiguous().data_ptr<float>(),
+            vert_pos.contiguous().data_ptr<float>(),
+            vert_col.contiguous().data_ptr<float>(),
+            vert_radii.contiguous().data_ptr<float>(),
+            cam_infos[0],
+            gamma,
+            percent_allowed_difference,
+            max_n_hits,
+            opacity_ptr,
+            n_points,
+            mode,
+            dif_pos,
+            dif_col,
+            dif_rad,
+            dif_cam,
+            dif_opy,
+            nullptr);
+      }
+    }
+#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
+    STOP_TIME(batch_backward);
+    float time_ms;
+    GET_TIME(batch_backward, &time_ms);
+    std::cout << "Backward render batched time per example: "
+              << time_ms / static_cast<float>(batch_size) << "ms" << std::endl;
+#endif
+  }
+  if (dif_pos) {
+    if (batch_processing) {
+      std::vector<torch::Tensor> results(batch_size);
+      for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
+        results[batch_i] = from_blob(
+            reinterpret_cast<float*>(this->renderer_vec[batch_i].grad_pos_d),
+            {static_cast<ptrdiff_t>(n_points), 3},
+            this->device_type,
+            this->device_index,
+            torch::kFloat,
+            this->device_type == c10::DeviceType::CUDA
+#ifdef WITH_CUDA
+                ? at::cuda::getCurrentCUDAStream()
+#else
+                ? (cudaStream_t) nullptr
+#endif
+                : (cudaStream_t) nullptr);
+      }
+      std::get<0>(ret) = torch::stack(results);
+    } else {
+      std::get<0>(ret) = from_blob(
+          reinterpret_cast<float*>(this->renderer_vec[0].grad_pos_d),
+          {static_cast<ptrdiff_t>(n_points), 3},
+          this->device_type,
+          this->device_index,
+          torch::kFloat,
+          this->device_type == c10::DeviceType::CUDA
+#ifdef WITH_CUDA
+              ? at::cuda::getCurrentCUDAStream()
+#else
+              ? (cudaStream_t) nullptr
+#endif
+              : (cudaStream_t) nullptr);
+    }
+  }
+  if (dif_col) {
+    if (batch_processing) {
+      std::vector<torch::Tensor> results(batch_size);
+      for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
+        results[batch_i] = from_blob(
+            reinterpret_cast<float*>(this->renderer_vec[batch_i].grad_col_d),
+            {static_cast<ptrdiff_t>(n_points),
+             this->renderer_vec[0].cam.n_channels},
+            this->device_type,
+            this->device_index,
+            torch::kFloat,
+            this->device_type == c10::DeviceType::CUDA
+#ifdef WITH_CUDA
+                ? at::cuda::getCurrentCUDAStream()
+#else
+                ? (cudaStream_t) nullptr
+#endif
+                : (cudaStream_t) nullptr);
+      }
+      std::get<1>(ret) = torch::stack(results);
+    } else {
+      std::get<1>(ret) = from_blob(
+          reinterpret_cast<float*>(this->renderer_vec[0].grad_col_d),
+          {static_cast<ptrdiff_t>(n_points),
+           this->renderer_vec[0].cam.n_channels},
+          this->device_type,
+          this->device_index,
+          torch::kFloat,
+          this->device_type == c10::DeviceType::CUDA
+#ifdef WITH_CUDA
+              ? at::cuda::getCurrentCUDAStream()
+#else
+              ? (cudaStream_t) nullptr
+#endif
+              : (cudaStream_t) nullptr);
+    }
+  }
+  if (dif_rad) {
+    if (batch_processing) {
+      std::vector<torch::Tensor> results(batch_size);
+      for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
+        results[batch_i] = from_blob(
+            reinterpret_cast<float*>(this->renderer_vec[batch_i].grad_rad_d),
+            {static_cast<ptrdiff_t>(n_points)},
+            this->device_type,
+            this->device_index,
+            torch::kFloat,
+            this->device_type == c10::DeviceType::CUDA
+#ifdef WITH_CUDA
+                ? at::cuda::getCurrentCUDAStream()
+#else
+                ? (cudaStream_t) nullptr
+#endif
+                : (cudaStream_t) nullptr);
+      }
+      std::get<2>(ret) = torch::stack(results);
+    } else {
+      std::get<2>(ret) = from_blob(
+          reinterpret_cast<float*>(this->renderer_vec[0].grad_rad_d),
+          {static_cast<ptrdiff_t>(n_points)},
+          this->device_type,
+          this->device_index,
+          torch::kFloat,
+          this->device_type == c10::DeviceType::CUDA
+#ifdef WITH_CUDA
+              ? at::cuda::getCurrentCUDAStream()
+#else
+              ? (cudaStream_t) nullptr
+#endif
+              : (cudaStream_t) nullptr);
+    }
+  }
+  if (dif_cam) {
+    if (batch_processing) {
+      std::vector<torch::Tensor> res_p1(batch_size);
+      std::vector<torch::Tensor> res_p2(batch_size);
+      std::vector<torch::Tensor> res_p3(batch_size);
+      std::vector<torch::Tensor> res_p4(batch_size);
+      for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
+        res_p1[batch_i] = from_blob(
+            reinterpret_cast<float*>(this->renderer_vec[batch_i].grad_cam_d),
+            {3},
+            this->device_type,
+            this->device_index,
+            torch::kFloat,
+            this->device_type == c10::DeviceType::CUDA
+#ifdef WITH_CUDA
+                ? at::cuda::getCurrentCUDAStream()
+#else
+                ? (cudaStream_t) nullptr
+#endif
+                : (cudaStream_t) nullptr);
+        res_p2[batch_i] = from_blob(
+            reinterpret_cast<float*>(
+                this->renderer_vec[batch_i].grad_cam_d + 3),
+            {3},
+            this->device_type,
+            this->device_index,
+            torch::kFloat,
+            this->device_type == c10::DeviceType::CUDA
+#ifdef WITH_CUDA
+                ? at::cuda::getCurrentCUDAStream()
+#else
+                ? (cudaStream_t) nullptr
+#endif
+                : (cudaStream_t) nullptr);
+        res_p3[batch_i] = from_blob(
+            reinterpret_cast<float*>(
+                this->renderer_vec[batch_i].grad_cam_d + 6),
+            {3},
+            this->device_type,
+            this->device_index,
+            torch::kFloat,
+            this->device_type == c10::DeviceType::CUDA
+#ifdef WITH_CUDA
+                ? at::cuda::getCurrentCUDAStream()
+#else
+                ? (cudaStream_t) nullptr
+#endif
+                : (cudaStream_t) nullptr);
+        res_p4[batch_i] = from_blob(
+            reinterpret_cast<float*>(
+                this->renderer_vec[batch_i].grad_cam_d + 9),
+            {3},
+            this->device_type,
+            this->device_index,
+            torch::kFloat,
+            this->device_type == c10::DeviceType::CUDA
+#ifdef WITH_CUDA
+                ? at::cuda::getCurrentCUDAStream()
+#else
+                ? (cudaStream_t) nullptr
+#endif
+                : (cudaStream_t) nullptr);
+      }
+      std::get<3>(ret) = torch::stack(res_p1);
+      std::get<4>(ret) = torch::stack(res_p2);
+      std::get<5>(ret) = torch::stack(res_p3);
+      std::get<6>(ret) = torch::stack(res_p4);
+    } else {
+      std::get<3>(ret) = from_blob(
+          reinterpret_cast<float*>(this->renderer_vec[0].grad_cam_d),
+          {3},
+          this->device_type,
+          this->device_index,
+          torch::kFloat,
+          this->device_type == c10::DeviceType::CUDA
+#ifdef WITH_CUDA
+              ? at::cuda::getCurrentCUDAStream()
+#else
+              ? (cudaStream_t) nullptr
+#endif
+              : (cudaStream_t) nullptr);
+      std::get<4>(ret) = from_blob(
+          reinterpret_cast<float*>(this->renderer_vec[0].grad_cam_d + 3),
+          {3},
+          this->device_type,
+          this->device_index,
+          torch::kFloat,
+          this->device_type == c10::DeviceType::CUDA
+#ifdef WITH_CUDA
+              ? at::cuda::getCurrentCUDAStream()
+#else
+              ? (cudaStream_t) nullptr
+#endif
+              : (cudaStream_t) nullptr);
+      std::get<5>(ret) = from_blob(
+          reinterpret_cast<float*>(this->renderer_vec[0].grad_cam_d + 6),
+          {3},
+          this->device_type,
+          this->device_index,
+          torch::kFloat,
+          this->device_type == c10::DeviceType::CUDA
+#ifdef WITH_CUDA
+              ? at::cuda::getCurrentCUDAStream()
+#else
+              ? (cudaStream_t) nullptr
+#endif
+              : (cudaStream_t) nullptr);
+      std::get<6>(ret) = from_blob(
+          reinterpret_cast<float*>(this->renderer_vec[0].grad_cam_d + 9),
+          {3},
+          this->device_type,
+          this->device_index,
+          torch::kFloat,
+          this->device_type == c10::DeviceType::CUDA
+#ifdef WITH_CUDA
+              ? at::cuda::getCurrentCUDAStream()
+#else
+              ? (cudaStream_t) nullptr
+#endif
+              : (cudaStream_t) nullptr);
+    }
+  }
+  if (dif_opy) {
+    if (batch_processing) {
+      std::vector<torch::Tensor> results(batch_size);
+      for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
+        results[batch_i] = from_blob(
+            reinterpret_cast<float*>(this->renderer_vec[batch_i].grad_opy_d),
+            {static_cast<ptrdiff_t>(n_points)},
+            this->device_type,
+            this->device_index,
+            torch::kFloat,
+            this->device_type == c10::DeviceType::CUDA
+#ifdef WITH_CUDA
+                ? at::cuda::getCurrentCUDAStream()
+#else
+                ? (cudaStream_t) nullptr
+#endif
+                : (cudaStream_t) nullptr);
+      }
+      std::get<7>(ret) = torch::stack(results);
+    } else {
+      std::get<7>(ret) = from_blob(
+          reinterpret_cast<float*>(this->renderer_vec[0].grad_opy_d),
+          {static_cast<ptrdiff_t>(n_points)},
+          this->device_type,
+          this->device_index,
+          torch::kFloat,
+          this->device_type == c10::DeviceType::CUDA
+#ifdef WITH_CUDA
+              ? at::cuda::getCurrentCUDAStream()
+#else
+              ? (cudaStream_t) nullptr
+#endif
+              : (cudaStream_t) nullptr);
+    }
+  }
+  return ret;
+};
+
+} // namespace pytorch
+} // namespace pulsar
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/renderer.h b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/renderer.h
new file mode 100644
index 0000000000000000000000000000000000000000..2525ca3f3dd9036320401b2a0059a2d5b6b864a4
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/renderer.h
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_PYTORCH_RENDERER_H_
+#define PULSAR_NATIVE_PYTORCH_RENDERER_H_
+
+#include "../global.h"
+#include "../include/renderer.h"
+
+namespace pulsar {
+namespace pytorch {
+
+struct Renderer {
+ public:
+  /**
+   * Pytorch Pulsar differentiable rendering module.
+   */
+  explicit Renderer(
+      const unsigned int& width,
+      const unsigned int& height,
+      const uint& max_n_balls,
+      const bool& orthogonal_projection,
+      const bool& right_handed_system,
+      const float& background_normalization_depth,
+      const uint& n_channels,
+      const uint& n_track);
+  ~Renderer();
+  // Returns two tensors; presumably the (image, forw_info) pair that
+  std::tuple<torch::Tensor, torch::Tensor> forward( // backward takes - TODO confirm
+      const torch::Tensor& vert_pos,
+      const torch::Tensor& vert_col,
+      const torch::Tensor& vert_radii,
+      const torch::Tensor& cam_pos,
+      const torch::Tensor& pixel_0_0_center,
+      const torch::Tensor& pixel_vec_x,
+      const torch::Tensor& pixel_vec_y,
+      const torch::Tensor& focal_length,
+      const torch::Tensor& principal_point_offsets,
+      const float& gamma,
+      const float& max_depth,
+      float min_depth,
+      const c10::optional<torch::Tensor>& bg_col,
+      const c10::optional<torch::Tensor>& opacity,
+      const float& percent_allowed_difference,
+      const uint& max_n_hits,
+      const uint& mode);
+
+  std::tuple<
+      at::optional<torch::Tensor>, // [0] presumably the position gradient (dif_pos) - TODO confirm
+      at::optional<torch::Tensor>, // [1] copy of grad_col_d, filled when dif_col
+      at::optional<torch::Tensor>, // [2] copy of grad_rad_d, filled when dif_rad
+      at::optional<torch::Tensor>, // [3] grad_cam_d elements [0, 3), filled when dif_cam
+      at::optional<torch::Tensor>, // [4] grad_cam_d elements [3, 6), filled when dif_cam
+      at::optional<torch::Tensor>, // [5] grad_cam_d elements [6, 9), filled when dif_cam
+      at::optional<torch::Tensor>, // [6] grad_cam_d elements [9, 12), filled when dif_cam
+      at::optional<torch::Tensor>> // [7] copy of grad_opy_d, filled when dif_opy
+  backward(
+      const torch::Tensor& grad_im,
+      const torch::Tensor& image,
+      const torch::Tensor& forw_info,
+      const torch::Tensor& vert_pos,
+      const torch::Tensor& vert_col,
+      const torch::Tensor& vert_radii,
+      const torch::Tensor& cam_pos,
+      const torch::Tensor& pixel_0_0_center,
+      const torch::Tensor& pixel_vec_x,
+      const torch::Tensor& pixel_vec_y,
+      const torch::Tensor& focal_length,
+      const torch::Tensor& principal_point_offsets,
+      const float& gamma,
+      const float& max_depth,
+      float min_depth,
+      const c10::optional<torch::Tensor>& bg_col,
+      const c10::optional<torch::Tensor>& opacity,
+      const float& percent_allowed_difference,
+      const uint& max_n_hits,
+      const uint& mode,
+      const bool& dif_pos, // the dif_* flags select which gradient slots get filled
+      const bool& dif_col,
+      const bool& dif_rad,
+      const bool& dif_cam,
+      const bool& dif_opy,
+      const at::optional<std::pair<uint, uint>>& dbg_pos);
+
+  // Infrastructure.
+  /**
+   * Ensure that the renderer is placed on this device.
+   * Is nearly a no-op if the device is correct.
+   */
+  void ensure_on_device(torch::Device device, bool non_blocking = false);
+
+  /**
+   * Ensure that at least n renderers are available.
+   */
+  void ensure_n_renderers_gte(const size_t& batch_size);
+
+  /**
+   * Check the parameters. `min_depth` is a non-const reference here (unlike in
+   * forward/backward), so the check is able to modify the caller's value. */
+  std::tuple<size_t, size_t, bool, torch::Tensor> arg_check(
+      const torch::Tensor& vert_pos,
+      const torch::Tensor& vert_col,
+      const torch::Tensor& vert_radii,
+      const torch::Tensor& cam_pos,
+      const torch::Tensor& pixel_0_0_center,
+      const torch::Tensor& pixel_vec_x,
+      const torch::Tensor& pixel_vec_y,
+      const torch::Tensor& focal_length,
+      const torch::Tensor& principal_point_offsets,
+      const float& gamma,
+      const float& max_depth,
+      float& min_depth,
+      const c10::optional<torch::Tensor>& bg_col,
+      const c10::optional<torch::Tensor>& opacity,
+      const float& percent_allowed_difference,
+      const uint& max_n_hits,
+      const uint& mode);
+
+  bool operator==(const Renderer& rhs) const;
+  inline friend std::ostream& operator<<( // prints "pulsar::Renderer[<device type>(, ID <index>)]"
+      std::ostream& stream,
+      const Renderer& self) {
+    stream << "pulsar::Renderer[";
+    // Device info.
+    stream << self.device_type;
+    if (self.device_index != -1) // the index is only printed when it is not -1
+      stream << ", ID " << self.device_index;
+    stream << "]";
+    return stream;
+  }
+  // The accessors below all read renderer_vec[0], so renderer_vec must be non-empty.
+  inline uint width() const {
+    return this->renderer_vec[0].cam.film_width;
+  }
+  inline uint height() const {
+    return this->renderer_vec[0].cam.film_height;
+  }
+  inline int max_num_balls() const { // note: int, while the constructor takes a uint
+    return this->renderer_vec[0].max_num_balls;
+  }
+  inline bool orthogonal() const {
+    return this->renderer_vec[0].cam.orthogonal_projection;
+  }
+  inline bool right_handed() const {
+    return this->renderer_vec[0].cam.right_handed;
+  }
+  inline uint n_track() const {
+    return static_cast<uint>(this->renderer_vec[0].n_track);
+  }
+
+  /** A tensor that is registered as a buffer with this Module to track its
+   * device placement. Unfortunately, pytorch doesn't offer tracking Module
+   * device placement in a better way as of now.
+   */
+  torch::Tensor device_tracker;
+
+ protected:
+  /** The device type for this renderer. */
+  c10::DeviceType device_type;
+  /** The device index for this renderer. */
+  c10::DeviceIndex device_index;
+  /** The underlying pulsar renderers, held by value; backward reads one per batch element. */
+  std::vector<pulsar::Renderer::Renderer> renderer_vec;
+};
+
+} // namespace pytorch
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/tensor_util.cpp b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/tensor_util.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1dd41ed4e5ef40d2c78b3d71fb5dff7cdaa4e6c0
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/tensor_util.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifdef WITH_CUDA
+#include <ATen/cuda/CUDAContext.h>
+#include <cuda_runtime_api.h>
+#endif
+#include <torch/extension.h>
+
+#include "./tensor_util.h"
+
+namespace pulsar {
+namespace pytorch {
+
+// Reinterprets every second channel of `forw_info` from index 3 on as int32
+// (byte-wise copy, no value conversion; assumes 4-byte elements) and
+// returns it as a new tensor on the same device.
+torch::Tensor sphere_ids_from_result_info_nograd(
+    const torch::Tensor& forw_info) {
+  // Get the relevant slice, contiguous, so that it can be copied as one
+  // flat run of bytes.
+  torch::Tensor tmp =
+      forw_info
+          .slice(
+              /*dim=*/3, /*start=*/3, /*end=*/forw_info.size(3), /*step=*/2)
+          .contiguous();
+  // Size the result from the slice itself: deriving the last dimension as
+  // `(size(3) - 3) / 2` truncates, while the step-2 slice rounds up, so an
+  // odd remainder made the copy below write past the end of `result`.
+  torch::Tensor result = torch::zeros(
+      tmp.sizes(),
+      torch::TensorOptions().device(forw_info.device()).dtype(torch::kInt32));
+  const size_t n_bytes = sizeof(uint32_t) * static_cast<size_t>(tmp.numel());
+  if (forw_info.device().type() == c10::DeviceType::CUDA) {
+#ifdef WITH_CUDA
+    AT_CUDA_CHECK(cudaMemcpyAsync(
+        result.data_ptr(),
+        tmp.data_ptr(),
+        n_bytes,
+        cudaMemcpyDeviceToDevice,
+        at::cuda::getCurrentCUDAStream()));
+#else
+    throw std::runtime_error(
+        "Copy on CUDA device initiated but built "
+        "without CUDA support.");
+#endif
+  } else {
+    memcpy(result.data_ptr(), tmp.data_ptr(), n_bytes);
+  }
+  // `tmp` is freed after this, the memory might get reallocated. However,
+  // only kernels in the same stream should ever be able to write to this
+  // memory, which are executed only after the memcpy is complete. That's
+  // why we can just continue.
+  return result;
+}
+
+} // namespace pytorch
+} // namespace pulsar
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/tensor_util.h b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/tensor_util.h
new file mode 100644
index 0000000000000000000000000000000000000000..9f1d677cbfd4377f27224e05abc66085a06aa60c
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/tensor_util.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_PYTORCH_TENSOR_UTIL_H_
+#define PULSAR_NATIVE_PYTORCH_TENSOR_UTIL_H_
+
+#include <ATen/ATen.h>
+
+namespace pulsar {
+namespace pytorch {
+// Returns an int32 tensor built from every second channel of `forw_info`,
+torch::Tensor sphere_ids_from_result_info_nograd( // starting at channel 3
+    const torch::Tensor& forw_info); // defined in tensor_util.cpp
+
+} // namespace pytorch
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/util.cpp b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/util.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7d25b6e8504c765b816e7793419e9de63a7719dd
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/util.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifdef WITH_CUDA
+#include <cuda_runtime_api.h>
+
+namespace pulsar {
+namespace pytorch {
+
+void cudaDevToDev( // enqueue a device-to-device copy on `stream`
+    void* trg, // destination (device memory)
+    const void* src, // source (device memory)
+    const int& size, // number of bytes to copy; int, so limited to INT_MAX
+    const cudaStream_t& stream) {
+  cudaMemcpyAsync(trg, src, size, cudaMemcpyDeviceToDevice, stream); // asynchronous; return code is not checked
+}
+
+void cudaDevToHost( // enqueue a device-to-host copy on `stream`
+    void* trg, // destination (host memory)
+    const void* src, // source (device memory)
+    const int& size, // number of bytes to copy; int, so limited to INT_MAX
+    const cudaStream_t& stream) {
+  cudaMemcpyAsync(trg, src, size, cudaMemcpyDeviceToHost, stream); // asynchronous; return code is not checked
+}
+
+} // namespace pytorch
+} // namespace pulsar
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/util.h b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/util.h
new file mode 100644
index 0000000000000000000000000000000000000000..be3dc80defbb78c6e65722a1dda5d70e288e73c7
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/util.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef PULSAR_NATIVE_PYTORCH_UTIL_H_
+#define PULSAR_NATIVE_PYTORCH_UTIL_H_
+
+#include <ATen/ATen.h>
+#include "../global.h"
+
+namespace pulsar {
+namespace pytorch {
+
+void cudaDevToDev( // defined in util.cpp, only when built WITH_CUDA
+    void* trg, // destination (device memory)
+    const void* src, // source (device memory)
+    const int& size, // number of bytes to copy
+    const cudaStream_t& stream); // stream the asynchronous copy is enqueued on
+void cudaDevToHost( // defined in util.cpp, only when built WITH_CUDA
+    void* trg, // destination (host memory)
+    const void* src, // source (device memory)
+    const int& size, // number of bytes to copy
+    const cudaStream_t& stream); // stream the asynchronous copy is enqueued on
+
+/**
+ * Copies the memory at `ptr` into a newly allocated pytorch tensor of the
+ * given shape/dtype/device (on CUDA asynchronously on `stream`).
+ * `dtype` must have the element size of `T`. This is preferred over
+ * `torch::from_blob`, since that requires a CUDA managed pointer.
+ */
+template <typename T>
+torch::Tensor from_blob(
+    const T* ptr,
+    const torch::IntArrayRef& shape,
+    const c10::DeviceType& device_type,
+    const c10::DeviceIndex& device_index,
+    const torch::Dtype& dtype,
+    const cudaStream_t& stream) {
+  torch::Tensor ret = torch::zeros(
+      shape, torch::device({device_type, device_index}).dtype(dtype));
+  // Take the element count from the tensor (int64): folding `shape` in
+  // `int` overflows for more than INT_MAX elements.
+  const size_t num_bytes = sizeof(T) * static_cast<size_t>(ret.numel());
+  if (device_type == c10::DeviceType::CUDA) {
+#ifdef WITH_CUDA
+    // `cudaDevToDev` takes the byte count as `int`; refuse what would wrap.
+    TORCH_CHECK(num_bytes <= 0x7fffffffu, "from_blob: copy exceeds 2 GiB.");
+    cudaDevToDev(
+        ret.data_ptr(),
+        static_cast<const void*>(ptr),
+        static_cast<int>(num_bytes),
+        stream);
+#else
+    throw std::runtime_error(
+        "Initiating devToDev copy on a build without CUDA.");
+#endif
+    // TODO: check for synchronization.
+  } else {
+    memcpy(ret.data_ptr(), ptr, num_bytes);
+  }
+  return ret;
+};
+
+} // namespace pytorch
+} // namespace pulsar
+
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/pulsar/warnings.cpp b/pytorch3d/pytorch3d/csrc/pulsar/warnings.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..54615ac1392db7788c643e93a40b4824b59ec102
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/pulsar/warnings.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "./global.h"
+#include "./logging.h"
+
+/**
+ * A compilation unit to provide warnings about the code and avoid
+ * repeated messages.
+ */
+#ifdef PULSAR_ASSERTIONS
+#pragma message("WARNING: assertions are enabled in Pulsar.")
+#endif
+#ifdef PULSAR_LOGGING_ENABLED
+#pragma message("WARNING: logging is enabled in Pulsar.")
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/rasterize_coarse/bitmask.cuh b/pytorch3d/pytorch3d/csrc/rasterize_coarse/bitmask.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..6ffcac87caa13f37a5ccb12b565d33450bc035c2
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/rasterize_coarse/bitmask.cuh
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#define BINMASK_H
+
+// A BitMask represents a bool array of shape (H, W, N). We pack values into
+// the bits of unsigned ints; a single unsigned int has B = 32 bits, so to hold
+// all values we use H * W * ceil(N / B) = H * W * D values. We want to store
+// BitMasks in shared memory, so we assume that the memory has already been
+// allocated for it elsewhere (at least H * W * D unsigned ints).
+class BitMask {
+ public:
+  __device__ BitMask(unsigned int* data, int H, int W, int N)
+      : data(data), H(H), W(W), B(8 * sizeof(unsigned int)), D(0) {
+    // TODO: check if the data is null.
+    D = (N + B - 1) / B; // round up so a partial last word gets its own slot
+    block_clear(); // clear the data
+  }
+
+  // Use all threads in the current block to clear all bits of this BitMask
+  __device__ void block_clear() {
+    for (int i = threadIdx.x; i < H * W * D; i += blockDim.x) {
+      data[i] = 0;
+    }
+    __syncthreads(); // every thread of the block must reach this call
+  }
+
+  __device__ int _get_elem_idx(int y, int x, int d) { // word holding bit d of cell (y, x)
+    return y * W * D + x * D + d / B;
+  }
+
+  __device__ int _get_bit_idx(int d) { // position of bit d inside its word
+    return d % B;
+  }
+
+  // Turn on a single bit (y, x, d)
+  __device__ void set(int y, int x, int d) {
+    int elem_idx = _get_elem_idx(y, x, d);
+    int bit_idx = _get_bit_idx(d);
+    const unsigned int mask = 1U << bit_idx;
+    atomicOr(data + elem_idx, mask); // atomic: several threads may share a word
+  }
+
+  // Turn off a single bit (y, x, d)
+  __device__ void unset(int y, int x, int d) {
+    int elem_idx = _get_elem_idx(y, x, d);
+    int bit_idx = _get_bit_idx(d);
+    const unsigned int mask = ~(1U << bit_idx);
+    atomicAnd(data + elem_idx, mask);
+  }
+
+  // Check whether the bit (y, x, d) is on or off
+  __device__ bool get(int y, int x, int d) {
+    int elem_idx = _get_elem_idx(y, x, d);
+    int bit_idx = _get_bit_idx(d);
+    return (data[elem_idx] >> bit_idx) & 1U;
+  }
+
+  // Compute the number of bits set in the row (y, x, :)
+  __device__ int count(int y, int x) {
+    int total = 0;
+    for (int i = 0; i < D; ++i) {
+      int elem_idx = y * W * D + x * D + i;
+      unsigned int elem = data[elem_idx];
+      total += __popc(elem);
+    }
+    return total;
+  }
+
+ private:
+  unsigned int* data; // externally owned storage, not freed here
+  int H, W, B, D; // D = number of B-bit words per (y, x) cell
+};
diff --git a/pytorch3d/pytorch3d/csrc/rasterize_coarse/rasterize_coarse.cu b/pytorch3d/pytorch3d/csrc/rasterize_coarse/rasterize_coarse.cu
new file mode 100644
index 0000000000000000000000000000000000000000..bb6acaf1224262f3615dc5750e474f83fa8325c6
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/rasterize_coarse/rasterize_coarse.cu
@@ -0,0 +1,389 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <float.h>
+#include <math.h>
+#include <tuple>
+#include "rasterize_coarse/bitmask.cuh"
+#include "rasterize_points/rasterization_utils.cuh"
+#include "utils/float_math.cuh"
+#include "utils/geometry_utils.cuh" // For kEpsilon -- gross
+
+__global__ void TriangleBoundingBoxKernel(
+    const float* face_verts, // (F, 3, 3)
+    const int F,
+    const float blur_radius,
+    float* bboxes, // (4, F)
+    bool* skip_face) { // (F,)
+  // For every face: store its xy bounding box, grown by sqrt(blur_radius)
+  // on each side, and mark the face as skipped when its smallest vertex z
+  // is below kEpsilon.
+  const float pad = sqrt(blur_radius);
+  const int stride = blockDim.x * gridDim.x;
+  int f = blockIdx.x * blockDim.x + threadIdx.x;
+  while (f < F) {
+    // Vertices of face f, each stored as three consecutive floats (x, y, z).
+    const float* a = face_verts + f * 9;
+    const float* b = a + 3;
+    const float* c = b + 3;
+    const float x_lo = FloatMin3(a[0], b[0], c[0]) - pad;
+    const float x_hi = FloatMax3(a[0], b[0], c[0]) + pad;
+    const float y_lo = FloatMin3(a[1], b[1], c[1]) - pad;
+    const float y_hi = FloatMax3(a[1], b[1], c[1]) + pad;
+    const float z_near = FloatMin3(a[2], b[2], c[2]);
+    // Rows of the (4, F) output are xmin, xmax, ymin, ymax.
+    float* out = bboxes + f;
+    out[0 * F] = x_lo;
+    out[1 * F] = x_hi;
+    out[2 * F] = y_lo;
+    out[3 * F] = y_hi;
+    skip_face[f] = z_near < kEpsilon;
+    f += stride;
+  }
+}
+
+__global__ void PointBoundingBoxKernel(
+    const float* points, // (P, 3)
+    const float* radius, // (P,)
+    const int P,
+    float* bboxes, // (4, P)
+    bool* skip_points) {
+  // Per point: xy box of half-width radius[p]; skipped when z is negative.
+  const int stride = blockDim.x * gridDim.x;
+  int p = blockIdx.x * blockDim.x + threadIdx.x;
+  while (p < P) {
+    const float* center = points + p * 3;
+    const float cx = center[0], cy = center[1], rad = radius[p];
+    // TODO: change to kEpsilon to match triangles?
+    const bool behind = center[2] < 0;
+    bboxes[0 * P + p] = cx - rad;
+    bboxes[1 * P + p] = cx + rad;
+    bboxes[2 * P + p] = cy - rad;
+    bboxes[3 * P + p] = cy + rad;
+    skip_points[p] = behind;
+    p += stride;
+  }
+}
+
+__global__ void RasterizeCoarseCudaKernel(
+    const float* bboxes, // (4, E) (xmin, xmax, ymin, ymax)
+    const bool* should_skip, // (E,)
+    const int64_t* elem_first_idxs, // indexed by batch_idx: first elem of that batch element
+    const int64_t* elems_per_batch, // indexed by batch_idx: elem count of that batch element
+    const int N, // number of batch elements
+    const int E, // row length of bboxes
+    const int H,
+    const int W,
+    const int bin_size, // bin edge length in pixels
+    const int chunk_size, // elems handled per block iteration; also the bitmask depth
+    const int max_elem_per_bin,
+    int* elems_per_bin, // (N, num_bins_y, num_bins_x) running counters
+    int* bin_elems) { // (N, num_bins_y, num_bins_x, M)
+  extern __shared__ char sbuf[]; // backing storage for the bitmask below
+  const int M = max_elem_per_bin;
+  // Integer divide round up
+  const int num_bins_x = 1 + (W - 1) / bin_size;
+  const int num_bins_y = 1 + (H - 1) / bin_size;
+
+  // NDC range depends on the ratio of W/H
+  // The shorter side from (H, W) is given an NDC range of 2.0 and
+  // the other side is scaled by the ratio of H:W.
+  const float NDC_x_half_range = NonSquareNdcRange(W, H) / 2.0f;
+  const float NDC_y_half_range = NonSquareNdcRange(H, W) / 2.0f;
+
+  // Size of half a pixel in NDC units is the NDC half range
+  // divided by the corresponding image dimension
+  const float half_pix_x = NDC_x_half_range / W;
+  const float half_pix_y = NDC_y_half_range / H;
+
+  // This is a boolean array of shape (num_bins_y, num_bins_x, chunk_size)
+  // stored in shared memory that will track whether each elem in the chunk
+  // falls into each bin of the image.
+  BitMask binmask((unsigned int*)sbuf, num_bins_y, num_bins_x, chunk_size);
+
+  // Have each block handle a chunk of elements
+  const int chunks_per_batch = 1 + (E - 1) / chunk_size;
+  const int num_chunks = N * chunks_per_batch;
+
+  for (int chunk = blockIdx.x; chunk < num_chunks; chunk += gridDim.x) {
+    const int batch_idx = chunk / chunks_per_batch; // batch index
+    const int chunk_idx = chunk % chunks_per_batch;
+    const int elem_chunk_start_idx = chunk_idx * chunk_size;
+
+    binmask.block_clear(); // reset the mask for this chunk (contains a block-wide barrier)
+    const int64_t elem_start_idx = elem_first_idxs[batch_idx];
+    const int64_t elem_stop_idx = elem_start_idx + elems_per_batch[batch_idx];
+
+    // Have each thread handle a different elem within the chunk
+    for (int e = threadIdx.x; e < chunk_size; e += blockDim.x) {
+      const int e_idx = elem_chunk_start_idx + e;
+
+      // Check that we are still within the same element of the batch
+      if (e_idx >= elem_stop_idx || e_idx < elem_start_idx) {
+        continue;
+      }
+
+      if (should_skip[e_idx]) {
+        continue;
+      }
+      const float xmin = bboxes[0 * E + e_idx];
+      const float xmax = bboxes[1 * E + e_idx];
+      const float ymin = bboxes[2 * E + e_idx];
+      const float ymax = bboxes[3 * E + e_idx];
+
+      // Brute-force search over all bins; TODO(T54294966) something smarter.
+      for (int by = 0; by < num_bins_y; ++by) {
+        // Y coordinate of the top and bottom of the bin.
+        // PixToNdc gives the location of the center of each pixel, so we
+        // need to add/subtract a half pixel to get the true extent of the bin.
+        // Reverse ordering of Y axis so that +Y is upwards in the image.
+        const float bin_y_min =
+            PixToNonSquareNdc(by * bin_size, H, W) - half_pix_y;
+        const float bin_y_max =
+            PixToNonSquareNdc((by + 1) * bin_size - 1, H, W) + half_pix_y;
+        const bool y_overlap = (ymin <= bin_y_max) && (bin_y_min < ymax);
+
+        for (int bx = 0; bx < num_bins_x; ++bx) {
+          // X coordinate of the left and right of the bin.
+          // Reverse ordering of x axis so that +X is left.
+          const float bin_x_max =
+              PixToNonSquareNdc((bx + 1) * bin_size - 1, W, H) + half_pix_x;
+          const float bin_x_min =
+              PixToNonSquareNdc(bx * bin_size, W, H) - half_pix_x;
+
+          const bool x_overlap = (xmin <= bin_x_max) && (bin_x_min < xmax);
+          if (y_overlap && x_overlap) {
+            binmask.set(by, bx, e); // bit index is the elem's offset within the chunk
+          }
+        }
+      }
+    }
+    __syncthreads(); // all bits of this chunk must be set before they are counted
+    // Now we have processed every elem in the current chunk. We need to
+    // count the number of elems in each bin so we can write the indices
+    // out to global memory. We have each thread handle a different bin.
+    for (int byx = threadIdx.x; byx < num_bins_y * num_bins_x;
+         byx += blockDim.x) {
+      const int by = byx / num_bins_x;
+      const int bx = byx % num_bins_x;
+      const int count = binmask.count(by, bx);
+      const int elems_per_bin_idx =
+          batch_idx * num_bins_y * num_bins_x + by * num_bins_x + bx;
+
+      // This atomically increments the (global) number of elems found
+      // in the current bin, and gets the previous value of the counter;
+      // this effectively allocates space in the bin_elems array for the
+      // elems in the current chunk that fall into this bin.
+      const int start = atomicAdd(elems_per_bin + elems_per_bin_idx, count);
+      if (start + count > M) {
+        // The number of elems in this bin is so big that they won't fit.
+        // We print a warning using CUDA's printf. This may be invisible
+        // to notebook users, but apparent to others. It would be nice to
+        // also have a Python-friendly warning, but it is not obvious
+        // how to do this without slowing down the normal case.
+        const char* warning =
+            "Bin size was too small in the coarse rasterization phase. "
+            "This caused an overflow, meaning output may be incomplete. "
+            "To solve, "
+            "try increasing max_faces_per_bin / max_points_per_bin, "
+            "decreasing bin_size, "
+            "or setting bin_size to 0 to use the naive rasterization.";
+        printf(warning);
+        continue; // NOTE: elems_per_bin was already incremented by the atomicAdd above
+      }
+
+      // Now loop over the binmask and write the active bits for this bin
+      // out to bin_elems.
+      int next_idx = batch_idx * num_bins_y * num_bins_x * M +
+          by * num_bins_x * M + bx * M + start;
+      for (int e = 0; e < chunk_size; ++e) {
+        if (binmask.get(by, bx, e)) {
+          // TODO(T54296346) find the correct method for handling errors in
+          // CUDA. Throw an error if num_faces_per_bin > max_faces_per_bin.
+          // Either decrease bin size or increase max_faces_per_bin
+          bin_elems[next_idx] = elem_chunk_start_idx + e;
+          next_idx++;
+        }
+      }
+    }
+    __syncthreads(); // finish reading the mask before the next iteration clears it
+  }
+}
+
+at::Tensor RasterizeCoarseCuda(
+    const at::Tensor& bboxes, // (4, E) float, as read by the kernel
+    const at::Tensor& should_skip, // (E,) bool
+    const at::Tensor& elem_first_idxs, // int64, indexed per batch element
+    const at::Tensor& elems_per_batch, // (N,) int64
+    const std::tuple<int, int> image_size, // (H, W)
+    const int bin_size,
+    const int max_elems_per_bin) {
+  // Host launcher for RasterizeCoarseCudaKernel; returns the bin_elems tensor.
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(bboxes.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  // bin_size is a divisor below: raise an error rather than trap on zero.
+  TORCH_CHECK(bin_size > 0, "RasterizeCoarseCuda: bin_size must be > 0");
+  const int H = std::get<0>(image_size);
+  const int W = std::get<1>(image_size);
+  const int E = bboxes.size(1);
+  const int N = elems_per_batch.size(0);
+  const int M = max_elems_per_bin;
+  // Integer divide round up
+  const int num_bins_y = 1 + (H - 1) / bin_size;
+  const int num_bins_x = 1 + (W - 1) / bin_size;
+
+  if (num_bins_y >= kMaxItemsPerBin || num_bins_x >= kMaxItemsPerBin) {
+    std::stringstream ss;
+    ss << "In RasterizeCoarseCuda got num_bins_y: " << num_bins_y
+       << ", num_bins_x: " << num_bins_x << ", "
+       << "; that's too many!";
+    AT_ERROR(ss.str());
+  }
+  auto opts = elems_per_batch.options().dtype(at::kInt);
+  at::Tensor elems_per_bin = at::zeros({N, num_bins_y, num_bins_x}, opts);
+  at::Tensor bin_elems = at::full({N, num_bins_y, num_bins_x, M}, -1, opts); // -1 = unused slot
+
+  if (bin_elems.numel() == 0) { // nothing to bin: skip the launch
+    AT_CUDA_CHECK(cudaGetLastError());
+    return bin_elems;
+  }
+
+  const int chunk_size = 512;
+  const size_t shared_size = num_bins_y * num_bins_x * chunk_size / 8; // one bit per (bin, chunk elem)
+  const size_t blocks = 64;
+  const size_t threads = 512;
+  RasterizeCoarseCudaKernel<<<blocks, threads, shared_size, stream>>>(
+      bboxes.contiguous().data_ptr<float>(),
+      should_skip.contiguous().data_ptr<bool>(),
+      elem_first_idxs.contiguous().data_ptr<int64_t>(),
+      elems_per_batch.contiguous().data_ptr<int64_t>(),
+      N,
+      E,
+      H,
+      W,
+      bin_size,
+      chunk_size,
+      M,
+      elems_per_bin.data_ptr<int32_t>(),
+      bin_elems.data_ptr<int32_t>());
+
+  AT_CUDA_CHECK(cudaGetLastError()); // surfaces launch errors only; the kernel runs asynchronously
+  return bin_elems;
+}
+
+// Coarse rasterization of meshes on the GPU. Computes a bounding box and a
+// skip flag per face with TriangleBoundingBoxKernel, then bins the faces via
+// RasterizeCoarseCuda.
+//
+// Args:
+//   face_verts: float tensor of shape (num_faces, 3, 3).
+//   mesh_to_face_first_idx: forwarded to RasterizeCoarseCuda as
+//       elem_first_idxs.
+//   num_faces_per_mesh: forwarded to RasterizeCoarseCuda as elems_per_batch.
+//   image_size: forwarded unchanged.
+//   blur_radius: passed to the bounding box kernel.
+//   bin_size, max_faces_per_bin: forwarded unchanged.
+//
+// Returns the tensor produced by RasterizeCoarseCuda.
+at::Tensor RasterizeMeshesCoarseCuda(
+    const at::Tensor& face_verts,
+    const at::Tensor& mesh_to_face_first_idx,
+    const at::Tensor& num_faces_per_mesh,
+    const std::tuple<int, int> image_size,
+    const float blur_radius,
+    const int bin_size,
+    const int max_faces_per_bin) {
+  TORCH_CHECK(
+      face_verts.ndimension() == 3 && face_verts.size(1) == 3 &&
+          face_verts.size(2) == 3,
+      "face_verts must have dimensions (num_faces, 3, 3)");
+
+  // Every input tensor has to live on the same GPU.
+  const at::CheckedFrom fn_name = "RasterizeMeshesCoarseCuda";
+  const at::TensorArg verts_arg{face_verts, "face_verts", 1};
+  const at::TensorArg first_idx_arg{
+      mesh_to_face_first_idx, "mesh_to_face_first_idx", 2};
+  const at::TensorArg num_faces_arg{
+      num_faces_per_mesh, "num_faces_per_mesh", 3};
+  at::checkAllSameGPU(fn_name, {verts_arg, first_idx_arg, num_faces_arg});
+
+  // Launch on the device (and current stream) that holds face_verts.
+  at::cuda::CUDAGuard device_guard(face_verts.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  // Scratch outputs of the bounding box kernel: 4 floats and 1 flag per face.
+  const int F = face_verts.size(0);
+  at::Tensor bboxes =
+      at::empty({4, F}, face_verts.options().dtype(at::kFloat));
+  at::Tensor should_skip =
+      at::empty({F}, face_verts.options().dtype(at::kBool));
+
+  // Fixed launch configuration for the bounding box kernel.
+  const size_t num_blocks = 128;
+  const size_t num_threads = 256;
+  TriangleBoundingBoxKernel<<<num_blocks, num_threads, 0, stream>>>(
+      face_verts.contiguous().data_ptr<float>(),
+      F,
+      blur_radius,
+      bboxes.contiguous().data_ptr<float>(),
+      should_skip.contiguous().data_ptr<bool>());
+  AT_CUDA_CHECK(cudaGetLastError());
+
+  // The generic binning step does the rest.
+  return RasterizeCoarseCuda(
+      bboxes,
+      should_skip,
+      mesh_to_face_first_idx,
+      num_faces_per_mesh,
+      image_size,
+      bin_size,
+      max_faces_per_bin);
+}
+
+// Coarse rasterization of point clouds on the GPU. Computes a bounding box
+// and a skip flag per point with PointBoundingBoxKernel, then bins the points
+// via RasterizeCoarseCuda.
+//
+// Args:
+//   points: float tensor of shape (P, 3).
+//   cloud_to_packed_first_idx: (N,) tensor, forwarded to RasterizeCoarseCuda
+//       as elem_first_idxs.
+//   num_points_per_cloud: (N,) tensor, forwarded to RasterizeCoarseCuda as
+//       elems_per_batch.
+//   image_size: forwarded unchanged.
+//   radius: float tensor read by the bounding box kernel; must be on the same
+//       GPU as points (presumably one radius per point -- confirm in
+//       PointBoundingBoxKernel).
+//   bin_size, max_points_per_bin: forwarded unchanged.
+//
+// Returns the tensor produced by RasterizeCoarseCuda.
+at::Tensor RasterizePointsCoarseCuda(
+    const at::Tensor& points, // (P, 3)
+    const at::Tensor& cloud_to_packed_first_idx, // (N,)
+    const at::Tensor& num_points_per_cloud, // (N,)
+    const std::tuple<int, int> image_size,
+    const at::Tensor& radius,
+    const int bin_size,
+    const int max_points_per_bin) {
+  TORCH_CHECK(
+      points.ndimension() == 2 && points.size(1) == 3,
+      "points must have dimensions (num_points, 3)");
+
+  // Check inputs are on the same device. radius is included because its raw
+  // data pointer is dereferenced by the kernel launched below.
+  at::TensorArg points_t{points, "points", 1},
+      cloud_to_packed_first_idx_t{
+          cloud_to_packed_first_idx, "cloud_to_packed_first_idx", 2},
+      num_points_per_cloud_t{num_points_per_cloud, "num_points_per_cloud", 3},
+      radius_t{radius, "radius", 5};
+  at::CheckedFrom c = "RasterizePointsCoarseCuda";
+  at::checkAllSameGPU(
+      c,
+      {points_t,
+       cloud_to_packed_first_idx_t,
+       num_points_per_cloud_t,
+       radius_t});
+
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(points.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  // Allocate tensors for bboxes and should_skip
+  const int P = points.size(0);
+  auto float_opts = points.options().dtype(at::kFloat);
+  auto bool_opts = points.options().dtype(at::kBool);
+  at::Tensor bboxes = at::empty({4, P}, float_opts);
+  at::Tensor should_skip = at::empty({P}, bool_opts);
+
+  // Launch kernel to compute point bboxes
+  const size_t blocks = 128;
+  const size_t threads = 256;
+  PointBoundingBoxKernel<<<blocks, threads, 0, stream>>>(
+      points.contiguous().data_ptr<float>(),
+      radius.contiguous().data_ptr<float>(),
+      P,
+      bboxes.contiguous().data_ptr<float>(),
+      should_skip.contiguous().data_ptr<bool>());
+  AT_CUDA_CHECK(cudaGetLastError());
+
+  return RasterizeCoarseCuda(
+      bboxes,
+      should_skip,
+      cloud_to_packed_first_idx,
+      num_points_per_cloud,
+      image_size,
+      bin_size,
+      max_points_per_bin);
+}
diff --git a/pytorch3d/pytorch3d/csrc/rasterize_coarse/rasterize_coarse.h b/pytorch3d/pytorch3d/csrc/rasterize_coarse/rasterize_coarse.h
new file mode 100644
index 0000000000000000000000000000000000000000..858407cb66b2a252f1b2b223f2adaa2ce8074543
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/rasterize_coarse/rasterize_coarse.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <torch/extension.h>
+#include <tuple>
+
+// CUDA coarse rasterization for meshes. Only declared when the extension is
+// compiled with WITH_CUDA.
+//
+// Arguments are the same as RasterizeMeshesCoarse from
+// rasterize_meshes/rasterize_meshes.h
+//
+// Returns an int32 tensor of shape
+// (N, num_bins_y, num_bins_x, max_faces_per_bin) listing, for each image bin,
+// the faces assigned to it; unused slots hold -1.
+#ifdef WITH_CUDA
+torch::Tensor RasterizeMeshesCoarseCuda(
+    const torch::Tensor& face_verts,
+    const torch::Tensor& mesh_to_face_first_idx,
+    const torch::Tensor& num_faces_per_mesh,
+    const std::tuple<int, int> image_size,
+    const float blur_radius,
+    const int bin_size,
+    const int max_faces_per_bin);
+#endif
+
+// CUDA coarse rasterization for point clouds. Only declared when the
+// extension is compiled with WITH_CUDA.
+//
+// Arguments are the same as RasterizePointsCoarse from
+// rasterize_points/rasterize_points.h
+//
+// Returns an int32 tensor of shape
+// (N, num_bins_y, num_bins_x, max_points_per_bin) listing, for each image
+// bin, the points assigned to it; unused slots hold -1.
+#ifdef WITH_CUDA
+torch::Tensor RasterizePointsCoarseCuda(
+    const torch::Tensor& points,
+    const torch::Tensor& cloud_to_packed_first_idx,
+    const torch::Tensor& num_points_per_cloud,
+    const std::tuple<int, int> image_size,
+    const torch::Tensor& radius,
+    const int bin_size,
+    const int max_points_per_bin);
+#endif
diff --git a/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu b/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu
new file mode 100644
index 0000000000000000000000000000000000000000..21ff7e504d9478eec865c3f12e3740313ce3bf88
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu
@@ -0,0 +1,823 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <float.h>
+#include <math.h>
+#include <thrust/tuple.h>
+#include <cstdio>
+#include <tuple>
+#include "rasterize_points/rasterization_utils.cuh"
+#include "utils/float_math.cuh"
+#include "utils/geometry_utils.cuh"
+
+namespace {
+// A structure for holding details about one candidate face at a pixel.
+// The field order matters: instances are brace-initialised positionally as
+// {z, idx, dist, bary} by CheckPixelInsideFace.
+struct Pixel {
+  float z; // depth of the face at the pixel, interpolated from vertex z
+  int64_t idx; // idx of face
+  float dist; // signed distance of pixel to face; negative if pixel is inside
+  float3 bary; // barycentric coordinates of the pixel w.r.t. the face
+};
+
+// Order pixels by depth first; equal depths are ordered by face index so the
+// comparison stays deterministic.
+__device__ bool operator<(const Pixel& a, const Pixel& b) {
+  if (a.z != b.z) {
+    return a.z < b.z;
+  }
+  return a.idx < b.idx;
+}
+
+// Load the three vertices of the face with index face_idx. face_verts is read
+// as a flat array holding 9 consecutive floats per face: (x, y, z) for each
+// of its 3 vertices.
+__device__ thrust::tuple<float3, float3, float3> GetSingleFaceVerts(
+    const float* face_verts,
+    int face_idx) {
+  // First float belonging to the requested face.
+  const float* v = face_verts + face_idx * 9;
+
+  return thrust::make_tuple(
+      make_float3(v[0], v[1], v[2]),
+      make_float3(v[3], v[4], v[5]),
+      make_float3(v[6], v[7], v[8]));
+}
+
+// Axis-aligned bounds of the face (v0, v1, v2). Returns one float2 per axis,
+// in the order x, y, z, each packed as (min, max).
+__device__ thrust::tuple<float2, float2, float2>
+GetFaceBoundingBox(float3 v0, float3 v1, float3 v2) {
+  const float2 x_range = make_float2(
+      FloatMin3(v0.x, v1.x, v2.x), FloatMax3(v0.x, v1.x, v2.x));
+  const float2 y_range = make_float2(
+      FloatMin3(v0.y, v1.y, v2.y), FloatMax3(v0.y, v1.y, v2.y));
+  const float2 z_range = make_float2(
+      FloatMin3(v0.z, v1.z, v2.z), FloatMax3(v0.z, v1.z, v2.z));
+
+  return thrust::make_tuple(x_range, y_range, z_range);
+}
+
+// Return true if the point pxy cannot belong to the face (v0, v1, v2): either
+// it lies outside the face's xy bounding box grown by blur_radius on every
+// side, or the face's smallest z is below kEpsilon.
+__device__ bool CheckPointOutsideBoundingBox(
+    float3 v0,
+    float3 v1,
+    float3 v2,
+    float blur_radius,
+    float2 pxy) {
+  const auto limits = GetFaceBoundingBox(v0, v1, v2);
+  const float2 x_range = thrust::get<0>(limits);
+  const float2 y_range = thrust::get<1>(limits);
+  const float z_nearest = thrust::get<2>(limits).x;
+
+  // Faces with at least one vertex behind the camera won't render correctly
+  // and should be removed or clipped before calling the rasterizer
+  if (z_nearest < kEpsilon) {
+    return true;
+  }
+
+  // Compare the point against the bounding box padded by the blur radius.
+  const bool outside_x = pxy.x > x_range.y + blur_radius ||
+      pxy.x < x_range.x - blur_radius;
+  const bool outside_y = pxy.y > y_range.y + blur_radius ||
+      pxy.y < y_range.x - blur_radius;
+  return outside_x || outside_y;
+}
+
+// Test whether the pixel at location pxy is covered by (or lies within the
+// blur region of) the face with index face_idx in face_verts and, if so,
+// record the face in the per-pixel candidate list q.
+//
+// q holds at most K Pixel entries in no particular order. q_size is the
+// number of valid entries, while q_max_z / q_max_idx track the depth and the
+// slot of the farthest entry so that it can be evicted when a nearer face is
+// found. q, q_size, q_max_z and q_max_idx are all updated in place.
+//
+// Faces created by clipping (clipped_faces_neighbor_idx[face_idx] != -1) share
+// one slot with their neighbor: only the one closer to the pixel is kept.
+//
+// This code is shared between RasterizeMeshesNaiveCudaKernel and
+// RasterizeMeshesFineCudaKernel.
+template <typename FaceQ>
+__device__ void CheckPixelInsideFace(
+    const float* face_verts, // (F, 3, 3)
+    const int64_t* clipped_faces_neighbor_idx, // (F,)
+    const int face_idx,
+    int& q_size,
+    float& q_max_z,
+    int& q_max_idx,
+    FaceQ& q,
+    const float blur_radius,
+    const float2 pxy, // Coordinates of the pixel
+    const int K,
+    const bool perspective_correct,
+    const bool clip_barycentric_coords,
+    const bool cull_backfaces) {
+  const auto v012 = GetSingleFaceVerts(face_verts, face_idx);
+  const float3 v0 = thrust::get<0>(v012);
+  const float3 v1 = thrust::get<1>(v012);
+  const float3 v2 = thrust::get<2>(v012);
+
+  // Only need xy for barycentric coordinates and distance calculations.
+  const float2 v0xy = make_float2(v0.x, v0.y);
+  const float2 v1xy = make_float2(v1.x, v1.y);
+  const float2 v2xy = make_float2(v2.x, v2.y);
+
+  // Perform checks and skip if:
+  // 1. the face is behind the camera
+  // 2. the face is facing away from the camera
+  // 3. the face has very small face area
+  // 4. the pixel is outside the face bbox
+  const float zmax = FloatMax3(v0.z, v1.z, v2.z);
+  const bool outside_bbox = CheckPointOutsideBoundingBox(
+      v0, v1, v2, sqrt(blur_radius), pxy); // use sqrt of blur for bbox
+  const float face_area = EdgeFunctionForward(v0xy, v1xy, v2xy);
+  // Check if the face is visible to the camera.
+  const bool back_face = face_area < 0.0;
+  const bool zero_face_area =
+      (face_area <= kEpsilon && face_area >= -1.0f * kEpsilon);
+
+  if (zmax < 0 || (cull_backfaces && back_face) || outside_bbox ||
+      zero_face_area) {
+    return;
+  }
+
+  // Calculate barycentric coords and euclidean dist to triangle.
+  const float3 p_bary0 = BarycentricCoordsForward(pxy, v0xy, v1xy, v2xy);
+  const float3 p_bary = !perspective_correct
+      ? p_bary0
+      : BarycentricPerspectiveCorrectionForward(p_bary0, v0.z, v1.z, v2.z);
+  const float3 p_bary_clip =
+      !clip_barycentric_coords ? p_bary : BarycentricClipForward(p_bary);
+
+  const float pz =
+      p_bary_clip.x * v0.z + p_bary_clip.y * v1.z + p_bary_clip.z * v2.z;
+
+  if (pz < 0) {
+    return; // Face is behind the image plane.
+  }
+
+  // Get abs squared distance
+  const float dist = PointTriangleDistanceForward(pxy, v0xy, v1xy, v2xy);
+
+  // Use the unclipped bary coordinates to determine if the point is inside the
+  // face.
+  const bool inside = p_bary.x > 0.0f && p_bary.y > 0.0f && p_bary.z > 0.0f;
+  const float signed_dist = inside ? -dist : dist;
+  // Check if pixel is outside blur region
+  if (!inside && dist >= blur_radius) {
+    return;
+  }
+
+  // Handle the case where a face (f) partially behind the image plane is
+  // clipped to a quadrilateral and then split into two faces (t1, t2). In this
+  // case we:
+  // 1. Find the index of the neighboring face (e.g. for t1 need index of t2)
+  // 2. Check if the neighboring face (t2) is already in the top K faces
+  // 3. If yes, compare the distance of the pixel to t1 with the distance to t2.
+  // 4. If dist_t1 < dist_t2, overwrite the values for t2 in the top K faces.
+  const int neighbor_idx = clipped_faces_neighbor_idx[face_idx];
+  int neighbor_idx_top_k = -1;
+
+  // Check if neighboring face is already in the top K.
+  // -1 is the fill value in clipped_faces_neighbor_idx
+  if (neighbor_idx != -1) {
+    // Only need to loop until q_size.
+    for (int i = 0; i < q_size; i++) {
+      if (q[i].idx == neighbor_idx) {
+        neighbor_idx_top_k = i;
+        break;
+      }
+    }
+  }
+  // If neighbor_idx_top_k is not -1 then the neighbor is in the top K struct.
+  if (neighbor_idx_top_k != -1) {
+    // If dist of current face is less than neighbor then overwrite the
+    // neighbor face values in the top K struct.
+    float neighbor_dist = abs(q[neighbor_idx_top_k].dist);
+    if (dist < neighbor_dist) {
+      // Overwrite the neighbor face values
+      q[neighbor_idx_top_k] = {pz, face_idx, signed_dist, p_bary_clip};
+
+      // Keep q_max_z / q_max_idx consistent with the contents of q.
+      // q_size stays the same.
+      if (pz > q_max_z) {
+        // The new entry is the farthest one.
+        q_max_z = pz;
+        q_max_idx = neighbor_idx_top_k;
+      } else if (neighbor_idx_top_k == q_max_idx) {
+        // The replaced slot held the previous maximum and the new depth is
+        // not larger, so the cached maximum is stale: rescan the valid
+        // entries. Without this, a later eviction could remove the wrong
+        // entry.
+        q_max_z = pz;
+        for (int i = 0; i < q_size; i++) {
+          if (q[i].z > q_max_z) {
+            q_max_z = q[i].z;
+            q_max_idx = i;
+          }
+        }
+      }
+    }
+  } else {
+    // Handle as a normal face
+    if (q_size < K) {
+      // Just insert it.
+      q[q_size] = {pz, face_idx, signed_dist, p_bary_clip};
+      if (pz > q_max_z) {
+        q_max_z = pz;
+        q_max_idx = q_size;
+      }
+      q_size++;
+    } else if (pz < q_max_z) {
+      // Overwrite the old max, and find the new max.
+      q[q_max_idx] = {pz, face_idx, signed_dist, p_bary_clip};
+      q_max_z = pz;
+      for (int i = 0; i < K; i++) {
+        if (q[i].z > q_max_z) {
+          q_max_z = q[i].z;
+          q_max_idx = i;
+        }
+      }
+    }
+  }
+}
+
+} // namespace
+
+// ****************************************************************************
+// *                          NAIVE RASTERIZATION                      *
+// ****************************************************************************
+// Naive rasterization kernel: every thread processes one or more output
+// pixels. For each pixel it tests every face of the corresponding mesh with
+// CheckPixelInsideFace, keeps up to K candidates, orders them with BubbleSort
+// and writes them to the output buffers. Output slots beyond the number of
+// candidates found are not written, so they keep the caller's fill value.
+//
+// Output buffers are indexed as (N, H, W, K) for face_idxs, zbuf and
+// pix_dists, and as (N, H, W, K, 3) for bary.
+__global__ void RasterizeMeshesNaiveCudaKernel(
+    const float* face_verts,
+    const int64_t* mesh_to_face_first_idx,
+    const int64_t* num_faces_per_mesh,
+    const int64_t* clipped_faces_neighbor_idx,
+    const float blur_radius,
+    const bool perspective_correct,
+    const bool clip_barycentric_coords,
+    const bool cull_backfaces,
+    const int N,
+    const int H,
+    const int W,
+    const int K,
+    int64_t* face_idxs,
+    float* zbuf,
+    float* pix_dists,
+    float* bary) {
+  // Simple version: One thread per output pixel
+  int num_threads = gridDim.x * blockDim.x;
+  int tid = blockDim.x * blockIdx.x + threadIdx.x;
+
+  for (int i = tid; i < N * H * W; i += num_threads) {
+    // Convert linear index to 3D index
+    const int n = i / (H * W); // batch index.
+    const int pix_idx = i % (H * W);
+
+    // Reverse ordering of X and Y axes
+    const int yi = H - 1 - pix_idx / W;
+    const int xi = W - 1 - pix_idx % W;
+
+    // screen coordinates to ndc coordinates of pixel.
+    const float xf = PixToNonSquareNdc(xi, W, H);
+    const float yf = PixToNonSquareNdc(yi, H, W);
+    const float2 pxy = make_float2(xf, yf);
+
+    // For keeping track of the K closest faces we want a data structure
+    // that (1) gives O(1) access to the farthest kept face for easy
+    // comparisons, and (2) allows insertion of new elements. In the CPU
+    // version we use std::priority_queue; then (2) is O(log K). We can't use
+    // STL containers in CUDA; we could roll our own max heap in an array, but
+    // that would likely have a lot of warp divergence so we do something
+    // simpler instead: keep the elements in an unsorted array, but keep
+    // track of the max value and the index of the max value. Then (1) is
+    // still O(1) time, while (2) is O(K) with a clean loop. K is bounded by
+    // kMaxPointsPerPixel, so this should be fast enough for our purposes.
+    Pixel q[kMaxPointsPerPixel];
+    int q_size = 0;
+    float q_max_z = -1000;
+    int q_max_idx = -1;
+
+    // Using the batch index of the thread get the start and stop
+    // indices for the faces.
+    const int64_t face_start_idx = mesh_to_face_first_idx[n];
+    const int64_t face_stop_idx = face_start_idx + num_faces_per_mesh[n];
+
+    // Loop through the faces in the mesh.
+    for (int f = face_start_idx; f < face_stop_idx; ++f) {
+      // Check if the pixel pxy is inside the face bounding box and if it is,
+      // update q, q_size, q_max_z and q_max_idx in place.
+
+      CheckPixelInsideFace(
+          face_verts,
+          clipped_faces_neighbor_idx,
+          f,
+          q_size,
+          q_max_z,
+          q_max_idx,
+          q,
+          blur_radius,
+          pxy,
+          K,
+          perspective_correct,
+          clip_barycentric_coords,
+          cull_backfaces);
+    }
+
+    // TODO: make sorting an option as only top k is needed, not sorted values.
+    BubbleSort(q, q_size);
+
+    // Offset of this pixel's first slot in the (N, H, W, K) outputs. Computed
+    // in 64 bits: the bary offset is three times larger and can exceed the
+    // range of a 32-bit int for large batches / images / K.
+    const int64_t idx = (static_cast<int64_t>(n) * H * W + pix_idx) * K;
+
+    for (int k = 0; k < q_size; ++k) {
+      face_idxs[idx + k] = q[k].idx;
+      zbuf[idx + k] = q[k].z;
+      pix_dists[idx + k] = q[k].dist;
+      bary[(idx + k) * 3 + 0] = q[k].bary.x;
+      bary[(idx + k) * 3 + 1] = q[k].bary.y;
+      bary[(idx + k) * 3 + 2] = q[k].bary.z;
+    }
+  }
+}
+
+// Host-side launcher for the naive mesh rasterizer.
+//
+// Args:
+//   face_verts: float tensor of shape (num_faces, 3, 3).
+//   mesh_to_faces_packed_first_idx: int64 tensor with one entry per mesh.
+//   num_faces_per_mesh: int64 tensor with one entry per mesh.
+//   clipped_faces_neighbor_idx: int64 tensor with one entry per face.
+//   image_size: (H, W) of the output image in pixels.
+//   blur_radius: forwarded to the kernel.
+//   num_closest: number K of faces to keep per pixel; must not exceed
+//       kMaxPointsPerPixel.
+//   perspective_correct, clip_barycentric_coords, cull_backfaces: forwarded
+//       to the kernel.
+//
+// Returns:
+//   Tuple (face_idxs, zbuf, bary, pix_dists) with shapes (N, H, W, K),
+//   (N, H, W, K), (N, H, W, K, 3) and (N, H, W, K). All are pre-filled with
+//   -1; face_idxs is int64, the others are float.
+std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor>
+RasterizeMeshesNaiveCuda(
+    const at::Tensor& face_verts,
+    const at::Tensor& mesh_to_faces_packed_first_idx,
+    const at::Tensor& num_faces_per_mesh,
+    const at::Tensor& clipped_faces_neighbor_idx,
+    const std::tuple<int, int> image_size,
+    const float blur_radius,
+    const int num_closest,
+    const bool perspective_correct,
+    const bool clip_barycentric_coords,
+    const bool cull_backfaces) {
+  TORCH_CHECK(
+      face_verts.ndimension() == 3 && face_verts.size(1) == 3 &&
+          face_verts.size(2) == 3,
+      "face_verts must have dimensions (num_faces, 3, 3)");
+
+  TORCH_CHECK(
+      num_faces_per_mesh.size(0) == mesh_to_faces_packed_first_idx.size(0),
+      "num_faces_per_mesh must have same size first dimension as mesh_to_faces_packed_first_idx");
+
+  TORCH_CHECK(
+      clipped_faces_neighbor_idx.size(0) == face_verts.size(0),
+      "clipped_faces_neighbor_idx must have same size first dimension as face_verts");
+
+  // The kernel keeps its per-pixel candidates in a fixed-size local array of
+  // kMaxPointsPerPixel entries, so K may not exceed that.
+  TORCH_CHECK(
+      num_closest <= kMaxPointsPerPixel,
+      "Must have num_closest (faces per pixel) <= ",
+      kMaxPointsPerPixel);
+
+  // Check inputs are on the same device
+  at::TensorArg face_verts_t{face_verts, "face_verts", 1},
+      mesh_to_faces_packed_first_idx_t{
+          mesh_to_faces_packed_first_idx, "mesh_to_faces_packed_first_idx", 2},
+      num_faces_per_mesh_t{num_faces_per_mesh, "num_faces_per_mesh", 3},
+      clipped_faces_neighbor_idx_t{
+          clipped_faces_neighbor_idx, "clipped_faces_neighbor_idx", 4};
+  at::CheckedFrom c = "RasterizeMeshesNaiveCuda";
+  at::checkAllSameGPU(
+      c,
+      {face_verts_t,
+       mesh_to_faces_packed_first_idx_t,
+       num_faces_per_mesh_t,
+       clipped_faces_neighbor_idx_t});
+
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(face_verts.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const int N = num_faces_per_mesh.size(0); // batch size.
+  const int H = std::get<0>(image_size);
+  const int W = std::get<1>(image_size);
+  const int K = num_closest;
+
+  auto long_opts = num_faces_per_mesh.options().dtype(at::kLong);
+  auto float_opts = face_verts.options().dtype(at::kFloat);
+
+  // -1 marks slots for which the kernel finds no face.
+  at::Tensor face_idxs = at::full({N, H, W, K}, -1, long_opts);
+  at::Tensor zbuf = at::full({N, H, W, K}, -1, float_opts);
+  at::Tensor pix_dists = at::full({N, H, W, K}, -1, float_opts);
+  at::Tensor bary = at::full({N, H, W, K, 3}, -1, float_opts);
+
+  // Empty output: nothing to rasterize, skip the launch.
+  if (face_idxs.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return std::make_tuple(face_idxs, zbuf, bary, pix_dists);
+  }
+
+  const size_t blocks = 1024;
+  const size_t threads = 64;
+
+  RasterizeMeshesNaiveCudaKernel<<<blocks, threads, 0, stream>>>(
+      face_verts.contiguous().data_ptr<float>(),
+      mesh_to_faces_packed_first_idx.contiguous().data_ptr<int64_t>(),
+      num_faces_per_mesh.contiguous().data_ptr<int64_t>(),
+      clipped_faces_neighbor_idx.contiguous().data_ptr<int64_t>(),
+      blur_radius,
+      perspective_correct,
+      clip_barycentric_coords,
+      cull_backfaces,
+      N,
+      H,
+      W,
+      K,
+      face_idxs.data_ptr<int64_t>(),
+      zbuf.data_ptr<float>(),
+      pix_dists.data_ptr<float>(),
+      bary.data_ptr<float>());
+
+  AT_CUDA_CHECK(cudaGetLastError());
+  return std::make_tuple(face_idxs, zbuf, bary, pix_dists);
+}
+
+// ****************************************************************************
+// *                            BACKWARD PASS                                 *
+// ****************************************************************************
+// TODO: benchmark parallelizing over faces_verts instead of over pixels.
+// Backward pass of mesh rasterization. Every thread processes one or more
+// pixels; for each of the K faces recorded at a pixel it recomputes the
+// forward quantities, chains the upstream gradients of zbuf, bary and dists
+// through them and accumulates the result into grad_face_verts with
+// atomicAdd (several pixels may touch the same face).
+__global__ void RasterizeMeshesBackwardCudaKernel(
+    const float* face_verts, // (F, 3, 3)
+    const int64_t* pix_to_face, // (N, H, W, K)
+    const bool perspective_correct,
+    const bool clip_barycentric_coords,
+    const int N,
+    const int H,
+    const int W,
+    const int K,
+    const float* grad_zbuf, // (N, H, W, K)
+    const float* grad_bary, // (N, H, W, K, 3)
+    const float* grad_dists, // (N, H, W, K)
+    float* grad_face_verts) { // (F, 3, 3)
+
+  // Parallelize over each pixel in images of
+  // size H * W, for each image in the batch of size N.
+  const int num_threads = gridDim.x * blockDim.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+  for (int t_i = tid; t_i < N * H * W; t_i += num_threads) {
+    // Convert linear index to 3D index
+    const int n = t_i / (H * W); // batch index.
+    const int pix_idx = t_i % (H * W);
+
+    // Reverse ordering of X and Y axes.
+    const int yi = H - 1 - pix_idx / W;
+    const int xi = W - 1 - pix_idx % W;
+
+    const float xf = PixToNonSquareNdc(xi, W, H);
+    const float yf = PixToNonSquareNdc(yi, H, W);
+    const float2 pxy = make_float2(xf, yf);
+
+    // Loop over all the faces for this pixel.
+    for (int k = 0; k < K; k++) {
+      // Index into (N, H, W, K, :) grad tensors
+      // pixel index + top k index. Computed in 64 bits because the grad_bary
+      // offset (i * 3) can exceed the range of a 32-bit int for large inputs.
+      const int64_t i = (static_cast<int64_t>(n) * H * W + pix_idx) * K + k;
+
+      const int f = pix_to_face[i];
+      if (f < 0) {
+        continue; // padded face.
+      }
+      // Get xyz coordinates of the three face vertices.
+      const auto v012 = GetSingleFaceVerts(face_verts, f);
+      const float3 v0 = thrust::get<0>(v012);
+      const float3 v1 = thrust::get<1>(v012);
+      const float3 v2 = thrust::get<2>(v012);
+
+      // Only need xy for barycentric coordinate and distance calculations.
+      const float2 v0xy = make_float2(v0.x, v0.y);
+      const float2 v1xy = make_float2(v1.x, v1.y);
+      const float2 v2xy = make_float2(v2.x, v2.y);
+
+      // Get upstream gradients for the face.
+      const float grad_dist_upstream = grad_dists[i];
+      const float grad_zbuf_upstream = grad_zbuf[i];
+      const float grad_bary_upstream_w0 = grad_bary[i * 3 + 0];
+      const float grad_bary_upstream_w1 = grad_bary[i * 3 + 1];
+      const float grad_bary_upstream_w2 = grad_bary[i * 3 + 2];
+      const float3 grad_bary_upstream = make_float3(
+          grad_bary_upstream_w0, grad_bary_upstream_w1, grad_bary_upstream_w2);
+
+      // Recompute the forward chain:
+      //   b_w -> (perspective correction) -> b_pp -> (clip) -> b_w_clip
+      const float3 b_w = BarycentricCoordsForward(pxy, v0xy, v1xy, v2xy);
+      const float3 b_pp = !perspective_correct
+          ? b_w
+          : BarycentricPerspectiveCorrectionForward(b_w, v0.z, v1.z, v2.z);
+
+      const float3 b_w_clip =
+          !clip_barycentric_coords ? b_pp : BarycentricClipForward(b_pp);
+
+      // The stored distance is negated for pixels inside the face, so the
+      // upstream gradient has to be negated as well.
+      const bool inside = b_pp.x > 0.0f && b_pp.y > 0.0f && b_pp.z > 0.0f;
+      const float sign = inside ? -1.0f : 1.0f;
+
+      auto grad_dist_f = PointTriangleDistanceBackward(
+          pxy, v0xy, v1xy, v2xy, sign * grad_dist_upstream);
+      const float2 ddist_d_v0 = thrust::get<1>(grad_dist_f);
+      const float2 ddist_d_v1 = thrust::get<2>(grad_dist_f);
+      const float2 ddist_d_v2 = thrust::get<3>(grad_dist_f);
+
+      // Upstream gradient for barycentric coords from zbuf calculation:
+      // zbuf = bary_w0 * z0 + bary_w1 * z1 + bary_w2 * z2
+      // Therefore
+      // d_zbuf/d_bary_w0 = z0
+      // d_zbuf/d_bary_w1 = z1
+      // d_zbuf/d_bary_w2 = z2
+      const float3 d_zbuf_d_bwclip = make_float3(v0.z, v1.z, v2.z);
+
+      // Total upstream barycentric gradients are the sum of
+      // external upstream gradients and contribution from zbuf.
+      const float3 grad_bary_f_sum =
+          (grad_bary_upstream + grad_zbuf_upstream * d_zbuf_d_bwclip);
+
+      float3 grad_bary0 = grad_bary_f_sum;
+
+      if (clip_barycentric_coords) {
+        // Differentiate the clip at its forward input. The forward pass
+        // clips b_pp (the perspective-corrected coordinates when
+        // perspective_correct is set), not the raw b_w.
+        grad_bary0 = BarycentricClipBackward(b_pp, grad_bary_f_sum);
+      }
+
+      float dz0_persp = 0.0f, dz1_persp = 0.0f, dz2_persp = 0.0f;
+      if (perspective_correct) {
+        // The perspective correction takes the raw coordinates b_w as input.
+        auto perspective_grads = BarycentricPerspectiveCorrectionBackward(
+            b_w, v0.z, v1.z, v2.z, grad_bary0);
+        grad_bary0 = thrust::get<0>(perspective_grads);
+        dz0_persp = thrust::get<1>(perspective_grads);
+        dz1_persp = thrust::get<2>(perspective_grads);
+        dz2_persp = thrust::get<3>(perspective_grads);
+      }
+
+      auto grad_bary_f =
+          BarycentricCoordsBackward(pxy, v0xy, v1xy, v2xy, grad_bary0);
+      const float2 dbary_d_v0 = thrust::get<1>(grad_bary_f);
+      const float2 dbary_d_v1 = thrust::get<2>(grad_bary_f);
+      const float2 dbary_d_v2 = thrust::get<3>(grad_bary_f);
+
+      // Accumulate into the 9 floats (x, y, z per vertex) of face f.
+      atomicAdd(grad_face_verts + f * 9 + 0, dbary_d_v0.x + ddist_d_v0.x);
+      atomicAdd(grad_face_verts + f * 9 + 1, dbary_d_v0.y + ddist_d_v0.y);
+      atomicAdd(
+          grad_face_verts + f * 9 + 2,
+          grad_zbuf_upstream * b_w_clip.x + dz0_persp);
+      atomicAdd(grad_face_verts + f * 9 + 3, dbary_d_v1.x + ddist_d_v1.x);
+      atomicAdd(grad_face_verts + f * 9 + 4, dbary_d_v1.y + ddist_d_v1.y);
+      atomicAdd(
+          grad_face_verts + f * 9 + 5,
+          grad_zbuf_upstream * b_w_clip.y + dz1_persp);
+      atomicAdd(grad_face_verts + f * 9 + 6, dbary_d_v2.x + ddist_d_v2.x);
+      atomicAdd(grad_face_verts + f * 9 + 7, dbary_d_v2.y + ddist_d_v2.y);
+      atomicAdd(
+          grad_face_verts + f * 9 + 8,
+          grad_zbuf_upstream * b_w_clip.z + dz2_persp);
+    }
+  }
+}
+
+// Host-side launcher for the rasterization backward pass: validates the
+// inputs, allocates a zero-initialised gradient buffer and runs
+// RasterizeMeshesBackwardCudaKernel on the device holding face_verts.
+//
+// Returns a tensor of shape (F, 3, 3) created with the options of face_verts.
+at::Tensor RasterizeMeshesBackwardCuda(
+    const at::Tensor& face_verts, // (F, 3, 3)
+    const at::Tensor& pix_to_face, // (N, H, W, K)
+    const at::Tensor& grad_zbuf, // (N, H, W, K)
+    const at::Tensor& grad_bary, // (N, H, W, K, 3)
+    const at::Tensor& grad_dists, // (N, H, W, K)
+    const bool perspective_correct,
+    const bool clip_barycentric_coords) {
+  const at::CheckedFrom fn_name = "RasterizeMeshesBackwardCuda";
+
+  // All tensors must live on one GPU; the floating point ones must also
+  // share a dtype.
+  const at::TensorArg verts_arg{face_verts, "face_verts", 1};
+  const at::TensorArg pix_to_face_arg{pix_to_face, "pix_to_face", 2};
+  const at::TensorArg grad_zbuf_arg{grad_zbuf, "grad_zbuf", 3};
+  const at::TensorArg grad_bary_arg{grad_bary, "grad_bary", 4};
+  const at::TensorArg grad_dists_arg{grad_dists, "grad_dists", 5};
+  at::checkAllSameGPU(
+      fn_name,
+      {verts_arg,
+       pix_to_face_arg,
+       grad_zbuf_arg,
+       grad_bary_arg,
+       grad_dists_arg});
+  at::checkAllSameType(
+      fn_name, {verts_arg, grad_zbuf_arg, grad_bary_arg, grad_dists_arg});
+
+  // The kernel accumulates with atomicAdd, which makes the result
+  // nondeterministic; report that to the global context.
+  at::globalContext().alertNotDeterministic("RasterizeMeshesBackwardCuda");
+
+  // Launch on the device (and current stream) that holds face_verts.
+  at::cuda::CUDAGuard device_guard(face_verts.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  // Problem sizes, taken from the tensor shapes.
+  const int F = face_verts.size(0);
+  const int N = pix_to_face.size(0);
+  const int H = pix_to_face.size(1);
+  const int W = pix_to_face.size(2);
+  const int K = pix_to_face.size(3);
+
+  at::Tensor grad_face_verts = at::zeros({F, 3, 3}, face_verts.options());
+
+  // No faces means there is nothing to accumulate into.
+  if (grad_face_verts.numel() == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return grad_face_verts;
+  }
+
+  // Fixed launch configuration.
+  const size_t num_blocks = 1024;
+  const size_t num_threads = 64;
+
+  RasterizeMeshesBackwardCudaKernel<<<num_blocks, num_threads, 0, stream>>>(
+      face_verts.contiguous().data_ptr<float>(),
+      pix_to_face.contiguous().data_ptr<int64_t>(),
+      perspective_correct,
+      clip_barycentric_coords,
+      N,
+      H,
+      W,
+      K,
+      grad_zbuf.contiguous().data_ptr<float>(),
+      grad_bary.contiguous().data_ptr<float>(),
+      grad_dists.contiguous().data_ptr<float>(),
+      grad_face_verts.data_ptr<float>());
+
+  AT_CUDA_CHECK(cudaGetLastError());
+  return grad_face_verts;
+}
+
+// ****************************************************************************
+// *                            FINE RASTERIZATION                            *
+// ****************************************************************************
+__global__ void RasterizeMeshesFineCudaKernel(
+    const float* face_verts, // (F, 3, 3)
+    const int32_t* bin_faces, // (N, BH, BW, M)
+    const int64_t* clipped_faces_neighbor_idx, // (F,)
+    const float blur_radius,
+    const int bin_size,
+    const bool perspective_correct,
+    const bool clip_barycentric_coords,
+    const bool cull_backfaces,
+    const int N,
+    const int BH,
+    const int BW,
+    const int M,
+    const int H,
+    const int W,
+    const int K,
+    int64_t* face_idxs, // (N, H, W, K)
+    float* zbuf, // (N, H, W, K)
+    float* pix_dists, // (N, H, W, K)
+    float* bary // (N, H, W, K, 3)
+) {
+  // Can exceed N * H * W when H or W is not a multiple of bin_size.
+  int num_pixels = N * BH * BW * bin_size * bin_size;
+  int num_threads = gridDim.x * blockDim.x;
+  int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+  for (int pid = tid; pid < num_pixels; pid += num_threads) {
+    // Convert linear index into bin and pixel indices. We make the
+    // within-bin pixel ids move the fastest, so that adjacent threads will
+    // fall into the same bin; this should give them coalesced memory reads
+    // when they read from faces and bin_faces.
+    int i = pid;
+    const int n = i / (BH * BW * bin_size * bin_size);
+    i %= BH * BW * bin_size * bin_size;
+    // bin index y
+    const int by = i / (BW * bin_size * bin_size);
+    i %= BW * bin_size * bin_size;
+    // bin index x
+    const int bx = i / (bin_size * bin_size);
+    // pixel within the bin
+    i %= bin_size * bin_size;
+
+    // Pixel x, y indices
+    const int yi = i / bin_size + by * bin_size;
+    const int xi = i % bin_size + bx * bin_size;
+
+    if (yi >= H || xi >= W) // padding pixel of a partial bin, outside the image
+      continue;
+
+    const float xf = PixToNonSquareNdc(xi, W, H);
+    const float yf = PixToNonSquareNdc(yi, H, W);
+
+    const float2 pxy = make_float2(xf, yf);
+
+    // This part looks like the naive rasterization kernel, except we use
+    // bin_faces to only look at a subset of faces already known to fall
+    // in this bin. TODO abstract out this logic into some data structure
+    // that is shared by both kernels?
+    Pixel q[kMaxPointsPerPixel]; // candidate faces collected for this pixel
+    int q_size = 0;
+    float q_max_z = -1000; // NOTE(review): presumably below any valid z; confirm
+    int q_max_idx = -1;
+
+    for (int m = 0; m < M; m++) {
+      const int f = bin_faces[n * BH * BW * M + by * BW * M + bx * M + m];
+      if (f < 0) {
+        continue; // bin_faces uses -1 as a sentinel value.
+      }
+      // Check if the pixel pxy is inside the face bounding box and if it is,
+      // update q, q_size, q_max_z and q_max_idx in place.
+      CheckPixelInsideFace(
+          face_verts,
+          clipped_faces_neighbor_idx,
+          f,
+          q_size,
+          q_max_z,
+          q_max_idx,
+          q,
+          blur_radius,
+          pxy,
+          K,
+          perspective_correct,
+          clip_barycentric_coords,
+          cull_backfaces);
+    }
+
+    // Now we've looked at all the faces for this bin, so we can write
+    // output for the current pixel.
+    // TODO: make sorting an option as only top k is needed, not sorted values.
+    BubbleSort(q, q_size);
+
+    // Reverse ordering of the X and Y axis so that
+    // in the image +Y is pointing up and +X is pointing left.
+    const int yidx = H - 1 - yi;
+    const int xidx = W - 1 - xi;
+
+    const int pix_idx = n * H * W * K + yidx * W * K + xidx * K;
+    for (int k = 0; k < q_size; k++) { // slots k >= q_size are left untouched
+      face_idxs[pix_idx + k] = q[k].idx;
+      zbuf[pix_idx + k] = q[k].z;
+      pix_dists[pix_idx + k] = q[k].dist;
+      bary[(pix_idx + k) * 3 + 0] = q[k].bary.x;
+      bary[(pix_idx + k) * 3 + 1] = q[k].bary.y;
+      bary[(pix_idx + k) * 3 + 2] = q[k].bary.z;
+    }
+  }
+}
+
+// Validates inputs, allocates -1 filled outputs and launches the fine kernel.
+std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor>
+RasterizeMeshesFineCuda(
+    const at::Tensor& face_verts,
+    const at::Tensor& bin_faces,
+    const at::Tensor& clipped_faces_neighbor_idx,
+    const std::tuple<int, int> image_size,
+    const float blur_radius,
+    const int bin_size,
+    const int faces_per_pixel,
+    const bool perspective_correct,
+    const bool clip_barycentric_coords,
+    const bool cull_backfaces) {
+  TORCH_CHECK(
+      face_verts.ndimension() == 3 && face_verts.size(1) == 3 &&
+          face_verts.size(2) == 3,
+      "face_verts must have dimensions (num_faces, 3, 3)");
+  TORCH_CHECK(bin_faces.ndimension() == 4, "bin_faces must have 4 dimensions");
+  TORCH_CHECK(
+      clipped_faces_neighbor_idx.size(0) == face_verts.size(0),
+      "clipped_faces_neighbor_idx must have the same first dimension as face_verts");
+
+  // Check inputs are on the same device
+  at::TensorArg face_verts_t{face_verts, "face_verts", 1},
+      bin_faces_t{bin_faces, "bin_faces", 2},
+      clipped_faces_neighbor_idx_t{
+          clipped_faces_neighbor_idx, "clipped_faces_neighbor_idx", 3};
+  at::CheckedFrom c = "RasterizeMeshesFineCuda";
+  at::checkAllSameGPU(
+      c, {face_verts_t, bin_faces_t, clipped_faces_neighbor_idx_t});
+
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(face_verts.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  // bin_faces shape (N, BH, BW, M)
+  const int N = bin_faces.size(0);
+  const int BH = bin_faces.size(1);
+  const int BW = bin_faces.size(2);
+  const int M = bin_faces.size(3);
+  const int K = faces_per_pixel;
+
+  const int H = std::get<0>(image_size);
+  const int W = std::get<1>(image_size);
+
+  TORCH_CHECK(K <= kMaxPointsPerPixel, "Must have num_closest <= 150");
+  auto long_opts = bin_faces.options().dtype(at::kLong);
+  auto float_opts = face_verts.options().dtype(at::kFloat);
+
+  at::Tensor face_idxs = at::full({N, H, W, K}, -1, long_opts);
+  at::Tensor zbuf = at::full({N, H, W, K}, -1, float_opts);
+  at::Tensor pix_dists = at::full({N, H, W, K}, -1, float_opts);
+  at::Tensor bary = at::full({N, H, W, K, 3}, -1, float_opts);
+
+  if (face_idxs.numel() == 0) { // nothing to rasterize, skip the launch
+    AT_CUDA_CHECK(cudaGetLastError());
+    return std::make_tuple(face_idxs, zbuf, bary, pix_dists);
+  }
+
+  const size_t blocks = 1024; // fixed grid; the kernel loops over pixels
+  const size_t threads = 64;
+
+  RasterizeMeshesFineCudaKernel<<<blocks, threads, 0, stream>>>(
+      face_verts.contiguous().data_ptr<float>(),
+      bin_faces.contiguous().data_ptr<int32_t>(),
+      clipped_faces_neighbor_idx.contiguous().data_ptr<int64_t>(),
+      blur_radius,
+      bin_size,
+      perspective_correct,
+      clip_barycentric_coords,
+      cull_backfaces,
+      N,
+      BH,
+      BW,
+      M,
+      H,
+      W,
+      K,
+      face_idxs.data_ptr<int64_t>(),
+      zbuf.data_ptr<float>(),
+      pix_dists.data_ptr<float>(),
+      bary.data_ptr<float>());
+
+  AT_CUDA_CHECK(cudaGetLastError()); // surface launch errors, as in backward
+  return std::make_tuple(face_idxs, zbuf, bary, pix_dists);
+}
diff --git a/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.h b/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.h
new file mode 100644
index 0000000000000000000000000000000000000000..584aa0238ad86434567716d5c77f212a394b1d84
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.h
@@ -0,0 +1,549 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/extension.h>
+#include <cstdio>
+#include <tuple>
+#include "rasterize_coarse/rasterize_coarse.h"
+#include "utils/pytorch3d_cutils.h"
+
+// ****************************************************************************
+// *                            FORWARD PASS                                 *
+// ****************************************************************************
+
+std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
+RasterizeMeshesNaiveCpu(
+    const torch::Tensor& face_verts,
+    const torch::Tensor& mesh_to_face_first_idx,
+    const torch::Tensor& num_faces_per_mesh,
+    const torch::Tensor& clipped_faces_neighbor_idx,
+    const std::tuple<int, int> image_size,
+    const float blur_radius,
+    const int faces_per_pixel,
+    const bool perspective_correct,
+    const bool clip_barycentric_coords,
+    const bool cull_backfaces);
+
+#ifdef WITH_CUDA
+std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor>
+RasterizeMeshesNaiveCuda(
+    const at::Tensor& face_verts,
+    const at::Tensor& mesh_to_face_first_idx,
+    const at::Tensor& num_faces_per_mesh,
+    const torch::Tensor& clipped_faces_neighbor_idx,
+    const std::tuple<int, int> image_size,
+    const float blur_radius,
+    const int num_closest,
+    const bool perspective_correct,
+    const bool clip_barycentric_coords,
+    const bool cull_backfaces);
+#endif
+// Forward pass for rasterizing a batch of meshes.
+//
+// Args:
+//    face_verts: Tensor of shape (F, 3, 3) giving (packed) vertex positions for
+//                faces in all the meshes in the batch. Concretely,
+//                face_verts[f, i] = [x, y, z] gives the coordinates for the
+//                ith vertex of the fth face. These vertices are expected to be
+//                in NDC coordinates in the range [-1, 1].
+//    mesh_to_face_first_idx: LongTensor of shape (N) giving the index in
+//                            faces_verts of the first face in each mesh in
+//                            the batch where N is the batch size.
+//    num_faces_per_mesh: LongTensor of shape (N) giving the number of faces
+//                        for each mesh in the batch.
+//    clipped_faces_neighbor_idx: LongTensor of shape (F,) giving the
+//        index of the neighboring face for each face which was clipped to a
+//        quadrilateral and then divided into two triangles.
+//        e.g. for a face f partially behind the image plane which is split into
+//        two triangles (t1, t2): clipped_faces_neighbor_idx[t1_idx] = t2_idx
+//        Faces which are not clipped and subdivided are set to -1.
+//    image_size: Tuple (H, W) giving the size in pixels of the output
+//                image to be rasterized.
+//    blur_radius: float distance in NDC coordinates used to expand the face
+//                 bounding boxes for the rasterization. Set to 0.0 if no blur
+//                 is required.
+//    faces_per_pixel: the number of closest faces to rasterize per pixel.
+//    perspective_correct: Whether to apply perspective correction when
+//                         computing barycentric coordinates. If this is True,
+//                         then this function returns world-space barycentric
+//                         coordinates for each pixel; if this is False then
+//                         this function instead returns screen-space
+//                         barycentric coordinates for each pixel.
+//    clip_barycentric_coords: Whether, after any perspective correction
+//          is applied but before the depth is calculated (e.g. for
+//          z clipping), to "correct" a location outside the face (i.e. with
+//          a negative barycentric coordinate) to a position on the edge of the
+//          face.
+//    cull_backfaces: Bool, Whether to only rasterize mesh faces which are
+//                    visible to the camera.  This assumes that vertices of
+//                    front-facing triangles are ordered in an anti-clockwise
+//                    fashion, and triangles that face away from the camera are
+//                    in a clockwise order relative to the current view
+//                    direction. NOTE: This will only work if the mesh faces are
+//                    consistently defined with counter-clockwise ordering when
+//                    viewed from the outside.
+//
+// Returns:
+//    A 4 element tuple of:
+//    pix_to_face: int64 tensor of shape (N, H, W, K) giving the face index of
+//                 each of the closest faces to the pixel in the rasterized
+//                 image, or -1 for pixels that are not covered by any face.
+//    zbuf: float32 Tensor of shape (N, H, W, K) giving the depth of each of
+//          the closest faces for each pixel.
+//    barycentric_coords: float tensor of shape (N, H, W, K, 3) giving
+//                        barycentric coordinates of the pixel with respect to
+//                        each of the closest faces along the z axis, padded
+//                        with -1 for pixels hit by fewer than
+//                        faces_per_pixel faces.
+//    dists: float tensor of shape (N, H, W, K) giving the euclidean distance
+//           in the (NDC) x/y plane between each pixel and its K closest
+//           faces along the z axis padded  with -1 for pixels hit by fewer than
+//           faces_per_pixel faces.
+inline std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
+RasterizeMeshesNaive(
+    const torch::Tensor& face_verts,
+    const torch::Tensor& mesh_to_face_first_idx,
+    const torch::Tensor& num_faces_per_mesh,
+    const torch::Tensor& clipped_faces_neighbor_idx,
+    const std::tuple<int, int> image_size,
+    const float blur_radius,
+    const int faces_per_pixel,
+    const bool perspective_correct,
+    const bool clip_barycentric_coords,
+    const bool cull_backfaces) {
+  // TODO: Better type checking.
+  // Anything that is not a CUDA tensor is handled by the CPU implementation.
+  if (!face_verts.is_cuda()) {
+    return RasterizeMeshesNaiveCpu(
+        face_verts,
+        mesh_to_face_first_idx,
+        num_faces_per_mesh,
+        clipped_faces_neighbor_idx,
+        image_size,
+        blur_radius,
+        faces_per_pixel,
+        perspective_correct,
+        clip_barycentric_coords,
+        cull_backfaces);
+  }
+#ifdef WITH_CUDA
+  CHECK_CUDA(face_verts);
+  CHECK_CUDA(mesh_to_face_first_idx);
+  CHECK_CUDA(num_faces_per_mesh);
+  return RasterizeMeshesNaiveCuda(
+      face_verts,
+      mesh_to_face_first_idx,
+      num_faces_per_mesh,
+      clipped_faces_neighbor_idx,
+      image_size,
+      blur_radius,
+      faces_per_pixel,
+      perspective_correct,
+      clip_barycentric_coords,
+      cull_backfaces);
+#else
+  AT_ERROR("Not compiled with GPU support");
+#endif
+}
+
+// ****************************************************************************
+// *                            BACKWARD PASS                                 *
+// ****************************************************************************
+
+torch::Tensor RasterizeMeshesBackwardCpu(
+    const torch::Tensor& face_verts,
+    const torch::Tensor& pix_to_face,
+    const torch::Tensor& grad_zbuf,
+    const torch::Tensor& grad_bary,
+    const torch::Tensor& grad_dists,
+    const bool perspective_correct,
+    const bool clip_barycentric_coords);
+
+#ifdef WITH_CUDA
+torch::Tensor RasterizeMeshesBackwardCuda(
+    const torch::Tensor& face_verts,
+    const torch::Tensor& pix_to_face,
+    const torch::Tensor& grad_zbuf,
+    const torch::Tensor& grad_bary,
+    const torch::Tensor& grad_dists,
+    const bool perspective_correct,
+    const bool clip_barycentric_coords);
+#endif
+
+// Args:
+//    face_verts: float32 Tensor of shape (F, 3, 3) (from forward pass) giving
+//                (packed) vertex positions for faces in all the meshes in
+//                 the batch.
+//    pix_to_face: int64 tensor of shape (N, H, W, K) giving the face index of
+//                 each of the closest faces to the pixel in the rasterized
+//                 image, or -1 for pixels that are not covered by any face.
+//    grad_zbuf: Tensor of shape (N, H, W, K) giving upstream gradients
+//               d(loss)/d(zbuf) of the zbuf tensor from the forward pass.
+//    grad_bary: Tensor of shape (N, H, W, K, 3) giving upstream gradients
+//               d(loss)/d(bary) of the barycentric_coords tensor returned by
+//               the forward pass.
+//    grad_dists: Tensor of shape (N, H, W, K) giving upstream gradients
+//                d(loss)/d(dists) of the dists tensor from the forward pass.
+//    perspective_correct: Whether to apply perspective correction when
+//                         computing barycentric coordinates. If this is True,
+//                         then this function returns world-space barycentric
+//                         coordinates for each pixel; if this is False then
+//                         this function instead returns screen-space
+//                         barycentric coordinates for each pixel.
+//    clip_barycentric_coords: Whether, after any perspective correction
+//          is applied but before the depth is calculated (e.g. for
+//          z clipping), to "correct" a location outside the face (i.e. with
+//          a negative barycentric coordinate) to a position on the edge of the
+//          face.
+//
+// Returns:
+//    grad_face_verts: float32 Tensor of shape (F, 3, 3) giving downstream
+//                     gradients for the face vertices.
+inline torch::Tensor RasterizeMeshesBackward(
+    const torch::Tensor& face_verts,
+    const torch::Tensor& pix_to_face,
+    const torch::Tensor& grad_zbuf,
+    const torch::Tensor& grad_bary,
+    const torch::Tensor& grad_dists,
+    const bool perspective_correct,
+    const bool clip_barycentric_coords) {
+  if (face_verts.is_cuda()) { // the device of face_verts selects the backend
+#ifdef WITH_CUDA
+    CHECK_CUDA(face_verts);
+    CHECK_CUDA(pix_to_face);
+    CHECK_CUDA(grad_zbuf);
+    CHECK_CUDA(grad_bary);
+    CHECK_CUDA(grad_dists);
+    return RasterizeMeshesBackwardCuda(
+        face_verts,
+        pix_to_face,
+        grad_zbuf,
+        grad_bary,
+        grad_dists,
+        perspective_correct,
+        clip_barycentric_coords);
+#else // CUDA tensor passed to a build without WITH_CUDA
+    AT_ERROR("Not compiled with GPU support");
+#endif
+  } else {
+    return RasterizeMeshesBackwardCpu(
+        face_verts,
+        pix_to_face,
+        grad_zbuf,
+        grad_bary,
+        grad_dists,
+        perspective_correct,
+        clip_barycentric_coords);
+  }
+}
+
+// ****************************************************************************
+// *                          COARSE RASTERIZATION                            *
+// ****************************************************************************
+
+// RasterizeMeshesCoarseCuda in rasterize_coarse/rasterize_coarse.h
+
+torch::Tensor RasterizeMeshesCoarseCpu(
+    const torch::Tensor& face_verts,
+    const at::Tensor& mesh_to_face_first_idx,
+    const at::Tensor& num_faces_per_mesh,
+    const std::tuple<int, int> image_size,
+    const float blur_radius,
+    const int bin_size,
+    const int max_faces_per_bin);
+
+// Args:
+//    face_verts: Tensor of shape (F, 3, 3) giving (packed) vertex positions for
+//                faces in all the meshes in the batch. Concretely,
+//                face_verts[f, i] = [x, y, z] gives the coordinates for the
+//                ith vertex of the fth face. These vertices are expected to be
+//                in NDC coordinates in the range [-1, 1].
+//    mesh_to_face_first_idx: LongTensor of shape (N) giving the index in
+//                            faces_verts of the first face in each mesh in
+//                            the batch where N is the batch size.
+//    num_faces_per_mesh: LongTensor of shape (N) giving the number of faces
+//                        for each mesh in the batch.
+//    image_size: Tuple (H, W) giving the size in pixels of the output
+//                image to be rasterized.
+//    blur_radius: float distance in NDC coordinates used to expand the face
+//                 bounding boxes for the rasterization. Set to 0.0 if no blur
+//                 is required.
+//    bin_size: Size of each bin within the image (in pixels)
+//    max_faces_per_bin: Maximum number of faces to count in each bin.
+//
+// Returns:
+//   bin_face_idxs: Tensor of shape (N, num_bins, num_bins, K) giving the
+//                  indices of faces that fall into each bin.
+
+inline torch::Tensor RasterizeMeshesCoarse(
+    const torch::Tensor& face_verts,
+    const torch::Tensor& mesh_to_face_first_idx,
+    const torch::Tensor& num_faces_per_mesh,
+    const std::tuple<int, int> image_size,
+    const float blur_radius,
+    const int bin_size,
+    const int max_faces_per_bin) {
+  if (face_verts.is_cuda()) { // the device of face_verts selects the backend
+#ifdef WITH_CUDA
+    CHECK_CUDA(face_verts);
+    CHECK_CUDA(mesh_to_face_first_idx);
+    CHECK_CUDA(num_faces_per_mesh);
+    return RasterizeMeshesCoarseCuda(
+        face_verts,
+        mesh_to_face_first_idx,
+        num_faces_per_mesh,
+        image_size,
+        blur_radius,
+        bin_size,
+        max_faces_per_bin);
+#else // CUDA tensor passed to a build without WITH_CUDA
+    AT_ERROR("Not compiled with GPU support");
+#endif
+  } else {
+    return RasterizeMeshesCoarseCpu(
+        face_verts,
+        mesh_to_face_first_idx,
+        num_faces_per_mesh,
+        image_size,
+        blur_radius,
+        bin_size,
+        max_faces_per_bin);
+  }
+}
+
+// ****************************************************************************
+// *                            FINE RASTERIZATION                            *
+// ****************************************************************************
+
+#ifdef WITH_CUDA
+std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
+RasterizeMeshesFineCuda(
+    const torch::Tensor& face_verts,
+    const torch::Tensor& bin_faces,
+    const torch::Tensor& clipped_faces_neighbor_idx,
+    const std::tuple<int, int> image_size,
+    const float blur_radius,
+    const int bin_size,
+    const int faces_per_pixel,
+    const bool perspective_correct,
+    const bool clip_barycentric_coords,
+    const bool cull_backfaces);
+#endif
+// Args:
+//    face_verts: Tensor of shape (F, 3, 3) giving (packed) vertex positions for
+//                faces in all the meshes in the batch. Concretely,
+//                face_verts[f, i] = [x, y, z] gives the coordinates for the
+//                ith vertex of the fth face. These vertices are expected to be
+//                in NDC coordinates in the range [-1, 1].
+//    bin_faces: int32 Tensor of shape (N, B, B, M) giving the indices of faces
+//               that fall into each bin (output from coarse rasterization).
+//    clipped_faces_neighbor_idx: LongTensor of shape (F,) giving the
+//        index of the neighboring face for each face which was clipped to a
+//        quadrilateral and then divided into two triangles.
+//        e.g. for a face f partially behind the image plane which is split into
+//        two triangles (t1, t2): clipped_faces_neighbor_idx[t1_idx] = t2_idx
+//        Faces which are not clipped and subdivided are set to -1.
+//    image_size: Tuple (H, W) giving the size in pixels of the output
+//                image to be rasterized.
+//    blur_radius: float distance in NDC coordinates used to expand the face
+//                 bounding boxes for the rasterization. Set to 0.0 if no blur
+//                 is required.
+//    bin_size: Size of each bin within the image (in pixels)
+//    faces_per_pixel: the number of closest faces to rasterize per pixel.
+//    perspective_correct: Whether to apply perspective correction when
+//                         computing barycentric coordinates. If this is True,
+//                         then this function returns world-space barycentric
+//                         coordinates for each pixel; if this is False then
+//                         this function instead returns screen-space
+//                         barycentric coordinates for each pixel.
+//    clip_barycentric_coords: Whether, after any perspective correction
+//          is applied but before the depth is calculated (e.g. for
+//          z clipping), to "correct" a location outside the face (i.e. with
+//          a negative barycentric coordinate) to a position on the edge of the
+//          face.
+//    cull_backfaces: Bool, Whether to only rasterize mesh faces which are
+//                    visible to the camera.  This assumes that vertices of
+//                    front-facing triangles are ordered in an anti-clockwise
+//                    fashion, and triangles that face away from the camera are
+//                    in a clockwise order relative to the current view
+//                    direction. NOTE: This will only work if the mesh faces are
+//                    consistently defined with counter-clockwise ordering when
+//                    viewed from the outside.
+//
+// Returns (same as rasterize_meshes):
+//    A 4 element tuple of:
+//    pix_to_face: int64 tensor of shape (N, H, W, K) giving the face index of
+//                 each of the closest faces to the pixel in the rasterized
+//                 image, or -1 for pixels that are not covered by any face.
+//    zbuf: float32 Tensor of shape (N, H, W, K) giving the depth of each of
+//          the closest faces for each pixel.
+//    barycentric_coords: float tensor of shape (N, H, W, K, 3) giving
+//                        barycentric coordinates of the pixel with respect to
+//                        each of the closest faces along the z axis, padded
+//                        with -1 for pixels hit by fewer than
+//                        faces_per_pixel faces.
+//    dists: float tensor of shape (N, H, W, K) giving the euclidean distance
+//           in the (NDC) x/y plane between each pixel and its K closest
+//           faces along the z axis padded  with -1 for pixels hit by fewer than
+//           faces_per_pixel faces.
+inline std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
+RasterizeMeshesFine(
+    const torch::Tensor& face_verts,
+    const torch::Tensor& bin_faces,
+    const torch::Tensor& clipped_faces_neighbor_idx,
+    const std::tuple<int, int> image_size,
+    const float blur_radius,
+    const int bin_size,
+    const int faces_per_pixel,
+    const bool perspective_correct,
+    const bool clip_barycentric_coords,
+    const bool cull_backfaces) {
+  if (face_verts.is_cuda()) { // the device of face_verts selects the backend
+#ifdef WITH_CUDA
+    CHECK_CUDA(face_verts);
+    CHECK_CUDA(bin_faces);
+    return RasterizeMeshesFineCuda(
+        face_verts,
+        bin_faces,
+        clipped_faces_neighbor_idx,
+        image_size,
+        blur_radius,
+        bin_size,
+        faces_per_pixel,
+        perspective_correct,
+        clip_barycentric_coords,
+        cull_backfaces);
+#else // CUDA tensor passed to a build without WITH_CUDA
+    AT_ERROR("Not compiled with GPU support");
+#endif
+  } else {
+    AT_ERROR("NOT IMPLEMENTED"); // CPU input: this branch always raises
+  }
+}
+
+// ****************************************************************************
+// *                         MAIN ENTRY POINT                                 *
+// ****************************************************************************
+
+// This is the main entry point for the forward pass of the mesh rasterizer;
+// it uses either naive or coarse-to-fine rasterization based on bin_size.
+//
+// Args:
+//    face_verts: Tensor of shape (F, 3, 3) giving (packed) vertex positions for
+//                faces in all the meshes in the batch. Concretely,
+//                face_verts[f, i] = [x, y, z] gives the coordinates for the
+//                ith vertex of the fth face. These vertices are expected to be
+//                in NDC coordinates in the range [-1, 1].
+//    mesh_to_face_first_idx: LongTensor of shape (N) giving the index in
+//                            faces_verts of the first face in each mesh in
+//                            the batch where N is the batch size.
+//    num_faces_per_mesh: LongTensor of shape (N) giving the number of faces
+//                        for each mesh in the batch.
+//    clipped_faces_neighbor_idx: LongTensor of shape (F,) giving the
+//        index of the neighboring face for each face which was clipped to a
+//        quadrilateral and then divided into two triangles.
+//        e.g. for a face f partially behind the image plane which is split into
+//        two triangles (t1, t2): clipped_faces_neighbor_idx[t1_idx] = t2_idx
+//        Faces which are not clipped and subdivided are set to -1.
+//    image_size: Tuple (H, W) giving the size in pixels of the output
+//                image to be rasterized.
+//    blur_radius: float distance in NDC coordinates used to expand the face
+//                 bounding boxes for the rasterization. Set to 0.0 if no blur
+//                 is required.
+//    faces_per_pixel: the number of closest faces to rasterize per pixel.
+//    bin_size: Bin size (in pixels) for coarse-to-fine rasterization. Setting
+//              bin_size=0 uses naive rasterization instead.
+//    max_faces_per_bin: The maximum number of faces allowed to fall into each
+//                      bin when using coarse-to-fine rasterization.
+//    perspective_correct: Whether to apply perspective correction when
+//                         computing barycentric coordinates. If this is True,
+//                         then this function returns world-space barycentric
+//                         coordinates for each pixel; if this is False then
+//                         this function instead returns screen-space
+//                         barycentric coordinates for each pixel.
+//    clip_barycentric_coords: Whether, after any perspective correction
+//          is applied but before the depth is calculated (e.g. for
+//          z clipping), to "correct" a location outside the face (i.e. with
+//          a negative barycentric coordinate) to a position on the edge of the
+//          face.
+//    cull_backfaces: Bool, Whether to only rasterize mesh faces which are
+//                    visible to the camera.  This assumes that vertices of
+//                    front-facing triangles are ordered in an anti-clockwise
+//                    fashion, and triangles that face away from the camera are
+//                    in a clockwise order relative to the current view
+//                    direction. NOTE: This will only work if the mesh faces are
+//                    consistently defined with counter-clockwise ordering when
+//                    viewed from the outside.
+//
+// Returns:
+//    A 4 element tuple of:
+//    pix_to_face: int64 tensor of shape (N, H, W, K) giving the face index of
+//                 each of the closest faces to the pixel in the rasterized
+//                 image, or -1 for pixels that are not covered by any face.
+//    zbuf: float32 Tensor of shape (N, H, W, K) giving the depth of each of
+//          the closest faces for each pixel.
+//    barycentric_coords: float tensor of shape (N, H, W, K, 3) giving
+//                        barycentric coordinates of the pixel with respect to
+//                        each of the closest faces along the z axis, padded
+//                        with -1 for pixels hit by fewer than
+//                        faces_per_pixel faces.
+//    dists: float tensor of shape (N, H, W, K) giving the euclidean distance
+//           in the (NDC) x/y plane between each pixel and its K closest
+//           faces along the z axis padded  with -1 for pixels hit by fewer than
+//           faces_per_pixel faces.
+inline std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
+RasterizeMeshes(
+    const torch::Tensor& face_verts,
+    const torch::Tensor& mesh_to_face_first_idx,
+    const torch::Tensor& num_faces_per_mesh,
+    const torch::Tensor& clipped_faces_neighbor_idx,
+    const std::tuple<int, int> image_size,
+    const float blur_radius,
+    const int faces_per_pixel,
+    const int bin_size,
+    const int max_faces_per_bin,
+    const bool perspective_correct,
+    const bool clip_barycentric_coords,
+    const bool cull_backfaces) {
+  if (bin_size > 0 && max_faces_per_bin > 0) {
+    // Coarse-to-fine: bin the faces first, then rasterize using those bins.
+    at::Tensor bin_faces = RasterizeMeshesCoarse(
+        face_verts,
+        mesh_to_face_first_idx,
+        num_faces_per_mesh,
+        image_size,
+        blur_radius,
+        bin_size,
+        max_faces_per_bin);
+    return RasterizeMeshesFine(
+        face_verts,
+        bin_faces,
+        clipped_faces_neighbor_idx,
+        image_size,
+        blur_radius,
+        bin_size,
+        faces_per_pixel,
+        perspective_correct,
+        clip_barycentric_coords,
+        cull_backfaces);
+  } else {
+    // Either value is non-positive: use the naive per-pixel implementation.
+    return RasterizeMeshesNaive(
+        face_verts,
+        mesh_to_face_first_idx,
+        num_faces_per_mesh,
+        clipped_faces_neighbor_idx,
+        image_size,
+        blur_radius,
+        faces_per_pixel,
+        perspective_correct,
+        clip_barycentric_coords,
+        cull_backfaces);
+  }
+}
diff --git a/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes_cpu.cpp b/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..210df55e43de5602c3c80b05e0ff8b9d7e59253a
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes_cpu.cpp
@@ -0,0 +1,640 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/extension.h>
+#include <algorithm>
+#include <list>
+#include <queue>
+#include <thread>
+#include <tuple>
+#include "ATen/core/TensorAccessor.h"
+#include "rasterize_points/rasterization_utils.h"
+#include "utils/geometry_utils.h"
+#include "utils/vec2.h"
+#include "utils/vec3.h"
+
+// Return the (x, y, z) values of vertex `vertex_index` of a (3, 3) face as a tuple.
+template <typename Face>
+auto ExtractVerts(const Face& face, const int vertex_index) {
+  return std::make_tuple(
+      face[vertex_index][0], face[vertex_index][1], face[vertex_index][2]);
+}
+
+// Compute per-face bounds; row f is (x_min, y_min, x_max, y_max, z_min, z_max).
+auto ComputeFaceBoundingBoxes(const torch::Tensor& face_verts) {
+  const int total_F = face_verts.size(0);
+  auto float_opts = face_verts.options().dtype(torch::kFloat32);
+  auto face_verts_a = face_verts.accessor<float, 3>();
+  torch::Tensor face_bboxes = torch::full({total_F, 6}, -2.0, float_opts); // fill value is overwritten for every face
+
+  // Loop through all the faces
+  for (int f = 0; f < total_F; ++f) {
+    const auto& face = face_verts_a[f];
+    float x0, x1, x2, y0, y1, y2, z0, z1, z2;
+    std::tie(x0, y0, z0) = ExtractVerts(face, 0);
+    std::tie(x1, y1, z1) = ExtractVerts(face, 1);
+    std::tie(x2, y2, z2) = ExtractVerts(face, 2);
+
+    const float x_min = std::min(x0, std::min(x1, x2));
+    const float y_min = std::min(y0, std::min(y1, y2));
+    const float x_max = std::max(x0, std::max(x1, x2));
+    const float y_max = std::max(y0, std::max(y1, y2));
+    const float z_min = std::min(z0, std::min(z1, z2));
+    const float z_max = std::max(z0, std::max(z1, z2));
+
+    face_bboxes[f][0] = x_min;
+    face_bboxes[f][1] = y_min;
+    face_bboxes[f][2] = x_max;
+    face_bboxes[f][3] = y_max;
+    face_bboxes[f][4] = z_min;
+    face_bboxes[f][5] = z_max;
+  }
+
+  return face_bboxes;
+}
+
+// Test the point (px, py) against face_bbox expanded by blur_radius in x and y.
+// Return true if the point is outside that box or if face_bbox[4] (z_min) < kEpsilon.
+template <typename Face>
+bool CheckPointOutsideBoundingBox(
+    const Face& face_bbox,
+    float blur_radius,
+    float px,
+    float py) {
+  // Read triangle bbox coordinates and expand by blur radius.
+  float x_min = face_bbox[0] - blur_radius;
+  float y_min = face_bbox[1] - blur_radius;
+  float x_max = face_bbox[2] + blur_radius;
+  float y_max = face_bbox[3] + blur_radius;
+
+  // Faces with at least one vertex behind the camera won't render correctly
+  // and should be removed or clipped before calling the rasterizer
+  const bool z_invalid = face_bbox[4] < kEpsilon;
+
+  // True when the point is outside the expanded bounding box or the face is z-invalid.
+  return (px > x_max || px < x_min || py > y_max || py < y_min || z_invalid);
+}
+
+// Calculate a per-face area value from the xy vertices. Returns a 1-D tensor of
+// shape (total_faces,) holding EdgeFunctionForward(v0, v1, v2); the -1 fill is overwritten.
+auto ComputeFaceAreas(const torch::Tensor& face_verts) {
+  const int total_F = face_verts.size(0);
+  auto float_opts = face_verts.options().dtype(torch::kFloat32);
+  auto face_verts_a = face_verts.accessor<float, 3>();
+  torch::Tensor face_areas = torch::full({total_F}, -1, float_opts);
+
+  // Loop through all the faces
+  for (int f = 0; f < total_F; ++f) {
+    const auto& face = face_verts_a[f];
+    float x0, x1, x2, y0, y1, y2, z0, z1, z2; // z values are extracted but not used here
+    std::tie(x0, y0, z0) = ExtractVerts(face, 0);
+    std::tie(x1, y1, z1) = ExtractVerts(face, 1);
+    std::tie(x2, y2, z2) = ExtractVerts(face, 2);
+
+    const vec2<float> v0(x0, y0);
+    const vec2<float> v1(x1, y1);
+    const vec2<float> v2(x2, y2);
+
+    const float face_area = EdgeFunctionForward(v0, v1, v2);
+    face_areas[f] = face_area;
+  }
+
+  return face_areas;
+}
+
+// Predicate for std::find_if over the top k entries: it is true when the
+// entry's face index (tuple element 1) equals the stored neighbor_idx.
+struct IsNeighbor {
+  IsNeighbor(int neighbor_idx) {
+    this->neighbor_idx = neighbor_idx;
+  }
+  bool operator()(std::tuple<float, int, float, float, float, float> elem) {
+    return (std::get<1>(elem) == neighbor_idx);
+  }
+  int neighbor_idx; // face index to search for
+};
+
+namespace {
+void RasterizeMeshesNaiveCpu_worker(
+    const int start_yi, // first image row handled by this call (inclusive)
+    const int end_yi, // one past the last image row handled by this call
+    const torch::Tensor& mesh_to_face_first_idx,
+    const torch::Tensor& num_faces_per_mesh,
+    const float blur_radius,
+    const bool perspective_correct,
+    const bool clip_barycentric_coords,
+    const bool cull_backfaces,
+    const int32_t N,
+    const int H,
+    const int W,
+    const int K, // maximum number of faces kept per pixel
+    at::TensorAccessor<float, 3>& face_verts_a,
+    at::TensorAccessor<float, 1>& face_areas_a,
+    at::TensorAccessor<float, 2>& face_bboxes_a,
+    at::TensorAccessor<int64_t, 1>& neighbor_idx_a,
+    at::TensorAccessor<float, 4>& zbuf_a,
+    at::TensorAccessor<int64_t, 4>& face_idxs_a,
+    at::TensorAccessor<float, 4>& pix_dists_a,
+    at::TensorAccessor<float, 5>& barycentric_coords_a) {
+  for (int n = 0; n < N; ++n) {
+    // Loop through each mesh in the batch.
+    // Get the start index of the faces in faces_packed and the num faces
+    // in the mesh to avoid having to loop through all the faces.
+    const int face_start_idx = mesh_to_face_first_idx[n].item().to<int32_t>();
+    const int face_stop_idx =
+        (face_start_idx + num_faces_per_mesh[n].item().to<int32_t>());
+
+    // Iterate through the horizontal lines of the image from top to bottom.
+    for (int yi = start_yi; yi < end_yi; ++yi) {
+      // Reverse the order of yi so that +Y is pointing upwards in the image.
+      const int yidx = H - 1 - yi;
+
+      // Y NDC coordinate of the pixel (PixToNonSquareNdc uses the pixel center).
+      const float yf = PixToNonSquareNdc(yidx, H, W);
+      // Iterate through pixels on this horizontal line, left to right.
+      for (int xi = 0; xi < W; ++xi) {
+        // Reverse the order of xi so that +X is pointing to the left in the
+        // image.
+        const int xidx = W - 1 - xi;
+
+        // X NDC coordinate of the pixel (PixToNonSquareNdc uses the pixel center).
+        const float xf = PixToNonSquareNdc(xidx, W, H);
+
+        // Use a deque to hold values:
+        // (z, idx, signed_dist, bary.x, bary.y, bary.z)
+        // Sort the deque as needed to mimic a priority queue.
+        std::deque<std::tuple<float, int, float, float, float, float>> q;
+
+        // Loop through the faces in the mesh.
+        for (int f = face_start_idx; f < face_stop_idx; ++f) {
+          // Get coordinates of three face vertices.
+          const auto& face = face_verts_a[f];
+          float x0, x1, x2, y0, y1, y2, z0, z1, z2;
+          std::tie(x0, y0, z0) = ExtractVerts(face, 0);
+          std::tie(x1, y1, z1) = ExtractVerts(face, 1);
+          std::tie(x2, y2, z2) = ExtractVerts(face, 2);
+
+          const vec2<float> v0(x0, y0);
+          const vec2<float> v1(x1, y1);
+          const vec2<float> v2(x2, y2);
+
+          const float face_area = face_areas_a[f];
+          const bool back_face = face_area < 0.0;
+          // Check if the face is visible to the camera.
+          if (cull_backfaces && back_face) {
+            continue;
+          }
+          // Skip faces with zero area.
+          if (face_area <= kEpsilon && face_area >= -1.0f * kEpsilon) {
+            continue;
+          }
+
+          // Skip if point is outside the face bounding box.
+          const auto face_bbox = face_bboxes_a[f];
+          const bool outside_bbox = CheckPointOutsideBoundingBox(
+              face_bbox, std::sqrt(blur_radius), xf, yf); // sqrt: blur_radius is compared to a squared distance below
+          if (outside_bbox) {
+            continue;
+          }
+
+          // Compute barycentric coordinates and use this to get the
+          // depth of the point on the triangle.
+          const vec2<float> pxy(xf, yf);
+          const vec3<float> bary0 =
+              BarycentricCoordinatesForward(pxy, v0, v1, v2);
+          const vec3<float> bary = !perspective_correct
+              ? bary0
+              : BarycentricPerspectiveCorrectionForward(bary0, z0, z1, z2);
+
+          const vec3<float> bary_clip =
+              !clip_barycentric_coords ? bary : BarycentricClipForward(bary);
+
+          // Use barycentric coordinates to get the depth of the current pixel
+          const float pz =
+              (bary_clip.x * z0 + bary_clip.y * z1 + bary_clip.z * z2);
+
+          if (pz < 0) {
+            continue; // Point is behind the image plane so ignore.
+          }
+
+          // Compute squared distance of the point to the triangle.
+          const float dist = PointTriangleDistanceForward(pxy, v0, v1, v2);
+
+          // Use the bary coordinates to determine if the point is
+          // inside the face.
+          const bool inside = bary.x > 0.0f && bary.y > 0.0f && bary.z > 0.0f;
+
+          // If the point is inside the triangle then signed_dist
+          // is negative.
+          const float signed_dist = inside ? -dist : dist;
+
+          // Check if pixel is outside blur region
+          if (!inside && dist >= blur_radius) {
+            continue;
+          }
+
+          // Handle the case where a face (f) partially behind the image plane
+          // is clipped to a quadrilateral and then split into two faces (t1,
+          // t2). In this case we:
+          // 1. Find the index of the neighbor (e.g. for t1 need index of t2)
+          // 2. Check if the neighbor (t2) is already in the top K faces
+          // 3. If yes, compare the distance of the pixel to t1 with the
+          // distance to t2.
+          // 4. If dist_t1 < dist_t2, overwrite the values for t2 in the top K
+          // faces.
+          const int neighbor_idx = neighbor_idx_a[f];
+          int idx_top_k = -1;
+
+          // Check if neighboring face is already in the top K.
+          if (neighbor_idx != -1) {
+            const auto it =
+                std::find_if(q.begin(), q.end(), IsNeighbor(neighbor_idx));
+            // Get the index of the element from the iterator
+            idx_top_k = (it != q.end()) ? it - q.begin() : idx_top_k;
+          }
+
+          // If idx_top_k idx is not -1 then it is in the top K struct.
+          if (idx_top_k != -1) {
+            // If dist of current face is less than neighbor, overwrite
+            // the neighbor face values in the top K struct.
+            const auto neighbor = q[idx_top_k];
+            const float dist_neighbor = std::abs(std::get<2>(neighbor));
+            if (dist < dist_neighbor) {
+              // Overwrite the neighbor face values.
+              q[idx_top_k] = std::make_tuple(
+                  pz, f, signed_dist, bary_clip.x, bary_clip.y, bary_clip.z);
+            }
+          } else {
+            // Handle as a normal face.
+            // The pixel is inside the face or within its blur region.
+            // Add at the end of the deque.
+            q.emplace_back(
+                pz, f, signed_dist, bary_clip.x, bary_clip.y, bary_clip.z);
+          }
+
+          // Sort the deque inplace based on the z distance
+          // to mimic using a priority queue.
+          std::sort(q.begin(), q.end());
+          if (static_cast<int>(q.size()) > K) {
+            // remove the last value
+            q.pop_back();
+          }
+        }
+        while (!q.empty()) {
+          // Pop from the back (largest z first); output slot i is the remaining size.
+          auto t = q.back();
+          q.pop_back();
+          const int i = q.size();
+          zbuf_a[n][yi][xi][i] = std::get<0>(t);
+          face_idxs_a[n][yi][xi][i] = std::get<1>(t);
+          pix_dists_a[n][yi][xi][i] = std::get<2>(t);
+          barycentric_coords_a[n][yi][xi][i][0] = std::get<3>(t);
+          barycentric_coords_a[n][yi][xi][i][1] = std::get<4>(t);
+          barycentric_coords_a[n][yi][xi][i][2] = std::get<5>(t);
+        }
+      }
+    }
+  }
+}
+} // namespace
+
+std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
+RasterizeMeshesNaiveCpu( // naive CPU rasterizer: image rows are split across worker threads
+    const torch::Tensor& face_verts,
+    const torch::Tensor& mesh_to_face_first_idx,
+    const torch::Tensor& num_faces_per_mesh,
+    const torch::Tensor& clipped_faces_neighbor_idx,
+    const std::tuple<int, int> image_size,
+    const float blur_radius,
+    const int faces_per_pixel,
+    const bool perspective_correct,
+    const bool clip_barycentric_coords,
+    const bool cull_backfaces) {
+  if (face_verts.ndimension() != 3 || face_verts.size(1) != 3 ||
+      face_verts.size(2) != 3) {
+    AT_ERROR("face_verts must have dimensions (num_faces, 3, 3)");
+  }
+  if (num_faces_per_mesh.size(0) != mesh_to_face_first_idx.size(0)) {
+    AT_ERROR(
+        "num_faces_per_mesh must have save size first dimension as mesh_to_face_first_idx");
+  }
+
+  const int32_t N = mesh_to_face_first_idx.size(0); // batch_size.
+  const int H = std::get<0>(image_size); // image_size is read as (H, W)
+  const int W = std::get<1>(image_size);
+  const int K = faces_per_pixel;
+
+  auto long_opts = num_faces_per_mesh.options().dtype(torch::kInt64);
+  auto float_opts = face_verts.options().dtype(torch::kFloat32);
+
+  // Initialize output tensors to -1; slots the workers do not write keep that value.
+  torch::Tensor face_idxs = torch::full({N, H, W, K}, -1, long_opts);
+  torch::Tensor zbuf = torch::full({N, H, W, K}, -1, float_opts);
+  torch::Tensor pix_dists = torch::full({N, H, W, K}, -1, float_opts);
+  torch::Tensor barycentric_coords =
+      torch::full({N, H, W, K, 3}, -1, float_opts);
+
+  auto face_verts_a = face_verts.accessor<float, 3>();
+  auto face_idxs_a = face_idxs.accessor<int64_t, 4>();
+  auto zbuf_a = zbuf.accessor<float, 4>();
+  auto pix_dists_a = pix_dists.accessor<float, 4>();
+  auto barycentric_coords_a = barycentric_coords.accessor<float, 5>();
+  auto neighbor_idx_a = clipped_faces_neighbor_idx.accessor<int64_t, 1>();
+
+  auto face_bboxes = ComputeFaceBoundingBoxes(face_verts);
+  auto face_bboxes_a = face_bboxes.accessor<float, 2>();
+  auto face_areas = ComputeFaceAreas(face_verts);
+  auto face_areas_a = face_areas.accessor<float, 1>();
+
+  const int64_t n_threads = at::get_num_threads();
+  std::vector<std::thread> threads;
+  threads.reserve(n_threads);
+  const int chunk_size = 1 + (H - 1) / n_threads; // rows per thread; equals ceil(H / n_threads) for H >= 1
+  int start_yi = 0;
+  for (int iThread = 0; iThread < n_threads; ++iThread) {
+    const int64_t end_yi = std::min(start_yi + chunk_size, H); // clamp to the image height
+    threads.emplace_back(
+        RasterizeMeshesNaiveCpu_worker,
+        start_yi,
+        end_yi,
+        mesh_to_face_first_idx,
+        num_faces_per_mesh,
+        blur_radius,
+        perspective_correct,
+        clip_barycentric_coords,
+        cull_backfaces,
+        N,
+        H,
+        W,
+        K,
+        std::ref(face_verts_a),
+        std::ref(face_areas_a),
+        std::ref(face_bboxes_a),
+        std::ref(neighbor_idx_a),
+        std::ref(zbuf_a),
+        std::ref(face_idxs_a),
+        std::ref(pix_dists_a),
+        std::ref(barycentric_coords_a));
+    start_yi += chunk_size;
+  }
+  for (auto&& thread : threads) { // wait for every row chunk before returning the outputs
+    thread.join();
+  }
+
+  return std::make_tuple(face_idxs, zbuf, barycentric_coords, pix_dists);
+}
+
+torch::Tensor RasterizeMeshesBackwardCpu( // returns the gradient for face_verts, shape (F, 3, 3)
+    const torch::Tensor& face_verts, // (F, 3, 3)
+    const torch::Tensor& pix_to_face, // (N, H, W, K)
+    const torch::Tensor& grad_zbuf, // (N, H, W, K)
+    const torch::Tensor& grad_bary, // (N, H, W, K, 3)
+    const torch::Tensor& grad_dists, // (N, H, W, K)
+    const bool perspective_correct,
+    const bool clip_barycentric_coords) {
+  const int F = face_verts.size(0);
+  const int N = pix_to_face.size(0);
+  const int H = pix_to_face.size(1);
+  const int W = pix_to_face.size(2);
+  const int K = pix_to_face.size(3);
+
+  torch::Tensor grad_face_verts = torch::zeros({F, 3, 3}, face_verts.options());
+  auto face_verts_a = face_verts.accessor<float, 3>();
+  auto pix_to_face_a = pix_to_face.accessor<int64_t, 4>();
+  auto grad_dists_a = grad_dists.accessor<float, 4>();
+  auto grad_zbuf_a = grad_zbuf.accessor<float, 4>();
+  auto grad_bary_a = grad_bary.accessor<float, 5>();
+
+  for (int n = 0; n < N; ++n) {
+    // Iterate through the horizontal lines of the image from top to bottom.
+    for (int y = 0; y < H; ++y) {
+      // Reverse the order of yi so that +Y is pointing upwards in the image.
+      const int yidx = H - 1 - y;
+
+      // Y NDC coordinate of the pixel (PixToNonSquareNdc uses the pixel center).
+      const float yf = PixToNonSquareNdc(yidx, H, W);
+      // Iterate through pixels on this horizontal line, left to right.
+      for (int x = 0; x < W; ++x) {
+        // Reverse the order of xi so that +X is pointing to the left in the
+        // image.
+        const int xidx = W - 1 - x;
+
+        // X NDC coordinate of the pixel (PixToNonSquareNdc uses the pixel center).
+        const float xf = PixToNonSquareNdc(xidx, W, H);
+        const vec2<float> pxy(xf, yf);
+
+        // Iterate through the faces that hit this pixel.
+        for (int k = 0; k < K; ++k) {
+          // Get face index from forward pass output.
+          const int f = pix_to_face_a[n][y][x][k];
+          if (f < 0) {
+            continue; // padded face.
+          }
+          // Get coordinates of the three face vertices.
+          const auto face_verts_f = face_verts_a[f];
+          const float x0 = face_verts_f[0][0];
+          const float y0 = face_verts_f[0][1];
+          const float z0 = face_verts_f[0][2];
+          const float x1 = face_verts_f[1][0];
+          const float y1 = face_verts_f[1][1];
+          const float z1 = face_verts_f[1][2];
+          const float x2 = face_verts_f[2][0];
+          const float y2 = face_verts_f[2][1];
+          const float z2 = face_verts_f[2][2];
+          const vec2<float> v0xy(x0, y0);
+          const vec2<float> v1xy(x1, y1);
+          const vec2<float> v2xy(x2, y2);
+
+          // Get upstream gradients for the face.
+          const float grad_dist_upstream = grad_dists_a[n][y][x][k];
+          const float grad_zbuf_upstream = grad_zbuf_a[n][y][x][k];
+          const auto grad_bary_upstream_w012 = grad_bary_a[n][y][x][k];
+          const float grad_bary_upstream_w0 = grad_bary_upstream_w012[0];
+          const float grad_bary_upstream_w1 = grad_bary_upstream_w012[1];
+          const float grad_bary_upstream_w2 = grad_bary_upstream_w012[2];
+          const vec3<float> grad_bary_upstream(
+              grad_bary_upstream_w0,
+              grad_bary_upstream_w1,
+              grad_bary_upstream_w2);
+
+          const vec3<float> bary0 =
+              BarycentricCoordinatesForward(pxy, v0xy, v1xy, v2xy);
+          const vec3<float> bary = !perspective_correct
+              ? bary0
+              : BarycentricPerspectiveCorrectionForward(bary0, z0, z1, z2);
+          const vec3<float> bary_clip =
+              !clip_barycentric_coords ? bary : BarycentricClipForward(bary);
+
+          // Distances inside the face are negative so get the
+          // correct sign to apply to the upstream gradient.
+          const bool inside = bary.x > 0.0f && bary.y > 0.0f && bary.z > 0.0f;
+          const float sign = inside ? -1.0f : 1.0f;
+
+          const auto grad_dist_f = PointTriangleDistanceBackward(
+              pxy, v0xy, v1xy, v2xy, sign * grad_dist_upstream);
+          const auto ddist_d_v0 = std::get<1>(grad_dist_f);
+          const auto ddist_d_v1 = std::get<2>(grad_dist_f);
+          const auto ddist_d_v2 = std::get<3>(grad_dist_f);
+
+          // Upstream gradient for barycentric coords from zbuf calculation:
+          // zbuf = bary_w0 * z0 + bary_w1 * z1 + bary_w2 * z2
+          // Therefore
+          // d_zbuf/d_bary_w0 = z0
+          // d_zbuf/d_bary_w1 = z1
+          // d_zbuf/d_bary_w2 = z2
+          const vec3<float> d_zbuf_d_baryclip(z0, z1, z2);
+
+          // Total upstream barycentric gradients are the sum of
+          // external upstream gradients and contribution from zbuf.
+          const vec3<float> grad_bary_f_sum =
+              (grad_bary_upstream + grad_zbuf_upstream * d_zbuf_d_baryclip);
+
+          vec3<float> grad_bary0 = grad_bary_f_sum;
+
+          if (clip_barycentric_coords) {
+            grad_bary0 = BarycentricClipBackward(bary, grad_bary0);
+          }
+
+          if (perspective_correct) {
+            auto perspective_grads = BarycentricPerspectiveCorrectionBackward(
+                bary0, z0, z1, z2, grad_bary0);
+            grad_bary0 = std::get<0>(perspective_grads);
+            grad_face_verts[f][0][2] += std::get<1>(perspective_grads); // z-gradient terms from the correction
+            grad_face_verts[f][1][2] += std::get<2>(perspective_grads);
+            grad_face_verts[f][2][2] += std::get<3>(perspective_grads);
+          }
+
+          auto grad_bary_f =
+              BarycentricCoordsBackward(pxy, v0xy, v1xy, v2xy, grad_bary0);
+          const vec2<float> dbary_d_v0 = std::get<1>(grad_bary_f);
+          const vec2<float> dbary_d_v1 = std::get<2>(grad_bary_f);
+          const vec2<float> dbary_d_v2 = std::get<3>(grad_bary_f);
+
+          // Accumulate into the output gradient buffer (several pixels may hit face f).
+          grad_face_verts[f][0][0] += dbary_d_v0.x + ddist_d_v0.x;
+          grad_face_verts[f][0][1] += dbary_d_v0.y + ddist_d_v0.y;
+          grad_face_verts[f][0][2] += grad_zbuf_upstream * bary_clip.x; // d_zbuf/d_z0 = bary_clip.x
+          grad_face_verts[f][1][0] += dbary_d_v1.x + ddist_d_v1.x;
+          grad_face_verts[f][1][1] += dbary_d_v1.y + ddist_d_v1.y;
+          grad_face_verts[f][1][2] += grad_zbuf_upstream * bary_clip.y;
+          grad_face_verts[f][2][0] += dbary_d_v2.x + ddist_d_v2.x;
+          grad_face_verts[f][2][1] += dbary_d_v2.y + ddist_d_v2.y;
+          grad_face_verts[f][2][2] += grad_zbuf_upstream * bary_clip.z;
+        }
+      }
+    }
+  }
+  return grad_face_verts;
+}
+
+torch::Tensor RasterizeMeshesCoarseCpu( // returns bin_faces, shape (N, BH, BW, M), int32, padded with -1
+    const torch::Tensor& face_verts,
+    const torch::Tensor& mesh_to_face_first_idx,
+    const torch::Tensor& num_faces_per_mesh,
+    const std::tuple<int, int> image_size,
+    const float blur_radius,
+    const int bin_size,
+    const int max_faces_per_bin) {
+  if (face_verts.ndimension() != 3 || face_verts.size(1) != 3 ||
+      face_verts.size(2) != 3) {
+    AT_ERROR("face_verts must have dimensions (num_faces, 3, 3)");
+  }
+  if (num_faces_per_mesh.ndimension() != 1) {
+    AT_ERROR("num_faces_per_mesh can only have one dimension");
+  }
+
+  const int N = num_faces_per_mesh.size(0); // batch size.
+  const int M = max_faces_per_bin;
+
+  const float H = std::get<0>(image_size);
+  const float W = std::get<1>(image_size);
+
+  // Round-up division; H and W are floats, so the result is truncated when stored as int.
+  const int BH = 1 + (H - 1) / bin_size;
+  const int BW = 1 + (W - 1) / bin_size;
+
+  auto opts = num_faces_per_mesh.options().dtype(torch::kInt32);
+  torch::Tensor faces_per_bin = torch::zeros({N, BH, BW}, opts); // NOTE(review): never read or returned in this function
+  torch::Tensor bin_faces = torch::full({N, BH, BW, M}, -1, opts);
+  auto bin_faces_a = bin_faces.accessor<int32_t, 4>();
+
+  // Precompute all face bounding boxes.
+  auto face_bboxes = ComputeFaceBoundingBoxes(face_verts);
+  auto face_bboxes_a = face_bboxes.accessor<float, 2>();
+
+  const float ndc_x_range = NonSquareNdcRange(W, H);
+  const float pixel_width_x = ndc_x_range / W;
+  const float bin_width_x = pixel_width_x * bin_size; // bin extent along x in NDC units
+
+  const float ndc_y_range = NonSquareNdcRange(H, W);
+  const float pixel_width_y = ndc_y_range / H;
+  const float bin_width_y = pixel_width_y * bin_size; // bin extent along y in NDC units
+
+  // Iterate through the meshes in the batch.
+  for (int n = 0; n < N; ++n) {
+    const int face_start_idx = mesh_to_face_first_idx[n].item().to<int32_t>();
+    const int face_stop_idx =
+        (face_start_idx + num_faces_per_mesh[n].item().to<int32_t>());
+
+    float bin_y_min = -1.0f; // NOTE(review): starts at -1 even if ndc_y_range > 2 (H > W) — confirm intended
+    float bin_y_max = bin_y_min + bin_width_y;
+
+    // Iterate through the horizontal bins from top to bottom.
+    for (int by = 0; by < BH; ++by) {
+      float bin_x_min = -1.0f; // NOTE(review): same fixed -1 start along x — confirm for W > H
+      float bin_x_max = bin_x_min + bin_width_x;
+
+      // Iterate through bins on this horizontal line, left to right.
+      for (int bx = 0; bx < BW; ++bx) {
+        int32_t faces_hit = 0;
+
+        for (int32_t f = face_start_idx; f < face_stop_idx; ++f) {
+          // Get bounding box and expand by blur radius.
+          float face_x_min = face_bboxes_a[f][0] - std::sqrt(blur_radius);
+          float face_y_min = face_bboxes_a[f][1] - std::sqrt(blur_radius);
+          float face_x_max = face_bboxes_a[f][2] + std::sqrt(blur_radius);
+          float face_y_max = face_bboxes_a[f][3] + std::sqrt(blur_radius);
+          float face_z_min = face_bboxes_a[f][4];
+
+          // Faces with at least one vertex behind the camera won't render
+          // correctly and should be removed or clipped before calling the
+          // rasterizer
+          if (face_z_min < kEpsilon) {
+            continue;
+          }
+
+          // Use a half-open interval so that faces exactly on the
+          // boundary between bins will fall into exactly one bin.
+          bool x_overlap =
+              (face_x_min <= bin_x_max) && (bin_x_min < face_x_max);
+          bool y_overlap =
+              (face_y_min <= bin_y_max) && (bin_y_min < face_y_max);
+
+          if (x_overlap && y_overlap) {
+            // Got too many faces for this bin, so throw an error.
+            if (faces_hit >= max_faces_per_bin) {
+              AT_ERROR("Got too many faces per bin");
+            }
+            // The current face's expanded bounding box overlaps the current
+            // bin, so record its index in the next free slot.
+            bin_faces_a[n][by][bx][faces_hit] = f;
+            faces_hit++;
+          }
+        }
+
+        // Shift the bin to the right for the next loop iteration
+        bin_x_min = bin_x_max;
+        bin_x_max = bin_x_min + bin_width_x;
+      }
+      // Shift the bin down for the next loop iteration
+      bin_y_min = bin_y_max;
+      bin_y_max = bin_y_min + bin_width_y;
+    }
+  }
+  return bin_faces;
+}
diff --git a/pytorch3d/pytorch3d/csrc/rasterize_points/rasterization_utils.cuh b/pytorch3d/pytorch3d/csrc/rasterize_points/rasterization_utils.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..600d7a1afff8a5089ad2d8d8023e195bcaab480c
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/rasterize_points/rasterization_utils.cuh
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+// The default value of the NDC range is [-1, 1], however in the case that
+// H != W, the NDC range is set such that the shorter side has range [-1, 1] and
+// the longer side is scaled by the ratio of H:W. S1 is the dimension for which
+// the NDC range is calculated and S2 is the other image dimension.
+// e.g. to get the NDC x range S1 = W and S2 = H
+__device__ inline float NonSquareNdcRange(int S1, int S2) {
+  float range = 2.0f; // default span, returned unchanged when S1 <= S2
+  if (S1 > S2) { // S1 is the longer side: scale its span by S1 / S2
+    // First multiply S1 by float range so that division results
+    // in a float value.
+    range = (S1 * range) / S2;
+  }
+  return range;
+}
+
+// Given a pixel coordinate 0 <= i < S1, convert it to normalized device
+// coordinates. We divide the NDC range into S1 evenly-sized
+// pixels, and assume that each pixel falls in the *center* of its range.
+// The default value of the NDC range is [-1, 1], however in the case that
+// H != W, the NDC range is set such that the shorter side has range [-1, 1] and
+// the longer side is scaled by the ratio of H:W. The dimension of i should be
+// S1 and the other image dimension is S2. For example, to get the x and y NDC
+// coordinates of a given pixel i:
+//     x = PixToNonSquareNdc(i, W, H)
+//     y = PixToNonSquareNdc(i, H, W)
+__device__ inline float PixToNonSquareNdc(int i, int S1, int S2) {
+  float range = NonSquareNdcRange(S1, S2);
+  // NDC: -offset + (i * pixel_width + half_pixel_width)
+  // The NDC range is [-range/2, range/2].
+  float offset = (range / 2.0f);
+  return -offset + (range * i + offset) / S1;
+}
+
+// The maximum number of points per pixel that we can return. Since we use
+// thread-local arrays to hold and sort points, the maximum size of the array
+// needs to be known at compile time. There might be some fancy template magic
+// we could use to make this more dynamic, but for now just fix a constant.
+// TODO: is this limit enough? Would increasing have performance considerations?
+const int32_t kMaxPointsPerPixel = 150;
+
+const int32_t kMaxItemsPerBin = 22; // presumably the per-bin item cap for binned rasterization — TODO confirm
+
+template <typename T>
+__device__ inline void BubbleSort(T* arr, int n) {
+  // Bubble sort. We only use it for tiny thread-local arrays (n < 8); in this
+  // regime we care more about warp divergence than computational complexity.
+  for (int i = 0; i < n - 1; ++i) { // after pass i the last i + 1 slots hold their final values
+    for (int j = 0; j < n - i - 1; ++j) {
+      if (arr[j + 1] < arr[j]) { // ascending order; only operator< is required of T
+        T temp = arr[j];
+        arr[j] = arr[j + 1];
+        arr[j + 1] = temp;
+      }
+    }
+  }
+}
diff --git a/pytorch3d/pytorch3d/csrc/rasterize_points/rasterization_utils.h b/pytorch3d/pytorch3d/csrc/rasterize_points/rasterization_utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..6980afc4a49962760283c46ac415faa5ab76ce8e
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/rasterize_points/rasterization_utils.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+// The default value of the NDC range is [-1, 1], however in the case that
+// H != W, the NDC range is set such that the shorter side has range [-1, 1] and
+// the longer side is scaled by the ratio of H:W. S1 is the dimension for which
+// the NDC range is calculated and S2 is the other image dimension.
+// e.g. to get the NDC x range S1 = W and S2 = H
+inline float NonSquareNdcRange(int S1, int S2) {
+  float range = 2.0f; // default span, returned unchanged when S1 <= S2
+  if (S1 > S2) { // S1 is the longer side: scale its span by S1 / S2
+    range = (S1 * range) / S2; // float multiply first, so the division is not integer division
+  }
+  return range;
+}
+
+// Given a pixel coordinate 0 <= i < S1, convert it to normalized device
+// coordinates. We divide the NDC range into S1 evenly-sized
+// pixels, and assume that each pixel falls in the *center* of its range.
+// The default value of the NDC range is [-1, 1], however in the case that
+// H != W, the NDC range is set such that the shorter side has range [-1, 1] and
+// the longer side is scaled by the ratio of H:W. The dimension of i should be
+// S1 and the other image dimension is S2. For example, to get the x and y NDC
+// coordinates of a given pixel i:
+//     x = PixToNonSquareNdc(i, W, H)
+//     y = PixToNonSquareNdc(i, H, W)
+inline float PixToNonSquareNdc(int i, int S1, int S2) {
+  float range = NonSquareNdcRange(S1, S2);
+  // NDC: -offset + (i * pixel_width + half_pixel_width)
+  // The NDC range is [-range/2, range/2].
+  const float offset = (range / 2.0f);
+  return -offset + (range * i + offset) / S1;
+}
diff --git a/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points.cu b/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points.cu
new file mode 100644
index 0000000000000000000000000000000000000000..5b18d8334d3358244409a2d9e43acd3c9262cb99
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points.cu
@@ -0,0 +1,462 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <math.h>
+#include <cstdio>
+#include <sstream>
+#include <tuple>
+#include "rasterize_points/rasterization_utils.cuh"
+
+namespace {
+// One candidate point for a pixel, as stored in the per-pixel list q.
+struct Pix {
+  float z; // Depth (z coordinate) of the point.
+  int32_t idx; // Index of the point in the packed points array.
+  float dist2; // Squared x/y distance from the pixel to the point.
+};
+// Orders Pix entries by depth only; idx and dist2 are not compared.
+__device__ inline bool operator<(const Pix& a, const Pix& b) {
+  return a.z < b.z;
+}
+
+// This function checks if the pixel whose center is at location (xf, yf)
+// lies within the disc of the point with packed index p_idx, i.e. whether the
+// squared x/y distance to the point is less than radius[p_idx]^2. q is an
+// unsorted list of at most K Pix entries holding the closest (smallest z)
+// hits found so far. If the pixel lies in the point, q is updated in place,
+// together with the auxiliary variables q_size, q_max_z and q_max_idx (the
+// largest z in q and its slot). Points with z < 0 are skipped. This code is
+// shared by RasterizePointsNaiveCudaKernel and RasterizePointsFineCudaKernel.
+template <typename PointQ>
+__device__ void CheckPixelInsidePoint(
+    const float* points, // (P, 3)
+    const int p_idx, // index of the candidate point in points and radius
+    int& q_size, // number of valid entries in q
+    float& q_max_z, // largest z currently stored in q
+    int& q_max_idx, // slot of q holding q_max_z
+    PointQ& q, // indexable container of Pix entries
+    const float* radius, // per-point radius, indexed by p_idx
+    const float xf, // x coordinate of the pixel
+    const float yf, // y coordinate of the pixel
+    const int K) { // maximum number of entries kept in q
+  const float px = points[p_idx * 3 + 0];
+  const float py = points[p_idx * 3 + 1];
+  const float pz = points[p_idx * 3 + 2];
+  const float p_radius = radius[p_idx];
+  const float radius2 = p_radius * p_radius;
+  if (pz < 0)
+    return; // Don't render points behind the camera
+  const float dx = xf - px;
+  const float dy = yf - py;
+  const float dist2 = dx * dx + dy * dy;
+  if (dist2 < radius2) {
+    if (q_size < K) {
+      // Just insert it
+      q[q_size] = {pz, p_idx, dist2};
+      if (pz > q_max_z) {
+        q_max_z = pz;
+        q_max_idx = q_size;
+      }
+      q_size++;
+    } else if (pz < q_max_z) {
+      // Overwrite the old max, and find the new max
+      q[q_max_idx] = {pz, p_idx, dist2};
+      q_max_z = pz; // rescan starts from the entry that was just written
+      for (int i = 0; i < K; i++) {
+        if (q[i].z > q_max_z) {
+          q_max_z = q[i].z;
+          q_max_idx = i;
+        }
+      }
+    }
+  }
+}
+} // namespace
+// ****************************************************************************
+// *                          NAIVE RASTERIZATION                             *
+// ****************************************************************************
+// Naive forward kernel: every output pixel tests every point of its cloud.
+__global__ void RasterizePointsNaiveCudaKernel(
+    const float* points, // (P, 3)
+    const int64_t* cloud_to_packed_first_idx, // (N)
+    const int64_t* num_points_per_cloud, // (N)
+    const float* radius,
+    const int N,
+    const int H,
+    const int W,
+    const int K,
+    int32_t* point_idxs, // (N, H, W, K)
+    float* zbuf, // (N, H, W, K)
+    float* pix_dists) { // (N, H, W, K)
+  // Simple version: one output pixel per loop iteration (N * H * W in total)
+  const int num_threads = gridDim.x * blockDim.x;
+  const int tid = blockDim.x * blockIdx.x + threadIdx.x;
+  for (int i = tid; i < N * H * W; i += num_threads) {
+    // Convert linear index to 3D index
+    const int n = i / (H * W); // Batch index
+    const int pix_idx = i % (H * W);
+
+    // Reverse ordering of the X and Y axis as the camera coordinates
+    // assume that +Y is pointing up and +X is pointing left.
+    const int yi = H - 1 - pix_idx / W;
+    const int xi = W - 1 - pix_idx % W;
+
+    // screen coordinates to ndc coordinates of pixel.
+    const float xf = PixToNonSquareNdc(xi, W, H);
+    const float yf = PixToNonSquareNdc(yi, H, W);
+
+    // For keeping track of the K closest points we want a data structure
+    // that (1) gives O(1) access to the closest point for easy comparisons,
+    // and (2) allows insertion of new elements. In the CPU version we use
+    // std::priority_queue; then (2) is O(log K). We can't use STL
+    // containers in CUDA; we could roll our own max heap in an array, but
+    // that would likely have a lot of warp divergence so we do something
+    // simpler instead: keep the elements in an unsorted array, but keep
+    // track of the max value and the index of the max value. Then (1) is
+    // still O(1) time, while (2) is O(K) with a clean loop. Since K is bounded
+    // by kMaxPointsPerPixel this should be fast enough for our purposes.
+    // TODO(jcjohns) Abstract this out into a standalone data structure
+    Pix q[kMaxPointsPerPixel];
+    int q_size = 0;
+    float q_max_z = -1000; // below any stored z (only z >= 0 is inserted)
+    int q_max_idx = -1;
+
+    // Using the batch index of the thread get the start and stop
+    // indices for the points.
+    const int64_t point_start_idx = cloud_to_packed_first_idx[n];
+    const int64_t point_stop_idx = point_start_idx + num_points_per_cloud[n];
+
+    for (int p_idx = point_start_idx; p_idx < point_stop_idx; ++p_idx) {
+      CheckPixelInsidePoint(
+          points, p_idx, q_size, q_max_z, q_max_idx, q, radius, xf, yf, K);
+    }
+    BubbleSort(q, q_size); // defined elsewhere; presumably sorts by z
+    int idx = n * H * W * K + pix_idx * K; // NOTE(review): 32-bit index math
+    for (int k = 0; k < q_size; ++k) { // slots >= q_size are left untouched
+      point_idxs[idx + k] = q[k].idx;
+      zbuf[idx + k] = q[k].z;
+      pix_dists[idx + k] = q[k].dist2;
+    }
+  }
+}
+
+std::tuple<at::Tensor, at::Tensor, at::Tensor> RasterizePointsNaiveCuda(
+    const at::Tensor& points, // (P, 3)
+    const at::Tensor& cloud_to_packed_first_idx, // (N)
+    const at::Tensor& num_points_per_cloud, // (N)
+    const std::tuple<int, int> image_size, // (H, W)
+    const at::Tensor& radius, // read per point index by the kernel
+    const int points_per_pixel) { // K
+  // Host launcher for the naive kernel. Check inputs are on the same device.
+  at::TensorArg points_t{points, "points", 1},
+      cloud_to_packed_first_idx_t{
+          cloud_to_packed_first_idx, "cloud_to_packed_first_idx", 2},
+      num_points_per_cloud_t{num_points_per_cloud, "num_points_per_cloud", 3};
+  at::CheckedFrom c = "RasterizePointsNaiveCuda";
+  at::checkAllSameGPU( // NOTE(review): radius is not part of this check
+      c, {points_t, cloud_to_packed_first_idx_t, num_points_per_cloud_t});
+
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(points.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  TORCH_CHECK(
+      points.ndimension() == 2 && points.size(1) == 3,
+      "points must have dimensions (num_points, 3)");
+  TORCH_CHECK(
+      num_points_per_cloud.size(0) == cloud_to_packed_first_idx.size(0),
+      "num_points_per_cloud must have same size first dimension as cloud_to_packed_first_idx");
+
+  const int N = num_points_per_cloud.size(0); // batch size.
+  const int H = std::get<0>(image_size);
+  const int W = std::get<1>(image_size);
+  const int K = points_per_pixel;
+
+  if (K > kMaxPointsPerPixel) { // kernel's per-pixel array has this capacity
+    std::stringstream ss;
+    ss << "Must have points_per_pixel <= " << kMaxPointsPerPixel;
+    AT_ERROR(ss.str());
+  }
+
+  auto int_opts = num_points_per_cloud.options().dtype(at::kInt);
+  auto float_opts = points.options().dtype(at::kFloat);
+  at::Tensor point_idxs = at::full({N, H, W, K}, -1, int_opts); // -1 = empty
+  at::Tensor zbuf = at::full({N, H, W, K}, -1, float_opts);
+  at::Tensor pix_dists = at::full({N, H, W, K}, -1, float_opts);
+
+  if (point_idxs.numel() == 0) { // empty output: skip the kernel launch
+    AT_CUDA_CHECK(cudaGetLastError());
+    return std::make_tuple(point_idxs, zbuf, pix_dists);
+  }
+
+  const size_t blocks = 1024;
+  const size_t threads = 64;
+  RasterizePointsNaiveCudaKernel<<<blocks, threads, 0, stream>>>(
+      points.contiguous().data_ptr<float>(),
+      cloud_to_packed_first_idx.contiguous().data_ptr<int64_t>(),
+      num_points_per_cloud.contiguous().data_ptr<int64_t>(),
+      radius.contiguous().data_ptr<float>(),
+      N,
+      H,
+      W,
+      K,
+      point_idxs.contiguous().data_ptr<int32_t>(),
+      zbuf.contiguous().data_ptr<float>(),
+      pix_dists.contiguous().data_ptr<float>());
+
+  AT_CUDA_CHECK(cudaGetLastError());
+  return std::make_tuple(point_idxs, zbuf, pix_dists);
+}
+
+// ****************************************************************************
+// *                            FINE RASTERIZATION                            *
+// ****************************************************************************
+// Fine forward kernel: every pixel tests only the points listed for its bin.
+__global__ void RasterizePointsFineCudaKernel(
+    const float* points, // (P, 3)
+    const int32_t* bin_points, // (N, BH, BW, M)
+    const float* radius,
+    const int bin_size, // bin edge length in pixels
+    const int N,
+    const int BH, // num_bins y
+    const int BW, // num_bins x
+    const int M, // number of point slots stored per bin
+    const int H,
+    const int W,
+    const int K,
+    int32_t* point_idxs, // (N, H, W, K)
+    float* zbuf, // (N, H, W, K)
+    float* pix_dists) { // (N, H, W, K)
+  // This can be more than N * H * W if H or W are not divisible by bin_size.
+  const int num_pixels = N * BH * BW * bin_size * bin_size;
+  const int num_threads = gridDim.x * blockDim.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+  for (int pid = tid; pid < num_pixels; pid += num_threads) {
+    // Convert linear index into bin and pixel indices. We make the within
+    // block pixel ids move the fastest, so that adjacent threads will fall
+    // into the same bin; this should give them coalesced memory reads when
+    // they read from points and bin_points.
+    int i = pid;
+    const int n = i / (BH * BW * bin_size * bin_size);
+    i %= BH * BW * bin_size * bin_size;
+    const int by = i / (BW * bin_size * bin_size);
+    i %= BW * bin_size * bin_size;
+    const int bx = i / (bin_size * bin_size);
+    i %= bin_size * bin_size;
+
+    const int yi = i / bin_size + by * bin_size;
+    const int xi = i % bin_size + bx * bin_size;
+
+    if (yi >= H || xi >= W) // bin pixel that lies outside the image
+      continue;
+
+    const float xf = PixToNonSquareNdc(xi, W, H);
+    const float yf = PixToNonSquareNdc(yi, H, W);
+
+    // This part looks like the naive rasterization kernel, except we use
+    // bin_points to only look at a subset of points already known to fall
+    // in this bin. TODO abstract out this logic into some data structure
+    // that is shared by both kernels?
+    Pix q[kMaxPointsPerPixel];
+    int q_size = 0;
+    float q_max_z = -1000; // below any stored z (only z >= 0 is inserted)
+    int q_max_idx = -1;
+    for (int m = 0; m < M; ++m) {
+      const int p = bin_points[n * BH * BW * M + by * BW * M + bx * M + m];
+      if (p < 0) {
+        // bin_points uses -1 as a sentinel value
+        continue;
+      }
+      CheckPixelInsidePoint(
+          points, p, q_size, q_max_z, q_max_idx, q, radius, xf, yf, K);
+    }
+    // Now we've looked at all the points for this bin, so we can write
+    // output for the current pixel.
+    BubbleSort(q, q_size); // defined elsewhere; presumably sorts by z
+
+    // Reverse ordering of the X and Y axis as the camera coordinates
+    // assume that +Y is pointing up and +X is pointing left.
+    const int yidx = H - 1 - yi;
+    const int xidx = W - 1 - xi;
+
+    const int pix_idx = n * H * W * K + yidx * W * K + xidx * K;
+    for (int k = 0; k < q_size; ++k) { // slots >= q_size are left untouched
+      point_idxs[pix_idx + k] = q[k].idx;
+      zbuf[pix_idx + k] = q[k].z;
+      pix_dists[pix_idx + k] = q[k].dist2;
+    }
+  }
+}
+
+// Host launcher for the fine stage of coarse-to-fine point rasterization.
+// Returns (point_idxs, zbuf, pix_dists), each (N, H, W, K), -1 where unused.
+std::tuple<at::Tensor, at::Tensor, at::Tensor> RasterizePointsFineCuda(
+    const at::Tensor& points, // (P, 3)
+    const at::Tensor& bin_points, // (N, BH, BW, M)
+    const std::tuple<int, int> image_size, // (H, W)
+    const at::Tensor& radius, // read per point index by the kernel
+    const int bin_size, // bin edge length in pixels
+    const int points_per_pixel) { // K
+  // Check inputs are on the same device
+  at::TensorArg points_t{points, "points", 1},
+      bin_points_t{bin_points, "bin_points", 2};
+  at::CheckedFrom c = "RasterizePointsFineCuda";
+  at::checkAllSameGPU(c, {points_t, bin_points_t});
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(points.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const int N = bin_points.size(0);
+  const int BH = bin_points.size(1);
+  const int BW = bin_points.size(2);
+  const int M = bin_points.size(3);
+  const int K = points_per_pixel;
+  const int H = std::get<0>(image_size);
+  const int W = std::get<1>(image_size);
+
+  TORCH_CHECK(
+      K <= kMaxPointsPerPixel, // capacity of the kernel's per-pixel array
+      "Must have points_per_pixel <= ", kMaxPointsPerPixel);
+  auto int_opts = bin_points.options().dtype(at::kInt);
+  auto float_opts = points.options().dtype(at::kFloat);
+  at::Tensor point_idxs = at::full({N, H, W, K}, -1, int_opts); // -1 = empty
+  at::Tensor zbuf = at::full({N, H, W, K}, -1, float_opts);
+  at::Tensor pix_dists = at::full({N, H, W, K}, -1, float_opts);
+
+  if (point_idxs.numel() == 0) { // empty output: skip the kernel launch
+    AT_CUDA_CHECK(cudaGetLastError());
+    return std::make_tuple(point_idxs, zbuf, pix_dists);
+  }
+
+  const size_t blocks = 1024;
+  const size_t threads = 64;
+  RasterizePointsFineCudaKernel<<<blocks, threads, 0, stream>>>(
+      points.contiguous().data_ptr<float>(),
+      bin_points.contiguous().data_ptr<int32_t>(),
+      radius.contiguous().data_ptr<float>(),
+      bin_size,
+      N,
+      BH,
+      BW,
+      M,
+      H,
+      W,
+      K,
+      point_idxs.contiguous().data_ptr<int32_t>(),
+      zbuf.contiguous().data_ptr<float>(),
+      pix_dists.contiguous().data_ptr<float>());
+
+  AT_CUDA_CHECK(cudaGetLastError());
+  return std::make_tuple(point_idxs, zbuf, pix_dists);
+}
+
+// ****************************************************************************
+// *                            BACKWARD PASS                                 *
+// ****************************************************************************
+// TODO(T55115174) Add more documentation for backward kernel.
+__global__ void RasterizePointsBackwardCudaKernel(
+    const float* points, // (P, 3)
+    const int32_t* idxs, // (N, H, W, K)
+    const int N,
+    const int P, // not read inside the kernel body
+    const int H,
+    const int W,
+    const int K,
+    const float* grad_zbuf, // (N, H, W, K)
+    const float* grad_dists, // (N, H, W, K)
+    float* grad_points) { // (P, 3)
+  // Scatter-adds the upstream gradients into grad_points. Parallelized over
+  // all N * H * W * K (image, pixel, point slot) entries of idxs.
+  int num_threads = gridDim.x * blockDim.x;
+  int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  for (int i = tid; i < N * H * W * K; i += num_threads) {
+    // const int n = i / (H * W * K); // batch index (not needed).
+    const int yxk = i % (H * W * K);
+    const int yi = yxk / (W * K);
+    const int xk = yxk % (W * K);
+    const int xi = xk / K;
+    // k = xk % K (We don't actually need k, but this would be it.)
+    // Reverse ordering of X and Y axes.
+    const int yidx = H - 1 - yi;
+    const int xidx = W - 1 - xi;
+    // Pixel center in NDC, as computed by the forward kernels.
+    const float xf = PixToNonSquareNdc(xidx, W, H);
+    const float yf = PixToNonSquareNdc(yidx, H, W);
+
+    const int p = idxs[i];
+    if (p < 0)
+      continue; // negative index: no point stored in this slot
+    const float grad_dist2 = grad_dists[i];
+    const int p_ind = p * 3; // index into packed points tensor
+    const float px = points[p_ind + 0];
+    const float py = points[p_ind + 1];
+    const float dx = px - xf;
+    const float dy = py - yf;
+    const float grad_px = 2.0f * grad_dist2 * dx; // d(dx^2 + dy^2)/d(px) = 2dx
+    const float grad_py = 2.0f * grad_dist2 * dy; // d(dx^2 + dy^2)/d(py) = 2dy
+    const float grad_pz = grad_zbuf[i]; // passed through unchanged
+    atomicAdd(grad_points + p_ind + 0, grad_px); // entries may share a point
+    atomicAdd(grad_points + p_ind + 1, grad_py);
+    atomicAdd(grad_points + p_ind + 2, grad_pz);
+  }
+}
+
+at::Tensor RasterizePointsBackwardCuda(
+    const at::Tensor& points, // (P, 3)
+    const at::Tensor& idxs, // (N, H, W, K)
+    const at::Tensor& grad_zbuf, // (N, H, W, K)
+    const at::Tensor& grad_dists) { // (N, H, W, K)
+  // Launches the backward kernel and returns grad_points of shape (P, 3).
+  // Check inputs are on the same device
+  at::TensorArg points_t{points, "points", 1}, idxs_t{idxs, "idxs", 2},
+      grad_zbuf_t{grad_zbuf, "grad_zbuf", 3},
+      grad_dists_t{grad_dists, "grad_dists", 4};
+  at::CheckedFrom c = "RasterizePointsBackwardCuda";
+  at::checkAllSameGPU(c, {points_t, idxs_t, grad_zbuf_t, grad_dists_t});
+  at::checkAllSameType(c, {points_t, grad_zbuf_t, grad_dists_t});
+  // This is nondeterministic because atomicAdd
+  at::globalContext().alertNotDeterministic("RasterizePointsBackwardCuda");
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(points.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const int P = points.size(0);
+  const int N = idxs.size(0);
+  const int H = idxs.size(1);
+  const int W = idxs.size(2);
+  const int K = idxs.size(3);
+
+  at::Tensor grad_points = at::zeros({P, 3}, points.options()); // accumulator
+
+  if (grad_points.numel() == 0) { // no points: skip the kernel launch
+    AT_CUDA_CHECK(cudaGetLastError());
+    return grad_points;
+  }
+
+  const size_t blocks = 1024;
+  const size_t threads = 64;
+
+  RasterizePointsBackwardCudaKernel<<<blocks, threads, 0, stream>>>(
+      points.contiguous().data_ptr<float>(),
+      idxs.contiguous().data_ptr<int32_t>(),
+      N,
+      P,
+      H,
+      W,
+      K,
+      grad_zbuf.contiguous().data_ptr<float>(),
+      grad_dists.contiguous().data_ptr<float>(),
+      grad_points.contiguous().data_ptr<float>());
+
+  AT_CUDA_CHECK(cudaGetLastError());
+  return grad_points;
+}
diff --git a/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points.h b/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points.h
new file mode 100644
index 0000000000000000000000000000000000000000..4e74e9e2b0600dccbcbe3e745e80f13a8149ca99
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points.h
@@ -0,0 +1,360 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/extension.h>
+#include <cstdio>
+#include <tuple>
+#include "rasterize_coarse/rasterize_coarse.h"
+#include "utils/pytorch3d_cutils.h"
+
+// ****************************************************************************
+// *                          NAIVE RASTERIZATION                             *
+// ****************************************************************************
+
+std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePointsNaiveCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& cloud_to_packed_first_idx,
+    const torch::Tensor& num_points_per_cloud,
+    const std::tuple<int, int> image_size,
+    const torch::Tensor& radius,
+    const int points_per_pixel);
+
+#ifdef WITH_CUDA
+std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>
+RasterizePointsNaiveCuda(
+    const torch::Tensor& points,
+    const torch::Tensor& cloud_to_packed_first_idx,
+    const torch::Tensor& num_points_per_cloud,
+    const std::tuple<int, int> image_size,
+    const torch::Tensor& radius,
+    const int points_per_pixel);
+#endif
+// Naive (forward) pointcloud rasterization: For each pixel, for each point,
+// check whether that point hits the pixel.
+//
+// Args:
+//  points: Tensor of shape (P, 3) giving (packed) positions for
+//          points in all N pointclouds in the batch where P is the total
+//          number of points in the batch across all pointclouds. These points
+//          are expected to be in NDC coordinates in the range [-1, 1].
+//  cloud_to_packed_first_idx: LongTensor of shape (N) giving the index in
+//                          points_packed of the first point in each pointcloud
+//                          in the batch where N is the batch size.
+//  num_points_per_cloud: LongTensor of shape (N) giving the number of points
+//                        for each pointcloud in the batch.
+//  image_size: Tuple (H, W) giving the size in pixels of the output
+//              image to be rasterized.
+//  radius: FloatTensor of shape (P) giving the radius (in NDC units) of
+//          each point in points.
+//  points_per_pixel: (K) The number of closest points to return for each pixel
+//
+// Returns:
+//  A 3 element tuple of:
+//  idxs: int32 Tensor of shape (N, H, W, K) giving the indices of the
+//        closest K points along the z-axis for each pixel, padded with -1 for
+//        pixels hit by fewer than K points. The indices refer to points in
+//        points packed i.e a tensor of shape (P, 3) representing the flattened
+//        points for all pointclouds in the batch.
+//  zbuf: float32 Tensor of shape (N, H, W, K) giving the depth of each
+//        closest point for each pixel.
+//  dists: float32 Tensor of shape (N, H, W, K) giving squared Euclidean
+//          distance in the (NDC) x/y plane between each pixel and its K closest
+//          points along the z axis.
+std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePointsNaive(
+    const torch::Tensor& points,
+    const torch::Tensor& cloud_to_packed_first_idx,
+    const torch::Tensor& num_points_per_cloud,
+    const std::tuple<int, int> image_size,
+    const torch::Tensor& radius,
+    const int points_per_pixel) {
+  if (points.is_cuda() && cloud_to_packed_first_idx.is_cuda() &&
+      num_points_per_cloud.is_cuda()) { // CUDA path needs all three on GPU
+#ifdef WITH_CUDA
+    CHECK_CUDA(points);
+    CHECK_CUDA(cloud_to_packed_first_idx);
+    CHECK_CUDA(num_points_per_cloud);
+    CHECK_CUDA(radius);
+    return RasterizePointsNaiveCuda(
+        points,
+        cloud_to_packed_first_idx,
+        num_points_per_cloud,
+        image_size,
+        radius,
+        points_per_pixel);
+#else
+    AT_ERROR("Not compiled with GPU support");
+#endif
+  } else { // any of the three tensors is not CUDA: use the CPU implementation
+    return RasterizePointsNaiveCpu(
+        points,
+        cloud_to_packed_first_idx,
+        num_points_per_cloud,
+        image_size,
+        radius,
+        points_per_pixel);
+  }
+}
+
+// ****************************************************************************
+// *                          COARSE RASTERIZATION                            *
+// ****************************************************************************
+
+// RasterizePointsCoarseCuda in rasterize_coarse/rasterize_coarse.h
+
+torch::Tensor RasterizePointsCoarseCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& cloud_to_packed_first_idx,
+    const torch::Tensor& num_points_per_cloud,
+    const std::tuple<int, int> image_size,
+    const torch::Tensor& radius,
+    const int bin_size,
+    const int max_points_per_bin);
+// Coarse rasterization: finds which points fall into each image bin.
+// Args:
+//  points: Tensor of shape (P, 3) giving (packed) positions for
+//          points in all N pointclouds in the batch where P is the total
+//          number of points in the batch across all pointclouds. These points
+//          are expected to be in NDC coordinates in the range [-1, 1].
+//  cloud_to_packed_first_idx: LongTensor of shape (N) giving the index in
+//                          points_packed of the first point in each pointcloud
+//                          in the batch where N is the batch size.
+//  num_points_per_cloud: LongTensor of shape (N) giving the number of points
+//                        for each pointcloud in the batch.
+//  image_size: Tuple (H, W) giving the size in pixels of the output
+//              image to be rasterized.
+//  radius: FloatTensor of shape (P) giving the radius (in NDC units) of
+//          each point in points.
+//  bin_size: Size of each bin within the image (in pixels)
+//  max_points_per_bin: The maximum number of points allowed to fall into each
+//                      bin when using coarse-to-fine rasterization.
+//
+// Returns:
+//  bin_points: int32 Tensor of shape (N, num_bins_y, num_bins_x, M) giving
+//              the indices of points that fall into each bin, where M is
+//              presumably max_points_per_bin (TODO confirm). The per-bin point
+//              counts are not part of the return value.
+torch::Tensor RasterizePointsCoarse(
+    const torch::Tensor& points,
+    const torch::Tensor& cloud_to_packed_first_idx,
+    const torch::Tensor& num_points_per_cloud,
+    const std::tuple<int, int> image_size,
+    const torch::Tensor& radius,
+    const int bin_size,
+    const int max_points_per_bin) {
+  if (points.is_cuda() && cloud_to_packed_first_idx.is_cuda() &&
+      num_points_per_cloud.is_cuda()) { // CUDA path needs all three on GPU
+#ifdef WITH_CUDA
+    CHECK_CUDA(points);
+    CHECK_CUDA(cloud_to_packed_first_idx);
+    CHECK_CUDA(num_points_per_cloud);
+    CHECK_CUDA(radius);
+    return RasterizePointsCoarseCuda(
+        points,
+        cloud_to_packed_first_idx,
+        num_points_per_cloud,
+        image_size,
+        radius,
+        bin_size,
+        max_points_per_bin);
+#else
+    AT_ERROR("Not compiled with GPU support");
+#endif
+  } else { // any of the three tensors is not CUDA: use the CPU implementation
+    return RasterizePointsCoarseCpu(
+        points,
+        cloud_to_packed_first_idx,
+        num_points_per_cloud,
+        image_size,
+        radius,
+        bin_size,
+        max_points_per_bin);
+  }
+}
+
+// ****************************************************************************
+// *                            FINE RASTERIZATION                            *
+// ****************************************************************************
+
+#ifdef WITH_CUDA
+std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePointsFineCuda(
+    const torch::Tensor& points,
+    const torch::Tensor& bin_points,
+    const std::tuple<int, int> image_size,
+    const torch::Tensor& radius,
+    const int bin_size,
+    const int points_per_pixel);
+#endif
+// Fine rasterization (CUDA only): each pixel only tests the points in its bin.
+// Args:
+//  points: Tensor of shape (P, 3) giving (packed) positions for
+//          points in all N pointclouds in the batch where P is the total
+//          number of points in the batch across all pointclouds. These points
+//          are expected to be in NDC coordinates in the range [-1, 1].
+//  bin_points: int32 Tensor of shape (N, BH, BW, M) giving the indices of
+//              points that fall into each bin (output from coarse rasterization)
+//  image_size: Tuple (H, W) giving the size in pixels of the output
+//              image to be rasterized.
+//  radius: FloatTensor of shape (P) giving the radius (in NDC units) of
+//          each point in points.
+//  bin_size: Size of each bin (in pixels)
+//  points_per_pixel: How many points to rasterize for each pixel
+//
+// Returns (same as rasterize_points):
+//  idxs: int32 Tensor of shape (N, H, W, K) giving the indices of the closest
+//        K points along the z-axis for each pixel, padded with -1 for pixels
+//        hit by fewer than K points. The indices refer to the packed points
+//        tensor of shape (P, 3), i.e. the flattened points of the whole batch.
+//  zbuf: float32 Tensor of shape (N, H, W, K) giving the depth of each
+//        closest point for each pixel
+//  dists: float32 Tensor of shape (N, H, W, K) giving the squared Euclidean
+//         x/y (NDC) distance between each pixel and its K closest points in z.
+std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePointsFine(
+    const torch::Tensor& points,
+    const torch::Tensor& bin_points,
+    const std::tuple<int, int> image_size,
+    const torch::Tensor& radius,
+    const int bin_size,
+    const int points_per_pixel) {
+  if (points.is_cuda()) {
+#ifdef WITH_CUDA
+    CHECK_CUDA(points);
+    CHECK_CUDA(bin_points);
+    CHECK_CUDA(radius); // the kernel dereferences radius on the device
+    return RasterizePointsFineCuda(
+        points, bin_points, image_size, radius, bin_size, points_per_pixel);
+#else
+    AT_ERROR("Not compiled with GPU support");
+#endif
+  } else { // there is no CPU implementation of the fine stage
+    AT_ERROR("NOT IMPLEMENTED");
+  }
+}
+
+// ****************************************************************************
+// *                            BACKWARD PASS                                 *
+// ****************************************************************************
+
+torch::Tensor RasterizePointsBackwardCpu(
+    const torch::Tensor& points,
+    const torch::Tensor& idxs,
+    const torch::Tensor& grad_zbuf,
+    const torch::Tensor& grad_dists);
+
+#ifdef WITH_CUDA
+torch::Tensor RasterizePointsBackwardCuda(
+    const torch::Tensor& points,
+    const torch::Tensor& idxs,
+    const torch::Tensor& grad_zbuf,
+    const torch::Tensor& grad_dists);
+#endif
+// Backward pass of point rasterization; dispatches on the device of points.
+// Args:
+//  points: Tensor of shape (P, 3) giving (packed) positions for
+//          points in all N pointclouds in the batch where P is the total
+//          number of points in the batch across all pointclouds. These points
+//          are expected to be in NDC coordinates in the range [-1, 1].
+//  idxs: int32 Tensor of shape (N, H, W, K) (from forward pass)
+//  grad_zbuf: float32 Tensor of shape (N, H, W, K) giving upstream gradient
+//             d(loss)/d(zbuf) of the zbuf tensor returned by the forward pass.
+//  grad_dists: Tensor of shape (N, H, W, K) giving upstream gradient
+//              d(loss)/d(dists) of the dists tensor returned by the forward
+//              pass.
+//
+// Returns:
+//  grad_points: float32 Tensor of shape (P, 3) giving downstream gradients
+torch::Tensor RasterizePointsBackward(
+    const torch::Tensor& points,
+    const torch::Tensor& idxs,
+    const torch::Tensor& grad_zbuf,
+    const torch::Tensor& grad_dists) {
+  if (points.is_cuda()) {
+#ifdef WITH_CUDA
+    CHECK_CUDA(points);
+    CHECK_CUDA(idxs);
+    CHECK_CUDA(grad_zbuf);
+    CHECK_CUDA(grad_dists);
+    return RasterizePointsBackwardCuda(points, idxs, grad_zbuf, grad_dists);
+#else
+    AT_ERROR("Not compiled with GPU support");
+#endif
+  } else { // points is not a CUDA tensor: use the CPU implementation
+    return RasterizePointsBackwardCpu(points, idxs, grad_zbuf, grad_dists);
+  }
+}
+
+// ****************************************************************************
+// *                         MAIN ENTRY POINT                                 *
+// ****************************************************************************
+
+// Main entry point for the forward pass of the point rasterizer. Depending on
+// bin_size it runs either the naive or the coarse-to-fine implementation.
+//
+// Args:
+//  points: Tensor of shape (P, 3) holding the packed positions of the points
+//          of all N pointclouds in the batch, where P is the total number of
+//          points across the batch. These points are expected to be in NDC
+//          coordinates in the range [-1, 1].
+//  cloud_to_packed_first_idx: LongTensor of shape (N); for each pointcloud in
+//                          the batch, the index in points of its first point
+//                          (N is the batch size).
+//  num_points_per_cloud: LongTensor of shape (N); the number of points in
+//                        each pointcloud of the batch.
+//  image_size: Tuple (H, W), the size in pixels of the output image to be
+//              rasterized.
+//  radius: FloatTensor of shape (P), the radius (in NDC units) of each point
+//          in points.
+//  points_per_pixel: (K) The number of points to return for each pixel.
+//  bin_size: Bin size (in pixels) for coarse-to-fine rasterization.
+//            bin_size=0 selects naive rasterization instead.
+//  max_points_per_bin: The maximum number of points allowed to fall into each
+//                      bin when using coarse-to-fine rasterization.
+//
+// Returns:
+//  idxs: int32 Tensor of shape (N, H, W, K) with, for each pixel, the indices
+//        of the closest K points along the z-axis, padded with -1 for pixels
+//        hit by fewer than K points. The indices refer to the packed points,
+//        i.e. the tensor of shape (P, 3) holding the flattened points of all
+//        pointclouds in the batch.
+//  zbuf: float32 Tensor of shape (N, H, W, K) with the depth of each closest
+//        point for each pixel.
+//  dists: float32 Tensor of shape (N, H, W, K) with the squared Euclidean
+//         distance in the (NDC) x/y plane between each pixel and its K closest
+//         points along the z axis.
+std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePoints(
+    const torch::Tensor& points,
+    const torch::Tensor& cloud_to_packed_first_idx,
+    const torch::Tensor& num_points_per_cloud,
+    const std::tuple<int, int> image_size,
+    const torch::Tensor& radius,
+    const int points_per_pixel,
+    const int bin_size,
+    const int max_points_per_bin) {
+  if (bin_size != 0) {
+    // Coarse-to-fine rasterization: first assign the points to bins, then
+    // rasterize each pixel against the points of its own bin only.
+    const torch::Tensor coarse_bins = RasterizePointsCoarse(
+        points,
+        cloud_to_packed_first_idx,
+        num_points_per_cloud,
+        image_size,
+        radius,
+        bin_size,
+        max_points_per_bin);
+    return RasterizePointsFine(
+        points, coarse_bins, image_size, radius, bin_size, points_per_pixel);
+  }
+  // bin_size == 0: use the naive per-pixel implementation.
+  return RasterizePointsNaive(
+      points,
+      cloud_to_packed_first_idx,
+      num_points_per_cloud,
+      image_size,
+      radius,
+      points_per_pixel);
+}
diff --git a/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points_cpu.cpp b/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..29da4c497b89b2fe3e2e2d5b2563adbbcd6ef326
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points_cpu.cpp
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/extension.h>
+#include <queue>
+#include <tuple>
+#include "rasterization_utils.h"
+
+std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePointsNaiveCpu(
+    const torch::Tensor& points, // (P, 3)
+    const torch::Tensor& cloud_to_packed_first_idx, // (N)
+    const torch::Tensor& num_points_per_cloud, // (N)
+    const std::tuple<int, int> image_size,
+    const torch::Tensor& radius,
+    const int points_per_pixel) {
+  // Brute-force CPU rasterizer: every pixel tests every point of its cloud
+  // and keeps the points_per_pixel hits with the smallest z.
+  // Returns (point indices, z-buffer, squared xy distances), each of shape
+  // (N, H, W, K) and left at -1 where a pixel has fewer than K hits.
+  using Hit = std::tuple<float, int, float>; // (z, packed point idx, dist^2)
+
+  const int32_t N = cloud_to_packed_first_idx.size(0); // batch_size.
+  const int H = std::get<0>(image_size);
+  const int W = std::get<1>(image_size);
+  const int K = points_per_pixel;
+
+  // Allocate the three outputs, pre-filled with the padding value -1.
+  const auto idx_options = num_points_per_cloud.options().dtype(torch::kInt32);
+  const auto real_options = points.options().dtype(torch::kFloat32);
+  torch::Tensor point_idxs = torch::full({N, H, W, K}, -1, idx_options);
+  torch::Tensor zbuf = torch::full({N, H, W, K}, -1, real_options);
+  torch::Tensor pix_dists = torch::full({N, H, W, K}, -1, real_options);
+
+  auto points_a = points.accessor<float, 2>();
+  auto radius_a = radius.accessor<float, 1>();
+  auto point_idxs_a = point_idxs.accessor<int32_t, 4>();
+  auto zbuf_a = zbuf.accessor<float, 4>();
+  auto pix_dists_a = pix_dists.accessor<float, 4>();
+
+  for (int n = 0; n < N; ++n) {
+    // Range [first, last) of this cloud's points inside the packed tensor.
+    const int first = cloud_to_packed_first_idx[n].item().to<int32_t>();
+    const int last = first + num_points_per_cloud[n].item().to<int32_t>();
+
+    for (int yi = 0; yi < H; ++yi) {
+      // Rows are flipped so that +Y points upwards in the image.
+      const float yf = PixToNonSquareNdc(H - 1 - yi, H, W);
+
+      for (int xi = 0; xi < W; ++xi) {
+        // Columns are flipped so that +X points to the left in the image.
+        const float xf = PixToNonSquareNdc(W - 1 - xi, W, H);
+
+        // Max-heap keyed on z first, so the top is the farthest retained hit
+        // and popping it keeps the K nearest ones.
+        std::priority_queue<Hit> nearest;
+        for (int p = first; p < last; ++p) {
+          const float pz = points_a[p][2];
+          if (pz < 0) {
+            // Points with negative z are never rasterized.
+            continue;
+          }
+          const float dx = points_a[p][0] - xf;
+          const float dy = points_a[p][1] - yf;
+          const float dist2 = dx * dx + dy * dy;
+          const float p_radius = radius_a[p];
+          if (!(dist2 < p_radius * p_radius)) {
+            // The pixel lies outside this point's disc.
+            continue;
+          }
+          nearest.emplace(pz, p, dist2);
+          if ((int)nearest.size() > K) {
+            nearest.pop();
+          }
+        }
+
+        // Drain the heap from farthest to nearest, filling the output slots
+        // back to front so that slot 0 ends up holding the smallest z.
+        for (int slot = (int)nearest.size() - 1; slot >= 0; --slot) {
+          const Hit hit = nearest.top();
+          nearest.pop();
+          zbuf_a[n][yi][xi][slot] = std::get<0>(hit);
+          point_idxs_a[n][yi][xi][slot] = std::get<1>(hit);
+          pix_dists_a[n][yi][xi][slot] = std::get<2>(hit);
+        }
+      }
+    }
+  }
+  return std::make_tuple(point_idxs, zbuf, pix_dists);
+}
+
+// Coarse rasterization pass on the CPU: for every cloud and every bin of
+// bin_size x bin_size pixels, list the packed indices of the points whose
+// axis-aligned bounding square (center +/- radius) overlaps the bin.
+//
+// Returns an int32 tensor of shape (N, BH, BW, max_points_per_bin), padded
+// with -1. Raises an error if a bin would receive more than
+// max_points_per_bin points.
+torch::Tensor RasterizePointsCoarseCpu(
+    const torch::Tensor& points, // (P, 3)
+    const torch::Tensor& cloud_to_packed_first_idx, // (N)
+    const torch::Tensor& num_points_per_cloud, // (N)
+    const std::tuple<int, int> image_size,
+    const torch::Tensor& radius,
+    const int bin_size,
+    const int max_points_per_bin) {
+  const int32_t N = cloud_to_packed_first_idx.size(0); // batch_size.
+  const int M = max_points_per_bin;
+
+  // Note: the image size is held as float here.
+  const float H = std::get<0>(image_size);
+  const float W = std::get<1>(image_size);
+
+  // Number of bins per axis, rounded up. H and W are floats, so the division
+  // is a floating point one and the result is truncated when stored as int.
+  const int BH = 1 + (H - 1) / bin_size;
+  const int BW = 1 + (W - 1) / bin_size;
+
+  auto opts = num_points_per_cloud.options().dtype(torch::kInt32);
+  // points_per_bin is filled in below but only bin_points is returned.
+  torch::Tensor points_per_bin = torch::zeros({N, BH, BW}, opts);
+  torch::Tensor bin_points = torch::full({N, BH, BW, M}, -1, opts);
+
+  auto points_a = points.accessor<float, 2>();
+  auto points_per_bin_a = points_per_bin.accessor<int32_t, 3>();
+  auto bin_points_a = bin_points.accessor<int32_t, 4>();
+  auto radius_a = radius.accessor<float, 1>();
+
+  // Extent of one bin along x, in NDC units.
+  const float ndc_x_range = NonSquareNdcRange(W, H);
+  const float pixel_width_x = ndc_x_range / W;
+  const float bin_width_x = pixel_width_x * bin_size;
+
+  // Extent of one bin along y, in NDC units.
+  const float ndc_y_range = NonSquareNdcRange(H, W);
+  const float pixel_width_y = ndc_y_range / H;
+  const float bin_width_y = pixel_width_y * bin_size;
+
+  for (int n = 0; n < N; ++n) {
+    // Loop through each pointcloud in the batch.
+    // Get the start index of the points in points_packed and the num points
+    // in the point cloud.
+    const int point_start_idx =
+        cloud_to_packed_first_idx[n].item().to<int32_t>();
+    const int point_stop_idx =
+        (point_start_idx + num_points_per_cloud[n].item().to<int32_t>());
+
+    // NOTE(review): the first bin starts at -1.0 on both axes while the bin
+    // widths come from NonSquareNdcRange; confirm that this covers the whole
+    // NDC range for non-square images.
+    float bin_y_min = -1.0f;
+    float bin_y_max = bin_y_min + bin_width_y;
+
+    // Iterate through the horizontal bins from top to bottom.
+    for (int by = 0; by < BH; by++) {
+      float bin_x_min = -1.0f;
+      float bin_x_max = bin_x_min + bin_width_x;
+
+      // Iterate through bins on this horizontal line, left to right.
+      for (int bx = 0; bx < BW; bx++) {
+        int32_t points_hit = 0;
+        for (int p = point_start_idx; p < point_stop_idx; ++p) {
+          float px = points_a[p][0];
+          float py = points_a[p][1];
+          float pz = points_a[p][2];
+          const float p_radius = radius_a[p];
+          // Points with negative z are skipped.
+          if (pz < 0) {
+            continue;
+          }
+          // Axis-aligned bounding square of the point's disc.
+          float point_x_min = px - p_radius;
+          float point_x_max = px + p_radius;
+          float point_y_min = py - p_radius;
+          float point_y_max = py + p_radius;
+
+          // Closed-interval overlap test (both comparisons are inclusive), so
+          // a point whose extent exactly touches a bin boundary is recorded
+          // in both of the neighbouring bins.
+          bool x_hit = (point_x_min <= bin_x_max) && (bin_x_min <= point_x_max);
+          bool y_hit = (point_y_min <= bin_y_max) && (bin_y_min <= point_y_max);
+          if (x_hit && y_hit) {
+            // Got too many points for this bin, so throw an error.
+            if (points_hit >= max_points_per_bin) {
+              AT_ERROR("Got too many points per bin");
+            }
+            // The current point falls in the current bin, so
+            // record it.
+            bin_points_a[n][by][bx][points_hit] = p;
+            points_hit++;
+          }
+        }
+        // Record the number of points found in this bin
+        points_per_bin_a[n][by][bx] = points_hit;
+
+        // Shift the bin to the right for the next loop iteration
+        bin_x_min = bin_x_max;
+        bin_x_max = bin_x_min + bin_width_x;
+      }
+      // Shift the bin down for the next loop iteration
+      bin_y_min = bin_y_max;
+      bin_y_max = bin_y_min + bin_width_y;
+    }
+  }
+  return bin_points;
+}
+
+torch::Tensor RasterizePointsBackwardCpu(
+    const torch::Tensor& points, // (P, 3)
+    const torch::Tensor& idxs, // (N, H, W, K)
+    const torch::Tensor& grad_zbuf, // (N, H, W, K)
+    const torch::Tensor& grad_dists) { // (N, H, W, K)
+  // Backward pass of point rasterization on the CPU: scatters the incoming
+  // gradients of the z-buffer and of the squared xy distances back onto the
+  // points that produced them. Returns a (P, 3) gradient tensor.
+  const int P = points.size(0);
+  const int N = idxs.size(0);
+  const int H = idxs.size(1);
+  const int W = idxs.size(2);
+  const int K = idxs.size(3);
+
+  torch::Tensor grad_points = torch::zeros({P, 3}, points.options());
+
+  auto points_a = points.accessor<float, 2>();
+  auto idxs_a = idxs.accessor<int32_t, 4>();
+  auto grad_dists_a = grad_dists.accessor<float, 4>();
+  auto grad_zbuf_a = grad_zbuf.accessor<float, 4>();
+  auto grad_points_a = grad_points.accessor<float, 2>();
+
+  for (int n = 0; n < N; ++n) { // Images in the batch
+    for (int y = 0; y < H; ++y) { // Rows of the image
+      // Rows are flipped so that +Y points upwards in the image.
+      const float yf = PixToNonSquareNdc(H - 1 - y, H, W);
+
+      for (int x = 0; x < W; ++x) { // Pixels of the row
+        // Columns are flipped so that +X points to the left in the image.
+        const float xf = PixToNonSquareNdc(W - 1 - x, W, H);
+
+        // Per-pixel views over the K slots.
+        const auto pix_idxs = idxs_a[n][y][x];
+        const auto pix_grad_dists = grad_dists_a[n][y][x];
+        const auto pix_grad_zbuf = grad_zbuf_a[n][y][x];
+
+        for (int k = 0; k < K; ++k) { // Points recorded for the pixel
+          const int p = pix_idxs[k];
+          if (p < 0) {
+            // A negative index marks padding; stop at the first one.
+            break;
+          }
+          const float upstream = pix_grad_dists[k];
+          const float dx = points_a[p][0] - xf;
+          const float dy = points_a[p][1] - yf;
+          // dists[n][y][x][k] = dx * dx + dy * dy, hence d/dpx = 2 * dx and
+          // d/dpy = 2 * dy, each scaled by the upstream gradient.
+          const float contrib_x = 2.0f * upstream * dx;
+          const float contrib_y = 2.0f * upstream * dy;
+          grad_points_a[p][0] += contrib_x;
+          grad_points_a[p][1] += contrib_y;
+          // The z gradient is passed through unchanged.
+          grad_points_a[p][2] += pix_grad_zbuf[k];
+        }
+      }
+    }
+  }
+  return grad_points;
+}
diff --git a/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu b/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu
new file mode 100644
index 0000000000000000000000000000000000000000..70cef75c7b4bd57cdf4615f415786bbefc288a57
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <cub/cub.cuh>
+#include "utils/warp_reduce.cuh"
+
+// Farthest point sampling kernel. One thread block handles one batch element
+// (batch_idx = blockIdx.x); block_size must equal the number of threads the
+// block is launched with, since it is used both as the loop stride and as the
+// BlockReduce size.
+//
+// Args:
+//   points: (N, P, D) point coordinates.
+//   lengths: (N,) number of valid points per batch element.
+//   K: (N,) number of samples requested per batch element.
+//   idxs: (N, max_K) output, receives the selected indices.
+//   min_point_dist: (N, P) scratch buffer, read and updated in place. It holds
+//       each point's squared distance to the closest already selected point
+//       and is expected to be initialized to a large value by the caller.
+//   start_idxs: (N,) index of the first selected point per batch element.
+template <unsigned int block_size>
+__global__ void FarthestPointSamplingKernel(
+    // clang-format off
+    const at::PackedTensorAccessor64<float, 3, at::RestrictPtrTraits> points,
+    const at::PackedTensorAccessor64<int64_t, 1, at::RestrictPtrTraits> lengths,
+    const at::PackedTensorAccessor64<int64_t, 1, at::RestrictPtrTraits> K,
+    at::PackedTensorAccessor64<int64_t, 2, at::RestrictPtrTraits> idxs,
+    at::PackedTensorAccessor64<float, 2, at::RestrictPtrTraits> min_point_dist,
+    const at::PackedTensorAccessor64<int64_t, 1, at::RestrictPtrTraits> start_idxs
+    // clang-format on
+) {
+  // Block-wide arg-max reduction over (point index, distance) pairs.
+  typedef cub::BlockReduce<
+      cub::KeyValuePair<int64_t, float>,
+      block_size,
+      cub::BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY>
+      BlockReduce;
+  __shared__ typename BlockReduce::TempStorage temp_storage;
+  // Shared slot used to broadcast the index chosen by thread 0 to the block.
+  __shared__ int64_t selected_store;
+
+  // Get constants
+  const int64_t N = points.size(0);
+  const int64_t P = points.size(1);
+  const int64_t D = points.size(2);
+
+  // Get batch index and thread index
+  const int64_t batch_idx = blockIdx.x;
+  const size_t tid = threadIdx.x;
+
+  // If K is greater than the number of points in the pointcloud
+  // we only need to iterate until the smaller value is reached.
+  const int64_t k_n = min(K[batch_idx], lengths[batch_idx]);
+
+  // Write the first selected point to global memory in the first thread
+  // NOTE(review): this write happens even when k_n is 0; confirm that callers
+  // never pass an output with zero columns.
+  int64_t selected = start_idxs[batch_idx];
+  if (tid == 0)
+    idxs[batch_idx][0] = selected;
+
+  // Iterate to find k_n sampled points
+  for (int64_t k = 1; k < k_n; ++k) {
+    // Keep track of the maximum of the minimum distance to previously selected
+    // points seen by this thread
+    int64_t max_dist_idx = 0;
+    float max_dist = -1.0;
+
+    // Iterate through all the points in this pointcloud. For already selected
+    // points, the minimum distance to the set of previously selected points
+    // will be 0.0 so they won't be selected again.
+    // Each thread visits points tid, tid + block_size, tid + 2 * block_size...
+    for (int64_t p = tid; p < lengths[batch_idx]; p += block_size) {
+      // Calculate the (squared) distance to the last selected point
+      float dist2 = 0.0;
+      for (int64_t d = 0; d < D; ++d) {
+        float diff = points[batch_idx][selected][d] - points[batch_idx][p][d];
+        dist2 += (diff * diff);
+      }
+
+      // If the distance of point p to the last selected point is
+      // less than the previous minimum distance of p to the set of selected
+      // points, then update the corresponding value in min_point_dist
+      // so it always contains the min distance.
+      const float p_min_dist = min(dist2, min_point_dist[batch_idx][p]);
+      min_point_dist[batch_idx][p] = p_min_dist;
+
+      // Update the max distance and point idx for this thread.
+      max_dist_idx = (p_min_dist > max_dist) ? p : max_dist_idx;
+      max_dist = (p_min_dist > max_dist) ? p_min_dist : max_dist;
+    }
+
+    // max_dist, max_dist_idx are now the max point and idx seen by this thread.
+    // Now find the index corresponding to the maximum distance seen by any
+    // thread. (This value is only on thread 0.)
+    selected =
+        BlockReduce(temp_storage)
+            .Reduce(
+                cub::KeyValuePair<int64_t, float>(max_dist_idx, max_dist),
+                cub::ArgMax(),
+                block_size)
+            .key;
+
+    if (tid == 0) {
+      // Write the farthest point for iteration k to global memory
+      idxs[batch_idx][k] = selected;
+      selected_store = selected;
+    }
+
+    // Ensure `selected` in all threads equals the global maximum.
+    // The barrier makes thread 0's write to selected_store visible before the
+    // other threads read it.
+    __syncthreads();
+    selected = selected_store;
+  }
+}
+
+// Host entry point for farthest point sampling on the GPU.
+//
+// Args:
+//   points: (N, P, D) float tensor of point coordinates.
+//   lengths: (N,) int64 tensor with the number of valid points per cloud.
+//   K: (N,) int64 tensor with the number of samples wanted per cloud.
+//   start_idxs: (N,) int64 tensor with the first point to select per cloud.
+//
+// Returns:
+//   (N, max(K)) int64 tensor of selected point indices, initialized to -1.
+//
+// All tensors must live on the same GPU; lengths, K and start_idxs must share
+// a dtype. One thread block is launched per batch element.
+at::Tensor FarthestPointSamplingCuda(
+    const at::Tensor& points, // (N, P, 3)
+    const at::Tensor& lengths, // (N,)
+    const at::Tensor& K, // (N,)
+    const at::Tensor& start_idxs) {
+  // Check inputs are on the same device
+  at::TensorArg p_t{points, "points", 1}, lengths_t{lengths, "lengths", 2},
+      k_t{K, "K", 3}, start_idxs_t{start_idxs, "start_idxs", 4};
+  at::CheckedFrom c = "FarthestPointSamplingCuda";
+  at::checkAllSameGPU(c, {p_t, lengths_t, k_t, start_idxs_t});
+  at::checkAllSameType(c, {lengths_t, k_t, start_idxs_t});
+
+  // Set the device for the kernel launch based on the device of points
+  at::cuda::CUDAGuard device_guard(points.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  TORCH_CHECK(
+      points.size(0) == lengths.size(0),
+      "Point and lengths must have the same batch dimension");
+
+  TORCH_CHECK(
+      points.size(0) == K.size(0),
+      "Points and K must have the same batch dimension");
+
+  const int64_t N = points.size(0);
+  const int64_t P = points.size(1);
+  const int64_t max_K = at::max(K).item<int64_t>();
+
+  // Initialize the output tensor with the sampled indices
+  auto idxs = at::full({N, max_K}, -1, lengths.options());
+  // Running minimum squared distance of every point to the selected set,
+  // started at a large value so the first comparison always lowers it.
+  auto min_point_dist = at::full({N, P}, 1e10, points.options());
+
+  if (N == 0 || P == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return idxs;
+  }
+
+  // Set the number of blocks to the batch size so that the
+  // block reduction step can be done for each pointcloud
+  // to find the max distance point in the pointcloud at each iteration.
+  const size_t blocks = N;
+
+  // Set the threads to the nearest power of 2 of the number of
+  // points in the pointcloud (up to the max threads in a block).
+  // This will ensure each thread processes the minimum necessary number of
+  // points (P/threads).
+  const int points_pow_2 = std::log(static_cast<double>(P)) / std::log(2.0);
+
+  // Max possible threads per block
+  const int MAX_THREADS_PER_BLOCK = 1024;
+  const size_t threads = max(min(1 << points_pow_2, MAX_THREADS_PER_BLOCK), 2);
+
+  // Create the accessors
+  auto points_a = points.packed_accessor64<float, 3, at::RestrictPtrTraits>();
+  auto lengths_a =
+      lengths.packed_accessor64<int64_t, 1, at::RestrictPtrTraits>();
+  auto K_a = K.packed_accessor64<int64_t, 1, at::RestrictPtrTraits>();
+  auto idxs_a = idxs.packed_accessor64<int64_t, 2, at::RestrictPtrTraits>();
+  auto start_idxs_a =
+      start_idxs.packed_accessor64<int64_t, 1, at::RestrictPtrTraits>();
+  auto min_point_dist_a =
+      min_point_dist.packed_accessor64<float, 2, at::RestrictPtrTraits>();
+
+  // TempStorage for the reduction uses static shared memory only.
+  size_t shared_mem = 0;
+
+  // Support a case for all powers of 2 up to MAX_THREADS_PER_BLOCK possible per
+  // block. The template argument must match the launched thread count, and
+  // the grid size must always be `blocks` (one block per batch element).
+  switch (threads) {
+    case 1024:
+      FarthestPointSamplingKernel<1024>
+          <<<blocks, threads, shared_mem, stream>>>(
+              points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a);
+      break;
+    case 512:
+      FarthestPointSamplingKernel<512><<<blocks, threads, shared_mem, stream>>>(
+          points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a);
+      break;
+    case 256:
+      FarthestPointSamplingKernel<256><<<blocks, threads, shared_mem, stream>>>(
+          points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a);
+      break;
+    case 128:
+      FarthestPointSamplingKernel<128><<<blocks, threads, shared_mem, stream>>>(
+          points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a);
+      break;
+    case 64:
+      FarthestPointSamplingKernel<64><<<blocks, threads, shared_mem, stream>>>(
+          points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a);
+      break;
+    case 32:
+      FarthestPointSamplingKernel<32><<<blocks, threads, shared_mem, stream>>>(
+          points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a);
+      break;
+    case 16:
+      FarthestPointSamplingKernel<16><<<blocks, threads, shared_mem, stream>>>(
+          points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a);
+      break;
+    case 8:
+      FarthestPointSamplingKernel<8><<<blocks, threads, shared_mem, stream>>>(
+          points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a);
+      break;
+    case 4:
+      // Fixed: the grid size used to be `threads`, which launched 4 blocks
+      // regardless of the batch size.
+      FarthestPointSamplingKernel<4><<<blocks, threads, shared_mem, stream>>>(
+          points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a);
+      break;
+    case 2:
+      // Fixed: the grid size used to be `threads`, which launched 2 blocks
+      // regardless of the batch size.
+      FarthestPointSamplingKernel<2><<<blocks, threads, shared_mem, stream>>>(
+          points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a);
+      break;
+    default:
+      // Not expected to be reached: `threads` is computed above as a power of
+      // two clamped to [2, MAX_THREADS_PER_BLOCK].
+      FarthestPointSamplingKernel<1024>
+          <<<blocks, threads, shared_mem, stream>>>(
+              points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a);
+  }
+
+  AT_CUDA_CHECK(cudaGetLastError());
+  return idxs;
+}
diff --git a/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.h b/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.h
new file mode 100644
index 0000000000000000000000000000000000000000..7b613d358880936863c2a56b82dee77d93d777f9
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/extension.h>
+#include <tuple>
+#include "utils/pytorch3d_cutils.h"
+
+// Iterative farthest point sampling algorithm [1] to subsample a set of
+// K points from a given pointcloud. At each iteration, a point is selected
+// which has the largest nearest neighbor distance to any of the
+// already selected points.
+
+// Farthest point sampling provides more uniform coverage of the input
+// point cloud compared to uniform random sampling.
+
+// [1] Charles R. Qi et al, "PointNet++: Deep Hierarchical Feature Learning
+//     on Point Sets in a Metric Space", NeurIPS 2017.
+
+// Args:
+//     points: (N, P, D) float32 Tensor containing the batch of pointclouds.
+//     lengths: (N,) long Tensor giving the number of points in each pointcloud
+//        (to support heterogeneous batches of pointclouds).
+//     K: a tensor of length (N,) giving the number of
+//        samples to select for each element in the batch.
+//        The number of samples is typically << P.
+//     start_idxs: (N,) long Tensor giving the index of the first point to
+//        sample. Default is all 0. When a random start point is required,
+//        start_idxs[n] should be set to a random index in the half-open range
+//        [0, lengths[n]) for batch element n.
+// Returns:
+//     selected_indices: (N, K) array of selected indices. If the values in
+//        K are not all the same, then the shape will be (N, max(K)), and
+//        padded with -1 for batch elements where k_i < max(K). The selected
+//        points are gathered in the pytorch autograd wrapper.
+
+at::Tensor FarthestPointSamplingCuda(
+    const at::Tensor& points,
+    const at::Tensor& lengths,
+    const at::Tensor& K,
+    const at::Tensor& start_idxs);
+
+at::Tensor FarthestPointSamplingCpu(
+    const at::Tensor& points,
+    const at::Tensor& lengths,
+    const at::Tensor& K,
+    const at::Tensor& start_idxs);
+
+// Exposed implementation.
+// Dispatches to the CUDA implementation when any of the input tensors lives
+// on a GPU (every input is then required to be a CUDA tensor) and to the CPU
+// implementation otherwise. Raises an error when a CUDA tensor is passed but
+// the extension was built without WITH_CUDA.
+// Declared inline because the definition lives in a header.
+inline at::Tensor FarthestPointSampling(
+    const at::Tensor& points,
+    const at::Tensor& lengths,
+    const at::Tensor& K,
+    const at::Tensor& start_idxs) {
+  // start_idxs takes part in the device test as well, so that a CUDA
+  // start_idxs is never handed to the CPU implementation.
+  if (points.is_cuda() || lengths.is_cuda() || K.is_cuda() ||
+      start_idxs.is_cuda()) {
+#ifdef WITH_CUDA
+    CHECK_CUDA(points);
+    CHECK_CUDA(lengths);
+    CHECK_CUDA(K);
+    CHECK_CUDA(start_idxs);
+    return FarthestPointSamplingCuda(points, lengths, K, start_idxs);
+#else
+    AT_ERROR("Not compiled with GPU support.");
+#endif
+  }
+  return FarthestPointSamplingCpu(points, lengths, K, start_idxs);
+}
diff --git a/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points_cpu.cpp b/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cd533825f4da75e232bc493c28a0872e477d6db7
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points_cpu.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/extension.h>
+#include <iterator>
+#include <random>
+#include <vector>
+
+// CPU implementation of iterative farthest point sampling.
+//
+// Args:
+//   points: (N, P, D) float tensor of point coordinates.
+//   lengths: (N,) int64 tensor with the number of valid points per cloud.
+//   K: (N,) int64 tensor with the number of samples wanted per cloud.
+//   start_idxs: (N,) int64 tensor with the first point to select per cloud.
+//
+// Returns:
+//   (N, max(K)) int64 tensor of selected point indices, padded with -1.
+//
+// Raises an error if start_idxs[n] does not index a point of a non-empty
+// cloud n.
+at::Tensor FarthestPointSamplingCpu(
+    const at::Tensor& points,
+    const at::Tensor& lengths,
+    const at::Tensor& K,
+    const at::Tensor& start_idxs) {
+  // Get constants
+  const int64_t N = points.size(0);
+  const int64_t P = points.size(1);
+  const int64_t D = points.size(2);
+  const int64_t max_K = torch::max(K).item<int64_t>();
+
+  // Initialize an output array for the sampled indices
+  // of shape (N, max_K)
+  auto long_opts = lengths.options();
+  torch::Tensor sampled_indices = torch::full({N, max_K}, -1, long_opts);
+
+  // With zero output columns there is no slot for even the start index, so
+  // writing it below would be out of bounds. Nothing to sample.
+  if (max_K == 0) {
+    return sampled_indices;
+  }
+
+  // Create accessors for all tensors
+  auto points_a = points.accessor<float, 3>();
+  auto lengths_a = lengths.accessor<int64_t, 1>();
+  auto k_a = K.accessor<int64_t, 1>();
+  auto sampled_indices_a = sampled_indices.accessor<int64_t, 2>();
+  auto start_idxs_a = start_idxs.accessor<int64_t, 1>();
+
+  // Initialize a mask to prevent duplicates
+  // If true, the point has already been selected.
+  std::vector<unsigned char> selected_points_mask(P, false);
+
+  // Initialize to infinity a vector of
+  // distances from each point to any of the previously selected points
+  std::vector<float> dists(P, std::numeric_limits<float>::max());
+
+  for (int64_t n = 0; n < N; ++n) {
+    const int64_t num_points = lengths_a[n];
+
+    // Resize and reset points mask and distances for each batch
+    selected_points_mask.resize(num_points);
+    dists.resize(num_points);
+    std::fill(selected_points_mask.begin(), selected_points_mask.end(), false);
+    std::fill(dists.begin(), dists.end(), std::numeric_limits<float>::max());
+
+    // Get the starting point index and save it
+    int64_t last_idx = start_idxs_a[n];
+    // The start index is used to index both the mask and the points, so it
+    // has to be a valid point of a non-empty cloud.
+    TORCH_CHECK(
+        num_points == 0 || (last_idx >= 0 && last_idx < num_points),
+        "start_idxs must index a valid point of each pointcloud");
+    sampled_indices_a[n][0] = last_idx;
+
+    // Mark the starting point as already selected. An empty cloud has no
+    // mask entry to set.
+    if (num_points > 0) {
+      selected_points_mask[last_idx] = true;
+    }
+
+    // For heterogeneous pointclouds, use the minimum of the
+    // length for that cloud compared to K as the number of
+    // points to sample
+    const int64_t batch_k = std::min(num_points, k_a[n]);
+
+    // Iteratively select batch_k points per batch
+    for (int64_t k = 1; k < batch_k; ++k) {
+      // Iterate through all the points
+      for (int64_t p = 0; p < num_points; ++p) {
+        if (selected_points_mask[p]) {
+          // For already selected points set the distance to 0.0
+          dists[p] = 0.0;
+          continue;
+        }
+
+        // Calculate the (squared) distance to the last selected point
+        float dist2 = 0.0;
+        for (int64_t d = 0; d < D; ++d) {
+          float diff = points_a[n][last_idx][d] - points_a[n][p][d];
+          dist2 += diff * diff;
+        }
+
+        // If the distance of this point to the last selected point is closer
+        // than the distance to any of the previously selected points, then
+        // update this distance
+        if (dist2 < dists[p]) {
+          dists[p] = dist2;
+        }
+      }
+
+      // The aim is to pick the point that has the largest
+      // nearest neighbour distance to any of the already selected points
+      auto itr = std::max_element(dists.begin(), dists.end());
+      last_idx = std::distance(dists.begin(), itr);
+
+      // Save selected point
+      sampled_indices_a[n][k] = last_idx;
+
+      // Set the mask value to true to prevent duplicates.
+      selected_points_mask[last_idx] = true;
+    }
+  }
+
+  return sampled_indices;
+}
diff --git a/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf.cu b/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf.cu
new file mode 100644
index 0000000000000000000000000000000000000000..885313ac3c8aaba173c4691a1189ea43ddd39f1e
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf.cu
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+
+// There is no intermediate memory, so no reason not to have blocksize=32.
+// 256 is a reasonable number of blocks.
+
+// DESIGN
+// We exploit the fact that n_samples is not tiny.
+// A chunk of work is T*blocksize many samples from
+// a single batch element.
+// For each batch element there will be
+// chunks_per_batch = 1 + (n_samples-1)/(T*blocksize) of them.
+// The number of potential chunks to do is
+// n_chunks = chunks_per_batch * n_batches.
+// These chunks are divided among the gridSize-many blocks.
+// In block b, we work on chunks b, b+gridSize, b+2*gridSize etc .
+// In chunk i, we work on batch_element i/chunks_per_batch
+// on samples starting from (i%chunks_per_batch) * (T*blocksize)
+
+// BEGIN HYPOTHETICAL
+// Another option (not implemented) if batch_size was always large
+// would be as follows.
+
+// A chunk of work is S samples from each of blocksize-many
+// batch elements.
+// For each batch element there will be
+// chunks_per_batch = (1+(n_samples-1)/S) of them.
+// The number of potential chunks to do is
+// n_chunks = chunks_per_batch * (1+(n_batches-1)/blocksize)
+// These chunks are divided among the gridSize-many blocks.
+// In block b, we work on chunks b, b+gridSize, b+2*gridSize etc .
+// In chunk i, we work on samples starting from S*(i%chunks_per_batch)
+// on batch elements starting from blocksize*(i/chunks_per_batch).
+// END HYPOTHETICAL
+
+__global__ void SamplePdfCudaKernel(
+    const float* __restrict__ bins,
+    const float* __restrict__ weights,
+    float* __restrict__ outputs,
+    float eps,
+    const int T,
+    const int64_t batch_size,
+    const int64_t n_bins,
+    const int64_t n_samples) {
+  // Inverse-CDF sampling kernel. `outputs` holds the quantiles on entry and
+  // is overwritten in place with the corresponding samples. Rows of `bins`
+  // have n_bins + 1 entries, rows of `weights` have n_bins entries.
+
+  // A chunk is the set of samples one block handles at a time: every thread
+  // of the block produces T of them.
+  const int64_t chunk_len = T * blockDim.x;
+  const int64_t chunks_per_batch = 1 + (n_samples - 1) / chunk_len;
+  const int64_t n_chunks = chunks_per_batch * batch_size;
+
+  // This block works on chunks blockIdx.x, blockIdx.x + gridDim.x, ...
+  for (int64_t i_chunk = blockIdx.x; i_chunk < n_chunks; i_chunk += gridDim.x) {
+    // Every chunk belongs to exactly one batch element (row).
+    const int64_t row = i_chunk / chunks_per_batch;
+    const int64_t first_sample = (i_chunk % chunks_per_batch) * chunk_len;
+    const float* const row_weights = weights + n_bins * row;
+    const float* const row_bins = bins + (1 + n_bins) * row;
+    float* const row_out = outputs + n_samples * row + first_sample;
+
+    // Per-row preprocessing, repeated identically by every thread of the
+    // block: the total weight, offset by eps.
+    float weight_sum = eps;
+    for (int64_t b = 0; b < n_bins; ++b) {
+      weight_sum += row_weights[b];
+    }
+
+    for (int t = 0; t < T; ++t) {
+      // Position, within the chunk, of the t-th sample of this thread.
+      const int64_t offset = threadIdx.x + t * blockDim.x;
+      // Threads whose sample would fall past the end of the row skip it.
+      if (first_sample + offset < n_samples) {
+        // Scale the stored quantile to the un-normalized weights.
+        float remaining = weight_sum * row_out[offset];
+
+        // Walk along the weights until the bin containing the quantile is
+        // found. The trip count differs between threads.
+        int64_t i_bin = 0;
+        for (; i_bin + 1 < n_bins && remaining > row_weights[i_bin]; ++i_bin) {
+          remaining -= row_weights[i_bin];
+        }
+
+        // Interpolate linearly inside the bin and write the sample back.
+        const float left_edge = row_bins[i_bin];
+        const float right_edge = row_bins[i_bin + 1];
+        const float bin_weight = row_weights[i_bin];
+        float sample = left_edge;
+        if (remaining > bin_weight) {
+          // Quantile lies beyond the last bin's weight: clamp to its end.
+          sample = right_edge;
+        } else if (bin_weight > eps) {
+          // Bins whose weight is at most eps keep the left edge.
+          sample += (remaining / bin_weight) * (right_edge - left_edge);
+        }
+        row_out[offset] = sample;
+      }
+    }
+  }
+}
+
+// Host entry point for sampling from piecewise-constant PDFs on the GPU.
+//
+// Args:
+//   bins: float tensor whose rows hold the n_bins + 1 bin edges.
+//   weights: (batch_size, n_bins) float tensor of per-bin weights.
+//   outputs: (batch_size, n_samples) float tensor. Holds the quantiles on
+//       entry and is overwritten in place with the drawn samples.
+//   eps: small constant added to the total weight and used as the threshold
+//       below which a bin is treated as empty.
+//
+// All tensors must be on the same GPU and share a dtype.
+void SamplePdfCuda(
+    const at::Tensor& bins,
+    const at::Tensor& weights,
+    const at::Tensor& outputs,
+    float eps) {
+  // Check inputs are on the same device
+  at::TensorArg bins_t{bins, "bins", 1}, weights_t{weights, "weights", 2},
+      outputs_t{outputs, "outputs", 3};
+  at::CheckedFrom c = "SamplePdfCuda";
+  at::checkAllSameGPU(c, {bins_t, weights_t, outputs_t});
+  at::checkAllSameType(c, {bins_t, weights_t, outputs_t});
+
+  // Set the device for the kernel launch based on the device of the input
+  at::cuda::CUDAGuard device_guard(bins.device());
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  const int64_t batch_size = bins.size(0);
+  const int64_t n_bins = weights.size(1);
+  const int64_t n_samples = outputs.size(1);
+
+  // Nothing to draw. With an empty batch the grid size below would be zero,
+  // which is not a valid launch configuration, so return before launching.
+  if (batch_size == 0 || n_samples == 0) {
+    AT_CUDA_CHECK(cudaGetLastError());
+    return;
+  }
+
+  // Each thread draws T samples per chunk; a single sample per thread is
+  // enough when one block already covers the whole row.
+  const int64_t threads = 32;
+  const int64_t T = n_samples <= threads ? 1 : 2;
+  const int64_t chunks_per_batch = 1 + (n_samples - 1) / (T * threads);
+  const int64_t n_chunks = chunks_per_batch * batch_size;
+
+  // One block per chunk, capped; the kernel loops when there are more chunks
+  // than blocks.
+  const int64_t max_blocks = 1024;
+  const int64_t blocks = n_chunks < max_blocks ? n_chunks : max_blocks;
+
+  SamplePdfCudaKernel<<<blocks, threads, 0, stream>>>(
+      bins.contiguous().data_ptr<float>(),
+      weights.contiguous().data_ptr<float>(),
+      outputs.data_ptr<float>(), // Checked contiguous in header file.
+      eps,
+      T,
+      batch_size,
+      n_bins,
+      n_samples);
+
+  AT_CUDA_CHECK(cudaGetLastError());
+}
diff --git a/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf.h b/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf.h
new file mode 100644
index 0000000000000000000000000000000000000000..899117df797cf03c3c207e6205d1607dd3707f3d
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/csrc/autograd/VariableTypeUtils.h>
+#include <torch/extension.h>
+#include <cstdio>
+#include <tuple>
+#include "utils/pytorch3d_cutils.h"
+
+// ****************************************************************************
+// *                          SamplePdf                                       *
+// ****************************************************************************
+
+//  Samples from probability density functions defined by bin edges `bins` and
+//  the non-negative per-bin probabilities `weights`.
+
+//  Args:
+//      bins: FloatTensor of shape `(batch_size, n_bins+1)` denoting the edges
+//      of the sampling bins.
+
+//      weights: FloatTensor of shape `(batch_size, n_bins)` containing
+//      non-negative numbers representing the probability of sampling the
+//      corresponding bin.
+
+//      uniforms: The quantiles to draw, FloatTensor of shape
+//      `(batch_size, n_samples)`. Not a separate argument: passed in `outputs`.
+
+//      outputs: On call, this contains the quantiles to draw. It is overwritten
+//              with the drawn samples. FloatTensor of shape
+//              `(batch_size, n_samples)`, where `n_samples` samples are drawn
+//               from each distribution.
+
+//      eps: A constant preventing division by zero in case empty bins are
+//      present.
+
+//  Not differentiable
+
+#ifdef WITH_CUDA
+void SamplePdfCuda(
+    const torch::Tensor& bins,
+    const torch::Tensor& weights,
+    const torch::Tensor& outputs,
+    float eps); // CUDA implementation; only declared when WITH_CUDA is defined
+#endif
+// CPU implementation; declared unconditionally. Same argument list as above.
+void SamplePdfCpu(
+    const torch::Tensor& bins,
+    const torch::Tensor& weights,
+    const torch::Tensor& outputs,
+    float eps);
+
+inline void SamplePdf(
+    const torch::Tensor& bins, // its device alone selects the CUDA or CPU path
+    const torch::Tensor& weights,
+    const torch::Tensor& outputs, // must be contiguous on either path
+    float eps) {
+  if (bins.is_cuda()) {
+#ifdef WITH_CUDA
+    CHECK_CUDA(weights);
+    CHECK_CONTIGUOUS_CUDA(outputs);
+    torch::autograd::increment_version(outputs); // outputs is written in place
+    SamplePdfCuda(bins, weights, outputs, eps);
+    return;
+#else
+    AT_ERROR("Not compiled with GPU support.");
+#endif
+  }
+  CHECK_CONTIGUOUS(outputs);
+  SamplePdfCpu(bins, weights, outputs, eps); // bumps the version itself
+}
diff --git a/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf_cpu.cpp b/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..272197c6e9d5f8ee19153004310fb0fd8e10b94b
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf_cpu.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/csrc/autograd/VariableTypeUtils.h>
+#include <torch/extension.h>
+#include <algorithm>
+#include <thread>
+#include <vector>
+
+// If the number of bins is the typical 64, it is
+// quicker to use binary search than linear scan.
+// With more bins, it is more important.
+// There is no equivalent CUDA implementation yet.
+#define USE_BINARY_SEARCH
+
+namespace {
+// This worker function does the job of SamplePdf but only on
+// batch elements in [start_batch, end_batch).
+void SamplePdfCpu_worker(
+    const torch::Tensor& bins,
+    const torch::Tensor& weights,
+    const torch::Tensor& outputs,
+    float eps,
+    int64_t start_batch,
+    int64_t end_batch) {
+  const int64_t n_bins = weights.size(1);
+  const int64_t n_samples = outputs.size(1);
+
+  auto bins_a = bins.accessor<float, 2>();
+  auto weights_a = weights.accessor<float, 2>();
+  // outputs is walked as a flat array of rows, so it has to be contiguous.
+  float* output_p = outputs.data_ptr<float>() + start_batch * n_samples;
+#ifdef USE_BINARY_SEARCH
+  std::vector<float> partial_sums(n_bins); // reused for every batch element
+#endif
+
+  for (int64_t i_batch_elt = start_batch; i_batch_elt < end_batch;
+       ++i_batch_elt) {
+    auto bin_a = bins_a[i_batch_elt];
+    auto weight_a = weights_a[i_batch_elt];
+
+    // Here we do the work which has to be done once per batch element.
+    // i.e. (1) finding the total weight. (2) If using binary search,
+    // precompute the partial sums of the weights.
+
+    float total_weight = 0;
+    for (int64_t i_bin = 0; i_bin < n_bins; ++i_bin) {
+      total_weight += weight_a[i_bin];
+#ifdef USE_BINARY_SEARCH
+      partial_sums[i_bin] = total_weight; // inclusive running sum
+#endif
+    }
+    total_weight += eps; // eps is also the "empty bin" threshold used below
+
+    for (int64_t i_sample = 0; i_sample < n_samples; ++i_sample) {
+      // Here we are taking a single random quantile (which is stored
+      // in *output_p) and using it to make a single sample, which we
+      // write back to the same location. First we find which bin
+      // the quantile lives in, either by binary search in the
+      // precomputed partial sums, or by scanning through the weights.
+      // NOTE(review): searching [begin, end - 1) assumes n_bins >= 1.
+      float uniform = total_weight * *output_p;
+#ifdef USE_BINARY_SEARCH
+      int64_t i_bin = std::lower_bound(
+                          partial_sums.begin(), --partial_sums.end(), uniform) -
+          partial_sums.begin();
+      if (i_bin > 0) {
+        uniform -= partial_sums[i_bin - 1]; // make it relative to this bin
+      }
+#else
+      int64_t i_bin = 0;
+      while (i_bin + 1 < n_bins && uniform > weight_a[i_bin]) {
+        uniform -= weight_a[i_bin];
+        ++i_bin;
+      }
+#endif
+
+      // Now i_bin identifies the bin the quantile lives in, we use
+      // straight line interpolation to find the position of the
+      // quantile within the bin, and write it to *output_p.
+
+      float bin_start = bin_a[i_bin];
+      float bin_end = bin_a[i_bin + 1];
+      float bin_weight = weight_a[i_bin];
+      float output_value = bin_start;
+      if (uniform > bin_weight) {
+        output_value = bin_end; // residual overshoots the bin: clamp to its end
+      } else if (bin_weight > eps) {
+        output_value += (uniform / bin_weight) * (bin_end - bin_start);
+      } // otherwise the bin counts as empty and the sample stays at bin_start
+      *output_p = output_value;
+      ++output_p;
+    }
+  }
+}
+
+} // anonymous namespace
+
+void SamplePdfCpu(
+    const torch::Tensor& bins, // size(0) is the batch size to split up
+    const torch::Tensor& weights,
+    const torch::Tensor& outputs, // overwritten in place by the workers
+    float eps) {
+  const int64_t batch_size = bins.size(0);
+  const int64_t max_threads = std::min(4, at::get_num_threads());
+  const int64_t n_threads = std::min(max_threads, batch_size);
+  if (batch_size == 0) {
+    return;
+  }
+
+  // SamplePdfCpu_worker does the work of this function. We send separate ranges
+  // of batch elements to that function in n_threads-1 separate threads.
+  // Each range end is clamped to batch_size: rounding the per-thread count up
+  // can otherwise overshoot (batch_size 5 on 4 threads would hand out [4, 6)
+  // and make a worker read and write one row past the end of the tensors).
+
+  std::vector<std::thread> threads;
+  threads.reserve(n_threads - 1);
+  const int64_t batch_elements_per_thread = 1 + (batch_size - 1) / n_threads;
+  int64_t start_batch = 0;
+  for (int iThread = 0; iThread < n_threads - 1; ++iThread) {
+    const int64_t end_batch =
+        std::min(start_batch + batch_elements_per_thread, batch_size);
+    threads.emplace_back(
+        SamplePdfCpu_worker, bins, weights, outputs, eps,
+        start_batch, end_batch);
+    start_batch = end_batch;
+  }
+
+  // The remaining batch elements are calculated in this thread. If n_threads
+  // is 1 then all the work happens in this line.
+  SamplePdfCpu_worker(bins, weights, outputs, eps, start_batch, batch_size);
+  for (auto&& thread : threads) {
+    thread.join();
+  }
+  torch::autograd::increment_version(outputs); // record the in-place write
+}
diff --git a/pytorch3d/pytorch3d/csrc/utils/dispatch.cuh b/pytorch3d/pytorch3d/csrc/utils/dispatch.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..83f3d69ff40907c396e3d175402d5cf4561142b5
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/utils/dispatch.cuh
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// This file provides utilities for dispatching to specialized versions of
+// functions. This is especially useful for CUDA kernels, since specializing
+// them to particular input sizes can often allow the compiler to unroll loops
+// and place arrays into registers, which can give huge performance speedups.
+//
+// As an example, suppose we have the following function which is specialized
+// based on a compile-time int64_t value:
+//
+// template<typename T, int64_t x>
+// struct SquareOffset {
+//   static void run(T y) {
+//     T val = x * x + y;
+//     std::cout << val << std::endl;
+//   }
+// }
+//
+// This function takes one compile-time argument x, and one run-time argument y.
+// We might want to compile specialized versions of this for x=0, x=1, etc and
+// then dispatch to the correct one based on the runtime value of x.
+// One simple way to achieve this is with a lookup table:
+//
+// template<typename T>
+// void DispatchSquareOffset(const int64_t x, T y) {
+//   if (x == 0) {
+//     SquareOffset<T, 0>::run(y);
+//   } else if (x == 1) {
+//     SquareOffset<T, 1>::run(y);
+//   } else if (x == 2) {
+//     SquareOffset<T, 2>::run(y);
+//   }
+// }
+//
+// This function takes both x and y as run-time arguments, and dispatches to
+// different specialized versions of SquareOffset based on the run-time value
+// of x. This works, but it's tedious and error-prone. If we want to change the
+// set of x values for which we provide compile-time specializations, then we
+// will need to do a lot of tedious editing of the dispatch function. Also, if
+// we want to provide compile-time specializations for a function other than
+// SquareOffset, we will need to duplicate the entire lookup table.
+//
+// To solve these problems, we can use the DispatchKernel1D function provided by
+// this file instead:
+//
+// template<typename T>
+// void DispatchSquareOffset(const int64_t x, T y) {
+//     constexpr int64_t xmin = 0;
+//     constexpr int64_t xmax = 2;
+//     DispatchKernel1D<SquareOffset, T, xmin, xmax>(x, y);
+// }
+//
+// DispatchKernel1D uses template metaprogramming to compile specialized
+// versions of SquareOffset for all values of x with xmin <= x <= xmax, and
+// then dispatches to the correct one based on the run-time value of x. If we
+// want to change the range of x values for which SquareOffset is specialized
+// at compile-time, then all we have to do is change the values of the
+// compile-time constants xmin and xmax.
+//
+// This file also allows us to similarly dispatch functions that depend on two
+// compile-time int64_t values, using the DispatchKernel2D function like this:
+//
+// template<typename T, int64_t x, int64_t y>
+// struct Sum {
+//   static void run(T z, T w) {
+//     T val = x + y + z + w;
+//     std::cout << val << std::endl;
+//   }
+// }
+//
+// template<typename T>
+// void DispatchSum(const int64_t x, const int64_t y, int z, int w) {
+//   constexpr int64_t xmin = 1;
+//   constexpr int64_t xmax = 3;
+//   constexpr int64_t ymin = 2;
+//   constexpr int64_t ymax = 5;
+//   DispatchKernel2D<Sum, T, xmin, xmax, ymin, ymax>(x, y, z, w);
+// }
+//
+// Like its 1D counterpart, DispatchKernel2D uses template metaprogramming to
+// compile specialized versions of Sum for all values of (x, y) with
+// xmin <= x <= xmax and ymin <= y <= ymax, then dispatches to the correct
+// specialized version based on the runtime values of x and y.
+
+// Define some helper structs in an anonymous namespace.
+namespace {
+
+// 1D dispatch: general case.
+// Kernel is the function we want to dispatch to; it should take a typename and
+// an int64_t as template args, and it should define a static void function
+// run which takes any number of arguments of any type.
+// In order to dispatch, we take an additional template argument curN,
+// and increment it via template recursion until it is equal to the run-time
+// argument N. minN and maxN are only carried along to the next instantiation.
+template <
+    template <typename, int64_t>
+    class Kernel,
+    typename T,
+    int64_t minN,
+    int64_t maxN,
+    int64_t curN,
+    typename... Args>
+struct DispatchKernelHelper1D {
+  static void run(const int64_t N, Args... args) { // args are passed by value
+    if (curN == N) {
+      // The compile-time value curN is equal to the run-time value N, so we
+      // can dispatch to the run method of the Kernel.
+      Kernel<T, curN>::run(args...);
+    } else if (curN < N) {
+      // Increment curN via template recursion
+      DispatchKernelHelper1D<Kernel, T, minN, maxN, curN + 1, Args...>::run(
+          N, args...);
+    }
+    // If curN > N neither branch ran and nothing happened -- throw an error?
+  }
+};
+
+// 1D dispatch: Specialization when curN == maxN
+// We need this base case to avoid infinite template recursion.
+template <
+    template <typename, int64_t>
+    class Kernel,
+    typename T,
+    int64_t minN,
+    int64_t maxN,
+    typename... Args>
+struct DispatchKernelHelper1D<Kernel, T, minN, maxN, maxN, Args...> {
+  static void run(const int64_t N, Args... args) {
+    if (N == maxN) {
+      Kernel<T, maxN>::run(args...); // last candidate: no further recursion
+    }
+    // If N != maxN nothing was dispatched (silent no-op) -- throw an error?
+  }
+};
+
+// 2D dispatch, general case.
+// This is similar to the 1D case: we take additional template args curN and
+// curM, and increment them via template recursion until they are equal to
+// the run-time values of N and M, at which point we dispatch to the run
+// method of the kernel. If neither counter can advance, run() does nothing.
+template <
+    template <typename, int64_t, int64_t>
+    class Kernel,
+    typename T,
+    int64_t minN,
+    int64_t maxN,
+    int64_t curN,
+    int64_t minM,
+    int64_t maxM,
+    int64_t curM,
+    typename... Args>
+struct DispatchKernelHelper2D {
+  static void run(const int64_t N, const int64_t M, Args... args) {
+    if (curN == N && curM == M) {
+      Kernel<T, curN, curM>::run(args...); // both counters match: dispatch
+    } else if (curN < N && curM < M) {
+      // Increment both curN and curM. This isn't strictly necessary; we could
+      // just increment one or the other at each step. But this helps to cut
+      // on the number of recursive calls we make.
+      DispatchKernelHelper2D<
+          Kernel,
+          T,
+          minN,
+          maxN,
+          curN + 1,
+          minM,
+          maxM,
+          curM + 1,
+          Args...>::run(N, M, args...);
+    } else if (curN < N) {
+      // Increment curN only
+      DispatchKernelHelper2D<
+          Kernel,
+          T,
+          minN,
+          maxN,
+          curN + 1,
+          minM,
+          maxM,
+          curM,
+          Args...>::run(N, M, args...);
+    } else if (curM < M) {
+      // Increment curM only
+      DispatchKernelHelper2D<
+          Kernel,
+          T,
+          minN,
+          maxN,
+          curN,
+          minM,
+          maxM,
+          curM + 1,
+          Args...>::run(N, M, args...);
+    }
+  }
+};
+
+// 2D dispatch, specialization for curN == maxN (only curM can still advance)
+template <
+    template <typename, int64_t, int64_t>
+    class Kernel,
+    typename T,
+    int64_t minN,
+    int64_t maxN,
+    int64_t minM,
+    int64_t maxM,
+    int64_t curM,
+    typename... Args>
+struct DispatchKernelHelper2D<
+    Kernel,
+    T,
+    minN,
+    maxN,
+    maxN,
+    minM,
+    maxM,
+    curM,
+    Args...> {
+  static void run(const int64_t N, const int64_t M, Args... args) {
+    if (maxN == N && curM == M) {
+      Kernel<T, maxN, curM>::run(args...);
+    } else if (curM < maxM) { // tested against the bound maxM, not against M
+      DispatchKernelHelper2D<
+          Kernel,
+          T,
+          minN,
+          maxN,
+          maxN,
+          minM,
+          maxM,
+          curM + 1,
+          Args...>::run(N, M, args...);
+    }
+    // Also reached after a dispatch; on no match nothing ran -- throw an error?
+  }
+};
+
+// 2D dispatch, specialization for curM == maxM (only curN can still advance)
+template <
+    template <typename, int64_t, int64_t>
+    class Kernel,
+    typename T,
+    int64_t minN,
+    int64_t maxN,
+    int64_t curN,
+    int64_t minM,
+    int64_t maxM,
+    typename... Args>
+struct DispatchKernelHelper2D<
+    Kernel,
+    T,
+    minN,
+    maxN,
+    curN,
+    minM,
+    maxM,
+    maxM,
+    Args...> {
+  static void run(const int64_t N, const int64_t M, Args... args) {
+    if (curN == N && maxM == M) {
+      Kernel<T, curN, maxM>::run(args...);
+    } else if (curN < maxN) { // tested against the bound maxN, not against N
+      DispatchKernelHelper2D<
+          Kernel,
+          T,
+          minN,
+          maxN,
+          curN + 1,
+          minM,
+          maxM,
+          maxM,
+          Args...>::run(N, M, args...);
+    }
+    // Also reached after a dispatch; on no match nothing ran -- throw an error?
+  }
+};
+
+// 2D dispatch, specialization for curN == maxN, curM == maxM (recursion end)
+template <
+    template <typename, int64_t, int64_t>
+    class Kernel,
+    typename T,
+    int64_t minN,
+    int64_t maxN,
+    int64_t minM,
+    int64_t maxM,
+    typename... Args>
+struct DispatchKernelHelper2D<
+    Kernel,
+    T,
+    minN,
+    maxN,
+    maxN,
+    minM,
+    maxM,
+    maxM,
+    Args...> {
+  static void run(const int64_t N, const int64_t M, Args... args) {
+    if (maxN == N && maxM == M) {
+      Kernel<T, maxN, maxM>::run(args...); // last candidate pair
+    }
+    // On a mismatch nothing is dispatched (silent no-op) -- throw an error?
+  }
+};
+
+} // namespace
+
+// Public 1D entry point: runs Kernel<T, N>::run(args...) for minN <= N <= maxN
+template <
+    template <typename, int64_t>
+    class Kernel,
+    typename T,
+    int64_t minN,
+    int64_t maxN,
+    typename... Args>
+void DispatchKernel1D(const int64_t N, Args... args) {
+  // No specialization exists outside [minN, maxN]: silently do nothing.
+  if (N < minN || maxN < N) {
+    return;
+  }
+  using Helper = DispatchKernelHelper1D<Kernel, T, minN, maxN, minN, Args...>;
+  Helper::run(N, args...); // the search starts at curN = minN
+}
+
+// Public 2D entry point: runs Kernel<T, N, M>::run(args...) for in-range N, M
+template <
+    template <typename, int64_t, int64_t>
+    class Kernel,
+    typename T,
+    int64_t minN,
+    int64_t maxN,
+    int64_t minM,
+    int64_t maxM,
+    typename... Args>
+void DispatchKernel2D(const int64_t N, const int64_t M, Args... args) {
+  const bool n_in_range = minN <= N && N <= maxN;
+  const bool m_in_range = minM <= M && M <= maxM;
+  // No specialization exists outside the two ranges: silently do nothing.
+  if (!(n_in_range && m_in_range)) {
+    return;
+  }
+  // Kick off the template recursion with curN = minN and curM = minM.
+  using Helper = DispatchKernelHelper2D<
+      Kernel,
+      T,
+      minN, maxN, /*curN=*/minN,
+      minM, maxM, /*curM=*/minM,
+      Args...>;
+  Helper::run(N, M, args...);
+  // Maybe throw an error if we tried to dispatch outside the specified range?
+}
diff --git a/pytorch3d/pytorch3d/csrc/utils/float_math.cuh b/pytorch3d/pytorch3d/csrc/utils/float_math.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..e48e960e96544fd901655ce1d0217513d300187b
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/utils/float_math.cuh
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <thrust/tuple.h>
+
+// Set epsilon (added to vector norms below so a zero norm is never divided by)
+#ifdef _MSC_VER
+#define vEpsilon 1e-8f
+#else
+const auto vEpsilon = 1e-8; // NOTE: unsuffixed literal, so this is a double
+#endif
+
+// Common functions and operators for float2. All are component-wise.
+
+__device__ inline float2 operator-(const float2& a, const float2& b) {
+  return make_float2(a.x - b.x, a.y - b.y); // a - b
+}
+
+__device__ inline float2 operator+(const float2& a, const float2& b) {
+  return make_float2(a.x + b.x, a.y + b.y); // a + b
+}
+
+__device__ inline float2 operator/(const float2& a, const float2& b) {
+  return make_float2(a.x / b.x, a.y / b.y); // no guard against zeros in b
+}
+
+__device__ inline float2 operator/(const float2& a, const float b) {
+  return make_float2(a.x / b, a.y / b); // vector divided by a scalar
+}
+
+__device__ inline float2 operator*(const float2& a, const float2& b) {
+  return make_float2(a.x * b.x, a.y * b.y); // element-wise product, not dot
+}
+
+__device__ inline float2 operator*(const float a, const float2& b) {
+  return make_float2(a * b.x, a * b.y); // scalar must be the left operand
+}
+
+__device__ inline float FloatMin3(const float a, const float b, const float c) {
+  return fminf(a, fminf(b, c)); // smallest of the three values
+}
+
+__device__ inline float FloatMax3(const float a, const float b, const float c) {
+  return fmaxf(a, fmaxf(b, c)); // largest of the three values
+}
+
+__device__ inline float dot(const float2& a, const float2& b) {
+  return a.x * b.x + a.y * b.y; // 2D dot product
+}
+
+// Backward pass for the dot product.
+// Args:
+//     a, b: Coordinates of two points.
+//     grad_dot: Upstream gradient for the output.
+//
+// Returns:
+//    tuple of gradients for each of the input points:
+//      (float2 grad_a, float2 grad_b)
+//
+__device__ inline thrust::tuple<float2, float2>
+DotBackward(const float2& a, const float2& b, const float& grad_dot) {
+  return thrust::make_tuple(grad_dot * b, grad_dot * a); // d/da = b, d/db = a
+}
+
+__device__ inline float sum(const float2& a) {
+  return a.x + a.y; // sum of the two components
+}
+
+// Common functions and operators for float3. Operators are component-wise.
+
+__device__ inline float3 operator-(const float3& a, const float3& b) {
+  return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); // a - b
+}
+
+__device__ inline float3 operator+(const float3& a, const float3& b) {
+  return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); // a + b
+}
+
+__device__ inline float3 operator/(const float3& a, const float3& b) {
+  return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); // no guard for zeros
+}
+
+__device__ inline float3 operator/(const float3& a, const float b) {
+  return make_float3(a.x / b, a.y / b, a.z / b); // vector divided by a scalar
+}
+
+__device__ inline float3 operator*(const float3& a, const float3& b) {
+  return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); // element-wise product
+}
+
+__device__ inline float3 operator*(const float a, const float3& b) {
+  return make_float3(a * b.x, a * b.y, a * b.z); // scalar is the left operand
+}
+
+__device__ inline float dot(const float3& a, const float3& b) {
+  return a.x * b.x + a.y * b.y + a.z * b.z; // 3D dot product
+}
+
+__device__ inline float sum(const float3& a) {
+  return a.x + a.y + a.z; // sum of the three components
+}
+
+__device__ inline float3 cross(const float3& a, const float3& b) {
+  return make_float3( // cross product a x b
+      a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
+}
+
+__device__ inline thrust::tuple<float3, float3>
+cross_backward(const float3& a, const float3& b, const float3& grad_cross) {
+  // Backward pass of cross(a, b): returns (grad_a, grad_b) for the upstream
+  // gradient grad_cross, aliased as g below to keep the formulas short.
+  const float3& g = grad_cross;
+
+  const float3 d_a = make_float3(
+      -g.y * b.z + g.z * b.y, g.x * b.z - g.z * b.x, -g.x * b.y + g.y * b.x);
+
+  const float3 d_b = make_float3(
+      g.y * a.z - g.z * a.y, -g.x * a.z + g.z * a.x, g.x * a.y - g.y * a.x);
+
+  return thrust::make_tuple(d_a, d_b);
+}
+
+__device__ inline float norm(const float3& a) {
+  return sqrt(dot(a, a)); // Euclidean length
+}
+
+__device__ inline float3 normalize(const float3& a) {
+  return a / (norm(a) + vEpsilon); // vEpsilon keeps a zero vector finite
+}
+
+__device__ inline float3 normalize_backward( // backward pass of normalize(a)
+    const float3& a, // the vector that was normalized in the forward pass
+    const float3& grad_normz) { // upstream gradient w.r.t. normalize(a)
+  const float a_norm = norm(a) + vEpsilon; // same denominator as normalize()
+  const float3 out = a / a_norm; // recomputed forward result
+
+  const float grad_ax = grad_normz.x * (1.0f - out.x * out.x) / a_norm +
+      grad_normz.y * (-out.x * out.y) / a_norm +
+      grad_normz.z * (-out.x * out.z) / a_norm;
+  const float grad_ay = grad_normz.x * (-out.x * out.y) / a_norm +
+      grad_normz.y * (1.0f - out.y * out.y) / a_norm +
+      grad_normz.z * (-out.y * out.z) / a_norm;
+  const float grad_az = grad_normz.x * (-out.x * out.z) / a_norm +
+      grad_normz.y * (-out.y * out.z) / a_norm +
+      grad_normz.z * (1.0f - out.z * out.z) / a_norm;
+  return make_float3(grad_ax, grad_ay, grad_az); // gradient w.r.t. a
+}
diff --git a/pytorch3d/pytorch3d/csrc/utils/geometry_utils.cuh b/pytorch3d/pytorch3d/csrc/utils/geometry_utils.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..66aee7fc7bcd3495bc7dbba56d89995d383b655e
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/utils/geometry_utils.cuh
@@ -0,0 +1,792 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <float.h>
+#include <math.h>
+#include <cstdio>
+#include "float_math.cuh"
+
+// Set epsilon for preventing floating point errors and division by 0.
+#ifdef _MSC_VER
+#define kEpsilon 1e-8f
+#else
+const auto kEpsilon = 1e-8; // NOTE: unsuffixed literal, so this is a double
+#endif
+
+// ************************************************************* //
+//                          vec2 utils                           //
+// ************************************************************* //
+
+// Evaluates the 2D edge function of a point p against the line through the
+// end points v0, v1; the sign of the result tells which side p lies on.
+//
+// Args:
+//     p: vec2 Coordinates of a point.
+//     v0, v1: vec2 Coordinates of the end points of the edge.
+//
+// Returns:
+//     area: The signed area of the parallelogram given by the vectors
+//           A = p - v0
+//           B = v1 - v0
+//
+__device__ inline float
+EdgeFunctionForward(const float2& p, const float2& v0, const float2& v1) {
+  return (p.x - v0.x) * (v1.y - v0.y) - (p.y - v0.y) * (v1.x - v0.x);
+}
+
+// Backward pass of EdgeFunctionForward: scales the partial derivatives of the
+// edge function with respect to each input point by the upstream gradient.
+//
+// Args:
+//     p: vec2 Coordinates of a point.
+//     v0, v1: vec2 Coordinates of the end points of the edge.
+//     grad_edge: Upstream gradient for output from edge function.
+//
+// Returns:
+//     tuple of gradients for each of the input points:
+//     (float2 d_edge_dp, float2 d_edge_dv0, float2 d_edge_dv1)
+//
+__device__ inline thrust::tuple<float2, float2, float2> EdgeFunctionBackward(
+    const float2& p,
+    const float2& v0,
+    const float2& v1,
+    const float& grad_edge) {
+  const float2 d_p = grad_edge * make_float2(v1.y - v0.y, v0.x - v1.x);
+  const float2 d_v0 = grad_edge * make_float2(p.y - v1.y, v1.x - p.x);
+  const float2 d_v1 = grad_edge * make_float2(v0.y - p.y, p.x - v0.x);
+  // Same order as the arguments: point first, then the two edge end points.
+  return thrust::make_tuple(d_p, d_v0, d_v1);
+}
+
+// Forward pass: barycentric coordinates of a 2D point relative to a triangle,
+// computed as three edge functions divided by the (epsilon-shifted) area term.
+//
+// Args:
+//     p: Coordinates of a point.
+//     v0, v1, v2: Coordinates of the triangle vertices.
+//
+// Returns
+//     bary: (w0, w1, w2) barycentric coordinates; not clamped to [0, 1].
+//
+__device__ inline float3 BarycentricCoordsForward(
+    const float2& p,
+    const float2& v0,
+    const float2& v1,
+    const float2& v2) {
+  const float denom = EdgeFunctionForward(v2, v0, v1) + kEpsilon;
+  return make_float3(
+      EdgeFunctionForward(p, v1, v2) / denom,
+      EdgeFunctionForward(p, v2, v0) / denom,
+      EdgeFunctionForward(p, v0, v1) / denom);
+}
+
+// The backward pass for computing the barycentric coordinates of a point
+// relative to a triangle.
+//
+// Args:
+//     p: Coordinates of a point.
+//     v0, v1, v2: (x, y) coordinates of the triangle vertices.
+//     grad_bary_upstream: float3 Upstream gradient for each of the
+//                         barycentric coordinates [grad_w0, grad_w1, grad_w2].
+//
+// Returns
+//    tuple of gradients for the point and each of the triangle vertices:
+//    (float2 grad_p, float2 grad_v0, float2 grad_v1, float2 grad_v2)
+//
+__device__ inline thrust::tuple<float2, float2, float2, float2>
+BarycentricCoordsBackward(
+    const float2& p,
+    const float2& v0,
+    const float2& v1,
+    const float2& v2,
+    const float3& grad_bary_upstream) {
+  const float area = EdgeFunctionForward(v2, v0, v1) + kEpsilon;
+  const float area2 = pow(area, 2.0f);
+  const float e0 = EdgeFunctionForward(p, v1, v2); // numerator of w0
+  const float e1 = EdgeFunctionForward(p, v2, v0); // numerator of w1
+  const float e2 = EdgeFunctionForward(p, v0, v1); // numerator of w2
+
+  const float grad_w0 = grad_bary_upstream.x;
+  const float grad_w1 = grad_bary_upstream.y;
+  const float grad_w2 = grad_bary_upstream.z;
+
+  // Calculate component of the gradient from each of w0, w1 and w2.
+  // e.g. for w0:
+  // dloss/dw0_v = dl/dw0 * dw0/dw0_top * dw0_top/dv
+  //               + dl/dw0 * dw0/dw0_bot * dw0_bot/dv
+  const float dw0_darea = -e0 / (area2);
+  const float dw0_e0 = 1 / area;
+  const float dloss_d_w0area = grad_w0 * dw0_darea;
+  const float dloss_e0 = grad_w0 * dw0_e0;
+  auto de0_dv = EdgeFunctionBackward(p, v1, v2, dloss_e0); // (p, v1, v2)
+  auto dw0area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w0area); // (v2, v0, v1)
+  const float2 dw0_p = thrust::get<0>(de0_dv);
+  const float2 dw0_dv0 = thrust::get<1>(dw0area_dv);
+  const float2 dw0_dv1 = thrust::get<1>(de0_dv) + thrust::get<2>(dw0area_dv);
+  const float2 dw0_dv2 = thrust::get<2>(de0_dv) + thrust::get<0>(dw0area_dv);
+
+  const float dw1_darea = -e1 / (area2);
+  const float dw1_e1 = 1 / area;
+  const float dloss_d_w1area = grad_w1 * dw1_darea;
+  const float dloss_e1 = grad_w1 * dw1_e1;
+  auto de1_dv = EdgeFunctionBackward(p, v2, v0, dloss_e1); // (p, v2, v0)
+  auto dw1area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w1area);
+  const float2 dw1_p = thrust::get<0>(de1_dv);
+  const float2 dw1_dv0 = thrust::get<2>(de1_dv) + thrust::get<1>(dw1area_dv);
+  const float2 dw1_dv1 = thrust::get<2>(dw1area_dv);
+  const float2 dw1_dv2 = thrust::get<1>(de1_dv) + thrust::get<0>(dw1area_dv);
+
+  const float dw2_darea = -e2 / (area2);
+  const float dw2_e2 = 1 / area;
+  const float dloss_d_w2area = grad_w2 * dw2_darea;
+  const float dloss_e2 = grad_w2 * dw2_e2;
+  auto de2_dv = EdgeFunctionBackward(p, v0, v1, dloss_e2); // (p, v0, v1)
+  auto dw2area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w2area);
+  const float2 dw2_p = thrust::get<0>(de2_dv);
+  const float2 dw2_dv0 = thrust::get<1>(de2_dv) + thrust::get<1>(dw2area_dv);
+  const float2 dw2_dv1 = thrust::get<2>(de2_dv) + thrust::get<2>(dw2area_dv);
+  const float2 dw2_dv2 = thrust::get<0>(dw2area_dv);
+
+  const float2 dbary_p = dw0_p + dw1_p + dw2_p; // totals over w0, w1 and w2
+  const float2 dbary_dv0 = dw0_dv0 + dw1_dv0 + dw2_dv0;
+  const float2 dbary_dv1 = dw0_dv1 + dw1_dv1 + dw2_dv1;
+  const float2 dbary_dv2 = dw0_dv2 + dw1_dv2 + dw2_dv2;
+
+  return thrust::make_tuple(dbary_p, dbary_dv0, dbary_dv1, dbary_dv2);
+}
+
+// Forward pass: perspective-corrects screen-space barycentric coordinates
+// using the z-coordinates of the triangle vertices.
+// Args:
+//     bary: Screen-space barycentric coordinates for a point
+//     z0, z1, z2: Camera-space z-coordinates of the triangle vertices
+//
+// Returns
+//     World-space barycentric coordinates
+//
+__device__ inline float3 BarycentricPerspectiveCorrectionForward(
+    const float3& bary,
+    const float z0,
+    const float z1,
+    const float z2) {
+  // Each coordinate is weighted by the z values of the other two vertices.
+  const float num0 = bary.x * z1 * z2;
+  const float num1 = z0 * bary.y * z2;
+  const float num2 = z0 * z1 * bary.z;
+  // The normaliser is clamped from below so the division is never by zero.
+  const float total = fmaxf(num0 + num1 + num2, kEpsilon);
+  // Renormalise the three weighted coordinates by their clamped sum.
+  return make_float3(num0 / total, num1 / total, num2 / total);
+}
+
+// Backward pass for applying perspective correction to barycentric coordinates.
+//
+// Args:
+//     bary: Screen-space barycentric coordinates for a point
+//     z0, z1, z2: Camera-space z-coordinates of the triangle vertices
+//     grad_out: Upstream gradient of the loss with respect to the corrected
+//               barycentric coordinates.
+//
+// Returns a tuple of:
+//      grad_bary: Downstream gradient of the loss with respect to the
+//                 uncorrected barycentric coordinates.
+//      grad_z0, grad_z1, grad_z2: Downstream gradient of the loss with respect
+//                                 to the z-coordinates of the triangle verts
+__device__ inline thrust::tuple<float3, float, float, float>
+BarycentricPerspectiveCorrectionBackward(
+    const float3& bary,
+    const float z0,
+    const float z1,
+    const float z2,
+    const float3& grad_out) {
+  // Recompute forward pass
+  const float w0_top = bary.x * z1 * z2;
+  const float w1_top = z0 * bary.y * z2;
+  const float w2_top = z0 * z1 * bary.z;
+  const float denom = fmaxf(w0_top + w1_top + w2_top, kEpsilon);
+
+  // Now do backward pass (same formulas whether or not denom was clamped)
+  const float grad_denom_top =
+      -w0_top * grad_out.x - w1_top * grad_out.y - w2_top * grad_out.z;
+  const float grad_denom = grad_denom_top / (denom * denom);
+  const float grad_w0_top = grad_denom + grad_out.x / denom;
+  const float grad_w1_top = grad_denom + grad_out.y / denom;
+  const float grad_w2_top = grad_denom + grad_out.z / denom;
+  const float grad_bary_x = grad_w0_top * z1 * z2;
+  const float grad_bary_y = grad_w1_top * z0 * z2;
+  const float grad_bary_z = grad_w2_top * z0 * z1;
+  const float3 grad_bary = make_float3(grad_bary_x, grad_bary_y, grad_bary_z);
+  const float grad_z0 = grad_w1_top * bary.y * z2 + grad_w2_top * bary.z * z1;
+  const float grad_z1 = grad_w0_top * bary.x * z2 + grad_w2_top * bary.z * z0;
+  const float grad_z2 = grad_w0_top * bary.x * z1 + grad_w1_top * bary.y * z0;
+  return thrust::make_tuple(grad_bary, grad_z0, grad_z1, grad_z2);
+}
+
+// Clip negative barycentric coordinates to 0.0 and renormalize so
+// the barycentric coordinates for a point sum to 1. When the blur_radius
+// is greater than 0, a face will still be recorded as overlapping a pixel
+// if the pixel is outside the face. In this case at least one of the
+// barycentric coordinates for the pixel relative to the face will be negative.
+// Clipping will ensure that the texture and z buffer are interpolated
+// correctly.
+//
+//  Args
+//     bary: (w0, w1, w2) barycentric coordinates which can be outside the
+//            range [0, 1].
+//
+//  Returns
+//     bary: (w0, w1, w2) barycentric coordinates in the range [0, 1] which
+//           satisfy the condition: sum(w0, w1, w2) = 1.0.
+//
+__device__ inline float3 BarycentricClipForward(const float3 bary) {
+  // Clamp lower bound only; values above 1 are handled by the
+  // renormalization below. Float literals and fmaxf keep every operand in
+  // single precision instead of mixing float and double.
+  float3 w = make_float3(
+      fmaxf(bary.x, 0.0f), fmaxf(bary.y, 0.0f), fmaxf(bary.z, 0.0f));
+  // Lower-bound the normalizer so an all-clamped input cannot divide by 0.
+  const float w_sum = fmaxf(w.x + w.y + w.z, 1e-5f);
+  w.x /= w_sum;
+  w.y /= w_sum;
+  w.z /= w_sum;
+
+  return w;
+}
+
+// Backward pass for barycentric coordinate clipping.
+//
+//  Args
+//     bary: (w0, w1, w2) barycentric coordinates which can be outside the
+//            range [0, 1].
+//     grad_baryclip_upstream: float3 Upstream gradient for each of the clipped
+//                         barycentric coordinates [grad_w0, grad_w1, grad_w2].
+//
+// Returns
+//    float3 of gradients for the unclipped barycentric coordinates:
+//    (grad_w0, grad_w1, grad_w2)
+//
+__device__ inline float3 BarycentricClipBackward(
+    const float3 bary,
+    const float3 grad_baryclip_upstream) {
+  // Threshold the forward pass uses to lower-bound the normalizer. Spelled
+  // as a float so the comparison below matches the forward fmaxf exactly.
+  const float kMinSum = 1e-5f;
+
+  // Redo the forward clamp (lower bound only) to recover the unnormalized
+  // weights and their sum.
+  const float3 w = make_float3(
+      fmaxf(bary.x, 0.0f), fmaxf(bary.y, 0.0f), fmaxf(bary.z, 0.0f));
+  float w_sum = w.x + w.y + w.z;
+
+  // Check if sum was clipped. If it was, the normalizer was the constant
+  // threshold and passes no gradient back to the weights.
+  float grad_sum_clip = 1.0f;
+  if (w_sum < kMinSum) {
+    grad_sum_clip = 0.0f;
+    w_sum = kMinSum;
+  }
+
+  // Check if any of bary values have been clipped: a coordinate clamped to
+  // zero receives no gradient.
+  const float3 grad_clip = make_float3(
+      bary.x < 0.0f ? 0.0f : 1.0f,
+      bary.y < 0.0f ? 0.0f : 1.0f,
+      bary.z < 0.0f ? 0.0f : 1.0f);
+
+  // Gradients of the sum: d(1 / w_sum) / d(w_sum) scaled by each weight.
+  // Squaring by multiplication avoids a pow() call.
+  const float w_sum_sq = w_sum * w_sum;
+  const float3 grad_sum = make_float3(
+      -w.x / w_sum_sq * grad_sum_clip,
+      -w.y / w_sum_sq * grad_sum_clip,
+      -w.z / w_sum_sq * grad_sum_clip);
+
+  // Gradients for each of the bary coordinates including the cross terms
+  // from the sum.
+  float3 grad_bary = make_float3(0.0f, 0.0f, 0.0f);
+
+  grad_bary.x = grad_clip.x *
+      (grad_baryclip_upstream.x * (1.0f / w_sum + grad_sum.x) +
+       grad_baryclip_upstream.y * (grad_sum.y) +
+       grad_baryclip_upstream.z * (grad_sum.z));
+
+  grad_bary.y = grad_clip.y *
+      (grad_baryclip_upstream.y * (1.0f / w_sum + grad_sum.y) +
+       grad_baryclip_upstream.x * (grad_sum.x) +
+       grad_baryclip_upstream.z * (grad_sum.z));
+
+  grad_bary.z = grad_clip.z *
+      (grad_baryclip_upstream.z * (1.0f / w_sum + grad_sum.z) +
+       grad_baryclip_upstream.x * (grad_sum.x) +
+       grad_baryclip_upstream.y * (grad_sum.y));
+
+  return grad_bary;
+}
+
+// Return minimum squared distance between line segment (a, b) and point p.
+//
+// Args:
+//     p: Coordinates of a point.
+//     a, b: Coordinates of the end points of the line segment.
+//
+// Returns:
+//     squared distance from p to the closest point on the segment.
+//
+__device__ inline float
+PointLineDistanceForward(const float2& p, const float2& a, const float2& b) {
+  const float2 seg = b - a;
+  const float seg_len2 = dot(seg, seg);
+  if (seg_len2 <= kEpsilon) {
+    // Degenerate segment: use the squared distance to the end point b.
+    return dot(p - b, p - b);
+  }
+  // Parameter of the projection of p onto the line, clamped to the segment.
+  const float t = __saturatef(dot(seg, p - a) / seg_len2);
+  const float2 offset = (a + t * seg) - p;
+  return dot(offset, offset); // squared distance
+}
+
+// Backward pass for point to line distance in 2D.
+//
+// Args:
+//     p: Coordinates of a point.
+//     v0, v1: Coordinates of the end points of the line segment.
+//     grad_dist: Upstream gradient for the distance.
+//
+// Returns:
+//    tuple of gradients for each of the input points:
+//      (float2 grad_p, float2 grad_v0, float2 grad_v1)
+//
+__device__ inline thrust::tuple<float2, float2, float2>
+PointLineDistanceBackward(
+    const float2& p,
+    const float2& v0,
+    const float2& v1,
+    const float& grad_dist) {
+  // Redo some of the forward pass calculations.
+  const float2 v1v0 = v1 - v0;
+  const float2 pv0 = p - v0;
+  const float t_bot = dot(v1v0, v1v0);
+  const float t_top = dot(v1v0, pv0);
+  // The forward pass measures a degenerate segment against its end point v1,
+  // so pin tt to 1 there instead of dividing by (almost) zero.
+  const float tt = (t_bot <= kEpsilon) ? 1.0f : __saturatef(t_top / t_bot);
+  const float2 p_proj = (1.0f - tt) * v0 + tt * v1;
+
+  // dist = |p_proj - p|^2, split between the end points by (1 - tt) and tt.
+  const float2 grad_p = -1.0f * grad_dist * 2.0f * (p_proj - p);
+  const float2 grad_v0 = grad_dist * (1.0f - tt) * 2.0f * (p_proj - p);
+  const float2 grad_v1 = grad_dist * tt * 2.0f * (p_proj - p);
+
+  return thrust::make_tuple(grad_p, grad_v0, grad_v1);
+}
+
+// The forward pass for calculating the shortest distance between a point
+// and a triangle.
+//
+// Args:
+//     p: Coordinates of a point.
+//     v0, v1, v2: Coordinates of the three triangle vertices.
+//
+// Returns:
+//     shortest squared distance from a point to a triangle.
+//
+__device__ inline float PointTriangleDistanceForward(
+    const float2& p,
+    const float2& v0,
+    const float2& v1,
+    const float2& v2) {
+  // Squared distance from p to each of the three edges, folded into a
+  // running minimum.
+  float closest = PointLineDistanceForward(p, v0, v1);
+  closest = fminf(closest, PointLineDistanceForward(p, v0, v2));
+  closest = fminf(closest, PointLineDistanceForward(p, v1, v2));
+  return closest;
+}
+
+// Backward pass for point triangle distance.
+//
+// Args:
+//     p: Coordinates of a point.
+//     v0, v1, v2: Coordinates of the three triangle vertices.
+//     grad_dist: Upstream gradient for the distance.
+//
+// Returns:
+//    tuple of gradients for the point and each of the triangle vertices:
+//      (float2 grad_p, float2 grad_v0, float2 grad_v1, float2 grad_v2)
+//
+__device__ inline thrust::tuple<float2, float2, float2, float2>
+PointTriangleDistanceBackward(
+    const float2& p,
+    const float2& v0,
+    const float2& v1,
+    const float2& v2,
+    const float& grad_dist) {
+  // Squared distance from p to each of the three edges.
+  const float d01 = PointLineDistanceForward(p, v0, v1);
+  const float d02 = PointLineDistanceForward(p, v0, v2);
+  const float d12 = PointLineDistanceForward(p, v1, v2);
+
+  // Gradients stay zero for any vertex that is not on the closest edge.
+  const float2 zero = make_float2(0.0f, 0.0f);
+  float2 grad_p = zero;
+  float2 grad_v0 = zero;
+  float2 grad_v1 = zero;
+  float2 grad_v2 = zero;
+
+  // Backpropagate through the closest edge only; ties go to the first match.
+  const bool pick01 = d01 <= d02 && d01 <= d12;
+  const bool pick02 = d02 <= d01 && d02 <= d12;
+  const bool pick12 = d12 <= d01 && d12 <= d02;
+  if (pick01) {
+    const auto g = PointLineDistanceBackward(p, v0, v1, grad_dist);
+    grad_p = thrust::get<0>(g);
+    grad_v0 = thrust::get<1>(g);
+    grad_v1 = thrust::get<2>(g);
+  } else if (pick02) {
+    const auto g = PointLineDistanceBackward(p, v0, v2, grad_dist);
+    grad_p = thrust::get<0>(g);
+    grad_v0 = thrust::get<1>(g);
+    grad_v2 = thrust::get<2>(g);
+  } else if (pick12) {
+    const auto g = PointLineDistanceBackward(p, v1, v2, grad_dist);
+    grad_p = thrust::get<0>(g);
+    grad_v1 = thrust::get<1>(g);
+    grad_v2 = thrust::get<2>(g);
+  }
+
+  return thrust::make_tuple(grad_p, grad_v0, grad_v1, grad_v2);
+}
+
+// ************************************************************* //
+//                          vec3 utils                           //
+// ************************************************************* //
+
+// Computes the area of a triangle (v0, v1, v2).
+//
+// Args:
+//     v0, v1, v2: vec3 coordinates of the triangle vertices
+//
+// Returns
+//     area: float: The area of the triangle
+//
+__device__ inline float
+AreaOfTriangle(const float3& v0, const float3& v1, const float3& v2) {
+  // Two edge vectors sharing the vertex v0.
+  const float3 e1 = v1 - v0;
+  const float3 e2 = v2 - v0;
+
+  // Components of the cross product e1 x e2; its length is twice the area.
+  const float cx = e1.y * e2.z - e1.z * e2.y;
+  const float cy = e1.z * e2.x - e1.x * e2.z;
+  const float cz = e1.x * e2.y - e1.y * e2.x;
+  return hypot(cx, hypot(cy, cz)) / 2.0;
+}
+
+// Computes the barycentric coordinates of a point p relative
+// to a triangle (v0, v1, v2), i.e. p = w0 * v0 + w1 * v1 + w2 * v2
+// s.t. w0 + w1 + w2 = 1.0
+//
+// NOTE that this function assumes that p lives on the space spanned
+// by (v0, v1, v2).
+// TODO(gkioxari) explicitly check whether p is coplanar with (v0, v1, v2)
+// and throw an error if check fails
+//
+// Args:
+//     p: vec3 coordinates of a point
+//     v0, v1, v2: vec3 coordinates of the triangle vertices
+//
+// Returns
+//     bary: (w0, w1, w2) barycentric coordinates
+//
+__device__ inline float3 BarycentricCoords3Forward(
+    const float3& p,
+    const float3& v0,
+    const float3& v1,
+    const float3& v2) {
+  const float3 e1 = v1 - v0;
+  const float3 e2 = v2 - v0;
+  const float3 q = p - v0;
+
+  // Gram-matrix entries and right-hand side of the 2x2 system for (w1, w2).
+  const float g11 = dot(e1, e1);
+  const float g12 = dot(e1, e2);
+  const float g22 = dot(e2, e2);
+  const float r1 = dot(q, e1);
+  const float r2 = dot(q, e2);
+
+  // kEpsilon offsets the determinant so a zero value does not divide by zero.
+  const float det = g11 * g22 - g12 * g12 + kEpsilon;
+  const float w1 = (g22 * r1 - g12 * r2) / det;
+  const float w2 = (g11 * r2 - g12 * r1) / det;
+  return make_float3(1.0f - w1 - w2, w1, w2);
+}
+
+// Checks whether the point p is inside the triangle (v0, v1, v2).
+// A point is inside the triangle, if all barycentric coordinates
+// wrt the triangle are >= 0 & <= 1.
+// If the triangle is degenerate, aka line or point, then return False.
+//
+// NOTE that this function assumes that p lives on the space spanned
+// by (v0, v1, v2).
+// TODO(gkioxari) explicitly check whether p is coplanar with (v0, v1, v2)
+// and throw an error if check fails
+//
+// Args:
+//     p: vec3 coordinates of a point
+//     v0, v1, v2: vec3 coordinates of the triangle vertices
+//     min_triangle_area: triangles less than this size are considered
+//     points/lines, IsInsideTriangle returns False
+//
+// Returns:
+//     inside: bool indicating whether p is inside triangle
+//
+__device__ inline bool IsInsideTriangle(
+    const float3& p,
+    const float3& v0,
+    const float3& v1,
+    const float3& v2,
+    const double min_triangle_area) {
+  // Degenerate (point- or line-like) triangles never contain a point.
+  if (AreaOfTriangle(v0, v1, v2) < min_triangle_area) {
+    return false;
+  }
+
+  // p is inside iff every barycentric coordinate lies in [0, 1].
+  const float3 w = BarycentricCoords3Forward(p, v0, v1, v2);
+  const bool x_ok = w.x >= 0.0f && w.x <= 1.0f;
+  const bool y_ok = w.y >= 0.0f && w.y <= 1.0f;
+  const bool z_ok = w.z >= 0.0f && w.z <= 1.0f;
+  return x_ok && y_ok && z_ok;
+}
+
+// Computes the minimum squared Euclidean distance between the point p
+// and the segment spanned by (v0, v1).
+// To find this we parametrize the segment as: x(t) = v0 + t * (v1 - v0)
+// and find t which minimizes (x(t) - p) ^ 2.
+// Note that p does not need to live in the space spanned by (v0, v1)
+//
+// Args:
+//     p: vec3 coordinates of a point
+//     v0, v1: vec3 coordinates of start and end of segment
+//
+// Returns:
+//     dist: the minimum squared distance of p from segment (v0, v1)
+//
+
+__device__ inline float
+PointLine3DistanceForward(const float3& p, const float3& v0, const float3& v1) {
+  const float3 seg = v1 - v0;
+  const float3 to_p = p - v0;
+  const float seg_len2 = dot(seg, seg);
+
+  // Parameter of the closest point along the line through v0 and v1. A
+  // (near) zero-length segment means v0 == v1, so use 0 instead of dividing.
+  const float raw = (seg_len2 < kEpsilon) ? 0.0f : (dot(to_p, seg) / seg_len2);
+  const float tt = __saturatef(raw); // clamps to [0, 1]
+
+  // Squared distance from p to the clamped projection.
+  const float3 nearest = v0 + tt * seg;
+  const float3 diff = p - nearest;
+  return dot(diff, diff);
+}
+
+// Backward function of the minimum squared Euclidean distance between the point
+// p and the line segment (v0, v1).
+//
+// Args:
+//     p: vec3 coordinates of a point
+//     v0, v1: vec3 coordinates of start and end of segment
+//     grad_dist: Float of the gradient wrt dist
+//
+// Returns:
+//    tuple of gradients for the point and line segment (v0, v1):
+//      (float3 grad_p, float3 grad_v0, float3 grad_v1)
+
+__device__ inline thrust::tuple<float3, float3, float3>
+PointLine3DistanceBackward(
+    const float3& p,
+    const float3& v0,
+    const float3& v1,
+    const float& grad_dist) {
+  const float3 v1v0 = v1 - v0;
+  const float3 pv0 = p - v0;
+  const float t_bot = dot(v1v0, v1v0);
+  const float t_top = dot(v1v0, pv0);
+  // Outputs start at zero; each branch below sets only the ones it affects.
+  float3 grad_p = make_float3(0.0f, 0.0f, 0.0f);
+  float3 grad_v0 = make_float3(0.0f, 0.0f, 0.0f);
+  float3 grad_v1 = make_float3(0.0f, 0.0f, 0.0f);
+  // Unclamped segment parameter; the t_bot guard below is tested before it.
+  const float tt = t_top / t_bot;
+
+  if (t_bot < kEpsilon) {
+    // if t_bot small, then v0 == v1,
+    // and dist = 0.5 * dot(pv0, pv0) + 0.5 * dot(pv1, pv1)
+    grad_p = grad_dist * 2.0f * pv0;
+    grad_v0 = -0.5f * grad_p;
+    grad_v1 = grad_v0;
+  } else if (tt < 0.0f) {
+    grad_p = grad_dist * 2.0f * pv0;
+    grad_v0 = -1.0f * grad_p;
+    // closest point is v0: no gradients wrt v1
+  } else if (tt > 1.0f) {
+    grad_p = grad_dist * 2.0f * (p - v1);
+    grad_v1 = -1.0f * grad_p;
+    // closest point is v1: no gradients wrt v0
+  } else { // 0 <= tt <= 1: the projection lies within the segment
+    const float3 p_proj = v0 + tt * v1v0;
+    const float3 diff = p - p_proj;
+    const float3 grad_base = grad_dist * 2.0f * diff;
+    grad_p = grad_base - dot(grad_base, v1v0) * v1v0 / t_bot;
+    const float3 dtt_v0 = (-1.0f * v1v0 - pv0 + 2.0f * tt * v1v0) / t_bot;
+    grad_v0 = (-1.0f + tt) * grad_base - dot(grad_base, v1v0) * dtt_v0;
+    const float3 dtt_v1 = (pv0 - 2.0f * tt * v1v0) / t_bot;
+    grad_v1 = -dot(grad_base, v1v0) * dtt_v1 - tt * grad_base;
+  }
+
+  return thrust::make_tuple(grad_p, grad_v0, grad_v1);
+}
+
+// Computes the squared distance of a point p relative to a triangle (v0, v1,
+// v2). If the point's projection p0 on the plane spanned by (v0, v1, v2) is
+// inside the triangle with vertices (v0, v1, v2), then the returned value is
+// the squared distance of p to its projection p0. Otherwise, the returned value
+// is the smallest squared distance of p from the line segments (v0, v1), (v0,
+// v2) and (v1, v2).
+//
+// Args:
+//     p: vec3 coordinates of a point
+//     v0, v1, v2: vec3 coordinates of the triangle vertices
+//     min_triangle_area: triangles less than this size are considered
+//     points/lines, IsInsideTriangle returns False
+//
+// Returns:
+//     dist: Float of the squared distance
+//
+
+__device__ inline float PointTriangle3DistanceForward(
+    const float3& p,
+    const float3& v0,
+    const float3& v1,
+    const float3& v2,
+    const double min_triangle_area) {
+  // Plane normal; its raw length is kept to detect degenerate triangles.
+  const float3 raw_normal = cross(v2 - v0, v1 - v0);
+  const float raw_len = norm(raw_normal);
+  const float3 unit_n = normalize(raw_normal);
+
+  // Signed offset t moves p along the unit normal onto the triangle's plane:
+  // foot = p + t * unit_n, with (foot - v0) orthogonal to the normal.
+  const float t = dot(v0 - p, unit_n);
+  const float3 foot = p + t * unit_n;
+
+  const bool foot_inside =
+      IsInsideTriangle(foot, v0, v1, v2, min_triangle_area);
+  if (foot_inside && raw_len > kEpsilon) {
+    // The projection lands inside the triangle, so the squared distance is
+    // norm(foot - p)^2 = t^2.
+    return t * t;
+  }
+
+  // Otherwise fall back to the smallest of the three point-to-edge squared
+  // distances.
+  const float e01 = PointLine3DistanceForward(p, v0, v1);
+  const float e02 = PointLine3DistanceForward(p, v0, v2);
+  const float e12 = PointLine3DistanceForward(p, v1, v2);
+
+  const float e_min = (e01 > e02) ? e02 : e01;
+  return (e_min > e12) ? e12 : e_min;
+}
+
+// The backward pass for computing the squared distance of a point
+// to the triangle (v0, v1, v2).
+//
+// Args:
+//     p: xyz coordinates of a point
+//     v0, v1, v2: xyz coordinates of the triangle vertices
+//     grad_dist: Float of the gradient wrt dist
+//     min_triangle_area: triangles less than this size are considered
+//     points/lines, IsInsideTriangle returns False
+//
+// Returns:
+//     tuple of gradients for the point and triangle:
+//        (float3 grad_p, float3 grad_v0, float3 grad_v1, float3 grad_v2)
+//
+
+__device__ inline thrust::tuple<float3, float3, float3, float3>
+PointTriangle3DistanceBackward(
+    const float3& p,
+    const float3& v0,
+    const float3& v1,
+    const float3& v2,
+    const float& grad_dist,
+    const double min_triangle_area) {
+  const float3 v2v0 = v2 - v0;
+  const float3 v1v0 = v1 - v0;
+  const float3 v0p = v0 - p;
+  float3 raw_normal = cross(v2v0, v1v0);
+  const float norm_normal = norm(raw_normal);
+  float3 normal = normalize(raw_normal);
+
+  // p0 is the projection of p on the plane spanned by (v0, v1, v2)
+  // i.e. p0 = p + t * normal, s.t. (p0 - v0) is orthogonal to normal
+  const float t = dot(v0 - p, normal);
+  const float3 p0 = p + t * normal;
+  const float3 diff = t * normal;
+  // Same inside test that PointTriangle3DistanceForward uses to pick a branch.
+  bool is_inside = IsInsideTriangle(p0, v0, v1, v2, min_triangle_area);
+  // Outputs start at zero; only the branch taken below fills them in.
+  float3 grad_p = make_float3(0.0f, 0.0f, 0.0f);
+  float3 grad_v0 = make_float3(0.0f, 0.0f, 0.0f);
+  float3 grad_v1 = make_float3(0.0f, 0.0f, 0.0f);
+  float3 grad_v2 = make_float3(0.0f, 0.0f, 0.0f);
+
+  if ((is_inside) && (norm_normal > kEpsilon)) {
+    // dist = t * t in this branch; derivative of dist wrt p
+    grad_p = -2.0f * grad_dist * t * normal;
+    // derivative of dist wrt normal
+    const float3 grad_normal = 2.0f * grad_dist * t * (v0p + diff);
+    // derivative of dist wrt raw_normal
+    const float3 grad_raw_normal = normalize_backward(raw_normal, grad_normal);
+    // derivative of dist wrt v2v0 and v1v0
+    const auto grad_cross = cross_backward(v2v0, v1v0, grad_raw_normal);
+    const float3 grad_cross_v2v0 = thrust::get<0>(grad_cross);
+    const float3 grad_cross_v1v0 = thrust::get<1>(grad_cross);
+    grad_v0 =
+        grad_dist * 2.0f * t * normal - (grad_cross_v2v0 + grad_cross_v1v0);
+    grad_v1 = grad_cross_v1v0;
+    grad_v2 = grad_cross_v2v0;
+  } else {
+    const float e01 = PointLine3DistanceForward(p, v0, v1);
+    const float e02 = PointLine3DistanceForward(p, v0, v2);
+    const float e12 = PointLine3DistanceForward(p, v1, v2);
+    // Backpropagate through the closest edge only; ties go to the first match.
+    if ((e01 <= e02) && (e01 <= e12)) {
+      // e01 is smallest
+      const auto grads = PointLine3DistanceBackward(p, v0, v1, grad_dist);
+      grad_p = thrust::get<0>(grads);
+      grad_v0 = thrust::get<1>(grads);
+      grad_v1 = thrust::get<2>(grads);
+    } else if ((e02 <= e01) && (e02 <= e12)) {
+      // e02 is smallest
+      const auto grads = PointLine3DistanceBackward(p, v0, v2, grad_dist);
+      grad_p = thrust::get<0>(grads);
+      grad_v0 = thrust::get<1>(grads);
+      grad_v2 = thrust::get<2>(grads);
+    } else if ((e12 <= e01) && (e12 <= e02)) {
+      // e12 is smallest
+      const auto grads = PointLine3DistanceBackward(p, v1, v2, grad_dist);
+      grad_p = thrust::get<0>(grads);
+      grad_v1 = thrust::get<1>(grads);
+      grad_v2 = thrust::get<2>(grads);
+    }
+  }
+
+  return thrust::make_tuple(grad_p, grad_v0, grad_v1, grad_v2);
+}
diff --git a/pytorch3d/pytorch3d/csrc/utils/geometry_utils.h b/pytorch3d/pytorch3d/csrc/utils/geometry_utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..ad9f7ff3f34dde7b119ea708eb0901cb826794d7
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/utils/geometry_utils.h
@@ -0,0 +1,823 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <algorithm>
+#include <tuple>
+#include <type_traits>
+#include "vec2.h"
+#include "vec3.h"
+
+// Set epsilon for preventing floating point errors and division by 0.
+const auto kEpsilon = 1e-8;
+
+// Determines whether a point p is on the right side of a 2D line segment
+// given by the end points v0, v1.
+//
+// Args:
+//     p: vec2 Coordinates of a point.
+//     v0, v1: vec2 Coordinates of the end points of the edge.
+//
+// Returns:
+//     area: The signed area of the parallelogram given by the vectors
+//           A = p - v0
+//           B = v1 - v0
+//
+//                 v1 ________
+//                   /\      /
+//               A  /  \    /
+//                 /    \  /
+//             v0 /______\/
+//                   B    p
+//
+//          The area can also be interpreted as the cross product A x B.
+//          If the sign of the area is positive, the point p is on the
+//          right side of the edge. Negative area indicates the point is on
+//          the left side of the edge. i.e. for an edge v1 - v0:
+//
+//                      v1
+//                     /
+//                    /
+//             -     /    +
+//                  /
+//                 /
+//               v0
+//
+template <typename T>
+T EdgeFunctionForward(const vec2<T>& p, const vec2<T>& v0, const vec2<T>& v1) {
+  // 2D cross product (p - v0) x (v1 - v0), written out per component.
+  return (p.x - v0.x) * (v1.y - v0.y) - (p.y - v0.y) * (v1.x - v0.x);
+}
+
+// Backward pass for the edge function returning partial derivatives for each
+// of the input points.
+//
+// Args:
+//     p: vec2 Coordinates of a point.
+//     v0, v1: vec2 Coordinates of the end points of the edge.
+//     grad_edge: Upstream gradient for output from edge function.
+//
+// Returns:
+//     tuple of gradients for each of the input points:
+//     (vec2<T> d_edge_dp, vec2<T> d_edge_dv0, vec2<T> d_edge_dv1)
+//
+template <typename T>
+inline std::tuple<vec2<T>, vec2<T>, vec2<T>> EdgeFunctionBackward(
+    const vec2<T>& p,
+    const vec2<T>& v0,
+    const vec2<T>& v1,
+    const T grad_edge) {
+  // Partial derivatives wrt each input point, scaled by the upstream gradient.
+  const vec2<T> grad_p = grad_edge * vec2<T>(v1.y - v0.y, v0.x - v1.x);
+  const vec2<T> grad_v0 = grad_edge * vec2<T>(p.y - v1.y, v1.x - p.x);
+  const vec2<T> grad_v1 = grad_edge * vec2<T>(v0.y - p.y, p.x - v0.x);
+  return std::make_tuple(grad_p, grad_v0, grad_v1);
+}
+
+// The forward pass for computing the barycentric coordinates of a point
+// relative to a triangle.
+// Ref:
+// https://www.scratchapixel.com/lessons/3d-basic-rendering/ray-tracing-rendering-a-triangle/barycentric-coordinates
+//
+// Args:
+//     p: Coordinates of a point.
+//     v0, v1, v2: Coordinates of the triangle vertices.
+//
+// Returns
+//     bary: (w0, w1, w2) barycentric coordinates (in [0, 1] if p is inside).
+//
+template <typename T>
+vec3<T> BarycentricCoordinatesForward(
+    const vec2<T>& p,
+    const vec2<T>& v0,
+    const vec2<T>& v1,
+    const vec2<T>& v2) {
+  const T full = EdgeFunctionForward(v2, v0, v1) + kEpsilon; // shared divisor
+  return vec3<T>(
+      EdgeFunctionForward(p, v1, v2) / full,
+      EdgeFunctionForward(p, v2, v0) / full,
+      EdgeFunctionForward(p, v0, v1) / full);
+}
+
+// The backward pass for computing the barycentric coordinates of a point
+// relative to a triangle.
+//
+// Args:
+//     p: Coordinates of a point.
+//     v0, v1, v2: (x, y) coordinates of the triangle vertices.
+//     grad_bary_upstream: vec3<T> Upstream gradient for each of the
+//                         barycentric coordinates [grad_w0, grad_w1, grad_w2].
+//
+// Returns
+//    tuple of gradients for the point and each of the triangle vertices:
+//    (vec2<T> grad_p, vec2<T> grad_v0, vec2<T> grad_v1, vec2<T> grad_v2)
+//
+template <typename T>
+inline std::tuple<vec2<T>, vec2<T>, vec2<T>, vec2<T>> BarycentricCoordsBackward(
+    const vec2<T>& p,
+    const vec2<T>& v0,
+    const vec2<T>& v1,
+    const vec2<T>& v2,
+    const vec3<T>& grad_bary_upstream) {
+  const T area = EdgeFunctionForward(v2, v0, v1) + kEpsilon;
+  const T area2 = pow(area, 2.0f);
+  const T area_inv = 1.0f / area;
+  const T e0 = EdgeFunctionForward(p, v1, v2);
+  const T e1 = EdgeFunctionForward(p, v2, v0);
+  const T e2 = EdgeFunctionForward(p, v0, v1);
+
+  const T grad_w0 = grad_bary_upstream.x;
+  const T grad_w1 = grad_bary_upstream.y;
+  const T grad_w2 = grad_bary_upstream.z;
+
+  // Calculate component of the gradient from each of w0, w1 and w2, where
+  // w_i = e_i / area. e.g. for w0 (top = e0, bot = area):
+  // dloss/dw0_v = dl/dw0 * dw0/dw0_top * dw0_top/dv
+  //               + dl/dw0 * dw0/dw0_bot * dw0_bot/dv
+  const T dw0_darea = -e0 / (area2);
+  const T dw0_e0 = area_inv;
+  const T dloss_d_w0area = grad_w0 * dw0_darea;
+  const T dloss_e0 = grad_w0 * dw0_e0;
+  auto de0_dv = EdgeFunctionBackward(p, v1, v2, dloss_e0);
+  auto dw0area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w0area);
+  const vec2<T> dw0_p = std::get<0>(de0_dv);
+  const vec2<T> dw0_dv0 = std::get<1>(dw0area_dv);
+  const vec2<T> dw0_dv1 = std::get<1>(de0_dv) + std::get<2>(dw0area_dv);
+  const vec2<T> dw0_dv2 = std::get<2>(de0_dv) + std::get<0>(dw0area_dv);
+  // Same for w1. Area gradients are ordered (v2, v0, v1), like the call.
+  const T dw1_darea = -e1 / (area2);
+  const T dw1_e1 = area_inv;
+  const T dloss_d_w1area = grad_w1 * dw1_darea;
+  const T dloss_e1 = grad_w1 * dw1_e1;
+  auto de1_dv = EdgeFunctionBackward(p, v2, v0, dloss_e1);
+  auto dw1area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w1area);
+  const vec2<T> dw1_p = std::get<0>(de1_dv);
+  const vec2<T> dw1_dv0 = std::get<2>(de1_dv) + std::get<1>(dw1area_dv);
+  const vec2<T> dw1_dv1 = std::get<2>(dw1area_dv);
+  const vec2<T> dw1_dv2 = std::get<1>(de1_dv) + std::get<0>(dw1area_dv);
+  // Same for w2.
+  const T dw2_darea = -e2 / (area2);
+  const T dw2_e2 = area_inv;
+  const T dloss_d_w2area = grad_w2 * dw2_darea;
+  const T dloss_e2 = grad_w2 * dw2_e2;
+  auto de2_dv = EdgeFunctionBackward(p, v0, v1, dloss_e2);
+  auto dw2area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w2area);
+  const vec2<T> dw2_p = std::get<0>(de2_dv);
+  const vec2<T> dw2_dv0 = std::get<1>(de2_dv) + std::get<1>(dw2area_dv);
+  const vec2<T> dw2_dv1 = std::get<2>(de2_dv) + std::get<2>(dw2area_dv);
+  const vec2<T> dw2_dv2 = std::get<0>(dw2area_dv);
+  // Sum the contributions of the three coordinates for each input point.
+  const vec2<T> dbary_p = dw0_p + dw1_p + dw2_p;
+  const vec2<T> dbary_dv0 = dw0_dv0 + dw1_dv0 + dw2_dv0;
+  const vec2<T> dbary_dv1 = dw0_dv1 + dw1_dv1 + dw2_dv1;
+  const vec2<T> dbary_dv2 = dw0_dv2 + dw1_dv2 + dw2_dv2;
+
+  return std::make_tuple(dbary_p, dbary_dv0, dbary_dv1, dbary_dv2);
+}
+
+// Forward pass for applying perspective correction to barycentric coordinates.
+//
+// Args:
+//     bary: Screen-space barycentric coordinates for a point
+//     z0, z1, z2: Camera-space z-coordinates of the triangle vertices
+//
+// Returns
+//     World-space barycentric coordinates
+//
+template <typename T>
+inline vec3<T> BarycentricPerspectiveCorrectionForward(
+    const vec3<T>& bary,
+    const T z0,
+    const T z1,
+    const T z2) {
+  // Unnormalized weights: each screen-space coordinate is scaled by the
+  // depths of the two other vertices.
+  const T num0 = bary.x * z1 * z2;
+  const T num1 = bary.y * z0 * z2;
+  const T num2 = bary.z * z0 * z1;
+  // Renormalize; std::max keeps the divisor at or above kEpsilon.
+  const T total = std::max<T>(num0 + num1 + num2, kEpsilon);
+  return vec3<T>(num0 / total, num1 / total, num2 / total);
+}
+
+// Backward pass for applying perspective correction to barycentric coordinates.
+//
+// Args:
+//     bary: Screen-space barycentric coordinates for a point
+//     z0, z1, z2: Camera-space z-coordinates of the triangle vertices
+//     grad_out: Upstream gradient of the loss with respect to the corrected
+//               barycentric coordinates.
+//
+// Returns a tuple of:
+//      grad_bary: Downstream gradient of the loss with respect to the
+//                 uncorrected barycentric coordinates.
+//      grad_z0, grad_z1, grad_z2: Downstream gradient of the loss with respect
+//                                 to the z-coordinates of the triangle verts
+template <typename T>
+inline std::tuple<vec3<T>, T, T, T> BarycentricPerspectiveCorrectionBackward(
+    const vec3<T>& bary,
+    const T z0,
+    const T z1,
+    const T z2,
+    const vec3<T>& grad_out) {
+  // Rebuild the forward-pass numerators and the clamped normalizer.
+  const T num0 = bary.x * z1 * z2;
+  const T num1 = bary.y * z0 * z2;
+  const T num2 = bary.z * z0 * z1;
+  const T total = std::max<T>(num0 + num1 + num2, kEpsilon);
+
+  // Gradient wrt the normalizer, shared by every output num_i / total.
+  const T g_total =
+      (-num0 * grad_out.x - num1 * grad_out.y - num2 * grad_out.z) /
+      (total * total);
+  const T g_num0 = g_total + grad_out.x / total;
+  const T g_num1 = g_total + grad_out.y / total;
+  const T g_num2 = g_total + grad_out.z / total;
+
+  // Chain through the products that formed each numerator.
+  const vec3<T> grad_bary(
+      g_num0 * z1 * z2, g_num1 * z0 * z2, g_num2 * z0 * z1);
+  const T grad_z0 = g_num1 * bary.y * z2 + g_num2 * bary.z * z1;
+  const T grad_z1 = g_num0 * bary.x * z2 + g_num2 * bary.z * z0;
+  const T grad_z2 = g_num0 * bary.x * z1 + g_num1 * bary.y * z0;
+  return std::make_tuple(grad_bary, grad_z0, grad_z1, grad_z2);
+}
+
+// Clip negative barycentric coordinates to 0.0 and renormalize so
+// the barycentric coordinates for a point sum to 1. When the blur_radius
+// is greater than 0, a face will still be recorded as overlapping a pixel
+// if the pixel is outside the face. In this case at least one of the
+// barycentric coordinates for the pixel relative to the face will be negative.
+// Clipping will ensure that the texture and z buffer are interpolated
+// correctly.
+//
+//  Args
+//     bary: (w0, w1, w2) barycentric coordinates which can contain values < 0.
+//
+//  Returns
+//     bary: (w0, w1, w2) barycentric coordinates in the range [0, 1] which
+//           satisfy the condition: sum(w0, w1, w2) = 1.0.
+//
+template <typename T>
+vec3<T> BarycentricClipForward(const vec3<T> bary) {
+  // Constants are spelled as T so the template is not tied to float.
+  const T zero = static_cast<T>(0);
+  // Only clamp negative values to 0.0; values > 1.0 get renormalized below.
+  vec3<T> w(zero, zero, zero);
+  w.x = std::max(bary.x, zero);
+  w.y = std::max(bary.y, zero);
+  w.z = std::max(bary.z, zero);
+  const T w_sum = std::fmax(w.x + w.y + w.z, static_cast<T>(1e-5));
+  w.x /= w_sum;
+  w.y /= w_sum;
+  w.z /= w_sum;
+  return w;
+}
+
+// Backward pass for barycentric coordinate clipping.
+//
+//  Args
+//     bary: (w0, w1, w2) barycentric coordinates which can contain values < 0.
+//     grad_baryclip_upstream: vec3<T> Upstream gradient for each of the clipped
+//                         barycentric coordinates [grad_w0, grad_w1, grad_w2].
+//
+// Returns
+//    vec3<T> of gradients for the unclipped barycentric coordinates:
+//    (grad_w0, grad_w1, grad_w2)
+//
+template <typename T>
+vec3<T> BarycentricClipBackward(
+    const vec3<T> bary,
+    const vec3<T> grad_baryclip_upstream) {
+  // Constants are spelled as T so the template is not tied to float and the
+  // threshold test below matches the forward clamp exactly.
+  const T zero = static_cast<T>(0);
+  const T one = static_cast<T>(1);
+  const T min_sum = static_cast<T>(1e-5);
+
+  // Redo some of the forward pass calculations
+  vec3<T> w(zero, zero, zero);
+  w.x = std::max(bary.x, zero);
+  w.y = std::max(bary.y, zero);
+  w.z = std::max(bary.z, zero);
+  T w_sum = w.x + w.y + w.z;
+
+  // Check if the sum was clipped. If so the normalizer was the constant
+  // threshold and passes no gradient back to the weights.
+  T grad_sum_clip = one;
+  if (w_sum < min_sum) {
+    grad_sum_clip = zero;
+    w_sum = min_sum;
+  }
+
+  // A coordinate clamped to 0.0 (negative input) receives no gradient.
+  const vec3<T> grad_clip(
+      bary.x < zero ? zero : one,
+      bary.y < zero ? zero : one,
+      bary.z < zero ? zero : one);
+
+  // Gradients of the sum; squaring by multiplication avoids a pow() call.
+  const T w_sum_sq = w_sum * w_sum;
+  const vec3<T> grad_sum(
+      -w.x / w_sum_sq * grad_sum_clip,
+      -w.y / w_sum_sq * grad_sum_clip,
+      -w.z / w_sum_sq * grad_sum_clip);
+
+  // Gradients for each of the bary coordinates including the cross terms
+  // from the sum.
+  vec3<T> grad_bary(zero, zero, zero);
+  grad_bary.x = grad_clip.x *
+      (grad_baryclip_upstream.x * (one / w_sum + grad_sum.x) +
+       grad_baryclip_upstream.y * (grad_sum.y) +
+       grad_baryclip_upstream.z * (grad_sum.z));
+
+  grad_bary.y = grad_clip.y *
+      (grad_baryclip_upstream.y * (one / w_sum + grad_sum.y) +
+       grad_baryclip_upstream.x * (grad_sum.x) +
+       grad_baryclip_upstream.z * (grad_sum.z));
+
+  grad_bary.z = grad_clip.z *
+      (grad_baryclip_upstream.z * (one / w_sum + grad_sum.z) +
+       grad_baryclip_upstream.x * (grad_sum.x) +
+       grad_baryclip_upstream.y * (grad_sum.y));
+
+  return grad_bary;
+}
+
+// Calculate minimum distance between a line segment (v1 - v0) and point p.
+//
+// Args:
+//     p: Coordinates of a point.
+//     v0, v1: Coordinates of the end points of the line segment.
+//
+// Returns:
+//     squared distance of the point to the line.
+//
+// Consider the line extending the segment - this can be parameterized as:
+// v0 + t (v1 - v0).
+//
+// First find the projection of point p onto the line. It falls where:
+// t = [(p - v0) . (v1 - v0)] / |v1 - v0|^2
+// where . is the dot product.
+//
+// The parameter t is clamped from [0, 1] to handle points outside the
+// segment (v1 - v0).
+//
+// Once the projection of the point on the segment is known, the distance from
+// p to the projection gives the minimum distance to the segment.
+//
+template <typename T>
+T PointLineDistanceForward(
+    const vec2<T>& p,
+    const vec2<T>& v0,
+    const vec2<T>& v1) {
+  const vec2<T> v1v0 = v1 - v0;
+  const T l2 = dot(v1v0, v1v0);
+  if (l2 <= kEpsilon) {
+    return dot(p - v1, p - v1); // Degenerate segment: distance to v1.
+  }
+  // Clamp bounds are spelled in T so that double instantiations compile too.
+  const T t = dot(v1v0, p - v0) / l2;
+  const T tt = std::min(std::max(t, static_cast<T>(0)), static_cast<T>(1));
+  const vec2<T> p_proj = v0 + tt * v1v0;
+  return dot(p - p_proj, p - p_proj);
+}
+
+template <typename T>
+T PointLine3DistanceForward(
+    const vec3<T>& p,
+    const vec3<T>& v0,
+    const vec3<T>& v1) {
+  const vec3<T> v1v0 = v1 - v0;
+  const T l2 = dot(v1v0, v1v0);
+  if (l2 <= kEpsilon) {
+    return dot(p - v1, p - v1); // Degenerate segment: distance to v1.
+  }
+  // Clamp bounds are spelled in T so that double instantiations compile too.
+  const T t = dot(v1v0, p - v0) / l2;
+  const T tt = std::min(std::max(t, static_cast<T>(0)), static_cast<T>(1));
+  const vec3<T> p_proj = v0 + tt * v1v0;
+  return dot(p - p_proj, p - p_proj);
+}
+
+// Backward pass for point to line distance in 2D.
+//
+// Args:
+//     p: Coordinates of a point.
+//     v0, v1: Coordinates of the end points of the line segment.
+//     grad_dist: Upstream gradient for the distance.
+//
+// Returns:
+//    tuple of gradients for each of the input points:
+//      (vec2<T> grad_p, vec2<T> grad_v0, vec2<T> grad_v1)
+//
+template <typename T>
+inline std::tuple<vec2<T>, vec2<T>, vec2<T>> PointLineDistanceBackward(
+    const vec2<T>& p,
+    const vec2<T>& v0,
+    const vec2<T>& v1,
+    const T& grad_dist) {
+  const vec2<T> v1v0 = v1 - v0;
+  const T t_bot = dot(v1v0, v1v0);
+  if (t_bot <= kEpsilon) { // Degenerate: forward returned |p - v1|^2.
+    const vec2<T> grad_p = (grad_dist * 2.0f) * (p - v1); // avoids 0 / 0
+    const vec2<T> grad_v1 = static_cast<T>(-1) * grad_p;
+    return std::make_tuple(grad_p, vec2<T>(0.0f, 0.0f), grad_v1);
+  }
+  const T t = dot(v1v0, p - v0) / t_bot; // Redo the forward projection.
+  const T tt = std::min(std::max(t, static_cast<T>(0)), static_cast<T>(1));
+  const vec2<T> p_proj = (1.0f - tt) * v0 + tt * v1;
+  const vec2<T> grad_v0 = grad_dist * (1.0f - tt) * 2.0f * (p_proj - p);
+  const vec2<T> grad_v1 = grad_dist * tt * 2.0f * (p_proj - p);
+  const vec2<T> grad_p = -1.0f * grad_dist * 2.0f * (p_proj - p);
+  return std::make_tuple(grad_p, grad_v0, grad_v1);
+}
+
+template <typename T>
+std::tuple<vec3<T>, vec3<T>, vec3<T>> PointLine3DistanceBackward(
+    const vec3<T>& p,
+    const vec3<T>& v0,
+    const vec3<T>& v1,
+    const T& grad_dist) {
+  const vec3<T> v1v0 = v1 - v0;
+  const vec3<T> pv0 = p - v0;
+  const T t_bot = dot(v1v0, v1v0);
+  const T t_top = dot(v1v0, pv0);
+  // Gradients default to zero; each branch below fills in what it affects.
+  vec3<T> grad_p{0.0f, 0.0f, 0.0f};
+  vec3<T> grad_v0{0.0f, 0.0f, 0.0f};
+  vec3<T> grad_v1{0.0f, 0.0f, 0.0f};
+  // Unclamped projection parameter along the segment (0 at v0, 1 at v1).
+  const T tt = t_top / t_bot;
+  // Returns (grad_p, grad_v0, grad_v1), each scaled by grad_dist.
+  if (t_bot < kEpsilon) {
+    // if t_bot small, then v0 == v1,
+    // and dist = 0.5 * dot(pv0, pv0) + 0.5 * dot(pv1, pv1)
+    grad_p = grad_dist * 2.0f * pv0;
+    grad_v0 = -0.5f * grad_p;
+    grad_v1 = grad_v0;
+  } else if (tt < 0.0f) { // closest point on the segment is v0
+    grad_p = grad_dist * 2.0f * pv0;
+    grad_v0 = -1.0f * grad_p;
+    // no gradients wrt v1
+  } else if (tt > 1.0f) { // closest point on the segment is v1
+    grad_p = grad_dist * 2.0f * (p - v1);
+    grad_v1 = -1.0f * grad_p;
+    // no gradients wrt v0
+  } else { // closest point is interior, so tt itself depends on p, v0, v1
+    const vec3<T> p_proj = v0 + tt * v1v0;
+    const vec3<T> diff = p - p_proj;
+    const vec3<T> grad_base = grad_dist * 2.0f * diff;
+    grad_p = grad_base - dot(grad_base, v1v0) * v1v0 / t_bot;
+    const vec3<T> dtt_v0 = (-1.0f * v1v0 - pv0 + 2.0f * tt * v1v0) / t_bot;
+    grad_v0 = (-1.0f + tt) * grad_base - dot(grad_base, v1v0) * dtt_v0;
+    const vec3<T> dtt_v1 = (pv0 - 2.0f * tt * v1v0) / t_bot;
+    grad_v1 = -dot(grad_base, v1v0) * dtt_v1 - tt * grad_base;
+  }
+
+  return std::make_tuple(grad_p, grad_v0, grad_v1);
+}
+
+// The forward pass for calculating the shortest distance between a point
+// and a triangle.
+// Ref: https://www.randygaul.net/2014/07/23/distance-point-to-line-segment/
+//
+// Args:
+//     p: Coordinates of a point.
+//     v0, v1, v2: Coordinates of the three triangle vertices.
+//
+// Returns:
+//     shortest squared distance from a point to a triangle.
+//
+//
+template <typename T>
+T PointTriangleDistanceForward(
+    const vec2<T>& p,
+    const vec2<T>& v0,
+    const vec2<T>& v1,
+    const vec2<T>& v2) {
+  // Squared distance from p to each of the three edges; the result is the
+  // smallest of them.
+  const T d01 = PointLineDistanceForward(p, v0, v1);
+  const T d02 = PointLineDistanceForward(p, v0, v2);
+  const T d12 = PointLineDistanceForward(p, v1, v2);
+  const T d01_02 = std::min(d01, d02);
+  // Nested two-way minimum over the three edge distances.
+  return std::min(d01_02, d12);
+}
+
+// Backward pass for point triangle distance.
+//
+// Args:
+//     p: Coordinates of a point.
+//     v0, v1, v2: Coordinates of the three triangle vertices.
+//     grad_dist: Upstream gradient for the distance.
+//
+// Returns:
+//    tuple of gradients for the point and each of the triangle vertices:
+//      (vec2<T> grad_p, vec2<T> grad_v0, vec2<T> grad_v1, vec2<T> grad_v2)
+//
+template <typename T>
+inline std::tuple<vec2<T>, vec2<T>, vec2<T>, vec2<T>>
+PointTriangleDistanceBackward(
+    const vec2<T>& p,
+    const vec2<T>& v0,
+    const vec2<T>& v1,
+    const vec2<T>& v2,
+    const T& grad_dist) {
+  // Recompute the squared distance from p to all 3 edges of the triangle.
+  const T e01_dist = PointLineDistanceForward(p, v0, v1);
+  const T e02_dist = PointLineDistanceForward(p, v0, v2);
+  const T e12_dist = PointLineDistanceForward(p, v1, v2);
+
+  // Zero-initialize outputs; the vertex off the closest edge keeps zero.
+  vec2<T> grad_v0(0.0f, 0.0f);
+  vec2<T> grad_v1(0.0f, 0.0f);
+  vec2<T> grad_v2(0.0f, 0.0f);
+  vec2<T> grad_p(0.0f, 0.0f);
+  // Ties are resolved in the order e01, e02, e12 (first match wins).
+  // Find which edge is the closest and return PointLineDistanceBackward for
+  // that edge.
+  if (e01_dist <= e02_dist && e01_dist <= e12_dist) {
+    // Closest edge is v1 - v0.
+    auto grad_e01 = PointLineDistanceBackward(p, v0, v1, grad_dist);
+    grad_p = std::get<0>(grad_e01);
+    grad_v0 = std::get<1>(grad_e01);
+    grad_v1 = std::get<2>(grad_e01);
+  } else if (e02_dist <= e01_dist && e02_dist <= e12_dist) {
+    // Closest edge is v2 - v0.
+    auto grad_e02 = PointLineDistanceBackward(p, v0, v2, grad_dist);
+    grad_p = std::get<0>(grad_e02);
+    grad_v0 = std::get<1>(grad_e02);
+    grad_v2 = std::get<2>(grad_e02);
+  } else if (e12_dist <= e01_dist && e12_dist <= e02_dist) {
+    // Closest edge is v2 - v1.
+    auto grad_e12 = PointLineDistanceBackward(p, v1, v2, grad_dist);
+    grad_p = std::get<0>(grad_e12);
+    grad_v1 = std::get<1>(grad_e12);
+    grad_v2 = std::get<2>(grad_e12);
+  }
+  // Output order is (grad_p, grad_v0, grad_v1, grad_v2).
+  return std::make_tuple(grad_p, grad_v0, grad_v1, grad_v2);
+}
+
+// Computes the area of a triangle (v0, v1, v2).
+// Args:
+//     v0, v1, v2: vec3 coordinates of the triangle vertices
+//
+// Returns:
+//     area: T: the area of the triangle
+//
+template <typename T>
+T AreaOfTriangle(const vec3<T>& v0, const vec3<T>& v1, const vec3<T>& v2) {
+  vec3<T> p0 = v1 - v0;
+  vec3<T> p1 = v2 - v0;
+
+  // Compute the norm of the cross product (p0 x p1). The result is kept in T
+  // so that double inputs are not truncated to float.
+  const T dd = std::hypot(
+      p0.y * p1.z - p0.z * p1.y,
+      std::hypot(p0.z * p1.x - p0.x * p1.z, p0.x * p1.y - p0.y * p1.x));
+  return dd / 2.0;
+}
+
+// PointTriangle3DistanceForward (defined further below) computes the squared
+// distance of a point p to a triangle (v0, v1, v2). If the projection p0 of p
+// on the plane spanned by (v0, v1, v2) is inside the triangle, the returned
+// value is the squared distance of p to p0. Otherwise, the returned value is
+// the smallest squared distance of p from the line segments (v0, v1),
+// (v0, v2) and (v1, v2).
+//
+// Args:
+//     p: vec3 coordinates of a point
+//     v0, v1, v2: vec3 coordinates of the triangle vertices
+//
+// Returns:
+//     dist: Float of the squared distance
+//
+
+const float vEpsilon = 1e-8; // Added to vector norms before dividing by them.
+
+template <typename T>
+vec3<T> BarycentricCoords3Forward(
+    const vec3<T>& p,
+    const vec3<T>& v0,
+    const vec3<T>& v1,
+    const vec3<T>& v2) {
+  // Triangle edges measured from v0, and the offset of p from v0.
+  const vec3<T> e1 = v1 - v0;
+  const vec3<T> e2 = v2 - v0;
+  const vec3<T> q = p - v0;
+  // Dot products entering the 2x2 linear system for (w1, w2).
+  const T e1e1 = dot(e1, e1);
+  const T e1e2 = dot(e1, e2);
+  const T e2e2 = dot(e2, e2);
+  const T qe1 = dot(q, e1);
+  const T qe2 = dot(q, e2);
+  // kEpsilon is added to the determinant before dividing by it.
+  const T det = e1e1 * e2e2 - e1e2 * e1e2 + kEpsilon;
+  const T w1 = (e2e2 * qe1 - e1e2 * qe2) / det;
+  const T w2 = (e1e1 * qe2 - e1e2 * qe1) / det;
+  // w0 is whatever remains so that the three weights sum to one.
+  return vec3<T>(1.0f - w1 - w2, w1, w2);
+}
+
+// Checks whether the point p is inside the triangle (v0, v1, v2).
+// A point is inside the triangle, if all barycentric coordinates
+// wrt the triangle are >= 0 & <= 1.
+// If the triangle is degenerate, aka line or point, then return False.
+//
+// NOTE that this function assumes that p lives on the space spanned
+// by (v0, v1, v2).
+// TODO(gkioxari) explicitly check whether p is coplanar with (v0, v1, v2)
+// and throw an error if check fails
+//
+// Args:
+//     p: vec3 coordinates of a point
+//     v0, v1, v2: vec3 coordinates of the triangle vertices
+//     min_triangle_area: triangles less than this size are considered
+//     points/lines, IsInsideTriangle returns False
+//
+// Returns:
+//     inside: bool indicating whether p is inside triangle
+//
+template <typename T>
+static bool IsInsideTriangle(
+    const vec3<T>& p,
+    const vec3<T>& v0,
+    const vec3<T>& v1,
+    const vec3<T>& v2,
+    const double min_triangle_area) {
+  // Triangles with area below min_triangle_area count as points/lines,
+  // which never contain p.
+  if (AreaOfTriangle(v0, v1, v2) < min_triangle_area) {
+    return false;
+  }
+  // Otherwise p is inside when every barycentric weight lies in [0, 1].
+  const vec3<T> w = BarycentricCoords3Forward(p, v0, v1, v2);
+  const bool x_ok = 0.0f <= w.x && w.x <= 1.0f;
+  const bool y_ok = 0.0f <= w.y && w.y <= 1.0f;
+  const bool z_ok = 0.0f <= w.z && w.z <= 1.0f;
+  return x_ok && y_ok && z_ok;
+}
+
+template <typename T>
+T PointTriangle3DistanceForward(
+    const vec3<T>& p,
+    const vec3<T>& v0,
+    const vec3<T>& v1,
+    const vec3<T>& v2,
+    const double min_triangle_area) {
+  vec3<T> normal = cross(v2 - v0, v1 - v0);
+  const T norm_normal = norm(normal);
+  normal = normal / (norm_normal + vEpsilon);
+
+  // p0 is the projection of p on the plane spanned by (v0, v1, v2)
+  // i.e. p0 = p + t * normal, s.t. (p0 - v0) is orthogonal to normal
+  const T t = dot(v0 - p, normal);
+  const vec3<T> p0 = p + t * normal;
+
+  bool is_inside = IsInsideTriangle(p0, v0, v1, v2, min_triangle_area);
+  T dist = 0.0f;
+
+  if ((is_inside) && (norm_normal > kEpsilon)) {
+    // if projection p0 is inside triangle spanned by (v0, v1, v2)
+    // then distance is equal to norm(p0 - p)^2
+    dist = t * t;
+  } else {
+    const T e01 = PointLine3DistanceForward(p, v0, v1);
+    const T e02 = PointLine3DistanceForward(p, v0, v2);
+    const T e12 = PointLine3DistanceForward(p, v1, v2);
+    // Edge distances stay in T (not float) so double keeps full precision.
+    dist = (e01 > e02) ? e02 : e01;
+    dist = (dist > e12) ? e12 : dist;
+  }
+
+  return dist;
+}
+
+template <typename T> // Gradients of cross(a, b) with respect to a and b.
+std::tuple<vec3<T>, vec3<T>>
+cross_backward(const vec3<T>& a, const vec3<T>& b, const vec3<T>& grad_cross) {
+  const T grad_ax = -grad_cross.y * b.z + grad_cross.z * b.y;
+  const T grad_ay = grad_cross.x * b.z - grad_cross.z * b.x;
+  const T grad_az = -grad_cross.x * b.y + grad_cross.y * b.x;
+  const vec3<T> grad_a = vec3<T>(grad_ax, grad_ay, grad_az);
+  // Scalars are T (not float) so double gradients are not rounded.
+  const T grad_bx = grad_cross.y * a.z - grad_cross.z * a.y;
+  const T grad_by = -grad_cross.x * a.z + grad_cross.z * a.x;
+  const T grad_bz = grad_cross.x * a.y - grad_cross.y * a.x;
+  const vec3<T> grad_b = vec3<T>(grad_bx, grad_by, grad_bz);
+
+  return std::make_tuple(grad_a, grad_b);
+}
+
+template <typename T> // Gradient of a / (norm(a) + vEpsilon) w.r.t. a.
+vec3<T> normalize_backward(const vec3<T>& a, const vec3<T>& grad_normz) {
+  const T a_norm = norm(a) + vEpsilon;
+  const vec3<T> out = a / a_norm;
+  // Applies (I - out * out^T) / a_norm to grad_normz; all scalars are T.
+  const T grad_ax = grad_normz.x * (1.0f - out.x * out.x) / a_norm +
+      grad_normz.y * (-out.x * out.y) / a_norm +
+      grad_normz.z * (-out.x * out.z) / a_norm;
+  const T grad_ay = grad_normz.x * (-out.x * out.y) / a_norm +
+      grad_normz.y * (1.0f - out.y * out.y) / a_norm +
+      grad_normz.z * (-out.y * out.z) / a_norm;
+  const T grad_az = grad_normz.x * (-out.x * out.z) / a_norm +
+      grad_normz.y * (-out.y * out.z) / a_norm +
+      grad_normz.z * (1.0f - out.z * out.z) / a_norm;
+  return vec3<T>(grad_ax, grad_ay, grad_az);
+}
+
+// The backward pass for computing the squared distance of a point
+// to the triangle (v0, v1, v2).
+//
+// Args:
+//     p: xyz coordinates of a point
+//     v0, v1, v2: xyz coordinates of the triangle vertices
+//     grad_dist: Float of the gradient wrt dist
+//     min_triangle_area: triangles less than this size are considered
+//     points/lines, IsInsideTriangle returns False
+//
+// Returns:
+//     tuple of gradients for the point and triangle:
+//        (vec3<T> grad_p, vec3<T> grad_v0, vec3<T> grad_v1, vec3<T> grad_v2)
+//
+
+template <typename T>
+static std::tuple<vec3<T>, vec3<T>, vec3<T>, vec3<T>>
+PointTriangle3DistanceBackward(
+    const vec3<T>& p,
+    const vec3<T>& v0,
+    const vec3<T>& v1,
+    const vec3<T>& v2,
+    const T& grad_dist,
+    const double min_triangle_area) {
+  const vec3<T> v2v0 = v2 - v0;
+  const vec3<T> v1v0 = v1 - v0;
+  const vec3<T> v0p = v0 - p;
+  vec3<T> raw_normal = cross(v2v0, v1v0);
+  const T norm_normal = norm(raw_normal);
+  vec3<T> normal = raw_normal / (norm_normal + vEpsilon);
+  // The forward pass normalizes with the same vEpsilon offset.
+  // p0 is the projection of p on the plane spanned by (v0, v1, v2)
+  // i.e. p0 = p + t * normal, s.t. (p0 - v0) is orthogonal to normal
+  const T t = dot(v0 - p, normal);
+  const vec3<T> p0 = p + t * normal;
+  const vec3<T> diff = t * normal;
+  // Select the same branch as the forward pass (plane vs. closest edge).
+  bool is_inside = IsInsideTriangle(p0, v0, v1, v2, min_triangle_area);
+  // All gradients start at zero; untouched vertices keep a zero gradient.
+  vec3<T> grad_p(0.0f, 0.0f, 0.0f);
+  vec3<T> grad_v0(0.0f, 0.0f, 0.0f);
+  vec3<T> grad_v1(0.0f, 0.0f, 0.0f);
+  vec3<T> grad_v2(0.0f, 0.0f, 0.0f);
+
+  if ((is_inside) && (norm_normal > kEpsilon)) {
+    // derivative of dist wrt p
+    grad_p = -2.0f * grad_dist * t * normal;
+    // derivative of dist wrt normal
+    const vec3<T> grad_normal = 2.0f * grad_dist * t * (v0p + diff);
+    // derivative of dist wrt raw_normal
+    const vec3<T> grad_raw_normal = normalize_backward(raw_normal, grad_normal);
+    // derivative of dist wrt v2v0 and v1v0
+    const auto grad_cross = cross_backward(v2v0, v1v0, grad_raw_normal);
+    const vec3<T> grad_cross_v2v0 = std::get<0>(grad_cross);
+    const vec3<T> grad_cross_v1v0 = std::get<1>(grad_cross);
+    grad_v0 =
+        grad_dist * 2.0f * t * normal - (grad_cross_v2v0 + grad_cross_v1v0);
+    grad_v1 = grad_cross_v1v0;
+    grad_v2 = grad_cross_v2v0;
+  } else {
+    const T e01 = PointLine3DistanceForward(p, v0, v1);
+    const T e02 = PointLine3DistanceForward(p, v0, v2);
+    const T e12 = PointLine3DistanceForward(p, v1, v2);
+    // Ties are resolved in the order e01, e02, e12 (first match wins).
+    if ((e01 <= e02) && (e01 <= e12)) {
+      // e01 is smallest
+      const auto grads = PointLine3DistanceBackward(p, v0, v1, grad_dist);
+      grad_p = std::get<0>(grads);
+      grad_v0 = std::get<1>(grads);
+      grad_v1 = std::get<2>(grads);
+    } else if ((e02 <= e01) && (e02 <= e12)) {
+      // e02 is smallest
+      const auto grads = PointLine3DistanceBackward(p, v0, v2, grad_dist);
+      grad_p = std::get<0>(grads);
+      grad_v0 = std::get<1>(grads);
+      grad_v2 = std::get<2>(grads);
+    } else if ((e12 <= e01) && (e12 <= e02)) {
+      // e12 is smallest
+      const auto grads = PointLine3DistanceBackward(p, v1, v2, grad_dist);
+      grad_p = std::get<0>(grads);
+      grad_v1 = std::get<1>(grads);
+      grad_v2 = std::get<2>(grads);
+    }
+  }
+  // Output order is (grad_p, grad_v0, grad_v1, grad_v2).
+  return std::make_tuple(grad_p, grad_v0, grad_v1, grad_v2);
+}
diff --git a/pytorch3d/pytorch3d/csrc/utils/index_utils.cuh b/pytorch3d/pytorch3d/csrc/utils/index_utils.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..74bca270f344b0d0b0bc19c45fc7373eb5875a14
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/utils/index_utils.cuh
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// This converts dynamic array lookups into static array lookups, for small
+// arrays up to size 32.
+//
+// Suppose we have a small thread-local array:
+//
+// float vals[10];
+//
+// Ideally we should only index this array using static indices:
+//
+// for (int i = 0; i < 10; ++i) vals[i] = i * i;
+//
+// If we do so, then the CUDA compiler may be able to place the array into
+// registers, which can have a big performance improvement. However if we
+// access the array dynamically, then the compiler may force the array into
+// local memory, which has the same latency as global memory.
+//
+// These functions convert dynamic array access into static array access
+// using a brute-force lookup table. It can be used like this:
+//
+// float vals[10];
+// int idx = 3;
+// float val = 3.14f;
+// RegisterIndexUtils<float, 10>::set(vals, idx, val);
+// float val2 = RegisterIndexUtils<float, 10>::get(vals, idx);
+//
+// The implementation is based on fbcuda/RegisterUtils.cuh:
+// https://github.com/facebook/fbcuda/blob/master/RegisterUtils.cuh
+// To avoid depending on the entire library, we just reimplement these two
+// functions. The fbcuda implementation is a bit more sophisticated, and uses
+// the preprocessor to generate switch statements that go up to N for each
+// value of N. We are lazy and just have a giant explicit switch statement.
+//
+// We might be able to use a template metaprogramming approach similar to
+// DispatchKernel1D for this. However DispatchKernel1D is intended to be used
+// for dispatching to the correct CUDA kernel on the host, while this is
+// intended to run on the device. I was concerned that a metaprogramming
+// approach for this might lead to extra function calls at runtime if the
+// compiler fails to optimize them away, which could be very slow on device.
+// However I didn't actually benchmark or test this.
+template <typename T, int N>
+struct RegisterIndexUtils {
+  __device__ __forceinline__ static T get(const T arr[N], int idx) {
+    if (idx < 0 || idx >= N) // out-of-range reads yield a default T
+      return T();
+    switch (idx) {
+      case 0:
+        return arr[0];
+      case 1:
+        return arr[1];
+      case 2:
+        return arr[2];
+      case 3:
+        return arr[3];
+      case 4:
+        return arr[4];
+      case 5:
+        return arr[5];
+      case 6:
+        return arr[6];
+      case 7:
+        return arr[7];
+      case 8:
+        return arr[8];
+      case 9:
+        return arr[9];
+      case 10:
+        return arr[10];
+      case 11:
+        return arr[11];
+      case 12:
+        return arr[12];
+      case 13:
+        return arr[13];
+      case 14:
+        return arr[14];
+      case 15:
+        return arr[15];
+      case 16:
+        return arr[16];
+      case 17:
+        return arr[17];
+      case 18:
+        return arr[18];
+      case 19:
+        return arr[19];
+      case 20:
+        return arr[20];
+      case 21:
+        return arr[21];
+      case 22:
+        return arr[22];
+      case 23:
+        return arr[23];
+      case 24:
+        return arr[24];
+      case 25:
+        return arr[25];
+      case 26:
+        return arr[26];
+      case 27:
+        return arr[27];
+      case 28:
+        return arr[28];
+      case 29:
+        return arr[29];
+      case 30:
+        return arr[30];
+      case 31:
+        return arr[31];
+    };
+    return T(); // reached only when 32 <= idx < N (no matching case)
+  }
+
+  __device__ __forceinline__ static void set(T arr[N], int idx, T val) {
+    if (idx < 0 || idx >= N) // out-of-range writes are silently dropped
+      return;
+    switch (idx) {
+      case 0:
+        arr[0] = val;
+        break;
+      case 1:
+        arr[1] = val;
+        break;
+      case 2:
+        arr[2] = val;
+        break;
+      case 3:
+        arr[3] = val;
+        break;
+      case 4:
+        arr[4] = val;
+        break;
+      case 5:
+        arr[5] = val;
+        break;
+      case 6:
+        arr[6] = val;
+        break;
+      case 7:
+        arr[7] = val;
+        break;
+      case 8:
+        arr[8] = val;
+        break;
+      case 9:
+        arr[9] = val;
+        break;
+      case 10:
+        arr[10] = val;
+        break;
+      case 11:
+        arr[11] = val;
+        break;
+      case 12:
+        arr[12] = val;
+        break;
+      case 13:
+        arr[13] = val;
+        break;
+      case 14:
+        arr[14] = val;
+        break;
+      case 15:
+        arr[15] = val;
+        break;
+      case 16:
+        arr[16] = val;
+        break;
+      case 17:
+        arr[17] = val;
+        break;
+      case 18:
+        arr[18] = val;
+        break;
+      case 19:
+        arr[19] = val;
+        break;
+      case 20:
+        arr[20] = val;
+        break;
+      case 21:
+        arr[21] = val;
+        break;
+      case 22:
+        arr[22] = val;
+        break;
+      case 23:
+        arr[23] = val;
+        break;
+      case 24:
+        arr[24] = val;
+        break;
+      case 25:
+        arr[25] = val;
+        break;
+      case 26:
+        arr[26] = val;
+        break;
+      case 27:
+        arr[27] = val;
+        break;
+      case 28:
+        arr[28] = val;
+        break;
+      case 29:
+        arr[29] = val;
+        break;
+      case 30:
+        arr[30] = val;
+        break;
+      case 31:
+        arr[31] = val;
+        break;
+    } // NOTE(review): for N > 32, indices 32..N-1 match no case (no store)
+  }
+};
diff --git a/pytorch3d/pytorch3d/csrc/utils/mink.cuh b/pytorch3d/pytorch3d/csrc/utils/mink.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..c7858f58c8b92c12f5e889c10fe6e98a622d82b7
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/utils/mink.cuh
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#define MINK_H
+
+#include "index_utils.cuh"
+
+// A data structure to keep track of the smallest K keys seen so far as well
+// as their associated values, intended to be used in device code.
+// This data structure doesn't allocate any memory; keys and values are stored
+// in arrays passed to the constructor.
+//
+// The implementation is generic; it can be used for any key type that supports
+// the < operator, and can be used with any value type.
+//
+// Example usage:
+//
+// float keys[K];
+// int values[K];
+// MinK<float, int> mink(keys, values, K);
+// for (...) {
+//   // Produce some key and value from somewhere
+//   mink.add(key, value);
+// }
+// mink.sort();
+//
+// Now keys and values store the smallest K keys seen so far and the values
+// associated to these keys:
+//
+// for (int k = 0; k < K; ++k) {
+//   float key_k = keys[k];
+//   int value_k = values[k];
+// }
+template <typename key_t, typename value_t>
+class MinK {
+ public:
+  // Constructor.
+  //
+  // Arguments:
+  //   keys: Array in which to store keys
+  //   vals: Array in which to store values
+  //   K: How many values to keep track of
+  __device__ MinK(key_t* keys, value_t* vals, int K)
+      : keys(keys), vals(vals), K(K), _size(0) {}
+
+  // Try to add a new key and associated value to the data structure. If the key
+  // is one of the smallest K seen so far then it will be kept; otherwise it
+  // will not be kept.
+  //
+  // This takes O(1) operations if the new key is not kept, or if the structure
+  // currently contains fewer than K elements. Otherwise this takes O(K) time.
+  //
+  // Arguments:
+  //   key: The key to add
+  //   val: The value associated to the key
+  __device__ __forceinline__ void add(const key_t& key, const value_t& val) {
+    if (_size < K) {
+      keys[_size] = key;
+      vals[_size] = val;
+      if (_size == 0 || key > max_key) { // track the largest stored key
+        max_key = key;
+        max_idx = _size;
+      }
+      _size++;
+    } else if (key < max_key) { // full: overwrite the current maximum
+      keys[max_idx] = key;
+      vals[max_idx] = val;
+      max_key = key; // provisional maximum; the scan below corrects it
+      for (int k = 0; k < K; ++k) {
+        key_t cur_key = keys[k];
+        if (cur_key > max_key) {
+          max_key = cur_key;
+          max_idx = k;
+        }
+      }
+    }
+  }
+
+  // Get the number of items currently stored in the structure.
+  // This takes O(1) time.
+  __device__ __forceinline__ int size() {
+    return _size;
+  }
+
+  // Sort the items stored in the structure using bubble sort.
+  // This takes O(K^2) time.
+  __device__ __forceinline__ void sort() {
+    for (int i = 0; i < _size - 1; ++i) {
+      for (int j = 0; j < _size - i - 1; ++j) {
+        if (keys[j + 1] < keys[j]) { // swap key and value together
+          key_t key = keys[j];
+          value_t val = vals[j];
+          keys[j] = keys[j + 1];
+          vals[j] = vals[j + 1];
+          keys[j + 1] = key;
+          vals[j + 1] = val;
+        }
+      }
+    }
+  }
+
+ private:
+  key_t* keys; // caller-provided storage for the kept keys
+  value_t* vals; // caller-provided storage for the associated values
+  int K; // capacity: how many smallest keys to keep
+  int _size; // number of entries currently stored (at most K)
+  key_t max_key; // largest stored key; not set until the first add()
+  int max_idx; // index of max_key within keys; not set until the first add()
+};
+
+// This is a version of MinK that only touches the arrays using static indexing
+// via RegisterIndexUtils. If the keys and values are stored in thread-local
+// arrays, then this may allow the compiler to place them in registers for
+// fast access.
+//
+// This has the same API as MinK, but doesn't support sorting.
+// We found that sorting via RegisterIndexUtils gave very poor performance,
+// and suspect it may have prevented the compiler from placing the arrays
+// into registers.
+template <typename key_t, typename value_t, int K>
+class RegisterMinK {
+ public:
+  __device__ RegisterMinK(key_t* keys, value_t* vals)
+      : keys(keys), vals(vals), _size(0) {}
+
+  __device__ __forceinline__ void add(const key_t& key, const value_t& val) {
+    if (_size < K) { // not yet full: append and track the largest key
+      RegisterIndexUtils<key_t, K>::set(keys, _size, key);
+      RegisterIndexUtils<value_t, K>::set(vals, _size, val);
+      if (_size == 0 || key > max_key) {
+        max_key = key;
+        max_idx = _size;
+      }
+      _size++;
+    } else if (key < max_key) { // full: overwrite the current maximum
+      RegisterIndexUtils<key_t, K>::set(keys, max_idx, key);
+      RegisterIndexUtils<value_t, K>::set(vals, max_idx, val);
+      max_key = key; // provisional maximum; the scan below corrects it
+      for (int k = 0; k < K; ++k) {
+        key_t cur_key = RegisterIndexUtils<key_t, K>::get(keys, k);
+        if (cur_key > max_key) {
+          max_key = cur_key;
+          max_idx = k;
+        }
+      }
+    }
+  }
+
+  __device__ __forceinline__ int size() { // number of entries stored so far
+    return _size;
+  }
+
+ private:
+  key_t* keys; // caller-provided storage, accessed via RegisterIndexUtils
+  value_t* vals; // caller-provided storage, accessed via RegisterIndexUtils
+  int _size; // number of entries currently stored (at most K)
+  key_t max_key; // largest stored key; not set until the first add()
+  int max_idx; // index of max_key within keys; not set until the first add()
+};
diff --git a/pytorch3d/pytorch3d/csrc/utils/pytorch3d_cutils.h b/pytorch3d/pytorch3d/csrc/utils/pytorch3d_cutils.h
new file mode 100644
index 0000000000000000000000000000000000000000..48d04546e6f96fd7eb2c182f7b6d692f5821c1d8
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/utils/pytorch3d_cutils.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <torch/extension.h>
+
+#define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor.")
+#define CHECK_CONTIGUOUS(x) \
+  TORCH_CHECK(x.is_contiguous(), #x " must be contiguous.")
+#define CHECK_CONTIGUOUS_CUDA(x) \
+  CHECK_CUDA(x);                 \
+  CHECK_CONTIGUOUS(x)
diff --git a/pytorch3d/pytorch3d/csrc/utils/vec2.h b/pytorch3d/pytorch3d/csrc/utils/vec2.h
new file mode 100644
index 0000000000000000000000000000000000000000..f4550f918394c06ecbcca5db013e4f0f014ee914
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/utils/vec2.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <type_traits>
+
+// A fixed-sized vector with basic arithmetic operators useful for
+// representing 2D coordinates.
+// TODO: switch to Eigen if more functionality is needed.
+
+template <
+    typename T,
+    typename = std::enable_if_t<
+        std::is_same<T, double>::value || std::is_same<T, float>::value>>
+struct vec2 {
+  using scalar_t = T; // Element type; only float and double are accepted.
+  T x, y;
+  vec2(T x_in, T y_in) : x(x_in), y(y_in) {}
+};
+
+template <typename T> // Component-wise sum of two 2D vectors.
+inline vec2<T> operator+(const vec2<T>& lhs, const vec2<T>& rhs) {
+  return {lhs.x + rhs.x, lhs.y + rhs.y};
+}
+
+template <typename T> // Component-wise difference of two 2D vectors.
+inline vec2<T> operator-(const vec2<T>& lhs, const vec2<T>& rhs) {
+  return {lhs.x - rhs.x, lhs.y - rhs.y};
+}
+
+template <typename T> // Scale both components of v by the scalar s.
+inline vec2<T> operator*(const T s, const vec2<T>& v) {
+  return {s * v.x, s * v.y};
+}
+
+template <typename T> // Divide each component of v by the scalar divisor.
+inline vec2<T> operator/(const vec2<T>& v, const T divisor) {
+  // An exactly-zero divisor is reported as an error rather than divided by.
+  if (divisor == 0.0) {
+    AT_ERROR("denominator in vec2 division is 0");
+  }
+  return {v.x / divisor, v.y / divisor};
+}
+
+template <typename T> // Dot product of two 2D vectors.
+inline T dot(const vec2<T>& lhs, const vec2<T>& rhs) {
+  return lhs.x * rhs.x + lhs.y * rhs.y;
+}
+
+template <typename T> // Euclidean distance between the points a and b.
+inline T norm(const vec2<T>& a, const vec2<T>& b) {
+  const vec2<T> delta(b.x - a.x, b.y - a.y);
+  return sqrt(delta.x * delta.x + delta.y * delta.y);
+}
+
+template <typename T> // Streams v as "vec2(x, y)".
+std::ostream& operator<<(std::ostream& os, const vec2<T>& v) {
+  os << "vec2(" << v.x;
+  return os << ", " << v.y << ")";
+}
diff --git a/pytorch3d/pytorch3d/csrc/utils/vec3.h b/pytorch3d/pytorch3d/csrc/utils/vec3.h
new file mode 100644
index 0000000000000000000000000000000000000000..fc37bf5c0dc0f041e58bd801dbf4b7d36eb1979b
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/utils/vec3.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+#include <cmath>
+#include <ostream>
+#include <type_traits>
+
+// A fixed-sized vector with basic arithmetic operators useful for
+// representing 3D coordinates.
+// TODO: switch to Eigen if more functionality is needed.
+//
+// NOTE(review): AT_ERROR is not declared by anything included here;
+// presumably each includer brings in the torch headers first. Confirm.
+
+// Three-component vector; T is restricted to float or double.
+template <
+    typename T,
+    typename = std::enable_if_t<
+        std::is_same<T, double>::value || std::is_same<T, float>::value>>
+struct vec3 {
+  T x, y, z;
+  typedef T scalar_t;
+  vec3(T x, T y, T z) : x(x), y(y), z(z) {}
+};
+
+// Component-wise sum a + b.
+template <typename T>
+inline vec3<T> operator+(const vec3<T>& a, const vec3<T>& b) {
+  return vec3<T>(a.x + b.x, a.y + b.y, a.z + b.z);
+}
+
+// Component-wise difference a - b.
+template <typename T>
+inline vec3<T> operator-(const vec3<T>& a, const vec3<T>& b) {
+  return vec3<T>(a.x - b.x, a.y - b.y, a.z - b.z);
+}
+
+// Divides every component by the scalar b; invokes AT_ERROR if b is zero.
+template <typename T>
+inline vec3<T> operator/(const vec3<T>& a, const T b) {
+  if (b == 0.0) {
+    AT_ERROR(
+        "denominator in vec3 division is 0"); // prevent divide by 0 errors.
+  }
+  return vec3<T>(a.x / b, a.y / b, a.z / b);
+}
+
+// Scalar-vector product; the scalar must be the left operand.
+template <typename T>
+inline vec3<T> operator*(const T a, const vec3<T>& b) {
+  return vec3<T>(a * b.x, a * b.y, a * b.z);
+}
+
+// Component-wise product of a and b (not a dot or cross product).
+template <typename T>
+inline vec3<T> operator*(const vec3<T>& a, const vec3<T>& b) {
+  return vec3<T>(a.x * b.x, a.y * b.y, a.z * b.z);
+}
+
+// Dot product of a and b.
+template <typename T>
+inline T dot(const vec3<T>& a, const vec3<T>& b) {
+  return a.x * b.x + a.y * b.y + a.z * b.z;
+}
+
+// Cross product a x b.
+template <typename T>
+inline vec3<T> cross(const vec3<T>& a, const vec3<T>& b) {
+  return vec3<T>(
+      a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
+}
+
+// Euclidean length of a.
+template <typename T>
+inline T norm(const vec3<T>& a) {
+  return std::sqrt(dot(a, a));
+}
+
+// Streams v as "vec3(x, y, z)".
+template <typename T>
+std::ostream& operator<<(std::ostream& os, const vec3<T>& v) {
+  os << "vec3(" << v.x << ", " << v.y << ", " << v.z << ")";
+  return os;
+}
diff --git a/pytorch3d/pytorch3d/csrc/utils/warp_reduce.cuh b/pytorch3d/pytorch3d/csrc/utils/warp_reduce.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..3c903019debf5db594a6c71e1296ccd764991736
--- /dev/null
+++ b/pytorch3d/pytorch3d/csrc/utils/warp_reduce.cuh
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <float.h>
+#include <math.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+
+// Helper functions WarpReduceMin and WarpReduceMax used in .cu files
+// Starting in Volta, instructions are no longer synchronous within a warp.
+// We need to call __syncwarp() to sync the 32 threads in the warp
+// instead of all the threads in the block.
+
+// Runs the reduction steps with strides 32, 16, 8, 4, 2 and 1. In each step
+// entry tid is compared with entry tid + stride; when the latter distance is
+// strictly smaller, both its index and its distance are copied into entry tid.
+// Every step ends with __syncwarp(). The largest element read is tid + 32, so
+// both arrays must be valid up to that index.
+template <typename scalar_t>
+__device__ void
+WarpReduceMin(scalar_t* min_dists, int64_t* min_idxs, const size_t tid) {
+#pragma unroll
+  for (size_t stride = 32; stride > 0; stride >>= 1) {
+    if (min_dists[tid] > min_dists[tid + stride]) {
+      min_idxs[tid] = min_idxs[tid + stride];
+      min_dists[tid] = min_dists[tid + stride];
+    }
+    __syncwarp();
+  }
+}
+
+// Counterpart of WarpReduceMin that keeps the strictly larger distance (and
+// its index) in entry tid. The buffers are taken as volatile pointers here.
+template <typename scalar_t>
+__device__ void WarpReduceMax(
+    volatile scalar_t* dists,
+    volatile int64_t* dists_idx,
+    const size_t tid) {
+#pragma unroll
+  for (size_t stride = 32; stride > 0; stride >>= 1) {
+    if (dists[tid] < dists[tid + stride]) {
+      dists[tid] = dists[tid + stride];
+      dists_idx[tid] = dists_idx[tid + stride];
+    }
+    __syncwarp();
+  }
+}
diff --git a/pytorch3d/pytorch3d/datasets/__init__.py b/pytorch3d/pytorch3d/datasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3dbee1ebd4940b0671fe027a109d7e0704a659c5
--- /dev/null
+++ b/pytorch3d/pytorch3d/datasets/__init__.py
@@ -0,0 +1,12 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .r2n2 import BlenderCamera, collate_batched_R2N2, R2N2, render_cubified_voxels
+from .shapenet import ShapeNetCore
+from .utils import collate_batched_meshes
+
+
+__all__ = [name for name in globals() if not name.startswith("_")]
diff --git a/pytorch3d/pytorch3d/datasets/r2n2/__init__.py b/pytorch3d/pytorch3d/datasets/r2n2/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2593d0440e2bfe463330c7e2551a114a00ed0d4
--- /dev/null
+++ b/pytorch3d/pytorch3d/datasets/r2n2/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .r2n2 import R2N2
+from .utils import BlenderCamera, collate_batched_R2N2, render_cubified_voxels
+
+
+__all__ = [name for name in globals() if not name.startswith("_")]
diff --git a/pytorch3d/pytorch3d/datasets/r2n2/r2n2.py b/pytorch3d/pytorch3d/datasets/r2n2/r2n2.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f93ad765b2c288aaf319931f49118fb5d58fc0c
--- /dev/null
+++ b/pytorch3d/pytorch3d/datasets/r2n2/r2n2.py
@@ -0,0 +1,425 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import json
+import warnings
+from os import path
+from pathlib import Path
+from typing import Dict, List, Optional
+
+import numpy as np
+import torch
+from PIL import Image
+from pytorch3d.common.datatypes import Device
+from pytorch3d.datasets.shapenet_base import ShapeNetBase
+from pytorch3d.renderer import HardPhongShader
+from tabulate import tabulate
+
+from .utils import (
+    align_bbox,
+    BlenderCamera,
+    compute_extrinsic_matrix,
+    read_binvox_coords,
+    voxelize,
+)
+
+
+SYNSET_DICT_DIR = Path(__file__).resolve().parent  # Directory of this module.
+MAX_CAMERA_DISTANCE = 1.75  # Constant from R2N2; scales each view's distance ratio.
+VOXEL_SIZE = 128  # Grid size handed to voxelize().
+# Intrinsic matrix extracted from Blender, used to project voxel coordinates.
+# Taken from https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/utils/coords.py
+BLENDER_INTRINSIC = torch.tensor(
+    [
+        [2.1875, 0.0, 0.0, 0.0],
+        [0.0, 2.1875, 0.0, 0.0],
+        [0.0, 0.0, -1.002002, -0.2002002],
+        [0.0, 0.0, -1.0, 0.0],
+    ]
+)
+
+
+class R2N2(ShapeNetBase):  # pragma: no cover
+    """
+    This class loads the R2N2 dataset from a given directory into a Dataset object.
+    The R2N2 dataset contains 13 categories that are a subset of the ShapeNetCore v.1
+    dataset. The R2N2 dataset also contains its own 24 renderings of each object and
+    voxelized models. Most of the models have all 24 views in the same split, but there
+    are eight of them that divide their views between train and test splits.
+
+    """
+
+    def __init__(
+        self,
+        split: str,
+        shapenet_dir: str,
+        r2n2_dir: str,
+        splits_file: str,
+        return_all_views: bool = True,
+        return_voxels: bool = False,
+        views_rel_path: str = "ShapeNetRendering",
+        voxels_rel_path: str = "ShapeNetVoxels",
+        load_textures: bool = True,
+        texture_resolution: int = 4,
+    ) -> None:
+        """
+        Store each object's synset id and model id from the given directories.
+
+        Args:
+            split (str): One of (train, val, test).
+            shapenet_dir (str): Path to ShapeNet core v1.
+            r2n2_dir (str): Path to the R2N2 dataset.
+            splits_file (str): File containing the train/val/test splits.
+            return_all_views (bool): Indicator of whether or not to load all the views in
+                the split. If set to False, one of the views in the split will be randomly
+                selected and loaded.
+            return_voxels(bool): Indicator of whether or not to return voxels as a tensor
+                of shape (D, D, D) where D is the number of voxels along each dimension.
+            views_rel_path: path to rendered views within the r2n2_dir. If not specified,
+                the renderings are assumed to be at os.path.join(r2n2_dir, "ShapeNetRendering").
+            voxels_rel_path: path to voxels within the r2n2_dir. If not specified,
+                the voxels are assumed to be at os.path.join(r2n2_dir, "ShapeNetVoxels").
+            load_textures: Boolean indicating whether textures should be loaded for the model.
+                Textures will be of type TexturesAtlas i.e. a texture map per face.
+            texture_resolution: Int specifying the resolution of the texture map per face
+                created using the textures in the obj file. A
+                (texture_resolution, texture_resolution, 3) map is created per face.
+
+        Raises:
+            ValueError: if split is not one of (train, val, test).
+
+        """
+        super().__init__()
+        self.shapenet_dir = shapenet_dir
+        self.r2n2_dir = r2n2_dir
+        self.views_rel_path = views_rel_path
+        self.voxels_rel_path = voxels_rel_path
+        self.load_textures = load_textures
+        self.texture_resolution = texture_resolution
+        # Examine if split is valid.
+        if split not in ["train", "val", "test"]:
+            raise ValueError("split has to be one of (train, val, test).")
+        # Synset dictionary mapping synset offsets in R2N2 to corresponding labels.
+        with open(
+            path.join(SYNSET_DICT_DIR, "r2n2_synset_dict.json"), "r"
+        ) as read_dict:
+            self.synset_dict = json.load(read_dict)
+        # Inverse dictionary mapping synset labels to corresponding offsets.
+        self.synset_inv = {label: offset for offset, label in self.synset_dict.items()}
+
+        # Store synset and model ids of objects mentioned in the splits_file.
+        with open(splits_file) as splits:
+            split_dict = json.load(splits)[split]
+
+        self.return_images = True
+        # Check if the folder containing R2N2 renderings is included in r2n2_dir.
+        if not path.isdir(path.join(r2n2_dir, views_rel_path)):
+            self.return_images = False
+            msg = (
+                "%s not found in %s. R2N2 renderings will "
+                "be skipped when returning models."
+            ) % (views_rel_path, r2n2_dir)
+            warnings.warn(msg)
+
+        self.return_voxels = return_voxels
+        # Check if the folder containing voxel coordinates is included in r2n2_dir.
+        if not path.isdir(path.join(r2n2_dir, voxels_rel_path)):
+            self.return_voxels = False
+            msg = (
+                "%s not found in %s. Voxel coordinates will "
+                "be skipped when returning models."
+            ) % (voxels_rel_path, r2n2_dir)
+            warnings.warn(msg)
+
+        synset_set = set()
+        # Store lists of views of each model in a list.
+        self.views_per_model_list = []
+        # Store tuples of synset label and total number of views in each category in a list.
+        synset_num_instances = []
+        for synset in split_dict:
+            # Examine if the given synset is present in the ShapeNetCore dataset
+            # and is also part of the standard R2N2 dataset.
+            if not (
+                path.isdir(path.join(shapenet_dir, synset))
+                and synset in self.synset_dict
+            ):
+                msg = (
+                    "Synset category %s from the splits file is either not "
+                    "present in %s or not part of the standard R2N2 dataset."
+                ) % (synset, shapenet_dir)
+                warnings.warn(msg)
+                continue
+
+            synset_set.add(synset)
+            self.synset_start_idxs[synset] = len(self.synset_ids)
+            # Start counting total number of views in the current category.
+            synset_view_count = 0
+            for model in split_dict[synset]:
+                # Examine if the given model is present in the ShapeNetCore path.
+                shapenet_path = path.join(shapenet_dir, synset, model)
+                if not path.isdir(shapenet_path):
+                    msg = "Model %s from category %s is not present in %s."
+                    warnings.warn(msg % (model, synset, shapenet_dir))
+                    continue
+                self.synset_ids.append(synset)
+                self.model_ids.append(model)
+
+                model_views = split_dict[synset][model]
+                # Randomly select a view index if return_all_views set to False.
+                if not return_all_views:
+                    rand_idx = torch.randint(len(model_views), (1,))
+                    model_views = [model_views[rand_idx]]
+                self.views_per_model_list.append(model_views)
+                synset_view_count += len(model_views)
+            synset_num_instances.append((self.synset_dict[synset], synset_view_count))
+            model_count = len(self.synset_ids) - self.synset_start_idxs[synset]
+            self.synset_num_models[synset] = model_count
+        headers = ["category", "#instances"]
+        synset_num_instances.append(("total", sum(n for _, n in synset_num_instances)))
+        print(
+            tabulate(synset_num_instances, headers, numalign="left", stralign="center")
+        )
+
+        # Examine if all the synsets in the standard R2N2 mapping are present.
+        # Update self.synset_inv so that it only includes the loaded categories.
+        # pop() returns the removed value, so this list holds synset offsets.
+        synset_not_present = [
+            self.synset_inv.pop(self.synset_dict[synset])
+            for synset in self.synset_dict
+            if synset not in synset_set
+        ]
+        if len(synset_not_present) > 0:
+            msg = (
+                "The following categories are included in R2N2's "
+                "official mapping but not found in the dataset location %s: %s"
+            ) % (shapenet_dir, ", ".join(synset_not_present))
+            warnings.warn(msg)
+
+    def __getitem__(self, model_idx, view_idxs: Optional[List[int]] = None) -> Dict:
+        """
+        Read a model by the given index.
+
+        Args:
+            model_idx: Index of the model to retrieve, or a (model_idx, view_idxs) tuple.
+            view_idxs: List of indices of the view to be returned. Each index needs to be
+                contained in the loaded split (always between 0 and 23, inclusive). If
+                an invalid index is supplied, view_idxs will be ignored and all the loaded
+                views will be returned.
+
+        Returns:
+            dictionary with following keys:
+            - verts: FloatTensor of shape (V, 3).
+            - faces: faces.verts_idx, LongTensor of shape (F, 3).
+            - textures: textures returned by the mesh loader for this model.
+            - synset_id (str): synset id.
+            - model_id (str): model id.
+            - label (str): synset label.
+            - images: FloatTensor of shape (V, H, W, C), where V is number of views
+                returned; None when the R2N2 renderings are not available.
+            - R: Rotation matrix of shape (V, 3, 3), where V is number of views returned.
+            - T: Translation matrix of shape (V, 3), where V is number of views returned.
+            - K: Intrinsic matrix of shape (V, 4, 4), where V is number of views returned.
+            - voxels: one (D, D, D) voxel grid per returned view, stacked along dim 0.
+        """
+        if isinstance(model_idx, tuple):
+            model_idx, view_idxs = model_idx
+        if view_idxs is not None:
+            if isinstance(view_idxs, int):
+                view_idxs = [view_idxs]
+            if not isinstance(view_idxs, list) and not torch.is_tensor(view_idxs):
+                raise TypeError(
+                    "view_idxs is of type %s but it needs to be a list."
+                    % type(view_idxs)
+                )
+
+        model_views = self.views_per_model_list[model_idx]
+        if view_idxs is not None and any(
+            idx not in model_views for idx in view_idxs
+        ):
+            msg = """At least one of the indices in view_idxs is not available.
+                Specified view of the model needs to be contained in the
+                loaded split. If return_all_views is set to False, only one
+                random view is loaded. Try accessing the specified view(s)
+                after loading the dataset with self.return_all_views set to True.
+                Now returning all view(s) in the loaded dataset."""
+            warnings.warn(msg)
+        elif view_idxs is not None:
+            model_views = view_idxs
+
+        model = self._get_item_ids(model_idx)
+        model_path = path.join(
+            self.shapenet_dir, model["synset_id"], model["model_id"], "model.obj"
+        )
+
+        verts, faces, textures = self._load_mesh(model_path)
+        model["verts"] = verts
+        model["faces"] = faces
+        model["textures"] = textures
+        model["label"] = self.synset_dict[model["synset_id"]]
+
+        model["images"] = None
+        images, Rs, Ts, voxel_RTs = [], [], [], []
+        # Retrieve R2N2's renderings if required.
+        if self.return_images:
+            rendering_path = path.join(
+                self.r2n2_dir,
+                self.views_rel_path,
+                model["synset_id"],
+                model["model_id"],
+                "rendering",
+            )
+            # Read metadata file to obtain params for calibration matrices.
+            with open(path.join(rendering_path, "rendering_metadata.txt"), "r") as f:
+                metadata_lines = f.readlines()
+            for i in model_views:
+                # Read image; the context manager releases the file right away.
+                image_path = path.join(rendering_path, "%02d.png" % i)
+                with Image.open(image_path) as raw_img:
+                    image = torch.from_numpy(np.array(raw_img) / 255.0)[..., :3]
+                images.append(image.to(dtype=torch.float32))
+
+                # Get camera calibration.
+                azim, elev, yaw, dist_ratio, fov = [
+                    float(v) for v in metadata_lines[i].strip().split(" ")
+                ]
+                dist = dist_ratio * MAX_CAMERA_DISTANCE
+                # Extrinsic matrix before transformation to PyTorch3D world space.
+                RT = compute_extrinsic_matrix(azim, elev, dist)
+                R, T = self._compute_camera_calibration(RT)
+                Rs.append(R)
+                Ts.append(T)
+                voxel_RTs.append(RT)
+
+            # Intrinsic matrix extracted from the Blender with slight modification to work with
+            # PyTorch3D world space. Taken from meshrcnn codebase:
+            # https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/utils/coords.py
+            K = torch.tensor(
+                [
+                    [2.1875, 0.0, 0.0, 0.0],
+                    [0.0, 2.1875, 0.0, 0.0],
+                    [0.0, 0.0, -1.002002, -0.2002002],
+                    [0.0, 0.0, 1.0, 0.0],
+                ]
+            )
+            model["images"] = torch.stack(images)
+            model["R"] = torch.stack(Rs)
+            model["T"] = torch.stack(Ts)
+            model["K"] = K.expand(len(model_views), 4, 4)
+
+        voxels_list = []
+
+        # Read voxels if required.
+        voxel_path = path.join(
+            self.r2n2_dir,
+            self.voxels_rel_path,
+            model["synset_id"],
+            model["model_id"],
+            "model.binvox",
+        )
+        if self.return_voxels:
+            if not path.isfile(voxel_path):
+                msg = "Voxel file not found for model %s from category %s."
+                raise FileNotFoundError(msg % (model["model_id"], model["synset_id"]))
+
+            with open(voxel_path, "rb") as f:
+                # Read voxel coordinates as a tensor of shape (N, 3).
+                voxel_coords = read_binvox_coords(f)
+            # Align voxels to the same coordinate system as mesh verts.
+            voxel_coords = align_bbox(voxel_coords, model["verts"])
+            for RT in voxel_RTs:
+                # Compute projection matrix.
+                P = BLENDER_INTRINSIC.mm(RT)
+                # Convert voxel coordinates of shape (N, 3) to voxels of shape (D, D, D).
+                voxels = voxelize(voxel_coords, P, VOXEL_SIZE)
+                voxels_list.append(voxels)
+            model["voxels"] = torch.stack(voxels_list)
+
+        return model
+
+    def _compute_camera_calibration(self, RT):
+        """
+        Convert a ShapeNet extrinsic matrix into the rotation and translation
+        expressed in the PyTorch3D world.
+
+        Args:
+            RT: Extrinsic matrix that performs ShapeNet world view to camera view
+                transformation.
+
+        Returns:
+            R: Rotation matrix of shape (3, 3).
+            T: Translation matrix of shape (3).
+        """
+        # Negating the x and z axes maps the ShapeNet world onto the PyTorch3D
+        # world; the homogeneous coordinate is left untouched.
+        axis_signs = torch.tensor([-1.0, 1.0, -1.0, 1.0], dtype=torch.float32)
+        shapenet_to_pytorch3d = torch.diag(axis_signs)  # (4, 4)
+
+        # Transpose the extrinsic matrix first, then apply the change of world
+        # coordinates on the right.
+        transformed = RT.transpose(0, 1).mm(shapenet_to_pytorch3d)  # (4, 4)
+
+        # After the transpose the translation is read from the last row rather
+        # than the last column.
+        rotation = transformed[:3, :3]
+        translation = transformed[3, :3]
+
+        return rotation, translation
+
+    def render(
+        self,
+        model_ids: Optional[List[str]] = None,
+        categories: Optional[List[str]] = None,
+        sample_nums: Optional[List[int]] = None,
+        idxs: Optional[List[int]] = None,
+        view_idxs: Optional[List[int]] = None,
+        shader_type=HardPhongShader,
+        device: Device = "cpu",
+        **kwargs,
+    ) -> torch.Tensor:
+        """
+        Render models with BlenderCamera by default to achieve the same orientations as the
+        R2N2 renderings. Also accepts other types of cameras and any of the args that the
+        render function in the ShapeNetBase class accepts.
+
+        Args:
+            view_idxs: each model will be rendered with the orientation(s) of the specified
+                views. Only render by view_idxs if no camera or args for BlenderCamera is
+                supplied.
+            Accepts any of the args of the render function in ShapeNetBase:
+            model_ids: List[str] of model_ids of models intended to be rendered.
+            categories: List[str] of categories intended to be rendered. categories
+                and sample_nums must be specified at the same time. categories can be given
+                in the form of synset offsets or labels, or a combination of both.
+            sample_nums: List[int] of number of models to be randomly sampled from
+                each category. Could also contain one single integer, in which case it
+                will be broadcast for every category.
+            idxs: List[int] of indices of models to be rendered in the dataset.
+            shader_type: Shader to use for rendering. Examples include HardPhongShader
+                (default), SoftPhongShader etc or any other type of valid Shader class.
+            device: Device (as str or torch.device) on which the tensors should be located.
+            **kwargs: Accepts any of the kwargs that the renderer supports and any of the
+                args that BlenderCamera supports.
+
+        Returns:
+            Batch of rendered images of shape (N, H, W, 3).
+        """
+        idxs = self._handle_render_inputs(model_ids, categories, sample_nums, idxs)
+        # Load each model once; three separate passes re-read every mesh three times.
+        models = (self[idx, view_idxs] for idx in idxs)
+        calib = [(m["R"], m["T"], m["K"]) for m in models]
+        r, t, k = (torch.cat([c[i] for c in calib]) for i in range(3))
+        # Initialize default camera using R, T, K from kwargs or R, T, K of the specified views.
+        blend_cameras = BlenderCamera(
+            R=kwargs.get("R", r),
+            T=kwargs.get("T", t),
+            K=kwargs.get("K", k),
+            device=device,
+        )
+        # A caller-supplied camera wins; pop it so it is not passed to super() twice.
+        cameras = kwargs.pop("cameras", blend_cameras).to(device)
+        return super().render(
+            idxs=idxs, shader_type=shader_type, device=device, cameras=cameras, **kwargs
+        )
diff --git a/pytorch3d/pytorch3d/datasets/r2n2/r2n2_synset_dict.json b/pytorch3d/pytorch3d/datasets/r2n2/r2n2_synset_dict.json
new file mode 100644
index 0000000000000000000000000000000000000000..b8cbae58173e58ea0607e95161e65944979aff23
--- /dev/null
+++ b/pytorch3d/pytorch3d/datasets/r2n2/r2n2_synset_dict.json
@@ -0,0 +1,15 @@
+{
+    "04256520": "sofa",
+    "02933112": "cabinet",
+    "02828884": "bench",
+    "03001627": "chair",
+    "03211117": "display",
+    "04090263": "rifle",
+    "03691459": "loudspeaker",
+    "03636649": "lamp",
+    "04401088": "telephone",
+    "02691156": "airplane",
+    "04379243": "table",
+    "02958343": "car",
+    "04530566": "watercraft"
+}
diff --git a/pytorch3d/pytorch3d/datasets/r2n2/utils.py b/pytorch3d/pytorch3d/datasets/r2n2/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..c7b80537fdf5bc443d90a48277e837d7abeeed62
--- /dev/null
+++ b/pytorch3d/pytorch3d/datasets/r2n2/utils.py
@@ -0,0 +1,502 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+from typing import Dict, List
+
+import numpy as np
+import torch
+from pytorch3d.common.datatypes import Device
+from pytorch3d.datasets.utils import collate_batched_meshes
+from pytorch3d.ops import cubify
+from pytorch3d.renderer import (
+    HardPhongShader,
+    MeshRasterizer,
+    MeshRenderer,
+    PointLights,
+    RasterizationSettings,
+    TexturesVertex,
+)
+from pytorch3d.renderer.cameras import CamerasBase
+from pytorch3d.transforms import Transform3d
+
+
+# Empirical min and max over the dataset from meshrcnn.
+# https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/utils/coords.py#L9
+SHAPENET_MIN_ZMIN = 0.67
+SHAPENET_MAX_ZMAX = 0.92
+# Threshold for cubify from meshrcnn:
+# https://github.com/facebookresearch/meshrcnn/blob/main/configs/shapenet/voxmesh_R50.yaml#L11
+CUBIFY_THRESH = 0.2
+
+# Default R, T, K for BlenderCamera: identity rotation, zero translation, identity intrinsics.
+r = np.expand_dims(np.eye(3), axis=0)  # rotation, shape (1, 3, 3)
+t = np.expand_dims(np.zeros(3), axis=0)  # translation, shape (1, 3)
+k = np.expand_dims(np.eye(4), axis=0)  # intrinsics, shape (1, 4, 4)
+
+
+def collate_batched_R2N2(batch: List[Dict]):  # pragma: no cover
+    """
+    Merge a list of per-model dictionaries into a single dictionary using
+    collate_batched_meshes, then stack the "images", "R"/"T"/"K" and "voxels"
+    entries into batched tensors when torch.stack succeeds. This function can
+    be used with a Dataset object to create a torch.utils.data.Dataloader.
+    TODO: Add support for textures.
+
+    Args:
+        batch: List of dictionaries containing information about objects
+            in the dataset.
+
+    Returns:
+        collated_dict: Dictionary of collated lists. If batch contains both
+            verts and faces, a collated mesh batch is also returned.
+    """
+    collated_dict = collate_batched_meshes(batch)
+
+    # If the collated R2N2 items contain images and all models have the same
+    # number of views V, stack the per-model view batches into a new batch of
+    # shape (N, V, H, W, 3). When torch.stack raises RuntimeError, report it
+    # and leave the entry as a list.
+    if "images" in collated_dict:
+        try:
+            collated_dict["images"] = torch.stack(collated_dict["images"])
+        except RuntimeError:
+            print(
+                "Models don't have the same number of views. Now returning "
+                "lists of images instead of batches."
+            )
+
+    # Likewise for the camera calibration matrices, which are only stacked when
+    # all of "R", "T" and "K" are present. The stacks run in order, so if a
+    # later one fails the keys stacked before it stay tensors of shape
+    # (N, V, ...) while the remaining keys stay lists.
+    if all(x in collated_dict for x in ["R", "T", "K"]):
+        try:
+            collated_dict["R"] = torch.stack(collated_dict["R"])  # (N, V, 3, 3)
+            collated_dict["T"] = torch.stack(collated_dict["T"])  # (N, V, 3)
+            collated_dict["K"] = torch.stack(collated_dict["K"])  # (N, V, 4, 4)
+        except RuntimeError:
+            print(
+                "Models don't have the same number of views. Now returning "
+                "lists of calibration matrices instead of a batched tensor."
+            )
+
+    # Likewise for voxels: when all models have the same number of views V,
+    # stack them into a new batch of shape (N, V, S, S, S), where S is the
+    # voxel size. Otherwise leave them as a list.
+    if "voxels" in collated_dict:
+        try:
+            collated_dict["voxels"] = torch.stack(collated_dict["voxels"])
+        except RuntimeError:
+            print(
+                "Models don't have the same number of views. Now returning "
+                "lists of voxels instead of a batched tensor."
+            )
+    return collated_dict
+
+
+def compute_extrinsic_matrix(
+    azimuth: float, elevation: float, distance: float
+):  # pragma: no cover
+    """
+    Copied from meshrcnn codebase:
+    https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/utils/coords.py#L96
+
+    Build the 4x4 extrinsic matrix mapping homogeneous world coordinates to
+    homogeneous camera coordinates, for a camera that looks at the origin.
+    The result already includes the 90 degree x-axis rotation Blender applies
+    to loaded .obj files. Used in R2N2 Dataset when computing calibration matrices.
+
+    Args:
+        azimuth: Rotation about the z-axis, in degrees.
+        elevation: Rotation above the xy-plane, in degrees.
+        distance: Distance from the origin.
+
+    Returns:
+        FloatTensor of shape (4, 4).
+    """
+    azimuth = float(azimuth)
+    elevation = float(elevation)
+    distance = float(distance)
+    # Both angles are negated while converting from degrees to radians.
+    az_rad, el_rad = -math.pi * azimuth / 180.0, -math.pi * elevation / 180.0
+    sa, ca = math.sin(az_rad), math.cos(az_rad)
+    se, ce = math.sin(el_rad), math.cos(el_rad)
+
+    world_to_obj = torch.tensor(
+        [[ca * ce, sa * ce, -se], [-sa, ca, 0], [ca * se, sa * se, ce]]
+    )
+    obj_to_cam = torch.tensor([[0.0, 1.0, 0.0], [0.0, 0.0, 1.0], [1.0, 0.0, 0.0]])
+    # Camera position as a (3, 1) column: `distance` along the first axis.
+    cam_location = torch.tensor([[distance, 0, 0]]).t()
+    rotation = obj_to_cam.mm(world_to_obj)
+    translation = -(obj_to_cam.mm(cam_location))
+    bottom_row = torch.tensor([[0.0, 0, 0, 1]])
+    RT = torch.cat([rotation, translation], dim=1)
+    RT = torch.cat([RT, bottom_row])
+
+    # Georgia: Blender rotates a model 90 degrees about the x axis when it loads
+    # a .obj file; fold that rotation into the extrinsic matrix to compensate.
+    rot_x = torch.tensor([[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]])
+    RT = RT.mm(rot_x.to(RT))
+    return RT
+
+
+def read_binvox_coords(
+    f,
+    integer_division: bool = True,
+    dtype: torch.dtype = torch.float32,
+):  # pragma: no cover
+    """
+    Copied from meshrcnn codebase:
+    https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/utils/binvox_torch.py#L5
+
+    Read a binvox file and return the indices of all nonzero voxels.
+
+    This matches the behavior of binvox_rw.read_as_coord_array
+    (https://github.com/dimatura/binvox-rw-py/blob/public/binvox_rw.py#L153)
+    but this implementation uses torch rather than numpy, and is more efficient
+    due to improved vectorization.
+
+    Georgia: I think that binvox_rw.read_as_coord_array actually has a bug; when converting
+    linear indices into three-dimensional indices, they use floating-point
+    division instead of integer division. Passing integer_division=False casts the
+    indices to dtype before dividing. NOTE(review): `//` also floors floats - verify.
+
+    Args:
+      f: A binary file object for the binvox file, positioned at its start.
+      integer_division (bool): If False, then match the buggy implementation from binvox_rw
+      dtype: Datatype of the output tensor. Use float64 to match binvox_rw
+
+    Returns:
+      coords (tensor): A tensor of shape (N, 3) where N is the number of nonzero voxels,
+           and coords[i] = (x, y, z) gives the index of the ith nonzero voxel. If the
+           voxel grid has shape (V, V, V) then we have 0 <= x, y, z < V.
+    """
+    size, translation, scale = _read_binvox_header(f)  # only size is used below
+    storage = torch.ByteStorage.from_buffer(f.read())  # remaining bytes after the header
+    data = torch.tensor([], dtype=torch.uint8)
+    # pyre-fixme[28]: Unexpected keyword argument `source`.
+    data.set_(source=storage)
+    vals, counts = data[::2], data[1::2]  # alternating (value, run length) bytes
+    idxs = _compute_idxs(vals, counts)
+    if not integer_division:
+        idxs = idxs.to(dtype)
+    x_idxs = idxs // (size * size)  # linear index = x * size^2 + z * size + y
+    zy_idxs = idxs % (size * size)
+    z_idxs = zy_idxs // size
+    y_idxs = zy_idxs % size
+    coords = torch.stack([x_idxs, y_idxs, z_idxs], dim=1)
+    return coords.to(dtype)
+
+
+def _compute_idxs(vals, counts):  # pragma: no cover
+    """
+    Copied from meshrcnn codebase:
+    https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/utils/binvox_torch.py#L58
+
+    Fast vectorized conversion of run-length encoded voxels to linear indices.
+
+    Args:
+        vals: tensor of binary values, one per run of the run-length encoding.
+        counts: tensor giving the length of the run for each value in vals.
+
+    Returns:
+        idxs: int64 tensor of shape (N), N = number of nonzero voxels (may be 0).
+    """
+    # Consider an example where:
+    # vals   = [0, 1, 0, 1, 1]
+    # counts = [2, 3, 3, 2, 1]
+    # These values of counts and vals mean that the dense binary grid is:
+    # [0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]
+    # So the nonzero indices we want to return are:
+    # [2, 3, 4, 8, 9, 10]
+
+    # After the cumsum we will have:
+    # end_idxs = [2, 5, 8, 10, 11]
+    end_idxs = counts.cumsum(dim=0)
+
+    # After masking and computing start_idx we have:
+    # end_idxs   = [5, 10, 11]
+    # counts     = [3,  2,  1]
+    # start_idxs = [2,  8, 10]
+    mask = vals == 1
+    end_idxs = end_idxs[mask]
+    counts = counts[mask].to(end_idxs)
+    start_idxs = end_idxs - counts
+    # No run of ones means no nonzero voxels; delta[0] below would raise IndexError.
+    if counts.numel() == 0:
+        return torch.zeros(0, dtype=torch.int64)
+
+    # We initialize delta as:
+    # [2, 1, 1, 1, 1, 1]
+    delta = torch.ones(counts.sum().item(), dtype=torch.int64)
+    delta[0] = start_idxs[0]
+
+    # We compute pos = [3, 5], val = [3, 0]; then delta is
+    # [2, 1, 1, 4, 1, 1]
+    pos = counts.cumsum(dim=0)[:-1]
+    val = start_idxs[1:] - end_idxs[:-1]
+    delta[pos] += val
+
+    # A final cumsum gives the idx we want: [2, 3, 4, 8, 9, 10]
+    return delta.cumsum(dim=0)
+
+
+def _read_binvox_header(f):  # pragma: no cover
+    """
+    Copied from meshrcnn codebase:
+    https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/utils/binvox_torch.py#L99
+
+    Read the five-line binvox header and return the voxel grid size, translation and
+    scale. Raises ValueError("Invalid header (line K)") if header line K is malformed.
+
+    Args:
+        f: A binary file object positioned at the start of the binvox file.
+
+    Returns:
+        size (int): size of voxel.
+        translation (tuple(float)): translation to original voxel coordinates.
+        scale (float): scale to original voxel coordinates.
+    """
+    # First line of the header should be "#binvox 1"
+    line = f.readline().strip()
+    if line != b"#binvox 1":
+        raise ValueError("Invalid header (line 1)")
+
+    # Second line of the header should be "dim [int] [int] [int]"
+    # and all three int should be the same
+    line = f.readline().strip()
+    if not line.startswith(b"dim "):
+        raise ValueError("Invalid header (line 2)")
+    try:
+        dims = [int(d) for d in line.split(b" ")[1:]]
+    except ValueError:
+        raise ValueError("Invalid header (line 2)") from None
+    if len(dims) != 3 or dims[0] != dims[1] or dims[0] != dims[2]:
+        raise ValueError("Invalid header (line 2)")
+    size = dims[0]
+
+    # Third line of the header should be "translate [float] [float] [float]"
+    line = f.readline().strip()
+    if not line.startswith(b"translate "):
+        raise ValueError("Invalid header (line 3)")
+    translation = line.split(b" ")
+    if len(translation) != 4:
+        raise ValueError("Invalid header (line 3)")
+    try:
+        translation = tuple(float(t) for t in translation[1:])
+    except ValueError:
+        raise ValueError("Invalid header (line 3)") from None
+
+    # Fourth line of the header should be "scale [float]"
+    line = f.readline().strip()
+    if not line.startswith(b"scale "):
+        raise ValueError("Invalid header (line 4)")
+    try:
+        _, scale = line.split(b" ")  # ValueError unless exactly two fields
+        scale = float(scale)  # ValueError on a non-numeric scale
+    except ValueError:
+        raise ValueError("Invalid header (line 4)") from None
+
+    # Fifth line of the header should be "data"
+    line = f.readline().strip()
+    if not line == b"data":
+        raise ValueError("Invalid header (line 5)")
+
+    return size, translation, scale
+
+
+def align_bbox(src, tgt):  # pragma: no cover
+    """
+    Copied from meshrcnn codebase:
+    https://github.com/facebookresearch/meshrcnn/blob/main/tools/preprocess_shapenet.py#L263
+
+    Map the src points into the coordinate system of tgt with a per-axis scale
+    and shift chosen so that the min / max values of both point sets coincide.
+
+    Args:
+        src, tgt: Torch Tensor of shape (N, 3)
+
+    Returns:
+        out: Torch Tensor of shape (N, 3)
+    """
+    if not (src.ndim == 2 and tgt.ndim == 2):
+        raise ValueError("Both src and tgt need to have dimensions of 2.")
+    if not (src.shape[-1] == 3 and tgt.shape[-1] == 3):
+        raise ValueError(
+            "Both src and tgt need to have sizes of 3 along the second dimension."
+        )
+    # Per-axis bounding box corners of both point sets.
+    src_lo, src_hi = src.min(dim=0).values, src.max(dim=0).values
+    tgt_lo, tgt_hi = tgt.min(dim=0).values, tgt.max(dim=0).values
+    # Affine map x -> scale * x + shift sending [src_lo, src_hi] to [tgt_lo, tgt_hi].
+    scale = (tgt_hi - tgt_lo) / (src_hi - src_lo)
+    shift = tgt_lo - scale * src_lo
+    out = scale * src + shift
+    return out
+
+
+def voxelize(voxel_coords, P, V):  # pragma: no cover
+    """
+    Copied from meshrcnn codebase:
+    https://github.com/facebookresearch/meshrcnn/blob/main/tools/preprocess_shapenet.py#L284
+    but changing flip y to flip x.
+
+    Create a voxel grid of shape (V, V, V) from voxel_coords and a projection matrix.
+
+    Args:
+        voxel_coords: FloatTensor of shape (N, 3) giving voxel's coordinates aligned to
+            the vertices.
+        P: FloatTensor of shape (4, 4) giving the projection matrix.
+        V: Number of voxels along each side of the output grid.
+
+    Returns:
+        voxels: uint8 Tensor of shape (V, V, V), indexed [z, y, x], set to 1 where occupied.
+    """
+    device = voxel_coords.device
+    voxel_coords = project_verts(voxel_coords, P)
+
+    # Using the actual zmin and zmax of the model is bad because we need them
+    # to perform the inverse transform, which transform voxels back into world
+    # space for refinement or evaluation. Instead we use an empirical min and
+    # max over the dataset; that way it is consistent for all images.
+    zmin = SHAPENET_MIN_ZMIN
+    zmax = SHAPENET_MAX_ZMAX
+
+    # Once we know zmin and zmax, we need to adjust the z coordinates so the
+    # range [zmin, zmax] instead runs from [-1, 1]
+    m = 2.0 / (zmax - zmin)
+    b = -2.0 * zmin / (zmax - zmin) - 1
+    voxel_coords[:, 2].mul_(m).add_(b)  # in-place on the z column
+    voxel_coords[:, 0].mul_(-1)  # Flip x
+
+    # Map the cube [-1, 1]^3 to [0, V-1]^3; points landing outside are dropped below
+    voxel_coords = 0.5 * (V - 1) * (voxel_coords + 1.0)
+    voxel_coords = voxel_coords.round().to(torch.int64)
+    valid = (0 <= voxel_coords) * (voxel_coords < V)  # elementwise AND
+    valid = valid[:, 0] * valid[:, 1] * valid[:, 2]  # all three coordinates in range
+    x, y, z = voxel_coords.unbind(dim=1)
+    x, y, z = x[valid], y[valid], z[valid]
+    voxels = torch.zeros(V, V, V, dtype=torch.uint8, device=device)
+    voxels[z, y, x] = 1
+
+    return voxels
+
+
+def project_verts(verts, P, eps: float = 1e-1):  # pragma: no cover
+    """
+    Copied from meshrcnn codebase:
+    https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/utils/coords.py#L159
+
+    Project vertices using a 4x4 transformation matrix.
+
+    Args:
+        verts: FloatTensor of shape (N, V, 3) giving a batch of vertex positions or of
+            shape (V, 3) giving a single set of vertex positions.
+        P: FloatTensor of shape (N, 4, 4) giving projection matrices or of shape (4, 4)
+            giving a single projection matrix.
+        eps: Lower bound applied to the absolute value of the homogeneous
+            coordinate w before dividing by it.
+
+    Returns:
+        verts_out: FloatTensor of shape (N, V, 3), or (V, 3) for unbatched input, where
+            verts_out[i] is the result of transforming verts[i] by P[i].
+    """
+    # Handle unbatched inputs
+    singleton = False
+    if verts.dim() == 2:
+        assert P.dim() == 2
+        singleton = True
+        verts, P = verts[None], P[None]
+
+    N, V = verts.shape[0], verts.shape[1]
+    dtype, device = verts.dtype, verts.device
+
+    # Append a column of ones to get homogeneous coordinates before multiplying
+    # by the projection matrix. (Multiplying by the 4x3 submatrix and adding
+    # the remaining 4x1 vector would avoid this allocation.)
+    ones = torch.ones(N, V, 1, dtype=dtype, device=device)
+    verts_hom = torch.cat([verts, ones], dim=2)
+    verts_cam_hom = torch.bmm(verts_hom, P.transpose(1, 2))
+
+    # Avoid division by zero by clamping the absolute value
+    w = verts_cam_hom[:, :, 3:]
+    w_sign = w.sign()
+    w_sign[w == 0] = 1  # sign() is 0 at w == 0; treat zero as positive
+    w = w_sign * w.abs().clamp(min=eps)
+
+    verts_proj = verts_cam_hom[:, :, :3] / w
+
+    if singleton:
+        return verts_proj[0]
+    return verts_proj
+
+
+class BlenderCamera(CamerasBase):  # pragma: no cover
+    """
+    Camera for rendering objects with calibration matrices from the R2N2 dataset
+    (which uses Blender for rendering the views for each model).
+    """
+
+    def __init__(self, R=r, T=t, K=k, device: Device = "cpu") -> None:
+        """
+        Args:
+            R: Rotation matrix of shape (N, 3, 3). Defaults to module-level r.
+            T: Translation matrix of shape (N, 3). Defaults to module-level t.
+            K: Intrinsic matrix of shape (N, 4, 4). Defaults to module-level k.
+            device: Device (as str or torch.device).
+        """
+        # The base initializer formats inputs to tensors and broadcasts their batch dimension.
+        super().__init__(device=device, R=R, T=T, K=K)
+
+    def get_projection_transform(self, **kwargs) -> Transform3d:
+        # K, transposed per batch element, is used directly as the matrix; kwargs are unused.
+        transform = Transform3d(device=self.device)
+        transform._matrix = self.K.transpose(1, 2).contiguous()
+        return transform
+
+    def is_perspective(self) -> bool:
+        return False
+
+    def in_ndc(self) -> bool:
+        return True
+
+
+def render_cubified_voxels(
+    voxels: torch.Tensor, shader_type=HardPhongShader, device: Device = "cpu", **kwargs
+):  # pragma: no cover
+    """
+    Use the Cubify operator to convert inputs voxels to a mesh and then render that mesh.
+
+    Args:
+        voxels: FloatTensor of shape (N, D, D, D) where N is the batch size and
+            D is the number of voxels along each dimension.
+        shader_type: Shader to use for rendering. Examples
+            include HardPhongShader (default), SoftPhongShader etc or any other type
+            of valid Shader class.
+        device: Device (as str or torch.device) on which the tensors should be located.
+        **kwargs: Only "raster_settings" and "lights" are read; other keys are ignored.
+    Returns:
+        Batch of rendered images of shape (N, H, W, 3).
+    """
+    cubified_voxels = cubify(voxels, CUBIFY_THRESH).to(device)
+    cubified_voxels.textures = TexturesVertex(  # all-ones per-vertex features
+        verts_features=torch.ones_like(cubified_voxels.verts_padded(), device=device)
+    )
+    cameras = BlenderCamera(device=device)  # default R, T, K
+    renderer = MeshRenderer(
+        rasterizer=MeshRasterizer(
+            cameras=cameras,
+            raster_settings=kwargs.get("raster_settings", RasterizationSettings()),
+        ),
+        shader=shader_type(
+            device=device,
+            cameras=cameras,
+            lights=kwargs.get("lights", PointLights()).to(device),
+        ),
+    )
+    return renderer(cubified_voxels)
diff --git a/pytorch3d/pytorch3d/datasets/shapenet/__init__.py b/pytorch3d/pytorch3d/datasets/shapenet/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..295ec79312e17f92baf722667f5a2d727bc703fa
--- /dev/null
+++ b/pytorch3d/pytorch3d/datasets/shapenet/__init__.py
@@ -0,0 +1,10 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .shapenet_core import ShapeNetCore
+
+
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
diff --git a/pytorch3d/pytorch3d/datasets/shapenet/shapenet_core.py b/pytorch3d/pytorch3d/datasets/shapenet/shapenet_core.py
new file mode 100644
index 0000000000000000000000000000000000000000..61908414ea6a8a91e679cabc699f542f50deae62
--- /dev/null
+++ b/pytorch3d/pytorch3d/datasets/shapenet/shapenet_core.py
@@ -0,0 +1,158 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import json
+import os
+import warnings
+from os import path
+from pathlib import Path
+from typing import Dict
+
+from pytorch3d.datasets.shapenet_base import ShapeNetBase
+
+
+SYNSET_DICT_DIR = Path(__file__).resolve().parent
+
+
+class ShapeNetCore(ShapeNetBase):  # pragma: no cover
+    """
+    This class loads ShapeNetCore from a given directory into a Dataset object.
+    ShapeNetCore is a subset of the ShapeNet dataset and can be downloaded from
+    https://www.shapenet.org/.
+    """
+
+    def __init__(
+        self,
+        data_dir,
+        synsets=None,
+        version: int = 1,
+        load_textures: bool = True,
+        texture_resolution: int = 4,
+    ) -> None:
+        """
+        Store each object's synset id and models id from data_dir.
+
+        Args:
+            data_dir: Path to ShapeNetCore data.
+            synsets: List of synset categories to load from ShapeNetCore in the form of
+                synset offsets or labels. A combination of both is also accepted.
+                When no category is specified, all categories in data_dir are loaded.
+            version: (int) version of ShapeNetCore data in data_dir, 1 or 2.
+                Default is set to be 1. Version 1 has 57 categories and version 2 has 55
+                categories.
+                Note: version 1 has two categories 02858304(boat) and 02992529(cellphone)
+                that are hyponyms of categories 04530566(watercraft) and 04401088(telephone)
+                respectively. You can combine the categories manually if needed.
+                Version 2 doesn't have 02858304(boat) or 02834778(bicycle) compared to
+                version 1.
+            load_textures: Boolean indicating whether textures should be loaded for the
+                model. Textures will be of type TexturesAtlas i.e. a texture map per face.
+            texture_resolution: Int specifying the resolution of the texture map per face
+                created using the textures in the obj file. A
+                (texture_resolution, texture_resolution, 3) map is created per face.
+        """
+        super().__init__()
+        self.shapenet_dir = data_dir
+        self.load_textures = load_textures
+        self.texture_resolution = texture_resolution
+
+        if version not in [1, 2]:
+            raise ValueError("Version number must be either 1 or 2.")
+        self.model_dir = "model.obj" if version == 1 else "models/model_normalized.obj"
+
+        # Synset dictionary mapping synset offsets to corresponding labels.
+        dict_file = "shapenet_synset_dict_v%d.json" % version
+        with open(path.join(SYNSET_DICT_DIR, dict_file), "r") as read_dict:
+            self.synset_dict = json.load(read_dict)
+        # Inverse dictionary mapping synset labels to corresponding offsets.
+        self.synset_inv = {label: offset for offset, label in self.synset_dict.items()}
+
+        # If categories are specified, check if each category is in the form of either
+        # synset offset or synset label, and if the category exists in the given directory.
+        if synsets is not None:
+            # Set of categories to load in the form of synset offsets.
+            synset_set = set()
+            for synset in synsets:
+                if (synset in self.synset_dict) and (
+                    path.isdir(path.join(data_dir, synset))
+                ):
+                    synset_set.add(synset)
+                elif (synset in self.synset_inv) and (
+                    (path.isdir(path.join(data_dir, self.synset_inv[synset])))
+                ):
+                    synset_set.add(self.synset_inv[synset])
+                else:
+                    msg = (
+                        "Synset category %s either not part of ShapeNetCore dataset "
+                        "or cannot be found in %s."
+                    ) % (synset, data_dir)
+                    warnings.warn(msg)
+        # If no category is given, load every category in the given directory.
+        # Ignore synset folders not included in the official mapping.
+        else:
+            synset_set = {
+                synset
+                for synset in os.listdir(data_dir)
+                if path.isdir(path.join(data_dir, synset))
+                and synset in self.synset_dict
+            }
+
+        # Check if there are any categories in the official mapping that are not loaded.
+        # Update self.synset_inv so that it only includes the loaded categories.
+        synset_not_present = set(self.synset_dict.keys()).difference(synset_set)
+        for synset in synset_not_present:
+            self.synset_inv.pop(self.synset_dict[synset])
+
+        if synset_not_present:
+            msg = (
+                "The following categories are included in ShapeNetCore ver.%d's "
+                "official mapping but not found in the dataset location %s: %s"
+            ) % (version, data_dir, ", ".join(sorted(synset_not_present)))
+            warnings.warn(msg)
+
+        # Extract model_id of each object from directory names.
+        # Each grandchildren directory of data_dir contains an object, and the name
+        # of the directory is the object's model_id.
+        for synset in synset_set:
+            self.synset_start_idxs[synset] = len(self.synset_ids)
+            for model in os.listdir(path.join(data_dir, synset)):
+                if not path.exists(path.join(data_dir, synset, model, self.model_dir)):
+                    msg = (
+                        "Object file not found in the model directory %s "
+                        "under synset directory %s."
+                    ) % (model, synset)
+                    warnings.warn(msg)
+                    continue
+                self.synset_ids.append(synset)
+                self.model_ids.append(model)
+            model_count = len(self.synset_ids) - self.synset_start_idxs[synset]
+            self.synset_num_models[synset] = model_count
+
+    def __getitem__(self, idx: int) -> Dict:
+        """
+        Read a model by the given index.
+
+        Args:
+            idx: The idx of the model to be retrieved in the dataset.
+
+        Returns:
+            dictionary with following keys:
+            - verts: FloatTensor of shape (V, 3).
+            - faces: LongTensor of shape (F, 3) which indexes into the verts tensor.
+            - textures: textures returned by self._load_mesh for the model's obj file.
+            - synset_id (str), model_id (str): synset id and model id.
+            - label (str): synset label.
+        """
+        model = self._get_item_ids(idx)
+        model_path = path.join(
+            self.shapenet_dir, model["synset_id"], model["model_id"], self.model_dir
+        )
+        verts, faces, textures = self._load_mesh(model_path)
+        model["verts"] = verts
+        model["faces"] = faces
+        model["textures"] = textures
+        model["label"] = self.synset_dict[model["synset_id"]]
+        return model
diff --git a/pytorch3d/pytorch3d/datasets/shapenet/shapenet_synset_dict_v1.json b/pytorch3d/pytorch3d/datasets/shapenet/shapenet_synset_dict_v1.json
new file mode 100644
index 0000000000000000000000000000000000000000..b2fc62ae62107a81e078ec02432fb554ae8f1b41
--- /dev/null
+++ b/pytorch3d/pytorch3d/datasets/shapenet/shapenet_synset_dict_v1.json
@@ -0,0 +1,59 @@
+{
+    "04379243": "table",
+    "02958343": "car",
+    "03001627": "chair",
+    "02691156": "airplane",
+    "04256520": "sofa",
+    "04090263": "rifle",
+    "03636649": "lamp",
+    "04530566": "watercraft",
+    "02828884": "bench",
+    "03691459": "loudspeaker",
+    "02933112": "cabinet",
+    "03211117": "display",
+    "04401088": "telephone",
+    "02924116": "bus",
+    "02808440": "bathtub",
+    "03467517": "guitar",
+    "03325088": "faucet",
+    "03046257": "clock",
+    "03991062": "flowerpot",
+    "03593526": "jar",
+    "02876657": "bottle",
+    "02871439": "bookshelf",
+    "03642806": "laptop",
+    "03624134": "knife",
+    "04468005": "train",
+    "02747177": "trash bin",
+    "03790512": "motorbike",
+    "03948459": "pistol",
+    "03337140": "file cabinet",
+    "02818832": "bed",
+    "03928116": "piano",
+    "04330267": "stove",
+    "03797390": "mug",
+    "02880940": "bowl",
+    "04554684": "washer",
+    "04004475": "printer",
+    "03513137": "helmet",
+    "03761084": "microwaves",
+    "04225987": "skateboard",
+    "04460130": "tower",
+    "02942699": "camera",
+    "02801938": "basket",
+    "02946921": "can",
+    "03938244": "pillow",
+    "03710193": "mailbox",
+    "03207941": "dishwasher",
+    "04099429": "rocket",
+    "02773838": "bag",
+    "02843684": "birdhouse",
+    "03261776": "earphone",
+    "03759954": "microphone",
+    "04074963": "remote",
+    "03085013": "keyboard",
+    "02834778": "bicycle",
+    "02954340": "cap",
+    "02858304": "boat",
+    "02992529": "mobile phone"
+}
diff --git a/pytorch3d/pytorch3d/datasets/shapenet/shapenet_synset_dict_v2.json b/pytorch3d/pytorch3d/datasets/shapenet/shapenet_synset_dict_v2.json
new file mode 100644
index 0000000000000000000000000000000000000000..f0107c93c3535e2454070be1dcb622ac66899c90
--- /dev/null
+++ b/pytorch3d/pytorch3d/datasets/shapenet/shapenet_synset_dict_v2.json
@@ -0,0 +1,57 @@
+{
+    "02691156": "airplane",
+    "02747177": "trash bin",
+    "02773838": "bag",
+    "02801938": "basket",
+    "02808440": "bathtub",
+    "02818832": "bed",
+    "02828884": "bench",
+    "02843684": "birdhouse",
+    "02871439": "bookshelf",
+    "02876657": "bottle",
+    "02880940": "bowl",
+    "02924116": "bus",
+    "02933112": "cabinet",
+    "02942699": "camera",
+    "02946921": "can",
+    "02954340": "cap",
+    "02958343": "car",
+    "02992529": "cellphone",
+    "03001627": "chair",
+    "03046257": "clock",
+    "03085013": "keyboard",
+    "03207941": "dishwasher",
+    "03211117": "display",
+    "03261776": "earphone",
+    "03325088": "faucet",
+    "03337140": "file cabinet",
+    "03467517": "guitar",
+    "03513137": "helmet",
+    "03593526": "jar",
+    "03624134": "knife",
+    "03636649": "lamp",
+    "03642806": "laptop",
+    "03691459": "loudspeaker",
+    "03710193": "mailbox",
+    "03759954": "microphone",
+    "03761084": "microwaves",
+    "03790512": "motorbike",
+    "03797390": "mug",
+    "03928116": "piano",
+    "03938244": "pillow",
+    "03948459": "pistol",
+    "03991062": "flowerpot",
+    "04004475": "printer",
+    "04074963": "remote",
+    "04090263": "rifle",
+    "04099429": "rocket",
+    "04225987": "skateboard",
+    "04256520": "sofa",
+    "04330267": "stove",
+    "04379243": "table",
+    "04401088": "telephone",
+    "04460130": "tower",
+    "04468005": "train",
+    "04530566": "watercraft",
+    "04554684": "washer"
+}
diff --git a/pytorch3d/pytorch3d/datasets/shapenet_base.py b/pytorch3d/pytorch3d/datasets/shapenet_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..7160ca6cbec680faeffd5de65b077b3c13f13057
--- /dev/null
+++ b/pytorch3d/pytorch3d/datasets/shapenet_base.py
@@ -0,0 +1,289 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import warnings
+from typing import Dict, List, Optional, Tuple
+
+import torch
+from pytorch3d.common.datatypes import Device
+from pytorch3d.io import load_obj
+from pytorch3d.renderer import (
+    FoVPerspectiveCameras,
+    HardPhongShader,
+    MeshRasterizer,
+    MeshRenderer,
+    PointLights,
+    RasterizationSettings,
+    TexturesVertex,
+)
+
+from .utils import collate_batched_meshes
+
+
+class ShapeNetBase(torch.utils.data.Dataset):  # pragma: no cover
+    """
+    'ShapeNetBase' implements a base Dataset for ShapeNet and R2N2 with helper methods.
+    It is not intended to be used on its own as a Dataset for a Dataloader. Both __init__
+    and __getitem__ need to be implemented.
+    """
+
+    def __init__(self) -> None:
+        """
+        Set up lists of synset_ids and model_ids.
+        """
+        self.synset_ids = []
+        self.model_ids = []
+        self.synset_inv = {}
+        self.synset_start_idxs = {}
+        self.synset_num_models = {}
+        self.shapenet_dir = ""
+        self.model_dir = "model.obj"
+        self.load_textures = True
+        self.texture_resolution = 4
+
+    def __len__(self) -> int:
+        """
+        Return number of total models in the loaded dataset.
+        """
+        return len(self.model_ids)
+
+    def __getitem__(self, idx) -> Dict:
+        """
+        Read a model by the given index. Need to be implemented for every child class
+        of ShapeNetBase.
+
+        Args:
+            idx: The idx of the model to be retrieved in the dataset.
+
+        Returns:
+            dictionary containing information about the model.
+        """
+        raise NotImplementedError(
+            "__getitem__ should be implemented in the child class of ShapeNetBase"
+        )
+
+    def _get_item_ids(self, idx) -> Dict:
+        """
+        Read a model by the given index.
+
+        Args:
+            idx: The idx of the model to be retrieved in the dataset.
+
+        Returns:
+            dictionary with following keys:
+            - synset_id (str): synset id
+            - model_id (str): model id
+        """
+        model = {}
+        model["synset_id"] = self.synset_ids[idx]
+        model["model_id"] = self.model_ids[idx]
+        return model
+
+    def _load_mesh(self, model_path) -> Tuple:
+        verts, faces, aux = load_obj(
+            model_path,
+            create_texture_atlas=self.load_textures,
+            load_textures=self.load_textures,
+            texture_atlas_size=self.texture_resolution,
+        )
+        if self.load_textures:
+            textures = aux.texture_atlas
+            # Some meshes don't have textures. In this case
+            # create a white texture map
+            if textures is None:
+                textures = verts.new_ones(
+                    faces.verts_idx.shape[0],
+                    self.texture_resolution,
+                    self.texture_resolution,
+                    3,
+                )
+        else:
+            textures = None
+
+        return verts, faces.verts_idx, textures
+
+    def render(
+        self,
+        model_ids: Optional[List[str]] = None,
+        categories: Optional[List[str]] = None,
+        sample_nums: Optional[List[int]] = None,
+        idxs: Optional[List[int]] = None,
+        shader_type=HardPhongShader,
+        device: Device = "cpu",
+        **kwargs,
+    ) -> torch.Tensor:
+        """
+        If a list of model_ids are supplied, render all the objects by the given model_ids.
+        If no model_ids are supplied, but categories and sample_nums are specified, randomly
+        select a number of objects (number specified in sample_nums) in the given categories
+        and render these objects. If instead a list of idxs is specified, check if the idxs
+        are all valid and render models by the given idxs. Otherwise, randomly select a number
+        (first number in sample_nums, default is set to be 1) of models from the loaded dataset
+        and render these models.
+
+        Args:
+            model_ids: List[str] of model_ids of models intended to be rendered.
+            categories: List[str] of categories intended to be rendered. categories
+                and sample_nums must be specified at the same time. categories can be given
+                in the form of synset offsets or labels, or a combination of both.
+            sample_nums: List[int] of number of models to be randomly sampled from
+                each category. Could also contain one single integer, in which case it
+                will be broadcasted for every category.
+            idxs: List[int] of indices of models to be rendered in the dataset.
+            shader_type: Select shading. Valid options include HardPhongShader (default),
+                SoftPhongShader, HardGouraudShader, SoftGouraudShader, HardFlatShader,
+                SoftSilhouetteShader.
+            device: Device (as str or torch.device) on which the tensors should be located.
+            **kwargs: Accepts any of the kwargs that the renderer supports.
+
+        Returns:
+            Batch of rendered images of shape (N, H, W, 3).
+        """
+        idxs = self._handle_render_inputs(model_ids, categories, sample_nums, idxs)
+        # Use the getitem method which loads mesh + texture
+        models = [self[idx] for idx in idxs]
+        meshes = collate_batched_meshes(models)["mesh"]
+        if meshes.textures is None:
+            meshes.textures = TexturesVertex(
+                verts_features=torch.ones_like(meshes.verts_padded(), device=device)
+            )
+
+        meshes = meshes.to(device)
+        cameras = kwargs.get("cameras", FoVPerspectiveCameras()).to(device)
+        if len(cameras) != 1 and len(cameras) % len(meshes) != 0:
+            raise ValueError("Mismatch between batch dims of cameras and meshes.")
+        if len(cameras) > 1:
+            # When rendering R2N2 models, if more than one views are provided, broadcast
+            # the meshes so that each mesh can be rendered for each of the views.
+            meshes = meshes.extend(len(cameras) // len(meshes))
+        renderer = MeshRenderer(
+            rasterizer=MeshRasterizer(
+                cameras=cameras,
+                raster_settings=kwargs.get("raster_settings", RasterizationSettings()),
+            ),
+            shader=shader_type(
+                device=device,
+                cameras=cameras,
+                lights=kwargs.get("lights", PointLights()).to(device),
+            ),
+        )
+        return renderer(meshes)
+
+    def _handle_render_inputs(
+        self,
+        model_ids: Optional[List[str]] = None,
+        categories: Optional[List[str]] = None,
+        sample_nums: Optional[List[int]] = None,
+        idxs: Optional[List[int]] = None,
+    ) -> List[int]:
+        """
+        Helper function for converting user provided model_ids, categories and sample_nums
+        to indices of models in the loaded dataset. If model idxs are provided, we check if
+        the idxs are valid. If no models are specified, the first model in the loaded dataset
+        is chosen. The function returns the file paths to the selected models.
+
+        Args:
+            model_ids: List[str] of model_ids of models to be rendered.
+            categories: List[str] of categories to be rendered.
+            sample_nums: List[int] of number of models to be randomly sampled from
+                each category.
+            idxs: List[int] of indices of models to be rendered in the dataset.
+
+        Returns:
+            List of paths of models to be rendered.
+        """
+        # Get corresponding indices if model_ids are supplied.
+        if model_ids is not None and len(model_ids) > 0:
+            idxs = []
+            for model_id in model_ids:
+                if model_id not in self.model_ids:
+                    raise ValueError(
+                        "model_id %s not found in the loaded dataset." % model_id
+                    )
+                idxs.append(self.model_ids.index(model_id))
+
+        # Sample random models if categories and sample_nums are supplied and get
+        # the corresponding indices.
+        elif categories is not None and len(categories) > 0:
+            sample_nums = [1] if sample_nums is None else sample_nums
+            if len(categories) != len(sample_nums) and len(sample_nums) != 1:
+                raise ValueError(
+                    "categories and sample_nums needs to be of the same length or "
+                    "sample_nums needs to be of length 1."
+                )
+
+            idxs_tensor = torch.empty(0, dtype=torch.int32)
+            for i in range(len(categories)):
+                category = self.synset_inv.get(categories[i], categories[i])
+                if category not in self.synset_inv.values():
+                    raise ValueError(
+                        "Category %s is not in the loaded dataset." % category
+                    )
+                # Broadcast if sample_nums has length of 1.
+                sample_num = sample_nums[i] if len(sample_nums) > 1 else sample_nums[0]
+                sampled_idxs = self._sample_idxs_from_category(
+                    sample_num=sample_num, category=category
+                )
+                # pyre-fixme[6]: For 1st param expected `Union[List[Tensor],
+                #  typing.Tuple[Tensor, ...]]` but got `Tuple[Tensor, List[int]]`.
+                idxs_tensor = torch.cat((idxs_tensor, sampled_idxs))
+            idxs = idxs_tensor.tolist()
+        # Check if the indices are valid if idxs are supplied.
+        elif idxs is not None and len(idxs) > 0:
+            if any(idx < 0 or idx >= len(self.model_ids) for idx in idxs):
+                raise IndexError(
+                    "One or more idx values are out of bounds. Indices need to be"
+                    "between 0 and %s." % (len(self.model_ids) - 1)
+                )
+        # Check if sample_nums is specified, if so sample sample_nums[0] number
+        # of indices from the entire loaded dataset. Otherwise randomly select one
+        # index from the dataset.
+        else:
+            sample_nums = [1] if sample_nums is None else sample_nums
+            if len(sample_nums) > 1:
+                msg = (
+                    "More than one sample sizes specified, now sampling "
+                    "%d models from the dataset." % sample_nums[0]
+                )
+                warnings.warn(msg)
+            idxs = self._sample_idxs_from_category(sample_nums[0])
+        return idxs
+
+    def _sample_idxs_from_category(
+        self, sample_num: int = 1, category: Optional[str] = None
+    ) -> List[int]:
+        """
+        Helper function for sampling a number of indices from the given category.
+
+        Args:
+            sample_num: number of indices to be sampled from the given category.
+            category: category synset of the category to be sampled from. If not
+                specified, sample from all models in the loaded dataset.
+        """
+        start = self.synset_start_idxs[category] if category is not None else 0
+        range_len = (
+            self.synset_num_models[category] if category is not None else self.__len__()
+        )
+        replacement = sample_num > range_len
+        sampled_idxs = (
+            torch.multinomial(
+                torch.ones((range_len), dtype=torch.float32),
+                sample_num,
+                replacement=replacement,
+            )
+            + start
+        )
+        if replacement:
+            msg = (
+                "Sample size %d is larger than the number of objects in %s, "
+                "values sampled with replacement."
+            ) % (
+                sample_num,
+                "category " + category if category is not None else "all categories",
+            )
+            warnings.warn(msg)
+        # pyre-fixme[7]: Expected `List[int]` but got `Tensor`.
+        return sampled_idxs
diff --git a/pytorch3d/pytorch3d/datasets/utils.py b/pytorch3d/pytorch3d/datasets/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..88aafac1df8de57992a6fee5c8ab8f10a33639d8
--- /dev/null
+++ b/pytorch3d/pytorch3d/datasets/utils.py
@@ -0,0 +1,48 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Dict, List
+
+from pytorch3d.renderer.mesh import TexturesAtlas
+from pytorch3d.structures import Meshes
+
+
+def collate_batched_meshes(batch: List[Dict]):  # pragma: no cover
+    """
+    Take a list of objects in the form of dictionaries and merge them
+    into a single dictionary. This function can be used with a Dataset
+    object to create a torch.utils.data.Dataloader which directly
+    returns Meshes objects.
+    TODO: Add support for textures.
+
+    Args:
+        batch: List of dictionaries containing information about objects
+            in the dataset.
+
+    Returns:
+        collated_dict: Dictionary of collated lists. If batch contains both
+            verts and faces, a collated mesh batch is also returned.
+    """
+    if batch is None or len(batch) == 0:
+        return None
+    collated_dict = {}
+    for k in batch[0].keys():
+        collated_dict[k] = [d[k] for d in batch]
+
+    collated_dict["mesh"] = None
+    if {"verts", "faces"}.issubset(collated_dict.keys()):
+
+        textures = None
+        if "textures" in collated_dict:
+            textures = TexturesAtlas(atlas=collated_dict["textures"])
+
+        collated_dict["mesh"] = Meshes(
+            verts=collated_dict["verts"],
+            faces=collated_dict["faces"],
+            textures=textures,
+        )
+
+    return collated_dict
diff --git a/pytorch3d/pytorch3d/implicitron/__init__.py b/pytorch3d/pytorch3d/implicitron/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/__init__.py b/pytorch3d/pytorch3d/implicitron/dataset/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/blender_dataset_map_provider.py b/pytorch3d/pytorch3d/implicitron/dataset/blender_dataset_map_provider.py
new file mode 100644
index 0000000000000000000000000000000000000000..2eab25602659b802bdb2e411b2a4cf63b9542591
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/blender_dataset_map_provider.py
@@ -0,0 +1,53 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import torch
+from pytorch3d.implicitron.tools.config import registry
+
+from .load_blender import load_blender_data
+from .single_sequence_dataset import (
+    _interpret_blender_cameras,
+    SingleSceneDatasetMapProviderBase,
+)
+
+
+@registry.register
+class BlenderDatasetMapProvider(SingleSceneDatasetMapProviderBase):
+    """
+    Provides data for one scene from Blender synthetic dataset.
+    Uses the code in load_blender.py
+
+    Members:
+        base_dir: directory holding the data for the scene.
+        object_name: The name of the scene (e.g. "lego"). This is just used as a label.
+            It will typically be equal to the name of the directory self.base_dir.
+        path_manager_factory: Creates path manager which may be used for
+            interpreting paths.
+        n_known_frames_for_test: If set, training frames are included in the val
+            and test datasets, and this many random training frames are added to
+            each test batch. If not set, test batches each contain just a single
+            testing frame.
+    """
+
+    def _load_data(self) -> None:
+        path_manager = self.path_manager_factory.get()
+        images, poses, _, hwf, i_split = load_blender_data(
+            self.base_dir,
+            testskip=1,
+            path_manager=path_manager,
+        )
+        H, W, focal = hwf
+        images_masks = torch.from_numpy(images).permute(0, 3, 1, 2)
+
+        # pyre-ignore[16]
+        self.poses = _interpret_blender_cameras(poses, focal)
+        # pyre-ignore[16]
+        self.images = images_masks[:, :3]
+        # pyre-ignore[16]
+        self.fg_probabilities = images_masks[:, 3:4]
+        # pyre-ignore[16]
+        self.i_split = i_split
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/data_loader_map_provider.py b/pytorch3d/pytorch3d/implicitron/dataset/data_loader_map_provider.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c0436adf5b9551d735a46bf6f7c52d7d0660cab
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/data_loader_map_provider.py
@@ -0,0 +1,524 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from dataclasses import dataclass
+from enum import Enum
+from typing import Iterator, List, Optional, Tuple
+
+import torch
+from pytorch3d.implicitron.tools.config import registry, ReplaceableBase
+from torch.utils.data import (
+    BatchSampler,
+    ConcatDataset,
+    DataLoader,
+    RandomSampler,
+    Sampler,
+)
+
+from .dataset_base import DatasetBase
+from .dataset_map_provider import DatasetMap
+from .frame_data import FrameData
+from .scene_batch_sampler import SceneBatchSampler
+from .utils import is_known_frame_scalar
+
+
+@dataclass
+class DataLoaderMap:
+    """
+    A collection of data loaders for Implicitron.
+
+    Members:
+
+        train: a data loader for training
+        val: a data loader for validating during training
+        test: a data loader for final evaluation
+    """
+
+    train: Optional[DataLoader[FrameData]]
+    val: Optional[DataLoader[FrameData]]
+    test: Optional[DataLoader[FrameData]]
+
+    def __getitem__(self, split: str) -> Optional[DataLoader[FrameData]]:
+        """
+        Get one of the data loaders by key (name of data split)
+        """
+        if split not in ["train", "val", "test"]:
+            raise ValueError(f"{split} was not a valid split name (train/val/test)")
+        return getattr(self, split)
+
+
+class DataLoaderMapProviderBase(ReplaceableBase):
+    """
+    Provider of a collection of data loaders for a given collection of datasets.
+
+    This base class only declares the interface; get_data_loader_map must be
+    overridden.
+    """
+
+    def get_data_loader_map(self, datasets: DatasetMap) -> DataLoaderMap:
+        """
+        Returns a collection of data loaders for a given collection of datasets.
+
+        Args:
+            datasets: the datasets to build data loaders for.
+
+        Raises:
+            NotImplementedError: always, in this base class.
+        """
+        raise NotImplementedError()
+
+
+@registry.register
+class SimpleDataLoaderMapProvider(DataLoaderMapProviderBase):
+    """
+    Trivial implementation of DataLoaderMapProviderBase.
+
+    If a dataset returns batches from get_eval_batches(), then
+    they will be what the corresponding dataloader returns,
+    independently of any of the fields on this class.
+
+    Otherwise, returns shuffled batches.
+    """
+
+    batch_size: int = 1
+    num_workers: int = 0
+    dataset_length_train: int = 0
+    dataset_length_val: int = 0
+    dataset_length_test: int = 0
+
+    def get_data_loader_map(self, datasets: DatasetMap) -> DataLoaderMap:
+        """
+        Returns a collection of data loaders for a given collection of datasets.
+        """
+        return DataLoaderMap(
+            train=self._make_data_loader(
+                datasets.train,
+                self.dataset_length_train,
+            ),
+            val=self._make_data_loader(
+                datasets.val,
+                self.dataset_length_val,
+            ),
+            test=self._make_data_loader(
+                datasets.test,
+                self.dataset_length_test,
+            ),
+        )
+
+    def _make_data_loader(
+        self,
+        dataset: Optional[DatasetBase],
+        num_batches: int,
+    ) -> Optional[DataLoader[FrameData]]:
+        """
+        Returns the dataloader for a dataset.
+
+        Args:
+            dataset: the dataset
+            num_batches: possible ceiling on number of batches per epoch
+        """
+        if dataset is None:
+            return None
+
+        data_loader_kwargs = {
+            "num_workers": self.num_workers,
+            "collate_fn": dataset.frame_data_type.collate,
+        }
+
+        eval_batches = dataset.get_eval_batches()
+        if eval_batches is not None:
+            return DataLoader(
+                dataset,
+                batch_sampler=eval_batches,
+                **data_loader_kwargs,
+            )
+
+        if num_batches > 0:
+            num_samples = self.batch_size * num_batches
+        else:
+            num_samples = None
+
+        # sample with replacement only if a custom number of samples is specified
+        sampler = RandomSampler(
+            dataset,
+            replacement=num_samples is not None,
+            num_samples=num_samples,
+        )
+
+        batch_sampler = BatchSampler(sampler, self.batch_size, drop_last=True)
+        return DataLoader(
+            dataset,
+            batch_sampler=batch_sampler,
+            **data_loader_kwargs,
+        )
+
+
+class DoublePoolBatchSampler(Sampler[List[int]]):
+    """
+    Batch sampler for making random batches of a single frame
+    from one list and a number of known frames from another list.
+
+    Each yielded batch is a list of dataset indices: one element taken from
+    `first_indices` followed by `batch_size - 1` distinct elements taken
+    from `rest_indices`.
+    """
+
+    def __init__(
+        self,
+        first_indices: List[int],
+        rest_indices: List[int],
+        batch_size: int,
+        replacement: bool,
+        num_batches: Optional[int] = None,
+    ) -> None:
+        """
+        Args:
+            first_indices: indexes of dataset items to use as the first element
+                        of each batch.
+            rest_indices: indexes of dataset items to use as the subsequent
+                        elements of each batch. Not used if batch_size==1.
+            batch_size: The common size of any batch.
+            replacement: Whether the sampling of first items is with replacement.
+            num_batches: The number of batches in an epoch. If 0 or None,
+                        one epoch is the length of `first_indices`.
+
+        Raises:
+            ValueError: if `rest_indices` has fewer than `batch_size - 1` elements.
+        """
+        self.first_indices = first_indices
+        self.rest_indices = rest_indices
+        self.batch_size = batch_size
+        self.replacement = replacement
+        # 0 is normalised to None so later code only needs to test for None.
+        self.num_batches = None if num_batches == 0 else num_batches
+
+        if batch_size - 1 > len(rest_indices):
+            raise ValueError(
+                f"Cannot make up ({batch_size})-batches from {len(self.rest_indices)}"
+            )
+
+        # copied from RandomSampler
+        # The private generator is seeded with a value drawn from torch's
+        # global random state.
+        seed = int(torch.empty((), dtype=torch.int64).random_().item())
+        self.generator = torch.Generator()
+        self.generator.manual_seed(seed)
+
+    def __len__(self) -> int:
+        """
+        Return the number of batches in an epoch: `num_batches` if set,
+        otherwise the length of `first_indices`.
+        """
+        if self.num_batches is not None:
+            return self.num_batches
+        return len(self.first_indices)
+
+    def __iter__(self) -> Iterator[List[int]]:
+        """
+        Yield the batches of one epoch, each as a list of dataset indices.
+
+        All first elements are drawn before any of the remaining elements.
+        """
+        num_batches = self.num_batches
+        if self.replacement:
+            # Independent uniform draws, one per batch.
+            i_first = torch.randint(
+                len(self.first_indices),
+                size=(len(self),),
+                generator=self.generator,
+            )
+        elif num_batches is not None:
+            # Without replacement but with a fixed epoch length: concatenate
+            # enough independent permutations to cover num_batches, then trim.
+            n_copies = 1 + (num_batches - 1) // len(self.first_indices)
+            raw_indices = [
+                torch.randperm(len(self.first_indices), generator=self.generator)
+                for _ in range(n_copies)
+            ]
+            i_first = torch.cat(raw_indices)[:num_batches]
+        else:
+            # One pass over first_indices in random order.
+            i_first = torch.randperm(len(self.first_indices), generator=self.generator)
+        first_indices = [self.first_indices[i] for i in i_first]
+
+        if self.batch_size == 1:
+            for first_index in first_indices:
+                yield [first_index]
+            return
+
+        for first_index in first_indices:
+            # Consider using this class in a program which sets the seed. A full
+            # permutation of rest_indices is drawn for every batch and then
+            # truncated, so rerunning with a higher batch_size results in
+            # batches whose leading elements are the same as the batches of
+            # the first run.
+            i_rest = torch.randperm(
+                len(self.rest_indices),
+                generator=self.generator,
+            )[: self.batch_size - 1]
+            yield [first_index] + [self.rest_indices[i] for i in i_rest]
+
+
+class BatchConditioningType(Enum):
+    """
+    Ways to add conditioning frames for the val and test batches.
+
+    SAME: Use the corresponding dataset for all elements of val batches
+        without regard to frame type.
+    TRAIN: Use the corresponding dataset for the first element of each
+        batch, and the training dataset for the extra conditioning
+        elements. No regard to frame type.
+    KNOWN: Use frames from the corresponding dataset but separate them
+        according to their frame_type. Each batch will contain one UNSEEN
+        frame followed by many KNOWN frames.
+    """
+
+    SAME = "same"
+    TRAIN = "train"
+    KNOWN = "known"
+
+
+@registry.register
+class SequenceDataLoaderMapProvider(DataLoaderMapProviderBase):
+    """
+    Default implementation of DataLoaderMapProviderBase.
+
+    If a dataset returns batches from get_eval_batches(), then
+    they will be what the corresponding dataloader returns,
+    independently of any of the fields on this class.
+
+    If conditioning is not required, then the batch size should
+    be set as 1, and most of the fields do not matter.
+
+    If conditioning is required, each batch will contain one main
+    frame to predict first, and the rest of the elements are for
+    conditioning.
+
+    If images_per_seq_options is left empty, the conditioning
+    frames are picked according to the conditioning type given.
+    This does not have regard to the order of frames in a
+    scene, or which frames belong to what scene.
+
+    If images_per_seq_options is given, then the conditioning types
+    must be SAME and the remaining fields are used.
+
+    Members:
+        batch_size: The size of the batch of the data loader.
+        num_workers: Number of data-loading threads in each data loader.
+        dataset_length_train: The number of batches in a training epoch. Or 0 to mean
+            an epoch is the length of the training set.
+        dataset_length_val: The number of batches in a validation epoch. Or 0 to mean
+            an epoch is the length of the validation set.
+        dataset_length_test: The number of batches in a testing epoch. Or 0 to mean
+            an epoch is the length of the test set.
+        train_conditioning_type: Whether the train data loader should use
+            only known frames for conditioning.
+            Only used if batch_size>1 and train dataset is
+            present and does not return eval_batches.
+        val_conditioning_type: Whether the val data loader should use
+            training frames or known frames for conditioning.
+            Only used if batch_size>1 and val dataset is
+            present and does not return eval_batches.
+        test_conditioning_type: Whether the test data loader should use
+            training frames or known frames for conditioning.
+            Only used if batch_size>1 and test dataset is
+            present and does not return eval_batches.
+        images_per_seq_options: Possible numbers of frames sampled per sequence in a batch.
+            If a conditioning_type is KNOWN or TRAIN, then this must be left at its initial
+            value. Empty (the default) means that we are not careful about which frames
+            come from which scene.
+        sample_consecutive_frames: if True, will sample a contiguous interval of frames
+            in the sequence. It first sorts the frames by timestamps when available,
+            otherwise by frame numbers, finds the connected segments within the sequence
+            of sufficient length, then samples a random pivot element among them and
+            ideally uses it as a middle of the temporal window, shifting the borders
+            where necessary. This strategy mitigates the bias against shorter segments
+            and their boundaries.
+        consecutive_frames_max_gap: if a number > 0, then used to define the maximum
+            difference in frame_number of neighbouring frames when forming connected
+            segments; if both this and consecutive_frames_max_gap_seconds are 0s,
+            the whole sequence is considered a segment regardless of frame numbers.
+        consecutive_frames_max_gap_seconds: if a number > 0.0, then used to define the
+            maximum difference in frame_timestamp of neighbouring frames when forming
+            connected segments; if both this and consecutive_frames_max_gap are 0s,
+            the whole sequence is considered a segment regardless of frame timestamps.
+    """
+
+    batch_size: int = 1
+    num_workers: int = 0
+    dataset_length_train: int = 0
+    dataset_length_val: int = 0
+    dataset_length_test: int = 0
+    train_conditioning_type: BatchConditioningType = BatchConditioningType.SAME
+    val_conditioning_type: BatchConditioningType = BatchConditioningType.SAME
+    test_conditioning_type: BatchConditioningType = BatchConditioningType.KNOWN
+    images_per_seq_options: Tuple[int, ...] = ()
+    sample_consecutive_frames: bool = False
+    consecutive_frames_max_gap: int = 0
+    consecutive_frames_max_gap_seconds: float = 0.1
+
+    def get_data_loader_map(self, datasets: DatasetMap) -> DataLoaderMap:
+        """
+        Returns a collection of data loaders for a given collection of datasets.
+        """
+        return DataLoaderMap(
+            train=self._make_data_loader(
+                datasets.train,
+                self.dataset_length_train,
+                datasets.train,
+                self.train_conditioning_type,
+            ),
+            val=self._make_data_loader(
+                datasets.val,
+                self.dataset_length_val,
+                datasets.train,
+                self.val_conditioning_type,
+            ),
+            test=self._make_data_loader(
+                datasets.test,
+                self.dataset_length_test,
+                datasets.train,
+                self.test_conditioning_type,
+            ),
+        )
+
+    def _make_data_loader(
+        self,
+        dataset: Optional[DatasetBase],
+        num_batches: int,
+        train_dataset: Optional[DatasetBase],
+        conditioning_type: BatchConditioningType,
+    ) -> Optional[DataLoader[FrameData]]:
+        """
+        Returns the dataloader for a dataset.
+
+        Args:
+            dataset: the dataset
+            num_batches: possible ceiling on number of batches per epoch
+            train_dataset: the training dataset, used if conditioning_type==TRAIN
+            conditioning_type: source for padding of batches
+        """
+        if dataset is None:
+            return None
+
+        data_loader_kwargs = {
+            "num_workers": self.num_workers,
+            "collate_fn": dataset.frame_data_type.collate,
+        }
+
+        eval_batches = dataset.get_eval_batches()
+        if eval_batches is not None:
+            return DataLoader(
+                dataset,
+                batch_sampler=eval_batches,
+                **data_loader_kwargs,
+            )
+
+        scenes_matter = len(self.images_per_seq_options) > 0
+        if scenes_matter and conditioning_type != BatchConditioningType.SAME:
+            raise ValueError(
+                f"{conditioning_type} cannot be used with images_per_seq "
+                + str(self.images_per_seq_options)
+            )
+
+        if self.batch_size == 1 or (
+            not scenes_matter and conditioning_type == BatchConditioningType.SAME
+        ):
+            return self._simple_loader(dataset, num_batches, data_loader_kwargs)
+
+        if scenes_matter:
+            assert conditioning_type == BatchConditioningType.SAME
+            batch_sampler = SceneBatchSampler(
+                dataset,
+                self.batch_size,
+                num_batches=len(dataset) if num_batches <= 0 else num_batches,
+                images_per_seq_options=self.images_per_seq_options,
+                sample_consecutive_frames=self.sample_consecutive_frames,
+                consecutive_frames_max_gap=self.consecutive_frames_max_gap,
+                consecutive_frames_max_gap_seconds=self.consecutive_frames_max_gap_seconds,
+            )
+            return DataLoader(
+                dataset,
+                batch_sampler=batch_sampler,
+                **data_loader_kwargs,
+            )
+
+        if conditioning_type == BatchConditioningType.TRAIN:
+            return self._train_loader(
+                dataset, train_dataset, num_batches, data_loader_kwargs
+            )
+
+        assert conditioning_type == BatchConditioningType.KNOWN
+        return self._known_loader(dataset, num_batches, data_loader_kwargs)
+
+    def _simple_loader(
+        self,
+        dataset: DatasetBase,
+        num_batches: int,
+        data_loader_kwargs: dict,
+    ) -> DataLoader[FrameData]:
+        """
+        Return a simple loader for frames in the dataset.
+
+        This is equivalent to
+            Dataloader(dataset, batch_size=self.batch_size, **data_loader_kwargs)
+        except that num_batches is fixed.
+
+        Args:
+            dataset: the dataset
+            num_batches: possible ceiling on number of batches per epoch
+            data_loader_kwargs: common args for dataloader
+        """
+        if num_batches > 0:
+            num_samples = self.batch_size * num_batches
+            replacement = True
+        else:
+            num_samples = None
+            replacement = False
+        sampler = RandomSampler(
+            dataset, replacement=replacement, num_samples=num_samples
+        )
+        batch_sampler = BatchSampler(sampler, self.batch_size, drop_last=True)
+        return DataLoader(
+            dataset,
+            batch_sampler=batch_sampler,
+            **data_loader_kwargs,
+        )
+
+    def _train_loader(
+        self,
+        dataset: DatasetBase,
+        train_dataset: Optional[DatasetBase],
+        num_batches: int,
+        data_loader_kwargs: dict,
+    ) -> DataLoader[FrameData]:
+        """
+        Return the loader for TRAIN conditioning.
+
+        Args:
+            dataset: the dataset
+            train_dataset: the training dataset
+            num_batches: possible ceiling on number of batches per epoch
+            data_loader_kwargs: common args for dataloader
+        """
+        if train_dataset is None:
+            raise ValueError("No training data for conditioning.")
+        length = len(dataset)
+        first_indices = list(range(length))
+        rest_indices = list(range(length, length + len(train_dataset)))
+        sampler = DoublePoolBatchSampler(
+            first_indices=first_indices,
+            rest_indices=rest_indices,
+            batch_size=self.batch_size,
+            replacement=True,
+            num_batches=num_batches,
+        )
+        return DataLoader(
+            ConcatDataset([dataset, train_dataset]),
+            batch_sampler=sampler,
+            **data_loader_kwargs,
+        )
+
+    def _known_loader(
+        self,
+        dataset: DatasetBase,
+        num_batches: int,
+        data_loader_kwargs: dict,
+    ) -> DataLoader[FrameData]:
+        """
+        Return the loader for KNOWN conditioning.
+
+        Args:
+            dataset: the dataset
+            num_batches: possible ceiling on number of batches per epoch
+            data_loader_kwargs: common args for dataloader
+        """
+        first_indices, rest_indices = [], []
+        for idx in range(len(dataset)):
+            frame_type = dataset[idx].frame_type
+            assert isinstance(frame_type, str)
+            if is_known_frame_scalar(frame_type):
+                rest_indices.append(idx)
+            else:
+                first_indices.append(idx)
+        sampler = DoublePoolBatchSampler(
+            first_indices=first_indices,
+            rest_indices=rest_indices,
+            batch_size=self.batch_size,
+            replacement=True,
+            num_batches=num_batches,
+        )
+        return DataLoader(
+            dataset,
+            batch_sampler=sampler,
+            **data_loader_kwargs,
+        )
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/data_source.py b/pytorch3d/pytorch3d/implicitron/dataset/data_source.py
new file mode 100644
index 0000000000000000000000000000000000000000..a7989ac900daff4a8eb9f7c724829df0e0d4dc6b
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/data_source.py
@@ -0,0 +1,106 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Optional, Tuple
+
+from pytorch3d.implicitron.tools.config import (
+    registry,
+    ReplaceableBase,
+    run_auto_creation,
+)
+from pytorch3d.renderer.cameras import CamerasBase
+
+from .data_loader_map_provider import DataLoaderMap, DataLoaderMapProviderBase
+from .dataset_map_provider import DatasetMap, DatasetMapProviderBase
+
+
+class DataSourceBase(ReplaceableBase):
+    """
+    Base class for a data source in Implicitron. It encapsulates Dataset
+    and DataLoader configuration.
+    """
+
+    def get_datasets_and_dataloaders(self) -> Tuple[DatasetMap, DataLoaderMap]:
+        raise NotImplementedError()
+
+    @property
+    def all_train_cameras(self) -> Optional[CamerasBase]:
+        """
+        DEPRECATED! The property will be removed in future versions.
+        If the data is all for a single scene, a list
+        of the known training cameras for that scene, which is
+        used for evaluating the viewpoint difficulty of the
+        unseen cameras.
+        """
+        raise NotImplementedError()
+
+
+@registry.register
+class ImplicitronDataSource(DataSourceBase):  # pyre-ignore[13]
+    """
+    Represents the data used in Implicitron. This is the only implementation
+    of DataSourceBase provided.
+
+    Members:
+        dataset_map_provider_class_type: identifies type for dataset_map_provider.
+            e.g. JsonIndexDatasetMapProvider for Co3D.
+        data_loader_map_provider_class_type: identifies type for data_loader_map_provider.
+    """
+
+    dataset_map_provider: DatasetMapProviderBase
+    dataset_map_provider_class_type: str
+    data_loader_map_provider: DataLoaderMapProviderBase
+    data_loader_map_provider_class_type: str = "SequenceDataLoaderMapProvider"
+
+    @classmethod
+    def pre_expand(cls) -> None:
+        # use try/finally to bypass cinder's lazy imports
+        try:
+            from .blender_dataset_map_provider import (  # noqa: F401
+                BlenderDatasetMapProvider,
+            )
+            from .json_index_dataset_map_provider import (  # noqa: F401
+                JsonIndexDatasetMapProvider,
+            )
+            from .json_index_dataset_map_provider_v2 import (  # noqa: F401
+                JsonIndexDatasetMapProviderV2,
+            )
+            from .llff_dataset_map_provider import LlffDatasetMapProvider  # noqa: F401
+            from .rendered_mesh_dataset_map_provider import (  # noqa: F401
+                RenderedMeshDatasetMapProvider,
+            )
+            from .train_eval_data_loader_provider import (  # noqa: F401
+                TrainEvalDataLoaderMapProvider,
+            )
+
+            try:
+                from .sql_dataset_provider import (  # noqa: F401  # pyre-ignore
+                    SqlIndexDatasetMapProvider,
+                )
+            except ModuleNotFoundError:
+                pass  # environment without SQL dataset
+        finally:
+            pass
+
+    def __post_init__(self):
+        run_auto_creation(self)
+        self._all_train_cameras_cache: Optional[Tuple[Optional[CamerasBase]]] = None
+
+    def get_datasets_and_dataloaders(self) -> Tuple[DatasetMap, DataLoaderMap]:
+        datasets = self.dataset_map_provider.get_dataset_map()
+        dataloaders = self.data_loader_map_provider.get_data_loader_map(datasets)
+        return datasets, dataloaders
+
+    @property
+    def all_train_cameras(self) -> Optional[CamerasBase]:
+        """
+        DEPRECATED! The property will be removed in future versions.
+        """
+        if self._all_train_cameras_cache is None:  # pyre-ignore[16]
+            all_train_cameras = self.dataset_map_provider.get_all_train_cameras()
+            self._all_train_cameras_cache = (all_train_cameras,)
+
+        return self._all_train_cameras_cache[0]
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/dataset_base.py b/pytorch3d/pytorch3d/implicitron/dataset/dataset_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..033b170c0969c9220fe8b6246ff5cfe768ab79e1
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/dataset_base.py
@@ -0,0 +1,145 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from collections import defaultdict
+from dataclasses import dataclass
+from typing import (
+    ClassVar,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    Type,
+)
+
+import torch
+
+from pytorch3d.implicitron.dataset.frame_data import FrameData
+from pytorch3d.implicitron.dataset.utils import GenericWorkaround
+
+
+@dataclass(eq=False)
+class DatasetBase(GenericWorkaround, torch.utils.data.Dataset[FrameData]):
+    """
+    Base class to describe a dataset to be used with Implicitron.
+
+    The dataset is made up of frames, and the frames are grouped into sequences.
+    Each sequence has a name (a string).
+    (A sequence could be a video, or a set of images of one scene.)
+
+    This means they have a __getitem__ which returns an instance of a FrameData,
+    which will describe one frame in one sequence.
+    """
+
+    # _seq_to_idx is a member which implementations can define.
+    # It maps sequence name to the sequence's global frame indices.
+    # It is used for the default implementations of some functions in this class.
+    # Implementations which override them are free to ignore it.
+    # _seq_to_idx: Dict[str, List[int]] = field(init=False)
+
+    def __len__(self) -> int:
+        raise NotImplementedError()
+
+    def get_frame_numbers_and_timestamps(
+        self, idxs: Sequence[int], subset_filter: Optional[Sequence[str]] = None
+    ) -> List[Tuple[int, float]]:
+        """
+        If the sequences in the dataset are videos rather than
+        unordered views, then the dataset should override this method to
+        return the index and timestamp in their videos of the frames whose
+        indices are given in `idxs`. In addition,
+        the values in _seq_to_idx should be in ascending order.
+        If timestamps are absent, they should be replaced with a constant.
+
+        This is used for letting SceneBatchSampler identify consecutive
+        frames.
+
+        Args:
+            idxs: frame index in self
+            subset_filter: If given, an index in idxs is ignored if the
+                corresponding frame is not in any of the named subsets.
+
+        Returns:
+            tuple of
+                - frame index in video
+                - timestamp of frame in video
+        """
+        raise ValueError("This dataset does not contain videos.")
+
+    def join(self, other_datasets: Iterable["DatasetBase"]) -> None:
+        """
+        Joins the current dataset with a list of other datasets of the same type.
+        """
+        raise NotImplementedError()
+
+    def get_eval_batches(self) -> Optional[List[List[int]]]:
+        return None
+
+    def sequence_names(self) -> Iterable[str]:
+        """Returns an iterator over sequence names in the dataset."""
+        # pyre-ignore[16]
+        return self._seq_to_idx.keys()
+
+    def category_to_sequence_names(self) -> Dict[str, List[str]]:
+        """
+        Returns a dict mapping from each dataset category to a list of its
+        sequence names.
+
+        Returns:
+            category_to_sequence_names: Dict {category_i: [..., sequence_name_j, ...]}
+        """
+        c2seq = defaultdict(list)
+        for sequence_name in self.sequence_names():
+            first_frame_idx = next(self.sequence_indices_in_order(sequence_name))
+            # crashes without overriding __getitem__
+            sequence_category = self[first_frame_idx].sequence_category
+            c2seq[sequence_category].append(sequence_name)
+        return dict(c2seq)
+
+    def sequence_frames_in_order(
+        self, seq_name: str, subset_filter: Optional[Sequence[str]] = None
+    ) -> Iterator[Tuple[float, int, int]]:
+        """Returns an iterator over the frame indices in a given sequence.
+        We attempt to first sort by timestamp (if they are available),
+        then by frame number.
+
+        Args:
+            seq_name: the name of the sequence.
+
+        Returns:
+            an iterator over triplets `(timestamp, frame_no, dataset_idx)`,
+                where `frame_no` is the index within the sequence, and
+                `dataset_idx` is the index within the dataset.
+                `None` timestamps are replaced with 0s.
+        """
+        # pyre-ignore[16]
+        seq_frame_indices = self._seq_to_idx[seq_name]
+        nos_timestamps = self.get_frame_numbers_and_timestamps(
+            seq_frame_indices, subset_filter
+        )
+
+        yield from sorted(
+            [
+                (timestamp, frame_no, idx)
+                for idx, (frame_no, timestamp) in zip(seq_frame_indices, nos_timestamps)
+            ]
+        )
+
+    def sequence_indices_in_order(
+        self, seq_name: str, subset_filter: Optional[Sequence[str]] = None
+    ) -> Iterator[int]:
+        """Same as `sequence_frames_in_order` but returns the iterator over
+        only dataset indices.
+        """
+        for _, _, idx in self.sequence_frames_in_order(seq_name, subset_filter):
+            yield idx
+
+    # frame_data_type is the actual type of frames returned by the dataset.
+    # Collation uses its classmethod `collate`
+    frame_data_type: ClassVar[Type[FrameData]] = FrameData
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/dataset_map_provider.py b/pytorch3d/pytorch3d/implicitron/dataset/dataset_map_provider.py
new file mode 100644
index 0000000000000000000000000000000000000000..91274f18542a42831309ba0230597036e21c4c64
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/dataset_map_provider.py
@@ -0,0 +1,140 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+import os
+from dataclasses import dataclass
+from typing import Iterable, Iterator, Optional
+
+from iopath.common.file_io import PathManager
+from pytorch3d.implicitron.tools.config import registry, ReplaceableBase
+from pytorch3d.renderer.cameras import CamerasBase
+
+from .dataset_base import DatasetBase
+
+
+@dataclass
+class DatasetMap:
+    """
+    A collection of datasets for implicitron.
+
+    Members:
+
+        train: a dataset for training
+        val: a dataset for validating during training
+        test: a dataset for final evaluation
+    """
+
+    train: Optional[DatasetBase]
+    val: Optional[DatasetBase]
+    test: Optional[DatasetBase]
+
+    def __getitem__(self, split: str) -> Optional[DatasetBase]:
+        """
+        Get one of the datasets by key (name of data split)
+        """
+        if split not in ["train", "val", "test"]:
+            raise ValueError(f"{split} was not a valid split name (train/val/test)")
+        return getattr(self, split)
+
+    def iter_datasets(self) -> Iterator[DatasetBase]:
+        """
+        Iterator over all datasets.
+        """
+        if self.train is not None:
+            yield self.train
+        if self.val is not None:
+            yield self.val
+        if self.test is not None:
+            yield self.test
+
+    def join(self, other_dataset_maps: Iterable["DatasetMap"]) -> None:
+        """
+        Joins the current DatasetMap with other dataset maps from the input list.
+
+        For each subset of each dataset map (train/val/test), the function
+        omits joining the subsets that are None.
+
+        Note the train/val/test datasets of the current dataset map will be
+        modified in-place.
+
+        Args:
+            other_dataset_maps: The list of dataset maps to be joined into the
+                current dataset map.
+        """
+        for set_ in ["train", "val", "test"]:
+            dataset_list = [
+                getattr(self, set_),
+                *[getattr(dmap, set_) for dmap in other_dataset_maps],
+            ]
+            dataset_list = [d for d in dataset_list if d is not None]
+            if len(dataset_list) == 0:
+                setattr(self, set_, None)
+                continue
+            d0 = dataset_list[0]
+            if len(dataset_list) > 1:
+                d0.join(dataset_list[1:])
+            setattr(self, set_, d0)
+
+
+class DatasetMapProviderBase(ReplaceableBase):
+    """
+    Base class for a provider of training / validation and testing
+    dataset objects.
+    """
+
+    def get_dataset_map(self) -> DatasetMap:
+        """
+        Returns:
+            An object containing the torch.Dataset objects in train/val/test fields.
+        """
+        raise NotImplementedError()
+
+    def get_all_train_cameras(self) -> Optional[CamerasBase]:
+        """
+        DEPRECATED! The function will be removed in future versions.
+        If the data is all for a single scene, returns a list
+        of the known training cameras for that scene, which is
+        used for evaluating the difficulty of the unknown
+        cameras. Otherwise return None.
+        """
+        raise NotImplementedError()
+
+
+@registry.register
+class PathManagerFactory(ReplaceableBase):
+    """
+    Base class and default implementation of a tool which dataset_map_provider implementations
+    may use to construct a path manager if needed.
+
+    Args:
+        silence_logs: Whether to reduce log output from iopath library.
+    """
+
+    silence_logs: bool = True
+
+    def get(self) -> Optional[PathManager]:
+        """
+        Makes a PathManager if needed.
+        For open source users, this function should always return None.
+        Internally, this allows manifold access.
+        """
+        if os.environ.get("INSIDE_RE_WORKER", False):
+            return None
+
+        try:
+            from iopath.fb.manifold import ManifoldPathHandler
+        except ImportError:
+            return None
+
+        if self.silence_logs:
+            logging.getLogger("iopath.fb.manifold").setLevel(logging.CRITICAL)
+            logging.getLogger("iopath.common.file_io").setLevel(logging.CRITICAL)
+
+        path_manager = PathManager()
+        path_manager.register_handler(ManifoldPathHandler())
+
+        return path_manager
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/frame_data.py b/pytorch3d/pytorch3d/implicitron/dataset/frame_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..e32c086401eb7845815793d1223ef8c0ef7c5306
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/frame_data.py
@@ -0,0 +1,777 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+from abc import ABC, abstractmethod
+from collections import defaultdict
+from dataclasses import dataclass, field, fields
+from typing import (
+    Any,
+    ClassVar,
+    Generic,
+    List,
+    Mapping,
+    Optional,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+)
+
+import numpy as np
+import torch
+
+from pytorch3d.implicitron.dataset import types
+from pytorch3d.implicitron.dataset.utils import (
+    adjust_camera_to_bbox_crop_,
+    adjust_camera_to_image_scale_,
+    bbox_xyxy_to_xywh,
+    clamp_box_to_image_bounds_and_round,
+    crop_around_box,
+    GenericWorkaround,
+    get_bbox_from_mask,
+    get_clamp_bbox,
+    load_depth,
+    load_depth_mask,
+    load_image,
+    load_mask,
+    load_pointcloud,
+    rescale_bbox,
+    resize_image,
+    safe_as_tensor,
+)
+from pytorch3d.implicitron.tools.config import registry, ReplaceableBase
+from pytorch3d.renderer.camera_utils import join_cameras_as_batch
+from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras
+from pytorch3d.structures.pointclouds import join_pointclouds_as_batch, Pointclouds
+
+
+@dataclass
+class FrameData(Mapping[str, Any]):
+    """
+    A type of the elements returned by indexing the dataset object.
+    It can represent both individual frames and batches thereof;
+    in this documentation, the sizes of tensors refer to single frames;
+    add the first batch dimension for the collation result.
+
+    Args:
+        frame_number: The number of the frame within its sequence.
+            0-based continuous integers.
+        sequence_name: The unique name of the frame's sequence.
+        sequence_category: The object category of the sequence.
+        frame_timestamp: The time elapsed since the start of a sequence in sec.
+        image_size_hw: The size of the original image in pixels; (height, width)
+            tensor of shape (2,). Note that it is optional, e.g. it can be `None`
+            if the frame annotation has no size and image_rgb has not [yet] been
+            loaded. Image-less FrameData is valid but mutators like crop/resize
+            may fail if the original image size cannot be deduced.
+        effective_image_size_hw: The size of the image after mutations such as
+            crop/resize in pixels; (height, width). if the image has not been mutated,
+            it is equal to `image_size_hw`. Note that it is also optional, for the
+            same reason as `image_size_hw`.
+        image_path: The qualified path to the loaded image (with dataset_root).
+        image_rgb: A Tensor of shape `(3, H, W)` holding the RGB image
+            of the frame; elements are floats in [0, 1].
+        mask_crop: A binary mask of shape `(1, H, W)` denoting the valid image
+            regions. Regions can be invalid (mask_crop[i,j]=0) in case they
+            are a result of zero-padding of the image after cropping around
+            the object bounding box; elements are floats in {0.0, 1.0}.
+        depth_path: The qualified path to the frame's depth map.
+        depth_map: A float Tensor of shape `(1, H, W)` holding the depth map
+            of the frame; values correspond to distances from the camera;
+            use `depth_mask` and `mask_crop` to filter for valid pixels.
+        depth_mask: A binary mask of shape `(1, H, W)` denoting pixels of the
+            depth map that are valid for evaluation, they have been checked for
+            consistency across views; elements are floats in {0.0, 1.0}.
+        mask_path: A qualified path to the foreground probability mask.
+        fg_probability: A Tensor of `(1, H, W)` denoting the probability of the
+            pixels belonging to the captured object; elements are floats
+            in [0, 1].
+        bbox_xywh: The bounding box tightly enclosing the foreground object in the
+            format (x0, y0, width, height). The convention assumes that
+            `x0+width` and `y0+height` includes the boundary of the box.
+            I.e., to slice out the corresponding crop from an image tensor `I`
+            we execute `crop = I[..., y0:y0+height, x0:x0+width]`
+        crop_bbox_xywh: The bounding box denoting the boundaries of `image_rgb`
+            in the original image coordinates in the format (x0, y0, width, height).
+            The convention is the same as for `bbox_xywh`. `crop_bbox_xywh` differs
+            from `bbox_xywh` due to padding (which can happen e.g. due to
+            setting `JsonIndexDataset.box_crop_context > 0`)
+        camera: A PyTorch3D camera object corresponding to the frame's viewpoint,
+            corrected for cropping if it happened.
+        camera_quality_score: The score proportional to the confidence of the
+            frame's camera estimation (the higher the more accurate).
+        point_cloud_quality_score: The score proportional to the accuracy of the
+            frame's sequence point cloud (the higher the more accurate).
+        sequence_point_cloud_path: The path to the sequence's point cloud.
+        sequence_point_cloud: A PyTorch3D Pointclouds object holding the
+            point cloud corresponding to the frame's sequence. When the object
+            represents a batch of frames, point clouds may be deduplicated;
+            see `sequence_point_cloud_idx`.
+        sequence_point_cloud_idx: Integer indices mapping frame indices to the
+            corresponding point clouds in `sequence_point_cloud`; to get the
+            corresponding point cloud to `image_rgb[i]`, use
+            `sequence_point_cloud[sequence_point_cloud_idx[i]]`.
+        frame_type: The type of the loaded frame specified in
+            `subset_lists_file`, if provided.
+        meta: A dict for storing additional frame information.
+    """
+
+    frame_number: Optional[torch.LongTensor]
+    sequence_name: Union[str, List[str]]
+    sequence_category: Union[str, List[str]]
+    frame_timestamp: Optional[torch.Tensor] = None
+    image_size_hw: Optional[torch.LongTensor] = None
+    effective_image_size_hw: Optional[torch.LongTensor] = None
+    image_path: Union[str, List[str], None] = None
+    image_rgb: Optional[torch.Tensor] = None
+    # masks out padding added due to cropping the square bit
+    mask_crop: Optional[torch.Tensor] = None
+    depth_path: Union[str, List[str], None] = None
+    depth_map: Optional[torch.Tensor] = None
+    depth_mask: Optional[torch.Tensor] = None
+    mask_path: Union[str, List[str], None] = None
+    fg_probability: Optional[torch.Tensor] = None
+    bbox_xywh: Optional[torch.Tensor] = None
+    crop_bbox_xywh: Optional[torch.Tensor] = None
+    camera: Optional[PerspectiveCameras] = None
+    camera_quality_score: Optional[torch.Tensor] = None
+    point_cloud_quality_score: Optional[torch.Tensor] = None
+    sequence_point_cloud_path: Union[str, List[str], None] = None
+    sequence_point_cloud: Optional[Pointclouds] = None
+    sequence_point_cloud_idx: Optional[torch.Tensor] = None
+    frame_type: Union[str, List[str], None] = None  # known | unseen
+    meta: dict = field(default_factory=lambda: {})
+
+    # NOTE that batching resets this attribute
+    _uncropped: bool = field(init=False, default=True)
+
+    def to(self, *args, **kwargs):
+        new_params = {}
+        for field_name in iter(self):
+            value = getattr(self, field_name)
+            if isinstance(value, (torch.Tensor, Pointclouds, CamerasBase)):
+                new_params[field_name] = value.to(*args, **kwargs)
+            else:
+                new_params[field_name] = value
+        frame_data = type(self)(**new_params)
+        frame_data._uncropped = self._uncropped
+        return frame_data
+
+    def cpu(self):  # same as `to`, with the "cpu" device as the target
+        return self.to(device=torch.device("cpu"))
+
+    def cuda(self):  # same as `to`, with the "cuda" device as the target
+        return self.to(device=torch.device("cuda"))
+
+    # the following functions make sure **frame_data can be passed to functions
+    def __iter__(self):
+        for f in fields(self):
+            if f.name.startswith("_"):
+                continue
+
+            yield f.name
+
+    def __getitem__(self, key):  # NOTE(review): an unknown key raises AttributeError
+        return getattr(self, key)  # (not KeyError); Mapping.get / `in` do not catch it
+
+    def __len__(self):  # the number of names which __iter__ yields
+        return sum(1 for f in iter(self))
+
+    def crop_by_metadata_bbox_(
+        self,
+        box_crop_context: float,
+    ) -> None:
+        """Crops the frame data in-place by (possibly expanded) bounding box.
+        The bounding box is taken from the object state (usually taken from
+        the frame annotation or estimated from the foregroubnd mask).
+        If the expanded bounding box does not fit the image, it is clamped,
+        i.e. the image is *not* padded.
+
+        Args:
+            box_crop_context: rate of expansion for bbox; 0 means no expansion,
+
+        Raises:
+            ValueError: If the object does not contain a bounding box (usually when no
+                mask annotation is provided)
+            ValueError: If the frame data have been cropped or resized, thus the intrinsic
+                bounding box is not valid for the current image size.
+            ValueError: If the frame does not have an image size (usually a corner case
+                when no image has been loaded)
+        """
+        if self.bbox_xywh is None:
+            raise ValueError(
+                "Attempted cropping by metadata with empty bounding box. Consider either"
+                " to remove_empty_masks or turn off box_crop in the dataset config."
+            )
+
+        if not self._uncropped:
+            raise ValueError(
+                "Trying to apply the metadata bounding box to already cropped "
+                "or resized image; coordinates have changed."
+            )
+
+        self._crop_by_bbox_(
+            box_crop_context,
+            self.bbox_xywh,
+        )
+
+    def crop_by_given_bbox_(
+        self,
+        box_crop_context: float,
+        bbox_xywh: torch.Tensor,
+    ) -> None:
+        """Crops the frame data in-place by (possibly expanded) bounding box.
+        If the expanded bounding box does not fit the image, it is clamped,
+        i.e. the image is *not* padded.
+
+        Args:
+            box_crop_context: rate of expansion for bbox; 0 means no expansion,
+            bbox_xywh: bounding box in [x0, y0, width, height] format. If float
+                tensor, values are floored (after converting to [x0, y0, x1, y1]).
+
+        Raises:
+            ValueError: If the frame does not have an image size (usually a corner case
+                when no image has been loaded)
+        """
+        self._crop_by_bbox_(
+            box_crop_context,
+            bbox_xywh,
+        )
+
+    def _crop_by_bbox_(
+        self,
+        box_crop_context: float,
+        bbox_xywh: torch.Tensor,
+    ) -> None:
+        """Crops the frame data in-place by (possibly expanded) bounding box.
+        If the expanded bounding box does not fit the image, it is clamped,
+        i.e. the image is *not* padded.
+
+        The foreground probability, RGB image, depth map and depth mask are each
+        cropped when present, the camera (if any) is adjusted to the crop, and
+        finally `effective_image_size_hw` is replaced by the size of the crop and
+        `_uncropped` is set to False.
+
+        Args:
+            box_crop_context: rate of expansion for bbox; 0 means no expansion,
+            bbox_xywh: bounding box in [x0, y0, width, height] format. If float
+                tensor, values are floored (after converting to [x0, y0, x1, y1]).
+
+        Raises:
+            ValueError: If the frame does not have an image size (usually a corner case
+                when no image has been loaded)
+        """
+        # Keep the pre-crop size in a local: it is still needed for the depth
+        # rescaling and the camera adjustment below, before the attribute is
+        # overwritten at the end of this method.
+        effective_image_size_hw = self.effective_image_size_hw
+        if effective_image_size_hw is None:
+            raise ValueError("Calling crop on image-less FrameData")
+
+        # Expand the box by box_crop_context; image_path is passed along to the
+        # helper (presumably for its diagnostics -- confirm in get_clamp_bbox).
+        bbox_xyxy = get_clamp_bbox(
+            bbox_xywh,
+            image_path=self.image_path,  # pyre-ignore
+            box_crop_context=box_crop_context,
+        )
+        # Clamp the expanded box to the current image extent and round it.
+        clamp_bbox_xyxy = clamp_box_to_image_bounds_and_round(
+            bbox_xyxy,
+            image_size_hw=tuple(self.effective_image_size_hw),  # pyre-ignore
+        )
+        crop_bbox_xywh = bbox_xyxy_to_xywh(clamp_bbox_xyxy)
+
+        # The mask and the image are cropped with the clamped box as-is.
+        if self.fg_probability is not None:
+            self.fg_probability = crop_around_box(
+                self.fg_probability,
+                clamp_bbox_xyxy,
+                self.mask_path,  # pyre-ignore
+            )
+        if self.image_rgb is not None:
+            self.image_rgb = crop_around_box(
+                self.image_rgb,
+                clamp_bbox_xyxy,
+                self.image_path,  # pyre-ignore
+            )
+
+        # The depth map is cropped with a box rescaled from the image size to the
+        # depth map's own trailing two dimensions, so its resolution may differ
+        # from the image's.
+        depth_map = self.depth_map
+        if depth_map is not None:
+            clamp_bbox_xyxy_depth = rescale_bbox(
+                clamp_bbox_xyxy, tuple(depth_map.shape[-2:]), effective_image_size_hw
+            ).long()
+            self.depth_map = crop_around_box(
+                depth_map,
+                clamp_bbox_xyxy_depth,
+                self.depth_path,  # pyre-ignore
+            )
+
+        # Same treatment for the depth mask; note that mask_path (not a dedicated
+        # depth-mask path) is what gets passed to crop_around_box here.
+        depth_mask = self.depth_mask
+        if depth_mask is not None:
+            clamp_bbox_xyxy_depth = rescale_bbox(
+                clamp_bbox_xyxy, tuple(depth_mask.shape[-2:]), effective_image_size_hw
+            ).long()
+            self.depth_mask = crop_around_box(
+                depth_mask,
+                clamp_bbox_xyxy_depth,
+                self.mask_path,  # pyre-ignore
+            )
+
+        # changing principal_point according to bbox_crop
+        # (uses the pre-crop size, flipped from (h, w) to (w, h))
+        if self.camera is not None:
+            adjust_camera_to_bbox_crop_(
+                camera=self.camera,
+                image_size_wh=effective_image_size_hw.flip(dims=[-1]),
+                clamp_bbox_xywh=crop_bbox_xywh,
+            )
+
+        # The new effective size is the crop's (width, height) flipped to (h, w).
+        # pyre-ignore
+        self.effective_image_size_hw = crop_bbox_xywh[..., 2:].flip(dims=[-1])
+        self._uncropped = False
+
+    def resize_frame_(self, new_size_hw: torch.LongTensor) -> None:
+        """Resizes frame data in-place according to given dimensions.
+
+        Args:
+            new_size_hw: target image size [height, width], a LongTensor of shape (2,)
+
+        Raises:
+            ValueError: If the frame does not have an image size (usually a corner case
+                when no image has been loaded)
+        """
+
+        effective_image_size_hw = self.effective_image_size_hw
+        if effective_image_size_hw is None:
+            raise ValueError("Calling resize on image-less FrameData")
+
+        image_height, image_width = new_size_hw.tolist()
+
+        if self.fg_probability is not None:
+            self.fg_probability, _, _ = resize_image(
+                self.fg_probability,
+                image_height=image_height,
+                image_width=image_width,
+                mode="nearest",
+            )
+
+        if self.image_rgb is not None:
+            self.image_rgb, _, self.mask_crop = resize_image(
+                self.image_rgb, image_height=image_height, image_width=image_width
+            )
+
+        if self.depth_map is not None:
+            self.depth_map, _, _ = resize_image(
+                self.depth_map,
+                image_height=image_height,
+                image_width=image_width,
+                mode="nearest",
+            )
+
+        if self.depth_mask is not None:
+            self.depth_mask, _, _ = resize_image(
+                self.depth_mask,
+                image_height=image_height,
+                image_width=image_width,
+                mode="nearest",
+            )
+
+        if self.camera is not None:
+            if self.image_size_hw is None:
+                raise ValueError(
+                    "image_size_hw has to be defined for resizing FrameData with cameras."
+                )
+            adjust_camera_to_image_scale_(
+                camera=self.camera,
+                original_size_wh=effective_image_size_hw.flip(dims=[-1]),
+                new_size_wh=new_size_hw.flip(dims=[-1]),  # pyre-ignore
+            )
+
+        self.effective_image_size_hw = new_size_hw
+        self._uncropped = False
+
+    @classmethod
+    def collate(cls, batch):
+        """
+        Given a list objects `batch` of class `cls`, collates them into a batched
+        representation suitable for processing with deep networks.
+        """
+
+        elem = batch[0]
+
+        if isinstance(elem, cls):
+            pointcloud_ids = [id(el.sequence_point_cloud) for el in batch]
+            id_to_idx = defaultdict(list)
+            for i, pc_id in enumerate(pointcloud_ids):
+                id_to_idx[pc_id].append(i)
+
+            sequence_point_cloud = []
+            sequence_point_cloud_idx = -np.ones((len(batch),))
+            for i, ind in enumerate(id_to_idx.values()):
+                sequence_point_cloud_idx[ind] = i
+                sequence_point_cloud.append(batch[ind[0]].sequence_point_cloud)
+            assert (sequence_point_cloud_idx >= 0).all()
+
+            override_fields = {
+                "sequence_point_cloud": sequence_point_cloud,
+                "sequence_point_cloud_idx": sequence_point_cloud_idx.tolist(),
+            }
+            # note that the pre-collate value of sequence_point_cloud_idx is unused
+
+            collated = {}
+            for f in fields(elem):
+                if not f.init:
+                    continue
+
+                list_values = override_fields.get(
+                    f.name, [getattr(d, f.name) for d in batch]
+                )
+                collated[f.name] = (
+                    cls.collate(list_values)
+                    if all(list_value is not None for list_value in list_values)
+                    else None
+                )
+            return cls(**collated)
+
+        elif isinstance(elem, Pointclouds):
+            return join_pointclouds_as_batch(batch)
+
+        elif isinstance(elem, CamerasBase):
+            # TODO: don't store K; enforce working in NDC space
+            return join_cameras_as_batch(batch)
+        else:
+            return torch.utils.data._utils.collate.default_collate(batch)
+
+
+# Type variable for the frame-data class a builder produces; it is bounded by
+# FrameData, so it can stand for FrameData itself or any subclass of it.
+FrameDataSubtype = TypeVar("FrameDataSubtype", bound=FrameData)
+
+
+class FrameDataBuilderBase(ReplaceableBase, Generic[FrameDataSubtype], ABC):
+    """A base class for FrameDataBuilders that build a FrameData object, load and
+    process the binary data (crop and resize). Implementations should parametrize
+    the class with a subtype of FrameData and set frame_data_type class variable to
+    that type. They have to also implement `build` method.
+    """
+
+    # To be initialised to FrameDataSubtype
+    frame_data_type: ClassVar[Type[FrameDataSubtype]]
+
+    @abstractmethod
+    def build(
+        self,
+        frame_annotation: types.FrameAnnotation,
+        sequence_annotation: types.SequenceAnnotation,
+        *,
+        load_blobs: bool = True,
+        **kwargs,
+    ) -> FrameDataSubtype:
+        """An abstract method to build the frame data based on raw frame/sequence
+        annotations, load the binary data and adjust them according to the metadata.
+        """
+        raise NotImplementedError()
+
+
+class GenericFrameDataBuilder(FrameDataBuilderBase[FrameDataSubtype], ABC):
+    """
+    A class to build a FrameData object, load and process the binary data (crop and
+    resize). This is an abstract class for extending to build FrameData subtypes. Most
+    users need to use concrete `FrameDataBuilder` class instead.
+    Beware that modifications of frame data are done in-place.
+
+    Args:
+        dataset_root: The root folder of the dataset; all paths in frame / sequence
+            annotations are defined w.r.t. this root. Has to be set if any of the
+            load_* flags below is true.
+        load_images: Enable loading the frame RGB data.
+        load_depths: Enable loading the frame depth maps.
+        load_depth_masks: Enable loading the frame depth map masks denoting the
+            depth values used for evaluation (the points consistent across views).
+        load_masks: Enable loading frame foreground masks.
+        load_point_clouds: Enable loading sequence-level point clouds.
+        max_points: Cap on the number of loaded points in the point cloud;
+            if reached, they are randomly sampled without replacement.
+        mask_images: Whether to mask the images with the loaded foreground masks;
+            0 value is used for background.
+        mask_depths: Whether to mask the depth maps with the loaded foreground
+            masks; 0 value is used for background.
+        image_height: The height of the returned images, masks, and depth maps;
+            aspect ratio is preserved during cropping/resizing.
+        image_width: The width of the returned images, masks, and depth maps;
+            aspect ratio is preserved during cropping/resizing.
+        box_crop: Enable cropping of the image around the bounding box inferred
+            from the foreground region of the loaded segmentation mask; masks
+            and depth maps are cropped accordingly; cameras are corrected.
+        box_crop_mask_thr: The threshold used to separate pixels into foreground
+            and background based on the foreground_probability mask; if no value
+            is greater than this threshold, the loader lowers it and repeats.
+        box_crop_context: The amount of additional padding added to each
+            dimension of the cropping bounding box, relative to box size.
+        path_manager: Optionally a PathManager for interpreting paths in a special way.
+    """
+
+    # Default values of the options documented in the class docstring.
+    dataset_root: Optional[str] = None
+    load_images: bool = True
+    load_depths: bool = True
+    load_depth_masks: bool = True
+    load_masks: bool = True
+    load_point_clouds: bool = False
+    max_points: int = 0
+    mask_images: bool = False
+    mask_depths: bool = False
+    image_height: Optional[int] = 800
+    image_width: Optional[int] = 800
+    box_crop: bool = True
+    box_crop_mask_thr: float = 0.4
+    box_crop_context: float = 0.3
+    path_manager: Any = None
+
+    def __post_init__(self) -> None:
+        load_any_blob = (
+            self.load_images
+            or self.load_depths
+            or self.load_depth_masks
+            or self.load_masks
+            or self.load_point_clouds
+        )
+        if load_any_blob and self.dataset_root is None:
+            raise ValueError(
+                "dataset_root must be set to load any blob data. "
+                "Make sure it is set in either FrameDataBuilder or Dataset params."
+            )
+
+        if load_any_blob and not self._exists_in_dataset_root(""):
+            raise ValueError(
+                f"dataset_root is passed but {self.dataset_root} does not exist."
+            )
+
+    def build(
+        self,
+        frame_annotation: types.FrameAnnotation,
+        sequence_annotation: types.SequenceAnnotation,
+        *,
+        load_blobs: bool = True,
+        **kwargs,
+    ) -> FrameDataSubtype:
+        """Builds the frame data based on raw frame/sequence annotations, loads the
+        binary data and adjust them according to the metadata. The processing includes:
+            * if box_crop is set, the image/mask/depth are cropped with the bounding
+                box provided or estimated from MaskAnnotation,
+            * if image_height/image_width are set, the image/mask/depth are resized to
+                fit that resolution. Note that the aspect ratio is preserved, and the
+                (possibly cropped) image is pasted into the top-left corner. In the
+                resulting frame_data, mask_crop field corresponds to the mask of the
+                pasted image.
+
+        Args:
+            frame_annotation: frame annotation
+            sequence_annotation: sequence annotation
+            load_blobs: if the function should attempt loading the image, depth map
+                and mask, and foreground mask
+            **kwargs: accepted for interface compatibility; not used in this
+                implementation.
+
+        Returns:
+            The constructed FrameData object.
+
+        Raises:
+            ValueError: If images are to be loaded but no image path can be formed
+                (the annotation has no image path or dataset_root is None).
+        """
+
+        point_cloud = sequence_annotation.point_cloud
+
+        # Start from the scalar metadata; blobs and the camera are attached below.
+        frame_data = self.frame_data_type(
+            frame_number=safe_as_tensor(frame_annotation.frame_number, torch.long),
+            frame_timestamp=safe_as_tensor(
+                frame_annotation.frame_timestamp, torch.float
+            ),
+            sequence_name=frame_annotation.sequence_name,
+            sequence_category=sequence_annotation.category,
+            camera_quality_score=safe_as_tensor(
+                sequence_annotation.viewpoint_quality_score, torch.float
+            ),
+            point_cloud_quality_score=safe_as_tensor(
+                point_cloud.quality_score, torch.float
+            )
+            if point_cloud is not None
+            else None,
+        )
+
+        # The foreground mask is handled first because the image masking, the
+        # depth masking and the bounding box estimation below all consume it.
+        fg_mask_np: Optional[np.ndarray] = None
+        mask_annotation = frame_annotation.mask
+        if mask_annotation is not None:
+            if load_blobs and self.load_masks:
+                fg_mask_np, mask_path = self._load_fg_probability(frame_annotation)
+                frame_data.mask_path = mask_path
+                frame_data.fg_probability = safe_as_tensor(fg_mask_np, torch.float)
+
+            # Prefer the annotated box; fall back to one estimated from the mask
+            # (only possible when the mask has actually been loaded).
+            bbox_xywh = mask_annotation.bounding_box_xywh
+            if bbox_xywh is None and fg_mask_np is not None:
+                bbox_xywh = get_bbox_from_mask(fg_mask_np, self.box_crop_mask_thr)
+
+            frame_data.bbox_xywh = safe_as_tensor(bbox_xywh, torch.float)
+
+        if frame_annotation.image is not None:
+            image_size_hw = safe_as_tensor(frame_annotation.image.size, torch.long)
+            frame_data.image_size_hw = image_size_hw  # original image size
+            # image size after crop/resize
+            frame_data.effective_image_size_hw = image_size_hw
+            # The path is recorded even when the image itself is not loaded.
+            image_path = None
+            dataset_root = self.dataset_root
+            if frame_annotation.image.path is not None and dataset_root is not None:
+                image_path = os.path.join(dataset_root, frame_annotation.image.path)
+                frame_data.image_path = image_path
+
+            if load_blobs and self.load_images:
+                if image_path is None:
+                    raise ValueError("Image path is required to load images.")
+
+                image_np = load_image(self._local_path(image_path))
+                frame_data.image_rgb = self._postprocess_image(
+                    image_np, frame_annotation.image.size, frame_data.fg_probability
+                )
+
+        if (
+            load_blobs
+            and self.load_depths
+            and frame_annotation.depth is not None
+            and frame_annotation.depth.path is not None
+        ):
+            (
+                frame_data.depth_map,
+                frame_data.depth_path,
+                frame_data.depth_mask,
+            ) = self._load_mask_depth(frame_annotation, fg_mask_np)
+
+        if load_blobs and self.load_point_clouds and point_cloud is not None:
+            pcl_path = self._fix_point_cloud_path(point_cloud.path)
+            frame_data.sequence_point_cloud = load_pointcloud(
+                self._local_path(pcl_path), max_points=self.max_points
+            )
+            frame_data.sequence_point_cloud_path = pcl_path
+
+        if frame_annotation.viewpoint is not None:
+            frame_data.camera = self._get_pytorch3d_camera(frame_annotation)
+
+        # Cropping runs before resizing; both modify frame_data in-place.
+        # Note that the crop is attempted whenever box_crop is set, regardless of
+        # load_blobs.
+        if self.box_crop:
+            frame_data.crop_by_metadata_bbox_(self.box_crop_context)
+
+        # Resizing is skipped unless both target dimensions are given.
+        if self.image_height is not None and self.image_width is not None:
+            new_size = (self.image_height, self.image_width)
+            frame_data.resize_frame_(
+                new_size_hw=torch.tensor(new_size, dtype=torch.long),  # pyre-ignore
+            )
+
+        return frame_data
+
+    def _load_fg_probability(
+        self, entry: types.FrameAnnotation
+    ) -> Tuple[np.ndarray, str]:
+        assert self.dataset_root is not None and entry.mask is not None
+        full_path = os.path.join(self.dataset_root, entry.mask.path)
+        fg_probability = load_mask(self._local_path(full_path))
+        if fg_probability.shape[-2:] != entry.image.size:
+            raise ValueError(
+                f"bad mask size: {fg_probability.shape[-2:]} vs {entry.image.size}!"
+            )
+
+        return fg_probability, full_path
+
+    def _postprocess_image(
+        self,
+        image_np: np.ndarray,
+        image_size: Tuple[int, int],
+        fg_probability: Optional[torch.Tensor],
+    ) -> torch.Tensor:
+        image_rgb = safe_as_tensor(image_np, torch.float)
+
+        if image_rgb.shape[-2:] != image_size:
+            raise ValueError(f"bad image size: {image_rgb.shape[-2:]} vs {image_size}!")
+
+        if self.mask_images:
+            assert fg_probability is not None
+            image_rgb *= fg_probability
+
+        return image_rgb
+
+    def _load_mask_depth(
+        self,
+        entry: types.FrameAnnotation,
+        fg_mask: Optional[np.ndarray],
+    ) -> Tuple[torch.Tensor, str, torch.Tensor]:
+        entry_depth = entry.depth
+        dataset_root = self.dataset_root
+        assert dataset_root is not None
+        assert entry_depth is not None and entry_depth.path is not None
+        path = os.path.join(dataset_root, entry_depth.path)
+        depth_map = load_depth(self._local_path(path), entry_depth.scale_adjustment)
+
+        if self.mask_depths:
+            assert fg_mask is not None
+            depth_map *= fg_mask
+
+        mask_path = entry_depth.mask_path
+        if self.load_depth_masks and mask_path is not None:
+            mask_path = os.path.join(dataset_root, mask_path)
+            depth_mask = load_depth_mask(self._local_path(mask_path))
+        else:
+            depth_mask = (depth_map > 0.0).astype(np.float32)
+
+        return torch.tensor(depth_map), path, torch.tensor(depth_mask)
+
+    def _get_pytorch3d_camera(
+        self,
+        entry: types.FrameAnnotation,
+    ) -> PerspectiveCameras:
+        entry_viewpoint = entry.viewpoint
+        assert entry_viewpoint is not None
+        # principal point and focal length
+        principal_point = torch.tensor(
+            entry_viewpoint.principal_point, dtype=torch.float
+        )
+        focal_length = torch.tensor(entry_viewpoint.focal_length, dtype=torch.float)
+
+        format = entry_viewpoint.intrinsics_format
+        if entry_viewpoint.intrinsics_format == "ndc_norm_image_bounds":
+            # legacy PyTorch3D NDC format
+            # convert to pixels unequally and convert to ndc equally
+            image_size_as_list = list(reversed(entry.image.size))
+            image_size_wh = torch.tensor(image_size_as_list, dtype=torch.float)
+            per_axis_scale = image_size_wh / image_size_wh.min()
+            focal_length = focal_length * per_axis_scale
+            principal_point = principal_point * per_axis_scale
+        elif entry_viewpoint.intrinsics_format != "ndc_isotropic":
+            raise ValueError(f"Unknown intrinsics format: {format}")
+
+        return PerspectiveCameras(
+            focal_length=focal_length[None],
+            principal_point=principal_point[None],
+            R=torch.tensor(entry_viewpoint.R, dtype=torch.float)[None],
+            T=torch.tensor(entry_viewpoint.T, dtype=torch.float)[None],
+        )
+
+    def _fix_point_cloud_path(self, path: str) -> str:
+        """
+        Fix up a point cloud path from the dataset.
+        Some files in Co3Dv2 have an accidental absolute path stored.
+        """
+        unwanted_prefix = (
+            "/large_experiments/p3/replay/datasets/co3d/co3d45k_220512/export_v23/"
+        )
+        if path.startswith(unwanted_prefix):
+            path = path[len(unwanted_prefix) :]
+        assert self.dataset_root is not None
+        return os.path.join(self.dataset_root, path)
+
+    def _local_path(self, path: str) -> str:
+        if self.path_manager is None:
+            return path
+        return self.path_manager.get_local_path(path)
+
+    def _exists_in_dataset_root(self, relpath) -> bool:
+        if not self.dataset_root:
+            return False
+
+        full_path = os.path.join(self.dataset_root, relpath)
+        if self.path_manager is None:
+            return os.path.exists(full_path)
+        else:
+            return self.path_manager.exists(full_path)
+
+
+@registry.register
+class FrameDataBuilder(GenericWorkaround, GenericFrameDataBuilder[FrameData]):
+    """
+    A concrete class to build a FrameData object, load and process the binary data (crop
+    and resize). Beware that modifications of frame data are done in-place. Please see
+    the documentation for `GenericFrameDataBuilder` for the description of parameters
+    and methods.
+    """
+
+    # The type that `build` instantiates (through `self.frame_data_type`): plain
+    # FrameData.
+    frame_data_type: ClassVar[Type[FrameData]] = FrameData
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..8caf581dfae6511acd41da01ecf835ec06c43c10
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset.py
@@ -0,0 +1,669 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import copy
+import functools
+import gzip
+import hashlib
+import json
+import logging
+import os
+import random
+import warnings
+from collections import defaultdict
+from itertools import islice
+from typing import (
+    Any,
+    ClassVar,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    Type,
+    TYPE_CHECKING,
+    Union,
+)
+
+from pytorch3d.implicitron.dataset import types
+from pytorch3d.implicitron.dataset.dataset_base import DatasetBase
+from pytorch3d.implicitron.dataset.frame_data import FrameData, FrameDataBuilder
+from pytorch3d.implicitron.dataset.utils import is_known_frame_scalar
+from pytorch3d.implicitron.tools.config import registry, ReplaceableBase
+from pytorch3d.renderer.camera_utils import join_cameras_as_batch
+from pytorch3d.renderer.cameras import CamerasBase
+
+from tqdm import tqdm
+
+
+logger = logging.getLogger(__name__)
+
+
+# FrameAnnotsEntry is a TypedDict for static type checkers only; at runtime it
+# is simply `dict`.
+if TYPE_CHECKING:
+    from typing import TypedDict
+
+    # One frame entry: the subset name (may be None) and the frame annotation.
+    class FrameAnnotsEntry(TypedDict):
+        subset: Optional[str]
+        frame_annotation: types.FrameAnnotation
+
+else:
+    FrameAnnotsEntry = dict
+
+
+@registry.register
+class JsonIndexDataset(DatasetBase, ReplaceableBase):
+    """
+    A dataset with annotations in json files like the Common Objects in 3D
+    (CO3D) dataset.
+
+    Metadata-related args::
+        frame_annotations_file: A zipped json file containing metadata of the
+            frames in the dataset, serialized List[types.FrameAnnotation].
+        sequence_annotations_file: A zipped json file containing metadata of the
+            sequences in the dataset, serialized List[types.SequenceAnnotation].
+        subset_lists_file: A json file containing the lists of frames
+            corresponding to different subsets (e.g. train/val/test) of the dataset;
+            format: {subset: (sequence_name, frame_id, file_path)}.
+        subsets: Restrict frames/sequences only to the given list of subsets
+            as defined in subset_lists_file (see above).
+        limit_to: Limit the dataset to the first #limit_to frames (after other
+            filters have been applied).
+        limit_sequences_to: Limit the dataset to the first
+            #limit_sequences_to sequences (after other sequence filters have been
+            applied but before frame-based filters).
+        pick_sequence: A list of sequence names to restrict the dataset to.
+        exclude_sequence: A list of the names of the sequences to exclude.
+        limit_category_to: Restrict the dataset to the given list of categories.
+        remove_empty_masks: Removes the frames with no active foreground pixels
+            in the segmentation mask after thresholding (see box_crop_mask_thr).
+        n_frames_per_sequence: If > 0, randomly samples #n_frames_per_sequence
+            frames in each sequences uniformly without replacement if it has
+            more frames than that; applied before other frame-level filters.
+        seed: The seed of the random generator sampling #n_frames_per_sequence
+            random frames per sequence.
+        sort_frames: Enable frame annotations sorting to group frames from the
+            same sequences together and order them by timestamps
+        eval_batches: A list of batches that form the evaluation set;
+            list of batch-sized lists of indices corresponding to __getitem__
+            of this class, thus it can be used directly as a batch sampler.
+        eval_batch_index:
+            ( Optional[List[List[Union[Tuple[str, int, str], Tuple[str, int]]]] )
+            A list of batches of frames described as (sequence_name, frame_idx)
+            that can form the evaluation set, `eval_batches` will be set from this.
+
+    Blob-loading parameters:
+        dataset_root: The root folder of the dataset; all the paths in jsons are
+            specified relative to this root (but not json paths themselves).
+        load_images: Enable loading the frame RGB data.
+        load_depths: Enable loading the frame depth maps.
+        load_depth_masks: Enable loading the frame depth map masks denoting the
+            depth values used for evaluation (the points consistent across views).
+        load_masks: Enable loading frame foreground masks.
+        load_point_clouds: Enable loading sequence-level point clouds.
+        max_points: Cap on the number of loaded points in the point cloud;
+            if reached, they are randomly sampled without replacement.
+        mask_images: Whether to mask the images with the loaded foreground masks;
+            0 value is used for background.
+        mask_depths: Whether to mask the depth maps with the loaded foreground
+            masks; 0 value is used for background.
+        image_height: The height of the returned images, masks, and depth maps;
+            aspect ratio is preserved during cropping/resizing.
+        image_width: The width of the returned images, masks, and depth maps;
+            aspect ratio is preserved during cropping/resizing.
+        box_crop: Enable cropping of the image around the bounding box inferred
+            from the foreground region of the loaded segmentation mask; masks
+            and depth maps are cropped accordingly; cameras are corrected.
+        box_crop_mask_thr: The threshold used to separate pixels into foreground
+            and background based on the foreground_probability mask; if no value
+            is greater than this threshold, the loader lowers it and repeats.
+        box_crop_context: The amount of additional padding added to each
+            dimension of the cropping bounding box, relative to box size.
+    """
+
+    frame_annotations_type: ClassVar[
+        Type[types.FrameAnnotation]
+    ] = types.FrameAnnotation
+
+    path_manager: Any = None
+    frame_annotations_file: str = ""
+    sequence_annotations_file: str = ""
+    subset_lists_file: str = ""
+    subsets: Optional[List[str]] = None
+    limit_to: int = 0
+    limit_sequences_to: int = 0
+    pick_sequence: Tuple[str, ...] = ()
+    exclude_sequence: Tuple[str, ...] = ()
+    limit_category_to: Tuple[int, ...] = ()
+    dataset_root: str = ""
+    load_images: bool = True
+    load_depths: bool = True
+    load_depth_masks: bool = True
+    load_masks: bool = True
+    load_point_clouds: bool = False
+    max_points: int = 0
+    mask_images: bool = False
+    mask_depths: bool = False
+    image_height: Optional[int] = 800
+    image_width: Optional[int] = 800
+    box_crop: bool = True
+    box_crop_mask_thr: float = 0.4
+    box_crop_context: float = 0.3
+    remove_empty_masks: bool = True
+    n_frames_per_sequence: int = -1
+    seed: int = 0
+    sort_frames: bool = False
+    eval_batches: Any = None
+    eval_batch_index: Any = None
+    # initialised in __post_init__
+    # commented because of OmegaConf (for tests to pass)
+    # _frame_data_builder: FrameDataBuilder = field(init=False)
+    # frame_annots: List[FrameAnnotsEntry] = field(init=False)
+    # seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False)
+    # _seq_to_idx: Dict[str, List[int]] = field(init=False)
+
+    def __post_init__(self) -> None:
+        self._load_frames()
+        self._load_sequences()
+        if self.sort_frames:
+            self._sort_frames()
+        self._load_subset_lists()
+        self._filter_db()  # also computes sequence indices
+        self._extract_and_set_eval_batches()
+
+        # pyre-ignore
+        self._frame_data_builder = FrameDataBuilder(
+            dataset_root=self.dataset_root,
+            load_images=self.load_images,
+            load_depths=self.load_depths,
+            load_depth_masks=self.load_depth_masks,
+            load_masks=self.load_masks,
+            load_point_clouds=self.load_point_clouds,
+            max_points=self.max_points,
+            mask_images=self.mask_images,
+            mask_depths=self.mask_depths,
+            image_height=self.image_height,
+            image_width=self.image_width,
+            box_crop=self.box_crop,
+            box_crop_mask_thr=self.box_crop_mask_thr,
+            box_crop_context=self.box_crop_context,
+            path_manager=self.path_manager,
+        )
+        logger.info(str(self))
+
+    def _extract_and_set_eval_batches(self) -> None:
+        """
+        Sets eval_batches based on input eval_batch_index.
+        """
+        if self.eval_batch_index is not None:
+            if self.eval_batches is not None:
+                raise ValueError(
+                    "Cannot define both eval_batch_index and eval_batches."
+                )
+            self.eval_batches = self.seq_frame_index_to_dataset_index(
+                self.eval_batch_index
+            )
+
+    def join(self, other_datasets: Iterable[DatasetBase]) -> None:
+        """
+        Join the dataset with other JsonIndexDataset objects.
+
+        Args:
+            other_datasets: JsonIndexDataset objects to be joined into the
+                current dataset. Any iterable is accepted (also a one-shot
+                iterator); an empty iterable adds nothing.
+
+        Raises:
+            ValueError: If an entry is not a JsonIndexDataset, or if only some
+                of the joined datasets have their eval_batches defined.
+        """
+        # Materialize once: the argument is traversed several times below.
+        others = list(other_datasets)
+        if not all(isinstance(d, JsonIndexDataset) for d in others):
+            raise ValueError("This function can only join a list of JsonIndexDataset")
+        all_eval_batches = [
+            self.eval_batches,
+            *[d.eval_batches for d in others],  # pyre-ignore[16]
+        ]
+        n_defined = sum(ba is not None for ba in all_eval_batches)
+        if n_defined not in (0, len(all_eval_batches)):
+            raise ValueError(
+                "When joining datasets, either all joined datasets have to have their"
+                " eval_batches defined, or all should have their eval batches undefined."
+            )
+        for d in others:
+            # Later datasets override earlier ones on duplicate sequence names.
+            self.frame_annots.extend(d.frame_annots)  # pyre-ignore[16]
+            self.seq_annots.update(d.seq_annots)  # pyre-ignore[16]
+        # NOTE(review): batch indices of the joined datasets are concatenated
+        # as-is (not shifted by the frames placed before them) - confirm intent.
+        if self.eval_batches is not None:
+            self.eval_batches = sum(all_eval_batches, [])
+        self._invalidate_indexes(filter_seq_annots=True)
+
+    def is_filtered(self) -> bool:
+        """
+        Tell whether any filtering option of the dataset is active, in which case
+        frame annotations stored on the disk might be missing in the dataset object.
+
+        Returns:
+            is_filtered: `True` if the dataset has been filtered, else `False`.
+        """
+        flag_or_limit = self.remove_empty_masks or self.limit_to > 0
+        return (
+            flag_or_limit
+            or self.limit_sequences_to > 0
+            or len(self.pick_sequence) > 0
+            or len(self.exclude_sequence) > 0
+            or len(self.limit_category_to) > 0
+            or self.n_frames_per_sequence > 0
+        )
+
+    def seq_frame_index_to_dataset_index(
+        self,
+        seq_frame_index: List[List[Union[Tuple[str, int, str], Tuple[str, int]]]],
+        allow_missing_indices: bool = False,
+        remove_missing_indices: bool = False,
+        suppress_missing_index_warning: bool = True,
+    ) -> Union[List[List[Optional[int]]], List[List[int]]]:
+        """
+        Obtain indices into the dataset object given a list of frame ids.
+
+        Args:
+            seq_frame_index: The list of frame ids specified as
+                `List[List[Tuple[sequence_name:str, frame_number:int]]]`. Optionally,
+                image paths relative to the dataset_root can be specified as well:
+                `List[List[Tuple[sequence_name:str, frame_number:int, image_path:str]]]`
+            allow_missing_indices: If `False`, throws an IndexError upon reaching the first
+                entry from `seq_frame_index` which is missing in the dataset.
+                Otherwise, depending on `remove_missing_indices`, either returns `None`
+                in place of missing entries or removes the indices of missing entries.
+            remove_missing_indices: Active when `allow_missing_indices=True`.
+                If `False`, returns `None` in place of `seq_frame_index` entries that
+                are not present in the dataset.
+                If `True` removes missing indices (and emptied batches) from the result.
+            suppress_missing_index_warning:
+                Active if `allow_missing_indices==True`. Suppresses a warning message
+                in case an entry from `seq_frame_index` is missing in the dataset
+                (expected in certain cases - e.g. when setting
+                `self.remove_empty_masks=True`).
+
+        Returns:
+            dataset_idx: Indices of dataset entries corresponding to `seq_frame_index`.
+        """
+        _dataset_seq_frame_n_index = {
+            seq: {
+                # pyre-ignore[16]
+                self.frame_annots[idx]["frame_annotation"].frame_number: idx
+                for idx in seq_idx
+            }
+            # pyre-ignore[16]
+            for seq, seq_idx in self._seq_to_idx.items()
+        }
+
+        def _get_dataset_idx(
+            seq_name: str, frame_no: int, path: Optional[str] = None
+        ) -> Optional[int]:
+            idx_seq = _dataset_seq_frame_n_index.get(seq_name, None)
+            idx = idx_seq.get(frame_no, None) if idx_seq is not None else None
+            if idx is None:
+                msg = (
+                    f"sequence_name={seq_name} / frame_number={frame_no}"
+                    " not in the dataset!"
+                )
+                if not allow_missing_indices:
+                    raise IndexError(msg)
+                if not suppress_missing_index_warning:
+                    warnings.warn(msg)
+                return idx
+            if path is not None:
+                # Check (via assert) that the given path matches the one stored
+                # in self.frame_annots; the check is skipped when path is None.
+                assert os.path.normpath(
+                    # pyre-ignore[16]
+                    self.frame_annots[idx]["frame_annotation"].image.path
+                ) == os.path.normpath(
+                    path
+                ), f"Inconsistent frame indices {seq_name, frame_no, path}."
+            return idx
+
+        dataset_idx = [
+            [_get_dataset_idx(*b) for b in batch]  # pyre-ignore [6]
+            for batch in seq_frame_index
+        ]
+
+        if allow_missing_indices and remove_missing_indices:
+            # drop all None indices, and also the batches left with no entry
+            valid_dataset_idx = [
+                [b for b in batch if b is not None] for batch in dataset_idx
+            ]
+            return [batch for batch in valid_dataset_idx if len(batch) > 0]
+
+        return dataset_idx
+
+    def subset_from_frame_index(
+        self,
+        frame_index: List[Union[Tuple[str, int], Tuple[str, int, str]]],
+        allow_missing_indices: bool = True,
+    ) -> "JsonIndexDataset":
+        """
+        Generate a dataset subset given the list of frames specified in `frame_index`.
+
+        Args:
+            frame_index: The list of frame identifiers (as stored in the metadata)
+                specified as `List[Tuple[sequence_name:str, frame_number:int]]`. Optionally,
+                image paths relative to the dataset_root can be specified as well:
+                `List[Tuple[sequence_name:str, frame_number:int, image_path:str]]`,
+                in the latter case, if an image_path does not match the stored path,
+                an error is raised.
+            allow_missing_indices: If `False`, throws an IndexError upon reaching the first
+                entry from `frame_index` which is missing in the dataset. Otherwise
+                (honored only when `self.is_filtered()` is true), generates a subset
+                consisting of the frame entries that actually exist in the dataset.
+        """
+        # Get the indices into the frame annots.
+        dataset_indices = self.seq_frame_index_to_dataset_index(
+            [frame_index],
+            allow_missing_indices=self.is_filtered() and allow_missing_indices,
+        )[0]
+        valid_dataset_indices = [i for i in dataset_indices if i is not None]
+
+        # Deep copy the whole dataset except frame_annots, which are large so we
+        # deep copy only the requested subset of frame_annots.
+        memo = {id(self.frame_annots): None}  # pyre-ignore[16]
+        dataset_new = copy.deepcopy(self, memo)
+        dataset_new.frame_annots = copy.deepcopy(
+            [self.frame_annots[i] for i in valid_dataset_indices]
+        )
+
+        # This drops the annotations of sequences that are left without frames.
+        dataset_new._invalidate_indexes(filter_seq_annots=True)
+
+        # Finally annotate the frame annotations with the name of the subset
+        # stored in meta.
+        for frame_annot in dataset_new.frame_annots:
+            frame_annotation = frame_annot["frame_annotation"]
+            if frame_annotation.meta is not None:
+                frame_annot["subset"] = frame_annotation.meta.get("frame_type", None)
+
+        # A sanity check - this will crash in case some entries from frame_index are missing
+        # in dataset_new.
+        valid_frame_index = [
+            fi for fi, di in zip(frame_index, dataset_indices) if di is not None
+        ]
+        dataset_new.seq_frame_index_to_dataset_index(
+            [valid_frame_index], allow_missing_indices=False
+        )
+
+        return dataset_new
+
+    def __str__(self) -> str:
+        """Returns a short summary string with the number of frames."""
+        return f"JsonIndexDataset #frames={len(self.frame_annots)}"  # pyre-ignore[16]
+
+    def __len__(self) -> int:
+        """Returns the number of frame annotation entries held by the dataset."""
+        return len(self.frame_annots)  # pyre-ignore[16]
+
+    def _get_frame_type(self, entry: FrameAnnotsEntry) -> Optional[str]:
+        return entry["subset"]  # None when no subset was assigned to the frame
+
+    def get_all_train_cameras(self) -> CamerasBase:
+        """Returns one camera batch with the cameras of all the known frames."""
+        logger.info("Loading all train cameras.")
+        known_cameras = []
+        # pyre-ignore[16]
+        for position, annot in enumerate(tqdm(self.frame_annots)):
+            subset_name = self._get_frame_type(annot)
+            # A frame without a subset label cannot be classified.
+            if subset_name is None:
+                raise ValueError("subsets not loaded")
+            if not is_known_frame_scalar(subset_name):
+                continue
+            known_cameras.append(self[position].camera)
+        return join_cameras_as_batch(known_cameras)
+
+    def __getitem__(self, index) -> FrameData:
+        """
+        Builds the FrameData of the frame stored at position `index`; an index
+        at or beyond the number of frames raises an IndexError.
+        """
+        # pyre-ignore[16]
+        if index >= len(self.frame_annots):
+            raise IndexError(f"index {index} out of range {len(self.frame_annots)}")
+
+        annots_entry = self.frame_annots[index]
+        frame_annot = annots_entry["frame_annotation"]
+        frame_data = self._frame_data_builder.build(  # pyre-ignore
+            frame_annot, self.seq_annots[frame_annot.sequence_name]  # pyre-ignore
+        )
+        # Optional field
+        frame_data.frame_type = self._get_frame_type(annots_entry)
+        return frame_data
+
+    def _load_frames(self) -> None:
+        """
+        Reads the gzipped frame annotations file into `self.frame_annots`, with
+        the subset of each entry left as None; raises ValueError if it is empty.
+        """
+        logger.info(f"Loading Co3D frames from {self.frame_annotations_file}.")
+        gz_path = self._local_path(self.frame_annotations_file)
+        with gzip.open(gz_path, "rt", encoding="utf8") as stream:
+            loaded = types.load_dataclass(stream, List[self.frame_annotations_type])
+        if not loaded:
+            raise ValueError("Empty dataset!")
+        entries = [FrameAnnotsEntry(frame_annotation=a, subset=None) for a in loaded]
+        self.frame_annots = entries  # pyre-ignore[16]
+
+    def _load_sequences(self) -> None:
+        """Loads the sequence annotations into `self.seq_annots`, keyed by name."""
+        logger.info(f"Loading Co3D sequences from {self.sequence_annotations_file}.")
+        gz_path = self._local_path(self.sequence_annotations_file)
+        with gzip.open(gz_path, "rt", encoding="utf8") as stream:
+            loaded = types.load_dataclass(stream, List[types.SequenceAnnotation])
+        if not loaded:
+            raise ValueError("Empty sequences file!")
+        self.seq_annots = {sa.sequence_name: sa for sa in loaded}  # pyre-ignore[16]
+
+    def _load_subset_lists(self) -> None:
+        """
+        Labels each frame annotation with the subset listing its image path in
+        the json file `subset_lists_file` (subset name -> 3-tuples ending with the
+        image path); warns on unlisted frames. No-op if no file is configured.
+        """
+        if not self.subset_lists_file:
+            return
+        logger.info(f"Loading Co3D subset lists from {self.subset_lists_file}.")
+        # utf8 as in the annotation loaders, not the platform default encoding.
+        subset_lists_path = self._local_path(self.subset_lists_file)
+        with open(subset_lists_path, "r", encoding="utf8") as f:
+            subset_to_seq_frame = json.load(f)
+        frame_path_to_subset = {
+            path: subset
+            for subset, frames in subset_to_seq_frame.items()
+            for _, _, path in frames
+        }
+        for frame in self.frame_annots:  # pyre-ignore[16]
+            image_path = frame["frame_annotation"].image.path
+            frame["subset"] = frame_path_to_subset.get(image_path)
+            if frame["subset"] is None:
+                warnings.warn("Subset lists are given but don't include " + image_path)
+
+    def _sort_frames(self) -> None:
+        """Groups the frames by sequence name, ordered by timestamp within each."""
+
+        def _order(entry):
+            annotation = entry["frame_annotation"]
+            # A missing (or zero) timestamp sorts as 0.
+            return annotation.sequence_name, annotation.frame_timestamp or 0
+
+        # pyre-ignore[16]
+        self.frame_annots = sorted(self.frame_annots, key=_order)
+
+    def _filter_db(self) -> None:
+        """
+        Filters frame_annots / seq_annots in place, applying in this order:
+        remove_empty_masks, subsets, limit_category_to, pick / exclude_sequence,
+        limit_sequences_to, n_frames_per_sequence and limit_to.
+        """
+        if self.remove_empty_masks:
+            logger.info("Removing images with empty masks.")
+            # pyre-ignore[16]
+            old_len = len(self.frame_annots)
+            msg = "remove_empty_masks needs every MaskAnnotation.mass to be set."
+
+            def positive_mass(frame_annot: types.FrameAnnotation) -> bool:
+                mask = frame_annot.mask
+                if mask is None:
+                    return False
+                if mask.mass is None:
+                    raise ValueError(msg)
+                return mask.mass > 1  # NOTE(review): cutoff is 1, not 0 - confirm
+
+            self.frame_annots = [
+                frame
+                for frame in self.frame_annots
+                if positive_mass(frame["frame_annotation"])
+            ]
+            logger.info("... filtered %d -> %d" % (old_len, len(self.frame_annots)))
+
+        # this has to be called after joining with categories!!
+        subsets = self.subsets
+        if subsets:
+            if not self.subset_lists_file:
+                raise ValueError(
+                    "Subset filter is on but subset_lists_file was not given"
+                )
+            logger.info(f"Limiting Co3D dataset to the '{subsets}' subsets.")
+            # keep only the frames whose subset is among the requested ones
+            self.frame_annots = [
+                entry for entry in self.frame_annots if entry["subset"] in subsets
+            ]
+            if len(self.frame_annots) == 0:
+                raise ValueError(f"There are no frames in the '{subsets}' subsets!")
+            self._invalidate_indexes(filter_seq_annots=True)
+
+        if len(self.limit_category_to) > 0:
+            logger.info(f"Limiting dataset to categories: {self.limit_category_to}")
+            # pyre-ignore[16]
+            self.seq_annots = {
+                name: entry
+                for name, entry in self.seq_annots.items()
+                if entry.category in self.limit_category_to
+            }
+
+        # sequence filters: keep only the picked / drop the excluded names
+        for prefix in ("pick", "exclude"):
+            orig_len = len(self.seq_annots)
+            attr = f"{prefix}_sequence"
+            arr = getattr(self, attr)
+            if len(arr) > 0:
+                logger.info(f"{attr}: {str(arr)}")
+                self.seq_annots = {
+                    name: entry
+                    for name, entry in self.seq_annots.items()
+                    if (name in arr) == (prefix == "pick")
+                }
+                logger.info("... filtered %d -> %d" % (orig_len, len(self.seq_annots)))
+
+        if self.limit_sequences_to > 0:
+            self.seq_annots = dict(
+                islice(self.seq_annots.items(), self.limit_sequences_to)
+            )
+
+        # retain only frames from retained sequences
+        self.frame_annots = [
+            f
+            for f in self.frame_annots
+            if f["frame_annotation"].sequence_name in self.seq_annots
+        ]
+
+        self._invalidate_indexes()
+
+        if self.n_frames_per_sequence > 0:
+            logger.info(f"Taking max {self.n_frames_per_sequence} per sequence.")
+            keep_idx = []
+            # pyre-ignore[16]
+            for seq, seq_indices in self._seq_to_idx.items():
+                # infer the seed from the sequence name, this is reproducible
+                # and makes the selection differ for different sequences
+                seed = _seq_name_to_seed(seq) + self.seed
+                seq_idx_shuffled = random.Random(seed).sample(
+                    sorted(seq_indices), len(seq_indices)
+                )
+                keep_idx.extend(seq_idx_shuffled[: self.n_frames_per_sequence])
+            logger.info(
+                "... filtered %d -> %d" % (len(self.frame_annots), len(keep_idx))
+            )
+            self.frame_annots = [self.frame_annots[i] for i in keep_idx]
+            self._invalidate_indexes(filter_seq_annots=False)
+            # sequences are not decimated, so self.seq_annots is valid
+
+        if self.limit_to > 0 and self.limit_to < len(self.frame_annots):
+            logger.info(
+                "limit_to: filtered %d -> %d" % (len(self.frame_annots), self.limit_to)
+            )
+            self.frame_annots = self.frame_annots[: self.limit_to]
+            self._invalidate_indexes(filter_seq_annots=True)
+
+    def _invalidate_indexes(self, filter_seq_annots: bool = False) -> None:
+        """
+        Rebuilds `_seq_to_idx` from `frame_annots`; with `filter_seq_annots`, also
+        drops the `seq_annots` entries of sequences that have no frame left.
+        """
+        self._invalidate_seq_to_idx()
+        if not filter_seq_annots:
+            return
+        seq_to_idx = self._seq_to_idx  # pyre-ignore[16]
+        # pyre-ignore[16]
+        self.seq_annots = {
+            name: annot for name, annot in self.seq_annots.items() if name in seq_to_idx
+        }
+
+    def _invalidate_seq_to_idx(self) -> None:
+        """Rebuilds `_seq_to_idx`: sequence name -> positions of its frames."""
+        positions_by_sequence = defaultdict(list)
+        for position, annots in enumerate(self.frame_annots):  # pyre-ignore[16]
+            sequence_name = annots["frame_annotation"].sequence_name
+            positions_by_sequence[sequence_name].append(position)
+        self._seq_to_idx = positions_by_sequence  # pyre-ignore[16]
+
+    def _local_path(self, path: str) -> str:
+        """Maps `path` through `path_manager.get_local_path` if a manager is set."""
+        manager = self.path_manager
+        return path if manager is None else manager.get_local_path(path)
+
+    def get_frame_numbers_and_timestamps(
+        self, idxs: Sequence[int], subset_filter: Optional[Sequence[str]] = None
+    ) -> List[Tuple[int, float]]:
+        """
+        Returns the (frame_number, frame_timestamp) pairs of the frames at the
+        dataset indices `idxs`, in the given order. When `subset_filter` is not
+        None, frames whose subset is not contained in it are skipped.
+        """
+        pairs: List[Tuple[int, float]] = []
+        for idx in idxs:
+            # pyre-fixme[16]: `JsonIndexDataset` has no attribute `frame_annots`.
+            entry = self.frame_annots[idx]
+            # Keep the frame unless a filter is given and rejects its subset.
+            if subset_filter is None or entry["subset"] in subset_filter:
+                annotation = entry["frame_annotation"]
+                pairs.append((annotation.frame_number, annotation.frame_timestamp))
+        return pairs
+
+    def category_to_sequence_names(self) -> Dict[str, List[str]]:
+        """Returns a dict mapping each category to the names of its sequences."""
+        names_by_category: Dict[str, List[str]] = {}
+        for name, annotation in self.seq_annots.items():  # pyre-ignore
+            names_by_category.setdefault(annotation.category, []).append(name)
+        return names_by_category
+
+    def get_eval_batches(self) -> Optional[List[List[int]]]:
+        return self.eval_batches  # None when no eval batches are defined
+
+
+def _seq_name_to_seed(seq_name) -> int:  # deterministic: SHA-1 of the name as an int
+    return int.from_bytes(hashlib.sha1(seq_name.encode("utf-8")).digest(), "big")
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset_map_provider.py b/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset_map_provider.py
new file mode 100644
index 0000000000000000000000000000000000000000..53170871b3f19cf3e0380ad89a688552ea090072
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset_map_provider.py
@@ -0,0 +1,318 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import json
+import os
+from typing import Dict, List, Optional, Tuple, Type
+
+from omegaconf import DictConfig
+from pytorch3d.implicitron.tools.config import (
+    expand_args_fields,
+    registry,
+    run_auto_creation,
+)
+from pytorch3d.renderer.cameras import CamerasBase
+
+from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, PathManagerFactory
+from .json_index_dataset import JsonIndexDataset
+
+from .utils import (
+    DATASET_TYPE_KNOWN,
+    DATASET_TYPE_TEST,
+    DATASET_TYPE_TRAIN,
+    DATASET_TYPE_UNKNOWN,
+)
+
+
+# fmt: off
+CO3D_CATEGORIES: List[str] = list(reversed([  # final order is the reverse of the literal
+    "baseballbat", "banana", "bicycle", "microwave", "tv",
+    "cellphone", "toilet", "hairdryer", "couch", "kite", "pizza",
+    "umbrella", "wineglass", "laptop",
+    "hotdog", "stopsign", "frisbee", "baseballglove",
+    "cup", "parkingmeter", "backpack", "toyplane", "toybus",
+    "handbag", "chair", "keyboard", "car", "motorcycle",
+    "carrot", "bottle", "sandwich", "remote", "bowl", "skateboard",
+    "toaster", "mouse", "toytrain", "book", "toytruck",
+    "orange", "broccoli", "plant", "teddybear",
+    "suitcase", "bench", "ball", "cake",
+    "vase", "hydrant", "apple", "donut",
+]))
+# fmt: on
+
+_CO3D_DATASET_ROOT: str = os.getenv("CO3D_DATASET_ROOT", "")  # "" if unset
+
+# _NEED_CONTROL lists those fields of JsonIndexDataset which are not
+# directly specified for it in the config (dataset_tweak_args deletes
+# them from the dataset args) but come from the DatasetMapProvider.
+_NEED_CONTROL: Tuple[str, ...] = (
+    "dataset_root",
+    "eval_batches",
+    "eval_batch_index",
+    "n_frames_per_sequence",
+    "path_manager",
+    "pick_sequence",
+    "subsets",
+    "frame_annotations_file",
+    "sequence_annotations_file",
+    "subset_lists_file",
+)
+
+
+@registry.register
+class JsonIndexDatasetMapProvider(DatasetMapProviderBase):  # pyre-ignore [13]
+    """
+    Generates the training / validation and testing dataset objects for
+    a dataset laid out on disk like Co3D, with annotations in json files.
+
+    Args:
+        category: The object category of the dataset.
+        task_str: "multisequence" or "singlesequence".
+        dataset_root: The root folder of the dataset.
+        n_frames_per_sequence: Randomly sample #n_frames_per_sequence frames
+            in each sequence of the train dataset.
+        test_on_train: Construct validation and test datasets from
+            the training subset.
+        restrict_sequence_name: Restrict the dataset sequences to the ones
+            present in the given list of names.
+        test_restrict_sequence_id: Position of the loaded sequence among the
+            eval batch sequence names. Required for task_str='singlesequence'.
+        assert_single_seq: Assert that only frames from a single sequence
+            are present in all generated datasets.
+        only_test_set: Skip the creation of the train dataset.
+        dataset_class_type: name of class (JsonIndexDataset or a subclass)
+                            to use for the dataset.
+        dataset_X_args (e.g. dataset_JsonIndexDataset_args): arguments passed
+            to all the dataset constructors.
+        path_manager_factory: (Optional) An object that generates an instance of
+            PathManager that can translate provided file paths.
+        path_manager_factory_class_type: The class type of `path_manager_factory`.
+    """
+
+    category: str
+    task_str: str = "singlesequence"
+    dataset_root: str = _CO3D_DATASET_ROOT
+    n_frames_per_sequence: int = -1
+    test_on_train: bool = False
+    restrict_sequence_name: Tuple[str, ...] = ()
+    test_restrict_sequence_id: int = -1
+    assert_single_seq: bool = False
+    only_test_set: bool = False
+    dataset: JsonIndexDataset
+    dataset_class_type: str = "JsonIndexDataset"
+    path_manager_factory: PathManagerFactory
+    path_manager_factory_class_type: str = "PathManagerFactory"
+
+    @classmethod
+    def dataset_tweak_args(cls, type, args: DictConfig) -> None:
+        """
+        Called by get_default_args(JsonIndexDatasetMapProvider) to
+        not expose certain fields of each dataset class.
+        """
+        for key in _NEED_CONTROL:
+            del args[key]
+
+    def create_dataset(self):
+        """
+        Prevent the member named dataset from being created.
+        """
+        return
+
+    def __post_init__(self):
+        """Builds the train / val / test datasets into `self.dataset_map`."""
+        super().__init__()
+        run_auto_creation(self)
+        if self.only_test_set and self.test_on_train:
+            raise ValueError("Cannot have only_test_set and test_on_train")
+
+        path_manager = self.path_manager_factory.get()
+
+        # TODO: implement loading multiple categories
+
+        frame_file = os.path.join(
+            self.dataset_root, self.category, "frame_annotations.jgz"
+        )
+        sequence_file = os.path.join(
+            self.dataset_root, self.category, "sequence_annotations.jgz"
+        )
+        subset_lists_file = os.path.join(
+            self.dataset_root, self.category, "set_lists.json"
+        )
+        common_kwargs = {
+            "dataset_root": self.dataset_root,
+            "path_manager": path_manager,
+            "frame_annotations_file": frame_file,
+            "sequence_annotations_file": sequence_file,
+            "subset_lists_file": subset_lists_file,
+            **getattr(self, f"dataset_{self.dataset_class_type}_args"),
+        }
+
+        # This maps the common names of the dataset subsets ("train"/"val"/"test")
+        # to the names of the subsets in the CO3D dataset.
+        set_names_mapping = _get_co3d_set_names_mapping(
+            self.task_str,
+            self.test_on_train,
+            self.only_test_set,
+        )
+
+        # load the evaluation batches
+        batch_indices_path = os.path.join(
+            self.dataset_root,
+            self.category,
+            f"eval_batches_{self.task_str}.json",
+        )
+        if path_manager is not None:
+            batch_indices_path = path_manager.get_local_path(batch_indices_path)
+        if not os.path.isfile(batch_indices_path):
+            # The batch indices file does not exist.
+            # Most probably the user has not specified the root folder.
+            raise ValueError(
+                f"Looking for batch indices in {batch_indices_path}. "
+                + "Please specify a correct dataset_root folder."
+            )
+
+        with open(batch_indices_path, "r") as f:
+            eval_batch_index = json.load(f)
+        restrict_sequence_name = self.restrict_sequence_name
+
+        if self.task_str == "singlesequence":
+            if (
+                self.test_restrict_sequence_id is None
+                or self.test_restrict_sequence_id < 0
+            ):
+                raise ValueError(
+                    "Please specify an integer id 'test_restrict_sequence_id'"
+                    + " of the sequence considered for 'singlesequence'"
+                    + " training and evaluation."
+                )
+            if len(self.restrict_sequence_name) > 0:
+                raise ValueError(
+                    "For the 'singlesequence' task, the restrict_sequence_name has"
+                    " to be unset while test_restrict_sequence_id has to be set to an"
+                    " integer defining the order of the evaluation sequence."
+                )
+            # an order-preserving set() equivalent (dict keys keep insertion order):
+            eval_batches_sequence_names = list(
+                {b[0][0]: None for b in eval_batch_index}.keys()
+            )
+            eval_sequence_name = eval_batches_sequence_names[
+                self.test_restrict_sequence_id
+            ]
+            eval_batch_index = [
+                b for b in eval_batch_index if b[0][0] == eval_sequence_name
+            ]
+            # overwrite the restrict_sequence_name
+            restrict_sequence_name = [eval_sequence_name]
+        if len(restrict_sequence_name) > 0:
+            eval_batch_index = [
+                b for b in eval_batch_index if b[0][0] in restrict_sequence_name
+            ]
+
+        dataset_type: Type[JsonIndexDataset] = registry.get(
+            JsonIndexDataset, self.dataset_class_type
+        )
+        expand_args_fields(dataset_type)
+        train_dataset = None
+        if not self.only_test_set:
+            train_dataset = dataset_type(
+                n_frames_per_sequence=self.n_frames_per_sequence,
+                subsets=set_names_mapping["train"],
+                pick_sequence=restrict_sequence_name,
+                **common_kwargs,
+            )
+        if self.test_on_train:
+            assert train_dataset is not None
+            val_dataset = test_dataset = train_dataset
+        else:
+            val_dataset = dataset_type(
+                n_frames_per_sequence=-1,
+                subsets=set_names_mapping["val"],
+                pick_sequence=restrict_sequence_name,
+                **common_kwargs,
+            )
+            test_dataset = dataset_type(
+                n_frames_per_sequence=-1,
+                subsets=set_names_mapping["test"],
+                pick_sequence=restrict_sequence_name,
+                eval_batch_index=eval_batch_index,
+                **common_kwargs,
+            )
+        dataset_map = DatasetMap(
+            train=train_dataset, val=val_dataset, test=test_dataset
+        )
+
+        if self.assert_single_seq:
+            # check there's only one sequence in all datasets
+            sequence_names = {
+                sequence_name
+                for dset in dataset_map.iter_datasets()
+                for sequence_name in dset.sequence_names()
+            }
+            if len(sequence_names) > 1:
+                raise ValueError("Multiple sequences loaded but expected one")
+
+        self.dataset_map = dataset_map
+
+    def get_dataset_map(self) -> DatasetMap:
+        """Returns the DatasetMap stored by `__post_init__`."""
+        return self.dataset_map  # pyre-ignore[16]
+
+    def get_all_train_cameras(self) -> Optional[CamerasBase]:
+        """Returns the train dataset cameras, or None for the multisequence task."""
+        if self.task_str == "multisequence":
+            return None
+
+        assert self.task_str == "singlesequence"
+        # pyre-ignore[16]
+        train_dataset = self.dataset_map.train
+        assert isinstance(train_dataset, JsonIndexDataset)
+        return train_dataset.get_all_train_cameras()
+
+
+def _get_co3d_set_names_mapping(
+    task_str: str,
+    test_on_train: bool,
+    only_test: bool,
+) -> Dict[str, List[str]]:
+    """
+    Maps the common dataset subset names ("train"/"val"/"test") to the lists
+    of names of the corresponding subsets in the CO3D dataset. Each CO3D
+    subset name has the form "<prefix>_<suffix>", where the prefix is
+    DATASET_TYPE_TEST or DATASET_TYPE_TRAIN and the suffix is
+    DATASET_TYPE_KNOWN or DATASET_TYPE_UNKNOWN.
+
+    Args:
+        task_str: For "singlesequence" all subsets use the DATASET_TYPE_TEST
+            prefix; otherwise "train" uses DATASET_TYPE_TRAIN and "val" /
+            "test" use both prefixes.
+        test_on_train: If set, the "val" and "test" keys are left out.
+        only_test: If set, the "train" key is left out.
+
+    Returns:
+        The mapping with the keys
+            - train (if not only_test)
+            - val (if not test_on_train)
+            - test (if not test_on_train)
+    """
+    is_single_sequence = task_str == "singlesequence"
+    mapping: Dict[str, List[str]] = {}
+    if not only_test:
+        train_prefix = DATASET_TYPE_TEST if is_single_sequence else DATASET_TYPE_TRAIN
+        mapping["train"] = [train_prefix + "_" + DATASET_TYPE_KNOWN]
+    if not test_on_train:
+        if is_single_sequence:
+            eval_prefixes = [DATASET_TYPE_TEST]
+        else:
+            eval_prefixes = [DATASET_TYPE_TEST, DATASET_TYPE_TRAIN]
+        for split in ("val", "test"):
+            # A fresh list per split, so the two entries do not share an object.
+            mapping[split] = [
+                prefix + "_" + suffix
+                for prefix in eval_prefixes
+                for suffix in (DATASET_TYPE_KNOWN, DATASET_TYPE_UNKNOWN)
+            ]
+    return mapping
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset_map_provider_v2.py b/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset_map_provider_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8790d35bcc8f05f128730c9ac0ebc39e324854a
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset_map_provider_v2.py
@@ -0,0 +1,477 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import copy
+import json
+import logging
+import multiprocessing
+import os
+import warnings
+from collections import defaultdict
+from typing import Dict, List, Optional, Tuple, Type, Union
+
+import numpy as np
+from iopath.common.file_io import PathManager
+
+from omegaconf import DictConfig
+from pytorch3d.implicitron.dataset.dataset_map_provider import (
+    DatasetMap,
+    DatasetMapProviderBase,
+    PathManagerFactory,
+)
+from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
+from pytorch3d.implicitron.tools.config import (
+    expand_args_fields,
+    registry,
+    run_auto_creation,
+)
+
+from pytorch3d.renderer.cameras import CamerasBase
+from tqdm import tqdm
+
+
+_CO3DV2_DATASET_ROOT: str = os.getenv("CO3DV2_DATASET_ROOT", "")
+
+# _NEED_CONTROL is a list of those elements of JsonIndexDataset which
+# are not directly specified for it in the config but come from the
+# DatasetMapProvider.
+_NEED_CONTROL: Tuple[str, ...] = (
+    "dataset_root",
+    "eval_batches",
+    "eval_batch_index",
+    "path_manager",
+    "subsets",
+    "frame_annotations_file",
+    "sequence_annotations_file",
+    "subset_lists_file",
+)
+
+logger = logging.getLogger(__name__)
+
+
+@registry.register
+class JsonIndexDatasetMapProviderV2(DatasetMapProviderBase):  # pyre-ignore [13]
+    """
+    Generates the training, validation, and testing dataset objects for
+    a dataset laid out on disk like CO3Dv2, with annotations in gzipped json files.
+
+    The dataset is organized in the filesystem as follows::
+
+        self.dataset_root
+            ├── <category_0>
+            │   ├── <sequence_name_0>
+            │   │   ├── depth_masks
+            │   │   ├── depths
+            │   │   ├── images
+            │   │   ├── masks
+            │   │   └── pointcloud.ply
+            │   ├── <sequence_name_1>
+            │   │   ├── depth_masks
+            │   │   ├── depths
+            │   │   ├── images
+            │   │   ├── masks
+            │   │   └── pointcloud.ply
+            │   ├── ...
+            │   ├── <sequence_name_N>
+            │   ├── set_lists
+            │       ├── set_lists_<subset_name_0>.json
+            │       ├── set_lists_<subset_name_1>.json
+            │       ├── ...
+            │       ├── set_lists_<subset_name_M>.json
+            │   ├── eval_batches
+            │   │   ├── eval_batches_<subset_name_0>.json
+            │   │   ├── eval_batches_<subset_name_1>.json
+            │   │   ├── ...
+            │   │   ├── eval_batches_<subset_name_M>.json
+            │   ├── frame_annotations.jgz
+            │   ├── sequence_annotations.jgz
+            ├── <category_1>
+            ├── ...
+            ├── <category_K>
+
+    The dataset contains sequences named `<sequence_name_i>` from `K` categories with
+    names `<category_j>`. Each category comprises sequence folders
+    `<category_k>/<sequence_name_i>` containing the list of sequence images, depth maps,
+    foreground masks, and valid-depth masks `images`, `depths`, `masks`, and `depth_masks`
+    respectively. Furthermore, `<category_k>/set_lists/` stores `M`
+    json files `set_lists_<subset_name_l>.json`, each describing a certain sequence subset.
+
+    Users specify the loaded dataset subset by setting `self.subset_name` to one of the
+    available subset names `<subset_name_l>`.
+
+    `frame_annotations.jgz` and `sequence_annotations.jgz` are gzipped json files containing
+    the list of all frames and sequences of the given category stored as lists of
+    `FrameAnnotation` and `SequenceAnnotation` objects respectively.
+
+    Each `set_lists_<subset_name_l>.json` file contains the following dictionary::
+
+        {
+            "train": [
+                (sequence_name: str, frame_number: int, image_path: str),
+                ...
+            ],
+            "val": [
+                (sequence_name: str, frame_number: int, image_path: str),
+                ...
+            ],
+            "test": [
+                (sequence_name: str, frame_number: int, image_path: str),
+                ...
+            ],
+        }
+
+    defining the list of frames (identified with their `sequence_name` and `frame_number`)
+    in the "train", "val", and "test" subsets of the dataset.
+    Note that `frame_number` can be obtained only from `frame_annotations.jgz` and
+    does not necessarily correspond to the numeric suffix of the corresponding image
+    file name (e.g. a file `<category_0>/<sequence_name_0>/images/frame00005.jpg` can
+    have its frame number set to `20`, not 5).
+
+    Each `eval_batches_<subset_name_l>.json` file contains a list of evaluation examples
+    in the following form::
+
+        [
+            [  # batch 1
+                (sequence_name: str, frame_number: int, image_path: str),
+                ...
+            ],
+            [  # batch 2
+                (sequence_name: str, frame_number: int, image_path: str),
+                ...
+            ],
+        ]
+
+    Note that the evaluation examples always come from the `"test"` subset of the dataset.
+    (test frames can repeat across batches).
+
+    Args:
+        category: Dataset categories to load expressed as a string of comma-separated
+            category names (e.g. `"apple,car,orange"`).
+        subset_name: The name of the dataset subset. For CO3Dv2, these include
+            e.g. "manyview_dev_0", "fewview_test", ...
+        dataset_root: The root folder of the dataset.
+        test_on_train: Construct validation and test datasets from
+            the training subset.
+        only_test_set: Load only the test set. Incompatible with `test_on_train`.
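+        load_eval_batches: Load the file containing eval batches pointing to the
+            test dataset.
+        num_load_workers: The maximum number of worker processes used to load
+            the categories in parallel when `category` lists several of them.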
+        n_known_frames_for_test: Add a certain number of known frames to each
+            eval batch. Useful for evaluating models that require
+            source views as input (e.g. NeRF-WCE / PixelNeRF).
+        dataset_args: Specifies additional arguments to the
+            JsonIndexDataset constructor call.
+        path_manager_factory: (Optional) An object that generates an instance of
+            PathManager that can translate provided file paths.
+        path_manager_factory_class_type: The class type of `path_manager_factory`.
+    """
+
+    category: str
+    subset_name: str
+    dataset_root: str = _CO3DV2_DATASET_ROOT
+
+    test_on_train: bool = False
+    only_test_set: bool = False
+    load_eval_batches: bool = True
+    num_load_workers: int = 4
+
+    n_known_frames_for_test: int = 0
+
+    dataset_class_type: str = "JsonIndexDataset"
+    dataset: JsonIndexDataset
+
+    path_manager_factory: PathManagerFactory
+    path_manager_factory_class_type: str = "PathManagerFactory"
+
+    def __post_init__(self):
+        super().__init__()
+        run_auto_creation(self)
+
+        if self.only_test_set and self.test_on_train:
+            raise ValueError("Cannot have only_test_set and test_on_train")
+
+        if "," in self.category:
+            # a comma-separated list of categories to load
+            categories = [c.strip() for c in self.category.split(",")]
+            logger.info(f"Loading a list of categories: {str(categories)}.")
+            with multiprocessing.Pool(
+                processes=min(self.num_load_workers, len(categories))
+            ) as pool:
+                category_dataset_maps = list(
+                    tqdm(
+                        pool.imap(self._load_category, categories),
+                        total=len(categories),
+                    )
+                )
+            dataset_map = category_dataset_maps[0]
+            dataset_map.join(category_dataset_maps[1:])
+
+        else:
+            # one category to load
+            dataset_map = self._load_category(self.category)
+
+        self.dataset_map = dataset_map
+
+    def _load_category(self, category: str) -> DatasetMap:
+        """
+        Build the train/val/test `DatasetMap` of a single category.
+
+        A dataset covering the whole category is created first; the splits are
+        then extracted from it with `subset_from_frame_index` using the frame
+        lists stored in `set_lists/set_lists_<subset_name>.json`. If
+        `load_eval_batches` is set, the eval batches are read from
+        `eval_batches/eval_batches_<subset_name>.json` and attached to the test
+        dataset.
+
+        Args:
+            category: The name of the category folder inside `self.dataset_root`.
+
+        Returns:
+            A `DatasetMap` whose `train` entry is None if `only_test_set` is set,
+            and whose `val` and `test` entries are the train dataset itself if
+            `test_on_train` is set.
+
+        Raises:
+            ValueError: If the frame annotation file cannot be found, or if
+                `self.subset_name` is not among the available subset names.
+        """
+
+        frame_file = os.path.join(self.dataset_root, category, "frame_annotations.jgz")
+        sequence_file = os.path.join(
+            self.dataset_root, category, "sequence_annotations.jgz"
+        )
+
+        path_manager = self.path_manager_factory.get()
+
+        # the local copy is used only for the existence check below; the dataset
+        # itself receives the original `frame_file` path and the path manager
+        if path_manager is not None:
+            path_managed_frame_file = path_manager.get_local_path(frame_file)
+        else:
+            path_managed_frame_file = frame_file
+        if not os.path.isfile(path_managed_frame_file):
+            # The frame_file does not exist.
+            # Most probably the user has not specified the root folder.
+            raise ValueError(
+                f"Looking for frame annotations in {path_managed_frame_file}."
+                + " Please specify a correct dataset_root folder."
+                + " Note: By default the root folder is taken from the"
+                + " CO3DV2_DATASET_ROOT environment variable."
+            )
+
+        # setup the common dataset arguments
+        common_dataset_kwargs = getattr(self, f"dataset_{self.dataset_class_type}_args")
+        # the keys set here override the user-supplied dataset args
+        # NOTE(review): `subsets=None` / `subset_lists_file=""` presumably disable
+        # subset filtering inside the dataset -- confirm against JsonIndexDataset
+        common_dataset_kwargs = {
+            **common_dataset_kwargs,
+            "dataset_root": self.dataset_root,
+            "frame_annotations_file": frame_file,
+            "sequence_annotations_file": sequence_file,
+            "subsets": None,
+            "subset_lists_file": "",
+            "path_manager": path_manager,
+        }
+
+        # get the used dataset type
+        dataset_type: Type[JsonIndexDataset] = registry.get(
+            JsonIndexDataset, self.dataset_class_type
+        )
+        expand_args_fields(dataset_type)
+
+        dataset = dataset_type(**common_dataset_kwargs)
+
+        available_subset_names = self._get_available_subset_names(category)
+        logger.debug(f"Available subset names: {str(available_subset_names)}.")
+        if self.subset_name not in available_subset_names:
+            raise ValueError(
+                f"Unknown subset name {self.subset_name}."
+                + f" Choose one of available subsets: {str(available_subset_names)}."
+            )
+
+        # load the list of train/val/test frames
+        subset_mapping = self._load_annotation_json(
+            os.path.join(category, "set_lists", f"set_lists_{self.subset_name}.json")
+        )
+
+        # load the evaluation batches
+        if self.load_eval_batches:
+            eval_batch_index = self._load_annotation_json(
+                os.path.join(
+                    category,
+                    "eval_batches",
+                    f"eval_batches_{self.subset_name}.json",
+                )
+            )
+        else:
+            eval_batch_index = None
+
+        train_dataset = None
+        if not self.only_test_set:
+            # load the training set
+            logger.debug("Extracting train dataset.")
+            train_dataset = dataset.subset_from_frame_index(subset_mapping["train"])
+            logger.info(f"Train dataset: {str(train_dataset)}")
+
+        if self.test_on_train:
+            # `__post_init__` rejects only_test_set together with test_on_train,
+            # so the train dataset has been created above
+            assert train_dataset is not None
+            val_dataset = test_dataset = train_dataset
+        else:
+            # load the val and test sets
+            logger.debug("Extracting val dataset.")
+            val_dataset = dataset.subset_from_frame_index(subset_mapping["val"])
+            logger.info(f"Val dataset: {str(val_dataset)}")
+            logger.debug("Extracting test dataset.")
+
+            if (self.n_known_frames_for_test > 0) and self.load_eval_batches:
+                # extend the test subset mapping and the dataset with additional
+                # known views from the train dataset
+                (
+                    eval_batch_index,
+                    subset_mapping["test"],
+                ) = self._extend_test_data_with_known_views(
+                    subset_mapping,
+                    eval_batch_index,
+                )
+
+            test_dataset = dataset.subset_from_frame_index(subset_mapping["test"])
+            logger.info(f"Test dataset: {str(test_dataset)}")
+            if self.load_eval_batches:
+                # load the eval batches
+                logger.debug("Extracting eval batches.")
+                try:
+                    test_dataset.eval_batches = (
+                        test_dataset.seq_frame_index_to_dataset_index(
+                            eval_batch_index,
+                        )
+                    )
+                except IndexError:
+                    # some eval-batch entries are absent from the test dataset:
+                    # warn and retry while removing the missing entries
+                    warnings.warn(
+                        "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
+                        + "Some eval batches are missing from the test dataset.\n"
+                        + "The evaluation results will be incomparable to the\n"
+                        + "evaluation results calculated on the original dataset.\n"
+                        + "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    )
+                    test_dataset.eval_batches = (
+                        test_dataset.seq_frame_index_to_dataset_index(
+                            eval_batch_index,
+                            allow_missing_indices=True,
+                            remove_missing_indices=True,
+                        )
+                    )
+                logger.info(f"# eval batches: {len(test_dataset.eval_batches)}")
+
+        return DatasetMap(train=train_dataset, val=val_dataset, test=test_dataset)
+
+    @classmethod
+    def dataset_tweak_args(cls, type, args: DictConfig) -> None:
+        """
+        Called by get_default_args(JsonIndexDatasetMapProviderV2) to
+        not expose certain fields of each dataset class.
+        """
+        for key in _NEED_CONTROL:
+            del args[key]
+
+    def create_dataset(self):
+        # The dataset object is created inside `self.get_dataset_map`
+        pass
+
+    def get_dataset_map(self) -> DatasetMap:
+        return self.dataset_map  # pyre-ignore [16]
+
+    def get_category_to_subset_name_list(self) -> Dict[str, List[str]]:
+        """
+        Returns a global dataset index containing the available subset names per category
+        as a dictionary.
+
+        Returns:
+            category_to_subset_name_list: A dictionary containing subset names available
+                per category of the following form::
+
+                    {
+                        category_0: [category_0_subset_name_0, category_0_subset_name_1, ...],
+                        category_1: [category_1_subset_name_0, category_1_subset_name_1, ...],
+                        ...
+                    }
+
+        """
+        category_to_subset_name_list_json = "category_to_subset_name_list.json"
+        category_to_subset_name_list = self._load_annotation_json(
+            category_to_subset_name_list_json
+        )
+        return category_to_subset_name_list
+
+    def get_all_train_cameras(self) -> Optional[CamerasBase]:
+        # pyre-ignore[16]
+        train_dataset = self.dataset_map.train
+        assert isinstance(train_dataset, JsonIndexDataset)
+        return train_dataset.get_all_train_cameras()
+
+    def _load_annotation_json(self, json_filename: str):
+        full_path = os.path.join(
+            self.dataset_root,
+            json_filename,
+        )
+        logger.info(f"Loading frame index json from {full_path}.")
+        path_manager = self.path_manager_factory.get()
+        if path_manager is not None:
+            full_path = path_manager.get_local_path(full_path)
+        if not os.path.isfile(full_path):
+            # The batch indices file does not exist.
+            # Most probably the user has not specified the root folder.
+            raise ValueError(
+                f"Looking for dataset json file in {full_path}. "
+                + "Please specify a correct dataset_root folder."
+            )
+        with open(full_path, "r") as f:
+            data = json.load(f)
+        return data
+
+    def _get_available_subset_names(self, category: str):
+        return get_available_subset_names(
+            self.dataset_root,
+            category,
+            path_manager=self.path_manager_factory.get(),
+        )
+
+    def _extend_test_data_with_known_views(
+        self,
+        subset_mapping: Dict[str, List[Union[Tuple[str, int], Tuple[str, int, str]]]],
+        eval_batch_index: List[List[Union[Tuple[str, int, str], Tuple[str, int]]]],
+    ):
+        # convert the train subset mapping to a dict:
+        #   sequence_to_train_frames: {sequence_name: frame_index}
+        sequence_to_train_frames = defaultdict(list)
+        for frame_entry in subset_mapping["train"]:
+            sequence_name = frame_entry[0]
+            sequence_to_train_frames[sequence_name].append(frame_entry)
+        sequence_to_train_frames = dict(sequence_to_train_frames)
+        test_subset_mapping_set = {tuple(s) for s in subset_mapping["test"]}
+
+        # extend the eval batches / subset mapping with the additional examples
+        eval_batch_index_out = copy.deepcopy(eval_batch_index)
+        generator = np.random.default_rng(seed=0)
+        for batch in eval_batch_index_out:
+            sequence_name = batch[0][0]
+            sequence_known_entries = sequence_to_train_frames[sequence_name]
+            idx_to_add = generator.permutation(len(sequence_known_entries))[
+                : self.n_known_frames_for_test
+            ]
+            entries_to_add = [sequence_known_entries[a] for a in idx_to_add]
+            assert all(e in subset_mapping["train"] for e in entries_to_add)
+
+            # extend the eval batch with the known views
+            batch.extend(entries_to_add)
+
+            # also add these new entries to the test subset mapping
+            test_subset_mapping_set.update(tuple(e) for e in entries_to_add)
+
+        return eval_batch_index_out, list(test_subset_mapping_set)
+
+
+def get_available_subset_names(
+    dataset_root: str,
+    category: str,
+    path_manager: Optional[PathManager] = None,
+) -> List[str]:
+    """
+    Get the available subset names for a given category folder inside a root dataset
+    folder `dataset_root`.
+    """
+    category_dir = os.path.join(dataset_root, category)
+    category_dir_exists = (
+        (path_manager is not None) and path_manager.isdir(category_dir)
+    ) or os.path.isdir(category_dir)
+    if not category_dir_exists:
+        raise ValueError(
+            f"Looking for dataset files in {category_dir}. "
+            + "Please specify a correct dataset_root folder."
+        )
+
+    set_list_dir = os.path.join(category_dir, "set_lists")
+    set_list_jsons = (os.listdir if path_manager is None else path_manager.ls)(
+        set_list_dir
+    )
+
+    return [
+        json_file.replace("set_lists_", "").replace(".json", "")
+        for json_file in set_list_jsons
+    ]
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/llff_dataset_map_provider.py b/pytorch3d/pytorch3d/implicitron/dataset/llff_dataset_map_provider.py
new file mode 100644
index 0000000000000000000000000000000000000000..8a4993e1391e284c83d5d82714531521022e32e8
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/llff_dataset_map_provider.py
@@ -0,0 +1,67 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import numpy as np
+import torch
+from pytorch3d.implicitron.tools.config import registry
+
+from .load_llff import load_llff_data
+
+from .single_sequence_dataset import (
+    _interpret_blender_cameras,
+    SingleSceneDatasetMapProviderBase,
+)
+
+
+@registry.register
+class LlffDatasetMapProvider(SingleSceneDatasetMapProviderBase):
+    """
+    Provides data for one scene from the LLFF dataset.
+
+    Members:
+        base_dir: directory holding the data for the scene.
+        object_name: The name of the scene (e.g. "fern"). This is just used as a label.
+            It will typically be equal to the name of the directory self.base_dir.
+        path_manager_factory: Creates path manager which may be used for
+            interpreting paths.
+        n_known_frames_for_test: If set, training frames are included in the val
+            and test datasets, and this many random training frames are added to
+            each test batch. If not set, test batches each contain just a single
+            testing frame.
+        downscale_factor: determines image sizes.
+    """
+
+    downscale_factor: int = 4
+
+    def _load_data(self) -> None:
+        path_manager = self.path_manager_factory.get()
+        images, poses, _ = load_llff_data(
+            self.base_dir, factor=self.downscale_factor, path_manager=path_manager
+        )
+        hwf = poses[0, :3, -1]
+        poses = poses[:, :3, :4]
+
+        llffhold = 8
+        i_test = np.arange(images.shape[0])[::llffhold]
+        i_test_index = set(i_test.tolist())
+        i_train = np.array(
+            [i for i in np.arange(images.shape[0]) if i not in i_test_index]
+        )
+        i_split = (i_train, i_test, i_test)
+        H, W, focal = hwf
+        focal_ndc = 2 * focal / min(H, W)
+        images = torch.from_numpy(images).permute(0, 3, 1, 2)
+        poses = torch.from_numpy(poses)
+
+        # pyre-ignore[16]
+        self.poses = _interpret_blender_cameras(poses, focal_ndc)
+        # pyre-ignore[16]
+        self.images = images
+        # pyre-ignore[16]
+        self.fg_probabilities = None
+        # pyre-ignore[16]
+        self.i_split = i_split
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/load_blender.py b/pytorch3d/pytorch3d/implicitron/dataset/load_blender.py
new file mode 100644
index 0000000000000000000000000000000000000000..42b9cb530baf2981407bfc7d7a914cfcfccecc18
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/load_blender.py
@@ -0,0 +1,141 @@
+# @lint-ignore-every LICENSELINT
+# Adapted from https://github.com/bmild/nerf/blob/master/load_blender.py
+# Copyright (c) 2020 bmild
+import json
+import os
+
+import numpy as np
+import torch
+from PIL import Image
+
+
+def translate_by_t_along_z(t):
+    tform = np.eye(4).astype(np.float32)
+    tform[2][3] = t
+    return tform
+
+
+def rotate_by_phi_along_x(phi):
+    tform = np.eye(4).astype(np.float32)
+    tform[1, 1] = tform[2, 2] = np.cos(phi)
+    tform[1, 2] = -np.sin(phi)
+    tform[2, 1] = -tform[1, 2]
+    return tform
+
+
+def rotate_by_theta_along_y(theta):
+    tform = np.eye(4).astype(np.float32)
+    tform[0, 0] = tform[2, 2] = np.cos(theta)
+    tform[0, 2] = -np.sin(theta)
+    tform[2, 0] = -tform[0, 2]
+    return tform
+
+
+def pose_spherical(theta, phi, radius):
+    c2w = translate_by_t_along_z(radius)
+    c2w = rotate_by_phi_along_x(phi / 180.0 * np.pi) @ c2w
+    c2w = rotate_by_theta_along_y(theta / 180 * np.pi) @ c2w
+    c2w = np.array([[-1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]) @ c2w
+    return c2w
+
+
+def _local_path(path_manager, path):
+    if path_manager is None:
+        return path
+    return path_manager.get_local_path(path)
+
+
+def load_blender_data(
+    basedir,
+    half_res=False,
+    testskip=1,
+    debug=False,
+    path_manager=None,
+    focal_length_in_screen_space=False,
+):
+    splits = ["train", "val", "test"]
+    metas = {}
+    for s in splits:
+        path = os.path.join(basedir, f"transforms_{s}.json")
+        with open(_local_path(path_manager, path)) as fp:
+            metas[s] = json.load(fp)
+
+    all_imgs = []
+    all_poses = []
+    counts = [0]
+    for s in splits:
+        meta = metas[s]
+        imgs = []
+        poses = []
+        if s == "train" or testskip == 0:
+            skip = 1
+        else:
+            skip = testskip
+
+        for frame in meta["frames"][::skip]:
+            fname = os.path.join(basedir, frame["file_path"] + ".png")
+            imgs.append(np.array(Image.open(_local_path(path_manager, fname))))
+            poses.append(np.array(frame["transform_matrix"]))
+        imgs = (np.array(imgs) / 255.0).astype(np.float32)
+        poses = np.array(poses).astype(np.float32)
+        counts.append(counts[-1] + imgs.shape[0])
+        all_imgs.append(imgs)
+        all_poses.append(poses)
+
+    i_split = [np.arange(counts[i], counts[i + 1]) for i in range(3)]
+
+    imgs = np.concatenate(all_imgs, 0)
+    poses = np.concatenate(all_poses, 0)
+
+    H, W = imgs[0].shape[:2]
+    camera_angle_x = float(meta["camera_angle_x"])
+    if focal_length_in_screen_space:
+        focal = 0.5 * W / np.tan(0.5 * camera_angle_x)
+    else:
+        focal = 1 / np.tan(0.5 * camera_angle_x)
+
+    render_poses = torch.stack(
+        [
+            torch.from_numpy(pose_spherical(angle, -30.0, 4.0))
+            for angle in np.linspace(-180, 180, 40 + 1)[:-1]
+        ],
+        0,
+    )
+
+    # In debug mode, return extremely tiny images
+    if debug:
+        import cv2
+
+        H = H // 32
+        W = W // 32
+        if focal_length_in_screen_space:
+            focal = focal / 32.0
+        imgs = [
+            torch.from_numpy(
+                cv2.resize(imgs[i], dsize=(25, 25), interpolation=cv2.INTER_AREA)
+            )
+            for i in range(imgs.shape[0])
+        ]
+        imgs = torch.stack(imgs, 0)
+        poses = torch.from_numpy(poses)
+        return imgs, poses, render_poses, [H, W, focal], i_split
+
+    if half_res:
+        import cv2
+
+        # TODO: resize images using INTER_AREA (cv2)
+        H = H // 2
+        W = W // 2
+        if focal_length_in_screen_space:
+            focal = focal / 2.0
+        imgs = [
+            torch.from_numpy(
+                cv2.resize(imgs[i], dsize=(400, 400), interpolation=cv2.INTER_AREA)
+            )
+            for i in range(imgs.shape[0])
+        ]
+        imgs = torch.stack(imgs, 0)
+
+    poses = torch.from_numpy(poses)
+
+    return imgs, poses, render_poses, [H, W, focal], i_split
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/load_llff.py b/pytorch3d/pytorch3d/implicitron/dataset/load_llff.py
new file mode 100644
index 0000000000000000000000000000000000000000..d19337a9151916c01951f45bcb3b10c2e59a1873
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/load_llff.py
@@ -0,0 +1,339 @@
+# @lint-ignore-every LICENSELINT
+# Adapted from https://github.com/bmild/nerf/blob/master/load_llff.py
+# Copyright (c) 2020 bmild
+import logging
+import os
+import warnings
+
+import numpy as np
+
+from PIL import Image
+
+
+# Slightly modified version of LLFF data loading code
+#  see https://github.com/Fyusion/LLFF for original
+
+logger = logging.getLogger(__name__)
+
+
+def _minify(basedir, path_manager, factors=(), resolutions=()):
+    needtoload = False
+    for r in factors:
+        imgdir = os.path.join(basedir, "images_{}".format(r))
+        if not _exists(path_manager, imgdir):
+            needtoload = True
+    for r in resolutions:
+        imgdir = os.path.join(basedir, "images_{}x{}".format(r[1], r[0]))
+        if not _exists(path_manager, imgdir):
+            needtoload = True
+    if not needtoload:
+        return
+    assert path_manager is None
+
+    from subprocess import check_output
+
+    imgdir = os.path.join(basedir, "images")
+    imgs = [os.path.join(imgdir, f) for f in sorted(_ls(path_manager, imgdir))]
+    imgs = [f for f in imgs if f.endswith("JPG", "jpg", "png", "jpeg", "PNG")]
+    imgdir_orig = imgdir
+
+    wd = os.getcwd()
+
+    for r in factors + resolutions:
+        if isinstance(r, int):
+            name = "images_{}".format(r)
+            resizearg = "{}%".format(100.0 / r)
+        else:
+            name = "images_{}x{}".format(r[1], r[0])
+            resizearg = "{}x{}".format(r[1], r[0])
+        imgdir = os.path.join(basedir, name)
+        if os.path.exists(imgdir):
+            continue
+
+        logger.info(f"Minifying {r}, {basedir}")
+
+        os.makedirs(imgdir)
+        check_output("cp {}/* {}".format(imgdir_orig, imgdir), shell=True)
+
+        ext = imgs[0].split(".")[-1]
+        args = " ".join(
+            ["mogrify", "-resize", resizearg, "-format", "png", "*.{}".format(ext)]
+        )
+        logger.info(args)
+        os.chdir(imgdir)
+        check_output(args, shell=True)
+        os.chdir(wd)
+
+        if ext != "png":
+            check_output("rm {}/*.{}".format(imgdir, ext), shell=True)
+            logger.info("Removed duplicates")
+        logger.info("Done")
+
+
+def _load_data(
+    basedir, factor=None, width=None, height=None, load_imgs=True, path_manager=None
+):
+
+    poses_arr = np.load(
+        _local_path(path_manager, os.path.join(basedir, "poses_bounds.npy"))
+    )
+    poses = poses_arr[:, :-2].reshape([-1, 3, 5]).transpose([1, 2, 0])
+    bds = poses_arr[:, -2:].transpose([1, 0])
+
+    img0 = [
+        os.path.join(basedir, "images", f)
+        for f in sorted(_ls(path_manager, os.path.join(basedir, "images")))
+        if f.endswith("JPG") or f.endswith("jpg") or f.endswith("png")
+    ][0]
+
+    def imread(f):
+        return np.array(Image.open(f))
+
+    sh = imread(_local_path(path_manager, img0)).shape
+
+    sfx = ""
+
+    if factor is not None:
+        sfx = "_{}".format(factor)
+        _minify(basedir, path_manager, factors=[factor])
+        factor = factor
+    elif height is not None:
+        factor = sh[0] / float(height)
+        width = int(sh[1] / factor)
+        _minify(basedir, path_manager, resolutions=[[height, width]])
+        sfx = "_{}x{}".format(width, height)
+    elif width is not None:
+        factor = sh[1] / float(width)
+        height = int(sh[0] / factor)
+        _minify(basedir, path_manager, resolutions=[[height, width]])
+        sfx = "_{}x{}".format(width, height)
+    else:
+        factor = 1
+
+    imgdir = os.path.join(basedir, "images" + sfx)
+    if not _exists(path_manager, imgdir):
+        raise ValueError(f"{imgdir} does not exist, returning")
+
+    imgfiles = [
+        _local_path(path_manager, os.path.join(imgdir, f))
+        for f in sorted(_ls(path_manager, imgdir))
+        if f.endswith("JPG") or f.endswith("jpg") or f.endswith("png")
+    ]
+    if poses.shape[-1] != len(imgfiles):
+        raise ValueError(
+            "Mismatch between imgs {} and poses {} !!!!".format(
+                len(imgfiles), poses.shape[-1]
+            )
+        )
+
+    sh = imread(imgfiles[0]).shape
+    poses[:2, 4, :] = np.array(sh[:2]).reshape([2, 1])
+    poses[2, 4, :] = poses[2, 4, :] * 1.0 / factor
+
+    if not load_imgs:
+        return poses, bds
+
+    imgs = imgs = [imread(f)[..., :3] / 255.0 for f in imgfiles]
+    imgs = np.stack(imgs, -1)
+
+    logger.info(f"Loaded image data, shape {imgs.shape}")
+    return poses, bds, imgs
+
+
+def normalize(x):
+    denom = np.linalg.norm(x)
+    if denom < 0.001:
+        warnings.warn("unsafe normalize()")
+    return x / denom
+
+
+def viewmatrix(z, up, pos):
+    vec2 = normalize(z)
+    vec1_avg = up
+    vec0 = normalize(np.cross(vec1_avg, vec2))
+    vec1 = normalize(np.cross(vec2, vec0))
+    m = np.stack([vec0, vec1, vec2, pos], 1)
+    return m
+
+
+def ptstocam(pts, c2w):
+    tt = np.matmul(c2w[:3, :3].T, (pts - c2w[:3, 3])[..., np.newaxis])[..., 0]
+    return tt
+
+
+def poses_avg(poses):
+
+    hwf = poses[0, :3, -1:]
+
+    center = poses[:, :3, 3].mean(0)
+    vec2 = normalize(poses[:, :3, 2].sum(0))
+    up = poses[:, :3, 1].sum(0)
+    c2w = np.concatenate([viewmatrix(vec2, up, center), hwf], 1)
+
+    return c2w
+
+
+def render_path_spiral(c2w, up, rads, focal, zdelta, zrate, rots, N):
+    render_poses = []
+    rads = np.array(list(rads) + [1.0])
+    hwf = c2w[:, 4:5]
+
+    for theta in np.linspace(0.0, 2.0 * np.pi * rots, N + 1)[:-1]:
+        c = np.dot(
+            c2w[:3, :4],
+            np.array([np.cos(theta), -np.sin(theta), -np.sin(theta * zrate), 1.0])
+            * rads,
+        )
+        z = normalize(c - np.dot(c2w[:3, :4], np.array([0, 0, -focal, 1.0])))
+        render_poses.append(np.concatenate([viewmatrix(z, up, c), hwf], 1))
+    return render_poses
+
+
+def recenter_poses(poses):
+
+    poses_ = poses + 0
+    bottom = np.reshape([0, 0, 0, 1.0], [1, 4])
+    c2w = poses_avg(poses)
+    c2w = np.concatenate([c2w[:3, :4], bottom], -2)
+    bottom = np.tile(np.reshape(bottom, [1, 1, 4]), [poses.shape[0], 1, 1])
+    poses = np.concatenate([poses[:, :3, :4], bottom], -2)
+
+    poses = np.linalg.inv(c2w) @ poses
+    poses_[:, :3, :4] = poses[:, :3, :4]
+    poses = poses_
+    return poses
+
+
+def spherify_poses(poses, bds):
+    """
+    Re-express poses around a common centre and build a circular camera path.
+
+    The poses are transformed into a frame whose origin is `pt_mindist`
+    (computed from the camera origins and the third rotation columns) and
+    rescaled so that the root-mean-square distance of the camera positions
+    from that origin is 1. A set of 120 poses on a circle is generated as well.
+
+    Args:
+        poses: array indexed as `poses[i, :3, :]`; column 2 is used as the ray
+            direction and column 3 as the ray origin of each pose. The last
+            column of `poses[0]` is appended to every returned pose.
+        bds: scaled with `*=` by the same factor as the positions, so a NumPy
+            array passed in is modified in place.
+
+    Returns:
+        Tuple `(poses_reset, new_poses, bds)`: the transformed input poses, the
+        generated circular poses, and the rescaled bounds.
+    """
+    def add_row_to_homogenize_transform(p):
+        r"""Add the last row to homogenize 3 x 4 transformation matrices."""
+        return np.concatenate(
+            [p, np.tile(np.reshape(np.eye(4)[-1, :], [1, 1, 4]), [p.shape[0], 1, 1])], 1
+        )
+
+    # p34_to_44 = lambda p: np.concatenate(
+    #     [p, np.tile(np.reshape(np.eye(4)[-1, :], [1, 1, 4]), [p.shape[0], 1, 1])], 1
+    # )
+
+    p34_to_44 = add_row_to_homogenize_transform
+
+    # Keep the trailing axis (2:3 / 3:4) so these are column vectors.
+    rays_d = poses[:, :3, 2:3]
+    rays_o = poses[:, :3, 3:4]
+
+    def min_line_dist(rays_o, rays_d):
+        # NOTE(review): this looks like the least-squares point closest to all
+        # rays, which presumes rays_d are unit length - confirm.
+        A_i = np.eye(3) - rays_d * np.transpose(rays_d, [0, 2, 1])
+        b_i = -A_i @ rays_o
+        pt_mindist = np.squeeze(
+            -np.linalg.inv((np.transpose(A_i, [0, 2, 1]) @ A_i).mean(0)) @ (b_i).mean(0)
+        )
+        return pt_mindist
+
+    pt_mindist = min_line_dist(rays_o, rays_d)
+
+    center = pt_mindist
+    # Mean offset of the camera positions from the centre, used as the third
+    # axis of the new frame.
+    up = (poses[:, :3, 3] - center).mean(0)
+
+    vec0 = normalize(up)
+    # The fixed vector [0.1, 0.2, 0.3] only serves to derive an axis
+    # perpendicular to vec0.
+    vec1 = normalize(np.cross([0.1, 0.2, 0.3], vec0))
+    vec2 = normalize(np.cross(vec0, vec1))
+    pos = center
+    c2w = np.stack([vec1, vec2, vec0, pos], 1)
+
+    # Express every input pose in the new frame.
+    poses_reset = np.linalg.inv(p34_to_44(c2w[None])) @ p34_to_44(poses[:, :3, :4])
+
+    # Root-mean-square distance of the camera positions from the new origin.
+    rad = np.sqrt(np.mean(np.sum(np.square(poses_reset[:, :3, 3]), -1)))
+
+    # Rescale positions, bounds and the radius itself by the same factor.
+    sc = 1.0 / rad
+    poses_reset[:, :3, 3] *= sc
+    bds *= sc
+    rad *= sc
+
+    centroid = np.mean(poses_reset[:, :3, 3], 0)
+    zh = centroid[2]
+    # Radius of the circle at height zh on a sphere of radius rad.
+    radcircle = np.sqrt(rad**2 - zh**2)
+    new_poses = []
+
+    # 120 samples including both endpoints 0 and 2*pi.
+    for th in np.linspace(0.0, 2.0 * np.pi, 120):
+
+        camorigin = np.array([radcircle * np.cos(th), radcircle * np.sin(th), zh])
+        up = np.array([0, 0, -1.0])
+
+        vec2 = normalize(camorigin)
+        vec0 = normalize(np.cross(vec2, up))
+        vec1 = normalize(np.cross(vec2, vec0))
+        pos = camorigin
+        p = np.stack([vec0, vec1, vec2, pos], 1)
+
+        new_poses.append(p)
+
+    new_poses = np.stack(new_poses, 0)
+
+    # Append the last column of the first input pose to every generated pose.
+    new_poses = np.concatenate(
+        [new_poses, np.broadcast_to(poses[0, :3, -1:], new_poses[:, :3, -1:].shape)], -1
+    )
+    # Same for the transformed input poses.
+    poses_reset = np.concatenate(
+        [
+            poses_reset[:, :3, :4],
+            np.broadcast_to(poses[0, :3, -1:], poses_reset[:, :3, -1:].shape),
+        ],
+        -1,
+    )
+
+    return poses_reset, new_poses, bds
+
+
+def _local_path(path_manager, path):
+    if path_manager is None:
+        return path
+    return path_manager.get_local_path(path)
+
+
+def _ls(path_manager, path):
+    if path_manager is None:
+        return os.listdir(path)
+    return path_manager.ls(path)
+
+
+def _exists(path_manager, path):
+    if path_manager is None:
+        return os.path.exists(path)
+    return path_manager.exists(path)
+
+
+def load_llff_data(
+    basedir,
+    factor=8,
+    recenter=True,
+    bd_factor=0.75,
+    spherify=False,
+    path_zflat=False,
+    path_manager=None,
+):
+
+    poses, bds, imgs = _load_data(
+        basedir, factor=factor, path_manager=path_manager
+    )  # factor=8 downsamples original imgs by 8x
+    logger.info(f"Loaded {basedir}, {bds.min()}, {bds.max()}")
+
+    # Correct rotation matrix ordering and move variable dim to axis 0
+    poses = np.concatenate([poses[:, 1:2, :], -poses[:, 0:1, :], poses[:, 2:, :]], 1)
+    poses = np.moveaxis(poses, -1, 0).astype(np.float32)
+    imgs = np.moveaxis(imgs, -1, 0).astype(np.float32)
+    images = imgs
+    bds = np.moveaxis(bds, -1, 0).astype(np.float32)
+
+    # Rescale if bd_factor is provided
+    sc = 1.0 if bd_factor is None else 1.0 / (bds.min() * bd_factor)
+    poses[:, :3, 3] *= sc
+    bds *= sc
+
+    if recenter:
+        poses = recenter_poses(poses)
+
+    if spherify:
+        poses, render_poses, bds = spherify_poses(poses, bds)
+
+    images = images.astype(np.float32)
+    poses = poses.astype(np.float32)
+
+    return images, poses, bds
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/orm_types.py b/pytorch3d/pytorch3d/implicitron/dataset/orm_types.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e916021a9a80d48ae8a9741694ca8f5bce38c56
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/orm_types.py
@@ -0,0 +1,189 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# This functionality requires SQLAlchemy 2.0 or later.
+
+import math
+import struct
+from typing import Optional, Tuple
+
+import numpy as np
+
+from pytorch3d.implicitron.dataset.types import (
+    DepthAnnotation,
+    ImageAnnotation,
+    MaskAnnotation,
+    PointCloudAnnotation,
+    VideoAnnotation,
+    ViewpointAnnotation,
+)
+
+from sqlalchemy import LargeBinary
+from sqlalchemy.orm import (
+    composite,
+    DeclarativeBase,
+    Mapped,
+    mapped_column,
+    MappedAsDataclass,
+)
+from sqlalchemy.types import TypeDecorator
+
+
+# these produce policies to serialize structured types to blobs
+def ArrayTypeFactory(shape=None):
+    if shape is None:
+
+        class VariableShapeNumpyArrayType(TypeDecorator):
+            impl = LargeBinary
+
+            def process_bind_param(self, value, dialect):
+                if value is None:
+                    return None
+
+                ndim_bytes = np.int32(value.ndim).tobytes()
+                shape_bytes = np.array(value.shape, dtype=np.int64).tobytes()
+                value_bytes = value.astype(np.float32).tobytes()
+                return ndim_bytes + shape_bytes + value_bytes
+
+            def process_result_value(self, value, dialect):
+                if value is None:
+                    return None
+
+                ndim = np.frombuffer(value[:4], dtype=np.int32)[0]
+                value_start = 4 + 8 * ndim
+                shape = np.frombuffer(value[4:value_start], dtype=np.int64)
+                assert shape.shape == (ndim,)
+                return np.frombuffer(value[value_start:], dtype=np.float32).reshape(
+                    shape
+                )
+
+        return VariableShapeNumpyArrayType
+
+    class NumpyArrayType(TypeDecorator):
+        impl = LargeBinary
+
+        def process_bind_param(self, value, dialect):
+            if value is not None:
+                if value.shape != shape:
+                    raise ValueError(f"Passed an array of wrong shape: {value.shape}")
+                return value.astype(np.float32).tobytes()
+            return None
+
+        def process_result_value(self, value, dialect):
+            if value is not None:
+                return np.frombuffer(value, dtype=np.float32).reshape(shape)
+            return None
+
+    return NumpyArrayType
+
+
+def TupleTypeFactory(dtype=float, shape: Tuple[int, ...] = (2,)):
+    format_symbol = {
+        float: "f",  # float32
+        int: "i",  # int32
+    }[dtype]
+
+    class TupleType(TypeDecorator):
+        impl = LargeBinary
+        _format = format_symbol * math.prod(shape)
+
+        def process_bind_param(self, value, _):
+            if value is None:
+                return None
+
+            if len(shape) > 1:
+                value = np.array(value, dtype=dtype).reshape(-1)
+
+            return struct.pack(TupleType._format, *value)
+
+        def process_result_value(self, value, _):
+            if value is None:
+                return None
+
+            loaded = struct.unpack(TupleType._format, value)
+            if len(shape) > 1:
+                loaded = _rec_totuple(
+                    np.array(loaded, dtype=dtype).reshape(shape).tolist()
+                )
+
+            return loaded
+
+    return TupleType
+
+
+def _rec_totuple(t):
+    if isinstance(t, list):
+        return tuple(_rec_totuple(x) for x in t)
+
+    return t
+
+
+class Base(MappedAsDataclass, DeclarativeBase):
+    """
+    Common declarative base for the ORM classes in this module.
+
+    Because it mixes in `MappedAsDataclass`, subclasses will be converted
+    to dataclasses.
+    """
+
+
+class SqlFrameAnnotation(Base):
+    """
+    ORM mapping of a per-frame annotation, stored in the `frame_annots` table.
+
+    A row is identified by the pair (`sequence_name`, `frame_number`). The
+    structured members are composites spread over several underscore-prefixed
+    columns; tuple-valued members are stored as binary blobs via
+    `TupleTypeFactory`.
+    """
+
+    __tablename__ = "frame_annots"
+
+    # Composite primary key.
+    sequence_name: Mapped[str] = mapped_column(primary_key=True)
+    frame_number: Mapped[int] = mapped_column(primary_key=True)
+    # Indexed column.
+    frame_timestamp: Mapped[float] = mapped_column(index=True)
+
+    # Image path plus its size stored as a blob of two int32 values.
+    image: Mapped[ImageAnnotation] = composite(
+        mapped_column("_image_path"),
+        mapped_column("_image_size", TupleTypeFactory(int)),
+    )
+
+    # All depth columns are nullable.
+    depth: Mapped[DepthAnnotation] = composite(
+        mapped_column("_depth_path", nullable=True),
+        mapped_column("_depth_scale_adjustment", nullable=True),
+        mapped_column("_depth_mask_path", nullable=True),
+    )
+
+    # All mask columns are nullable; the mask mass is indexed and the bounding
+    # box is a blob of four float32 values.
+    mask: Mapped[MaskAnnotation] = composite(
+        mapped_column("_mask_path", nullable=True),
+        mapped_column("_mask_mass", index=True, nullable=True),
+        mapped_column(
+            "_mask_bounding_box_xywh",
+            TupleTypeFactory(float, shape=(4,)),
+            nullable=True,
+        ),
+    )
+
+    # All viewpoint columns are nullable. R is stored as 3x3 float32 values,
+    # T as three, and focal length and principal point as two each.
+    viewpoint: Mapped[ViewpointAnnotation] = composite(
+        mapped_column(
+            "_viewpoint_R", TupleTypeFactory(float, shape=(3, 3)), nullable=True
+        ),
+        mapped_column(
+            "_viewpoint_T", TupleTypeFactory(float, shape=(3,)), nullable=True
+        ),
+        mapped_column(
+            "_viewpoint_focal_length", TupleTypeFactory(float), nullable=True
+        ),
+        mapped_column(
+            "_viewpoint_principal_point", TupleTypeFactory(float), nullable=True
+        ),
+        mapped_column("_viewpoint_intrinsics_format", nullable=True),
+    )
+
+
+class SqlSequenceAnnotation(Base):
+    """
+    ORM mapping of a per-sequence annotation, stored in the `sequence_annots`
+    table and keyed by `sequence_name`.
+    """
+
+    __tablename__ = "sequence_annots"
+
+    # Primary key.
+    sequence_name: Mapped[str] = mapped_column(primary_key=True)
+    # Indexed column.
+    category: Mapped[str] = mapped_column(index=True)
+
+    # Both video columns are nullable.
+    video: Mapped[VideoAnnotation] = composite(
+        mapped_column("_video_path", nullable=True),
+        mapped_column("_video_length", nullable=True),
+    )
+    # All point cloud columns are nullable.
+    point_cloud: Mapped[PointCloudAnnotation] = composite(
+        mapped_column("_point_cloud_path", nullable=True),
+        mapped_column("_point_cloud_quality_score", nullable=True),
+        mapped_column("_point_cloud_n_points", nullable=True),
+    )
+    # the bigger the better
+    viewpoint_quality_score: Mapped[Optional[float]] = mapped_column()
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/rendered_mesh_dataset_map_provider.py b/pytorch3d/pytorch3d/implicitron/dataset/rendered_mesh_dataset_map_provider.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ce99fb5d9ef4600f5f065b015626cd6e56668c1
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/rendered_mesh_dataset_map_provider.py
@@ -0,0 +1,215 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from os.path import dirname, join, realpath
+from typing import Optional, Tuple
+
+import torch
+from pytorch3d.implicitron.tools.config import registry, run_auto_creation
+from pytorch3d.io import IO
+from pytorch3d.renderer import (
+    AmbientLights,
+    BlendParams,
+    CamerasBase,
+    FoVPerspectiveCameras,
+    HardPhongShader,
+    look_at_view_transform,
+    MeshRasterizer,
+    MeshRendererWithFragments,
+    PointLights,
+    RasterizationSettings,
+)
+from pytorch3d.structures.meshes import Meshes
+
+from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, PathManagerFactory
+from .single_sequence_dataset import SingleSceneDataset
+from .utils import DATASET_TYPE_KNOWN
+
+
+@registry.register
+class RenderedMeshDatasetMapProvider(DatasetMapProviderBase):  # pyre-ignore [13]
+    """
+    A simple single-scene dataset based on PyTorch3D renders of a mesh.
+    Provides `num_views` renders of the mesh as train, with no val
+    and test. The renders are generated from viewpoints sampled at uniformly
+    distributed azimuth intervals. The elevation is kept constant so that the
+    camera's vertical position coincides with the equator.
+
+    By default, uses Keenan Crane's cow model, and the camera locations are
+    set to make sense for that.
+
+    Although the rendering used to generate this dataset will use a GPU
+    if one is available, the data it produces is on the CPU just like
+    the data returned by implicitron's other dataset map providers.
+    This is because both datasets and models can be large, so implicitron's
+    training loop expects data on the CPU and only moves
+    what it needs to the device.
+
+    For a more detailed explanation of this code, please refer to the
+    docs/tutorials/fit_textured_mesh.ipynb notebook.
+
+    Members:
+        num_views: The number of generated renders.
+        data_file: The path of the mesh file to load. If None, the cow mesh
+            (docs/tutorials/data/cow_mesh/cow.obj) is looked up relative to
+            the location of this file.
+        azimuth_range: number of degrees on each side of the start position to
+            take samples
+        distance: distance from camera centres to the origin.
+        resolution: the common height and width of the output images.
+        use_point_light: whether to use a particular point light as opposed
+            to ambient white.
+        gpu_idx: which gpu to use for rendering the mesh. If None, or if CUDA
+            is not available, the rendering happens on the CPU.
+        path_manager_factory: (Optional) An object that generates an instance of
+            PathManager that can translate provided file paths.
+        path_manager_factory_class_type: The class type of `path_manager_factory`.
+    """
+
+    num_views: int = 40
+    data_file: Optional[str] = None
+    azimuth_range: float = 180
+    distance: float = 2.7
+    resolution: int = 128
+    use_point_light: bool = True
+    gpu_idx: Optional[int] = 0
+    path_manager_factory: PathManagerFactory
+    path_manager_factory_class_type: str = "PathManagerFactory"
+
+    def get_dataset_map(self) -> DatasetMap:
+        """Return a DatasetMap with the rendered train set and no val/test."""
+        # pyre-ignore[16]
+        return DatasetMap(train=self.train_dataset, val=None, test=None)
+
+    def get_all_train_cameras(self) -> CamerasBase:
+        """Return the batch of cameras the training images were rendered from."""
+        # pyre-ignore[16]
+        return self.poses
+
+    def __post_init__(self) -> None:
+        """Load the mesh, render it and store the resulting cameras and dataset."""
+        super().__init__()
+        run_auto_creation(self)
+        # Render on the requested GPU when possible, otherwise on the CPU.
+        if torch.cuda.is_available() and self.gpu_idx is not None:
+            device = torch.device(f"cuda:{self.gpu_idx}")
+        else:
+            device = torch.device("cpu")
+        if self.data_file is None:
+            # Default: the cow mesh, four directory levels above this file.
+            data_file = join(
+                dirname(dirname(dirname(dirname(realpath(__file__))))),
+                "docs",
+                "tutorials",
+                "data",
+                "cow_mesh",
+                "cow.obj",
+            )
+        else:
+            data_file = self.data_file
+        io = IO(path_manager=self.path_manager_factory.get())
+        mesh = io.load_mesh(data_file, device=device)
+        poses, images, masks = _generate_cow_renders(
+            num_views=self.num_views,
+            mesh=mesh,
+            azimuth_range=self.azimuth_range,
+            distance=self.distance,
+            resolution=self.resolution,
+            device=device,
+            use_point_light=self.use_point_light,
+        )
+        # Everything handed to the dataset is moved to the CPU.
+        # pyre-ignore[16]
+        self.poses = poses.cpu()
+        # pyre-ignore[16]
+        self.train_dataset = SingleSceneDataset(  # pyre-ignore[28]
+            object_name="cow",
+            # Channels are moved in front of height and width.
+            images=list(images.permute(0, 3, 1, 2).cpu()),
+            # A singleton channel axis is added to each mask.
+            fg_probabilities=list(masks[:, None].cpu()),
+            poses=[self.poses[i] for i in range(len(poses))],
+            frame_types=[DATASET_TYPE_KNOWN] * len(poses),
+            eval_batches=None,
+        )
+
+
+@torch.no_grad()
+def _generate_cow_renders(
+    *,
+    num_views: int,
+    mesh: Meshes,
+    azimuth_range: float,
+    distance: float,
+    resolution: int,
+    device: torch.device,
+    use_point_light: bool,
+) -> Tuple[CamerasBase, torch.Tensor, torch.Tensor]:
+    """
+    Render a mesh from `num_views` viewpoints at constant zero elevation.
+
+    Note that `mesh` is centered and rescaled in place before rendering.
+
+    Args:
+        num_views: number of viewpoints to render.
+        mesh: the mesh to render; its vertices are modified in place.
+        azimuth_range: the azimuths are spread evenly over
+            `[180 - azimuth_range, 180 + azimuth_range]` degrees.
+        distance: passed as `dist` to `look_at_view_transform`.
+        resolution: passed as `image_size` to the rasterization settings.
+        device: device used for the lights, cameras and shader.
+        use_point_light: if True a point light at (0, 0, -3) is used,
+            otherwise ambient lighting.
+
+    Returns:
+        cameras: A batch of `num_views` `FoVPerspectiveCameras` from which the
+            images are rendered.
+        images: A tensor of shape `(num_views, height, width, 3)` containing
+            the rendered images.
+        silhouettes: A tensor of shape `(num_views, height, width)` containing
+            the rendered silhouettes.
+    """
+
+    # The mesh has already been loaded by the caller.
+
+    # We center the mesh at its vertex mean and divide by the largest absolute
+    # per-axis deviation, so every vertex coordinate ends up in [-1, 1].
+    # Note that normalizing the target mesh speeds up the optimization but is
+    # not necessary!
+    verts = mesh.verts_packed()
+    N = verts.shape[0]
+    center = verts.mean(0)
+    scale = max((verts - center).abs().max(0)[0])
+    mesh.offset_verts_(-(center.expand(N, 3)))
+    mesh.scale_verts_((1.0 / float(scale)))
+
+    # Get a batch of viewing angles.
+    elev = torch.linspace(0, 0, num_views)  # keep constant
+    azim = torch.linspace(-azimuth_range, azimuth_range, num_views) + 180.0
+
+    # Place a point light in front of the object. The front of
+    # the cow is facing the -z direction.
+    if use_point_light:
+        lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])
+    else:
+        lights = AmbientLights(device=device)
+
+    # Initialize a perspective camera that represents a batch of different
+    # viewing angles. All the cameras helper methods support mixed type inputs and
+    # broadcasting. So we can view the camera from a fixed distance, and
+    # then specify elevation and azimuth angles for each viewpoint as tensors.
+    R, T = look_at_view_transform(dist=distance, elev=elev, azim=azim)
+    cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+
+    # Define the settings for rasterization and shading.
+    # As we are rendering images for visualization
+    # purposes only we will set faces_per_pixel=1 and blur_radius=0.0. Refer to
+    # rasterize_meshes.py for explanations of these parameters.  We also leave
+    # bin_size and max_faces_per_bin to their default values of None, which sets
+    # their values using heuristics and ensures that the faster coarse-to-fine
+    # rasterization method is used.  Refer to docs/notes/renderer.md for an
+    # explanation of the difference between naive and coarse-to-fine rasterization.
+    raster_settings = RasterizationSettings(
+        image_size=resolution, blur_radius=0.0, faces_per_pixel=1
+    )
+
+    # Create a Phong renderer by composing a rasterizer and a shader. The textured
+    # Phong shader will interpolate the texture uv coordinates for each vertex,
+    # sample from a texture image and apply the Phong lighting model
+    blend_params = BlendParams(sigma=1e-4, gamma=1e-4, background_color=(0.0, 0.0, 0.0))
+    rasterizer_type = MeshRasterizer
+    renderer = MeshRendererWithFragments(
+        rasterizer=rasterizer_type(cameras=cameras, raster_settings=raster_settings),
+        shader=HardPhongShader(
+            device=device, cameras=cameras, lights=lights, blend_params=blend_params
+        ),
+    )
+
+    # Create a batch of meshes by repeating the cow mesh and associated textures.
+    # Meshes has a useful `extend` method which allows us do this very easily.
+    # This also extends the textures.
+    meshes = mesh.extend(num_views)
+
+    # Render the cow mesh from each viewing angle
+    target_images, fragments = renderer(meshes, cameras=cameras, lights=lights)
+    # A pixel belongs to the silhouette when its first rasterized face index
+    # is non-negative.
+    silhouette_binary = (fragments.pix_to_face[..., 0] >= 0).float()
+
+    # Only the first three image channels are returned.
+    return cameras, target_images[..., :3], silhouette_binary
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/scene_batch_sampler.py b/pytorch3d/pytorch3d/implicitron/dataset/scene_batch_sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..f724fd07fd5cde4d218cbcb91fdf9e14d648f339
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/scene_batch_sampler.py
@@ -0,0 +1,214 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import warnings
+from collections import Counter
+from dataclasses import dataclass, field
+from typing import Dict, Iterable, Iterator, List, Sequence, Tuple
+
+import numpy as np
+from torch.utils.data.sampler import Sampler
+
+from .dataset_base import DatasetBase
+
+
+@dataclass(eq=False)  # TODO: do we need this if not init from config?
+class SceneBatchSampler(Sampler[List[int]]):
+    """
+    A class for sampling training batches with a controlled composition
+    of sequences.
+    """
+
+    dataset: DatasetBase
+    batch_size: int
+    num_batches: int
+    # the sampler first samples a random element k from this list and then
+    # takes k random frames per sequence
+    images_per_seq_options: Sequence[int]
+
+    # if True, will sample a contiguous interval of frames in the sequence
+    # it first finds the connected segments within the sequence of sufficient length,
+    # then samples a random pivot element among them and ideally uses it as a middle
+    # of the temporal window, shifting the borders where necessary.
+    # This strategy mitigates the bias against shorter segments and their boundaries.
+    sample_consecutive_frames: bool = False
+    # if a number > 0, then used to define the maximum difference in frame_number
+    # of neighbouring frames when forming connected segments; otherwise the whole
+    # sequence is considered a segment regardless of frame numbers
+    consecutive_frames_max_gap: int = 0
+    # same but for timestamps if they are available
+    consecutive_frames_max_gap_seconds: float = 0.1
+
+    # if True, the sampler first reads from the dataset the mapping between
+    # sequence names and their categories.
+    # During batch sampling, the sampler ensures uniform distribution over the categories
+    # of the sampled sequences.
+    category_aware: bool = True
+
+    seq_names: List[str] = field(init=False)
+
+    category_to_sequence_names: Dict[str, List[str]] = field(init=False)
+    categories: List[str] = field(init=False)
+
+    def __post_init__(self) -> None:
+        if self.batch_size <= 0:
+            raise ValueError(
+                "batch_size should be a positive integral value, "
+                f"but got batch_size={self.batch_size}"
+            )
+
+        if len(self.images_per_seq_options) < 1:
+            raise ValueError("n_per_seq_posibilities list cannot be empty")
+
+        self.seq_names = list(self.dataset.sequence_names())
+
+        if self.category_aware:
+            self.category_to_sequence_names = self.dataset.category_to_sequence_names()
+            self.categories = list(self.category_to_sequence_names.keys())
+
+    def __len__(self) -> int:
+        return self.num_batches
+
+    def __iter__(self) -> Iterator[List[int]]:
+        for batch_idx in range(len(self)):
+            batch = self._sample_batch(batch_idx)
+            yield batch
+
+    def _sample_batch(self, batch_idx) -> List[int]:
+        n_per_seq = np.random.choice(self.images_per_seq_options)
+        n_seqs = -(-self.batch_size // n_per_seq)  # round up
+
+        if self.category_aware:
+            # first sample categories at random, these can be repeated in the batch
+            chosen_cat = _capped_random_choice(self.categories, n_seqs, replace=True)
+            # then randomly sample a set of unique sequences within each category
+            chosen_seq = []
+            for cat, n_per_category in Counter(chosen_cat).items():
+                category_chosen_seq = _capped_random_choice(
+                    self.category_to_sequence_names[cat],
+                    n_per_category,
+                    replace=False,
+                )
+                chosen_seq.extend([str(s) for s in category_chosen_seq])
+        else:
+            chosen_seq = _capped_random_choice(
+                self.seq_names,
+                n_seqs,
+                replace=False,
+            )
+
+        if self.sample_consecutive_frames:
+            frame_idx = []
+            for seq in chosen_seq:
+                segment_index = self._build_segment_index(seq, n_per_seq)
+
+                segment, idx = segment_index[np.random.randint(len(segment_index))]
+                if len(segment) <= n_per_seq:
+                    frame_idx.append(segment)
+                else:
+                    start = np.clip(idx - n_per_seq // 2, 0, len(segment) - n_per_seq)
+                    frame_idx.append(segment[start : start + n_per_seq])
+
+        else:
+            frame_idx = [
+                _capped_random_choice(
+                    list(self.dataset.sequence_indices_in_order(seq)),
+                    n_per_seq,
+                    replace=False,
+                )
+                for seq in chosen_seq
+            ]
+        frame_idx = np.concatenate(frame_idx)[: self.batch_size].tolist()
+        if len(frame_idx) < self.batch_size:
+            warnings.warn(
+                "Batch size smaller than self.batch_size!"
+                + " (This is fine for experiments with a single scene and viewpooling)"
+            )
+        return frame_idx
+
+    def _build_segment_index(self, seq: str, size: int) -> List[Tuple[List[int], int]]:
+        """
+        Returns a list of (segment, index) tuples, one per eligible frame, where
+            segment is a list of frame indices in the contiguous segment the frame
+            belongs to index is the frame's index within that segment.
+        Segment references are repeated but the memory is shared.
+        """
+        if (
+            self.consecutive_frames_max_gap > 0
+            or self.consecutive_frames_max_gap_seconds > 0.0
+        ):
+            segments = self._split_to_segments(
+                self.dataset.sequence_frames_in_order(seq)
+            )
+            segments = _cull_short_segments(segments, size)
+            if not segments:
+                raise AssertionError("Empty segments after culling")
+        else:
+            segments = [list(self.dataset.sequence_indices_in_order(seq))]
+
+        # build an index of segment for random selection of a pivot frame
+        segment_index = [
+            (segment, i) for segment in segments for i in range(len(segment))
+        ]
+
+        return segment_index
+
+    def _split_to_segments(
+        self, sequence_timestamps: Iterable[Tuple[float, int, int]]
+    ) -> List[List[int]]:
+        if (
+            self.consecutive_frames_max_gap <= 0
+            and self.consecutive_frames_max_gap_seconds <= 0.0
+        ):
+            raise AssertionError("This function is only needed for non-trivial max_gap")
+
+        segments = []
+        last_no = -self.consecutive_frames_max_gap - 1  # will trigger a new segment
+        last_ts = -self.consecutive_frames_max_gap_seconds - 1.0
+        for ts, no, idx in sequence_timestamps:
+            if ts <= 0.0 and no <= last_no:
+                raise AssertionError(
+                    "Sequence frames are not ordered while timestamps are not given"
+                )
+
+            if (
+                no - last_no > self.consecutive_frames_max_gap > 0
+                or ts - last_ts > self.consecutive_frames_max_gap_seconds > 0.0
+            ):  # new group
+                segments.append([idx])
+            else:
+                segments[-1].append(idx)
+
+            last_no = no
+            last_ts = ts
+
+        return segments
+
+
+def _cull_short_segments(segments: List[List[int]], min_size: int) -> List[List[int]]:
+    lengths = [(len(segment), segment) for segment in segments]
+    max_len, longest_segment = max(lengths)
+
+    if max_len < min_size:
+        return [longest_segment]
+
+    return [segment for segment in segments if len(segment) >= min_size]
+
+
+def _capped_random_choice(x, size, replace: bool = True):
+    """
+    if replace==True
+        randomly chooses from x `size` elements without replacement if len(x)>size
+        else allows replacement and selects `size` elements again.
+    if replace==False
+        randomly chooses from x `min(len(x), size)` elements without replacement
+    """
+    len_x = x if isinstance(x, int) else len(x)
+    if replace:
+        return np.random.choice(x, size=size, replace=len_x < size)
+    else:
+        return np.random.choice(x, size=min(size, len_x), replace=False)
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/single_sequence_dataset.py b/pytorch3d/pytorch3d/implicitron/dataset/single_sequence_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..1090faa1e5734caad18e3af5b9dcc53c1edeef12
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/single_sequence_dataset.py
@@ -0,0 +1,206 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+# This file defines a base class for dataset map providers which
+# provide data for a single scene.
+
+from dataclasses import field
+from typing import Iterable, Iterator, List, Optional, Sequence, Tuple
+
+import numpy as np
+import torch
+from pytorch3d.implicitron.tools.config import (
+    Configurable,
+    expand_args_fields,
+    run_auto_creation,
+)
+from pytorch3d.renderer import CamerasBase, join_cameras_as_batch, PerspectiveCameras
+
+from .dataset_base import DatasetBase
+from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, PathManagerFactory
+from .frame_data import FrameData
+from .utils import DATASET_TYPE_KNOWN, DATASET_TYPE_UNKNOWN
+
+_SINGLE_SEQUENCE_NAME: str = "one_sequence"
+
+
+@expand_args_fields
+class SingleSceneDataset(DatasetBase, Configurable):
+    """
+    A dataset from images from a single scene.
+    """
+
+    images: List[torch.Tensor] = field()
+    fg_probabilities: Optional[List[torch.Tensor]] = field()
+    poses: List[PerspectiveCameras] = field()
+    object_name: str = field()
+    frame_types: List[str] = field()
+    eval_batches: Optional[List[List[int]]] = field()
+
+    def sequence_names(self) -> Iterable[str]:
+        return [_SINGLE_SEQUENCE_NAME]
+
+    def __len__(self) -> int:
+        return len(self.poses)
+
+    def sequence_frames_in_order(
+        self, seq_name: str, subset_filter: Optional[Sequence[str]] = None
+    ) -> Iterator[Tuple[float, int, int]]:
+        for i in range(len(self)):
+            if subset_filter is None or self.frame_types[i] in subset_filter:
+                yield 0.0, i, i
+
+    def __getitem__(self, index) -> FrameData:
+        if index >= len(self):
+            raise IndexError(f"index {index} out of range {len(self)}")
+        image = self.images[index]
+        pose = self.poses[index]
+        frame_type = self.frame_types[index]
+        fg_probability = (
+            None if self.fg_probabilities is None else self.fg_probabilities[index]
+        )
+
+        frame_data = FrameData(
+            frame_number=index,
+            sequence_name=_SINGLE_SEQUENCE_NAME,
+            sequence_category=self.object_name,
+            camera=pose,
+            # pyre-ignore
+            image_size_hw=torch.tensor(image.shape[1:], dtype=torch.long),
+            image_rgb=image,
+            fg_probability=fg_probability,
+            frame_type=frame_type,
+        )
+        return frame_data
+
+    def get_eval_batches(self) -> Optional[List[List[int]]]:
+        return self.eval_batches
+
+
+# pyre-fixme[13]: Uninitialized attribute
+class SingleSceneDatasetMapProviderBase(DatasetMapProviderBase):
+    """
+    Base for provider of data for one scene from LLFF or blender datasets.
+
+    Subclasses implement `_load_data`, which is called once from
+    `__post_init__` and has to populate the data attributes read by
+    `_get_dataset` and `get_all_train_cameras`.
+
+    Members:
+        base_dir: directory holding the data for the scene.
+        object_name: The name of the scene (e.g. "lego"). This is just used as a label.
+            It will typically be equal to the name of the directory self.base_dir.
+        path_manager_factory: Creates path manager which may be used for
+            interpreting paths.
+        n_known_frames_for_test: If set, training frames are included in the val
+            and test datasets, and this many random training frames are added to
+            each test batch. If not set, test batches each contain just a single
+            testing frame.
+    """
+
+    base_dir: str
+    object_name: str
+    path_manager_factory: PathManagerFactory
+    path_manager_factory_class_type: str = "PathManagerFactory"
+    n_known_frames_for_test: Optional[int] = None
+
+    def __post_init__(self) -> None:
+        """Creates the configured members and loads the scene data."""
+        run_auto_creation(self)
+        self._load_data()
+
+    def _load_data(self) -> None:
+        """
+        Loads the scene; the base implementation only raises.
+
+        Raises:
+            NotImplementedError: always, unless overridden by a subclass.
+        """
+        # This must be defined by each subclass,
+        # and should set the following on self.
+        # - poses: a list of length-1 camera objects
+        # - images: [N, 3, H, W] tensor of rgb images - floats in [0,1]
+        # - fg_probabilities: None or [N, 1, H, W] of floats in [0,1]
+        # - i_split: List[List[int]] of indices for train/val/test subsets
+        #   (this is the attribute name read by the methods below).
+        raise NotImplementedError()
+
+    def _get_dataset(
+        self, split_idx: int, frame_type: str, set_eval_batches: bool = False
+    ) -> SingleSceneDataset:
+        """
+        Builds the dataset for one of the splits.
+
+        Args:
+            split_idx: position of the split in `self.i_split`;
+                `get_dataset_map` uses 0 for train, 1 for val and 2 for test.
+            frame_type: the frame type assigned to all frames of the split.
+            set_eval_batches: whether the dataset gets eval batches; if False,
+                its `eval_batches` is None.
+
+        Returns:
+            The dataset of the requested split. For a non-train split with
+            `n_known_frames_for_test` set, the training frames are appended
+            after the frames of the split.
+        """
+        # pyre-ignore[16]
+        split = self.i_split[split_idx]
+        frame_types = [frame_type] * len(split)
+        fg_probabilities = (
+            None
+            # pyre-ignore[16]
+            if self.fg_probabilities is None
+            else self.fg_probabilities[split]
+        )
+        # by default, each frame of the split forms its own batch
+        eval_batches = [[i] for i in range(len(split))]
+        if split_idx != 0 and self.n_known_frames_for_test is not None:
+            train_split = self.i_split[0]
+            if set_eval_batches:
+                # fixed seed, so the same batches are produced on every call
+                generator = np.random.default_rng(seed=0)
+                for batch in eval_batches:
+                    # using permutation so that changes to n_known_frames_for_test
+                    # result in consistent batches.
+                    to_add = generator.permutation(len(train_split))[
+                        : self.n_known_frames_for_test
+                    ]
+                    # training frames are concatenated after the frames of this
+                    # split below, hence the offset by len(split)
+                    batch.extend((to_add + len(split)).tolist())
+            split = np.concatenate([split, train_split])
+            frame_types.extend([DATASET_TYPE_KNOWN] * len(train_split))
+
+        # pyre-ignore[28]
+        return SingleSceneDataset(
+            object_name=self.object_name,
+            # pyre-ignore[16]
+            images=self.images[split],
+            fg_probabilities=fg_probabilities,
+            # pyre-ignore[16]
+            poses=[self.poses[i] for i in split],
+            frame_types=frame_types,
+            eval_batches=eval_batches if set_eval_batches else None,
+        )
+
+    def get_dataset_map(self) -> DatasetMap:
+        """
+        Returns the train/val/test datasets built from splits 0/1/2 of
+        `self.i_split`; only the test dataset gets eval batches.
+        """
+        return DatasetMap(
+            train=self._get_dataset(0, DATASET_TYPE_KNOWN),
+            val=self._get_dataset(1, DATASET_TYPE_UNKNOWN),
+            test=self._get_dataset(2, DATASET_TYPE_UNKNOWN, True),
+        )
+
+    def get_all_train_cameras(self) -> Optional[CamerasBase]:
+        """Returns the cameras of the training split joined into one batch."""
+        # pyre-ignore[16]
+        cameras = [self.poses[i] for i in self.i_split[0]]
+        return join_cameras_as_batch(cameras)
+
+
+def _interpret_blender_cameras(
+    poses: torch.Tensor, focal: float
+) -> List[PerspectiveCameras]:
+    """
+    Convert 4x4 matrices representing cameras in blender format
+    to PyTorch3D format.
+
+    Args:
+        poses: N x 3 x 4 camera matrices
+        focal: ndc space focal length
+    """
+    pose_target_cameras = []
+    for pose_target in poses:
+        pose_target = pose_target[:3, :4]
+        mtx = torch.eye(4, dtype=pose_target.dtype)
+        mtx[:3, :3] = pose_target[:3, :3].t()
+        mtx[3, :3] = pose_target[:, 3]
+        mtx = mtx.inverse()
+
+        # flip the XZ coordinates.
+        mtx[:, [0, 2]] *= -1.0
+
+        Rpt3, Tpt3 = mtx[:, :3].split([3, 1], dim=0)
+
+        focal_length_pt3 = torch.FloatTensor([[focal, focal]])
+        principal_point_pt3 = torch.FloatTensor([[0.0, 0.0]])
+
+        cameras = PerspectiveCameras(
+            focal_length=focal_length_pt3,
+            principal_point=principal_point_pt3,
+            R=Rpt3[None],
+            T=Tpt3,
+        )
+        pose_target_cameras.append(cameras)
+    return pose_target_cameras
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/sql_dataset.py b/pytorch3d/pytorch3d/implicitron/dataset/sql_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..470f5a95bf100595659918ee979d8e350aa71480
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/sql_dataset.py
@@ -0,0 +1,768 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import hashlib
+import json
+import logging
+import os
+from dataclasses import dataclass
+from typing import (
+    Any,
+    ClassVar,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    Type,
+    Union,
+)
+
+import numpy as np
+import pandas as pd
+import sqlalchemy as sa
+import torch
+from pytorch3d.implicitron.dataset.dataset_base import DatasetBase
+
+from pytorch3d.implicitron.dataset.frame_data import (  # noqa
+    FrameData,
+    FrameDataBuilder,
+    FrameDataBuilderBase,
+)
+from pytorch3d.implicitron.tools.config import (
+    registry,
+    ReplaceableBase,
+    run_auto_creation,
+)
+from sqlalchemy.orm import Session
+
+from .orm_types import SqlFrameAnnotation, SqlSequenceAnnotation
+
+
+logger = logging.getLogger(__name__)
+
+
+_SET_LISTS_TABLE: str = "set_lists"
+
+
+@registry.register
+class SqlIndexDataset(DatasetBase, ReplaceableBase):  # pyre-ignore
+    """
+    A dataset with annotations stored as SQLite tables. This is an index-based dataset.
+    The length is returned after all sequence and frame filters are applied (see param
+    definitions below). Indices can either be ordinal in [0, len), or pairs of
+    (sequence_name, frame_number); with the performance of `dataset[i]` and
+    `dataset[sequence_name, frame_number]` being same. A faster way to get metadata only
+    (without blobs) is `dataset.meta[idx]` indexing; it requires box_crop==False.
+    With ordinal indexing, the sequences are NOT guaranteed to span contiguous index
+    ranges, and frame numbers are NOT guaranteed to be increasing within a sequence.
+    Sequence-aware batch samplers have to use `sequence_[frames|indices]_in_order`
+    iterators, which are efficient.
+
+    This functionality requires SQLAlchemy 2.0 or later.
+
+    Metadata-related args:
+        sqlite_metadata_file: A SQLite file containing frame and sequence annotation
+            tables (mapping to SqlFrameAnnotation and SqlSequenceAnnotation,
+            respectively).
+        dataset_root: A root directory to look for images, masks, etc. It can be
+            alternatively set in `frame_data_builder` args, but this takes precedence.
+        subset_lists_file: A JSON/sqlite file containing the lists of frames
+            corresponding to different subsets (e.g. train/val/test) of the dataset;
+            format: {subset: [(sequence_name, frame_id, file_path)]}. All entries
+            must be present in frame_annotation metadata table.
+        path_manager: a facade for non-POSIX filesystems.
+        subsets: Restrict frames/sequences only to the given list of subsets
+            as defined in subset_lists_file (see above). Applied before all other
+            filters.
+        remove_empty_masks: Removes the frames with no active foreground pixels
+            in the segmentation mask (needs frame_annotation.mask.mass to be set;
+            null values are retained).
+        pick_frames_sql_clause: SQL WHERE clause to constrain frame annotations
+            NOTE: This is a potential security risk! The string is passed to the SQL
+            engine verbatim. Don’t expose it to end users of your application!
+        pick_categories: Restrict the dataset to the given list of categories.
+        pick_sequences: A Sequence of sequence names to restrict the dataset to.
+        exclude_sequences: A Sequence of the names of the sequences to exclude.
+        limit_sequences_per_category_to: Limit the dataset to the first up to N
+            sequences within each category (applies after all other sequence filters
+            but before `limit_sequences_to`).
+        limit_sequences_to: Limit the dataset to the first `limit_sequences_to`
+            sequences (after other sequence filters have been applied but before
+            frame-based filters).
+        limit_to: Limit the dataset to the first #limit_to frames (after other
+            filters have been applied, except n_frames_per_sequence).
+        n_frames_per_sequence: If > 0, randomly samples `n_frames_per_sequence`
+            frames in each sequences uniformly without replacement if it has
+            more frames than that; applied after other frame-level filters.
+        seed: The seed of the random generator sampling `n_frames_per_sequence`
+            random frames per sequence.
+    """
+
+    frame_annotations_type: ClassVar[Type[SqlFrameAnnotation]] = SqlFrameAnnotation
+
+    sqlite_metadata_file: str = ""
+    dataset_root: Optional[str] = None
+    subset_lists_file: str = ""
+    eval_batches_file: Optional[str] = None
+    path_manager: Any = None
+    subsets: Optional[List[str]] = None
+    remove_empty_masks: bool = True
+    pick_frames_sql_clause: Optional[str] = None
+    pick_categories: Tuple[str, ...] = ()
+
+    pick_sequences: Tuple[str, ...] = ()
+    exclude_sequences: Tuple[str, ...] = ()
+    limit_sequences_per_category_to: int = 0
+    limit_sequences_to: int = 0
+    limit_to: int = 0
+    n_frames_per_sequence: int = -1
+    seed: int = 0
+    remove_empty_masks_poll_whole_table_threshold: int = 300_000
+    # we set it manually in the constructor
+    # _index: pd.DataFrame = field(init=False)
+
+    frame_data_builder: FrameDataBuilderBase
+    frame_data_builder_class_type: str = "FrameDataBuilder"
+
+    def __post_init__(self) -> None:
+        if sa.__version__ < "2.0":
+            raise ImportError("This class requires SQL Alchemy 2.0 or later")
+
+        if not self.sqlite_metadata_file:
+            raise ValueError("sqlite_metadata_file must be set")
+
+        if self.dataset_root:
+            frame_builder_type = self.frame_data_builder_class_type
+            getattr(self, f"frame_data_builder_{frame_builder_type}_args")[
+                "dataset_root"
+            ] = self.dataset_root
+
+        run_auto_creation(self)
+        self.frame_data_builder.path_manager = self.path_manager
+
+        # pyre-ignore  # NOTE: sqlite-specific args (read-only mode).
+        self._sql_engine = sa.create_engine(
+            f"sqlite:///file:{self.sqlite_metadata_file}?mode=ro&uri=true"
+        )
+
+        sequences = self._get_filtered_sequences_if_any()
+
+        if self.subsets:
+            index = self._build_index_from_subset_lists(sequences)
+        else:
+            # TODO: if self.subset_lists_file and not self.subsets, it might be faster to
+            # still use the concatenated lists, assuming they cover the whole dataset
+            index = self._build_index_from_db(sequences)
+
+        if self.n_frames_per_sequence >= 0:
+            index = self._stratified_sample_index(index)
+
+        if len(index) == 0:
+            raise ValueError(f"There are no frames in the subsets: {self.subsets}!")
+
+        self._index = index.set_index(["sequence_name", "frame_number"])  # pyre-ignore
+
+        self.eval_batches = None  # pyre-ignore
+        if self.eval_batches_file:
+            self.eval_batches = self._load_filter_eval_batches()
+
+        logger.info(str(self))
+
+    def __len__(self) -> int:
+        # pyre-ignore[16]
+        return len(self._index)
+
+    def __getitem__(self, frame_idx: Union[int, Tuple[str, int]]) -> FrameData:
+        """
+        Fetches FrameData by either iloc in the index or by (sequence, frame_no) pair
+        """
+        return self._get_item(frame_idx, True)
+
+    @property
+    def meta(self):
+        """
+        Allows accessing metadata only without loading blobs using `dataset.meta[idx]`.
+        Requires box_crop==False, since in that case, cameras cannot be adjusted
+        without loading masks.
+
+        Returns:
+            FrameData objects with blob fields like `image_rgb` set to None.
+
+        Raises:
+            ValueError if dataset.box_crop is set.
+        """
+        return SqlIndexDataset._MetadataAccessor(self)
+
+    @dataclass
+    class _MetadataAccessor:
+        dataset: "SqlIndexDataset"
+
+        def __getitem__(self, frame_idx: Union[int, Tuple[str, int]]) -> FrameData:
+            return self.dataset._get_item(frame_idx, False)
+
+    def _get_item(
+        self, frame_idx: Union[int, Tuple[str, int]], load_blobs: bool = True
+    ) -> FrameData:
+        if isinstance(frame_idx, int):
+            if frame_idx >= len(self._index):
+                raise IndexError(f"index {frame_idx} out of range {len(self._index)}")
+
+            seq, frame = self._index.index[frame_idx]
+        else:
+            seq, frame, *rest = frame_idx
+            if isinstance(frame, torch.LongTensor):
+                frame = frame.item()
+
+            if (seq, frame) not in self._index.index:
+                raise IndexError(
+                    f"Sequence-frame index {frame_idx} not found; was it filtered out?"
+                )
+
+            if rest and rest[0] != self._index.loc[(seq, frame), "_image_path"]:
+                raise IndexError(f"Non-matching image path in {frame_idx}.")
+
+        stmt = sa.select(self.frame_annotations_type).where(
+            self.frame_annotations_type.sequence_name == seq,
+            self.frame_annotations_type.frame_number
+            == int(frame),  # cast from np.int64
+        )
+        seq_stmt = sa.select(SqlSequenceAnnotation).where(
+            SqlSequenceAnnotation.sequence_name == seq
+        )
+        with Session(self._sql_engine) as session:
+            entry = session.scalars(stmt).one()
+            seq_metadata = session.scalars(seq_stmt).one()
+
+        assert entry.image.path == self._index.loc[(seq, frame), "_image_path"]
+
+        frame_data = self.frame_data_builder.build(
+            entry, seq_metadata, load_blobs=load_blobs
+        )
+
+        # The rest of the fields are optional
+        frame_data.frame_type = self._get_frame_type(entry)
+        return frame_data
+
+    def __str__(self) -> str:
+        # pyre-ignore[16]
+        return f"SqlIndexDataset #frames={len(self._index)}"
+
+    def sequence_names(self) -> Iterable[str]:
+        """Returns an iterator over sequence names in the dataset."""
+        return self._index.index.unique("sequence_name")
+
+    # override
+    def category_to_sequence_names(self) -> Dict[str, List[str]]:
+        stmt = sa.select(
+            SqlSequenceAnnotation.category, SqlSequenceAnnotation.sequence_name
+        ).where(  # we limit results to sequences that have frames after all filters
+            SqlSequenceAnnotation.sequence_name.in_(self.sequence_names())
+        )
+        with self._sql_engine.connect() as connection:
+            cat_to_seqs = pd.read_sql(stmt, connection)
+
+        return cat_to_seqs.groupby("category")["sequence_name"].apply(list).to_dict()
+
+    # override
+    def get_frame_numbers_and_timestamps(
+        self, idxs: Sequence[int], subset_filter: Optional[Sequence[str]] = None
+    ) -> List[Tuple[int, float]]:
+        """
+        Implements the DatasetBase method.
+
+        NOTE: Avoid this function as there are more efficient alternatives such as
+        querying `dataset[idx]` directly or getting all sequence frames with
+        `sequence_[frames|indices]_in_order`.
+
+        Return the index and timestamp in their videos of the frames whose
+        indices are given in `idxs`. They need to belong to the same sequence!
+        If timestamps are absent, they are replaced with zeros.
+        This is used for letting SceneBatchSampler identify consecutive
+        frames.
+
+        Args:
+            idxs: a sequence int frame index in the dataset (it can be a slice)
+            subset_filter: must remain None
+
+        Returns:
+            list of tuples of
+                - frame index in video
+                - timestamp of frame in video, coalesced with 0s
+
+        Raises:
+            ValueError if idxs belong to more than one sequence.
+        """
+
+        if subset_filter is not None:
+            raise NotImplementedError(
+                "Subset filters are not supported in SQL Dataset. "
+                "We encourage creating a dataset per subset."
+            )
+
+        index_slice, _ = self._get_frame_no_coalesced_ts_by_row_indices(idxs)
+        # alternatively, we can use `.values.tolist()`, which may be faster
+        # but returns a list of lists
+        return list(index_slice.itertuples())
+
+    # override
+    def sequence_frames_in_order(
+        self, seq_name: str, subset_filter: Optional[Sequence[str]] = None
+    ) -> Iterator[Tuple[float, int, int]]:
+        """
+        Overrides the default DatasetBase implementation (we don’t use `_seq_to_idx`).
+        Returns an iterator over the frame indices in a given sequence.
+        We attempt to first sort by timestamp (if they are available),
+        then by frame number.
+
+        Args:
+            seq_name: the name of the sequence.
+            subset_filter: subset names to filter to
+
+        Returns:
+            an iterator over triplets `(timestamp, frame_no, dataset_idx)`,
+                where `frame_no` is the index within the sequence, and
+                `dataset_idx` is the index within the dataset.
+                `None` timestamps are replaced with 0s.
+        """
+        # TODO: implement sort_timestamp_first? (which would matter if the orders
+        # of frame numbers and timestamps are different)
+        rows = self._index.index.get_loc(seq_name)
+        if isinstance(rows, slice):
+            assert rows.stop is not None, "Unexpected result from pandas"
+            rows = range(rows.start or 0, rows.stop, rows.step or 1)
+        else:
+            rows = np.where(rows)[0]
+
+        index_slice, idx = self._get_frame_no_coalesced_ts_by_row_indices(
+            rows, seq_name, subset_filter
+        )
+        index_slice["idx"] = idx
+
+        yield from index_slice.itertuples(index=False)
+
+    # override
+    def get_eval_batches(self) -> Optional[List[Any]]:
+        """
+        This class does not support eval batches with ordinal indices. You can pass
+        eval_batches as a batch_sampler to a data_loader since the dataset supports
+        `dataset[seq_name, frame_no]` indexing.
+        """
+        return self.eval_batches
+
+    # override
+    def join(self, other_datasets: Iterable[DatasetBase]) -> None:
+        raise ValueError("Not supported! Preprocess the data by merging them instead.")
+
+    # override
+    @property
+    def frame_data_type(self) -> Type[FrameData]:
+        return self.frame_data_builder.frame_data_type
+
+    def is_filtered(self) -> bool:
+        """
+        Returns `True` in case the dataset has been filtered and thus some frame
+        annotations stored on the disk might be missing in the dataset object.
+        Does not account for subsets.
+
+        Returns:
+            is_filtered: `True` if the dataset has been filtered, else `False`.
+        """
+        return (
+            self.remove_empty_masks
+            or self.limit_to > 0
+            or self.limit_sequences_to > 0
+            or self.limit_sequences_per_category_to > 0
+            or len(self.pick_sequences) > 0
+            or len(self.exclude_sequences) > 0
+            or len(self.pick_categories) > 0
+            or self.n_frames_per_sequence > 0
+        )
+
+    def _get_filtered_sequences_if_any(self) -> Optional[pd.Series]:
+        # maximum possible filter (if limit_sequences_per_category_to == 0):
+        # WHERE category IN 'self.pick_categories'
+        # AND sequence_name IN 'self.pick_sequences'
+        # AND sequence_name NOT IN 'self.exclude_sequences'
+        # LIMIT 'self.limit_sequence_to'
+
+        where_conditions = [
+            *self._get_category_filters(),
+            *self._get_pick_filters(),
+            *self._get_exclude_filters(),
+        ]
+
+        def add_where(stmt):
+            return stmt.where(*where_conditions) if where_conditions else stmt
+
+        if self.limit_sequences_per_category_to <= 0:
+            stmt = add_where(sa.select(SqlSequenceAnnotation.sequence_name))
+        else:
+            subquery = sa.select(
+                SqlSequenceAnnotation.sequence_name,
+                sa.func.row_number()
+                .over(
+                    order_by=sa.text("ROWID"),  # NOTE: ROWID is SQLite-specific
+                    partition_by=SqlSequenceAnnotation.category,
+                )
+                .label("row_number"),
+            )
+
+            subquery = add_where(subquery).subquery()
+            stmt = sa.select(subquery.c.sequence_name).where(
+                subquery.c.row_number <= self.limit_sequences_per_category_to
+            )
+
+        if self.limit_sequences_to > 0:
+            logger.info(
+                f"Limiting dataset to first {self.limit_sequences_to} sequences"
+            )
+            # NOTE: ROWID is SQLite-specific
+            stmt = stmt.order_by(sa.text("ROWID")).limit(self.limit_sequences_to)
+
+        if (
+            not where_conditions
+            and self.limit_sequences_to <= 0
+            and self.limit_sequences_per_category_to <= 0
+        ):
+            # we will not need to filter by sequences
+            return None
+
+        with self._sql_engine.connect() as connection:
+            sequences = pd.read_sql_query(stmt, connection)["sequence_name"]
+        logger.info("... retained %d sequences" % len(sequences))
+
+        return sequences
+
+    def _get_category_filters(self) -> List[sa.ColumnElement]:
+        if not self.pick_categories:
+            return []
+
+        logger.info(f"Limiting dataset to categories: {self.pick_categories}")
+        return [SqlSequenceAnnotation.category.in_(self.pick_categories)]
+
+    def _get_pick_filters(self) -> List[sa.ColumnElement]:
+        if not self.pick_sequences:
+            return []
+
+        logger.info(f"Limiting dataset to sequences: {self.pick_sequences}")
+        return [SqlSequenceAnnotation.sequence_name.in_(self.pick_sequences)]
+
+    def _get_exclude_filters(self) -> List[sa.ColumnOperators]:
+        if not self.exclude_sequences:
+            return []
+
+        logger.info(f"Removing sequences from the dataset: {self.exclude_sequences}")
+        return [SqlSequenceAnnotation.sequence_name.notin_(self.exclude_sequences)]
+
+    def _load_subsets_from_json(self, subset_lists_path: str) -> pd.DataFrame:
+        assert self.subsets is not None
+        with open(subset_lists_path, "r") as f:
+            subset_to_seq_frame = json.load(f)
+
+        seq_frame_list = sum(
+            (
+                [(*row, subset) for row in subset_to_seq_frame[subset]]
+                for subset in self.subsets
+            ),
+            [],
+        )
+        index = pd.DataFrame(
+            seq_frame_list,
+            columns=["sequence_name", "frame_number", "_image_path", "subset"],
+        )
+        return index
+
+    def _load_subsets_from_sql(self, subset_lists_path: str) -> pd.DataFrame:
+        subsets = self.subsets
+        assert subsets is not None
+        # we need a new engine since we store the subsets in a separate DB
+        engine = sa.create_engine(f"sqlite:///{subset_lists_path}")
+        table = sa.Table(_SET_LISTS_TABLE, sa.MetaData(), autoload_with=engine)
+        stmt = sa.select(table).where(table.c.subset.in_(subsets))
+        with engine.connect() as connection:
+            index = pd.read_sql(stmt, connection)
+
+        return index
+
+    def _build_index_from_subset_lists(
+        self, sequences: Optional[pd.Series]
+    ) -> pd.DataFrame:
+        """
+        Builds the frame index from the subset lists file, restricted to
+        `self.subsets`, and applies the sequence- and frame-level filters.
+
+        Args:
+            sequences: names of the sequences to retain, or None to keep the
+                frames of all sequences.
+
+        Returns:
+            The index as a DataFrame, with `sequence_name` and `frame_number`
+            restored as regular columns.
+
+        Raises:
+            ValueError: if `subset_lists_file` is not set.
+        """
+        if not self.subset_lists_file:
+            raise ValueError("Requested subsets but subset_lists_file not given")
+
+        logger.info(f"Loading subset lists from {self.subset_lists_file}.")
+
+        subset_lists_path = self._local_path(self.subset_lists_file)
+        # the file format is chosen by the extension: JSON, or else SQLite
+        if subset_lists_path.lower().endswith(".json"):
+            index = self._load_subsets_from_json(subset_lists_path)
+        else:
+            index = self._load_subsets_from_sql(subset_lists_path)
+        index = index.set_index(["sequence_name", "frame_number"])
+        logger.info(f"  -> loaded {len(index)} samples of {self.subsets}.")
+
+        if sequences is not None:
+            logger.info("Applying filtered sequences.")
+            sequence_values = index.index.get_level_values("sequence_name")
+            index = index.loc[sequence_values.isin(sequences)]
+            logger.info(f"  -> retained {len(index)} samples.")
+
+        # SQL criteria collected here are applied in one query further below
+        pick_frames_criteria = []
+        if self.remove_empty_masks:
+            logger.info("Culling samples with empty masks.")
+
+            if len(index) > self.remove_empty_masks_poll_whole_table_threshold:
+                # APPROACH 1: find empty masks and drop indices.
+                # dev load: 17s / 15 s (3.1M / 500K)
+                stmt = sa.select(
+                    self.frame_annotations_type.sequence_name,
+                    self.frame_annotations_type.frame_number,
+                ).where(self.frame_annotations_type._mask_mass == 0)
+                with Session(self._sql_engine) as session:
+                    to_remove = session.execute(stmt).all()
+
+                # Pandas uses np.int64 for integer types, so we have to cast;
+                # we might want to read it to pandas DataFrame directly to avoid the loop
+                to_remove = [(seq, np.int64(fr)) for seq, fr in to_remove]
+                # errors="ignore": the query above covers the whole table, so
+                # some of the frames may be absent from the index
+                index.drop(to_remove, errors="ignore", inplace=True)
+            else:
+                # APPROACH 3: load index into a temp table and join with annotations
+                # dev load: 94 s / 23 s (3.1M / 500K)
+                # frames with a NULL mask mass are retained
+                pick_frames_criteria.append(
+                    sa.or_(
+                        self.frame_annotations_type._mask_mass.is_(None),
+                        self.frame_annotations_type._mask_mass != 0,
+                    )
+                )
+
+        if self.pick_frames_sql_clause:
+            logger.info("Applying the custom SQL clause.")
+            # NOTE: the clause is passed to the SQL engine as raw text
+            pick_frames_criteria.append(sa.text(self.pick_frames_sql_clause))
+
+        if pick_frames_criteria:
+            index = self._pick_frames_by_criteria(index, pick_frames_criteria)
+
+        logger.info(f"  -> retained {len(index)} samples.")
+
+        if self.limit_to > 0:
+            logger.info(f"Limiting dataset to first {self.limit_to} frames")
+            # "first" refers to the (sequence_name, frame_number) order
+            index = index.sort_index().iloc[: self.limit_to]
+
+        return index.reset_index()
+
+    def _pick_frames_by_criteria(self, index: pd.DataFrame, criteria) -> pd.DataFrame:
+        IndexTable = self._get_temp_index_table_instance()
+        with self._sql_engine.connect() as connection:
+            IndexTable.create(connection)
+            # we don’t let pandas’s `to_sql` create the table automatically as
+            # the table would be permanent, so we create it and append with pandas
+            n_rows = index.to_sql(IndexTable.name, connection, if_exists="append")
+            assert n_rows == len(index)
+            sa_type = self.frame_annotations_type
+            stmt = (
+                sa.select(IndexTable)
+                .select_from(
+                    IndexTable.join(
+                        self.frame_annotations_type,
+                        sa.and_(
+                            sa_type.sequence_name == IndexTable.c.sequence_name,
+                            sa_type.frame_number == IndexTable.c.frame_number,
+                        ),
+                    )
+                )
+                .where(*criteria)
+            )
+            return pd.read_sql_query(stmt, connection).set_index(
+                ["sequence_name", "frame_number"]
+            )
+
+    def _build_index_from_db(self, sequences: Optional[pd.Series]):
+        """Load the sequence/frame index (with a NULL `subset` column) from the DB."""
+        logger.info("Loading sequence-frame index from the database")
+        stmt = sa.select(
+            self.frame_annotations_type.sequence_name,
+            self.frame_annotations_type.frame_number,
+            self.frame_annotations_type._image_path,
+            sa.null().label("subset"),
+        )
+        where_conditions = []
+        if sequences is not None:
+            logger.info("  applying filtered sequences")
+            where_conditions.append(
+                self.frame_annotations_type.sequence_name.in_(sequences.tolist())
+            )
+        # frames with an unknown (NULL) mask mass are kept; only zero mass is excluded
+        if self.remove_empty_masks:
+            logger.info("  excluding samples with empty masks")
+            where_conditions.append(
+                sa.or_(
+                    self.frame_annotations_type._mask_mass.is_(None),
+                    self.frame_annotations_type._mask_mass != 0,
+                )
+            )
+        # the custom clause is raw SQL text and is passed to the database as is
+        if self.pick_frames_sql_clause:
+            logger.info("  applying custom SQL clause")
+            where_conditions.append(sa.text(self.pick_frames_sql_clause))
+
+        if where_conditions:
+            stmt = stmt.where(*where_conditions)
+
+        if self.limit_to > 0:
+            logger.info(f"Limiting dataset to first {self.limit_to} frames")
+            stmt = stmt.order_by(
+                self.frame_annotations_type.sequence_name,
+                self.frame_annotations_type.frame_number,
+            ).limit(self.limit_to)
+
+        with self._sql_engine.connect() as connection:
+            index = pd.read_sql_query(stmt, connection)
+        logger.info(f"  -> loaded {len(index)} samples.")
+        return index
+
+    def _sort_index_(self, index):  # NB: reorders `index` in place, returns nothing
+        logger.info("Sorting the index by sequence and frame number.")
+        index.sort_values(by=["sequence_name", "frame_number"], inplace=True)
+        logger.info("  -> Done.")
+
+    def _load_filter_eval_batches(self):
+        """Load eval batches from `eval_batches_file` and apply the sequence filters."""
+        assert self.eval_batches_file
+        logger.info(f"Loading eval batches from {self.eval_batches_file}")
+        if not os.path.isfile(self.eval_batches_file):
+            # The batch indices file does not exist.
+            # Most probably the user has not specified the root folder.
+            raise ValueError(
+                f"Looking for dataset json file in {self.eval_batches_file}. "
+                + "Please specify a correct dataset_root folder."
+            )
+
+        with open(self.eval_batches_file, "r") as f:
+            eval_batches = json.load(f)
+
+        # limit the dataset to sequences to allow multiple evaluations in one file
+        pick_sequences = set(self.pick_sequences)
+        if self.pick_categories:
+            cat_to_seq = self.category_to_sequence_names()
+            pick_sequences.update(
+                seq for cat in self.pick_categories for seq in cat_to_seq[cat]
+            )
+        # a batch is kept or dropped as a whole, judged by the first field of its first entry
+        if pick_sequences:
+            old_len = len(eval_batches)
+            eval_batches = [b for b in eval_batches if b[0][0] in pick_sequences]
+            logger.warning(
+                f"Picked eval batches by sequence/cat: {old_len} -> {len(eval_batches)}"
+            )
+
+        if self.exclude_sequences:
+            old_len = len(eval_batches)
+            exclude_sequences = set(self.exclude_sequences)
+            eval_batches = [b for b in eval_batches if b[0][0] not in exclude_sequences]
+            logger.warning(
+                f"Excluded eval batches by sequence: {old_len} -> {len(eval_batches)}"
+            )
+
+        return eval_batches
+
+    def _stratified_sample_index(self, index):
+        """Sample at most `n_frames_per_sequence` frames per sequence, reproducibly."""
+        # NOTE: in the no-subset case above this sampling could be done more efficiently
+        # inside the SQL query; that case is uncommon, so only this generic version exists
+        index = index.groupby("sequence_name", group_keys=False).apply(
+            lambda seq_frames: seq_frames.sample(
+                min(len(seq_frames), self.n_frames_per_sequence),
+                random_state=(
+                    _seq_name_to_seed(seq_frames.iloc[0]["sequence_name"]) + self.seed
+                ),
+            )
+        )
+        logger.info(f"  -> retained {len(index)} samples after stratified sampling.")
+        return index
+
+    def _get_frame_type(self, entry: SqlFrameAnnotation) -> Optional[str]:  # subset label
+        return self._index.loc[(entry.sequence_name, entry.frame_number), "subset"]
+
+    def _get_frame_no_coalesced_ts_by_row_indices(
+        self,
+        idxs: Sequence[int],
+        seq_name: Optional[str] = None,
+        subset_filter: Union[Sequence[str], str, None] = None,
+    ) -> Tuple[pd.DataFrame, Sequence[int]]:
+        """
+        Loads timestamps for given index rows belonging to the same sequence.
+        If seq_name is known, it speeds up the computation. If `subset_filter` is
+        given, only the rows from the listed subset(s) are considered. Returns the
+        loaded timestamps and frame numbers together with the `idxs` that were kept.
+        Raises ValueError if `idxs` do not all belong to a single sequence.
+        """
+        index_slice = self._index.iloc[idxs]
+        if subset_filter is not None:
+            if isinstance(subset_filter, str):
+                subset_filter = [subset_filter]
+            indicator = index_slice["subset"].isin(subset_filter)
+            index_slice = index_slice.loc[indicator]
+            idxs = [i for i, isin in zip(idxs, indicator) if isin]
+
+        frames = index_slice.index.get_level_values("frame_number").tolist()
+        if seq_name is None:
+            seq_name_list = index_slice.index.get_level_values("sequence_name").tolist()
+            seq_name_set = set(seq_name_list)
+            if len(seq_name_set) > 1:
+                raise ValueError("Given indices belong to more than one sequence.")
+            elif len(seq_name_set) == 1:
+                seq_name = seq_name_list[0]
+        # missing (NULL) timestamps are reported as 0
+        coalesced_ts = sa.sql.functions.coalesce(
+            self.frame_annotations_type.frame_timestamp, 0
+        )
+        stmt = sa.select(
+            coalesced_ts.label("frame_timestamp"),
+            self.frame_annotations_type.frame_number,
+        ).where(
+            self.frame_annotations_type.sequence_name == seq_name,
+            self.frame_annotations_type.frame_number.in_(frames),
+        )
+
+        with self._sql_engine.connect() as connection:
+            frame_no_ts = pd.read_sql_query(stmt, connection)
+        if len(frame_no_ts) != len(index_slice):
+            raise ValueError(
+                "Not all indices are found in the database; "
+                "do they belong to more than one sequence?"
+            )
+        return frame_no_ts, idxs
+
+    def _local_path(self, path: str) -> str:
+        # without a path manager the path is returned unchanged
+        manager = self.path_manager
+        return path if manager is None else manager.get_local_path(path)
+
+    def _get_temp_index_table_instance(self, table_name: str = "__index"):
+        """Return the TEMP index table, declaring it in the metadata on first use."""
+        table = self.frame_annotations_type.metadata.tables.get(table_name)
+        if table is None:  # declare only once: table definition is not idempotent
+            table = sa.Table(
+                table_name,
+                self.frame_annotations_type.metadata,
+                sa.Column("sequence_name", sa.String, primary_key=True),
+                sa.Column("frame_number", sa.Integer, primary_key=True),
+                sa.Column("_image_path", sa.String),
+                sa.Column("subset", sa.String),
+                prefixes=["TEMP"],  # NOTE SQLite specific!
+            )
+        return table
+
+
+def _seq_name_to_seed(seq_name) -> int:
+    """Maps a name to a seed in [0, 2 ** 28): the leading 28 bits of its SHA-1."""
+    return int.from_bytes(hashlib.sha1(seq_name.encode("utf-8")).digest()[:4], "big") >> 4
+
+
+def _safe_as_tensor(data, dtype):  # None is passed through instead of being converted
+    return None if data is None else torch.tensor(data, dtype=dtype)
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/sql_dataset_provider.py b/pytorch3d/pytorch3d/implicitron/dataset/sql_dataset_provider.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab161e8d73b3c1e7f0f00195fd6fdf94bf9b7af1
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/sql_dataset_provider.py
@@ -0,0 +1,424 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import logging
+import os
+from typing import List, Optional, Tuple, Type
+
+import numpy as np
+
+from omegaconf import DictConfig, OmegaConf
+
+from pytorch3d.implicitron.dataset.dataset_map_provider import (
+    DatasetMap,
+    DatasetMapProviderBase,
+    PathManagerFactory,
+)
+from pytorch3d.implicitron.tools.config import (
+    expand_args_fields,
+    registry,
+    run_auto_creation,
+)
+
+from .sql_dataset import SqlIndexDataset
+
+
+_CO3D_SQL_DATASET_ROOT: str = os.getenv("CO3D_SQL_DATASET_ROOT", "")  # "" if unset
+
+# _NEED_CONTROL is a list of those elements of SqlIndexDataset which
+# are not directly specified for it in the config but come from the
+# DatasetMapProvider (they are deleted from the args in `dataset_tweak_args`).
+_NEED_CONTROL: Tuple[str, ...] = (
+    "path_manager",
+    "subsets",
+    "sqlite_metadata_file",
+    "subset_lists_file",
+)
+
+logger = logging.getLogger(__name__)
+
+
+@registry.register
+class SqlIndexDatasetMapProvider(DatasetMapProviderBase):  # pyre-ignore [13]
+    """
+    Generates the training, validation, and testing dataset objects for
+    a dataset laid out on disk like SQL-CO3D, with annotations in an SQLite data base.
+
+    The dataset is organized in the filesystem as follows::
+
+        self.dataset_root
+            ├── <possible/partition/0>
+            │   ├── <sequence_name_0>
+            │   │   ├── depth_masks
+            │   │   ├── depths
+            │   │   ├── images
+            │   │   ├── masks
+            │   │   └── pointcloud.ply
+            │   ├── <sequence_name_1>
+            │   │   ├── depth_masks
+            │   │   ├── depths
+            │   │   ├── images
+            │   │   ├── masks
+            │   │   └── pointcloud.ply
+            │   ├── ...
+            │   ├── <sequence_name_N>
+            │   ├── set_lists
+            │       ├── <subset_base_name_0>.json
+            │       ├── <subset_base_name_1>.json
+            │       ├── ...
+            │       ├── <subset_base_name_2>.json
+            │   ├── eval_batches
+            │   │   ├── <eval_batches_base_name_0>.json
+            │   │   ├── <eval_batches_base_name_1>.json
+            │   │   ├── ...
+            │   │   ├── <eval_batches_base_name_M>.json
+            │   ├── frame_annotations.jgz
+            │   ├── sequence_annotations.jgz
+            ├── <possible/partition/1>
+            ├── ...
+            ├── <possible/partition/K>
+            ├── set_lists
+                ├── <subset_base_name_0>.sqlite
+                ├── <subset_base_name_1>.sqlite
+                ├── ...
+                ├── <subset_base_name_2>.sqlite
+            ├── eval_batches
+            │   ├── <eval_batches_base_name_0>.json
+            │   ├── <eval_batches_base_name_1>.json
+            │   ├── ...
+            │   ├── <eval_batches_base_name_M>.json
+
+    The dataset contains sequences named `<sequence_name_i>` that may be partitioned by
+    directories such as `<possible/partition/0>` e.g. representing categories but they
+    can also be stored in a flat structure. Each sequence folder contains the list of
+    sequence images, depth maps, foreground masks, and valid-depth masks
+    `images`, `depths`, `masks`, and `depth_masks` respectively. Furthermore,
+    `set_lists/` directories (with partitions or global) store json or sqlite files
+    `<subset_base_name_l>.<ext>`, each describing a certain sequence subset.
+    These subset path conventions are not hard-coded and arbitrary relative path can be
+    specified by setting `self.subset_lists_path` to the relative path w.r.t.
+    dataset root.
+
+    Each `<subset_base_name_l>.json` file contains the following dictionary::
+
+        {
+            "train": [
+                (sequence_name: str, frame_number: int, image_path: str),
+                ...
+            ],
+            "val": [
+                (sequence_name: str, frame_number: int, image_path: str),
+                ...
+            ],
+            "test": [
+                (sequence_name: str, frame_number: int, image_path: str),
+                ...
+            ],
+        }
+
+    defining the list of frames (identified with their `sequence_name` and
+    `frame_number`) in the "train", "val", and "test" subsets of the dataset. In case of
+    SQLite format, `<subset_base_name_l>.sqlite` contains a table with the header::
+
+        | sequence_name | frame_number | image_path | subset |
+
+    Note that `frame_number` can be obtained only from the metadata and
+    does not necessarily correspond to the numeric suffix of the corresponding image
+    file name (e.g. a file `<partition_0>/<sequence_name_0>/images/frame00005.jpg` can
+    have its frame number set to `20`, not 5).
+
+    Each `<eval_batches_base_name_M>.json` file contains a list of evaluation examples
+    in the following form::
+
+        [
+            [  # batch 1
+                (sequence_name: str, frame_number: int, image_path: str),
+                ...
+            ],
+            [  # batch 2
+                (sequence_name: str, frame_number: int, image_path: str),
+                ...
+            ],
+        ]
+
+    Note that the evaluation examples always come from the `"test"` subset of the dataset.
+    (test frames can repeat across batches). The batches can contain single element,
+    which is typical in case of regular radiance field fitting.
+
+    Args:
+        subset_lists_path: The relative path to the dataset subset definition.
+            For CO3D, these include e.g. "skateboard/set_lists/set_lists_manyview_dev_0.json".
+            By default (None), dataset is not partitioned to subsets (in that case, setting
+            `ignore_subsets` will speed up construction)
+        dataset_root: The root folder of the dataset.
+        metadata_basename: name of the SQL metadata file in dataset_root;
+            not expected to be changed by users
+        test_on_train: Construct validation and test datasets from
+            the training subset; note that in practice, in this
+            case all subset dataset objects will be same
+        only_test_set: Load only the test set. Incompatible with `test_on_train`.
+        ignore_subsets: Don’t filter by subsets in the dataset; note that in this
+            case all subset datasets will be same
+        eval_batch_num_training_frames: Add a certain number of training frames to each
+            eval batch. Useful for evaluating models that require
+            source views as input (e.g. NeRF-WCE / PixelNeRF).
+        dataset_args: Specifies additional arguments to the
+            SqlIndexDataset constructor call.
+        path_manager_factory: (Optional) An object that generates an instance of
+            PathManager that can translate provided file paths.
+        path_manager_factory_class_type: The class type of `path_manager_factory`.
+    """
+
+    category: Optional[str] = None
+    subset_list_name: Optional[str] = None  # TODO: docs
+    # OR
+    subset_lists_path: Optional[str] = None
+    eval_batches_path: Optional[str] = None
+
+    dataset_root: str = _CO3D_SQL_DATASET_ROOT
+    metadata_basename: str = "metadata.sqlite"
+
+    test_on_train: bool = False
+    only_test_set: bool = False
+    ignore_subsets: bool = False
+    train_subsets: Tuple[str, ...] = ("train",)
+    val_subsets: Tuple[str, ...] = ("val",)
+    test_subsets: Tuple[str, ...] = ("test",)
+
+    eval_batch_num_training_frames: int = 0
+
+    # this is a mould that is never constructed, used to build self._dataset_map values
+    dataset_class_type: str = "SqlIndexDataset"
+    dataset: SqlIndexDataset
+
+    path_manager_factory: PathManagerFactory
+    path_manager_factory_class_type: str = "PathManagerFactory"
+
+    def __post_init__(self):
+        """Validate the config and build the train/val/test datasets of the map."""
+        super().__init__()
+        run_auto_creation(self)
+
+        if self.only_test_set and self.test_on_train:
+            raise ValueError("Cannot have only_test_set and test_on_train")
+
+        if self.ignore_subsets and not self.only_test_set:
+            self.test_on_train = True  # no point in loading same data 3 times
+
+        path_manager = self.path_manager_factory.get()
+        sqlite_metadata_file = os.path.join(self.dataset_root, self.metadata_basename)
+        sqlite_metadata_file = _local_path(path_manager, sqlite_metadata_file)
+
+        if not os.path.isfile(sqlite_metadata_file):
+            # The sqlite_metadata_file does not exist.
+            # Most probably the user has not specified the root folder.
+            raise ValueError(
+                f"Looking for frame annotations in {sqlite_metadata_file}."
+                + " Please specify a correct dataset_root folder."
+                + " Note: By default the root folder is taken from the"
+                + " CO3D_SQL_DATASET_ROOT environment variable."
+            )
+
+        if self.subset_lists_path and self.subset_list_name:
+            raise ValueError(
+                "subset_lists_path and subset_list_name cannot be both set"
+            )
+
+        subset_lists_file = self._get_lists_file("set_lists")
+
+        # setup the common dataset arguments
+        common_dataset_kwargs = {
+            **getattr(self, f"dataset_{self.dataset_class_type}_args"),
+            "sqlite_metadata_file": sqlite_metadata_file,
+            "dataset_root": self.dataset_root,
+            "subset_lists_file": subset_lists_file,
+            "path_manager": path_manager,
+        }
+
+        if self.category:
+            logger.info(f"Forcing category filter in the datasets to {self.category}")
+            common_dataset_kwargs["pick_categories"] = self.category.split(",")
+
+        # get the used dataset type
+        dataset_type: Type[SqlIndexDataset] = registry.get(
+            SqlIndexDataset, self.dataset_class_type
+        )
+        expand_args_fields(dataset_type)
+
+        if subset_lists_file is not None and not os.path.isfile(subset_lists_file):
+            # `pick_categories` is a plain list if `category` is set, or config otherwise
+            categories = list(common_dataset_kwargs.get("pick_categories") or ())
+            available_subsets = self._get_available_subsets(categories)
+            msg = f"Cannot find subset list file {subset_lists_file}."
+            if available_subsets:
+                msg += f" Some of the available subsets: {str(available_subsets)}."
+            raise ValueError(msg)
+
+        train_dataset = None
+        val_dataset = None
+        if not self.only_test_set:
+            # load the training set
+            logger.debug("Constructing train dataset.")
+            train_dataset = dataset_type(
+                **common_dataset_kwargs, subsets=self._get_subsets(self.train_subsets)
+            )
+            logger.info(f"Train dataset: {str(train_dataset)}")
+
+        if self.test_on_train:
+            assert train_dataset is not None
+            val_dataset = test_dataset = train_dataset
+        else:
+            # load the val and test sets
+            if not self.only_test_set:
+                # NOTE: this is always loaded in JsonProviderV2
+                logger.debug("Extracting val dataset.")
+                val_dataset = dataset_type(
+                    **common_dataset_kwargs, subsets=self._get_subsets(self.val_subsets)
+                )
+                logger.info(f"Val dataset: {str(val_dataset)}")
+
+            logger.debug("Extracting test dataset.")
+            eval_batches_file = self._get_lists_file("eval_batches")
+            common_dataset_kwargs.pop("eval_batches_file", None)  # set explicitly below
+            test_dataset = dataset_type(
+                **common_dataset_kwargs,
+                subsets=self._get_subsets(self.test_subsets, True),
+                eval_batches_file=eval_batches_file,
+            )
+            logger.info(f"Test dataset: {str(test_dataset)}")
+
+            if (
+                eval_batches_file is not None
+                and self.eval_batch_num_training_frames > 0
+            ):
+                self._extend_eval_batches(test_dataset)
+
+        self._dataset_map = DatasetMap(
+            train=train_dataset, val=val_dataset, test=test_dataset
+        )
+
+    def _get_subsets(self, subsets, is_eval: bool = False):
+        """Resolve the `subsets` argument to pass to the dataset constructor."""
+        if self.ignore_subsets:
+            return None
+        needs_train_frames = is_eval and self.eval_batch_num_training_frames > 0
+        if not needs_train_frames:
+            return subsets
+        # we will need to have training frames for extended batches
+        return [*subsets, *self.train_subsets]
+
+    def _extend_eval_batches(self, test_dataset: SqlIndexDataset) -> None:
+        """Append randomly chosen frames from the train subsets to every eval batch."""
+        rng = np.random.default_rng(seed=0)  # fixed seed: identical picks on every run
+        eval_batches = test_dataset.get_eval_batches()
+        if eval_batches is None:
+            raise ValueError("Eval batches were not loaded!")
+
+        n_extra = self.eval_batch_num_training_frames
+        for batch in eval_batches:
+            seq_name = batch[0][0]  # the batch is extended with frames of this sequence
+            train_frames = list(
+                test_dataset.sequence_frames_in_order(seq_name, self.train_subsets)
+            )
+            for pos in rng.permutation(len(train_frames))[:n_extra]:
+                batch.append((seq_name, train_frames[pos][1]))
+
+    @classmethod
+    def dataset_tweak_args(cls, type, args: DictConfig) -> None:
+        """
+        Hook invoked by get_default_args.
+        Removes the `_NEED_CONTROL` fields from `args`: they are not exposed
+        on each dataset class but rather are controlled by this provider class.
+        """
+        for controlled_key in _NEED_CONTROL:
+            del args[controlled_key]
+
+    def create_dataset(self):
+        """No-op: no `dataset` member of this class is created."""
+        # The dataset(s) live in `self.get_dataset_map`.
+        return None
+
+    def get_dataset_map(self) -> DatasetMap:  # the map is assembled in __post_init__
+        return self._dataset_map  # pyre-ignore [16]
+
+    def _get_available_subsets(self, categories: List[str]):
+        """
+        Collects the subset list files found in the `set_lists` folder of the dataset
+        root and of every given category sub-folder; missing folders are skipped.
+        """
+        path_manager = self.path_manager_factory.get()
+
+        found: List[str] = []
+        # the empty prefix stands for `set_lists` directly under the dataset root
+        for prefix in [""] + categories:
+            set_list_dir = os.path.join(self.dataset_root, prefix, "set_lists")
+            # the folder may be visible via the path manager or on the local disk
+            via_manager = path_manager is not None and path_manager.isdir(set_list_dir)
+            if via_manager or os.path.isdir(set_list_dir):
+                list_dir = os.listdir if path_manager is None else path_manager.ls
+                found.extend(
+                    os.path.join(prefix, "set_lists", name)
+                    for name in list_dir(set_list_dir)
+                )
+
+        return found
+
+    def _get_lists_file(self, flavor: str) -> Optional[str]:
+        """
+        Resolves the local path of the subset lists file, or of the eval batches
+        file if `flavor` is "eval_batches". Returns None if neither an explicit
+        path nor `subset_list_name` is configured; raises FileNotFoundError if
+        the configured file exists neither as given nor under `dataset_root`.
+        """
+        if flavor == "eval_batches":
+            relative_path = self.eval_batches_path
+        else:
+            relative_path = self.subset_lists_path
+
+        if not relative_path:
+            if not self.subset_list_name:
+                return None
+            # if multiple categories are given, looking for global set lists
+            single_category = self.category and "," not in self.category
+            relative_path = os.path.join(
+                self.category if single_category else "",
+                f"{flavor}",
+                f"{flavor}_{self.subset_list_name}",
+            )
+
+        assert relative_path
+        path_manager = self.path_manager_factory.get()
+        # try absolute path first
+        resolved = _get_local_path_check_extensions(relative_path, path_manager)
+        if resolved:
+            return resolved
+
+        full_path = os.path.join(self.dataset_root, relative_path)
+        resolved = _get_local_path_check_extensions(full_path, path_manager)
+        if not resolved:
+            raise FileNotFoundError(
+                f"Subset lists path given but not found: {full_path}"
+            )
+
+        return resolved
+
+
+def _get_local_path_check_extensions(
+    path, path_manager, extensions=("", ".sqlite", ".json")
+) -> Optional[str]:
+    """
+    Returns the local path of the first `path + ext` that is an existing file,
+    trying `extensions` in the given order; returns None if there is no such file.
+    """
+    candidates = (_local_path(path_manager, path + ext) for ext in extensions)
+    return next((local for local in candidates if os.path.isfile(local)), None)
+
+
+def _local_path(path_manager, path: str) -> str:
+    """Translate `path` with the path manager, if there is one."""
+    # without a path manager the path is returned unchanged
+    return path if path_manager is None else path_manager.get_local_path(path)
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/train_eval_data_loader_provider.py b/pytorch3d/pytorch3d/implicitron/dataset/train_eval_data_loader_provider.py
new file mode 100644
index 0000000000000000000000000000000000000000..4640feb232878eb1578419eaebd8e0bc7163c4e4
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/train_eval_data_loader_provider.py
@@ -0,0 +1,189 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+from typing import Any, Dict, Optional, Tuple
+
+from pytorch3d.implicitron.dataset.data_loader_map_provider import (
+    DataLoaderMap,
+    SceneBatchSampler,
+    SequenceDataLoaderMapProvider,
+)
+from pytorch3d.implicitron.dataset.dataset_base import DatasetBase
+from pytorch3d.implicitron.dataset.dataset_map_provider import DatasetMap
+from pytorch3d.implicitron.dataset.frame_data import FrameData
+from pytorch3d.implicitron.tools.config import registry, run_auto_creation
+
+from torch.utils.data import DataLoader
+
+logger = logging.getLogger(__name__)
+
+
+# TODO: we can merge it with SequenceDataLoaderMapProvider in PyTorch3D
+# and support both eval_batches protocols
+@registry.register
+class TrainEvalDataLoaderMapProvider(SequenceDataLoaderMapProvider):
+    """
+    Implementation of DataLoaderMapProviderBase that may use internal eval batches for
+    the test dataset. In particular, if `eval_batches_relpath` is set, it loads
+    eval batches from that json file, otherwise test set is treated in the same way as
+    train and val, i.e. the parameters `dataset_length_test` and `test_conditioning_type`
+    are respected.
+
+    If conditioning is not required, then the batch size should
+    be set as 1, and most of the fields do not matter.
+
+    If conditioning is required, each batch will contain one main
+    frame first to predict, and the rest of the elements are for
+    conditioning.
+
+    If images_per_seq_options is left empty, the conditioning
+    frames are picked according to the conditioning type given.
+    This does not have regard to the order of frames in a
+    scene, or which frames belong to what scene.
+
+    If images_per_seq_options is given, then the conditioning types
+    must be SAME and the remaining fields are used.
+
+    Members:
+        batch_size: The size of the batch of the data loader.
+        num_workers: Number of data-loading threads in each data loader.
+        dataset_length_train: The number of batches in a training epoch. Or 0 to mean
+            an epoch is the length of the training set.
+        dataset_length_val: The number of batches in a validation epoch. Or 0 to mean
+            an epoch is the length of the validation set.
+        dataset_length_test: used if test_dataset.eval_batches is NOT set. The number of
+            batches in a testing epoch. Or 0 to mean an epoch is the length of the test
+            set.
+        images_per_seq_options: Possible numbers of frames sampled per sequence in a batch.
+            If a conditioning_type is KNOWN or TRAIN, then this must be left at its initial
+            value. Empty (the default) means that we are not careful about which frames
+            come from which scene.
+        sample_consecutive_frames: if True, will sample a contiguous interval of frames
+            in the sequence. It first sorts the frames by timestamps when available,
+            otherwise by frame numbers, finds the connected segments within the sequence
+            of sufficient length, then samples a random pivot element among them and
+            ideally uses it as a middle of the temporal window, shifting the borders
+            where necessary. This strategy mitigates the bias against shorter segments
+            and their boundaries.
+        consecutive_frames_max_gap: if a number > 0, then used to define the maximum
+            difference in frame_number of neighbouring frames when forming connected
+            segments; if both this and consecutive_frames_max_gap_seconds are 0s,
+            the whole sequence is considered a segment regardless of frame numbers.
+        consecutive_frames_max_gap_seconds: if a number > 0.0, then used to define the
+            maximum difference in frame_timestamp of neighbouring frames when forming
+            connected segments; if both this and consecutive_frames_max_gap are 0s,
+            the whole sequence is considered a segment regardless of frame timestamps.
+    """
+
+    batch_size: int = 1
+    num_workers: int = 0
+
+    dataset_length_train: int = 0
+    dataset_length_val: int = 0
+    dataset_length_test: int = 0
+
+    images_per_seq_options: Tuple[int, ...] = ()
+    sample_consecutive_frames: bool = False
+    consecutive_frames_max_gap: int = 0
+    consecutive_frames_max_gap_seconds: float = 0.1
+
+    def __post_init__(self):
+        run_auto_creation(self)  # hook imported from pytorch3d.implicitron.tools.config
+
+    def get_data_loader_map(self, datasets: DatasetMap) -> DataLoaderMap:
+        """
+        Returns a collection of data loaders for a given collection of datasets.
+        The test loader iterates over the eval batches of the test dataset when it
+        has any; otherwise it is built in the same way as the train and val loaders.
+        """
+        train_dataset = datasets.train
+        test_dataset = datasets.test
+        train = self._make_generic_data_loader(
+            train_dataset, self.dataset_length_train, train_dataset
+        )
+        val = self._make_generic_data_loader(
+            datasets.val, self.dataset_length_val, train_dataset
+        )
+
+        has_eval_batches = (
+            test_dataset is not None and test_dataset.get_eval_batches() is not None
+        )
+        if has_eval_batches:
+            test = self._make_eval_data_loader(test_dataset)
+        else:
+            test = self._make_generic_data_loader(
+                test_dataset, self.dataset_length_test, train_dataset
+            )
+
+        return DataLoaderMap(train=train, val=val, test=test)
+
+    def _make_eval_data_loader(
+        self,
+        dataset: Optional[DatasetBase],
+    ) -> Optional[DataLoader[FrameData]]:
+        """Build a loader that serves the dataset's own eval batches, if any dataset."""
+        if dataset is None:
+            return None
+
+        # the eval batches are handed to the loader as its batch sampler
+        eval_batches = dataset.get_eval_batches()
+        loader_kwargs = self._get_data_loader_common_kwargs(dataset)
+        return DataLoader(dataset, batch_sampler=eval_batches, **loader_kwargs)
+
+    def _make_generic_data_loader(
+        self,
+        dataset: Optional[DatasetBase],
+        num_batches: int,
+        train_dataset: Optional[DatasetBase],
+    ) -> Optional[DataLoader[FrameData]]:
+        """
+        Returns the dataloader for a dataset, or None if there is no dataset.
+        The kind of loader depends on `images_per_seq_options` and `batch_size`.
+
+        Args:
+            dataset: the dataset
+            num_batches: possible ceiling on number of batches per epoch
+            train_dataset: the training dataset; only handed over to
+                `_train_loader`, i.e. when neither of the other two setups applies
+        """
+        if dataset is None:
+            return None
+
+        data_loader_kwargs = self._get_data_loader_common_kwargs(dataset)
+        few_view = len(self.images_per_seq_options) > 0
+
+        if not few_view and self.batch_size == 1:
+            # this is a typical many-view setup (without conditioning)
+            return self._simple_loader(dataset, num_batches, data_loader_kwargs)
+
+        if not few_view:
+            # edge case: conditioning on train subset, typical for Nerformer-like
+            # many-view; there is only one sequence in all datasets, so we
+            # condition on another subset
+            return self._train_loader(
+                dataset, train_dataset, num_batches, data_loader_kwargs
+            )
+
+        # this is a typical few-view setup
+        # conditioning comes from the same subset since subsets are split by seqs
+        n_sampler_batches = len(dataset) if num_batches <= 0 else num_batches
+        batch_sampler = SceneBatchSampler(
+            dataset,
+            self.batch_size,
+            num_batches=n_sampler_batches,
+            images_per_seq_options=self.images_per_seq_options,
+            sample_consecutive_frames=self.sample_consecutive_frames,
+            consecutive_frames_max_gap=self.consecutive_frames_max_gap,
+            consecutive_frames_max_gap_seconds=self.consecutive_frames_max_gap_seconds,
+        )
+        return DataLoader(dataset, batch_sampler=batch_sampler, **data_loader_kwargs)
+
+    def _get_data_loader_common_kwargs(self, dataset: DatasetBase) -> Dict[str, Any]:
+        """Keyword arguments shared by all DataLoaders created by this provider."""
+        # batches are collated with the dataset's own frame data type
+        collate_fn = dataset.frame_data_type.collate
+        return dict(num_workers=self.num_workers, collate_fn=collate_fn)
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/types.py b/pytorch3d/pytorch3d/implicitron/dataset/types.py
new file mode 100644
index 0000000000000000000000000000000000000000..58eac677affe7f879bfc78271b90d74df9507329
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/types.py
@@ -0,0 +1,355 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import dataclasses
+import gzip
+import json
+from dataclasses import dataclass, Field, MISSING
+from typing import (
+    Any,
+    cast,
+    Dict,
+    get_args,
+    get_origin,
+    IO,
+    Optional,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+)
+
+import numpy as np
+
+
+_X = TypeVar("_X")
+
+TF3 = Tuple[float, float, float]
+
+
+@dataclass
+class ImageAnnotation:
+    # path to jpg file, relative w.r.t. dataset_root
+    path: str
+    # H x W
+    size: Tuple[int, int]  # TODO: rename size_hw?
+
+
+@dataclass
+class DepthAnnotation:
+    # path to png file, relative w.r.t. dataset_root, storing `depth / scale_adjustment`
+    path: str
+    # a factor to convert png values to actual depth: `depth = png * scale_adjustment`
+    scale_adjustment: float
+    # path to png file, relative w.r.t. dataset_root, storing binary `depth` mask
+    mask_path: Optional[str]
+
+
+@dataclass
+class MaskAnnotation:
+    # path to png file storing (Prob(fg | pixel) * 255)
+    path: str
+    # (soft) number of pixels in the mask; sum(Prob(fg | pixel))
+    mass: Optional[float] = None
+    # tight bounding box around the foreground mask
+    bounding_box_xywh: Optional[Tuple[float, float, float, float]] = None
+
+
+@dataclass
+class ViewpointAnnotation:
+    # In right-multiply (PyTorch3D) format. X_cam = X_world @ R + T
+    R: Tuple[TF3, TF3, TF3]
+    T: TF3
+
+    focal_length: Tuple[float, float]
+    principal_point: Tuple[float, float]
+
+    intrinsics_format: str = "ndc_norm_image_bounds"
+    # Defines the co-ordinate system where focal_length and principal_point live.
+    # Possible values: ndc_isotropic | ndc_norm_image_bounds (default)
+    # ndc_norm_image_bounds: legacy PyTorch3D NDC format, where image boundaries
+    #     correspond to [-1, 1] x [-1, 1], and the scale along x and y may differ
+    # ndc_isotropic: PyTorch3D 0.5+ NDC convention where the shorter side has
+    #     the range [-1, 1], and the longer one has the range [-s, s]; s >= 1,
+    #     where s is the aspect ratio. The scale is same along x and y.
+
+
+@dataclass
+class FrameAnnotation:
+    """A dataclass used to load annotations from json."""
+
+    # can be used to join with `SequenceAnnotation`
+    sequence_name: str
+    # 0-based, continuous frame number within sequence
+    frame_number: int
+    # timestamp in seconds from the video start
+    frame_timestamp: float
+
+    image: ImageAnnotation
+    depth: Optional[DepthAnnotation] = None
+    mask: Optional[MaskAnnotation] = None
+    viewpoint: Optional[ViewpointAnnotation] = None
+    meta: Optional[Dict[str, Any]] = None
+
+
+@dataclass
+class PointCloudAnnotation:
+    # path to ply file with points only, relative w.r.t. dataset_root
+    path: str
+    # the bigger the better
+    quality_score: float
+    n_points: Optional[int]
+
+
+@dataclass
+class VideoAnnotation:
+    # path to the original video file, relative w.r.t. dataset_root
+    path: str
+    # length of the video in seconds
+    length: float
+
+
+@dataclass
+class SequenceAnnotation:
+    sequence_name: str
+    category: str
+    video: Optional[VideoAnnotation] = None
+    point_cloud: Optional[PointCloudAnnotation] = None
+    # the bigger the better
+    viewpoint_quality_score: Optional[float] = None
+
+
+def dump_dataclass(obj: Any, f: IO, binary: bool = False) -> None:
+    """
+    Args:
+        f: A file object opened for writing; the json is written into it.
+        obj: A @dataclass or collection hierarchy including dataclasses.
+        binary: Set to True if `f` expects bytes (utf8 is written), else False.
+    """
+    if binary:
+        f.write(json.dumps(_asdict_rec(obj)).encode("utf8"))
+    else:
+        json.dump(_asdict_rec(obj), f)
+
+
+def load_dataclass(f: IO, cls: Type[_X], binary: bool = False) -> _X:
+    """
+    Loads to a @dataclass or collection hierarchy including dataclasses
+    from a json recursively.
+    Call it like load_dataclass(f, typing.List[FrameAnnotation]).
+    Raises KeyError on unknown keys, unless the top-level json is a list.
+
+    Args:
+        f: A file object opened for reading; the json is read from it.
+        cls: The class of the loaded dataclass.
+        binary: Set to True if `f` yields bytes (decoded as utf8), else False.
+    """
+    if binary:
+        asdict = json.loads(f.read().decode("utf8"))
+    else:
+        asdict = json.load(f)
+
+    if isinstance(asdict, list):
+        # in the list case, run a faster "vectorized" version
+        cls = get_args(cls)[0]
+        res = list(_dataclass_list_from_dict_list(asdict, cls))
+    else:
+        res = _dataclass_from_dict(asdict, cls)
+
+    return res
+
+
+def _dataclass_list_from_dict_list(dlist, typeannot):
+    """
+    Vectorised version of `_dataclass_from_dict`.
+    The output should be equivalent to
+    `[_dataclass_from_dict(d, typeannot) for d in dlist]`.
+
+    Args:
+        dlist: list of objects to convert.
+        typeannot: type of each of those objects.
+    Returns:
+        iterator or list over converted objects of the same length as `dlist`.
+
+    Raises:
+        ValueError: it assumes the objects have None's in consistent places across
+            objects, otherwise it would ignore some values. This generally holds for
+            auto-generated annotations, but otherwise use `_dataclass_from_dict`.
+    """
+
+    cls = get_origin(typeannot) or typeannot
+
+    if typeannot is Any:
+        return dlist
+    if all(obj is None for obj in dlist):  # 1st recursion base: all None nodes
+        return dlist
+    if any(obj is None for obj in dlist):
+        # filter out Nones and recurse on the resulting list
+        idx_notnone = [(i, obj) for i, obj in enumerate(dlist) if obj is not None]
+        idx, notnone = zip(*idx_notnone)
+        converted = _dataclass_list_from_dict_list(notnone, typeannot)
+        res = [None] * len(dlist)
+        for i, obj in zip(idx, converted):
+            res[i] = obj
+        return res
+
+    is_optional, contained_type = _resolve_optional(typeannot)
+    if is_optional:
+        return _dataclass_list_from_dict_list(dlist, contained_type)
+
+    # otherwise, we dispatch by the type of the provided annotation to convert to
+    if issubclass(cls, tuple) and hasattr(cls, "_fields"):  # namedtuple
+        # For namedtuple, call the function recursively on the lists of corresponding keys
+        types = cls.__annotations__.values()
+        dlist_T = zip(*dlist)
+        res_T = [
+            _dataclass_list_from_dict_list(key_list, tp)
+            for key_list, tp in zip(dlist_T, types)
+        ]
+        return [cls(*converted_as_tuple) for converted_as_tuple in zip(*res_T)]
+    elif issubclass(cls, (list, tuple)):
+        # For list/tuple, call the function recursively on the lists of corresponding positions
+        types = get_args(typeannot)
+        if len(types) == 1:  # probably List; replicate for all items
+            types = types * len(dlist[0])
+        dlist_T = zip(*dlist)
+        res_T = (
+            _dataclass_list_from_dict_list(pos_list, tp)
+            for pos_list, tp in zip(dlist_T, types)
+        )
+        if issubclass(cls, tuple):
+            return list(zip(*res_T))
+        else:
+            return [cls(converted_as_tuple) for converted_as_tuple in zip(*res_T)]
+    elif issubclass(cls, dict):
+        # For the dictionary, call the function recursively on concatenated keys and vertices
+        key_t, val_t = get_args(typeannot)
+        all_keys_res = _dataclass_list_from_dict_list(
+            [k for obj in dlist for k in obj.keys()], key_t
+        )
+        all_vals_res = _dataclass_list_from_dict_list(
+            [k for obj in dlist for k in obj.values()], val_t
+        )
+        indices = np.cumsum([len(obj) for obj in dlist])
+        assert indices[-1] == len(all_keys_res)
+
+        keys = np.split(list(all_keys_res), indices[:-1])
+        all_vals_res_iter = iter(all_vals_res)
+        return [cls(zip(k, all_vals_res_iter)) for k in keys]
+    elif not dataclasses.is_dataclass(typeannot):
+        return dlist
+
+    # dataclass node: 2nd recursion base; call the function recursively on the lists
+    # of the corresponding fields
+    assert dataclasses.is_dataclass(cls)
+    fieldtypes = {
+        f.name: (_unwrap_type(f.type), _get_dataclass_field_default(f))
+        for f in dataclasses.fields(typeannot)
+    }
+
+    # NOTE the default object is shared here
+    key_lists = (
+        _dataclass_list_from_dict_list([obj.get(k, default) for obj in dlist], type_)
+        for k, (type_, default) in fieldtypes.items()
+    )
+    transposed = zip(*key_lists)
+    return [cls(*vals_as_tuple) for vals_as_tuple in transposed]
+
+
+def _dataclass_from_dict(d, typeannot):
+    if d is None or typeannot is Any:
+        return d
+    is_optional, contained_type = _resolve_optional(typeannot)
+    if is_optional:
+        # an Optional not set to None, just use the contents of the Optional.
+        return _dataclass_from_dict(d, contained_type)
+
+    cls = get_origin(typeannot) or typeannot
+    if issubclass(cls, tuple) and hasattr(cls, "_fields"):  # namedtuple
+        types = cls.__annotations__.values()
+        return cls(*[_dataclass_from_dict(v, tp) for v, tp in zip(d, types)])
+    elif issubclass(cls, (list, tuple)):
+        types = get_args(typeannot)
+        if len(types) == 1:  # probably List; replicate for all items
+            types = types * len(d)
+        return cls(_dataclass_from_dict(v, tp) for v, tp in zip(d, types))
+    elif issubclass(cls, dict):
+        key_t, val_t = get_args(typeannot)
+        return cls(
+            (_dataclass_from_dict(k, key_t), _dataclass_from_dict(v, val_t))
+            for k, v in d.items()
+        )
+    elif not dataclasses.is_dataclass(typeannot):
+        return d
+
+    assert dataclasses.is_dataclass(cls)
+    fieldtypes = {f.name: _unwrap_type(f.type) for f in dataclasses.fields(typeannot)}
+    return cls(**{k: _dataclass_from_dict(v, fieldtypes[k]) for k, v in d.items()})
+
+
+def _unwrap_type(tp):
+    # Optional[X] (i.e. a two-member Union with NoneType) -> X; else unchanged
+    none_type = type(None)
+    members = get_args(tp) if get_origin(tp) is Union else ()
+    if len(members) != 2 or all(m is not none_type for m in members):
+        return tp
+    first, second = members
+    return first if second is none_type else second
+
+
+def _get_dataclass_field_default(field: Field) -> Any:
+    if field.default_factory is not MISSING:
+        # pyre-fixme[29]: `Union[dataclasses._MISSING_TYPE,
+        #  dataclasses._DefaultFactory[typing.Any]]` is not a function.
+        return field.default_factory()
+    elif field.default is not MISSING:
+        return field.default
+    else:
+        return None
+
+
+def _asdict_rec(obj):
+    return dataclasses._asdict_inner(obj, dict)
+
+
+def dump_dataclass_jgzip(outfile: str, obj: Any) -> None:
+    """
+    Dumps obj to a gzipped json outfile.
+
+    Args:
+        obj: A @dataclass or collection hierarchy including dataclasses.
+        outfile: The path to the output file.
+    """
+    with gzip.GzipFile(outfile, "wb") as f:
+        dump_dataclass(obj, cast(IO, f), binary=True)
+
+
+def load_dataclass_jgzip(outfile, cls):
+    """
+    Loads a dataclass from a gzipped json outfile.
+
+    Args:
+        outfile: The path to the loaded file.
+        cls: The type annotation of the loaded dataclass.
+
+    Returns:
+        loaded_dataclass: The loaded dataclass.
+    """
+    with gzip.GzipFile(outfile, "rb") as f:
+        return load_dataclass(cast(IO, f), cls, binary=True)
+
+
+def _resolve_optional(type_: Any) -> Tuple[bool, Any]:
+    """Return (True, T) if `type_` is `Optional[T]` (either arg order) or Any."""
+    if get_origin(type_) is Union:
+        args = get_args(type_)
+        if len(args) == 2 and type(None) in args:
+            # NoneType may come first, e.g. Union[None, T]; mirror _unwrap_type
+            return True, args[0] if args[1] is type(None) else args[1]
+    if type_ is Any:
+        return True, Any
+    return False, type_
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/utils.py b/pytorch3d/pytorch3d/implicitron/dataset/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..01573a1a772eac6284e986b2aa4cb045d274b9ea
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/utils.py
@@ -0,0 +1,380 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import functools
+import warnings
+from pathlib import Path
+from typing import List, Optional, Tuple, TypeVar, Union
+
+import numpy as np
+import torch
+from PIL import Image
+
+from pytorch3d.io import IO
+from pytorch3d.renderer.cameras import PerspectiveCameras
+from pytorch3d.structures.pointclouds import Pointclouds
+
+DATASET_TYPE_TRAIN = "train"
+DATASET_TYPE_TEST = "test"
+DATASET_TYPE_KNOWN = "known"
+DATASET_TYPE_UNKNOWN = "unseen"
+
+
+class GenericWorkaround:
+    """
+    OmegaConf.structured has a weirdness when you try to apply
+    it to a dataclass whose first base class is a Generic which is not
+    Dict. The issue is with a function called get_dict_key_value_types
+    in omegaconf/_utils.py.
+    For example this fails:
+
+        @dataclass(eq=False)
+        class D(torch.utils.data.Dataset[int]):
+            a: int = 3
+
+        OmegaConf.structured(D)
+
+    We avoid the problem by adding this class as an extra base class.
+    """
+
+    pass
+
+
+def is_known_frame_scalar(frame_type: str) -> bool:
+    """
+    Given a single frame type corresponding to a single frame, return whether
+    the frame is a known frame.
+    """
+    return frame_type.endswith(DATASET_TYPE_KNOWN)
+
+
+def is_known_frame(
+    frame_type: List[str], device: Optional[str] = None
+) -> torch.BoolTensor:
+    """
+    Given a list `frame_type` of frame types in a batch, return a tensor
+    of boolean flags expressing whether the corresponding frame is a known frame.
+    """
+    # pyre-fixme[7]: Expected `BoolTensor` but got `Tensor`.
+    return torch.tensor(
+        [is_known_frame_scalar(ft) for ft in frame_type],
+        dtype=torch.bool,
+        device=device,
+    )
+
+
+def is_train_frame(
+    frame_type: List[str], device: Optional[str] = None
+) -> torch.BoolTensor:
+    """
+    Given a list `frame_type` of frame types in a batch, return a tensor
+    of boolean flags expressing whether the corresponding frame is a training frame.
+    """
+    # pyre-fixme[7]: Expected `BoolTensor` but got `Tensor`.
+    return torch.tensor(
+        [ft.startswith(DATASET_TYPE_TRAIN) for ft in frame_type],
+        dtype=torch.bool,
+        device=device,
+    )
+
+
+def get_bbox_from_mask(
+    mask: np.ndarray, thr: float, decrease_quant: float = 0.05
+) -> Tuple[int, int, int, int]:
+    # bbox in xywh
+    masks_for_box = np.zeros_like(mask)
+    while masks_for_box.sum() <= 1.0:
+        masks_for_box = (mask > thr).astype(np.float32)
+        thr -= decrease_quant
+    if thr <= 0.0:
+        warnings.warn(
+            f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1
+        )
+
+    x0, x1 = get_1d_bounds(masks_for_box.sum(axis=-2))
+    y0, y1 = get_1d_bounds(masks_for_box.sum(axis=-1))
+
+    return x0, y0, x1 - x0, y1 - y0
+
+
+def crop_around_box(
+    tensor: torch.Tensor, bbox: torch.Tensor, impath: str = ""
+) -> torch.Tensor:
+    # bbox is xyxy, where the upper bound is corrected with +1
+    bbox = clamp_box_to_image_bounds_and_round(
+        bbox,
+        image_size_hw=tuple(tensor.shape[-2:]),
+    )
+    tensor = tensor[..., bbox[1] : bbox[3], bbox[0] : bbox[2]]
+    assert all(c > 0 for c in tensor.shape), f"squashed image {impath}"
+    return tensor
+
+
+def clamp_box_to_image_bounds_and_round(
+    bbox_xyxy: torch.Tensor,
+    image_size_hw: Tuple[int, int],
+) -> torch.LongTensor:
+    bbox_xyxy = bbox_xyxy.clone()
+    bbox_xyxy[[0, 2]] = torch.clamp(bbox_xyxy[[0, 2]], 0, image_size_hw[-1])
+    bbox_xyxy[[1, 3]] = torch.clamp(bbox_xyxy[[1, 3]], 0, image_size_hw[-2])
+    if not isinstance(bbox_xyxy, torch.LongTensor):
+        bbox_xyxy = bbox_xyxy.round().long()
+    return bbox_xyxy  # pyre-ignore [7]
+
+
+T = TypeVar("T", bound=torch.Tensor)
+
+
+def bbox_xyxy_to_xywh(xyxy: T) -> T:
+    wh = xyxy[2:] - xyxy[:2]
+    xywh = torch.cat([xyxy[:2], wh])
+    return xywh  # pyre-ignore
+
+
+def get_clamp_bbox(
+    bbox: torch.Tensor,
+    box_crop_context: float = 0.0,
+    image_path: str = "",
+) -> torch.Tensor:
+    # box_crop_context: rate of expansion for bbox
+    # returns possibly expanded bbox xyxy as float
+
+    bbox = bbox.clone()  # do not edit bbox in place
+
+    # increase box size
+    if box_crop_context > 0.0:
+        c = box_crop_context
+        bbox = bbox.float()
+        bbox[0] -= bbox[2] * c / 2
+        bbox[1] -= bbox[3] * c / 2
+        bbox[2] += bbox[2] * c
+        bbox[3] += bbox[3] * c
+
+    if (bbox[2:] <= 1.0).any():
+        raise ValueError(
+            f"squashed image {image_path}!! The bounding box contains no pixels."
+        )
+
+    bbox[2:] = torch.clamp(bbox[2:], 2)  # set min height, width to 2 along both axes
+    bbox_xyxy = bbox_xywh_to_xyxy(bbox, clamp_size=2)
+
+    return bbox_xyxy
+
+
+def rescale_bbox(
+    bbox: torch.Tensor,
+    orig_res: Union[Tuple[int, int], torch.LongTensor],
+    new_res: Union[Tuple[int, int], torch.LongTensor],
+) -> torch.Tensor:
+    assert bbox is not None
+    assert np.prod(orig_res) > 1e-8
+    # average ratio of dimensions
+    # pyre-ignore
+    rel_size = (new_res[0] / orig_res[0] + new_res[1] / orig_res[1]) / 2.0
+    return bbox * rel_size
+
+
+def bbox_xywh_to_xyxy(
+    xywh: torch.Tensor, clamp_size: Optional[int] = None
+) -> torch.Tensor:
+    # work on a copy so that the caller's box is left untouched
+    xyxy = xywh.clone()
+    size = xyxy[2:] if clamp_size is None else torch.clamp(xyxy[2:], clamp_size)
+    xyxy[2:] = size + xyxy[:2]
+    return xyxy
+
+
+def get_1d_bounds(arr: np.ndarray) -> Tuple[int, int]:
+    lo, hi = np.flatnonzero(arr)[[0, -1]]  # first and last non-zero positions
+    return lo, hi + 1
+
+
+def resize_image(
+    image: Union[np.ndarray, torch.Tensor],
+    image_height: Optional[int],
+    image_width: Optional[int],
+    mode: str = "bilinear",
+) -> Tuple[torch.Tensor, float, torch.Tensor]:
+
+    if isinstance(image, np.ndarray):
+        image = torch.from_numpy(image)
+
+    if image_height is None or image_width is None:
+        # skip the resizing
+        return image, 1.0, torch.ones_like(image[:1])
+    # takes numpy array or tensor, returns pytorch tensor
+    minscale = min(
+        image_height / image.shape[-2],
+        image_width / image.shape[-1],
+    )
+    imre = torch.nn.functional.interpolate(
+        image[None],
+        scale_factor=minscale,
+        mode=mode,
+        align_corners=False if mode == "bilinear" else None,
+        recompute_scale_factor=True,
+    )[0]
+    imre_ = torch.zeros(image.shape[0], image_height, image_width)
+    imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre
+    mask = torch.zeros(1, image_height, image_width)
+    mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0
+    return imre_, minscale, mask
+
+
+def transpose_normalize_image(image: np.ndarray) -> np.ndarray:
+    im = np.atleast_3d(image).transpose((2, 0, 1))
+    return im.astype(np.float32) / 255.0
+
+
+def load_image(path: str) -> np.ndarray:
+    with Image.open(path) as pil_im:
+        im = np.array(pil_im.convert("RGB"))
+
+    return transpose_normalize_image(im)
+
+
+def load_mask(path: str) -> np.ndarray:
+    with Image.open(path) as pil_im:
+        mask = np.array(pil_im)
+
+    return transpose_normalize_image(mask)
+
+
+def load_depth(path: str, scale_adjustment: float) -> np.ndarray:
+    if path.lower().endswith(".exr"):
+        # NOTE: environment variable OPENCV_IO_ENABLE_OPENEXR must be set to 1
+        # You will have to accept these vulnerabilities by using OpenEXR:
+        # https://github.com/opencv/opencv/issues/21326
+        import cv2
+
+        d = cv2.imread(path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)[..., 0]
+        d[d > 1e9] = 0.0
+    elif path.lower().endswith(".png"):
+        d = load_16big_png_depth(path)
+    else:
+        raise ValueError('unsupported depth file name "%s"' % path)
+
+    d = d * scale_adjustment
+
+    d[~np.isfinite(d)] = 0.0
+    return d[None]  # fake feature channel
+
+
+def load_16big_png_depth(depth_png: str) -> np.ndarray:
+    with Image.open(depth_png) as depth_pil:
+        # the image is stored with 16-bit depth but PIL reads it as I (32 bit).
+        # we cast it to uint16, then reinterpret as float16, then cast to float32
+        depth = (
+            np.frombuffer(np.array(depth_pil, dtype=np.uint16), dtype=np.float16)
+            .astype(np.float32)
+            .reshape((depth_pil.size[1], depth_pil.size[0]))
+        )
+    return depth
+
+
+def load_1bit_png_mask(file: str) -> np.ndarray:
+    with Image.open(file) as pil_im:
+        mask = (np.array(pil_im.convert("L")) > 0.0).astype(np.float32)
+    return mask
+
+
+def load_depth_mask(path: str) -> np.ndarray:
+    if not path.lower().endswith(".png"):
+        raise ValueError('unsupported depth mask file name "%s"' % path)
+    m = load_1bit_png_mask(path)
+    return m[None]  # fake feature channel
+
+
+def safe_as_tensor(data, dtype):
+    return torch.tensor(data, dtype=dtype) if data is not None else None
+
+
+def _convert_ndc_to_pixels(
+    focal_length: torch.Tensor,
+    principal_point: torch.Tensor,
+    image_size_wh: torch.Tensor,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    half_image_size = image_size_wh / 2
+    rescale = half_image_size.min()
+    principal_point_px = half_image_size - principal_point * rescale
+    focal_length_px = focal_length * rescale
+    return focal_length_px, principal_point_px
+
+
+def _convert_pixels_to_ndc(
+    focal_length_px: torch.Tensor,
+    principal_point_px: torch.Tensor,
+    image_size_wh: torch.Tensor,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    half_image_size = image_size_wh / 2
+    rescale = half_image_size.min()
+    principal_point = (half_image_size - principal_point_px) / rescale
+    focal_length = focal_length_px / rescale
+    return focal_length, principal_point
+
+
+def adjust_camera_to_bbox_crop_(
+    camera: PerspectiveCameras,
+    image_size_wh: torch.Tensor,
+    clamp_bbox_xywh: torch.Tensor,
+) -> None:
+    if len(camera) != 1:
+        raise ValueError("Adjusting currently works with singleton cameras camera only")
+
+    focal_length_px, principal_point_px = _convert_ndc_to_pixels(
+        camera.focal_length[0],
+        camera.principal_point[0],
+        image_size_wh,
+    )
+    principal_point_px_cropped = principal_point_px - clamp_bbox_xywh[:2]
+
+    focal_length, principal_point_cropped = _convert_pixels_to_ndc(
+        focal_length_px,
+        principal_point_px_cropped,
+        clamp_bbox_xywh[2:],
+    )
+
+    camera.focal_length = focal_length[None]
+    camera.principal_point = principal_point_cropped[None]
+
+
+def adjust_camera_to_image_scale_(
+    camera: PerspectiveCameras,
+    original_size_wh: torch.Tensor,
+    new_size_wh: torch.LongTensor,
+) -> None:
+    """
+    Rescales the intrinsics of `camera` in place for an image resized from
+    `original_size_wh` to `new_size_wh`; only camera 0 is read, nothing is returned.
+    """
+    focal_length_px, principal_point_px = _convert_ndc_to_pixels(
+        camera.focal_length[0], camera.principal_point[0], original_size_wh
+    )
+
+    # now scale and convert from pixels to NDC
+    image_size_wh_output = new_size_wh.float()
+    scale = (image_size_wh_output / original_size_wh).min(dim=-1, keepdim=True).values
+    focal_length_px_scaled = focal_length_px * scale
+    principal_point_px_scaled = principal_point_px * scale
+
+    focal_length_scaled, principal_point_scaled = _convert_pixels_to_ndc(
+        focal_length_px_scaled, principal_point_px_scaled, image_size_wh_output
+    )
+    camera.focal_length = focal_length_scaled[None]
+    camera.principal_point = principal_point_scaled[None]  # pyre-ignore
+
+
+# NOTE this cache is per-worker; they are implemented as processes.
+# each batch is loaded and collated by a single worker;
+# since sequences tend to co-occur within batches, this is useful.
+@functools.lru_cache(maxsize=256)
+def load_pointcloud(pcl_path: Union[str, Path], max_points: int = 0) -> Pointclouds:
+    pcl = IO().load_pointcloud(pcl_path)
+    if max_points > 0:
+        pcl = pcl.subsample(max_points)
+
+    return pcl
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/visualize.py b/pytorch3d/pytorch3d/implicitron/dataset/visualize.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ac633f6e763bd02665d5c0ebbb4aa655861a93b
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/dataset/visualize.py
@@ -0,0 +1,96 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import cast, Optional, Tuple
+
+import torch
+from pytorch3d.implicitron.tools.point_cloud_utils import get_rgbd_point_cloud
+from pytorch3d.structures import Pointclouds
+
+from .frame_data import FrameData
+from .json_index_dataset import JsonIndexDataset
+
+
+def get_implicitron_sequence_pointcloud(
+    dataset: JsonIndexDataset,
+    sequence_name: Optional[str] = None,
+    mask_points: bool = True,
+    max_frames: int = -1,
+    num_workers: int = 0,
+    load_dataset_point_cloud: bool = False,
+) -> Tuple[Pointclouds, FrameData]:
+    """
+    Make a point cloud by sampling random points from each frame the dataset.
+    """
+
+    if len(dataset) == 0:
+        raise ValueError("The dataset is empty.")
+
+    if not dataset.load_depths:
+        raise ValueError("The dataset has to load depths (dataset.load_depths=True).")
+
+    if mask_points and not dataset.load_masks:
+        raise ValueError(
+            "For mask_points=True, the dataset has to load masks"
+            + " (dataset.load_masks=True)."
+        )
+
+    # setup the indices of frames loaded from the dataset db
+    sequence_entries = list(range(len(dataset)))
+    if sequence_name is not None:
+        sequence_entries = [
+            ei
+            for ei in sequence_entries
+            # pyre-ignore[16]
+            if dataset.frame_annots[ei]["frame_annotation"].sequence_name
+            == sequence_name
+        ]
+        if len(sequence_entries) == 0:
+            raise ValueError(
+                f'There are no dataset entries for sequence name "{sequence_name}".'
+            )
+
+    # subsample loaded frames if needed
+    if (max_frames > 0) and (len(sequence_entries) > max_frames):
+        sequence_entries = [
+            sequence_entries[i]
+            for i in torch.randperm(len(sequence_entries))[:max_frames].sort().values
+        ]
+
+    # take only the part of the dataset corresponding to the sequence entries
+    sequence_dataset = torch.utils.data.Subset(dataset, sequence_entries)
+
+    # load the required part of the dataset
+    loader = torch.utils.data.DataLoader(
+        sequence_dataset,
+        batch_size=len(sequence_dataset),
+        shuffle=False,
+        num_workers=num_workers,
+        collate_fn=dataset.frame_data_type.collate,
+    )
+
+    frame_data = next(iter(loader))  # there's only one batch
+
+    # scene point cloud
+    if load_dataset_point_cloud:
+        if not dataset.load_point_clouds:
+            raise ValueError(
+                "For load_dataset_point_cloud=True, the dataset has to"
+                + " load point clouds (dataset.load_point_clouds=True)."
+            )
+        point_cloud = frame_data.sequence_point_cloud
+
+    else:
+        point_cloud = get_rgbd_point_cloud(
+            frame_data.camera,
+            frame_data.image_rgb,
+            frame_data.depth_map,
+            (cast(torch.Tensor, frame_data.fg_probability) > 0.5).float()
+            if mask_points and frame_data.fg_probability is not None
+            else None,
+        )
+
+    return point_cloud, frame_data
diff --git a/pytorch3d/pytorch3d/implicitron/eval_demo.py b/pytorch3d/pytorch3d/implicitron/eval_demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..91e696945b1625a7889870ea5ceb3ed2efd24311
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/eval_demo.py
@@ -0,0 +1,181 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import dataclasses
+import os
+from enum import Enum
+from typing import Any, cast, Dict, List, Optional, Tuple
+
+import lpips
+import torch
+from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource
+from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
+from pytorch3d.implicitron.dataset.json_index_dataset_map_provider import (
+    CO3D_CATEGORIES,
+)
+from pytorch3d.implicitron.evaluation.evaluate_new_view_synthesis import (
+    aggregate_nvs_results,
+    eval_batch,
+    pretty_print_nvs_metrics,
+    summarize_nvs_eval_results,
+)
+from pytorch3d.implicitron.models.model_dbir import ModelDBIR
+from pytorch3d.implicitron.tools.utils import dataclass_to_cuda_
+from tqdm import tqdm
+
+
+class Task(Enum):
+    SINGLE_SEQUENCE = "singlesequence"
+    MULTI_SEQUENCE = "multisequence"
+
+
+def main() -> None:
+    """
+    Evaluates new view synthesis metrics of a simple depth-based image rendering
+    (DBIR) model for multisequence/singlesequence tasks for several categories.
+
+    The evaluation is conducted on the same data as in [1] and, hence, the results
+    are directly comparable to the numbers reported in [1].
+
+    References:
+        [1] J. Reizenstein, R. Shapovalov, P. Henzler, L. Sbordone,
+                P. Labatut, D. Novotny:
+            Common Objects in 3D: Large-Scale Learning
+                and Evaluation of Real-life 3D Category Reconstruction
+    """
+
+    task_results = {}
+    for task in (Task.SINGLE_SEQUENCE, Task.MULTI_SEQUENCE):
+        task_results[task] = []
+        for category in CO3D_CATEGORIES[: (20 if task == Task.SINGLE_SEQUENCE else 10)]:
+            for single_sequence_id in (
+                (0, 1) if task == Task.SINGLE_SEQUENCE else (None,)
+            ):
+                category_result = evaluate_dbir_for_category(
+                    category, task=task, single_sequence_id=single_sequence_id
+                )
+                print("")
+                print(
+                    f"Results for task={task}; category={category};"
+                    + (
+                        f" sequence={single_sequence_id}:"
+                        if single_sequence_id is not None
+                        else ":"
+                    )
+                )
+                pretty_print_nvs_metrics(category_result)
+                print("")
+
+                task_results[task].append(category_result)
+            _print_aggregate_results(task, task_results)
+
+    for task in task_results:
+        _print_aggregate_results(task, task_results)
+
+
+def evaluate_dbir_for_category(
+    category: str,
+    task: Task,
+    bg_color: Tuple[float, float, float] = (0.0, 0.0, 0.0),
+    single_sequence_id: Optional[int] = None,
+    num_workers: int = 16,
+):
+    """
+    Evaluates new view synthesis metrics of a simple depth-based image rendering
+    (DBIR) model for a given task, category, and sequence (in case task=='singlesequence').
+
+    Args:
+        category: Object category.
+        bg_color: Background color of the renders.
+        task: Evaluation task. Either singlesequence or multisequence.
+        single_sequence_id: The ID of the evaluation sequence for the singlesequence task.
+        num_workers: The number of workers for the employed dataloaders.
+            NOTE(review): not referenced in this function body - confirm intent.
+
+    Returns:
+        category_result: A dictionary of quantitative metrics.
+    """
+
+    single_sequence_id = single_sequence_id if single_sequence_id is not None else -1
+
+    torch.manual_seed(42)
+
+    dataset_map_provider_args = {
+        "category": category,
+        "dataset_root": os.environ["CO3D_DATASET_ROOT"],
+        "assert_single_seq": task == Task.SINGLE_SEQUENCE,
+        "task_str": task.value,
+        "test_on_train": False,
+        "test_restrict_sequence_id": single_sequence_id,
+        "dataset_JsonIndexDataset_args": {"load_point_clouds": True},
+    }
+    data_source = ImplicitronDataSource(
+        dataset_map_provider_JsonIndexDatasetMapProvider_args=dataset_map_provider_args
+    )
+
+    datasets, dataloaders = data_source.get_datasets_and_dataloaders()
+
+    test_dataset = datasets.test
+    test_dataloader = dataloaders.test
+    if test_dataset is None or test_dataloader is None:
+        raise ValueError("must have a test dataset.")
+
+    image_size = cast(JsonIndexDataset, test_dataset).image_width
+
+    if image_size is None:
+        raise ValueError("Image size should be set in the dataset")
+
+    # init the simple DBIR model
+    model = ModelDBIR(
+        render_image_width=image_size,
+        render_image_height=image_size,
+        bg_color=bg_color,
+        max_points=int(1e5),
+    )
+    model.cuda()
+
+    # init the lpips model for eval
+    lpips_model = lpips.LPIPS(net="vgg")
+    lpips_model = lpips_model.cuda()
+
+    per_batch_eval_results = []
+    print("Evaluating DBIR model ...")
+    for frame_data in tqdm(test_dataloader):
+        frame_data = dataclass_to_cuda_(frame_data)
+        preds = model(**dataclasses.asdict(frame_data))
+        per_batch_eval_results.append(
+            eval_batch(
+                frame_data,
+                preds["implicitron_render"],
+                bg_color=bg_color,
+                lpips_model=lpips_model,
+            )
+        )
+
+    category_result_flat, category_result = summarize_nvs_eval_results(
+        per_batch_eval_results,
+        is_multisequence=task != Task.SINGLE_SEQUENCE,
+    )
+
+    return category_result["results"]
+
+
+def _print_aggregate_results(
+    task: Task, task_results: Dict[Task, List[List[Dict[str, Any]]]]
+) -> None:
+    """
+    Prints the aggregate metrics for a given task.
+    """
+    aggregate_task_result = aggregate_nvs_results(task_results[task])
+    print("")
+    print(f"Aggregate results for task={task}:")
+    pretty_print_nvs_metrics(aggregate_task_result)
+    print("")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pytorch3d/pytorch3d/implicitron/evaluation/__init__.py b/pytorch3d/pytorch3d/implicitron/evaluation/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/evaluation/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/pytorch3d/implicitron/evaluation/evaluate_new_view_synthesis.py b/pytorch3d/pytorch3d/implicitron/evaluation/evaluate_new_view_synthesis.py
new file mode 100644
index 0000000000000000000000000000000000000000..decf938b2cdc6195f29b280066cb541692978b5f
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/evaluation/evaluate_new_view_synthesis.py
@@ -0,0 +1,596 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import copy
+import warnings
+from collections import OrderedDict
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Sequence, Tuple, TYPE_CHECKING, Union
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from pytorch3d.implicitron.dataset.frame_data import FrameData
+from pytorch3d.implicitron.dataset.utils import is_train_frame
+from pytorch3d.implicitron.models.base_model import ImplicitronRender
+from pytorch3d.implicitron.tools import vis_utils
+from pytorch3d.implicitron.tools.image_utils import mask_background
+from pytorch3d.implicitron.tools.metric_utils import calc_psnr, eval_depth, iou, rgb_l1
+from pytorch3d.implicitron.tools.point_cloud_utils import get_rgbd_point_cloud
+from pytorch3d.implicitron.tools.vis_utils import make_depth_image
+from pytorch3d.renderer.cameras import PerspectiveCameras
+from pytorch3d.vis.plotly_vis import plot_scene
+from tabulate import tabulate
+
+if TYPE_CHECKING:
+    from visdom import Visdom
+
+
+# Source-view counts for which `summarize_nvs_eval_results` reports a separate
+# "n_src=<k>" average when evaluating as a multisequence task.
+EVAL_N_SRC_VIEWS = [1, 3, 5, 7, 9]
+
+
+@dataclass
+class _Visualizer:
+    """
+    Debug helper that sends evaluation images, depth maps and point clouds
+    to Visdom.
+
+    The Visdom connection is obtained in `__post_init__` from
+    `vis_utils.get_visdom_connection()`. Both `show_rgb` and `show_depth`
+    return without doing anything when that connection is None.
+    """
+
+    # predicted image (eval_batch passes the clamped render)
+    image_render: torch.Tensor
+    # ground-truth image with the background masked out
+    image_rgb_masked: torch.Tensor
+    # predicted depth
+    depth_render: torch.Tensor
+    # ground-truth depth; may be None
+    depth_map: Optional[torch.Tensor]
+    # mask of the ground-truth depth; may be None
+    depth_mask: Optional[torch.Tensor]
+
+    # name of the Visdom environment all windows are sent to
+    visdom_env: str = "eval_debug"
+
+    # Visdom connection; not an __init__ argument, set in __post_init__
+    _viz: Optional["Visdom"] = field(init=False)
+
+    def __post_init__(self):
+        """Fetch the Visdom connection used by the `show_*` methods."""
+        self._viz = vis_utils.get_visdom_connection()
+
+    def show_rgb(
+        self, loss_value: float, metric_name: str, loss_mask_now: torch.Tensor
+    ):
+        """
+        Show the render, the masked ground truth and the loss mask in a
+        single Visdom image window.
+
+        Args:
+            loss_value: Metric value; it is appended to the window title.
+            metric_name: Used as the window id and as the title prefix.
+            loss_mask_now: Mask over which the metric was computed. It is
+                repeated 3 times along dim 1 before concatenation
+                (presumably a single-channel `(B, 1, H, W)` mask - TODO confirm).
+        """
+        if self._viz is None:
+            return
+        self._viz.images(
+            # the three images are concatenated along dim 3
+            torch.cat(
+                (
+                    self.image_render,
+                    self.image_rgb_masked,
+                    loss_mask_now.repeat(1, 3, 1, 1),
+                ),
+                dim=3,
+            ),
+            env=self.visdom_env,
+            win=metric_name,
+            opts={"title": f"{metric_name}_{loss_value:1.2f}"},
+        )
+
+    def show_depth(
+        self, depth_loss: float, name_postfix: str, loss_mask_now: torch.Tensor
+    ):
+        """
+        Show the depth images, the masks and a 3D point-cloud plot in Visdom.
+
+        Args:
+            depth_loss: Depth error value; it is appended to the window titles.
+            name_postfix: Suffix used to build the window ids and titles.
+            loss_mask_now: Mask passed to `make_depth_image` and also shown
+                in its own window.
+        """
+        if self._viz is None:
+            return
+        viz = self._viz
+        # predicted depth, followed by the ground-truth depth when available
+        viz.images(
+            torch.cat(
+                (make_depth_image(self.depth_render, loss_mask_now),)
+                + (
+                    (make_depth_image(self.depth_map, loss_mask_now),)
+                    if self.depth_map is not None
+                    else ()
+                ),
+                dim=3,
+            ),
+            env=self.visdom_env,
+            win="depth_abs" + name_postfix,
+            opts={"title": f"depth_abs_{name_postfix}_{depth_loss:1.2f}"},
+        )
+        # the loss mask itself
+        viz.images(
+            loss_mask_now,
+            env=self.visdom_env,
+            win="depth_abs" + name_postfix + "_mask",
+            opts={"title": f"depth_abs_{name_postfix}_{depth_loss:1.2f}_mask"},
+        )
+        # the ground-truth depth mask, if there is one
+        if self.depth_mask is not None:
+            viz.images(
+                self.depth_mask,
+                env=self.visdom_env,
+                win="depth_abs" + name_postfix + "_maskd",
+                opts={"title": f"depth_abs_{name_postfix}_{depth_loss:1.2f}_maskd"},
+            )
+
+        # show the 3D plot
+        # a default-constructed camera is used to build the point clouds
+        # pyre-fixme[9]: viewpoint_trivial has type `PerspectiveCameras`; used as
+        #  `TensorProperties`.
+        viewpoint_trivial: PerspectiveCameras = PerspectiveCameras().to(
+            loss_mask_now.device
+        )
+        _pcls = {
+            "pred_depth": get_rgbd_point_cloud(
+                viewpoint_trivial,
+                self.image_render,
+                self.depth_render,
+                # mask_crop,
+                torch.ones_like(self.depth_render),
+                # loss_mask_now,
+            )
+        }
+        if self.depth_map is not None:
+            _pcls["gt_depth"] = get_rgbd_point_cloud(
+                viewpoint_trivial,
+                self.image_rgb_masked,
+                self.depth_map,
+                # mask_crop,
+                torch.ones_like(self.depth_map),
+                # loss_mask_now,
+            )
+
+        # drop the point clouds that contain no points before plotting
+        _pcls = {pn: p for pn, p in _pcls.items() if int(p.num_points_per_cloud()) > 0}
+
+        plotlyplot = plot_scene(
+            {f"pcl{name_postfix}": _pcls},  # pyre-ignore
+            camera_scale=1.0,
+            pointcloud_max_points=10000,
+            pointcloud_marker_size=1,
+        )
+        viz.plotlyplot(
+            plotlyplot,
+            env=self.visdom_env,
+            win=f"pcl{name_postfix}",
+        )
+
+
+def eval_batch(
+    frame_data: FrameData,
+    implicitron_render: ImplicitronRender,
+    bg_color: Union[torch.Tensor, Sequence, str, float] = "black",
+    mask_thr: float = 0.5,
+    lpips_model=None,
+    visualize: bool = False,
+    visualize_visdom_env: str = "eval_debug",
+    break_after_visualising: bool = True,
+) -> Dict[str, Any]:
+    """
+    Produce performance metrics for a single batch of new-view synthesis
+    predictions.
+
+    Given a set of known views (for which frame_data.frame_type.endswith('known')
+    is True), a new-view synthesis method (NVS) is tasked to generate new views
+    of the scene from the viewpoint of the target views (for which
+    frame_data.frame_type.endswith('known') is False). The resulting
+    synthesized new views, stored in `implicitron_render`, are compared to the
+    target ground truth in `frame_data` in terms of geometry and appearance
+    resulting in a dictionary of metrics returned by the `eval_batch` function.
+
+    Only the first element of the batch is evaluated: the first render and the
+    first ground-truth frame are compared, the remaining frames are treated as
+    conditioning frames. This layout is assumed, it is not validated here.
+
+    Args:
+        frame_data: A FrameData object containing the input to the new view
+            synthesis method.
+        implicitron_render: The data describing the synthesized new views.
+        bg_color: The background color of the generated new views and the
+            ground truth.
+        mask_thr: Threshold applied to `frame_data.fg_probability` to obtain
+            the binary ground-truth foreground mask.
+        lpips_model: A pre-trained model for evaluating the LPIPS metric.
+            If None, the LPIPS metrics are not computed.
+        visualize: If True, visualizes the results to Visdom.
+        visualize_visdom_env: The Visdom environment used when `visualize`
+            is True.
+        break_after_visualising: If True and `visualize` is True, calls
+            `breakpoint()` after the depth visualization.
+
+    Returns:
+        results: A dictionary holding evaluation metrics (as floats) and a
+            "meta" entry with the batch size and the type of the target frame.
+
+    Throws:
+        ValueError if frame_data does not have frame_type, camera, or image_rgb
+        ValueError if the conditioning frames (all frames but the first)
+            are a mix of training and test samples
+        ValueError if one of the required fields in implicitron_render is missing
+    """
+    frame_type = frame_data.frame_type
+    if frame_type is None:
+        raise ValueError("Frame type has not been set.")
+
+    # we check that all those fields are not None but Pyre can't infer that properly
+    # TODO: assign to local variables and simplify the code.
+    if frame_data.image_rgb is None:
+        raise ValueError("Image is not in the evaluation batch.")
+
+    if frame_data.camera is None:
+        raise ValueError("Camera is not in the evaluation batch.")
+
+    # eval all results in the resolution of the frame_data image
+    image_resol = tuple(frame_data.image_rgb.shape[2:])
+
+    # Post-process the render:
+    # 1) check implicitron_render for Nones,
+    # 2) obtain copies to make sure we dont edit the original data,
+    # 3) take only the 1st (target) image
+    # 4) resize to match ground-truth resolution
+    cloned_render: Dict[str, torch.Tensor] = {}
+    for k in ["mask_render", "image_render", "depth_render"]:
+        # named so that it does not shadow the `dataclasses.field` import
+        render_tensor = getattr(implicitron_render, k)
+        if render_tensor is None:
+            raise ValueError(f"A required predicted field {k} is missing")
+
+        imode = "bilinear" if k == "image_render" else "nearest"
+        cloned_render[k] = (
+            F.interpolate(render_tensor[:1], size=image_resol, mode=imode)
+            .detach()
+            .clone()
+        )
+
+    # work on a copy so that the caller's frame_data is not modified below
+    frame_data = copy.deepcopy(frame_data)
+
+    # mask the ground truth depth in case frame_data contains the depth mask
+    if frame_data.depth_map is not None and frame_data.depth_mask is not None:
+        frame_data.depth_map *= frame_data.depth_mask
+
+    if not isinstance(frame_type, list):  # not batch FrameData
+        frame_type = [frame_type]
+
+    is_train = is_train_frame(frame_type)
+    # all conditioning frames (index 1 onwards) must agree with frame 1
+    if len(is_train) > 1 and (is_train[1] != is_train[1:]).any():
+        raise ValueError(
+            "All (conditioning) frames in the eval batch have to be either train/test."
+        )
+
+    # keep only the first (target) element of the ground-truth tensors
+    for k in [
+        "depth_map",
+        "image_rgb",
+        "fg_probability",
+        "mask_crop",
+    ]:
+        if not hasattr(frame_data, k) or getattr(frame_data, k) is None:
+            continue
+        setattr(frame_data, k, getattr(frame_data, k)[:1])
+
+    if frame_data.depth_map is None or frame_data.depth_map.sum() <= 0:
+        warnings.warn("Empty or missing depth map in evaluation!")
+
+    if frame_data.mask_crop is None:
+        warnings.warn("mask_crop is None, assuming the whole image is valid.")
+
+    if frame_data.fg_probability is None:
+        warnings.warn("fg_probability is None, assuming the whole image is fg.")
+
+    # threshold the masks to make ground truth binary masks
+    mask_fg = (
+        frame_data.fg_probability >= mask_thr
+        if frame_data.fg_probability is not None
+        # pyre-ignore [16]
+        else torch.ones_like(frame_data.image_rgb[:, :1, ...]).bool()
+    )
+
+    mask_crop = (
+        frame_data.mask_crop
+        if frame_data.mask_crop is not None
+        else torch.ones_like(mask_fg)
+    )
+
+    # unmasked g.t. image
+    image_rgb = frame_data.image_rgb
+
+    # fg-masked g.t. image
+    image_rgb_masked = mask_background(
+        # pyre-fixme[6]: Expected `Tensor` for 1st param but got
+        #  `Optional[torch.Tensor]`.
+        frame_data.image_rgb,
+        mask_fg,
+        bg_color=bg_color,
+    )
+
+    # clamp predicted images
+    image_render = cloned_render["image_render"].clamp(0.0, 1.0)
+
+    if visualize:
+        visualizer = _Visualizer(
+            image_render=image_render,
+            image_rgb_masked=image_rgb_masked,
+            depth_render=cloned_render["depth_render"],
+            depth_map=frame_data.depth_map,
+            depth_mask=frame_data.depth_mask[:1]
+            if frame_data.depth_mask is not None
+            else None,
+            visdom_env=visualize_visdom_env,
+        )
+
+    results: Dict[str, Any] = {}
+
+    results["iou"] = iou(
+        cloned_render["mask_render"],
+        mask_fg,
+        mask=mask_crop,
+    )
+
+    # "_masked" metrics use the crop mask only, "_fg" metrics additionally
+    # restrict the evaluated region to the foreground
+    for loss_fg_mask, name_postfix in zip((mask_crop, mask_fg), ("_masked", "_fg")):
+
+        loss_mask_now = mask_crop * loss_fg_mask
+
+        for rgb_metric_name, rgb_metric_fun in zip(
+            ("psnr", "rgb_l1"), (calc_psnr, rgb_l1)
+        ):
+            metric_name = rgb_metric_name + name_postfix
+            results[metric_name] = rgb_metric_fun(
+                image_render,
+                image_rgb_masked,
+                mask=loss_mask_now,
+            )
+
+            if visualize:
+                visualizer.show_rgb(
+                    results[metric_name].item(), metric_name, loss_mask_now
+                )
+
+        if name_postfix == "_fg" and frame_data.depth_map is not None:
+            # only record depth metrics for the foreground
+            _, abs_ = eval_depth(
+                cloned_render["depth_render"],
+                # pyre-fixme[6]: For 2nd param expected `Tensor` but got
+                #  `Optional[Tensor]`.
+                frame_data.depth_map,
+                get_best_scale=True,
+                mask=loss_mask_now,
+                crop=5,
+            )
+            results["depth_abs" + name_postfix] = abs_.mean()
+
+            if visualize:
+                visualizer.show_depth(abs_.mean().item(), name_postfix, loss_mask_now)
+                if break_after_visualising:
+                    breakpoint()  # noqa: B601
+
+    # add the rgb metrics between the render and the unmasked image
+    for rgb_metric_name, rgb_metric_fun in zip(
+        ("psnr_full_image", "rgb_l1_full_image"), (calc_psnr, rgb_l1)
+    ):
+        results[rgb_metric_name] = rgb_metric_fun(
+            image_render,
+            # pyre-fixme[6]: For 2nd argument expected `Tensor` but got
+            #  `Optional[Tensor]`.
+            image_rgb,
+            mask=mask_crop,
+        )
+
+    if lpips_model is not None:
+        for gt_image_type in ("_full_image", "_masked"):
+            # both images are clamped to [0, 1] and rescaled to [-1, 1]
+            im1, im2 = [
+                2.0 * im.clamp(0.0, 1.0) - 1.0  # pyre-ignore[16]
+                for im in (
+                    image_rgb_masked if gt_image_type == "_masked" else image_rgb,
+                    cloned_render["image_render"],
+                )
+            ]
+            results["lpips" + gt_image_type] = lpips_model.forward(im1, im2).item()
+
+    # convert all metrics to floats
+    results = {k: float(v) for k, v in results.items()}
+
+    results["meta"] = {
+        # store the size of the batch (corresponds to n_src_views+1)
+        "batch_size": len(frame_type),
+        # store the type of the target frame; the normalized local list is
+        # indexed so that a non-batched (plain string) frame_type is kept
+        # whole instead of being reduced to its first character
+        "frame_type": str(frame_type[0]),
+    }
+
+    return results
+
+
+def average_per_batch_results(
+    results_per_batch: List[Dict[str, Any]],
+    idx: Optional[torch.Tensor] = None,
+) -> dict:
+    """
+    Compute the mean of every metric over a list of per-batch results.
+
+    The metric names are taken from the first entry of `results_per_batch`,
+    with the "meta" entry excluded.
+
+    Args:
+        results_per_batch: One metrics dict per evaluated batch.
+        idx: Optional indices into `results_per_batch`; when given, only the
+            selected entries are averaged.
+
+    Returns:
+        A dict mapping each metric name to its mean, or to NaN when no entry
+        is selected.
+    """
+    metric_names = list(results_per_batch[0].keys())
+    metric_names.remove("meta")
+
+    selected = results_per_batch
+    if idx is not None:
+        selected = [results_per_batch[i] for i in idx]
+
+    if len(selected) == 0:
+        return dict.fromkeys(metric_names, float("NaN"))
+
+    averaged = {}
+    for name in metric_names:
+        values = np.array([entry[name] for entry in selected])
+        averaged[name] = float(values.mean())
+    return averaged
+
+
+def _reduce_camera_iou_overlap(ious: torch.Tensor, topk: int = 2) -> torch.Tensor:
+    """
+    Reduce a set of camera ious to a single camera difficulty value: the
+    mean of the largest ious.
+
+    The number of averaged values is `topk`, capped at `len(ious) - 1`.
+
+    Returns:
+        single-element Tensor
+    """
+    n_selected = min(topk, len(ious) - 1)
+    largest = ious.topk(k=n_selected)
+    return largest.values.mean()
+
+
+def _get_camera_difficulty_bin_edges(camera_difficulty_bin_breaks: Tuple[float, float]):
+    """
+    Build the edges and the names of the three camera difficulty bins.
+
+    Args:
+        camera_difficulty_bin_breaks: The two inner bin edges.
+
+    Returns:
+        A float tensor with the 4 bin edges (the outer edges are 0 and 1,
+        each widened by a small margin) and the list of the 3 bin names.
+    """
+    first_break, second_break = camera_difficulty_bin_breaks
+    margin = 1e-5
+    edges = torch.tensor(
+        [0.0 - margin, first_break, second_break, 1.0 + margin]
+    ).float()
+    return edges, ["hard", "medium", "easy"]
+
+
+def summarize_nvs_eval_results(
+    per_batch_eval_results: List[Dict[str, Any]],
+    is_multisequence: bool,
+) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+    """
+    Compile the per-batch evaluation results `per_batch_eval_results` into
+    a set of aggregate metrics. The produced metrics depend on is_multisequence:
+
+    - if False, all batches are averaged into a single "test" subset;
+    - if True, the batches are split into a "train" and a "test" subset
+      according to the frame type of their target frame and, in addition to
+      the overall average, one average per number of source views listed in
+      `EVAL_N_SRC_VIEWS` is reported for each subset.
+
+    Args:
+        per_batch_eval_results: Metrics of each per-batch evaluation.
+        is_multisequence: Whether to evaluate as a multisequence task
+
+    Returns:
+        nvs_results_flat: A flattened dict of all aggregate metrics.
+        aux_out: A dictionary holding a set of auxiliary results; the
+            un-flattened list of results is stored under the "results" key.
+    """
+    n_batches = len(per_batch_eval_results)
+    eval_sets: List[Optional[str]] = ["train", "test"] if is_multisequence else [None]
+    batch_sizes = torch.tensor(
+        [r["meta"]["batch_size"] for r in per_batch_eval_results]
+    ).long()
+    # the batch size corresponds to n_src_views + 1
+    n_src_views = batch_sizes - 1
+
+    is_train = is_train_frame([r["meta"]["frame_type"] for r in per_batch_eval_results])
+
+    # init the result database dict
+    results = []
+
+    # add per set averages
+    for eval_set in eval_sets:
+        if eval_set is None:
+            # no train/test split: every batch is reported under "test"
+            ok_set = torch.ones(n_batches, dtype=torch.bool)
+            set_name = "test"
+        else:
+            ok_set = is_train == int(eval_set == "train")
+            set_name = eval_set
+
+        # average over all results
+        bin_results = average_per_batch_results(
+            per_batch_eval_results, idx=torch.where(ok_set)[0]
+        )
+        results.append(
+            {
+                "subset": set_name,
+                "subsubset": "diff=all",
+                "metrics": bin_results,
+            }
+        )
+
+        if is_multisequence:
+            # split based on n_src_views
+            for n_src in EVAL_N_SRC_VIEWS:
+                ok_src = ok_set & (n_src_views == n_src)
+                n_src_results = average_per_batch_results(
+                    per_batch_eval_results,
+                    idx=torch.where(ok_src)[0],
+                )
+                results.append(
+                    {
+                        "subset": set_name,
+                        "subsubset": f"n_src={int(n_src)}",
+                        "metrics": n_src_results,
+                    }
+                )
+
+    aux_out = {"results": results}
+    return flatten_nvs_results(results), aux_out
+
+
+def _get_flat_nvs_metric_key(result, metric_name) -> str:
+    """
+    Build the flat key of a metric: the metric name followed by the subset
+    and the subsubset of `result`, separated by "|".
+    """
+    subset = result["subset"]
+    subsubset = result["subsubset"]
+    return f"{metric_name}|subset={subset}|{subsubset}"
+
+
+def flatten_nvs_results(results) -> Dict[str, Any]:
+    """
+    Takes input `results` list of dicts of the form::
+
+        [
+            {
+                'subset': 'train/test/...',
+                'subsubset': 'diff=all/n_src=1/...',
+                'metrics': {metric_name: metric_value, ...},
+            },
+            ...
+        ]
+
+    And converts to a flat dict with one entry per metric, keyed by
+    `_get_flat_nvs_metric_key`::
+
+        {
+            '<metric_name>|subset=<subset>|<subsubset>': metric_value,
+            ...
+        }
+
+    Every (metric, subset, subsubset) combination has to be unique.
+    """
+    results_flat = {}
+    for result in results:
+        for metric_name, metric_val in result["metrics"].items():
+            metric_key = _get_flat_nvs_metric_key(result, metric_name)
+            # a repeated key would silently overwrite an earlier result
+            assert metric_key not in results_flat, f"Duplicate key {metric_key}"
+            results_flat[metric_key] = metric_val
+    return results_flat
+
+
+def pretty_print_nvs_metrics(results) -> None:
+    """
+    Print one table of metrics per subset.
+
+    Each table has one row per metric and one value column per result of
+    the subset; the columns are labelled by the subsubset of the result.
+    Subsets without any value are not printed.
+
+    Args:
+        results: A list of dicts with the keys "subset", "subsubset" and
+            "metrics", the latter mapping metric names to values.
+    """
+    subsets, subsubsets = [
+        _ordered_set([r[k] for r in results]) for k in ("subset", "subsubset")
+    ]
+    metrics = _ordered_set([metric for r in results for metric in r["metrics"]])
+
+    for subset in subsets:
+        tab = {}
+        # defined up front so that it exists even when there are no metrics
+        header = ["metric"]
+        for metric in metrics:
+            tab[metric] = []
+            columns = []
+            for subsubset in subsubsets:
+                metric_vals = [
+                    r["metrics"][metric]
+                    for r in results
+                    if r["subsubset"] == subsubset and r["subset"] == subset
+                ]
+                tab[metric].extend(metric_vals)
+                # one header cell per value keeps headers and values aligned
+                columns.extend([subsubset] * len(metric_vals))
+            header = ["metric", *columns]
+
+        if any(len(v) > 0 for v in tab.values()):
+            print(f"===== NVS results; subset={subset} =====")
+            print(
+                tabulate(
+                    [[metric, *v] for metric, v in tab.items()],
+                    headers=header,
+                )
+            )
+
+
+def _ordered_set(list_):
+    """
+    Return the unique items of `list_` as a list, in the order in which
+    they first appear.
+    """
+    first_seen = OrderedDict.fromkeys(list_)
+    return list(first_seen)
+
+
+def aggregate_nvs_results(task_results):
+    """
+    Average nvs results over a nested list of result entries.
+
+    The nested list `task_results` is flattened and, for every combination
+    of subset and subsubset found in it, each metric is averaged (ignoring
+    NaNs) over all entries that carry this subset and subsubset.
+
+    Returns:
+        A list with one dict per (subset, subsubset) combination, holding
+        the keys "subset", "subsubset" and "metrics".
+    """
+    flat_results = [entry for run in task_results for entry in run]
+    subsets = _ordered_set([entry["subset"] for entry in flat_results])
+    subsubsets = _ordered_set([entry["subsubset"] for entry in flat_results])
+    metric_names = _ordered_set(
+        [name for entry in flat_results for name in entry["metrics"]]
+    )
+
+    def _mean_metrics(subset, subsubset):
+        # metrics of all entries that belong to the requested combination
+        matching = [
+            entry["metrics"]
+            for entry in flat_results
+            if entry["subsubset"] == subsubset and entry["subset"] == subset
+        ]
+        return {
+            name: float(np.nanmean(np.array([m[name] for m in matching])))
+            for name in metric_names
+        }
+
+    return [
+        {
+            "subset": subset,
+            "subsubset": subsubset,
+            "metrics": _mean_metrics(subset, subsubset),
+        }
+        for subset in subsets
+        for subsubset in subsubsets
+    ]
diff --git a/pytorch3d/pytorch3d/implicitron/evaluation/evaluator.py b/pytorch3d/pytorch3d/implicitron/evaluation/evaluator.py
new file mode 100644
index 0000000000000000000000000000000000000000..e869a6566bf3742655ef70b6343d15ae7407b8e9
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/evaluation/evaluator.py
@@ -0,0 +1,158 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import copy
+import json
+import logging
+import os
+import warnings
+from typing import Any, Dict, List, Optional, Tuple
+
+import torch
+
+import tqdm
+from pytorch3d.implicitron.evaluation import evaluate_new_view_synthesis as evaluate
+from pytorch3d.implicitron.models.base_model import EvaluationMode, ImplicitronModelBase
+from pytorch3d.implicitron.tools.config import (
+    registry,
+    ReplaceableBase,
+    run_auto_creation,
+)
+from torch.utils.data import DataLoader
+
+logger = logging.getLogger(__name__)
+
+
+class EvaluatorBase(ReplaceableBase):
+    """
+    Evaluate a trained model on given data. Returns a dict of loss/objective
+    names and their values.
+
+    This is an abstract base: `run` has to be implemented by subclasses.
+    """
+
+    # `ImplicitronEvaluator.run` passes this flag to
+    # `summarize_nvs_eval_results` to select the multisequence summary.
+    is_multisequence: bool = False
+
+    def run(
+        self, model: ImplicitronModelBase, dataloader: DataLoader, **kwargs
+    ) -> Dict[str, Any]:
+        """
+        Evaluate the results of Implicitron training.
+
+        Args:
+            model: The model to evaluate.
+            dataloader: The dataloader providing the evaluation data.
+            **kwargs: Additional, implementation-specific arguments.
+
+        Raises:
+            NotImplementedError: always; subclasses provide the implementation.
+        """
+        raise NotImplementedError()
+
+
+@registry.register
+class ImplicitronEvaluator(EvaluatorBase):
+    """
+    Evaluate the results of Implicitron training.
+    """
+
+    # UNUSED; preserved for compatibility purposes
+    camera_difficulty_bin_breaks: Tuple[float, ...] = 0.97, 0.98
+
+    def __post_init__(self):
+        run_auto_creation(self)
+
+    # pyre-fixme[14]: `run` overrides method defined in `EvaluatorBase` inconsistently.
+    def run(
+        self,
+        model: ImplicitronModelBase,
+        dataloader: DataLoader,
+        device: torch.device,
+        dump_to_json: bool = False,
+        exp_dir: Optional[str] = None,
+        epoch: Optional[int] = None,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """
+        Evaluate the results of Implicitron training. Optionally, dump results to
+        exp_dir/results_test.json.
+
+        The model is switched to eval mode and is not switched back.
+
+        Args:
+            model: A (trained) model to evaluate.
+            dataloader: A test dataloader.
+            device: A torch device.
+            dump_to_json: If True, will dump the results to a json file.
+            exp_dir: Root experiment directory. Required if `dump_to_json`
+                is True.
+            epoch: Evaluation epoch (to be stored in the results dict).
+
+        Returns:
+            The list of aggregate results produced by
+            `summarize_nvs_eval_results` (its "results" entry).
+
+        Raises:
+            ValueError: if `dump_to_json` is True and `exp_dir` is None.
+        """
+        if dump_to_json and exp_dir is None:
+            # fail before the evaluation loop instead of after it, so that
+            # a misconfigured call does not waste a full evaluation
+            raise ValueError(
+                "Cannot save results to json without a specified save path."
+            )
+
+        try:
+            import lpips
+
+            lpips_model = lpips.LPIPS(net="vgg")
+            lpips_model = lpips_model.to(device)
+        except ImportError:
+            warnings.warn(
+                "lpips library NOT FOUND. lpips losses will not be calculated"
+            )
+            lpips_model = None
+
+        model.eval()
+
+        per_batch_eval_results = []
+        logger.info("Evaluating model ...")
+        for frame_data in tqdm.tqdm(dataloader):
+            frame_data = frame_data.to(device)
+
+            # mask out the unknown images so that the model does not see them
+            frame_data_for_eval = _get_eval_frame_data(frame_data)
+
+            with torch.no_grad():
+                preds = model(
+                    **{
+                        **frame_data_for_eval,
+                        "evaluation_mode": EvaluationMode.EVALUATION,
+                    }
+                )
+                implicitron_render = copy.deepcopy(preds["implicitron_render"])
+                # the metrics are computed against the unmasked frame_data
+                per_batch_eval_results.append(
+                    evaluate.eval_batch(
+                        frame_data,
+                        implicitron_render,
+                        bg_color="black",
+                        lpips_model=lpips_model,
+                    )
+                )
+
+        _, category_result = evaluate.summarize_nvs_eval_results(
+            per_batch_eval_results,
+            self.is_multisequence,
+        )
+
+        results = category_result["results"]
+        evaluate.pretty_print_nvs_metrics(results)
+        if dump_to_json:
+            _dump_to_json(epoch, exp_dir, results)
+
+        return results
+
+
+def _dump_to_json(
+    epoch: Optional[int], exp_dir: Optional[str], results: List[Dict[str, Any]]
+) -> None:
+    """
+    Write `results` as json to the file "results_test.json" in `exp_dir`.
+
+    Note that `results` is modified in place when `epoch` is given: an
+    "eval_epoch" entry is added to each of its dicts before they are written.
+
+    Args:
+        epoch: Evaluation epoch to store in the results; skipped if None.
+        exp_dir: Directory in which the json file is created.
+        results: The list of result dicts to write.
+
+    Raises:
+        ValueError: if `exp_dir` is None. It is raised before `results` is
+            modified.
+    """
+    # validate first so that a failing call leaves `results` untouched
+    if exp_dir is None:
+        raise ValueError("Cannot save results to json without a specified save path.")
+
+    if epoch is not None:
+        for r in results:
+            r["eval_epoch"] = int(epoch)
+    logger.info("Evaluation results")
+
+    with open(os.path.join(exp_dir, "results_test.json"), "w") as f:
+        json.dump(results, f)
+
+
+def _get_eval_frame_data(frame_data: Any) -> Any:
+    """
+    Masks the target image data to make sure we cannot use it at model evaluation
+    time. Assumes the first batch element is target, the rest are source.
+
+    The input is not modified: a deep copy is made and the first element of
+    its `image_rgb`, `depth_map`, `fg_probability` and `mask_crop` fields is
+    zeroed in place. Fields that are missing or None are skipped.
+
+    Args:
+        frame_data: The batch of frame data to mask.
+
+    Returns:
+        The masked deep copy of `frame_data`.
+    """
+    frame_data_for_eval = copy.deepcopy(frame_data)
+    for k in ("image_rgb", "depth_map", "fg_probability", "mask_crop"):
+        value = getattr(frame_data_for_eval, k, None)
+        if value is None:
+            # optional field that is absent from this batch: nothing to hide
+            continue
+        value[0].zero_()
+    return frame_data_for_eval
diff --git a/pytorch3d/pytorch3d/implicitron/models/__init__.py b/pytorch3d/pytorch3d/implicitron/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a3ab83f3ae77e605eeb043fa4a29e246bdc1a91
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/__init__.py
@@ -0,0 +1,10 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# These imports allow the models to be registered;
+# see: pytorch3d.implicitron.tools.config.registry:register
+from pytorch3d.implicitron.models.generic_model import GenericModel
+from pytorch3d.implicitron.models.overfit_model import OverfitModel
diff --git a/pytorch3d/pytorch3d/implicitron/models/base_model.py b/pytorch3d/pytorch3d/implicitron/models/base_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd48bf7f8b6a59258375450e905a0e2db0a31601
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/base_model.py
@@ -0,0 +1,91 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+
+import torch
+
+from pytorch3d.implicitron.models.renderer.base import EvaluationMode
+from pytorch3d.implicitron.tools.config import ReplaceableBase
+from pytorch3d.renderer.cameras import CamerasBase
+
+
+@dataclass
+class ImplicitronRender:
+    """
+    Container for the tensors that make up the result of a rendering pass.
+    Every field is optional and defaults to None.
+    """
+
+    depth_render: Optional[torch.Tensor] = None
+    image_render: Optional[torch.Tensor] = None
+    mask_render: Optional[torch.Tensor] = None
+    camera_distance: Optional[torch.Tensor] = None
+
+    def clone(self) -> "ImplicitronRender":
+        """
+        Return a new `ImplicitronRender` holding detached copies of the
+        tensors of this one; fields that are None stay None.
+        """
+        field_names = (
+            "depth_render",
+            "image_render",
+            "mask_render",
+            "camera_distance",
+        )
+        copies = {}
+        for name in field_names:
+            tensor = getattr(self, name)
+            copies[name] = None if tensor is None else tensor.detach().clone()
+        return ImplicitronRender(**copies)
+
+
+class ImplicitronModelBase(ReplaceableBase, torch.nn.Module):
+    """
+    Replaceable abstract base for all image generation / rendering models.
+    `forward()` method produces a render with a depth map. Derives from Module
+    so we can rely on basic functionality provided by torch for model
+    optimization.
+    """
+
+    # The keys from `preds` (output of ImplicitronModelBase.forward) to be logged in
+    # the training loop.
+    log_vars: List[str] = field(default_factory=lambda: ["objective"])
+
+    def forward(
+        self,
+        *,  # force keyword-only arguments
+        image_rgb: Optional[torch.Tensor],
+        camera: CamerasBase,
+        fg_probability: Optional[torch.Tensor],
+        mask_crop: Optional[torch.Tensor],
+        depth_map: Optional[torch.Tensor],
+        sequence_name: Optional[List[str]],
+        evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """
+        Abstract forward pass; has to be implemented by the subclasses.
+
+        Args:
+            image_rgb: A tensor of shape `(B, 3, H, W)` containing a batch of rgb images;
+                the first `min(B, n_train_target_views)` images are considered targets and
+                are used to supervise the renders; the rest correspond to the source
+                viewpoints from which features will be extracted.
+            camera: An instance of CamerasBase containing a batch of `B` cameras corresponding
+                to the viewpoints of target images, from which the rays will be sampled,
+                and source images, which will be used for intersecting with target rays.
+            fg_probability: A tensor of shape `(B, 1, H, W)` containing a batch of
+                foreground masks.
+            mask_crop: A binary tensor of shape `(B, 1, H, W)` denoting valid
+                regions in the input images (i.e. regions that do not correspond
+                to, e.g., zero-padding). When the `RaySampler`'s sampling mode is set to
+                "mask_sample", rays will be sampled in the non-zero regions.
+            depth_map: A tensor of shape `(B, 1, H, W)` containing a batch of depth maps.
+            sequence_name: A list of `B` strings corresponding to the sequence names
+                from which images `image_rgb` were extracted. They are used to match
+                target frames with relevant source frames.
+            evaluation_mode: one of EvaluationMode.TRAINING or
+                EvaluationMode.EVALUATION which determines the settings used for
+                rendering.
+
+        Returns:
+            preds: A dictionary containing all outputs of the forward pass. All models should
+                output an instance of `ImplicitronRender` in `preds["implicitron_render"]`.
+
+        Raises:
+            NotImplementedError: always, in this base class.
+        """
+        raise NotImplementedError()
diff --git a/pytorch3d/pytorch3d/implicitron/models/feature_extractor/__init__.py b/pytorch3d/pytorch3d/implicitron/models/feature_extractor/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9141562c848ec7d21b0e4aeb953fe962b8d8a325
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/feature_extractor/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .feature_extractor import FeatureExtractorBase
diff --git a/pytorch3d/pytorch3d/implicitron/models/feature_extractor/feature_extractor.py b/pytorch3d/pytorch3d/implicitron/models/feature_extractor/feature_extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ce7f5e56b033293321b9cc73b31962bd49a249c
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/feature_extractor/feature_extractor.py
@@ -0,0 +1,41 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Any, Dict, Optional
+
+import torch
+from pytorch3d.implicitron.tools.config import ReplaceableBase
+
+
+class FeatureExtractorBase(ReplaceableBase, torch.nn.Module):
+    """
+    Base class for an extractor of a set of named feature maps from images.
+    """
+
+    def get_feat_dims(self) -> int:
+        """
+        Returns:
+            total number of feature dimensions of the output, summed over all
+            entries of the dict returned by `forward` (i.e. sum_i(dim_i)).
+        """
+        raise NotImplementedError  # must be overridden by subclasses
+
+    def forward(
+        self,
+        imgs: Optional[torch.Tensor],
+        masks: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Dict[Any, torch.Tensor]:
+        """
+        Args:
+            imgs: A batch of input images of shape `(B, 3, H, W)`.
+            masks: A batch of input masks of shape `(B, 1, H, W)`.
+
+        Returns:
+            out_feats: A dict `{f_i: t_i}` keyed by predicted feature names `f_i`
+                and their corresponding tensors `t_i` of shape `(B, dim_i, H_i, W_i)`.
+        """
+        raise NotImplementedError  # must be overridden by subclasses
diff --git a/pytorch3d/pytorch3d/implicitron/models/feature_extractor/resnet_feature_extractor.py b/pytorch3d/pytorch3d/implicitron/models/feature_extractor/resnet_feature_extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca7cefd0e84ffc6cb930ca8dd965f0454bcaf5e8
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/feature_extractor/resnet_feature_extractor.py
@@ -0,0 +1,212 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+import math
+from typing import Any, Dict, Optional, Tuple
+
+import torch
+import torch.nn.functional as Fu
+import torchvision
+from pytorch3d.implicitron.tools.config import registry
+
+from . import FeatureExtractorBase
+
+
+logger = logging.getLogger(__name__)
+
+MASK_FEATURE_NAME = "mask"  # output-dict key under which input masks are stored
+IMAGE_FEATURE_NAME = "image"  # output-dict key under which images are stored
+
+_FEAT_DIMS = {  # backbone name -> channel count after layer1..layer4
+    "resnet18": (64, 128, 256, 512),
+    "resnet34": (64, 128, 256, 512),
+    "resnet50": (256, 512, 1024, 2048),
+    "resnet101": (256, 512, 1024, 2048),
+    "resnet152": (256, 512, 1024, 2048),
+}
+
+_RESNET_MEAN = [0.485, 0.456, 0.406]  # per-channel mean used by normalize_image
+_RESNET_STD = [0.229, 0.224, 0.225]  # per-channel std used by normalize_image
+
+
+@registry.register
+class ResNetFeatureExtractor(FeatureExtractorBase):
+    """
+    Implements an image feature extractor. Depending on the settings allows
+    to extract:
+        - deep features: A CNN ResNet backbone from torchvision (with/without
+            pretrained weights) which extracts deep features.
+        - masks: Segmentation masks.
+        - images: Raw input RGB images.
+
+    Settings:
+        name: name of the resnet backbone (from torchvision)
+        pretrained: If true, will load the pretrained weights
+        stages: List of stages from which to extract features.
+            Features from each stage are returned as key value
+            pairs in the forward function
+        normalize_image: If set will normalize the RGB values of
+            the image based on the Resnet mean/std
+        image_rescale: If not 1.0, this rescale factor will be
+            used to resize the image
+        first_max_pool: If set, a max pool layer is added after the first
+            convolutional layer
+        proj_dim: If > 0, stages with more channels are projected to this size
+        l2_norm: If set, l2 normalization is applied to the extracted features
+        add_masks: If set, the masks will be saved in the output dictionary
+        add_images: If set, the images will be saved in the output dictionary
+        global_average_pool: If set, features are averaged over H and W (to 1x1)
+        feature_rescale: If not 1.0, this rescale factor will be used to
+            rescale the output features
+    """
+
+    name: str = "resnet34"
+    pretrained: bool = True
+    stages: Tuple[int, ...] = (1, 2, 3, 4)
+    normalize_image: bool = True
+    image_rescale: float = 128 / 800.0
+    first_max_pool: bool = True
+    proj_dim: int = 32
+    l2_norm: bool = True
+    add_masks: bool = True
+    add_images: bool = True
+    global_average_pool: bool = False  # this can simulate global/non-spatial features
+    feature_rescale: float = 1.0
+
+    def __post_init__(self):
+        """Set up normalization buffers, the resnet stages and feature dims."""
+        if self.normalize_image:
+            # register buffers needed to normalize the image
+            for k, v in (("_resnet_mean", _RESNET_MEAN), ("_resnet_std", _RESNET_STD)):
+                self.register_buffer(
+                    k,
+                    torch.FloatTensor(v).view(1, 3, 1, 1),
+                    persistent=False,
+                )
+
+        self._feat_dim = {}
+        # the backbone is built only when at least one resnet stage is requested
+        if len(self.stages) > 0:
+            net = getattr(torchvision.models, self.name)(pretrained=self.pretrained)
+            if self.first_max_pool:
+                self.stem = torch.nn.Sequential(
+                    net.conv1, net.bn1, net.relu, net.maxpool
+                )
+            else:
+                self.stem = torch.nn.Sequential(net.conv1, net.bn1, net.relu)
+            self.max_stage = max(self.stages)
+            self.layers = torch.nn.ModuleList()
+            self.proj_layers = torch.nn.ModuleList()
+            for stage in range(self.max_stage):
+                stage_name = f"layer{stage+1}"
+                feature_name = self._get_resnet_stage_feature_name(stage)
+                if (stage + 1) in self.stages:
+                    if (
+                        self.proj_dim > 0
+                        and _FEAT_DIMS[self.name][stage] > self.proj_dim
+                    ):
+                        proj = torch.nn.Conv2d(
+                            _FEAT_DIMS[self.name][stage],
+                            self.proj_dim,
+                            1,
+                            1,
+                            bias=True,
+                        )
+                        self._feat_dim[feature_name] = self.proj_dim
+                    else:
+                        proj = torch.nn.Identity()
+                        self._feat_dim[feature_name] = _FEAT_DIMS[self.name][stage]
+                else:
+                    proj = torch.nn.Identity()
+                self.proj_layers.append(proj)
+                self.layers.append(getattr(net, stage_name))
+
+        if self.add_masks:
+            self._feat_dim[MASK_FEATURE_NAME] = 1
+
+        if self.add_images:
+            self._feat_dim[IMAGE_FEATURE_NAME] = 3
+
+        logger.info(f"Feat extractor total dim = {self.get_feat_dims()}")
+        self.stages = set(self.stages)  # convert to set for faster "in"
+
+    def _get_resnet_stage_feature_name(self, stage) -> str:
+        """Return the output-dict key for the 0-based resnet stage index."""
+        return f"res_layer_{stage+1}"
+
+    def _resnet_normalize_image(self, img: torch.Tensor) -> torch.Tensor:
+        """Standardize `img` with the registered per-channel mean and std."""
+        return (img - self._resnet_mean) / self._resnet_std
+
+    def get_feat_dims(self) -> int:
+        """Return the summed channel count of all enabled output features."""
+        return sum(self._feat_dim.values())
+
+    def forward(
+        self,
+        imgs: Optional[torch.Tensor],
+        masks: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Dict[Any, torch.Tensor]:
+        """
+        Args:
+            imgs: A batch of input images of shape `(B, 3, H, W)`.
+            masks: A batch of input masks of shape `(B, 1, H, W)`.
+
+        Returns:
+            out_feats: A dict `{f_i: t_i}` keyed by predicted feature names `f_i`
+                and their corresponding tensors `t_i` of shape `(B, dim_i, H_i, W_i)`.
+        """
+        out_feats = {}
+
+        imgs_input = imgs
+        if self.image_rescale != 1.0 and imgs_input is not None:
+            imgs_resized = Fu.interpolate(
+                imgs_input,
+                scale_factor=self.image_rescale,
+                mode="bilinear",
+            )
+        else:
+            imgs_resized = imgs_input
+
+        if len(self.stages) > 0:
+            assert imgs_resized is not None
+
+            if self.normalize_image:
+                imgs_normed = self._resnet_normalize_image(imgs_resized)
+            else:
+                imgs_normed = imgs_resized
+            # run the stem, then every stage up to the deepest requested one
+            feats = self.stem(imgs_normed)
+            for stage, (layer, proj) in enumerate(zip(self.layers, self.proj_layers)):
+                feats = layer(feats)
+                # just a sanity check below
+                assert feats.shape[1] == _FEAT_DIMS[self.name][stage]
+                if (stage + 1) in self.stages:
+                    f = proj(feats)
+                    if self.global_average_pool:
+                        f = f.mean(dim=(2, 3), keepdim=True)  # keep (B, C, 1, 1)
+                    if self.l2_norm:
+                        normfac = 1.0 / math.sqrt(len(self.stages))
+                        f = Fu.normalize(f, dim=1) * normfac
+                    feature_name = self._get_resnet_stage_feature_name(stage)
+                    out_feats[feature_name] = f
+
+        if self.add_masks:
+            assert masks is not None
+            out_feats[MASK_FEATURE_NAME] = masks
+
+        if self.add_images:
+            assert imgs_resized is not None
+            out_feats[IMAGE_FEATURE_NAME] = imgs_resized
+
+        if self.feature_rescale != 1.0:
+            out_feats = {k: self.feature_rescale * f for k, f in out_feats.items()}
+
+        # pyre-fixme[7]: Incompatible return type, expected `Dict[typing.Any, Tensor]`
+        # but got `Dict[typing.Any, float]`
+        return out_feats
diff --git a/pytorch3d/pytorch3d/implicitron/models/generic_model.py b/pytorch3d/pytorch3d/implicitron/models/generic_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d319594a8db13f4a0c396998599ec374892b660
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/generic_model.py
@@ -0,0 +1,765 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+# Note: The #noqa comments below are for unused imports of pluggable implementations
+# which are part of implicitron. They ensure that the registry is prepopulated.
+
+import logging
+from dataclasses import field
+from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING, Union
+
+import torch
+from omegaconf import DictConfig
+
+from pytorch3d.implicitron.models.base_model import (
+    ImplicitronModelBase,
+    ImplicitronRender,
+)
+from pytorch3d.implicitron.models.feature_extractor import FeatureExtractorBase
+from pytorch3d.implicitron.models.global_encoder.global_encoder import GlobalEncoderBase
+from pytorch3d.implicitron.models.implicit_function.base import ImplicitFunctionBase
+from pytorch3d.implicitron.models.metrics import (
+    RegularizationMetricsBase,
+    ViewMetricsBase,
+)
+
+from pytorch3d.implicitron.models.renderer.base import (
+    BaseRenderer,
+    EvaluationMode,
+    ImplicitFunctionWrapper,
+    ImplicitronRayBundle,
+    RendererOutput,
+    RenderSamplingMode,
+)
+from pytorch3d.implicitron.models.renderer.ray_sampler import RaySamplerBase
+
+from pytorch3d.implicitron.models.utils import (
+    apply_chunked,
+    chunk_generator,
+    log_loss_weights,
+    preprocess_input,
+    weighted_sum_losses,
+)
+from pytorch3d.implicitron.models.view_pooler.view_pooler import ViewPooler
+from pytorch3d.implicitron.tools import vis_utils
+from pytorch3d.implicitron.tools.config import (
+    expand_args_fields,
+    registry,
+    run_auto_creation,
+)
+
+from pytorch3d.implicitron.tools.rasterize_mc import rasterize_sparse_ray_bundle
+from pytorch3d.renderer import utils as rend_utils
+from pytorch3d.renderer.cameras import CamerasBase
+
+
+if TYPE_CHECKING:  # import needed by static type checkers only
+    from visdom import Visdom
+logger = logging.getLogger(__name__)  # module-level logger
+
+
+@registry.register
+class GenericModel(ImplicitronModelBase):  # pyre-ignore: 13
+    """
+    GenericModel is a wrapper for the neural implicit
+    rendering and reconstruction pipeline which consists
+    of the following sequence of 7 steps (steps 2–4 are normally
+    skipped in overfitting scenario, since conditioning on source views
+    does not add much information; otherwise they should be present altogether):
+
+
+        (1) Ray Sampling
+        ------------------
+        Rays are sampled from an image grid based on the target view(s).
+                │_____________
+                │             │
+                │             ▼
+                │    (2) Feature Extraction (optional)
+                │    -----------------------
+                │    A feature extractor (e.g. a convolutional
+                │    neural net) is used to extract image features
+                │    from the source view(s).
+                │            │
+                │            ▼
+                │    (3) View Sampling  (optional)
+                │    ------------------
+                │    Image features are sampled at the 2D projections
+                │    of a set of 3D points along each of the sampled
+                │    target rays from (1).
+                │            │
+                │            ▼
+                │    (4) Feature Aggregation  (optional)
+                │    ------------------
+                │    Aggregate features and masks sampled from
+                │    image view(s) in (3).
+                │            │
+                │____________▼
+                │
+                ▼
+        (5) Implicit Function Evaluation
+        ------------------
+        Evaluate the implicit function(s) at the sampled ray points
+        (optionally pass in the aggregated image features from (4)).
+        (also optionally pass in a global encoding from global_encoder).
+                │
+                ▼
+        (6) Rendering
+        ------------------
+        Render the image into the target cameras by raymarching along
+        the sampled rays and aggregating the colors and densities
+        output by the implicit function in (5).
+                │
+                ▼
+        (7) Loss Computation
+        ------------------
+        Compute losses based on the predicted target image(s).
+
+
+    The `forward` function of GenericModel executes
+    this sequence of steps. Currently, steps 1, 3, 4, 5, 6
+    can be customized by initializing a subclass of the appropriate
+    baseclass and adding the newly created module to the registry.
+    Please see https://github.com/facebookresearch/pytorch3d/blob/main/projects/implicitron_trainer/README.md#custom-plugins
+    for more details on how to create and register a custom component.
+
+    In the config .yaml files for experiments, the parameters below are
+    contained in the
+    `model_factory_ImplicitronModelFactory_args.model_GenericModel_args`
+    node. As GenericModel derives from ReplaceableBase, the input arguments are
+    parsed by the run_auto_creation function to initialize the
+    necessary member modules. Please see implicitron_trainer/README.md
+    for more details on this process.
+
+    Args:
+        mask_images: Whether or not to mask the RGB image background given the
+            foreground mask (the `fg_probability` argument of `GenericModel.forward`)
+        mask_depths: Whether or not to mask the depth image background given the
+            foreground mask (the `fg_probability` argument of `GenericModel.forward`)
+        render_image_width: Width of the output image to render
+        render_image_height: Height of the output image to render
+        mask_threshold: If greater than 0.0, the foreground mask is
+            thresholded by this value before being applied to the RGB/Depth images
+        output_rasterized_mc: If True, visualize the Monte-Carlo pixel renders by
+            splatting onto an image grid. Default: False.
+        bg_color: RGB values for setting the background color of input image
+            if mask_images=True. Defaults to (0.0, 0.0, 0.0). Each renderer has its own
+            way to determine the background color of its output, unrelated to this.
+        num_passes: The specified implicit_function is initialized num_passes
+            times and run sequentially.
+        chunk_size_grid: The total number of points which can be rendered
+            per chunk. This is used to compute the number of rays used
+            per chunk when the chunked version of the renderer is used (in order
+            to fit rendering on all rays in memory)
+        render_features_dimensions: The number of output features to render.
+            Defaults to 3, corresponding to RGB images.
+        n_train_target_views: The number of cameras to render into at training
+            time; first `n_train_target_views` in the batch are considered targets,
+            the rest are sources.
+        sampling_mode_training: The sampling method to use during training. Must be
+            a value from the RenderSamplingMode Enum.
+        sampling_mode_evaluation: Same as above but for evaluation.
+        global_encoder_class_type: The name of the class to use for global_encoder,
+            which must be available in the registry. Or `None` to disable global encoder.
+        global_encoder: An instance of `GlobalEncoder`. This is used to generate an encoding
+            of the image (referred to as the global_code) that can be used to model aspects of
+            the scene such as multiple objects or morphing objects. It is up to the implicit
+            function definition how to use it, but the most typical way is to broadcast and
+            concatenate to the other inputs for the implicit function.
+        raysampler_class_type: The name of the raysampler class which is available
+            in the global registry.
+        raysampler: An instance of RaySampler which is used to emit
+            rays from the target view(s).
+        renderer_class_type: The name of the renderer class which is available in the global
+            registry.
+        renderer: A renderer class which inherits from BaseRenderer. This is used to
+            generate the images from the target view(s).
+        image_feature_extractor_class_type: If a str, constructs and enables
+            the `image_feature_extractor` object of this type. Or None if not needed.
+        image_feature_extractor: A module for extracting features from an input image.
+        view_pooler_enabled: If `True`, constructs and enables the `view_pooler` object.
+            This means features are sampled from the source image(s)
+            at the projected 2d locations of the sampled 3d ray points from the target
+            view(s), i.e. this activates step (3) above.
+        view_pooler: An instance of ViewPooler which is used for sampling of
+            image-based features at the 2D projections of a set
+            of 3D points and aggregating the sampled features.
+        implicit_function_class_type: The type of implicit function to use which
+            is available in the global registry.
+        implicit_function: An instance of ImplicitFunctionBase. The actual implicit functions
+            are initialised to be in self._implicit_functions.
+        view_metrics: An instance of ViewMetricsBase used to compute loss terms which
+            are independent of the model's parameters.
+        view_metrics_class_type: The type of view metrics to use, must be available in
+            the global registry.
+        regularization_metrics: An instance of RegularizationMetricsBase used to compute
+            regularization terms which can depend on the model's parameters.
+        regularization_metrics_class_type: The type of regularization metrics to use,
+            must be available in the global registry.
+        loss_weights: A dictionary with a {loss_name: weight} mapping; see documentation
+            for `ViewMetrics` class for available loss functions.
+        log_vars: A list of variable names which should be logged.
+            The names should correspond to a subset of the keys of the
+            dict `preds` output by the `forward` function.
+    """  # noqa: B950
+
+    mask_images: bool = True
+    mask_depths: bool = True
+    render_image_width: int = 400
+    render_image_height: int = 400
+    mask_threshold: float = 0.5
+    output_rasterized_mc: bool = False
+    bg_color: Tuple[float, float, float] = (0.0, 0.0, 0.0)
+    num_passes: int = 1
+    chunk_size_grid: int = 4096
+    render_features_dimensions: int = 3
+    tqdm_trigger_threshold: int = 16
+
+    n_train_target_views: int = 1
+    sampling_mode_training: str = "mask_sample"
+    sampling_mode_evaluation: str = "full_grid"
+
+    # ---- global encoder settings
+    global_encoder_class_type: Optional[str] = None
+    global_encoder: Optional[GlobalEncoderBase]
+
+    # ---- raysampler
+    raysampler_class_type: str = "AdaptiveRaySampler"
+    raysampler: RaySamplerBase
+
+    # ---- renderer configs
+    renderer_class_type: str = "MultiPassEmissionAbsorptionRenderer"
+    renderer: BaseRenderer
+
+    # ---- image feature extractor settings
+    # (This is only created if view_pooler is enabled)
+    image_feature_extractor: Optional[FeatureExtractorBase]
+    image_feature_extractor_class_type: Optional[str] = None
+    # ---- view pooler settings
+    view_pooler_enabled: bool = False
+    view_pooler: Optional[ViewPooler]
+
+    # ---- implicit function settings
+    implicit_function_class_type: str = "NeuralRadianceFieldImplicitFunction"
+    # This is just a model, never constructed.
+    # The actual implicit functions live in self._implicit_functions
+    implicit_function: ImplicitFunctionBase
+
+    # ----- metrics
+    view_metrics: ViewMetricsBase
+    view_metrics_class_type: str = "ViewMetrics"
+
+    regularization_metrics: RegularizationMetricsBase
+    regularization_metrics_class_type: str = "RegularizationMetrics"
+
+    # ---- loss weights
+    loss_weights: Dict[str, float] = field(
+        default_factory=lambda: {
+            "loss_rgb_mse": 1.0,
+            "loss_prev_stage_rgb_mse": 1.0,
+            "loss_mask_bce": 0.0,
+            "loss_prev_stage_mask_bce": 0.0,
+        }
+    )
+
+    # ---- variables to be logged (logger automatically ignores if not computed)
+    log_vars: List[str] = field(
+        default_factory=lambda: [
+            "loss_rgb_psnr_fg",
+            "loss_rgb_psnr",
+            "loss_rgb_mse",
+            "loss_rgb_huber",
+            "loss_depth_abs",
+            "loss_depth_abs_fg",
+            "loss_mask_neg_iou",
+            "loss_mask_bce",
+            "loss_mask_beta_prior",
+            "loss_eikonal",
+            "loss_density_tv",
+            "loss_depth_neg_penalty",
+            "loss_autodecoder_norm",
+            # metrics that are only logged in 2+stage renderers
+            "loss_prev_stage_rgb_mse",
+            "loss_prev_stage_rgb_psnr_fg",
+            "loss_prev_stage_rgb_psnr",
+            "loss_prev_stage_mask_bce",
+            # basic metrics
+            "objective",
+            "epoch",
+            "sec/it",
+        ]
+    )
+
+    @classmethod
+    def pre_expand(cls) -> None:
+        # try/finally bypasses cinder's lazy imports so these classes get registered
+        try:
+            from pytorch3d.implicitron.models.feature_extractor.resnet_feature_extractor import (  # noqa: F401, B950
+                ResNetFeatureExtractor,
+            )
+            from pytorch3d.implicitron.models.implicit_function.idr_feature_field import (  # noqa: F401, B950
+                IdrFeatureField,
+            )
+            from pytorch3d.implicitron.models.implicit_function.neural_radiance_field import (  # noqa: F401, B950
+                NeRFormerImplicitFunction,
+            )
+            from pytorch3d.implicitron.models.implicit_function.scene_representation_networks import (  # noqa: F401, B950
+                SRNHyperNetImplicitFunction,
+            )
+            from pytorch3d.implicitron.models.implicit_function.voxel_grid_implicit_function import (  # noqa: F401, B950
+                VoxelGridImplicitFunction,
+            )
+            from pytorch3d.implicitron.models.renderer.lstm_renderer import (  # noqa: F401
+                LSTMRenderer,
+            )
+            from pytorch3d.implicitron.models.renderer.multipass_ea import (  # noqa
+                MultiPassEmissionAbsorptionRenderer,
+            )
+            from pytorch3d.implicitron.models.renderer.sdf_renderer import (  # noqa: F401
+                SignedDistanceFunctionRenderer,
+            )
+        finally:
+            pass
+
+    def __post_init__(self):
+        # View pooling consumes image features, so an extractor type is mandatory.
+        missing_extractor = self.image_feature_extractor_class_type is None
+        if self.view_pooler_enabled and missing_extractor:
+            raise ValueError(
+                "image_feature_extractor must be present for view pooling."
+            )
+        # Initialize the member modules from the parsed config arguments.
+        run_auto_creation(self)
+        self._implicit_functions = self._construct_implicit_functions()
+        log_loss_weights(self.loss_weights, logger)
+
+    def forward(
+        self,
+        *,  # force keyword-only arguments
+        image_rgb: Optional[torch.Tensor],
+        camera: CamerasBase,
+        fg_probability: Optional[torch.Tensor] = None,
+        mask_crop: Optional[torch.Tensor] = None,
+        depth_map: Optional[torch.Tensor] = None,
+        sequence_name: Optional[List[str]] = None,
+        frame_timestamp: Optional[torch.Tensor] = None,
+        evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """
+        Args:
+            image_rgb: A tensor of shape `(B, 3, H, W)` containing a batch of rgb images;
+                the first `min(B, n_train_target_views)` images are considered targets and
+                are used to supervise the renders; the rest corresponding to the source
+                viewpoints from which features will be extracted.
+            camera: An instance of CamerasBase containing a batch of `B` cameras corresponding
+                to the viewpoints of target images, from which the rays will be sampled,
+                and source images, which will be used for intersecting with target rays.
+            fg_probability: A tensor of shape `(B, 1, H, W)` containing a batch of
+                foreground masks.
+            mask_crop: A binary tensor of shape `(B, 1, H, W)` denoting valid
+                regions in the input images (i.e. regions that do not correspond
+                to, e.g., zero-padding). When the `RaySampler`'s sampling mode is set to
+                "mask_sample", rays  will be sampled in the non zero regions.
+            depth_map: A tensor of shape `(B, 1, H, W)` containing a batch of depth maps.
+            sequence_name: A list of `B` strings corresponding to the sequence names
+                from which images `image_rgb` were extracted. They are used to match
+                target frames with relevant source frames.
+            frame_timestamp: Optionally a tensor of shape `(B,)` containing a batch
+                of frame timestamps.
+            evaluation_mode: one of EvaluationMode.TRAINING or
+                EvaluationMode.EVALUATION which determines the settings used for
+                rendering.
+
+        Returns:
+            preds: A dictionary containing all outputs of the forward pass including the
+                rendered images, depths, masks, losses and other metrics.
+        """
+        image_rgb, fg_probability, depth_map = preprocess_input(
+            image_rgb,
+            fg_probability,
+            depth_map,
+            self.mask_images,
+            self.mask_depths,
+            self.mask_threshold,
+            self.bg_color,
+        )
+
+        # Obtain the batch size from the camera as this is the only required input.
+        batch_size = camera.R.shape[0]
+
+        # Determine the number of target views, i.e. cameras we render into.
+        n_targets = (
+            1
+            if evaluation_mode == EvaluationMode.EVALUATION
+            else batch_size
+            if self.n_train_target_views <= 0
+            else min(self.n_train_target_views, batch_size)
+        )
+
+        # A helper function for selecting n_target first elements from the input
+        # where the latter can be None.
+        def safe_slice_targets(
+            tensor: Optional[Union[torch.Tensor, List[str]]],
+        ) -> Optional[Union[torch.Tensor, List[str]]]:
+            return None if tensor is None else tensor[:n_targets]
+
+        # Select the target cameras.
+        target_cameras = camera[list(range(n_targets))]
+
+        # Determine the used ray sampling mode.
+        sampling_mode = RenderSamplingMode(
+            self.sampling_mode_training
+            if evaluation_mode == EvaluationMode.TRAINING
+            else self.sampling_mode_evaluation
+        )
+
+        # (1) Sample rendering rays with the ray sampler.
+        # pyre-ignore[29]
+        ray_bundle: ImplicitronRayBundle = self.raysampler(
+            target_cameras,
+            evaluation_mode,
+            mask=mask_crop[:n_targets]
+            if mask_crop is not None and sampling_mode == RenderSamplingMode.MASK_SAMPLE
+            else None,
+        )
+
+        # custom_args hold additional arguments to the implicit function.
+        custom_args = {}
+
+        if self.image_feature_extractor is not None:
+            # (2) Extract features for the image
+            img_feats = self.image_feature_extractor(image_rgb, fg_probability)
+        else:
+            img_feats = None
+
+        if self.view_pooler_enabled:
+            if sequence_name is None:
+                raise ValueError("sequence_name must be provided for view pooling")
+            assert img_feats is not None
+
+            # (3-4) Sample features and masks at the ray points.
+            #       Aggregate features from multiple views.
+            def curried_viewpooler(pts):
+                return self.view_pooler(
+                    pts=pts,
+                    seq_id_pts=sequence_name[:n_targets],
+                    camera=camera,
+                    seq_id_camera=sequence_name,
+                    feats=img_feats,
+                    masks=mask_crop,
+                )
+
+            custom_args["fun_viewpool"] = curried_viewpooler
+
+        global_code = None
+        if self.global_encoder is not None:
+            global_code = self.global_encoder(  # pyre-fixme[29]
+                sequence_name=safe_slice_targets(sequence_name),
+                frame_timestamp=safe_slice_targets(frame_timestamp),
+            )
+        custom_args["global_code"] = global_code
+
+        for func in self._implicit_functions:
+            func.bind_args(**custom_args)
+
+        inputs_to_be_chunked = {}
+        if fg_probability is not None and self.renderer.requires_object_mask():
+            sampled_fb_prob = rend_utils.ndc_grid_sample(
+                fg_probability[:n_targets], ray_bundle.xys, mode="nearest"
+            )
+            inputs_to_be_chunked["object_mask"] = sampled_fb_prob > 0.5
+
+        # (5)-(6) Implicit function evaluation and Rendering
+        rendered = self._render(
+            ray_bundle=ray_bundle,
+            sampling_mode=sampling_mode,
+            evaluation_mode=evaluation_mode,
+            implicit_functions=self._implicit_functions,
+            inputs_to_be_chunked=inputs_to_be_chunked,
+        )
+
+        # Unbind the custom arguments to prevent pytorch from storing
+        # large buffers of intermediate results due to points in the
+        # bound arguments.
+        for func in self._implicit_functions:
+            func.unbind_args()
+
+        # A dict to store losses as well as rendering results.
+        preds: Dict[str, Any] = {}
+
+        preds.update(
+            self.view_metrics(
+                results=preds,
+                raymarched=rendered,
+                ray_bundle=ray_bundle,
+                image_rgb=safe_slice_targets(image_rgb),
+                depth_map=safe_slice_targets(depth_map),
+                fg_probability=safe_slice_targets(fg_probability),
+                mask_crop=safe_slice_targets(mask_crop),
+            )
+        )
+
+        preds.update(
+            self.regularization_metrics(
+                results=preds,
+                model=self,
+            )
+        )
+
+        if sampling_mode == RenderSamplingMode.MASK_SAMPLE:
+            if self.output_rasterized_mc:
+                # Visualize the monte-carlo pixel renders by splatting onto
+                # an image grid.
+                (
+                    preds["images_render"],
+                    preds["depths_render"],
+                    preds["masks_render"],
+                ) = rasterize_sparse_ray_bundle(
+                    ray_bundle,
+                    rendered.features,
+                    (self.render_image_height, self.render_image_width),
+                    rendered.depths,
+                    masks=rendered.masks,
+                )
+        elif sampling_mode == RenderSamplingMode.FULL_GRID:
+            preds["images_render"] = rendered.features.permute(0, 3, 1, 2)
+            preds["depths_render"] = rendered.depths.permute(0, 3, 1, 2)
+            preds["masks_render"] = rendered.masks.permute(0, 3, 1, 2)
+
+            preds["implicitron_render"] = ImplicitronRender(
+                image_render=preds["images_render"],
+                depth_render=preds["depths_render"],
+                mask_render=preds["masks_render"],
+            )
+        else:
+            raise AssertionError("Unreachable state")
+
+        # (7) Compute losses
+        objective = self._get_objective(preds)
+        if objective is not None:
+            preds["objective"] = objective
+
+        return preds
+
+    def _get_objective(self, preds: Dict[str, torch.Tensor]) -> Optional[torch.Tensor]:
+        """
+        A helper function to compute the overall loss as the dot product
+        of individual loss functions with the corresponding weights.
+        """
+        return weighted_sum_losses(preds, self.loss_weights)
+
+    def visualize(
+        self,
+        viz: Optional["Visdom"],
+        visdom_env_imgs: str,
+        preds: Dict[str, Any],
+        prefix: str,
+    ) -> None:
+        """
+        Helper function to visualize the predictions generated
+        in the forward pass.
+
+        Args:
+            viz: Visdom connection object
+            visdom_env_imgs: name of visdom environment for the images.
+            preds: predictions dict like returned by forward()
+            prefix: prepended to the names of images
+        """
+        if viz is None or not viz.check_connection():
+            logger.info("no visdom server! -> skipping batch vis")
+            return
+
+        idx_image = 0
+        title = f"{prefix}_im{idx_image}"
+
+        vis_utils.visualize_basics(viz, preds, visdom_env_imgs, title=title)
+
+    def _render(
+        self,
+        *,
+        ray_bundle: ImplicitronRayBundle,
+        inputs_to_be_chunked: Dict[str, torch.Tensor],
+        sampling_mode: RenderSamplingMode,
+        **kwargs,
+    ) -> RendererOutput:
+        """
+        Args:
+            ray_bundle: A `ImplicitronRayBundle` object containing the parametrizations of the
+                sampled rendering rays.
+            inputs_to_be_chunked: A collection of tensor of shape `(B, _, H, W)`. E.g.
+                SignedDistanceFunctionRenderer requires "object_mask", shape
+                (B, 1, H, W), the silhouette of the object in the image. When
+                chunking, they are passed to the renderer as shape
+                `(B, _, chunksize)`.
+            sampling_mode: The sampling method to use. Must be a value from the
+                RenderSamplingMode Enum.
+
+        Returns:
+            An instance of RendererOutput
+        """
+        if sampling_mode == RenderSamplingMode.FULL_GRID and self.chunk_size_grid > 0:
+            return apply_chunked(
+                self.renderer,
+                chunk_generator(
+                    self.chunk_size_grid,
+                    ray_bundle,
+                    inputs_to_be_chunked,
+                    self.tqdm_trigger_threshold,
+                    **kwargs,
+                ),
+                lambda batch: torch.cat(batch, dim=1).reshape(
+                    *ray_bundle.lengths.shape[:-1], -1
+                ),
+            )
+        else:
+            # pyre-fixme[29]: `BaseRenderer` is not a function.
+            return self.renderer(
+                ray_bundle=ray_bundle,
+                **inputs_to_be_chunked,
+                **kwargs,
+            )
+
+    def _get_viewpooled_feature_dim(self) -> int:
+        if self.view_pooler is None:
+            return 0
+        assert self.image_feature_extractor is not None
+        return self.view_pooler.get_aggregated_feature_dim(
+            self.image_feature_extractor.get_feat_dims()
+        )
+
+    @classmethod
+    def raysampler_tweak_args(cls, type, args: DictConfig) -> None:
+        """
+        We don't expose certain fields of the raysampler because we want to set
+        them from our own members.
+        """
+        del args["sampling_mode_training"]
+        del args["sampling_mode_evaluation"]
+        del args["image_width"]
+        del args["image_height"]
+
+    def create_raysampler(self):
+        extra_args = {
+            "sampling_mode_training": self.sampling_mode_training,
+            "sampling_mode_evaluation": self.sampling_mode_evaluation,
+            "image_width": self.render_image_width,
+            "image_height": self.render_image_height,
+        }
+        raysampler_args = getattr(
+            self, "raysampler_" + self.raysampler_class_type + "_args"
+        )
+        self.raysampler = registry.get(RaySamplerBase, self.raysampler_class_type)(
+            **raysampler_args, **extra_args
+        )
+
+    @classmethod
+    def renderer_tweak_args(cls, type, args: DictConfig) -> None:
+        """
+        We don't expose certain fields of the renderer because we want to set
+        them based on other inputs.
+        """
+        args.pop("render_features_dimensions", None)
+        args.pop("object_bounding_sphere", None)
+
+    def create_renderer(self):
+        extra_args = {}
+
+        if self.renderer_class_type == "SignedDistanceFunctionRenderer":
+            extra_args["render_features_dimensions"] = self.render_features_dimensions
+            if not hasattr(self.raysampler, "scene_extent"):
+                raise ValueError(
+                    "SignedDistanceFunctionRenderer requires"
+                    + " a raysampler that defines the 'scene_extent' field"
+                    + " (this field is supported by, e.g., the adaptive raysampler - "
+                    + " self.raysampler_class_type='AdaptiveRaySampler')."
+                )
+            extra_args["object_bounding_sphere"] = self.raysampler.scene_extent
+
+        renderer_args = getattr(self, "renderer_" + self.renderer_class_type + "_args")
+        self.renderer = registry.get(BaseRenderer, self.renderer_class_type)(
+            **renderer_args, **extra_args
+        )
+
+    def create_implicit_function(self) -> None:
+        """
+        No-op called by run_auto_creation so that self.implicit_function
+        does not get created. __post_init__ creates the implicit function(s)
+        in wrappers explicitly in self._implicit_functions.
+        """
+        pass
+
+    @classmethod
+    def implicit_function_tweak_args(cls, type, args: DictConfig) -> None:
+        """
+        We don't expose certain implicit_function fields because we want to set
+        them based on other inputs.
+        """
+        args.pop("feature_vector_size", None)
+        args.pop("encoding_dim", None)
+        args.pop("latent_dim", None)
+        args.pop("latent_dim_hypernet", None)
+        args.pop("color_dim", None)
+
+    def _construct_implicit_functions(self):
+        """
+        After run_auto_creation has been called, the arguments
+        for each of the possible implicit function methods are
+        available. `GenericModel` arguments are first validated
+        based on the custom requirements for each specific
+        implicit function method. Then the required implicit
+        function(s) are initialized.
+        """
+        extra_args = {}
+        global_encoder_dim = (
+            0 if self.global_encoder is None else self.global_encoder.get_encoding_dim()
+        )
+        viewpooled_feature_dim = self._get_viewpooled_feature_dim()
+
+        if self.implicit_function_class_type in (
+            "NeuralRadianceFieldImplicitFunction",
+            "NeRFormerImplicitFunction",
+        ):
+            extra_args["latent_dim"] = viewpooled_feature_dim + global_encoder_dim
+            extra_args["color_dim"] = self.render_features_dimensions
+
+        if self.implicit_function_class_type == "IdrFeatureField":
+            extra_args["feature_vector_size"] = self.render_features_dimensions
+            extra_args["encoding_dim"] = global_encoder_dim
+
+        if self.implicit_function_class_type == "SRNImplicitFunction":
+            extra_args["latent_dim"] = viewpooled_feature_dim + global_encoder_dim
+
+        # srn_hypernet preprocessing
+        if self.implicit_function_class_type == "SRNHyperNetImplicitFunction":
+            extra_args["latent_dim"] = viewpooled_feature_dim
+            extra_args["latent_dim_hypernet"] = global_encoder_dim
+
+        # check that for srn, srn_hypernet, idr we have self.num_passes=1
+        implicit_function_type = registry.get(
+            ImplicitFunctionBase, self.implicit_function_class_type
+        )
+        expand_args_fields(implicit_function_type)
+        if self.num_passes != 1 and not implicit_function_type.allows_multiple_passes():
+            raise ValueError(
+                self.implicit_function_class_type
+                + f"requires num_passes=1 not {self.num_passes}"
+            )
+
+        if implicit_function_type.requires_pooling_without_aggregation():
+            if self.view_pooler_enabled and self.view_pooler.has_aggregation():
+                raise ValueError(
+                    "The chosen implicit function requires view pooling without aggregation."
+                )
+        config_name = f"implicit_function_{self.implicit_function_class_type}_args"
+        config = getattr(self, config_name, None)
+        if config is None:
+            raise ValueError(f"{config_name} not present")
+        implicit_functions_list = [
+            ImplicitFunctionWrapper(implicit_function_type(**config, **extra_args))
+            for _ in range(self.num_passes)
+        ]
+        return torch.nn.ModuleList(implicit_functions_list)
diff --git a/pytorch3d/pytorch3d/implicitron/models/global_encoder/__init__.py b/pytorch3d/pytorch3d/implicitron/models/global_encoder/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/global_encoder/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/pytorch3d/implicitron/models/global_encoder/autodecoder.py b/pytorch3d/pytorch3d/implicitron/models/global_encoder/autodecoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..764785bc1f3f17735b119deb65c162cdab6fea61
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/global_encoder/autodecoder.py
@@ -0,0 +1,161 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import warnings
+from collections import defaultdict
+from typing import Dict, List, Optional, Union
+
+import torch
+from pytorch3d.implicitron.tools.config import Configurable
+
+
+class Autodecoder(Configurable, torch.nn.Module):
+    """
+    Autodecoder which maps a list of integer or string keys to optimizable embeddings.
+
+    Settings:
+        encoding_dim: Embedding dimension for the decoder.
+        n_instances: The maximum number of instances stored by the autodecoder.
+        init_scale: Scale factor for the initial autodecoder weights.
+        ignore_input: If `True`, optimizes a single code for any input.
+    """
+
+    encoding_dim: int = 0
+    n_instances: int = 1
+    init_scale: float = 1.0
+    ignore_input: bool = False
+
+    def __post_init__(self):
+        if self.n_instances <= 0:
+            raise ValueError(f"Invalid n_instances {self.n_instances}")
+
+        self._autodecoder_codes = torch.nn.Embedding(
+            self.n_instances,
+            self.encoding_dim,
+            scale_grad_by_freq=True,
+        )
+        with torch.no_grad():
+            # weight has been initialised from Normal(0, 1)
+            self._autodecoder_codes.weight *= self.init_scale
+
+        self._key_map = self._build_key_map()
+        # Make sure to register hooks for correct handling of saving/loading
+        # the module's _key_map.
+        self._register_load_state_dict_pre_hook(self._load_key_map_hook)
+        self._register_state_dict_hook(_save_key_map_hook)
+
+    def _build_key_map(
+        self, key_map_dict: Optional[Dict[str, int]] = None
+    ) -> Dict[str, int]:
+        """
+        Args:
+            key_map_dict: A dictionary used to initialize the key_map.
+
+        Returns:
+            key_map: a dictionary of key: id pairs.
+        """
+        # increments the counter when asked for a new value
+        key_map = defaultdict(iter(range(self.n_instances)).__next__)
+        if key_map_dict is not None:
+            # Assign all keys from the loaded key_map_dict to self._key_map.
+            # Since this is done in the original order, it should generate
+            # the same set of key:id pairs. We check this with an assert to be sure.
+            for x, x_id in key_map_dict.items():
+                x_id_ = key_map[x]
+                assert x_id == x_id_
+        return key_map
+
+    def calculate_squared_encoding_norm(self) -> Optional[torch.Tensor]:
+        return (self._autodecoder_codes.weight**2).mean()
+
+    def get_encoding_dim(self) -> int:
+        return self.encoding_dim
+
+    def forward(self, x: Union[torch.LongTensor, List[str]]) -> Optional[torch.Tensor]:
+        """
+        Args:
+            x: A batch of `N` identifiers. Either a long tensor of size
+            `(N,)` keys in [0, n_instances), or a list of `N` string keys that
+            are hashed to codes (without collisions).
+
+        Returns:
+            codes: A tensor of shape `(N, self.encoding_dim)` containing the
+                key-specific autodecoder codes.
+        """
+        if self.ignore_input:
+            x = ["singleton"]
+
+        if isinstance(x[0], str):
+            try:
+                # pyre-fixme[9]: x has type `Union[List[str], LongTensor]`; used as
+                #  `Tensor`.
+                x = torch.tensor(
+                    [self._key_map[elem] for elem in x],
+                    dtype=torch.long,
+                    device=next(self.parameters()).device,
+                )
+            except StopIteration:
+                raise ValueError("Not enough n_instances in the autodecoder") from None
+
+        return self._autodecoder_codes(x)
+
+    def _load_key_map_hook(
+        self,
+        state_dict,
+        prefix,
+        local_metadata,
+        strict,
+        missing_keys,
+        unexpected_keys,
+        error_msgs,
+    ):
+        """
+        Args:
+            state_dict (dict): a dict containing parameters and
+                persistent buffers.
+            prefix (str): the prefix for parameters and buffers used in this
+                module
+            local_metadata (dict): a dict containing the metadata for this module.
+            strict (bool): whether to strictly enforce that the keys in
+                :attr:`state_dict` with :attr:`prefix` match the names of
+                parameters and buffers in this module
+            missing_keys (list of str): if ``strict=True``, add missing keys to
+                this list
+            unexpected_keys (list of str): if ``strict=True``, add unexpected
+                keys to this list
+            error_msgs (list of str): error messages should be added to this
+                list, and will be reported together in
+                :meth:`~torch.nn.Module.load_state_dict`
+
+        Returns:
+            Constructed key_map if it exists in the state_dict
+            else raises a warning only.
+        """
+        key_map_key = prefix + "_key_map"
+        if key_map_key in state_dict:
+            key_map_dict = state_dict.pop(key_map_key)
+            self._key_map = self._build_key_map(key_map_dict=key_map_dict)
+        else:
+            warnings.warn("No key map in Autodecoder state dict!")
+
+
+def _save_key_map_hook(
+    self,
+    state_dict,
+    prefix,
+    local_metadata,
+) -> None:
+    """
+    Args:
+        state_dict (dict): a dict containing parameters and
+            persistent buffers.
+        prefix (str): the prefix for parameters and buffers used in this
+            module
+        local_metadata (dict): a dict containing the metadata for this module.
+    """
+    key_map_key = prefix + "_key_map"
+    key_map_dict = dict(self._key_map.items())
+    state_dict[key_map_key] = key_map_dict
diff --git a/pytorch3d/pytorch3d/implicitron/models/global_encoder/global_encoder.py b/pytorch3d/pytorch3d/implicitron/models/global_encoder/global_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb7defac308a6d7497a23b2b2d93cd713ceee6f5
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/global_encoder/global_encoder.py
@@ -0,0 +1,125 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Optional, Union
+
+import torch
+from pytorch3d.implicitron.tools.config import (
+    registry,
+    ReplaceableBase,
+    run_auto_creation,
+)
+from pytorch3d.renderer.implicit import HarmonicEmbedding
+
+from .autodecoder import Autodecoder
+
+
+class GlobalEncoderBase(ReplaceableBase):
+    """
+    A base class for implementing encoders of global frame-specific quantities.
+
+    The latter includes e.g. the harmonic encoding of a frame timestamp
+    (`HarmonicTimeEncoder`), or an autodecoder encoding of the frame's sequence
+    (`SequenceAutodecoder`).
+    """
+
+    def get_encoding_dim(self):
+        """
+        Returns the dimensionality of the returned encoding.
+        """
+        raise NotImplementedError()
+
+    def calculate_squared_encoding_norm(self) -> Optional[torch.Tensor]:
+        """
+        Calculates the squared norm of the encoding to report as the
+        `autodecoder_norm` loss of the model, as a zero dimensional tensor.
+        """
+        raise NotImplementedError()
+
+    def forward(
+        self,
+        *,
+        frame_timestamp: Optional[torch.Tensor] = None,
+        sequence_name: Optional[Union[torch.LongTensor, List[str]]] = None,
+        **kwargs,
+    ) -> torch.Tensor:
+        """
+        Given a set of inputs to encode, generates a tensor containing the encoding.
+
+        Returns:
+            encoding: The tensor containing the global encoding.
+        """
+        raise NotImplementedError()
+
+
+# TODO: probabilistic embeddings?
+@registry.register
+class SequenceAutodecoder(GlobalEncoderBase, torch.nn.Module):  # pyre-ignore: 13
+    """
+    A global encoder implementation which provides an autodecoder encoding
+    of the frame's sequence identifier.
+    """
+
+    autodecoder: Autodecoder
+
+    def __post_init__(self):
+        run_auto_creation(self)
+
+    def get_encoding_dim(self):
+        return self.autodecoder.get_encoding_dim()
+
+    def forward(
+        self,
+        *,
+        frame_timestamp: Optional[torch.Tensor] = None,
+        sequence_name: Optional[Union[torch.LongTensor, List[str]]] = None,
+        **kwargs,
+    ) -> torch.Tensor:
+        if sequence_name is None:
+            raise ValueError("sequence_name must be provided.")
+        # run dtype checks and pass sequence_name to self.autodecoder
+        return self.autodecoder(sequence_name)
+
+    def calculate_squared_encoding_norm(self) -> Optional[torch.Tensor]:
+        return self.autodecoder.calculate_squared_encoding_norm()
+
+
+@registry.register
+class HarmonicTimeEncoder(GlobalEncoderBase, torch.nn.Module):
+    """
+    A global encoder implementation which provides harmonic embeddings
+    of each frame's timestamp.
+    """
+
+    n_harmonic_functions: int = 10
+    append_input: bool = True
+    time_divisor: float = 1.0
+
+    def __post_init__(self):
+        self._harmonic_embedding = HarmonicEmbedding(
+            n_harmonic_functions=self.n_harmonic_functions,
+            append_input=self.append_input,
+        )
+
+    def get_encoding_dim(self):
+        return self._harmonic_embedding.get_output_dim(1)
+
+    def forward(
+        self,
+        *,
+        frame_timestamp: Optional[torch.Tensor] = None,
+        sequence_name: Optional[Union[torch.LongTensor, List[str]]] = None,
+        **kwargs,
+    ) -> torch.Tensor:
+        if frame_timestamp is None:
+            raise ValueError("frame_timestamp must be provided.")
+        if frame_timestamp.shape[-1] != 1:
+            raise ValueError("Frame timestamp's last dimensions should be one.")
+        time = frame_timestamp / self.time_divisor
+        return self._harmonic_embedding(time)
+
+    def calculate_squared_encoding_norm(self) -> Optional[torch.Tensor]:
+        return None
diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/__init__.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/implicit_function/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/base.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..7cd67edeb4dcd0597605b3b77a8759bebb44ab74
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/implicit_function/base.py
@@ -0,0 +1,49 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from abc import ABC, abstractmethod
+from typing import Optional
+
+from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
+
+from pytorch3d.implicitron.tools.config import ReplaceableBase
+from pytorch3d.renderer.cameras import CamerasBase
+
+
+class ImplicitFunctionBase(ABC, ReplaceableBase):
+    @abstractmethod
+    def forward(
+        self,
+        *,
+        ray_bundle: ImplicitronRayBundle,
+        fun_viewpool=None,
+        camera: Optional[CamerasBase] = None,
+        global_code=None,
+        **kwargs,
+    ):
+        raise NotImplementedError()
+
+    @staticmethod
+    def allows_multiple_passes() -> bool:
+        """
+        Returns True if this implicit function allows
+        multiple passes.
+        """
+        return False
+
+    @staticmethod
+    def requires_pooling_without_aggregation() -> bool:
+        """
+        Returns True if this implicit function needs
+        pooling without aggregation.
+        """
+        return False
+
+    def on_bind_args(self) -> None:
+        """
+        Called when the custom args are fixed in the main model forward pass.
+        """
+        pass
diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/decoding_functions.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/decoding_functions.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7b3dadfc2b1d18cb1935825e8f69014b7b5e419
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/implicit_function/decoding_functions.py
@@ -0,0 +1,489 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+This file contains
+    - modules which get used by ImplicitFunction objects for decoding an embedding defined in
+        space, e.g. to color or opacity.
+    - DecoderFunctionBase and its subclasses, which wrap some of those modules, providing
+        some such modules as an extension point which an ImplicitFunction object could use.
+"""
+
+import logging
+from dataclasses import field
+
+from enum import Enum
+from typing import Dict, Optional, Tuple
+
+import torch
+
+from omegaconf import DictConfig
+
+from pytorch3d.implicitron.tools.config import (
+    Configurable,
+    registry,
+    ReplaceableBase,
+    run_auto_creation,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class DecoderActivation(Enum):  # activation choices used by the decoders in this file
+    RELU = "relu"
+    SOFTPLUS = "softplus"
+    SIGMOID = "sigmoid"
+    IDENTITY = "identity"
+
+
+class DecoderFunctionBase(ReplaceableBase, torch.nn.Module):
+    """
+    A decoding function is a torch.nn.Module which takes the embedding of a location in
+    space and transforms it into the required quantity (for example density and color).
+    """
+
+    def forward(
+        self, features: torch.Tensor, z: Optional[torch.Tensor] = None
+    ) -> torch.Tensor:
+        """
+        Args:
+            features (torch.Tensor): tensor of shape (batch, ..., num_in_features)
+            z: optional tensor to append to parts of the decoding function
+        Returns:
+            decoded_features (torch.Tensor) : tensor of
+                shape (batch, ..., num_out_features)
+        """
+        raise NotImplementedError()  # the base class decodes nothing itself
+
+
+@registry.register
+class ElementwiseDecoder(DecoderFunctionBase):
+    """
+    Decoding function which scales the input, adds shift and then applies
+    `relu`, `softplus`, `sigmoid` or nothing on its input:
+    `result = operation(input * scale + shift)`
+
+    Members:
+        scale: a scalar with which the input is multiplied. Defaults to 1.
+        shift: a scalar added to the scaled input. Defaults to 0.
+        operation: which operation to perform on the transformed input. Options are:
+            `RELU`, `SOFTPLUS`, `SIGMOID` or `IDENTITY`. Defaults to `IDENTITY`.
+    """
+
+    scale: float = 1
+    shift: float = 0
+    operation: DecoderActivation = DecoderActivation.IDENTITY
+
+    def __post_init__(self):
+        if self.operation not in [
+            DecoderActivation.RELU,
+            DecoderActivation.SOFTPLUS,
+            DecoderActivation.SIGMOID,
+            DecoderActivation.IDENTITY,
+        ]:
+            raise ValueError(
+                "`operation` can only be `RELU`, `SOFTPLUS`, `SIGMOID` or `IDENTITY`."
+            )
+
+    def forward(
+        self, features: torch.Tensor, z: Optional[torch.Tensor] = None
+    ) -> torch.Tensor:
+        """Return `operation(features * scale + shift)`; `z` is accepted but unused."""
+        transformed_input = features * self.scale + self.shift
+        if self.operation == DecoderActivation.SOFTPLUS:
+            return torch.nn.functional.softplus(transformed_input)
+        if self.operation == DecoderActivation.RELU:
+            return torch.nn.functional.relu(transformed_input)
+        if self.operation == DecoderActivation.SIGMOID:
+            # Same result as functional.sigmoid, which warns on older PyTorch versions.
+            return torch.sigmoid(transformed_input)
+        return transformed_input
+
+
+class MLPWithInputSkips(Configurable, torch.nn.Module):
+    """
+    Implements the multi-layer perceptron architecture of the Neural Radiance Field.
+
+    As such, `MLPWithInputSkips` is a multi layer perceptron consisting
+    of a sequence of linear layers with ReLU activations.
+
+    Additionally, for a set of predefined layers `input_skips`, the forward pass
+    appends a skip tensor `z` to the output of the preceding layer.
+
+    Note that this follows the architecture described in the Supplementary
+    Material (Fig. 7) of [1], for which you should keep the defaults:
+        - `last_layer_bias_init` to None
+        - `last_activation` to "relu"
+        - `use_xavier_init` to `True`
+
+    If you want to use this as a part of the color prediction in TensoRF model set:
+        - `last_layer_bias_init` to 0
+        - `last_activation` to "sigmoid"
+        - `use_xavier_init` to `False`
+
+    References:
+        [1] Ben Mildenhall and Pratul P. Srinivasan and Matthew Tancik
+            and Jonathan T. Barron and Ravi Ramamoorthi and Ren Ng:
+            NeRF: Representing Scenes as Neural Radiance Fields for View
+            Synthesis, ECCV2020
+
+    Members:
+        n_layers: The number of linear layers of the MLP.
+        input_dim: The number of channels of the input tensor.
+        output_dim: The number of channels of the output.
+        skip_dim: The number of channels of the skip tensor `z`.
+        hidden_dim: The number of hidden units of the MLP.
+        input_skips: The list of layer indices at which we append the skip
+            tensor `z`. Index 0 is ignored (the first layer only sees `x`).
+        skip_affine_trans: If True, skip layers shift and scale their input with
+            values predicted from `z` instead of concatenating `z`.
+        last_layer_bias_init: If set, all biases of the last layer start at it.
+        last_activation: Which activation to use in the last layer. Options are:
+            "relu", "softplus", "sigmoid" and "identity". Default is "relu".
+        use_xavier_init: If True uses xavier init for all linear layer weights.
+            Otherwise the default PyTorch initialization is used. Default True.
+    """
+
+    n_layers: int = 8
+    input_dim: int = 39
+    output_dim: int = 256
+    skip_dim: int = 39
+    hidden_dim: int = 256
+    input_skips: Tuple[int, ...] = (5,)
+    skip_affine_trans: bool = False
+    last_layer_bias_init: Optional[float] = None
+    last_activation: DecoderActivation = DecoderActivation.RELU
+    use_xavier_init: bool = True
+
+    def __post_init__(self):  # builds self.mlp (and self.skip_affines when enabled)
+        try:
+            last_activation = {
+                DecoderActivation.RELU: torch.nn.ReLU(True),
+                DecoderActivation.SOFTPLUS: torch.nn.Softplus(),
+                DecoderActivation.SIGMOID: torch.nn.Sigmoid(),
+                DecoderActivation.IDENTITY: torch.nn.Identity(),
+            }[self.last_activation]
+        except KeyError as e:
+            raise ValueError(
+                "`last_activation` can only be `RELU`,"
+                " `SOFTPLUS`, `SIGMOID` or `IDENTITY`."
+            ) from e
+
+        layers = []
+        skip_affine_layers = []
+        for layeri in range(self.n_layers):
+            dimin = self.hidden_dim if layeri > 0 else self.input_dim
+            dimout = self.hidden_dim if layeri + 1 < self.n_layers else self.output_dim
+
+            if layeri > 0 and layeri in self.input_skips:
+                if self.skip_affine_trans:
+                    skip_affine_layers.append(
+                        self._make_affine_layer(self.skip_dim, self.hidden_dim)
+                    )
+                else:
+                    dimin = self.hidden_dim + self.skip_dim
+
+            linear = torch.nn.Linear(dimin, dimout)
+            if self.use_xavier_init:
+                _xavier_init(linear)
+            if layeri == self.n_layers - 1 and self.last_layer_bias_init is not None:
+                torch.nn.init.constant_(linear.bias, self.last_layer_bias_init)
+            layers.append(
+                torch.nn.Sequential(linear, torch.nn.ReLU(True))
+                if layeri + 1 < self.n_layers  # hidden layers use ReLU, the last one does not
+                else torch.nn.Sequential(linear, last_activation)
+            )
+        self.mlp = torch.nn.ModuleList(layers)
+        if self.skip_affine_trans:
+            self.skip_affines = torch.nn.ModuleList(skip_affine_layers)
+        self._input_skips = {i for i in self.input_skips if i > 0}  # as built above
+        self._skip_affine_trans = self.skip_affine_trans
+
+    def _make_affine_layer(self, input_dim, hidden_dim):  # maps z to 2 * hidden_dim values
+        l1 = torch.nn.Linear(input_dim, hidden_dim * 2)
+        l2 = torch.nn.Linear(hidden_dim * 2, hidden_dim * 2)
+        if self.use_xavier_init:
+            _xavier_init(l1)
+            _xavier_init(l2)
+        return torch.nn.Sequential(l1, torch.nn.ReLU(True), l2)
+
+    def _apply_affine_layer(self, layer, x, z):  # shift and scale x with values from z
+        mu_log_std = layer(z)
+        mu, log_std = mu_log_std.split(mu_log_std.shape[-1] // 2, dim=-1)
+        std = torch.nn.functional.softplus(log_std)  # keeps the scale positive
+        return (x - mu) * std
+
+    def forward(self, x: torch.Tensor, z: Optional[torch.Tensor] = None):
+        """
+        Args:
+            x: The input tensor of shape `(..., input_dim)`.
+            z: The input skip tensor of shape `(..., skip_dim)` which is appended
+                to layers whose indices are specified by `input_skips`.
+        Returns:
+            y: The output tensor of shape `(..., output_dim)`.
+        """
+        y = x
+        if z is None:
+            # if the skip tensor is None, we use `x` instead.
+            z = x
+        skipi = 0  # index of the next entry of self.skip_affines to use
+        for li, layer in enumerate(self.mlp):
+            if li in self._input_skips:
+                if self._skip_affine_trans:
+                    y = self._apply_affine_layer(self.skip_affines[skipi], y, z)
+                else:
+                    y = torch.cat((y, z), dim=-1)
+                skipi += 1
+            y = layer(y)
+        return y
+
+
+@registry.register
+# pyre-fixme[13]: Attribute `network` is never initialized.
+class MLPDecoder(DecoderFunctionBase):
+    """
+    Decoding function which uses `MLPWithInputSkips` to convert the embedding to output.
+    The `input_dim` of the `network` is set from the value of `input_dim` member.
+
+    Members:
+        input_dim: dimension of input.
+        param_groups: dictionary where keys are names of individual parameters
+            or module members and values are the parameter group where the
+            parameter/member will be sorted to. "self" key is used to denote the
+            parameter group at the module level. Possible keys, including the "self" key
+            do not have to be defined. By default all parameters are put into "default"
+            parameter group and have the learning rate defined in the optimizer,
+            it can be overridden at the:
+                - module level with "self" key, all the parameters and child
+                    module's parameters will be put to that parameter group
+                - member level, which is the same as if the `param_groups` in that
+                    member has key="self" and value equal to that parameter group.
+                    This is useful if members do not have `param_groups`, for
+                    example torch.nn.Linear.
+                - parameter level, parameter with the same name as the key
+                    will be put to that parameter group.
+        network_args: configuration for MLPWithInputSkips
+    """
+
+    input_dim: int = 3
+    param_groups: Dict[str, str] = field(default_factory=lambda: {})
+    network: MLPWithInputSkips
+
+    def __post_init__(self):
+        run_auto_creation(self)  # presumably triggers create_network_impl - TODO confirm
+
+    def forward(
+        self, features: torch.Tensor, z: Optional[torch.Tensor] = None
+    ) -> torch.Tensor:
+        return self.network(features, z)  # delegates directly to the wrapped MLP
+
+    @classmethod
+    def network_tweak_args(cls, type, args: DictConfig) -> None:
+        """
+        Special method to stop get_default_args exposing member's `input_dim`.
+        """
+        args.pop("input_dim", None)  # no error if the key is already absent
+
+    def create_network_impl(self, type, args: DictConfig) -> None:
+        """
+        Set the input dimension of the `network` to the input dimension of the
+        decoding function.
+        """
+        self.network = MLPWithInputSkips(input_dim=self.input_dim, **args)
+
+
+class TransformerWithInputSkips(torch.nn.Module):
+    def __init__(
+        self,
+        n_layers: int = 8,
+        input_dim: int = 39,
+        output_dim: int = 256,
+        skip_dim: int = 39,
+        hidden_dim: int = 64,
+        input_skips: Tuple[int, ...] = (5,),
+        dim_down_factor: float = 1,
+    ):
+        """
+        Args:
+            n_layers: The number of (pool, ray) transformer layer pairs.
+            input_dim: The number of channels of the input tensor.
+            output_dim: The number of channels of the output.
+            skip_dim: Not read by this constructor: the projections of `z` are
+                sized with `input_dim`, so `z` needs `input_dim` channels.
+            hidden_dim: Width after the first linear layer and feedforward size.
+            input_skips: Layer indices at which a projection of `z` is added.
+            dim_down_factor: Pair `i` outputs `hidden_dim / dim_down_factor**(i + 1)`.
+        """
+        super().__init__()
+
+        self.first = torch.nn.Linear(input_dim, hidden_dim)
+        _xavier_init(self.first)
+
+        self.skip_linear = torch.nn.ModuleList()
+
+        layers_pool, layers_ray = [], []
+        dimout = 0  # stays 0 only when n_layers == 0
+        for layeri in range(n_layers):
+            dimin = int(round(hidden_dim / (dim_down_factor**layeri)))
+            dimout = int(round(hidden_dim / (dim_down_factor ** (layeri + 1))))
+            logger.info(f"Tr: {dimin} -> {dimout}")
+            for _i, l in enumerate((layers_pool, layers_ray)):
+                l.append(
+                    TransformerEncoderLayer(
+                        d_model=[dimin, dimout][_i],  # pool layer: dimin, ray layer: dimout
+                        nhead=4,
+                        dim_feedforward=hidden_dim,
+                        dropout=0.0,
+                        d_model_out=dimout,
+                    )
+                )
+
+            if layeri in input_skips:
+                self.skip_linear.append(torch.nn.Linear(input_dim, dimin))
+
+        self.last = torch.nn.Linear(dimout, output_dim)
+        _xavier_init(self.last)
+
+        # pyre-fixme[8]: Attribute has type `Tuple[ModuleList, ModuleList]`; used as
+        #  `ModuleList`.
+        self.layers_pool, self.layers_ray = (
+            torch.nn.ModuleList(layers_pool),
+            torch.nn.ModuleList(layers_ray),
+        )
+        self._input_skips = set(input_skips)
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        z: Optional[torch.Tensor] = None,
+    ):
+        """
+        Args:
+            x: The input tensor of shape
+                `(minibatch, n_pooled_feats, n_rays, n_ray_pts, input_dim)`.
+            z: The input skip tensor with the same leading dimensions as `x`;
+                a linear projection of it is added to the input of the
+                layers whose indices are specified by `input_skips`.
+        Returns:
+            y: The output tensor of shape
+                `(minibatch, n_rays, n_ray_pts, output_dim)`.
+        """
+
+        if z is None:
+            # if the skip tensor is None, we use `x` instead.
+            z = x
+
+        y = self.first(x)
+
+        B, n_pool, n_rays, n_pts, dim = y.shape  # requires exactly five dimensions
+
+        # y_p in n_pool, n_pts, B x n_rays x dim
+        y_p = y.permute(1, 3, 0, 2, 4)
+
+        skipi = 0  # index of the next entry of self.skip_linear to use
+        dimh = dim  # current channel width, updated after each pool layer
+        for li, (layer_pool, layer_ray) in enumerate(
+            zip(self.layers_pool, self.layers_ray)
+        ):
+            y_pool_attn = y_p.reshape(n_pool, n_pts * B * n_rays, dimh)
+            if li in self._input_skips:
+                z_skip = self.skip_linear[skipi](z)
+                y_pool_attn = y_pool_attn + z_skip.permute(1, 3, 0, 2, 4).reshape(
+                    n_pool, n_pts * B * n_rays, dimh
+                )
+                skipi += 1
+            # n_pool x B*n_rays*n_pts x dim
+            y_pool_attn, pool_attn = layer_pool(y_pool_attn, src_key_padding_mask=None)
+            dimh = y_pool_attn.shape[-1]
+
+            y_ray_attn = (
+                y_pool_attn.view(n_pool, n_pts, B * n_rays, dimh)
+                .permute(1, 0, 2, 3)
+                .reshape(n_pts, n_pool * B * n_rays, dimh)
+            )
+            # n_pts x n_pool*B*n_rays x dim
+            y_ray_attn, ray_attn = layer_ray(
+                y_ray_attn,
+                src_key_padding_mask=None,
+            )
+
+            y_p = y_ray_attn.view(n_pts, n_pool, B * n_rays, dimh).permute(1, 0, 2, 3)
+
+        y = y_p.view(n_pool, n_pts, B, n_rays, dimh).permute(2, 0, 3, 1, 4)
+
+        W = torch.softmax(y[..., :1], dim=1)  # weights over n_pool from the first channel
+        y = (y * W).sum(dim=1)  # weighted sum removes the n_pool dimension
+        y = self.last(y)
+
+        return y
+
+
+class TransformerEncoderLayer(torch.nn.Module):
+    r"""TransformerEncoderLayer is made up of self-attn and feedforward network.
+    This standard encoder layer is based on the paper "Attention Is All You Need".
+    Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
+    Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in
+    Neural Information Processing Systems, pages 6000-6010. Users may modify or implement
+    in a different way during application.
+
+    Args:
+        d_model: the number of expected features in the input (required).
+        nhead: the number of heads in the multiheadattention models (required).
+        dim_feedforward: the dimension of the feedforward network model (default=2048).
+        dropout: the dropout value (default=0.1).
+        d_model_out: the number of output features; if <= 0, d_model is used (default=-1).
+
+    Examples::
+        >>> encoder_layer = TransformerEncoderLayer(d_model=512, nhead=8)
+        >>> src = torch.rand(10, 32, 512)
+        >>> out, attn = encoder_layer(src)
+    """
+
+    def __init__(
+        self, d_model, nhead, dim_feedforward=2048, dropout=0.1, d_model_out=-1
+    ):
+        super(TransformerEncoderLayer, self).__init__()
+        self.self_attn = torch.nn.MultiheadAttention(d_model, nhead, dropout=dropout)
+        # Implementation of Feedforward model
+        self.linear1 = torch.nn.Linear(d_model, dim_feedforward)
+        self.dropout = torch.nn.Dropout(dropout)
+        d_model_out = d_model if d_model_out <= 0 else d_model_out
+        self.linear2 = torch.nn.Linear(dim_feedforward, d_model_out)
+        self.norm1 = torch.nn.LayerNorm(d_model)
+        self.norm2 = torch.nn.LayerNorm(d_model_out)
+        self.dropout1 = torch.nn.Dropout(dropout)
+        self.dropout2 = torch.nn.Dropout(dropout)
+
+        self.activation = torch.nn.functional.relu  # fixed; there is no argument for it
+
+    def forward(self, src, src_mask=None, src_key_padding_mask=None):
+        r"""Pass the input through the encoder layer.
+
+        Args:
+            src: the sequence to the encoder layer (required).
+            src_mask: the mask for the src sequence (optional).
+            src_key_padding_mask: the mask for the src keys per batch (optional).
+
+        Returns:
+            the encoded sequence and the second output of `self.self_attn`.
+        """
+        src2, attn = self.self_attn(
+            src, src, src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask
+        )
+        src = src + self.dropout1(src2)
+        src = self.norm1(src)
+        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
+        d_out = src2.shape[-1]
+        src = src[..., :d_out] + self.dropout2(src2)[..., :d_out]  # residual on d_out channels
+        src = self.norm2(src)
+        return src, attn
+
+
+def _xavier_init(linear) -> None:
+    """
+    Applies in-place Xavier-uniform init to `linear.weight`; the bias is left as is.
+    """
+    torch.nn.init.xavier_uniform_(linear.weight.data)
diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/idr_feature_field.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/idr_feature_field.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb70c957b3cf3f2dd77653c5fbf46a222525e85c
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/implicit_function/idr_feature_field.py
@@ -0,0 +1,176 @@
+# @lint-ignore-every LICENSELINT
+# Adapted from https://github.com/lioryariv/idr/blob/main/code/model/
+#              implicit_differentiable_renderer.py
+# Copyright (c) 2020 Lior Yariv
+import math
+from typing import Optional, Tuple
+
+import torch
+from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
+from pytorch3d.implicitron.tools.config import registry
+from pytorch3d.renderer.implicit import HarmonicEmbedding
+
+from torch import nn
+
+from .base import ImplicitFunctionBase
+from .utils import get_rays_points_world
+
+
+@registry.register
+class IdrFeatureField(ImplicitFunctionBase, torch.nn.Module):
+    """
+    Implicit function as used in http://github.com/lioryariv/idr.
+
+    Members:
+        d_in: dimension of the input point.
+        n_harmonic_functions_xyz: If -1, do not embed the point.
+            If >=0, use a harmonic embedding with this number of
+            harmonic functions. (The harmonic embedding includes the input
+            itself, so a value of 0 means the point is used but without
+            any harmonic functions.)
+        d_out and feature_vector_size: Sum of these is the output
+            dimension. This implicit function thus returns a concatenation
+            of `d_out` signed distance function values and `feature_vector_size`
+            features (such as colors). When used in `GenericModel`,
+            `feature_vector_size` is automatically set to
+            `render_features_dimensions`.
+        dims: list of hidden layer sizes.
+        geometric_init: whether to use custom weight initialization
+            in linear layers. If False, pytorch default (uniform sampling)
+            is used.
+        bias: if geometric_init=True, initial value for bias subtracted
+            in the last layer.
+        skip_in: List of indices of layers that receive as input the initial
+            value concatenated with the output of the previous layers.
+        weight_norm: whether to apply weight normalization to each layer.
+        pooled_feature_dim: If view pooling is in use (provided as
+            fun_viewpool to forward()) this must be its number of features.
+            Otherwise this must be set to 0. (If used from GenericModel,
+            this config value will be overridden automatically.)
+        encoding_dim: If global coding is in use (provided as global_code
+            to forward()) this must be its number of features.
+            Otherwise this must be set to 0. (If used from GenericModel,
+            this config value will be overridden automatically.)
+    """
+
+    feature_vector_size: int = 3
+    d_in: int = 3
+    d_out: int = 1
+    dims: Tuple[int, ...] = (512, 512, 512, 512, 512, 512, 512, 512)
+    geometric_init: bool = True
+    bias: float = 1.0
+    skip_in: Tuple[int, ...] = ()
+    weight_norm: bool = True
+    n_harmonic_functions_xyz: int = 0
+    pooled_feature_dim: int = 0
+    encoding_dim: int = 0
+
+    def __post_init__(self):
+        dims = [self.d_in] + list(self.dims) + [self.d_out + self.feature_vector_size]
+
+        self.embed_fn = None
+        if self.n_harmonic_functions_xyz >= 0:
+            self.embed_fn = HarmonicEmbedding(
+                self.n_harmonic_functions_xyz, append_input=True
+            )
+            dims[0] = self.embed_fn.get_output_dim()
+        if self.pooled_feature_dim > 0:
+            dims[0] += self.pooled_feature_dim
+        if self.encoding_dim > 0:
+            dims[0] += self.encoding_dim
+
+        self.num_layers = len(dims)  # number of widths: one more than the linear layers
+
+        out_dim = 0
+        layers = []
+        for layer_idx in range(self.num_layers - 1):
+            if layer_idx + 1 in self.skip_in:
+                out_dim = dims[layer_idx + 1] - dims[0]  # room for the concat in forward()
+            else:
+                out_dim = dims[layer_idx + 1]
+
+            lin = nn.Linear(dims[layer_idx], out_dim)
+
+            if self.geometric_init:
+                if layer_idx == self.num_layers - 2:  # the last linear layer
+                    torch.nn.init.normal_(
+                        lin.weight,
+                        mean=math.pi**0.5 / dims[layer_idx] ** 0.5,
+                        std=0.0001,
+                    )
+                    torch.nn.init.constant_(lin.bias, -self.bias)
+                elif self.n_harmonic_functions_xyz >= 0 and layer_idx == 0:
+                    torch.nn.init.constant_(lin.bias, 0.0)
+                    # NOTE(review): 3 is hard-coded here; presumably it stands for d_in.
+                    torch.nn.init.constant_(lin.weight[:, 3:], 0.0)
+                    torch.nn.init.normal_(
+                        lin.weight[:, :3], 0.0, 2**0.5 / out_dim**0.5
+                    )
+                elif self.n_harmonic_functions_xyz >= 0 and layer_idx in self.skip_in:
+                    torch.nn.init.constant_(lin.bias, 0.0)
+                    torch.nn.init.normal_(lin.weight, 0.0, 2**0.5 / out_dim**0.5)
+                    torch.nn.init.constant_(lin.weight[:, -(dims[0] - 3) :], 0.0)
+                else:
+                    torch.nn.init.constant_(lin.bias, 0.0)
+                    torch.nn.init.normal_(lin.weight, 0.0, 2**0.5 / out_dim**0.5)
+
+            if self.weight_norm:
+                lin = nn.utils.weight_norm(lin)
+
+            layers.append(lin)
+
+        self.linear_layers = torch.nn.ModuleList(layers)
+        self.out_dim = out_dim  # output width of the last linear layer
+        self.softplus = nn.Softplus(beta=100)
+
+    # pyre-fixme[14]: `forward` overrides method defined in `ImplicitFunctionBase`
+    #  inconsistently.
+    def forward(
+        self,
+        *,
+        ray_bundle: Optional[ImplicitronRayBundle] = None,
+        rays_points_world: Optional[torch.Tensor] = None,
+        fun_viewpool=None,
+        global_code=None,
+        **kwargs,
+    ):
+        # this field only uses point locations
+        # rays_points_world.shape = [minibatch x ... x pts_per_ray x 3]
+        rays_points_world = get_rays_points_world(ray_bundle, rays_points_world)
+
+        # With no points, or no input source at all, return an empty (0, out_dim) tensor.
+        if rays_points_world.numel() == 0 or (
+            self.embed_fn is None and fun_viewpool is None and global_code is None
+        ):
+            return torch.tensor(
+                [], device=rays_points_world.device, dtype=rays_points_world.dtype
+            ).view(0, self.out_dim)
+
+        embeddings = []
+        if self.embed_fn is not None:
+            embeddings.append(self.embed_fn(rays_points_world))
+
+        if fun_viewpool is not None:
+            assert rays_points_world.ndim == 2
+            pooled_feature = fun_viewpool(rays_points_world[None])
+            # TODO: pooled features are 4D!
+            embeddings.append(pooled_feature)
+
+        if global_code is not None:
+            assert global_code.shape[0] == 1  # TODO: generalize to batches!
+            # This will require changing raytracer code
+            # embedding = embedding[None].expand(global_code.shape[0], *embedding.shape)
+            embeddings.append(
+                global_code[0, None, :].expand(rays_points_world.shape[0], -1)
+            )
+
+        embedding = torch.cat(embeddings, dim=-1)
+        x = embedding
+        for layer_idx in range(self.num_layers - 1):
+            if layer_idx in self.skip_in:
+                x = torch.cat([x, embedding], dim=-1) / 2**0.5
+
+            x = self.linear_layers[layer_idx](x)
+
+            if layer_idx < self.num_layers - 2:  # no activation after the last layer
+                x = self.softplus(x)
+
+        return x
diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/neural_radiance_field.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/neural_radiance_field.py
new file mode 100644
index 0000000000000000000000000000000000000000..0706d9a87acb4252a0cf7c920f7302ddef293b2b
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/implicit_function/neural_radiance_field.py
@@ -0,0 +1,273 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+from typing import Optional, Tuple
+
+import torch
+from pytorch3d.common.linear_with_repeat import LinearWithRepeat
+from pytorch3d.implicitron.models.renderer.base import (
+    conical_frustum_to_gaussian,
+    ImplicitronRayBundle,
+)
+from pytorch3d.implicitron.tools.config import expand_args_fields, registry
+from pytorch3d.renderer.cameras import CamerasBase
+from pytorch3d.renderer.implicit import HarmonicEmbedding
+from pytorch3d.renderer.implicit.utils import ray_bundle_to_ray_points
+
+from .base import ImplicitFunctionBase
+
+from .decoding_functions import (  # noqa
+    _xavier_init,
+    MLPWithInputSkips,
+    TransformerWithInputSkips,
+)
+from .utils import create_embeddings_for_implicit_function
+
+
+logger = logging.getLogger(__name__)
+
+
+class NeuralRadianceFieldBase(ImplicitFunctionBase, torch.nn.Module):
+    n_harmonic_functions_xyz: int = 10
+    n_harmonic_functions_dir: int = 4
+    n_hidden_neurons_dir: int = 128
+    latent_dim: int = 0
+    input_xyz: bool = True
+    xyz_ray_dir_in_camera_coords: bool = False
+    color_dim: int = 3
+    use_integrated_positional_encoding: bool = False
+    """
+    Args:
+        n_harmonic_functions_xyz: The number of harmonic functions
+            used to form the harmonic embedding of 3D point locations.
+        n_harmonic_functions_dir: The number of harmonic functions
+            used to form the harmonic embedding of the ray directions.
+        n_hidden_neurons_xyz: (Declared by subclasses.) The number of hidden
+            units in the fully connected layers of the MLP that accepts the 3D
+            point locations and outputs the occupancy field with the
+            intermediate features.
+        n_hidden_neurons_dir: The number of hidden units in the
+            fully connected layers of the MLP that accepts the intermediate
+            features and ray directions and outputs the radiance field
+            (per-point colors).
+        n_layers_xyz: (Declared by subclasses.) The number of layers of the MLP
+            that outputs the occupancy field.
+        append_xyz: (Declared by subclasses.) Skip-layer indices of the occupancy MLP.
+        use_integrated_positional_encoding: If True, use integrated positional encoding
+            as defined in `MIP-NeRF <https://arxiv.org/abs/2103.13415>`_.
+            If False, use the classical harmonic embedding
+            defined in `NeRF <https://arxiv.org/abs/2003.08934>`_.
+    """
+
+    def __post_init__(self):
+        # The harmonic embedding layer converts input 3D coordinates
+        # to a representation that is more suitable for
+        # processing with a deep neural network.
+        self.harmonic_embedding_xyz = HarmonicEmbedding(
+            self.n_harmonic_functions_xyz, append_input=True
+        )
+        self.harmonic_embedding_dir = HarmonicEmbedding(
+            self.n_harmonic_functions_dir, append_input=True
+        )
+        if not self.input_xyz and self.latent_dim <= 0:
+            raise ValueError("The latent dimension has to be > 0 if xyz is not input!")
+
+        embedding_dim_dir = self.harmonic_embedding_dir.get_output_dim()
+
+        self.xyz_encoder = self._construct_xyz_encoder(
+            input_dim=self.get_xyz_embedding_dim()
+        )
+
+        self.intermediate_linear = torch.nn.Linear(
+            self.n_hidden_neurons_xyz, self.n_hidden_neurons_xyz
+        )
+        _xavier_init(self.intermediate_linear)
+
+        self.density_layer = torch.nn.Linear(self.n_hidden_neurons_xyz, 1)
+        _xavier_init(self.density_layer)
+
+        # Zero the bias of the density layer to avoid
+        # a completely transparent initialization.
+        self.density_layer.bias.data[:] = 0.0  # fixme: Sometimes this is not enough
+
+        self.color_layer = torch.nn.Sequential(
+            LinearWithRepeat(
+                self.n_hidden_neurons_xyz + embedding_dim_dir, self.n_hidden_neurons_dir
+            ),
+            torch.nn.ReLU(True),
+            torch.nn.Linear(self.n_hidden_neurons_dir, self.color_dim),
+            torch.nn.Sigmoid(),
+        )
+
+    def get_xyz_embedding_dim(self):
+        return (
+            self.harmonic_embedding_xyz.get_output_dim() * int(self.input_xyz)
+            + self.latent_dim
+        )
+
+    def _construct_xyz_encoder(self, input_dim: int):
+        raise NotImplementedError()
+
+    def _get_colors(self, features: torch.Tensor, rays_directions: torch.Tensor):
+        """
+        This function takes per-point `features` predicted by `self.xyz_encoder`
+        and evaluates the color model in order to attach to each
+        point a 3D vector of its RGB color.
+        """
+        # Normalize the ray_directions to unit l2 norm.
+        rays_directions_normed = torch.nn.functional.normalize(rays_directions, dim=-1)
+        # Obtain the harmonic embedding of the normalized ray directions.
+        rays_embedding = self.harmonic_embedding_dir(rays_directions_normed)
+
+        return self.color_layer((self.intermediate_linear(features), rays_embedding))
+
+    @staticmethod
+    def allows_multiple_passes() -> bool:
+        """
+        Returns True as this implicit function allows
+        multiple passes. Overridden from ImplicitFunctionBase.
+        """
+        return True
+
+    def forward(
+        self,
+        *,
+        ray_bundle: ImplicitronRayBundle,
+        fun_viewpool=None,
+        camera: Optional[CamerasBase] = None,
+        global_code=None,
+        **kwargs,
+    ):
+        """
+        The forward function accepts the parametrizations of 3D points
+        sampled along projection rays. The forward pass is responsible for
+        attaching to each point a 3D vector and a 1D scalar representing
+        the point's RGB color and opacity respectively.
+
+        Args:
+            ray_bundle: An ImplicitronRayBundle object containing the following variables:
+                origins: A tensor of shape `(minibatch, ..., 3)` denoting the
+                    origins of the sampling rays in world coords.
+                directions: A tensor of shape `(minibatch, ..., 3)`
+                    containing the direction vectors of sampling rays in world coords.
+                lengths: A tensor of shape `(minibatch, ..., num_points_per_ray)`
+                    containing the lengths at which the rays are sampled.
+                bins: An optional tensor of shape `(minibatch,..., num_points_per_ray + 1)`
+                    containing the bins at which the rays are sampled. In this case
+                    lengths is equal to the midpoints of bins.
+
+            fun_viewpool: an optional callback with the signature
+                    fun_viewpool(points) -> pooled_features
+                where points is a [N_TGT x N x 3] tensor of world coords,
+                and pooled_features is a [N_TGT x ... x N_SRC x latent_dim] tensor
+                of the features pooled from the context images.
+
+        Returns:
+            rays_densities: A tensor of shape `(minibatch, ..., num_points_per_ray, 1)`
+                denoting the opacity of each ray point.
+            rays_colors: A tensor of shape `(minibatch, ..., num_points_per_ray, 3)`
+                denoting the color of each ray point.
+            aux: An empty dict (the third element of the returned tuple).
+
+        Raises:
+            ValueError: If `use_integrated_positional_encoding` is True and
+                `ray_bundle.bins` is None.
+        """
+        if self.use_integrated_positional_encoding and ray_bundle.bins is None:
+            raise ValueError(
+                "When use_integrated_positional_encoding is True, ray_bundle.bins must be set. "
+                "Have you set to True `AbstractMaskRaySampler.use_bins_for_ray_sampling`?"
+            )
+
+        rays_points_world, diag_cov = (
+            conical_frustum_to_gaussian(ray_bundle)
+            if self.use_integrated_positional_encoding
+            else (ray_bundle_to_ray_points(ray_bundle), None)  # pyre-ignore
+        )
+        # rays_points_world.shape = [minibatch x ... x pts_per_ray x 3]
+
+        embeds = create_embeddings_for_implicit_function(
+            xyz_world=rays_points_world,
+            # The xyz embedding function is only passed when `input_xyz` is set.
+            xyz_embedding_function=self.harmonic_embedding_xyz
+            if self.input_xyz
+            else None,
+            global_code=global_code,
+            fun_viewpool=fun_viewpool,
+            xyz_in_camera_coords=self.xyz_ray_dir_in_camera_coords,
+            camera=camera,
+            diag_cov=diag_cov,
+        )
+
+        # embeds.shape = [minibatch x n_src x n_rays x n_pts x self.n_harmonic_functions*6+3]
+        features = self.xyz_encoder(embeds)
+        # features.shape = [minibatch x ... x self.n_hidden_neurons_xyz]
+        # NNs operate on the flattened rays; reshaping to the correct spatial size
+        # TODO: maybe make the transformer work on non-flattened tensors to avoid this reshape
+        features = features.reshape(*rays_points_world.shape[:-1], -1)
+
+        raw_densities = self.density_layer(features)
+        # raw_densities.shape = [minibatch x ... x 1]; raw linear output, no activation
+
+        if self.xyz_ray_dir_in_camera_coords:
+            if camera is None:
+                raise ValueError("Camera must be given if xyz_ray_dir_in_camera_coords")
+
+            directions = ray_bundle.directions @ camera.R
+        else:
+            directions = ray_bundle.directions
+
+        rays_colors = self._get_colors(features, directions)
+        # rays_colors.shape = [minibatch x ... x 3] in [0-1]
+
+        return raw_densities, rays_colors, {}
+
+
+@registry.register
+class NeuralRadianceFieldImplicitFunction(NeuralRadianceFieldBase):
+    # Radiance field whose xyz encoder is an `MLPWithInputSkips`.
+    transformer_dim_down_factor: float = 1.0
+    n_hidden_neurons_xyz: int = 256
+    n_layers_xyz: int = 8
+    append_xyz: Tuple[int, ...] = (5,)
+
+    def _construct_xyz_encoder(self, input_dim: int):
+        """Build the `MLPWithInputSkips` that encodes the xyz embedding."""
+        # Prepare the MLP class via `expand_args_fields`, then instantiate it.
+        expand_args_fields(MLPWithInputSkips)
+        hidden_dim = self.n_hidden_neurons_xyz
+        # `input_dim` and `hidden_dim` each appear twice among the
+        # positional arguments; `append_xyz` selects the input skips.
+        mlp_args = (self.n_layers_xyz, input_dim, hidden_dim, input_dim, hidden_dim)
+        return MLPWithInputSkips(*mlp_args, input_skips=self.append_xyz)
+
+
+@registry.register
+class NeRFormerImplicitFunction(NeuralRadianceFieldBase):
+    # Radiance field whose xyz encoder is a `TransformerWithInputSkips`.
+    transformer_dim_down_factor: float = 2.0
+    n_hidden_neurons_xyz: int = 80
+    n_layers_xyz: int = 2
+    append_xyz: Tuple[int, ...] = (1,)
+
+    def _construct_xyz_encoder(self, input_dim: int):
+        """Build the `TransformerWithInputSkips` encoding the xyz embedding."""
+        hidden_dim = self.n_hidden_neurons_xyz
+        # `input_dim` and `hidden_dim` each appear twice, in this exact order.
+        dims = (self.n_layers_xyz, input_dim, hidden_dim, input_dim, hidden_dim)
+        return TransformerWithInputSkips(
+            *dims,
+            input_skips=self.append_xyz,
+            dim_down_factor=self.transformer_dim_down_factor,
+        )
+
+    @staticmethod
+    def requires_pooling_without_aggregation() -> bool:
+        """
+        Returns True: this implicit function needs pooling without aggregation.
+        """
+        return True
diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/scene_representation_networks.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/scene_representation_networks.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ac0992cf52d0b38ec552904390c60b6387eb313
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/implicit_function/scene_representation_networks.py
@@ -0,0 +1,427 @@
+# @lint-ignore-every LICENSELINT
+# Adapted from https://github.com/vsitzmann/scene-representation-networks
+# Copyright (c) 2019 Vincent Sitzmann
+from typing import Any, cast, Optional, Tuple
+
+import torch
+from omegaconf import DictConfig
+from pytorch3d.common.linear_with_repeat import LinearWithRepeat
+from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
+from pytorch3d.implicitron.third_party import hyperlayers, pytorch_prototyping
+from pytorch3d.implicitron.tools.config import Configurable, registry, run_auto_creation
+from pytorch3d.renderer import ray_bundle_to_ray_points
+from pytorch3d.renderer.cameras import CamerasBase
+from pytorch3d.renderer.implicit import HarmonicEmbedding
+
+from .base import ImplicitFunctionBase
+from .utils import create_embeddings_for_implicit_function
+
+
+def _kaiming_normal_init(module: torch.nn.Module) -> None:
+    """Kaiming-normal init of linear-layer weights; other modules are skipped."""
+    init_kwargs = {"a": 0.0, "nonlinearity": "relu", "mode": "fan_in"}
+    if isinstance(module, (torch.nn.Linear, LinearWithRepeat)):
+        torch.nn.init.kaiming_normal_(module.weight, **init_kwargs)
+
+
+class SRNRaymarchFunction(Configurable, torch.nn.Module):
+    n_harmonic_functions: int = 3  # 0 means raw 3D coord inputs
+    n_hidden_units: int = 256
+    n_layers: int = 2
+    in_features: int = 3
+    out_features: int = 256
+    latent_dim: int = 0
+    xyz_in_camera_coords: bool = False
+
+    # The internal network may be supplied from outside, e.g. as the output
+    # of an SRNHyperNet. It is passed wrapped in a 1-tuple so that Pytorch
+    # does not automatically register the raymarch_function module when
+    # this object is constructed.
+
+    # Ideally raymarch_function would be typed Optional[Tuple[Callable]],
+    # but Omegaconf.structured doesn't like that. TODO: revisit after new
+    # release of omegaconf including https://github.com/omry/omegaconf/pull/749 .
+    raymarch_function: Any = None
+
+    def __post_init__(self):
+        self._harmonic_embedding = HarmonicEmbedding(
+            self.n_harmonic_functions, append_input=True
+        )
+        # Input width of the network: HarmonicEmbedding's output dim for
+        # `in_features` input channels, plus `latent_dim` extra channels.
+        xyz_embedding_dim = HarmonicEmbedding.get_output_dim_static(
+            self.in_features, self.n_harmonic_functions, True
+        )
+        input_embedding_dim = xyz_embedding_dim + self.latent_dim
+
+        if self.raymarch_function is None:
+            # No external network was given: build a `pytorch_prototyping.FCBlock`.
+            self._net = pytorch_prototyping.FCBlock(
+                hidden_ch=self.n_hidden_units,
+                num_hidden_layers=self.n_layers,
+                in_features=input_embedding_dim,
+                out_features=self.out_features,
+            )
+        else:
+            # Unwrap the externally supplied network from its 1-tuple.
+            self._net = self.raymarch_function[0]
+
+    def forward(
+        self,
+        ray_bundle: ImplicitronRayBundle,
+        fun_viewpool=None,
+        camera: Optional[CamerasBase] = None,
+        global_code=None,
+        **kwargs,
+    ):
+        """
+        Computes per-point raymarching features for the points of `ray_bundle`.
+
+        Args:
+            ray_bundle: An ImplicitronRayBundle object containing the following variables:
+                origins: A tensor of shape `(minibatch, ..., 3)` denoting the
+                    origins of the sampling rays in world coords.
+                directions: A tensor of shape `(minibatch, ..., 3)`
+                    containing the direction vectors of sampling rays in world coords.
+                lengths: A tensor of shape `(minibatch, ..., num_points_per_ray)`
+                    containing the lengths at which the rays are sampled.
+            fun_viewpool: an optional callback with the signature
+                    fun_viewpool(points) -> pooled_features
+                where points is a [N_TGT x N x 3] tensor of world coords,
+                and pooled_features is a [N_TGT x ... x N_SRC x latent_dim] tensor
+                of the features pooled from the context images.
+
+        Returns:
+            raymarch_features: The output of the internal network, reshaped so
+                that its leading dims match those of the world-space ray points.
+            None: This function does not predict colors.
+        """
+        # Convert the ray parametrization to points in world coordinates.
+        # pyre-ignore[6]
+        points_world = ray_bundle_to_ray_points(ray_bundle)
+
+        net_input = create_embeddings_for_implicit_function(
+            xyz_world=points_world,
+            xyz_embedding_function=self._harmonic_embedding,
+            global_code=global_code,
+            fun_viewpool=fun_viewpool,
+            xyz_in_camera_coords=self.xyz_in_camera_coords,
+            camera=camera,
+        )
+
+        # The network is run on a 3-dimensional view of the embeddings;
+        # otherwise the SRN layers consume huge amounts of memory.
+        batch_size, n_channels = net_input.shape[0], net_input.shape[-1]
+        flat_features = self._net(net_input.view(batch_size, -1, n_channels))
+
+        # Restore the spatial layout of the ray points; the last dim is inferred.
+        spatial_shape = points_world.shape[:-1]
+        raymarch_features = flat_features.reshape(*spatial_shape, -1)
+
+        return raymarch_features, None
+
+
+class SRNPixelGenerator(Configurable, torch.nn.Module):
+    n_harmonic_functions: int = 4
+    n_hidden_units: int = 256
+    n_hidden_units_color: int = 128
+    n_layers: int = 2
+    in_features: int = 256
+    out_features: int = 3
+    ray_dir_in_camera_coords: bool = False
+
+    def __post_init__(self):
+        self._harmonic_embedding = HarmonicEmbedding(
+            self.n_harmonic_functions, append_input=True
+        )
+        self._net = pytorch_prototyping.FCBlock(
+            hidden_ch=self.n_hidden_units,
+            num_hidden_layers=self.n_layers,
+            in_features=self.in_features,
+            out_features=self.n_hidden_units,
+        )
+        self._density_layer = torch.nn.Linear(self.n_hidden_units, 1)
+        self._density_layer.apply(_kaiming_normal_init)
+        embedding_dim_dir = self._harmonic_embedding.get_output_dim(input_dims=3)
+        self._color_layer = torch.nn.Sequential(
+            LinearWithRepeat(
+                self.n_hidden_units + embedding_dim_dir,
+                self.n_hidden_units_color,
+            ),
+            torch.nn.LayerNorm([self.n_hidden_units_color]),
+            torch.nn.ReLU(inplace=True),
+            torch.nn.Linear(self.n_hidden_units_color, self.out_features),
+        )
+        self._color_layer.apply(_kaiming_normal_init)
+
+    # TODO: merge with NeuralRadianceFieldBase's _get_colors
+    def _get_colors(self, features: torch.Tensor, rays_directions: torch.Tensor):
+        """
+        This function takes per-point `features` predicted by `self._net`
+        and evaluates the color model in order to attach to each
+        point a 3D vector of its RGB color.
+        """
+        # Normalize the ray_directions to unit l2 norm.
+        rays_directions_normed = torch.nn.functional.normalize(rays_directions, dim=-1)
+        # Obtain the harmonic embedding of the normalized ray directions.
+        rays_embedding = self._harmonic_embedding(rays_directions_normed)
+        return self._color_layer((features, rays_embedding))
+
+    def forward(
+        self,
+        raymarch_features: torch.Tensor,
+        ray_bundle: ImplicitronRayBundle,
+        camera: Optional[CamerasBase] = None,
+        **kwargs,
+    ):
+        """
+        Args:
+            raymarch_features: Features from the raymarching network of shape
+                `(minibatch, ..., self.in_features)`
+            ray_bundle: An ImplicitronRayBundle object containing the following variables:
+                origins: A tensor of shape `(minibatch, ..., 3)` denoting the
+                    origins of the sampling rays in world coords.
+                directions: A tensor of shape `(minibatch, ..., 3)`
+                    containing the direction vectors of sampling rays in world coords.
+                lengths: A tensor of shape `(minibatch, ..., num_points_per_ray)`
+                    containing the lengths at which the rays are sampled.
+
+        Returns:
+            rays_densities: A tensor of shape `(minibatch, ..., num_points_per_ray, 1)`
+                denoting the opacity of each ray point.
+            rays_colors: A tensor of shape `(minibatch, ..., num_points_per_ray, 3)`
+                denoting the color of each ray point.
+        """
+        # raymarch_features.shape = [minibatch x ... x self.in_features]
+        features = self._net(raymarch_features)
+        # features.shape = [minibatch x ... x self.n_hidden_units]
+
+        if self.ray_dir_in_camera_coords:
+            if camera is None:
+                raise ValueError("Camera must be given if ray_dir_in_camera_coords")
+
+            directions = ray_bundle.directions @ camera.R
+        else:
+            directions = ray_bundle.directions
+
+        # NNs operate on the flattened rays; reshaping to the correct spatial size
+        features = features.reshape(*raymarch_features.shape[:-1], -1)
+
+        raw_densities = self._density_layer(features)
+
+        rays_colors = self._get_colors(features, directions)
+
+        return raw_densities, rays_colors
+
+
+class SRNRaymarchHyperNet(Configurable, torch.nn.Module):
+    """
+    A raymarching function whose forward mirrors that of SRNRaymarchFunction,
+    except that the weights are not parameters of the module: they are
+    the output of another network, the hypernet, which takes the global_code
+    as input. All the dataclass members of SRNRaymarchFunction are here with the
+    same meaning. In addition, there are members with names ending `_hypernet`
+    which affect the hypernet.
+
+    Because this class may be called repeatedly for the same global_code, the
+    output of the hypernet is cached in self.cached_srn_raymarch_function.
+    This member must be manually set to None whenever the global_code changes.
+    """
+
+    n_harmonic_functions: int = 3  # 0 means raw 3D coord inputs
+    n_hidden_units: int = 256
+    n_layers: int = 2
+    n_hidden_units_hypernet: int = 256
+    n_layers_hypernet: int = 1
+    in_features: int = 3
+    out_features: int = 256
+    latent_dim_hypernet: int = 0
+    latent_dim: int = 0
+    xyz_in_camera_coords: bool = False
+
+    def __post_init__(self):
+        # Input width of the generated raymarch network: HarmonicEmbedding's
+        # output dim for `in_features` channels plus `latent_dim` channels.
+        xyz_embedding_dim = HarmonicEmbedding.get_output_dim_static(
+            self.in_features, self.n_harmonic_functions, True
+        )
+        raymarch_input_embedding_dim = xyz_embedding_dim + self.latent_dim
+
+        # NOTE(review): `out_ch` is fed `n_hidden_units`, not `out_features`;
+        # the defaults coincide (256) -- confirm this is intended.
+        self._hypernet = hyperlayers.HyperFC(
+            hyper_in_ch=self.latent_dim_hypernet,
+            hyper_num_hidden_layers=self.n_layers_hypernet,
+            hyper_hidden_ch=self.n_hidden_units_hypernet,
+            hidden_ch=self.n_hidden_units,
+            num_hidden_layers=self.n_layers,
+            in_ch=raymarch_input_embedding_dim,
+            out_ch=self.n_hidden_units,
+        )
+        # Filled lazily by `forward`; holds the hypernet output as a 1-tuple.
+        self.cached_srn_raymarch_function: Optional[Tuple[SRNRaymarchFunction]] = None
+
+    def _run_hypernet(self, global_code: torch.Tensor) -> Tuple[SRNRaymarchFunction]:
+        """
+        Generates a raymarch network with the hypernet and wraps it in an
+        `SRNRaymarchFunction`, which is returned inside a 1-tuple.
+        """
+        # Settings forwarded unchanged to the wrapping `SRNRaymarchFunction`.
+        settings = {
+            "n_harmonic_functions": self.n_harmonic_functions,
+            "n_hidden_units": self.n_hidden_units,
+            "n_layers": self.n_layers,
+            "in_features": self.in_features,
+            "out_features": self.out_features,
+            "latent_dim": self.latent_dim,
+            "xyz_in_camera_coords": self.xyz_in_camera_coords,
+        }
+
+        # The hypernet turns the global code into the raymarch network.
+        net = self._hypernet(global_code)
+        wrapped = SRNRaymarchFunction(raymarch_function=(net,), **settings)
+
+        # Move the wrapper onto the device of the global code.
+        wrapped.to(global_code.device)
+
+        return (wrapped,)
+
+    def forward(
+        self,
+        ray_bundle: ImplicitronRayBundle,
+        fun_viewpool=None,
+        camera: Optional[CamerasBase] = None,
+        global_code=None,
+        **kwargs,
+    ):
+        """Evaluate the (cached) hypernet-generated raymarch function on the rays."""
+        if global_code is None:
+            raise ValueError("SRN Hypernetwork requires a non-trivial global code.")
+
+        # Run the hypernet only when nothing is cached; the cache is reused when
+        # this is called repeatedly (e.g. across LSTM iterations) for one code.
+        cached = self.cached_srn_raymarch_function
+        if cached is None:
+            cached = self._run_hypernet(global_code)
+            self.cached_srn_raymarch_function = cached
+        (srn_raymarch_function,) = cast(Tuple[SRNRaymarchFunction], cached)
+
+        # The global code was consumed by the hypernet, hence `None` below.
+        return srn_raymarch_function(
+            ray_bundle=ray_bundle,
+            fun_viewpool=fun_viewpool,
+            camera=camera,
+            global_code=None,
+        )
+
+
+@registry.register
+# pyre-fixme[13]: Uninitialized attribute
+class SRNImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
+    latent_dim: int = 0
+    raymarch_function: SRNRaymarchFunction
+    pixel_generator: SRNPixelGenerator
+
+    def __post_init__(self):
+        run_auto_creation(self)
+
+    def create_raymarch_function(self) -> None:
+        # `latent_dim` is taken from this class rather than from the args.
+        args = self.raymarch_function_args
+        raymarch_function = SRNRaymarchFunction(latent_dim=self.latent_dim, **args)
+        self.raymarch_function = raymarch_function
+
+    @classmethod
+    def raymarch_function_tweak_args(cls, type, args: DictConfig) -> None:
+        args.pop("latent_dim", None)
+
+    def forward(
+        self,
+        *,
+        ray_bundle: ImplicitronRayBundle,
+        fun_viewpool=None,
+        camera: Optional[CamerasBase] = None,
+        global_code=None,
+        raymarch_features: Optional[torch.Tensor] = None,
+        **kwargs,
+    ):
+        """Run the pixel generator if `raymarch_features` is given, else raymarch."""
+        if raymarch_features is None:
+            return self.raymarch_function(
+                ray_bundle=ray_bundle,
+                fun_viewpool=fun_viewpool,
+                camera=camera,
+                global_code=global_code,
+                **kwargs,
+            )
+        # Raymarch features are available: evaluate the pixel generator on them.
+        return self.pixel_generator(
+            raymarch_features=raymarch_features,
+            ray_bundle=ray_bundle,
+            camera=camera,
+            **kwargs,
+        )
+
+
+@registry.register
+# pyre-fixme[13]: Uninitialized attribute
+class SRNHyperNetImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
+    """
+    An implicit function whose SRNRaymarchFunction is generated by a
+    hypernetwork and then cached. Whenever the global_code
+    changes, `on_bind_args` must be called so that the cached
+    function is cleared.
+    """
+
+    latent_dim_hypernet: int = 0
+    latent_dim: int = 0
+    hypernet: SRNRaymarchHyperNet
+    pixel_generator: SRNPixelGenerator
+
+    def __post_init__(self):
+        run_auto_creation(self)
+
+    def create_hypernet(self) -> None:
+        # Both latent dims are taken from this class rather than from the args.
+        dim, dim_hyper = self.latent_dim, self.latent_dim_hypernet
+        self.hypernet = SRNRaymarchHyperNet(
+            latent_dim=dim, latent_dim_hypernet=dim_hyper, **self.hypernet_args
+        )
+
+    @classmethod
+    def hypernet_tweak_args(cls, type, args: DictConfig) -> None:
+        for key in ("latent_dim", "latent_dim_hypernet"):
+            args.pop(key, None)
+
+    def forward(
+        self,
+        *,
+        ray_bundle: ImplicitronRayBundle,
+        fun_viewpool=None,
+        camera: Optional[CamerasBase] = None,
+        global_code=None,
+        raymarch_features: Optional[torch.Tensor] = None,
+        **kwargs,
+    ):
+        """Run the pixel generator if `raymarch_features` is given, else the hypernet."""
+        if raymarch_features is None:
+            return self.hypernet(
+                ray_bundle=ray_bundle,
+                fun_viewpool=fun_viewpool,
+                camera=camera,
+                global_code=global_code,
+                **kwargs,
+            )
+        # Raymarch features are available: evaluate the pixel generator on them.
+        return self.pixel_generator(
+            raymarch_features=raymarch_features,
+            ray_bundle=ray_bundle,
+            camera=camera,
+            **kwargs,
+        )
+
+    def on_bind_args(self):
+        """
+        Clears the cached raymarch function, as the global_code may have changed.
+        """
+        self.hypernet.cached_srn_raymarch_function = None
diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/utils.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..25ec3fcb6ef6947666f7ce7bfbcdd591cd61ec81
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/implicit_function/utils.py
@@ -0,0 +1,219 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Callable, Optional
+
+import torch
+
+import torch.nn.functional as F
+from pytorch3d.common.compat import prod
+from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
+from pytorch3d.renderer import ray_bundle_to_ray_points
+from pytorch3d.renderer.cameras import CamerasBase
+
+
+def broadcast_global_code(embeds: torch.Tensor, global_code: torch.Tensor):
+    """
+    Appends `global_code` of shape (minibatch, dim) to the last dimension of
+    `embeds` of shape (minibatch, ..., dim2), after expanding the code over all
+    the dimensions of `embeds` that lie between the batch and the feature axis.
+    """
+    batch_size, n_dims = embeds.shape[0], embeds.ndim
+    # insert singleton axes so that the code lines up with every dim of `embeds`
+    code = global_code.view(batch_size, *((1,) * (n_dims - 2)), -1)
+    target_shape = (*embeds.shape[:-1], global_code.shape[-1])
+    code = code.expand(target_shape)
+    return torch.cat((embeds, code), dim=-1)
+
+
+def create_embeddings_for_implicit_function(
+    xyz_world: torch.Tensor,
+    xyz_in_camera_coords: bool,
+    global_code: Optional[torch.Tensor],
+    camera: Optional[CamerasBase],
+    fun_viewpool: Optional[Callable],
+    xyz_embedding_function: Optional[Callable],
+    diag_cov: Optional[torch.Tensor] = None,
+) -> torch.Tensor:
+    """
+    Builds the per-point inputs of an implicit function by concatenating, along the
+    last dimension, the (optional) embedding of the point coordinates, the (optional)
+    view-pooled features and the (optional) global code.
+
+    Args:
+        xyz_world: points of shape (minibatch, *spatial_size, pts_per_ray, 3).
+        xyz_in_camera_coords: if True, the points are transformed to the view
+            coordinates of `camera` before they are embedded.
+        global_code: if given, a (minibatch, dim) tensor appended to every embedding.
+        camera: required if `xyz_in_camera_coords` is True (ValueError otherwise).
+        fun_viewpool: if given, called with the points flattened to
+            (minibatch, n_points, 3); its output is reshaped to
+            (minibatch, n_src, prod(spatial_size), pts_per_ray, -1).
+        xyz_embedding_function: if given, called as f(points, diag_cov=diag_cov).
+        diag_cov: passed on to `xyz_embedding_function` unchanged.
+
+    Returns:
+        Tensor of shape (minibatch, n_src, prod(spatial_size), pts_per_ray, dim),
+        where n_src is 1 unless `fun_viewpool` is given.
+    """
+    bs, *spatial_size, pts_per_ray, _ = xyz_world.shape
+    n_rays = prod(spatial_size)
+
+    if xyz_in_camera_coords:
+        if camera is None:
+            raise ValueError("Camera must be given if xyz_in_camera_coords")
+        ray_points_for_embed = (
+            camera.get_world_to_view_transform()
+            .transform_points(xyz_world.reshape(bs, -1, 3))  # may be non-contiguous
+            .view(xyz_world.shape)
+        )
+    else:
+        ray_points_for_embed = xyz_world
+
+    if xyz_embedding_function is None:
+        # zero-width placeholder; it has to live on the device of the points,
+        # otherwise the concatenations below fail for non-CPU inputs
+        embeds = torch.empty(bs, 1, n_rays, pts_per_ray, 0, device=xyz_world.device)
+    else:
+        embeds = xyz_embedding_function(ray_points_for_embed, diag_cov=diag_cov)
+        embeds = embeds.reshape(bs, 1, n_rays, pts_per_ray, -1)  # add n_src dim
+
+    if fun_viewpool is not None:
+        embeds_viewpooled = fun_viewpool(xyz_world.reshape(bs, -1, 3))
+        embed_shape = (bs, embeds_viewpooled.shape[1], n_rays, pts_per_ray, -1)
+        embeds_viewpooled = embeds_viewpooled.reshape(*embed_shape)
+        # `embeds` is always a tensor here; repeat it over the n_src dimension
+        embeds = torch.cat([embeds.expand(*embed_shape), embeds_viewpooled], dim=-1)
+
+    if global_code is not None:
+        # append the broadcasted global code to embeds
+        embeds = broadcast_global_code(embeds, global_code)
+
+    return embeds
+
+
+def interpolate_line(
+    points: torch.Tensor,
+    source: torch.Tensor,
+    **kwargs,
+) -> torch.Tensor:
+    """
+    Linearly interpolates values of 1D feature grids at the given points.
+    Every point is a single coordinate, i.e. `points` looks like
+    [[x0], [x1], ...], and every grid in `source` stores its features first,
+    followed by the spatial dimension.
+
+    Arguments:
+        points: shape (n_grids, n_points, 1),
+        source: tensor of shape (n_grids, features, width),
+        **kwargs: passed on to torch.nn.functional.grid_sample.
+    Returns:
+        interpolated tensor of shape (n_grids, n_points, features)
+    """
+    # torch.nn.functional.grid_sample needs points with 2 coordinates, so
+    # the line is sampled as an image of height 1 and every point
+    # gets a dummy second coordinate equal to zero.
+    padded_points = torch.cat((points, torch.zeros_like(points)), dim=-1)
+
+    sampled = F.grid_sample(
+        input=source.unsqueeze(2),  # (n_grids, features, 1, width)
+        grid=padded_points.unsqueeze(2),  # (n_grids, n_points, 1, 2)
+        **kwargs,
+    )
+
+    # (n_grids, features, n_points, 1) -> (n_grids, n_points, features)
+    return sampled[..., 0].transpose(1, 2)
+
+
+def interpolate_plane(
+    points: torch.Tensor,
+    source: torch.Tensor,
+    **kwargs,
+) -> torch.Tensor:
+    """
+    Bilinearly interpolates values of 2D feature grids at the given points.
+    Every point is a pair of coordinates, i.e. `points` looks like
+    [[x0, y0], [x1, y1], ...], and every grid in `source` stores its features
+    first, followed by the two spatial dimensions.
+
+    Arguments:
+        points: shape (n_grids, n_points, 2),
+        source: tensor of shape (n_grids, features, width, height),
+        **kwargs: passed on to torch.nn.functional.grid_sample.
+    Returns:
+        interpolated tensor of shape (n_grids, n_points, features)
+    """
+    # torch.nn.functional.grid_sample works with (features, height, width)
+    # while the grids are stored as (features, width, height),
+    # so the two spatial axes are swapped first.
+    sampled = F.grid_sample(
+        input=source.transpose(2, 3),
+        grid=points.unsqueeze(2),  # (n_grids, n_points, 1, 2)
+        **kwargs,
+    )
+
+    # (n_grids, features, n_points, 1) -> (n_grids, n_points, features)
+    return sampled[..., 0].transpose(1, 2)
+
+
+def interpolate_volume(
+    points: torch.Tensor, source: torch.Tensor, **kwargs
+) -> torch.Tensor:
+    """
+    Interpolates values of 3D feature grids at the given points. Every point is
+    a triplet of coordinates, i.e. `points` looks like
+    [[x0, y0, z0], [x1, y1, z1], ...], and every grid in `source` stores its
+    features first, followed by the three spatial dimensions.
+
+    Arguments:
+        points: shape (n_grids, n_points, 3),
+        source: tensor of shape (n_grids, features, width, height, depth),
+        **kwargs: passed on to torch.nn.functional.grid_sample; mode="trilinear"
+            is replaced by "bilinear" before the call.
+    Returns:
+        interpolated tensor of shape (n_grids, n_points, features)
+    """
+    # work on a copy, so that the mapping of the caller is never modified
+    if kwargs.get("mode") == "trilinear":
+        kwargs = {**kwargs, "mode": "bilinear"}
+
+    sampled = F.grid_sample(
+        # (features, width, height, depth) -> (features, depth, height, width)
+        input=source.transpose(2, 4),
+        grid=points[:, :, None, None, :],
+        **kwargs,
+    )
+    # (n_grids, features, n_points, 1, 1) -> (n_grids, n_points, features)
+    return sampled[..., 0, 0].transpose(1, 2)
+
+
+def get_rays_points_world(
+    ray_bundle: Optional[ImplicitronRayBundle] = None,
+    rays_points_world: Optional[torch.Tensor] = None,
+) -> torch.Tensor:
+    """
+    Returns the ray points in world coordinates from exactly one of the two
+    inputs: `rays_points_world` is returned as it is, a `ray_bundle` is converted.
+
+    Args:
+        ray_bundle: An ImplicitronRayBundle object or None
+        rays_points_world: A torch.Tensor representing ray points converted to
+            world coordinates
+    Returns:
+        A torch.Tensor representing ray points converted to world coordinates
+            of shape [minibatch x ... x pts_per_ray x 3].
+    Raises:
+        ValueError: if both arguments are given or if both of them are None.
+    """
+    if ray_bundle is None:
+        if rays_points_world is None:
+            raise ValueError("ray_bundle and rays_points_world cannot both be None")
+        return rays_points_world
+    if rays_points_world is not None:
+        message = "Cannot define both rays_points_world and ray_bundle,"
+        raise ValueError(message + " one has to be None.")
+    # pyre-ignore[6]
+    return ray_bundle_to_ray_points(ray_bundle)
diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/voxel_grid.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/voxel_grid.py
new file mode 100644
index 0000000000000000000000000000000000000000..8115d072dafa62ee1fb455711aa32198adf233a9
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/implicit_function/voxel_grid.py
@@ -0,0 +1,1137 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+This file contains classes that implement voxel grids, both in their full resolution
+and in factorized forms. Two factorized forms are implemented: Tensor rank decomposition,
+also known as CANDECOMP/PARAFAC (here CP), and Vector Matrix (here VM) factorization from
+the TensoRF (https://arxiv.org/abs/2203.09517) paper.
+
+In addition, the module VoxelGridModule implements a trainable instance of one of
+these classes.
+
+"""
+
+import logging
+import warnings
+from collections.abc import Mapping
+from dataclasses import dataclass, field
+
+from distutils.version import LooseVersion
+from typing import Any, Callable, ClassVar, Dict, Iterator, List, Optional, Tuple, Type
+
+import torch
+from omegaconf import DictConfig
+from pytorch3d.implicitron.tools.config import (
+    Configurable,
+    registry,
+    ReplaceableBase,
+    run_auto_creation,
+)
+from pytorch3d.structures.volumes import VolumeLocator
+
+from .utils import interpolate_line, interpolate_plane, interpolate_volume
+
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class VoxelGridValuesBase:
+    pass  # no fields of its own: the common base of the *VoxelGridValues dataclasses
+
+
+class VoxelGridBase(ReplaceableBase, torch.nn.Module):
+    """
+    Base class for all the voxel grid variants with added trilinear interpolation between
+    voxels (for example if voxel (0.333, 1, 3) is queried that would return the result
+    2/3*voxel[0, 1, 3] + 1/3*voxel[1, 1, 3])
+
+    Internally voxel grids are indexed by (features, x, y, z). If the queried point is not
+    inside the voxel grid, the vector that will be returned is determined by padding.
+
+    Members:
+        align_corners: parameter used in torch.functional.grid_sample. For details go to
+            https://pytorch.org/docs/stable/generated/torch.nn.functional.grid_sample.html.
+            Default is True
+        padding: padding mode for outside grid values 'zeros' | 'border' | 'reflection'.
+            Default is 'zeros'
+        mode: interpolation mode to calculate output values :
+            'bilinear' | 'nearest' | 'bicubic' | 'trilinear'.
+            Default: 'bilinear' Note: mode='bicubic' supports only FullResolutionVoxelGrid.
+            When mode='bilinear' and the input is 5-D, the interpolation mode used internally
+            will actually be trilinear.
+        n_features: number of dimensions of base feature vector. Determines how many features
+            the grid returns.
+        resolution_changes: a dictionary, where keys are change epochs and values are
+            3-tuples containing x, y, z grid sizes corresponding to each axis to each epoch
+    """
+
+    align_corners: bool = True
+    padding: str = "zeros"
+    mode: str = "bilinear"
+    n_features: int = 1
+    # return the line below once we drop OmegaConf 2.1 support
+    # resolution_changes: Dict[int, List[int]] = field(
+    resolution_changes: Dict[int, Any] = field(
+        default_factory=lambda: {0: [128, 128, 128]}
+    )
+
+    def __post_init__(self):
+        if 0 not in self.resolution_changes:
+            raise ValueError("There has to be key `0` in `resolution_changes`.")
+
+    def evaluate_world(
+        self,
+        points: torch.Tensor,
+        grid_values: VoxelGridValuesBase,
+        locator: VolumeLocator,
+    ) -> torch.Tensor:
+        """
+        Evaluates the voxel grid at points in the world coordinate frame.
+        The interpolation type is determined by the `mode` member.
+
+        Arguments:
+            points (torch.Tensor): tensor of points that you want to query
+                of a form (n_grids, ..., 3)
+            grid_values: an object of type Class.values_type which has tensors as
+                members which have shapes derived from the get_shapes() method
+            locator: a VolumeLocator object
+        Returns:
+            torch.Tensor: shape (n_grids, ..., n_features)
+        """
+        points_local = locator.world_to_local_coords(points)
+        return self.evaluate_local(points_local, grid_values)
+
+    def evaluate_local(
+        self, points: torch.Tensor, grid_values: VoxelGridValuesBase
+    ) -> torch.Tensor:
+        """
+        Evaluates the voxel grid at points in the local coordinate frame.
+        The interpolation type is determined by the `mode` member.
+
+        Arguments:
+            points (torch.Tensor): tensor of points that you want to query
+                of a form (n_grids, ..., 3), in a normalized form (coordinates are in [-1, 1])
+            grid_values: an object of type self.values_type which has tensors
+                as members which have shapes derived from the get_shapes() method
+        Returns:
+            torch.Tensor: shape (n_grids, ..., n_features)
+        """
+        raise NotImplementedError()
+
+    def get_shapes(self, epoch: int) -> Dict[str, Tuple]:
+        """
+        Using parameters from the __init__ method, this method returns the
+        shapes of individual tensors needed to run the evaluate method.
+
+        Args:
+            epoch: If the shape varies during training, which training epoch's shape to return.
+        Returns:
+            a dictionary of needed shapes. To use the evaluate_local and evaluate_world methods
+                replace the shapes in the dictionary with tensors of those shapes and add the
+                first 'batch' dimension. If the required shape is (a, b) and you want to
+                have g grids then the tensor that replaces the shape should have the
+                shape (g, a, b).
+        """
+        raise NotImplementedError()
+
+    def get_resolution(self, epoch: int) -> List[int]:
+        """
+        Returns the resolution which the grid should have at specific epoch
+
+        Args:
+            epoch which to use in the resolution calculation
+        Returns:
+            resolution at specific epoch
+        """
+        last_change = 0
+        for change_epoch in self.resolution_changes:
+            if change_epoch <= epoch:
+                last_change = max(last_change, change_epoch)
+        return self.resolution_changes[last_change]
+
+    @staticmethod
+    def get_output_dim(args: DictConfig) -> int:
+        """
+        Given all the arguments of the grid's __init__, returns output's last dimension length.
+
+        In particular, if self.evaluate_world or self.evaluate_local
+        are called with `points` of shape (n_grids, n_points, 3),
+        their output will be of shape
+        (n_grids, n_points, grid.get_output_dim()).
+
+        Args:
+            args: DictConfig which would be used to initialize the object
+        Returns:
+            output's last dimension length
+        """
+        return args["n_features"]
+
+    def change_resolution(
+        self,
+        grid_values: VoxelGridValuesBase,
+        *,
+        epoch: Optional[int] = None,
+        grid_values_with_wanted_resolution: Optional[VoxelGridValuesBase] = None,
+        mode: str = "linear",
+        align_corners: bool = True,
+        antialias: bool = False,
+    ) -> Tuple[VoxelGridValuesBase, bool]:
+        """
+        Changes resolution of tensors in `grid_values` to match the
+        `grid_values_with_wanted_resolution` or resolution on wanted epoch.
+
+        Args:
+            grid_values: instance of self.values_type which contains
+                the voxel grid which will be interpolated to create the new grid
+            epoch: current training epoch. Regridding happens only if it is a key of
+                `self.resolution_changes`; the new shapes are `self.get_shapes(epoch)`.
+            grid_values_with_wanted_resolution: `VoxelGridValuesBase` to whose resolution
+                to interpolate grid_values
+            align_corners: as for torch.nn.functional.interpolate. Not passed on for
+                the modes 'nearest', 'area' and 'nearest-exact', which do not accept it.
+            mode: as for torch.nn.functional.interpolate
+                'nearest' | 'bicubic' | 'linear' | 'area' | 'nearest-exact'.
+                Default: 'linear'
+            antialias: as for torch.nn.functional.interpolate. Using anti-alias option
+                 together with align_corners=False and mode='bicubic', interpolation
+                 result would match Pillow result for downsampling operation.
+                 Supported mode: 'bicubic'
+        Returns:
+            tuple of
+                - new voxel grid_values of desired resolution, of type self.values_type
+                - True if regridding has happened.
+        """
+        if (epoch is None) == (grid_values_with_wanted_resolution is None):
+            raise ValueError(
+                "Exactly one of `epoch` or "
+                "`grid_values_with_wanted_resolution` has to be defined."
+            )
+
+        if mode not in ("nearest", "bicubic", "linear", "area", "nearest-exact"):
+            raise ValueError(
+                "`mode` should be one of the following 'nearest'"
+                + "| 'bicubic' | 'linear' | 'area' | 'nearest-exact'"
+            )
+
+        interpolate_has_antialias = LooseVersion(torch.__version__) >= "1.11"
+
+        if antialias and not interpolate_has_antialias:
+            warnings.warn("Antialiased interpolation requires PyTorch 1.11+; ignoring")
+
+        interp_kwargs = {"antialias": antialias} if interpolate_has_antialias else {}
+        # `interpolate` raises if `align_corners` is set for a non-interpolating mode
+        align = None if mode in ("nearest", "area", "nearest-exact") else align_corners
+
+        def change_individual_resolution(tensor, wanted_resolution):
+            if mode == "linear":
+                n_dim = len(wanted_resolution)
+                new_mode = ("linear", "bilinear", "trilinear")[n_dim - 1]
+            else:
+                new_mode = mode
+            return torch.nn.functional.interpolate(
+                input=tensor,
+                size=wanted_resolution,
+                mode=new_mode,
+                align_corners=align,
+                recompute_scale_factor=False,
+                **interp_kwargs,
+            )
+
+        if epoch is not None:
+            if epoch not in self.resolution_changes:
+                return grid_values, False
+
+            wanted_shapes = self.get_shapes(epoch=epoch)
+            params = {
+                name: change_individual_resolution(
+                    getattr(grid_values, name), shape[1:]
+                )
+                for name, shape in wanted_shapes.items()
+            }
+            res = self.get_resolution(epoch)
+            logger.info(f"Changed grid resolution at epoch {epoch} to {res}")
+        else:
+            params = {
+                name: (
+                    change_individual_resolution(
+                        getattr(grid_values, name), tensor.shape[2:]
+                    )
+                    if tensor is not None
+                    else None
+                )
+                for name, tensor in vars(grid_values_with_wanted_resolution).items()
+            }
+
+        return self.values_type(**params), True
+
+    def get_resolution_change_epochs(self) -> Tuple[int, ...]:
+        """
+        Returns epochs at which this grid should change resolution.
+        """
+        return tuple(self.resolution_changes.keys())
+
+    def get_align_corners(self) -> bool:
+        """
+        Returns True if voxel grid uses align_corners=True
+        """
+        return self.align_corners
+
+    def crop_world(
+        self,
+        min_point_world: torch.Tensor,
+        max_point_world: torch.Tensor,
+        grid_values: VoxelGridValuesBase,
+        volume_locator: VolumeLocator,
+    ) -> VoxelGridValuesBase:
+        """
+        Crops the voxel grid based on minimum and maximum occupied point in
+        world coordinates. After cropping all 8 corner points are preserved in
+        the voxel grid. This is achieved by preserving all the voxels needed to
+        calculate the point.
+
+           +--------B
+          /        /|
+         /        / |
+        +--------+  |  <==== Bounding box represented by points A and B:
+        |        |  |           - B has x, y and z coordinates bigger or equal
+        |        |  +              to all other points of the object
+        |        | /            - A has x, y and z coordinates smaller or equal
+        |        |/                to all other points of the object
+        A--------+
+
+        Args:
+            min_point_world: torch.Tensor of shape (3,). Has x, y and z coordinates
+                smaller or equal to all other occupied points. Point A from the
+                picture above.
+            max_point_world: torch.Tensor of shape (3,). Has x, y and z coordinates
+                bigger or equal to all other occupied points. Point B from the
+                picture above.
+            grid_values: instance of self.values_type which contains
+                the voxel grid which will be cropped to create the new grid
+            volume_locator: VolumeLocator object used to convert world to local
+                coordinates
+        Returns:
+            instance of self.values_type which has volume cropped to desired size.
+        """
+        min_point_local = volume_locator.world_to_local_coords(min_point_world[None])[0]
+        max_point_local = volume_locator.world_to_local_coords(max_point_world[None])[0]
+        return self.crop_local(min_point_local, max_point_local, grid_values)
+
+    def crop_local(
+        self,
+        min_point_local: torch.Tensor,
+        max_point_local: torch.Tensor,
+        grid_values: VoxelGridValuesBase,
+    ) -> VoxelGridValuesBase:
+        """
+        Crops the voxel grid based on minimum and maximum occupied point in local
+        coordinates. After cropping both min and max point are preserved in the voxel
+        grid. This is achieved by preserving all the voxels needed to calculate the point.
+
+           +--------B
+          /        /|
+         /        / |
+        +--------+  |  <==== Bounding box represented by points A and B:
+        |        |  |           - B has x, y and z coordinates bigger or equal
+        |        |  +              to all other points of the object
+        |        | /            - A has x, y and z coordinates smaller or equal
+        |        |/                to all other points of the object
+        A--------+
+
+        Args:
+            min_point_local: torch.Tensor of shape (3,). Has x, y and z coordinates
+                smaller or equal to all other occupied points. Point A from the
+                picture above. All elements in [-1, 1].
+            max_point_local: torch.Tensor of shape (3,). Has x, y and z coordinates
+                bigger or equal to all other occupied points. Point B from the
+                picture above. All elements in [-1, 1].
+            grid_values: instance of self.values_type which contains
+                the voxel grid which will be cropped to create the new grid
+        Returns:
+            instance of self.values_type which has volume cropped to desired size.
+        """
+        raise NotImplementedError()
+
+
+@dataclass
+class FullResolutionVoxelGridValues(VoxelGridValuesBase):
+    voxel_grid: torch.Tensor  # (n_grids, n_features, width, height, depth)
+
+
+@registry.register
+class FullResolutionVoxelGrid(VoxelGridBase):
+    """
+    Full resolution voxel grid equivalent to 4D tensor where shape is
+    (features, width, height, depth) with linear interpolation between voxels.
+    """
+
+    # the type of grid_values argument needed to run evaluate_local()
+    values_type: ClassVar[Type[VoxelGridValuesBase]] = FullResolutionVoxelGridValues
+
+    # pyre-fixme[14]: `evaluate_local` overrides method defined in `VoxelGridBase`
+    #  inconsistently.
+    def evaluate_local(
+        self, points: torch.Tensor, grid_values: FullResolutionVoxelGridValues
+    ) -> torch.Tensor:
+        """
+        Evaluates the voxel grid at points in the local coordinate frame.
+        The interpolation type is determined by the `mode` member.
+
+        Arguments:
+            points: query points of shape (n_grids, ..., 3), normalized to [-1, 1]
+            grid_values: an object of type values_type which has tensors as
+                members which have shapes derived from the get_shapes() method
+        Returns:
+            torch.Tensor: shape (n_grids, ..., n_features)
+        """
+        # (n_grids, n_points_total, 3); reshape also accepts non-contiguous points
+        recorded_shape = points.shape
+        points = points.reshape(points.shape[0], -1, points.shape[-1])
+        interpolated = interpolate_volume(
+            points,
+            grid_values.voxel_grid,
+            align_corners=self.align_corners,
+            padding_mode=self.padding,
+            mode=self.mode,
+        )
+        return interpolated.view(*recorded_shape[:-1], -1)
+
+    def get_shapes(self, epoch: int) -> Dict[str, Tuple]:
+        width, height, depth = self.get_resolution(epoch)
+        return {"voxel_grid": (self.n_features, width, height, depth)}
+
+    # pyre-ignore[14]
+    def crop_local(
+        self,
+        min_point_local: torch.Tensor,
+        max_point_local: torch.Tensor,
+        grid_values: FullResolutionVoxelGridValues,
+    ) -> FullResolutionVoxelGridValues:
+        assert torch.all(min_point_local < max_point_local)
+        _, _, width, height, depth = grid_values.voxel_grid.shape
+        resolution = grid_values.voxel_grid.new_tensor([width, height, depth])
+        min_point_local01 = (torch.clamp(min_point_local, -1, 1) + 1) / 2
+        max_point_local01 = (torch.clamp(max_point_local, -1, 1) + 1) / 2
+
+        if self.align_corners:
+            minx, miny, minz = torch.floor(min_point_local01 * (resolution - 1)).long()
+            maxx, maxy, maxz = torch.ceil(max_point_local01 * (resolution - 1)).long()
+        else:
+            minx, miny, minz = torch.floor(min_point_local01 * resolution - 0.5).long()
+            maxx, maxy, maxz = torch.ceil(max_point_local01 * resolution - 0.5).long()
+            # here the border lies half a voxel before the first voxel centre, so the
+            # floor can be -1, which the slicing below would read as "last voxel"
+            minx, miny, minz = minx.clamp(min=0), miny.clamp(min=0), minz.clamp(min=0)
+
+        return FullResolutionVoxelGridValues(
+            voxel_grid=grid_values.voxel_grid[
+                :, :, minx : maxx + 1, miny : maxy + 1, minz : maxz + 1
+            ]
+        )
+
+
+@dataclass
+class CPFactorizedVoxelGridValues(VoxelGridValuesBase):
+    vector_components_x: torch.Tensor  # (n_grids, n_components, width)
+    vector_components_y: torch.Tensor  # (n_grids, n_components, height)
+    vector_components_z: torch.Tensor  # (n_grids, n_components, depth)
+    basis_matrix: Optional[torch.Tensor] = None  # (n_grids, n_components, n_features)
+
+
+@registry.register
+class CPFactorizedVoxelGrid(VoxelGridBase):
+    """
+    Canonical Polyadic (CP/CANDECOMP/PARAFAC) Factorization factorizes the 3d grid into three
+    vectors (x, y, z). For n_components=n, the 3d grid is a sum of n terms, each being the
+    outer product (call it ⊗) of one vector of each type (x, y, z):
+
+    3d_grid = x1 ⊗ y1 ⊗ z1 + x2 ⊗ y2 ⊗ z2 + ... + xn ⊗ yn ⊗ zn
+
+    These tensors are passed in an object of CPFactorizedVoxelGridValues (here obj) as
+    obj.vector_components_x, obj.vector_components_y, obj.vector_components_z. Their shapes are
+    `(n_grids, n_components, r)` where `r` is the relevant resolution.
+
+    Each element of this sum has an extra dimension, which gets matrix-multiplied by an
+    appropriate "basis matrix" of shape (n_grids, n_components, n_features). This multiplication
+    brings us to the desired "n_features" dimensionality. If basis_matrix=False the elements
+    of different components are summed together instead, which gives a single feature.
+    With some notation abuse, ignoring the interpolation operation, simplifying and denoting
+    n_features as F, n_components as C and n_grids as G:
+
+    3d_grid = (x ⊗ y ⊗ z) @ basis # GWHDC x GCF -> GWHDF
+
+    The basis feature vectors are passed as obj.basis_matrix.
+
+    Members:
+        n_components: number of vector triplets, higher number gives better approximation.
+        basis_matrix: how to transform components. If basis_matrix=True, the result
+            matrix of shape (n_grids, n_points_total, n_components) is batch matrix multiplied
+            by the basis_matrix of shape (n_grids, n_components, n_features). If
+            basis_matrix=False, the result tensor of (n_grids, n_points_total, n_components)
+            is summed along the last dimension to get (n_grids, n_points_total, 1), which is
+            then viewed to return to starting shape (n_grids, ..., 1).
+    """
+
+    # the type of grid_values argument needed to run evaluate_local()
+    values_type: ClassVar[Type[VoxelGridValuesBase]] = CPFactorizedVoxelGridValues
+
+    n_components: int = 24
+    basis_matrix: bool = True
+
+    # pyre-fixme[14]: `evaluate_local` overrides method defined in `VoxelGridBase`
+    #  inconsistently.
+    def evaluate_local(
+        self, points: torch.Tensor, grid_values: CPFactorizedVoxelGridValues
+    ) -> torch.Tensor:
+        def factor(axis):
+            i = {"x": 0, "y": 1, "z": 2}[axis]
+            index = points[..., i, None]
+            vector = getattr(grid_values, "vector_components_" + axis)
+            return interpolate_line(
+                index,
+                vector,
+                align_corners=self.align_corners,
+                padding_mode=self.padding,
+                mode=self.mode,
+            )
+
+        # flatten to (n_grids, n_points_total, 3); `factor` reads this flattened `points`
+        recorded_shape = points.shape
+        points = points.reshape(points.shape[0], -1, points.shape[-1])
+
+        # collect points from all the vectors and multiply them out
+        mult = factor("x") * factor("y") * factor("z")
+        # reduce the result from
+        # (n_grids, n_points_total, n_components) to (n_grids, n_points_total, n_features)
+        if grid_values.basis_matrix is not None:
+            # (n_grids, n_points_total, n_features) =
+            # (n_grids, n_points_total, total_n_components) @
+            # (n_grids, total_n_components, n_features)
+            result = torch.bmm(mult, grid_values.basis_matrix)
+        else:
+            # (n_grids, n_points_total, 1) from (n_grids, n_points_total, n_components)
+            result = mult.sum(dim=-1, keepdim=True)
+        # (n_grids, ..., n_features)
+        return result.view(*recorded_shape[:-1], -1)
+
+    def get_shapes(self, epoch: int) -> Dict[str, Tuple[int, int]]:
+        if self.basis_matrix is False and self.n_features != 1:
+            raise ValueError("Cannot set basis_matrix=False and n_features to != 1")
+
+        width, height, depth = self.get_resolution(epoch=epoch)
+        shape_dict = {
+            "vector_components_x": (self.n_components, width),
+            "vector_components_y": (self.n_components, height),
+            "vector_components_z": (self.n_components, depth),
+        }
+        if self.basis_matrix:
+            shape_dict["basis_matrix"] = (self.n_components, self.n_features)
+        return shape_dict
+
+    # pyre-ignore[14]
+    def crop_local(
+        self,
+        min_point_local: torch.Tensor,
+        max_point_local: torch.Tensor,
+        grid_values: CPFactorizedVoxelGridValues,
+    ) -> CPFactorizedVoxelGridValues:
+        assert torch.all(min_point_local < max_point_local)
+        _, _, width = grid_values.vector_components_x.shape
+        _, _, height = grid_values.vector_components_y.shape
+        _, _, depth = grid_values.vector_components_z.shape
+        resolution = grid_values.vector_components_x.new_tensor([width, height, depth])
+        min_point_local01 = (torch.clamp(min_point_local, -1, 1) + 1) / 2
+        max_point_local01 = (torch.clamp(max_point_local, -1, 1) + 1) / 2
+
+        if self.align_corners:
+            minx, miny, minz = torch.floor(min_point_local01 * (resolution - 1)).long()
+            maxx, maxy, maxz = torch.ceil(max_point_local01 * (resolution - 1)).long()
+        else:
+            minx, miny, minz = torch.floor(min_point_local01 * resolution - 0.5).long()
+            maxx, maxy, maxz = torch.ceil(max_point_local01 * resolution - 0.5).long()
+            # here the border lies half a voxel before the first voxel centre, so the
+            # floor can be -1, which the slicing below would read as "last element"
+            minx, miny, minz = minx.clamp(min=0), miny.clamp(min=0), minz.clamp(min=0)
+
+        return CPFactorizedVoxelGridValues(
+            vector_components_x=grid_values.vector_components_x[:, :, minx : maxx + 1],
+            vector_components_y=grid_values.vector_components_y[:, :, miny : maxy + 1],
+            vector_components_z=grid_values.vector_components_z[:, :, minz : maxz + 1],
+            basis_matrix=grid_values.basis_matrix,
+        )
+
+
+@dataclass
+class VMFactorizedVoxelGridValues(VoxelGridValuesBase):
+    # Tensors consumed by `VMFactorizedVoxelGrid`: three per-axis vectors, three
+    # per-plane matrices and an optional basis matrix.
+    #
+    # As laid out by `VMFactorizedVoxelGrid.get_shapes` (with a leading n_grids
+    # dimension), each vector has shape (n_grids, n_components, resolution) and
+    # shares its component count with the matrix of the complementary plane:
+    # x pairs with yz, y pairs with xz and z pairs with xy.
+    vector_components_x: torch.Tensor
+    vector_components_y: torch.Tensor
+    vector_components_z: torch.Tensor
+    # Matrices have shape (n_grids, n_components, r0, r1), where r0 and r1 are
+    # the resolutions of the two axes named in the field.
+    matrix_components_xy: torch.Tensor
+    matrix_components_yz: torch.Tensor
+    matrix_components_xz: torch.Tensor
+    # (n_grids, total_n_components, n_features); None means the components are
+    # summed instead of being multiplied by a basis matrix.
+    basis_matrix: Optional[torch.Tensor] = None
+
+
+@registry.register
+class VMFactorizedVoxelGrid(VoxelGridBase):
+    """
+    Implementation of Vector-Matrix Factorization of a tensor from
+    https://arxiv.org/abs/2203.09517.
+
+    Vector-Matrix Factorization factorizes the 3d grid into three matrices
+    (xy, xz, yz) and three vectors (x, y, z). For n_components=1, the 3d grid
+    is a sum of the outer products (call it ⊗) of each matrix with its
+    complementary vector:
+
+    3d_grid = xy ⊗ z + xz ⊗ y + yz ⊗ x.
+
+    These tensors are passed in a VMFactorizedVoxelGridValues object (here obj)
+    as obj.matrix_components_xy, obj.matrix_components_xz, obj.vector_components_y, etc.
+
+    Their shapes are `(n_grids, n_components, r0, r1)` for matrix_components and
+    `(n_grids, n_components, r2)` for vector_components. Each of `r0, r1 and r2` corresponds
+    to one resolution in (width, height and depth).
+
+    Each element of this sum has an extra dimension, which gets matrix-multiplied by an
+    appropriate "basis matrix" of shape (n_grids, n_components, n_features). This multiplication
+    brings us to the desired "n_features" dimensionality. If basis_matrix=False the elements
+    of different components are summed together to create (n_grids, n_components, 1) tensor.
+    With some notation abuse, ignoring the interpolation operation, simplifying and denoting
+    n_features as F, n_components as C (which can differ for each dimension) and n_grids as G:
+
+    3d_grid = concat((xy ⊗ z), (xz ⊗ y).permute(0, 2, 1),
+                (yz ⊗ x).permute(2, 0, 1)) @ basis_matrix # GWHDC x GCF -> GWHDF
+
+    Members:
+        n_components: total number of matrix vector pairs, this must be divisible by 3. Set
+            this if you want to have equal representational power in all 3 directions. You
+            must specify either n_components or distribution_of_components, you cannot
+            specify both.
+        distribution_of_components: if you do not want equal representational power in
+            all 3 directions specify a tuple of numbers of matrix_vector pairs for each
+            coordinate of a form (n_xy_planes, n_yz_planes, n_xz_planes). You must specify
+            either n_components or distribution_of_components, you cannot specify both.
+        basis_matrix: how to transform components. If basis_matrix=True the result
+            matrix of shape (n_grids, n_points_total, n_components) is batch matrix multiplied
+            by the basis matrix of shape (n_grids, n_components, n_features). If
+            basis_matrix=False, the result tensor of (n_grids, n_points_total, n_components)
+            is summed over its last dimension to get (n_grids, n_points_total, 1), which is
+            then viewed to return to the starting shape (n_grids, ..., 1).
+    """
+
+    # the type of grid_values argument needed to run evaluate_local()
+    values_type: ClassVar[Type[VoxelGridValuesBase]] = VMFactorizedVoxelGridValues
+
+    n_components: Optional[int] = None
+    distribution_of_components: Optional[Tuple[int, int, int]] = None
+    basis_matrix: bool = True
+
+    # pyre-fixme[14]: `evaluate_local` overrides method defined in `VoxelGridBase`
+    #  inconsistently.
+    def evaluate_local(
+        self, points: torch.Tensor, grid_values: VMFactorizedVoxelGridValues
+    ) -> torch.Tensor:
+        # (n_grids, n_points_total, n_features) from (n_grids, ..., n_features)
+        recorded_shape = points.shape
+        points = points.view(points.shape[0], -1, points.shape[-1])
+
+        # collect points from matrices and vectors and multiply them
+        a = interpolate_plane(
+            points[..., :2],
+            grid_values.matrix_components_xy,
+            align_corners=self.align_corners,
+            padding_mode=self.padding,
+            mode=self.mode,
+        ) * interpolate_line(
+            points[..., 2:],
+            grid_values.vector_components_z,
+            align_corners=self.align_corners,
+            padding_mode=self.padding,
+            mode=self.mode,
+        )
+        b = interpolate_plane(
+            points[..., [0, 2]],
+            grid_values.matrix_components_xz,
+            align_corners=self.align_corners,
+            padding_mode=self.padding,
+            mode=self.mode,
+        ) * interpolate_line(
+            points[..., 1:2],
+            grid_values.vector_components_y,
+            align_corners=self.align_corners,
+            padding_mode=self.padding,
+            mode=self.mode,
+        )
+        c = interpolate_plane(
+            points[..., 1:],
+            grid_values.matrix_components_yz,
+            align_corners=self.align_corners,
+            padding_mode=self.padding,
+            mode=self.mode,
+        ) * interpolate_line(
+            points[..., :1],
+            grid_values.vector_components_x,
+            align_corners=self.align_corners,
+            padding_mode=self.padding,
+            mode=self.mode,
+        )
+        # pyre-ignore[28]
+        feats = torch.cat((a, b, c), axis=-1)
+
+        # reduce the result from
+        # (n_grids, n_points, n_components) to (n_grids, n_points, n_features)
+        if grid_values.basis_matrix is not None:
+            # (n_grids, n_points, n_features) =
+            # (n_grids, n_points, total_n_components) x
+            #               (n_grids, total_n_components, n_features)
+            result = torch.bmm(feats, grid_values.basis_matrix)
+        else:
+            # pyre-ignore[28]
+            # (n_grids, n_points, 1) from (n_grids, n_points, n_features)
+            result = feats.sum(axis=-1, keepdim=True)
+        # (n_grids, ..., n_features)
+        return result.view(*recorded_shape[:-1], -1)
+
+    def get_shapes(self, epoch: int) -> Dict[str, Tuple]:
+        if self.basis_matrix is False and self.n_features != 1:
+            raise ValueError("Cannot set basis_matrix=False and n_features to != 1")
+        if self.distribution_of_components is None and self.n_components is None:
+            raise ValueError(
+                "You need to provide n_components or distribution_of_components"
+            )
+        if (
+            self.distribution_of_components is not None
+            and self.n_components is not None
+        ):
+            raise ValueError(
+                "You cannot define n_components and distribution_of_components"
+            )
+        # pyre-ignore[58]
+        if self.distribution_of_components is None and self.n_components % 3 != 0:
+            raise ValueError("n_components must be divisible by 3")
+        if self.distribution_of_components is None:
+            calculated_distribution_of_components = [
+                # pyre-fixme[58]: `//` is not supported for operand types
+                #  `Optional[int]` and `int`.
+                self.n_components // 3
+                for _ in range(3)
+            ]
+        else:
+            calculated_distribution_of_components = self.distribution_of_components
+
+        width, height, depth = self.get_resolution(epoch=epoch)
+        shape_dict = {
+            "vector_components_x": (
+                calculated_distribution_of_components[1],
+                width,
+            ),
+            "vector_components_y": (
+                calculated_distribution_of_components[2],
+                height,
+            ),
+            "vector_components_z": (
+                calculated_distribution_of_components[0],
+                depth,
+            ),
+            "matrix_components_xy": (
+                calculated_distribution_of_components[0],
+                width,
+                height,
+            ),
+            "matrix_components_yz": (
+                calculated_distribution_of_components[1],
+                height,
+                depth,
+            ),
+            "matrix_components_xz": (
+                calculated_distribution_of_components[2],
+                width,
+                depth,
+            ),
+        }
+        if self.basis_matrix:
+            shape_dict["basis_matrix"] = (
+                sum(calculated_distribution_of_components),
+                self.n_features,
+            )
+
+        return shape_dict
+
+    # pyre-ignore[14]
+    def crop_local(
+        self,
+        min_point_local: torch.Tensor,
+        max_point_local: torch.Tensor,
+        grid_values: VMFactorizedVoxelGridValues,
+    ) -> VMFactorizedVoxelGridValues:
+        assert torch.all(min_point_local < max_point_local)
+        min_point_local = torch.clamp(min_point_local, -1, 1)
+        max_point_local = torch.clamp(max_point_local, -1, 1)
+        _, _, width = grid_values.vector_components_x.shape
+        _, _, height = grid_values.vector_components_y.shape
+        _, _, depth = grid_values.vector_components_z.shape
+        resolution = grid_values.vector_components_x.new_tensor([width, height, depth])
+        min_point_local01 = (min_point_local + 1) / 2
+        max_point_local01 = (max_point_local + 1) / 2
+
+        if self.align_corners:
+            minx, miny, minz = torch.floor(min_point_local01 * (resolution - 1)).long()
+            maxx, maxy, maxz = torch.ceil(max_point_local01 * (resolution - 1)).long()
+        else:
+            minx, miny, minz = torch.floor(min_point_local01 * resolution - 0.5).long()
+            maxx, maxy, maxz = torch.ceil(max_point_local01 * resolution - 0.5).long()
+
+        return VMFactorizedVoxelGridValues(
+            vector_components_x=grid_values.vector_components_x[:, :, minx : maxx + 1],
+            vector_components_y=grid_values.vector_components_y[:, :, miny : maxy + 1],
+            vector_components_z=grid_values.vector_components_z[:, :, minz : maxz + 1],
+            matrix_components_xy=grid_values.matrix_components_xy[
+                :, :, minx : maxx + 1, miny : maxy + 1
+            ],
+            matrix_components_yz=grid_values.matrix_components_yz[
+                :, :, miny : maxy + 1, minz : maxz + 1
+            ],
+            matrix_components_xz=grid_values.matrix_components_xz[
+                :, :, minx : maxx + 1, minz : maxz + 1
+            ],
+            basis_matrix=grid_values.basis_matrix,
+        )
+
+
+# pyre-fixme[13]: Attribute `voxel_grid` is never initialized.
+class VoxelGridModule(Configurable, torch.nn.Module):
+    """
+    A wrapper torch.nn.Module for the VoxelGrid classes, which
+    contains parameters that are needed to train the VoxelGrid classes.
+    Can contain the parameters for the voxel grid as pytorch parameters
+    or as registered buffers.
+
+    Members:
+        voxel_grid_class_type: The name of the class to use for voxel_grid,
+            which must be available in the registry. Default FullResolutionVoxelGrid.
+        voxel_grid: An instance of `VoxelGridBase`. This is the object which
+            this class wraps.
+        extents: 3-tuple of a form (width, height, depth), denotes the size of the grid
+            in world units.
+        translation: 3-tuple of float. The center of the volume in world units as (x, y, z).
+        init_std: Parameters are initialized using the gaussian distribution
+            with mean=init_mean and std=init_std. Default 0.1
+        init_mean: Parameters are initialized using the gaussian distribution
+            with mean=init_mean and std=init_std. Default 0.
+        hold_voxel_grid_as_parameters: if True components of the underlying voxel grids
+            will be saved as parameters and therefore be trainable. Default True.
+        param_groups: dictionary where keys are names of individual parameters
+            or module members and values are the parameter group where the
+            parameter/member will be sorted to. "self" key is used to denote the
+            parameter group at the module level. Possible keys, including the "self" key
+            do not have to be defined. By default all parameters are put into "default"
+            parameter group and have the learning rate defined in the optimizer,
+            it can be overridden at the:
+                - module level with “self” key, all the parameters and child
+                    module's parameters will be put to that parameter group
+                - member level, which is the same as if the `param_groups` in that
+                    member has key=“self” and value equal to that parameter group.
+                    This is useful if members do not have `param_groups`, for
+                    example torch.nn.Linear.
+                - parameter level, parameter with the same name as the key
+                    will be put to that parameter group.
+    """
+
+    voxel_grid_class_type: str = "FullResolutionVoxelGrid"
+    voxel_grid: VoxelGridBase
+
+    extents: Tuple[float, float, float] = (2.0, 2.0, 2.0)
+    translation: Tuple[float, float, float] = (0.0, 0.0, 0.0)
+
+    init_std: float = 0.1
+    init_mean: float = 0
+
+    hold_voxel_grid_as_parameters: bool = True
+    param_groups: Dict[str, str] = field(default_factory=lambda: {})
+
+    def __post_init__(self):
+        run_auto_creation(self)
+        n_grids = 1  # Voxel grid objects are batched. We need only a single grid.
+        shapes = self.voxel_grid.get_shapes(epoch=0)
+        params = {
+            name: torch.normal(
+                mean=torch.zeros((n_grids, *shape)) + self.init_mean,
+                std=self.init_std,
+            )
+            for name, shape in shapes.items()
+        }
+
+        self.set_voxel_grid_parameters(self.voxel_grid.values_type(**params))
+        self._register_load_state_dict_pre_hook(self._create_parameters_with_new_size)
+
+    def forward(self, points: torch.Tensor) -> torch.Tensor:
+        """
+        Evaluates points in the world coordinate frame on the voxel_grid.
+
+        Args:
+            points (torch.Tensor): tensor of points that you want to query
+                of a form (..., 3)
+        Returns:
+            torch.Tensor of shape (..., n_features)
+        """
+        locator = self._get_volume_locator()
+        grid_values = self.voxel_grid.values_type(**self.params)
+        # voxel grids operate with extra n_grids dimension, which we fix to one
+        return self.voxel_grid.evaluate_world(points[None], grid_values, locator)[0]
+
+    def set_voxel_grid_parameters(self, params: VoxelGridValuesBase) -> None:
+        """
+        Sets the parameters of the underlying voxel grid.
+
+        Args:
+            params: parameters of type `self.voxel_grid.values_type` which will
+                replace current parameters
+        """
+        if self.hold_voxel_grid_as_parameters:
+            self.params = torch.nn.ParameterDict(
+                {
+                    k: torch.nn.Parameter(val)
+                    for k, val in vars(params).items()
+                    if val is not None
+                }
+            )
+        else:
+            # Torch Module to hold parameters since they can only be registered
+            # at object level.
+            self.params = _RegistratedBufferDict(vars(params))
+
+    @staticmethod
+    def get_output_dim(args: DictConfig) -> int:
+        """
+        Utility to help predict the shape of the output of `forward`.
+
+        Args:
+            args: DictConfig which would be used to initialize the object
+        Returns:
+            int: the length of the last dimension of the output tensor
+        """
+        grid = registry.get(VoxelGridBase, args["voxel_grid_class_type"])
+        return grid.get_output_dim(
+            args["voxel_grid_" + args["voxel_grid_class_type"] + "_args"]
+        )
+
+    def subscribe_to_epochs(self) -> Tuple[Tuple[int, ...], Callable[[int], bool]]:
+        """
+        Method which expresses interest in subscribing to optimization epoch updates.
+
+        Returns:
+            tuple of epochs on which to call a callable and callable to be called on
+                particular epoch. The callable returns True if parameter change has
+                happened else False and it must be supplied with one argument, epoch.
+        """
+        return self.voxel_grid.get_resolution_change_epochs(), self._apply_epochs
+
+    def _apply_epochs(self, epoch: int) -> bool:
+        """
+        Asks voxel_grid to change the resolution.
+        This method is returned with subscribe_to_epochs and is the method that collects
+        updates on training epochs, it is run on the training epochs that are requested.
+
+        Args:
+            epoch: current training epoch used for voxel grids to know to which
+                resolution to change
+        Returns:
+            True if parameter change has happened else False.
+        """
+        grid_values = self.voxel_grid.values_type(**self.params)
+        grid_values, change = self.voxel_grid.change_resolution(
+            grid_values, epoch=epoch
+        )
+        if change:
+            self.set_voxel_grid_parameters(grid_values)
+        return change and self.hold_voxel_grid_as_parameters
+
+    def _create_parameters_with_new_size(
+        self,
+        state_dict: dict,
+        prefix: str,
+        local_metadata: dict,
+        strict: bool,
+        missing_keys: List[str],
+        unexpected_keys: List[str],
+        error_msgs: List[str],
+    ) -> None:
+        '''
+        Automatically ran before loading the parameters with `load_state_dict()`.
+        Creates new parameters with the sizes of the ones in the loaded state dict.
+        This is necessary because the parameters are changing throughout training and
+        at the time of construction `VoxelGridModule` does not know the size of
+        parameters which will be loaded.
+
+        Args:
+            state_dict (dict): a dict containing parameters and
+                persistent buffers.
+            prefix (str): the prefix for parameters and buffers used in this
+                module
+            local_metadata (dict): a dict containing the metadata for this module.
+                See
+            strict (bool): whether to strictly enforce that the keys in
+                :attr:`state_dict` with :attr:`prefix` match the names of
+                parameters and buffers in this module
+            missing_keys (list of str): if ``strict=True``, add missing keys to
+                this list
+            unexpected_keys (list of str): if ``strict=True``, add unexpected
+                keys to this list
+            error_msgs (list of str): error messages should be added to this
+                list, and will be reported together in
+                :meth:`~torch.nn.Module.load_state_dict`
+        Returns:
+            nothing
+        """
+        '''
+        new_params = {}
+        for name in self.params:
+            key = prefix + "params." + name
+            if key in state_dict:
+                new_params[name] = torch.zeros_like(state_dict[key])
+        self.set_voxel_grid_parameters(self.voxel_grid.values_type(**new_params))
+
+    def get_device(self) -> torch.device:
+        """
+        Returns torch.device on which module parameters are located
+        """
+        return next(val for val in self.params.values() if val is not None).device
+
+    def crop_self(self, min_point: torch.Tensor, max_point: torch.Tensor) -> None:
+        """
+        Crops self to only represent points between min_point and max_point (inclusive).
+
+        Args:
+            min_point: torch.Tensor of shape (3,). Has x, y and z coordinates
+                smaller or equal to all other occupied points.
+            max_point: torch.Tensor of shape (3,). Has x, y and z coordinates
+                bigger or equal to all other occupied points.
+        Returns:
+            nothing
+        """
+        locator = self._get_volume_locator()
+        #  torch.nn.modules.module.Module]` is not a function.
+        old_grid_values = self.voxel_grid.values_type(**self.params)
+        new_grid_values = self.voxel_grid.crop_world(
+            min_point, max_point, old_grid_values, locator
+        )
+        grid_values, _ = self.voxel_grid.change_resolution(
+            new_grid_values, grid_values_with_wanted_resolution=old_grid_values
+        )
+        self.params = torch.nn.ParameterDict(
+            {
+                k: torch.nn.Parameter(val)
+                for k, val in vars(grid_values).items()
+                if val is not None
+            }
+        )
+        # New center of voxel grid is the middle point between max and min points.
+        self.translation = tuple((max_point + min_point) / 2)
+        # new extents of voxel grid are distances between min and max points
+        self.extents = tuple(max_point - min_point)
+
+    def _get_volume_locator(self) -> VolumeLocator:
+        """
+        Returns VolumeLocator calculated from `extents` and `translation` members.
+        """
+        return VolumeLocator(
+            batch_size=1,
+            # The resolution of the voxel grid does not need to be known
+            # to the locator object. It is easiest to fix the resolution of the locator.
+            # In particular we fix it to (2,2,2) so that there is exactly one voxel of the
+            # desired size. The locator object uses (z, y, x) convention for the grid_size,
+            # and this module uses (x, y, z) convention so the order has to be reversed
+            # (irrelevant in this case since they are all equal).
+            # It is (2, 2, 2) because the VolumeLocator object behaves like
+            # align_corners=True, which means that the points are in the corners of
+            # the volume. So in the grid of (2, 2, 2) there is only one voxel.
+            grid_sizes=(2, 2, 2),
+            # The locator object uses (x, y, z) convention for the
+            # voxel size and translation.
+            voxel_size=tuple(self.extents),
+            # volume_translation is defined in `VolumeLocator` as a vector from the origin
+            # of local coordinate frame to origin of world coordinate frame, that is:
+            # x_world = x_local * extents/2 - translation.
+            # To get the reverse we need to negate it.
+            volume_translation=tuple(-t for t in self.translation),
+            device=self.get_device(),
+        )
+
+    def get_grid_points(self, epoch: int) -> torch.Tensor:
+        """
+        Returns a grid of points that represent centers of voxels of the
+        underlying voxel grid in world coordinates at specific epoch.
+
+        Args:
+            epoch: underlying voxel grids change resolution depending on the
+                epoch, this argument is used to determine the resolution
+                of the voxel grid at that epoch.
+        Returns:
+            tensor of shape [xresolution, yresolution, zresolution, 3] where
+                xresolution, yresolution, zresolution are resolutions of the
+                underlying voxel grid
+        """
+        xresolution, yresolution, zresolution = self.voxel_grid.get_resolution(epoch)
+        width, height, depth = self.extents
+        if not self.voxel_grid.get_align_corners():
+            width = (
+                width * (xresolution - 1) / xresolution if xresolution > 1 else width
+            )
+            height = (
+                height * (xresolution - 1) / xresolution if xresolution > 1 else height
+            )
+            depth = (
+                depth * (xresolution - 1) / xresolution if xresolution > 1 else depth
+            )
+        xs = torch.linspace(
+            -width / 2, width / 2, xresolution, device=self.get_device()
+        )
+        ys = torch.linspace(
+            -height / 2, height / 2, yresolution, device=self.get_device()
+        )
+        zs = torch.linspace(
+            -depth / 2, depth / 2, zresolution, device=self.get_device()
+        )
+        xmesh, ymesh, zmesh = torch.meshgrid(xs, ys, zs, indexing="ij")
+        return torch.stack((xmesh, ymesh, zmesh), dim=3)
+
+
+class _RegistratedBufferDict(torch.nn.Module, Mapping):
+    """
+    Mapping class and a torch.nn.Module that registeres its values
+    with `self.register_buffer`. Can be indexed like a regular Python
+    dictionary, but torch.Tensors it contains are properly registered, and will be visible
+    by all Module methods. Supports only `torch.Tensor` as value and str as key.
+    """
+
+    def __init__(self, init_dict: Optional[Dict[str, torch.Tensor]] = None) -> None:
+        """
+        Args:
+            init_dict: dictionary which will be used to populate the object
+        """
+        super().__init__()
+        self._keys = set()
+        if init_dict is not None:
+            for k, v in init_dict.items():
+                self[k] = v
+
+    def __iter__(self) -> Iterator[Dict[str, torch.Tensor]]:
+        return iter({k: self[k] for k in self._keys})
+
+    def __len__(self) -> int:
+        return len(self._keys)
+
+    def __getitem__(self, key: str) -> torch.Tensor:
+        return getattr(self, key)
+
+    def __setitem__(self, key, value) -> None:
+        self._keys.add(key)
+        self.register_buffer(key, value)
+
+    def __hash__(self) -> int:
+        return hash(repr(self))
diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/voxel_grid_implicit_function.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/voxel_grid_implicit_function.py
new file mode 100644
index 0000000000000000000000000000000000000000..b413204600192ba387332ce8c5e8d166f3be3c46
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/implicit_function/voxel_grid_implicit_function.py
@@ -0,0 +1,616 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+import math
+import warnings
+from dataclasses import fields
+from typing import Callable, Dict, Optional, Tuple
+
+import torch
+
+from omegaconf import DictConfig
+
+from pytorch3d.implicitron.models.implicit_function.base import ImplicitFunctionBase
+from pytorch3d.implicitron.models.implicit_function.decoding_functions import (
+    DecoderFunctionBase,
+)
+from pytorch3d.implicitron.models.implicit_function.voxel_grid import VoxelGridModule
+from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
+from pytorch3d.implicitron.tools.config import (
+    enable_get_default_args,
+    get_default_args_field,
+    registry,
+    run_auto_creation,
+)
+from pytorch3d.renderer import ray_bundle_to_ray_points
+from pytorch3d.renderer.cameras import CamerasBase
+from pytorch3d.renderer.implicit import HarmonicEmbedding
+
+logger = logging.getLogger(__name__)
+
+# Presumably a prerequisite of `get_default_args_field(HarmonicEmbedding)` below.
+enable_get_default_args(HarmonicEmbedding)
+
+
+@registry.register
+# pyre-ignore[13]
+class VoxelGridImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
+    """
+    This implicit function consists of two streams, one for the density calculation and one
+    for the color calculation. Each of these streams has three main parts:
+        1) Voxel grids:
+            They take the (x, y, z) position and return the embedding of that point.
+            These components are replaceable, you can make your own or choose one of
+            several options.
+        2) Harmonic embeddings:
+            Convert each feature into series of 'harmonic features', feature is passed through
+            sine and cosine functions. Input is of shape [minibatch, ..., D] output
+            [minibatch, ..., (n_harmonic_functions * 2 + int(append_input)) * D]. Appends
+            input by default. If you want it to behave like identity, put n_harmonic_functions=0
+            and append_input=True.
+        3) Decoding functions:
+            The decoder is an instance of the DecoderFunctionBase and converts the embedding
+            of a spatial location to density/color. Examples are Identity which returns its
+            input and the MLP which uses fully connected neural network to transform the input.
+            These components are replaceable, you can make your own or choose from
+            several options.
+
+    Calculating density is done in three steps:
+        1) Evaluating the voxel grid on points
+        2) Embedding the outputs with harmonic embedding
+        3) Passing through the Density decoder
+
+    To calculate the color we need the embedding and the viewing direction, it has five steps:
+        1) Transforming the viewing direction with camera
+        2) Evaluating the voxel grid on points
+        3) Embedding the outputs with harmonic embedding
+        4) Embedding the normalized direction with harmonic embedding
+        5) Passing everything through the Color decoder
+
+    If using the Implicitron configuration system the input_dim to the decoding functions will
+    be set to the output_dim of the Harmonic embeddings.
+
+    A speed up comes from using the scaffold, a low resolution voxel grid.
+    The scaffold is referenced as "binary occupancy grid mask" in TensoRF paper and "AlphaMask"
+    in official TensoRF implementation.
+    The scaffold is used in:
+        1) filtering points in empty space
+            - controlled by `scaffold_filter_points` boolean. If set to True, points for which
+                scaffold predicts that are in empty space will return 0 density and
+                (0, 0, 0) color.
+        2) calculating the bounding box of an object and cropping the voxel grids
+            - controlled by `volume_cropping_epochs`.
+            - at those epochs the implicit function will find the bounding box of an object
+                inside it and crop density and color grids. Cropping of the voxel grids means
+                preserving only voxel values that are inside the bounding box and changing the
+                resolution to match the original, while preserving the new cropped location in
+                world coordinates.
+
+    The scaffold has to exist before attempting filtering and cropping, and is created on
+    `scaffold_calculating_epochs`. Each voxel in the scaffold is labeled as having density 1 if
+    the point in the center of it evaluates to greater than `scaffold_empty_space_threshold`.
+    3D max pooling is performed on the densities of the points in 3D.
+    Scaffold features are off by default.
+
+    Members:
+        voxel_grid_density (VoxelGridBase): voxel grid to use for density estimation
+        voxel_grid_color   (VoxelGridBase): voxel grid to use for color   estimation
+
+        harmonic_embedder_xyz_density (HarmonicEmbedding): Function to transform the outputs
+            of the voxel_grid_density
+        harmonic_embedder_xyz_color (HarmonicEmbedding): Function to transform the outputs
+            of the voxel_grid_color
+        harmonic_embedder_dir_color (HarmonicEmbedding): Function to transform the
+            normalized viewing directions used for the color calculation
+
+        decoder_density (DecoderFunctionBase): decoder function to use for density estimation
+        decoder_color   (DecoderFunctionBase): decoder function to use for color   estimation
+
+        use_multiple_streams (bool): if you want the density and color calculations to run on
+            different cuda streams set this to True. Default True.
+        xyz_ray_dir_in_camera_coords (bool): This is true if the directions are given in
+            camera coordinates. Default False.
+
+        voxel_grid_scaffold (VoxelGridModule): which holds the scaffold. Extents and
+            translation of it are set to those of voxel_grid_density.
+        scaffold_calculating_epochs (Tuple[int, ...]): at which epochs to recalculate the
+            scaffold. (The scaffold will be created automatically at the beginning of
+            the calculation.)
+        scaffold_resolution (Tuple[int, int, int]): (width, height, depth) of the underlying
+            voxel grid which stores scaffold
+        scaffold_empty_space_threshold (float): if `self._get_density` evaluates to less than
+            this it will be considered as empty space and the scaffold at that point would
+            evaluate as empty space.
+        scaffold_occupancy_chunk_size (int): Number of xy scaffold planes to calculate
+            at the same time. To calculate the scaffold we need to query `_get_density()`
+            at every voxel. This calculation can be done one xy plane at a time (lowest
+            memory usage), for the whole scaffold at once (highest memory usage), or
+            for any other number of planes at a time.
+            Setting to a non-positive number calculates all planes at the same time.
+            Defaults to -1 (=calculating all planes).
+        scaffold_max_pool_kernel_size (int): Size of the pooling region to use when
+            calculating the scaffold. Defaults to 3.
+        scaffold_filter_points (bool): If set to True the points will be filtered using
+            `self.voxel_grid_scaffold`. Filtered points will be predicted as having 0 density
+            and (0, 0, 0) color. The points which were not evaluated as empty space will be
+            passed through the steps outlined above.
+        volume_cropping_epochs: on which epochs to crop the voxel grids to fit the object's
+            bounding box. Scaffold has to be calculated before cropping.
+    """
+
+    # ---- voxel grid for density
+    voxel_grid_density: VoxelGridModule
+
+    # ---- voxel grid for color
+    voxel_grid_color: VoxelGridModule
+
+    # ---- harmonic embeddings density
+    harmonic_embedder_xyz_density_args: DictConfig = get_default_args_field(
+        HarmonicEmbedding
+    )
+    harmonic_embedder_xyz_color_args: DictConfig = get_default_args_field(
+        HarmonicEmbedding
+    )
+    harmonic_embedder_dir_color_args: DictConfig = get_default_args_field(
+        HarmonicEmbedding
+    )
+
+    # ---- decoder function for density
+    decoder_density_class_type: str = "MLPDecoder"
+    decoder_density: DecoderFunctionBase
+
+    # ---- decoder function for color
+    decoder_color_class_type: str = "MLPDecoder"
+    decoder_color: DecoderFunctionBase
+
+    # ---- cuda streams
+    use_multiple_streams: bool = True
+
+    # ---- camera
+    xyz_ray_dir_in_camera_coords: bool = False
+
+    # --- scaffold
+    # voxel_grid_scaffold: VoxelGridModule
+    scaffold_calculating_epochs: Tuple[int, ...] = ()
+    scaffold_resolution: Tuple[int, int, int] = (128, 128, 128)
+    scaffold_empty_space_threshold: float = 0.001
+    scaffold_occupancy_chunk_size: int = -1
+    scaffold_max_pool_kernel_size: int = 3
+    scaffold_filter_points: bool = True
+
+    # --- cropping
+    volume_cropping_epochs: Tuple[int, ...] = ()
+
+    def __post_init__(self) -> None:
+        """Run auto-creation, then build the scaffold grid and the embedders."""
+        run_auto_creation(self)
+        self.voxel_grid_scaffold = self._create_voxel_grid_scaffold()
+
+        xyz_density_args = self.harmonic_embedder_xyz_density_args
+        xyz_color_args = self.harmonic_embedder_xyz_color_args
+        dir_color_args = self.harmonic_embedder_dir_color_args
+        self.harmonic_embedder_xyz_density = HarmonicEmbedding(**xyz_density_args)
+        self.harmonic_embedder_xyz_color = HarmonicEmbedding(**xyz_color_args)
+        self.harmonic_embedder_dir_color = HarmonicEmbedding(**dir_color_args)
+        # Becomes True once `_get_scaffold` has filled the scaffold grid.
+        self._scaffold_ready = False
+
+    def forward(
+        self,
+        ray_bundle: ImplicitronRayBundle,
+        fun_viewpool=None,
+        camera: Optional[CamerasBase] = None,
+        global_code=None,
+        **kwargs,
+    ) -> Tuple[torch.Tensor, torch.Tensor, Dict]:
+        """
+        The forward function accepts the parametrizations of 3D points sampled along
+        projection rays. The forward pass is responsible for attaching a 3D vector
+        and a 1D scalar representing the point's RGB color and opacity respectively.
+        Once the scaffold is ready and `scaffold_filter_points` is True, the points
+        for which the scaffold predicts empty space get zero density and color
+        without being passed through the voxel grids and decoding functions.
+
+        Args:
+            ray_bundle: An ImplicitronRayBundle object containing the following variables:
+                origins: A tensor of shape `(minibatch, ..., 3)` denoting the
+                    origins of the sampling rays in world coords.
+                directions: A tensor of shape `(minibatch, ..., 3)`
+                    containing the direction vectors of sampling rays in world coords.
+                lengths: A tensor of shape `(minibatch, ..., num_points_per_ray)`
+                    containing the lengths at which the rays are sampled.
+            fun_viewpool: an optional callback with the signature
+                    fun_viewpool(points) -> pooled_features
+                where points is a [N_TGT x N x 3] tensor of world coords,
+                and pooled_features is a [N_TGT x ... x N_SRC x latent_dim] tensor
+                of the features pooled from the context images. Not used here.
+            camera: A camera model which will be used to transform the viewing
+                directions. Required if `xyz_ray_dir_in_camera_coords` is True.
+            global_code: not used by this implicit function.
+            **kwargs: ignored.
+
+        Returns:
+            rays_densities: A tensor of shape `(minibatch, ..., num_points_per_ray, 1)`
+                denoting the opacity of each ray point.
+            rays_colors: A tensor of shape `(minibatch, ..., num_points_per_ray, 3)`
+                denoting the color of each ray point.
+            An empty dictionary.
+        """
+        # ########## convert the ray parametrizations to world coordinates ########## #
+        # points.shape = [minibatch x n_rays_width x n_rays_height x pts_per_ray x 3]
+        # pyre-ignore[6]
+        points = ray_bundle_to_ray_points(ray_bundle)
+        directions = ray_bundle.directions.reshape(-1, 3)
+        input_shape = points.shape
+        num_points_per_ray = input_shape[-2]
+        points = points.view(-1, 3)
+        non_empty_points = None
+
+        # ########## filter the points using the scaffold ########## #
+        if self._scaffold_ready and self.scaffold_filter_points:
+            with torch.no_grad():
+                non_empty_points = self.voxel_grid_scaffold(points)[..., 0] > 0
+            points = points[non_empty_points]
+            if len(points) == 0:
+                warnings.warn(
+                    "The scaffold has filtered all the points. "
+                    "The voxel grids and decoding functions will not be run."
+                )
+                return (
+                    points.new_zeros((*input_shape[:-1], 1)),
+                    points.new_zeros((*input_shape[:-1], 3)),
+                    {},
+                )
+
+        # ########## calculate color and density ########## #
+        rays_densities, rays_colors = self._calculate_density_and_color(
+            points, directions, camera, non_empty_points, num_points_per_ray
+        )
+
+        # ########## merge scaffold calculated points ########## #
+        if non_empty_points is not None:
+            # Scatter the calculated values into zeroed tensors, so that the
+            # filtered out points end up with density=0 and color=(0, 0, 0).
+            n_points = math.prod(input_shape[:-1])
+            merged_densities = rays_densities.new_zeros(
+                (n_points, rays_densities.shape[-1])
+            )
+            merged_colors = rays_colors.new_zeros(
+                (n_points, rays_colors.shape[-1])
+            )
+            merged_densities[non_empty_points] = rays_densities
+            merged_colors[non_empty_points] = rays_colors
+            rays_densities, rays_colors = merged_densities, merged_colors
+
+        return (
+            rays_densities.view((*input_shape[:-1], rays_densities.shape[-1])),
+            rays_colors.view((*input_shape[:-1], rays_colors.shape[-1])),
+            {},
+        )
+
+    def _calculate_density_and_color(
+        self,
+        points: torch.Tensor,
+        directions: torch.Tensor,
+        camera: Optional[CamerasBase],
+        non_empty_points: Optional[torch.Tensor],
+        num_points_per_ray: int,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """
+        Calculates density and color at `points`. If `use_multiple_streams` is set
+        and `points` are on a cuda device, density is calculated on a side stream.
+
+        Args:
+            points: points at which to calculate density and color.
+                Tensor of shape [n_points, 3].
+            directions: from which directions are the points viewed.
+                One per ray. Tensor of shape [n_rays, 3].
+            camera: A camera model which will be used to transform the viewing
+                directions
+            non_empty_points: mask of the points which weren't filtered out, or None
+                if no filtering took place; used for expanding directions
+            num_points_per_ray: number of points per ray, needed to expand directions.
+        Returns:
+            Tuple of density (tensor of shape [..., density_dim]) and color
+                (tensor of shape [..., color_dim]), in that order.
+        """
+        if self.use_multiple_streams and points.is_cuda:
+            current_stream = torch.cuda.current_stream(points.device)
+            other_stream = torch.cuda.Stream(points.device)
+            other_stream.wait_stream(current_stream)
+
+            with torch.cuda.stream(other_stream):
+                # Density is calculated on the side stream, rays_densities.shape =
+                # [n_points x density_dim]
+                rays_densities = self._get_density(points)
+
+            # Color is calculated on the current stream, rays_colors.shape =
+            # [n_points x color_dim]
+            rays_colors = self._get_color(
+                points, camera, directions, non_empty_points, num_points_per_ray
+            )
+
+            current_stream.wait_stream(other_stream)
+        else:
+            # Same calculation as above, just serial.
+            rays_densities = self._get_density(points)
+            rays_colors = self._get_color(
+                points, camera, directions, non_empty_points, num_points_per_ray
+            )
+        return rays_densities, rays_colors
+
+    def _get_density(self, points: torch.Tensor) -> torch.Tensor:
+        """
+        Evaluates the density stream at the given points by passing them through:
+            1) the density voxel grid,
+            2) the harmonic embedding of the voxel grid outputs,
+            3) the density decoder.
+
+        Args:
+            points: tensor of shape [..., 3], the last dimension holds the
+                (x, y, z) coordinates of a point
+        Returns:
+            tensor of shape [..., density_dim], where `density_dim` is the
+                dimensionality of the features returned by `decoder_density`
+        """
+        grid_features = self.voxel_grid_density(points)
+        return self.decoder_density(
+            self.harmonic_embedder_xyz_density(grid_features)
+        )
+
+    def _get_color(
+        self,
+        points: torch.Tensor,
+        camera: Optional[CamerasBase],
+        directions: torch.Tensor,
+        non_empty_points: Optional[torch.Tensor],
+        num_points_per_ray: int,
+    ) -> torch.Tensor:
+        """
+        Evaluates the color stream at the given points, conditioned on the viewing
+        directions:
+            1) optionally transforms the viewing directions with `camera.R`,
+            2) evaluates the color voxel grid on the points,
+            3) harmonically embeds the voxel grid outputs,
+            4) harmonically embeds the l2-normalized directions and expands them
+                from one entry per ray to one entry per evaluated point,
+            5) decodes the concatenation of both embeddings with the color decoder.
+
+        Args:
+            points: tensor of shape (..., 3), the last dimension holds the
+                (x, y, z) coordinates of a point
+            camera: camera model used to transform the viewing directions; only
+                read if `xyz_ray_dir_in_camera_coords` is True
+            directions: tensor of shape `(..., 3)` with the direction vectors of
+                the sampling rays, one per ray
+            non_empty_points: if not None, used to index the per-point direction
+                embeddings so that they match `points` after scaffold filtering
+            num_points_per_ray: how many times each direction embedding is repeated
+        Returns:
+            the output of the color decoder
+        Raises:
+            ValueError: if `xyz_ray_dir_in_camera_coords` is True and no camera is given
+        """
+        # ########## transform direction ########## #
+        if self.xyz_ray_dir_in_camera_coords:
+            if camera is None:
+                raise ValueError("Camera must be given if xyz_ray_dir_in_camera_coords")
+            # NOTE(review): `camera.R` is presumably batched ([N, 3, 3]) while the
+            # directions are flattened here - confirm the product broadcasts as meant.
+            directions = directions @ camera.R
+
+        # ########## embed the voxel grid output ########## #
+        grid_outputs = self.voxel_grid_color(points)
+        grid_features = self.harmonic_embedder_xyz_color(grid_outputs)
+
+        # ########## embed the viewing directions ########## #
+        unit_directions = torch.nn.functional.normalize(directions, dim=-1)
+        dir_features = self.harmonic_embedder_dir_color(unit_directions)
+        # There is one direction per ray, each of them is repeated for all the
+        # points of its ray and then only the entries of kept points are selected.
+        dir_features = dir_features.repeat_interleave(num_points_per_ray, dim=0)
+        if non_empty_points is not None:
+            dir_features = dir_features[non_empty_points]
+
+        # ########## evaluate color with the decoding function ########## #
+        # The decoder input is the concatenation of the embedded voxel grid output
+        # and the embedded directions along the feature dimension.
+        decoder_input = torch.cat(
+            (grid_features, dir_features),
+            dim=-1,
+        )
+        return self.decoder_color(decoder_input)
+
+    @staticmethod
+    def allows_multiple_passes() -> bool:
+        """
+        Overrides the ImplicitFunctionBase method to signal that this
+        implicit function supports multiple passes; always returns True.
+        """
+        return True
+
+    def subscribe_to_epochs(self) -> Tuple[Tuple[int, ...], Callable[[int], bool]]:
+        """
+        Expresses interest in optimization epoch updates. The scaffold calculation
+        and the voxel grid cropping both happen on configured epochs, hence the
+        epochs of both are merged and a single callback dispatches to both of them.
+
+        Returns:
+            The epochs on which the callable has to be called and the callable
+                itself. It takes a single argument, the epoch, and returns True
+                if a parameter change has happened, else False.
+        """
+
+        def callback(epoch) -> bool:
+            # the scaffold is recalculated first, as cropping relies on it
+            scaffold_changed = (
+                epoch in self.scaffold_calculating_epochs
+                and self._get_scaffold(epoch)
+            )
+            cropped = epoch in self.volume_cropping_epochs and self._crop(epoch)
+            return cropped or scaffold_changed
+
+        # the set union removes the epochs which are present in both tuples
+        scaffold_epochs = set(self.scaffold_calculating_epochs)
+        call_epochs = list(scaffold_epochs.union(set(self.volume_cropping_epochs)))
+        return call_epochs, callback
+
+    def _crop(self, epoch: int) -> bool:
+        """
+        Finds the bounding box of the object represented in the scaffold and crops
+        the density and color voxel grids to it. If the scaffold is empty everywhere
+        (there is no object in it) no change will happen.
+
+        Args:
+            epoch: epoch on which it is called; passed to the scaffold when getting
+                its grid points and reported in the log message.
+        Returns:
+            True (indicating that parameter change has happened) if there is
+            an object inside, else False.
+        """
+        assert self._scaffold_ready, "Scaffold has to be calculated before cropping."
+        # find bounding box
+        points = self.voxel_grid_scaffold.get_grid_points(epoch=epoch)
+        occupancy = self.voxel_grid_scaffold(points)[..., 0] > 0
+        non_zero_idxs = torch.nonzero(occupancy)
+        if len(non_zero_idxs) == 0:
+            return False
+        min_indices = tuple(torch.min(non_zero_idxs, dim=0)[0])
+        max_indices = tuple(torch.max(non_zero_idxs, dim=0)[0])
+        min_point, max_point = points[min_indices], points[max_indices]
+
+        logger.info(
+            f"Cropping at epoch {epoch} to bounding box "
+            f"[{min_point.tolist()}, {max_point.tolist()}]."
+        )
+
+        # crop the voxel grids
+        self.voxel_grid_density.crop_self(min_point, max_point)
+        self.voxel_grid_color.crop_self(min_point, max_point)
+        return True
+
+    @torch.no_grad()
+    def _get_scaffold(self, epoch: int) -> bool:
+        """
+        Creates a low resolution grid which is used to filter points that are in empty
+        space. The density is evaluated at the scaffold grid points in chunks of
+        `scaffold_occupancy_chunk_size` xy planes, max pooled and thresholded.
+
+        Args:
+            epoch: epoch on which it is called, used to get the scaffold grid points
+        Returns:
+             Always False: Modifies `self.voxel_grid_scaffold` member.
+        """
+        planes = []
+        points = self.voxel_grid_scaffold.get_grid_points(epoch=epoch)
+        n_planes = points.shape[-2]
+        chunk_size = (
+            self.scaffold_occupancy_chunk_size
+            if self.scaffold_occupancy_chunk_size > 0
+            else n_planes
+        )
+        # Chunk over the depth axis (dim -2); the last axis holds the xyz coordinates.
+        for k in range(0, n_planes, chunk_size):
+            points_in_planes = points[..., k : k + chunk_size, :]
+            planes.append(self._get_density(points_in_planes)[..., 0])
+
+        density_cube = torch.cat(planes, dim=-1)
+        density_cube = torch.nn.functional.max_pool3d(
+            density_cube[None, None],
+            kernel_size=self.scaffold_max_pool_kernel_size,
+            padding=self.scaffold_max_pool_kernel_size // 2,
+            stride=1,
+        )
+        occupancy_cube = density_cube > self.scaffold_empty_space_threshold
+        self.voxel_grid_scaffold.params["voxel_grid"] = occupancy_cube.float()
+        self._scaffold_ready = True
+        return False
+
+    @classmethod
+    def decoder_density_tweak_args(cls, type_, args: DictConfig) -> None:
+        args.pop("input_dim", None)  # set by `create_decoder_density_impl` instead
+
+    def create_decoder_density_impl(self, type_, args: DictConfig) -> None:
+        """
+        Creates `self.decoder_density`. The decoder consumes the harmonic embedding
+        of the density voxel grid output, so its input dimension is derived here
+        from the grid and embedder configs instead of being set in the config file.
+        It is passed on only to decoder classes which have an `input_dim` field.
+        """
+        # dimensionality of the features entering the decoder
+        grid_output_dim = VoxelGridModule.get_output_dim(self.voxel_grid_density_args)
+        xyz_embedder_args = self.harmonic_embedder_xyz_density_args
+        decoder_input_dim = HarmonicEmbedding.get_output_dim_static(
+            grid_output_dim,
+            xyz_embedder_args["n_harmonic_functions"],
+            xyz_embedder_args["append_input"],
+        )
+
+        # `input_dim` is handed over only to decoders which declare such a field
+        decoder_cls = registry.get(DecoderFunctionBase, type_)
+        field_names = {field.name for field in fields(decoder_cls)}
+        if "input_dim" in field_names:
+            self.decoder_density = decoder_cls(input_dim=decoder_input_dim, **args)
+        else:
+            self.decoder_density = decoder_cls(**args)
+
+    @classmethod
+    def decoder_color_tweak_args(cls, type_, args: DictConfig) -> None:
+        args.pop("input_dim", None)  # set by `create_decoder_color_impl` instead
+
+    def create_decoder_color_impl(self, type_, args: DictConfig) -> None:
+        """
+        Creates `self.decoder_color`. The decoder consumes the concatenation of the
+        harmonic embeddings of the color voxel grid output and of the 3D viewing
+        direction, so its input dimension is the sum of both embedding dimensions.
+        It is passed on only to decoder classes which have an `input_dim` field.
+        """
+        # dimensionality of the embedded voxel grid output
+        grid_output_dim = VoxelGridModule.get_output_dim(self.voxel_grid_color_args)
+        xyz_embedder_args = self.harmonic_embedder_xyz_color_args
+        xyz_embedding_dim = HarmonicEmbedding.get_output_dim_static(
+            grid_output_dim,
+            xyz_embedder_args["n_harmonic_functions"],
+            xyz_embedder_args["append_input"],
+        )
+
+        # dimensionality of the embedded viewing direction, which is a 3D vector
+        dir_embedder_args = self.harmonic_embedder_dir_color_args
+        dir_embedding_dim = HarmonicEmbedding.get_output_dim_static(
+            3,
+            dir_embedder_args["n_harmonic_functions"],
+            dir_embedder_args["append_input"],
+        )
+
+        decoder_input_dim = xyz_embedding_dim + dir_embedding_dim
+
+        # `input_dim` is handed over only to decoders which declare such a field
+        decoder_cls = registry.get(DecoderFunctionBase, type_)
+        field_names = {field.name for field in fields(decoder_cls)}
+        if "input_dim" in field_names:
+            self.decoder_color = decoder_cls(input_dim=decoder_input_dim, **args)
+        else:
+            self.decoder_color = decoder_cls(**args)
+
+    def _create_voxel_grid_scaffold(self) -> VoxelGridModule:
+        """
+        Creates the module to become `self.voxel_grid_scaffold`, which is given the
+        extents and translation (world to local mapping) of `self.voxel_grid_density`.
+        """
+        scaffold_grid_args = {
+            "resolution_changes": {0: self.scaffold_resolution},
+            "padding": "zeros",
+            "align_corners": True,
+            "mode": "trilinear",
+        }
+        return VoxelGridModule(
+            extents=self.voxel_grid_density_args["extents"],
+            translation=self.voxel_grid_density_args["translation"],
+            voxel_grid_class_type="FullResolutionVoxelGrid",
+            hold_voxel_grid_as_parameters=False,
+            voxel_grid_FullResolutionVoxelGrid_args=scaffold_grid_args,
+        )
diff --git a/pytorch3d/pytorch3d/implicitron/models/metrics.py b/pytorch3d/pytorch3d/implicitron/models/metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..edd4b9408d2e6d70c0ec017fd2077b5093248603
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/metrics.py
@@ -0,0 +1,400 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import warnings
+from typing import Any, Dict, Optional
+
+import torch
+from pytorch3d.implicitron.models.renderer.ray_sampler import ImplicitronRayBundle
+from pytorch3d.implicitron.tools import metric_utils as utils
+from pytorch3d.implicitron.tools.config import registry, ReplaceableBase
+from pytorch3d.ops import padded_to_packed
+from pytorch3d.renderer import utils as rend_utils
+
+from .renderer.base import RendererOutput
+
+
+class RegularizationMetricsBase(ReplaceableBase, torch.nn.Module):
+    """
+    Replaceable abstract base class of the regularization metrics.
+    The `forward()` method returns regularization terms which, unlike the view
+    metrics, are allowed to depend on the parameters of the model.
+    """
+
+    def forward(
+        self, model: Any, keys_prefix: str = "loss_", **kwargs
+    ) -> Dict[str, Any]:
+        """
+        Computes the regularization terms used for supervising differentiable
+        rendering pipelines. Has to be overridden by the subclasses.
+
+        Args:
+            model: The model instance to regularize; it gives access, for example,
+                to the weights needed by a weights-based regularization.
+            keys_prefix: The prefix shared by all the keys of the returned
+                dictionary of regularization metrics.
+
+        Returns:
+            A dictionary with the computed regularization metrics, having the form
+                `{metric_name_i: metric_value_i}`: each name `metric_name_i` of
+                a metric is mapped to its value `metric_value_i`, represented as
+                a 0-dimensional float tensor.
+        """
+        raise NotImplementedError()
+
+
+class ViewMetricsBase(ReplaceableBase, torch.nn.Module):
+    """
+    Replaceable abstract base for model metrics.
+    `forward()` method produces losses and other metrics.
+    """
+
+    def forward(
+        self,
+        raymarched: RendererOutput,
+        ray_bundle: ImplicitronRayBundle,
+        image_rgb: Optional[torch.Tensor] = None,
+        depth_map: Optional[torch.Tensor] = None,
+        fg_probability: Optional[torch.Tensor] = None,
+        mask_crop: Optional[torch.Tensor] = None,
+        keys_prefix: str = "loss_",
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """
+        Calculates various metrics and loss functions useful for supervising
+        differentiable rendering pipelines. Any additional parameters can be passed
+        in the `raymarched.aux` dictionary.
+
+        Args:
+            raymarched: Output of the renderer.
+            ray_bundle: ImplicitronRayBundle object which was used to produce the raymarched
+                object
+            image_rgb: A tensor of shape `(B, H, W, 3)` containing ground truth rgb
+                values.
+            depth_map: A tensor of shape `(B, Hd, Wd, 1)` containing ground truth depth
+                values.
+            fg_probability: A tensor of shape `(B, Hm, Wm, 1)` containing ground truth
+                foreground masks.
+            mask_crop: An optional mask tensor restricting where the metrics are
+                evaluated (presumably the valid image crop; shape to be confirmed).
+            keys_prefix: A common prefix for all keys in the output dictionary
+                containing all view metrics.
+            **kwargs: Additional keyword arguments which implementations may
+                accept; this abstract base does not use them.
+
+        Returns:
+            A dictionary with the resulting view metrics. The items
+                will have form `{metric_name_i: metric_value_i}` keyed by the
+                names of the output metrics `metric_name_i` with their corresponding
+                values `metric_value_i` represented as 0-dimensional float tensors.
+        """
+        raise NotImplementedError()
+
+
+@registry.register
+class RegularizationMetrics(RegularizationMetricsBase):
+    def forward(
+        self, model: Any, keys_prefix: str = "loss_", **kwargs
+    ) -> Dict[str, Any]:
+        """
+        Computes the penalty on the norm of the autodecoder encodings; the result
+        is an empty dict if the model has no active sequence autodecoder.
+
+        Args:
+            model: A model instance; only its `sequence_autodecoder` is used.
+            keys_prefix: A prefix prepended to the keys of the returned
+                dictionary; `None` leaves the keys without a prefix.
+
+        Returns:
+            A dictionary `{metric_name_i: metric_value_i}` with the names
+                `metric_name_i` of the regularization metrics as keys and with
+                their values `metric_value_i` as returned by the autodecoder.
+
+            The only calculated metric is:
+                autodecoder_norm: The value of the autodecoder's
+                    `calculate_squared_encoding_norm()`, unless it is `None`.
+        """
+        autodecoder = getattr(model, "sequence_autodecoder", None)
+        if autodecoder is None:
+            return {}
+
+        ad_penalty = autodecoder.calculate_squared_encoding_norm()
+        if ad_penalty is None:
+            return {}
+
+        prefix = "" if keys_prefix is None else keys_prefix
+        return {prefix + "autodecoder_norm": ad_penalty}
+
+
+@registry.register
+class ViewMetrics(ViewMetricsBase):
+    def forward(
+        self,
+        raymarched: RendererOutput,
+        ray_bundle: ImplicitronRayBundle,
+        image_rgb: Optional[torch.Tensor] = None,
+        depth_map: Optional[torch.Tensor] = None,
+        fg_probability: Optional[torch.Tensor] = None,
+        mask_crop: Optional[torch.Tensor] = None,
+        keys_prefix: str = "loss_",
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """
+        Calculates various differentiable metrics useful for supervising
+        differentiable rendering pipelines.
+
+        Args:
+            raymarched.features: Predicted rgb or feature values.
+            raymarched.depths: A tensor of shape `(B, ..., 1)` containing
+                predicted depth values.
+            raymarched.masks: A tensor of shape `(B, ..., 1)` containing
+                predicted foreground masks.
+            raymarched.aux["grad_theta"]: A tensor of shape `(B, ..., 3)` containing an
+                evaluation of a gradient of a signed distance function w.r.t.
+                input 3D coordinates used to compute the eikonal loss.
+            raymarched.aux["density_grid"]: A tensor of shape `(B, Hg, Wg, Dg, 1)`
+                containing a `Hg x Wg x Dg` voxel grid of density values.
+            ray_bundle: ImplicitronRayBundle object which was used to produce the raymarched
+                object
+            image_rgb: A tensor of shape `(B, H, W, 3)` containing ground truth rgb
+                values.
+            depth_map: A tensor of shape `(B, Hd, Wd, 1)` containing ground truth depth
+                values.
+            fg_probability: A tensor of shape `(B, Hm, Wm, 1)` containing ground truth
+                foreground masks.
+            mask_crop: An optional mask weighting the rgb, mask and depth error terms.
+            keys_prefix: A common prefix for all keys in the output dictionary
+                containing all view metrics.
+
+        Returns:
+            A dictionary `{metric_name_i: metric_value_i}` of 0-dimensional float
+                tensors. All the keys start with `keys_prefix`; the metrics of
+                `raymarched.prev_stage` are stored under `keys_prefix + "prev_stage_"`.
+
+                The calculated metrics are:
+                    rgb_huber: A robust huber loss between `image_pred` and `image`.
+                    rgb_mse: Mean squared error between `image_pred` and `image`.
+                    rgb_psnr: Peak signal-to-noise ratio between `image_pred` and `image`.
+                    rgb_psnr_fg: Peak signal-to-noise ratio between the foreground
+                        region of `image_pred` and `image` as defined by `mask`.
+                    rgb_mse_fg: Mean squared error between the foreground
+                        region of `image_pred` and `image` as defined by `mask`.
+                    mask_neg_iou: (1 - intersection-over-union) between `mask_pred`
+                        and `mask`.
+                    mask_bce: Binary cross entropy between `mask_pred` and `mask`.
+                    mask_beta_prior: A loss enforcing strictly binary values
+                        of `mask_pred`: `log(mask_pred) + log(1-mask_pred)`
+                    depth_abs: Mean per-pixel L1 distance between
+                        `depth_pred` and `depth`.
+                    depth_abs_fg: Mean per-pixel L1 distance between the foreground
+                        region of `depth_pred` and `depth` as defined by `mask`.
+                    eikonal: Eikonal regularizer `(||grad_theta|| - 1)**2`.
+                    density_tv: The Total Variation regularizer of density
+                        values in `density_grid` (mean norm of the forward
+                        differences of the log-densities along the grid axes).
+                    depth_neg_penalty: `min(depth_pred, 0)**2` penalizing negative
+                        predicted depth values.
+        """
+        metrics = self._calculate_stage(
+            raymarched,
+            ray_bundle,
+            image_rgb,
+            depth_map,
+            fg_probability,
+            mask_crop,
+            keys_prefix,
+        )
+
+        if raymarched.prev_stage:
+            metrics.update(
+                self(
+                    raymarched.prev_stage,
+                    ray_bundle,
+                    image_rgb,
+                    depth_map,
+                    fg_probability,
+                    mask_crop,
+                    keys_prefix=(keys_prefix + "prev_stage_"),
+                )
+            )
+
+        return metrics
+
+    def _calculate_stage(
+        self,
+        raymarched: RendererOutput,
+        ray_bundle: ImplicitronRayBundle,
+        image_rgb: Optional[torch.Tensor] = None,
+        depth_map: Optional[torch.Tensor] = None,
+        fg_probability: Optional[torch.Tensor] = None,
+        mask_crop: Optional[torch.Tensor] = None,
+        keys_prefix: str = "loss_",
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """
+        Calculates the metrics of one rendering stage; see `forward` for the arguments.
+        """
+        # TODO: extract functions
+
+        # reshape from B x ... x DIM to B x DIM x -1 x 1
+        image_rgb_pred, fg_probability_pred, depth_map_pred = [
+            _reshape_nongrid_var(x)
+            for x in [raymarched.features, raymarched.masks, raymarched.depths]
+        ]
+        xys = ray_bundle.xys
+
+        # If ray_bundle is packed then we can sample images in padded state to lower
+        # memory requirements. Instead of having one image for every element in
+        # ray_bundle we can then have one image per unique sampled camera.
+        if ray_bundle.is_packed():
+            xys, first_idxs, num_inputs = ray_bundle.get_padded_xys()
+
+        # reshape the sampling grid as well
+        # TODO: we can get rid of the singular dimension here and in _reshape_nongrid_var
+        # now that we use rend_utils.ndc_grid_sample
+        xys = xys.reshape(xys.shape[0], -1, 1, 2)
+
+        # closure with the given xys
+        def sample_full(tensor, mode):
+            if tensor is None:
+                return tensor
+            return rend_utils.ndc_grid_sample(tensor, xys, mode=mode)
+
+        def sample_packed(tensor, mode):
+            if tensor is None:
+                return tensor
+
+            # Select the images that correspond to the sampled cameras. This must
+            # be done exactly once: `camera_ids` index the full batch, so indexing
+            # the selection again picks wrong images or runs out of bounds as soon
+            # as `camera_ids` differs from `0, ..., len(camera_ids) - 1`.
+            tensor = tensor[ray_bundle.camera_ids]
+            result = rend_utils.ndc_grid_sample(tensor, xys, mode=mode)
+            return padded_to_packed(result, first_idxs, num_inputs, max_size_dim=2)[
+                :, :, None
+            ]  # the result is [n_rays_total_training, 3, 1, 1]
+
+        sample = sample_packed if ray_bundle.is_packed() else sample_full
+
+        # eval all results in this size
+        image_rgb = sample(image_rgb, mode="bilinear")
+        depth_map = sample(depth_map, mode="nearest")
+        fg_probability = sample(fg_probability, mode="nearest")
+        mask_crop = sample(mask_crop, mode="nearest")
+        if mask_crop is None and image_rgb_pred is not None:
+            mask_crop = torch.ones_like(image_rgb_pred[:, :1])
+        if mask_crop is None and depth_map_pred is not None:
+            mask_crop = torch.ones_like(depth_map_pred[:, :1])
+
+        metrics = {}
+        if image_rgb is not None and image_rgb_pred is not None:
+            metrics.update(
+                _rgb_metrics(
+                    image_rgb,
+                    image_rgb_pred,
+                    fg_probability,
+                    fg_probability_pred,
+                    mask_crop,
+                )
+            )
+
+        if fg_probability_pred is not None:
+            metrics["mask_beta_prior"] = utils.beta_prior(fg_probability_pred)
+        if fg_probability is not None and fg_probability_pred is not None:
+            metrics["mask_neg_iou"] = utils.neg_iou_loss(
+                fg_probability_pred, fg_probability, mask=mask_crop
+            )
+            metrics["mask_bce"] = utils.calc_bce(
+                fg_probability_pred, fg_probability, mask=mask_crop
+            )
+
+        if depth_map is not None and depth_map_pred is not None:
+            assert mask_crop is not None
+            _, abs_ = utils.eval_depth(
+                depth_map_pred, depth_map, get_best_scale=True, mask=mask_crop, crop=0
+            )
+            metrics["depth_abs"] = abs_.mean()
+
+            if fg_probability is not None:
+                mask = fg_probability * mask_crop
+                _, abs_ = utils.eval_depth(
+                    depth_map_pred, depth_map, get_best_scale=True, mask=mask, crop=0
+                )
+                metrics["depth_abs_fg"] = abs_.mean()
+
+        # regularizers
+        grad_theta = raymarched.aux.get("grad_theta")
+        if grad_theta is not None:
+            metrics["eikonal"] = _get_eikonal_loss(grad_theta)
+
+        density_grid = raymarched.aux.get("density_grid")
+        if density_grid is not None:
+            metrics["density_tv"] = _get_grid_tv_loss(density_grid)
+
+        if depth_map_pred is not None:
+            metrics["depth_neg_penalty"] = _get_depth_neg_penalty_loss(depth_map_pred)
+
+        if keys_prefix is not None:
+            metrics = {(keys_prefix + k): v for k, v in metrics.items()}
+
+        return metrics
+
+
+def _rgb_metrics(images, images_pred, masks, masks_pred, masks_crop):
+    """Color errors masked by `masks_crop`, plus foreground ones if `masks` is given."""
+    assert masks_crop is not None
+    n_pred, n_gt = images_pred.shape[1], images.shape[1]
+    if n_pred != n_gt:
+        raise ValueError(
+            f"Network output's RGB images had {n_pred} channels. {n_gt} expected."
+        )
+    sq_err = ((images_pred - images) ** 2).mean(dim=1, keepdim=True)
+    crop_mass = masks_crop.sum().clamp(1.0)
+    results = {
+        "rgb_huber": (utils.huber(sq_err, scaling=0.03) * masks_crop).sum() / crop_mass,
+        "rgb_mse": (sq_err * masks_crop).sum() / crop_mass,
+        "rgb_psnr": utils.calc_psnr(images_pred, images, mask=masks_crop),
+    }
+    if masks is not None:
+        fg_masks = masks_crop * masks
+        results["rgb_psnr_fg"] = utils.calc_psnr(images_pred, images, mask=fg_masks)
+        results["rgb_mse_fg"] = (sq_err * fg_masks).sum() / fg_masks.sum().clamp(1.0)
+    return results
+
+
+def _get_eikonal_loss(grad_theta):  # mean of (||grad_theta|| - 1)**2, L2 norm over dim 1
+    return (grad_theta.norm(2, dim=1) - 1).pow(2).mean()
+
+
+def _get_grid_tv_loss(grid, log_domain: bool = True, eps: float = 1e-5):
+    """Isotropic total variation of `grid`, by default computed on `log(grid + eps)`."""
+    if log_domain:
+        if (grid <= -eps).any():
+            warnings.warn("Grid has negative values; this will produce NaN loss")
+        grid = torch.log(grid + eps)
+
+    # forward differences along the last three axes; the last rows/cols are ignored
+    base = grid[..., :-1, :-1, :-1]
+    sq_diffs = (
+        (grid[..., :-1, :-1, 1:] - base) ** 2
+        + (grid[..., :-1, 1:, :-1] - base) ** 2
+        + (grid[..., 1:, :-1, :-1] - base) ** 2
+    )
+    return torch.mean(utils.safe_sqrt(sq_diffs, eps=1e-5))
+
+
+def _get_depth_neg_penalty_loss(depth):
+    # mean of min(depth, 0)**2: only the negative depth values are penalized
+    return depth.clamp(max=0.0).pow(2).mean()
+
+
+def _reshape_nongrid_var(x):
+    # maps a `B x ... x DIM` tensor to a contiguous `B x DIM x -1 x 1` one; None stays None
+    if x is not None:
+        n_batch, *_, n_dim = x.shape
+        x = x.reshape(n_batch, -1, 1, n_dim).permute(0, 3, 1, 2).contiguous()
+    return x
diff --git a/pytorch3d/pytorch3d/implicitron/models/model_dbir.py b/pytorch3d/pytorch3d/implicitron/models/model_dbir.py
new file mode 100644
index 0000000000000000000000000000000000000000..4f470a6e00da3e0b146880d8a0cfb18b03ec37d7
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/model_dbir.py
@@ -0,0 +1,151 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from typing import Any, Dict, List, Optional, Tuple
+
+import torch
+from pytorch3d.implicitron.dataset.utils import is_known_frame
+from pytorch3d.implicitron.tools.config import registry
+from pytorch3d.implicitron.tools.point_cloud_utils import (
+    get_rgbd_point_cloud,
+    render_point_cloud_pytorch3d,
+)
+from pytorch3d.renderer.cameras import CamerasBase
+from pytorch3d.structures import Pointclouds
+
+from .base_model import ImplicitronModelBase, ImplicitronRender
+from .renderer.base import EvaluationMode
+
+
+@registry.register
+class ModelDBIR(ImplicitronModelBase):
+    """
+    A simple depth-based image rendering model.
+
+    Args:
+        render_image_width: The width of the rendered rectangular images.
+        render_image_height: The height of the rendered rectangular images.
+        bg_color: The color of the background.
+        max_points: Maximum number of points in the point cloud formed by
+            unprojecting all source view depths. If more points are present,
+            they are randomly subsampled to this number of points without
+            replacement. Non-positive values disable the subsampling.
+    """
+
+    render_image_width: int = 256
+    render_image_height: int = 256
+    bg_color: Tuple[float, float, float] = (0.0, 0.0, 0.0)
+    max_points: int = -1
+
+    # pyre-fixme[14]: `forward` overrides method defined in `ImplicitronModelBase`
+    #  inconsistently.
+    def forward(
+        self,
+        *,  # force keyword-only arguments
+        image_rgb: Optional[torch.Tensor],
+        camera: CamerasBase,
+        fg_probability: Optional[torch.Tensor],
+        mask_crop: Optional[torch.Tensor],
+        depth_map: Optional[torch.Tensor],
+        sequence_name: Optional[List[str]],
+        evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION,
+        frame_type: List[str],
+        **kwargs,
+    ) -> Dict[str, Any]:  # TODO: return a namedtuple or dataclass
+        """
+        Given a set of input source cameras images and depth maps, unprojects
+        all RGBD maps to a colored point cloud and renders into the target views.
+
+        Args:
+            camera: A batch of `N` PyTorch3D cameras.
+            image_rgb: A batch of `N` images of shape `(N, 3, H, W)`.
+            depth_map: A batch of `N` depth maps of shape `(N, 1, H, W)`.
+            fg_probability: `N` foreground probability maps of shape `(N, 1, H, W)`.
+            frame_type: A list of `N` frame type indicators; the known frames are
+                the source views and all the other frames are the target views.
+            mask_crop, sequence_name, evaluation_mode, **kwargs: Not used by this model.
+
+        Returns:
+            preds: A dict with the following fields:
+                implicitron_render: The rendered colors, depth and mask
+                    of the target views.
+                point_cloud: The point cloud of the scene. Its renders are
+                    stored in `implicitron_render`.
+        """
+        # image, foreground mask and depth are all required: fail early with ValueError
+        if image_rgb is None:
+            raise ValueError("ModelDBIR needs image input")
+
+        if fg_probability is None:
+            raise ValueError("ModelDBIR needs foreground mask input")
+
+        if depth_map is None:
+            raise ValueError("ModelDBIR needs depth map input")
+        # known frames serve as the source views, all the other frames are targets
+        is_known = is_known_frame(frame_type)
+        is_known_idx = torch.where(is_known)[0]
+        # binarize the foreground probabilities, keeping the dtype of `image_rgb`
+        mask_fg = (fg_probability > 0.5).type_as(image_rgb)
+
+        point_cloud = get_rgbd_point_cloud(
+            # pyre-fixme[6]: For 1st param expected `Union[List[int], int,
+            #  LongTensor]` but got `Tensor`.
+            camera[is_known_idx],
+            image_rgb[is_known_idx],
+            depth_map[is_known_idx],
+            mask_fg[is_known_idx],
+        )
+        # NOTE(review): `.item()` presumes a single cloud in `point_cloud` - confirm
+        pcl_size = point_cloud.num_points_per_cloud().item()
+        if (self.max_points > 0) and (pcl_size > self.max_points):
+            # pyre-fixme[6]: For 1st param expected `int` but got `Union[bool,
+            #  float, int]`.
+            prm = torch.randperm(pcl_size)[: self.max_points]
+            point_cloud = Pointclouds(
+                point_cloud.points_padded()[:, prm, :],
+                # pyre-fixme[16]: Optional type has no attribute `__getitem__`.
+                features=point_cloud.features_padded()[:, prm, :],
+            )
+
+        is_target_idx = torch.where(~is_known)[0]
+
+        depth_render, image_render, mask_render = [], [], []
+
+        # render into target frames in a for loop to save memory
+        for tgt_idx in is_target_idx:
+            _image_render, _mask_render, _depth_render = render_point_cloud_pytorch3d(
+                camera[int(tgt_idx)],
+                point_cloud,
+                render_size=(self.render_image_height, self.render_image_width),
+                point_radius=1e-2,
+                topk=10,
+                bg_color=self.bg_color,
+            )
+            _image_render = _image_render.clamp(0.0, 1.0)
+            # the mask is the set of pixels with opacity bigger than eps
+            _mask_render = (_mask_render > 1e-4).float()
+
+            depth_render.append(_depth_render)
+            image_render.append(_image_render)
+            mask_render.append(_mask_render)
+        # NOTE(review): `torch.cat` rejects empty lists, so this assumes >= 1 target
+        implicitron_render = ImplicitronRender(
+            **{
+                k: torch.cat(v, dim=0)
+                for k, v in zip(
+                    ["depth_render", "image_render", "mask_render"],
+                    [depth_render, image_render, mask_render],
+                )
+            }
+        )
+
+        preds = {
+            "implicitron_render": implicitron_render,
+            "point_cloud": point_cloud,
+        }
+
+        return preds
diff --git a/pytorch3d/pytorch3d/implicitron/models/overfit_model.py b/pytorch3d/pytorch3d/implicitron/models/overfit_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..40ee5a1ba8008e5f02d9037312c47452eb5d2970
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/overfit_model.py
@@ -0,0 +1,664 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+# Note: The #noqa comments below are for unused imports of pluggable implementations
+# which are part of implicitron. They ensure that the registry is prepopulated.
+
+import functools
+import logging
+from dataclasses import field
+from typing import Any, Callable, Dict, List, Optional, Tuple, TYPE_CHECKING, Union
+
+import torch
+from omegaconf import DictConfig
+
+from pytorch3d.implicitron.models.base_model import (
+    ImplicitronModelBase,
+    ImplicitronRender,
+)
+from pytorch3d.implicitron.models.global_encoder.global_encoder import GlobalEncoderBase
+from pytorch3d.implicitron.models.implicit_function.base import ImplicitFunctionBase
+from pytorch3d.implicitron.models.metrics import (
+    RegularizationMetricsBase,
+    ViewMetricsBase,
+)
+
+from pytorch3d.implicitron.models.renderer.base import (
+    BaseRenderer,
+    EvaluationMode,
+    ImplicitronRayBundle,
+    RendererOutput,
+    RenderSamplingMode,
+)
+from pytorch3d.implicitron.models.renderer.ray_sampler import RaySamplerBase
+from pytorch3d.implicitron.models.utils import (
+    apply_chunked,
+    chunk_generator,
+    log_loss_weights,
+    preprocess_input,
+    weighted_sum_losses,
+)
+from pytorch3d.implicitron.tools import vis_utils
+from pytorch3d.implicitron.tools.config import (
+    expand_args_fields,
+    registry,
+    run_auto_creation,
+)
+
+from pytorch3d.implicitron.tools.rasterize_mc import rasterize_sparse_ray_bundle
+from pytorch3d.renderer import utils as rend_utils
+from pytorch3d.renderer.cameras import CamerasBase
+
+
+if TYPE_CHECKING:  # true for static type checkers only, never at runtime
+    from visdom import Visdom
+logger = logging.getLogger(__name__)
+# NOTE(review): presumably args removed from implicit function configs - confirm
+IMPLICIT_FUNCTION_ARGS_TO_REMOVE: List[str] = [
+    "feature_vector_size",
+    "encoding_dim",
+    "latent_dim",
+    "color_dim",
+]
+
+
+@registry.register
+class OverfitModel(ImplicitronModelBase):  # pyre-ignore: 13
+    """
+    OverfitModel is a wrapper for the neural implicit
+    rendering and reconstruction pipeline which consists
+    of the following sequence of 4 steps:
+
+
+        (1) Ray Sampling
+        ------------------
+        Rays are sampled from an image grid based on the target view(s).
+                │
+                ▼
+        (2) Implicit Function Evaluation
+        ------------------
+        Evaluate the implicit function(s) at the sampled ray points
+        (also optionally pass in a global encoding from global_encoder).
+                │
+                ▼
+        (3) Rendering
+        ------------------
+        Render the image into the target cameras by raymarching along
+        the sampled rays and aggregating the colors and densities
+        output by the implicit function in (2).
+                │
+                ▼
+        (4) Loss Computation
+        ------------------
+        Compute losses based on the predicted target image(s).
+
+
+    The `forward` function of OverfitModel executes
+    this sequence of steps. Currently, steps 1, 2, 3
+    can be customized by initializing a subclass of the appropriate
+    base class and adding the newly created module to the registry.
+    Please see https://github.com/facebookresearch/pytorch3d/blob/main/projects/implicitron_trainer/README.md#custom-plugins
+    for more details on how to create and register a custom component.
+
+    In the config .yaml files for experiments, the parameters below are
+    contained in the
+    `model_factory_ImplicitronModelFactory_args.model_OverfitModel_args`
+    node. As OverfitModel derives from ReplaceableBase, the input arguments are
+    parsed by the run_auto_creation function to initialize the
+    necessary member modules. Please see implicitron_trainer/README.md
+    for more details on this process.
+
+    Args:
+        mask_images: Whether or not to mask the RGB image background given the
+            foreground mask (the `fg_probability` argument of `GenericModel.forward`)
+        mask_depths: Whether or not to mask the depth image background given the
+            foreground mask (the `fg_probability` argument of `GenericModel.forward`)
+        render_image_width: Width of the output image to render
+        render_image_height: Height of the output image to render
+        mask_threshold: If greater than 0.0, the foreground mask is
+            thresholded by this value before being applied to the RGB/Depth images
+        output_rasterized_mc: If True, visualize the Monte-Carlo pixel renders by
+            splatting onto an image grid. Default: False.
+        bg_color: RGB values for setting the background color of input image
+            if mask_images=True. Defaults to (0.0, 0.0, 0.0). Each renderer has its own
+            way to determine the background color of its output, unrelated to this.
+        chunk_size_grid: The total number of points which can be rendered
+            per chunk. This is used to compute the number of rays used
+            per chunk when the chunked version of the renderer is used (in order
+            to fit rendering on all rays in memory)
+        render_features_dimensions: The number of output features to render.
+            Defaults to 3, corresponding to RGB images.
+        sampling_mode_training: The sampling method to use during training. Must be
+            a value from the RenderSamplingMode Enum.
+        sampling_mode_evaluation: Same as above but for evaluation.
+        global_encoder_class_type: The name of the class to use for global_encoder,
+            which must be available in the registry. Or `None` to disable global encoder.
+        global_encoder: An instance of `GlobalEncoder`. This is used to generate an encoding
+            of the image (referred to as the global_code) that can be used to model aspects of
+            the scene such as multiple objects or morphing objects. It is up to the implicit
+            function definition how to use it, but the most typical way is to broadcast and
+            concatenate to the other inputs for the implicit function.
+        raysampler_class_type: The name of the raysampler class which is available
+            in the global registry.
+        raysampler: An instance of RaySampler which is used to emit
+            rays from the target view(s).
+        renderer_class_type: The name of the renderer class which is available in the global
+            registry.
+        renderer: A renderer class which inherits from BaseRenderer. This is used to
+            generate the images from the target view(s).
+        share_implicit_function_across_passes: If set to True
+            coarse_implicit_function is automatically set as implicit_function
+            (coarse_implicit_function=implicit_function). The
+            implicit_functions are then run sequentially during the rendering.
+        implicit_function_class_type: The type of implicit function to use which
+            is available in the global registry.
+        implicit_function: An instance of ImplicitFunctionBase.
+        coarse_implicit_function_class_type: The type of implicit function to use which
+            is available in the global registry.
+        coarse_implicit_function: An instance of ImplicitFunctionBase.
+            If set and `share_implicit_function_across_passes` is set to False,
+            coarse_implicit_function is instantiated on itself. It
+            is then used as the second pass during the rendering.
+            If set to None, we only do a single pass with implicit_function.
+        view_metrics: An instance of ViewMetricsBase used to compute loss terms which
+            are independent of the model's parameters.
+        view_metrics_class_type: The type of view metrics to use, must be available in
+            the global registry.
+        regularization_metrics: An instance of RegularizationMetricsBase used to compute
+            regularization terms which can depend on the model's parameters.
+        regularization_metrics_class_type: The type of regularization metrics to use,
+            must be available in the global registry.
+        loss_weights: A dictionary with a {loss_name: weight} mapping; see documentation
+            for `ViewMetrics` class for available loss functions.
+        log_vars: A list of variable names which should be logged.
+            The names should correspond to a subset of the keys of the
+            dict `preds` output by the `forward` function.
+    """  # noqa: B950
+
+    mask_images: bool = True
+    mask_depths: bool = True
+    render_image_width: int = 400
+    render_image_height: int = 400
+    mask_threshold: float = 0.5
+    output_rasterized_mc: bool = False
+    bg_color: Tuple[float, float, float] = (0.0, 0.0, 0.0)
+    chunk_size_grid: int = 4096
+    render_features_dimensions: int = 3
+    tqdm_trigger_threshold: int = 16
+
+    n_train_target_views: int = 1
+    sampling_mode_training: str = "mask_sample"
+    sampling_mode_evaluation: str = "full_grid"
+
+    # ---- global encoder settings
+    global_encoder_class_type: Optional[str] = None
+    global_encoder: Optional[GlobalEncoderBase]
+
+    # ---- raysampler
+    raysampler_class_type: str = "AdaptiveRaySampler"
+    raysampler: RaySamplerBase
+
+    # ---- renderer configs
+    renderer_class_type: str = "MultiPassEmissionAbsorptionRenderer"
+    renderer: BaseRenderer
+
+    # ---- implicit function settings
+    share_implicit_function_across_passes: bool = False
+    implicit_function_class_type: str = "NeuralRadianceFieldImplicitFunction"
+    implicit_function: ImplicitFunctionBase
+    coarse_implicit_function_class_type: Optional[str] = None
+    coarse_implicit_function: Optional[ImplicitFunctionBase]
+
+    # ----- metrics
+    view_metrics: ViewMetricsBase
+    view_metrics_class_type: str = "ViewMetrics"
+
+    regularization_metrics: RegularizationMetricsBase
+    regularization_metrics_class_type: str = "RegularizationMetrics"
+
+    # ---- loss weights
+    loss_weights: Dict[str, float] = field(
+        default_factory=lambda: {
+            "loss_rgb_mse": 1.0,
+            "loss_prev_stage_rgb_mse": 1.0,
+            "loss_mask_bce": 0.0,
+            "loss_prev_stage_mask_bce": 0.0,
+        }
+    )
+
+    # ---- variables to be logged (logger automatically ignores if not computed)
+    log_vars: List[str] = field(
+        default_factory=lambda: [
+            "loss_rgb_psnr_fg",
+            "loss_rgb_psnr",
+            "loss_rgb_mse",
+            "loss_rgb_huber",
+            "loss_depth_abs",
+            "loss_depth_abs_fg",
+            "loss_mask_neg_iou",
+            "loss_mask_bce",
+            "loss_mask_beta_prior",
+            "loss_eikonal",
+            "loss_density_tv",
+            "loss_depth_neg_penalty",
+            "loss_autodecoder_norm",
+            # metrics that are only logged in 2+stage renderers
+            "loss_prev_stage_rgb_mse",
+            "loss_prev_stage_rgb_psnr_fg",
+            "loss_prev_stage_rgb_psnr",
+            "loss_prev_stage_mask_bce",
+            # basic metrics
+            "objective",
+            "epoch",
+            "sec/it",
+        ]
+    )
+
+    @classmethod
+    def pre_expand(cls) -> None:
+        # Names are unused here (hence F401); try/finally is used to bypass cinder's lazy imports
+        try:
+            from pytorch3d.implicitron.models.implicit_function.idr_feature_field import (  # noqa: F401, B950
+                IdrFeatureField,
+            )
+            from pytorch3d.implicitron.models.implicit_function.neural_radiance_field import (  # noqa: F401, B950
+                NeuralRadianceFieldImplicitFunction,
+            )
+            from pytorch3d.implicitron.models.implicit_function.scene_representation_networks import (  # noqa: F401, B950
+                SRNImplicitFunction,
+            )
+            from pytorch3d.implicitron.models.renderer.lstm_renderer import (  # noqa: F401
+                LSTMRenderer,
+            )
+            from pytorch3d.implicitron.models.renderer.multipass_ea import (  # noqa: F401
+                MultiPassEmissionAbsorptionRenderer,
+            )
+            from pytorch3d.implicitron.models.renderer.sdf_renderer import (  # noqa: F401
+                SignedDistanceFunctionRenderer,
+            )
+        finally:
+            pass  # nothing to clean up; the try/finally wrapper itself is the point
+
+    def __post_init__(self) -> None:
+        # The pluggable attributes (raysampler, renderer, ...) are filled in by run_auto_creation
+        run_auto_creation(self)
+        log_loss_weights(self.loss_weights, logger)
+        # Sharing is wired up here rather than in create_coarse_implicit_function because
+        # run_auto_creation will create coarse_implicit_function before implicit_function
+        if self.share_implicit_function_across_passes:
+            self.coarse_implicit_function = self.implicit_function
+
+    def forward(
+        self,
+        *,  # force keyword-only arguments
+        image_rgb: Optional[torch.Tensor],
+        camera: CamerasBase,
+        fg_probability: Optional[torch.Tensor] = None,
+        mask_crop: Optional[torch.Tensor] = None,
+        depth_map: Optional[torch.Tensor] = None,
+        sequence_name: Optional[List[str]] = None,
+        frame_timestamp: Optional[torch.Tensor] = None,
+        evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION,
+        **kwargs,  # accepted but not referenced anywhere in this method's body
+    ) -> Dict[str, Any]:
+        """
+        Args:
+            image_rgb: A tensor of shape `(B, 3, H, W)` containing a batch of rgb images;
+                the first `min(B, n_train_target_views)` images are considered targets and
+                are used to supervise the renders; the rest corresponding to the source
+                viewpoints from which features will be extracted.
+            camera: An instance of CamerasBase containing a batch of `B` cameras corresponding
+                to the viewpoints of target images, from which the rays will be sampled,
+                and source images, which will be used for intersecting with target rays.
+            fg_probability: A tensor of shape `(B, 1, H, W)` containing a batch of
+                foreground masks.
+            mask_crop: A binary tensor of shape `(B, 1, H, W)` denoting valid
+                regions in the input images (i.e. regions that do not correspond
+                to, e.g., zero-padding). When the `RaySampler`'s sampling mode is set to
+                "mask_sample", rays will be sampled in the non zero regions.
+            depth_map: A tensor of shape `(B, 1, H, W)` containing a batch of depth maps.
+            sequence_name: A list of `B` strings corresponding to the sequence names
+                from which images `image_rgb` were extracted. They are used to match
+                target frames with relevant source frames.
+            frame_timestamp: Optionally a tensor of shape `(B,)` containing a batch
+                of frame timestamps.
+            evaluation_mode: one of EvaluationMode.TRAINING or
+                EvaluationMode.EVALUATION which determines the settings used for
+                rendering.
+
+        Returns:
+            preds: A dictionary containing all outputs of the forward pass including the
+                rendered images, depths, masks, losses and other metrics.
+        """
+        image_rgb, fg_probability, depth_map = preprocess_input(
+            image_rgb,
+            fg_probability,
+            depth_map,
+            self.mask_images,
+            self.mask_depths,
+            self.mask_threshold,
+            self.bg_color,
+        )
+
+        # Determine the used ray sampling mode.
+        sampling_mode = RenderSamplingMode(
+            self.sampling_mode_training
+            if evaluation_mode == EvaluationMode.TRAINING
+            else self.sampling_mode_evaluation
+        )
+
+        # (1) Sample rendering rays with the ray sampler.
+        # pyre-ignore[29]
+        ray_bundle: ImplicitronRayBundle = self.raysampler(
+            camera,
+            evaluation_mode,
+            mask=mask_crop
+            if mask_crop is not None and sampling_mode == RenderSamplingMode.MASK_SAMPLE
+            else None,
+        )
+
+        inputs_to_be_chunked = {}
+        if fg_probability is not None and self.renderer.requires_object_mask():
+            sampled_fb_prob = rend_utils.ndc_grid_sample(
+                fg_probability, ray_bundle.xys, mode="nearest"
+            )
+            inputs_to_be_chunked["object_mask"] = sampled_fb_prob > 0.5  # NOTE(review): literal 0.5, not self.mask_threshold - confirm
+
+        # (2)-(3) Implicit function evaluation and Rendering
+        implicit_functions: List[Union[Callable, ImplicitFunctionBase]] = [
+            self.implicit_function
+        ]
+        if self.coarse_implicit_function is not None:
+            implicit_functions = [self.coarse_implicit_function, self.implicit_function]
+
+        if self.global_encoder is not None:
+            global_code = self.global_encoder(  # pyre-fixme[29]
+                sequence_name=sequence_name,
+                frame_timestamp=frame_timestamp,
+            )
+            implicit_functions = [
+                functools.partial(implicit_function, global_code=global_code)
+                if isinstance(implicit_function, Callable)
+                else functools.partial(
+                    implicit_function.forward, global_code=global_code
+                )
+                for implicit_function in implicit_functions
+            ]
+        rendered = self._render(
+            ray_bundle=ray_bundle,
+            sampling_mode=sampling_mode,
+            evaluation_mode=evaluation_mode,
+            implicit_functions=implicit_functions,
+            inputs_to_be_chunked=inputs_to_be_chunked,
+        )
+
+        # A dict to store losses as well as rendering results.
+        preds: Dict[str, Any] = self.view_metrics(
+            results={},
+            raymarched=rendered,
+            ray_bundle=ray_bundle,
+            image_rgb=image_rgb,
+            depth_map=depth_map,
+            fg_probability=fg_probability,
+            mask_crop=mask_crop,
+        )
+
+        preds.update(
+            self.regularization_metrics(
+                results=preds,
+                model=self,
+            )
+        )
+
+        if sampling_mode == RenderSamplingMode.MASK_SAMPLE:  # *_render keys are added only if output_rasterized_mc
+            if self.output_rasterized_mc:
+                # Visualize the monte-carlo pixel renders by splatting onto
+                # an image grid.
+                (
+                    preds["images_render"],
+                    preds["depths_render"],
+                    preds["masks_render"],
+                ) = rasterize_sparse_ray_bundle(
+                    ray_bundle,
+                    rendered.features,
+                    (self.render_image_height, self.render_image_width),
+                    rendered.depths,
+                    masks=rendered.masks,
+                )
+        elif sampling_mode == RenderSamplingMode.FULL_GRID:
+            preds["images_render"] = rendered.features.permute(0, 3, 1, 2)
+            preds["depths_render"] = rendered.depths.permute(0, 3, 1, 2)
+            preds["masks_render"] = rendered.masks.permute(0, 3, 1, 2)
+
+            preds["implicitron_render"] = ImplicitronRender(
+                image_render=preds["images_render"],
+                depth_render=preds["depths_render"],
+                mask_render=preds["masks_render"],
+            )
+        else:
+            raise AssertionError("Unreachable state")
+
+        # (4) Compute losses
+        # finally get the optimization objective using self.loss_weights
+        objective = self._get_objective(preds)
+        if objective is not None:
+            preds["objective"] = objective
+
+        return preds
+
+    def _get_objective(self, preds: Dict[str, torch.Tensor]) -> Optional[torch.Tensor]:
+        """
+        Compute the overall loss by passing `preds` and `self.loss_weights` to
+        `weighted_sum_losses`; per the return annotation the result may be None.
+        """
+        return weighted_sum_losses(preds, self.loss_weights)
+
+    def visualize(
+        self,
+        viz: Optional["Visdom"],
+        visdom_env_imgs: str,
+        preds: Dict[str, Any],
+        prefix: str,
+    ) -> None:
+        """
+        Helper function to visualize the predictions generated
+        in the forward pass; logs a message and returns early without a live connection.
+
+        Args:
+            viz: Visdom connection object; None (or a failed connection check) skips the plot
+            visdom_env_imgs: name of visdom environment for the images.
+            preds: predictions dict like returned by forward()
+            prefix: prepended to the names of images
+        """
+        if viz is None or not viz.check_connection():
+            logger.info("no visdom server! -> skipping batch vis")
+            return
+
+        idx_image = 0  # fixed index; it is only used to build the title below
+        title = f"{prefix}_im{idx_image}"
+
+        vis_utils.visualize_basics(viz, preds, visdom_env_imgs, title=title)
+
+    def _render(
+        self,
+        *,
+        ray_bundle: ImplicitronRayBundle,
+        inputs_to_be_chunked: Dict[str, torch.Tensor],
+        sampling_mode: RenderSamplingMode,
+        **kwargs,  # forwarded unchanged to chunk_generator or to the renderer
+    ) -> RendererOutput:
+        """
+        Args:
+            ray_bundle: An `ImplicitronRayBundle` object containing the parametrizations of the
+                sampled rendering rays.
+            inputs_to_be_chunked: A collection of tensors of shape `(B, _, H, W)`. E.g.
+                SignedDistanceFunctionRenderer requires "object_mask", shape
+                (B, 1, H, W), the silhouette of the object in the image. When
+                chunking, they are passed to the renderer as shape
+                `(B, _, chunksize)`.
+            sampling_mode: The sampling method to use. Must be a value from the
+                RenderSamplingMode Enum.
+
+        Returns:
+            An instance of RendererOutput
+        """
+        if sampling_mode == RenderSamplingMode.FULL_GRID and self.chunk_size_grid > 0:  # chunk only full-grid renders
+            return apply_chunked(
+                self.renderer,
+                chunk_generator(
+                    self.chunk_size_grid,
+                    ray_bundle,
+                    inputs_to_be_chunked,
+                    self.tqdm_trigger_threshold,
+                    **kwargs,
+                ),
+                lambda batch: torch.cat(batch, dim=1).reshape(  # join chunks on dim 1, then
+                    *ray_bundle.lengths.shape[:-1], -1  # restore the leading dims of `lengths`
+                ),
+            )
+        else:
+            # pyre-fixme[29]: `BaseRenderer` is not a function.
+            return self.renderer(
+                ray_bundle=ray_bundle,
+                **inputs_to_be_chunked,
+                **kwargs,
+            )
+
+    @classmethod
+    def raysampler_tweak_args(cls, type, args: DictConfig) -> None:
+        """
+        We don't expose certain fields of the raysampler because we want to set
+        them from our own members; the four keys below are deleted from `args` in place.
+        """
+        del args["sampling_mode_training"]  # `del`, unlike pop(key, None), fails on a missing key
+        del args["sampling_mode_evaluation"]
+        del args["image_width"]
+        del args["image_height"]
+
+    def create_raysampler(self) -> None:
+        extra_args = {  # the same four keys that raysampler_tweak_args deletes from the config
+            "sampling_mode_training": self.sampling_mode_training,
+            "sampling_mode_evaluation": self.sampling_mode_evaluation,
+            "image_width": self.render_image_width,
+            "image_height": self.render_image_height,
+        }
+        raysampler_args = getattr(  # per-class config stored as raysampler_<ClassName>_args
+            self, "raysampler_" + self.raysampler_class_type + "_args"
+        )
+        self.raysampler = registry.get(RaySamplerBase, self.raysampler_class_type)(
+            **raysampler_args, **extra_args
+        )
+
+    @classmethod
+    def renderer_tweak_args(cls, type, args: DictConfig) -> None:
+        """
+        We don't expose certain fields of the renderer because we want to set
+        them based on other inputs; the two keys below are removed from `args` in place.
+        """
+        args.pop("render_features_dimensions", None)  # default of None: a missing key is not an error
+        args.pop("object_bounding_sphere", None)
+
+    def create_renderer(self) -> None:
+        extra_args = {}  # stays empty unless the renderer is the SignedDistanceFunctionRenderer
+
+        if self.renderer_class_type == "SignedDistanceFunctionRenderer":
+            extra_args["render_features_dimensions"] = self.render_features_dimensions
+            if not hasattr(self.raysampler, "scene_extent"):  # reads self.raysampler, so it must already exist
+                raise ValueError(
+                    "SignedDistanceFunctionRenderer requires"
+                    + " a raysampler that defines the 'scene_extent' field"
+                    + " (this field is supported by, e.g., the adaptive raysampler - "
+                    + " self.raysampler_class_type='AdaptiveRaySampler')."
+                )
+            extra_args["object_bounding_sphere"] = self.raysampler.scene_extent
+
+        renderer_args = getattr(self, "renderer_" + self.renderer_class_type + "_args")
+        self.renderer = registry.get(BaseRenderer, self.renderer_class_type)(
+            **renderer_args, **extra_args
+        )
+
+    @classmethod
+    def implicit_function_tweak_args(cls, type, args: DictConfig) -> None:
+        """
+        We don't expose certain implicit_function fields because we want to set
+        them based on other inputs; they are removed from `args` in place.
+        """
+        for arg in IMPLICIT_FUNCTION_ARGS_TO_REMOVE:
+            args.pop(arg, None)  # a key that is not present is silently skipped
+
+    @classmethod
+    def coarse_implicit_function_tweak_args(cls, type, args: DictConfig) -> None:
+        """
+        We don't expose certain coarse_implicit_function fields because we want to set
+        them based on other inputs; they are removed from `args` in place.
+        """
+        for arg in IMPLICIT_FUNCTION_ARGS_TO_REMOVE:
+            args.pop(arg, None)  # a key that is not present is silently skipped
+
+    def _create_extra_args_for_implicit_function(self) -> Dict[str, Any]:
+        extra_args: Dict[str, Any] = {}  # constructor kwargs derived from this model's own settings
+        global_encoder_dim = (  # 0 when no global encoder is configured
+            0 if self.global_encoder is None else self.global_encoder.get_encoding_dim()
+        )
+        if self.implicit_function_class_type in (  # NOTE(review): also used for the coarse function - confirm
+            "NeuralRadianceFieldImplicitFunction",
+            "NeRFormerImplicitFunction",
+        ):
+            extra_args["latent_dim"] = global_encoder_dim
+            extra_args["color_dim"] = self.render_features_dimensions
+
+        if self.implicit_function_class_type == "IdrFeatureField":
+            extra_args["feature_work_size"] = global_encoder_dim
+            extra_args["feature_vector_size"] = self.render_features_dimensions
+
+        if self.implicit_function_class_type == "SRNImplicitFunction":
+            extra_args["latent_dim"] = global_encoder_dim
+        return extra_args  # empty for any class type not listed above
+
+    def create_implicit_function(self) -> None:
+        implicit_function_type = registry.get(  # look the class up by its configured name
+            ImplicitFunctionBase, self.implicit_function_class_type
+        )
+        expand_args_fields(implicit_function_type)
+
+        config_name = f"implicit_function_{self.implicit_function_class_type}_args"
+        config = getattr(self, config_name, None)
+        if config is None:  # a missing (or None) per-class config is an error here
+            raise ValueError(f"{config_name} not present")
+
+        extra_args = self._create_extra_args_for_implicit_function()
+        self.implicit_function = implicit_function_type(**config, **extra_args)
+
+    def create_coarse_implicit_function(self) -> None:
+        # If coarse_implicit_function_class_type has been defined (and sharing is off)
+        # then we init a separate module based on its arguments
+        if (
+            self.coarse_implicit_function_class_type is not None
+            and not self.share_implicit_function_across_passes
+        ):
+            config_name = "coarse_implicit_function_{0}_args".format(
+                self.coarse_implicit_function_class_type
+            )
+            config = getattr(self, config_name, {})  # NOTE(review): a missing config silently becomes {} here
+
+            implicit_function_type = registry.get(
+                ImplicitFunctionBase,
+                # pyre-ignore: config is None allow to check if this is None.
+                self.coarse_implicit_function_class_type,
+            )
+            expand_args_fields(implicit_function_type)
+
+            extra_args = self._create_extra_args_for_implicit_function()
+            self.coarse_implicit_function = implicit_function_type(
+                **config, **extra_args
+            )
+        elif self.share_implicit_function_across_passes:
+            # Since coarse_implicit_function is initialised before
+            # implicit_function we handle this case in the post_init.
+            pass
+        else:
+            self.coarse_implicit_function = None  # no coarse class type and no sharing: single pass
diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/__init__.py b/pytorch3d/pytorch3d/implicitron/models/renderer/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/renderer/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/base.py b/pytorch3d/pytorch3d/implicitron/models/renderer/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e891bf763454f540f1d22717c33d9a6c375593a
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/renderer/base.py
@@ -0,0 +1,411 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any, Dict, List, Optional, Tuple
+
+import torch
+from pytorch3d.implicitron.tools.config import ReplaceableBase
+from pytorch3d.ops import packed_to_padded
+from pytorch3d.renderer.implicit.utils import ray_bundle_variables_to_ray_points
+
+
+class EvaluationMode(Enum):  # determines the settings used for rendering (see BaseRenderer.forward)
+    TRAINING = "training"
+    EVALUATION = "evaluation"
+
+
+class RenderSamplingMode(Enum):  # the available ray sampling modes
+    MASK_SAMPLE = "mask_sample"
+    FULL_GRID = "full_grid"
+
+
+class ImplicitronRayBundle:
+    """
+    Parametrizes points along projection rays by storing ray `origins`,
+    `directions` vectors and `lengths` at which the ray-points are sampled.
+    Furthermore, the xy-locations (`xys`) of the ray pixels are stored as well.
+    Note that `directions` don't have to be normalized; they define unit vectors
+    in the respective 1D coordinate systems; see documentation for
+    :func:`ray_bundle_to_ray_points` for the conversion formula.
+
+    Ray bundle may represent rays from multiple cameras. In that case, cameras
+    are stored in the packed form (i.e. rays from the same camera are stored in
+    the consecutive elements). The following indices will be set:
+        camera_ids: A tensor of shape (N, ) which indicates which camera
+            was used to sample the rays. `N` is the number of different
+            sampled cameras.
+        camera_counts: A tensor of shape (N, ) which indicates how many times the
+            corresponding camera in `camera_ids` was sampled.
+            `sum(camera_counts) == minibatch`, where `minibatch = origins.shape[0]`.
+
+    Attributes:
+        origins: A tensor of shape `(..., 3)` denoting the
+            origins of the sampling rays in world coords.
+        directions: A tensor of shape `(..., 3)` containing the direction
+            vectors of sampling rays in world coords. They don't have to be normalized;
+            they define unit vectors in the respective 1D coordinate systems; see
+            documentation for :func:`ray_bundle_to_ray_points` for the conversion formula.
+        lengths: A tensor of shape `(..., num_points_per_ray)`
+            containing the lengths at which the rays are sampled.
+        xys: A tensor of shape `(..., 2)`, the xy-locations (`xys`) of the ray pixels
+        camera_ids: An optional tensor of shape (N, ) which indicates which camera
+            was used to sample the rays. `N` is the number of unique sampled cameras.
+        camera_counts: An optional tensor of shape (N, ) indicates how many times the
+            corresponding camera in `camera_ids` was sampled.
+            `sum(camera_counts)==total_number_of_rays`.
+        bins: An optional tensor of shape `(..., num_points_per_ray + 1)`
+            containing the bins at which the rays are sampled. In this case
+            lengths should be equal to the midpoints of bins `(..., num_points_per_ray)`.
+        pixel_radii_2d: An optional tensor of shape `(..., 1)`
+            base radii of the conical frustums.
+
+    Raises:
+        ValueError: If neither bins nor lengths is provided.
+        ValueError: If bins is provided and its last dim is less than or equal to 1.
+    """
+
+    def __init__(
+        self,
+        origins: torch.Tensor,
+        directions: torch.Tensor,
+        lengths: Optional[torch.Tensor],
+        xys: torch.Tensor,
+        camera_ids: Optional[torch.LongTensor] = None,
+        camera_counts: Optional[torch.LongTensor] = None,
+        bins: Optional[torch.Tensor] = None,
+        pixel_radii_2d: Optional[torch.Tensor] = None,
+    ) -> None:
+        if bins is not None and bins.shape[-1] <= 1:
+            raise ValueError(
+                "The last dim of bins must be at least superior or equal to 2."
+            )
+
+        if bins is None and lengths is None:
+            raise ValueError(
+                "Please set either bins or lengths to initialize an ImplicitronRayBundle."
+            )
+
+        self.origins = origins
+        self.directions = directions
+        self._lengths = lengths if bins is None else None  # a passed `lengths` is dropped when bins is given
+        self.xys = xys
+        self.bins = bins
+        self.pixel_radii_2d = pixel_radii_2d
+        self.camera_ids = camera_ids
+        self.camera_counts = camera_counts
+
+    @property
+    def lengths(self) -> torch.Tensor:
+        if self.bins is not None:
+            # equivalent to: 0.5 * (bins[..., 1:] + bins[..., :-1]) but more efficient
+            # pyre-ignore
+            return torch.lerp(self.bins[..., :-1], self.bins[..., 1:], 0.5)
+        return self._lengths
+
+    @lengths.setter
+    def lengths(self, value) -> None:
+        if self.bins is not None:  # lengths are derived from bins, so they cannot be overridden
+            raise ValueError(
+                "If the bins attribute is not None you cannot set the lengths attribute."
+            )
+        else:
+            self._lengths = value
+
+    def is_packed(self) -> bool:
+        """
+        Returns whether the ImplicitronRayBundle carries data in packed state
+        """
+        return self.camera_ids is not None and self.camera_counts is not None
+
+    def get_padded_xys(self) -> Tuple[torch.Tensor, torch.LongTensor, int]:
+        """
+        For a packed ray bundle, returns padded rays. Assumes the input bundle is packed
+        (i.e. `camera_ids` and `camera_counts` are set); raises ValueError otherwise.
+
+        Returns:
+            - xys: Tensor of shape (N, max_size, ...) containing the padded
+                representation of the pixel coordinates;
+                where max_size is max of `camera_counts`. The values for camera id `i`
+                will be copied to `xys[i, :]`, with zeros padding out the extra inputs.
+            - first_idxs: exclusive cumulative sum of `camera_counts` (it starts at 0)
+                defining the boundaries between cameras in the packed representation
+            - num_inputs: the sum of `camera_counts`, i.e. the total number of rays.
+        """
+        if not self.is_packed():
+            raise ValueError("get_padded_xys can be called only on a packed bundle")
+
+        camera_counts = self.camera_counts
+        assert camera_counts is not None  # narrows the Optional type; is_packed() already checked it
+
+        cumsum = torch.cumsum(camera_counts, dim=0, dtype=torch.long)
+        first_idxs = torch.cat(
+            (camera_counts.new_zeros((1,), dtype=torch.long), cumsum[:-1])
+        )
+        num_inputs = camera_counts.sum().item()
+        max_size = torch.max(camera_counts).item()
+        xys = packed_to_padded(self.xys, first_idxs, max_size)
+        # pyre-ignore [7] pytorch typeshed inaccuracy
+        return xys, first_idxs, num_inputs
+
+
+@dataclass
+class RendererOutput:
+    """
+    A structure for storing the output of a renderer.
+
+    Args:
+        features: rendered features (usually RGB colors), (B, ..., C) tensor.
+        depths: rendered ray-termination depth map, in NDC coordinates, (B, ..., 1) tensor.
+        masks: rendered object mask, values in [0, 1], (B, ..., 1) tensor.
+        prev_stage: for multi-pass renderers (e.g. in NeRF), the output of the previous stage.
+        normals: surface normals, for renderers that estimate them; (B, ..., 3) tensor.
+        points: ray-termination points in the world coordinates, (B, ..., 3) tensor.
+        weights: optional tensor; presumably the rendering weights - TODO confirm.
+        aux: dict for implementation-specific renderer outputs.
+    """
+
+    features: torch.Tensor
+    depths: torch.Tensor
+    masks: torch.Tensor
+    prev_stage: Optional[RendererOutput] = None
+    normals: Optional[torch.Tensor] = None
+    points: Optional[torch.Tensor] = None  # TODO: redundant with depths
+    weights: Optional[torch.Tensor] = None
+    aux: Dict[str, Any] = field(default_factory=lambda: {})
+
+
+class ImplicitFunctionWrapper(torch.nn.Module):  # wraps `fn` and injects bound kwargs on each call
+    def __init__(self, fn: torch.nn.Module) -> None:
+        super().__init__()
+        self._fn = fn
+        self.bound_args = {}  # keyword arguments merged into every forward() call
+
+    def bind_args(self, **bound_args) -> None:
+        self.bound_args = bound_args  # replaces (does not merge with) previously bound args
+        self._fn.on_bind_args()  # the wrapped module must provide an on_bind_args() hook
+
+    def unbind_args(self) -> None:
+        self.bound_args = {}
+
+    def forward(self, *args, **kwargs):
+        return self._fn(*args, **{**kwargs, **self.bound_args})  # bound args win on key clashes
+
+
+class BaseRenderer(ABC, ReplaceableBase):
+    """
+    Base class for all Renderer implementations.
+    """
+
+    def requires_object_mask(self) -> bool:
+        """
+        Whether `forward` needs the object_mask.
+        """
+        return False
+
+    @abstractmethod
+    def forward(
+        self,
+        ray_bundle: ImplicitronRayBundle,
+        implicit_functions: List[ImplicitFunctionWrapper],
+        evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION,
+        **kwargs,
+    ) -> RendererOutput:
+        """
+        Each Renderer should implement its own forward function
+        that returns an instance of RendererOutput.
+
+        Args:
+            ray_bundle: An ImplicitronRayBundle object containing the following variables:
+                origins: A tensor of shape (minibatch, ..., 3) denoting
+                    the origins of the rendering rays.
+                directions: A tensor of shape (minibatch, ..., 3)
+                    containing the direction vectors of rendering rays.
+                lengths: A tensor of shape
+                    (minibatch, ..., num_points_per_ray) containing the
+                    lengths at which the ray points are sampled.
+                    The coordinates of the points on the rays are thus computed
+                    as `origins + lengths * directions`.
+                xys: A tensor of shape
+                    (minibatch, ..., 2) containing the
+                    xy locations of each ray's pixel in the NDC screen space.
+                camera_ids: A tensor of shape (N, ) which indicates which camera
+                    was used to sample the rays. `N` is the number of different
+                    sampled cameras.
+                camera_counts: A tensor of shape (N, ) which indicates how many times the
+                    corresponding camera in `camera_ids` was sampled.
+                    `sum(camera_counts)==minibatch`
+            implicit_functions: List of ImplicitFunctionWrappers which define the
+                implicit function methods to be used. Most Renderers only allow
+                a single implicit function. Currently, only the
+                MultiPassEmissionAbsorptionRenderer allows specifying multiple
+                values in the list.
+            evaluation_mode: one of EvaluationMode.TRAINING or
+                EvaluationMode.EVALUATION which determines the settings used for
+                rendering.
+            **kwargs: In addition to the named args, custom keyword args can be specified.
+                For example in the SignedDistanceFunctionRenderer, an object_mask is
+                required which needs to be passed via the kwargs.
+
+        Returns:
+            instance of RendererOutput
+        """
+        pass
+
+
+def compute_3d_diagonal_covariance_gaussian(
+    rays_directions: torch.Tensor,
+    rays_dir_variance: torch.Tensor,
+    radii_variance: torch.Tensor,
+    eps: float = 1e-6,
+) -> torch.Tensor:
+    """
+    Transform the variances (rays_dir_variance, radii_variance) of the gaussians from
+    the coordinate frame of the conical frustum to 3D world coordinates.
+
+    It follows equation 16 of `MIP-NeRF <https://arxiv.org/abs/2103.13415>`_.
+
+    Args:
+        rays_directions: A tensor of shape `(..., 3)`
+        rays_dir_variance: A tensor of shape `(..., num_intervals)` representing
+            the variance of the conical frustum with respect to the rays direction.
+        radii_variance: A tensor of shape `(..., num_intervals)` representing
+            the variance of the conical frustum with respect to its radius.
+        eps: lower bound applied to the squared norm of `rays_directions`
+            before dividing by it, to prevent division by zero.
+    Returns:
+        A tensor of shape `(..., num_intervals, 3)` containing the diagonal
+            of the covariance matrix.
+    """
+    d_outer_diag = torch.pow(rays_directions, 2)  # diagonal of d d^T
+    dir_mag_sq = torch.clamp(torch.sum(d_outer_diag, dim=-1, keepdim=True), min=eps)
+    # diagonal of (I - d d^T / |d|^2), i.e. the part orthogonal to the ray direction
+    null_outer_diag = 1 - d_outer_diag / dir_mag_sq
+    ray_dir_cov_diag = rays_dir_variance[..., None] * d_outer_diag[..., None, :]
+    xy_cov_diag = radii_variance[..., None] * null_outer_diag[..., None, :]
+    return ray_dir_cov_diag + xy_cov_diag
+
+
+def approximate_conical_frustum_as_gaussians(
+    bins: torch.Tensor, radii: torch.Tensor
+) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Approximates a conical frustum as two Gaussian distributions.
+
+    The Gaussian distributions are characterized by
+    three values:
+
+    - rays_dir_mean: mean along the rays direction
+        (defined as t in the parametric representation of a cone).
+    - rays_dir_variance: the variance of the conical frustum along the rays direction.
+    - radii_variance: variance of the conical frustum with respect to its radius.
+
+
+    The computation is stable and follows equation 7
+    of `MIP-NeRF <https://arxiv.org/abs/2103.13415>`_.
+
+    For more information on how the mean and variances are computed
+    refer to the appendix of the paper.
+
+    Args:
+        bins: A tensor of shape `(..., num_points_per_ray + 1)`
+            containing the bins at which the rays are sampled.
+            `bin[..., t]` and `bin[..., t+1]` represent respectively
+            the left and right coordinates of the interval.
+            The left and right coordinates are not passed separately:
+            the interval midpoints
+            `t_mu = (bin[..., t] + bin[..., t+1]) / 2` and half-widths
+            `t_delta = (bin[..., t+1] - bin[..., t]) / 2`
+            used by the formulas are derived from `bins` inside
+            this function.
+        radii: A tensor of shape `(..., 1)`
+            base radii of the conical frustums.
+
+    Returns:
+        rays_dir_mean: A tensor of shape `(..., num_intervals)` representing
+            the mean along the rays direction
+            (t in the parametric representation of the cone)
+        rays_dir_variance: A tensor of shape `(...,  num_intervals)` representing
+            the variance of the conical frustum along the rays
+            (t in the parametric representation of the cone).
+        radii_variance: A tensor of shape `(..., num_intervals)` representing
+            the variance of the conical frustum with respect to its radius.
+    """
+    t_mu = torch.lerp(bins[..., 1:], bins[..., :-1], 0.5)  # interval midpoints
+    t_delta = torch.diff(bins, dim=-1) / 2  # interval half-widths
+
+    t_mu_pow2 = torch.pow(t_mu, 2)
+    t_delta_pow2 = torch.pow(t_delta, 2)
+    t_delta_pow4 = torch.pow(t_delta, 4)
+
+    den = 3 * t_mu_pow2 + t_delta_pow2
+
+    # mean along the rays direction
+    rays_dir_mean = t_mu + 2 * t_mu * t_delta_pow2 / den
+
+    # Variance of the conical frustum along the rays direction
+    rays_dir_variance = t_delta_pow2 / 3 - (4 / 15) * (
+        t_delta_pow4 * (12 * t_mu_pow2 - t_delta_pow2) / torch.pow(den, 2)
+    )
+
+    # Variance of the conical frustum with respect to its radius
+    radii_variance = torch.pow(radii, 2) * (
+        t_mu_pow2 / 4 + (5 / 12) * t_delta_pow2 - 4 / 15 * (t_delta_pow4) / den
+    )
+    return rays_dir_mean, rays_dir_variance, radii_variance
+
+
+def conical_frustum_to_gaussian(
+    ray_bundle: ImplicitronRayBundle,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Approximate a conical frustum following a ray bundle as a Gaussian.
+
+    Args:
+        ray_bundle: An `ImplicitronRayBundle` object with fields:
+            origins: A tensor of shape `(..., 3)`
+            directions: A tensor of shape `(..., 3)`
+            lengths: A tensor of shape `(..., num_points_per_ray)`
+            bins: A tensor of shape `(..., num_points_per_ray + 1)`
+                containing the bins at which the rays are sampled.
+            pixel_radii_2d: A tensor of shape `(..., 1)`, base radii of the frustums.
+
+    Returns:
+        means: A tensor of shape `(..., bins.shape[-1] - 1, 3)` representing
+            the means of the Gaussians approximating the conical frustums.
+        diag_covariances: A tensor of shape `(..., bins.shape[-1] - 1, 3)`
+            representing the diagonal covariance matrices of our Gaussians.
+
+    Raises:
+        ValueError: If `ray_bundle.pixel_radii_2d` or `ray_bundle.bins` is None.
+    """
+    if ray_bundle.pixel_radii_2d is None or ray_bundle.bins is None:
+        raise ValueError(
+            "RayBundle pixel_radii_2d or bins have not been provided."
+            " Look at pytorch3d.implicitron.models.renderer.ray_sampler::"
+            "AbstractMaskRaySampler to see how to compute them. Have you forgot to set"
+            " `cast_ray_bundle_as_cone` to True?"
+        )
+
+    (
+        rays_dir_mean,
+        rays_dir_variance,
+        radii_variance,
+    ) = approximate_conical_frustum_as_gaussians(
+        ray_bundle.bins,
+        ray_bundle.pixel_radii_2d,
+    )
+    means = ray_bundle_variables_to_ray_points(
+        ray_bundle.origins, ray_bundle.directions, rays_dir_mean
+    )
+    diag_covariances = compute_3d_diagonal_covariance_gaussian(
+        ray_bundle.directions, rays_dir_variance, radii_variance
+    )
+    return means, diag_covariances
diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/lstm_renderer.py b/pytorch3d/pytorch3d/implicitron/models/renderer/lstm_renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..19848ed6e7391de1ab46ac82c56894f1a3e6a598
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/renderer/lstm_renderer.py
@@ -0,0 +1,187 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import copy
+import logging
+from typing import List, Optional, Tuple
+
+import torch
+from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
+from pytorch3d.implicitron.tools.config import registry
+
+from .base import BaseRenderer, EvaluationMode, ImplicitFunctionWrapper, RendererOutput
+
+
+logger = logging.getLogger(__name__)
+
+
+@registry.register
+class LSTMRenderer(BaseRenderer, torch.nn.Module):
+    """
+    Implements the learnable LSTM raymarching function from SRN [1].
+    This requires there to be one implicit function, and it is expected to be
+    like SRNImplicitFunction or SRNHyperNetImplicitFunction.
+
+    Settings:
+        num_raymarch_steps: The number of LSTM raymarching steps.
+        init_depth: Initializes the bias of the last raymarching LSTM layer so that
+            the farthest point from the camera reaches a far z-plane that
+            lies `init_depth` units from the camera plane.
+        init_depth_noise_std: The standard deviation of the random normal noise
+            added to the initial depth of each marched ray.
+        hidden_size: The dimensionality of the LSTM's hidden state.
+        n_feature_channels: The number of feature channels returned by the
+            implicit_function evaluated at each raymarching step.
+        bg_color: If supplied, used as the background color. Otherwise the pixel
+            generator is used everywhere. This has to have length either 1
+            (for a constant value for all output channels) or equal to the number
+            of output channels (which is `out_features` on the pixel generator,
+            typically 3.)
+        verbose: If `True`, logs raymarching debug info.
+
+    References:
+        [1] Sitzmann, V. and Zollhöfer, M. and Wetzstein, G.
+            "Scene representation networks: Continuous 3d-structure-aware
+            neural scene representations." NeurIPS 2019.
+    """
+
+    num_raymarch_steps: int = 10
+    init_depth: float = 17.0
+    init_depth_noise_std: float = 5e-4
+    hidden_size: int = 16
+    n_feature_channels: int = 256
+    bg_color: Optional[List[float]] = None
+    verbose: bool = False
+
+    def __post_init__(self):
+        self._lstm = torch.nn.LSTMCell(
+            input_size=self.n_feature_channels,
+            hidden_size=self.hidden_size,
+        )
+        self._lstm.apply(_init_recurrent_weights)
+        _lstm_forget_gate_init(self._lstm)
+        self._out_layer = torch.nn.Linear(self.hidden_size, 1)
+        # start with steps of init_depth / num_raymarch_steps (bias), near-zero weights
+        one_step = self.init_depth / self.num_raymarch_steps
+        self._out_layer.bias.data.fill_(one_step)
+        self._out_layer.weight.data.normal_(mean=0.0, std=1e-3)
+
+    def forward(
+        self,
+        ray_bundle: ImplicitronRayBundle,
+        implicit_functions: List[ImplicitFunctionWrapper],
+        evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION,
+        **kwargs,
+    ) -> RendererOutput:
+        """
+        Raymarch each ray with the LSTM and render the final ray points.
+
+        Args:
+            ray_bundle: An `ImplicitronRayBundle` object with a single point per ray
+                (`lengths.shape[-1] == 1`), the initial raymarching point.
+            implicit_functions: A single-element list of ImplicitFunctionWrappers which
+                defines the implicit function to be used.
+            evaluation_mode: one of EvaluationMode.TRAINING or
+                EvaluationMode.EVALUATION; not read by this renderer.
+
+        Returns:
+            instance of RendererOutput
+        """
+        if len(implicit_functions) != 1:
+            raise ValueError("LSTM renderer expects a single implicit function.")
+
+        implicit_function = implicit_functions[0]
+
+        if ray_bundle.lengths.shape[-1] != 1:
+            raise ValueError(
+                "LSTM renderer requires a ray-bundle with a single point per ray"
+                + " which is the initial raymarching point."
+            )
+
+        # jitter the initial depths
+        # (on a shallow copy, so `lengths` of the input bundle object is not rebound)
+        ray_bundle_t = copy.copy(ray_bundle)
+        ray_bundle_t.lengths = (
+            ray_bundle.lengths
+            + torch.randn_like(ray_bundle.lengths) * self.init_depth_noise_std
+        )
+
+        states: List[Optional[Tuple[torch.Tensor, torch.Tensor]]] = [None]
+        signed_distance = torch.zeros_like(ray_bundle_t.lengths)
+        raymarch_features = None
+        for t in range(self.num_raymarch_steps + 1):
+            # move signed_distance along each ray
+            ray_bundle_t.lengths += signed_distance
+
+            # eval the raymarching function (the last iteration stops right after this)
+            raymarch_features, _ = implicit_function(
+                ray_bundle=ray_bundle_t,
+                raymarch_features=None,
+            )
+            if self.verbose:
+                msg = (
+                    f"{t}: mu={float(signed_distance.mean()):1.2e};"
+                    + f" std={float(signed_distance.std()):1.2e};"
+                    + f" mu_d={float(ray_bundle_t.lengths.mean()):1.2e};"
+                    + f" std_d={float(ray_bundle_t.lengths.std()):1.2e};"
+                )
+                logger.info(msg)
+            if t == self.num_raymarch_steps:
+                break
+
+            # run the lstm marcher
+            state_h, state_c = self._lstm(
+                raymarch_features.view(-1, raymarch_features.shape[-1]),
+                states[-1],
+            )
+            if state_h.requires_grad:  # clamp the gradient of state_h to [-10, 10]
+                state_h.register_hook(lambda x: x.clamp(min=-10, max=10))
+            # predict the next step size
+            signed_distance = self._out_layer(state_h).view(ray_bundle_t.lengths.shape)
+            # log the lstm states
+            states.append((state_h, state_c))
+
+        opacity_logits, features = implicit_function(
+            raymarch_features=raymarch_features,
+            ray_bundle=ray_bundle_t,
+        )
+        mask = torch.sigmoid(opacity_logits)
+        depth = ray_bundle_t.lengths * ray_bundle_t.directions.norm(
+            dim=-1, keepdim=True
+        )
+
+        if self.bg_color is not None:
+            background = features.new_tensor(self.bg_color)
+            features = torch.lerp(background, features, mask)
+
+        return RendererOutput(
+            features=features[..., 0, :],
+            depths=depth,
+            masks=mask[..., 0, :],
+        )
+
+
+def _init_recurrent_weights(self) -> None:
+    # copied from SRN codebase; NOTE(review): exact type match, LSTMCell is not matched
+    for m in self.modules():
+        if type(m) in [torch.nn.GRU, torch.nn.LSTM, torch.nn.RNN]:
+            for name, param in m.named_parameters():
+                if "weight_ih" in name:
+                    torch.nn.init.kaiming_normal_(param.data)
+                elif "weight_hh" in name:
+                    torch.nn.init.orthogonal_(param.data)
+                elif "bias" in name:
+                    param.data.fill_(0)
+
+
+def _lstm_forget_gate_init(lstm_layer) -> None:
+    # copied from SRN codebase: fills the second quarter of every bias vector with 1
+    for name, parameter in lstm_layer.named_parameters():
+        if "bias" not in name:
+            continue
+        n = parameter.size(0)
+        start, end = n // 4, n // 2  # presumably the forget-gate slice (i, f, g, o)
+        parameter.data[start:end].fill_(1.0)
diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/multipass_ea.py b/pytorch3d/pytorch3d/implicitron/models/renderer/multipass_ea.py
new file mode 100644
index 0000000000000000000000000000000000000000..92042e131ae2ce9f171cb8cd8174b5ad4c992da8
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/renderer/multipass_ea.py
@@ -0,0 +1,185 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List
+
+import torch
+from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
+from pytorch3d.implicitron.tools.config import registry, run_auto_creation
+
+from .base import BaseRenderer, EvaluationMode, ImplicitFunctionWrapper, RendererOutput
+from .ray_point_refiner import RayPointRefiner
+from .raymarcher import RaymarcherBase
+
+
+@registry.register
+class MultiPassEmissionAbsorptionRenderer(  # pyre-ignore: 13
+    BaseRenderer, torch.nn.Module
+):
+    """
+    Implements the multi-pass rendering function, in particular,
+    with emission-absorption ray marching used in NeRF [1]. First, it evaluates
+    opacity-based ray-point weights and then optionally (in case more implicit
+    functions are given) resamples points using importance sampling and evaluates
+    new weights.
+
+    During each ray marching pass, features, depth map, and masks
+    are integrated: Let o_i be the opacity estimated by the implicit function,
+    and d_i be the offset between points `i` and `i+1` along the respective ray.
+    Ray marching is performed using the following equations::
+
+        ray_opacity_n = cap_fn(sum_i=1^n cap_fn(d_i * o_i)),
+        weight_n = weight_fn(cap_fn(d_i * o_i), 1 - ray_opacity_{n-1}),
+
+    and the final rendered quantities are computed by a dot-product of ray values
+    with the weights, e.g. `features = sum_n(weight_n * ray_features_n)`.
+
+    By default, for the EA raymarcher from [1] (
+        activated with `self.raymarcher_class_type="EmissionAbsorptionRaymarcher"`
+    )::
+
+        cap_fn(x) = 1 - exp(-x),
+        weight_fn(x) = w * x.
+
+    Note that the latter can be altered by changing `self.raymarcher_class_type`,
+    e.g. to "CumsumRaymarcher" which implements the cumulative-sum raymarcher
+    from NeuralVolumes [2].
+
+    Settings:
+        n_pts_per_ray_fine_training: The number of points sampled per ray for the
+            fine rendering pass during training.
+        n_pts_per_ray_fine_evaluation: The number of points sampled per ray for the
+            fine rendering pass during evaluation.
+        stratified_sampling_coarse_training: Enable/disable stratified sampling in the
+            refiner during training. Only matters if there are multiple implicit
+            functions (i.e. in GenericModel if num_passes>1).
+        stratified_sampling_coarse_evaluation: Enable/disable stratified sampling in
+            the refiner during evaluation. Only matters if there are multiple implicit
+            functions (i.e. in GenericModel if num_passes>1).
+        append_coarse_samples_to_fine: Add the fine ray points to the coarse points
+            after sampling.
+        density_noise_std_train: Standard deviation of the noise added to the
+            opacity field.
+        return_weights: Enables returning the rendering weights of the EA raymarcher.
+            Setting to `True` can lead to a prohibitively large memory consumption.
+        blurpool_weights: Use blurpool defined in [3], on the input weights of
+            each implicit_function except the first (implicit_functions[0]).
+        sample_pdf_eps: Padding applied to the weights (alpha in equation 18 of [3]).
+        raymarcher_class_type: The type of self.raymarcher corresponding to
+            a child of `RaymarcherBase` in the registry.
+        raymarcher: The raymarcher object used to convert per-point features
+            and opacities to a feature render.
+
+    References:
+        [1] Mildenhall, Ben, et al. "Nerf: Representing Scenes as Neural Radiance
+            Fields for View Synthesis." ECCV 2020.
+        [2] Lombardi, Stephen, et al. "Neural Volumes: Learning Dynamic Renderable
+            Volumes from Images." SIGGRAPH 2019.
+        [3] Jonathan T. Barron, et al. "Mip-NeRF: A Multiscale Representation
+            for Anti-Aliasing Neural Radiance Fields." ICCV 2021.
+
+    """
+
+    raymarcher_class_type: str = "EmissionAbsorptionRaymarcher"
+    raymarcher: RaymarcherBase
+
+    n_pts_per_ray_fine_training: int = 64
+    n_pts_per_ray_fine_evaluation: int = 64
+    stratified_sampling_coarse_training: bool = True
+    stratified_sampling_coarse_evaluation: bool = False
+    append_coarse_samples_to_fine: bool = True
+    density_noise_std_train: float = 0.0
+    return_weights: bool = False
+    blurpool_weights: bool = False
+    sample_pdf_eps: float = 1e-5
+
+    def __post_init__(self):
+        self._refiners = {
+            EvaluationMode.TRAINING: RayPointRefiner(
+                n_pts_per_ray=self.n_pts_per_ray_fine_training,
+                random_sampling=self.stratified_sampling_coarse_training,
+                add_input_samples=self.append_coarse_samples_to_fine,
+                blurpool_weights=self.blurpool_weights,
+                sample_pdf_eps=self.sample_pdf_eps,
+            ),
+            EvaluationMode.EVALUATION: RayPointRefiner(
+                n_pts_per_ray=self.n_pts_per_ray_fine_evaluation,
+                random_sampling=self.stratified_sampling_coarse_evaluation,
+                add_input_samples=self.append_coarse_samples_to_fine,
+                blurpool_weights=self.blurpool_weights,
+                sample_pdf_eps=self.sample_pdf_eps,
+            ),
+        }
+        run_auto_creation(self)
+
+    def forward(
+        self,
+        ray_bundle: ImplicitronRayBundle,
+        implicit_functions: List[ImplicitFunctionWrapper],
+        evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION,
+        **kwargs,
+    ) -> RendererOutput:
+        """
+        Args:
+            ray_bundle: An `ImplicitronRayBundle` object containing the parametrizations of the
+                sampled rendering rays.
+            implicit_functions: List of ImplicitFunctionWrappers which
+                define the implicit functions to be used sequentially in
+                the raymarching step. The output of raymarching with
+                implicit_functions[n-1] is refined, and then used as
+                input for raymarching with implicit_functions[n].
+            evaluation_mode: one of EvaluationMode.TRAINING or
+                EvaluationMode.EVALUATION which selects the ray point refiner and
+                whether `density_noise_std_train` is applied.
+
+        Returns:
+            instance of RendererOutput (earlier passes are chained via `prev_stage`)
+        """
+        if not implicit_functions:
+            raise ValueError("EA renderer expects implicit functions")
+
+        return self._run_raymarcher(
+            ray_bundle,
+            implicit_functions,
+            None,
+            evaluation_mode,
+        )
+
+    def _run_raymarcher(
+        self, ray_bundle, implicit_functions, prev_stage, evaluation_mode
+    ):
+        density_noise_std = (
+            self.density_noise_std_train
+            if evaluation_mode == EvaluationMode.TRAINING
+            else 0.0
+        )
+        # interval widths can only be derived when the bundle carries `bins`
+        ray_deltas = (
+            None if ray_bundle.bins is None else torch.diff(ray_bundle.bins, dim=-1)
+        )
+        output = self.raymarcher(
+            *implicit_functions[0](ray_bundle=ray_bundle),
+            ray_lengths=ray_bundle.lengths,
+            ray_deltas=ray_deltas,
+            density_noise_std=density_noise_std,
+        )
+        output.prev_stage = prev_stage
+        # keep the weights locally: the refiner below needs them even if not returned
+        weights = output.weights
+        if not self.return_weights:
+            output.weights = None
+
+        # we may need to make a recursive call
+        if len(implicit_functions) > 1:
+            fine_ray_bundle = self._refiners[evaluation_mode](ray_bundle, weights)
+            output = self._run_raymarcher(
+                fine_ray_bundle,
+                implicit_functions[1:],
+                output,
+                evaluation_mode,
+            )
+
+        return output
diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/ray_point_refiner.py b/pytorch3d/pytorch3d/implicitron/models/renderer/ray_point_refiner.py
new file mode 100644
index 0000000000000000000000000000000000000000..b71574d23f6a4d7f3407c3fa7157a47ff63f7bc2
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/renderer/ray_point_refiner.py
@@ -0,0 +1,145 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import copy
+
+import torch
+from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
+from pytorch3d.implicitron.tools.config import Configurable, expand_args_fields
+
+from pytorch3d.renderer.implicit.sample_pdf import sample_pdf
+
+
+@expand_args_fields
+# pyre-fixme[13]: Attribute `n_pts_per_ray` is never initialized.
+# pyre-fixme[13]: Attribute `random_sampling` is never initialized.
+class RayPointRefiner(Configurable, torch.nn.Module):
+    """
+    Implements the importance sampling of points along rays.
+    The input is a `RayBundle` object with a `ray_weights` tensor
+    which specifies the probabilities of sampling a point along each ray.
+
+    This raysampler is used for the fine rendering pass of NeRF.
+    As such, the forward pass accepts the RayBundle output by the
+    raysampling of the coarse rendering pass. Hence, it does not
+    take cameras as input.
+
+    Args:
+        n_pts_per_ray: The number of points to sample along each ray.
+        random_sampling: If `False`, returns equispaced percentiles of the
+            distribution defined by the input weights, otherwise performs
+            sampling from that distribution.
+        add_input_samples: Concatenates and returns the sampled values
+            together with the input samples.
+        blurpool_weights: Use blurpool defined in [1], on the input weights.
+        sample_pdf_eps: A constant preventing division by zero in case empty bins
+            are present.
+
+    References:
+        [1] Jonathan T. Barron, et al. "Mip-NeRF: A Multiscale Representation
+            for Anti-Aliasing Neural Radiance Fields." ICCV 2021.
+    """
+
+    n_pts_per_ray: int
+    random_sampling: bool
+    add_input_samples: bool = True
+    blurpool_weights: bool = False
+    sample_pdf_eps: float = 1e-5
+
+    def forward(
+        self,
+        input_ray_bundle: ImplicitronRayBundle,
+        ray_weights: torch.Tensor,
+        blurpool_weights: bool = False,
+        sample_pdf_padding: float = 1e-5,
+        **kwargs,
+    ) -> ImplicitronRayBundle:
+        """
+        Args:
+            input_ray_bundle: An instance of `ImplicitronRayBundle` specifying the
+                source rays for sampling of the probability distribution.
+            ray_weights: A tensor of shape
+                `(..., input_ray_bundle.lengths.shape[-1])` with non-negative
+                elements defining the probability distribution to sample
+                ray points from.
+            blurpool_weights: Not read by this method; the module-level
+                `self.blurpool_weights` setting (blurpool from [1]) is used instead.
+            sample_pdf_padding: Not read by this method; `self.sample_pdf_eps` is used.
+
+        Returns:
+            ray_bundle: A shallow copy of `input_ray_bundle` whose `bins` (if the input
+                has bins) or `lengths` (otherwise) hold the sorted sampled values,
+                merged with the input values when `add_input_samples` is set.
+
+        References:
+            [1] Jonathan T. Barron, et al. "Mip-NeRF: A Multiscale Representation
+                for Anti-Aliasing Neural Radiance Fields." ICCV 2021.
+
+        """
+
+        with torch.no_grad():
+            if self.blurpool_weights:
+                ray_weights = apply_blurpool_on_weights(ray_weights)
+
+            n_pts_per_ray = self.n_pts_per_ray
+            ray_weights = ray_weights.view(-1, ray_weights.shape[-1])
+            if input_ray_bundle.bins is None:
+                z_vals: torch.Tensor = input_ray_bundle.lengths
+                ray_weights = ray_weights[..., 1:-1]  # interior points only
+                bins = torch.lerp(z_vals[..., 1:], z_vals[..., :-1], 0.5)  # midpoints
+            else:
+                z_vals = input_ray_bundle.bins
+                n_pts_per_ray += 1  # one extra sample: the result is stored as bins
+                bins = z_vals
+            z_samples = sample_pdf(
+                bins.view(-1, bins.shape[-1]),
+                ray_weights,
+                n_pts_per_ray,
+                det=not self.random_sampling,
+                eps=self.sample_pdf_eps,
+            ).view(*z_vals.shape[:-1], n_pts_per_ray)
+
+        if self.add_input_samples:
+            z_vals = torch.cat((z_vals, z_samples), dim=-1)
+        else:
+            z_vals = z_samples
+        # Resort by depth.
+        z_vals, _ = torch.sort(z_vals, dim=-1)
+        ray_bundle = copy.copy(input_ray_bundle)
+        if input_ray_bundle.bins is None:
+            ray_bundle.lengths = z_vals
+        else:
+            ray_bundle.bins = z_vals
+
+        return ray_bundle
+
+
+def apply_blurpool_on_weights(weights) -> torch.Tensor:
+    """
+    Filter weights with a 2-tap max filter followed by a 2-tap blur filter,
+    which produces a wide and smooth upper envelope on the weights.
+
+    Args:
+        weights: Tensor of shape `(..., dim)`
+
+    Returns:
+        blurred_weights: Tensor of shape `(..., dim)`
+    """
+    weights_pad = torch.concatenate(  # alias of torch.cat; repeats both end values
+        [
+            weights[..., :1],
+            weights,
+            weights[..., -1:],
+        ],
+        dim=-1,
+    )
+    # 2-tap max over the padded weights, then the mean of neighbouring maxima
+    weights_max = torch.nn.functional.max_pool1d(
+        weights_pad.flatten(end_dim=-2), 2, stride=1
+    )
+    return torch.lerp(weights_max[..., :-1], weights_max[..., 1:], 0.5).reshape_as(
+        weights
+    )
diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/ray_sampler.py b/pytorch3d/pytorch3d/implicitron/models/renderer/ray_sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe464f67076f501591edd281d8d488207033c582
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/renderer/ray_sampler.py
@@ -0,0 +1,381 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Optional, Tuple
+
+import torch
+from pytorch3d.implicitron.tools import camera_utils
+from pytorch3d.implicitron.tools.config import registry, ReplaceableBase
+from pytorch3d.renderer import NDCMultinomialRaysampler
+from pytorch3d.renderer.cameras import CamerasBase
+from pytorch3d.renderer.implicit.utils import HeterogeneousRayBundle
+
+from .base import EvaluationMode, ImplicitronRayBundle, RenderSamplingMode
+
+
+class RaySamplerBase(ReplaceableBase):
+    """
+    Base class for ray samplers; subclasses must override `forward`.
+    """
+
+    def forward(
+        self,
+        cameras: CamerasBase,
+        evaluation_mode: EvaluationMode,
+        mask: Optional[torch.Tensor] = None,
+    ) -> ImplicitronRayBundle:
+        """
+        Args:
+            cameras: A batch of `batch_size` cameras from which the rays are emitted.
+            evaluation_mode: one of `EvaluationMode.TRAINING` or
+                `EvaluationMode.EVALUATION` which determines the sampling mode
+                that is used.
+            mask: Active for the `RenderSamplingMode.MASK_SAMPLE` sampling mode.
+                Defines a non-negative mask of shape
+                `(batch_size, image_height, image_width)` where each per-pixel
+                value is proportional to the probability of sampling the
+                corresponding pixel's ray.
+
+        Returns:
+            ray_bundle: An `ImplicitronRayBundle` object containing the parametrizations of the
+                sampled rendering rays.
+        """
+        raise NotImplementedError()
+
+
+class AbstractMaskRaySampler(RaySamplerBase, torch.nn.Module):
+    """
+    Samples a fixed number of points along rays which are in turn sampled for
+    each camera in a batch.
+
+    This class utilizes `NDCMultinomialRaysampler` which allows to either
+    randomly sample rays from an input foreground saliency mask
+    (`RenderSamplingMode.MASK_SAMPLE`), or on a rectangular image grid
+    (`RenderSamplingMode.FULL_GRID`). The sampling mode can be set separately
+    for training and evaluation by setting `self.sampling_mode_training`
+    and `self.sampling_mode_evaluation` accordingly.
+
+    The class allows to adjust the sampling points along rays by overwriting the
+    `AbstractMaskRaySampler._get_min_max_depth_bounds` function which returns
+    the near/far planes (`min_depth`/`max_depth`) passed to `NDCMultinomialRaysampler`.
+
+    Settings:
+        image_width: The horizontal size of the image grid.
+        image_height: The vertical size of the image grid.
+        sampling_mode_training: The ray sampling mode for training. This should be a str
+            option from the RenderSamplingMode Enum
+        sampling_mode_evaluation: Same as above but for evaluation.
+        n_pts_per_ray_training: The number of points sampled along each ray during training.
+        n_pts_per_ray_evaluation: The number of points sampled along each ray during evaluation.
+        n_rays_per_image_sampled_from_mask: The amount of rays to be sampled from the image
+            grid. Given a batch of image grids, this many is sampled from each.
+            `n_rays_per_image_sampled_from_mask` and `n_rays_total_training` cannot both be
+            defined.
+        n_rays_total_training: (optional) How many rays in total to sample from the entire
+            batch of provided image grid. The result is as if `n_rays_total_training`
+            cameras/image grids were sampled with replacement from the cameras / image grids
+            provided and for every camera one ray was sampled.
+            `n_rays_per_image_sampled_from_mask` and `n_rays_total_training` cannot both be
+            defined, to use you have to set `n_rays_per_image_sampled_from_mask` to None.
+            Used only for EvaluationMode.TRAINING.
+        stratified_point_sampling_training: if set, performs stratified random sampling
+            along the ray; otherwise takes ray points at deterministic offsets.
+        stratified_point_sampling_evaluation: Same as above but for evaluation.
+        cast_ray_bundle_as_cone: If True, the sampling will generate the bins and radii
+            attribute of ImplicitronRayBundle. The `bins` contain the z-coordinate
+            (=depth) of each ray in world units and are of shape
+            `(batch_size, n_rays_per_image, n_pts_per_ray_training/evaluation + 1)`
+            while `lengths` is equal to the midpoint of the bins:
+            `0.5 * (bins[..., 1:] + bins[..., :-1])`.
+            If False, `bins` is None, `radii` is None and `lengths` contains
+            the z-coordinate (=depth) of each ray in world units and are of shape
+            `(batch_size, n_rays_per_image, n_pts_per_ray_training/evaluation)`
+
+    Raises:
+        TypeError: if cast_ray_bundle_as_cone is set to True and n_rays_total_training
+            is not None will result in an error. HeterogeneousRayBundle is
+            not supported for conical frustum computation yet.
+    """
+
+    image_width: int = 400
+    image_height: int = 400
+    sampling_mode_training: str = "mask_sample"
+    sampling_mode_evaluation: str = "full_grid"
+    n_pts_per_ray_training: int = 64
+    n_pts_per_ray_evaluation: int = 64
+    n_rays_per_image_sampled_from_mask: Optional[int] = 1024
+    n_rays_total_training: Optional[int] = None
+    # stratified sampling vs taking points at deterministic offsets
+    stratified_point_sampling_training: bool = True
+    stratified_point_sampling_evaluation: bool = False
+    cast_ray_bundle_as_cone: bool = False
+
+    def __post_init__(self):
+        if (self.n_rays_per_image_sampled_from_mask is not None) and (
+            self.n_rays_total_training is not None
+        ):
+            raise ValueError(
+                "Cannot both define n_rays_total_training and "
+                "n_rays_per_image_sampled_from_mask."
+            )
+
+        self._sampling_mode = {
+            EvaluationMode.TRAINING: RenderSamplingMode(self.sampling_mode_training),
+            EvaluationMode.EVALUATION: RenderSamplingMode(
+                self.sampling_mode_evaluation
+            ),
+        }
+
+        n_pts_per_ray_training = (
+            self.n_pts_per_ray_training + 1  # cone bins need one extra edge
+            if self.cast_ray_bundle_as_cone
+            else self.n_pts_per_ray_training
+        )
+        n_pts_per_ray_evaluation = (
+            self.n_pts_per_ray_evaluation + 1  # cone bins need one extra edge
+            if self.cast_ray_bundle_as_cone
+            else self.n_pts_per_ray_evaluation
+        )
+        self._training_raysampler = NDCMultinomialRaysampler(
+            image_width=self.image_width,
+            image_height=self.image_height,
+            n_pts_per_ray=n_pts_per_ray_training,
+            min_depth=0.0,  # placeholder; `forward` passes the depth bounds per call
+            max_depth=0.0,  # placeholder; `forward` passes the depth bounds per call
+            n_rays_per_image=self.n_rays_per_image_sampled_from_mask
+            if self._sampling_mode[EvaluationMode.TRAINING]
+            == RenderSamplingMode.MASK_SAMPLE
+            else None,
+            n_rays_total=self.n_rays_total_training,
+            unit_directions=True,
+            stratified_sampling=self.stratified_point_sampling_training,
+        )
+
+        self._evaluation_raysampler = NDCMultinomialRaysampler(
+            image_width=self.image_width,
+            image_height=self.image_height,
+            n_pts_per_ray=n_pts_per_ray_evaluation,
+            min_depth=0.0,  # placeholder; `forward` passes the depth bounds per call
+            max_depth=0.0,  # placeholder; `forward` passes the depth bounds per call
+            n_rays_per_image=self.n_rays_per_image_sampled_from_mask
+            if self._sampling_mode[EvaluationMode.EVALUATION]
+            == RenderSamplingMode.MASK_SAMPLE
+            else None,
+            unit_directions=True,
+            stratified_sampling=self.stratified_point_sampling_evaluation,
+        )
+
+        max_y, min_y = self._training_raysampler.max_y, self._training_raysampler.min_y
+        max_x, min_x = self._training_raysampler.max_x, self._training_raysampler.min_x
+        self.pixel_height: float = (max_y - min_y) / (self.image_height - 1)
+        self.pixel_width: float = (max_x - min_x) / (self.image_width - 1)
+
+    def _get_min_max_depth_bounds(self, cameras: CamerasBase) -> Tuple[float, float]:
+        raise NotImplementedError()
+
+    def forward(
+        self,
+        cameras: CamerasBase,
+        evaluation_mode: EvaluationMode,
+        mask: Optional[torch.Tensor] = None,
+    ) -> ImplicitronRayBundle:
+        """
+
+        Args:
+            cameras: A batch of `batch_size` cameras from which the rays are emitted.
+            evaluation_mode: one of `EvaluationMode.TRAINING` or
+                `EvaluationMode.EVALUATION` which determines the sampling mode
+                that is used.
+            mask: Active for the `RenderSamplingMode.MASK_SAMPLE` sampling mode.
+                A non-negative mask of shape `(batch_size, 1, height, width)`
+                (resized to the image grid with nearest interpolation; only
+                channel 0 is used) where each per-pixel value is proportional
+                to the probability of sampling the corresponding pixel's ray.
+
+        Returns:
+            ray_bundle: An `ImplicitronRayBundle` object containing the parametrizations of the
+                sampled rendering rays.
+        """
+        sample_mask = None
+        if (
+            self._sampling_mode[evaluation_mode] == RenderSamplingMode.MASK_SAMPLE
+            and mask is not None
+        ):
+            sample_mask = torch.nn.functional.interpolate(
+                mask,
+                size=[self.image_height, self.image_width],
+                mode="nearest",
+            )[:, 0]  # drop the channel dimension
+
+        min_depth, max_depth = self._get_min_max_depth_bounds(cameras)
+
+        raysampler = {
+            EvaluationMode.TRAINING: self._training_raysampler,
+            EvaluationMode.EVALUATION: self._evaluation_raysampler,
+        }[evaluation_mode]
+
+        ray_bundle = raysampler(
+            cameras=cameras,
+            mask=sample_mask,
+            min_depth=min_depth,
+            max_depth=max_depth,
+        )
+        if self.cast_ray_bundle_as_cone and isinstance(
+            ray_bundle, HeterogeneousRayBundle
+        ):
+            # If this error is raised it means that the raysampler was given
+            # the `n_rays_total` argument. If that is the case
+            # then you should update the radii computation and lengths
+            # computation to handle padding and unpadding.
+            raise TypeError(
+                "Heterogeneous ray bundle is not supported for conical frustum computation yet"
+            )
+        elif self.cast_ray_bundle_as_cone:
+            pixel_hw: Tuple[float, float] = (self.pixel_height, self.pixel_width)
+            pixel_radii_2d = compute_radii(cameras, ray_bundle.xys[..., :2], pixel_hw)
+            return ImplicitronRayBundle(
+                directions=ray_bundle.directions,
+                origins=ray_bundle.origins,
+                lengths=None,
+                xys=ray_bundle.xys,
+                bins=ray_bundle.lengths,  # sampled depths are used as bin edges
+                pixel_radii_2d=pixel_radii_2d,
+            )
+
+        return ImplicitronRayBundle(
+            directions=ray_bundle.directions,
+            origins=ray_bundle.origins,
+            lengths=ray_bundle.lengths,
+            xys=ray_bundle.xys,
+            camera_counts=getattr(ray_bundle, "camera_counts", None),
+            camera_ids=getattr(ray_bundle, "camera_ids", None),
+        )
+
+
+@registry.register
+class AdaptiveRaySampler(AbstractMaskRaySampler):
+    """
+    Adaptively samples points on each ray between near and far planes whose
+    depths are determined based on the distance from the camera center
+    to a predefined scene center.
+
+    More specifically,
+    `min_depth = max(
+        (self.scene_center-camera_center).norm() - self.scene_extent, eps
+    )` and
+    `max_depth = (self.scene_center-camera_center).norm() + self.scene_extent`.
+
+    This sampling is ideal for object-centric scenes whose contents are
+    centered around a known `self.scene_center` and fit into a bounding sphere
+    with a radius of `self.scene_extent`.
+
+    Args:
+        scene_center: The xyz coordinates of the center of the scene used
+            along with `scene_extent` to compute the min and max depth planes
+            for sampling ray-points.
+        scene_extent: The radius of the scene bounding sphere centered at `scene_center`.
+    """
+
+    scene_extent: float = 8.0
+    scene_center: Tuple[float, float, float] = (0.0, 0.0, 0.0)
+
+    def __post_init__(self):
+        super().__post_init__()
+        if self.scene_extent <= 0.0:
+            raise ValueError("Adaptive raysampler requires self.scene_extent > 0.")
+        self._scene_center = torch.FloatTensor(self.scene_center)
+
+    def _get_min_max_depth_bounds(self, cameras: CamerasBase) -> Tuple[float, float]:
+        """
+        Returns the adaptively calculated near/far planes.
+        """
+        min_depth, max_depth = camera_utils.get_min_max_depth_bounds(
+            cameras, self._scene_center, self.scene_extent
+        )
+        return float(min_depth[0]), float(max_depth[0])  # only element 0 is used
+
+
+@registry.register
+class NearFarRaySampler(AbstractMaskRaySampler):
+    """
+    Samples a fixed number of points between fixed near and far z-planes.
+    Specifically, samples points along each ray with approximately uniform spacing
+    of z-coordinates between the minimum depth `self.min_depth` and the maximum depth
+    `self.max_depth`. This sampling is useful for rendering scenes where the camera is
+    at a constant distance from the focal point of the scene.
+
+    Args:
+        min_depth: The minimum depth of a ray-point.
+        max_depth: The maximum depth of a ray-point.
+    """
+
+    min_depth: float = 0.1
+    max_depth: float = 8.0
+
+    def _get_min_max_depth_bounds(self, cameras: CamerasBase) -> Tuple[float, float]:
+        """
+        Returns the stored near/far planes (the `cameras` argument is not used).
+        """
+        return self.min_depth, self.max_depth
+
+
+def compute_radii(
+    cameras: CamerasBase,
+    xy_grid: torch.Tensor,
+    pixel_hw_ndc: Tuple[float, float],
+) -> torch.Tensor:
+    """
+    Compute radii of conical frustums in world coordinates.
+
+    Args:
+        cameras: cameras object representing a batch of cameras.
+        xy_grid: tensor of shape `(batch_size, ..., 2)` with image xy coords in NDC.
+        pixel_hw_ndc: pixel height and width in NDC
+
+    Returns:
+        radii: A tensor of shape `(batch_size, ..., 1)` with the radii of the cones.
+    """
+    batch_size = xy_grid.shape[0]
+    spatial_size = xy_grid.shape[1:-1]
+    n_rays_per_image = spatial_size.numel()
+
+    xy = xy_grid.view(batch_size, n_rays_per_image, 2)
+
+    # [batch_size, 3 * n_rays_per_image, 2]
+    xy = torch.cat(
+        [
+            xy,
+            # Same points shifted by one pixel width along x (x-axis footprint)
+            xy + torch.tensor([pixel_hw_ndc[1], 0], device=xy.device),
+            # Same points shifted by one pixel height along y (y-axis footprint)
+            xy + torch.tensor([0, pixel_hw_ndc[0]], device=xy.device),
+        ],
+        dim=1,
+    )
+    # [batch_size, 3 * n_rays_per_image, 3]
+    xyz = torch.cat(
+        (
+            xy,
+            xy.new_ones(batch_size, 3 * n_rays_per_image, 1),  # depth z = 1
+        ),
+        dim=-1,
+    )
+
+    # unproject the points
+    unprojected_xyz = cameras.unproject_points(xyz, from_ndc=True)
+
+    plane_world, plane_world_dx, plane_world_dy = torch.split(
+        unprojected_xyz, n_rays_per_image, dim=1
+    )
+
+    # Distance from each unprojected pixel point to its x and y neighbors.
+    dx_norm = torch.linalg.norm(plane_world_dx - plane_world, dim=-1, keepdims=True)
+    dy_norm = torch.linalg.norm(plane_world_dy - plane_world, dim=-1, keepdims=True)
+    # Cut the distance in half to obtain the base radius: (dx_norm + dy_norm) * 0.5
+    # Scale it by 2/12**0.5 to match the variance of the pixel's footprint
+    radii = (dx_norm + dy_norm) / 12**0.5
+
+    return radii.view(batch_size, *spatial_size, 1)
diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/ray_tracing.py b/pytorch3d/pytorch3d/implicitron/models/renderer/ray_tracing.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c0dd0a40cd4987e19ddaaa196fd56c23ba35800
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/renderer/ray_tracing.py
@@ -0,0 +1,590 @@
+# @lint-ignore-every LICENSELINT
+# Adapted from https://github.com/lioryariv/idr
+# Copyright (c) 2020 Lior Yariv
+
+from typing import Any, Callable, Tuple
+
+import torch
+import torch.nn as nn
+from pytorch3d.implicitron.tools.config import Configurable
+
+
+class RayTracing(Configurable, nn.Module):
+    """
+    Finds the intersection points of rays with the implicit surface defined
+    by a signed distance function (SDF). The algorithm follows the pipeline:
+    1. Initialise start and end points on rays by the intersections with
+        the circumscribing sphere.
+    2. Run sphere tracing from both ends.
+    3. Divide the untraced segments of non-convergent rays into uniform
+        intervals and find the one with the sign transition.
+    4. Run the secant method to estimate the point of the sign transition.
+
+    Args:
+        object_bounding_sphere: The radius of the initial sphere circumscribing
+            the object.
+        sdf_threshold: Absolute SDF value small enough for the sphere tracer
+            to consider it a surface.
+        line_search_step: Length of the backward correction on sphere tracing
+            iterations.
+        line_step_iters: Number of backward correction iterations.
+        sphere_tracing_iters: Maximum number of sphere tracing iterations
+            (the actual number of iterations may be smaller if all ray
+            intersections are found).
+        n_steps: Number of intervals sampled for unconvergent rays.
+        n_secant_steps: Number of iterations in the secant algorithm.
+    """
+
+    object_bounding_sphere: float = 1.0
+    sdf_threshold: float = 5.0e-5
+    line_search_step: float = 0.5
+    line_step_iters: int = 1
+    sphere_tracing_iters: int = 10
+    n_steps: int = 100
+    n_secant_steps: int = 8
+
+    def forward(
+        self,
+        sdf: Callable[[torch.Tensor], torch.Tensor],
+        cam_loc: torch.Tensor,
+        object_mask: torch.BoolTensor,
+        ray_directions: torch.Tensor,
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        """
+        Args:
+            sdf: A callable that takes a (N, 3) tensor of points and returns
+                a tensor of (N,) SDF values.
+            cam_loc: A tensor of (B, N, 3) ray origins.
+            object_mask: A bool tensor with B*N elements, e.g. (B, N), indicating whether
+                a sampled pixel corresponds to the rendered object or background.
+            ray_directions: A tensor of (B, N, 3) ray directions.
+
+        Returns:
+            curr_start_points: A tensor of (B*N, 3) found intersection points
+                with the implicit surface.
+            network_object_mask: A tensor of (B*N,) indicators denoting whether
+                intersections were found.
+            acc_start_dis: A tensor of (B*N,) distances from the ray origins
+                to intersection points.
+        """
+        batch_size, num_pixels, _ = ray_directions.shape
+        device = cam_loc.device
+
+        sphere_intersections, mask_intersect = _get_sphere_intersection(
+            cam_loc, ray_directions, r=self.object_bounding_sphere
+        )
+
+        (
+            curr_start_points,
+            unfinished_mask_start,
+            acc_start_dis,
+            acc_end_dis,
+            min_dis,
+            max_dis,
+        ) = self.sphere_tracing(
+            batch_size,
+            num_pixels,
+            sdf,
+            cam_loc,
+            ray_directions,
+            mask_intersect,
+            sphere_intersections,
+        )
+
+        network_object_mask = acc_start_dis < acc_end_dis
+
+        # The non convergent rays should be handled by the sampler
+        sampler_mask = unfinished_mask_start
+        sampler_net_obj_mask = torch.zeros_like(
+            sampler_mask, dtype=torch.bool, device=device
+        )
+        if sampler_mask.sum() > 0:
+            sampler_min_max = torch.zeros((batch_size, num_pixels, 2), device=device)
+            sampler_min_max.reshape(-1, 2)[sampler_mask, 0] = acc_start_dis[
+                sampler_mask
+            ]
+            sampler_min_max.reshape(-1, 2)[sampler_mask, 1] = acc_end_dis[sampler_mask]
+
+            sampler_pts, sampler_net_obj_mask, sampler_dists = self.ray_sampler(
+                sdf, cam_loc, object_mask, ray_directions, sampler_min_max, sampler_mask
+            )
+
+            curr_start_points[sampler_mask] = sampler_pts[sampler_mask]
+            acc_start_dis[sampler_mask] = sampler_dists[sampler_mask]
+            network_object_mask[sampler_mask] = sampler_net_obj_mask[sampler_mask]
+
+        if not self.training:
+            return curr_start_points, network_object_mask, acc_start_dis
+
+        # In training, also update curr_start_points and acc_start_dis for non-sampler
+        # rays that found no surface inside the object mask or lie outside the mask.
+        ray_directions = ray_directions.reshape(-1, 3)
+        mask_intersect = mask_intersect.reshape(-1)
+        # pyre-fixme[9]: object_mask has type `BoolTensor`; used as `Tensor`.
+        object_mask = object_mask.reshape(-1)
+
+        in_mask = ~network_object_mask & object_mask & ~sampler_mask
+        out_mask = ~object_mask & ~sampler_mask
+
+        mask_left_out = (in_mask | out_mask) & ~mask_intersect
+        if (
+            mask_left_out.sum() > 0
+        ):  # project the origin to the not intersect points on the sphere
+            cam_left_out = cam_loc.reshape(-1, 3)[mask_left_out]
+            rays_left_out = ray_directions[mask_left_out]
+            acc_start_dis[mask_left_out] = -torch.bmm(
+                rays_left_out.view(-1, 1, 3), cam_left_out.view(-1, 3, 1)
+            ).squeeze()
+            curr_start_points[mask_left_out] = (
+                cam_left_out + acc_start_dis[mask_left_out].unsqueeze(1) * rays_left_out
+            )
+
+        mask = (in_mask | out_mask) & mask_intersect
+
+        if mask.sum() > 0:
+            min_dis[network_object_mask & out_mask] = acc_start_dis[
+                network_object_mask & out_mask
+            ]
+
+            min_mask_points, min_mask_dist = self.minimal_sdf_points(
+                sdf, cam_loc, ray_directions, mask, min_dis, max_dis
+            )
+
+            curr_start_points[mask] = min_mask_points
+            acc_start_dis[mask] = min_mask_dist
+
+        return curr_start_points, network_object_mask, acc_start_dis
+
+    def sphere_tracing(
+        self,
+        batch_size: int,
+        num_pixels: int,
+        sdf: Callable[[torch.Tensor], torch.Tensor],
+        cam_loc: torch.Tensor,
+        ray_directions: torch.Tensor,
+        mask_intersect: torch.Tensor,
+        sphere_intersections: torch.Tensor,
+    ) -> Tuple[Any, Any, Any, Any, Any, Any]:
+        """
+        Run sphere tracing algorithm for max iterations
+        from both sides of the bounding sphere intersection
+
+        Args:
+            batch_size: Number of ray batches B (first dim of `ray_directions`).
+            num_pixels: Number of rays N per batch element.
+            sdf: Callable mapping (M, 3) points to (M,) SDF values.
+            cam_loc: Ray origins; presumably (B, N, 3) -- must broadcast with directions.
+            ray_directions: A tensor of (B, N, 3) ray directions.
+            mask_intersect: Bool tensor with B*N elements selecting the rays to trace.
+            sphere_intersections: (B, N, 2) start/end distances along each ray.
+
+        Returns:
+            curr_start_points: (B*N, 3) current points of the front-to-back trace.
+            unfinished_mask_start: (B*N,) bool mask of front traces not yet converged.
+            acc_start_dis: (B*N,) accumulated distances of the front-to-back trace.
+            acc_end_dis: (B*N,) accumulated distances of the back-to-front trace.
+            min_dis: (B*N,) copy of the initial start distances.
+            max_dis: (B*N,) copy of the initial end distances.
+        """
+
+        device = cam_loc.device
+        sphere_intersections_points = (
+            cam_loc[..., None, :]
+            + sphere_intersections[..., None] * ray_directions[..., None, :]
+        )
+        unfinished_mask_start = mask_intersect.reshape(-1).clone()
+        unfinished_mask_end = mask_intersect.reshape(-1).clone()
+
+        # Initialize start current points
+        curr_start_points = torch.zeros(batch_size * num_pixels, 3, device=device)
+        curr_start_points[unfinished_mask_start] = sphere_intersections_points[
+            :, :, 0, :
+        ].reshape(-1, 3)[unfinished_mask_start]
+        acc_start_dis = torch.zeros(batch_size * num_pixels, device=device)
+        acc_start_dis[unfinished_mask_start] = sphere_intersections.reshape(-1, 2)[
+            unfinished_mask_start, 0
+        ]
+
+        # Initialize end current points
+        curr_end_points = torch.zeros(batch_size * num_pixels, 3, device=device)
+        curr_end_points[unfinished_mask_end] = sphere_intersections_points[
+            :, :, 1, :
+        ].reshape(-1, 3)[unfinished_mask_end]
+        acc_end_dis = torch.zeros(batch_size * num_pixels, device=device)
+        acc_end_dis[unfinished_mask_end] = sphere_intersections.reshape(-1, 2)[
+            unfinished_mask_end, 1
+        ]
+
+        # Initialise min and max depth
+        min_dis = acc_start_dis.clone()
+        max_dis = acc_end_dis.clone()
+
+        # Iterate on the rays (from both sides) till finding a surface
+        iters = 0
+
+        # TODO: sdf should also pass info about batches
+
+        next_sdf_start = torch.zeros_like(acc_start_dis)
+        next_sdf_start[unfinished_mask_start] = sdf(
+            curr_start_points[unfinished_mask_start]
+        )
+
+        next_sdf_end = torch.zeros_like(acc_end_dis)
+        next_sdf_end[unfinished_mask_end] = sdf(curr_end_points[unfinished_mask_end])
+
+        while True:
+            # Update sdf
+            curr_sdf_start = torch.zeros_like(acc_start_dis)
+            curr_sdf_start[unfinished_mask_start] = next_sdf_start[
+                unfinished_mask_start
+            ]
+            curr_sdf_start[curr_sdf_start <= self.sdf_threshold] = 0
+
+            curr_sdf_end = torch.zeros_like(acc_end_dis)
+            curr_sdf_end[unfinished_mask_end] = next_sdf_end[unfinished_mask_end]
+            curr_sdf_end[curr_sdf_end <= self.sdf_threshold] = 0
+
+            # Update masks
+            unfinished_mask_start = unfinished_mask_start & (
+                curr_sdf_start > self.sdf_threshold
+            )
+            unfinished_mask_end = unfinished_mask_end & (
+                curr_sdf_end > self.sdf_threshold
+            )
+
+            if (
+                unfinished_mask_start.sum() == 0 and unfinished_mask_end.sum() == 0
+            ) or iters == self.sphere_tracing_iters:
+                break
+            iters += 1
+
+            # Make step
+            # Update distance
+            acc_start_dis = acc_start_dis + curr_sdf_start
+            acc_end_dis = acc_end_dis - curr_sdf_end
+
+            # Update points
+            curr_start_points = (
+                cam_loc
+                + acc_start_dis.reshape(batch_size, num_pixels, 1) * ray_directions
+            ).reshape(-1, 3)
+            curr_end_points = (
+                cam_loc
+                + acc_end_dis.reshape(batch_size, num_pixels, 1) * ray_directions
+            ).reshape(-1, 3)
+
+            # Fix points which wrongly crossed the surface
+            next_sdf_start = torch.zeros_like(acc_start_dis)
+            next_sdf_start[unfinished_mask_start] = sdf(
+                curr_start_points[unfinished_mask_start]
+            )
+
+            next_sdf_end = torch.zeros_like(acc_end_dis)
+            next_sdf_end[unfinished_mask_end] = sdf(
+                curr_end_points[unfinished_mask_end]
+            )
+
+            not_projected_start = next_sdf_start < 0
+            not_projected_end = next_sdf_end < 0
+            not_proj_iters = 0
+            while (
+                not_projected_start.sum() > 0 or not_projected_end.sum() > 0
+            ) and not_proj_iters < self.line_step_iters:
+                # Step backwards
+                acc_start_dis[not_projected_start] -= (
+                    (1 - self.line_search_step) / (2**not_proj_iters)
+                ) * curr_sdf_start[not_projected_start]
+                curr_start_points[not_projected_start] = (
+                    cam_loc
+                    + acc_start_dis.reshape(batch_size, num_pixels, 1) * ray_directions
+                ).reshape(-1, 3)[not_projected_start]
+
+                acc_end_dis[not_projected_end] += (
+                    (1 - self.line_search_step) / (2**not_proj_iters)
+                ) * curr_sdf_end[not_projected_end]
+                curr_end_points[not_projected_end] = (
+                    cam_loc
+                    + acc_end_dis.reshape(batch_size, num_pixels, 1) * ray_directions
+                ).reshape(-1, 3)[not_projected_end]
+
+                # Calc sdf
+                next_sdf_start[not_projected_start] = sdf(
+                    curr_start_points[not_projected_start]
+                )
+                next_sdf_end[not_projected_end] = sdf(
+                    curr_end_points[not_projected_end]
+                )
+
+                # Update mask
+                not_projected_start = next_sdf_start < 0
+                not_projected_end = next_sdf_end < 0
+                not_proj_iters += 1
+
+            unfinished_mask_start = unfinished_mask_start & (
+                acc_start_dis < acc_end_dis
+            )
+            unfinished_mask_end = unfinished_mask_end & (acc_start_dis < acc_end_dis)
+
+        return (
+            curr_start_points,
+            unfinished_mask_start,
+            acc_start_dis,
+            acc_end_dis,
+            min_dis,
+            max_dis,
+        )
+
+    def ray_sampler(
+        self,
+        sdf: Callable[[torch.Tensor], torch.Tensor],
+        cam_loc: torch.Tensor,
+        object_mask: torch.Tensor,
+        ray_directions: torch.Tensor,
+        sampler_min_max: torch.Tensor,
+        sampler_mask: torch.Tensor,
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        """
+        Sample the ray in a given range and run secant on rays which have sign transition.
+
+        Args:
+            sdf:
+            cam_loc:
+            object_mask:
+            ray_directions:
+            sampler_min_max:
+            sampler_mask:
+
+        Returns:
+
+        """
+
+        batch_size, num_pixels, _ = ray_directions.shape
+        device = cam_loc.device
+        n_total_pxl = batch_size * num_pixels
+        sampler_pts = torch.zeros(n_total_pxl, 3, device=device)
+        sampler_dists = torch.zeros(n_total_pxl, device=device)
+
+        intervals_dist = torch.linspace(0, 1, steps=self.n_steps, device=device).view(
+            1, 1, -1
+        )
+
+        pts_intervals = sampler_min_max[:, :, 0].unsqueeze(-1) + intervals_dist * (
+            sampler_min_max[:, :, 1] - sampler_min_max[:, :, 0]
+        ).unsqueeze(-1)
+        points = (
+            cam_loc[..., None, :]
+            + pts_intervals[..., None] * ray_directions[..., None, :]
+        )
+
+        # Get the non convergent rays
+        mask_intersect_idx = torch.nonzero(sampler_mask).flatten()
+        points = points.reshape((-1, self.n_steps, 3))[sampler_mask, :, :]
+        pts_intervals = pts_intervals.reshape((-1, self.n_steps))[sampler_mask]
+
+        sdf_val_all = []
+        for pnts in torch.split(points.reshape(-1, 3), 100000, dim=0):
+            sdf_val_all.append(sdf(pnts))
+        sdf_val = torch.cat(sdf_val_all).reshape(-1, self.n_steps)
+
+        tmp = torch.sign(sdf_val) * torch.arange(
+            self.n_steps, 0, -1, device=device, dtype=torch.float32
+        ).reshape(1, self.n_steps)
+        # Force argmin to return the first min value
+        sampler_pts_ind = torch.argmin(tmp, -1)
+        sampler_pts[mask_intersect_idx] = points[
+            torch.arange(points.shape[0]), sampler_pts_ind, :
+        ]
+        sampler_dists[mask_intersect_idx] = pts_intervals[
+            torch.arange(pts_intervals.shape[0]), sampler_pts_ind
+        ]
+
+        true_surface_pts = object_mask.reshape(-1)[sampler_mask]
+        net_surface_pts = sdf_val[torch.arange(sdf_val.shape[0]), sampler_pts_ind] < 0
+
+        # take points with minimal SDF value for P_out pixels
+        p_out_mask = ~(true_surface_pts & net_surface_pts)
+        n_p_out = p_out_mask.sum()
+        if n_p_out > 0:
+            out_pts_idx = torch.argmin(sdf_val[p_out_mask, :], -1)
+            sampler_pts[mask_intersect_idx[p_out_mask]] = points[p_out_mask, :, :][
+                # pyre-fixme[6]: For 1st param expected `Union[bool, float, int]`
+                #  but got `Tensor`.
+                torch.arange(n_p_out),
+                out_pts_idx,
+                :,
+            ]
+            sampler_dists[mask_intersect_idx[p_out_mask]] = pts_intervals[
+                p_out_mask,
+                :
+                # pyre-fixme[6]: For 1st param expected `Union[bool, float, int]` but
+                #  got `Tensor`.
+            ][torch.arange(n_p_out), out_pts_idx]
+
+        # Get Network object mask
+        sampler_net_obj_mask = sampler_mask.clone()
+        sampler_net_obj_mask[mask_intersect_idx[~net_surface_pts]] = False
+
+        # Run Secant method
+        secant_pts = (
+            net_surface_pts & true_surface_pts if self.training else net_surface_pts
+        )
+        n_secant_pts = secant_pts.sum()
+        if n_secant_pts > 0:
+            # Get secant z predictions
+            z_high = pts_intervals[
+                torch.arange(pts_intervals.shape[0]), sampler_pts_ind
+            ][secant_pts]
+            sdf_high = sdf_val[torch.arange(sdf_val.shape[0]), sampler_pts_ind][
+                secant_pts
+            ]
+            z_low = pts_intervals[secant_pts][
+                # pyre-fixme[6]: For 1st param expected `Union[bool, float, int]`
+                #  but got `Tensor`.
+                torch.arange(n_secant_pts),
+                sampler_pts_ind[secant_pts] - 1,
+            ]
+            sdf_low = sdf_val[secant_pts][
+                # pyre-fixme[6]: For 1st param expected `Union[bool, float, int]`
+                #  but got `Tensor`.
+                torch.arange(n_secant_pts),
+                sampler_pts_ind[secant_pts] - 1,
+            ]
+            cam_loc_secant = cam_loc.reshape(-1, 3)[mask_intersect_idx[secant_pts]]
+            ray_directions_secant = ray_directions.reshape((-1, 3))[
+                mask_intersect_idx[secant_pts]
+            ]
+            z_pred_secant = self.secant(
+                sdf_low,
+                sdf_high,
+                z_low,
+                z_high,
+                cam_loc_secant,
+                ray_directions_secant,
+                # pyre-fixme[6]: For 7th param expected `Module` but got `(Tensor)
+                #  -> Tensor`.
+                sdf,
+            )
+
+            # Get points
+            sampler_pts[mask_intersect_idx[secant_pts]] = (
+                cam_loc_secant + z_pred_secant.unsqueeze(-1) * ray_directions_secant
+            )
+            sampler_dists[mask_intersect_idx[secant_pts]] = z_pred_secant
+
+        return sampler_pts, sampler_net_obj_mask, sampler_dists
+
+    def secant(
+        self,
+        sdf_low: torch.Tensor,
+        sdf_high: torch.Tensor,
+        z_low: torch.Tensor,
+        z_high: torch.Tensor,
+        cam_loc: torch.Tensor,
+        ray_directions: torch.Tensor,
+        sdf: nn.Module,
+    ) -> torch.Tensor:
+        """
+        Runs the secant method for interval [z_low, z_high] for n_secant_steps and
+        returns the refined per-ray depths; the input tensors are not modified.
+        """
+        z_pred = -sdf_low * (z_high - z_low) / (sdf_high - sdf_low) + z_low
+        for _ in range(self.n_secant_steps):
+            p_mid = cam_loc + z_pred.unsqueeze(-1) * ray_directions
+            sdf_mid = sdf(p_mid)
+            # Shrink the bracket out of place: positive samples replace the low end,
+            # negative ones the high end; an exact zero (or NaN) moves neither.
+            ind_low = sdf_mid > 0
+            z_low = torch.where(ind_low, z_pred, z_low)
+            sdf_low = torch.where(ind_low, sdf_mid, sdf_low)
+            ind_high = sdf_mid < 0
+            z_high = torch.where(ind_high, z_pred, z_high)
+            sdf_high = torch.where(ind_high, sdf_mid, sdf_high)
+
+            z_pred = -sdf_low * (z_high - z_low) / (sdf_high - sdf_low) + z_low
+
+        return z_pred
+
+    def minimal_sdf_points(
+        self,
+        sdf: Callable[[torch.Tensor], torch.Tensor],
+        cam_loc: torch.Tensor,
+        ray_directions: torch.Tensor,
+        mask: torch.Tensor,
+        min_dis: torch.Tensor,
+        max_dis: torch.Tensor,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """
+        Find points with minimal SDF value on rays for P_out pixels: evaluates `sdf`
+        at n_steps random depths on each masked ray; returns (points, depths).
+        """
+        n_mask_points = mask.sum()
+        # The same n random fractions are shared by all rays, then scaled per ray.
+        n = self.n_steps
+        steps = torch.empty(n, device=cam_loc.device).uniform_(0.0, 1.0)
+        mask_max_dis = max_dis[mask].unsqueeze(-1)
+        mask_min_dis = min_dis[mask].unsqueeze(-1)
+        steps = (
+            # pyre-fixme[6]: For 1st param expected `int` but got `Tensor`.
+            steps.unsqueeze(0).repeat(n_mask_points, 1) * (mask_max_dis - mask_min_dis)
+            + mask_min_dis
+        )
+        # Origins and directions of the rays selected by `mask`.
+        mask_points = cam_loc.reshape(-1, 3)[mask]
+        mask_rays = ray_directions[mask, :]
+        # Candidate points: origin + depth * direction, n candidates per selected ray.
+        mask_points_all = mask_points.unsqueeze(1).repeat(1, n, 1) + steps.unsqueeze(
+            -1
+        ) * mask_rays.unsqueeze(1).repeat(1, n, 1)
+        points = mask_points_all.reshape(-1, 3)
+        # Query the SDF in chunks of 100000 points (presumably to bound peak memory).
+        mask_sdf_all = []
+        for pnts in torch.split(points, 100000, dim=0):
+            mask_sdf_all.append(sdf(pnts))
+        # Per ray, keep the candidate with the smallest SDF value.
+        mask_sdf_all = torch.cat(mask_sdf_all).reshape(-1, n)
+        min_vals, min_idx = mask_sdf_all.min(-1)
+        min_mask_points = mask_points_all.reshape(-1, n, 3)[
+            # pyre-fixme[6]: For 2nd param expected `Union[bool, float, int]` but
+            #  got `Tensor`.
+            torch.arange(0, n_mask_points),
+            min_idx,
+        ]
+        # pyre-fixme[6]: For 2nd param expected `Union[bool, float, int]` but got
+        #  `Tensor`.
+        min_mask_dist = steps.reshape(-1, n)[torch.arange(0, n_mask_points), min_idx]
+
+        return min_mask_points, min_mask_dist
+
+
+# TODO: support variable origins
+def _get_sphere_intersection(
+    cam_loc: torch.Tensor, ray_directions: torch.Tensor, r: float = 1.0
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    # Input: cam_loc (must broadcast against ray_directions) ; n_images x n_rays x 3
+    # Output: n_images x n_rays x 2 (close and far, clamped to >= 0) ; n_images x n_rays
+    # Intersects each ray with the origin-centred sphere of radius r.
+    n_imgs, n_pix, _ = ray_directions.shape
+    device = cam_loc.device
+
+    # Roots of |cam_loc + t * dir|^2 = r^2: t = -dot +/- sqrt(under_sqrt). There is
+    # no |dir|^2 term below, so the formula is exact only for unit-length directions.
+    ray_cam_dot = (ray_directions * cam_loc).sum(-1)  # n_images x n_rays
+    # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+    under_sqrt = ray_cam_dot**2 - (cam_loc.norm(2, dim=-1) ** 2 - r**2)
+    # Only a strictly positive discriminant counts as a hit (tangent rays are misses).
+    under_sqrt = under_sqrt.reshape(-1)
+    mask_intersect = under_sqrt > 0
+    # Rows of rays that miss the sphere stay at zero.
+    sphere_intersections = torch.zeros(n_imgs * n_pix, 2, device=device)
+    sphere_intersections[mask_intersect] = torch.sqrt(
+        under_sqrt[mask_intersect]
+    ).unsqueeze(-1) * torch.tensor([-1.0, 1.0], device=device)
+    sphere_intersections[mask_intersect] -= ray_cam_dot.reshape(-1)[
+        mask_intersect
+    ].unsqueeze(-1)
+
+    sphere_intersections = sphere_intersections.reshape(n_imgs, n_pix, 2)
+    sphere_intersections = sphere_intersections.clamp_min(0.0)
+    mask_intersect = mask_intersect.reshape(n_imgs, n_pix)
+
+    return sphere_intersections, mask_intersect
diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/raymarcher.py b/pytorch3d/pytorch3d/implicitron/models/renderer/raymarcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c6addf1aa78ab6523333ba451f758c8c7fe5415
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/renderer/raymarcher.py
@@ -0,0 +1,238 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Any, Callable, Dict, Optional, Tuple
+
+import torch
+from pytorch3d.implicitron.models.renderer.base import RendererOutput
+from pytorch3d.implicitron.tools.config import registry, ReplaceableBase
+from pytorch3d.renderer.implicit.raymarching import _check_raymarcher_inputs
+
+
+_TTensor = torch.Tensor
+
+
+class RaymarcherBase(ReplaceableBase):
+    """
+    Base class for raymarchers, which take features and density descriptors sampled
+    along rendering rays and march along them to generate a feature render.
+    `forward` here only raises NotImplementedError; subclasses must override it.
+    """
+
+    def forward(
+        self,
+        rays_densities: torch.Tensor,
+        rays_features: torch.Tensor,
+        aux: Dict[str, Any],
+    ) -> RendererOutput:
+        """
+        Args:
+            rays_densities: Per-ray density values represented with a tensor
+                of shape `(..., n_points_per_ray, 1)`.
+            rays_features: Per-ray feature values represented with a tensor
+                of shape `(..., n_points_per_ray, feature_dim)`.
+            aux: a dictionary with extra information.
+        """
+        raise NotImplementedError()
+
+
+class AccumulativeRaymarcherBase(RaymarcherBase, torch.nn.Module):
+    """
+    This generalizes the `pytorch3d.renderer.EmissionAbsorptionRaymarcher`
+    and NeuralVolumes' cumsum ray marcher. It additionally returns
+    the rendering weights that can be used in the NVS pipeline to carry out
+    the importance ray-sampling in the refining pass.
+    Different from `pytorch3d.renderer.EmissionAbsorptionRaymarcher`, it takes raw
+    (non-exponentiated) densities.
+
+    Args:
+        surface_thickness: The thickness of the raymarched surface, in ray samples.
+        bg_color: The background color. A tuple of either 1 element or of D elements,
+            where D matches the feature dimensionality; it is broadcast when necessary.
+        replicate_last_interval: If True, the ray length assigned to the last interval
+            for the opacity delta calculation is copied from the penultimate interval.
+        background_opacity: The length over which the last raw opacity value
+            (i.e. before exponentiation) is considered to apply, for the delta
+            calculation. Ignored if replicate_last_interval=True.
+        density_relu: If `True`, passes the input density through ReLU before
+            raymarching.
+        blend_output: If `True`, alpha-blends the output renders with the
+            background color using the rendered opacity mask.
+
+        capping_function: The capping function, set by `capping_function_type`.
+            Options:
+                - "exponential" (`cap_fn(x) = 1 - exp(-x)`)
+                - "cap1" (`cap_fn(x) = min(x, 1)`)
+            Set to "exponential" for the standard Emission Absorption raymarching.
+        weight_function: The weighting function, set by `weight_function_type`.
+            Options:
+                - "product" (`weight_fn(w, x) = w * x`)
+                - "minimum" (`weight_fn(w, x) = min(w, x)`)
+            Set to "product" for the standard Emission Absorption raymarching.
+    """
+
+    surface_thickness: int = 1
+    bg_color: Tuple[float, ...] = (0.0,)
+    replicate_last_interval: bool = False
+    background_opacity: float = 0.0
+    density_relu: bool = True
+    blend_output: bool = False
+
+    @property
+    def capping_function_type(self) -> str:
+        raise NotImplementedError()
+
+    @property
+    def weight_function_type(self) -> str:
+        raise NotImplementedError()
+
+    def __post_init__(self):
+        """
+        Validates `bg_color`, registers it as the `_bg_color` buffer and selects the
+        capping / weight callables from the `*_function_type` properties.
+        Note: `surface_thickness` is the overlap between absorption and density.
+        """
+        bg_color = torch.tensor(self.bg_color)
+        if bg_color.ndim != 1:
+            raise ValueError(f"bg_color (shape {bg_color.shape}) should be a 1D tensor")
+
+        self.register_buffer("_bg_color", bg_color, persistent=False)
+
+        self._capping_function: Callable[[_TTensor], _TTensor] = {
+            "exponential": lambda x: 1.0 - torch.exp(-x),
+            "cap1": lambda x: x.clamp(max=1.0),
+        }[self.capping_function_type]
+
+        self._weight_function: Callable[[_TTensor, _TTensor], _TTensor] = {
+            "product": lambda curr, acc: curr * acc,
+            "minimum": lambda curr, acc: torch.minimum(curr, acc),
+        }[self.weight_function_type]
+
+    # pyre-fixme[14]: `forward` overrides method defined in `RaymarcherBase`
+    #  inconsistently.
+    def forward(
+        self,
+        rays_densities: torch.Tensor,
+        rays_features: torch.Tensor,
+        aux: Dict[str, Any],
+        ray_lengths: torch.Tensor,
+        ray_deltas: Optional[torch.Tensor] = None,
+        density_noise_std: float = 0.0,
+        **kwargs,
+    ) -> RendererOutput:
+        """
+        Args:
+            rays_densities: Per-ray density values represented with a tensor
+                of shape `(..., n_points_per_ray, 1)`.
+            rays_features: Per-ray feature values represented with a tensor
+                of shape `(..., n_points_per_ray, feature_dim)`.
+            aux: a dictionary with extra information.
+            ray_lengths: Per-ray depth values represented with a tensor
+                of shape `(..., n_points_per_ray)`.
+            ray_deltas: Optional differences between consecutive elements along the ray bundle
+                represented with a tensor of shape `(..., n_points_per_ray)`. If None,
+                these differences are computed from ray_lengths.
+            density_noise_std: the magnitude of the noise added to densities.
+
+        Returns:
+            A `RendererOutput` carrying `aux` unchanged and the fields:
+            features: A tensor of shape `(..., feature_dim)` containing
+                the rendered features for each ray.
+            depths: A tensor of shape `(..., 1)` containing estimated depth.
+            masks: A tensor of shape `(..., 1)` containing rendered opacities.
+            weights: A tensor of shape `(..., n_points_per_ray)` with the ray-specific
+                non-negative opacity weights. In general, they don't sum to 1 but
+                do not overcome it, i.e. `(weights.sum(dim=-1) <= 1.0).all()` holds.
+        """
+        _check_raymarcher_inputs(
+            rays_densities,
+            rays_features,
+            ray_lengths,
+            z_can_be_none=True,
+            features_can_be_none=False,
+            density_1d=True,
+        )
+        # Interval length assigned to every sample along the ray.
+        if ray_deltas is None:
+            ray_lengths_diffs = torch.diff(ray_lengths, dim=-1)
+            if self.replicate_last_interval:
+                last_interval = ray_lengths_diffs[..., -1:]
+            else:
+                last_interval = torch.full_like(
+                    ray_lengths[..., :1], self.background_opacity
+                )
+            deltas = torch.cat((ray_lengths_diffs, last_interval), dim=-1)
+        else:
+            deltas = ray_deltas
+        # Drop the trailing singleton channel of the densities.
+        rays_densities = rays_densities[..., 0]
+
+        if density_noise_std > 0.0:
+            noise: _TTensor = torch.randn_like(rays_densities).mul(density_noise_std)
+            rays_densities = rays_densities + noise
+        if self.density_relu:
+            rays_densities = torch.relu(rays_densities)
+        # Per-interval opacity and cumulative opacity up to each sample.
+        weighted_densities = deltas * rays_densities
+        capped_densities = self._capping_function(weighted_densities)
+
+        rays_opacities = self._capping_function(
+            torch.cumsum(weighted_densities, dim=-1)
+        )
+        opacities = rays_opacities[..., -1:]
+        absorption_shifted = (-rays_opacities + 1.0).roll(
+            self.surface_thickness, dims=-1
+        )
+        absorption_shifted[..., : self.surface_thickness] = 1.0
+        # Absorption was shifted by surface_thickness samples, with the head set to 1.
+        weights = self._weight_function(capped_densities, absorption_shifted)
+        features = (weights[..., None] * rays_features).sum(dim=-2)
+        depth = (weights * ray_lengths)[..., None].sum(dim=-2)
+        # Add the background where the ray is not opaque; see `blend_output`.
+        alpha = opacities if self.blend_output else 1
+        if self._bg_color.shape[-1] not in [1, features.shape[-1]]:
+            raise ValueError("Wrong number of background color channels.")
+        features = alpha * features + (1 - opacities) * self._bg_color
+
+        return RendererOutput(
+            features=features,
+            depths=depth,
+            masks=opacities,
+            weights=weights,
+            aux=aux,
+        )
+
+
+@registry.register
+class EmissionAbsorptionRaymarcher(AccumulativeRaymarcherBase):
+    """
+    Implements the EmissionAbsorption raymarcher: exponential capping, product weights.
+    """
+
+    background_opacity: float = 1e10  # length assigned to the last ray interval
+
+    @property
+    def capping_function_type(self) -> str:
+        return "exponential"
+
+    @property
+    def weight_function_type(self) -> str:
+        return "product"
+
+
+@registry.register
+class CumsumRaymarcher(AccumulativeRaymarcherBase):
+    """
+    Implements the NeuralVolumes' cumulative-sum raymarcher ("cap1", "minimum").
+    """
+
+    @property
+    def capping_function_type(self) -> str:
+        return "cap1"
+
+    @property
+    def weight_function_type(self) -> str:
+        return "minimum"
diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/rgb_net.py b/pytorch3d/pytorch3d/implicitron/models/renderer/rgb_net.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d41d2165b2c7769509925708b387c5db17137d2
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/renderer/rgb_net.py
@@ -0,0 +1,138 @@
+# @lint-ignore-every LICENSELINT
+# Adapted from RenderingNetwork from IDR
+# https://github.com/lioryariv/idr/
+# Copyright (c) 2020 Lior Yariv
+
+import logging
+from typing import List, Tuple
+
+import torch
+from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
+from pytorch3d.implicitron.tools.config import enable_get_default_args
+from pytorch3d.renderer.implicit import HarmonicEmbedding
+
+from torch import nn
+
+
+logger = logging.getLogger(__name__)
+
+
+class RayNormalColoringNetwork(torch.nn.Module):
+    """
+    MLP (ReLU hidden layers, tanh output) over concatenated per-point inputs.
+
+    Members:
+        d_in and feature_vector_size: Sum of these is the input
+            dimension. These must add up to the sum of
+                - 3 [for the points]
+                - 3 unless mode=no_normal [for the normals]
+                - 3 unless mode=no_view_dir [for view directions]
+                - the feature size, [number of channels in feature_vectors]
+
+        d_out: dimension of output.
+        mode: One of "idr", "no_view_dir" or "no_normal" to allow omitting
+            part of the network input.
+        dims: list of hidden layer sizes.
+        weight_norm: whether to apply weight normalization to each layer.
+        n_harmonic_functions_dir: If >0, use a harmonic embedding with this number
+            of harmonic functions for the view direction. Otherwise view directions
+            are fed without embedding, unless mode is `no_view_dir`.
+        pooled_feature_dim: If a pooling function is in use (provided as pooling_fn
+            to forward()) this must be its number of features. Otherwise this must
+            be set to 0. (If used from GenericModel, this will be set automatically.)
+    """
+
+    def __init__(
+        self,
+        feature_vector_size: int = 3,
+        mode: str = "idr",
+        d_in: int = 9,
+        d_out: int = 3,
+        dims: Tuple[int, ...] = (512, 512, 512, 512),
+        weight_norm: bool = True,
+        n_harmonic_functions_dir: int = 0,
+        pooled_feature_dim: int = 0,
+    ) -> None:
+        super().__init__()
+
+        self.mode = mode
+        self.output_dimensions = d_out
+        dims_full: List[int] = [d_in + feature_vector_size] + list(dims) + [d_out]
+        # The embedding output replaces the 3 raw view-direction input channels.
+        self.embedview_fn = None
+        if n_harmonic_functions_dir > 0:
+            self.embedview_fn = HarmonicEmbedding(
+                n_harmonic_functions_dir, append_input=True
+            )
+            dims_full[0] += self.embedview_fn.get_output_dim() - 3
+
+        if pooled_feature_dim > 0:
+            logger.info("Pooled features in rendering network.")
+            dims_full[0] += pooled_feature_dim
+
+        self.num_layers = len(dims_full)
+        # One Linear layer per consecutive pair of sizes in dims_full.
+        layers = []
+        for layer_idx in range(self.num_layers - 1):
+            out_dim = dims_full[layer_idx + 1]
+            lin = nn.Linear(dims_full[layer_idx], out_dim)
+
+            if weight_norm:
+                lin = nn.utils.weight_norm(lin)
+
+            layers.append(lin)
+        self.linear_layers = torch.nn.ModuleList(layers)
+
+        self.relu = nn.ReLU()
+        self.tanh = nn.Tanh()
+
+    def forward(
+        self,
+        feature_vectors: torch.Tensor,
+        points,
+        normals,
+        ray_bundle: ImplicitronRayBundle,
+        masks=None,
+        pooling_fn=None,
+    ):
+        if masks is not None and not masks.any():
+            return torch.zeros_like(normals)
+        # (An all-False mask returned zeros shaped like `normals` above.)
+        view_dirs = ray_bundle.directions
+        if masks is not None:
+            # in case of IDR, other outputs are passed here after applying the mask
+            view_dirs = view_dirs.reshape(view_dirs.shape[0], -1, 3)[
+                :, masks.reshape(-1)
+            ]
+        # Optional harmonic embedding of the view directions.
+        if self.embedview_fn is not None:
+            view_dirs = self.embedview_fn(view_dirs)
+        # Assemble the network input according to the configured mode.
+        if self.mode == "idr":
+            rendering_input = torch.cat(
+                [points, view_dirs, normals, feature_vectors], dim=-1
+            )
+        elif self.mode == "no_view_dir":
+            rendering_input = torch.cat([points, normals, feature_vectors], dim=-1)
+        elif self.mode == "no_normal":
+            rendering_input = torch.cat([points, view_dirs, feature_vectors], dim=-1)
+        else:
+            raise ValueError(f"Unsupported rendering mode: {self.mode}")
+        # Optionally append features pooled at the query points.
+        if pooling_fn is not None:
+            featspool = pooling_fn(points[None])[0]
+            rendering_input = torch.cat((rendering_input, featspool), dim=-1)
+
+        x = rendering_input
+
+        for layer_idx in range(self.num_layers - 1):
+            x = self.linear_layers[layer_idx](x)
+
+            if layer_idx < self.num_layers - 2:
+                x = self.relu(x)
+        # The final tanh bounds every output channel to (-1, 1).
+        x = self.tanh(x)
+        return x
+
+
+enable_get_default_args(RayNormalColoringNetwork)
diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/sdf_renderer.py b/pytorch3d/pytorch3d/implicitron/models/renderer/sdf_renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..12e54b9d38f34e6c870abba2c302ca45fba89907
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/renderer/sdf_renderer.py
@@ -0,0 +1,274 @@
+# @lint-ignore-every LICENSELINT
+# Adapted from https://github.com/lioryariv/idr/blob/main/code/model/
+#              implicit_differentiable_renderer.py
+# Copyright (c) 2020 Lior Yariv
+import functools
+from typing import List, Optional, Tuple
+
+import torch
+from omegaconf import DictConfig
+from pytorch3d.common.compat import prod
+from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
+from pytorch3d.implicitron.tools.config import (
+    get_default_args_field,
+    registry,
+    run_auto_creation,
+)
+from pytorch3d.implicitron.tools.utils import evaluating
+
+from .base import BaseRenderer, EvaluationMode, ImplicitFunctionWrapper, RendererOutput
+from .ray_tracing import RayTracing
+from .rgb_net import RayNormalColoringNetwork
+
+
+@registry.register
+class SignedDistanceFunctionRenderer(BaseRenderer, torch.nn.Module):  # pyre-ignore[13]
+    render_features_dimensions: int = 3
+    object_bounding_sphere: float = 1.0
+    ray_tracer: RayTracing
+    ray_normal_coloring_network_args: DictConfig = get_default_args_field(
+        RayNormalColoringNetwork
+    )
+    bg_color: Tuple[float, ...] = (0.0,)
+    soft_mask_alpha: float = 50.0
+
+    def __post_init__(
+        self,
+    ):
+        render_features_dimensions = self.render_features_dimensions
+        if len(self.bg_color) not in [1, render_features_dimensions]:
+            raise ValueError(
+                f"Background color should have {render_features_dimensions} entries."
+            )
+        # presumably instantiates self.ray_tracer via create_ray_tracer() - TODO confirm
+        run_auto_creation(self)
+        # The color network is always sized to the number of rendered feature channels.
+        self.ray_normal_coloring_network_args[
+            "feature_vector_size"
+        ] = render_features_dimensions
+        self._rgb_network = RayNormalColoringNetwork(
+            **self.ray_normal_coloring_network_args
+        )
+
+        self.register_buffer("_bg_color", torch.tensor(self.bg_color), persistent=False)
+
+    @classmethod
+    def ray_tracer_tweak_args(cls, type, args: DictConfig) -> None:
+        del args["object_bounding_sphere"]
+
+    def create_ray_tracer(self) -> None:
+        self.ray_tracer = RayTracing(
+            **self.ray_tracer_args,
+            object_bounding_sphere=self.object_bounding_sphere,
+        )
+
+    def requires_object_mask(self) -> bool:
+        return True
+
+    def forward(
+        self,
+        ray_bundle: ImplicitronRayBundle,
+        implicit_functions: List[ImplicitFunctionWrapper],
+        evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION,
+        object_mask: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> RendererOutput:
+        """
+        Args:
+            ray_bundle: An `ImplicitronRayBundle` object containing the
+                parametrizations of the sampled rendering rays.
+            implicit_functions: single element list of ImplicitFunctionWrappers which
+                defines the implicit function to be used (ValueError otherwise).
+            evaluation_mode: one of EvaluationMode.TRAINING or
+                EvaluationMode.EVALUATION which determines the settings used for
+                rendering.
+            object_mask: BoolTensor, denoting the silhouette of the object.
+                Required by SignedDistanceFunctionRenderer (ValueError if None).
+            kwargs: not used by this renderer.
+
+        Returns:
+            instance of RendererOutput
+        """
+        if len(implicit_functions) != 1:
+            raise ValueError(
+                "SignedDistanceFunctionRenderer supports only single pass."
+            )
+
+        if object_mask is None:
+            raise ValueError("Expected object_mask to be provided in the kwargs")
+        object_mask = object_mask.bool()
+
+        implicit_function = implicit_functions[0]
+        implicit_function_gradient = functools.partial(_gradient, implicit_function)
+
+        # object_mask: silhouette of the object
+        batch_size, *spatial_size, _ = ray_bundle.lengths.shape
+        num_pixels = prod(spatial_size)
+
+        cam_loc = ray_bundle.origins.reshape(batch_size, -1, 3)
+        ray_dirs = ray_bundle.directions.reshape(batch_size, -1, 3)
+        object_mask = object_mask.reshape(batch_size, -1)
+        # Locate the ray/surface intersections without tracking gradients.
+        with torch.no_grad(), evaluating(implicit_function):
+            points, network_object_mask, dists = self.ray_tracer(
+                sdf=lambda x: implicit_function(rays_points_world=x)[
+                    :, 0
+                ],  # TODO: get rid of this wrapper
+                cam_loc=cam_loc,
+                object_mask=object_mask,
+                ray_directions=ray_dirs,
+            )
+
+        # TODO: below, cam_loc might as well be different
+        depth = dists.reshape(batch_size, num_pixels, 1)
+        points = (cam_loc + depth * ray_dirs).reshape(-1, 3)
+
+        sdf_output = implicit_function(rays_points_world=points)[:, 0:1]
+        # NOTE most of the intermediate variables are flattened for
+        # no apparent reason (here and in the ray tracer)
+        ray_dirs = ray_dirs.reshape(-1, 3)
+        object_mask = object_mask.reshape(-1)
+
+        # TODO: move it to loss computation
+        if evaluation_mode == EvaluationMode.TRAINING:
+            surface_mask = network_object_mask & object_mask
+            surface_points = points[surface_mask]
+            surface_dists = dists[surface_mask].unsqueeze(-1)
+            surface_ray_dirs = ray_dirs[surface_mask]
+            surface_cam_loc = cam_loc.reshape(-1, 3)[surface_mask]
+            surface_output = sdf_output[surface_mask]
+            N = surface_points.shape[0]
+
+            # Sample points for the eikonal loss
+            eik_bounding_box: float = self.object_bounding_sphere
+            n_eik_points = batch_size * num_pixels // 2
+            eikonal_points = torch.empty(
+                n_eik_points,
+                3,
+                # NOTE: pyre types this as `Union[device, Tensor, Module]`.
+                device=self._bg_color.device,
+            ).uniform_(-eik_bounding_box, eik_bounding_box)
+            eikonal_pixel_points = points.clone()
+            eikonal_pixel_points = eikonal_pixel_points.detach()
+            eikonal_points = torch.cat([eikonal_points, eikonal_pixel_points], 0)
+            # Surface points go first, so gradient rows [:N] / [N:] split the two sets.
+            points_all = torch.cat([surface_points, eikonal_points], dim=0)
+
+            output = implicit_function(rays_points_world=surface_points)
+            surface_sdf_values = output[
+                :N, 0:1
+            ].detach()  # how is it different from sdf_output?
+
+            g = implicit_function_gradient(points_all)
+            surface_points_grad = g[:N, 0, :].clone().detach()
+            grad_theta = g[N:, 0, :]
+            # Re-express the surface points via _sample_network (keeps surface_output).
+            differentiable_surface_points = _sample_network(
+                surface_output,
+                surface_sdf_values,
+                surface_points_grad,
+                surface_dists,
+                surface_cam_loc,
+                surface_ray_dirs,
+            )
+
+        else:
+            surface_mask = network_object_mask
+            differentiable_surface_points = points[surface_mask]
+            grad_theta = None
+        # Feature channels (everything after the SDF channel) at the surface points.
+        empty_render = differentiable_surface_points.shape[0] == 0
+        features = implicit_function(rays_points_world=differentiable_surface_points)[
+            None, :, 1:
+        ]
+        normals_full = features.new_zeros(
+            batch_size, *spatial_size, 3, requires_grad=empty_render
+        )
+        render_full = (
+            features.new_ones(
+                batch_size,
+                *spatial_size,
+                self.render_features_dimensions,
+                requires_grad=empty_render,
+            )
+            * self._bg_color
+        )
+        mask_full = features.new_ones(
+            batch_size, *spatial_size, 1, requires_grad=empty_render
+        )
+        if not empty_render:
+            normals = implicit_function_gradient(differentiable_surface_points)[
+                None, :, 0, :
+            ]
+            normals_full.view(-1, 3)[surface_mask] = normals
+            render_full.view(-1, self.render_features_dimensions)[
+                surface_mask
+            ] = self._rgb_network(
+                features,
+                differentiable_surface_points[None],
+                normals,
+                ray_bundle,
+                surface_mask[None, :, None],
+                pooling_fn=None,  # TODO
+            )
+            mask_full.view(-1, 1)[~surface_mask] = torch.sigmoid(
+                # pyre-fixme[6]: For 1st param expected `Tensor` but got `float`.
+                -self.soft_mask_alpha
+                * sdf_output[~surface_mask]
+            )
+
+        # scatter points with surface_mask
+        points_full = ray_bundle.origins.detach().clone()
+        points_full.view(-1, 3)[surface_mask] = differentiable_surface_points
+
+        # TODO: it is sparse here but otherwise dense
+        return RendererOutput(
+            features=render_full,
+            normals=normals_full,
+            depths=depth.reshape(batch_size, *spatial_size, 1),
+            masks=mask_full,  # this is a differentiable approximation, see (7) in the paper
+            points=points_full,
+            aux={"grad_theta": grad_theta},  # TODO: will be moved to eikonal loss
+            # TODO: do we need sdf_output, grad_theta? Only for loss probably
+        )
+
+
+def _sample_network(
+    surface_output,
+    surface_sdf_values,
+    surface_points_grad,
+    surface_dists,
+    surface_cam_loc,
+    surface_ray_dirs,
+    eps: float = 1e-4,
+):
+    # t -> t(theta) = dists - (output - sdf_values) / (grad . dir); |grad . dir| >= eps
+    surface_ray_dirs_0 = surface_ray_dirs.detach()
+    surface_points_dot = torch.bmm(
+        surface_points_grad.view(-1, 1, 3), surface_ray_dirs_0.view(-1, 3, 1)
+    ).squeeze(-1)
+    dot_sign = (surface_points_dot >= 0).to(surface_points_dot) * 2 - 1
+    surface_dists_theta = surface_dists - (surface_output - surface_sdf_values) / (
+        surface_points_dot.abs().clip(eps) * dot_sign
+    )
+
+    # t(theta) -> x(theta,c,v) = camera location + t(theta) * ray direction
+    surface_points_theta_c_v = surface_cam_loc + surface_dists_theta * surface_ray_dirs
+
+    return surface_points_theta_c_v
+
+
+@torch.enable_grad()
+def _gradient(module, rays_points_world):
+    """Return the gradient of the module's first output channel w.r.t. the points,
+    with an extra axis at dim 1; sets `requires_grad` on the input tensor in place."""
+    rays_points_world.requires_grad_(True)
+    y = module.forward(rays_points_world=rays_points_world)[:, :1]
+    gradients = torch.autograd.grad(
+        outputs=y,
+        inputs=rays_points_world,
+        grad_outputs=torch.ones_like(y),
+        create_graph=True,
+        retain_graph=True,
+    )[0]
+    return gradients.unsqueeze(1)
diff --git a/pytorch3d/pytorch3d/implicitron/models/utils.py b/pytorch3d/pytorch3d/implicitron/models/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2f7dc668c22d6bb37cb08ff023c4cb23418e283
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/utils.py
@@ -0,0 +1,211 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+# Note: The #noqa comments below are for unused imports of pluggable implementations
+# which are part of implicitron. They ensure that the registry is prepopulated.
+
+import warnings
+from logging import Logger
+from typing import Any, Dict, Optional, Tuple
+
+import torch
+import tqdm
+from pytorch3d.common.compat import prod
+
+from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
+
+from pytorch3d.implicitron.tools import image_utils
+
+from pytorch3d.implicitron.tools.utils import cat_dataclass
+
+
+def preprocess_input(
+    image_rgb: Optional[torch.Tensor],
+    fg_probability: Optional[torch.Tensor],
+    depth_map: Optional[torch.Tensor],
+    mask_images: bool,
+    mask_depths: bool,
+    mask_threshold: float,
+    bg_color: Tuple[float, float, float],
+) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
+    """
+    Optionally threshold the foreground mask and use it to mask the input
+    images and depth maps.
+
+    Args:
+        image_rgb: Batch of rgb images of the source views, a tensor of shape
+            `(B, 3, H, W)`. May be `None`.
+        fg_probability: Batch of foreground masks, a tensor of shape
+            `(B, 1, H, W)` with values in [0, 1]. May be `None`.
+        depth_map: Batch of depth maps of shape `(B, 1, H, W)`. May be `None`.
+        mask_images: If `True`, the background of `image_rgb` is replaced
+            with `bg_color` using the (possibly thresholded) foreground mask.
+        mask_depths: If `True`, `depth_map` is multiplied by the foreground
+            mask. This requires `mask_threshold > 0.0`.
+        mask_threshold: If greater than 0.0, the foreground mask is binarized
+            with `>= mask_threshold` before being returned or applied.
+        bg_color: RGB values of the background written into the masked
+            images. Only used when the images are masked.
+
+    Returns:
+        The tuple `(image_rgb, fg_mask, depth_map)` after the requested
+        masking; inputs that were `None` stay `None`.
+
+    Raises:
+        ValueError: If `image_rgb` is a 3-dimensional, i.e. unbatched, tensor.
+    """
+    if image_rgb is not None and image_rgb.ndim == 3:
+        # A FrameData `fd` holding a single frame which was passed as
+        # `model(**fd)` instead of `model(**fd.collate([fd]))` ends up here.
+        raise ValueError(
+            "Model received unbatched inputs. "
+            + "Perhaps they came from a FrameData which had not been collated."
+        )
+
+    if fg_probability is None:
+        return image_rgb, None, depth_map
+
+    fg_mask = fg_probability
+    if mask_threshold > 0.0:
+        warnings.warn("Thresholding masks!")
+        fg_mask = (fg_probability >= mask_threshold).type_as(fg_probability)
+
+    if mask_images and image_rgb is not None:
+        warnings.warn("Masking images!")
+        image_rgb = image_utils.mask_background(
+            image_rgb, fg_mask, dim_color=1, bg_color=torch.tensor(bg_color)
+        )
+
+    if mask_depths and depth_map is not None:
+        assert (
+            mask_threshold > 0.0
+        ), "Depths should be masked only with thresholded masks"
+        warnings.warn("Masking depths!")
+        depth_map = depth_map * fg_mask
+
+    return image_rgb, fg_mask, depth_map
+
+
+def log_loss_weights(loss_weights: Dict[str, float], logger: Logger) -> None:
+    """
+    Log a table of the loss weights at INFO level, one `name: weight` row per
+    entry, enclosed between two `-------` separator lines.
+    """
+    rows = [f"{k:40s}: {w:1.2e}" for k, w in loss_weights.items()]
+    # every row, including the last one, is terminated by a newline so that the
+    # closing separator starts on a line of its own
+    table = "".join(f"{row}\n" for row in rows)
+    logger.info("-------\nloss_weights:\n" + table + "-------")
+
+
+def weighted_sum_losses(
+    preds: Dict[str, torch.Tensor], loss_weights: Dict[str, float]
+) -> Optional[torch.Tensor]:
+    """
+    Sum `preds[name] * weight` over the entries of `loss_weights` that have a
+    non-zero weight and a matching key in `preds`; warn and return `None` if
+    there is no such entry.
+    """
+    weighted_terms = []
+    for name, weight in loss_weights.items():
+        if weight != 0.0 and name in preds:
+            weighted_terms.append(preds[name] * float(weight))
+    if not weighted_terms:
+        warnings.warn("No main objective found.")
+        return None
+    total = sum(weighted_terms)
+    assert torch.is_tensor(total)
+    # pyre-fixme[7]: Expected `Optional[Tensor]` but got `int`.
+    return total
+
+
+def apply_chunked(func, chunk_generator, tensor_collator):
+    """
+    Call `func` once per `(args, kwargs)` pair produced by `chunk_generator`
+    and merge the per-chunk results with `cat_dataclass`, which receives
+    `tensor_collator` as its second argument.
+    """
+    outputs = []
+    for positional, keyword in chunk_generator:
+        # all chunks are evaluated before any collation takes place
+        outputs.append(func(*positional, **keyword))
+
+    return cat_dataclass(outputs, tensor_collator)
+
+
+def chunk_generator(
+    chunk_size: int,
+    ray_bundle: ImplicitronRayBundle,
+    chunked_inputs: Dict[str, torch.Tensor],
+    tqdm_trigger_threshold: int,
+    *args,
+    **kwargs,
+):
+    """
+    Split `ray_bundle` into chunks of rays and yield them one by one. Intended
+    for the case when rendering all the rays at once does not fit in memory.
+
+    Args:
+        chunk_size: Target number of ray points per chunk. Has to be a
+            multiple of the number of points per ray when that is non-zero.
+        ray_bundle: The bundle to split; the leading dimension of
+            `ray_bundle.lengths` is the batch, the trailing one the points per
+            ray and all dimensions in between are flattened to a ray index.
+        chunked_inputs: Tensors that are flattened from dimension 2 onwards and
+            sliced along the resulting last dimension in step with the rays.
+        tqdm_trigger_threshold: A progress bar is shown when the number of
+            chunks reaches this value.
+        *args: Appended unchanged to the positional arguments of every chunk.
+        **kwargs: Copied into the keyword arguments of every chunk.
+
+    Yields:
+        `([ray_bundle_chunk, *args], extra_args)` for each chunk.
+
+    Raises:
+        ValueError: If `chunk_size` is not divisible by the points per ray.
+    """
+    batch_size, *spatial_dim, n_pts_per_ray = ray_bundle.lengths.shape
+    if n_pts_per_ray > 0 and chunk_size % n_pts_per_ray != 0:
+        raise ValueError(
+            f"chunk_size_grid ({chunk_size}) should be divisible "
+            f"by n_pts_per_ray ({n_pts_per_ray})"
+        )
+
+    n_rays = prod(spatial_dim)
+    # raytracing-based methods have no points on the rays: count one per ray
+    n_chunks = -(-n_rays * max(n_pts_per_ray, 1) // chunk_size)  # ceil division
+    rays_per_chunk = -(-n_rays // n_chunks)
+    chunk_starts = range(0, n_rays, rays_per_chunk)
+    if len(chunk_starts) >= tqdm_trigger_threshold:
+        chunk_starts = tqdm.tqdm(chunk_starts)
+
+    def _none_or_slice(tensor: Optional[torch.Tensor], rays: slice) -> Any:
+        return None if tensor is None else tensor[rays]
+
+    for start in chunk_starts:
+        rays = slice(start, min(start + rays_per_chunk, n_rays))
+        bins = ray_bundle.bins
+        if bins is not None:
+            bins = bins.reshape(batch_size, n_rays, n_pts_per_ray + 1)[:, rays]
+        pixel_radii_2d = ray_bundle.pixel_radii_2d
+        if pixel_radii_2d is not None:
+            pixel_radii_2d = pixel_radii_2d.reshape(batch_size, -1, 1)[:, rays]
+        ray_bundle_chunk = ImplicitronRayBundle(
+            origins=ray_bundle.origins.reshape(batch_size, -1, 3)[:, rays],
+            directions=ray_bundle.directions.reshape(batch_size, -1, 3)[:, rays],
+            lengths=ray_bundle.lengths.reshape(
+                batch_size, n_rays, n_pts_per_ray
+            )[:, rays],
+            xys=ray_bundle.xys.reshape(batch_size, -1, 2)[:, rays],
+            bins=bins,
+            pixel_radii_2d=pixel_radii_2d,
+            camera_ids=_none_or_slice(ray_bundle.camera_ids, rays),
+            camera_counts=_none_or_slice(ray_bundle.camera_counts, rays),
+        )
+        extra_args = dict(kwargs)
+        for name, value in chunked_inputs.items():
+            extra_args[name] = value.flatten(2)[:, :, rays]
+        yield [ray_bundle_chunk, *args], extra_args
diff --git a/pytorch3d/pytorch3d/implicitron/models/view_pooler/__init__.py b/pytorch3d/pytorch3d/implicitron/models/view_pooler/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/view_pooler/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/pytorch3d/implicitron/models/view_pooler/feature_aggregator.py b/pytorch3d/pytorch3d/implicitron/models/view_pooler/feature_aggregator.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd9817393f0509ecc560c46f694f8c37804c1d3f
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/view_pooler/feature_aggregator.py
@@ -0,0 +1,687 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from abc import ABC, abstractmethod
+from enum import Enum
+from typing import Dict, Optional, Sequence, Tuple, Union
+
+import torch
+import torch.nn.functional as F
+from pytorch3d.implicitron.models.view_pooler.view_sampler import (
+    cameras_points_cartesian_product,
+)
+from pytorch3d.implicitron.tools.config import registry, ReplaceableBase
+from pytorch3d.ops import wmean
+from pytorch3d.renderer.cameras import CamerasBase
+
+
+class ReductionFunction(Enum):
+    AVG = "avg"  # (weighted) average over the source views
+    MAX = "max"  # (weighted) maximum over the source views
+    STD = "std"  # (weighted) standard deviation over the source views
+    STD_AVG = "std_avg"  # the standard deviation averaged over feature channels
+
+
+class FeatureAggregatorBase(ABC, ReplaceableBase):
+    """
+    Base class for aggregating features.
+
+    Typically, the features to aggregate and their masks are output by `ViewSampler`
+    which samples feature tensors extracted from a set of source images.
+
+    Settings:
+        exclude_target_view: If `True`/`False`, disables/enables pooling
+            from target view to itself.
+        exclude_target_view_mask_features: If `True`,
+            mask the features from the target view before aggregation.
+        concatenate_output: If `True`, concatenate the aggregated features into a
+            single tensor, otherwise return a dictionary mapping names to tensors.
+    """
+
+    exclude_target_view: bool = True
+    exclude_target_view_mask_features: bool = True
+    concatenate_output: bool = True
+
+    @abstractmethod
+    def forward(
+        self,
+        feats_sampled: Dict[str, torch.Tensor],
+        masks_sampled: torch.Tensor,
+        camera: Optional[CamerasBase] = None,
+        pts: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
+        """
+        Args:
+            feats_sampled: A `dict` of sampled feature tensors `{f_i: t_i}`,
+                where each `t_i` is a tensor of shape
+                `(minibatch, n_source_views, n_samples, dim_i)`.
+            masks_sampled: A binary mask represented as a tensor of shape
+                `(minibatch, n_source_views, n_samples, 1)` denoting valid
+                sampled features.
+            camera: A batch of `n_source_views` `CamerasBase` objects corresponding
+                to the source view cameras.
+            pts: A tensor of shape `(minibatch, n_samples, 3)` denoting the
+                3D points whose 2D projections to source views were sampled in
+                order to generate `feats_sampled` and `masks_sampled`.
+
+        Returns:
+            feats_aggregated: If `concatenate_output==True`, a tensor
+                of shape `(minibatch, reduce_dim, n_samples, sum(dim_1, ... dim_N))`
+                containing the concatenation of the aggregated features `feats_sampled`.
+                `reduce_dim` depends on the specific feature aggregator
+                implementation and typically equals 1 or `n_source_views`.
+                If `concatenate_output==False`, the aggregator does not concatenate
+                the aggregated features and returns a dictionary of per-feature
+                aggregations `{f_i: t_i_aggregated}` instead. Each `t_i_aggregated`
+                is of shape `(minibatch, reduce_dim, n_samples, aggr_dim_i)`.
+        """
+        raise NotImplementedError()
+
+    @abstractmethod
+    def get_aggregated_feature_dim(
+        self, feats_or_feats_dim: Union[Dict[str, torch.Tensor], int]
+    ):
+        """
+        Returns the final dimensionality of the output aggregated features.
+
+        Args:
+            feats_or_feats_dim: Either a `dict` of sampled features `{f_i: t_i}` corresponding
+                to the `feats_sampled` argument of `forward`,
+                or an `int` representing the sum of dimensionalities of each `t_i`.
+
+        Returns:
+            aggregated_feature_dim: The final dimensionality of the output
+                aggregated features.
+        """
+        raise NotImplementedError()
+
+    def has_aggregation(self) -> bool:
+        """
+        Specifies whether the aggregator reduces the output `reduce_dim` dimension
+        to 1, which is detected by the presence of a `reduction_functions` attribute.
+
+        Returns:
+            has_aggregation: `True` if `reduce_dim==1`, else `False`.
+        """
+        return hasattr(self, "reduction_functions")
+
+
+@registry.register
+class IdentityFeatureAggregator(torch.nn.Module, FeatureAggregatorBase):
+    """
+    A pass-through aggregator: the source-view dimension is not reduced. The
+    only processing, controlled by the settings of the base class, is the
+    masking of the target view features and the concatenation of the outputs.
+    """
+
+    def get_aggregated_feature_dim(
+        self, feats_or_feats_dim: Union[Dict[str, torch.Tensor], int]
+    ):
+        # no reduction functions: the feature dimensionality is left as it is
+        return _get_reduction_aggregator_feature_dim(feats_or_feats_dim, [])
+
+    def forward(
+        self,
+        feats_sampled: Dict[str, torch.Tensor],
+        masks_sampled: torch.Tensor,
+        camera: Optional[CamerasBase] = None,
+        pts: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
+        """
+        Optionally mask the target view features and concatenate the outputs.
+
+        Args:
+            feats_sampled: A `dict` of sampled feature tensors `{f_i: t_i}`,
+                where each `t_i` is a tensor of shape
+                `(minibatch, n_source_views, n_samples, dim_i)`.
+            masks_sampled: A tensor of shape
+                `(minibatch, n_source_views, n_samples, 1)`; not used here.
+            camera: The source view cameras; not used here.
+            pts: The sampled 3D points; not used here.
+
+        Returns:
+            feats_aggregated: If `concatenate_output==True`, the tensor
+                of shape `(minibatch, n_source_views, n_samples, sum(dim_1, ... dim_N))`
+                obtained by concatenating the (optionally masked) features along
+                the last dimension. If `concatenate_output==False`, a dictionary
+                `{f_i: t_i_aggregated}` with each `t_i_aggregated` of shape
+                `(minibatch, n_source_views, n_samples, dim_i)`.
+        """
+        feats = feats_sampled
+        if self.exclude_target_view_mask_features:
+            feats = _mask_target_view_features(feats)
+        if not self.concatenate_output:
+            return feats
+        return torch.cat(tuple(feats.values()), dim=-1)
+
+
+@registry.register
+class ReductionFeatureAggregator(torch.nn.Module, FeatureAggregatorBase):
+    """
+    Pools the features over the source views with each of the configured
+    `reduction_functions` and concatenates the pooled results along the
+    channel dimension. The pooling collapses the second dimension of the
+    sampled features, i.e. the one which stacks the source views, to size 1.
+
+    Settings:
+        reduction_functions: A tuple of `ReductionFunction`s that reduce
+            the stack of source-view-specific features to a single feature.
+    """
+
+    reduction_functions: Tuple[ReductionFunction, ...] = (
+        ReductionFunction.AVG,
+        ReductionFunction.STD,
+    )
+
+    def get_aggregated_feature_dim(
+        self, feats_or_feats_dim: Union[Dict[str, torch.Tensor], int]
+    ):
+        # every configured reduction function contributes its own output channels
+        reductions = self.reduction_functions
+        return _get_reduction_aggregator_feature_dim(feats_or_feats_dim, reductions)
+
+    def forward(
+        self,
+        feats_sampled: Dict[str, torch.Tensor],
+        masks_sampled: torch.Tensor,
+        camera: Optional[CamerasBase] = None,
+        pts: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
+        """
+        Pool the sampled features over the source views, weighting every
+        sample by its validity mask.
+
+        Args:
+            feats_sampled: A `dict` of sampled feature tensors `{f_i: t_i}`,
+                where each `t_i` is a tensor of shape
+                `(minibatch, n_source_views, n_samples, dim_i)`.
+            masks_sampled: A binary mask represented as a tensor of shape
+                `(minibatch, n_source_views, n_samples, 1)` denoting valid
+                sampled features.
+            camera: The source view cameras; not used by this aggregator.
+            pts: The sampled 3D points; not used by this aggregator.
+
+        Returns:
+            feats_aggregated: If `concatenate_output==True`, a tensor
+                of shape `(minibatch, 1, n_samples, sum(aggr_dim_1, ... aggr_dim_N))`.
+                If `concatenate_output==False`, a dictionary `{f_i: t_i_aggregated}`
+                with each `t_i_aggregated` of shape `(minibatch, 1, n_samples, aggr_dim_i)`.
+        """
+
+        n_targets, n_sources = masks_sampled.shape[:2]
+        feats = feats_sampled
+        if self.exclude_target_view_mask_features:
+            feats = _mask_target_view_features(feats)
+
+        # one row per target and one column per source view, filled with ones;
+        # the diagonal is zero if the target view is excluded from the pooling
+        view_mask = _get_view_sampling_mask(
+            n_sources, n_targets, masks_sampled.device, self.exclude_target_view
+        )
+        # the weight of a sample is its validity mask times the view mask
+        weights = masks_sampled[..., 0] * view_mask[..., None]
+
+        pooled = {}
+        for name, feat in feats.items():
+            pooled[name] = _avgmaxstd_reduction_function(
+                feat, weights, dim=1, reduction_functions=self.reduction_functions
+            )
+        if not self.concatenate_output:
+            return pooled
+        # concatenate the per-feature results along the channel dimension
+        return torch.cat(tuple(pooled.values()), dim=-1)
+
+
+@registry.register
+class AngleWeightedReductionFeatureAggregator(torch.nn.Module, FeatureAggregatorBase):
+    """
+    Performs a weighted aggregation using a set of predefined `reduction_functions`
+    and concatenates the results of each aggregation function along the
+    channel dimension. The weights grow with the cosine of the
+    angle between the target ray and the source ray::
+
+        weight = (
+            dot(target_ray, source_ray) * 0.5 + 0.5 + self.min_ray_angle_weight
+        )**self.weight_by_ray_angle_gamma
+
+    The reduction functions collapse the second dimension of the sampled
+    features, i.e. the one which stacks the source views, to size 1.
+
+    Settings:
+        reduction_functions: A tuple of `ReductionFunction`s that reduce
+            the stack of source-view-specific features to a single feature.
+        min_ray_angle_weight: The minimum possible aggregation weight
+            before raising to the power of `self.weight_by_ray_angle_gamma`.
+        weight_by_ray_angle_gamma: The exponent of the cosine of the ray angles
+            used when calculating the angle-based aggregation weights.
+    """
+
+    reduction_functions: Tuple[ReductionFunction, ...] = (
+        ReductionFunction.AVG,
+        ReductionFunction.STD,
+    )
+    weight_by_ray_angle_gamma: float = 1.0
+    min_ray_angle_weight: float = 0.1
+
+    def get_aggregated_feature_dim(
+        self, feats_or_feats_dim: Union[Dict[str, torch.Tensor], int]
+    ):
+        # every configured reduction function contributes its own output channels
+        reductions = self.reduction_functions
+        return _get_reduction_aggregator_feature_dim(feats_or_feats_dim, reductions)
+
+    def forward(
+        self,
+        feats_sampled: Dict[str, torch.Tensor],
+        masks_sampled: torch.Tensor,
+        camera: Optional[CamerasBase] = None,
+        pts: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
+        """
+        Pool the sampled features over the source views, weighting every sample
+        by its validity mask and by the angle between the target and source rays.
+
+        Args:
+            feats_sampled: A `dict` of sampled feature tensors `{f_i: t_i}`,
+                where each `t_i` is a tensor of shape
+                `(minibatch, n_source_views, n_samples, dim_i)`.
+            masks_sampled: A binary mask represented as a tensor of shape
+                `(minibatch, n_source_views, n_samples, 1)` denoting valid
+                sampled features.
+            camera: A batch of `n_source_views` `CamerasBase` objects
+                corresponding to the source view cameras.
+            pts: A tensor of shape `(minibatch, n_samples, 3)` denoting the
+                3D points whose 2D projections to source views were sampled in
+                order to generate `feats_sampled` and `masks_sampled`.
+
+        Returns:
+            feats_aggregated: If `concatenate_output==True`, a tensor
+                of shape `(minibatch, 1, n_samples, sum(aggr_dim_1, ... aggr_dim_N))`.
+                If `concatenate_output==False`, a dictionary `{f_i: t_i_aggregated}`
+                with each `t_i_aggregated` of shape `(minibatch, 1, n_samples, aggr_dim_i)`.
+
+        Raises:
+            ValueError: If `camera` or `pts` is `None`.
+        """
+
+        if camera is None:
+            raise ValueError("camera cannot be None for angle weighted aggregation")
+        if pts is None:
+            raise ValueError("Points cannot be None for angle weighted aggregation")
+
+        n_targets, n_sources = masks_sampled.shape[:2]
+        feats = feats_sampled
+        if self.exclude_target_view_mask_features:
+            feats = _mask_target_view_features(feats)
+        view_mask = _get_view_sampling_mask(
+            n_sources, n_targets, masks_sampled.device, self.exclude_target_view
+        )
+        # validity mask x view mask x angle-based weight
+        weights = _get_angular_reduction_weights(
+            view_mask,
+            masks_sampled,
+            camera,
+            pts,
+            self.min_ray_angle_weight,
+            self.weight_by_ray_angle_gamma,
+        )
+        assert torch.isfinite(weights).all()
+
+        pooled = {}
+        for name, feat in feats.items():
+            pooled[name] = _avgmaxstd_reduction_function(
+                feat, weights, dim=1, reduction_functions=self.reduction_functions
+            )
+        if not self.concatenate_output:
+            return pooled
+        return torch.cat(tuple(pooled.values()), dim=-1)
+
+
+@registry.register
+class AngleWeightedIdentityFeatureAggregator(torch.nn.Module, FeatureAggregatorBase):
+    """
+    This aggregator does not reduce the source-view dimension. It only scales
+    the features by weights which grow with the cosine of the
+    angle between the target ray and the source ray::
+
+        weight = (
+            dot(target_ray, source_ray) * 0.5 + 0.5 + self.min_ray_angle_weight
+        )**self.weight_by_ray_angle_gamma
+
+    Settings:
+        min_ray_angle_weight: The minimum possible aggregation weight
+            before raising to the power of `self.weight_by_ray_angle_gamma`.
+        weight_by_ray_angle_gamma: The exponent of the cosine of the ray angles
+            used when calculating the angle-based aggregation weights.
+
+    Additionally the aggregator can mask target view features and concatenate
+    the outputs, as configured by the settings of the base class.
+    """
+
+    weight_by_ray_angle_gamma: float = 1.0
+    min_ray_angle_weight: float = 0.1
+
+    def get_aggregated_feature_dim(
+        self, feats_or_feats_dim: Union[Dict[str, torch.Tensor], int]
+    ):
+        # no reduction functions: the feature dimensionality is left as it is
+        return _get_reduction_aggregator_feature_dim(feats_or_feats_dim, [])
+
+    def forward(
+        self,
+        feats_sampled: Dict[str, torch.Tensor],
+        masks_sampled: torch.Tensor,
+        camera: Optional[CamerasBase] = None,
+        pts: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
+        """
+        Scale the sampled features by the angle-based aggregation weights.
+
+        Args:
+            feats_sampled: A `dict` of sampled feature tensors `{f_i: t_i}`,
+                where each `t_i` is a tensor of shape
+                `(minibatch, n_source_views, n_samples, dim_i)`.
+            masks_sampled: A binary mask represented as a tensor of shape
+                `(minibatch, n_source_views, n_samples, 1)` denoting valid
+                sampled features.
+            camera: A batch of `n_source_views` source view `CamerasBase` objects.
+            pts: A tensor of shape `(minibatch, n_samples, 3)` denoting the
+                3D points whose 2D projections to source views were sampled in
+                order to generate `feats_sampled` and `masks_sampled`.
+
+        Returns:
+            feats_aggregated: If `concatenate_output==True`, a tensor
+                of shape `(minibatch, n_source_views, n_samples, sum(dim_1, ... dim_N))`.
+                If `concatenate_output==False`, a dictionary `{f_i: t_i_aggregated}`
+                with each `t_i_aggregated` of shape
+                `(minibatch, n_source_views, n_samples, dim_i)`.
+
+        Raises:
+            ValueError: If `camera` or `pts` is `None`.
+        """
+        if camera is None:
+            raise ValueError("camera cannot be None for angle weighted aggregation")
+        if pts is None:
+            raise ValueError("Points cannot be None for angle weighted aggregation")
+
+        n_targets, n_sources = masks_sampled.shape[:2]
+        feats = feats_sampled
+        if self.exclude_target_view_mask_features:
+            feats = _mask_target_view_features(feats)
+        view_mask = _get_view_sampling_mask(
+            n_sources, n_targets, masks_sampled.device, self.exclude_target_view
+        )
+        weights = _get_angular_reduction_weights(
+            view_mask,
+            masks_sampled,
+            camera,
+            pts,
+            self.min_ray_angle_weight,
+            self.weight_by_ray_angle_gamma,
+        )
+        channel_weights = weights[..., None]
+        weighted = {name: feat * channel_weights for name, feat in feats.items()}
+        if not self.concatenate_output:
+            return weighted
+        return torch.cat(tuple(weighted.values()), dim=-1)
+
+
+def _get_reduction_aggregator_feature_dim(
+    feats_or_feats_dim: Union[Dict[str, torch.Tensor], int],
+    reduction_functions: Sequence[ReductionFunction],
+) -> int:
+    """
+    Total feature dim produced by `reduction_functions` for the given input, which
+    is either the input dim itself or a dict of tensors whose `shape[1]` are summed
+    (NOTE(review): `forward` documents the channels as the last dim - confirm).
+    With no reduction function the input dim is returned unchanged.
+    """
+    feat_dim = feats_or_feats_dim
+    if not isinstance(feat_dim, int):
+        feat_dim = int(sum(t.shape[1] for t in feat_dim.values()))
+    dims = [
+        _get_reduction_function_output_dim(rf, feat_dim) for rf in reduction_functions
+    ]
+    return sum(dims) if dims else feat_dim
+
+
+def _get_reduction_function_output_dim(
+    reduction_function: ReductionFunction,
+    feat_dim: int,
+) -> int:
+    # STD_AVG averages the standard deviation over the channels, leaving 1 value;
+    # the other reductions keep one value per input channel
+    collapses_channels = reduction_function == ReductionFunction.STD_AVG
+    return 1 if collapses_channels else feat_dim
+
+
+def _get_view_sampling_mask(
+    n_cameras: int,
+    pts_batch: int,
+    device: Union[str, torch.device],
+    exclude_target_view: bool,
+):
+    # float32 matrix of ones whose diagonal is zeroed when the target view is
+    # excluded; only the first `pts_batch` of its `n_cameras` rows are returned
+    diagonal = torch.eye(n_cameras, device=device, dtype=torch.float32)
+    keep = 1.0 - diagonal * float(exclude_target_view)
+    return keep[:pts_batch]
+
+
+def _mask_target_view_features(
+    feats_sampled: Dict[str, torch.Tensor],
+):
+    """
+    Zero the features which every target view has sampled from itself, to make
+    sure that they cannot be used anywhere later on.
+
+    The batch layout and the device are taken from the first tensor of the dict;
+    a new dict is returned and the input tensors are not modified.
+    """
+    reference = next(iter(feats_sampled.values()))
+    pts_batch, n_cameras = reference.shape[:2]
+    mask = _get_view_sampling_mask(n_cameras, pts_batch, reference.device, True)
+    # singleton dims are appended so that the mask broadcasts over the trailing dims
+    broadcast_shape = (pts_batch, n_cameras, *([1] * (reference.ndim - 2)))
+    mask = mask.view(*broadcast_shape)
+    return {name: feat * mask for name, feat in feats_sampled.items()}
+
+
+def _get_angular_reduction_weights(
+    view_sampling_mask: torch.Tensor,
+    masks_sampled: torch.Tensor,
+    camera: CamerasBase,
+    pts: torch.Tensor,
+    min_ray_angle_weight: float,
+    weight_by_ray_angle_gamma: float,
+):
+    """
+    Per-sample aggregation weights: the validity mask (channel 0 of
+    `masks_sampled`) times the ray-angle weights times `view_sampling_mask`.
+    The angle weights are reshaped to the layout of `masks_sampled[..., 0]`.
+    """
+    valid = masks_sampled.clone()[..., 0]
+    assert camera is not None and pts is not None
+    angle_weight = _get_ray_angle_weights(
+        camera, pts, min_ray_angle_weight, weight_by_ray_angle_gamma
+    )
+    assert torch.isfinite(angle_weight).all()
+
+    # multiply the final aggr weights with ray angles;
+    # trailing singleton dims let the 2D view mask broadcast against `valid`
+    view_dims = view_sampling_mask.shape[:2]
+    view_mask = view_sampling_mask.view(*view_dims, *([1] * (valid.ndim - 2)))
+    return valid * angle_weight.reshape_as(valid) * view_mask
+
+
+def _get_ray_dir_dot_prods(camera: CamerasBase, pts: torch.Tensor):
+    """
+    For every point, the cosines between the ray cast to it from its own camera
+    (`pts[i]` is paired with `camera[i]`) and the rays cast from all the cameras.
+    """
+    n_cameras = camera.R.shape[0]
+    pts_batch = pts.shape[0]
+    camera_rep, pts_rep = cameras_points_cartesian_product(camera, pts)
+
+    # camera centers computed by hand as `-T R^T`; according to the original note
+    # this does not produce nans randomly, unlike `camera_rep.get_camera_center()`
+    cam_centers_rep = -torch.bmm(
+        camera_rep.T[:, None],
+        camera_rep.R.permute(0, 2, 1),
+    ).reshape(-1, *([1] * (pts.ndim - 2)), 3)
+    ray_dirs = F.normalize(pts_rep - cam_centers_rep, dim=-1)
+    # camera_rep = [                 pts_rep = [
+    #     camera[0]                      pts[0],
+    #     camera[0]                      pts[1],
+    #     camera[0]                      ...,
+    #     ...                            pts[batch_pts-1],
+    #     camera[1]                      pts[0],
+    #     camera[1]                      pts[1],
+    #     camera[1]                      ...,
+    #     ...                            pts[batch_pts-1],
+    #     ...                            ...,
+    #     camera[n_cameras-1]            pts[0],
+    #     camera[n_cameras-1]            pts[1],
+    #     camera[n_cameras-1]            ...,
+    #     ...                            pts[batch_pts-1],
+    # ]                              ]
+
+    ray_dirs_reshape = ray_dirs.view(n_cameras, pts_batch, -1, 3)
+    # [
+    #   [pts_0 in cam_0, pts_1 in cam_0, ..., pts_m in cam_0],
+    #   [pts_0 in cam_1, pts_1 in cam_1, ..., pts_m in cam_1],
+    #   ...
+    #   [pts_0 in cam_n, pts_1 in cam_n, ..., pts_m in cam_n],
+    # ]
+
+    # the rays from camera[i] to pts[i]; this requires pts_batch <= n_cameras
+    ray_dirs_pts = torch.stack([ray_dirs_reshape[i, i] for i in range(pts_batch)])
+    ray_dir_dot_prods = (ray_dirs_pts[None] * ray_dirs_reshape).sum(
+        dim=-1
+    )  # n_cameras x pts_batch x n_pts
+    return ray_dir_dot_prods.transpose(0, 1)  # pts_batch x n_cameras x n_pts
+
+
+def _get_ray_angle_weights(
+    camera: CamerasBase,
+    pts: torch.Tensor,
+    min_ray_angle_weight: float,
+    weight_by_ray_angle_gamma: float,
+):
+    # the dot products are remapped from [-1, 1] to [0, 1], offset by
+    # `min_ray_angle_weight` and raised to the power `weight_by_ray_angle_gamma`
+    cosines = _get_ray_dir_dot_prods(camera, pts)
+    remapped = cosines * 0.5 + 0.5
+    offset = remapped + min_ray_angle_weight
+    return offset**weight_by_ray_angle_gamma
+
+
+def _avgmaxstd_reduction_function(
+    x: torch.Tensor,
+    w: torch.Tensor,
+    reduction_functions: Sequence[ReductionFunction],
+    dim: int = 1,
+):
+    """
+    Pool `x` along `dim` with every requested reduction function and
+    concatenate the results along the last dimension. The results are always
+    ordered AVG, STD, STD_AVG, MAX, whatever the order of `reduction_functions`.
+
+    Args:
+        x: Features to aggregate. Tensor of shape `(batch, n_views, ..., dim)`.
+        w: Aggregation weights. Tensor of shape `(batch, n_views, ...,)`.
+        reduction_functions: The set of reduction functions.
+        dim: the dimension along which to aggregate.
+
+    Returns:
+        x_aggr: Aggregation of `x` to a tensor of shape `(batch, 1, ..., dim_aggregate)`.
+            Positions whose weights do not exceed 1e-4 for any view are set to zero.
+    """
+    mu = None
+    std = None
+    pooled_features = []
+
+    if ReductionFunction.AVG in reduction_functions:
+        mu = _avg_reduction_function(x, w, dim=dim)
+        pooled_features.append(mu)
+
+    if ReductionFunction.STD in reduction_functions:
+        # reuses the mean if AVG was requested as well
+        std = _std_reduction_function(x, w, dim=dim, mu=mu)
+        pooled_features.append(std)
+
+    if ReductionFunction.STD_AVG in reduction_functions:
+        # reuses the standard deviation if STD was requested as well
+        stdavg = _std_avg_reduction_function(x, w, dim=dim, mu=mu, std=std)
+        pooled_features.append(stdavg)
+
+    if ReductionFunction.MAX in reduction_functions:
+        pooled_features.append(_max_reduction_function(x, w, dim=dim))
+
+    x_aggr = torch.cat(pooled_features, dim=-1)
+
+    # features whose views were all masked out are zeroed
+    # pyre-fixme[16]: `bool` has no attribute `type_as`.
+    has_active_view = (w.max(dim=dim, keepdim=True).values > 1e-4).type_as(x_aggr)
+    x_aggr = x_aggr * has_active_view[..., None]
+
+    # sanity checks of the result
+    assert torch.isfinite(x_aggr).all()
+    assert x_aggr.shape[1] == 1
+
+    return x_aggr
+
+
+def _avg_reduction_function(
+    x: torch.Tensor,
+    w: torch.Tensor,
+    dim: int = 1,
+):
+    # weighted mean of `x` over `dim`, delegated to `wmean` with `eps=1e-2`
+    return wmean(x, w, dim=dim, eps=1e-2)
+
+
+def _std_reduction_function(
+    x: torch.Tensor,
+    w: torch.Tensor,
+    dim: int = 1,
+    mu: Optional[torch.Tensor] = None,  # pre-computed mean
+):
+    mean = _avg_reduction_function(x, w, dim=dim) if mu is None else mu
+    # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+    variance = wmean((x - mean) ** 2, w, dim=dim, eps=1e-2)
+    # the variance is floored at 1e-4 before the root is taken
+    # FIXME: somehow this is extremely heavy in mem?
+    return variance.clamp(1e-4).sqrt()
+
+
+def _std_avg_reduction_function(
+    x: torch.Tensor,
+    w: torch.Tensor,
+    dim: int = 1,
+    mu: Optional[torch.Tensor] = None,  # pre-computed mean
+    std: Optional[torch.Tensor] = None,  # pre-computed std
+):
+    # reuse the standard deviation if the caller has already computed it
+    pooled_std = _std_reduction_function(x, w, dim=dim, mu=mu) if std is None else std
+    # mean over the channels, which are kept as a dimension of size 1
+    return pooled_std.mean(dim=-1, keepdim=True)
+
+
+def _max_reduction_function(
+    x: torch.Tensor, w: torch.Tensor, dim: int = 1, big_M_factor: float = 10.0
+):
+    """Weighted max of `x` over `dim`; zero-weight entries are pushed to -big_M."""
+    if w.ndim == x.ndim - 1:
+        w = w[..., None]  # weights lack the channel dim: align them with `x`
+    big_M = x.max(dim=dim, keepdim=True).values.abs() * big_M_factor
+    max_ = (x * w - ((1 - w) * big_M)).max(dim=dim, keepdim=True).values
+    return max_
diff --git a/pytorch3d/pytorch3d/implicitron/models/view_pooler/view_pooler.py b/pytorch3d/pytorch3d/implicitron/models/view_pooler/view_pooler.py
new file mode 100644
index 0000000000000000000000000000000000000000..a47ef72de7a2ac0192f10a1c53b3cb4a9c246346
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/view_pooler/view_pooler.py
@@ -0,0 +1,127 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Dict, List, Optional, Union
+
+import torch
+from pytorch3d.implicitron.tools.config import Configurable, run_auto_creation
+from pytorch3d.renderer.cameras import CamerasBase
+
+from .feature_aggregator import FeatureAggregatorBase
+from .view_sampler import ViewSampler
+
+
+# pyre-ignore: 13
+class ViewPooler(Configurable, torch.nn.Module):
+    """
+    Implements sampling of image-based features at the 2d projections of a set
+    of 3D points, and a subsequent aggregation of the resulting set of features
+    per-point.
+
+    Args:
+        view_sampler: An instance of ViewSampler which is used for sampling of
+            image-based features at the 2D projections of a set
+            of 3D points.
+        feature_aggregator_class_type: The name of the feature aggregator class which
+            is available in the global registry.
+        feature_aggregator: A feature aggregator class which inherits from
+            FeatureAggregatorBase. Typically, the aggregated features and their
+            masks are output by a `ViewSampler` which samples feature tensors extracted
+            from a set of source images. It runs step (2) of `self.forward`.
+    """
+
+    view_sampler: ViewSampler
+    feature_aggregator_class_type: str = "AngleWeightedReductionFeatureAggregator"
+    feature_aggregator: FeatureAggregatorBase
+
+    def __post_init__(self):
+        run_auto_creation(self)
+
+    def get_aggregated_feature_dim(self, feats: Union[Dict[str, torch.Tensor], int]):
+        """
+        Returns the final dimensionality of the output aggregated features.
+
+        Args:
+            feats: Either a `dict` of sampled features `{f_i: t_i}` corresponding
+                to the `feats_sampled` argument of `feature_aggregator.forward`,
+                or an `int` representing the sum of dimensionalities of each `t_i`.
+
+        Returns:
+            aggregated_feature_dim: The final dimensionality of the output
+                aggregated features.
+        """
+        return self.feature_aggregator.get_aggregated_feature_dim(feats)
+
+    def has_aggregation(self):
+        """
+        Specifies whether the `feature_aggregator` reduces the output `reduce_dim`
+        dimension to 1.
+
+        Returns:
+            has_aggregation: `True` if `reduce_dim==1`, else `False`.
+        """
+        return self.feature_aggregator.has_aggregation()
+
+    def forward(
+        self,
+        *,  # force kw args
+        pts: torch.Tensor,
+        seq_id_pts: Union[List[int], List[str], torch.LongTensor],
+        camera: CamerasBase,
+        seq_id_camera: Union[List[int], List[str], torch.LongTensor],
+        feats: Dict[str, torch.Tensor],
+        masks: Optional[torch.Tensor],
+        **kwargs,
+    ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
+        """
+        Project each point cloud from a batch of point clouds to corresponding
+        input cameras, sample features at the 2D projection locations in a batch
+        of source images, and aggregate the pointwise sampled features.
+
+        Args:
+            pts: A tensor of shape `[pts_batch x n_pts x 3]` in world coords.
+            seq_id_pts: LongTensor of shape `[pts_batch]` denoting the ids of the scenes
+                from which `pts` were extracted, or a list of string names.
+            camera: 'n_cameras' cameras, each corresponding to a batch element of `feats`.
+            seq_id_camera: LongTensor of shape `[n_cameras]` denoting the ids of the scenes
+                corresponding to cameras in `camera`, or a list of string names.
+            feats: a dict of tensors of per-image features `{feat_i: T_i}`.
+                Each tensor `T_i` is of shape `[n_cameras x dim_i x H_i x W_i]`.
+            masks: `[n_cameras x 1 x H x W]`, define valid image regions
+                for sampling `feats`.
+        Returns:
+            feats_aggregated: If `feature_aggregator.concatenate_output==True`, a tensor
+                of shape `(pts_batch, reduce_dim, n_pts, sum(dim_1, ... dim_N))`
+                containing the aggregated features. `reduce_dim` depends on
+                the specific feature aggregator implementation and typically
+                equals 1 or `n_cameras`.
+                If `feature_aggregator.concatenate_output==False`, the aggregator
+                does not concatenate the aggregated features and returns a dictionary
+                of per-feature aggregations `{f_i: t_i_aggregated}` instead.
+                Each `t_i_aggregated` is of shape
+                `(pts_batch, reduce_dim, n_pts, aggr_dim_i)`.
+        """
+
+        # (1) Sample features and masks at the ray points
+        sampled_feats, sampled_masks = self.view_sampler(
+            pts=pts,
+            seq_id_pts=seq_id_pts,
+            camera=camera,
+            seq_id_camera=seq_id_camera,
+            feats=feats,
+            masks=masks,
+        )
+
+        # (2) Aggregate features from multiple views
+        # pyre-fixme[29]: `Union[torch.Tensor, torch.nn.Module]` is not a function.
+        feats_aggregated = self.feature_aggregator(  # noqa: E731
+            sampled_feats,
+            sampled_masks,
+            pts=pts,
+            camera=camera,
+        )  # TODO: do we need to pass a callback rather than compute here?
+
+        return feats_aggregated
diff --git a/pytorch3d/pytorch3d/implicitron/models/view_pooler/view_sampler.py b/pytorch3d/pytorch3d/implicitron/models/view_pooler/view_sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..56f91ed2f2e74ab10d3d0f4db6801c3136f419b4
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/view_pooler/view_sampler.py
@@ -0,0 +1,293 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Dict, List, Optional, Tuple, Union
+
+import torch
+from pytorch3d.implicitron.tools.config import Configurable
+from pytorch3d.renderer.cameras import CamerasBase
+from pytorch3d.renderer.utils import ndc_grid_sample
+
+
+class ViewSampler(Configurable, torch.nn.Module):
+    """
+    Implements sampling of image-based features at the 2d projections of a set
+    of 3D points.
+
+    Args:
+        masked_sampling: If `True`, the `sampled_masks` output of `self.forward`
+            contains the input `masks` sampled at the 2d projections. Otherwise,
+            all entries of `sampled_masks` are set to 1.
+        sampling_mode: Controls the mode of the `torch.nn.functional.grid_sample`
+            function used to interpolate the sampled feature tensors at the
+            locations of the 2d projections.
+    """
+
+    masked_sampling: bool = False
+    sampling_mode: str = "bilinear"
+
+    def forward(
+        self,
+        *,  # force kw args
+        pts: torch.Tensor,
+        seq_id_pts: Union[List[int], List[str], torch.LongTensor],
+        camera: CamerasBase,
+        seq_id_camera: Union[List[int], List[str], torch.LongTensor],
+        feats: Dict[str, torch.Tensor],
+        masks: Optional[torch.Tensor],
+        **kwargs,
+    ) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:
+        """
+        Project each point cloud from a batch of point clouds to corresponding
+        input cameras and sample features at the 2D projection locations.
+
+        Args:
+            pts: A tensor of shape `[pts_batch x n_pts x 3]` in world coords.
+            seq_id_pts: LongTensor of shape `[pts_batch]` denoting the ids of the scenes
+                from which `pts` were extracted, or a list of string names.
+            camera: 'n_cameras' cameras, each corresponding to a batch element of `feats`.
+            seq_id_camera: LongTensor of shape `[n_cameras]` denoting the ids of the scenes
+                corresponding to cameras in `camera`, or a list of string names.
+            feats: a dict of tensors of per-image features `{feat_i: T_i}`.
+                Each tensor `T_i` is of shape `[n_cameras x dim_i x H_i x W_i]`.
+            masks: `[n_cameras x 1 x H x W]`, define valid image regions
+                for sampling `feats`.
+        Returns:
+            sampled_feats: Dict of sampled features `{feat_i: sampled_T_i}`.
+                Each `sampled_T_i` of shape `[pts_batch, n_cameras, n_pts, dim_i]`.
+            sampled_masks: A tensor with the mask of the sampled features
+                of shape `(pts_batch, n_cameras, n_pts, 1)`.
+        """
+
+        # convert sequence ids to long tensors
+        seq_id_pts, seq_id_camera = [
+            handle_seq_id(seq_id, pts.device) for seq_id in [seq_id_pts, seq_id_camera]
+        ]
+
+        if self.masked_sampling and masks is None:
+            raise ValueError(
+                "Masks have to be provided for `self.masked_sampling==True`"
+            )
+
+        # project pts to all cameras and sample feats from the locations of
+        # the 2D projections
+        sampled_feats_all_cams, sampled_masks_all_cams = project_points_and_sample(
+            pts,
+            feats,
+            camera,
+            masks if self.masked_sampling else None,
+            sampling_mode=self.sampling_mode,
+        )
+
+        # generate the mask that invalidates features sampled from
+        # non-corresponding cameras
+        camera_pts_mask = (seq_id_camera[None] == seq_id_pts[:, None])[
+            ..., None, None
+        ].to(pts)
+
+        # mask the sampled features and masks
+        sampled_feats = {
+            k: f * camera_pts_mask for k, f in sampled_feats_all_cams.items()
+        }
+        sampled_masks = sampled_masks_all_cams * camera_pts_mask
+
+        return sampled_feats, sampled_masks
+
+
+def project_points_and_sample(
+    pts: torch.Tensor,
+    feats: Dict[str, torch.Tensor],
+    camera: CamerasBase,
+    masks: Optional[torch.Tensor],
+    eps: float = 1e-2,
+    sampling_mode: str = "bilinear",
+) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:
+    """
+    Project each point cloud from a batch of point clouds to all input cameras
+    and sample features at the 2D projection locations.
+
+    Args:
+        pts: `(pts_batch, n_pts, 3)` tensor containing a batch of 3D point clouds.
+        feats: A dict `{feat_i: feat_T_i}` of features to sample,
+            where each `feat_T_i` is a tensor of shape
+            `(n_cameras, feat_i_dim, feat_i_H, feat_i_W)`
+            of `feat_i_dim`-dimensional features extracted from `n_cameras`
+            source views.
+        camera: A batch of `n_cameras` cameras corresponding to their feature
+            tensors `feat_T_i` from `feats`.
+        masks: A tensor of shape `(n_cameras, 1, mask_H, mask_W)` denoting
+            valid locations for sampling.
+        eps: A small constant controlling the minimum depth of projections
+            of `pts` to avoid divisions by zero in the projection operation.
+        sampling_mode: Sampling mode of the grid sampler.
+
+    Returns:
+        sampled_feats: Dict of sampled features `{feat_i: sampled_T_i}`.
+            Each `sampled_T_i` is of shape
+            `(pts_batch, n_cameras, n_pts, feat_i_dim)`.
+        sampled_masks: A tensor with the mask of the sampled features
+            of shape `(pts_batch, n_cameras, n_pts, 1)`.
+            If `masks` is `None`, the returned `sampled_masks` will be
+            filled with 1s.
+    """
+
+    n_cameras = camera.R.shape[0]
+    pts_batch = pts.shape[0]
+    n_pts = pts.shape[1:-1]
+
+    camera_rep, pts_rep = cameras_points_cartesian_product(camera, pts)
+
+    # The eps here is super-important to avoid NaNs in backprop!
+    proj_rep = camera_rep.transform_points(
+        pts_rep.reshape(n_cameras * pts_batch, -1, 3), eps=eps
+    )[..., :2]
+    # [ pts1 in cam1, pts2 in cam1, pts3 in cam1,
+    #   pts1 in cam2, pts2 in cam2, pts3 in cam2,
+    #   pts1 in cam3, pts2 in cam3, pts3 in cam3 ]
+
+    # reshape for the grid sampler
+    sampling_grid_ndc = proj_rep.view(n_cameras, pts_batch, -1, 2)
+    # [ [pts1 in cam1, pts2 in cam1, pts3 in cam1],
+    #   [pts1 in cam2, pts2 in cam2, pts3 in cam2],
+    #   [pts1 in cam3, pts2 in cam3, pts3 in cam3] ]
+    #   n_cameras x pts_batch x n_pts x 2
+
+    # sample the feats
+    feats_sampled = {
+        k: ndc_grid_sample(
+            f,
+            sampling_grid_ndc,
+            mode=sampling_mode,
+            align_corners=False,
+        )
+        .permute(2, 0, 3, 1)
+        .reshape(pts_batch, n_cameras, *n_pts, -1)
+        for k, f in feats.items()
+    }  # {k: pts_batch x n_cameras x *n_pts x dim} for each feat type "k"
+
+    if masks is not None:
+        # sample masks
+        masks_sampled = (
+            ndc_grid_sample(
+                masks,
+                sampling_grid_ndc,
+                mode=sampling_mode,
+                align_corners=False,
+            )
+            .permute(2, 0, 3, 1)
+            .reshape(pts_batch, n_cameras, *n_pts, 1)
+        )
+    else:
+        masks_sampled = sampling_grid_ndc.new_ones(pts_batch, n_cameras, *n_pts, 1)
+
+    return feats_sampled, masks_sampled
+
+
+def handle_seq_id(
+    seq_id: Union[torch.LongTensor, List[str], List[int]],
+    device,
+) -> torch.LongTensor:
+    """
+    Converts the input sequence id to a LongTensor.
+
+    Args:
+        seq_id: A sequence of sequence ids; may be empty. Strings are hashed to ints.
+        device: The target device of the output.
+    Returns:
+        long_seq_id: `seq_id` converted to a `LongTensor` and moved to `device`.
+    """
+    if not torch.is_tensor(seq_id):
+        if len(seq_id) > 0 and isinstance(seq_id[0], str):
+            seq_id = [hash(s) for s in seq_id]
+        # pyre-fixme[9]: seq_id has type `Union[List[int], List[str], LongTensor]`;
+        #  used as `Tensor`.
+        seq_id = torch.tensor(seq_id, dtype=torch.long, device=device)
+    # pyre-fixme[16]: Item `List` of `Union[List[int], List[str], LongTensor]` has
+    #  no attribute `to`.
+    return seq_id.to(device)
+
+
+def cameras_points_cartesian_product(
+    camera: CamerasBase, pts: torch.Tensor
+) -> Tuple[CamerasBase, torch.Tensor]:
+    """
+    Generates all pairs of elements from 'camera' and 'pts' and returns
+    `camera_rep` and `pts_rep` such that::
+
+        camera_rep = [                 pts_rep = [
+            camera[0]                      pts[0],
+            camera[0]                      pts[1],
+            camera[0]                      ...,
+            ...                            pts[batch_pts-1],
+            camera[1]                      pts[0],
+            camera[1]                      pts[1],
+            camera[1]                      ...,
+            ...                            pts[batch_pts-1],
+            ...                            ...,
+            camera[n_cameras-1]            pts[0],
+            camera[n_cameras-1]            pts[1],
+            camera[n_cameras-1]            ...,
+            ...                            pts[batch_pts-1],
+        ]                              ]
+
+    Args:
+        camera: A batch of `n_cameras` cameras.
+        pts: A batch of `batch_pts` points of shape `(batch_pts, ..., dim)`
+
+    Returns:
+        camera_rep: A batch of batch_pts*n_cameras cameras such that::
+
+            camera_rep = [
+                camera[0]
+                camera[0]
+                camera[0]
+                ...
+                camera[1]
+                camera[1]
+                camera[1]
+                ...
+                ...
+                camera[n_cameras-1]
+                camera[n_cameras-1]
+                camera[n_cameras-1]
+            ]
+
+
+        pts_rep: Repeated `pts` of shape `(batch_pts*n_cameras, ..., dim)`,
+            such that::
+
+            pts_rep = [
+                pts[0],
+                pts[1],
+                ...,
+                pts[batch_pts-1],
+                pts[0],
+                pts[1],
+                ...,
+                pts[batch_pts-1],
+                ...,
+                pts[0],
+                pts[1],
+                ...,
+                pts[batch_pts-1],
+            ]
+
+    """
+    n_cameras = camera.R.shape[0]
+    batch_pts = pts.shape[0]
+    pts_rep = pts.repeat(n_cameras, *[1 for _ in pts.shape[1:]])
+    idx_cams = (
+        torch.arange(n_cameras)[:, None]
+        .expand(
+            n_cameras,
+            batch_pts,
+        )
+        .reshape(batch_pts * n_cameras)
+    )
+    # pyre-fixme[6]: For 1st param expected `Union[List[int], int, LongTensor]` but
+    #  got `Tensor`.
+    camera_rep = camera[idx_cams]
+    return camera_rep, pts_rep
diff --git a/pytorch3d/pytorch3d/implicitron/models/visualization/__init__.py b/pytorch3d/pytorch3d/implicitron/models/visualization/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9fdb3b996b73ba9ae811fa42fb7615768a928fc
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/visualization/__init__.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/pytorch3d/implicitron/models/visualization/render_flyaround.py b/pytorch3d/pytorch3d/implicitron/models/visualization/render_flyaround.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a3afadbb86e0307bea4b9ab5e7a54ef0c7183fb
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/models/visualization/render_flyaround.py
@@ -0,0 +1,391 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import logging
+import math
+import os
+import random
+from typing import (
+    Any,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    TYPE_CHECKING,
+    Union,
+)
+
+import numpy as np
+import torch
+import torch.nn.functional as Fu
+from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData
+from pytorch3d.implicitron.dataset.utils import is_train_frame
+from pytorch3d.implicitron.models.base_model import EvaluationMode
+from pytorch3d.implicitron.tools.eval_video_trajectory import (
+    generate_eval_video_cameras,
+)
+from pytorch3d.implicitron.tools.video_writer import VideoWriter
+from pytorch3d.implicitron.tools.vis_utils import (
+    get_visdom_connection,
+    make_depth_image,
+)
+from tqdm import tqdm
+
+if TYPE_CHECKING:
+    from visdom import Visdom
+
+logger = logging.getLogger(__name__)
+
+
+def render_flyaround(
+    dataset: DatasetBase,
+    sequence_name: str,
+    model: torch.nn.Module,
+    output_video_path: str,
+    n_flyaround_poses: int = 40,
+    fps: int = 20,
+    trajectory_type: str = "circular_lsq_fit",
+    max_angle: float = 2 * math.pi,
+    trajectory_scale: float = 1.1,
+    scene_center: Tuple[float, float, float] = (0.0, 0.0, 0.0),
+    up: Tuple[float, float, float] = (0.0, -1.0, 0.0),
+    traj_offset: float = 0.0,
+    n_source_views: int = 9,
+    visdom_show_preds: bool = False,
+    visdom_environment: str = "render_flyaround",
+    visdom_server: str = "http://127.0.0.1",
+    visdom_port: int = 8097,
+    num_workers: int = 10,
+    device: Union[str, torch.device] = "cuda",
+    seed: Optional[int] = None,
+    video_resize: Optional[Tuple[int, int]] = None,
+    output_video_frames_dir: Optional[str] = None,
+    visualize_preds_keys: Sequence[str] = (
+        "images_render",
+        "masks_render",
+        "depths_render",
+        "_all_source_images",
+    ),
+) -> None:
+    """
+    Uses `model` to generate a video consisting of renders of a scene imaged from
+    a camera flying around the scene. The scene is specified with the `dataset` object and
+    `sequence_name` which denotes the name of the scene whose frames are in `dataset`.
+
+    Args:
+        dataset: The dataset object containing frames from a sequence in `sequence_name`.
+        sequence_name: Name of a sequence from `dataset`.
+        model: The model whose predictions are going to be visualized.
+        output_video_path: The prefix of the paths of the output videos. One video
+            is written per visualized key to
+            `<output_video_path>_<sequence_name>_<key>.mp4`.
+        n_flyaround_poses: The number of camera poses of the flyaround trajectory.
+        fps: Framerate of the output video.
+        trajectory_type: The type of the camera trajectory. Can be one of:
+            circular_lsq_fit: Camera centers follow a trajectory obtained
+                by fitting a 3D circle to train_cameras centers.
+                All cameras are looking towards scene_center.
+            figure_eight: Figure-of-8 trajectory around the center of the
+                central camera of the training dataset.
+            trefoil_knot: Same as 'figure_eight', but the trajectory has a shape
+                of a trefoil knot (https://en.wikipedia.org/wiki/Trefoil_knot).
+            figure_eight_knot: Same as 'figure_eight', but the trajectory has a shape
+                of a figure-eight knot
+                (https://en.wikipedia.org/wiki/Figure-eight_knot_(mathematics)).
+        max_angle: Defines the total length of the generated camera trajectory.
+            All possible trajectories (set with the `trajectory_type` argument) are
+            periodic with the period of `2pi`.
+            E.g. setting `trajectory_type=circular_lsq_fit` and `max_angle=4pi` will generate
+            a trajectory of camera poses rotating the total of 720 deg around the object.
+        trajectory_scale: The extent of the trajectory.
+        scene_center: The center of the scene in world coordinates which all
+            the cameras from the generated trajectory look at.
+        up: The "up" vector of the scene (=the normal of the scene floor).
+            Active for the `trajectory_type="circular"`.
+        traj_offset: A scalar offset of the trajectory, passed to the trajectory
+            generator as `traj_offset_canonical=(0, 0, traj_offset)`.
+        n_source_views: The number of source views sampled from the known views of the
+            training sequence added to each evaluation batch.
+        visdom_show_preds: If `True`, exports the visualizations to visdom.
+        visdom_environment: The name of the visdom environment.
+        visdom_server: The address of the visdom server.
+        visdom_port: The visdom port.
+        num_workers: The number of workers used to load the training data.
+        device: The device to which each batch is moved before it is passed
+            to `model`.
+        seed: The random seed used for reproducible sampling of the source views.
+            If `None`, the seed is derived from the bytes of `sequence_name`,
+            which yields the same source views in every run.
+        video_resize: Optionally, defines the size of the output video.
+        output_video_frames_dir: If specified, the frames of the output video are going
+            to be permanently stored in this directory.
+        visualize_preds_keys: The names of the model predictions to visualize.
+            `_all_source_images` stands for a grid of the source images.
+    """
+
+    if seed is None:
+        # the built-in `hash` of a str is salted per process (PYTHONHASHSEED), so
+        # derive a seed from the bytes of the name that is stable across runs
+        seed = int.from_bytes(sequence_name.encode("utf-8"), "big") % (2**61 - 1)
+
+    if visdom_show_preds:
+        viz = get_visdom_connection(server=visdom_server, port=visdom_port)
+    else:
+        viz = None
+
+    logger.info(f"Loading all data of sequence '{sequence_name}'.")
+    seq_idx = list(dataset.sequence_indices_in_order(sequence_name))
+    train_data = _load_whole_dataset(dataset, seq_idx, num_workers=num_workers)
+    assert all(train_data.sequence_name[0] == sn for sn in train_data.sequence_name)
+    # pyre-ignore[6]
+    sequence_set_name = "train" if is_train_frame(train_data.frame_type)[0] else "test"
+    logger.info(f"Sequence set = {sequence_set_name}.")
+    train_cameras = train_data.camera
+    # n_flyaround_poses time steps in [0, max_angle); the endpoint is left out
+    time = torch.linspace(0, max_angle, n_flyaround_poses + 1)[:n_flyaround_poses]
+    test_cameras = generate_eval_video_cameras(
+        train_cameras,
+        time=time,
+        n_eval_cams=n_flyaround_poses,
+        trajectory_type=trajectory_type,
+        trajectory_scale=trajectory_scale,
+        scene_center=scene_center,
+        up=up,
+        focal_length=None,
+        principal_point=torch.zeros(n_flyaround_poses, 2),
+        traj_offset_canonical=(0.0, 0.0, traj_offset),
+    )
+
+    # sample the source views reproducibly
+    with torch.random.fork_rng():
+        torch.manual_seed(seed)
+        source_views_i = torch.randperm(len(seq_idx))[:n_source_views]
+
+    # add the first dummy view that will get replaced with the target camera
+    source_views_i = Fu.pad(source_views_i, [1, 0])
+    source_views = [seq_idx[i] for i in source_views_i.tolist()]
+    batch = _load_whole_dataset(dataset, source_views, num_workers=num_workers)
+    assert all(batch.sequence_name[0] == sn for sn in batch.sequence_name)
+
+    preds_total = []
+    for n in tqdm(range(n_flyaround_poses), total=n_flyaround_poses):
+        # set the first batch camera to the target camera
+        for k in ("R", "T", "focal_length", "principal_point"):
+            getattr(batch.camera, k)[0] = getattr(test_cameras[n], k)
+
+        # Move to the target device
+        net_input = batch.to(device)
+        with torch.no_grad():
+            preds = model(**{**net_input, "evaluation_mode": EvaluationMode.EVALUATION})
+
+            # make sure we dont overwrite something
+            assert all(k not in preds for k in net_input.keys())
+            preds.update(net_input)  # merge everything into one big dict
+
+            # Render the predictions to images
+            rendered_pred = _images_from_preds(preds, extract_keys=visualize_preds_keys)
+            preds_total.append(rendered_pred)
+
+            # show the preds every 5% of the export iterations
+            if visdom_show_preds and (
+                n % max(n_flyaround_poses // 20, 1) == 0 or n == n_flyaround_poses - 1
+            ):
+                assert viz is not None
+                _show_predictions(
+                    preds_total,
+                    sequence_name=batch.sequence_name[0],
+                    viz=viz,
+                    viz_env=visdom_environment,
+                    predicted_keys=visualize_preds_keys,
+                )
+
+    logger.info(f"Exporting videos for sequence {sequence_name} ...")
+    _generate_prediction_videos(
+        preds_total,
+        sequence_name=batch.sequence_name[0],
+        viz=viz,
+        viz_env=visdom_environment,
+        fps=fps,
+        video_path=output_video_path,
+        resize=video_resize,
+        video_frames_dir=output_video_frames_dir,
+        predicted_keys=visualize_preds_keys,
+    )
+
+
+def _load_whole_dataset(
+    dataset: torch.utils.data.Dataset, idx: Sequence[int], num_workers: int = 10
+) -> FrameData:
+    load_all_dataloader = torch.utils.data.DataLoader(
+        torch.utils.data.Subset(dataset, idx),
+        batch_size=len(idx),  # a single batch spans the whole subset
+        num_workers=num_workers,
+        shuffle=False,
+        collate_fn=FrameData.collate,
+    )
+    return next(iter(load_all_dataloader))  # the first (and only) batch
+
+
+def _images_from_preds(
+    preds: Dict[str, Any],
+    extract_keys: Iterable[str] = (
+        "image_rgb",
+        "images_render",
+        "fg_probability",
+        "masks_render",
+        "depths_render",
+        "depth_map",
+        "_all_source_images",
+    ),
+) -> Dict[str, torch.Tensor]:
+    """Converts the entries of `preds` listed in `extract_keys` to CPU images."""
+    imout = {}
+    for k in extract_keys:
+        if k == "_all_source_images" and preds.get("image_rgb") is not None:
+            # a grid of all but the first image, i.e. of the source views
+            v = _stack_images(preds["image_rgb"][1:].detach().cpu(), None)[None]
+        elif preds.get(k) is None:
+            print(f"cant show {k}")
+            continue
+        elif k.startswith("depth"):
+            # without `masks_render`, every pixel is treated as valid
+            mask = preds.get("masks_render")
+            mask = torch.ones_like(preds[k]) if mask is None else mask
+            mask = Fu.interpolate(mask, size=preds[k].shape[2:], mode="nearest")
+            v = make_depth_image(preds[k], mask)
+        else:
+            v = preds[k]
+        if v.shape[1] == 1:
+            v = v.repeat(1, 3, 1, 1)
+        imout[k] = v.detach().cpu().clone()
+
+    return imout
+
+
+def _stack_images(ims: torch.Tensor, size: Optional[Tuple[int, int]]) -> torch.Tensor:
+    """Tiles the batch `ims` into a square grid that is padded with zero tiles."""
+    side = int(np.ceil(np.sqrt(ims.shape[0])))  # the grid has side x side cells
+    n_pad = side * side - ims.shape[0]
+    if n_pad > 0:
+        blank = torch.zeros_like(ims[:1]).repeat(n_pad, 1, 1, 1)
+        ims = torch.cat((ims, blank))
+    grid = ims.view(side, side, *ims.shape[1:])
+    rows = [torch.cat(list(row), dim=2) for row in grid]  # tiles joined along dim 2
+    tiled = torch.cat(rows, dim=1)  # rows joined along dim 1
+    if size is not None:
+        tiled = Fu.interpolate(tiled[None], size=size, mode="bilinear")[0]
+    return tiled.clamp(0.0, 1.0)
+
+
+def _show_predictions(
+    preds: List[Dict[str, Any]],
+    sequence_name: str,
+    viz: "Visdom",
+    viz_env: str = "visualizer",
+    predicted_keys: Sequence[str] = (
+        "images_render",
+        "masks_render",
+        "depths_render",
+        "_all_source_images",
+    ),
+    n_samples=10,
+    one_image_width=200,
+) -> None:
+    """Given a list of predictions visualize them into a single image using visdom."""
+    assert isinstance(preds, list)
+
+    pred_all = []
+    # Randomly choose a subset of the rendered images, sort by order in the sequence
+    n_samples = min(n_samples, len(preds))
+    pred_idx = sorted(random.sample(list(range(len(preds))), n_samples))
+    for predi in pred_idx:
+        # Make the concatenation for the same camera vertically
+        pred_all.append(
+            torch.cat(
+                [
+                    torch.nn.functional.interpolate(
+                        preds[predi][k].cpu(),
+                        scale_factor=one_image_width / preds[predi][k].shape[3],
+                        mode="bilinear",
+                    ).clamp(0.0, 1.0)
+                    for k in predicted_keys
+                ],
+                dim=2,
+            )
+        )
+    # Concatenate the images horizontally
+    pred_all_cat = torch.cat(pred_all, dim=3)[0]
+    viz.image(
+        pred_all_cat,
+        win="show_predictions",
+        env=viz_env,
+        opts={"title": f"pred_{sequence_name}"},
+    )
+
+
+def _generate_prediction_videos(
+    preds: List[Dict[str, Any]],
+    sequence_name: str,
+    viz: Optional["Visdom"] = None,
+    viz_env: str = "visualizer",
+    predicted_keys: Sequence[str] = (
+        "images_render",
+        "masks_render",
+        "depths_render",
+        "_all_source_images",
+    ),
+    fps: int = 20,
+    video_path: str = "/tmp/video",
+    video_frames_dir: Optional[str] = None,
+    resize: Optional[Tuple[int, int]] = None,
+) -> None:
+    """Given a list of predictions create and visualize rotating videos of the
+    objects using visdom. One video is written per key of `predicted_keys`
+    to `<video_path>_<sequence_name>_<key>.mp4`; keys that are missing from
+    the predictions are skipped with a warning.
+    """
+
+    # make sure the target video directory exists ("" stands for the cwd)
+    os.makedirs(os.path.dirname(video_path) or ".", exist_ok=True)
+
+    # init a video writer for each predicted key
+    vws = {}
+    for k in predicted_keys:
+        if not preds or k not in preds[0]:
+            logger.warning(f"Cannot generate video for prediction key '{k}'")
+            continue
+        cache_dir = (
+            None
+            if video_frames_dir is None
+            else os.path.join(video_frames_dir, f"{sequence_name}_{k}")
+        )
+        vws[k] = VideoWriter(
+            fps=fps,
+            out_path=f"{video_path}_{sequence_name}_{k}.mp4",
+            cache_dir=cache_dir,
+        )
+
+    for rendered_pred in tqdm(preds):
+        for k, vw in vws.items():
+            vw.write_frame(
+                rendered_pred[k][0].clip(0.0, 1.0).detach().cpu().numpy(),
+                resize=resize,
+            )
+
+    for k, vw in vws.items():
+        vw.get_video()
+        logger.info(f"Generated {vw.out_path}.")
+        if viz is not None:
+            viz.video(
+                videofile=vw.out_path,
+                env=viz_env,
+                win=k,  # we reuse the same window otherwise visdom dies
+                opts={"title": sequence_name + " " + k},
+            )
diff --git a/pytorch3d/pytorch3d/implicitron/third_party/__init__.py b/pytorch3d/pytorch3d/implicitron/third_party/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/third_party/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/pytorch3d/implicitron/third_party/hyperlayers.py b/pytorch3d/pytorch3d/implicitron/third_party/hyperlayers.py
new file mode 100644
index 0000000000000000000000000000000000000000..e56235130166b75f50a9aa9700a8de25b3f472c8
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/third_party/hyperlayers.py
@@ -0,0 +1,253 @@
+# a copy-paste from https://github.com/vsitzmann/scene-representation-networks/blob/master/hyperlayers.py
+# fmt: off
+# flake8: noqa
+'''Pytorch implementations of hyper-network modules.
+'''
+import functools
+
+import torch
+import torch.nn as nn
+
+from . import pytorch_prototyping
+
+
+def partialclass(cls, *args, **kwds):  # Return a subclass of `cls` whose __init__ has `args` / `kwds` pre-bound.
+    class NewCls(cls):
+        __init__ = functools.partialmethod(cls.__init__, *args, **kwds)  # remaining arguments are supplied at instantiation
+
+    return NewCls
+
+
+class LookupLayer(nn.Module):  # Per-object linear layer (LookupLinear) followed by a non-affine LayerNorm and a ReLU.
+    def __init__(self, in_ch, out_ch, num_objects):
+        super().__init__()
+
+        self.out_ch = out_ch
+        self.lookup_lin = LookupLinear(in_ch, out_ch, num_objects=num_objects)
+        self.norm_nl = nn.Sequential(  # shared by every object: LayerNorm without learnable affine, then in-place ReLU
+            nn.LayerNorm([self.out_ch], elementwise_affine=False), nn.ReLU(inplace=True)
+        )
+
+    def forward(self, obj_idx):  # Returns a module (not a tensor): the looked-up linear layer chained with norm + ReLU.
+        net = nn.Sequential(self.lookup_lin(obj_idx), self.norm_nl)
+        return net
+
+
+class LookupFC(nn.Module):  # Stack of lookup layers that, given object indices, yields a fully connected network as an nn.Sequential.
+    def __init__(
+        self,
+        hidden_ch,
+        num_hidden_layers,
+        num_objects,
+        in_ch,
+        out_ch,
+        outermost_linear=False,
+    ):
+        super().__init__()
+        self.layers = nn.ModuleList()
+        self.layers.append(  # input layer: in_ch -> hidden_ch
+            LookupLayer(in_ch=in_ch, out_ch=hidden_ch, num_objects=num_objects)
+        )
+
+        for i in range(num_hidden_layers):  # hidden layers: hidden_ch -> hidden_ch
+            self.layers.append(
+                LookupLayer(in_ch=hidden_ch, out_ch=hidden_ch, num_objects=num_objects)
+            )
+
+        if outermost_linear:  # final layer is a bare LookupLinear (no LayerNorm / ReLU)
+            self.layers.append(
+                LookupLinear(in_ch=hidden_ch, out_ch=out_ch, num_objects=num_objects)
+            )
+        else:
+            self.layers.append(
+                LookupLayer(in_ch=hidden_ch, out_ch=out_ch, num_objects=num_objects)
+            )
+
+    def forward(self, obj_idx):  # Calls every layer with `obj_idx` and chains the returned modules into one nn.Sequential.
+        net = []
+        for i in range(len(self.layers)):
+            net.append(self.layers[i](obj_idx))
+
+        return nn.Sequential(*net)
+
+
+class LookupLinear(nn.Module):  # Keeps one flattened (weights, biases) vector per object in an embedding table and returns it as a BatchLinear.
+    def __init__(self, in_ch, out_ch, num_objects):
+        super().__init__()
+        self.in_ch = in_ch
+        self.out_ch = out_ch
+
+        self.hypo_params = nn.Embedding(num_objects, in_ch * out_ch + out_ch)  # row layout: [in_ch*out_ch weights | out_ch biases]
+
+        for i in range(num_objects):  # per object: Kaiming-normal weights (written in place through a view), zero biases
+            nn.init.kaiming_normal_(
+                self.hypo_params.weight.data[i, : self.in_ch * self.out_ch].view(
+                    self.out_ch, self.in_ch
+                ),
+                a=0.0,
+                nonlinearity="relu",
+                mode="fan_in",
+            )
+            self.hypo_params.weight.data[i, self.in_ch * self.out_ch :].fill_(0.0)
+
+    def forward(self, obj_idx):  # Looks up the parameter rows for `obj_idx` and wraps them in a BatchLinear module.
+        hypo_params = self.hypo_params(obj_idx)
+
+        # Indices explicit to catch errors in shape of output layer
+        weights = hypo_params[..., : self.in_ch * self.out_ch]
+        biases = hypo_params[
+            ..., self.in_ch * self.out_ch : (self.in_ch * self.out_ch) + self.out_ch
+        ]
+
+        biases = biases.view(*(biases.size()[:-1]), 1, self.out_ch)  # leading dims kept, trailing dims become (1, out_ch)
+        weights = weights.view(*(weights.size()[:-1]), self.out_ch, self.in_ch)  # leading dims kept, trailing dims become (out_ch, in_ch)
+
+        return BatchLinear(weights=weights, biases=biases)
+
+
+class HyperLayer(nn.Module):
+    """A hypernetwork that predicts a single Dense Layer, including LayerNorm and a ReLU."""
+
+    def __init__(
+        self, in_ch, out_ch, hyper_in_ch, hyper_num_hidden_layers, hyper_hidden_ch
+    ):
+        super().__init__()
+
+        self.hyper_linear = HyperLinear(  # predicts the weights and biases of the in_ch -> out_ch linear map
+            in_ch=in_ch,
+            out_ch=out_ch,
+            hyper_in_ch=hyper_in_ch,
+            hyper_num_hidden_layers=hyper_num_hidden_layers,
+            hyper_hidden_ch=hyper_hidden_ch,
+        )
+        self.norm_nl = nn.Sequential(  # not predicted: LayerNorm without learnable affine, then in-place ReLU
+            nn.LayerNorm([out_ch], elementwise_affine=False), nn.ReLU(inplace=True)
+        )
+
+    def forward(self, hyper_input):
+        """
+        :param hyper_input: input to hypernetwork.
+        :return: nn.Module; the predicted linear layer chained with LayerNorm and ReLU in an nn.Sequential.
+        """
+        return nn.Sequential(self.hyper_linear(hyper_input), self.norm_nl)
+
+
+class HyperFC(nn.Module):
+    """Builds a hypernetwork that predicts a fully connected neural network."""
+
+    def __init__(
+        self,
+        hyper_in_ch,
+        hyper_num_hidden_layers,
+        hyper_hidden_ch,
+        hidden_ch,
+        num_hidden_layers,
+        in_ch,
+        out_ch,
+        outermost_linear=False,
+    ):
+        super().__init__()
+
+        PreconfHyperLinear = partialclass(  # HyperLinear with the hypernetwork settings pre-bound; only in_ch / out_ch remain
+            HyperLinear,
+            hyper_in_ch=hyper_in_ch,
+            hyper_num_hidden_layers=hyper_num_hidden_layers,
+            hyper_hidden_ch=hyper_hidden_ch,
+        )
+        PreconfHyperLayer = partialclass(  # same pre-binding for HyperLayer (linear + LayerNorm + ReLU)
+            HyperLayer,
+            hyper_in_ch=hyper_in_ch,
+            hyper_num_hidden_layers=hyper_num_hidden_layers,
+            hyper_hidden_ch=hyper_hidden_ch,
+        )
+
+        self.layers = nn.ModuleList()
+        self.layers.append(PreconfHyperLayer(in_ch=in_ch, out_ch=hidden_ch))  # input layer
+
+        for i in range(num_hidden_layers):  # hidden layers: hidden_ch -> hidden_ch
+            self.layers.append(PreconfHyperLayer(in_ch=hidden_ch, out_ch=hidden_ch))
+
+        if outermost_linear:  # final layer is a bare predicted linear layer (no LayerNorm / ReLU)
+            self.layers.append(PreconfHyperLinear(in_ch=hidden_ch, out_ch=out_ch))
+        else:
+            self.layers.append(PreconfHyperLayer(in_ch=hidden_ch, out_ch=out_ch))
+
+    def forward(self, hyper_input):
+        """
+        :param hyper_input: Input to hypernetwork.
+        :return: nn.Module; Predicted fully connected neural network.
+        """
+        net = []
+        for i in range(len(self.layers)):  # every layer receives the same hyper_input and returns a predicted module
+            net.append(self.layers[i](hyper_input))
+
+        return nn.Sequential(*net)
+
+
+class BatchLinear(nn.Module):
+    def __init__(self, weights, biases):
+        """Implements a batch linear layer.
+
+        :param weights: Shape: (batch, out_ch, in_ch)
+        :param biases: Shape: (batch, 1, out_ch)
+        """
+        super().__init__()
+
+        self.weights = weights  # stored as plain attributes as passed in (no nn.Parameter wrapping here)
+        self.biases = biases
+
+    def __repr__(self):  # in_ch / out_ch are read from the last two dims of `weights`
+        return "BatchLinear(in_ch=%d, out_ch=%d)" % (
+            self.weights.shape[-1],
+            self.weights.shape[-2],
+        )
+
+    def forward(self, input):  # computes input @ weights^T + biases
+        output = input.matmul(
+            self.weights.permute(  # leading dims stay in order; only the last two dims are swapped
+                *[i for i in range(len(self.weights.shape) - 2)], -1, -2
+            )
+        )
+        output += self.biases  # in-place add on the matmul result
+        return output
+
+
+def last_hyper_layer_init(m) -> None:  # Initialiser for `Module.apply`: Kaiming-normal weights scaled down by 10x.
+    if type(m) == nn.Linear:  # exact type match; any other module type is left untouched
+        nn.init.kaiming_normal_(m.weight, a=0.0, nonlinearity="relu", mode="fan_in")
+        m.weight.data *= 1e-1  # shrink the freshly initialised weights in place
+
+
+class HyperLinear(nn.Module):
+    """A hypernetwork that predicts a single linear layer (weights & biases)."""
+
+    def __init__(
+        self, in_ch, out_ch, hyper_in_ch, hyper_num_hidden_layers, hyper_hidden_ch
+    ):
+
+        super().__init__()
+        self.in_ch = in_ch
+        self.out_ch = out_ch
+
+        self.hypo_params = pytorch_prototyping.FCBlock(  # MLP emitting one flat vector: [in_ch*out_ch weights | out_ch biases]
+            in_features=hyper_in_ch,
+            hidden_ch=hyper_hidden_ch,
+            num_hidden_layers=hyper_num_hidden_layers,
+            out_features=(in_ch * out_ch) + out_ch,
+            outermost_linear=True,
+        )
+        self.hypo_params[-1].apply(last_hyper_layer_init)  # re-initialise the FCBlock's last module with down-scaled weights
+
+    def forward(self, hyper_input):  # Runs the hypernetwork and wraps the predicted parameters in a BatchLinear module.
+        hypo_params = self.hypo_params(hyper_input)
+
+        # Indices explicit to catch errors in shape of output layer
+        weights = hypo_params[..., : self.in_ch * self.out_ch]
+        biases = hypo_params[
+            ..., self.in_ch * self.out_ch : (self.in_ch * self.out_ch) + self.out_ch
+        ]
+
+        biases = biases.view(*(biases.size()[:-1]), 1, self.out_ch)  # leading dims kept, trailing dims become (1, out_ch)
+        weights = weights.view(*(weights.size()[:-1]), self.out_ch, self.in_ch)  # leading dims kept, trailing dims become (out_ch, in_ch)
+
+        return BatchLinear(weights=weights, biases=biases)
diff --git a/pytorch3d/pytorch3d/implicitron/third_party/pytorch_prototyping.py b/pytorch3d/pytorch3d/implicitron/third_party/pytorch_prototyping.py
new file mode 100644
index 0000000000000000000000000000000000000000..7dd973fc4053eaa6d38ba82c872a38ff83ba7741
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/third_party/pytorch_prototyping.py
@@ -0,0 +1,771 @@
+# a copy-paste from https://raw.githubusercontent.com/vsitzmann/pytorch_prototyping/10f49b1e7df38a58fd78451eac91d7ac1a21df64/pytorch_prototyping.py
+# fmt: off
+# flake8: noqa
+'''A number of custom pytorch modules with sane defaults that I find useful for model prototyping.
+'''
+import torch
+import torch.nn as nn
+import torchvision.utils
+from torch.nn import functional as F
+
+
+class FCLayer(nn.Module):  # Fully connected building block: Linear -> LayerNorm -> in-place ReLU.
+    def __init__(self, in_features, out_features):
+        super().__init__()
+        self.net = nn.Sequential(
+            nn.Linear(in_features, out_features),
+            nn.LayerNorm([out_features]),  # normalises over the feature dimension of the linear output
+            nn.ReLU(inplace=True),
+        )
+
+    def forward(self, input):  # applies the three-stage sequential to `input`
+        return self.net(input)
+
+
+# From https://gist.github.com/wassname/ecd2dac6fc8f9918149853d17e3abf02
+class LayerNormConv2d(nn.Module):  # Normalises each sample over all of its non-batch dims, with an optional per-channel affine.
+    def __init__(self, num_features, eps=1e-5, affine=True):
+        super().__init__()
+        self.num_features = num_features
+        self.affine = affine
+        self.eps = eps  # added to the std (not the variance) in forward
+
+        if self.affine:  # gamma / beta only exist when affine=True
+            self.gamma = nn.Parameter(torch.Tensor(num_features).uniform_())  # scale, filled by uniform_() with its default bounds
+            self.beta = nn.Parameter(torch.zeros(num_features))  # shift, starts at zero
+
+    def forward(self, x):
+        shape = [-1] + [1] * (x.dim() - 1)  # (batch, 1, 1, ...) so per-sample statistics broadcast against x
+        mean = x.view(x.size(0), -1).mean(1).view(*shape)  # mean over every non-batch element of each sample
+        std = x.view(x.size(0), -1).std(1).view(*shape)  # std over every non-batch element of each sample
+
+        y = (x - mean) / (std + self.eps)
+        if self.affine:
+            shape = [1, -1] + [1] * (x.dim() - 2)  # (1, channels, 1, ...) so gamma / beta broadcast along dim 1
+            y = self.gamma.view(*shape) * y + self.beta.view(*shape)
+        return y
+
+
+class FCBlock(nn.Module):  # MLP: an input FCLayer, `num_hidden_layers` hidden FCLayers, then an FCLayer or a plain nn.Linear output.
+    def __init__(
+        self,
+        hidden_ch,
+        num_hidden_layers,
+        in_features,
+        out_features,
+        outermost_linear=False,
+    ):
+        super().__init__()
+
+        self.net = []  # plain list while building; replaced by an nn.Sequential below
+        self.net.append(FCLayer(in_features=in_features, out_features=hidden_ch))
+
+        for i in range(num_hidden_layers):
+            self.net.append(FCLayer(in_features=hidden_ch, out_features=hidden_ch))
+
+        if outermost_linear:  # output without LayerNorm / ReLU
+            self.net.append(nn.Linear(in_features=hidden_ch, out_features=out_features))
+        else:
+            self.net.append(FCLayer(in_features=hidden_ch, out_features=out_features))
+
+        self.net = nn.Sequential(*self.net)
+        self.net.apply(self.init_weights)  # init_weights only acts on nn.Linear modules
+
+    def __getitem__(self, item):  # index into the underlying nn.Sequential, e.g. block[-1] is the output layer
+        return self.net[item]
+
+    def init_weights(self, m):  # Kaiming-normal (fan_in, relu) init for modules whose type is exactly nn.Linear
+        if type(m) == nn.Linear:
+            nn.init.kaiming_normal_(m.weight, a=0.0, nonlinearity="relu", mode="fan_in")
+
+    def forward(self, input):  # runs `input` through the sequential stack
+        return self.net(input)
+
+
+class DownBlock3D(nn.Module):
+    """A 3D convolutional downsampling block: replication pad, stride-2 conv, optional norm, LeakyReLU(0.2)."""
+
+    def __init__(self, in_channels, out_channels, norm=nn.BatchNorm3d):
+        super().__init__()
+
+        self.net = [  # plain list while building; replaced by an nn.Sequential below
+            nn.ReplicationPad3d(1),
+            nn.Conv3d(
+                in_channels,
+                out_channels,
+                kernel_size=4,
+                padding=0,
+                stride=2,
+                bias=False if norm is not None else True,  # conv bias only when no norm layer follows
+            ),
+        ]
+
+        if norm is not None:  # `norm` is a layer class / factory called as norm(channels, affine=True)
+            self.net += [norm(out_channels, affine=True)]
+
+        self.net += [nn.LeakyReLU(0.2, True)]
+        self.net = nn.Sequential(*self.net)
+
+    def forward(self, x):  # applies pad -> conv -> (norm) -> LeakyReLU
+        return self.net(x)
+
+
+class UpBlock3D(nn.Module):
+    """A 3D convolutional upsampling block: stride-2 transpose conv, optional norm, ReLU."""
+
+    def __init__(self, in_channels, out_channels, norm=nn.BatchNorm3d):
+        super().__init__()
+
+        self.net = [  # plain list while building; replaced by an nn.Sequential below
+            nn.ConvTranspose3d(
+                in_channels,
+                out_channels,
+                kernel_size=4,
+                stride=2,
+                padding=1,
+                bias=False if norm is not None else True,  # conv bias only when no norm layer follows
+            ),
+        ]
+
+        if norm is not None:  # `norm` is a layer class / factory called as norm(channels, affine=True)
+            self.net += [norm(out_channels, affine=True)]
+
+        self.net += [nn.ReLU(True)]
+        self.net = nn.Sequential(*self.net)
+
+    def forward(self, x, skipped=None):  # optional skip tensor is concatenated in front of x along dim 1
+        if skipped is not None:
+            input = torch.cat([skipped, x], dim=1)  # in_channels must account for the concatenated channels
+        else:
+            input = x
+        return self.net(input)
+
+
+class Conv3dSame(torch.nn.Module):
+    """3D convolution that pads to keep spatial dimensions equal.
+    Cannot deal with stride. Only quadratic kernels (=scalar kernel_size).
+    """
+
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        bias=True,
+        padding_layer=nn.ReplicationPad3d,
+    ):
+        """
+        :param in_channels: Number of input channels
+        :param out_channels: Number of output channels
+        :param kernel_size: Scalar. Spatial dimensions of kernel (only quadratic kernels supported).
+        :param bias: Whether or not to use bias.
+        :param padding_layer: Which padding to use. Default is replication padding (nn.ReplicationPad3d).
+        """
+        super().__init__()
+        ka = kernel_size // 2  # padding on the leading side of each spatial dim
+        kb = ka - 1 if kernel_size % 2 == 0 else ka  # trailing side gets one less for even kernels, so ka + kb == kernel_size - 1
+        self.net = nn.Sequential(
+            padding_layer((ka, kb, ka, kb, ka, kb)),  # one (ka, kb) pair per spatial dimension
+            nn.Conv3d(in_channels, out_channels, kernel_size, bias=bias, stride=1),
+        )
+
+    def forward(self, x):  # pad, then convolve with stride 1
+        return self.net(x)
+
+
+class Conv2dSame(torch.nn.Module):
+    """2D convolution that pads to keep spatial dimensions equal.
+    Cannot deal with stride. Only quadratic kernels (=scalar kernel_size).
+    """
+
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        bias=True,
+        padding_layer=nn.ReflectionPad2d,
+    ):
+        """
+        :param in_channels: Number of input channels
+        :param out_channels: Number of output channels
+        :param kernel_size: Scalar. Spatial dimensions of kernel (only quadratic kernels supported).
+        :param bias: Whether or not to use bias.
+        :param padding_layer: Which padding to use. Default is reflection padding.
+        """
+        super().__init__()
+        ka = kernel_size // 2  # padding on the leading side of each spatial dim
+        kb = ka - 1 if kernel_size % 2 == 0 else ka  # trailing side gets one less for even kernels, so ka + kb == kernel_size - 1
+        self.net = nn.Sequential(
+            padding_layer((ka, kb, ka, kb)),  # one (ka, kb) pair per spatial dimension
+            nn.Conv2d(in_channels, out_channels, kernel_size, bias=bias, stride=1),
+        )
+
+        self.weight = self.net[1].weight  # aliases of the inner conv's parameters, exposed on the wrapper
+        self.bias = self.net[1].bias  # None when bias=False
+
+    def forward(self, x):  # pad, then convolve with stride 1
+        return self.net(x)
+
+
+class UpBlock(nn.Module):
+    """A 2d-conv upsampling block with a variety of options for upsampling, and following best practices / with
+    reasonable defaults. (LeakyReLU, kernel size multiple of stride)
+    """
+
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        post_conv=True,
+        use_dropout=False,
+        dropout_prob=0.1,
+        norm=nn.BatchNorm2d,
+        upsampling_mode="transpose",
+    ):
+        """
+        :param in_channels: Number of input channels
+        :param out_channels: Number of output channels
+        :param post_conv: Whether to have another convolutional layer after the upsampling layer.
+        :param use_dropout: bool. Whether to use dropout or not.
+        :param dropout_prob: Float. The dropout probability (if use_dropout is True); used by both dropout layers.
+        :param norm: Which norm to use. If None, no norm is used. Default is Batchnorm with affinity.
+        :param upsampling_mode: Which upsampling mode:
+                transpose: Upsampling with stride-2, kernel size 4 transpose convolutions.
+                bilinear: Feature map is upsampled with bilinear upsampling, then a conv layer.
+                nearest: Feature map is upsampled with nearest neighbor upsampling, then a conv layer.
+                shuffle: Feature map is upsampled with pixel shuffling, then a conv layer.
+        """
+        super().__init__()
+
+        net = list()
+
+        if upsampling_mode == "transpose":
+            net += [
+                nn.ConvTranspose2d(
+                    in_channels,
+                    out_channels,
+                    kernel_size=4,
+                    stride=2,
+                    padding=1,
+                    bias=True if norm is None else False,  # conv bias only when no norm layer follows
+                )
+            ]
+        elif upsampling_mode == "bilinear":
+            net += [nn.UpsamplingBilinear2d(scale_factor=2)]
+            net += [
+                Conv2dSame(
+                    in_channels,
+                    out_channels,
+                    kernel_size=3,
+                    bias=True if norm is None else False,
+                )
+            ]
+        elif upsampling_mode == "nearest":
+            net += [nn.UpsamplingNearest2d(scale_factor=2)]
+            net += [
+                Conv2dSame(
+                    in_channels,
+                    out_channels,
+                    kernel_size=3,
+                    bias=True if norm is None else False,
+                )
+            ]
+        elif upsampling_mode == "shuffle":
+            net += [nn.PixelShuffle(upscale_factor=2)]
+            net += [
+                Conv2dSame(
+                    in_channels // 4,  # PixelShuffle(2) trades a factor of 4 in channels for 2x spatial resolution
+                    out_channels,
+                    kernel_size=3,
+                    bias=True if norm is None else False,
+                )
+            ]
+        else:
+            raise ValueError("Unknown upsampling mode!")
+
+        if norm is not None:
+            net += [norm(out_channels, affine=True)]
+
+        net += [nn.ReLU(True)]
+
+        if use_dropout:
+            net += [nn.Dropout2d(dropout_prob, False)]
+
+        if post_conv:  # optional refinement stage: conv -> (norm) -> ReLU -> (dropout)
+            net += [
+                Conv2dSame(
+                    out_channels,
+                    out_channels,
+                    kernel_size=3,
+                    bias=True if norm is None else False,
+                )
+            ]
+
+            if norm is not None:
+                net += [norm(out_channels, affine=True)]
+
+            net += [nn.ReLU(True)]
+
+            if use_dropout:
+                net += [nn.Dropout2d(dropout_prob, False)]  # honor dropout_prob here too (was hard-coded to 0.1)
+
+        self.net = nn.Sequential(*net)
+
+    def forward(self, x, skipped=None):  # optional skip tensor is concatenated in front of x along dim 1
+        if skipped is not None:
+            input = torch.cat([skipped, x], dim=1)
+        else:
+            input = x
+        return self.net(input)
+
+
+class DownBlock(nn.Module):
+    """A 2D-conv downsampling block following best practices / with reasonable defaults
+    (LeakyReLU, kernel size multiple of stride)
+    """
+
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        prep_conv=True,
+        middle_channels=None,
+        use_dropout=False,
+        dropout_prob=0.1,
+        norm=nn.BatchNorm2d,
+    ):
+        """
+        :param in_channels: Number of input channels
+        :param out_channels: Number of output channels
+        :param prep_conv: Whether to have another convolutional layer before the downsampling layer.
+        :param middle_channels: If prep_conv is true, this sets the number of channels between the prep and downsampling
+                                convs. Defaults to in_channels when None.
+        :param use_dropout: bool. Whether to use dropout or not.
+        :param dropout_prob: Float. The dropout probability (if use_dropout is True)
+        :param norm: Which norm to use. If None, no norm is used. Default is Batchnorm with affinity.
+        """
+        super().__init__()
+
+        if middle_channels is None:
+            middle_channels = in_channels
+
+        net = list()
+
+        if prep_conv:  # stride-1 3x3 stage: pad -> conv -> (norm) -> LeakyReLU -> (dropout)
+            net += [
+                nn.ReflectionPad2d(1),
+                nn.Conv2d(
+                    in_channels,
+                    middle_channels,
+                    kernel_size=3,
+                    padding=0,
+                    stride=1,
+                    bias=True if norm is None else False,  # conv bias only when no norm layer follows
+                ),
+            ]
+
+            if norm is not None:
+                net += [norm(middle_channels, affine=True)]
+
+            net += [nn.LeakyReLU(0.2, True)]
+
+            if use_dropout:
+                net += [nn.Dropout2d(dropout_prob, False)]
+
+        net += [  # downsampling stage: pad -> stride-2 4x4 conv; it reads middle_channels even when prep_conv is False
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(
+                middle_channels,
+                out_channels,
+                kernel_size=4,
+                padding=0,
+                stride=2,
+                bias=True if norm is None else False,
+            ),
+        ]
+
+        if norm is not None:
+            net += [norm(out_channels, affine=True)]
+
+        net += [nn.LeakyReLU(0.2, True)]
+
+        if use_dropout:
+            net += [nn.Dropout2d(dropout_prob, False)]
+
+        self.net = nn.Sequential(*net)
+
+    def forward(self, x):  # runs x through the assembled sequential
+        return self.net(x)
+
+
+class Unet3d(nn.Module):
+    """A 3d-Unet implementation with sane defaults."""
+
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        nf0,
+        num_down,
+        max_channels,
+        norm=nn.BatchNorm3d,
+        outermost_linear=False,
+    ):
+        """
+        :param in_channels: Number of input channels
+        :param out_channels: Number of output channels
+        :param nf0: Number of features at highest level of U-Net
+        :param num_down: Number of downsampling stages.
+        :param max_channels: Maximum number of channels (channels multiply by 2 with every downsampling stage)
+        :param norm: Which norm to use. If None, no norm is used. Default is Batchnorm with affinity.
+        :param outermost_linear: Whether the output layer should be a linear layer or a nonlinear one.
+        """
+        super().__init__()
+
+        assert num_down > 0, "Need at least one downsampling layer in UNet3d."
+
+        # Define the in block
+        self.in_layer = [Conv3dSame(in_channels, nf0, kernel_size=3, bias=False)]  # NOTE(review): bias stays off even when norm is None (the 2D Unet enables it then) - confirm intended
+
+        if norm is not None:
+            self.in_layer += [norm(nf0, affine=True)]
+
+        self.in_layer += [nn.LeakyReLU(0.2, True)]
+        self.in_layer = nn.Sequential(*self.in_layer)
+
+        # Define the center UNet block. The feature map has height and width 1 --> no batchnorm.
+        self.unet_block = UnetSkipConnectionBlock3d(
+            int(min(2 ** (num_down - 1) * nf0, max_channels)),
+            int(min(2 ** (num_down - 1) * nf0, max_channels)),
+            norm=None,
+        )
+        for i in list(range(0, num_down - 1))[::-1]:  # wrap from the innermost level outwards (i = num_down - 2 ... 0)
+            self.unet_block = UnetSkipConnectionBlock3d(
+                int(min(2 ** i * nf0, max_channels)),  # outer channels at level i, capped at max_channels
+                int(min(2 ** (i + 1) * nf0, max_channels)),  # inner channels: doubled, capped at max_channels
+                submodule=self.unet_block,
+                norm=norm,
+            )
+
+        # Define the out layer. Each unet block concatenates its inputs with its outputs - so the output layer
+        # automatically receives the output of the in_layer and the output of the last unet layer.
+        self.out_layer = [
+            Conv3dSame(2 * nf0, out_channels, kernel_size=3, bias=outermost_linear)  # bias only for the linear output variant
+        ]
+
+        if not outermost_linear:  # nonlinear output: (norm) -> ReLU
+            if norm is not None:
+                self.out_layer += [norm(out_channels, affine=True)]
+            self.out_layer += [nn.ReLU(True)]
+        self.out_layer = nn.Sequential(*self.out_layer)
+
+    def forward(self, x):  # in_layer -> nested skip-connection blocks -> out_layer
+        in_layer = self.in_layer(x)
+        unet = self.unet_block(in_layer)
+        out_layer = self.out_layer(unet)
+        return out_layer
+
+
+class UnetSkipConnectionBlock3d(nn.Module):
+    """Helper class for building a 3D unet: down block, optional nested submodule, up block, with input concatenated to output."""
+
+    def __init__(self, outer_nc, inner_nc, norm=nn.BatchNorm3d, submodule=None):
+        super().__init__()
+
+        if submodule is None:  # innermost level: down then straight back up
+            model = [
+                DownBlock3D(outer_nc, inner_nc, norm=norm),
+                UpBlock3D(inner_nc, outer_nc, norm=norm),
+            ]
+        else:
+            model = [
+                DownBlock3D(outer_nc, inner_nc, norm=norm),
+                submodule,
+                UpBlock3D(2 * inner_nc, outer_nc, norm=norm),  # 2x: up block is built for the submodule's concatenated (input + output) channels
+            ]
+
+        self.model = nn.Sequential(*model)
+
+    def forward(self, x):  # skip connection: returns x concatenated with the block output along dim 1
+        forward_passed = self.model(x)
+        return torch.cat([x, forward_passed], 1)
+
+
+class UnetSkipConnectionBlock(nn.Module):
+    """Helper class for building a 2D unet: down block, optional nested submodule, up block, with input concatenated to output."""
+
+    def __init__(
+        self,
+        outer_nc,
+        inner_nc,
+        upsampling_mode,
+        norm=nn.BatchNorm2d,
+        submodule=None,
+        use_dropout=False,
+        dropout_prob=0.1,
+    ):
+        super().__init__()
+
+        if submodule is None:  # innermost level: down then straight back up
+            model = [
+                DownBlock(
+                    outer_nc,
+                    inner_nc,
+                    use_dropout=use_dropout,
+                    dropout_prob=dropout_prob,
+                    norm=norm,
+                ),
+                UpBlock(
+                    inner_nc,
+                    outer_nc,
+                    use_dropout=use_dropout,
+                    dropout_prob=dropout_prob,
+                    norm=norm,
+                    upsampling_mode=upsampling_mode,
+                ),
+            ]
+        else:
+            model = [
+                DownBlock(
+                    outer_nc,
+                    inner_nc,
+                    use_dropout=use_dropout,
+                    dropout_prob=dropout_prob,
+                    norm=norm,
+                ),
+                submodule,
+                UpBlock(
+                    2 * inner_nc,  # 2x: up block is built for the submodule's concatenated (input + output) channels
+                    outer_nc,
+                    use_dropout=use_dropout,
+                    dropout_prob=dropout_prob,
+                    norm=norm,
+                    upsampling_mode=upsampling_mode,
+                ),
+            ]
+
+        self.model = nn.Sequential(*model)
+
+    def forward(self, x):  # skip connection: returns x concatenated with the block output along dim 1
+        forward_passed = self.model(x)
+        return torch.cat([x, forward_passed], 1)
+
+
+class Unet(nn.Module):
+    """A 2d-Unet implementation with sane defaults."""
+
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        nf0,
+        num_down,
+        max_channels,
+        use_dropout,
+        upsampling_mode="transpose",
+        dropout_prob=0.1,
+        norm=nn.BatchNorm2d,
+        outermost_linear=False,
+    ):
+        """
+        :param in_channels: Number of input channels
+        :param out_channels: Number of output channels
+        :param nf0: Number of features at highest level of U-Net
+        :param num_down: Number of downsampling stages.
+        :param max_channels: Maximum number of channels (channels multiply by 2 with every downsampling stage)
+        :param use_dropout: Whether to use dropout or no.
+        :param dropout_prob: Dropout probability if use_dropout=True.
+        :param upsampling_mode: Which type of upsampling should be used. See "UpBlock" for documentation.
+        :param norm: Which norm to use. If None, no norm is used. Default is Batchnorm with affinity.
+        :param outermost_linear: Whether the output layer should be a linear layer or a nonlinear one.
+        """
+        super().__init__()
+
+        assert num_down > 0, "Need at least one downsampling layer in UNet."
+
+        # Define the in block
+        self.in_layer = [
+            Conv2dSame(
+                in_channels, nf0, kernel_size=3, bias=True if norm is None else False  # conv bias only when no norm layer follows
+            )
+        ]
+        if norm is not None:
+            self.in_layer += [norm(nf0, affine=True)]
+        self.in_layer += [nn.LeakyReLU(0.2, True)]
+
+        if use_dropout:
+            self.in_layer += [nn.Dropout2d(dropout_prob)]
+        self.in_layer = nn.Sequential(*self.in_layer)
+
+        # Define the center UNet block
+        self.unet_block = UnetSkipConnectionBlock(
+            min(2 ** (num_down - 1) * nf0, max_channels),
+            min(2 ** (num_down - 1) * nf0, max_channels),
+            use_dropout=use_dropout,
+            dropout_prob=dropout_prob,
+            norm=None,  # Innermost has no norm (spatial dimension 1)
+            upsampling_mode=upsampling_mode,
+        )
+
+        for i in list(range(0, num_down - 1))[::-1]:  # wrap from the innermost level outwards (i = num_down - 2 ... 0)
+            self.unet_block = UnetSkipConnectionBlock(
+                min(2 ** i * nf0, max_channels),  # outer channels at level i, capped at max_channels
+                min(2 ** (i + 1) * nf0, max_channels),  # inner channels: doubled, capped at max_channels
+                use_dropout=use_dropout,
+                dropout_prob=dropout_prob,
+                submodule=self.unet_block,
+                norm=norm,
+                upsampling_mode=upsampling_mode,
+            )
+
+        # Define the out layer. Each unet block concatenates its inputs with its outputs - so the output layer
+        # automatically receives the output of the in_layer and the output of the last unet layer.
+        self.out_layer = [
+            Conv2dSame(
+                2 * nf0,
+                out_channels,
+                kernel_size=3,
+                bias=outermost_linear or (norm is None),  # bias for the linear output variant or when no norm is used
+            )
+        ]
+
+        if not outermost_linear:  # nonlinear output: (norm) -> ReLU -> (dropout)
+            if norm is not None:
+                self.out_layer += [norm(out_channels, affine=True)]
+            self.out_layer += [nn.ReLU(True)]
+
+            if use_dropout:
+                self.out_layer += [nn.Dropout2d(dropout_prob)]
+        self.out_layer = nn.Sequential(*self.out_layer)
+
+        self.out_layer_weight = self.out_layer[0].weight  # alias of the output conv's weight (exposed by Conv2dSame)
+
+    def forward(self, x):  # in_layer -> nested skip-connection blocks -> out_layer
+        in_layer = self.in_layer(x)
+        unet = self.unet_block(in_layer)
+        out_layer = self.out_layer(unet)
+        return out_layer
+
+
+class Identity(nn.Module):
+    """Helper module to allow Downsampling and Upsampling nets to default to identity if they receive an empty list."""
+
+    def __init__(self):  # no parameters or submodules of its own
+        super().__init__()
+
+    def forward(self, input):  # returns the argument unchanged (same object, no copy)
+        return input
+
+
+class DownsamplingNet(nn.Module):
+    """A subnetwork that downsamples a 2D feature map with strided convolutions."""
+
+    def __init__(
+        self,
+        per_layer_out_ch,
+        in_channels,
+        use_dropout,
+        dropout_prob=0.1,
+        last_layer_one=False,
+        norm=nn.BatchNorm2d,
+    ):
+        """
+        :param per_layer_out_ch: python list of integers. Defines the number of output channels per layer. Length of
+                                list defines number of downsampling steps (each step downsamples by factor of 2.)
+        :param in_channels: Number of input channels.
+        :param use_dropout: Whether or not to use dropout.
+        :param dropout_prob: Dropout probability.
+        :param last_layer_one: Whether the output of the last layer will have a spatial size of 1. In that case,
+                               the last layer will not have batchnorm, else, it will.
+        :param norm: Which norm to use. Defaults to BatchNorm.
+        """
+        super().__init__()
+
+        if not len(per_layer_out_ch):  # empty list: pass-through network
+            self.downs = Identity()
+        else:
+            self.downs = list()  # plain list while building; replaced by an nn.Sequential below
+            self.downs.append(
+                DownBlock(
+                    in_channels,
+                    per_layer_out_ch[0],
+                    use_dropout=use_dropout,
+                    dropout_prob=dropout_prob,
+                    middle_channels=per_layer_out_ch[0],  # the prep conv already maps to the first layer's width
+                    norm=norm,
+                )
+            )
+            for i in range(0, len(per_layer_out_ch) - 1):  # NOTE(review): never runs for a single-entry list, so last_layer_one has no effect there - confirm intended
+                if last_layer_one and (i == len(per_layer_out_ch) - 2):  # final iteration builds the last block
+                    norm = None
+                self.downs.append(
+                    DownBlock(
+                        per_layer_out_ch[i],
+                        per_layer_out_ch[i + 1],
+                        dropout_prob=dropout_prob,
+                        use_dropout=use_dropout,
+                        norm=norm,
+                    )
+                )
+            self.downs = nn.Sequential(*self.downs)
+
+    def forward(self, input):  # runs input through the DownBlock chain (or Identity)
+        return self.downs(input)
+
+
+class UpsamplingNet(nn.Module):
+    """A subnetwork that upsamples a 2D feature map with a variety of upsampling options."""
+
+    def __init__(
+        self,
+        per_layer_out_ch,
+        in_channels,
+        upsampling_mode,
+        use_dropout,
+        dropout_prob=0.1,
+        first_layer_one=False,
+        norm=nn.BatchNorm2d,
+    ):
+        """
+        :param per_layer_out_ch: python list of integers. Defines the number of output channels per layer. Length of
+                                list defines number of upsampling steps (each step upsamples by factor of 2.)
+        :param in_channels: Number of input channels.
+        :param upsampling_mode: Mode of upsampling. For documentation, see class "UpBlock"
+        :param use_dropout: Whether or not to use dropout.
+        :param dropout_prob: Dropout probability.
+        :param first_layer_one: Whether the input to the first layer will have a spatial size of 1. In that case,
+                               the first layer will not have a norm, else, it will.
+        :param norm: Which norm to use. Defaults to BatchNorm.
+        """
+        super().__init__()
+
+        if not len(per_layer_out_ch):  # empty list: pass-through network
+            self.ups = Identity()
+        else:
+            self.ups = list()  # plain list while building; replaced by an nn.Sequential below
+            self.ups.append(
+                UpBlock(
+                    in_channels,
+                    per_layer_out_ch[0],
+                    use_dropout=use_dropout,
+                    dropout_prob=dropout_prob,
+                    norm=None if first_layer_one else norm,  # only the first block drops its norm
+                    upsampling_mode=upsampling_mode,
+                )
+            )
+            for i in range(0, len(per_layer_out_ch) - 1):  # remaining blocks chain consecutive channel counts
+                self.ups.append(
+                    UpBlock(
+                        per_layer_out_ch[i],
+                        per_layer_out_ch[i + 1],
+                        use_dropout=use_dropout,
+                        dropout_prob=dropout_prob,
+                        norm=norm,
+                        upsampling_mode=upsampling_mode,
+                    )
+                )
+            self.ups = nn.Sequential(*self.ups)
+
+    def forward(self, input):  # runs input through the UpBlock chain (or Identity)
+        return self.ups(input)
diff --git a/pytorch3d/pytorch3d/implicitron/tools/__init__.py b/pytorch3d/pytorch3d/implicitron/tools/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/tools/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/pytorch3d/implicitron/tools/camera_utils.py b/pytorch3d/pytorch3d/implicitron/tools/camera_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..ecf6e9fadbc559574de801af45f81ccf1788742b
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/tools/camera_utils.py
@@ -0,0 +1,142 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+# TODO: all this potentially goes to PyTorch3D
+
+import math
+from typing import Tuple
+
+import pytorch3d as pt3d
+import torch
+from pytorch3d.renderer.cameras import CamerasBase
+
+
+def jitter_extrinsics(
+    R: torch.Tensor,
+    T: torch.Tensor,
+    max_angle: float = (math.pi * 2.0),
+    translation_std: float = 1.0,
+    scale_std: float = 0.3,
+):
+    """
+    Jitter the extrinsic camera parameters `R` and `T` with a random similarity
+    transformation. The transformation rotates by a random angle between [0, max_angle];
+    scales by a random factor exp(N(0, scale_std)), where N(0, scale_std) is
+    a random sample from a normal distribution with zero mean and standard deviation scale_std;
+    and translates by a 3D offset sampled from N(0, translation_std).
+    """
+    assert all(x >= 0.0 for x in (max_angle, translation_std, scale_std))
+    N = R.shape[0]
+    R_jit = pt3d.transforms.random_rotations(1, device=R.device)
+    R_jit = pt3d.transforms.so3_exponential_map(
+        pt3d.transforms.so3_log_map(R_jit) * max_angle
+    )
+    T_jit = torch.randn_like(R_jit[:1, :, 0]) * translation_std
+    rigid_transform = pt3d.ops.eyes(dim=4, N=N, device=R.device)
+    rigid_transform[:, :3, :3] = R_jit.expand(N, 3, 3)
+    rigid_transform[:, 3, :3] = T_jit.expand(N, 3)
+    scale_jit = torch.exp(torch.randn_like(T_jit[:, 0]) * scale_std).expand(N)
+    return apply_camera_alignment(R, T, rigid_transform, scale_jit)
+
+
+def apply_camera_alignment(
+    R: torch.Tensor,
+    T: torch.Tensor,
+    rigid_transform: torch.Tensor,
+    scale: torch.Tensor,
+):
+    """
+    Args:
+        R: Camera rotation matrix of shape (N, 3, 3).
+        T: Camera translation  of shape (N, 3).
+        rigid_transform: A tensor of shape (N, 4, 4) representing a batch of
+            N 4x4 tensors that map the scene pointcloud from misaligned coords
+            to the aligned space.
+        scale: A tensor of shape (N,) containing N scaling factors.
+
+    Returns:
+        R_aligned: The aligned rotations R.
+        T_aligned: The aligned translations T.
+    """
+    R_rigid = rigid_transform[:, :3, :3]
+    T_rigid = rigid_transform[:, 3:, :3]
+    R_aligned = R_rigid.permute(0, 2, 1).bmm(R)
+    T_aligned = scale[:, None] * (T - (T_rigid @ R_aligned)[:, 0])
+    return R_aligned, T_aligned
+
+
+def get_min_max_depth_bounds(cameras, scene_center, scene_extent):
+    """
+    Estimate near/far depth plane as:
+    near = dist(cam_center, scene_center) - scene_extent
+    far  = dist(cam_center, scene_center) + scene_extent
+    """
+    cam_center = cameras.get_camera_center()
+    center_dist = (
+        ((cam_center - scene_center.to(cameras.R)[None]) ** 2)
+        .sum(dim=-1)
+        .clamp(0.001)
+        .sqrt()
+    )
+    center_dist = center_dist.clamp(scene_extent + 1e-3)
+    min_depth = center_dist - scene_extent
+    max_depth = center_dist + scene_extent
+    return min_depth, max_depth
+
+
+def volumetric_camera_overlaps(
+    cameras: CamerasBase,
+    scene_extent: float = 8.0,
+    scene_center: Tuple[float, float, float] = (0.0, 0.0, 0.0),
+    resol: int = 16,
+    weigh_by_ray_angle: bool = True,
+):
+    """
+    Compute the overlaps between viewing frustums of all pairs of cameras
+    in `cameras`.
+    """
+    device = cameras.device
+    ba = cameras.R.shape[0]
+    n_vox = int(resol**3)
+    grid = pt3d.structures.Volumes(
+        densities=torch.zeros([1, 1, resol, resol, resol], device=device),
+        volume_translation=-torch.FloatTensor(scene_center)[None].to(device),
+        voxel_size=2.0 * scene_extent / resol,
+    ).get_coord_grid(world_coordinates=True)
+
+    grid = grid.view(1, n_vox, 3).expand(ba, n_vox, 3)
+    gridp = cameras.transform_points(grid, eps=1e-2)
+    proj_in_camera = (
+        torch.prod((gridp[..., :2].abs() <= 1.0), dim=-1)
+        * (gridp[..., 2] > 0.0).float()
+    )  # ba x n_vox
+
+    if weigh_by_ray_angle:
+        rays = torch.nn.functional.normalize(
+            grid - cameras.get_camera_center()[:, None], dim=-1
+        )
+        rays_masked = rays * proj_in_camera[..., None]
+
+        # - slow and readable:
+        # inter = torch.zeros(ba, ba)
+        # for i1 in range(ba):
+        #     for i2 in range(ba):
+        #         inter[i1, i2] = (
+        #             1 + (rays_masked[i1] * rays_masked[i2]
+        #         ).sum(dim=-1)).sum()
+
+        # - fast:
+        rays_masked = rays_masked.view(ba, n_vox * 3)
+        inter = n_vox + (rays_masked @ rays_masked.t())
+
+    else:
+        inter = proj_in_camera @ proj_in_camera.t()
+
+    mass = torch.diag(inter)
+    iou = inter / (mass[:, None] + mass[None, :] - inter).clamp(0.1)
+
+    return iou
diff --git a/pytorch3d/pytorch3d/implicitron/tools/circle_fitting.py b/pytorch3d/pytorch3d/implicitron/tools/circle_fitting.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f50278933a690e0e1f4bdcff1e33408b70bc4d1
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/tools/circle_fitting.py
@@ -0,0 +1,238 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import warnings
+from dataclasses import dataclass
+from math import pi
+from typing import Optional
+
+import torch
+
+
+def get_rotation_to_best_fit_xy(
+    points: torch.Tensor, centroid: Optional[torch.Tensor] = None
+) -> torch.Tensor:
+    """
+    Returns a rotation R such that `points @ R` has a best fit plane
+    parallel to the xy plane.
+
+    Args:
+        points: (*, N, 3) tensor of points in 3D
+        centroid: (*, 1, 3), (3,) or scalar: their centroid (default: the mean)
+
+    Returns:
+        (*, 3, 3) tensor rotation matrix
+    """
+    if centroid is None:
+        centroid = points.mean(dim=-2, keepdim=True)
+
+    points_centered = points - centroid
+    _, evec = torch.linalg.eigh(points_centered.transpose(-1, -2) @ points_centered)
+    # evec may form a left-handed basis, so the last column is rebuilt as a cross
+    # product to get a proper rotation (not reflection). dim=-1 is explicit because
+    # the default picks the first size-3 dim, which is wrong for a batch of 3.
+    normal = torch.cross(evec[..., 1], evec[..., 2], dim=-1)
+    return torch.cat((evec[..., 1:], normal[..., None]), dim=-1)
+
+
+def _signed_area(path: torch.Tensor) -> torch.Tensor:
+    """
+    Calculates the signed area / Lévy area of a 2D path. If the path is closed,
+    i.e. ends where it starts, this is the integral of the winding number over
+    the whole plane. If not, consider a closed path made by adding a straight
+    line from the end to the start; the signed area is the integral of the
+    winding number (also over the plane) with respect to that closed path.
+
+    If this number is positive, it indicates in some sense that the path
+    turns anticlockwise more than clockwise, and vice versa.
+
+    Args:
+        path: N x 2 tensor of points.
+
+    Returns:
+        signed area, shape ()
+    """
+    # This calculation is a sum of areas of triangles of the form
+    # (path[0], path[i], path[i+1]), where each triangle is half a
+    # parallelogram.
+    x, y = (path[1:] - path[:1]).unbind(1)
+    return (y[1:] * x[:-1] - x[1:] * y[:-1]).sum() * 0.5
+
+
+@dataclass(frozen=True)
+class Circle2D:
+    """
+    Contains details of a circle in a plane.
+    Members
+        center: tensor shape (2,)
+        radius: tensor shape ()
+        generated_points: points around the circle, shape (n_points, 2)
+    """
+
+    center: torch.Tensor
+    radius: torch.Tensor
+    generated_points: torch.Tensor
+
+
+def fit_circle_in_2d(
+    points2d, *, n_points: int = 0, angles: Optional[torch.Tensor] = None
+) -> Circle2D:
+    """
+    Simple best fitting of a circle to 2D points. In particular, the circle which
+    minimizes the sum of the squares of the squared-distances to the circle.
+
+    Finds (a,b) and r to minimize the sum of squares (over the x,y pairs) of
+        r**2 - [(x-a)**2+(y-b)**2]
+    i.e.
+        (2*a)*x + (2*b)*y + (r**2 - a**2 - b**2)*1 - (x**2 + y**2)
+
+    In addition, generates points along the circle. If angles is None (default)
+    then n_points around the circle equally spaced are given. These begin at the
+    point closest to the first input point. They continue in the direction which
+    seems to match the movement of points in points2d, as judged by its
+    signed area. If `angles` are provided, then n_points is ignored, and points
+    along the circle at the given angles are returned, with the starting point
+    and direction as before.
+
+    (Note that `generated_points` is affected by the order of the points in
+    points2d, but the other outputs are not.)
+
+    Args:
+        points2d: N x 2 tensor of 2D points
+        n_points: number of points to generate on the circle, if angles not given
+        angles: optional angles in radians of points to generate.
+
+    Returns:
+        Circle2D object
+    """
+    design = torch.cat([points2d, torch.ones_like(points2d[:, :1])], dim=1)
+    rhs = (points2d**2).sum(1)
+    n_provided = points2d.shape[0]
+    if n_provided < 3:
+        raise ValueError(f"{n_provided} points are not enough to determine a circle")
+    solution = torch.linalg.lstsq(design, rhs[:, None]).solution
+    center = solution[:2, 0] / 2
+    radius = torch.sqrt(solution[2, 0] + (center**2).sum())
+    if n_points > 0:
+        if angles is not None:
+            warnings.warn("n_points ignored because angles provided")
+        else:
+            angles = torch.linspace(0, 2 * pi, n_points, device=points2d.device)
+
+    if angles is not None:
+        initial_direction_xy = (points2d[0] - center).unbind()
+        initial_angle = torch.atan2(initial_direction_xy[1], initial_direction_xy[0])
+        with torch.no_grad():
+            anticlockwise = _signed_area(points2d) > 0
+        if anticlockwise:
+            use_angles = initial_angle + angles
+        else:
+            use_angles = initial_angle - angles
+        generated_points = center[None] + radius * torch.stack(
+            [torch.cos(use_angles), torch.sin(use_angles)], dim=-1
+        )
+    else:
+        generated_points = points2d.new_zeros(0, 2)
+    return Circle2D(center=center, radius=radius, generated_points=generated_points)
+
+
+@dataclass(frozen=True)
+class Circle3D:
+    """
+    Contains details of a circle in 3D.
+    Members
+        center: tensor shape (3,)
+        radius: tensor shape ()
+        normal: tensor shape (3,)
+        generated_points: points around the circle, shape (n_points, 3)
+    """
+
+    center: torch.Tensor
+    radius: torch.Tensor
+    normal: torch.Tensor
+    generated_points: torch.Tensor
+
+
+def fit_circle_in_3d(
+    points,
+    *,
+    n_points: int = 0,
+    angles: Optional[torch.Tensor] = None,
+    offset: Optional[torch.Tensor] = None,
+    up: Optional[torch.Tensor] = None,
+) -> Circle3D:
+    """
+    Simple best fit circle to 3D points. Uses fit_circle_in_2d in the
+    least-squares best fit plane.
+
+    In addition, generates points along the circle. If angles is None (default)
+    then n_points around the circle equally spaced are given. These begin at the
+    point closest to the first input point. They continue in the direction which
+    seems to match the movement of points. If angles is provided, then n_points
+    is ignored, and points along the circle at the given angles are returned,
+    with the starting point and direction as before.
+
+    Further, an offset can be given to add to the generated points; this is
+    interpreted in a rotated coordinate system where (0, 0, 1) is normal to the
+    circle, specifically the normal which is approximately in the direction of a
+    given `up` vector. The remaining rotation is disambiguated in an unspecified
+    but deterministic way.
+
+    (Note that `generated_points` is affected by the order of the points in
+    points, but the other outputs are not.)
+
+    Args:
+        points: N x 3 tensor of 3D points
+        n_points: number of points to generate on the circle
+        angles: optional angles in radians of points to generate.
+        offset: optional tensor (3,), a displacement expressed in a "canonical"
+                coordinate system to add to the generated points.
+        up: optional tensor (3,), a vector which helps define the
+            "canonical" coordinate system for interpreting `offset`.
+            Required if offset is used.
+
+
+    Returns:
+        Circle3D object
+    """
+    centroid = points.mean(0)
+    r = get_rotation_to_best_fit_xy(points, centroid)
+    normal = r[:, 2]
+    rotated_points = (points - centroid) @ r
+    result_2d = fit_circle_in_2d(
+        rotated_points[:, :2], n_points=n_points, angles=angles
+    )
+    center_3d = result_2d.center @ r[:, :2].t() + centroid
+    n_generated_points = result_2d.generated_points.shape[0]
+    if n_generated_points > 0:
+        generated_points_in_plane = torch.cat(
+            [
+                result_2d.generated_points,
+                torch.zeros_like(result_2d.generated_points[:, :1]),
+            ],
+            dim=1,
+        )
+        if offset is not None:
+            if up is None:
+                raise ValueError("Missing `up` input for interpreting offset")
+            with torch.no_grad():
+                swap = torch.dot(up, normal) < 0
+            if swap:
+                # We need some rotation which takes +z to -z. Here's one.
+                generated_points_in_plane += offset * offset.new_tensor([1, -1, -1])
+            else:
+                generated_points_in_plane += offset
+
+        generated_points = generated_points_in_plane @ r.t() + centroid
+    else:
+        generated_points = points.new_zeros(0, 3)
+
+    return Circle3D(
+        radius=result_2d.radius,
+        center=center_3d,
+        normal=normal,
+        generated_points=generated_points,
+    )
diff --git a/pytorch3d/pytorch3d/implicitron/tools/config.py b/pytorch3d/pytorch3d/implicitron/tools/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..0fb4012e6546242b7dced42a48c57fe52c0fb495
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/tools/config.py
@@ -0,0 +1,1208 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import dataclasses
+import inspect
+import itertools
+import sys
+import warnings
+from collections import Counter, defaultdict
+from enum import Enum
+from functools import partial
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    get_args,
+    get_origin,
+    List,
+    Optional,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+)
+
+from omegaconf import DictConfig, OmegaConf, open_dict
+
+
+"""
+This functionality allows a configurable system to be determined in a dataclass-type
+way. It is a generalization of omegaconf's "structured", in the dataclass case.
+Core functionality:
+
+- Configurable -- A base class used to label a class as being one which uses this
+                    system. Uses class members and __post_init__ like a dataclass.
+
+- expand_args_fields -- Expands a class like `dataclasses.dataclass`. Runs automatically.
+
+- get_default_args -- gets an omegaconf.DictConfig for initializing a given class.
+
+- run_auto_creation -- Initialises nested members. To be called in __post_init__.
+
+
+In addition, a Configurable may contain members whose type is decided at runtime.
+
+- ReplaceableBase -- As a base instead of Configurable, labels a class to say that
+                     any child class can be used instead.
+
+- registry -- A global store of named child classes of  ReplaceableBase classes.
+              Used as `@registry.register` decorator on class definition.
+
+
+Additional utility functions:
+
+- remove_unused_components -- used for simplifying a DictConfig instance.
+- get_default_args_field -- default for DictConfig member of another configurable.
+- enable_get_default_args -- Allows get_default_args on a function or plain class.
+
+
+1. The simplest usage of this functionality is as follows. First a schema is defined
+in dataclass style.
+
+    class A(Configurable):
+        n: int = 9
+
+    class B(Configurable):
+        a: A
+
+        def __post_init__(self):
+            run_auto_creation(self)
+
+Then it can be used like
+
+    b_args = get_default_args(B)
+    b = B(**b_args)
+
+In this case, get_default_args(B) returns an omegaconf.DictConfig with the right
+members {"a_args": {"n": 9}}. It also modifies the definitions of the classes to
+something like the following. (The modification itself is done by the function
+`expand_args_fields`, which is called inside `get_default_args`.)
+
+    @dataclasses.dataclass
+    class A:
+        n: int = 9
+
+    @dataclasses.dataclass
+    class B:
+        a_args: DictConfig = dataclasses.field(default_factory=lambda: DictConfig({"n": 9}))
+
+        def __post_init__(self):
+            self.a = A(**self.a_args)
+
+2. Pluggability. Instead of a dataclass-style member being given a concrete class,
+it can be given a base class and the implementation will be looked up by name in the
+global `registry` in this module. E.g.
+
+    class A(ReplaceableBase):
+        k: int = 1
+
+    @registry.register
+    class A1(A):
+        m: int = 3
+
+    @registry.register
+    class A2(A):
+        n: str = "2"
+
+    class B(Configurable):
+        a: A
+        a_class_type: str = "A2"
+        b: Optional[A]
+        b_class_type: Optional[str] = "A2"
+
+        def __post_init__(self):
+            run_auto_creation(self)
+
+will expand to
+
+    @dataclasses.dataclass
+    class A:
+        k: int = 1
+
+    @dataclasses.dataclass
+    class A1(A):
+        m: int = 3
+
+    @dataclasses.dataclass
+    class A2(A):
+        n: str = "2"
+
+    @dataclasses.dataclass
+    class B:
+        a_class_type: str = "A2"
+        a_A1_args: DictConfig = dataclasses.field(
+            default_factory=lambda: DictConfig({"k": 1, "m": 3})
+        )
+        a_A2_args: DictConfig = dataclasses.field(
+            default_factory=lambda: DictConfig({"k": 1, "n": "2"})
+        )
+        b_class_type: Optional[str] = "A2"
+        b_A1_args: DictConfig = dataclasses.field(
+            default_factory=lambda: DictConfig({"k": 1, "m": 3})
+        )
+        b_A2_args: DictConfig = dataclasses.field(
+            default_factory=lambda: DictConfig({"k": 1, "n": "2"})
+        )
+
+        def __post_init__(self):
+            if self.a_class_type == "A1":
+                self.a = A1(**self.a_A1_args)
+            elif self.a_class_type == "A2":
+                self.a = A2(**self.a_A2_args)
+            else:
+                raise ValueError(...)
+
+            if self.b_class_type is None:
+                self.b = None
+            elif self.b_class_type == "A1":
+                self.b = A1(**self.b_A1_args)
+            elif self.b_class_type == "A2":
+                self.b = A2(**self.b_A2_args)
+            else:
+                raise ValueError(...)
+
+3. Aside from these classes, the members of these classes should be things
+which DictConfig is happy with: e.g. (bool, int, str, None, float) and what
+can be built from them with `DictConfig`s and lists of them.
+
+In addition, you can call `get_default_args` on a function or class to get
+the `DictConfig` of its defaulted arguments, assuming those are all things
+which `DictConfig` is happy with, so long as you add a call to
+`enable_get_default_args` after its definition. If you want to use such a
+thing as the default for a member of another configured class,
+`get_default_args_field` is a helper.
+"""
+
+
+TYPE_SUFFIX: str = "_class_type"
+ARGS_SUFFIX: str = "_args"
+ENABLED_SUFFIX: str = "_enabled"
+CREATE_PREFIX: str = "create_"
+IMPL_SUFFIX: str = "_impl"
+TWEAK_SUFFIX: str = "_tweak_args"
+_DATACLASS_INIT: str = "__dataclass_own_init__"
+PRE_EXPAND_NAME: str = "pre_expand"
+
+
+class ReplaceableBase:
+    """
+    Base class for a class (a "replaceable") which is a base class for
+    dataclass-style implementations. The implementations can be stored
+    in the registry. They get expanded into dataclasses with expand_args_fields.
+    This expansion is delayed.
+    """
+
+    def __new__(cls, *args, **kwargs):
+        """
+        These classes should be expanded only when needed (because processing
+        fixes the list of replaceable subclasses of members of the class). It
+        is safer if users expand the classes explicitly. But if the class gets
+        instantiated when it hasn't been processed, we expand it here.
+        """
+        obj = super().__new__(cls)
+        if cls is not ReplaceableBase and not _is_actually_dataclass(cls):
+            expand_args_fields(cls)
+        return obj
+
+
+class Configurable:
+    """
+    Base class for dataclass-style classes which are not replaceable. These get
+    expanded into a dataclass with expand_args_fields.
+    This expansion is delayed.
+    """
+
+    def __new__(cls, *args, **kwargs):
+        """
+        These classes should be expanded only when needed (because processing
+        fixes the list of replaceable subclasses of members of the class). It
+        is safer if users expand the classes explicitly. But if the class gets
+        instantiated when it hasn't been processed, we expand it here.
+        """
+        obj = super().__new__(cls)
+        if cls is not Configurable and not _is_actually_dataclass(cls):
+            expand_args_fields(cls)
+        return obj
+
+
+_X = TypeVar("X", bound=ReplaceableBase)
+_Y = TypeVar("Y", bound=Union[ReplaceableBase, Configurable])
+
+
+class _Registry:
+    """
+    Register from names to classes. In particular, we say that direct subclasses of
+    ReplaceableBase are "base classes" and we register subclasses of each base class
+    in a separate namespace.
+    """
+
+    def __init__(self) -> None:
+        self._mapping: Dict[
+            Type[ReplaceableBase], Dict[str, Type[ReplaceableBase]]
+        ] = defaultdict(dict)
+
+    def register(self, some_class: Type[_X]) -> Type[_X]:
+        """
+        A class decorator, to register a class in self.
+        """
+        name = some_class.__name__
+        self._register(some_class, name=name)
+        return some_class
+
+    def _register(
+        self,
+        some_class: Type[ReplaceableBase],
+        *,
+        base_class: Optional[Type[ReplaceableBase]] = None,
+        name: str,
+    ) -> None:
+        """
+        Register a new member.
+
+        Args:
+            some_class: the new member
+            base_class: (optional) what the new member is a type for
+            name: name for the new member
+        """
+        if base_class is None:
+            base_class = self._base_class_from_class(some_class)
+            if base_class is None:
+                raise ValueError(
+                    f"Cannot register {some_class}. Cannot tell what it is."
+                )
+        self._mapping[base_class][name] = some_class
+
+    def get(self, base_class_wanted: Type[_X], name: str) -> Type[_X]:
+        """
+        Retrieve a class from the registry by name
+
+        Args:
+            base_class_wanted: parent type of type we are looking for.
+                        It determines the namespace.
+                        This will typically be a direct subclass of ReplaceableBase.
+            name: what to look for
+
+        Returns:
+            class type
+        """
+        if self._is_base_class(base_class_wanted):
+            base_class = base_class_wanted
+        else:
+            base_class = self._base_class_from_class(base_class_wanted)
+            if base_class is None:
+                raise ValueError(
+                    f"Cannot look up {base_class_wanted}. Cannot tell what it is."
+                )
+        if not isinstance(name, str):
+            raise ValueError(
+                f"Cannot look up a {type(name)} in the registry. Got {name}."
+            )
+        result = self._mapping[base_class].get(name)
+        if result is None:
+            raise ValueError(f"{name} has not been registered.")
+        if not issubclass(result, base_class_wanted):
+            raise ValueError(
+                f"{name} resolves to {result} which does not subclass {base_class_wanted}"
+            )
+        # pyre-ignore[7]
+        return result
+
+    def get_all(
+        self, base_class_wanted: Type[ReplaceableBase]
+    ) -> List[Type[ReplaceableBase]]:
+        """
+        Retrieve all registered implementations from the registry
+
+        Args:
+            base_class_wanted: parent type of type we are looking for.
+                        It determines the namespace.
+                        This will typically be a direct subclass of ReplaceableBase.
+        Returns:
+            list of class types in alphabetical order of registered name.
+        """
+        if self._is_base_class(base_class_wanted):
+            source = self._mapping[base_class_wanted]
+            return [source[key] for key in sorted(source)]
+
+        base_class = self._base_class_from_class(base_class_wanted)
+        if base_class is None:
+            raise ValueError(
+                f"Cannot look up {base_class_wanted}. Cannot tell what it is."
+            )
+        source = self._mapping[base_class]
+        return [
+            source[key]
+            for key in sorted(source)
+            if issubclass(source[key], base_class_wanted)
+            and source[key] is not base_class_wanted
+        ]
+
+    @staticmethod
+    def _is_base_class(some_class: Type[ReplaceableBase]) -> bool:
+        """
+        Return whether the given type is a direct subclass of ReplaceableBase
+        and so gets used as a namespace.
+        """
+        return ReplaceableBase in some_class.__bases__
+
+    @staticmethod
+    def _base_class_from_class(
+        some_class: Type[ReplaceableBase],
+    ) -> Optional[Type[ReplaceableBase]]:
+        """
+        Find the parent class of some_class which inherits ReplaceableBase, or None
+        """
+        for base in some_class.mro()[-3::-1]:
+            if base is not ReplaceableBase and issubclass(base, ReplaceableBase):
+                return base
+        return None
+
+
+# Global instance of the registry
+registry = _Registry()
+
+
+class _ProcessType(Enum):
+    """
+    Type of member which gets rewritten by expand_args_fields.
+    """
+
+    CONFIGURABLE = 1
+    REPLACEABLE = 2
+    OPTIONAL_CONFIGURABLE = 3
+    OPTIONAL_REPLACEABLE = 4
+
+
+def _default_create(
+    name: str, type_: Type, process_type: _ProcessType
+) -> Callable[[Any], None]:
+    """
+    Return the default creation function for a member. This is a function which
+    could be called in __post_init__ to initialise the member, and will be called
+    from run_auto_creation.
+
+    Args:
+        name: name of the member
+        type_: type of the member (with any Optional removed)
+        process_type: Shows whether member's declared type inherits ReplaceableBase,
+                    in which case the actual type to be created is decided at
+                    runtime.
+
+    Returns:
+        Function taking one argument, the object whose member should be
+            initialized, i.e. self.
+    """
+    impl_name = f"{CREATE_PREFIX}{name}{IMPL_SUFFIX}"
+
+    def inner(self):
+        expand_args_fields(type_)
+        impl = getattr(self, impl_name)
+        args = getattr(self, name + ARGS_SUFFIX)
+        impl(True, args)
+
+    def inner_optional(self):
+        expand_args_fields(type_)
+        impl = getattr(self, impl_name)
+        enabled = getattr(self, name + ENABLED_SUFFIX)
+        args = getattr(self, name + ARGS_SUFFIX)
+        impl(enabled, args)
+
+    def inner_pluggable(self):
+        type_name = getattr(self, name + TYPE_SUFFIX)
+        impl = getattr(self, impl_name)
+        if type_name is None:
+            args = None
+        else:
+            args = getattr(self, f"{name}_{type_name}{ARGS_SUFFIX}", None)
+        impl(type_name, args)
+
+    if process_type == _ProcessType.OPTIONAL_CONFIGURABLE:
+        return inner_optional
+    return inner if process_type == _ProcessType.CONFIGURABLE else inner_pluggable
+
+
+def _default_create_impl(
+    name: str, type_: Type, process_type: _ProcessType
+) -> Callable[[Any, Any, DictConfig], None]:
+    """
+    Return the default internal function for initialising a member. This is a function
+    which could be called in the create_ function to initialise the member.
+
+    Args:
+        name: name of the member
+        type_: type of the member (with any Optional removed)
+        process_type: Shows whether member's declared type inherits ReplaceableBase,
+                    in which case the actual type to be created is decided at
+                    runtime.
+
+    Returns:
+        Function taking
+            - self, the object whose member should be initialized.
+            - option for what to do. This is
+                - for pluggables, the type to initialise or None to do nothing
+                - for non pluggables, a bool indicating whether to initialise.
+            - the args for initializing the member.
+    """
+
+    def create_configurable(self, enabled, args):
+        if enabled:
+            expand_args_fields(type_)
+            setattr(self, name, type_(**args))
+        else:
+            setattr(self, name, None)
+
+    def create_pluggable(self, type_name, args):
+        if type_name is None:
+            setattr(self, name, None)
+            return
+
+        if not isinstance(type_name, str):
+            raise ValueError(
+                f"A {type(type_name)} was received as the type of {name}."
+                + f" Perhaps this is from {name}{TYPE_SUFFIX}?"
+            )
+        chosen_class = registry.get(type_, type_name)
+        if self._known_implementations.get(type_name, chosen_class) is not chosen_class:
+            # If this warning is raised, it means that a new definition of
+            # the chosen class has been registered since our class was processed
+            # (i.e. expanded). A DictConfig which comes from our get_default_args
+            # (which might have triggered the processing) will contain the old default
+            # values for the members of the chosen class. Changes to those defaults which
+            # were made in the redefinition will not be reflected here.
+            warnings.warn(f"New implementation of {type_name} is being chosen.")
+        expand_args_fields(chosen_class)
+        setattr(self, name, chosen_class(**args))
+
+    if process_type in (_ProcessType.CONFIGURABLE, _ProcessType.OPTIONAL_CONFIGURABLE):
+        return create_configurable
+    return create_pluggable
+
+
+def run_auto_creation(self: Any) -> None:
+    """
+    Run all the functions named in self._creation_functions.
+    """
+    for create_function in self._creation_functions:
+        getattr(self, create_function)()
+
+
+def _is_configurable_class(C) -> bool:
+    return isinstance(C, type) and issubclass(C, (Configurable, ReplaceableBase))
+
+
+def get_default_args(C, *, _do_not_process: Tuple[type, ...] = ()) -> DictConfig:
+    """
+    Get the DictConfig corresponding to the defaults in a dataclass or
+    configurable. In normal use, a dataclass is provided as C.
+    If enable_get_default_args has been called on a function or plain class,
+    then that function or class can be provided as C.
+
+    If C is a subclass of Configurable or ReplaceableBase, we make sure
+    it has been processed with expand_args_fields.
+
+    Args:
+        C: the class or function to be processed
+        _do_not_process: (internal use) When this function is called from
+                    expand_args_fields, we specify any class currently being
+                    processed, to make sure we don't try to process a class
+                    while it is already being processed.
+
+    Returns:
+        new DictConfig object, which is typed.
+    """
+    if C is None:
+        return DictConfig({})
+
+    if _is_configurable_class(C):
+        if C in _do_not_process:
+            raise ValueError(
+                f"Internal recursion error. Need processed {C},"
+                f" but cannot get it. _do_not_process={_do_not_process}"
+            )
+        # This is safe to run multiple times. It will return
+        # straight away if C has already been processed.
+        expand_args_fields(C, _do_not_process=_do_not_process)
+
+    if dataclasses.is_dataclass(C):
+        # Note that if get_default_args_field is used somewhere in C,
+        # this call is recursive. No special care is needed,
+        # because in practice get_default_args_field is used for
+        # types other than the outer type.
+
+        try:
+            out: DictConfig = OmegaConf.structured(C)
+        except Exception:
+            print(f"### OmegaConf.structured({C}) failed ###")
+            # We don't use `raise From` here, because that gets the original
+            # exception hidden by the OC_CAUSE logic in the case where we are
+            # called by hydra.
+            raise
+        exclude = getattr(C, "_processed_members", ())
+        with open_dict(out):
+            for field in exclude:
+                out.pop(field, None)
+        return out
+
+    if _is_configurable_class(C):
+        raise ValueError(f"Failed to process {C}")
+
+    if not inspect.isfunction(C) and not inspect.isclass(C):
+        raise ValueError(f"Unexpected {C}")
+
+    dataclass_name = _dataclass_name_for_function(C)
+    dataclass = getattr(sys.modules[C.__module__], dataclass_name, None)
+    if dataclass is None:
+        raise ValueError(
+            f"Cannot get args for {C}. Was enable_get_default_args forgotten?"
+        )
+
+    try:
+        out: DictConfig = OmegaConf.structured(dataclass)
+    except Exception:
+        print(f"### OmegaConf.structured failed for {C.__name__} ###")
+        raise
+    return out
+
+
+def _dataclass_name_for_function(C: Any) -> str:
+    """
+    Returns the name of the dataclass which enable_get_default_args(C)
+    creates.
+    """
+    name = f"_{C.__name__}_default_args_"
+    return name
+
+
+def _field_annotations_for_default_args(
+    C: Any,
+) -> List[Tuple[str, Any, dataclasses.Field]]:
+    """
+    If C is a function or a plain class with an __init__ function,
+    return the fields which `enable_get_default_args(C)` will need
+    to make a dataclass with.
+
+    Args:
+        C: a function, or a class with an __init__ function. Must
+            have types for all its defaulted args.
+
+    Returns:
+        a list of fields for a dataclass.
+    """
+
+    field_annotations = []
+    for pname, defval in _params_iter(C):
+        default = defval.default
+        if default == inspect.Parameter.empty:
+            # we do not have a default value for the parameter
+            continue
+
+        if defval.annotation == inspect._empty:
+            raise ValueError(
+                "All arguments of the input to enable_get_default_args have to"
+                f" be typed. Argument '{pname}' does not have a type annotation."
+            )
+
+        _, annotation = _resolve_optional(defval.annotation)
+
+        if isinstance(default, set):  # force OmegaConf to convert it to ListConfig
+            default = tuple(default)
+
+        if isinstance(default, (list, dict)):
+            # OmegaConf will convert to [Dict|List]Config, so it is safe to reuse the value
+            field_ = dataclasses.field(default_factory=lambda default=default: default)
+        elif not _is_immutable_type(annotation, default):
+            continue
+        else:
+            # we can use a simple default argument for dataclass.field
+            field_ = dataclasses.field(default=default)
+        field_annotations.append((pname, defval.annotation, field_))
+
+    return field_annotations
+
+
+def enable_get_default_args(C: Any, *, overwrite: bool = True) -> None:
+    """
+    If C is a function or a plain class with an __init__ function,
+    and you want get_default_args(C) to work, then add
+    `enable_get_default_args(C)` straight after the definition of C.
+    This makes a dataclass corresponding to the default arguments of C
+    and stores it in the same module as C.
+
+    Args:
+        C: a function, or a class with an __init__ function. Must
+            have types for all its defaulted args.
+        overwrite: whether to allow calling this a second time on
+            the same function.
+    """
+    if not inspect.isfunction(C) and not inspect.isclass(C):
+        raise ValueError(f"Unexpected {C}")
+
+    field_annotations = _field_annotations_for_default_args(C)
+
+    name = _dataclass_name_for_function(C)
+    module = sys.modules[C.__module__]
+    if hasattr(module, name):
+        if overwrite:
+            warnings.warn(f"Overwriting {name} in {C.__module__}.")
+        else:
+            raise ValueError(f"Cannot overwrite {name} in {C.__module__}.")
+    dc = dataclasses.make_dataclass(name, field_annotations)
+    dc.__module__ = C.__module__
+    setattr(module, name, dc)
+
+
+def _params_iter(C):
+    """Returns (name, inspect.Parameter) pairs for a function C or a class C's __init__."""
+    if inspect.isclass(C):
+        return itertools.islice(  # exclude `self`
+            inspect.signature(C.__init__).parameters.items(), 1, None
+        )
+
+    return inspect.signature(C).parameters.items()
+
+
+def _is_immutable_type(type_: Type, val: Any) -> bool:
+    if val is None:
+        return True
+
+    PRIMITIVE_TYPES = (int, float, bool, str, bytes, tuple)
+    # sometimes type can be too relaxed (e.g. Any), so we also check values
+    if isinstance(val, PRIMITIVE_TYPES):
+        return True
+
+    return type_ in PRIMITIVE_TYPES or (
+        inspect.isclass(type_) and issubclass(type_, Enum)
+    )
+
+
+# copied from OmegaConf
+def _resolve_optional(type_: Any) -> Tuple[bool, Any]:
+    """Check whether `type_` is equivalent to `typing.Optional[T]` for some T."""
+    if get_origin(type_) is Union:
+        args = get_args(type_)
+        if len(args) == 2 and args[1] == type(None):  # noqa E721
+            return True, args[0]
+    if type_ is Any:
+        return True, Any
+
+    return False, type_
+
+
+def _is_actually_dataclass(some_class) -> bool:
+    # Return whether the class some_class has been processed with
+    # the dataclass annotation. This is more specific than
+    # dataclasses.is_dataclass which returns True on anything
+    # deriving from a dataclass.
+
+    # Checking for __init__ would also work for our purpose.
+    return "__dataclass_fields__" in some_class.__dict__
+
+
+def expand_args_fields(
+    some_class: Type[_Y], *, _do_not_process: Tuple[type, ...] = ()
+) -> Type[_Y]:
+    """
+    This expands a class which inherits Configurable or ReplaceableBase classes,
+    including dataclass processing. some_class is modified in place by this function.
+    If expand_args_fields(some_class) has already been called, subsequent calls do
+    nothing and return some_class unmodified.
+    For classes of type ReplaceableBase, you can add some_class to the registry before
+    or after calling this function. But potential inner classes need to be registered
+    before this function is run on the outer class.
+
+    The transformations this function makes, before the concluding
+    dataclasses.dataclass, are as follows. If X is a base class with registered
+    subclasses Y and Z, replace a class member
+
+        x: X
+
+    and optionally
+
+        x_class_type: str = "Y"
+        def create_x(self):...
+
+    with
+
+        x_Y_args: dict = dataclasses.field(default_factory=lambda: get_default_args(Y))
+        x_Z_args: dict = dataclasses.field(default_factory=lambda: get_default_args(Z))
+        def create_x(self):
+            args = getattr(self, f"x_{self.x_class_type}_args")
+            self.create_x_impl(self.x_class_type, args)
+        def create_x_impl(self, x_type, args):
+            x_type = registry.get(X, x_type)
+            expand_args_fields(x_type)
+            self.x = x_type(**args)
+        x_class_type: str = "UNDEFAULTED"
+
+    without adding the optional attributes if they are already there.
+
+    Similarly, replace
+
+        x: Optional[X]
+
+    and optionally
+
+        x_class_type: Optional[str] = "Y"
+        def create_x(self):...
+
+    with
+
+        x_Y_args: dict = dataclasses.field(default_factory=lambda: get_default_args(Y))
+        x_Z_args: dict = dataclasses.field(default_factory=lambda: get_default_args(Z))
+        def create_x(self):
+            if self.x_class_type is None:
+                args = None
+            else:
+                args = getattr(self, f"x_{self.x_class_type}_args", None)
+            self.create_x_impl(self.x_class_type, args)
+        def create_x_impl(self, x_class_type, args):
+            if x_class_type is None:
+                self.x = None
+                return
+
+            x_type = registry.get(X, x_class_type)
+            expand_args_fields(x_type)
+            assert args is not None
+            self.x = x_type(**args)
+        x_class_type: Optional[str] = "UNDEFAULTED"
+
+    without adding the optional attributes if they are already there.
+
+    Similarly, if X is a subclass of Configurable,
+
+        x: X
+
+    and optionally
+
+        def create_x(self):...
+
+    will be replaced with
+
+        x_args: dict = dataclasses.field(default_factory=lambda: get_default_args(X))
+        def create_x(self):
+            self.create_x_impl(True, self.x_args)
+
+        def create_x_impl(self, enabled, args):
+            if enabled:
+                expand_args_fields(X)
+                self.x = X(**args)
+            else:
+                self.x = None
+
+    Similarly, replace,
+
+        x: Optional[X]
+        x_enabled: bool = ...
+
+    and optionally
+
+        def create_x(self):...
+
+    with
+
+        x_args: dict = dataclasses.field(default_factory=lambda: get_default_args(X))
+        x_enabled: bool = ...
+        def create_x(self):
+            self.create_x_impl(self.x_enabled, self.x_args)
+
+        def create_x_impl(self, enabled, args):
+            if enabled:
+                expand_args_fields(X)
+                self.x = X(**args)
+            else:
+                self.x = None
+
+
+    Also adds the following class members, unannotated so that dataclass
+    ignores them.
+        - _creation_functions: Tuple[str, ...] of all the create_ functions,
+            including those from base classes (not the create_x_impl ones).
+        - _known_implementations: Dict[str, Type] containing the classes which
+            have been found from the registry.
+            (used only to raise a warning if one has been overwritten)
+        - _processed_members: a Dict[str, Any] of all the members which have been
+            transformed, with values giving the types they were declared to have.
+            (E.g. {"x": X} or {"x": Optional[X]} in the cases above.)
+
+    In addition, if the class has a member function
+
+        @classmethod
+        def x_tweak_args(cls, member_type: Type, args: DictConfig) -> None
+
+    then the default_factory of x_args will also have a call to x_tweak_args(X, x_args) and
+    the default_factory of x_Y_args will also have a call to x_tweak_args(Y, x_Y_args).
+
+    In addition, if the class inherits torch.nn.Module, the generated __init__ will
+    call torch.nn.Module's __init__ before doing anything else.
+
+    Before any transformation of the class, if the class has a classmethod called
+    `pre_expand`, it will be called with no arguments.
+
+    Note that although the *_args members are intended to have type DictConfig, they
+    are actually internally annotated as dicts. OmegaConf is happy to see a DictConfig
+    in place of a dict, but not vice-versa. Allowing dict lets a class user specify
+    x_args as an explicit dict without getting an incomprehensible error.
+
+    Args:
+        some_class: the class to be processed
+        _do_not_process: Internal use for get_default_args: Because get_default_args calls
+                        and is called by this function, we let it specify any class currently
+                        being processed, to make sure we don't try to process a class while
+                        it is already being processed.
+
+
+    Returns:
+        some_class itself, which has been modified in place. This
+        allows this function to be used as a class decorator.
+    """
+    if _is_actually_dataclass(some_class):
+        return some_class
+
+    if hasattr(some_class, PRE_EXPAND_NAME):
+        getattr(some_class, PRE_EXPAND_NAME)()
+
+    # The functions this class's run_auto_creation will run.
+    creation_functions: List[str] = []
+    # The classes which this type knows about from the registry
+    # We could use a weakref.WeakValueDictionary here which would mean
+    # that we don't warn if the class we should have expected is elsewhere
+    # unused.
+    known_implementations: Dict[str, Type] = {}
+    # Names of members which have been processed.
+    processed_members: Dict[str, Any] = {}
+
+    # For all bases except ReplaceableBase and Configurable and object,
+    # we need to process them before our own processing. This is
+    # because dataclasses expect to inherit dataclasses and not unprocessed
+    # dataclasses.
+    for base in some_class.mro()[-3:0:-1]:
+        if base is ReplaceableBase:
+            continue
+        if base is Configurable:
+            continue
+        if not issubclass(base, (Configurable, ReplaceableBase)):
+            continue
+        expand_args_fields(base, _do_not_process=_do_not_process)
+        if "_creation_functions" in base.__dict__:
+            creation_functions.extend(base._creation_functions)
+        if "_known_implementations" in base.__dict__:
+            known_implementations.update(base._known_implementations)
+        if "_processed_members" in base.__dict__:
+            processed_members.update(base._processed_members)
+
+    to_process: List[Tuple[str, Type, _ProcessType]] = []
+    if "__annotations__" in some_class.__dict__:
+        for name, type_ in some_class.__annotations__.items():
+            underlying_and_process_type = _get_type_to_process(type_)
+            if underlying_and_process_type is None:
+                continue
+            underlying_type, process_type = underlying_and_process_type
+            to_process.append((name, underlying_type, process_type))
+
+    for name, underlying_type, process_type in to_process:
+        processed_members[name] = some_class.__annotations__[name]
+        _process_member(
+            name=name,
+            type_=underlying_type,
+            process_type=process_type,
+            some_class=some_class,
+            creation_functions=creation_functions,
+            _do_not_process=_do_not_process,
+            known_implementations=known_implementations,
+        )
+
+    for key, count in Counter(creation_functions).items():
+        if count > 1:
+            warnings.warn(f"Clash with {key} in a base class.")
+    some_class._creation_functions = tuple(creation_functions)
+    some_class._processed_members = processed_members
+    some_class._known_implementations = known_implementations
+
+    dataclasses.dataclass(eq=False)(some_class)
+    _fixup_class_init(some_class)
+    return some_class
+
+
+def _fixup_class_init(some_class) -> None:
+    """
+    In-place modification of the some_class class which happens
+    after dataclass processing.
+
+    If the dataclass some_class inherits torch.nn.Module, then
+    makes torch.nn.Module's __init__ be called before anything else
+    on instantiation of some_class.
+    This is a bit like attr's __pre_init__.
+    """
+
+    assert _is_actually_dataclass(some_class)
+    try:
+        import torch
+    except ModuleNotFoundError:
+        return
+
+    if not issubclass(some_class, torch.nn.Module):
+        return
+
+    def init(self, *args, **kwargs) -> None:
+        torch.nn.Module.__init__(self)
+        getattr(self, _DATACLASS_INIT)(*args, **kwargs)
+
+    assert _DATACLASS_INIT not in some_class.__dict__
+
+    setattr(some_class, _DATACLASS_INIT, some_class.__init__)
+    some_class.__init__ = init
+
+
+def get_default_args_field(
+    C,
+    *,
+    _do_not_process: Tuple[type, ...] = (),
+    _hook: Optional[Callable[[DictConfig], None]] = None,
+):
+    """
+    Get a dataclass field which defaults to get_default_args(...)
+
+    Args:
+        C: As for get_default_args.
+        _do_not_process: As for get_default_args
+        _hook: Function called on the result before returning.
+
+    Returns:
+        a dataclass field whose default_factory returns a new DictConfig object
+    """
+
+    def create():
+        args = get_default_args(C, _do_not_process=_do_not_process)
+        if _hook is not None:
+            with open_dict(args):
+                _hook(args)
+        return args
+
+    return dataclasses.field(default_factory=create)
+
+
+def _get_default_args_field_from_registry(
+    *,
+    base_class_wanted: Type[_X],
+    name: str,
+    _do_not_process: Tuple[type, ...] = (),
+    _hook: Optional[Callable[[DictConfig], None]] = None,
+):
+    """
+    Get a dataclass field which defaults to
+    get_default_args(registry.get(base_class_wanted, name)).
+
+    This is used internally in place of get_default_args_field in
+    order that default values are updated if a class is redefined.
+
+    Args:
+        base_class_wanted: As for registry.get.
+        name: As for registry.get.
+        _do_not_process: As for get_default_args
+        _hook: Function called on the result before returning.
+
+    Returns:
+        a dataclass field whose default_factory returns a new DictConfig object
+    """
+
+    def create():
+        C = registry.get(base_class_wanted=base_class_wanted, name=name)
+        args = get_default_args(C, _do_not_process=_do_not_process)
+        if _hook is not None:
+            with open_dict(args):
+                _hook(args)
+        return args
+
+    return dataclasses.field(default_factory=create)
+
+
+def _get_type_to_process(type_) -> Optional[Tuple[Type, _ProcessType]]:
+    """
+    If a member is annotated as `type_`, and that should be expanded in
+    expand_args_fields, return how it should be expanded.
+    """
+    if get_origin(type_) == Union:
+        # We look for Optional[X] which is a Union of X with None.
+        args = get_args(type_)
+        if len(args) != 2 or all(a is not type(None) for a in args):  # noqa: E721
+            return
+        underlying = args[0] if args[1] is type(None) else args[1]  # noqa: E721
+        if (
+            isinstance(underlying, type)
+            and issubclass(underlying, ReplaceableBase)
+            and ReplaceableBase in underlying.__bases__
+        ):
+            return underlying, _ProcessType.OPTIONAL_REPLACEABLE
+
+        if isinstance(underlying, type) and issubclass(underlying, Configurable):
+            return underlying, _ProcessType.OPTIONAL_CONFIGURABLE
+
+    if not isinstance(type_, type):
+        # e.g. any other Union or Tuple. Or ClassVar.
+        return
+
+    if issubclass(type_, ReplaceableBase) and ReplaceableBase in type_.__bases__:
+        return type_, _ProcessType.REPLACEABLE
+
+    if issubclass(type_, Configurable):
+        return type_, _ProcessType.CONFIGURABLE
+
+
+def _process_member(
+    *,
+    name: str,
+    type_: Type,
+    process_type: _ProcessType,
+    some_class: Type,
+    creation_functions: List[str],
+    _do_not_process: Tuple[type, ...],
+    known_implementations: Dict[str, Type],
+) -> None:
+    """
+    Make the modification (of expand_args_fields) to some_class for a single member.
+
+    Args:
+        name: member name
+        type_: member type (with Optional removed if needed)
+        process_type: whether member has dynamic type
+        some_class: (MODIFIED IN PLACE) the class being processed
+        creation_functions: (MODIFIED IN PLACE) the names of the create functions
+        _do_not_process: as for expand_args_fields.
+        known_implementations: (MODIFIED IN PLACE) known types from the registry
+    """
+    # Because we are adding defaultable members, make
+    # sure they go at the end of __annotations__ in case
+    # there are non-defaulted standard class members.
+    del some_class.__annotations__[name]
+    hook = getattr(some_class, name + TWEAK_SUFFIX, None)
+
+    if process_type in (_ProcessType.REPLACEABLE, _ProcessType.OPTIONAL_REPLACEABLE):
+        type_name = name + TYPE_SUFFIX
+        if type_name not in some_class.__annotations__:
+            if process_type == _ProcessType.OPTIONAL_REPLACEABLE:
+                some_class.__annotations__[type_name] = Optional[str]
+            else:
+                some_class.__annotations__[type_name] = str
+            setattr(some_class, type_name, "UNDEFAULTED")
+
+        for derived_type in registry.get_all(type_):
+            if derived_type in _do_not_process:
+                continue
+            if issubclass(derived_type, some_class):
+                # When derived_type is some_class we have a simple
+                # recursion to avoid. When it's a strict subclass the
+                # situation is even worse.
+                continue
+            known_implementations[derived_type.__name__] = derived_type
+            args_name = f"{name}_{derived_type.__name__}{ARGS_SUFFIX}"
+            if args_name in some_class.__annotations__:
+                raise ValueError(
+                    f"Cannot generate {args_name} because it is already present."
+                )
+            some_class.__annotations__[args_name] = dict
+            if hook is not None:
+                hook_closed = partial(hook, derived_type)
+            else:
+                hook_closed = None
+            setattr(
+                some_class,
+                args_name,
+                _get_default_args_field_from_registry(
+                    base_class_wanted=type_,
+                    name=derived_type.__name__,
+                    _do_not_process=_do_not_process + (some_class,),
+                    _hook=hook_closed,
+                ),
+            )
+    else:
+        args_name = name + ARGS_SUFFIX
+        if args_name in some_class.__annotations__:
+            raise ValueError(
+                f"Cannot generate {args_name} because it is already present."
+            )
+        if issubclass(type_, some_class) or type_ in _do_not_process:
+            raise ValueError(f"Cannot process {type_} inside {some_class}")
+
+        some_class.__annotations__[args_name] = dict
+        if hook is not None:
+            hook_closed = partial(hook, type_)
+        else:
+            hook_closed = None
+        setattr(
+            some_class,
+            args_name,
+            get_default_args_field(
+                type_,
+                _do_not_process=_do_not_process + (some_class,),
+                _hook=hook_closed,
+            ),
+        )
+        if process_type == _ProcessType.OPTIONAL_CONFIGURABLE:
+            enabled_name = name + ENABLED_SUFFIX
+            if enabled_name not in some_class.__annotations__:
+                raise ValueError(
+                    f"{name} is an Optional[{type_.__name__}] member "
+                    f"but there is no corresponding member {enabled_name}."
+                )
+
+    creation_function_name = f"{CREATE_PREFIX}{name}"
+    if not hasattr(some_class, creation_function_name):
+        setattr(
+            some_class,
+            creation_function_name,
+            _default_create(name, type_, process_type),
+        )
+    creation_functions.append(creation_function_name)
+
+    creation_function_impl_name = f"{CREATE_PREFIX}{name}{IMPL_SUFFIX}"
+    if not hasattr(some_class, creation_function_impl_name):
+        setattr(
+            some_class,
+            creation_function_impl_name,
+            _default_create_impl(name, type_, process_type),
+        )
+
+
+def remove_unused_components(dict_: DictConfig) -> None:
+    """
+    Assuming dict_ represents the state of a configurable,
+    modify it to remove all the portions corresponding to
+    pluggable parts which are not in use.
+    For example, if renderer_class_type is SignedDistanceFunctionRenderer,
+    the renderer_MultiPassEmissionAbsorptionRenderer_args will be
+    removed. Also, if chocolate_enabled is False, then chocolate_args will
+    be removed.
+
+    Args:
+        dict_: (MODIFIED IN PLACE) a DictConfig instance
+    """
+    keys = [key for key in dict_ if isinstance(key, str)]
+    suffix_length = len(TYPE_SUFFIX)
+    replaceables = [key[:-suffix_length] for key in keys if key.endswith(TYPE_SUFFIX)]
+    args_keys = [key for key in keys if key.endswith(ARGS_SUFFIX)]
+    for replaceable in replaceables:
+        selected_type = dict_[replaceable + TYPE_SUFFIX]
+        if selected_type is None:
+            expect = ""
+        else:
+            expect = replaceable + "_" + selected_type + ARGS_SUFFIX
+        with open_dict(dict_):
+            for key in args_keys:
+                if key.startswith(replaceable + "_") and key != expect:
+                    del dict_[key]
+
+    suffix_length = len(ENABLED_SUFFIX)
+    enableables = [key[:-suffix_length] for key in keys if key.endswith(ENABLED_SUFFIX)]
+    for enableable in enableables:
+        enabled = dict_[enableable + ENABLED_SUFFIX]
+        if not enabled:
+            with open_dict(dict_):
+                dict_.pop(enableable + ARGS_SUFFIX, None)
+
+    for key in dict_:
+        if isinstance(dict_.get(key), DictConfig):
+            remove_unused_components(dict_[key])
diff --git a/pytorch3d/pytorch3d/implicitron/tools/depth_cleanup.py b/pytorch3d/pytorch3d/implicitron/tools/depth_cleanup.py
new file mode 100644
index 0000000000000000000000000000000000000000..9e5b509e5f7ed7e7d03df19bd9d6fc56559dac69
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/tools/depth_cleanup.py
@@ -0,0 +1,113 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+import torch.nn.functional as Fu
+from pytorch3d.ops import wmean
+from pytorch3d.renderer.cameras import CamerasBase
+from pytorch3d.structures import Pointclouds
+
+
+def cleanup_eval_depth(
+    point_cloud: Pointclouds,
+    camera: CamerasBase,
+    depth: torch.Tensor,
+    mask: torch.Tensor,
+    sigma: float = 0.01,
+    image=None,
+):
+
+    ba, _, H, W = depth.shape
+
+    pcl = point_cloud.points_padded()
+    n_pts = point_cloud.num_points_per_cloud()
+    pcl_mask = (
+        torch.arange(pcl.shape[1], dtype=torch.int64, device=pcl.device)[None]
+        < n_pts[:, None]
+    ).type_as(pcl)
+
+    pcl_proj = camera.transform_points(pcl, eps=1e-2)[..., :-1]
+    pcl_depth = camera.get_world_to_view_transform().transform_points(pcl)[..., -1]
+
+    depth_and_idx = torch.cat(
+        (
+            depth,
+            torch.arange(H * W).view(1, 1, H, W).expand(ba, 1, H, W).type_as(depth),
+        ),
+        dim=1,
+    )
+
+    depth_and_idx_sampled = Fu.grid_sample(
+        depth_and_idx, -pcl_proj[:, None], mode="nearest"
+    )[:, :, 0].view(ba, 2, -1)
+
+    depth_sampled, idx_sampled = depth_and_idx_sampled.split([1, 1], dim=1)
+    df = (depth_sampled[:, 0] - pcl_depth).abs()
+
+    # the threshold is a sigma-multiple of the standard deviation of the depth
+    mu = wmean(depth.view(ba, -1, 1), mask.view(ba, -1)).view(ba, 1)
+    std = (
+        # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+        wmean((depth.view(ba, -1) - mu).view(ba, -1, 1) ** 2, mask.view(ba, -1))
+        .clamp(1e-4)
+        .sqrt()
+        .view(ba, -1)
+    )
+    good_df_thr = std * sigma
+    good_depth = (df <= good_df_thr).float() * pcl_mask
+
+    # perc_kept = good_depth.sum(dim=1) / pcl_mask.sum(dim=1).clamp(1)
+    # print(f'Kept {100.0 * perc_kept.mean():1.3f} % points')
+
+    good_depth_raster = torch.zeros_like(depth).view(ba, -1)
+    good_depth_raster.scatter_add_(1, torch.round(idx_sampled[:, 0]).long(), good_depth)
+
+    good_depth_mask = (good_depth_raster.view(ba, 1, H, W) > 0).float()
+
+    # if float(torch.rand(1)) > 0.95:
+    #     depth_ok = depth * good_depth_mask
+
+    #     # visualize
+    #     visdom_env = 'depth_cleanup_dbg'
+    #     from visdom import Visdom
+    #     # from tools.vis_utils import make_depth_image
+    #     from pytorch3d.vis.plotly_vis import plot_scene
+    #     viz = Visdom()
+
+    #     show_pcls = {
+    #         'pointclouds': point_cloud,
+    #     }
+    #     for d, nm in zip(
+    #         (depth, depth_ok),
+    #         ('pointclouds_unproj', 'pointclouds_unproj_ok'),
+    #     ):
+    #         pointclouds_unproj = get_rgbd_point_cloud(
+    #             camera, image, d,
+    #         )
+    #         if int(pointclouds_unproj.num_points_per_cloud()) > 0:
+    #             show_pcls[nm] = pointclouds_unproj
+
+    #     scene_dict = {'1': {
+    #         **show_pcls,
+    #         'cameras': camera,
+    #     }}
+    #     scene = plot_scene(
+    #         scene_dict,
+    #         pointcloud_max_points=5000,
+    #         pointcloud_marker_size=1.5,
+    #         camera_scale=1.0,
+    #     )
+    #     viz.plotlyplot(scene, env=visdom_env, win='scene')
+
+    #     # depth_image_ok = make_depth_image(depths_ok, masks)
+    #     # viz.images(depth_image_ok, env=visdom_env, win='depth_ok')
+    #     # depth_image = make_depth_image(depths, masks)
+    #     # viz.images(depth_image, env=visdom_env, win='depth')
+    #     # # viz.images(rgb_rendered, env=visdom_env, win='images_render')
+    #     # viz.images(images, env=visdom_env, win='images')
+    #     import pdb; pdb.set_trace()
+
+    return good_depth_mask
diff --git a/pytorch3d/pytorch3d/implicitron/tools/eval_video_trajectory.py b/pytorch3d/pytorch3d/implicitron/tools/eval_video_trajectory.py
new file mode 100644
index 0000000000000000000000000000000000000000..bda9ec295729d58210fbec23cb3349a444516de2
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/tools/eval_video_trajectory.py
@@ -0,0 +1,266 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+import math
+from typing import Optional, Tuple
+
+import torch
+from pytorch3d.implicitron.tools import utils
+from pytorch3d.implicitron.tools.circle_fitting import fit_circle_in_3d
+from pytorch3d.renderer import look_at_view_transform, PerspectiveCameras
+from pytorch3d.transforms import Scale
+
+
+logger = logging.getLogger(__name__)
+
+
+def generate_eval_video_cameras(
+    train_cameras,
+    n_eval_cams: int = 100,
+    trajectory_type: str = "figure_eight",
+    trajectory_scale: float = 0.2,
+    scene_center: Tuple[float, float, float] = (0.0, 0.0, 0.0),
+    up: Tuple[float, float, float] = (0.0, 0.0, 1.0),
+    focal_length: Optional[torch.Tensor] = None,
+    principal_point: Optional[torch.Tensor] = None,
+    time: Optional[torch.Tensor] = None,
+    infer_up_as_plane_normal: bool = True,
+    traj_offset: Optional[Tuple[float, float, float]] = None,
+    traj_offset_canonical: Optional[Tuple[float, float, float]] = None,
+    remove_outliers_rate: float = 0.0,
+) -> PerspectiveCameras:
+    """
+    Generate a camera trajectory rendering a scene from multiple viewpoints.
+
+    Args:
+        train_cameras: The set of cameras from the training dataset object.
+        n_eval_cams: Number of cameras in the trajectory. Used to generate the
+            default `time` when `time` is not given.
+        trajectory_type: The type of the camera trajectory. Can be one of:
+            circular_lsq_fit: Camera centers follow a trajectory obtained
+                by fitting a 3D circle to train_cameras centers.
+                All cameras are looking towards scene_center.
+            figure_eight: Figure-of-8 trajectory around the center of the
+                central camera of the training dataset.
+            trefoil_knot: Same as 'figure_eight', but the trajectory has a shape
+                of a trefoil knot (https://en.wikipedia.org/wiki/Trefoil_knot).
+            figure_eight_knot: Same as 'figure_eight', but the trajectory has a shape
+                of a figure-eight knot
+                (https://en.wikipedia.org/wiki/Figure-eight_knot_(mathematics)).
+        trajectory_scale: The extent of the trajectory.
+        scene_center: The center of the scene in world coordinates which all
+            the cameras from the generated trajectory look at.
+        up: The "up" vector of the scene (=the normal of the scene floor).
+            It is used as the `up` direction of the look-at transform of the
+            generated cameras. If `infer_up_as_plane_normal` is set, it only
+            selects the sign of the inferred plane normal. For
+            `trajectory_type="circular_lsq_fit"` it is additionally passed
+            to the circle fit.
+        focal_length: The focal length of the output cameras. If `None`, an average
+            focal length of the train_cameras is used.
+        principal_point: The principal point of the output cameras. If `None`, an average
+            principal point of all train_cameras is used.
+        time: Defines the total length of the generated camera trajectory. All possible
+            trajectories (set with the `trajectory_type` argument) are periodic with
+            the period of `time=2pi`.
+            E.g. setting `trajectory_type=circular_lsq_fit` and `time=4pi`, will generate
+            a trajectory of camera poses rotating the total of 720 deg around the object.
+            For the knot trajectories `time` has to contain exactly
+            `n_eval_cams` elements.
+        infer_up_as_plane_normal: Infer the camera `up` vector automatically as the normal
+            of the plane fit to the optical centers of `train_cameras`.
+        traj_offset: 3D offset vector added to each point of the trajectory.
+        traj_offset_canonical: 3D offset vector expressed in the local coordinates of
+            the estimated trajectory which is added to each point of the trajectory.
+        remove_outliers_rate: the number between 0 and 1; if > 0,
+            some outlier train_cameras will be removed from trajectory estimation;
+            the filtering is based on camera center coordinates; top and
+            bottom `remove_outliers_rate` cameras on each dimension are removed.
+    Returns:
+        Batch of camera instances which can be used as the test dataset
+
+    Raises:
+        ValueError: If `trajectory_type` is not one of the supported types.
+    """
+    if remove_outliers_rate > 0.0:
+        train_cameras = _remove_outlier_cameras(train_cameras, remove_outliers_rate)
+
+    if trajectory_type in ("figure_eight", "trefoil_knot", "figure_eight_knot"):
+        cam_centers = train_cameras.get_camera_center()
+        # get the nearest camera center to the mean of centers
+        mean_camera_idx = (
+            ((cam_centers - cam_centers.mean(dim=0)[None]) ** 2)
+            .sum(dim=1)
+            .min(dim=0)
+            .indices
+        )
+        # generate the knot trajectory in canonical coords
+        if time is None:
+            # drop the last sample so that the periodic trajectory does not
+            # repeat its first pose
+            time = torch.linspace(0, 2 * math.pi, n_eval_cams + 1)[:n_eval_cams]
+        else:
+            assert time.numel() == n_eval_cams
+        if trajectory_type == "trefoil_knot":
+            traj = _trefoil_knot(time)
+        elif trajectory_type == "figure_eight_knot":
+            traj = _figure_eight_knot(time)
+        elif trajectory_type == "figure_eight":
+            traj = _figure_eight(time)
+        else:
+            raise ValueError(f"bad trajectory type: {trajectory_type}")
+        # shift the knot along z so that its largest z coordinate becomes 0
+        traj[:, 2] -= traj[:, 2].max()
+
+        # transform the canonical knot to the coord frame of the mean camera
+        mean_camera = PerspectiveCameras(
+            **{
+                k: getattr(train_cameras, k)[[int(mean_camera_idx)]]
+                for k in ("focal_length", "principal_point", "R", "T")
+            }
+        )
+        traj_trans = Scale(cam_centers.std(dim=0).mean() * trajectory_scale).compose(
+            mean_camera.get_world_to_view_transform().inverse()
+        )
+
+        if traj_offset_canonical is not None:
+            traj_trans = traj_trans.translate(
+                torch.FloatTensor(traj_offset_canonical)[None].to(traj)
+            )
+
+        traj = traj_trans.transform_points(traj)
+
+        # the first column of the eigenvector matrix is used as the normal
+        plane_normal = _fit_plane(cam_centers)[:, 0]
+        if infer_up_as_plane_normal:
+            up = _disambiguate_normal(plane_normal, up)
+
+    elif trajectory_type == "circular_lsq_fit":
+        # fit a 3D circle to the camera centers
+        cam_centers = train_cameras.get_camera_center()
+
+        if time is not None:
+            # keep user-supplied angles on the device / dtype of the camera
+            # centers, exactly as is done for the default angles below
+            angle = time.to(cam_centers)
+        else:
+            angle = torch.linspace(0, 2.0 * math.pi, n_eval_cams).to(cam_centers)
+
+        fit = fit_circle_in_3d(
+            cam_centers,
+            angles=angle,
+            offset=angle.new_tensor(traj_offset_canonical)
+            if traj_offset_canonical is not None
+            else None,
+            up=angle.new_tensor(up),
+        )
+        traj = fit.generated_points
+
+        # scale the trajectory around its mean
+        _t_mu = traj.mean(dim=0, keepdim=True)
+        traj = (traj - _t_mu) * trajectory_scale + _t_mu
+
+        plane_normal = fit.normal
+
+        if infer_up_as_plane_normal:
+            up = _disambiguate_normal(plane_normal, up)
+
+    else:
+        raise ValueError(f"Unknown trajectory_type {trajectory_type}.")
+
+    if traj_offset is not None:
+        traj = traj + torch.FloatTensor(traj_offset)[None].to(traj)
+
+    # point all cameras towards the center of the scene
+    R, T = look_at_view_transform(
+        eye=traj,
+        at=(scene_center,),  # (1, 3)
+        up=(up,),  # (1, 3)
+        device=traj.device,
+    )
+
+    # R holds one rotation per generated pose; use its length (rather than
+    # n_eval_cams) so that the intrinsics always match the number of poses,
+    # also when a user-supplied `time` has a different number of elements
+    n_cams = R.shape[0]
+
+    # get the average focal length and principal point
+    if focal_length is None:
+        focal_length = train_cameras.focal_length.mean(dim=0).repeat(n_cams, 1)
+    if principal_point is None:
+        principal_point = train_cameras.principal_point.mean(dim=0).repeat(
+            n_cams, 1
+        )
+
+    test_cameras = PerspectiveCameras(
+        focal_length=focal_length,
+        principal_point=principal_point,
+        R=R,
+        T=T,
+        device=focal_length.device,
+    )
+
+    # _visdom_plot_scene(
+    #     train_cameras,
+    #     test_cameras,
+    # )
+
+    return test_cameras
+
+
+def _remove_outlier_cameras(
+    cameras: PerspectiveCameras, outlier_rate: float
+) -> PerspectiveCameras:
+    """
+    Keep only the cameras whose centers are marked as inliers.
+
+    Args:
+        cameras: The batch of cameras to filter.
+        outlier_rate: Forwarded as `outlier_rate` to
+            `utils.get_inlier_indicators`, which is evaluated on the camera
+            centers along `dim=0`.
+
+    Returns:
+        The subset of `cameras` selected by the inlier indicators.
+    """
+    centers = cameras.get_camera_center()
+    inlier_indicators = utils.get_inlier_indicators(
+        centers, dim=0, outlier_rate=outlier_rate
+    )
+    # pyre-fixme[6]: For 1st param expected `Union[List[int], int, BoolTensor,
+    #  LongTensor]` but got `Tensor`.
+    inlier_cameras = cameras[inlier_indicators]
+    n_before, n_after = len(cameras), len(inlier_cameras)
+    logger.info(
+        "Filtered outlier cameras when estimating the trajectory: "
+        f"{n_before} → {n_after}"
+    )
+    # pyre-fixme[7]: Expected `PerspectiveCameras` but got `CamerasBase`.
+    return inlier_cameras
+
+
+def _disambiguate_normal(normal, up):
+    """
+    Orient `normal` so that it points to the same half-space as `up`.
+
+    Args:
+        normal: A tensor holding the (sign-ambiguous) plane normal.
+        up: A sequence of floats with the reference "up" direction.
+
+    Returns:
+        `normal` or `-normal` as a python list, whichever has a non-negative
+        dot product with `up`.
+    """
+    up_t = torch.tensor(up).to(normal)
+    alignment = float((up_t * normal).sum())
+    # Only flip for a strictly negative dot product. Using sign() directly
+    # would return 0 when `up` is orthogonal to `normal` and collapse the
+    # result to a zero vector, which is not a valid up direction.
+    flip = -1.0 if alignment < 0.0 else 1.0
+    return (normal * flip).tolist()
+
+
+def _fit_plane(x):
+    """
+    Return the eigenvectors of the covariance matrix of the points `x`.
+
+    The points are centered along dim 0 before the covariance is formed.
+    The eigenvectors are the second output of `torch.linalg.eigh`, stored
+    as columns.
+    """
+    centered = x - x.mean(dim=0)[None]
+    n_points = centered.shape[0]
+    covariance = (centered.t() @ centered) / n_points
+    eigenvectors = torch.linalg.eigh(covariance)[1]
+    return eigenvectors
+
+
+def _visdom_plot_scene(
+    train_cameras,
+    test_cameras,
+) -> None:
+    """
+    Debug helper: plot the train and test cameras in one plotly scene and
+    send the figure to Visdom (env "cam_traj_dbg", window "cam_trajs").
+    """
+    from pytorch3d.vis.plotly_vis import plot_scene
+
+    cameras_to_show = {
+        "train_cams": train_cameras,
+        "test_cams": test_cameras,
+    }
+    figure = plot_scene({"scene": cameras_to_show})
+    from visdom import Visdom
+
+    Visdom().plotlyplot(figure, env="cam_traj_dbg", win="cam_trajs")
+
+
+def _figure_eight_knot(t: torch.Tensor, z_scale: float = 0.5):
+    """
+    Evaluate the figure-eight knot curve at the parameter values `t`.
+
+    Returns a tensor with the (x, y, z) coordinates stacked along the last
+    dimension; the z coordinate is multiplied by `z_scale`.
+    """
+    radius = 2 + torch.cos(2 * t)
+    coords = (
+        radius * torch.cos(3 * t),
+        radius * torch.sin(3 * t),
+        torch.sin(4 * t) * z_scale,
+    )
+    return torch.stack(coords, dim=-1)
+
+
+def _trefoil_knot(t: torch.Tensor, z_scale: float = 0.5):
+    """
+    Evaluate the trefoil knot curve at the parameter values `t`.
+
+    Returns a tensor with the (x, y, z) coordinates stacked along the last
+    dimension; the z coordinate is multiplied by `z_scale`.
+    """
+    coords = (
+        torch.sin(t) + 2 * torch.sin(2 * t),
+        torch.cos(t) - 2 * torch.cos(2 * t),
+        torch.sin(3 * t).neg() * z_scale,
+    )
+    return torch.stack(coords, dim=-1)
+
+
+def _figure_eight(t: torch.Tensor, z_scale: float = 0.5):
+    """
+    Evaluate the figure-of-eight curve at the parameter values `t`.
+
+    Returns a tensor with the (x, y, z) coordinates stacked along the last
+    dimension; the z coordinate is multiplied by `z_scale`.
+    """
+    coords = (
+        torch.cos(t),
+        torch.sin(2 * t) / 2,
+        torch.sin(t) * z_scale,
+    )
+    return torch.stack(coords, dim=-1)
diff --git a/pytorch3d/pytorch3d/implicitron/tools/image_utils.py b/pytorch3d/pytorch3d/implicitron/tools/image_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..29c7e0a4122eb81b0e61343c74b29a0091ffcd8c
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/tools/image_utils.py
@@ -0,0 +1,55 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from typing import Sequence, Union
+
+import torch
+
+
+def mask_background(
+    image_rgb: torch.Tensor,
+    mask_fg: torch.Tensor,
+    dim_color: int = 1,
+    bg_color: Union[torch.Tensor, Sequence, str, float] = 0.0,
+) -> torch.Tensor:
+    """
+    Mask the background input image tensor `image_rgb` with `bg_color`.
+    The background regions are obtained from the binary foreground segmentation
+    mask `mask_fg`.
+
+    Args:
+        image_rgb: A 4-dimensional image tensor with 3 color channels along
+            the dimension `dim_color`.
+        mask_fg: The foreground mask; it is cast to the type of `image_rgb`
+            and has to broadcast against it.
+        dim_color: The index of the color dimension of `image_rgb`.
+        bg_color: The background color given as a 3-element tensor,
+            a 3-element tuple / list, a single number (used for all 3
+            channels), or one of the strings "white" / "black".
+
+    Returns:
+        `mask_fg * image_rgb + (1 - mask_fg) * bg_color`.
+
+    Raises:
+        ValueError: If `bg_color` is of an unsupported type or an unknown
+            color name.
+    """
+    # shape which broadcasts the 3 color values along `dim_color`
+    tgt_view = [1, 1, 1, 1]
+    tgt_view[dim_color] = 3
+    # obtain the background color tensor
+    if isinstance(bg_color, torch.Tensor):
+        # use tgt_view (not a hard-coded (1, 3, 1, 1)) so that the tensor
+        # color respects `dim_color` like all the other color formats
+        bg_color_t = bg_color.reshape(tgt_view).clone().to(image_rgb)
+    elif isinstance(bg_color, (int, float, tuple, list)):
+        if isinstance(bg_color, (int, float)):
+            bg_color = [bg_color] * 3
+        bg_color_t = torch.tensor(
+            bg_color, device=image_rgb.device, dtype=image_rgb.dtype
+        ).view(*tgt_view)
+    elif isinstance(bg_color, str):
+        if bg_color == "white":
+            bg_color_t = image_rgb.new_ones(tgt_view)
+        elif bg_color == "black":
+            bg_color_t = image_rgb.new_zeros(tgt_view)
+        else:
+            raise ValueError(_invalid_color_error_msg(bg_color))
+    else:
+        raise ValueError(_invalid_color_error_msg(bg_color))
+    # cast to the image_rgb's type
+    mask_fg = mask_fg.type_as(image_rgb)
+    # mask the bg
+    image_masked = mask_fg * image_rgb + (1 - mask_fg) * bg_color_t
+    return image_masked
+
+
+def _invalid_color_error_msg(bg_color) -> str:
+    """
+    Build the error message reported for an unsupported `bg_color` value.
+    """
+    return (
+        f"Invalid bg_color={bg_color}. Please set bg_color to a 3-element"
+        + " tensor, tuple or list, or a string (white | black), or a float."
+    )
diff --git a/pytorch3d/pytorch3d/implicitron/tools/metric_utils.py b/pytorch3d/pytorch3d/implicitron/tools/metric_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ed2a8e3f043b628949456ac27434efba9b76641
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/tools/metric_utils.py
@@ -0,0 +1,235 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+from typing import Optional, Tuple
+
+import torch
+from torch.nn import functional as F
+
+
+def eval_depth(
+    pred: torch.Tensor,
+    gt: torch.Tensor,
+    crop: int = 1,
+    mask: Optional[torch.Tensor] = None,
+    get_best_scale: bool = True,
+    mask_thr: float = 0.5,
+    best_scale_clamp_thr: float = 1e-4,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Compute per-batch-element depth errors of `pred` with respect to `gt`.
+
+    Only pixels with a positive (optionally masked) ground truth depth
+    contribute to the errors.
+
+    Args:
+        pred: A tensor of shape (N, 1, H, W) with the predicted depth maps.
+        gt: A tensor of shape (N, 1, H, W) with the ground truth depth maps.
+        crop: The width of the image border (in pixels) which is discarded
+            before the evaluation.
+        mask: An optional mask of the valid regions of `gt`; `gt` is zeroed
+            wherever `mask <= mask_thr`.
+        get_best_scale: If `True`, `pred` is first multiplied with the
+            per-element factor returned by `estimate_depth_scale_factor`.
+            This is useful for predictions which are only defined up to
+            an arbitrary scale.
+        mask_thr: The threshold applied to `mask`.
+        best_scale_clamp_thr: The clamping threshold forwarded to
+            `estimate_depth_scale_factor`.
+
+    Returns:
+        mse_depth: Mean squared error between `pred` and `gt`.
+        abs_depth: Mean absolute difference between `pred` and `gt`.
+    """
+    per_image = (1, 2, 3)
+
+    # discard the image border
+    if crop > 0:
+        interior = (
+            slice(None),
+            slice(None),
+            slice(crop, -crop),
+            slice(crop, -crop),
+        )
+        gt, pred = gt[interior], pred[interior]
+        if mask is not None:
+            mask = mask[interior]
+
+    # zero-out the ground truth outside of the valid mask
+    if mask is not None:
+        gt = gt * (mask > mask_thr).float()
+
+    valid = (gt > 0.0).float()
+    n_valid = torch.clamp(valid.sum(per_image), 1e-4)
+
+    if get_best_scale:
+        # rescale the prediction with the factor estimated on valid pixels
+        scale_best = estimate_depth_scale_factor(
+            pred, gt, valid, best_scale_clamp_thr
+        )
+        pred = pred * scale_best[:, None, None, None]
+
+    residual = gt - pred
+
+    # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+    mse_depth = (valid * (residual**2)).sum(per_image) / n_valid
+    abs_depth = (valid * residual.abs()).sum(per_image) / n_valid
+
+    return mse_depth, abs_depth
+
+
+def estimate_depth_scale_factor(pred, gt, mask, clamp_thr):
+    """
+    Estimate a per-batch-element scale for `pred`.
+
+    The scale is the ratio of the masked means of `pred * gt` and
+    `pred * pred` taken over dims (1, 2, 3); the divisor is clamped from
+    below with `clamp_thr`.
+    """
+    per_image = (1, 2, 3)
+    numerator = (pred * gt * mask).mean(per_image)
+    denominator = torch.clamp((pred * pred * mask).mean(per_image), clamp_thr)
+    return numerator / denominator
+
+
+def calc_psnr(
+    x: torch.Tensor,
+    y: torch.Tensor,
+    mask: Optional[torch.Tensor] = None,
+) -> torch.Tensor:
+    """
+    Compute the peak signal-to-noise ratio between `x` and `y` as
+    `-10 * log10(mse)`, where the (optionally masked) mean squared error
+    is clamped from below with 1e-10.
+    """
+    clamped_mse = calc_mse(x, y, mask=mask).clamp(1e-10)
+    return torch.log10(clamped_mse) * (-10.0)
+
+
+def calc_mse(
+    x: torch.Tensor,
+    y: torch.Tensor,
+    mask: Optional[torch.Tensor] = None,
+) -> torch.Tensor:
+    """
+    Compute the mean squared error between `x` and `y`.
+
+    If `mask` is given, the squared errors are weighted with it and
+    normalized by the sum of the mask expanded to the shape of `x`
+    (clamped from below with 1e-5).
+    """
+    # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+    squared_error = (x - y) ** 2
+    if mask is None:
+        return torch.mean(squared_error)
+    mask_mass = mask.expand_as(x).sum().clamp(1e-5)
+    return (squared_error * mask).sum() / mask_mass
+
+
+def calc_bce(
+    pred: torch.Tensor,
+    gt: torch.Tensor,
+    equal_w: bool = True,
+    pred_eps: float = 0.01,
+    mask: Optional[torch.Tensor] = None,
+    lerp_bound: Optional[float] = None,
+) -> torch.Tensor:
+    """
+    Compute the (weighted) binary cross entropy between `pred` and `gt`.
+
+    Args:
+        pred: The predicted probabilities.
+        gt: The ground truth; values above 0.5 count as foreground when
+            `equal_w` is set.
+        equal_w: If `True`, foreground and background elements are weighted
+            so that both groups contribute equally; otherwise the weights
+            equal `mask`.
+        pred_eps: If positive, `pred` is clamped to
+            `[pred_eps, 1 - pred_eps]`.
+        mask: Optional element-wise mask; defaults to all ones.
+        lerp_bound: If given, `binary_cross_entropy_lerp` with this bound is
+            used instead of `F.binary_cross_entropy`.
+
+    Returns:
+        The scalar loss.
+    """
+    # bound the predictions from both sides
+    if pred_eps > 0.0:
+        pred = torch.clamp(pred, pred_eps, 1.0 - pred_eps)
+
+    if mask is None:
+        mask = torch.ones_like(gt)
+
+    if not equal_w:
+        weight = torch.ones_like(gt) * mask
+    else:
+        foreground = (gt > 0.5).float() * mask
+        background = (1 - foreground) * mask
+        fg_mass = foreground.sum().clamp(1.0)
+        bg_mass = background.sum().clamp(1.0)
+        weight = foreground / fg_mass + background / bg_mass
+        # weight sum should be at this point ~2
+        # pyre-fixme[58]: `/` is not supported for operand types `int` and `Tensor`.
+        weight = weight * (weight.numel() / weight.sum().clamp(1.0))
+
+    if lerp_bound is None:
+        return F.binary_cross_entropy(pred, gt, reduction="mean", weight=weight)
+    return binary_cross_entropy_lerp(pred, gt, weight, lerp_bound)
+
+
+def binary_cross_entropy_lerp(
+    pred: torch.Tensor,
+    gt: torch.Tensor,
+    weight: torch.Tensor,
+    lerp_bound: float,
+):
+    """
+    Binary cross entropy which avoids exploding gradients by linearly
+    extrapolating the log function for log(1-pred) and log(pred) whenever
+    pred or 1-pred is smaller than lerp_bound.
+
+    The result is the weighted sum of the element-wise losses divided by
+    the sum of `weight` (clamped from below with 1e-4).
+    """
+    log_not_pred = log_lerp(1 - pred, lerp_bound)
+    log_pred = log_lerp(pred, lerp_bound)
+    log_likelihood = log_not_pred * (1 - gt) + log_pred * gt
+    weight_mass = weight.sum().clamp(1e-4)
+    return -(log_likelihood * weight).sum() / weight_mass
+
+
+def log_lerp(x: torch.Tensor, b: float):
+    """
+    Linearly extrapolated log for x < b.
+
+    Args:
+        x: The input tensor.
+        b: The positive bound below which the log is replaced with its
+            first-order extrapolation `log(b) + (x - b) / b`.
+
+    Returns:
+        A tensor of the shape of `x` with `log(x)` where `x >= b` and the
+        linear extrapolation elsewhere.
+    """
+    assert b > 0
+    # Clamp before taking the log: torch.where back-propagates through both
+    # branches, so log() evaluated at x == 0 would turn the (zero) gradient
+    # of the unselected branch into NaN. The clamp leaves values and
+    # gradients for x >= b untouched.
+    clamped_log = x.clamp(min=b).log()
+    return torch.where(x >= b, clamped_log, math.log(b) + (x - b) / b)
+
+
+def rgb_l1(
+    pred: torch.Tensor, target: torch.Tensor, mask: Optional[torch.Tensor] = None
+) -> torch.Tensor:
+    """
+    Compute the masked absolute color error between `pred` and `target`
+    for each batch element.
+
+    The absolute differences are weighted with `mask` (all ones with a
+    single channel if not given), summed over dims (1, 2, 3) and divided by
+    the sum of the mask over the same dims (clamped from below with 1).
+    """
+    per_image = (1, 2, 3)
+    if mask is None:
+        mask = torch.ones_like(pred[:, :1])
+    abs_error = (pred - target).abs() * mask
+    mask_mass = mask.sum(dim=per_image).clamp(1)
+    return abs_error.sum(dim=per_image) / mask_mass
+
+
+def huber(dfsq: torch.Tensor, scaling: float = 0.03) -> torch.Tensor:
+    """
+    Evaluate the huber function of the input squared error `dfsq`.
+
+    The value is `(safe_sqrt(1 + dfsq / scaling**2) - 1) * scaling`, where
+    `safe_sqrt` is called with `eps=1e-4`.
+    """
+    normalized = dfsq / (scaling * scaling)
+    root = safe_sqrt(1 + normalized, eps=1e-4)
+    return (root - 1) * scaling
+
+
+def neg_iou_loss(
+    predict: torch.Tensor,
+    target: torch.Tensor,
+    mask: Optional[torch.Tensor] = None,
+) -> torch.Tensor:
+    """
+    Loss defined as one minus the (optionally masked) intersection over
+    union of `predict` and `target`, see `iou`.
+    """
+    overlap = iou(predict, target, mask=mask)
+    return 1.0 - overlap
+
+
+def safe_sqrt(A: torch.Tensor, eps: float = 1e-4) -> torch.Tensor:
+    """
+    Square root of `A` clamped from below at zero and offset by `eps`,
+    i.e. `sqrt(max(A, 0) + eps)`.
+    """
+    non_negative = torch.clamp(A, float(0))
+    return torch.sqrt(non_negative + eps)
+
+
+def iou(
+    predict: torch.Tensor,
+    target: torch.Tensor,
+    mask: Optional[torch.Tensor] = None,
+) -> torch.Tensor:
+    """
+    Compute the soft intersection over union of `predict` and `target`,
+    averaged over the batch.
+
+    For each batch element, the intersection `predict * target` and the
+    union `predict + target - predict * target` are summed over all
+    non-batch dimensions; 1e-4 is added to the union. If `mask` is given,
+    both inputs are multiplied with it first.
+    """
+    if mask is not None:
+        predict, target = predict * mask, target * mask
+    non_batch_dims = tuple(range(1, predict.dim()))
+    overlap = predict * target
+    intersect = overlap.sum(non_batch_dims)
+    union = (predict + target - overlap).sum(non_batch_dims) + 1e-4
+    return (intersect / union).sum() / intersect.numel()
+
+
+def beta_prior(pred: torch.Tensor, cap: float = 0.1) -> torch.Tensor:
+    """
+    Compute the mean of `log(pred + cap) + log(1 - pred + cap)` shifted by
+    `log(cap) + log(cap + 1)`, which is the value of the expression at
+    `pred = 0` and `pred = 1`.
+
+    Raises:
+        ValueError: If `cap` is not positive.
+    """
+    if cap <= 0.0:
+        raise ValueError("capping should be positive to avoid unbound loss")
+
+    log_terms = torch.log(pred + cap) + torch.log(1.0 - pred + cap)
+    min_value = math.log(cap) + math.log(cap + 1.0)
+    return log_terms.mean() - min_value
diff --git a/pytorch3d/pytorch3d/implicitron/tools/model_io.py b/pytorch3d/pytorch3d/implicitron/tools/model_io.py
new file mode 100644
index 0000000000000000000000000000000000000000..f94a4ed2511e72ddf2b9215826001d135f834dcc
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/tools/model_io.py
@@ -0,0 +1,173 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import glob
+import logging
+import os
+import shutil
+import tempfile
+from typing import Optional
+
+import torch
+
+
+logger = logging.getLogger(__name__)
+
+
+def load_stats(flstats):
+    """
+    Load the `Stats` object stored at `flstats`.
+
+    Returns `None` if `flstats` is not an existing file.
+    """
+    from pytorch3d.implicitron.tools.stats import Stats
+
+    return Stats.load(flstats) if os.path.isfile(flstats) else None
+
+
+def get_model_path(fl) -> str:
+    """
+    Return the model file path: `fl` with its extension replaced by ".pth".
+    """
+    stem, _ = os.path.splitext(fl)
+    return f"{stem}.pth"
+
+
+def get_optimizer_path(fl) -> str:
+    """
+    Return the optimizer file path: `fl` with its extension replaced by
+    "_opt.pth".
+    """
+    stem, _ = os.path.splitext(fl)
+    return f"{stem}_opt.pth"
+
+
+def get_stats_path(fl, eval_results: bool = False) -> str:
+    """
+    Return the path of the stats file which belongs to the model file `fl`.
+
+    Args:
+        fl: The path of the model file; its extension is ignored.
+        eval_results: If `False`, the path is `fl` with the extension
+            replaced by "_stats.jgz". If `True`, the path points to the
+            directory of `fl`: "stats_test_2.jgz" if that file exists,
+            otherwise "stats_test.jgz" (regardless of whether it exists).
+    """
+    stem = os.path.splitext(fl)[0]
+    if not eval_results:
+        return f"{stem}_stats.jgz"
+
+    folder = os.path.dirname(stem)
+    preferred = os.path.join(folder, "stats_test_2.jgz")
+    if os.path.isfile(preferred):
+        return preferred
+    return os.path.join(folder, "stats_test.jgz")
+
+
+def safe_save_model(model, stats, fl, optimizer=None, cfg=None) -> None:
+    """
+    This functions stores model files safely so that no model files exist on the
+    file system in case the saving procedure gets interrupted.
+
+    This is done first by saving the model files to a temporary directory followed
+    by (atomic) moves to the target location. The temporary directory is created
+    inside the target directory so that the moves stay on one file system and
+    are plain renames rather than copies. Note, that this can still result
+    in a corrupt set of model files in case interruption happens while performing
+    the moves. It is however quite improbable that a crash would occur right at
+    this time.
+
+    Args:
+        model: The model whose `state_dict()` is stored.
+        stats: The stats object; stored with its `save` method.
+        fl: The target path of the model file; the stats and optimizer paths
+            are derived from it by `save_model`.
+        optimizer: If given, its `state_dict()` is stored as well.
+        cfg: Forwarded to `save_model`.
+    """
+    logger.info(f"saving model files safely to {fl}")
+    tgt_dir, fl_name = os.path.split(fl)
+    # first store everything to a tmpdir; it lives next to the final files
+    # because a move across file systems degrades to a non-atomic copy
+    with tempfile.TemporaryDirectory(
+        dir=tgt_dir or os.curdir, prefix=".tmp_save_"
+    ) as tmpdir:
+        stored_tmp_fls = save_model(
+            model,
+            stats,
+            os.path.join(tmpdir, fl_name),
+            optimizer=optimizer,
+            cfg=cfg,
+        )
+        # then move from the tmpdir to the right location
+        for tmpfl in stored_tmp_fls:
+            if tmpfl is None:
+                # no optimizer was stored
+                continue
+            shutil.move(tmpfl, os.path.join(tgt_dir, os.path.basename(tmpfl)))
+
+
+def save_model(model, stats, fl, optimizer=None, cfg=None):
+    """
+    Store the model state dict, optionally the optimizer state dict, and
+    the stats to the paths derived from `fl`.
+
+    Returns:
+        A tuple `(flstats, flmodel, flopt)` with the paths of the stored
+        files; `flopt` is `None` if no `optimizer` was given.
+    """
+    stats_path = get_stats_path(fl)
+    model_path = get_model_path(fl)
+
+    logger.info("saving model to %s", model_path)
+    torch.save(model.state_dict(), model_path)
+
+    optimizer_path = None
+    if optimizer is not None:
+        optimizer_path = get_optimizer_path(fl)
+        logger.info("saving optimizer to %s", optimizer_path)
+        torch.save(optimizer.state_dict(), optimizer_path)
+
+    logger.info("saving model stats to %s", stats_path)
+    stats.save(stats_path)
+
+    return stats_path, model_path, optimizer_path
+
+
+def save_stats(stats, fl, cfg=None):
+    """
+    Store `stats` to the stats path derived from `fl` and return that path.
+    """
+    stats_path = get_stats_path(fl)
+    logger.info("saving model stats to %s", stats_path)
+    stats.save(stats_path)
+    return stats_path
+
+
+def load_model(fl, map_location: Optional[dict]):
+    """
+    Load the model state dict, the stats and the optimizer state which
+    belong to the checkpoint `fl`.
+
+    NOTE: `torch.load` unpickles the files; only load checkpoints from
+    trusted sources.
+
+    Returns:
+        A tuple `(model_state_dict, stats, optimizer)`; `optimizer` is
+        `None` if the optimizer file does not exist.
+    """
+    stats_path = get_stats_path(fl)
+    model_path = get_model_path(fl)
+    optimizer_path = get_optimizer_path(fl)
+
+    model_state_dict = torch.load(model_path, map_location=map_location)
+    stats = load_stats(stats_path)
+    optimizer = (
+        torch.load(optimizer_path, map_location=map_location)
+        if os.path.isfile(optimizer_path)
+        else None
+    )
+
+    return model_state_dict, stats, optimizer
+
+
+def parse_epoch_from_model_path(model_path) -> int:
+    """
+    Parse the epoch number from a checkpoint path by removing ".pth" and
+    "model_epoch_" from the file name.
+    """
+    file_name = os.path.split(model_path)[-1]
+    epoch_str = file_name.replace(".pth", "").replace("model_epoch_", "")
+    return int(epoch_str)
+
+
+def get_checkpoint(exp_dir, epoch):
+    """
+    Return the path of the model checkpoint of `epoch` inside `exp_dir`;
+    the epoch is zero-padded to 8 digits.
+    """
+    file_name = "model_epoch_%08d.pth" % epoch
+    return os.path.join(exp_dir, file_name)
+
+
+def find_last_checkpoint(
+    exp_dir, any_path: bool = False, all_checkpoints: bool = False
+):
+    """
+    Find the checkpoint(s) stored in `exp_dir`.
+
+    Args:
+        exp_dir: The directory which is searched (not recursively) for
+            files named "model_epoch_" + 8 digits + extension.
+        any_path: If `False`, only ".pth" files are considered. If `True`,
+            "_stats.jgz" and then "_opt.pth" files are tried as well, and
+            the first extension with any match is used.
+        all_checkpoints: If `True`, return the sorted list of all matches
+            instead of only the last one.
+
+    Returns:
+        `None` if nothing is found. Otherwise the matched path(s) with the
+        matched extension replaced by ".pth".
+    """
+    extensions = [".pth", "_stats.jgz", "_opt.pth"] if any_path else [".pth"]
+
+    matches = []
+    matched_ext = extensions[-1]
+    for matched_ext in extensions:
+        pattern = os.path.join(
+            glob.escape(exp_dir), "model_epoch_" + "[0-9]" * 8 + matched_ext
+        )
+        matches = sorted(glob.glob(pattern))
+        if matches:
+            break
+
+    if not matches:
+        return None
+
+    # map every match back to the path of the model file
+    model_paths = [m[0 : -len(matched_ext)] + ".pth" for m in matches]
+    return model_paths if all_checkpoints else model_paths[-1]
+
+
+def purge_epoch(exp_dir, epoch) -> None:
+    """
+    Delete the model, optimizer and stats files of `epoch` in `exp_dir`.
+    Files which do not exist are skipped.
+    """
+    model_path = get_checkpoint(exp_dir, epoch)
+    epoch_files = (
+        model_path,
+        get_optimizer_path(model_path),
+        get_stats_path(model_path),
+    )
+    for epoch_file in epoch_files:
+        if not os.path.isfile(epoch_file):
+            continue
+        logger.info("deleting %s" % epoch_file)
+        os.remove(epoch_file)
diff --git a/pytorch3d/pytorch3d/implicitron/tools/point_cloud_utils.py b/pytorch3d/pytorch3d/implicitron/tools/point_cloud_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..5954aace687749fb4de2eb271bfb6017952ef7f2
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/tools/point_cloud_utils.py
@@ -0,0 +1,195 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from typing import cast, Optional, Tuple
+
+import torch
+import torch.nn.functional as Fu
+from pytorch3d.renderer import (
+    AlphaCompositor,
+    NDCMultinomialRaysampler,
+    PointsRasterizationSettings,
+    PointsRasterizer,
+    ray_bundle_to_ray_points,
+)
+from pytorch3d.renderer.cameras import CamerasBase
+from pytorch3d.structures import Pointclouds
+
+
+def get_rgbd_point_cloud(
+    camera: CamerasBase,
+    image_rgb: torch.Tensor,
+    depth_map: torch.Tensor,
+    mask: Optional[torch.Tensor] = None,
+    mask_thr: float = 0.5,
+    *,
+    euclidean: bool = False,
+) -> Pointclouds:
+    """
+    Unproject a batch of depth maps to a single colored point cloud.
+
+    Every depth pixel with a positive value (and, optionally, a mask value
+    above `mask_thr`) yields one 3D point, colored with the corresponding
+    pixel of `image_rgb` resized to the resolution of the depth map.
+
+    Arguments:
+        camera: Batch of N cameras
+        image_rgb: Batch of N images of shape (N, C, H, W).
+            For RGB images C=3.
+        depth_map: Batch of N depth maps of shape (N, 1, H', W').
+            Only positive values here are used to generate points.
+            If euclidean=False (default) this contains perpendicular distances
+            from each point to the camera plane (z-values).
+            If euclidean=True, this contains distances from each point to
+            the camera center.
+        mask: If provided, batch of N masks of the same shape as depth_map.
+            If provided, values in depth_map are ignored if the corresponding
+            element of mask is smaller than mask_thr.
+        mask_thr: used in interpreting mask
+        euclidean: used in interpreting depth_map.
+
+    Returns:
+        Pointclouds object containing one point cloud.
+    """
+    height, width = depth_map.shape[2:]
+
+    # sample one ray per depth pixel and place a single point on each ray
+    # at the distance given by the depth map
+    raysampler = NDCMultinomialRaysampler(
+        image_width=width,
+        image_height=height,
+        n_pts_per_ray=1,
+        min_depth=1.0,
+        max_depth=1.0,
+        unit_directions=euclidean,
+    )
+    ray_bundle = raysampler(camera)._replace(lengths=depth_map[:, 0, ..., None])
+    points = ray_bundle_to_ray_points(ray_bundle)
+
+    # flat indicator of the pixels which generate a point
+    keep = depth_map > 0.0
+    if mask is not None:
+        keep *= mask > mask_thr
+    keep = keep.reshape(-1)
+
+    points = points.reshape(-1, 3)[keep]
+
+    # resize the image to the depth map resolution to get per-point colors
+    n_channels = image_rgb.shape[1]
+    colors = torch.nn.functional.interpolate(
+        image_rgb,
+        size=[height, width],
+        mode="bilinear",
+        align_corners=False,
+    )
+    colors = colors.permute(0, 2, 3, 1).reshape(-1, n_channels)[keep]
+
+    return Pointclouds(points=points[None], features=colors[None])
+
+
+def render_point_cloud_pytorch3d(
+    camera,
+    point_cloud,
+    render_size: Tuple[int, int],
+    point_radius: float = 0.03,
+    topk: int = 10,
+    eps: float = 1e-2,
+    bg_color=None,
+    bin_size: Optional[int] = None,
+    **kwargs,
+):
+    """
+    Render a point cloud with the PyTorch3D points rasterizer followed by
+    alpha compositing.
+
+    The points are first moved to the view coordinates of `camera` (see
+    `_transform_points`) and then rasterized with a clone of `camera` whose
+    `R` is set to identity and whose `T` is set to zero.
+
+    Args:
+        camera: The cameras to render from; has to provide `clone()`, `R`,
+            `T` and `get_world_to_view_transform` (presumably a `CamerasBase`
+            batch -- TODO confirm).
+        point_cloud: The point cloud with features to render (presumably a
+            `Pointclouds` object -- TODO confirm).
+        render_size: Passed to the rasterizer as `image_size` and used as
+            the size of the final bilinear resize (presumably
+            (height, width) -- TODO confirm).
+        point_radius: The rasterization radius of the points.
+        topk: The number of points rasterized per pixel
+            (`points_per_pixel`).
+        eps: Passed to `_transform_points`.
+        bg_color: The background color passed to the compositor; if `None`,
+            zeros of the feature dimension are used.
+        bin_size: The `bin_size` of the rasterizer; if `None`, 64 is used
+            when the larger side of `render_size` exceeds 1024 and `None`
+            is passed on otherwise.
+        **kwargs: Forwarded to `_transform_points`, to the rasterizer call
+            and to the compositor call.
+
+    Returns:
+        A tuple `(data_rendered, render_mask, depth_rendered)` obtained by
+        splitting the rendered tensor along dim 1: the composited features,
+        the 1-channel rendering mask and the 1-channel rendered depth.
+    """
+
+    # feature dimension
+    featdim = point_cloud.features_packed().shape[-1]
+
+    # move to the camera coordinates; using identity cameras in the renderer
+    point_cloud = _transform_points(camera, point_cloud, eps, **kwargs)
+    camera_trivial = camera.clone()
+    camera_trivial.R[:] = torch.eye(3)
+    camera_trivial.T *= 0.0
+
+    # use the explicit bin_size if given, otherwise 64 for large renders
+    bin_size = (
+        bin_size
+        if bin_size is not None
+        else (64 if int(max(render_size)) > 1024 else None)
+    )
+    rasterizer = PointsRasterizer(
+        cameras=camera_trivial,
+        raster_settings=PointsRasterizationSettings(
+            image_size=render_size,
+            radius=point_radius,
+            points_per_pixel=topk,
+            bin_size=bin_size,
+        ),
+    )
+
+    fragments = rasterizer(point_cloud, **kwargs)
+
+    # Construct weights based on the distance of a point to the true point.
+    # However, this could be done differently: e.g. predicted as opposed
+    # to a function of the weights.
+    r = rasterizer.raster_settings.radius
+
+    # set up the blending weights
+    dists2 = fragments.dists
+    weights = 1 - dists2 / (r * r)
+    # zero the weights of the slots without a rasterized point (idx < 0)
+    ok = cast(torch.BoolTensor, (fragments.idx >= 0)).float()
+
+    weights = weights * ok
+
+    # move the points-per-pixel dimension from the last position to dim 1
+    fragments_prm = fragments.idx.long().permute(0, 3, 1, 2)
+    weights_prm = weights.permute(0, 3, 1, 2)
+    images = AlphaCompositor()(
+        fragments_prm,
+        weights_prm,
+        point_cloud.features_packed().permute(1, 0),
+        background_color=bg_color if bg_color is not None else [0.0] * featdim,
+        **kwargs,
+    )
+
+    # get the depths ...
+    # weighted_fs[b,c,i,j] = sum_k cum_alpha_k * features[c,pointsidx[b,k,i,j]]
+    # cum_alpha_k = alphas[b,k,i,j] * prod_l=0..k-1 (1 - alphas[b,l,i,j])
+    cumprod = torch.cumprod(1 - weights, dim=-1)
+    # shift by one slot so that slot k holds the product over the slots l < k
+    cumprod = torch.cat((torch.ones_like(cumprod[..., :1]), cumprod[..., :-1]), dim=-1)
+    depths = (weights * cumprod * fragments.zbuf).sum(dim=-1)
+    # add the rendering mask
+    # render_mask = 1 - prod_k (1 - weights_k)
+    # pyre-fixme[6]: For 1st param expected `Tensor` but got `float`.
+    render_mask = -torch.prod(1.0 - weights, dim=-1) + 1.0
+
+    # cat depths and render mask
+    rendered_blob = torch.cat((images, depths[:, None], render_mask[:, None]), dim=1)
+
+    # reshape back
+    rendered_blob = Fu.interpolate(
+        rendered_blob,
+        size=tuple(render_size),
+        mode="bilinear",
+        align_corners=False,
+    )
+
+    # the last two channels are the depth and the mask appended above
+    data_rendered, depth_rendered, render_mask = rendered_blob.split(
+        [rendered_blob.shape[1] - 2, 1, 1],
+        dim=1,
+    )
+
+    return data_rendered, render_mask, depth_rendered
+
+
+def _signed_clamp(x, eps):
+    """
+    Clamp the magnitude of `x` from below with `eps` while keeping its
+    sign; exact zeros are treated as positive.
+    """
+    nonzero_sign = torch.sign(x) + (x == 0.0).type_as(x)
+    magnitude = torch.clamp(x.abs(), eps)
+    return nonzero_sign * magnitude
+
+
+def _transform_points(cameras, point_clouds, eps, **kwargs):
+    """
+    Move the padded points of `point_clouds` to the view coordinates of
+    `cameras` and clamp the magnitude of their last coordinate from below
+    with `eps` (see `_signed_clamp`).
+
+    `kwargs` are forwarded to `cameras.get_world_to_view_transform`.
+    Returns the point clouds updated with the transformed points.
+    """
+    pts_world = point_clouds.points_padded()
+    world_to_view = cameras.get_world_to_view_transform(**kwargs)
+    pts_view = world_to_view.transform_points(pts_world, eps=eps)
+    # it is crucial to actually clamp the points as well ...
+    leading_coords = pts_view[..., :-1]
+    last_coord = _signed_clamp(pts_view[..., -1:], eps)
+    pts_view = torch.cat((leading_coords, last_coord), dim=-1)
+    return point_clouds.update_padded(pts_view)
diff --git a/pytorch3d/pytorch3d/implicitron/tools/rasterize_mc.py b/pytorch3d/pytorch3d/implicitron/tools/rasterize_mc.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fbf4b8d348c2451e987da194616719d76b1d115
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/tools/rasterize_mc.py
@@ -0,0 +1,145 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+from typing import Optional, Tuple
+
+import pytorch3d
+
+import torch
+from pytorch3d.ops import packed_to_padded
+from pytorch3d.renderer import PerspectiveCameras
+from pytorch3d.structures import Pointclouds
+
+from .point_cloud_utils import render_point_cloud_pytorch3d
+
+
+@torch.no_grad()
+def rasterize_sparse_ray_bundle(
+    ray_bundle: "pytorch3d.implicitron.models.renderer.base.ImplicitronRayBundle",
+    features: torch.Tensor,
+    image_size_hw: Tuple[int, int],
+    depth: torch.Tensor,
+    masks: Optional[torch.Tensor] = None,
+) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Rasterizes sparse features corresponding to the coordinates defined by
+    the rays in the bundle.
+
+    Args:
+        ray_bundle: ray bundle object with B x ... x 2 pixel coordinates,
+            it can be packed.
+        features: B x ... x C tensor containing per-point rendered features.
+        image_size_hw: (image_height, image_width) of the rasterized image.
+        depth: B x ... x 1 tensor containing per-point rendered depth.
+        masks: B x ... x 1 tensor containing the alpha mask of the rendered features.
+
+    Returns:
+        - image_render: B x C x H x W tensor of rasterized features
+        - depths_render: B x 1 x H x W tensor of rasterized depth maps
+        - masks_render: B x 1 x H x W tensor of opacities after splatting
+
+    Raises:
+        ValueError: if any entry of `image_size_hw` is not positive.
+    """
+    # Flatten the features and xy locations.
+    features_depth_ras = torch.cat(
+        (features.flatten(1, -2), depth.flatten(1, -2)), dim=-1
+    )
+    xys = ray_bundle.xys
+    masks_ras = None
+    if ray_bundle.is_packed():
+        camera_counts = ray_bundle.camera_counts
+        assert camera_counts is not None
+        xys, first_idxs, _ = ray_bundle.get_padded_xys()
+        masks_ras = (
+            torch.arange(xys.shape[1], device=xys.device)[:, None]
+            < camera_counts[:, None, None]
+        )
+        max_size = torch.max(camera_counts).item()
+        features_depth_ras = packed_to_padded(
+            features_depth_ras[:, 0], first_idxs, max_size
+        )
+        if masks is not None:
+            padded_mask = packed_to_padded(masks.flatten(1, -1), first_idxs, max_size)
+            masks_ras = padded_mask * masks_ras
+
+    xys_ras = xys.flatten(1, -2)
+    if masks_ras is None:
+        assert not ray_bundle.is_packed()
+        masks_ras = masks.flatten(1, -2) if masks is not None else None
+
+    if min(*image_size_hw) <= 0:
+        raise ValueError(
+            "Need to specify a positive image_size_hw for bundle rasterisation."
+        )
+
+    # Estimate the rasterization point radius so that we approximately fill
+    # the whole image given the number of rasterized points.
+    # NOTE(review): counts xys.shape[1] only; confirm for xys with >1 ray dims.
+    pt_radius = 2.0 / math.sqrt(xys.shape[1])
+
+    # Rasterize the samples.
+    features_depth_render, masks_render = rasterize_mc_samples(
+        xys_ras,
+        features_depth_ras,
+        image_size_hw,
+        radius=pt_radius,
+        masks=masks_ras,
+    )
+    images_render = features_depth_render[:, :-1]
+    depths_render = features_depth_render[:, -1:]
+    return images_render, depths_render, masks_render
+
+
+def rasterize_mc_samples(
+    xys: torch.Tensor,
+    feats: torch.Tensor,
+    image_size_hw: Tuple[int, int],
+    radius: float = 0.03,
+    topk: int = 5,
+    masks: Optional[torch.Tensor] = None,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Splats Monte-Carlo sampled per-point features onto an image grid.
+
+    The xy sample locations are lifted to a point cloud lying on the z=1 plane
+    and rendered with `render_point_cloud_pytorch3d` using a
+    `PerspectiveCameras` object constructed with only a device argument.
+
+    Args:
+        xys: B x N x 2 2D point locations in PyTorch3D NDC convention
+        feats: B x N x dim tensor containing per-point rendered features.
+        image_size_hw: (image_height, image_width) of the rasterized image.
+        radius: Rasterization point radius.
+        topk: The maximum z-buffer size for the PyTorch3D point cloud rasterizer.
+        masks: B x N x 1 tensor containing the alpha mask of the rendered
+            features; all-ones when omitted.
+
+    Returns:
+        - data_rendered: the rendered feature channels
+        - render_mask: rendered mask channel times the renderer's render mask
+    """
+    point_alpha = torch.ones_like(xys[..., :1]) if masks is None else masks
+    # The mask travels through the renderer as one extra feature channel.
+    feats = torch.cat((feats, point_alpha), dim=-1)
+    pointclouds = Pointclouds(
+        points=torch.cat([xys, torch.ones_like(xys[..., :1])], dim=-1),
+        features=feats,
+    )
+
+    data_rendered, render_mask, _ = render_point_cloud_pytorch3d(
+        PerspectiveCameras(device=feats.device),
+        pointclouds,
+        render_size=image_size_hw,
+        point_radius=radius,
+        topk=topk,
+    )
+
+    # Peel the mask channel back off the rendered features.
+    n_feat_channels = data_rendered.shape[1] - 1
+    data_rendered, masks_pt = data_rendered.split([n_feat_channels, 1], dim=1)
+    return data_rendered, masks_pt * render_mask
diff --git a/pytorch3d/pytorch3d/implicitron/tools/stats.py b/pytorch3d/pytorch3d/implicitron/tools/stats.py
new file mode 100644
index 0000000000000000000000000000000000000000..c49ba4248062ab8624fa8d84b36739663a118505
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/tools/stats.py
@@ -0,0 +1,511 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import gzip
+import json
+import logging
+import time
+import warnings
+from collections.abc import Iterable
+from itertools import cycle
+
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib import colors as mcolors
+from pytorch3d.implicitron.tools.vis_utils import get_visdom_connection
+
+logger = logging.getLogger(__name__)
+
+
+class AverageMeter:
+    """Tracks the latest value, a running average and per-epoch value history."""
+
+    def __init__(self):
+        self.history = []  # history[epoch] lists the per-update `val / n` values
+        self.reset()
+
+    def reset(self):
+        # Zero the running statistics; the per-epoch history is left untouched.
+        self.val = self.avg = self.sum = self.count = 0
+
+    def update(self, val, n=1, epoch=0):
+        # Grow the history with empty epochs until index `epoch` exists.
+        n_missing = epoch + 1 - len(self.history)
+        self.history.extend([] for _ in range(n_missing))
+
+        self.history[epoch].append(val / n)
+        self.val = val
+        self.sum += val * n
+        self.count += n
+        self.avg = self.sum / self.count
+
+    def get_epoch_averages(self, epoch=-1):
+        # None when nothing was logged; a list over all epochs for epoch == -1
+        # (NaN for empty epochs); otherwise the mean of the requested epoch.
+        if not self.history:
+            return None
+        if epoch != -1:
+            return float(np.array(self.history[epoch]).mean())
+        return [
+            float(np.array(vals).mean()) if len(vals) > 0 else float("NaN")
+            for vals in self.history
+        ]
+
+    def get_all_values(self):
+        # Concatenate the values of all epochs into a single array.
+        per_epoch = [np.array(vals) for vals in self.history]
+        return np.concatenate(per_epoch)
+
+    def get_epoch(self):
+        # Number of epochs held in the history.
+        return len(self.history)
+
+    @staticmethod
+    def from_json_str(json_str):
+        # Rebuild a meter by loading the JSON-encoded attribute dict.
+        meter = AverageMeter()
+        meter.__dict__.update(json.loads(json_str))
+        return meter
+
+
+class Stats:
+    # TODO: update this with context manager
+    """
+    stats logging object useful for gathering statistics of training a deep net in pytorch
+    Example::
+
+        # init stats structure that logs statistics 'objective' and 'top1e'
+        stats = Stats( ('objective','top1e') )
+        network = init_net() # init a pytorch module (=neural network)
+        dataloader = init_dataloader() # init a dataloader
+        for epoch in range(10):
+            # start of epoch -> call new_epoch
+            stats.new_epoch()
+
+            # iterate over batches
+            for batch in dataloader:
+
+                output = network(batch) # run and save into a dict of output variables
+
+                # stats.update() automatically parses the 'objective' and 'top1e' from
+                # the "output" dict and stores this into the db
+                stats.update(output)
+                # prints the metric averages over given epoch
+                str_out = stats.get_status_string()
+                logger.info(str_out)
+            # stores the training plots into '/tmp/epoch_stats.pdf'
+            # and plots into a visdom server running at localhost (if running)
+            stats.plot_stats(plot_file='/tmp/epoch_stats.pdf')
+
+    """
+
+    def __init__(
+        self,
+        log_vars,
+        epoch=-1,
+        visdom_env="main",
+        do_plot=True,
+        plot_file=None,
+        visdom_server="http://localhost",
+        visdom_port=8097,
+    ):
+        self.log_vars = log_vars
+        self.visdom_env = visdom_env
+        self.visdom_server = visdom_server
+        self.visdom_port = visdom_port
+        self.plot_file = plot_file
+        self.do_plot = do_plot
+        self.hard_reset(epoch=epoch)
+        self._t_last_update = None
+
+    @staticmethod
+    def from_json_str(json_str):
+        self = Stats([])
+        # load the global state
+        self.__dict__.update(json.loads(json_str))
+        # recover the AverageMeters
+        for stat_set in self.stats:
+            self.stats[stat_set] = {
+                log_var: AverageMeter.from_json_str(log_vals_json_str)
+                for log_var, log_vals_json_str in self.stats[stat_set].items()
+            }
+        return self
+
+    @staticmethod
+    def load(flpath, postfix=".jgz"):
+        flpath = _get_postfixed_filename(flpath, postfix)
+        with gzip.open(flpath, "r") as fin:
+            data = json.loads(fin.read().decode("utf-8"))
+        return Stats.from_json_str(data)
+
+    def save(self, flpath, postfix=".jgz"):
+        flpath = _get_postfixed_filename(flpath, postfix)
+        # store into a gzipped-json
+        with gzip.open(flpath, "w") as fout:
+            fout.write(json.dumps(self, cls=StatsJSONEncoder).encode("utf-8"))
+
+    # some sugar to be used with "with stats:" at the beginning of the epoch
+    def __enter__(self):
+        if self.do_plot and self.epoch >= 0:
+            self.plot_stats(self.visdom_env)
+        self.new_epoch()
+        return self  # lets `with stats as s:` bind the Stats object instead of None
+
+    def __exit__(self, type, value, traceback):
+        iserr = type is not None and issubclass(type, Exception)
+        iserr = iserr or (type is KeyboardInterrupt)
+        if iserr:
+            logger.error("error inside 'with' block")
+            return
+        if self.do_plot:
+            self.plot_stats(self.visdom_env)
+
+    def reset(self):  # to be called after each epoch
+        stat_sets = list(self.stats.keys())
+        logger.debug(f"stats: epoch {self.epoch} - reset")
+        self.it = {k: -1 for k in stat_sets}
+        for stat_set in stat_sets:
+            for stat in self.stats[stat_set]:
+                self.stats[stat_set][stat].reset()
+
+    def hard_reset(self, epoch=-1):  # to be called during object __init__
+        self.epoch = epoch
+        logger.debug(f"stats: epoch {self.epoch} - hard reset")
+        self.stats = {}
+
+        # reset
+        self.reset()
+
+    def new_epoch(self):
+        logger.debug(f"stats: new epoch {(self.epoch + 1)}")
+        self.epoch += 1
+        self.reset()  # zero the stats + increase epoch counter
+
+    def gather_value(self, val):
+        if isinstance(val, (float, int)):
+            val = float(val)
+        else:
+            val = val.data.cpu().numpy()
+            val = float(val.sum())
+        return val
+
+    def add_log_vars(self, added_log_vars):
+        for add_log_var in added_log_vars:
+            if add_log_var not in self.log_vars:  # self.stats is keyed by stat set
+                logger.debug(f"Adding {add_log_var}")
+                self.log_vars.append(add_log_var)
+
+    def update(self, preds, time_start=None, freeze_iter=False, stat_set="train"):
+        """Log every `log_vars` entry present in `preds` into `stat_set`."""
+        if self.epoch == -1:  # uninitialized
+            logger.warning(
+                "epoch==-1 means uninitialized stats structure -> new_epoch() called"
+            )
+            self.new_epoch()
+
+        if stat_set not in self.stats:
+            self.stats[stat_set] = {}
+            self.it[stat_set] = -1
+
+        if not freeze_iter:
+            self.it[stat_set] += 1
+
+        epoch = self.epoch
+
+        for stat in self.log_vars:
+
+            if stat not in self.stats[stat_set]:
+                self.stats[stat_set][stat] = AverageMeter()
+
+            if stat == "sec/it":  # compute speed
+                if time_start is None:
+                    time_per_it = 0.0
+                else:
+                    now = time.time()
+                    time_per_it = now - (self._t_last_update or time_start)
+                    self._t_last_update = now
+                val = time_per_it
+            else:
+                if stat in preds:
+                    try:
+                        val = self.gather_value(preds[stat])
+                    except KeyError:
+                        raise ValueError(
+                            "could not extract prediction %s "
+                            "from the prediction dictionary"
+                            % stat
+                        ) from None
+                else:
+                    val = None
+
+            if val is not None:
+                self.stats[stat_set][stat].update(val, epoch=epoch, n=1)
+
+    def get_epoch_averages(self, epoch=None):
+        """Per stat set, return the epoch averages (default: current epoch)."""
+        stat_sets = list(self.stats.keys())
+
+        if epoch is None:
+            epoch = self.epoch
+        if epoch == -1:
+            epoch = list(range(self.epoch))
+
+        outvals = {}
+        for stat_set in stat_sets:
+            outvals[stat_set] = {
+                "epoch": epoch,
+                "it": self.it[stat_set],
+                "epoch_max": self.epoch,
+            }
+
+            for stat in self.stats[stat_set].keys():
+                if self.stats[stat_set][stat].count == 0:
+                    continue
+                if isinstance(epoch, Iterable):
+                    avgs = self.stats[stat_set][stat].get_epoch_averages()
+                    avgs = [avgs[e] for e in epoch]
+                else:
+                    avgs = self.stats[stat_set][stat].get_epoch_averages(epoch=epoch)
+                outvals[stat_set][stat] = avgs
+
+        return outvals
+
+    def print(
+        self,
+        max_it=None,
+        stat_set="train",
+        vars_print=None,
+        get_str=False,
+        skip_nan=False,
+        stat_format=lambda s: s.replace("loss_", "").replace("prev_stage_", "ps_"),
+    ):
+        """
+        stats.print() is deprecated. Please use get_status_string() instead.
+        example:
+        str_out = stats.get_status_string()
+        logger.info(str_out)
+        """
+
+        epoch = self.epoch
+        stats = self.stats
+
+        str_out = ""
+
+        it = self.it[stat_set]
+        stat_str = ""
+        stats_print = sorted(stats[stat_set].keys())
+        for stat in stats_print:
+            if stats[stat_set][stat].count == 0:
+                continue
+            if skip_nan and not np.isfinite(stats[stat_set][stat].avg):
+                continue
+            stat_str += " {0:.12}: {1:1.3f} |".format(
+                stat_format(stat), stats[stat_set][stat].avg
+            )
+
+        head_str = "[%s] | epoch %3d | it %5d" % (stat_set, epoch, it)
+        if max_it:
+            head_str += "/ %d" % max_it
+
+        str_out = "%s | %s" % (head_str, stat_str)
+
+        if get_str:
+            return str_out
+        else:
+            warnings.warn(
+                "get_str=False is deprecated. "
+                "Please enable this flag to receive the output string.",
+                DeprecationWarning,
+            )
+            print(str_out)
+
+    def get_status_string(
+        self,
+        max_it=None,
+        stat_set="train",
+        vars_print=None,
+        skip_nan=False,
+        stat_format=lambda s: s.replace("loss_", "").replace("prev_stage_", "ps_"),
+    ):
+        return self.print(
+            max_it=max_it,
+            stat_set=stat_set,
+            vars_print=vars_print,
+            get_str=True,
+            skip_nan=skip_nan,
+            stat_format=stat_format,
+        )
+
+    def plot_stats(
+        self, visdom_env=None, plot_file=None, visdom_server=None, visdom_port=None
+    ):
+        """Plot the per-epoch averages to visdom and, if set, to `plot_file`."""
+        # use the cached visdom env if none supplied
+        if visdom_env is None:
+            visdom_env = self.visdom_env
+        if visdom_server is None:
+            visdom_server = self.visdom_server
+        if visdom_port is None:
+            visdom_port = self.visdom_port
+        if plot_file is None:
+            plot_file = self.plot_file
+
+        stat_sets = list(self.stats.keys())
+
+        logger.debug(
+            f"printing charts to visdom env '{visdom_env}' ({visdom_server}:{visdom_port})"
+        )
+
+        novisdom = False
+
+        viz = get_visdom_connection(server=visdom_server, port=visdom_port)
+        if viz is None or not viz.check_connection():
+            logger.info("no visdom server! -> skipping visdom plots")
+            novisdom = True
+
+        lines = []
+
+        # plot metrics
+        if not novisdom:
+            viz.close(env=visdom_env, win=None)
+
+        for stat in self.log_vars:
+            vals = []
+            stat_sets_now = []
+            for stat_set in stat_sets:
+                # a log var may not have been recorded for every stat set yet
+                meter = self.stats[stat_set].get(stat)
+                val = None if meter is None else meter.get_epoch_averages()
+                if val is None:
+                    continue
+                stat_sets_now.append(stat_set)
+                vals.append(np.array(val).reshape(-1))
+
+            if len(vals) == 0:
+                continue
+
+            lines.append((stat_sets_now, stat, vals))
+
+        if not novisdom:
+            for tmodes, stat, vals in lines:
+                title = "%s" % stat
+                opts = {"title": title, "legend": list(tmodes)}
+                for i, (tmode, val) in enumerate(zip(tmodes, vals)):
+                    update = "append" if i > 0 else None
+                    valid = np.where(np.isfinite(val))[0]
+                    if len(valid) == 0:
+                        continue
+                    x = np.arange(len(val))
+                    viz.line(
+                        Y=val[valid],
+                        X=x[valid],
+                        env=visdom_env,
+                        opts=opts,
+                        win=f"stat_plot_{title}",
+                        name=tmode,
+                        update=update,
+                    )
+
+        if plot_file:
+            logger.info(f"plotting stats to {plot_file}")
+            ncol = 3
+            nrow = int(np.ceil(float(len(lines)) / ncol))
+            matplotlib.rcParams.update({"font.size": 5})
+            color = cycle(plt.cm.tab10(np.linspace(0, 1, 10)))
+            fig = plt.figure(1)
+            plt.clf()
+            for idx, (tmodes, stat, vals) in enumerate(lines):
+                c = next(color)
+                plt.subplot(nrow, ncol, idx + 1)
+                plt.gca()
+                for vali, vals_ in enumerate(vals):
+                    c_ = c * (1.0 - float(vali) * 0.3)
+                    valid = np.where(np.isfinite(vals_))[0]
+                    if len(valid) == 0:
+                        continue
+                    x = np.arange(len(vals_))
+                    plt.plot(x[valid], vals_[valid], c=c_, linewidth=1)
+                plt.ylabel(stat)
+                plt.xlabel("epoch")
+                plt.gca().yaxis.label.set_color(c[0:3] * 0.75)
+                plt.legend(tmodes)
+                gcolor = np.array(mcolors.to_rgba("lightgray"))
+                grid_params = {"visible": True, "color": gcolor}
+                plt.grid(**grid_params, which="major", linestyle="-", linewidth=0.4)
+                plt.grid(**grid_params, which="minor", linestyle="--", linewidth=0.2)
+                plt.minorticks_on()
+
+            plt.tight_layout()
+            plt.show()
+            try:
+                fig.savefig(plot_file)
+            except PermissionError:
+                warnings.warn("Cant dump stats due to insufficient permissions!")
+
+    def synchronize_logged_vars(self, log_vars, default_val=float("NaN")):
+        """Make the stored stats match `log_vars`, padding with `default_val`."""
+        stat_sets = list(self.stats.keys())
+
+        # remove the additional log_vars
+        for stat_set in stat_sets:
+            for stat in self.stats[stat_set].keys():
+                if stat not in log_vars:
+                    logger.warning(f"additional stat {stat_set}:{stat} -> removing")
+
+            self.stats[stat_set] = {
+                stat: v for stat, v in self.stats[stat_set].items() if stat in log_vars
+            }
+
+        self.log_vars = log_vars  # !!!
+
+        for stat_set in stat_sets:
+            for stat in log_vars:
+                if stat not in self.stats[stat_set]:
+                    logger.info(
+                        "missing stat %s:%s -> filling with default values (%1.2f)"
+                        % (stat_set, stat, default_val)
+                    )
+                elif len(self.stats[stat_set][stat].history) != self.epoch + 1:
+                    h = self.stats[stat_set][stat].history
+                    if len(h) == 0:  # just never updated stat ... skip
+                        continue
+                    else:
+                        logger.info(
+                            "incomplete stat %s:%s -> reseting with default values (%1.2f)"
+                            % (stat_set, stat, default_val)
+                        )
+                else:
+                    continue
+
+                self.stats[stat_set][stat] = AverageMeter()
+                self.stats[stat_set][stat].reset()
+
+                lastep = self.epoch + 1
+                for ep in range(lastep):
+                    self.stats[stat_set][stat].update(default_val, n=1, epoch=ep)
+                epoch_generated = self.stats[stat_set][stat].get_epoch()
+                assert (
+                    epoch_generated == self.epoch + 1
+                ), "bad epoch of synchronized log_var! %d vs %d" % (
+                    self.epoch + 1,
+                    epoch_generated,
+                )
+
+
+class StatsJSONEncoder(json.JSONEncoder):
+    """JSON encoder that stores AverageMeter/Stats objects as nested JSON strings."""
+
+    def default(self, o):
+        if not isinstance(o, (AverageMeter, Stats)):
+            type_name = o.__class__.__name__
+            raise TypeError(f"Object of type {type_name} is not JSON serializable")
+        # The attribute dict is encoded to a string, which is then emitted as a value.
+        return self.encode(o.__dict__)
+
+
+def _get_postfixed_filename(fl, postfix):
+    return fl + ("" if fl.endswith(postfix) else postfix)
diff --git a/pytorch3d/pytorch3d/implicitron/tools/utils.py b/pytorch3d/pytorch3d/implicitron/tools/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..6cb0d4ec136467eb0996c39be668263168e07ddd
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/tools/utils.py
@@ -0,0 +1,203 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import collections
+import dataclasses
+import time
+from contextlib import contextmanager
+from typing import Any, Callable, Dict, Iterable, Iterator
+
+import torch
+
+
+@contextmanager
+def evaluating(net: torch.nn.Module):
+    """Run the `with` body with `net` in eval mode, then restore train mode."""
+    was_training = net.training
+    try:
+        net.eval()
+        yield net
+    finally:
+        if was_training:  # only modules that were training on entry are switched back
+            net.train()
+
+
+def try_to_cuda(t: Any) -> Any:
+    """
+    Move `t` to a cuda device when it exposes a `.cuda()` method.
+
+    Args:
+        t: Any object.
+
+    Returns:
+        The result of `t.cuda()`, or `t` itself when the call raised
+        an `AttributeError`.
+    """
+    try:
+        return t.cuda()
+    except AttributeError:
+        return t
+
+
+def try_to_cpu(t: Any) -> Any:
+    """
+    Move `t` to the cpu when it exposes a `.cpu()` method.
+
+    Args:
+        t: Any object.
+
+    Returns:
+        The result of `t.cpu()`, or `t` itself when the call raised
+        an `AttributeError`.
+    """
+    try:
+        return t.cpu()
+    except AttributeError:
+        return t
+
+
+def dict_to_cuda(batch: Dict[Any, Any]) -> Dict[Any, Any]:
+    """
+    Build a new dict with every value of `batch` passed through `try_to_cuda`.
+
+    Args:
+        batch: Input dict; it is not modified.
+
+    Returns:
+        A dict with the same keys and the (possibly moved) values.
+    """
+    return dict(zip(batch.keys(), map(try_to_cuda, batch.values())))
+
+
+def dict_to_cpu(batch):
+    """
+    Build a new dict with every value of `batch` passed through `try_to_cpu`.
+
+    Args:
+        batch: Input dict; it is not modified.
+
+    Returns:
+        A dict with the same keys and the (possibly moved) values.
+    """
+    return dict(zip(batch.keys(), map(try_to_cpu, batch.values())))
+
+
+def dataclass_to_cuda_(obj):
+    """
+    Pass every field of the dataclass `obj` through `try_to_cuda`, in place.
+
+    Args:
+        obj: Input dataclass instance; its fields are overwritten.
+
+    Returns:
+        The same `obj`, after the update.
+    """
+    for name in (field.name for field in dataclasses.fields(obj)):
+        setattr(obj, name, try_to_cuda(getattr(obj, name)))
+    return obj
+
+
+def dataclass_to_cpu_(obj):
+    """
+    Pass every field of the dataclass `obj` through `try_to_cpu`, in place.
+
+    Args:
+        obj: Input dataclass instance; its fields are overwritten.
+
+    Returns:
+        The same `obj`, after the update.
+    """
+    for name in (field.name for field in dataclasses.fields(obj)):
+        setattr(obj, name, try_to_cpu(getattr(obj, name)))
+    return obj
+
+
+# TODO: test it
+def cat_dataclass(batch, tensor_collator: Callable):
+    """
+    Collate a list of dataclass instances `batch` field-by-field into a single
+    instance. The first element decides the handling of each field: `None` is
+    kept, tensors go through `tensor_collator`, nested dataclasses recurse, and
+    mappings are collated per key (keys that are `None` there stay `None`).
+
+    Args:
+        batch: Input list of dataclasses.
+        tensor_collator: The function used to concatenate tensor fields.
+
+    Returns:
+        concatenated_batch: a single dataclass object of the type of `batch[0]`.
+
+    Raises:
+        ValueError: if a field of the first element has an unsupported type.
+    """
+    first = batch[0]
+    merged = {}
+    for name in (field.name for field in dataclasses.fields(first)):
+        ref_value = getattr(first, name)
+        if ref_value is None:
+            merged[name] = None
+        elif torch.is_tensor(ref_value):
+            merged[name] = tensor_collator([getattr(item, name) for item in batch])
+        elif dataclasses.is_dataclass(ref_value):
+            nested = [getattr(item, name) for item in batch]
+            merged[name] = cat_dataclass(nested, tensor_collator)
+        elif isinstance(ref_value, collections.abc.Mapping):
+            merged[name] = {
+                key: None
+                if ref_value[key] is None
+                else tensor_collator([getattr(item, name)[key] for item in batch])
+                for key in ref_value
+            }
+        else:
+            raise ValueError("Unsupported field type for concatenation")
+    return type(first)(**merged)
+
+
+def recursive_visitor(it: Iterable[Any]) -> Iterator[Any]:
+    # Depth-first flattening of nested iterables; str/bytes count as leaves.
+    for item in it:
+        is_leaf = isinstance(item, (str, bytes)) or not isinstance(item, Iterable)
+        # a leaf is yielded as-is, anything else is descended into
+        yield from (item,) if is_leaf else recursive_visitor(item)
+
+
+def get_inlier_indicators(
+    tensor: torch.Tensor, dim: int, outlier_rate: float
+) -> torch.Tensor:
+    """Bool mask of length `tensor.shape[dim]`: False at the indices of the k top and
+    k bottom values along `dim`, k = int(min(outlier_rate, 1) * length / 2)."""
+    k = int(min(outlier_rate, 1.0) * tensor.shape[dim] / 2)
+    ends = [torch.topk(t, k, dim=dim).indices.flatten() for t in (tensor, -tensor)]
+    keep_indices = tensor.new_ones((tensor.shape[dim],), dtype=torch.bool)
+    keep_indices[torch.cat(ends)] = False  # also valid for k == 0 / negative dim
+    return keep_indices
+
+
+class Timer:
+    """
+    Context manager that measures the wall-clock time spent in its body.
+
+    Stores the duration in `self.interval`; prints it with `name` unless `quiet`.
+
+    Example::
+
+        with Timer():
+            print("This print statement is timed.")
+    """
+
+    def __init__(self, name="timer", quiet=False):
+        self.name, self.quiet = name, quiet
+
+    def __enter__(self):
+        self.start = time.time()
+        return self
+
+    def __exit__(self, *args):
+        self.end = time.time()
+        self.interval = self.end - self.start
+        if not self.quiet:
+            print("%20s: %1.6f sec" % (self.name, self.interval))
diff --git a/pytorch3d/pytorch3d/implicitron/tools/video_writer.py b/pytorch3d/pytorch3d/implicitron/tools/video_writer.py
new file mode 100644
index 0000000000000000000000000000000000000000..3db55e886587a9d080877b17e797f252fe64c055
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/tools/video_writer.py
@@ -0,0 +1,179 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import shutil
+import subprocess
+import tempfile
+import warnings
+from typing import Optional, Tuple, Union
+
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+from PIL import Image
+
+_DEFAULT_FFMPEG = os.environ.get("FFMPEG", "ffmpeg")
+
+matplotlib.use("Agg")
+
+
+class VideoWriter:
+    """
+    A class for exporting videos from individually written frames via ffmpeg.
+    """
+
+    def __init__(
+        self,
+        cache_dir: Optional[str] = None,
+        ffmpeg_bin: str = _DEFAULT_FFMPEG,
+        out_path: str = "/tmp/video.mp4",
+        fps: int = 20,
+        output_format: str = "visdom",
+        rmdir_allowed: bool = False,
+        **kwargs,
+    ) -> None:
+        """
+        Args:
+            cache_dir: A directory for storing the video frames. If `None`,
+                a temporary directory will be used.
+            ffmpeg_bin: The path to an `ffmpeg` executable.
+            out_path: The path to the output video.
+            fps: The speed of the generated video in frames-per-second.
+            output_format: Format of the output video. Currently only `"visdom"`
+                is supported.
+            rmdir_allowed: If `True` delete and create `cache_dir` in case
+                it is not empty.
+        """
+        self.rmdir_allowed = rmdir_allowed
+        self.output_format = output_format
+        self.fps = fps
+        self.out_path = out_path
+        self.cache_dir = cache_dir
+        self.ffmpeg_bin = ffmpeg_bin
+        self.frames = []
+        self.regexp = "frame_%08d.png"
+        self.frame_num = 0
+
+        if self.cache_dir is not None:
+            self.tmp_dir = None
+            if os.path.isdir(self.cache_dir):
+                if rmdir_allowed:
+                    shutil.rmtree(self.cache_dir)
+                else:
+                    warnings.warn(
+                        f"Warning: cache directory not empty ({self.cache_dir})."
+                    )
+            os.makedirs(self.cache_dir, exist_ok=True)
+        else:
+            self.tmp_dir = tempfile.TemporaryDirectory()
+            self.cache_dir = self.tmp_dir.name
+
+    def write_frame(
+        self,
+        frame: Union[matplotlib.figure.Figure, np.ndarray, Image.Image, str],
+        resize: Optional[Union[float, Tuple[int, int]]] = None,
+    ) -> None:
+        """
+        Write a frame to the video.
+
+        Args:
+            frame: An object containing the frame image.
+            resize: Either a float defining the image rescaling factor
+                or a 2-tuple defining the size of the output image.
+        """
+
+        # pyre-fixme[6]: For 1st argument expected `Union[PathLike[str], str]` but
+        #  got `Optional[str]`.
+        outfile = os.path.join(self.cache_dir, self.regexp % self.frame_num)
+
+        if isinstance(frame, matplotlib.figure.Figure):
+            frame.savefig(outfile)  # save the given figure, not pyplot's current one
+            im = Image.open(outfile)
+        elif isinstance(frame, np.ndarray):
+            if frame.dtype in (np.float64, np.float32, float):
+                frame = (np.transpose(frame, (1, 2, 0)) * 255.0).astype(np.uint8)
+            im = Image.fromarray(frame)
+        elif isinstance(frame, Image.Image):
+            im = frame
+        elif isinstance(frame, str):
+            im = Image.open(frame).convert("RGB")
+        else:
+            raise ValueError("Cant convert type %s" % str(type(frame)))
+
+        if im is not None:
+            if resize is not None:
+                if isinstance(resize, float):
+                    resize = [int(resize * s) for s in im.size]
+            else:
+                resize = im.size
+            # make sure size is divisible by 2
+            resize = tuple([resize[i] + resize[i] % 2 for i in (0, 1)])
+            # LANCZOS is the filter formerly called ANTIALIAS (removed in Pillow 10).
+            im = im.resize(resize, Image.LANCZOS)
+            im.save(outfile)
+
+        self.frames.append(outfile)
+        self.frame_num += 1
+
+    def get_video(self, quiet: bool = True) -> str:
+        """
+        Generate the video from the written frames.
+
+        Args:
+            quiet: If `True`, suppresses logging messages.
+
+        Returns:
+            video_path: The path to the generated video if any frames were added.
+                Otherwise returns an empty string.
+        """
+        if self.frame_num == 0:
+            return ""
+
+        # pyre-fixme[6]: For 1st argument expected `Union[PathLike[str], str]` but
+        #  got `Optional[str]`.
+        regexp = os.path.join(self.cache_dir, self.regexp)
+
+        if shutil.which(self.ffmpeg_bin) is None:
+            raise ValueError(
+                f"Cannot find ffmpeg as `{self.ffmpeg_bin}`. "
+                + "Please set FFMPEG in the environment or ffmpeg_bin on this class."
+            )
+
+        if self.output_format == "visdom":  # works for ppt too
+            args = [
+                self.ffmpeg_bin,
+                "-r",
+                str(self.fps),
+                "-i",
+                regexp,
+                "-vcodec",
+                "h264",
+                "-f",
+                "mp4",
+                "-y",
+                "-crf",
+                "18",
+                "-b",
+                "2000k",
+                "-pix_fmt",
+                "yuv420p",
+                self.out_path,
+            ]
+            if quiet:
+                subprocess.check_call(
+                    args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+                )
+            else:
+                subprocess.check_call(args)
+        else:
+            raise ValueError("no such output type %s" % str(self.output_format))
+
+        return self.out_path
+
+    def __del__(self) -> None:
+        if getattr(self, "tmp_dir", None) is not None:  # unset if __init__ failed
+            self.tmp_dir.cleanup()
diff --git a/pytorch3d/pytorch3d/implicitron/tools/vis_utils.py b/pytorch3d/pytorch3d/implicitron/tools/vis_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b3b1f15db5a86f0b1d4b0a319b9306e8212dbfe
--- /dev/null
+++ b/pytorch3d/pytorch3d/implicitron/tools/vis_utils.py
@@ -0,0 +1,187 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+from typing import Any, Dict, Optional, Tuple, TYPE_CHECKING
+
+import torch
+
+if TYPE_CHECKING:
+    from visdom import Visdom
+
+
+logger = logging.getLogger(__name__)
+
+
+def get_visdom_env(visdom_env: str, exp_dir: str) -> str:
+    """
+    Parse out visdom environment name from the input config.
+
+    Args:
+        visdom_env: Name of the visdom environment, could be empty string.
+        exp_dir: Root experiment directory.
+
+    Returns:
+        visdom_env: The name of the visdom environment. If the given visdom_env is
+            empty, return the name of the bottom directory in exp_dir.
+    """
+    if len(visdom_env) == 0:
+        # Strip trailing separators so "exp/run1/" yields "run1", not "".
+        return exp_dir.rstrip("/").split("/")[-1]
+    # A non-empty name is used verbatim.
+    return visdom_env
+
+
+# TODO: a proper singleton
+_viz_singleton = None
+
+
+def get_visdom_connection(
+    server: str = "http://localhost",
+    port: int = 8097,
+) -> Optional["Visdom"]:
+    """
+    Obtain a connection to a visdom server if visdom is installed.
+
+    Args:
+        server: Server address; the string "None" yields no connection.
+        port: Server port.
+
+    Returns:
+        connection: The shared connection object, or None if unavailable.
+    """
+    try:
+        from visdom import Visdom
+    except ImportError:
+        logger.debug("Cannot load visdom")
+        return None
+
+    if server == "None":
+        return None
+
+    global _viz_singleton
+    if _viz_singleton is None:  # first call wins; later args are ignored
+        _viz_singleton = Visdom(server=server, port=port)
+    return _viz_singleton
+
+
+def visualize_basics(
+    viz: "Visdom",
+    preds: Dict[str, Any],
+    visdom_env_imgs: str,
+    title: str = "",
+    visualize_preds_keys: Tuple[str, ...] = (
+        "image_rgb",
+        "images_render",
+        "fg_probability",
+        "masks_render",
+        "depths_render",
+        "depth_map",
+    ),
+    store_history: bool = False,
+) -> None:
+    """
+    Visualize basic outputs of a `GenericModel` to visdom.
+
+    Args:
+        viz: The visdom object.
+        preds: A dictionary containing `GenericModel` outputs.
+        visdom_env_imgs: Target visdom environment name.
+        title: The title of produced visdom window.
+        visualize_preds_keys: The list of keys of `preds` for visualization.
+        store_history: Store the history buffer in visdom windows.
+    """
+    imout = {}
+    for k in visualize_preds_keys:
+        if k not in preds or preds[k] is None:
+            logger.info(f"cant show {k}")
+            continue
+        v = preds[k].cpu().detach().clone()
+        if k.startswith("depth"):
+            # divide by 95th percentile (k >= 1 keeps tiny maps from crashing)
+            normfac = (
+                v.reshape(v.shape[0], -1)
+                .topk(k=max(int(0.05 * (v.numel() // v.shape[0])), 1), dim=-1)
+                .values[:, -1]
+            )
+            v = v / normfac[:, None, None, None].clamp(1e-4)
+        if v.shape[1] == 1:
+            v = v.repeat(1, 3, 1, 1)
+        v = torch.nn.functional.interpolate(
+            v,
+            scale_factor=(
+                600.0
+                if (
+                    "_eval" in visdom_env_imgs
+                    and k in ("images_render", "depths_render")
+                )
+                else 200.0
+            )
+            / v.shape[2],
+            mode="bilinear",
+        )
+        imout[k] = v
+
+    # TODO: handle errors on the outside
+    try:
+        imout = {"all": torch.cat(list(imout.values()), dim=2)}
+    except RuntimeError as e:
+        print("cant cat!", e.args)
+
+    for k, v in imout.items():
+        viz.images(
+            v.clamp(0.0, 1.0),
+            win=k,
+            env=visdom_env_imgs,
+            opts={"title": title + "_" + k, "store_history": store_history},
+        )
+
+
+def make_depth_image(
+    depths: torch.Tensor,
+    masks: torch.Tensor,
+    max_quantile: float = 0.98,
+    min_quantile: float = 0.02,
+    min_out_depth: float = 0.1,
+    max_out_depth: float = 0.9,
+) -> torch.Tensor:
+    """
+    Render a batch of depth maps as grayscale images.
+
+    Args:
+        depths: Batch of depth maps, a tensor of shape `(B, 1, H, W)`.
+        masks: Batch of foreground masks, a tensor of shape `(B, 1, H, W)`.
+        max_quantile: Quantile of the valid depths of each map that is
+            mapped to `max_out_depth`.
+        min_quantile: Quantile of the valid depths of each map that is
+            mapped to `min_out_depth`.
+        min_out_depth: Gray level given to the `min_quantile` depth.
+        max_out_depth: Gray level given to the `max_quantile` depth.
+
+    Returns:
+        depth_image: A `(B, 1, H, W)` tensor of grayscale depth images, clamped
+            to [0, 1] and zeroed where the mask is 0.
+    """
+    ranges = []
+    for depth, mask in zip(depths, masks):
+        flat = depth.view(-1)
+        valid = (flat > 1e-6) * (mask.view(-1) > 0.5)
+        if valid.sum() <= 1:
+            logger.info("empty depth!")
+            ranges.append(torch.zeros(2).type_as(depths))
+            continue
+        vals = flat[valid].view(-1)
+        n_hi = max(int(round((1 - max_quantile) * vals.numel())), 1)
+        n_lo = max(int(round(min_quantile * vals.numel())), 1)
+        hi = vals.topk(k=n_hi, dim=-1).values[-1]
+        lo = vals.topk(k=n_lo, dim=-1, largest=False).values[-1]
+        ranges.append(torch.stack([lo, hi]))
+    # (B, 2) -> (B, 2, 1, 1, 1) so each bound broadcasts over (1, H, W).
+    lo_hi = torch.stack(ranges).view(-1, 2, 1, 1, 1)
+    lo, hi = lo_hi[:, 0], lo_hi[:, 1]
+    unit = (depths - lo) / (hi - lo).clamp(1e-4)
+    span = max_out_depth - min_out_depth
+    return ((unit * span + min_out_depth) * masks.float()).clamp(0.0, 1.0)
diff --git a/pytorch3d/pytorch3d/io/__init__.py b/pytorch3d/pytorch3d/io/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c28b5df11912086c2aa1801704be22a0c263775e
--- /dev/null
+++ b/pytorch3d/pytorch3d/io/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from .obj_io import load_obj, load_objs_as_meshes, save_obj
+from .pluggable import IO
+from .ply_io import load_ply, save_ply
+
+
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
diff --git a/pytorch3d/pytorch3d/io/experimental_gltf_io.py b/pytorch3d/pytorch3d/io/experimental_gltf_io.py
new file mode 100644
index 0000000000000000000000000000000000000000..10905227ddf38450058d6f6aa7137d87785d2e17
--- /dev/null
+++ b/pytorch3d/pytorch3d/io/experimental_gltf_io.py
@@ -0,0 +1,862 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+"""
+This module implements loading meshes from glTF 2 assets stored in a
+GLB container file or a glTF JSON file with embedded binary data.
+It is experimental.
+
+The module provides a MeshFormatInterpreter called
+MeshGlbFormat which must be used explicitly.
+e.g.
+
+.. code-block:: python
+
+    from pytorch3d.io import IO
+    from pytorch3d.io.experimental_gltf_io import MeshGlbFormat
+
+    io = IO()
+    io.register_meshes_format(MeshGlbFormat())
+    io.load_mesh(...)
+
+This implementation is quite restricted in what it supports.
+
+    - It does not try to validate the input against the standard.
+    - It loads the default scene only.
+    - Only triangulated geometry is supported.
+    - The geometry of all meshes of the entire scene is aggregated into a single mesh.
+      Use `load_meshes()` instead to get un-aggregated (but transformed) ones.
+    - All material properties are ignored except for either vertex color, baseColorTexture
+      or baseColorFactor. If available, one of these (in this order) is exclusively
+      used which does not match the semantics of the standard.
+"""
+
+import json
+import struct
+import warnings
+from base64 import b64decode
+from collections import defaultdict, deque
+from enum import IntEnum
+from io import BytesIO
+from typing import Any, BinaryIO, cast, Dict, List, Optional, Tuple, Union
+
+import numpy as np
+import torch
+from iopath.common.file_io import PathManager
+from PIL import Image
+from pytorch3d.io.utils import _open_file, PathOrStr
+from pytorch3d.renderer.mesh import TexturesBase, TexturesUV, TexturesVertex
+from pytorch3d.structures import join_meshes_as_scene, Meshes
+from pytorch3d.transforms import quaternion_to_matrix, Transform3d
+
+from .pluggable_formats import endswith, MeshFormatInterpreter
+
+
+_GLTF_MAGIC = 0x46546C67  # ASCII "glTF" read as a little-endian uint32
+_JSON_CHUNK_TYPE = 0x4E4F534A  # ASCII "JSON" read as a little-endian uint32
+_BINARY_CHUNK_TYPE = 0x004E4942  # ASCII "BIN\0" read as a little-endian uint32
+_DATA_URI_PREFIX = "data:application/octet-stream;base64,"
+
+
+class _PrimitiveMode(IntEnum):  # values of a mesh primitive's "mode" property
+    POINTS = 0
+    LINES = 1
+    LINE_LOOP = 2
+    LINE_STRIP = 3
+    TRIANGLES = 4  # the default, and the only mode the loader accepts
+    TRIANGLE_STRIP = 5
+    TRIANGLE_FAN = 6
+
+
+class _ComponentType(IntEnum):  # the same integer codes that key _ITEM_TYPES
+    BYTE = 5120
+    UNSIGNED_BYTE = 5121
+    SHORT = 5122
+    UNSIGNED_SHORT = 5123
+    UNSIGNED_INT = 5125
+    FLOAT = 5126
+
+
+_ITEM_TYPES: Dict[int, Any] = {  # accessor "componentType" -> numpy scalar type
+    5120: np.int8,
+    5121: np.uint8,
+    5122: np.int16,
+    5123: np.uint16,
+    5125: np.uint32,
+    5126: np.float32,
+}
+
+
+_ElementShape = Union[Tuple[int], Tuple[int, int]]
+_ELEMENT_SHAPES: Dict[str, _ElementShape] = {  # accessor "type" -> element shape
+    "SCALAR": (1,),
+    "VEC2": (2,),
+    "VEC3": (3,),
+    "VEC4": (4,),
+    "MAT2": (2, 2),
+    "MAT3": (3, 3),
+    "MAT4": (4, 4),
+}
+
+_DTYPE_BYTES: Dict[Any, int] = {  # size in bytes of one item of each numpy type
+    np.int8: 1,
+    np.uint8: 1,
+    np.int16: 2,
+    np.uint16: 2,
+    np.uint32: 4,
+    np.float32: 4,
+}
+
+
+class _TargetType(IntEnum):  # NOTE(review): presumably bufferView "target" codes
+    ARRAY_BUFFER = 34962
+    ELEMENT_ARRAY_BUFFER = 34963
+
+
+class OurEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, np.integer):  # emit numpy ints as JSON numbers
+            return int(obj)
+        return super().default(obj)
+
+
+def _read_header(stream: BinaryIO) -> Optional[Tuple[int, int]]:
+    """Return (version, length) of a GLB container, or None for anything else."""
+    header = stream.read(12)
+    if len(header) < 12:  # too short to be GLB, e.g. a minimal glTF JSON file
+        return None
+    magic, version, length = struct.unpack("<III", header)
+    return (version, length) if magic == _GLTF_MAGIC else None
+
+
+def _read_chunks(
+    stream: BinaryIO, length: int
+) -> Optional[Tuple[Dict[str, Any], np.ndarray]]:
+    """
+    Get the json header and the (optional) binary data from a GLB file.
+    Chunks of any other type are skipped, as glTF 2.0 requires of clients.
+    """
+    json_data = None
+    binary_data = None
+
+    while stream.tell() < length:
+        chunk_header = stream.read(8)
+        chunk_length, chunk_type = struct.unpack("<II", chunk_header)
+        chunk_data = stream.read(chunk_length)
+        if chunk_type == _JSON_CHUNK_TYPE:
+            json_data = json.loads(chunk_data)
+        elif chunk_type == _BINARY_CHUNK_TYPE:
+            binary_data = chunk_data
+        else:
+            warnings.warn("Unsupported chunk type")
+            continue  # its payload was already consumed by the read above
+
+    if json_data is None:
+        raise ValueError("Missing json header")
+
+    if binary_data is not None:
+        binary_data = np.frombuffer(binary_data, dtype=np.uint8)
+
+    return json_data, binary_data
+
+
+def _make_node_transform(node: Dict[str, Any]) -> Transform3d:
+    """
+    Build a Transform3d from a node's "matrix" or scale/rotation/translation.
+    NOTE(review): the "matrix" branch applies no transpose — confirm convention.
+    """
+    array = node.get("matrix")
+    if array is not None:  # Stored in column-major order
+        M = np.array(array, dtype=np.float32).reshape(4, 4, order="F")
+        return Transform3d(matrix=torch.from_numpy(M))
+
+    out = Transform3d()
+
+    # Given some of (scale/rotation/translation), we do them in that order to
+    # get points in to the world space.
+    # See https://github.com/KhronosGroup/glTF/issues/743 .
+
+    array = node.get("scale", None)
+    if array is not None:
+        scale_vector = torch.FloatTensor(array)
+        out = out.scale(scale_vector[None])
+
+    # Rotation quaternion (x, y, z, w) where w is the scalar
+    array = node.get("rotation", None)
+    if array is not None:
+        x, y, z, w = array
+        # We negate w. This is equivalent to inverting the rotation.
+        # This is needed as quaternion_to_matrix makes a matrix which
+        # operates on column vectors, whereas Transform3d wants a
+        # matrix which operates on row vectors.
+        rotation_quaternion = torch.FloatTensor([-w, x, y, z])
+        rotation_matrix = quaternion_to_matrix(rotation_quaternion)
+        out = out.rotate(R=rotation_matrix)
+
+    array = node.get("translation", None)
+    if array is not None:
+        translation_vector = torch.FloatTensor(array)
+        out = out.translate(x=translation_vector[None])
+
+    return out
+
+
+class _GLTFLoader:
+    """
+    Parses a GLB container or a glTF JSON file (with embedded base64 buffers)
+    and converts the meshes of its default scene to Meshes objects.
+    """
+    def __init__(self, stream: BinaryIO) -> None:
+        self._json_data = None
+        # Map from buffer index to (decoded) binary data
+        self._binary_data = {}
+
+        version_and_length = _read_header(stream)
+        if version_and_length is None:  # GLTF
+            stream.seek(0)
+            json_data = json.load(stream)
+        else:  # GLB
+            version, length = version_and_length
+            if version != 2:
+                warnings.warn("Unsupported version")
+                return
+            json_and_binary_data = _read_chunks(stream, length)
+            if json_and_binary_data is None:
+                raise ValueError("Data not found")
+            json_data, binary_data = json_and_binary_data
+            self._binary_data[0] = binary_data
+
+        self._json_data = json_data
+        self._accessors = json_data.get("accessors", [])
+        self._buffer_views = json_data.get("bufferViews", [])
+        self._buffers = json_data.get("buffers", [])
+        self._texture_map_images = {}
+
+    def _access_image(self, image_index: int) -> np.ndarray:
+        """
+        Get the data for an image from the file. This is only called
+        by _get_texture_map_image which caches it.
+        """
+
+        image_json = self._json_data["images"][image_index]
+        buffer_view = self._buffer_views[image_json["bufferView"]]
+        if "byteStride" in buffer_view:
+            raise NotImplementedError("strided buffer views")
+
+        length = buffer_view["byteLength"]
+        offset = buffer_view.get("byteOffset", 0)
+
+        binary_data = self.get_binary_data(buffer_view["buffer"])
+        bytesio = BytesIO(binary_data[offset : offset + length].tobytes())
+        with Image.open(bytesio) as f:
+            array = np.array(f)
+            if array.dtype == np.uint8:
+                return array.astype(np.float32) / 255.0
+            else:
+                return array
+
+    def _get_texture_map_image(self, image_index: int) -> torch.Tensor:
+        """
+        Return a texture map image as a torch tensor.
+        Calling this function repeatedly with the same arguments returns
+        the very same tensor, this allows a memory optimization to happen
+        later in TexturesUV.join_scene.
+        Any alpha channel is ignored.
+        """
+        im = self._texture_map_images.get(image_index)
+        if im is not None:
+            return im
+
+        im = torch.from_numpy(self._access_image(image_index))[:, :, :3]
+        self._texture_map_images[image_index] = im
+        return im
+
+    def _access_data(self, accessor_index: int) -> np.ndarray:
+        """
+        Get the raw data from an accessor as a numpy array.
+        """
+        accessor = self._accessors[accessor_index]
+
+        buffer_view_index = accessor.get("bufferView")
+        # Undefined buffer view (all zeros) are not (yet) supported
+        if buffer_view_index is None:
+            raise NotImplementedError("Undefined buffer view")
+
+        accessor_byte_offset = accessor.get("byteOffset", 0)
+        component_type = accessor["componentType"]
+        element_count = accessor["count"]
+        element_type = accessor["type"]
+
+        # Sparse accessors are not (yet) supported
+        if accessor.get("sparse") is not None:
+            raise NotImplementedError("Sparse Accessors")
+
+        buffer_view = self._buffer_views[buffer_view_index]
+        buffer_index = buffer_view["buffer"]
+        buffer_byte_length = buffer_view["byteLength"]
+        element_byte_offset = buffer_view.get("byteOffset", 0)
+        element_byte_stride = buffer_view.get("byteStride", 0)
+        if element_byte_stride != 0 and element_byte_stride < 4:
+            raise ValueError("Stride is too small.")
+        if element_byte_stride > 252:
+            raise ValueError("Stride is too big.")
+
+        element_shape = _ELEMENT_SHAPES[element_type]
+        item_type = _ITEM_TYPES[component_type]
+        item_dtype = np.dtype(item_type)
+        item_count = np.prod(element_shape)
+        item_size = item_dtype.itemsize
+        size = element_count * item_count * item_size
+        if size > buffer_byte_length:
+            raise ValueError("Buffer did not have enough data for the accessor")
+
+        buffer_ = self._buffers[buffer_index]
+        binary_data = self.get_binary_data(buffer_index)
+        if len(binary_data) < buffer_["byteLength"]:
+            raise ValueError("Not enough binary data for the buffer")
+
+        if element_byte_stride == 0:
+            element_byte_stride = item_size * item_count
+        # The same buffer can store interleaved elements
+        if element_byte_stride < item_size * item_count:
+            raise ValueError("Items should not overlap")
+
+        dtype = np.dtype(
+            {
+                "names": ["element"],
+                "formats": [str(element_shape) + item_dtype.str],
+                "offsets": [0],
+                "itemsize": element_byte_stride,
+            }
+        )
+
+        byte_offset = accessor_byte_offset + element_byte_offset
+        if byte_offset % item_size != 0:
+            raise ValueError("Misaligned data")
+        byte_length = element_count * element_byte_stride
+        buffer_view = binary_data[byte_offset : byte_offset + byte_length].view(dtype)[
+            "element"
+        ]
+
+        # Convert matrix data from column-major (OpenGL) to row-major order
+        if element_type in ("MAT2", "MAT3", "MAT4"):
+            buffer_view = np.transpose(buffer_view, (0, 2, 1))
+
+        return buffer_view
+
+    def _get_primitive_attribute(
+        self, primitive_attributes: Dict[str, Any], key: str, dtype
+    ) -> Optional[np.ndarray]:
+        accessor_index = primitive_attributes.get(key)
+        if accessor_index is None:
+            return None
+        primitive_attribute = self._access_data(accessor_index)
+        if key == "JOINTS_0":
+            pass
+        elif dtype == np.uint8:  # NOTE(review): tests the requested dtype — confirm
+            primitive_attribute /= 255.0
+        elif dtype == np.uint16:
+            primitive_attribute /= 65535.0
+        else:
+            if dtype != np.float32:
+                raise ValueError("Unexpected data type")
+        primitive_attribute = primitive_attribute.astype(dtype)
+        return primitive_attribute
+
+    def get_binary_data(self, buffer_index: int):
+        """
+        Get the binary data from a buffer as a 1D numpy array of bytes.
+        This is implemented for explicit uri data buffers or the main GLB data
+        segment.
+        """
+        buffer_ = self._buffers[buffer_index]
+        binary_data = self._binary_data.get(buffer_index)
+        if binary_data is None:  # Lazily decode binary data
+            uri = buffer_.get("uri")
+            if uri is None or not uri.startswith(_DATA_URI_PREFIX):
+                raise NotImplementedError("Unexpected URI type")
+            binary_data = b64decode(uri[len(_DATA_URI_PREFIX) :])
+            binary_data = np.frombuffer(binary_data, dtype=np.uint8)
+            self._binary_data[buffer_index] = binary_data
+        return binary_data
+
+    def get_texture_for_mesh(
+        self, primitive: Dict[str, Any], indices: torch.Tensor
+    ) -> Optional[TexturesBase]:
+        """
+        Get the texture object representing the given mesh primitive.
+
+        Args:
+            primitive: the mesh primitive being loaded.
+            indices: the face indices of the mesh
+        """
+        attributes = primitive["attributes"]
+        vertex_colors = self._get_primitive_attribute(attributes, "COLOR_0", np.float32)
+        if vertex_colors is not None:
+            return TexturesVertex([torch.from_numpy(vertex_colors)])
+
+        vertex_texcoords_0 = self._get_primitive_attribute(
+            attributes, "TEXCOORD_0", np.float32
+        )
+        if vertex_texcoords_0 is not None:
+            verts_uvs = torch.from_numpy(vertex_texcoords_0)
+            # Flip the v axis (second UV coordinate).
+            verts_uvs[:, 1] = 1 - verts_uvs[:, -1]
+            faces_uvs = indices
+            material_index = primitive.get("material", 0)
+            material = self._json_data["materials"][material_index]
+            # pbrMetallicRoughness and its members are optional in glTF 2.0.
+            material_roughness = material.get("pbrMetallicRoughness", {})
+            if "baseColorTexture" in material_roughness:
+                texture_index = material_roughness["baseColorTexture"]["index"]
+                texture_json = self._json_data["textures"][texture_index]
+                # Todo - include baseColorFactor when also given
+                # Todo - look at the sampler
+                image_index = texture_json["source"]
+                texture_map = self._get_texture_map_image(image_index)
+            else:
+                # Constant color; the glTF default factor is opaque white.
+                base_color = material_roughness.get("baseColorFactor", [1.0] * 4)
+                texture_map = torch.FloatTensor(base_color)[None, None, :3]
+            texture = TexturesUV(
+                maps=[texture_map],  # alpha channel ignored
+                faces_uvs=[faces_uvs],
+                verts_uvs=[verts_uvs],
+            )
+            return texture
+
+        return None
+
+    def load(self, include_textures: bool) -> List[Tuple[Optional[str], Meshes]]:
+        """
+        Attempt to load all the meshes making up the default scene from
+        the file as a list of possibly-named Meshes objects.
+
+        Args:
+            include_textures: Whether to try loading textures.
+
+        Returns:
+            List of (name, mesh) pairs, one per mesh primitive; each mesh is a
+            Meshes object containing one mesh.
+        """
+        if self._json_data is None:
+            raise ValueError("Initialization problem")
+
+        # This loads the default scene from the file.
+        # This is usually the only one.
+        # It is possible to have multiple scenes, in which case
+        # you could choose another here instead of taking the default.
+        scene_index = self._json_data.get("scene")
+
+        if scene_index is None:
+            raise ValueError("Default scene is not specified.")
+
+        scene = self._json_data["scenes"][scene_index]
+        nodes = self._json_data.get("nodes", [])
+        meshes = self._json_data.get("meshes", [])
+        root_node_indices = scene["nodes"]
+
+        names_meshes_list: List[Tuple[Optional[str], Meshes]] = []
+
+        # Keep track and apply the transform of the scene node to mesh vertices
+        Q = deque([(Transform3d(), node_index) for node_index in root_node_indices])
+
+        while Q:
+            parent_transform, current_node_index = Q.popleft()
+
+            current_node = nodes[current_node_index]
+
+            transform = _make_node_transform(current_node)
+            current_transform = transform.compose(parent_transform)
+
+            if "mesh" in current_node:
+                mesh_index = current_node["mesh"]
+                mesh = meshes[mesh_index]
+                mesh_name = mesh.get("name", None)
+                mesh_transform = current_transform
+
+                for primitive in mesh["primitives"]:
+                    attributes = primitive["attributes"]
+                    accessor_index = attributes["POSITION"]
+                    positions = torch.from_numpy(
+                        self._access_data(accessor_index).copy()
+                    )
+                    positions = mesh_transform.transform_points(positions)
+
+                    mode = primitive.get("mode", _PrimitiveMode.TRIANGLES)
+                    if mode != _PrimitiveMode.TRIANGLES:
+                        raise NotImplementedError("Non triangular meshes")
+
+                    if "indices" in primitive:
+                        accessor_index = primitive["indices"]
+                        indices = self._access_data(accessor_index).astype(np.int64)
+                    else:
+                        indices = np.arange(0, len(positions), dtype=np.int64)
+                    indices = torch.from_numpy(indices.reshape(-1, 3))
+
+                    texture = None
+                    if include_textures:
+                        texture = self.get_texture_for_mesh(primitive, indices)
+
+                    mesh_obj = Meshes(
+                        verts=[positions], faces=[indices], textures=texture
+                    )
+                    names_meshes_list.append((mesh_name, mesh_obj))
+
+            # Breadth-first traversal: children are queued with the transform
+            # accumulated so far, which they compose their own onto.
+            for child_index in current_node.get("children", []):
+                Q.append((current_transform, child_index))
+
+        return names_meshes_list
+
+
+def load_meshes(
+    path: PathOrStr,
+    path_manager: PathManager,
+    include_textures: bool = True,
+) -> List[Tuple[Optional[str], Meshes]]:
+    """
+    Read a GLB file and return the meshes of its default scene individually,
+    without joining them.
+
+    Args:
+        path: location of the GLB file to read.
+        path_manager: PathManager used to interpret and open `path`.
+        include_textures: if True, textures are loaded along with the geometry.
+
+    Returns:
+        A list of (name, mesh) tuples. `name` is the mesh's "name" property
+            from the file, or None when it has none; `mesh` is a Meshes
+            object containing one mesh.
+    """
+    with _open_file(path, path_manager, "rb") as stream:
+        gltf_loader = _GLTFLoader(cast(BinaryIO, stream))
+    # The loader is constructed while the stream is open; load() runs afterwards.
+    return gltf_loader.load(include_textures=include_textures)
+
+
+class _GLTFWriter:
+    """
+    Writes a Meshes object to a binary stream as a GLB (binary glTF 2) file.
+
+    Geometry is read from the packed verts and faces; texture data is read from
+    the first entry of the textures' lists. `save()` emits the JSON built here.
+    """
+
+    def __init__(self, data: Meshes, buffer_stream: BinaryIO) -> None:
+        self._json_data = defaultdict(list)
+        self.mesh = data
+        self.buffer_stream = buffer_stream
+
+        # initialize json with one scene and one node
+        scene_index = 0
+        # pyre-fixme[6]: Incompatible parameter type
+        self._json_data["scene"] = scene_index
+        self._json_data["scenes"].append({"nodes": [scene_index]})
+        self._json_data["asset"] = {"version": "2.0"}
+        node = {"name": "Node", "mesh": 0}
+        self._json_data["nodes"].append(node)
+
+        # mesh primitives; accessor 0 = positions, 1 = indices, 2 = colors or uvs
+        meshes = defaultdict(list)
+        # pyre-fixme[6]: Incompatible parameter type
+        meshes["name"] = "Node-Mesh"
+        primitives = {
+            "attributes": {"POSITION": 0},
+            "indices": 1,
+            "mode": _PrimitiveMode.TRIANGLES,
+        }
+        if isinstance(self.mesh.textures, TexturesVertex):
+            primitives["attributes"]["COLOR_0"] = 2
+        elif isinstance(self.mesh.textures, TexturesUV):
+            primitives["attributes"]["TEXCOORD_0"] = 2
+            primitives["material"] = 0
+        meshes["primitives"].append(primitives)
+        self._json_data["meshes"].append(meshes)
+
+        # default material
+        material = {
+            "name": "material_1",
+            "pbrMetallicRoughness": {
+                "baseColorTexture": {"index": 0},
+                "baseColorFactor": [1, 1, 1, 1],
+                "metallicFactor": 0,
+                "roughnessFactor": 0.99,
+            },
+            "emissiveFactor": [0, 0, 0],
+            "alphaMode": "OPAQUE",
+        }
+        self._json_data["materials"].append(material)
+
+        # default sampler
+        sampler = {"magFilter": 9729, "minFilter": 9986, "wrapS": 10497, "wrapT": 10497}
+        self._json_data["samplers"].append(sampler)
+
+        # default textures
+        texture = {"sampler": 0, "source": 0}
+        self._json_data["textures"].append(texture)
+
+    def _write_accessor_json(self, key: str) -> Tuple[int, np.ndarray]:
+        """
+        Append the accessor for one data stream to the JSON.
+
+        Args:
+            key: one of "positions", "texcoords", "texvertices" or "indices".
+
+        Returns:
+            (byte_length, data): byte size of the stream and its numpy array.
+        """
+        name = "Node-Mesh_%s" % key
+        byte_offset = 0
+        if key == "positions":
+            data = self.mesh.verts_packed().cpu().numpy()
+            component_type = _ComponentType.FLOAT
+            element_type = "VEC3"
+            buffer_view = 0
+            element_min = list(map(float, np.min(data, axis=0)))
+            element_max = list(map(float, np.max(data, axis=0)))
+            byte_per_element = 3 * _DTYPE_BYTES[_ITEM_TYPES[_ComponentType.FLOAT]]
+        elif key == "texcoords":
+            component_type = _ComponentType.FLOAT
+            # Copy: the array from .numpy() can share memory with the tensor, so
+            # flipping it in place would modify the uvs of the mesh being saved.
+            data = self.mesh.textures.verts_uvs_list()[0].cpu().numpy().copy()
+            data[:, 1] = 1 - data[:, -1]  # flip y tex-coordinate
+            element_type = "VEC2"
+            buffer_view = 2
+            element_min = list(map(float, np.min(data, axis=0)))
+            element_max = list(map(float, np.max(data, axis=0)))
+            byte_per_element = 2 * _DTYPE_BYTES[_ITEM_TYPES[_ComponentType.FLOAT]]
+        elif key == "texvertices":
+            component_type = _ComponentType.FLOAT
+            data = self.mesh.textures.verts_features_list()[0].cpu().numpy()
+            element_type = "VEC3"
+            buffer_view = 2
+            element_min = list(map(float, np.min(data, axis=0)))
+            element_max = list(map(float, np.max(data, axis=0)))
+            byte_per_element = 3 * _DTYPE_BYTES[_ITEM_TYPES[_ComponentType.FLOAT]]
+        elif key == "indices":
+            component_type = _ComponentType.UNSIGNED_SHORT
+            faces = self.mesh.faces_packed().cpu().numpy()
+            data = faces.astype(_ITEM_TYPES[component_type])
+            # astype() wraps out-of-range values silently; refuse to write them.
+            if (data != faces).any():
+                raise ValueError("face indices do not fit the GLB index type")
+            element_type = "SCALAR"
+            buffer_view = 1
+            element_min = [int(np.min(data))]
+            element_max = [int(np.max(data))]
+            byte_per_element = 3 * _DTYPE_BYTES[_ITEM_TYPES[component_type]]
+        else:
+            raise NotImplementedError(
+                "invalid key accessor, should be one of positions, indices or texcoords"
+            )
+
+        count = int(data.shape[0])
+        byte_length = count * byte_per_element
+        accessor_json = {
+            "name": name,
+            "componentType": component_type,
+            "type": element_type,
+            "bufferView": buffer_view,
+            "byteOffset": byte_offset,
+            "min": element_min,
+            "max": element_max,
+            "count": count * 3 if key == "indices" else count,
+        }
+        self._json_data["accessors"].append(accessor_json)
+        return (byte_length, data)
+
+    def _write_bufferview(self, key: str, **kwargs):
+        """
+        Append the bufferView describing one data stream to the JSON.
+
+        Args:
+            key: one of "positions", "texcoords", "texvertices" or "indices".
+            **kwargs: `offset` and `byte_length` of the stream within buffer 0.
+        """
+        if key not in ["positions", "texcoords", "texvertices", "indices"]:
+            raise ValueError(
+                "key must be one of positions, texcoords, texvertices or indices"
+            )
+
+        bufferview = {
+            "name": "bufferView_%s" % key,
+            "buffer": 0,
+        }
+        if key == "indices":
+            target = _TargetType.ELEMENT_ARRAY_BUFFER
+        else:
+            # vertex attributes (positions, uvs, colors) use ARRAY_BUFFER + byteStride
+            target = _TargetType.ARRAY_BUFFER
+            float_bytes = _DTYPE_BYTES[_ITEM_TYPES[_ComponentType.FLOAT]]
+            floats_per_element = 2 if key == "texcoords" else 3
+            bufferview["byteStride"] = int(floats_per_element * float_bytes)
+
+        bufferview["target"] = target
+        bufferview["byteOffset"] = kwargs.get("offset")
+        bufferview["byteLength"] = kwargs.get("byte_length")
+        self._json_data["bufferViews"].append(bufferview)
+
+    def _write_image_buffer(self, **kwargs) -> Tuple[int, bytes]:
+        """
+        Encode the first texture map as PNG, add its bufferView (at the `offset`
+        keyword) and image entry to the JSON; return (byte length, PNG bytes).
+        """
+        image_np = self.mesh.textures.maps_list()[0].cpu().numpy()
+        image_array = (image_np * 255.0).astype(np.uint8)
+        im = Image.fromarray(image_array)
+        with BytesIO() as f:
+            im.save(f, format="PNG")
+            image_data = f.getvalue()
+
+        image_data_byte_length = len(image_data)
+        bufferview_image = {"buffer": 0, "byteOffset": kwargs.get("offset")}
+        bufferview_image["byteLength"] = image_data_byte_length
+        self._json_data["bufferViews"].append(bufferview_image)
+
+        image = {"name": "texture", "mimeType": "image/png", "bufferView": 3}
+        self._json_data["images"].append(image)
+        return (image_data_byte_length, image_data)
+
+    def save(self):
+        """
+        Write the GLB header, the JSON chunk and the binary chunk to the stream.
+        Raises ValueError if verts or faces are missing or the indices overflow.
+        """
+        # check validity of mesh
+        if self.mesh.verts_packed() is None or self.mesh.faces_packed() is None:
+            raise ValueError("invalid mesh to save, verts or face indices are empty")
+
+        # accessors for positions, texture uvs and face indices
+        pos_byte, pos_data = self._write_accessor_json("positions")
+        idx_byte, idx_data = self._write_accessor_json("indices")
+        tex_key = None
+        if self.mesh.textures is not None:
+            if hasattr(self.mesh.textures, "verts_features_list"):
+                tex_key = "texvertices"
+            elif self.mesh.textures.verts_uvs_list()[0] is not None:
+                tex_key = "texcoords"
+        include_textures = tex_key is not None
+        if include_textures:
+            tex_byte, tex_data = self._write_accessor_json(tex_key)
+
+        # bufferViews for positions, texture coords and indices
+        byte_offset = 0
+        self._write_bufferview("positions", byte_length=pos_byte, offset=byte_offset)
+        byte_offset += pos_byte
+
+        self._write_bufferview("indices", byte_length=idx_byte, offset=byte_offset)
+        byte_offset += idx_byte
+
+        if include_textures:
+            self._write_bufferview(tex_key, byte_length=tex_byte, offset=byte_offset)
+            byte_offset += tex_byte
+
+        # image bufferView
+        include_image = False
+        if self.mesh.textures is not None and hasattr(self.mesh.textures, "maps_list"):
+            include_image = True
+            image_byte, image_data = self._write_image_buffer(offset=byte_offset)
+            byte_offset += image_byte
+
+        # buffers
+        self._json_data["buffers"].append({"byteLength": int(byte_offset)})
+
+        # organize into a glb
+        json_bytes = bytes(json.dumps(self._json_data, cls=OurEncoder), "utf-8")
+        json_length = len(json_bytes)
+
+        # write header
+        version = 2
+        total_header_length = 28  # (file header = 12) + 2 * (chunk header = 8)
+        file_length = json_length + byte_offset + total_header_length
+        header = struct.pack("<III", _GLTF_MAGIC, version, file_length)
+        self.buffer_stream.write(header)
+
+        # write json
+        self.buffer_stream.write(struct.pack("<II", json_length, _JSON_CHUNK_TYPE))
+        self.buffer_stream.write(json_bytes)
+
+        # write binary data
+        self.buffer_stream.write(struct.pack("<II", byte_offset, _BINARY_CHUNK_TYPE))
+        self.buffer_stream.write(pos_data)
+        self.buffer_stream.write(idx_data)
+        if include_textures:
+            self.buffer_stream.write(tex_data)
+        if include_image:
+            self.buffer_stream.write(image_data)
+
+
+class MeshGlbFormat(MeshFormatInterpreter):
+    """
+    Mesh reader/writer for glTF 2 assets stored in a GLB container file or a
+    glTF JSON file with embedded binary data.
+
+    Only a restricted subset of the format is supported:
+
+        - The input is not validated against the standard.
+        - Only the default scene is loaded.
+        - Only triangulated geometry is supported.
+        - The geometry of every mesh in the scene is merged into a single mesh.
+        Call `load_meshes()` to obtain the separate (but transformed) meshes.
+        - Of the material properties only one of vertex color, baseColorTexture
+        or baseColorFactor is used: the first available one, in this order, and
+        exclusively, which does not match the semantics of the standard.
+    """
+
+    def __init__(self) -> None:
+        self.known_suffixes = (".glb",)
+
+    def read(
+        self,
+        path: PathOrStr,
+        include_textures: bool,
+        device,
+        path_manager: PathManager,
+        **kwargs,
+    ) -> Optional[Meshes]:
+        if not endswith(path, self.known_suffixes):
+            return None
+
+        named_meshes = load_meshes(
+            path=path,
+            path_manager=path_manager,
+            include_textures=include_textures,
+        )
+
+        # Drop the names and merge all the meshes into a single scene mesh.
+        scene = join_meshes_as_scene([mesh for _, mesh in named_meshes])
+        return scene.to(device)
+
+    def save(
+        self,
+        data: Meshes,
+        path: PathOrStr,
+        path_manager: PathManager,
+        binary: Optional[bool],
+        **kwargs,
+    ) -> bool:
+        """
+        Write `data` to a GLB file.
+
+        Args:
+            data: meshes to save
+            path: path of the GLB file to write; it must end in ".glb"
+            path_manager: PathManager object for interpreting the path
+            binary: not used by this writer
+
+        Returns True if the file was written, False if the suffix is not ".glb".
+        """
+        if not endswith(path, self.known_suffixes):
+            return False
+
+        # The writer emits the complete GLB into the opened stream.
+        with _open_file(path, path_manager, "wb") as stream:
+            _GLTFWriter(data, cast(BinaryIO, stream)).save()
+        return True
diff --git a/pytorch3d/pytorch3d/io/mtl_io.py b/pytorch3d/pytorch3d/io/mtl_io.py
new file mode 100644
index 0000000000000000000000000000000000000000..166f98a8ed55707ecb22a54a05c8be85031d7487
--- /dev/null
+++ b/pytorch3d/pytorch3d/io/mtl_io.py
@@ -0,0 +1,535 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""This module implements utility functions for loading .mtl files and textures."""
+import os
+import warnings
+from typing import Dict, List, Optional, Tuple
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from iopath.common.file_io import PathManager
+from pytorch3d.common.compat import meshgrid_ij
+from pytorch3d.common.datatypes import Device
+from pytorch3d.io.utils import _open_file, _read_image
+
+
+def make_mesh_texture_atlas(
+    material_properties: Dict,
+    texture_images: Dict,
+    face_material_names,
+    faces_uvs: torch.Tensor,
+    verts_uvs: torch.Tensor,
+    texture_size: int,
+    texture_wrap: Optional[str],
+) -> torch.Tensor:
+    """
+    Build an (F, R, R, 3) texture atlas, i.e. one square texture map per face,
+    from the materials defined in a .mtl file and the texture uv coordinates of
+    the faces. R is the texture_size and F is the number of faces in the mesh.
+
+    Args:
+        material_properties: dict mapping each material name to a dict of its
+                properties, which is empty if the material has none.
+        texture_images: dict mapping material names to texture images.
+        face_material_names: numpy array with the name of the material of each
+            face. Faces without an associated material hold an empty string and
+            keep a uniform white texture.
+        faces_uvs: LongTensor of shape (F, 3) which indexes into verts_uvs for
+            each face in the mesh.
+        verts_uvs: FloatTensor of shape (V, 2) with the uv coordinates per vertex.
+        texture_size: resolution R of the texture map returned for each face,
+            which has shape (texture_size, texture_size, 3).
+        texture_wrap: string, one of ["repeat", "clamp", None]
+            "repeat": for uv values outside the range [0, 1] the integer part
+            is ignored, so that a repeating pattern is formed.
+            "clamp": the uv values are clamped to the range [0, 1].
+            None: the uv values are used as they are.
+
+    Returns:
+        atlas: FloatTensor of shape (F, texture_size, texture_size, 3) with the
+        texture map of every face.
+    """
+    num_faces = faces_uvs.shape[0]
+    side = texture_size
+    device = faces_uvs.device
+
+    # One (side x side) texture map per face, initialized to a white color.
+    # TODO: allow customization of this base color?
+    atlas = torch.ones(
+        size=(num_faces, side, side, 3), dtype=torch.float32, device=device
+    )
+
+    # With neither materials nor texture images there is nothing to apply,
+    # so the white atlas is the result.
+    if not (material_properties or texture_images):
+        return atlas
+
+    # First pass: fill the faces with the diffuse color of their material.
+    # This is done separately from the texture interpolation below because
+    # not all materials have texture images.
+    for material_name, props in material_properties.items():
+        # Boolean mask of the faces which use this material, moved to the
+        # device of the atlas.
+        uses_material = torch.from_numpy(face_material_names == material_name)
+        uses_material = uses_material.to(device)
+        if uses_material.sum() > 0 and "diffuse_color" in props:
+            # Broadcast the color over all the texels of the selected faces.
+            atlas[uses_material, ...] = props["diffuse_color"][None, :]
+
+    # Some meshes only have material properties and no vertex texture
+    # coordinates. There is nothing to sample then, so the atlas is complete.
+    if len(verts_uvs) == 0:
+        return atlas
+
+    # (F, 3, 2) tensor with the texture coordinates of the vertices of each face.
+    faces_verts_uvs = verts_uvs[faces_uvs]
+
+    if texture_wrap == "repeat":
+        # Follow the GL_REPEAT convention of OpenGL for uv coordinates outside
+        # the range [0, 1]: the integer part of the coordinate is ignored so
+        # that a repeating pattern is formed.
+        # Shapenet data uses this format see:
+        # https://shapenet.org/qaforum/index.php?qa=15&qa_1=why-is-the-texture-coordinate-in-the-obj-file-not-in-the-range # noqa: B950
+        if ((faces_verts_uvs > 1) | (faces_verts_uvs < 0)).any():
+            msg = "Texture UV coordinates outside the range [0, 1]. \
+                The integer part will be ignored to form a repeating pattern."
+            warnings.warn(msg)
+            faces_verts_uvs = faces_verts_uvs % 1
+    elif texture_wrap == "clamp":
+        # Restrict the uv coordinates to the [0, 1] range.
+        faces_verts_uvs = faces_verts_uvs.clamp(0.0, 1.0)
+
+    # Second pass: resample every texture image into the texture maps of the
+    # faces which use its material. Faces without a texture image keep the
+    # color assigned above (white by default).
+    for material_name, image in texture_images.items():
+        # Only use the RGB channels of RGBA images.
+        rgb = image[:, :, :3] if image.shape[2] == 4 else image
+
+        # Reverse the image y direction and cast it to the tensor type of
+        # the uv coordinates.
+        rgb = torch.flip(rgb, [0]).type_as(faces_verts_uvs)
+
+        # Boolean mask of the faces which use this texture image.
+        uses_material = torch.from_numpy(face_material_names == material_name).to(
+            faces_verts_uvs.device
+        )
+
+        # uv coordinates of just those faces.
+        uvs_subset = faces_verts_uvs[uses_material, :, :]
+
+        # TODO: should the texture map values be multiplied
+        # by the diffuse material color (i.e. use *= as the atlas has
+        # been initialized to the diffuse color)?. This is
+        # not being done in SoftRas.
+        # For now the sampled texture simply replaces the diffuse color.
+        atlas[uses_material, :, :] = make_material_atlas(rgb, uvs_subset, side)
+
+    return atlas
+
+
+def make_material_atlas(
+    image: torch.Tensor, faces_verts_uvs: torch.Tensor, texture_size: int
+) -> torch.Tensor:
+    r"""
+    Given a single texture image and the uv coordinates for all the
+    face vertices, create a square texture map per face using
+    the formulation from [1].
+
+    For a triangle with vertices (v0, v1, v2) we can create a barycentric coordinate system
+    with the x axis being the vector (v0 - v2) and the y axis being the vector (v1 - v2).
+    The barycentric coordinates range from [0, 1] in the +x and +y direction so this creates
+    a triangular texture space with vertices at (0, 1), (0, 0) and (1, 0).
+
+    The per face texture map is of shape (texture_size, texture_size, 3)
+    which is a square. To map a triangular texture to a square grid, each
+    triangle is parametrized as follows (e.g. R = texture_size = 3):
+
+    The triangle texture is first divided into RxR = 9 subtriangles which each
+    map to one grid cell. The numbers in the grid cells and triangles show the mapping.
+
+    .. code-block:: python
+
+        Triangular Texture Space:
+
+              1
+                |\
+                |6 \
+                |____\
+                |\  7 |\
+                |3 \  |4 \
+                |____\|____\
+                |\ 8  |\  5 |\
+                |0 \  |1 \  |2 \
+                |____\|____\|____\
+               0                   1
+
+        Square per face texture map:
+
+               R ____________________
+                |      |      |      |
+                |  6   |  7   |  8   |
+                |______|______|______|
+                |      |      |      |
+                |  3   |  4   |  5   |
+                |______|______|______|
+                |      |      |      |
+                |  0   |  1   |  2   |
+                |______|______|______|
+               0                      R
+
+
+    The barycentric coordinates of each grid cell are calculated using the
+    xy coordinates:
+
+    .. code-block:: python
+
+            The cartesian coordinates are:
+
+            Grid 1:
+
+               R ____________________
+                |      |      |      |
+                |  20  |  21  |  22  |
+                |______|______|______|
+                |      |      |      |
+                |  10  |  11  |  12  |
+                |______|______|______|
+                |      |      |      |
+                |  00  |  01  |  02  |
+                |______|______|______|
+               0                      R
+
+            where 02 means y = 0, x = 2
+
+        Now consider this subset of the triangle which corresponds to
+        grid cells 0 and 8:
+
+        .. code-block:: python
+
+            1/R  ________
+                |\    8  |
+                |  \     |
+                | 0   \  |
+                |_______\|
+               0          1/R
+
+        The centroids of the triangles are:
+            0: (1/3, 1/3) * 1/R
+            8: (2/3, 2/3) * 1/R
+
+    For each grid cell we can now calculate the centroid `(c_y, c_x)`
+    of the corresponding texture triangle:
+        - if `(x + y) < R`, then offset the centroid of
+            triangle 0 by `(y, x) * (1/R)`
+        - if `(x + y) >= R`, then offset the centroid of
+            triangle 8 by `((R-1-y), (R-1-x)) * (1/R)`.
+
+    This is equivalent to updating the portion of Grid 1
+    above the diagonal, replacing `(y, x)` with `((R-1-y), (R-1-x))`:
+
+    .. code-block:: python
+
+              R _____________________
+                |      |      |      |
+                |  20  |  01  |  00  |
+                |______|______|______|
+                |      |      |      |
+                |  10  |  11  |  10  |
+                |______|______|______|
+                |      |      |      |
+                |  00  |  01  |  02  |
+                |______|______|______|
+               0                      R
+
+    The barycentric coordinates (w0, w1, w2) are then given by:
+
+    .. code-block:: python
+
+        w0 = c_x
+        w1 = c_y
+        w2 = 1- w0 - w1
+
+    Args:
+        image: FloatTensor of shape (H, W, 3)
+        faces_verts_uvs: uv coordinates for each vertex in each face  (F, 3, 2)
+        texture_size: int
+
+    Returns:
+        atlas: a FloatTensor of shape (F, texture_size, texture_size, 3) giving a
+            per face texture map.
+
+    [1] Liu et al, 'Soft Rasterizer: A Differentiable Renderer for Image-based
+        3D Reasoning', ICCV 2019
+    """
+    R = texture_size
+    device = faces_verts_uvs.device
+    rng = torch.arange(R, device=device)
+
+    # Meshgrid returns (row, column) i.e (Y, X)
+    # Change order to (X, Y) to make the grid.
+    Y, X = meshgrid_ij(rng, rng)
+    # pyre-fixme[28]: Unexpected keyword argument `axis`.
+    grid = torch.stack([X, Y], axis=-1)  # (R, R, 2)
+
+    # Grid cells below the diagonal: x + y < R.
+    below_diag = grid.sum(-1) < R
+
+    # Map the integer (x, y) grid cell coordinates to the barycentric
+    # coordinates (in [0, 1]) of the texture triangle centroids.
+    bary = torch.zeros((R, R, 3), device=device)  # (R, R, 3)
+    slc = torch.arange(2, device=device)[:, None]  # (2, 1) index of the w0, w1 slots
+    # w0, w1
+    bary[below_diag, slc] = ((grid[below_diag] + 1.0 / 3.0) / R).T
+    # w0, w1 for above diagonal grid cells.
+    bary[~below_diag, slc] = (((R - 1.0 - grid[~below_diag]) + 2.0 / 3.0) / R).T
+    # w2 = 1. - w0 - w1
+    bary[..., -1] = 1 - bary[..., :2].sum(dim=-1)
+
+    # Calculate the uv position in the image for each pixel
+    # in the per face texture map
+    # (F, 1, 1, 3, 2) * (R, R, 3, 1) -> (F, R, R, 3, 2) -> (F, R, R, 2)
+    uv_pos = (faces_verts_uvs[:, None, None] * bary[..., None]).sum(-2)
+
+    # bi-linearly interpolate the textures from the images
+    # using the uv coordinates given by uv_pos.
+    textures = _bilinear_interpolation_grid_sample(image, uv_pos)
+
+    return textures
+
+
+def _bilinear_interpolation_vectorized(
+    image: torch.Tensor, grid: torch.Tensor
+) -> torch.Tensor:
+    """
+    Bi linearly interpolate the image using the uv positions in the flow-field
+    grid (following the naming conventions for torch.nn.functional.grid_sample).
+
+    This implementation uses the same steps as in the SoftRasterizer CUDA kernel
+    for loading textures. We are keeping it for reference to make it easy to
+    compare if required.
+
+    However it doesn't properly handle the out of bound values in the same way as
+    the grid_sample function does with the padding_mode argument (only the far
+    neighbor indices are clamped to the image). This vectorized version requires
+    less memory than _bilinear_interpolation_grid_sample but is slightly slower.
+
+    Args:
+        image: FloatTensor of shape (H, W, D) a single image/input tensor with D
+            channels.
+        grid: FloatTensor of shape (N, R, R, 2) giving the pixel locations of the
+            points at which to sample a value in the image. The grid values must
+            be in the range [0, 1]. u is the x direction and v is the y direction.
+
+    Returns:
+        out: FloatTensor of shape (N, R, R, D) giving the interpolated
+            D dimensional value from image at each of the pixel locations in grid.
+    """
+    H, W, _ = image.shape
+    # Convert [0, 1] to the range [0, W-1] and [0, H-1]
+    grid = grid * torch.tensor([W - 1, H - 1]).type_as(grid)
+    weight_1 = grid - grid.int()
+    weight_0 = 1.0 - weight_1
+
+    grid_x, grid_y = grid.unbind(-1)
+    # Clamp the +1 neighbors: at grid == 1 they would index past the border.
+    y0 = grid_y.to(torch.int64)
+    y1 = (grid_y + 1).to(torch.int64).clamp(max=H - 1)
+    x0 = grid_x.to(torch.int64)
+    x1 = (x0 + 1).clamp(max=W - 1)
+
+    weight_x0, weight_y0 = weight_0.unbind(-1)
+    weight_x1, weight_y1 = weight_1.unbind(-1)
+
+    # Bi-linear interpolation
+    # positions = [[y,     x], [(y+1),     x]
+    #              [y, (x+1)], [(y+1), (x+1)]]
+    # weights   = [[wx0*wy0, wx0*wy1],
+    #              [wx1*wy0, wx1*wy1]]
+    out = (
+        image[y0, x0] * (weight_x0 * weight_y0)[..., None]
+        + image[y1, x0] * (weight_x0 * weight_y1)[..., None]
+        + image[y0, x1] * (weight_x1 * weight_y0)[..., None]
+        + image[y1, x1] * (weight_x1 * weight_y1)[..., None]
+    )
+
+    return out
+
+
+def _bilinear_interpolation_grid_sample(
+    image: torch.Tensor, grid: torch.Tensor
+) -> torch.Tensor:
+    """
+    Sample the image at the uv positions of the flow-field grid with bilinear
+    interpolation, using torch.nn.functional.grid_sample and its conventions.
+
+    Compared to _bilinear_interpolation_vectorized this implementation is
+    faster but requires more memory, so it can cause OOMs. Prefer it when
+    speed is the issue.
+
+    Args:
+        image: FloatTensor of shape (H, W, D), a single image/input tensor with
+            D channels.
+        grid: FloatTensor of shape (N, R, R, 2) with the locations at which
+            the image is sampled. The values must be in the range [0, 1];
+            u is the x direction and v is the y direction.
+
+    Returns:
+        out: FloatTensor of shape (N, R, R, D) with the interpolated
+            D dimensional value of the image at each location in grid.
+    """
+
+    num_grids = grid.shape[0]
+    # grid_sample expects coordinates in the range [-1, 1] instead of [0, 1].
+    coords = grid * 2.0 - 1.0
+    # (H, W, D) -> (1, D, H, W), then broadcast to one image per grid.
+    batched = image.permute(2, 0, 1).unsqueeze(0).expand(num_grids, -1, -1, -1)
+    # align_corners=True is needed to match the output of the SoftRas cuda
+    # kernel for bilinear sampling.
+    sampled = F.grid_sample(batched, coords, mode="bilinear", align_corners=True)
+    return sampled.permute(0, 2, 3, 1)
+
+
+MaterialProperties = Dict[str, Dict[str, torch.Tensor]]  # name -> property -> tensor
+TextureFiles = Dict[str, str]  # material name -> filename from its map_Kd statement
+TextureImages = Dict[str, torch.Tensor]  # material name -> texture image tensor
+
+
+def _parse_mtl(
+    f: str, path_manager: PathManager, device: Device = "cpu"
+) -> Tuple[MaterialProperties, TextureFiles]:
+    """
+    Parse a .mtl file into material properties and diffuse texture filenames.
+
+    Returns:
+        material_properties: material name -> dict of the tensors parsed from
+            its Kd, Ka, Ks and Ns statements, created on `device`.
+        texture_files: material name -> filename given by its map_Kd statement.
+    """
+    # .mtl statement -> key under which its values are stored.
+    property_keys = {
+        "Kd": "diffuse_color",  # RGB diffuse reflectivity
+        "Ka": "ambient_color",  # RGB ambient reflectivity
+        "Ks": "specular_color",  # RGB specular reflectivity
+        "Ns": "shininess",  # Specular exponent
+    }
+    material_properties = {}
+    texture_files = {}
+    material_name = ""
+
+    with _open_file(f, path_manager, "r") as mtl_file:
+        for line in mtl_file:
+            stripped = line.strip()
+            tokens = stripped.split()
+            if not tokens:
+                continue
+            keyword = tokens[0]
+            if keyword == "newmtl":
+                material_name = tokens[1]
+                material_properties[material_name] = {}
+            elif keyword == "map_Kd":
+                # Diffuse texture map. The line is sliced instead of using the
+                # tokens to account for filenames which contain spaces.
+                texture_files[material_name] = stripped[7:]
+            elif keyword in property_keys:
+                values = np.array(tokens[1:4]).astype(np.float32)
+                tensor = torch.from_numpy(values).to(device)
+                material_properties[material_name][property_keys[keyword]] = tensor
+
+    return material_properties, texture_files
+
+
+def _load_texture_images(
+    material_names: List[str],
+    data_dir: str,
+    material_properties: MaterialProperties,
+    texture_files: TextureFiles,
+    path_manager: PathManager,
+) -> Tuple[MaterialProperties, TextureImages]:
+    """
+    Select the materials used by the obj file and read their texture images.
+
+    If material_names is empty and exactly one material was parsed, that one is
+    used; with several parsed materials a ValueError is raised. Texture files
+    which do not exist are skipped with a warning. Returns the properties of
+    the selected materials and a dict of material name -> image tensor.
+    """
+    names = list(material_names)
+    if not names and material_properties:
+        if len(material_properties) > 1:
+            raise ValueError(
+                "Multiple materials but no usemtl declarations in the obj file"
+            )
+        # No materials were specified in the obj file and the .mtl file defines
+        # only one, so that one is used.
+        names.append(next(iter(material_properties)))
+
+    kept_properties = {}
+    texture_images = {}
+    for name in names:
+        if name in texture_files:
+            texture_path = os.path.join(data_dir, texture_files[name])
+            if path_manager.exists(texture_path):
+                pixels = _read_image(
+                    texture_path, path_manager=path_manager, format="RGB"
+                )
+                texture_images[name] = torch.from_numpy(pixels / 255.0)
+            else:
+                warnings.warn(f"Texture file does not exist: {texture_path}")
+        if name in material_properties:
+            kept_properties[name] = material_properties[name]
+
+    return kept_properties, texture_images
+
+
+def load_mtl(
+    f: str,
+    *,
+    material_names: List[str],
+    data_dir: str,
+    device: Device = "cpu",
+    path_manager: PathManager,
+) -> Tuple[MaterialProperties, TextureImages]:
+    """
+    Parse a .mtl file and load the material properties (from its Ka, Kd, Ks and
+    Ns statements) and the diffuse texture images of the requested materials.
+
+    Args:
+        f: path to the .mtl file.
+        material_names: a list of the material names found in the .obj file.
+        data_dir: the directory where the material texture files are located.
+        device: Device on which the property tensors are created.
+        path_manager: PathManager used to open f and the texture files.
+
+    Returns:
+        material_properties: dict with the properties of each requested
+                material; a material without properties maps to an empty dict.
+                {
+                    material_name_1:  {
+                        "ambient_color": tensor with the Ka values,
+                        "diffuse_color": tensor with the Kd values,
+                        "specular_color": tensor with the Ks values,
+                        "shininess": tensor with the Ns value
+                    },
+                    material_name_2: {},
+                    ...
+                }
+        texture_images: dict of material names and texture images
+                {
+                    material_name_1: RGB image tensor (values divided by 255),
+                }
+    """
+    parsed_properties, texture_files = _parse_mtl(f, path_manager, device)
+    # Keep only the materials the .obj refers to and read their texture images.
+    return _load_texture_images(
+        material_names=material_names,
+        data_dir=data_dir,
+        material_properties=parsed_properties,
+        texture_files=texture_files,
+        path_manager=path_manager,
+    )
diff --git a/pytorch3d/pytorch3d/io/obj_io.py b/pytorch3d/pytorch3d/io/obj_io.py
new file mode 100644
index 0000000000000000000000000000000000000000..834c51edf53cdbc2a40f1e08326afd85965c38c7
--- /dev/null
+++ b/pytorch3d/pytorch3d/io/obj_io.py
@@ -0,0 +1,939 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+"""This module implements utility functions for loading and saving meshes."""
+import os
+import warnings
+from collections import namedtuple
+from pathlib import Path
+from typing import List, Optional
+
+import numpy as np
+import torch
+from iopath.common.file_io import PathManager
+from PIL import Image
+from pytorch3d.common.datatypes import Device
+from pytorch3d.io.mtl_io import load_mtl, make_mesh_texture_atlas
+from pytorch3d.io.utils import _check_faces_indices, _make_tensor, _open_file, PathOrStr
+from pytorch3d.renderer import TexturesAtlas, TexturesUV
+from pytorch3d.structures import join_meshes_as_batch, Meshes
+
+from .pluggable_formats import endswith, MeshFormatInterpreter
+
+
+# Faces & Aux types returned from the load_obj function.
+# _Faces groups the per-face index data (vertex, normal, texture and material
+# indices); _Aux groups the auxiliary data loaded alongside the mesh.
+# Note that the generated tuple classes are named "Faces" and "Properties".
+_Faces = namedtuple("Faces", "verts_idx normals_idx textures_idx materials_idx")
+_Aux = namedtuple(
+    "Properties", "normals verts_uvs material_colors texture_images texture_atlas"
+)
+
+
+def _format_faces_indices(faces_indices, max_index: int, device, pad_value=None):
+    """
+    Convert raw .obj face indices to 0-based indices and validate them.
+    The indices can refer to values in one of the face properties: vertices,
+    textures or normals. See comments of the load_obj function for more details.
+
+    Args:
+        faces_indices: List of ints of indices.
+        max_index: Max index for the face property.
+        device: device passed to _make_tensor when converting the indices
+            to an int64 tensor with 3 columns.
+        pad_value: if any of the face_indices are padded, specify
+            the value of the padding (e.g. -1). This is only used
+            for texture indices where there might
+            not be texture information for all the faces.
+
+    Returns:
+        faces_indices: the converted indices, as returned by
+            _check_faces_indices.
+
+    Raises:
+        ValueError if indices are not in a valid range.
+    """
+    idx = _make_tensor(faces_indices, cols=3, dtype=torch.int64, device=device)
+
+    # Remember which rows consist purely of padding before the values are
+    # shifted, so that they can be restored afterwards.
+    padded_rows = None
+    if pad_value is not None:
+        padded_rows = idx.eq(pad_value).all(dim=-1)
+
+    # Positive .obj indices are 1-based: shift them to 0-based.
+    idx[idx > 0] -= 1
+
+    # Negative .obj indices count back from the end of the property list.
+    idx[idx < 0] += max_index
+
+    if padded_rows is not None:
+        idx[padded_rows] = pad_value
+
+    return _check_faces_indices(idx, max_index, pad_value)
+
+
+def load_obj(
+    f,
+    load_textures: bool = True,
+    create_texture_atlas: bool = False,
+    texture_atlas_size: int = 4,
+    texture_wrap: Optional[str] = "repeat",
+    device: Device = "cpu",
+    path_manager: Optional[PathManager] = None,
+):
+    """
+    Load a mesh from a .obj file and optionally textures from a .mtl file.
+    Currently this handles verts, faces, vertex texture uv coordinates, normals,
+    texture images and material reflectivity values.
+
+    Note .obj files are 1-indexed. The tensors returned from this function
+    are 0-indexed. OBJ spec reference: http://www.martinreddy.net/gfx/3d/OBJ.spec
+
+    Example .obj file format:
+    ::
+        # this is a comment
+        v 1.000000 -1.000000 -1.000000
+        v 1.000000 -1.000000 1.000000
+        v -1.000000 -1.000000 1.000000
+        v -1.000000 -1.000000 -1.000000
+        v 1.000000 1.000000 -1.000000
+        vt 0.748573 0.750412
+        vt 0.749279 0.501284
+        vt 0.999110 0.501077
+        vt 0.999455 0.750380
+        vn 0.000000 0.000000 -1.000000
+        vn -1.000000 -0.000000 -0.000000
+        vn -0.000000 -0.000000 1.000000
+        f 5/2/1 1/2/1 4/3/1
+        f 5/1/1 4/3/1 2/4/1
+
+    The first token of the line denotes the type of input:
+    ::
+        - v is a vertex
+        - vt is the texture coordinate of one vertex
+        - vn is the normal of one vertex
+        - f is a face
+
+    Faces are interpreted as follows:
+    ::
+        5/2/1 describes the first vertex of the first triangle
+        - 5: index of vertex [1.000000 1.000000 -1.000000]
+        - 2: index of texture coordinate [0.749279 0.501284]
+        - 1: index of normal [0.000000 0.000000 -1.000000]
+
+    If there are faces with more than 3 vertices
+    they are subdivided into triangles. Polygonal faces are assumed to have
+    vertices ordered counter-clockwise so the (right-handed) normal points
+    out of the screen e.g. a proper rectangular face would be specified like this:
+    ::
+        0_________1
+        |         |
+        |         |
+        3 ________2
+
+    The face would be split into two triangles: (0, 2, 1) and (0, 3, 2),
+    both of which are also oriented counter-clockwise and have normals
+    pointing out of the screen.
+
+    Args:
+        f: A file-like object (with methods read, readline, tell, and seek),
+           a pathlib path or a string containing a file name.
+        load_textures: Boolean indicating whether material files are loaded
+        create_texture_atlas: Bool, If True a per face texture map is created and
+            a tensor `texture_atlas` is also returned in `aux`.
+        texture_atlas_size: Int specifying the resolution of the texture map per face
+            when `create_texture_atlas=True`. A (texture_size, texture_size, 3)
+            map is created per face.
+        texture_wrap: string, one of ["repeat", "clamp"]. This applies when computing
+            the texture atlas.
+            If `texture_wrap="repeat"`, for uv values outside the range [0, 1] the
+            integer part is ignored and a repeating pattern is formed.
+            If `texture_wrap="clamp"` the values are clamped to the range [0, 1].
+            If None, then there is no transformation of the texture values.
+        device: Device (as str or torch.device) on which to return the new tensors.
+        path_manager: optionally a PathManager object to interpret paths.
+            A default PathManager is created when None is given.
+
+    Returns:
+        3-element tuple containing
+
+        - **verts**: FloatTensor of shape (V, 3).
+        - **faces**: NamedTuple with fields:
+            - verts_idx: LongTensor of vertex indices, shape (F, 3).
+            - normals_idx: (optional) LongTensor of normal indices, shape (F, 3).
+            - textures_idx: (optional) LongTensor of texture indices, shape (F, 3).
+              This can be used to index into verts_uvs.
+            - materials_idx: (optional) List of indices indicating which
+              material the texture is derived from for each face.
+              If there is no material for a face, the index is -1.
+              This can be used to retrieve the corresponding values
+              in material_colors/texture_images after they have been
+              converted to tensors or Materials/Textures data
+              structures - see textures.py and materials.py for
+              more info.
+        - **aux**: NamedTuple with fields:
+            - normals: FloatTensor of shape (N, 3)
+            - verts_uvs: FloatTensor of shape (T, 2), giving the uv coordinate per
+              vertex. If a vertex is shared between two faces, it can have
+              a different uv value for each instance. Therefore it is
+              possible that the number of verts_uvs is greater than
+              num verts i.e. T > V.
+            - material_colors: if `load_textures=True` and the material has associated
+              properties this will be a dict of material names and properties of the form:
+
+              .. code-block:: python
+
+                  {
+                      material_name_1:  {
+                          "ambient_color": tensor of shape (1, 3),
+                          "diffuse_color": tensor of shape (1, 3),
+                          "specular_color": tensor of shape (1, 3),
+                          "shininess": tensor of shape (1)
+                      },
+                      material_name_2: {},
+                      ...
+                  }
+
+              If a material does not have any properties it will have an
+              empty dict. If `load_textures=False`, `material_colors` will be None.
+
+            - texture_images: if `load_textures=True` and the material has a texture map,
+              this will be a dict of the form:
+
+              .. code-block:: python
+
+                  {
+                      material_name_1: (H, W, 3) image,
+                      ...
+                  }
+              If `load_textures=False`, `texture_images` will be None.
+            - texture_atlas: if `load_textures=True` and `create_texture_atlas=True`,
+              this will be a FloatTensor of the form: (F, texture_size, textures_size, 3)
+              If the material does not have a texture map, then all faces
+              will have a uniform white texture.  Otherwise `texture_atlas` will be
+              None.
+    """
+    # Material files are looked up next to the .obj file when its location is
+    # known; for file-like objects the current directory is used instead.
+    if isinstance(f, (str, bytes, Path)):
+        # pyre-fixme[6]: For 1st argument expected `PathLike[Variable[AnyStr <:
+        #  [str, bytes]]]` but got `Union[Path, bytes, str]`.
+        data_dir = os.path.dirname(f)
+    else:
+        data_dir = "./"
+
+    if path_manager is None:
+        path_manager = PathManager()
+
+    with _open_file(f, path_manager, "r") as obj_stream:
+        return _load_obj(
+            obj_stream,
+            data_dir=data_dir,
+            load_textures=load_textures,
+            create_texture_atlas=create_texture_atlas,
+            texture_atlas_size=texture_atlas_size,
+            texture_wrap=texture_wrap,
+            path_manager=path_manager,
+            device=device,
+        )
+
+
+def load_objs_as_meshes(
+    files: list,
+    device: Optional[Device] = None,
+    load_textures: bool = True,
+    create_texture_atlas: bool = False,
+    texture_atlas_size: int = 4,
+    texture_wrap: Optional[str] = "repeat",
+    path_manager: Optional[PathManager] = None,
+):
+    """
+    Load every .obj file in `files` with load_obj and return the result as a
+    Meshes object. This only works for meshes which have a single texture
+    image for the whole mesh. See the load_obj function for more details.
+    material_colors and normals are not stored.
+
+    Args:
+        files: A list of file-like objects (with methods read, readline, tell,
+            and seek), pathlib paths or strings containing file names.
+        device: Desired device of returned Meshes. Default:
+            uses the current device for the default tensor type.
+        load_textures: Boolean indicating whether material files are loaded
+        create_texture_atlas, texture_atlas_size, texture_wrap: as for load_obj.
+        path_manager: optionally a PathManager object to interpret paths.
+
+    Returns:
+        New Meshes object. A single input file yields its mesh directly;
+        several files are combined with join_meshes_as_batch.
+    """
+    loaded_meshes = []
+    for obj_file in files:
+        verts, faces, aux = load_obj(
+            obj_file,
+            load_textures=load_textures,
+            create_texture_atlas=create_texture_atlas,
+            texture_atlas_size=texture_atlas_size,
+            texture_wrap=texture_wrap,
+            path_manager=path_manager,
+        )
+
+        textures = None
+        if create_texture_atlas:
+            # TexturesAtlas type
+            textures = TexturesAtlas(atlas=[aux.texture_atlas.to(device)])
+        else:
+            # TexturesUV type: only the first texture image is used.
+            images_by_material = aux.texture_images
+            has_images = (
+                images_by_material is not None and len(images_by_material) > 0
+            )
+            if has_images:
+                uv_coords = aux.verts_uvs.to(device)  # (V, 2)
+                uv_faces = faces.textures_idx.to(device)  # (F, 3)
+                first_image = next(iter(images_by_material.values()))
+                textures = TexturesUV(
+                    verts_uvs=[uv_coords],
+                    faces_uvs=[uv_faces],
+                    maps=first_image.to(device)[None],
+                )
+
+        loaded_meshes.append(
+            Meshes(
+                verts=[verts.to(device)],
+                faces=[faces.verts_idx.to(device)],
+                textures=textures,
+            )
+        )
+
+    if len(loaded_meshes) == 1:
+        return loaded_meshes[0]
+    return join_meshes_as_batch(loaded_meshes)
+
+
+class MeshObjFormat(MeshFormatInterpreter):
+    """
+    MeshFormatInterpreter which reads and writes meshes stored in .obj files.
+    """
+
+    def __init__(self) -> None:
+        # File name suffixes this interpreter is willing to handle.
+        self.known_suffixes = (".obj",)
+
+    def read(
+        self,
+        path: PathOrStr,
+        include_textures: bool,
+        device: Device,
+        path_manager: PathManager,
+        create_texture_atlas: bool = False,
+        texture_atlas_size: int = 4,
+        texture_wrap: Optional[str] = "repeat",
+        **kwargs,
+    ) -> Optional[Meshes]:
+        """
+        Load the mesh at `path` via load_objs_as_meshes.
+
+        Returns None, without touching the file, when `path` does not end in
+        one of the known suffixes. Extra keyword arguments are ignored.
+        """
+        if endswith(path, self.known_suffixes):
+            return load_objs_as_meshes(
+                files=[path],
+                device=device,
+                load_textures=include_textures,
+                create_texture_atlas=create_texture_atlas,
+                texture_atlas_size=texture_atlas_size,
+                texture_wrap=texture_wrap,
+                path_manager=path_manager,
+            )
+        return None
+
+    def save(
+        self,
+        data: Meshes,
+        path: PathOrStr,
+        path_manager: PathManager,
+        binary: Optional[bool],
+        decimal_places: Optional[int] = None,
+        **kwargs,
+    ) -> bool:
+        """
+        Write the first mesh of `data` to `path` via save_obj.
+
+        UV texture data is written too when the mesh textures are a
+        TexturesUV. `binary` and extra keyword arguments are not used.
+
+        Returns False, without writing, when `path` does not end in one of
+        the known suffixes, and True once the mesh has been saved.
+        """
+        if not endswith(path, self.known_suffixes):
+            return False
+
+        uv_coords: Optional[torch.Tensor] = None
+        uv_faces: Optional[torch.Tensor] = None
+        uv_image: Optional[torch.Tensor] = None
+
+        if isinstance(data.textures, TexturesUV):
+            # Only the first element of the batch is saved.
+            uv_coords = data.textures.verts_uvs_padded()[0]
+            uv_faces = data.textures.faces_uvs_padded()[0]
+            uv_image = data.textures.maps_padded()[0]
+
+        save_obj(
+            f=path,
+            verts=data.verts_list()[0],
+            faces=data.faces_list()[0],
+            decimal_places=decimal_places,
+            path_manager=path_manager,
+            verts_uvs=uv_coords,
+            faces_uvs=uv_faces,
+            texture_map=uv_image,
+        )
+        return True
+
+
+def _parse_face(
+    line,
+    tokens,
+    material_idx,
+    faces_verts_idx,
+    faces_normals_idx,
+    faces_textures_idx,
+    faces_materials_idx,
+) -> None:
+    """
+    Parse one "f ..." line of an .obj file and append the resulting
+    triangle(s) to the output lists, which are modified in place.
+
+    Args:
+        line: the full text of the line, used only in error messages.
+        tokens: the whitespace-separated tokens of the line; tokens[0] is
+            the keyword and the remaining tokens describe one face vertex
+            each, as "v", "v/vt", "v//vn" or "v/vt/vn".
+        material_idx: index of the material in use, recorded once per
+            triangle that is appended.
+        faces_verts_idx: output list of vertex index triples.
+        faces_normals_idx: output list of normal index triples.
+        faces_textures_idx: output list of texture index triples.
+        faces_materials_idx: output list of material indices.
+
+    Raises:
+        ValueError if a face vertex has more than 3 properties or if the
+        properties are not given consistently for all vertices of the face.
+    """
+    face = tokens[1:]
+    inconsistent_msg = "Face %s is an illegal statement. \
+                        Vertex properties are inconsistent. Line: %s"
+
+    vert_ids, normal_ids, texture_ids = [], [], []
+    for props in [entry.split("/") for entry in face]:
+        n_props = len(props)
+        # Vertex index.
+        vert_ids.append(int(props[0]))
+        if n_props > 1 and props[1] != "":
+            # Texture index is present e.g. f 4/1/1.
+            texture_ids.append(int(props[1]))
+        if n_props > 2:
+            # Normal index present e.g. 4/1/1 or 4//1.
+            normal_ids.append(int(props[2]))
+        if n_props > 3:
+            raise ValueError(
+                "Face vertices can only have 3 properties. \
+                                Face vert %s, Line: %s"
+                % (str(props), str(line))
+            )
+
+    n_verts = len(vert_ids)
+
+    # Triplets must be consistent for all vertices in a face e.g.
+    # legal statement: f 4/1/1 3/2/1 2/1/1.
+    # illegal statement: f 4/1/1 3//1 2//1.
+    # A face without normal or texture indices gets the pad value -1 so
+    # that all the face index lists end up with one entry per triangle.
+    if len(normal_ids) == 0:
+        normal_ids = [-1] * n_verts
+    elif len(normal_ids) != n_verts:
+        raise ValueError(inconsistent_msg % (str(face), str(line)))
+
+    if len(texture_ids) == 0:
+        texture_ids = [-1] * n_verts
+    elif len(texture_ids) != n_verts:
+        raise ValueError(inconsistent_msg % (str(face), str(line)))
+
+    # Subdivide faces with more than 3 vertices into a fan of triangles
+    # which all share the first vertex.
+    # See comments of the load_obj function for more details.
+    for second in range(1, n_verts - 1):
+        third = second + 1
+        faces_verts_idx.append((vert_ids[0], vert_ids[second], vert_ids[third]))
+        faces_normals_idx.append(
+            (normal_ids[0], normal_ids[second], normal_ids[third])
+        )
+        faces_textures_idx.append(
+            (texture_ids[0], texture_ids[second], texture_ids[third])
+        )
+        faces_materials_idx.append(material_idx)
+
+
+def _parse_obj(f, data_dir: str):
+    """
+    Parse the lines of an .obj file into plain Python lists. See load_obj
+    function for more details about the return values.
+
+    Records are recognised by their first whitespace-separated token, so the
+    keyword may be followed by any whitespace (space or tab). Blank lines,
+    comments, unknown keywords and keywords given without any value are
+    skipped.
+
+    Args:
+        f: iterable over the lines of the file, as str or as utf-8 bytes.
+        data_dir: directory which is prepended to the name of the material
+            file given by a mtllib statement.
+
+    Returns:
+        9-element tuple (verts, normals, verts_uvs, faces_verts_idx,
+        faces_normals_idx, faces_textures_idx, faces_materials_idx,
+        material_names, mtl_path). All index values are still the raw,
+        1-based values from the file, and faces without a material have a
+        material index of -1. mtl_path is None if there is no mtllib
+        statement.
+
+    Raises:
+        ValueError if a mtllib or usemtl statement has no name, if a vertex,
+        normal or texture coordinate has too few values, or if a face is
+        malformed.
+    """
+    verts, normals, verts_uvs = [], [], []
+    faces_verts_idx, faces_normals_idx, faces_textures_idx = [], [], []
+    faces_materials_idx = []
+    material_names = []
+    mtl_path = None
+
+    lines = [line.strip() for line in f]
+
+    # The keyword checks below expect each line to be a string. If the file
+    # is read in as bytes then first decode to strings.
+    if lines and isinstance(lines[0], bytes):
+        lines = [el.decode("utf-8") for el in lines]
+
+    materials_idx = -1
+
+    for line in lines:
+        tokens = line.split()
+        if not tokens:
+            continue
+        keyword = tokens[0]
+
+        if line.startswith("mtllib"):
+            if len(tokens) < 2:
+                raise ValueError("material file name is not specified")
+            # NOTE: only allow one .mtl file per .obj.
+            # Definitions for multiple materials can be included
+            # in this one .mtl file.
+            mtl_path = line[len(keyword) :].strip()  # Take the remainder of the line
+            mtl_path = os.path.join(data_dir, mtl_path)
+        elif keyword == "usemtl":
+            if len(tokens) < 2:
+                raise ValueError("material name is not specified. Line: %s" % line)
+            material_name = tokens[1]
+            # materials are often repeated for different parts
+            # of a mesh.
+            if material_name not in material_names:
+                material_names.append(material_name)
+                materials_idx = len(material_names) - 1
+            else:
+                materials_idx = material_names.index(material_name)
+        elif len(tokens) < 2:
+            # A keyword without any value carries no data.
+            continue
+        elif keyword == "v":  # Line is a vertex.
+            vert = [float(x) for x in tokens[1:4]]
+            if len(vert) != 3:
+                msg = "Vertex %s does not have 3 values. Line: %s"
+                raise ValueError(msg % (str(vert), str(line)))
+            verts.append(vert)
+        elif keyword == "vt":  # Line is a texture.
+            tx = [float(x) for x in tokens[1:3]]
+            if len(tx) != 2:
+                raise ValueError(
+                    "Texture %s does not have 2 values. Line: %s" % (str(tx), str(line))
+                )
+            verts_uvs.append(tx)
+        elif keyword == "vn":  # Line is a normal.
+            norm = [float(x) for x in tokens[1:4]]
+            if len(norm) != 3:
+                msg = "Normal %s does not have 3 values. Line: %s"
+                raise ValueError(msg % (str(norm), str(line)))
+            normals.append(norm)
+        elif keyword == "f":  # Line is a face.
+            # Update face properties info.
+            _parse_face(
+                line,
+                tokens,
+                materials_idx,
+                faces_verts_idx,
+                faces_normals_idx,
+                faces_textures_idx,
+                faces_materials_idx,
+            )
+
+    return (
+        verts,
+        normals,
+        verts_uvs,
+        faces_verts_idx,
+        faces_normals_idx,
+        faces_textures_idx,
+        faces_materials_idx,
+        material_names,
+        mtl_path,
+    )
+
+
+def _load_materials(
+    material_names: List[str],
+    f: Optional[str],
+    *,
+    data_dir: str,
+    load_textures: bool,
+    device: Device,
+    path_manager: PathManager,
+):
+    """
+    Load materials and optionally textures from the specified path.
+
+    Nothing is loaded, and (None, None) is returned, when textures are not
+    requested, when no material file is given or when the material file does
+    not exist. The latter two cases also emit a warning.
+
+    Args:
+        material_names: a list of the material names found in the .obj file.
+        f: path to the material information.
+        data_dir: the directory where the material texture files are located.
+        load_textures: whether textures should be loaded.
+        device: Device (as str or torch.device) on which to return the new tensors.
+        path_manager: PathManager object to interpret paths.
+
+    Returns:
+        material_colors: dict of properties for each material.
+        texture_images: dict of material names and texture images.
+    """
+    if not load_textures:
+        return None, None
+
+    # Work out whether there is a usable material file at all.
+    problem = None
+    if f is None:
+        problem = "No mtl file provided"
+    elif not path_manager.exists(f):
+        problem = f"Mtl file does not exist: {f}"
+
+    if problem is not None:
+        warnings.warn(problem)
+        return None, None
+
+    return load_mtl(
+        f,
+        material_names=material_names,
+        data_dir=data_dir,
+        path_manager=path_manager,
+        device=device,
+    )
+
+
+def _load_obj(
+    f_obj,
+    *,
+    data_dir: str,
+    load_textures: bool = True,
+    create_texture_atlas: bool = False,
+    texture_atlas_size: int = 4,
+    texture_wrap: Optional[str] = "repeat",
+    path_manager: PathManager,
+    device: Device = "cpu",
+):
+    """
+    Load a mesh from a file-like object and return the (verts, faces, aux)
+    triple described in the load_obj function.
+    Any material files associated with the obj are expected to be in the
+    directory given by data_dir.
+
+    Raises:
+        ValueError if texture_wrap is not "repeat", "clamp" or None.
+    """
+    wrap_is_valid = texture_wrap is None or texture_wrap in ["repeat", "clamp"]
+    if not wrap_is_valid:
+        raise ValueError(
+            "texture_wrap must be one of ['repeat', 'clamp'] or None, got %s"
+            % texture_wrap
+        )
+
+    parsed = _parse_obj(f_obj, data_dir)
+    verts, normals, verts_uvs = parsed[0:3]
+    faces_verts_idx, faces_normals_idx, faces_textures_idx = parsed[3:6]
+    faces_materials_idx, material_names, mtl_path = parsed[6:9]
+
+    # Per-element data.
+    float_kwargs = {"dtype": torch.float32, "device": device}
+    verts = _make_tensor(verts, cols=3, **float_kwargs)  # (V, 3)
+    normals = _make_tensor(normals, cols=3, **float_kwargs)  # (N, 3)
+    verts_uvs = _make_tensor(verts_uvs, cols=2, **float_kwargs)  # (T, 2)
+
+    # Per-face indices. The vertex indices are always converted; the
+    # others are left as (empty) lists if the file has no faces.
+    faces_verts_idx = _format_faces_indices(
+        faces_verts_idx, verts.shape[0], device=device
+    )
+    if len(faces_normals_idx):
+        faces_normals_idx = _format_faces_indices(
+            faces_normals_idx, normals.shape[0], device=device, pad_value=-1
+        )
+    if len(faces_textures_idx):
+        faces_textures_idx = _format_faces_indices(
+            faces_textures_idx, verts_uvs.shape[0], device=device, pad_value=-1
+        )
+    if len(faces_materials_idx):
+        faces_materials_idx = torch.tensor(
+            faces_materials_idx, dtype=torch.int64, device=device
+        )
+
+    material_colors, texture_images = _load_materials(
+        material_names,
+        mtl_path,
+        data_dir=data_dir,
+        load_textures=load_textures,
+        path_manager=path_manager,
+        device=device,
+    )
+
+    if material_colors and not material_names:
+        # usemtl was not present but single material was present in the .mtl file
+        first_material = next(iter(material_colors.keys()))
+        material_names.append(first_material)
+        # replace all -1 by 0 material idx
+        if torch.is_tensor(faces_materials_idx):
+            faces_materials_idx.clamp_(min=0)
+
+    texture_atlas = None
+    if create_texture_atlas:
+        # Using the images and properties from the
+        # material file make a per face texture map.
+
+        # Create an array of strings of material names for each face.
+        # If faces_materials_idx == -1 then that face doesn't have a material.
+        material_of_face = faces_materials_idx.cpu().numpy()
+        face_material_names = np.array(material_names)[material_of_face]  # (F,)
+        face_material_names[material_of_face == -1] = ""
+
+        # Construct the atlas.
+        texture_atlas = make_mesh_texture_atlas(
+            material_colors,
+            texture_images,
+            face_material_names,
+            faces_textures_idx,
+            verts_uvs,
+            texture_atlas_size,
+            texture_wrap,
+        )
+
+    faces = _Faces(
+        verts_idx=faces_verts_idx,
+        normals_idx=faces_normals_idx,
+        textures_idx=faces_textures_idx,
+        materials_idx=faces_materials_idx,
+    )
+    # Empty normals / uvs are reported as None rather than as empty tensors.
+    aux = _Aux(
+        normals=normals if len(normals) else None,
+        verts_uvs=verts_uvs if len(verts_uvs) else None,
+        material_colors=material_colors,
+        texture_images=texture_images,
+        texture_atlas=texture_atlas,
+    )
+    return verts, faces, aux
+
+
+def save_obj(
+    f: PathOrStr,
+    verts,
+    faces,
+    decimal_places: Optional[int] = None,
+    path_manager: Optional[PathManager] = None,
+    *,
+    normals: Optional[torch.Tensor] = None,
+    faces_normals_idx: Optional[torch.Tensor] = None,
+    verts_uvs: Optional[torch.Tensor] = None,
+    faces_uvs: Optional[torch.Tensor] = None,
+    texture_map: Optional[torch.Tensor] = None,
+) -> None:
+    """
+    Save a mesh to an .obj file.
+
+    If faces_uvs, verts_uvs and texture_map are all given, the texture is
+    saved as well: the .obj file refers to a material file with the same
+    name but the suffix .mtl, which in turn refers to the texture image,
+    saved under the same name with the suffix .png.
+
+    Args:
+        f: File (str or path) to which the mesh should be written.
+        verts: FloatTensor of shape (V, 3) giving vertex coordinates.
+        faces: LongTensor of shape (F, 3) giving faces.
+        decimal_places: Number of decimal places for saving.
+        path_manager: Optional PathManager for interpreting f if
+            it is a str.
+        normals: FloatTensor of shape (V, 3) giving normals for faces_normals_idx
+            to index into.
+        faces_normals_idx: LongTensor of shape (F, 3) giving the index into
+            normals for each vertex in the face.
+        verts_uvs: FloatTensor of shape (V, 2) giving the uv coordinate per vertex.
+        faces_uvs: LongTensor of shape (F, 3) giving the index into verts_uvs for
+            each vertex in the face.
+        texture_map: FloatTensor of shape (H, W, 3) representing the texture map
+            for the mesh which will be saved as an image. The values are expected
+            to be in the range [0, 1].
+
+    Raises:
+        ValueError if any of the tensors has an unexpected shape or if only
+        one of normals and faces_normals_idx is given.
+    """
+    if len(verts) and (verts.dim() != 2 or verts.size(1) != 3):
+        message = "'verts' should either be empty or of shape (num_verts, 3)."
+        raise ValueError(message)
+
+    if len(faces) and (faces.dim() != 2 or faces.size(1) != 3):
+        message = "'faces' should either be empty or of shape (num_faces, 3)."
+        raise ValueError(message)
+
+    if (normals is None) != (faces_normals_idx is None):
+        message = "'normals' and 'faces_normals_idx' must both be None or neither."
+        raise ValueError(message)
+
+    if faces_normals_idx is not None and (
+        faces_normals_idx.dim() != 2 or faces_normals_idx.size(1) != 3
+    ):
+        message = (
+            "'faces_normals_idx' should either be empty or of shape (num_faces, 3)."
+        )
+        raise ValueError(message)
+
+    if normals is not None and (normals.dim() != 2 or normals.size(1) != 3):
+        message = "'normals' should either be empty or of shape (num_verts, 3)."
+        raise ValueError(message)
+
+    if faces_uvs is not None and (faces_uvs.dim() != 2 or faces_uvs.size(1) != 3):
+        message = "'faces_uvs' should either be empty or of shape (num_faces, 3)."
+        raise ValueError(message)
+
+    if verts_uvs is not None and (verts_uvs.dim() != 2 or verts_uvs.size(1) != 2):
+        message = "'verts_uvs' should either be empty or of shape (num_verts, 2)."
+        raise ValueError(message)
+
+    if texture_map is not None and (texture_map.dim() != 3 or texture_map.size(2) != 3):
+        message = "'texture_map' should either be empty or of shape (H, W, 3)."
+        raise ValueError(message)
+
+    if path_manager is None:
+        path_manager = PathManager()
+
+    save_texture = all(t is not None for t in [faces_uvs, verts_uvs, texture_map])
+    output_path = Path(f)
+
+    # Save the .obj file.
+    # The open file gets a name of its own: `f` must keep referring to the
+    # caller's path, because its type is inspected again further down.
+    with _open_file(f, path_manager, "w") as obj_file:
+        if save_texture:
+            # Add the header required for the texture info to be loaded correctly
+            obj_header = "\nmtllib {0}.mtl\nusemtl mesh\n\n".format(output_path.stem)
+            obj_file.write(obj_header)
+        _save(
+            obj_file,
+            verts,
+            faces,
+            decimal_places,
+            normals=normals,
+            faces_normals_idx=faces_normals_idx,
+            verts_uvs=verts_uvs,
+            faces_uvs=faces_uvs,
+            save_texture=save_texture,
+            save_normals=normals is not None,
+        )
+
+    if not save_texture:
+        return
+
+    # Save the .mtl and .png files associated with the texture
+    image_path = output_path.with_suffix(".png")
+    mtl_path = output_path.with_suffix(".mtl")
+    if isinstance(f, str):
+        # Back to str for iopath interpretation.
+        image_path = str(image_path)
+        mtl_path = str(mtl_path)
+
+    # Save texture map to output folder
+    # pyre-fixme[16] # undefined attribute cpu
+    texture_map = texture_map.detach().cpu() * 255.0
+    image = Image.fromarray(texture_map.numpy().astype(np.uint8))
+    with _open_file(image_path, path_manager, "wb") as im_f:
+        image.save(im_f)
+
+    # Create .mtl file with the material name and texture map filename
+    # TODO: enable material properties to also be saved.
+    with _open_file(mtl_path, path_manager, "w") as f_mtl:
+        lines = f"newmtl mesh\n" f"map_Kd {output_path.stem}.png\n"
+        f_mtl.write(lines)
+
+
+# TODO (nikhilar) Speed up this function.
+def _save(
+    f,
+    verts,
+    faces,
+    decimal_places: Optional[int] = None,
+    *,
+    normals: Optional[torch.Tensor] = None,
+    faces_normals_idx: Optional[torch.Tensor] = None,
+    verts_uvs: Optional[torch.Tensor] = None,
+    faces_uvs: Optional[torch.Tensor] = None,
+    save_texture: bool = False,
+    save_normals: bool = False,
+) -> None:
+    """
+    Write the contents of an .obj file to the open text file f: first the
+    "v" lines, then the "vn" lines if save_normals is set, then the "vt"
+    lines if save_texture is set and finally the "f" lines.
+
+    Args:
+        f: file-like object, open for writing text.
+        verts: tensor of shape (V, 3) giving vertex coordinates, or empty.
+        faces: tensor of shape (F, 3) giving 0-based faces, or empty.
+        decimal_places: Number of decimal places for saving. If None, the
+            default of the "%f" format is used.
+        normals: tensor of shape (N, 3). Required if save_normals is set.
+        faces_normals_idx: tensor of shape (F, 3) indexing into normals.
+            Required if save_normals is set.
+        verts_uvs: tensor of shape (T, 2). Required if save_texture is set.
+        faces_uvs: tensor of shape (F, 3) indexing into verts_uvs.
+            Required if save_texture is set.
+        save_texture: whether to write the texture coordinates.
+        save_normals: whether to write the normals.
+
+    Raises:
+        ValueError if any of the tensors has an unexpected shape.
+
+    Warns if both verts and faces are empty, in which case nothing is
+    written, and if faces contains indices which do not refer to a vertex.
+    """
+    if len(verts) and (verts.dim() != 2 or verts.size(1) != 3):
+        message = "'verts' should either be empty or of shape (num_verts, 3)."
+        raise ValueError(message)
+
+    if len(faces) and (faces.dim() != 2 or faces.size(1) != 3):
+        message = "'faces' should either be empty or of shape (num_faces, 3)."
+        raise ValueError(message)
+
+    if not (len(verts) or len(faces)):
+        warnings.warn("Empty 'verts' and 'faces' arguments provided")
+        return
+
+    verts, faces = verts.cpu(), faces.cpu()
+
+    if decimal_places is None:
+        float_str = "%f"
+    else:
+        float_str = "%" + ".%df" % decimal_places
+
+    # The output is collected in a list and joined once at the end, and the
+    # tensors are converted to nested Python lists up front: both are much
+    # cheaper than growing a string and indexing the tensor per element.
+    chunks = []
+
+    if len(verts):
+        chunks.extend(
+            "v %s\n" % " ".join(float_str % value for value in vert)
+            for vert in verts.tolist()
+        )
+
+    if save_normals:
+        assert normals is not None
+        assert faces_normals_idx is not None
+        chunks.append(_write_normals(normals, faces_normals_idx, float_str))
+
+    if save_texture:
+        assert faces_uvs is not None
+        assert verts_uvs is not None
+
+        if faces_uvs.dim() != 2 or faces_uvs.size(1) != 3:
+            message = "'faces_uvs' should either be empty or of shape (num_faces, 3)."
+            raise ValueError(message)
+
+        if verts_uvs.dim() != 2 or verts_uvs.size(1) != 2:
+            message = "'verts_uvs' should either be empty or of shape (num_verts, 2)."
+            raise ValueError(message)
+
+        verts_uvs, faces_uvs = verts_uvs.cpu(), faces_uvs.cpu()
+
+        # Save verts uvs after verts
+        if len(verts_uvs):
+            chunks.extend(
+                "vt %s\n" % " ".join(float_str % value for value in uv)
+                for uv in verts_uvs.tolist()
+            )
+
+    f.write("".join(chunks))
+
+    if torch.any(faces >= verts.shape[0]) or torch.any(faces < 0):
+        warnings.warn("Faces have invalid indices")
+
+    if len(faces):
+        _write_faces(
+            f,
+            faces,
+            faces_uvs if save_texture else None,
+            faces_normals_idx if save_normals else None,
+        )
+
+
+def _write_normals(
+    normals: torch.Tensor, faces_normals_idx: torch.Tensor, float_str: str
+) -> str:
+    """
+    Format the normals as the "vn" lines of an .obj file.
+
+    Args:
+        normals: tensor with 3 columns, one row per normal.
+        faces_normals_idx: tensor with 3 columns. Only its shape is
+            checked here; it does not contribute to the output.
+        float_str: printf-style format applied to every component.
+
+    Returns:
+        A string with one newline-terminated "vn x y z" line per normal.
+
+    Raises:
+        ValueError if either tensor is not 2-dimensional with 3 columns.
+    """
+    idx_shape_ok = faces_normals_idx.dim() == 2 and faces_normals_idx.size(1) == 3
+    if not idx_shape_ok:
+        message = (
+            "'faces_normals_idx' should either be empty or of shape (num_faces, 3)."
+        )
+        raise ValueError(message)
+
+    normals_shape_ok = normals.dim() == 2 and normals.size(1) == 3
+    if not normals_shape_ok:
+        message = "'normals' should either be empty or of shape (num_verts, 3)."
+        raise ValueError(message)
+
+    normals, faces_normals_idx = normals.cpu(), faces_normals_idx.cpu()
+
+    return "".join(
+        "vn %s\n" % " ".join(float_str % component for component in row)
+        for row in normals.tolist()
+    )
+
+
+def _write_faces(
+    f,
+    faces: torch.Tensor,
+    faces_uvs: Optional[torch.Tensor],
+    faces_normals_idx: Optional[torch.Tensor],
+) -> None:
+    """
+    Write the "f" lines of an .obj file to f, one line per face.
+
+    The 0-based indices are written 1-based. Depending on which optional
+    indices are given, each face vertex is written as "v", "v/vt", "v//vn"
+    or "v/vt/vn". The last line is not terminated by a newline.
+
+    Args:
+        f: file-like object, open for writing text.
+        faces: 2-dimensional tensor of vertex indices, one row per face.
+        faces_uvs: optional tensor of texture indices, indexed like faces.
+        faces_normals_idx: optional tensor of normal indices, indexed like
+            faces.
+    """
+    has_uvs = faces_uvs is not None
+    has_normals = faces_normals_idx is not None
+
+    def corner(i: int, j: int) -> str:
+        # Text of vertex j of face i.
+        if has_normals and has_uvs:
+            # Format faces as {verts_idx}/{verts_uvs_idx}/{verts_normals_idx}
+            return "%d/%d/%d" % (
+                faces[i, j] + 1,
+                faces_uvs[i, j] + 1,
+                faces_normals_idx[i, j] + 1,
+            )
+        if has_normals:
+            # Format faces as {verts_idx}//{verts_normals_idx}
+            return "%d//%d" % (faces[i, j] + 1, faces_normals_idx[i, j] + 1)
+        if has_uvs:
+            # Format faces as {verts_idx}/{verts_uvs_idx}
+            return "%d/%d" % (faces[i, j] + 1, faces_uvs[i, j] + 1)
+        return "%d" % (faces[i, j] + 1)
+
+    n_faces, n_corners = faces.shape
+    last = n_faces - 1
+    for i in range(n_faces):
+        text = " ".join([corner(i, j) for j in range(n_corners)])
+        # No newline at the end of the file.
+        terminator = "" if i == last else "\n"
+        f.write("f %s%s" % (text, terminator))
diff --git a/pytorch3d/pytorch3d/io/off_io.py b/pytorch3d/pytorch3d/io/off_io.py
new file mode 100644
index 0000000000000000000000000000000000000000..4262269ee4bd38881e02c652c5cdbb2cc5d673f5
--- /dev/null
+++ b/pytorch3d/pytorch3d/io/off_io.py
@@ -0,0 +1,494 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+"""
+This module implements utility functions for loading and saving
+meshes as .off files.
+
+This format is introduced, for example, at
+http://www.geomview.org/docs/html/OFF.html .
+"""
+import warnings
+from typing import cast, Optional, Tuple, Union
+
+import numpy as np
+import torch
+from iopath.common.file_io import PathManager
+from pytorch3d.io.utils import _check_faces_indices, _open_file, PathOrStr
+from pytorch3d.renderer import TexturesAtlas, TexturesVertex
+from pytorch3d.structures import Meshes
+
+from .pluggable_formats import endswith, MeshFormatInterpreter
+
+
+def _is_line_empty(line: Union[str, bytes]) -> bool:
+    """
+    Returns whether line (str or bytes) is blank or a # comment in an OFF file.
+    """
+    line = line.strip()
+    return not line or line.startswith(b"#" if isinstance(line, bytes) else "#")
+
+
+def _count_next_line_periods(file) -> int:
+    """
+    Peeks at the next line which is neither blank nor a comment and returns
+    how many "." it has ahead of any "#". Rewinds the file before returning.
+    """
+    start = file.tell()
+    candidate = file.readline()
+    while _is_line_empty(candidate):
+        candidate = file.readline()
+        if not candidate:
+            raise ValueError("Premature end of file")
+
+    payload, _, _ = candidate.partition(b"#")
+    n_periods = payload.count(b".")
+    file.seek(start)
+    return n_periods
+
+
+def _read_faces_lump(
+    file, n_faces: int, n_colors: Optional[int]
+) -> Optional[Tuple[np.ndarray, int, Optional[np.ndarray]]]:
+    """
+    Parse n_faces faces and faces_colors from the file,
+    if they all have the same number of vertices.
+    This is used in two ways.
+    1) To try to read all faces.
+    2) To read faces one-by-one if that failed.
+
+    Args:
+        file: file-like object being read.
+        n_faces: The known number of faces yet to read.
+        n_colors: The number of colors if known already.
+
+    Returns:
+        - 2D numpy array of faces
+        - number of colors found
+        - 2D numpy array of face colors if found.
+        or None if there are faces with different numbers of vertices.
+    """
+    if n_faces == 0:
+        return np.array([[]]), 0, None
+    old_offset = file.tell()
+    try:
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "ignore", message=".* Empty input file.*", category=UserWarning
+            )
+            data = np.loadtxt(file, dtype=np.float32, ndmin=2, max_rows=n_faces)
+    except ValueError as e:
+        if n_faces > 1 and "number of columns" in e.args[0]:  # any numpy version
+            file.seek(old_offset)
+            return None
+        raise ValueError("Not enough face data.") from None
+
+    if len(data) != n_faces:
+        raise ValueError("Not enough face data.")
+    face_size = int(data[0, 0])
+    if (data[:, 0] != face_size).any():
+        msg = "A line of face data did not have the specified length."
+        raise ValueError(msg)
+    if face_size < 3:
+        raise ValueError("Faces must have at least 3 vertices.")
+
+    n_colors_found = data.shape[1] - 1 - face_size
+    if n_colors is not None and n_colors_found != n_colors:
+        raise ValueError("Number of colors differs between faces.")
+    n_colors = n_colors_found
+    if n_colors not in [0, 3, 4]:
+        raise ValueError("Unexpected number of colors.")
+
+    face_raw_data = data[:, 1 : 1 + face_size].astype("int64")
+    if face_size == 3:
+        face_data = face_raw_data
+    else:
+        face_arrays = [
+            face_raw_data[:, [0, i + 1, i + 2]] for i in range(face_size - 2)
+        ]
+        face_data = np.vstack(face_arrays)
+
+    if n_colors == 0:
+        return face_data, 0, None
+    colors = data[:, 1 + face_size :]
+    if face_size == 3:
+        return face_data, n_colors, colors
+    return face_data, n_colors, np.tile(colors, (face_size - 2, 1))
+
+
+def _read_faces(
+    file, n_faces: int
+) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
+    """
+    Reads the faces, and any colors attached to them, from the file.
+
+    Args:
+        file: file-like object being read.
+        n_faces: The number of faces to read.
+
+    Returns:
+        Pair of 2D numpy arrays: the faces and the face colors. Either
+            entry is None if the file has no such data.
+    """
+    if n_faces == 0:
+        return None, None
+
+    # No "." on the first face line: colors are taken to be 0-255 integers.
+    has_periods = _count_next_line_periods(file) != 0
+    color_scale = 1 if has_periods else 1 / 255.0
+
+    # First try to parse all the faces in one go as a single table.
+    lump = _read_faces_lump(file, n_faces=n_faces, n_colors=None)
+    if lump is not None:
+        faces, _, colors = lump
+        return faces, (None if colors is None else colors * color_scale)
+
+    # That failed, so parse the faces one line at a time.
+    n_colors = None
+    per_face_tris, per_face_colors = [], []
+    for _ in range(n_faces):
+        single = _read_faces_lump(file, n_faces=1, n_colors=n_colors)
+        tris, n_colors, face_colors = cast(
+            Tuple[np.ndarray, int, Optional[np.ndarray]], single
+        )
+        per_face_tris.append(tris)
+        per_face_colors.append(face_colors)
+
+    faces = np.vstack(per_face_tris)
+    if n_colors == 0:
+        return faces, None
+    return faces, np.vstack(per_face_colors) * color_scale
+
+
+def _read_verts(file, n_verts: int) -> Tuple[np.ndarray, Optional[np.ndarray]]:
+    """
+    Returns verts and vertex colors from the file.
+
+    Args:
+        file: file-like object being read.
+        n_verts: The known number of vertices.
+
+    Returns:
+        2D numpy arrays of verts and (if present)
+        vertex colors.
+    """
+    # 3 periods (presumably one per x, y, z) => colors are treated as 0-255 ints.
+    color_is_int = 3 == _count_next_line_periods(file)
+    color_scale = 1 / 255.0 if color_is_int else 1
+
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore", message=".* Empty input file.*", category=UserWarning
+        )
+        data = np.loadtxt(file, dtype=np.float32, ndmin=2, max_rows=n_verts)
+    if data.shape[0] != n_verts:
+        raise ValueError("Not enough vertex data.")
+    if data.shape[1] not in [3, 6, 7]:
+        raise ValueError("Bad vertex data.")
+
+    if data.shape[1] == 3:
+        return data, None
+    return data[:, :3], data[:, 3:] * color_scale  # (xyz, scaled colors)
+
+
+def _load_off_stream(file) -> dict:
+    """
+    Load the data from a stream of an .off file.
+
+    Example .off file format:
+
+    off
+    8 6 1927                   { number of vertices, faces, and (not used) edges }
+    # comment                  { comments with # sign }
+    0 0 0                      { start of vertex list }
+    0 0 1
+    0 1 1
+    0 1 0
+    1 0 0
+    1 0 1
+    1 1 1
+    1 1 0
+    4 0 1 2 3                  { start of face list }
+    4 7 6 5 4
+    4 0 4 5 1
+    4 1 5 6 2
+    4 2 6 7 3
+    4 3 7 4 0
+
+    Args:
+        file:  A binary file-like object (with methods read, readline,
+            tell and seek).
+
+    Returns dictionary possibly containing:
+        verts: (always present) float numpy array of shape (V, 3).
+        verts_colors: float numpy array of shape (V, C) where C is 3 or 4.
+        faces: int64 numpy array of vertex indices, as triangles, shape (F, 3).
+        faces_colors: float numpy array of shape (F, C), where C is 3 or 4.
+    """
+    header = file.readline()
+    # readline() gives b"" at end of file: stop there instead of looping forever.
+    while header and _is_line_empty(header):
+        header = file.readline()
+
+    if header[:3].lower() == b"off":
+        header = header[3:]
+
+    while header and _is_line_empty(header):
+        header = file.readline()
+
+    items = header.split()
+    if len(items) < 3:
+        raise ValueError("Invalid counts line: %s" % header)
+
+    try:
+        n_verts = int(items[0])
+    except ValueError:
+        raise ValueError("Invalid counts line: %s" % header) from None
+    try:
+        n_faces = int(items[1])
+    except ValueError:
+        raise ValueError("Invalid counts line: %s" % header) from None
+
+    if (len(items) > 3 and not items[3].startswith(b"#")) or n_verts < 0 or n_faces < 0:
+        raise ValueError("Invalid counts line: %s" % header)
+
+    verts, verts_colors = _read_verts(file, n_verts)
+    faces, faces_colors = _read_faces(file, n_faces)
+
+    end = file.read().strip()
+    if len(end) != 0:
+        raise ValueError("Extra data at end of file: " + str(end[:20]))
+
+    out = {"verts": verts}
+    if verts_colors is not None:
+        out["verts_colors"] = verts_colors
+    if faces is not None:
+        out["faces"] = faces
+    if faces_colors is not None:
+        out["faces_colors"] = faces_colors
+    return out
+
+
+def _write_off_data(
+    file,
+    verts: torch.Tensor,
+    verts_colors: Optional[torch.Tensor] = None,
+    faces: Optional[torch.LongTensor] = None,
+    faces_colors: Optional[torch.Tensor] = None,
+    decimal_places: Optional[int] = None,
+) -> None:
+    """
+    Internal implementation for saving 3D data to a .off file.
+
+    Args:
+        file: Binary file object to which the 3D data should be written.
+        verts: FloatTensor of shape (V, 3) giving vertex coordinates.
+        verts_colors: FloatTensor of shape (V, C) giving vertex colors where C is 3 or 4.
+        faces: LongTensor of shape (F, 3) giving faces.
+        faces_colors: FloatTensor of shape (F, C) giving face colors where C is 3 or 4.
+        decimal_places: Number of decimal places for every float written.
+    """
+    nfaces = 0 if faces is None else faces.shape[0]
+    file.write(f"off\n{verts.shape[0]} {nfaces} 0\n".encode("ascii"))
+
+    if verts_colors is not None:
+        verts = torch.cat((verts, verts_colors), dim=1)
+    # One printf-style spec shared by every float column written below.
+    float_str = "%f" if decimal_places is None else "%%.%df" % decimal_places
+    np.savetxt(file, verts.cpu().detach().numpy(), float_str)
+
+    if faces is not None:
+        _check_faces_indices(faces, max_index=verts.shape[0])
+
+    if faces_colors is not None:
+        face_data = torch.cat(
+            [
+                cast(torch.Tensor, faces).cpu().to(torch.float64),
+                faces_colors.detach().cpu().to(torch.float64),
+            ],
+            dim=1,
+        )
+        # Indices are stored as floats only so they share an array with the
+        # colors; "%d" prints them as integers again.
+        face_format = "3 %d %d %d" + (" " + float_str) * faces_colors.shape[1]
+        np.savetxt(file, face_data.numpy(), face_format)
+    elif faces is not None:
+        np.savetxt(file, faces.cpu().detach().numpy(), "3 %d %d %d")
+
+
+def _save_off(
+    file,
+    *,
+    verts: torch.Tensor,
+    verts_colors: Optional[torch.Tensor] = None,
+    faces: Optional[torch.LongTensor] = None,
+    faces_colors: Optional[torch.Tensor] = None,
+    decimal_places: Optional[int] = None,
+    path_manager: PathManager,
+) -> None:
+    """
+    Save a mesh to an ascii .off file.
+
+    Args:
+        file: File (or path) to which the mesh should be written.
+        verts: FloatTensor of shape (V, 3) giving vertex coordinates.
+        verts_colors: FloatTensor of shape (V, C) giving vertex colors where C is 3 or 4.
+        faces: LongTensor of shape (F, 3) giving faces.
+        faces_colors: FloatTensor of shape (F, C) giving face colors where C is 3 or 4.
+        decimal_places: Number of decimal places for saving.
+        path_manager: PathManager handed to _open_file to open `file` for writing.
+
+    Raises:
+        ValueError: if any input has the wrong shape, or colors lack their faces.
+    """
+    if len(verts) and not (verts.dim() == 2 and verts.size(1) == 3):
+        message = "Argument 'verts' should either be empty or of shape (num_verts, 3)."
+        raise ValueError(message)
+
+    # Zero-length optional inputs are handled exactly like absent ones.
+    if verts_colors is not None and 0 == len(verts_colors):
+        verts_colors = None
+    if faces_colors is not None and 0 == len(faces_colors):
+        faces_colors = None
+    if faces is not None and 0 == len(faces):
+        faces = None
+
+    if verts_colors is not None:
+        if not (verts_colors.dim() == 2 and verts_colors.size(1) in [3, 4]):
+            raise ValueError("verts_colors should have shape (num_verts, C).")
+        if verts_colors.shape[0] != verts.shape[0]:
+            raise ValueError("verts_colors should have the same length as verts.")
+
+    if faces is not None and not (faces.dim() == 2 and faces.size(1) == 3):
+        message = "Argument 'faces' if present should have shape (num_faces, 3)."
+        raise ValueError(message)
+    if faces_colors is not None and faces is None:
+        raise ValueError("Cannot have face colors without faces")
+
+    if faces_colors is not None:
+        if not (faces_colors.dim() == 2 and faces_colors.size(1) in [3, 4]):
+            raise ValueError("faces_colors should have shape (num_faces, C).")
+        if faces_colors.shape[0] != cast(torch.LongTensor, faces).shape[0]:
+            raise ValueError("faces_colors should have the same length as faces.")
+
+    with _open_file(file, path_manager, "wb") as f:
+        _write_off_data(f, verts, verts_colors, faces, faces_colors, decimal_places)
+
+
+class MeshOffFormat(MeshFormatInterpreter):
+    """
+    Loads and saves meshes in the ascii OFF format. This is a simple
+    format which can only deal with the following texture types:
+
+    - TexturesVertex, i.e. one color for each vertex
+    - TexturesAtlas with R=1, i.e. one color for each face.
+
+    There are some possible features of OFF files which we do not support
+    and which appear to be rare:
+
+    - Four dimensional data.
+    - Binary data.
+    - Vertex Normals.
+    - Texture coordinates.
+    - "COFF" header.
+
+    Example .off file format:
+
+    off
+    8 6 1927                   { number of vertices, faces, and (not used) edges }
+    # comment                  { comments with # sign }
+    0 0 0                      { start of vertex list }
+    0 0 1
+    0 1 1
+    0 1 0
+    1 0 0
+    1 0 1
+    1 1 1
+    1 1 0
+    4 0 1 2 3                  { start of face list }
+    4 7 6 5 4
+    4 0 4 5 1
+    4 1 5 6 2
+    4 2 6 7 3
+    4 3 7 4 0
+
+    """
+
+    def __init__(self) -> None:
+        self.known_suffixes = (".off",)
+
+    def read(
+        self,
+        path: PathOrStr,
+        include_textures: bool,
+        device,
+        path_manager: PathManager,
+        **kwargs,
+    ) -> Optional[Meshes]:
+        """
+        Loads a Meshes from an .off path (or stream), or returns None when the
+        path has another suffix. Colors are kept only if include_textures.
+        """
+        if not endswith(path, self.known_suffixes):
+            return None
+
+        with _open_file(path, path_manager, "rb") as f:
+            data = _load_off_stream(f)
+        verts = torch.from_numpy(data["verts"]).to(device)
+        if "faces" in data:
+            faces = torch.from_numpy(data["faces"]).to(dtype=torch.int64, device=device)
+        else:
+            faces = torch.zeros((0, 3), dtype=torch.int64, device=device)
+
+        textures = None
+        if include_textures and "verts_colors" in data:
+            if "faces_colors" in data:
+                warnings.warn("Faces colors ignored because vertex colors provided too.")
+            verts_colors = torch.from_numpy(data["verts_colors"]).to(device)
+            textures = TexturesVertex([verts_colors])
+        elif include_textures and "faces_colors" in data:
+            faces_colors = torch.from_numpy(data["faces_colors"]).to(device)
+            textures = TexturesAtlas([faces_colors[:, None, None, :]])
+
+        return Meshes(verts=[verts], faces=[faces], textures=textures)
+
+    def save(
+        self,
+        data: Meshes,
+        path: PathOrStr,
+        path_manager: PathManager,
+        binary: Optional[bool],
+        decimal_places: Optional[int] = None,
+        **kwargs,
+    ) -> bool:
+        if not endswith(path, self.known_suffixes):
+            return False
+
+        verts = data.verts_list()[0]
+        faces = data.faces_list()[0]
+        verts_colors = None
+        if isinstance(data.textures, TexturesVertex):
+            [verts_colors] = data.textures.verts_features_list()
+
+        faces_colors = None
+        if isinstance(data.textures, TexturesAtlas):
+            # Only an R == 1 atlas (one color per face) fits OFF; others are dropped.
+            [atlas] = data.textures.atlas_list()
+            _, R, _, _ = atlas.shape
+            if R == 1:
+                faces_colors = atlas[:, 0, 0, :]
+
+        _save_off(
+            file=path,
+            verts=verts,
+            faces=faces,
+            verts_colors=verts_colors,
+            faces_colors=faces_colors,
+            decimal_places=decimal_places,
+            path_manager=path_manager,
+        )
+        return True
diff --git a/pytorch3d/pytorch3d/io/pluggable.py b/pytorch3d/pytorch3d/io/pluggable.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c37859e8f4e392b1fb50d10a3880647887bd665
--- /dev/null
+++ b/pytorch3d/pytorch3d/io/pluggable.py
@@ -0,0 +1,223 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from collections import deque
+from pathlib import Path
+from typing import Deque, Optional, Union
+
+from iopath.common.file_io import PathManager
+from pytorch3d.common.datatypes import Device
+from pytorch3d.structures import Meshes, Pointclouds
+
+from .obj_io import MeshObjFormat
+from .off_io import MeshOffFormat
+from .pluggable_formats import MeshFormatInterpreter, PointcloudFormatInterpreter
+from .ply_io import MeshPlyFormat, PointcloudPlyFormat
+
+
+"""
+This module has the master functions for loading and saving data.
+
+The main usage is via the IO object, and its methods
+`load_mesh`, `save_mesh`, `load_pointcloud` and `save_pointcloud`.
+
+For example, to load a mesh you might do::
+
+    from pytorch3d.io import IO
+
+    mesh = IO().load_mesh("mymesh.obj")
+
+and to save a point cloud you might do::
+
+    pcl = Pointclouds(...)
+    IO().save_pointcloud(pcl, "output_pointcloud.ply")
+
+"""
+
+
+class IO:
+    """
+    This class is the interface to flexible loading and saving of meshes and point clouds.
+
+    In simple cases the user will just initialize an instance of this class as `IO()`
+    and then use its load and save functions. The arguments of the initializer are not
+    usually needed.
+
+    The user can add their own formats for saving and loading by passing their own objects
+    to the register_* functions.
+
+    Args:
+        include_default_formats: If False, the built-in file formats will not be available.
+            Then only user-registered formats can be used.
+        path_manager: Used to customize how paths given as strings are interpreted.
+    """
+
+    def __init__(
+        self,
+        include_default_formats: bool = True,
+        path_manager: Optional[PathManager] = None,
+    ) -> None:
+        if path_manager is None:
+            self.path_manager = PathManager()
+        else:
+            self.path_manager = path_manager
+
+        self.mesh_interpreters: Deque[MeshFormatInterpreter] = deque()
+        self.pointcloud_interpreters: Deque[PointcloudFormatInterpreter] = deque()
+
+        if include_default_formats:
+            self.register_default_formats()
+
+    def register_default_formats(self) -> None:
+        self.register_meshes_format(MeshObjFormat())
+        self.register_meshes_format(MeshOffFormat())
+        self.register_meshes_format(MeshPlyFormat())
+        self.register_pointcloud_format(PointcloudPlyFormat())
+
+    def register_meshes_format(self, interpreter: MeshFormatInterpreter) -> None:
+        """
+        Register a new interpreter for a new mesh file format.
+
+        Args:
+            interpreter: the new interpreter to use, which must be an instance
+                of a class which inherits MeshFormatInterpreter.
+        """
+        if not isinstance(interpreter, MeshFormatInterpreter):
+            raise ValueError("Invalid interpreter")
+        self.mesh_interpreters.appendleft(interpreter)
+
+    def register_pointcloud_format(
+        self, interpreter: PointcloudFormatInterpreter
+    ) -> None:
+        """
+        Register a new interpreter for a new point cloud file format.
+
+        Args:
+            interpreter: the new interpreter to use, which must be an instance
+                of a class which inherits PointcloudFormatInterpreter.
+        """
+        if not isinstance(interpreter, PointcloudFormatInterpreter):
+            raise ValueError("Invalid interpreter")
+        self.pointcloud_interpreters.appendleft(interpreter)
+
+    def load_mesh(
+        self,
+        path: Union[str, Path],
+        include_textures: bool = True,
+        device: Device = "cpu",
+        **kwargs,
+    ) -> Meshes:
+        """
+        Attempt to load a mesh from the given file, using a registered format.
+        Materials are not returned. If you have a .obj file with materials
+        you might want to load them with the load_obj function instead.
+
+        Args:
+            path: file to read
+            include_textures: whether to try to load texture information
+            device: device on which to leave the data.
+
+        Returns:
+            new Meshes object containing one mesh.
+        """
+        for mesh_interpreter in self.mesh_interpreters:
+            mesh = mesh_interpreter.read(
+                path,
+                include_textures=include_textures,
+                path_manager=self.path_manager,
+                device=device,
+                **kwargs,
+            )
+            if mesh is not None:
+                return mesh
+
+        raise ValueError(f"No mesh interpreter found to read {path}.")
+
+    def save_mesh(
+        self,
+        data: Meshes,
+        path: Union[str, Path],
+        binary: Optional[bool] = None,
+        include_textures: bool = True,
+        **kwargs,
+    ) -> None:
+        """
+        Attempt to save a mesh to the given file, using a registered format.
+
+        Args:
+            data: a 1-element Meshes
+            path: file to write
+            binary: If there is a choice, whether to save in a binary format.
+            include_textures: If textures are present, whether to try to save
+                                them.
+        """
+        if not isinstance(data, Meshes):
+            raise ValueError("Meshes object expected.")
+
+        if len(data) != 1:
+            raise ValueError("Can only save a single mesh.")
+
+        for mesh_interpreter in self.mesh_interpreters:
+            success = mesh_interpreter.save(
+                data, path, path_manager=self.path_manager, binary=binary, **kwargs
+            )
+            if success:
+                return
+
+        raise ValueError(f"No mesh interpreter found to write to {path}.")
+
+    def load_pointcloud(
+        self, path: Union[str, Path], device: Device = "cpu", **kwargs
+    ) -> Pointclouds:
+        """
+        Attempt to load a point cloud from the given file, using a registered format.
+
+        Args:
+            path: file to read
+            device: Device (as str or torch.device) on which to load the data.
+
+        Returns:
+            new Pointclouds object containing one point cloud.
+        """
+        for pointcloud_interpreter in self.pointcloud_interpreters:
+            pointcloud = pointcloud_interpreter.read(
+                path, path_manager=self.path_manager, device=device, **kwargs
+            )
+            if pointcloud is not None:
+                return pointcloud
+
+        raise ValueError(f"No point cloud interpreter found to read {path}.")
+
+    def save_pointcloud(
+        self,
+        data: Pointclouds,
+        path: Union[str, Path],
+        binary: Optional[bool] = None,
+        **kwargs,
+    ) -> None:
+        """
+        Attempt to save a point cloud to the given file, using a registered format.
+
+        Args:
+            data: a 1-element Pointclouds
+            path: file to write
+            binary: If there is a choice, whether to save in a binary format.
+        """
+        if not isinstance(data, Pointclouds):
+            raise ValueError("Pointclouds object expected.")
+
+        if len(data) != 1:
+            raise ValueError("Can only save a single point cloud.")
+
+        for pointcloud_interpreter in self.pointcloud_interpreters:
+            success = pointcloud_interpreter.save(
+                data, path, path_manager=self.path_manager, binary=binary, **kwargs
+            )
+            if success:
+                return
+
+        raise ValueError(f"No point cloud interpreter found to write to {path}.")
diff --git a/pytorch3d/pytorch3d/io/pluggable_formats.py b/pytorch3d/pytorch3d/io/pluggable_formats.py
new file mode 100644
index 0000000000000000000000000000000000000000..8973b7c701e249fe8576ec1878e66dd9ae17758d
--- /dev/null
+++ b/pytorch3d/pytorch3d/io/pluggable_formats.py
@@ -0,0 +1,140 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import pathlib
+from typing import Optional, Tuple
+
+from iopath.common.file_io import PathManager
+from pytorch3d.common.datatypes import Device
+from pytorch3d.io.utils import PathOrStr
+from pytorch3d.structures import Meshes, Pointclouds
+
+
+"""
+This module has the base classes which must be extended to define
+an interpreter for loading and saving data in a particular format.
+These can be registered on an IO object so that they can be used in
+its load_* and save_* functions.
+"""
+
+
+def endswith(path: PathOrStr, suffixes: Tuple[str, ...]) -> bool:
+    """
+    Checks the lowercased ending of `path` against `suffixes`.
+    Anything which is neither a str nor a pathlib.Path (e.g. an
+    open stream) is reported as matching, so that interpreters
+    can skip unsuitable paths yet never turn a stream away.
+    """
+    if isinstance(path, str):
+        return path.lower().endswith(suffixes)
+    if isinstance(path, pathlib.Path):
+        return path.suffix.lower() in suffixes
+    return True
+
+
+class MeshFormatInterpreter:
+    """
+    This is a base class for an interpreter which can read or write
+    a mesh in a particular format.
+    """
+
+    def read(
+        self,
+        path: PathOrStr,
+        include_textures: bool,
+        device: Device,
+        path_manager: PathManager,
+        **kwargs,
+    ) -> Optional[Meshes]:
+        """
+        Read the data from the specified file and return it as
+        a Meshes object.
+
+        Args:
+            path: path to load.
+            include_textures: whether to try to load texture information.
+            device: torch.device to load data on to.
+            path_manager: PathManager to interpret the path.
+
+        Returns:
+            None if self is not the appropriate object to interpret the given
+                path.
+            Otherwise, the read Meshes object.
+        """
+        raise NotImplementedError()
+
+    def save(
+        self,
+        data: Meshes,
+        path: PathOrStr,
+        path_manager: PathManager,
+        binary: Optional[bool],
+        **kwargs,
+    ) -> bool:
+        """
+        Save the given Meshes object to the given path.
+
+        Args:
+            data: mesh to save
+            path: path to save to, which may be overwritten.
+            path_manager: PathManager to interpret the path.
+            binary: If there is a choice, whether to save in a binary format.
+
+        Returns:
+            False: if self is not the appropriate object to write to the given path.
+            True: on success.
+        """
+        raise NotImplementedError()
+
+
+class PointcloudFormatInterpreter:
+    """
+    This is a base class for an interpreter which can read or write
+    a point cloud in a particular format.
+    """
+
+    def read(
+        self, path: PathOrStr, device: Device, path_manager: PathManager, **kwargs
+    ) -> Optional[Pointclouds]:
+        """
+        Read the data from the specified file and return it as
+        a Pointclouds object.
+
+        Args:
+            path: path to load.
+            device: torch.device to load data on to.
+            path_manager: PathManager to interpret the path.
+
+        Returns:
+            None if self is not the appropriate object to interpret the given
+                path.
+            Otherwise, the read Pointclouds object.
+        """
+        raise NotImplementedError()
+
+    def save(
+        self,
+        data: Pointclouds,
+        path: PathOrStr,
+        path_manager: PathManager,
+        binary: Optional[bool],
+        **kwargs,
+    ) -> bool:
+        """
+        Save the given Pointclouds object to the given path.
+
+        Args:
+            data: point cloud object to save
+            path: path to save to, which may be overwritten.
+            path_manager: PathManager to interpret the path.
+            binary: If there is a choice, whether to save in a binary format.
+
+        Returns:
+            False: if self is not the appropriate object to write to the given path.
+            True: on success.
+        """
+        raise NotImplementedError()
diff --git a/pytorch3d/pytorch3d/io/ply_io.py b/pytorch3d/pytorch3d/io/ply_io.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d59b1934c1e717dbe06e993a05eae0ef6a2beef
--- /dev/null
+++ b/pytorch3d/pytorch3d/io/ply_io.py
@@ -0,0 +1,1537 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+"""
+This module implements utility functions for loading and saving
+meshes and point clouds as PLY files.
+"""
+import itertools
+import os
+import struct
+import sys
+import warnings
+from collections import namedtuple
+from dataclasses import asdict, dataclass
+from io import BytesIO, TextIOBase
+from typing import List, Optional, Tuple
+
+import numpy as np
+import torch
+from iopath.common.file_io import PathManager
+from pytorch3d.io.utils import (
+    _check_faces_indices,
+    _make_tensor,
+    _open_file,
+    _read_image,
+    PathOrStr,
+)
+from pytorch3d.renderer import TexturesUV, TexturesVertex
+from pytorch3d.structures import Meshes, Pointclouds
+
+from .pluggable_formats import (
+    endswith,
+    MeshFormatInterpreter,
+    PointcloudFormatInterpreter,
+)
+
+
+# Describes one PLY scalar type: its size in bytes, its format character
+# for the struct module, and the numpy type used to hold it.
+_PlyTypeData = namedtuple("_PlyTypeData", ["size", "struct_char", "np_type"])
+
+# Maps every accepted PLY type name to its description. The classic C-style
+# names come first, followed by the explicitly sized aliases; an alias maps
+# to a value equal to that of the classic name it stands for.
+_PLY_TYPES = {
+    type_name: _PlyTypeData(size, struct_char, np_type)
+    for type_name, size, struct_char, np_type in (
+        ("char", 1, "b", np.byte),
+        ("uchar", 1, "B", np.ubyte),
+        ("short", 2, "h", np.short),
+        ("ushort", 2, "H", np.ushort),
+        ("int", 4, "i", np.int32),
+        ("uint", 4, "I", np.uint32),
+        ("float", 4, "f", np.float32),
+        ("double", 8, "d", np.float64),
+        ("int8", 1, "b", np.byte),
+        ("uint8", 1, "B", np.ubyte),
+        ("int16", 2, "h", np.short),
+        ("uint16", 2, "H", np.ushort),
+        ("int32", 4, "i", np.int32),
+        ("uint32", 4, "I", np.uint32),
+        ("float32", 4, "f", np.float32),
+        ("float64", 8, "d", np.float64),
+    )
+}
+
+# One property of an element: its name, the PLY type of its values and, for
+# a list property, the PLY type of the list length (None if not a list).
+_Property = namedtuple("_Property", ["name", "data_type", "list_size_type"])
+
+
+class _PlyElementType:
+    """
+    Describes one element declared in the header of a Ply file.
+    Members:
+        self.name:       (str) name of the element
+        self.count:      (int) number of such elements in the file
+        self.properties: (List[_Property]) the properties of the element,
+                            in the order they were added. Each one contains
+                            a name and data type.
+    """
+
+    def __init__(self, name: str, count: int) -> None:
+        self.properties: List[_Property] = []
+        self.count = count
+        self.name = name
+
+    def add_property(
+        self, name: str, data_type: str, list_size_type: Optional[str] = None
+    ):
+        """Appends a new property to the element.
+
+        Args:
+            name:           (str) name of the property.
+            data_type:      (str) PLY data type.
+            list_size_type: (str) PLY data type of the list size, or None if not
+                            a list.
+
+        Raises:
+            ValueError: if the element already has a property called name.
+        """
+        if any(existing.name == name for existing in self.properties):
+            raise ValueError(
+                "Cannot have two properties called %s in %s." % (name, self.name)
+            )
+        self.properties.append(_Property(name, data_type, list_size_type))
+
+    def is_fixed_size(self) -> bool:
+        """Return whether the Element has no list properties
+
+        Returns:
+            True if none of the properties are lists.
+        """
+        return all(prop.list_size_type is None for prop in self.properties)
+
+    def is_constant_type_fixed_size(self) -> bool:
+        """Return whether the Element has all properties of the same non-list
+        type.
+
+        Returns:
+            True if none of the properties are lists and all the properties
+            share a type. Type names which map to equal entries of
+            _PLY_TYPES count as the same type.
+        """
+        if not self.is_fixed_size():
+            return False
+        reference_type = _PLY_TYPES[self.properties[0].data_type]
+        return all(
+            _PLY_TYPES[prop.data_type] == reference_type for prop in self.properties
+        )
+
+    def try_constant_list(self) -> bool:
+        """Whether the element is just a single list, which might have a
+        constant size, and therefore we could try to parse quickly with numpy.
+
+        Returns:
+            True if the only property is a list.
+        """
+        return (
+            len(self.properties) == 1
+            and self.properties[0].list_size_type is not None
+        )
+
+
+class _PlyHeader:
+    def __init__(self, f) -> None:
+        """
+        Load a header of a Ply file from a file-like object.
+        Members:
+            self.elements:   (List[_PlyElementType]) element description
+            self.ascii:      (bool) Whether in ascii format
+            self.big_endian: (bool) (if not ascii) whether big endian
+            self.obj_info:   (List[str]) arbitrary extra data
+            self.comments:   (List[str]) comments
+
+        Args:
+            f: file-like object. It is read line by line, up to and
+                including the "end_header" line.
+
+        Raises:
+            ValueError: if the header is malformed, or if the stream ends
+                before an "end_header" line is found.
+        """
+        if f.readline() not in [b"ply\n", b"ply\r\n", "ply\n"]:
+            raise ValueError("Invalid file header.")
+        seen_format = False
+        self.elements: List[_PlyElementType] = []
+        self.comments: List[str] = []
+        self.obj_info: List[str] = []
+        while True:
+            line = f.readline()
+            if not line:
+                # readline() only returns an empty str/bytes at end of file
+                # (a blank line still carries its newline). Without this
+                # check a truncated header is treated as an endless run of
+                # blank lines and the loop never terminates.
+                raise ValueError("Unexpected end of file before end_header.")
+            if isinstance(line, bytes):
+                line = line.decode("ascii")
+            line = line.strip()
+            if line == "end_header":
+                if not self.elements:
+                    raise ValueError("No elements found.")
+                if not self.elements[-1].properties:
+                    raise ValueError("Found an element with no properties.")
+                if not seen_format:
+                    raise ValueError("No format line found.")
+                break
+            if not seen_format:
+                if line == "format ascii 1.0":
+                    seen_format = True
+                    self.ascii = True
+                    continue
+                if line == "format binary_little_endian 1.0":
+                    seen_format = True
+                    self.ascii = False
+                    self.big_endian = False
+                    continue
+                if line == "format binary_big_endian 1.0":
+                    seen_format = True
+                    self.ascii = False
+                    self.big_endian = True
+                    continue
+            # Any other format line (unknown format, or a second one) is an error.
+            if line.startswith("format"):
+                raise ValueError("Invalid format line.")
+            if line.startswith("comment "):
+                self.comments.append(line[8:])
+                continue
+            if line.startswith("comment") or len(line) == 0:
+                continue
+            if line.startswith("element"):
+                self._parse_element(line)
+                continue
+            if line.startswith("obj_info "):
+                self.obj_info.append(line[9:])
+                continue
+            if line.startswith("property"):
+                self._parse_property(line)
+                continue
+            raise ValueError("Invalid line: %s." % line)
+
+    def _parse_property(self, line: str):
+        """
+        Decode a ply file header property line and add the property to the
+        most recently declared element.
+
+        Args:
+            line: (str) the ply file's line.
+
+        Raises:
+            ValueError: if no element has been declared yet, if the line does
+                not have the form "property <type> <name>" or
+                "property list <size type> <type> <name>", or if a type is
+                not a known PLY type.
+        """
+        if not self.elements:
+            raise ValueError("Encountered property before any element.")
+        items = line.split(" ")
+        if len(items) not in [3, 5]:
+            raise ValueError("Invalid line: %s" % line)
+        datatype = items[1]
+        name = items[-1]
+        # A list property has exactly five tokens and a scalar property has
+        # exactly three. Checking this here stops "property list x" from
+        # failing with an IndexError below, and stops a five-token scalar
+        # line from being accepted with only its last token used.
+        if (datatype == "list") != (len(items) == 5):
+            raise ValueError("Invalid line: %s" % line)
+        if datatype == "list":
+            datatype = items[3]
+            list_size_type = items[2]
+            if list_size_type not in _PLY_TYPES:
+                raise ValueError("Invalid datatype: %s" % list_size_type)
+        else:
+            list_size_type = None
+        if datatype not in _PLY_TYPES:
+            raise ValueError("Invalid datatype: %s" % datatype)
+        self.elements[-1].add_property(name, datatype, list_size_type)
+
+    def _parse_element(self, line: str):
+        """
+        Decode a ply file header element line and append the new element to
+        self.elements.
+
+        Args:
+            line: (str) the ply file's line.
+
+        Raises:
+            ValueError: if the previous element has no properties, if the
+                line does not have exactly three space-separated items, or
+                if the element count is not an integer.
+        """
+        if self.elements and not self.elements[-1].properties:
+            raise ValueError("Found an element with no properties.")
+        items = line.split(" ")
+        if len(items) != 3:
+            raise ValueError("Invalid line: %s" % line)
+        try:
+            count = int(items[2])
+        except ValueError:
+            msg = "Number of items for %s was not a number."
+            raise ValueError(msg % items[1]) from None
+        self.elements.append(_PlyElementType(items[1], count))
+
+
+def _read_ply_fixed_size_element_ascii(f, definition: _PlyElementType):
+    """
+    Read the data of an element whose properties are all non-lists of a
+    single type.
+
+    For example
+
+        element vertex 8
+        property float x
+        property float y
+        property float z
+
+    Args:
+        f: file-like object being read.
+        definition: The element object which describes what we are reading.
+
+    Returns:
+        1-element list containing a 2D numpy array corresponding to the data.
+        The rows are the different values. There is one column for each property.
+
+    Raises:
+        ValueError: if the number of columns read differs from the number of
+            properties, or the number of rows read differs from the count.
+    """
+    column_type = _PLY_TYPES[definition.properties[0].data_type].np_type
+    start_position = f.tell()
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore", category=UserWarning, message=".* Empty input file.*"
+        )
+        table = np.loadtxt(
+            f, dtype=column_type, comments=None, ndmin=2, max_rows=definition.count
+        )
+    n_rows, n_columns = table.shape
+    if n_rows == 0:
+        # np.loadtxt() seeks even on empty data, so put the position back.
+        f.seek(start_position)
+    if n_columns != len(definition.properties):
+        raise ValueError("Inconsistent data for %s." % definition.name)
+    if n_rows != definition.count:
+        raise ValueError("Not enough data for %s." % definition.name)
+    return [table]
+
+
+def _read_ply_nolist_element_ascii(f, definition: _PlyElementType):
+    """
+    Read the data of an element which has no lists but more than one type.
+    Everything is first loaded as float64, then each run of adjacent
+    properties declared with the same type name is converted to that type.
+
+    For example, given
+
+        element vertex 8
+        property float x
+        property float y
+        property float z
+        property uchar red
+        property uchar green
+        property uchar blue
+
+    the output will have two arrays, the first containing (x,y,z)
+    and the second (red,green,blue).
+
+    Args:
+        f: file-like object being read.
+        definition: The element object which describes what we are reading.
+
+    Returns:
+        List of 2D numpy arrays corresponding to the data.
+
+    Raises:
+        ValueError: if the number of columns read differs from the number of
+            properties, or the number of rows read differs from the count.
+    """
+    start_position = f.tell()
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore", category=UserWarning, message=".* Empty input file.*"
+        )
+        table = np.loadtxt(
+            f, dtype=np.float64, comments=None, ndmin=2, max_rows=definition.count
+        )
+    n_rows, n_columns = table.shape
+    if n_rows == 0:
+        # np.loadtxt() seeks even on empty data, so put the position back.
+        f.seek(start_position)
+    if n_columns != len(definition.properties):
+        raise ValueError("Inconsistent data for %s." % definition.name)
+    if n_rows != definition.count:
+        raise ValueError("Not enough data for %s." % definition.name)
+
+    pieces = []
+    first_column = 0
+    type_runs = itertools.groupby(prop.data_type for prop in definition.properties)
+    for type_name, run in type_runs:
+        last_column = first_column + len(list(run))
+        target_type = _PLY_TYPES[type_name].np_type
+        pieces.append(table[:, first_column:last_column].astype(target_type))
+        first_column = last_column
+    return pieces
+
+
+def _try_read_ply_constant_list_ascii(f, definition: _PlyElementType):
+    """
+    For an element which is a single list, try to read all of its data in
+    one go on the assumption that every list has the same length.
+    If np.loadtxt rejects the data with a ValueError (e.g. because it is
+    ragged), return None and put f back where it started.
+
+    For example, if the element is
+
+        element face 2
+        property list uchar int vertex_index
+
+    and the data is
+
+        4 0 1 2 3
+        4 7 6 5 4
+
+    then the function will return
+
+        [[0, 1, 2, 3],
+         [7, 6, 5, 4]]
+
+    but if the data is
+
+        4 0 1 2 3
+        3 6 5 4
+
+    then the function will return None.
+
+    Args:
+        f: file-like object being read.
+        definition: The element object which describes what we are reading.
+
+    Returns:
+        If every element has the same size, 2D numpy array corresponding to the
+        data. The rows are the different values. Otherwise None.
+
+    Raises:
+        ValueError: if a line's leading length value disagrees with the
+            number of values on the line, or fewer rows than the element
+            count were read.
+    """
+    value_type = _PLY_TYPES[definition.properties[0].data_type].np_type
+    start_position = f.tell()
+    try:
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "ignore", category=UserWarning, message=".* Empty input file.*"
+            )
+            table = np.loadtxt(
+                f,
+                dtype=value_type,
+                comments=None,
+                ndmin=2,
+                max_rows=definition.count,
+            )
+    except ValueError:
+        f.seek(start_position)
+        return None
+    if len(table) == 0:
+        # np.loadtxt() seeks even on empty data, so put the position back.
+        f.seek(start_position)
+    # The first column holds each list's declared length; the rest is the list.
+    declared_lengths = table[:, 0]
+    values_per_row = table.shape[1] - 1
+    if (declared_lengths != values_per_row).any():
+        raise ValueError(
+            "A line of %s data did not have the specified length." % definition.name
+        )
+    if table.shape[0] != definition.count:
+        raise ValueError("Not enough data for %s." % definition.name)
+    return table[:, 1:]
+
+
+def _parse_heterogeneous_property_ascii(datum, line_iter, property: _Property):
+    """
+    Read a general data property from an ascii .ply file.
+
+    Args:
+        datum: list to append the single value to. That value will be a numpy
+                array if the property is a list property, otherwise an int or
+                float.
+        line_iter: iterator to words on the line from which we read.
+        property: the property object describing the property we are reading.
+
+    Raises:
+        ValueError: if the line runs out of words, a list length is not an
+            integer, or a value cannot be parsed as a number.
+    """
+    value = next(line_iter, None)
+    if value is None:
+        raise ValueError("Too little data for an element.")
+    if property.list_size_type is None:
+        try:
+            # The sized aliases have to be recognised as floats as well;
+            # otherwise a float32/float64 value such as "0.5" is handed to
+            # int() and rejected as bad data.
+            if property.data_type in ("double", "float", "float64", "float32"):
+                datum.append(float(value))
+            else:
+                datum.append(int(value))
+        except ValueError:
+            raise ValueError("Bad numerical data.") from None
+    else:
+        # For a list property the first word is the number of items.
+        try:
+            length = int(value)
+        except ValueError:
+            raise ValueError("A list length was not a number.") from None
+        list_value = np.zeros(length, dtype=_PLY_TYPES[property.data_type].np_type)
+        for i in range(length):
+            inner_value = next(line_iter, None)
+            if inner_value is None:
+                raise ValueError("Too little data for an element.")
+            try:
+                # Parsed as float, then converted to the array's type on
+                # assignment.
+                list_value[i] = float(inner_value)
+            except ValueError:
+                raise ValueError("Bad numerical data.") from None
+        datum.append(list_value)
+
+
+def _read_ply_element_ascii(f, definition: _PlyElementType):
+    """
+    Decode all instances of a single element from an ascii .ply file.
+
+    Args:
+        f: file-like object being read. Its readline() may return either
+            str or bytes.
+        definition: The element object which describes what we are reading.
+
+    Returns:
+        An empty list if the element has a count of zero.
+        In simple cases where every element has the same size, the result of
+        the corresponding bulk reader: a list of 2D numpy arrays for an
+        element with no lists, or a 2D numpy array for an element which is a
+        single list of constant length. The rows are the different values.
+        Otherwise a list of lists of values, where the outer list is
+        each occurrence of the element, and the inner lists have one value per
+        property.
+
+    Raises:
+        ValueError: if there is too little or too much data for the element.
+    """
+    if not definition.count:
+        return []
+    if definition.is_constant_type_fixed_size():
+        return _read_ply_fixed_size_element_ascii(f, definition)
+    if definition.is_fixed_size():
+        return _read_ply_nolist_element_ascii(f, definition)
+    if definition.try_constant_list():
+        data = _try_read_ply_constant_list_ascii(f, definition)
+        if data is not None:
+            return data
+
+    # We failed to read the element as a lump, must process each line manually.
+    data = []
+    for _i in range(definition.count):
+        line_string = f.readline()
+        # End of file is "" on a text stream but b"" on a binary stream, so
+        # test for emptiness rather than comparing with "".
+        if not line_string:
+            raise ValueError("Not enough data for %s." % definition.name)
+        datum = []
+        line_iter = iter(line_string.strip().split())
+        for property in definition.properties:
+            _parse_heterogeneous_property_ascii(datum, line_iter, property)
+        data.append(datum)
+        # Every word on the line must have been consumed by some property.
+        if next(line_iter, None) is not None:
+            raise ValueError("Too much data for an element.")
+    return data
+
+
+def _read_raw_array(
+    f, aim: str, length: int, dtype: type = np.uint8, dtype_size: int = 1
+):
+    """
+    Read [length] elements of the given type from a file.
+
+    Args:
+        f: file object
+        aim: name of target for error message
+        length: number of elements
+        dtype: numpy type
+        dtype_size: number of bytes per element.
+
+    Returns:
+        new numpy array
+
+    Raises:
+        ValueError: if fewer than [length] elements could be read.
+    """
+
+    if not isinstance(f, BytesIO):
+        values = np.fromfile(f, dtype=dtype, count=length)
+        if values.shape[0] != length:
+            raise ValueError("Not enough data for %s." % aim)
+        return values
+
+    # np.fromfile is faster but won't work on a BytesIO
+    n_bytes_wanted = length * dtype_size
+    buffer = bytearray(n_bytes_wanted)
+    if f.readinto(buffer) != n_bytes_wanted:
+        raise ValueError("Not enough data for %s." % aim)
+    return np.frombuffer(buffer, dtype=dtype)
+
+
+def _read_ply_fixed_size_element_binary(
+    f, definition: _PlyElementType, big_endian: bool
+):
+    """
+    Read the binary data of an element whose properties are all non-lists
+    of a single type.
+
+    For example
+
+        element vertex 8
+        property float x
+        property float y
+        property float z
+
+
+    Args:
+        f: file-like object being read.
+        definition: The element object which describes what we are reading.
+        big_endian: (bool) whether the document is encoded as big endian.
+
+    Returns:
+        1-element list containing a 2D numpy array corresponding to the data.
+        The rows are the different values. There is one column for each property.
+    """
+    value_type = _PLY_TYPES[definition.properties[0].data_type]
+    n_columns = len(definition.properties)
+    flat = _read_raw_array(
+        f,
+        definition.name,
+        definition.count * n_columns,
+        value_type.np_type,
+        value_type.size,
+    )
+
+    # The values were read in the machine's own byte order, so swap the
+    # bytes whenever the file's byte order is the other one.
+    file_is_native_order = (sys.byteorder == "big") == big_endian
+    if not file_is_native_order:
+        flat = flat.byteswap()
+    return [flat.reshape(definition.count, n_columns)]
+
+
+def _read_ply_element_binary_nolists(f, definition: _PlyElementType, big_endian: bool):
+    """
+    Given an element which has no lists, read the corresponding data as a list
+    of numpy arrays, one for each set of adjacent columns with the same type.
+
+    For example, given
+
+        element vertex 8
+        property float x
+        property float y
+        property float z
+        property uchar red
+        property uchar green
+        property uchar blue
+
+    the output will have two arrays, the first containing (x,y,z)
+    and the second (red,green,blue).
+
+    Columns are grouped by the type name as written in the header.
+
+    Args:
+        f: file-like object being read.
+        definition: The element object which describes what we are reading.
+        big_endian: (bool) whether the document is encoded as big endian.
+
+    Returns:
+        List of 2D numpy arrays corresponding to the data. The rows are the different
+        values.
+    """
+    # Number of bytes taken up by one instance of the element.
+    size = sum(_PLY_TYPES[prop.data_type].size for prop in definition.properties)
+    needed_bytes = size * definition.count
+    # Read everything as raw bytes (the default dtype of _read_raw_array is
+    # uint8) and lay it out with one row of `size` bytes per instance.
+    data = _read_raw_array(f, definition.name, needed_bytes).reshape(-1, size)
+    # Byte offset within a row of the group of columns being processed.
+    offset = 0
+    pieces = []
+    for dtype, it in itertools.groupby(p.data_type for p in definition.properties):
+        # Number of adjacent properties in this group.
+        count = sum(1 for _ in it)
+        bytes_each = count * _PLY_TYPES[dtype].size
+        end_offset = offset + bytes_each
+
+        # what we want to do is
+        # piece = data[:, offset:end_offset].view(_PLY_TYPES[dtype].np_type)
+        # but it fails in the general case
+        # because of https://github.com/numpy/numpy/issues/9496.
+        # Instead, reinterpret just the first row's bytes for this group as
+        # the target type, then stretch that view over every row: step by
+        # the byte-row stride between rows and by the type's size between
+        # columns.
+        piece = np.lib.stride_tricks.as_strided(
+            data[:1, offset:end_offset].view(_PLY_TYPES[dtype].np_type),
+            shape=(definition.count, count),
+            strides=(data.strides[0], _PLY_TYPES[dtype].size),
+        )
+
+        # Swap bytes if the file's byte order differs from this machine's.
+        if (sys.byteorder == "big") != big_endian:
+            piece = piece.byteswap()
+        pieces.append(piece)
+        offset = end_offset
+    return pieces
+
+
+def _try_read_ply_constant_list_binary(
+    f, definition: _PlyElementType, big_endian: bool
+):
+    """
+    For an element which is a single list, try to read all of its binary
+    data on the assumption that every list has the same length as the first.
+    If a later list has a different length, return None and put f back
+    where it started.
+
+    For example, if the element is
+
+        element face 2
+        property list uchar int vertex_index
+
+    and the data is
+
+        4 0 1 2 3
+        4 7 6 5 4
+
+    then the function will return
+
+        [[0, 1, 2, 3],
+         [7, 6, 5, 4]]
+
+    but if the data is
+
+        4 0 1 2 3
+        3 6 5 4
+
+    then the function will return None.
+
+    Args:
+        f: file-like object being read.
+        definition: The element object which describes what we are reading.
+        big_endian: (bool) whether the document is encoded as big endian.
+
+    Returns:
+        If every element has the same size, 2D numpy array corresponding to the
+        data. The rows are the different values. Otherwise None.
+
+    Raises:
+        ValueError: if the stream ends before all the data has been read.
+    """
+    list_property = definition.properties[0]
+    byte_order = ">" if big_endian else "<"
+    length_struct = struct.Struct(
+        byte_order + _PLY_TYPES[list_property.list_size_type].struct_char
+    )
+
+    def read_list_length():
+        raw_length = f.read(length_struct.size)
+        if len(raw_length) != length_struct.size:
+            raise ValueError("Not enough data for %s." % definition.name)
+        return length_struct.unpack(raw_length)[0]
+
+    start_position = f.tell()
+
+    # The length of the first list decides the width of the output.
+    length = read_list_length()
+    item_type = _PLY_TYPES[list_property.data_type]
+    row_bytes = item_type.size * length
+
+    output = np.zeros((definition.count, length), dtype=item_type.np_type)
+
+    for row in range(definition.count):
+        # Every list after the first is preceded by its own length, which
+        # must agree with the first one.
+        if row > 0 and read_list_length() != length:
+            f.seek(start_position)
+            return None
+        raw_row = f.read(row_bytes)
+        if len(raw_row) != row_bytes:
+            raise ValueError("Not enough data for %s" % definition.name)
+        output[row] = np.frombuffer(raw_row, dtype=item_type.np_type)
+    if (sys.byteorder == "big") != big_endian:
+        output = output.byteswap()
+
+    return output
+
+
+def _read_ply_element_binary(f, definition: _PlyElementType, big_endian: bool) -> list:
+    """
+    Decode all instances of a single element from a binary .ply file.
+
+    Args:
+        f: file-like object being read.
+        definition: The element object which describes what we are reading.
+        big_endian: (bool) whether the document is encoded as big endian.
+
+    Returns:
+        An empty list if the element has a count of zero.
+        In simple cases where every element has the same size, the result of
+        the corresponding bulk reader: a list of 2D numpy arrays for an
+        element with no lists, or a 2D numpy array for an element which is a
+        single list of constant length. The rows are the different values.
+        Otherwise a list of lists of values, where the outer list is
+        each occurrence of the element, and the inner lists have one value per
+        property.
+
+    Raises:
+        ValueError: if the stream ends before all the data has been read.
+    """
+    if not definition.count:
+        return []
+
+    if definition.is_constant_type_fixed_size():
+        return _read_ply_fixed_size_element_binary(f, definition, big_endian)
+    if definition.is_fixed_size():
+        return _read_ply_element_binary_nolists(f, definition, big_endian)
+    if definition.try_constant_list():
+        bulk = _try_read_ply_constant_list_binary(f, definition, big_endian)
+        if bulk is not None:
+            return bulk
+
+    # We failed to read the element as a lump, must process each value manually.
+    byte_order = ">" if big_endian else "<"
+    # For each property, the struct for the first thing stored for it: the
+    # list length for a list property, otherwise the value itself.
+    leading_structs = [
+        struct.Struct(
+            byte_order + _PLY_TYPES[prop.list_size_type or prop.data_type].struct_char
+        )
+        for prop in definition.properties
+    ]
+    needs_byteswap = (sys.byteorder == "big") != big_endian
+
+    def read_exactly(n_bytes):
+        chunk = f.read(n_bytes)
+        if len(chunk) != n_bytes:
+            raise ValueError("Not enough data for %s" % definition.name)
+        return chunk
+
+    data = []
+    for _ in range(definition.count):
+        datum = []
+        for prop, leading_struct in zip(definition.properties, leading_structs):
+            (leading_value,) = leading_struct.unpack(read_exactly(leading_struct.size))
+            if prop.list_size_type is None:
+                datum.append(leading_value)
+                continue
+            # For a list property the leading value is the number of items.
+            item_type = _PLY_TYPES[prop.data_type]
+            items = np.frombuffer(
+                read_exactly(item_type.size * leading_value), dtype=item_type.np_type
+            )
+            if needs_byteswap:
+                items = items.byteswap()
+            datum.append(items)
+        data.append(datum)
+    return data
+
+
+def _load_ply_raw_stream(f) -> Tuple[_PlyHeader, dict]:
+    """
+    Implementation for _load_ply_raw which takes a stream.
+
+    Args:
+        f:  A binary or text file-like object.
+
+    Returns:
+        header: A _PlyHeader object describing the metadata in the ply file.
+        elements: A dictionary of element names to values. If an element is regular, in
+        the sense of having no lists or being one uniformly-sized list, then the
+        value will be a 2D numpy array. If not, it is a list of the relevant
+        property values.
+
+    Raises:
+        ValueError: if a binary ply file is given as a text stream, or if
+            anything other than whitespace follows the last element.
+    """
+
+    header = _PlyHeader(f)
+    if header.ascii:
+        elements = {
+            element.name: _read_ply_element_ascii(f, element)
+            for element in header.elements
+        }
+    else:
+        if isinstance(f, TextIOBase):
+            raise ValueError(
+                "Cannot safely read a binary ply file using a Text stream."
+            )
+        big_endian = header.big_endian
+        elements = {
+            element.name: _read_ply_element_binary(f, element, big_endian)
+            for element in header.elements
+        }
+    # Nothing but whitespace may remain once every element has been read.
+    leftover = f.read().strip()
+    if len(leftover) != 0:
+        raise ValueError("Extra data at end of file: " + str(leftover[:20]))
+    return header, elements
+
+
+def _load_ply_raw(f, path_manager: PathManager) -> Tuple[_PlyHeader, dict]:
+    """
+    Load the header and the raw element data from a .ply file.
+
+    Args:
+        f:  A binary or text file-like object (with methods read, readline,
+            tell and seek), a pathlib path or a string containing a file name.
+            If the ply file is binary, a text stream is not supported.
+            It is recommended to use a binary stream.
+        path_manager: PathManager for loading if f is a str.
+
+    Returns:
+        header: A _PlyHeader object describing the metadata in the ply file.
+        elements: A dictionary of element names to values. If an element is
+                  regular, in the sense of having no lists or being one
+                  uniformly-sized list, then the value will be a 2D numpy array.
+                  If it has no lists but more than one type, it will be a list of arrays.
+                  If not, it is a list of the relevant property values.
+    """
+    with _open_file(f, path_manager, "rb") as stream:
+        return _load_ply_raw_stream(stream)
+
+
+@dataclass(frozen=True)
+class _VertsColumnIndices:
+    """
+    Contains the relevant layout of the verts section of file being read.
+    Each index is the position of a property within the vertex element.
+    Members
+        point_idxs: List[int] of 3 point columns.
+        color_idxs: List[int] of 3 color columns if they are present,
+                    otherwise None.
+        color_scale: value to scale colors by.
+        normal_idxs: List[int] of 3 normals columns if they are present,
+                    otherwise None.
+        texture_uv_idxs: List[int] of 2 texture uv columns if they are
+                    present, otherwise None.
+    """
+
+    point_idxs: List[int]
+    color_idxs: Optional[List[int]]
+    color_scale: float
+    normal_idxs: Optional[List[int]]
+    texture_uv_idxs: Optional[List[int]]
+
+
+def _get_verts_column_indices(
+    vertex_head: _PlyElementType,
+) -> _VertsColumnIndices:
+    """
+    Find which columns of the vertex element of a parsed ply file hold the
+    positions, colors, normals and texture uvs, together with a color
+    scale factor.
+    When all three colors are stored in a one-byte type, the scale factor
+    is 1/255, to take them from 0..255 to [0,1]. Otherwise it is 1.
+
+    For example, if the vertex element looks as follows:
+
+        element vertex 892
+        property double x
+        property double y
+        property double z
+        property double nx
+        property double ny
+        property double nz
+        property uchar red
+        property uchar green
+        property uchar blue
+        property double texture_u
+        property double texture_v
+
+    then the return value will have point_idxs=[0,1,2], color_idxs=[6,7,8],
+    color_scale=1.0/255, normal_idxs=[3,4,5] and texture_uv_idxs=[9,10].
+
+    Args:
+        vertex_head: as returned from load_ply_raw.
+
+    Returns:
+        _VertsColumnIndices object
+
+    Raises:
+        ValueError: if any property is a list, or if any of x, y and z is
+            missing.
+    """
+    column_of = {}
+    for column, prop in enumerate(vertex_head.properties):
+        if prop.list_size_type is not None:
+            raise ValueError("Invalid vertices in file: did not expect list.")
+        column_of[prop.name] = column
+
+    def columns_named(*names) -> List[Optional[int]]:
+        # The column of each name, or None where the property is absent.
+        return [column_of.get(name) for name in names]
+
+    point_idxs = columns_named("x", "y", "z")
+    color_idxs = columns_named("red", "green", "blue")
+    normal_idxs = columns_named("nx", "ny", "nz")
+    texture_uv_idxs = columns_named("texture_u", "texture_v")
+
+    if None in point_idxs:
+        raise ValueError("Invalid vertices in file.")
+
+    def is_one_byte_column(idx) -> bool:
+        if idx is None:
+            return False
+        return _PLY_TYPES[vertex_head.properties[idx].data_type].size == 1
+
+    color_scale = 1.0 / 255 if all(map(is_one_byte_column, color_idxs)) else 1.0
+
+    return _VertsColumnIndices(
+        point_idxs=point_idxs,
+        color_idxs=None if None in color_idxs else color_idxs,
+        color_scale=color_scale,
+        normal_idxs=None if None in normal_idxs else normal_idxs,
+        texture_uv_idxs=None if None in texture_uv_idxs else texture_uv_idxs,
+    )
+
+
+@dataclass(frozen=True)
+class _VertsData:
+    """
+    Contains the data of the verts section of file being read.
+    Members:
+        verts: FloatTensor of shape (V, 3).
+        verts_colors: None or FloatTensor of shape (V, 3).
+        verts_normals: None or FloatTensor of shape (V, 3).
+        verts_texture_uvs: None or a tensor of per-vertex texture uvs.
+            NOTE(review): presumably of shape (V, 2), one column each for
+            texture_u and texture_v - confirm against _get_verts.
+    """
+
+    verts: torch.Tensor
+    verts_colors: Optional[torch.Tensor] = None
+    verts_normals: Optional[torch.Tensor] = None
+    verts_texture_uvs: Optional[torch.Tensor] = None
+
+
+def _gather_vertex_columns(parts, prop_to_partnum_col, prop_idxs, count):
+    """
+    Gather vertex properties from several arrays into one float32 tensor.
+
+    Args:
+        parts: the arrays the element was read as, part by part, where a part
+            is a set of properties with the same type.
+        prop_to_partnum_col: for each property (=column in the file), the
+            index of the part it is in and its column number within that part.
+        prop_idxs: indices of the wanted properties, one per output column.
+        count: the number of vertices, i.e. the number of output rows.
+    """
+    out = torch.empty(size=(count, len(prop_idxs)), dtype=torch.float32)
+    # Assign through a numpy view of the output: converting the right hand
+    # side to torch might cost an extra copy because torch wants contiguity.
+    out_np = out.numpy()
+    for out_col, prop_idx in enumerate(prop_idxs):
+        partnum, col = prop_to_partnum_col[prop_idx]
+        out_np[:, out_col] = parts[partnum][:, col]
+    return out
+
+
+def _get_verts(header: _PlyHeader, elements: dict) -> _VertsData:
+    """
+    Get the vertex locations, colors, normals and texture uvs from a ply file.
+
+    Args:
+        header, elements: as returned from load_ply_raw.
+
+    Returns:
+        _VertsData object
+
+    Raises:
+        ValueError: if the vertex element is missing or not in the expected form.
+    """
+    vertex = elements.get("vertex", None)
+    if vertex is None:
+        raise ValueError("The ply file has no vertex element.")
+    if not isinstance(vertex, list):
+        raise ValueError("Invalid vertices in file.")
+    vertex_head = next(head for head in header.elements if head.name == "vertex")
+    column_idxs = _get_verts_column_indices(vertex_head)
+
+    # Case of no vertices
+    if vertex_head.count == 0:
+        verts = torch.zeros((0, 3), dtype=torch.float32)
+        if column_idxs.color_idxs is None:
+            return _VertsData(verts=verts)
+        return _VertsData(
+            verts=verts, verts_colors=torch.zeros((0, 3), dtype=torch.float32)
+        )
+
+    # Simple case where the only data is the vertices themselves. The array is
+    # returned with its columns in file order, so the shortcut is only valid if
+    # x, y, z are declared in that order; otherwise use the indexed path below.
+    if (
+        len(vertex) == 1
+        and isinstance(vertex[0], np.ndarray)
+        and vertex[0].ndim == 2
+        and vertex[0].shape[1] == 3
+        and list(column_idxs.point_idxs) == [0, 1, 2]
+    ):
+        return _VertsData(verts=_make_tensor(vertex[0], cols=3, dtype=torch.float32))
+
+    vertex_colors = None
+    vertex_normals = None
+    vertex_texture_uvs = None
+
+    if len(vertex) == 1:
+        # The whole element has one type, so it was read as a single array.
+        verts = torch.tensor(vertex[0][:, column_idxs.point_idxs], dtype=torch.float32)
+        if column_idxs.color_idxs is not None:
+            vertex_colors = column_idxs.color_scale * torch.tensor(
+                vertex[0][:, column_idxs.color_idxs], dtype=torch.float32
+            )
+        if column_idxs.normal_idxs is not None:
+            vertex_normals = torch.tensor(
+                vertex[0][:, column_idxs.normal_idxs], dtype=torch.float32
+            )
+        if column_idxs.texture_uv_idxs is not None:
+            vertex_texture_uvs = torch.tensor(
+                vertex[0][:, column_idxs.texture_uv_idxs], dtype=torch.float32
+            )
+    else:
+        # Heterogeneous element: it was read as several arrays, part by part.
+        prop_to_partnum_col = [
+            (partnum, col)
+            for partnum, array in enumerate(vertex)
+            for col in range(array.shape[1])
+        ]
+
+        def gather(prop_idxs):
+            return _gather_vertex_columns(
+                vertex, prop_to_partnum_col, prop_idxs, vertex_head.count
+            )
+
+        verts = gather(column_idxs.point_idxs)
+        if column_idxs.color_idxs is not None:
+            vertex_colors = gather(column_idxs.color_idxs)
+            vertex_colors *= column_idxs.color_scale
+        if column_idxs.normal_idxs is not None:
+            vertex_normals = gather(column_idxs.normal_idxs)
+        if column_idxs.texture_uv_idxs is not None:
+            vertex_texture_uvs = gather(column_idxs.texture_uv_idxs)
+    return _VertsData(
+        verts=verts,
+        verts_colors=vertex_colors,
+        verts_normals=vertex_normals,
+        verts_texture_uvs=vertex_texture_uvs,
+    )
+
+
+@dataclass(frozen=True)
+class _PlyData:
+    """Contains the data from a PLY file which has been read.
+    Members:
+        header: _PlyHeader of file metadata from the header
+        verts: FloatTensor of shape (V, 3).
+        faces: None or LongTensor of vertex indices, shape (F, 3).
+        verts_colors: None or FloatTensor of shape (V, 3).
+        verts_normals: None or FloatTensor of shape (V, 3).
+        verts_texture_uvs: None or FloatTensor of shape (V, 2).
+    """
+
+    header: _PlyHeader
+    verts: torch.Tensor
+    faces: Optional[torch.Tensor]
+    verts_colors: Optional[torch.Tensor]
+    verts_normals: Optional[torch.Tensor]
+    verts_texture_uvs: Optional[torch.Tensor]
+
+
+def _load_ply(f, *, path_manager: PathManager) -> _PlyData:
+    """
+    Load the data from a .ply file.
+
+    Faces with more than 3 vertices are split into triangles which all start
+    at the face's first vertex.
+
+    Args:
+        f:  A binary or text file-like object (with methods read, readline,
+            tell and seek), a pathlib path or a string containing a file name.
+            If the ply file is in the binary ply format rather than the text
+            ply format, then a text stream is not supported.
+            It is easiest to use a binary stream in all cases.
+        path_manager: PathManager for loading if f is a str.
+
+    Returns:
+        _PlyData object
+
+    Raises:
+        ValueError: if the vertex or face data is not in the expected form.
+    """
+    header, elements = _load_ply_raw(f, path_manager=path_manager)
+    verts_data = _get_verts(header, elements)
+
+    faces = None
+    face = elements.get("face", None)
+    if face is not None:
+        face_head = next(head for head in header.elements if head.name == "face")
+        # The one property is usually named vertex_index or vertex_indices.
+        properties = face_head.properties
+        if len(properties) != 1 or properties[0].list_size_type is None:
+            raise ValueError("Unexpected form of faces data.")
+
+        if not len(face):
+            pass  # An empty face element is reported as faces=None.
+        elif isinstance(face, np.ndarray) and face.ndim == 2:  # Homogeneous elements
+            if face.shape[1] < 3:
+                raise ValueError("Faces must have at least 3 vertices.")
+            fans = [face[:, [0, i + 1, i + 2]] for i in range(face.shape[1] - 2)]
+            faces = torch.LongTensor(np.vstack(fans).astype(np.int64))
+        else:
+            face_list = []
+            for (item,) in face:
+                if item.ndim != 1:
+                    raise ValueError("Bad face data.")
+                if item.shape[0] < 3:
+                    raise ValueError("Faces must have at least 3 vertices.")
+                for i in range(item.shape[0] - 2):
+                    face_list.append([item[0], item[i + 1], item[i + 2]])
+            faces = torch.tensor(face_list, dtype=torch.int64)
+
+    if faces is not None:
+        _check_faces_indices(faces, max_index=verts_data.verts.shape[0])
+
+    # vars() hands the tensors over as they are; dataclasses.asdict deep-copies.
+    return _PlyData(**vars(verts_data), faces=faces, header=header)
+
+
+def load_ply(
+    f, *, path_manager: Optional[PathManager] = None
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Read the vertices and faces stored in a .ply file.
+
+    IO.load_mesh and IO.load_pointcloud are the preferred way to load data
+    from such a file, because they can read more of the data in it.
+
+    Example .ply file format::
+
+        ply
+        format ascii 1.0           { ascii/binary, format version number }
+        comment made by Greg Turk  { comments keyword specified, like all lines }
+        comment this file is a cube
+        element vertex 8           { define "vertex" element, 8 of them in file }
+        property float x           { vertex contains float "x" coordinate }
+        property float y           { y coordinate is also a vertex property }
+        property float z           { z coordinate, too }
+        element face 6             { there are 6 "face" elements in the file }
+        property list uchar int vertex_index { "vertex_index" is a list of ints }
+        end_header                 { delimits the end of the header }
+        0 0 0                      { start of vertex list }
+        0 0 1
+        0 1 1
+        0 1 0
+        1 0 0
+        1 0 1
+        1 1 1
+        1 1 0
+        4 0 1 2 3                  { start of face list }
+        4 7 6 5 4
+        4 0 4 5 1
+        4 1 5 6 2
+        4 2 6 7 3
+        4 3 7 4 0
+
+    Args:
+        f:  A binary or text file-like object (with methods read, readline,
+            tell and seek), a pathlib path or a string containing a file name.
+            If the ply file is in the binary ply format rather than the text
+            ply format, then a text stream is not supported.
+            It is easiest to use a binary stream in all cases.
+        path_manager: PathManager for loading if f is a str. A new PathManager
+            is created if this is None.
+
+    Returns:
+        verts: FloatTensor of shape (V, 3).
+        faces: LongTensor of vertex indices, shape (F, 3). This has shape
+            (0, 3) if no faces were read from the file.
+    """
+
+    manager = PathManager() if path_manager is None else path_manager
+    ply = _load_ply(f, path_manager=manager)
+    if ply.faces is None:
+        # Callers always get a tensor for the faces, never None.
+        return ply.verts, torch.zeros(0, 3, dtype=torch.int64)
+    return ply.verts, ply.faces
+
+
+def _write_ply_header(
+    f,
+    *,
+    verts: torch.Tensor,
+    faces: Optional[torch.LongTensor],
+    verts_normals: Optional[torch.Tensor],
+    verts_colors: Optional[torch.Tensor],
+    ascii: bool,
+    colors_as_uint8: bool,
+) -> None:
+    """
+    Write the header of a .ply file to f.
+
+    Args:
+        f: Binary file object which receives the header.
+        verts: FloatTensor of shape (V, 3) giving vertex coordinates.
+        faces: LongTensor of shape (F, 3) giving faces, or None.
+        verts_normals: FloatTensor of shape (V, 3) giving vertex normals, or None.
+        verts_colors: FloatTensor of shape (V, 3) giving vertex colors, or None.
+        ascii: (bool) whether the ascii ply format is declared, rather than
+            the binary format of the machine's byte order.
+        colors_as_uint8: Whether colors are declared as uchar instead of float.
+    """
+
+    def is_nx3(tensor):
+        return tensor.dim() == 2 and tensor.size(1) == 3
+
+    assert not len(verts) or is_nx3(verts)
+    assert faces is None or not len(faces) or is_nx3(faces)
+    assert verts_normals is None or is_nx3(verts_normals)
+    assert verts_colors is None or is_nx3(verts_colors)
+
+    if ascii:
+        ply_format = b"ascii"
+    elif sys.byteorder == "big":
+        ply_format = b"binary_big_endian"
+    else:
+        ply_format = b"binary_little_endian"
+    chunks = [b"ply\nformat " + ply_format + b" 1.0\n"]
+    chunks.append(f"element vertex {verts.shape[0]}\n".encode("ascii"))
+    chunks += [b"property float " + axis + b"\n" for axis in (b"x", b"y", b"z")]
+    if verts_normals is not None:
+        chunks += [b"property float " + axis + b"\n" for axis in (b"nx", b"ny", b"nz")]
+    if verts_colors is not None:
+        color_type = b"uchar" if colors_as_uint8 else b"float"
+        for channel in (b"red", b"green", b"blue"):
+            chunks.append(b"property " + color_type + b" " + channel + b"\n")
+    # The face element is left out when there are no vertices.
+    if faces is not None and len(verts):
+        chunks.append(f"element face {faces.shape[0]}\n".encode("ascii"))
+        chunks.append(b"property list uchar int vertex_index\n")
+    chunks.append(b"end_header\n")
+    for chunk in chunks:
+        f.write(chunk)
+
+
+def _save_ply(
+    f,
+    *,
+    verts: torch.Tensor,
+    faces: Optional[torch.LongTensor],
+    verts_normals: Optional[torch.Tensor],
+    verts_colors: Optional[torch.Tensor],
+    ascii: bool,
+    decimal_places: Optional[int] = None,
+    colors_as_uint8: bool,
+) -> None:
+    """
+    Write 3D data to a file object in the .ply format: the header, then one
+    record per vertex, then one record per face.
+
+    Args:
+        f: File object to which the 3D data should be written.
+        verts: FloatTensor of shape (V, 3) giving vertex coordinates.
+        faces: LongTensor of shape (F, 3) giving faces.
+        verts_normals: FloatTensor of shape (V, 3) giving vertex normals.
+        verts_colors: FloatTensor of shape (V, 3) giving vertex colors.
+        ascii: (bool) whether to use the ascii ply format.
+        decimal_places: Number of decimal places for saving if ascii=True.
+        colors_as_uint8: Whether to save colors as numbers in the range
+                    [0, 255] instead of float32.
+    """
+    _write_ply_header(
+        f,
+        verts=verts,
+        faces=faces,
+        verts_normals=verts_normals,
+        verts_colors=verts_colors,
+        ascii=ascii,
+        colors_as_uint8=colors_as_uint8,
+    )
+
+    if not (len(verts)):
+        warnings.warn("Empty 'verts' provided")
+        return
+
+    # Each vertex becomes one record: position, then normal, then color.
+    has_normals = verts_normals is not None
+    has_colors = verts_colors is not None
+    record_fields = [("verts", np.float32, 3)]
+    if has_normals:
+        record_fields.append(("normals", np.float32, 3))
+    if has_colors:
+        color_np_type = np.ubyte if colors_as_uint8 else np.float32
+        record_fields.append(("colors", color_np_type, 3))
+
+    records = np.zeros(verts.shape[0], dtype=record_fields)
+    records["verts"] = verts.detach().cpu().numpy()
+    if has_normals:
+        records["normals"] = verts_normals.detach().cpu().numpy()
+    if has_colors:
+        colors_np = verts_colors.detach().cpu().numpy()
+        records["colors"] = np.rint(colors_np * 255) if colors_as_uint8 else colors_np
+
+    if ascii:
+        precision = b"" if decimal_places is None else b".%d" % decimal_places
+        float_group = (b"%" + precision + b"f ") * 3
+        row_formats = [float_group]
+        if has_normals:
+            row_formats.append(float_group)
+        if has_colors:
+            row_formats.append(b"%d %d %d " if colors_as_uint8 else float_group)
+        # The last group on each line ends with a newline, not a space.
+        row_formats[-1] = row_formats[-1][:-1] + b"\n"
+        for record in records:
+            for group, group_format in zip(record, row_formats):
+                f.write(group_format % tuple(group))
+    elif isinstance(f, BytesIO):
+        # tofile only works with real files, but is faster than this.
+        f.write(records.tobytes())
+    else:
+        records.tofile(f)
+
+    if faces is None:
+        return
+    faces_array = faces.detach().cpu().numpy()
+    _check_faces_indices(faces, max_index=verts.shape[0])
+    if not len(faces_array):
+        return
+
+    if ascii:
+        np.savetxt(f, faces_array, "3 %d %d %d")
+        return
+
+    # Binary faces: a one byte vertex count followed by three 32 bit indices.
+    face_records = np.zeros(
+        len(faces_array),
+        dtype=[("count", np.uint8), ("vertex_indices", np.uint32, 3)],
+    )
+    face_records["count"] = 3
+    face_records["vertex_indices"] = faces_array
+    face_bytes = face_records.view(np.uint8)
+    if isinstance(f, BytesIO):
+        f.write(face_bytes.tobytes())
+    else:
+        face_bytes.tofile(f)
+
+
+def save_ply(
+    f,
+    verts: torch.Tensor,
+    faces: Optional[torch.LongTensor] = None,
+    verts_normals: Optional[torch.Tensor] = None,
+    ascii: bool = False,
+    decimal_places: Optional[int] = None,
+    path_manager: Optional[PathManager] = None,
+) -> None:
+    """
+    Write a mesh, given as tensors, to a .ply file.
+
+    Args:
+        f: File (or path) to which the mesh should be written.
+        verts: FloatTensor of shape (V, 3) giving vertex coordinates.
+        faces: LongTensor of shape (F, 3) giving faces.
+        verts_normals: FloatTensor of shape (V, 3) giving vertex normals.
+        ascii: (bool) whether to use the ascii ply format.
+        decimal_places: Number of decimal places for saving if ascii=True.
+        path_manager: PathManager for interpreting f if it is a str.
+
+    Raises:
+        ValueError: if a non-empty verts, faces or verts_normals does not have
+            the shape described above.
+    """
+
+    def is_nx3(tensor):
+        return tensor.dim() == 2 and tensor.size(1) == 3
+
+    if len(verts) and not is_nx3(verts):
+        raise ValueError(
+            "Argument 'verts' should either be empty or of shape (num_verts, 3)."
+        )
+
+    if faces is not None and len(faces) and not is_nx3(faces):
+        raise ValueError(
+            "Argument 'faces' should either be empty or of shape (num_faces, 3)."
+        )
+
+    if verts_normals is not None and len(verts_normals):
+        normals_match = is_nx3(verts_normals) and (
+            verts_normals.size(0) == verts.size(0)
+        )
+        if not normals_match:
+            message = "Argument 'verts_normals' should either be empty or of shape (num_verts, 3)."
+            raise ValueError(message)
+
+    stream_manager = PathManager() if path_manager is None else path_manager
+    with _open_file(f, stream_manager, "wb") as stream:
+        _save_ply(
+            stream,
+            verts=verts,
+            faces=faces,
+            verts_normals=verts_normals,
+            verts_colors=None,
+            ascii=ascii,
+            decimal_places=decimal_places,
+            colors_as_uint8=False,
+        )
+
+
+class MeshPlyFormat(MeshFormatInterpreter):
+    """Reads and writes meshes for paths with the ".ply" suffix."""
+
+    def __init__(self) -> None:
+        self.known_suffixes = (".ply",)
+
+    def read(
+        self,
+        path: PathOrStr,
+        include_textures: bool,
+        device,
+        path_manager: PathManager,
+        **kwargs,
+    ) -> Optional[Meshes]:
+        """
+        Load a mesh from path, or return None if path has no known suffix.
+        With include_textures, use vertex colors or else a uv texture image.
+        """
+        if not endswith(path, self.known_suffixes):
+            return None
+
+        data = _load_ply(f=path, path_manager=path_manager)
+        faces = data.faces
+        if faces is None:
+            faces = torch.zeros(0, 3, dtype=torch.int64)
+
+        texture = None
+        if include_textures and data.verts_colors is not None:
+            texture = TexturesVertex([data.verts_colors.to(device)])
+        elif include_textures and data.verts_texture_uvs is not None:
+            # The image is named by the last word of the last matching comment.
+            texture_names = [
+                comment.split(" ")[-1]
+                for comment in data.header.comments
+                if "TextureFile" in comment
+            ]
+            if texture_names:
+                image_path = os.path.join(os.path.dirname(str(path)), texture_names[-1])
+                texture_map = _read_image(image_path, path_manager, format="RGB")
+                texture_map = torch.tensor(texture_map, dtype=torch.float32) / 255.0
+                texture = TexturesUV(
+                    [texture_map.to(device)],
+                    [faces.to(device)],
+                    [data.verts_texture_uvs.to(device)],
+                )
+
+        verts_normals = None
+        if data.verts_normals is not None:
+            verts_normals = [data.verts_normals.to(device)]
+        return Meshes(
+            verts=[data.verts.to(device)],
+            faces=[faces.to(device)],
+            textures=texture,
+            verts_normals=verts_normals,
+        )
+
+    def save(
+        self,
+        data: Meshes,
+        path: PathOrStr,
+        path_manager: PathManager,
+        binary: Optional[bool],
+        decimal_places: Optional[int] = None,
+        colors_as_uint8: bool = False,
+        **kwargs,
+    ) -> bool:
+        """
+        Save the first mesh in data; return False if path has no known suffix.
+        The ascii format is used only if binary is False.
+        Extra optional args:
+            colors_as_uint8: (bool) Whether to save colors as numbers in the
+                        range [0, 255] instead of float32.
+        """
+        if not endswith(path, self.known_suffixes):
+            return False
+
+        verts = data.verts_list()[0]
+        faces = data.faces_list()[0]
+        verts_normals = None
+        if data.has_verts_normals():
+            verts_normals = data.verts_normals_list()[0]
+
+        verts_colors = None
+        if isinstance(data.textures, TexturesVertex):
+            features = data.textures.verts_features_list()[0]
+            n_colors = features.shape[1]
+            if n_colors != 3:
+                warnings.warn(
+                    f"Texture will not be saved as it has {n_colors} colors, not 3."
+                )
+            else:
+                verts_colors = features
+
+        with _open_file(path, path_manager, "wb") as f:
+            _save_ply(
+                f=f,
+                verts=verts,
+                faces=faces,
+                verts_colors=verts_colors,
+                verts_normals=verts_normals,
+                ascii=binary is False,
+                decimal_places=decimal_places,
+                colors_as_uint8=colors_as_uint8,
+            )
+        return True
+
+
+class PointcloudPlyFormat(PointcloudFormatInterpreter):
+    def __init__(self) -> None:
+        self.known_suffixes = (".ply",)
+
+    def read(
+        self,
+        path: PathOrStr,
+        device,
+        path_manager: PathManager,
+        **kwargs,
+    ) -> Optional[Pointclouds]:
+        """Load a point cloud from path, or return None for an unknown suffix."""
+        if not endswith(path, self.known_suffixes):
+            return None
+
+        data = _load_ply(f=path, path_manager=path_manager)
+        features = None
+        if data.verts_colors is not None:
+            features = [data.verts_colors.to(device)]
+        normals = None
+        if data.verts_normals is not None:
+            normals = [data.verts_normals.to(device)]
+        return Pointclouds(
+            points=[data.verts.to(device)], features=features, normals=normals
+        )
+
+    def save(
+        self,
+        data: Pointclouds,
+        path: PathOrStr,
+        path_manager: PathManager,
+        binary: Optional[bool],
+        decimal_places: Optional[int] = None,
+        colors_as_uint8: bool = False,
+        **kwargs,
+    ) -> bool:
+        """
+        Save the first cloud in data; return False if path has no known suffix.
+        The ascii format is used only if binary is False.
+        Extra optional args:
+            colors_as_uint8: (bool) Whether to save colors as numbers in the
+                        range [0, 255] instead of float32.
+        """
+        if not endswith(path, self.known_suffixes):
+            return False
+
+        # Take the features and normals of the first cloud only, to match the
+        # points. The packed tensors would hold the data of every cloud.
+        features_list, normals_list = data.features_list(), data.normals_list()
+        with _open_file(path, path_manager, "wb") as f:
+            _save_ply(
+                f=f,
+                verts=data.points_list()[0],
+                verts_colors=None if features_list is None else features_list[0],
+                verts_normals=None if normals_list is None else normals_list[0],
+                faces=None,
+                ascii=binary is False,
+                decimal_places=decimal_places,
+                colors_as_uint8=colors_as_uint8,
+            )
+        return True
diff --git a/pytorch3d/pytorch3d/io/utils.py b/pytorch3d/pytorch3d/io/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee437b3a082cf503c9050c3731450941431ee993
--- /dev/null
+++ b/pytorch3d/pytorch3d/io/utils.py
@@ -0,0 +1,83 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import contextlib
+import pathlib
+import warnings
+from typing import cast, ContextManager, IO, Optional, Union
+
+import numpy as np
+import torch
+from iopath.common.file_io import PathManager
+from PIL import Image
+
+from ..common.datatypes import Device
+
+
+PathOrStr = Union[pathlib.Path, str]  # A path given either as a pathlib.Path or as a str.
+
+
+def _open_file(f, path_manager: PathManager, mode: str = "r") -> ContextManager[IO]:
+    """Return a context manager for f, opening it first if f is a path."""
+    if isinstance(f, pathlib.Path):
+        return contextlib.closing(f.open(mode))
+    if not isinstance(f, str):
+        # Already a file object: it is handed back without being closed on exit.
+        return contextlib.nullcontext(cast(IO, f))
+    # pyre-fixme[6]: For 2nd argument expected `Union[typing_extensions.Literal['...
+    opened = path_manager.open(f, mode)
+    return contextlib.closing(opened)
+
+
+def _make_tensor(
+    data, cols: int, dtype: torch.dtype, device: Device = "cpu"
+) -> torch.Tensor:
+    """
+    Convert data to a tensor of the given dtype on the given device. Empty
+    data gives an empty tensor of shape (0, cols), so the result is still 2D.
+    """
+    if len(data):
+        return torch.tensor(data, dtype=dtype, device=device)
+    # Nothing to convert: build the empty (0, cols) result explicitly.
+    return torch.zeros((0, cols), dtype=dtype, device=device)
+
+
+def _check_faces_indices(
+    faces_indices: torch.Tensor, max_index: int, pad_value: Optional[int] = None
+) -> torch.Tensor:
+    """Warn about indices outside [0, max_index); return the input unchanged.
+
+    Faces made up entirely of pad_value (if given) are padding and are skipped."""
+    checked = faces_indices  # no pad_value: every face is checked as it is
+    if pad_value is not None:
+        checked = faces_indices[faces_indices.ne(pad_value).any(dim=-1)]
+    if torch.any(checked >= max_index) or torch.any(checked < 0):
+        warnings.warn("Faces have invalid indices")
+    return faces_indices
+
+
+def _read_image(file_name: str, path_manager: PathManager, format=None):
+    """
+    Read an image from a file using Pillow.
+    Args:
+        file_name: image file path.
+        path_manager: PathManager for interpreting file_name.
+        format: "RGB" or "BGR". Any other value, including the default
+            None, is rejected.
+    Returns:
+        image: float32 array of shape (H, W, 3) in the requested channel order.
+    Raises:
+        ValueError: if format is not "RGB" or "BGR".
+    """
+    if format not in ["RGB", "BGR"]:
+        # Interpolate here: ValueError does not apply %-formatting to its args.
+        raise ValueError("format can only be one of [RGB, BGR]; got %s" % (format,))
+    with path_manager.open(file_name, "rb") as f:
+        # Pillow has no BGR mode, so decode as RGB and reverse channels if needed.
+        image = np.asarray(Image.open(f).convert("RGB")).astype(np.float32)
+        if format == "BGR":
+            image = image[:, :, ::-1]
+        return image
diff --git a/pytorch3d/pytorch3d/loss/__init__.py b/pytorch3d/pytorch3d/loss/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b8d10de9c34a3f6a5eb27f060a2eb8db5755344
--- /dev/null
+++ b/pytorch3d/pytorch3d/loss/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from .chamfer import chamfer_distance
+from .mesh_edge_loss import mesh_edge_loss
+from .mesh_laplacian_smoothing import mesh_laplacian_smoothing
+from .mesh_normal_consistency import mesh_normal_consistency
+from .point_mesh_distance import point_mesh_edge_distance, point_mesh_face_distance
+
+
+__all__ = [k for k in globals().keys() if not k.startswith("_")]  # Export every module-level name without a leading underscore.
diff --git a/pytorch3d/pytorch3d/loss/chamfer.py b/pytorch3d/pytorch3d/loss/chamfer.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ef1d6f42fe8451517024ead5faa50dd4bd35575
--- /dev/null
+++ b/pytorch3d/pytorch3d/loss/chamfer.py
@@ -0,0 +1,268 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Union
+
+import torch
+import torch.nn.functional as F
+from pytorch3d.ops.knn import knn_gather, knn_points
+from pytorch3d.structures.pointclouds import Pointclouds
+
+
+def _validate_chamfer_reduction_inputs(
+    batch_reduction: Union[str, None], point_reduction: Union[str, None]
+) -> None:
+    """Reject unsupported reduction modes and unsupported combinations of them.
+
+    Args:
+        batch_reduction: How the loss is reduced over the batch; one of
+            ["mean", "sum"] or None.
+        point_reduction: How the loss is reduced over the points; one of
+            ["mean", "sum"] or None.
+
+    Raises:
+        ValueError: if either value is not an accepted mode, or if a batch
+            reduction is requested while point_reduction is None.
+    """
+    valid_modes = ["mean", "sum"]
+    batch_is_set = batch_reduction is not None
+    point_is_set = point_reduction is not None
+
+    if batch_is_set and batch_reduction not in valid_modes:
+        raise ValueError('batch_reduction must be one of ["mean", "sum"] or None')
+    if point_is_set and point_reduction not in valid_modes:
+        raise ValueError('point_reduction must be one of ["mean", "sum"] or None')
+    # Reducing over the batch only makes sense once points have been reduced.
+    if batch_is_set and not point_is_set:
+        raise ValueError("Batch reduction must be None if point_reduction is None")
+
+
+def _handle_pointcloud_input(
+    points: Union[torch.Tensor, Pointclouds],
+    lengths: Union[torch.Tensor, None],
+    normals: Union[torch.Tensor, None],
+):
+    """
+    Normalize a point cloud argument into padded tensors.
+
+    If points is an instance of Pointclouds, retrieve the padded points tensor
+    along with the number of points per cloud and the padded normals; the
+    `lengths` and `normals` arguments are then ignored.
+    Otherwise, return the input points (and normals), using the given lengths
+    or, when lengths is None, the size of the second dimension of `points`
+    for every cloud.
+
+    Args:
+        points: Pointclouds object or tensor of shape (N, P, D).
+        lengths: Optional tensor of shape (N,) with the number of valid points
+            in each cloud; only used when `points` is a tensor.
+        normals: Optional 3-dimensional tensor of normals; only used when
+            `points` is a tensor.
+
+    Returns:
+        3-element tuple (X, lengths, normals) where X is the padded points
+        tensor, lengths is the per-cloud point count and normals is either a
+        tensor or None.
+
+    Raises:
+        ValueError: if points is neither a Pointclouds nor a tensor, or if a
+            tensor argument does not have the expected number of dimensions
+            or a length exceeds P.
+    """
+    if isinstance(points, Pointclouds):
+        X = points.points_padded()
+        lengths = points.num_points_per_cloud()
+        normals = points.normals_padded()  # either a tensor or None
+    elif torch.is_tensor(points):
+        if points.ndim != 3:
+            raise ValueError("Expected points to be of shape (N, P, D)")
+        X = points
+        if lengths is None:
+            # No explicit lengths: every cloud uses all P points.
+            lengths = torch.full(
+                (X.shape[0],), X.shape[1], dtype=torch.int64, device=points.device
+            )
+        else:
+            if lengths.ndim != 1 or lengths.shape[0] != X.shape[0]:
+                raise ValueError("Expected lengths to be of shape (N,)")
+            if lengths.max() > X.shape[1]:
+                raise ValueError("A length value was too long")
+        if normals is not None and normals.ndim != 3:
+            # The closing parenthesis was missing from this message.
+            raise ValueError("Expected normals to be of shape (N, P, 3)")
+    else:
+        raise ValueError(
+            "The input pointclouds should be either "
+            + "Pointclouds objects or torch.Tensor of shape "
+            + "(minibatch, num_points, 3)."
+        )
+    return X, lengths, normals
+
+
+def _chamfer_distance_single_direction(
+    x,
+    y,
+    x_lengths,
+    y_lengths,
+    x_normals,
+    y_normals,
+    weights,
+    batch_reduction: Union[str, None],
+    point_reduction: Union[str, None],
+    norm: int,
+    abs_cosine: bool,
+):
+    """
+    Compute the chamfer terms in one direction only, from x to y.
+
+    For every point of x the nearest point of y is found with `knn_points`
+    (K=1); the resulting distances, and optionally the cosine distance between
+    the matched normals, are masked, weighted and reduced.
+
+    Args:
+        x: Tensor of shape (N, P1, D).
+        y: Tensor with the same batch size N and feature size D as x.
+        x_lengths: Tensor of shape (N,), number of valid points per cloud in x.
+        y_lengths: Tensor of shape (N,), number of valid points per cloud in y.
+        x_normals: Optional normals for x; the normals term is only computed
+            when both x_normals and y_normals are given.
+        y_normals: Optional normals for y.
+        weights: Optional non-negative tensor with N entries, one per batch
+            element.
+        batch_reduction: "mean", "sum" or None.
+        point_reduction: "mean", "sum" or None.
+        norm: norm forwarded to `knn_points`.
+        abs_cosine: if True the absolute value of the cosine similarity is used.
+
+    Returns:
+        2-element tuple (cham_dist, cham_normals). cham_normals is None when
+        normals were not provided, except in the all-zero weights case where
+        two zero tensors derived from x are returned.
+
+    Raises:
+        ValueError: if y or weights have an incompatible shape, or if any
+            weight is negative.
+    """
+    N, P1, D = x.shape
+    has_normals = x_normals is not None and y_normals is not None
+
+    # Positions at or beyond a cloud's length are padding. Build a mask for
+    # them and remember whether any cloud is actually shorter than P1.
+    x_is_ragged = (x_lengths != P1).any()
+    padding_mask = (
+        torch.arange(P1, device=x.device)[None] >= x_lengths[:, None]
+    )  # shape [N, P1]
+
+    if y.shape[0] != N or y.shape[2] != D:
+        raise ValueError("y does not have the correct shape.")
+
+    if weights is not None:
+        if weights.size(0) != N:
+            raise ValueError("weights must be of shape (N,).")
+        if not (weights >= 0).all():
+            raise ValueError("weights cannot be negative.")
+        if weights.sum() == 0.0:
+            # All weights are zero: return zeros computed from x (times 0.0)
+            # instead of running the nearest neighbor search.
+            weights = weights.view(N, 1)
+            zero_term = x.sum((1, 2)) * weights
+            if batch_reduction in ["mean", "sum"]:
+                zero_term = zero_term.sum()
+            return (zero_term * 0.0, zero_term * 0.0)
+
+    nearest = knn_points(
+        x, y, lengths1=x_lengths, lengths2=y_lengths, norm=norm, K=1
+    )
+    dist_term = nearest.dists[..., 0]  # (N, P1)
+
+    if x_is_ragged:
+        dist_term[padding_mask] = 0.0
+    if weights is not None:
+        dist_term *= weights.view(N, 1)
+
+    normal_term = x.new_zeros(())
+    if has_normals:
+        # Gather the normals of the matched points and keep only the k=0 entry.
+        matched_normals = knn_gather(y_normals, nearest.idx, y_lengths)[..., 0, :]
+
+        similarity = F.cosine_similarity(x_normals, matched_normals, dim=2, eps=1e-6)
+        # With abs_cosine the orientation of the normals is ignored.
+        if abs_cosine:
+            similarity = torch.abs(similarity)
+        normal_term = 1 - similarity
+
+        if x_is_ragged:
+            normal_term[padding_mask] = 0.0
+        if weights is not None:
+            normal_term *= weights.view(N, 1)
+
+    if point_reduction is None:
+        # No reduction requested: hand back the per-point terms.
+        return dist_term, (normal_term if has_normals else None)
+
+    # Reduce over the points of each cloud.
+    dist_term = dist_term.sum(1)  # (N,)
+    if has_normals:
+        normal_term = normal_term.sum(1)  # (N,)
+    if point_reduction == "mean":
+        # Clamp so that empty clouds do not divide by zero.
+        points_per_cloud = x_lengths.clamp(min=1)
+        dist_term /= points_per_cloud
+        if has_normals:
+            normal_term /= points_per_cloud
+
+    if batch_reduction is not None:
+        # Both "sum" and "mean" start from the sum over the batch.
+        dist_term = dist_term.sum()
+        if has_normals:
+            normal_term = normal_term.sum()
+        if batch_reduction == "mean":
+            div = weights.sum() if weights is not None else max(N, 1)
+            dist_term /= div
+            if has_normals:
+                normal_term /= div
+
+    return dist_term, (normal_term if has_normals else None)
+
+
+def chamfer_distance(
+    x,
+    y,
+    x_lengths=None,
+    y_lengths=None,
+    x_normals=None,
+    y_normals=None,
+    weights=None,
+    batch_reduction: Union[str, None] = "mean",
+    point_reduction: Union[str, None] = "mean",
+    norm: int = 2,
+    single_directional: bool = False,
+    abs_cosine: bool = True,
+):
+    """
+    Chamfer distance between two batches of pointclouds x and y.
+
+    Args:
+        x: FloatTensor of shape (N, P1, D) or a Pointclouds object holding a
+            batch of N point clouds with at most P1 points each and feature
+            dimension D.
+        y: FloatTensor of shape (N, P2, D) or a Pointclouds object holding a
+            batch of N point clouds with at most P2 points each and feature
+            dimension D.
+        x_lengths: Optional LongTensor of shape (N,) with the number of points
+            in each cloud in x.
+        y_lengths: Optional LongTensor of shape (N,) with the number of points
+            in each cloud in y.
+        x_normals: Optional FloatTensor of shape (N, P1, D).
+        y_normals: Optional FloatTensor of shape (N, P2, D).
+        weights: Optional FloatTensor of shape (N,) with per batch element
+            weights used in the reduction.
+        batch_reduction: Reduction applied across the batch; one of
+            ["mean", "sum"] or None.
+        point_reduction: Reduction applied across the points; one of
+            ["mean", "sum"] or None.
+        norm: int selecting the distance norm: 1 for L1, 2 for L2.
+        single_directional: If False (default), the loss combines the distance
+            from each point in x to its nearest neighbor in y with the distance
+            from each point in y to its nearest neighbor in x. If True, only
+            the x to y direction is used.
+        abs_cosine: If False, loss_normals is one minus the cosine similarity.
+            If True (default), it is one minus the absolute value of the cosine
+            similarity, so exactly opposite normals count as matching normals,
+            i.e. sign does not matter.
+
+    Returns:
+        2-element tuple containing
+
+        - **loss**: Tensor with the reduced distance between the pointclouds
+          in x and the pointclouds in y. If point_reduction is None, either a
+          2-element tuple of Tensors with the forward and backward terms shaped
+          (N, P1) and (N, P2) (single_directional False) or a single Tensor of
+          terms shaped (N, P1) (single_directional True).
+        - **loss_normals**: Tensor with the reduced cosine distance of normals
+          between pointclouds in x and pointclouds in y. None if x_normals and
+          y_normals are None. If point_reduction is None, either a 2-element
+          tuple of Tensors with the forward and backward terms shaped (N, P1)
+          and (N, P2) (single_directional False) or a single Tensor of terms
+          shaped (N, P1) (single_directional True).
+
+    Raises:
+        ValueError: if the reductions are invalid or norm is neither 1 nor 2.
+    """
+    _validate_chamfer_reduction_inputs(batch_reduction, point_reduction)
+    if norm not in (1, 2):
+        raise ValueError("Support for 1 or 2 norm.")
+
+    x, x_lengths, x_normals = _handle_pointcloud_input(x, x_lengths, x_normals)
+    y, y_lengths, y_normals = _handle_pointcloud_input(y, y_lengths, y_normals)
+
+    # Options that are identical for both directions.
+    shared_args = (weights, batch_reduction, point_reduction, norm, abs_cosine)
+
+    fwd_dist, fwd_normals = _chamfer_distance_single_direction(
+        x, y, x_lengths, y_lengths, x_normals, y_normals, *shared_args
+    )
+    if single_directional:
+        return fwd_dist, fwd_normals
+
+    # Same computation with the roles of x and y swapped.
+    bwd_dist, bwd_normals = _chamfer_distance_single_direction(
+        y, x, y_lengths, x_lengths, y_normals, x_normals, *shared_args
+    )
+    has_normals = fwd_normals is not None
+
+    if point_reduction is not None:
+        # Reduced terms are combined by addition.
+        loss = fwd_dist + bwd_dist
+        loss_normals = (fwd_normals + bwd_normals) if has_normals else None
+        return (loss, loss_normals)
+
+    # Unreduced terms have different shapes, so they are returned as pairs.
+    loss = (fwd_dist, bwd_dist)
+    loss_normals = (fwd_normals, bwd_normals) if has_normals else None
+    return (loss, loss_normals)
diff --git a/pytorch3d/pytorch3d/loss/mesh_edge_loss.py b/pytorch3d/pytorch3d/loss/mesh_edge_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..e54ddf9fdf57cb13b756bc213c6d0f60852fd642
--- /dev/null
+++ b/pytorch3d/pytorch3d/loss/mesh_edge_loss.py
@@ -0,0 +1,50 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+
+
+def mesh_edge_loss(meshes, target_length: float = 0.0):
+    """
+    Edge length regularization loss, averaged over the meshes of a batch.
+
+    Every mesh contributes equally to the result no matter how many edges it
+    has: each edge term is scaled by the inverse of the number of edges of the
+    mesh it belongs to. For example, if mesh 3 (out of N) has only E=4 edges,
+    each of its edge terms is multiplied by 1/E before being accumulated.
+
+    Args:
+        meshes: Meshes object with a batch of meshes.
+        target_length: Resting value for the edge length.
+
+    Returns:
+        loss: Average loss across the batch. Returns 0 if meshes contains
+        no meshes or all empty meshes.
+    """
+    if meshes.isempty():
+        return torch.tensor(
+            [0.0], dtype=torch.float32, device=meshes.device, requires_grad=True
+        )
+
+    num_meshes = len(meshes)
+    edge_vert_idx = meshes.edges_packed()  # (sum(E_n), 2)
+    all_verts = meshes.verts_packed()  # (sum(V_n), 3)
+    mesh_of_edge = meshes.edges_packed_to_mesh_idx()  # (sum(E_n), )
+    edge_counts = meshes.num_edges_per_mesh()  # N
+
+    # Per-edge weight: inverse of the edge count of the owning mesh.
+    # TODO (nikhilar) Find a faster way of computing the weights for each edge
+    # as this is currently a bottleneck for meshes with a large number of faces.
+    per_edge_weight = 1.0 / edge_counts.gather(0, mesh_of_edge).float()
+
+    # Look up both end points of every edge and penalize the squared deviation
+    # of the edge length from the target length.
+    start, end = all_verts[edge_vert_idx].unbind(1)
+    sq_deviation = ((start - end).norm(dim=1, p=2) - target_length) ** 2.0
+
+    return (sq_deviation * per_edge_weight).sum() / num_meshes
diff --git a/pytorch3d/pytorch3d/loss/mesh_laplacian_smoothing.py b/pytorch3d/pytorch3d/loss/mesh_laplacian_smoothing.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ce9298a15169e6c070d2b60edd4d1e442a9d987
--- /dev/null
+++ b/pytorch3d/pytorch3d/loss/mesh_laplacian_smoothing.py
@@ -0,0 +1,135 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import torch
+from pytorch3d.ops import cot_laplacian
+
+
+def mesh_laplacian_smoothing(meshes, method: str = "uniform"):
+    r"""
+    Laplacian smoothing objective for a batch of meshes.
+    Three variants of Laplacian smoothing are supported: uniform weights
+    ("uniform"), cotangent weights ("cot") and cotangent curvature
+    ("cotcurv"). For more details read [1, 2].
+
+    Args:
+        meshes: Meshes object with a batch of meshes.
+        method: str specifying the method for the laplacian.
+    Returns:
+        loss: Average laplacian smoothing loss across the batch.
+        Returns 0 if meshes contains no meshes or all empty meshes.
+    Raises:
+        ValueError: if method is not one of "uniform", "cot", "cotcurv".
+
+    Consider a mesh M = (V, F), with verts of shape Nx3 and faces of shape Mx3.
+    The Laplacian matrix L is a NxN tensor such that LV gives a tensor of
+    vectors: for a uniform Laplacian, LuV[i] points to the centroid of the
+    neighbors of vertex i; for a cotangent Laplacian, LcV[i] is known to be an
+    approximation of the surface normal; the curvature variant LckV[i] scales
+    the normals by the discrete mean curvature. For vertex i, let S[i] be the
+    set of vertices neighboring i, and let a_ij and b_ij be the "outside"
+    angles in the two triangles connecting vertex v_i and its neighboring
+    vertex v_j for j in S[i], as seen in the diagram below.
+
+    .. code-block:: python
+
+               a_ij
+                /\
+               /  \
+              /    \
+             /      \
+        v_i /________\ v_j
+            \        /
+             \      /
+              \    /
+               \  /
+                \/
+               b_ij
+
+        The definition of the Laplacian is LV[i] = sum_j w_ij (v_j - v_i)
+        For the uniform variant,    w_ij = 1 / |S[i]|
+        For the cotangent variant,
+            w_ij = (cot a_ij + cot b_ij) / (sum_k cot a_ik + cot b_ik)
+        For the cotangent curvature, w_ij = (cot a_ij + cot b_ij) / (4 A[i])
+        where A[i] is the sum of the areas of all triangles containing vertex v_i.
+
+    Cotangents can be computed with a simple trigonometric identity. Consider
+    a triangle with side lengths A, B, C and angles a, b, c.
+
+    .. code-block:: python
+
+               c
+              /|\
+             / | \
+            /  |  \
+         B /  H|   \ A
+          /    |    \
+         /     |     \
+        /a_____|_____b\
+               C
+
+        Then cot a = (B^2 + C^2 - A^2) / (4 * area)
+        We know that area = CH/2, and by the law of cosines we have
+
+        A^2 = B^2 + C^2 - 2BC cos a => B^2 + C^2 - A^2 = 2BC cos a
+
+        Putting these together, we get:
+
+        B^2 + C^2 - A^2     2BC cos a
+        _______________  =  _________ = (B/H) cos a = cos a / sin a = cot a
+           4 * area            2CH
+
+
+    [1] Desbrun et al, "Implicit fairing of irregular meshes using diffusion
+    and curvature flow", SIGGRAPH 1999.
+
+    [2] Nealen et al, "Laplacian Mesh Optimization", GRAPHITE 2006.
+    """
+
+    if meshes.isempty():
+        return torch.tensor(
+            [0.0], dtype=torch.float32, device=meshes.device, requires_grad=True
+        )
+
+    num_meshes = len(meshes)
+    verts = meshes.verts_packed()  # (sum(V_n), 3)
+    faces = meshes.faces_packed()  # (sum(F_n), 3)
+    verts_per_mesh = meshes.num_verts_per_mesh()  # (N,)
+    mesh_of_vert = meshes.verts_packed_to_mesh_idx()  # (sum(V_n),)
+    # Per-vertex weight: inverse of the vertex count of the owning mesh.
+    vert_weights = 1.0 / verts_per_mesh.gather(0, mesh_of_vert).float()
+
+    # The Laplacian is treated as a constant matrix: no gradients flow
+    # through its construction, only through the verts it is applied to.
+    with torch.no_grad():
+        if method == "uniform":
+            L = meshes.laplacian_packed()
+        elif method in ["cot", "cotcurv"]:
+            L, inv_areas = cot_laplacian(verts, faces)
+            # Row sums of the sparse Laplacian as a dense column vector.
+            row_sums = torch.sparse.sum(L, dim=1).to_dense().view(-1, 1)
+            if method == "cot":
+                # Invert the positive row sums in place; other rows are kept.
+                norm_w = row_sums
+                positive = norm_w > 0
+                # pyre-fixme[58]: `/` is not supported for operand types `float` and
+                #  `Tensor`.
+                norm_w[positive] = 1.0 / norm_w[positive]
+            else:
+                L_sum = row_sums
+                norm_w = 0.25 * inv_areas
+        else:
+            raise ValueError("Method should be one of {uniform, cot, cotcurv}")
+
+    # method is one of the three supported values from here on.
+    laplacian_verts = L.mm(verts)
+    if method == "uniform":
+        loss = laplacian_verts
+    elif method == "cot":
+        # pyre-fixme[61]: `norm_w` is undefined, or not always defined.
+        loss = laplacian_verts * norm_w - verts
+    elif method == "cotcurv":
+        # pyre-fixme[61]: `norm_w` may not be initialized here.
+        loss = (laplacian_verts - L_sum * verts) * norm_w
+
+    per_vert_loss = loss.norm(dim=1) * vert_weights
+    return per_vert_loss.sum() / num_meshes
diff --git a/pytorch3d/pytorch3d/loss/mesh_normal_consistency.py b/pytorch3d/pytorch3d/loss/mesh_normal_consistency.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1dbf670707e590bdf12d9589f5378ee5526c6ac
--- /dev/null
+++ b/pytorch3d/pytorch3d/loss/mesh_normal_consistency.py
@@ -0,0 +1,132 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from pytorch3d import _C
+
+
+def mesh_normal_consistency(meshes):
+    r"""
+    Normal consistency loss for each mesh in meshes.
+    The normal consistency is evaluated for every pair of neighboring faces.
+    If e = (v0, v1) is the connecting edge of two neighboring faces f0 and f1,
+    then the normal consistency between f0 and f1
+
+    .. code-block:: python
+
+                    a
+                    /\
+                   /  \
+                  / f0 \
+                 /      \
+            v0  /____e___\ v1
+                \        /
+                 \      /
+                  \ f1 /
+                   \  /
+                    \/
+                    b
+
+    The normal consistency is
+
+    .. code-block:: python
+
+        nc(f0, f1) = 1 - cos(n0, n1)
+
+        where cos(n0, n1) = n0^n1 / ||n0|| / ||n1|| is the cosine of the angle
+        between the normals n0 and n1, and
+
+        n0 = (v1 - v0) x (a - v0)
+        n1 = - (v1 - v0) x (b - v0) = (b - v0) x (v1 - v0)
+
+    So nc(f0, f1) = 0 means n0 and n1 point in the same direction, while
+    nc(f0, f1) = 2 means they point in opposite directions.
+
+    .. note::
+        For well-constructed meshes exactly two faces share an edge, and
+        relying on that could make the implementation easier and faster.
+        This implementation does not rely on it: all the faces sharing e,
+        however many there are, are discovered.
+
+    Args:
+        meshes: Meshes object with a batch of meshes.
+
+    Returns:
+        loss: Average normal consistency across the batch.
+        Returns 0 if meshes contains no meshes or all empty meshes.
+    """
+    if meshes.isempty():
+        return torch.tensor(
+            [0.0], dtype=torch.float32, device=meshes.device, requires_grad=True
+        )
+
+    num_meshes = len(meshes)
+    verts = meshes.verts_packed()  # (sum(V_n), 3)
+    faces = meshes.faces_packed()  # (sum(F_n), 3)
+    edges = meshes.edges_packed()  # (sum(E_n), 2)
+    mesh_of_vert = meshes.verts_packed_to_mesh_idx()  # (sum(V_n),)
+    face_to_edge = meshes.faces_packed_to_edges_packed()  # (sum(F_n), 3)
+    num_edges = edges.shape[0]  # sum(E_n)
+    num_faces = faces.shape[0]  # sum(F_n)
+
+    # The index bookkeeping below needs no gradients. Its purpose is to find,
+    # for each edge e, the vertices that lie on faces incident to e but not on
+    # e itself; in the diagram above these are (a, b).
+    with torch.no_grad():
+        # One entry per (face, edge-slot): the edge index, and the three
+        # vertex indices of the face that entry came from.
+        edge_idx = face_to_edge.reshape(num_faces * 3)  # (3 * F,)
+        vert_idx = (
+            faces.view(1, num_faces, 3)
+            .expand(3, num_faces, 3)
+            .transpose(0, 1)
+            .reshape(3 * num_faces, 3)
+        )
+        # Sort by edge so that entries sharing an edge become contiguous.
+        edge_idx, sort_order = edge_idx.sort()
+        vert_idx = vert_idx[sort_order]
+
+        # In well constructed meshes each edge is shared by precisely 2 faces,
+        # but many meshes violate this, so the number of faces per edge
+        # depends on the topology and is counted explicitly.
+        # The pairing itself is done on cpu.
+        # TODO(gkioxari) find a better way to do this.
+        entries_per_edge = edge_idx.bincount(minlength=num_edges)
+
+        # All pairs of entries whose vertices are opposite to the same edge.
+        pair_idx = _C.mesh_normal_consistency_find_verts(
+            entries_per_edge.cpu()
+        ).to(entries_per_edge.device)
+
+    if pair_idx.shape[0] == 0:
+        # No two faces share an edge: nothing to compare.
+        return torch.tensor(
+            [0.0], dtype=torch.float32, device=meshes.device, requires_grad=True
+        )
+
+    v0 = verts[edges[edge_idx, 0]]
+    v1 = verts[edges[edge_idx, 1]]
+    edge_vec = v1 - v0
+
+    # Two of the three cross products vanish, since for the face vertices that
+    # lie on the edge they are (v1-v0)x(v1-v0) or (v1-v0)x(v0-v0). The sum is
+    # therefore the cross product with the opposite vertex.
+    n = (
+        edge_vec.cross(verts[vert_idx[:, 0]] - v0, dim=1)
+        + edge_vec.cross(verts[vert_idx[:, 1]] - v0, dim=1)
+        + edge_vec.cross(verts[vert_idx[:, 2]] - v0, dim=1)
+    )
+    n0 = n[pair_idx[:, 0]]
+    n1 = -n[pair_idx[:, 1]]
+    loss = 1 - torch.cosine_similarity(n0, n1, dim=1)
+
+    # Mesh index of every pair, used to weight each pair by the inverse of
+    # the number of pairs found in its mesh.
+    mesh_of_pair = mesh_of_vert[vert_idx[:, 0]][pair_idx[:, 0]]
+    pairs_per_mesh = mesh_of_pair.bincount(minlength=num_meshes)
+    pair_weights = 1.0 / pairs_per_mesh[mesh_of_pair].float()
+
+    return (loss * pair_weights).sum() / num_meshes
diff --git a/pytorch3d/pytorch3d/loss/point_mesh_distance.py b/pytorch3d/pytorch3d/loss/point_mesh_distance.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc45bc124e8a4711ff080ab9ed89db1649d1d809
--- /dev/null
+++ b/pytorch3d/pytorch3d/loss/point_mesh_distance.py
@@ -0,0 +1,396 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from pytorch3d import _C
+from pytorch3d.structures import Meshes, Pointclouds
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+
+"""
+This file defines distances between meshes and pointclouds.
+The functions make use of the definition of a distance between a point and
+an edge segment or the distance of a point and a triangle (face).
+
+The exact mathematical formulations and implementations of these
+distances can be found in `csrc/utils/geometry_utils.cuh`.
+"""
+
+# Default for the `min_triangle_area` argument of the face distance functions
+# below: triangles with a smaller area are treated as points/lines.
+_DEFAULT_MIN_TRIANGLE_AREA: float = 5e-3
+
+
+# PointFaceDistance
+class _PointFaceDistance(Function):
+    """
+    Torch autograd Function wrapping the compiled point-to-face distance
+    routines `_C.point_face_dist_forward` and `_C.point_face_dist_backward`.
+    """
+
+    @staticmethod
+    def forward(
+        ctx,
+        points,
+        points_first_idx,
+        tris,
+        tris_first_idx,
+        max_points,
+        min_triangle_area=_DEFAULT_MIN_TRIANGLE_AREA,
+    ):
+        """
+        Args:
+            ctx: Context object used to calculate gradients.
+            points: FloatTensor of shape `(P, 3)`
+            points_first_idx: LongTensor of shape `(N,)` indicating the first point
+                index in each example in the batch
+            tris: FloatTensor of shape `(T, 3, 3)` of triangular faces. The `t`-th
+                triangular face is spanned by `(tris[t, 0], tris[t, 1], tris[t, 2])`
+            tris_first_idx: LongTensor of shape `(N,)` indicating the first face
+                index in each example in the batch
+            max_points: Scalar equal to maximum number of points in the batch
+            min_triangle_area: (float, defaulted) Triangles of area less than this
+                will be treated as points/lines.
+        Returns:
+            dists: FloatTensor of shape `(P,)`, where `dists[p]` is the squared
+                euclidean distance of `p`-th point to the closest triangular face
+                in the corresponding example in the batch
+
+            The forward routine also produces `idxs`, a LongTensor of shape
+            `(P,)` with the closest triangular face for each point. It is
+            saved for the backward pass and is not returned.
+
+            `dists[p]` is
+            `d(points[p], tris[idxs[p], 0], tris[idxs[p], 1], tris[idxs[p], 2])`
+            where `d(u, v0, v1, v2)` is the distance of point `u` from the triangular
+            face `(v0, v1, v2)`
+
+        """
+        forward_out = _C.point_face_dist_forward(
+            points,
+            points_first_idx,
+            tris,
+            tris_first_idx,
+            max_points,
+            min_triangle_area,
+        )
+        dists, idxs = forward_out
+        # Keep everything the backward routine needs.
+        ctx.min_triangle_area = min_triangle_area
+        ctx.save_for_backward(points, tris, idxs)
+        return dists
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_dists):
+        """
+        Return one gradient per forward input; only `points` and `tris`
+        receive a gradient, the remaining four inputs get None.
+        """
+        points, tris, idxs = ctx.saved_tensors
+        grad_points, grad_tris = _C.point_face_dist_backward(
+            points, tris, idxs, grad_dists.contiguous(), ctx.min_triangle_area
+        )
+        return grad_points, None, grad_tris, None, None, None
+
+
+point_face_distance = _PointFaceDistance.apply
+
+
+# FacePointDistance
+class _FacePointDistance(Function):
+    """
+    Torch autograd Function wrapping the compiled face-to-point distance
+    routines `_C.face_point_dist_forward` and `_C.face_point_dist_backward`.
+    """
+
+    @staticmethod
+    def forward(
+        ctx,
+        points,
+        points_first_idx,
+        tris,
+        tris_first_idx,
+        max_tris,
+        min_triangle_area=_DEFAULT_MIN_TRIANGLE_AREA,
+    ):
+        """
+        Args:
+            ctx: Context object used to calculate gradients.
+            points: FloatTensor of shape `(P, 3)`
+            points_first_idx: LongTensor of shape `(N,)` indicating the first point
+                index in each example in the batch
+            tris: FloatTensor of shape `(T, 3, 3)` of triangular faces. The `t`-th
+                triangular face is spanned by `(tris[t, 0], tris[t, 1], tris[t, 2])`
+            tris_first_idx: LongTensor of shape `(N,)` indicating the first face
+                index in each example in the batch
+            max_tris: Scalar equal to maximum number of faces in the batch
+            min_triangle_area: (float, defaulted) Triangles of area less than this
+                will be treated as points/lines.
+        Returns:
+            dists: FloatTensor of shape `(T,)`, where `dists[t]` is the squared
+                euclidean distance of `t`-th triangular face to the closest point in the
+                corresponding example in the batch
+
+            The forward routine also produces `idxs`, a LongTensor of shape
+            `(T,)` with the closest point for each face. It is saved for the
+            backward pass and is not returned.
+
+            `dists[t] = d(points[idxs[t]], tris[t, 0], tris[t, 1], tris[t, 2])`,
+            where `d(u, v0, v1, v2)` is the distance of point `u` from the triangular
+            face `(v0, v1, v2)`.
+        """
+        forward_out = _C.face_point_dist_forward(
+            points, points_first_idx, tris, tris_first_idx, max_tris, min_triangle_area
+        )
+        dists, idxs = forward_out
+        # Keep everything the backward routine needs.
+        ctx.min_triangle_area = min_triangle_area
+        ctx.save_for_backward(points, tris, idxs)
+        return dists
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_dists):
+        """
+        Return one gradient per forward input; only `points` and `tris`
+        receive a gradient, the remaining four inputs get None.
+        """
+        points, tris, idxs = ctx.saved_tensors
+        grad_points, grad_tris = _C.face_point_dist_backward(
+            points, tris, idxs, grad_dists.contiguous(), ctx.min_triangle_area
+        )
+        return grad_points, None, grad_tris, None, None, None
+
+
+face_point_distance = _FacePointDistance.apply
+
+
+# PointEdgeDistance
+class _PointEdgeDistance(Function):
+    """
+    Torch autograd Function wrapping the compiled point-to-edge distance
+    routines `_C.point_edge_dist_forward` and `_C.point_edge_dist_backward`.
+    """
+
+    @staticmethod
+    def forward(ctx, points, points_first_idx, segms, segms_first_idx, max_points):
+        """
+        Args:
+            ctx: Context object used to calculate gradients.
+            points: FloatTensor of shape `(P, 3)`
+            points_first_idx: LongTensor of shape `(N,)` indicating the first point
+                index for each example in the batch
+            segms: FloatTensor of shape `(S, 2, 3)` of edge segments. The `s`-th
+                edge segment is spanned by `(segms[s, 0], segms[s, 1])`
+            segms_first_idx: LongTensor of shape `(N,)` indicating the first edge
+                index for each example in the batch
+            max_points: Scalar equal to maximum number of points in the batch
+        Returns:
+            dists: FloatTensor of shape `(P,)`, where `dists[p]` is the squared
+                euclidean distance of `p`-th point to the closest edge in the
+                corresponding example in the batch
+
+            The forward routine also produces `idxs`, a LongTensor of shape
+            `(P,)` with the closest edge for each point. It is saved for the
+            backward pass and is not returned.
+
+            `dists[p] = d(points[p], segms[idxs[p], 0], segms[idxs[p], 1])`,
+            where `d(u, v0, v1)` is the distance of point `u` from the edge segment
+            spanned by `(v0, v1)`.
+        """
+        forward_out = _C.point_edge_dist_forward(
+            points, points_first_idx, segms, segms_first_idx, max_points
+        )
+        dists, idxs = forward_out
+        # Keep everything the backward routine needs.
+        ctx.save_for_backward(points, segms, idxs)
+        return dists
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_dists):
+        """
+        Return one gradient per forward input; only `points` and `segms`
+        receive a gradient, the remaining three inputs get None.
+        """
+        points, segms, idxs = ctx.saved_tensors
+        grad_points, grad_segms = _C.point_edge_dist_backward(
+            points, segms, idxs, grad_dists.contiguous()
+        )
+        return grad_points, None, grad_segms, None, None
+
+
+point_edge_distance = _PointEdgeDistance.apply
+
+
+# EdgePointDistance
+class _EdgePointDistance(Function):
+    """
+    Torch autograd Function wrapper EdgePointDistance Cuda implementation
+    """
+
+    @staticmethod
+    def forward(ctx, points, points_first_idx, segms, segms_first_idx, max_segms):
+        """
+        Args:
+            ctx: Context object used to calculate gradients.
+            points: FloatTensor of shape `(P, 3)`
+            points_first_idx: LongTensor of shape `(N,)` indicating the first point
+                index for each example in the mesh
+            segms: FloatTensor of shape `(S, 2, 3)` of edge segments. The `s`-th
+                edge segment is spanned by `(segms[s, 0], segms[s, 1])`
+            segms_first_idx: LongTensor of shape `(N,)` indicating the first edge
+                index for each example in the mesh
+            max_segms: Scalar equal to maximum number of edges in the batch
+        Returns:
+            dists: FloatTensor of shape `(S,)`, where `dists[s]` is the squared
+                euclidean distance of `s`-th edge to the closest point in the
+                corresponding example in the batch
+            idxs: LongTensor of shape `(S,)` indicating the closest point in the
+                corresponding example in the batch.
+
+            `dists[s] = d(points[idxs[s]], edges[s, 0], edges[s, 1])`,
+            where `d(u, v0, v1)` is the distance of point `u` from the segment
+            spanned by `(v0, v1)`.
+        """
+        dists, idxs = _C.edge_point_dist_forward(
+            points, points_first_idx, segms, segms_first_idx, max_segms
+        )
+        ctx.save_for_backward(points, segms, idxs)
+        return dists
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_dists):
+        grad_dists = grad_dists.contiguous()
+        points, segms, idxs = ctx.saved_tensors
+        grad_points, grad_segms = _C.edge_point_dist_backward(
+            points, segms, idxs, grad_dists
+        )
+        return grad_points, None, grad_segms, None, None
+
+
+edge_point_distance = _EdgePointDistance.apply
+
+
+def point_mesh_edge_distance(meshes: Meshes, pcls: Pointclouds):
+    """
+    Computes the distance between a pointcloud and a mesh within a batch.
+    Given a pair `(mesh, pcl)` in the batch, we define the distance to be the
+    sum of two distances, namely `point_edge(mesh, pcl) + edge_point(mesh, pcl)`
+
+    `point_edge(mesh, pcl)`: Computes the squared distance of each point p in pcl
+        to the closest edge segment in mesh and averages across all points in pcl
+    `edge_point(mesh, pcl)`: Computes the squared distance of each edge segment in mesh
+        to the closest point in pcl and averages across all edges in mesh.
+
+    The above distance functions are applied for all `(mesh, pcl)` pairs in the batch
+    and then averaged across the batch.
+
+    Args:
+        meshes: A Meshes data structure containing N meshes
+        pcls: A Pointclouds data structure containing N pointclouds
+
+    Returns:
+        loss: The `point_edge(mesh, pcl) + edge_point(mesh, pcl)` distance
+            between all `(mesh, pcl)` in a batch averaged across the batch.
+    """
+    if len(meshes) != len(pcls):
+        raise ValueError("meshes and pointclouds must be equal sized batches")
+    N = len(meshes)
+
+    # packed representation for pointclouds
+    points = pcls.points_packed()  # (P, 3)
+    points_first_idx = pcls.cloud_to_packed_first_idx()
+    max_points = pcls.num_points_per_cloud().max().item()
+
+    # packed representation for edges
+    verts_packed = meshes.verts_packed()
+    edges_packed = meshes.edges_packed()
+    segms = verts_packed[edges_packed]  # (S, 2, 3)
+    segms_first_idx = meshes.mesh_to_edges_packed_first_idx()
+    max_segms = meshes.num_edges_per_mesh().max().item()
+
+    # point to edge distance: shape (P,)
+    point_to_edge = point_edge_distance(
+        points, points_first_idx, segms, segms_first_idx, max_points
+    )
+
+    # weight each example by the inverse of number of points in the example
+    point_to_cloud_idx = pcls.packed_to_cloud_idx()  # (sum(P_i), )
+    num_points_per_cloud = pcls.num_points_per_cloud()  # (N,)
+    weights_p = num_points_per_cloud.gather(0, point_to_cloud_idx)
+    # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
+    weights_p = 1.0 / weights_p.float()
+    point_to_edge = point_to_edge * weights_p
+    point_dist = point_to_edge.sum() / N
+
+    # edge to edge distance: shape (S,)
+    edge_to_point = edge_point_distance(
+        points, points_first_idx, segms, segms_first_idx, max_segms
+    )
+
+    # weight each example by the inverse of number of edges in the example
+    segm_to_mesh_idx = meshes.edges_packed_to_mesh_idx()  # (sum(S_n),)
+    num_segms_per_mesh = meshes.num_edges_per_mesh()  # (N,)
+    weights_s = num_segms_per_mesh.gather(0, segm_to_mesh_idx)
+    weights_s = 1.0 / weights_s.float()
+    edge_to_point = edge_to_point * weights_s
+    edge_dist = edge_to_point.sum() / N
+
+    return point_dist + edge_dist
+
+
+def point_mesh_face_distance(
+    meshes: Meshes,
+    pcls: Pointclouds,
+    min_triangle_area: float = _DEFAULT_MIN_TRIANGLE_AREA,
+):
+    """
+    Computes the distance between a pointcloud and a mesh within a batch.
+    Given a pair `(mesh, pcl)` in the batch, we define the distance to be the
+    sum of two distances, namely `point_face(mesh, pcl) + face_point(mesh, pcl)`
+
+    `point_face(mesh, pcl)`: Computes the squared distance of each point p in pcl
+        to the closest triangular face in mesh and averages across all points in pcl
+    `face_point(mesh, pcl)`: Computes the squared distance of each triangular face in
+        mesh to the closest point in pcl and averages across all faces in mesh.
+
+    The above distance functions are applied for all `(mesh, pcl)` pairs in the batch
+    and then averaged across the batch.
+
+    Args:
+        meshes: A Meshes data structure containing N meshes
+        pcls: A Pointclouds data structure containing N pointclouds
+        min_triangle_area: (float, defaulted) Triangles of area less than this
+            will be treated as points/lines.
+
+    Returns:
+        loss: The `point_face(mesh, pcl) + face_point(mesh, pcl)` distance
+            between all `(mesh, pcl)` in a batch averaged across the batch.
+    """
+
+    if len(meshes) != len(pcls):
+        raise ValueError("meshes and pointclouds must be equal sized batches")
+    N = len(meshes)
+
+    # packed representation for pointclouds
+    points = pcls.points_packed()  # (P, 3)
+    points_first_idx = pcls.cloud_to_packed_first_idx()
+    max_points = pcls.num_points_per_cloud().max().item()
+
+    # packed representation for faces
+    verts_packed = meshes.verts_packed()
+    faces_packed = meshes.faces_packed()
+    tris = verts_packed[faces_packed]  # (T, 3, 3)
+    tris_first_idx = meshes.mesh_to_faces_packed_first_idx()
+    max_tris = meshes.num_faces_per_mesh().max().item()
+
+    # point to face distance: shape (P,)
+    point_to_face = point_face_distance(
+        points, points_first_idx, tris, tris_first_idx, max_points, min_triangle_area
+    )
+
+    # weight each example by the inverse of number of points in the example
+    point_to_cloud_idx = pcls.packed_to_cloud_idx()  # (sum(P_i),)
+    num_points_per_cloud = pcls.num_points_per_cloud()  # (N,)
+    weights_p = num_points_per_cloud.gather(0, point_to_cloud_idx)
+    # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
+    weights_p = 1.0 / weights_p.float()
+    point_to_face = point_to_face * weights_p
+    point_dist = point_to_face.sum() / N
+
+    # face to point distance: shape (T,)
+    face_to_point = face_point_distance(
+        points, points_first_idx, tris, tris_first_idx, max_tris, min_triangle_area
+    )
+
+    # weight each example by the inverse of number of faces in the example
+    tri_to_mesh_idx = meshes.faces_packed_to_mesh_idx()  # (sum(T_n),)
+    num_tris_per_mesh = meshes.num_faces_per_mesh()  # (N, )
+    weights_t = num_tris_per_mesh.gather(0, tri_to_mesh_idx)
+    weights_t = 1.0 / weights_t.float()
+    face_to_point = face_to_point * weights_t
+    face_dist = face_to_point.sum() / N
+
+    return point_dist + face_dist
diff --git a/pytorch3d/pytorch3d/ops/__init__.py b/pytorch3d/pytorch3d/ops/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9e561ccdc414e9f5f0428cbabe0e325c70f0a85b
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/__init__.py
@@ -0,0 +1,41 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .ball_query import ball_query
+from .cameras_alignment import corresponding_cameras_alignment
+from .cubify import cubify
+from .graph_conv import GraphConv
+from .interp_face_attrs import interpolate_face_attributes
+from .iou_box3d import box3d_overlap
+from .knn import knn_gather, knn_points
+from .laplacian_matrices import cot_laplacian, laplacian, norm_laplacian
+from .mesh_face_areas_normals import mesh_face_areas_normals
+from .mesh_filtering import taubin_smoothing
+from .packed_to_padded import packed_to_padded, padded_to_packed
+from .perspective_n_points import efficient_pnp
+from .points_alignment import corresponding_points_alignment, iterative_closest_point
+from .points_normals import (
+    estimate_pointcloud_local_coord_frames,
+    estimate_pointcloud_normals,
+)
+from .points_to_volumes import (
+    add_pointclouds_to_volumes,
+    add_points_features_to_volume_densities_features,
+)
+from .sample_farthest_points import sample_farthest_points
+from .sample_points_from_meshes import sample_points_from_meshes
+from .subdivide_meshes import SubdivideMeshes
+from .utils import (
+    convert_pointclouds_to_tensor,
+    eyes,
+    get_point_covariances,
+    is_pointclouds,
+    wmean,
+)
+from .vert_align import vert_align
+
+
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
diff --git a/pytorch3d/pytorch3d/ops/ball_query.py b/pytorch3d/pytorch3d/ops/ball_query.py
new file mode 100644
index 0000000000000000000000000000000000000000..af271a40a1626ad8e0fe0340e56800d8b91d3778
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/ball_query.py
@@ -0,0 +1,140 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Union
+
+import torch
+from pytorch3d import _C
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+from .knn import _KNN
+from .utils import masked_gather
+
+
+class _ball_query(Function):
+    """
+    Torch autograd Function wrapper for Ball Query C++/CUDA implementations.
+    """
+
+    @staticmethod
+    def forward(ctx, p1, p2, lengths1, lengths2, K, radius):
+        """
+        Arguments defintions the same as in the ball_query function
+        """
+        idx, dists = _C.ball_query(p1, p2, lengths1, lengths2, K, radius)
+        ctx.save_for_backward(p1, p2, lengths1, lengths2, idx)
+        ctx.mark_non_differentiable(idx)
+        return dists, idx
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_dists, grad_idx):
+        p1, p2, lengths1, lengths2, idx = ctx.saved_tensors
+        # TODO(gkioxari) Change cast to floats once we add support for doubles.
+        if not (grad_dists.dtype == torch.float32):
+            grad_dists = grad_dists.float()
+        if not (p1.dtype == torch.float32):
+            p1 = p1.float()
+        if not (p2.dtype == torch.float32):
+            p2 = p2.float()
+
+        # Reuse the KNN backward function
+        # by default, norm is 2
+        grad_p1, grad_p2 = _C.knn_points_backward(
+            p1, p2, lengths1, lengths2, idx, 2, grad_dists
+        )
+        return grad_p1, grad_p2, None, None, None, None
+
+
+def ball_query(
+    p1: torch.Tensor,
+    p2: torch.Tensor,
+    lengths1: Union[torch.Tensor, None] = None,
+    lengths2: Union[torch.Tensor, None] = None,
+    K: int = 500,
+    radius: float = 0.2,
+    return_nn: bool = True,
+):
+    """
+    Ball Query is an alternative to KNN. It can be
+    used to find all points in p2 that are within a specified radius
+    to the query point in p1 (with an upper limit of K neighbors).
+
+    The neighbors returned are not necssarily the *nearest* to the
+    point in p1, just the first K values in p2 which are within the
+    specified radius.
+
+    This method is faster than kNN when there are large numbers of points
+    in p2 and the ordering of neighbors is not important compared to the
+    distance being within the radius threshold.
+
+    "Ball query’s local neighborhood guarantees a fixed region scale thus
+    making local region features more generalizable across space, which is
+    preferred for tasks requiring local pattern recognition
+    (e.g. semantic point labeling)" [1].
+
+    [1] Charles R. Qi et al, "PointNet++: Deep Hierarchical Feature Learning
+        on Point Sets in a Metric Space", NeurIPS 2017.
+
+    Args:
+        p1: Tensor of shape (N, P1, D) giving a batch of N point clouds, each
+            containing up to P1 points of dimension D. These represent the centers of
+            the ball queries.
+        p2: Tensor of shape (N, P2, D) giving a batch of N point clouds, each
+            containing up to P2 points of dimension D.
+        lengths1: LongTensor of shape (N,) of values in the range [0, P1], giving the
+            length of each pointcloud in p1. Or None to indicate that every cloud has
+            length P1.
+        lengths2: LongTensor of shape (N,) of values in the range [0, P2], giving the
+            length of each pointcloud in p2. Or None to indicate that every cloud has
+            length P2.
+        K: Integer giving the upper bound on the number of samples to take
+            within the radius
+        radius: the radius around each point within which the neighbors need to be located
+        return_nn: If set to True returns the K neighbor points in p2 for each point in p1.
+
+    Returns:
+        dists: Tensor of shape (N, P1, K) giving the squared distances to
+            the neighbors. This is padded with zeros both where a cloud in p2
+            has fewer than S points and where a cloud in p1 has fewer than P1 points
+            and also if there are fewer than K points which satisfy the radius threshold.
+
+        idx: LongTensor of shape (N, P1, K) giving the indices of the
+            S neighbors in p2 for points in p1.
+            Concretely, if `p1_idx[n, i, k] = j` then `p2[n, j]` is the k-th
+            neighbor to `p1[n, i]` in `p2[n]`. This is padded with -1 both where a cloud
+            in p2 has fewer than S points and where a cloud in p1 has fewer than P1
+            points and also if there are fewer than K points which satisfy the radius threshold.
+
+        nn: Tensor of shape (N, P1, K, D) giving the K neighbors in p2 for
+            each point in p1. Concretely, `p2_nn[n, i, k]` gives the k-th neighbor
+            for `p1[n, i]`. Returned if `return_nn` is True.  The output is a tensor
+            of shape (N, P1, K, U).
+
+    """
+    if p1.shape[0] != p2.shape[0]:
+        raise ValueError("pts1 and pts2 must have the same batch dimension.")
+    if p1.shape[2] != p2.shape[2]:
+        raise ValueError("pts1 and pts2 must have the same point dimension.")
+
+    p1 = p1.contiguous()
+    p2 = p2.contiguous()
+    P1 = p1.shape[1]
+    P2 = p2.shape[1]
+    N = p1.shape[0]
+
+    if lengths1 is None:
+        lengths1 = torch.full((N,), P1, dtype=torch.int64, device=p1.device)
+    if lengths2 is None:
+        lengths2 = torch.full((N,), P2, dtype=torch.int64, device=p1.device)
+
+    dists, idx = _ball_query.apply(p1, p2, lengths1, lengths2, K, radius)
+
+    # Gather the neighbors if needed
+    points_nn = masked_gather(p2, idx) if return_nn else None
+
+    return _KNN(dists=dists, idx=idx, knn=points_nn)
diff --git a/pytorch3d/pytorch3d/ops/cameras_alignment.py b/pytorch3d/pytorch3d/ops/cameras_alignment.py
new file mode 100644
index 0000000000000000000000000000000000000000..12412c6761fc8814cea507ae82eaa8565900d960
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/cameras_alignment.py
@@ -0,0 +1,222 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import TYPE_CHECKING
+
+import torch
+
+from .. import ops
+
+
+if TYPE_CHECKING:
+    from pytorch3d.renderer.cameras import CamerasBase
+
+
+def corresponding_cameras_alignment(
+    cameras_src: "CamerasBase",
+    cameras_tgt: "CamerasBase",
+    estimate_scale: bool = True,
+    mode: str = "extrinsics",
+    eps: float = 1e-9,
+) -> "CamerasBase":  # pragma: no cover
+    """
+    .. warning::
+        The `corresponding_cameras_alignment` API is experimental
+        and subject to change!
+
+    Estimates a single similarity transformation between two sets of cameras
+    `cameras_src` and `cameras_tgt` and returns an aligned version of
+    `cameras_src`.
+
+    Given source cameras [(R_1, T_1), (R_2, T_2), ..., (R_N, T_N)] and
+    target cameras [(R_1', T_1'), (R_2', T_2'), ..., (R_N', T_N')],
+    where (R_i, T_i) is a 2-tuple of the camera rotation and translation matrix
+    respectively, the algorithm finds a global rotation, translation and scale
+    (R_A, T_A, s_A) which aligns all source cameras with the target cameras
+    such that the following holds:
+
+        Under the change of coordinates using a similarity transform
+        (R_A, T_A, s_A) a 3D point X' is mapped to X with: ::
+
+            X = (X' R_A + T_A) / s_A
+
+        Then, for all cameras `i`, we assume that the following holds: ::
+
+            X R_i + T_i = s' (X' R_i' + T_i'),
+
+        i.e. an adjusted point X' is mapped by a camera (R_i', T_i')
+        to the same point as imaged from camera (R_i, T_i) after resolving
+        the scale ambiguity with a global scalar factor s'.
+
+        Substituting for X above gives rise to the following: ::
+
+            (X' R_A + T_A) / s_A R_i + T_i = s' (X' R_i' + T_i')       // · s_A
+            (X' R_A + T_A) R_i + T_i s_A = (s' s_A) (X' R_i' + T_i')
+            s' := 1 / s_A  # without loss of generality
+            (X' R_A + T_A) R_i + T_i s_A = X' R_i' + T_i'
+            X' R_A R_i + T_A R_i + T_i s_A = X' R_i' + T_i'
+               ^^^^^^^   ^^^^^^^^^^^^^^^^^
+               ~= R_i'        ~= T_i'
+
+        i.e. after estimating R_A, T_A, s_A, the aligned source cameras have
+        extrinsics: ::
+
+            cameras_src_align = (R_A R_i, T_A R_i + T_i s_A) ~= (R_i', T_i')
+
+    We support two ways `R_A, T_A, s_A` can be estimated:
+        1) `mode=='centers'`
+            Estimates the similarity alignment between camera centers using
+            Umeyama's algorithm (see `pytorch3d.ops.corresponding_points_alignment`
+            for details) and transforms camera extrinsics accordingly.
+
+        2) `mode=='extrinsics'`
+            Defines the alignment problem as a system
+            of the following equations: ::
+
+                for all i:
+                [ R_A   0 ] x [ R_i         0 ] = [ R_i' 0 ]
+                [ T_A^T 1 ]   [ (s_A T_i^T) 1 ]   [ T_i' 1 ]
+
+            `R_A, T_A` and `s_A` are then obtained by solving the
+            system in the least squares sense.
+
+    The estimated camera transformation is a true similarity transform, i.e.
+    it cannot be a reflection.
+
+    Args:
+        cameras_src: `N` cameras to be aligned.
+        cameras_tgt: `N` target cameras.
+        estimate_scale: Controls whether the alignment transform is rigid
+            (`estimate_scale=False`), or a similarity (`estimate_scale=True`).
+            `s_A` is set to `1` if `estimate_scale==False`.
+        mode: Controls the alignment algorithm.
+            Can be one either `'centers'` or `'extrinsics'`. Please refer to the
+            description above for details.
+        eps: A scalar for clamping to avoid dividing by zero.
+            Active when `estimate_scale==True`.
+
+    Returns:
+        cameras_src_aligned: `cameras_src` after applying the alignment transform.
+    """
+
+    if cameras_src.R.shape[0] != cameras_tgt.R.shape[0]:
+        raise ValueError(
+            "cameras_src and cameras_tgt have to contain the same number of cameras!"
+        )
+
+    if mode == "centers":
+        align_fun = _align_camera_centers
+    elif mode == "extrinsics":
+        align_fun = _align_camera_extrinsics
+    else:
+        raise ValueError("mode has to be one of (centers, extrinsics)")
+
+    align_t_R, align_t_T, align_t_s = align_fun(
+        cameras_src, cameras_tgt, estimate_scale=estimate_scale, eps=eps
+    )
+
+    # create a new cameras object and set the R and T accordingly
+    cameras_src_aligned = cameras_src.clone()
+    cameras_src_aligned.R = torch.bmm(align_t_R.expand_as(cameras_src.R), cameras_src.R)
+    cameras_src_aligned.T = (
+        torch.bmm(
+            align_t_T[:, None].repeat(cameras_src.R.shape[0], 1, 1),
+            cameras_src.R,
+        )[:, 0]
+        + cameras_src.T * align_t_s
+    )
+
+    return cameras_src_aligned
+
+
+def _align_camera_centers(
+    cameras_src: "CamerasBase",
+    cameras_tgt: "CamerasBase",
+    estimate_scale: bool = True,
+    eps: float = 1e-9,
+):  # pragma: no cover
+    """
+    Use Umeyama's algorithm to align the camera centers.
+    """
+    centers_src = cameras_src.get_camera_center()
+    centers_tgt = cameras_tgt.get_camera_center()
+    align_t = ops.corresponding_points_alignment(
+        centers_src[None],
+        centers_tgt[None],
+        estimate_scale=estimate_scale,
+        allow_reflection=False,
+        eps=eps,
+    )
+    # the camera transform is the inverse of the estimated transform between centers
+    align_t_R = align_t.R.permute(0, 2, 1)
+    align_t_T = -(torch.bmm(align_t.T[:, None], align_t_R))[:, 0]
+    align_t_s = align_t.s[0]
+
+    return align_t_R, align_t_T, align_t_s
+
+
+def _align_camera_extrinsics(
+    cameras_src: "CamerasBase",
+    cameras_tgt: "CamerasBase",
+    estimate_scale: bool = True,
+    eps: float = 1e-9,
+):  # pragma: no cover
+    """
+    Get the global rotation R_A with svd of cov(RR^T):
+        ```
+        R_A R_i = R_i' for all i
+        R_A [R_1 R_2 ... R_N] = [R_1' R_2' ... R_N']
+        U, _, V = svd([R_1 R_2 ... R_N]^T [R_1' R_2' ... R_N'])
+        R_A = (U V^T)^T
+        ```
+    """
+    RRcov = torch.bmm(cameras_src.R, cameras_tgt.R.transpose(2, 1)).mean(0)
+    U, _, V = torch.svd(RRcov)
+    align_t_R = V @ U.t()
+
+    """
+    The translation + scale `T_A` and `s_A` is computed by finding
+    a translation and scaling that aligns two tensors `A, B`
+    defined as follows:
+        ```
+        T_A R_i + s_A T_i   = T_i'        ;  for all i    // · R_i^T
+        s_A T_i R_i^T + T_A = T_i' R_i^T  ;  for all i
+            ^^^^^^^^^         ^^^^^^^^^^
+                A_i                B_i
+
+        A_i := T_i R_i^T
+        A = [A_1 A_2 ... A_N]
+        B_i := T_i' R_i^T
+        B = [B_1 B_2 ... B_N]
+        ```
+    The scale s_A can be retrieved by matching the correlations of
+    the points sets A and B:
+        ```
+        s_A = (A-mean(A))*(B-mean(B)).sum() / ((A-mean(A))**2).sum()
+        ```
+    The translation `T_A` is then defined as:
+        ```
+        T_A = mean(B) - mean(A) * s_A
+        ```
+    """
+    A = torch.bmm(cameras_src.R, cameras_src.T[:, :, None])[:, :, 0]
+    B = torch.bmm(cameras_src.R, cameras_tgt.T[:, :, None])[:, :, 0]
+    Amu = A.mean(0, keepdim=True)
+    Bmu = B.mean(0, keepdim=True)
+    if estimate_scale and A.shape[0] > 1:
+        # get the scaling component by matching covariances
+        # of centered A and centered B
+        Ac = A - Amu
+        Bc = B - Bmu
+        # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+        align_t_s = (Ac * Bc).mean() / (Ac**2).mean().clamp(eps)
+    else:
+        # set the scale to identity
+        align_t_s = 1.0
+    # get the translation as the difference between the means of A and B
+    align_t_T = Bmu - align_t_s * Amu
+
+    return align_t_R, align_t_T, align_t_s
diff --git a/pytorch3d/pytorch3d/ops/cubify.py b/pytorch3d/pytorch3d/ops/cubify.py
new file mode 100644
index 0000000000000000000000000000000000000000..77bc924ad92810ebec50660d63ab52bb8fa9bd4c
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/cubify.py
@@ -0,0 +1,243 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import torch
+import torch.nn.functional as F
+from pytorch3d.common.compat import meshgrid_ij
+from pytorch3d.structures import Meshes
+
+
+def unravel_index(idx, dims) -> torch.Tensor:
+    r"""
+    Equivalent to np.unravel_index
+    Args:
+      idx: A LongTensor whose elements are indices into the
+          flattened version of an array of dimensions dims.
+      dims: The shape of the array to be indexed.
+    Implemented only for dims=(N, H, W, D)
+    """
+    if len(dims) != 4:
+        raise ValueError("Expects a 4-element list.")
+    N, H, W, D = dims
+    n = idx // (H * W * D)
+    h = (idx - n * H * W * D) // (W * D)
+    w = (idx - n * H * W * D - h * W * D) // D
+    d = idx - n * H * W * D - h * W * D - w * D
+    return torch.stack((n, h, w, d), dim=1)
+
+
+def ravel_index(idx, dims) -> torch.Tensor:
+    """
+    Computes the linear index in an array of shape dims.
+    It performs the reverse functionality of unravel_index
+    Args:
+      idx: A LongTensor of shape (N, 3). Each row corresponds to indices into an
+          array of dimensions dims.
+      dims: The shape of the array to be indexed.
+    Implemented only for dims=(H, W, D)
+    """
+    if len(dims) != 3:
+        raise ValueError("Expects a 3-element list")
+    if idx.shape[1] != 3:
+        raise ValueError("Expects an index tensor of shape Nx3")
+    H, W, D = dims
+    linind = idx[:, 0] * W * D + idx[:, 1] * D + idx[:, 2]
+    return linind
+
+
+@torch.no_grad()
+def cubify(voxels, thresh, device=None, align: str = "topleft") -> Meshes:
+    r"""
+    Converts a voxel to a mesh by replacing each occupied voxel with a cube
+    consisting of 12 faces and 8 vertices. Shared vertices are merged, and
+    internal faces are removed.
+    Args:
+      voxels: A FloatTensor of shape (N, D, H, W) containing occupancy probabilities.
+      thresh: A scalar threshold. If a voxel occupancy is larger than
+          thresh, the voxel is considered occupied.
+      device: The device of the output meshes
+      align: Defines the alignment of the mesh vertices and the grid locations.
+          Has to be one of {"topleft", "corner", "center"}. See below for explanation.
+          Default is "topleft".
+    Returns:
+      meshes: A Meshes object of the corresponding meshes.
+
+
+    The alignment between the vertices of the cubified mesh and the voxel locations (or pixels)
+    is defined by the choice of `align`. We support three modes, as shown below for a 2x2 grid:
+
+                X---X----         X-------X        ---------
+                |   |   |         |   |   |        | X | X |
+                X---X----         ---------        ---------
+                |   |   |         |   |   |        | X | X |
+                ---------         X-------X        ---------
+
+                 topleft           corner            center
+
+    In the figure, X denote the grid locations and the squares represent the added cuboids.
+    When `align="topleft"`, then the top left corner of each cuboid corresponds to the
+    pixel coordinate of the input grid.
+    When `align="corner"`, then the corners of the output mesh span the whole grid.
+    When `align="center"`, then the grid locations form the center of the cuboids.
+    """
+
+    if device is None:
+        device = voxels.device
+
+    if align not in ["topleft", "corner", "center"]:
+        raise ValueError("Align mode must be one of (topleft, corner, center).")
+
+    if len(voxels) == 0:
+        return Meshes(verts=[], faces=[])
+
+    N, D, H, W = voxels.size()
+    # vertices corresponding to a unit cube: 8x3
+    cube_verts = torch.tensor(
+        [
+            [0, 0, 0],
+            [0, 0, 1],
+            [0, 1, 0],
+            [0, 1, 1],
+            [1, 0, 0],
+            [1, 0, 1],
+            [1, 1, 0],
+            [1, 1, 1],
+        ],
+        dtype=torch.int64,
+        device=device,
+    )
+
+    # faces corresponding to a unit cube: 12x3
+    cube_faces = torch.tensor(
+        [
+            [0, 1, 2],
+            [1, 3, 2],  # left face: 0, 1
+            [2, 3, 6],
+            [3, 7, 6],  # bottom face: 2, 3
+            [0, 2, 6],
+            [0, 6, 4],  # front face: 4, 5
+            [0, 5, 1],
+            [0, 4, 5],  # up face: 6, 7
+            [6, 7, 5],
+            [6, 5, 4],  # right face: 8, 9
+            [1, 7, 3],
+            [1, 5, 7],  # back face: 10, 11
+        ],
+        dtype=torch.int64,
+        device=device,
+    )
+
+    wx = torch.tensor([0.5, 0.5], device=device).view(1, 1, 1, 1, 2)
+    wy = torch.tensor([0.5, 0.5], device=device).view(1, 1, 1, 2, 1)
+    wz = torch.tensor([0.5, 0.5], device=device).view(1, 1, 2, 1, 1)
+
+    voxelt = voxels.ge(thresh).float()
+    # N x 1 x D x H x W
+    voxelt = voxelt.view(N, 1, D, H, W)
+
+    # N x 1 x (D-1) x (H-1) x (W-1)
+    voxelt_x = F.conv3d(voxelt, wx).gt(0.5).float()
+    voxelt_y = F.conv3d(voxelt, wy).gt(0.5).float()
+    voxelt_z = F.conv3d(voxelt, wz).gt(0.5).float()
+
+    # 12 x N x 1 x D x H x W
+    faces_idx = torch.ones((cube_faces.size(0), N, 1, D, H, W), device=device)
+
+    # add left face
+    faces_idx[0, :, :, :, :, 1:] = 1 - voxelt_x
+    faces_idx[1, :, :, :, :, 1:] = 1 - voxelt_x
+    # add bottom face
+    faces_idx[2, :, :, :, :-1, :] = 1 - voxelt_y
+    faces_idx[3, :, :, :, :-1, :] = 1 - voxelt_y
+    # add front face
+    faces_idx[4, :, :, 1:, :, :] = 1 - voxelt_z
+    faces_idx[5, :, :, 1:, :, :] = 1 - voxelt_z
+    # add up face
+    faces_idx[6, :, :, :, 1:, :] = 1 - voxelt_y
+    faces_idx[7, :, :, :, 1:, :] = 1 - voxelt_y
+    # add right face
+    faces_idx[8, :, :, :, :, :-1] = 1 - voxelt_x
+    faces_idx[9, :, :, :, :, :-1] = 1 - voxelt_x
+    # add back face
+    faces_idx[10, :, :, :-1, :, :] = 1 - voxelt_z
+    faces_idx[11, :, :, :-1, :, :] = 1 - voxelt_z
+
+    faces_idx *= voxelt
+
+    # N x H x W x D x 12
+    faces_idx = faces_idx.permute(1, 2, 4, 5, 3, 0).squeeze(1)
+    # (NHWD) x 12
+    faces_idx = faces_idx.contiguous()
+    faces_idx = faces_idx.view(-1, cube_faces.size(0))
+
+    # boolean to linear index
+    # NF x 2
+    linind = torch.nonzero(faces_idx, as_tuple=False)
+    # NF x 4
+    nyxz = unravel_index(linind[:, 0], (N, H, W, D))
+
+    # NF x 3: faces
+    faces = torch.index_select(cube_faces, 0, linind[:, 1])
+
+    grid_faces = []
+    for d in range(cube_faces.size(1)):
+        # NF x 3
+        xyz = torch.index_select(cube_verts, 0, faces[:, d])
+        permute_idx = torch.tensor([1, 0, 2], device=device)
+        yxz = torch.index_select(xyz, 1, permute_idx)
+        yxz += nyxz[:, 1:]
+        # NF x 1
+        temp = ravel_index(yxz, (H + 1, W + 1, D + 1))
+        grid_faces.append(temp)
+    # NF x 3
+    grid_faces = torch.stack(grid_faces, dim=1)
+
+    y, x, z = meshgrid_ij(torch.arange(H + 1), torch.arange(W + 1), torch.arange(D + 1))
+    y = y.to(device=device, dtype=torch.float32)
+    x = x.to(device=device, dtype=torch.float32)
+    z = z.to(device=device, dtype=torch.float32)
+
+    if align == "center":
+        x = x - 0.5
+        y = y - 0.5
+        z = z - 0.5
+
+    margin = 0.0 if align == "corner" else 1.0
+    y = y * 2.0 / (H - margin) - 1.0
+    x = x * 2.0 / (W - margin) - 1.0
+    z = z * 2.0 / (D - margin) - 1.0
+
+    # ((H+1)(W+1)(D+1)) x 3
+    grid_verts = torch.stack((x, y, z), dim=3).view(-1, 3)
+
+    if len(nyxz) == 0:
+        verts_list = [torch.tensor([], dtype=torch.float32, device=device)] * N
+        faces_list = [torch.tensor([], dtype=torch.int64, device=device)] * N
+        return Meshes(verts=verts_list, faces=faces_list)
+
+    num_verts = grid_verts.size(0)
+    grid_faces += nyxz[:, 0].view(-1, 1) * num_verts
+    idleverts = torch.ones(num_verts * N, dtype=torch.uint8, device=device)
+
+    indices = grid_faces.flatten()
+    if device.type == "cpu":
+        indices = torch.unique(indices)
+    idleverts.scatter_(0, indices, 0)
+    grid_faces -= nyxz[:, 0].view(-1, 1) * num_verts
+    split_size = torch.bincount(nyxz[:, 0], minlength=N)
+    faces_list = list(torch.split(grid_faces, split_size.tolist(), 0))
+
+    idleverts = idleverts.view(N, num_verts)
+    idlenum = idleverts.cumsum(1)
+
+    verts_list = [
+        grid_verts.index_select(0, (idleverts[n] == 0).nonzero(as_tuple=False)[:, 0])
+        for n in range(N)
+    ]
+    faces_list = [nface - idlenum[n][nface] for n, nface in enumerate(faces_list)]
+
+    return Meshes(verts=verts_list, faces=faces_list)
diff --git a/pytorch3d/pytorch3d/ops/graph_conv.py b/pytorch3d/pytorch3d/ops/graph_conv.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d7a6186b0acedd0cf28d6b7c9723e357acca357
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/graph_conv.py
@@ -0,0 +1,174 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import torch
+import torch.nn as nn
+from pytorch3d import _C
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+
+class GraphConv(nn.Module):
+    """A single graph convolution layer."""
+
+    def __init__(
+        self,
+        input_dim: int,
+        output_dim: int,
+        init: str = "normal",
+        directed: bool = False,
+    ) -> None:
+        """
+        Args:
+            input_dim: Number of input features per vertex.
+            output_dim: Number of output features per vertex.
+            init: Weight initialization method. Can be one of ['zero', 'normal'].
+            directed: Bool indicating if edges in the graph are directed.
+        """
+        super().__init__()
+        self.input_dim = input_dim
+        self.output_dim = output_dim
+        self.directed = directed
+        self.w0 = nn.Linear(input_dim, output_dim)
+        self.w1 = nn.Linear(input_dim, output_dim)
+
+        if init == "normal":
+            nn.init.normal_(self.w0.weight, mean=0, std=0.01)
+            nn.init.normal_(self.w1.weight, mean=0, std=0.01)
+            self.w0.bias.data.zero_()
+            self.w1.bias.data.zero_()
+        elif init == "zero":
+            self.w0.weight.data.zero_()
+            self.w1.weight.data.zero_()
+        else:
+            raise ValueError('Invalid GraphConv initialization "%s"' % init)
+
+    def forward(self, verts, edges):
+        """
+        Args:
+            verts: FloatTensor of shape (V, input_dim) where V is the number of
+                vertices and input_dim is the number of input features
+                per vertex. input_dim has to match the input_dim specified
+                in __init__.
+            edges: LongTensor of shape (E, 2) where E is the number of edges
+                where each edge has the indices of the two vertices which
+                form the edge.
+
+        Returns:
+            out: FloatTensor of shape (V, output_dim) where output_dim is the
+            number of output features per vertex.
+        """
+        if verts.is_cuda != edges.is_cuda:
+            raise ValueError("verts and edges tensors must be on the same device.")
+        if verts.shape[0] == 0:
+            # empty graph.
+            return verts.new_zeros((0, self.output_dim)) * verts.sum()
+
+        verts_w0 = self.w0(verts)  # (V, output_dim)
+        verts_w1 = self.w1(verts)  # (V, output_dim)
+
+        if torch.cuda.is_available() and verts.is_cuda and edges.is_cuda:
+            neighbor_sums = gather_scatter(verts_w1, edges, self.directed)
+        else:
+            neighbor_sums = gather_scatter_python(
+                verts_w1, edges, self.directed
+            )  # (V, output_dim)
+
+        # Add neighbor features to each vertex's features.
+        out = verts_w0 + neighbor_sums
+        return out
+
+    def __repr__(self):
+        Din, Dout, directed = self.input_dim, self.output_dim, self.directed
+        return "GraphConv(%d -> %d, directed=%r)" % (Din, Dout, directed)
+
+
+def gather_scatter_python(input, edges, directed: bool = False):
+    """
+    Python implementation of gather_scatter for aggregating features of
+    neighbor nodes in a graph.
+
+    Given a directed graph: v0 -> v1 -> v2 the updated feature for v1 depends
+    on v2 in order to be consistent with Morris et al. AAAI 2019
+    (https://arxiv.org/abs/1810.02244). This only affects
+    directed graphs; for undirected graphs v1 will depend on both v0 and v2,
+    no matter which way the edges are physically stored.
+
+    Args:
+        input: Tensor of shape (num_vertices, input_dim).
+        edges: Tensor of edge indices of shape (num_edges, 2).
+        directed: bool indicating if edges are directed.
+
+    Returns:
+        output: Tensor of same shape as input.
+    """
+    if not (input.dim() == 2):
+        raise ValueError("input can only have 2 dimensions.")
+    if not (edges.dim() == 2):
+        raise ValueError("edges can only have 2 dimensions.")
+    if not (edges.shape[1] == 2):
+        raise ValueError("edges must be of shape (num_edges, 2).")
+
+    num_vertices, input_feature_dim = input.shape
+    num_edges = edges.shape[0]
+    output = torch.zeros_like(input)
+    idx0 = edges[:, 0].view(num_edges, 1).expand(num_edges, input_feature_dim)
+    idx1 = edges[:, 1].view(num_edges, 1).expand(num_edges, input_feature_dim)
+
+    output = output.scatter_add(0, idx0, input.gather(0, idx1))
+    if not directed:
+        output = output.scatter_add(0, idx1, input.gather(0, idx0))
+    return output
+
+
+class GatherScatter(Function):
+    """
+    Torch autograd Function wrapper for gather_scatter C++/CUDA implementations.
+    """
+
+    @staticmethod
+    def forward(ctx, input, edges, directed=False):
+        """
+        Args:
+            ctx: Context object used to calculate gradients.
+            input: Tensor of shape (num_vertices, input_dim)
+            edges: Tensor of edge indices of shape (num_edges, 2)
+            directed: Bool indicating if edges are directed.
+
+        Returns:
+            output: Tensor of same shape as input.
+        """
+        if not (input.dim() == 2):
+            raise ValueError("input can only have 2 dimensions.")
+        if not (edges.dim() == 2):
+            raise ValueError("edges can only have 2 dimensions.")
+        if not (edges.shape[1] == 2):
+            raise ValueError("edges must be of shape (num_edges, 2).")
+        if not (input.dtype == torch.float32):
+            raise ValueError("input has to be of type torch.float32.")
+
+        ctx.directed = directed
+        input, edges = input.contiguous(), edges.contiguous()
+        ctx.save_for_backward(edges)
+        backward = False
+        output = _C.gather_scatter(input, edges, directed, backward)
+        return output
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_output):
+        grad_output = grad_output.contiguous()
+        edges = ctx.saved_tensors[0]
+        directed = ctx.directed
+        backward = True
+        grad_input = _C.gather_scatter(grad_output, edges, directed, backward)
+        grad_edges = None
+        grad_directed = None
+        return grad_input, grad_edges, grad_directed
+
+
+gather_scatter = GatherScatter.apply
diff --git a/pytorch3d/pytorch3d/ops/interp_face_attrs.py b/pytorch3d/pytorch3d/ops/interp_face_attrs.py
new file mode 100644
index 0000000000000000000000000000000000000000..705fc152eddf96771391ad481dd8709cb9ba792b
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/interp_face_attrs.py
@@ -0,0 +1,99 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from pytorch3d import _C
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+
+def interpolate_face_attributes(
+    pix_to_face: torch.Tensor,
+    barycentric_coords: torch.Tensor,
+    face_attributes: torch.Tensor,
+) -> torch.Tensor:
+    """
+    Interpolate arbitrary face attributes using the barycentric coordinates
+    for each pixel in the rasterized output.
+
+    Args:
+        pix_to_face: LongTensor of shape (N, H, W, K) specifying the indices
+            of the faces (in the packed representation) which overlap each
+            pixel in the image. A value < 0 indicates that the pixel does not
+            overlap any face and should be skipped.
+        barycentric_coords: FloatTensor of shape (N, H, W, K, 3) specifying
+            the barycentric coordinates of each pixel
+            relative to the faces (in the packed
+            representation) which overlap the pixel.
+        face_attributes: packed attributes of shape (total_faces, 3, D),
+            specifying the value of the attribute for each
+            vertex in the face.
+
+    Returns:
+        pixel_vals: tensor of shape (N, H, W, K, D) giving the interpolated
+        value of the face attribute for each pixel.
+    """
+    # Check shapes
+    F, FV, D = face_attributes.shape
+    if FV != 3:
+        raise ValueError("Faces can only have three vertices; got %r" % FV)
+    N, H, W, K, _ = barycentric_coords.shape
+    if pix_to_face.shape != (N, H, W, K):
+        msg = "pix_to_face must have shape (batch_size, H, W, K); got %r"
+        raise ValueError(msg % (pix_to_face.shape,))
+
+    # On CPU use the python version
+    # TODO: Implement a C++ version of this function
+    if not pix_to_face.is_cuda:
+        args = (pix_to_face, barycentric_coords, face_attributes)
+        return interpolate_face_attributes_python(*args)
+
+    # Otherwise flatten and call the custom autograd function
+    N, H, W, K = pix_to_face.shape
+    pix_to_face = pix_to_face.view(-1)
+    barycentric_coords = barycentric_coords.view(N * H * W * K, 3)
+    args = (pix_to_face, barycentric_coords, face_attributes)
+    out = _InterpFaceAttrs.apply(*args)
+    out = out.view(N, H, W, K, -1)
+    return out
+
+
+class _InterpFaceAttrs(Function):
+    @staticmethod
+    def forward(ctx, pix_to_face, barycentric_coords, face_attrs):
+        args = (pix_to_face, barycentric_coords, face_attrs)
+        ctx.save_for_backward(*args)
+        return _C.interp_face_attrs_forward(*args)
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_pix_attrs):
+        args = ctx.saved_tensors
+        args = args + (grad_pix_attrs,)
+        grads = _C.interp_face_attrs_backward(*args)
+        grad_pix_to_face = None
+        grad_barycentric_coords = grads[0]
+        grad_face_attrs = grads[1]
+        return grad_pix_to_face, grad_barycentric_coords, grad_face_attrs
+
+
+def interpolate_face_attributes_python(
+    pix_to_face: torch.Tensor,
+    barycentric_coords: torch.Tensor,
+    face_attributes: torch.Tensor,
+) -> torch.Tensor:
+    F, FV, D = face_attributes.shape
+    N, H, W, K, _ = barycentric_coords.shape
+
+    # Replace empty pixels in pix_to_face with 0 in order to interpolate.
+    mask = pix_to_face < 0
+    pix_to_face = pix_to_face.clone()
+    pix_to_face[mask] = 0
+    idx = pix_to_face.view(N * H * W * K, 1, 1).expand(N * H * W * K, 3, D)
+    pixel_face_vals = face_attributes.gather(0, idx).view(N, H, W, K, 3, D)
+    pixel_vals = (barycentric_coords[..., None] * pixel_face_vals).sum(dim=-2)
+    pixel_vals[mask] = 0  # Replace masked values in output.
+    return pixel_vals
diff --git a/pytorch3d/pytorch3d/ops/iou_box3d.py b/pytorch3d/pytorch3d/ops/iou_box3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..dbc6358bb41e412978a968964981354cac86f40f
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/iou_box3d.py
@@ -0,0 +1,166 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+import torch.nn.functional as F
+from pytorch3d import _C
+from torch.autograd import Function
+
+
+# -------------------------------------------------- #
+#                  CONSTANTS                         #
+# -------------------------------------------------- #
+"""
+_box_planes and _box_triangles define the 4- and 3-connectivity
+of the 8 box corners.
+_box_planes gives the quad faces of the 3D box
+_box_triangles gives the triangle faces of the 3D box
+"""
+_box_planes = [
+    [0, 1, 2, 3],
+    [3, 2, 6, 7],
+    [0, 1, 5, 4],
+    [0, 3, 7, 4],
+    [1, 2, 6, 5],
+    [4, 5, 6, 7],
+]
+_box_triangles = [
+    [0, 1, 2],
+    [0, 3, 2],
+    [4, 5, 6],
+    [4, 6, 7],
+    [1, 5, 6],
+    [1, 6, 2],
+    [0, 4, 7],
+    [0, 7, 3],
+    [3, 2, 6],
+    [3, 6, 7],
+    [0, 1, 5],
+    [0, 4, 5],
+]
+
+
+def _check_coplanar(boxes: torch.Tensor, eps: float = 1e-4) -> None:
+    faces = torch.tensor(_box_planes, dtype=torch.int64, device=boxes.device)
+    verts = boxes.index_select(index=faces.view(-1), dim=1)
+    B = boxes.shape[0]
+    P, V = faces.shape
+    # (B, P, 4, 3) -> (B, P, 3)
+    v0, v1, v2, v3 = verts.reshape(B, P, V, 3).unbind(2)
+
+    # Compute the normal
+    e0 = F.normalize(v1 - v0, dim=-1)
+    e1 = F.normalize(v2 - v0, dim=-1)
+    normal = F.normalize(torch.cross(e0, e1, dim=-1), dim=-1)
+
+    # Check the fourth vertex is also on the same plane
+    mat1 = (v3 - v0).view(B, 1, -1)  # (B, 1, P*3)
+    mat2 = normal.view(B, -1, 1)  # (B, P*3, 1)
+    if not (mat1.bmm(mat2).abs() < eps).all().item():
+        msg = "Plane vertices are not coplanar"
+        raise ValueError(msg)
+
+    return
+
+
+def _check_nonzero(boxes: torch.Tensor, eps: float = 1e-4) -> None:
+    """
+    Checks that the sides of the box have a non zero area
+    """
+    faces = torch.tensor(_box_triangles, dtype=torch.int64, device=boxes.device)
+    verts = boxes.index_select(index=faces.view(-1), dim=1)
+    B = boxes.shape[0]
+    T, V = faces.shape
+    # (B, T, 3, 3) -> (B, T, 3)
+    v0, v1, v2 = verts.reshape(B, T, V, 3).unbind(2)
+
+    normals = torch.cross(v1 - v0, v2 - v0, dim=-1)  # (B, T, 3)
+    face_areas = normals.norm(dim=-1) / 2
+
+    if (face_areas < eps).any().item():
+        msg = "Planes have zero areas"
+        raise ValueError(msg)
+
+    return
+
+
+class _box3d_overlap(Function):
+    """
+    Torch autograd Function wrapper for box3d_overlap C++/CUDA implementations.
+    Backward is not supported.
+    """
+
+    @staticmethod
+    def forward(ctx, boxes1, boxes2):
+        """
+        Argument definitions are the same as in the box3d_overlap function
+        """
+        vol, iou = _C.iou_box3d(boxes1, boxes2)
+        return vol, iou
+
+    @staticmethod
+    def backward(ctx, grad_vol, grad_iou):
+        raise ValueError("box3d_overlap backward is not supported")
+
+
+def box3d_overlap(
+    boxes1: torch.Tensor, boxes2: torch.Tensor, eps: float = 1e-4
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Computes the intersection of 3D boxes1 and boxes2.
+
+    Inputs boxes1, boxes2 are tensors of shape (B, 8, 3)
+    (where B doesn't have to be the same for boxes1 and boxes2),
+    containing the 8 corners of the boxes, as follows:
+
+        (4) +---------+. (5)
+            | ` .     |  ` .
+            | (0) +---+-----+ (1)
+            |     |   |     |
+        (7) +-----+---+. (6)|
+            ` .   |     ` . |
+            (3) ` +---------+ (2)
+
+
+    NOTE: Throughout this implementation, we assume that boxes
+    are defined by their 8 corners exactly in the order specified in the
+    diagram above for the function to give correct results. In addition
+    the vertices on each plane must be coplanar.
+    As an alternative to the diagram, this is a unit bounding
+    box which has the correct vertex ordering:
+
+    box_corner_vertices = [
+        [0, 0, 0],
+        [1, 0, 0],
+        [1, 1, 0],
+        [0, 1, 0],
+        [0, 0, 1],
+        [1, 0, 1],
+        [1, 1, 1],
+        [0, 1, 1],
+    ]
+
+    Args:
+        boxes1: tensor of shape (N, 8, 3) of the coordinates of the 1st boxes
+        boxes2: tensor of shape (M, 8, 3) of the coordinates of the 2nd boxes
+    Returns:
+        vol: (N, M) tensor of the volume of the intersecting convex shapes
+        iou: (N, M) tensor of the intersection over union which is
+            defined as: `iou = vol / (vol1 + vol2 - vol)`
+    """
+    if not all((8, 3) == box.shape[1:] for box in [boxes1, boxes2]):
+        raise ValueError("Each box in the batch must be of shape (8, 3)")
+
+    _check_coplanar(boxes1, eps)
+    _check_coplanar(boxes2, eps)
+    _check_nonzero(boxes1, eps)
+    _check_nonzero(boxes2, eps)
+
+    vol, iou = _box3d_overlap.apply(boxes1, boxes2)
+
+    return vol, iou
diff --git a/pytorch3d/pytorch3d/ops/knn.py b/pytorch3d/pytorch3d/ops/knn.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b31c5cb1551b58209634cc87e80f0cbf51fc642
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/knn.py
@@ -0,0 +1,248 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from collections import namedtuple
+from typing import Union
+
+import torch
+from pytorch3d import _C
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+
+_KNN = namedtuple("KNN", "dists idx knn")
+
+
+class _knn_points(Function):
+    """
+    Torch autograd Function wrapper for KNN C++/CUDA implementations.
+    """
+
+    @staticmethod
+    # pyre-fixme[14]: `forward` overrides method defined in `Function` inconsistently.
+    def forward(
+        ctx,
+        p1,
+        p2,
+        lengths1,
+        lengths2,
+        K,
+        version,
+        norm: int = 2,
+        return_sorted: bool = True,
+    ):
+        """
+        K-Nearest neighbors on point clouds.
+
+        Args:
+            p1: Tensor of shape (N, P1, D) giving a batch of N point clouds, each
+                containing up to P1 points of dimension D.
+            p2: Tensor of shape (N, P2, D) giving a batch of N point clouds, each
+                containing up to P2 points of dimension D.
+            lengths1: LongTensor of shape (N,) of values in the range [0, P1], giving the
+                length of each pointcloud in p1. Or None to indicate that every cloud has
+                length P1.
+            lengths2: LongTensor of shape (N,) of values in the range [0, P2], giving the
+                length of each pointcloud in p2. Or None to indicate that every cloud has
+                length P2.
+            K: Integer giving the number of nearest neighbors to return.
+            version: Which KNN implementation to use in the backend. If version=-1,
+                the correct implementation is selected based on the shapes of the inputs.
+            norm: (int) indicating the norm. Only supports 1 (for L1) and 2 (for L2).
+            return_sorted: (bool) whether to return the nearest neighbors sorted in
+                ascending order of distance.
+
+        Returns:
+            p1_dists: Tensor of shape (N, P1, K) giving the squared distances to
+                the nearest neighbors. This is padded with zeros both where a cloud in p2
+                has fewer than K points and where a cloud in p1 has fewer than P1 points.
+
+            p1_idx: LongTensor of shape (N, P1, K) giving the indices of the
+                K nearest neighbors from points in p1 to points in p2.
+                Concretely, if `p1_idx[n, i, k] = j` then `p2[n, j]` is the k-th nearest
+                neighbors to `p1[n, i]` in `p2[n]`. This is padded with zeros both where a cloud
+                in p2 has fewer than K points and where a cloud in p1 has fewer than P1 points.
+        """
+        if not ((norm == 1) or (norm == 2)):
+            raise ValueError("Support for 1 or 2 norm.")
+
+        idx, dists = _C.knn_points_idx(p1, p2, lengths1, lengths2, norm, K, version)
+
+        # sort KNN in ascending order if K > 1
+        if K > 1 and return_sorted:
+            if lengths2.min() < K:
+                P1 = p1.shape[1]
+                mask = lengths2[:, None] <= torch.arange(K, device=dists.device)[None]
+                # mask has shape [N, K], true where dists irrelevant
+                mask = mask[:, None].expand(-1, P1, -1)
+                # mask has shape [N, P1, K], true where dists irrelevant
+                dists[mask] = float("inf")
+                dists, sort_idx = dists.sort(dim=2)
+                dists[mask] = 0
+            else:
+                dists, sort_idx = dists.sort(dim=2)
+            idx = idx.gather(2, sort_idx)
+
+        ctx.save_for_backward(p1, p2, lengths1, lengths2, idx)
+        ctx.mark_non_differentiable(idx)
+        ctx.norm = norm
+        return dists, idx
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_dists, grad_idx):
+        p1, p2, lengths1, lengths2, idx = ctx.saved_tensors
+        norm = ctx.norm
+        # TODO(gkioxari) Change cast to floats once we add support for doubles.
+        if not (grad_dists.dtype == torch.float32):
+            grad_dists = grad_dists.float()
+        if not (p1.dtype == torch.float32):
+            p1 = p1.float()
+        if not (p2.dtype == torch.float32):
+            p2 = p2.float()
+        grad_p1, grad_p2 = _C.knn_points_backward(
+            p1, p2, lengths1, lengths2, idx, norm, grad_dists
+        )
+        return grad_p1, grad_p2, None, None, None, None, None, None
+
+
+def knn_points(
+    p1: torch.Tensor,
+    p2: torch.Tensor,
+    lengths1: Union[torch.Tensor, None] = None,
+    lengths2: Union[torch.Tensor, None] = None,
+    norm: int = 2,
+    K: int = 1,
+    version: int = -1,
+    return_nn: bool = False,
+    return_sorted: bool = True,
+) -> _KNN:
+    """
+    K-Nearest neighbors on point clouds.
+
+    Args:
+        p1: Tensor of shape (N, P1, D) giving a batch of N point clouds, each
+            containing up to P1 points of dimension D.
+        p2: Tensor of shape (N, P2, D) giving a batch of N point clouds, each
+            containing up to P2 points of dimension D.
+        lengths1: LongTensor of shape (N,) of values in the range [0, P1], giving the
+            length of each pointcloud in p1. Or None to indicate that every cloud has
+            length P1.
+        lengths2: LongTensor of shape (N,) of values in the range [0, P2], giving the
+            length of each pointcloud in p2. Or None to indicate that every cloud has
+            length P2.
+        norm: Integer indicating the norm of the distance. Supports only 1 for L1, 2 for L2.
+        K: Integer giving the number of nearest neighbors to return.
+        version: Which KNN implementation to use in the backend. If version=-1,
+            the correct implementation is selected based on the shapes of the inputs.
+        return_nn: If set to True returns the K nearest neighbors in p2 for each point in p1.
+        return_sorted: (bool) whether to return the nearest neighbors sorted in
+            ascending order of distance.
+
+    Returns:
+        dists: Tensor of shape (N, P1, K) giving the squared distances to
+            the nearest neighbors. This is padded with zeros both where a cloud in p2
+            has fewer than K points and where a cloud in p1 has fewer than P1 points.
+
+        idx: LongTensor of shape (N, P1, K) giving the indices of the
+            K nearest neighbors from points in p1 to points in p2.
+            Concretely, if `p1_idx[n, i, k] = j` then `p2[n, j]` is the k-th nearest
+            neighbors to `p1[n, i]` in `p2[n]`. This is padded with zeros both where a cloud
+            in p2 has fewer than K points and where a cloud in p1 has fewer than P1
+            points.
+
+        nn: Tensor of shape (N, P1, K, D) giving the K nearest neighbors in p2 for
+            each point in p1. Concretely, `p2_nn[n, i, k]` gives the k-th nearest neighbor
+            for `p1[n, i]`. Returned if `return_nn` is True.
+            The nearest neighbors are collected using `knn_gather`
+
+            .. code-block::
+
+                p2_nn = knn_gather(p2, p1_idx, lengths2)
+
+            which is a helper function that allows indexing any tensor of shape (N, P2, U) with
+            the indices `p1_idx` returned by `knn_points`. The output is a tensor
+            of shape (N, P1, K, U).
+
+    """
+    if p1.shape[0] != p2.shape[0]:
+        raise ValueError("pts1 and pts2 must have the same batch dimension.")
+    if p1.shape[2] != p2.shape[2]:
+        raise ValueError("pts1 and pts2 must have the same point dimension.")
+
+    p1 = p1.contiguous()
+    p2 = p2.contiguous()
+
+    P1 = p1.shape[1]
+    P2 = p2.shape[1]
+
+    if lengths1 is None:
+        lengths1 = torch.full((p1.shape[0],), P1, dtype=torch.int64, device=p1.device)
+    if lengths2 is None:
+        lengths2 = torch.full((p1.shape[0],), P2, dtype=torch.int64, device=p1.device)
+
+    p1_dists, p1_idx = _knn_points.apply(
+        p1, p2, lengths1, lengths2, K, version, norm, return_sorted
+    )
+
+    p2_nn = None
+    if return_nn:
+        p2_nn = knn_gather(p2, p1_idx, lengths2)
+
+    return _KNN(dists=p1_dists, idx=p1_idx, knn=p2_nn if return_nn else None)
+
+
+def knn_gather(
+    x: torch.Tensor, idx: torch.Tensor, lengths: Union[torch.Tensor, None] = None
+):
+    """
+    A helper function for knn that allows indexing a tensor x with the indices `idx`
+    returned by `knn_points`.
+
+    For example, if `dists, idx, _ = knn_points(p, x, lengths_p, lengths, K=K)`
+    where p is a tensor of shape (N, L, D) and x a tensor of shape (N, M, D),
+    then one can compute the K nearest neighbors of p with `p_nn = knn_gather(x, idx, lengths)`.
+    It can also be applied for any tensor x of shape (N, M, U) where U != D.
+
+    Args:
+        x: Tensor of shape (N, M, U) containing U-dimensional features to
+            be gathered.
+        idx: LongTensor of shape (N, L, K) giving the indices returned by `knn_points`.
+        lengths: LongTensor of shape (N,) of values in the range [0, M], giving the
+            length of each example in the batch in x. Or None to indicate that every
+            example has length M.
+    Returns:
+        x_out: Tensor of shape (N, L, K, U) resulting from gathering the elements of x
+            with idx, s.t. `x_out[n, l, k] = x[n, idx[n, l, k]]`.
+            If `k >= lengths[n]` then `x_out[n, l, k]` is filled with 0.0.
+    """
+    N, M, U = x.shape
+    _N, L, K = idx.shape
+
+    if N != _N:
+        raise ValueError("x and idx must have same batch dimension.")
+
+    if lengths is None:
+        lengths = torch.full((x.shape[0],), M, dtype=torch.int64, device=x.device)
+
+    idx_expanded = idx[:, :, :, None].expand(-1, -1, -1, U)
+    # idx_expanded has shape [N, L, K, U]
+
+    x_out = x[:, :, None].expand(-1, -1, K, -1).gather(1, idx_expanded)
+    # x_out has shape [N, L, K, U]
+
+    needs_mask = lengths.min() < K
+    if needs_mask:
+        # mask has shape [N, K], true where idx is irrelevant because
+        # that example in x has fewer than K valid points (lengths[n] <= k)
+        mask = lengths[:, None] <= torch.arange(K, device=x.device)[None]
+
+        # expand mask to shape [N, L, K, U]
+        mask = mask[:, None].expand(-1, L, -1)
+        mask = mask[:, :, :, None].expand(-1, -1, -1, U)
+        x_out[mask] = 0.0
+
+    return x_out
diff --git a/pytorch3d/pytorch3d/ops/laplacian_matrices.py b/pytorch3d/pytorch3d/ops/laplacian_matrices.py
new file mode 100644
index 0000000000000000000000000000000000000000..542fbebf52e909a399a38e3d5f3659186095094e
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/laplacian_matrices.py
@@ -0,0 +1,180 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+
+
+# ------------------------ Laplacian Matrices ------------------------ #
+# This file contains implementations of differentiable laplacian matrices.
+# These include
+# 1) Standard Laplacian matrix
+# 2) Cotangent Laplacian matrix
+# 3) Norm Laplacian matrix
+# -------------------------------------------------------------------- #
+
+
+def laplacian(verts: torch.Tensor, edges: torch.Tensor) -> torch.Tensor:
+    """
+    Computes the laplacian matrix.
+    The definition of the laplacian is
+    L[i, j] =    -1       , if i == j
+    L[i, j] = 1 / deg(i)  , if (i, j) is an edge
+    L[i, j] =    0        , otherwise
+    where deg(i) is the degree of the i-th vertex in the graph.
+
+    Args:
+        verts: tensor of shape (V, 3) containing the vertices of the graph
+        edges: tensor of shape (E, 2) containing the vertex indices of each edge
+    Returns:
+        L: Sparse FloatTensor of shape (V, V)
+    """
+    V = verts.shape[0]
+
+    e0, e1 = edges.unbind(1)
+
+    idx01 = torch.stack([e0, e1], dim=1)  # (E, 2)
+    idx10 = torch.stack([e1, e0], dim=1)  # (E, 2)
+    idx = torch.cat([idx01, idx10], dim=0).t()  # (2, 2*E)
+
+    # First, we construct the adjacency matrix,
+    # i.e. A[i, j] = 1 if (i,j) is an edge, or
+    # A[e0, e1] = 1 &  A[e1, e0] = 1
+    ones = torch.ones(idx.shape[1], dtype=torch.float32, device=verts.device)
+    # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`.
+    A = torch.sparse.FloatTensor(idx, ones, (V, V))
+
+    # the sum of i-th row of A gives the degree of the i-th vertex
+    deg = torch.sparse.sum(A, dim=1).to_dense()
+
+    # We construct the Laplacian matrix by adding the non diagonal values
+    # i.e. L[i, j] = 1 ./ deg(i) if (i, j) is an edge
+    deg0 = deg[e0]
+    # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
+    deg0 = torch.where(deg0 > 0.0, 1.0 / deg0, deg0)
+    deg1 = deg[e1]
+    # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
+    deg1 = torch.where(deg1 > 0.0, 1.0 / deg1, deg1)
+    val = torch.cat([deg0, deg1])
+    # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`.
+    L = torch.sparse.FloatTensor(idx, val, (V, V))
+
+    # Then we add the diagonal values L[i, i] = -1.
+    idx = torch.arange(V, device=verts.device)
+    idx = torch.stack([idx, idx], dim=0)
+    ones = torch.ones(idx.shape[1], dtype=torch.float32, device=verts.device)
+    # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`.
+    L -= torch.sparse.FloatTensor(idx, ones, (V, V))
+
+    return L
+
+
+def cot_laplacian(
+    verts: torch.Tensor, faces: torch.Tensor, eps: float = 1e-12
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Returns the Laplacian matrix with cotangent weights and the inverse of the
+    face areas.
+
+    Args:
+        verts: tensor of shape (V, 3) containing the vertices of the graph
+        faces: tensor of shape (F, 3) containing the vertex indices of each face
+    Returns:
+        2-element tuple containing
+        - **L**: Sparse FloatTensor of shape (V,V) for the Laplacian matrix.
+           Here, L[i, j] = cot a_ij + cot b_ij iff (i, j) is an edge in meshes.
+           a_ij and b_ij are the angles opposite edge (i, j) in the faces sharing it.
+        - **inv_areas**: FloatTensor of shape (V, 1) containing the inverse of sum of
+           face areas containing each vertex
+    """
+    V, F = verts.shape[0], faces.shape[0]
+
+    face_verts = verts[faces]
+    v0, v1, v2 = face_verts[:, 0], face_verts[:, 1], face_verts[:, 2]
+
+    # Side lengths of each triangle, of shape (sum(F_n),)
+    # A is the side opposite v0, B is opposite v1, and C is opposite v2
+    A = (v1 - v2).norm(dim=1)
+    B = (v0 - v2).norm(dim=1)
+    C = (v0 - v1).norm(dim=1)
+
+    # Area of each triangle (with Heron's formula); shape is (sum(F_n),)
+    s = 0.5 * (A + B + C)
+    # note that the area can be negative (close to 0) causing nans after sqrt()
+    # we clip it to a small positive value
+    # pyre-fixme[16]: `float` has no attribute `clamp_`.
+    area = (s * (s - A) * (s - B) * (s - C)).clamp_(min=eps).sqrt()
+
+    # Compute cotangents of angles, of shape (sum(F_n), 3)
+    A2, B2, C2 = A * A, B * B, C * C
+    cota = (B2 + C2 - A2) / area
+    cotb = (A2 + C2 - B2) / area
+    cotc = (A2 + B2 - C2) / area
+    cot = torch.stack([cota, cotb, cotc], dim=1)
+    cot /= 4.0
+
+    # Construct a sparse matrix by basically doing:
+    # L[v1, v2] = cota
+    # L[v2, v0] = cotb
+    # L[v0, v1] = cotc
+    ii = faces[:, [1, 2, 0]]
+    jj = faces[:, [2, 0, 1]]
+    idx = torch.stack([ii, jj], dim=0).view(2, F * 3)
+    # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`.
+    L = torch.sparse.FloatTensor(idx, cot.view(-1), (V, V))
+
+    # Make it symmetric; this means we are also setting
+    # L[v2, v1] = cota
+    # L[v0, v2] = cotb
+    # L[v1, v0] = cotc
+    L += L.t()
+
+    # For each vertex, compute the sum of areas for triangles containing it.
+    idx = faces.view(-1)
+    inv_areas = torch.zeros(V, dtype=torch.float32, device=verts.device)
+    val = torch.stack([area] * 3, dim=1).view(-1)
+    inv_areas.scatter_add_(0, idx, val)
+    idx = inv_areas > 0
+    # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
+    inv_areas[idx] = 1.0 / inv_areas[idx]
+    inv_areas = inv_areas.view(-1, 1)
+
+    return L, inv_areas
+
+
+def norm_laplacian(
+    verts: torch.Tensor, edges: torch.Tensor, eps: float = 1e-12
+) -> torch.Tensor:
+    """
+    Norm laplacian computes a variant of the laplacian matrix which weights each
+    affinity with the normalized distance of the neighboring nodes.
+    More concretely,
+    L[i, j] = 1. / wij where wij = ||vi - vj|| if (vi, vj) are neighboring nodes
+
+    Args:
+        verts: tensor of shape (V, 3) containing the vertices of the graph
+        edges: tensor of shape (E, 2) containing the vertex indices of each edge
+    Returns:
+        L: Sparse FloatTensor of shape (V, V)
+    """
+    edge_verts = verts[edges]  # (E, 2, 3)
+    v0, v1 = edge_verts[:, 0], edge_verts[:, 1]
+
+    # Inverse length of each edge (eps added for stability), of shape (E,)
+    w01 = 1.0 / ((v0 - v1).norm(dim=1) + eps)
+
+    # Construct a sparse matrix by basically doing:
+    # L[v0, v1] = w01
+    # L[v1, v0] = w01
+    e01 = edges.t()  # (2, E)
+
+    V = verts.shape[0]
+    # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`.
+    L = torch.sparse.FloatTensor(e01, w01, (V, V))
+    L = L + L.t()
+
+    return L
diff --git a/pytorch3d/pytorch3d/ops/marching_cubes.py b/pytorch3d/pytorch3d/ops/marching_cubes.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ae3d16e98178f11faf86a7d2ee883daefdbf5a9
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/marching_cubes.py
@@ -0,0 +1,303 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Optional, Tuple
+
+import torch
+from pytorch3d import _C
+from pytorch3d.ops.marching_cubes_data import EDGE_TO_VERTICES, FACE_TABLE, INDEX
+from pytorch3d.transforms import Translate
+from torch.autograd import Function
+
+
+# Absolute tolerance used by Cube.vert_interp when deciding whether two scalar
+# values (a vertex value and the isolevel, or the two vertex values of an edge)
+# coincide.
+EPS = 0.00001
+
+
+class Cube:
+    def __init__(
+        self,
+        bfl_v: Tuple[int, int, int],
+        volume: torch.Tensor,
+        isolevel: float,
+    ) -> None:
+        """
+        Initializes a cube given the bottom front left vertex coordinate
+        and computes the cube configuration given vertex values and isolevel.
+
+        Edge and vertex convention:
+
+                    v4_______e4____________v5
+                    /|                    /|
+                   / |                   / |
+                e7/  |                e5/  |
+                 /___|______e6_________/   |
+              v7|    |                 |v6 |e9
+                |    |                 |   |
+                |    |e8               |e10|
+             e11|    |                 |   |
+                |    |______e0_________|___|
+                |   / v0(bfl_v)        |   |v1
+                |  /                   |  /
+                | /e3                  | /e1
+                |/_____________________|/
+                v3         e2          v2
+
+        Args:
+            bfl_vertex: a tuple of size 3 corresponding to the bottom front left vertex
+                of the cube in (x, y, z) format
+            volume: the 3D scalar data
+            isolevel: the isosurface value used as a threshold for determining whether a point
+                is inside/outside the volume
+        """
+        x, y, z = bfl_v
+        self.x, self.y, self.z = x, y, z
+        self.bfl_v = bfl_v
+        self.verts = [
+            [x + (v & 1), y + (v >> 1 & 1), z + (v >> 2 & 1)] for v in range(8)
+        ]  # vertex position (x, y, z) for v0-v1-v4-v5-v3-v2-v7-v6
+
+        # Calculates cube configuration index given values of the cube vertices
+        self.cube_index = 0
+        for i in range(8):
+            v = self.verts[INDEX[i]]
+            value = volume[v[2]][v[1]][v[0]]
+            if value < isolevel:
+                self.cube_index |= 1 << i
+
+    def get_vpair_from_edge(self, edge: int, W: int, H: int) -> Tuple[int, int]:
+        """
+        Get a tuple of global vertex ID from a local edge ID
+        Global vertex ID is calculated as (x + dx) + (y + dy) * W + (z + dz) * W * H
+
+        Args:
+            edge: local edge ID in the cube
+            bfl_vertex: bottom-front-left coordinate of the cube
+
+        Returns:
+            a pair of global vertex ID
+        """
+        v1, v2 = EDGE_TO_VERTICES[edge]  # two end-points on the edge
+        v1_id = self.verts[v1][0] + self.verts[v1][1] * W + self.verts[v1][2] * W * H
+        v2_id = self.verts[v2][0] + self.verts[v2][1] * W + self.verts[v2][2] * W * H
+        return (v1_id, v2_id)
+
+    def vert_interp(
+        self,
+        isolevel: float,
+        edge: int,
+        vol: torch.Tensor,
+    ) -> List:
+        """
+        Linearly interpolate a vertex where an isosurface cuts an edge
+        between the two endpoint vertices, based on their values
+
+        Args:
+            isolevel: the isosurface value to use as the threshold to determine
+                whether points are within a volume.
+            edge: edge (ID) to interpolate
+            cube: current cube vertices
+            vol: 3D scalar field
+
+        Returns:
+            interpolated vertex: position of the interpolated vertex on the edge
+        """
+        v1, v2 = EDGE_TO_VERTICES[edge]
+        p1, p2 = self.verts[v1], self.verts[v2]
+        val1, val2 = (
+            vol[p1[2]][p1[1]][p1[0]],
+            vol[p2[2]][p2[1]][p2[0]],
+        )
+        point = None
+        if abs(isolevel - val1) < EPS:
+            point = p1
+        elif abs(isolevel - val2) < EPS:
+            point = p2
+        elif abs(val1 - val2) < EPS:
+            point = p1
+
+        if point is None:
+            mu = (isolevel - val1) / (val2 - val1)
+            x1, y1, z1 = p1
+            x2, y2, z2 = p2
+            x = x1 + mu * (x2 - x1)
+            y = y1 + mu * (y2 - y1)
+            z = z1 + mu * (z2 - z1)
+        else:
+            x, y, z = point
+        return [x, y, z]
+
+
+def marching_cubes_naive(
+    vol_batch: torch.Tensor,
+    isolevel: Optional[float] = None,
+    return_local_coords: bool = True,
+) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
+    """
+    Runs the classic marching cubes algorithm, iterating over
+    the coordinates of the volume and using a given isolevel
+    for determining intersected edges of cubes.
+    Returns vertices and faces of the obtained mesh.
+    This operation is non-differentiable.
+
+    Args:
+        vol_batch: a Tensor of size (N, D, H, W) corresponding to
+            a batch of 3D scalar fields
+        isolevel: the isosurface value to use as the threshold to determine
+            whether points are within a volume. If None, then the average of the
+            maximum and minimum value of the scalar field will be used.
+        return_local_coords: bool. If True the output vertices will be in local coordinates in
+        the range [-1, 1] x [-1, 1] x [-1, 1]. If False they will be in the range
+        [0, W-1] x [0, H-1] x [0, D-1]
+    Returns:
+        verts: [{V_0}, {V_1}, ...] List of N sets of vertices of shape (|V_i|, 3) in FloatTensor
+        faces: [{F_0}, {F_1}, ...] List of N sets of faces of shape (|F_i|, 3) in LongTensors
+    """
+    batched_verts, batched_faces = [], []
+    D, H, W = vol_batch.shape[1:]
+
+    # each edge is represented with its two endpoints (represented with global id)
+    for i in range(len(vol_batch)):
+        vol = vol_batch[i]
+        thresh = ((vol.max() + vol.min()) / 2).item() if isolevel is None else isolevel
+        vpair_to_edge = {}  # maps from tuple of edge endpoints to edge_id
+        edge_id_to_v = {}  # maps from edge ID to vertex position
+        uniq_edge_id = {}  # unique edge IDs
+        verts = []  # store vertex positions
+        faces = []  # store face indices
+        # enumerate each cell in the 3d grid
+        for z in range(0, D - 1):
+            for y in range(0, H - 1):
+                for x in range(0, W - 1):
+                    cube = Cube((x, y, z), vol, thresh)
+                    edge_indices = FACE_TABLE[cube.cube_index]
+                    # cube is entirely in/out of the surface
+                    if len(edge_indices) == 0:
+                        continue
+
+                    # gather mesh vertices/faces by processing each cube
+                    interp_points = [[0.0, 0.0, 0.0]] * 12
+                    # triangle vertex IDs and positions
+                    tri = []
+                    ps = []
+                    for i, edge in enumerate(edge_indices):
+                        interp_points[edge] = cube.vert_interp(thresh, edge, vol)
+
+                        # Bind interpolated vertex with a global edge_id, which
+                        # is represented by a pair of vertex ids (v1_id, v2_id)
+                        # corresponding to a local edge.
+                        (v1_id, v2_id) = cube.get_vpair_from_edge(edge, W, H)
+                        edge_id = vpair_to_edge.setdefault(
+                            (v1_id, v2_id), len(vpair_to_edge)
+                        )
+                        tri.append(edge_id)
+                        ps.append(interp_points[edge])
+                        # when the isolevel are the same as the edge endpoints, the interploated
+                        # vertices can share the same values, and lead to degenerate triangles.
+                        if (
+                            (i + 1) % 3 == 0
+                            and ps[0] != ps[1]
+                            and ps[1] != ps[2]
+                            and ps[2] != ps[0]
+                        ):
+                            for j, edge_id in enumerate(tri):
+                                edge_id_to_v[edge_id] = ps[j]
+                                if edge_id not in uniq_edge_id:
+                                    uniq_edge_id[edge_id] = len(verts)
+                                    verts.append(edge_id_to_v[edge_id])
+                            faces.append([uniq_edge_id[tri[j]] for j in range(3)])
+                            tri = []
+                            ps = []
+
+        if len(faces) > 0 and len(verts) > 0:
+            verts = torch.tensor(verts, dtype=vol.dtype)
+            # Convert from world coordinates ([0, D-1], [0, H-1], [0, W-1]) to
+            # local coordinates in the range [-1, 1]
+            if return_local_coords:
+                verts = (
+                    Translate(x=+1.0, y=+1.0, z=+1.0, device=vol_batch.device)
+                    .scale((vol_batch.new_tensor([W, H, D])[None] - 1) * 0.5)
+                    .inverse()
+                ).transform_points(verts[None])[0]
+            batched_verts.append(verts)
+            batched_faces.append(torch.tensor(faces, dtype=torch.int64))
+        else:
+            batched_verts.append([])
+            batched_faces.append([])
+    return batched_verts, batched_faces
+
+
+########################################
+# Marching Cubes Implementation in C++/Cuda
+########################################
+class _marching_cubes(Function):
+    """
+    Torch Function wrapper for marching_cubes implementation.
+    This function is not differentiable. An autograd wrapper is used
+    to ensure an error if user tries to get gradients.
+    """
+
+    @staticmethod
+    def forward(ctx, vol, isolevel):
+        verts, faces, ids = _C.marching_cubes(vol, isolevel)
+        return verts, faces, ids
+
+    @staticmethod
+    def backward(ctx, grad_verts, grad_faces):
+        raise ValueError("marching_cubes backward is not supported")
+
+
+def marching_cubes(
+    vol_batch: torch.Tensor,
+    isolevel: Optional[float] = None,
+    return_local_coords: bool = True,
+) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
+    """
+    Run marching cubes over a volume scalar field with a designated isolevel.
+    Returns vertices and faces of the obtained mesh.
+    This operation is non-differentiable.
+
+    Args:
+        vol_batch: a Tensor of size (N, D, H, W) corresponding to
+            a batch of 3D scalar fields
+        isolevel: float used as threshold to determine if a point is inside/outside
+            the volume.  If None, then the average of the maximum and minimum value
+            of the scalar field is used.
+        return_local_coords: bool. If True the output vertices will be in local coordinates in
+            the range [-1, 1] x [-1, 1] x [-1, 1]. If False they will be in the range
+            [0, W-1] x [0, H-1] x [0, D-1]
+
+    Returns:
+        verts: [{V_0}, {V_1}, ...] List of N sets of vertices of shape (|V_i|, 3) in FloatTensor
+        faces: [{F_0}, {F_1}, ...] List of N sets of faces of shape (|F_i|, 3) in LongTensors
+    """
+    batched_verts, batched_faces = [], []
+    D, H, W = vol_batch.shape[1:]
+    for i in range(len(vol_batch)):
+        vol = vol_batch[i]
+        thresh = ((vol.max() + vol.min()) / 2).item() if isolevel is None else isolevel
+        verts, faces, ids = _marching_cubes.apply(vol, thresh)
+        if len(faces) > 0 and len(verts) > 0:
+            # Convert from world coordinates ([0, D-1], [0, H-1], [0, W-1]) to
+            # local coordinates in the range [-1, 1]
+            if return_local_coords:
+                verts = (
+                    Translate(x=+1.0, y=+1.0, z=+1.0, device=vol.device)
+                    .scale((vol.new_tensor([W, H, D])[None] - 1) * 0.5)
+                    .inverse()
+                ).transform_points(verts[None])[0]
+            # deduplication for cuda
+            if vol.is_cuda:
+                unique_ids, inverse_idx = torch.unique(ids, return_inverse=True)
+                verts_ = verts.new_zeros(unique_ids.shape[0], 3)
+                verts_[inverse_idx] = verts
+                verts = verts_
+                faces = inverse_idx[faces]
+            batched_verts.append(verts)
+            batched_faces.append(faces)
+        else:
+            batched_verts.append([])
+            batched_faces.append([])
+    return batched_verts, batched_faces
diff --git a/pytorch3d/pytorch3d/ops/marching_cubes_data.py b/pytorch3d/pytorch3d/ops/marching_cubes_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..802f67da265cca2c1081fdb6e6bd770efd3fdbec
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/marching_cubes_data.py
@@ -0,0 +1,289 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+# Maps each local edge ID (the list index, 0-11) to the pair of cube vertices
+# at its two end points. The vertex numbers are indices into Cube.verts
+# (see marching_cubes.py), where bit 0 of the index is the x offset from the
+# cube's bottom front left vertex, bit 1 the y offset and bit 2 the z offset.
+EDGE_TO_VERTICES = [
+    [0, 1],
+    [1, 5],
+    [4, 5],
+    [0, 4],
+    [2, 3],
+    [3, 7],
+    [6, 7],
+    [2, 6],
+    [0, 2],
+    [1, 3],
+    [5, 7],
+    [4, 6],
+]
+
+# A list of lists mapping a cube_index (a given configuration)
+# to the list of triangular faces of that configuration. Each face is represented
+# by 3 consecutive local edge IDs (indices into EDGE_TO_VERTICES); the vertices
+# of the face lie on those edges. A configuration has at most 5 faces.
+#
+# Table taken from http://paulbourke.net/geometry/polygonise/
+FACE_TABLE = [
+    [],
+    [0, 8, 3],
+    [0, 1, 9],
+    [1, 8, 3, 9, 8, 1],
+    [1, 2, 10],
+    [0, 8, 3, 1, 2, 10],
+    [9, 2, 10, 0, 2, 9],
+    [2, 8, 3, 2, 10, 8, 10, 9, 8],
+    [3, 11, 2],
+    [0, 11, 2, 8, 11, 0],
+    [1, 9, 0, 2, 3, 11],
+    [1, 11, 2, 1, 9, 11, 9, 8, 11],
+    [3, 10, 1, 11, 10, 3],
+    [0, 10, 1, 0, 8, 10, 8, 11, 10],
+    [3, 9, 0, 3, 11, 9, 11, 10, 9],
+    [9, 8, 10, 10, 8, 11],
+    [4, 7, 8],
+    [4, 3, 0, 7, 3, 4],
+    [0, 1, 9, 8, 4, 7],
+    [4, 1, 9, 4, 7, 1, 7, 3, 1],
+    [1, 2, 10, 8, 4, 7],
+    [3, 4, 7, 3, 0, 4, 1, 2, 10],
+    [9, 2, 10, 9, 0, 2, 8, 4, 7],
+    [2, 10, 9, 2, 9, 7, 2, 7, 3, 7, 9, 4],
+    [8, 4, 7, 3, 11, 2],
+    [11, 4, 7, 11, 2, 4, 2, 0, 4],
+    [9, 0, 1, 8, 4, 7, 2, 3, 11],
+    [4, 7, 11, 9, 4, 11, 9, 11, 2, 9, 2, 1],
+    [3, 10, 1, 3, 11, 10, 7, 8, 4],
+    [1, 11, 10, 1, 4, 11, 1, 0, 4, 7, 11, 4],
+    [4, 7, 8, 9, 0, 11, 9, 11, 10, 11, 0, 3],
+    [4, 7, 11, 4, 11, 9, 9, 11, 10],
+    [9, 5, 4],
+    [9, 5, 4, 0, 8, 3],
+    [0, 5, 4, 1, 5, 0],
+    [8, 5, 4, 8, 3, 5, 3, 1, 5],
+    [1, 2, 10, 9, 5, 4],
+    [3, 0, 8, 1, 2, 10, 4, 9, 5],
+    [5, 2, 10, 5, 4, 2, 4, 0, 2],
+    [2, 10, 5, 3, 2, 5, 3, 5, 4, 3, 4, 8],
+    [9, 5, 4, 2, 3, 11],
+    [0, 11, 2, 0, 8, 11, 4, 9, 5],
+    [0, 5, 4, 0, 1, 5, 2, 3, 11],
+    [2, 1, 5, 2, 5, 8, 2, 8, 11, 4, 8, 5],
+    [10, 3, 11, 10, 1, 3, 9, 5, 4],
+    [4, 9, 5, 0, 8, 1, 8, 10, 1, 8, 11, 10],
+    [5, 4, 0, 5, 0, 11, 5, 11, 10, 11, 0, 3],
+    [5, 4, 8, 5, 8, 10, 10, 8, 11],
+    [9, 7, 8, 5, 7, 9],
+    [9, 3, 0, 9, 5, 3, 5, 7, 3],
+    [0, 7, 8, 0, 1, 7, 1, 5, 7],
+    [1, 5, 3, 3, 5, 7],
+    [9, 7, 8, 9, 5, 7, 10, 1, 2],
+    [10, 1, 2, 9, 5, 0, 5, 3, 0, 5, 7, 3],
+    [8, 0, 2, 8, 2, 5, 8, 5, 7, 10, 5, 2],
+    [2, 10, 5, 2, 5, 3, 3, 5, 7],
+    [7, 9, 5, 7, 8, 9, 3, 11, 2],
+    [9, 5, 7, 9, 7, 2, 9, 2, 0, 2, 7, 11],
+    [2, 3, 11, 0, 1, 8, 1, 7, 8, 1, 5, 7],
+    [11, 2, 1, 11, 1, 7, 7, 1, 5],
+    [9, 5, 8, 8, 5, 7, 10, 1, 3, 10, 3, 11],
+    [5, 7, 0, 5, 0, 9, 7, 11, 0, 1, 0, 10, 11, 10, 0],
+    [11, 10, 0, 11, 0, 3, 10, 5, 0, 8, 0, 7, 5, 7, 0],
+    [11, 10, 5, 7, 11, 5],
+    [10, 6, 5],
+    [0, 8, 3, 5, 10, 6],
+    [9, 0, 1, 5, 10, 6],
+    [1, 8, 3, 1, 9, 8, 5, 10, 6],
+    [1, 6, 5, 2, 6, 1],
+    [1, 6, 5, 1, 2, 6, 3, 0, 8],
+    [9, 6, 5, 9, 0, 6, 0, 2, 6],
+    [5, 9, 8, 5, 8, 2, 5, 2, 6, 3, 2, 8],
+    [2, 3, 11, 10, 6, 5],
+    [11, 0, 8, 11, 2, 0, 10, 6, 5],
+    [0, 1, 9, 2, 3, 11, 5, 10, 6],
+    [5, 10, 6, 1, 9, 2, 9, 11, 2, 9, 8, 11],
+    [6, 3, 11, 6, 5, 3, 5, 1, 3],
+    [0, 8, 11, 0, 11, 5, 0, 5, 1, 5, 11, 6],
+    [3, 11, 6, 0, 3, 6, 0, 6, 5, 0, 5, 9],
+    [6, 5, 9, 6, 9, 11, 11, 9, 8],
+    [5, 10, 6, 4, 7, 8],
+    [4, 3, 0, 4, 7, 3, 6, 5, 10],
+    [1, 9, 0, 5, 10, 6, 8, 4, 7],
+    [10, 6, 5, 1, 9, 7, 1, 7, 3, 7, 9, 4],
+    [6, 1, 2, 6, 5, 1, 4, 7, 8],
+    [1, 2, 5, 5, 2, 6, 3, 0, 4, 3, 4, 7],
+    [8, 4, 7, 9, 0, 5, 0, 6, 5, 0, 2, 6],
+    [7, 3, 9, 7, 9, 4, 3, 2, 9, 5, 9, 6, 2, 6, 9],
+    [3, 11, 2, 7, 8, 4, 10, 6, 5],
+    [5, 10, 6, 4, 7, 2, 4, 2, 0, 2, 7, 11],
+    [0, 1, 9, 4, 7, 8, 2, 3, 11, 5, 10, 6],
+    [9, 2, 1, 9, 11, 2, 9, 4, 11, 7, 11, 4, 5, 10, 6],
+    [8, 4, 7, 3, 11, 5, 3, 5, 1, 5, 11, 6],
+    [5, 1, 11, 5, 11, 6, 1, 0, 11, 7, 11, 4, 0, 4, 11],
+    [0, 5, 9, 0, 6, 5, 0, 3, 6, 11, 6, 3, 8, 4, 7],
+    [6, 5, 9, 6, 9, 11, 4, 7, 9, 7, 11, 9],
+    [10, 4, 9, 6, 4, 10],
+    [4, 10, 6, 4, 9, 10, 0, 8, 3],
+    [10, 0, 1, 10, 6, 0, 6, 4, 0],
+    [8, 3, 1, 8, 1, 6, 8, 6, 4, 6, 1, 10],
+    [1, 4, 9, 1, 2, 4, 2, 6, 4],
+    [3, 0, 8, 1, 2, 9, 2, 4, 9, 2, 6, 4],
+    [0, 2, 4, 4, 2, 6],
+    [8, 3, 2, 8, 2, 4, 4, 2, 6],
+    [10, 4, 9, 10, 6, 4, 11, 2, 3],
+    [0, 8, 2, 2, 8, 11, 4, 9, 10, 4, 10, 6],
+    [3, 11, 2, 0, 1, 6, 0, 6, 4, 6, 1, 10],
+    [6, 4, 1, 6, 1, 10, 4, 8, 1, 2, 1, 11, 8, 11, 1],
+    [9, 6, 4, 9, 3, 6, 9, 1, 3, 11, 6, 3],
+    [8, 11, 1, 8, 1, 0, 11, 6, 1, 9, 1, 4, 6, 4, 1],
+    [3, 11, 6, 3, 6, 0, 0, 6, 4],
+    [6, 4, 8, 11, 6, 8],
+    [7, 10, 6, 7, 8, 10, 8, 9, 10],
+    [0, 7, 3, 0, 10, 7, 0, 9, 10, 6, 7, 10],
+    [10, 6, 7, 1, 10, 7, 1, 7, 8, 1, 8, 0],
+    [10, 6, 7, 10, 7, 1, 1, 7, 3],
+    [1, 2, 6, 1, 6, 8, 1, 8, 9, 8, 6, 7],
+    [2, 6, 9, 2, 9, 1, 6, 7, 9, 0, 9, 3, 7, 3, 9],
+    [7, 8, 0, 7, 0, 6, 6, 0, 2],
+    [7, 3, 2, 6, 7, 2],
+    [2, 3, 11, 10, 6, 8, 10, 8, 9, 8, 6, 7],
+    [2, 0, 7, 2, 7, 11, 0, 9, 7, 6, 7, 10, 9, 10, 7],
+    [1, 8, 0, 1, 7, 8, 1, 10, 7, 6, 7, 10, 2, 3, 11],
+    [11, 2, 1, 11, 1, 7, 10, 6, 1, 6, 7, 1],
+    [8, 9, 6, 8, 6, 7, 9, 1, 6, 11, 6, 3, 1, 3, 6],
+    [0, 9, 1, 11, 6, 7],
+    [7, 8, 0, 7, 0, 6, 3, 11, 0, 11, 6, 0],
+    [7, 11, 6],
+    [7, 6, 11],
+    [3, 0, 8, 11, 7, 6],
+    [0, 1, 9, 11, 7, 6],
+    [8, 1, 9, 8, 3, 1, 11, 7, 6],
+    [10, 1, 2, 6, 11, 7],
+    [1, 2, 10, 3, 0, 8, 6, 11, 7],
+    [2, 9, 0, 2, 10, 9, 6, 11, 7],
+    [6, 11, 7, 2, 10, 3, 10, 8, 3, 10, 9, 8],
+    [7, 2, 3, 6, 2, 7],
+    [7, 0, 8, 7, 6, 0, 6, 2, 0],
+    [2, 7, 6, 2, 3, 7, 0, 1, 9],
+    [1, 6, 2, 1, 8, 6, 1, 9, 8, 8, 7, 6],
+    [10, 7, 6, 10, 1, 7, 1, 3, 7],
+    [10, 7, 6, 1, 7, 10, 1, 8, 7, 1, 0, 8],
+    [0, 3, 7, 0, 7, 10, 0, 10, 9, 6, 10, 7],
+    [7, 6, 10, 7, 10, 8, 8, 10, 9],
+    [6, 8, 4, 11, 8, 6],
+    [3, 6, 11, 3, 0, 6, 0, 4, 6],
+    [8, 6, 11, 8, 4, 6, 9, 0, 1],
+    [9, 4, 6, 9, 6, 3, 9, 3, 1, 11, 3, 6],
+    [6, 8, 4, 6, 11, 8, 2, 10, 1],
+    [1, 2, 10, 3, 0, 11, 0, 6, 11, 0, 4, 6],
+    [4, 11, 8, 4, 6, 11, 0, 2, 9, 2, 10, 9],
+    [10, 9, 3, 10, 3, 2, 9, 4, 3, 11, 3, 6, 4, 6, 3],
+    [8, 2, 3, 8, 4, 2, 4, 6, 2],
+    [0, 4, 2, 4, 6, 2],
+    [1, 9, 0, 2, 3, 4, 2, 4, 6, 4, 3, 8],
+    [1, 9, 4, 1, 4, 2, 2, 4, 6],
+    [8, 1, 3, 8, 6, 1, 8, 4, 6, 6, 10, 1],
+    [10, 1, 0, 10, 0, 6, 6, 0, 4],
+    [4, 6, 3, 4, 3, 8, 6, 10, 3, 0, 3, 9, 10, 9, 3],
+    [10, 9, 4, 6, 10, 4],
+    [4, 9, 5, 7, 6, 11],
+    [0, 8, 3, 4, 9, 5, 11, 7, 6],
+    [5, 0, 1, 5, 4, 0, 7, 6, 11],
+    [11, 7, 6, 8, 3, 4, 3, 5, 4, 3, 1, 5],
+    [9, 5, 4, 10, 1, 2, 7, 6, 11],
+    [6, 11, 7, 1, 2, 10, 0, 8, 3, 4, 9, 5],
+    [7, 6, 11, 5, 4, 10, 4, 2, 10, 4, 0, 2],
+    [3, 4, 8, 3, 5, 4, 3, 2, 5, 10, 5, 2, 11, 7, 6],
+    [7, 2, 3, 7, 6, 2, 5, 4, 9],
+    [9, 5, 4, 0, 8, 6, 0, 6, 2, 6, 8, 7],
+    [3, 6, 2, 3, 7, 6, 1, 5, 0, 5, 4, 0],
+    [6, 2, 8, 6, 8, 7, 2, 1, 8, 4, 8, 5, 1, 5, 8],
+    [9, 5, 4, 10, 1, 6, 1, 7, 6, 1, 3, 7],
+    [1, 6, 10, 1, 7, 6, 1, 0, 7, 8, 7, 0, 9, 5, 4],
+    [4, 0, 10, 4, 10, 5, 0, 3, 10, 6, 10, 7, 3, 7, 10],
+    [7, 6, 10, 7, 10, 8, 5, 4, 10, 4, 8, 10],
+    [6, 9, 5, 6, 11, 9, 11, 8, 9],
+    [3, 6, 11, 0, 6, 3, 0, 5, 6, 0, 9, 5],
+    [0, 11, 8, 0, 5, 11, 0, 1, 5, 5, 6, 11],
+    [6, 11, 3, 6, 3, 5, 5, 3, 1],
+    [1, 2, 10, 9, 5, 11, 9, 11, 8, 11, 5, 6],
+    [0, 11, 3, 0, 6, 11, 0, 9, 6, 5, 6, 9, 1, 2, 10],
+    [11, 8, 5, 11, 5, 6, 8, 0, 5, 10, 5, 2, 0, 2, 5],
+    [6, 11, 3, 6, 3, 5, 2, 10, 3, 10, 5, 3],
+    [5, 8, 9, 5, 2, 8, 5, 6, 2, 3, 8, 2],
+    [9, 5, 6, 9, 6, 0, 0, 6, 2],
+    [1, 5, 8, 1, 8, 0, 5, 6, 8, 3, 8, 2, 6, 2, 8],
+    [1, 5, 6, 2, 1, 6],
+    [1, 3, 6, 1, 6, 10, 3, 8, 6, 5, 6, 9, 8, 9, 6],
+    [10, 1, 0, 10, 0, 6, 9, 5, 0, 5, 6, 0],
+    [0, 3, 8, 5, 6, 10],
+    [10, 5, 6],
+    [11, 5, 10, 7, 5, 11],
+    [11, 5, 10, 11, 7, 5, 8, 3, 0],
+    [5, 11, 7, 5, 10, 11, 1, 9, 0],
+    [10, 7, 5, 10, 11, 7, 9, 8, 1, 8, 3, 1],
+    [11, 1, 2, 11, 7, 1, 7, 5, 1],
+    [0, 8, 3, 1, 2, 7, 1, 7, 5, 7, 2, 11],
+    [9, 7, 5, 9, 2, 7, 9, 0, 2, 2, 11, 7],
+    [7, 5, 2, 7, 2, 11, 5, 9, 2, 3, 2, 8, 9, 8, 2],
+    [2, 5, 10, 2, 3, 5, 3, 7, 5],
+    [8, 2, 0, 8, 5, 2, 8, 7, 5, 10, 2, 5],
+    [9, 0, 1, 5, 10, 3, 5, 3, 7, 3, 10, 2],
+    [9, 8, 2, 9, 2, 1, 8, 7, 2, 10, 2, 5, 7, 5, 2],
+    [1, 3, 5, 3, 7, 5],
+    [0, 8, 7, 0, 7, 1, 1, 7, 5],
+    [9, 0, 3, 9, 3, 5, 5, 3, 7],
+    [9, 8, 7, 5, 9, 7],
+    [5, 8, 4, 5, 10, 8, 10, 11, 8],
+    [5, 0, 4, 5, 11, 0, 5, 10, 11, 11, 3, 0],
+    [0, 1, 9, 8, 4, 10, 8, 10, 11, 10, 4, 5],
+    [10, 11, 4, 10, 4, 5, 11, 3, 4, 9, 4, 1, 3, 1, 4],
+    [2, 5, 1, 2, 8, 5, 2, 11, 8, 4, 5, 8],
+    [0, 4, 11, 0, 11, 3, 4, 5, 11, 2, 11, 1, 5, 1, 11],
+    [0, 2, 5, 0, 5, 9, 2, 11, 5, 4, 5, 8, 11, 8, 5],
+    [9, 4, 5, 2, 11, 3],
+    [2, 5, 10, 3, 5, 2, 3, 4, 5, 3, 8, 4],
+    [5, 10, 2, 5, 2, 4, 4, 2, 0],
+    [3, 10, 2, 3, 5, 10, 3, 8, 5, 4, 5, 8, 0, 1, 9],
+    [5, 10, 2, 5, 2, 4, 1, 9, 2, 9, 4, 2],
+    [8, 4, 5, 8, 5, 3, 3, 5, 1],
+    [0, 4, 5, 1, 0, 5],
+    [8, 4, 5, 8, 5, 3, 9, 0, 5, 0, 3, 5],
+    [9, 4, 5],
+    [4, 11, 7, 4, 9, 11, 9, 10, 11],
+    [0, 8, 3, 4, 9, 7, 9, 11, 7, 9, 10, 11],
+    [1, 10, 11, 1, 11, 4, 1, 4, 0, 7, 4, 11],
+    [3, 1, 4, 3, 4, 8, 1, 10, 4, 7, 4, 11, 10, 11, 4],
+    [4, 11, 7, 9, 11, 4, 9, 2, 11, 9, 1, 2],
+    [9, 7, 4, 9, 11, 7, 9, 1, 11, 2, 11, 1, 0, 8, 3],
+    [11, 7, 4, 11, 4, 2, 2, 4, 0],
+    [11, 7, 4, 11, 4, 2, 8, 3, 4, 3, 2, 4],
+    [2, 9, 10, 2, 7, 9, 2, 3, 7, 7, 4, 9],
+    [9, 10, 7, 9, 7, 4, 10, 2, 7, 8, 7, 0, 2, 0, 7],
+    [3, 7, 10, 3, 10, 2, 7, 4, 10, 1, 10, 0, 4, 0, 10],
+    [1, 10, 2, 8, 7, 4],
+    [4, 9, 1, 4, 1, 7, 7, 1, 3],
+    [4, 9, 1, 4, 1, 7, 0, 8, 1, 8, 7, 1],
+    [4, 0, 3, 7, 4, 3],
+    [4, 8, 7],
+    [9, 10, 8, 10, 11, 8],
+    [3, 0, 9, 3, 9, 11, 11, 9, 10],
+    [0, 1, 10, 0, 10, 8, 8, 10, 11],
+    [3, 1, 10, 11, 3, 10],
+    [1, 2, 11, 1, 11, 9, 9, 11, 8],
+    [3, 0, 9, 3, 9, 11, 1, 2, 9, 2, 11, 9],
+    [0, 2, 11, 8, 0, 11],
+    [3, 2, 11],
+    [2, 3, 8, 2, 8, 10, 10, 8, 9],
+    [9, 10, 2, 0, 9, 2],
+    [2, 3, 8, 2, 8, 10, 0, 1, 8, 1, 10, 8],
+    [1, 10, 2],
+    [1, 3, 8, 9, 1, 8],
+    [0, 9, 1],
+    [0, 3, 8],
+    [],
+]
+
+# Maps bit position i of the cube configuration index (vertex v_i in the
+# diagram of Cube.__init__, i in 0-7) to the position of that vertex in
+# Cube.verts
+INDEX = [0, 1, 5, 4, 2, 3, 7, 6]
diff --git a/pytorch3d/pytorch3d/ops/mesh_face_areas_normals.py b/pytorch3d/pytorch3d/ops/mesh_face_areas_normals.py
new file mode 100644
index 0000000000000000000000000000000000000000..f41ff1dcbe1370c59009f0c1426ab18ad50d2b12
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/mesh_face_areas_normals.py
@@ -0,0 +1,66 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from pytorch3d import _C
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+
+class _MeshFaceAreasNormals(Function):
+    """
+    Torch autograd Function wrapper for face areas & normals C++/CUDA implementations.
+    """
+
+    @staticmethod
+    def forward(ctx, verts, faces):
+        """
+        Args:
+            ctx: Context object used to calculate gradients.
+            verts: FloatTensor of shape (V, 3), representing the packed
+                batch verts tensor.
+            faces: LongTensor of shape (F, 3), representing the packed
+                batch faces tensor
+        Returns:
+            areas: FloatTensor of shape (F,) with the areas of each face
+            normals: FloatTensor of shape (F,3) with the normals of each face
+        """
+        if not (verts.dim() == 2):
+            raise ValueError("verts need to be of shape Vx3.")
+        if not (verts.shape[1] == 3):
+            raise ValueError("verts need to be of shape Vx3.")
+        if not (faces.dim() == 2):
+            raise ValueError("faces need to be of shape Fx3.")
+        if not (faces.shape[1] == 3):
+            raise ValueError("faces need to be of shape Fx3.")
+        if not (faces.dtype == torch.int64):
+            raise ValueError("faces need to be of type torch.int64.")
+        # TODO(gkioxari) Change cast to floats once we add support for doubles.
+        if not (verts.dtype == torch.float32):
+            verts = verts.float()
+
+        ctx.save_for_backward(verts, faces)
+        areas, normals = _C.face_areas_normals_forward(verts, faces)
+        return areas, normals
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_areas, grad_normals):
+        grad_areas = grad_areas.contiguous()
+        grad_normals = grad_normals.contiguous()
+        verts, faces = ctx.saved_tensors
+        # TODO(gkioxari) Change cast to floats once we add support for doubles.
+        if not (grad_areas.dtype == torch.float32):
+            grad_areas = grad_areas.float()
+        if not (grad_normals.dtype == torch.float32):
+            grad_normals = grad_normals.float()
+        grad_verts = _C.face_areas_normals_backward(
+            grad_areas, grad_normals, verts, faces
+        )
+        return grad_verts, None
+
+
+mesh_face_areas_normals = _MeshFaceAreasNormals.apply
diff --git a/pytorch3d/pytorch3d/ops/mesh_filtering.py b/pytorch3d/pytorch3d/ops/mesh_filtering.py
new file mode 100644
index 0000000000000000000000000000000000000000..90cf1211c52d06776d6ad2e8804645e363da57e9
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/mesh_filtering.py
@@ -0,0 +1,60 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from pytorch3d.ops import norm_laplacian
+from pytorch3d.structures import Meshes, utils as struct_utils
+
+
+# ------------------------ Mesh Smoothing ------------------------ #
+# This file contains differentiable operators to filter meshes
+# The ops include
+# 1) Taubin Smoothing
+# TODO(gkioxari) add more! :)
+# ---------------------------------------------------------------- #
+
+
+# ----------------------- Taubin Smoothing ----------------------- #
+
+
+def taubin_smoothing(
+    meshes: Meshes, lambd: float = 0.53, mu: float = -0.53, num_iter: int = 10
+) -> Meshes:
+    """
+    Taubin smoothing [1] is an iterative smoothing operator for meshes.
+    At each iteration
+        verts := (1 - λ) * verts + λ * L * verts
+        verts := (1 - μ) * verts + μ * L * verts
+
+    This function returns a new mesh with smoothed vertices.
+    Args:
+        meshes: Meshes input to be smoothed
+        lambd, mu: float parameters for Taubin smoothing,
+            lambd > 0, mu < 0
+        num_iter: number of iterations to execute smoothing
+    Returns:
+        mesh: Smoothed input Meshes
+
+    [1] Curve and Surface Smoothing without Shrinkage,
+        Gabriel Taubin, ICCV 1997
+    """
+    verts = meshes.verts_packed()  # V x 3
+    edges = meshes.edges_packed()  # E x 3
+
+    for _ in range(num_iter):
+        L = norm_laplacian(verts, edges)
+        total_weight = torch.sparse.sum(L, dim=1).to_dense().view(-1, 1)
+        verts = (1 - lambd) * verts + lambd * torch.mm(L, verts) / total_weight
+
+        L = norm_laplacian(verts, edges)
+        total_weight = torch.sparse.sum(L, dim=1).to_dense().view(-1, 1)
+        verts = (1 - mu) * verts + mu * torch.mm(L, verts) / total_weight
+
+    verts_list = struct_utils.packed_to_list(
+        verts, meshes.num_verts_per_mesh().tolist()
+    )
+    mesh = Meshes(verts=list(verts_list), faces=meshes.faces_list())
+    return mesh
diff --git a/pytorch3d/pytorch3d/ops/packed_to_padded.py b/pytorch3d/pytorch3d/ops/packed_to_padded.py
new file mode 100644
index 0000000000000000000000000000000000000000..5072e6245ea01003c1e30a218b7f56097dc0dbca
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/packed_to_padded.py
@@ -0,0 +1,196 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from pytorch3d import _C
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+
+class _PackedToPadded(Function):
+    """
+    Torch autograd Function wrapper for packed_to_padded C++/CUDA implementations.
+    """
+
+    @staticmethod
+    def forward(ctx, inputs, first_idxs, max_size):
+        """Pads a packed tensor; raises ValueError on invalid rank, dtype or max_size.
+        Args:
+            ctx: Context object used to calculate gradients.
+            inputs: FloatTensor of shape (F, D), representing the packed batch tensor.
+                e.g. areas for faces in a batch of meshes.
+            first_idxs: LongTensor of shape (N,) where N is the number of
+                elements in the batch and `first_idxs[i] = f`
+                means that the inputs for batch element i begin at `inputs[f]`.
+            max_size: Max length of an element in the batch.
+
+        Returns:
+            inputs_padded: FloatTensor of shape (N, max_size, D), with max_size as
+                given above. The values for batch element i, which start at
+                `inputs[first_idxs[i]]`, will be copied to `inputs_padded[i, :]`,
+                with zeros padding out the extra inputs.
+        """
+        if not (inputs.dim() == 2):
+            raise ValueError("input can only be 2-dimensional.")
+        if not (first_idxs.dim() == 1):
+            raise ValueError("first_idxs can only be 1-dimensional.")
+        if not (inputs.dtype == torch.float32):
+            raise ValueError("input has to be of type torch.float32.")
+        if not (first_idxs.dtype == torch.int64):
+            raise ValueError("first_idxs has to be of type torch.int64.")
+        if not isinstance(max_size, int):
+            raise ValueError("max_size has to be int.")
+
+        ctx.save_for_backward(first_idxs)  # backward needs the offsets to re-pack the grads
+        ctx.num_inputs = int(inputs.shape[0])  # F, the packed length backward must restore
+        inputs, first_idxs = inputs.contiguous(), first_idxs.contiguous()
+        inputs_padded = _C.packed_to_padded(inputs, first_idxs, max_size)
+        return inputs_padded
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_output):
+        grad_output = grad_output.contiguous()
+        first_idxs = ctx.saved_tensors[0]
+        num_inputs = ctx.num_inputs
+        grad_input = _C.padded_to_packed(grad_output, first_idxs, num_inputs)  # inverse op on the grads
+        return grad_input, None, None  # no gradients for first_idxs and max_size
+
+
+def packed_to_padded(
+    inputs: torch.Tensor, first_idxs: torch.LongTensor, max_size: int
+) -> torch.Tensor:
+    """
+    Torch wrapper that handles allowed input shapes. See description below.
+
+    Args:
+        inputs: FloatTensor of shape (F,) or (F, ...), representing the packed
+            batch tensor, e.g. areas for faces in a batch of meshes.
+        first_idxs: LongTensor of shape (N,) where N is the number of
+            elements in the batch and `first_idxs[i] = f`
+            means that the inputs for batch element i begin at `inputs[f]`.
+        max_size: Max length of an element in the batch.
+
+    Returns:
+        inputs_padded: FloatTensor of shape (N, max_size) or (N, max_size, ...)
+            with max_size as given above. The values for batch element i,
+            which start at `inputs[first_idxs[i]]`, will be copied to
+            `inputs_padded[i, :]`, with zeros padding out the extra inputs.
+
+    To handle the allowed input shapes, we convert the inputs tensor of shape
+    (F,) to (F, 1). We reshape the output back to (N, max_size) from
+    (N, max_size, 1).
+    """
+    # if inputs is of shape (F,), reshape into (F, 1)
+    input_shape = inputs.shape
+    n_dims = inputs.dim()
+    if n_dims == 1:
+        inputs = inputs.unsqueeze(1)
+    else:
+        inputs = inputs.reshape(input_shape[0], -1)  # flatten all trailing dims into one
+    inputs_padded = _PackedToPadded.apply(inputs, first_idxs, max_size)
+    # if inputs was 1-dimensional, reshape output to (N, max_size) from (N, max_size, 1)
+    # else reshape output to (N, max_size, ...)
+    if n_dims == 1:
+        return inputs_padded.squeeze(2)
+    if n_dims == 2:
+        return inputs_padded
+    return inputs_padded.view(*inputs_padded.shape[:2], *input_shape[1:])  # restore trailing dims
+
+
+class _PaddedToPacked(Function):
+    """
+    Torch autograd Function wrapper for padded_to_packed C++/CUDA implementations.
+    """
+
+    @staticmethod
+    def forward(ctx, inputs, first_idxs, num_inputs):
+        """Packs a padded tensor; raises ValueError on invalid rank, dtype or num_inputs.
+        Args:
+            ctx: Context object used to calculate gradients.
+            inputs: FloatTensor of shape (N, max_size, D), representing
+                the padded tensor, e.g. areas for faces in a batch of meshes.
+            first_idxs: LongTensor of shape (N,) where N is the number of
+                elements in the batch and `first_idxs[i] = f`
+                means that the inputs for batch element i begin at `inputs_packed[f]`.
+            num_inputs: Number of packed entries (= F)
+
+        Returns:
+            inputs_packed: FloatTensor of shape (F, D) where the rows starting at
+                `inputs_packed[first_idxs[i]]` are copied from `inputs[i, :]`.
+        """
+        if not (inputs.dim() == 3):
+            raise ValueError("input can only be 3-dimensional.")
+        if not (first_idxs.dim() == 1):
+            raise ValueError("first_idxs can only be 1-dimensional.")
+        if not (inputs.dtype == torch.float32):
+            raise ValueError("input has to be of type torch.float32.")
+        if not (first_idxs.dtype == torch.int64):
+            raise ValueError("first_idxs has to be of type torch.int64.")
+        if not isinstance(num_inputs, int):
+            raise ValueError("num_inputs has to be int.")
+
+        ctx.save_for_backward(first_idxs)  # backward needs the offsets to re-pad the grads
+        ctx.max_size = inputs.shape[1]  # padded length that backward must restore
+        inputs, first_idxs = inputs.contiguous(), first_idxs.contiguous()
+        inputs_packed = _C.padded_to_packed(inputs, first_idxs, num_inputs)
+        return inputs_packed
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_output):
+        grad_output = grad_output.contiguous()
+        first_idxs = ctx.saved_tensors[0]
+        max_size = ctx.max_size
+        grad_input = _C.packed_to_padded(grad_output, first_idxs, max_size)  # inverse op on the grads
+        return grad_input, None, None  # no gradients for first_idxs and num_inputs
+
+
+def padded_to_packed(
+    inputs: torch.Tensor,
+    first_idxs: torch.LongTensor,
+    num_inputs: int,
+    max_size_dim: int = 1,
+) -> torch.Tensor:
+    """
+    Torch wrapper that handles allowed input shapes. See description below.
+
+    Args:
+        inputs: FloatTensor of shape (N, ..., max_size) or (N, ..., max_size, ...),
+            representing the padded tensor, e.g. areas for faces in a batch of
+            meshes, where max_size occurs on max_size_dim-th position.
+        first_idxs: LongTensor of shape (N,) where N is the number of
+            elements in the batch and `first_idxs[i] = f`
+            means that the inputs for batch element i begin at `inputs_packed[f]`.
+        num_inputs: Number of packed entries (= F)
+        max_size_dim: the dimension to be packed
+
+    Returns:
+        inputs_packed: FloatTensor of shape (F,) or (F, ...) where
+            `inputs_packed[first_idx[i]:first_idx[i+1]] = inputs[i, ..., :delta[i]]`,
+            where `delta[i] = first_idx[i+1] - first_idx[i]`.
+
+    To handle the allowed input shapes, we convert the inputs tensor of shape
+    (N, max_size) to (N, max_size, 1). We reshape the output back to (F,) from
+    (F, 1).
+    """
+    n_dims = inputs.dim()
+    # move the variable dim to position 1
+    inputs = inputs.movedim(max_size_dim, 1)
+
+    # if inputs is of shape (N, max_size), reshape into (N, max_size, 1)
+    input_shape = inputs.shape  # shape after movedim, i.e. (N, max_size, ...)
+    if n_dims == 2:
+        inputs = inputs.unsqueeze(2)
+    else:
+        inputs = inputs.reshape(*input_shape[:2], -1)  # flatten all trailing dims into one
+    inputs_packed = _PaddedToPacked.apply(inputs, first_idxs, num_inputs)
+    # if input is flat, reshape output to (F,) from (F, 1)
+    # else reshape output to (F, ...)
+    if n_dims == 2:
+        return inputs_packed.squeeze(1)
+
+    return inputs_packed.view(-1, *input_shape[2:])  # restore trailing dims (post-movedim order)
diff --git a/pytorch3d/pytorch3d/ops/perspective_n_points.py b/pytorch3d/pytorch3d/ops/perspective_n_points.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6b7d6816660b190e87cd16459abae4501f54558
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/perspective_n_points.py
@@ -0,0 +1,410 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+This file contains Efficient PnP algorithm for Perspective-n-Points problem.
+It finds a camera position (defined by rotation `R` and translation `T`) that
+minimizes re-projection error between the given 3D points `x` and
+the corresponding uncalibrated 2D points `y`.
+"""
+
+import warnings
+from typing import NamedTuple, Optional
+
+import torch
+import torch.nn.functional as F
+from pytorch3d.ops import points_alignment, utils as oputil
+
+
+class EpnpSolution(NamedTuple):  # result tuple returned by efficient_pnp
+    x_cam: torch.Tensor  # (minibatch, num_points, 3) points used to fit the camera
+    R: torch.Tensor  # (minibatch, 3, 3) rotation matrices
+    T: torch.Tensor  # (minibatch, 3) translation vectors
+    err_2d: torch.Tensor  # (minibatch,) mean 2D re-projection errors
+    err_3d: torch.Tensor  # (minibatch,) mean algebraic (3D) errors
+
+
+def _define_control_points(x, weight, storage_opts=None):
+    """
+    Returns 4 control points (3 unit axes + origin, shifted by `wmean(x, weight)`).
+    Args:
+        x: Batch of 3-dimensional points of shape `(minibatch, num_points, 3)`.
+        weight: Batch of non-negative weights of
+            shape `(minibatch, num_point)`. `None` means equal weights.
+        storage_opts: dict of keyword arguments to the tensor constructor.
+    """
+    storage_opts = storage_opts or {}
+    x_mean = oputil.wmean(x, weight)  # presumably the (weighted) centroid of x -- see oputil.wmean
+    c_world = F.pad(torch.eye(3, **storage_opts), (0, 0, 0, 1), value=0.0).expand_as(  # eye(3) plus a zero row: 4 x 3
+        x[:, :4, :]  # NOTE(review): expand_as looks like it needs num_points >= 4 -- confirm
+    )
+    return c_world + x_mean  # shift the canonical frame by x_mean
+
+
+def _compute_alphas(x, c_world):
+    """
+    Computes barycentric coordinates of x in the frame c_world.
+    Args:
+        x: Batch of 3-dimensional points of shape `(minibatch, num_points, 3)`.
+        c_world: control points in world coordinates.
+    """
+    x = F.pad(x, (0, 1), value=1.0)  # append a column of ones (homogeneous coordinates)
+    c = F.pad(c_world, (0, 1), value=1.0)  # same for the control points
+    return torch.matmul(x, torch.inverse(c))  # B x N x 4
+
+
+def _build_M(y, alphas, weight):
+    """Returns the matrix defining the reprojection equations.
+    Args:
+        y: projected points in camera coordinates of size B x N x 2
+        alphas: barycentric coordinates of size B x N x 4
+        weight: Batch of non-negative weights of
+            shape `(minibatch, num_point)`. `None` means equal weights.
+    """
+    bs, n, _ = y.size()
+
+    # prepend t with the column of v's
+    def prepad(t, v):
+        return F.pad(t, (1, 0), value=v)
+
+    if weight is not None:
+        # weight the alphas in order to get a correctly weighted version of M
+        alphas = alphas * weight[:, :, None]
+
+    # outer left-multiply by alphas
+    def lm_alphas(t):
+        return torch.matmul(alphas[..., None], t).reshape(bs, n, 12)  # (4 x 1) @ (1 x 3) per point, flattened
+
+    M = torch.cat(
+        (
+            lm_alphas(
+                prepad(prepad(-y[:, :, 0, None, None], 0.0), 1.0)  # 1 x 3 row [1, 0, -u]
+            ),  # u constraints
+            lm_alphas(
+                prepad(prepad(-y[:, :, 1, None, None], 1.0), 0.0)  # 1 x 3 row [0, 1, -v]
+            ),  # v constraints
+        ),
+        dim=-1,
+    ).reshape(bs, -1, 12)  # B x 2N x 12: a u-row and a v-row per point
+
+    return M
+
+
+def _null_space(m, kernel_dim):
+    """Finds the null space (kernel) basis of the matrix
+    Args:
+        m: the batch of input matrices, B x N x 12
+        kernel_dim: number of dimensions to approximate the kernel
+    Returns:
+        * a batch of null space basis vectors
+            of size B x 4 x 3 x kernel_dim
+        * a batch of spectral values where near-0s correspond to actual
+            kernel vectors, of size B x kernel_dim
+    """
+    mTm = torch.bmm(m.transpose(1, 2), m)  # B x 12 x 12
+    s, v = torch.linalg.eigh(mTm)  # eigh returns eigenvalues in ascending order
+    return v[:, :, :kernel_dim].reshape(-1, 4, 3, kernel_dim), s[:, :kernel_dim]  # keep the smallest kernel_dim
+
+
+def _reproj_error(y_hat, y, weight, eps=1e-9):
+    """Projects estimated 3D points and computes the reprojection error
+    Args:
+        y_hat: a batch of predicted 2D points in homogeneous coordinates
+        y: a batch of ground-truth 2D points
+        weight: Batch of non-negative weights of
+            shape `(minibatch, num_point)`. `None` means equal weights.
+    Returns:
+        Optionally weighted mean of the Euclidean distances between y and y_hat.
+    """
+    y_hat = y_hat / torch.clamp(y_hat[..., 2:], eps)  # perspective divide; z is clamped below at eps
+    dist = ((y - y_hat[..., :2]) ** 2).sum(dim=-1, keepdim=True) ** 0.5  # per-point L2 distance
+    return oputil.wmean(dist, weight)[:, 0, 0]
+
+
+def _algebraic_error(x_w_rotated, x_cam, weight):
+    """Computes the residual of Umeyama in 3D.
+    Args:
+        x_w_rotated: The given 3D points rotated with the predicted camera.
+        x_cam: the lifted 2D points y
+        weight: Batch of non-negative weights of
+            shape `(minibatch, num_point)`. `None` means equal weights.
+    Returns:
+        Optionally weighted MSE of difference between x_w_rotated and x_cam.
+    """
+    dist = ((x_w_rotated - x_cam) ** 2).sum(dim=-1, keepdim=True)  # per-point squared distance
+    return oputil.wmean(dist, weight)[:, 0, 0]
+
+
+def _compute_norm_sign_scaling_factor(c_cam, alphas, x_world, y, weight, eps=1e-9):
+    """Given a solution, adjusts the scale and flip
+    Args:
+        c_cam: control points in camera coordinates
+        alphas: barycentric coordinates of the points
+        x_world: Batch of 3-dimensional points of shape `(minibatch, num_points, 3)`.
+        y: Batch of 2-dimensional points of shape `(minibatch, num_points, 2)`.
+        weight: Batch of non-negative weights of
+            shape `(minibatch, num_point)`. `None` means equal weights.
+        eps: epsilon to threshold negative `z` values
+    """
+    # position of reference points in camera coordinates
+    x_cam = torch.matmul(alphas, c_cam)
+
+    x_cam = x_cam * (1.0 - 2.0 * (oputil.wmean(x_cam[..., 2:], weight) < 0).float())  # multiply by -1 where the averaged z is negative
+    if torch.any(x_cam[..., 2:] < -eps):
+        neg_rate = oputil.wmean((x_cam[..., 2:] < 0).float(), weight, dim=(0, 1)).item()
+        warnings.warn("\nEPnP: %2.2f%% points have z<0." % (neg_rate * 100.0))
+
+    R, T, s = points_alignment.corresponding_points_alignment(
+        x_world, x_cam, weight, estimate_scale=True
+    )
+    s = s.clamp(eps)  # keep the scale at least eps before dividing by it
+    x_cam = x_cam / s[:, None, None]  # undo the estimated scale on the camera-frame points
+    T = T / s[:, None]  # and on the translation
+    x_w_rotated = torch.matmul(x_world, R) + T[:, None, :]  # x R + T
+    err_2d = _reproj_error(x_w_rotated, y, weight)
+    err_3d = _algebraic_error(x_w_rotated, x_cam, weight)
+
+    return EpnpSolution(x_cam, R, T, err_2d, err_3d)
+
+
+def _gen_pairs(input, dim=-2, reducer=lambda a, b: ((a - b) ** 2).sum(dim=-1)):
+    """Generates all pairs of different rows and then applies the reducer
+    Args:
+        input: a tensor
+        dim: a dimension to generate pairs across
+        reducer: a function of generated pair of rows to apply (beyond just concat)
+    Returns:
+        for default args, for A x B x C input, will output A x (B choose 2)
+    """
+    n = input.size()[dim]
+    range = torch.arange(n)  # NOTE: `range` (like `input`) shadows a builtin inside this function
+    idx = torch.combinations(range).to(input).long()  # (n choose 2) x 2 index pairs, moved to input's device
+    left = input.index_select(dim, idx[:, 0])  # first member of every pair
+    right = input.index_select(dim, idx[:, 1])  # second member of every pair
+    return reducer(left, right)
+
+
+def _kernel_vec_distances(v):
+    """Computes the coefficients for linearization of the quadratic system
+        to match all pairwise distances between 4 control points (dim=1).
+        The last dimension corresponds to the coefficients for quadratic terms
+        Bij = Bi * Bj, where Bi and Bj correspond to kernel vectors.
+    Args:
+        v: tensor of B x 4 x 3 x D, where D is dim(kernel), usually 4
+    Returns:
+        a tensor of B x 6 x [(D choose 2) + D];
+        for D=4, the last dim means [B11 B22 B33 B44 B12 B13 B14 B23 B24 B34].
+    """
+    dv = _gen_pairs(v, dim=-3, reducer=lambda a, b: a - b)  # B x 6 x 3 x D
+
+    # we should take dot-product of all (i,j), i < j, with coeff 2
+    rows_2ij = 2.0 * _gen_pairs(dv, dim=-1, reducer=lambda a, b: (a * b).sum(dim=-2))
+    # this should produce B x 6 x (D choose 2) tensor
+
+    # we should take dot-product of all (i,i)
+    rows_ii = (dv**2).sum(dim=-2)
+    # this should produce B x 6 x D tensor
+
+    return torch.cat((rows_ii, rows_2ij), dim=-1)  # squared terms first, then the cross terms
+
+
+def _solve_lstsq_subcols(rhs, lhs, lhs_col_idx):
+    """Solves an over-determined linear system for selected LHS columns.
+        A batched version of `torch.lstsq`.
+    Args:
+        rhs: right-hand side vectors
+        lhs: left-hand side matrices
+        lhs_col_idx: a list of column indices to keep in lhs
+    Returns:
+        a least-squares solution for lhs * X = rhs
+    """
+    lhs = lhs.index_select(-1, torch.tensor(lhs_col_idx, device=lhs.device).long())  # keep only the chosen columns
+    return torch.matmul(torch.pinverse(lhs), rhs[:, :, None])  # pseudo-inverse solve; rhs becomes a column
+
+
+def _binary_sign(t):  # sign with no zero output: zero maps to +1
+    return (t >= 0).to(t) * 2.0 - 1.0  # +1 where t >= 0, otherwise -1, in t's dtype
+
+
+def _find_null_space_coords_1(kernel_dsts, cw_dst, eps=1e-9):
+    """Solves case 1 from the paper [1]; solve for 4 coefficients:
+       [B11 B22 B33 B44 B12 B13 B14 B23 B24 B34]
+         ^               ^   ^   ^
+    Args:
+        kernel_dsts: distances between kernel vectors
+        cw_dst: distances between control points
+    Returns:
+        coefficients to weight kernel vectors
+    [1] Moreno-Noguer, F., Lepetit, V., & Fua, P. (2009).
+    EPnP: An Accurate O(n) solution to the PnP problem.
+    International Journal of Computer Vision.
+    https://www.epfl.ch/labs/cvlab/software/multi-view-stereo/epnp/
+    """
+    beta = _solve_lstsq_subcols(cw_dst, kernel_dsts, [0, 4, 5, 6])  # rows: B11, B12, B13, B14
+
+    beta = beta * _binary_sign(beta[:, :1, :])  # flip the whole solution so that B11 >= 0
+    return beta / torch.clamp(beta[:, :1, :] ** 0.5, eps)  # divide by sqrt(B11), clamped below at eps
+
+
+def _find_null_space_coords_2(kernel_dsts, cw_dst):
+    """Solves case 2 from the paper [1]; solve for 3 coefficients:
+        [B11 B22 B33 B44 B12 B13 B14 B23 B24 B34]
+          ^   ^           ^
+    Args:
+        kernel_dsts: distances between kernel vectors
+        cw_dst: distances between control points
+    Returns:
+        coefficients to weight kernel vectors
+    [1] Moreno-Noguer, F., Lepetit, V., & Fua, P. (2009).
+    EPnP: An Accurate O(n) solution to the PnP problem.
+    International Journal of Computer Vision.
+    https://www.epfl.ch/labs/cvlab/software/multi-view-stereo/epnp/
+    """
+    beta = _solve_lstsq_subcols(cw_dst, kernel_dsts, [0, 4, 1])  # rows: B11, B12, B22
+
+    coord_0 = (beta[:, :1, :].abs() ** 0.5) * _binary_sign(beta[:, 1:2, :])  # sqrt|B11| with the sign of B12
+    coord_1 = (beta[:, 2:3, :].abs() ** 0.5) * (
+        (beta[:, :1, :] >= 0) == (beta[:, 2:3, :] >= 0)  # sqrt|B22|, zeroed unless B11 and B22 agree in sign
+    ).float()
+
+    return torch.cat((coord_0, coord_1, torch.zeros_like(beta[:, :2, :])), dim=1)  # last two coefficients are 0
+
+
+def _find_null_space_coords_3(kernel_dsts, cw_dst, eps=1e-9):
+    """Solves case 3 from the paper [1]; solve for 5 coefficients:
+        [B11 B22 B33 B44 B12 B13 B14 B23 B24 B34]
+          ^   ^           ^   ^       ^
+    Args:
+        kernel_dsts: distances between kernel vectors
+        cw_dst: distances between control points
+    Returns:
+        coefficients to weight kernel vectors
+    [1] Moreno-Noguer, F., Lepetit, V., & Fua, P. (2009).
+    EPnP: An Accurate O(n) solution to the PnP problem.
+    International Journal of Computer Vision.
+    https://www.epfl.ch/labs/cvlab/software/multi-view-stereo/epnp/
+    """
+    beta = _solve_lstsq_subcols(cw_dst, kernel_dsts, [0, 4, 1, 5, 7])  # rows: B11, B12, B22, B13, B23
+
+    coord_0 = (beta[:, :1, :].abs() ** 0.5) * _binary_sign(beta[:, 1:2, :])  # sqrt|B11| with the sign of B12
+    coord_1 = (beta[:, 2:3, :].abs() ** 0.5) * (
+        (beta[:, :1, :] >= 0) == (beta[:, 2:3, :] >= 0)  # sqrt|B22|, zeroed unless B11 and B22 agree in sign
+    ).float()
+    coord_2 = beta[:, 3:4, :] / torch.clamp(coord_0[:, :1, :], eps)  # B13 / coord_0, denominator clamped below at eps
+
+    return torch.cat(
+        (coord_0, coord_1, coord_2, torch.zeros_like(beta[:, :1, :])), dim=1  # fourth coefficient is 0
+    )
+
+
+def efficient_pnp(
+    x: torch.Tensor,
+    y: torch.Tensor,
+    weights: Optional[torch.Tensor] = None,
+    skip_quadratic_eq: bool = False,
+) -> EpnpSolution:
+    """
+    Implements Efficient PnP algorithm [1] for Perspective-n-Points problem:
+    finds a camera position (defined by rotation `R` and translation `T`) that
+    minimizes re-projection error between the given 3D points `x` and
+    the corresponding uncalibrated 2D points `y`, i.e. solves
+
+    `y[i] = Proj(x[i] R[i] + T[i])`
+
+    in the least-squares sense, where `i` are indices within the batch, and
+    `Proj` is the perspective projection operator: `Proj([x y z]) = [x/z y/z]`.
+    In the noise-less case, 4 points are enough to find the solution as long
+    as they are not co-planar.
+
+    Args:
+        x: Batch of 3-dimensional points of shape `(minibatch, num_points, 3)`.
+        y: Batch of 2-dimensional points of shape `(minibatch, num_points, 2)`.
+        weights: Batch of non-negative weights of
+            shape `(minibatch, num_point)`. `None` means equal weights.
+        skip_quadratic_eq: If True, assumes the solution space for the
+            linear system is one-dimensional, i.e. takes the scaled eigenvector
+            that corresponds to the smallest eigenvalue as a solution.
+            If False, finds the candidate coordinates in the potentially
+            4D null space by approximately solving the systems of quadratic
+            equations. The best candidate is chosen by examining the 2D
+            re-projection error. While this option finds a better solution,
+            especially when the number of points is small or perspective
+            distortions are low (the points are far away), it may be more
+            difficult to back-propagate through.
+
+    Returns:
+        `EpnpSolution` namedtuple containing elements:
+        **x_cam**: Batch of transformed points `x` that is used to find
+            the camera parameters, of shape `(minibatch, num_points, 3)`.
+            In the general (noisy) case, they are not exactly equal to
+            `x[i] R[i] + T[i]` but are some affine transform of `x[i]`s.
+        **R**: Batch of rotation matrices of shape `(minibatch, 3, 3)`.
+        **T**: Batch of translation vectors of shape `(minibatch, 3)`.
+        **err_2d**: Batch of mean 2D re-projection errors of shape
+            `(minibatch,)`. Specifically, if `yhat` is the re-projection for
+            the `i`-th batch element, it returns `mean_j norm(yhat_j - y_j)`
+            where `j` iterates over points and `norm` denotes the L2 norm.
+        **err_3d**: Batch of mean algebraic errors of shape `(minibatch,)`.
+            Specifically, those are squared distances between `x[i] R[i] + T[i]`
+            and the estimated points `x_cam` on the rays defined by `y`.
+
+    [1] Moreno-Noguer, F., Lepetit, V., & Fua, P. (2009).
+    EPnP: An Accurate O(n) solution to the PnP problem.
+    International Journal of Computer Vision.
+    https://www.epfl.ch/labs/cvlab/software/multi-view-stereo/epnp/
+    """
+    # define control points in a world coordinate system (centered on the 3d
+    # points centroid); 4 x 3
+    # TODO: more stable when initialised with the center and eigenvectors!
+    c_world = _define_control_points(
+        x.detach(), weights, storage_opts={"dtype": x.dtype, "device": x.device}
+    )
+
+    # find the linear combination of the control points to represent the 3d points
+    alphas = _compute_alphas(x, c_world)
+
+    M = _build_M(y, alphas, weights)
+
+    # Compute kernel M
+    kernel, spectrum = _null_space(M, 4)  # spectrum is not used below
+
+    c_world_distances = _gen_pairs(c_world)  # squared distances between control-point pairs
+    kernel_dsts = _kernel_vec_distances(kernel)
+
+    betas = (
+        []
+        if skip_quadratic_eq
+        else [
+            fnsc(kernel_dsts, c_world_distances)
+            for fnsc in [
+                _find_null_space_coords_1,
+                _find_null_space_coords_2,
+                _find_null_space_coords_3,
+            ]
+        ]
+    )
+
+    c_cam_variants = [kernel] + [  # first candidate: the raw kernel (only its first vector is used below)
+        torch.matmul(kernel, beta[:, None, :, :]) for beta in betas  # kernel vectors combined with each beta
+    ]
+
+    solutions = [
+        _compute_norm_sign_scaling_factor(c_cam[..., 0], alphas, x, y, weights)
+        for c_cam in c_cam_variants
+    ]
+
+    sol_zipped = EpnpSolution(*(torch.stack(list(col)) for col in zip(*solutions)))  # each field stacked over candidates (dim 0)
+    best = torch.argmin(sol_zipped.err_2d, dim=0)  # per batch element, the candidate with the lowest 2D error
+
+    def gather1d(source, idx):
+        # reduces dim=0 (the stacked candidates) by picking, for every batch
+        # element, the candidate given by the 1D tensor idx (a batched index_select).
+        return source.gather(
+            0,
+            idx.reshape(1, -1, *([1] * (len(source.shape) - 2))).expand_as(source[:1]),
+        )[0]
+
+    return EpnpSolution(*[gather1d(sol_col, best) for sol_col in sol_zipped])
diff --git a/pytorch3d/pytorch3d/ops/points_alignment.py b/pytorch3d/pytorch3d/ops/points_alignment.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b22b3cccc18350f7cdd2c49f8c345032e2f616d
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/points_alignment.py
@@ -0,0 +1,389 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import warnings
+from typing import List, NamedTuple, Optional, TYPE_CHECKING, Union
+
+import torch
+from pytorch3d.ops import knn_points
+from pytorch3d.structures import utils as strutil
+
+from . import utils as oputil
+
+
+if TYPE_CHECKING:
+    from pytorch3d.structures.pointclouds import Pointclouds
+
+
+# named tuples for inputs/outputs
+class SimilarityTransform(NamedTuple):
+    R: torch.Tensor  # batch of (orthonormal) matrices, shape (minibatch, d, d)
+    T: torch.Tensor  # batch of translations, shape (minibatch, d)
+    s: torch.Tensor  # batch of scaling factors, shape (minibatch,)
+
+
+class ICPSolution(NamedTuple):
+    converged: bool  # True if the relative-rmse stopping criterion was met
+    rmse: Union[torch.Tensor, None]  # rmse at termination; None if no iteration ran
+    Xt: torch.Tensor  # X after the final (R, T, s); a Pointclouds if X was one
+    RTs: SimilarityTransform  # the final similarity transform
+    t_history: List[SimilarityTransform]  # transform after each ICP iteration
+
+
+def iterative_closest_point(
+    X: Union[torch.Tensor, "Pointclouds"],
+    Y: Union[torch.Tensor, "Pointclouds"],
+    init_transform: Optional[SimilarityTransform] = None,
+    max_iterations: int = 100,
+    relative_rmse_thr: float = 1e-6,
+    estimate_scale: bool = False,
+    allow_reflection: bool = False,
+    verbose: bool = False,
+) -> ICPSolution:
+    """
+    Executes the iterative closest point (ICP) algorithm [1, 2] in order to find
+    a similarity transformation (rotation `R`, translation `T`, and
+    optionally scale `s`) between two given differently-sized sets of
+    `d`-dimensional points `X` and `Y`, such that:
+
+    `s[i] X[i] R[i] + T[i] = Y[NN[i]]`,
+
+    for all batch indices `i` in the least squares sense. Here, Y[NN[i]] stands
+    for the nearest neighbors in `Y` of each point in `X`.
+    Note, however, that the solution is only a local optimum.
+
+    Args:
+        **X**: Batch of `d`-dimensional points
+            of shape `(minibatch, num_points_X, d)` or a `Pointclouds` object.
+        **Y**: Batch of `d`-dimensional points
+            of shape `(minibatch, num_points_Y, d)` or a `Pointclouds` object.
+        **init_transform**: A named-tuple `SimilarityTransform` of tensors
+            `R`, `T`, `s`, where `R` is a batch of orthonormal matrices of
+            shape `(minibatch, d, d)`, `T` is a batch of translations
+            of shape `(minibatch, d)` and `s` is a batch of scaling factors
+            of shape `(minibatch,)`.
+        **max_iterations**: The maximum number of ICP iterations.
+        **relative_rmse_thr**: ICP stops once the relative decrease of the rmse
+            between two iterations is <= this threshold for every batch element.
+        **estimate_scale**: If `True`, also estimates a scaling component `s`
+            of the transformation. Otherwise assumes the identity
+            scale and returns a tensor of ones.
+        **allow_reflection**: If `True`, allows the algorithm to return `R`
+            which is orthonormal but has determinant==-1.
+        **verbose**: If `True`, prints status messages during each ICP iteration.
+
+    Returns:
+        A named tuple `ICPSolution` with the following fields:
+        **converged**: A boolean flag denoting whether the algorithm converged
+            successfully (=`True`) or not (=`False`).
+        **rmse**: Attained root mean squared error after termination of ICP.
+        **Xt**: The point cloud `X` transformed with the final transformation
+            (`R`, `T`, `s`). If `X` is a `Pointclouds` object, returns an
+            instance of `Pointclouds`, otherwise returns `torch.Tensor`.
+        **RTs**: A named tuple `SimilarityTransform` containing
+        a batch of similarity transforms with fields:
+            **R**: Batch of orthonormal matrices of shape `(minibatch, d, d)`.
+            **T**: Batch of translations of shape `(minibatch, d)`.
+            **s**: batch of scaling factors of shape `(minibatch, )`.
+        **t_history**: A list of named tuples `SimilarityTransform`
+            the transformation parameters after each ICP iteration.
+
+    References:
+        [1] Besl & McKay: A Method for Registration of 3-D Shapes. TPAMI, 1992.
+        [2] https://en.wikipedia.org/wiki/Iterative_closest_point
+    """
+
+    # make sure we convert input Pointclouds structures to
+    # padded tensors of shape (N, P, 3)
+    Xt, num_points_X = oputil.convert_pointclouds_to_tensor(X)
+    Yt, num_points_Y = oputil.convert_pointclouds_to_tensor(Y)
+
+    b, size_X, dim = Xt.shape
+
+    if (Xt.shape[2] != Yt.shape[2]) or (Xt.shape[0] != Yt.shape[0]):
+        raise ValueError(
+            "Point sets X and Y have to have the same "
+            + "number of batches and data dimensions."
+        )
+
+    if ((num_points_Y < Yt.shape[1]).any() or (num_points_X < Xt.shape[1]).any()) and (
+        num_points_Y != num_points_X
+    ).any():
+        # we have a heterogeneous input (e.g. because X/Y is
+        # an instance of Pointclouds)
+        mask_X = (
+            torch.arange(size_X, dtype=torch.int64, device=Xt.device)[None]
+            < num_points_X[:, None]
+        ).type_as(Xt)
+    else:
+        mask_X = Xt.new_ones(b, size_X)
+
+    # clone the initial point cloud
+    Xt_init = Xt.clone()
+
+    if init_transform is not None:
+        # parse the initial transform from the input and apply to Xt
+        try:
+            R, T, s = init_transform
+            assert (
+                R.shape == torch.Size((b, dim, dim))
+                and T.shape == torch.Size((b, dim))
+                and s.shape == torch.Size((b,))
+            )
+        except Exception:
+            raise ValueError(
+                "The initial transformation init_transform has to be "
+                "a named tuple SimilarityTransform with elements (R, T, s). "
+                "R are dim x dim orthonormal matrices of shape "
+                "(minibatch, dim, dim), T is a batch of dim-dimensional "
+                "translations of shape (minibatch, dim) and s is a batch "
+                "of scalars of shape (minibatch,)."
+            ) from None
+        # apply the init transform to the input point cloud
+        Xt = _apply_similarity_transform(Xt, R, T, s)
+    else:
+        # initialize the transformation with identity
+        R = oputil.eyes(dim, b, device=Xt.device, dtype=Xt.dtype)
+        T = Xt.new_zeros((b, dim))
+        s = Xt.new_ones(b)
+
+    prev_rmse = rmse = None
+    tiny = torch.finfo(Xt.dtype).tiny  # floor for the relative-rmse denominator
+    iteration = -1
+    converged = False
+
+    # initialize the transformation history
+    t_history = []
+
+    # the main loop over ICP iterations
+    for iteration in range(max_iterations):
+        Xt_nn_points = knn_points(
+            Xt, Yt, lengths1=num_points_X, lengths2=num_points_Y, K=1, return_nn=True
+        ).knn[:, :, 0, :]
+
+        # get the alignment of the nearest neighbors from Yt with Xt_init
+        R, T, s = corresponding_points_alignment(
+            Xt_init,
+            Xt_nn_points,
+            weights=mask_X,
+            estimate_scale=estimate_scale,
+            allow_reflection=allow_reflection,
+        )
+
+        # apply the estimated similarity transform to Xt_init
+        Xt = _apply_similarity_transform(Xt_init, R, T, s)
+
+        # add the current transformation to the history
+        t_history.append(SimilarityTransform(R, T, s))
+
+        # compute the root mean squared error
+        # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+        Xt_sq_diff = ((Xt - Xt_nn_points) ** 2).sum(2)
+        rmse = oputil.wmean(Xt_sq_diff[:, :, None], mask_X).sqrt()[:, 0, 0]
+
+        # compute the relative rmse (denominator clamped: 0/0 would give NaN)
+        if prev_rmse is None:
+            relative_rmse = rmse.new_ones(b)
+        else:
+            relative_rmse = (prev_rmse - rmse) / prev_rmse.clamp(min=tiny)
+
+        if verbose:
+            rmse_msg = (
+                f"ICP iteration {iteration}: mean/max rmse = "
+                + f"{rmse.mean():1.2e}/{rmse.max():1.2e} "
+                + f"; mean relative rmse = {relative_rmse.mean():1.2e}"
+            )
+            print(rmse_msg)
+
+        # check for convergence (a NaN relative rmse never counts as converged)
+        if (relative_rmse <= relative_rmse_thr).all():
+            converged = True
+            break
+
+        # update the previous rmse
+        prev_rmse = rmse
+
+    if verbose:
+        if converged:
+            print(f"ICP has converged in {iteration + 1} iterations.")
+        else:
+            print(f"ICP has not converged in {max_iterations} iterations.")
+
+    if oputil.is_pointclouds(X):
+        Xt = X.update_padded(Xt)  # type: ignore
+
+    return ICPSolution(converged, rmse, Xt, SimilarityTransform(R, T, s), t_history)
+
+
+# threshold on the singular values used to check that the point
+# cross-correlation is full rank in corresponding_points_alignment
+AMBIGUOUS_ROT_SINGULAR_THR = 1e-15
+
+
+def corresponding_points_alignment(
+    X: Union[torch.Tensor, "Pointclouds"],
+    Y: Union[torch.Tensor, "Pointclouds"],
+    weights: Union[torch.Tensor, List[torch.Tensor], None] = None,
+    estimate_scale: bool = False,
+    allow_reflection: bool = False,
+    eps: float = 1e-9,
+) -> SimilarityTransform:
+    """
+    Finds a similarity transformation (rotation `R`, translation `T`
+    and optionally scale `s`)  between two given sets of corresponding
+    `d`-dimensional points `X` and `Y` such that:
+
+    `s[i] X[i] R[i] + T[i] = Y[i]`,
+
+    for all batch indexes `i` in the least squares sense.
+
+    The algorithm is also known as Umeyama [1].
+
+    Args:
+        **X**: Batch of `d`-dimensional points of shape `(minibatch, num_point, d)`
+            or a `Pointclouds` object.
+        **Y**: Batch of `d`-dimensional points of shape `(minibatch, num_point, d)`
+            or a `Pointclouds` object.
+        **weights**: Batch of non-negative weights of
+            shape `(minibatch, num_point)` or list of `minibatch` 1-dimensional
+            tensors that may have different shapes; in that case, the length of
+            i-th tensor should be equal to the number of points in X_i and Y_i.
+            Passing `None` means uniform weights.
+        **estimate_scale**: If `True`, also estimates a scaling component `s`
+            of the transformation. Otherwise assumes an identity
+            scale and returns a tensor of ones.
+        **allow_reflection**: If `True`, allows the algorithm to return `R`
+            which is orthonormal but has determinant==-1.
+        **eps**: A scalar for clamping to avoid dividing by zero. Active for the
+            code that estimates the output scale `s`.
+
+    Returns:
+        3-element named tuple `SimilarityTransform` containing
+        - **R**: Batch of orthonormal matrices of shape `(minibatch, d, d)`.
+        - **T**: Batch of translations of shape `(minibatch, d)`.
+        - **s**: batch of scaling factors of shape `(minibatch, )`.
+
+    References:
+        [1] Shinji Umeyama: Least-Squares Estimation of
+        Transformation Parameters Between Two Point Patterns
+    """
+
+    # make sure we convert input Pointclouds structures to tensors
+    Xt, num_points = oputil.convert_pointclouds_to_tensor(X)
+    Yt, num_points_Y = oputil.convert_pointclouds_to_tensor(Y)
+
+    if (Xt.shape != Yt.shape) or (num_points != num_points_Y).any():
+        raise ValueError(
+            "Point sets X and Y have to have the same "
+            "number of batches, points and dimensions."
+        )
+    if weights is not None:
+        if isinstance(weights, list):
+            if any(n_pts != w.shape[0] for n_pts, w in zip(num_points, weights)):
+                raise ValueError(
+                    "number of weights should equal to the "
+                    + "number of points in the point cloud."
+                )
+            weights = [w[..., None] for w in weights]
+            weights = strutil.list_to_padded(weights)[..., 0]
+
+        if Xt.shape[:2] != weights.shape:
+            raise ValueError("weights should have the same first two dimensions as X.")
+
+    b, n, dim = Xt.shape
+
+    if (num_points < Xt.shape[1]).any() or (num_points < Yt.shape[1]).any():
+        # in case we got Pointclouds as input, mask the unused entries in Xc, Yc
+        mask = (
+            torch.arange(n, dtype=torch.int64, device=Xt.device)[None]
+            < num_points[:, None]
+        ).type_as(Xt)
+        weights = mask if weights is None else mask * weights.type_as(Xt)
+
+    # compute the centroids of the point sets
+    Xmu = oputil.wmean(Xt, weight=weights, eps=eps)
+    Ymu = oputil.wmean(Yt, weight=weights, eps=eps)
+
+    # mean-center the point sets
+    Xc = Xt - Xmu
+    Yc = Yt - Ymu
+
+    total_weight = torch.clamp(num_points, 1)
+    # special handling for heterogeneous point clouds and/or input weights
+    if weights is not None:
+        Xc *= weights[:, :, None]
+        Yc *= weights[:, :, None]
+        total_weight = torch.clamp(weights.sum(1), eps)
+
+    if (num_points < (dim + 1)).any():
+        warnings.warn(
+            "The size of one of the point clouds is < dim+1. "
+            + "corresponding_points_alignment cannot return a unique rotation."
+        )
+
+    # compute the covariance XYcov between the point sets Xc, Yc
+    XYcov = torch.bmm(Xc.transpose(2, 1), Yc)
+    XYcov = XYcov / total_weight[:, None, None]
+
+    # decompose the covariance matrix XYcov
+    U, S, V = torch.svd(XYcov)
+
+    # catch ambiguous rotation by checking the magnitude of singular values
+    if (S.abs() <= AMBIGUOUS_ROT_SINGULAR_THR).any() and not (
+        num_points < (dim + 1)
+    ).any():
+        warnings.warn(
+            "Excessively low rank of "
+            + "cross-correlation between aligned point clouds. "
+            + "corresponding_points_alignment cannot return a unique rotation."
+        )
+
+    # identity matrix used for fixing reflections
+    E = torch.eye(dim, dtype=XYcov.dtype, device=XYcov.device)[None].repeat(b, 1, 1)
+
+    if not allow_reflection:
+        # reflection test:
+        #   checks whether the estimated rotation has det==1,
+        #   if not, finds the nearest rotation s.t. det==1 by
+        #   flipping the sign of the last singular vector U
+        R_test = torch.bmm(U, V.transpose(2, 1))
+        E[:, -1, -1] = torch.det(R_test)
+
+    # find the rotation matrix by composing U and V again
+    R = torch.bmm(torch.bmm(U, E), V.transpose(2, 1))
+
+    if estimate_scale:
+        # estimate the scaling component of the transformation
+        trace_ES = (torch.diagonal(E, dim1=1, dim2=2) * S).sum(1)
+        Xcov = (Xc * Xc).sum((1, 2)) / total_weight
+
+        # the scaling component
+        s = trace_ES / torch.clamp(Xcov, eps)
+
+        # translation component
+        T = Ymu[:, 0, :] - s[:, None] * torch.bmm(Xmu, R)[:, 0, :]
+    else:
+        # translation component
+        T = Ymu[:, 0, :] - torch.bmm(Xmu, R)[:, 0, :]
+
+        # unit scaling since we do not estimate scale
+        s = T.new_ones(b)
+
+    return SimilarityTransform(R, T, s)
+
+
+def _apply_similarity_transform(
+    X: torch.Tensor, R: torch.Tensor, T: torch.Tensor, s: torch.Tensor
+) -> torch.Tensor:
+    """
+    Maps a `d`-dimensional cloud `X` of shape `(minibatch, num_points, d)`
+    to `s * (X @ R) + T`, where `R` is a batch of orthonormal matrices of
+    shape `(minibatch, d, d)`, `T` a batch of translations of shape
+    `(minibatch, d)` and `s` a batch of scaling factors of shape
+    `(minibatch,)`; `T` and `s` are broadcast over the points.
+    """
+    rotated = torch.bmm(X, R)
+    return s[:, None, None] * rotated + T[:, None, :]
diff --git a/pytorch3d/pytorch3d/ops/points_normals.py b/pytorch3d/pytorch3d/ops/points_normals.py
new file mode 100644
index 0000000000000000000000000000000000000000..63aeefbd82814f4233b35ccab1c1e1c7cc828d66
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/points_normals.py
@@ -0,0 +1,189 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple, TYPE_CHECKING, Union
+
+import torch
+from pytorch3d.common.workaround import symeig3x3
+
+from .utils import convert_pointclouds_to_tensor, get_point_covariances
+
+
+if TYPE_CHECKING:
+    from ..structures import Pointclouds
+
+
+def estimate_pointcloud_normals(
+    pointclouds: Union[torch.Tensor, "Pointclouds"],
+    neighborhood_size: int = 50,
+    disambiguate_directions: bool = True,
+    *,
+    use_symeig_workaround: bool = True,
+) -> torch.Tensor:
+    """
+    Computes per-point normals for a batch of `pointclouds`.
+
+    This is a thin wrapper around `estimate_pointcloud_local_coord_frames`
+    that keeps only the normal direction; see that function for details.
+
+    Args:
+      **pointclouds**: Batch of 3-dimensional points of shape
+        `(minibatch, num_point, 3)` or a `Pointclouds` object.
+      **neighborhood_size**: Number of neighbors of each point that are
+        used to estimate the local geometry.
+      **disambiguate_directions**: If `True`, applies the method of [1] to
+        make the signs of the normals of neighboring points consistent.
+      **use_symeig_workaround**: If `True`, a custom eigenvalue
+        calculation is used.
+
+    Returns:
+      **normals**: Tensor of shape `(minibatch, num_point, 3)` holding the
+        normal of every input point.
+        For `Pointclouds` inputs the result is a padded tensor.
+
+    References:
+      [1] Tombari, Salti, Di Stefano: Unique Signatures of Histograms for
+      Local Surface Description, ECCV 2010.
+    """
+
+    # only the coordinate frames are needed; the curvatures are discarded
+    _, frames = estimate_pointcloud_local_coord_frames(
+        pointclouds,
+        neighborhood_size=neighborhood_size,
+        disambiguate_directions=disambiguate_directions,
+        use_symeig_workaround=use_symeig_workaround,
+    )
+
+    # the principal directions are stored in the columns of each frame,
+    # and column 0 holds the normal
+    return frames[:, :, :, 0]
+
+
+def estimate_pointcloud_local_coord_frames(
+    pointclouds: Union[torch.Tensor, "Pointclouds"],
+    neighborhood_size: int = 50,
+    disambiguate_directions: bool = True,
+    *,
+    use_symeig_workaround: bool = True,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Estimates the principal directions of curvature (which includes normals)
+    of a batch of `pointclouds`.
+
+    The algorithm first finds `neighborhood_size` nearest neighbors for each
+    point of the point clouds, followed by obtaining principal vectors of
+    covariance matrices of each of the point neighborhoods.
+    The principal vector with the smallest eigenvalue corresponds to the
+    normal, while the other 2 are the directions of the 2nd highest and
+    the highest curvature.
+
+    Note that each principal direction is given up to a sign. Hence,
+    the function implements `disambiguate_directions` switch that allows
+    to ensure consistency of the sign of neighboring normals. The implementation
+    follows the sign disambiguation from SHOT descriptors [1].
+
+    The algorithm also returns the curvature values themselves.
+    These are the eigenvalues of the estimated covariance matrices
+    of each point neighborhood.
+
+    Args:
+      **pointclouds**: Batch of 3-dimensional points of shape
+        `(minibatch, num_point, 3)` or a `Pointclouds` object.
+      **neighborhood_size**: The size of the neighborhood used to estimate the
+        geometry around each point. Has to be smaller than the size of
+      each point cloud, otherwise a `ValueError` is raised.
+      **disambiguate_directions**: If `True`, uses the algorithm from [1] to
+        ensure sign consistency of the normals of neighboring points.
+      **use_symeig_workaround**: If `True`, uses a custom eigenvalue calculation.
+
+    Returns:
+      **curvatures**: The three principal curvatures of each point
+        of shape `(minibatch, num_point, 3)`.
+        If `pointclouds` are of `Pointclouds` class, returns a padded tensor.
+      **local_coord_frames**: The three principal directions of the curvature
+        around each point of shape `(minibatch, num_point, 3, 3)`.
+        The principal directions are stored in columns of the output.
+        E.g. `local_coord_frames[i, j, :, 0]` is the normal of
+        `j`-th point in the `i`-th pointcloud.
+        If `pointclouds` are of `Pointclouds` class, returns a padded tensor.
+
+    References:
+      [1] Tombari, Salti, Di Stefano: Unique Signatures of Histograms for
+      Local Surface Description, ECCV 2010.
+    """
+
+    points_padded, num_points = convert_pointclouds_to_tensor(pointclouds)
+
+    ba, N, dim = points_padded.shape
+    if dim != 3:
+        raise ValueError(
+            "The pointclouds argument has to be of shape (minibatch, N, 3)"
+        )
+
+    if (num_points <= neighborhood_size).any():
+        raise ValueError(
+            "The neighborhood_size argument has to be"
+            + " < size of each of the point clouds."
+        )
+
+    # undo global mean for stability
+    # TODO: replace with tutil.wmean once landed
+    pcl_mean = points_padded.sum(1) / num_points[:, None]
+    points_centered = points_padded - pcl_mean[:, None, :]
+
+    # get the per-point covariance and nearest neighbors used to compute it
+    cov, knns = get_point_covariances(points_centered, num_points, neighborhood_size)
+
+    # get the local coord frames as principal directions of
+    # the per-point covariance
+    # this is done with torch.symeig / torch.linalg.eigh, which returns the
+    # eigenvectors (=principal directions) in an ascending order of their
+    # corresponding eigenvalues, and the smallest eigenvalue's eigenvector
+    # corresponds to the normal direction; or with a custom equivalent.
+    if use_symeig_workaround:
+        curvatures, local_coord_frames = symeig3x3(cov, eigenvectors=True)
+    else:
+        curvatures, local_coord_frames = torch.linalg.eigh(cov)
+
+    # disambiguate the directions of individual principal vectors
+    if disambiguate_directions:
+        # disambiguate normal
+        n = _disambiguate_vector_directions(
+            points_centered, knns, local_coord_frames[:, :, :, 0]
+        )
+        # disambiguate the main curvature
+        z = _disambiguate_vector_directions(
+            points_centered, knns, local_coord_frames[:, :, :, 2]
+        )
+        # the secondary curvature is just a cross between n and z
+        y = torch.cross(n, z, dim=2)
+        # cat to form the set of principal directions
+        local_coord_frames = torch.stack((n, y, z), dim=3)
+
+    return curvatures, local_coord_frames
+
+
+def _disambiguate_vector_directions(pcl, knns, vecs: torch.Tensor) -> torch.Tensor:
+    """
+    Orients each vector in `vecs` so that it agrees in sign with the majority
+    of the offsets from its point to that point's neighbors, following [1].
+
+    References:
+      [1] Tombari, Salti, Di Stefano: Unique Signatures of Histograms for
+      Local Surface Description, ECCV 2010.
+    """
+    # half of the neighborhood size K (K is the length of dim 2 of knns)
+    half_k = 0.5 * knns.shape[2]
+    # offsets from every point to each element of its neighborhood
+    offsets = knns - pcl[:, :, None]
+    # dot product of every offset with the candidate direction
+    dots = (vecs[:, :, None] * offsets).sum(3)
+    # per point, the number of neighbors with a positive projection
+    positive_count = (dots > 0).type_as(knns).sum(2, keepdim=True)
+    # 1 where fewer than half of the projections are positive, else 0
+    negate = (positive_count < half_k).type_as(knns)
+    # map {0, 1} -> {+1, -1} and apply the sign to the directions
+    return (1.0 - 2.0 * negate) * vecs
diff --git a/pytorch3d/pytorch3d/ops/points_to_volumes.py b/pytorch3d/pytorch3d/ops/points_to_volumes.py
new file mode 100644
index 0000000000000000000000000000000000000000..f319d90aeaf2ff029df575451005ccf76d44b505
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/points_to_volumes.py
@@ -0,0 +1,762 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Optional, Tuple, TYPE_CHECKING
+
+import torch
+from pytorch3d import _C
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+
+if TYPE_CHECKING:
+    from ..structures import Pointclouds, Volumes
+
+
+class _points_to_volumes_function(Function):
+    """
+    For each point in a pointcloud, add point_weight to the
+    corresponding volume density and point_weight times its features
+    to the corresponding volume features.
+
+    This function does not require any contiguity internally and therefore
+    doesn't need to make copies of its inputs, which is useful when GPU memory
+    is at a premium. (An implementation requiring contiguous inputs might be faster
+    though). The volumes are modified in place.
+
+    This function is differentiable with respect to
+    points_features, volume_densities and volume_features.
+    If splat is True then it is also differentiable with respect to
+    points_3d.
+
+    It may be useful to think about this function as a sort of opposite to
+    torch.nn.functional.grid_sample with 5D inputs.
+
+    Args (in the order taken by `forward`; all tensors must be float32
+    except `grid_sizes`, which must be int64):
+        points_3d: Batch of 3D point cloud coordinates of shape
+            `(minibatch, N, 3)` where N is the number of points
+            in each point cloud. Coordinates have to be specified in the
+            local volume coordinates (ranging in [-1, 1]).
+        points_features: Features of shape `(minibatch, N, feature_dim)`
+            corresponding to the points of the input point cloud `points_3d`.
+        volume_densities: Batch of input feature volume densities
+            of shape `(minibatch, 1, D, H, W)`. Each voxel should
+            contain a non-negative number corresponding to its
+            opaqueness (the higher, the less transparent).
+        volume_features: Batch of input feature volumes
+            of shape `(minibatch, feature_dim, D, H, W)`
+        grid_sizes: `LongTensor` of shape (minibatch, 3) representing the
+            spatial resolutions of each of the non-flattened `volumes`
+            tensors. Note that the following has to hold:
+                `torch.prod(grid_sizes, dim=1)==N_voxels`.
+
+        point_weight: A scalar controlling how much weight a single point has.
+
+        mask: A float32 mask of shape `(minibatch, N)` determining
+            which 3D points are going to be converted to the resulting
+            volume. `forward` requires a tensor here; `None` is not accepted.
+
+        align_corners: as for grid_sample.
+
+        splat: if true, trilinear interpolation. If false all the weight goes in
+            the nearest voxel.
+
+    Returns:
+        volume_densities and volume_features, which have been modified in place.
+    """
+
+    @staticmethod
+    # pyre-fixme[14]: `forward` overrides method defined in `Function` inconsistently.
+    def forward(
+        ctx,
+        points_3d: torch.Tensor,
+        points_features: torch.Tensor,
+        volume_densities: torch.Tensor,
+        volume_features: torch.Tensor,
+        grid_sizes: torch.LongTensor,
+        point_weight: float,
+        mask: torch.Tensor,
+        align_corners: bool,
+        splat: bool,
+    ):
+
+        ctx.mark_dirty(volume_densities, volume_features)  # both are updated in place
+
+        N, P, D = points_3d.shape
+        if D != 3:
+            raise ValueError("points_3d must be 3D")
+        if points_3d.dtype != torch.float32:
+            raise ValueError("points_3d must be float32")
+        if points_features.dtype != torch.float32:
+            raise ValueError("points_features must be float32")
+        N1, P1, C = points_features.shape
+        if N1 != N or P1 != P:
+            raise ValueError("Bad points_features shape")
+        if volume_densities.dtype != torch.float32:
+            raise ValueError("volume_densities must be float32")
+        N2, one, D, H, W = volume_densities.shape  # D now means the volume depth
+        if N2 != N or one != 1:
+            raise ValueError("Bad volume_densities shape")
+        if volume_features.dtype != torch.float32:
+            raise ValueError("volume_features must be float32")
+        N3, C1, D1, H1, W1 = volume_features.shape
+        if N3 != N or C1 != C or D1 != D or H1 != H or W1 != W:
+            raise ValueError("Bad volume_features shape")
+        if grid_sizes.dtype != torch.int64:
+            raise ValueError("grid_sizes must be int64")
+        N4, D1 = grid_sizes.shape  # D1 is reused for the last dim of grid_sizes
+        if N4 != N or D1 != 3:
+            raise ValueError("Bad grid_sizes.shape")
+        if mask.dtype != torch.float32:
+            raise ValueError("mask must be float32")
+        N5, P2 = mask.shape
+        if N5 != N or P2 != P:
+            raise ValueError("Bad mask shape")
+
+        # pyre-fixme[16]: Module `pytorch3d` has no attribute `_C`.
+        _C.points_to_volumes_forward(
+            points_3d,
+            points_features,
+            volume_densities,
+            volume_features,
+            grid_sizes,
+            mask,
+            point_weight,
+            align_corners,
+            splat,
+        )
+        if splat:
+            ctx.save_for_backward(points_3d, points_features, grid_sizes, mask)
+        else:
+            ctx.save_for_backward(points_3d, grid_sizes, mask)
+        ctx.point_weight = point_weight
+        ctx.splat = splat
+        ctx.align_corners = align_corners
+        return volume_densities, volume_features
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_volume_densities, grad_volume_features):
+        splat = ctx.splat
+        N, C = grad_volume_features.shape[:2]
+        if splat:
+            points_3d, points_features, grid_sizes, mask = ctx.saved_tensors
+            P = points_3d.shape[1]
+            grad_points_3d = torch.zeros_like(points_3d)
+        else:
+            points_3d, grid_sizes, mask = ctx.saved_tensors
+            P = points_3d.shape[1]
+            ones = points_3d.new_zeros(1, 1, 1)  # zero-filled, despite the name
+            # There is no gradient. Just need something to let its accessors exist.
+            grad_points_3d = ones.expand_as(points_3d)
+            # points_features not needed. Just need something to let its accessors exist.
+            points_features = ones.expand(N, P, C)
+        grad_points_features = points_3d.new_zeros(N, P, C)
+        _C.points_to_volumes_backward(
+            points_3d,
+            points_features,
+            grid_sizes,
+            mask,
+            ctx.point_weight,
+            ctx.align_corners,
+            splat,
+            grad_volume_densities,
+            grad_volume_features,
+            grad_points_3d,
+            grad_points_features,
+        )
+
+        # one entry per `forward` argument; the last five are non-differentiable
+        return (
+            (grad_points_3d if splat else None),
+            grad_points_features,
+            grad_volume_densities,
+            grad_volume_features,
+            None,
+            None,
+            None,
+            None,
+            None,
+        )
+
+
+_points_to_volumes = _points_to_volumes_function.apply  # callable alias of the Function
+
+
+def add_pointclouds_to_volumes(
+    pointclouds: "Pointclouds",
+    initial_volumes: "Volumes",
+    mode: str = "trilinear",
+    min_weight: float = 1e-4,
+    rescale_features: bool = True,
+    _python: bool = False,
+) -> "Volumes":
+    """
+    Add a batch of point clouds represented with a `Pointclouds` structure
+    `pointclouds` to a batch of existing volumes represented with a
+    `Volumes` structure `initial_volumes`.
+
+    More specifically, the method casts a set of weighted votes (the weights are
+    determined based on `mode="trilinear"|"nearest"`) into the pre-initialized
+    `features` and `densities` fields of `initial_volumes`.
+
+    The method returns an updated `Volumes` object that contains a copy
+    of `initial_volumes` with its `features` and `densities` updated with the
+    result of the pointcloud addition.
+
+    Example::
+
+        # init a random point cloud
+        pointclouds = Pointclouds(
+            points=torch.randn(4, 100, 3), features=torch.rand(4, 100, 5)
+        )
+        # init an empty volume centered around [0.5, 0.5, 0.5] in world coordinates
+        # with a voxel size of 1.0.
+        initial_volumes = Volumes(
+            features = torch.zeros(4, 5, 25, 25, 25),
+            densities = torch.zeros(4, 1, 25, 25, 25),
+            volume_translation = [-0.5, -0.5, -0.5],
+            voxel_size = 1.0,
+        )
+        # add the pointcloud to the 'initial_volumes' buffer using
+        # trilinear splatting
+        updated_volumes = add_pointclouds_to_volumes(
+            pointclouds=pointclouds,
+            initial_volumes=initial_volumes,
+            mode="trilinear",
+        )
+
+    Args:
+        pointclouds: Batch of 3D pointclouds represented with a `Pointclouds`
+            structure. Note that `pointclouds.features` have to be defined.
+        initial_volumes: Batch of initial `Volumes` with pre-initialized 1-dimensional
+            densities which contain non-negative numbers corresponding to the
+            opaqueness of each voxel (the higher, the less transparent).
+        mode: The mode of the conversion of individual points into the volume.
+            Set either to `nearest` or `trilinear`:
+            `nearest`: Each 3D point is first rounded to the volumetric
+                lattice. Each voxel is then labeled with the average
+                over features that fall into the given voxel.
+                The gradients of nearest neighbor conversion w.r.t. the
+                3D locations of the points in `pointclouds` are *not* defined.
+            `trilinear`: Each 3D point casts 8 weighted votes to the 8-neighborhood
+                of its floating point coordinate. The weights are
+                determined using a trilinear interpolation scheme.
+                Trilinear splatting is fully differentiable w.r.t. all input arguments.
+        min_weight: A scalar controlling the lowest possible total per-voxel
+            weight used to normalize the features accumulated in a voxel.
+            Only active for `mode==trilinear`.
+        rescale_features: If False, output features are just the sum of input and
+                            added points. If True, they are averaged. In both cases,
+                            output densities are just summed without rescaling, so
+                            you may need to rescale them afterwards.
+        _python: Set to True to use a pure Python implementation, e.g. for test
+            purposes, which requires more memory and may be slower.
+
+    Returns:
+        updated_volumes: Output `Volumes` structure containing the conversion result.
+    """
+
+    if len(initial_volumes) != len(pointclouds):
+        raise ValueError(
+            "'initial_volumes' and 'pointclouds' have to have the same batch size."
+        )
+
+    # obtain the features and densities
+    pcl_feats = pointclouds.features_padded()
+    pcl_3d = pointclouds.points_padded()
+
+    if pcl_feats is None:
+        raise ValueError("'pointclouds' have to have their 'features' defined.")
+
+    # obtain the conversion mask
+    n_per_pcl = pointclouds.num_points_per_cloud().type_as(pcl_feats)
+    # pyre-fixme[6]: For 1st param expected `Union[bool, float, int]` but got `Tensor`.
+    mask = torch.arange(n_per_pcl.max(), dtype=pcl_feats.dtype, device=pcl_feats.device)
+    mask = (mask[None, :] < n_per_pcl[:, None]).type_as(mask)
+
+    # convert to the coord frame of the volume
+    pcl_3d_local = initial_volumes.world_to_local_coords(pcl_3d)
+
+    features_new, densities_new = add_points_features_to_volume_densities_features(
+        points_3d=pcl_3d_local,
+        points_features=pcl_feats,
+        volume_features=initial_volumes.features(),
+        volume_densities=initial_volumes.densities(),
+        min_weight=min_weight,
+        grid_sizes=initial_volumes.get_grid_sizes(),
+        mask=mask,
+        mode=mode,
+        rescale_features=rescale_features,
+        align_corners=initial_volumes.get_align_corners(),
+        _python=_python,
+    )
+
+    return initial_volumes.update_padded(
+        new_densities=densities_new, new_features=features_new
+    )
+
+
+def add_points_features_to_volume_densities_features(
+    points_3d: torch.Tensor,
+    points_features: torch.Tensor,
+    volume_densities: torch.Tensor,
+    volume_features: Optional[torch.Tensor],
+    mode: str = "trilinear",
+    min_weight: float = 1e-4,
+    mask: Optional[torch.Tensor] = None,
+    grid_sizes: Optional[torch.LongTensor] = None,
+    rescale_features: bool = True,
+    _python: bool = False,
+    align_corners: bool = True,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Convert a batch of point clouds represented with tensors of per-point
+    3d coordinates and their features to a batch of volumes represented
+    with tensors of densities and features.
+
+    Args:
+        points_3d: Batch of 3D point cloud coordinates of shape
+            `(minibatch, N, 3)` where N is the number of points
+            in each point cloud. Coordinates have to be specified in the
+            local volume coordinates (ranging in [-1, 1]).
+        points_features: Features of shape `(minibatch, N, feature_dim)` corresponding
+            to the points of the input point clouds `pointcloud`.
+        volume_densities: Batch of input feature volume densities of shape
+            `(minibatch, 1, D, H, W)`. Each voxel should
+            contain a non-negative number corresponding to its
+            opaqueness (the higher, the less transparent).
+        volume_features: Batch of input feature volumes of shape
+            `(minibatch, feature_dim, D, H, W)`
+            If set to `None`, the `volume_features` will be automatically
+            instantiated with a correct size and filled with 0s.
+        mode: The mode of the conversion of individual points into the volume.
+            Set either to `nearest` or `trilinear`:
+            `nearest`: Each 3D point is first rounded to the volumetric
+                lattice. Each voxel is then labeled with the average
+                over features that fall into the given voxel.
+                The gradients of nearest neighbor rounding w.r.t. the
+                input point locations `points_3d` are *not* defined.
+            `trilinear`: Each 3D point casts 8 weighted votes to the 8-neighborhood
+                of its floating point coordinate. The weights are
+                determined using a trilinear interpolation scheme.
+                Trilinear splatting is fully differentiable w.r.t. all input arguments.
+        min_weight: A scalar controlling the lowest possible total per-voxel
+            weight used to normalize the features accumulated in a voxel.
+            Only active for `mode==trilinear`.
+        mask: A binary mask of shape `(minibatch, N)` determining which 3D points
+            are going to be converted to the resulting volume.
+            Set to `None` if all points are valid.
+        grid_sizes: `LongTensor` of shape (minibatch, 3) representing the
+            spatial resolutions of each of the non-flattened `volumes` tensors,
+            or None to indicate the whole volume is used for every batch element.
+        rescale_features: If False, output features are just the sum of input and
+                            added points. If True, they are averaged. In both cases,
+                            output densities are just summed without rescaling, so
+                            you may need to rescale them afterwards.
+        _python: Set to True to use a pure Python implementation.
+        align_corners: as for grid_sample.
+    Returns:
+        volume_features: Output volume of shape `(minibatch, feature_dim, D, H, W)`
+        volume_densities: Occupancy volume of shape `(minibatch, 1, D, H, W)`
+            containing the total amount of votes cast to each of the voxels.
+    """
+
+    # number of points in the point cloud, its dim and batch size
+    ba, n_points, feature_dim = points_features.shape
+    ba_volume, density_dim = volume_densities.shape[:2]
+
+    if density_dim != 1:
+        raise ValueError("Only one-dimensional densities are allowed.")
+
+    # init the volumetric grid sizes if uninitialized
+    if grid_sizes is None:
+        # grid sizes shape (minibatch, 3)
+        grid_sizes = (
+            torch.LongTensor(list(volume_densities.shape[2:]))
+            .to(volume_densities.device)
+            .expand(volume_densities.shape[0], 3)
+        )
+
+    if _python:
+        return _add_points_features_to_volume_densities_features_python(
+            points_3d=points_3d,
+            points_features=points_features,
+            volume_densities=volume_densities,
+            volume_features=volume_features,
+            mode=mode,
+            min_weight=min_weight,
+            mask=mask,
+            # pyre-fixme[6]: For 8th param expected `LongTensor` but got `Tensor`.
+            grid_sizes=grid_sizes,
+        )
+
+    if mode == "trilinear":
+        splat = True
+    elif mode == "nearest":
+        splat = False
+    else:
+        raise ValueError('No such interpolation mode "%s"' % mode)
+
+    if mask is None:
+        mask = points_3d.new_ones(1).expand(points_3d.shape[:2])
+
+    volume_densities, volume_features = _points_to_volumes(
+        points_3d,
+        points_features,
+        volume_densities,
+        volume_features,
+        grid_sizes,
+        1.0,  # point_weight
+        mask,
+        align_corners,  # align_corners
+        splat,
+    )
+
+    if rescale_features:
+        # divide each feature by the total weight of the votes
+        if splat:
+            volume_features = volume_features / volume_densities.clamp(min_weight)
+        else:
+            volume_features = volume_features / volume_densities.clamp(1.0)
+
+    return volume_features, volume_densities
+
+
+def _add_points_features_to_volume_densities_features_python(
+    *,
+    points_3d: torch.Tensor,
+    points_features: torch.Tensor,
+    volume_densities: torch.Tensor,
+    volume_features: Optional[torch.Tensor],
+    mode: str,
+    min_weight: float,
+    mask: Optional[torch.Tensor],
+    grid_sizes: torch.LongTensor,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Python implementation for add_points_features_to_volume_densities_features.
+
+    Returns:
+        volume_features: Output volume of shape `(minibatch, feature_dim, D, H, W)`
+        volume_densities: Occupancy volume of shape `(minibatch, 1, D, H, W)`
+            containing the total amount of votes cast to each of the voxels.
+    """
+    ba, n_points, feature_dim = points_features.shape
+
+    # flatten densities and features
+    v_shape = volume_densities.shape[2:]
+    volume_densities_flatten = volume_densities.view(ba, -1, 1)
+    n_voxels = volume_densities_flatten.shape[1]
+
+    if volume_features is None:
+        # initialize features if not passed in
+        volume_features_flatten = volume_densities.new_zeros(ba, feature_dim, n_voxels)
+    else:
+        # otherwise just flatten
+        volume_features_flatten = volume_features.view(ba, feature_dim, n_voxels)
+
+    if mode == "trilinear":  # do the splatting (trilinear interp)
+        volume_features, volume_densities = _splat_points_to_volumes(
+            points_3d,
+            points_features,
+            volume_densities_flatten,
+            volume_features_flatten,
+            grid_sizes,
+            mask=mask,
+            min_weight=min_weight,
+        )
+    elif mode == "nearest":  # nearest neighbor interp
+        volume_features, volume_densities = _round_points_to_volumes(
+            points_3d,
+            points_features,
+            volume_densities_flatten,
+            volume_features_flatten,
+            grid_sizes,
+            mask=mask,
+        )
+    else:
+        raise ValueError('No such interpolation mode "%s"' % mode)
+
+    # reshape into the volume shape
+    volume_features = volume_features.view(ba, feature_dim, *v_shape)
+    volume_densities = volume_densities.view(ba, 1, *v_shape)
+    return volume_features, volume_densities
+
+
+def _check_points_to_volumes_inputs(
+    points_3d: torch.Tensor,
+    points_features: torch.Tensor,
+    volume_densities: torch.Tensor,
+    volume_features: torch.Tensor,
+    grid_sizes: torch.LongTensor,
+    mask: Optional[torch.Tensor] = None,
+) -> None:
+
+    max_grid_size = grid_sizes.max(dim=0).values
+    if torch.prod(max_grid_size) > volume_densities.shape[1]:
+        raise ValueError(
+            "One of the grid sizes corresponds to a larger number"
+            + " of elements than the number of elements in volume_densities."
+        )
+
+    _, n_voxels, density_dim = volume_densities.shape
+
+    if density_dim != 1:
+        raise ValueError("Only one-dimensional densities are allowed.")
+
+    ba, n_points, feature_dim = points_features.shape
+
+    if volume_features.shape[1] != feature_dim:
+        raise ValueError(
+            "volume_features have a different number of channels"
+            + " than points_features."
+        )
+
+    if volume_features.shape[2] != n_voxels:
+        raise ValueError(
+            "volume_features have a different number of elements"
+            + " than volume_densities."
+        )
+
+
+def _splat_points_to_volumes(
+    points_3d: torch.Tensor,
+    points_features: torch.Tensor,
+    volume_densities: torch.Tensor,
+    volume_features: torch.Tensor,
+    grid_sizes: torch.LongTensor,
+    min_weight: float = 1e-4,
+    mask: Optional[torch.Tensor] = None,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Convert a batch of point clouds to a batch of volumes using trilinear
+    splatting into a volume.
+
+    Args:
+        points_3d: Batch of 3D point cloud coordinates of shape
+            `(minibatch, N, 3)` where N is the number of points
+            in each point cloud. Coordinates have to be specified in the
+            local volume coordinates (ranging in [-1, 1]).
+        points_features: Features of shape `(minibatch, N, feature_dim)`
+            corresponding to the points of the input point cloud `points_3d`.
+        volume_features: Batch of input *flattened* feature volumes
+            of shape `(minibatch, feature_dim, N_voxels)`
+        volume_densities: Batch of input *flattened* feature volume densities
+            of shape `(minibatch, N_voxels, 1)`. Each voxel should
+            contain a non-negative number corresponding to its
+            opaqueness (the higher, the less transparent).
+        grid_sizes: `LongTensor` of shape (minibatch, 3) representing the
+            spatial resolutions of each of the non-flattened `volumes` tensors.
+            Note that the following has to hold:
+                `torch.prod(grid_sizes, dim=1)==N_voxels`
+        min_weight: A scalar controlling the lowest possible total per-voxel
+            weight used to normalize the features accumulated in a voxel.
+        mask: A binary mask of shape `(minibatch, N)` determining which 3D points
+            are going to be converted to the resulting volume.
+            Set to `None` if all points are valid.
+    Returns:
+        volume_features: Output volume of shape `(minibatch, feature_dim, N_voxels)`.
+        volume_densities: Occupancy volume of shape `(minibatch, N_voxels, 1)`
+            containing the total amount of votes cast to each of the voxels.
+    """
+
+    _check_points_to_volumes_inputs(
+        points_3d,
+        points_features,
+        volume_densities,
+        volume_features,
+        grid_sizes,
+        mask=mask,
+    )
+
+    _, n_voxels, density_dim = volume_densities.shape
+    ba, n_points, feature_dim = points_features.shape
+
+    # minibatch x n_points x feature_dim -> minibatch x feature_dim x n_points
+    points_features = points_features.permute(0, 2, 1).contiguous()
+
+    # XYZ = the upper-left volume index of the 8-neighborhood of every point
+    # grid_sizes is of the form (minibatch, depth-height-width)
+    grid_sizes_xyz = grid_sizes[:, [2, 1, 0]]
+
+    # Convert from points_3d in the range [-1, 1] to
+    # indices in the volume grid in the range [0, grid_sizes_xyz-1]
+    points_3d_indices = ((points_3d + 1) * 0.5) * (
+        grid_sizes_xyz[:, None].type_as(points_3d) - 1
+    )
+    XYZ = points_3d_indices.floor().long()
+    rXYZ = points_3d_indices - XYZ.type_as(points_3d)  # remainder of floor
+
+    # split into separate coordinate vectors
+    X, Y, Z = XYZ.split(1, dim=2)
+    # rX = remainder after floor = 1-"the weight of each vote into
+    #      the X coordinate of the 8-neighborhood"
+    rX, rY, rZ = rXYZ.split(1, dim=2)
+
+    # get random indices for the purpose of adding out-of-bounds values
+    rand_idx = X.new_zeros(X.shape).random_(0, n_voxels)
+
+    # iterate over the x, y, z indices of the 8-neighborhood (xdiff, ydiff, zdiff)
+    for xdiff in (0, 1):
+        X_ = X + xdiff
+        wX = (1 - xdiff) + (2 * xdiff - 1) * rX
+        for ydiff in (0, 1):
+            Y_ = Y + ydiff
+            wY = (1 - ydiff) + (2 * ydiff - 1) * rY
+            for zdiff in (0, 1):
+                Z_ = Z + zdiff
+                wZ = (1 - zdiff) + (2 * zdiff - 1) * rZ
+
+                # weight of each vote into the given cell of 8-neighborhood
+                w = wX * wY * wZ
+
+                # valid - binary indicators of votes that fall into the volume
+                valid = (
+                    (0 <= X_)
+                    * (X_ < grid_sizes_xyz[:, None, 0:1])
+                    * (0 <= Y_)
+                    * (Y_ < grid_sizes_xyz[:, None, 1:2])
+                    * (0 <= Z_)
+                    * (Z_ < grid_sizes_xyz[:, None, 2:3])
+                ).long()
+
+                # linearized indices into the volume
+                idx = (Z_ * grid_sizes[:, None, 1:2] + Y_) * grid_sizes[
+                    :, None, 2:3
+                ] + X_
+
+                # out-of-bounds features added to a random voxel idx with weight=0.
+                idx_valid = idx * valid + rand_idx * (1 - valid)
+                w_valid = w * valid.type_as(w)
+                if mask is not None:
+                    w_valid = w_valid * mask.type_as(w)[:, :, None]
+
+                # scatter add casts the votes into the weight accumulator
+                # and the feature accumulator
+                volume_densities.scatter_add_(1, idx_valid, w_valid)
+
+                # reshape idx_valid -> (minibatch, feature_dim, n_points)
+                idx_valid = idx_valid.view(ba, 1, n_points).expand_as(points_features)
+                w_valid = w_valid.view(ba, 1, n_points)
+
+                # volume_features of shape (minibatch, feature_dim, n_voxels)
+                volume_features.scatter_add_(2, idx_valid, w_valid * points_features)
+
+    # divide each feature by the total weight of the votes
+    volume_features = volume_features / volume_densities.view(ba, 1, n_voxels).clamp(
+        min_weight
+    )
+
+    return volume_features, volume_densities
+
+
+def _round_points_to_volumes(
+    points_3d: torch.Tensor,
+    points_features: torch.Tensor,
+    volume_densities: torch.Tensor,
+    volume_features: torch.Tensor,
+    grid_sizes: torch.LongTensor,
+    mask: Optional[torch.Tensor] = None,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Convert a batch of point clouds to a batch of volumes using rounding to the
+    nearest integer coordinate of the volume. Features that fall into the same
+    voxel are averaged.
+
+    Args:
+        points_3d: Batch of 3D point cloud coordinates of shape
+            `(minibatch, N, 3)` where N is the number of points
+            in each point cloud. Coordinates have to be specified in the
+            local volume coordinates (ranging in [-1, 1]).
+        points_features: Features of shape `(minibatch, N, feature_dim)`
+            corresponding to the points of the input point cloud `points_3d`.
+        volume_features: Batch of input *flattened* feature volumes
+            of shape `(minibatch, feature_dim, N_voxels)`
+        volume_densities: Batch of input *flattened* feature volume densities
+            of shape `(minibatch, N_voxels, 1)`. Each voxel should
+            contain a non-negative number corresponding to its
+            opaqueness (the higher, the less transparent).
+        grid_sizes: `LongTensor` of shape (minibatch, 3) representing the
+            spatial resolutions of each of the non-flattened `volumes` tensors.
+            Note that the following has to hold:
+                `torch.prod(grid_sizes, dim=1)==N_voxels`
+        mask: A binary mask of shape `(minibatch, N)` determining which 3D points
+            are going to be converted to the resulting volume.
+            Set to `None` if all points are valid.
+    Returns:
+        volume_features: Output volume of shape `(minibatch, feature_dim, N_voxels)`.
+        volume_densities: Occupancy volume of shape `(minibatch, N_voxels, 1)`
+            containing the total amount of votes cast to each of the voxels.
+    """
+
+    _check_points_to_volumes_inputs(
+        points_3d,
+        points_features,
+        volume_densities,
+        volume_features,
+        grid_sizes,
+        mask=mask,
+    )
+
+    _, n_voxels, density_dim = volume_densities.shape
+    ba, n_points, feature_dim = points_features.shape
+
+    # minibatch x n_points x feature_dim-> minibatch x feature_dim x n_points
+    points_features = points_features.permute(0, 2, 1).contiguous()
+
+    # round the coordinates to nearest integer
+    # grid_sizes is of the form (minibatch, depth-height-width)
+    grid_sizes_xyz = grid_sizes[:, [2, 1, 0]]
+    XYZ = ((points_3d.detach() + 1) * 0.5) * (
+        grid_sizes_xyz[:, None].type_as(points_3d) - 1
+    )
+    XYZ = torch.round(XYZ).long()
+
+    # split into separate coordinate vectors
+    X, Y, Z = XYZ.split(1, dim=2)
+
+    # valid - binary indicators of votes that fall into the volume
+    # pyre-fixme[9]: grid_sizes has type `LongTensor`; used as `Tensor`.
+    grid_sizes = grid_sizes.type_as(XYZ)
+    valid = (
+        (0 <= X)
+        * (X < grid_sizes_xyz[:, None, 0:1])
+        * (0 <= Y)
+        * (Y < grid_sizes_xyz[:, None, 1:2])
+        * (0 <= Z)
+        * (Z < grid_sizes_xyz[:, None, 2:3])
+    ).long()
+    if mask is not None:
+        valid = valid * mask[:, :, None].long()
+
+    # get random indices for the purpose of adding out-of-bounds values
+    rand_idx = valid.new_zeros(X.shape).random_(0, n_voxels)
+
+    # linearized indices into the volume
+    idx = (Z * grid_sizes[:, None, 1:2] + Y) * grid_sizes[:, None, 2:3] + X
+
+    # out-of-bounds features added to a random voxel idx with weight=0.
+    idx_valid = idx * valid + rand_idx * (1 - valid)
+    w_valid = valid.type_as(volume_features)
+
+    # scatter add casts the votes into the weight accumulator
+    # and the feature accumulator
+    volume_densities.scatter_add_(1, idx_valid, w_valid)
+
+    # reshape idx_valid -> (minibatch, feature_dim, n_points)
+    idx_valid = idx_valid.view(ba, 1, n_points).expand_as(points_features)
+    w_valid = w_valid.view(ba, 1, n_points)
+
+    # volume_features of shape (minibatch, feature_dim, n_voxels)
+    volume_features.scatter_add_(2, idx_valid, w_valid * points_features)
+
+    # divide each feature by the total weight of the votes
+    volume_features = volume_features / volume_densities.view(ba, 1, n_voxels).clamp(
+        1.0
+    )
+
+    return volume_features, volume_densities
diff --git a/pytorch3d/pytorch3d/ops/sample_farthest_points.py b/pytorch3d/pytorch3d/ops/sample_farthest_points.py
new file mode 100644
index 0000000000000000000000000000000000000000..a2779e42dded1a4717943fb4dadb17166ea4baa1
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/sample_farthest_points.py
@@ -0,0 +1,195 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from random import randint
+from typing import List, Optional, Tuple, Union
+
+import torch
+from pytorch3d import _C
+
+from .utils import masked_gather
+
+
+def sample_farthest_points(
+    points: torch.Tensor,
+    lengths: Optional[torch.Tensor] = None,
+    K: Union[int, List, torch.Tensor] = 50,
+    random_start_point: bool = False,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Iterative farthest point sampling algorithm [1] to subsample a set of
+    K points from a given pointcloud. At each iteration, a point is selected
+    which has the largest nearest neighbor distance to any of the
+    already selected points.
+
+    Farthest point sampling provides more uniform coverage of the input
+    point cloud compared to uniform random sampling.
+
+    [1] Charles R. Qi et al, "PointNet++: Deep Hierarchical Feature Learning
+        on Point Sets in a Metric Space", NeurIPS 2017.
+
+    Args:
+        points: (N, P, D) array containing the batch of pointclouds
+        lengths: (N,) number of points in each pointcloud (to support heterogeneous
+            batches of pointclouds)
+        K: samples required in each sampled point cloud (this is typically << P). If
+            K is an int then the same number of samples are selected for each
+            pointcloud in the batch. If K is a tensor it should be length (N,)
+            giving the number of samples to select for each element in the batch
+        random_start_point: bool, if True, a random point is selected as the starting
+            point for iterative sampling.
+
+    Returns:
+        selected_points: (N, K, D), array of selected values from points. If the input
+            K is a tensor, then the shape will be (N, max(K), D), and padded with
+            0.0 for batch elements where k_i < max(K).
+        selected_indices: (N, K) array of selected indices. If the input
+            K is a tensor, then the shape will be (N, max(K)), and padded with
+            -1 for batch elements where k_i < max(K).
+    """
+    N, P, D = points.shape
+    device = points.device
+
+    # Validate inputs
+    if lengths is None:
+        lengths = torch.full((N,), P, dtype=torch.int64, device=device)
+    else:
+        if lengths.shape != (N,):
+            raise ValueError("points and lengths must have same batch dimension.")
+        if lengths.max() > P:
+            raise ValueError("A value in lengths was too large.")
+
+    # TODO: support providing K as a ratio of the total number of points instead of as an int
+    if isinstance(K, int):
+        K = torch.full((N,), K, dtype=torch.int64, device=device)
+    elif isinstance(K, list):
+        K = torch.tensor(K, dtype=torch.int64, device=device)
+
+    if K.shape[0] != N:
+        raise ValueError("K and points must have the same batch dimension")
+
+    # Check dtypes are correct and convert if necessary
+    if not (points.dtype == torch.float32):
+        points = points.to(torch.float32)
+    if not (lengths.dtype == torch.int64):
+        lengths = lengths.to(torch.int64)
+    if not (K.dtype == torch.int64):
+        K = K.to(torch.int64)
+
+    # Generate the starting indices for sampling
+    start_idxs = torch.zeros_like(lengths)
+    if random_start_point:
+        for n in range(N):
+            # pyre-fixme[6]: For 1st param expected `int` but got `Tensor`.
+            start_idxs[n] = torch.randint(high=lengths[n], size=(1,)).item()
+
+    with torch.no_grad():
+        # pyre-fixme[16]: `pytorch3d_._C` has no attribute `sample_farthest_points`.
+        idx = _C.sample_farthest_points(points, lengths, K, start_idxs)
+    sampled_points = masked_gather(points, idx)
+
+    return sampled_points, idx
+
+
+def sample_farthest_points_naive(
+    points: torch.Tensor,
+    lengths: Optional[torch.Tensor] = None,
+    K: Union[int, List, torch.Tensor] = 50,
+    random_start_point: bool = False,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Same Args/Returns as sample_farthest_points
+    """
+    N, P, D = points.shape
+    device = points.device
+
+    # Validate inputs
+    if lengths is None:
+        lengths = torch.full((N,), P, dtype=torch.int64, device=device)
+    else:
+        if lengths.shape != (N,):
+            raise ValueError("points and lengths must have same batch dimension.")
+        if lengths.max() > P:
+            raise ValueError("Invalid lengths.")
+
+    # TODO: support providing K as a ratio of the total number of points instead of as an int
+    if isinstance(K, int):
+        K = torch.full((N,), K, dtype=torch.int64, device=device)
+    elif isinstance(K, list):
+        K = torch.tensor(K, dtype=torch.int64, device=device)
+
+    if K.shape[0] != N:
+        raise ValueError("K and points must have the same batch dimension")
+
+    # Find max value of K
+    max_K = torch.max(K)
+
+    # List of selected indices from each batch element
+    all_sampled_indices = []
+
+    for n in range(N):
+        # Initialize an array for the sampled indices, shape: (max_K,)
+        sample_idx_batch = torch.full(
+            # pyre-fixme[6]: For 1st param expected `Union[List[int], Size,
+            #  typing.Tuple[int, ...]]` but got `Tuple[Tensor]`.
+            (max_K,),
+            fill_value=-1,
+            dtype=torch.int64,
+            device=device,
+        )
+
+        # Initialize closest distances to inf, shape: (lengths[n],)
+        # This will be updated at each iteration to track the closest distance of the
+        # remaining points to any of the selected points
+        closest_dists = points.new_full(
+            # pyre-fixme[6]: For 1st param expected `Union[List[int], Size,
+            #  typing.Tuple[int, ...]]` but got `Tuple[Tensor]`.
+            (lengths[n],),
+            float("inf"),
+            dtype=torch.float32,
+        )
+
+        # Select a random point index and save it as the starting point
+        # pyre-fixme[6]: For 2nd argument expected `int` but got `Tensor`.
+        selected_idx = randint(0, lengths[n] - 1) if random_start_point else 0
+        sample_idx_batch[0] = selected_idx
+
+        # If the pointcloud has fewer than K points then only iterate over the min
+        # pyre-fixme[6]: For 1st param expected `SupportsRichComparisonT` but got
+        #  `Tensor`.
+        # pyre-fixme[6]: For 2nd param expected `SupportsRichComparisonT` but got
+        #  `Tensor`.
+        k_n = min(lengths[n], K[n])
+
+        # Iteratively select points for a maximum of k_n
+        for i in range(1, k_n):
+            # Find the distance between the last selected point
+            # and all the other points. If a point has already been selected
+            # its distance will be 0.0 so it will not be selected again as the max.
+            dist = points[n, selected_idx, :] - points[n, : lengths[n], :]
+            # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and
+            #  `int`.
+            dist_to_last_selected = (dist**2).sum(-1)  # (P - i)
+
+            # If closer than currently saved distance to one of the selected
+            # points, then updated closest_dists
+            closest_dists = torch.min(dist_to_last_selected, closest_dists)  # (P - i)
+
+            # The aim is to pick the point that has the largest
+            # nearest neighbour distance to any of the already selected points
+            selected_idx = torch.argmax(closest_dists)
+            sample_idx_batch[i] = selected_idx
+
+        # Add the list of points for this batch to the final list
+        all_sampled_indices.append(sample_idx_batch)
+
+    all_sampled_indices = torch.stack(all_sampled_indices, dim=0)
+
+    # Gather the points
+    all_sampled_points = masked_gather(points, all_sampled_indices)
+
+    # Return (N, max_K, D) subsampled points and indices
+    return all_sampled_points, all_sampled_indices
diff --git a/pytorch3d/pytorch3d/ops/sample_points_from_meshes.py b/pytorch3d/pytorch3d/ops/sample_points_from_meshes.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e2d34890ed552d4973250bab5ad7636d9089157
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/sample_points_from_meshes.py
@@ -0,0 +1,175 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+"""
+This module implements utility functions for sampling points from
+batches of meshes.
+"""
+import sys
+from typing import Tuple, Union
+
+import torch
+from pytorch3d.ops.mesh_face_areas_normals import mesh_face_areas_normals
+from pytorch3d.ops.packed_to_padded import packed_to_padded
+from pytorch3d.renderer.mesh.rasterizer import Fragments as MeshFragments
+
+
+def sample_points_from_meshes(
+    meshes,
+    num_samples: int = 10000,
+    return_normals: bool = False,
+    return_textures: bool = False,
+) -> Union[
+    torch.Tensor,
+    Tuple[torch.Tensor, torch.Tensor],
+    Tuple[torch.Tensor, torch.Tensor, torch.Tensor],
+]:
+    """
+    Convert a batch of meshes to a batch of pointclouds by uniformly sampling
+    points on the surface of the mesh with probability proportional to the
+    face area.
+
+    Args:
+        meshes: A Meshes object with a batch of N meshes.
+        num_samples: Integer giving the number of point samples per mesh.
+        return_normals: If True, return normals for the sampled points.
+        return_textures: If True, return textures for the sampled points.
+
+    Returns:
+        `samples` alone, or a tuple of the requested outputs in the order below
+
+        - **samples**: FloatTensor of shape (N, num_samples, 3) giving the
+          coordinates of sampled points for each mesh in the batch. For empty
+          meshes the corresponding row in the samples array will be filled with 0.
+        - **normals**: FloatTensor of shape (N, num_samples, 3) giving a normal vector
+          to each sampled point. Only returned if return_normals is True.
+          For empty meshes the corresponding row in the normals array will
+          be filled with 0.
+        - **textures**: FloatTensor of shape (N, num_samples, C) giving a C-dimensional
+          texture vector to each sampled point. Only returned if return_textures is True.
+          For empty meshes the corresponding row in the textures array will
+          be filled with 0.
+
+        Note that in a future release the tuple output will be replaced with
+        `Pointclouds(samples, normals=normals, features=textures)`.
+
+    Raises:
+        ValueError: If the batch is empty, if the verts contain nan or inf, or
+            if return_textures is True but the meshes have no textures.
+    """
+    if meshes.isempty():
+        raise ValueError("Meshes are empty.")
+
+    verts = meshes.verts_packed()
+    if not torch.isfinite(verts).all():
+        raise ValueError("Meshes contain nan or inf.")
+
+    if return_textures and meshes.textures is None:
+        raise ValueError("Meshes do not contain textures.")
+
+    faces = meshes.faces_packed()
+    mesh_to_face = meshes.mesh_to_faces_packed_first_idx()
+    num_meshes = len(meshes)
+    num_valid_meshes = torch.sum(meshes.valid)  # Non empty meshes.
+
+    # Zero-filled output (rows of empty meshes stay 0); dtype follows verts so the masked assignment below matches.
+    samples = torch.zeros((num_meshes, num_samples, 3), device=meshes.device, dtype=verts.dtype)
+
+    # Only compute samples for non empty meshes
+    with torch.no_grad():
+        areas, _ = mesh_face_areas_normals(verts, faces)  # Face areas can be zero.
+        max_faces = meshes.num_faces_per_mesh().max().item()
+        areas_padded = packed_to_padded(
+            areas, mesh_to_face[meshes.valid], max_faces
+        )  # (N, F)
+
+        # TODO (gkioxari) Confirm multinomial bug is not present with real data.
+        sample_face_idxs = areas_padded.multinomial(
+            num_samples, replacement=True
+        )  # (N, num_samples)
+        sample_face_idxs += mesh_to_face[meshes.valid].view(num_valid_meshes, 1)
+
+    # Get the vertex coordinates of the sampled faces.
+    face_verts = verts[faces]
+    v0, v1, v2 = face_verts[:, 0], face_verts[:, 1], face_verts[:, 2]
+
+    # Randomly generate barycentric coords.
+    w0, w1, w2 = _rand_barycentric_coords(
+        num_valid_meshes, num_samples, verts.dtype, verts.device
+    )
+
+    # Use the barycentric coords to get a point on each sampled face.
+    a = v0[sample_face_idxs]  # (N, num_samples, 3)
+    b = v1[sample_face_idxs]
+    c = v2[sample_face_idxs]
+    samples[meshes.valid] = w0[:, :, None] * a + w1[:, :, None] * b + w2[:, :, None] * c
+
+    if return_normals:
+        # Initialize normals tensor with fill value 0 for empty meshes (same
+        # dtype as verts). Normals for the sampled points are face normals computed
+        # from the vertices of the face in which the sampled point lies.
+        normals = torch.zeros((num_meshes, num_samples, 3), device=meshes.device, dtype=verts.dtype)
+        vert_normals = (v1 - v0).cross(v2 - v1, dim=1)
+        vert_normals = vert_normals / vert_normals.norm(dim=1, p=2, keepdim=True).clamp(
+            min=sys.float_info.epsilon
+        )
+        vert_normals = vert_normals[sample_face_idxs]
+        normals[meshes.valid] = vert_normals
+
+    if return_textures:
+        # fragment data are of shape NxHxWxK. Here H=S, W=1 & K=1.
+        pix_to_face = sample_face_idxs.view(len(meshes), num_samples, 1, 1)  # NxSx1x1
+        bary = torch.stack((w0, w1, w2), dim=2).unsqueeze(2).unsqueeze(2)  # NxSx1x1x3
+        # zbuf and dists are not used in `sample_textures` so we initialize them with dummy
+        dummy = torch.zeros(
+            (len(meshes), num_samples, 1, 1), device=meshes.device, dtype=torch.float32
+        )  # NxSx1x1
+        fragments = MeshFragments(
+            pix_to_face=pix_to_face, zbuf=dummy, bary_coords=bary, dists=dummy
+        )
+        textures = meshes.sample_textures(fragments)  # NxSx1x1xC
+        textures = textures[:, :, 0, 0, :]  # NxSxC
+
+    # Return only the outputs that were requested.
+    # TODO(gkioxari) consider returning a Pointclouds instance [breaking]
+    if return_normals and return_textures:
+        # pyre-fixme[61]: `normals` may not be initialized here.
+        # pyre-fixme[61]: `textures` may not be initialized here.
+        return samples, normals, textures
+    if return_normals:  # return_textures is False
+        # pyre-fixme[61]: `normals` may not be initialized here.
+        return samples, normals
+    if return_textures:  # return_normals is False
+        # pyre-fixme[61]: `textures` may not be initialized here.
+        return samples, textures
+    return samples
+
+
+def _rand_barycentric_coords(
+    size1, size2, dtype: torch.dtype, device: torch.device
+) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Draw size1*size2 random barycentric coordinate triples that are uniformly
+    distributed over a triangle.
+
+    Two uniform variates (u, v) in [0, 1) are mapped to the weights
+    (1 - sqrt(u), sqrt(u) * (1 - v), sqrt(u) * v).
+
+    Args:
+        size1, size2: Each returned tensor has shape (size1, size2).
+        dtype: Datatype of the generated weights.
+        device: The torch.device on which the outputs are allocated.
+
+    Returns:
+        w0, w1, w2: Tensors of shape (size1, size2) holding the three
+            barycentric weights of every sample.
+    """
+    # Both variates come from one rand call of shape (2, size1, size2).
+    u, v = torch.rand(2, size1, size2, dtype=dtype, device=device).unbind(0)
+    root_u = torch.sqrt(u)
+    # w0 + w1 + w2 == 1 by construction.
+    return 1.0 - root_u, root_u * (1.0 - v), root_u * v
diff --git a/pytorch3d/pytorch3d/ops/subdivide_meshes.py b/pytorch3d/pytorch3d/ops/subdivide_meshes.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a633ae2ca2ae3a0ddd1d3ed32606dc42710f177
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/subdivide_meshes.py
@@ -0,0 +1,470 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import torch
+import torch.nn as nn
+from pytorch3d.structures import Meshes
+
+
+class SubdivideMeshes(nn.Module):
+    """
+    Subdivide a triangle mesh by adding a new vertex at the center of each edge
+    and dividing each face into four new faces. Vectors of vertex
+    attributes can also be subdivided by averaging the values of the attributes
+    at the two vertices which form each edge. This implementation
+    preserves face orientation - if the vertices of a face are all ordered
+    counter-clockwise, then the faces in the subdivided meshes will also have
+    their vertices ordered counter-clockwise.
+
+    If meshes is provided as an input, the initializer performs the relatively
+    expensive computation of determining the new face indices. This one-time
+    computation can be reused for all meshes with the same face topology
+    but different vertex positions.
+    """
+
+    def __init__(self, meshes=None) -> None:
+        """
+        Args:
+            meshes: Meshes object or None. If a meshes object is provided,
+                the first mesh is used to compute the new faces of the
+                subdivided topology which can be reused for meshes with
+                the same input topology.
+        """
+        super().__init__()
+
+        self.precomputed = False
+        self._N = -1  # batch size seen by the most recent forward() call
+        if meshes is not None:
+            # This computation is on indices, so gradients do not need to be
+            # tracked.
+            mesh = meshes[0]
+            with torch.no_grad():
+                subdivided_faces = self.subdivide_faces(mesh)
+                if subdivided_faces.shape[1] != 3:
+                    raise ValueError("faces can only have three vertices")
+                self.register_buffer("_subdivided_faces", subdivided_faces)
+                self.precomputed = True
+
+    def subdivide_faces(self, meshes):
+        r"""
+        Args:
+            meshes: a Meshes object.
+
+        Returns:
+            subdivided_faces_packed: (4*sum(F_n), 3) shape LongTensor of
+            original and new faces.
+
+        Refer to pytorch3d.structures.meshes.py for more details on packed
+        representations of faces.
+
+        Each face is split into 4 faces e.g. Input face
+        ::
+                   v0
+                   /\
+                  /  \
+                 /    \
+             e1 /      \ e0
+               /        \
+              /          \
+             /            \
+            /______________\
+          v2       e2       v1
+
+          faces_packed = [[0, 1, 2]]
+          faces_packed_to_edges_packed = [[2, 1, 0]]
+
+        `faces_packed_to_edges_packed` is used to represent all the new
+        vertex indices corresponding to the mid-points of edges in the mesh.
+        The actual vertex coordinates will be computed in the forward function.
+        To get the indices of the new vertices, offset
+        `faces_packed_to_edges_packed` by the total number of vertices.
+        ::
+            faces_packed_to_edges_packed = [[2, 1, 0]] + 3 = [[5, 4, 3]]
+
+        e.g. subdivided face
+        ::
+                   v0
+                   /\
+                  /  \
+                 / f0 \
+             v4 /______\ v3
+               /\      /\
+              /  \ f3 /  \
+             / f2 \  / f1 \
+            /______\/______\
+           v2       v5       v1
+
+           f0 = [0, 3, 4]
+           f1 = [1, 5, 3]
+           f2 = [2, 4, 5]
+           f3 = [5, 4, 3]
+
+        """
+        verts_packed = meshes.verts_packed()
+        with torch.no_grad():
+            faces_packed = meshes.faces_packed()
+            faces_packed_to_edges_packed = (
+                meshes.faces_packed_to_edges_packed() + verts_packed.shape[0]
+            )
+
+            f0 = torch.stack(
+                [
+                    faces_packed[:, 0],
+                    faces_packed_to_edges_packed[:, 2],
+                    faces_packed_to_edges_packed[:, 1],
+                ],
+                dim=1,
+            )
+            f1 = torch.stack(
+                [
+                    faces_packed[:, 1],
+                    faces_packed_to_edges_packed[:, 0],
+                    faces_packed_to_edges_packed[:, 2],
+                ],
+                dim=1,
+            )
+            f2 = torch.stack(
+                [
+                    faces_packed[:, 2],
+                    faces_packed_to_edges_packed[:, 1],
+                    faces_packed_to_edges_packed[:, 0],
+                ],
+                dim=1,
+            )
+            f3 = faces_packed_to_edges_packed
+            subdivided_faces_packed = torch.cat(
+                [f0, f1, f2, f3], dim=0
+            )  # (4*sum(F_n), 3)
+
+            return subdivided_faces_packed
+
+    def forward(self, meshes, feats=None):
+        """
+        Subdivide a batch of meshes by adding a new vertex on each edge, and
+        dividing each face into four new faces. New meshes contains two types
+        of vertices:
+        1) Vertices that appear in the input meshes.
+           Data for these vertices are copied from the input meshes.
+        2) New vertices at the midpoint of each edge.
+           Data for these vertices is the average of the data for the two
+           vertices that make up the edge.
+
+        Args:
+            meshes: Meshes object representing a batch of meshes.
+            feats: Per-vertex features to be subdivided along with the verts.
+                Should be parallel to the packed vert representation of the
+                input meshes; so it should have shape (V, D) where V is the
+                total number of verts in the input meshes. Default: None.
+
+        Returns:
+            new_meshes alone if feats is None, otherwise a 2-element tuple containing
+
+            - **new_meshes**: Meshes object of a batch of subdivided meshes.
+            - **new_feats**: (optional) Tensor of subdivided feats, parallel to the
+              (packed) vertices of the subdivided meshes. Only returned
+              if feats is not None.
+
+        """
+        self._N = len(meshes)  # kept for backward compatibility; the helpers use len(meshes)
+        if self.precomputed:
+            return self.subdivide_homogeneous(meshes, feats)
+        else:
+            return self.subdivide_heterogenerous(meshes, feats)
+
+    def subdivide_homogeneous(self, meshes, feats=None):
+        """
+        Subdivide verts (and optionally features) of a batch of meshes
+        where each mesh has the same topology of faces. The subdivided faces
+        are precomputed in the initializer.
+
+        Args:
+            meshes: Meshes object representing a batch of meshes.
+            feats: Per-vertex features to be subdivided along with the verts.
+
+        Returns:
+            new_meshes alone if feats is None, otherwise a 2-element tuple containing
+
+            - **new_meshes**: Meshes object of a batch of subdivided meshes.
+            - **new_feats**: (optional) Tensor of subdivided feats, parallel to the
+              (packed) vertices of the subdivided meshes. Only returned
+              if feats is not None.
+        """
+        verts = meshes.verts_padded()  # (N, V, D)
+        edges = meshes[0].edges_packed()
+
+        # The faces are shared by all meshes; the batch size comes from `meshes` so a direct call works too.
+        new_faces = self._subdivided_faces.view(1, -1, 3).expand(len(meshes), -1, -1)
+
+        # Add one new vertex at the midpoint of each edge by taking the average
+        # of the vertices that form each edge.
+        new_verts = verts[:, edges].mean(dim=2)
+        new_verts = torch.cat([verts, new_verts], dim=1)  # (N, V + E, D)
+        new_feats = None
+
+        # Calculate features for new vertices.
+        if feats is not None:
+            if feats.dim() == 2:
+                # feats is in packed format, transform it from packed to
+                # padded, i.e. (N*V, D) to (N, V, D).
+                feats = feats.view(verts.size(0), verts.size(1), feats.size(1))
+            if feats.dim() != 3:
+                raise ValueError("features need to be of shape (N, V, D) or (N*V, D)")
+
+            # Take average of the features at the vertices that form each edge.
+            new_feats = feats[:, edges].mean(dim=2)
+            new_feats = torch.cat([feats, new_feats], dim=1)  # (N, V + E, D_feats)
+
+        new_meshes = Meshes(verts=new_verts, faces=new_faces)
+
+        if feats is None:
+            return new_meshes
+        else:
+            return new_meshes, new_feats
+
+    def subdivide_heterogenerous(self, meshes, feats=None):
+        """
+        Subdivide faces, verts (and optionally features) of a batch of meshes
+        where each mesh can have different face topologies.
+
+        Args:
+            meshes: Meshes object representing a batch of meshes.
+            feats: Per-vertex features to be subdivided along with the verts.
+
+        Returns:
+            new_meshes alone if feats is None, otherwise a 2-element tuple containing
+
+            - **new_meshes**: Meshes object of a batch of subdivided meshes.
+            - **new_feats**: (optional) Tensor of subdivided feats, parallel to the
+              (packed) vertices of the subdivided meshes. Only returned
+              if feats is not None.
+        """
+
+        # New faces are computed on indices, so no gradients are tracked. The batch
+        # size is read from `meshes` (not self._N) so that a direct call works too.
+        verts = meshes.verts_packed()
+        with torch.no_grad():
+            new_faces = self.subdivide_faces(meshes)
+            edges = meshes.edges_packed()
+            face_to_mesh_idx = meshes.faces_packed_to_mesh_idx()
+            edge_to_mesh_idx = meshes.edges_packed_to_mesh_idx()
+            num_edges_per_mesh = edge_to_mesh_idx.bincount(minlength=len(meshes))
+            num_verts_per_mesh = meshes.num_verts_per_mesh()
+            num_faces_per_mesh = meshes.num_faces_per_mesh()
+
+            # Add one new vertex at the midpoint of each edge.
+            new_verts_per_mesh = num_verts_per_mesh + num_edges_per_mesh  # (N,)
+            new_face_to_mesh_idx = torch.cat([face_to_mesh_idx] * 4, dim=0)
+
+            # Calculate the indices needed to group the new and existing verts
+            # for each mesh.
+            verts_sort_idx = _create_verts_index(
+                num_verts_per_mesh, num_edges_per_mesh, meshes.device
+            )  # (sum(V_n)+sum(E_n),)
+
+            verts_ordered_idx_init = torch.zeros(
+                new_verts_per_mesh.sum(), dtype=torch.int64, device=meshes.device
+            )  # (sum(V_n)+sum(E_n),)
+
+            # Reassign vertex indices so that existing and new vertices for each
+            # mesh are sequential.
+            verts_ordered_idx = verts_ordered_idx_init.scatter_add(
+                0,
+                verts_sort_idx,
+                torch.arange(new_verts_per_mesh.sum(), device=meshes.device),
+            )
+
+            # Retrieve vertex indices for each face.
+            new_faces = verts_ordered_idx[new_faces]
+
+            # Calculate the indices needed to group the existing and new faces
+            # for each mesh.
+            face_sort_idx = _create_faces_index(
+                num_faces_per_mesh, device=meshes.device
+            )
+
+            # Reorder the faces to sequentially group existing and new faces
+            # for each mesh.
+            new_faces = new_faces[face_sort_idx]
+            new_face_to_mesh_idx = new_face_to_mesh_idx[face_sort_idx]
+            new_faces_per_mesh = new_face_to_mesh_idx.bincount(
+                minlength=len(meshes)
+            )  # (N,)
+
+        # Add one new vertex at the midpoint of each edge by taking the average
+        # of the verts that form each edge.
+        new_verts = verts[edges].mean(dim=1)
+        new_verts = torch.cat([verts, new_verts], dim=0)
+
+        # Reorder the verts to sequentially group existing and new verts for
+        # each mesh.
+        new_verts = new_verts[verts_sort_idx]
+
+        if feats is not None:
+            new_feats = feats[edges].mean(dim=1)
+            new_feats = torch.cat([feats, new_feats], dim=0)
+            new_feats = new_feats[verts_sort_idx]
+
+        verts_list = list(new_verts.split(new_verts_per_mesh.tolist(), 0))
+        faces_list = list(new_faces.split(new_faces_per_mesh.tolist(), 0))
+        new_verts_per_mesh_cumsum = torch.cat(
+            [
+                new_verts_per_mesh.new_full(size=(1,), fill_value=0.0),
+                new_verts_per_mesh.cumsum(0)[:-1],
+            ],
+            dim=0,
+        )
+        faces_list = [
+            faces_list[n] - new_verts_per_mesh_cumsum[n] for n in range(len(meshes))
+        ]
+        if feats is not None:
+            feats_list = new_feats.split(new_verts_per_mesh.tolist(), 0)
+        new_meshes = Meshes(verts=verts_list, faces=faces_list)
+
+        if feats is None:
+            return new_meshes
+        else:
+            new_feats = torch.cat(feats_list, dim=0)
+            return new_meshes, new_feats
+
+
+def _create_verts_index(verts_per_mesh, edges_per_mesh, device=None):
+    """
+    Build the permutation that regroups vertices mesh by mesh after subdivision.
+
+    The input ordering is "all original verts of every mesh, followed by all
+    new (edge midpoint) verts of every mesh". The returned index reorders that
+    tensor so that each mesh's original verts are immediately followed by its
+    own new verts, which gives a sequential packed layout.
+
+    Args:
+        verts_per_mesh: Tensor of shape (N,) giving the number of vertices
+            in each mesh in the batch where N is the batch size.
+        edges_per_mesh: Tensor of shape (N,) giving the number of edges
+            in each mesh in the batch.
+        device: Device on which the index tensor is allocated.
+
+    Returns:
+        verts_idx: A tensor with vert indices for each mesh ordered sequentially
+            by mesh index.
+    """
+    # Running example: verts_per_mesh = (4, 5, 6), edges_per_mesh = (5, 7, 9).
+    total_verts = verts_per_mesh.sum()  # e.g. 15
+    total_edges = edges_per_mesh.sum()  # e.g. 21
+
+    verts_end = verts_per_mesh.cumsum(dim=0)  # (N,) e.g. (4, 9, 15)
+    edges_end = edges_per_mesh.cumsum(dim=0)  # (N,) e.g. (5, 12, 21)
+    verts_before_last = verts_end[:-1]  # e.g. (4, 9)
+    edges_before_last = edges_end[:-1]  # e.g. (5, 12)
+
+    # Output positions where a mesh switches from its old verts to its new
+    # verts, e.g. (4, 9, 15) + (0, 5, 12) = (4, 14, 27).
+    old_to_new_pos = verts_end.clone()
+    old_to_new_pos[1:] += edges_before_last
+
+    # Extra jump in the source index at those positions,
+    # e.g. 15 - (4, 9, 15) = (11, 6, 0), then + (0, 5, 12) = (11, 11, 12).
+    old_to_new_jump = total_verts - verts_end
+    old_to_new_jump[1:] += edges_before_last
+
+    # Output positions where the next mesh's old verts start,
+    # e.g. (4, 9) + (5, 12) = (9, 21).
+    new_to_old_pos = verts_before_last + edges_before_last
+
+    # Jump back into the old-vert range at those positions,
+    # e.g. (4, 9) - (5, 12) - 15 = (-16, -18).
+    new_to_old_jump = verts_before_last - edges_before_last - total_verts
+
+    # Start from unit steps (one output slot per old vert and per edge) and
+    # add the jumps at the switch positions.
+    steps = torch.ones(total_verts + total_edges, device=device, dtype=torch.int64)
+    steps[old_to_new_pos] += old_to_new_jump
+    steps[new_to_old_pos] += new_to_old_jump
+    # e.g. steps (36 entries) =
+    # [
+    #  1, 1, 1, 1, 12, 1, 1, 1, 1,
+    #  -15, 1, 1, 1, 1, 12, 1, 1, 1, 1, 1, 1,
+    #  -17, 1, 1, 1, 1, 1, 13, 1, 1, 1, 1, 1, 1, 1
+    # ]
+
+    # Integrating the steps yields the gather index, e.g.
+    # [
+    #  0, 1, 2, 3, 15, 16, 17, 18, 19,                            --> mesh 0
+    #  4, 5, 6, 7, 8, 20, 21, 22, 23, 24, 25, 26,                 --> mesh 1
+    #  9, 10, 11, 12, 13, 14, 27, 28, 29, 30, 31, 32, 33, 34, 35  --> mesh 2
+    # ]
+    # where for mesh 0, [0, 1, 2, 3] are the indices of the existing verts, and
+    # [15, 16, 17, 18, 19] are the indices of the new verts after subdivision.
+    verts_idx = steps.cumsum(dim=0) - 1
+    return verts_idx
+
+
+def _create_faces_index(faces_per_mesh: torch.Tensor, device=None):
+    """
+    Build the permutation that regroups faces mesh by mesh after subdivision.
+
+    The input ordering consists of four consecutive groups of sum(F_n) faces
+    (the sub-faces created for every original face, one group per sub-face
+    slot). The returned index reorders them so that all faces belonging to
+    the same mesh become contiguous, which gives a sequential packed layout.
+
+    Args:
+        faces_per_mesh: Tensor of shape (N,) giving the number of faces
+            in each mesh in the batch where N is the batch size.
+        device: Device on which the index tensor is allocated.
+
+    Returns:
+        faces_idx: A tensor with face indices for each mesh ordered sequentially
+            by mesh index.
+    """
+    # Running example: faces_per_mesh = [2, 5, 3].
+
+    total_faces = faces_per_mesh.sum()  # e.g. 10
+    faces_end = faces_per_mesh.cumsum(dim=0)  # (N,) e.g. (2, 7, 10)
+    faces_before_last = faces_end[:-1]  # e.g. (2, 7)
+
+    # Extra jump that moves from the end of a mesh's faces in one group to the
+    # start of the same mesh's faces in the next group.
+    group_jump = total_faces - faces_per_mesh  # e.g. (8, 5, 7)
+
+    # Start from unit steps, one per output face.
+    # pyre-fixme[6]: For 1st param expected `Union[List[int], Size,
+    #  typing.Tuple[int, ...]]` but got `Tensor`.
+    steps = torch.ones(4 * total_faces, device=device, dtype=torch.int64)
+
+    # Output positions at which mesh n moves on from group k - 1 to group k:
+    #   k * faces_end, shifted for n > 0 by (4 - k) * faces_before_last, e.g.
+    #   k = 1: (2, 7, 10) + (0, 6, 21) = (2, 13, 31)
+    #   k = 2: (4, 14, 20) + (0, 4, 14) = (4, 18, 34)
+    #   k = 3: (6, 21, 30) + (0, 2, 7) = (6, 23, 37)
+    for k in (1, 2, 3):
+        # k * faces_end is a fresh tensor, so the in-place shift is safe.
+        switch_pos = k * faces_end
+        switch_pos[1:] += (4 - k) * faces_before_last
+        steps[switch_pos] += group_jump
+
+    # Output positions at which the next mesh starts over in the first group.
+    restart_pos = 4 * faces_before_last  # e.g. (8, 28)
+    steps[restart_pos] -= 3 * total_faces
+
+    # e.g. steps =
+    # [
+    #  1, 1, 9, 1, 9, 1, 9, 1,                                       -> mesh 0
+    #  -29, 1, 1, 1, 1, 6, 1, 1, 1, 1, 6, 1, 1, 1, 1, 6, 1, 1, 1, 1, -> mesh 1
+    #  -29, 1, 1, 8, 1, 1, 8, 1, 1, 8, 1, 1                          -> mesh 2
+    # ]
+
+    # Integrating the steps yields the gather index, e.g.
+    # [
+    #  0, 1, 10, 11, 20, 21, 30, 31,
+    #  2, 3, 4, 5, 6, 12, 13, 14, 15, 16, 22, 23, 24, 25, 26, 32, 33, 34, 35, 36,
+    #  7, 8, 9, 17, 18, 19, 27, 28, 29, 37, 38, 39
+    # ]
+    # where for mesh 0, [0, 1] are the indices of the existing faces, and
+    # [10, 11, 20, 21, 30, 31] are the indices of the new faces after subdivision.
+    faces_idx = steps.cumsum(dim=0) - 1
+
+    return faces_idx
diff --git a/pytorch3d/pytorch3d/ops/utils.py b/pytorch3d/pytorch3d/ops/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb576d5b76b0cd7ef549b396a0e9743db874c21d
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/utils.py
@@ -0,0 +1,207 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Optional, Tuple, TYPE_CHECKING, Union
+
+import torch
+
+from .knn import knn_points
+
+
+if TYPE_CHECKING:
+    from pytorch3d.structures import Pointclouds
+
+
+def masked_gather(points: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
+    """
+    Gather rows of `points` along dim 1 using indices that may contain -1 as a
+    padding marker. Padded entries are looked up at index 0 so that
+    torch.gather is valid, and the corresponding outputs are then zeroed.
+
+    Args:
+        points: (N, P, D) float32 tensor of points
+        idx: (N, K) or (N, P', K) long tensor of indices into points, where
+            some indices are -1 to indicate padding
+
+    Returns:
+        selected_points: tensor of the points at the given indices, of shape
+            (N, K, D) for 2D `idx` and (N, P', K, D) for 3D `idx`; entries
+            that correspond to padding are 0.0
+
+    Raises:
+        ValueError: if the batch sizes of `points` and `idx` differ or if
+            `idx` is neither 2D nor 3D.
+    """
+    if len(idx) != len(points):
+        raise ValueError("points and idx must have the same batch dimension")
+
+    _, _, feat_dim = points.shape
+
+    if idx.ndim not in (2, 3):
+        raise ValueError("idx format is not supported %s" % repr(idx.shape))
+
+    # Broadcast the indices over the feature dimension for torch.gather.
+    index = idx.unsqueeze(-1)
+    if idx.ndim == 2:
+        # Farthest point sampling case: idx is (N, K).
+        index = index.expand(-1, -1, feat_dim)
+    else:
+        # KNN / ball query case: idx is (N, P', K), where P' may differ from P
+        # because the points may be gathered from a different pointcloud.
+        num_neighbors = idx.shape[2]
+        index = index.expand(-1, -1, -1, feat_dim)
+        points = points.unsqueeze(2).expand(-1, -1, num_neighbors, -1)
+
+    pad_mask = index == -1
+    # masked_fill returns a new tensor, so the caller's `idx` is left untouched.
+    safe_index = index.masked_fill(pad_mask, 0)
+    selected_points = torch.gather(points, 1, safe_index)
+    selected_points[pad_mask] = 0.0
+    return selected_points
+
+
+def wmean(
+    x: torch.Tensor,
+    weight: Optional[torch.Tensor] = None,
+    dim: Union[int, Tuple[int]] = -2,
+    keepdim: bool = True,
+    eps: float = 1e-9,
+) -> torch.Tensor:
+    """
+    Computes the (optionally weighted) mean of `x` over the given dimension(s).
+
+    Args:
+        x: tensor of shape `(*, D)`, where D is assumed to be spatial;
+        weight: if given, non-negative tensor of shape `(*,)`. It must be
+            broadcastable to `x.shape[:-1]`; the same weight is applied to
+            every entry of the last (spatial) dimension;
+        dim: dimension(s) in `x` to average over;
+        keepdim: tells whether to keep the resulting singleton dimension.
+        eps: minimum clamping value in the denominator.
+
+    Returns:
+        the mean tensor:
+        * if `weight` is None => `mean(x, dim)`,
+        * otherwise => `sum(x*w, dim) / max{sum(w, dim), eps}`.
+
+    Raises:
+        ValueError: if `weight` cannot be broadcast against `x.shape[:-1]`.
+    """
+    if weight is None:
+        # Plain mean; `eps` is not used on this path.
+        return x.mean(dim=dim, keepdim=keepdim)
+
+    # Walk the dims of x (skipping the last, spatial one) and of weight from
+    # the back and make sure every pair is broadcast-compatible.
+    for x_size, w_size in zip(x.shape[-2::-1], weight.shape[::-1]):
+        if x_size != w_size and x_size != 1 and w_size != 1:
+            raise ValueError("wmean: weights are not compatible with the tensor")
+
+    w = weight[..., None]  # trailing singleton broadcasts over D
+    weighted_sum = (x * w).sum(dim=dim, keepdim=keepdim)
+    total_weight = w.sum(dim=dim, keepdim=keepdim).clamp(eps)
+    return weighted_sum / total_weight
+
+
+def eyes(
+    dim: int,
+    N: int,
+    device: Optional[torch.device] = None,
+    dtype: torch.dtype = torch.float32,
+) -> torch.Tensor:
+    """
+    Builds `N` stacked copies of the `dim x dim` identity matrix.
+
+    Args:
+        **dim**: The dimensionality of the identity matrices.
+        **N**: The number of identity matrices.
+        **device**: The device to be used for allocating the matrices.
+        **dtype**: The datatype of the matrices.
+
+    Returns:
+        **identities**: A batch of identity matrices of shape `(N, dim, dim)`.
+    """
+    single = torch.eye(dim, device=device, dtype=dtype).unsqueeze(0)  # (1, dim, dim)
+    return single.repeat(N, 1, 1)
+
+
+def convert_pointclouds_to_tensor(pcl: Union[torch.Tensor, "Pointclouds"]):
+    """
+    Normalizes a point cloud input to a `(points, num_points)` pair.
+
+    For a `Pointclouds`-like object the padded points and the per-cloud point
+    counts are returned. For a tensor, the tensor itself is returned and every
+    batch element is assigned the size of the tensor's second dimension.
+
+    Raises:
+        ValueError: if `pcl` is neither a `Pointclouds` object nor a tensor.
+    """
+    if is_pointclouds(pcl):
+        return pcl.points_padded(), pcl.num_points_per_cloud()  # type: ignore
+
+    if not torch.is_tensor(pcl):
+        raise ValueError(
+            "The inputs X, Y should be either Pointclouds objects or tensors."
+        )
+
+    # pyre-fixme[16]: Item `Pointclouds` of `Union[Pointclouds, Tensor]` has
+    #  no attribute `shape`.
+    batch_size, points_per_cloud = pcl.shape[0], pcl.shape[1]
+    counts = torch.ones(batch_size, device=pcl.device, dtype=torch.int64)
+    return pcl, points_per_cloud * counts  # type: ignore
+
+
+def is_pointclouds(pcl: Union[torch.Tensor, "Pointclouds"]) -> bool:
+    """Duck-typed check for `Pointclouds`: returns True iff `pcl` exposes both
+    the `points_padded` and the `num_points_per_cloud` attributes.
+    """
+    required_attrs = ("points_padded", "num_points_per_cloud")
+    return all(hasattr(pcl, attr) for attr in required_attrs)
+
+
+def get_point_covariances(
+    points_padded: torch.Tensor,
+    num_points_per_cloud: torch.Tensor,
+    neighborhood_size: int,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Computes, for every point, the covariance matrix of the 3D locations of
+    its K-nearest neighbors.
+
+    Args:
+        **points_padded**: Input point clouds as a padded tensor
+            of shape `(minibatch, num_points, dim)`.
+        **num_points_per_cloud**: Number of points per cloud
+            of shape `(minibatch,)`.
+        **neighborhood_size**: Number of nearest neighbors for each point
+            used to estimate the covariance matrices.
+
+    Returns:
+        **covariances**: A batch of per-point covariance matrices
+            of shape `(minibatch, num_points, dim, dim)`.
+        **k_nearest_neighbors**: A batch of `neighborhood_size` nearest
+            neighbors for each of the point cloud points
+            of shape `(minibatch, num_points, neighborhood_size, dim)`.
+    """
+    # Each cloud is queried against itself, so both sides share points/lengths.
+    knn_result = knn_points(
+        points_padded,
+        points_padded,
+        lengths1=num_points_per_cloud,
+        lengths2=num_points_per_cloud,
+        K=neighborhood_size,
+        return_nn=True,
+    )
+    k_nearest_neighbors = knn_result.knn
+
+    # Center every neighborhood on its own mean (dim 2 indexes the neighbors).
+    centered = k_nearest_neighbors - k_nearest_neighbors.mean(2, keepdim=True)
+
+    # Outer product of each centered neighbor with itself, averaged over the
+    # neighborhood: (..., K, dim, 1) * (..., K, 1, dim) -> mean over K.
+    outer = centered.unsqueeze(4) * centered.unsqueeze(3)
+    covariances = outer.mean(2)
+    return covariances, k_nearest_neighbors
diff --git a/pytorch3d/pytorch3d/ops/vert_align.py b/pytorch3d/pytorch3d/ops/vert_align.py
new file mode 100644
index 0000000000000000000000000000000000000000..f8181aebddecb1fe0e4ab7d9522e0eec60b2e560
--- /dev/null
+++ b/pytorch3d/pytorch3d/ops/vert_align.py
@@ -0,0 +1,105 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import torch
+import torch.nn.functional as F
+
+
+def vert_align(
+    feats,
+    verts,
+    return_packed: bool = False,
+    interp_mode: str = "bilinear",
+    padding_mode: str = "zeros",
+    align_corners: bool = True,
+) -> torch.Tensor:
+    """
+    Sample vertex features from a feature map. This operation is called
+    "perceptual feature pooling" in [1] or "vert align" in [2].
+
+    [1] Wang et al, "Pixel2Mesh: Generating 3D Mesh Models from Single
+        RGB Images", ECCV 2018.
+    [2] Gkioxari et al, "Mesh R-CNN", ICCV 2019
+
+    Args:
+        feats: FloatTensor of shape (N, C, H, W) representing image features
+            from which to sample or a list of features each with potentially
+            different C, H or W dimensions.
+        verts: FloatTensor of shape (N, V, 3) or an object (e.g. Meshes or Pointclouds)
+            with `verts_padded` or `points_padded` as an attribute giving the (x, y, z)
+            vertex positions for which to sample. (x, y) verts should be normalized such
+            that (-1, -1) corresponds to top-left and (+1, +1) to bottom-right
+            location in the input feature map.
+        return_packed: (bool) Indicates whether to return packed features
+        interp_mode: (str) Specifies how to interpolate features.
+            ('bilinear' or 'nearest')
+        padding_mode: (str) Specifies how to handle vertices outside of the
+            [-1, 1] range. ('zeros', 'reflection', or 'border')
+        align_corners (bool): Geometrically, we consider the pixels of the
+            input  as squares rather than points.
+            If set to ``True``, the extrema (``-1`` and ``1``) are considered as
+            referring to the center points of the input's corner pixels. If set
+            to ``False``, they are instead considered as referring to the corner
+            points of the input's corner pixels, making the sampling more
+            resolution agnostic. Default: ``True``
+
+    Returns:
+        feats_sampled: FloatTensor of shape (N, V, C) giving sampled features for each
+            vertex. If feats is a list, we return concatenated features in axis=2 of
+            shape (N, V, sum(C_n)) where C_n = feats[n].shape[1].
+            If return_packed = True, the features are transformed to a packed
+            representation of shape (sum(V), C)
+    """
+    if torch.is_tensor(verts):
+        if verts.dim() != 3:
+            raise ValueError("verts tensor should be 3 dimensional")
+        grid = verts
+    elif hasattr(verts, "verts_padded"):
+        grid = verts.verts_padded()
+    elif hasattr(verts, "points_padded"):
+        grid = verts.points_padded()
+    else:
+        raise ValueError(
+            "verts must be a tensor or have a "
+            + "`points_padded` or `verts_padded` attribute."
+        )
+
+    # Only the (x, y) coordinates are used as the sampling grid.
+    grid = grid[:, None, :, :2]  # (N, 1, V, 2)
+    if torch.is_tensor(feats):
+        feats = [feats]
+    for feat in feats:
+        if feat.dim() != 4:
+            raise ValueError("feats must have shape (N, C, H, W)")
+        if grid.shape[0] != feat.shape[0]:
+            raise ValueError("inconsistent batch dimension")
+
+    feats_sampled = []
+    for feat in feats:
+        feat_sampled = F.grid_sample(
+            feat,
+            grid,
+            mode=interp_mode,
+            padding_mode=padding_mode,
+            align_corners=align_corners,
+        )  # (N, C, 1, V)
+        feat_sampled = feat_sampled.squeeze(dim=2).transpose(1, 2)  # (N, V, C)
+        feats_sampled.append(feat_sampled)
+    feats_sampled = torch.cat(feats_sampled, dim=2)  # (N, V, sum(C))
+
+    if return_packed:
+        # flatten the first two dimensions: (N*V, C)
+        feats_sampled = feats_sampled.view(-1, feats_sampled.shape[-1])
+        if hasattr(verts, "verts_padded_to_packed_idx"):
+            idx = (
+                verts.verts_padded_to_packed_idx()
+                .view(-1, 1)
+                .expand(-1, feats_sampled.shape[-1])
+            )
+            feats_sampled = feats_sampled.gather(0, idx)  # (sum(V), C)
+
+    return feats_sampled
diff --git a/pytorch3d/pytorch3d/renderer/__init__.py b/pytorch3d/pytorch3d/renderer/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a667b012862f4018d9c8192d52c013cfacfde205
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/__init__.py
@@ -0,0 +1,87 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .blending import (
+    BlendParams,
+    hard_rgb_blend,
+    sigmoid_alpha_blend,
+    softmax_rgb_blend,
+)
+from .camera_utils import join_cameras_as_batch, rotate_on_spot
+from .cameras import (  # deprecated  # deprecated  # deprecated  # deprecated
+    camera_position_from_spherical_angles,
+    CamerasBase,
+    FoVOrthographicCameras,
+    FoVPerspectiveCameras,
+    get_world_to_view_transform,
+    look_at_rotation,
+    look_at_view_transform,
+    OpenGLOrthographicCameras,
+    OpenGLPerspectiveCameras,
+    OrthographicCameras,
+    PerspectiveCameras,
+    SfMOrthographicCameras,
+    SfMPerspectiveCameras,
+)
+from .implicit import (
+    AbsorptionOnlyRaymarcher,
+    EmissionAbsorptionRaymarcher,
+    GridRaysampler,
+    HarmonicEmbedding,
+    HeterogeneousRayBundle,
+    ImplicitRenderer,
+    MonteCarloRaysampler,
+    MultinomialRaysampler,
+    NDCGridRaysampler,
+    NDCMultinomialRaysampler,
+    ray_bundle_to_ray_points,
+    ray_bundle_variables_to_ray_points,
+    RayBundle,
+    VolumeRenderer,
+    VolumeSampler,
+)
+from .lighting import AmbientLights, diffuse, DirectionalLights, PointLights, specular
+from .materials import Materials
+from .mesh import (
+    gouraud_shading,
+    HardFlatShader,
+    HardGouraudShader,
+    HardPhongShader,
+    MeshRasterizer,
+    MeshRenderer,
+    MeshRendererWithFragments,
+    phong_shading,
+    RasterizationSettings,
+    rasterize_meshes,
+    SoftGouraudShader,
+    SoftPhongShader,
+    SoftSilhouetteShader,
+    SplatterPhongShader,
+    Textures,
+    TexturesAtlas,
+    TexturesUV,
+    TexturesVertex,
+)
+
+from .points import (
+    AlphaCompositor,
+    NormWeightedCompositor,
+    PointsRasterizationSettings,
+    PointsRasterizer,
+    PointsRenderer,
+    PulsarPointsRenderer,
+    rasterize_points,
+)
+from .splatter_blend import SplatterBlender
+from .utils import (
+    convert_to_tensors_and_broadcast,
+    ndc_grid_sample,
+    ndc_to_grid_sample_coords,
+    TensorProperties,
+)
+
+
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
diff --git a/pytorch3d/pytorch3d/renderer/blending.py b/pytorch3d/pytorch3d/renderer/blending.py
new file mode 100644
index 0000000000000000000000000000000000000000..07c9243727cb2e751b61a2744bfbf848eb12b942
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/blending.py
@@ -0,0 +1,239 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import NamedTuple, Sequence, Union
+
+import torch
+from pytorch3d import _C
+from pytorch3d.common.datatypes import Device
+
+# Example functions for blending the top K colors per pixel using the outputs
+# from rasterization.
+# NOTE: All blending function should return an RGBA image per batch element
+
+
+class BlendParams(NamedTuple):
+    """
+    Named tuple storing the blending parameters, with default values.
+
+    Members:
+        sigma (float): For SoftmaxPhong, controls the width of the sigmoid
+            function used to calculate the 2D distance based probability. Determines
+            the sharpness of the edges of the shape. Higher => faces have less defined
+            edges. For SplatterPhong, this is the standard deviation of the Gaussian
+            kernel. Higher => splats have a stronger effect and the rendered image is
+            more blurry.
+        gamma (float): Controls the scaling of the exponential function used
+            to set the opacity of the color.
+            Higher => faces are more transparent.
+        background_color: RGB values for the background color as a tuple or
+            as a tensor of three floats. Defaults to white (1.0, 1.0, 1.0).
+    """
+
+    sigma: float = 1e-4
+    gamma: float = 1e-4
+    background_color: Union[torch.Tensor, Sequence[float]] = (1.0, 1.0, 1.0)
+
+
+def _get_background_color(
+    blend_params: BlendParams, device: Device, dtype=torch.float32
+) -> torch.Tensor:
+    """Return `blend_params.background_color` as a tensor placed on `device`."""
+    color = blend_params.background_color
+    if not isinstance(color, torch.Tensor):
+        # `dtype` only applies here, when a tensor is built from a sequence.
+        return torch.tensor(color, dtype=dtype, device=device)
+    return color.to(device)
+
+
+def hard_rgb_blend(
+    colors: torch.Tensor, fragments, blend_params: BlendParams
+) -> torch.Tensor:
+    """
+    Naive blending of top K faces to return an RGBA image
+      - **RGB** - color of the closest face (i.e. K=0) where a face was hit,
+        the background color everywhere else.
+      - **A** - 1.0 where a face was hit, 0.0 for background pixels.
+
+    Args:
+        colors: (N, H, W, K, 3) RGB color for each of the top K faces per pixel.
+        fragments: the outputs of rasterization. From this we use
+            - pix_to_face: LongTensor of shape (N, H, W, K) specifying the indices
+              of the faces (in the packed representation) which overlap each
+              pixel in the image. Negative entries mark background pixels.
+        blend_params: BlendParams instance that contains a background_color
+            field specifying the color for the background
+    Returns:
+        RGBA pixel_colors: (N, H, W, 4)
+    """
+    # masked_scatter needs a source with the dtype of `colors`, so the
+    # background is built in (and, for tensor inputs, cast to) that dtype.
+    background_color = _get_background_color(
+        blend_params, fragments.pix_to_face.device, dtype=colors.dtype
+    ).to(dtype=colors.dtype)
+    # Mask for the background.
+    is_background = fragments.pix_to_face[..., 0] < 0  # (N, H, W)
+
+    # One copy of the background color is scattered per background pixel.
+    num_background_pixels = is_background.sum()
+    pixel_colors = colors[..., 0, :].masked_scatter(
+        is_background[..., None],
+        background_color[None, :].expand(num_background_pixels, -1),
+    )  # (N, H, W, 3)
+
+    # Concat with the alpha channel.
+    alpha = (~is_background).type_as(pixel_colors)[..., None]
+    return torch.cat([pixel_colors, alpha], dim=-1)  # (N, H, W, 4)
+
+
+# Wrapper for the C++/CUDA Implementation of sigmoid alpha blend.
+class _SigmoidAlphaBlend(torch.autograd.Function):
+    """Autograd function wrapping the compiled sigmoid alpha blend kernels."""
+
+    @staticmethod
+    def forward(ctx, dists, pix_to_face, sigma):
+        # sigma is kept as a plain attribute; tensors go via save_for_backward.
+        ctx.sigma = sigma
+        alphas = _C.sigmoid_alpha_blend(dists, pix_to_face, sigma)
+        ctx.save_for_backward(dists, pix_to_face, alphas)
+        return alphas
+
+    @staticmethod
+    def backward(ctx, grad_alphas):
+        dists, pix_to_face, alphas = ctx.saved_tensors
+        backward_args = (grad_alphas, alphas, dists, pix_to_face, ctx.sigma)
+        return _C.sigmoid_alpha_blend_backward(*backward_args), None, None
+
+
+_sigmoid_alpha = _SigmoidAlphaBlend.apply
+
+
+def sigmoid_alpha_blend(colors, fragments, blend_params: BlendParams) -> torch.Tensor:
+    """
+    Silhouette blending to return an RGBA image
+      - **RGB** - color of the closest face (k=0) for every pixel.
+      - **A** - blend based on the 2D distance based probability map [1].
+
+    Args:
+        colors: (N, H, W, K, 3) RGB color for each of the top K faces per pixel.
+        fragments: the outputs of rasterization. From this we use
+            - pix_to_face: LongTensor of shape (N, H, W, K) giving the indices of
+              the faces (in the packed representation) overlapping each pixel.
+            - dists: FloatTensor of shape (N, H, W, K) giving the 2D euclidean
+              distance from each pixel center to each of the K overlapping faces.
+        blend_params: BlendParams instance; only `sigma` is used here.
+
+    Returns:
+        RGBA pixel_colors: (N, H, W, 4)
+
+    [1] Liu et al, 'Soft Rasterizer: A Differentiable Renderer for Image-based
+        3D Reasoning', ICCV 2019
+    """
+    N, H, W, _ = fragments.pix_to_face.shape
+    rgba = colors.new_ones((N, H, W, 4))
+    rgba[..., :3] = colors[..., 0, :]
+    rgba[..., 3] = _sigmoid_alpha(
+        fragments.dists, fragments.pix_to_face, blend_params.sigma
+    )
+    return rgba
+
+
+def softmax_rgb_blend(
+    colors: torch.Tensor,
+    fragments,
+    blend_params: BlendParams,
+    znear: Union[float, torch.Tensor] = 1.0,
+    zfar: Union[float, torch.Tensor] = 100,
+) -> torch.Tensor:
+    """
+    RGB and alpha channel blending to return an RGBA image based on the method
+    proposed in [1]
+      - **RGB** - blend the colors based on the 2D distance based probability map and
+        relative z distances.
+      - **A** - blend based on the 2D distance based probability map.
+
+    Args:
+        colors: (N, H, W, K, 3) RGB color for each of the top K faces per pixel.
+        fragments: namedtuple with outputs of rasterization. We use properties
+            - pix_to_face: LongTensor of shape (N, H, W, K) specifying the indices
+              of the faces (in the packed representation) which
+              overlap each pixel in the image.
+            - dists: FloatTensor of shape (N, H, W, K) specifying
+              the 2D euclidean distance from the center of each pixel
+              to each of the top K overlapping faces.
+            - zbuf: FloatTensor of shape (N, H, W, K) specifying
+              the interpolated depth from each pixel to each of the
+              top K overlapping faces.
+        blend_params: instance of BlendParams containing properties
+            - sigma: float, parameter which controls the width of the sigmoid
+              function used to calculate the 2D distance based probability.
+              Sigma controls the sharpness of the edges of the shape.
+            - gamma: float, parameter which controls the scaling of the
+              exponential function used to control the opacity of the color.
+            - background_color: (3) element list/tuple/torch.Tensor specifying
+              the RGB values for the background color.
+        znear: float or per-batch tensor, near clipping plane in the z direction
+        zfar: float or per-batch tensor, far clipping plane in the z direction
+
+    Returns:
+        RGBA pixel_colors: (N, H, W, 4)
+
+    [1] Shichen Liu et al, 'Soft Rasterizer: A Differentiable Renderer for
+    Image-based 3D Reasoning'
+    """
+
+    N, H, W, K = fragments.pix_to_face.shape
+    pixel_colors = torch.ones((N, H, W, 4), dtype=colors.dtype, device=colors.device)
+    background_color = _get_background_color(blend_params, fragments.pix_to_face.device)
+
+    # Small constant used in the background weight and as a lower clamp bound.
+    eps = 1e-10
+
+    # Mask for padded pixels.
+    mask = fragments.pix_to_face >= 0
+
+    # Sigmoid probability map based on the distance of the pixel to the face.
+    prob_map = torch.sigmoid(-fragments.dists / blend_params.sigma) * mask
+
+    # The product over the K faces ensures that alpha will be 0.0 if at least 1
+    # face fully covers the pixel as for that face, prob will be 1.0.
+    # This results in a multiplication by 0.0 because of the (1.0 - prob)
+    # term. Therefore 1.0 - alpha will be 1.0.
+    alpha = torch.prod((1.0 - prob_map), dim=-1)
+
+    # Weights for each face. Adjust the exponential by the max z to prevent
+    # overflow. zbuf shape (N, H, W, K), find max over K.
+    # TODO: there may still be some instability in the exponent calculation.
+
+    # Reshape to be compatible with (N, H, W, K) values in fragments
+    if torch.is_tensor(zfar):
+        # pyre-fixme[16]
+        zfar = zfar[:, None, None, None]
+    if torch.is_tensor(znear):
+        # pyre-fixme[16]: Item `float` of `Union[float, Tensor]` has no attribute
+        #  `__getitem__`.
+        znear = znear[:, None, None, None]
+
+    z_inv = (zfar - fragments.zbuf) / (zfar - znear) * mask
+    z_inv_max = torch.max(z_inv, dim=-1).values[..., None].clamp(min=eps)
+    weights_num = prob_map * torch.exp((z_inv - z_inv_max) / blend_params.gamma)
+
+    # Also apply exp normalize trick for the background color weight.
+    # Clamp to ensure delta is never 0.
+    # pyre-fixme[6]: Expected `Tensor` for 1st param but got `float`.
+    delta = torch.exp((eps - z_inv_max) / blend_params.gamma).clamp(min=eps)
+
+    # Normalize weights.
+    # weights_num shape: (N, H, W, K). Sum over K and divide through by the sum.
+    denom = weights_num.sum(dim=-1)[..., None] + delta
+
+    # Sum: weights * textures + background color
+    weighted_colors = (weights_num[..., None] * colors).sum(dim=-2)
+    weighted_background = delta * background_color
+    pixel_colors[..., :3] = (weighted_colors + weighted_background) / denom
+    pixel_colors[..., 3] = 1.0 - alpha
+
+    return pixel_colors
diff --git a/pytorch3d/pytorch3d/renderer/camera_conversions.py b/pytorch3d/pytorch3d/renderer/camera_conversions.py
new file mode 100644
index 0000000000000000000000000000000000000000..7617513d3235ac8a8e59d8e1b737a03194112c47
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/camera_conversions.py
@@ -0,0 +1,192 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+from typing import Tuple
+
+import torch
+
+from ..transforms import matrix_to_rotation_6d
+from .cameras import PerspectiveCameras
+
+
+LOGGER = logging.getLogger(__name__)
+
+
+def _cameras_from_opencv_projection(
+    R: torch.Tensor,
+    tvec: torch.Tensor,
+    camera_matrix: torch.Tensor,
+    image_size: torch.Tensor,
+) -> PerspectiveCameras:
+    """
+    Build PyTorch3D `PerspectiveCameras` from batched OpenCV-style parameters.
+
+    All inputs are indexed along a leading batch dimension; `image_size`
+    holds (height, width) per camera and is passed on unchanged.
+    """
+    # Intrinsics in screen units: (fx, fy) and the principal point (cx, cy).
+    focal_px = torch.stack([camera_matrix[:, 0, 0], camera_matrix[:, 1, 1]], dim=-1)
+    center_px = camera_matrix[:, :2, 2]
+
+    # Retype the image_size like R and flip to (width, height).
+    size_wh = image_size.to(R).flip(dims=(1,))
+
+    # Screen to NDC conversion: for non square images the smallest side spans
+    # [-1, 1] and the largest side [-u, u] with u > 1, consistent with the
+    # PyTorch3D renderer and `get_ndc_to_screen_transform`.
+    half_min_side = (size_wh.to(R).min(dim=1, keepdim=True)[0] / 2.0).expand(-1, 2)
+    image_center = size_wh / 2.0
+
+    # OpenCV screen axes have the opposite orientation, so x and y are
+    # negated on copies of R and tvec. R is also transposed because OpenCV
+    # multiplies points from the opposite (left) side.
+    R_p3d = R.clone().permute(0, 2, 1)
+    R_p3d[:, :, :2] *= -1
+    T_p3d = tvec.clone()
+    T_p3d[:, :2] *= -1
+
+    return PerspectiveCameras(
+        R=R_p3d,
+        T=T_p3d,
+        focal_length=focal_px / half_min_side,
+        principal_point=-(center_px - image_center) / half_min_side,
+        image_size=image_size,
+        device=R.device,
+    )
+
+
+def _opencv_from_cameras_projection(
+    cameras: PerspectiveCameras,
+    image_size: torch.Tensor,
+) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Convert `cameras` to OpenCV-style `(R, tvec, camera_matrix)` tensors.
+    `image_size` holds (height, width) per camera and sets the pixel scale.
+    """
+    # Negate the x, y axes on copies, then transpose the rotation.
+    tvec = cameras.T.clone()
+    tvec[:, :2] *= -1
+    rot = cameras.R.clone()
+    rot[:, :, :2] *= -1
+    R = rot.permute(0, 2, 1)
+
+    # Retype the image_size like R and flip to (width, height).
+    size_wh = image_size.to(R).flip(dims=(1,))
+
+    # NDC to screen conversion: one NDC unit is half of the smallest side.
+    half_min_side = (size_wh.to(R).min(dim=1, keepdim=True)[0] / 2.0).expand(-1, 2)
+    focal_px = cameras.focal_length * half_min_side
+    center_px = -cameras.principal_point * half_min_side + size_wh / 2.0
+
+    # Assemble the intrinsics matrix (same shape as R).
+    camera_matrix = torch.zeros_like(R)
+    camera_matrix[:, 0, 0] = focal_px[:, 0]
+    camera_matrix[:, 1, 1] = focal_px[:, 1]
+    camera_matrix[:, :2, 2] = center_px
+    camera_matrix[:, 2, 2] = 1.0
+    return R, tvec, camera_matrix
+
+
+def _pulsar_from_opencv_projection(
+    R: torch.Tensor,
+    tvec: torch.Tensor,
+    camera_matrix: torch.Tensor,
+    image_size: torch.Tensor,
+    znear: float = 0.1,
+) -> torch.Tensor:
+    """
+    Convert batched OpenCV camera parameters to Pulsar camera parameters.
+
+    `R` and `camera_matrix` must be (N, 3, 3), `tvec` (N, 3) or (N, 3, 1), and
+    `image_size` (N, 2) as (height, width), identical across the batch.
+    `znear` is the near plane; the focal length is set just below it.
+
+    Returns a tensor with one row per camera, concatenating the camera
+    position (3 values), the 6D rotation of R transposed, the focal length,
+    the sensor width and the principal point offsets (cx, cy).
+    """
+    assert len(camera_matrix.size()) == 3, "This function requires batched inputs!"
+    assert len(R.size()) == 3, "This function requires batched inputs!"
+    assert len(tvec.size()) in (2, 3), "This function requires batched inputs!"
+
+    # Validate parameters.
+    image_size_wh = image_size.to(R).flip(dims=(1,))
+    assert torch.all(
+        image_size_wh > 0
+    ), "height and width must be positive but min is: %s" % (
+        str(image_size_wh.min().item())
+    )
+    assert camera_matrix.size(1) == 3 and camera_matrix.size(2) == 3, (
+        "Incorrect camera matrix shape: expected 3x3 but got %dx%d"
+        % (camera_matrix.size(1), camera_matrix.size(2))
+    )
+    assert R.size(1) == 3 and R.size(2) == 3, (
+        "Incorrect R shape: expected 3x3 but got %dx%d" % (R.size(1), R.size(2))
+    )
+    if len(tvec.size()) == 2:
+        tvec = tvec.unsqueeze(2)
+    assert tvec.size(1) == 3 and tvec.size(2) == 1, (
+        "Incorrect tvec shape: expected 3x1 but got %dx%d"
+        % (tvec.size(1), tvec.size(2))
+    )
+    # Check batch size.
+    batch_size = camera_matrix.size(0)
+    assert (
+        R.size(0) == batch_size
+    ), f"Expected R to have batch size {batch_size}. Has size {R.size(0)}."
+    assert (
+        tvec.size(0) == batch_size
+    ), f"Expected tvec to have batch size {batch_size}. Has size {tvec.size(0)}."
+    # Check image sizes.
+    image_w = image_size_wh[0, 0]
+    image_h = image_size_wh[0, 1]
+    assert torch.all(
+        image_size_wh[:, 0] == image_w
+    ), "All images in a batch must have the same width!"
+    assert torch.all(
+        image_size_wh[:, 1] == image_h
+    ), "All images in a batch must have the same height!"
+    # Focal length.
+    fx = camera_matrix[:, 0, 0].unsqueeze(1)
+    fy = camera_matrix[:, 1, 1].unsqueeze(1)
+    # Check that we introduce less than 1% error by averaging the focal lengths.
+    fx_y = fx / fy
+    if torch.any(fx_y > 1.01) or torch.any(fx_y < 0.99):
+        LOGGER.warning(
+            "Pulsar only supports a single focal length. For converting OpenCV "
+            "focal lengths, we average them for x and y directions. "
+            "The focal lengths for x and y you provided differ by more than 1%, "
+            "which means this could introduce a noticeable error."
+        )
+    f = (fx + fy) / 2
+    # Normalize f into normalized device coordinates.
+    focal_length_px = f / image_w
+    # Transfer into focal_length and sensor_width.
+    focal_length = torch.tensor([znear - 1e-5], dtype=torch.float32, device=R.device)
+    focal_length = focal_length[None, :].repeat(batch_size, 1)
+    sensor_width = focal_length / focal_length_px
+    # Principal point.
+    cx = camera_matrix[:, 0, 2].unsqueeze(1)
+    cy = camera_matrix[:, 1, 2].unsqueeze(1)
+    # Transfer principal point offset into centered offset.
+    cx = -(cx - image_w / 2)
+    cy = cy - image_h / 2
+    # Concatenate to final vector.
+    param = torch.cat([focal_length, sensor_width, cx, cy], dim=1)
+    # Camera position is -R^T @ tvec; the rotation is stored as R^T in 6D form.
+    R_trans = R.permute(0, 2, 1)
+    cam_pos = -torch.bmm(R_trans, tvec).squeeze(2)
+    cam_rot = matrix_to_rotation_6d(R_trans)
+    return torch.cat([cam_pos, cam_rot, param], dim=1)
+
+
+def _pulsar_from_cameras_projection(
+    cameras: PerspectiveCameras, image_size: torch.Tensor
+) -> torch.Tensor:
+    """Convert PyTorch3D cameras to Pulsar parameters via the OpenCV convention."""
+    opencv_params = _opencv_from_cameras_projection(cameras, image_size)
+    return _pulsar_from_opencv_projection(*opencv_params, image_size)
diff --git a/pytorch3d/pytorch3d/renderer/camera_utils.py b/pytorch3d/pytorch3d/renderer/camera_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..1bddcaf23335d4515135b08471147c0ea7338358
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/camera_utils.py
@@ -0,0 +1,207 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Sequence, Tuple
+
+import torch
+from pytorch3d.transforms import Transform3d
+
+from .cameras import CamerasBase
+
+
+def camera_to_eye_at_up(
+    world_to_view_transform: Transform3d,
+) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Recover the eye, at and up vectors describing the camera position
+    from a world to view transform.
+
+    For example, if cam is a camera object, then after running
+
+    .. code-block::
+
+        from cameras import look_at_view_transform
+        eye, at, up = camera_to_eye_at_up(cam.get_world_to_view_transform())
+        R, T = look_at_view_transform(eye=eye, at=at, up=up)
+
+    any other camera created from R and T will have the same world to view
+    transform as cam.
+
+    Conversely, starting from a camera position R and T, after running:
+
+    .. code-block::
+
+        from cameras import get_world_to_view_transform, look_at_view_transform
+        eye, at, up = camera_to_eye_at_up(get_world_to_view_transform(R=R, T=T))
+        R2, T2 = look_at_view_transform(eye=eye, at=at, up=up)
+
+    R2 will equal R and T2 will equal T.
+
+    Args:
+        world_to_view_transform: Transform3d representing the extrinsic
+            transformation of N cameras.
+
+    Returns:
+        eye: FloatTensor of shape [N, 3] representing the camera centers in world space.
+        at: FloatTensor of shape [N, 3] representing points in world space directly in
+            front of the cameras e.g. the positions of objects to be viewed by the
+            cameras.
+        up: FloatTensor of shape [N, 3] representing vectors in world space which
+            when projected on to the camera plane point upwards.
+    """
+    view_to_world = world_to_view_transform.inverse()
+    # In the PyTorch3D right handed coordinate system, the camera in view space
+    # is always at the origin looking along the +z axis.
+    #
+    # A direction such as "up" cannot be passed through transform_points, so
+    # instead the point one unit above the camera (eye + up) is transformed
+    # and the eye is subtracted again afterwards.
+    view_points = torch.tensor(
+        [[0, 0, 0], [0, 0, 1], [0, 1, 0]],  # eye, at, eye + up
+        dtype=torch.float32,
+        device=view_to_world.device,
+    )
+    world_points = view_to_world.transform_points(view_points).reshape(-1, 3, 3)
+
+    eye, at, eye_plus_up = world_points.unbind(1)
+    return eye, at, eye_plus_up - eye
+
+
+def rotate_on_spot(
+    R: torch.Tensor, T: torch.Tensor, rotation: torch.Tensor
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Rotate camera(s) on the spot. Given a camera position as R and T
+    (batched or not) and a rotation matrix (batched or not),
+    return a new R and T representing camera position(s)
+    in the same location but rotated on the spot by the
+    given rotation. In particular the new world to view
+    rotation will be the previous one followed by the inverse
+    of the given rotation.
+
+    For example, adding the following lines before constructing a camera
+    will make the camera point a little to the right of where it
+    otherwise would have been.
+
+    .. code-block::
+
+        from math import radians
+        from pytorch3d.transforms import axis_angle_to_matrix
+        angles = [0, radians(10), 0]
+        rotation = axis_angle_to_matrix(torch.FloatTensor(angles))
+        R, T = rotate_on_spot(R, T, rotation)
+
+    Note here that if you have a column vector, then when you
+    premultiply it by this `rotation` (see the rotation_conversions doc),
+    then it will be rotated anticlockwise if facing the -y axis.
+    In our context, where we postmultiply row vectors to transform them,
+    `rotation` will rotate the camera clockwise around the -y axis
+    (i.e. when looking down), which is a turn to the right.
+
+    If angles was [radians(10), 0, 0], the camera would get pointed
+    up a bit instead.
+
+    If angles was [0, 0, radians(10)], the camera would be rotated anticlockwise
+    a bit, so the image would appear rotated clockwise from how it
+    otherwise would have been.
+
+    If you want to translate the camera from the origin in camera
+    coordinates, this is simple and does not need a separate function.
+    In particular, a translation by X = [a, b, c] would cause
+    the camera to move a units left, b units up, and c units
+    forward. This is achieved by using T-X in place of T.
+
+    Args:
+        R: FloatTensor of shape [3, 3] or [N, 3, 3]
+        T: FloatTensor of shape [3] or [N, 3]
+        rotation: FloatTensor of shape [3, 3] or [n, 3, 3]
+            where if neither n nor N is 1, then n and N must be equal.
+
+    Returns:
+        R: FloatTensor of shape [max(N, n), 3, 3]
+        T: FloatTensor of shape [max(N, n), 3]
+    """
+    # Promote unbatched inputs to a batch of one.
+    R = R[None] if R.ndim == 2 else R
+    T = T[None] if T.ndim == 1 else T
+    rotation = rotation[None] if rotation.ndim == 2 else rotation
+
+    if not (R.ndim == 3 and R.shape[1:] == (3, 3)):
+        raise ValueError("Invalid R")
+    if not (T.ndim == 2 and T.shape[1] == 3):
+        raise ValueError("Invalid T")
+    if not (rotation.ndim == 3 and rotation.shape[1:] == (3, 3)):
+        raise ValueError("Invalid rotation")
+
+    # World-to-view rotation followed by the inverse of `rotation`.
+    rotated_R = R @ rotation.transpose(1, 2)
+    # New translation is new_R^T @ (R @ T), with T treated as a column
+    # vector, so that the camera location is kept.
+    RT = torch.bmm(R, T[:, :, None])
+    rotated_T = torch.matmul(rotated_R.transpose(1, 2), RT)[:, :, 0]
+    return rotated_R, rotated_T
+
+
+def join_cameras_as_batch(cameras_list: Sequence[CamerasBase]) -> CamerasBase:
+    """
+    Create a batched cameras object by concatenating a list of input
+    cameras objects. All the tensor attributes will be joined along
+    the batch dimension.
+
+    Args:
+        cameras_list: List of camera classes all of the same type and
+            on the same device. Each represents one or more cameras.
+    Returns:
+        cameras: single batched cameras object of the same
+            type as all the objects in the input list.
+    Raises:
+        ValueError: if the inputs are not all cameras of one type on one
+            device, or if a field is inconsistent or cannot be batched.
+    """
+    # Validate before touching camera-only attributes such as `_FIELDS`, so
+    # that a non-camera entry is reported by the ValueError below.
+    if not all(isinstance(c, CamerasBase) for c in cameras_list):
+        raise ValueError("cameras in cameras_list must inherit from CamerasBase")
+
+    # Get the type and fields to join from the first camera in the batch
+    c0 = cameras_list[0]
+    fields = c0._FIELDS
+    shared_fields = c0._SHARED_FIELDS
+
+    if not all(type(c) is type(c0) for c in cameras_list[1:]):
+        raise ValueError("All cameras must be of the same type")
+
+    if not all(c.device == c0.device for c in cameras_list[1:]):
+        raise ValueError("All cameras in the batch must be on the same device")
+
+    # Concat the fields to make a batched tensor
+    kwargs = {"device": c0.device}
+    for field in fields:
+        field_not_none = [(getattr(c, field) is not None) for c in cameras_list]
+        if not any(field_not_none):
+            continue
+        if not all(field_not_none):
+            raise ValueError(f"Attribute {field} is inconsistently present")
+
+        attrs_list = [getattr(c, field) for c in cameras_list]
+        if field in shared_fields:
+            # Only needs to be set once
+            if not all(a == attrs_list[0] for a in attrs_list):
+                raise ValueError(f"Attribute {field} is not constant across inputs")
+
+            # e.g. "in_ndc" is set as attribute "_in_ndc" on the class
+            # but provided as "in_ndc" in the input args
+            if field.startswith("_"):
+                field = field[1:]
+
+            kwargs[field] = attrs_list[0]
+        elif isinstance(attrs_list[0], torch.Tensor):
+            # Tensor attributes are joined along the batch dimension.
+            kwargs[field] = torch.cat(attrs_list, dim=0)
+        else:
+            raise ValueError(f"Field {field} type is not supported for batching")
+
+    return c0.__class__(**kwargs)
diff --git a/pytorch3d/pytorch3d/renderer/cameras.py b/pytorch3d/pytorch3d/renderer/cameras.py
new file mode 100644
index 0000000000000000000000000000000000000000..97c1dfd8e509f57cfff8f80608c5f5becb7124c6
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/cameras.py
@@ -0,0 +1,1874 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+import warnings
+from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from pytorch3d.common.datatypes import Device
+from pytorch3d.transforms import Rotate, Transform3d, Translate
+
+from .utils import convert_to_tensors_and_broadcast, TensorProperties
+
+
+# Default values for rotation and translation matrices.
+_R = torch.eye(3)[None]  # (1, 3, 3)
+_T = torch.zeros(1, 3)  # (1, 3)
+
+# An input which is a float per batch element
+_BatchFloatType = Union[float, Sequence[float], torch.Tensor]
+
+# one or two floats per batch element
+_FocalLengthType = Union[
+    float, Sequence[Tuple[float]], Sequence[Tuple[float, float]], torch.Tensor
+]
+
+
+class CamerasBase(TensorProperties):
+    """
+    `CamerasBase` implements a base class for all cameras.
+
+    For cameras, there are four different coordinate systems (or spaces)
+    - World coordinate system: This is the system the object lives in - the world.
+    - Camera view coordinate system: This is the system that has its origin on
+        the camera and the Z-axis perpendicular to the image plane.
+        In PyTorch3D, we assume that +X points left, and +Y points up and
+        +Z points out from the image plane.
+        The transformation from world --> view happens after applying a rotation (R)
+        and translation (T)
+    - NDC coordinate system: This is the normalized coordinate system that confines
+        the rendered part of the object or scene to a volume, also known as the
+        view volume. For square images, given the PyTorch3D convention, (+1, +1, znear)
+        is the top left near corner, and (-1, -1, zfar) is the bottom right far
+        corner of the volume.
+        The transformation from view --> NDC happens after applying the camera
+        projection matrix (P) if defined in NDC space.
+        For non square images, we scale the points such that smallest side
+        has range [-1, 1] and the largest side has range [-u, u], with u > 1.
+    - Screen coordinate system: This is another representation of the view volume with
+        the XY coordinates defined in image space instead of a normalized space.
+
+    An illustration of the coordinate systems can be found in pytorch3d/docs/notes/cameras.md.
+
+    CamerasBase defines methods that are common to all camera models:
+        - `get_camera_center` that returns the optical center of the camera in
+            world coordinates
+        - `get_world_to_view_transform` which returns a 3D transform from
+            world coordinates to the camera view coordinates (R, T)
+        - `get_full_projection_transform` which composes the projection
+            transform (P) with the world-to-view transform (R, T)
+        - `transform_points` which takes a set of input points in world coordinates and
+            projects to the space the camera is defined in (NDC or screen)
+        - `get_ndc_camera_transform` which defines the transform from screen/NDC to
+            PyTorch3D's NDC space
+        - `transform_points_ndc` which takes a set of points in world coordinates and
+            projects them to PyTorch3D's NDC space
+        - `transform_points_screen` which takes a set of points in world coordinates and
+            projects them to screen space
+
+    For each new camera, one should implement the `get_projection_transform`
+    routine that returns the mapping from camera view coordinates to camera
+    coordinates (NDC or screen).
+
+    Another useful function that is specific to each camera model is
+    `unproject_points` which sends points from camera coordinates (NDC or screen)
+    back to camera view or world coordinates depending on the `world_coordinates`
+    boolean argument of the function.
+    """
+
+    # Used in __getitem__ to index the relevant fields
+    # When creating a new camera, this should be set in the __init__
+    _FIELDS: Tuple[str, ...] = ()
+
+    # Names of fields which are a constant property of the whole batch, rather
+    # than themselves a batch of data.
+    # When joining objects into a batch, they will have to agree.
+    _SHARED_FIELDS: Tuple[str, ...] = ()
+
+    def get_projection_transform(self, **kwargs):
+        """
+        Calculate the projective transformation matrix.
+
+        Args:
+            **kwargs: parameters for the projection can be passed in as keyword
+                arguments to override the default values set in `__init__`.
+
+        Return:
+            a `Transform3d` object which represents a batch of projection
+            matrices of shape (N, 4, 4)
+        """
+        raise NotImplementedError()
+
+    def unproject_points(self, xy_depth: torch.Tensor, **kwargs):
+        """
+        Transform input points from camera coordinates (NDC or screen)
+        to the world / camera coordinates.
+
+        Each of the input points `xy_depth` of shape (..., 3) is
+        a concatenation of the x, y location and its depth.
+
+        For instance, for an input 2D tensor of shape `(num_points, 3)`
+        `xy_depth` takes the following form:
+            `xy_depth[i] = [x[i], y[i], depth[i]]`,
+        for each point at an index `i`.
+
+        The following example demonstrates the relationship between
+        `transform_points` and `unproject_points`:
+
+        .. code-block:: python
+
+            cameras = # camera object derived from CamerasBase
+            xyz = # 3D points of shape (batch_size, num_points, 3)
+            # transform xyz to the camera view coordinates
+            xyz_cam = cameras.get_world_to_view_transform().transform_points(xyz)
+            # extract the depth of each point as the 3rd coord of xyz_cam
+            depth = xyz_cam[:, :, 2:]
+            # project the points xyz to the camera
+            xy = cameras.transform_points(xyz)[:, :, :2]
+            # append depth to xy
+            xy_depth = torch.cat((xy, depth), dim=2)
+            # unproject to the world coordinates
+            xyz_unproj_world = cameras.unproject_points(xy_depth, world_coordinates=True)
+            print(torch.allclose(xyz, xyz_unproj_world)) # True
+            # unproject to the camera coordinates
+            xyz_unproj = cameras.unproject_points(xy_depth, world_coordinates=False)
+            print(torch.allclose(xyz_cam, xyz_unproj)) # True
+
+        Args:
+            xy_depth: torch tensor of shape (..., 3).
+            world_coordinates: If `True`, unprojects the points back to world
+                coordinates using the camera extrinsics `R` and `T`.
+                `False` ignores `R` and `T` and unprojects to
+                the camera view coordinates.
+            from_ndc: If `False` (default), assumes xy part of input is in
+                NDC space if self.in_ndc(), otherwise in screen space. If
+                `True`, assumes xy is in NDC space even if the camera
+                is defined in screen space.
+
+        Returns
+            new_points: unprojected points with the same shape as `xy_depth`.
+        """
+        raise NotImplementedError()
+
+    def get_camera_center(self, **kwargs) -> torch.Tensor:
+        """
+        Return the 3D location of the camera optical center
+        in the world coordinates.
+
+        Args:
+            **kwargs: parameters for the camera extrinsics can be passed in
+                as keyword arguments to override the default values
+                set in __init__.
+
+        Setting R or T here will update the values set in init as these
+        values may be needed later on in the rendering pipeline e.g. for
+        lighting calculations.
+
+        Returns:
+            C: a batch of 3D locations of shape (N, 3) denoting
+            the locations of the center of each camera in the batch.
+        """
+        w2v_trans = self.get_world_to_view_transform(**kwargs)
+        P = w2v_trans.inverse().get_matrix()
+        # the camera center is the translation component (the first 3 elements
+        # of the last row) of the inverted world-to-view
+        # transform (4x4 RT matrix)
+        C = P[:, 3, :3]
+        return C
+
+    def get_world_to_view_transform(self, **kwargs) -> Transform3d:
+        """
+        Return the world-to-view transform.
+
+        Args:
+            **kwargs: parameters for the camera extrinsics can be passed in
+                as keyword arguments to override the default values
+                set in __init__.
+
+        Setting R and T here will update the values set in init as these
+        values may be needed later on in the rendering pipeline e.g. for
+        lighting calculations.
+
+        Returns:
+            A Transform3d object which represents a batch of transforms
+            of shape (N, 4, 4)
+        """
+        R: torch.Tensor = kwargs.get("R", self.R)
+        T: torch.Tensor = kwargs.get("T", self.T)
+        self.R = R
+        self.T = T
+        world_to_view_transform = get_world_to_view_transform(R=R, T=T)
+        return world_to_view_transform
+
+    def get_full_projection_transform(self, **kwargs) -> Transform3d:
+        """
+        Return the full world-to-camera transform composing the
+        world-to-view and view-to-camera transforms.
+        If camera is defined in NDC space, the projected points are in NDC space.
+        If camera is defined in screen space, the projected points are in screen space.
+
+        Args:
+            **kwargs: parameters for the projection transforms can be passed in
+                as keyword arguments to override the default values
+                set in __init__.
+
+        Setting R and T here will update the values set in init as these
+        values may be needed later on in the rendering pipeline e.g. for
+        lighting calculations.
+
+        Returns:
+            a Transform3d object which represents a batch of transforms
+            of shape (N, 4, 4)
+        """
+        self.R: torch.Tensor = kwargs.get("R", self.R)
+        self.T: torch.Tensor = kwargs.get("T", self.T)
+        world_to_view_transform = self.get_world_to_view_transform(R=self.R, T=self.T)
+        view_to_proj_transform = self.get_projection_transform(**kwargs)
+        return world_to_view_transform.compose(view_to_proj_transform)
+
+    def transform_points(
+        self, points, eps: Optional[float] = None, **kwargs
+    ) -> torch.Tensor:
+        """
+        Transform input points from world to camera space.
+        If camera is defined in NDC space, the projected points are in NDC space.
+        If camera is defined in screen space, the projected points are in screen space.
+
+        For `CamerasBase.transform_points`, setting `eps > 0`
+        stabilizes gradients since it leads to avoiding division
+        by excessively low numbers for points close to the camera plane.
+
+        Args:
+            points: torch tensor of shape (..., 3).
+            eps: If eps!=None, the argument is used to clamp the
+                divisor in the homogeneous normalization of the points
+                transformed to the ndc space. Please see
+                `transforms.Transform3d.transform_points` for details.
+
+                For `CamerasBase.transform_points`, setting `eps > 0`
+                stabilizes gradients since it leads to avoiding division
+                by excessively low numbers for points close to the
+                camera plane.
+
+        Returns
+            new_points: transformed points with the same shape as the input.
+        """
+        world_to_proj_transform = self.get_full_projection_transform(**kwargs)
+        return world_to_proj_transform.transform_points(points, eps=eps)
+
+    def get_ndc_camera_transform(self, **kwargs) -> Transform3d:
+        """
+        Returns the transform from camera projection space (screen or NDC) to NDC space.
+        For cameras that can be specified in screen space, this transform
+        allows points to be converted from screen to NDC space.
+        The default transform scales the points from [0, W]x[0, H]
+        to [-1, 1]x[-u, u] or [-u, u]x[-1, 1] where u > 1 is the aspect ratio of the image.
+        This function should be modified per camera definitions if need be,
+        e.g. for Perspective/Orthographic cameras we provide a custom implementation.
+        This transform assumes PyTorch3D coordinate system conventions for
+        both the NDC space and the input points.
+
+        This transform interfaces with the PyTorch3D renderer which assumes
+        input points to the renderer to be in NDC space.
+        """
+        if self.in_ndc():
+            return Transform3d(device=self.device, dtype=torch.float32)
+        else:
+            # For custom cameras which can be defined in screen space,
+            # users might have to implement the screen to NDC transform based
+            # on the definition of the camera parameters.
+            # See PerspectiveCameras/OrthographicCameras for an example.
+            # We don't flip xy because we assume that world points are in
+            # PyTorch3D coordinates, and thus conversion from screen to ndc
+            # is a mere scaling from image to [-1, 1] scale.
+            image_size = kwargs.get("image_size", self.get_image_size())
+            return get_screen_to_ndc_transform(
+                self, with_xyflip=False, image_size=image_size
+            )
+
+    def transform_points_ndc(
+        self, points, eps: Optional[float] = None, **kwargs
+    ) -> torch.Tensor:
+        """
+        Transforms points from PyTorch3D world/camera space to NDC space.
+        Input points follow the PyTorch3D coordinate system conventions: +X left, +Y up.
+        Output points are in NDC space: +X left, +Y up, origin at image center.
+
+        Args:
+            points: torch tensor of shape (..., 3).
+            eps: If eps!=None, the argument is used to clamp the
+                divisor in the homogeneous normalization of the points
+                transformed to the ndc space. Please see
+                `transforms.Transform3d.transform_points` for details.
+
+                For `CamerasBase.transform_points`, setting `eps > 0`
+                stabilizes gradients since it leads to avoiding division
+                by excessively low numbers for points close to the
+                camera plane.
+
+        Returns
+            new_points: transformed points with the same shape as the input.
+        """
+        world_to_ndc_transform = self.get_full_projection_transform(**kwargs)
+        if not self.in_ndc():
+            to_ndc_transform = self.get_ndc_camera_transform(**kwargs)
+            world_to_ndc_transform = world_to_ndc_transform.compose(to_ndc_transform)
+
+        return world_to_ndc_transform.transform_points(points, eps=eps)
+
+    def transform_points_screen(
+        self, points, eps: Optional[float] = None, with_xyflip: bool = True, **kwargs
+    ) -> torch.Tensor:
+        """
+        Transforms points from PyTorch3D world/camera space to screen space.
+        Input points follow the PyTorch3D coordinate system conventions: +X left, +Y up.
+        Output points are in screen space: +X right, +Y down, origin at top left corner.
+
+        Args:
+            points: torch tensor of shape (..., 3).
+            eps: If eps!=None, the argument is used to clamp the
+                divisor in the homogeneous normalization of the points
+                transformed to the ndc space. Please see
+                `transforms.Transform3d.transform_points` for details.
+
+                For `CamerasBase.transform_points`, setting `eps > 0`
+                stabilizes gradients since it leads to avoiding division
+                by excessively low numbers for points close to the
+                camera plane.
+            with_xyflip: If True, flip x and y directions. In world/camera/ndc coords,
+                +x points to the left and +y up. If with_xyflip is true, in screen
+                coords +x points right, and +y down, following the usual RGB image
+                convention. Warning: do not set to False unless you know what you're
+                doing!
+
+        Returns
+            new_points: transformed points with the same shape as the input.
+        """
+        points_ndc = self.transform_points_ndc(points, eps=eps, **kwargs)
+        image_size = kwargs.get("image_size", self.get_image_size())
+        return get_ndc_to_screen_transform(
+            self, with_xyflip=with_xyflip, image_size=image_size
+        ).transform_points(points_ndc, eps=eps)
+
+    def clone(self):
+        """
+        Returns a copy of `self`.
+        """
+        cam_type = type(self)
+        other = cam_type(device=self.device)
+        return super().clone(other)
+
+    def is_perspective(self):
+        raise NotImplementedError()
+
+    def in_ndc(self):
+        """
+        Specifies whether the camera is defined in NDC space
+        or in screen (image) space
+        """
+        raise NotImplementedError()
+
+    def get_znear(self):
+        return getattr(self, "znear", None)
+
+    def get_image_size(self):
+        """
+        Returns the image size, if provided, expected in the form of (height, width)
+        The image size is used for conversion of projected points to screen coordinates.
+        """
+        return getattr(self, "image_size", None)
+
+    def __getitem__(
+        self, index: Union[int, List[int], torch.BoolTensor, torch.LongTensor]
+    ) -> "CamerasBase":
+        """
+        Override for the __getitem__ method in TensorProperties which needs to be
+        refactored.
+
+        Args:
+            index: an integer index, list/tensor of integer indices, or tensor of boolean
+                indicators used to filter all the fields in the cameras given by self._FIELDS.
+        Returns:
+            an instance of the current cameras class with only the values at the selected index.
+        """
+
+        kwargs = {}
+
+        tensor_types = {
+            # pyre-fixme[16]: Module `cuda` has no attribute `BoolTensor`.
+            "bool": (torch.BoolTensor, torch.cuda.BoolTensor),
+            # pyre-fixme[16]: Module `cuda` has no attribute `LongTensor`.
+            "long": (torch.LongTensor, torch.cuda.LongTensor),
+        }
+        if not isinstance(
+            index, (int, list, *tensor_types["bool"], *tensor_types["long"])
+        ) or (
+            isinstance(index, list)
+            and not all(isinstance(i, int) and not isinstance(i, bool) for i in index)
+        ):
+            msg = (
+                "Invalid index type, expected int, List[int] or Bool/LongTensor; got %r"
+            )
+            raise ValueError(msg % type(index))
+
+        if isinstance(index, int):
+            index = [index]
+
+        if isinstance(index, tensor_types["bool"]):
+            # pyre-fixme[16]: Item `List` of `Union[List[int], BoolTensor,
+            #  LongTensor]` has no attribute `ndim`.
+            # pyre-fixme[16]: Item `List` of `Union[List[int], BoolTensor,
+            #  LongTensor]` has no attribute `shape`.
+            if index.ndim != 1 or index.shape[0] != len(self):
+                raise ValueError(
+                    # pyre-fixme[16]: Item `List` of `Union[List[int], BoolTensor,
+                    #  LongTensor]` has no attribute `shape`.
+                    f"Boolean index of shape {index.shape} does not match cameras"
+                )
+        elif max(index) >= len(self):
+            raise IndexError(f"Index {max(index)} is out of bounds for select cameras")
+
+        for field in self._FIELDS:
+            val = getattr(self, field, None)
+            if val is None:
+                continue
+
+            # e.g. "in_ndc" is set as attribute "_in_ndc" on the class
+            # but provided as "in_ndc" on initialization
+            if field.startswith("_"):
+                field = field[1:]
+
+            if isinstance(val, (str, bool)):
+                kwargs[field] = val
+            elif isinstance(val, torch.Tensor):
+                # In the init, all inputs will be converted to
+                # tensors before setting as attributes
+                kwargs[field] = val[index]
+            else:
+                raise ValueError(f"Field {field} type is not supported for indexing")
+
+        kwargs["device"] = self.device
+        return self.__class__(**kwargs)
+
+
+############################################################
+#             Field of View Camera Classes                 #
+############################################################
+
+
+def OpenGLPerspectiveCameras(
+    znear: _BatchFloatType = 1.0,
+    zfar: _BatchFloatType = 100.0,
+    aspect_ratio: _BatchFloatType = 1.0,
+    fov: _BatchFloatType = 60.0,
+    degrees: bool = True,
+    R: torch.Tensor = _R,
+    T: torch.Tensor = _T,
+    device: Device = "cpu",
+) -> "FoVPerspectiveCameras":
+    """
+    OpenGLPerspectiveCameras has been DEPRECATED. Use FoVPerspectiveCameras instead.
+    Preserving OpenGLPerspectiveCameras for backward compatibility.
+    """
+
+    warnings.warn(
+        """OpenGLPerspectiveCameras is deprecated,
+        Use FoVPerspectiveCameras instead.
+        OpenGLPerspectiveCameras will be removed in future releases.""",
+        PendingDeprecationWarning,
+    )
+
+    return FoVPerspectiveCameras(
+        znear=znear,
+        zfar=zfar,
+        aspect_ratio=aspect_ratio,
+        fov=fov,
+        degrees=degrees,
+        R=R,
+        T=T,
+        device=device,
+    )
+
+
+class FoVPerspectiveCameras(CamerasBase):
+    """
+    A class which stores a batch of parameters to generate a batch of
+    projection matrices by specifying the field of view.
+    The definitions of the parameters follow the OpenGL perspective camera.
+
+    The extrinsics of the camera (R and T matrices) can also be set in the
+    initializer or passed in to `get_full_projection_transform` to get
+    the full transformation from world -> ndc.
+
+    The `transform_points` method calculates the full world -> ndc transform
+    and then applies it to the input points.
+
+    The transforms can also be returned separately as Transform3d objects.
+
+    * Setting the Aspect Ratio for Non Square Images *
+
+    If the desired output image size is non square (i.e. a tuple of (H, W) where H != W)
+    the aspect ratio needs special consideration: There are two aspect ratios
+    to be aware of:
+        - the aspect ratio of each pixel
+        - the aspect ratio of the output image
+    The `aspect_ratio` setting in the FoVPerspectiveCameras sets the
+    pixel aspect ratio. When using this camera with the differentiable rasterizer
+    be aware that in the rasterizer we assume square pixels, but allow
+    variable image aspect ratio (i.e rectangle images).
+
+    In most cases you will want to set the camera `aspect_ratio=1.0`
+    (i.e. square pixels) and only vary the output image dimensions in pixels
+    for rasterization.
+    """
+
+    # For __getitem__
+    _FIELDS = (
+        "K",
+        "znear",
+        "zfar",
+        "aspect_ratio",
+        "fov",
+        "R",
+        "T",
+        "degrees",
+    )
+
+    _SHARED_FIELDS = ("degrees",)
+
+    def __init__(
+        self,
+        znear: _BatchFloatType = 1.0,
+        zfar: _BatchFloatType = 100.0,
+        aspect_ratio: _BatchFloatType = 1.0,
+        fov: _BatchFloatType = 60.0,
+        degrees: bool = True,
+        R: torch.Tensor = _R,
+        T: torch.Tensor = _T,
+        K: Optional[torch.Tensor] = None,
+        device: Device = "cpu",
+    ) -> None:
+        """
+
+        Args:
+            znear: near clipping plane of the view frustum.
+            zfar: far clipping plane of the view frustum.
+            aspect_ratio: aspect ratio of the image pixels.
+                1.0 indicates square pixels.
+            fov: field of view angle of the camera.
+            degrees: bool, set to True if fov is specified in degrees.
+            R: Rotation matrix of shape (N, 3, 3)
+            T: Translation matrix of shape (N, 3)
+            K: (optional) A calibration matrix of shape (N, 4, 4)
+                If provided, don't need znear, zfar, fov, aspect_ratio, degrees
+            device: Device (as str or torch.device)
+        """
+        # The initializer formats all inputs to torch tensors and broadcasts
+        # all the inputs to have the same batch dimension where necessary.
+        super().__init__(
+            device=device,
+            znear=znear,
+            zfar=zfar,
+            aspect_ratio=aspect_ratio,
+            fov=fov,
+            R=R,
+            T=T,
+            K=K,
+        )
+
+        # No need to convert to tensor or broadcast.
+        self.degrees = degrees
+
+    def compute_projection_matrix(
+        self, znear, zfar, fov, aspect_ratio, degrees: bool
+    ) -> torch.Tensor:
+        """
+        Compute the calibration matrix K of shape (N, 4, 4)
+
+        Args:
+            znear: near clipping plane of the view frustum.
+            zfar: far clipping plane of the view frustum.
+            fov: field of view angle of the camera.
+            aspect_ratio: aspect ratio of the image pixels.
+                1.0 indicates square pixels.
+            degrees: bool, set to True if fov is specified in degrees.
+
+        Returns:
+            torch.FloatTensor of the calibration matrix with shape (N, 4, 4)
+        """
+        K = torch.zeros((self._N, 4, 4), device=self.device, dtype=torch.float32)
+        ones = torch.ones((self._N), dtype=torch.float32, device=self.device)
+        if degrees:
+            fov = (np.pi / 180) * fov
+
+        if not torch.is_tensor(fov):
+            fov = torch.tensor(fov, device=self.device)
+        tanHalfFov = torch.tan((fov / 2))
+        max_y = tanHalfFov * znear
+        min_y = -max_y
+        max_x = max_y * aspect_ratio
+        min_x = -max_x
+
+        # NOTE: In OpenGL the projection matrix changes the handedness of the
+        # coordinate frame. i.e the NDC space positive z direction is the
+        # camera space negative z direction. This is because the sign of the z
+        # in the projection matrix is set to -1.0.
+        # In pytorch3d we maintain a right handed coordinate system throughout
+        # so the z sign is 1.0.
+        z_sign = 1.0
+
+        # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
+        K[:, 0, 0] = 2.0 * znear / (max_x - min_x)
+        # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
+        K[:, 1, 1] = 2.0 * znear / (max_y - min_y)
+        K[:, 0, 2] = (max_x + min_x) / (max_x - min_x)
+        K[:, 1, 2] = (max_y + min_y) / (max_y - min_y)
+        K[:, 3, 2] = z_sign * ones
+
+        # NOTE: This maps the z coordinate to [0, 1] where z = 0 if the point
+        # is at the near clipping plane and z = 1 when the point is at the far
+        # clipping plane.
+        K[:, 2, 2] = z_sign * zfar / (zfar - znear)
+        K[:, 2, 3] = -(zfar * znear) / (zfar - znear)
+
+        return K
+
+    def get_projection_transform(self, **kwargs) -> Transform3d:
+        """
+        Calculate the perspective projection matrix with a symmetric
+        viewing frustum. Use column major order.
+        The viewing frustum will be projected into ndc, s.t.
+        (max_x, max_y) -> (+1, +1)
+        (min_x, min_y) -> (-1, -1)
+
+        Args:
+            **kwargs: parameters for the projection can be passed in as keyword
+                arguments to override the default values set in `__init__`.
+
+        Return:
+            a Transform3d object which represents a batch of projection
+            matrices of shape (N, 4, 4)
+
+        .. code-block:: python
+
+            h1 = (max_y + min_y)/(max_y - min_y)
+            w1 = (max_x + min_x)/(max_x - min_x)
+            tanhalffov = tan((fov/2))
+            s1 = 1/(tanhalffov * (aspect_ratio))
+            s2 = 1/tanhalffov
+
+            # To map z to the range [0, 1] use:
+            f1 =  far / (far - near)
+            f2 = -(far * near) / (far - near)
+
+            # Projection matrix
+            K = [
+                    [s1,   0,   w1,   0],
+                    [0,   s2,   h1,   0],
+                    [0,    0,   f1,  f2],
+                    [0,    0,    1,   0],
+            ]
+        """
+        K = kwargs.get("K", self.K)
+        if K is not None:
+            if K.shape != (self._N, 4, 4):
+                msg = "Expected K to have shape of (%r, 4, 4)"
+                raise ValueError(msg % (self._N))
+        else:
+            K = self.compute_projection_matrix(
+                kwargs.get("znear", self.znear),
+                kwargs.get("zfar", self.zfar),
+                kwargs.get("fov", self.fov),
+                kwargs.get("aspect_ratio", self.aspect_ratio),
+                kwargs.get("degrees", self.degrees),
+            )
+
+        # Transpose the projection matrix as PyTorch3D transforms use row vectors.
+        transform = Transform3d(
+            matrix=K.transpose(1, 2).contiguous(), device=self.device
+        )
+        return transform
+
+    def unproject_points(
+        self,
+        xy_depth: torch.Tensor,
+        world_coordinates: bool = True,
+        scaled_depth_input: bool = False,
+        **kwargs,
+    ) -> torch.Tensor:
+        """>!
+        FoV cameras further allow for passing depth in world units
+        (`scaled_depth_input=False`) or in the [0, 1]-normalized units
+        (`scaled_depth_input=True`)
+
+        Args:
+            scaled_depth_input: If `True`, assumes the input depth is in
+                the [0, 1]-normalized units. If `False` the input depth is in
+                the world units.
+        """
+
+        # obtain the relevant transformation to ndc
+        if world_coordinates:
+            to_ndc_transform = self.get_full_projection_transform()
+        else:
+            to_ndc_transform = self.get_projection_transform()
+
+        if scaled_depth_input:
+            # the input is scaled depth, so we don't have to do anything
+            xy_sdepth = xy_depth
+        else:
+            # parse out important values from the projection matrix
+            K_matrix = self.get_projection_transform(**kwargs.copy()).get_matrix()
+            # parse out f1, f2 from K_matrix
+            unsqueeze_shape = [1] * xy_depth.dim()
+            unsqueeze_shape[0] = K_matrix.shape[0]
+            f1 = K_matrix[:, 2, 2].reshape(unsqueeze_shape)
+            f2 = K_matrix[:, 3, 2].reshape(unsqueeze_shape)
+            # get the scaled depth
+            sdepth = (f1 * xy_depth[..., 2:3] + f2) / xy_depth[..., 2:3]
+            # concatenate xy + scaled depth
+            xy_sdepth = torch.cat((xy_depth[..., 0:2], sdepth), dim=-1)
+
+        # unproject with inverse of the projection
+        unprojection_transform = to_ndc_transform.inverse()
+        return unprojection_transform.transform_points(xy_sdepth)
+
+    def is_perspective(self):
+        return True
+
+    def in_ndc(self):
+        return True
+
+
+def OpenGLOrthographicCameras(
+    znear: _BatchFloatType = 1.0,
+    zfar: _BatchFloatType = 100.0,
+    top: _BatchFloatType = 1.0,
+    bottom: _BatchFloatType = -1.0,
+    left: _BatchFloatType = -1.0,
+    right: _BatchFloatType = 1.0,
+    scale_xyz=((1.0, 1.0, 1.0),),  # (1, 3)
+    R: torch.Tensor = _R,
+    T: torch.Tensor = _T,
+    device: Device = "cpu",
+) -> "FoVOrthographicCameras":
+    """
+    OpenGLOrthographicCameras has been DEPRECATED. Use FoVOrthographicCameras instead.
+    Kept for backward compatibility: top/bottom/right/left become max_y/min_y/max_x/min_x.
+    """
+    warnings.warn(
+        """OpenGLOrthographicCameras is deprecated,
+        Use FoVOrthographicCameras instead.
+        OpenGLOrthographicCameras will be removed in future releases.""",
+        PendingDeprecationWarning,
+        stacklevel=2,  # report the caller's line, not this compatibility shim
+    )
+
+    return FoVOrthographicCameras(
+        znear=znear,
+        zfar=zfar,
+        max_y=top,
+        min_y=bottom,
+        max_x=right,
+        min_x=left,
+        scale_xyz=scale_xyz,
+        R=R,
+        T=T,
+        device=device,
+    )
+
+
+class FoVOrthographicCameras(CamerasBase):
+    """
+    A class which stores a batch of parameters to generate a batch of
+    projection matrices by specifying the field of view.
+    The definitions of the parameters follow the OpenGL orthographic camera.
+    """
+
+    # For __getitem__
+    _FIELDS = (
+        "K",
+        "znear",
+        "zfar",
+        "R",
+        "T",
+        "max_y",
+        "min_y",
+        "max_x",
+        "min_x",
+        "scale_xyz",
+    )
+
+    def __init__(
+        self,
+        znear: _BatchFloatType = 1.0,
+        zfar: _BatchFloatType = 100.0,
+        max_y: _BatchFloatType = 1.0,
+        min_y: _BatchFloatType = -1.0,
+        max_x: _BatchFloatType = 1.0,
+        min_x: _BatchFloatType = -1.0,
+        scale_xyz=((1.0, 1.0, 1.0),),  # (1, 3)
+        R: torch.Tensor = _R,
+        T: torch.Tensor = _T,
+        K: Optional[torch.Tensor] = None,
+        device: Device = "cpu",
+    ) -> None:
+        """
+
+        Args:
+            znear: near clipping plane of the view frustum.
+            zfar: far clipping plane of the view frustum.
+            max_y: maximum y coordinate of the frustum.
+            min_y: minimum y coordinate of the frustum.
+            max_x: maximum x coordinate of the frustum.
+            min_x: minimum x coordinate of the frustum.
+            scale_xyz: scale factors for each axis of shape (N, 3).
+            R: Rotation matrix of shape (N, 3, 3).
+            T: Translation of shape (N, 3).
+            K: (optional) A calibration matrix of shape (N, 4, 4)
+                If provided, don't need znear, zfar, max_y, min_y, max_x, min_x, scale_xyz
+            device: torch.device or string.
+
+        Only need to set min_x, max_x, min_y, max_y for viewing frustums
+        which are non-symmetric about the origin.
+        """
+        # The initializer formats all inputs to torch tensors and broadcasts
+        # all the inputs to have the same batch dimension where necessary.
+        super().__init__(
+            device=device,
+            znear=znear,
+            zfar=zfar,
+            max_y=max_y,
+            min_y=min_y,
+            max_x=max_x,
+            min_x=min_x,
+            scale_xyz=scale_xyz,
+            R=R,
+            T=T,
+            K=K,
+        )
+
+    def compute_projection_matrix(
+        self, znear, zfar, max_x, min_x, max_y, min_y, scale_xyz
+    ) -> torch.Tensor:
+        """
+        Compute the calibration matrix K of shape (N, 4, 4)
+
+        Args:
+            znear: near clipping plane of the view frustum.
+            zfar: far clipping plane of the view frustum.
+            max_x: maximum x coordinate of the frustum.
+            min_x: minimum x coordinate of the frustum.
+            max_y: maximum y coordinate of the frustum.
+            min_y: minimum y coordinate of the frustum.
+            scale_xyz: scale factors for each axis of shape (N, 3).
+        """
+        K = torch.zeros((self._N, 4, 4), dtype=torch.float32, device=self.device)
+        ones = torch.ones((self._N), dtype=torch.float32, device=self.device)
+        # NOTE: OpenGL flips handedness of coordinate system between camera
+        # space and NDC space so z sign is -ve. In PyTorch3D we maintain a
+        # right handed coordinate system throughout.
+        z_sign = +1.0
+
+        K[:, 0, 0] = (2.0 / (max_x - min_x)) * scale_xyz[:, 0]
+        K[:, 1, 1] = (2.0 / (max_y - min_y)) * scale_xyz[:, 1]
+        K[:, 0, 3] = -(max_x + min_x) / (max_x - min_x)
+        K[:, 1, 3] = -(max_y + min_y) / (max_y - min_y)
+        K[:, 3, 3] = ones
+
+        # NOTE: This maps the z coordinate to the range [0, 1] and replaces
+        # the OpenGL z normalization to [-1, 1]
+        K[:, 2, 2] = z_sign * (1.0 / (zfar - znear)) * scale_xyz[:, 2]
+        K[:, 2, 3] = -znear / (zfar - znear)
+
+        return K
+
+    def get_projection_transform(self, **kwargs) -> Transform3d:
+        """
+        Calculate the orthographic projection matrix (a supplied `K` is used as is).
+        Use column major order.
+
+        Args:
+            **kwargs: parameters for the projection can be passed in to
+                      override the default values set in __init__.
+        Return:
+            a Transform3d object which represents a batch of projection
+               matrices of shape (N, 4, 4)
+
+        .. code-block:: python
+
+            scale_x = 2 / (max_x - min_x) * scale_xyz[:, 0]
+            scale_y = 2 / (max_y - min_y) * scale_xyz[:, 1]
+            scale_z = 1 / (far - near) * scale_xyz[:, 2]
+            mid_x = (max_x + min_x) / (max_x - min_x)
+            mid_y = (max_y + min_y) / (max_y - min_y)
+            mid_z = near / (far - near)
+
+            K = [
+                    [scale_x,        0,         0,  -mid_x],
+                    [0,        scale_y,         0,  -mid_y],
+                    [0,              0,   scale_z,  -mid_z],
+                    [0,              0,         0,       1],
+            ]
+        """
+        K = kwargs.get("K", self.K)
+        if K is not None:
+            if K.shape != (self._N, 4, 4):
+                msg = "Expected K to have shape of (%r, 4, 4)"
+                raise ValueError(msg % (self._N))
+        else:
+            K = self.compute_projection_matrix(
+                kwargs.get("znear", self.znear),
+                kwargs.get("zfar", self.zfar),
+                kwargs.get("max_x", self.max_x),
+                kwargs.get("min_x", self.min_x),
+                kwargs.get("max_y", self.max_y),
+                kwargs.get("min_y", self.min_y),
+                kwargs.get("scale_xyz", self.scale_xyz),
+            )
+
+        transform = Transform3d(
+            matrix=K.transpose(1, 2).contiguous(), device=self.device
+        )
+        return transform
+
+    def unproject_points(
+        self,
+        xy_depth: torch.Tensor,
+        world_coordinates: bool = True,
+        scaled_depth_input: bool = False,
+        **kwargs,
+    ) -> torch.Tensor:
+        """>!
+        FoV cameras further allow for passing depth in world units
+        (`scaled_depth_input=False`) or in the [0, 1]-normalized units
+        (`scaled_depth_input=True`)
+
+        Args:
+            scaled_depth_input: If `True`, assumes the input depth is in
+                the [0, 1]-normalized units. If `False` the input depth is in
+                the world units.
+        """
+        if world_coordinates:
+            to_ndc_transform = self.get_full_projection_transform(**kwargs.copy())
+        else:
+            to_ndc_transform = self.get_projection_transform(**kwargs.copy())
+
+        if scaled_depth_input:
+            # the input depth is already scaled
+            xy_sdepth = xy_depth
+        else:
+            # we have to obtain the scaled depth first
+            K = self.get_projection_transform(**kwargs).get_matrix()
+            # shape the per-camera terms to broadcast against xy_depth's own rank
+            unsqueeze_shape = [1] * xy_depth.dim()
+            unsqueeze_shape[0] = K.shape[0]
+            mid_z = K[:, 3, 2].reshape(unsqueeze_shape)
+            scale_z = K[:, 2, 2].reshape(unsqueeze_shape)
+            scaled_depth = scale_z * xy_depth[..., 2:3] + mid_z
+            # cat xy and scaled depth
+            xy_sdepth = torch.cat((xy_depth[..., :2], scaled_depth), dim=-1)
+        # finally invert the transform
+        unprojection_transform = to_ndc_transform.inverse()
+        return unprojection_transform.transform_points(xy_sdepth)
+
+    def is_perspective(self):
+        return False
+
+    def in_ndc(self):
+        return True
+
+
+############################################################
+#             MultiView Camera Classes                     #
+############################################################
+"""
+Note that the MultiView Cameras accept parameters in NDC space.
+"""
+
+
+def SfMPerspectiveCameras(
+    focal_length: _FocalLengthType = 1.0,
+    principal_point=((0.0, 0.0),),
+    R: torch.Tensor = _R,
+    T: torch.Tensor = _T,
+    device: Device = "cpu",
+) -> "PerspectiveCameras":
+    """
+    SfMPerspectiveCameras has been DEPRECATED. Use PerspectiveCameras instead.
+    Preserving SfMPerspectiveCameras for backward compatibility.
+    """
+    warnings.warn(
+        """SfMPerspectiveCameras is deprecated,
+        Use PerspectiveCameras instead.
+        SfMPerspectiveCameras will be removed in future releases.""",
+        PendingDeprecationWarning,
+        stacklevel=2,  # report the caller's line, not this compatibility shim
+    )
+
+    return PerspectiveCameras(
+        focal_length=focal_length,
+        principal_point=principal_point,
+        R=R,
+        T=T,
+        device=device,
+    )
+
+
+class PerspectiveCameras(CamerasBase):
+    """
+    A class which stores a batch of parameters to generate a batch of
+    transformation matrices using the multi-view geometry convention for
+    perspective camera.
+
+    Parameters for this camera are specified in NDC if `in_ndc` is set to True.
+    If parameters are specified in screen space, `in_ndc` must be set to False.
+    """
+
+    # For __getitem__
+    _FIELDS = (
+        "K",
+        "R",
+        "T",
+        "focal_length",
+        "principal_point",
+        "_in_ndc",  # arg is in_ndc but attribute set as _in_ndc
+        "image_size",
+    )
+
+    _SHARED_FIELDS = ("_in_ndc",)
+
+    def __init__(
+        self,
+        focal_length: _FocalLengthType = 1.0,
+        principal_point=((0.0, 0.0),),
+        R: torch.Tensor = _R,
+        T: torch.Tensor = _T,
+        K: Optional[torch.Tensor] = None,
+        device: Device = "cpu",
+        in_ndc: bool = True,
+        image_size: Optional[Union[List, Tuple, torch.Tensor]] = None,
+    ) -> None:
+        """
+
+        Args:
+            focal_length: Focal length of the camera in world units.
+                A tensor of shape (N, 1) or (N, 2) for
+                square and non-square pixels respectively.
+            principal_point: xy coordinates of the center of
+                the principal point of the camera in pixels.
+                A tensor of shape (N, 2).
+            in_ndc: True if camera parameters are specified in NDC.
+                If camera parameters are in screen space, it must
+                be set to False.
+            R: Rotation matrix of shape (N, 3, 3)
+            T: Translation matrix of shape (N, 3)
+            K: (optional) A calibration matrix of shape (N, 4, 4)
+                If provided, don't need focal_length, principal_point
+            image_size: (height, width) of image size.
+                A tensor of shape (N, 2) or a list/tuple. Required for screen cameras.
+            device: torch.device or string
+        """
+        # The initializer formats all inputs to torch tensors and broadcasts
+        # all the inputs to have the same batch dimension where necessary.
+        kwargs = {"image_size": image_size} if image_size is not None else {}
+        super().__init__(
+            device=device,
+            focal_length=focal_length,
+            principal_point=principal_point,
+            R=R,
+            T=T,
+            K=K,
+            _in_ndc=in_ndc,
+            **kwargs,  # pyre-ignore
+        )
+        if image_size is not None:
+            if (self.image_size < 1).any():  # pyre-ignore
+                raise ValueError("Image_size provided has invalid values")
+        else:
+            self.image_size = None
+
+        # When focal length is provided as one value, expand to
+        # create (N, 2) shape tensor
+        if self.focal_length.ndim == 1:  # (N,)
+            self.focal_length = self.focal_length[:, None]  # (N, 1)
+        self.focal_length = self.focal_length.expand(-1, 2)  # (N, 2)
+
+    def get_projection_transform(self, **kwargs) -> Transform3d:
+        """
+        Calculate the projection matrix using the
+        multi-view geometry convention.
+
+        Args:
+            **kwargs: parameters for the projection can be passed in as keyword
+                arguments to override the default values set in __init__.
+
+        Returns:
+            A `Transform3d` object with a batch of `N` projection transforms.
+
+        .. code-block:: python
+
+            fx = focal_length[:, 0]
+            fy = focal_length[:, 1]
+            px = principal_point[:, 0]
+            py = principal_point[:, 1]
+
+            K = [
+                    [fx,   0,   px,   0],
+                    [0,   fy,   py,   0],
+                    [0,    0,    0,   1],
+                    [0,    0,    1,   0],
+            ]
+        """
+        K = kwargs.get("K", self.K)
+        if K is not None:
+            if K.shape != (self._N, 4, 4):
+                msg = "Expected K to have shape of (%r, 4, 4)"
+                raise ValueError(msg % (self._N))
+        else:
+            K = _get_sfm_calibration_matrix(
+                self._N,
+                self.device,
+                kwargs.get("focal_length", self.focal_length),
+                kwargs.get("principal_point", self.principal_point),
+                orthographic=False,
+            )
+
+        transform = Transform3d(
+            matrix=K.transpose(1, 2).contiguous(), device=self.device
+        )
+        return transform
+
+    def unproject_points(
+        self,
+        xy_depth: torch.Tensor,
+        world_coordinates: bool = True,
+        from_ndc: bool = False,
+        **kwargs,
+    ) -> torch.Tensor:
+        """
+        Args:
+            from_ndc: If `False` (default), assumes xy part of input is in
+                NDC space if self.in_ndc(), otherwise in screen space. If
+                `True`, assumes xy is in NDC space even if the camera
+                is defined in screen space.
+        """
+        if world_coordinates:
+            to_camera_transform = self.get_full_projection_transform(**kwargs)
+        else:
+            to_camera_transform = self.get_projection_transform(**kwargs)
+        if from_ndc:
+            to_camera_transform = to_camera_transform.compose(
+                self.get_ndc_camera_transform(**kwargs)  # same overrides as above
+            )
+
+        unprojection_transform = to_camera_transform.inverse()
+        xy_inv_depth = torch.cat(
+            (xy_depth[..., :2], 1.0 / xy_depth[..., 2:3]), dim=-1  # type: ignore
+        )
+        return unprojection_transform.transform_points(xy_inv_depth)
+
+    def get_principal_point(self, **kwargs) -> torch.Tensor:
+        """
+        Return the camera's principal point
+
+        Args:
+            **kwargs: parameters for the projection can be passed in
+                as keyword arguments to override the default values
+                set in __init__.
+        """
+        proj_mat = self.get_projection_transform(**kwargs).get_matrix()
+        return proj_mat[:, 2, :2]
+
+    def get_ndc_camera_transform(self, **kwargs) -> Transform3d:
+        """
+        Returns the transform from camera projection space (screen or NDC) to NDC space.
+        If the camera is defined already in NDC space, the transform is identity.
+        For cameras defined in screen space, we adjust the principal point computation
+        which is defined in the image space (commonly) and scale the points to NDC space.
+
+        This transform leaves the depth unchanged.
+
+        Important: This transforms assumes PyTorch3D conventions for the input points,
+        i.e. +X left, +Y up.
+        """
+        if self.in_ndc():
+            ndc_transform = Transform3d(device=self.device, dtype=torch.float32)
+        else:
+            # when cameras are defined in screen/image space, the principal point is
+            # provided in the (+X right, +Y down), aka image, coordinate system.
+            # Since input points are defined in the PyTorch3D system (+X left, +Y up),
+            # we need to adjust for the principal point transform.
+            pr_point_fix = torch.zeros(
+                (self._N, 4, 4), device=self.device, dtype=torch.float32
+            )
+            pr_point_fix[:, 0, 0] = 1.0
+            pr_point_fix[:, 1, 1] = 1.0
+            pr_point_fix[:, 2, 2] = 1.0
+            pr_point_fix[:, 3, 3] = 1.0
+            pr_point_fix[:, :2, 3] = -2.0 * self.get_principal_point(**kwargs)
+            pr_point_fix_transform = Transform3d(
+                matrix=pr_point_fix.transpose(1, 2).contiguous(), device=self.device
+            )
+            image_size = kwargs.get("image_size", self.get_image_size())
+            screen_to_ndc_transform = get_screen_to_ndc_transform(
+                self, with_xyflip=False, image_size=image_size
+            )
+            ndc_transform = pr_point_fix_transform.compose(screen_to_ndc_transform)
+
+        return ndc_transform
+
+    def is_perspective(self):
+        return True
+
+    def in_ndc(self):
+        return self._in_ndc
+
+
+def SfMOrthographicCameras(
+    focal_length: _FocalLengthType = 1.0,
+    principal_point=((0.0, 0.0),),
+    R: torch.Tensor = _R,
+    T: torch.Tensor = _T,
+    device: Device = "cpu",
+) -> "OrthographicCameras":
+    """
+    SfMOrthographicCameras has been DEPRECATED. Use OrthographicCameras instead.
+    Preserving SfMOrthographicCameras for backward compatibility.
+    """
+    warnings.warn(
+        """SfMOrthographicCameras is deprecated,
+        Use OrthographicCameras instead.
+        SfMOrthographicCameras will be removed in future releases.""",
+        PendingDeprecationWarning,
+        stacklevel=2,  # report the caller's line, not this compatibility shim
+    )
+
+    return OrthographicCameras(
+        focal_length=focal_length,
+        principal_point=principal_point,
+        R=R,
+        T=T,
+        device=device,
+    )
+
+
+class OrthographicCameras(CamerasBase):
+    """
+    A class which stores a batch of parameters to generate a batch of
+    transformation matrices using the multi-view geometry convention for
+    orthographic camera.
+
+    Parameters for this camera are specified in NDC if `in_ndc` is set to True.
+    If parameters are specified in screen space, `in_ndc` must be set to False.
+    """
+
+    # For __getitem__
+    _FIELDS = (
+        "K",
+        "R",
+        "T",
+        "focal_length",
+        "principal_point",
+        "_in_ndc",
+        "image_size",
+    )
+
+    _SHARED_FIELDS = ("_in_ndc",)
+
+    def __init__(
+        self,
+        focal_length: _FocalLengthType = 1.0,
+        principal_point=((0.0, 0.0),),
+        R: torch.Tensor = _R,
+        T: torch.Tensor = _T,
+        K: Optional[torch.Tensor] = None,
+        device: Device = "cpu",
+        in_ndc: bool = True,
+        image_size: Optional[Union[List, Tuple, torch.Tensor]] = None,
+    ) -> None:
+        """
+
+        Args:
+            focal_length: Focal length of the camera in world units.
+                A tensor of shape (N, 1) or (N, 2) for
+                square and non-square pixels respectively.
+            principal_point: xy coordinates of the center of
+                the principal point of the camera in pixels.
+                A tensor of shape (N, 2).
+            in_ndc: True if camera parameters are specified in NDC.
+                If False, then camera parameters are in screen space.
+            R: Rotation matrix of shape (N, 3, 3)
+            T: Translation matrix of shape (N, 3)
+            K: (optional) A calibration matrix of shape (N, 4, 4)
+                If provided, don't need focal_length, principal_point, image_size
+            image_size: (height, width) of image size.
+                A tensor of shape (N, 2) or list/tuple. Required for screen cameras.
+            device: torch.device or string
+        """
+        # The initializer formats all inputs to torch tensors and broadcasts
+        # all the inputs to have the same batch dimension where necessary.
+        kwargs = {"image_size": image_size} if image_size is not None else {}
+        super().__init__(
+            device=device,
+            focal_length=focal_length,
+            principal_point=principal_point,
+            R=R,
+            T=T,
+            K=K,
+            _in_ndc=in_ndc,
+            **kwargs,  # pyre-ignore
+        )
+        if image_size is not None:
+            if (self.image_size < 1).any():  # pyre-ignore
+                raise ValueError("Image_size provided has invalid values")
+        else:
+            self.image_size = None
+
+        # When focal length is provided as one value, expand to
+        # create (N, 2) shape tensor
+        if self.focal_length.ndim == 1:  # (N,)
+            self.focal_length = self.focal_length[:, None]  # (N, 1)
+        self.focal_length = self.focal_length.expand(-1, 2)  # (N, 2)
+
+    def get_projection_transform(self, **kwargs) -> Transform3d:
+        """
+        Calculate the projection matrix using
+        the multi-view geometry convention.
+
+        Args:
+            **kwargs: parameters for the projection can be passed in as keyword
+                arguments to override the default values set in __init__.
+
+        Returns:
+            A `Transform3d` object with a batch of `N` projection transforms.
+
+        .. code-block:: python
+
+            fx = focal_length[:,0]
+            fy = focal_length[:,1]
+            px = principal_point[:,0]
+            py = principal_point[:,1]
+
+            K = [
+                    [fx,   0,    0,  px],
+                    [0,   fy,    0,  py],
+                    [0,    0,    1,   0],
+                    [0,    0,    0,   1],
+            ]
+        """
+        K = kwargs.get("K", self.K)
+        if K is not None:
+            if K.shape != (self._N, 4, 4):
+                msg = "Expected K to have shape of (%r, 4, 4)"
+                raise ValueError(msg % (self._N))
+        else:
+            K = _get_sfm_calibration_matrix(
+                self._N,
+                self.device,
+                kwargs.get("focal_length", self.focal_length),
+                kwargs.get("principal_point", self.principal_point),
+                orthographic=True,
+            )
+
+        transform = Transform3d(
+            matrix=K.transpose(1, 2).contiguous(), device=self.device
+        )
+        return transform
+
+    def unproject_points(
+        self,
+        xy_depth: torch.Tensor,
+        world_coordinates: bool = True,
+        from_ndc: bool = False,
+        **kwargs,
+    ) -> torch.Tensor:
+        """
+        Args:
+            from_ndc: If `False` (default), assumes xy part of input is in
+                NDC space if self.in_ndc(), otherwise in screen space. If
+                `True`, assumes xy is in NDC space even if the camera
+                is defined in screen space.
+        """
+        if world_coordinates:
+            to_camera_transform = self.get_full_projection_transform(**kwargs)
+        else:
+            to_camera_transform = self.get_projection_transform(**kwargs)
+        if from_ndc:
+            to_camera_transform = to_camera_transform.compose(
+                self.get_ndc_camera_transform(**kwargs)  # same overrides as above
+            )
+
+        unprojection_transform = to_camera_transform.inverse()
+        return unprojection_transform.transform_points(xy_depth)
+
+    def get_principal_point(self, **kwargs) -> torch.Tensor:
+        """
+        Return the camera's principal point
+
+        Args:
+            **kwargs: parameters for the projection can be passed in
+                as keyword arguments to override the default values
+                set in __init__.
+        """
+        proj_mat = self.get_projection_transform(**kwargs).get_matrix()
+        return proj_mat[:, 3, :2]
+
+    def get_ndc_camera_transform(self, **kwargs) -> Transform3d:
+        """
+        Returns the transform from camera projection space (screen or NDC) to NDC space.
+        If the camera is defined already in NDC space, the transform is identity.
+        For cameras defined in screen space, we adjust the principal point computation
+        which is defined in the image space (commonly) and scale the points to NDC space.
+
+        Important: This transforms assumes PyTorch3D conventions for the input points,
+        i.e. +X left, +Y up.
+        """
+        if self.in_ndc():
+            ndc_transform = Transform3d(device=self.device, dtype=torch.float32)
+        else:
+            # when cameras are defined in screen/image space, the principal point is
+            # provided in the (+X right, +Y down), aka image, coordinate system.
+            # Since input points are defined in the PyTorch3D system (+X left, +Y up),
+            # we need to adjust for the principal point transform.
+            pr_point_fix = torch.zeros(
+                (self._N, 4, 4), device=self.device, dtype=torch.float32
+            )
+            pr_point_fix[:, 0, 0] = 1.0
+            pr_point_fix[:, 1, 1] = 1.0
+            pr_point_fix[:, 2, 2] = 1.0
+            pr_point_fix[:, 3, 3] = 1.0
+            pr_point_fix[:, :2, 3] = -2.0 * self.get_principal_point(**kwargs)
+            pr_point_fix_transform = Transform3d(
+                matrix=pr_point_fix.transpose(1, 2).contiguous(), device=self.device
+            )
+            image_size = kwargs.get("image_size", self.get_image_size())
+            screen_to_ndc_transform = get_screen_to_ndc_transform(
+                self, with_xyflip=False, image_size=image_size
+            )
+            ndc_transform = pr_point_fix_transform.compose(screen_to_ndc_transform)
+
+        return ndc_transform
+
+    def is_perspective(self):
+        return False
+
+    def in_ndc(self):
+        return self._in_ndc
+
+
+################################################
+#       Helper functions for cameras           #
+################################################
+
+
+def _get_sfm_calibration_matrix(
+    N: int,
+    device: Device,
+    focal_length,
+    principal_point,
+    orthographic: bool = False,
+) -> torch.Tensor:
+    """
+    Returns a calibration matrix of a perspective/orthographic camera.
+
+    Args:
+        N: Number of cameras.
+        device: Device used when focal_length / principal_point are not tensors yet.
+        focal_length: Focal length; a scalar or shape (N,), (N, 1) or (N, 2).
+        principal_point: xy coordinates of the center of
+            the principal point of the camera in pixels.
+        orthographic: Boolean specifying if the camera is orthographic or not
+
+        The calibration matrix `K` is set up as follows:
+
+        .. code-block:: python
+
+            fx = focal_length[:,0]
+            fy = focal_length[:,1]
+            px = principal_point[:,0]
+            py = principal_point[:,1]
+
+            for orthographic==True:
+                K = [
+                        [fx,   0,    0,  px],
+                        [0,   fy,    0,  py],
+                        [0,    0,    1,   0],
+                        [0,    0,    0,   1],
+                ]
+            else:
+                K = [
+                        [fx,   0,   px,   0],
+                        [0,   fy,   py,   0],
+                        [0,    0,    0,   1],
+                        [0,    0,    1,   0],
+                ]
+
+    Returns:
+        A calibration matrix `K` of the SfM-conventioned camera
+        of shape (N, 4, 4).
+    """
+    if not torch.is_tensor(focal_length):
+        focal_length = torch.tensor(focal_length, device=device)
+
+    if focal_length.ndim in (0, 1) or focal_length.shape[1] == 1:
+        fx = fy = focal_length.reshape(-1)  # (N, 1) -> (N,) so it fits K[:, i, i]
+    else:
+        fx, fy = focal_length.unbind(1)
+
+    if not torch.is_tensor(principal_point):
+        principal_point = torch.tensor(principal_point, device=device)
+
+    px, py = principal_point.unbind(1)
+
+    K = fx.new_zeros(N, 4, 4)
+    K[:, 0, 0] = fx
+    K[:, 1, 1] = fy
+    if orthographic:
+        K[:, 0, 3] = px
+        K[:, 1, 3] = py
+        K[:, 2, 2] = 1.0
+        K[:, 3, 3] = 1.0
+    else:
+        K[:, 0, 2] = px
+        K[:, 1, 2] = py
+        K[:, 3, 2] = 1.0
+        K[:, 2, 3] = 1.0
+
+    return K
+
+
+################################################
+# Helper functions for world to view transforms
+################################################
+
+
+def get_world_to_view_transform(
+    R: torch.Tensor = _R, T: torch.Tensor = _T
+) -> Transform3d:
+    """
+    This function returns a Transform3d representing the transformation
+    matrix to go from world space to view space by applying a rotation and
+    a translation.
+
+    PyTorch3D uses the same convention as Hartley & Zisserman.
+    I.e., for camera extrinsic parameters R (rotation) and T (translation),
+    we map a 3D point `X_world` in world coordinates to
+    a point `X_cam` in camera coordinates with:
+    `X_cam = X_world R + T`
+
+    Args:
+        R: (N, 3, 3) matrix representing the rotation.
+        T: (N, 3) matrix representing the translation.
+
+    Returns:
+        a Transform3d object which represents the composed RT transformation.
+
+    """
+    # TODO: also support the case where RT is specified as one matrix
+    # of shape (N, 4, 4).
+
+    if T.shape[0] != R.shape[0]:
+        msg = "Expected R, T to have the same batch dimension; got %r, %r"
+        raise ValueError(msg % (R.shape[0], T.shape[0]))
+    if T.dim() != 2 or T.shape[1:] != (3,):
+        msg = "Expected T to have shape (N, 3); got %r"
+        raise ValueError(msg % repr(T.shape))
+    if R.dim() != 3 or R.shape[1:] != (3, 3):
+        msg = "Expected R to have shape (N, 3, 3); got %r"
+        raise ValueError(msg % repr(R.shape))
+
+    # Create a Transform3d object
+    T_ = Translate(T, device=T.device)
+    R_ = Rotate(R, device=R.device)
+    return R_.compose(T_)
+
+
+def camera_position_from_spherical_angles(
+    distance: float,
+    elevation: float,
+    azimuth: float,
+    degrees: bool = True,
+    device: Device = "cpu",
+) -> torch.Tensor:
+    """
+    Calculate the location of the camera based on the distance away from
+    the target point, the elevation and azimuth angles.
+
+    Args:
+        distance: distance of the camera from the object.
+        elevation, azimuth: angles.
+            The inputs distance, elevation and azimuth can be one of the following
+                - Python scalar
+                - Torch scalar
+                - Torch tensor of shape (N) or (1)
+        degrees: bool, whether the angles are specified in degrees or radians.
+        device: str or torch.device, device for new tensors to be placed on.
+
+    The vectors are broadcast against each other so they all have shape (N, 1).
+
+    Returns:
+        camera_position: (N, 3) xyz location of the camera.
+    """
+    broadcasted_args = convert_to_tensors_and_broadcast(
+        distance, elevation, azimuth, device=device
+    )
+    dist, elev, azim = broadcasted_args
+    if degrees:
+        elev = math.pi / 180.0 * elev
+        azim = math.pi / 180.0 * azim
+    x = dist * torch.cos(elev) * torch.sin(azim)
+    y = dist * torch.sin(elev)
+    z = dist * torch.cos(elev) * torch.cos(azim)
+    camera_position = torch.stack([x, y, z], dim=1)
+    # stacking along dim=1 always yields a tensor that already has a batch
+    # dimension, so the result only needs to be flattened to (N, 3).
+    return camera_position.view(-1, 3)
+
+
+def look_at_rotation(
+    camera_position, at=((0, 0, 0),), up=((0, 1, 0),), device: Device = "cpu"
+) -> torch.Tensor:
+    """
+    This function takes a vector `camera_position` which specifies the location
+    of the camera in world coordinates and two vectors `at` and `up` which
+    indicate the position of the object and the up direction of the world
+    coordinate system respectively. By default `at` is the origin and `up` is
+    the +Y axis.
+
+    The output is a rotation matrix representing the transformation
+    from world coordinates -> view coordinates.
+
+    Args:
+        camera_position: position of the camera in world coordinates
+        at: position of the object in world coordinates
+        up: vector specifying the up direction in the world coordinate frame.
+
+    The inputs camera_position, at and up can each be a
+        - 3 element tuple/list
+        - torch tensor of shape (1, 3)
+        - torch tensor of shape (N, 3)
+
+    The vectors are broadcast against each other so they all have shape (N, 3).
+
+    Returns:
+        R: (N, 3, 3) batched rotation matrices
+    """
+    # Format input and broadcast
+    broadcasted_args = convert_to_tensors_and_broadcast(
+        camera_position, at, up, device=device
+    )
+    camera_position, at, up = broadcasted_args
+    for t, n in zip([camera_position, at, up], ["camera_position", "at", "up"]):
+        if t.shape[-1] != 3:
+            msg = "Expected arg %s to have shape (N, 3); got %r"
+            raise ValueError(msg % (n, t.shape))
+    z_axis = F.normalize(at - camera_position, eps=1e-5)
+    x_axis = F.normalize(torch.cross(up, z_axis, dim=1), eps=1e-5)
+    y_axis = F.normalize(torch.cross(z_axis, x_axis, dim=1), eps=1e-5)
+    is_close = torch.isclose(x_axis, torch.tensor(0.0), atol=5e-3).all(
+        dim=1, keepdim=True
+    )
+    if is_close.any():  # rows whose x_axis collapsed to ~0 (e.g. up parallel to z_axis)
+        replacement = F.normalize(torch.cross(y_axis, z_axis, dim=1), eps=1e-5)
+        x_axis = torch.where(is_close, replacement, x_axis)
+    R = torch.cat((x_axis[:, None, :], y_axis[:, None, :], z_axis[:, None, :]), dim=1)
+    return R.transpose(1, 2)
+
+
+def look_at_view_transform(
+    dist: _BatchFloatType = 1.0,
+    elev: _BatchFloatType = 0.0,
+    azim: _BatchFloatType = 0.0,
+    degrees: bool = True,
+    eye: Optional[Union[Sequence, torch.Tensor]] = None,
+    at=((0, 0, 0),),  # (1, 3)
+    up=((0, 1, 0),),  # (1, 3)
+    device: Device = "cpu",
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Return the rotation R and translation T of the 'Look At' transformation
+    from world -> view coordinates [0].
+
+    Args:
+        dist: distance of the camera from the object
+        elev: angle in degrees or radians. This is the angle between the
+            vector from the object to the camera, and the horizontal plane y = 0 (xz-plane).
+        azim: angle in degrees or radians. The vector from the object to
+            the camera is projected onto a horizontal plane y = 0.
+            azim is the angle between the projected vector and a
+            reference vector at (0, 0, 1) on the reference plane (the horizontal plane).
+        dist, elev and azim can be of shape (1), (N).
+        degrees: if True, elev and azim are in degrees, otherwise in radians.
+        eye: the position of the camera(s) in world coordinates. If eye is not
+            None, it will override the camera position derived from dist, elev, azim.
+        up: the up direction in the world coordinate system, by default (0, 1, 0).
+        at: the position of the object(s) in world coordinates.
+        eye, up and at can be of shape (1, 3) or (N, 3).
+        device: device passed on to the tensor conversion and the helper functions.
+
+    Returns:
+        2-element tuple containing
+
+        - **R**: the rotation to apply to the points to align with the camera.
+        - **T**: the translation to apply to the points to align with the camera.
+
+    References:
+    [0] https://www.scratchapixel.com
+    """
+
+    if eye is None:
+        # Place the camera relative to `at` using dist / elev / azim.
+        dist, elev, azim, at, up = convert_to_tensors_and_broadcast(
+            dist, elev, azim, at, up, device=device
+        )
+        offset = camera_position_from_spherical_angles(
+            dist, elev, azim, degrees=degrees, device=device
+        )
+        C = offset + at
+    else:
+        # An explicit eye position takes precedence over dist / elev / azim.
+        eye, at, up = convert_to_tensors_and_broadcast(eye, at, up, device=device)
+        C = eye
+
+    R = look_at_rotation(C, at, up, device=device)
+    # T = -R^T C, evaluated as a batched matrix-vector product.
+    rotated_center = torch.bmm(R.transpose(1, 2), C[:, :, None])[:, :, 0]
+    T = -rotated_center
+    return R, T
+
+
+def get_ndc_to_screen_transform(
+    cameras,
+    with_xyflip: bool = False,
+    image_size: Optional[Union[List, Tuple, torch.Tensor]] = None,
+) -> Transform3d:
+    """
+    PyTorch3D NDC to screen conversion.
+    Conversion from PyTorch3D's NDC space (+X left, +Y up) to screen/image space
+    (+X right, +Y down, origin top left).
+
+    Args:
+        cameras: cameras object; its `_N` and `device` attributes are used.
+        with_xyflip: flips x- and y-axis if set to True.
+        image_size: ((height, width),) specifying the height, width
+            of the image. Required: a ValueError is raised if it is None.
+
+    We represent the NDC to screen conversion as a Transform3d
+    with projection matrix
+
+    K = [
+            [s,   0,    0,  cx],
+            [0,   s,    0,  cy],
+            [0,   0,    1,   0],
+            [0,   0,    0,   1],
+    ]
+
+    where s = min(height, width) / 2, cx = -width / 2 and cy = -height / 2.
+    """
+    # The image size is mandatory: the transform cannot be built without it.
+    if image_size is None:
+        msg = "For NDC to screen conversion, image_size=(height, width) needs to be specified."
+        raise ValueError(msg)
+
+    batch_size, device = cameras._N, cameras.device
+    if not torch.is_tensor(image_size):
+        image_size = torch.tensor(image_size, device=device)
+    # pyre-fixme[16]: Item `List` of `Union[List[typing.Any], Tensor, Tuple[Any,
+    #  ...]]` has no attribute `view`.
+    image_size = image_size.view(-1, 2)  # of shape (1 or B)x2
+    height, width = image_size.unbind(1)
+
+    # For non square images, we scale the points such that smallest side
+    # has range [-1, 1] and the largest side has range [-u, u], with u > 1.
+    # This convention is consistent with the PyTorch3D renderer
+    scale = (image_size.min(dim=1).values - 0.0) / 2.0
+
+    # Scale goes on the diagonal, the offsets in the last column.
+    K = torch.zeros((batch_size, 4, 4), device=device, dtype=torch.float32)
+    K[:, 0, 0] = scale
+    K[:, 1, 1] = scale
+    K[:, 0, 3] = -1.0 * (width - 0.0) / 2.0
+    K[:, 1, 3] = -1.0 * (height - 0.0) / 2.0
+    K[:, 2, 2] = 1.0
+    K[:, 3, 3] = 1.0
+
+    # Transpose the projection matrix as PyTorch3D transforms use row vectors.
+    transform = Transform3d(matrix=K.transpose(1, 2).contiguous(), device=device)
+    if not with_xyflip:
+        return transform
+
+    # Compose with a second transform that negates the x and y axes.
+    xyflip = torch.eye(4, device=device, dtype=torch.float32)
+    xyflip[0, 0] = -1.0
+    xyflip[1, 1] = -1.0
+    xyflip = xyflip.view(1, 4, 4).expand(batch_size, -1, -1)
+    xyflip_transform = Transform3d(
+        matrix=xyflip.transpose(1, 2).contiguous(), device=device
+    )
+    return transform.compose(xyflip_transform)
+
+
+def get_screen_to_ndc_transform(
+    cameras,
+    with_xyflip: bool = False,
+    image_size: Optional[Union[List, Tuple, torch.Tensor]] = None,
+) -> Transform3d:
+    """
+    Screen to PyTorch3D NDC conversion.
+    Conversion from screen/image space (+X right, +Y down, origin top left)
+    to PyTorch3D's NDC space (+X left, +Y up).
+
+    Args:
+        cameras: cameras object, passed to get_ndc_to_screen_transform.
+        with_xyflip: flips x- and y-axis if set to True.
+        image_size: ((height, width),) specifying the height, width
+            of the image. Required: get_ndc_to_screen_transform raises a
+            ValueError if it is None.
+
+    The result is the inverse of the NDC to screen Transform3d. For an
+    NDC to screen matrix with scale s and offsets cx, cy (and no xy-flip),
+    the inverse matrix is
+
+    K = [
+            [1/s,    0,    0,  -cx/s],
+            [  0,  1/s,    0,  -cy/s],
+            [  0,    0,    1,      0],
+            [  0,    0,    0,      1],
+    ]
+    """
+    # Build the forward (NDC -> screen) transform, then invert it.
+    ndc_to_screen = get_ndc_to_screen_transform(
+        cameras, with_xyflip=with_xyflip, image_size=image_size
+    )
+    screen_to_ndc = ndc_to_screen.inverse()
+    return screen_to_ndc
+
+
+def try_get_projection_transform(
+    cameras: CamerasBase, cameras_kwargs: Dict[str, Any]
+) -> Optional[Transform3d]:
+    """
+    Get the projection transform of `cameras`, or None if it has none.
+
+    Args:
+        cameras: cameras instance, can be linear cameras or nonlinear cameras
+        cameras_kwargs: camera parameters to be passed to cameras
+
+    Returns:
+        The result of cameras.get_projection_transform(**cameras_kwargs), or
+        None if that call raises NotImplementedError.
+    """
+
+    try:
+        return cameras.get_projection_transform(**cameras_kwargs)
+    except NotImplementedError:
+        # The camera does not implement a projection transform; report
+        # that to the caller as None rather than as an exception.
+        return None
diff --git a/pytorch3d/pytorch3d/renderer/compositing.py b/pytorch3d/pytorch3d/renderer/compositing.py
new file mode 100644
index 0000000000000000000000000000000000000000..669108e6eccf3b12afdb093d7a6717f2f656c5f1
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/compositing.py
@@ -0,0 +1,242 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from pytorch3d import _C
+
+
+# Example functions for blending the top K features per pixel using the outputs
+# from rasterization.
+# NOTE: All blending function should return a (N, H, W, C) tensor per batch element.
+# This can be an image (C=3) or a set of features.
+
+
+class _CompositeAlphaPoints(torch.autograd.Function):
+    """
+    Composite features within a z-buffer using alpha compositing. Given a z-buffer
+    with corresponding features and weights, these values are accumulated according
+    to their weights such that features nearer in depth contribute more to the final
+    feature than ones further away.
+
+    Concretely this means:
+        weighted_fs[b,c,i,j] = sum_k cum_alpha_k * features[c,points_idx[b,k,i,j]]
+        cum_alpha_k = alphas[b,k,i,j] * prod_l=0..k-1 (1 - alphas[b,l,i,j])
+
+    Args:
+        features: Packed Tensor of shape (C, P) giving the features of each point.
+        alphas: float32 Tensor of shape (N, points_per_pixel, image_size,
+            image_size) giving the weight of each point in the z-buffer.
+            Values should be in the interval [0, 1].
+        points_idx: int32 Tensor of shape (N, points_per_pixel, image_size, image_size)
+            giving the indices of the nearest points at each pixel, sorted in z-order.
+            Concretely points_idx[n, k, y, x] = p means that features[:, p] is the
+            feature of the kth closest point (along the z-direction) to pixel (y, x) in
+            batch element n. This is weighted by alphas[n, k, y, x].
+
+    Returns:
+        weighted_fs: Tensor of shape (N, C, image_size, image_size)
+            giving the accumulated features at each point.
+    """
+
+    @staticmethod
+    def forward(ctx, features, alphas, points_idx):
+        pt_cld = _C.accum_alphacomposite(features, alphas, points_idx)
+        # Clones of the inputs (not the inputs themselves) are kept for backward.
+        ctx.save_for_backward(features.clone(), alphas.clone(), points_idx.clone())
+        return pt_cld
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        features, alphas, points_idx = ctx.saved_tensors
+
+        grad_features, grad_alphas = _C.accum_alphacomposite_backward(
+            grad_output, features, alphas, points_idx
+        )
+
+        # Return exactly one gradient per forward input (features, alphas,
+        # points_idx). Only features and alphas receive a gradient from the
+        # backward op; the index tensor gets None.
+        return grad_features, grad_alphas, None
+
+
+def alpha_composite(pointsidx, alphas, pt_clds) -> torch.Tensor:
+    """
+    Composite features within a z-buffer using alpha compositing. Given a z-buffer
+    with corresponding features and weights, these values are accumulated according
+    to their weights such that features nearer in depth contribute more to the final
+    feature than ones further away.
+
+    Concretely this means:
+        weighted_fs[b,c,i,j] = sum_k cum_alpha_k * features[c,pointsidx[b,k,i,j]]
+        cum_alpha_k = alphas[b,k,i,j] * prod_l=0..k-1 (1 - alphas[b,l,i,j])
+
+    Args:
+        pt_clds: features of each point (can use RGB for example). NOTE(review):
+            this was documented as shape (N, C, P), but it is forwarded unchanged as the
+            `features` of _CompositeAlphaPoints, documented as packed (C, P) -- confirm.
+        alphas: float32 Tensor of shape (N, points_per_pixel, image_size,
+            image_size) giving the weight of each point in the z-buffer.
+            Values should be in the interval [0, 1].
+        pointsidx: int32 Tensor of shape (N, points_per_pixel, image_size, image_size)
+            giving the indices of the nearest points at each pixel, sorted in z-order.
+            Concretely pointsidx[n, k, y, x] = p means that point p provides the
+            feature of the kth closest point (along the z-direction) to pixel (y, x) in
+            batch element n. This is weighted by alphas[n, k, y, x].
+
+    Returns:
+        Combined features: Tensor of shape (N, C, image_size, image_size)
+            giving the accumulated features at each point.
+    """
+    return _CompositeAlphaPoints.apply(pt_clds, alphas, pointsidx)
+
+
+class _CompositeNormWeightedSumPoints(torch.autograd.Function):
+    """
+    Composite features within a z-buffer using normalized weighted sum. Given a z-buffer
+    with corresponding features and weights, these values are accumulated
+    according to their weights such that depth is ignored; the weights are used to
+    perform a weighted sum.
+
+    Concretely this means:
+        weighted_fs[b,c,i,j] =
+         sum_k alphas[b,k,i,j] * features[c,points_idx[b,k,i,j]] / sum_k alphas[b,k,i,j]
+
+    Args:
+        features: Packed Tensor of shape (C, P) giving the features of each point.
+        alphas: float32 Tensor of shape (N, points_per_pixel, image_size,
+            image_size) giving the weight of each point in the z-buffer.
+            Values should be in the interval [0, 1].
+        points_idx: int32 Tensor of shape (N, points_per_pixel, image_size, image_size)
+            giving the indices of the nearest points at each pixel, sorted in z-order.
+            Concretely points_idx[n, k, y, x] = p means that features[:, p] is the
+            feature of the kth closest point (along the z-direction) to pixel (y, x) in
+            batch element n. This is weighted by alphas[n, k, y, x].
+
+    Returns:
+        weighted_fs: Tensor of shape (N, C, image_size, image_size)
+            giving the accumulated features at each point.
+    """
+
+    @staticmethod
+    def forward(ctx, features, alphas, points_idx):
+        pt_cld = _C.accum_weightedsumnorm(features, alphas, points_idx)
+        # Clones of the inputs (not the inputs themselves) are kept for backward.
+        ctx.save_for_backward(features.clone(), alphas.clone(), points_idx.clone())
+        return pt_cld
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        features, alphas, points_idx = ctx.saved_tensors
+
+        grad_features, grad_alphas = _C.accum_weightedsumnorm_backward(
+            grad_output, features, alphas, points_idx
+        )
+
+        # Return exactly one gradient per forward input (features, alphas,
+        # points_idx). Only features and alphas receive a gradient from the
+        # backward op; the index tensor gets None.
+        return grad_features, grad_alphas, None
+
+
+def norm_weighted_sum(pointsidx, alphas, pt_clds) -> torch.Tensor:
+    """
+    Composite features within a z-buffer using normalized weighted sum. Given a z-buffer
+    with corresponding features and weights, these values are accumulated
+    according to their weights such that depth is ignored; the weights are used to
+    perform a weighted sum.
+
+    Concretely this means:
+        weighted_fs[b,c,i,j] =
+         sum_k alphas[b,k,i,j] * pt_clds[c,pointsidx[b,k,i,j]] / sum_k alphas[b,k,i,j]
+
+    Args:
+        pt_clds: Packed feature tensor of shape (C, P) giving the features of each point
+            (can use RGB for example).
+        alphas: float32 Tensor of shape (N, points_per_pixel, image_size,
+            image_size) giving the weight of each point in the z-buffer.
+            Values should be in the interval [0, 1].
+        pointsidx: int32 Tensor of shape (N, points_per_pixel, image_size, image_size)
+            giving the indices of the nearest points at each pixel, sorted in z-order.
+            Concretely pointsidx[n, k, y, x] = p means that pt_clds[:, p] is the
+            feature of the kth closest point (along the z-direction) to pixel (y, x) in
+            batch element n. This is weighted by alphas[n, k, y, x].
+
+    Returns:
+        Combined features: Tensor of shape (N, C, image_size, image_size)
+            giving the accumulated features at each point.
+    """
+    return _CompositeNormWeightedSumPoints.apply(pt_clds, alphas, pointsidx)
+
+
+class _CompositeWeightedSumPoints(torch.autograd.Function):
+    """
+    Composite features within a z-buffer using a weighted sum. Given a z-buffer
+    with corresponding features and weights, these values are accumulated
+    according to their weights such that depth is ignored; the weights are used to
+    perform a weighted sum. As opposed to norm weighted sum, the weights are not
+    normalized to sum to 1.
+
+    Concretely this means:
+        weighted_fs[b,c,i,j] = sum_k alphas[b,k,i,j] * features[c,points_idx[b,k,i,j]]
+
+    Args:
+        features: Packed Tensor of shape (C, P) giving the features of each point.
+        alphas: float32 Tensor of shape (N, points_per_pixel, image_size,
+            image_size) giving the weight of each point in the z-buffer.
+            Values should be in the interval [0, 1].
+        points_idx: int32 Tensor of shape (N, points_per_pixel, image_size, image_size)
+            giving the indices of the nearest points at each pixel, sorted in z-order.
+            Concretely points_idx[n, k, y, x] = p means that features[:, p] is the
+            feature of the kth closest point (along the z-direction) to pixel (y, x) in
+            batch element n. This is weighted by alphas[n, k, y, x].
+
+    Returns:
+        weighted_fs: Tensor of shape (N, C, image_size, image_size)
+            giving the accumulated features at each point.
+    """
+
+    @staticmethod
+    def forward(ctx, features, alphas, points_idx):
+        pt_cld = _C.accum_weightedsum(features, alphas, points_idx)
+        # Clones of the inputs (not the inputs themselves) are kept for backward.
+        ctx.save_for_backward(features.clone(), alphas.clone(), points_idx.clone())
+        return pt_cld
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        features, alphas, points_idx = ctx.saved_tensors
+
+        grad_features, grad_alphas = _C.accum_weightedsum_backward(
+            grad_output, features, alphas, points_idx
+        )
+
+        # Return exactly one gradient per forward input (features, alphas,
+        # points_idx). Only features and alphas receive a gradient from the
+        # backward op; the index tensor gets None.
+        return grad_features, grad_alphas, None
+
+
+def weighted_sum(pointsidx, alphas, pt_clds) -> torch.Tensor:
+    """
+    Composite features within a z-buffer using a weighted sum. Depth is ignored
+    and, unlike norm_weighted_sum, the weights are not normalized to sum to 1:
+        weighted_fs[b,c,i,j] = sum_k alphas[b,k,i,j] * pt_clds[c,pointsidx[b,k,i,j]]
+
+    Args:
+        pt_clds: Packed Tensor of shape (C, P) giving the features (e.g. RGB) of each point.
+        alphas: float32 Tensor of shape (N, points_per_pixel, image_size,
+            image_size) giving the weight of each point in the z-buffer.
+            Values should be in the interval [0, 1].
+        pointsidx: int32 Tensor of shape (N, points_per_pixel, image_size, image_size)
+            giving the indices of the nearest points at each pixel, sorted in z-order.
+            pointsidx[n, k, y, x] = p means that pt_clds[:, p] is the feature of the kth
+            closest point to pixel (y, x) in batch element n, weighted by alphas[n, k, y, x].
+
+    Returns:
+        Combined features: Tensor of shape (N, C, image_size, image_size)
+            giving the accumulated features at each point.
+    """
+    return _CompositeWeightedSumPoints.apply(pt_clds, alphas, pointsidx)
diff --git a/pytorch3d/pytorch3d/renderer/fisheyecameras.py b/pytorch3d/pytorch3d/renderer/fisheyecameras.py
new file mode 100644
index 0000000000000000000000000000000000000000..3da558df3ae02e3dc6a0e5858d9417bba485a1c7
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/fisheyecameras.py
@@ -0,0 +1,584 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+from typing import List, Optional, Tuple, Union
+
+import torch
+from pytorch3d.common.datatypes import Device
+from pytorch3d.renderer.cameras import _R, _T, CamerasBase
+
+_focal_length = torch.tensor(((1.0,),))  # default focal length, shape (1, 1)
+_principal_point = torch.tensor(((0.0, 0.0),))  # default principal point, shape (1, 2)
+_radial_params = torch.tensor(((0.0, 0.0, 0.0, 0.0, 0.0, 0.0),))  # six zero radial coefficients, shape (1, 6)
+_tangential_params = torch.tensor(((0.0, 0.0),))  # zero tangential coefficients, shape (1, 2)
+_thin_prism_params = torch.tensor(((0.0, 0.0, 0.0, 0.0),))  # zero thin-prism coefficients, shape (1, 4)
+
+
+class FishEyeCameras(CamerasBase):
+    """
+    A class which extends Pinhole camera by considering radial, tangential and
+    thin-prism distortion. For the fisheye camera model, k1, k2, ..., k_n_radial are
+    polynomial coefficients to model radial distortions. Two common types of radial
+    distortions are barrel and pincushion radial distortions.
+
+    a = x / z, b = y / z, r = (a*a+b*b)^(1/2)
+    th = atan(r)
+    [x_r]  = (th+ k0 * th^3 + k1* th^5 + ...) [a/r]
+    [y_r]                                     [b/r]                    [1]
+
+
+    The tangential distortion parameters are p1 and p2. The primary cause is
+    due to the lens assembly not being centered over and parallel to the image plane.
+    tangentialDistortion = [(2 x_r^2 + rd^2)*p_0 + 2*x_r*y_r*p_1]
+                           [(2 y_r^2 + rd^2)*p_1 + 2*x_r*y_r*p_0]      [2]
+    where rd^2 = x_r^2 + y_r^2
+
+    The thin-prism distortion is modeled with s1, s2, s3, s4 coefficients
+    thinPrismDistortion = [s0 * rd^2 + s1 rd^4]
+                          [s2 * rd^2 + s3 rd^4]                        [3]
+
+    The projection
+    proj = diag(f, f) * uvDistorted + [cu; cv]
+    uvDistorted = [x_r]  + tangentialDistortion  + thinPrismDistortion [4]
+                  [y_r]
+    f is the focal length and cu, cv are principal points in x, y axis.
+
+    """
+
+    _FIELDS = (  # entries mirror the __init__ argument names
+        "focal_length",
+        "principal_point",
+        "R",
+        "T",
+        "radial_params",
+        "tangential_params",
+        "thin_prism_params",
+        "world_coordinates",
+        "use_radial",
+        "use_tangential",
+        "use_thin_prism",
+        "device",
+        "image_size",
+    )
+
+    def __init__(
+        self,
+        focal_length: torch.Tensor = _focal_length,
+        principal_point: torch.Tensor = _principal_point,
+        radial_params: torch.Tensor = _radial_params,
+        tangential_params: torch.Tensor = _tangential_params,
+        thin_prism_params: torch.Tensor = _thin_prism_params,
+        R: torch.Tensor = _R,
+        T: torch.Tensor = _T,
+        world_coordinates: bool = False,
+        use_radial: bool = True,
+        use_tangential: bool = True,
+        use_thin_prism: bool = True,
+        device: Device = "cpu",
+        image_size: Optional[Union[List, Tuple, torch.Tensor]] = None,
+    ) -> None:
+
+        """
+        Create a batch of fisheye cameras with optional lens distortions.
+
+        Args:
+            focal_length: Focal length of the camera in world units
+                (stored as `self.focal`). A tensor of shape (N, 1) for square pixels,
+                where N is number of transforms.
+            principal_point: xy coordinates of the center of
+                the principal point of the camera in pixels.
+                A tensor of shape (N, 2).
+            radial_params: parameters for radial distortions.
+                A tensor of shape (N, num_radial).
+            tangential_params: parameters for tangential distortions.
+                A tensor of shape (N, 2).
+            thin_prism_params: parameters for thin-prism distortions.
+                A tensor of shape (N, 4).
+            R: Rotation matrix of shape (N, 3, 3)
+            T: Translation matrix of shape (N, 3)
+            world_coordinates: if True, project from world coordinates; otherwise from camera
+                coordinates
+            use_radial: radial_distortion, default to True
+            use_tangential: tangential distortion, default to True
+            use_thin_prism: thin prism distortion, default to True
+            device: torch.device or string
+            image_size: (height, width) of image size.
+                A tensor of shape (N, 2) or a list/tuple. Required for screen cameras.
+        """
+
+        kwargs = {"image_size": image_size} if image_size is not None else {}
+        super().__init__(
+            device=device,
+            R=R,
+            T=T,
+            **kwargs,  # pyre-ignore
+        )
+        if image_size is not None:
+            if (self.image_size < 1).any():  # pyre-ignore
+                raise ValueError("Image_size provided has invalid values")
+        else:
+            self.image_size = None
+
+        self.device = device
+        self.focal = focal_length.to(self.device)  # note: attribute is `focal`, not `focal_length`
+        self.principal_point = principal_point.to(self.device)
+        self.radial_params = radial_params.to(self.device)
+        self.tangential_params = tangential_params.to(self.device)
+        self.thin_prism_params = thin_prism_params.to(self.device)
+        self.R = R  # moved to self.device below
+        self.T = T  # moved to self.device below
+        self.world_coordinates = world_coordinates
+        self.use_radial = use_radial
+        self.use_tangential = use_tangential
+        self.use_thin_prism = use_thin_prism
+        self.epsilon = 1e-10  # r below this is treated as zero in _project_points_batch
+        self.num_distortion_iters = 50  # NOTE(review): presumably the iteration count for inverting the distortion -- confirm
+
+        self.R = self.R.to(self.device)
+        self.T = self.T.to(self.device)
+        self.num_radial = radial_params.shape[-1]  # number of radial coefficients per camera
+
+    def _project_points_batch(
+        self,
+        focal,
+        principal_point,
+        radial_params,
+        tangential_params,
+        thin_prism_params,
+        points,
+    ) -> torch.Tensor:
+        """
+        Project points given in the local reference frame of the camera onto the
+        image plane. Since this is a symmetric model, points with negative z are
+        projected to the positive sphere. i.e project(1,1,-1) == project(-1,-1,1)
+
+        Args:
+            focal: (1)
+            principal_point: (2)
+            radial_params: (num_radial), any number of radial coefficients
+            tangential_params: (2)
+            thin_prism_params: (4)
+            points in the camera coordinate frame: (..., 3). E.g., (P, 3) (1, P, 3)
+                or (M, P, 3) where P is the number of points
+
+        Returns:
+            projected_points in the image plane: (..., 3), with the last
+                coordinate set to 1. E.g., (P, 3) or (1, P, 3) or (M, P, 3)
+
+        """
+        assert points.shape[-1] == 3, "points shape incorrect"
+        ab = points[..., :2] / points[..., 2:]
+
+        r = ab.norm(dim=-1)
+        th = r.atan()
+        theta_sq = th * th
+
+        # compute radial distortions, eq 1. One power of theta^2 per radial
+        # coefficient, so any number of coefficients (num_radial) is supported.
+        num_radial = radial_params.shape[-1]
+        theta_pow = torch.stack(
+            [theta_sq**k for k in range(1, num_radial + 1)], dim=-1
+        )
+        th_radial = 1 + torch.sum(theta_pow * radial_params, dim=-1)
+
+        # compute th/r, using the limit for small values
+        th_divr = th / r
+        boolean_mask = abs(r) < self.epsilon
+        th_divr[boolean_mask] = 1.0
+
+        # the distorted coordinates -- except for focal length and principal point
+        # start with the radial term
+        coeff = th_radial * th_divr
+        xr_yr = coeff[..., None] * ab
+        xr_yr_squared_norm = torch.pow(xr_yr, 2).sum(dim=-1, keepdim=True)
+
+        uv_distorted = xr_yr if self.use_radial else ab
+
+        # compute tangential distortions, eq 2
+        if self.use_tangential:
+            temp = 2 * torch.sum(xr_yr * tangential_params, dim=-1)
+            uv_distorted = uv_distorted + (
+                temp[..., None] * xr_yr + xr_yr_squared_norm * tangential_params
+            )
+
+        # compute thin-prism distortions, eq 3
+        sh = uv_distorted.shape[:-1]
+        if self.use_thin_prism:
+            radial_powers = torch.cat(
+                [xr_yr_squared_norm, xr_yr_squared_norm * xr_yr_squared_norm], dim=-1
+            )
+            # Built out of place: uv_distorted may still alias `ab` or `xr_yr`,
+            # which autograd has saved, so writing into it would break backward.
+            thin_prism = torch.stack(
+                [
+                    torch.sum(thin_prism_params[..., 0:2] * radial_powers, dim=-1),
+                    torch.sum(thin_prism_params[..., 2:4] * radial_powers, dim=-1),
+                ],
+                dim=-1,
+            )
+            uv_distorted = uv_distorted + thin_prism
+        # return value: distorted points on the uv plane, eq 4
+        projected_points = focal * uv_distorted + principal_point
+        return torch.cat(
+            [projected_points, torch.ones(list(sh) + [1], device=self.device)], dim=-1
+        )
+
+    def check_input(self, points: torch.Tensor, batch_size: int):
+        """
+        Tell whether `points` can be projected by `batch_size` transforms.
+        Points of more than 3 dimensions are rejected. 3D points must have
+        shape (M, P, 3), and M > 1 is only accepted with a single transform.
+        Points with fewer than 3 dimensions are accepted without further checks.
+
+        Args:
+            points: tensor of shape (P, 3) or (1, P, 3) or (M, P, 3)
+            batch_size: number of transforms
+
+        Returns:
+            True if the input shapes are compatible, False otherwise.
+        """
+        if points.ndim > 3:
+            return False
+        if points.ndim < 3:
+            # Unbatched points are accepted without further shape checks.
+            return True
+        num_batches, _, point_dim = points.shape
+        if point_dim != 3:
+            return False
+        return not (num_batches > 1 and batch_size > 1)
+
+    def transform_points(
+        self, points, eps: Optional[float] = None, **kwargs
+    ) -> torch.Tensor:
+        """
+        Transform input points from camera space (or from world space, when
+        self.world_coordinates is True) to image space.
+        Args:
+            points: tensor of (..., 3). E.g., (P, 3) or (1, P, 3), (M, P, 3)
+            eps: tiny number to avoid zero division in the world-to-view transform
+        Returns:
+            torch.Tensor of projected points whose last coordinate is 1:
+            with one transform (N == 1) the output has the shape of points;
+            with N > 1 and points of shape (P, 3) or (1, P, 3) it is (N, P, 3).
+        Raises: ValueError if points is not compatible with the N transforms.
+        """
+        # project from world space to camera space
+        points = points.to(self.device)
+        if self.world_coordinates:
+            world_to_view_transform = self.get_world_to_view_transform(
+                R=self.R, T=self.T
+            )
+            points = world_to_view_transform.transform_points(points, eps=eps)
+
+        # project from camera space to image space
+        N = len(self.radial_params)
+        if not self.check_input(points, N):
+            # Adjacent literals instead of a backslash continuation, which would
+            # embed the indentation of the second line in the message.
+            msg = (
+                "Expected points of (P, 3) with batch_size 1 or N, or shape (M, P, 3) "
+                "with batch_size 1; got points of shape %r and batch_size %r"
+            )
+            raise ValueError(msg % (points.shape, N))
+
+        # Project the same points once per camera.
+        per_camera = [
+            self._project_points_batch(
+                self.focal[i],
+                self.principal_point[i],
+                self.radial_params[i],
+                self.tangential_params[i],
+                self.thin_prism_params[i],
+                points,
+            )
+            for i in range(N)
+        ]
+        if N == 1:
+            # A single camera returns its projection without an added camera axis.
+            return per_camera[0]
+
+        outputs = torch.stack(per_camera, dim=0)
+        if points.ndim == 3:
+            # check_input only lets (1, P, 3) through when N > 1, so the stack is
+            # (N, 1, P, 3). Drop just that axis: a bare squeeze() would also
+            # remove a P == 1 axis and break the documented (N, P, 3) output.
+            outputs = outputs.squeeze(1)
+        return outputs
+
+    def _unproject_points_batch(
+        self,
+        focal,
+        principal_point,
+        radial_params,
+        tangential_params,
+        thin_prism_params,
+        xy: torch.Tensor,
+    ) -> torch.Tensor:
+        """
+        Unproject image-plane points to camera-frame points with z = 1.
+
+        Args:
+            focal: (1); principal_point: (2)
+            radial_params: (num_radial)
+            tangential_params: (2); thin_prism_params: (4)
+            xy: (..., 2) points in the image plane
+
+        Returns:
+            point3d_est: (..., 3)
+        """
+        assert xy.shape[-1] == 2, "xy_depth shape incorrect"
+        leading_shape = xy.shape[:-1]
+        uv_distorted = (xy - principal_point) / focal  # undo focal / principal point
+
+        # get xr_yr from uv_distorted
+        xr_yr = self._compute_xr_yr_from_uv_distorted(
+            tangential_params, thin_prism_params, uv_distorted
+        )
+        xr_yr_norm = torch.norm(xr_yr, dim=-1)
+        # find theta from the norm of xr_yr
+        theta = self._get_theta_from_norm_xr_yr(radial_params, xr_yr_norm)
+        # scale the direction xr_yr / |xr_yr| by tan(theta); z stays at 1
+        point3d_est = theta.new_ones(*leading_shape, 3)
+        scale = theta.tan()[..., None] / xr_yr_norm[..., None]
+        point3d_est[..., :2] = scale * xr_yr
+        return point3d_est
+
+    def unproject_points(
+        self,
+        xy_depth: torch.Tensor,
+        world_coordinates: bool = True,
+        scaled_depth_input: bool = False,
+        **kwargs,
+    ) -> torch.Tensor:
+        """
+        Unprojects points given in the image plane of the camera onto the
+        plane z = 1 of the camera reference frame, by calling
+        ``_unproject_points_batch`` once per camera of the batch.
+
+        Only the first two channels of ``xy_depth`` are read. The depth
+        channel, ``world_coordinates``, ``scaled_depth_input`` and ``kwargs``
+        are not used by this implementation, so the output is never
+        converted to world coordinates here.
+
+        Args:
+            xy_depth: points in the image plane of shape (..., 3). E.g.,
+                (P, 3) or (1, P, 3) or (M, P, 3)
+            world_coordinates: not used by this implementation
+            scaled_depth_input: not used by this implementation
+
+        Returns:
+            unprojected_points in the camera frame with z = 1.
+            With a single camera the per-camera result is returned as is.
+            Otherwise the per-camera results are stacked along a new leading
+            dimension and every singleton dimension is squeezed out.
+            For N > 1 cameras every camera is applied to all the points,
+            e.g. an input of shape (M, P, 3) yields (N, M, P, 3) before
+            the squeeze.
+        """
+        # the depth channel is dropped before unprojecting
+        xy = xy_depth.to(self.device)[..., 0:2]
+        # one set of intrinsics per camera of the batch
+        n_cameras = len(self.radial_params)
+
+        def unproject_with(cam_idx: int) -> torch.Tensor:
+            # unproject all the points with the intrinsics of one camera
+            return self._unproject_points_batch(
+                self.focal[cam_idx],
+                self.principal_point[cam_idx],
+                self.radial_params[cam_idx],
+                self.tangential_params[cam_idx],
+                self.thin_prism_params[cam_idx],
+                xy,
+            )
+
+        if n_cameras == 1:
+            # single camera: no stacking and no squeezing
+            return unproject_with(0)
+
+        per_camera = [unproject_with(cam_idx) for cam_idx in range(n_cameras)]
+        # (N, ...) stack from which all the singleton dimensions are removed
+        return torch.stack(per_camera, dim=0).squeeze()
+
+    def _compute_xr_yr_from_uv_distorted(
+        self, tangential_params, thin_prism_params, uv_distorted: torch.Tensor
+    ) -> torch.Tensor:
+        """
+        Inverts the tangential / thin prism distortion: [x_r; y_r] from uvDistorted
+
+        Args:
+            tangential_params: (2)
+            thin_prism_params: (4)
+            uv_distorted: (..., 2), E.g., (P, 2), (1, P, 2), (M, P, 2)
+
+        Returns:
+            xr_yr: (..., 2); `uv_distorted` itself if both distortions are disabled
+        """
+        # early exit if we're not using any tangential/ thin prism distortions
+        if not self.use_tangential and not self.use_thin_prism:
+            return uv_distorted
+
+        xr_yr = uv_distorted
+        # Newton iterations on xr_yr, starting from the distorted coordinates
+        for _ in range(self.num_distortion_iters):
+            # forward model: the uvDistorted that the current xr_yr would produce
+            uv_distorted_est = xr_yr.clone()
+            xr_yr_squared_norm = torch.pow(xr_yr, 2).sum(dim=-1, keepdim=True)
+
+            if self.use_tangential:
+                temp = 2.0 * torch.sum(
+                    xr_yr * tangential_params[..., 0:2],
+                    dim=-1,
+                    keepdim=True,
+                )
+                uv_distorted_est = uv_distorted_est + (
+                    temp * xr_yr + xr_yr_squared_norm * tangential_params[..., 0:2]
+                )
+
+            if self.use_thin_prism:
+                radial_powers = torch.cat(
+                    [xr_yr_squared_norm, xr_yr_squared_norm * xr_yr_squared_norm],
+                    dim=-1,
+                )
+                uv_distorted_est[..., 0] = uv_distorted_est[..., 0] + torch.sum(
+                    thin_prism_params[..., 0:2] * radial_powers,
+                    dim=-1,
+                )
+                uv_distorted_est[..., 1] = uv_distorted_est[..., 1] + torch.sum(
+                    thin_prism_params[..., 2:4] * radial_powers,
+                    dim=-1,
+                )
+
+            # compute the derivative of uvDistorted wrt xr_yr
+            duv_distorted_dxryr = self._compute_duv_distorted_dxryr(
+                tangential_params, thin_prism_params, xr_yr, xr_yr_squared_norm[..., 0]
+            )
+            # compute correction: solve Jacobian * correction = residual per point
+            # note: the matrix duvDistorted_dxryr will be close to identity (for reasonable
+            # values of tangential/thin prism distortions)
+            correction = torch.linalg.solve(
+                duv_distorted_dxryr, (uv_distorted - uv_distorted_est)[..., None]
+            )
+            xr_yr = xr_yr + correction[..., 0]
+        return xr_yr
+
+    def _get_theta_from_norm_xr_yr(
+        self, radial_params, th_radial_desired
+    ) -> torch.Tensor:
+        """
+        Helper function to compute the angle theta from the norm of the vector [x_r; y_r]
+        by Newton iterations on th * (1 + k1 * th^2 + k2 * th^4 + ...) = norm.
+        The polynomial has one even power of theta per entry of `radial_params`.
+
+        Args:
+            radial_params: k1, k2, ..., k_num_radial, (num_radial)
+            th_radial_desired: norm of [x_r; y_r], shape (...), E.g., (P), (1, P), (M, P)
+
+        Returns:
+            th: angle theta (in radians) of shape (...), E.g., (P), (1, P), (M, P)
+        """
+        sh = list(th_radial_desired.shape)
+        th = th_radial_desired
+        # for i = 1, 2, ...: d(k_i * th^(2i + 1)) / dth = (2i + 1) * k_i * th^(2i)
+        c = torch.tensor(
+            [2.0 * i + 3 for i in range(self.num_radial)], device=self.device
+        )
+        for _ in range(self.num_distortion_iters):
+            theta_sq = th * th
+            # th^2, th^4, ..., th^(2 * num_radial): as many powers as coefficients
+            theta_pow = torch.stack(
+                [theta_sq ** (i + 1) for i in range(self.num_radial)], dim=-1
+            )
+            # the theta polynomial and its derivative wrt theta
+            th_radial = (1.0 + torch.sum(theta_pow * radial_params, dim=-1)) * th
+            dthD_dth = 1.0 + torch.sum(c * radial_params * theta_pow, dim=-1)
+
+            # compute the correction
+            step = torch.zeros(*sh, device=self.device)
+            # Newton step wherever the derivative is safely away from zero
+            nonzero_mask = dthD_dth.abs() > self.epsilon
+            step = step + nonzero_mask * (th_radial_desired - th_radial) / dthD_dth
+            # if derivative is close to zero, apply small correction in the appropriate
+            # direction to avoid numerical explosions
+            close_to_zero_mask = dthD_dth.abs() <= self.epsilon
+            dir_mask = (th_radial_desired - th_radial) * dthD_dth > 0.0
+            boolean_mask = close_to_zero_mask & dir_mask
+            step = step + 10.0 * self.epsilon * boolean_mask
+            step = step - 10 * self.epsilon * (~nonzero_mask & ~boolean_mask)
+
+            # apply correction
+            th = th + step
+            # revert to within 180 degrees FOV to avoid numerical overflow
+            idw = th.abs() >= math.pi / 2.0
+            th[idw] = 0.999 * math.pi / 2.0
+        return th
+
+    def _compute_duv_distorted_dxryr(
+        self, tangential_params, thin_prism_params, xr_yr, xr_yr_squareNorm
+    ) -> torch.Tensor:
+        """
+        Helper function, computes the Jacobian of uvDistorted wrt the vector [x_r;y_r]
+
+        With p = tangential_params, s = thin_prism_params and r2 = x_r^2 + y_r^2,
+        the entries below are the derivatives of
+
+            u = x_r + 2 * (x_r * p0 + y_r * p1) * x_r + r2 * p0 + s0 * r2 + s1 * r2^2
+            v = y_r + 2 * (x_r * p0 + y_r * p1) * y_r + r2 * p1 + s2 * r2 + s3 * r2^2
+
+        where the p terms are present only if `self.use_tangential` is set and
+        the s terms only if `self.use_thin_prism` is set.
+
+        Args:
+            tangential_params: (2)
+            thin_prism_params: (4)
+            xr_yr: (..., 2), E.g., (P, 2), (1, P, 2), (M, P, 2)
+            xr_yr_squareNorm: (...), E.g., (P), (1, P), (M, P)
+
+        Returns:
+            duv_distorted_dxryr: (..., 2, 2) Jacobian
+        """
+        sh = list(xr_yr.shape[:-1])
+        x, y = xr_yr[..., 0], xr_yr[..., 1]
+        if self.use_tangential:
+            p0, p1 = tangential_params[..., 0], tangential_params[..., 1]
+            jac = torch.empty((*sh, 2, 2), device=self.device)
+            jac[..., 0, 0] = 1.0 + 6.0 * x * p0 + 2.0 * y * p1
+            offdiag = 2.0 * (x * p1 + y * p0)
+            jac[..., 0, 1] = offdiag
+            jac[..., 1, 0] = offdiag
+            jac[..., 1, 1] = 1.0 + 6.0 * y * p1 + 2.0 * x * p0
+        else:
+            # Without tangential distortion the Jacobian starts as the identity.
+            # It is created on the camera device, like the branch above: the
+            # thin prism update below and torch.linalg.solve in the caller
+            # combine it with tensors that are expected to be on that device.
+            jac = torch.eye(2, device=self.device).repeat(*sh, 1, 1)
+
+        if self.use_thin_prism:
+            # d(s_a * r2 + s_b * r2^2) / d[x_r; y_r]
+            #     = 2 * (s_a + 2 * s_b * r2) * [x_r; y_r]
+            # first row: (s_a, s_b) = (s0, s1)
+            temp1 = 2.0 * (
+                thin_prism_params[..., 0]
+                + 2.0 * thin_prism_params[..., 1] * xr_yr_squareNorm[...]
+            )
+            jac[..., 0, 0] = jac[..., 0, 0] + x * temp1
+            jac[..., 0, 1] = jac[..., 0, 1] + y * temp1
+
+            # second row: (s_a, s_b) = (s2, s3)
+            temp2 = 2.0 * (
+                thin_prism_params[..., 2]
+                + 2.0 * thin_prism_params[..., 3] * xr_yr_squareNorm[...]
+            )
+            jac[..., 1, 0] = jac[..., 1, 0] + x * temp2
+            jac[..., 1, 1] = jac[..., 1, 1] + y * temp2
+        return jac
+
+    def in_ndc(self):
+        return True  # constant answer: identical for every instance of this class
+
+    def is_perspective(self):
+        return False  # constant answer: identical for every instance of this class
diff --git a/pytorch3d/pytorch3d/renderer/implicit/__init__.py b/pytorch3d/pytorch3d/renderer/implicit/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..39090112a4b7753e73a4e3306338ee7962a61406
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/implicit/__init__.py
@@ -0,0 +1,25 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .harmonic_embedding import HarmonicEmbedding
+from .raymarching import AbsorptionOnlyRaymarcher, EmissionAbsorptionRaymarcher
+from .raysampling import (
+    GridRaysampler,
+    MonteCarloRaysampler,
+    MultinomialRaysampler,
+    NDCGridRaysampler,
+    NDCMultinomialRaysampler,
+)
+from .renderer import ImplicitRenderer, VolumeRenderer, VolumeSampler
+from .utils import (
+    HeterogeneousRayBundle,
+    ray_bundle_to_ray_points,
+    ray_bundle_variables_to_ray_points,
+    RayBundle,
+)
+
+
+__all__ = [name for name in globals() if not name.startswith("_")]
diff --git a/pytorch3d/pytorch3d/renderer/implicit/harmonic_embedding.py b/pytorch3d/pytorch3d/renderer/implicit/harmonic_embedding.py
new file mode 100644
index 0000000000000000000000000000000000000000..90e857f8aa7cc0286ae603f2d95ba96f72bfb22a
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/implicit/harmonic_embedding.py
@@ -0,0 +1,182 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Optional
+
+import torch
+
+
+class HarmonicEmbedding(torch.nn.Module):
+    def __init__(
+        self,
+        n_harmonic_functions: int = 6,
+        omega_0: float = 1.0,
+        logspace: bool = True,
+        append_input: bool = True,
+    ) -> None:
+        """
+        The harmonic embedding layer supports the classical
+        Nerf positional encoding described in
+        `NeRF <https://arxiv.org/abs/2003.08934>`_
+        and the integrated position encoding in
+        `MIP-NeRF <https://arxiv.org/abs/2103.13415>`_.
+
+        When calling `forward` you can provide the extra argument `diag_cov`.
+
+        If `diag_cov is None`, the layer works on the tensor `x`
+        alone; `forward` does not read a ray bundle or
+        any other input.
+        It converts each feature
+        (i.e. vector along the last dimension) in `x`
+        into a series of harmonic features `embedding`,
+        where for each i in range(dim) the following are present
+        in embedding[...]::
+
+            [
+                sin(f_1*x[..., i]),
+                sin(f_2*x[..., i]),
+                ...
+                sin(f_N * x[..., i]),
+                cos(f_1*x[..., i]),
+                cos(f_2*x[..., i]),
+                ...
+                cos(f_N * x[..., i]),
+                x[..., i],              # only present if append_input is True.
+            ]
+
+        where N equals `n_harmonic_functions`, and f_i is a scalar
+        denoting the i-th frequency of the harmonic embedding.
+
+
+        If `diag_cov is not None`, it approximates
+        conical frustums following a ray bundle as gaussians,
+        defined by x, the means of the gaussians and diag_cov,
+        the diagonal covariances.
+        Then it converts each gaussian
+        into a series of harmonic features `embedding`,
+        where for each i in range(dim) the following are present
+        in embedding[...]::
+
+            [
+                sin(f_1*x[..., i]) * exp(-0.5 * f_1**2 * diag_cov[..., i]),
+                sin(f_2*x[..., i]) * exp(-0.5 * f_2**2 * diag_cov[..., i]),
+                ...
+                sin(f_N * x[..., i]) * exp(-0.5 * f_N**2 * diag_cov[..., i]),
+                cos(f_1*x[..., i]) * exp(-0.5 * f_1**2 * diag_cov[..., i]),
+                cos(f_2*x[..., i]) * exp(-0.5 * f_2**2 * diag_cov[..., i]),
+                ...
+                cos(f_N * x[..., i]) * exp(-0.5 * f_N**2 * diag_cov[..., i]),
+                x[..., i],              # only present if append_input is True.
+            ]
+
+        where N equals `n_harmonic_functions`, and f_i is a scalar
+        denoting the i-th frequency of the harmonic embedding.
+
+        If `logspace==True`, the frequencies `[f_1, ..., f_N]` are
+        powers of 2:
+            `f_1, ..., f_N = 2**torch.arange(n_harmonic_functions)`
+
+        If `logspace==False`, frequencies are linearly spaced between
+        `1.0` and `2**(n_harmonic_functions-1)`:
+            `f_1, ..., f_N = torch.linspace(
+                1.0, 2**(n_harmonic_functions-1), n_harmonic_functions
+            )`
+
+        Note that the frequencies are stored premultiplied by the base
+        frequency `omega_0`, which is equivalent to scaling `x` by it.
+
+        Args:
+            n_harmonic_functions: int, number of harmonic
+                features
+            omega_0: float, base frequency
+            logspace: bool, Whether to space the frequencies in
+                logspace or linear space
+            append_input: bool, whether to concat the original
+                input to the harmonic embedding. If true the
+                output is of the form (embed.sin(), embed.cos(), x)
+        """
+        super().__init__()
+
+        if logspace:
+            frequencies = 2.0 ** torch.arange(
+                n_harmonic_functions,
+                dtype=torch.float32,
+            )
+        else:
+            frequencies = torch.linspace(
+                1.0,
+                2.0 ** (n_harmonic_functions - 1),
+                n_harmonic_functions,
+                dtype=torch.float32,
+            )
+
+        self.register_buffer("_frequencies", frequencies * omega_0, persistent=False)
+        self.register_buffer(
+            "_zero_half_pi", torch.tensor([0.0, 0.5 * torch.pi]), persistent=False
+        )
+        self.append_input = append_input
+
+    def forward(
+        self, x: torch.Tensor, diag_cov: Optional[torch.Tensor] = None, **kwargs
+    ) -> torch.Tensor:
+        """
+        Args:
+            x: tensor of shape [..., dim]
+            diag_cov: An optional tensor of shape `(..., dim)`
+                representing the diagonal covariance matrices of our Gaussians, joined with x
+                as means of the Gaussians.
+
+        Returns:
+            embedding: a harmonic embedding of `x` of shape
+            [..., (n_harmonic_functions * 2 + int(append_input)) * dim]
+        """
+        # [..., dim, n_harmonic_functions]
+        embed = x[..., None] * self._frequencies
+        # [..., 1, dim, n_harmonic_functions] + [2, 1, 1] => [..., 2, dim, n_harmonic_functions]
+        embed = embed[..., None, :, :] + self._zero_half_pi[..., None, None]
+        # Use the trig identity cos(x) = sin(x + pi/2)
+        # and do one vectorized call to sin([x, x+pi/2]) instead of (sin(x), cos(x)).
+        embed = embed.sin()
+        if diag_cov is not None:
+            x_var = diag_cov[..., None] * torch.pow(self._frequencies, 2)
+            exp_var = torch.exp(-0.5 * x_var)
+            # [..., 2, dim, n_harmonic_functions]
+            embed = embed * exp_var[..., None, :, :]
+
+        embed = embed.reshape(*x.shape[:-1], -1)
+
+        if self.append_input:
+            return torch.cat([embed, x], dim=-1)
+        return embed
+
+    @staticmethod
+    def get_output_dim_static(
+        input_dims: int,
+        n_harmonic_functions: int,
+        append_input: bool,
+    ) -> int:
+        """
+        Utility to help predict the shape of the output of `forward`.
+
+        Args:
+            input_dims: length of the last dimension of the input tensor
+            n_harmonic_functions: number of embedding frequencies
+            append_input: whether or not to concat the original
+                input to the harmonic embedding
+        Returns:
+            int: the length of the last dimension of the output tensor
+        """
+        return input_dims * (2 * n_harmonic_functions + int(append_input))
+
+    def get_output_dim(self, input_dims: int = 3) -> int:
+        """
+        Same as `get_output_dim_static`, with the number of frequencies and
+        `append_input` taken from this instance. The default for input_dims
+        is 3 for 3D applications where the input might be xyz.
+        """
+        return self.get_output_dim_static(
+            input_dims, len(self._frequencies), self.append_input
+        )
diff --git a/pytorch3d/pytorch3d/renderer/implicit/raymarching.py b/pytorch3d/pytorch3d/renderer/implicit/raymarching.py
new file mode 100644
index 0000000000000000000000000000000000000000..047229b24af839d0d4183baca82ff56b197569e0
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/implicit/raymarching.py
@@ -0,0 +1,231 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import warnings
+from typing import Optional, Tuple, Union
+
+import torch
+
+
+class EmissionAbsorptionRaymarcher(torch.nn.Module):
+    """
+    Raymarch using the Emission-Absorption (EA) algorithm.
+
+    Every ray is rendered independently from the densities and features
+    sampled at (typically uniformly spaced) 3D locations along it.
+    The densities `rays_densities` have the shape
+    `(..., n_points_per_ray, 1)`, are expected to lie in [0, 1], and
+    encode the opaqueness of each point (the higher the less transparent).
+    The features `rays_features` of shape
+    `(..., n_points_per_ray, feature_dim)` hold the content that is
+    rendered for a point in case that point is opaque
+    (i.e. its density -> 1.0).
+
+    EA first turns `rays_densities` into the absorption function
+    along each ray::
+
+        absorption = cumprod(1 - rays_densities, dim=-1)
+
+    The value `absorption[..., k]` tells how much light has
+    reached the `k`-th point of a ray since the ray started
+    its trajectory at the `k=0`-th point.
+
+    Each ray is then rendered into a tensor `features` of shape `(..., feature_dim)`
+    as a weighted combination of its per-point features `rays_features`::
+
+        weights = absorption * rays_densities
+        features = (rays_features * weights).sum(dim=-2)
+
+    Here `weights` is a function with a strong peak around the location
+    of the first surface point that the ray passes through.
+
+    For a perfectly bounded volume (with a strictly binary density),
+    `weights = cumprod(1 - rays_densities, dim=-1) * rays_densities`
+    would be 0 everywhere. To prevent this,
+    the result of the cumulative product is shifted `self.surface_thickness`
+    elements along the ray direction.
+    """
+
+    def __init__(self, surface_thickness: int = 1) -> None:
+        """
+        Args:
+            surface_thickness: Number of elements by which the absorption
+                function is shifted along each ray before the weighting.
+        """
+        super().__init__()
+        self.surface_thickness = surface_thickness
+
+    def forward(
+        self,
+        rays_densities: torch.Tensor,
+        rays_features: torch.Tensor,
+        eps: float = 1e-10,
+        **kwargs,
+    ) -> torch.Tensor:
+        """
+        Args:
+            rays_densities: Per-ray density values, a tensor
+                of shape `(..., n_points_per_ray, 1)` with values in [0, 1].
+            rays_features: Per-ray feature values, a tensor
+                of shape `(..., n_points_per_ray, feature_dim)`.
+            eps: A small constant added to `1 - rays_densities` before
+                the absorption function (its shifted cumprod along
+                each ray) is computed. For densities in [0, 1] it keeps the
+                cumprod away from an exact 0, which would inhibit learning.
+
+        Returns:
+            features_opacities: A tensor of shape `(..., feature_dim+1)`
+                that concatenates two tensors along the last dimension:
+                    1) features: The per-ray renders
+                        of shape `(..., feature_dim)`.
+                    2) opacities: The per-ray opacity values
+                        of shape `(..., 1)`, computed as
+                        `1 - prod(1 - rays_densities)` (without `eps`).
+                        They denote the total amount of light that has
+                        been absorbed for each ray. E.g. a value of 0
+                        corresponds to the ray completely passing through
+                        a volume (see `AbsorptionOnlyRaymarcher`).
+        """
+        _check_raymarcher_inputs(
+            rays_densities,
+            rays_features,
+            rays_z=None,
+            features_can_be_none=False,
+            z_can_be_none=True,
+            density_1d=True,
+        )
+        _check_density_bounds(rays_densities)
+        densities = rays_densities[..., 0]
+        # transmittance shifted by `surface_thickness` samples along the ray
+        transmittance = _shifted_cumprod(
+            (1.0 + eps) - densities, shift=self.surface_thickness
+        )
+        blend = (densities * transmittance)[..., None]
+        rendered = (blend * rays_features).sum(dim=-2)
+        alpha = 1.0 - torch.prod(1.0 - densities, dim=-1, keepdim=True)
+        return torch.cat((rendered, alpha), dim=-1)
+
+
+class AbsorptionOnlyRaymarcher(torch.nn.Module):
+    """
+    Raymarch using the Absorption-Only (AO) algorithm.
+
+    Every ray is rendered independently from the densities
+    sampled at (typically uniformly spaced) 3D locations along it.
+    The densities `rays_densities` have the shape
+    `(..., n_points_per_ray, 1)`, are expected to lie in [0, 1], and
+    encode the opaqueness of each point (the higher the less transparent).
+    The algorithm only measures the total amount of light absorbed along each ray
+    and, besides outputting per-ray `opacity` values of shape `(..., 1)`,
+    does not produce any feature renderings.
+
+    It computes `total_transmission = prod(1 - rays_densities)`
+    of shape `(..., 1)` which, for each ray, measures the total amount of light
+    that passed through the volume,
+    and returns `opacities = 1 - total_transmission`.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+
+    def forward(
+        self, rays_densities: torch.Tensor, **kwargs
+    ) -> Union[None, torch.Tensor]:
+        """
+        Args:
+            rays_densities: Per-ray density values, a tensor
+                of shape `(..., n_points_per_ray, 1)` with values in [0, 1].
+
+        Returns:
+            opacities: A tensor of per-ray opacity values of shape `(..., 1)`.
+                Its values denote the total amount of light that has been
+                absorbed for each ray. E.g. a value of 0 corresponds
+                to the ray completely passing through a volume.
+        """
+
+        _check_raymarcher_inputs(
+            rays_densities,
+            None,
+            None,
+            features_can_be_none=True,
+            z_can_be_none=True,
+            density_1d=True,
+        )
+        densities = rays_densities[..., 0]
+        _check_density_bounds(densities)
+        # fraction of the light that makes it past all the samples of a ray
+        transmitted = torch.prod(1 - densities, dim=-1, keepdim=True)
+        return 1.0 - transmitted
+
+
+def _shifted_cumprod(x, shift: int = 1):
+    """
+    Computes `torch.cumprod(x, dim=-1)`, prepends `shift` ones and
+    drops the `shift` trailing elements (`shift=0` returns the plain cumprod).
+    """
+    x_cumprod = torch.cumprod(x, dim=-1)
+    if shift == 0:
+        # `[..., :-0]` is an empty slice: the general path would drop everything
+        return x_cumprod
+    ones = torch.ones_like(x_cumprod[..., :shift])
+    return torch.cat([ones, x_cumprod[..., :-shift]], dim=-1)
+
+
+def _check_density_bounds(
+    rays_densities: torch.Tensor, bounds: Tuple[float, float] = (0.0, 1.0)
+) -> None:
+    """
+    Checks whether the elements of `rays_densities` range within `bounds`.
+    If not issues a warning. An empty tensor passes the check silently.
+    """
+    if rays_densities.numel() == 0:
+        return  # min() / max() raise on empty tensors
+    with torch.no_grad():
+        if (rays_densities.max() > bounds[1]) or (rays_densities.min() < bounds[0]):
+            message = "One or more elements of rays_densities are outside of valid"
+            warnings.warn(f"{message} range {bounds}")
+
+
+def _check_raymarcher_inputs(
+    rays_densities: torch.Tensor,
+    rays_features: Optional[torch.Tensor],
+    rays_z: Optional[torch.Tensor],
+    features_can_be_none: bool = False,
+    z_can_be_none: bool = False,
+    density_1d: bool = True,
+) -> None:
+    """
+    Validates the inputs of the raymarching algorithms and raises a
+    `ValueError` that describes the first violated requirement.
+    """
+    z_required = not z_can_be_none
+    features_required = not features_can_be_none
+    if not torch.is_tensor(rays_densities):
+        raise ValueError("rays_densities has to be an instance of torch.Tensor.")
+    if z_required and not torch.is_tensor(rays_z):
+        raise ValueError("rays_z has to be an instance of torch.Tensor.")
+    if features_required and not torch.is_tensor(rays_features):
+        raise ValueError("rays_features has to be an instance of torch.Tensor.")
+
+    # shape checks of the densities
+    if rays_densities.ndim < 1:
+        raise ValueError("rays_densities have to have at least one dimension.")
+    if density_1d and rays_densities.shape[-1] != 1:
+        raise ValueError(
+            "The size of the last dimension of rays_densities has to be one."
+            f" Got shape {rays_densities.shape}."
+        )
+
+    # the remaining inputs are compared against the leading ray dimensions
+    rays_shape = rays_densities.shape[:-1]
+    # pyre-fixme[16]: `Optional` has no attribute `shape`.
+    if z_required and rays_z.shape != rays_shape:
+        raise ValueError("rays_z have to be of the same shape as rays_densities.")
+    if features_required and rays_features.shape[:-1] != rays_shape:
+        raise ValueError(
+            "The first to previous to last dimensions of rays_features"
+            " have to be the same as all dimensions of rays_densities."
+        )
diff --git a/pytorch3d/pytorch3d/renderer/implicit/raysampling.py b/pytorch3d/pytorch3d/renderer/implicit/raysampling.py
new file mode 100644
index 0000000000000000000000000000000000000000..c81178afe8fd869049d9b4306b0b3d915a5a01ad
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/implicit/raysampling.py
@@ -0,0 +1,794 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import warnings
+from typing import Optional, Tuple, Union
+
+import torch
+from pytorch3d.common.compat import meshgrid_ij
+from pytorch3d.ops import padded_to_packed
+from pytorch3d.renderer.cameras import CamerasBase
+from pytorch3d.renderer.implicit.utils import HeterogeneousRayBundle, RayBundle
+from torch.nn import functional as F
+
+
+"""
+This file defines three raysampling techniques:
+    - MultinomialRaysampler which can be used to sample rays from pixels of an image grid
+    - NDCMultinomialRaysampler which can be used to sample rays from pixels of an image grid,
+        which follows the pytorch3d convention for image grid coordinates
+    - MonteCarloRaysampler which randomly selects real-valued locations in the image plane
+        and emits rays from them
+"""
+
+
+class MultinomialRaysampler(torch.nn.Module):
+    """
+    Samples a fixed number of points along rays which are regularly distributed
+    in a batch of rectangular image grids. Points along each ray
+    have uniformly-spaced z-coordinates between a predefined
+    minimum and maximum depth.
+
+    The raysampler first generates a 3D coordinate grid of the following form::
+
+           / min_x, min_y, max_depth -------------- / max_x, min_y, max_depth
+          /                                        /|
+         /                                        / |     ^
+        / min_depth                    min_depth /  |     |
+        min_x ----------------------------- max_x   |     | image
+        min_y                               min_y   |     | height
+        |                                       |   |     |
+        |                                       |   |     v
+        |                                       |   |
+        |                                       |   / max_x, max_y,     ^
+        |                                       |  /  max_depth        /
+        min_x                               max_y /                   / n_pts_per_ray
+        max_y ----------------------------- max_x/ min_depth         v
+                < --- image_width --- >
+
+    In order to generate ray points, `MultinomialRaysampler` takes each 3D point of
+    the grid (with coordinates `[x, y, depth]`) and unprojects it
+    with `cameras.unproject_points([x, y, depth])`, where `cameras` are an
+    additional input to the `forward` function.
+
+    Note that this is a generic implementation that can support any image grid
+    coordinate convention. For a raysampler which follows the PyTorch3D
+    coordinate conventions please refer to `NDCMultinomialRaysampler`.
+    As such, `NDCMultinomialRaysampler` is a special case of `MultinomialRaysampler`.
+
+    Attributes:
+        min_x: The leftmost x-coordinate of each ray's source pixel's center.
+        max_x: The rightmost x-coordinate of each ray's source pixel's center.
+        min_y: The topmost y-coordinate of each ray's source pixel's center.
+        max_y: The bottommost y-coordinate of each ray's source pixel's center.
+    """
+
+    def __init__(
+        self,
+        *,
+        min_x: float,
+        max_x: float,
+        min_y: float,
+        max_y: float,
+        image_width: int,
+        image_height: int,
+        n_pts_per_ray: int,
+        min_depth: float,
+        max_depth: float,
+        n_rays_per_image: Optional[int] = None,
+        n_rays_total: Optional[int] = None,
+        unit_directions: bool = False,
+        stratified_sampling: bool = False,
+    ) -> None:
+        """
+        Args:
+            min_x: The leftmost x-coordinate of each ray's source pixel's center.
+            max_x: The rightmost x-coordinate of each ray's source pixel's center.
+            min_y: The topmost y-coordinate of each ray's source pixel's center.
+            max_y: The bottommost y-coordinate of each ray's source pixel's center.
+            image_width: The horizontal size of the image grid.
+            image_height: The vertical size of the image grid.
+            n_pts_per_ray: The number of points sampled along each ray.
+            min_depth: The minimum depth of a ray-point.
+            max_depth: The maximum depth of a ray-point.
+            n_rays_per_image: If given, this amount of rays are sampled from the grid.
+                `n_rays_per_image` and `n_rays_total` cannot both be defined.
+            n_rays_total: How many rays in total to sample from the cameras provided. The result
+                is as if `n_rays_total` cameras were sampled with replacement from the
+                cameras provided and for every camera one ray was sampled. If set returns the
+                HeterogeneousRayBundle with batch_size=n_rays_total.
+                `n_rays_per_image` and `n_rays_total` cannot both be defined.
+            unit_directions: whether to normalize direction vectors in ray bundle.
+            stratified_sampling: if True, performs stratified random sampling
+                along the ray; otherwise takes ray points at deterministic offsets.
+        """
+        super().__init__()
+        self._n_pts_per_ray = n_pts_per_ray
+        self._min_depth = min_depth
+        self._max_depth = max_depth
+        self._n_rays_per_image = n_rays_per_image
+        self._n_rays_total = n_rays_total
+        self._unit_directions = unit_directions
+        self._stratified_sampling = stratified_sampling
+        self.min_x, self.max_x = min_x, max_x
+        self.min_y, self.max_y = min_y, max_y
+        # get the initial grid of image xy coords
+        y, x = meshgrid_ij(
+            torch.linspace(min_y, max_y, image_height, dtype=torch.float32),
+            torch.linspace(min_x, max_x, image_width, dtype=torch.float32),
+        )
+        _xy_grid = torch.stack([x, y], dim=-1)
+
+        self.register_buffer("_xy_grid", _xy_grid, persistent=False)
+
+    def forward(
+        self,
+        cameras: CamerasBase,
+        *,
+        mask: Optional[torch.Tensor] = None,
+        min_depth: Optional[float] = None,
+        max_depth: Optional[float] = None,
+        n_rays_per_image: Optional[int] = None,
+        n_pts_per_ray: Optional[int] = None,
+        stratified_sampling: Optional[bool] = None,
+        n_rays_total: Optional[int] = None,
+        **kwargs,
+    ) -> Union[RayBundle, HeterogeneousRayBundle]:
+        """
+        Args:
+            cameras: A batch of `batch_size` cameras from which the rays are emitted.
+            mask: if given, the rays are sampled from the mask. Should be of size
+                (batch_size, image_height, image_width).
+            min_depth: The minimum depth of a ray-point.
+            max_depth: The maximum depth of a ray-point.
+            n_rays_per_image: If given, this amount of rays are sampled from the grid.
+                `n_rays_per_image` and `n_rays_total` cannot both be defined.
+            n_pts_per_ray: The number of points sampled along each ray.
+            stratified_sampling: if set, overrides stratified_sampling provided
+                in __init__.
+            n_rays_total: How many rays in total to sample from the cameras provided. The result
+                is as if `n_rays_total` cameras were sampled with replacement from the
+                cameras provided and for every camera one ray was sampled. If set returns the
+                HeterogeneousRayBundle with batch_size=n_rays_total.
+                `n_rays_per_image` and `n_rays_total` cannot both be defined.
+        Returns:
+            A named tuple RayBundle or dataclass HeterogeneousRayBundle with the
+            following fields:
+
+            origins: A tensor of shape
+                `(batch_size, s1, s2, 3)`
+                denoting the locations of ray origins in the world coordinates.
+            directions: A tensor of shape
+                `(batch_size, s1, s2, 3)`
+                denoting the directions of each ray in the world coordinates.
+            lengths: A tensor of shape
+                `(batch_size, s1, s2, n_pts_per_ray)`
+                containing the z-coordinate (=depth) of each ray in world units.
+            xys: A tensor of shape
+                `(batch_size, s1, s2, 2)`
+                containing the 2D image coordinates of each ray or,
+                if mask is given, `(batch_size, n, 1, 2)`
+            Here `s1, s2` refer to spatial dimensions.
+            `(s1, s2)` refer to (highest priority first):
+                - `(1, 1)` if `n_rays_total` is provided, (batch_size=n_rays_total)
+                - `(n_rays_per_image, 1)` if `n_rays_per_image` is provided,
+                - `(n, 1)` where n is the minimum cardinality of the mask
+                        in the batch if `mask` is provided
+                - `(image_height, image_width)` if nothing from above is satisfied
+
+            `HeterogeneousRayBundle` has additional members:
+                - camera_ids: tensor of shape (M,), where `M` is the number of unique sampled
+                    cameras. It represents unique ids of sampled cameras.
+                - camera_counts: tensor of shape (M,), where `M` is the number of unique sampled
+                    cameras. Represents how many times each camera from `camera_ids` was sampled
+
+            `HeterogeneousRayBundle` is returned if `n_rays_total` is provided else `RayBundle`
+            is returned.
+        """
+        n_rays_total = n_rays_total or self._n_rays_total
+        n_rays_per_image = n_rays_per_image or self._n_rays_per_image
+        if (n_rays_total is not None) and (n_rays_per_image is not None):
+            raise ValueError(
+                "`n_rays_total` and `n_rays_per_image` cannot both be defined."
+            )
+        if n_rays_total:
+            (
+                cameras,
+                mask,
+                camera_ids,  # unique ids of sampled cameras
+                camera_counts,  # number of times unique camera id was sampled
+                # `n_rays_per_image` is equal to the max number of times a single camera
+                # was sampled. We sample all cameras at `camera_ids` `n_rays_per_image` times
+                # and then discard the unneeded rays.
+                # pyre-ignore[9]
+                n_rays_per_image,
+            ) = _sample_cameras_and_masks(n_rays_total, cameras, mask)
+        else:
+            # pyre-ignore[9]
+            camera_ids: torch.LongTensor = torch.arange(len(cameras), dtype=torch.long)
+
+        batch_size = cameras.R.shape[0]
+        device = cameras.device
+
+        # expand the (H, W, 2) grid batch_size-times to (B, H, W, 2)
+        xy_grid = self._xy_grid.to(device).expand(batch_size, -1, -1, -1)
+
+        if mask is not None and n_rays_per_image is None:
+            # if num rays not given, sample according to the smallest mask
+            n_rays_per_image = (
+                n_rays_per_image or mask.sum(dim=(1, 2)).min().int().item()
+            )
+
+        if n_rays_per_image is not None:
+            if mask is not None:
+                assert mask.shape == xy_grid.shape[:3]
+                weights = mask.reshape(batch_size, -1)
+            else:
+                # torch.randperm is not used for the uniform weights since it
+                # is not batched and does not support partial permutation.
+                # NOTE: xy_grid is (B, H, W, 2), so `width`/`height` are swapped.
+                _, width, height, _ = xy_grid.shape
+                weights = xy_grid.new_ones(batch_size, width * height)
+            # pyre-fixme[6]: For 2nd param expected `int` but got `Union[bool,
+            #  float, int]`.
+            rays_idx = _safe_multinomial(weights, n_rays_per_image)[..., None].expand(
+                -1, -1, 2
+            )
+
+            xy_grid = torch.gather(xy_grid.reshape(batch_size, -1, 2), 1, rays_idx)[
+                :, :, None
+            ]
+
+        min_depth = min_depth if min_depth is not None else self._min_depth
+        max_depth = max_depth if max_depth is not None else self._max_depth
+        n_pts_per_ray = (
+            n_pts_per_ray if n_pts_per_ray is not None else self._n_pts_per_ray
+        )
+        stratified_sampling = (
+            stratified_sampling
+            if stratified_sampling is not None
+            else self._stratified_sampling
+        )
+
+        ray_bundle = _xy_to_ray_bundle(
+            cameras,
+            xy_grid,
+            min_depth,
+            max_depth,
+            n_pts_per_ray,
+            self._unit_directions,
+            stratified_sampling,
+        )
+
+        return (
+            # pyre-ignore[61]
+            _pack_ray_bundle(ray_bundle, camera_ids, camera_counts)
+            if n_rays_total
+            else ray_bundle
+        )
+
+
+class NDCMultinomialRaysampler(MultinomialRaysampler):
+    """
+    A `MultinomialRaysampler` whose image grid follows the screen conventions
+    of the `Meshes` and `Pointclouds` renderers: the shorter image side spans
+    [-1, 1] and the longer one spans [-u, u], where u >= 1 is the aspect ratio
+    of the image.
+
+    The bounds handed to the parent class are pixel centers, i.e. they lie half
+    a pixel inside of that range, and `min_x`/`min_y` receive the positive bound.
+
+    For the description of arguments, see the documentation to MultinomialRaysampler.
+    """
+
+    def __init__(
+        self,
+        *,
+        image_width: int,
+        image_height: int,
+        n_pts_per_ray: int,
+        min_depth: float,
+        max_depth: float,
+        n_rays_per_image: Optional[int] = None,
+        n_rays_total: Optional[int] = None,
+        unit_directions: bool = False,
+        stratified_sampling: bool = False,
+    ) -> None:
+        # the shorter side of the image spans [-1, 1], the longer one [-u, u]
+        is_wide = image_width >= image_height
+        range_x = image_width / image_height if is_wide else 1.0
+        range_y = 1.0 if is_wide else image_height / image_width
+
+        # half of the size of one pixel along each of the two axes
+        half_pix_width = range_x / image_width
+        half_pix_height = range_y / image_height
+
+        super().__init__(
+            min_x=range_x - half_pix_width,
+            max_x=-range_x + half_pix_width,
+            min_y=range_y - half_pix_height,
+            max_y=-range_y + half_pix_height,
+            image_width=image_width,
+            image_height=image_height,
+            n_pts_per_ray=n_pts_per_ray,
+            min_depth=min_depth,
+            max_depth=max_depth,
+            n_rays_per_image=n_rays_per_image,
+            n_rays_total=n_rays_total,
+            unit_directions=unit_directions,
+            stratified_sampling=stratified_sampling,
+        )
+
+
+class MonteCarloRaysampler(torch.nn.Module):
+    """
+    Samples a fixed number of pixels within denoted xy bounds uniformly at random.
+    For each pixel, a fixed number of points is sampled along its ray at uniformly-spaced
+    z-coordinates such that the z-coordinates range between a predefined minimum
+    and maximum depth.
+
+    For practical purposes, this is similar to MultinomialRaysampler without a mask,
+    however sampling at real-valued locations bypassing replacement checks may be faster.
+    """
+
+    def __init__(
+        self,
+        min_x: float,
+        max_x: float,
+        min_y: float,
+        max_y: float,
+        n_rays_per_image: int,
+        n_pts_per_ray: int,
+        min_depth: float,
+        max_depth: float,
+        *,
+        n_rays_total: Optional[int] = None,
+        unit_directions: bool = False,
+        stratified_sampling: bool = False,
+    ) -> None:
+        """
+        Args:
+            min_x: The smallest x-coordinate of each ray's source pixel.
+            max_x: The largest x-coordinate of each ray's source pixel.
+            min_y: The smallest y-coordinate of each ray's source pixel.
+            max_y: The largest y-coordinate of each ray's source pixel.
+            n_rays_per_image: The number of rays randomly sampled in each camera.
+                Exactly one of `n_rays_per_image` and `n_rays_total` has to be set.
+            n_pts_per_ray: The number of points sampled along each ray.
+            min_depth: The minimum depth of each ray-point.
+            max_depth: The maximum depth of each ray-point.
+            n_rays_total: How many rays in total to sample from the cameras provided. The result
+                is as if `n_rays_total` cameras were sampled with replacement from the
+                cameras provided and for every camera one ray was sampled. If set returns the
+                HeterogeneousRayBundle with batch_size=n_rays_total.
+                Exactly one of `n_rays_per_image` and `n_rays_total` has to be set.
+            unit_directions: whether to normalize direction vectors in ray bundle.
+            stratified_sampling: if True, performs stratified sampling in n_pts_per_ray
+                bins for each ray; otherwise takes n_pts_per_ray deterministic points
+                on each ray with uniform offsets.
+        """
+        super().__init__()
+        self._min_x = min_x
+        self._max_x = max_x
+        self._min_y = min_y
+        self._max_y = max_y
+        self._n_rays_per_image = n_rays_per_image
+        self._n_pts_per_ray = n_pts_per_ray
+        self._min_depth = min_depth
+        self._max_depth = max_depth
+        self._n_rays_total = n_rays_total
+        self._unit_directions = unit_directions
+        self._stratified_sampling = stratified_sampling
+
+    def forward(
+        self,
+        cameras: CamerasBase,
+        *,
+        stratified_sampling: Optional[bool] = None,
+        **kwargs,
+    ) -> Union[RayBundle, HeterogeneousRayBundle]:
+        """
+        Args:
+            cameras: A batch of `batch_size` cameras from which the rays are emitted.
+            stratified_sampling: if set, overrides stratified_sampling provided
+                in __init__.
+        Returns:
+            A named tuple `RayBundle` or dataclass `HeterogeneousRayBundle` with the
+            following fields:
+
+            origins: A tensor of shape
+                `(batch_size, n_rays_per_image, 3)`
+                denoting the locations of ray origins in the world coordinates.
+            directions: A tensor of shape
+                `(batch_size, n_rays_per_image, 3)`
+                denoting the directions of each ray in the world coordinates.
+            lengths: A tensor of shape
+                `(batch_size, n_rays_per_image, n_pts_per_ray)`
+                containing the z-coordinate (=depth) of each ray in world units.
+            xys: A tensor of shape
+                `(batch_size, n_rays_per_image, 2)`
+                containing the 2D image coordinates of each ray.
+            If `n_rays_total` is provided `batch_size=n_rays_total` and
+            `n_rays_per_image=1` and `HeterogeneousRayBundle` is returned else `RayBundle`
+            is returned.
+
+            `HeterogeneousRayBundle` has additional members:
+                - camera_ids: tensor of shape (M,), where `M` is the number of unique sampled
+                    cameras. It represents unique ids of sampled cameras.
+                - camera_counts: tensor of shape (M,), where `M` is the number of unique sampled
+                    cameras. Represents how many times each camera from `camera_ids` was sampled
+        """
+        if (
+            sum(x is not None for x in [self._n_rays_total, self._n_rays_per_image])
+            != 1
+        ):
+            raise ValueError(
+                "Exactly one of `self.n_rays_total` and `self.n_rays_per_image` "
+                "must be given."
+            )
+
+        if self._n_rays_total:
+            (
+                cameras,
+                _,
+                camera_ids,
+                camera_counts,
+                n_rays_per_image,
+            ) = _sample_cameras_and_masks(self._n_rays_total, cameras, None)
+        else:
+            # pyre-ignore[9]
+            camera_ids: torch.LongTensor = torch.arange(len(cameras), dtype=torch.long)
+            n_rays_per_image = self._n_rays_per_image
+
+        batch_size = cameras.R.shape[0]
+
+        device = cameras.device
+
+        # get the initial grid of image xy coords
+        # of shape (batch_size, n_rays_per_image, 2)
+        rays_xy = torch.cat(
+            [
+                torch.rand(
+                    size=(batch_size, n_rays_per_image, 1),
+                    dtype=torch.float32,
+                    device=device,
+                )
+                * (high - low)
+                + low
+                for low, high in (
+                    (self._min_x, self._max_x),
+                    (self._min_y, self._max_y),
+                )
+            ],
+            dim=2,
+        )
+
+        stratified_sampling = (
+            stratified_sampling
+            if stratified_sampling is not None
+            else self._stratified_sampling
+        )
+
+        ray_bundle = _xy_to_ray_bundle(
+            cameras,
+            rays_xy,
+            self._min_depth,
+            self._max_depth,
+            self._n_pts_per_ray,
+            self._unit_directions,
+            stratified_sampling,
+        )
+
+        return (
+            # pyre-ignore[61]
+            _pack_ray_bundle(ray_bundle, camera_ids, camera_counts)
+            if self._n_rays_total
+            else ray_bundle
+        )
+
+
+# Settings for backwards compatibility
+def GridRaysampler(
+    min_x: float,
+    max_x: float,
+    min_y: float,
+    max_y: float,
+    image_width: int,
+    image_height: int,
+    n_pts_per_ray: int,
+    min_depth: float,
+    max_depth: float,
+) -> "MultinomialRaysampler":
+    """
+    Deprecated, backward-compatible entry point for `MultinomialRaysampler`.
+    Issues a `PendingDeprecationWarning` and forwards all of its arguments.
+    """
+
+    deprecation_message = """GridRaysampler is deprecated,
+        Use MultinomialRaysampler instead.
+        GridRaysampler will be removed in future releases."""
+    warnings.warn(deprecation_message, PendingDeprecationWarning)
+
+    # all arguments are passed on unchanged, the remaining ones keep defaults
+    forwarded = dict(
+        min_x=min_x,
+        max_x=max_x,
+        min_y=min_y,
+        max_y=max_y,
+        image_width=image_width,
+        image_height=image_height,
+        n_pts_per_ray=n_pts_per_ray,
+        min_depth=min_depth,
+        max_depth=max_depth,
+    )
+    return MultinomialRaysampler(**forwarded)
+
+
+# Settings for backwards compatibility
+def NDCGridRaysampler(
+    image_width: int,
+    image_height: int,
+    n_pts_per_ray: int,
+    min_depth: float,
+    max_depth: float,
+) -> "NDCMultinomialRaysampler":
+    """
+    Deprecated, backward-compatible entry point for `NDCMultinomialRaysampler`.
+    Issues a `PendingDeprecationWarning` and forwards all of its arguments.
+    """
+
+    deprecation_message = """NDCGridRaysampler is deprecated,
+        Use NDCMultinomialRaysampler instead.
+        NDCGridRaysampler will be removed in future releases."""
+    warnings.warn(deprecation_message, PendingDeprecationWarning)
+
+    # all arguments are passed on unchanged, the remaining ones keep defaults
+    forwarded = dict(
+        image_width=image_width,
+        image_height=image_height,
+        n_pts_per_ray=n_pts_per_ray,
+        min_depth=min_depth,
+        max_depth=max_depth,
+    )
+    return NDCMultinomialRaysampler(**forwarded)
+
+
+def _safe_multinomial(input: torch.Tensor, num_samples: int) -> torch.Tensor:
+    """
+    Samples indices with `torch.multinomial`, without replacement wherever
+    a row allows it and with replacement in the remaining rows.
+
+    Args:
+        input: tensor of shape [B, n] containing non-negative values;
+                each row holds the unnormalized event probabilities
+                of one categorical distribution.
+        num_samples: number of samples to draw per row.
+
+    Returns:
+        LongTensor of shape [B, num_samples] with values from
+        {0, ..., n - 1}. The row i of the result contains
+            (1) a random subset of the indices of the non-zero values of
+                input[i], drawn with probabilities proportional to those
+                values, if input[i] has num_samples or more non-zero values;
+
+            (2) otherwise, indices of the non-zero values of input[i] drawn
+                with replacement and with probabilities proportional to
+                them. Such a sample might miss some of those indices.
+
+        The behavior is undetermined for rows without any non-zero value
+        and for inputs with negative values.
+    """
+    try:
+        out = torch.multinomial(input, num_samples, replacement=False)
+    except RuntimeError:
+        # this is probably rare, so sampling twice is acceptable
+        out = torch.multinomial(input, num_samples, replacement=True)
+        enough = (input > 0.0).sum(dim=-1) >= num_samples
+        out[enough] = torch.multinomial(
+            input[enough], num_samples, replacement=False
+        )
+    else:
+        # some versions of PyTorch draw zero-probability samples without raising
+        # (https://github.com/pytorch/pytorch/issues/50034); resample those rows
+        few = (input > 0.0).sum(dim=-1) < num_samples
+        if few.any():
+            out[few] = torch.multinomial(input[few], num_samples, replacement=True)
+    return out
+
+
+def _xy_to_ray_bundle(
+    cameras: CamerasBase,
+    xy_grid: torch.Tensor,
+    min_depth: float,
+    max_depth: float,
+    n_pts_per_ray: int,
+    unit_directions: bool,
+    stratified_sampling: bool = False,
+) -> RayBundle:
+    """
+    Converts the `xy_grid` of shape `(batch_size, ..., 2)` to a bundle of rays.
+    Each xy location of the grid is assigned `n_pts_per_ray` depths which are
+    uniformly spaced between `min_depth` and `max_depth`.
+
+    The ray origins and directions are obtained by unprojecting the xy
+    locations with `cameras` at two different depths.
+
+    Args:
+        cameras: cameras object representing a batch of cameras.
+        xy_grid: torch.tensor grid of image xy coords.
+        min_depth: The minimum depth of each ray-point.
+        max_depth: The maximum depth of each ray-point.
+        n_pts_per_ray: The number of points sampled along each ray.
+        unit_directions: whether to normalize direction vectors in ray bundle.
+        stratified_sampling: if True, performs stratified sampling in n_pts_per_ray
+            bins for each ray; otherwise takes n_pts_per_ray deterministic points
+            on each ray with uniform offsets.
+    """
+    batch_size = xy_grid.shape[0]
+    # all dimensions between the batch and the xy dimension enumerate the rays
+    spatial_size = xy_grid.shape[1:-1]
+    n_rays_per_image = spatial_size.numel()
+
+    # ray z-coords; these stay empty if no points per ray are requested
+    if n_pts_per_ray > 0:
+        depths = torch.linspace(
+            min_depth,
+            max_depth,
+            n_pts_per_ray,
+            dtype=xy_grid.dtype,
+            device=xy_grid.device,
+        )
+        rays_zs = depths[None, None].expand(
+            batch_size, n_rays_per_image, n_pts_per_ray
+        )
+        if stratified_sampling:
+            rays_zs = _jiggle_within_stratas(rays_zs)
+    else:
+        rays_zs = xy_grid.new_empty((0,))
+
+    # the xy locations are repeated twice along the ray dimension ...
+    xy_twice = (
+        xy_grid.view(batch_size, 1, n_rays_per_image, 2)
+        .expand(batch_size, 2, n_rays_per_image, 2)
+        .reshape(batch_size, n_rays_per_image * 2, 2)
+    )
+
+    # ... and get a constant depth=1 in the 1st copy and depth=2 in the 2nd one
+    unit_depth = xy_grid.new_ones(batch_size, n_rays_per_image, 1)
+    two_depths = torch.cat((unit_depth, 2.0 * unit_depth), dim=1)
+
+    to_unproject = torch.cat((xy_twice, two_depths), dim=-1)
+
+    # unproject the points and split the two planes back
+    unprojected = cameras.unproject_points(to_unproject, from_ndc=True)
+    plane_1_world = unprojected[:, :n_rays_per_image]
+    plane_2_world = unprojected[:, n_rays_per_image:]
+
+    # a direction is the difference between the points of the two planes; an
+    # origin is the point of the 1st plane moved backwards by the direction
+    rays_directions_world = plane_2_world - plane_1_world
+    rays_origins_world = plane_1_world - rays_directions_world
+
+    # optionally rescale the directions to unit length
+    if unit_directions:
+        rays_directions_world = F.normalize(rays_directions_world, dim=-1)
+
+    # restore the original spatial layout of the grid
+    vec_shape = (batch_size, *spatial_size, 3)
+    zs_shape = (batch_size, *spatial_size, n_pts_per_ray)
+    return RayBundle(
+        rays_origins_world.view(vec_shape),
+        rays_directions_world.view(vec_shape),
+        rays_zs.view(zs_shape),
+        xy_grid,
+    )
+
+
+def _jiggle_within_stratas(bin_centers: torch.Tensor) -> torch.Tensor:
+    """
+    Draws one random point per bin, given the centers of the bins.
+
+    Every value `z` is replaced with a sample from the uniform distribution on
+    `[z - delta_-, z + delta_+]`, where `delta_-` is half of the distance
+    between `z` and its predecessor and `delta_+` is half of the distance
+    between `z` and its successor. The first item has `delta_- = 0` and
+    the last item has `delta_+ = 0`.
+
+    Args:
+        `bin_centers`: The input points of size (..., N); each row, i.e. each
+            slice along the last dimension, is processed independently and
+            should be sorted in ascending order.
+
+    Returns:
+        a tensor of size (..., N) with the locations jiggled within
+        stratas/bins.
+    """
+    # Bin boundaries: midpoints between neighbors, closed off by the end points.
+    midpoints = 0.5 * (bin_centers[..., 1:] + bin_centers[..., :-1])
+    bin_lo = torch.cat((bin_centers[..., :1], midpoints), dim=-1)
+    bin_hi = torch.cat((midpoints, bin_centers[..., -1:]), dim=-1)
+    # One uniform sample inside of each [bin_lo, bin_hi] interval.
+    bin_width = bin_hi - bin_lo
+    return bin_lo + bin_width * torch.rand_like(bin_lo)
+
+
+def _sample_cameras_and_masks(
+    n_samples: int, cameras: CamerasBase, mask: Optional[torch.Tensor] = None
+) -> Tuple[
+    CamerasBase,
+    Optional[torch.Tensor],
+    torch.LongTensor,
+    torch.LongTensor,
+    torch.LongTensor,
+]:
+    """
+    Draws `n_samples` camera indices uniformly at random with replacement and
+    returns the cameras (and masks) that were drawn at least once, together
+    with the number of times each of them was drawn.
+
+    Args:
+        n_samples: how many camera and mask pairs to sample
+        cameras: A batch of `batch_size` cameras from which the rays are emitted.
+        mask: Optional. Should be of size (batch_size, image_height, image_width).
+    Returns:
+        tuple of a form (sampled_cameras, sampled_masks, unique_sampled_camera_ids,
+            number_of_times_each_sampled_camera_has_been_sampled,
+            max_number_of_times_camera_has_been_sampled,
+            )
+    """
+    n_cameras = len(cameras)
+    drawn_ids = torch.randint(0, n_cameras, size=(n_samples,), dtype=torch.long)
+    # the unique ids index the cameras (and masks) which were drawn at least once
+    unique_ids, counts = torch.unique(drawn_ids, return_counts=True)
+
+    sampled_cameras = cameras[unique_ids]
+    sampled_masks = None
+    if mask is not None:
+        sampled_masks = mask[unique_ids]
+
+    # the count of the most frequently drawn camera
+    max_count = torch.max(counts)
+
+    # pyre-ignore[7]
+    return sampled_cameras, sampled_masks, unique_ids, counts, max_count
+
+
+# TODO: this function can be unified with ImplicitronRayBundle.get_padded_xys
+def _pack_ray_bundle(
+    ray_bundle: RayBundle, camera_ids: torch.LongTensor, camera_counts: torch.LongTensor
+) -> HeterogeneousRayBundle:
+    """
+    Pack the raybundle from [n_cameras, max(rays_per_camera), ...] to
+        [total_num_rays, 1, ...]
+
+    Args:
+        ray_bundle: A ray_bundle to pack
+        camera_ids: Unique ids of cameras that were sampled
+        camera_counts: how many rays to keep for each camera; each count
+            corresponds to one 'row' of the ray_bundle and says how many
+            rays will be taken from that row and packed.
+    Returns:
+        HeterogeneousRayBundle where batch_size=sum(camera_counts) and n_rays_per_image=1
+    """
+    # pyre-ignore[9]
+    camera_counts = camera_counts.to(ray_bundle.origins.device)
+    # offset of the first ray of every camera inside of the packed tensors
+    ends = torch.cumsum(camera_counts, dim=0, dtype=torch.long)
+    leading_zero = camera_counts.new_zeros((1,), dtype=torch.long)
+    # pyre-ignore[9]
+    first_idxs: torch.LongTensor = torch.cat((leading_zero, ends[:-1]))
+    num_inputs = int(camera_counts.sum())
+    def _pack(padded: torch.Tensor) -> torch.Tensor:
+        return padded_to_packed(padded, first_idxs, num_inputs)[:, None]
+
+    return HeterogeneousRayBundle(
+        origins=_pack(ray_bundle.origins),
+        directions=_pack(ray_bundle.directions),
+        lengths=_pack(ray_bundle.lengths),
+        xys=_pack(ray_bundle.xys),
+        camera_ids=camera_ids,
+        camera_counts=camera_counts,
+    )
diff --git a/pytorch3d/pytorch3d/renderer/implicit/renderer.py b/pytorch3d/pytorch3d/renderer/implicit/renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..ffd7578e4d31fb938e9fd30bf1bc96344155c909
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/implicit/renderer.py
@@ -0,0 +1,413 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Callable, Tuple, Union
+
+import torch
+
+from ...ops.utils import eyes
+from ...structures import Volumes
+from ...transforms import Transform3d
+from ..cameras import CamerasBase
+from .raysampling import HeterogeneousRayBundle, RayBundle
+from .utils import _validate_ray_bundle_variables, ray_bundle_variables_to_ray_points
+
+
+# The implicit renderer class should be initialized with a
+# function for raysampling and a function for raymarching.
+
+# During the forward pass:
+# 1) The raysampler:
+#     - samples rays from input cameras
+#     - transforms the rays to world coordinates
+# 2) The volumetric_function (which is a callable argument of the forward pass)
+#    evaluates ray_densities and ray_features at the sampled ray-points.
+# 3) The raymarcher takes ray_densities and ray_features and uses a raymarching
+#    algorithm to render each ray.
+
+
+class ImplicitRenderer(torch.nn.Module):
+    """
+    A class for rendering a batch of implicit surfaces. The class should
+    be initialized with a raysampler and raymarcher class which both have
+    to be a `Callable`.
+
+    VOLUMETRIC_FUNCTION
+
+    The `forward` function of the renderer accepts as input the rendering cameras
+    as well as the `volumetric_function` `Callable`, which defines a field of opacity
+    and feature vectors over the 3D domain of the scene.
+
+    A standard `volumetric_function` has the following signature::
+
+        def volumetric_function(
+            ray_bundle: Union[RayBundle, HeterogeneousRayBundle],
+            **kwargs,
+        ) -> Tuple[torch.Tensor, torch.Tensor]
+
+    With the following arguments:
+        `ray_bundle`: A RayBundle or HeterogeneousRayBundle object
+            containing the following variables:
+
+            `origins`: A tensor of shape `(minibatch, ..., 3)` denoting
+                the origins of the rendering rays.
+            `directions`: A tensor of shape `(minibatch, ..., 3)`
+                containing the direction vectors of rendering rays.
+            `lengths`: A tensor of shape
+                `(minibatch, ..., num_points_per_ray)` containing the
+                lengths at which the ray points are sampled.
+            `xys`: A tensor of shape
+                `(minibatch, ..., 2)` containing the
+                xy locations of each ray's pixel in the screen space.
+    Calling `volumetric_function` then returns the following:
+        `rays_densities`: A tensor of shape
+            `(minibatch, ..., num_points_per_ray, opacity_dim)` containing
+            an opacity vector for each ray point.
+        `rays_features`: A tensor of shape
+            `(minibatch, ..., num_points_per_ray, feature_dim)` containing
+            a feature vector for each ray point.
+
+    Note that, in order to increase flexibility of the API, we allow multiple
+    other arguments to enter the volumetric function via additional
+    (optional) keyword arguments `**kwargs`.
+    A typical use-case is passing a `CamerasBase` object as an additional
+    keyword argument, which can allow the volumetric function to adjust its
+    outputs based on the directions of the projection rays.
+
+    Example:
+        A simple volumetric function of a 0-centered
+        RGB sphere with a unit diameter is defined as follows::
+
+            def volumetric_function(
+                ray_bundle: Union[RayBundle, HeterogeneousRayBundle],
+                **kwargs,
+            ) -> Tuple[torch.Tensor, torch.Tensor]:
+
+                # first convert the ray origins, directions and lengths
+                # to 3D ray point locations in world coords
+                rays_points_world = ray_bundle_to_ray_points(ray_bundle)
+
+                # set the densities as an inverse sigmoid of the
+                # ray point distance from the sphere centroid
+                rays_densities = torch.sigmoid(
+                    -100.0 * rays_points_world.norm(dim=-1, keepdim=True)
+                )
+
+                # set the ray features to RGB colors proportional
+                # to the 3D location of the projection of ray points
+                # on the sphere surface
+                rays_features = torch.nn.functional.normalize(
+                    rays_points_world, dim=-1
+                ) * 0.5 + 0.5
+
+                return rays_densities, rays_features
+
+    """
+
+    def __init__(self, raysampler: Callable, raymarcher: Callable) -> None:
+        """
+        Args:
+            raysampler: A `Callable` that takes as input scene cameras
+                (an instance of `CamerasBase`) and returns a
+                RayBundle or HeterogeneousRayBundle, that
+                describes the rays emitted from the cameras.
+            raymarcher: A `Callable` that receives the response of the
+                `volumetric_function` (an input to `self.forward`) evaluated
+                along the sampled rays, and renders the rays with a
+                ray-marching algorithm.
+        """
+        super().__init__()
+
+        if not callable(raysampler):
+            raise ValueError('"raysampler" has to be a "Callable" object.')
+        if not callable(raymarcher):
+            raise ValueError('"raymarcher" has to be a "Callable" object.')
+
+        self.raysampler = raysampler
+        self.raymarcher = raymarcher
+
+    def forward(
+        self, cameras: CamerasBase, volumetric_function: Callable, **kwargs
+    ) -> Tuple[torch.Tensor, Union[RayBundle, HeterogeneousRayBundle]]:
+        """
+        Render a batch of images using a volumetric function
+        represented as a callable (e.g. a Pytorch module).
+
+        Args:
+            cameras: A batch of cameras that render the scene. A `self.raysampler`
+                takes the cameras as input and samples rays that pass through the
+                domain of the volumetric function.
+            volumetric_function: A `Callable` that accepts the parametrizations
+                of the rendering rays and returns the densities and features
+                at the respective 3D points of the rendering rays. Please refer to
+                the main class documentation for details.
+
+        Returns:
+            images: A tensor of shape `(minibatch, ..., feature_dim + opacity_dim)`
+                containing the result of the rendering.
+            ray_bundle: A `Union[RayBundle, HeterogeneousRayBundle]` containing
+                the parametrizations of the sampled rendering rays.
+        """
+
+        if not callable(volumetric_function):
+            raise ValueError('"volumetric_function" has to be a "Callable" object.')
+
+        # first call the ray sampler that returns the RayBundle or HeterogeneousRayBundle
+        # parametrizing the rendering rays (it also gets volumetric_function and **kwargs).
+        ray_bundle = self.raysampler(
+            cameras=cameras, volumetric_function=volumetric_function, **kwargs
+        )
+        # ray_bundle.origins - minibatch x ... x 3
+        # ray_bundle.directions - minibatch x ... x 3
+        # ray_bundle.lengths - minibatch x ... x n_pts_per_ray
+        # ray_bundle.xys - minibatch x ... x 2
+
+        # given sampled rays, call the volumetric function that
+        # evaluates the densities and features at the locations of the
+        # ray points
+        # pyre-fixme[23]: Unable to unpack `object` into 2 values.
+        rays_densities, rays_features = volumetric_function(
+            ray_bundle=ray_bundle, cameras=cameras, **kwargs
+        )
+        # ray_densities - minibatch x ... x n_pts_per_ray x density_dim
+        # ray_features - minibatch x ... x n_pts_per_ray x feature_dim
+
+        # finally, march along the sampled rays to obtain the renders
+        images = self.raymarcher(
+            rays_densities=rays_densities,
+            rays_features=rays_features,
+            ray_bundle=ray_bundle,
+            **kwargs,
+        )
+        # images - minibatch x ... x (feature_dim + opacity_dim)
+
+        return images, ray_bundle
+
+
+# The volume renderer class should be initialized with a
+# function for raysampling and a function for raymarching.
+
+# During the forward pass:
+# 1) The raysampler:
+#     - samples rays from input cameras
+#     - transforms the rays to world coordinates
+# 2) The scene volumes (which are an argument of the forward function)
+#    are then sampled at the locations of the ray-points to generate
+#    ray_densities and ray_features.
+# 3) The raymarcher takes ray_densities and ray_features and uses a raymarching
+#    algorithm to render each ray.
+
+
+class VolumeRenderer(torch.nn.Module):
+    """
+    A class for rendering a batch of Volumes. The class should
+    be initialized with a raysampler and a raymarcher class which both have
+    to be a `Callable`.
+    """
+
+    def __init__(
+        self, raysampler: Callable, raymarcher: Callable, sample_mode: str = "bilinear"
+    ) -> None:
+        """
+        Args:
+            raysampler: A `Callable` that takes as input scene cameras
+                (an instance of `CamerasBase`) and returns a
+                `Union[RayBundle, HeterogeneousRayBundle]` that
+                describes the rays emitted from the cameras.
+            raymarcher: A `Callable` that receives the `volumes`
+                (an instance of `Volumes` input to `self.forward`)
+                sampled at the ray-points, and renders the rays with a
+                ray-marching algorithm.
+            sample_mode: Defines the algorithm used to sample the volumetric
+                voxel grid. Can be either "bilinear" or "nearest".
+        """
+        super().__init__()
+
+        self.renderer = ImplicitRenderer(raysampler, raymarcher)
+        self._sample_mode = sample_mode
+
+    def forward(
+        self, cameras: CamerasBase, volumes: Volumes, **kwargs
+    ) -> Tuple[torch.Tensor, Union[RayBundle, HeterogeneousRayBundle]]:
+        """
+        Render a batch of images using raymarching over rays cast through
+        input `Volumes`.
+
+        Args:
+            cameras: A batch of cameras that render the scene. A `self.raysampler`
+                takes the cameras as input and samples rays that pass through the
+                domain of the volumetric function.
+            volumes: An instance of the `Volumes` class representing a
+                batch of volumes that are being rendered.
+
+        Returns:
+            images: A tensor of shape `(minibatch, ..., feature_dim + opacity_dim)`
+                containing the result of the rendering.
+            ray_bundle: A `RayBundle` or `HeterogeneousRayBundle` containing the
+                parametrizations of the sampled rendering rays.
+        """
+        volumetric_function = VolumeSampler(volumes, sample_mode=self._sample_mode)
+        return self.renderer(
+            cameras=cameras, volumetric_function=volumetric_function, **kwargs
+        )
+
+
+class VolumeSampler(torch.nn.Module):
+    """
+    A module to sample a batch of volumes `Volumes`
+    at 3D points sampled along projection rays.
+    """
+
+    def __init__(
+        self,
+        volumes: Volumes,
+        sample_mode: str = "bilinear",
+        padding_mode: str = "zeros",
+    ) -> None:
+        """
+        Args:
+            volumes: An instance of the `Volumes` class representing a
+                batch of volumes that are being rendered.
+            sample_mode: Defines the algorithm used to sample the volumetric
+                voxel grid. Can be either "bilinear" or "nearest".
+            padding_mode: How to handle values outside of the volume.
+                One of: zeros, border, reflection
+                See torch.nn.functional.grid_sample for more information.
+        """
+        super().__init__()
+        if not isinstance(volumes, Volumes):
+            raise ValueError("'volumes' have to be an instance of the 'Volumes' class.")
+        self._volumes = volumes
+        self._sample_mode = sample_mode
+        self._padding_mode = padding_mode
+
+    def _get_ray_directions_transform(self):
+        """
+        Compose the ray-directions transform by removing the translation component
+        from the volume global-to-local coords transform.
+        """
+        world2local = self._volumes.get_world_to_local_coords_transform().get_matrix()
+        directions_transform_matrix = eyes(
+            4,
+            N=world2local.shape[0],
+            device=world2local.device,
+            dtype=world2local.dtype,
+        )
+        directions_transform_matrix[:, :3, :3] = world2local[:, :3, :3]
+        directions_transform = Transform3d(matrix=directions_transform_matrix)
+        return directions_transform
+
+    def forward(
+        self, ray_bundle: Union[RayBundle, HeterogeneousRayBundle], **kwargs
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """
+        Given an input ray parametrization, the forward function samples
+        `self._volumes` at the respective 3D ray-points.
+        Can also accept ImplicitronRayBundle as argument for ray_bundle.
+
+        Args:
+            ray_bundle: A RayBundle or HeterogeneousRayBundle object with the following fields:
+                origins: A tensor of shape `(minibatch, ..., 3)` denoting the
+                    origins of the sampling rays in world coords.
+                directions: A tensor of shape `(minibatch, ..., 3)`
+                    containing the direction vectors of sampling rays in world coords.
+                lengths: A tensor of shape `(minibatch, ..., num_points_per_ray)`
+                    containing the lengths at which the rays are sampled.
+
+        Returns:
+            rays_densities: A tensor of shape
+                `(minibatch, ..., num_points_per_ray, opacity_dim)` containing the
+                density vectors sampled from the volume at the locations of
+                the ray points.
+            rays_features: A tensor of shape
+                `(minibatch, ..., num_points_per_ray, feature_dim)` containing the
+                feature vectors sampled from the volume at the locations of
+                the ray points.
+        """
+
+        # take out the interesting parts of ray_bundle
+        rays_origins_world = ray_bundle.origins
+        rays_directions_world = ray_bundle.directions
+        rays_lengths = ray_bundle.lengths
+
+        # validate the inputs
+        _validate_ray_bundle_variables(
+            rays_origins_world, rays_directions_world, rays_lengths
+        )
+        if self._volumes.densities().shape[0] != rays_origins_world.shape[0]:
+            raise ValueError("Input volumes have to have the same batch size as rays.")
+
+        #########################################################
+        # 1) convert the origins/directions to the local coords #
+        #########################################################
+
+        # origins are mapped with the world_to_local transform of the volumes
+        rays_origins_local = self._volumes.world_to_local_coords(rays_origins_world)
+
+        # obtain the Transform3d object that transforms ray directions to local coords
+        directions_transform = self._get_ray_directions_transform()
+
+        # transform the directions to the local coords
+        rays_directions_local = directions_transform.transform_points(
+            rays_directions_world.view(rays_lengths.shape[0], -1, 3)
+        ).view(rays_directions_world.shape)
+
+        ############################
+        # 2) obtain the ray points #
+        ############################
+
+        # this op produces a fairly big tensor (minibatch, ..., n_samples_per_ray, 3)
+        rays_points_local = ray_bundle_variables_to_ray_points(
+            rays_origins_local, rays_directions_local, rays_lengths
+        )
+
+        ########################
+        # 3) sample the volume #
+        ########################
+
+        # generate the tensor for sampling
+        volumes_densities = self._volumes.densities()
+        dim_density = volumes_densities.shape[1]
+        volumes_features = self._volumes.features()
+
+        # reshape to a size which grid_sample likes
+        rays_points_local_flat = rays_points_local.view(
+            rays_points_local.shape[0], -1, 1, 1, 3
+        )
+
+        # run the grid sampler on the volumes densities
+        rays_densities = torch.nn.functional.grid_sample(
+            volumes_densities,
+            rays_points_local_flat,
+            mode=self._sample_mode,
+            padding_mode=self._padding_mode,
+            align_corners=self._volumes.get_align_corners(),
+        )
+
+        # permute the dimensions & reshape densities after sampling
+        rays_densities = rays_densities.permute(0, 2, 3, 4, 1).view(
+            *rays_points_local.shape[:-1], volumes_densities.shape[1]
+        )
+
+        # sample the volume features if present; otherwise rays_features has 0 channels
+        if volumes_features is None:
+            dim_feature = 0
+            _, rays_features = rays_densities.split([dim_density, dim_feature], dim=-1)
+        else:
+            rays_features = torch.nn.functional.grid_sample(
+                volumes_features,
+                rays_points_local_flat,
+                mode=self._sample_mode,
+                padding_mode=self._padding_mode,
+                align_corners=self._volumes.get_align_corners(),
+            )
+
+            # permute the dimensions & reshape features after sampling
+            rays_features = rays_features.permute(0, 2, 3, 4, 1).view(
+                *rays_points_local.shape[:-1], volumes_features.shape[1]
+            )
+
+        return rays_densities, rays_features
diff --git a/pytorch3d/pytorch3d/renderer/implicit/sample_pdf.py b/pytorch3d/pytorch3d/renderer/implicit/sample_pdf.py
new file mode 100644
index 0000000000000000000000000000000000000000..c2387e5b503d4f3ec8efb6e07dabd95dd4ff0eba
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/implicit/sample_pdf.py
@@ -0,0 +1,146 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import torch
+from pytorch3d import _C
+
+
+def sample_pdf(
+    bins: torch.Tensor,
+    weights: torch.Tensor,
+    n_samples: int,
+    det: bool = False,
+    eps: float = 1e-5,
+) -> torch.Tensor:
+    """
+    Samples probability density functions defined by bin edges `bins` and
+    the non-negative per-bin probabilities `weights`.
+
+    Args:
+        bins: Tensor of shape `(..., n_bins+1)` denoting the edges of the sampling bins.
+        weights: Tensor of shape `(..., n_bins)` containing non-negative numbers
+            representing the probability of sampling the corresponding bin.
+        n_samples: The number of samples to draw from each set of bins.
+        det: If `False`, the sampling is random. `True` yields deterministic
+            uniformly-spaced sampling from the inverse cumulative density function.
+        eps: A constant preventing division by zero in case empty bins are present.
+
+    Returns:
+        samples: Tensor of shape `(..., n_samples)` containing `n_samples` samples
+            drawn from each probability distribution.
+
+    Refs:
+        [1] https://github.com/bmild/nerf/blob/55d8b00244d7b5178f4d003526ab6667683c9da9/run_nerf_helpers.py#L183  # noqa E501
+    """
+    if torch.is_grad_enabled() and (bins.requires_grad or weights.requires_grad):
+        raise NotImplementedError("sample_pdf differentiability.")
+    if weights.min() <= -eps:  # weights in (-eps, 0) are tolerated
+        raise ValueError("Negative weights provided.")
+    batch_shape = bins.shape[:-1]
+    n_bins = weights.shape[-1]
+    if n_bins + 1 != bins.shape[-1] or weights.shape[:-1] != batch_shape:
+        shapes = f"{bins.shape}{weights.shape}"
+        raise ValueError("Inconsistent shapes of bins and weights: " + shapes)
+    output_shape = batch_shape + (n_samples,)
+
+    if det:
+        u = torch.linspace(0.0, 1.0, n_samples, device=bins.device, dtype=torch.float32)
+        output = u.expand(output_shape).contiguous()
+    else:
+        output = torch.rand(output_shape, dtype=torch.float32, device=bins.device)
+
+    # pyre-fixme[16]: Module `pytorch3d` has no attribute `_C`.
+    _C.sample_pdf(
+        bins.reshape(-1, n_bins + 1),
+        weights.reshape(-1, n_bins),
+        output.reshape(-1, n_samples),
+        eps,
+    )  # NOTE(review): return value unused; presumably fills `output` in place - confirm
+
+    return output
+
+
+def sample_pdf_python(
+    bins: torch.Tensor,
+    weights: torch.Tensor,
+    N_samples: int,
+    det: bool = False,
+    eps: float = 1e-5,
+) -> torch.Tensor:
+    """
+    This is a pure python implementation of the `sample_pdf` function.
+    It may be faster than sample_pdf when the number of bins is very large,
+    because it behaves as O(batchsize * [n_bins + log(n_bins) * n_samples] )
+    whereas sample_pdf behaves as O(batchsize * n_bins * n_samples).
+    For 64 bins sample_pdf is much faster.
+
+    Samples probability density functions defined by bin edges `bins` and
+    the non-negative per-bin probabilities `weights`.
+
+    Note: This is a direct conversion of the TensorFlow function from the original
+    release [1] to PyTorch. It requires PyTorch 1.6 or greater due to the use of
+    torch.searchsorted.
+
+    Args:
+        bins: Tensor of shape `(..., n_bins+1)` denoting the edges of the sampling bins.
+        weights: Tensor of shape `(..., n_bins)` containing non-negative numbers
+            representing the probability of sampling the corresponding bin.
+        N_samples: The number of samples to draw from each set of bins.
+        det: If `False`, the sampling is random. `True` yields deterministic
+            uniformly-spaced sampling from the inverse cumulative density function.
+        eps: A constant preventing division by zero in case empty bins are present.
+
+    Returns:
+        samples: Tensor of shape `(..., N_samples)` containing `N_samples` samples
+            drawn from each probability distribution.
+
+    Refs:
+        [1] https://github.com/bmild/nerf/blob/55d8b00244d7b5178f4d003526ab6667683c9da9/run_nerf_helpers.py#L183  # noqa E501
+    """
+
+    # Get pdf
+    weights = weights + eps  # prevent nans
+    if weights.min() <= 0:  # i.e. some input weight was <= -eps
+        raise ValueError("Negative weights provided.")
+    pdf = weights / weights.sum(dim=-1, keepdim=True)
+    cdf = torch.cumsum(pdf, -1)
+    cdf = torch.cat([torch.zeros_like(cdf[..., :1]), cdf], -1)
+
+    # Take uniform samples u of shape (..., N_samples)
+    if det:
+        u = torch.linspace(0.0, 1.0, N_samples, device=cdf.device, dtype=cdf.dtype)
+        u = u.expand(list(cdf.shape[:-1]) + [N_samples]).contiguous()
+    else:
+        u = torch.rand(
+            list(cdf.shape[:-1]) + [N_samples], device=cdf.device, dtype=cdf.dtype
+        )
+
+    # Invert CDF
+    inds = torch.searchsorted(cdf, u, right=True)
+    # inds has shape (..., N_samples) identifying the bin of each sample.
+    below = (inds - 1).clamp(0)
+    above = inds.clamp(max=cdf.shape[-1] - 1)
+    # Below and above are of shape (..., N_samples), identifying the bin
+    # edges surrounding each sample.
+
+    inds_g = torch.stack([below, above], -1).view(
+        *below.shape[:-1], below.shape[-1] * 2
+    )
+    cdf_g = torch.gather(cdf, -1, inds_g).view(*below.shape, 2)
+    bins_g = torch.gather(bins, -1, inds_g).view(*below.shape, 2)
+    # cdf_g and bins_g are of shape (..., N_samples, 2) and hold the cdf
+    # values and the bin-edge values at the two edges surrounding each sample.
+
+    denom = cdf_g[..., 1] - cdf_g[..., 0]
+    denom = torch.where(denom < eps, torch.ones_like(denom), denom)
+    t = (u - cdf_g[..., 0]) / denom
+    # t is of shape (..., N_samples) and identifies how far through
+    # its bin each sample is.
+
+    samples = bins_g[..., 0] + t * (bins_g[..., 1] - bins_g[..., 0])
+
+    return samples
diff --git a/pytorch3d/pytorch3d/renderer/implicit/utils.py b/pytorch3d/pytorch3d/renderer/implicit/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..d73c8583b04985a2dda3cd4ecf0856bacc8bd4f6
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/implicit/utils.py
@@ -0,0 +1,171 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import dataclasses
+from typing import NamedTuple, Optional, Union
+
+import torch
+
+
+class RayBundle(NamedTuple):
+    """
+    Parametrizes points along projection rays by storing:
+
+        origins: A tensor of shape `(..., 3)` denoting the
+            origins of the sampling rays in world coords.
+        directions: A tensor of shape `(..., 3)` containing the direction
+            vectors of sampling rays in world coords. They don't have to be normalized;
+            they define unit vectors in the respective 1D coordinate systems; see
+            documentation for :func:`ray_bundle_to_ray_points` for the conversion formula.
+        lengths: A tensor of shape `(..., num_points_per_ray)`
+            containing the lengths at which the rays are sampled.
+        xys: A tensor of shape `(..., 2)`, the xy-locations (`xys`) of the ray pixels
+    """
+
+    origins: torch.Tensor  # (..., 3)
+    directions: torch.Tensor  # (..., 3), not necessarily normalized
+    lengths: torch.Tensor  # (..., num_points_per_ray)
+    xys: torch.Tensor  # (..., 2)
+
+
+@dataclasses.dataclass
+class HeterogeneousRayBundle:
+    """
+    Members:
+        origins: A tensor of shape `(..., 3)` denoting the
+            origins of the sampling rays in world coords.
+        directions: A tensor of shape `(..., 3)` containing the direction
+            vectors of sampling rays in world coords. They don't have to be normalized;
+            they define unit vectors in the respective 1D coordinate systems; see
+            documentation for :func:`ray_bundle_to_ray_points` for the conversion formula.
+        lengths: A tensor of shape `(..., num_points_per_ray)`
+            containing the lengths at which the rays are sampled.
+        xys: A tensor of shape `(..., 2)`, the xy-locations (`xys`) of the ray pixels
+        camera_ids: A tensor of shape (N, ) which indicates which camera
+            was used to sample the rays. `N` is the number of unique sampled cameras.
+        camera_counts: A tensor of shape (N, ) which indicates how many times the
+            corresponding camera in `camera_ids` was sampled.
+            `sum(camera_counts)==total_number_of_rays`
+
+    If we sample cameras of ids [0, 3, 5, 3, 1, 0, 0] that would be
+    stored as camera_ids=[1, 3, 5, 0] and camera_counts=[1, 2, 1, 3]. `camera_ids` is a
+    set-like object with no particular ordering of elements. The ith element of
+    `camera_ids` corresponds to the ith element of `camera_counts`.
+    """
+
+    origins: torch.Tensor
+    directions: torch.Tensor
+    lengths: torch.Tensor
+    xys: torch.Tensor
+    camera_ids: Optional[torch.LongTensor] = None
+    camera_counts: Optional[torch.LongTensor] = None
+
+
+def ray_bundle_to_ray_points(
+    ray_bundle: Union[RayBundle, HeterogeneousRayBundle]
+) -> torch.Tensor:
+    """
+    Converts rays parametrized with a `ray_bundle` (an instance of the `RayBundle`
+    named tuple or HeterogeneousRayBundle dataclass) to 3D points by
+    extending each ray according to the corresponding length.
+
+    E.g. for 2-dimensional tensors `ray_bundle.origins`, `ray_bundle.directions`
+        and `ray_bundle.lengths`, the ray point at position `[i, j]` is::
+
+            ray_bundle.points[i, j, :] = (
+                ray_bundle.origins[i, :]
+                + ray_bundle.directions[i, :] * ray_bundle.lengths[i, j]
+            )
+
+    Note that both the directions and magnitudes of the vectors in
+    `ray_bundle.directions` matter.
+
+    Args:
+        ray_bundle: A `RayBundle` or `HeterogeneousRayBundle` object with fields:
+            origins: A tensor of shape `(..., 3)`
+            directions: A tensor of shape `(..., 3)`
+            lengths: A tensor of shape `(..., num_points_per_ray)`
+
+    Returns:
+        rays_points: A tensor of shape `(..., num_points_per_ray, 3)`
+            containing the points sampled along each ray.
+    """
+    return ray_bundle_variables_to_ray_points(
+        ray_bundle.origins, ray_bundle.directions, ray_bundle.lengths
+    )  # only origins, directions and lengths are read from the bundle
+
+
+def ray_bundle_variables_to_ray_points(
+    rays_origins: torch.Tensor,
+    rays_directions: torch.Tensor,
+    rays_lengths: torch.Tensor,
+) -> torch.Tensor:
+    """
+    Converts rays parametrized with origins and directions
+    to 3D points by extending each ray according to the corresponding
+    ray length:
+
+    E.g. for 2-dimensional input tensors `rays_origins`, `rays_directions`
+    and `rays_lengths`, the ray point at position `[i, j]` is::
+
+            rays_points[i, j, :] = (
+                rays_origins[i, :]
+                + rays_directions[i, :] * rays_lengths[i, j]
+            )
+
+    Note that both the directions and magnitudes of the vectors in
+    `rays_directions` matter.
+
+    Args:
+        rays_origins: A tensor of shape `(..., 3)`
+        rays_directions: A tensor of shape `(..., 3)`
+        rays_lengths: A tensor of shape `(..., num_points_per_ray)`
+
+    Returns:
+        rays_points: A tensor of shape `(..., num_points_per_ray, 3)`
+            containing the points sampled along each ray.
+    """
+    rays_points = (
+        rays_origins[..., None, :]
+        + rays_lengths[..., :, None] * rays_directions[..., None, :]
+    )  # the inserted singleton axes broadcast to (..., num_points_per_ray, 3)
+    return rays_points
+
+
+def _validate_ray_bundle_variables(
+    rays_origins: torch.Tensor,
+    rays_directions: torch.Tensor,
+    rays_lengths: torch.Tensor,
+) -> None:
+    """
+    Validate the shapes of RayBundle variables
+    `rays_origins`, `rays_directions`, and `rays_lengths`; raise ValueError if invalid.
+    """
+    ndim = rays_origins.ndim
+    if any(r.ndim != ndim for r in (rays_directions, rays_lengths)):
+        raise ValueError(
+            "rays_origins, rays_directions and rays_lengths"
+            + " have to have the same number of dimensions."
+        )
+
+    if ndim <= 2:
+        raise ValueError(
+            "rays_origins, rays_directions and rays_lengths"
+            + " have to have at least 3 dimensions."
+        )
+
+    spatial_size = rays_origins.shape[:-1]
+    if any(spatial_size != r.shape[:-1] for r in (rays_directions, rays_lengths)):
+        raise ValueError(
+            "The shapes of rays_origins, rays_directions and rays_lengths"
+            + " may differ only in the last dimension."
+        )
+
+    if any(r.shape[-1] != 3 for r in (rays_origins, rays_directions)):
+        raise ValueError(
+            "The size of the last dimension of rays_origins/rays_directions"
+            + " has to be 3."  # leading space keeps the two fragments from fusing
+        )
diff --git a/pytorch3d/pytorch3d/renderer/lighting.py b/pytorch3d/pytorch3d/renderer/lighting.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab4f5fd3a7f131778fd9b3dfb303fbe498dd3cfa
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/lighting.py
@@ -0,0 +1,339 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import torch
+import torch.nn.functional as F
+
+from ..common.datatypes import Device
+from .utils import convert_to_tensors_and_broadcast, TensorProperties
+
+
+def diffuse(normals, color, direction) -> torch.Tensor:
+    """
+    Calculate the diffuse component of light reflection using Lambert's
+    cosine law.
+
+    Args:
+        normals: (N, ..., 3) xyz normal vectors. They are re-normalized to
+            unit length inside this function.
+        color: (1, 3) or (N, 3) RGB color of the diffuse component of the light.
+        direction: (x,y,z) direction of the light
+
+    Returns:
+        colors: (N, ..., 3), same shape as the input normals.
+
+    The normals and light direction should be in the same coordinate frame
+    i.e. if the points have been transformed from world -> view space then
+    the normals and direction should also be in view space.
+
+    NOTE: to use with the packed vertices (i.e. no batch dimension) reformat the
+    inputs in the following way.
+
+    .. code-block:: python
+
+        Args:
+            normals: (P, 3)
+            color: (N, 3)[batch_idx, :] -> (P, 3)
+            direction: (N, 3)[batch_idx, :] -> (P, 3)
+
+        Returns:
+            colors: (P, 3)
+
+        where batch_idx is of shape (P). For meshes, batch_idx can be:
+        meshes.verts_packed_to_mesh_idx() or meshes.faces_packed_to_mesh_idx()
+        depending on whether the normals are per-vertex normals or
+        average/interpolated face normals.
+    """
+    # TODO: handle multiple directional lights per batch element.
+    # TODO: handle attenuation.
+
+    # Ensure color and direction have same batch dimension as normals
+    normals, color, direction = convert_to_tensors_and_broadcast(
+        normals, color, direction, device=normals.device
+    )
+
+    # Reshape direction and color so they have all the arbitrary intermediate
+    # dimensions as normals. Assume first dim = batch dim and last dim = 3.
+    points_dims = normals.shape[1:-1]
+    expand_dims = (-1,) + (1,) * len(points_dims) + (3,)
+    if direction.shape != normals.shape:
+        direction = direction.view(expand_dims)
+    if color.shape != normals.shape:
+        color = color.view(expand_dims)
+
+    # Renormalize the normals in case they have been interpolated.
+    # We tried to replace the following with F.cosine_similarity, but it wasn't faster.
+    normals = F.normalize(normals, p=2, dim=-1, eps=1e-6)
+    direction = F.normalize(direction, p=2, dim=-1, eps=1e-6)
+    angle = F.relu(torch.sum(normals * direction, dim=-1))
+    return color * angle[..., None]
+
+
+def specular(
+    points, normals, direction, color, camera_position, shininess
+) -> torch.Tensor:
+    """
+    Calculate the specular component of light reflection.
+
+    Args:
+        points: (N, ..., 3) xyz coordinates of the points.
+        normals: (N, ..., 3) xyz normal vectors; must have the same shape as points.
+        color: (N, 3) RGB color of the specular component of the light.
+        direction: (N, 3) vector direction of the light.
+        camera_position: (N, 3) The xyz position of the camera.
+        shininess: (N)  The specular exponent of the material.
+
+    Returns:
+        colors: (N, ..., 3), same shape as the input points.
+
+    The points, normals, camera_position, and direction should be in the same
+    coordinate frame i.e. if the points have been transformed from
+    world -> view space then the normals, camera_position, and light direction
+    should also be in view space.
+
+    To use with a batch of packed points reindex in the following way.
+    .. code-block:: python
+
+        Args:
+            points: (P, 3)
+            normals: (P, 3)
+            color: (N, 3)[batch_idx] -> (P, 3)
+            direction: (N, 3)[batch_idx] -> (P, 3)
+            camera_position: (N, 3)[batch_idx] -> (P, 3)
+            shininess: (N)[batch_idx] -> (P)
+        Returns:
+            colors: (P, 3)
+
+        where batch_idx is of shape (P). For meshes batch_idx can be:
+        meshes.verts_packed_to_mesh_idx() or meshes.faces_packed_to_mesh_idx().
+    """
+    # TODO: handle multiple directional lights
+    # TODO: attenuate based on inverse squared distance to the light source
+
+    if points.shape != normals.shape:
+        msg = "Expected points and normals to have the same shape: got %r, %r"
+        raise ValueError(msg % (points.shape, normals.shape))
+
+    # Ensure all inputs have same batch dimension as points
+    matched_tensors = convert_to_tensors_and_broadcast(
+        points, color, direction, camera_position, shininess, device=points.device
+    )
+    _, color, direction, camera_position, shininess = matched_tensors
+
+    # Reshape direction, color, camera_position and shininess so they broadcast
+    # over the intermediate dimensions of points. Assume first dim = batch dim.
+    points_dims = points.shape[1:-1]
+    expand_dims = (-1,) + (1,) * len(points_dims)
+    if direction.shape != normals.shape:
+        direction = direction.view(expand_dims + (3,))
+    if color.shape != normals.shape:
+        color = color.view(expand_dims + (3,))
+    if camera_position.shape != normals.shape:
+        camera_position = camera_position.view(expand_dims + (3,))
+    if shininess.shape != normals.shape:
+        shininess = shininess.view(expand_dims)
+
+    # Renormalize the normals in case they have been interpolated.
+    # We tried a version that uses F.cosine_similarity instead of renormalizing,
+    # but it was slower.
+    normals = F.normalize(normals, p=2, dim=-1, eps=1e-6)
+    direction = F.normalize(direction, p=2, dim=-1, eps=1e-6)
+    cos_angle = torch.sum(normals * direction, dim=-1)
+    # No specular highlights where the cosine of the angle is not positive.
+    mask = (cos_angle > 0).to(torch.float32)
+
+    # Calculate the specular reflection.
+    view_direction = camera_position - points
+    view_direction = F.normalize(view_direction, p=2, dim=-1, eps=1e-6)
+    reflect_direction = -direction + 2 * (cos_angle[..., None] * normals)
+
+    # Cosine of the angle between the reflected light ray and the viewer
+    alpha = F.relu(torch.sum(view_direction * reflect_direction, dim=-1)) * mask
+    return color * torch.pow(alpha, shininess)[..., None]
+
+
+class DirectionalLights(TensorProperties):
+    def __init__(
+        self,
+        ambient_color=((0.5, 0.5, 0.5),),
+        diffuse_color=((0.3, 0.3, 0.3),),
+        specular_color=((0.2, 0.2, 0.2),),
+        direction=((0, 1, 0),),
+        device: Device = "cpu",
+    ) -> None:
+        """
+        Args:
+            ambient_color: RGB color of the ambient component.
+            diffuse_color: RGB color of the diffuse component.
+            specular_color: RGB color of the specular component.
+            direction: (x, y, z) direction vector of the light.
+            device: Device (as str or torch.device) on which the tensors should be located
+
+        The inputs can each be
+            - 3 element tuple/list or list of lists
+            - torch tensor of shape (1, 3)
+            - torch tensor of shape (N, 3)
+        The inputs are broadcast against each other so they all have batch
+        dimension N.
+        """
+        super().__init__(
+            device=device,
+            ambient_color=ambient_color,
+            diffuse_color=diffuse_color,
+            specular_color=specular_color,
+            direction=direction,
+        )
+        _validate_light_properties(self)
+        if self.direction.shape[-1] != 3:
+            msg = "Expected direction to have shape (N, 3); got %r"
+            raise ValueError(msg % (self.direction.shape,))  # 1-tuple: Size is a tuple
+
+    def clone(self):
+        other = self.__class__(device=self.device)
+        return super().clone(other)
+
+    def diffuse(self, normals, points=None) -> torch.Tensor:
+        # NOTE: Points is not used but is kept in the args so that the API is
+        # the same for directional and point lights. The call sites should not
+        # need to know the light type.
+        return diffuse(
+            normals=normals,
+            color=self.diffuse_color,
+            direction=self.direction,
+        )
+
+    def specular(self, normals, points, camera_position, shininess) -> torch.Tensor:
+        return specular(
+            points=points,
+            normals=normals,
+            color=self.specular_color,
+            direction=self.direction,
+            camera_position=camera_position,
+            shininess=shininess,
+        )
+
+
+class PointLights(TensorProperties):
+    def __init__(
+        self,
+        ambient_color=((0.5, 0.5, 0.5),),
+        diffuse_color=((0.3, 0.3, 0.3),),
+        specular_color=((0.2, 0.2, 0.2),),
+        location=((0, 1, 0),),
+        device: Device = "cpu",
+    ) -> None:
+        """
+        Args:
+            ambient_color: RGB color of the ambient component
+            diffuse_color: RGB color of the diffuse component
+            specular_color: RGB color of the specular component
+            location: xyz position of the light.
+            device: Device (as str or torch.device) on which the tensors should be located
+
+        The inputs can each be
+            - 3 element tuple/list or list of lists
+            - torch tensor of shape (1, 3)
+            - torch tensor of shape (N, 3)
+        The inputs are broadcast against each other so they all have batch
+        dimension N.
+        """
+        super().__init__(
+            device=device,
+            ambient_color=ambient_color,
+            diffuse_color=diffuse_color,
+            specular_color=specular_color,
+            location=location,
+        )
+        _validate_light_properties(self)
+        if self.location.shape[-1] != 3:
+            msg = "Expected location to have shape (N, 3); got %r"
+            raise ValueError(msg % (self.location.shape,))  # 1-tuple: Size is a tuple
+
+    def clone(self):
+        other = self.__class__(device=self.device)
+        return super().clone(other)
+
+    def reshape_location(self, points) -> torch.Tensor:
+        """
+        Reshape the location tensor to have dimensions compatible with the
+        points: unchanged if the ranks already match (e.g. packed (P, 3)),
+        otherwise (N, 1, ..., 1, 3), e.g. (N, 1, 1, 1, 3) for (N, H, W, K, 3).
+        """
+        if self.location.ndim == points.ndim:
+            return self.location
+        return self.location[(slice(None),) + (None,) * (points.ndim - 2)]
+
+    def diffuse(self, normals, points) -> torch.Tensor:
+        location = self.reshape_location(points)
+        direction = location - points
+        return diffuse(normals=normals, color=self.diffuse_color, direction=direction)
+
+    def specular(self, normals, points, camera_position, shininess) -> torch.Tensor:
+        location = self.reshape_location(points)
+        direction = location - points
+        return specular(
+            points=points,
+            normals=normals,
+            color=self.specular_color,
+            direction=direction,
+            camera_position=camera_position,
+            shininess=shininess,
+        )
+
+
+class AmbientLights(TensorProperties):
+    """
+    A light object representing the same color of light everywhere.
+    By default, this is white, which effectively means lighting is
+    not used in rendering.
+
+    Unlike other lights this supports an arbitrary number of channels, not just 3 for RGB.
+    The ambient_color input determines the number of channels.
+    """
+
+    def __init__(self, *, ambient_color=None, device: Device = "cpu") -> None:
+        """
+        If ambient_color is not provided, it defaults to white RGB,
+        i.e. ((1.0, 1.0, 1.0),).
+
+        Args:
+            ambient_color: color of the ambient light, with C channels
+            device: Device (as str or torch.device) on which the tensors should be located
+
+        The ambient_color if provided, should be
+            - tuple/list of C-element tuples of floats
+            - torch tensor of shape (1, C)
+            - torch tensor of shape (N, C)
+        where C is the number of channels and N is batch size.
+        For RGB, C is 3.
+        """
+        if ambient_color is None:
+            ambient_color = ((1.0, 1.0, 1.0),)
+        super().__init__(ambient_color=ambient_color, device=device)
+
+    def clone(self):
+        other = self.__class__(device=self.device)
+        return super().clone(other)
+
+    def diffuse(self, normals, points) -> torch.Tensor:
+        return self._zeros_channels(points)  # ambient light adds no diffuse term
+
+    def specular(self, normals, points, camera_position, shininess) -> torch.Tensor:
+        return self._zeros_channels(points)  # ambient light adds no specular term
+
+    def _zeros_channels(self, points: torch.Tensor) -> torch.Tensor:
+        ch = self.ambient_color.shape[-1]
+        return torch.zeros(*points.shape[:-1], ch, device=points.device)
+
+
+def _validate_light_properties(obj) -> None:
+    """Raise ValueError unless every color property of `obj` has 3 channels."""
+    msg = "Expected %s to have shape (N, 3); got %r"
+    for name in ("ambient_color", "diffuse_color", "specular_color"):
+        color = getattr(obj, name)
+        if color.shape[-1] != 3:
+            raise ValueError(msg % (name, color.shape))
diff --git a/pytorch3d/pytorch3d/renderer/materials.py b/pytorch3d/pytorch3d/renderer/materials.py
new file mode 100644
index 0000000000000000000000000000000000000000..27558ed8a66f82d43e702e6e96f7734ba6ce803f
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/materials.py
@@ -0,0 +1,65 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import torch
+
+from ..common.datatypes import Device
+from .utils import TensorProperties
+
+
+class Materials(TensorProperties):
+    """
+    A class for storing a batch of material properties. Currently only one
+    material per batch element is supported.
+    """
+
+    def __init__(
+        self,
+        ambient_color=((1, 1, 1),),
+        diffuse_color=((1, 1, 1),),
+        specular_color=((1, 1, 1),),
+        shininess=64,
+        device: Device = "cpu",
+    ) -> None:
+        """
+        Args:
+            ambient_color: ambient reflectivity of the material
+            diffuse_color: diffuse reflectivity of the material
+            specular_color: specular reflectivity of the material
+            shininess: The specular exponent for the material. This defines
+                the focus of the specular highlight with a high value
+                resulting in a concentrated highlight. Shininess values
+                can range from 0-1000.
+            device: Device (as str or torch.device) on which the tensors should be located
+
+        ambient_color, diffuse_color and specular_color can be of shape
+        (1, C) or (N, C) where C is typically 3 (for RGB). shininess can be of shape (1,)
+        or (N,).
+
+        The colors and shininess are broadcast against each other so need to
+        have either the same batch dimension or batch dimension = 1.
+        """
+        super().__init__(
+            device=device,
+            diffuse_color=diffuse_color,
+            ambient_color=ambient_color,
+            specular_color=specular_color,
+            shininess=shininess,
+        )
+        C = self.ambient_color.shape[-1]
+        for n in ["ambient_color", "diffuse_color", "specular_color"]:
+            t = getattr(self, n)
+            if t.shape[-1] != C:
+                msg = "Expected %s to have shape (N, %d); got %r"
+                raise ValueError(msg % (n, C, t.shape))
+        if self.shininess.shape != torch.Size([self._N]):
+            msg = "shininess should have shape (N); got %r"
+            raise ValueError(msg % (self.shininess.shape,))  # 1-tuple: Size is a tuple
+
+    def clone(self):
+        other = Materials(device=self.device)
+        return super().clone(other)
diff --git a/pytorch3d/pytorch3d/renderer/mesh/__init__.py b/pytorch3d/pytorch3d/renderer/mesh/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6bda3f77477f6ea1de5b7d31f0b619b05027f92
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/mesh/__init__.py
@@ -0,0 +1,37 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .clip import (
+    clip_faces,
+    ClipFrustum,
+    ClippedFaces,
+    convert_clipped_rasterization_to_original_faces,
+)
+from .rasterize_meshes import rasterize_meshes
+from .rasterizer import MeshRasterizer, RasterizationSettings
+from .renderer import MeshRenderer, MeshRendererWithFragments
+from .shader import (  # DEPRECATED
+    BlendParams,
+    HardFlatShader,
+    HardGouraudShader,
+    HardPhongShader,
+    SoftGouraudShader,
+    SoftPhongShader,
+    SoftSilhouetteShader,
+    SplatterPhongShader,
+    TexturedSoftPhongShader,
+)
+from .shading import gouraud_shading, phong_shading
+from .textures import (  # DEPRECATED
+    Textures,
+    TexturesAtlas,
+    TexturesBase,
+    TexturesUV,
+    TexturesVertex,
+)
+
+
+__all__ = [k for k in globals().keys() if not k.startswith("_")]  # every non-underscore module-level name
diff --git a/pytorch3d/pytorch3d/renderer/mesh/clip.py b/pytorch3d/pytorch3d/renderer/mesh/clip.py
new file mode 100644
index 0000000000000000000000000000000000000000..6261f9c5e2fbd50533c388cfa372b438003c7958
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/mesh/clip.py
@@ -0,0 +1,724 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Any, List, Optional, Tuple
+
+import torch
+
+
+"""
+Mesh clipping is done before rasterization and is implemented using 4 cases
+(these will be referred to throughout the functions below)
+
+Case 1: the triangle is completely in front of the clipping plane (it is left
+        unchanged)
+Case 2: the triangle is completely behind the clipping plane (it is culled)
+Case 3: the triangle has exactly two vertices behind the clipping plane (it is
+        clipped into a smaller triangle)
+Case 4: the triangle has exactly one vertex behind the clipping plane (it is clipped
+        into a smaller quadrilateral and divided into two triangular faces)
+
+After rasterization, the Fragments from the clipped/modified triangles
+are mapped back to the triangles in the original mesh. The indices,
+barycentric coordinates and distances are all relative to original mesh triangles.
+
+NOTE: It is assumed that all z-coordinates are in world coordinates (not NDC
+coordinates), while x and y coordinates may be in NDC/screen coordinates
+(i.e after applying a projective transform e.g. cameras.transform_points(points)).
+"""
+
+
+class ClippedFaces:
+    """
+    Helper class to store the data for the clipped version of a Meshes object
+    (face_verts, mesh_to_face_first_idx, num_faces_per_mesh) along with
+    conversion information (faces_clipped_to_unclipped_idx, barycentric_conversion,
+    faces_clipped_to_conversion_idx, clipped_faces_neighbor_idx) required to convert
+    barycentric coordinates from rasterization of the clipped Meshes to barycentric
+    coordinates in terms of the unclipped Meshes.
+
+    Args:
+        face_verts: FloatTensor of shape (F_clipped, 3, 3) giving the verts of
+            each of the clipped faces
+        mesh_to_face_first_idx: a tensor of shape (N,), where N is the number of meshes
+            in the batch.  The ith element stores the index into face_verts
+            of the first face of the ith mesh.
+        num_faces_per_mesh: a tensor of shape (N,) storing the number of faces in each mesh.
+        faces_clipped_to_unclipped_idx: (F_clipped,) shaped LongTensor mapping each clipped
+            face back to the face in faces_unclipped (i.e. the faces in the original meshes
+            obtained using meshes.faces_packed())
+        barycentric_conversion: (T, 3, 3) FloatTensor, where barycentric_conversion[i, :, k]
+            stores the barycentric weights in terms of the world coordinates of the original
+            (big) unclipped triangle for the kth vertex in the clipped (small) triangle.
+            If the rasterizer then expresses some NDC coordinate in terms of barycentric
+            world coordinates for the clipped (small) triangle as alpha_clipped[i,:],
+            alpha_unclipped[i, :] = barycentric_conversion[i, :, :]*alpha_clipped[i, :]
+        faces_clipped_to_conversion_idx: (F_clipped,) shaped LongTensor mapping each clipped
+            face to the applicable row of barycentric_conversion (or set to -1 if conversion is
+            not needed).
+        clipped_faces_neighbor_idx: LongTensor of shape (F_clipped,) giving the index of the
+            neighboring face for each case 4 triangle. e.g. for a case 4 face with f split
+            into two triangles (t1, t2): clipped_faces_neighbor_idx[t1_idx] = t2_idx.
+            Faces which are not clipped and subdivided are set to -1 (i.e. cases 1/2/3).
+    """
+
+    __slots__ = [
+        "face_verts",
+        "mesh_to_face_first_idx",
+        "num_faces_per_mesh",
+        "faces_clipped_to_unclipped_idx",
+        "barycentric_conversion",
+        "faces_clipped_to_conversion_idx",
+        "clipped_faces_neighbor_idx",
+    ]
+
+    def __init__(
+        self,
+        face_verts: torch.Tensor,
+        mesh_to_face_first_idx: torch.Tensor,
+        num_faces_per_mesh: torch.Tensor,
+        faces_clipped_to_unclipped_idx: Optional[torch.Tensor] = None,
+        barycentric_conversion: Optional[torch.Tensor] = None,
+        faces_clipped_to_conversion_idx: Optional[torch.Tensor] = None,
+        clipped_faces_neighbor_idx: Optional[torch.Tensor] = None,
+    ) -> None:
+        self.face_verts = face_verts
+        self.mesh_to_face_first_idx = mesh_to_face_first_idx
+        self.num_faces_per_mesh = num_faces_per_mesh
+        self.faces_clipped_to_unclipped_idx = faces_clipped_to_unclipped_idx
+        self.barycentric_conversion = barycentric_conversion
+        self.faces_clipped_to_conversion_idx = faces_clipped_to_conversion_idx
+        self.clipped_faces_neighbor_idx = clipped_faces_neighbor_idx
+
+class ClipFrustum:
+    """
+    Helper class to store the information needed to represent a view frustum
+    (left, right, top, bottom, znear, zfar), which is used to clip or cull triangles.
+    Planes left as None (the default for all six) are not used for culling.
+    The parameters perspective_correct, cull, and z_clip_value are used to define
+    behavior for clipping triangles to the frustum.
+
+    Args:
+        left: NDC coordinate of the left clipping plane (along x axis)
+        right: NDC coordinate of the right clipping plane (along x axis)
+        top: NDC coordinate of the top clipping plane (along y axis)
+        bottom: NDC coordinate of the bottom clipping plane (along y axis)
+        znear: world space z coordinate of the near clipping plane
+        zfar: world space z coordinate of the far clipping plane
+        perspective_correct: set to True for a perspective camera (default False)
+        cull: if True (the default), triangles outside the frustum should be culled
+        z_clip_value: if not None, then triangles should be clipped (possibly into
+            smaller triangles) such that z >= z_clip_value.  This avoids projections
+            that go to infinity as z->0
+    """
+
+    __slots__ = [
+        "left",
+        "right",
+        "top",
+        "bottom",
+        "znear",
+        "zfar",
+        "perspective_correct",
+        "cull",
+        "z_clip_value",
+    ]
+
+    def __init__(
+        self,
+        left: Optional[float] = None,
+        right: Optional[float] = None,
+        top: Optional[float] = None,
+        bottom: Optional[float] = None,
+        znear: Optional[float] = None,
+        zfar: Optional[float] = None,
+        perspective_correct: bool = False,
+        cull: bool = True,
+        z_clip_value: Optional[float] = None,
+    ) -> None:
+        self.left = left
+        self.right = right
+        self.top = top
+        self.bottom = bottom
+        self.znear = znear
+        self.zfar = zfar
+        self.perspective_correct = perspective_correct
+        self.cull = cull
+        self.z_clip_value = z_clip_value
+
+def _get_culled_faces(face_verts: torch.Tensor, frustum: ClipFrustum) -> torch.Tensor:
+    """
+    Helper function used to find all the faces in Meshes which are
+    fully outside the view frustum. A face is culled if all 3 vertices are outside
+    the same axis of the view frustum.
+
+    Args:
+        face_verts: An (F,3,3) tensor, where F is the number of faces in
+            the packed representation of Meshes. The 2nd dimension represents the 3 vertices
+            of a triangle, and the 3rd dimension stores the xyz locations of each
+            vertex.
+        frustum: An instance of the ClipFrustum class with the information on the
+            position of the clipping planes.
+
+    Returns:
+        faces_culled: A boolean tensor of size F specifying whether or not each face should be
+            culled.
+    """
+    clipping_planes = (
+        (frustum.left, 0, "<"),
+        (frustum.right, 0, ">"),
+        (frustum.top, 1, "<"),
+        (frustum.bottom, 1, ">"),
+        (frustum.znear, 2, "<"),
+        (frustum.zfar, 2, ">"),
+    )
+    faces_culled = torch.zeros(
+        [face_verts.shape[0]], dtype=torch.bool, device=face_verts.device
+    )
+    for plane in clipping_planes:
+        clip_value, axis, op = plane
+        # Skip planes left as None; index the LAST dim so that coordinate `axis`
+        if frustum.cull and clip_value is not None:
+            if op == "<":  # of all 3 verts is compared, giving an (F, 3) mask
+                verts_clipped = face_verts[:, :, axis] < clip_value
+            else:
+                verts_clipped = face_verts[:, :, axis] > clip_value
+
+            # If all verts are clipped then face is outside the frustum
+            faces_culled |= verts_clipped.sum(1) == 3
+
+    return faces_culled
+
+
+def _find_verts_intersecting_clipping_plane(
+    face_verts: torch.Tensor,
+    p1_face_ind: torch.Tensor,
+    clip_value: float,
+    perspective_correct: bool,
+) -> Tuple[Tuple[Any, Any, Any, Any, Any], List[Any]]:
+    r"""
+    Helper function to find the vertices used to form a new triangle for case 3/case 4 faces.
+
+    Given a list of triangles that are already known to intersect the clipping plane,
+    solve for the two vertices p4 and p5 where the edges of the triangle intersects the
+    clipping plane.
+
+                       p1
+                       /\
+                      /  \
+                     /  t \
+     _____________p4/______\p5__________ clip_value
+                   /        \
+                  /____      \
+                p2     ---____\p3
+
+    Args:
+        face_verts: An (F,3,3) tensor, where F is the number of faces in
+            the packed representation of the Meshes, the 2nd dimension represents
+            the 3 vertices of the face, and the 3rd dimension stores the xyz locations of each
+            vertex.  The z-coordinates must be represented in world coordinates, while
+            the xy-coordinates may be in NDC/screen coordinates (i.e. after projection).
+        p1_face_ind: A tensor of shape (F,) with values in the range of 0 to 2.  In each
+            case 3/case 4 triangle, two vertices are on the same side of the
+            clipping plane and the 3rd is on the other side.  p1_face_ind stores the index of
+            the vertex that is not on the same side as any other vertex in the triangle.
+        clip_value: Float, the z-value defining where to clip the triangle.
+        perspective_correct: Bool, Should be set to true if a perspective camera was
+            used and xy-coordinates of face_verts_unclipped are in NDC/screen coordinates.
+
+    Returns:
+        A 2-tuple
+            p: (p1, p2, p3, p4, p5)
+            p_barycentric: [p1_bary, p2_bary, p3_bary, p4_bary, p5_bary]
+
+        Each of p1...p5 is an (F,3) tensor of the xyz locations of the 5 points in the
+        diagram above for case 3/case 4 faces. Each p1_bary...p5_bary is an (F, 3) tensor
+        storing the barycentric weights used to encode p1...p5 in terms of the original
+        unclipped triangle.
+    """
+
+    # Let T be number of triangles in face_verts (note that these correspond to the subset
+    # of case 3 or case 4 triangles). p1_face_ind, p2_face_ind, and p3_face_ind are (T)
+    # tensors with values in the range of 0 to 2.  p1_face_ind stores the index of the
+    # vertex that is not on the same side as any other vertex in the triangle, and
+    # p2_face_ind and p3_face_ind are the indices of the other two vertices preserving
+    # the same counterclockwise or clockwise ordering
+    T = face_verts.shape[0]
+    p2_face_ind = torch.remainder(p1_face_ind + 1, 3)
+    p3_face_ind = torch.remainder(p1_face_ind + 2, 3)
+
+    # p1, p2, p3 are (T, 3) tensors storing the corresponding (x, y, z) coordinates
+    # of p1_face_ind, p2_face_ind, p3_face_ind
+    p1 = face_verts.gather(1, p1_face_ind[:, None, None].expand(-1, -1, 3)).squeeze(1)
+    p2 = face_verts.gather(1, p2_face_ind[:, None, None].expand(-1, -1, 3)).squeeze(1)
+    p3 = face_verts.gather(1, p3_face_ind[:, None, None].expand(-1, -1, 3)).squeeze(1)
+
+    ##################################
+    # Solve for intersection point p4
+    ##################################
+
+    # p4 is a (T, 3) tensor representing the point on the segment between p1 and p2 that
+    # intersects the clipping plane.
+    # Solve for the weight w2 such that p1.z*(1-w2) + p2.z*w2 = clip_value.
+    # Then interpolate p4 = p1*(1-w2) + p2*w2 where it is assumed that z-coordinates
+    # are expressed in world coordinates (since we want to clip z in world coordinates).
+    w2 = (p1[:, 2] - clip_value) / (p1[:, 2] - p2[:, 2])
+    p4 = p1 * (1 - w2[:, None]) + p2 * w2[:, None]
+    if perspective_correct:
+        # It is assumed that all z-coordinates are in world coordinates (not NDC
+        # coordinates), while x and y coordinates may be in NDC/screen coordinates.
+        # If x and y are in NDC/screen coordinates and a projective transform was used
+        # in a perspective camera, then we effectively want to:
+        # 1. Convert back to world coordinates (by multiplying by z)
+        # 2. Interpolate using w2
+        # 3. Convert back to NDC/screen coordinates (by dividing by the new z=clip_value)
+        p1_world = p1[:, :2] * p1[:, 2:3]
+        p2_world = p2[:, :2] * p2[:, 2:3]
+        p4[:, :2] = (p1_world * (1 - w2[:, None]) + p2_world * w2[:, None]) / clip_value
+
+    ##################################
+    # Solve for intersection point p5
+    ##################################
+
+    # p5 is a (T, 3) tensor representing the point on the segment between p1 and p3 that
+    # intersects the clipping plane.
+    # Solve for the weight w3 such that p1.z * (1-w3) + p3.z * w3 = clip_value,
+    # and then interpolate p5 = p1 * (1-w3) + p3 * w3
+    w3 = (p1[:, 2] - clip_value) / (p1[:, 2] - p3[:, 2])
+    w3 = w3.detach()  # NOTE(review): w2 above is not detached; confirm the asymmetry is intended
+    p5 = p1 * (1 - w3[:, None]) + p3 * w3[:, None]
+    if perspective_correct:
+        # Again if using a perspective camera, convert back to world coordinates
+        # interpolate and convert back
+        p1_world = p1[:, :2] * p1[:, 2:3]
+        p3_world = p3[:, :2] * p3[:, 2:3]
+        p5[:, :2] = (p1_world * (1 - w3[:, None]) + p3_world * w3[:, None]) / clip_value
+
+    # Set the barycentric coordinates of p1,p2,p3,p4,p5 in terms of the original
+    # unclipped triangle in face_verts.
+    T_idx = torch.arange(T, device=face_verts.device)
+    p_barycentric = [torch.zeros((T, 3), device=face_verts.device) for i in range(5)]
+    p_barycentric[0][(T_idx, p1_face_ind)] = 1
+    p_barycentric[1][(T_idx, p2_face_ind)] = 1
+    p_barycentric[2][(T_idx, p3_face_ind)] = 1
+    p_barycentric[3][(T_idx, p1_face_ind)] = 1 - w2
+    p_barycentric[3][(T_idx, p2_face_ind)] = w2
+    p_barycentric[4][(T_idx, p1_face_ind)] = 1 - w3
+    p_barycentric[4][(T_idx, p3_face_ind)] = w3
+
+    p = (p1, p2, p3, p4, p5)
+
+    return p, p_barycentric
+
+
+###################
+# Main Entry point
+###################
+def clip_faces(
+    face_verts_unclipped: torch.Tensor,
+    mesh_to_face_first_idx: torch.Tensor,
+    num_faces_per_mesh: torch.Tensor,
+    frustum: ClipFrustum,
+) -> ClippedFaces:
+    """
+    Clip a mesh to the portion contained within a view frustum and with z > z_clip_value.
+
+    There are two types of clipping:
+      1) Cull triangles that are completely outside the view frustum.  This is purely
+         to save computation by reducing the number of triangles that need to be
+         rasterized.
+      2) Clip triangles into the portion of the triangle where z > z_clip_value. The
+         clipped region may be a quadrilateral, which results in splitting a triangle
+         into two triangles. This does not save computation, but is necessary to
+         correctly rasterize using perspective cameras for triangles that pass through
+         z <= 0, because NDC/screen coordinates go to infinity at z=0.
+
+    Args:
+        face_verts_unclipped: An (F, 3, 3) tensor, where F is the number of faces in
+            the packed representation of Meshes, the 2nd dimension represents the 3 vertices
+            of the triangle, and the 3rd dimension stores the xyz locations of each
+            vertex.  The z-coordinates must be represented in world coordinates, while
+            the xy-coordinates may be in NDC/screen coordinates
+        mesh_to_face_first_idx: a tensor of shape (N,), where N is the number of meshes
+            in the batch.  The ith element stores the index into face_verts_unclipped
+            of the first face of the ith mesh.
+        num_faces_per_mesh: a tensor of shape (N,) storing the number of faces in each mesh.
+        frustum: a ClipFrustum object defining the frustum used to cull faces.
+
+    Returns:
+        clipped_faces: ClippedFaces object storing a clipped version of the Meshes
+            along with tensors that can be used to convert barycentric coordinates
+            returned by rasterization of the clipped meshes into barycentric
+            coordinates for the unclipped meshes.
+    """
+    F = face_verts_unclipped.shape[0]
+    device = face_verts_unclipped.device
+
+    # Triangles completely outside the view frustum will be culled
+    # faces_culled is of shape (F, )
+    faces_culled = _get_culled_faces(face_verts_unclipped, frustum)
+
+    # Triangles that are partially behind the z clipping plane will be clipped to
+    # smaller triangles
+    z_clip_value = frustum.z_clip_value
+    perspective_correct = frustum.perspective_correct
+    if z_clip_value is not None:
+        # (F, 3) tensor (where F is the number of triangles) marking whether each vertex
+        # in a triangle is behind the clipping plane
+        faces_clipped_verts = face_verts_unclipped[:, :, 2] < z_clip_value
+
+        # (F) dim tensor containing the number of clipped vertices in each triangle
+        faces_num_clipped_verts = faces_clipped_verts.sum(1)
+    else:
+        faces_num_clipped_verts = torch.zeros([F], device=device)
+
+    # If no triangles need to be clipped or culled, avoid unnecessary computation
+    # and return early
+    if faces_num_clipped_verts.sum().item() == 0 and faces_culled.sum().item() == 0:
+        return ClippedFaces(
+            face_verts=face_verts_unclipped,
+            mesh_to_face_first_idx=mesh_to_face_first_idx,
+            num_faces_per_mesh=num_faces_per_mesh,
+        )
+
+    #####################################################################################
+    # Classify faces into the 4 relevant cases:
+    #   1) The triangle is completely in front of the clipping plane (it is left
+    #      unchanged)
+    #   2) The triangle is completely behind the clipping plane (it is culled)
+    #   3) The triangle has exactly two vertices behind the clipping plane (it is
+    #      clipped into a smaller triangle)
+    #   4) The triangle has exactly one vertex behind the clipping plane (it is clipped
+    #      into a smaller quadrilateral and split into two triangles)
+    #####################################################################################
+
+    faces_unculled = ~faces_culled
+    # Case 1:  no clipped verts or culled faces
+    cases1_unclipped = (faces_num_clipped_verts == 0) & faces_unculled
+    case1_unclipped_idx = cases1_unclipped.nonzero(as_tuple=True)[0]
+    # Case 2: all verts clipped
+    case2_unclipped = (faces_num_clipped_verts == 3) | faces_culled
+    # Case 3: two verts clipped
+    case3_unclipped = (faces_num_clipped_verts == 2) & faces_unculled
+    case3_unclipped_idx = case3_unclipped.nonzero(as_tuple=True)[0]
+    # Case 4: one vert clipped
+    case4_unclipped = (faces_num_clipped_verts == 1) & faces_unculled
+    case4_unclipped_idx = case4_unclipped.nonzero(as_tuple=True)[0]
+
+    # faces_unclipped_to_clipped_idx is an (F) dim tensor storing the index of each
+    # face to the corresponding face in face_verts_clipped.
+    # Each case 2 triangle will be culled (deleted from face_verts_clipped),
+    # while each case 4 triangle will be split into two smaller triangles
+    # (replaced by two consecutive triangles in face_verts_clipped)
+
+    # case2_unclipped is an (F,) dim 0/1 tensor of all the case2 faces
+    # case4_unclipped is an (F,) dim 0/1 tensor of all the case4 faces
+    faces_delta = case4_unclipped.int() - case2_unclipped.int()
+    # faces_delta_cum gives the per face change in index. Faces which are
+    # clipped in the original mesh are mapped to the closest non clipped face
+    # in face_verts_clipped (this doesn't matter as they are not used
+    # during rasterization anyway).
+    faces_delta_cum = faces_delta.cumsum(0) - faces_delta
+    delta = 1 + case4_unclipped.int() - case2_unclipped.int()
+    faces_unclipped_to_clipped_idx = delta.cumsum(0) - delta
+
+    ###########################################
+    # Allocate tensors for the output Meshes.
+    # These will then be filled in for each case.
+    ###########################################
+    F_clipped = (
+        F
+        # pyre-fixme[58]: `+` is not supported for operand types `int` and
+        #  `Union[bool, float, int]`.
+        + faces_delta_cum[-1].item()
+        # pyre-fixme[58]: `+` is not supported for operand types `int` and
+        #  `Union[bool, float, int]`.
+        + faces_delta[-1].item()
+    )  # Total number of faces in the new Meshes
+    face_verts_clipped = torch.zeros(
+        (F_clipped, 3, 3), dtype=face_verts_unclipped.dtype, device=device
+    )
+    faces_clipped_to_unclipped_idx = torch.zeros(
+        [F_clipped], dtype=torch.int64, device=device
+    )
+
+    # Updated versions of mesh_to_face_first_idx and num_faces_per_mesh applicable to
+    # face_verts_clipped
+    mesh_to_face_first_idx_clipped = faces_unclipped_to_clipped_idx[
+        mesh_to_face_first_idx
+    ]
+    F_clipped_t = torch.full([1], F_clipped, dtype=torch.int64, device=device)
+    num_faces_next = torch.cat((mesh_to_face_first_idx_clipped[1:], F_clipped_t))
+    num_faces_per_mesh_clipped = num_faces_next - mesh_to_face_first_idx_clipped
+
+    ################# Start Case 1 ########################################
+
+    # Case 1: Triangles are fully visible, copy unchanged triangles into the
+    # appropriate position in the new list of faces
+    case1_clipped_idx = faces_unclipped_to_clipped_idx[case1_unclipped_idx]
+    face_verts_clipped[case1_clipped_idx] = face_verts_unclipped[case1_unclipped_idx]
+    faces_clipped_to_unclipped_idx[case1_clipped_idx] = case1_unclipped_idx
+
+    # If no triangles need to be clipped but some triangles were culled, avoid
+    # unnecessary clipping computation
+    if case3_unclipped_idx.shape[0] + case4_unclipped_idx.shape[0] == 0:
+        return ClippedFaces(
+            face_verts=face_verts_clipped,
+            mesh_to_face_first_idx=mesh_to_face_first_idx_clipped,
+            num_faces_per_mesh=num_faces_per_mesh_clipped,
+            faces_clipped_to_unclipped_idx=faces_clipped_to_unclipped_idx,
+        )
+
+    ################# End Case 1 ##########################################
+
+    ################# Start Case 3 ########################################
+
+    # Case 3: exactly two vertices are behind the camera, clipping the triangle into a
+    # triangle.  In the diagram below, we clip the bottom part of the triangle, and add
+    # new vertices p4 and p5 by intersecting with the clipping plane.  The updated
+    # triangle is the triangle between p4, p1, p5
+    #
+    #                   p1  (unclipped vertex)
+    #                   /\
+    #                  /  \
+    #                 /  t \
+    # _____________p4/______\p5__________ clip_value
+    # xxxxxxxxxxxxxx/        \xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+    # xxxxxxxxxxxxx/____      \xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+    # xxxxxxxxxx p2 xxxx---____\p3 xxxxxxxxxxxxxxxxxxxxxxxxxxx
+    # xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+    faces_case3 = face_verts_unclipped[case3_unclipped_idx]
+
+    # index (0, 1, or 2) of the vertex in front of the clipping plane
+    p1_face_ind = torch.where(~faces_clipped_verts[case3_unclipped_idx])[1]
+
+    # Solve for the points p4, p5 that intersect the clipping plane
+    p, p_barycentric = _find_verts_intersecting_clipping_plane(
+        faces_case3, p1_face_ind, z_clip_value, perspective_correct
+    )
+
+    p1, _, _, p4, p5 = p
+    p1_barycentric, _, _, p4_barycentric, p5_barycentric = p_barycentric
+
+    # Store clipped triangle
+    case3_clipped_idx = faces_unclipped_to_clipped_idx[case3_unclipped_idx]
+    t_barycentric = torch.stack((p4_barycentric, p5_barycentric, p1_barycentric), 2)
+    face_verts_clipped[case3_clipped_idx] = torch.stack((p4, p5, p1), 1)
+    faces_clipped_to_unclipped_idx[case3_clipped_idx] = case3_unclipped_idx
+
+    ################# End Case 3 ##########################################
+
+    ################# Start Case 4 ########################################
+
+    # Case 4: exactly one vertex is behind the camera, clip the triangle into a
+    # quadrilateral.  In the diagram below, we clip the bottom part of the triangle,
+    # and add new vertices p4 and p5 by intersecting with the clipping plane.  The
+    # unclipped region is a quadrilateral, which is split into two triangles:
+    #   t1: p4, p2, p5
+    #   t2: p5, p2, p3
+    #
+    #            p3_____________________p2
+    #              \               __--/
+    #               \    t2    __--   /
+    #                \     __--  t1  /
+    # ______________p5\__--_________/p4_________clip_value
+    # xxxxxxxxxxxxxxxxx\           /xxxxxxxxxxxxxxxxxx
+    # xxxxxxxxxxxxxxxxxx\         /xxxxxxxxxxxxxxxxxxx
+    # xxxxxxxxxxxxxxxxxxx\       /xxxxxxxxxxxxxxxxxxxx
+    # xxxxxxxxxxxxxxxxxxxx\     /xxxxxxxxxxxxxxxxxxxxx
+    # xxxxxxxxxxxxxxxxxxxxx\   /xxxxxxxxxxxxxxxxxxxxx
+    # xxxxxxxxxxxxxxxxxxxxxx\ /xxxxxxxxxxxxxxxxxxxxx
+    #                      p1 (clipped vertex)
+
+    faces_case4 = face_verts_unclipped[case4_unclipped_idx]
+
+    # index (0, 1, or 2) of the vertex behind the clipping plane
+    p1_face_ind = torch.where(faces_clipped_verts[case4_unclipped_idx])[1]
+
+    # Solve for the points p4, p5 that intersect the clipping plane
+    p, p_barycentric = _find_verts_intersecting_clipping_plane(
+        faces_case4, p1_face_ind, z_clip_value, perspective_correct
+    )
+    _, p2, p3, p4, p5 = p
+    _, p2_barycentric, p3_barycentric, p4_barycentric, p5_barycentric = p_barycentric
+
+    # Store clipped triangles
+    case4_clipped_idx = faces_unclipped_to_clipped_idx[case4_unclipped_idx]
+    face_verts_clipped[case4_clipped_idx] = torch.stack((p4, p2, p5), 1)
+    face_verts_clipped[case4_clipped_idx + 1] = torch.stack((p5, p2, p3), 1)
+    t1_barycentric = torch.stack((p4_barycentric, p2_barycentric, p5_barycentric), 2)
+    t2_barycentric = torch.stack((p5_barycentric, p2_barycentric, p3_barycentric), 2)
+    faces_clipped_to_unclipped_idx[case4_clipped_idx] = case4_unclipped_idx
+    faces_clipped_to_unclipped_idx[case4_clipped_idx + 1] = case4_unclipped_idx
+
+    ##################### End Case 4 #########################
+
+    # Triangles that were clipped (case 3 & case 4) will require conversion of
+    # barycentric coordinates from being in terms of the smaller clipped triangle to in terms
+    # of the original big triangle.  If there are T clipped triangles,
+    # barycentric_conversion is a (T, 3, 3) tensor, where barycentric_conversion[i, :, k]
+    # stores the barycentric weights in terms of the world coordinates of the original
+    # (big) triangle for the kth vertex in the clipped (small) triangle.  If our
+    # rasterizer then expresses some NDC coordinate in terms of barycentric
+    # world coordinates for the clipped (small) triangle as alpha_clipped[i,:],
+    #   alpha_unclipped[i, :] = barycentric_conversion[i, :, :]*alpha_clipped[i, :]
+    barycentric_conversion = torch.cat((t_barycentric, t1_barycentric, t2_barycentric))
+
+    # faces_clipped_to_conversion_idx is an (F_clipped,) shape tensor mapping each output
+    # face to the applicable row of barycentric_conversion (or set to -1 if conversion is
+    # not needed)
+    faces_to_convert_idx = torch.cat(
+        (case3_clipped_idx, case4_clipped_idx, case4_clipped_idx + 1), 0
+    )
+    barycentric_idx = torch.arange(
+        barycentric_conversion.shape[0], dtype=torch.int64, device=device
+    )
+    faces_clipped_to_conversion_idx = torch.full(
+        [F_clipped], -1, dtype=torch.int64, device=device
+    )
+    faces_clipped_to_conversion_idx[faces_to_convert_idx] = barycentric_idx
+
+    # clipped_faces_neighbor_idx is an (F_clipped) dim tensor
+    # For case 4 clipped triangles (where a big triangle is split in two smaller triangles),
+    # store the index of the neighboring clipped triangle.
+    # This will be needed because if the soft rasterizer includes both
+    # triangles in the list of top K nearest triangles, we
+    # should only use the one with the smaller distance.
+    clipped_faces_neighbor_idx = torch.full(
+        [F_clipped], -1, dtype=torch.int64, device=device
+    )
+    clipped_faces_neighbor_idx[case4_clipped_idx] = case4_clipped_idx + 1
+    clipped_faces_neighbor_idx[case4_clipped_idx + 1] = case4_clipped_idx
+
+    clipped_faces = ClippedFaces(
+        face_verts=face_verts_clipped,
+        mesh_to_face_first_idx=mesh_to_face_first_idx_clipped,
+        num_faces_per_mesh=num_faces_per_mesh_clipped,
+        faces_clipped_to_unclipped_idx=faces_clipped_to_unclipped_idx,
+        barycentric_conversion=barycentric_conversion,
+        faces_clipped_to_conversion_idx=faces_clipped_to_conversion_idx,
+        clipped_faces_neighbor_idx=clipped_faces_neighbor_idx,
+    )
+    return clipped_faces
+
+
+def convert_clipped_rasterization_to_original_faces(
+    pix_to_face_clipped, bary_coords_clipped, clipped_faces: ClippedFaces
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Re-express the rasterization outputs of clipped Meshes computed using
+    clip_faces() (pix_to_face_clipped, bary_coords_clipped) so that face indices
+    and barycentric coordinates are in terms of the original unclipped Meshes.
+
+    Face indices are mapped back through the clipped-to-unclipped index map, and
+    the barycentric coordinates of faces that were clipped into smaller
+    triangle(s) are converted to be relative to the original (big) triangle.
+    Distances are neither an input nor an output here: they are handled in the
+    rasterizer C++/CUDA kernels (i.e. for Cases 1/3 the distance can be used
+    directly and for Case 4 triangles the distance of the pixel to the closest of
+    the two subdivided triangles is used).
+
+    Args:
+        pix_to_face_clipped: LongTensor of shape (N, image_size, image_size,
+            faces_per_pixel) giving the indices of the nearest faces at each pixel,
+            sorted in ascending z-order. Concretely
+            ``pix_to_face_clipped[n, y, x, k] = f`` means that ``faces_verts_clipped[f]``
+            is the kth closest face (in the z-direction) to pixel (y, x). Pixels that
+            are hit by fewer than faces_per_pixel are padded with -1, and such
+            entries stay -1 in the output.
+        bary_coords_clipped: FloatTensor of shape
+            (N, image_size, image_size, faces_per_pixel, 3) giving the barycentric
+            coordinates in world coordinates of the nearest faces at each pixel, sorted
+            in ascending z-order.  Concretely, if ``pix_to_face_clipped[n, y, x, k] = f``
+            then ``[w0, w1, w2] = bary_coords_clipped[n, y, x, k]`` gives the
+            barycentric coords for pixel (y, x) relative to the face defined by
+            ``unproject(face_verts_clipped[f])``. Pixels hit by fewer than
+            faces_per_pixel are padded with -1.
+        clipped_faces: an instance of ClippedFaces class giving the auxiliary
+            variables for converting rasterization outputs from clipped to
+            unclipped Meshes.
+
+    Returns:
+        2-tuple: (pix_to_face_unclipped, bary_coords_unclipped) that have the same
+        definition as (pix_to_face_clipped, bary_coords_clipped) except that they
+        pertain to the unclipped faces instead of the clipped faces (i.e. the
+        original meshes as opposed to the modified meshes). When ``clipped_faces``
+        carries no clipped-to-unclipped index map (None or empty), the two input
+        tensors are returned as they are.
+    """
+    clipped_to_unclipped = clipped_faces.faces_clipped_to_unclipped_idx
+
+    # Without an index map there is nothing to convert: hand the inputs back
+    if clipped_to_unclipped is None or clipped_to_unclipped.numel() == 0:
+        return pix_to_face_clipped, bary_coords_clipped
+
+    # Pixels with pix_to_face == -1 are background and must stay -1; `background`
+    # supplies that fill value wherever a lookup result is not applicable.
+    is_foreground = pix_to_face_clipped != -1
+    background = torch.full(
+        pix_to_face_clipped.shape,
+        -1,
+        device=pix_to_face_clipped.device,
+        dtype=torch.int64,
+    )
+
+    # Remap face indices from the clipped Meshes to the unclipped Meshes.
+    pix_to_face_unclipped = torch.where(
+        is_foreground, clipped_to_unclipped[pix_to_face_clipped], background
+    )
+
+    # barycentric_conversion is a (T, 3, 3) tensor such that
+    # alpha_unclipped[i, :] = barycentric_conversion[i, :, :]*alpha_clipped[i, :]
+    barycentric_conversion = clipped_faces.barycentric_conversion
+    if barycentric_conversion is None:
+        # No conversion matrices are available, so the barycentric coordinates
+        # are passed through untouched.
+        return pix_to_face_unclipped, bary_coords_clipped
+
+    # (F_clipped,) shape tensor mapping each clipped face to the applicable row of
+    # barycentric_conversion (or -1 if conversion is not needed)
+    face_to_conversion_row = clipped_faces.faces_clipped_to_conversion_idx
+
+    # Per-pixel row of barycentric_conversion, with -1 for background pixels and
+    # for faces that need no conversion.
+    pix_to_conversion_row = torch.where(
+        is_foreground, face_to_conversion_row[pix_to_face_clipped], background
+    )
+    needs_conversion = pix_to_conversion_row != -1
+
+    # Number of (pixel, k) entries whose face requires conversion
+    num_to_convert = needs_conversion.sum().item()
+
+    # Repeat the mask along a trailing dimension of size 3 so that it has the same
+    # shape as the barycentric coordinates
+    needs_conversion_expanded = needs_conversion[:, :, :, :, None].expand(
+        -1, -1, -1, -1, 3
+    )
+
+    # A (num_to_convert, 3, 3) tensor storing matrices to convert from clipped to
+    # unclipped barycentric coordinates
+    conversion_rows = pix_to_conversion_row[needs_conversion]
+    conversion_matrices = barycentric_conversion[conversion_rows]
+
+    # A (num_to_convert, 3, 1) tensor of barycentric coordinates in terms of the
+    # clipped triangles
+    clipped_columns = bary_coords_clipped[needs_conversion_expanded]
+    clipped_columns = clipped_columns.reshape((num_to_convert, 3, 1))
+
+    # A (num_to_convert, 3, 1) tensor of barycentric coordinates in terms of the
+    # unclipped triangles, flattened so it can be written back through the mask
+    unclipped_columns = conversion_matrices.bmm(clipped_columns)
+    unclipped_flat = unclipped_columns.reshape([num_to_convert * 3])
+
+    # Work on a copy so the caller's bary_coords_clipped is left unmodified
+    bary_coords_unclipped = bary_coords_clipped.clone()
+    bary_coords_unclipped[needs_conversion_expanded] = unclipped_flat
+
+    # dists for case 4 faces will be handled in the rasterizer
+    # so no need to modify them here.
+    return pix_to_face_unclipped, bary_coords_unclipped
diff --git a/pytorch3d/pytorch3d/renderer/mesh/rasterize_meshes.py b/pytorch3d/pytorch3d/renderer/mesh/rasterize_meshes.py
new file mode 100644
index 0000000000000000000000000000000000000000..afcd7496253ced111584ef11a34b750f7f1b3840
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/mesh/rasterize_meshes.py
@@ -0,0 +1,763 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from typing import List, Optional, Tuple, Union
+
+import numpy as np
+import torch
+from pytorch3d import _C
+
+from ..utils import parse_image_size
+
+from .clip import (
+    clip_faces,
+    ClipFrustum,
+    convert_clipped_rasterization_to_original_faces,
+)
+
+
+# TODO make the epsilon user configurable
+kEpsilon = 1e-8
+
+# Exclusive upper bound that rasterize_meshes enforces on
+# 1 + (max_image_size - 1) // bin_size for coarse-to-fine rasterization
+kMaxFacesPerBin = 22
+
+
+def rasterize_meshes(
+    meshes,
+    image_size: Union[int, List[int], Tuple[int, int]] = 256,
+    blur_radius: float = 0.0,
+    faces_per_pixel: int = 8,
+    bin_size: Optional[int] = None,
+    max_faces_per_bin: Optional[int] = None,
+    perspective_correct: bool = False,
+    clip_barycentric_coords: bool = False,
+    cull_backfaces: bool = False,
+    z_clip_value: Optional[float] = None,
+    cull_to_frustum: bool = False,
+):
+    """
+    Rasterize a batch of meshes given the shape of the desired output image.
+    Each mesh is rasterized onto a separate image of shape
+    (H, W) if `image_size` is a tuple or (image_size, image_size) if it
+    is an int.
+
+    If the desired image size is non square (i.e. a tuple of (H, W) where H != W)
+    the aspect ratio needs special consideration. There are two aspect ratios
+    to be aware of:
+        - the aspect ratio of each pixel
+        - the aspect ratio of the output image
+    The camera can be used to set the pixel aspect ratio. In the rasterizer,
+    we assume square pixels, but variable image aspect ratio (i.e rectangle images).
+
+    In most cases you will want to set the camera aspect ratio to
+    1.0 (i.e. square pixels) and only vary the
+    `image_size` (i.e. the output image dimensions in pixels).
+
+    Args:
+        meshes: A Meshes object representing a batch of meshes, batch size N.
+        image_size: Size in pixels of the output image to be rasterized.
+            Can optionally be a tuple of (H, W) in the case of non square images.
+        blur_radius: Float distance in the range [0, 2] used to expand the face
+            bounding boxes for rasterization. Setting blur radius
+            results in blurred edges around the shape instead of a
+            hard boundary. Set to 0 for no blur.
+        faces_per_pixel (Optional): Number of faces to save per pixel, returning
+            the nearest faces_per_pixel points along the z-axis.
+        bin_size: Size of bins to use for coarse-to-fine rasterization. Setting
+            bin_size=0 uses naive rasterization; setting bin_size=None attempts to
+            set it heuristically based on the shape of the input. This should not
+            affect the output, but can affect the speed of the forward pass.
+        max_faces_per_bin: Only applicable when using coarse-to-fine rasterization
+            (bin_size > 0); this is the maximum number of faces allowed within each
+            bin. This should not affect the output values, but can affect
+            the memory usage in the forward pass.
+        perspective_correct: Bool, Whether to apply perspective correction when computing
+            barycentric coordinates for pixels. This should be set to True if a perspective
+            camera is used.
+        clip_barycentric_coords: Whether, after any perspective correction is applied
+            but before the depth is calculated (e.g. for z clipping),
+            to "correct" a location outside the face (i.e. with a negative
+            barycentric coordinate) to a position on the edge of the face.
+        cull_backfaces: Bool, Whether to only rasterize mesh faces which are
+            visible to the camera.  This assumes that vertices of
+            front-facing triangles are ordered in an anti-clockwise
+            fashion, and triangles that face away from the camera are
+            in a clockwise order relative to the current view
+            direction. NOTE: This will only work if the mesh faces are
+            consistently defined with counter-clockwise ordering when
+            viewed from the outside.
+        z_clip_value: if not None, then triangles will be clipped (and possibly
+            subdivided into smaller triangles) such that z >= z_clip_value.
+            This avoids camera projections that go to infinity as z->0.
+            Default is None as clipping affects rasterization speed and
+            should only be turned on if explicitly needed.
+            See clip.py for all the extra computation that is required.
+        cull_to_frustum: if True, triangles outside the view frustum will be culled.
+            Culling involves removing all faces which fall outside view frustum.
+            Default is False so that it is turned on only when needed.
+
+    Returns:
+        4-element tuple containing
+
+        - **pix_to_face**: LongTensor of shape
+          (N, image_size, image_size, faces_per_pixel)
+          giving the indices of the nearest faces at each pixel,
+          sorted in ascending z-order.
+          Concretely ``pix_to_face[n, y, x, k] = f`` means that
+          ``faces_verts[f]`` is the kth closest face (in the z-direction)
+          to pixel (y, x). Pixels that are hit by fewer than
+          faces_per_pixel are padded with -1.
+        - **zbuf**: FloatTensor of shape (N, image_size, image_size, faces_per_pixel)
+          giving the NDC z-coordinates of the nearest faces at each pixel,
+          sorted in ascending z-order.
+          Concretely, if ``pix_to_face[n, y, x, k] = f`` then
+          ``zbuf[n, y, x, k] = face_verts[f, 2]``. Pixels hit by fewer than
+          faces_per_pixel are padded with -1.
+        - **barycentric**: FloatTensor of shape
+          (N, image_size, image_size, faces_per_pixel, 3)
+          giving the barycentric coordinates in NDC units of the
+          nearest faces at each pixel, sorted in ascending z-order.
+          Concretely, if ``pix_to_face[n, y, x, k] = f`` then
+          ``[w0, w1, w2] = barycentric[n, y, x, k]`` gives
+          the barycentric coords for pixel (y, x) relative to the face
+          defined by ``face_verts[f]``. Pixels hit by fewer than
+          faces_per_pixel are padded with -1.
+        - **pix_dists**: FloatTensor of shape
+          (N, image_size, image_size, faces_per_pixel)
+          giving the signed Euclidean distance (in NDC units) in the
+          x/y plane of each point closest to the pixel. Concretely if
+          ``pix_to_face[n, y, x, k] = f`` then ``pix_dists[n, y, x, k]`` is the
+          squared distance between the pixel (y, x) and the face given
+          by vertices ``face_verts[f]``. Pixels hit with fewer than
+          ``faces_per_pixel`` are padded with -1.
+
+        In the case that image_size is a tuple of (H, W) then the outputs
+        will be of shape `(N, H, W, ...)`.
+    """
+    verts_packed = meshes.verts_packed()
+    faces_packed = meshes.faces_packed()
+    face_verts = verts_packed[faces_packed]
+    mesh_to_face_first_idx = meshes.mesh_to_faces_packed_first_idx()
+    num_faces_per_mesh = meshes.num_faces_per_mesh()
+
+    # In the case that H != W use the max image size to set the bin_size
+    # to accommodate the num bins constraint in the coarse rasterizer.
+    # If the ratio of H:W is large this might cause issues as the smaller
+    # dimension will have fewer bins.
+    # TODO: consider a better way of setting the bin size.
+    im_size = parse_image_size(image_size)
+    max_image_size = max(*im_size)
+
+    clipped_faces_neighbor_idx = None
+    clipped_faces = None  # stays None unless clipping/culling is requested below
+
+    if z_clip_value is not None or cull_to_frustum:
+        # Cull faces outside the view frustum, and clip faces that are partially
+        # behind the camera into the portion of the triangle in front of the
+        # camera.  This may change the number of faces
+        frustum = ClipFrustum(
+            left=-1,
+            right=1,
+            top=-1,
+            bottom=1,
+            perspective_correct=perspective_correct,
+            z_clip_value=z_clip_value,
+            cull=cull_to_frustum,
+        )
+        clipped_faces = clip_faces(
+            face_verts, mesh_to_face_first_idx, num_faces_per_mesh, frustum=frustum
+        )
+        face_verts = clipped_faces.face_verts
+        mesh_to_face_first_idx = clipped_faces.mesh_to_face_first_idx
+        num_faces_per_mesh = clipped_faces.num_faces_per_mesh
+
+        # For case 4 clipped triangles (where a big triangle is split in two smaller triangles),
+        # need the index of the neighboring clipped triangle as only one can be
+        # in the top K closest faces in the rasterization step.
+        clipped_faces_neighbor_idx = clipped_faces.clipped_faces_neighbor_idx
+
+    if clipped_faces_neighbor_idx is None:
+        # Set to the default which is all -1s.
+        clipped_faces_neighbor_idx = torch.full(
+            size=(face_verts.shape[0],),
+            fill_value=-1,
+            device=meshes.device,
+            dtype=torch.int64,
+        )
+
+    # TODO: Choose naive vs coarse-to-fine based on mesh size and image size.
+    if bin_size is None:
+        if not verts_packed.is_cuda:
+            # Binned CPU rasterization is not supported.
+            bin_size = 0
+        else:
+            # TODO better heuristics for bin size.
+            if max_image_size <= 64:
+                bin_size = 8
+            else:
+                # Heuristic based formula maps max_image_size -> bin_size as follows:
+                # max_image_size <= 64 -> 8
+                # 64 < max_image_size <= 256 -> 16
+                # 256 < max_image_size <= 512 -> 32
+                # 512 < max_image_size <= 1024 -> 64
+                # 1024 < max_image_size <= 2048 -> 128
+                bin_size = int(2 ** max(np.ceil(np.log2(max_image_size)) - 4, 4))
+
+    if bin_size != 0:
+        # Number of bins along the larger image dimension, i.e. ceil(size / bin_size).
+        num_bins = 1 + (max_image_size - 1) // bin_size
+        if num_bins >= kMaxFacesPerBin:
+            raise ValueError(
+                "bin_size too small, number of bins along the larger image dimension "
+                "must be less than %d; got %d" % (kMaxFacesPerBin, num_bins)
+            )
+
+    if max_faces_per_bin is None:
+        max_faces_per_bin = int(max(10000, meshes._F / 5))
+
+    pix_to_face, zbuf, barycentric_coords, dists = _RasterizeFaceVerts.apply(
+        face_verts,
+        mesh_to_face_first_idx,
+        num_faces_per_mesh,
+        clipped_faces_neighbor_idx,
+        im_size,
+        blur_radius,
+        faces_per_pixel,
+        bin_size,
+        max_faces_per_bin,
+        perspective_correct,
+        clip_barycentric_coords,
+        cull_backfaces,
+    )
+
+    if clipped_faces is not None:
+        # If faces were clipped, map the rasterization result to be in terms of the
+        # original unclipped faces.  This may involve converting barycentric
+        # coordinates
+        outputs = convert_clipped_rasterization_to_original_faces(
+            pix_to_face,
+            barycentric_coords,
+            clipped_faces,
+        )
+        pix_to_face, barycentric_coords = outputs
+
+    return pix_to_face, zbuf, barycentric_coords, dists
+
+
+class _RasterizeFaceVerts(torch.autograd.Function):
+    """
+    Autograd bridge between PyTorch and the C++/CUDA mesh rasterization kernels.
+
+    The forward pass delegates to ``_C.rasterize_meshes`` and the backward pass
+    to ``_C.rasterize_meshes_backward``. Only ``face_verts`` receives a
+    gradient; every other input is treated as non-differentiable.
+
+    Args:
+        face_verts: Tensor of shape (F, 3, 3) giving (packed) vertex positions
+            for faces in all the meshes in the batch. Concretely,
+            face_verts[f, i] = [x, y, z] gives the coordinates for the
+            ith vertex of the fth face. These vertices are expected to
+            be in NDC coordinates in the range [-1, 1].
+        mesh_to_face_first_idx: LongTensor of shape (N) giving the index in
+            face_verts of the first face in each mesh in the batch.
+        num_faces_per_mesh: LongTensor of shape (N) giving the number of faces
+            for each mesh in the batch.
+        clipped_faces_neighbor_idx: LongTensor of shape (F) which holds, for a
+            face created by splitting a clipped triangle in two, the index of
+            the other half, and -1 for all remaining faces.
+        image_size, blur_radius, faces_per_pixel: same as rasterize_meshes.
+        bin_size, max_faces_per_bin: same as rasterize_meshes.
+        perspective_correct: same as rasterize_meshes.
+        clip_barycentric_coords: same as rasterize_meshes.
+        cull_backfaces: same as rasterize_meshes.
+        z_clip_value, cull_to_frustum: present in the signature but not read
+            by this wrapper.
+
+    Returns:
+        same as rasterize_meshes function.
+    """
+
+    @staticmethod
+    # pyre-fixme[14]: `forward` overrides method defined in `Function` inconsistently.
+    def forward(
+        ctx,
+        face_verts: torch.Tensor,
+        mesh_to_face_first_idx: torch.Tensor,
+        num_faces_per_mesh: torch.Tensor,
+        clipped_faces_neighbor_idx: torch.Tensor,
+        image_size: Union[List[int], Tuple[int, int]] = (256, 256),
+        blur_radius: float = 0.01,
+        faces_per_pixel: int = 0,
+        bin_size: int = 0,
+        max_faces_per_bin: int = 0,
+        perspective_correct: bool = False,
+        clip_barycentric_coords: bool = False,
+        cull_backfaces: bool = False,
+        z_clip_value: Optional[float] = None,
+        cull_to_frustum: bool = True,
+    ):
+        # pyre-fixme[16]: Module `pytorch3d` has no attribute `_C`.
+        outputs = _C.rasterize_meshes(
+            face_verts,
+            mesh_to_face_first_idx,
+            num_faces_per_mesh,
+            clipped_faces_neighbor_idx,
+            image_size,
+            blur_radius,
+            faces_per_pixel,
+            bin_size,
+            max_faces_per_bin,
+            perspective_correct,
+            clip_barycentric_coords,
+            cull_backfaces,
+        )
+        pix_to_face, zbuf, barycentric_coords, dists = outputs
+
+        # The backward kernel needs the vertices and the pixel -> face assignment.
+        ctx.save_for_backward(face_verts, pix_to_face)
+        # pix_to_face holds face indices, so no gradient is propagated through it.
+        ctx.mark_non_differentiable(pix_to_face)
+        ctx.perspective_correct = perspective_correct
+        ctx.clip_barycentric_coords = clip_barycentric_coords
+        return pix_to_face, zbuf, barycentric_coords, dists
+
+    @staticmethod
+    def backward(ctx, grad_pix_to_face, grad_zbuf, grad_barycentric_coords, grad_dists):
+        face_verts, pix_to_face = ctx.saved_tensors
+        grad_face_verts = _C.rasterize_meshes_backward(
+            face_verts,
+            pix_to_face,
+            grad_zbuf,
+            grad_barycentric_coords,
+            grad_dists,
+            ctx.perspective_correct,
+            ctx.clip_barycentric_coords,
+        )
+        # One entry per forward input: only face_verts gets a gradient.
+        return (
+            grad_face_verts,
+            None,  # mesh_to_face_first_idx
+            None,  # num_faces_per_mesh
+            None,  # clipped_faces_neighbor_idx
+            None,  # image_size
+            None,  # blur_radius
+            None,  # faces_per_pixel
+            None,  # bin_size
+            None,  # max_faces_per_bin
+            None,  # perspective_correct
+            None,  # clip_barycentric_coords
+            None,  # cull_backfaces
+        )
+
+
+def non_square_ndc_range(S1, S2):
+    """
+    Return the extent of the NDC range along an image dimension of size S1.
+
+    For non square images the NDC range is stretched to preserve the aspect
+    ratio: the smaller dimension spans 2.0 and the larger dimension spans
+    2.0 times the ratio of the larger size to the smaller size.
+
+    Args:
+        S1: dimension along which the NDC range is needed
+        S2: the other image dimension
+
+    Returns:
+        ndc_range: NDC range for dimension S1
+    """
+    base_range = 2.0
+    return (S1 / S2) * base_range if S1 > S2 else base_range
+
+
+def pix_to_non_square_ndc(i, S1, S2):
+    """
+    Convert a pixel index along one image axis to the NDC coordinate of its center.
+
+    By default the NDC range is [-1, 1]. For non square images the range is
+    scaled to keep the aspect ratio: the smaller dimension keeps the range
+    [-1, 1] and the larger dimension is stretched by the ratio of the sizes.
+    e.g. for image size (H, W) = (64, 128)
+       Height NDC range: [-1, 1]
+       Width NDC range: [-2, 2]
+
+    Args:
+        i: pixel position on axes S1
+        S1: dimension along which i is given
+        S2: the other image dimension
+
+    Returns:
+        pixel: NDC coordinate of point i for dimension S1
+    """
+    span = non_square_ndc_range(S1, S2)
+    half_span = span / 2.0
+    # NDC: left edge (-half_span) + i * pixel_width + half_pixel_width
+    return -half_span + (span * i + half_span) / S1
+
+
+def rasterize_meshes_python(  # noqa: C901
+    meshes,
+    image_size: Union[int, Tuple[int, int]] = 256,
+    blur_radius: float = 0.0,
+    faces_per_pixel: int = 8,
+    perspective_correct: bool = False,
+    clip_barycentric_coords: bool = False,
+    cull_backfaces: bool = False,
+    z_clip_value: Optional[float] = None,
+    cull_to_frustum: bool = True,
+    clipped_faces_neighbor_idx: Optional[torch.Tensor] = None,
+):
+    """
+    Naive PyTorch implementation of mesh rasterization with the same inputs and
+    outputs as the rasterize_meshes function. It loops in Python over every
+    mesh, pixel and face, so it is slow and serves as a reference to compare
+    the C++/CUDA implementations against. Returns the tuple
+    (face_idxs, zbuf, bary_coords, pix_dists); unused slots are filled with -1.
+    """
+    N = len(meshes)
+    H, W = image_size if isinstance(image_size, tuple) else (image_size, image_size)
+
+    K = faces_per_pixel
+    device = meshes.device
+
+    verts_packed = meshes.verts_packed()
+    faces_packed = meshes.faces_packed()
+    faces_verts = verts_packed[faces_packed]
+    mesh_to_face_first_idx = meshes.mesh_to_faces_packed_first_idx()
+    num_faces_per_mesh = meshes.num_faces_per_mesh()
+
+    if z_clip_value is not None or cull_to_frustum:
+        # Cull faces outside the view frustum, and clip faces that are partially
+        # behind the camera into the portion of the triangle in front of the
+        # camera.  This may change the number of faces
+        frustum = ClipFrustum(
+            left=-1,
+            right=1,
+            top=-1,
+            bottom=1,
+            perspective_correct=perspective_correct,
+            z_clip_value=z_clip_value,
+            cull=cull_to_frustum,
+        )
+        clipped_faces = clip_faces(
+            faces_verts, mesh_to_face_first_idx, num_faces_per_mesh, frustum=frustum
+        )
+        faces_verts = clipped_faces.face_verts
+        mesh_to_face_first_idx = clipped_faces.mesh_to_face_first_idx
+        num_faces_per_mesh = clipped_faces.num_faces_per_mesh
+
+    # Initialize output tensors.
+    face_idxs = torch.full(
+        (N, H, W, K), fill_value=-1, dtype=torch.int64, device=device
+    )
+    zbuf = torch.full((N, H, W, K), fill_value=-1, dtype=torch.float32, device=device)
+    bary_coords = torch.full(
+        (N, H, W, K, 3), fill_value=-1, dtype=torch.float32, device=device
+    )
+    pix_dists = torch.full(
+        (N, H, W, K), fill_value=-1, dtype=torch.float32, device=device
+    )
+
+    # Calculate all face bounding boxes.
+    x_mins = torch.min(faces_verts[:, :, 0], dim=1, keepdim=True).values
+    x_maxs = torch.max(faces_verts[:, :, 0], dim=1, keepdim=True).values
+    y_mins = torch.min(faces_verts[:, :, 1], dim=1, keepdim=True).values
+    y_maxs = torch.max(faces_verts[:, :, 1], dim=1, keepdim=True).values
+    z_mins = torch.min(faces_verts[:, :, 2], dim=1, keepdim=True).values
+
+    # Expand by sqrt(blur_radius) since blur_radius is compared to squared distances.
+    x_mins = x_mins - np.sqrt(blur_radius) - kEpsilon
+    x_maxs = x_maxs + np.sqrt(blur_radius) + kEpsilon
+    y_mins = y_mins - np.sqrt(blur_radius) - kEpsilon
+    y_maxs = y_maxs + np.sqrt(blur_radius) + kEpsilon
+
+    # Loop through meshes in the batch.
+    for n in range(N):
+        face_start_idx = mesh_to_face_first_idx[n]
+        face_stop_idx = face_start_idx + num_faces_per_mesh[n]
+
+        # Iterate through the horizontal lines of the image from top to bottom.
+        for yi in range(H):
+            # Y coordinate of one end of the image. Reverse the ordering
+            # of yi so that +Y is pointing up in the image.
+            yfix = H - 1 - yi
+            yf = pix_to_non_square_ndc(yfix, H, W)
+
+            # Iterate through pixels on this horizontal line, left to right.
+            for xi in range(W):
+                # X coordinate of one end of the image. Reverse the ordering
+                # of xi so that +X is pointing to the left in the image.
+                xfix = W - 1 - xi
+                xf = pix_to_non_square_ndc(xfix, W, H)
+                top_k_points = []
+
+                # Check whether each face in the mesh affects this pixel.
+                for f in range(face_start_idx, face_stop_idx):
+                    face = faces_verts[f].squeeze()
+                    v0, v1, v2 = face.unbind(0)
+
+                    face_area = edge_function(v0, v1, v2)
+
+                    # Ignore triangles facing away from the camera.
+                    back_face = face_area < 0
+                    if cull_backfaces and back_face:
+                        continue
+
+                    # Ignore faces which have zero area.
+                    if face_area == 0.0:
+                        continue
+
+                    outside_bbox = (
+                        xf < x_mins[f]
+                        or xf > x_maxs[f]
+                        or yf < y_mins[f]
+                        or yf > y_maxs[f]
+                    )
+
+                    # Faces with at least one vertex behind the camera won't
+                    # render correctly and should be removed or clipped before
+                    # calling the rasterizer
+                    if z_mins[f] < kEpsilon:
+                        continue
+
+                    # Check if pixel is outside of face bbox.
+                    if outside_bbox:
+                        continue
+
+                    # Compute barycentric coordinates and pixel z distance.
+                    pxy = torch.tensor([xf, yf], dtype=torch.float32, device=device)
+
+                    bary = barycentric_coordinates(pxy, v0[:2], v1[:2], v2[:2])
+                    if perspective_correct:
+                        z0, z1, z2 = v0[2], v1[2], v2[2]
+                        l0, l1, l2 = bary[0], bary[1], bary[2]
+                        top0 = l0 * z1 * z2
+                        top1 = z0 * l1 * z2
+                        top2 = z0 * z1 * l2
+                        bot = top0 + top1 + top2
+                        bary = torch.stack([top0 / bot, top1 / bot, top2 / bot])
+
+                    # Check if inside before clipping
+                    inside = all(x > 0.0 for x in bary)
+
+                    # Barycentric clipping
+                    if clip_barycentric_coords:
+                        bary = barycentric_coordinates_clip(bary)
+                    # use clipped barycentric coords to calculate the z value
+                    pz = bary[0] * v0[2] + bary[1] * v1[2] + bary[2] * v2[2]
+
+                    # Check if point is behind the image.
+                    if pz < 0:
+                        continue
+
+                    # Unsigned squared 2D distance from the pixel to the nearest face
+                    # edge; it is negated for inside pixels when written to pix_dists.
+                    dist = point_triangle_distance(pxy, v0[:2], v1[:2], v2[:2])
+
+                    # Skip pixels that are outside the face and whose squared
+                    # distance to it is not below blur_radius.
+                    if not inside and dist >= blur_radius:
+                        continue
+
+                    # Handle the case where a face (f) partially behind the image plane is
+                    # clipped to a quadrilateral and then split into two faces (t1, t2).
+                    top_k_idx = -1
+                    if (
+                        clipped_faces_neighbor_idx is not None
+                        and clipped_faces_neighbor_idx[f] != -1
+                    ):
+                        neighbor_idx = clipped_faces_neighbor_idx[f]
+                        # See if neighbor_idx is in top_k and find index
+                        top_k_idx = [
+                            i
+                            for i, val in enumerate(top_k_points)
+                            if val[1] == neighbor_idx
+                        ]
+                        top_k_idx = top_k_idx[0] if len(top_k_idx) > 0 else -1
+
+                    if top_k_idx != -1 and dist < top_k_points[top_k_idx][3]:
+                        # Overwrite the neighbor with current face info
+                        top_k_points[top_k_idx] = (pz, f, bary, dist, inside)
+                    else:
+                        # Handle as a normal face
+                        top_k_points.append((pz, f, bary, dist, inside))
+
+                    top_k_points.sort()
+                    if len(top_k_points) > K:
+                        top_k_points = top_k_points[:K]
+
+                # Save to output tensors.
+                for k, (pz, f, bary, dist, inside) in enumerate(top_k_points):
+                    zbuf[n, yi, xi, k] = pz
+                    face_idxs[n, yi, xi, k] = f
+                    bary_coords[n, yi, xi, k, 0] = bary[0]
+                    bary_coords[n, yi, xi, k, 1] = bary[1]
+                    bary_coords[n, yi, xi, k, 2] = bary[2]
+                    # Write the signed distance
+                    pix_dists[n, yi, xi, k] = -dist if inside else dist
+
+    if z_clip_value is not None or cull_to_frustum:
+        # If faces were clipped, map the rasterization result to be in terms of the
+        # original unclipped faces.  This may involve converting barycentric
+        # coordinates
+        (face_idxs, bary_coords,) = convert_clipped_rasterization_to_original_faces(
+            face_idxs,
+            bary_coords,
+            # pyre-fixme[61]: `clipped_faces` may not be initialized here.
+            clipped_faces,
+        )
+
+    return face_idxs, zbuf, bary_coords, pix_dists
+
+
+def edge_function(p, v0, v1):
+    r"""
+    Evaluate the 2D edge function of point p against the directed edge v0 -> v1.
+
+    Args:
+        p: (x, y) Coordinates of a point.
+        v0, v1: (x, y) Coordinates of the end points of the edge.
+
+    Returns:
+        area: The signed area of the parallelogram given by the vectors
+
+              .. code-block:: python
+
+                  B = p - v0
+                  A = v1 - v0
+
+                        v1 ________
+                          /\      /
+                      A  /  \    /
+                        /    \  /
+                    v0 /______\/
+                          B    p
+
+             This equals B.x * A.y - B.y * A.x. A positive value means p lies
+             on the right side of the edge and a negative value means it lies
+             on the left side, i.e. for an edge v1 - v0
+
+             .. code-block:: python
+
+                             v1
+                            /
+                           /
+                    -     /    +
+                         /
+                        /
+                      v0
+    """
+    bx, by = p[0] - v0[0], p[1] - v0[1]
+    ax, ay = v1[0] - v0[0], v1[1] - v0[1]
+    return bx * ay - by * ax
+
+
+def barycentric_coordinates_clip(bary):
+    """
+    Clamp negative barycentric coordinates to 0.0 and rescale the result so
+    that the three coordinates sum to 1.
+
+    When the blur_radius is greater than 0, a face will still be recorded as
+    overlapping a pixel if the pixel is outside the face. In this case at
+    least one of the barycentric coordinates for the pixel relative to the
+    face will be negative. Clipping will ensure that the texture and z buffer
+    are interpolated correctly.
+
+    Args:
+        bary: tuple of barycentric coordinates
+
+    Returns
+        bary_clip: (w0, w1, w2) tuple of barycentric coordinates with no
+            negative values.
+    """
+    # Only negative values are clamped to 0.0.
+    clipped = [torch.clamp(bary[k], min=0.0) for k in range(3)]
+
+    # The denominator is kept at or above 1e-5 so that the division below
+    # cannot divide by zero.
+    total = torch.clamp(clipped[0] + clipped[1] + clipped[2], min=1e-5)
+    return tuple(w / total for w in clipped)
+
+
+def barycentric_coordinates(p, v0, v1, v2):
+    """
+    Compute the barycentric coordinates of a point relative to a triangle.
+    Each weight is the edge function of p against the edge opposite the
+    corresponding vertex, divided by twice the face area (plus kEpsilon).
+
+    Args:
+        p: Coordinates of a point.
+        v0, v1, v2: Coordinates of the triangle vertices.
+
+    Returns
+        bary: (w0, w1, w2) barycentric coordinates of p.
+    """
+    denom = edge_function(v2, v0, v1) + kEpsilon  # 2 x face area.
+    opposite_edges = ((v1, v2), (v2, v0), (v0, v1))
+    return tuple(edge_function(p, a, b) / denom for a, b in opposite_edges)
+
+
+def point_line_distance(p, v0, v1):
+    """
+    Return the squared distance between line segment (v1 - v0) and point p.
+
+    Args:
+        p: Coordinates of a point.
+        v0, v1: Coordinates of the end points of the line segment.
+
+    Returns:
+        squared distance from p to the closest point on the segment.
+
+    Raises:
+        ValueError: if p, v0 and v1 do not all have the same shape.
+
+    Consider the line extending the segment - this can be parameterized as
+    ``v0 + t (v1 - v0)``. The projection of point p onto the line falls where
+    ``t = [(p - v0) . (v1 - v0)] / |v1 - v0|^2``
+    where . is the dot product.
+
+    The parameter t is clamped to [0, 1] to handle points outside the
+    segment (v1 - v0). Once the projection of the point on the segment is
+    known, the squared distance from p to the projection is returned.
+    """
+    # ``a != b != c`` misses partial mismatches, so test joint equality instead.
+    if not (p.shape == v0.shape == v1.shape):
+        raise ValueError("All points must have the same number of coordinates")
+
+    v1v0 = v1 - v0
+    l2 = v1v0.dot(v1v0)  # |v1 - v0|^2
+    if l2 <= kEpsilon:
+        return (p - v1).dot(p - v1)  # v0 == v1
+
+    t = v1v0.dot(p - v0) / l2
+    t = torch.clamp(t, min=0.0, max=1.0)
+    p_proj = v0 + t * v1v0
+    delta_p = p_proj - p
+    return delta_p.dot(delta_p)
+
+
+def point_triangle_distance(p, v0, v1, v2):
+    """
+    Return the squared distance between a point and the closest triangle edge.
+    Raises a ValueError if the four inputs do not all have the same shape.
+
+    Args:
+        p: Coordinates of a point.
+        v0, v1, v2: Coordinates of the three triangle vertices.
+
+    Returns:
+        smallest squared (unsigned) distance from p to any of the three edges.
+    """
+    # ``a != b != c`` misses partial mismatches, so test joint equality instead.
+    if not (p.shape == v0.shape == v1.shape == v2.shape):
+        raise ValueError("All points must have the same number of coordinates")
+
+    e01_dist = point_line_distance(p, v0, v1)
+    e02_dist = point_line_distance(p, v0, v2)
+    e12_dist = point_line_distance(p, v1, v2)
+    return torch.min(torch.min(e01_dist, e02_dist), e12_dist)
diff --git a/pytorch3d/pytorch3d/renderer/mesh/rasterizer.py b/pytorch3d/pytorch3d/renderer/mesh/rasterizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ef2760393bdebcf42f34a6bd3972cbafe2383f2
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/mesh/rasterizer.py
@@ -0,0 +1,271 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from dataclasses import dataclass
+from typing import Optional, Tuple, Union
+
+import torch
+import torch.nn as nn
+from pytorch3d.renderer.cameras import try_get_projection_transform
+
+from .rasterize_meshes import rasterize_meshes
+
+
+@dataclass(frozen=True)
+class Fragments:
+    """
+    Outputs of a rasterizer. ``detach`` gives a copy cut off from the computational
+    graph so that gradients stop flowing through the rasterizer.
+
+    Members:
+        pix_to_face:
+            LongTensor of shape (N, image_size, image_size, faces_per_pixel) giving
+            the indices of the nearest faces at each pixel, sorted in ascending
+            z-order. Concretely ``pix_to_face[n, y, x, k] = f`` means that
+            ``faces_verts[f]`` is the kth closest face (in the z-direction) to pixel
+            (y, x). Pixels that are hit by fewer than faces_per_pixel are padded with
+            -1.
+
+        zbuf:
+            FloatTensor of shape (N, image_size, image_size, faces_per_pixel) giving
+            the NDC z-coordinates of the nearest faces at each pixel, sorted in
+            ascending z-order. Concretely, if ``pix_to_face[n, y, x, k] = f`` then
+            ``zbuf[n, y, x, k]`` is the depth of face ``f`` at that pixel. Pixels
+            hit by fewer than faces_per_pixel are padded with -1.
+
+        bary_coords:
+            FloatTensor of shape (N, image_size, image_size, faces_per_pixel, 3)
+            giving the barycentric coordinates in NDC units of the nearest faces at
+            each pixel, sorted in ascending z-order. Concretely, if ``pix_to_face[n,
+            y, x, k] = f`` then ``[w0, w1, w2] = bary_coords[n, y, x, k]`` gives the
+            barycentric coords for pixel (y, x) relative to the face defined by
+            ``face_verts[f]``. Pixels hit by fewer than faces_per_pixel are padded
+            with -1.
+
+        dists:
+            FloatTensor of shape (N, image_size, image_size, faces_per_pixel) giving
+            the signed squared distance (in NDC units) in the x/y plane between each
+            pixel and the face. Concretely if ``pix_to_face[n, y, x, k] = f`` then
+            ``dists[n, y, x, k]`` relates pixel (y, x) to the face given by vertices
+            ``face_verts[f]``. Pixels hit with fewer than ``faces_per_pixel`` are
+            padded with -1. May be None.
+    """
+
+    pix_to_face: torch.Tensor
+    zbuf: torch.Tensor
+    bary_coords: torch.Tensor
+    dists: Optional[torch.Tensor]
+
+    def detach(self) -> "Fragments":
+        detached_dists = None if self.dists is None else self.dists.detach()
+        return Fragments(
+            pix_to_face=self.pix_to_face,
+            zbuf=self.zbuf.detach(),
+            bary_coords=self.bary_coords.detach(),
+            dists=detached_dists,
+        )
+
+
+@dataclass
+class RasterizationSettings:
+    """
+    Dataclass storing the mesh rasterization params together with their defaults.
+
+    Members:
+        image_size: Either common height and width or (height, width), in pixels.
+        blur_radius: Float distance in the range [0, 2] used to expand the face
+            bounding boxes for rasterization. Setting blur radius
+            results in blurred edges around the shape instead of a
+            hard boundary. Set to 0 for no blur.
+        faces_per_pixel: (int) Number of faces to keep track of per pixel.
+            We return the nearest faces_per_pixel faces along the z-axis.
+        bin_size: Size of bins to use for coarse-to-fine rasterization. Setting
+            bin_size=0 uses naive rasterization; setting bin_size=None attempts
+            to set it heuristically based on the shape of the input. This should
+            not affect the output, but can affect the speed of the forward pass.
+        max_faces_opengl: Max number of faces in any mesh we will rasterize. Used only by
+            MeshRasterizerOpenGL to pre-allocate OpenGL memory.
+        max_faces_per_bin: Only applicable when using coarse-to-fine
+            rasterization (bin_size != 0); this is the maximum number of faces
+            allowed within each bin. This should not affect the output values,
+            but can affect the memory usage in the forward pass.
+            Setting max_faces_per_bin=None attempts to set with a heuristic.
+        perspective_correct: Whether to apply perspective correction when
+            computing barycentric coordinates for pixels.
+            None (default) means make correction if the camera uses perspective.
+        clip_barycentric_coords: Whether, after any perspective correction
+            is applied but before the depth is calculated (e.g. for
+            z clipping), to "correct" a location outside the face (i.e. with
+            a negative barycentric coordinate) to a position on the edge of the
+            face. None (default) means clip if blur_radius > 0, which is a condition
+            under which such outside-face-points are likely.
+        cull_backfaces: Whether to only rasterize mesh faces which are
+            visible to the camera.  This assumes that vertices of
+            front-facing triangles are ordered in an anti-clockwise
+            fashion, and triangles that face away from the camera are
+            in a clockwise order relative to the current view
+            direction. NOTE: This will only work if the mesh faces are
+            consistently defined with counter-clockwise ordering when
+            viewed from the outside.
+        z_clip_value: if not None, then triangles will be clipped (and possibly
+            subdivided into smaller triangles) such that z >= z_clip_value.
+            This avoids camera projections that go to infinity as z->0.
+            Default is None; MeshRasterizer.forward then uses znear / 2 when
+            perspective correction is on and the camera reports a znear, and
+            no clipping otherwise. See clip.py for the extra computation.
+        cull_to_frustum: Whether to cull triangles outside the view frustum.
+            Culling involves removing all faces which fall outside view frustum.
+            Default is False for performance as often not needed.
+    """
+
+    image_size: Union[int, Tuple[int, int]] = 256
+    blur_radius: float = 0.0
+    faces_per_pixel: int = 1
+    bin_size: Optional[int] = None
+    max_faces_opengl: int = 10_000_000
+    max_faces_per_bin: Optional[int] = None
+    perspective_correct: Optional[bool] = None
+    clip_barycentric_coords: Optional[bool] = None
+    cull_backfaces: bool = False
+    z_clip_value: Optional[float] = None
+    cull_to_frustum: bool = False
+
+
+class MeshRasterizer(nn.Module):
+    """
+    This class implements methods for rasterizing a batch of heterogeneous
+    Meshes.
+    """
+
+    def __init__(self, cameras=None, raster_settings=None) -> None:
+        """
+        Args:
+            cameras: A cameras object which has a  `transform_points` method
+                which returns the transformed points after applying the
+                world-to-view and view-to-ndc transformations.
+            raster_settings: the parameters for rasterization. This should be a
+                RasterizationSettings instance; None selects the defaults.
+
+        All these initial settings can be overridden by passing keyword
+        arguments to the forward function.
+        """
+        super().__init__()
+        if raster_settings is None:
+            raster_settings = RasterizationSettings()
+
+        self.cameras = cameras
+        self.raster_settings = raster_settings
+
+    def to(self, device):
+        # Manually move to device cameras as it is not a subclass of nn.Module
+        if self.cameras is not None:
+            self.cameras = self.cameras.to(device)
+        return self
+
+    def transform(self, meshes_world, **kwargs) -> torch.Tensor:
+        """
+        Args:
+            meshes_world: a Meshes object representing a batch of meshes with
+                vertex coordinates in world space.
+
+        Returns:
+            meshes_proj: a Meshes object with the vertex positions projected
+            in NDC space (z is kept as the view space depth)
+
+        NOTE: keeping this as a separate function for readability but it could
+        be moved into forward.
+        """
+        cameras = kwargs.get("cameras", self.cameras)
+        if cameras is None:
+            raise ValueError(
+                "Cameras must be specified either at initialization "
+                "or in the forward pass of MeshRasterizer"
+            )
+
+        n_cameras = len(cameras)
+        if n_cameras != 1 and n_cameras != len(meshes_world):
+            msg = "Wrong number (%r) of cameras for %r meshes"
+            raise ValueError(msg % (n_cameras, len(meshes_world)))
+
+        verts_world = meshes_world.verts_padded()
+
+        # NOTE: Retaining view space z coordinate for now.
+        # TODO: Revisit whether or not to transform z coordinate to [-1, 1] or
+        # [0, 1] range.
+        eps = kwargs.get("eps", None)
+        verts_view = cameras.get_world_to_view_transform(**kwargs).transform_points(
+            verts_world, eps=eps
+        )
+        to_ndc_transform = cameras.get_ndc_camera_transform(**kwargs)
+        projection_transform = try_get_projection_transform(cameras, kwargs)
+        if projection_transform is not None:
+            projection_transform = projection_transform.compose(to_ndc_transform)
+            verts_ndc = projection_transform.transform_points(verts_view, eps=eps)
+        else:
+            # Call transform_points instead of explicitly composing transforms to handle
+            # the case, where camera class does not have a projection matrix form.
+            verts_proj = cameras.transform_points(verts_world, eps=eps)
+            verts_ndc = to_ndc_transform.transform_points(verts_proj, eps=eps)
+
+        verts_ndc[..., 2] = verts_view[..., 2]
+        meshes_ndc = meshes_world.update_padded(new_verts_padded=verts_ndc)
+        return meshes_ndc
+
+    def forward(self, meshes_world, **kwargs) -> Fragments:
+        """
+        Args:
+            meshes_world: a Meshes object representing a batch of meshes with
+                          coordinates in world space.
+        Returns:
+            Fragments: Rasterization outputs as a Fragments dataclass.
+        """
+        meshes_proj = self.transform(meshes_world, **kwargs)
+        raster_settings = kwargs.get("raster_settings", self.raster_settings)
+
+        # By default, turn on clip_barycentric_coords if blur_radius > 0.
+        # When blur_radius > 0, a face can be matched to a pixel that is outside the
+        # face, resulting in negative barycentric coordinates.
+        clip_barycentric_coords = raster_settings.clip_barycentric_coords
+        if clip_barycentric_coords is None:
+            clip_barycentric_coords = raster_settings.blur_radius > 0.0
+
+        # If not specified, infer perspective_correct and z_clip_value from the camera
+        cameras = kwargs.get("cameras", self.cameras)
+        if raster_settings.perspective_correct is not None:
+            perspective_correct = raster_settings.perspective_correct
+        else:
+            perspective_correct = cameras.is_perspective()
+        if raster_settings.z_clip_value is not None:
+            z_clip = raster_settings.z_clip_value
+        else:
+            znear = cameras.get_znear()
+            if isinstance(znear, torch.Tensor):
+                znear = znear.min().item()
+            z_clip = None if not perspective_correct or znear is None else znear / 2
+
+        # Rasterize the projected meshes with the resolved settings; the vertex
+        # depth used here is the view space z retained by transform().
+
+        pix_to_face, zbuf, bary_coords, dists = rasterize_meshes(
+            meshes_proj,
+            image_size=raster_settings.image_size,
+            blur_radius=raster_settings.blur_radius,
+            faces_per_pixel=raster_settings.faces_per_pixel,
+            bin_size=raster_settings.bin_size,
+            max_faces_per_bin=raster_settings.max_faces_per_bin,
+            clip_barycentric_coords=clip_barycentric_coords,
+            perspective_correct=perspective_correct,
+            cull_backfaces=raster_settings.cull_backfaces,
+            z_clip_value=z_clip,
+            cull_to_frustum=raster_settings.cull_to_frustum,
+        )
+
+        return Fragments(
+            pix_to_face=pix_to_face,
+            zbuf=zbuf,
+            bary_coords=bary_coords,
+            dists=dists,
+        )
diff --git a/pytorch3d/pytorch3d/renderer/mesh/renderer.py b/pytorch3d/pytorch3d/renderer/mesh/renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..98576a9fcc6797090b493cfbd05e2234e79ba66b
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/mesh/renderer.py
@@ -0,0 +1,110 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+import torch.nn as nn
+
+from ...structures.meshes import Meshes
+
+# A renderer class should be initialized with a
+# function for rasterization and a function for shading.
+# The rasterizer should:
+#     - transform inputs from world -> screen space
+#     - rasterize inputs
+#     - return fragments
+# The shader can take fragments as input along with any other properties of
+# the scene and generate images.
+
+# E.g. rasterize inputs and then shade
+#
+# fragments = self.rasterizer(meshes)
+# images = self.shader(fragments, meshes)
+# return images
+
+
+class MeshRenderer(nn.Module):
+    """
+    A class for rendering a batch of heterogeneous meshes. The class should
+    be initialized with a rasterizer (a MeshRasterizer or a MeshRasterizerOpenGL)
+    and shader class which each have a forward function.
+    """
+
+    def __init__(self, rasterizer, shader) -> None:
+        super().__init__()
+        self.rasterizer = rasterizer
+        self.shader = shader
+
+    def to(self, device):
+        # Rasterizer and shader have submodules which are not of type nn.Module
+        self.rasterizer.to(device)
+        self.shader.to(device)
+        return self
+
+    def forward(self, meshes_world: Meshes, **kwargs) -> torch.Tensor:
+        """
+        Render a batch of images from a batch of meshes by rasterizing and then
+        shading.
+
+        NOTE: If the blur radius for rasterization is > 0.0, some pixels can
+        have one or more barycentric coordinates lying outside the range [0, 1].
+        For a pixel with out of bounds barycentric coordinates with respect to a
+        face f, clipping is required before interpolating the texture uv
+        coordinates and z buffer so that the colors and depths are limited to
+        the range for the corresponding face.
+        For this set rasterizer.raster_settings.clip_barycentric_coords=True
+        """
+        fragments = self.rasterizer(meshes_world, **kwargs)
+        images = self.shader(fragments, meshes_world, **kwargs)
+
+        return images
+
+
+class MeshRendererWithFragments(nn.Module):
+    """
+    A class for rendering a batch of heterogeneous meshes. The class should
+    be initialized with a rasterizer (a MeshRasterizer or a MeshRasterizerOpenGL)
+    and shader class which each have a forward function.
+
+    In the forward pass this class returns the `fragments` from which intermediate
+    values such as the depth map can be easily extracted e.g.
+
+    .. code-block:: python
+        images, fragments = renderer(meshes)
+        depth = fragments.zbuf
+    """
+
+    def __init__(self, rasterizer, shader) -> None:
+        super().__init__()
+        self.rasterizer = rasterizer
+        self.shader = shader
+
+    def to(self, device):
+        # Rasterizer and shader have submodules which are not of type nn.Module
+        self.rasterizer.to(device)
+        self.shader.to(device)
+        return self
+
+    def forward(
+        self, meshes_world: Meshes, **kwargs
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """
+        Render a batch of images from a batch of meshes by rasterizing and then
+        shading.
+
+        NOTE: If the blur radius for rasterization is > 0.0, some pixels can
+        have one or more barycentric coordinates lying outside the range [0, 1].
+        For a pixel with out of bounds barycentric coordinates with respect to a
+        face f, clipping is required before interpolating the texture uv
+        coordinates and z buffer so that the colors and depths are limited to
+        the range for the corresponding face.
+        For this set rasterizer.raster_settings.clip_barycentric_coords=True
+        """
+        fragments = self.rasterizer(meshes_world, **kwargs)
+        images = self.shader(fragments, meshes_world, **kwargs)
+
+        return images, fragments
diff --git a/pytorch3d/pytorch3d/renderer/mesh/shader.py b/pytorch3d/pytorch3d/renderer/mesh/shader.py
new file mode 100644
index 0000000000000000000000000000000000000000..40e9cd17d0e12c6cff0d03a48e89a0b151a228de
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/mesh/shader.py
@@ -0,0 +1,442 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import warnings
+from typing import Optional
+
+import torch
+import torch.nn as nn
+
+from ...common.datatypes import Device
+from ...structures.meshes import Meshes
+from ..blending import (
+    BlendParams,
+    hard_rgb_blend,
+    sigmoid_alpha_blend,
+    softmax_rgb_blend,
+)
+from ..lighting import PointLights
+from ..materials import Materials
+from ..splatter_blend import SplatterBlender
+from ..utils import TensorProperties
+from .rasterizer import Fragments
+from .shading import (
+    _phong_shading_with_pixels,
+    flat_shading,
+    gouraud_shading,
+    phong_shading,
+)
+
+
+# A Shader should take as input fragments from the output of rasterization
+# along with scene params and output images. A shader could perform operations
+# such as:
+#     - interpolate vertex attributes for all the fragments
+#     - sample colors from a texture map
+#     - apply per pixel lighting
+#     - blend colors across top K faces per pixel.
+class ShaderBase(nn.Module):
+    def __init__(
+        self,
+        device: Device = "cpu",
+        cameras: Optional[TensorProperties] = None,
+        lights: Optional[TensorProperties] = None,
+        materials: Optional[Materials] = None,
+        blend_params: Optional[BlendParams] = None,
+    ) -> None:
+        super().__init__()
+        self.lights = lights if lights is not None else PointLights(device=device)
+        self.materials = (
+            materials if materials is not None else Materials(device=device)
+        )
+        self.cameras = cameras
+        self.blend_params = blend_params if blend_params is not None else BlendParams()
+
+    def _get_cameras(self, **kwargs):
+        cameras = kwargs.get("cameras", self.cameras)
+        if cameras is None:
+            msg = "Cameras must be specified either at initialization \
+                or in the forward pass of the shader."
+            raise ValueError(msg)
+
+        return cameras
+
+    # pyre-fixme[14]: `to` overrides method defined in `Module` inconsistently.
+    def to(self, device: Device):
+        # Manually move to device modules which are not subclasses of nn.Module
+        cameras = self.cameras
+        if cameras is not None:
+            self.cameras = cameras.to(device)
+        self.materials = self.materials.to(device)
+        self.lights = self.lights.to(device)
+        return self
+
+
+class HardPhongShader(ShaderBase):
+    """
+    Per pixel lighting - the lighting model is applied using the interpolated
+    coordinates and normals for each pixel. The blending function hard assigns
+    the color of the closest face for each pixel.
+
+    To use the default values, simply initialize the shader with the desired
+    device e.g.
+
+    .. code-block::
+
+        shader = HardPhongShader(device=torch.device("cuda:0"))
+    """
+
+    def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor:
+        cameras = super()._get_cameras(**kwargs)
+        texels = meshes.sample_textures(fragments)
+        lights = kwargs.get("lights", self.lights)
+        materials = kwargs.get("materials", self.materials)
+        blend_params = kwargs.get("blend_params", self.blend_params)
+        colors = phong_shading(
+            meshes=meshes,
+            fragments=fragments,
+            texels=texels,
+            lights=lights,
+            cameras=cameras,
+            materials=materials,
+        )
+        images = hard_rgb_blend(colors, fragments, blend_params)
+        return images
+
+
+class SoftPhongShader(ShaderBase):
+    """
+    Per pixel lighting - the lighting model is applied using the interpolated
+    coordinates and normals for each pixel. The blending function returns the
+    soft aggregated color using all the faces per pixel.
+
+    To use the default values, simply initialize the shader with the desired
+    device e.g.
+
+    .. code-block::
+
+        shader = SoftPhongShader(device=torch.device("cuda:0"))
+    """
+
+    def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor:
+        cameras = super()._get_cameras(**kwargs)
+        texels = meshes.sample_textures(fragments)
+        lights = kwargs.get("lights", self.lights)
+        materials = kwargs.get("materials", self.materials)
+        blend_params = kwargs.get("blend_params", self.blend_params)
+        colors = phong_shading(
+            meshes=meshes,
+            fragments=fragments,
+            texels=texels,
+            lights=lights,
+            cameras=cameras,
+            materials=materials,
+        )
+        znear = kwargs.get("znear", getattr(cameras, "znear", 1.0))
+        zfar = kwargs.get("zfar", getattr(cameras, "zfar", 100.0))
+        images = softmax_rgb_blend(
+            colors, fragments, blend_params, znear=znear, zfar=zfar
+        )
+        return images
+
+
+class HardGouraudShader(ShaderBase):
+    """
+    Per vertex lighting - the lighting model is applied to the vertex colors and
+    the colors are then interpolated using the barycentric coordinates to
+    obtain the colors for each pixel. The blending function hard assigns
+    the color of the closest face for each pixel.
+
+    To use the default values, simply initialize the shader with the desired
+    device e.g.
+
+    .. code-block::
+
+        shader = HardGouraudShader(device=torch.device("cuda:0"))
+    """
+
+    def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor:
+        cameras = super()._get_cameras(**kwargs)
+        lights = kwargs.get("lights", self.lights)
+        materials = kwargs.get("materials", self.materials)
+        blend_params = kwargs.get("blend_params", self.blend_params)
+
+        # As Gouraud shading applies the illumination to the vertex
+        # colors, the interpolated pixel texture is calculated in the
+        # shading step. In comparison, for Phong shading, the pixel
+        # textures are computed first after which the illumination is
+        # applied.
+        pixel_colors = gouraud_shading(
+            meshes=meshes,
+            fragments=fragments,
+            lights=lights,
+            cameras=cameras,
+            materials=materials,
+        )
+        images = hard_rgb_blend(pixel_colors, fragments, blend_params)
+        return images
+
+
+class SoftGouraudShader(ShaderBase):
+    """
+    Per vertex lighting - the lighting model is applied to the vertex colors and
+    the colors are then interpolated using the barycentric coordinates to
+    obtain the colors for each pixel. The blending function returns the
+    soft aggregated color using all the faces per pixel.
+
+    To use the default values, simply initialize the shader with the desired
+    device e.g.
+
+    .. code-block::
+
+        shader = SoftGouraudShader(device=torch.device("cuda:0"))
+    """
+
+    def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor:
+        cameras = super()._get_cameras(**kwargs)
+        lights = kwargs.get("lights", self.lights)
+        materials = kwargs.get("materials", self.materials)
+        pixel_colors = gouraud_shading(
+            meshes=meshes,
+            fragments=fragments,
+            lights=lights,
+            cameras=cameras,
+            materials=materials,
+        )
+        znear = kwargs.get("znear", getattr(cameras, "znear", 1.0))
+        zfar = kwargs.get("zfar", getattr(cameras, "zfar", 100.0))
+        images = softmax_rgb_blend(
+            pixel_colors, fragments, self.blend_params, znear=znear, zfar=zfar
+        )
+        return images
+
+
+def TexturedSoftPhongShader(
+    device: Device = "cpu",
+    cameras: Optional[TensorProperties] = None,
+    lights: Optional[TensorProperties] = None,
+    materials: Optional[Materials] = None,
+    blend_params: Optional[BlendParams] = None,
+) -> SoftPhongShader:
+    """
+    TexturedSoftPhongShader class has been DEPRECATED. Use SoftPhongShader instead.
+    Preserving TexturedSoftPhongShader as a function for backwards compatibility.
+    """
+    warnings.warn(
+        """TexturedSoftPhongShader is now deprecated;
+            use SoftPhongShader instead.""",
+        PendingDeprecationWarning,
+    )
+    return SoftPhongShader(
+        device=device,
+        cameras=cameras,
+        lights=lights,
+        materials=materials,
+        blend_params=blend_params,
+    )
+
+
+class HardFlatShader(ShaderBase):
+    """
+    Per face lighting - the lighting model is applied using the average face
+    position and the face normal. The blending function hard assigns
+    the color of the closest face for each pixel.
+
+    To use the default values, simply initialize the shader with the desired
+    device e.g.
+
+    .. code-block::
+
+        shader = HardFlatShader(device=torch.device("cuda:0"))
+    """
+
+    def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor:
+        cameras = super()._get_cameras(**kwargs)
+        texels = meshes.sample_textures(fragments)
+        lights = kwargs.get("lights", self.lights)
+        materials = kwargs.get("materials", self.materials)
+        blend_params = kwargs.get("blend_params", self.blend_params)
+        colors = flat_shading(
+            meshes=meshes,
+            fragments=fragments,
+            texels=texels,
+            lights=lights,
+            cameras=cameras,
+            materials=materials,
+        )
+        images = hard_rgb_blend(colors, fragments, blend_params)
+        return images
+
+
+class SoftSilhouetteShader(nn.Module):
+    """
+    Calculate the silhouette by blending the top K faces for each pixel based
+    on the 2d euclidean distance of the center of the pixel to the mesh face.
+
+    Use this shader for generating silhouettes similar to SoftRasterizer [0].
+
+    .. note::
+
+        To be consistent with SoftRasterizer, initialize the
+        RasterizationSettings for the rasterizer with
+        `blur_radius = np.log(1. / 1e-4 - 1.) * blend_params.sigma`
+
+    [0] Liu et al, 'Soft Rasterizer: A Differentiable Renderer for Image-based
+        3D Reasoning', ICCV 2019
+    """
+
+    def __init__(self, blend_params: Optional[BlendParams] = None) -> None:
+        super().__init__()
+        self.blend_params = blend_params if blend_params is not None else BlendParams()
+
+    def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor:
+        """
+        Only want to render the silhouette so RGB values can be ones.
+        There is no need for lighting or texturing
+        """
+        colors = torch.ones_like(fragments.bary_coords)
+        blend_params = kwargs.get("blend_params", self.blend_params)
+        images = sigmoid_alpha_blend(colors, fragments, blend_params)
+        return images
+
+
+class SplatterPhongShader(ShaderBase):
+    """
+    Per pixel lighting - the lighting model is applied using the interpolated
+    coordinates and normals for each pixel. The blending function returns the
+    color aggregated using splats from surrounding pixels (see [0]).
+
+    To use the default values, simply initialize the shader with the desired
+    device e.g.
+
+    .. code-block::
+
+        shader = SplatterPhongShader(device=torch.device("cuda:0"))
+
+    [0] Cole, F. et al., "Differentiable Surface Rendering via Non-differentiable
+        Sampling".
+    """
+
+    def __init__(self, **kwargs):
+        self.splatter_blender = None
+        super().__init__(**kwargs)
+
+    def to(self, device: Device):
+        if self.splatter_blender:
+            self.splatter_blender.to(device)
+        return super().to(device)
+
+    def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor:
+        cameras = super()._get_cameras(**kwargs)
+        texels = meshes.sample_textures(fragments)
+        lights = kwargs.get("lights", self.lights)
+        materials = kwargs.get("materials", self.materials)
+
+        colors, pixel_coords_cameras = _phong_shading_with_pixels(
+            meshes=meshes,
+            fragments=fragments.detach(),
+            texels=texels,
+            lights=lights,
+            cameras=cameras,
+            materials=materials,
+        )
+
+        if not self.splatter_blender:
+            # Init only once, to avoid re-computing constants.
+            N, H, W, K, _ = colors.shape
+            self.splatter_blender = SplatterBlender((N, H, W, K), colors.device)
+
+        blend_params = kwargs.get("blend_params", self.blend_params)
+        self.check_blend_params(blend_params)
+
+        images = self.splatter_blender(
+            colors,
+            pixel_coords_cameras,
+            cameras,
+            fragments.pix_to_face < 0,
+            kwargs.get("blend_params", self.blend_params),
+        )
+
+        return images
+
+    def check_blend_params(self, blend_params):
+        if blend_params.sigma != 0.5:
+            warnings.warn(
+                f"SplatterPhongShader received sigma={blend_params.sigma}. sigma is "
+                "defined in pixel units, and any value other than 0.5 is highly "
+                "unexpected. Only use other values if you know what you are doing. "
+            )
+
+
+class HardDepthShader(ShaderBase):
+    """
+    Renders the Z distances of the closest face for each pixel. If no face is
+    found it returns the zfar value of the camera.
+
+    Output from this shader is [N, H, W, 1] since it's only depth.
+
+    To use the default values, simply initialize the shader with the desired
+    device e.g.
+
+    .. code-block::
+
+        shader = HardDepthShader(device=torch.device("cuda:0"))
+    """
+
+    def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor:
+        cameras = super()._get_cameras(**kwargs)
+
+        zfar = kwargs.get("zfar", getattr(cameras, "zfar", 100.0))
+        mask = fragments.pix_to_face[..., 0:1] < 0
+
+        zbuf = fragments.zbuf[..., 0:1].clone()
+        zbuf[mask] = zfar
+        return zbuf
+
+
+class SoftDepthShader(ShaderBase):
+    """
+    Renders the Z distances using an aggregate of the distances of each face
+    based off of the point distance.  If no face is found it returns the zfar
+    value of the camera.
+
+    Output from this shader is [N, H, W, 1] since it's only depth.
+
+    To use the default values, simply initialize the shader with the desired
+    device e.g.
+
+    .. code-block::
+
+        shader = SoftDepthShader(device=torch.device("cuda:0"))
+    """
+
+    def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor:
+        if fragments.dists is None:
+            raise ValueError("SoftDepthShader requires Fragments.dists to be present.")
+
+        cameras = super()._get_cameras(**kwargs)
+
+        N, H, W, K = fragments.pix_to_face.shape
+        device = fragments.zbuf.device
+        mask = fragments.pix_to_face >= 0
+
+        zfar = kwargs.get("zfar", getattr(cameras, "zfar", 100.0))
+
+        # Sigmoid probability map based on the distance of the pixel to the face.
+        prob_map = torch.sigmoid(-fragments.dists / self.blend_params.sigma) * mask
+
+        # append extra face for zfar
+        dists = torch.cat(
+            (fragments.zbuf, torch.ones((N, H, W, 1), device=device) * zfar), dim=3
+        )
+        probs = torch.cat((prob_map, torch.ones((N, H, W, 1), device=device)), dim=3)
+
+        # compute weighting based off of probabilities using cumsum
+        probs = probs.cumsum(dim=3)
+        probs = probs.clamp(max=1)
+        probs = probs.diff(dim=3, prepend=torch.zeros((N, H, W, 1), device=device))
+
+        return (probs * dists).sum(dim=3).unsqueeze(3)
diff --git a/pytorch3d/pytorch3d/renderer/mesh/shading.py b/pytorch3d/pytorch3d/renderer/mesh/shading.py
new file mode 100644
index 0000000000000000000000000000000000000000..05cb66ade8c465f42437244e3c76bb36bc5bb07a
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/mesh/shading.py
@@ -0,0 +1,223 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from typing import Tuple
+
+import torch
+from pytorch3d.ops import interpolate_face_attributes
+
+from .textures import TexturesVertex
+
+
+def _apply_lighting(
+    points, normals, lights, cameras, materials
+) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Args:
+        points: torch tensor of shape (N, ..., 3) or (P, 3).
+        normals: torch tensor of shape (N, ..., 3) or (P, 3)
+        lights: instance of the Lights class.
+        cameras: instance of the Cameras class.
+        materials: instance of the Materials class.
+
+    Returns:
+        ambient_color: same shape as materials.ambient_color
+        diffuse_color: same shape as the input points
+        specular_color: same shape as the input points
+    """
+    light_diffuse = lights.diffuse(normals=normals, points=points)
+    light_specular = lights.specular(
+        normals=normals,
+        points=points,
+        camera_position=cameras.get_camera_center(),
+        shininess=materials.shininess,
+    )
+    ambient_color = materials.ambient_color * lights.ambient_color
+    diffuse_color = materials.diffuse_color * light_diffuse
+    specular_color = materials.specular_color * light_specular
+
+    if normals.dim() == 2 and points.dim() == 2:
+        # If given packed inputs remove batch dim in output.
+        return (
+            ambient_color.squeeze(),
+            diffuse_color.squeeze(),
+            specular_color.squeeze(),
+        )
+
+    if ambient_color.ndim != diffuse_color.ndim:
+        # Reshape from (N, 3) to have dimensions compatible with
+        # diffuse_color which is of shape (N, H, W, K, 3)
+        ambient_color = ambient_color[:, None, None, None, :]
+    return ambient_color, diffuse_color, specular_color
+
+
+def _phong_shading_with_pixels(
+    meshes, fragments, lights, cameras, materials, texels
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Apply per pixel shading. First interpolate the vertex normals and
+    vertex coordinates using the barycentric coordinates to get the position
+    and normal at each pixel. Then compute the illumination for each pixel.
+    The pixel color is obtained by multiplying the pixel textures by the ambient
+    and diffuse illumination and adding the specular component.
+
+    Args:
+        meshes: Batch of meshes
+        fragments: Fragments named tuple with the outputs of rasterization
+        lights: Lights class containing a batch of lights
+        cameras: Cameras class containing a batch of cameras
+        materials: Materials class containing a batch of material properties
+        texels: texture per pixel of shape (N, H, W, K, 3)
+
+    Returns:
+        colors: (N, H, W, K, 3)
+        pixel_coords: (N, H, W, K, 3), camera coordinates of each intersection.
+    """
+    verts = meshes.verts_packed()  # (V, 3)
+    faces = meshes.faces_packed()  # (F, 3)
+    vertex_normals = meshes.verts_normals_packed()  # (V, 3)
+    faces_verts = verts[faces]
+    faces_normals = vertex_normals[faces]
+    pixel_coords_in_camera = interpolate_face_attributes(
+        fragments.pix_to_face, fragments.bary_coords, faces_verts
+    )
+    pixel_normals = interpolate_face_attributes(
+        fragments.pix_to_face, fragments.bary_coords, faces_normals
+    )
+    ambient, diffuse, specular = _apply_lighting(
+        pixel_coords_in_camera, pixel_normals, lights, cameras, materials
+    )
+    colors = (ambient + diffuse) * texels + specular
+    return colors, pixel_coords_in_camera
+
+
+def phong_shading(
+    meshes, fragments, lights, cameras, materials, texels
+) -> torch.Tensor:
+    """
+    Apply per pixel shading. First interpolate the vertex normals and
+    vertex coordinates using the barycentric coordinates to get the position
+    and normal at each pixel. Then compute the illumination for each pixel.
+    The pixel color is obtained by multiplying the pixel textures by the ambient
+    and diffuse illumination and adding the specular component.
+
+    Args:
+        meshes: Batch of meshes
+        fragments: Fragments named tuple with the outputs of rasterization
+        lights: Lights class containing a batch of lights
+        cameras: Cameras class containing a batch of cameras
+        materials: Materials class containing a batch of material properties
+        texels: texture per pixel of shape (N, H, W, K, 3)
+
+    Returns:
+        colors: (N, H, W, K, 3)
+    """
+    colors, _ = _phong_shading_with_pixels(
+        meshes, fragments, lights, cameras, materials, texels
+    )
+    return colors
+
+
+def gouraud_shading(meshes, fragments, lights, cameras, materials) -> torch.Tensor:
+    """
+    Apply per vertex shading. First compute the vertex illumination by applying
+    ambient, diffuse and specular lighting. If vertex color is available,
+    combine the ambient and diffuse vertex illumination with the vertex color
+    and add the specular component to determine the vertex shaded color.
+    Then interpolate the vertex shaded colors using the barycentric coordinates
+    to get a color per pixel.
+
+    Gouraud shading is only supported for meshes with texture type `TexturesVertex`.
+    This is because the illumination is applied to the vertex colors.
+
+    Args:
+        meshes: Batch of meshes
+        fragments: Fragments named tuple with the outputs of rasterization
+        lights: Lights class containing a batch of lights parameters
+        cameras: Cameras class containing a batch of cameras parameters
+        materials: Materials class containing a batch of material properties
+
+    Returns:
+        colors: (N, H, W, K, 3)
+    """
+    if not isinstance(meshes.textures, TexturesVertex):
+        raise ValueError("Mesh textures must be an instance of TexturesVertex")
+
+    faces = meshes.faces_packed()  # (F, 3)
+    verts = meshes.verts_packed()  # (V, 3)
+    verts_normals = meshes.verts_normals_packed()  # (V, 3)
+    verts_colors = meshes.textures.verts_features_packed()  # (V, D)
+    vert_to_mesh_idx = meshes.verts_packed_to_mesh_idx()
+
+    # Format properties of lights and materials so they are compatible
+    # with the packed representation of the vertices. This transforms
+    # all tensor properties in the class from shape (N, ...) -> (V, ...) where
+    # V is the number of packed vertices. If the number of meshes in the
+    # batch is one then this is not necessary.
+    if len(meshes) > 1:
+        lights = lights.clone().gather_props(vert_to_mesh_idx)
+        cameras = cameras.clone().gather_props(vert_to_mesh_idx)
+        materials = materials.clone().gather_props(vert_to_mesh_idx)
+
+    # Calculate the illumination at each vertex
+    ambient, diffuse, specular = _apply_lighting(
+        verts, verts_normals, lights, cameras, materials
+    )
+
+    verts_colors_shaded = verts_colors * (ambient + diffuse) + specular
+    face_colors = verts_colors_shaded[faces]
+    colors = interpolate_face_attributes(
+        fragments.pix_to_face, fragments.bary_coords, face_colors
+    )
+    return colors
+
+
+def flat_shading(meshes, fragments, lights, cameras, materials, texels) -> torch.Tensor:
+    """
+    Apply per face shading. Use the average face position and the face normals
+    to compute the ambient, diffuse and specular lighting. Apply the ambient
+    and diffuse color to the pixel color and add the specular component to
+    determine the final pixel color.
+
+    Args:
+        meshes: Batch of meshes
+        fragments: Fragments named tuple with the outputs of rasterization
+        lights: Lights class containing a batch of lights parameters
+        cameras: Cameras class containing a batch of cameras parameters
+        materials: Materials class containing a batch of material properties
+        texels: texture per pixel of shape (N, H, W, K, 3)
+
+    Returns:
+        colors: (N, H, W, K, 3)
+    """
+    verts = meshes.verts_packed()  # (V, 3)
+    faces = meshes.faces_packed()  # (F, 3)
+    face_normals = meshes.faces_normals_packed()  # (V, 3)
+    faces_verts = verts[faces]
+    face_coords = faces_verts.mean(dim=-2)  # (F, 3, XYZ) mean xyz across verts
+
+    # Replace empty pixels in pix_to_face with 0 in order to interpolate.
+    mask = fragments.pix_to_face == -1
+    pix_to_face = fragments.pix_to_face.clone()
+    pix_to_face[mask] = 0
+
+    N, H, W, K = pix_to_face.shape
+    idx = pix_to_face.view(N * H * W * K, 1).expand(N * H * W * K, 3)
+
+    # gather pixel coords
+    pixel_coords = face_coords.gather(0, idx).view(N, H, W, K, 3)
+    pixel_coords[mask] = 0.0
+    # gather pixel normals
+    pixel_normals = face_normals.gather(0, idx).view(N, H, W, K, 3)
+    pixel_normals[mask] = 0.0
+
+    # Calculate the illumination at each face
+    ambient, diffuse, specular = _apply_lighting(
+        pixel_coords, pixel_normals, lights, cameras, materials
+    )
+    colors = (ambient + diffuse) * texels + specular
+    return colors
diff --git a/pytorch3d/pytorch3d/renderer/mesh/textures.py b/pytorch3d/pytorch3d/renderer/mesh/textures.py
new file mode 100644
index 0000000000000000000000000000000000000000..599271554ef86678e178e27733642973df390d84
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/mesh/textures.py
@@ -0,0 +1,1669 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import itertools
+import warnings
+from typing import Dict, List, Optional, Tuple, TYPE_CHECKING, Union
+
+import torch
+import torch.nn.functional as F
+from pytorch3d.ops import interpolate_face_attributes
+from pytorch3d.structures.utils import list_to_packed, list_to_padded, padded_to_list
+from torch.nn.functional import interpolate
+
+from .utils import pack_unique_rectangles, PackedRectangle, Rectangle
+
+
+# This file contains classes and helper functions for texturing.
+# There are three types of textures: TexturesVertex, TexturesAtlas
+# and TexturesUV which inherit from a base textures class TexturesBase.
+#
+# Each texture class has a method 'sample_textures' to sample a
+# value given barycentric coordinates.
+#
+# All the textures accept either list or padded inputs. The values
+# are stored as either per face values (TexturesAtlas, TexturesUV),
+# or per face vertex features (TexturesVertex).
+
+
+def _list_to_padded_wrapper(
+    x: List[torch.Tensor],
+    pad_size: Union[list, tuple, None] = None,
+    pad_value: float = 0.0,
+) -> torch.Tensor:
+    r"""
+    This is a wrapper function for
+    pytorch3d.structures.utils.list_to_padded function which only accepts
+    3-dimensional inputs.
+
+    For this use case, the input x is of shape (F, 3, ...) where only F
+    is different for each element in the list
+
+    Transforms a list of N tensors each of shape (Mi, ...) into a single tensor
+    of shape (N, pad_size, ...), or (N, max(Mi), ...)
+    if pad_size is None.
+
+    Every tensor is flattened to (Mi, D), with D the product of the trailing
+    dimensions of x[0], padded via list_to_padded, and the result is reshaped
+    back to the original trailing dimensions.
+
+    Args:
+      x: list of Tensors. All elements must have the same number of
+        dimensions and the same trailing shape (everything after dim 0)
+        as x[0].
+      pad_size: int specifying the size of the first dimension
+        of the padded tensor
+      pad_value: float value to be used to fill the padded tensor
+
+    Returns:
+      x_padded: tensor consisting of padded input tensors
+
+    Raises:
+      ValueError: if any element differs from x[0] in its number of
+        dimensions or in its trailing shape.
+    """
+    N = len(x)
+    dims = x[0].ndim
+    reshape_dims = x[0].shape[1:]
+    D = torch.prod(torch.tensor(reshape_dims)).item()
+    x_reshaped = []
+    for y in x:
+        # A mismatch in EITHER the rank or the trailing shape must be rejected:
+        # with `and` a same-rank tensor with different trailing dims slipped
+        # through and was silently re-laid-out by the reshape(-1, D) below.
+        if y.ndim != dims or y.shape[1:] != reshape_dims:
+            msg = (
+                "list_to_padded requires tensors to have the same number of dimensions"
+            )
+            raise ValueError(msg)
+        # pyre-fixme[6]: For 2nd param expected `int` but got `Union[bool, float, int]`.
+        x_reshaped.append(y.reshape(-1, D))
+    x_padded = list_to_padded(x_reshaped, pad_size=pad_size, pad_value=pad_value)
+    # pyre-fixme[58]: `+` is not supported for operand types `Tuple[int, int]` and
+    #  `Size`.
+    return x_padded.reshape((N, -1) + reshape_dims)
+
+
+def _padded_to_list_wrapper(
+    x: torch.Tensor, split_size: Union[list, tuple, None] = None
+) -> List[torch.Tensor]:
+    r"""
+    Split a padded tensor with arbitrary trailing dimensions into a list.
+
+    pytorch3d.structures.utils.padded_to_list only accepts 3-dimensional
+    inputs, so the trailing dimensions of x are flattened into one before the
+    call and restored on every returned element afterwards.
+
+    Args:
+      x: padded Tensor of shape (N, M, ...).
+      split_size: list of ints defining the number of items for each tensor
+        in the output list. Passed through unchanged to padded_to_list.
+
+    Returns:
+      x_list: a list of tensors, each with the trailing dimensions of x.
+    """
+    batch, max_len = x.shape[:2]
+    trailing = x.shape[2:]
+    flat_width = torch.prod(torch.tensor(trailing)).item()
+    # pyre-fixme[6]: For 3rd param expected `int` but got `Union[bool, float, int]`.
+    flattened = x.reshape(batch, max_len, flat_width)
+    pieces = padded_to_list(flattened, split_size=split_size)
+    restored = []
+    for piece in pieces:
+        # pyre-fixme[58]: `+` is not supported for operand types
+        #  `Tuple[typing.Any]` and `Size`.
+        restored.append(piece.reshape((piece.shape[0],) + trailing))
+    return restored
+
+
+def _pad_texture_maps(
+    images: Union[Tuple[torch.Tensor], List[torch.Tensor]], align_corners: bool
+) -> torch.Tensor:
+    """
+    Bring all texture images to a common height and width and stack them.
+
+    The target size is the largest height and the largest width found over
+    all images. Images that already have that size are used as they are;
+    the others are resized to it with bilinear interpolation.
+
+    Args:
+        images: list of N tensors of shape (H_i, W_i, C)
+        align_corners: used for interpolation
+
+    Returns:
+        tex_maps: Tensor of shape (N, max_H, max_W, C)
+    """
+    textures = list(images)
+
+    target_h, target_w = 0, 0
+    for tex in textures:
+        height, width, _channels = tex.shape
+        target_h = max(target_h, height)
+        target_w = max(target_w, width)
+    target = (target_h, target_w)
+
+    resized = []
+    for tex in textures:
+        if tex.shape[:2] == target:
+            resized.append(tex)
+            continue
+        # interpolate expects (B, C, H, W); add a batch dim and move channels first.
+        as_bchw = tex.permute(2, 0, 1)[None]
+        scaled = interpolate(
+            as_bchw,
+            size=target,
+            mode="bilinear",
+            align_corners=align_corners,
+        )
+        resized.append(scaled[0].permute(1, 2, 0))
+    return torch.stack(resized, dim=0)  # (num_tex_maps, max_H, max_W, C)
+
+
+# A base class for defining a batch of textures
+# with helper methods.
+# This is also useful to have so that inside `Meshes`
+# we can allow the input textures to be any texture
+# type which is an instance of the base class.
+class TexturesBase:
+    """
+    Common base for the batched texture classes. It provides shared helpers
+    (isempty, to, _extend, _getitem) and declares the methods each concrete
+    texture class is expected to implement.
+    """
+
+    def isempty(self):
+        """
+        Return a truthy value if the batch size is 0 or no element is marked
+        valid. Returns False when either `_N` or `valid` is None.
+        """
+        if self._N is None or self.valid is None:
+            return False
+        return self._N == 0 or self.valid.eq(False).all()
+
+    def to(self, device):
+        """
+        Move every tensor attribute, and every list/tuple attribute made up
+        only of tensors, to `device`. Such lists/tuples are stored back as
+        lists. Records `device` on the object and returns self.
+        """
+        for name in dir(self):
+            value = getattr(self, name)
+            holds_only_tensors = isinstance(value, (list, tuple)) and all(
+                torch.is_tensor(item) for item in value
+            )
+            if holds_only_tensors:
+                setattr(self, name, [item.to(device) for item in value])
+            elif torch.is_tensor(value) and value.device != device:
+                setattr(self, name, value.to(device))
+        self.device = device
+        return self
+
+    def _extend(self, N: int, props: List[str]) -> Dict[str, Union[torch.Tensor, List]]:
+        """
+        Build a dict in which each requested property is repeated N times
+        per batch element.
+
+        Args:
+            N: number of new copies of each texture
+                in the batch.
+            props: a List of strings which refer to either
+                class attributes or class methods which
+                return tensors or lists.
+
+        Returns:
+            Dict keyed by the entries of props whose values are lists or
+            tensors; the values are the extended properties. Properties of
+            any other type are left out of the dict.
+
+        Raises:
+            ValueError: if N is not a positive int, or if a list property
+                contains anything other than ints/floats.
+        """
+        if not isinstance(N, int):
+            raise ValueError("N must be an integer.")
+        if N <= 0:
+            raise ValueError("N must be > 0.")
+
+        extended = {}
+        for name in props:
+            value = getattr(self, name)
+            if callable(value):
+                value = value()  # class method
+            if isinstance(value, list):
+                if any(not isinstance(item, (int, float)) for item in value):
+                    raise ValueError("Extend only supports lists of scalars")
+                # Each scalar is repeated N times in place: [a, b] -> [a, a, b, b].
+                extended[name] = [item for item in value for _ in range(N)]
+            elif torch.is_tensor(value):
+                extended[name] = value.repeat_interleave(N, dim=0)
+        return extended
+
+    def _getitem(self, index: Union[int, slice], props: List[str]):
+        """
+        Helper function for __getitem__.
+
+        For an int or slice index each property is indexed directly. For a
+        list or tensor index (a bool tensor is first converted to the
+        positions of its True entries) each property is returned as a list
+        of the selected elements. Any other index type yields an empty dict.
+        """
+
+        def _fetch(name):
+            value = getattr(self, name)
+            return value() if callable(value) else value  # class method
+
+        if isinstance(index, (int, slice)):
+            return {name: _fetch(name)[index] for name in props}
+
+        if isinstance(index, list):
+            index = torch.tensor(index)
+        if not isinstance(index, torch.Tensor):
+            return {}
+
+        if index.dtype == torch.bool:
+            index = index.nonzero()
+            if index.numel() > 0:
+                index = index.squeeze(1)
+            index = index.tolist()
+
+        selected = {}
+        for name in props:
+            value = _fetch(name)
+            selected[name] = [value[i] for i in index]
+        return selected
+
+    def sample_textures(self) -> torch.Tensor:
+        """
+        Different texture classes sample textures in different ways
+        e.g. for vertex textures, the values at each vertex
+        are interpolated across the face using the barycentric
+        coordinates.
+        Each texture class should implement a sample_textures
+        method to take the `fragments` from rasterization.
+        Using `fragments.pix_to_face` and `fragments.bary_coords`
+        this function should return the sampled texture values for
+        each pixel in the output image.
+        """
+        raise NotImplementedError()
+
+    def submeshes(
+        self,
+        vertex_ids_list: List[List[torch.LongTensor]],
+        faces_ids_list: List[List[torch.LongTensor]],
+    ) -> "TexturesBase":
+        """
+        Extract sub-textures used for submeshing.
+        """
+        raise NotImplementedError(f"{self.__class__} does not support submeshes")
+
+    def faces_verts_textures_packed(self) -> torch.Tensor:
+        """
+        Returns the texture for each vertex for each face in the mesh.
+        For N meshes, this function returns sum(Fi)x3xC where Fi is the
+        number of faces in the i-th mesh and C is the dimension of
+        the feature (C = 3 for RGB textures).
+        You can use the utils function in structures.utils to convert the
+        packed representation to a list or padded.
+        """
+        raise NotImplementedError()
+
+    def clone(self) -> "TexturesBase":
+        """
+        Each texture class should implement a method
+        to clone all necessary internal tensors.
+        """
+        raise NotImplementedError()
+
+    def detach(self) -> "TexturesBase":
+        """
+        Each texture class should implement a method
+        to detach all necessary internal tensors.
+        """
+        raise NotImplementedError()
+
+    def __getitem__(self, index) -> "TexturesBase":
+        """
+        Each texture class should implement a method
+        to get the texture properties for the
+        specified elements in the batch.
+        The TexturesBase._getitem(i) method
+        can be used as a helper function to retrieve the
+        class attributes for item i. Then, a new
+        instance of the child class can be created with
+        the attributes.
+        """
+        raise NotImplementedError()
+
+
+def Textures(
+    maps: Optional[Union[List[torch.Tensor], torch.Tensor]] = None,
+    faces_uvs: Optional[torch.Tensor] = None,
+    verts_uvs: Optional[torch.Tensor] = None,
+    verts_rgb: Optional[torch.Tensor] = None,
+) -> TexturesBase:
+    """
+    Textures class has been DEPRECATED.
+    It is kept as a factory function for backwards compatibility: it emits
+    a PendingDeprecationWarning and builds a TexturesUV or TexturesVertex
+    from the given arguments. The UV inputs take precedence over verts_rgb
+    when both are supplied.
+
+    Args:
+        maps: texture map per mesh. This can either be a list of maps
+          [(H, W, C)] or a padded tensor of shape (N, H, W, C).
+        faces_uvs: (N, F, 3) tensor giving the index into verts_uvs for each
+            vertex in the face. Padding value is assumed to be -1.
+        verts_uvs: (N, V, 2) tensor giving the uv coordinate per vertex.
+        verts_rgb: (N, V, C) tensor giving the color per vertex. Padding
+            value is assumed to be -1. (C=3 for RGB.)
+
+    Returns:
+        a TexturesUV if maps, faces_uvs and verts_uvs are all given,
+        otherwise a TexturesVertex built from verts_rgb.
+
+    Raises:
+        ValueError: if neither the full UV triple nor verts_rgb is given.
+    """
+
+    warnings.warn(
+        """Textures class is deprecated,
+        use TexturesUV, TexturesAtlas, TexturesVertex instead.
+        Textures class will be removed in future releases.""",
+        PendingDeprecationWarning,
+    )
+
+    uv_inputs = (faces_uvs, verts_uvs, maps)
+    if all(arg is not None for arg in uv_inputs):
+        return TexturesUV(maps=maps, faces_uvs=faces_uvs, verts_uvs=verts_uvs)
+
+    if verts_rgb is None:
+        raise ValueError(
+            "Textures either requires all three of (faces uvs, verts uvs, maps) or verts rgb"
+        )
+
+    return TexturesVertex(verts_features=verts_rgb)
+
+
+class TexturesAtlas(TexturesBase):
+    def __init__(self, atlas: Union[torch.Tensor, List[torch.Tensor]]) -> None:
+        """
+        A texture representation where each face has a square texture map.
+        This is based on the implementation from SoftRasterizer [1].
+
+        Args:
+            atlas: (N, F, R, R, C) tensor giving the per face texture map,
+                or a list of N tensors of shape (F_i, R, R, C) sharing the
+                same R.
+                The atlas can be created during obj loading with the
+                pytorch3d.io.load_obj function - in the input arguments
+                set `create_texture_atlas=True`. The atlas will be
+                returned in aux.texture_atlas.
+
+        Raises:
+            ValueError: if atlas is neither a tensor nor a list/tuple, or
+                does not have the shape described above.
+
+        The padded and list representations of the textures are stored
+        and the packed representations is computed on the fly and
+        not cached.
+
+        [1] Liu et al, 'Soft Rasterizer: A Differentiable Renderer for Image-based
+            3D Reasoning', ICCV 2019
+            See also https://github.com/ShichenLiu/SoftRas/issues/21
+        """
+        if isinstance(atlas, (list, tuple)):
+            correct_format = all(
+                (
+                    torch.is_tensor(elem)
+                    and elem.ndim == 4
+                    and elem.shape[1] == elem.shape[2]
+                    and elem.shape[1] == atlas[0].shape[1]
+                )
+                for elem in atlas
+            )
+            if not correct_format:
+                msg = (
+                    "Expected atlas to be a list of tensors of shape (F, R, R, C) "
+                    "with the same value of R."
+                )
+                raise ValueError(msg)
+            self._atlas_list = atlas
+            self._atlas_padded = None
+            self.device = torch.device("cpu")
+
+            # These values may be overridden when textures is
+            # passed into the Meshes constructor. For more details
+            # refer to the __init__ of Meshes.
+            self._N = len(atlas)
+            self._num_faces_per_mesh = [len(a) for a in atlas]
+
+            if self._N > 0:
+                self.device = atlas[0].device
+
+        elif torch.is_tensor(atlas):
+            if atlas.ndim != 5:
+                msg = "Expected atlas to be of shape (N, F, R, R, C); got %r"
+                raise ValueError(msg % repr(atlas.ndim))
+            self._atlas_padded = atlas
+            self._atlas_list = None
+            self.device = atlas.device
+
+            # These values may be overridden when textures is
+            # passed into the Meshes constructor. For more details
+            # refer to the __init__ of Meshes.
+            self._N = len(atlas)
+            max_F = atlas.shape[1]
+            self._num_faces_per_mesh = [max_F] * self._N
+        else:
+            raise ValueError("Expected atlas to be a tensor or list")
+
+        # The num_faces_per_mesh, N and valid
+        # are reset inside the Meshes object when textures is
+        # passed into the Meshes constructor. For more details
+        # refer to the __init__ of Meshes.
+        self.valid = torch.ones((self._N,), dtype=torch.bool, device=self.device)
+
+    def clone(self) -> "TexturesAtlas":
+        """
+        Return a new TexturesAtlas whose padded atlas, list atlas (if present)
+        and valid mask are clones of the ones held by self.
+        """
+        tex = self.__class__(atlas=self.atlas_padded().clone())
+        if self._atlas_list is not None:
+            tex._atlas_list = [atlas.clone() for atlas in self._atlas_list]
+        num_faces = (
+            self._num_faces_per_mesh.clone()
+            if torch.is_tensor(self._num_faces_per_mesh)
+            else self._num_faces_per_mesh
+        )
+        tex.valid = self.valid.clone()
+        tex._num_faces_per_mesh = num_faces
+        return tex
+
+    def detach(self) -> "TexturesAtlas":
+        """
+        Return a new TexturesAtlas whose padded atlas, list atlas (if present)
+        and valid mask are detached versions of the ones held by self.
+        """
+        tex = self.__class__(atlas=self.atlas_padded().detach())
+        if self._atlas_list is not None:
+            tex._atlas_list = [atlas.detach() for atlas in self._atlas_list]
+        num_faces = (
+            self._num_faces_per_mesh.detach()
+            if torch.is_tensor(self._num_faces_per_mesh)
+            else self._num_faces_per_mesh
+        )
+        tex.valid = self.valid.detach()
+        tex._num_faces_per_mesh = num_faces
+        return tex
+
+    def __getitem__(self, index) -> "TexturesAtlas":
+        """
+        Return a new TexturesAtlas holding the batch elements selected by
+        index. The selection itself is delegated to TexturesBase._getitem.
+        """
+        props = ["atlas_list", "_num_faces_per_mesh"]
+        new_props = self._getitem(index, props=props)
+        atlas = new_props["atlas_list"]
+        if isinstance(atlas, list):
+            # multiple batch elements
+            new_tex = self.__class__(atlas=atlas)
+        elif torch.is_tensor(atlas):
+            # single element
+            new_tex = self.__class__(atlas=[atlas])
+        else:
+            raise ValueError("Not all values are provided in the correct format")
+        new_tex._num_faces_per_mesh = new_props["_num_faces_per_mesh"]
+        return new_tex
+
+    def atlas_padded(self) -> torch.Tensor:
+        """
+        Return the padded atlas tensor, building it from the list
+        representation (zero padded) and caching it on first use.
+        """
+        if self._atlas_padded is None:
+            if self.isempty():
+                self._atlas_padded = torch.zeros(
+                    (self._N, 0, 0, 0, 3), dtype=torch.float32, device=self.device
+                )
+            else:
+                self._atlas_padded = _list_to_padded_wrapper(
+                    self._atlas_list, pad_value=0.0
+                )
+        return self._atlas_padded
+
+    def atlas_list(self) -> List[torch.Tensor]:
+        """
+        Return the atlas as a list with one tensor per batch element,
+        building it from the padded representation and caching it on first
+        use. For an empty batch the list holds `_N` empty (0, 0, 0, 3)
+        tensors.
+        """
+        if self._atlas_list is None:
+            if self.isempty():
+                # Store the placeholder list in `_atlas_list`. Writing it to
+                # `_atlas_padded` replaced the padded tensor with a Python
+                # list, which then broke the split below and any later
+                # atlas_padded() call.
+                self._atlas_list = [
+                    torch.empty((0, 0, 0, 3), dtype=torch.float32, device=self.device)
+                ] * self._N
+            else:
+                self._atlas_list = _padded_to_list_wrapper(
+                    self._atlas_padded, split_size=self._num_faces_per_mesh
+                )
+        return self._atlas_list
+
+    def atlas_packed(self) -> torch.Tensor:
+        """
+        Return the atlas of all batch elements concatenated along the face
+        dimension. The result is computed on every call and not cached.
+        """
+        if self.isempty():
+            return torch.zeros(
+                (self._N, 0, 0, 3), dtype=torch.float32, device=self.device
+            )
+        atlas_list = self.atlas_list()
+        return list_to_packed(atlas_list)[0]
+
+    def extend(self, N: int) -> "TexturesAtlas":
+        """
+        Return a new TexturesAtlas in which every batch element is repeated
+        N times (see TexturesBase._extend).
+        """
+        new_props = self._extend(N, ["atlas_padded", "_num_faces_per_mesh"])
+        new_tex = self.__class__(atlas=new_props["atlas_padded"])
+        new_tex._num_faces_per_mesh = new_props["_num_faces_per_mesh"]
+        return new_tex
+
+    # pyre-fixme[14]: `sample_textures` overrides method defined in `TexturesBase`
+    #  inconsistently.
+    def sample_textures(self, fragments, **kwargs) -> torch.Tensor:
+        """
+        This is similar to a nearest neighbor sampling and involves a
+        discretization step. The barycentric coordinates from
+        rasterization are used to find the nearest grid cell in the texture
+        atlas and the RGB is returned as the color.
+        This means that this step is differentiable with respect to the RGB
+        values of the texture atlas but not differentiable with respect to the
+        barycentric coordinates.
+
+        TODO: Add a different sampling mode which interpolates the barycentric
+        coordinates to sample the texture and will be differentiable w.r.t
+        the barycentric coordinates.
+
+        Args:
+            fragments:
+                The outputs of rasterization. From this we use
+
+                - pix_to_face: LongTensor of shape (N, H, W, K) specifying the indices
+                of the faces (in the packed representation) which
+                overlap each pixel in the image.
+                - bary_coords: FloatTensor of shape (N, H, W, K, 3) specifying
+                the barycentric coordinates of each pixel
+                relative to the faces (in the packed
+                representation) which overlap the pixel.
+
+        Returns:
+            texels: (N, H, W, K, C). Pixels with no face (pix_to_face < 0)
+            are set to zero.
+        """
+        N, H, W, K = fragments.pix_to_face.shape
+        atlas_packed = self.atlas_packed()
+        R = atlas_packed.shape[1]
+        bary = fragments.bary_coords
+        pix_to_face = fragments.pix_to_face
+
+        bary_w01 = bary[..., :2]
+        # pyre-fixme[16]: `bool` has no attribute `__getitem__`.
+        mask = (pix_to_face < 0)[..., None]
+        bary_w01 = torch.where(mask, torch.zeros_like(bary_w01), bary_w01)
+        # If barycentric coordinates are > 1.0 (in the case of
+        # blur_radius > 0.0), wxy might be > R. We need to clamp this
+        # index to R-1 to index into the texture atlas.
+        w_xy = (bary_w01 * R).to(torch.int64).clamp(max=R - 1)  # (N, H, W, K, 2)
+
+        below_diag = (
+            bary_w01.sum(dim=-1) * R - w_xy.float().sum(dim=-1)
+        ) <= 1.0  # (N, H, W, K)
+        w_x, w_y = w_xy.unbind(-1)
+        w_x = torch.where(below_diag, w_x, (R - 1 - w_x))
+        w_y = torch.where(below_diag, w_y, (R - 1 - w_y))
+
+        texels = atlas_packed[pix_to_face, w_y, w_x]
+        texels = texels * (pix_to_face >= 0)[..., None].float()
+
+        return texels
+
+    def submeshes(
+        self,
+        vertex_ids_list: List[List[torch.LongTensor]],
+        faces_ids_list: List[List[torch.LongTensor]],
+    ) -> "TexturesAtlas":
+        """
+        Extract a sub-texture for use in a submesh.
+
+        If the meshes batch corresponding to this TexturesAtlas contains
+        `n = len(faces_ids_list)` meshes, then self.atlas_list()
+        will be of length n. After submeshing, we obtain a batch of
+        `k = sum(len(f) for f in faces_ids_list)` submeshes (see
+        Meshes.submeshes). This function creates a corresponding
+        TexturesAtlas object with `atlas_list` of length `k`.
+
+        vertex_ids_list is not used by this texture type.
+
+        Raises:
+            IndexError: if faces_ids_list and atlas_list differ in length.
+        """
+        if len(faces_ids_list) != len(self.atlas_list()):
+            raise IndexError(
+                "faces_ids_list must be of " "the same length as atlas_list."
+            )
+
+        sub_features = []
+        for atlas, faces_ids in zip(self.atlas_list(), faces_ids_list):
+            for faces_ids_submesh in faces_ids:
+                sub_features.append(atlas[faces_ids_submesh])
+
+        return self.__class__(sub_features)
+
+    def faces_verts_textures_packed(self) -> torch.Tensor:
+        """
+        Samples texture from each vertex for each face in the mesh.
+        For N meshes with {Fi} number of faces, it returns a
+        tensor of shape sum(Fi)x3xC (C = 3 for RGB).
+        You can use the utils function in structures.utils to convert the
+        packed representation to a list or padded.
+        """
+        atlas_packed = self.atlas_packed()
+        # assume each face consists of (v0, v1, v2).
+        # to sample from the atlas we only need the first two barycentric coordinates.
+        # for details on how this texture sample works refer to the sample_textures function.
+        t0 = atlas_packed[:, 0, -1]  # corresponding to v0  with bary = (1, 0)
+        t1 = atlas_packed[:, -1, 0]  # corresponding to v1 with bary = (0, 1)
+        t2 = atlas_packed[:, 0, 0]  # corresponding to v2 with bary = (0, 0)
+        return torch.stack((t0, t1, t2), dim=1)
+
+    def join_batch(self, textures: List["TexturesAtlas"]) -> "TexturesAtlas":
+        """
+        Join the list of textures given by `textures` to
+        self to create a batch of textures. Return a new
+        TexturesAtlas object with the combined textures.
+
+        Args:
+            textures: List of TexturesAtlas objects
+
+        Returns:
+            new_tex: TexturesAtlas object with the combined
+            textures from self and the list `textures`.
+
+        Raises:
+            ValueError: if any entry of textures is not a TexturesAtlas.
+        """
+        tex_types_same = all(isinstance(tex, TexturesAtlas) for tex in textures)
+        if not tex_types_same:
+            raise ValueError("All textures must be of type TexturesAtlas.")
+
+        atlas_list = []
+        atlas_list += self.atlas_list()
+        # Copy so that the in-place `+=` below does not grow self's own list.
+        num_faces_per_mesh = self._num_faces_per_mesh.copy()
+        for tex in textures:
+            atlas_list += tex.atlas_list()
+            num_faces_per_mesh += tex._num_faces_per_mesh
+        new_tex = self.__class__(atlas=atlas_list)
+        new_tex._num_faces_per_mesh = num_faces_per_mesh
+        return new_tex
+
+    def join_scene(self) -> "TexturesAtlas":
+        """
+        Return a new TexturesAtlas amalgamating the batch.
+        """
+        return self.__class__(atlas=[torch.cat(self.atlas_list())])
+
+    def check_shapes(
+        self, batch_size: int, max_num_verts: int, max_num_faces: int
+    ) -> bool:
+        """
+        Check if the dimensions of the atlas match that of the mesh faces.
+        max_num_verts is not used by this texture type.
+        """
+        # (N, F) should be the same
+        return self.atlas_padded().shape[0:2] == (batch_size, max_num_faces)
+
+
+class TexturesUV(TexturesBase):
+    def __init__(
+        self,
+        maps: Union[torch.Tensor, List[torch.Tensor]],
+        faces_uvs: Union[torch.Tensor, List[torch.Tensor], Tuple[torch.Tensor]],
+        verts_uvs: Union[torch.Tensor, List[torch.Tensor], Tuple[torch.Tensor]],
+        padding_mode: str = "border",
+        align_corners: bool = True,
+        sampling_mode: str = "bilinear",
+    ) -> None:
+        """
+        Textures are represented as a per mesh texture map and uv coordinates for each
+        vertex in each face. NOTE: this class only supports one texture map per mesh.
+
+        Args:
+            maps: texture map per mesh. This can either be a list of maps
+              [(H, W, C)] or a padded tensor of shape (N, H, W, C).
+              For RGB, C = 3.
+            faces_uvs: (N, F, 3) LongTensor giving the index into verts_uvs
+                        for each face
+            verts_uvs: (N, V, 2) tensor giving the uv coordinates per vertex
+                        (a FloatTensor with values between 0 and 1).
+            align_corners: If true, the extreme values 0 and 1 for verts_uvs
+                            indicate the centers of the edge pixels in the maps.
+            padding_mode: padding mode for outside grid values
+                                ("zeros", "border" or "reflection").
+            sampling_mode: type of interpolation used to sample the texture.
+                            Corresponds to the mode parameter in PyTorch's
+                            grid_sample ("nearest" or "bilinear").
+
+        The align_corners and padding_mode arguments correspond to the arguments
+        of the `grid_sample` torch function. There is an informative illustration of
+        the two align_corners options at
+        https://discuss.pytorch.org/t/22663/9 .
+
+        An example of how the indexing into the maps, with align_corners=True,
+        works is as follows.
+        If maps[i] has shape [1001, 101] and the value of verts_uvs[i][j]
+        is [0.4, 0.3], then a value of j in faces_uvs[i] means a vertex
+        whose color is given by maps[i][700, 40]. padding_mode affects what
+        happens if a value in verts_uvs is less than 0 or greater than 1.
+        Note that increasing a value in verts_uvs[..., 0] increases an index
+        in maps, whereas increasing a value in verts_uvs[..., 1] _decreases_
+        an _earlier_ index in maps.
+
+        If align_corners=False, an example would be as follows.
+        If maps[i] has shape [1000, 100] and the value of verts_uvs[i][j]
+        is [0.405, 0.2995], then a value of j in faces_uvs[i] means a vertex
+        whose color is given by maps[i][700, 40].
+        When align_corners=False, padding_mode even matters for values in
+        verts_uvs slightly above 0 or slightly below 1. In this case, the
+        padding_mode matters if the first value is outside the interval
+        [0.0005, 0.9995] or if the second is outside the interval
+        [0.005, 0.995].
+        """
+        self.padding_mode = padding_mode
+        self.align_corners = align_corners
+        self.sampling_mode = sampling_mode
+        if isinstance(faces_uvs, (list, tuple)):
+            for fv in faces_uvs:
+                if fv.ndim != 2 or fv.shape[-1] != 3:
+                    msg = "Expected faces_uvs to be of shape (F, 3); got %r"
+                    raise ValueError(msg % repr(fv.shape))
+            self._faces_uvs_list = faces_uvs
+            self._faces_uvs_padded = None
+            self.device = torch.device("cpu")
+
+            # These values may be overridden when textures is
+            # passed into the Meshes constructor. For more details
+            # refer to the __init__ of Meshes.
+            self._N = len(faces_uvs)
+            self._num_faces_per_mesh = [len(fv) for fv in faces_uvs]
+
+            if self._N > 0:
+                self.device = faces_uvs[0].device
+
+        elif torch.is_tensor(faces_uvs):
+            if faces_uvs.ndim != 3 or faces_uvs.shape[-1] != 3:
+                msg = "Expected faces_uvs to be of shape (N, F, 3); got %r"
+                raise ValueError(msg % repr(faces_uvs.shape))
+            self._faces_uvs_padded = faces_uvs
+            self._faces_uvs_list = None
+            self.device = faces_uvs.device
+
+            # These values may be overridden when textures is
+            # passed into the Meshes constructor. For more details
+            # refer to the __init__ of Meshes.
+            self._N = len(faces_uvs)
+            max_F = faces_uvs.shape[1]
+            self._num_faces_per_mesh = [max_F] * self._N
+        else:
+            raise ValueError("Expected faces_uvs to be a tensor or list")
+
+        if isinstance(verts_uvs, (list, tuple)):
+            for fv in verts_uvs:
+                if fv.ndim != 2 or fv.shape[-1] != 2:
+                    msg = "Expected verts_uvs to be of shape (V, 2); got %r"
+                    raise ValueError(msg % repr(fv.shape))
+            self._verts_uvs_list = verts_uvs
+            self._verts_uvs_padded = None
+
+            if len(verts_uvs) != self._N:
+                raise ValueError(
+                    "verts_uvs and faces_uvs must have the same batch dimension"
+                )
+            if not all(v.device == self.device for v in verts_uvs):
+                raise ValueError("verts_uvs and faces_uvs must be on the same device")
+
+        elif torch.is_tensor(verts_uvs):
+            if (
+                verts_uvs.ndim != 3
+                or verts_uvs.shape[-1] != 2
+                or verts_uvs.shape[0] != self._N
+            ):
+                msg = "Expected verts_uvs to be of shape (N, V, 2); got %r"
+                raise ValueError(msg % repr(verts_uvs.shape))
+            self._verts_uvs_padded = verts_uvs
+            self._verts_uvs_list = None
+
+            if verts_uvs.device != self.device:
+                raise ValueError("verts_uvs and faces_uvs must be on the same device")
+        else:
+            raise ValueError("Expected verts_uvs to be a tensor or list")
+
+        if isinstance(maps, (list, tuple)):
+            self._maps_list = maps
+        else:
+            self._maps_list = None
+        self._maps_padded = self._format_maps_padded(maps)
+
+        if self._maps_padded.device != self.device:
+            raise ValueError("maps must be on the same device as verts/faces uvs.")
+
+        self.valid = torch.ones((self._N,), dtype=torch.bool, device=self.device)
+
+    def _format_maps_padded(
+        self, maps: Union[torch.Tensor, List[torch.Tensor]]
+    ) -> torch.Tensor:
+        """
+        Validate `maps` and return them as a single padded tensor.
+
+        Args:
+            maps: either a 4-dimensional tensor whose first dimension equals
+                the batch size, or a list/tuple holding one 3-dimensional
+                tensor per mesh in the batch.
+
+        Returns:
+            The tensor input unchanged, or the result of `_pad_texture_maps`
+            applied to the list input. For an empty batch given as a list,
+            an empty float32 tensor of shape (0, 0, 0, 3) is returned.
+
+        Raises:
+            ValueError: if `maps` is neither a tensor nor a list/tuple, or if
+                its shape, length or per-map dimensions are inconsistent.
+        """
+        if isinstance(maps, torch.Tensor):
+            shape_ok = maps.ndim == 4 and maps.shape[0] == self._N
+            if not shape_ok:
+                raise ValueError(
+                    "Expected maps to be of shape (N, H, W, C); got %r"
+                    % repr(maps.shape)
+                )
+            return maps
+
+        if not isinstance(maps, (list, tuple)):
+            raise ValueError("Expected maps to be a tensor or list of tensors.")
+
+        if len(maps) != self._N:
+            raise ValueError("Expected one texture map per mesh in the batch.")
+
+        if self._N <= 0:
+            # Nothing to pad: return an empty placeholder on the right device.
+            return torch.empty(
+                (self._N, 0, 0, 3), dtype=torch.float32, device=self.device
+            )
+
+        if any(map_.ndim != 3 for map_ in maps):
+            raise ValueError("Invalid number of dimensions in texture maps")
+        num_channels = maps[0].shape[2]
+        if any(map_.shape[2] != num_channels for map_ in maps):
+            raise ValueError("Inconsistent number of channels in maps")
+        return _pad_texture_maps(maps, align_corners=self.align_corners)
+
+    def clone(self) -> "TexturesUV":
+        """
+        Return a new TexturesUV whose tensors are clones of this one's.
+
+        The padded maps, faces_uvs and verts_uvs are cloned and passed to the
+        constructor; any list representations already held are cloned
+        element-wise. `_num_faces_per_mesh` is cloned only if it is a tensor,
+        otherwise the same object is shared with the new instance.
+        """
+        maps = self.maps_padded().clone()
+        faces_uvs = self.faces_uvs_padded().clone()
+        verts_uvs = self.verts_uvs_padded().clone()
+        tex = self.__class__(
+            maps,
+            faces_uvs,
+            verts_uvs,
+            align_corners=self.align_corners,
+            padding_mode=self.padding_mode,
+            sampling_mode=self.sampling_mode,
+        )
+
+        # Carry over whichever list representations exist on self.
+        if self._maps_list is not None:
+            tex._maps_list = [m.clone() for m in self._maps_list]
+        if self._verts_uvs_list is not None:
+            tex._verts_uvs_list = [v.clone() for v in self._verts_uvs_list]
+        if self._faces_uvs_list is not None:
+            tex._faces_uvs_list = [f.clone() for f in self._faces_uvs_list]
+
+        if torch.is_tensor(self._num_faces_per_mesh):
+            tex._num_faces_per_mesh = self._num_faces_per_mesh.clone()
+        else:
+            tex._num_faces_per_mesh = self._num_faces_per_mesh
+        tex.valid = self.valid.clone()
+        return tex
+
+    def detach(self) -> "TexturesUV":
+        """
+        Return a new TexturesUV whose tensors are detached from this one's.
+
+        The padded maps, faces_uvs and verts_uvs are detached and passed to
+        the constructor; any list representations already held are detached
+        element-wise. `_num_faces_per_mesh` is detached only if it is a
+        tensor, otherwise the same object is shared with the new instance.
+        """
+        maps = self.maps_padded().detach()
+        faces_uvs = self.faces_uvs_padded().detach()
+        verts_uvs = self.verts_uvs_padded().detach()
+        tex = self.__class__(
+            maps,
+            faces_uvs,
+            verts_uvs,
+            align_corners=self.align_corners,
+            padding_mode=self.padding_mode,
+            sampling_mode=self.sampling_mode,
+        )
+
+        # Carry over whichever list representations exist on self.
+        if self._maps_list is not None:
+            tex._maps_list = [m.detach() for m in self._maps_list]
+        if self._verts_uvs_list is not None:
+            tex._verts_uvs_list = [v.detach() for v in self._verts_uvs_list]
+        if self._faces_uvs_list is not None:
+            tex._faces_uvs_list = [f.detach() for f in self._faces_uvs_list]
+
+        if torch.is_tensor(self._num_faces_per_mesh):
+            tex._num_faces_per_mesh = self._num_faces_per_mesh.detach()
+        else:
+            tex._num_faces_per_mesh = self._num_faces_per_mesh
+        tex.valid = self.valid.detach()
+        return tex
+
+    def __getitem__(self, index) -> "TexturesUV":
+        """
+        Build a TexturesUV for the meshes selected by `index`.
+
+        The selection itself is delegated to `self._getitem`. Its result is
+        either a list/tuple per property (several meshes selected) or a
+        single tensor per property (one mesh selected); a single tensor is
+        wrapped in a one-element list before constructing the new object.
+
+        Raises:
+            ValueError: if the selected faces_uvs, verts_uvs and maps are not
+                all lists/tuples or all tensors.
+        """
+        selected = self._getitem(
+            index,
+            ["verts_uvs_list", "faces_uvs_list", "maps_list", "_num_faces_per_mesh"],
+        )
+        faces_uvs = selected["faces_uvs_list"]
+        verts_uvs = selected["verts_uvs_list"]
+        maps = selected["maps_list"]
+
+        parts = [faces_uvs, verts_uvs, maps]
+        if all(isinstance(part, (list, tuple)) for part in parts):
+            # Several meshes were selected; the values are already sequences.
+            pass
+        elif all(torch.is_tensor(part) for part in parts):
+            # A single mesh was selected; wrap each tensor into a batch of one.
+            faces_uvs, verts_uvs, maps = [faces_uvs], [verts_uvs], [maps]
+        else:
+            raise ValueError("Not all values are provided in the correct format")
+
+        new_tex = self.__class__(
+            faces_uvs=faces_uvs,
+            verts_uvs=verts_uvs,
+            maps=maps,
+            padding_mode=self.padding_mode,
+            align_corners=self.align_corners,
+            sampling_mode=self.sampling_mode,
+        )
+        new_tex._num_faces_per_mesh = selected["_num_faces_per_mesh"]
+        return new_tex
+
+    def faces_uvs_padded(self) -> torch.Tensor:
+        """
+        Return faces_uvs in padded form, computing and caching it on first use.
+
+        When the textures are empty a zero tensor of shape (N, 0, 3) is
+        cached; otherwise the list form is padded with `list_to_padded`.
+        """
+        if self._faces_uvs_padded is not None:
+            return self._faces_uvs_padded
+
+        if self.isempty():
+            padded = torch.zeros(
+                (self._N, 0, 3), dtype=torch.float32, device=self.device
+            )
+        else:
+            padded = list_to_padded(self._faces_uvs_list, pad_value=0.0)
+        self._faces_uvs_padded = padded
+        return padded
+
+    def faces_uvs_list(self) -> List[torch.Tensor]:
+        """
+        Return faces_uvs as a list, computing and caching it on first use.
+
+        When the textures are empty the list holds N references to one empty
+        (0, 3) tensor; otherwise the padded form is split with
+        `padded_to_list` using `_num_faces_per_mesh`.
+        """
+        if self._faces_uvs_list is not None:
+            return self._faces_uvs_list
+
+        if self.isempty():
+            placeholder = torch.empty(
+                (0, 3), dtype=torch.float32, device=self.device
+            )
+            as_list = [placeholder] * self._N
+        else:
+            as_list = padded_to_list(
+                self._faces_uvs_padded, split_size=self._num_faces_per_mesh
+            )
+        self._faces_uvs_list = as_list
+        return as_list
+
+    def verts_uvs_padded(self) -> torch.Tensor:
+        """
+        Return verts_uvs in padded form, computing and caching it on first use.
+
+        When the textures are empty a zero tensor of shape (N, 0, 2) is
+        cached; otherwise the list form is padded with `list_to_padded`.
+        """
+        if self._verts_uvs_padded is not None:
+            return self._verts_uvs_padded
+
+        if self.isempty():
+            padded = torch.zeros(
+                (self._N, 0, 2), dtype=torch.float32, device=self.device
+            )
+        else:
+            padded = list_to_padded(self._verts_uvs_list, pad_value=0.0)
+        self._verts_uvs_padded = padded
+        return padded
+
+    def verts_uvs_list(self) -> List[torch.Tensor]:
+        """
+        Return verts_uvs as a list, computing and caching it on first use.
+
+        When the textures are empty the list holds N references to one empty
+        (0, 2) tensor; otherwise the padded tensor is unbound along the batch
+        dimension without trimming any padding.
+        """
+        if self._verts_uvs_list is not None:
+            return self._verts_uvs_list
+
+        if self.isempty():
+            placeholder = torch.empty(
+                (0, 2), dtype=torch.float32, device=self.device
+            )
+            as_list = [placeholder] * self._N
+        else:
+            # The number of vertices in the mesh and in verts_uvs can differ,
+            # e.g. a vertex shared between 3 faces can have up to 3 different
+            # uv coordinates, so the padded rows are not split by vertex count.
+            as_list = list(self._verts_uvs_padded.unbind(0))
+        self._verts_uvs_list = as_list
+        return as_list
+
+    # Currently only the padded maps are used.
+    def maps_padded(self) -> torch.Tensor:
+        """Return the padded texture maps tensor stored on this object."""
+        return self._maps_padded
+
+    def maps_list(self) -> List[torch.Tensor]:
+        """
+        Return the texture maps as one tensor per mesh.
+
+        If a list of maps was stored at construction it is returned as is.
+        Otherwise the padded maps tensor is unbound along the batch dimension.
+        """
+        if self._maps_list is not None:
+            return self._maps_list
+        # `unbind` returns a tuple; convert it so the result matches the
+        # declared return type and the behavior of `verts_uvs_list`.
+        return list(self._maps_padded.unbind(0))
+
+    def extend(self, N: int) -> "TexturesUV":
+        """
+        Build a new TexturesUV from the properties returned by `self._extend`.
+
+        Args:
+            N: forwarded to `self._extend` together with the names of the
+                padded maps, verts_uvs, faces_uvs and `_num_faces_per_mesh`
+                (presumably the number of copies of each mesh — confirm
+                against `TexturesBase._extend`).
+
+        Returns:
+            A TexturesUV constructed from the extended padded tensors, with
+            the same padding_mode, align_corners and sampling_mode as self.
+        """
+        prop_names = [
+            "maps_padded",
+            "verts_uvs_padded",
+            "faces_uvs_padded",
+            "_num_faces_per_mesh",
+        ]
+        extended = self._extend(N, prop_names)
+
+        new_tex = self.__class__(
+            maps=extended["maps_padded"],
+            faces_uvs=extended["faces_uvs_padded"],
+            verts_uvs=extended["verts_uvs_padded"],
+            padding_mode=self.padding_mode,
+            align_corners=self.align_corners,
+            sampling_mode=self.sampling_mode,
+        )
+        new_tex._num_faces_per_mesh = extended["_num_faces_per_mesh"]
+        return new_tex
+
+    # pyre-fixme[14]: `sample_textures` overrides method defined in `TexturesBase`
+    #  inconsistently.
+    def sample_textures(self, fragments, **kwargs) -> torch.Tensor:
+        """
+        Interpolate a 2D texture map using uv vertex texture coordinates for each
+        face in the mesh. First interpolate the vertex uvs using barycentric coordinates
+        for each pixel in the rasterized output. Then interpolate the texture map
+        using the uv coordinate for each pixel.
+
+        Args:
+            fragments:
+                The outputs of rasterization. From this we use
+
+                - pix_to_face: LongTensor of shape (N, H, W, K) specifying the indices
+                of the faces (in the packed representation) which
+                overlap each pixel in the image.
+                - bary_coords: FloatTensor of shape (N, H, W, K, 3) specifying
+                the barycentric coordinates of each pixel
+                relative to the faces (in the packed
+                representation) which overlap the pixel.
+            **kwargs: accepted but not used by this implementation.
+
+        Returns:
+            texels: tensor of shape (N, H, W, K, C) giving the interpolated
+            texture for each pixel in the rasterized image.
+        """
+        if self.isempty():
+            # No textures: use zero uvs as a placeholder.
+            faces_verts_uvs = torch.zeros(
+                (self._N, 3, 2), dtype=torch.float32, device=self.device
+            )
+        else:
+            # For every mesh, gather the uv coordinates of each face's three
+            # vertices, then concatenate over the batch (packed layout).
+            packing_list = [
+                i[j] for i, j in zip(self.verts_uvs_list(), self.faces_uvs_list())
+            ]
+            faces_verts_uvs = torch.cat(packing_list)
+        texture_maps = self.maps_padded()
+
+        # pixel_uvs: (N, H, W, K, 2)
+        pixel_uvs = interpolate_face_attributes(
+            fragments.pix_to_face, fragments.bary_coords, faces_verts_uvs
+        )
+
+        N, H_out, W_out, K = fragments.pix_to_face.shape
+        N, H_in, W_in, C = texture_maps.shape  # 3 for RGB
+
+        # pixel_uvs: (N, H, W, K, 2) -> (N, K, H, W, 2) -> (NK, H, W, 2)
+        pixel_uvs = pixel_uvs.permute(0, 3, 1, 2, 4).reshape(N * K, H_out, W_out, 2)
+
+        # textures.map:
+        #   (N, H, W, C) -> (N, C, H, W) -> (1, N, C, H, W)
+        #   -> expand (K, N, C, H, W) -> reshape (N*K, C, H, W)
+        texture_maps = (
+            texture_maps.permute(0, 3, 1, 2)[None, ...]
+            .expand(K, -1, -1, -1, -1)
+            .transpose(0, 1)
+            .reshape(N * K, C, H_in, W_in)
+        )
+
+        # Textures: (N*K, C, H, W), pixel_uvs: (N*K, H, W, 2)
+        # Now need to format the pixel uvs and the texture map correctly!
+        # From pytorch docs, grid_sample takes `grid` and `input`:
+        #   grid specifies the sampling pixel locations normalized by
+        #   the input spatial dimensions. It should have most
+        #   values in the range of [-1, 1]. Values x = -1, y = -1
+        #   is the left-top pixel of input, and values x = 1, y = 1 is the
+        #   right-bottom pixel of input.
+
+        # map to a range of [-1, 1] and flip the y axis
+        pixel_uvs = torch.lerp(
+            pixel_uvs.new_tensor([-1.0, 1.0]),
+            pixel_uvs.new_tensor([1.0, -1.0]),
+            pixel_uvs,
+        )
+
+        # grid_sample needs both inputs on the same device.
+        if texture_maps.device != pixel_uvs.device:
+            texture_maps = texture_maps.to(pixel_uvs.device)
+        texels = F.grid_sample(
+            texture_maps,
+            pixel_uvs,
+            mode=self.sampling_mode,
+            align_corners=self.align_corners,
+            padding_mode=self.padding_mode,
+        )
+        # texels now has shape (NK, C, H_out, W_out)
+        texels = texels.reshape(N, K, C, H_out, W_out).permute(0, 3, 4, 1, 2)
+        return texels
+
+    def faces_verts_textures_packed(self) -> torch.Tensor:
+        """
+        Samples texture from each vertex and for each face in the mesh.
+        For N meshes with {Fi} number of faces, it returns a
+        tensor of shape sum(Fi)x3xC (C = 3 for RGB).
+        You can use the utils function in structures.utils to convert the
+        packed representation to a list or padded.
+
+        Returns:
+            Tensor of shape (sum(Fi), 3, C). If the textures are empty, a
+            float32 zero tensor of shape (0, 3, C) is returned, where C is
+            the last dimension of the padded maps.
+        """
+        if self.isempty():
+            return torch.zeros(
+                (0, 3, self.maps_padded().shape[-1]),
+                dtype=torch.float32,
+                device=self.device,
+            )
+        else:
+            # Per mesh, gather the uv coordinates of each face's three vertices.
+            packing_list = [
+                i[j] for i, j in zip(self.verts_uvs_list(), self.faces_uvs_list())
+            ]
+            faces_verts_uvs = _list_to_padded_wrapper(
+                packing_list, pad_value=0.0
+            )  # Nxmax(Fi)x3x2
+        texture_maps = self.maps_padded()  # NxHxWxC
+        texture_maps = texture_maps.permute(0, 3, 1, 2)  # NxCxHxW
+
+        # map to a range of [-1, 1] and flip the y axis
+        faces_verts_uvs = torch.lerp(
+            faces_verts_uvs.new_tensor([-1.0, 1.0]),
+            faces_verts_uvs.new_tensor([1.0, -1.0]),
+            faces_verts_uvs,
+        )
+
+        # The padded uvs act as the sampling grid, with max(Fi) and 3 taking
+        # the place of the output height and width.
+        textures = F.grid_sample(
+            texture_maps,
+            faces_verts_uvs,
+            mode=self.sampling_mode,
+            align_corners=self.align_corners,
+            padding_mode=self.padding_mode,
+        )  # NxCxmax(Fi)x3
+
+        textures = textures.permute(0, 2, 3, 1)  # Nxmax(Fi)x3xC
+        # Drop the padded faces before packing.
+        textures = _padded_to_list_wrapper(
+            textures, split_size=self._num_faces_per_mesh
+        )  # list of N {Fix3xC} tensors
+        return list_to_packed(textures)[0]
+
+    def join_batch(self, textures: List["TexturesUV"]) -> "TexturesUV":
+        """
+        Concatenate self with the given textures into one batch.
+
+        Args:
+            textures: List of TexturesUV objects. Each must share the
+                padding_mode, align_corners and sampling_mode of self.
+
+        Returns:
+            new_tex: TexturesUV object holding the textures of self followed
+            by those of every element of `textures`, in order.
+
+        Raises:
+            ValueError: if an element is not a TexturesUV or if any sampling
+                setting differs from that of self.
+        """
+        if not all(isinstance(tex, TexturesUV) for tex in textures):
+            raise ValueError("All textures must be of type TexturesUV.")
+        if not all(tex.padding_mode == self.padding_mode for tex in textures):
+            raise ValueError("All textures must have the same padding_mode.")
+        if not all(tex.align_corners == self.align_corners for tex in textures):
+            raise ValueError("All textures must have the same align_corners value.")
+        if not all(tex.sampling_mode == self.sampling_mode for tex in textures):
+            raise ValueError("All textures must have the same sampling_mode.")
+
+        # Start from copies of self's lists so that self is left untouched.
+        faces_uvs_list = list(self.faces_uvs_list())
+        verts_uvs_list = list(self.verts_uvs_list())
+        maps_list = list(self.maps_list())
+        num_faces_per_mesh = self._num_faces_per_mesh.copy()
+
+        for other in textures:
+            verts_uvs_list.extend(other.verts_uvs_list())
+            faces_uvs_list.extend(other.faces_uvs_list())
+            num_faces_per_mesh += other._num_faces_per_mesh
+            maps_list.extend(other.maps_list())
+
+        new_tex = self.__class__(
+            maps=maps_list,
+            verts_uvs=verts_uvs_list,
+            faces_uvs=faces_uvs_list,
+            padding_mode=self.padding_mode,
+            align_corners=self.align_corners,
+            sampling_mode=self.sampling_mode,
+        )
+        new_tex._num_faces_per_mesh = num_faces_per_mesh
+        return new_tex
+
+    def _place_map_into_single_map(
+        self, single_map: torch.Tensor, map_: torch.Tensor, location: PackedRectangle
+    ) -> None:
+        """
+        Copy map into a larger tensor single_map at the destination specified by location.
+        If align_corners is False, we add the needed border around the destination.
+
+        Used by join_scene.
+
+        Args:
+            single_map: (total_H, total_W, C)
+            map_: (H, W, C) source data
+            location: where to place map
+
+        Returns:
+            None. single_map is modified in place.
+        """
+        do_flip = location.flipped
+        # A flipped placement stores the map with its first two axes swapped.
+        source = map_.transpose(0, 1) if do_flip else map_
+        # Without align_corners, leave one pixel free on each side for the border.
+        border_width = 0 if self.align_corners else 1
+        lower_u = location.x + border_width
+        lower_v = location.y + border_width
+        upper_u = lower_u + source.shape[0]
+        upper_v = lower_v + source.shape[1]
+        single_map[lower_u:upper_u, lower_v:upper_v] = source
+
+        if self.padding_mode != "zeros" and not self.align_corners:
+            # Fill the one-pixel border by replicating the outermost rows
+            # and columns of the map just placed ...
+            single_map[lower_u - 1, lower_v:upper_v] = single_map[
+                lower_u, lower_v:upper_v
+            ]
+            single_map[upper_u, lower_v:upper_v] = single_map[
+                upper_u - 1, lower_v:upper_v
+            ]
+            single_map[lower_u:upper_u, lower_v - 1] = single_map[
+                lower_u:upper_u, lower_v
+            ]
+            single_map[lower_u:upper_u, upper_v] = single_map[
+                lower_u:upper_u, upper_v - 1
+            ]
+            # ... and the four border corners from the nearest corner pixel.
+            single_map[lower_u - 1, lower_v - 1] = single_map[lower_u, lower_v]
+            single_map[lower_u - 1, upper_v] = single_map[lower_u, upper_v - 1]
+            single_map[upper_u, lower_v - 1] = single_map[upper_u - 1, lower_v]
+            single_map[upper_u, upper_v] = single_map[upper_u - 1, upper_v - 1]
+
+    def join_scene(self) -> "TexturesUV":
+        """
+        Return a new TexturesUV amalgamating the batch.
+
+        We calculate a large single map which contains the original maps,
+        and find verts_uvs to point into it. This will not replicate
+        behavior of padding for verts_uvs values outside [0,1].
+
+        If align_corners=False, we need to add an artificial border around
+        every map.
+
+        We use the function `pack_unique_rectangles` to provide a layout for
+        the single map. This means that if self was created with a list of maps,
+        and to() has not been called, and there were two maps which were exactly
+        the same tensor object, then they will become the same data in the unified map.
+        _place_map_into_single_map is used to copy the maps into the single map.
+        The merging of verts_uvs and faces_uvs is handled locally in this function.
+
+        Returns:
+            A TexturesUV with a batch size of one, holding the single merged
+            map, the concatenated remapped verts_uvs and the concatenated
+            offset faces_uvs.
+        """
+        maps = self.maps_list()
+        heights_and_widths = []
+        extra_border = 0 if self.align_corners else 2
+        for map_ in maps:
+            # id(map_) lets the packer recognize maps which are the same object.
+            heights_and_widths.append(
+                Rectangle(
+                    map_.shape[0] + extra_border, map_.shape[1] + extra_border, id(map_)
+                )
+            )
+        merging_plan = pack_unique_rectangles(heights_and_widths)
+        C = maps[0].shape[-1]
+        single_map = maps[0].new_zeros((*merging_plan.total_size, C))
+        verts_uvs = self.verts_uvs_list()
+        verts_uvs_merged = []
+
+        for map_, loc, uvs in zip(maps, merging_plan.locations, verts_uvs):
+            new_uvs = uvs.clone()
+            # Only the first occurrence of a repeated map is copied.
+            if loc.is_first:
+                self._place_map_into_single_map(single_map, map_, loc)
+            do_flip = loc.flipped
+            x_shape = map_.shape[1] if do_flip else map_.shape[0]
+            y_shape = map_.shape[0] if do_flip else map_.shape[1]
+
+            if do_flip:
+                # Here we have flipped / transposed the map.
+                # In uvs, the y values are decreasing from 1 to 0 and the x
+                # values increase from 0 to 1. We subtract all values from 1
+                # as the x's become y's and the y's become x's.
+                new_uvs = 1.0 - new_uvs[:, [1, 0]]
+                if TYPE_CHECKING:
+                    new_uvs = torch.Tensor(new_uvs)
+
+            # If align_corners is True, then an index of x (where x is in
+            # the range 0 .. map_.shape[1]-1) in one of the input maps
+            # was hit by a u of x/(map_.shape[1]-1).
+            # That x is located at the index loc[1] + x in the single_map, and
+            # to hit that we need u to equal (loc[1] + x) / (total_size[1]-1)
+            # so the old u should be mapped to
+            #   { u*(map_.shape[1]-1) + loc[1] } / (total_size[1]-1)
+
+            # Also, an index of y (where y is in
+            # the range 0 .. map_.shape[0]-1) in one of the input maps
+            # was hit by a v of 1 - y/(map_.shape[0]-1).
+            # That y is located at the index loc[0] + y in the single_map, and
+            # to hit that we need v to equal 1 - (loc[0] + y) / (total_size[0]-1)
+            # so the old v should be mapped to
+            #   1 - { (1-v)*(map_.shape[0]-1) + loc[0] } / (total_size[0]-1)
+            # =
+            # { v*(map_.shape[0]-1) + total_size[0] - map_.shape[0] - loc[0] }
+            #        / (total_size[0]-1)
+
+            # If align_corners is False, then an index of x (where x is in
+            # the range 1 .. map_.shape[1]-2) in one of the input maps
+            # was hit by a u of (x+0.5)/(map_.shape[1]).
+            # That x is located at the index loc[1] + 1 + x in the single_map,
+            # (where the 1 is for the border)
+            # and to hit that we need u to equal (loc[1] + 1 + x + 0.5) / (total_size[1])
+            # so the old u should be mapped to
+            #   { loc[1] + 1 + u*map_.shape[1]-0.5 + 0.5 } / (total_size[1])
+            #  = { loc[1] + 1 + u*map_.shape[1] } / (total_size[1])
+
+            # Also, an index of y (where y is in
+            # the range 1 .. map_.shape[0]-2) in one of the input maps
+            # was hit by a v of 1 - (y+0.5)/(map_.shape[0]).
+            # That y is located at the index loc[0] + 1 + y in the single_map,
+            # (where the 1 is for the border)
+            # and to hit that we need v to equal 1 - (loc[0] + 1 + y + 0.5) / (total_size[0])
+            # so the old v should be mapped to
+            #   1 - { loc[0] + 1 + (1-v)*map_.shape[0]-0.5 + 0.5 } / (total_size[0])
+            #  = { total_size[0] - loc[0] -1 - (1-v)*map_.shape[0]  }
+            #         / (total_size[0])
+            #  = { total_size[0] - loc[0] - map_.shape[0] - 1 + v*map_.shape[0] }
+            #         / (total_size[0])
+
+            # We change the y's in new_uvs for the scaling of height,
+            # and the x's for the scaling of width.
+            # That is why the 1's and 0's are mismatched in these lines.
+            one_if_align = 1 if self.align_corners else 0
+            one_if_not_align = 1 - one_if_align
+            denom_x = merging_plan.total_size[0] - one_if_align
+            scale_x = x_shape - one_if_align
+            denom_y = merging_plan.total_size[1] - one_if_align
+            scale_y = y_shape - one_if_align
+            new_uvs[:, 1] *= scale_x / denom_x
+            new_uvs[:, 1] += (
+                merging_plan.total_size[0] - x_shape - loc.x - one_if_not_align
+            ) / denom_x
+            new_uvs[:, 0] *= scale_y / denom_y
+            new_uvs[:, 0] += (loc.y + one_if_not_align) / denom_y
+
+            verts_uvs_merged.append(new_uvs)
+
+        # Shift each mesh's uv indices by the number of uv vertices which
+        # precede it in the concatenated verts_uvs.
+        faces_uvs_merged = []
+        offset = 0
+        for faces_uvs_, verts_uvs_ in zip(self.faces_uvs_list(), verts_uvs):
+            faces_uvs_merged.append(offset + faces_uvs_)
+            offset += verts_uvs_.shape[0]
+
+        return self.__class__(
+            maps=[single_map],
+            verts_uvs=[torch.cat(verts_uvs_merged)],
+            faces_uvs=[torch.cat(faces_uvs_merged)],
+            align_corners=self.align_corners,
+            padding_mode=self.padding_mode,
+            sampling_mode=self.sampling_mode,
+        )
+
+    def centers_for_image(self, index: int) -> torch.Tensor:
+        """
+        Return the texture-map locations addressed by the verts_uvs of one
+        mesh. This is potentially useful for visualizing the data. See the
+        texturesuv_image_matplotlib and texturesuv_image_PIL functions.
+
+        Args:
+            index: batch index of the mesh whose centers to return.
+
+        Returns:
+            centers: coordinates of points in the texture image
+                - a FloatTensor of shape (V,2), moved to the CPU.
+
+        Raises:
+            ValueError: if the batch does not contain exactly one mesh.
+        """
+        if self._N != 1:
+            raise ValueError(
+                "This function only supports plotting textures for one mesh."
+            )
+        texture_image = self.maps_padded()
+        # Add a leading batch dimension to the uvs of the chosen mesh.
+        verts_uvs = self.verts_uvs_list()[index][None]
+        _, H, W, _3 = texture_image.shape
+
+        # Two-channel image in which every pixel stores its own (column, row).
+        column_index = torch.arange(W).expand(H, W)
+        row_index = torch.arange(H)[:, None].expand(H, W)
+        coords = torch.stack([column_index, row_index])[None]
+
+        with torch.no_grad():
+            flipped_coords = torch.flip(coords.to(texture_image), [2])
+            # Convert from [0, 1] -> [-1, 1] range expected by grid sample
+            grid = verts_uvs[:, None] * 2.0 - 1
+            # Sampling the coordinate image at the uvs yields the xy
+            # cartesian coordinates which the uvs refer to.
+            sampled = F.grid_sample(
+                flipped_coords,
+                grid,
+                mode=self.sampling_mode,
+                align_corners=self.align_corners,
+                padding_mode=self.padding_mode,
+            ).cpu()
+            centers = sampled[0, :, 0].T
+        return centers
+
+    def check_shapes(
+        self, batch_size: int, max_num_verts: int, max_num_faces: int
+    ) -> bool:
+        """
+        Check if the dimensions of the verts/faces uvs match that of the mesh.
+
+        Only the batch size and the maximum face count are compared;
+        `max_num_verts` is not used because the number of uv vertices is not
+        guaranteed to equal the number of mesh vertices.
+        """
+        # (N, F) should be the same
+        faces_match = self.faces_uvs_padded().shape[0:2] == (
+            batch_size,
+            max_num_faces,
+        )
+        if not faces_match:
+            return faces_match
+        # (N, V) is not guaranteed to be the same, so only N is compared.
+        return self.verts_uvs_padded().shape[0] == batch_size
+
+    def submeshes(
+        self,
+        vertex_ids_list: List[List[torch.LongTensor]],
+        faces_ids_list: List[List[torch.LongTensor]],
+    ) -> "TexturesUV":
+        """
+        Extract a sub-texture for use in a submesh.
+
+        If the meshes batch corresponding to this TexturesUV contains
+        `n = len(faces_ids_list)` meshes, then self.faces_uvs_padded()
+        will be of length n. After submeshing, we obtain a batch of
+        `k = sum(len(f) for f in faces_ids_list)` submeshes (see
+        Meshes.submeshes). This function creates a corresponding TexturesUV
+        object with `faces_uvs_padded` of length `k`.
+
+        Args:
+            vertex_ids_list: Not used when submeshing TexturesUV.
+
+            faces_ids_list: A list of the same length as
+                self.faces_uvs_padded(). Each element is a list of
+                LongTensors, one per submesh, listing the face ids that the
+                submesh keeps in the respective mesh.
+
+        Returns:
+            A TexturesUV in which faces_uvs, verts_uvs and maps each have
+            length sum(len(faces) for faces in faces_ids_list). Every submesh
+            reuses the verts_uvs and map of the mesh it was taken from.
+
+        Raises:
+            IndexError: if faces_ids_list and self.faces_uvs_padded() differ
+                in length.
+        """
+        faces_uvs_padded = self.faces_uvs_padded()
+        if len(faces_ids_list) != len(faces_uvs_padded):
+            raise IndexError(
+                "faces_uvs_padded must be of the same length as face_ids_list."
+            )
+
+        sub_maps = []
+        sub_faces_uvs = []
+        sub_verts_uvs = []
+        per_mesh = zip(
+            faces_ids_list,
+            faces_uvs_padded,
+            self.verts_uvs_padded(),
+            self.maps_padded(),
+        )
+        for mesh_faces_ids, mesh_faces_uvs, mesh_verts_uvs, mesh_map in per_mesh:
+            for kept_faces in mesh_faces_ids:
+                sub_faces_uvs.append(mesh_faces_uvs[kept_faces])
+                sub_verts_uvs.append(mesh_verts_uvs)
+                sub_maps.append(mesh_map)
+
+        return self.__class__(
+            sub_maps,
+            sub_faces_uvs,
+            sub_verts_uvs,
+            self.padding_mode,
+            self.align_corners,
+            self.sampling_mode,
+        )
+
+
+class TexturesVertex(TexturesBase):
+    def __init__(
+        self,
+        verts_features: Union[torch.Tensor, List[torch.Tensor], Tuple[torch.Tensor]],
+    ) -> None:
+        """
+        Batched texture representation where each vertex in a mesh
+        has a C dimensional feature vector.
+
+        Args:
+            verts_features: list of (Vi, C) or (N, V, C) tensor giving a feature
+                vector with arbitrary dimensions for each vertex.
+
+        Raises:
+            ValueError: if verts_features is neither a tensor nor a
+                list/tuple, or if its tensors have the wrong number of
+                dimensions.
+        """
+        given_as_sequence = isinstance(verts_features, (tuple, list))
+        if not given_as_sequence and not torch.is_tensor(verts_features):
+            raise ValueError("verts_features must be a tensor or list of tensors")
+
+        # Note: _N and _num_verts_per_mesh may be overridden when textures is
+        # passed into the Meshes constructor. For more details
+        # refer to the __init__ of Meshes.
+        if given_as_sequence:
+            for feats in verts_features:
+                if not (torch.is_tensor(feats) and feats.ndim == 2):
+                    raise ValueError(
+                        "Expected verts_features to be a list of tensors of shape (V, C)."
+                    )
+
+            self._verts_features_list = verts_features
+            self._verts_features_padded = None
+            self._N = len(verts_features)
+            self._num_verts_per_mesh = [len(fv) for fv in verts_features]
+            # An empty batch has no tensor to take the device from.
+            if self._N > 0:
+                self.device = verts_features[0].device
+            else:
+                self.device = torch.device("cpu")
+        else:
+            if verts_features.ndim != 3:
+                raise ValueError(
+                    "Expected verts_features to be of shape (N, V, C); got %r"
+                    % repr(verts_features.shape)
+                )
+            self._verts_features_padded = verts_features
+            self._verts_features_list = None
+            self.device = verts_features.device
+            self._N = len(verts_features)
+            # Every mesh is assumed to use the full padded vertex dimension.
+            max_V = verts_features.shape[1]
+            self._num_verts_per_mesh = [max_V] * self._N
+
+        # This is set inside the Meshes object when textures is
+        # passed into the Meshes constructor. For more details
+        # refer to the __init__ of Meshes.
+        self.valid = torch.ones((self._N,), dtype=torch.bool, device=self.device)
+
+    def clone(self) -> "TexturesVertex":
+        """
+        Return a new TexturesVertex built from a clone of the padded
+        features, with the list form (if held), the per-mesh vertex counts
+        and the valid mask copied as well.
+        """
+        padded_copy = self.verts_features_padded().clone()
+        tex = self.__class__(padded_copy)
+        features_list = self._verts_features_list
+        if features_list is not None:
+            tex._verts_features_list = [f.clone() for f in features_list]
+        tex._num_verts_per_mesh = self._num_verts_per_mesh.copy()
+        tex.valid = self.valid.clone()
+        return tex
+
+    def detach(self) -> "TexturesVertex":
+        """
+        Return a new TexturesVertex built from the detached padded features,
+        with the list form (if held) and the valid mask detached and the
+        per-mesh vertex counts copied.
+        """
+        padded_detached = self.verts_features_padded().detach()
+        tex = self.__class__(padded_detached)
+        features_list = self._verts_features_list
+        if features_list is not None:
+            tex._verts_features_list = [f.detach() for f in features_list]
+        tex._num_verts_per_mesh = self._num_verts_per_mesh.copy()
+        tex.valid = self.valid.detach()
+        return tex
+
+    def __getitem__(self, index) -> "TexturesVertex":
+        """
+        Build a TexturesVertex for the meshes selected by `index`.
+
+        The selection itself is delegated to `self._getitem`. A list result
+        is passed to the constructor directly (an empty list is replaced by
+        an empty (0, 0, 3) tensor); a single tensor is wrapped in a
+        one-element list.
+
+        Raises:
+            ValueError: if the selected features are neither a list nor a
+                tensor.
+        """
+        selected = self._getitem(
+            index, ["verts_features_list", "_num_verts_per_mesh"]
+        )
+        verts_features = selected["verts_features_list"]
+
+        if isinstance(verts_features, list):
+            if not verts_features:
+                # Handle the case of an empty list
+                verts_features = torch.empty(
+                    size=(0, 0, 3),
+                    dtype=torch.float32,
+                    device=self.verts_features_padded().device,
+                )
+        elif torch.is_tensor(verts_features):
+            verts_features = [verts_features]
+        else:
+            raise ValueError("Not all values are provided in the correct format")
+
+        new_tex = self.__class__(verts_features=verts_features)
+        new_tex._num_verts_per_mesh = selected["_num_verts_per_mesh"]
+        return new_tex
+
+    def verts_features_padded(self) -> torch.Tensor:
+        """
+        Return the vertex features in padded form, computing and caching
+        them on first use from the list form with `list_to_padded`.
+        """
+        if self._verts_features_padded is not None:
+            return self._verts_features_padded
+
+        if self.isempty():
+            # NOTE(review): this placeholder is 4-dimensional although the
+            # padded features are otherwise (N, V, C) - confirm intent.
+            padded = torch.zeros(
+                (self._N, 0, 3, 0), dtype=torch.float32, device=self.device
+            )
+        else:
+            padded = list_to_padded(self._verts_features_list, pad_value=0.0)
+        self._verts_features_padded = padded
+        return padded
+
+    def verts_features_list(self) -> List[torch.Tensor]:
+        """
+        Return the vertex features as a list, computing and caching it on
+        first use.
+
+        When the textures are empty the list holds N references to one empty
+        (0, 3) tensor; otherwise the padded form is split with
+        `padded_to_list` using `_num_verts_per_mesh`.
+        """
+        if self._verts_features_list is not None:
+            return self._verts_features_list
+
+        if self.isempty():
+            placeholder = torch.empty(
+                (0, 3), dtype=torch.float32, device=self.device
+            )
+            as_list = [placeholder] * self._N
+        else:
+            as_list = padded_to_list(
+                self._verts_features_padded, split_size=self._num_verts_per_mesh
+            )
+        self._verts_features_list = as_list
+        return as_list
+
+    def verts_features_packed(self) -> torch.Tensor:  # first item of list_to_packed
+        if self.isempty():  # NOTE(review): (_N, 3, 0) placeholder shape -- confirm
+            return torch.zeros((self._N, 3, 0), dtype=torch.float32, device=self.device)
+        verts_features_list = self.verts_features_list()
+        return list_to_packed(verts_features_list)[0]
+
+    def extend(self, N: int) -> "TexturesVertex":  # delegates to self._extend
+        extended = self._extend(N, ["verts_features_padded", "_num_verts_per_mesh"])
+        result = self.__class__(verts_features=extended["verts_features_padded"])
+        result._num_verts_per_mesh = extended["_num_verts_per_mesh"]
+        return result
+
+    # pyre-fixme[14]: `sample_textures` overrides method defined in `TexturesBase`
+    #  inconsistently.
+    def sample_textures(self, fragments, faces_packed=None) -> torch.Tensor:
+        """
+        Determine the color for each rasterized face. Interpolate the colors for
+        vertices which form the face using the barycentric coordinates.
+        Args:
+            fragments:
+                The outputs of rasterization. From this we use
+
+                - pix_to_face: LongTensor of shape (N, H, W, K) specifying the indices
+                  of the faces (in the packed representation) which
+                  overlap each pixel in the image.
+                - bary_coords: FloatTensor of shape (N, H, W, K, 3) specifying
+                  the barycentric coordinates of each pixel
+                  relative to the faces (in the packed
+                  representation) which overlap the pixel.
+            faces_packed: index used to gather the packed vertex features per face
+                (presumably each face's vertex indices, (F, 3) -- confirm with caller).
+
+        Returns:
+            texels: A texture per pixel of shape (N, H, W, K, C).
+            There will be one C dimensional value for each element in
+            fragments.pix_to_face.
+        """
+        # Gather the per-face vertex features, then blend them for every pixel.
+        faces_verts_features = self.verts_features_packed()[faces_packed]
+        return interpolate_face_attributes(
+            fragments.pix_to_face, fragments.bary_coords, faces_verts_features
+        )
+
+    def submeshes(
+        self,
+        vertex_ids_list: List[List[torch.LongTensor]],
+        faces_ids_list: List[List[torch.LongTensor]],
+    ) -> "TexturesVertex":
+        """
+        Extract a sub-texture for use in a submesh.
+
+        If the meshes batch corresponding to this TexturesVertex contains
+        `n = len(vertex_ids_list)` meshes, then self.verts_features_list()
+        will be of length n. After submeshing, we obtain a batch of
+        `k = sum(len(v) for v in vertex_ids_list)` submeshes (see Meshes.submeshes).
+        This function creates a corresponding TexturesVertex object with
+        `verts_features_list` of length `k`.
+
+        Args:
+            vertex_ids_list: A list of length equal to self.verts_features_list(). Each
+                element is a list of LongTensors, one per submesh, listing the
+                vertices that the submesh keeps in the respective mesh.
+
+            faces_ids_list: Not used when submeshing TexturesVertex.
+
+        Returns:
+            A TexturesVertex in which verts_features_list has length
+            sum(len(vertices) for vertices in vertex_ids_list). Each element contains
+            vertex features corresponding to the subset of vertices in that submesh.
+        """
+        features_list = self.verts_features_list()
+        if len(vertex_ids_list) != len(features_list):
+            raise IndexError(
+                "verts_features_list must be of the same length as vertex_ids_list."
+            )
+        sub_features = [
+            features[submesh_ids]
+            for mesh_ids, features in zip(vertex_ids_list, features_list)
+            for submesh_ids in mesh_ids
+        ]
+        return self.__class__(sub_features)
+
+    def faces_verts_textures_packed(self, faces_packed=None) -> torch.Tensor:
+        """
+        Samples texture from each vertex and for each face in the mesh.
+        For N meshes with {Fi} number of faces, it returns a
+        tensor of shape sum(Fi)x3xC (C = 3 for RGB).
+        You can use the utils function in structures.utils to convert the
+        packed representation to a list or padded.
+        """
+        # Index the packed per-vertex features with `faces_packed`.
+        packed = self.verts_features_packed()
+        return packed[faces_packed]
+
+    def join_batch(self, textures: List["TexturesVertex"]) -> "TexturesVertex":
+        """
+        Join the list of textures given by `textures` to self to create a batch
+        of textures. Return a new TexturesVertex object with the combined textures.
+
+        Args:
+            textures: List of TexturesVertex objects
+
+        Returns:
+            new_tex: TexturesVertex with the combined textures from self and `textures`.
+
+        Raises:
+            ValueError: if any element of `textures` is not a TexturesVertex.
+        """
+        if any(not isinstance(tex, TexturesVertex) for tex in textures):
+            raise ValueError("All textures must be of type TexturesVertex.")
+
+        # Start from copies of this object's data, then append each texture in order.
+        verts_features_list = list(self.verts_features_list())
+        num_verts_per_mesh = self._num_verts_per_mesh.copy()
+        for other in textures:
+            verts_features_list += other.verts_features_list()
+            num_verts_per_mesh += other._num_verts_per_mesh
+
+        joined = self.__class__(verts_features=verts_features_list)
+        joined._num_verts_per_mesh = num_verts_per_mesh
+        return joined
+
+    def join_scene(self) -> "TexturesVertex":
+        """Return a new TexturesVertex amalgamating the batch."""
+        # Concatenate every list entry (torch.cat default dim) into a single entry.
+        merged = torch.cat(self.verts_features_list())
+        return self.__class__(verts_features=[merged])
+
+    def check_shapes(
+        self, batch_size: int, max_num_verts: int, max_num_faces: int
+    ) -> bool:
+        """Check that the padded verts features match the mesh verts in (N, V)."""
+        # All dims but the last one must equal (N, V); max_num_faces is part of
+        # the signature but is not used by this check.
+        leading_dims = self.verts_features_padded().shape[:-1]
+        return leading_dims == (batch_size, max_num_verts)
diff --git a/pytorch3d/pytorch3d/renderer/mesh/utils.py b/pytorch3d/pytorch3d/renderer/mesh/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..6157c8704e2865e64d0b2bfbb733fa710650b2cf
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/mesh/utils.py
@@ -0,0 +1,318 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, NamedTuple, Tuple
+
+import torch
+from pytorch3d.ops import interpolate_face_attributes
+
+
+def _clip_barycentric_coordinates(bary) -> torch.Tensor:
+    """
+    Args:
+        bary: barycentric coordinates of shape (..., 3) where `...` represents
+            an arbitrary number of dimensions
+
+    Returns:
+        bary: Barycentric coordinates clipped (i.e. any values < 0 are set to 0)
+        and renormalized; values > 1 fall into [0, 1] after renormalization.
+        Entries whose three coordinates all equal -1 are kept as all -1.
+        The output is the same shape as the input.
+    """
+    if bary.shape[-1] != 3:
+        msg = "Expected barycentric coords to have last dim = 3; got %r"
+        raise ValueError(msg % (bary.shape,))  # rejects any other last dimension
+    ndims = bary.ndim - 1  # number of leading (non-coordinate) dimensions
+    mask = bary.eq(-1).all(dim=-1, keepdim=True).expand(*((-1,) * ndims + (3,)))
+    clipped = bary.clamp(min=0.0)
+    clipped[mask] = 0.0  # all -1 triples are zeroed before normalising
+    clipped_sum = torch.clamp(clipped.sum(dim=-1, keepdim=True), min=1e-5)
+    clipped = clipped / clipped_sum  # sum is floored at 1e-5, so never divides by 0
+    clipped[mask] = -1.0  # restore the all -1 marker
+    return clipped
+
+
+def _interpolate_zbuf(
+    pix_to_face: torch.Tensor, barycentric_coords: torch.Tensor, meshes
+) -> torch.Tensor:
+    """
+    A helper function to calculate the z buffer for each pixel in the
+    rasterized output.
+
+    Args:
+        pix_to_face: LongTensor of shape (N, H, W, K) specifying the indices
+            of the faces (in the packed representation) which
+            overlap each pixel in the image.
+        barycentric_coords: FloatTensor of shape (N, H, W, K, 3) specifying
+            the barycentric coordinates of each pixel
+            relative to the faces (in the packed
+            representation) which overlap the pixel.
+        meshes: Meshes object representing a batch of meshes.
+
+    Returns:
+        zbuffer: (N, H, W, K) FloatTensor; set to -1 where pix_to_face is -1.
+    """
+    faces_verts = meshes.verts_packed()[meshes.faces_packed()]
+    faces_verts_z = faces_verts[..., 2:3]  # keep only z: (F, 3, 1)
+    interpolated = interpolate_face_attributes(
+        pix_to_face, barycentric_coords, faces_verts_z
+    )
+    zbuf = interpolated[..., 0]  # (N, H, W, K)
+    zbuf[pix_to_face == -1] = -1
+    return zbuf
+
+
+# -----------  Rectangle Packing  -------------------- #
+
+
+class Rectangle(NamedTuple):  # input element of pack_unique_rectangles
+    xsize: int  # extent along x
+    ysize: int  # extent along y
+    identifier: int  # rectangles sharing an identifier are packed only once
+
+
+class PackedRectangle(NamedTuple):  # where one input rectangle was placed
+    x: int  # x offset inside the packed area (-1 in the not-yet-placed placeholder)
+    y: int  # y offset inside the packed area (-1 in the not-yet-placed placeholder)
+    flipped: bool  # True if the rectangle is placed with its two sides swapped
+    is_first: bool  # True for the first rectangle with a given identifier
+
+
+class PackedRectangles(NamedTuple):  # result returned by the packing functions
+    total_size: Tuple[int, int]  # (width, height) of the enclosing rectangle
+    locations: List[PackedRectangle]  # one entry per input, in input order
+
+
+# Note the order of members matters here because it determines the queue order.
+# We want to place longer rectangles first.
+class _UnplacedRectangle(NamedTuple):
+    size: Tuple[int, int]  # listed first so that sorting compares sizes first
+    ind: int  # index of the rectangle in the caller's input list
+    flipped: bool  # True if `size` is swapped relative to the input size
+
+
+def _try_place_rectangle(
+    rect: _UnplacedRectangle,
+    placed_so_far: List[PackedRectangle],
+    occupied: List[Tuple[int, int]],
+) -> bool:
+    """
+    Try to place rect within the current bounding box.
+    Part of the implementation of pack_rectangles.
+
+    Note that the arguments `placed_so_far` and `occupied` are modified.
+
+    Args:
+        rect: rectangle to place
+        placed_so_far: the locations decided upon so far - a list of
+                    PackedRectangle. The nth element is the
+                    location of the nth rectangle if it has been decided.
+                    (modified in place)
+        occupied: the nodes of the graph of extents of rightmost placed
+                    rectangles - (modified in place)
+
+    Returns:
+        True on success; False (with nothing modified) if rect does not fit.
+
+    Example:
+    (We always have placed the first rectangle horizontally and other
+    rectangles above it.)
+    Let's say the placed boxes 1-4 are laid out like this.
+    The coordinates of the points marked X are stored in occupied.
+    It is to the right of the X's that we seek to place rect.
+
+        +-----------------------X
+        |2                      |
+        |                       +---X
+        |                       |4  |
+        |                       |   |
+        |                       +---+X
+        |                       |3   |
+        |                       |    |
+        +-----------------------+----+------X
+    y   |1                                  |
+    ^   |     --->x                         |
+    |   +-----------------------------------+
+
+    We want to place this rectangle.
+
+                +-+
+                |5|
+                | |
+                | |   = rect
+                | |
+                | |
+                | |
+                +-+
+
+    The call will succeed, returning True, leaving us with
+
+        +-----------------------X
+        |2                      |    +-X
+        |                       +---+|5|
+        |                       |4  || |
+        |                       |   || |
+        |                       +---++ |
+        |                       |3   | |
+        |                       |    | |
+        +-----------------------+----+-+----X
+        |1                                  |
+        |                                   |
+        +-----------------------------------+ .
+
+    """
+    total_width = occupied[0][0]  # x extent of the first node is the total width
+    needed_height = rect.size[1]
+    current_start_idx = None  # first index of the current run of usable nodes
+    current_max_width = 0  # largest x extent seen within the current run
+    previous_height = 0  # interval[1] of the previously visited node
+    currently_packed = 0  # height accumulated by the current run
+    for idx, interval in enumerate(occupied):
+        if interval[0] <= total_width - rect.size[0]:  # room to the right of this node
+            currently_packed += interval[1] - previous_height
+            current_max_width = max(interval[0], current_max_width)
+            if current_start_idx is None:
+                current_start_idx = idx
+            if currently_packed >= needed_height:
+                current_max_width = max(interval[0], current_max_width)  # no-op repeat
+                placed_so_far[rect.ind] = PackedRectangle(
+                    current_max_width,
+                    occupied[current_start_idx - 1][1],  # y at which the run starts
+                    rect.flipped,
+                    True,
+                )
+                new_occupied = (
+                    current_max_width + rect.size[0],
+                    occupied[current_start_idx - 1][1] + needed_height,
+                )
+                if currently_packed == needed_height:  # the run is filled exactly
+                    occupied[idx] = new_occupied
+                    del occupied[current_start_idx:idx]
+                elif idx > current_start_idx:  # several nodes, last partly covered
+                    occupied[idx - 1] = new_occupied
+                    del occupied[current_start_idx : (idx - 1)]
+                else:  # a single node, only partly covered
+                    occupied.insert(idx, new_occupied)
+                return True
+        else:  # not enough room beside this node: abandon the current run
+            current_start_idx = None
+            current_max_width = 0
+            currently_packed = 0
+        previous_height = interval[1]
+    return False
+
+
+def pack_rectangles(sizes: List[Tuple[int, int]]) -> PackedRectangles:
+    """
+    Naive rectangle packing in to a large rectangle. Flipping (i.e. rotating
+    a rectangle by 90 degrees) is allowed.
+
+    This is used to join several uv maps into a single scene, see
+    TexturesUV.join_scene.
+
+    Args:
+        sizes: List of sizes of rectangles to pack (at least two, else ValueError)
+
+    Returns:
+        total_size: size of total large rectangle
+        locations: location for each of the input rectangles.
+                    This includes whether they are flipped.
+                    The is_first field is always True.
+    """
+
+    if len(sizes) < 2:
+        raise ValueError("Cannot pack less than two boxes")
+
+    # Store every rectangle as (long side, short side) and sort ascending, so
+    # that popping from the end yields the longest remaining rectangle.
+    queue = sorted(
+        _UnplacedRectangle((size[1], size[0]), i, True)
+        if size[0] < size[1]
+        else _UnplacedRectangle((size[0], size[1]), i, False)
+        for i, size in enumerate(sizes)
+    )
+    placed_so_far = [PackedRectangle(-1, -1, False, False)] * len(sizes)
+
+    # The two longest rectangles are stacked at x = 0; the longest sets the width.
+    biggest, second = queue.pop(), queue.pop()
+    total_width, base_height = biggest.size
+    current_height = base_height + second.size[1]
+    placed_so_far[biggest.ind] = PackedRectangle(0, 0, biggest.flipped, True)
+    placed_so_far[second.ind] = PackedRectangle(0, base_height, second.flipped, True)
+    occupied = [biggest.size, (second.size[0], current_height)]
+
+    while queue:
+        rect = queue.pop()
+        rotated = _UnplacedRectangle(
+            (rect.size[1], rect.size[0]), rect.ind, not rect.flipped
+        )
+        if _try_place_rectangle(rect, placed_so_far, occupied) or (
+            _try_place_rectangle(rotated, placed_so_far, occupied)
+        ):
+            continue
+
+        # rect wasn't placed in the current bounding box,
+        # so we add extra space to fit it in.
+        placed_so_far[rect.ind] = PackedRectangle(0, current_height, rect.flipped, True)
+        current_height += rect.size[1]
+        occupied.append((rect.size[0], current_height))
+
+    return PackedRectangles((total_width, current_height), placed_so_far)
+
+
+def pack_unique_rectangles(rectangles: List[Rectangle]) -> PackedRectangles:
+    """
+    Naive rectangle packing in to a large rectangle. Flipping (i.e. rotating
+    a rectangle by 90 degrees) is allowed. Inputs are deduplicated by their
+    identifier.
+
+    This is a wrapper around pack_rectangles, where inputs come with an
+    identifier. In particular, it calls pack_rectangles for the deduplicated inputs,
+    then returns the values for all the inputs. The output for all rectangles with
+    the same identifier will be the same, except that only the first one will have
+    the is_first field True.
+
+    This is used to join several uv maps into a single scene, see
+    TexturesUV.join_scene.
+
+    Args:
+        rectangles: List of Rectangle (size plus identifier) to pack
+
+    Returns:
+        total_size: size of total large rectangle
+        locations: location for each of the input rectangles.
+                    This includes whether they are flipped.
+                    The is_first field is true for the first rectangle
+                    with each identifier.
+    """
+
+    if len(rectangles) < 2:
+        raise ValueError("Cannot pack less than two boxes")
+
+    # Map each identifier to the index of its size among the deduplicated sizes.
+    index_of_identifier = {}
+    unique_input_sizes: List[Tuple[int, int]] = []
+    input_indices: List[Tuple[int, bool]] = []
+    for rectangle in rectangles:
+        is_first = rectangle.identifier not in index_of_identifier
+        if is_first:
+            index_of_identifier[rectangle.identifier] = len(unique_input_sizes)
+            unique_input_sizes.append((rectangle.xsize, rectangle.ysize))
+        input_indices.append((index_of_identifier[rectangle.identifier], is_first))
+
+    if len(unique_input_sizes) == 1:
+        # Only one distinct rectangle: it sits at the origin and nothing is packed.
+        locations = [PackedRectangle(0, 0, False, False)] * len(rectangles)
+        locations[0] = PackedRectangle(0, 0, False, True)
+        return PackedRectangles(unique_input_sizes[0], locations)
+
+    total_size, unique_locations = pack_rectangles(unique_input_sizes)
+    full_locations = [
+        unique_locations[unique_index]._replace(is_first=is_first)
+        for unique_index, is_first in input_indices
+    ]
+
+    return PackedRectangles(total_size, full_locations)
diff --git a/pytorch3d/pytorch3d/renderer/opengl/__init__.py b/pytorch3d/pytorch3d/renderer/opengl/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f0f6b4c170125529009029391431083001f86d68
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/opengl/__init__.py
@@ -0,0 +1,37 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# If we can access EGL, import MeshRasterizerOpenGL.
+def _can_import_egl_and_pycuda():
+    """Return True only if both OpenGL.EGL and pycuda.gl can be imported."""
+    import os
+    import warnings
+
+    try:
+        os.environ["PYOPENGL_PLATFORM"] = "egl"
+        import OpenGL.EGL
+    except (AttributeError, ImportError):  # ImportError covers ModuleNotFoundError
+        warnings.warn(
+            "Can't import EGL, not importing MeshRasterizerOpenGL. This might happen if"
+            " your Python application imported OpenGL with a non-EGL backend before"
+            " importing PyTorch3D, or if you don't have pyopengl installed as part"
+            " of your Python distribution."
+        )
+        return False
+
+    try:
+        import pycuda.gl
+    except ImportError:  # ImportError covers ModuleNotFoundError
+        warnings.warn("Can't import pycuda.gl, not importing MeshRasterizerOpenGL.")
+        return False
+    return True
+
+
+if _can_import_egl_and_pycuda():  # otherwise these names are never bound here
+    from .opengl_utils import EGLContext, global_device_context_store
+    from .rasterizer_opengl import MeshRasterizerOpenGL
+
+__all__ = [name for name in globals() if not name.startswith("_")]
diff --git a/pytorch3d/pytorch3d/renderer/opengl/opengl_utils.py b/pytorch3d/pytorch3d/renderer/opengl/opengl_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..b854f067378b9dfb6ef5cedd2f34972a539e137a
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/opengl/opengl_utils.py
@@ -0,0 +1,448 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Utilities useful for OpenGL rendering.
+#
+# NOTE: This module MUST be imported before any other OpenGL modules in this Python
+# session, unless you set PYOPENGL_PLATFORM to egl *before* importing other modules.
+# Otherwise, the imports below will throw an error.
+#
+# This module (as well as rasterizer_opengl) will not be imported into pytorch3d if
+# you do not have pycuda.gl and pyopengl installed.
+
+import contextlib
+import ctypes
+import os
+import threading
+from typing import Any, Dict
+
+
+os.environ["PYOPENGL_PLATFORM"] = "egl"
+import OpenGL.EGL as egl  # noqa
+
+import pycuda.driver as cuda  # noqa
+from OpenGL._opaque import opaque_pointer_cls  # noqa
+from OpenGL.raw.EGL._errors import EGLError  # noqa
+
+# A few constants necessary to use EGL extensions, see links for details.
+
+# https://www.khronos.org/registry/EGL/extensions/EXT/EGL_EXT_platform_device.txt
+EGL_PLATFORM_DEVICE_EXT = 0x313F
+# https://www.khronos.org/registry/EGL/extensions/NV/EGL_NV_device_cuda.txt
+EGL_CUDA_DEVICE_NV = 0x323A
+
+
+# To use EGL extensions, we need to tell OpenGL about them. For details, see
+# https://developer.nvidia.com/blog/egl-eye-opengl-visualization-without-x-server/.
+# To avoid garbage collection of the protos, we'll store them in a module-global list.
+def _define_egl_extension(name: str, type):
+    """Attach EGL extension `name` to the egl module; return its ctypes prototype."""
+    if hasattr(egl, name):
+        return  # already defined; the caller receives None
+    address = egl.eglGetProcAddress(name)
+    if address is None:
+        raise RuntimeError(f"Cannot find EGL extension {name}.")
+    # `type` is passed to ctypes.CFUNCTYPE as the prototype's result type.
+    prototype = ctypes.CFUNCTYPE(type)
+    setattr(egl, name, prototype(address))
+    return prototype
+
+
+_protos = [
+    _define_egl_extension("eglGetPlatformDisplayEXT", egl.EGLDisplay),
+    _define_egl_extension("eglQueryDevicesEXT", egl.EGLBoolean),
+    _define_egl_extension("eglQueryDeviceAttribEXT", egl.EGLBoolean),
+    _define_egl_extension("eglQueryDisplayAttribEXT", egl.EGLBoolean),
+    _define_egl_extension("eglQueryDeviceStringEXT", ctypes.c_char_p),
+]
+if not hasattr(egl, "EGLDeviceEXT"):  # only defined when the egl module lacks it
+    egl.EGLDeviceEXT = opaque_pointer_cls("EGLDeviceEXT")
+
+
+def _egl_convert_to_int_array(egl_attributes):
+    """
+    Convert a Python dict of EGL attributes into an array of ints (some of which are
+    special EGL ints).
+
+    Args:
+        egl_attributes: A dict where keys are EGL attributes, and values are their vals.
+
+    Returns:
+        A c-list of length 2 * len(egl_attributes) + 1, of the form [key1, val1, ...,
+        keyN, valN, EGL_NONE]
+    """
+    # Flatten the (key, value) pairs in dict order, then add the EGL_NONE terminator.
+    flat = [item for pair in egl_attributes.items() for item in pair]
+    flat.append(egl.EGL_NONE)
+    return (egl.EGLint * len(flat))(*flat)
+
+
+def _get_cuda_device(requested_device_id: int):
+    """
+    Find an EGL device with a given CUDA device ID.
+
+    Args:
+        requested_device_id: The desired CUDA device ID, e.g. "1" for "cuda:1".
+
+    Returns:
+        EGL device with the desired CUDA ID (RuntimeError/ValueError if none is found).
+    """
+    num_devices = egl.EGLint()  # written through ctypes.pointer by the queries below
+    if (  # first query passes no device buffer (0, None)
+        # pyre-ignore Undefined attribute [16]
+        not egl.eglQueryDevicesEXT(0, None, ctypes.pointer(num_devices))
+        or num_devices.value < 1
+    ):
+        raise RuntimeError("EGL requires a system that supports at least one device.")
+    devices = (egl.EGLDeviceEXT * num_devices.value)()  # array of size num_devices
+    if (  # second query passes the buffer that was just allocated
+        # pyre-ignore Undefined attribute [16]
+        not egl.eglQueryDevicesEXT(
+            num_devices.value, devices, ctypes.pointer(num_devices)
+        )
+        or num_devices.value < 1
+    ):
+        raise RuntimeError("EGL sees no available devices.")
+    if len(devices) < requested_device_id + 1:  # fewer devices than the id implies
+        raise ValueError(
+            f"Device {requested_device_id} not available. Found only {len(devices)} devices."
+        )
+
+    # Iterate over all the EGL devices, and check if their CUDA ID matches the request.
+    for device in devices:
+        available_device_id = egl.EGLAttrib(ctypes.c_int(-1))  # out-param, starts at -1
+        # pyre-ignore Undefined attribute [16]
+        egl.eglQueryDeviceAttribEXT(device, EGL_CUDA_DEVICE_NV, available_device_id)
+        if available_device_id.contents.value == requested_device_id:
+            return device
+    raise ValueError(  # NOTE(review): the attribute query's return value is unchecked
+        f"Found {len(devices)} CUDA devices, but none with CUDA id {requested_device_id}."
+    )
+
+
+def _get_egl_config(egl_dpy, surface_type):
+    """
+    Get an EGL config with reasonable settings (for use with MeshRasterizerOpenGL).
+
+    Args:
+        egl_dpy: An EGL display constant (int).
+        surface_type: An EGL surface_type int.
+
+    Returns:
+        An EGL config object.
+
+    Throws:
+        ValueError if the desired config is not available or invalid.
+    """
+    egl_config_dict = {
+        egl.EGL_RED_SIZE: 8,
+        egl.EGL_GREEN_SIZE: 8,
+        egl.EGL_BLUE_SIZE: 8,
+        egl.EGL_ALPHA_SIZE: 8,
+        egl.EGL_DEPTH_SIZE: 24,
+        egl.EGL_STENCIL_SIZE: egl.EGL_DONT_CARE,  # no stencil requirement
+        egl.EGL_RENDERABLE_TYPE: egl.EGL_OPENGL_BIT,
+        egl.EGL_SURFACE_TYPE: surface_type,
+    }
+    egl_config_array = _egl_convert_to_int_array(egl_config_dict)
+    egl_config = egl.EGLConfig()  # passed by pointer to eglChooseConfig
+    num_configs = egl.EGLint()  # passed by pointer; checked for 0 below
+    if (
+        not egl.eglChooseConfig(
+            egl_dpy,
+            egl_config_array,
+            ctypes.pointer(egl_config),
+            1,  # presumably the size of the config buffer -- see eglChooseConfig spec
+            ctypes.pointer(num_configs),
+        )
+        or num_configs.value == 0
+    ):
+        raise ValueError("Invalid EGL config.")
+    return egl_config
+
+
+class EGLContext:
+    """
+    A class representing an EGL context. In short, EGL allows us to render OpenGL
+    content in a headless mode, that is without an actual display to render to. This
+    capability enables MeshRasterizerOpenGL to render on the GPU and then transfer
+    the results to PyTorch3D.
+    """
+
+    def __init__(self, width: int, height: int, cuda_device_id: int = 0) -> None:
+        """
+        Args:
+            width: Width of the "display" to render to.
+            height: Height of the "display" to render to.
+            cuda_device_id: Device ID to render to, in the CUDA convention (note that
+                this might be different than EGL's device numbering).
+        """
+        # Lock used to prevent multiple threads from rendering on the same device
+        # at the same time, creating/destroying contexts at the same time, etc.
+        self.lock = threading.Lock()
+        self.cuda_device_id = cuda_device_id
+        self.device = _get_cuda_device(self.cuda_device_id)
+        self.width = width
+        self.height = height
+        self.dpy = egl.eglGetPlatformDisplayEXT(
+            EGL_PLATFORM_DEVICE_EXT, self.device, None
+        )
+        major, minor = egl.EGLint(), egl.EGLint()
+
+        # Initialize EGL components: the display, surface, and context
+        egl.eglInitialize(self.dpy, ctypes.pointer(major), ctypes.pointer(minor))
+
+        config = _get_egl_config(self.dpy, egl.EGL_PBUFFER_BIT)
+        pb_surf_attribs = _egl_convert_to_int_array(
+            {
+                egl.EGL_WIDTH: width,
+                egl.EGL_HEIGHT: height,
+            }
+        )
+        self.surface = egl.eglCreatePbufferSurface(self.dpy, config, pb_surf_attribs)
+        if self.surface == egl.EGL_NO_SURFACE:
+            raise RuntimeError("Failed to create an EGL surface.")
+
+        if not egl.eglBindAPI(egl.EGL_OPENGL_API):
+            raise RuntimeError("Failed to bind EGL to the OpenGL API.")
+        self.context = egl.eglCreateContext(self.dpy, config, egl.EGL_NO_CONTEXT, None)
+        if self.context == egl.EGL_NO_CONTEXT:
+            raise RuntimeError("Failed to create an EGL context.")
+
+    @contextlib.contextmanager
+    def active_and_locked(self):
+        """
+        A context manager used to make sure a given EGL context is only current in
+        a single thread at a single time. It is recommended to ALWAYS use EGL within
+        a `with context.active_and_locked():` context.
+
+        Throws:
+            EGLError when the context cannot be made current or non-current.
+        """
+        # Hold the lock via `with` so it is released even if eglMakeCurrent raises.
+        with self.lock:
+            egl.eglMakeCurrent(self.dpy, self.surface, self.surface, self.context)
+            try:
+                yield
+            finally:
+                egl.eglMakeCurrent(
+                    self.dpy, egl.EGL_NO_SURFACE, egl.EGL_NO_SURFACE, egl.EGL_NO_CONTEXT
+                )
+
+    def get_context_info(self) -> Dict[str, Any]:
+        """
+        Return context info. Useful for debugging.
+
+        Returns:
+            A dict of keys and ints, representing the context's display, surface,
+            the context itself, and the current thread.
+        """
+        return {
+            "dpy": self.dpy,
+            "surface": self.surface,
+            "context": self.context,
+            "thread": threading.get_ident(),
+        }
+
+    def release(self):
+        """Release the context's resources; on EGLError print a hint and re-raise."""
+        self.lock.acquire()
+        # try/finally: the lock must be dropped even when an EGL call raises.
+        try:
+            if self.surface:
+                egl.eglDestroySurface(self.dpy, self.surface)
+            if self.context and self.dpy:
+                egl.eglDestroyContext(self.dpy, self.context)
+            egl.eglMakeCurrent(
+                self.dpy, egl.EGL_NO_SURFACE, egl.EGL_NO_SURFACE, egl.EGL_NO_CONTEXT
+            )
+            if self.dpy:
+                egl.eglTerminate(self.dpy)
+        except EGLError as err:
+            print(
+                f"EGL could not release context on device cuda:{self.cuda_device_id}."
+                " This can happen if you created two contexts on the same device."
+                " Instead, you can use DeviceContextStore to use a single context"
+                " per device, and EGLContext.make_(in)active_in_current_thread to"
+                " (in)activate the context as needed."
+            )
+            raise err
+        else:
+            egl.eglReleaseThread()
+        finally:
+            self.lock.release()
+
+
+class _DeviceContextStore:
+    """
+    DeviceContextStore provides thread-safe storage for EGL and pycuda contexts. It
+    should not be used directly. opengl_utils instantiates a module-global variable
+    called opengl_utils.global_device_context_store. MeshRasterizerOpenGL uses this
+    store to avoid unnecessary context creation and destruction.
+
+    The EGL/CUDA contexts are not meant to be created and destroyed all the time,
+    and having multiple on a single device can be troublesome. Intended use is entirely
+    transparent to the user::
+
+        rasterizer1 = MeshRasterizerOpenGL(...some args...)
+        mesh1 = load_mesh_on_cuda_0()
+
+        # Now rasterizer1 will request EGL/CUDA contexts from
+        # global_device_context_store on cuda:0, and since there aren't any, the
+        # store will create new ones.
+        rasterizer1.rasterize(mesh1)
+
+        # rasterizer2 also needs EGL & CUDA contexts. But global_device_context_store
+        # already has them for cuda:0. Instead of creating new contexts, the store
+        # will tell rasterizer2 to use them.
+        rasterizer2 = MeshRasterizerOpenGL(dcs)
+        rasterizer2.rasterize(mesh1)
+
+        # When rasterizer1 needs to render on cuda:1, the store will create new contexts.
+        mesh2 = load_mesh_on_cuda_1()
+        rasterizer1.rasterize(mesh2)
+
+    """
+
+    def __init__(self):  # creates the empty per-device stores and default EGL size
+        cuda.init()  # pycuda.driver initialisation, done once per store instance
+        # pycuda contexts, at most one per device.
+        self._cuda_contexts = {}
+        # EGL contexts, at most one per device.
+        self._egl_contexts = {}
+        # Any extra per-device data (e.g. precompiled GL objects).
+        self._context_data = {}
+        # Lock for DeviceContextStore used in multithreaded multidevice scenarios.
+        self._lock = threading.Lock()
+        # All EGL contexts created by this store will have this resolution.
+        self.max_egl_width = 2048
+        self.max_egl_height = 2048
+
+    def get_cuda_context(self, device):
+        """
+        Return a pycuda's CUDA context on a given CUDA device. If we have not created
+        such a context yet, create a new one and store it in a dict. The context is
+        popped (you need to call context.push() to start using it). This function
+        is thread-safe.
+
+        Args:
+            device: A torch.device.
+
+        Returns: A pycuda context corresponding to the given device.
+        """
+        cuda_device_id = device.index
+        with self._lock:
+            if cuda_device_id not in self._cuda_contexts:
+                self._cuda_contexts[cuda_device_id] = _init_cuda_context(cuda_device_id)
+                self._cuda_contexts[cuda_device_id].pop()
+            return self._cuda_contexts[cuda_device_id]
+
+    def get_egl_context(self, device):
+        """
+        Return an EGL context on a given CUDA device. If we have not created such a
+        context yet, create a new one and store it in a dict. The context if not current
+        (you should use the `with egl_context.active_and_locked:` context manager when
+        you need it to be current). This function is thread-safe.
+
+        Args:
+            device: A torch.device.
+
+        Returns: An EGLContext on the requested device. The context will have size
+            self.max_egl_width and self.max_egl_height.
+        """
+        cuda_device_id = device.index
+        with self._lock:
+            egl_context = self._egl_contexts.get(cuda_device_id, None)
+            if egl_context is None:
+                self._egl_contexts[cuda_device_id] = EGLContext(
+                    self.max_egl_width, self.max_egl_height, cuda_device_id
+                )
+            return self._egl_contexts[cuda_device_id]
+
+    def set_context_data(self, device, value):
+        """
+        Set arbitrary data in a per-device dict.
+
+        This function is intended for storing precompiled OpenGL objects separately for
+        EGL contexts on different devices. Each such context needs a separate compiled
+        OpenGL program, but (in case e.g. of MeshRasterizerOpenGL) there's no need to
+        re-compile it each time we move the rasterizer to the same device repeatedly,
+        as it happens when using DataParallel.
+
+        Args:
+            device: A torch.device
+            value: An arbitrary Python object.
+        """
+
+        cuda_device_id = device.index
+        self._context_data[cuda_device_id] = value
+
+    def get_context_data(self, device):
+        """
+        Get arbitrary data in a per-device dict. See set_context_data for more detail.
+
+        Args:
+            device: A torch.device
+
+        Returns:
+            The most recent object stored using set_context_data.
+        """
+        cuda_device_id = device.index
+        return self._context_data.get(cuda_device_id, None)
+
+    def release(self):
+        """
+        Release all CUDA and EGL contexts.
+        """
+        for context in self._cuda_contexts.values():
+            context.detach()
+
+        for context in self._egl_contexts.values():
+            context.release()
+
+
+def _init_cuda_context(device_id: int = 0):
+    """
+    Create a pycuda context on the selected GPU.
+
+    Args:
+        device_id: int, index of the GPU on which to create the context.
+
+    Returns:
+        The newly created pycuda Context.
+    """
+    # pyre-ignore Undefined attribute [16]
+    gpu = cuda.Device(device_id)
+    # Hand the freshly made context back as-is; the caller decides what to do next.
+    return gpu.make_context()
+
+
+def _torch_to_opengl(torch_tensor, cuda_context, cuda_buffer):
+    """
+    Copy a 2D CUDA tensor of 4-byte elements into a pycuda-registered OpenGL
+    buffer, using a device-to-device Memcpy2D inside a map/unmap block.
+    """
+    # CUDA access to the OpenGL buffer is only allowed within a map-unmap block.
+    cuda_context.push()
+    mapping_obj = cuda_buffer.map()
+    # data_ptr points to the OpenGL shader storage buffer memory.
+    data_ptr, sz = mapping_obj.device_ptr_and_size()
+    # Rows are copied back to back, so the source pitch, the destination pitch and
+    # the copied row width are all the same number of bytes (4 bytes per element).
+    row_bytes = torch_tensor.shape[1] * 4
+    cuda_copy = cuda.Memcpy2D()
+    cuda_copy.set_src_device(torch_tensor.data_ptr())
+    cuda_copy.set_dst_device(data_ptr)
+    cuda_copy.width_in_bytes = cuda_copy.src_pitch = cuda_copy.dst_pitch = row_bytes
+    cuda_copy.height = torch_tensor.shape[0]
+    cuda_copy(False)
+    # Unmap and pop the cuda context so OpenGL won't interfere with PyTorch ops.
+    mapping_obj.unmap()
+    cuda_context.pop()
+
+
+# Initialize a global _DeviceContextStore. Almost always we will only need a single one.
+global_device_context_store = _DeviceContextStore()
diff --git a/pytorch3d/pytorch3d/renderer/opengl/rasterizer_opengl.py b/pytorch3d/pytorch3d/renderer/opengl/rasterizer_opengl.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf61d0d722bcc9f3a5bad7485277872d68f3a187
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/opengl/rasterizer_opengl.py
@@ -0,0 +1,711 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# NOTE: This module (as well as opengl_utils) will not be imported into pytorch3d
+# if you do not have pycuda.gl and pyopengl installed. In addition, please make sure
+# your Python application *does not* import OpenGL before importing PyTorch3D, unless
+# you are using the EGL backend.
+import warnings
+from typing import Optional, Tuple, Union
+
+import numpy as np
+import OpenGL.GL as gl
+import pycuda.gl
+import torch
+
+import torch.nn as nn
+
+from pytorch3d.structures.meshes import Meshes
+
+from ..cameras import FoVOrthographicCameras, FoVPerspectiveCameras
+from ..mesh.rasterizer import Fragments, RasterizationSettings
+from ..utils import parse_image_size
+
+from .opengl_utils import _torch_to_opengl, global_device_context_store
+
+# Shader strings, used below to compile an OpenGL program.
+vertex_shader = """
+// The vertex shader does nothing.
+#version 430
+
+void main() { }
+"""
+
+geometry_shader = """
+#version 430
+
+layout (points) in;
+layout (triangle_strip, max_vertices = 3) out;
+
+out layout (location = 0) vec2 bary_coords;
+out layout (location = 1) float depth;
+out layout (location = 2) float p2f;
+
+layout(binding=0) buffer triangular_mesh { float mesh_buffer[]; };
+
+uniform mat4 perspective_projection;
+
+vec3 get_vertex_position(int vertex_index) {
+    int offset = gl_PrimitiveIDIn * 9 + vertex_index * 3;
+    return vec3(
+        mesh_buffer[offset],
+        mesh_buffer[offset + 1],
+        mesh_buffer[offset + 2]
+    );
+}
+
+void main() {
+    vec3 positions[3] = {
+        get_vertex_position(0),
+        get_vertex_position(1),
+        get_vertex_position(2)
+    };
+    vec4 projected_vertices[3] = {
+        perspective_projection * vec4(positions[0], 1.0),
+        perspective_projection * vec4(positions[1], 1.0),
+        perspective_projection * vec4(positions[2], 1.0)
+    };
+
+    for (int i = 0; i < 3; ++i) {
+        gl_Position = projected_vertices[i];
+        bary_coords = vec2(i==0 ? 1.0 : 0.0, i==1 ? 1.0 : 0.0);
+        // At the moment, we output depth as the distance from the image plane in
+        // view coordinates -- NOT distance along the camera ray.
+        depth = positions[i][2];
+        p2f = gl_PrimitiveIDIn;
+        EmitVertex();
+    }
+    EndPrimitive();
+}
+"""
+
+fragment_shader = """
+#version 430
+
+in layout(location = 0) vec2 bary_coords;
+in layout(location = 1) float depth;
+in layout(location = 2) float p2f;
+
+
+out vec4 bary_depth_p2f;
+
+void main() {
+    bary_depth_p2f = vec4(bary_coords, depth, round(p2f));
+}
+"""
+
+
+def _parse_and_verify_image_size(
+    image_size: Union[Tuple[int, int], int],
+) -> Tuple[int, int]:
+    """
+    Parse image_size into (height, width) and raise ValueError if either exceeds the
+    max renderable size (max_egl_height/max_egl_width of global_device_context_store).
+    """
+    height, width = parse_image_size(image_size)
+    max_h = global_device_context_store.max_egl_height
+    max_w = global_device_context_store.max_egl_width
+    if height > max_h or width > max_w:
+        raise ValueError(
+            f"Max rasterization size is height={max_h}, width={max_w}. "
+            f"Cannot raster an image of size {height}, {width}. You can change max "
+            "allowed rasterization size by setting max_egl_height and max_egl_width "
+            "on global_device_context_store before any EGL context is created."
+        )
+    return height, width
+
+
+class MeshRasterizerOpenGL(nn.Module):
+    """
+    EXPERIMENTAL, USE WITH CAUTION
+
+    This class implements methods for rasterizing a batch of heterogeneous
+    Meshes using OpenGL. This rasterizer, as opposed to MeshRasterizer, is
+    *not differentiable* and needs to be used with shading methods such as
+    SplatterPhongShader, which do not require differentiable rasterization.
+    It is, however, faster: on a 2M-faced mesh, about 20x so.
+
+    Fragments output by MeshRasterizerOpenGL and MeshRasterizer should have near
+    identical pix_to_face, bary_coords and zbuf. However, MeshRasterizerOpenGL does not
+    return Fragments.dists which is only relevant to SoftPhongShader and
+    SoftSilhouetteShader. These do not work with MeshRasterizerOpenGL (because it is
+    not differentiable).
+    """
+
+    def __init__(
+        self,
+        cameras: Optional[Union[FoVOrthographicCameras, FoVPerspectiveCameras]] = None,
+        raster_settings=None,
+    ) -> None:
+        """
+        Args:
+            cameras: A cameras object which has a `transform_points` method
+                which returns the transformed points after applying the
+                world-to-view and view-to-ndc transformations. Currently, only FoV
+                cameras are supported.
+            raster_settings: the parameters for rasterization. This should be a
+                named tuple.
+        """
+        super().__init__()
+        if raster_settings is None:
+            raster_settings = RasterizationSettings()
+        self.raster_settings = raster_settings
+        _check_raster_settings(self.raster_settings)
+        self.cameras = cameras
+        self.image_size = _parse_and_verify_image_size(self.raster_settings.image_size)
+
+        self.opengl_machinery = _OpenGLMachinery(
+            max_faces=self.raster_settings.max_faces_opengl,
+        )
+
+    def forward(self, meshes_world: Meshes, **kwargs) -> Fragments:
+        """
+        Args:
+            meshes_world: a Meshes object representing a batch of meshes with
+                coordinates in world space. The batch must live on a GPU.
+
+        Returns:
+            Fragments: Rasterization outputs as a named tuple. These are different than
+                Fragments returned by MeshRasterizer in two ways. First, we return no
+                `dist` which is only relevant to SoftPhongShader which doesn't work
+                with MeshRasterizerOpenGL (because it is not differentiable). Second,
+                the zbuf uses the opengl zbuf convention, where the z-vals are between 0
+                (at projection plane) and 1 (at clipping distance), and are a non-linear
+                function of the depth values of the camera ray intersections. In
+                contrast, MeshRasterizer's zbuf values are simply the distance of each
+                ray intersection from the camera.
+
+        Throws:
+            ValueError if meshes_world lives on the CPU.
+        """
+        if meshes_world.device == torch.device("cpu"):
+            raise ValueError("MeshRasterizerOpenGL works only on CUDA devices.")
+
+        raster_settings = kwargs.get("raster_settings", self.raster_settings)
+        _check_raster_settings(raster_settings)
+
+        image_size = (
+            _parse_and_verify_image_size(raster_settings.image_size) or self.image_size
+        )
+
+        # OpenGL needs vertices in NDC coordinates with un-flipped xy directions.
+        cameras_unpacked = kwargs.get("cameras", self.cameras)
+        _check_cameras(cameras_unpacked)
+        meshes_gl_ndc = _convert_meshes_to_gl_ndc(
+            meshes_world, image_size, cameras_unpacked, **kwargs
+        )
+
+        # Perspective projection will happen within the OpenGL rasterizer.
+        projection_matrix = cameras_unpacked.get_projection_transform(**kwargs)._matrix
+
+        # Run OpenGL rasterization machinery.
+        pix_to_face, bary_coords, zbuf = self.opengl_machinery(
+            meshes_gl_ndc, projection_matrix, image_size
+        )
+
+        # Return the Fragments and detach, because gradients don't go through OpenGL.
+        return Fragments(
+            pix_to_face=pix_to_face,
+            zbuf=zbuf,
+            bary_coords=bary_coords,
+            dists=None,
+        ).detach()
+
+    def to(self, device):
+        # Manually move the cameras, as they are not a subclass of nn.Module.
+        if self.cameras is not None:
+            self.cameras = self.cameras.to(device)
+        # Create a new OpenGLMachinery, as its member variables can be tied to a GPU.
+        self.opengl_machinery = _OpenGLMachinery(
+            max_faces=self.raster_settings.max_faces_opengl,
+        )
+        return self  # nn.Module.to() returns the module; keep `m = m.to(dev)` working.
+
+
+class _OpenGLMachinery:
+    """
+    A class holding OpenGL machinery used by MeshRasterizerOpenGL.
+    """
+
+    def __init__(
+        self,
+        max_faces: int = 10_000_000,
+    ) -> None:
+        self.max_faces = max_faces
+        self.program = None
+
+        # These will be created on an appropriate GPU each time we render a new mesh on
+        # that GPU for the first time.
+        self.egl_context = None
+        self.cuda_context = None
+        self.perspective_projection_uniform = None
+        self.mesh_buffer_object = None
+        self.vao = None
+        self.fbo = None
+        self.cuda_buffer = None
+
+    def __call__(
+        self,
+        meshes_gl_ndc: Meshes,
+        projection_matrix: torch.Tensor,
+        image_size: Tuple[int, int],
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        """
+        Rasterize a batch of meshes, using a given batch of projection matrices and
+        image size.
+
+        Args:
+            meshes_gl_ndc: A Meshes object, with vertices in the OpenGL NDC convention.
+            projection_matrix: A batch of 4x4 projection matrices, either a single one
+                (batch size 1) shared by all meshes, or one per mesh in meshes_gl_ndc.
+            image_size: Image size to rasterize. Must be smaller than the max height and
+                width stored in global_device_context_store.
+
+        Returns:
+            pix_to_faces: A BHW1 tensor of ints, filled with -1 where no face projects
+                to a given pixel.
+            bary_coords: A (B, H, W, 1, 3) float tensor, filled with -1 where no face
+                projects to a given pixel.
+            zbuf: A BHW1 float tensor, filled with -1 where no face projects to a given
+                pixel (it is read from the color buffer, which is cleared to -1).
+                NOTE(review): the values are the interpolated `depth` output of the
+                geometry shader, i.e. the z coordinate of the vertices passed in, not
+                the contents of the OpenGL depth buffer -- TODO confirm convention.
+        """
+
+        self.initialize_device_data(meshes_gl_ndc.device)
+        with self.egl_context.active_and_locked():
+            # Perspective projection happens in OpenGL. Move the matrix over if there's only
+            # a single camera shared by all the meshes.
+            if projection_matrix.shape[0] == 1:
+                self._projection_matrix_to_opengl(projection_matrix)
+
+            pix_to_faces = []
+            bary_coords = []
+            zbufs = []
+
+            # pyre-ignore Incompatible parameter type [6]
+            for mesh_id, mesh in enumerate(meshes_gl_ndc):
+                pix_to_face, bary_coord, zbuf = self._rasterize_mesh(
+                    mesh,
+                    image_size,
+                    projection_matrix=projection_matrix[mesh_id]
+                    if projection_matrix.shape[0] > 1
+                    else None,
+                )
+                pix_to_faces.append(pix_to_face)
+                bary_coords.append(bary_coord)
+                zbufs.append(zbuf)
+
+        return (
+            torch.cat(pix_to_faces, dim=0),
+            torch.cat(bary_coords, dim=0),
+            torch.cat(zbufs, dim=0),
+        )
+
+    def initialize_device_data(self, device) -> None:
+        """
+        Initialize data specific to a GPU device: the EGL and CUDA contexts, the OpenGL
+        program, as well as various buffer and array objects used to communicate with
+        OpenGL.
+
+        Args:
+            device: A torch.device.
+        """
+        self.egl_context = global_device_context_store.get_egl_context(device)
+        self.cuda_context = global_device_context_store.get_cuda_context(device)
+
+        # self.program represents the OpenGL program we use for rasterization.
+        if global_device_context_store.get_context_data(device) is None:
+            with self.egl_context.active_and_locked():
+                self.program = self._compile_and_link_gl_program()
+                self._set_up_gl_program_properties(self.program)
+
+                # Create objects used to transfer data into and out of the program.
+                (
+                    self.perspective_projection_uniform,
+                    self.mesh_buffer_object,
+                    self.vao,
+                    self.fbo,
+                ) = self._prepare_persistent_opengl_objects(
+                    self.program,
+                    self.max_faces,
+                )
+
+                # Register the input buffer with pycuda, to transfer data directly into it.
+                self.cuda_context.push()
+                self.cuda_buffer = pycuda.gl.RegisteredBuffer(
+                    int(self.mesh_buffer_object),
+                    pycuda.gl.graphics_map_flags.WRITE_DISCARD,
+                )
+                self.cuda_context.pop()
+
+            global_device_context_store.set_context_data(
+                device,
+                (
+                    self.program,
+                    self.perspective_projection_uniform,
+                    self.mesh_buffer_object,
+                    self.vao,
+                    self.fbo,
+                    self.cuda_buffer,
+                ),
+            )
+        (
+            self.program,
+            self.perspective_projection_uniform,
+            self.mesh_buffer_object,
+            self.vao,
+            self.fbo,
+            self.cuda_buffer,
+        ) = global_device_context_store.get_context_data(device)
+
+    def release(self) -> None:
+        """
+        Release CUDA and OpenGL resources.
+        """
+        # Finish all current operations.
+        torch.cuda.synchronize()
+        self.cuda_context.synchronize()
+
+        # Free pycuda resources.
+        self.cuda_context.push()
+        self.cuda_buffer.unregister()
+        self.cuda_context.pop()
+
+        # Free GL resources.
+        gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, self.fbo)
+        gl.glDeleteFramebuffers(1, [self.fbo])
+        gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, 0)
+        del self.fbo
+
+        gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, self.mesh_buffer_object)
+        gl.glDeleteBuffers(1, [self.mesh_buffer_object])
+        gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, 0)
+        del self.mesh_buffer_object
+
+        gl.glDeleteProgram(self.program)
+        self.egl_context.release()
+
+    def _projection_matrix_to_opengl(self, projection_matrix: torch.Tensor) -> None:
+        """
+        Transfer a torch projection matrix to OpenGL.
+
+        Args:
+            projection_matrix: A float tensor holding one 4x4 matrix (16 elements).
+        """
+        gl.glUseProgram(self.program)
+        gl.glUniformMatrix4fv(
+            self.perspective_projection_uniform,
+            1,
+            gl.GL_FALSE,
+            projection_matrix.detach().flatten().cpu().numpy().astype(np.float32),
+        )
+        gl.glUseProgram(0)
+
+    def _rasterize_mesh(
+        self,
+        mesh: Meshes,
+        image_size: Tuple[int, int],
+        projection_matrix: Optional[torch.Tensor] = None,
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        """
+        Rasterize a single mesh using OpenGL.
+
+        Args:
+            mesh: A Meshes object, containing a single mesh only.
+            projection_matrix: Optional 4x4 projection matrix for this mesh; if None,
+                the matrix most recently uploaded to the OpenGL program is used.
+            image_size: Image size to rasterize. Must be smaller than the max height and
+                width stored in global_device_context_store.
+
+        Returns:
+            pix_to_faces: A 1HW1 tensor of ints, filled with -1 where no face projects
+                to a given pixel.
+            bary_coords: A (1, H, W, 1, 3) float tensor, filled with -1 where no face
+                projects to a given pixel.
+            zbuf: A 1HW1 float tensor, filled with -1 where no face projects to a given
+                pixel (it is read from the color buffer, which is cleared to -1).
+                NOTE(review): the values are the interpolated `depth` output of the
+                geometry shader, i.e. the z coordinate of the vertices passed in, not
+                the contents of the OpenGL depth buffer -- TODO confirm convention.
+        """
+        height, width = image_size
+        # Extract face_verts and move them to OpenGL as well. We use pycuda to
+        # directly move the vertices on the GPU, to avoid a costly torch/GPU -> CPU
+        # -> openGL/GPU trip.
+        verts_packed = mesh.verts_packed().detach()
+        faces_packed = mesh.faces_packed().detach()
+        face_verts = verts_packed[faces_packed].reshape(-1, 9)
+        _torch_to_opengl(face_verts, self.cuda_context, self.cuda_buffer)
+
+        if projection_matrix is not None:
+            self._projection_matrix_to_opengl(projection_matrix)
+
+        # Start OpenGL operations.
+        gl.glUseProgram(self.program)
+
+        # Render an image of size (width, height).
+        gl.glViewport(0, 0, width, height)
+
+        gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, self.fbo)
+        # Clear the output framebuffer. The "background" value for both pix_to_face
+        # as well as bary_coords is -1 (background = pixels which the rasterizer
+        # projected no triangle to).
+        gl.glClearColor(-1.0, -1.0, -1.0, -1.0)
+        gl.glClearDepth(1.0)
+        # pyre-ignore Unsupported operand [58]
+        gl.glClear(gl.GL_COLOR_BUFFER_BIT | gl.GL_DEPTH_BUFFER_BIT)
+
+        # Run the actual rendering. The face_verts were transported to the OpenGL
+        # program into a shader storage buffer which is used directly in the geometry
+        # shader. Here, we only pass the number of these vertices to the vertex shader
+        # (which doesn't do anything and passes directly to the geometry shader).
+        gl.glBindVertexArray(self.vao)
+        gl.glDrawArrays(gl.GL_POINTS, 0, len(face_verts))
+        gl.glBindVertexArray(0)
+
+        # Read out the result. We ignore the depth buffer. The RGBA color buffer stores
+        # two barycentrics in RG, the shader's depth output in B and pix_to_face in A.
+        bary_depth_p2f_gl = gl.glReadPixels(
+            0,
+            0,
+            width,
+            height,
+            gl.GL_RGBA,
+            gl.GL_FLOAT,
+        )
+
+        gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, 0)
+        gl.glUseProgram(0)
+
+        # Create torch tensors containing the results.
+        bary_depth_p2f = (
+            torch.frombuffer(bary_depth_p2f_gl, dtype=torch.float)
+            .reshape(1, height, width, 1, -1)
+            .to(verts_packed.device)
+        )
+
+        # Read out barycentrics. GL only outputs the first two, so we need to compute
+        # the third one and make sure we still leave no-intersection pixels with -1.
+        barycentric_coords = torch.cat(
+            [
+                bary_depth_p2f[..., :2],
+                1.0 - bary_depth_p2f[..., 0:1] - bary_depth_p2f[..., 1:2],
+            ],
+            dim=-1,
+        )
+        barycentric_coords = torch.where(
+            barycentric_coords == 3, -1, barycentric_coords
+        )
+        depth = bary_depth_p2f[..., 2:3].squeeze(-1)
+        pix_to_face = bary_depth_p2f[..., -1].long()
+
+        return pix_to_face, barycentric_coords, depth
+
+    @staticmethod
+    def _compile_and_link_gl_program():
+        """
+        Compile the vertex, geometry, and fragment shaders and link them into an OpenGL
+        program. The shader sources are strongly inspired by https://github.com/tensorflow/
+        graphics/blob/master/tensorflow_graphics/rendering/opengl/rasterization_backend.py.
+
+        Returns: An OpenGL program for mesh rasterization.
+        Raises: RuntimeError if a shader fails to compile or the program fails to link.
+        """
+        program = gl.glCreateProgram()
+        shader_objects = []
+
+        for shader_string, shader_type in zip(
+            [vertex_shader, geometry_shader, fragment_shader],
+            [gl.GL_VERTEX_SHADER, gl.GL_GEOMETRY_SHADER, gl.GL_FRAGMENT_SHADER],
+        ):
+            shader_objects.append(gl.glCreateShader(shader_type))
+            gl.glShaderSource(shader_objects[-1], shader_string)
+            gl.glCompileShader(shader_objects[-1])
+            status = gl.glGetShaderiv(shader_objects[-1], gl.GL_COMPILE_STATUS)
+            if status == gl.GL_FALSE:
+                # Read the info log before deleting the shader it belongs to.
+                error_msg = gl.glGetShaderInfoLog(shader_objects[-1]).decode("utf-8")
+                gl.glDeleteShader(shader_objects[-1])
+                gl.glDeleteProgram(program)
+                raise RuntimeError(f"Compilation failure:\n {error_msg}")
+
+            gl.glAttachShader(program, shader_objects[-1])
+            gl.glDeleteShader(shader_objects[-1])
+
+        gl.glLinkProgram(program)
+        status = gl.glGetProgramiv(program, gl.GL_LINK_STATUS)
+        # Likewise, read the program's info log before deleting the program.
+        if status == gl.GL_FALSE:
+            error_msg = gl.glGetProgramInfoLog(program)
+            gl.glDeleteProgram(program)
+            raise RuntimeError(f"Link failure:\n {error_msg}")
+
+        return program
+
+    @staticmethod
+    def _set_up_gl_program_properties(program) -> None:
+        """
+        Set basic OpenGL program properties: disable blending, enable depth testing,
+        and disable face culling.
+        """
+        gl.glUseProgram(program)
+        gl.glDisable(gl.GL_BLEND)
+        gl.glEnable(gl.GL_DEPTH_TEST)
+        gl.glDisable(gl.GL_CULL_FACE)
+        gl.glUseProgram(0)
+
+    @staticmethod
+    def _prepare_persistent_opengl_objects(program, max_faces: int):
+        """
+        Prepare OpenGL objects that we want to persist between rasterizations.
+
+        Args:
+            program: The OpenGL program the resources will be tied to.
+            max_faces: Max number of faces of any mesh we will rasterize.
+
+        Returns:
+            perspective_projection_uniform: An OpenGL object pointing to a location of
+                the perspective projection matrix in OpenGL memory.
+            mesh_buffer_object: An OpenGL object pointing to the location of the mesh
+                buffer object in OpenGL memory.
+            vao: The OpenGL input array object.
+            fbo: The OpenGL output framebuffer.
+
+        """
+        gl.glUseProgram(program)
+        # Get location of the "uniform" (that is, an internal OpenGL variable available
+        # to the shaders) that we'll load the projection matrices to.
+        perspective_projection_uniform = gl.glGetUniformLocation(
+            program, "perspective_projection"
+        )
+
+        # Mesh buffer object -- our main input point. We'll copy the mesh here
+        # from pytorch/cuda. The buffer needs enough space to store the three vertices
+        # of each face, that is its size in bytes is
+        # max_faces * 3 (vertices) * 3 (coordinates) * 4 (bytes)
+        mesh_buffer_object = gl.glGenBuffers(1)
+        gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, mesh_buffer_object)
+
+        gl.glBufferData(
+            gl.GL_SHADER_STORAGE_BUFFER,
+            max_faces * 9 * 4,
+            np.zeros((max_faces, 9), dtype=np.float32),
+            gl.GL_DYNAMIC_COPY,
+        )
+
+        # Input vertex array object. We will only use it implicitly for indexing the
+        # vertices, but the actual input data is passed in the shader storage buffer.
+        vao = gl.glGenVertexArrays(1)
+
+        # Create the framebuffer object (fbo) where we'll store output data.
+        MAX_EGL_WIDTH = global_device_context_store.max_egl_width
+        MAX_EGL_HEIGHT = global_device_context_store.max_egl_height
+        color_buffer = gl.glGenRenderbuffers(1)
+        gl.glBindRenderbuffer(gl.GL_RENDERBUFFER, color_buffer)
+        gl.glRenderbufferStorage(
+            gl.GL_RENDERBUFFER, gl.GL_RGBA32F, MAX_EGL_WIDTH, MAX_EGL_HEIGHT
+        )
+        gl.glBindRenderbuffer(gl.GL_RENDERBUFFER, 0)
+
+        depth_buffer = gl.glGenRenderbuffers(1)
+        gl.glBindRenderbuffer(gl.GL_RENDERBUFFER, depth_buffer)
+        gl.glRenderbufferStorage(
+            gl.GL_RENDERBUFFER, gl.GL_DEPTH_COMPONENT, MAX_EGL_WIDTH, MAX_EGL_HEIGHT
+        )
+        gl.glBindRenderbuffer(gl.GL_RENDERBUFFER, 0)
+
+        fbo = gl.glGenFramebuffers(1)
+        gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, fbo)
+        gl.glFramebufferRenderbuffer(
+            gl.GL_FRAMEBUFFER, gl.GL_COLOR_ATTACHMENT0, gl.GL_RENDERBUFFER, color_buffer
+        )
+        gl.glFramebufferRenderbuffer(
+            gl.GL_FRAMEBUFFER, gl.GL_DEPTH_ATTACHMENT, gl.GL_RENDERBUFFER, depth_buffer
+        )
+        gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, 0)
+
+        gl.glUseProgram(0)
+        return perspective_projection_uniform, mesh_buffer_object, vao, fbo
+
+
+def _check_cameras(cameras) -> None:
+    """Raise ValueError if cameras is None or not usable by MeshRasterizerOpenGL."""
+    if cameras is None:
+        msg = "Cameras must be specified either at initialization or in the forward "
+        raise ValueError(msg + "pass of MeshRasterizerOpenGL")
+    # Only the class name is checked, so subclasses of these cameras are not rejected.
+    if type(cameras).__name__ in {"PerspectiveCameras", "OrthographicCameras"}:
+        raise ValueError(
+            "MeshRasterizerOpenGL only works with FoVPerspectiveCameras and "
+            "FoVOrthographicCameras, which are OpenGL compatible."
+        )
+
+
+def _check_raster_settings(raster_settings) -> None:
+    """Warn about ignored settings; raise for ones MeshRasterizerOpenGL can't honor."""
+    warn = warnings.warn
+    # Unsupported-but-harmless options are ignored with a warning.
+    if raster_settings.faces_per_pixel > 1:
+        warn("MeshRasterizerOpenGL currently works only with one face per pixel.")
+    if raster_settings.cull_backfaces:
+        warn(
+            "MeshRasterizerOpenGL cannot cull backfaces yet, rasterizing without culling."
+        )
+    if raster_settings.cull_to_frustum:
+        warn(
+            "MeshRasterizerOpenGL cannot cull to frustum yet, rasterizing without culling."
+        )
+    # The remaining options cannot simply be ignored, so they raise instead.
+    if raster_settings.z_clip_value is not None:
+        raise NotImplementedError("MeshRasterizerOpenGL cannot do z-clipping yet.")
+    if raster_settings.perspective_correct is False:
+        message = "MeshRasterizerOpenGL always uses perspective-correct interpolation."
+        raise ValueError(message)
+
+
+def _convert_meshes_to_gl_ndc(
+    meshes_world: Meshes, image_size: Tuple[int, int], camera, **kwargs
+) -> Meshes:
+    """
+    Map a batch of meshes from world coordinates to OpenGL-style NDC coordinates.
+
+    Args:
+        meshes_world: Meshes in the world coordinate system.
+        image_size: (height, width) of the target image. OpenGL stretches the
+            [-1, 1] NDC range over the whole viewport, so for non-square images y is
+            scaled by width / height (tall images) or x by height / width (wide ones).
+        camera: FoV cameras.
+        kwargs['R'], kwargs['T']: If present, used to define the world-view transform.
+
+    Returns:
+        The Meshes produced by meshes_world.update_padded with the converted vertices.
+    """
+    height, width = image_size
+    world_to_ndc = camera.get_world_to_view_transform(**kwargs).compose(
+        camera.get_ndc_camera_transform(**kwargs)
+    )
+    verts_ndc = world_to_ndc.transform_points(meshes_world.verts_padded(), eps=None)
+
+    # Negate x and y in place before handing the vertices over to OpenGL.
+    for axis in (0, 1):
+        verts_ndc[..., axis] = -verts_ndc[..., axis]
+
+    # OpenGL expands the [-1, 1] NDC range to fill the viewport, so with a
+    # non-square viewport the NDCs have to be squeezed to compensate.
+    if height > width:
+        squeeze = [1, width / height, 1]
+    elif width > height:
+        squeeze = [height / width, 1, 1]
+    else:
+        squeeze = None
+    if squeeze is not None:
+        verts_ndc = verts_ndc * torch.tensor(
+            squeeze, dtype=verts_ndc.dtype, device=verts_ndc.device
+        )
+
+    return meshes_world.update_padded(new_verts_padded=verts_ndc)
diff --git a/pytorch3d/pytorch3d/renderer/points/__init__.py b/pytorch3d/pytorch3d/renderer/points/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2fde33bafbaedbb4ab63c945001acaf77082165c
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/points/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .compositor import AlphaCompositor, NormWeightedCompositor
+from .pulsar.unified import PulsarPointsRenderer
+from .rasterize_points import rasterize_points
+from .rasterizer import PointsRasterizationSettings, PointsRasterizer
+from .renderer import PointsRenderer
+
+
+__all__ = [name for name in globals() if not name.startswith("_")]
diff --git a/pytorch3d/pytorch3d/renderer/points/compositor.py b/pytorch3d/pytorch3d/renderer/points/compositor.py
new file mode 100644
index 0000000000000000000000000000000000000000..0846e53e1ef65b9bfc53727859fe294ea43e5c65
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/points/compositor.py
@@ -0,0 +1,114 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Optional, Tuple, Union
+
+import torch
+import torch.nn as nn
+
+from ..compositing import alpha_composite, norm_weighted_sum
+
+
+# A compositor should take as input 3D points and some corresponding information.
+# Given this information, the compositor can:
+#     - blend colors across the top K vertices at a pixel
+
+
+class AlphaCompositor(nn.Module):
+    """
+    Blend the points at each pixel using alpha compositing.
+    """
+
+    def __init__(
+        self, background_color: Optional[Union[Tuple, List, torch.Tensor]] = None
+    ) -> None:
+        super().__init__()
+        # Default background; a "background_color" kwarg to forward overrides it.
+        self.background_color = background_color
+
+    def forward(self, fragments, alphas, ptclds, **kwargs) -> torch.Tensor:
+        bg = kwargs.get("background_color", self.background_color)
+        composited = alpha_composite(fragments, alphas, ptclds)  # (N, C, H, W)
+        if bg is None:
+            # No background requested: return the composited features as-is.
+            return composited
+        # Fill the pixels that no point covers with the background color.
+        return _add_background_color_to_images(fragments, composited, bg)
+
+
+class NormWeightedCompositor(nn.Module):
+    """
+    Blend the points at each pixel with a normalized weighted sum.
+    """
+
+    def __init__(
+        self, background_color: Optional[Union[Tuple, List, torch.Tensor]] = None
+    ) -> None:
+        super().__init__()
+        # Used by forward unless a "background_color" keyword overrides it.
+        self.background_color = background_color
+
+    def forward(self, fragments, alphas, ptclds, **kwargs) -> torch.Tensor:
+        bg = kwargs.get("background_color", self.background_color)
+        blended = norm_weighted_sum(fragments, alphas, ptclds)  # (N, C, H, W)
+        if bg is None:
+            # Nothing to fill in: hand back the blended features untouched.
+            return blended
+        # Replace the pixels without points by the background color.
+        return _add_background_color_to_images(fragments, blended, bg)
+
+
+def _add_background_color_to_images(pix_idxs, images, background_color):
+    """
+    Fill every pixel that received no point with the given background_color.
+
+    Args:
+        pix_idxs: Tensor of shape (N, points_per_pixel, H, W) holding the indices
+            of the nearest points per pixel; a negative first entry marks a
+            pixel without points.
+        images: Tensor of shape (N, C, H, W) holding the accumulated features.
+        background_color: scalar, or Tensor, list or tuple with C or C - 1 values.
+            When one value short of C, an alpha value of 1 is appended.
+
+    Returns:
+        Tensor of shape (N, C, H, W) where pixels without points hold the
+        background color and every other pixel keeps its accumulated features.
+
+    Raises:
+        ValueError: if background_color has more than one dimension, or its
+            channel count does not fit images.
+    """
+    # True wherever the closest slot holds a negative (i.e. no) point index.
+    empty_pixels = pix_idxs[:, 0] < 0  # (N, H, W)
+
+    bg = background_color
+    if not torch.is_tensor(bg):
+        bg = images.new_tensor(bg)
+    if bg.ndim == 0:
+        # A scalar is repeated for every channel.
+        bg = bg.expand(images.shape[1])
+    if bg.ndim > 1:
+        raise ValueError("Wrong shape of background_color")
+
+    # Match the dtype and device of images.
+    bg = bg.to(images)
+    n_channels = images.shape[1]
+
+    if bg.shape[0] + 1 == n_channels:
+        # One channel short: treat it as the missing alpha and set it to 1.
+        bg = torch.cat([bg, images.new_ones(1)])
+    if n_channels != bg.shape[0]:
+        raise ValueError(
+            "Background color has %s channels not %s" % (bg.shape[0], n_channels)
+        )
+
+    # masked_scatter needs channels last: permute, fill, then permute back.
+    channels_last = images.permute(0, 2, 3, 1)
+    # One copy of the color per empty pixel serves as the scatter source.
+    fill = bg[None, :].expand(empty_pixels.sum(), -1)
+    filled = channels_last.masked_scatter(empty_pixels[..., None], fill)
+
+    return filled.permute(0, 3, 1, 2)
diff --git a/pytorch3d/pytorch3d/renderer/points/pulsar/__init__.py b/pytorch3d/pytorch3d/renderer/points/pulsar/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..22fe5de613d2423645f0b936f53f18f0b86cfcf1
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/points/pulsar/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .renderer import Renderer  # noqa: F401
diff --git a/pytorch3d/pytorch3d/renderer/points/pulsar/renderer.py b/pytorch3d/pytorch3d/renderer/points/pulsar/renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ba7dfd5da4c0f9297dd9399302d27866829c959
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/points/pulsar/renderer.py
@@ -0,0 +1,664 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""pulsar renderer PyTorch integration.
+
+Proper Python support for pytorch requires creating a torch.autograd.function
+(independent of whether this is being done within the C++ module). This is done
+here and a torch.nn.Module is exposed for the use in more complex models.
+"""
+import logging
+import warnings
+from typing import Optional, Tuple, Union
+
+import torch
+from pytorch3d import _C
+from pytorch3d.transforms import axis_angle_to_matrix, rotation_6d_to_matrix
+
+
+LOGGER = logging.getLogger(__name__)
+GAMMA_WARNING_EMITTED = False  # flipped to True after the low-gamma warning
+AXANGLE_WARNING_EMITTED = False  # flipped to True after the axis-angle warning
+
+
+class _Render(torch.autograd.Function):
+    """
+    Differentiable rendering function for the Pulsar renderer.
+
+    Usually this will be used through the `Renderer` module, which takes care of
+    setting up the buffers and putting them on the correct device. If you use
+    the function directly, you will have to do this manually.
+
+    The steps for this are two-fold: first, you need to create a native Renderer
+    object to provide the required buffers. This is the `native_renderer` parameter
+    for this function. You can create it by creating a `pytorch3d._C.PulsarRenderer`
+    object (with parameters for width, height and maximum number of balls it should
+    be able to render). This object by default resides on the CPU. If you want to
+    shift the buffers to a different device, just assign an empty tensor on the target
+    device to its property `device_tracker`.
+
+    To convert camera parameters from a more convenient representation to the
+    required vectors as in this function, you can use the static
+    function `pytorch3d.renderer.points.pulsar.Renderer._transform_cam_params`.
+
+    Args:
+        * ctx: Pytorch context.
+        * vert_pos: vertex positions. [Bx]Nx3 tensor of positions in 3D space.
+        * vert_col: vertex colors. [Bx]NxK tensor of channels.
+        * vert_rad: vertex radii. [Bx]N tensor of radiuses, >0.
+        * cam_pos: camera position(s). [Bx]3 tensor in 3D coordinates.
+        * pixel_0_0_center: [Bx]3 tensor center(s) of the upper left pixel(s) in
+                            world coordinates.
+        * pixel_vec_x: [Bx]3 tensor from one pixel center to the next in image x
+                       direction in world coordinates.
+        * pixel_vec_y: [Bx]3 tensor from one pixel center to the next in image y
+                       direction in world coordinates.
+        * focal_length: [Bx]1 tensor of focal lengths in world coordinates.
+        * principal_point_offsets: [Bx]2 tensor of principal point offsets in pixels.
+        * gamma: sphere transparency in [1.,1E-5], with 1 being mostly transparent.
+                 [Bx]1.
+        * max_depth: maximum depth for spheres to render. Set this as tightly
+                     as possible to have good numerical accuracy for gradients.
+        * native_renderer: a `pytorch3d._C.PulsarRenderer` object.
+        * min_depth: a float with the minimum depth a sphere must have to be rendered.
+                     Must be 0. or > max(focal_length).
+        * bg_col: K tensor with a background color to use or None (uses all ones).
+        * opacity: [Bx]N tensor of opacity values in [0., 1.] or None (uses all ones).
+        * percent_allowed_difference: a float in [0., 1.[ with the maximum allowed
+                     difference in color space. This is used to speed up the
+                     computation. Default: 0.01.
+        * max_n_hits: a hard limit on the number of hits per ray. Default: max int.
+        * mode: render mode in {0, 1}. 0: render an image; 1: render the hit map.
+        * return_forward_info: whether to return a second map. This second map contains
+            13 channels: first channel contains sm_m (the maximum exponent factor
+            observed), the second sm_d (the normalization denominator, the sum of all
+            coefficients), the third the maximum closest possible intersection for a
+            hit. The following channels alternate with the float encoded integer index
+            of a sphere and its weight. They are the five spheres with the highest
+            color contribution to this pixel color, ordered descending.
+
+    Returns:
+        * image: [Bx]HxWxK float tensor with the resulting image.
+        * forw_info: [Bx]HxWx13 float forward information as described above,
+              if enabled.
+    """
+
+    @staticmethod
+    # pyre-fixme[14]: `forward` overrides method defined in `Function` inconsistently.
+    def forward(
+        ctx,
+        vert_pos,
+        vert_col,
+        vert_rad,
+        cam_pos,
+        pixel_0_0_center,
+        pixel_vec_x,
+        pixel_vec_y,
+        focal_length,
+        principal_point_offsets,
+        gamma,
+        max_depth,
+        native_renderer,
+        min_depth=0.0,
+        bg_col=None,
+        opacity=None,
+        percent_allowed_difference=0.01,
+        # pyre-fixme[16]: Module `_C` has no attribute `MAX_UINT`.
+        max_n_hits=_C.MAX_UINT,
+        mode=0,
+        return_forward_info=False,
+    ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
+        if mode != 0:
+            assert not return_forward_info, (
+                "You are using a non-standard rendering mode. This does "
+                "not provide gradients, and also no `forward_info`. Please "
+                "set `return_forward_info` to `False`."
+            )
+        ctx.gamma = gamma
+        ctx.max_depth = max_depth
+        ctx.min_depth = min_depth
+        ctx.percent_allowed_difference = percent_allowed_difference
+        ctx.max_n_hits = max_n_hits
+        ctx.mode = mode
+        ctx.native_renderer = native_renderer
+        image, info = ctx.native_renderer.forward(
+            vert_pos,
+            vert_col,
+            vert_rad,
+            cam_pos,
+            pixel_0_0_center,
+            pixel_vec_x,
+            pixel_vec_y,
+            focal_length,
+            principal_point_offsets,
+            gamma,
+            max_depth,
+            min_depth,
+            bg_col,
+            opacity,
+            percent_allowed_difference,
+            max_n_hits,
+            mode,
+        )
+        if mode != 0:
+            # Backprop not possible!
+            info = None
+        # Prepare for backprop.
+        ctx.save_for_backward(
+            vert_pos,
+            vert_col,
+            vert_rad,
+            cam_pos,
+            pixel_0_0_center,
+            pixel_vec_x,
+            pixel_vec_y,
+            focal_length,
+            principal_point_offsets,
+            bg_col,
+            opacity,
+            image,
+            info,
+        )
+        if return_forward_info:
+            return image, info
+        else:
+            return image
+
+    @staticmethod
+    def backward(ctx, grad_im, *args):
+        global GAMMA_WARNING_EMITTED
+        (
+            vert_pos,
+            vert_col,
+            vert_rad,
+            cam_pos,
+            pixel_0_0_center,
+            pixel_vec_x,
+            pixel_vec_y,
+            focal_length,
+            principal_point_offsets,
+            bg_col,
+            opacity,
+            image,
+            info,
+        ) = ctx.saved_tensors
+        if (
+            (
+                ctx.needs_input_grad[0]
+                or ctx.needs_input_grad[2]
+                or ctx.needs_input_grad[3]
+                or ctx.needs_input_grad[4]
+                or ctx.needs_input_grad[5]
+                or ctx.needs_input_grad[6]
+                or ctx.needs_input_grad[7]
+            )
+            and ctx.gamma < 1e-3
+            and not GAMMA_WARNING_EMITTED
+        ):
+            warnings.warn(
+                "Optimizing for non-color parameters and having a gamma value < 1E-3! "
+                "This is probably not going to produce usable gradients."
+            )
+            GAMMA_WARNING_EMITTED = True
+        if ctx.mode == 0:
+            (
+                grad_pos,
+                grad_col,
+                grad_rad,
+                grad_cam_pos,
+                grad_pixel_0_0_center,
+                grad_pixel_vec_x,
+                grad_pixel_vec_y,
+                grad_opacity,
+            ) = ctx.native_renderer.backward(
+                grad_im,
+                image,
+                info,
+                vert_pos,
+                vert_col,
+                vert_rad,
+                cam_pos,
+                pixel_0_0_center,
+                pixel_vec_x,
+                pixel_vec_y,
+                focal_length,
+                principal_point_offsets,
+                ctx.gamma,
+                ctx.max_depth,
+                ctx.min_depth,
+                bg_col,
+                opacity,
+                ctx.percent_allowed_difference,
+                ctx.max_n_hits,
+                ctx.mode,
+                ctx.needs_input_grad[0],  # vert_pos
+                ctx.needs_input_grad[1],  # vert_col
+                ctx.needs_input_grad[2],  # vert_rad
+                ctx.needs_input_grad[3]  # cam_pos
+                or ctx.needs_input_grad[4]  # pixel_0_0_center
+                or ctx.needs_input_grad[5]  # pixel_vec_x
+                or ctx.needs_input_grad[6]  # pixel_vec_y
+                or ctx.needs_input_grad[7],  # focal_length
+                ctx.needs_input_grad[14],  # opacity
+                None,  # No debug information provided.
+            )
+        else:
+            raise ValueError(
+                "Performing a backward pass for a "
+                "rendering with `mode != 0`! This is not possible."
+            )
+        return (
+            grad_pos,
+            grad_col,
+            grad_rad,
+            grad_cam_pos,
+            grad_pixel_0_0_center,
+            grad_pixel_vec_x,
+            grad_pixel_vec_y,
+            None,  # focal_length
+            None,  # principal_point_offsets
+            None,  # gamma
+            None,  # max_depth
+            None,  # native_renderer
+            None,  # min_depth
+            None,  # bg_col
+            grad_opacity,
+            None,  # percent_allowed_difference
+            None,  # max_n_hits
+            None,  # mode
+            None,  # return_forward_info
+        )
+
+
+class Renderer(torch.nn.Module):
+    """
+    Differentiable rendering module for the Pulsar renderer.
+
+    Set the maximum number of balls to a reasonable value. It is used to determine
+    several buffer sizes. It is no problem to render fewer balls than this number,
+    but never more.
+
+    When optimizing for sphere positions, sphere radiuses or camera parameters you
+    have to use higher gamma values (closer to one) and larger sphere sizes: spheres
+    can only 'move' to areas that they cover, and only with higher gamma values exists
+    a gradient w.r.t. their color depending on their position.
+
+    Args:
+        * width: result image width in pixels.
+        * height: result image height in pixels.
+        * max_num_balls: the maximum number of balls this renderer will handle.
+        * orthogonal_projection: use an orthogonal instead of perspective projection.
+            Default: False.
+        * right_handed_system: use a right-handed instead of a left-handed coordinate
+            system. This is relevant for compatibility with other drawing or scanning
+            systems. Pulsar by default assumes a left-handed world and camera coordinate
+            system as known from mathematics with x-axis to the right, y axis up and z
+            axis for increasing depth along the optical axis. In the image coordinate
+            system, only the y axis is pointing down, leading still to a left-handed
+            system. If you set this to True, it is assuming a right-handed world and
+            camera coordinate system with x axis to the right, y axis to the top and
+            z axis decreasing along the optical axis. Again, the image coordinate
+            system has a flipped y axis, remaining a right-handed system.
+            Default: False.
+        * background_normalized_depth: the normalized depth the background is placed
+            at.
+            This is on a scale from 0. to 1. between the specified min and max depth
+            (see the forward function). The value 0. is the furthest depth whereas
+            1. is the closest. Be careful when setting the background too far front - it
+            may hide elements in your scene. Default: EPS.
+        * n_channels: the number of image content channels to use. This is usually three
+            for regular color representations, but can be a higher or lower number.
+            Default: 3.
+        * n_track: the number of spheres to track for gradient calculation per pixel.
+            Only the closest n_track spheres will receive gradients. Default: 5.
+    """
+
+    def __init__(
+        self,
+        width: int,
+        height: int,
+        max_num_balls: int,
+        orthogonal_projection: bool = False,
+        right_handed_system: bool = False,
+        # pyre-fixme[16]: Module `_C` has no attribute `EPS`.
+        background_normalized_depth: float = _C.EPS,
+        n_channels: int = 3,
+        n_track: int = 5,
+    ) -> None:
+        super(Renderer, self).__init__()
+        # pyre-fixme[16]: Module `pytorch3d` has no attribute `_C`.
+        self._renderer = _C.PulsarRenderer(
+            width,
+            height,
+            max_num_balls,
+            orthogonal_projection,
+            right_handed_system,
+            background_normalized_depth,
+            n_channels,
+            n_track,
+        )
+        self.register_buffer("device_tracker", torch.zeros(1))
+
+    @staticmethod
+    def sphere_ids_from_result_info_nograd(result_info: torch.Tensor) -> torch.Tensor:
+        """
+        Get the sphere IDs from a result info tensor.
+        """
+        if result_info.ndim == 3:
+            return Renderer.sphere_ids_from_result_info_nograd(result_info[None, ...])
+        # pyre-fixme[16]: Module `pytorch3d` has no attribute `_C`.
+        return _C.pulsar_sphere_ids_from_result_info_nograd(result_info)
+
+    @staticmethod
+    def depth_map_from_result_info_nograd(result_info: torch.Tensor) -> torch.Tensor:
+        """
+        Get the depth map from a result info tensor.
+
+        This returns a map of the same size as the image with just one channel
+        containing the closest intersection value at that position. Gradients
+        are not available for this tensor, but do note that you can use
+        `sphere_ids_from_result_info_nograd` to get the IDs of the spheres at
+        each position and directly create a loss on their depth if required.
+
+        The depth map contains -1. at positions where no intersection has
+        been detected.
+        """
+        return result_info[..., 4]
+
+    @staticmethod
+    def _transform_cam_params(
+        cam_params: torch.Tensor,
+        width: int,
+        height: int,
+        orthogonal: bool,
+        right_handed: bool,
+        first_R_then_T: bool = False,
+    ) -> Tuple[
+        torch.Tensor,
+        torch.Tensor,
+        torch.Tensor,
+        torch.Tensor,
+        torch.Tensor,
+        torch.Tensor,
+    ]:
+        """
+        Transform 8, 10, 11 or 13 component camera parameter vector(s) to the
+        internal camera representation.
+
+        The input vectors consists of:
+            * 3 components for camera position,
+            * 3 components for camera rotation (axis angle representation) or
+              6 components as described in "On the Continuity of Rotation
+              Representations in Neural Networks" (Zhou et al.),
+            * focal length,
+            * the sensor width in world coordinates,
+            * [optional] the principal point offset in x and y.
+
+        The sensor height is inferred by pixel size and sensor width to obtain
+        square pixels.
+
+        Args:
+            * cam_params: [Bx]{8, 10, 11, 13}, input tensors as described above.
+            * width: number of pixels in x direction.
+            * height: number of pixels in y direction.
+            * orthogonal: bool, whether an orthogonal projection is used
+                  (does not use focal length).
+            * right_handed: bool, whether to use a right handed system
+                  (negative z in camera direction).
+            * first_R_then_T: bool, whether to first rotate, then translate
+                  the camera (PyTorch3D convention).
+
+        Returns:
+            * pos_vec: the position vector in 3D,
+            * pixel_0_0_center: the center of the upper left pixel in world coordinates,
+            * pixel_vec_x: the step to move one pixel on the image x axis
+                   in world coordinates,
+            * pixel_vec_y: the step to move one pixel on the image y axis
+                   in world coordinates,
+            * focal_length: the focal lengths,
+            * principal_point_offsets: the principal point offsets in x, y.
+        """
+        global AXANGLE_WARNING_EMITTED
+        # Set up all direction vectors, i.e., the sensor direction of all axes.
+        assert width > 0
+        assert height > 0
+        batch_processing = True
+        if cam_params.ndimension() == 1:
+            batch_processing = False
+            cam_params = cam_params[None, :]
+        batch_size = cam_params.size(0)
+        continuous_rep = True
+        if cam_params.shape[1] in [8, 10]:
+            if cam_params.requires_grad and not AXANGLE_WARNING_EMITTED:
+                warnings.warn(
+                    "Using an axis angle representation for camera rotations. "
+                    "This has discontinuities and should not be used for optimization. "
+                    "Alternatively, use a six-component representation as described in "
+                    "'On the Continuity of Rotation Representations in Neural Networks'"
+                    " (Zhou et al.). "
+                    "The `pytorch3d.transforms` module provides "
+                    "facilities for using this representation."
+                )
+                AXANGLE_WARNING_EMITTED = True
+            continuous_rep = False
+        else:
+            assert cam_params.shape[1] in [11, 13]
+        pos_vec: torch.Tensor = cam_params[:, :3]
+        principal_point_offsets: torch.Tensor = torch.zeros(
+            (cam_params.shape[0], 2), dtype=torch.int32, device=cam_params.device
+        )
+        if continuous_rep:
+            rot_vec = cam_params[:, 3:9]
+            focal_length: torch.Tensor = cam_params[:, 9:10]
+            sensor_size_x = cam_params[:, 10:11]
+            if cam_params.shape[1] == 13:
+                principal_point_offsets: torch.Tensor = cam_params[:, 11:13].to(
+                    torch.int32
+                )
+        else:
+            rot_vec = cam_params[:, 3:6]
+            focal_length: torch.Tensor = cam_params[:, 6:7]
+            sensor_size_x = cam_params[:, 7:8]
+            if cam_params.shape[1] == 10:
+                principal_point_offsets: torch.Tensor = cam_params[:, 8:10].to(
+                    torch.int32
+                )
+        # Always get square pixels.
+        pixel_size_x = sensor_size_x / float(width)
+        sensor_size_y = height * pixel_size_x
+        if continuous_rep:
+            rot_mat = rotation_6d_to_matrix(rot_vec)
+        else:
+            rot_mat = axis_angle_to_matrix(rot_vec)
+        if first_R_then_T:
+            pos_vec = torch.matmul(rot_mat, pos_vec[..., None])[:, :, 0]
+        sensor_dir_x = torch.matmul(
+            rot_mat,
+            torch.tensor(
+                [1.0, 0.0, 0.0], dtype=torch.float32, device=rot_mat.device
+            ).repeat(batch_size, 1)[:, :, None],
+        )[:, :, 0]
+        sensor_dir_y = torch.matmul(
+            rot_mat,
+            torch.tensor(
+                [0.0, -1.0, 0.0], dtype=torch.float32, device=rot_mat.device
+            ).repeat(batch_size, 1)[:, :, None],
+        )[:, :, 0]
+        sensor_dir_z = torch.matmul(
+            rot_mat,
+            torch.tensor(
+                [0.0, 0.0, 1.0], dtype=torch.float32, device=rot_mat.device
+            ).repeat(batch_size, 1)[:, :, None],
+        )[:, :, 0]
+        if right_handed:
+            sensor_dir_z *= -1
+        if orthogonal:
+            sensor_center = pos_vec
+        else:
+            sensor_center = pos_vec + focal_length * sensor_dir_z
+        sensor_luc = (  # Sensor left upper corner.
+            sensor_center
+            - sensor_dir_x * (sensor_size_x / 2.0)
+            - sensor_dir_y * (sensor_size_y / 2.0)
+        )
+        pixel_size_x = sensor_size_x / float(width)
+        pixel_size_y = sensor_size_y / float(height)
+        pixel_vec_x: torch.Tensor = sensor_dir_x * pixel_size_x
+        pixel_vec_y: torch.Tensor = sensor_dir_y * pixel_size_y
+        pixel_0_0_center = sensor_luc + 0.5 * pixel_vec_x + 0.5 * pixel_vec_y
+        # Reduce dimension.
+        focal_length: torch.Tensor = focal_length[:, 0]
+        if batch_processing:
+            return (
+                pos_vec,
+                pixel_0_0_center,
+                pixel_vec_x,
+                pixel_vec_y,
+                focal_length,
+                principal_point_offsets,
+            )
+        else:
+            return (
+                pos_vec[0],
+                pixel_0_0_center[0],
+                pixel_vec_x[0],
+                pixel_vec_y[0],
+                focal_length[0],
+                principal_point_offsets[0],
+            )
+
+    def forward(
+        self,
+        vert_pos: torch.Tensor,
+        vert_col: torch.Tensor,
+        vert_rad: torch.Tensor,
+        cam_params: torch.Tensor,
+        gamma: float,
+        max_depth: float,
+        min_depth: float = 0.0,
+        bg_col: Optional[torch.Tensor] = None,
+        opacity: Optional[torch.Tensor] = None,
+        percent_allowed_difference: float = 0.01,
+        # pyre-fixme[16]: Module `_C` has no attribute `MAX_UINT`.
+        max_n_hits: int = _C.MAX_UINT,
+        mode: int = 0,
+        return_forward_info: bool = False,
+        first_R_then_T: bool = False,
+    ) -> Union[torch.Tensor, Tuple[torch.Tensor, Optional[torch.Tensor]]]:
+        """
+        Rendering pass to create an image from the provided spheres and camera
+        parameters.
+
+        Args:
+            * vert_pos: vertex positions. [Bx]Nx3 tensor of positions in 3D space.
+            * vert_col: vertex colors. [Bx]NxK tensor of channels.
+            * vert_rad: vertex radii. [Bx]N tensor of radiuses, >0.
+            * cam_params: camera parameter(s). [Bx]{8, 10, 11, 13} tensor made of:
+                - 3 components for camera position,
+                - 3 components for camera rotation (axis angle representation) or
+                  6 components as described in "On the Continuity of Rotation
+                  Representations in Neural Networks" (Zhou et al.),
+                - focal length,
+                - the sensor width in world coordinates,
+                - [optional] an offset for the principal point in x, y (no gradients).
+            * gamma: sphere transparency in [1.,1E-5], with 1 being mostly transparent.
+                [Bx]1.
+            * max_depth: maximum depth for spheres to render. Set this as tightly
+                        as possible to have good numerical accuracy for gradients.
+                        float > min_depth + eps.
+            * min_depth: a float with the minimum depth a sphere must have to be
+                        rendered. Must be 0. or > max(focal_length) + eps.
+            * bg_col: K tensor with a background color to use or None (uses all ones).
+            * opacity: [Bx]N tensor of opacity values in [0., 1.] or None (uses all
+                    ones).
+            * percent_allowed_difference: a float in [0., 1.[ with the maximum allowed
+                        difference in color space. This is used to speed up the
+                        computation. Default: 0.01.
+            * max_n_hits: a hard limit on the number of hits per ray. Default: max int.
+            * mode: render mode in {0, 1}. 0: render an image; 1: render the hit map.
+            * return_forward_info: whether to return a second map. This second map
+                contains 13 channels: first channel contains sm_m (the maximum
+                exponent factor observed), the second sm_d (the normalization
+                denominator, the sum of all coefficients), the third the maximum closest
+                possible intersection for a hit. The following channels alternate with
+                the float encoded integer index of a sphere and its weight. They are the
+                five spheres with the highest color contribution to this pixel color,
+                ordered descending. Default: False.
+            * first_R_then_T: bool, whether to first apply rotation to the camera,
+                then translation (PyTorch3D convention). Default: False.
+
+        Returns:
+            * image: [Bx]HxWx3 float tensor with the resulting image.
+            * forw_info: [Bx]HxWx13 float forward information as described above, if
+                    enabled.
+        """
+        # The device tracker is registered as buffer.
+        self._renderer.device_tracker = self.device_tracker
+        (
+            pos_vec,
+            pixel_0_0_center,
+            pixel_vec_x,
+            pixel_vec_y,
+            focal_lengths,
+            principal_point_offsets,
+        ) = Renderer._transform_cam_params(
+            cam_params,
+            self._renderer.width,
+            self._renderer.height,
+            self._renderer.orthogonal,
+            self._renderer.right_handed,
+            first_R_then_T=first_R_then_T,
+        )
+        if (
+            focal_lengths.min().item() > 0.0
+            and max_depth > 10_000.0 * focal_lengths.min().item()
+        ):
+            warnings.warn(
+                (
+                    "Extreme ratio of `max_depth` vs. focal length detected "
+                    "(%f vs. %f, ratio: %f). This will likely lead to "
+                    "artifacts due to numerical instabilities."
+                )
+                % (
+                    max_depth,
+                    focal_lengths.min().item(),
+                    max_depth / focal_lengths.min().item(),
+                )
+            )
+        ret_res = _Render.apply(
+            vert_pos,
+            vert_col,
+            vert_rad,
+            pos_vec,
+            pixel_0_0_center,
+            pixel_vec_x,
+            pixel_vec_y,
+            # Focal length and sensor size don't need gradients other than through
+            # `pixel_vec_x` and `pixel_vec_y`. The focal length is only used in the
+            # renderer to determine the projection areas of the balls.
+            focal_lengths,
+            # principal_point_offsets does not receive gradients.
+            principal_point_offsets,
+            gamma,
+            max_depth,
+            self._renderer,
+            min_depth,
+            bg_col,
+            opacity,
+            percent_allowed_difference,
+            max_n_hits,
+            mode,
+            (mode == 0) and return_forward_info,
+        )
+        if return_forward_info and mode != 0:
+            return ret_res, None
+        return ret_res
+
+    def extra_repr(self) -> str:
+        """Extra information to print in pytorch graphs."""
+        return "width={}, height={}, max_num_balls={}".format(
+            self._renderer.width, self._renderer.height, self._renderer.max_num_balls
+        )
diff --git a/pytorch3d/pytorch3d/renderer/points/pulsar/unified.py b/pytorch3d/pytorch3d/renderer/points/pulsar/unified.py
new file mode 100644
index 0000000000000000000000000000000000000000..146bbb8f5b927f937260593de494c2e0cbc8fd82
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/points/pulsar/unified.py
@@ -0,0 +1,554 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+import warnings
+from typing import Any, Dict, Optional, Tuple, Union
+
+import torch
+import torch.nn as nn
+
+from ...camera_conversions import _pulsar_from_cameras_projection
+from ...cameras import (
+    FoVOrthographicCameras,
+    FoVPerspectiveCameras,
+    OrthographicCameras,
+    PerspectiveCameras,
+)
+from ..compositor import AlphaCompositor, NormWeightedCompositor
+from ..rasterizer import PointsRasterizer
+from .renderer import Renderer as PulsarRenderer
+
+
+def _ensure_float_tensor(val_in, device):
+    """Return `val_in` as a float32 tensor of shape (1,) located on `device`."""
+    if isinstance(val_in, torch.Tensor):
+        # Existing tensors are cast first and then moved to the target device.
+        return val_in.to(torch.float32).to(device).reshape((1,))
+    # Anything else goes through `torch.tensor`; the reshape needs one element.
+    return torch.tensor(val_in, dtype=torch.float32, device=device).reshape((1,))
+
+
+class PulsarPointsRenderer(nn.Module):
+    """
+    This renderer is a PyTorch3D interface wrapper around the pulsar renderer.
+
+    It provides an interface consistent with PyTorch3D Pointcloud rendering.
+    It will extract all necessary information from the rasterizer and compositor
+    objects and convert them to the pulsar required format, then invoke rendering
+    in the pulsar renderer. All gradients are handled appropriately through the
+    wrapper and the wrapper should provide equivalent results to using the pulsar
+    renderer directly.
+    """
+
+    def __init__(
+        self,
+        rasterizer: PointsRasterizer,
+        compositor: Optional[Union[NormWeightedCompositor, AlphaCompositor]] = None,
+        n_channels: int = 3,
+        max_num_spheres: int = int(1e6),  # noqa: B008
+        **kwargs,
+    ) -> None:
+        """
+        Build the pulsar backend from the settings stored in the rasterizer.
+
+        Args:
+            rasterizer (PointsRasterizer): Provides `cameras` and
+                `raster_settings.image_size`; it is kept as `self.rasterizer`.
+            compositor (ignored): Accepted for interface consistency only; a
+                warning is emitted when it is not None. Default: None.
+            n_channels (int): Number of channels of the rendered image. Default: 3.
+            max_num_spheres (int): Maximum number of spheres to render; passed
+                to the pulsar renderer as `max_num_balls`. Default: 1e6.
+            kwargs (Any): Additional keyword arguments for the pulsar renderer.
+                See `pytorch3d.renderer.points.pulsar.renderer.Renderer`.
+
+        Raises:
+            ValueError: If `rasterizer.cameras` is not one of the four
+                supported camera classes.
+        """
+        super().__init__()
+        # Kept so that later calls can read the cameras and raster settings.
+        self.rasterizer = rasterizer
+        if compositor is not None:
+            # The compositor is deliberately not stored anywhere.
+            warnings.warn(
+                "Creating a `PulsarPointsRenderer` with a compositor object! "
+                "This object is ignored and just allowed as an argument for interface "
+                "compatibility."
+            )
+        # Camera classes that are accepted by this wrapper.
+        orthographic_types = (FoVOrthographicCameras, OrthographicCameras)
+        perspective_types = (FoVPerspectiveCameras, PerspectiveCameras)
+        camera_obj = rasterizer.cameras
+        if not isinstance(camera_obj, orthographic_types + perspective_types):
+            raise ValueError(
+                "Only FoVPerspectiveCameras, PerspectiveCameras, "
+                "FoVOrthographicCameras and OrthographicCameras are supported "
+                "by the pulsar backend."
+            )
+        # `image_size` is either a (height, width) tuple or one value for both.
+        size_conf = rasterizer.raster_settings.image_size
+        if isinstance(size_conf, tuple):
+            height, width = size_conf
+        else:
+            height = width = size_conf
+        # The pulsar renderer is handed plain Python ints.
+        self.renderer = PulsarRenderer(
+            width=int(width),
+            height=int(height),
+            max_num_balls=int(max_num_spheres),
+            orthogonal_projection=isinstance(camera_obj, orthographic_types),
+            right_handed_system=False,
+            n_channels=int(n_channels),
+            **kwargs,
+        )
+
+    def _conf_check(self, point_clouds, kwargs: Dict[str, Any]) -> bool:
+        """
+        Verify internal configuration state with kwargs and pointclouds.
+
+        The checks run in this order: `gamma` must be present in `kwargs`, the
+        number of point clouds must match the number of cameras (unless there
+        is exactly one camera), and the camera type as well as the rasterizer
+        image size must still agree with the values the pulsar renderer was
+        created with.
+
+        Args:
+            point_clouds: Object supporting `len()`; the batch to be rendered.
+            kwargs: Keyword arguments that were passed to `forward`.
+
+        Returns:
+            Whether an orthogonal projection will be used.
+
+        Raises:
+            ValueError: For any inconsistency found.
+        """
+        if "gamma" not in kwargs:
+            raise ValueError(
+                "gamma is a required keyword argument for the PulsarPointsRenderer!"
+            )
+        cameras = self.rasterizer.cameras
+        n_clouds, n_cameras = len(point_clouds), len(cameras)
+        if n_clouds != n_cameras and n_cameras != 1:
+            raise ValueError(
+                "The len(point_clouds) must either be equal to len(rasterizer.cameras) or "
+                "only one camera must be used. len(point_clouds): %d, "
+                "len(rasterizer.cameras): %d." % (n_clouds, n_cameras)
+            )
+        # Make sure the rasterizer and cameras objects have no
+        # changes that can't be matched.
+        pulsar_core = self.renderer._renderer
+        orthogonal_projection = isinstance(
+            cameras, (FoVOrthographicCameras, OrthographicCameras)
+        )
+        if orthogonal_projection != pulsar_core.orthogonal:
+            # The `%` has to bind to the message string: applying it to the
+            # exception object would raise a TypeError instead of this error.
+            raise ValueError(
+                "The camera type can not be changed after renderer initialization! "
+                "Current camera orthogonal: %r. Original orthogonal: %r."
+                % (orthogonal_projection, pulsar_core.orthogonal)
+            )
+        image_size = self.rasterizer.raster_settings.image_size
+        if isinstance(image_size, tuple):
+            expected_height, expected_width = image_size
+        else:
+            expected_height = expected_width = image_size
+        if expected_width != pulsar_core.width:
+            raise ValueError(
+                "The rasterizer width can not be changed after renderer "
+                "initialization! Current width: %s. Original width: %d."
+                % (expected_width, pulsar_core.width)
+            )
+        if expected_height != pulsar_core.height:
+            raise ValueError(
+                "The rasterizer height can not be changed after renderer "
+                "initialization! Current height: %s. Original height: %d."
+                % (expected_height, pulsar_core.height)
+            )
+        return orthogonal_projection
+
+    def _extract_intrinsics(  # noqa: C901
+        self, orthogonal_projection, kwargs, cloud_idx, device
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, float, float]:
+        """
+        Translate the camera intrinsics from PyTorch3D format to pulsar format.
+
+        Values present in `kwargs` take precedence over the attributes of
+        `self.rasterizer.cameras`; except for `degrees`, they are indexed with
+        `cloud_idx`. Returns (focal_length, sensor_width, principal_point_x,
+        principal_point_y, znear, zfar), each converted to a float32 tensor of
+        shape (1,) on `device`. Raises ValueError for unsupported settings.
+        """
+        # Shorthand:
+        cameras = self.rasterizer.cameras
+        if orthogonal_projection:
+            focal_length = torch.zeros((1,), dtype=torch.float32)
+            if isinstance(cameras, FoVOrthographicCameras):
+                znear = kwargs.get("znear", cameras.znear)[cloud_idx]
+                zfar = kwargs.get("zfar", cameras.zfar)[cloud_idx]
+                max_y = kwargs.get("max_y", cameras.max_y)[cloud_idx]
+                min_y = kwargs.get("min_y", cameras.min_y)[cloud_idx]
+                max_x = kwargs.get("max_x", cameras.max_x)[cloud_idx]
+                min_x = kwargs.get("min_x", cameras.min_x)[cloud_idx]
+                if max_y != -min_y:
+                    raise ValueError(
+                        "The orthographic camera must be centered around 0. "
+                        f"Max is {max_y} and min is {min_y}."
+                    )
+                if max_x != -min_x:
+                    raise ValueError(
+                        "The orthographic camera must be centered around 0. "
+                        f"Max is {max_x} and min is {min_x}."
+                    )
+                if not torch.all(
+                    kwargs.get("scale_xyz", cameras.scale_xyz)[cloud_idx] == 1.0
+                ):
+                    raise ValueError(
+                        "The orthographic camera scale must be ((1.0, 1.0, 1.0),). "
+                        f"{kwargs.get('scale_xyz', cameras.scale_xyz)[cloud_idx]}."
+                    )
+                sensor_width = max_x - min_x
+                if not sensor_width > 0.0:
+                    raise ValueError(
+                        f"The orthographic camera must have positive size! Is: {sensor_width}."  # noqa: B950
+                    )
+                principal_point_x = torch.zeros((1,), dtype=torch.float32)
+                principal_point_y = torch.zeros((1,), dtype=torch.float32)
+            else:
+                # Currently, this means it must be an 'OrthographicCameras' object.
+                focal_length_conf = kwargs.get("focal_length", cameras.focal_length)[
+                    cloud_idx
+                ]
+                if (
+                    focal_length_conf.numel() == 2
+                    and focal_length_conf[0] * self.renderer._renderer.width
+                    - focal_length_conf[1] * self.renderer._renderer.height
+                    > 1e-5  # NOTE(review): signed difference, no abs(); confirm intent
+                ):
+                    raise ValueError(
+                        "Pulsar only supports a single focal length! "
+                        "Provided: %s." % (str(focal_length_conf))
+                    )
+                if focal_length_conf.numel() == 2:
+                    sensor_width = 2.0 / focal_length_conf[0]
+                else:
+                    if focal_length_conf.numel() != 1:
+                        raise ValueError(
+                            "Focal length not parsable: %s." % (str(focal_length_conf))
+                        )
+                    sensor_width = 2.0 / focal_length_conf
+                if "znear" not in kwargs.keys() or "zfar" not in kwargs.keys():
+                    raise ValueError(
+                        "pulsar needs znear and zfar values for "
+                        "the OrthographicCameras. Please provide them as keyword "
+                        "argument to the forward method."
+                    )
+                znear = kwargs["znear"][cloud_idx]
+                zfar = kwargs["zfar"][cloud_idx]
+                principal_point_x = (
+                    kwargs.get("principal_point", cameras.principal_point)[cloud_idx][0]
+                    * 0.5
+                    * self.renderer._renderer.width
+                )
+                principal_point_y = (
+                    kwargs.get("principal_point", cameras.principal_point)[cloud_idx][1]
+                    * 0.5
+                    * self.renderer._renderer.height
+                )
+        else:
+            if not isinstance(cameras, PerspectiveCameras):
+                # Create a virtual focal length that is closer than znear.
+                znear = kwargs.get("znear", cameras.znear)[cloud_idx]
+                zfar = kwargs.get("zfar", cameras.zfar)[cloud_idx]
+                focal_length = znear - 1e-6
+                # Create a sensor size that matches the expected fov assuming this f.
+                afov = kwargs.get("fov", cameras.fov)[cloud_idx]
+                if kwargs.get("degrees", cameras.degrees):
+                    # Out-of-place on purpose: `afov` may be a view into the
+                    # `fov` tensor, which `*=` would overwrite with radians.
+                    afov = afov * (math.pi / 180.0)
+                sensor_width = math.tan(afov / 2.0) * 2.0 * focal_length
+                if not (
+                    kwargs.get("aspect_ratio", cameras.aspect_ratio)[cloud_idx]
+                    - self.renderer._renderer.width / self.renderer._renderer.height
+                    < 1e-6  # NOTE(review): signed difference, no abs(); confirm intent
+                ):
+                    raise ValueError(
+                        "The aspect ratio ("
+                        f"{kwargs.get('aspect_ratio', cameras.aspect_ratio)[cloud_idx]}) "
+                        "must agree with the resolution width / height ("
+                        f"{self.renderer._renderer.width / self.renderer._renderer.height})."  # noqa: B950
+                    )
+                principal_point_x = torch.zeros((1,), dtype=torch.float32)
+                principal_point_y = torch.zeros((1,), dtype=torch.float32)
+            else:
+                focal_length_conf = kwargs.get("focal_length", cameras.focal_length)[
+                    cloud_idx
+                ]
+                if (
+                    focal_length_conf.numel() == 2
+                    and focal_length_conf[0] * self.renderer._renderer.width
+                    - focal_length_conf[1] * self.renderer._renderer.height
+                    > 1e-5  # NOTE(review): signed difference, no abs(); confirm intent
+                ):
+                    raise ValueError(
+                        "Pulsar only supports a single focal length! "
+                        "Provided: %s." % (str(focal_length_conf))
+                    )
+                if "znear" not in kwargs.keys() or "zfar" not in kwargs.keys():
+                    raise ValueError(
+                        "pulsar needs znear and zfar values for "
+                        "the PerspectiveCameras. Please provide them as keyword "
+                        "argument to the forward method."
+                    )
+                znear = kwargs["znear"][cloud_idx]
+                zfar = kwargs["zfar"][cloud_idx]
+                if focal_length_conf.numel() == 2:
+                    focal_length_px = focal_length_conf[0]
+                else:
+                    if focal_length_conf.numel() != 1:
+                        raise ValueError(
+                            "Focal length not parsable: %s." % (str(focal_length_conf))
+                        )
+                    focal_length_px = focal_length_conf
+                focal_length = torch.tensor(
+                    [znear - 1e-6], dtype=torch.float32, device=focal_length_px.device
+                )
+                sensor_width = focal_length / focal_length_px * 2.0
+                principal_point_x = (
+                    kwargs.get("principal_point", cameras.principal_point)[cloud_idx][0]
+                    * 0.5
+                    * self.renderer._renderer.width
+                )
+                principal_point_y = (
+                    kwargs.get("principal_point", cameras.principal_point)[cloud_idx][1]
+                    * 0.5
+                    * self.renderer._renderer.height
+                )
+        focal_length = _ensure_float_tensor(focal_length, device)
+        sensor_width = _ensure_float_tensor(sensor_width, device)
+        principal_point_x = _ensure_float_tensor(principal_point_x, device)
+        principal_point_y = _ensure_float_tensor(principal_point_y, device)
+        znear = _ensure_float_tensor(znear, device)
+        zfar = _ensure_float_tensor(zfar, device)
+        return (
+            focal_length,
+            sensor_width,
+            principal_point_x,
+            principal_point_y,
+            znear,
+            zfar,
+        )
+
+    def _extract_extrinsics(
+        self, kwargs, cloud_idx
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """
+        Return the pulsar camera position and rotation for one point cloud.
+
+        `R` and `T` are taken from `kwargs` when present and from
+        `self.rasterizer.cameras` otherwise, then indexed with `cloud_idx`.
+        The conversion is delegated to `_pulsar_from_cameras_projection`, which
+        is fed a temporary single-camera `PerspectiveCameras` object and the
+        renderer's (height, width).
+
+        Returns:
+            Entries 0-2 and entries 3-8 of the first converted camera row.
+        """
+        pulsar_core = self.renderer._renderer
+        default_cams = self.rasterizer.cameras
+        rot = kwargs.get("R", default_cams.R)[cloud_idx]
+        trans = kwargs.get("T", default_cams.T)[cloud_idx]
+        # The image size is passed as a single (height, width) row.
+        image_hw = torch.tensor([[pulsar_core.height, pulsar_core.width]])
+        single_cam = PerspectiveCameras(
+            R=rot.unsqueeze(0), T=trans.unsqueeze(0), device=rot.device
+        )
+        converted = _pulsar_from_cameras_projection(single_cam, image_hw)[0]
+        return converted[:3], converted[3:9]
+
+    def _get_vert_rad(
+        self, vert_pos, cam_pos, orthogonal_projection, focal_length, kwargs, cloud_idx
+    ) -> torch.Tensor:
+        """
+        Compute the radius to use for every point of one cloud.
+
+        With `radius_world` set in `kwargs`, the configured raster radius is
+        returned untouched. For orthogonal projections the radius is broadcast
+        to one float32 value per point. Otherwise it is divided by
+        `focal_length` and multiplied with each point's distance to `cam_pos`;
+        the result is halved unless the cameras are `PerspectiveCameras`.
+
+        Returns:
+            The `raster_settings.radius` value itself (when `radius_world` is
+            set) or a tensor of per-point radiuses.
+        """
+        # `self.rasterizer.raster_settings.radius` can either be a float
+        # or itself a tensor.
+        radius = self.rasterizer.raster_settings.radius
+        if kwargs.get("radius_world", False):
+            return radius
+        is_batched = (
+            isinstance(radius, torch.Tensor) and radius.numel() > 1 and radius.ndim > 1
+        )
+        if is_batched:
+            # A tensor with more than one dimension is indexed per cloud.
+            radius = radius[cloud_idx]
+        if orthogonal_projection:
+            n_points = vert_pos.shape[0]
+            unit = torch.ones((n_points,), dtype=torch.float32, device=vert_pos.device)
+            return unit * radius
+        # Perspective case: the radius scales with the distance to the camera.
+        distances = torch.norm((vert_pos - cam_pos), p=2, dim=1, keepdim=False)
+        scaled = radius / focal_length.to(vert_pos.device) * distances
+        if isinstance(self.rasterizer.cameras, PerspectiveCameras):
+            # NDC normalization happens through adjusted focal length.
+            return scaled
+        return scaled / 2.0  # NDC normalization.
+
+    # point_clouds is not typed to avoid a cyclic dependency.
+    def forward(self, point_clouds, **kwargs) -> torch.Tensor:
+        """
+        Render every point cloud of a `Pointclouds` batch with pulsar.
+
+        One image is produced per point cloud. Either a single camera is
+        configured (it is then reused for all clouds) or there is exactly one
+        camera per cloud (cloud `i` is rendered with camera `i`).
+
+        Args:
+            point_clouds: the `Pointclouds` batch; the entries of its
+                `points_list()` and `features_list()` are rendered pair by pair.
+            **kwargs: the keyword arguments listed below.
+
+        Keyword arguments understood on the PyTorch3D side (they override the
+        corresponding camera parameters where the camera model has them):
+            radius_world (bool): use the radiuses from the raster_settings
+              unchanged as world space radiuses. Default: False.
+            znear (Iterable[float]): near geometry cutoff. Required for
+              OrthographicCameras and PerspectiveCameras.
+            zfar (Iterable[float]): far geometry cutoff. Required for
+              OrthographicCameras and PerspectiveCameras.
+            R (torch.Tensor): [Bx3x3] camera rotation matrices.
+            T (torch.Tensor): [Bx3] camera translation vectors.
+            principal_point (torch.Tensor): [Bx2] camera intrinsic principal
+              point offset vectors.
+            focal_length (torch.Tensor): [Bx1] camera intrinsic focal lengths.
+            aspect_ratio (Iterable[float]): camera aspect ratios.
+            fov (Iterable[float]): camera FOVs.
+            degrees (bool): whether FOVs are given in degrees or radians.
+            min_x (Iterable[float]): minimum x for the FoVOrthographicCameras.
+            max_x (Iterable[float]): maximum x for the FoVOrthographicCameras.
+            min_y (Iterable[float]): minimum y for the FoVOrthographicCameras.
+            max_y (Iterable[float]): maximum y for the FoVOrthographicCameras.
+
+        Keyword arguments handed to pulsar:
+            gamma: *required*. It is indexed with the cloud index, so one value
+                per point cloud is expected. It defines the transparency for
+                differentiability (see pulsar paper for details); values must
+                lie between 1e-5 and 1.0, with 1.0 being mostly transparent.
+            bg_col (torch.Tensor): background color on the same device as the
+                point clouds, with as many channels as features and no batch
+                dimension (shared by all images). Default: 0.0 for all channels.
+            percent_allowed_difference (float): a value in [0., 1.[ with the
+                maximum allowed difference in channel space, used to speed up
+                the computation. Default: 0.01.
+            max_n_hits (int): hard limit on the number of sphere hits per ray.
+                Default: max int.
+            mode (int): render mode in {0, 1}. 0: render image; 1: render hit map.
+
+        Returns:
+            The per-cloud images, each flipped along its first dimension and
+            stacked along a new leading dimension.
+
+        Raises:
+            ValueError: if `first_R_then_T` is passed or the configuration
+                check fails.
+        """
+        is_orthogonal: bool = self._conf_check(point_clouds, kwargs)
+        # These kwargs are consumed by this wrapper and not forwarded to pulsar.
+        consumed_names = (
+            "radius_world",
+            "gamma",
+            "znear",
+            "zfar",
+            "R",
+            "T",
+            "principal_point",
+            "focal_length",
+            "aspect_ratio",
+            "fov",
+            "degrees",
+            "min_x",
+            "max_x",
+            "min_y",
+            "max_y",
+        )
+
+        # The clouds are processed sequentially through the list accessors.
+        cloud_pairs = zip(point_clouds.points_list(), point_clouds.features_list())
+        rendered = []
+        for cloud_idx, (vert_pos, vert_col) in enumerate(cloud_pairs):
+            # Camera pose for this cloud.
+            cam_pos, cam_rot = self._extract_extrinsics(kwargs, cloud_idx)
+            # Camera intrinsics, created on the device of the camera position.
+            intrinsics = self._extract_intrinsics(
+                is_orthogonal, kwargs, cloud_idx, cam_pos.device
+            )
+            (
+                focal_length,
+                sensor_width,
+                principal_point_x,
+                principal_point_y,
+                znear,
+                zfar,
+            ) = intrinsics
+
+            # Pulsar camera vector: position, rotation, then the intrinsics.
+            intrinsic_vec = torch.cat(
+                [focal_length, sensor_width, principal_point_x, principal_point_y]
+            )
+            cam_params = torch.cat(
+                (cam_pos, cam_rot.to(cam_pos.device), intrinsic_vec)
+            )
+
+            # Get point radiuses (can depend on camera position).
+            vert_rad = self._get_vert_rad(
+                vert_pos,
+                cam_pos,
+                is_orthogonal,
+                focal_length,
+                kwargs,
+                cloud_idx,
+            )
+            gamma = kwargs["gamma"][cloud_idx]
+            if "first_R_then_T" in kwargs:
+                raise ValueError("`first_R_then_T` is not supported in this interface.")
+
+            # Everything that was not consumed above is forwarded unchanged.
+            passthrough = {
+                name: value
+                for name, value in kwargs.items()
+                if name not in consumed_names
+            }
+            if "bg_col" not in passthrough:
+                # Default background: zeros, one entry per feature channel.
+                passthrough["bg_col"] = torch.zeros(
+                    vert_col.shape[1], device=cam_params.device, dtype=torch.float32
+                )
+            image = self.renderer(
+                vert_pos=vert_pos,
+                vert_col=vert_col,
+                vert_rad=vert_rad,
+                cam_params=cam_params,
+                gamma=gamma,
+                max_depth=zfar,
+                min_depth=znear,
+                **passthrough,
+            )
+            rendered.append(image.flip(dims=[0]))
+        return torch.stack(rendered, dim=0)
diff --git a/pytorch3d/pytorch3d/renderer/points/rasterize_points.py b/pytorch3d/pytorch3d/renderer/points/rasterize_points.py
new file mode 100644
index 0000000000000000000000000000000000000000..08211049fb48488569b50881bbc936ac9d9064c8
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/points/rasterize_points.py
@@ -0,0 +1,320 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Optional, Tuple, Union
+
+import numpy as np
+import torch
+from pytorch3d import _C
+from pytorch3d.renderer.mesh.rasterize_meshes import pix_to_non_square_ndc
+
+from ..utils import parse_image_size
+
+
+# Maximum number of points per bin for
+# coarse-to-fine rasterization
+kMaxPointsPerBin = 22
+
+
+def rasterize_points(
+    pointclouds,
+    image_size: Union[int, List[int], Tuple[int, int]] = 256,
+    radius: Union[float, List, Tuple, torch.Tensor] = 0.01,
+    points_per_pixel: int = 8,
+    bin_size: Optional[int] = None,
+    max_points_per_bin: Optional[int] = None,
+):
+    """
+    Each pointcloud is rasterized onto a separate image of shape
+    (H, W) if `image_size` is a tuple or (image_size, image_size) if it
+    is an int.
+
+    If the desired image size is non square (i.e. a tuple of (H, W) where H != W)
+    the aspect ratio needs special consideration. There are two aspect ratios
+    to be aware of:
+        - the aspect ratio of each pixel
+        - the aspect ratio of the output image
+    The camera can be used to set the pixel aspect ratio. In the rasterizer,
+    we assume square pixels, but variable image aspect ratio (i.e. rectangular images).
+
+    In most cases you will want to set the camera aspect ratio to
+    1.0 (i.e. square pixels) and only vary the
+    `image_size` (i.e. the output image dimensions in pixels).
+
+    Args:
+        pointclouds: A Pointclouds object representing a batch of point clouds to be
+            rasterized. This is a batch of N pointclouds, where each point cloud
+            can have a different number of points; the coordinates of each point
+            are (x, y, z). The coordinates are expected to
+            be in normalized device coordinates (NDC): [-1, 1]^3 with the camera at
+            (0, 0, 0); In the camera coordinate frame the x-axis goes from right-to-left,
+            the y-axis goes from bottom-to-top, and the z-axis goes from back-to-front.
+        image_size: Size in pixels of the output image to be rasterized.
+            Can optionally be a tuple of (H, W) in the case of non square images.
+        radius (Optional): The radius (in NDC units) of the disk to
+            be rasterized. This can either be a float in which case the same radius is used
+            for each point, or a list, tuple or torch.Tensor of shape (N, P) giving a
+            radius per point in the batch.
+        points_per_pixel (Optional): We will keep track of this many points per
+            pixel, returning the nearest points_per_pixel points along the z-axis
+        bin_size: Size of bins to use for coarse-to-fine rasterization. bin_size=0
+            uses naive rasterization; bin_size=None picks it heuristically (0 for
+            non-CUDA points). This should not affect the output, only the speed. A
+            ValueError is raised if a nonzero bin_size is too small for the image.
+        max_points_per_bin: Only applicable when using coarse-to-fine rasterization
+            (bin_size > 0); this is the maximum number of points allowed within each
+            bin (None selects max(10000, pointclouds._P / 5)). This should not affect
+            the output values, but can affect the memory usage in the forward pass.
+
+    Returns:
+        3-element tuple containing
+
+        - **idx**: int32 Tensor of shape (N, image_size, image_size, points_per_pixel)
+          giving the indices of the nearest points at each pixel, in ascending
+          z-order. Concretely `idx[n, y, x, k] = p` means that `points[p]` is the kth
+          closest point (along the z-direction) to pixel (y, x) - note that points
+          represents the packed points of shape (P, 3).
+          Pixels that are hit by fewer than points_per_pixel are padded with -1.
+        - **zbuf**: Tensor of shape (N, image_size, image_size, points_per_pixel)
+          giving the z-coordinates of the nearest points at each pixel, sorted in
+          z-order. Concretely, if `idx[n, y, x, k] = p` then
+          `zbuf[n, y, x, k] = points[n, p, 2]`. Pixels hit by fewer than
+          points_per_pixel are padded with -1
+        - **dists2**: Tensor of shape (N, image_size, image_size, points_per_pixel)
+          giving the squared Euclidean distance (in NDC units) in the x/y plane
+          for each point closest to the pixel. Concretely if `idx[n, y, x, k] = p`
+          then `dists[n, y, x, k]` is the squared distance between the pixel (y, x)
+          and the point `(points[n, p, 0], points[n, p, 1])`. Pixels hit with fewer
+          than points_per_pixel are padded with -1.
+
+        In the case that image_size is a tuple of (H, W) then the outputs
+        will be of shape `(N, H, W, ...)`.
+    """
+    points_packed = pointclouds.points_packed()
+    cloud_to_packed_first_idx = pointclouds.cloud_to_packed_first_idx()
+    num_points_per_cloud = pointclouds.num_points_per_cloud()
+
+    radius = _format_radius(radius, pointclouds)  # one radius per packed point
+
+    # In the case that H != W use the max image size to set the bin_size
+    # to accommodate the num bins constraint in the coarse rasterizer.
+    # If the ratio of H:W is large this might cause issues as the smaller
+    # dimension will have fewer bins.
+    # TODO: consider a better way of setting the bin size.
+    im_size = parse_image_size(image_size)
+    max_image_size = max(*im_size)
+
+    if bin_size is None:
+        if not points_packed.is_cuda:
+            # Binned CPU rasterization not fully implemented
+            bin_size = 0
+        else:
+            bin_size = int(2 ** max(np.ceil(np.log2(max_image_size)) - 4, 4))
+
+    if bin_size != 0:
+        # There is a limit on the number of points per bin in the cuda kernel.
+        points_per_bin = 1 + (max_image_size - 1) // bin_size  # == ceil(max_image_size / bin_size)
+        if points_per_bin >= kMaxPointsPerBin:
+            raise ValueError(
+                "bin_size too small, number of points per bin must be less than %d; got %d"
+                % (kMaxPointsPerBin, points_per_bin)
+            )
+
+    if max_points_per_bin is None:
+        max_points_per_bin = int(max(10000, pointclouds._P / 5))  # heuristic default
+
+    # Function.apply cannot take keyword args, so we handle defaults in this
+    # wrapper and call apply with positional args only
+    return _RasterizePoints.apply(
+        points_packed,
+        cloud_to_packed_first_idx,
+        num_points_per_cloud,
+        im_size,
+        radius,
+        points_per_pixel,
+        bin_size,
+        max_points_per_bin,
+    )
+
+
+def _format_radius(
+    radius: Union[float, List, Tuple, torch.Tensor], pointclouds
+) -> torch.Tensor:
+    """
+    Format the radius as a torch tensor of shape (P_packed,), where P_packed
+    is the total number of points in the batch (i.e.
+    pointclouds.points_packed().shape[0]). This enables a different radius
+    for each point in the batch.
+
+    Args:
+        radius: a float or int (one radius shared by every point), or a List,
+            Tuple or tensor of shape (N, P_padded) where P_padded is the
+            maximum number of points for each pointcloud in the batch. For a
+            single cloud (N == 1) a 1D sequence of length P_padded also works.
+
+    Returns:
+        radius: torch.Tensor of shape (P_packed)
+
+    Raises:
+        ValueError: if radius has an unsupported type or the wrong shape.
+    """
+    N, P_padded = pointclouds._N, pointclouds._P
+    points_packed = pointclouds.points_packed()
+    P_packed = points_packed.shape[0]
+    if isinstance(radius, (list, tuple)):
+        radius = torch.tensor(radius).type_as(points_packed)
+    if isinstance(radius, torch.Tensor):
+        if N == 1 and radius.ndim == 1:
+            radius = radius[None, ...]  # promote (P,) to (1, P) for a single cloud
+        if radius.shape != (N, P_padded):
+            msg = "radius must be of shape (N, P): got %s"
+            raise ValueError(msg % (repr(radius.shape)))
+        radius = radius.view(-1)[pointclouds.padded_to_packed_idx()]  # padded -> packed
+    elif isinstance(radius, (int, float)):
+        # An int is a valid float argument; float() stops torch.full inferring an int dtype.
+        radius = torch.full((P_packed,), fill_value=float(radius)).type_as(points_packed)
+    else:
+        msg = "radius must be a float, list, tuple or tensor; got %s"
+        raise ValueError(msg % type(radius))
+    return radius
+
+
+class _RasterizePoints(torch.autograd.Function):
+    """
+    Autograd bridge to the compiled point rasterizer in `pytorch3d._C`.
+
+    Only `points` receives a gradient; every other forward input is treated
+    as a constant.
+    """
+
+    @staticmethod
+    # pyre-fixme[14]: `forward` overrides method defined in `Function` inconsistently.
+    def forward(
+        ctx,
+        points,  # (P, 3)
+        cloud_to_packed_first_idx,
+        num_points_per_cloud,
+        image_size: Union[List[int], Tuple[int, int]] = (256, 256),
+        radius: Union[float, torch.Tensor] = 0.01,
+        points_per_pixel: int = 8,
+        bin_size: int = 0,
+        max_points_per_bin: int = 0,
+    ):
+        """
+        Rasterize the packed points and stash what `backward` needs.
+
+        Returns:
+            The (idx, zbuf, dists) tuple produced by `_C.rasterize_points`;
+            idx is marked as non-differentiable.
+        """
+        # TODO: Add better error handling for when there are more than
+        # max_points_per_bin in any bin.
+        # pyre-fixme[16]: Module `pytorch3d` has no attribute `_C`.
+        idx, zbuf, dists = _C.rasterize_points(
+            points,
+            cloud_to_packed_first_idx,
+            num_points_per_cloud,
+            image_size,
+            radius,
+            points_per_pixel,
+            bin_size,
+            max_points_per_bin,
+        )
+        ctx.save_for_backward(points, idx)
+        ctx.mark_non_differentiable(idx)
+        return idx, zbuf, dists
+
+    @staticmethod
+    def backward(ctx, grad_idx, grad_zbuf, grad_dists):
+        """
+        Return one gradient slot per `forward` input (excluding ctx).
+
+        grad_idx is ignored since idx was marked non-differentiable.
+        """
+        points, idx = ctx.saved_tensors
+        grad_points = _C.rasterize_points_backward(points, idx, grad_zbuf, grad_dists)
+        # None of the remaining seven forward inputs receives a gradient.
+        return (grad_points,) + (None,) * 7
+
+
+def rasterize_points_python(
+    pointclouds,
+    image_size: Union[int, Tuple[int, int]] = 256,
+    radius: Union[float, torch.Tensor] = 0.01,
+    points_per_pixel: int = 8,
+):
+    """
+    Naive pure PyTorch implementation of pointcloud rasterization.
+
+    Inputs / Outputs: Same as rasterize_points, without the binning arguments.
+    """
+    N = len(pointclouds)
+    H, W = (
+        image_size
+        if isinstance(image_size, (tuple, list))
+        else (image_size, image_size)
+    )
+    K = points_per_pixel
+    device = pointclouds.device
+
+    points_packed = pointclouds.points_packed()
+    cloud_to_packed_first_idx = pointclouds.cloud_to_packed_first_idx()
+    num_points_per_cloud = pointclouds.num_points_per_cloud()
+
+    # Support variable size radius for each point in the batch
+    radius = _format_radius(radius, pointclouds)
+
+    # Initialize output tensors.
+    point_idxs = torch.full(
+        (N, H, W, K), fill_value=-1, dtype=torch.int32, device=device
+    )
+    zbuf = torch.full((N, H, W, K), fill_value=-1, dtype=torch.float32, device=device)
+    pix_dists = torch.full(
+        (N, H, W, K), fill_value=-1, dtype=torch.float32, device=device
+    )
+
+    # Squared radii, compared below against squared pixel-to-point distances.
+    radius2 = radius * radius
+
+    # Iterate through the batch of point clouds.
+    for n in range(N):
+        point_start_idx = cloud_to_packed_first_idx[n]
+        point_stop_idx = point_start_idx + num_points_per_cloud[n]
+
+        # Iterate through the horizontal lines of the image from top to bottom.
+        for yi in range(H):
+            # Y coordinate of one end of the image. Reverse the ordering
+            # of yi so that +Y is pointing up in the image.
+            yfix = H - 1 - yi
+            yf = pix_to_non_square_ndc(yfix, H, W)
+
+            # Iterate through pixels on this horizontal line, left to right.
+            for xi in range(W):
+                # X coordinate of one end of the image. Reverse the ordering
+                # of xi so that +X is pointing to the left in the image.
+                xfix = W - 1 - xi
+                xf = pix_to_non_square_ndc(xfix, W, H)
+
+                top_k_points = []
+                # Check whether each point in the batch affects this pixel.
+                for p in range(point_start_idx, point_stop_idx):
+                    px, py, pz = points_packed[p, :]
+                    r = radius2[p]  # squared radius of point p
+                    if pz < 0:  # points with negative z are never rasterized
+                        continue
+                    dx = px - xf
+                    dy = py - yf
+                    dist2 = dx * dx + dy * dy
+                    if dist2 < r:
+                        top_k_points.append((pz, p, dist2))
+                        top_k_points.sort()  # tuples order by z first, then packed index
+                        if len(top_k_points) > K:
+                            top_k_points = top_k_points[:K]  # keep only the K nearest
+                for k, (pz, p, dist2) in enumerate(top_k_points):
+                    zbuf[n, yi, xi, k] = pz
+                    point_idxs[n, yi, xi, k] = p
+                    pix_dists[n, yi, xi, k] = dist2
+    return point_idxs, zbuf, pix_dists
diff --git a/pytorch3d/pytorch3d/renderer/points/rasterizer.py b/pytorch3d/pytorch3d/renderer/points/rasterizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..5831994c7b88c2119aa159717e1b0af382d81842
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/points/rasterizer.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from dataclasses import dataclass
+from typing import NamedTuple, Optional, Tuple, Union
+
+import torch
+import torch.nn as nn
+from pytorch3d.renderer.cameras import try_get_projection_transform
+from pytorch3d.structures import Pointclouds
+
+from .rasterize_points import rasterize_points
+
+
+class PointFragments(NamedTuple):
+    """
+    Class to store the outputs of point rasterization
+
+    Members:
+        idx: int32 Tensor of shape (N, image_size, image_size, points_per_pixel)
+            giving the indices of the nearest points at each pixel, in ascending
+            z-order. Concretely `idx[n, y, x, k] = p` means that `points[p]` is the kth
+            closest point (along the z-direction) to pixel (y, x) - note that points
+            represents the packed points of shape (P, 3).
+            Pixels that are hit by fewer than points_per_pixel are padded with -1.
+        zbuf: Tensor of shape (N, image_size, image_size, points_per_pixel)
+            giving the z-coordinates of the nearest points at each pixel, sorted in
+            z-order. Concretely, if `idx[n, y, x, k] = p` then
+            `zbuf[n, y, x, k] = points[n, p, 2]`. Pixels hit by fewer than
+            points_per_pixel are padded with -1.
+        dists: Tensor of shape (N, image_size, image_size, points_per_pixel)
+            giving the squared Euclidean distance (in NDC units) in the x/y plane
+            for each point closest to the pixel. Concretely if `idx[n, y, x, k] = p`
+            then `dists[n, y, x, k]` is the squared distance between the pixel (y, x)
+            and the point `(points[n, p, 0], points[n, p, 1])`. Pixels hit with fewer
+            than points_per_pixel are padded with -1.
+    """
+
+    idx: torch.Tensor  # packed point indices per pixel, -1 where no point was hit
+    zbuf: torch.Tensor  # z-coordinates matching idx, -1 padded
+    dists: torch.Tensor  # squared xy-distances (NDC units) matching idx, -1 padded
+
+
+@dataclass
+class PointsRasterizationSettings:
+    """
+    Class to store the point rasterization params with defaults
+
+    Members:
+        image_size: Either common height and width or (height, width), in pixels.
+        radius: The radius (in NDC units) of each disk to be rasterized.
+            This can either be a float in which case the same radius is used
+            for each point, or a torch.Tensor of shape (N, P) giving a radius
+            per point in the batch.
+        points_per_pixel: (int) Number of points to keep track of per pixel.
+            We return the nearest points_per_pixel points along the z-axis.
+        bin_size: Size of bins to use for coarse-to-fine rasterization. Setting
+            bin_size=0 uses naive rasterization; setting bin_size=None attempts
+            to set it heuristically based on the shape of the input. This should
+            not affect the output, but can affect the speed of the forward pass.
+        max_points_per_bin: Only applicable when using coarse-to-fine
+            rasterization (bin_size != 0); this is the maximum number of points
+            allowed within each bin. This should not affect the output values,
+            but can affect the memory usage in the forward pass.
+            Setting max_points_per_bin=None attempts to set with a heuristic.
+    """
+
+    image_size: Union[int, Tuple[int, int]] = 256  # an int means a square image
+    radius: Union[float, torch.Tensor] = 0.01  # NDC units
+    points_per_pixel: int = 8  # nearest points kept per pixel
+    bin_size: Optional[int] = None  # None => heuristic, 0 => naive rasterization
+    max_points_per_bin: Optional[int] = None  # None => heuristic
+
+
+class PointsRasterizer(nn.Module):
+    """
+    This class implements methods for rasterizing a batch of pointclouds.
+    """
+
+    def __init__(self, cameras=None, raster_settings=None) -> None:
+        """
+        Args:
+            cameras: A cameras object whose `transform_points` method applies
+                the world-to-view and view-to-ndc transformations.
+            raster_settings: the parameters for rasterization, as a
+                PointsRasterizationSettings; None selects its defaults.
+
+        All these initial settings can be overridden by passing keyword
+        arguments to the forward function.
+        """
+        super().__init__()
+        if raster_settings is None:
+            raster_settings = PointsRasterizationSettings()
+
+        self.cameras = cameras
+        self.raster_settings = raster_settings
+
+    def transform(self, point_clouds, **kwargs) -> Pointclouds:
+        """
+        Args:
+            point_clouds: a set of point clouds
+            **kwargs: may carry `cameras` (overriding self.cameras) and `eps`.
+
+        Returns:
+            points_proj: the point clouds with xy in NDC space and z in view space
+
+        Raises: ValueError if no cameras were given here or at initialization.
+        NOTE: kept as a separate function for readability; could move into forward.
+        """
+        cameras = kwargs.get("cameras", self.cameras)
+        if cameras is None:
+            msg = "Cameras must be specified either at initialization " \
+                "or in the forward pass of PointsRasterizer"
+            raise ValueError(msg)
+
+        pts_world = point_clouds.points_padded()
+        # NOTE: Retaining view space z coordinate for now.
+        # TODO: Remove this line when the convention for the z coordinate in
+        # the rasterizer is decided. i.e. retain z in view space or transform
+        # to a different range.
+        eps = kwargs.get("eps", None)
+        pts_view = cameras.get_world_to_view_transform(**kwargs).transform_points(
+            pts_world, eps=eps
+        )
+        to_ndc_transform = cameras.get_ndc_camera_transform(**kwargs)
+        projection_transform = try_get_projection_transform(cameras, kwargs)
+        if projection_transform is not None:
+            projection_transform = projection_transform.compose(to_ndc_transform)
+            pts_ndc = projection_transform.transform_points(pts_view, eps=eps)
+        else:
+            # Call transform_points instead of explicitly composing transforms to handle
+            # the case, where camera class does not have a projection matrix form.
+            pts_proj = cameras.transform_points(pts_world, eps=eps)
+            pts_ndc = to_ndc_transform.transform_points(pts_proj, eps=eps)
+
+        pts_ndc[..., 2] = pts_view[..., 2]  # keep the view-space z (see NOTE above)
+        point_clouds = point_clouds.update_padded(pts_ndc)
+        return point_clouds
+
+    def to(self, device):
+        # Manually move to device cameras as it is not a subclass of nn.Module
+        if self.cameras is not None:
+            self.cameras = self.cameras.to(device)
+        return self
+
+    def forward(self, point_clouds, **kwargs) -> PointFragments:
+        """
+        Args:
+            point_clouds: a set of point clouds with coordinates in world space.
+        Returns:
+            PointFragments: Rasterization outputs as a named tuple.
+        """
+        points_proj = self.transform(point_clouds, **kwargs)
+        raster_settings = kwargs.get("raster_settings", self.raster_settings)
+        idx, zbuf, dists2 = rasterize_points(
+            points_proj,
+            image_size=raster_settings.image_size,
+            radius=raster_settings.radius,
+            points_per_pixel=raster_settings.points_per_pixel,
+            bin_size=raster_settings.bin_size,
+            max_points_per_bin=raster_settings.max_points_per_bin,
+        )
+        return PointFragments(idx=idx, zbuf=zbuf, dists=dists2)
diff --git a/pytorch3d/pytorch3d/renderer/points/renderer.py b/pytorch3d/pytorch3d/renderer/points/renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a83ec40458cd4ed55fd42ad4a0ebefa7b064ab8
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/points/renderer.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+import torch.nn as nn
+
+
+# A renderer class should be initialized with a
+# function for rasterization and a function for compositing.
+# The rasterizer should:
+#     - transform inputs from world -> screen space
+#     - rasterize inputs
+#     - return fragments
+# The compositor can take fragments as input along with any other properties of
+# the scene and generate images.
+
+# E.g. rasterize inputs and then shade
+#
+# fragments = self.rasterize(point_clouds)
+# images = self.compositor(fragments, point_clouds)
+# return images
+
+
+class PointsRenderer(nn.Module):
+    """
+    A class for rendering a batch of points. The class should
+    be initialized with a rasterizer and compositor class which each have a forward
+    function.
+
+    The points are rendered with varying alpha (weights) values depending on
+    the distance of the pixel center to the true point in the xy plane. The purpose
+    of this is to soften the hard decision boundary, for differentiability.
+    See Section 3.2 of "SynSin: End-to-end View Synthesis from a Single Image"
+    (https://arxiv.org/pdf/1912.08804.pdf) for more details.
+    """
+
+    def __init__(self, rasterizer, compositor) -> None:
+        super().__init__()
+        self.rasterizer = rasterizer
+        self.compositor = compositor
+
+    def to(self, device):
+        # Manually move to device rasterizer as the cameras
+        # within the class are not of type nn.Module
+        self.rasterizer = self.rasterizer.to(device)
+        self.compositor = self.compositor.to(device)
+        return self
+
+    def forward(self, point_clouds, **kwargs) -> torch.Tensor:
+        fragments = self.rasterizer(point_clouds, **kwargs)
+
+        # Weight each point by its squared distance to the pixel. A `raster_settings`
+        # keyword overrides the stored settings, matching PointsRasterizer.forward.
+        settings = kwargs.get("raster_settings", self.rasterizer.raster_settings)
+        r = settings.radius
+
+        dists2 = fragments.dists.permute(0, 3, 1, 2)
+        weights = 1 - dists2 / (r * r)  # 1 at the point center, 0 at distance r
+        images = self.compositor(
+            fragments.idx.long().permute(0, 3, 1, 2),
+            weights,
+            point_clouds.features_packed().permute(1, 0),
+            **kwargs,
+        )
+
+        # permute so image comes at the end
+        images = images.permute(0, 2, 3, 1)
+
+        return images
diff --git a/pytorch3d/pytorch3d/renderer/splatter_blend.py b/pytorch3d/pytorch3d/renderer/splatter_blend.py
new file mode 100644
index 0000000000000000000000000000000000000000..0149dfb30f44dd716fd21be38051266c8bceb67d
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/splatter_blend.py
@@ -0,0 +1,566 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# This file defines SplatterBlender, which is used for blending in SplatterPhongShader.
+
+import itertools
+from typing import Tuple
+
+import torch
+import torch.nn.functional as F
+from pytorch3d.common.datatypes import Device
+from pytorch3d.renderer import BlendParams
+from pytorch3d.renderer.cameras import FoVPerspectiveCameras
+
+from .blending import _get_background_color
+
+
+def _precompute(
+    input_shape: Tuple[int, int, int, int], device: Device
+) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Build the index and offset constants used by the splatter; they depend only
+    on the NHWK shape, so they can be computed once per shape.
+
+    See offset_splats for how the index tensors are used.
+
+    Args:
+        input_shape: (N, H, W, K), i.e. batch size, image height, image width and
+            number of intersections per pixel output by the rasterizer.
+        device: Device on which the returned tensors are created.
+
+    Returns:
+        crop_ids_h: An (N, H, W+2, K, 9, 5) index tensor whose value at
+            [n, h, w, k, d, c] is h + (d % 3). Intended for torch.gather, to
+            reduce a tensor from (N, H+2, W+2, ...) to (N, H, W+2, ...).
+        crop_ids_w: An (N, H, W, K, 9, 5) index tensor whose value at
+            [n, h, w, k, d, c] is w + (d // 3). Intended for torch.gather, to
+            reduce a tensor from (N, H, W+2, ...) to (N, H, W, ...).
+        offsets: A (9, 2) long tensor holding [-1, -1], [-1, 0], [-1, 1],
+            [0, -1], ..., [1, 1], i.e. the nine splatting directions.
+    """
+    N, H, W, K = input_shape
+
+    # Shift applied along each image axis for direction d = 0..8: d % 3 for the
+    # height axis and d // 3 for the width axis.
+    shift_h = torch.tensor([0, 1, 2, 0, 1, 2, 0, 1, 2], device=device).view(
+        1, 1, 1, 1, 9, 1
+    )
+    shift_w = torch.tensor([0, 0, 0, 1, 1, 1, 2, 2, 2], device=device).view(
+        1, 1, 1, 1, 9, 1
+    )
+
+    # torch.gather does not broadcast its index argument, so both index tensors
+    # are expanded to the full shape they are gathered with.
+    rows = torch.arange(0, H, device=device).view(1, H, 1, 1, 1, 1)
+    crop_ids_h = (rows + shift_h).expand(N, H, W + 2, K, 9, 5)
+
+    cols = torch.arange(0, W, device=device).view(1, 1, W, 1, 1, 1)
+    crop_ids_w = (cols + shift_w).expand(N, H, W, K, 9, 5)
+
+    # The nine splatting directions, enumerated in itertools.product order.
+    directions = list(itertools.product((-1, 0, 1), repeat=2))
+    offsets = torch.tensor(directions, dtype=torch.long, device=device)
+
+    return crop_ids_h, crop_ids_w, offsets
+
+
+def _prepare_pixels_and_colors(
+    pixel_coords_cameras: torch.Tensor,
+    colors: torch.Tensor,
+    cameras: FoVPerspectiveCameras,
+    background_mask: torch.Tensor,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Project pixel coords into the un-inverted screen frame of reference, and set
+    background pixel z-values to 1.0 and alphas to 0.0.
+
+    Args:
+        pixel_coords_cameras: (N, H, W, K, 3) float tensor.
+        colors: (N, H, W, K, 3) float tensor.
+        cameras: PyTorch3D cameras, for now we assume FoVPerspectiveCameras.
+        background_mask: (N, H, W, K) boolean tensor.
+
+    Returns:
+        pixel_coords_screen: (N, H, W, K, 3) float tensor. Background pixels have
+            x=y=z=1.0.
+        colors: (N, H, W, K, 4). Alpha is set to 1 for foreground pixels and 0 for back-
+            ground pixels.
+    """
+
+    N, H, W, K, C = colors.shape
+    # pixel_coords_screen will contain invalid values at background
+    # intersections, and [H+0.5, W+0.5, z] at valid intersections. It is important
+    # to not flip the xy axes, otherwise the gradients will be inverted when the
+    # splatter works with a detached rasterizer.
+    pixel_coords_screen = cameras.transform_points_screen(
+        pixel_coords_cameras.view([N, -1, 3]), image_size=(H, W), with_xyflip=False
+    ).reshape(pixel_coords_cameras.shape)
+
+    # Set colors' alpha to 1 and background to 0.
+    colors = torch.cat(
+        [colors, torch.ones_like(colors[..., :1])], dim=-1
+    )  # (N, H, W, K, 4)
+
+    # The hw values of background don't matter because their alpha is set
+    # to 0 in the next step (which means that no matter what their splatting kernel
+    # value is, they will not splat as the kernel is multiplied by alpha). However,
+    # their z-values need to be at max depth.  Otherwise, we could incorrectly compute
+    # occlusion layer linkage.
+    pixel_coords_screen[background_mask] = 1.0  # in place; x, y and z all become 1.0
+
+    # Any background color value with alpha=0 will do, as anything with
+    # alpha=0 will have a zero-weight splatting power. Note that neighbors can still
+    # splat on zero-alpha pixels: that's the way we get non-zero gradients at the
+    # boundary with the background.
+    colors[background_mask] = 0.0  # zeroes the color channels and alpha alike
+
+    return pixel_coords_screen, colors
+
+
+def _get_splat_kernel_normalization(
+    offsets: torch.Tensor,
+    sigma: float = 0.5,
+):
+    """
+    Return (1 + epsilon) / sum_d exp(-|offsets[d]|^2 / (2 * sigma^2)), d over rows.
+
+    The epsilon lets gradients flow through the normalization factor of
+    non-boundary pixels, see Sec. 3.3.1 in "Differentiable Surface Rendering
+    via Non-Differentiable Sampling", Cole et al.
+    """
+    if sigma <= 0.0:
+        raise ValueError("Only positive standard deviations make sense.")
+    epsilon = 0.05
+    # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+    squared_norms = (offsets**2).sum(dim=1)
+    kernel_sum = torch.exp(-squared_norms / (2 * sigma**2)).sum()
+    # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
+    return (1 + epsilon) / kernel_sum
+
+
+def _compute_occlusion_layers(
+    q_depth: torch.Tensor,
+) -> torch.Tensor:
+    """
+    For each splatting pixel, decide whether it splats from a background, surface, or
+    foreground depth relative to the splatted pixel. See unit tests in
+    test_splatter_blend for some enlightening examples.
+
+    Args:
+        q_depth: (N, H, W, K) tensor of z-values of the splatted pixels.
+
+    Returns:
+        occlusion_layers: (N, H, W, 9) long tensor. Each of the 9 values corresponds to
+            one of the nine splatting directions ([-1, -1], [-1, 0], ..., [1,
+            1]). The value at nhwd (where d is the splatting direction) is 0 if
+            the splat in direction d is on the same surface level as the pixel at
+            hw. The value is negative if the splat is in the background (occluded
+            by another splat above it that is at the same surface level as the
+            pixel splatted on), and the value is positive if the splat is in the
+            foreground.
+    """
+    N, H, W, K = q_depth.shape
+
+    # q are the "center pixels" and p the pixels splatting onto them. Use `unfold` to
+    # create `p_depth`, a tensor with 9 layers, each of which corresponds to the
+    # depth of a neighbor of q in one of the 9 directions. For example, p_depth[nk0hw]
+    # is the depth of the pixel splatting onto pixel nhwk from the [-1, -1] direction,
+    # and p_depth[nk4hw] the depth of q (self-splatting onto itself).
+    # More concretely, imagine the pixel depths in a 2x2 image's k-th layer are
+    #   .1 .2
+    #   .3 .4
+    # Then (remembering that we pad with zeros when a pixel has fewer than 9 neighbors):
+    #
+    # p_depth[n, k, :, 0, 0] = [ 0  0  0  0 .1 .2  0 .3 .4] - neighbors of .1
+    # p_depth[n, k, :, 0, 1] = [ 0  0  0 .1 .2  0 .3 .4  0] - neighbors of .2
+    # p_depth[n, k, :, 1, 0] = [ 0 .1 .2  0 .3 .4  0  0  0] - neighbors of .3
+    # p_depth[n, k, :, 1, 1] = [.1 .2  0 .3 .4  0  0  0  0] - neighbors of .4
+    q_depth = q_depth.permute(0, 3, 1, 2)  # (N, K, H, W)
+    p_depth = F.unfold(q_depth, kernel_size=3, padding=1)  # (N, 3^2 * K, H * W)
+    q_depth = q_depth.view(N, K, 1, H, W)
+    p_depth = p_depth.view(N, K, 9, H, W)
+
+    # Take the center pixel q's top rasterization layer. This is the "surface layer"
+    # that we're splatting on. For each of the nine splatting directions p, find which
+    # of the K splatting rasterization layers is closest in depth to the surface
+    # splatted layer.
+    qtop_to_p_zdist = torch.abs(p_depth - q_depth[:, 0:1])  # (N, K, 9, H, W)
+    qtop_to_p_closest_zdist, qtop_to_p_closest_id = qtop_to_p_zdist.min(dim=1)
+
+    # For each of the nine splatting directions p, take the top of the K rasterization
+    # layers. Check which of the K q-layers (that the given direction is splatting on)
+    # is closest in depth to the top splatting layer.
+    ptop_to_q_zdist = torch.abs(p_depth[:, 0:1] - q_depth)  # (N, K, 9, H, W)
+    ptop_to_q_closest_zdist, ptop_to_q_closest_id = ptop_to_q_zdist.min(dim=1)
+
+    # Decide whether each p is on the same level, below, or above the q it is splatting
+    # on. See Fig. 4 in [0] for an illustration. Briefly: say we're interested in pixel
+    # p_{h, w} = [10, 32] splatting onto its neighbor q_{h, w} = [11, 33]. The splat is
+    # coming from direction [-1, -1], which has index 0 in our enumeration of splatting
+    # directions. Hence, we are interested in
+    #
+    # P = p_depth[n, :, d=0, 11, 33] - a vector of K depth values, and
+    # Q = q_depth.squeeze()[n, :, 11, 33] - a vector of K depth values.
+    #
+    # If Q[0] is closest, say, to P[2], then we assume the 0th surface layer of Q is
+    # the same surface as P[2] that's splatting onto it, and P[:2] are foreground splats
+    # and P[3:] are background splats.
+    #
+    # If instead say Q[2] is closest to P[0], then all the splats are background splats,
+    # because the top splatting layer is the same surface as a non-top splatted layer.
+    #
+    # Finally, if Q[0] is closest to P[0], then the top-level P is splatting onto top-
+    # level Q, and P[1:] are all background splats.
+    occlusion_offsets = torch.where(  # noqa
+        ptop_to_q_closest_zdist < qtop_to_p_closest_zdist,
+        -ptop_to_q_closest_id,
+        qtop_to_p_closest_id,
+    )  # (N, 9, H, W)
+
+    occlusion_layers = occlusion_offsets.permute((0, 2, 3, 1))  # (N, H, W, 9)
+    return occlusion_layers
+
+
+def _compute_splatting_colors_and_weights(
+    pixel_coords_screen: torch.Tensor,
+    colors: torch.Tensor,
+    sigma: float,
+    offsets: torch.Tensor,
+) -> torch.Tensor:
+    """
+    For each center pixel q, compute the splatting weights of its surrounding nine spla-
+    tting pixels p, as well as their splatting colors (which are just their colors re-
+    weighted by the splatting weights).
+
+    Args:
+        pixel_coords_screen: (N, H, W, K, 2) tensor of pixel screen coords.
+        colors: (N, H, W, K, 4) RGBA tensor of pixel colors.
+        sigma: splatting kernel variance.
+        offsets: (9, 2) tensor computed by _precompute, indicating the nine
+            splatting directions ([-1, -1], ..., [1, 1]).
+
+    Returns:
+        splat_colors_and_weights: (N, H, W, K, 9, 5) tensor.
+            splat_colors_and_weights[..., :4] corresponds to the splatting colors, and
+            splat_colors_and_weights[..., 4:5] to the splatting weights. The "9" di-
+            mension corresponds to the nine splatting directions.
+    """
+    N, H, W, K, C = colors.shape
+    splat_kernel_normalization = _get_splat_kernel_normalization(offsets, sigma)
+
+    # Distance from each barycentric-interpolated triangle vertices' triplet from its
+    # "ideal" pixel-center location. pixel_coords_screen are in screen coordinates, and
+    # should be at the "ideal" locations on the forward pass -- e.g.
+    # pixel_coords_screen[n, 24, 31, k] = [24.5, 31.5]. For this reason, q_to_px_center
+    # should equal torch.zeros during the forward pass. On the backwards pass, these
+    # coordinates will be adjusted and non-zero, allowing the gradients to flow back
+    # to the mesh vertex coordinates.
+    q_to_px_center = (
+        torch.floor(pixel_coords_screen[..., :2]) - pixel_coords_screen[..., :2] + 0.5
+    ).view((N, H, W, K, 1, 2))
+
+    # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+    dist2_p_q = torch.sum((q_to_px_center + offsets) ** 2, dim=5)  # (N, H, W, K, 9)
+    splat_weights = torch.exp(-dist2_p_q / (2 * sigma**2))
+    alpha = colors[..., 3:4]
+    splat_weights = (alpha * splat_kernel_normalization * splat_weights).unsqueeze(
+        5
+    )  # (N, H, W, K, 9, 1)
+
+    # splat_colors[n, h, w, direction, :] contains the splatting color (weighted by the
+    # splatting weight) that pixel h, w will splat in one of the nine possible
+    # directions (e.g. nhw0 corresponds to splatting in [-1, -1] direction, nhw4 is
+    # self-splatting).
+    splat_colors = splat_weights * colors.unsqueeze(4)  # (N, H, W, K, 9, 4)
+
+    return torch.cat([splat_colors, splat_weights], dim=5)
+
+
+def _offset_splats(
+    splat_colors_and_weights: torch.Tensor,
+    crop_ids_h: torch.Tensor,
+    crop_ids_w: torch.Tensor,
+) -> torch.Tensor:
+    """
+    Pad splatting colors and weights so that tensor locations/coordinates are aligned
+    with the splatting directions. For example, say we have an example input Red channel
+    splat_colors_and_weights[n, :, :, k, direction=0, channel=0] equal to
+       .1  .2  .3
+       .4  .5  .6
+       .7  .8  .9
+    the (h, w) entry indicates that pixel n, h, w, k splats the given color in direction
+    equal to 0, which corresponds to offsets[0] = (-1, -1). Note that this is the x-y
+    direction, not h-w. This function pads and crops this array to
+        0   0   0
+       .2  .3   0
+       .5  .6   0
+    which indicates, for example, that:
+        * There is no pixel splatting in direction (-1, -1) whose splat lands on pixel
+          h=w=0.
+        * There is a pixel splatting in direction (-1, -1) whose splat lands on the pi-
+          xel h=1, w=0, and that pixel's splatting color is .2.
+        * There is a pixel splatting in direction (-1, -1) whose splat lands on the pi-
+          xel h=2, w=1, and that pixel's splatting color is .6.
+
+    Args:
+        *splat_colors_and_weights*: (N, H, W, K, 9, 5) tensor of colors and weights,
+        where dim=-2 corresponds to the splatting directions/offsets.
+        *crop_ids_h*: (N, H, W+2, K, 9, 5) precomputed tensor used for padding within
+            torch.gather. See _precompute for more info.
+        *crop_ids_w*: (N, H, W, K, 9, 5) precomputed tensor used for padding within
+            torch.gather. See _precompute for more info.
+
+
+    Returns:
+        *splat_colors_and_weights*: (N, H, W, K, 9, 5) tensor.
+    """
+    N, H, W, K, _, _ = splat_colors_and_weights.shape
+    # Transform splat_colors such that each of the 9 layers (corresponding to
+    # the 9 splat offsets) is padded with 1 and shifted in the appropriate
+    # direction. E.g. splat_colors[n, :, :, 0] corresponds to the (-1, -1)
+    # offset, so will be padded with one rows of 1 on the right and have a
+    # single row clipped at the bottom, and splat_colors[n, :, :, 4] corresponds
+    # to offset (0, 0) and will remain unchanged.
+    splat_colors_and_weights = F.pad(
+        splat_colors_and_weights, [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0]
+    )  # (N, H+2, W+2, K, 9, 5)
+
+    # (N, H+2, W+2, K, 9, 5) -> (N, H, W+2, K, 9, 5)
+    splat_colors_and_weights = torch.gather(
+        splat_colors_and_weights, dim=1, index=crop_ids_h
+    )
+
+    # (N, H, W+2, K, 9, 5) -> (N, H, W, K, 9, 5)
+    splat_colors_and_weights = torch.gather(
+        splat_colors_and_weights, dim=2, index=crop_ids_w
+    )
+
+    return splat_colors_and_weights
+
+
+def _compute_splatted_colors_and_weights(
+    occlusion_layers: torch.Tensor,  # (N, H, W, 9)
+    splat_colors_and_weights: torch.Tensor,  # (N, H, W, K, 9, 5)
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Accumulate splatted colors in background, surface and foreground occlusion buffers.
+
+    Args:
+        occlusion_layers: (N, H, W, 9) tensor. See _compute_occlusion_layers.
+        splat_colors_and_weights: (N, H, W, K, 9, 5) tensor. See _offset_splats.
+
+    Returns:
+        splatted_colors: (N, H, W, 4, 3) tensor. Last dimension corresponds to back-
+            ground, surface, and foreground splat colors.
+        splatted_weights: (N, H, W, 1, 3) tensor. Last dimension corresponds to back-
+            ground, surface, and foreground splat weights and is used for normalization.
+
+    """
+    N, H, W, K, _, _ = splat_colors_and_weights.shape
+
+    # Create an occlusion mask, with the last dimension of length 3, corresponding to
+    # background/surface/foreground splatting. E.g. occlusion_layer_mask[n,h,w,k,d,0] is
+    # 1 if the pixel at hw is splatted from direction d such that the splatting pixel p
+    # is below the splatted pixel q (in the background); otherwise, the value is 0.
+    # occlusion_layer_mask[n,h,w,k,d,1] is 1 if the splatting pixel is at the same
+    # surface level as the splatted pixel q, and occlusion_layer_mask[n,h,w,k,d,2] is
+    # 1 only if the splatting pixel is in the foreground.
+    layer_ids = torch.arange(K, device=splat_colors_and_weights.device).view(
+        1, 1, 1, K, 1
+    )
+    occlusion_layers = occlusion_layers.view(N, H, W, 1, 9)
+    occlusion_layer_mask = torch.stack(
+        [
+            occlusion_layers > layer_ids,  # (N, H, W, K, 9)
+            occlusion_layers == layer_ids,  # (N, H, W, K, 9)
+            occlusion_layers < layer_ids,  # (N, H, W, K, 9)
+        ],
+        dim=5,
+    ).float()  # (N, H, W, K, 9, 3)
+
+    # (N * H * W, 5, 9 * K) x (N * H * W, 9 * K, 3) -> (N * H * W, 5, 3)
+    splatted_colors_and_weights = torch.bmm(
+        splat_colors_and_weights.permute(0, 1, 2, 5, 3, 4).reshape(
+            (N * H * W, 5, K * 9)
+        ),
+        occlusion_layer_mask.reshape((N * H * W, K * 9, 3)),
+    ).reshape((N, H, W, 5, 3))
+
+    return (
+        splatted_colors_and_weights[..., :4, :],
+        splatted_colors_and_weights[..., 4:5, :],
+    )
+
+
+def _normalize_and_compose_all_layers(
+    background_color: torch.Tensor,
+    splatted_colors_per_occlusion_layer: torch.Tensor,
+    splatted_weights_per_occlusion_layer: torch.Tensor,
+) -> torch.Tensor:
+    """
+    Normalize each bg/surface/fg buffer by its weight, and compose.
+
+    Args:
+        background_color: (3) RGB tensor.
+        splatted_colors_per_occlusion_layer: (N, H, W, 4, 3) RGBA tensor, last dimension
+            corresponds to foreground, surface, and background splatting.
+        splatted_weights_per_occlusion_layer: (N, H, W, 1, 3) weight tensor.
+
+    Returns:
+        output_colors: (N, H, W, 4) RGBA tensor.
+    """
+    device = splatted_colors_per_occlusion_layer.device
+
+    # Normalize each of bg/surface/fg splat layers separately.
+    normalization_scales = 1.0 / (
+        # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
+        torch.maximum(
+            splatted_weights_per_occlusion_layer,
+            torch.tensor([1.0], device=device),
+        )
+    )  # (N, H, W, 1, 3)
+
+    normalized_splatted_colors = (
+        splatted_colors_per_occlusion_layer * normalization_scales
+    )  # (N, H, W, 4, 3)
+
+    # Use alpha-compositing to compose the splat layers.
+    output_colors = torch.cat(
+        [background_color, torch.tensor([0.0], device=device)]
+    )  # (4), will broadcast to (N, H, W, 4) below.
+
+    for occlusion_layer_id in (-1, -2, -3):
+        # Over-compose the bg, surface, and fg occlusion layers. Note that we already
+        # multiplied each pixel's RGBA by its own alpha as part of self-splatting in
+        # _compute_splatting_colors_and_weights, so we don't re-multiply by alpha here.
+        alpha = normalized_splatted_colors[..., 3:4, occlusion_layer_id]  # (N, H, W, 1)
+        output_colors = (
+            normalized_splatted_colors[..., occlusion_layer_id]
+            + (1.0 - alpha) * output_colors
+        )
+    return output_colors
+
+
+class SplatterBlender(torch.nn.Module):
+    def __init__(
+        self,
+        input_shape: Tuple[int, int, int, int],
+        device,
+    ):
+        """
+        A splatting blender. See `forward` docs for details of the splatting mechanism.
+
+        Args:
+            input_shape: Tuple (N, H, W, K) indicating the batch size, image height,
+                image width, and number of rasterized layers. Used to precompute
+                constant tensors that do not change as long as this tuple is unchanged.
+        """
+        super().__init__()
+        self.crop_ids_h, self.crop_ids_w, self.offsets = _precompute(
+            input_shape, device
+        )
+
+    def to(self, device):
+        self.offsets = self.offsets.to(device)
+        self.crop_ids_h = self.crop_ids_h.to(device)
+        self.crop_ids_w = self.crop_ids_w.to(device)
+        return super().to(device)  # Module.to returns self; pass it on for chaining
+
+    def forward(
+        self,
+        colors: torch.Tensor,
+        pixel_coords_cameras: torch.Tensor,
+        cameras: FoVPerspectiveCameras,
+        background_mask: torch.Tensor,
+        blend_params: BlendParams,
+    ) -> torch.Tensor:
+        """
+        RGB blending using splatting, as proposed in [0].
+
+        Args:
+            colors: (N, H, W, K, 3) tensor of RGB colors at each h, w pixel location for
+                K intersection layers.
+            pixel_coords_cameras: (N, H, W, K, 3) tensor of pixel coordinates in the
+                camera frame of reference. It is *crucial* that these are computed by
+                interpolating triangle vertex positions using barycentric coordinates --
+                this allows gradients to travel through pixel_coords_camera back to the
+                vertex positions.
+            cameras: Cameras object used to project pixel_coords_cameras screen coords.
+            background_mask: (N, H, W, K, 3) boolean tensor, True for bg pixels. A pixel
+                is considered "background" if no mesh triangle projects to it. This is
+                typically computed by the rasterizer.
+            blend_params: BlendParams, from which we use sigma (splatting kernel
+                variance) and background_color.
+
+        Returns:
+            output_colors: (N, H, W, 4) tensor of RGBA values. The alpha layer is set to
+                fully transparent in the background.
+
+        [0] Cole, F. et al., "Differentiable Surface Rendering via Non-differentiable
+            Sampling".
+        """
+
+        # Our implementation has 6 stages. In the description below, we will call each
+        # pixel q and the 9 surrounding splatting pixels (including itself) p.
+        #     1. Use barycentrics to compute the position of each pixel in screen
+        # coordinates. These should exactly correspond to pixel centers during the
+        # forward pass, but can be shifted on backwards. This step allows gradients to
+        # travel to vertex coordinates, even if the rasterizer is non-differentiable.
+        #     2a. For each center pixel q, take each splatting p and decide whether it
+        # is on the same surface level as q, or in the background or foreground.
+        #     2b. For each center pixel q, compute the splatting weight of surrounding
+        # pixels p, and their splatting colors (which are just the original colors
+        # weighted by the splatting weights).
+        #     3. As a vectorization technicality, offset the tensors corresponding to
+        # the splatting p values in the nine directions, by padding each of nine
+        # splatting layers on the bottom/top, left/right.
+        #     4. Do the actual splatting, by accumulating the splatting colors of the
+        # surrounding p's for each pixel q. The weights get accumulated separately for
+        # p's that got assigned to the background/surface/foreground in Step 2a.
+        #     5. Normalize each of the splatted bg/surface/fg colors for each q, and
+        # compose the resulting color maps.
+        #
+        # Note that it is crucial that in Step 1 we compute the pixel coordinates by in-
+        # terpolating triangle vertices using barycentric coords from the rasterizer. In
+        # our case, these pixel_coords_camera are computed by the shader and passed to
+        # this function to avoid re-computation.
+
+        pixel_coords_screen, colors = _prepare_pixels_and_colors(
+            pixel_coords_cameras, colors, cameras, background_mask
+        )  # (N, H, W, K, 3) and (N, H, W, K, 4)
+
+        occlusion_layers = _compute_occlusion_layers(
+            pixel_coords_screen[..., 2:3].squeeze(dim=-1)
+        )  # (N, H, W, 9)
+
+        splat_colors_and_weights = _compute_splatting_colors_and_weights(
+            pixel_coords_screen,
+            colors,
+            blend_params.sigma,
+            self.offsets,
+        )  # (N, H, W, K, 9, 5)
+
+        splat_colors_and_weights = _offset_splats(
+            splat_colors_and_weights,
+            self.crop_ids_h,
+            self.crop_ids_w,
+        )  # (N, H, W, K, 9, 5)
+
+        (
+            splatted_colors_per_occlusion_layer,
+            splatted_weights_per_occlusion_layer,
+        ) = _compute_splatted_colors_and_weights(
+            occlusion_layers, splat_colors_and_weights
+        )  # (N, H, W, 4, 3) and (N, H, W, 1, 3)
+
+        output_colors = _normalize_and_compose_all_layers(
+            _get_background_color(blend_params, colors.device),
+            splatted_colors_per_occlusion_layer,
+            splatted_weights_per_occlusion_layer,
+        )  # (N, H, W, 4)
+
+        return output_colors
diff --git a/pytorch3d/pytorch3d/renderer/utils.py b/pytorch3d/pytorch3d/renderer/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2c37871c1fad0c530df96bdf4b779f647f9ce8e
--- /dev/null
+++ b/pytorch3d/pytorch3d/renderer/utils.py
@@ -0,0 +1,458 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import copy
+import inspect
+import warnings
+from typing import Any, List, Optional, Tuple, TypeVar, Union
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+from ..common.datatypes import Device, make_device
+
+
+class TensorAccessor(nn.Module):
+    """
+    A helper class to be used with the __getitem__ method. This can be used for
+    getting/setting the values for an attribute of a class at one particular
+    index.  This is useful when the attributes of a class are batched tensors
+    and one element in the batch needs to be modified.
+    """
+
+    def __init__(self, class_object, index: Union[int, slice]) -> None:
+        """
+        Args:
+            class_object: this should be an instance of a class which has
+                attributes which are tensors representing a batch of
+                values.
+            index: int/slice, an index indicating the position in the batch.
+                In __setattr__ and __getattr__ only the value of class
+                attributes at this index will be accessed.
+        """
+        self.__dict__["class_object"] = class_object
+        self.__dict__["index"] = index
+
+    def __setattr__(self, name: str, value: Any):
+        """
+        Update the attribute given by `name` to the value given by `value`
+        at the index specified by `self.index`.
+
+        Args:
+            name: str, name of the attribute.
+            value: value to set the attribute to.
+        """
+        v = getattr(self.class_object, name)
+        if not torch.is_tensor(v):
+            msg = "Can only set values on attributes which are tensors; got %r"
+            raise AttributeError(msg % type(v))
+
+        # Convert the new value to a tensor if it is not one already.
+        if not torch.is_tensor(value):
+            value = torch.tensor(
+                value, device=v.device, dtype=v.dtype, requires_grad=v.requires_grad
+            )
+
+        # Check the shapes match the existing shape and the shape of the index.
+        if v.dim() > 1 and value.dim() > 1 and value.shape[1:] != v.shape[1:]:
+            msg = "Expected value to have shape %r; got %r"
+            raise ValueError(msg % (v.shape, value.shape))
+        if (
+            v.dim() == 0
+            and isinstance(self.index, slice)
+            and len(value) != len(self.index)
+        ):
+            msg = "Expected value to have len %r; got %r"
+            raise ValueError(msg % (len(self.index), len(value)))
+        self.class_object.__dict__[name][self.index] = value
+
+    def __getattr__(self, name: str):
+        """
+        Return the value of the attribute given by "name" on self.class_object
+        at the index specified in self.index.
+
+        Args:
+            name: string of the attribute name
+        """
+        if hasattr(self.class_object, name):
+            return self.class_object.__dict__[name][self.index]
+        else:
+            msg = "Attribute %s not found on %r"
+            raise AttributeError(msg % (name, type(self.class_object).__name__))
+
+
+BROADCAST_TYPES = (float, int, list, tuple, torch.Tensor, np.ndarray)
+
+
+class TensorProperties(nn.Module):
+    """
+    A mix-in class for storing tensors as properties with helper methods.
+    """
+
+    def __init__(
+        self,
+        dtype: torch.dtype = torch.float32,
+        device: Device = "cpu",
+        **kwargs,
+    ) -> None:
+        """
+        Args:
+            dtype: data type to set for the inputs
+            device: Device (as str or torch.device)
+            kwargs: any number of keyword arguments. Any arguments which are
+                of type (float/int/list/tuple/tensor/array) are broadcasted and
+                other keyword arguments are set as attributes.
+        """
+        super().__init__()
+        self.device = make_device(device)
+        self._N = 0
+        if kwargs is not None:
+
+            # broadcast all inputs which are float/int/list/tuple/tensor/array
+            # set as attributes anything else e.g. strings, bools
+            args_to_broadcast = {}
+            for k, v in kwargs.items():
+                if v is None or isinstance(v, (str, bool)):
+                    setattr(self, k, v)
+                elif isinstance(v, BROADCAST_TYPES):
+                    args_to_broadcast[k] = v
+                else:
+                    msg = "Arg %s with type %r is not broadcastable"
+                    warnings.warn(msg % (k, type(v)))
+
+            names = args_to_broadcast.keys()
+            # convert from type dict.values to tuple
+            values = tuple(v for v in args_to_broadcast.values())
+
+            if len(values) > 0:
+                broadcasted_values = convert_to_tensors_and_broadcast(
+                    *values, device=device
+                )
+
+                # Set broadcasted values as attributes on self.
+                for i, n in enumerate(names):
+                    setattr(self, n, broadcasted_values[i])
+                    if self._N == 0:
+                        self._N = broadcasted_values[i].shape[0]
+
+    def __len__(self) -> int:
+        return self._N
+
+    def isempty(self) -> bool:
+        return self._N == 0
+
+    def __getitem__(self, index: Union[int, slice]) -> TensorAccessor:
+        """
+
+        Args:
+            index: an int or slice used to index all the fields.
+
+        Returns:
+            if `index` is an index int/slice return a TensorAccessor class
+            with getattribute/setattribute methods which return/update the value
+            at the index in the original class.
+        """
+        if isinstance(index, (int, slice)):
+            return TensorAccessor(class_object=self, index=index)
+
+        msg = "Expected index of type int or slice; got %r"
+        raise ValueError(msg % type(index))
+
+    # pyre-fixme[14]: `to` overrides method defined in `Module` inconsistently.
+    def to(self, device: Device = "cpu") -> "TensorProperties":
+        """
+        In place operation to move class properties which are tensors to a
+        specified device. If self has a property "device", update this as well.
+        """
+        device_ = make_device(device)
+        for k in dir(self):
+            v = getattr(self, k)
+            if k == "device":
+                setattr(self, k, device_)
+            if torch.is_tensor(v) and v.device != device_:
+                setattr(self, k, v.to(device_))
+        return self
+
+    def cpu(self) -> "TensorProperties":
+        return self.to("cpu")
+
+    # pyre-fixme[14]: `cuda` overrides method defined in `Module` inconsistently.
+    def cuda(self, device: Optional[int] = None) -> "TensorProperties":
+        return self.to(f"cuda:{device}" if device is not None else "cuda")
+
+    def clone(self, other) -> "TensorProperties":
+        """
+        Update the tensor properties of other with the cloned properties of self.
+        """
+        for k in dir(self):
+            v = getattr(self, k)
+            if inspect.ismethod(v) or k.startswith("__") or type(v) is TypeVar:
+                continue
+            if torch.is_tensor(v):
+                v_clone = v.clone()
+            else:
+                v_clone = copy.deepcopy(v)
+            setattr(other, k, v_clone)
+        return other
+
+    def gather_props(self, batch_idx) -> "TensorProperties":
+        """
+        This is an in place operation to reformat all tensor class attributes
+        based on a set of given indices using torch.gather. This is useful when
+        attributes which are batched tensors e.g. shape (N, 3) need to be
+        multiplied with another tensor which has a different first dimension
+        e.g. packed vertices of shape (V, 3).
+
+        Example
+
+        .. code-block:: python
+
+            self.specular_color = (N, 3) tensor of specular colors for each mesh
+
+        A lighting calculation may use
+
+        .. code-block:: python
+
+            verts_packed = meshes.verts_packed()  # (V, 3)
+
+        To multiply these two tensors the batch dimension needs to be the same.
+        To achieve this we can do
+
+        .. code-block:: python
+
+            batch_idx = meshes.verts_packed_to_mesh_idx()  # (V)
+
+        This gives index of the mesh for each vertex in verts_packed.
+
+        .. code-block:: python
+
+            self.gather_props(batch_idx)
+            self.specular_color = (V, 3) tensor with the specular color for
+                                     each packed vertex.
+
+        torch.gather requires the index tensor to have the same shape as the
+        input tensor so this method takes care of the reshaping of the index
+        tensor to use with class attributes with arbitrary dimensions.
+
+        Args:
+            batch_idx: shape (B, ...) where `...` represents an arbitrary
+                number of dimensions
+
+        Returns:
+            self with all properties reshaped. e.g. a property with shape (N, 3)
+            is transformed to shape (B, 3).
+        """
+        # Iterate through the attributes of the class which are tensors.
+        for k in dir(self):
+            v = getattr(self, k)
+            if torch.is_tensor(v):
+                if v.shape[0] > 1:
+                    # There are different values for each batch element
+                    # so gather these using the batch_idx.
+                    # First clone the input batch_idx tensor before
+                    # modifying it.
+                    _batch_idx = batch_idx.clone()
+                    idx_dims = _batch_idx.shape
+                    tensor_dims = v.shape
+                    if len(idx_dims) > len(tensor_dims):
+                        msg = "batch_idx cannot have more dimensions than %s. "
+                        msg += "got shape %r and %s has shape %r"
+                        raise ValueError(msg % (k, idx_dims, k, tensor_dims))
+                    if idx_dims != tensor_dims:
+                        # To use torch.gather the index tensor (_batch_idx) has
+                        # to have the same shape as the input tensor.
+                        new_dims = len(tensor_dims) - len(idx_dims)
+                        new_shape = idx_dims + (1,) * new_dims
+                        expand_dims = (-1,) + tensor_dims[1:]
+                        _batch_idx = _batch_idx.view(*new_shape)
+                        _batch_idx = _batch_idx.expand(*expand_dims)
+
+                    v = v.gather(0, _batch_idx)
+                    setattr(self, k, v)
+        return self
+
+
+def format_tensor(
+    input,
+    dtype: torch.dtype = torch.float32,
+    device: Device = "cpu",
+) -> torch.Tensor:
+    """
+    Helper function for converting a scalar value to a tensor.
+
+    Args:
+        input: Python scalar, Python list/tuple, torch scalar, 1D torch tensor
+        dtype: data type for the input
+        device: Device (as str or torch.device) on which the tensor should be placed.
+
+    Returns:
+        input_vec: torch tensor with optional added batch dimension.
+    """
+    device_ = make_device(device)
+    if not torch.is_tensor(input):
+        input = torch.tensor(input, dtype=dtype, device=device_)
+
+    if input.dim() == 0:
+        input = input.view(1)
+
+    if input.device == device_:
+        return input
+
+    input = input.to(device=device)
+    return input
+
+
+def convert_to_tensors_and_broadcast(
+    *args,
+    dtype: torch.dtype = torch.float32,
+    device: Device = "cpu",
+):
+    """
+    Helper function to handle parsing an arbitrary number of inputs (*args)
+    which all need to have the same batch dimension.
+    The output is a list of tensors.
+
+    Args:
+        *args: an arbitrary number of inputs
+            Each of the values in `args` can be one of the following
+                - Python scalar
+                - Torch scalar
+                - Torch tensor of shape (N, K_i) or (1, K_i) where K_i are
+                  an arbitrary number of dimensions which can vary for each
+                  value in args. In this case each input is broadcast to a
+                  tensor of shape (N, K_i)
+        dtype: data type to use when creating new tensors.
+        device: torch device on which the tensors should be placed.
+
+    Output:
+        args: A list of tensors of shape (N, K_i)
+    """
+    # Convert all inputs to tensors with a batch dimension
+    args_1d = [format_tensor(c, dtype, device) for c in args]
+
+    # Find broadcast size
+    sizes = [c.shape[0] for c in args_1d]
+    N = max(sizes)
+
+    args_Nd = []
+    for c in args_1d:
+        if c.shape[0] != 1 and c.shape[0] != N:
+            msg = "Got non-broadcastable sizes %r" % sizes
+            raise ValueError(msg)
+
+        # Expand broadcast dim and keep non broadcast dims the same size
+        expand_sizes = (N,) + (-1,) * len(c.shape[1:])
+        args_Nd.append(c.expand(*expand_sizes))
+
+    return args_Nd
+
+
+def ndc_grid_sample(
+    input: torch.Tensor,
+    grid_ndc: torch.Tensor,
+    *,
+    align_corners: bool = False,
+    **grid_sample_kwargs,
+) -> torch.Tensor:
+    """
+    Samples `input` at the 2D locations `grid_ndc`, which are given in the
+    PyTorch3D NDC coordinate frame, by converting them with
+    `ndc_to_grid_sample_coords` and calling `torch.nn.functional.grid_sample`.
+
+    Args:
+        input: The tensor of shape `(B, dim, H, W)` to be sampled.
+        grid_ndc: A tensor of shape `(B, ..., 2)` holding the 2D NDC
+            locations at which `input` is sampled (see [1]).
+        align_corners: Forwarded to `torch.nn.functional.grid_sample`.
+        grid_sample_kwargs: Extra keyword arguments forwarded unchanged to
+            `torch.nn.functional.grid_sample`.
+
+    Returns:
+        sampled_input: A tensor of shape `(B, dim, ...)` containing the samples
+            of `input` at 2D locations `grid_ndc`.
+
+    Raises:
+        ValueError: if the batch sizes differ, `input` is not 4-dimensional,
+            or the last dimension of `grid_ndc` is not 2.
+
+    References:
+        [1] https://pytorch3d.org/docs/cameras
+    """
+
+    n_batch, *sample_dims, coord_dim = grid_ndc.shape
+    if n_batch != input.shape[0]:
+        raise ValueError("'input' and 'grid_ndc' have to have the same batch size.")
+    if input.ndim != 4:
+        raise ValueError("'input' has to be a 4-dimensional Tensor.")
+    if coord_dim != 2:
+        raise ValueError("The last dimension of 'grid_ndc' has to be == 2.")
+
+    # Flatten all sample points into a (B, P, 1, 2) grid so that any number of
+    # leading sample dimensions fits the 4D grid passed to grid_sample.
+    flat_ndc = grid_ndc.reshape(n_batch, -1, 1, 2)
+    # pyre-fixme[6]: For 2nd param expected `Tuple[int, int]` but got `Size`.
+    flat_grid = ndc_to_grid_sample_coords(flat_ndc, input.shape[2:])
+
+    flat_samples = torch.nn.functional.grid_sample(
+        input, flat_grid, align_corners=align_corners, **grid_sample_kwargs
+    )
+
+    # Restore the caller's sample layout after the channel dimension.
+    return flat_samples.reshape(n_batch, input.shape[1], *sample_dims)
+
+
+def ndc_to_grid_sample_coords(
+    xy_ndc: torch.Tensor,
+    image_size_hw: Tuple[int, int],
+) -> torch.Tensor:
+    """
+    Map points from PyTorch3D's NDC coordinates to the coordinates used by
+    `torch.nn.functional.grid_sample`. The input tensor is left untouched.
+
+    Args:
+        xy_ndc: Tensor of shape `(..., 2)` containing 2D points in the
+            PyTorch3D's NDC coordinates.
+        image_size_hw: A tuple `(image_height, image_width)` denoting the
+            height and width of the image tensor to sample.
+    Returns:
+        xy_grid_sample: Tensor of shape `(..., 2)`: the negated input, with index
+            1 (if H >= W) or index 0 (otherwise) scaled by min(H, W) / max(H, W).
+    """
+    if len(image_size_hw) != 2 or any(side <= 0 for side in image_size_hw):
+        raise ValueError("'image_size_hw' has to be a 2-tuple of positive integers")
+    height, width = image_size_hw
+    shrink = min(height, width) / max(height, width)
+    # Negation allocates a new tensor, so the in-place scaling below is safe.
+    flipped = -xy_ndc
+    # Ties (square images) take the index-1 branch, where the factor is 1 anyway.
+    flipped[..., 1 if height >= width else 0] *= shrink
+    return flipped
+
+
+def parse_image_size(
+    image_size: Union[List[int], Tuple[int, int], int]
+) -> Tuple[int, int]:
+    """
+    Args:
+        image_size: A single int (for square images) or a tuple/list of two ints.
+    Returns:
+        A tuple of two ints. A single value is duplicated without validation.
+    Throws:
+        ValueError if a tuple/list does not hold exactly two positive ints.
+    """
+    if not isinstance(image_size, (tuple, list)):
+        return (image_size, image_size)
+    if len(image_size) != 2:
+        raise ValueError("Image size can only be a tuple/list of (H, W)")
+    # Unpack first: "%" with a list operand raises TypeError, not ValueError.
+    h, w = image_size
+    if not (isinstance(h, int) and isinstance(w, int)):
+        raise ValueError("Image sizes must be integers; got %r, %r" % (h, w))
+    if h <= 0 or w <= 0:
+        raise ValueError("Image sizes must be greater than 0; got %d, %d" % (h, w))
+    return (h, w)
diff --git a/pytorch3d/pytorch3d/structures/__init__.py b/pytorch3d/pytorch3d/structures/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b92e87241a5c3614876be5a04a49350b46413ff3
--- /dev/null
+++ b/pytorch3d/pytorch3d/structures/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .meshes import join_meshes_as_batch, join_meshes_as_scene, Meshes
+from .pointclouds import (
+    join_pointclouds_as_batch,
+    join_pointclouds_as_scene,
+    Pointclouds,
+)
+from .utils import list_to_packed, list_to_padded, packed_to_list, padded_to_list
+from .volumes import Volumes
+
+
+__all__ = [name for name in globals() if not name.startswith("_")]
diff --git a/pytorch3d/pytorch3d/structures/meshes.py b/pytorch3d/pytorch3d/structures/meshes.py
new file mode 100644
index 0000000000000000000000000000000000000000..fce929bec31f7152c961c0082604c5e8347af98a
--- /dev/null
+++ b/pytorch3d/pytorch3d/structures/meshes.py
@@ -0,0 +1,1750 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Union
+
+import torch
+
+from ..common.datatypes import Device, make_device
+from . import utils as struct_utils
+
+
+class Meshes:
+    """
+    This class provides functions for working with batches of triangulated
+    meshes with varying numbers of faces and vertices, and converting between
+    representations.
+
+    Within Meshes, there are three different representations of the faces and
+    verts data:
+
+    List
+      - only used for input as a starting point to convert to other representations.
+    Padded
+      - has specific batch dimension.
+    Packed
+      - no batch dimension.
+      - has auxiliary variables used to index into the padded representation.
+
+    Example:
+
+    Input list of verts V_n = [[V_1], [V_2], ... , [V_N]]
+    where V_1, ... , V_N are the number of verts in each mesh and N is the
+    number of meshes.
+
+    Input list of faces F_n = [[F_1], [F_2], ... , [F_N]]
+    where F_1, ... , F_N are the number of faces in each mesh.
+
+    # SPHINX IGNORE
+     List                      | Padded                  | Packed
+    ---------------------------|-------------------------|------------------------
+    [[V_1], ... , [V_N]]       | size = (N, max(V_n), 3) |  size = (sum(V_n), 3)
+                               |                         |
+    Example for verts:         |                         |
+                               |                         |
+    V_1 = 3, V_2 = 4, V_3 = 5  | size = (3, 5, 3)        |  size = (12, 3)
+                               |                         |
+    List([                     | tensor([                |  tensor([
+      [                        |     [                   |    [0.1, 0.3, 0.5],
+        [0.1, 0.3, 0.5],       |       [0.1, 0.3, 0.5],  |    [0.5, 0.2, 0.1],
+        [0.5, 0.2, 0.1],       |       [0.5, 0.2, 0.1],  |    [0.6, 0.8, 0.7],
+        [0.6, 0.8, 0.7],       |       [0.6, 0.8, 0.7],  |    [0.1, 0.3, 0.3],
+      ],                       |       [0,    0,    0],  |    [0.6, 0.7, 0.8],
+      [                        |       [0,    0,    0],  |    [0.2, 0.3, 0.4],
+        [0.1, 0.3, 0.3],       |     ],                  |    [0.1, 0.5, 0.3],
+        [0.6, 0.7, 0.8],       |     [                   |    [0.7, 0.3, 0.6],
+        [0.2, 0.3, 0.4],       |       [0.1, 0.3, 0.3],  |    [0.2, 0.4, 0.8],
+        [0.1, 0.5, 0.3],       |       [0.6, 0.7, 0.8],  |    [0.9, 0.5, 0.2],
+      ],                       |       [0.2, 0.3, 0.4],  |    [0.2, 0.3, 0.4],
+      [                        |       [0.1, 0.5, 0.3],  |    [0.9, 0.3, 0.8],
+        [0.7, 0.3, 0.6],       |       [0,    0,    0],  |  ])
+        [0.2, 0.4, 0.8],       |     ],                  |
+        [0.9, 0.5, 0.2],       |     [                   |
+        [0.2, 0.3, 0.4],       |       [0.7, 0.3, 0.6],  |
+        [0.9, 0.3, 0.8],       |       [0.2, 0.4, 0.8],  |
+      ]                        |       [0.9, 0.5, 0.2],  |
+    ])                         |       [0.2, 0.3, 0.4],  |
+                               |       [0.9, 0.3, 0.8],  |
+                               |     ]                   |
+                               |  ])                     |
+    Example for faces:         |                         |
+                               |                         |
+    F_1 = 1, F_2 = 2, F_3 = 7  | size = (3, 7, 3)        | size = (10, 3)
+                               |                         |
+    List([                     | tensor([                | tensor([
+      [                        |     [                   |    [ 0,  1,  2],
+        [0, 1, 2],             |       [0,   1,  2],     |    [ 3,  4,  5],
+      ],                       |       [-1, -1, -1],     |    [ 4,  5,  6],
+      [                        |       [-1, -1, -1],     |    [ 8,  9,  7],
+        [0, 1, 2],             |       [-1, -1, -1],     |    [ 7,  8, 10],
+        [1, 2, 3],             |       [-1, -1, -1],     |    [ 9, 10,  8],
+      ],                       |       [-1, -1, -1],     |    [11, 10,  9],
+      [                        |       [-1, -1, -1],     |    [11,  7,  8],
+        [1, 2, 0],             |     ],                  |    [11, 10,  8],
+        [0, 1, 3],             |     [                   |    [11,  9,  8],
+        [2, 3, 1],             |       [0,   1,  2],     |  ])
+        [4, 3, 2],             |       [1,   2,  3],     |
+        [4, 0, 1],             |       [-1, -1, -1],     |
+        [4, 3, 1],             |       [-1, -1, -1],     |
+        [4, 2, 1],             |       [-1, -1, -1],     |
+      ],                       |       [-1, -1, -1],     |
+    ])                         |       [-1, -1, -1],     |
+                               |     ],                  |
+                               |     [                   |
+                               |       [1,   2,  0],     |
+                               |       [0,   1,  3],     |
+                               |       [2,   3,  1],     |
+                               |       [4,   3,  2],     |
+                               |       [4,   0,  1],     |
+                               |       [4,   3,  1],     |
+                               |       [4,   2,  1],     |
+                               |     ]                   |
+                               |   ])                    |
+    -----------------------------------------------------------------------------
+
+    Auxiliary variables for packed representation
+
+    Name                           |   Size              |  Example from above
+    -------------------------------|---------------------|-----------------------
+                                   |                     |
+    verts_packed_to_mesh_idx       |  size = (sum(V_n))  |   tensor([
+                                   |                     |     0, 0, 0, 1, 1, 1,
+                                   |                     |     1, 2, 2, 2, 2, 2
+                                   |                     |   ])
+                                   |                     |   size = (12)
+                                   |                     |
+    mesh_to_verts_packed_first_idx |  size = (N)         |   tensor([0, 3, 7])
+                                   |                     |   size = (3)
+                                   |                     |
+    num_verts_per_mesh             |  size = (N)         |   tensor([3, 4, 5])
+                                   |                     |   size = (3)
+                                   |                     |
+    faces_packed_to_mesh_idx       |  size = (sum(F_n))  |   tensor([
+                                   |                     |     0, 1, 1, 2, 2, 2,
+                                   |                     |     2, 2, 2, 2
+                                   |                     |   ])
+                                   |                     |   size = (10)
+                                   |                     |
+    mesh_to_faces_packed_first_idx |  size = (N)         |   tensor([0, 1, 3])
+                                   |                     |   size = (3)
+                                   |                     |
+    num_faces_per_mesh             |  size = (N)         |   tensor([1, 2, 7])
+                                   |                     |   size = (3)
+                                   |                     |
+    verts_padded_to_packed_idx     |  size = (sum(V_n))  |  tensor([
+                                   |                     |     0, 1, 2, 5, 6, 7,
+                                   |                     |     8, 10, 11, 12, 13,
+                                   |                     |     14
+                                   |                     |  ])
+                                   |                     |  size = (12)
+    -----------------------------------------------------------------------------
+    # SPHINX IGNORE
+
+    From the faces, edges are computed and have packed and padded
+    representations with auxiliary variables.
+
+    E_n = [[E_1], ... , [E_N]]
+    where E_1, ... , E_N are the number of unique edges in each mesh.
+    Total number of unique edges = sum(E_n)
+
+    # SPHINX IGNORE
+    Name                           |   Size                  | Example from above
+    -------------------------------|-------------------------|----------------------
+                                   |                         |
+    edges_packed                   | size = (sum(E_n), 2)    |  tensor([
+                                   |                         |     [0, 1],
+                                   |                         |     [0, 2],
+                                   |                         |     [1, 2],
+                                   |                         |       ...
+                                   |                         |     [10, 11],
+                                   |                         |   ])
+                                   |                         |   size = (18, 2)
+                                   |                         |
+    num_edges_per_mesh             | size = (N)              |  tensor([3, 5, 10])
+                                   |                         |  size = (3)
+                                   |                         |
+    edges_packed_to_mesh_idx       | size = (sum(E_n))       |  tensor([
+                                   |                         |    0, 0, 0,
+                                   |                         |     . . .
+                                   |                         |    2, 2, 2
+                                   |                         |   ])
+                                   |                         |   size = (18)
+                                   |                         |
+    faces_packed_to_edges_packed   | size = (sum(F_n), 3)    |  tensor([
+                                   |                         |    [2,   1,  0],
+                                   |                         |    [5,   4,  3],
+                                   |                         |       .  .  .
+                                   |                         |    [12, 14, 16],
+                                   |                         |   ])
+                                   |                         |   size = (10, 3)
+                                   |                         |
+    mesh_to_edges_packed_first_idx | size = (N)              |  tensor([0, 3, 8])
+                                   |                         |  size = (3)
+    ----------------------------------------------------------------------------
+    # SPHINX IGNORE
+    """
+
+    _INTERNAL_TENSORS = [
+        "_verts_packed",
+        "_verts_packed_to_mesh_idx",
+        "_mesh_to_verts_packed_first_idx",
+        "_verts_padded",
+        "_num_verts_per_mesh",
+        "_faces_packed",
+        "_faces_packed_to_mesh_idx",
+        "_mesh_to_faces_packed_first_idx",
+        "_faces_padded",
+        "_faces_areas_packed",
+        "_verts_normals_packed",
+        "_faces_normals_packed",
+        "_num_faces_per_mesh",
+        "_edges_packed",
+        "_edges_packed_to_mesh_idx",
+        "_mesh_to_edges_packed_first_idx",
+        "_faces_packed_to_edges_packed",
+        "_num_edges_per_mesh",
+        "_verts_padded_to_packed_idx",
+        "_laplacian_packed",
+        "valid",
+        "equisized",
+    ]
+
+    def __init__(
+        self,
+        verts,
+        faces,
+        textures=None,
+        *,
+        verts_normals=None,
+    ) -> None:
+        """
+        Args:
+            verts:
+                Can be either
+
+                - List where each element is a tensor of shape (num_verts, 3)
+                  containing the (x, y, z) coordinates of each vertex.
+                - Padded float tensor with shape (num_meshes, max_num_verts, 3).
+                  Meshes should be padded with fill value of 0 so they all have
+                  the same number of vertices.
+            faces:
+                Can be either
+
+                - List where each element is a tensor of shape (num_faces, 3)
+                  containing the indices of the 3 vertices in the corresponding
+                  mesh in verts which form the triangular face.
+                - Padded long tensor of shape (num_meshes, max_num_faces, 3).
+                  Meshes should be padded with fill value of -1 so they have
+                  the same number of faces.
+            textures: Optional textures object (it must expose `sample_textures`,
+                see TexturesBase) holding the mesh texture properties.
+            verts_normals:
+                Optional. Can be either
+
+                - List where each element is a tensor of shape (num_verts, 3)
+                  containing the normals of each vertex.
+                - Padded float tensor with shape (num_meshes, max_num_verts, 3).
+                  They should be padded with fill value of 0 so they all have
+                  the same number of vertices.
+                Note that modifying the mesh later, e.g. with offset_verts_,
+                can cause these normals to be forgotten and normals to be recalculated
+                based on the new vertex positions.
+
+        Refer to comments above for descriptions of List and Padded representations.
+        """
+        self.device = torch.device("cpu")
+        if textures is not None and not hasattr(textures, "sample_textures"):
+            msg = "Expected textures to be an instance of type TexturesBase; got %r"
+            raise ValueError(msg % type(textures))
+
+        self.textures = textures
+
+        # Indicates whether the meshes in the list/batch have the same number
+        # of faces and vertices.
+        self.equisized = False
+
+        # Boolean indicator for each mesh in the batch
+        # True if mesh has non zero number of verts and face, False otherwise.
+        self.valid = None
+
+        self._N = 0  # batch size (number of meshes)
+        self._V = 0  # (max) number of vertices per mesh
+        self._F = 0  # (max) number of faces per mesh
+
+        # List of Tensors of verts and faces.
+        self._verts_list = None
+        self._faces_list = None
+
+        # Packed representation for verts.
+        self._verts_packed = None  # (sum(V_n), 3)
+        self._verts_packed_to_mesh_idx = None  # sum(V_n)
+
+        # Index to convert verts from flattened padded to packed
+        self._verts_padded_to_packed_idx = None  # N * max_V
+
+        # Index of each mesh's first vert in the packed verts.
+        # Assumes packing is sequential.
+        self._mesh_to_verts_packed_first_idx = None  # N
+
+        # Packed representation for faces.
+        self._faces_packed = None  # (sum(F_n), 3)
+        self._faces_packed_to_mesh_idx = None  # sum(F_n)
+
+        # Index of each mesh's first face in packed faces.
+        # Assumes packing is sequential.
+        self._mesh_to_faces_packed_first_idx = None  # N
+
+        # Packed representation of edges sorted by index of the first vertex
+        # in the edge. Edges can be shared between faces in a mesh.
+        self._edges_packed = None  # (sum(E_n), 2)
+
+        # Map from packed edges to corresponding mesh index.
+        self._edges_packed_to_mesh_idx = None  # sum(E_n)
+        self._num_edges_per_mesh = None  # N
+        self._mesh_to_edges_packed_first_idx = None  # N
+
+        # Map from packed faces to packed edges. This represents the index of
+        # the edge opposite the vertex for each vertex in the face. E.g.
+        #
+        #         v0
+        #         /\
+        #        /  \
+        #    e1 /    \ e2
+        #      /      \
+        #     /________\
+        #   v2    e0   v1
+        #
+        # Face (v0, v1, v2) => Edges (e0, e1, e2)
+        self._faces_packed_to_edges_packed = None  # (sum(F_n), 3)
+
+        # Padded representation of verts.
+        self._verts_padded = None  # (N, max(V_n), 3)
+        self._num_verts_per_mesh = None  # N
+
+        # Padded representation of faces.
+        self._faces_padded = None  # (N, max(F_n), 3)
+        self._num_faces_per_mesh = None  # N
+
+        # Face areas
+        self._faces_areas_packed = None
+
+        # Normals
+        self._verts_normals_packed = None
+        self._faces_normals_packed = None
+
+        # Packed representation of Laplacian Matrix
+        self._laplacian_packed = None
+
+        # Identify type of verts and faces.
+        if isinstance(verts, list) and isinstance(faces, list):
+            self._verts_list = verts
+            self._faces_list = [
+                f[f.gt(-1).all(1)].to(torch.int64) if len(f) > 0 else f for f in faces
+            ]
+            self._N = len(self._verts_list)
+            self.valid = torch.zeros((self._N,), dtype=torch.bool, device=self.device)
+            if self._N > 0:
+                self.device = self._verts_list[0].device
+                if not (
+                    all(v.device == self.device for v in verts)
+                    and all(f.device == self.device for f in faces)
+                ):
+                    raise ValueError(
+                        "All Verts and Faces tensors should be on same device."
+                    )
+                self._num_verts_per_mesh = torch.tensor(
+                    [len(v) for v in self._verts_list], device=self.device
+                )
+                self._V = int(self._num_verts_per_mesh.max())
+                self._num_faces_per_mesh = torch.tensor(
+                    [len(f) for f in self._faces_list], device=self.device
+                )
+                self._F = int(self._num_faces_per_mesh.max())
+                self.valid = torch.tensor(
+                    [
+                        len(v) > 0 and len(f) > 0
+                        for (v, f) in zip(self._verts_list, self._faces_list)
+                    ],
+                    dtype=torch.bool,
+                    device=self.device,
+                )
+                if (len(self._num_verts_per_mesh.unique()) == 1) and (
+                    len(self._num_faces_per_mesh.unique()) == 1
+                ):
+                    self.equisized = True
+
+        elif torch.is_tensor(verts) and torch.is_tensor(faces):
+            if verts.shape[2:] != (3,) or faces.shape[2:] != (3,):  # must be (N, *, 3)
+                raise ValueError("Verts or Faces tensors have incorrect dimensions.")
+            self._verts_padded = verts
+            self._faces_padded = faces.to(torch.int64)
+            self._N = self._verts_padded.shape[0]
+            self._V = self._verts_padded.shape[1]
+
+            if verts.device != faces.device:
+                msg = "Verts and Faces tensors should be on same device. \n Got {} and {}."
+                raise ValueError(msg.format(verts.device, faces.device))
+
+            self.device = self._verts_padded.device
+            self.valid = torch.zeros((self._N,), dtype=torch.bool, device=self.device)
+            if self._N > 0:
+                # Check that padded faces - which have value -1 - are at the
+                # end of the tensors
+                faces_not_padded = self._faces_padded.gt(-1).all(2)
+                self._num_faces_per_mesh = faces_not_padded.sum(1)
+                if (faces_not_padded[:, :-1] < faces_not_padded[:, 1:]).any():
+                    raise ValueError("Padding of faces must be at the end")
+
+                # NOTE that we don't check for the ordering of padded verts
+                # as long as the faces index correspond to the right vertices.
+
+                self.valid = self._num_faces_per_mesh > 0
+                self._F = int(self._num_faces_per_mesh.max())
+                if len(self._num_faces_per_mesh.unique()) == 1:
+                    self.equisized = True
+
+                self._num_verts_per_mesh = torch.full(
+                    size=(self._N,),
+                    fill_value=self._V,
+                    dtype=torch.int64,
+                    device=self.device,
+                )
+
+        else:
+            raise ValueError(
+                "Verts and Faces must be either a list or a tensor with "
+                "shape (batch_size, N, 3) where N is either the maximum "
+                "number of verts or faces respectively."
+            )
+
+        if self.isempty():
+            self._num_verts_per_mesh = torch.zeros(
+                (0,), dtype=torch.int64, device=self.device
+            )
+            self._num_faces_per_mesh = torch.zeros(
+                (0,), dtype=torch.int64, device=self.device
+            )
+
+        # Set the num verts/faces on the textures if present.
+        if textures is not None:
+            shape_ok = self.textures.check_shapes(self._N, self._V, self._F)
+            if not shape_ok:
+                msg = "Textures do not match the dimensions of Meshes."
+                raise ValueError(msg)
+
+            self.textures._num_faces_per_mesh = self._num_faces_per_mesh.tolist()
+            self.textures._num_verts_per_mesh = self._num_verts_per_mesh.tolist()
+            self.textures.valid = self.valid
+
+        if verts_normals is not None:
+            self._set_verts_normals(verts_normals)
+
+    def _set_verts_normals(self, verts_normals) -> None:
+        """
+        Validate list or padded vertex normals and cache them in packed form.
+        """
+        if isinstance(verts_normals, list):
+            if len(verts_normals) != self._N:
+                raise ValueError("Invalid verts_normals input")
+            for normals, expected in zip(verts_normals, self._num_verts_per_mesh):
+                if not (
+                    isinstance(normals, torch.Tensor)
+                    and normals.ndim == 2
+                    and normals.shape[1] == 3
+                    and normals.shape[0] == expected
+                ):
+                    raise ValueError("Invalid verts_normals input")
+            self._verts_normals_packed = torch.cat(verts_normals, 0)
+        elif torch.is_tensor(verts_normals):
+            if verts_normals.ndim != 3 or (
+                verts_normals.size(2) != 3 or verts_normals.size(0) != self._N
+            ):
+                raise ValueError("Vertex normals tensor has incorrect dimensions.")
+            split = self._num_verts_per_mesh.tolist()
+            packed = struct_utils.padded_to_packed(verts_normals, split_size=split)
+            self._verts_normals_packed = packed
+        else:
+            raise ValueError("verts_normals must be a list or tensor")
+
+    def __len__(self) -> int:
+        return self._N  # batch size, i.e. the number of meshes
+
+    def __getitem__(
+        self, index: Union[int, List[int], slice, torch.BoolTensor, torch.LongTensor]
+    ) -> "Meshes":
+        """
+        Args:
+            index: Selects the meshes to retrieve. Can be an int, a slice, a
+                list of ints, or a 1D boolean or integer tensor.
+
+        Returns:
+            Meshes object with selected meshes. The mesh tensors are not cloned.
+        """
+        if isinstance(index, (int, slice)):
+            # An int yields single tensors, a slice yields lists of tensors.
+            verts = self.verts_list()[index]
+            faces = self.faces_list()[index]
+        elif isinstance(index, (list, torch.Tensor)):
+            if isinstance(index, torch.Tensor):
+                if index.dim() != 1 or index.dtype.is_floating_point:
+                    raise IndexError(index)
+                # NOTE consider converting index to cpu for efficiency
+                if index.dtype == torch.bool:
+                    # Turn the mask into the positions of its True entries.
+                    index = index.nonzero().flatten().tolist()
+            verts = [self.verts_list()[i] for i in index]
+            faces = [self.faces_list()[i] for i in index]
+        else:
+            raise IndexError(index)
+
+        # Textures are indexed with the (possibly converted) index as well.
+        textures = self.textures[index] if self.textures is not None else None
+
+        if torch.is_tensor(verts) and torch.is_tensor(faces):
+            # Single mesh: wrap it so the constructor takes the list path.
+            verts, faces = [verts], [faces]
+        elif not (isinstance(verts, list) and isinstance(faces, list)):
+            raise ValueError("(verts, faces) not defined correctly")
+
+        # Build the result through the concrete class of this instance.
+        return self.__class__(verts=verts, faces=faces, textures=textures)
+
+    def isempty(self) -> bool:
+        """
+        Checks whether the batch holds no valid mesh at all.
+
+        Returns:
+            True if there are no meshes or none of them is valid, else False.
+        """
+        return self._N == 0 or bool(self.valid.eq(False).all())
+
+    def verts_list(self):
+        """
+        Return the per-mesh list of vertex tensors, deriving it from the padded
+        tensor on first use and caching the result.
+
+        Returns: list of tensors of vertices of shape (V_n, 3).
+        """
+        if self._verts_list is not None:
+            return self._verts_list
+        assert (
+            self._verts_padded is not None
+        ), "verts_padded is required to compute verts_list."
+        sizes = self.num_verts_per_mesh().tolist()
+        self._verts_list = struct_utils.padded_to_list(self._verts_padded, sizes)
+        return self._verts_list
+
+    def faces_list(self):
+        """
+        Return the per-mesh list of face tensors, deriving it from the padded
+        tensor on first use and caching the result.
+
+        Returns: list of tensors of faces of shape (F_n, 3).
+        """
+        if self._faces_list is not None:
+            return self._faces_list
+        assert (
+            self._faces_padded is not None
+        ), "faces_padded is required to compute faces_list."
+        sizes = self.num_faces_per_mesh().tolist()
+        self._faces_list = struct_utils.padded_to_list(self._faces_padded, sizes)
+        return self._faces_list
+
+    def verts_packed(self):
+        """
+        Get the packed representation of the vertices (no batch dimension).
+
+        Returns:
+            tensor of vertices of shape (sum(V_n), 3).
+        """
+        self._compute_packed()  # presumably fills the packed caches - TODO confirm
+        return self._verts_packed
+
+    def verts_packed_to_mesh_idx(self):
+        """
+        Return a 1D tensor with the same first dimension as verts_packed.
+        verts_packed_to_mesh_idx[i] gives the index of the mesh which contains
+        verts_packed[i].
+
+        Returns:
+            1D tensor of mesh indices, one entry per packed vertex.
+        """
+        self._compute_packed()  # presumably fills the packed caches - TODO confirm
+        return self._verts_packed_to_mesh_idx
+
+    def mesh_to_verts_packed_first_idx(self):
+        """
+        Return a 1D tensor x with length equal to the number of meshes such that
+        the first vertex of the ith mesh is verts_packed[x[i]].
+
+        Returns:
+            1D tensor of indices of first items, one entry per mesh.
+        """
+        self._compute_packed()  # presumably fills the packed caches - TODO confirm
+        return self._mesh_to_verts_packed_first_idx
+
+    def num_verts_per_mesh(self):
+        """
+        Return a 1D tensor x with length equal to the number of meshes giving
+        the number of vertices in each mesh.
+
+        Returns:
+            1D tensor of sizes.
+        """
+        return self._num_verts_per_mesh  # assigned in __init__, no lazy compute here
+
+    def faces_packed(self):
+        """
+        Return the faces of all meshes concatenated into one packed tensor.
+
+        Each face is stored as the indices of its three vertices in
+        verts_packed.
+
+        Returns:
+            tensor of faces of shape (sum(F_n), 3).
+        """
+        self._compute_packed()
+        packed_faces = self._faces_packed
+        return packed_faces
+
+    def faces_packed_to_mesh_idx(self):
+        """
+        Map every packed face to the mesh it belongs to.
+
+        The result has the same first dimension as faces_packed, and entry i
+        is the index of the mesh that contains faces_packed[i].
+
+        Returns:
+            1D tensor of indices.
+        """
+        self._compute_packed()
+        face_to_mesh = self._faces_packed_to_mesh_idx
+        return face_to_mesh
+
+    def mesh_to_faces_packed_first_idx(self):
+        """
+        Give, for each mesh, the position of its first face in faces_packed.
+
+        The result x has one entry per mesh, and faces_packed[x[i]] is the
+        first face of the ith mesh.
+
+        Returns:
+            1D tensor of indices of first items.
+        """
+        self._compute_packed()
+        first_face_idx = self._mesh_to_faces_packed_first_idx
+        return first_face_idx
+
+    def verts_padded(self):
+        """
+        Return the vertices as a single padded batch tensor.
+
+        The padded tensors are computed on demand by `_compute_padded`.
+
+        Returns:
+            tensor of vertices of shape (N, max(V_n), 3).
+        """
+        self._compute_padded()
+        padded_verts = self._verts_padded
+        return padded_verts
+
+    def faces_padded(self):
+        """
+        Return the faces as a single padded batch tensor.
+
+        The padded tensors are computed on demand by `_compute_padded`.
+
+        Returns:
+            tensor of faces of shape (N, max(F_n), 3).
+        """
+        self._compute_padded()
+        padded_faces = self._faces_padded
+        return padded_faces
+
+    def num_faces_per_mesh(self):
+        """
+        Give the number of faces of each mesh in the batch.
+
+        Returns:
+            1D tensor of sizes, one entry per mesh.
+        """
+        face_counts = self._num_faces_per_mesh
+        return face_counts
+
+    def edges_packed(self):
+        """
+        Return the edges of all meshes as one packed tensor.
+
+        The edge tensors are computed on demand by `_compute_edges_packed`.
+
+        Returns:
+            tensor of edges of shape (sum(E_n), 2).
+        """
+        self._compute_edges_packed()
+        packed_edges = self._edges_packed
+        return packed_edges
+
+    def edges_packed_to_mesh_idx(self):
+        """
+        Map every packed edge to the mesh it belongs to.
+
+        The result has the same first dimension as edges_packed, and entry i
+        is the index of the mesh that contains edges_packed[i].
+
+        Returns:
+            1D tensor of indices.
+        """
+        self._compute_edges_packed()
+        edge_to_mesh = self._edges_packed_to_mesh_idx
+        return edge_to_mesh
+
+    def mesh_to_edges_packed_first_idx(self):
+        """
+        Give, for each mesh, the position of its first edge in edges_packed.
+
+        The result x has one entry per mesh, and edges_packed[x[i]] is the
+        first edge of the ith mesh.
+
+        Returns:
+            1D tensor of indices of first items.
+        """
+        self._compute_edges_packed()
+        first_edge_idx = self._mesh_to_edges_packed_first_idx
+        return first_edge_idx
+
+    def faces_packed_to_edges_packed(self):
+        """
+        Return the packed faces expressed through their edges.
+
+        Each face is stored as the indices of its three edges in the packed
+        representation of the edges.
+
+        Returns:
+            tensor of faces of shape (sum(F_n), 3).
+        """
+        self._compute_edges_packed()
+        face_edges = self._faces_packed_to_edges_packed
+        return face_edges
+
+    def num_edges_per_mesh(self):
+        """
+        Give the number of edges of each mesh in the batch.
+
+        The edge tensors are computed on demand by `_compute_edges_packed`.
+
+        Returns:
+            1D tensor of sizes, one entry per mesh.
+        """
+        self._compute_edges_packed()
+        edge_counts = self._num_edges_per_mesh
+        return edge_counts
+
+    def verts_padded_to_packed_idx(self):
+        """
+        Give the flattened-padded position of every packed vertex.
+
+        The result x has one entry per vertex in the batch, and
+        verts_packed()[i] is element x[i] of the padded vertices flattened
+        to two dimensions. The result is cached after the first call.
+        The packed representation can be recovered as follows.
+
+        .. code-block:: python
+
+            p = verts_padded().reshape(-1, 3)
+            verts_packed = p[x]
+
+        Returns:
+            1D tensor of indices.
+        """
+        cached = self._verts_padded_to_packed_idx
+        if cached is not None:
+            return cached
+
+        per_mesh_idx = []
+        for mesh_idx, n_verts in enumerate(self.num_verts_per_mesh()):
+            # Mesh `mesh_idx` starts at row mesh_idx * V of the flattened padding.
+            local_idx = torch.arange(n_verts, dtype=torch.int64, device=self.device)
+            per_mesh_idx.append(local_idx + mesh_idx * self._V)
+        self._verts_padded_to_packed_idx = torch.cat(per_mesh_idx, dim=0)
+        return self._verts_padded_to_packed_idx
+
+    def has_verts_normals(self) -> bool:
+        """
+        Tell whether packed vertex normals are currently stored on this object.
+        """
+        stored_normals = self._verts_normals_packed
+        return stored_normals is not None
+
+    def verts_normals_packed(self):
+        """
+        Return the vertex normals of all meshes as one packed tensor.
+
+        The normals are computed on demand by `_compute_vertex_normals`.
+
+        Returns:
+            tensor of normals of shape (sum(V_n), 3).
+        """
+        self._compute_vertex_normals()
+        packed_normals = self._verts_normals_packed
+        return packed_normals
+
+    def verts_normals_list(self):
+        """
+        Return the vertex normals as a list holding one tensor per mesh.
+
+        For an empty batch the list repeats a single (0, 3) float tensor
+        once per mesh.
+
+        Returns:
+            list of tensors of normals of shape (V_n, 3).
+        """
+        if not self.isempty():
+            packed_normals = self.verts_normals_packed()
+            verts_per_mesh = self.num_verts_per_mesh().tolist()
+            return struct_utils.packed_to_list(packed_normals, verts_per_mesh)
+        placeholder = torch.empty((0, 3), dtype=torch.float32, device=self.device)
+        return [placeholder] * self._N
+
+    def verts_normals_padded(self):
+        """
+        Return the vertex normals as a single padded batch tensor.
+
+        Padding entries are filled with 0.0. An empty batch yields a float
+        tensor of shape (N, 0, 3).
+
+        Returns:
+            tensor of normals of shape (N, max(V_n), 3).
+        """
+        if not self.isempty():
+            normals_per_mesh = self.verts_normals_list()
+            return struct_utils.list_to_padded(
+                normals_per_mesh, (self._V, 3), pad_value=0.0, equisized=self.equisized
+            )
+        empty_shape = (self._N, 0, 3)
+        return torch.zeros(empty_shape, dtype=torch.float32, device=self.device)
+
+    def faces_normals_packed(self):
+        """
+        Return the face normals of all meshes as one packed tensor.
+
+        The normals are computed on demand by `_compute_face_areas_normals`.
+
+        Returns:
+            tensor of normals of shape (sum(F_n), 3).
+        """
+        self._compute_face_areas_normals()
+        packed_normals = self._faces_normals_packed
+        return packed_normals
+
+    def faces_normals_list(self):
+        """
+        Return the face normals as a list holding one tensor per mesh.
+
+        For an empty batch the list repeats a single (0, 3) float tensor
+        once per mesh.
+
+        Returns:
+            list of tensors of normals of shape (F_n, 3).
+        """
+        if not self.isempty():
+            packed_normals = self.faces_normals_packed()
+            faces_per_mesh = self.num_faces_per_mesh().tolist()
+            return struct_utils.packed_to_list(packed_normals, faces_per_mesh)
+        placeholder = torch.empty((0, 3), dtype=torch.float32, device=self.device)
+        return [placeholder] * self._N
+
+    def faces_normals_padded(self):
+        """
+        Return the face normals as a single padded batch tensor.
+
+        Padding entries are filled with 0.0. An empty batch yields a float
+        tensor of shape (N, 0, 3).
+
+        Returns:
+            tensor of normals of shape (N, max(F_n), 3).
+        """
+        if not self.isempty():
+            normals_per_mesh = self.faces_normals_list()
+            return struct_utils.list_to_padded(
+                normals_per_mesh, (self._F, 3), pad_value=0.0, equisized=self.equisized
+            )
+        empty_shape = (self._N, 0, 3)
+        return torch.zeros(empty_shape, dtype=torch.float32, device=self.device)
+
+    def faces_areas_packed(self):
+        """
+        Return the face areas of all meshes as one packed tensor.
+
+        The areas are computed on demand by `_compute_face_areas_normals`.
+
+        Returns:
+            tensor of areas of shape (sum(F_n),).
+        """
+        self._compute_face_areas_normals()
+        packed_areas = self._faces_areas_packed
+        return packed_areas
+
+    def laplacian_packed(self):
+        """
+        Return the laplacian of the batch in packed form.
+
+        The value is computed on demand by `_compute_laplacian_packed`.
+
+        Returns:
+            the stored packed laplacian.
+        """
+        self._compute_laplacian_packed()
+        packed_laplacian = self._laplacian_packed
+        return packed_laplacian
+
+    def _compute_face_areas_normals(self, refresh: bool = False):
+        """
+        Compute and cache the area and normal of every face in faces_packed.
+        For a face with verts [v0, v1, v2] the normal convention is
+        normal = (v1 - v0) x (v2 - v0)
+
+        Nothing is recomputed when both cached tensors exist, unless
+        `refresh` is set.
+
+        Args:
+            refresh: Set to True to force recomputation of face areas.
+                     Default: False.
+        """
+        from ..ops.mesh_face_areas_normals import mesh_face_areas_normals
+
+        cached = (self._faces_areas_packed, self._faces_normals_packed)
+        if not refresh and all(t is not None for t in cached):
+            return
+        faces_packed = self.faces_packed()
+        verts_packed = self.verts_packed()
+        areas, normals = mesh_face_areas_normals(verts_packed, faces_packed)
+        self._faces_areas_packed = areas
+        self._faces_normals_packed = normals
+
+    def _compute_vertex_normals(self, refresh: bool = False):
+        """Computes the packed version of vertex normals from the packed verts
+        and faces. This assumes verts are shared between faces. The normal for
+        a vertex is computed as the sum of the normals of all the faces it is
+        part of weighed by the face areas.
+
+        The result is cached in `self._verts_normals_packed`; nothing is
+        recomputed while a cached value exists unless `refresh` is set.
+        For an empty batch the cached value is a float tensor of shape (0, 3),
+        matching the documented packed shape (sum(V_n), 3).
+
+        Args:
+            refresh: Set to True to force recomputation of vertex normals.
+                Default: False.
+        """
+        if not refresh and self._verts_normals_packed is not None:
+            return
+
+        if self.isempty():
+            # There are no vertices at all, so the packed normals have zero
+            # rows. Normals are floating point, never integer indices.
+            self._verts_normals_packed = torch.zeros(
+                (0, 3), dtype=torch.float32, device=self.device
+            )
+            return
+
+        faces_packed = self.faces_packed()
+        verts_packed = self.verts_packed()
+        verts_normals = torch.zeros_like(verts_packed)
+        # (sum(F_n), 3, 3): the three corner positions of every face.
+        vertices_faces = verts_packed[faces_packed]
+
+        faces_normals = torch.cross(
+            vertices_faces[:, 2] - vertices_faces[:, 1],
+            vertices_faces[:, 0] - vertices_faces[:, 1],
+            dim=1,
+        )
+
+        # NOTE: this is already applying the area weighting as the magnitude
+        # of the cross product is 2 x area of the triangle.
+        # Accumulate each face normal onto all three of its corner vertices.
+        for corner in range(3):
+            verts_normals = verts_normals.index_add(
+                0, faces_packed[:, corner], faces_normals
+            )
+
+        self._verts_normals_packed = torch.nn.functional.normalize(
+            verts_normals, eps=1e-6, dim=1
+        )
+
+    def _compute_padded(self, refresh: bool = False):
+        """
+        Build and cache the padded verts and faces from verts_list and faces_list.
+
+        Nothing is recomputed when both padded tensors exist, unless
+        `refresh` is set. Faces are padded with -1.0 and verts with 0.0; an
+        empty batch gets tensors of shape (N, 0, 3).
+
+        Args:
+            refresh: Set to True to force recomputation. Default: False.
+        """
+        already_padded = (
+            self._verts_padded is not None and self._faces_padded is not None
+        )
+        if already_padded and not refresh:
+            return
+
+        verts_list = self.verts_list()
+        faces_list = self.faces_list()
+        assert (
+            faces_list is not None and verts_list is not None
+        ), "faces_list and verts_list arguments are required"
+
+        if self.isempty():
+            empty_shape = (self._N, 0, 3)
+            self._faces_padded = torch.zeros(
+                empty_shape, dtype=torch.int64, device=self.device
+            )
+            self._verts_padded = torch.zeros(
+                empty_shape, dtype=torch.float32, device=self.device
+            )
+            return
+
+        self._faces_padded = struct_utils.list_to_padded(
+            faces_list, (self._F, 3), pad_value=-1.0, equisized=self.equisized
+        )
+        self._verts_padded = struct_utils.list_to_padded(
+            verts_list, (self._V, 3), pad_value=0.0, equisized=self.equisized
+        )
+
+    # TODO(nikhilar) Improve performance of _compute_packed.
+    def _compute_packed(self, refresh: bool = False):
+        """
+        Computes the packed version of the meshes from verts_list and faces_list
+        and sets the values of auxiliary tensors.
+
+        The following attributes are populated: `_verts_packed`,
+        `_verts_packed_to_mesh_idx`, `_mesh_to_verts_packed_first_idx`,
+        `_faces_packed`, `_faces_packed_to_mesh_idx` and
+        `_mesh_to_faces_packed_first_idx`. For an empty batch
+        `_num_verts_per_mesh` and `_num_faces_per_mesh` are overwritten too.
+
+        Args:
+            refresh: Set to True to force recomputation of packed representations.
+                Default: False.
+
+        Raises:
+            ValueError: if the per-mesh vertex or face counts reported by
+                `struct_utils.list_to_packed` differ from the stored counts.
+        """
+
+        # Nothing to do when every packed tensor is already cached and no
+        # refresh was requested.
+        if not (
+            refresh
+            or any(
+                v is None
+                for v in [
+                    self._verts_packed,
+                    self._verts_packed_to_mesh_idx,
+                    self._mesh_to_verts_packed_first_idx,
+                    self._faces_packed,
+                    self._faces_packed_to_mesh_idx,
+                    self._mesh_to_faces_packed_first_idx,
+                ]
+            )
+        ):
+            return
+
+        # Packed can be calculated from padded or list, so can call the
+        # accessor function for verts_list and faces_list.
+        verts_list = self.verts_list()
+        faces_list = self.faces_list()
+        if self.isempty():
+            # Empty batch: every packed tensor has zero rows.
+            # NOTE(review): the per-mesh counts are reset to length 0 here
+            # rather than length N — confirm callers do not rely on length N.
+            self._verts_packed = torch.zeros(
+                (0, 3), dtype=torch.float32, device=self.device
+            )
+            self._verts_packed_to_mesh_idx = torch.zeros(
+                (0,), dtype=torch.int64, device=self.device
+            )
+            self._mesh_to_verts_packed_first_idx = torch.zeros(
+                (0,), dtype=torch.int64, device=self.device
+            )
+            self._num_verts_per_mesh = torch.zeros(
+                (0,), dtype=torch.int64, device=self.device
+            )
+            self._faces_packed = -(
+                torch.ones((0, 3), dtype=torch.int64, device=self.device)
+            )
+            self._faces_packed_to_mesh_idx = torch.zeros(
+                (0,), dtype=torch.int64, device=self.device
+            )
+            self._mesh_to_faces_packed_first_idx = torch.zeros(
+                (0,), dtype=torch.int64, device=self.device
+            )
+            self._num_faces_per_mesh = torch.zeros(
+                (0,), dtype=torch.int64, device=self.device
+            )
+            return
+
+        # The result of list_to_packed is used below as: [0] packed tensor,
+        # [1] per-mesh item counts, [2] first packed index of each mesh,
+        # [3] packed item -> mesh index.
+        verts_list_to_packed = struct_utils.list_to_packed(verts_list)
+        self._verts_packed = verts_list_to_packed[0]
+        if not torch.allclose(self.num_verts_per_mesh(), verts_list_to_packed[1]):
+            raise ValueError("The number of verts per mesh should be consistent.")
+        self._mesh_to_verts_packed_first_idx = verts_list_to_packed[2]
+        self._verts_packed_to_mesh_idx = verts_list_to_packed[3]
+
+        faces_list_to_packed = struct_utils.list_to_packed(faces_list)
+        faces_packed = faces_list_to_packed[0]
+        if not torch.allclose(self.num_faces_per_mesh(), faces_list_to_packed[1]):
+            raise ValueError("The number of faces per mesh should be consistent.")
+        self._mesh_to_faces_packed_first_idx = faces_list_to_packed[2]
+        self._faces_packed_to_mesh_idx = faces_list_to_packed[3]
+
+        # Shift each face's vertex indices by the first packed-vertex index of
+        # the mesh that owns the face, so that faces index into verts_packed.
+        faces_packed_offset = self._mesh_to_verts_packed_first_idx[
+            self._faces_packed_to_mesh_idx
+        ]
+        self._faces_packed = faces_packed + faces_packed_offset.view(-1, 1)
+
+    def _compute_edges_packed(self, refresh: bool = False):
+        """
+        Computes edges in packed form from the packed version of faces and verts.
+
+        Populates `_edges_packed`, `_edges_packed_to_mesh_idx`,
+        `_faces_packed_to_edges_packed`, `_num_edges_per_mesh` and
+        `_mesh_to_edges_packed_first_idx`. All five are set for an empty
+        batch as well, so that no edge accessor returns None.
+
+        Args:
+            refresh: Set to True to force recomputation. Default: False.
+        """
+        if not (
+            refresh
+            or any(
+                v is None
+                for v in [
+                    self._edges_packed,
+                    self._faces_packed_to_mesh_idx,
+                    self._edges_packed_to_mesh_idx,
+                    self._num_edges_per_mesh,
+                    self._mesh_to_edges_packed_first_idx,
+                ]
+            )
+        ):
+            return
+
+        if self.isempty():
+            self._edges_packed = torch.full(
+                (0, 2), fill_value=-1, dtype=torch.int64, device=self.device
+            )
+            self._edges_packed_to_mesh_idx = torch.zeros(
+                (0,), dtype=torch.int64, device=self.device
+            )
+            # No faces and no edges: fill the remaining edge tensors with
+            # empty/zero values using the same dtypes as the non-empty path.
+            self._faces_packed_to_edges_packed = torch.zeros(
+                (0, 3), dtype=torch.int64, device=self.device
+            )
+            self._num_edges_per_mesh = torch.zeros(
+                self._N, dtype=torch.int32, device=self.device
+            )
+            self._mesh_to_edges_packed_first_idx = torch.zeros(
+                self._N, dtype=torch.int64, device=self.device
+            )
+            return
+
+        faces = self.faces_packed()
+        F = faces.shape[0]
+        v0, v1, v2 = faces.chunk(3, dim=1)
+        e01 = torch.cat([v0, v1], dim=1)  # (sum(F_n), 2)
+        e12 = torch.cat([v1, v2], dim=1)  # (sum(F_n), 2)
+        e20 = torch.cat([v2, v0], dim=1)  # (sum(F_n), 2)
+
+        # All edges including duplicates.
+        edges = torch.cat([e12, e20, e01], dim=0)  # (sum(F_n)*3, 2)
+        edge_to_mesh = torch.cat(
+            [
+                self._faces_packed_to_mesh_idx,
+                self._faces_packed_to_mesh_idx,
+                self._faces_packed_to_mesh_idx,
+            ],
+            dim=0,
+        )  # sum(F_n)*3
+
+        # Sort the edges in increasing vertex order to remove duplicates as
+        # the same edge may appear in different orientations in different faces.
+        # i.e. rows in edges after sorting will be of the form (v0, v1) where v1 > v0.
+        # This sorting does not change the order in dim=0.
+        edges, _ = edges.sort(dim=1)
+
+        # Remove duplicate edges: convert each edge (v0, v1) into an
+        # integer hash = V * v0 + v1; this allows us to use the scalar version of
+        # unique which is much faster than edges.unique(dim=1) which is very slow.
+        # After finding the unique elements reconstruct the vertex indices as:
+        # (v0, v1) = (hash / V, hash % V)
+        # The inverse maps from unique_edges back to edges:
+        # unique_edges[inverse_idxs] == edges
+        # i.e. inverse_idxs[i] == j means that edges[i] == unique_edges[j]
+
+        V = self._verts_packed.shape[0]
+        edges_hash = V * edges[:, 0] + edges[:, 1]
+        u, inverse_idxs = torch.unique(edges_hash, return_inverse=True)
+
+        # Find indices of unique elements.
+        # TODO (nikhilar) remove following 4 lines when torch.unique has support
+        # for returning unique indices
+        sorted_hash, sort_idx = torch.sort(edges_hash, dim=0)
+        unique_mask = torch.ones(
+            edges_hash.shape[0], dtype=torch.bool, device=self.device
+        )
+        unique_mask[1:] = sorted_hash[1:] != sorted_hash[:-1]
+        unique_idx = sort_idx[unique_mask]
+
+        self._edges_packed = torch.stack([u // V, u % V], dim=1)
+        self._edges_packed_to_mesh_idx = edge_to_mesh[unique_idx]
+
+        # `edges` stacks three blocks of F rows (one per face side), so the
+        # inverse indices reshape to (3, F) and transpose to (F, 3).
+        self._faces_packed_to_edges_packed = inverse_idxs.reshape(3, F).t()
+
+        # Compute number of edges per mesh
+        num_edges_per_mesh = torch.zeros(self._N, dtype=torch.int32, device=self.device)
+        ones = torch.ones(1, dtype=torch.int32, device=self.device).expand(
+            self._edges_packed_to_mesh_idx.shape
+        )
+        num_edges_per_mesh = num_edges_per_mesh.scatter_add_(
+            0, self._edges_packed_to_mesh_idx, ones
+        )
+        self._num_edges_per_mesh = num_edges_per_mesh
+
+        # Compute first idx for each mesh in edges_packed
+        mesh_to_edges_packed_first_idx = torch.zeros(
+            self._N, dtype=torch.int64, device=self.device
+        )
+        num_edges_cumsum = num_edges_per_mesh.cumsum(dim=0)
+        mesh_to_edges_packed_first_idx[1:] = num_edges_cumsum[:-1].clone()
+
+        self._mesh_to_edges_packed_first_idx = mesh_to_edges_packed_first_idx
+
+    def _compute_laplacian_packed(self, refresh: bool = False):
+        """
+        Compute the laplacian in packed form and cache it on the instance.
+        The definition of the laplacian is
+        L[i, j] =    -1       , if i == j
+        L[i, j] = 1 / deg(i)  , if (i, j) is an edge
+        L[i, j] =    0        , otherwise
+        where deg(i) is the degree of the i-th vertex in the graph
+
+        The result is stored in `self._laplacian_packed`; this method itself
+        returns nothing. An empty batch stores a sparse (0, 0) float tensor.
+
+        Args:
+            refresh: Set to True to force recomputation. Default: False.
+        """
+        from ..ops import laplacian
+
+        if self._laplacian_packed is not None and not refresh:
+            return
+
+        if self.isempty():
+            dense_empty = torch.zeros((0, 0), dtype=torch.float32, device=self.device)
+            self._laplacian_packed = dense_empty.to_sparse()
+            return
+
+        packed_verts = self.verts_packed()  # (sum(V_n), 3)
+        packed_edges = self.edges_packed()  # (sum(E_n), 2)
+
+        self._laplacian_packed = laplacian(packed_verts, packed_edges)
+
+    def clone(self):
+        """
+        Make a deep copy of this Meshes object.
+
+        A new object is built from clones of the verts and faces lists, and
+        every attribute named in `_INTERNAL_TENSORS` that holds a tensor is
+        cloned onto it individually.
+
+        Returns:
+            new Meshes object.
+        """
+        cloned_verts = [v.clone() for v in self.verts_list()]
+        cloned_faces = [f.clone() for f in self.faces_list()]
+        other = self.__class__(verts=cloned_verts, faces=cloned_faces)
+        for name in self._INTERNAL_TENSORS:
+            value = getattr(self, name)
+            if not torch.is_tensor(value):
+                continue
+            setattr(other, name, value.clone())
+
+        # Textures is not a tensor but has a clone method
+        if self.textures is not None:
+            other.textures = self.textures.clone()
+        return other
+
+    def detach(self):
+        """
+        Make a detached copy of this Meshes object.
+
+        A new object is built from detached verts and faces lists, and every
+        attribute named in `_INTERNAL_TENSORS` that holds a tensor is
+        detached onto it individually.
+
+        Returns:
+            new Meshes object.
+        """
+        detached_verts = [v.detach() for v in self.verts_list()]
+        detached_faces = [f.detach() for f in self.faces_list()]
+        other = self.__class__(verts=detached_verts, faces=detached_faces)
+        for name in self._INTERNAL_TENSORS:
+            value = getattr(self, name)
+            if not torch.is_tensor(value):
+                continue
+            setattr(other, name, value.detach())
+
+        # Textures is not a tensor but has a detach method
+        if self.textures is not None:
+            other.textures = self.textures.detach()
+        return other
+
+    def to(self, device: Device, copy: bool = False):
+        """
+        Mirror the behaviour of torch.Tensor.to() for a Meshes object.
+        When copy = True, or when this object lives on a different device,
+        the result is a copy of self placed on the requested torch.device.
+        When copy = False and this object is already on the requested
+        torch.device, self is returned unchanged.
+
+        Args:
+            device: Device (as str or torch.device) for the new tensor.
+            copy: Boolean indicator whether or not to clone self. Default False.
+
+        Returns:
+            Meshes object.
+        """
+        target = make_device(device)
+        on_target = self.device == target
+        if on_target and not copy:
+            return self
+
+        other = self.clone()
+        if on_target:
+            return other
+
+        other.device = target
+        if other._N > 0:
+            other._verts_list = [v.to(target) for v in other._verts_list]
+            other._faces_list = [f.to(target) for f in other._faces_list]
+        for name in self._INTERNAL_TENSORS:
+            value = getattr(self, name)
+            if torch.is_tensor(value):
+                setattr(other, name, value.to(target))
+        if self.textures is not None:
+            other.textures = other.textures.to(target)
+        return other
+
+    def cpu(self):
+        """Return the result of `to` with the "cpu" device."""
+        return self.to(device="cpu")
+
+    def cuda(self):
+        """Return the result of `to` with the "cuda" device."""
+        return self.to(device="cuda")
+
+    def get_mesh_verts_faces(self, index: int):
+        """
+        Get tensors for a single mesh from the list representation.
+
+        Args:
+            index: Integer in the range [0, N).
+
+        Returns:
+            verts: Tensor of shape (V, 3).
+            faces: LongTensor of shape (F, 3).
+
+        Raises:
+            ValueError: if `index` is not an int or lies outside [0, N).
+        """
+        if not isinstance(index, int):
+            raise ValueError("Mesh index must be an integer.")
+        # The valid range is half-open, so index == N must be rejected too.
+        if index < 0 or index >= self._N:
+            raise ValueError(
+                "Mesh index must be in the range [0, N) where \
+            N is the number of meshes in the batch."
+            )
+        verts = self.verts_list()
+        faces = self.faces_list()
+        return verts[index], faces[index]
+
+    # TODO(nikhilar) Move function to a utils file.
+    def split(self, split_sizes: list):
+        """
+        Split this Meshes object of size N into consecutive chunks.
+        The result has len(split_sizes) entries and the i-th entry is a
+        Meshes object of size split_sizes[i], taken by slicing self.
+        Similar to torch.split().
+
+        Args:
+            split_sizes: List of integer sizes of Meshes objects to be returned.
+
+        Returns:
+            list[Meshes].
+
+        Raises:
+            ValueError: if any entry of split_sizes is not an int.
+        """
+        if any(not isinstance(size, int) for size in split_sizes):
+            raise ValueError("Value of split_sizes must be a list of integers.")
+        chunks = []
+        start = 0
+        for size in split_sizes:
+            stop = start + size
+            chunks.append(self[start:stop])
+            start = stop
+        return chunks
+
+    def offset_verts_(self, vert_offsets_packed):
+        """
+        Translate the vertices of this Meshes by the given offsets, in place.
+        The packed, list and (if present) padded vertices are all updated.
+        Cached face areas/normals and vertex normals are recomputed only when
+        they already exist and the offsets are given per vertex; a single
+        (3,) offset leaves them untouched.
+
+        Args:
+            vert_offsets_packed: A Tensor of shape (3,) or the same shape as
+                                self.verts_packed, giving offsets to be added
+                                to all vertices.
+        Returns:
+            self.
+
+        Raises:
+            ValueError: if the offsets do not match the shape of verts_packed.
+        """
+        packed = self.verts_packed()
+        per_vertex_offsets = vert_offsets_packed.shape != (3,)
+        if not per_vertex_offsets:
+            vert_offsets_packed = vert_offsets_packed.expand_as(packed)
+        if vert_offsets_packed.shape != packed.shape:
+            raise ValueError("Verts offsets must have dimension (all_v, 3).")
+
+        # update verts packed
+        self._verts_packed = packed + vert_offsets_packed
+
+        # update verts list
+        # Note that since _compute_packed() has been executed, verts_list
+        # cannot be None even if not provided during construction.
+        sizes = self.num_verts_per_mesh().tolist()
+        moved_list = list(self._verts_packed.split(sizes, 0))
+        self._verts_list = moved_list
+
+        # update verts padded
+        if self._verts_padded is not None:
+            for mesh_idx, moved in enumerate(moved_list):
+                if len(moved) > 0:
+                    self._verts_padded[mesh_idx, : moved.shape[0], :] = moved
+
+        # update face areas and normals and vertex normals
+        # only if the original attributes are present
+        if per_vertex_offsets:
+            has_face_cache = (
+                self._faces_areas_packed is not None
+                or self._faces_normals_packed is not None
+            )
+            if has_face_cache:
+                self._compute_face_areas_normals(refresh=True)
+            if self._verts_normals_packed is not None:
+                self._compute_vertex_normals(refresh=True)
+
+        return self
+
+    # TODO(nikhilar) Move out of place operator to a utils file.
+    def offset_verts(self, vert_offsets_packed):
+        """
+        Out of place version of offset_verts_: the offsets are applied to a
+        clone of this object and self is left as it is.
+
+        Args:
+            vert_offsets_packed: A Tensor of the same shape as self.verts_packed
+                giving offsets to be added to all vertices.
+        Returns:
+            new Meshes object.
+        """
+        shifted = self.clone()
+        shifted.offset_verts_(vert_offsets_packed)
+        return shifted
+
+    def scale_verts_(self, scale):
+        """
+        Scale the vertices of every mesh in this Meshes object, in place.
+        A non-tensor scale is broadcast to one factor per mesh. The list
+        vertices are always updated; packed and padded vertices and cached
+        face areas/normals are updated only if they already exist.
+
+        Args:
+            scale: A scalar, or a Tensor of shape (N,).
+
+        Returns:
+            self.
+        """
+        if not torch.is_tensor(scale):
+            scale = torch.full((len(self),), scale, device=self.device)
+
+        # update list
+        scaled_list = [
+            scale[mesh_idx] * verts
+            for mesh_idx, verts in enumerate(self.verts_list())
+        ]
+        self._verts_list = scaled_list
+
+        # update packed
+        if self._verts_packed is not None:
+            self._verts_packed = torch.cat(scaled_list, dim=0)
+
+        # update padded
+        if self._verts_padded is not None:
+            for mesh_idx, verts in enumerate(self._verts_list):
+                if len(verts) > 0:
+                    self._verts_padded[mesh_idx, : verts.shape[0], :] = verts
+
+        # update face areas and normals
+        # only if the original attributes are computed
+        has_face_cache = (
+            self._faces_areas_packed is not None
+            or self._faces_normals_packed is not None
+        )
+        if has_face_cache:
+            self._compute_face_areas_normals(refresh=True)
+        return self
+
+    def scale_verts(self, scale):
+        """
+        Out of place version of scale_verts_: the scaling is applied to a
+        clone of this object and self is left as it is.
+
+        Args:
+            scale: A scalar, or a Tensor of shape (N,).
+
+        Returns:
+            new Meshes object.
+        """
+        scaled = self.clone()
+        scaled.scale_verts_(scale)
+        return scaled
+
+    def update_padded(self, new_verts_padded):
+        """
+        This function allows for an update of verts_padded without having to
+        explicitly convert it to the list representation for heterogeneous batches.
+        Returns a Meshes structure with updated padded tensors and copies of the
+        auxiliary tensors at construction time.
+        It updates self._verts_padded with new_verts_padded, and does a
+        shallow copy of (faces_padded, faces_list, num_verts_per_mesh, num_faces_per_mesh).
+        If packed representations are computed in self, they are updated as well.
+
+        Args:
+            new_points_padded: FloatTensor of shape (N, V, 3)
+
+        Returns:
+            Meshes with updated padded representations
+        """
+
+        def check_shapes(x, size):
+            if x.shape[0] != size[0]:
+                raise ValueError("new values must have the same batch dimension.")
+            if x.shape[1] != size[1]:
+                raise ValueError("new values must have the same number of points.")
+            if x.shape[2] != size[2]:
+                raise ValueError("new values must have the same dimension.")
+
+        check_shapes(new_verts_padded, [self._N, self._V, 3])
+
+        new = self.__class__(verts=new_verts_padded, faces=self.faces_padded())
+
+        if new._N != self._N or new._V != self._V or new._F != self._F:
+            raise ValueError("Inconsistent sizes after construction.")
+
+        # overwrite the equisized flag
+        new.equisized = self.equisized
+
+        # overwrite textures if any
+        new.textures = self.textures
+
+        # copy auxiliary tensors
+        copy_tensors = ["_num_verts_per_mesh", "_num_faces_per_mesh", "valid"]
+
+        for k in copy_tensors:
+            v = getattr(self, k)
+            if torch.is_tensor(v):
+                setattr(new, k, v)  # shallow copy
+
+        # shallow copy of faces_list if any, st new.faces_list()
+        # does not re-compute from _faces_padded
+        new._faces_list = self._faces_list
+
+        # update verts/faces packed if they are computed in self
+        if self._verts_packed is not None:
+            copy_tensors = [
+                "_faces_packed",
+                "_verts_packed_to_mesh_idx",
+                "_faces_packed_to_mesh_idx",
+                "_mesh_to_verts_packed_first_idx",
+                "_mesh_to_faces_packed_first_idx",
+            ]
+            for k in copy_tensors:
+                v = getattr(self, k)
+                assert torch.is_tensor(v)
+                setattr(new, k, v)  # shallow copy
+            # update verts_packed
+            pad_to_packed = self.verts_padded_to_packed_idx()
+            new_verts_packed = new_verts_padded.reshape(-1, 3)[pad_to_packed, :]
+            new._verts_packed = new_verts_packed
+            new._verts_padded_to_packed_idx = pad_to_packed
+
+        # update edges packed if they are computed in self
+        if self._edges_packed is not None:
+            copy_tensors = [
+                "_edges_packed",
+                "_edges_packed_to_mesh_idx",
+                "_mesh_to_edges_packed_first_idx",
+                "_faces_packed_to_edges_packed",
+                "_num_edges_per_mesh",
+            ]
+            for k in copy_tensors:
+                v = getattr(self, k)
+                assert torch.is_tensor(v)
+                setattr(new, k, v)  # shallow copy
+
+        # update laplacian if it is computed in self
+        if self._laplacian_packed is not None:
+            new._laplacian_packed = self._laplacian_packed
+
+        assert new._verts_list is None
+        assert new._verts_normals_packed is None
+        assert new._faces_normals_packed is None
+        assert new._faces_areas_packed is None
+
+        return new
+
+    # TODO(nikhilar) Move function to utils file.
+    def get_bounding_boxes(self):
+        """
+        Return the axis-aligned bounding box of every mesh in the batch,
+        computed from the per-mesh vertex tensors given by verts_list().
+
+        Returns:
+            bboxes: Tensor of shape (N, 3, 2); bboxes[i, j] holds the
+            (min, max) values of mesh i along the jth coordinate axis.
+        """
+        per_mesh_verts = self.verts_list()
+        # Reduce over the vertices of each mesh, then stack the (3,) results
+        # of all meshes into (N, 3).
+        lower = torch.stack([v.min(dim=0)[0] for v in per_mesh_verts], dim=0)
+        upper = torch.stack([v.max(dim=0)[0] for v in per_mesh_verts], dim=0)
+
+        # Pair min and max on a new trailing axis -> (N, 3, 2).
+        bboxes = torch.stack([lower, upper], dim=2)
+        return bboxes
+
+    def extend(self, N: int):
+        """
+        Build a new Meshes object in which every mesh of this batch is
+        repeated N times consecutively (cloned vertex and face tensors).
+
+        Args:
+            N: number of copies to make of each mesh; a positive int.
+
+        Returns:
+            new Meshes object.
+
+        Raises:
+            ValueError: if N is not an int or is not positive.
+        """
+        if not isinstance(N, int):
+            raise ValueError("N must be an integer.")
+        if N <= 0:
+            raise ValueError("N must be > 0.")
+        pairs = list(zip(self.verts_list(), self.faces_list()))
+        # Copies of the same mesh stay adjacent: m0, m0, ..., m1, m1, ...
+        verts_out = [v.clone() for v, _f in pairs for _ in range(N)]
+        faces_out = [f.clone() for _v, f in pairs for _ in range(N)]
+        tex = self.textures.extend(N) if self.textures is not None else None
+        return self.__class__(verts=verts_out, faces=faces_out, textures=tex)
+
+    def sample_textures(self, fragments):
+        """
+        Sample the textures of this batch for `fragments` by delegating to
+        self.textures.sample_textures, passing the packed faces along.
+
+        Raises:
+            ValueError: if there are no textures or their shapes do not match.
+        """
+        textures = self.textures
+        if textures is None:
+            raise ValueError("Meshes does not have textures")
+        # Check dimensions of textures match that of meshes
+        if not textures.check_shapes(self._N, self._V, self._F):
+            raise ValueError("Textures do not match the dimensions of Meshes.")
+        # If the textures are defined per vertex, the face indices are
+        # needed in order to interpolate the vertex attributes across the face.
+        return textures.sample_textures(fragments, faces_packed=self.faces_packed())
+
+    def submeshes(
+        self,
+        face_indices: Union[
+            List[List[torch.LongTensor]], List[torch.LongTensor], torch.LongTensor
+        ],
+    ) -> "Meshes":
+        """
+        Split a batch of meshes into a batch of submeshes.
+
+        The return value is a Meshes object representing
+            [mesh restricted to only faces indexed by selected_faces
+            for mesh, selected_faces_list in zip(self, face_indices)
+            for selected_faces in selected_faces_list]
+
+        Args:
+          face_indices:
+            Let the original mesh have verts_list() of length N.
+            Can be either
+              - List of lists of LongTensors. The n-th element is a list of length
+                num_submeshes_n (empty lists are allowed). The k-th element of the
+                n-th sublist is a LongTensor of length num_faces_submesh_n_k.
+              - List of LongTensors. The n-th element is a (possibly empty) LongTensor
+                of shape (num_submeshes_n, num_faces_n).
+              - A LongTensor of shape (N, num_submeshes_per_mesh, num_faces_per_submesh)
+                where all meshes in the batch will have the same number of submeshes.
+                This will result in an output Meshes object with batch size equal to
+                N * num_submeshes_per_mesh.
+
+        Returns:
+          Meshes object of length `sum(len(ids) for ids in face_indices)`.
+
+        Raises:
+          ValueError: if len(face_indices) differs from the number of meshes.
+
+        Example 1:
+
+        If `meshes` has batch size 1, and `face_indices` is a 1D LongTensor, then
+        `meshes.submeshes([[face_indices]])` and
+        `meshes.submeshes(face_indices[None, None])` both produce a Meshes of length 1
+        containing the single submesh made of the faces listed in `face_indices`.
+
+        Example 2:
+
+        Take a Meshes object `cubes` with 4 meshes, each a translated cube. Then:
+            * len(cubes) is 4, len(cubes.verts_list()) is 4, len(cubes.faces_list()) 4,
+            * [len(cube_verts) for cube_verts in cubes.verts_list()] is [8, 8, 8, 8],
+            * [len(cube_faces) for cube_faces in cubes.faces_list()] is [12, 12, 12, 12].
+
+        Now let front_facet, top_and_bottom, all_facets be LongTensors of sizes (2),
+        (4) and (12), each picking facets of a cube by listing their triangular faces.
+
+        Then let `subcubes = cubes.submeshes([[front_facet, top_and_bottom], [],
+                                              [all_facets], []])`.
+            * len(subcubes) is 3.
+            * subcubes[0] is the front facet of the cube contained in cubes[0].
+            * subcubes[1] is a mesh containing the (disconnected) top and bottom facets
+              of cubes[0].
+            * subcubes[2] is cubes[2]; cubes[1] and cubes[3] contribute no submeshes.
+            * subcubes[0] and subcubes[1] are not watertight. subcubes[2] is.
+        """
+        if len(face_indices) != len(self):
+            raise ValueError(
+                "You must specify exactly one set of submeshes"
+                " for each mesh in this Meshes object."
+            )
+
+        sub_verts = []
+        sub_verts_ids = []
+        sub_faces = []
+
+        for face_ids_per_mesh, faces, verts in zip(
+            face_indices, self.faces_list(), self.verts_list()
+        ):
+            sub_verts_ids.append([])
+            for submesh_face_ids in face_ids_per_mesh:
+                faces_to_keep = faces[submesh_face_ids]
+
+                # Say we are keeping two faces from a mesh with seven vertices:
+                # faces_to_keep = [[0, 6, 4],
+                #                  [0, 2, 6]]
+                # Then the submesh keeps only vertices [0, 2, 4, 6], and its faces
+                # must refer to them by their new ids, i.e.
+                # [[0, 3, 2],
+                #  [0, 1, 3]],
+                # as each point id got reduced to its sort rank.
+                # One torch.unique call yields both results: the sorted unique
+                # vertex ids and, via return_inverse, the rank of every entry of
+                # faces_to_keep among them.
+                vertex_ids_to_keep, faces_with_new_ids = torch.unique(
+                    faces_to_keep, sorted=True, return_inverse=True
+                )
+                sub_verts.append(verts[vertex_ids_to_keep])
+                sub_verts_ids[-1].append(vertex_ids_to_keep)
+                sub_faces.append(faces_with_new_ids)
+
+        # Test against None explicitly, as the other methods of this class do,
+        # so the result never depends on the truthiness of the textures object.
+        if self.textures is not None:
+            textures = self.textures.submeshes(sub_verts_ids, face_indices)
+        else:
+            textures = None
+
+        return self.__class__(
+            verts=sub_verts,
+            faces=sub_faces,
+            textures=textures,
+        )
+
+
+def join_meshes_as_batch(meshes: List[Meshes], include_textures: bool = True) -> Meshes:
+    """
+    Concatenate several Meshes objects into one batch that holds every mesh
+    of every input, in order. All inputs must live on the same device.
+
+    When include_textures is true the inputs must agree on textures: either
+    none of them has any, or all of them have Textures objects of one and
+    the same type. When it is False, textures are dropped from the result.
+
+    For TexturesAtlas "the same type" also means the same resolution; for
+    TexturesUV it also means the same align_corners and padding_mode.
+
+    Args:
+        meshes: list of meshes.
+        include_textures: (bool) whether to try to join the textures.
+
+    Returns:
+        new Meshes object containing all the meshes from all the inputs.
+
+    Raises:
+        ValueError: if meshes is itself a Meshes object, or if the textures
+            of the inputs are inconsistent.
+    """
+    if isinstance(meshes, Meshes):
+        # Iterating a Meshes yields single Meshes, so without this guard
+        # join_meshes_as_batch(mesh1, mesh2) would silently do the wrong thing.
+        raise ValueError("Wrong first argument to join_meshes_as_batch.")
+    verts, faces = [], []
+    for mesh in meshes:
+        verts += mesh.verts_list()
+        faces += mesh.faces_list()
+    if len(meshes) == 0 or not include_textures:
+        return Meshes(verts=verts, faces=faces)
+
+    # Either every mesh carries textures or none does.
+    all_textures = [mesh.textures for mesh in meshes]
+    missing = [tex is None for tex in all_textures]
+    if any(missing) and not all(missing):
+        raise ValueError("Inconsistent textures in join_meshes_as_batch.")
+    if missing[0]:
+        return Meshes(verts=verts, faces=faces)
+
+    first = all_textures[0]
+    if any(type(tex) != type(first) for tex in all_textures):  # noqa: E721
+        raise ValueError("All meshes in the batch must have the same type of texture.")
+    return Meshes(verts=verts, faces=faces, textures=first.join_batch(all_textures[1:]))
+
+
+def join_meshes_as_scene(
+    meshes: Union[Meshes, List[Meshes]], include_textures: bool = True
+) -> Meshes:
+    """
+    Fuse a batch of meshes into one single mesh (a "scene"). The batch may be
+    given as a Meshes object or as a list of Meshes objects; a list is first
+    merged with join_meshes_as_batch, so its members must share a device and,
+    unless include_textures is False, must all have the same type of texture
+    or all have none.
+
+    With textures included, the texture object's join_scene method merges the
+    per-mesh textures into one texture for the scene.
+    TexturesAtlas inputs must have the same resolution; TexturesUV inputs
+    must have the same align_corners and padding_mode. Values in verts_uvs
+    outside [0, 1] will not be respected.
+
+    Args:
+        meshes: Meshes object that contains a batch of meshes, or a list of
+                    Meshes objects.
+        include_textures: (bool) whether to try to join the textures.
+
+    Returns:
+        new Meshes object containing a single mesh. A batch that already
+        holds exactly one mesh is returned as is.
+    """
+    if not isinstance(include_textures, (bool, int)):
+        # Catches join_meshes_as_scene(mesh1, mesh2), which would otherwise
+        # silently do the wrong thing.
+        raise ValueError(
+            f"include_textures argument cannot be {type(include_textures)}"
+        )
+    batch = meshes
+    if isinstance(batch, List):
+        batch = join_meshes_as_batch(batch, include_textures=include_textures)
+    if len(batch) == 1:
+        return batch
+
+    # The packed representations already describe one big mesh: the offset of
+    # the vertex indices is automatically done by faces_packed.
+    scene_verts = batch.verts_packed().unsqueeze(0)  # (1, sum(V_n), 3)
+    scene_faces = batch.faces_packed().unsqueeze(0)  # (1, sum(F_n), 3)
+    if include_textures and batch.textures is not None:
+        scene_textures = batch.textures.join_scene()
+    else:
+        scene_textures = None
+    return Meshes(verts=scene_verts, faces=scene_faces, textures=scene_textures)
diff --git a/pytorch3d/pytorch3d/structures/pointclouds.py b/pytorch3d/pytorch3d/structures/pointclouds.py
new file mode 100644
index 0000000000000000000000000000000000000000..654b5a26ca32fada8c434443502059e0fe6a4b94
--- /dev/null
+++ b/pytorch3d/pytorch3d/structures/pointclouds.py
@@ -0,0 +1,1303 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from itertools import zip_longest
+from typing import List, Optional, Sequence, Tuple, Union
+
+import numpy as np
+import torch
+
+from ..common.datatypes import Device, make_device
+from . import utils as struct_utils
+
+
+class Pointclouds:
+    """
+    This class provides functions for working with batches of 3d point clouds,
+    and converting between representations.
+
+    Within Pointclouds, there are three different representations of the data.
+
+    List
+       - only used for input as a starting point to convert to other representations.
+    Padded
+       - has specific batch dimension.
+    Packed
+       - no batch dimension.
+       - has auxiliary variables used to index into the padded representation.
+
+    Example
+
+    Input list of points = [[P_1], [P_2], ... , [P_N]]
+    where P_1, ... , P_N are the number of points in each cloud and N is the
+    number of clouds.
+
+    # SPHINX IGNORE
+     List                      | Padded                  | Packed
+    ---------------------------|-------------------------|------------------------
+    [[P_1], ... , [P_N]]       | size = (N, max(P_n), 3) |  size = (sum(P_n), 3)
+                               |                         |
+    Example for locations      |                         |
+    or colors:                 |                         |
+                               |                         |
+    P_1 = 3, P_2 = 4, P_3 = 5  | size = (3, 5, 3)        |  size = (12, 3)
+                               |                         |
+    List([                     | tensor([                |  tensor([
+      [                        |     [                   |    [0.1, 0.3, 0.5],
+        [0.1, 0.3, 0.5],       |       [0.1, 0.3, 0.5],  |    [0.5, 0.2, 0.1],
+        [0.5, 0.2, 0.1],       |       [0.5, 0.2, 0.1],  |    [0.6, 0.8, 0.7],
+        [0.6, 0.8, 0.7]        |       [0.6, 0.8, 0.7],  |    [0.1, 0.3, 0.3],
+      ],                       |       [0,    0,    0],  |    [0.6, 0.7, 0.8],
+      [                        |       [0,    0,    0]   |    [0.2, 0.3, 0.4],
+        [0.1, 0.3, 0.3],       |     ],                  |    [0.1, 0.5, 0.3],
+        [0.6, 0.7, 0.8],       |     [                   |    [0.7, 0.3, 0.6],
+        [0.2, 0.3, 0.4],       |       [0.1, 0.3, 0.3],  |    [0.2, 0.4, 0.8],
+        [0.1, 0.5, 0.3]        |       [0.6, 0.7, 0.8],  |    [0.9, 0.5, 0.2],
+      ],                       |       [0.2, 0.3, 0.4],  |    [0.2, 0.3, 0.4],
+      [                        |       [0.1, 0.5, 0.3],  |    [0.9, 0.3, 0.8],
+        [0.7, 0.3, 0.6],       |       [0,    0,    0]   |  ])
+        [0.2, 0.4, 0.8],       |     ],                  |
+        [0.9, 0.5, 0.2],       |     [                   |
+        [0.2, 0.3, 0.4],       |       [0.7, 0.3, 0.6],  |
+        [0.9, 0.3, 0.8],       |       [0.2, 0.4, 0.8],  |
+      ]                        |       [0.9, 0.5, 0.2],  |
+    ])                         |       [0.2, 0.3, 0.4],  |
+                               |       [0.9, 0.3, 0.8]   |
+                               |     ]                   |
+                               |  ])                     |
+    -----------------------------------------------------------------------------
+
+    Auxiliary variables for packed representation
+
+    Name                           |   Size              |  Example from above
+    -------------------------------|---------------------|-----------------------
+                                   |                     |
+    packed_to_cloud_idx            |  size = (sum(P_n))  |   tensor([
+                                   |                     |     0, 0, 0, 1, 1, 1,
+                                   |                     |     1, 2, 2, 2, 2, 2
+                                   |                     |   ])
+                                   |                     |   size = (12)
+                                   |                     |
+    cloud_to_packed_first_idx      |  size = (N)         |   tensor([0, 3, 7])
+                                   |                     |   size = (3)
+                                   |                     |
+    num_points_per_cloud           |  size = (N)         |   tensor([3, 4, 5])
+                                   |                     |   size = (3)
+                                   |                     |
+    padded_to_packed_idx           |  size = (sum(P_n))  |  tensor([
+                                   |                     |     0, 1, 2, 5, 6, 7,
+                                   |                     |     8, 10, 11, 12, 13,
+                                   |                     |     14
+                                   |                     |  ])
+                                   |                     |  size = (12)
+    -----------------------------------------------------------------------------
+    # SPHINX IGNORE
+    """
+
+    _INTERNAL_TENSORS = [
+        "_points_packed",
+        "_points_padded",
+        "_normals_packed",
+        "_normals_padded",
+        "_features_packed",
+        "_features_padded",
+        "_packed_to_cloud_idx",
+        "_cloud_to_packed_first_idx",
+        "_num_points_per_cloud",
+        "_padded_to_packed_idx",
+        "valid",
+        "equisized",
+    ]
+
+    def __init__(self, points, normals=None, features=None) -> None:
+        """
+        Args:
+            points:
+                Can be either
+
+                - List where each element is a tensor of shape (num_points, 3)
+                  containing the (x, y, z) coordinates of each point.
+                - Padded float tensor with shape (num_clouds, num_points, 3).
+            normals:
+                Can be either
+
+                - None
+                - List where each element is a tensor of shape (num_points, 3)
+                  containing the normal vector for each point.
+                - Padded float tensor of shape (num_clouds, num_points, 3).
+            features:
+                Can be either
+
+                - None
+                - List where each element is a tensor of shape (num_points, C)
+                  containing the features for the points in the cloud.
+                - Padded float tensor of shape (num_clouds, num_points, C).
+                where C is the number of channels in the features.
+                For example 3 for RGB color.
+
+        Refer to the class docstring for descriptions of the List and Padded
+        representations. A ValueError is raised for malformed inputs.
+        """
+        self.device = torch.device("cpu")  # default; replaced by the points' device
+
+        # Indicates whether the clouds in the list/batch have the same number
+        # of points.
+        self.equisized = False
+
+        # Boolean indicator for each cloud in the batch.
+        # True if cloud has non zero number of points, False otherwise.
+        self.valid = None
+
+        self._N = 0  # batch size (number of clouds)
+        self._P = 0  # (max) number of points per cloud
+        self._C = None  # number of channels in the features
+
+        # List representations of points, normals and features.
+        self._points_list = None
+        self._normals_list = None
+        self._features_list = None
+
+        # Number of points per cloud.
+        self._num_points_per_cloud = None  # N
+
+        # Packed representation.
+        self._points_packed = None  # (sum(P_n), 3)
+        self._normals_packed = None  # (sum(P_n), 3)
+        self._features_packed = None  # (sum(P_n), C)
+
+        self._packed_to_cloud_idx = None  # sum(P_n)
+
+        # Index of each cloud's first point in the packed points.
+        # Assumes packing is sequential.
+        self._cloud_to_packed_first_idx = None  # N
+
+        # Padded representation.
+        self._points_padded = None  # (N, max(P_n), 3)
+        self._normals_padded = None  # (N, max(P_n), 3)
+        self._features_padded = None  # (N, max(P_n), C)
+
+        # Index to convert points from flattened padded to packed.
+        self._padded_to_packed_idx = None  # N * max_P
+
+        # Identify type of points: a list of (P_n, 3) tensors or a padded tensor.
+        if isinstance(points, list):
+            self._points_list = points
+            self._N = len(self._points_list)
+            self.valid = torch.zeros((self._N,), dtype=torch.bool, device=self.device)
+
+            if self._N > 0:
+                self.device = self._points_list[0].device  # first cloud decides
+                for p in self._points_list:
+                    if len(p) > 0 and (p.dim() != 2 or p.shape[1] != 3):
+                        raise ValueError("Clouds in list must be of shape Px3 or empty")
+                    if p.device != self.device:
+                        raise ValueError("All points must be on the same device")
+
+                num_points_per_cloud = torch.tensor(
+                    [len(p) for p in self._points_list], device=self.device
+                )
+                self._P = int(num_points_per_cloud.max())
+                self.valid = torch.tensor(
+                    [len(p) > 0 for p in self._points_list],
+                    dtype=torch.bool,
+                    device=self.device,
+                )
+
+                if len(num_points_per_cloud.unique()) == 1:  # all sizes are equal
+                    self.equisized = True
+                self._num_points_per_cloud = num_points_per_cloud
+            else:
+                self._num_points_per_cloud = torch.tensor([], dtype=torch.int64)
+
+        elif torch.is_tensor(points):
+            if points.dim() != 3 or points.shape[2] != 3:
+                raise ValueError("Points tensor has incorrect dimensions.")
+            self._points_padded = points
+            self._N = self._points_padded.shape[0]
+            self._P = self._points_padded.shape[1]
+            self.device = self._points_padded.device
+            self.valid = torch.ones((self._N,), dtype=torch.bool, device=self.device)
+            self._num_points_per_cloud = torch.tensor(
+                [self._P] * self._N, device=self.device
+            )
+            self.equisized = True  # a padded tensor gives every cloud P points
+        else:  # NOTE(review): "\" continuations put indentation into the message
+            raise ValueError(
+                "Points must be either a list or a tensor with \
+                    shape (batch_size, P, 3) where P is the maximum number of \
+                    points in a cloud."
+            )
+
+        # parse normals (checked against the points parsed above)
+        normals_parsed = self._parse_auxiliary_input(normals)
+        self._normals_list, self._normals_padded, normals_C = normals_parsed
+        if normals_C is not None and normals_C != 3:
+            raise ValueError("Normals are expected to be 3-dimensional")
+
+        # parse features (any number of channels C is accepted)
+        features_parsed = self._parse_auxiliary_input(features)
+        self._features_list, self._features_padded, features_C = features_parsed
+        if features_C is not None:
+            self._C = features_C
+
+    def _parse_auxiliary_input(
+        self, aux_input
+    ) -> Tuple[Optional[List[torch.Tensor]], Optional[torch.Tensor], Optional[int]]:
+        """
+        Validate one auxiliary input (normals or features) passed to __init__
+        against the points that have already been parsed.
+
+        Args:
+            aux_input:
+              One of
+
+                - None.
+                - List where each element is a tensor of shape (num_points, C)
+                  holding the per-point values of one cloud.
+                - Padded float tensor of shape (num_clouds, num_points, C).
+              For normals, C = 3
+
+        Returns:
+            3-element tuple (list, padded, num_channels). A list input leaves
+            padded as None and a tensor input leaves list as None; when
+            aux_input is None or the batch is empty all three are None.
+        """
+        if aux_input is None or self._N == 0:
+            return None, None, None
+        if isinstance(aux_input, list):
+            return self._parse_auxiliary_input_list(aux_input)
+        if not torch.is_tensor(aux_input):
+            raise ValueError(
+                "Auxiliary input must be either a list or a tensor with \
+                    shape (batch_size, P, C) where P is the maximum number of \
+                    points in a cloud."
+            )
+
+        # A padded tensor has to line up with the points as (N, P, C).
+        if aux_input.dim() != 3:
+            raise ValueError("Auxiliary input tensor has incorrect dimensions.")
+        num_clouds, max_points, num_channels = aux_input.shape
+        if num_clouds != self._N:
+            raise ValueError("Points and inputs must be the same length.")
+        if max_points != self._P:
+            raise ValueError(
+                "Inputs tensor must have the right maximum \
+                    number of points in each cloud."
+            )
+        if aux_input.device != self.device:
+            raise ValueError(
+                "All auxiliary inputs must be on the same device as the points."
+            )
+        return None, aux_input, num_channels
+
+    def _parse_auxiliary_input_list(
+        self, aux_input: list
+    ) -> Tuple[Optional[List[torch.Tensor]], None, Optional[int]]:
+        """
+        Interpret the auxiliary inputs (normals, features) given to __init__,
+        if a list.
+
+        Args:
+            aux_input:
+                - List where each element is a tensor of shape (num_points, C)
+                  containing the features for the points in the cloud.
+                  The entry of a cloud without points may also be None or a
+                  tensor that is not 2D; it is then replaced by an empty
+                  (0, C) tensor in the returned list.
+              For normals, C = 3
+
+        Returns:
+            3-element tuple of list, padded=None, num_channels.
+            All three are None if no entry determines the number of channels.
+
+        Raises:
+            ValueError: if the list length differs from the number of clouds,
+                or an entry is missing, is not 2D, or disagrees with its cloud
+                in number of points, with the others in channels, or in device.
+        """
+        if len(aux_input) != self._N:
+            raise ValueError("Points and auxiliary input must be the same length.")
+
+        aux_input_C = None
+        to_fix = []  # positions of empty clouds whose entry is not a 2D tensor
+        counts = self._num_points_per_cloud.tolist()  # plain Python ints
+        for i, (p, d) in enumerate(zip(counts, aux_input)):
+            if p == 0 and (d is None or d.ndim != 2):
+                to_fix.append(i)
+                continue
+            # Check for None and for the rank before reading d.shape, so that a
+            # bad entry raises this ValueError rather than an Attribute/IndexError.
+            if d is None or d.dim() != 2:
+                raise ValueError(
+                    "A cloud auxiliary input must be of shape PxC or empty"
+                )
+            if p != d.shape[0]:
+                raise ValueError("A cloud has mismatched numbers of points and inputs")
+            if aux_input_C is None:
+                aux_input_C = d.shape[1]
+            elif aux_input_C != d.shape[1]:
+                raise ValueError("The clouds must have the same number of channels")
+            if d.device != self.device:
+                raise ValueError(
+                    "All auxiliary inputs must be on the same device as the points."
+                )
+
+        if aux_input_C is None:
+            # We found nothing useful
+            return None, None, None
+        # Nothing to repair: hand back the caller's list unchanged.
+        if not to_fix:
+            return aux_input, None, aux_input_C
+
+        # If we have empty but "wrong" inputs we want to store "fixed" versions.
+        good_empty = torch.zeros((0, aux_input_C), device=self.device)
+        aux_input_out = list(aux_input)
+        for i in to_fix:
+            aux_input_out[i] = good_empty
+        return aux_input_out, None, aux_input_C
+
+    def __len__(self) -> int:
+        return self._N  # batch size, i.e. the number of clouds
+
+    def __getitem__(
+        self,
+        index: Union[int, List[int], slice, torch.BoolTensor, torch.LongTensor],
+    ) -> "Pointclouds":
+        """
+        Select a sub-batch of clouds.
+
+        Args:
+            index: Which cloud(s) to retrieve: an int, a slice, a list of
+                ints, or a 1D tensor that is either a boolean mask or holds
+                integer indices.
+
+        Returns:
+            Pointclouds object with selected clouds. The tensors are not cloned.
+
+        Raises:
+            IndexError: if index is of an unsupported type, or is a tensor
+                that is not 1D or has a floating point dtype.
+        """
+        normals_list = self.normals_list()
+        features_list = self.features_list()
+
+        # Validate the index and turn a boolean mask into explicit positions.
+        if isinstance(index, torch.Tensor):
+            if index.dim() != 1 or index.dtype.is_floating_point:
+                raise IndexError(index)
+            # NOTE consider converting index to cpu for efficiency
+            if index.dtype == torch.bool:
+                # A mask becomes the list of positions of its True entries.
+                index = index.nonzero()
+                index = index.squeeze(1) if index.numel() > 0 else index
+                index = index.tolist()
+        elif not isinstance(index, (int, slice, list)):
+            raise IndexError(index)
+
+        def pick(per_cloud):
+            # Apply the (normalized) index to one per-cloud list.
+            if isinstance(index, int):
+                return [per_cloud[index]]
+            if isinstance(index, slice):
+                return per_cloud[index]
+            return [per_cloud[i] for i in index]
+
+        points = pick(self.points_list())
+        normals = features = None
+        if normals_list is not None:
+            normals = pick(normals_list)
+        if features_list is not None:
+            features = pick(features_list)
+
+        return self.__class__(points=points, normals=normals, features=features)
+
+    def isempty(self) -> bool:
+        """
+        Checks whether no cloud in the batch is valid (has points).
+
+        Returns:
+            bool (plain Python, never a tensor): True if there is no data.
+        """
+        return self._N == 0 or not bool(self.valid.any())
+
+    def points_list(self) -> List[torch.Tensor]:
+        """
+        Return the points as a list with one tensor per cloud. If absent, it
+        is cut out of the padded tensor (first P_n rows of cloud n) and cached.
+
+        Returns:
+            list of tensors of points of shape (P_n, 3).
+        """
+        if self._points_list is not None:
+            return self._points_list
+        assert (
+            self._points_padded is not None
+        ), "points_padded is required to compute points_list."
+        self._points_list = [
+            self._points_padded[n, : self.num_points_per_cloud()[n]]
+            for n in range(self._N)
+        ]
+        return self._points_list
+
+    def normals_list(self) -> Optional[List[torch.Tensor]]:
+        """
+        Return the normals as a list with one tensor per cloud, converting
+        (and caching) them from the padded tensor if only that is stored.
+
+        Returns:
+            list of tensors of normals of shape (P_n, 3), or None if
+            this object has no normals.
+        """
+        if self._normals_list is not None:
+            return self._normals_list
+        if self._normals_padded is None:
+            return None  # no normals were provided
+        sizes = self.num_points_per_cloud().tolist()
+        self._normals_list = struct_utils.padded_to_list(self._normals_padded, sizes)
+        return self._normals_list
+
+    def features_list(self) -> Optional[List[torch.Tensor]]:
+        """
+        Return the per-cloud list of features, deriving it from the padded
+        features on first use and caching the result.
+
+        Returns:
+            list of tensors of features of shape (P_n, C),
+            or None if there are no features.
+        """
+        cached = self._features_list
+        if cached is not None:
+            return cached
+        if self._features_padded is None:
+            # Nothing to derive the list from: this batch has no features.
+            return None
+        sizes = self.num_points_per_cloud().tolist()
+        self._features_list = struct_utils.padded_to_list(
+            self._features_padded, sizes
+        )
+        return self._features_list
+
+    def points_packed(self) -> torch.Tensor:
+        """
+        Return all points of the batch concatenated into a single tensor,
+        computing the packed representation first if it is not cached.
+
+        Returns:
+            tensor of points of shape (sum(P_n), 3).
+        """
+        self._compute_packed(refresh=False)
+        packed = self._points_packed
+        return packed
+
+    def normals_packed(self) -> Optional[torch.Tensor]:
+        """
+        Return all normals of the batch concatenated into a single tensor,
+        computing the packed representation first if it is not cached.
+
+        Returns:
+            tensor of normals of shape (sum(P_n), 3),
+            or None if there are no normals.
+        """
+        self._compute_packed(refresh=False)
+        packed = self._normals_packed
+        return packed
+
+    def features_packed(self) -> Optional[torch.Tensor]:
+        """
+        Return all features of the batch concatenated into a single tensor,
+        computing the packed representation first if it is not cached.
+
+        Returns:
+            tensor of features of shape (sum(P_n), C),
+            or None if there are no features.
+        """
+        self._compute_packed(refresh=False)
+        packed = self._features_packed
+        return packed
+
+    def packed_to_cloud_idx(self):
+        """
+        Return, for every packed point, the index of the cloud it belongs to.
+        The result x has one entry per point in the batch, and x[i] is the
+        cloud which contains points_packed()[i].
+
+        Returns:
+            1D tensor of indices.
+        """
+        self._compute_packed(refresh=False)
+        cloud_idx = self._packed_to_cloud_idx
+        return cloud_idx
+
+    def cloud_to_packed_first_idx(self):
+        """
+        Return, for every cloud, the packed index of its first point.
+        The result x has one entry per cloud, and the first point of the
+        ith cloud is points_packed[x[i]].
+
+        Returns:
+            1D tensor of indices of first items.
+        """
+        self._compute_packed(refresh=False)
+        first_idx = self._cloud_to_packed_first_idx
+        return first_idx
+
+    def num_points_per_cloud(self) -> torch.Tensor:
+        """
+        Return the stored point count of every cloud in the batch.
+
+        Returns:
+            1D tensor of sizes, one entry per cloud.
+        """
+        counts = self._num_points_per_cloud
+        return counts
+
+    def points_padded(self) -> torch.Tensor:
+        """
+        Return the points as one zero-padded tensor, computing the padded
+        representation first if it is not cached.
+
+        Returns:
+            tensor of points of shape (N, max(P_n), 3).
+        """
+        self._compute_padded(refresh=False)
+        padded = self._points_padded
+        return padded
+
+    def normals_padded(self) -> Optional[torch.Tensor]:
+        """
+        Return the normals as one zero-padded tensor, computing the padded
+        representation first if it is not cached.
+
+        Returns:
+            tensor of normals of shape (N, max(P_n), 3),
+            or None if there are no normals.
+        """
+        self._compute_padded(refresh=False)
+        padded = self._normals_padded
+        return padded
+
+    def features_padded(self) -> Optional[torch.Tensor]:
+        """
+        Return the features as one zero-padded tensor, computing the padded
+        representation first if it is not cached.
+
+        Returns:
+            tensor of features of shape (N, max(P_n), C),
+            or None if there are no features.
+        """
+        self._compute_padded(refresh=False)
+        padded = self._features_padded
+        return padded
+
+    def padded_to_packed_idx(self):
+        """
+        Return a 1D tensor x with length equal to the total number of points
+        such that points_packed()[i] is element x[i] of the flattened padded
+        representation. The result is cached after the first call.
+        The packed representation can be calculated as follows.
+
+        .. code-block:: python
+
+            p = points_padded().reshape(-1, 3)
+            points_packed = p[x]
+
+        Returns:
+            1D int64 tensor of indices. For a batch with no clouds this is an
+            empty tensor, so the result can always be used for indexing.
+        """
+        if self._padded_to_packed_idx is not None:
+            return self._padded_to_packed_idx
+        if self._N == 0:
+            # Honour the documented return type: an empty index tensor rather
+            # than a Python list.
+            self._padded_to_packed_idx = torch.zeros(
+                (0,), dtype=torch.int64, device=self.device
+            )
+        else:
+            # Cloud i occupies the slots [i * P, i * P + P_i) of the flattened
+            # padded tensor, where P is the padded length.
+            self._padded_to_packed_idx = torch.cat(
+                [
+                    torch.arange(v, dtype=torch.int64, device=self.device) + i * self._P
+                    for (i, v) in enumerate(self.num_points_per_cloud().tolist())
+                ],
+                dim=0,
+            )
+        return self._padded_to_packed_idx
+
+    def _compute_padded(self, refresh: bool = False):
+        """
+        Build the padded points, normals and features from their list
+        representations and store them on the object.
+
+        Args:
+            refresh: whether to force the recalculation even when padded
+                points are already stored.
+        """
+        if self._points_padded is not None and not refresh:
+            return
+
+        self._normals_padded = None
+        self._features_padded = None
+        if self.isempty():
+            self._points_padded = torch.zeros((self._N, 0, 3), device=self.device)
+            return
+
+        def _pad(tensors, channels):
+            # Shared padding call: pad every cloud to (P, channels) with zeros.
+            return struct_utils.list_to_padded(
+                tensors,
+                (self._P, channels),
+                pad_value=0.0,
+                equisized=self.equisized,
+            )
+
+        self._points_padded = _pad(self.points_list(), 3)
+        normals_list = self.normals_list()
+        if normals_list is not None:
+            self._normals_padded = _pad(normals_list, 3)
+        features_list = self.features_list()
+        if features_list is not None:
+            self._features_padded = _pad(features_list, self._C)
+
+    # TODO(nikhilar) Improve performance of _compute_packed.
+    def _compute_packed(self, refresh: bool = False):
+        """
+        Build the packed points, normals and features from their list
+        representations, together with the auxiliary index tensors
+        (`_packed_to_cloud_idx` and `_cloud_to_packed_first_idx`).
+
+        Args:
+            refresh: Set to True to force recomputation of packed
+                representations. Default: False.
+        """
+        cached = (
+            self._points_packed,
+            self._packed_to_cloud_idx,
+            self._cloud_to_packed_first_idx,
+        )
+        if not refresh and all(v is not None for v in cached):
+            return
+
+        # The list accessors derive the lists from the padded tensors when
+        # needed, so packed can be built regardless of the input format.
+        points_list = self.points_list()
+        normals_list = self.normals_list()
+        features_list = self.features_list()
+        if self.isempty():
+            self._points_packed = torch.zeros(
+                (0, 3), dtype=torch.float32, device=self.device
+            )
+            self._packed_to_cloud_idx = torch.zeros(
+                (0,), dtype=torch.int64, device=self.device
+            )
+            self._cloud_to_packed_first_idx = torch.zeros(
+                (0,), dtype=torch.int64, device=self.device
+            )
+            self._normals_packed = None
+            self._features_packed = None
+            return
+
+        packed_result = struct_utils.list_to_packed(points_list)
+        self._points_packed = packed_result[0]
+        # Sanity check: the sizes reported by the conversion must agree with
+        # the stored per-cloud point counts.
+        if not torch.allclose(self._num_points_per_cloud, packed_result[1]):
+            raise ValueError("Inconsistent list to packed conversion")
+        self._cloud_to_packed_first_idx = packed_result[2]
+        self._packed_to_cloud_idx = packed_result[3]
+
+        self._normals_packed = None
+        self._features_packed = None
+        if normals_list is not None:
+            self._normals_packed = struct_utils.list_to_packed(normals_list)[0]
+        if features_list is not None:
+            self._features_packed = struct_utils.list_to_packed(features_list)[0]
+
+    def clone(self):
+        """
+        Make a deep copy of this Pointclouds object; every internal tensor
+        is cloned individually.
+
+        Returns:
+            new Pointclouds object.
+        """
+        # Build the copy from whichever representation is already stored
+        # (list first, then padded) so nothing has to be converted.
+        points_copy, normals_copy, features_copy = None, None, None
+        if self._points_list is not None:
+            points_copy = [p.clone() for p in self.points_list()]
+            src_normals = self.normals_list()
+            src_features = self.features_list()
+            if src_normals is not None:
+                normals_copy = [n.clone() for n in src_normals]
+            if src_features is not None:
+                features_copy = [f.clone() for f in src_features]
+        elif self._points_padded is not None:
+            points_copy = self.points_padded().clone()
+            src_normals = self.normals_padded()
+            src_features = self.features_padded()
+            if src_normals is not None:
+                normals_copy = src_normals.clone()
+            if src_features is not None:
+                features_copy = src_features.clone()
+        other = self.__class__(
+            points=points_copy, normals=normals_copy, features=features_copy
+        )
+        # Carry over the cached internal tensors as independent copies.
+        for name in self._INTERNAL_TENSORS:
+            value = getattr(self, name)
+            if torch.is_tensor(value):
+                setattr(other, name, value.clone())
+        return other
+
+    def detach(self):
+        """
+        Make a detached version of this Pointclouds object; every internal
+        tensor is detached individually.
+
+        Returns:
+            new Pointclouds object.
+        """
+        # Build the result from whichever representation is already stored
+        # (list first, then padded) so nothing has to be converted.
+        points_det, normals_det, features_det = None, None, None
+        if self._points_list is not None:
+            points_det = [p.detach() for p in self.points_list()]
+            src_normals = self.normals_list()
+            src_features = self.features_list()
+            if src_normals is not None:
+                normals_det = [n.detach() for n in src_normals]
+            if src_features is not None:
+                features_det = [f.detach() for f in src_features]
+        elif self._points_padded is not None:
+            points_det = self.points_padded().detach()
+            src_normals = self.normals_padded()
+            src_features = self.features_padded()
+            if src_normals is not None:
+                normals_det = src_normals.detach()
+            if src_features is not None:
+                features_det = src_features.detach()
+        other = self.__class__(
+            points=points_det, normals=normals_det, features=features_det
+        )
+        # Carry over the cached internal tensors, detached as well.
+        for name in self._INTERNAL_TENSORS:
+            value = getattr(self, name)
+            if torch.is_tensor(value):
+                setattr(other, name, value.detach())
+        return other
+
+    def to(self, device: Device, copy: bool = False):
+        """
+        Move the point clouds to a device, mirroring torch.Tensor.to().
+        When copy = False and the object is already on the requested device,
+        self is returned unchanged. Otherwise a clone is returned, moved to
+        the requested device if it differs from the current one.
+
+        Args:
+          device: Device (as str or torch.device) for the new tensor.
+          copy: Boolean indicator whether or not to clone self. Default False.
+
+        Returns:
+          Pointclouds object.
+        """
+        target = make_device(device)
+        already_there = self.device == target
+
+        if already_there and not copy:
+            return self
+
+        other = self.clone()
+        if already_there:
+            return other
+
+        other.device = target
+        if other._N > 0:
+            other._points_list = [p.to(target) for p in other.points_list()]
+            if other._normals_list is not None:
+                other._normals_list = [n.to(target) for n in other.normals_list()]
+            if other._features_list is not None:
+                other._features_list = [f.to(target) for f in other.features_list()]
+        # Move the cached internal tensors, reading them from self.
+        for name in self._INTERNAL_TENSORS:
+            value = getattr(self, name)
+            if torch.is_tensor(value):
+                setattr(other, name, value.to(target))
+        return other
+
+    def cpu(self):
+        """Return the point clouds on the CPU by delegating to `to("cpu")`."""
+        target = "cpu"
+        return self.to(target)
+
+    def cuda(self):
+        """Return the point clouds on CUDA by delegating to `to("cuda")`."""
+        target = "cuda"
+        return self.to(target)
+
+    def get_cloud(self, index: int):
+        """
+        Get tensors for a single cloud from the list representation.
+
+        Args:
+            index: Integer in the range [0, N).
+
+        Returns:
+            points: Tensor of shape (P, 3).
+            normals: Tensor of shape (P, 3), or None if there are no normals.
+            features: Tensor of shape (P, C), or None if there are no features.
+
+        Raises:
+            ValueError: if index is not an integer or lies outside [0, N).
+        """
+        if not isinstance(index, int):
+            raise ValueError("Cloud index must be an integer.")
+        # The valid range is half-open, so index == N must be rejected too.
+        if index < 0 or index >= self._N:
+            raise ValueError(
+                "Cloud index must be in the range [0, N) where \
+            N is the number of clouds in the batch."
+            )
+        points = self.points_list()[index]
+        normals, features = None, None
+        normals_list = self.normals_list()
+        if normals_list is not None:
+            normals = normals_list[index]
+        features_list = self.features_list()
+        if features_list is not None:
+            features = features_list[index]
+        return points, normals, features
+
+    # TODO(nikhilar) Move function to a utils file.
+    def split(self, split_sizes: list):
+        """
+        Cut this batch of N clouds into consecutive chunks, one per entry of
+        split_sizes, where the i-th chunk holds split_sizes[i] clouds.
+        Similar to torch.split().
+
+        Args:
+            split_sizes: List of integer sizes of Pointclouds objects to be
+            returned.
+
+        Returns:
+            list[Pointclouds].
+        """
+        if any(not isinstance(size, int) for size in split_sizes):
+            raise ValueError("Value of split_sizes must be a list of integers.")
+        pieces = []
+        start = 0
+        for size in split_sizes:
+            stop = start + size
+            pieces.append(self[start:stop])
+            start = stop
+        return pieces
+
+    def offset_(self, offsets_packed):
+        """
+        Shift every point by an offset, updating this object in place.
+
+        Args:
+            offsets_packed: A Tensor of shape (3,) or the same shape
+                as self.points_packed giving offsets to be added to
+                all points.
+
+        Returns:
+            self.
+        """
+        packed = self.points_packed()
+        if offsets_packed.shape == (3,):
+            # A single offset is applied to every point.
+            offsets_packed = offsets_packed.expand_as(packed)
+        if offsets_packed.shape != packed.shape:
+            raise ValueError("Offsets must have dimension (all_p, 3).")
+        self._points_packed = packed + offsets_packed
+        sizes = self.num_points_per_cloud().tolist()
+        shifted = list(self._points_packed.split(sizes, 0))
+        # The list is rebuilt from the new packed tensor so both stay in sync.
+        self._points_list = shifted
+        if self._points_padded is not None:
+            for cloud, pts in enumerate(shifted):
+                if len(pts) > 0:
+                    self._points_padded[cloud, : pts.shape[0], :] = pts
+        return self
+
+    # TODO(nikhilar) Move out of place operator to a utils file.
+    def offset(self, offsets_packed):
+        """
+        Shift every point by an offset without modifying this object: the
+        shift is applied to a clone via offset_.
+
+        Args:
+            offsets_packed: A Tensor of the same shape as self.points_packed
+                giving offsets to be added to all points.
+        Returns:
+            new Pointclouds object.
+        """
+        return self.clone().offset_(offsets_packed)
+
+    def subsample(self, max_points: Union[int, Sequence[int]]) -> "Pointclouds":
+        """
+        Randomly drop points so that no cloud exceeds its point budget.
+
+        Args:
+            max_points: maximum number of points in each cloud, either one
+                value for all clouds or one value per cloud.
+
+        Returns:
+            new Pointclouds object, or self if nothing to be done.
+        """
+        if isinstance(max_points, int):
+            max_points = [max_points] * len(self)
+        elif len(max_points) != len(self):
+            raise ValueError("wrong number of max_points supplied")
+        counts = self.num_points_per_cloud()
+        if all(int(n) <= int(cap) for n, cap in zip(counts, max_points)):
+            return self
+
+        kept_points, kept_features, kept_normals = [], [], []
+        # zip_longest fills features/normals with None when they are absent.
+        per_cloud = zip_longest(
+            map(int, max_points),
+            map(int, self.num_points_per_cloud()),
+            self.points_list(),
+            self.features_list() or (),
+            self.normals_list() or (),
+        )
+        for cap, n_points, points, features, normals in per_cloud:
+            if n_points > cap:
+                chosen = np.random.choice(n_points, cap, replace=False)
+                keep = torch.tensor(chosen, device=points.device, dtype=torch.int64)
+                points = points[keep]
+                if features is not None:
+                    features = features[keep]
+                if normals is not None:
+                    normals = normals[keep]
+            kept_points.append(points)
+            kept_features.append(features)
+            kept_normals.append(normals)
+
+        return Pointclouds(
+            points=kept_points,
+            normals=self.normals_list() and kept_normals,
+            features=self.features_list() and kept_features,
+        )
+
+    def scale_(self, scale):
+        """
+        Multiply the coordinates of this object by a scalar value
+        (i.e. enlarge/dilate), updating this object in place.
+
+        Args:
+            scale: A scalar, or a Tensor of shape (N,).
+
+        Returns:
+            self.
+        """
+        if not torch.is_tensor(scale):
+            # Broadcast a plain scalar to one factor per cloud.
+            scale = torch.full((len(self),), scale, device=self.device)
+        scaled = [scale[i] * pts for i, pts in enumerate(self.points_list())]
+        self._points_list = scaled
+        if self._points_packed is not None:
+            self._points_packed = torch.cat(scaled, dim=0)
+        if self._points_padded is not None:
+            for i, pts in enumerate(scaled):
+                if len(pts) > 0:
+                    self._points_padded[i, : pts.shape[0], :] = pts
+        return self
+
+    def scale(self, scale):
+        """
+        Scale the coordinates without modifying this object: the scaling is
+        applied to a clone via scale_.
+
+        Args:
+            scale: A scalar, or a Tensor of shape (N,).
+
+        Returns:
+            new Pointclouds object.
+        """
+        return self.clone().scale_(scale)
+
+    # TODO(nikhilar) Move function to utils file.
+    def get_bounding_boxes(self):
+        """
+        Compute the axis-aligned bounding box of every cloud.
+
+        Returns:
+            bboxes: Tensor of shape (N, 3, 2) where bbox[i, j] gives the
+            min and max values of cloud i along the jth coordinate axis.
+        """
+        clouds = self.points_list()
+        # Per-cloud extrema along the point dimension, each of shape (3,).
+        lower = torch.stack([pts.min(dim=0)[0] for pts in clouds], dim=0)  # (N, 3)
+        upper = torch.stack([pts.max(dim=0)[0] for pts in clouds], dim=0)  # (N, 3)
+        return torch.stack([lower, upper], dim=2)
+
+    def estimate_normals(
+        self,
+        neighborhood_size: int = 50,
+        disambiguate_directions: bool = True,
+        assign_to_self: bool = False,
+    ):
+        """
+        Estimates the normals of each point in each cloud and, on request,
+        stores them in the internal normals buffers of this object.
+
+        The estimation is delegated to `ops.estimate_pointcloud_normals`.
+        Please refer to that function for more detailed information about
+        the implemented algorithm.
+
+        Args:
+          **neighborhood_size**: The size of the neighborhood used to estimate the
+            geometry around each point.
+          **disambiguate_directions**: If `True`, uses the algorithm from [1] to
+            ensure sign consistency of the normals of neighboring points.
+          **assign_to_self**: If `True`, assigns the computed normals to the
+            internal buffers overwriting any previously stored normals.
+
+        Returns:
+          **normals**: The estimated normals, exactly as returned by
+            `ops.estimate_pointcloud_normals`.
+
+        References:
+          [1] Tombari, Salti, Di Stefano: Unique Signatures of Histograms for
+          Local Surface Description, ECCV 2010.
+        """
+        from .. import ops
+
+        normals_est = ops.estimate_pointcloud_normals(
+            self,
+            neighborhood_size=neighborhood_size,
+            disambiguate_directions=disambiguate_directions,
+        )
+        if not assign_to_self:
+            return normals_est
+
+        # Store the estimate as the padded normals and drop the stale
+        # list/packed versions before rebuilding the ones already in use.
+        _, self._normals_padded, _ = self._parse_auxiliary_input(normals_est)
+        self._normals_list, self._normals_packed = None, None
+        if self._points_list is not None:
+            # Refresh self._normals_list from the new padded normals.
+            self.normals_list()
+        if self._points_packed is not None:
+            # Refresh self._normals_packed from the list.
+            self._normals_packed = torch.cat(self._normals_list, dim=0)
+
+        return normals_est
+
+    def extend(self, N: int):
+        """
+        Build a new Pointclouds in which every cloud appears N times in a
+        row, each occurrence being an independent clone.
+
+        Args:
+            N: number of new copies of each cloud.
+
+        Returns:
+            new Pointclouds object.
+        """
+        if not isinstance(N, int):
+            raise ValueError("N must be an integer.")
+        if N <= 0:
+            raise ValueError("N must be > 0.")
+
+        def _repeat(tensors):
+            # N consecutive clones of each tensor, preserving cloud order.
+            return [t.clone() for t in tensors for _ in range(N)]
+
+        new_points_list = _repeat(self.points_list())
+        normals_list = self.normals_list()
+        new_normals_list = None if normals_list is None else _repeat(normals_list)
+        features_list = self.features_list()
+        new_features_list = None if features_list is None else _repeat(features_list)
+        return self.__class__(
+            points=new_points_list, normals=new_normals_list, features=new_features_list
+        )
+
+    def update_padded(
+        self, new_points_padded, new_normals_padded=None, new_features_padded=None
+    ):
+        """
+        Returns a Pointcloud structure with updated padded tensors and
+        shallow copies of the auxiliary tensors. This allows points_padded
+        (and normals and features) to be replaced without converting to the
+        list representation for heterogeneous batches.
+
+        Args:
+            new_points_padded: FloatTensor of shape (N, P, 3)
+            new_normals_padded: (optional) FloatTensor of shape (N, P, 3)
+            new_features_padded: (optional) FloatTensor of shape (N, P, C)
+
+        Returns:
+            Pointcloud with updated padded representations
+        """
+
+        def check_shapes(x, size):
+            batch, n_points, channels = size
+            if x.shape[0] != batch:
+                raise ValueError("new values must have the same batch dimension.")
+            if x.shape[1] != n_points:
+                raise ValueError("new values must have the same number of points.")
+            if channels is not None and x.shape[2] != channels:
+                raise ValueError("new values must have the same number of channels.")
+
+        check_shapes(new_points_padded, [self._N, self._P, 3])
+        if new_normals_padded is not None:
+            check_shapes(new_normals_padded, [self._N, self._P, 3])
+        if new_features_padded is not None:
+            check_shapes(new_features_padded, [self._N, self._P, self._C])
+
+        new = self.__class__(
+            points=new_points_padded,
+            normals=new_normals_padded,
+            features=new_features_padded,
+        )
+
+        # The constructor recomputes this flag; keep the one from self.
+        new.equisized = self.equisized
+
+        # Auxiliary tensors are shared with self (shallow copy).
+        for name in (
+            "_packed_to_cloud_idx",
+            "_cloud_to_packed_first_idx",
+            "_num_points_per_cloud",
+            "_padded_to_packed_idx",
+            "valid",
+        ):
+            value = getattr(self, name)
+            if torch.is_tensor(value):
+                setattr(new, name, value)
+
+        # Points always come from the new padded tensor.
+        new._points_padded = new_points_padded
+        assert new._points_list is None
+        assert new._points_packed is None
+
+        # Normals: keep the old ones (shallow copy) unless new ones are given.
+        if new_normals_padded is None:
+            new._normals_list = self._normals_list
+            new._normals_padded = self._normals_padded
+            new._normals_packed = self._normals_packed
+        else:
+            new._normals_padded = new_normals_padded
+            new._normals_list = None
+            new._normals_packed = None
+
+        # Features: keep the old ones (shallow copy) unless new ones are given.
+        if new_features_padded is None:
+            new._features_list = self._features_list
+            new._features_padded = self._features_padded
+            new._features_packed = self._features_packed
+        else:
+            new._features_padded = new_features_padded
+            new._features_list = None
+            new._features_packed = None
+        return new
+
+    def inside_box(self, box):
+        """
+        Flag the packed points that lie inside a 3D box (bounds inclusive).
+
+        Args:
+            box: FloatTensor of shape (2, 3) or (N, 2, 3) where N is the number
+                of clouds.
+                    box[..., 0, :] gives the min x, y & z.
+                    box[..., 1, :] gives the max x, y & z.
+        Returns:
+            idx: BoolTensor of length sum(P_i) indicating whether the packed points are
+                within the input box.
+        """
+        n_dims = box.dim()
+        if n_dims > 3 or n_dims < 2:
+            raise ValueError("Input box must be of shape (2, 3) or (N, 2, 3).")
+
+        if n_dims == 3 and box.shape[0] != 1 and box.shape[0] != self._N:
+            raise ValueError(
+                "Input box dimension is incompatible with pointcloud size."
+            )
+
+        if n_dims == 2:
+            box = box[None]
+
+        if (box[..., 0, :] > box[..., 1, :]).any():
+            raise ValueError("Input box is invalid: min values larger than max values.")
+
+        points_packed = self.points_packed()
+        total_points = points_packed.shape[0]
+
+        # Expand the box(es) so that there is one (2, 3) box per packed point.
+        if box.shape[0] == 1:
+            box = box.expand(total_points, 2, 3)
+        elif box.shape[0] == self._N:
+            per_cloud = [
+                b.expand(p, 2, 3)
+                for (b, p) in zip(box.unbind(0), self.num_points_per_cloud())
+            ]
+            box = torch.cat(per_cloud, 0)
+
+        above_min = points_packed >= box[:, 0]
+        below_max = points_packed <= box[:, 1]
+        return (above_min & below_max).all(dim=-1)
+
+
+def join_pointclouds_as_batch(pointclouds: Sequence[Pointclouds]) -> Pointclouds:
+    """
+    Merge a list of Pointclouds objects into a single batched Pointclouds
+    object. All pointclouds must be on the same device.
+
+    Args:
+        pointclouds: Non-empty sequence of Pointclouds objects, each with its
+            own batch dim [b1, b2, ..., bN]. Normals and features must be
+            set either on all of them or on none of them.
+    Returns:
+        pointcloud: Pointclouds object with all input pointclouds collated into
+            a single object with batch dim = sum(b1, b2, ..., bN)
+
+    Raises:
+        ValueError: if the argument is not a sequence of Pointclouds, is
+            empty, mixes devices, mixes set and unset normals/features, or
+            mixes feature dimensions.
+    """
+    if isinstance(pointclouds, Pointclouds) or not isinstance(pointclouds, Sequence):
+        raise ValueError("Wrong first argument to join_pointclouds_as_batch.")
+    if len(pointclouds) == 0:
+        # Fail with a clear message instead of an IndexError on pointclouds[0].
+        raise ValueError("join_pointclouds_as_batch requires at least one item.")
+
+    device = pointclouds[0].device
+    if not all(p.device == device for p in pointclouds):
+        raise ValueError("Pointclouds must all be on the same device")
+
+    kwargs = {}
+    for field in ("points", "normals", "features"):
+        # One entry per input object: its list representation, or None.
+        field_list = [getattr(p, field + "_list")() for p in pointclouds]
+        if None in field_list:
+            if field == "points":
+                raise ValueError("Pointclouds cannot have their points set to None!")
+            if not all(f is None for f in field_list):
+                raise ValueError(
+                    f"Pointclouds in the batch have some fields '{field}'"
+                    + " defined and some set to None."
+                )
+            field_list = None
+        else:
+            # Flatten the per-object lists into one list of per-cloud tensors.
+            field_list = [p for points in field_list for p in points]
+            if field == "features" and any(
+                p.shape[1] != field_list[0].shape[1] for p in field_list[1:]
+            ):
+                raise ValueError("Pointclouds must have the same number of features")
+        kwargs[field] = field_list
+
+    return Pointclouds(**kwargs)
+
+
+def join_pointclouds_as_scene(
+    pointclouds: Union[Pointclouds, List[Pointclouds]]
+) -> Pointclouds:
+    """
+    Fuse a batch of point clouds into one single cloud. The input is either a
+    Pointclouds object or a list of Pointclouds objects; a list is first
+    collated with join_pointclouds_as_batch, so its members must all be on the
+    same device, and they must either all or none have features and all or
+    none have normals.
+
+    Args:
+        pointclouds: Pointclouds object that contains a batch of point clouds,
+            or a list of Pointclouds objects.
+
+    Returns:
+        new Pointclouds object containing a single point cloud; a batch that
+        already holds exactly one cloud is returned as is.
+    """
+    if isinstance(pointclouds, list):
+        pointclouds = join_pointclouds_as_batch(pointclouds)
+
+    if len(pointclouds) == 1:
+        return pointclouds
+
+    packed_points = pointclouds.points_packed()
+    packed_features = pointclouds.features_packed()
+    packed_normals = pointclouds.normals_packed()
+    # Indexing with None adds the leading batch dimension of size one.
+    if packed_features is not None:
+        packed_features = packed_features[None]
+    if packed_normals is not None:
+        packed_normals = packed_normals[None]
+    return Pointclouds(
+        points=packed_points[None],
+        features=packed_features,
+        normals=packed_normals,
+    )
diff --git a/pytorch3d/pytorch3d/structures/utils.py b/pytorch3d/pytorch3d/structures/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..aab4fc3dad08b9f83255a1bc837c5e78b5f929ef
--- /dev/null
+++ b/pytorch3d/pytorch3d/structures/utils.py
@@ -0,0 +1,241 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Sequence, Tuple, Union
+
+import torch
+
+
+"""
+Util functions for points/verts/faces/volumes.
+"""
+
+
+def list_to_padded(
+    x: Union[List[torch.Tensor], Tuple[torch.Tensor]],
+    pad_size: Union[Sequence[int], None] = None,
+    pad_value: float = 0.0,
+    equisized: bool = False,
+) -> torch.Tensor:
+    r"""
+    Transforms a list of N tensors each of shape (Si_0, Si_1, ... Si_D) into:
+    - a single tensor of shape (N, pad_size(0), pad_size(1), ..., pad_size(D))
+      if pad_size is provided
+    - or a tensor of shape (N, max(Si_0), max(Si_1), ..., max(Si_D)) if pad_size is None.
+
+    Args:
+      x: list of Tensors
+      pad_size: list(int) specifying the size of the padded tensor.
+        If `None` (default), the largest size of each dimension over the
+        non-empty items is used (0 if every item is empty).
+      pad_value: float value to be used to fill the padded tensor
+      equisized: bool indicating whether the items in x are of equal size
+        (sometimes this is known and if provided saves computation); the
+        items are then simply stacked without any validation.
+
+    Returns:
+      x_padded: tensor of padded inputs stored in newly allocated memory.
+
+    Raises:
+      ValueError: if an item is not a tensor, if the items disagree on the
+        number of dimensions, or if `pad_size` has the wrong length.
+    """
+    if equisized:
+        return torch.stack(x, 0)
+
+    if not all(torch.is_tensor(y) for y in x):
+        raise ValueError("All items have to be instances of a torch.Tensor.")
+
+    # The common dimensionality is the largest one found in the list; empty
+    # 1D tensors are promoted to empty tensors of that dimensionality.
+    element_ndim = max(y.ndim for y in x)
+    x = [
+        (y.new_zeros([0] * element_ndim) if (y.ndim == 1 and y.nelement() == 0) else y)
+        for y in x
+    ]
+    if any(y.ndim != element_ndim for y in x):
+        raise ValueError("All items have to have the same number of dimensions!")
+
+    if pad_size is None:
+        # `default=0` keeps `max` from raising when every item is empty.
+        pad_dims = [
+            max((y.shape[dim] for y in x if len(y) > 0), default=0)
+            for dim in range(element_ndim)
+        ]
+    else:
+        if len(pad_size) != element_ndim:
+            raise ValueError("Pad size must contain target size for all dimensions.")
+        pad_dims = pad_size
+
+    x_padded = x[0].new_full((len(x), *pad_dims), pad_value)
+    for i, y in enumerate(x):
+        if len(y) > 0:
+            x_padded[(i, *(slice(0, s) for s in y.shape))] = y
+    return x_padded
+
+
+def padded_to_list(
+    x: torch.Tensor,
+    split_size: Union[Sequence[int], Sequence[Sequence[int]], None] = None,
+):
+    r"""
+    Splits a padded tensor of shape (N, S_1, S_2, ..., S_D) along its first
+    dimension into a list of N tensors, optionally cropping each of them.
+
+    The i-th output has shape:
+    - (S_1, S_2, ..., S_D) if split_size is None
+    - (Si_1, S_2, ..., S_D) if split_size(i) is the integer Si_1
+    - (Si_1, Si_2, ..., Si_D) if split_size(i) is the sequence of those sizes.
+
+    Args:
+      x: padded tensor
+      split_size: optional sequence holding, for every batch element, an int
+        or a sequence of ints with the number of leading items to keep.
+
+    Returns:
+      x_list: a list of tensors sharing the memory with the input.
+
+    Raises:
+      ValueError: if len(split_size) differs from x.shape[0].
+    """
+    unbound = x.unbind(0)
+    if split_size is None:
+        return list(unbound)
+    if len(split_size) != x.shape[0]:
+        raise ValueError("Split size must be of same length as inputs first dimension")
+
+    # An int crops only the first dimension; a sequence crops one per entry.
+    return [
+        item[:size] if isinstance(size, int) else item[tuple(slice(0, s) for s in size)]
+        for item, size in zip(unbound, split_size)
+    ]
+
+
+def list_to_packed(x: List[torch.Tensor]):
+    r"""
+    Concatenates a list of N tensors, each of shape (Mi, K, ...), along the
+    first dimension into one packed tensor of shape (sum(Mi), K, ...), and
+    builds the index tensors needed to map between the two layouts.
+
+    Args:
+      x: list of tensors.
+
+    Returns:
+        4-element tuple containing
+
+        - **x_packed**: the packed tensor of shape (sum(Mi), K, ...).
+        - **num_items**: int64 tensor of shape N containing Mi for each
+          element in x.
+        - **item_packed_first_idx**: int64 tensor of shape N with the index
+          in x_packed of the first item of each element of x.
+        - **item_packed_to_list_idx**: int64 tensor of shape sum(Mi) giving,
+          for every packed item, the index of the list element it came from.
+
+        The index tensors with N entries live on the device of x[0].
+    """
+    device = x[0].device
+    sizes = [len(y) for y in x]
+    num_items = torch.tensor(sizes, dtype=torch.int64, device=device)
+    # Exclusive prefix sum: start offset of every element inside x_packed.
+    item_packed_first_idx = torch.cumsum(num_items, dim=0) - num_items
+    item_packed_to_list_idx = torch.cat(
+        [
+            torch.full((size,), i, dtype=torch.int64, device=y.device)
+            for i, (size, y) in enumerate(zip(sizes, x))
+        ],
+        dim=0,
+    )
+
+    x_packed = torch.cat(x, dim=0)
+    return x_packed, num_items, item_packed_first_idx, item_packed_to_list_idx
+
+
+def packed_to_list(x: torch.Tensor, split_size: Union[list, int]):
+    r"""
+    Splits a packed tensor of shape (sum(Mi), K, L, ...) along its first
+    dimension into chunks of shape (Mi, K, L, ...).
+
+    Args:
+      x: packed tensor
+      split_size: the sizes Mi of the chunks (list or tuple), or a single
+        int chunk size; forwarded unchanged to `torch.split`.
+
+    Returns:
+      x_list: the chunks, in order, as returned by `torch.split`.
+    """
+    return torch.split(x, split_size, dim=0)
+
+
+def padded_to_packed(
+    x: torch.Tensor,
+    split_size: Union[list, tuple, None] = None,
+    pad_value: Union[float, int, None] = None,
+):
+    r"""
+    Transforms a padded tensor of shape (N, M, K) into a packed tensor of shape:
+     - (sum(Mi), K) where Mi, given by split_size(i), is the length of tensor i
+     - (N*M, K) if both split_size and pad_value are None
+
+    Args:
+      x: 3-dimensional padded tensor
+      split_size: list or tuple of N ints; entry i is the number Mi <= M of
+        leading rows of x[i] to keep.
+      pad_value: optional value to use to filter the padded values in the input
+        tensor: rows whose entries all equal pad_value are dropped.
+
+    Only one of split_size or pad_value should be provided, or both can be None.
+
+    Returns:
+      x_packed: a packed tensor.
+
+    Raises:
+      ValueError: if x is not 3-dimensional, if both split_size and pad_value
+        are given, or if split_size has a wrong length, a non-int entry or an
+        entry larger than M.
+    """
+    if x.ndim != 3:
+        raise ValueError("Supports only 3-dimensional input tensors")
+
+    N, M, D = x.shape
+    if split_size is not None and pad_value is not None:
+        raise ValueError("Only one of split_size or pad_value should be provided.")
+
+    x_packed = x.reshape(-1, D)  # flatten padded
+    if pad_value is None and split_size is None:
+        return x_packed
+
+    # Convert to packed using pad value
+    if pad_value is not None:
+        mask = x_packed.ne(pad_value).any(-1)
+        return x_packed[mask]
+
+    # Convert to packed using split sizes
+    # pyre-fixme[6]: Expected `Sized` for 1st param but got `Union[None,
+    #  List[typing.Any], typing.Tuple[typing.Any, ...]]`.
+    if len(split_size) != N:
+        raise ValueError("Split size must be of same length as inputs first dimension")
+
+    # pyre-fixme[16]: `None` has no attribute `__iter__`.
+    if not all(isinstance(i, int) for i in split_size):
+        # Adjacent literals: a backslash continuation put indentation in the text.
+        raise ValueError(
+            "Support only 1-dimensional unbinded tensor. "
+            "Split size for more dimensions provided"
+        )
+    # A larger entry would silently pick up rows of the next batch element.
+    if any(v > M for v in split_size):
+        raise ValueError("Split size entries must not exceed the padded size")
+
+    padded_to_packed_idx = torch.cat(
+        [
+            torch.arange(v, dtype=torch.int64, device=x.device) + i * M
+            # pyre-fixme[6]: Expected `Iterable[Variable[_T]]` for 1st param but got
+            #  `Union[None, List[typing.Any], typing.Tuple[typing.Any, ...]]`.
+            for (i, v) in enumerate(split_size)
+        ],
+        dim=0,
+    )
+    return x_packed[padded_to_packed_idx]
diff --git a/pytorch3d/pytorch3d/structures/volumes.py b/pytorch3d/pytorch3d/structures/volumes.py
new file mode 100644
index 0000000000000000000000000000000000000000..23ed743da0f682cba3b6ea7c2a7dd68ca062b6e4
--- /dev/null
+++ b/pytorch3d/pytorch3d/structures/volumes.py
@@ -0,0 +1,1135 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import copy
+from typing import List, Optional, Tuple, Union
+
+import torch
+from pytorch3d.common.compat import meshgrid_ij
+from pytorch3d.common.datatypes import Device, make_device
+from pytorch3d.transforms import Scale, Transform3d
+
+from . import utils as struct_utils
+
+
+_Scalar = Union[int, float]
+_Vector = Union[torch.Tensor, Tuple[_Scalar, ...], List[_Scalar]]
+_ScalarOrVector = Union[_Scalar, _Vector]
+# Annotation aliases for the `voxel_size` / `volume_translation` arguments.
+_VoxelSize = _ScalarOrVector
+_Translation = _Vector
+# A batch given as one tensor or as a list / tuple of per-element tensors.
+_TensorBatch = Union[torch.Tensor, List[torch.Tensor], Tuple[torch.Tensor]]
+_ALL_CONTENT: slice = slice(0, None)  # slice selecting every element
+
+
+class Volumes:
+    """
+    This class provides functions for working with batches of volumetric grids
+    of possibly varying spatial sizes.
+
+    VOLUME DENSITIES
+
+    The Volumes class can be either constructed from a 5D tensor of
+    `densities` of size `batch x density_dim x depth x height x width` or
+    from a list of differently-sized 4D tensors `[D_1, ..., D_batch]`,
+    where each `D_i` is of size `[density_dim x depth_i x height_i x width_i]`.
+
+    In case the `Volumes` object is initialized from the list of `densities`,
+    the list of tensors is internally converted to a single 5D tensor by
+    zero-padding the relevant dimensions. Both list and padded representations can be
+    accessed with the `Volumes.densities()` or `Volumes.densities_list()` getters.
+    The sizes of the individual volumes in the structure can be retrieved
+    with the `Volumes.get_grid_sizes()` getter.
+
+    The `Volumes` class is immutable. I.e. after generating a `Volumes` object,
+    one cannot change its properties, such as `self._densities` or `self._features`
+    anymore.
+
+
+    VOLUME FEATURES
+
+    While the `densities` field is intended to represent various measures of the
+    "density" of the volume cells (opacity, signed/unsigned distances
+    from the nearest surface, ...), one can additionally initialize the
+    object with the `features` argument. `features` are either a 5D tensor
+    of shape `batch x feature_dim x depth x height x width` or a list
+    of differently-sized 4D tensors `[F_1, ..., F_batch]`,
+    where each `F_i` is of size `[feature_dim x depth_i x height_i x width_i]`.
+    `features` are intended to describe other properties of volume cells,
+    such as per-voxel 3D vectors of RGB colors that can be later used
+    for rendering the volume.
+
+
+    VOLUME COORDINATES
+
+    Additionally, using the `VolumeLocator` class the `Volumes` class keeps track
+    of the locations of the centers of the volume cells in the local volume
+    coordinates as well as in the world coordinates.
+
+        Local coordinates:
+            - Represent the locations of the volume cells in the local coordinate
+              frame of the volume.
+            - The center of the voxel indexed with `[·, ·, 0, 0, 0]` in the volume
+              has its 3D local coordinate set to `[-1, -1, -1]`, while the voxel
+              at index `[·, ·, depth_i-1, height_i-1, width_i-1]` has its
+              3D local coordinate set to `[1, 1, 1]`.
+            - The first/second/third coordinate of each of the 3D per-voxel
+              XYZ vector denotes the horizontal/vertical/depth-wise position
+              respectively. I.e the order of the coordinate dimensions in the
+              volume is reversed w.r.t. the order of the 3D coordinate vectors.
+            - The intermediate coordinates between `[-1, -1, -1]` and `[1, 1, 1]`
+              are linearly interpolated over the spatial dimensions of the volume.
+            - Note that the convention is the same as for the 5D version of the
+              `torch.nn.functional.grid_sample` function called with
+              the same value of `align_corners` argument.
+            - Note that the local coordinate convention of `Volumes`
+              (+X = left to right, +Y = top to bottom, +Z = away from the user)
+              is *different* from the world coordinate convention of the
+              renderer for `Meshes` or `Pointclouds`
+              (+X = right to left, +Y = bottom to top, +Z = away from the user).
+
+        World coordinates:
+            - These define the locations of the centers of the volume cells
+              in the world coordinates.
+            - They are specified with the following mapping that converts
+              points `x_local` in the local coordinates to points `x_world`
+              in the world coordinates::
+
+                    x_world = (
+                        x_local * (volume_size - 1) * 0.5 * voxel_size
+                    ) - volume_translation,
+
+              here `voxel_size` specifies the size of each voxel of the volume,
+              and `volume_translation` is the 3D offset of the central voxel of
+              the volume w.r.t. the origin of the world coordinate frame.
+              Both `voxel_size` and `volume_translation` are specified in
+              the world coordinate units. `volume_size` is the spatial size of
+              the volume in form of a 3D vector `[width, height, depth]`.
+            - Given the above definition of `x_world`, one can derive the
+              inverse mapping from `x_world` to `x_local` as follows::
+
+                    x_local = (
+                        (x_world + volume_translation) / (0.5 * voxel_size)
+                    ) / (volume_size - 1)
+
+            - For a trivial volume with `volume_translation==[0, 0, 0]`
+              and `voxel_size=1`, `x_world` would range
+              from `-(volume_size-1)/2` to `+(volume_size-1)/2`.
+
+    Coordinate tensors that denote the locations of each of the volume cells in
+    local / world coordinates (with shape `(depth x height x width x 3)`)
+    can be retrieved by calling the `Volumes.get_coord_grid()` getter with the
+    appropriate `world_coordinates` argument.
+
+    Internally, the mapping between `x_local` and `x_world` is represented
+    as a `Transform3d` object `Volumes.VolumeLocator._local_to_world_transform`.
+    Users can access the relevant transformations with the
+    `Volumes.get_world_to_local_coords_transform()` and
+    `Volumes.get_local_to_world_coords_transform()`
+    functions.
+
+    Example coordinate conversion:
+        - For a "trivial" volume with `voxel_size = 1.`,
+          `volume_translation=[0., 0., 0.]`, and the spatial size of
+          `DxHxW = 5x5x5`, the point `x_world = (-2, 0, 2)` gets mapped
+          to `x_local=(-1, 0, 1)`.
+        - For a "trivial" volume `v` with `voxel_size = 1.`,
+          `volume_translation=[0., 0., 0.]`, the following holds:
+
+                torch.nn.functional.grid_sample(
+                    v.densities(),
+                    v.get_coord_grid(world_coordinates=False),
+                    align_corners=align_corners,
+                ) == v.densities(),
+
+            i.e. sampling the volume at trivial local coordinates
+            (no scaling with `voxel_size` or shift with `volume_translation`)
+            results in the same volume.
+    """
+
+    def __init__(
+        self,
+        densities: _TensorBatch,
+        features: Optional[_TensorBatch] = None,
+        voxel_size: _VoxelSize = 1.0,
+        volume_translation: _Translation = (0.0, 0.0, 0.0),
+        align_corners: bool = True,
+    ) -> None:
+        """
+        Args:
+            **densities**: Batch of volume densities: either a tensor of shape
+                `(minibatch, density_dim, depth, height, width)`, or a list
+                of 4D tensors `[D_1, ..., D_minibatch]` where each `D_i` has
+                shape `(density_dim, depth_i, height_i, width_i)`.
+                Typically, each voxel contains a non-negative number
+                corresponding to its opaqueness. The device of the
+                densities becomes the device of the whole structure.
+            **features**: Optional batch of feature volumes: either a tensor
+                of shape `(minibatch, feature_dim, depth, height, width)` or
+                a list of 4D tensors `[F_1, ..., F_minibatch]` where each
+                `F_i` has shape `(feature_dim, depth_i, height_i, width_i)`.
+                `None` (default) creates a feature-less structure.
+            **voxel_size**: Denotes the size of each volume voxel in world units.
+                Has to be one of:
+                a) A scalar (square voxels)
+                b) 3-tuple or a 3-list of scalars
+                c) a Tensor of shape (3,)
+                d) a Tensor of shape (minibatch, 3)
+                e) a Tensor of shape (minibatch, 1)
+                f) a Tensor of shape (1,) (square voxels)
+            **volume_translation**: Denotes the 3D translation of the center
+                of the volume in world units. Has to be one of:
+                a) 3-tuple or a 3-list of scalars
+                b) a Tensor of shape (3,)
+                c) a Tensor of shape (minibatch, 3)
+                d) a Tensor of shape (1,)
+            **align_corners**: If set (default), the centers of the corner voxels
+                have local coordinates of exactly -1 or +1. Otherwise, -1 and +1
+                refer to the outer corners of the corner voxels. Cf. the namesake
+                argument to `torch.nn.functional.grid_sample`.
+        """
+
+        # Validate the densities and convert them to the padded representation;
+        # the same call also yields the spatial size of every grid.
+        padded_densities, grid_sizes = self._convert_densities_features_to_tensor(
+            densities, "densities"
+        )
+        self._densities = padded_densities
+
+        # The whole structure lives on the device of the densities.
+        self.device = padded_densities.device
+
+        # The locator owns the grid sizes and the local <-> world mapping.
+        self.locator = VolumeLocator(
+            batch_size=len(self),
+            grid_sizes=grid_sizes,
+            voxel_size=voxel_size,
+            volume_translation=volume_translation,
+            device=self.device,
+            align_corners=align_corners,
+        )
+
+        # Features are optional; `_set_features` checks them against the
+        # device and the grid sizes established above.
+        self._features = None
+        if features is not None:
+            self._set_features(features)
+
+    def _convert_densities_features_to_tensor(
+        self, x: _TensorBatch, var_name: str
+    ) -> Tuple[torch.Tensor, torch.LongTensor]:
+        """
+        Handle the `densities` or `features` arguments to the constructor.
+
+        Accepts a 5-dim tensor, or a list / tuple of 4-dim tensors with equal
+        first dimensions, which is padded with `struct_utils.list_to_padded`.
+
+        Returns:
+            The batch as one 5-dim tensor, and a long tensor whose i-th row holds
+            the sizes of the last three dimensions of the i-th batch element.
+        """
+        if isinstance(x, (list, tuple)):
+            x_tensor = struct_utils.list_to_padded(x)
+            if not all(item.ndim == 4 for item in x):
+                raise ValueError(
+                    f"`{var_name}` has to be a list of 4-dim tensors of shape: "
+                    f"({var_name}_dim, height, width, depth)"
+                )
+            n_channels = x[0].shape[0]
+            if not all(item.shape[0] == n_channels for item in x):
+                raise ValueError(
+                    f"Each entry in the list of `{var_name}` has to have the "
+                    "same number of channels (first dimension in the tensor)."
+                )
+            shapes = [list(item.shape[1:]) for item in x]
+            x_shapes = torch.tensor(shapes, dtype=torch.long, device=x_tensor.device)
+        elif torch.is_tensor(x):
+            if x.ndim != 5:
+                raise ValueError(
+                    f"`{var_name}` has to be a 5-dim tensor of shape: "
+                    f"(minibatch, {var_name}_dim, height, width, depth)"
+                )
+            x_tensor = x
+            dims = torch.tensor(list(x.shape[2:]), dtype=torch.long, device=x.device)
+            x_shapes = dims[None].repeat(x.shape[0], 1)
+        else:
+            raise ValueError(
+                f"{var_name} must be either a list or a tensor with "
+                f"shape (batch_size, {var_name}_dim, H, W, D)."
+            )
+        # pyre-ignore[7]
+        return x_tensor, x_shapes
+
+    def __len__(self) -> int:
+        return self._densities.shape[0]  # batch size = first dim of densities
+
+    def __getitem__(
+        self,
+        index: Union[
+            int, List[int], Tuple[int], slice, torch.BoolTensor, torch.LongTensor
+        ],
+    ) -> "Volumes":
+        """
+        Select a sub-batch of the volumes.
+
+        Args:
+            index: Specifying the index of the volume to retrieve.
+                Can be an int, slice, list of ints or a boolean or a long tensor.
+                Floating point or non-1D tensors raise an IndexError.
+
+        Returns:
+            Volumes object with the selected volumes; the tensors are indexed
+            directly, no explicit clone is made.
+        """
+        if isinstance(index, int):
+            # Index with a 1-element tensor so that the batch dim is kept.
+            index = torch.LongTensor([index])
+        elif torch.is_tensor(index):
+            if index.dim() != 1 or index.dtype.is_floating_point:
+                raise IndexError(index)
+        elif not isinstance(index, (slice, list, tuple)):
+            raise IndexError(index)
+
+        feats = None if self._features is None else self.features()[index]
+        new = self.__class__(features=feats, densities=self.densities()[index])
+        # dont forget to update grid_sizes!
+        self.locator._copy_transform_and_sizes(new.locator, index=index)
+        return new
+
+    def features(self) -> Optional[torch.Tensor]:
+        """
+        Returns the features of the volume (`None` for feature-less volumes).
+
+        Returns:
+            **features**: The tensor of volume features stored in `self._features`.
+        """
+        return self._features
+
+    def densities(self) -> torch.Tensor:
+        """
+        Returns the densities of the volume in the padded representation.
+
+        Returns:
+            **densities**: The 5-dim tensor of volume densities.
+        """
+        return self._densities
+
+    def densities_list(self) -> List[torch.Tensor]:
+        """
+        Get the list representation of the densities (padding cropped away).
+
+        Returns:
+            list of tensors of densities of shape (dim_i, D_i, H_i, W_i).
+        """
+        return self._features_densities_list(self.densities())
+
+    def features_list(self) -> List[torch.Tensor]:
+        """
+        Get the list representation of the features.
+
+        Returns:
+            list of tensors of features of shape (dim_i, D_i, H_i, W_i)
+            or `None` for feature-less volumes.
+        """
+        padded = self.features()
+        if padded is not None:
+            return self._features_densities_list(padded)
+        # Feature-less volume: there is nothing to split.
+        # pyre-fixme[7]: Expected `List[torch.Tensor]` but got `None`.
+        return None
+
+    def get_align_corners(self) -> bool:
+        """
+        Return the `_align_corners` flag of `self.locator`; the locator is
+        constructed with the `align_corners` argument given to `Volumes`.
+        """
+        return self.locator._align_corners
+
+    def _features_densities_list(self, x: torch.Tensor) -> List[torch.Tensor]:
+        """
+        Retrieve the list representation of features/densities.
+
+        Args:
+            x: self.features() or self.densities()
+
+        Returns:
+            list of tensors of features/densities of shape (dim_i, D_i, H_i, W_i).
+        """
+        grid_sizes = self.get_grid_sizes()
+        # Prepend the channel count to every row so that each row becomes the
+        # full 4D shape the matching padded volume is cropped to.
+        channels = grid_sizes.new_full((grid_sizes.shape[0], 1), x.shape[1])
+        target_shapes = torch.cat((channels, grid_sizes), dim=1)
+        return struct_utils.padded_to_list(x, target_shapes.tolist())
+
+    def update_padded(
+        self, new_densities: torch.Tensor, new_features: Optional[torch.Tensor] = None
+    ) -> "Volumes":
+        """
+        Returns a shallow copy of this Volumes structure whose padded densities
+        (and features) are replaced. The locator, and hence the grid sizes and
+        the coordinate transform, is shared with `self`. This allows updating
+        densities (and features) without converting heterogeneous batches to
+        the list representation.
+
+        Args:
+            new_densities: FloatTensor of shape (N, dim_density, D, H, W)
+            new_features: (optional) FloatTensor of shape (N, dim_feature, D, H, W);
+                if `None`, the returned structure has no features.
+
+        Returns:
+            Volumes with updated features and densities
+        """
+        updated = copy.copy(self)
+        updated._set_densities(new_densities)
+        updated._features = None
+        if new_features is not None:
+            updated._set_features(new_features)
+        return updated
+
+    def _set_features(self, features: _TensorBatch) -> None:
+        self._set_densities_features("features", features)  # validates, then stores
+
+    def _set_densities(self, densities: _TensorBatch) -> None:
+        self._set_densities_features("densities", densities)  # validates, then stores
+
+    def _set_densities_features(self, var_name: str, x: _TensorBatch) -> None:
+        """
+        Validate `x` and store it as `self._<var_name>`.
+
+        Raises:
+            ValueError: if `x` is malformed, is on another device than `self`,
+                or its grid sizes differ from `self.get_grid_sizes()`.
+        """
+        x_tensor, grid_sizes = self._convert_densities_features_to_tensor(x, var_name)
+        # `x_tensor` is always 5-dim here: the conversion above raises otherwise.
+        if x_tensor.device != self.device:
+            raise ValueError(
+                f"`{var_name}` have to be on the same device as `self.densities`."
+            )
+        own = self.get_grid_sizes()
+        if own.shape != grid_sizes.shape or not torch.allclose(own, grid_sizes):
+            raise ValueError(
+                f"The size of every grid in `{var_name}` has to match the size of "
+                "the corresponding `densities` grid."
+            )
+        setattr(self, "_" + var_name, x_tensor)
+
+    def clone(self) -> "Volumes":
+        """
+        Deep copy of the Volumes object made with `copy.deepcopy`, i.e. the
+        copy does not share its tensors or its locator with `self`.
+
+        Returns:
+            new Volumes object.
+        """
+        return copy.deepcopy(self)
+
+    def to(self, device: Device, copy: bool = False) -> "Volumes":
+        """
+        Mirrors `torch.Tensor.to()`: returns `self` when it already is on
+        `device` and `copy` is False; otherwise returns a deep copy of `self`
+        whose tensors and locator are moved to `device`.
+
+        Args:
+            device: Device (as str or torch.device) for the new object.
+            copy: Boolean indicator whether or not to clone self. Default False.
+
+        Returns:
+            Volumes object.
+        """
+        target = make_device(device)
+        on_target = self.device == target
+        if on_target and not copy:
+            return self
+
+        other = self.clone()
+        if on_target:
+            return other
+
+        # Re-point the clone at the new device and move its tensors there.
+        other.device = target
+        other._densities = self._densities.to(target)
+        if self._features is not None:
+            # pyre-fixme[16]: `Optional` has no attribute `to`.
+            other._features = self.features().to(target)
+        self.locator._copy_transform_and_sizes(other.locator, device=target)
+        other.locator = other.locator.to(device, copy)
+        return other
+
+    def cpu(self) -> "Volumes":
+        return self.to("cpu")  # copy=False: no copy is forced
+
+    def cuda(self) -> "Volumes":
+        return self.to("cuda")  # copy=False: no copy is forced
+
+    def get_grid_sizes(self) -> torch.LongTensor:
+        """
+        Returns the sizes of the individual volumetric grids, as kept by `self.locator`.
+
+        Returns:
+            **grid_sizes**: Tensor of spatial sizes of each of the volumes
+                of size (batchsize, 3), where i-th row holds (D_i, H_i, W_i).
+        """
+        return self.locator.get_grid_sizes()
+
+    def get_local_to_world_coords_transform(self) -> Transform3d:
+        """
+        Return a Transform3d object that converts points in the
+        local coordinate frame of the volume to world coordinates.
+        Local volume coordinates are scaled s.t. the coordinates along one
+        side of the volume are in range [-1, 1].
+
+        Returns:
+            **local_to_world_transform**: A Transform3d object converting
+                points from local coordinates to the world coordinates.
+        """
+        return self.locator.get_local_to_world_coords_transform()
+
+    def get_world_to_local_coords_transform(self) -> Transform3d:
+        """
+        Return a Transform3d object that converts points in the
+        world coordinates to the local coordinate frame of the volume.
+        It is obtained by inverting the local-to-world transform. Local volume
+        coordinates are scaled s.t. one side of the volume spans [-1, 1].
+
+        Returns:
+            **world_to_local_transform**: A Transform3d object converting
+                points from world coordinates to local coordinates.
+        """
+        return self.get_local_to_world_coords_transform().inverse()
+
+    def world_to_local_coords(self, points_3d_world: torch.Tensor) -> torch.Tensor:
+        """
+        Convert a batch of 3D point coordinates `points_3d_world` of shape
+        (minibatch, ..., dim) in the world coordinates to
+        the local coordinate frame of the volume. Local volume
+        coordinates are scaled s.t. the coordinates along one side of the volume
+        are in range [-1, 1]. The conversion is delegated to `self.locator`.
+
+        Args:
+            **points_3d_world**: A tensor of shape `(minibatch, ..., 3)`
+                containing the 3D coordinates of a set of points, expressed
+                in the world coordinates, that will be converted to the local
+                volume coordinates (which range within [-1, 1] inside the
+                volume).
+
+        Returns:
+            **points_3d_local**: `points_3d_world` converted to the local
+                volume coordinates of shape `(minibatch, ..., 3)`.
+        """
+        return self.locator.world_to_local_coords(points_3d_world)
+
+    def local_to_world_coords(self, points_3d_local: torch.Tensor) -> torch.Tensor:
+        """
+        Convert a batch of 3D point coordinates `points_3d_local` of shape
+        (minibatch, ..., dim) in the local coordinate frame of the volume
+        to the world coordinates. The conversion is delegated to `self.locator`.
+
+        Args:
+            **points_3d_local**: A tensor of shape `(minibatch, ..., 3)`
+                containing the 3D coordinates of a set of points that will
+                be converted from the local volume coordinates (ranging
+                within [-1, 1]) to the world coordinates defined by the
+                voxel size and the translation of the volume.
+
+        Returns:
+            **points_3d_world**: `points_3d_local` converted to the world
+                coordinates of the volume of shape `(minibatch, ..., 3)`.
+        """
+        return self.locator.local_to_world_coords(points_3d_local)
+
+    def get_coord_grid(self, world_coordinates: bool = True) -> torch.Tensor:
+        """
+        Return the 3D coordinate grid of the volumetric grid
+        in local (`world_coordinates=False`) or world coordinates
+        (`world_coordinates=True`), as computed by `self.locator`.
+
+        The grid records location of each center of the corresponding volume voxel.
+
+        Local coordinates are scaled s.t. the values along one side of the
+        volume are in range [-1, 1].
+
+        Args:
+            **world_coordinates**: if `True` (default), the method
+                returns the grid in the world coordinates,
+                otherwise, in local coordinates.
+
+        Returns:
+            **coordinate_grid**: The grid of coordinates of shape
+                `(minibatch, depth, height, width, 3)`, where `minibatch`,
+                `height`, `width` and `depth` are the batch size, height, width
+                and depth of the volume `features` or `densities`.
+        """
+        return self.locator.get_coord_grid(world_coordinates)
+
+
+class VolumeLocator:
+    """
+    The `VolumeLocator` class keeps track of the locations of the
+    centers of the volume cells in the local volume coordinates as well as in
+    the world coordinates for a voxel grid structure in 3D.
+
+        Local coordinates:
+            - Represent the locations of the volume cells in the local coordinate
+              frame of the volume.
+            - The center of the voxel indexed with `[·, ·, 0, 0, 0]` in the volume
+              has its 3D local coordinate set to `[-1, -1, -1]`, while the voxel
+              at index `[·, ·, depth_i-1, height_i-1, width_i-1]` has its
+              3D local coordinate set to `[1, 1, 1]`.
+            - The first/second/third coordinate of each of the 3D per-voxel
+              XYZ vector denotes the horizontal/vertical/depth-wise position
+              respectively. I.e the order of the coordinate dimensions in the
+              volume is reversed w.r.t. the order of the 3D coordinate vectors.
+            - The intermediate coordinates between `[-1, -1, -1]` and `[1, 1, 1]`
+              are linearly interpolated over the spatial dimensions of the volume.
+            - Note that the convention is the same as for the 5D version of the
+              `torch.nn.functional.grid_sample` function called with
+              the same value of `align_corners` argument.
+            - Note that the local coordinate convention of `VolumeLocator`
+              (+X = left to right, +Y = top to bottom, +Z = away from the user)
+              is *different* from the world coordinate convention of the
+              renderer for `Meshes` or `Pointclouds`
+              (+X = right to left, +Y = bottom to top, +Z = away from the user).
+
+        World coordinates:
+            - These define the locations of the centers of the volume cells
+              in the world coordinates.
+            - They are specified with the following mapping that converts
+              points `x_local` in the local coordinates to points `x_world`
+              in the world coordinates::
+
+                    x_world = (
+                        x_local * (volume_size - 1) * 0.5 * voxel_size
+                    ) - volume_translation,
+
+              here `voxel_size` specifies the size of each voxel of the volume,
+              and `volume_translation` is the 3D offset of the central voxel of
+              the volume w.r.t. the origin of the world coordinate frame.
+              Both `voxel_size` and `volume_translation` are specified in
+              the world coordinate units. `volume_size` is the spatial size of
+              the volume in form of a 3D vector `[width, height, depth]`.
+            - Given the above definition of `x_world`, one can derive the
+              inverse mapping from `x_world` to `x_local` as follows::
+
+                    x_local = (
+                        (x_world + volume_translation) / (0.5 * voxel_size)
+                    ) / (volume_size - 1)
+
+            - For a trivial volume with `volume_translation==[0, 0, 0]`
+              and `voxel_size=1`, `x_world` would range
+              from `-(volume_size-1)/2` to `+(volume_size-1)/2`.
+
+    Coordinate tensors that denote the locations of each of the volume cells in
+    local / world coordinates (with shape `(depth x height x width x 3)`)
+    can be retrieved by calling the `VolumeLocator.get_coord_grid()` getter with the
+    appropriate `world_coordinates` argument.
+
+    Internally, the mapping between `x_local` and `x_world` is represented
+    as a `Transform3d` object `VolumeLocator._local_to_world_transform`.
+    Users can access the relevant transformations with the
+    `VolumeLocator.get_world_to_local_coords_transform()` and
+    `VolumeLocator.get_local_to_world_coords_transform()`
+    functions.
+
+    Example coordinate conversion:
+        - For a "trivial" volume with `voxel_size = 1.`,
+          `volume_translation=[0., 0., 0.]`, and the spatial size of
+          `DxHxW = 5x5x5`, the point `x_world = (-2, 0, 2)` gets mapped
+          to `x_local=(-1, 0, 1)`.
+        - For a "trivial" volume `v` with `voxel_size = 1.`,
+          `volume_translation=[0., 0., 0.]`, the following holds::
+
+                torch.nn.functional.grid_sample(
+                    v.densities(),
+                    v.get_coord_grid(world_coordinates=False),
+                    align_corners=align_corners,
+                ) == v.densities(),
+
+            i.e. sampling the volume at trivial local coordinates
+            (no scaling with `voxel_size` or shift with `volume_translation`)
+            results in the same volume.
+    """
+
+    def __init__(
+        self,
+        batch_size: int,
+        grid_sizes: Union[
+            torch.LongTensor, Tuple[int, int, int], List[torch.LongTensor]
+        ],
+        device: torch.device,
+        voxel_size: _VoxelSize = 1.0,
+        volume_translation: _Translation = (0.0, 0.0, 0.0),
+        align_corners: bool = True,
+    ):
+        """
+        **batch_size** : Batch size of the underlying grids
+        **grid_sizes** : Represents the resolutions of different grids in the batch. Can be
+                a) tuple of form (D, H, W)
+                b) list/tuple of length batch_size of lists/tuples of form (D, H, W)
+                c) torch.Tensor of shape (batch_size, 3) whose rows are (D, H, W)
+            D, H, W are depth, height, width respectively. If `grid_sizes` is a tuple then
+            all the grids in the batch have the same resolution.
+        **voxel_size**: Denotes the size of each volume voxel in world units.
+            Has to be one of:
+            a) A scalar (square voxels)
+            b) 3-tuple or a 3-list of scalars
+            c) a Tensor of shape (3,)
+            d) a Tensor of shape (minibatch, 3)
+            e) a Tensor of shape (minibatch, 1)
+            f) a Tensor of shape (1,) (square voxels)
+        **volume_translation**: Denotes the 3D translation of the center
+            of the volume in world units. Has to be one of:
+            a) 3-tuple or a 3-list of scalars
+            b) a Tensor of shape (3,)
+            c) a Tensor of shape (minibatch, 3)
+            d) a Tensor of shape (1,) (square voxels)
+        **align_corners**: If set (default), the coordinates of the corner voxels are
+            exactly −1 or +1 in the local coordinate system. Otherwise, the coordinates
+            correspond to the centers of the corner voxels. Cf. the namesake argument to
+            `torch.nn.functional.grid_sample`.
+        """
+        self.device = device
+        self._batch_size = batch_size
+        self._grid_sizes = self._convert_grid_sizes2tensor(grid_sizes)
+        self._resolution = tuple(torch.max(self._grid_sizes.cpu(), dim=0).values)
+        self._align_corners = align_corners
+
+        # set the local_to_world transform
+        self._set_local_to_world_transform(
+            voxel_size=voxel_size,
+            volume_translation=volume_translation,
+        )
+
+    def _convert_grid_sizes2tensor(
+        self, x: Union[torch.LongTensor, List[torch.LongTensor], Tuple[int, int, int]]
+    ) -> torch.LongTensor:
+        """
+        Normalize the constructor's `grid_sizes` argument to one
+        `(depth, height, width)` row per grid in the batch.
+
+        Returns:
+            A tensor on `self.device`, expected to be of shape `(batch_size, 3)`
+            (`torch.long` for list/tuple inputs; a tensor input keeps its dtype).
+
+        Raises:
+            ValueError: if `x` is not a list/tuple of ints, a `batch_size`-long
+                sequence of 3-element tensors/lists/tuples, or a 2-dim tensor.
+        """
+        if isinstance(x, (list, tuple)):
+            if isinstance(x[0], (torch.LongTensor, list, tuple)):
+                if self._batch_size != len(x):
+                    raise ValueError("x should have a batch size of 'batch_size'")
+                # pyre-ignore[6]
+                if any(len(x_) != 3 for x_ in x):
+                    raise ValueError(
+                        "`grid_sizes` has to be a list of 3-dim tensors of shape: "
+                        "(depth, height, width)"
+                    )
+                rows = x
+            elif isinstance(x[0], int):
+                # a single size triple is shared by every grid in the batch
+                rows = [x] * self._batch_size
+            else:
+                raise ValueError(
+                    "`grid_sizes` can be a list/tuple of int or torch.Tensor not of "
+                    f"{type(x[0])}."
+                )
+            x_shapes = torch.stack(
+                [
+                    # pyre-ignore[6]
+                    torch.tensor(list(r), dtype=torch.long, device=self.device)
+                    for r in rows
+                ],
+                dim=0,
+            )
+        elif torch.is_tensor(x):
+            if x.ndim != 2:
+                raise ValueError(
+                    "`grid_sizes` has to be a 2-dim tensor of shape: (minibatch, 3)"
+                )
+            x_shapes = x.to(self.device)
+        else:
+            raise ValueError(
+                "grid_sizes must be either a list of tensors with shape (D, H, W), a tensor"
+                " with shape (batch_size, 3) or a tuple of (D, H, W)."
+            )
+        # pyre-ignore[7]
+        return x_shapes
+
+    def _voxel_size_translation_to_transform(
+        self,
+        voxel_size: torch.Tensor,
+        volume_translation: torch.Tensor,
+        batch_size: int,
+    ) -> Transform3d:
+        """
+        Converts the `voxel_size` and `volume_translation` constructor arguments
+        to the internal `Transform3d` object `local_to_world_transform`.
+        """
+        volume_size_zyx = self.get_grid_sizes().float()
+        volume_size_xyz = volume_size_zyx[:, [2, 1, 0]]
+
+        # x_local = (
+        #       (x_world + volume_translation) / (0.5 * voxel_size)
+        #   ) / (volume_size - 1)
+
+        # x_world = (
+        #       x_local * (volume_size - 1) * 0.5 * voxel_size
+        #   ) - volume_translation
+
+        local_to_world_transform = Scale(
+            (volume_size_xyz - 1) * voxel_size * 0.5, device=self.device
+        ).translate(-volume_translation)
+
+        return local_to_world_transform
+
+    def get_coord_grid(self, world_coordinates: bool = True) -> torch.Tensor:
+        """
+        Return the 3D coordinate grid of the volumetric grid
+        in local (`world_coordinates=False`) or world coordinates
+        (`world_coordinates=True`).
+
+        The grid records location of each center of the corresponding volume voxel.
+
+        Local coordinates are scaled s.t. the values along one side of the
+        volume are in range [-1, 1].
+
+        Args:
+            **world_coordinates**: if `True`, the method
+                returns the grid in the world coordinates,
+                otherwise, in local coordinates.
+
+        Returns:
+            **coordinate_grid**: The grid of coordinates of shape
+                `(minibatch, depth, height, width, 3)`, where `minibatch`,
+                `height`, `width` and `depth` are the batch size, height, width
+                and depth of the volume `features` or `densities`.
+        """
+        # TODO(dnovotny): Implement caching of the coordinate grid.
+        return self._calculate_coordinate_grid(world_coordinates=world_coordinates)
+
+    def _calculate_coordinate_grid(
+        self, world_coordinates: bool = True
+    ) -> torch.Tensor:
+        """
+        Calculate the 3D coordinate grid of the volumetric grid either
+        in local (`world_coordinates=False`) or
+        world coordinates (`world_coordinates=True`) .
+        """
+
+        ba, (de, he, wi) = self._batch_size, self._resolution
+        grid_sizes = self.get_grid_sizes()
+
+        # generate coordinate axes
+        def corner_coord_adjustment(r):
+            return 0.0 if self._align_corners else 1.0 / r
+
+        vol_axes = [
+            torch.linspace(
+                -1.0 + corner_coord_adjustment(r),
+                1.0 - corner_coord_adjustment(r),
+                r,
+                dtype=torch.float32,
+                device=self.device,
+            )
+            for r in (de, he, wi)
+        ]
+
+        # generate per-coord meshgrids
+        Z, Y, X = meshgrid_ij(vol_axes)
+
+        # stack the coord grids ... this order matches the coordinate convention
+        # of torch.nn.grid_sample
+        vol_coords_local = torch.stack((X, Y, Z), dim=3)[None].repeat(ba, 1, 1, 1, 1)
+
+        # get grid sizes relative to the maximal volume size
+        grid_sizes_relative = (
+            torch.tensor([[de, he, wi]], device=grid_sizes.device, dtype=torch.float32)
+            - 1
+        ) / (grid_sizes - 1).float()
+
+        if (grid_sizes_relative != 1.0).any():
+            # if any of the relative sizes != 1.0, adjust the grid
+            grid_sizes_relative_reshape = grid_sizes_relative[:, [2, 1, 0]][
+                :, None, None, None
+            ]
+            vol_coords_local *= grid_sizes_relative_reshape
+            vol_coords_local += grid_sizes_relative_reshape - 1
+
+        if world_coordinates:
+            vol_coords = self.local_to_world_coords(vol_coords_local)
+        else:
+            vol_coords = vol_coords_local
+
+        return vol_coords
+
+    def get_local_to_world_coords_transform(self) -> Transform3d:
+        """
+        Return a Transform3d object that converts points in the
+        local coordinate frame of the volume to world coordinates.
+        Local volume coordinates are scaled s.t. the coordinates along one
+        side of the volume are in range [-1, 1].
+
+        Returns:
+            **local_to_world_transform**: A Transform3d object converting
+                points from local coordinates to the world coordinates.
+        """
+        return self._local_to_world_transform
+
+    def get_world_to_local_coords_transform(self) -> Transform3d:
+        """
+        Return a Transform3d object that converts points in the
+        world coordinates to the local coordinate frame of the volume.
+        Local volume coordinates are scaled s.t. the coordinates along one
+        side of the volume are in range [-1, 1].
+
+        Returns:
+            **world_to_local_transform**: A Transform3d object converting
+                points from world coordinates to local coordinates.
+        """
+        return self.get_local_to_world_coords_transform().inverse()
+
+    def world_to_local_coords(self, points_3d_world: torch.Tensor) -> torch.Tensor:
+        """
+        Convert a batch of 3D point coordinates `points_3d_world` of shape
+        (minibatch, ..., dim) in the world coordinates to
+        the local coordinate frame of the volume. Local volume
+        coordinates are scaled s.t. the coordinates along one side of the volume
+        are in range [-1, 1].
+
+        Args:
+            **points_3d_world**: A tensor of shape `(minibatch, ..., 3)`
+                containing the 3D coordinates of a set of points that will
+                be converted from the world coordinates to the local
+                volume coordinates (ranging within [-1, 1]) using the
+                `voxel_size` and `volume_translation` of the volume.
+
+        Returns:
+            **points_3d_local**: `points_3d_world` converted to the local
+                volume coordinates of shape `(minibatch, ..., 3)`.
+        """
+        pts_shape = points_3d_world.shape
+        return (
+            self.get_world_to_local_coords_transform()
+            .transform_points(points_3d_world.view(pts_shape[0], -1, 3))
+            .view(pts_shape)
+        )
+
+    def local_to_world_coords(self, points_3d_local: torch.Tensor) -> torch.Tensor:
+        """
+        Convert a batch of 3D point coordinates `points_3d_local` of shape
+        (minibatch, ..., dim) in the local coordinate frame of the volume
+        to the world coordinates.
+
+        Args:
+            **points_3d_local**: A tensor of shape `(minibatch, ..., 3)`
+                containing the 3D coordinates of a set of points that will
+                be converted from the local volume coordinates (ranging
+                within [-1, 1]) to the world coordinates
+                defined by the `voxel_size` and `volume_translation` parameters.
+
+        Returns:
+            **points_3d_world**: `points_3d_local` converted to the world
+                coordinates of the volume of shape `(minibatch, ..., 3)`.
+        """
+        pts_shape = points_3d_local.shape
+        return (
+            self.get_local_to_world_coords_transform()
+            .transform_points(points_3d_local.view(pts_shape[0], -1, 3))
+            .view(pts_shape)
+        )
+
+    def get_grid_sizes(self) -> torch.LongTensor:
+        """
+        Returns the sizes of individual volumetric grids in the structure.
+
+        Returns:
+            **grid_sizes**: Tensor of spatial sizes of each of the volumes
+                of size (batchsize, 3), where i-th row holds (D_i, H_i, W_i).
+        """
+        return self._grid_sizes
+
+    def _set_local_to_world_transform(
+        self,
+        voxel_size: _VoxelSize = 1.0,
+        volume_translation: _Translation = (0.0, 0.0, 0.0),
+    ):
+        """
+        Sets the internal representation of the transformation between the
+        world and local volume coordinates by specifying
+        `voxel_size` and `volume_translation`
+
+        Args:
+            **voxel_size**: Denotes the size of input voxels. Has to be one of:
+                a) A scalar (square voxels)
+                b) 3-tuple or a 3-list of scalars
+                c) a Tensor of shape (3,)
+                d) a Tensor of shape (minibatch, 3)
+                e) a Tensor of shape (1,) (square voxels)
+            **volume_translation**: Denotes the 3D translation of the center
+                of the volume in world units. Has to be one of:
+                a) 3-tuple or a 3-list of scalars
+                b) a Tensor of shape (3,)
+                c) a Tensor of shape (minibatch, 3)
+                d) a Tensor of shape (1,) (square voxels)
+        """
+        # handle voxel size and center
+        # here we force the tensors to lie on self.device
+        voxel_size = self._handle_voxel_size(voxel_size, len(self))
+        volume_translation = self._handle_volume_translation(
+            volume_translation, len(self)
+        )
+        self._local_to_world_transform = self._voxel_size_translation_to_transform(
+            voxel_size, volume_translation, len(self)
+        )
+
+    def _copy_transform_and_sizes(
+        self,
+        other: "VolumeLocator",
+        device: Optional[torch.device] = None,
+        index: Optional[
+            Union[int, List[int], Tuple[int], slice, torch.Tensor]
+        ] = _ALL_CONTENT,
+    ) -> None:
+        """
+        Copies the local to world transform and grid sizes to other VolumeLocator object
+        and moves it to specified device. Operates in place on other.
+
+        Args:
+            other: VolumeLocator object to which to copy
+            device: torch.device on which to put the result, defaults to self.device
+            index: Specifies which parts to copy.
+                Can be an int, slice, list of ints or a boolean or a long tensor.
+                Defaults to all items (`:`).
+        """
+        device = device if device is not None else self.device
+        other._grid_sizes = self._grid_sizes[index].to(device)
+        other._local_to_world_transform = self.get_local_to_world_coords_transform()[
+            # pyre-fixme[6]: For 1st param expected `Union[List[int], int, slice,
+            #  BoolTensor, LongTensor]` but got `Union[None, List[int], Tuple[int],
+            #  int, slice, Tensor]`.
+            index
+        ].to(device)
+
+    def _handle_voxel_size(
+        self, voxel_size: _VoxelSize, batch_size: int
+    ) -> torch.Tensor:
+        """
+        Handle the `voxel_size` argument to the `VolumeLocator` constructor.
+        """
+        err_msg = (
+            "voxel_size has to be either a 3-tuple of scalars, or a scalar, or"
+            " a torch.Tensor of shape (3,) or (1,) or (minibatch, 3) or (minibatch, 1)."
+        )
+        if isinstance(voxel_size, (float, int)):
+            # convert a scalar to a 3-element tensor
+            voxel_size = torch.full(
+                (1, 3), voxel_size, device=self.device, dtype=torch.float32
+            )
+        elif isinstance(voxel_size, torch.Tensor):
+            if voxel_size.numel() == 1:
+                # convert a single-element tensor to a 3-element one
+                voxel_size = voxel_size.view(-1).repeat(3)
+            elif len(voxel_size.shape) == 2 and (
+                voxel_size.shape[0] == batch_size and voxel_size.shape[1] == 1
+            ):
+                voxel_size = voxel_size.repeat(1, 3)
+        return self._convert_volume_property_to_tensor(voxel_size, batch_size, err_msg)
+
+    def _handle_volume_translation(
+        self, translation: _Translation, batch_size: int
+    ) -> torch.Tensor:
+        """
+        Handle the `volume_translation` argument to the `VolumeLocator` constructor.
+        """
+        err_msg = (
+            "`volume_translation` has to be either a 3-tuple of scalars, or"
+            " a Tensor of shape (1,3) or (minibatch, 3) or (3,)`."
+        )
+        return self._convert_volume_property_to_tensor(translation, batch_size, err_msg)
+
+    def __len__(self) -> int:
+        return self._batch_size
+
+    def _convert_volume_property_to_tensor(
+        self, x: _Vector, batch_size: int, err_msg: str
+    ) -> torch.Tensor:
+        """
+        Handle the `volume_translation` or `voxel_size` argument to
+        the VolumeLocator constructor.
+        Return a tensor of shape (N, 3) where N is the batch_size; a 3-sequence
+        or a tensor of shape (3,), (1, 3) or (N, 3) is accepted, anything else
+        raises `ValueError(err_msg)`.
+        """
+        if isinstance(x, (list, tuple)):
+            if len(x) != 3:
+                raise ValueError(err_msg)
+            x = torch.tensor(x, device=self.device, dtype=torch.float32)[None]
+            x = x.repeat((batch_size, 1))
+        elif isinstance(x, torch.Tensor):
+            # test the rank before indexing `x.shape` so that malformed tensors
+            # raise `ValueError(err_msg)` rather than an IndexError
+            ok = (x.ndim == 1 and x.shape[0] == 3) or (
+                x.ndim == 2 and x.shape[0] in (1, batch_size) and x.shape[1] == 3
+            )
+            if not ok:
+                raise ValueError(err_msg)
+            x = x.to(self.device)
+            if x.ndim == 1:
+                x = x[None]
+            if x.shape[0] == 1:
+                x = x.repeat((batch_size, 1))
+        else:
+            raise ValueError(err_msg)
+        return x
+
+    def to(self, device: Device, copy: bool = False) -> "VolumeLocator":
+        """
+        Match the functionality of torch.Tensor.to()
+        If copy = True or the self Tensor is on a different device, the
+        returned tensor is a copy of self with the desired torch.device.
+        If copy = False and the self Tensor already has the correct torch.device,
+        then self is returned.
+
+        Args:
+            device: Device (as str or torch.device) for the new tensor.
+            copy: Boolean indicator whether or not to clone self. Default False.
+
+        Returns:
+            VolumeLocator object.
+        """
+        device_ = make_device(device)
+        if not copy and self.device == device_:
+            return self
+
+        other = self.clone()
+        if self.device == device_:
+            return other
+
+        other.device = device_
+        other._grid_sizes = self._grid_sizes.to(device_)
+        other._local_to_world_transform = self.get_local_to_world_coords_transform().to(
+            device
+        )
+        return other
+
+    def clone(self) -> "VolumeLocator":
+        """
+        Deep copy of the VolumeLocator object. All internal tensors are cloned
+        individually.
+
+        Returns:
+            new VolumeLocator object.
+        """
+        return copy.deepcopy(self)
+
+    def cpu(self) -> "VolumeLocator":
+        return self.to("cpu")
+
+    def cuda(self) -> "VolumeLocator":
+        return self.to("cuda")
diff --git a/pytorch3d/pytorch3d/transforms/__init__.py b/pytorch3d/pytorch3d/transforms/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d8ee713452e2a5ebd95c11e8c4db036d219b598
--- /dev/null
+++ b/pytorch3d/pytorch3d/transforms/__init__.py
@@ -0,0 +1,39 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .math import acos_linear_extrapolation
+from .rotation_conversions import (
+    axis_angle_to_matrix,
+    axis_angle_to_quaternion,
+    euler_angles_to_matrix,
+    matrix_to_axis_angle,
+    matrix_to_euler_angles,
+    matrix_to_quaternion,
+    matrix_to_rotation_6d,
+    quaternion_apply,
+    quaternion_invert,
+    quaternion_multiply,
+    quaternion_raw_multiply,
+    quaternion_to_axis_angle,
+    quaternion_to_matrix,
+    random_quaternions,
+    random_rotation,
+    random_rotations,
+    rotation_6d_to_matrix,
+    standardize_quaternion,
+)
+from .se3 import se3_exp_map, se3_log_map
+from .so3 import (
+    so3_exp_map,
+    so3_exponential_map,
+    so3_log_map,
+    so3_relative_angle,
+    so3_rotation_angle,
+)
+from .transform3d import Rotate, RotateAxisAngle, Scale, Transform3d, Translate
+
+
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
diff --git a/pytorch3d/pytorch3d/transforms/math.py b/pytorch3d/pytorch3d/transforms/math.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2b93c9337996751569106795ba4785c99d1051c
--- /dev/null
+++ b/pytorch3d/pytorch3d/transforms/math.py
@@ -0,0 +1,85 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+from typing import Tuple
+
+import torch
+
+
+DEFAULT_ACOS_BOUND: float = 1.0 - 1e-4
+
+
+def acos_linear_extrapolation(
+    x: torch.Tensor,
+    bounds: Tuple[float, float] = (-DEFAULT_ACOS_BOUND, DEFAULT_ACOS_BOUND),
+) -> torch.Tensor:
+    """
+    Implements `arccos(x)` which is linearly extrapolated outside `x`'s original
+    domain of `(-1, 1)`. This allows for stable backpropagation in case `x`
+    is not guaranteed to be strictly within `(-1, 1)`.
+
+    More specifically::
+
+        bounds=(lower_bound, upper_bound)
+        if lower_bound <= x <= upper_bound:
+            acos_linear_extrapolation(x) = acos(x)
+        elif x <= lower_bound: # 1st order Taylor approximation
+            acos_linear_extrapolation(x)
+                = acos(lower_bound) + dacos/dx(lower_bound) * (x - lower_bound)
+        else:  # x >= upper_bound
+            acos_linear_extrapolation(x)
+                = acos(upper_bound) + dacos/dx(upper_bound) * (x - upper_bound)
+
+    Args:
+        x: Input `Tensor`.
+        bounds: A float 2-tuple defining the region for the
+            linear extrapolation of `acos`.
+            The first/second element of `bounds`
+            describes the lower/upper bound that defines the lower/upper
+            extrapolation region, i.e. the region where
+            `x <= bounds[0]`/`bounds[1] <= x`.
+            Note that all elements of `bounds` have to be within (-1, 1).
+    Returns:
+        acos_linear_extrapolation: `Tensor` containing the extrapolated `arccos(x)`.
+    """
+
+    lower_bound, upper_bound = bounds
+
+    if lower_bound > upper_bound:
+        raise ValueError("lower bound has to be smaller or equal to upper bound.")
+
+    if lower_bound <= -1.0 or upper_bound >= 1.0:
+        raise ValueError("Both lower bound and upper bound have to be within (-1, 1).")
+
+    # init an empty tensor and define the domain sets
+    acos_extrap = torch.empty_like(x)
+    x_upper = x >= upper_bound
+    x_lower = x <= lower_bound
+    x_mid = (~x_upper) & (~x_lower)
+
+    # acos calculation for lower_bound < x < upper_bound
+    acos_extrap[x_mid] = torch.acos(x[x_mid])
+    # the linear extrapolation for x >= upper_bound
+    acos_extrap[x_upper] = _acos_linear_approximation(x[x_upper], upper_bound)
+    # the linear extrapolation for x <= lower_bound
+    acos_extrap[x_lower] = _acos_linear_approximation(x[x_lower], lower_bound)
+
+    return acos_extrap
+
+
+def _acos_linear_approximation(x: torch.Tensor, x0: float) -> torch.Tensor:
+    """
+    Calculates the 1st order Taylor expansion of `arccos(x)` around `x0`.
+    """
+    return (x - x0) * _dacos_dx(x0) + math.acos(x0)
+
+
+def _dacos_dx(x: float) -> float:
+    """
+    Calculates the derivative of `arccos(x)` w.r.t. `x`.
+    """
+    return (-1.0) / math.sqrt(1.0 - x * x)
diff --git a/pytorch3d/pytorch3d/transforms/rotation_conversions.py b/pytorch3d/pytorch3d/transforms/rotation_conversions.py
new file mode 100644
index 0000000000000000000000000000000000000000..459441ca184ff484e252b2b4e4fc86b9b24d4c0e
--- /dev/null
+++ b/pytorch3d/pytorch3d/transforms/rotation_conversions.py
@@ -0,0 +1,596 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Optional
+
+import torch
+import torch.nn.functional as F
+
+from ..common.datatypes import Device
+
+
+"""
+The transformation matrices returned from the functions in this file assume
+the points on which the transformation will be applied are column vectors.
+i.e. the R matrix is structured as
+
+    R = [
+            [Rxx, Rxy, Rxz],
+            [Ryx, Ryy, Ryz],
+            [Rzx, Rzy, Rzz],
+        ]  # (3, 3)
+
+This matrix can be applied to column vectors by post multiplication
+by the points e.g.
+
+    points = [[0], [1], [2]]  # (3 x 1) xyz coordinates of a point
+    transformed_points = R * points
+
+To apply the same matrix to points which are row vectors, the R matrix
+can be transposed and pre multiplied by the points:
+
+e.g.
+    points = [[0, 1, 2]]  # (1 x 3) xyz coordinates of a point
+    transformed_points = points * R.transpose(1, 0)
+"""
+
+
+def quaternion_to_matrix(quaternions: torch.Tensor) -> torch.Tensor:
+    """
+    Build the rotation matrix corresponding to each input quaternion.
+
+    Args:
+        quaternions: tensor of shape (..., 4), real part first.
+
+    Returns:
+        Tensor of shape (..., 3, 3) with one rotation matrix per quaternion.
+    """
+    w, x, y, z = torch.unbind(quaternions, -1)
+    # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
+    scale = 2.0 / (quaternions * quaternions).sum(-1)
+    row_0 = (
+        1 - scale * (y * y + z * z),
+        scale * (x * y - z * w),
+        scale * (x * z + y * w),
+    )
+    row_1 = (
+        scale * (x * y + z * w),
+        1 - scale * (x * x + z * z),
+        scale * (y * z - x * w),
+    )
+    row_2 = (
+        scale * (x * z - y * w),
+        scale * (y * z + x * w),
+        1 - scale * (x * x + y * y),
+    )
+    flat = torch.stack(row_0 + row_1 + row_2, -1)
+    return flat.reshape(quaternions.shape[:-1] + (3, 3))
+
+
+def _copysign(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
+    """
+    Elementwise, return `a` with its sign flipped wherever `a` and `b`
+    disagree on being strictly negative, so that the magnitude comes from
+    `a` and the sign from `b`. Unlike the standard copysign floating-point
+    operation, no special care is taken about negative 0 and NaN.
+
+    Args:
+        a: source tensor.
+        b: tensor whose signs will be used, of the same shape as a.
+
+    Returns:
+        Tensor of the same shape as a with the signs of b.
+    """
+    flip = (a < 0) ^ (b < 0)
+    return torch.where(flip, -a, a)
+
+
+def _sqrt_positive_part(x: torch.Tensor) -> torch.Tensor:
+    """
+    Elementwise `sqrt(max(0, x))`; entries with `x <= 0` stay 0 and get a
+    zero subgradient, as only strictly positive entries pass through sqrt.
+    """
+    is_positive = x > 0
+    out = torch.zeros_like(x)
+    out[is_positive] = torch.sqrt(x[is_positive])
+    return out
+
+
+def matrix_to_quaternion(matrix: torch.Tensor) -> torch.Tensor:
+    """
+    Convert rotations given as rotation matrices to quaternions.
+
+    Args:
+        matrix: Rotation matrices as tensor of shape (..., 3, 3).
+
+    Returns:
+        Standardized quaternions (real part first, non negative), shape (..., 4).
+    """
+    if matrix.size(-1) != 3 or matrix.size(-2) != 3:
+        raise ValueError(f"Invalid rotation matrix shape {matrix.shape}.")
+
+    batch_dim = matrix.shape[:-2]
+    m00, m01, m02, m10, m11, m12, m20, m21, m22 = torch.unbind(
+        matrix.reshape(batch_dim + (9,)), dim=-1
+    )
+
+    q_abs = _sqrt_positive_part(
+        torch.stack(
+            [
+                1.0 + m00 + m11 + m22,
+                1.0 + m00 - m11 - m22,
+                1.0 - m00 + m11 - m22,
+                1.0 - m00 - m11 + m22,
+            ],
+            dim=-1,
+        )
+    )
+
+    # we produce the desired quaternion multiplied by each of r, i, j, k
+    quat_by_rijk = torch.stack(
+        [
+            # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and
+            #  `int`.
+            torch.stack([q_abs[..., 0] ** 2, m21 - m12, m02 - m20, m10 - m01], dim=-1),
+            # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and
+            #  `int`.
+            torch.stack([m21 - m12, q_abs[..., 1] ** 2, m10 + m01, m02 + m20], dim=-1),
+            # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and
+            #  `int`.
+            torch.stack([m02 - m20, m10 + m01, q_abs[..., 2] ** 2, m12 + m21], dim=-1),
+            # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and
+            #  `int`.
+            torch.stack([m10 - m01, m20 + m02, m21 + m12, q_abs[..., 3] ** 2], dim=-1),
+        ],
+        dim=-2,
+    )
+
+    # The divisor is floored at 0.1 to avoid dividing by (nearly) zero; the exact
+    # level is not important because a candidate with small q_abs is never picked.
+    flr = torch.tensor(0.1).to(dtype=q_abs.dtype, device=q_abs.device)
+    quat_candidates = quat_by_rijk / (2.0 * q_abs[..., None].max(flr))
+
+    # Without numerical error all four candidates agree up to sign; the one-hot
+    # mask keeps, per matrix, the best-conditioned one (the one with largest q_abs).
+    out = quat_candidates[
+        F.one_hot(q_abs.argmax(dim=-1), num_classes=4) > 0.5, :
+    ].reshape(batch_dim + (4,))
+    return standardize_quaternion(out)
+
+
+def _axis_angle_rotation(axis: str, angle: torch.Tensor) -> torch.Tensor:
+    """
+    Build, for every entry of `angle`, the matrix of the elementary rotation
+    about the given coordinate axis (the building block of Euler angles).
+
+    Args:
+        axis: Axis label "X", "Y" or "Z".
+        angle: any shape tensor of Euler angles in radians
+
+    Returns:
+        Rotation matrices as tensor of shape (..., 3, 3).
+    """
+
+    c, s = torch.cos(angle), torch.sin(angle)
+    one, zero = torch.ones_like(angle), torch.zeros_like(angle)
+
+    # the nine entries of each matrix in row-major order; the reshape below
+    # turns the trailing axis of size 9 into the final (3, 3) block
+    if axis == "X":
+        entries = (one, zero, zero, zero, c, -s, zero, s, c)
+    elif axis == "Y":
+        entries = (c, zero, s, zero, one, zero, -s, zero, c)
+    elif axis == "Z":
+        entries = (c, -s, zero, s, c, zero, zero, zero, one)
+    else:
+        raise ValueError("letter must be either X, Y or Z.")
+
+    return torch.stack(entries, -1).reshape(angle.shape + (3, 3))
+
+
+def euler_angles_to_matrix(euler_angles: torch.Tensor, convention: str) -> torch.Tensor:
+    """
+    Compose rotation matrices from Euler angles given in radians.
+
+    Args:
+        euler_angles: Euler angles in radians as tensor of shape (..., 3).
+        convention: Convention string of three uppercase letters from
+            {"X", "Y", and "Z"}; the middle one must differ from its neighbours.
+
+    Returns:
+        Rotation matrices as tensor of shape (..., 3, 3).
+    """
+    if euler_angles.dim() == 0 or euler_angles.shape[-1] != 3:
+        raise ValueError("Invalid input euler angles.")
+    if len(convention) != 3:
+        raise ValueError("Convention must have 3 letters.")
+    first, middle, last = convention
+    if middle == first or middle == last:
+        raise ValueError(f"Invalid convention {convention}.")
+    for letter in convention:
+        if letter not in ("X", "Y", "Z"):
+            raise ValueError(f"Invalid letter {letter} in convention string.")
+    rot_a, rot_b, rot_c = (
+        _axis_angle_rotation(axis, angles)
+        for axis, angles in zip(convention, torch.unbind(euler_angles, -1))
+    )
+    return torch.matmul(torch.matmul(rot_a, rot_b), rot_c)
+
+
+def _angle_from_tan(
+    axis: str, other_axis: str, data, horizontal: bool, tait_bryan: bool
+) -> torch.Tensor:
+    """
+    Recover the first or third Euler angle with `atan2` from the two entries
+    of `data` that equal a positive constant times its sine and cosine.
+
+    Args:
+        axis: Axis label "X", "Y" or "Z" for the angle we are finding.
+        other_axis: Axis label "X", "Y" or "Z" for the middle axis in the
+            convention.
+        data: One column or row of each rotation matrix, shape (..., 3).
+        horizontal: Whether we are looking for the angle for the third axis,
+            which means the relevant entries are in the same row of the
+            rotation matrix. If not, they are in the same column.
+        tait_bryan: Whether the first and third axes in the convention differ.
+
+    Returns:
+        Euler Angles in radians for each matrix in data as a tensor
+        of shape (...).
+    """
+    first, second = {"X": (2, 1), "Y": (0, 2), "Z": (1, 0)}[axis]
+    if horizontal:
+        first, second = second, first
+    is_cyclic = (axis + other_axis) in ("XY", "YZ", "ZX")
+    u, v = data[..., first], data[..., second]
+    if horizontal == is_cyclic:
+        return torch.atan2(u, v)
+    if tait_bryan:
+        return torch.atan2(-v, u)
+    return torch.atan2(v, -u)
+
+
+def _index_from_letter(letter: str) -> int:
+    """
+    Map the axis label "X", "Y" or "Z" to its index 0, 1 or 2.
+    Any other label raises a ValueError.
+    """
+    if letter in ("X", "Y", "Z"):
+        return "XYZ".index(letter)
+    raise ValueError("letter must be either X, Y or Z.")
+
+
+def matrix_to_euler_angles(matrix: torch.Tensor, convention: str) -> torch.Tensor:
+    """
+    Decompose rotation matrices into Euler angles given in radians.
+
+    Args:
+        matrix: Rotation matrices as tensor of shape (..., 3, 3).
+        convention: Convention string of three uppercase letters.
+
+    Returns:
+        Euler angles in radians as tensor of shape (..., 3).
+
+    Raises:
+        ValueError if `convention` or the shape of `matrix` is invalid.
+    """
+    if len(convention) != 3:
+        raise ValueError("Convention must have 3 letters.")
+    first, middle, last = convention
+    if middle == first or middle == last:
+        raise ValueError(f"Invalid convention {convention}.")
+    for letter in convention:
+        if letter not in ("X", "Y", "Z"):
+            raise ValueError(f"Invalid letter {letter} in convention string.")
+    if matrix.size(-1) != 3 or matrix.size(-2) != 3:
+        raise ValueError(f"Invalid rotation matrix shape {matrix.shape}.")
+
+    i0 = _index_from_letter(first)
+    i2 = _index_from_letter(last)
+    tait_bryan = i0 != i2
+    if not tait_bryan:
+        # first and last axis coincide: the middle angle comes from a diagonal entry
+        central_angle = torch.acos(matrix[..., i0, i0])
+    else:
+        sign = -1.0 if i0 - i2 in [-1, 2] else 1.0
+        central_angle = torch.asin(matrix[..., i0, i2] * sign)
+
+    # the outer angles are recovered via atan2 from column i2 and from row i0
+    first_angle = _angle_from_tan(first, middle, matrix[..., i2], False, tait_bryan)
+    last_angle = _angle_from_tan(last, middle, matrix[..., i0, :], True, tait_bryan)
+
+    return torch.stack((first_angle, central_angle, last_angle), -1)
+
+
+def random_quaternions(
+    n: int, dtype: Optional[torch.dtype] = None, device: Optional[Device] = None
+) -> torch.Tensor:
+    """
+    Sample random rotations as unit quaternions (versors) whose real part
+    is nonnegative.
+
+    Args:
+        n: Number of quaternions in a batch to return.
+        dtype: Type to return.
+        device: Desired device of returned tensor. Default:
+            uses the current device for the default tensor type.
+
+    Returns:
+        Quaternions as tensor of shape (N, 4).
+    """
+    target = torch.device(device) if isinstance(device, str) else device
+    samples = torch.randn((n, 4), dtype=dtype, device=target)
+    norms = torch.sqrt((samples * samples).sum(1))
+    # dividing by the norm carrying the sign of the real part makes that part >= 0
+    signed_norms = _copysign(norms, samples[:, 0])
+    return samples / signed_norms[:, None]
+
+
+def random_rotations(
+    n: int, dtype: Optional[torch.dtype] = None, device: Optional[Device] = None
+) -> torch.Tensor:
+    """
+    Sample a batch of random rotations and return them as 3x3 matrices.
+
+    Args:
+        n: Number of rotation matrices in a batch to return.
+        dtype: Type to return.
+        device: Device of returned tensor. Default: if None,
+            uses the current device for the default tensor type.
+
+    Returns:
+        Rotation matrices as tensor of shape (n, 3, 3).
+    """
+    # draw random quaternions first, then convert them to matrices
+    return quaternion_to_matrix(random_quaternions(n, dtype=dtype, device=device))
+
+
+def random_rotation(
+    dtype: Optional[torch.dtype] = None, device: Optional[Device] = None
+) -> torch.Tensor:
+    """
+    Sample one random rotation and return it as a 3x3 matrix.
+
+    Args:
+        dtype: Type to return
+        device: Device of returned tensor. Default: if None,
+            uses the current device for the default tensor type
+
+    Returns:
+        Rotation matrix as tensor of shape (3, 3).
+    """
+    return random_rotations(n=1, dtype=dtype, device=device)[0]
+
+
+def standardize_quaternion(quaternions: torch.Tensor) -> torch.Tensor:
+    """
+    Bring unit quaternions to the standard form in which the real part is
+    non negative, negating every quaternion whose real part is below zero.
+
+    Args:
+        quaternions: tensor of shape (..., 4), real part first.
+
+    Returns:
+        Standardized quaternions as tensor of shape (..., 4).
+    """
+    negative_real = quaternions[..., :1] < 0
+    return torch.where(negative_real, -quaternions, quaternions)
+
+
+def quaternion_raw_multiply(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
+    """
+    Hamilton product of two quaternions; inputs broadcast by the usual torch rules.
+
+    Args:
+        a: Quaternions as tensor of shape (..., 4), real part first.
+        b: Quaternions as tensor of shape (..., 4), real part first.
+    Returns:
+        The product of a and b, a tensor of quaternions shape (..., 4).
+    """
+    a0, a1, a2, a3 = torch.unbind(a, -1)
+    b0, b1, b2, b3 = torch.unbind(b, -1)
+    product = (
+        a0 * b0 - a1 * b1 - a2 * b2 - a3 * b3,
+        a0 * b1 + a1 * b0 + a2 * b3 - a3 * b2,
+        a0 * b2 - a1 * b3 + a2 * b0 + a3 * b1,
+        a0 * b3 + a1 * b2 - a2 * b1 + a3 * b0,
+    )
+    return torch.stack(product, -1)
+
+
+def quaternion_multiply(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
+    """
+    Compose two rotations given as quaternions and return the quaternion of
+    the composition in standard form, i.e. with nonnegative real part.
+    Usual torch rules for broadcasting apply.
+
+    Args:
+        a: Quaternions as tensor of shape (..., 4), real part first.
+        b: Quaternions as tensor of shape (..., 4), real part first.
+
+    Returns:
+        The product of a and b, a tensor of quaternions of shape (..., 4).
+    """
+    # raw product first, then flip the sign wherever the real part is negative
+    return standardize_quaternion(quaternion_raw_multiply(a, b))
+
+
+def quaternion_invert(quaternion: torch.Tensor) -> torch.Tensor:
+    """
+    Invert rotations given as quaternions by negating the three imaginary
+    components, i.e. by taking the conjugate.
+
+    Args:
+        quaternion: Quaternions as tensor of shape (..., 4), with real part
+            first, which must be versors (unit quaternions).
+
+    Returns:
+        The inverse, a tensor of quaternions of shape (..., 4).
+    """
+    # keep the real part, negate the imaginary parts
+    conjugate_signs = torch.tensor([1, -1, -1, -1], device=quaternion.device)
+    return quaternion * conjugate_signs
+
+
+def quaternion_apply(quaternion: torch.Tensor, point: torch.Tensor) -> torch.Tensor:
+    """
+    Rotate 3D points by the rotations given as quaternions.
+    Usual torch rules for broadcasting apply.
+
+    Args:
+        quaternion: Tensor of quaternions, real part first, of shape (..., 4).
+        point: Tensor of 3D points of shape (..., 3).
+
+    Returns:
+        Tensor of rotated points of shape (..., 3).
+    """
+    if point.size(-1) != 3:
+        raise ValueError(f"Points are not in 3D, {point.shape}.")
+    # embed the points as quaternions with zero real part
+    zero_real = point.new_zeros(point.shape[:-1] + (1,))
+    pure = torch.cat((zero_real, point), -1)
+    # sandwich product q * p * q^-1; its imaginary part is the rotated point
+    rotated = quaternion_raw_multiply(quaternion, pure)
+    rotated = quaternion_raw_multiply(rotated, quaternion_invert(quaternion))
+    return rotated[..., 1:]
+
+
+def axis_angle_to_matrix(axis_angle: torch.Tensor) -> torch.Tensor:
+    """
+    Convert axis/angle rotations to rotation matrices via quaternions.
+
+    Args:
+        axis_angle: tensor of shape (..., 3) whose direction is the rotation
+            axis and whose magnitude is the angle turned anticlockwise,
+            in radians, around that axis.
+
+    Returns:
+        Rotation matrices as tensor of shape (..., 3, 3).
+    """
+    quaternions = axis_angle_to_quaternion(axis_angle)
+    return quaternion_to_matrix(quaternions)
+
+
+def matrix_to_axis_angle(matrix: torch.Tensor) -> torch.Tensor:
+    """
+    Convert rotation matrices to axis/angle rotations via quaternions.
+
+    Args:
+        matrix: Rotation matrices as tensor of shape (..., 3, 3).
+
+    Returns:
+        Tensor of shape (..., 3) whose direction is the rotation axis and
+        whose magnitude is the angle turned anticlockwise, in radians,
+        around that axis.
+    """
+    quaternions = matrix_to_quaternion(matrix)
+    return quaternion_to_axis_angle(quaternions)
+
+
+def axis_angle_to_quaternion(axis_angle: torch.Tensor) -> torch.Tensor:
+    """
+    Turn axis/angle rotation vectors into quaternions.
+
+    Args:
+        axis_angle: Rotations given as a vector in axis angle form,
+            as a tensor of shape (..., 3), where the magnitude is
+            the angle turned anticlockwise in radians around the
+            vector's direction.
+
+    Returns:
+        quaternions with real part first, as tensor of shape (..., 4).
+    """
+    theta = torch.norm(axis_angle, p=2, dim=-1, keepdim=True)
+    half_theta = theta * 0.5
+    tiny = theta.abs() < 1e-6
+    regular = ~tiny
+
+    # ratio = sin(theta / 2) / theta turns the axis-angle vector into the
+    # imaginary part of the quaternion without normalizing the axis first
+    ratio = torch.empty_like(theta)
+    ratio[regular] = torch.sin(half_theta[regular]) / theta[regular]
+    # near zero the division is ill-conditioned, so use the Taylor series:
+    # sin(x/2) is about x/2 - (x/2)^3/6, hence sin(x/2)/x is about 1/2 - x*x/48
+    theta_tiny = theta[tiny]
+    ratio[tiny] = 0.5 - (theta_tiny * theta_tiny) / 48
+
+    real = torch.cos(half_theta)
+    imag = axis_angle * ratio
+    return torch.cat([real, imag], dim=-1)
+
+
+def quaternion_to_axis_angle(quaternions: torch.Tensor) -> torch.Tensor:
+    """
+    Turn quaternions into axis/angle rotation vectors.
+
+    Args:
+        quaternions: quaternions with real part first,
+            as tensor of shape (..., 4).
+
+    Returns:
+        Rotations given as a vector in axis angle form, as a tensor
+            of shape (..., 3), where the magnitude is the angle
+            turned anticlockwise in radians around the vector's
+            direction.
+    """
+    imag = quaternions[..., 1:]
+    imag_norm = torch.norm(imag, p=2, dim=-1, keepdim=True)
+    half_theta = torch.atan2(imag_norm, quaternions[..., :1])
+    theta = 2 * half_theta
+    tiny = theta.abs() < 1e-6
+    regular = ~tiny
+
+    # ratio = sin(theta / 2) / theta; dividing the imaginary part by it undoes
+    ratio = torch.empty_like(theta)  # the scaling of the axis/angle vector
+    ratio[regular] = torch.sin(half_theta[regular]) / theta[regular]
+    # near zero use the Taylor series: sin(x/2)/x is about 1/2 - x*x/48
+    theta_tiny = theta[tiny]
+    ratio[tiny] = 0.5 - (theta_tiny * theta_tiny) / 48
+
+    return imag / ratio
+
+
+def rotation_6d_to_matrix(d6: torch.Tensor) -> torch.Tensor:
+    """
+    Maps the 6D rotation representation by Zhou et al. [1] to rotation matrices
+    with the Gram--Schmidt orthogonalization described in Section B of [1].
+    Args:
+        d6: 6D rotation representation, of size (*, 6)
+
+    Returns:
+        batch of rotation matrices of size (*, 3, 3)
+
+    [1] Zhou, Y., Barnes, C., Lu, J., Yang, J., & Li, H.
+    On the Continuity of Rotation Representations in Neural Networks.
+    IEEE Conference on Computer Vision and Pattern Recognition, 2019.
+    Retrieved from http://arxiv.org/abs/1812.07035
+    """
+    first_raw, second_raw = d6[..., :3], d6[..., 3:]
+    b1 = F.normalize(first_raw, dim=-1)
+    # remove from the second vector its component along b1, then renormalize
+    projection = (b1 * second_raw).sum(-1, keepdim=True)
+    b2 = F.normalize(second_raw - projection * b1, dim=-1)
+    b3 = torch.cross(b1, b2, dim=-1)
+    return torch.stack((b1, b2, b3), dim=-2)
+
+
+def matrix_to_rotation_6d(matrix: torch.Tensor) -> torch.Tensor:
+    """
+    Maps rotation matrices to the 6D rotation representation by Zhou et al. [1],
+    which keeps the first two rows only. Note that 6D representation is not unique.
+    Args:
+        matrix: batch of rotation matrices of size (*, 3, 3)
+
+    Returns:
+        6D rotation representation, of size (*, 6)
+
+    [1] Zhou, Y., Barnes, C., Lu, J., Yang, J., & Li, H.
+    On the Continuity of Rotation Representations in Neural Networks.
+    IEEE Conference on Computer Vision and Pattern Recognition, 2019.
+    Retrieved from http://arxiv.org/abs/1812.07035
+    """
+    leading_shape = matrix.size()[:-2]
+    return matrix[..., :2, :].clone().reshape(leading_shape + (6,))
diff --git a/pytorch3d/pytorch3d/transforms/se3.py b/pytorch3d/pytorch3d/transforms/se3.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c8a5a1b10827914ba48ff0ee8653d7aca93ac3a
--- /dev/null
+++ b/pytorch3d/pytorch3d/transforms/se3.py
@@ -0,0 +1,221 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+
+from .so3 import _so3_exp_map, hat, so3_log_map
+
+
+def se3_exp_map(log_transform: torch.Tensor, eps: float = 1e-4) -> torch.Tensor:
+    """
+    Convert a batch of logarithmic representations of SE(3) matrices `log_transform`
+    to a batch of 4x4 SE(3) matrices using the exponential map.
+    See e.g. [1], Sec 9.4.2. for more detailed description.
+
+    A SE(3) matrix has the following form:
+        ```
+        [ R 0 ]
+        [ T 1 ] ,
+        ```
+    where `R` is a 3x3 rotation matrix and `T` is a 3-D translation vector.
+    SE(3) matrices are commonly used to represent rigid motions or camera extrinsics.
+
+    In the SE(3) logarithmic representation SE(3) matrices are
+    represented as 6-dimensional vectors `[log_translation | log_rotation]`,
+    i.e. a concatenation of two 3D vectors `log_translation` and `log_rotation`.
+
+    The conversion from the 6D representation to a 4x4 SE(3) matrix `transform`
+    is done as follows:
+        ```
+        transform = exp( [ hat(log_rotation) 0 ]
+                         [   log_translation 1 ] ) ,
+        ```
+    where `exp` is the matrix exponential and `hat` is the Hat operator [2].
+
+    Note that for any `log_transform` with `0 <= ||log_rotation|| < 2pi`
+    (i.e. the rotation angle is between 0 and 2pi), the following identity holds:
+    ```
+    se3_log_map(se3_exp_map(log_transform)) == log_transform
+    ```
+
+    The conversion has a singularity around `||log(transform)|| = 0`
+    which is handled by clamping controlled with the `eps` argument.
+
+    Args:
+        log_transform: Batch of vectors of shape `(minibatch, 6)`.
+        eps: A threshold for clipping the squared norm of the rotation logarithm
+            to avoid unstable gradients in the singular case.
+
+    Returns:
+        Batch of transformation matrices of shape `(minibatch, 4, 4)`.
+
+    Raises:
+        ValueError if `log_transform` is of incorrect shape.
+
+    [1] https://jinyongjeong.github.io/Download/SE3/jlblanco2010geometry3d_techrep.pdf
+    [2] https://en.wikipedia.org/wiki/Hat_operator
+    """
+
+    if log_transform.ndim != 2 or log_transform.shape[1] != 6:
+        raise ValueError("Expected input to be of shape (N, 6).")
+
+    N, _ = log_transform.shape
+
+    log_translation = log_transform[..., :3]
+    log_rotation = log_transform[..., 3:]
+
+    # rotation is an exponential map of log_rotation
+    (
+        R,
+        rotation_angles,
+        log_rotation_hat,
+        log_rotation_hat_square,
+    ) = _so3_exp_map(log_rotation, eps=eps)
+
+    # translation is V @ log_translation
+    V = _se3_V_matrix(
+        log_rotation,
+        log_rotation_hat,
+        log_rotation_hat_square,
+        rotation_angles,
+        eps=eps,
+    )
+    T = torch.bmm(V, log_translation[:, :, None])[:, :, 0]
+
+    transform = torch.zeros(
+        N, 4, 4, dtype=log_transform.dtype, device=log_transform.device
+    )
+
+    transform[:, :3, :3] = R
+    transform[:, :3, 3] = T
+    transform[:, 3, 3] = 1.0
+    # T was written into the last column; transposing moves it to the last row
+    return transform.permute(0, 2, 1)
+
+
+def se3_log_map(
+    transform: torch.Tensor, eps: float = 1e-4, cos_bound: float = 1e-4
+) -> torch.Tensor:
+    """
+    Convert a batch of 4x4 transformation matrices `transform`
+    to a batch of 6-dimensional SE(3) logarithms of the SE(3) matrices.
+    See e.g. [1], Sec 9.4.2. for more detailed description.
+
+    A SE(3) matrix has the following form:
+        ```
+        [ R 0 ]
+        [ T 1 ] ,
+        ```
+    where `R` is an orthonormal 3x3 rotation matrix and `T` is a 3-D translation vector.
+    SE(3) matrices are commonly used to represent rigid motions or camera extrinsics.
+
+    In the SE(3) logarithmic representation SE(3) matrices are
+    represented as 6-dimensional vectors `[log_translation | log_rotation]`,
+    i.e. a concatenation of two 3D vectors `log_translation` and `log_rotation`.
+
+    The conversion from the 4x4 SE(3) matrix `transform` to the
+    6D representation `log_transform = [log_translation | log_rotation]`
+    is done as follows:
+        ```
+        log_transform = log(transform)
+        log_translation = log_transform[3, :3]
+        log_rotation = inv_hat(log_transform[:3, :3])
+        ```
+    where `log` is the matrix logarithm
+    and `inv_hat` is the inverse of the Hat operator [2].
+
+    Note that for any valid 4x4 `transform` matrix, the following identity holds:
+    ```
+    se3_exp_map(se3_log_map(transform)) == transform
+    ```
+
+    The conversion has a singularity around `(transform=I)` which is handled
+    by clamping controlled with the `eps` and `cos_bound` arguments.
+
+    Args:
+        transform: batch of SE(3) matrices of shape `(minibatch, 4, 4)`.
+        eps: A threshold for clipping the squared norm of the rotation logarithm
+            to avoid division by zero in the singular case.
+        cos_bound: Clamps the cosine of the rotation angle to
+            [-1 + cos_bound, 3 - cos_bound] to avoid non-finite outputs.
+            The non-finite outputs can be caused by passing small rotation angles
+            to the `acos` function in `so3_rotation_angle` of `so3_log_map`.
+
+    Returns:
+        Batch of logarithms of input SE(3) matrices
+        of shape `(minibatch, 6)`.
+
+    Raises:
+        ValueError if `transform` is of incorrect shape.
+        ValueError if `R` has an unexpected trace.
+
+    [1] https://jinyongjeong.github.io/Download/SE3/jlblanco2010geometry3d_techrep.pdf
+    [2] https://en.wikipedia.org/wiki/Hat_operator
+    """
+
+    if transform.ndim != 3:
+        raise ValueError("Input tensor shape has to be (N, 4, 4).")
+
+    N, dim1, dim2 = transform.shape
+    if dim1 != 4 or dim2 != 4:
+        raise ValueError("Input tensor shape has to be (N, 4, 4).")
+
+    if not torch.allclose(transform[:, :3, 3], torch.zeros_like(transform[:, :3, 3])):
+        raise ValueError("All elements of `transform[:, :3, 3]` should be 0.")
+
+    # log_rot is just so3_log_map of the transposed upper left 3x3 block
+    R = transform[:, :3, :3].permute(0, 2, 1)
+    log_rotation = so3_log_map(R, eps=eps, cos_bound=cos_bound)
+
+    # log_translation is V^-1 @ T; V is clamped with the caller's `eps`
+    T = transform[:, 3, :3]
+    V = _se3_V_matrix(*_get_se3_V_input(log_rotation, eps=eps), eps=eps)
+    log_translation = torch.linalg.solve(V, T[:, :, None])[:, :, 0]
+
+    return torch.cat((log_translation, log_rotation), dim=1)
+
+
+def _se3_V_matrix(
+    log_rotation: torch.Tensor,
+    log_rotation_hat: torch.Tensor,
+    log_rotation_hat_square: torch.Tensor,
+    rotation_angles: torch.Tensor,
+    eps: float = 1e-4,
+) -> torch.Tensor:
+    """
+    Computes the "V" matrix from [1], Sec 9.4.2 as
+        `V = I + hat * (1 - cos(a)) / a^2 + hat^2 * (a - sin(a)) / a^3`,
+    where `a` are the rotation angles. `log_rotation` only supplies the dtype
+    and device of the identity; `eps` is accepted but not used here.
+
+    [1] https://jinyongjeong.github.io/Download/SE3/jlblanco2010geometry3d_techrep.pdf
+    """
+    identity = torch.eye(3, dtype=log_rotation.dtype, device=log_rotation.device)
+    # scalar coefficients per angle; two trailing axes are added before use
+    # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+    hat_coeff = (1 - torch.cos(rotation_angles)) / (rotation_angles**2)
+    # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+    hat_sq_coeff = (rotation_angles - torch.sin(rotation_angles)) / (
+        rotation_angles**3
+    )
+    return (
+        identity[None]
+        + log_rotation_hat * hat_coeff[:, None, None]
+        + log_rotation_hat_square * hat_sq_coeff[:, None, None]
+    )
+
+
+def _get_se3_V_input(log_rotation: torch.Tensor, eps: float = 1e-4):
+    """
+    Derives from `log_rotation` the positional inputs of `_se3_V_matrix`: the
+    angles (sqrt of the squared norm clamped at `eps`), hat matrix and its square.
+    """
+    # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+    squared_norms = (log_rotation**2).sum(-1)
+    angles = torch.clamp(squared_norms, eps).sqrt()
+    skew = hat(log_rotation)
+    skew_square = torch.bmm(skew, skew)
+    return log_rotation, skew, skew_square, angles
diff --git a/pytorch3d/pytorch3d/transforms/so3.py b/pytorch3d/pytorch3d/transforms/so3.py
new file mode 100644
index 0000000000000000000000000000000000000000..dea68a904cda120c68cf6da38c35f505d7f5ab96
--- /dev/null
+++ b/pytorch3d/pytorch3d/transforms/so3.py
@@ -0,0 +1,268 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import warnings
+from typing import Tuple
+
+import torch
+from pytorch3d.transforms import rotation_conversions
+
+from ..transforms import acos_linear_extrapolation
+
+
+def so3_relative_angle(
+    R1: torch.Tensor,
+    R2: torch.Tensor,
+    cos_angle: bool = False,
+    cos_bound: float = 1e-4,
+    eps: float = 1e-4,
+) -> torch.Tensor:
+    """
+    Calculates the relative angle (in radians) between pairs of
+    rotation matrices `R1` and `R2` with `angle = acos(0.5 * (Trace(R1 R2^T)-1))`.
+
+    .. note::
+        This corresponds to a geodesic distance on the 3D manifold of rotation
+        matrices.
+
+    Args:
+        R1: Batch of rotation matrices of shape `(minibatch, 3, 3)`.
+        R2: Batch of rotation matrices of shape `(minibatch, 3, 3)`.
+        cos_angle: If True, return the cosine of the relative angle rather than
+            the angle itself. This can avoid the unstable calculation of `acos`.
+        cos_bound: Forwarded to `so3_rotation_angle`. When positive, the angle
+            is computed with `acos_linear_extrapolation` using the bounds
+            [-1 + cos_bound, 1 - cos_bound], which avoids the non-finite
+            outputs/gradients of `acos` for rotation angles close to 0 or π.
+            Has no effect when `cos_angle` is True.
+        eps: Tolerance for the valid trace check of the relative rotation matrix
+            in `so3_rotation_angle`.
+    Returns:
+        Corresponding rotation angles of shape `(minibatch,)`.
+        If `cos_angle==True`, returns the cosine of the angles.
+
+    Raises:
+        ValueError if the product `R1 R2^T` is not a batch of 3x3 matrices.
+        ValueError if the product `R1 R2^T` has an unexpected trace.
+    """
+    R12 = torch.bmm(R1, R2.permute(0, 2, 1))
+    return so3_rotation_angle(R12, cos_angle=cos_angle, cos_bound=cos_bound, eps=eps)
+
+
+def so3_rotation_angle(
+    R: torch.Tensor,
+    eps: float = 1e-4,
+    cos_angle: bool = False,
+    cos_bound: float = 1e-4,
+) -> torch.Tensor:
+    """
+    Computes the rotation angle (in radians) of every matrix in the batch `R`
+    via `angle = acos(0.5 * (Trace(R) - 1))`.
+
+    Every trace has to lie in `[-1 - eps, 3 + eps]`; `eps` is a small constant
+    that tolerates errors caused by limited machine precision.
+
+    Args:
+        R: Batch of rotation matrices of shape `(minibatch, 3, 3)`.
+        eps: Tolerance for the valid trace check.
+        cos_angle: If True, return the cosine of the rotation angles rather
+            than the angles themselves, which avoids the unstable `acos`.
+        cos_bound: If positive, the angle is evaluated with
+            `acos_linear_extrapolation` using the bounds
+            [-1 + cos_bound, 1 - cos_bound] to avoid non-finite
+            outputs/gradients of `acos` for angles close to 0 or π;
+            otherwise plain `torch.acos` is applied. Has no effect when
+            `cos_angle` is True.
+
+    Returns:
+        Corresponding rotation angles of shape `(minibatch,)`.
+        If `cos_angle` is True, the cosines of the angles instead.
+
+    Raises:
+        ValueError if `R` is of incorrect shape.
+        ValueError if `R` has an unexpected trace.
+    """
+    _, n_rows, n_cols = R.shape
+    if (n_rows, n_cols) != (3, 3):
+        raise ValueError("Input has to be a batch of 3x3 Tensors.")
+
+    trace = R[:, 0, 0] + R[:, 1, 1] + R[:, 2, 2]
+
+    # Reject the batch as soon as one trace leaves the tolerated interval.
+    below_range = trace < -1.0 - eps
+    above_range = trace > 3.0 + eps
+    if (below_range | above_range).any():
+        raise ValueError("A matrix has trace outside valid range [-1-eps,3+eps].")
+
+    # Cosine of the rotation angle.
+    cos_phi = (trace - 1.0) * 0.5
+    if cos_angle:
+        return cos_phi
+
+    if not cos_bound > 0.0:
+        return torch.acos(cos_phi)
+    bound = 1.0 - cos_bound
+    return acos_linear_extrapolation(cos_phi, (-bound, bound))
+
+
+def so3_exp_map(log_rot: torch.Tensor, eps: float = 0.0001) -> torch.Tensor:
+    """
+    Convert a batch of logarithmic representations of rotation matrices `log_rot`
+    to a batch of 3x3 rotation matrices using Rodrigues formula [1].
+
+    In the logarithmic representation, each rotation matrix is represented as
+    a 3-dimensional vector (`log_rot`) whose l2-norm and direction correspond
+    to the magnitude of the rotation angle and the axis of rotation respectively.
+
+    `eps` is forwarded to `_so3_exp_map`, where it only clamps the auxiliary
+    rotation angles; the returned matrices are built from `log_rot` directly.
+
+    Args:
+        log_rot: Batch of vectors of shape `(minibatch, 3)`.
+        eps: Forwarded to `_so3_exp_map`; does not change the returned matrices.
+
+    Returns:
+        Batch of rotation matrices of shape `(minibatch, 3, 3)`.
+
+    Raises:
+        ValueError if `log_rot` is of incorrect shape.
+
+    [1] https://en.wikipedia.org/wiki/Rodrigues%27_rotation_formula
+    """
+    return _so3_exp_map(log_rot, eps=eps)[0]
+
+
+def so3_exponential_map(log_rot: torch.Tensor, eps: float = 0.0001) -> torch.Tensor:
+    warnings.warn(
+        """so3_exponential_map is deprecated,
+        Use so3_exp_map instead.
+        so3_exponential_map will be removed in future releases.""",
+        PendingDeprecationWarning,
+    )
+    # Deprecated alias: after warning, delegate to so3_exp_map unchanged.
+    return so3_exp_map(log_rot, eps)
+
+
+def _so3_exp_map(
+    log_rot: torch.Tensor, eps: float = 0.0001
+) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Computes the so3 exponential map of `log_rot` (shape `(N, 3)`) and returns
+    `(R, rot_angles, skews, skews_square)`: the rotation matrices, the clamped
+    rotation angles, `hat(log_rot)` and the matrix square of the latter.
+    """
+    _, dim = log_rot.shape
+    if dim != 3:
+        raise ValueError("Input tensor shape has to be Nx3.")
+    # squared l2-norms of the rotation vectors
+    nrms = (log_rot * log_rot).sum(1)
+    # rotation angles; the squared norms are clamped from below by eps
+    rot_angles = torch.clamp(nrms, eps).sqrt()
+    skews = hat(log_rot)
+    skews_square = torch.bmm(skews, skews)
+    # R is computed from log_rot alone, so eps does not influence it.
+    R = rotation_conversions.axis_angle_to_matrix(log_rot)
+
+    return R, rot_angles, skews, skews_square
+
+
+def so3_log_map(
+    R: torch.Tensor, eps: float = 0.0001, cos_bound: float = 1e-4
+) -> torch.Tensor:
+    """
+    Convert a batch of 3x3 rotation matrices `R` to a batch of 3-dimensional
+    matrix logarithms via `rotation_conversions.matrix_to_axis_angle`.
+    The conversion has a singularity around `(R=I)`.
+
+    Args:
+        R: batch of rotation matrices of shape `(minibatch, 3, 3)`.
+        eps: (unused, for backward compatibility)
+        cos_bound: (unused, for backward compatibility)
+
+    Returns:
+        Batch of logarithms of input rotation matrices of shape `(minibatch, 3)`.
+    Raises:
+        ValueError if `R` is of incorrect shape.
+    """
+    N, dim1, dim2 = R.shape
+    if dim1 != 3 or dim2 != 3:
+        raise ValueError("Input has to be a batch of 3x3 Tensors.")
+
+    return rotation_conversions.matrix_to_axis_angle(R)
+
+
+def hat_inv(h: torch.Tensor) -> torch.Tensor:
+    """
+    Compute the inverse Hat operator [1] of a batch of 3x3 matrices.
+
+    Args:
+        h: Batch of skew-symmetric matrices of shape `(minibatch, 3, 3)`.
+
+    Returns:
+        Batch of 3d vectors of shape `(minibatch, 3)`.
+
+    Raises:
+        ValueError if `h` is of incorrect shape.
+        ValueError if `h` is not skew-symmetric (absolute tolerance 1e-5).
+
+    [1] https://en.wikipedia.org/wiki/Hat_operator
+    """
+
+    N, dim1, dim2 = h.shape
+    if dim1 != 3 or dim2 != 3:
+        raise ValueError("Input has to be a batch of 3x3 Tensors.")
+    # largest entry of |h + h^T| over the whole batch; 0 for skew-symmetric input
+    ss_diff = torch.abs(h + h.permute(0, 2, 1)).max()
+
+    HAT_INV_SKEW_SYMMETRIC_TOL = 1e-5
+    if float(ss_diff) > HAT_INV_SKEW_SYMMETRIC_TOL:
+        raise ValueError("One of input matrices is not skew-symmetric.")
+    # read the vector components from the entries (2,1), (0,2) and (1,0)
+    x = h[:, 2, 1]
+    y = h[:, 0, 2]
+    z = h[:, 1, 0]
+
+    v = torch.stack((x, y, z), dim=1)
+
+    return v
+
+
+def hat(v: torch.Tensor) -> torch.Tensor:
+    """
+    Compute the Hat operator [1] of a batch of 3D vectors.
+
+    Args:
+        v: Batch of vectors of shape `(minibatch, 3)`.
+
+    Returns:
+        Batch of skew-symmetric matrices of shape
+        `(minibatch, 3, 3)` where each matrix is of the form:
+            `[    0  -v_z   v_y ]
+             [  v_z     0  -v_x ]
+             [ -v_y   v_x     0 ]`
+
+    Raises:
+        ValueError if `v` is of incorrect shape.
+
+    [1] https://en.wikipedia.org/wiki/Hat_operator
+    """
+    # v has to be 2-dimensional: the unpacking below fails for any other rank.
+    N, dim = v.shape
+    if dim != 3:
+        raise ValueError("Input vectors have to be 3-dimensional.")
+
+    h = torch.zeros((N, 3, 3), dtype=v.dtype, device=v.device)
+
+    x, y, z = v.unbind(1)
+    # fill the six off-diagonal entries; the diagonal stays zero
+    h[:, 0, 1] = -z
+    h[:, 0, 2] = y
+    h[:, 1, 0] = z
+    h[:, 1, 2] = -x
+    h[:, 2, 0] = -y
+    h[:, 2, 1] = x
+
+    return h
diff --git a/pytorch3d/pytorch3d/transforms/transform3d.py b/pytorch3d/pytorch3d/transforms/transform3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..cbef7cbbdb769f7ad0986e308a93a8561fc94691
--- /dev/null
+++ b/pytorch3d/pytorch3d/transforms/transform3d.py
@@ -0,0 +1,855 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+import os
+import warnings
+from typing import List, Optional, Union
+
+import torch
+
+from ..common.datatypes import Device, get_device, make_device
+from ..common.workaround import _safe_det_3x3
+from .rotation_conversions import _axis_angle_rotation
+from .se3 import se3_log_map
+
+
+class Transform3d:
+    """
+    A Transform3d object encapsulates a batch of N 3D transformations, and knows
+    how to transform points and normal vectors. Suppose that t is a Transform3d;
+    then we can do the following:
+
+    .. code-block:: python
+
+        N = len(t)
+        points = torch.randn(N, P, 3)
+        normals = torch.randn(N, P, 3)
+        points_transformed = t.transform_points(points)    # => (N, P, 3)
+        normals_transformed = t.transform_normals(normals)  # => (N, P, 3)
+
+
+    BROADCASTING
+    Transform3d objects support broadcasting. Suppose that t1 and tN are
+    Transform3d objects with len(t1) == 1 and len(tN) == N respectively. Then we
+    can broadcast transforms like this:
+
+    .. code-block:: python
+
+        t1.transform_points(torch.randn(P, 3))     # => (P, 3)
+        t1.transform_points(torch.randn(1, P, 3))  # => (1, P, 3)
+        t1.transform_points(torch.randn(M, P, 3))  # => (M, P, 3)
+        tN.transform_points(torch.randn(P, 3))     # => (N, P, 3)
+        tN.transform_points(torch.randn(1, P, 3))  # => (N, P, 3)
+
+
+    COMBINING TRANSFORMS
+    Transform3d objects can be combined in two ways: composing and stacking.
+    Composing is function composition. Given Transform3d objects t1, t2, t3,
+    the following all compute the same thing:
+
+    .. code-block:: python
+
+        y1 = t3.transform_points(t2.transform_points(t1.transform_points(x)))
+        y2 = t1.compose(t2).compose(t3).transform_points(x)
+        y3 = t1.compose(t2, t3).transform_points(x)
+
+
+    Composing transforms should broadcast.
+
+    .. code-block:: python
+
+        if len(t1) == 1 and len(t2) == N, then len(t1.compose(t2)) == N.
+
+    We can also stack a sequence of Transform3d objects, which represents
+    composition along the batch dimension; then the following should compute the
+    same thing.
+
+    .. code-block:: python
+
+        N, M = len(tN), len(tM)
+        xN = torch.randn(N, P, 3)
+        xM = torch.randn(M, P, 3)
+        y1 = torch.cat([tN.transform_points(xN), tM.transform_points(xM)], dim=0)
+        y2 = tN.stack(tM).transform_points(torch.cat([xN, xM], dim=0))
+
+    BUILDING TRANSFORMS
+    We provide convenience methods for easily building Transform3d objects
+    as compositions of basic transforms.
+
+    .. code-block:: python
+
+        # Scale by 0.5, then translate by (1, 2, 3)
+        t1 = Transform3d().scale(0.5).translate(1, 2, 3)
+
+        # Scale each axis by a different amount, then translate, then scale
+        t2 = Transform3d().scale(1, 3, 3).translate(2, 3, 1).scale(2.0)
+
+        t3 = t1.compose(t2)
+        tN = t1.stack(t3, t3)
+
+
+    BACKPROP THROUGH TRANSFORMS
+    When building transforms, we can also parameterize them by Torch tensors;
+    in this case we can backprop through the construction and application of
+    Transform objects, so they could be learned via gradient descent or
+    predicted by a neural network.
+
+    .. code-block:: python
+
+        s1_params = torch.randn(N, requires_grad=True)
+        t_params = torch.randn(N, 3, requires_grad=True)
+        s2_params = torch.randn(N, 3, requires_grad=True)
+
+        t = Transform3d().scale(s1_params).translate(t_params).scale(s2_params)
+        x = torch.randn(N, 3)
+        y = t.transform_points(x)
+        loss = compute_loss(y)
+        loss.backward()
+
+        with torch.no_grad():
+            s1_params -= lr * s1_params.grad
+            t_params -= lr * t_params.grad
+            s2_params -= lr * s2_params.grad
+
+    CONVENTIONS
+    We adopt a right-hand coordinate system, meaning that rotation about an axis
+    with a positive angle results in a counter clockwise rotation.
+
+    This class assumes that transformations are applied on inputs which
+    are row vectors. The internal representation of the Nx4x4 transformation
+    matrix is of the form:
+
+    .. code-block:: python
+
+        M = [
+                [Rxx, Ryx, Rzx, 0],
+                [Rxy, Ryy, Rzy, 0],
+                [Rxz, Ryz, Rzz, 0],
+                [Tx,  Ty,  Tz,  1],
+            ]
+
+    To apply the transformation to points, which are row vectors, the latter are
+    converted to homogeneous (4D) coordinates and right-multiplied by the M matrix:
+
+    .. code-block:: python
+
+        points = [[0, 1, 2]]  # (1 x 3) xyz coordinates of a point
+        [transformed_points, 1] ∝ [points, 1] @ M
+
+    """
+
+    def __init__(
+        self,
+        dtype: torch.dtype = torch.float32,
+        device: Device = "cpu",
+        matrix: Optional[torch.Tensor] = None,
+    ) -> None:
+        """
+        Args:
+            dtype: The data type of the transformation matrix;
+                used only if `matrix = None`, otherwise `matrix.dtype` is taken.
+            device: The device for storing the implemented transformation.
+                If `matrix != None`, uses the device of input `matrix`.
+            matrix: A tensor of shape (4, 4) or of shape (minibatch, 4, 4)
+                representing the 4x4 3D transformation matrix.
+                If `None`, an identity of the given `dtype` on `device` is used.
+        Raises:
+            ValueError if `matrix` is not of shape (4, 4) or (minibatch, 4, 4).
+        """
+        if matrix is None:
+            self._matrix = torch.eye(4, dtype=dtype, device=device).view(1, 4, 4)
+        else:
+            if matrix.ndim not in (2, 3):
+                raise ValueError('"matrix" has to be a 2- or a 3-dimensional tensor.')
+            if matrix.shape[-2] != 4 or matrix.shape[-1] != 4:
+                raise ValueError(
+                    '"matrix" has to be a tensor of shape (minibatch, 4, 4) or (4, 4).'
+                )
+            # set dtype and device from matrix
+            dtype = matrix.dtype
+            device = matrix.device
+            self._matrix = matrix.view(-1, 4, 4)  # a view, no copy of `matrix`
+
+        self._transforms = []  # store transforms to compose
+        self._lu = None  # LU placeholder; its use in transform_normals is disabled
+        self.device = make_device(device)
+        self.dtype = dtype
+
+    def __len__(self) -> int:  # batch size of the composed matrix
+        return self.get_matrix().shape[0]
+
+    def __getitem__(
+        self, index: Union[int, List[int], slice, torch.BoolTensor, torch.LongTensor]
+    ) -> "Transform3d":
+        """
+        Args:
+            index: int, slice, list of ints, bool or long tensor (negatives allowed).
+        Returns:
+            Transform3d holding the selected composed matrices; a plain Transform3d
+            if the subclass constructor does not accept `matrix` (e.g. Translate).
+        """
+        selected = self.get_matrix()[[index] if isinstance(index, int) else index]
+        try:
+            return self.__class__(matrix=selected)
+        except TypeError:  # subclass __init__ without a `matrix` keyword
+            return Transform3d(matrix=selected)
+
+    def compose(self, *others: "Transform3d") -> "Transform3d":
+        """
+        Return a new Transform3d representing the composition of self with the
+        given other transforms. Nothing is multiplied here: `others` are appended
+        to the internal list and only combined later by `get_matrix`.
+
+        Args:
+            *others: Any number of Transform3d objects
+        Returns:
+            A new Transform3d with the stored transforms
+        """
+        out = Transform3d(dtype=self.dtype, device=self.device)
+        out._matrix = self._matrix.clone()
+        for other in others:
+            if not isinstance(other, Transform3d):
+                msg = "Only possible to compose Transform3d objects; got %s"
+                raise ValueError(msg % type(other))
+        out._transforms = self._transforms + list(others)  # new list, same objects
+        return out
+
+    def get_matrix(self) -> torch.Tensor:
+        """
+        Returns a 4×4 matrix corresponding to each transform in the batch.
+
+        If the transform was composed from others, the matrix for the composite
+        transform will be returned.
+        For example, given transforms t1, t2, and t3 and a set of points x,
+        the following two results should agree:
+
+        .. code-block:: python
+
+            y1 = t1.compose(t2, t3).transform_points(x)
+            y2 = t3.transform_points(t2.transform_points(t1.transform_points(x)))
+            torch.allclose(y1, y2)
+
+        Where necessary, those transforms are broadcast against each other.
+
+        Returns:
+            A (N, 4, 4) batch of transformation matrices representing
+                the stored transforms. See the class documentation for the conventions.
+        """
+        composed_matrix = self._matrix.clone()  # the result never aliases _matrix
+        if len(self._transforms) > 0:
+            for other in self._transforms:
+                other_matrix = other.get_matrix()
+                composed_matrix = _broadcast_bmm(composed_matrix, other_matrix)
+        return composed_matrix
+
+    def get_se3_log(self, eps: float = 1e-4, cos_bound: float = 1e-4) -> torch.Tensor:
+        """
+        Returns a 6D SE(3) log vector corresponding to each transform in the batch.
+
+        In the SE(3) logarithmic representation SE(3) matrices are
+        represented as 6-dimensional vectors `[log_translation | log_rotation]`,
+        i.e. a concatenation of two 3D vectors `log_translation` and `log_rotation`.
+
+        The conversion from the 4x4 SE(3) matrix `transform` to the
+        6D representation `log_transform = [log_translation | log_rotation]`
+        is done as follows::
+
+            log_transform = log(transform.get_matrix())
+            log_translation = log_transform[3, :3]
+            log_rotation = inv_hat(log_transform[:3, :3])
+
+        where `log` is the matrix logarithm
+        and `inv_hat` is the inverse of the Hat operator [2].
+
+        See the docstring for `se3.se3_log_map` and [1], Sec 9.4.2. for more
+        detailed description.
+
+        Args:
+            eps: A threshold for clipping the squared norm of the rotation logarithm
+                to avoid division by zero in the singular case.
+            cos_bound: Forwarded to `se3_log_map`. Intended to clamp the cosine of
+                the rotation angle to [-1 + cos_bound, 1 - cos_bound] to avoid
+                non-finite outputs of `acos`; note that `so3_log_map` in so3.py
+                documents its own `cos_bound` as unused (kept for compatibility).
+
+        Returns:
+            A (N, 6) tensor, rows of which represent the individual transforms
+            stored in the object as SE(3) logarithms.
+
+        Raises:
+            ValueError if the stored transform is not Euclidean (e.g. R is not a rotation
+                matrix or the last column has non-zeros in the first three places).
+
+        [1] https://jinyongjeong.github.io/Download/SE3/jlblanco2010geometry3d_techrep.pdf
+        [2] https://en.wikipedia.org/wiki/Hat_operator
+        """
+        return se3_log_map(self.get_matrix(), eps, cos_bound)
+
+    def _get_matrix_inverse(self) -> torch.Tensor:
+        """
+        Return the inverse of self._matrix (generic `torch.inverse` version).
+        """
+        return torch.inverse(self._matrix)
+
+    def inverse(self, invert_composed: bool = False) -> "Transform3d":
+        """
+        Returns a new Transform3d object that represents an inverse of the
+        current transformation.
+
+        Args:
+            invert_composed:
+                - True: First compose the list of stored transformations
+                  and then apply inverse to the result. This is
+                  potentially slower for classes of transformations
+                  with inverses that can be computed efficiently
+                  (e.g. rotations and translations).
+                - False: Invert the individual stored transformations
+                  independently without composing them.
+
+        Returns:
+            A new Transform3d object containing the inverse of the original
+            transformation.
+        """
+
+        tinv = Transform3d(dtype=self.dtype, device=self.device)
+
+        if invert_composed:
+            # first compose then invert
+            tinv._matrix = torch.inverse(self.get_matrix())
+        else:
+            # self._get_matrix_inverse() may be overridden by subclasses with
+            # a cheaper closed-form inverse of self._matrix
+            i_matrix = self._get_matrix_inverse()
+
+            # 2 cases:
+            if len(self._transforms) > 0:
+                # a) Either we have a non-empty list of transforms:
+                # tinv._matrix stays the identity; the stored transforms are
+                # inverted one by one in reversed order and the inverse of
+                # self._matrix is appended last. After composing with
+                # get_matrix(), this correctly right-multiplies by the
+                # inverse of self._matrix at the end of the composition.
+                tinv._transforms = [t.inverse() for t in reversed(self._transforms)]
+                last = Transform3d(dtype=self.dtype, device=self.device)
+                last._matrix = i_matrix
+                tinv._transforms.append(last)
+            else:
+                # b) Or there are no stored transformations
+                # we just set inverted matrix
+                tinv._matrix = i_matrix
+
+        return tinv
+
+    def stack(self, *others: "Transform3d") -> "Transform3d":
+        """
+        Return a new batched Transform3d representing the batch elements from
+        self and all the given other transforms all batched together.
+
+        Args:
+            *others: Any number of Transform3d objects
+        Returns:
+            A new Transform3d whose matrix concatenates, along dim 0, the composed
+            matrices of self and `others`; dtype and device are taken from self.
+        """
+        transforms = [self] + list(others)
+        matrix = torch.cat([t.get_matrix() for t in transforms], dim=0)
+        out = Transform3d(dtype=self.dtype, device=self.device)
+        out._matrix = matrix
+        return out
+
+    def transform_points(self, points, eps: Optional[float] = None) -> torch.Tensor:
+        """
+        Use this transform to transform a set of 3D points. Assumes row major
+        ordering of the input points.
+
+        Args:
+            points: Tensor of shape (P, 3) or (N, P, 3)
+            eps: If not None, the magnitude of the last (homogeneous)
+                coordinate is clamped to at least eps before the final
+                division, keeping its sign:
+                last_coord := (last_coord.sign() + (last_coord==0)) *
+                torch.clamp(last_coord.abs(), eps),
+                i.e. the last coordinates that are exactly 0 will
+                be clamped to +eps.
+
+        Returns:
+            points_out: points of shape (N, P, 3), or (P, 3) when `points` is
+            2-dimensional and the output batch size is 1.
+        """
+        points_batch = points.clone()
+        if points_batch.dim() == 2:
+            points_batch = points_batch[None]  # (P, 3) -> (1, P, 3)
+        if points_batch.dim() != 3:
+            msg = "Expected points to have dim = 2 or dim = 3: got shape %r"
+            raise ValueError(msg % repr(points.shape))
+        # append a homogeneous coordinate of 1 to every point
+        N, P, _3 = points_batch.shape
+        ones = torch.ones(N, P, 1, dtype=points.dtype, device=points.device)
+        points_batch = torch.cat([points_batch, ones], dim=2)
+
+        composed_matrix = self.get_matrix()
+        points_out = _broadcast_bmm(points_batch, composed_matrix)
+        denom = points_out[..., 3:]  # homogeneous coordinate used as denominator
+        if eps is not None:
+            denom_sign = denom.sign() + (denom == 0.0).type_as(denom)
+            denom = denom_sign * torch.clamp(denom.abs(), eps)
+        points_out = points_out[..., :3] / denom
+
+        # When transform is (1, 4, 4) and points is (P, 3) return
+        # points_out of shape (P, 3)
+        if points_out.shape[0] == 1 and points.dim() == 2:
+            points_out = points_out.reshape(points.shape)
+
+        return points_out
+
+    def transform_normals(self, normals) -> torch.Tensor:
+        """
+        Use this transform to transform a set of normal vectors.
+
+        Args:
+            normals: Tensor of shape (P, 3) or (N, P, 3)
+
+        Returns:
+            normals_out: Tensor of shape (N, P, 3), or (P, 3) when `normals` is
+            2-dimensional and the output batch size is 1.
+        """
+        if normals.dim() not in [2, 3]:
+            msg = "Expected normals to have dim = 2 or dim = 3: got shape %r"
+            raise ValueError(msg % (normals.shape,))
+        composed_matrix = self.get_matrix()
+        # normals are multiplied by the inverse transpose of the upper-left 3x3
+        # TODO: inverse is bad! Solve a linear system instead
+        mat = composed_matrix[:, :3, :3]
+        normals_out = _broadcast_bmm(normals, mat.transpose(1, 2).inverse())
+
+        # This doesn't pass unit tests. TODO investigate further
+        # if self._lu is None:
+        #     self._lu = self._matrix[:, :3, :3].transpose(1, 2).lu()
+        # normals_out = normals.lu_solve(*self._lu)
+
+        # When transform is (1, 4, 4) and normals is (P, 3) return
+        # normals_out of shape (P, 3)
+        if normals_out.shape[0] == 1 and normals.dim() == 2:
+            normals_out = normals_out.reshape(normals.shape)
+
+        return normals_out
+
+    def translate(self, *args, **kwargs) -> "Transform3d":
+        # Compose with a Translate created on this transform's device and dtype.
+        step = Translate(*args, device=self.device, dtype=self.dtype, **kwargs)
+        return self.compose(step)
+
+    def scale(self, *args, **kwargs) -> "Transform3d":
+        # Compose with a Scale created on this transform's device and dtype.
+        step = Scale(*args, device=self.device, dtype=self.dtype, **kwargs)
+        return self.compose(step)
+
+    def rotate(self, *args, **kwargs) -> "Transform3d":
+        # Compose with a Rotate created on this transform's device and dtype.
+        step = Rotate(*args, device=self.device, dtype=self.dtype, **kwargs)
+        return self.compose(step)
+
+    def rotate_axis_angle(self, *args, **kwargs) -> "Transform3d":
+        # Compose with a RotateAxisAngle created on this transform's device/dtype.
+        step = RotateAxisAngle(*args, device=self.device, dtype=self.dtype, **kwargs)
+        return self.compose(step)
+
+    def clone(self) -> "Transform3d":
+        """
+        Deep copy of the Transform3d object. `_matrix`, `_lu` (if set) and every
+        stored transform are cloned individually.
+
+        Returns:
+            A new plain Transform3d, also when called on a subclass instance.
+        """
+        other = Transform3d(dtype=self.dtype, device=self.device)
+        if self._lu is not None:
+            other._lu = [elem.clone() for elem in self._lu]
+        other._matrix = self._matrix.clone()
+        other._transforms = [t.clone() for t in self._transforms]
+        return other
+
+    def to(
+        self,
+        device: Device,
+        copy: bool = False,
+        dtype: Optional[torch.dtype] = None,
+    ) -> "Transform3d":
+        """
+        Match functionality of torch.Tensor.to()
+        If copy = True, or the device or dtype differs from the requested one,
+        the result is a clone of self moved/cast as requested.
+        If copy = False and both device and dtype already match,
+        then self is returned.
+
+        Args:
+          device: Device (as str or torch.device) for the new tensor.
+          copy: Boolean indicator whether or not to clone self. Default False.
+          dtype: If not None, casts the internal tensor variables
+              to a given torch.dtype.
+
+        Returns:
+          Transform3d object.
+        """
+        device_ = make_device(device)
+        dtype_ = self.dtype if dtype is None else dtype
+        skip_to = self.device == device_ and self.dtype == dtype_
+
+        if not copy and skip_to:
+            return self
+
+        other = self.clone()
+
+        if skip_to:
+            return other
+
+        other.device = device_
+        other.dtype = dtype_
+        other._matrix = other._matrix.to(device=device_, dtype=dtype_)
+        other._transforms = [
+            t.to(device_, copy=copy, dtype=dtype_) for t in other._transforms
+        ]
+        return other
+
+    def cpu(self) -> "Transform3d":  # shorthand for to("cpu")
+        return self.to("cpu")
+
+    def cuda(self) -> "Transform3d":  # shorthand for to("cuda")
+        return self.to("cuda")
+
+
+class Translate(Transform3d):
+    def __init__(
+        self,
+        x,
+        y=None,
+        z=None,
+        dtype: torch.dtype = torch.float32,
+        device: Optional[Device] = None,
+    ) -> None:
+        """
+        Create a new Transform3d representing 3D translations.
+
+        Option I: Translate(xyz, dtype=torch.float32, device='cpu')
+            xyz should be a tensor of shape (N, 3)
+
+        Option II: Translate(x, y, z, dtype=torch.float32, device='cpu')
+            Here x, y, and z will be broadcast against each other and
+            concatenated to form the translation. Each can be:
+                - A python scalar
+                - A torch scalar
+                - A 1D torch tensor
+        """
+        xyz = _handle_input(x, y, z, dtype, device, "Translate")
+        super().__init__(device=xyz.device, dtype=dtype)
+        N = xyz.shape[0]
+        # N identity matrices with the translation written into the last row
+        mat = torch.eye(4, dtype=dtype, device=self.device)
+        mat = mat.view(1, 4, 4).repeat(N, 1, 1)
+        mat[:, 3, :3] = xyz
+        self._matrix = mat
+
+    def _get_matrix_inverse(self) -> torch.Tensor:
+        """
+        Return the inverse of self._matrix by negating its translation row.
+        """
+        inv_mask = self._matrix.new_ones([1, 4, 4])
+        inv_mask[0, 3, :3] = -1.0
+        i_matrix = self._matrix * inv_mask
+        return i_matrix
+
+
+class Scale(Transform3d):
+    def __init__(
+        self,
+        x,
+        y=None,
+        z=None,
+        dtype: torch.dtype = torch.float32,
+        device: Optional[Device] = None,
+    ) -> None:
+        """
+        A Transform3d representing a scaling operation, with different scale
+        factors along each coordinate axis.
+
+        Option I: Scale(s, dtype=torch.float32, device='cpu')
+            s can be one of
+                - Python scalar or torch scalar: Single uniform scale
+                - 1D torch tensor of shape (N,): A batch of uniform scale
+                - 2D torch tensor of shape (N, 3): Scale differently along each axis
+
+        Option II: Scale(x, y, z, dtype=torch.float32, device='cpu')
+            Each of x, y, and z can be one of
+                - python scalar
+                - torch scalar
+                - 1D torch tensor
+        """
+        xyz = _handle_input(x, y, z, dtype, device, "scale", allow_singleton=True)
+        super().__init__(device=xyz.device, dtype=dtype)
+        N = xyz.shape[0]
+        # N identity matrices with the scale factors written onto the diagonal
+        # TODO: Can we do this all in one go somehow?
+        mat = torch.eye(4, dtype=dtype, device=self.device)
+        mat = mat.view(1, 4, 4).repeat(N, 1, 1)
+        mat[:, 0, 0] = xyz[:, 0]
+        mat[:, 1, 1] = xyz[:, 1]
+        mat[:, 2, 2] = xyz[:, 2]
+        self._matrix = mat
+
+    def _get_matrix_inverse(self) -> torch.Tensor:
+        """
+        Return the inverse of self._matrix: the reciprocal of its 4 diagonal entries.
+        """
+        xyz = torch.stack([self._matrix[:, i, i] for i in range(4)], dim=1)
+        # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
+        ixyz = 1.0 / xyz
+        # pyre-fixme[6]: For 1st param expected `Tensor` but got `float`.
+        imat = torch.diag_embed(ixyz, dim1=1, dim2=2)
+        return imat
+
+
+class Rotate(Transform3d):
+    def __init__(
+        self,
+        R: torch.Tensor,
+        dtype: torch.dtype = torch.float32,
+        device: Optional[Device] = None,
+        orthogonal_tol: float = 1e-5,
+    ) -> None:
+        """
+        Create a new Transform3d representing a 3D rotation given directly
+        as a rotation matrix or a batch of rotation matrices.
+
+        Args:
+            R: a tensor of shape (3, 3) or (N, 3, 3)
+            orthogonal_tol: tolerance for the orthogonality test of R, which
+                only runs when PYTORCH3D_CHECK_ROTATION_MATRICES is set to "1"
+        """
+        target_device = get_device(R, device)
+        super().__init__(device=target_device, dtype=dtype)
+        # A single (3, 3) matrix becomes a batch of one.
+        R = R[None] if R.dim() == 2 else R
+        if R.shape[-2:] != (3, 3):
+            msg = "R must have shape (3, 3) or (N, 3, 3); got %s" % repr(R.shape)
+            raise ValueError(msg)
+        R = R.to(device=target_device, dtype=dtype)
+        check_requested = os.environ.get("PYTORCH3D_CHECK_ROTATION_MATRICES", "0")
+        if check_requested == "1":
+            # aten::all_close makes the check slow, hence the opt-in via env var.
+            _check_valid_rotation_matrix(R, tol=orthogonal_tol)
+        mat = torch.eye(4, dtype=dtype, device=target_device)
+        mat = mat.view(1, 4, 4).repeat(R.shape[0], 1, 1)
+        # Place the rotation in the upper-left 3x3 block of each 4x4 matrix.
+        mat[:, :3, :3] = R
+        self._matrix = mat
+
+    def _get_matrix_inverse(self) -> torch.Tensor:
+        """
+        Return the inverse of self._matrix, obtained by transposing each matrix.
+        """
+        return self._matrix.transpose(1, 2).contiguous()
+
+
+class RotateAxisAngle(Rotate):
+    def __init__(
+        self,
+        angle,
+        axis: str = "X",
+        degrees: bool = True,
+        dtype: torch.dtype = torch.float32,
+        device: Optional[Device] = None,
+    ) -> None:
+        """
+        Create a new Transform3d representing 3D rotation about an axis
+        by an angle.
+
+        Assuming a right-hand coordinate system, positive rotation angles result
+        in a counter clockwise rotation.
+
+        Args:
+            angle: a Python scalar, a torch scalar or a torch tensor of shape (N,)
+            axis:
+                string: one of ["X", "Y", "Z"] (case-insensitive) indicating the
+                axis about which to rotate.
+                NOTE: All batch elements are rotated about the same axis.
+            degrees:
+                if True, `angle` is given in degrees and is converted to radians;
+                otherwise it is used as is.
+        """
+        axis = axis.upper()
+        if axis not in ("X", "Y", "Z"):
+            msg = "Expected axis to be one of ['X', 'Y', 'Z']; got %s"
+            raise ValueError(msg % axis)
+        theta = _handle_angle_input(angle, dtype, device, "RotateAxisAngle")
+        if degrees:
+            theta = theta / 180.0 * math.pi
+        # _axis_angle_rotation produces matrices meant for column vectors, while
+        # this transform is applied to row vectors, so the matrix is transposed.
+        # R will always be of shape (N, 3, 3)
+        R = _axis_angle_rotation(axis, theta).transpose(1, 2)
+        super().__init__(device=theta.device, R=R, dtype=dtype)
+
+
+def _handle_coord(c, dtype: torch.dtype, device: torch.device) -> torch.Tensor:
+    """
+    Convert one coordinate argument into a 1D tensor on `device` with `dtype`.
+
+    Args:
+        c: Python scalar, torch scalar, or 1D torch tensor
+
+    Returns:
+        c_vec: 1D torch tensor (scalars become tensors of shape (1,))
+    """
+    if torch.is_tensor(c):
+        c_vec = c
+    else:
+        c_vec = torch.tensor(c, dtype=dtype, device=device)
+    # Promote 0-dim tensors to shape (1,) so they can be broadcast later.
+    c_vec = c_vec.view(1) if c_vec.dim() == 0 else c_vec
+    return c_vec.to(device=device, dtype=dtype)
+
+
+def _handle_input(
+    x,
+    y,
+    z,
+    dtype: torch.dtype,
+    device: Optional[Device],
+    name: str,
+    allow_singleton: bool = False,
+) -> torch.Tensor:
+    """
+    Helper function to handle parsing logic for building transforms. The output
+    is always a tensor of shape (N, 3), but there are several types of allowed
+    input.
+
+    Case I: Single Matrix
+        In this case x is a tensor of shape (N, 3), and y and z are None. Here x
+        is returned after conversion to the requested dtype and resolved device.
+
+    Case II: Vectors and Scalars
+        In this case each of x, y, and z can be one of the following
+            - Python scalar
+            - Torch scalar
+            - 1D torch tensor of shape (N,) or (1,)
+        In this case x, y and z are broadcast to tensors of shape (N,)
+        and stacked into a tensor of shape (N, 3)
+
+    Case III: Singleton (only if allow_singleton=True)
+        In this case y and z are None, and x can be one of the following:
+            - Python scalar
+            - Torch scalar
+            - 1D torch tensor of shape (N,) or (1,)
+        Here x will be duplicated 3 times, and we return a tensor of shape (N, 3)
+
+    Returns:
+        xyz: Tensor of shape (N, 3)
+    """
+    device_ = get_device(x, device)
+    # If x is a 2D tensor it must have shape (N, 3); convert and return it
+    if torch.is_tensor(x) and x.dim() == 2:
+        if x.shape[1] != 3:
+            msg = "Expected tensor of shape (N, 3); got %r (in %s)"
+            raise ValueError(msg % (x.shape, name))
+        if y is not None or z is not None:
+            msg = "Expected y and z to be None (in %s)" % name
+            raise ValueError(msg)
+        return x.to(device=device_, dtype=dtype)
+
+    if allow_singleton and y is None and z is None:
+        y = x
+        z = x
+
+    # Convert all to 1D tensors
+    xyz = [_handle_coord(c, dtype, device_) for c in [x, y, z]]
+
+    # Broadcast length-1 entries to the largest length N and stack as columns
+    sizes = [c.shape[0] for c in xyz]
+    N = max(sizes)
+    for c in xyz:
+        if c.shape[0] != 1 and c.shape[0] != N:
+            msg = "Got non-broadcastable sizes %r (in %s)" % (sizes, name)
+            raise ValueError(msg)
+    xyz = [c.expand(N) for c in xyz]
+    xyz = torch.stack(xyz, dim=1)
+    return xyz
+
+
+def _handle_angle_input(
+    x, dtype: torch.dtype, device: Optional[Device], name: str
+) -> torch.Tensor:
+    """
+    Parse the angle argument of a rotation transform into a 1D tensor.
+    The output is always of shape (N,).
+
+    The input can be one of:
+        - Torch tensor of shape (N,)
+        - Python scalar
+        - Torch scalar
+    """
+    resolved_device = get_device(x, device)
+    if torch.is_tensor(x) and x.dim() > 1:
+        msg = "Expected tensor of shape (N,); got %r (in %s)"
+        raise ValueError(msg % (x.shape, name))
+    # Scalars and 0-dim tensors are promoted to shape (1,) by _handle_coord.
+    return _handle_coord(x, dtype, resolved_device)
+
+
+def _broadcast_bmm(a, b) -> torch.Tensor:
+    """
+    Batch matrix-multiply a by b, broadcasting over the batch dimension.
+
+    Args:
+        a: torch tensor of shape (P, K) or (M, P, K)
+        b: torch tensor of shape (N, K, K)
+
+    Returns:
+        The batched product of a and b. The output batch dimension is max(N, M).
+
+    If M != N, one of the two batch dimensions must be 1; that tensor is
+    expanded to the batch size of the other one. A 2D `a` is treated as a
+    batch of one. Any other mismatch raises a ValueError.
+    """
+    a = a[None] if a.dim() == 2 else a
+    batch_a, batch_b = len(a), len(b)
+    if batch_a != batch_b:
+        if batch_a == 1:
+            a = a.expand(batch_b, -1, -1)
+        elif batch_b == 1:
+            b = b.expand(batch_a, -1, -1)
+        else:
+            msg = "Expected batch dim for bmm to be equal or 1; got %r, %r"
+            raise ValueError(msg % (a.shape, b.shape))
+    return a.bmm(b)
+
+
+@torch.no_grad()
+def _check_valid_rotation_matrix(R, tol: float = 1e-7) -> None:
+    """
+    Check that R is a batch of valid rotation matrices, i.e. that it satisfies:
+
+    ``RR^T = I and det(R) = 1``
+
+    Args:
+        R: an (N, 3, 3) matrix
+        tol: absolute tolerance of the orthogonality comparison
+
+    Returns:
+        None
+
+    Emits a warning if R is an invalid rotation matrix.
+    """
+    identity = torch.eye(3, dtype=R.dtype, device=R.device)
+    identity = identity.view(1, 3, 3).expand(R.shape[0], -1, -1)
+    # Orthogonality: R @ R^T has to match the identity up to `tol`.
+    is_orthogonal = torch.allclose(R.bmm(R.transpose(1, 2)), identity, atol=tol)
+    # Unit determinant, compared with the default tolerances of allclose.
+    dets = _safe_det_3x3(R)
+    has_unit_det = torch.allclose(dets, torch.ones_like(dets))
+    if is_orthogonal and has_unit_det:
+        return
+    warnings.warn("R is not a valid rotation matrix")
diff --git a/pytorch3d/pytorch3d/utils/__init__.py b/pytorch3d/pytorch3d/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3681e8236e65a5f802044a408c70b3b4d42e7a5
--- /dev/null
+++ b/pytorch3d/pytorch3d/utils/__init__.py
@@ -0,0 +1,18 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .camera_conversions import (
+    cameras_from_opencv_projection,
+    opencv_from_cameras_projection,
+    pulsar_from_cameras_projection,
+    pulsar_from_opencv_projection,
+)
+from .checkerboard import checkerboard
+from .ico_sphere import ico_sphere
+from .torus import torus
+
+
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
diff --git a/pytorch3d/pytorch3d/utils/camera_conversions.py b/pytorch3d/pytorch3d/utils/camera_conversions.py
new file mode 100644
index 0000000000000000000000000000000000000000..83ce2bb5fb05a360a4d7523c227477a585143378
--- /dev/null
+++ b/pytorch3d/pytorch3d/utils/camera_conversions.py
@@ -0,0 +1,157 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+
+from ..renderer import PerspectiveCameras
+from ..renderer.camera_conversions import (
+    _cameras_from_opencv_projection,
+    _opencv_from_cameras_projection,
+    _pulsar_from_cameras_projection,
+    _pulsar_from_opencv_projection,
+)
+
+
+def cameras_from_opencv_projection(
+    R: torch.Tensor,
+    tvec: torch.Tensor,
+    camera_matrix: torch.Tensor,
+    image_size: torch.Tensor,
+) -> PerspectiveCameras:
+    """
+    Converts a batch of OpenCV-conventioned cameras parametrized with the
+    rotation matrices `R`, translation vectors `tvec`, and the camera
+    calibration matrices `camera_matrix` to `PerspectiveCameras` in PyTorch3D
+    convention.
+
+    The conversion is carried out such that a projection
+    of a 3D shape to the OpenCV-conventioned screen of size `image_size` results
+    in the same image as a projection with the corresponding PyTorch3D camera
+    to the NDC screen convention of PyTorch3D.
+
+    More specifically, the OpenCV convention projects points to the OpenCV screen
+    space as follows::
+
+        x_screen_opencv = camera_matrix @ (R @ x_world + tvec)
+
+    followed by the homogenization of `x_screen_opencv`.
+
+    Note:
+        The parameters `R, tvec, camera_matrix` correspond to the inputs of
+        `cv2.projectPoints(x_world, rvec, tvec, camera_matrix, [])`,
+        where `rvec` is an axis-angle vector that can be obtained from
+        the rotation matrix `R` expected here by calling the `so3_log_map` function.
+        Correspondingly, `R` can be obtained from `rvec` by calling `so3_exp_map`.
+
+    Args:
+        R: A batch of rotation matrices of shape `(N, 3, 3)`.
+        tvec: A batch of translation vectors of shape `(N, 3)`.
+        camera_matrix: A batch of camera calibration matrices of shape `(N, 3, 3)`.
+        image_size: A tensor of shape `(N, 2)` containing the sizes of the images
+            (height, width) attached to each camera.
+
+    Returns:
+        cameras_pytorch3d: A batch of `N` cameras in the PyTorch3D convention.
+    """
+    return _cameras_from_opencv_projection(R, tvec, camera_matrix, image_size)
+
+
+def opencv_from_cameras_projection(
+    cameras: PerspectiveCameras,
+    image_size: torch.Tensor,
+) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Convert PyTorch3D `PerspectiveCameras` to OpenCV-convention parameters.
+
+    Converts a batch of `PerspectiveCameras` into OpenCV-convention
+    rotation matrices `R`, translation vectors `tvec`, and the camera
+    calibration matrices `camera_matrix`. This operation is exactly the inverse
+    of `cameras_from_opencv_projection`.
+
+    Note:
+        The outputs `R, tvec, camera_matrix` correspond to the inputs of
+        `cv2.projectPoints(x_world, rvec, tvec, camera_matrix, [])`,
+        where `rvec` is an axis-angle vector that can be obtained from
+        the rotation matrix `R` output here by calling the `so3_log_map` function.
+        Correspondingly, `R` can be obtained from `rvec` by calling `so3_exp_map`.
+
+    Args:
+        cameras: A batch of `N` cameras in the PyTorch3D convention.
+        image_size: A tensor of shape `(N, 2)` containing the sizes of the images
+            (height, width) attached to each camera.
+
+    Returns:
+        R: A batch of rotation matrices of shape `(N, 3, 3)`.
+        tvec: A batch of translation vectors of shape `(N, 3)`.
+        camera_matrix: A batch of camera calibration matrices of shape `(N, 3, 3)`.
+    """
+    return _opencv_from_cameras_projection(cameras, image_size)
+
+
+def pulsar_from_opencv_projection(
+    R: torch.Tensor,
+    tvec: torch.Tensor,
+    camera_matrix: torch.Tensor,
+    image_size: torch.Tensor,
+    znear: float = 0.1,
+) -> torch.Tensor:
+    """
+    Convert OpenCV-style camera parameters to Pulsar-style camera parameters.
+
+    Note:
+        * Pulsar does NOT support different focal lengths for x and y.
+          For conversion, we use the average of fx and fy.
+        * The Pulsar renderer MUST use a left-handed coordinate system for this
+          mapping to work.
+        * The resulting image will be vertically flipped; the user has to
+          undo this AFTER rendering.
+        * The parameters `R, tvec, camera_matrix` correspond to the outputs
+          of `cv2.decomposeProjectionMatrix`.
+
+    Args:
+        R: A batch of rotation matrices of shape `(N, 3, 3)`.
+        tvec: A batch of translation vectors of shape `(N, 3)`.
+        camera_matrix: A batch of camera calibration matrices of shape `(N, 3, 3)`.
+        image_size: A tensor of shape `(N, 2)` containing the sizes of the images
+            (height, width) attached to each camera.
+        znear (float): The near clipping value to use for Pulsar.
+
+    Returns:
+        cameras_pulsar: A batch of `N` Pulsar camera vectors in the Pulsar
+            convention `(N, 13)` (3 translation, 6 rotation, focal_length, sensor_width,
+            c_x, c_y).
+    """
+    return _pulsar_from_opencv_projection(R, tvec, camera_matrix, image_size, znear)
+
+
+def pulsar_from_cameras_projection(
+    cameras: PerspectiveCameras,
+    image_size: torch.Tensor,
+) -> torch.Tensor:
+    """
+    Convert PyTorch3D `PerspectiveCameras` to Pulsar-style camera parameters.
+
+    Note:
+        * Pulsar does NOT support different focal lengths for x and y.
+          For conversion, we use the average of fx and fy.
+        * The Pulsar renderer MUST use a left-handed coordinate system for this
+          mapping to work.
+        * The resulting image will be vertically flipped; the user has to
+          undo this AFTER rendering.
+
+    Args:
+        cameras: A batch of `N` cameras in the PyTorch3D convention.
+        image_size: A tensor of shape `(N, 2)` containing the sizes of the images
+            (height, width) attached to each camera.
+
+    Returns:
+        cameras_pulsar: A batch of `N` Pulsar camera vectors in the Pulsar
+            convention `(N, 13)` (3 translation, 6 rotation, focal_length, sensor_width,
+            c_x, c_y).
+    """
+    return _pulsar_from_cameras_projection(cameras, image_size)
diff --git a/pytorch3d/pytorch3d/utils/checkerboard.py b/pytorch3d/pytorch3d/utils/checkerboard.py
new file mode 100644
index 0000000000000000000000000000000000000000..625c08684525ce937ed0ba728394ec49c5e0203c
--- /dev/null
+++ b/pytorch3d/pytorch3d/utils/checkerboard.py
@@ -0,0 +1,89 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from typing import Optional, Tuple
+
+import torch
+from pytorch3d.common.compat import meshgrid_ij
+from pytorch3d.renderer.mesh.textures import TexturesAtlas
+from pytorch3d.structures.meshes import Meshes
+
+
+def checkerboard(
+    radius: int = 4,
+    color1: Tuple[float, ...] = (0.0, 0.0, 0.0),
+    color2: Tuple[float, ...] = (1.0, 1.0, 1.0),
+    device: Optional[torch.types._device] = None,
+) -> Meshes:
+    """
+    Returns a mesh of squares in the xy-plane where each unit is one of the two given
+    colors and adjacent squares have opposite colors.
+    Args:
+        radius: how many squares in each direction from the origin
+        color1: background color
+        color2: foreground color (must have the same number of channels as color1)
+        device: device on which all tensors of the mesh are created (default: CPU)
+    Returns:
+        new Meshes object containing one mesh.
+    """
+    if device is None:
+        device = torch.device("cpu")
+    if radius < 1:
+        raise ValueError("radius must be > 0")
+
+    num_verts_per_row = 2 * radius + 1
+
+    # construct 2D grid of 3D vertices; the z plane is created on `device` as well
+    x = torch.arange(-radius, radius + 1, device=device)
+    grid_y, grid_x = meshgrid_ij(x, x)
+    verts = torch.stack(
+        [grid_x, grid_y, torch.zeros(grid_x.shape, device=device)], dim=-1
+    )
+    verts = verts.view(1, -1, 3)
+
+    top_triangle_idx = torch.arange(
+        0, num_verts_per_row * (num_verts_per_row - 1), device=device
+    )
+    top_triangle_idx = torch.stack(
+        [
+            top_triangle_idx,
+            top_triangle_idx + 1,
+            top_triangle_idx + num_verts_per_row + 1,
+        ],
+        dim=-1,
+    )
+
+    bottom_triangle_idx = top_triangle_idx[:, [0, 2, 1]] + torch.tensor(
+        [0, 0, num_verts_per_row - 1], device=device
+    )
+
+    faces = torch.zeros(
+        (1, len(top_triangle_idx) + len(bottom_triangle_idx), 3),
+        dtype=torch.long,
+        device=device,
+    )
+    faces[0, ::2] = top_triangle_idx
+    faces[0, 1::2] = bottom_triangle_idx
+
+    # construct range of indices that excludes the boundary to avoid wrong triangles
+    indexing_range = torch.arange(
+        0, 2 * num_verts_per_row * num_verts_per_row, device=device
+    ).view(num_verts_per_row, num_verts_per_row, 2)
+    # removes boundaries from list of indices
+    indexing_range = indexing_range[:-1, :-1].reshape(-1)
+
+    faces = faces[:, indexing_range]
+
+    # adding color (on `device`, so that the texture matches verts and faces)
+    colors = torch.tensor(color1, device=device).repeat(
+        2 * num_verts_per_row * num_verts_per_row, 1
+    )
+    colors[2::4] = torch.tensor(color2, device=device)
+    colors[3::4] = torch.tensor(color2, device=device)
+    colors = colors[None, indexing_range, None, None]
+
+    return Meshes(verts=verts, faces=faces, textures=TexturesAtlas(colors))
diff --git a/pytorch3d/pytorch3d/utils/ico_sphere.py b/pytorch3d/pytorch3d/utils/ico_sphere.py
new file mode 100644
index 0000000000000000000000000000000000000000..da7ed10b9e137920d18c5092f7390ecfe3dd3cf4
--- /dev/null
+++ b/pytorch3d/pytorch3d/utils/ico_sphere.py
@@ -0,0 +1,84 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import torch
+from pytorch3d.ops.subdivide_meshes import SubdivideMeshes
+from pytorch3d.structures.meshes import Meshes
+
+
+# Vertex coordinates for a level 0 ico-sphere.
+_ico_verts0 = [
+    [-0.5257, 0.8507, 0.0000],
+    [0.5257, 0.8507, 0.0000],
+    [-0.5257, -0.8507, 0.0000],
+    [0.5257, -0.8507, 0.0000],
+    [0.0000, -0.5257, 0.8507],
+    [0.0000, 0.5257, 0.8507],
+    [0.0000, -0.5257, -0.8507],
+    [0.0000, 0.5257, -0.8507],
+    [0.8507, 0.0000, -0.5257],
+    [0.8507, 0.0000, 0.5257],
+    [-0.8507, 0.0000, -0.5257],
+    [-0.8507, 0.0000, 0.5257],
+]
+
+
+# Faces for level 0 ico-sphere
+_ico_faces0 = [
+    [0, 11, 5],
+    [0, 5, 1],
+    [0, 1, 7],
+    [0, 7, 10],
+    [0, 10, 11],
+    [1, 5, 9],
+    [5, 11, 4],
+    [11, 10, 2],
+    [10, 7, 6],
+    [7, 1, 8],
+    [3, 9, 4],
+    [3, 4, 2],
+    [3, 2, 6],
+    [3, 6, 8],
+    [3, 8, 9],
+    [4, 9, 5],
+    [2, 4, 11],
+    [6, 2, 10],
+    [8, 6, 7],
+    [9, 8, 1],
+]
+
+
+def ico_sphere(level: int = 0, device=None):
+    """
+    Build a unit ico-sphere mesh (verts and faces) whose faces are all
+    oriented consistently.
+
+    Args:
+        level: integer specifying the number of iterations for subdivision
+               of the mesh faces. Each additional level will result in four new
+               faces per face.
+        device: A torch.device object on which the outputs will be allocated.
+
+    Returns:
+        Meshes object with verts and faces.
+    """
+    if device is None:
+        device = torch.device("cpu")
+    if level < 0:
+        raise ValueError("level must be >= 0.")
+
+    # Start from the level 0 ico-sphere stored in the module-level tables.
+    verts = torch.tensor(_ico_verts0, dtype=torch.float32, device=device)
+    faces = torch.tensor(_ico_faces0, dtype=torch.int64, device=device)
+    for _ in range(level):
+        # Subdivide once, then rescale every vertex to unit length.
+        refined = SubdivideMeshes()(Meshes(verts=[verts], faces=[faces]))
+        verts = refined.verts_list()[0]
+        verts /= verts.norm(p=2, dim=1, keepdim=True)
+        faces = refined.faces_list()[0]
+
+    return Meshes(verts=[verts], faces=[faces])
diff --git a/pytorch3d/pytorch3d/utils/torus.py b/pytorch3d/pytorch3d/utils/torus.py
new file mode 100644
index 0000000000000000000000000000000000000000..c5c34785832c2d580f343a264c8a8228bd0d5a44
--- /dev/null
+++ b/pytorch3d/pytorch3d/utils/torus.py
@@ -0,0 +1,71 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from itertools import tee
+from math import cos, pi, sin
+from typing import Iterator, Optional, Tuple
+
+import torch
+from pytorch3d.structures.meshes import Meshes
+
+
+# Make an iterator over the adjacent pairs: (-1, 0), (0, 1), ..., (N - 2, N - 1)
+def _make_pair_range(N: int) -> Iterator[Tuple[int, int]]:
+    # The first range lags the second one by a single step, so zipping them
+    # yields the N consecutive pairs that start at (-1, 0).
+    return zip(range(-1, N - 1), range(N))
+
+
+def torus(
+    r: float, R: float, sides: int, rings: int, device: Optional[torch.device] = None
+) -> Meshes:
+    """
+    Create vertices and faces for a torus.
+
+    Args:
+        r: Inner radius of the torus, i.e. the radius of its tube.
+        R: Outer radius of the torus, measured from the origin to the tube center.
+        sides: Number of inner divisions (vertices around the tube).
+        rings: Number of outer divisions (positions of the tube around the torus).
+        device: Device on which the outputs will be allocated.
+
+    Returns:
+        Meshes object with the generated vertices and faces.
+
+    Raises:
+        ValueError: if sides or rings is not greater than 0.
+    """
+    if not (sides > 0):
+        raise ValueError("sides must be > 0.")
+    if not (rings > 0):
+        raise ValueError("rings must be > 0.")
+    if not device:
+        device = torch.device("cpu")
+
+    # Both angles stop one step short of 2 pi, so no vertex is duplicated.
+    phis = [2 * pi * i / rings for i in range(rings)]
+    thetas = [2 * pi * j / sides for j in range(sides)]
+    verts = []
+    for phi in phis:
+        for theta in thetas:
+            rho = R + r * cos(theta)
+            # The vertex for ring i and side j lands at index i * sides + j.
+            verts.append([rho * cos(phi), rho * sin(phi), r * sin(theta)])
+
+    faces = []
+    for ring0, ring1 in _make_pair_range(rings):
+        base0 = (ring0 % rings) * sides
+        base1 = (ring1 % rings) * sides
+        for side0, side1 in _make_pair_range(sides):
+            col0 = side0 % sides
+            col1 = side1 % sides
+            # Two triangles cover the quad between neighbouring rings and sides.
+            faces.append([base0 + col0, base1 + col0, base1 + col1])
+            faces.append([base1 + col1, base0 + col1, base0 + col0])
+
+    verts_list = [torch.tensor(verts, dtype=torch.float32, device=device)]
+    faces_list = [torch.tensor(faces, dtype=torch.int64, device=device)]
+    return Meshes(verts_list, faces_list)
diff --git a/pytorch3d/pytorch3d/vis/__init__.py b/pytorch3d/pytorch3d/vis/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..972cc5cebae7494281cc235b88df7c6e244d1cd0
--- /dev/null
+++ b/pytorch3d/pytorch3d/vis/__init__.py
@@ -0,0 +1,21 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import warnings
+
+
+try:
+    from .plotly_vis import get_camera_wireframe, plot_batch_individually, plot_scene
+except ModuleNotFoundError as err:
+    if "plotly" in str(err):
+        warnings.warn(
+            "Cannot import plotly-based visualization code."
+            " Please install plotly to enable (pip install plotly)."
+        )
+    else:
+        raise
+
+from .texture_vis import texturesuv_image_matplotlib, texturesuv_image_PIL
diff --git a/pytorch3d/pytorch3d/vis/plotly_vis.py b/pytorch3d/pytorch3d/vis/plotly_vis.py
new file mode 100644
index 0000000000000000000000000000000000000000..155e143d8ea9d761776c19fde7f77b5f1aeabb7d
--- /dev/null
+++ b/pytorch3d/pytorch3d/vis/plotly_vis.py
@@ -0,0 +1,1048 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import warnings
+from typing import Dict, List, NamedTuple, Optional, Tuple, Union
+
+import plotly.graph_objects as go
+import torch
+from plotly.subplots import make_subplots
+from pytorch3d.renderer import (
+    HeterogeneousRayBundle,
+    ray_bundle_to_ray_points,
+    RayBundle,
+    TexturesAtlas,
+    TexturesVertex,
+)
+from pytorch3d.renderer.camera_utils import camera_to_eye_at_up
+from pytorch3d.renderer.cameras import CamerasBase
+from pytorch3d.structures import join_meshes_as_scene, Meshes, Pointclouds
+
+
+# Union of the structure types that the plotting functions in this module
+# know how to turn into plotly traces.
+Struct = Union[CamerasBase, Meshes, Pointclouds, RayBundle, HeterogeneousRayBundle]
+
+
+def _get_len(struct: Union[Struct, List[Struct]]) -> int:  # pragma: no cover
+    """
+    Compute the length (usually the batch size) of the input structure.
+
+    Args:
+        struct: a single structure, or a list of structures.
+    Returns:
+        `len(struct)` for anything that is not a ray bundle; the number of
+        entries of `camera_counts` for a heterogeneous ray bundle; otherwise
+        the length of `directions`.
+    """
+    # pyre-ignore[6]
+    if _is_ray_bundle(struct):
+        # pyre-ignore[6]
+        if _is_heterogeneous_ray_bundle(struct):
+            # pyre-ignore[16]
+            sized = struct.camera_counts
+        else:
+            # pyre-ignore[16]
+            sized = struct.directions
+        return len(sized)
+    # Lists, cameras, meshes and pointclouds all support len() directly.
+    # pyre-ignore[6]
+    return len(struct)
+
+
+def _is_ray_bundle(struct: Struct) -> bool:
+    """
+    Duck-typed check for ray bundles, based on a `directions` attribute.
+
+    Args:
+        struct: Struct object to test
+    Returns:
+        True if something is a RayBundle, HeterogeneousRayBundle or
+        ImplicitronRayBundle, else False
+    """
+    has_directions = hasattr(struct, "directions")
+    return has_directions
+
+
+def _is_heterogeneous_ray_bundle(struct: Union[List[Struct], Struct]) -> bool:
+    """
+    Check whether the input carries a non-None `camera_counts` attribute.
+
+    Args:
+        struct: object to test
+    Returns:
+        True if something is a HeterogeneousRayBundle or ImplicitronRayBundle
+        and can't be reduced to RayBundle, else False
+    """
+    if not hasattr(struct, "camera_counts"):
+        return False
+    # pyre-ignore[16]
+    return struct.camera_counts is not None
+
+
+def get_camera_wireframe(scale: float = 0.3):  # pragma: no cover
+    """
+    Build the polyline that draws a camera symbol as a 3D line plot.
+
+    Args:
+        scale: factor by which every point of the wireframe is multiplied.
+    Returns:
+        A float tensor of shape (15, 3) holding the wireframe points in the
+        order in which they are connected.
+    """
+    # Points lying on the plane z = 2 (before scaling): the four corners of
+    # the rectangle plus the two points of the small marker on its y > 0 edge.
+    plane_points = 0.5 * torch.tensor(
+        [
+            [-2, 1.5, 4],
+            [0, 1.5, 4],
+            [0, 2, 4],
+            [2, 1.5, 4],
+            [-2, -1.5, 4],
+            [2, -1.5, 4],
+        ]
+    )
+    corner_a, marker_base, marker_tip, corner_b, corner_c, corner_d = plane_points
+    origin = torch.zeros(3)
+    axis_tip = torch.tensor([0, 0, 3]).float()
+
+    # The path revisits points so that the whole symbol is one polyline.
+    path = [
+        corner_a,
+        marker_base,
+        marker_tip,
+        marker_base,
+        corner_b,
+        corner_d,
+        corner_c,
+        corner_a,
+        origin,
+        corner_b,
+        corner_d,
+        origin,
+        corner_c,
+        origin,
+        axis_tip,
+    ]
+    return torch.stack(path) * scale
+
+
+class AxisArgs(NamedTuple):  # pragma: no cover
+    """
+    Default settings shared by the x, y and z axes of every subplot scene.
+
+    plot_scene converts an instance to a dict and uses it as the base
+    settings of all three axes; the per-axis `xaxis`/`yaxis`/`zaxis` kwargs
+    of plot_scene are layered on top of these values.
+    """
+
+    showgrid: bool = False
+    zeroline: bool = False
+    showline: bool = False
+    ticks: str = ""
+    showticklabels: bool = False
+    backgroundcolor: str = "#fff"
+    showaxeslabels: bool = False
+
+
+class Lighting(NamedTuple):  # pragma: no cover
+    """
+    Default lighting settings used when rendering meshes.
+
+    plot_scene converts an instance to a dict, which is then handed to the
+    plotly Mesh3d trace as its `lighting` argument.
+    """
+
+    ambient: float = 0.8
+    diffuse: float = 1.0
+    fresnel: float = 0.0
+    specular: float = 0.0
+    roughness: float = 0.5
+    facenormalsepsilon: float = 1e-6
+    vertexnormalsepsilon: float = 1e-12
+
+
+@torch.no_grad()
+def plot_scene(
+    plots: Dict[str, Dict[str, Struct]],
+    *,
+    viewpoint_cameras: Optional[CamerasBase] = None,
+    ncols: int = 1,
+    camera_scale: float = 0.3,
+    pointcloud_max_points: int = 20000,
+    pointcloud_marker_size: int = 1,
+    raybundle_max_rays: int = 20000,
+    raybundle_max_points_per_ray: int = 1000,
+    raybundle_ray_point_marker_size: int = 1,
+    raybundle_ray_line_width: int = 1,
+    **kwargs,
+):  # pragma: no cover
+    """
+    Main function to visualize Cameras, Meshes, Pointclouds, and RayBundle.
+    Plots input Cameras, Meshes, Pointclouds, and RayBundle data into named subplots,
+    with named traces based on the dictionary keys. Cameras are
+    rendered at the camera center location using a wireframe.
+
+    Args:
+        plots: A dict containing subplot and trace names,
+            as well as the Meshes, Cameras and Pointclouds objects to be rendered.
+            See below for examples of the format.
+        viewpoint_cameras: an instance of a Cameras object providing a location
+            to view the plotly plot from. If the batch size is equal
+            to the number of subplots, it is a one to one mapping.
+            If the batch size is 1, then that viewpoint will be used
+            for all the subplots.
+            Otherwise, the viewpoint_cameras will not be used and a warning
+            is issued.
+        ncols: the number of subplots per row
+        camera_scale: determines the size of the wireframe used to render cameras.
+        pointcloud_max_points: the maximum number of points to plot from
+            a pointcloud. If more are present, a random sample of size
+            pointcloud_max_points is used.
+        pointcloud_marker_size: the size of the points rendered by plotly
+            when plotting a pointcloud.
+        raybundle_max_rays: maximum number of rays of a RayBundle to visualize. Randomly
+            subsamples without replacement in case the number of rays is bigger than max_rays.
+        raybundle_max_points_per_ray: the maximum number of points per ray in RayBundle
+            to visualize. If more are present, a random sample of size
+            max_points_per_ray is used.
+        raybundle_ray_point_marker_size: the size of the ray points of a plotted RayBundle
+        raybundle_ray_line_width: the width of the plotted rays of a RayBundle
+        **kwargs: Accepts lighting (a Lighting object) and any of the args xaxis,
+            yaxis and zaxis which Plotly's scene accepts. Accepts axis_args,
+            which is an AxisArgs object that is applied to all 3 axes.
+            Example settings for axis_args and lighting are given at the
+            top of this file.
+
+    Returns:
+        The plotly figure containing one 3D scene per entry of `plots`.
+
+    Raises:
+        ValueError: if a trace is not a Cameras, Meshes, Pointclouds or
+            ray bundle object.
+
+    Example:
+
+    .. code-block:: python
+
+        mesh = ...
+        point_cloud = ...
+        fig = plot_scene({
+            "subplot_title": {
+                "mesh_trace_title": mesh,
+                "pointcloud_trace_title": point_cloud
+            }
+        })
+        fig.show()
+
+    The above example will render one subplot which has both a mesh and pointcloud.
+
+    If the Meshes, Pointclouds, or Cameras objects are batched, then every object in that batch
+    will be plotted in a single trace.
+
+    .. code-block:: python
+
+        mesh = ... # batch size 2
+        point_cloud = ... # batch size 2
+        fig = plot_scene({
+            "subplot_title": {
+                "mesh_trace_title": mesh,
+                "pointcloud_trace_title": point_cloud
+            }
+        })
+        fig.show()
+
+    The above example renders one subplot with 2 traces, each of which renders
+    both objects from their respective batched data.
+
+    Multiple subplots follow the same pattern:
+
+    .. code-block:: python
+
+        mesh = ... # batch size 2
+        point_cloud = ... # batch size 2
+        fig = plot_scene({
+            "subplot1_title": {
+                "mesh_trace_title": mesh[0],
+                "pointcloud_trace_title": point_cloud[0]
+            },
+            "subplot2_title": {
+                "mesh_trace_title": mesh[1],
+                "pointcloud_trace_title": point_cloud[1]
+            }
+        },
+        ncols=2)  # specify the number of subplots per row
+        fig.show()
+
+    The above example will render two subplots, each containing a mesh
+    and a pointcloud. The ncols argument will render two subplots in one row
+    instead of having them vertically stacked because the default is one subplot
+    per row.
+
+    To view plotly plots from a PyTorch3D camera's point of view, we can use
+    viewpoint_cameras:
+
+    .. code-block:: python
+
+        mesh = ... # batch size 2
+        R, T = look_at_view_transform(2.7, 0, [0, 180]) # 2 camera angles, front and back
+        # Any instance of CamerasBase works, here we use FoVPerspectiveCameras
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+        fig = plot_scene({
+            "subplot1_title": {
+                "mesh_trace_title": mesh[0]
+            },
+            "subplot2_title": {
+                "mesh_trace_title": mesh[1]
+            }
+        },
+        viewpoint_cameras=cameras)
+        fig.show()
+
+    The above example will render the first subplot seen from the camera on the +z axis,
+    and the second subplot from the viewpoint of the camera on the -z axis.
+
+    We can visualize these cameras as well:
+
+    .. code-block:: python
+
+        mesh = ...
+        R, T = look_at_view_transform(2.7, 0, [0, 180]) # 2 camera angles, front and back
+        # Any instance of CamerasBase works, here we use FoVPerspectiveCameras
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+        fig = plot_scene({
+            "subplot1_title": {
+                "mesh_trace_title": mesh,
+                "cameras_trace_title": cameras,
+            },
+        })
+        fig.show()
+
+    The above example will render one subplot with the mesh object
+    and two cameras.
+
+    RayBundle visualization is also supported:
+
+    .. code-block:: python
+
+        cameras = PerspectiveCameras(...)
+        ray_bundle = RayBundle(origins=..., lengths=..., directions=..., xys=...)
+        fig = plot_scene({
+            "subplot1_title": {
+                "ray_bundle_trace_title": ray_bundle,
+                "cameras_trace_title": cameras,
+            },
+        })
+        fig.show()
+
+    For an example of using kwargs, see below:
+
+    .. code-block:: python
+
+        mesh = ...
+        point_cloud = ...
+        fig = plot_scene({
+            "subplot_title": {
+                "mesh_trace_title": mesh,
+                "pointcloud_trace_title": point_cloud
+            }
+        },
+        axis_args=AxisArgs(backgroundcolor="rgb(200,230,200)")) # kwarg axis_args
+        fig.show()
+
+    The above example will render each axis with the input background color.
+
+    See the tutorials in pytorch3d/docs/tutorials for more examples
+    (namely rendered_color_points.ipynb and rendered_textured_meshes.ipynb).
+    """
+
+    subplots = list(plots.keys())
+    fig = _gen_fig_with_subplots(len(subplots), ncols, subplots)
+    lighting = kwargs.get("lighting", Lighting())._asdict()
+    axis_args_dict = kwargs.get("axis_args", AxisArgs())._asdict()
+
+    # Set axis arguments to defaults defined at the top of this file
+    x_settings = {**axis_args_dict}
+    y_settings = {**axis_args_dict}
+    z_settings = {**axis_args_dict}
+
+    # Update the axes with any axis settings passed in as kwargs.
+    x_settings.update(**kwargs.get("xaxis", {}))
+    y_settings.update(**kwargs.get("yaxis", {}))
+    z_settings.update(**kwargs.get("zaxis", {}))
+
+    camera = {
+        "up": {
+            "x": 0.0,
+            "y": 1.0,
+            "z": 0.0,
+        }  # set the up vector to match PyTorch3D world coordinates conventions
+    }
+    viewpoints_eye_at_up_world = None
+    n_viewpoint_cameras = 0
+    if viewpoint_cameras:
+        n_viewpoint_cameras = len(viewpoint_cameras)
+        if n_viewpoint_cameras in (1, len(subplots)):
+            # Calculate the vectors eye, at, up in world space
+            # to initialize the position of the camera in
+            # the plotly figure
+            viewpoints_eye_at_up_world = camera_to_eye_at_up(
+                viewpoint_cameras.get_world_to_view_transform().cpu()
+            )
+        else:
+            # The message is built from adjacent literals so that no source
+            # indentation leaks into the text shown to the user.
+            msg = (
+                "Invalid number {} of viewpoint cameras were provided. "
+                "Either 1 or {} cameras are required"
+            ).format(n_viewpoint_cameras, len(subplots))
+            warnings.warn(msg)
+
+    for subplot_idx, subplot_name in enumerate(subplots):
+        traces = plots[subplot_name]
+        for trace_name, struct in traces.items():
+            if isinstance(struct, Meshes):
+                _add_mesh_trace(fig, struct, trace_name, subplot_idx, ncols, lighting)
+            elif isinstance(struct, Pointclouds):
+                _add_pointcloud_trace(
+                    fig,
+                    struct,
+                    trace_name,
+                    subplot_idx,
+                    ncols,
+                    pointcloud_max_points,
+                    pointcloud_marker_size,
+                )
+            elif isinstance(struct, CamerasBase):
+                _add_camera_trace(
+                    fig, struct, trace_name, subplot_idx, ncols, camera_scale
+                )
+            elif _is_ray_bundle(struct):
+                _add_ray_bundle_trace(
+                    fig,
+                    struct,
+                    trace_name,
+                    subplot_idx,
+                    ncols,
+                    raybundle_max_rays,
+                    raybundle_max_points_per_ray,
+                    raybundle_ray_point_marker_size,
+                    raybundle_ray_line_width,
+                )
+            else:
+                raise ValueError(
+                    "struct {} is not a Cameras, Meshes, Pointclouds,".format(struct)
+                    + " RayBundle or HeterogeneousRayBundle object."
+                )
+
+        # Ensure update for every subplot.
+        scene_key = "scene" + str(subplot_idx + 1)
+        current_layout = fig["layout"][scene_key]
+        xaxis = current_layout["xaxis"]
+        yaxis = current_layout["yaxis"]
+        zaxis = current_layout["zaxis"]
+
+        # Update the axes with our above default and provided settings.
+        xaxis.update(**x_settings)
+        yaxis.update(**y_settings)
+        zaxis.update(**z_settings)
+
+        # update camera viewpoint if provided
+        if viewpoints_eye_at_up_world is not None:
+            # Use camera params for batch index or the first camera if only one provided.
+            viewpoint_idx = min(n_viewpoint_cameras - 1, subplot_idx)
+
+            eye, at, up = (i[viewpoint_idx] for i in viewpoints_eye_at_up_world)
+            eye_x, eye_y, eye_z = eye.tolist()
+            at_x, at_y, at_z = at.tolist()
+            up_x, up_y, up_z = up.tolist()
+
+            # scale camera eye to plotly [-1, 1] ranges
+            x_range = xaxis["range"]
+            y_range = yaxis["range"]
+            z_range = zaxis["range"]
+
+            eye_x = _scale_camera_to_bounds(eye_x, x_range, True)
+            eye_y = _scale_camera_to_bounds(eye_y, y_range, True)
+            eye_z = _scale_camera_to_bounds(eye_z, z_range, True)
+
+            at_x = _scale_camera_to_bounds(at_x, x_range, True)
+            at_y = _scale_camera_to_bounds(at_y, y_range, True)
+            at_z = _scale_camera_to_bounds(at_z, z_range, True)
+
+            up_x = _scale_camera_to_bounds(up_x, x_range, False)
+            up_y = _scale_camera_to_bounds(up_y, y_range, False)
+            up_z = _scale_camera_to_bounds(up_z, z_range, False)
+
+            camera["eye"] = {"x": eye_x, "y": eye_y, "z": eye_z}
+            camera["center"] = {"x": at_x, "y": at_y, "z": at_z}
+            camera["up"] = {"x": up_x, "y": up_y, "z": up_z}
+
+        current_layout.update(
+            {
+                "xaxis": xaxis,
+                "yaxis": yaxis,
+                "zaxis": zaxis,
+                "aspectmode": "cube",
+                "camera": camera,
+            }
+        )
+
+    return fig
+
+
+@torch.no_grad()
+def plot_batch_individually(
+    batched_structs: Union[
+        List[Struct],
+        Struct,
+    ],
+    *,
+    viewpoint_cameras: Optional[CamerasBase] = None,
+    ncols: int = 1,
+    extend_struct: bool = True,
+    subplot_titles: Optional[List[str]] = None,
+    **kwargs,
+):  # pragma: no cover
+    """
+    This is a higher level plotting function than plot_scene, for plotting
+    Cameras, Meshes, Pointclouds, and RayBundle in simple cases. The simplest use
+    is to plot a single Cameras, Meshes, Pointclouds, or a RayBundle object,
+    where you just pass it in as a one element list. This will plot each batch
+    element in a separate subplot.
+
+    More generally, you can supply multiple Cameras, Meshes, Pointclouds, or RayBundle
+    having the same batch size `n`. In this case, there will be `n` subplots,
+    each depicting the corresponding batch element of all the inputs.
+
+    In addition, you can include Cameras, Meshes, Pointclouds, or RayBundle of size 1 in
+    the input. These will either be rendered in the first subplot
+    (if extend_struct is False), or in every subplot.
+    RayBundle includes ImplicitronRayBundle and HeterogeneousRaybundle.
+
+    Args:
+        batched_structs: a list of Cameras, Meshes, Pointclouds and RayBundle to be
+            rendered. Each structure's corresponding batch element will be plotted in a
+            single subplot, resulting in n subplots for a batch of size n. Every struct
+            should either have the same batch size or be of batch size 1. See extend_struct
+            and the description above for how batch size 1 structs are handled. Also accepts
+            a single Cameras, Meshes, Pointclouds, and RayBundle object, which will have
+            each individual element plotted in its own subplot.
+        viewpoint_cameras: an instance of a Cameras object providing a location
+            to view the plotly plot from. If the batch size is equal
+            to the number of subplots, it is a one to one mapping.
+            If the batch size is 1, then that viewpoint will be used
+            for all the subplots.
+            Otherwise, the viewpoint_cameras will not be used.
+        ncols: the number of subplots per row
+        extend_struct: if True, indicates that structs of batch size 1
+            should be plotted in every subplot. If False, they are only
+            plotted in the first subplot.
+        subplot_titles: strings to name each subplot
+        **kwargs: keyword arguments which are passed to plot_scene.
+            See plot_scene documentation for details.
+
+    Returns:
+        The figure produced by plot_scene, or None (after a warning) when
+        `batched_structs` has length 0.
+
+    Raises:
+        ValueError: if a struct has a batch size other than 1 or the maximum
+            batch size, if no struct has at least one element, or if the
+            number of subplot titles does not match the number of subplots.
+
+    Example:
+
+    .. code-block:: python
+
+        mesh = ...  # mesh of batch size 2
+        point_cloud = ... # point_cloud of batch size 2
+        fig = plot_batch_individually([mesh, point_cloud], subplot_titles=["plot1", "plot2"])
+        fig.show()
+
+        # this is equivalent to the below figure
+        fig = plot_scene({
+            "plot1": {
+                "trace1-1": mesh[0],
+                "trace1-2": point_cloud[0]
+            },
+            "plot2":{
+                "trace2-1": mesh[1],
+                "trace2-2": point_cloud[1]
+            }
+        })
+        fig.show()
+
+    The above example will render two subplots which each have both a mesh and pointcloud.
+    For more examples look at the pytorch3d tutorials at `pytorch3d/docs/tutorials`,
+    in particular the files rendered_color_points.ipynb and rendered_textured_meshes.ipynb.
+    """
+
+    # check that every batch is the same size or is size 1
+    if _get_len(batched_structs) == 0:
+        msg = "No structs to plot"
+        warnings.warn(msg)
+        return
+    is_list = isinstance(batched_structs, list)
+    if is_list:
+        max_size = max(_get_len(s) for s in batched_structs)
+        for struct in batched_structs:
+            struct_len = _get_len(struct)
+            if struct_len not in (1, max_size):
+                msg = "invalid batch size {} provided: {}".format(struct_len, struct)
+                raise ValueError(msg)
+    else:
+        max_size = _get_len(batched_structs)
+
+    if max_size == 0:
+        msg = "No data is provided with at least one element"
+        raise ValueError(msg)
+
+    if subplot_titles and len(subplot_titles) != max_size:
+        msg = "invalid number of subplot titles"
+        raise ValueError(msg)
+
+    def _first_ray_idxs(struct):
+        # For a HeterogeneousRayBundle or ImplicitronRayBundle, return the
+        # offsets [0, cumsum(camera_counts)...] delimiting each camera's
+        # rays; return None for every other kind of struct.
+        if not _is_heterogeneous_ray_bundle(struct):
+            return None
+        # pyre-ignore[16]
+        cumsum = struct.camera_counts.cumsum(dim=0)
+        return torch.cat((cumsum.new_zeros((1,)), cumsum))
+
+    # The offsets do not depend on the subplot, so compute them once per
+    # struct instead of once per (subplot, struct) pair.
+    if is_list:
+        struct_entries = [
+            (struct, _get_len(struct), _first_ray_idxs(struct))
+            for struct in batched_structs
+        ]
+        single_first_idxs = None
+    else:
+        struct_entries = []
+        single_first_idxs = _first_ray_idxs(batched_structs)
+
+    scene_dictionary = {}
+    # construct the scene dictionary
+    for scene_num in range(max_size):
+        subplot_title = (
+            subplot_titles[scene_num]
+            if subplot_titles
+            else "subplot " + str(scene_num + 1)
+        )
+        scene_dictionary[subplot_title] = {}
+
+        if is_list:
+            for i, (batched_struct, struct_len, first_idxs) in enumerate(
+                struct_entries
+            ):
+                # A struct needs extending when this subplot lies beyond its
+                # own batch. The comparison is against the subplot index, not
+                # the struct's position in the list, so that without
+                # extend_struct a size-1 struct appears in the first subplot
+                # only.
+                if scene_num >= struct_len and not extend_struct:
+                    continue
+                _add_struct_from_batch(
+                    batched_struct,
+                    scene_num,
+                    subplot_title,
+                    scene_dictionary,
+                    i + 1,
+                    first_idxs=first_idxs,
+                )
+        else:  # batched_structs is a single struct
+            _add_struct_from_batch(
+                batched_structs,
+                scene_num,
+                subplot_title,
+                scene_dictionary,
+                first_idxs=single_first_idxs,
+            )
+
+    return plot_scene(
+        scene_dictionary, viewpoint_cameras=viewpoint_cameras, ncols=ncols, **kwargs
+    )
+
+
+def _add_struct_from_batch(
+    batched_struct: Struct,
+    scene_num: int,
+    subplot_title: str,
+    scene_dictionary: Dict[str, Dict[str, Struct]],
+    trace_idx: int = 1,
+    first_idxs: Optional[torch.Tensor] = None,
+) -> None:  # pragma: no cover
+    """
+    Picks the element of a batched struct that belongs to subplot `scene_num`
+    and stores it in `scene_dictionary`, ready to be passed to plot_scene.
+    If `scene_num` exceeds the last batch index, the last element is used.
+
+    Args:
+        batched_struct: the batched data structure to add to the dict
+        scene_num: the subplot from plot_batch_individually which this struct
+            should be added to
+        subplot_title: the title of the subplot
+        scene_dictionary: the dictionary to add the indexed struct to
+        trace_idx: the trace number, starting at 1 for this struct's trace
+        first_idxs: only read for heterogeneous ray bundles; entries
+            `first_idxs[k]` and `first_idxs[k + 1]` delimit the slice of rays
+            taken for batch element `k`.
+    """
+    ray_attrs = ["origins", "directions", "lengths", "xys"]
+
+    if isinstance(batched_struct, CamerasBase):
+        # we can't index directly into camera batches
+        rotations, translations = batched_struct.R, batched_struct.T
+        picked_R = rotations[min(scene_num, len(rotations) - 1)].unsqueeze(0)
+        picked_T = translations[min(scene_num, len(translations) - 1)].unsqueeze(0)
+        struct = CamerasBase(device=batched_struct.device, R=picked_R, T=picked_T)
+    elif _is_heterogeneous_ray_bundle(batched_struct):
+        # the rays of each batch element are a contiguous slice
+        batch_idx = min(scene_num, _get_len(batched_struct) - 1)
+        # pyre-ignore[16]
+        start, stop = first_idxs[batch_idx], first_idxs[batch_idx + 1]
+        sliced = {}
+        for attr in ray_attrs:
+            sliced[attr] = getattr(batched_struct, attr)[start:stop]
+        struct = RayBundle(**sliced)
+    elif _is_ray_bundle(batched_struct):
+        # for RayBundle we treat the camera count as the batch index
+        batch_idx = min(scene_num, _get_len(batched_struct) - 1)
+        picked = {}
+        for attr in ray_attrs:
+            picked[attr] = getattr(batched_struct, attr)[batch_idx]
+        struct = RayBundle(**picked)
+    else:  # batched meshes and pointclouds are indexable
+        batch_idx = min(scene_num, _get_len(batched_struct) - 1)
+        # pyre-ignore[16]
+        struct = batched_struct[batch_idx]
+
+    trace_name = "trace{}-{}".format(scene_num + 1, trace_idx)
+    scene_dictionary[subplot_title][trace_name] = struct
+
+
+def _add_mesh_trace(
+    fig: go.Figure,  # pyre-ignore[11]
+    meshes: Meshes,
+    trace_name: str,
+    subplot_idx: int,
+    ncols: int,
+    lighting: Lighting,
+) -> None:  # pragma: no cover
+    """
+    Renders a Meshes object as one Mesh3d trace in the given subplot of the
+    figure and widens that subplot's axis bounds to contain it.
+
+    Args:
+        fig: plotly figure to add the trace within.
+        meshes: Meshes object to render. It can be batched.
+        trace_name: name to label the trace with.
+        subplot_idx: identifies the subplot, with 0 being the top left.
+        ncols: the number of subplots per row.
+        lighting: the Mesh3D lighting settings; plot_scene passes these as
+            the dict form of a Lighting object.
+    """
+
+    # All meshes of the batch are merged so they can be drawn as one trace.
+    scene_mesh = join_meshes_as_scene(meshes).detach().cpu()
+    verts = scene_mesh.verts_packed()
+    faces = scene_mesh.faces_packed()
+
+    # Vertex or face colors are taken from the textures when available;
+    # otherwise they stay None and plotly's default colors are used.
+    verts_rgb = None
+    faces_rgb = None
+    if isinstance(scene_mesh.textures, TexturesVertex):
+        verts_rgb = scene_mesh.textures.verts_features_packed()
+        verts_rgb.clamp_(min=0.0, max=1.0)
+        verts_rgb = torch.tensor(255.0) * verts_rgb
+    if isinstance(scene_mesh.textures, TexturesAtlas):
+        atlas = scene_mesh.textures.atlas_packed()
+        # Only an atlas with a single 3-channel entry per face (K == 1) can
+        # be used as a per-face color.
+        is_single_color = atlas.shape[1] == 1 and atlas.shape[3] == 3
+        if is_single_color:
+            faces_rgb = atlas[:, 0, 0]
+
+    # Vertices that no face references are moved to the mean of the used
+    # vertices, i.e. "inside" the object, so they won't be visible in the plot.
+    is_referenced = torch.zeros((verts.shape[0],), dtype=torch.bool)
+    is_referenced[torch.unique(faces)] = True
+    verts_center = verts[is_referenced].mean(0)
+    verts[~is_referenced] = verts_center
+
+    grid_row, grid_col = divmod(subplot_idx, ncols)
+    mesh_trace = go.Mesh3d(
+        x=verts[:, 0],
+        y=verts[:, 1],
+        z=verts[:, 2],
+        vertexcolor=verts_rgb,
+        facecolor=faces_rgb,
+        i=faces[:, 0],
+        j=faces[:, 1],
+        k=faces[:, 2],
+        lighting=lighting,
+        name=trace_name,
+    )
+    fig.add_trace(mesh_trace, row=grid_row + 1, col=grid_col + 1)
+
+    # Scene layouts are keyed "scene1", "scene2", ... in subplot order.
+    current_layout = fig["layout"]["scene" + str(subplot_idx + 1)]
+
+    # update the bounds of the axes for the current trace, using the largest
+    # extent over the three axes
+    extent = verts.max(0)[0] - verts.min(0)[0]
+    max_expand = extent.max()
+    _update_axes_bounds(verts_center, max_expand, current_layout)
+
+
+def _add_pointcloud_trace(
+    fig: go.Figure,
+    pointclouds: Pointclouds,
+    trace_name: str,
+    subplot_idx: int,
+    ncols: int,
+    max_points_per_pointcloud: int,
+    marker_size: int,
+) -> None:  # pragma: no cover
+    """
+    Adds a trace rendering a Pointclouds object to the passed in figure, with
+    a given name and in a specific subplot.
+
+    Features with 3 channels are used as RGB marker colors and features with
+    4 channels as RGBA marker colors. The color channels are clamped to
+    [0, 1] and scaled to 0-255; the alpha channel is passed through as is.
+    Any other feature size leaves the marker color unset.
+
+    Args:
+        fig: plotly figure to add the trace within.
+        pointclouds: Pointclouds object to render. It can be batched.
+        trace_name: name to label the trace with.
+        subplot_idx: identifies the subplot, with 0 being the top left.
+        ncols: the number of subplots per row.
+        max_points_per_pointcloud: the number of points to render, which are randomly sampled.
+        marker_size: the size of the rendered points
+    """
+    pointclouds = pointclouds.detach().cpu().subsample(max_points_per_pointcloud)
+    verts = pointclouds.points_packed()
+    features = pointclouds.features_packed()
+
+    color = None
+    if features is not None:
+        n_channels = features.shape[1]
+        if n_channels == 4:  # rgba
+            # A color string with an alpha component must use the "rgba("
+            # functional notation; "rgb(" with four components is not a
+            # valid CSS color.
+            template = "rgba(%d, %d, %d, %f)"
+            rgb = (features[:, :3].clamp(0.0, 1.0) * 255).int()
+            color = [template % (*rgb_, a_) for rgb_, a_ in zip(rgb, features[:, 3])]
+        elif n_channels == 3:
+            template = "rgb(%d, %d, %d)"
+            rgb = (features.clamp(0.0, 1.0) * 255).int()
+            color = [template % (r, g, b) for r, g, b in rgb]
+
+    row = subplot_idx // ncols + 1
+    col = subplot_idx % ncols + 1
+    fig.add_trace(
+        go.Scatter3d(
+            x=verts[:, 0],
+            y=verts[:, 1],
+            z=verts[:, 2],
+            marker={"color": color, "size": marker_size},
+            mode="markers",
+            name=trace_name,
+        ),
+        row=row,
+        col=col,
+    )
+
+    # Access the current subplot's scene configuration
+    scene_key = "scene" + str(subplot_idx + 1)
+    current_layout = fig["layout"][scene_key]
+
+    # update the bounds of the axes for the current trace
+    verts_center = verts.mean(0)
+    max_expand = (verts.max(0)[0] - verts.min(0)[0]).max()
+    _update_axes_bounds(verts_center, max_expand, current_layout)
+
+
+def _add_camera_trace(
+    fig: go.Figure,
+    cameras: CamerasBase,
+    trace_name: str,
+    subplot_idx: int,
+    ncols: int,
+    camera_scale: float,
+) -> None:  # pragma: no cover
+    """
+    Draws every camera of a Cameras object as a wireframe in one Scatter3d
+    trace of the given subplot, and widens that subplot's axis bounds to
+    contain the wireframes.
+
+    Args:
+        fig: plotly figure to add the trace within.
+        cameras: the Cameras object to render. It can be batched.
+        trace_name: name to label the trace with.
+        subplot_idx: identifies the subplot, with 0 being the top left.
+        ncols: the number of subplots per row.
+        camera_scale: the size of the wireframe used to render the Cameras object.
+    """
+    # Map the canonical wireframe through the inverse of each camera's
+    # world-to-view transform.
+    wireframe = get_camera_wireframe(camera_scale).to(cameras.device)
+    view_to_world = cameras.get_world_to_view_transform().inverse()
+    cam_wires_trans = view_to_world.transform_points(wireframe).detach().cpu()
+    # if batch size is 1, unsqueeze to add dimension
+    if len(cam_wires_trans.shape) < 3:
+        cam_wires_trans = cam_wires_trans.unsqueeze(0)
+
+    # We combine camera points into a single tensor to plot them in a
+    # single trace. The NaNs are inserted between sets of camera
+    # points so that the lines drawn by Plotly are not drawn between
+    # points that belong to different cameras.
+    nan_row = torch.Tensor([[float("NaN")] * 3])
+    pieces = [cam_wires_trans[0]]
+    for wire in cam_wires_trans[1:]:
+        pieces.append(nan_row)
+        pieces.append(wire)
+    all_cam_wires = torch.cat(pieces)
+    x, y, z = all_cam_wires.detach().cpu().numpy().T.astype(float)
+
+    grid_row, grid_col = divmod(subplot_idx, ncols)
+    wire_trace = go.Scatter3d(x=x, y=y, z=z, marker={"size": 1}, name=trace_name)
+    fig.add_trace(wire_trace, row=grid_row + 1, col=grid_col + 1)
+
+    # Scene layouts are keyed "scene1", "scene2", ... in subplot order.
+    current_layout = fig["layout"]["scene" + str(subplot_idx + 1)]
+
+    # flatten for bounds calculations
+    all_points = cam_wires_trans.flatten(0, 1)
+    verts_center = all_points.mean(0)
+    extent = all_points.max(0)[0] - all_points.min(0)[0]
+    max_expand = extent.max()
+    _update_axes_bounds(verts_center, max_expand, current_layout)
+
+
+def _add_ray_bundle_trace(
+    fig: go.Figure,
+    ray_bundle: Union[RayBundle, HeterogeneousRayBundle],
+    trace_name: str,
+    subplot_idx: int,
+    ncols: int,
+    max_rays: int,
+    max_points_per_ray: int,
+    marker_size: int,
+    line_width: int,
+) -> None:  # pragma: no cover
+    """
+    Adds a trace rendering a ray bundle object
+    to the passed in figure, with a given name and in a specific subplot.
+
+    Args:
+        fig: plotly figure to add the trace within.
+        ray_bundle: the RayBundle, ImplicitronRayBundle or HeterogeneousRaybundle to render.
+            It can be batched.
+        trace_name: name to label the trace with.
+        subplot_idx: identifies the subplot, with 0 being the top left.
+        ncols: the number of subplots per row.
+        max_rays: maximum number of plotted rays in total. Randomly subsamples
+            without replacement in case the number of rays is bigger than max_rays.
+        max_points_per_ray: maximum number of points plotted per ray.
+        marker_size: the size of the ray point markers.
+        line_width: the width of the ray lines.
+    """
+
+    n_pts_per_ray = ray_bundle.lengths.shape[-1]
+    n_rays = ray_bundle.lengths.shape[:-1].numel()
+
+    # flatten all batches of rays into a single big bundle
+    ray_bundle_flat = RayBundle(
+        **{
+            attr: torch.flatten(getattr(ray_bundle, attr), start_dim=0, end_dim=-2)
+            for attr in ["origins", "directions", "lengths", "xys"]
+        }
+    )
+
+    # subsample the rays (if needed)
+    if n_rays > max_rays:
+        indices_rays = torch.randperm(n_rays)[:max_rays]
+        ray_bundle_flat = RayBundle(
+            **{
+                attr: getattr(ray_bundle_flat, attr)[indices_rays]
+                for attr in ["origins", "directions", "lengths", "xys"]
+            }
+        )
+
+    # make ray line endpoints
+    min_max_ray_depth = torch.stack(
+        [
+            ray_bundle_flat.lengths.min(dim=1).values,
+            ray_bundle_flat.lengths.max(dim=1).values,
+        ],
+        dim=-1,
+    )
+    ray_lines_endpoints = ray_bundle_to_ray_points(
+        ray_bundle_flat._replace(lengths=min_max_ray_depth)
+    )
+
+    # make the ray lines for plotly plotting
+    nan_tensor = torch.tensor(
+        [[float("NaN")] * 3],
+        device=ray_lines_endpoints.device,
+        dtype=ray_lines_endpoints.dtype,
+    )
+    ray_lines = torch.empty(size=(1, 3), device=ray_lines_endpoints.device)
+    for ray_line in ray_lines_endpoints:
+        # We combine the ray lines into a single tensor to plot them in a
+        # single trace. The NaNs are inserted between sets of ray lines
+        # so that the lines drawn by Plotly are not drawn between
+        # lines that belong to different rays.
+        ray_lines = torch.cat((ray_lines, nan_tensor, ray_line))
+    x, y, z = ray_lines.detach().cpu().numpy().T.astype(float)
+    row, col = subplot_idx // ncols + 1, subplot_idx % ncols + 1
+    fig.add_trace(
+        go.Scatter3d(
+            x=x,
+            y=y,
+            z=z,
+            marker={"size": 0.1},
+            line={"width": line_width},
+            name=trace_name,
+        ),
+        row=row,
+        col=col,
+    )
+
+    # subsample the ray points (if needed)
+    if n_pts_per_ray > max_points_per_ray:
+        indices_ray_pts = torch.cat(
+            [
+                torch.randperm(n_pts_per_ray)[:max_points_per_ray] + ri * n_pts_per_ray
+                for ri in range(ray_bundle_flat.lengths.shape[0])
+            ]
+        )
+        ray_bundle_flat = ray_bundle_flat._replace(
+            lengths=ray_bundle_flat.lengths.reshape(-1)[indices_ray_pts].reshape(
+                ray_bundle_flat.lengths.shape[0], -1
+            )
+        )
+
+    # plot the ray points
+    ray_points = (
+        ray_bundle_to_ray_points(ray_bundle_flat)
+        .view(-1, 3)
+        .detach()
+        .cpu()
+        .numpy()
+        .astype(float)
+    )
+    fig.add_trace(
+        go.Scatter3d(
+            x=ray_points[:, 0],
+            y=ray_points[:, 1],
+            z=ray_points[:, 2],
+            mode="markers",
+            name=trace_name + "_points",
+            marker={"size": marker_size},
+        ),
+        row=row,
+        col=col,
+    )
+
+    # Access the current subplot's scene configuration
+    plot_scene = "scene" + str(subplot_idx + 1)
+    current_layout = fig["layout"][plot_scene]
+
+    # update the bounds of the axes for the current trace
+    all_ray_points = ray_bundle_to_ray_points(ray_bundle).reshape(-1, 3)
+    ray_points_center = all_ray_points.mean(dim=0)
+    max_expand = (all_ray_points.max(0)[0] - all_ray_points.min(0)[0]).max().item()
+    _update_axes_bounds(ray_points_center, float(max_expand), current_layout)
+
+
+def _gen_fig_with_subplots(
+    batch_size: int, ncols: int, subplot_titles: List[str]
+):  # pragma: no cover
+    """
+    Takes in the number of objects to be plotted and generate a plotly figure
+    with the appropriate number and orientation of titled subplots.
+    Args:
+        batch_size: the number of elements in the batch of objects to be visualized.
+        ncols: number of subplots in the same row.
+        subplot_titles: titles for the subplot(s). list of strings of length batch_size.
+
+    Returns:
+        Plotly figure with ncols subplots per row, and batch_size subplots.
+    """
+    fig_rows = batch_size // ncols
+    if batch_size % ncols != 0:
+        fig_rows += 1  # allow for non-uniform rows
+    fig_cols = ncols
+    fig_type = [{"type": "scene"}]
+    specs = [fig_type * fig_cols] * fig_rows
+    # subplot_titles must have one title per subplot
+    fig = make_subplots(
+        rows=fig_rows,
+        cols=fig_cols,
+        specs=specs,
+        subplot_titles=subplot_titles,
+        column_widths=[1.0] * fig_cols,
+    )
+    return fig
+
+
+def _update_axes_bounds(
+    verts_center: torch.Tensor,
+    max_expand: float,
+    current_layout: go.Scene,  # pyre-ignore[11]
+) -> None:  # pragma: no cover
+    """
+    Takes in the vertices' center point and max spread, and the current plotly figure
+    layout and updates the layout to have bounds that include all traces for that subplot.
+    Args:
+        verts_center: tensor of size (3) corresponding to a trace's vertices' center point.
+        max_expand: the maximum spread in any dimension of the trace's vertices.
+        current_layout: the plotly figure layout scene corresponding to the referenced trace.
+    """
+    verts_center = verts_center.detach().cpu()
+    verts_min = verts_center - max_expand
+    verts_max = verts_center + max_expand
+    bounds = torch.t(torch.stack((verts_min, verts_max)))
+
+    # Ensure that within a subplot, the bounds capture all traces
+    old_xrange, old_yrange, old_zrange = (
+        current_layout["xaxis"]["range"],
+        current_layout["yaxis"]["range"],
+        current_layout["zaxis"]["range"],
+    )
+    x_range, y_range, z_range = bounds
+    if old_xrange is not None:
+        x_range[0] = min(x_range[0], old_xrange[0])
+        x_range[1] = max(x_range[1], old_xrange[1])
+    if old_yrange is not None:
+        y_range[0] = min(y_range[0], old_yrange[0])
+        y_range[1] = max(y_range[1], old_yrange[1])
+    if old_zrange is not None:
+        z_range[0] = min(z_range[0], old_zrange[0])
+        z_range[1] = max(z_range[1], old_zrange[1])
+
+    xaxis = {"range": x_range}
+    yaxis = {"range": y_range}
+    zaxis = {"range": z_range}
+    current_layout.update({"xaxis": xaxis, "yaxis": yaxis, "zaxis": zaxis})
+
+
+def _scale_camera_to_bounds(
+    coordinate: float, axis_bounds: Tuple[float, float], is_position: bool
+) -> float:  # pragma: no cover
+    """
+    We set our plotly plot's axes' bounding box to [-1,1]x[-1,1]x[-1,1]. As such,
+    the plotly camera location has to be scaled accordingly to have its world coordinates
+    correspond to its relative plotted coordinates for viewing the plotly plot.
+    This function does the scaling and offset to transform the coordinates.
+
+    Args:
+        coordinate: the float value to be transformed
+        axis_bounds: the bounds of the plotly plot for the axis which
+            the coordinate argument refers to
+        is_position: If true, the float value is the coordinate of a position, and so must
+            be moved in to [-1,1]. Otherwise it is a component of a direction, and so needs only
+            to be scaled.
+    """
+    scale = (axis_bounds[1] - axis_bounds[0]) / 2
+    if not is_position:
+        return coordinate / scale
+    offset = (axis_bounds[1] / scale) - 1
+    return coordinate / scale - offset
diff --git a/pytorch3d/pytorch3d/vis/texture_vis.py b/pytorch3d/pytorch3d/vis/texture_vis.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d36abb6f154a7e383fa2ab887aade5a2ca6c9da
--- /dev/null
+++ b/pytorch3d/pytorch3d/vis/texture_vis.py
@@ -0,0 +1,110 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Any, Optional
+
+import numpy as np
+from PIL import Image, ImageDraw
+from pytorch3d.renderer.mesh import TexturesUV
+
+
+def texturesuv_image_matplotlib(
+    texture: TexturesUV,
+    *,
+    texture_index: int = 0,
+    radius: float = 1,
+    color=(1.0, 0.0, 0.0),
+    subsample: Optional[int] = 10000,
+    origin: str = "upper",
+) -> None:  # pragma: no cover
+    """
+    Plot the texture image for one element of a TexturesUV with
+    matplotlib together with verts_uvs positions circled.
+    In particular a value in verts_uvs which is never referenced
+    in faces_uvs will still be plotted.
+    This is for debugging purposes, e.g. to align the map with
+    the uv coordinates. In particular, matplotlib
+    is used which is not an official dependency of PyTorch3D.
+
+    Args:
+        texture: a TexturesUV object with one mesh
+        texture_index: index in the batch to plot
+        radius: plotted circle radius in pixels
+        color: any matplotlib-understood color for the circles.
+        subsample: if not None, number of points to plot.
+                Otherwise all points are plotted.
+        origin: "upper" or "lower" like matplotlib.imshow .
+            upper (the default) matches texturesuv_image_PIL.
+    """
+
+    import matplotlib.pyplot as plt
+    from matplotlib.patches import Circle
+
+    texture_image = texture.maps_padded()
+    centers = texture.centers_for_image(index=texture_index).numpy()
+
+    ax = plt.gca()
+    ax.imshow(texture_image[texture_index].detach().cpu().numpy(), origin=origin)
+
+    n_points = centers.shape[0]
+    if subsample is None or n_points <= subsample:
+        indices = range(n_points)
+    else:
+        indices = np.random.choice(n_points, subsample, replace=False)
+    for i in indices:
+        # setting clip_on=False makes it obvious when
+        # we have UV coordinates outside the correct range
+        ax.add_patch(Circle(centers[i], radius, color=color, clip_on=False))
+
+
+def texturesuv_image_PIL(
+    texture: TexturesUV,
+    *,
+    texture_index: int = 0,
+    radius: float = 1,
+    color: Any = "red",
+    subsample: Optional[int] = 10000,
+):  # pragma: no cover
+    """
+    Return a PIL image of the texture image of one element of the batch
+    from a TexturesUV, together with the verts_uvs positions circled.
+    In particular a value in verts_uvs which is never referenced
+    in faces_uvs will still be plotted.
+    This is for debugging purposes, e.g. to align the map with
+    the uv coordinates. In particular, matplotlib
+    is used which is not an official dependency of PyTorch3D.
+
+    Args:
+        texture: a TexturesUV object with one mesh
+        texture_index: index in the batch to plot
+        radius: plotted circle radius in pixels
+        color: any PIL-understood color for the circles.
+        subsample: if not None, number of points to plot.
+                Otherwise all points are plotted.
+
+    Returns:
+        PIL Image object.
+    """
+
+    centers = texture.centers_for_image(index=texture_index).numpy()
+    texture_image = texture.maps_padded()
+    texture_array = (texture_image[texture_index] * 255).cpu().numpy().astype(np.uint8)
+
+    image = Image.fromarray(texture_array)
+    draw = ImageDraw.Draw(image)
+
+    n_points = centers.shape[0]
+    if subsample is None or n_points <= subsample:
+        indices = range(n_points)
+    else:
+        indices = np.random.choice(n_points, subsample, replace=False)
+
+    for i in indices:
+        x = centers[i][0]
+        y = centers[i][1]
+        draw.ellipse([(x - radius, y - radius), (x + radius, y + radius)], fill=color)
+
+    return image
diff --git a/pytorch3d/scripts/build_website.sh b/pytorch3d/scripts/build_website.sh
new file mode 100644
index 0000000000000000000000000000000000000000..2fc2db9ad112ebc0f68d6db7528042e1e0978d52
--- /dev/null
+++ b/pytorch3d/scripts/build_website.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# run this script from the project root using `./scripts/build_website.sh`
+
+set -e
+
+usage() {
+  echo "Usage: $0 [-b]"
+  echo ""
+  echo "Build PyTorch3D documentation."
+  echo ""
+  echo "  -b   Build static version of documentation (otherwise start server)"
+  echo ""
+  exit 1
+}
+
+BUILD_STATIC=false
+
+while getopts 'hb' flag; do
+  case "${flag}" in
+    h)
+      usage
+      ;;
+    b)
+      BUILD_STATIC=true
+      ;;
+    *)
+      usage
+      ;;
+  esac
+done
+
+
+echo "-----------------------------------"
+echo "Building PyTorch3D Docusaurus site"
+echo "-----------------------------------"
+cd website
+yarn
+cd ..
+# The tutorial pages are generated from the project root, one level above website/.
+echo "-----------------------------------"
+echo "Generating tutorials"
+echo "-----------------------------------"
+cwd=$(pwd)
+mkdir -p "website/_tutorials"
+mkdir -p "website/static/files"
+python scripts/parse_tutorials.py --repo_dir "${cwd}"
+
+cd website
+# With -b the static site is built; without it a local server is started.
+if [[ $BUILD_STATIC == true ]]; then
+  echo "-----------------------------------"
+  echo "Building static site"
+  echo "-----------------------------------"
+  yarn build
+else
+  echo "-----------------------------------"
+  echo "Starting local server"
+  echo "-----------------------------------"
+  yarn start
+fi
diff --git a/pytorch3d/scripts/parse_tutorials.py b/pytorch3d/scripts/parse_tutorials.py
new file mode 100644
index 0000000000000000000000000000000000000000..a7c7cd267b00c4267db9d719eda1c07b8c3f6694
--- /dev/null
+++ b/pytorch3d/scripts/parse_tutorials.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import argparse
+import json
+import os
+
+import nbformat
+from bs4 import BeautifulSoup
+from nbconvert import HTMLExporter, ScriptExporter
+
+
+TEMPLATE = """const CWD = process.cwd();
+
+const React = require('react');
+const Tutorial = require(`${{CWD}}/core/Tutorial.js`);
+
+class TutorialPage extends React.Component {{
+  render() {{
+      const {{config: siteConfig}} = this.props;
+      const {{baseUrl}} = siteConfig;
+      return <Tutorial baseUrl={{baseUrl}} tutorialID="{}"/>;
+  }}
+}}
+
+module.exports = TutorialPage;
+
+"""
+# TEMPLATE is the JS page of one tutorial; JS_SCRIPTS is put in front of its HTML.
+JS_SCRIPTS = """
+<script
+  src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.1.10/require.min.js">
+</script>
+<script
+  src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.0.3/jquery.min.js">
+</script>
+"""  # noqa: E501
+
+
+def gen_tutorials(repo_dir: str) -> None:
+    """Generate HTML tutorials for PyTorch3D Docusaurus site from Jupyter notebooks.
+
+    Also create ipynb and py versions of tutorial in Docusaurus site for
+    download.
+    """
+    with open(os.path.join(repo_dir, "website", "tutorials.json"), "r") as infile:
+        tutorial_config = json.loads(infile.read())
+
+    tutorial_ids = {x["id"] for v in tutorial_config.values() for x in v}
+
+    for tid in tutorial_ids:
+        print("Generating {} tutorial".format(tid))
+
+        # convert notebook to HTML
+        ipynb_in_path = os.path.join(
+            repo_dir, "docs", "tutorials", "{}.ipynb".format(tid)
+        )
+        with open(ipynb_in_path, "r") as infile:
+            nb_str = infile.read()
+            nb = nbformat.reads(nb_str, nbformat.NO_CONVERT)
+
+        # displayname is absent from notebook metadata
+        nb["metadata"]["kernelspec"]["display_name"] = "python3"
+
+        exporter = HTMLExporter()
+        html, meta = exporter.from_notebook_node(nb)
+
+        # pull out html div for notebook
+        soup = BeautifulSoup(html, "html.parser")
+        nb_meat = soup.find("div", {"id": "notebook-container"})
+        del nb_meat.attrs["id"]
+        nb_meat.attrs["class"] = ["notebook"]
+        html_out = JS_SCRIPTS + str(nb_meat)
+
+        # generate html file
+        html_out_path = os.path.join(
+            repo_dir, "website", "_tutorials", "{}.html".format(tid)
+        )
+        with open(html_out_path, "w") as html_outfile:
+            html_outfile.write(html_out)
+
+        # generate JS file
+        script = TEMPLATE.format(tid)
+        js_out_path = os.path.join(
+            repo_dir, "website", "pages", "tutorials", "{}.js".format(tid)
+        )
+        with open(js_out_path, "w") as js_outfile:
+            js_outfile.write(script)
+
+        # output tutorial in both ipynb & py form
+        ipynb_out_path = os.path.join(
+            repo_dir, "website", "static", "files", "{}.ipynb".format(tid)
+        )
+        with open(ipynb_out_path, "w") as ipynb_outfile:
+            ipynb_outfile.write(nb_str)
+        exporter = ScriptExporter()
+        script, meta = exporter.from_notebook_node(nb)
+        py_out_path = os.path.join(
+            repo_dir, "website", "static", "files", "{}.py".format(tid)
+        )
+        with open(py_out_path, "w") as py_outfile:
+            py_outfile.write(script)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Generate JS, HTML, ipynb, and py files for tutorials."
+    )
+    parser.add_argument(
+        "--repo_dir", metavar="path", required=True, help="PyTorch3D repo directory."
+    )
+    args = parser.parse_args()
+    gen_tutorials(args.repo_dir)
diff --git a/pytorch3d/scripts/publish_website.sh b/pytorch3d/scripts/publish_website.sh
new file mode 100644
index 0000000000000000000000000000000000000000..604875245965948db603e27dae4bbe95d629e0a1
--- /dev/null
+++ b/pytorch3d/scripts/publish_website.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+# Instructions, assuming you are on a fresh pytorch3d checkout on a local
+# drive.
+
+# (1) Have a separate checkout of pytorch3d at the head of the gh-pages branch
+# on a local drive. Set the variable GHP to its full path.
+# Any uncommitted changes there will be obliterated.
+# For example
+#   GHP=/path/to/pytorch3d-gh-pages
+#   git clone -b gh-pages https://github.com/facebookresearch/pytorch3d $GHP
+
+# (2) Run this script in this directory with
+#   sudo docker run -it --rm -v $PWD/..:/loc -v $GHP:/ghp continuumio/miniconda3 bash --login /loc/scripts/publish_website.sh
+
+# (3) Choose a commit message, commit and push:
+#   cd $GHP && git add .
+#   git commit -m 'Update latest version of site'
+#   git push
+
+set -e
+
+conda create -y -n myenv python=3.7 nodejs
+
+# Note: Using bash --login together with the continuumio/miniconda3 image
+# is what lets conda activate work so smoothly.
+
+conda activate myenv
+pip install nbformat==4.4.0 nbconvert==5.3.1 ipywidgets==7.5.1 tornado==4.2 bs4 notebook==5.7.12 'mistune<2'
+npm install --global yarn
+
+cd /loc
+bash scripts/build_website.sh -b
+
+rm -rf /ghp/*
+echo "pytorch3d.org" > /ghp/CNAME
+mv /loc/website/build/pytorch3d/* /ghp/
diff --git a/pytorch3d/setup.cfg b/pytorch3d/setup.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..8e48cc2dd614b4cb7370e8312de9329cdb128abc
--- /dev/null
+++ b/pytorch3d/setup.cfg
@@ -0,0 +1,14 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+[isort]
+line_length = 88
+multi_line_output = 3
+include_trailing_comma = True
+force_grid_wrap = 0
+default_section = THIRDPARTY
+lines_after_imports = 2
+combine_as_imports = True
diff --git a/pytorch3d/setup.py b/pytorch3d/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..a5aecf71d3b5ec51163355540db1c056b68fa5aa
--- /dev/null
+++ b/pytorch3d/setup.py
@@ -0,0 +1,181 @@
+#!/usr/bin/env python
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import glob
+import os
+import runpy
+import sys
+import warnings
+from typing import List, Optional
+
+import torch
+from setuptools import find_packages, setup
+from torch.utils.cpp_extension import CppExtension, CUDA_HOME, CUDAExtension
+
+
+def get_existing_ccbin(nvcc_args: List[str]) -> Optional[str]:
+    """
+    Given a list of nvcc arguments, return the compiler if specified.
+
+    Note from CUDA doc: Single value options and list options must have
+    arguments, which must follow the name of the option itself by either
+    one of more spaces or an equals character.
+    """
+    last_arg = None
+    for arg in reversed(nvcc_args):
+        if arg == "-ccbin":
+            return last_arg
+        if arg.startswith("-ccbin="):
+            return arg[7:]
+        last_arg = arg
+    return None
+
+
+def get_extensions():
+    no_extension = os.getenv("PYTORCH3D_NO_EXTENSION", "0") == "1"
+    if no_extension:
+        msg = "SKIPPING EXTENSION BUILD. PYTORCH3D WILL NOT WORK!"
+        print(msg, file=sys.stderr)
+        warnings.warn(msg)
+        return []
+
+    this_dir = os.path.dirname(os.path.abspath(__file__))
+    extensions_dir = os.path.join(this_dir, "pytorch3d", "csrc")
+    sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp"), recursive=True)
+    source_cuda = glob.glob(os.path.join(extensions_dir, "**", "*.cu"), recursive=True)
+    extension = CppExtension
+
+    extra_compile_args = {"cxx": ["-std=c++17"]}
+    define_macros = []
+    include_dirs = [extensions_dir]
+
+    force_cuda = os.getenv("FORCE_CUDA", "0") == "1"
+    force_no_cuda = os.getenv("PYTORCH3D_FORCE_NO_CUDA", "0") == "1"
+    if (
+        not force_no_cuda and torch.cuda.is_available() and CUDA_HOME is not None
+    ) or force_cuda:
+        extension = CUDAExtension
+        sources += source_cuda
+        define_macros += [("WITH_CUDA", None)]
+        # Thrust is only used for its tuple objects.
+        # With CUDA 11.0 we can't use the cudatoolkit's version of cub.
+        # We take the risk that CUB and Thrust are incompatible, because
+        # we aren't using parts of Thrust which actually use CUB.
+        define_macros += [("THRUST_IGNORE_CUB_VERSION_CHECK", None)]
+        cub_home = os.environ.get("CUB_HOME", None)
+        nvcc_args = [
+            "-DCUDA_HAS_FP16=1",
+            "-D__CUDA_NO_HALF_OPERATORS__",
+            "-D__CUDA_NO_HALF_CONVERSIONS__",
+            "-D__CUDA_NO_HALF2_OPERATORS__",
+        ]
+        if os.name != "nt":
+            nvcc_args.append("-std=c++17")
+        if cub_home is None:
+            prefix = os.environ.get("CONDA_PREFIX", None)
+            if prefix is not None and os.path.isdir(prefix + "/include/cub"):
+                cub_home = prefix + "/include"
+
+        if cub_home is None:
+            warnings.warn(
+                "The environment variable `CUB_HOME` was not found. "
+                "NVIDIA CUB is required for compilation and can be downloaded "
+                "from `https://github.com/NVIDIA/cub/releases`. You can unpack "
+                "it to a location of your choice and set the environment variable "
+                "`CUB_HOME` to the folder containing the `CMakeListst.txt` file."
+            )
+        else:
+            include_dirs.append(os.path.realpath(cub_home).replace("\\ ", " "))
+        nvcc_flags_env = os.getenv("NVCC_FLAGS", "")
+        if nvcc_flags_env != "":
+            nvcc_args.extend(nvcc_flags_env.split(" "))
+
+        # This is needed for pytorch 1.6 and earlier. See e.g.
+        # https://github.com/facebookresearch/pytorch3d/issues/436
+        # It is harmless after https://github.com/pytorch/pytorch/pull/47404 .
+        # But it can be problematic in torch 1.7.0 and 1.7.1
+        if torch.__version__[:4] != "1.7.":
+            CC = os.environ.get("CC", None)
+            if CC is not None:
+                existing_CC = get_existing_ccbin(nvcc_args)
+                if existing_CC is None:
+                    CC_arg = "-ccbin={}".format(CC)
+                    nvcc_args.append(CC_arg)
+                elif existing_CC != CC:
+                    msg = f"Inconsistent ccbins: {CC} and {existing_CC}"
+                    raise ValueError(msg)
+
+        extra_compile_args["nvcc"] = nvcc_args
+
+    sources = [os.path.join(extensions_dir, s) for s in sources]
+
+    ext_modules = [
+        extension(
+            "pytorch3d._C",
+            sources,
+            include_dirs=include_dirs,
+            define_macros=define_macros,
+            extra_compile_args=extra_compile_args,
+        )
+    ]
+
+    return ext_modules
+
+
+# Retrieve __version__ by running the package's __init__.py (path relative to the cwd).
+__version__ = runpy.run_path("pytorch3d/__init__.py")["__version__"]
+
+
+if os.getenv("PYTORCH3D_NO_NINJA", "0") == "1":
+    # PYTORCH3D_NO_NINJA=1 swaps in a build_ext that always passes use_ninja=False
+    class BuildExtension(torch.utils.cpp_extension.BuildExtension):
+        def __init__(self, *args, **kwargs):
+            super().__init__(use_ninja=False, *args, **kwargs)
+
+else:
+    BuildExtension = torch.utils.cpp_extension.BuildExtension
+# Extra package name, mapped onto projects/implicitron_trainer by package_dir below.
+trainer = "pytorch3d.implicitron_trainer"
+
+setup(
+    name="pytorch3d",
+    version=__version__,
+    author="FAIR",
+    url="https://github.com/facebookresearch/pytorch3d",
+    description="PyTorch3D is FAIR's library of reusable components "
+    "for deep Learning with 3D data.",
+    packages=find_packages(
+        exclude=("configs", "tests", "tests.*", "docs.*", "projects.*")
+    )
+    + [trainer],
+    package_dir={trainer: "projects/implicitron_trainer"},
+    install_requires=["fvcore", "iopath"],
+    extras_require={
+        "all": ["matplotlib", "tqdm>4.29.0", "imageio", "ipywidgets"],
+        "dev": ["flake8", "usort"],
+        "implicitron": [
+            "hydra-core>=1.1",
+            "visdom",
+            "lpips",
+            "tqdm>4.29.0",
+            "matplotlib",
+            "accelerate",
+            "sqlalchemy>=2.0",
+        ],
+    },
+    entry_points={
+        "console_scripts": [
+            f"pytorch3d_implicitron_runner={trainer}.experiment:experiment",
+            f"pytorch3d_implicitron_visualizer={trainer}.visualize_reconstruction:main",
+        ]
+    },
+    ext_modules=get_extensions(),
+    cmdclass={"build_ext": BuildExtension},
+    package_data={
+        "": ["*.json"],
+    },
+)
diff --git a/pytorch3d/tests/__init__.py b/pytorch3d/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/tests/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/tests/benchmarks/__init__.py b/pytorch3d/tests/benchmarks/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/tests/benchmarks/bm_acos_linear_extrapolation.py b/pytorch3d/tests/benchmarks/bm_acos_linear_extrapolation.py
new file mode 100644
index 0000000000000000000000000000000000000000..4cfd3fa1262dcbfce77b3f138c089520fdf23d42
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_acos_linear_extrapolation.py
@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from fvcore.common.benchmark import benchmark
+from tests.test_acos_linear_extrapolation import TestAcosLinearExtrapolation
+
+
+def bm_acos_linear_extrapolation() -> None:
+    kwargs_list = [
+        {"batch_size": 1},
+        {"batch_size": 100},
+        {"batch_size": 10000},
+        {"batch_size": 1000000},
+    ]
+    benchmark(
+        TestAcosLinearExtrapolation.acos_linear_extrapolation,
+        "ACOS_LINEAR_EXTRAPOLATION",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_acos_linear_extrapolation()
diff --git a/pytorch3d/tests/benchmarks/bm_ball_query.py b/pytorch3d/tests/benchmarks/bm_ball_query.py
new file mode 100644
index 0000000000000000000000000000000000000000..73aefd0a4a562722957999828635c4b8d7a4fca0
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_ball_query.py
@@ -0,0 +1,40 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from itertools import product
+
+from fvcore.common.benchmark import benchmark
+from tests.test_ball_query import TestBallQuery
+
+
+def bm_ball_query() -> None:
+
+    backends = ["cpu", "cuda:0"]
+
+    kwargs_list = []
+    Ns = [32]
+    P1s = [256]
+    P2s = [128, 512]
+    Ds = [3, 10]
+    Ks = [3, 24, 100]
+    Rs = [0.1, 0.2, 5]
+    test_cases = product(Ns, P1s, P2s, Ds, Ks, Rs, backends)
+    for case in test_cases:
+        N, P1, P2, D, K, R, b = case
+        kwargs_list.append(
+            {"N": N, "P1": P1, "P2": P2, "D": D, "K": K, "radius": R, "device": b}
+        )
+
+    benchmark(
+        TestBallQuery.ball_query_square, "BALLQUERY_SQUARE", kwargs_list, warmup_iters=1
+    )
+    benchmark(
+        TestBallQuery.ball_query_ragged, "BALLQUERY_RAGGED", kwargs_list, warmup_iters=1
+    )
+
+
+if __name__ == "__main__":
+    bm_ball_query()
diff --git a/pytorch3d/tests/benchmarks/bm_barycentric_clipping.py b/pytorch3d/tests/benchmarks/bm_barycentric_clipping.py
new file mode 100644
index 0000000000000000000000000000000000000000..289233ac08cc748aeafc1cfd721077ce81bfb1f2
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_barycentric_clipping.py
@@ -0,0 +1,120 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from itertools import product
+
+import torch
+from fvcore.common.benchmark import benchmark
+from pytorch3d.renderer.cameras import FoVPerspectiveCameras, look_at_view_transform
+from pytorch3d.renderer.mesh.rasterizer import (
+    Fragments,
+    MeshRasterizer,
+    RasterizationSettings,
+)
+from pytorch3d.renderer.mesh.utils import (
+    _clip_barycentric_coordinates,
+    _interpolate_zbuf,
+)
+from pytorch3d.utils.ico_sphere import ico_sphere
+
+
+def baryclip_cuda(
+    num_meshes: int = 8,
+    ico_level: int = 5,
+    image_size: int = 64,
+    faces_per_pixel: int = 50,
+    device="cuda",
+):
+    """Build a closure that rasterizes a batch of ico-spheres and syncs.
+
+    Setup runs once here; rasterization uses clip_barycentric_coords=True.
+    """
+    batch = ico_sphere(ico_level, device).extend(num_meshes)
+    rot, trans = look_at_view_transform(1.0, 0.0, 0.0)
+    camera_batch = FoVPerspectiveCameras(device=device, R=rot, T=trans)
+    settings = RasterizationSettings(
+        image_size=image_size,
+        blur_radius=1e-4,
+        faces_per_pixel=faces_per_pixel,
+        clip_barycentric_coords=True,
+    )
+    rasterize = MeshRasterizer(cameras=camera_batch, raster_settings=settings)
+    torch.cuda.synchronize()  # finish setup work before timing starts
+
+    def raster_fn():
+        rasterize(batch)
+        torch.cuda.synchronize()
+
+    return raster_fn
+
+
+def baryclip_pytorch(
+    num_meshes: int = 8,
+    ico_level: int = 5,
+    image_size: int = 64,
+    faces_per_pixel: int = 50,
+    device="cuda",
+):
+    """Build a closure that rasterizes ico-spheres and clips afterwards.
+
+    The rasterizer runs with clip_barycentric_coords=False; the closure then
+    clips the barycentric coordinates, re-interpolates the z-buffer and
+    repacks the result into a new Fragments before synchronizing.
+    """
+    batch = ico_sphere(ico_level, device).extend(num_meshes)
+    rot, trans = look_at_view_transform(1.0, 0.0, 0.0)
+    camera_batch = FoVPerspectiveCameras(device=device, R=rot, T=trans)
+    settings = RasterizationSettings(
+        image_size=image_size,
+        blur_radius=1e-4,
+        faces_per_pixel=faces_per_pixel,
+        clip_barycentric_coords=False,
+    )
+    rasterize = MeshRasterizer(cameras=camera_batch, raster_settings=settings)
+    torch.cuda.synchronize()  # finish setup work before timing starts
+
+    def raster_fn():
+        raw = rasterize(batch)
+        bary = _clip_barycentric_coordinates(raw.bary_coords)
+        zbuf = _interpolate_zbuf(raw.pix_to_face, bary, batch)
+        # The repacked Fragments is discarded; building it is part of the
+        # measured work.
+        Fragments(
+            bary_coords=bary,
+            zbuf=zbuf,
+            dists=raw.dists,
+            pix_to_face=raw.pix_to_face,
+        )
+        torch.cuda.synchronize()
+
+    return raster_fn
+
+
+def bm_barycentric_clip() -> None:
+    """Benchmark both barycentric-clipping closures; no-op without CUDA."""
+    if not torch.cuda.is_available():
+        return
+
+    num_meshes = [1, 8]
+    ico_level = [0, 4]
+    image_size = [64, 128, 256]
+    faces_per_pixel = [10, 75, 100]
+    grid = product(num_meshes, ico_level, image_size, faces_per_pixel)
+    kwargs_list = [
+        {
+            "num_meshes": n,
+            "ico_level": level,
+            "image_size": size,
+            "faces_per_pixel": k,
+        }
+        for n, level, size, k in grid
+    ]
+    benchmark(baryclip_cuda, "BARY_CLIP_CUDA", kwargs_list, warmup_iters=1)
+    benchmark(baryclip_pytorch, "BARY_CLIP_PYTORCH", kwargs_list, warmup_iters=1)
+
+
+if __name__ == "__main__":
+    bm_barycentric_clip()
diff --git a/pytorch3d/tests/benchmarks/bm_blending.py b/pytorch3d/tests/benchmarks/bm_blending.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a404233c46182a580d26f8c195118be8ae5458d
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_blending.py
@@ -0,0 +1,74 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from itertools import product
+
+from fvcore.common.benchmark import benchmark
+from tests.test_blending import TestBlending
+
+
+def bm_blending() -> None:
+    """Benchmark the sigmoid-alpha, softmax and splatter blending drivers.
+
+    All cases use 8 meshes on "cuda". Sigmoid-alpha blending is timed with
+    both the "pytorch" and "custom" backends; softmax and splatter blending
+    are timed with the "pytorch" backend only.
+    """
+    # Settings shared by every benchmark below.
+    devices = ["cuda"]
+    num_meshes = [8]
+    image_size = [64, 128, 256]
+    sigmoid_faces = [2, 50, 100]
+    splatter_faces = [2, 10]
+
+    def make_cases(faces_per_pixel, backend):
+        # One kwargs dict per combination, in itertools.product order.
+        return [
+            {
+                "num_meshes": n,
+                "image_size": s,
+                "faces_per_pixel": k,
+                "device": d,
+                "backend": b,
+            }
+            for n, s, k, d, b in product(
+                num_meshes, image_size, faces_per_pixel, devices, backend
+            )
+        ]
+
+    # Note: the label says PYTORCH but this grid covers both backends.
+    sigmoid_cases = make_cases(sigmoid_faces, ["pytorch", "custom"])
+    benchmark(
+        TestBlending.bm_sigmoid_alpha_blending,
+        "SIGMOID_ALPHA_BLENDING_PYTORCH",
+        sigmoid_cases,
+        warmup_iters=1,
+    )
+
+    # Softmax blending reuses that grid restricted to the pytorch backend.
+    softmax_cases = [
+        case for case in sigmoid_cases if case["backend"] == "pytorch"
+    ]
+    benchmark(
+        TestBlending.bm_softmax_blending,
+        "SOFTMAX_BLENDING_PYTORCH",
+        softmax_cases,
+        warmup_iters=1,
+    )
+
+    # Splatter blending uses a smaller faces-per-pixel grid.
+    splatter_cases = make_cases(splatter_faces, ["pytorch"])
+    benchmark(
+        TestBlending.bm_splatter_blending,
+        "SPLATTER_BLENDING_PYTORCH",
+        splatter_cases,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_blending()
diff --git a/pytorch3d/tests/benchmarks/bm_cameras.py b/pytorch3d/tests/benchmarks/bm_cameras.py
new file mode 100644
index 0000000000000000000000000000000000000000..8cf6bf221c99a8f9c7e42b36aaf76ab1ec7bee9d
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_cameras.py
@@ -0,0 +1,59 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import itertools
+
+from fvcore.common.benchmark import benchmark
+from tests.test_cameras import TestCamerasCommon
+
+
+def _setUp():
+    """Return one kwargs dict per (cam_type, batch_size, num_points, device)."""
+    cam_types = (
+        "OpenGLOrthographicCameras",
+        "OpenGLPerspectiveCameras",
+        "SfMOrthographicCameras",
+        "SfMPerspectiveCameras",
+        "FoVOrthographicCameras",
+        "FoVPerspectiveCameras",
+        "OrthographicCameras",
+        "PerspectiveCameras",
+        "FishEyeCameras",
+    )
+    batch_sizes = [1, 10]
+    num_points = [10, 100]
+    devices = ["cpu", "cuda:0"]
+    keys = ("cam_type", "batch_size", "num_points", "device")
+    grid = itertools.product(cam_types, batch_sizes, num_points, devices)
+    # Combinations come in itertools.product order: the device varies fastest.
+    return [dict(zip(keys, combo)) for combo in grid]
+
+
+def _bm_cameras_project() -> None:
+    """Time TestCamerasCommon.transform_points over the _setUp() grid."""
+    benchmark(
+        TestCamerasCommon.transform_points,
+        "TEST_TRANSFORM_POINTS",
+        _setUp(),
+    )
+
+
+def _bm_cameras_unproject() -> None:
+    """Time TestCamerasCommon.unproject_points over the _setUp() grid."""
+    benchmark(
+        TestCamerasCommon.unproject_points,
+        "TEST_UNPROJECT_POINTS",
+        _setUp(),
+    )
+
+
+def bm_cameras() -> None:  # runs the projection benchmark, then unprojection
+    for run_benchmark in (_bm_cameras_project, _bm_cameras_unproject):
+        run_benchmark()
+
+
+if __name__ == "__main__":
+    bm_cameras()
diff --git a/pytorch3d/tests/benchmarks/bm_cameras_alignment.py b/pytorch3d/tests/benchmarks/bm_cameras_alignment.py
new file mode 100644
index 0000000000000000000000000000000000000000..22dd1570f88fdeca1da9fad027532bed98bbc407
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_cameras_alignment.py
@@ -0,0 +1,32 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import itertools
+
+from fvcore.common.benchmark import benchmark
+from tests.test_cameras_alignment import TestCamerasAlignment
+
+
+def bm_cameras_alignment() -> None:
+    """Time corresponding_cameras_alignment over batch size, mode and scale."""
+    batch_sizes = [10, 100, 1000]
+    modes = ["centers", "extrinsics"]
+    scale_flags = [False, True]
+    kwargs_list = [
+        {"batch_size": size, "mode": mode, "estimate_scale": flag}
+        for size, mode, flag in itertools.product(batch_sizes, modes, scale_flags)
+    ]
+
+    benchmark(
+        TestCamerasAlignment.corresponding_cameras_alignment,
+        "CORRESPONDING_CAMERAS_ALIGNMENT",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_cameras_alignment()
diff --git a/pytorch3d/tests/benchmarks/bm_chamfer.py b/pytorch3d/tests/benchmarks/bm_chamfer.py
new file mode 100644
index 0000000000000000000000000000000000000000..81944c477cc7b87abcf60b93a3cc5cadde5a1355
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_chamfer.py
@@ -0,0 +1,65 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from itertools import product
+
+import torch
+from fvcore.common.benchmark import benchmark
+from tests.test_chamfer import TestChamfer
+
+
+def bm_chamfer() -> None:
+    """Benchmark the chamfer test drivers (currently switched off).
+
+    The unconditional return below disables the benchmark; the code after it
+    is unreachable until that return is removed.
+    """
+    # Currently disabled.
+    return
+
+    devices = ["cpu", "cuda:0"] if torch.cuda.is_available() else ["cpu"]
+    # Grids shared by both drivers.
+    batch_size = [1, 32]
+    return_normals = [True, False]
+
+    # Naive driver: fixed P1/P2 on every available device.
+    kwargs_list_naive = [
+        {"batch_size": b, "P1": 32, "P2": 64, "return_normals": n, "device": d}
+        for b, n, d in product(batch_size, return_normals, devices)
+    ]
+    benchmark(
+        TestChamfer.chamfer_naive_with_init,
+        "CHAMFER_NAIVE",
+        kwargs_list_naive,
+        warmup_iters=1,
+    )
+
+    if not torch.cuda.is_available():
+        return
+
+    # Main driver: "cuda:0" only, over a larger grid.
+    P1 = [32, 1000, 10000]
+    P2 = [64, 3000, 30000]
+    homogeneous = [True, False]
+    kwargs_list = [
+        {
+            "batch_size": b,
+            "P1": p1,
+            "P2": p2,
+            "return_normals": n,
+            "homogeneous": h,
+            "device": "cuda:0",
+        }
+        for b, p1, p2, n, h in product(
+            batch_size, P1, P2, return_normals, homogeneous
+        )
+    ]
+
+    benchmark(TestChamfer.chamfer_with_init, "CHAMFER", kwargs_list, warmup_iters=1)
+
+
+if __name__ == "__main__":
+    bm_chamfer()
diff --git a/pytorch3d/tests/benchmarks/bm_cubify.py b/pytorch3d/tests/benchmarks/bm_cubify.py
new file mode 100644
index 0000000000000000000000000000000000000000..37b18a5f54db09c37da037b878af14b1339d106b
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_cubify.py
@@ -0,0 +1,21 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from fvcore.common.benchmark import benchmark
+from tests.test_cubify import TestCubify
+
+
+def bm_cubify() -> None:
+    """Time TestCubify.cubify_with_init for three (batch_size, V) pairs."""
+    sizes = [(32, 16), (64, 16), (16, 32)]
+    kwargs_list = [{"batch_size": b, "V": v} for b, v in sizes]
+    benchmark(
+        TestCubify.cubify_with_init, "CUBIFY", kwargs_list, warmup_iters=1
+    )
+
+
+if __name__ == "__main__":
+    bm_cubify()
diff --git a/pytorch3d/tests/benchmarks/bm_face_areas_normals.py b/pytorch3d/tests/benchmarks/bm_face_areas_normals.py
new file mode 100644
index 0000000000000000000000000000000000000000..970a4de30d761704648482d6cd48e6eb47287087
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_face_areas_normals.py
@@ -0,0 +1,47 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from itertools import product
+
+import torch
+from fvcore.common.benchmark import benchmark
+from tests.test_face_areas_normals import TestFaceAreasNormals
+
+
+def bm_face_areas_normals() -> None:
+    """Time both face-areas/normals test drivers on the same case grid.
+
+    Cases run on the CPU and, when CUDA is available, on "cuda:0" too.
+    """
+    backend = ["cpu", "cuda:0"] if torch.cuda.is_available() else ["cpu"]
+    num_meshes = [2, 10, 32]
+    num_verts = [100, 1000]
+    num_faces = [300, 3000]
+    kwargs_list = [
+        {"num_meshes": n, "num_verts": v, "num_faces": f, "device": d}
+        for n, v, f, d in product(num_meshes, num_verts, num_faces, backend)
+    ]
+
+    # First driver, reported as FACE_AREAS_NORMALS.
+    benchmark(
+        TestFaceAreasNormals.face_areas_normals_with_init,
+        "FACE_AREAS_NORMALS",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+    # The *_torch driver, timed on the identical cases.
+    benchmark(
+        TestFaceAreasNormals.face_areas_normals_with_init_torch,
+        "FACE_AREAS_NORMALS_TORCH",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_face_areas_normals()
diff --git a/pytorch3d/tests/benchmarks/bm_graph_conv.py b/pytorch3d/tests/benchmarks/bm_graph_conv.py
new file mode 100644
index 0000000000000000000000000000000000000000..4276f23d2392523b8851cef0ea09668b84712dbe
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_graph_conv.py
@@ -0,0 +1,50 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from itertools import product
+
+import torch
+from fvcore.common.benchmark import benchmark
+from tests.test_graph_conv import TestGraphConv
+
+
+def bm_graph_conv() -> None:
+    """Time TestGraphConv.graph_conv_forward_backward over a small grid.
+
+    The grid covers the "cpu" backend and, when available, the "cuda" one.
+    """
+    backends = ["cpu", "cuda"] if torch.cuda.is_available() else ["cpu"]
+    gconv_dim = [128, 256]
+    num_meshes = [32, 64]
+    num_verts = [100]
+    num_faces = [1000]
+    directed = [False, True]
+
+    grid = product(
+        gconv_dim, num_meshes, num_verts, num_faces, directed, backends
+    )
+    names = (
+        "gconv_dim",
+        "num_meshes",
+        "num_verts",
+        "num_faces",
+        "directed",
+        "backend",
+    )
+    # zip pairs each name with the value at the same position of the combo.
+    kwargs_list = [dict(zip(names, combo)) for combo in grid]
+
+    benchmark(
+        TestGraphConv.graph_conv_forward_backward,
+        "GRAPH CONV",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_graph_conv()
diff --git a/pytorch3d/tests/benchmarks/bm_interpolate_face_attributes.py b/pytorch3d/tests/benchmarks/bm_interpolate_face_attributes.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc56f4293d416abbd8c76c361a2cd99d9c906b27
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_interpolate_face_attributes.py
@@ -0,0 +1,84 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from itertools import product
+
+import torch
+from fvcore.common.benchmark import benchmark
+from pytorch3d.ops.interp_face_attrs import (
+    interpolate_face_attributes,
+    interpolate_face_attributes_python,
+)
+
+
+def _generate_data(N, S, K, F, D, device, requires_grad=False):
+    """Return random (pix_to_face, barycentric_coords, face_attrs, grad)."""
+    pix_shape = (N, S, S, K)
+    # Draw order is kept fixed so a given seed always yields the same data.
+    pix_to_face = torch.randint(-10, F, pix_shape, device=device)
+    bary = torch.randn(*pix_shape, 3, device=device, requires_grad=requires_grad)
+    attrs = torch.randn(F, 3, D, device=device, requires_grad=requires_grad)
+    return pix_to_face, bary, attrs, torch.randn(*pix_shape, D, device=device)
+
+
+def _bm_forward(N, S, F, K, D, impl):
+    """Return a synced forward-pass closure; ValueError on an unknown impl."""
+    if impl not in ("cuda", "python"):  # previously left `fun` unbound
+        raise ValueError(f"unknown impl: {impl!r}")
+    cuda_fun = interpolate_face_attributes
+    fun = cuda_fun if impl == "cuda" else interpolate_face_attributes_python
+    # Runtime depends on pix_to_face; seed so every impl sees the same data.
+    torch.manual_seed(0)
+    args = _generate_data(N, S, K, F, D, torch.device("cuda"))[:3]
+    torch.cuda.synchronize()
+
+    def run():
+        fun(*args)
+        torch.cuda.synchronize()  # wait for asynchronously launched kernels
+
+    return run
+
+
+def _bm_forward_backward(N, S, F, K, D, impl):
+    """Return a synced forward+backward closure; ValueError on unknown impl."""
+    if impl not in ("cuda", "python"):  # previously left `fun` unbound
+        raise ValueError(f"unknown impl: {impl!r}")
+    cuda_fun = interpolate_face_attributes
+    fun = cuda_fun if impl == "cuda" else interpolate_face_attributes_python
+    torch.manual_seed(0)  # same data for every impl
+    data = _generate_data(N, S, K, F, D, torch.device("cuda"), requires_grad=True)
+    args, grad = data[:3], data[3]
+    torch.cuda.synchronize()
+
+    def run():
+        fun(*args).backward(gradient=grad)
+        torch.cuda.synchronize()  # wait for asynchronously launched kernels
+
+    return run
+
+
+def bm_interpolate_face_attribues() -> None:
+    """Time forward and forward+backward interpolation for both impls.
+
+    Does nothing when CUDA is unavailable.
+    """
+    # For now only benchmark on GPU
+    if torch.cuda.is_available():
+        Ns, Ss, Ks = [1, 4], [128], [1, 10, 40]
+        Fs, Ds = [5000], [1, 3, 16]
+        impls = ["python", "cuda"]
+        keys = ("N", "S", "K", "F", "D", "impl")
+        kwargs_list = [
+            dict(zip(keys, combo)) for combo in product(Ns, Ss, Ks, Fs, Ds, impls)
+        ]
+        benchmark(_bm_forward, "FORWARD", kwargs_list, warmup_iters=3)
+        benchmark(
+            _bm_forward_backward, "FORWARD+BACKWARD", kwargs_list, warmup_iters=3
+        )
+
+
+if __name__ == "__main__":
+    bm_interpolate_face_attribues()
diff --git a/pytorch3d/tests/benchmarks/bm_iou_box3d.py b/pytorch3d/tests/benchmarks/bm_iou_box3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..152e36ea78862faab7acd73968613e1e5cb0a033
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_iou_box3d.py
@@ -0,0 +1,54 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from itertools import product
+
+from fvcore.common.benchmark import benchmark
+from tests.test_iou_box3d import TestIoU3D
+
+
+def bm_iou_box3d() -> None:
+    """Benchmark the 3D box IoU test drivers.
+
+    Four groups run in order: TestIoU3D.iou on larger "cuda:0" cases, the
+    same driver on small cases for both "cpu" and "cuda:0", then the naive
+    and sampling drivers on "cuda:0".
+    Both TestIoU3D.iou groups are reported under the same "3D_IOU" label.
+    """
+    gpu = "cuda:0"
+
+    # Realistic use cases
+    N = [30, 100]
+    M = [5, 10, 100]
+    realistic = [{"N": n, "M": m, "device": gpu} for n, m in product(N, M)]
+    benchmark(TestIoU3D.iou, "3D_IOU", realistic, warmup_iters=1)
+
+    # Comparison of C++/CUDA
+    N = [1, 4, 8, 16]
+    devices = ["cpu", gpu]
+    compared = [
+        {"N": n, "M": m, "device": d}
+        for n, m, d in product(N, N, devices)
+    ]
+    benchmark(TestIoU3D.iou, "3D_IOU", compared, warmup_iters=1)
+
+    # Naive PyTorch
+    N = [1, 4]
+    naive = [{"N": n, "M": m, "device": gpu} for n, m in product(N, N)]
+    benchmark(TestIoU3D.iou_naive, "3D_IOU_NAIVE", naive, warmup_iters=1)
+
+    # Sampling based method
+    # Same N grid as the naive group above.
+    num_samples = [2000, 5000]
+    sampled = [
+        {"N": n, "M": m, "num_samples": s, "device": gpu}
+        for n, m, s in product(N, N, num_samples)
+    ]
+    benchmark(TestIoU3D.iou_sampling, "3D_IOU_SAMPLING", sampled, warmup_iters=1)
+
+
+if __name__ == "__main__":
+    bm_iou_box3d()
diff --git a/pytorch3d/tests/benchmarks/bm_knn.py b/pytorch3d/tests/benchmarks/bm_knn.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6ffa5e857f0dd9eb5c28cc71c655e1d1fe5c9d6
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_knn.py
@@ -0,0 +1,34 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from itertools import product
+
+from fvcore.common.benchmark import benchmark
+from tests.test_knn import TestKNN
+
+
+def bm_knn() -> None:
+    """Benchmark the square and ragged KNN test drivers.
+
+    Cases run on the CPU and, when a CUDA device is available, on "cuda:0".
+    """
+    import torch  # local import: this module has no top-level torch import
+
+    # Requesting "cuda:0" unconditionally fails on CPU-only hosts.
+    backends = ["cpu", "cuda:0"] if torch.cuda.is_available() else ["cpu"]
+    Ns, P1s, P2s, Ds, Ks = [32], [256], [128, 512], [3], [24]
+    kwargs_list = [
+        {"N": N, "P1": P1, "P2": P2, "D": D, "K": K, "device": b}
+        for N, P1, P2, D, K, b in product(Ns, P1s, P2s, Ds, Ks, backends)
+    ]
+
+    benchmark(TestKNN.knn_square, "KNN_SQUARE", kwargs_list, warmup_iters=1)
+
+    benchmark(TestKNN.knn_ragged, "KNN_RAGGED", kwargs_list, warmup_iters=1)
+
+
+if __name__ == "__main__":
+    bm_knn()
diff --git a/pytorch3d/tests/benchmarks/bm_lighting.py b/pytorch3d/tests/benchmarks/bm_lighting.py
new file mode 100644
index 0000000000000000000000000000000000000000..b5c70b63f9e98bec0fdec265169f2b0e12afbccf
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_lighting.py
@@ -0,0 +1,55 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from itertools import product
+
+import torch
+from fvcore.common.benchmark import benchmark
+from pytorch3d.renderer.lighting import diffuse, specular
+
+
+def _bm_diffuse_cuda_with_init(N, S, K):
+    """Return a closure that runs diffuse() on random CUDA inputs and syncs."""
+    pix = (N, S, S, K, 3)  # shape shared by normals and direction
+    args = [torch.randn(shape, device="cuda") for shape in (pix, (1, 3), pix)]
+    torch.cuda.synchronize()
+    def run():
+        diffuse(*args)
+        torch.cuda.synchronize()  # kernels launch asynchronously
+    return run
+
+
+def _bm_specular_cuda_with_init(N, S, K):
+    """Return a closure that runs specular() on random CUDA inputs and syncs."""
+    pix = (N, S, S, K, 3)  # shape of points, normals and direction
+    # Argument order: points, normals, direction, color, camera, shininess.
+    shapes = (pix, pix, pix, (1, 3), (N, 3), (N,))
+    args = [torch.randn(shape, device="cuda") for shape in shapes]
+    torch.cuda.synchronize()
+    def run():
+        specular(*args)
+        torch.cuda.synchronize()  # kernels launch asynchronously
+    return run
+
+
+def bm_lighting() -> None:
+    """Time the diffuse and specular closures over a grid of (N, S, K).
+
+    Does nothing when CUDA is unavailable.
+    """
+    # For now only benchmark lighting on GPU
+    if torch.cuda.is_available():
+        Ns = [1, 8]
+        Ss = [128, 256]
+        Ks = [1, 10, 80]
+        kwargs_list = [
+            {"N": N, "S": S, "K": K} for N, S, K in product(Ns, Ss, Ks)
+        ]
+        benchmark(_bm_diffuse_cuda_with_init, "DIFFUSE", kwargs_list, warmup_iters=3)
+        benchmark(_bm_specular_cuda_with_init, "SPECULAR", kwargs_list, warmup_iters=3)
+
+
+if __name__ == "__main__":
+    bm_lighting()
diff --git a/pytorch3d/tests/benchmarks/bm_main.py b/pytorch3d/tests/benchmarks/bm_main.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce10012e2f559eaadf053b29f06df8b3e1e00c2f
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_main.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import glob
+import os
+import subprocess
+import sys
+from os.path import dirname, isfile, join
+
+
+def main() -> None:
+    """Run the selected (or all) bm_*.py scripts, each in a subprocess."""
+    here = dirname(__file__)
+    # pyre-ignore[16]
+    if len(sys.argv) > 1:
+        # Skip argv[0]: the script's own name can itself start with "bm_",
+        # which made bm_main.py select and re-launch itself.
+        # pyre-ignore[16]
+        file_names = [join(here, n) for n in sys.argv[1:] if n.startswith("bm_")]
+    else:
+        # Get all the benchmark files (starting with "bm_").
+        bm_files = glob.glob(join(here, "bm_*.py"))
+        file_names = sorted(
+            f for f in bm_files if isfile(f) and not f.endswith("bm_main.py")
+        )
+
+    # Forward all important path information to the subprocesses through the
+    # environment, joined with os.pathsep instead of a hard-coded ":".
+    os.environ["PATH"] = os.pathsep.join([sys.path[0], os.environ.get("PATH", "")])
+    ld_path = os.environ.get("LD_LIBRARY_PATH", "")
+    os.environ["LD_LIBRARY_PATH"] = os.pathsep.join([sys.path[0], ld_path])
+    os.environ["PYTHONPATH"] = os.pathsep.join(sys.path)
+    for file_name in file_names:
+        subprocess.check_call([sys.executable, file_name])
+
+
+if __name__ == "__main__":
+    main()  # pragma: no cover
diff --git a/pytorch3d/tests/benchmarks/bm_marching_cubes.py b/pytorch3d/tests/benchmarks/bm_marching_cubes.py
new file mode 100644
index 0000000000000000000000000000000000000000..625728310174eb5c2f4ad488d3e1705b8565ac98
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_marching_cubes.py
@@ -0,0 +1,35 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import itertools
+
+from fvcore.common.benchmark import benchmark
+from tests.test_marching_cubes import TestMarchingCubes
+
+
+def bm_marching_cubes() -> None:
+    """Time TestMarchingCubes.marching_cubes_with_init over a case grid."""
+    algo_types = ["naive", "extension"]
+    batch_sizes = [1, 2]
+    volume_sizes = [5, 10, 20, 100, 512]
+    devices = ["cpu", "cuda:0"]
+    kwargs_list = [
+        {"algo_type": algo, "batch_size": batch, "V": size, "device": dev}
+        for algo, batch, size, dev in itertools.product(
+            algo_types, batch_sizes, volume_sizes, devices
+        )
+    ]
+
+    benchmark(
+        TestMarchingCubes.marching_cubes_with_init,
+        "MARCHING_CUBES",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_marching_cubes()
diff --git a/pytorch3d/tests/benchmarks/bm_mesh_edge_loss.py b/pytorch3d/tests/benchmarks/bm_mesh_edge_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..3cf14b174ea7fd3b8ef9a7cb45932a744b485e3e
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_mesh_edge_loss.py
@@ -0,0 +1,29 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from itertools import product
+
+from fvcore.common.benchmark import benchmark
+from tests.test_mesh_edge_loss import TestMeshEdgeLoss
+
+
+def bm_mesh_edge_loss() -> None:
+    """Time TestMeshEdgeLoss.mesh_edge_loss over mesh count and max sizes."""
+    num_meshes = [1, 16, 32]
+    max_v = [100, 10000]
+    max_f = [300, 30000]
+    kwargs_list = [
+        {"num_meshes": n, "max_v": v, "max_f": f}
+        for n, v, f in product(num_meshes, max_v, max_f)
+    ]
+    benchmark(
+        TestMeshEdgeLoss.mesh_edge_loss, "MESH_EDGE_LOSS", kwargs_list, warmup_iters=1
+    )
+
+
+if __name__ == "__main__":
+    bm_mesh_edge_loss()
diff --git a/pytorch3d/tests/benchmarks/bm_mesh_io.py b/pytorch3d/tests/benchmarks/bm_mesh_io.py
new file mode 100644
index 0000000000000000000000000000000000000000..37155fae34691442d4f1acb0e47a1563f0746ba6
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_mesh_io.py
@@ -0,0 +1,105 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from itertools import product
+
+from fvcore.common.benchmark import benchmark
+from tests.test_io_obj import TestMeshObjIO
+from tests.test_io_ply import TestMeshPlyIO
+
+
+def bm_save_load() -> None:
+    """Time mesh OBJ/PLY load/save and texture-sampling test drivers."""
+    # The simple-mesh drivers take V and F.
+    simple_cases = [
+        {"V": verts, "F": faces}
+        for verts, faces in ((100, 200), (1000, 2000), (10000, 20000))
+    ]
+    benchmark(
+        TestMeshObjIO.bm_load_simple_obj_with_init,
+        "LOAD_SIMPLE_OBJ",
+        simple_cases,
+        warmup_iters=1,
+    )
+    benchmark(
+        TestMeshObjIO.bm_save_simple_obj_with_init,
+        "SAVE_SIMPLE_OBJ",
+        simple_cases,
+        warmup_iters=1,
+    )
+    benchmark(
+        TestMeshPlyIO.bm_load_simple_ply_with_init,
+        "LOAD_SIMPLE_PLY",
+        simple_cases,
+        warmup_iters=1,
+    )
+    benchmark(
+        TestMeshPlyIO.bm_save_simple_ply_with_init,
+        "SAVE_SIMPLE_PLY",
+        simple_cases,
+        warmup_iters=1,
+    )
+
+    # The complex-mesh drivers take only N.
+    complex_cases = [{"N": n} for n in (8, 32, 128)]
+    benchmark(
+        TestMeshObjIO.bm_load_complex_obj,
+        "LOAD_COMPLEX_OBJ",
+        complex_cases,
+        warmup_iters=1,
+    )
+    benchmark(
+        TestMeshObjIO.bm_save_complex_obj,
+        "SAVE_COMPLEX_OBJ",
+        complex_cases,
+        warmup_iters=1,
+    )
+    benchmark(
+        TestMeshPlyIO.bm_load_complex_ply,
+        "LOAD_COMPLEX_PLY",
+        complex_cases,
+        warmup_iters=1,
+    )
+    benchmark(
+        TestMeshPlyIO.bm_save_complex_ply,
+        "SAVE_COMPLEX_PLY",
+        complex_cases,
+        warmup_iters=1,
+    )
+
+    # Texture loading benchmarks
+    atlas_cases = [{"R": r} for r in (2, 4, 10, 15, 20)]
+    benchmark(
+        TestMeshObjIO.bm_load_texture_atlas,
+        "PYTORCH3D_TEXTURE_ATLAS",
+        atlas_cases,
+        warmup_iters=1,
+    )
+
+    # Bilinear sampling drivers over every (S, F, R) combination.
+    S = [64, 256, 1024]
+    F = [100, 1000, 10000]
+    R = [5, 10, 20]
+    sampling_cases = [
+        {"S": s, "F": f, "R": r} for s, f, r in product(S, F, R)
+    ]
+
+    benchmark(
+        TestMeshObjIO.bm_bilinear_sampling_vectorized,
+        "BILINEAR_VECTORIZED",
+        sampling_cases,
+        warmup_iters=1,
+    )
+    benchmark(
+        TestMeshObjIO.bm_bilinear_sampling_grid_sample,
+        "BILINEAR_GRID_SAMPLE",
+        sampling_cases,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_save_load()
diff --git a/pytorch3d/tests/benchmarks/bm_mesh_laplacian_smoothing.py b/pytorch3d/tests/benchmarks/bm_mesh_laplacian_smoothing.py
new file mode 100644
index 0000000000000000000000000000000000000000..440aafbae75bc917aaea13f7e096d3a5fb73a92c
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_mesh_laplacian_smoothing.py
@@ -0,0 +1,40 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from itertools import product
+
+import torch
+from fvcore.common.benchmark import benchmark
+from tests.test_mesh_laplacian_smoothing import TestLaplacianSmoothing
+
+
+def bm_mesh_laplacian_smoothing() -> None:
+    """Time TestLaplacianSmoothing.laplacian_smoothing_with_init.
+
+    Cases run on the CPU and, when CUDA is available, on "cuda" too.
+    """
+    devices = ["cpu", "cuda"] if torch.cuda.is_available() else ["cpu"]
+    num_meshes = [2, 10, 32]
+    num_verts = [100, 1000]
+    num_faces = [300, 3000]
+    # One kwargs dict per grid point, keyed in product-argument order.
+    keys = ("num_meshes", "num_verts", "num_faces", "device")
+    kwargs_list = [
+        dict(zip(keys, combo))
+        for combo in product(num_meshes, num_verts, num_faces, devices)
+    ]
+
+    benchmark(
+        TestLaplacianSmoothing.laplacian_smoothing_with_init,
+        "MESH_LAPLACIAN_SMOOTHING",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_mesh_laplacian_smoothing()
diff --git a/pytorch3d/tests/benchmarks/bm_mesh_normal_consistency.py b/pytorch3d/tests/benchmarks/bm_mesh_normal_consistency.py
new file mode 100644
index 0000000000000000000000000000000000000000..11d1be46c19c0e923c60f7165ac8d032680f834c
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_mesh_normal_consistency.py
@@ -0,0 +1,37 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from itertools import product
+
+import torch
+from fvcore.common.benchmark import benchmark
+from tests.test_mesh_normal_consistency import TestMeshNormalConsistency
+
+
+def bm_mesh_normal_consistency() -> None:
+    """Benchmark TestMeshNormalConsistency.mesh_normal_consistency_with_ico.
+
+    Runs every (num_meshes, level, device) combination; CUDA only if available.
+    """
+    devices = ["cpu"] + (["cuda"] if torch.cuda.is_available() else [])
+    mesh_counts = [16, 32, 64]
+    ico_levels = [2, 3]
+    kwargs_list = [
+        {"num_meshes": n, "level": lvl, "device": dev}
+        for n, lvl, dev in product(mesh_counts, ico_levels, devices)
+    ]
+
+    benchmark(
+        TestMeshNormalConsistency.mesh_normal_consistency_with_ico,
+        "MESH_NORMAL_CONSISTENCY_ICO",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_mesh_normal_consistency()
diff --git a/pytorch3d/tests/benchmarks/bm_mesh_rasterizer_transform.py b/pytorch3d/tests/benchmarks/bm_mesh_rasterizer_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..e65c265ef6e902b51acca9a8df9a258eeb85e7e4
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_mesh_rasterizer_transform.py
@@ -0,0 +1,53 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from itertools import product
+
+import torch
+from fvcore.common.benchmark import benchmark
+from pytorch3d.renderer.cameras import FoVPerspectiveCameras, look_at_view_transform
+from pytorch3d.renderer.mesh.rasterizer import MeshRasterizer
+from pytorch3d.utils.ico_sphere import ico_sphere
+
+
+def rasterize_transform_with_init(num_meshes: int, ico_level: int = 5, device="cuda"):
+    """Build a MeshRasterizer and sphere batch; return a closure timing transform."""
+    rotation, translation = look_at_view_transform(1.0, 0.0, 0.0)
+    rasterizer = MeshRasterizer(
+        cameras=FoVPerspectiveCameras(device=device, R=rotation, T=translation)
+    )
+    batch = ico_sphere(ico_level, device).extend(num_meshes)
+
+    # Wait for outstanding CUDA work before the closure gets timed.
+    torch.cuda.synchronize()
+
+    def raster_fn():
+        rasterizer.transform(batch)
+        torch.cuda.synchronize()
+
+    return raster_fn
+
+
+def bm_mesh_rasterizer_transform() -> None:
+    """Benchmark rasterize_transform_with_init; does nothing without CUDA."""
+    if not torch.cuda.is_available():
+        return
+
+    mesh_counts = [1, 8]
+    ico_levels = [0, 1, 3, 4]
+    kwargs_list = [
+        {"num_meshes": n, "ico_level": level}
+        for n, level in product(mesh_counts, ico_levels)
+    ]
+
+    benchmark(
+        rasterize_transform_with_init, "MESH_RASTERIZER", kwargs_list, warmup_iters=1
+    )
+
+
+if __name__ == "__main__":
+    bm_mesh_rasterizer_transform()
diff --git a/pytorch3d/tests/benchmarks/bm_meshes.py b/pytorch3d/tests/benchmarks/bm_meshes.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d0b63f2ba1f0b339780238e029b306fcaea6847
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_meshes.py
@@ -0,0 +1,43 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from itertools import product
+
+import torch
+from fvcore.common.benchmark import benchmark
+from tests.test_meshes import TestMeshes
+
+
+def bm_compute_packed_padded_meshes() -> None:
+    """Benchmark TestMeshes packed and padded computation on one kwargs grid."""
+    devices = ["cpu"] + (["cuda"] if torch.cuda.is_available() else [])
+
+    mesh_counts = [32, 128]
+    vert_limits = [100, 1000, 10000]
+    face_limits = [300, 3000, 30000]
+    # The same grid is passed to both benchmarks below.
+    kwargs_list = [
+        {"num_meshes": n, "max_v": v, "max_f": f, "device": d}
+        for n, v, f, d in product(mesh_counts, vert_limits, face_limits, devices)
+    ]
+
+    benchmark(
+        TestMeshes.compute_packed_with_init,
+        "COMPUTE_PACKED",
+        kwargs_list,
+        warmup_iters=1,
+    )
+    benchmark(
+        TestMeshes.compute_padded_with_init,
+        "COMPUTE_PADDED",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_compute_packed_padded_meshes()
diff --git a/pytorch3d/tests/benchmarks/bm_packed_to_padded.py b/pytorch3d/tests/benchmarks/bm_packed_to_padded.py
new file mode 100644
index 0000000000000000000000000000000000000000..e61f36daa2cf8a912bfa8d09915ad16f36b4f23b
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_packed_to_padded.py
@@ -0,0 +1,48 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from itertools import product
+
+import torch
+from fvcore.common.benchmark import benchmark
+from tests.test_packed_to_padded import TestPackedToPadded
+
+
+def bm_packed_to_padded() -> None:
+    """Benchmark both TestPackedToPadded entry points on one kwargs grid.
+
+    "cuda:0" joins the device list only when CUDA is available; num_d takes
+    the values 0, 1 and 16.
+    """
+    devices = ["cpu"] + (["cuda:0"] if torch.cuda.is_available() else [])
+
+    mesh_counts = [2, 10, 32]
+    vert_counts = [100, 1000]
+    face_counts = [300, 3000]
+    d_values = [0, 1, 16]
+    grid = product(mesh_counts, vert_counts, face_counts, d_values, devices)
+    kwargs_list = [
+        {"num_meshes": n, "num_verts": v, "num_faces": f, "num_d": d, "device": b}
+        for n, v, f, d, b in grid
+    ]
+
+    benchmark(
+        TestPackedToPadded.packed_to_padded_with_init,
+        "PACKED_TO_PADDED",
+        kwargs_list,
+        warmup_iters=1,
+    )
+    benchmark(
+        TestPackedToPadded.packed_to_padded_with_init_torch,
+        "PACKED_TO_PADDED_TORCH",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_packed_to_padded()
diff --git a/pytorch3d/tests/benchmarks/bm_perspective_n_points.py b/pytorch3d/tests/benchmarks/bm_perspective_n_points.py
new file mode 100644
index 0000000000000000000000000000000000000000..672e530af9b61f5f032581856fbacae788872fe5
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_perspective_n_points.py
@@ -0,0 +1,33 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import itertools
+
+from fvcore.common.benchmark import benchmark
+from tests.test_perspective_n_points import TestPerspectiveNPoints
+
+
+def bm_perspective_n_points() -> None:
+    """Benchmark TestPerspectiveNPoints.case_with_gaussian_points on a grid."""
+    batch_sizes = [1, 10, 100]
+    point_counts = [100, 100000]
+    skip_flags = [False, True]
+    kwargs_list = [
+        {"batch_size": bs, "num_pts": pts, "skip_q": skip}
+        for bs, pts, skip in itertools.product(batch_sizes, point_counts, skip_flags)
+    ]
+
+    test_case = TestPerspectiveNPoints()
+    benchmark(
+        test_case.case_with_gaussian_points,
+        "PerspectiveNPoints",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_perspective_n_points()
diff --git a/pytorch3d/tests/benchmarks/bm_point_mesh_distance.py b/pytorch3d/tests/benchmarks/bm_point_mesh_distance.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc1da12883da43fa792ce14d561ae6af072b7a70
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_point_mesh_distance.py
@@ -0,0 +1,44 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from itertools import product
+
+from fvcore.common.benchmark import benchmark
+from tests.test_point_mesh_distance import TestPointMeshDistance
+
+
+def bm_point_mesh_distance() -> None:
+    """Benchmark point-mesh edge/face distances; skipped when CUDA is absent."""
+    import torch  # local import: this module does not import torch at the top
+
+    if not torch.cuda.is_available():
+        return
+
+    batch_sizes = [4, 8, 16]
+    vert_counts = [100, 1000]
+    face_counts = [300, 3000]
+    point_counts = [5000, 10000]
+    kwargs_list = [
+        {"N": n, "V": v, "F": f, "P": p, "device": "cuda:0"}
+        for n, v, f, p in product(batch_sizes, vert_counts, face_counts, point_counts)
+    ]
+    benchmark(
+        TestPointMeshDistance.point_mesh_edge,
+        "POINT_MESH_EDGE",
+        kwargs_list,
+        warmup_iters=1,
+    )
+    benchmark(
+        TestPointMeshDistance.point_mesh_face,
+        "POINT_MESH_FACE",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_point_mesh_distance()
diff --git a/pytorch3d/tests/benchmarks/bm_pointclouds.py b/pytorch3d/tests/benchmarks/bm_pointclouds.py
new file mode 100644
index 0000000000000000000000000000000000000000..4868e6bf2aa202fb5fd77ccb21f7e4699ea49db8
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_pointclouds.py
@@ -0,0 +1,38 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from itertools import product
+
+from fvcore.common.benchmark import benchmark
+from tests.test_pointclouds import TestPointclouds
+
+
+def bm_compute_packed_padded_pointclouds() -> None:
+    """Benchmark TestPointclouds packed and padded computation on one grid."""
+    cloud_counts = [32, 128]
+    point_limits = [100, 10000]
+    feature_dims = [1, 10, 300]
+    kwargs_list = [
+        {"num_clouds": n, "max_p": p, "features": f}
+        for n, p, f in product(cloud_counts, point_limits, feature_dims)
+    ]
+    benchmark(
+        TestPointclouds.compute_packed_with_init,
+        "COMPUTE_PACKED",
+        kwargs_list,
+        warmup_iters=1,
+    )
+    benchmark(
+        TestPointclouds.compute_padded_with_init,
+        "COMPUTE_PADDED",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_compute_packed_padded_pointclouds()
diff --git a/pytorch3d/tests/benchmarks/bm_points_alignment.py b/pytorch3d/tests/benchmarks/bm_points_alignment.py
new file mode 100644
index 0000000000000000000000000000000000000000..842773fb6390548b5cfcf28f6fdc0e90a4b67f05
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_points_alignment.py
@@ -0,0 +1,80 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from copy import deepcopy
+from itertools import product
+
+from fvcore.common.benchmark import benchmark
+from tests.test_points_alignment import TestCorrespondingPointsAlignment, TestICP
+
+
+def bm_iterative_closest_point() -> None:
+    """Benchmark TestICP.iterative_closest_point over a grid of settings.
+
+    Cases with dim == 3 are additionally run with use_pointclouds=True.
+    """
+    case_grid = {
+        "batch_size": [1, 10],
+        "dim": [3, 20],
+        "n_points_X": [100, 1000],
+        "n_points_Y": [100, 1000],
+        "use_pointclouds": [False],
+    }
+    # The keys above are declared in sorted order, so they match the values.
+    arg_names = sorted(case_grid)
+    kwargs_list = [
+        dict(zip(arg_names, combo)) for combo in product(*case_grid.values())
+    ]
+    kwargs_list += [
+        {**kwargs, "use_pointclouds": True}
+        for kwargs in kwargs_list
+        if kwargs["dim"] == 3
+    ]
+
+    benchmark(
+        TestICP.iterative_closest_point,
+        "IterativeClosestPoint",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+def bm_corresponding_points_alignment() -> None:
+    """Benchmark TestCorrespondingPointsAlignment.corresponding_points_alignment.
+
+    Cases with dim == 3 are additionally run with use_pointclouds=True.
+    """
+    case_grid = {
+        "allow_reflection": [True, False],
+        "batch_size": [1, 10, 100],
+        "dim": [3, 20],
+        "estimate_scale": [True, False],
+        "n_points": [100, 10000],
+        "random_weights": [False, True],
+        "use_pointclouds": [False],
+    }
+    # The keys above are declared in sorted order, so they match the values.
+    arg_names = sorted(case_grid)
+    kwargs_list = [
+        dict(zip(arg_names, combo)) for combo in product(*case_grid.values())
+    ]
+    kwargs_list += [
+        {**kwargs, "use_pointclouds": True}
+        for kwargs in kwargs_list
+        if kwargs["dim"] == 3
+    ]
+
+    benchmark(
+        TestCorrespondingPointsAlignment.corresponding_points_alignment,
+        "CorrespodingPointsAlignment",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_corresponding_points_alignment()
+    bm_iterative_closest_point()
diff --git a/pytorch3d/tests/benchmarks/bm_points_normals.py b/pytorch3d/tests/benchmarks/bm_points_normals.py
new file mode 100644
index 0000000000000000000000000000000000000000..efdf9950bb134f77f2a21c8b61d8c9686cc90b22
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_points_normals.py
@@ -0,0 +1,47 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import itertools
+
+import torch
+from fvcore.common.benchmark import benchmark
+from pytorch3d.ops import estimate_pointcloud_normals
+from tests.test_points_normals import TestPCLNormals
+
+
+def to_bm(num_points, use_symeig_workaround):
+    """Return a closure running estimate_pointcloud_normals on cuda:0."""
+    points_padded, _ = TestPCLNormals.init_spherical_pcl(
+        num_points=num_points, device=torch.device("cuda:0"), use_pointclouds=False
+    )
+    torch.cuda.synchronize()
+
+    def estimate():
+        estimate_pointcloud_normals(
+            points_padded, use_symeig_workaround=use_symeig_workaround
+        )
+        torch.cuda.synchronize()
+
+    return estimate
+
+
+def bm_points_normals() -> None:
+    """Benchmark estimate_pointcloud_normals; skipped when CUDA is unavailable."""
+    # to_bm allocates on cuda:0 unconditionally, so bail out on CPU-only hosts.
+    if not torch.cuda.is_available():
+        return
+
+    workarounds = [True, False]
+    point_counts = [3000, 6000]
+    kwargs_list = [
+        {"use_symeig_workaround": workaround, "num_points": n}
+        for workaround, n in itertools.product(workarounds, point_counts)
+    ]
+    benchmark(to_bm, "normals", kwargs_list, warmup_iters=1)
+
+
+if __name__ == "__main__":
+    bm_points_normals()
diff --git a/pytorch3d/tests/benchmarks/bm_points_to_volumes.py b/pytorch3d/tests/benchmarks/bm_points_to_volumes.py
new file mode 100644
index 0000000000000000000000000000000000000000..7a79b1954e9a992cfb26bc045740dafcc549694f
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_points_to_volumes.py
@@ -0,0 +1,33 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import itertools
+
+from fvcore.common.benchmark import benchmark
+from tests.test_points_to_volumes import TestPointsToVolumes
+
+
+def bm_points_to_volumes() -> None:
+    """Benchmark add_points_to_volumes on CPU and, if available, on cuda:0."""
+    import torch  # local import: torch is not imported at module level here
+
+    # List cuda:0 only when it is usable so CPU-only hosts do not fail.
+    case_grid = {
+        "device": ["cpu"] + (["cuda:0"] if torch.cuda.is_available() else []),
+        "batch_size": [10, 100],
+        "interp_mode": ["trilinear", "nearest"],
+        "volume_size": [[25, 25, 25], [101, 111, 121]],
+        "n_points": [1000, 10000, 100000],
+    }
+    test_cases = itertools.product(*case_grid.values())
+    kwargs_list = [dict(zip(case_grid.keys(), case)) for case in test_cases]
+
+    bm_fn = TestPointsToVolumes.add_points_to_volumes
+    benchmark(bm_fn, "ADD_POINTS_TO_VOLUMES", kwargs_list, warmup_iters=1)
+
+
+if __name__ == "__main__":
+    bm_points_to_volumes()
diff --git a/pytorch3d/tests/benchmarks/bm_pulsar.py b/pytorch3d/tests/benchmarks/bm_pulsar.py
new file mode 100644
index 0000000000000000000000000000000000000000..74110ef124bf6d0ee2c1bd8f9e1979e17f47d1ce
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_pulsar.py
@@ -0,0 +1,126 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Test render speed."""
+import logging
+import sys
+from os import path
+
+import torch
+from fvcore.common.benchmark import benchmark
+from pytorch3d.renderer.points.pulsar import Renderer
+from torch.autograd import Variable
+
+
+# Making sure you can run this, even if pulsar hasn't been installed yet.
+sys.path.insert(0, path.join(path.dirname(__file__), ".."))
+LOGGER = logging.getLogger(__name__)
+
+
+"""Measure the execution speed of the rendering.
+
+This measures a very pessimistic upper bound on speed, because synchronization
+points have to be introduced in Python. On a pure PyTorch execution pipeline,
+results should be significantly faster. You can get pure CUDA timings through
+C++ by activating `PULSAR_TIMINGS_BATCHED_ENABLED` in the file
+`pytorch3d/csrc/pulsar/logging.h` or defining it for your compiler.
+"""
+
+
+def _bm_pulsar():
+    """Return a closure that times one Renderer.forward call on CUDA.
+
+    Setup builds a Renderer(1000, 1000, 1_000_000) and seeded random inputs.
+    """
+    n_points = 1_000_000
+    width = 1_000
+    height = 1_000
+    device = torch.device("cuda")
+    renderer = Renderer(width, height, n_points)
+    # Generate sample data.
+    torch.manual_seed(1)
+    vert_pos = torch.rand(n_points, 3, dtype=torch.float32) * 10.0
+    vert_pos[:, 2] += 25.0
+    vert_pos[:, :2] -= 5.0
+    vert_col = torch.rand(n_points, 3, dtype=torch.float32)
+    vert_rad = torch.rand(n_points, dtype=torch.float32)
+    cam_params = torch.tensor(
+        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0], dtype=torch.float32
+    )
+
+    renderer = renderer.to(device)
+    # Move the inputs to the device and wrap them without gradient tracking.
+    render_inputs = [
+        Variable(tensor.to(device), requires_grad=False)
+        for tensor in (vert_pos, vert_col, vert_rad, cam_params)
+    ]
+
+    def bm_closure():
+        renderer.forward(
+            *render_inputs,
+            1.0e-1,
+            45.0,
+            percent_allowed_difference=0.01,
+        )
+        # Wait for the GPU so the whole render is included in the timing.
+        torch.cuda.synchronize()
+
+    return bm_closure
+
+
+def _bm_pulsar_backward():
+    """Return a closure that times the backward pass of one pulsar render.
+
+    The forward pass runs once during setup; each closure call backpropagates
+    the summed output again, retaining the graph between calls.
+    """
+    n_points = 1_000_000
+    width = 1_000
+    height = 1_000
+    device = torch.device("cuda")
+    renderer = Renderer(width, height, n_points)
+    # Generate sample data.
+    torch.manual_seed(1)
+    vert_pos = torch.rand(n_points, 3, dtype=torch.float32) * 10.0
+    vert_pos[:, 2] += 25.0
+    vert_pos[:, :2] -= 5.0
+    vert_col = torch.rand(n_points, 3, dtype=torch.float32)
+    vert_rad = torch.rand(n_points, dtype=torch.float32)
+    cam_params = torch.tensor(
+        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0], dtype=torch.float32
+    )
+
+    renderer = renderer.to(device)
+    # All four inputs are marked as requiring gradients.
+    render_inputs = [
+        Variable(tensor.to(device), requires_grad=True)
+        for tensor in (vert_pos, vert_col, vert_rad, cam_params)
+    ]
+    loss = renderer.forward(
+        *render_inputs,
+        1.0e-1,
+        45.0,
+        percent_allowed_difference=0.01,
+    ).sum()
+
+    def bm_closure():
+        # retain_graph keeps the graph alive so every call can backpropagate.
+        loss.backward(retain_graph=True)
+        torch.cuda.synchronize()
+
+    return bm_closure
+
+
+def bm_pulsar() -> None:
+    """Benchmark the pulsar forward and backward closures if CUDA is available."""
+    if torch.cuda.is_available():
+        # Neither setup function takes arguments, hence the single empty dict.
+        benchmark(_bm_pulsar, "PULSAR_FORWARD", [{}], warmup_iters=3)
+        benchmark(_bm_pulsar_backward, "PULSAR_BACKWARD", [{}], warmup_iters=3)
+
+
+if __name__ == "__main__":
+    bm_pulsar()
diff --git a/pytorch3d/tests/benchmarks/bm_rasterize_meshes.py b/pytorch3d/tests/benchmarks/bm_rasterize_meshes.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a6531a389189ad13c63d1712ef08d499ba1584c
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_rasterize_meshes.py
@@ -0,0 +1,127 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+from itertools import product
+
+import torch
+from fvcore.common.benchmark import benchmark
+from tests.test_rasterize_meshes import TestRasterizeMeshes
+
+BM_RASTERIZE_MESHES_N_THREADS = os.getenv("BM_RASTERIZE_MESHES_N_THREADS", 1)
+torch.set_num_threads(int(BM_RASTERIZE_MESHES_N_THREADS))
+
+# ico levels:
+# 0: (12 verts, 20 faces)
+# 1: (42 verts, 80 faces)
+# 3: (642 verts, 1280 faces)
+# 4: (2562 verts, 5120 faces)
+# 5: (10242 verts, 20480 faces)
+# 6: (40962 verts, 81920 faces)
+
+
+def bm_rasterize_meshes() -> None:
+    """Run the python, cpu, cuda and clipping benchmarks of TestRasterizeMeshes.
+
+    The python and cpu variants always run (both are reported under the name
+    "RASTERIZE_MESHES"); the two cuda benchmarks need CUDA to be available.
+    """
+    # A single tiny case for the python variant.
+    python_case = {
+        "num_meshes": 1,
+        "ico_level": 0,
+        "image_size": 10,  # very slow with large image size
+        "blur_radius": 0.0,
+        "faces_per_pixel": 3,
+    }
+    benchmark(
+        TestRasterizeMeshes.rasterize_meshes_python_with_init,
+        "RASTERIZE_MESHES",
+        [python_case],
+        warmup_iters=1,
+    )
+
+    num_meshes = [1]
+    ico_level = [1]
+    image_size = [64, 128, 512]
+    blur = [1e-6]
+    faces_per_pixel = [3, 50]
+    kwargs_list = [
+        {
+            "num_meshes": n,
+            "ico_level": ic,
+            "image_size": im,
+            "blur_radius": b,
+            "faces_per_pixel": f,
+        }
+        for n, ic, im, b, f in product(
+            num_meshes, ico_level, image_size, blur, faces_per_pixel
+        )
+    ]
+    benchmark(
+        TestRasterizeMeshes.rasterize_meshes_cpu_with_init,
+        "RASTERIZE_MESHES",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+    # Everything below runs the cuda benchmarks.
+    if not torch.cuda.is_available():
+        return
+
+    num_meshes = [8, 16]
+    ico_level = [4, 5, 6]
+    # Square and non square cases
+    image_size = [64, 128, 512, (512, 256), (256, 512)]
+    blur = [1e-6]
+    faces_per_pixel = [40]
+
+    kwargs_list = [
+        {
+            "num_meshes": n,
+            "ico_level": ic,
+            "image_size": im,
+            "blur_radius": b,
+            "faces_per_pixel": f,
+        }
+        for n, ic, im, b, f in product(
+            num_meshes, ico_level, image_size, blur, faces_per_pixel
+        )
+    ]
+    benchmark(
+        TestRasterizeMeshes.rasterize_meshes_cuda_with_init,
+        "RASTERIZE_MESHES_CUDA",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+    # Test a subset of the cases with the
+    # image plane intersecting the mesh.
+    num_meshes = [8, 16]
+    # Square and non square cases
+    image_size = [64, 128, 512, (512, 256), (256, 512)]
+    dist = [3, 0.8, 0.5]
+    kwargs_list = [
+        {
+            "num_meshes": n,
+            "ico_level": 4,
+            "image_size": im,
+            "blur_radius": 1e-6,
+            "faces_per_pixel": 40,
+            "dist": d,
+        }
+        for n, d, im in product(num_meshes, dist, image_size)
+    ]
+    benchmark(
+        TestRasterizeMeshes.bm_rasterize_meshes_with_clipping,
+        "RASTERIZE_MESHES_CUDA_CLIPPING",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_rasterize_meshes()
diff --git a/pytorch3d/tests/benchmarks/bm_rasterize_points.py b/pytorch3d/tests/benchmarks/bm_rasterize_points.py
new file mode 100644
index 0000000000000000000000000000000000000000..bcdd8fb08393aed7862488601725e1f5b686aaea
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_rasterize_points.py
@@ -0,0 +1,105 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from itertools import product
+
+import torch
+from fvcore.common.benchmark import benchmark
+from pytorch3d.renderer.points.rasterize_points import (
+    rasterize_points,
+    rasterize_points_python,
+)
+from pytorch3d.structures.pointclouds import Pointclouds
+
+
+def _bm_python_with_init(N, P, img_size=32, radius=0.1, pts_per_pxl=3):
+    """Return a closure running rasterize_points_python on N seeded clouds."""
+    torch.manual_seed(231)
+    pointclouds = Pointclouds(points=torch.randn(N, P, 3))
+    # Bind the arguments now so the closure only performs the rasterization.
+    return lambda: rasterize_points_python(pointclouds, img_size, radius, pts_per_pxl)
+
+
+def _bm_rasterize_points_with_init(
+    N, P, img_size=32, radius=0.1, pts_per_pxl=3, device="cpu", expand_radius=False
+):
+    """Build N seeded random clouds of P points; return a rasterize closure."""
+    torch.manual_seed(231)
+    device = torch.device(device)
+    pointclouds = Pointclouds(points=torch.randn(N, P, 3, device=device))
+    if expand_radius:
+        points_padded = pointclouds.points_padded()
+        radius = torch.full((N, P), fill_value=radius).type_as(points_padded)
+    args = (pointclouds, img_size, radius, pts_per_pxl)
+    # torch.device never equals a str; compare .type so CUDA runs synchronize.
+    on_cuda = device.type == "cuda"
+    if on_cuda:
+        torch.cuda.synchronize(device)
+
+    def fn():
+        rasterize_points(*args)
+        if on_cuda:
+            torch.cuda.synchronize(device)
+
+    return fn
+
+
+def bm_python_vs_cpu_vs_cuda() -> None:
+    """Benchmark rasterize_points on the CPU and, when available, on CUDA.
+
+    The CUDA run covers the CPU grid plus five larger cases and is skipped
+    on hosts without CUDA instead of raising. Despite the function name,
+    _bm_python_with_init is not benchmarked here.
+    """
+    num_meshes = [1]
+    num_points = [10000, 2000]
+    image_size = [128, 256]
+    radius = [1e-3, 0.01]
+    pts_per_pxl = [50, 100]
+    expand = [True, False]
+    test_cases = product(
+        num_meshes, num_points, image_size, radius, pts_per_pxl, expand
+    )
+    kwargs_list = [
+        {
+            "N": n,
+            "P": p,
+            "img_size": im,
+            "radius": r,
+            "pts_per_pxl": pts,
+            "device": "cpu",
+            "expand_radius": e,
+        }
+        for n, p, im, r, pts, e in test_cases
+    ]
+
+    benchmark(
+        _bm_rasterize_points_with_init, "RASTERIZE_CPU", kwargs_list, warmup_iters=1
+    )
+
+    # Without CUDA the second benchmark cannot run, so stop here.
+    if not torch.cuda.is_available():
+        return
+
+    # Larger workloads that are only benchmarked on the GPU.
+    shared = {"radius": 0.01, "pts_per_pxl": 50}
+    large_cases = [
+        {"N": 32, "P": 100000, "img_size": 128, **shared},
+        {"N": 8, "P": 200000, "img_size": 512, **shared},
+        {"N": 8, "P": 200000, "img_size": 256, **shared},
+        {"N": 8, "P": 200000, "img_size": (512, 256), **shared},
+        {"N": 8, "P": 200000, "img_size": (256, 512), **shared},
+    ]
+    # Copy each dict so retargeting to CUDA leaves the CPU kwargs untouched.
+    cuda_kwargs = [{**kw, "device": "cuda"} for kw in kwargs_list + large_cases]
+
+    benchmark(
+        _bm_rasterize_points_with_init, "RASTERIZE_CUDA", cuda_kwargs, warmup_iters=1
+    )
+
+
+if __name__ == "__main__":
+    bm_python_vs_cpu_vs_cuda()
diff --git a/pytorch3d/tests/benchmarks/bm_raymarching.py b/pytorch3d/tests/benchmarks/bm_raymarching.py
new file mode 100644
index 0000000000000000000000000000000000000000..b37f612734b184edd8cae93dd077ec468d05d223
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_raymarching.py
@@ -0,0 +1,23 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import itertools
+
+from fvcore.common.benchmark import benchmark
+from pytorch3d.renderer import AbsorptionOnlyRaymarcher, EmissionAbsorptionRaymarcher
+from tests.test_raymarching import TestRaymarching
+
+
+def bm_raymarching() -> None:
+    """Benchmark TestRaymarching.raymarcher for both raymarcher types."""
+    raymarchers = [EmissionAbsorptionRaymarcher, AbsorptionOnlyRaymarcher]
+    sizes = [10, 1000, 10000]
+    kwargs_list = [
+        {"raymarcher_type": marcher, "n_rays": n_rays, "n_pts_per_ray": n_pts}
+        for marcher, n_rays, n_pts in itertools.product(raymarchers, sizes, sizes)
+    ]
+
+    benchmark(TestRaymarching.raymarcher, "RAYMARCHER", kwargs_list, warmup_iters=1)
diff --git a/pytorch3d/tests/benchmarks/bm_raysampling.py b/pytorch3d/tests/benchmarks/bm_raysampling.py
new file mode 100644
index 0000000000000000000000000000000000000000..09b0f1283e65a2788c43d8002822283d7b02e393
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_raysampling.py
@@ -0,0 +1,47 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import itertools
+
+from fvcore.common.benchmark import benchmark
+from pytorch3d.renderer import (
+    FoVOrthographicCameras,
+    FoVPerspectiveCameras,
+    MonteCarloRaysampler,
+    MultinomialRaysampler,
+    NDCMultinomialRaysampler,
+    OrthographicCameras,
+    PerspectiveCameras,
+)
+from tests.test_raysampling import TestRaysampling
+
+
+def bm_raysampling() -> None:
+    """Benchmark TestRaysampling.raysampler over samplers, cameras and sizes."""
+    samplers = [MultinomialRaysampler, NDCMultinomialRaysampler, MonteCarloRaysampler]
+    cameras = [
+        PerspectiveCameras,
+        OrthographicCameras,
+        FoVPerspectiveCameras,
+        FoVOrthographicCameras,
+    ]
+    case_grid = {
+        "raysampler_type": samplers,
+        "camera_type": cameras,
+        "batch_size": [1, 10],
+        "n_pts_per_ray": [10, 1000, 10000],
+        "image_width": [10, 300],
+        "image_height": [10, 300],
+    }
+    arg_names = list(case_grid)
+    combos = itertools.product(*case_grid.values())
+    kwargs_list = [dict(zip(arg_names, combo)) for combo in combos]
+
+    benchmark(TestRaysampling.raysampler, "RAYSAMPLER", kwargs_list, warmup_iters=1)
+
+
+if __name__ == "__main__":
+    bm_raysampling()
diff --git a/pytorch3d/tests/benchmarks/bm_render_implicit.py b/pytorch3d/tests/benchmarks/bm_render_implicit.py
new file mode 100644
index 0000000000000000000000000000000000000000..af38047a2e2f9b3a88348e5e18e15286ecfa0d84
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_render_implicit.py
@@ -0,0 +1,26 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import itertools
+
+from fvcore.common.benchmark import benchmark
+from pytorch3d.renderer import AbsorptionOnlyRaymarcher, EmissionAbsorptionRaymarcher
+from tests.test_render_implicit import TestRenderImplicit
+
+
+def bm_render_volumes() -> None:
+    """Benchmark TestRenderImplicit.renderer under the name IMPLICIT_RENDERER.
+
+    NOTE(review): same function name as in bm_render_volumes.py; confirm intent.
+    """
+    raymarchers = [EmissionAbsorptionRaymarcher, AbsorptionOnlyRaymarcher]
+    grid = itertools.product([1, 5], raymarchers, [64**2, 256**2], [16, 128])
+    arg_names = ("batch_size", "raymarcher_type", "n_rays_per_image", "n_pts_per_ray")
+    kwargs_list = [dict(zip(arg_names, case)) for case in grid]
+
+    benchmark(
+        TestRenderImplicit.renderer, "IMPLICIT_RENDERER", kwargs_list, warmup_iters=1
+    )
diff --git a/pytorch3d/tests/benchmarks/bm_render_volumes.py b/pytorch3d/tests/benchmarks/bm_render_volumes.py
new file mode 100644
index 0000000000000000000000000000000000000000..abe76d93205d72f64d3eff1b057e5f1acbd8e281
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_render_volumes.py
@@ -0,0 +1,28 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import itertools
+
+from fvcore.common.benchmark import benchmark
+from pytorch3d.renderer import AbsorptionOnlyRaymarcher, EmissionAbsorptionRaymarcher
+from tests.test_render_volumes import TestRenderVolumes
+
+
+def bm_render_volumes() -> None:
+    """Benchmark TestRenderVolumes.renderer over the full grid of settings."""
+    grid = {
+        "volume_size": [(17,) * 3, (129,) * 3],
+        "batch_size": [1, 5],
+        "shape": ["sphere", "cube"],
+        "raymarcher_type": [EmissionAbsorptionRaymarcher, AbsorptionOnlyRaymarcher],
+        "n_rays_per_image": [64**2, 256**2],
+        "n_pts_per_ray": [16, 128],
+    }
+    names = list(grid)
+    # One kwargs dict per element of the Cartesian product of the grid values.
+    runs = [dict(zip(names, combo)) for combo in itertools.product(*grid.values())]
+    name = "VOLUME_RENDERER"
+    benchmark(TestRenderVolumes.renderer, name, runs, warmup_iters=1)
diff --git a/pytorch3d/tests/benchmarks/bm_sample_farthest_points.py b/pytorch3d/tests/benchmarks/bm_sample_farthest_points.py
new file mode 100644
index 0000000000000000000000000000000000000000..1bf355ce05e3362c74b8502fe93ba0db76844a53
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_sample_farthest_points.py
@@ -0,0 +1,46 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from itertools import product
+
+from fvcore.common.benchmark import benchmark
+from tests.test_sample_farthest_points import TestFPS
+
+
+def bm_fps() -> None:
+    """Benchmark the naive and the regular farthest point sampling test helpers."""
+    backends = ["cpu", "cuda:0"]
+
+    def make_cases(Ns, Ps, Ds, Ks):
+        # Expand the grid into kwargs dicts; the device varies fastest.
+        return [
+            {"N": N, "P": P, "D": D, "K": K, "device": d}
+            for N, P, D, K, d in product(Ns, Ps, Ds, Ks, backends)
+        ]
+
+    kwargs_list = make_cases(Ns=[8, 32], Ps=[64, 256], Ds=[3], Ks=[24])
+    benchmark(
+        TestFPS.sample_farthest_points_naive,
+        "FPS_NAIVE_PYTHON",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+    # Add some larger batch sizes and pointcloud sizes. The list is extended, not
+    # replaced, so the second benchmark also covers the small cases above.
+    # NOTE(review): 18384 may be a typo for 16384 (2**14) — confirm.
+    kwargs_list += make_cases(
+        Ns=[32],
+        Ps=[2048, 8192, 18384],
+        Ds=[3, 9],
+        Ks=[24, 48],
+    )
+
+    benchmark(TestFPS.sample_farthest_points, "FPS", kwargs_list, warmup_iters=1)
+
+
+if __name__ == "__main__":
+    bm_fps()
diff --git a/pytorch3d/tests/benchmarks/bm_sample_pdf.py b/pytorch3d/tests/benchmarks/bm_sample_pdf.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f94df8f9a1915001d929177a4c47006ccbaeefe
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_sample_pdf.py
@@ -0,0 +1,37 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from itertools import product
+
+from fvcore.common.benchmark import benchmark
+from tests.test_sample_pdf import TestSamplePDF
+
+
+def bm_sample_pdf() -> None:
+    """Benchmark TestSamplePDF.bm_fn for every backend / size combination."""
+    backends = ["python_cuda", "cuda", "python_cpu", "cpu"]
+    sample_counts = [64]
+    batch_sizes = [1024, 10240]
+    bin_counts = [62, 600]
+
+    # The backend is the slowest-varying axis of the product, bin count the fastest.
+    grid = product(backends, sample_counts, batch_sizes, bin_counts)
+    kwargs_list = [
+        {
+            "backend": backend,
+            "n_samples": n_samples,
+            "batch_size": batch_size,
+            "n_bins": n_bins,
+        }
+        for backend, n_samples, batch_size, n_bins in grid
+    ]
+
+    label = "SAMPLE_PDF"
+    benchmark(TestSamplePDF.bm_fn, label, kwargs_list, warmup_iters=1)
+
+
+if __name__ == "__main__":
+    bm_sample_pdf()
diff --git a/pytorch3d/tests/benchmarks/bm_sample_points_from_meshes.py b/pytorch3d/tests/benchmarks/bm_sample_points_from_meshes.py
new file mode 100644
index 0000000000000000000000000000000000000000..480e7eb97bdc1e78921fef776bcf19c809007074
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_sample_points_from_meshes.py
@@ -0,0 +1,46 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from itertools import product
+
+import torch
+from fvcore.common.benchmark import benchmark
+from tests.test_sample_points_from_meshes import TestSamplePoints
+
+
+def bm_sample_points() -> None:
+    """Benchmark TestSamplePoints.sample_points_with_init on cpu and, if any, cuda:0."""
+
+    devices = ["cpu", "cuda:0"] if torch.cuda.is_available() else ["cpu"]
+    num_meshes = [2, 10, 32]
+    num_verts = [100, 1000]
+    num_faces = [300, 3000]
+    num_samples = [5000, 10000]
+
+    # The device is the fastest-varying axis of the product.
+    grid = product(num_meshes, num_verts, num_faces, num_samples, devices)
+    kwargs_list = [
+        {
+            "num_meshes": n,
+            "num_verts": v,
+            "num_faces": f,
+            "num_samples": s,
+            "device": d,
+        }
+        for n, v, f, s, d in grid
+    ]
+
+    benchmark(
+        TestSamplePoints.sample_points_with_init,
+        "SAMPLE_MESH",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_sample_points()
diff --git a/pytorch3d/tests/benchmarks/bm_se3.py b/pytorch3d/tests/benchmarks/bm_se3.py
new file mode 100644
index 0000000000000000000000000000000000000000..62ac896730cb10ecf0e616b43986a4b9e5c4faec
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_se3.py
@@ -0,0 +1,23 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from fvcore.common.benchmark import benchmark
+from tests.test_se3 import TestSE3
+
+
+def bm_se3() -> None:
+    """Benchmark the TestSE3 exp-map and log-map helpers over several batch sizes."""
+    batch_sizes = (1, 10, 100, 1000)
+    # Both benchmarks share the same list of keyword-argument dicts.
+    kwargs_list = [{"batch_size": size} for size in batch_sizes]
+
+    targets = (("SE3_EXP", TestSE3.se3_expmap), ("SE3_LOG", TestSE3.se3_logmap))
+    for label, fn in targets:
+        benchmark(fn, label, kwargs_list, warmup_iters=1)
+
+
+if __name__ == "__main__":
+    bm_se3()
diff --git a/pytorch3d/tests/benchmarks/bm_so3.py b/pytorch3d/tests/benchmarks/bm_so3.py
new file mode 100644
index 0000000000000000000000000000000000000000..851e9fc4b9cc33d96adadd5ab229c06d434cbd71
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_so3.py
@@ -0,0 +1,23 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from fvcore.common.benchmark import benchmark
+from tests.test_so3 import TestSO3
+
+
+def bm_so3() -> None:
+    """Benchmark the TestSO3 exp-map and log-map helpers over several batch sizes."""
+    batch_sizes = (1, 10, 100, 1000)
+    # Both benchmarks share the same list of keyword-argument dicts.
+    kwargs_list = [{"batch_size": size} for size in batch_sizes]
+
+    targets = (("SO3_EXP", TestSO3.so3_expmap), ("SO3_LOG", TestSO3.so3_logmap))
+    for label, fn in targets:
+        benchmark(fn, label, kwargs_list, warmup_iters=1)
+
+
+if __name__ == "__main__":
+    bm_so3()
diff --git a/pytorch3d/tests/benchmarks/bm_subdivide_meshes.py b/pytorch3d/tests/benchmarks/bm_subdivide_meshes.py
new file mode 100644
index 0000000000000000000000000000000000000000..79a4a30a96828bf3de3b566eafb39eaa63583a4e
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_subdivide_meshes.py
@@ -0,0 +1,31 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from itertools import product
+
+from fvcore.common.benchmark import benchmark
+from tests.test_subdivide_meshes import TestSubdivideMeshes
+
+
+def bm_subdivide() -> None:
+    """Benchmark TestSubdivideMeshes.subdivide_meshes_with_init."""
+    num_meshes = [1, 16, 32]
+    same_topo = [True, False]
+    # same_topo toggles fastest within each mesh count.
+    kwargs_list = [
+        {"num_meshes": n, "same_topo": s} for n, s in product(num_meshes, same_topo)
+    ]
+    benchmark(
+        TestSubdivideMeshes.subdivide_meshes_with_init,
+        "SUBDIVIDE",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_subdivide()
diff --git a/pytorch3d/tests/benchmarks/bm_symeig3x3.py b/pytorch3d/tests/benchmarks/bm_symeig3x3.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0e4dc6346350c0790d9769fda07dc92a108c6a0
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_symeig3x3.py
@@ -0,0 +1,94 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from itertools import product
+from typing import Any, Callable
+
+import torch
+from common_testing import get_random_cuda_device
+from fvcore.common.benchmark import benchmark
+from pytorch3d.common.workaround import symeig3x3
+from tests.test_symeig3x3 import TestSymEig3x3
+
+
+torch.set_num_threads(1)  # limit CPU intra-op parallelism to a single thread
+
+CUDA_DEVICE = get_random_cuda_device()  # chosen once, when this module is imported
+
+
+def create_traced_func(func, device, batch_size):
+    """Return `func` traced by torch.jit.trace on one random example input."""
+    # The example comes from TestSymEig3x3.create_random_sym3x3 and is handed
+    # to the tracer as a one-element tuple of positional arguments.
+    example = TestSymEig3x3.create_random_sym3x3(device, batch_size)
+    return torch.jit.trace(func, (example,))
+
+
+FUNC_NAME_TO_FUNC = {  # benchmark name -> callable applied to the input batch
+    "sym3x3eig": (lambda inputs: symeig3x3(inputs, eigenvectors=True)),
+    "sym3x3eig_traced_cuda": create_traced_func(  # traced here, at import time
+        (lambda inputs: symeig3x3(inputs, eigenvectors=True)), CUDA_DEVICE, 1024
+    ),
+    "torch_symeig": (lambda inputs: torch.symeig(inputs, eigenvectors=True)),
+    "torch_linalg_eigh": (lambda inputs: torch.linalg.eigh(inputs)),
+    "torch_pca_lowrank": (
+        lambda inputs: torch.pca_lowrank(inputs, center=False, niter=1)
+    ),
+    "sym3x3eig_no_vecs": (lambda inputs: symeig3x3(inputs, eigenvectors=False)),
+    "torch_symeig_no_vecs": (lambda inputs: torch.symeig(inputs, eigenvectors=False)),
+    "torch_linalg_eigvalsh_no_vecs": (lambda inputs: torch.linalg.eigvalsh(inputs)),
+}  # NOTE(review): torch.symeig was removed in PyTorch 2.0 — confirm the torch version
+
+
+def test_symeig3x3(func_name, batch_size, device) -> Callable[[], Any]:
+    """Build a zero-argument closure that runs FUNC_NAME_TO_FUNC[func_name] once."""
+    func = FUNC_NAME_TO_FUNC[func_name]
+    inputs = TestSymEig3x3.create_random_sym3x3(device, batch_size)
+    torch.cuda.synchronize()  # NOTE(review): also called when device is "cpu"
+    def symeig3x3():  # locally shadows the imported symeig3x3 name
+        func(inputs)
+        torch.cuda.synchronize()  # synchronize after the call, inside the closure
+
+    return symeig3x3
+
+
+def bm_symeig3x3() -> None:
+    """
+    Benchmark every FUNC_NAME_TO_FUNC entry over a grid of batch sizes and
+    devices, leaving out the combinations rejected by `skip` below.
+    """
+
+    devices = ["cpu"] + ([CUDA_DEVICE] if torch.cuda.is_available() else [])
+    batch_sizes = [16, 128, 1024, 8192, 65536, 1048576]
+    large = 8192  # batches above this size are only run for some combinations
+
+    def skip(func_name, batch_size, device):
+        # Run CUDA-only implementations only on GPU
+        if "cuda" in func_name and not device.startswith("cuda"):
+            return True
+        # Torch built-ins are quite slow on larger batches, and CPU
+        # implementations are not run on larger batches either.
+        return batch_size > large and ("torch" in func_name or device == "cpu")
+
+    kwargs_list = [
+        {"func_name": func_name, "batch_size": batch_size, "device": device}
+        for func_name, batch_size, device in product(
+            FUNC_NAME_TO_FUNC.keys(), batch_sizes, devices
+        )
+        if not skip(func_name, batch_size, device)
+    ]
+
+    benchmark(
+        test_symeig3x3,
+        "SYMEIG3X3",
+        kwargs_list,
+        warmup_iters=3,
+    )
+
+
+if __name__ == "__main__":
+    bm_symeig3x3()
diff --git a/pytorch3d/tests/benchmarks/bm_vert_align.py b/pytorch3d/tests/benchmarks/bm_vert_align.py
new file mode 100644
index 0000000000000000000000000000000000000000..6670029e89203fbd5e516d1cb57f15b946c94cec
--- /dev/null
+++ b/pytorch3d/tests/benchmarks/bm_vert_align.py
@@ -0,0 +1,37 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from itertools import product
+
+import torch
+from fvcore.common.benchmark import benchmark
+from tests.test_vert_align import TestVertAlign
+
+
+def bm_vert_align() -> None:
+    """Benchmark TestVertAlign.vert_align_with_init on cpu and, if any, cuda."""
+    devices = ["cpu", "cuda"] if torch.cuda.is_available() else ["cpu"]
+    num_meshes = [2, 10, 32]
+    num_verts = [100, 1000]
+    num_faces = [300, 3000]
+
+    # The device is the fastest-varying axis of the product.
+    kwargs_list = [
+        {"num_meshes": n, "num_verts": v, "num_faces": f, "device": d}
+        for n, v, f, d in product(num_meshes, num_verts, num_faces, devices)
+    ]
+
+    benchmark(
+        TestVertAlign.vert_align_with_init,
+        "VERT_ALIGN",
+        kwargs_list,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_vert_align()
diff --git a/pytorch3d/tests/common_camera_utils.py b/pytorch3d/tests/common_camera_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa7aeb2d18868a1a9f58b94998c1e500222e29ec
--- /dev/null
+++ b/pytorch3d/tests/common_camera_utils.py
@@ -0,0 +1,77 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import typing
+
+import torch
+from pytorch3d.common.datatypes import Device
+from pytorch3d.renderer.cameras import (
+    CamerasBase,
+    FoVOrthographicCameras,
+    FoVPerspectiveCameras,
+    OpenGLOrthographicCameras,
+    OpenGLPerspectiveCameras,
+    OrthographicCameras,
+    PerspectiveCameras,
+    SfMOrthographicCameras,
+    SfMPerspectiveCameras,
+)
+from pytorch3d.renderer.fisheyecameras import FishEyeCameras
+from pytorch3d.transforms.so3 import so3_exp_map
+
+
+def init_random_cameras(
+    cam_type: typing.Type[CamerasBase],
+    batch_size: int,
+    random_z: bool = False,
+    device: Device = "cpu",
+):
+    """Build a `cam_type` batch with random R, T and type-specific parameters."""
+    T = torch.randn(batch_size, 3) * 0.03
+    if not random_z:
+        T[:, 2] = 4  # fixed z translation unless a random one is requested
+    R = so3_exp_map(torch.randn(batch_size, 3) * 3.0)
+    cam_params = {"R": R, "T": T, "device": device}
+    if cam_type in (OpenGLPerspectiveCameras, OpenGLOrthographicCameras):
+        cam_params["znear"] = torch.rand(batch_size) * 10 + 0.1
+        cam_params["zfar"] = torch.rand(batch_size) * 4 + 1 + cam_params["znear"]
+        if cam_type == OpenGLPerspectiveCameras:
+            cam_params["fov"] = torch.rand(batch_size) * 60 + 30
+            cam_params["aspect_ratio"] = torch.rand(batch_size) * 0.5 + 0.5
+        else:
+            cam_params["top"] = torch.rand(batch_size) * 0.2 + 0.9
+            cam_params["bottom"] = -(torch.rand(batch_size)) * 0.2 - 0.9
+            cam_params["left"] = -(torch.rand(batch_size)) * 0.2 - 0.9
+            cam_params["right"] = torch.rand(batch_size) * 0.2 + 0.9
+    elif cam_type in (FoVPerspectiveCameras, FoVOrthographicCameras):
+        cam_params["znear"] = torch.rand(batch_size) * 10 + 0.1
+        cam_params["zfar"] = torch.rand(batch_size) * 4 + 1 + cam_params["znear"]
+        if cam_type == FoVPerspectiveCameras:
+            cam_params["fov"] = torch.rand(batch_size) * 60 + 30
+            cam_params["aspect_ratio"] = torch.rand(batch_size) * 0.5 + 0.5
+        else:
+            cam_params["max_y"] = torch.rand(batch_size) * 0.2 + 0.9
+            cam_params["min_y"] = -(torch.rand(batch_size)) * 0.2 - 0.9
+            cam_params["min_x"] = -(torch.rand(batch_size)) * 0.2 - 0.9
+            cam_params["max_x"] = torch.rand(batch_size) * 0.2 + 0.9
+    elif cam_type in (
+        SfMOrthographicCameras,
+        SfMPerspectiveCameras,
+        OrthographicCameras,
+        PerspectiveCameras,
+    ):
+        cam_params["focal_length"] = torch.rand(batch_size) * 10 + 0.1
+        cam_params["principal_point"] = torch.randn((batch_size, 2))
+    elif cam_type == FishEyeCameras:
+        cam_params["focal_length"] = torch.rand(batch_size, 1) * 10 + 0.1
+        cam_params["principal_point"] = torch.randn((batch_size, 2))
+        cam_params["radial_params"] = torch.randn((batch_size, 6))
+        cam_params["tangential_params"] = torch.randn((batch_size, 2))
+        cam_params["thin_prism_params"] = torch.randn((batch_size, 4))
+
+    else:
+        raise ValueError(str(cam_type))  # camera type without a recipe above
+    return cam_type(**cam_params)
diff --git a/pytorch3d/tests/common_testing.py b/pytorch3d/tests/common_testing.py
new file mode 100644
index 0000000000000000000000000000000000000000..017e52ecba48456ea8cf6e64a8aa9e08c4a04045
--- /dev/null
+++ b/pytorch3d/tests/common_testing.py
@@ -0,0 +1,222 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import unittest
+from numbers import Real
+from pathlib import Path
+from typing import Callable, Optional, Union
+
+import numpy as np
+import torch
+from PIL import Image
+
+
+def interactive_testing_requested() -> bool:
+    """
+    Return True when the environment variable `PYTORCH3D_INTERACTIVE_TESTING`
+    is set to 1, which enables tests that are only useful interactively.
+    """
+    flag = os.environ.get("PYTORCH3D_INTERACTIVE_TESTING", "")
+    return flag == "1"
+
+
+def get_tests_dir() -> Path:
+    """Return the directory that contains this file, as a resolved Path."""
+    # resolve() makes the path absolute before its parent is taken.
+    this_file = Path(__file__).resolve()
+    return this_file.parent
+
+
+def get_pytorch3d_dir() -> Path:
+    """
+    Return the root PyTorch3D directory: `SRC_DIR` during the test phase of a
+    conda build (unless `INSIDE_RE_WORKER` is set, the Meta internal special
+    case), and otherwise the parent of the directory containing this file.
+    """
+    in_re_worker = os.environ.get("INSIDE_RE_WORKER") is not None
+    conda_testing = os.environ.get("CONDA_BUILD_STATE", "") == "TEST"
+    # INSIDE_RE_WORKER takes precedence over the conda build state.
+    if conda_testing and not in_re_worker:
+        return Path(os.environ["SRC_DIR"])
+    return Path(__file__).resolve().parent.parent
+
+
+def load_rgb_image(filename: str, data_dir: Union[str, Path]):
+    """Load `data_dir/filename` as a float32 tensor scaled by 1/255."""
+    with Image.open(os.path.join(data_dir, filename)) as raw_image:
+        pixels = np.array(raw_image) / 255.0
+    # Keep at most the first three entries along the last axis.
+    return torch.from_numpy(pixels).to(dtype=torch.float32)[..., :3]
+
+
+TensorOrArray = Union[torch.Tensor, np.ndarray]
+
+
+def get_random_cuda_device() -> str:
+    """
+    Return "cuda:<index>" for a randomly chosen visible CUDA device, so tests
+    can check that custom CUDA kernels accept inputs on any device without
+    setting it explicitly. With fewer than two devices this is "cuda:0".
+    """
+    num_devices = torch.cuda.device_count()
+    if num_devices > 1:
+        device_id = torch.randint(high=num_devices, size=(1,)).item()
+    else:
+        device_id = 0
+    return "cuda:%d" % device_id
+
+
+class TestCaseMixin(unittest.TestCase):
+    def assertSeparate(self, tensor1, tensor2) -> None:
+        """
+        Verify that tensor1 and tensor2 have their data in distinct locations.
+        """
+        self.assertNotEqual(tensor1.storage().data_ptr(), tensor2.storage().data_ptr())
+
+    def assertNotSeparate(self, tensor1, tensor2) -> None:
+        """
+        Verify that tensor1 and tensor2 have their data in the same locations.
+        """
+        self.assertEqual(tensor1.storage().data_ptr(), tensor2.storage().data_ptr())
+
+    def assertAllSeparate(self, tensor_list) -> None:
+        """
+        Verify that all tensors in tensor_list have their data in
+        distinct locations.
+        """
+        ptrs = [i.storage().data_ptr() for i in tensor_list]
+        self.assertCountEqual(ptrs, set(ptrs))
+
+    def assertNormsClose(
+        self,
+        input: TensorOrArray,
+        other: TensorOrArray,
+        norm_fn: Callable[[TensorOrArray], TensorOrArray],
+        *,
+        rtol: float = 1e-05,
+        atol: float = 1e-08,
+        equal_nan: bool = False,
+        msg: Optional[str] = None,
+    ) -> None:
+        """
+        Verifies that two tensors or arrays have the same shape and are close
+            given absolute and relative tolerance; raises AssertionError otherwise.
+            A custom norm function is computed before comparison. If no such pre-
+            processing needed, pass `torch.abs` or, equivalently, call `assertClose`.
+        Args:
+            input, other: two tensors or two arrays.
+            norm_fn: The function evaluates
+                `all(norm_fn(input - other) <= atol + rtol * norm_fn(other))`.
+                norm_fn is a tensor -> tensor function; the output has:
+                    * all entries non-negative,
+                    * shape defined by the input shape only.
+            rtol, atol, equal_nan: as for torch.allclose.
+            msg: message in case the assertion is violated.
+        Note:
+            Optional arguments here are all keyword-only, to avoid confusion
+            with msg arguments on other assert functions.
+        """
+
+        self.assertEqual(np.shape(input), np.shape(other))
+
+        diff = norm_fn(input - other)
+        other_ = norm_fn(other)
+
+        # We want to generalize allclose(input, output), which is essentially
+        #  all(diff <= atol + rtol * other)
+        # but with sophisticated handling of non-finite values.
+        # We work that around by calling allclose() with the following arguments:
+        # allclose(diff + other_, other_). This computes what we want because
+        #  all(|diff + other_ - other_| <= atol + rtol * |other_|) ==
+        #    all(|norm_fn(input - other)| <= atol + rtol * |norm_fn(other)|) ==
+        #    all(norm_fn(input - other) <= atol + rtol * norm_fn(other)).
+
+        self.assertClose(
+            diff + other_, other_, rtol=rtol, atol=atol, equal_nan=equal_nan, msg=msg
+        )
+
+    def assertClose(
+        self,
+        input: TensorOrArray,
+        other: TensorOrArray,
+        *,
+        rtol: float = 1e-05,
+        atol: float = 1e-08,
+        equal_nan: bool = False,
+        msg: Optional[str] = None,
+    ) -> None:
+        """
+        Verifies that two tensors or arrays have the same shape and are close
+            given absolute and relative tolerance, i.e. checks
+            `all(|input - other| <= atol + rtol * |other|)`;
+            raises AssertionError otherwise.
+        Args:
+            input, other: two tensors or two arrays.
+            rtol, atol, equal_nan: as for torch.allclose.
+            msg: message in case the assertion is violated.
+        Note:
+            Optional arguments here are all keyword-only, to avoid confusion
+            with msg arguments on other assert functions.
+        """
+
+        self.assertEqual(np.shape(input), np.shape(other))
+
+        backend = torch if torch.is_tensor(input) else np
+        close = backend.allclose(
+            input, other, rtol=rtol, atol=atol, equal_nan=equal_nan
+        )
+
+        if close:
+            return
+
+        # Bool inputs: count mismatches directly; the three cases are exclusive.
+        if backend == torch and input.dtype == torch.bool:
+            diff = (input != other).float()
+            ratio = diff
+        elif backend == np and input.dtype == bool:
+            diff = (input != other).astype(float)
+            ratio = diff
+        else:
+            diff = backend.abs(input + 0.0 - other)
+            ratio = diff / backend.abs(other)
+
+        try_relative = (diff <= atol) | (backend.isfinite(ratio) & (ratio > 0))
+        if try_relative.all():
+            if backend == np:
+                # Avoid a weirdness with zero dimensional arrays.
+                ratio = np.array(ratio)
+            ratio[diff <= atol] = 0
+            extra = f" Max relative diff {ratio.max()}"
+        else:
+            extra = ""
+        shape = tuple(input.shape)
+        loc = np.unravel_index(int(diff.argmax()), shape)
+        max_diff = diff.max()
+        err = f"Not close. Max diff {max_diff}.{extra} Shape {shape}. At {loc}."
+        if msg is not None:
+            self.fail(f"{msg} {err}")
+        self.fail(err)
+
+    def assertConstant(
+        self, input: TensorOrArray, value: Real, *, atol: float = 0
+    ) -> None:
+        """
+        Asserts input is entirely filled with value.
+
+        Args:
+            input: tensor or array
+            value: expected value
+            atol: tolerance
+        """
+        mn, mx = input.min(), input.max()
+        msg = f"values in range [{mn}, {mx}], not {value}, shape {input.shape}"
+        if atol == 0:
+            self.assertEqual(input.min(), value, msg=msg)
+            self.assertEqual(input.max(), value, msg=msg)
+        else:
+            self.assertGreater(input.min(), value - atol, msg=msg)
+            self.assertLess(input.max(), value + atol, msg=msg)
diff --git a/pytorch3d/tests/data/cow.glb b/pytorch3d/tests/data/cow.glb
new file mode 100644
index 0000000000000000000000000000000000000000..54b124b4bc43de4d08ae0bc8e3ce8020d232bd78
Binary files /dev/null and b/pytorch3d/tests/data/cow.glb differ
diff --git a/pytorch3d/tests/data/cv_project_points_precomputed.json b/pytorch3d/tests/data/cv_project_points_precomputed.json
new file mode 100644
index 0000000000000000000000000000000000000000..ba430912056c13c749f20f7313c9c50322ad9aa9
--- /dev/null
+++ b/pytorch3d/tests/data/cv_project_points_precomputed.json
@@ -0,0 +1,1230 @@
+[
+    {
+        "rvec": [
+            -1.6336234226511284,
+            -1.009804818052615,
+            0.4160736184401035
+        ],
+        "tvec": [
+            1.626905,
+            -1.5187958,
+            -4.6009063
+        ],
+        "camera_matrix": [
+            [
+                -1.0406556,
+                0.0,
+                0.6160261
+            ],
+            [
+                -0.0,
+                -4.6370989,
+                -2.2859802
+            ],
+            [
+                -0.0,
+                -0.0,
+                1.0
+            ]
+        ],
+        "image_size": [
+            19,
+            16
+        ],
+        "pts": [
+            [
+                0.7610377,
+                0.121675,
+                0.4438632
+            ],
+            [
+                0.3336743,
+                1.4940791,
+                -0.2051583
+            ],
+            [
+                0.3130677,
+                -0.8540957,
+                -2.5529898
+            ],
+            [
+                0.6536186,
+                0.8644362,
+                -0.742165
+            ],
+            [
+                2.2697546,
+                -1.4543657,
+                0.0457585
+            ],
+            [
+                -0.1871839,
+                1.5327792,
+                1.4693588
+            ],
+            [
+                0.1549474,
+                0.3781625,
+                -0.8877857
+            ],
+            [
+                -1.9807965,
+                -0.3479121,
+                0.156349
+            ],
+            [
+                1.2302907,
+                1.2023798,
+                -0.3873268
+            ],
+            [
+                -0.3023028,
+                -1.048553,
+                -1.4200179
+            ],
+            [
+                -1.7062702,
+                1.9507754,
+                -0.5096522
+            ],
+            [
+                -0.4380743,
+                -1.2527954,
+                0.7774904
+            ],
+            [
+                -1.6138978,
+                -0.2127403,
+                -0.8954666
+            ],
+            [
+                0.3869025,
+                -0.5108051,
+                -1.1806322
+            ],
+            [
+                -0.0281822,
+                0.4283319,
+                0.0665172
+            ],
+            [
+                0.3024719,
+                -0.6343221,
+                -0.3627412
+            ],
+            [
+                -0.6724604,
+                -0.3595532,
+                -0.8131463
+            ],
+            [
+                -1.7262826,
+                0.1774261,
+                -0.4017809
+            ],
+            [
+                -1.6301983,
+                0.4627823,
+                -0.9072984
+            ],
+            [
+                0.0519454,
+                0.7290906,
+                0.1289829
+            ],
+            [
+                1.1394007,
+                -1.2348258,
+                0.4023416
+            ],
+            [
+                -0.6848101,
+                -0.8707971,
+                -0.5788497
+            ],
+            [
+                -0.3115525,
+                0.0561653,
+                -1.1651498
+            ],
+            [
+                0.9008265,
+                0.4656624,
+                -1.5362437
+            ],
+            [
+                1.4882522,
+                1.8958892,
+                1.1787796
+            ],
+            [
+                -0.1799248,
+                -1.0707526,
+                1.0544517
+            ],
+            [
+                -0.4031769,
+                1.2224451,
+                0.208275
+            ],
+            [
+                0.976639,
+                0.3563664,
+                0.7065732
+            ],
+            [
+                0.0105,
+                1.7858705,
+                0.1269121
+            ],
+            [
+                0.4019894,
+                1.8831507,
+                -1.3477591
+            ]
+        ],
+        "pts_proj": [
+            [
+                1.0145014,
+                -2.9284953
+            ],
+            [
+                1.0724146,
+                -3.4080993
+            ],
+            [
+                1.7917063,
+                -6.7793572
+            ],
+            [
+                1.2140849,
+                -3.6559075
+            ],
+            [
+                1.4910298,
+                -1.7436687
+            ],
+            [
+                0.7868983,
+                -2.8461123
+            ],
+            [
+                1.1790002,
+                -4.2421372
+            ],
+            [
+                0.6688383,
+                -5.165954
+            ],
+            [
+                1.2222519,
+                -2.9970978
+            ],
+            [
+                1.2750471,
+                -6.013773
+            ],
+            [
+                0.8970727,
+                -4.5353657
+            ],
+            [
+                0.7041445,
+                -3.9451632
+            ],
+            [
+                0.9085118,
+                -5.7326503
+            ],
+            [
+                1.3246946,
+                -4.7337656
+            ],
+            [
+                0.9750362,
+                -3.678297
+            ],
+            [
+                1.1074375,
+                -4.0924549
+            ],
+            [
+                1.0367661,
+                -5.1176712
+            ],
+            [
+                0.8222725,
+                -5.1682077
+            ],
+            [
+                0.9272976,
+                -5.3440215
+            ],
+            [
+                0.9797946,
+                -3.5213536
+            ],
+            [
+                1.0847165,
+                -2.7743226
+            ],
+            [
+                0.9762082,
+                -5.2337584
+            ],
+            [
+                1.1681587,
+                -4.9342569
+            ],
+            [
+                1.4412056,
+                -4.1796204
+            ],
+            [
+                1.0048257,
+                -2.0358393
+            ],
+            [
+                0.7052604,
+                -3.4338048
+            ],
+            [
+                0.9221008,
+                -3.6497452
+            ],
+            [
+                1.0001084,
+                -2.588002
+            ],
+            [
+                0.9887072,
+                -3.3682799
+            ],
+            [
+                1.2562716,
+                -3.9393683
+            ]
+        ]
+    },
+    {
+        "rvec": [
+            -1.624698671197051,
+            1.2717319528620892,
+            0.026455585547038436
+        ],
+        "tvec": [
+            -0.7416676,
+            -0.9513922,
+            1.2411273
+        ],
+        "camera_matrix": [
+            [
+                -0.6674566,
+                0.0,
+                0.0366092
+            ],
+            [
+                -0.0,
+                -0.4250565,
+                -0.1661673
+            ],
+            [
+                -0.0,
+                -0.0,
+                1.0
+            ]
+        ],
+        "image_size": [
+            11,
+            19
+        ],
+        "pts": [
+            [
+                0.9182028,
+                -0.1588005,
+                -0.9640634
+            ],
+            [
+                -1.9907788,
+                0.0897307,
+                0.1148539
+            ],
+            [
+                -0.5858152,
+                0.298772,
+                0.2222599
+            ],
+            [
+                0.435183,
+                -0.0457481,
+                0.0498984
+            ],
+            [
+                -0.9355305,
+                0.2873877,
+                0.3604273
+            ],
+            [
+                0.4081481,
+                -1.9407157,
+                1.4448357
+            ],
+            [
+                0.1928609,
+                -0.4208648,
+                1.7402535
+            ],
+            [
+                -0.3640868,
+                1.3439544,
+                -0.818221
+            ],
+            [
+                0.0827099,
+                -1.2910585,
+                -0.6611042
+            ],
+            [
+                -1.180191,
+                0.1976426,
+                0.4139
+            ],
+            [
+                1.197322,
+                1.8833539,
+                0.7142238
+            ],
+            [
+                2.2843334,
+                1.5641026,
+                0.6111037
+            ],
+            [
+                -0.8773633,
+                -1.6210875,
+                -0.581673
+            ],
+            [
+                -0.537834,
+                -1.5560237,
+                -0.0544648
+            ],
+            [
+                -1.8112788,
+                -0.6311752,
+                -0.9281592
+            ],
+            [
+                1.4907219,
+                0.1954993,
+                -0.4716043
+            ],
+            [
+                1.8123547,
+                -2.2941375,
+                0.6512093
+            ],
+            [
+                -1.1304964,
+                -0.7773467,
+                1.1159385
+            ],
+            [
+                1.339453,
+                -1.7674337,
+                0.4244125
+            ],
+            [
+                1.089309,
+                -0.3841857,
+                0.6322014
+            ],
+            [
+                -0.5496559,
+                0.5211257,
+                0.1083495
+            ],
+            [
+                0.2616685,
+                -0.9147553,
+                0.8582378
+            ],
+            [
+                0.0943343,
+                -1.4859039,
+                -1.9005843
+            ],
+            [
+                -1.1375792,
+                -1.7620389,
+                -0.2886232
+            ],
+            [
+                1.0479822,
+                0.2499575,
+                0.0469045
+            ],
+            [
+                -1.032243,
+                0.4031857,
+                -0.6840593
+            ],
+            [
+                1.2623222,
+                -2.0055566,
+                -0.3320304
+            ],
+            [
+                -0.2961004,
+                -2.2183608,
+                -0.1835029
+            ],
+            [
+                0.3923081,
+                0.2416348,
+                0.1039359
+            ],
+            [
+                -0.8295712,
+                0.4927594,
+                0.0901128
+            ]
+        ],
+        "pts_proj": [
+            [
+                0.4141589,
+                0.5870938
+            ],
+            [
+                0.5208963,
+                -0.2684058
+            ],
+            [
+                0.6192571,
+                -0.0457759
+            ],
+            [
+                0.3620623,
+                0.3519971
+            ],
+            [
+                0.5966208,
+                -0.1618695
+            ],
+            [
+                -0.6130961,
+                -0.0666413
+            ],
+            [
+                -0.5973283,
+                -0.240145
+            ],
+            [
+                1.7229771,
+                0.3713118
+            ],
+            [
+                0.069312,
+                0.1164213
+            ],
+            [
+                0.5411626,
+                -0.21708
+            ],
+            [
+                -0.7298492,
+                -0.6226879
+            ],
+            [
+                -0.2271261,
+                -0.7756765
+            ],
+            [
+                0.0919778,
+                -0.0452863
+            ],
+            [
+                0.005477,
+                -0.0454978
+            ],
+            [
+                0.3725968,
+                -0.1135225
+            ],
+            [
+                0.6738571,
+                1.806536
+            ],
+            [
+                -0.8906672,
+                0.3956792
+            ],
+            [
+                0.0670651,
+                -0.294005
+            ],
+            [
+                -0.5670261,
+                0.3293613
+            ],
+            [
+                -0.3558128,
+                0.7635874
+            ],
+            [
+                0.7998255,
+                -0.0010898
+            ],
+            [
+                -0.2129184,
+                0.031003
+            ],
+            [
+                0.1711748,
+                0.1747995
+            ],
+            [
+                0.0606019,
+                -0.0990852
+            ],
+            [
+                0.6655512,
+                1.3313645
+            ],
+            [
+                0.6986332,
+                -0.0128461
+            ],
+            [
+                -0.3178348,
+                0.297116
+            ],
+            [
+                -0.1058796,
+                -0.0156453
+            ],
+            [
+                0.6059715,
+                0.4297142
+            ],
+            [
+                0.750323,
+                -0.082273
+            ]
+        ]
+    },
+    {
+        "rvec": [
+            0.9529480300972241,
+            -0.8534383895555052,
+            -2.0235766239679127
+        ],
+        "tvec": [
+            -0.3351616,
+            -1.6076185,
+            0.0039651
+        ],
+        "camera_matrix": [
+            [
+                0.6332697,
+                0.0,
+                -0.0084798
+            ],
+            [
+                0.0,
+                0.5719922,
+                0.5328057
+            ],
+            [
+                0.0,
+                0.0,
+                1.0
+            ]
+        ],
+        "image_size": [
+            10,
+            19
+        ],
+        "pts": [
+            [
+                -0.3824862,
+                0.6916619,
+                0.353885
+            ],
+            [
+                1.0475853,
+                -0.4238962,
+                -3.5147681
+            ],
+            [
+                -1.3431567,
+                1.4255061,
+                0.228582
+            ],
+            [
+                -0.2576638,
+                0.0503707,
+                -1.3802109
+            ],
+            [
+                -0.2616721,
+                -0.1793797,
+                -0.6927706
+            ],
+            [
+                1.1378269,
+                -0.1691573,
+                -0.7639137
+            ],
+            [
+                -0.4980731,
+                -0.3628911,
+                0.2639603
+            ],
+            [
+                -0.6296419,
+                -0.4722584,
+                -1.513361
+            ],
+            [
+                1.1076247,
+                0.1762388,
+                -0.9403535
+            ],
+            [
+                0.9295943,
+                -1.0627949,
+                -0.8864063
+            ],
+            [
+                1.921347,
+                -0.4597805,
+                -1.0890344
+            ],
+            [
+                0.9841173,
+                -1.1592063,
+                -0.4365371
+            ],
+            [
+                1.0092445,
+                0.7133896,
+                -0.7280577
+            ],
+            [
+                0.8395165,
+                1.239021,
+                -1.7848039
+            ],
+            [
+                -0.7961858,
+                -1.4005413,
+                -0.1843506
+            ],
+            [
+                -1.3911931,
+                0.0362597,
+                -0.8144056
+            ],
+            [
+                0.6973728,
+                -1.7374292,
+                0.1158557
+            ],
+            [
+                0.3656514,
+                -0.0739235,
+                -0.4935176
+            ],
+            [
+                3.1015306,
+                0.8587542,
+                -1.1547755
+            ],
+            [
+                0.9418343,
+                -0.2821351,
+                -0.9756547
+            ],
+            [
+                0.0981867,
+                0.90549,
+                1.0187414
+            ],
+            [
+                -0.1148989,
+                1.7430387,
+                -0.3218792
+            ],
+            [
+                0.8295711,
+                -0.207318,
+                1.1179986
+            ],
+            [
+                1.0642497,
+                1.1513298,
+                -0.7724577
+            ],
+            [
+                -1.2936343,
+                0.6770268,
+                0.4240552
+            ],
+            [
+                -0.4856762,
+                -0.0516972,
+                0.5670564
+            ],
+            [
+                1.0678336,
+                0.2715957,
+                0.6193018
+            ],
+            [
+                -0.058626,
+                1.2565714,
+                0.2967472
+            ],
+            [
+                0.3985857,
+                -1.0531744,
+                -0.6394763
+            ],
+            [
+                -0.1485269,
+                -1.5745821,
+                -0.4956882
+            ]
+        ],
+        "pts_proj": [
+            [
+                -0.1735197,
+                -0.5026447
+            ],
+            [
+                -0.5086927,
+                1.2602939
+            ],
+            [
+                0.1995019,
+                0.1377832
+            ],
+            [
+                -1.0602558,
+                2.3228582
+            ],
+            [
+                -0.4811967,
+                2.6476893
+            ],
+            [
+                0.1891248,
+                2.2078843
+            ],
+            [
+                -14.5705317,
+                -27.5060102
+            ],
+            [
+                -0.7295134,
+                1.2970094
+            ],
+            [
+                -0.0004579,
+                2.8277094
+            ],
+            [
+                0.1412656,
+                1.2635503
+            ],
+            [
+                0.1810516,
+                1.7102113
+            ],
+            [
+                0.3390042,
+                1.2833538
+            ],
+            [
+                -0.1753715,
+                12.5495766
+            ],
+            [
+                -3.6611631,
+                10.1461827
+            ],
+            [
+                0.1812461,
+                0.7066171
+            ],
+            [
+                6.5346752,
+                -3.5950883
+            ],
+            [
+                0.5305561,
+                0.9966792
+            ],
+            [
+                0.1684846,
+                3.1942163
+            ],
+            [
+                0.3376347,
+                3.4565985
+            ],
+            [
+                0.024885,
+                1.9060357
+            ],
+            [
+                -0.4946605,
+                -0.3688437
+            ],
+            [
+                0.2704817,
+                -0.5945693
+            ],
+            [
+                -8.9040477,
+                -8.4144665
+            ],
+            [
+                0.7361192,
+                -10.8591518
+            ],
+            [
+                0.0607204,
+                0.1565487
+            ],
+            [
+                -0.9204385,
+                -0.9006548
+            ],
+            [
+                -4.7045052,
+                -7.8600902
+            ],
+            [
+                -0.0843625,
+                -0.5081105
+            ],
+            [
+                0.1565888,
+                1.2224583
+            ],
+            [
+                0.1561078,
+                0.8506761
+            ]
+        ]
+    },
+    {
+        "rvec": [
+            -0.9637575928124262,
+            1.6344477621933204,
+            1.1377406680922277
+        ],
+        "tvec": [
+            -5.0736742,
+            -15.0264648,
+            3.7663565
+        ],
+        "camera_matrix": [
+            [
+                0.0592345,
+                0.0,
+                1.0948929
+            ],
+            [
+                0.0,
+                2.0469542,
+                -1.6769676
+            ],
+            [
+                0.0,
+                0.0,
+                1.0
+            ]
+        ],
+        "image_size": [
+            19,
+            12
+        ],
+        "pts": [
+            [
+                0.0815831,
+                1.2381695,
+                2.1837783
+            ],
+            [
+                0.1905641,
+                -0.4981669,
+                0.0475124
+            ],
+            [
+                1.4483291,
+                0.4282211,
+                -0.1707534
+            ],
+            [
+                -2.3547773,
+                -0.4678065,
+                -0.1294017
+            ],
+            [
+                0.9000515,
+                -0.5156528,
+                0.5203996
+            ],
+            [
+                1.1410499,
+                -1.4447244,
+                -1.0176718
+            ],
+            [
+                -0.9902687,
+                1.5241503,
+                0.636245
+            ],
+            [
+                0.1996184,
+                -0.0077118,
+                1.1754896
+            ],
+            [
+                1.2556589,
+                -0.2809262,
+                -0.3669177
+            ],
+            [
+                1.6537248,
+                -0.0856097,
+                0.2933573
+            ],
+            [
+                1.4872575,
+                -0.6384799,
+                0.7087688
+            ],
+            [
+                -0.0825277,
+                -0.6873837,
+                -1.4113116
+            ],
+            [
+                0.6213837,
+                0.3085112,
+                -0.3309394
+            ],
+            [
+                -0.4852161,
+                -0.0602118,
+                -1.5596469
+            ],
+            [
+                -0.2914053,
+                0.8040719,
+                0.7358267
+            ],
+            [
+                -0.2026395,
+                0.2902467,
+                -0.8548176
+            ],
+            [
+                -0.3261322,
+                -0.952252,
+                1.1869633
+            ],
+            [
+                -1.1571738,
+                0.0193,
+                1.4500207
+            ],
+            [
+                0.6521225,
+                -0.3442692,
+                1.2908895
+            ],
+            [
+                -0.8021245,
+                -1.3890878,
+                0.2576423
+            ],
+            [
+                1.1274729,
+                2.7849331,
+                -0.0913946
+            ],
+            [
+                -0.0290724,
+                1.030574,
+                1.0248824
+            ],
+            [
+                1.5831991,
+                1.0291609,
+                1.8343708
+            ],
+            [
+                -0.4910075,
+                -0.8094693,
+                -0.2462478
+            ],
+            [
+                -0.4517529,
+                -0.6581127,
+                0.3922879
+            ],
+            [
+                -1.3964618,
+                0.3368952,
+                0.6266755
+            ],
+            [
+                0.5469885,
+                -0.3078414,
+                0.0734122
+            ],
+            [
+                -0.2766632,
+                0.1395219,
+                -0.050913
+            ],
+            [
+                0.740091,
+                -0.3233616,
+                2.4009981
+            ],
+            [
+                2.8127453,
+                0.6191253,
+                -2.3287021
+            ]
+        ],
+        "pts_proj": [
+            [
+                1.0011881,
+                -8.7962669
+            ],
+            [
+                1.0148904,
+                -10.6624766
+            ],
+            [
+                0.9559355,
+                -13.9861497
+            ],
+            [
+                1.0550145,
+                -6.889409
+            ],
+            [
+                0.9903897,
+                -12.9240756
+            ],
+            [
+                0.9922857,
+                -15.2615745
+            ],
+            [
+                1.0231235,
+                -7.3797553
+            ],
+            [
+                1.0095861,
+                -10.1396779
+            ],
+            [
+                0.9732178,
+                -14.1103632
+            ],
+            [
+                0.9437488,
+                -16.0891922
+            ],
+            [
+                0.9584208,
+                -16.2312512
+            ],
+            [
+                1.0231435,
+                -10.332697
+            ],
+            [
+                0.9944343,
+                -11.1411459
+            ],
+            [
+                1.0257093,
+                -9.1991957
+            ],
+            [
+                1.0149771,
+                -8.6661286
+            ],
+            [
+                1.0172749,
+                -9.379192
+            ],
+            [
+                1.0324498,
+                -9.6570503
+            ],
+            [
+                1.0382854,
+                -7.6709289
+            ],
+            [
+                0.9976967,
+                -11.7654687
+            ],
+            [
+                1.0447702,
+                -9.1607223
+            ],
+            [
+                0.9566311,
+                -10.3084282
+            ],
+            [
+                1.0066492,
+                -8.9367541
+            ],
+            [
+                0.9320123,
+                -13.8820569
+            ],
+            [
+                1.0331906,
+                -9.4447618
+            ],
+            [
+                1.0314307,
+                -9.3262563
+            ],
+            [
+                1.0384939,
+                -7.4069907
+            ],
+            [
+                1.0022615,
+                -11.4736859
+            ],
+            [
+                1.0202613,
+                -9.217644
+            ],
+            [
+                0.9922728,
+                -11.9963374
+            ],
+            [
+                0.8448706,
+                -23.0521993
+            ]
+        ]
+    }
+]
diff --git a/pytorch3d/tests/data/missing_files_obj/model.mtl b/pytorch3d/tests/data/missing_files_obj/model.mtl
new file mode 100644
index 0000000000000000000000000000000000000000..d6a0f4fb3c119916f2c324f846a040879e4b6e2a
--- /dev/null
+++ b/pytorch3d/tests/data/missing_files_obj/model.mtl
@@ -0,0 +1,9 @@
+newmtl material_1
+map_Kd material_1.png
+
+# Test colors
+
+Ka 1.000 1.000 1.000  # white
+Kd 1.000 1.000 1.000  # white
+Ks 0.000 0.000 0.000  # black
+Ns 10.0
diff --git a/pytorch3d/tests/data/missing_files_obj/model.obj b/pytorch3d/tests/data/missing_files_obj/model.obj
new file mode 100644
index 0000000000000000000000000000000000000000..cf411442fd4d6e5364e0e49aea0e3bbd570d6b06
--- /dev/null
+++ b/pytorch3d/tests/data/missing_files_obj/model.obj
@@ -0,0 +1,10 @@
+
+mtllib model.mtl
+
+v 0.1 0.2 0.3
+v 0.2 0.3 0.4
+v 0.3 0.4 0.5
+v 0.4 0.5 0.6
+usemtl material_1
+f 1 2 3
+f 1 2 4
diff --git a/pytorch3d/tests/data/missing_files_obj/model2.obj b/pytorch3d/tests/data/missing_files_obj/model2.obj
new file mode 100644
index 0000000000000000000000000000000000000000..df393f9f1b360ded4b6d232039702a2c745f5529
--- /dev/null
+++ b/pytorch3d/tests/data/missing_files_obj/model2.obj
@@ -0,0 +1,10 @@
+
+mtllib model2.mtl
+
+v 0.1 0.2 0.3
+v 0.2 0.3 0.4
+v 0.3 0.4 0.5
+v 0.4 0.5 0.6
+usemtl material_1
+f 1 2 3
+f 1 2 4
diff --git a/pytorch3d/tests/data/missing_usemtl/README.md b/pytorch3d/tests/data/missing_usemtl/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1c9b08bea9e0a4efac7770c872a7aa2b6023e307
--- /dev/null
+++ b/pytorch3d/tests/data/missing_usemtl/README.md
@@ -0,0 +1,7 @@
+# Acknowledgements
+
+This is a copied version of docs/tutorials/data/cow_mesh with line 6159 (usemtl material_1) removed, to test behavior without the usemtl material_1 declaration.
+
+Thank you to Keenan Crane for allowing the cow mesh model to be used freely in the public domain.
+
+###### Source: http://www.cs.cmu.edu/~kmcrane/Projects/ModelRepository/
diff --git a/pytorch3d/tests/data/missing_usemtl/cow.mtl b/pytorch3d/tests/data/missing_usemtl/cow.mtl
new file mode 100644
index 0000000000000000000000000000000000000000..c3bc054358c3db2526520c29f4c30ac812f7c370
--- /dev/null
+++ b/pytorch3d/tests/data/missing_usemtl/cow.mtl
@@ -0,0 +1,9 @@
+newmtl material_1
+map_Kd cow_texture.png
+
+# Test colors
+
+Ka 1.000 1.000 1.000  # white
+Kd 1.000 1.000 1.000  # white
+Ks 0.000 0.000 0.000  # black
+Ns 10.0
diff --git a/pytorch3d/tests/data/missing_usemtl/cow.obj b/pytorch3d/tests/data/missing_usemtl/cow.obj
new file mode 100644
index 0000000000000000000000000000000000000000..2f349d0dac3994869486bdac7f15892f5acb2508
--- /dev/null
+++ b/pytorch3d/tests/data/missing_usemtl/cow.obj
@@ -0,0 +1,12014 @@
+
+mtllib cow.mtl
+
+v 0.348799 -0.334989 -0.0832331
+v 0.313132 -0.399051 0.881192
+v 0.266758 0.181628 0.122726
+v 0.229555 0.0663178 0.828702
+v 0.353623 -0.0486456 0.443969
+v 0.335878 -0.384495 0.425693
+v 0.305362 0.0307983 -0.00655663
+v 0.279611 -0.0552387 0.858547
+v 0.266719 0.10578 0.46681
+v 0.149341 -0.451522 0.166423
+v 0.12606 -0.162036 -0.185668
+v 0.114009 -0.358339 -0.0759521
+v 0.101546 -0.475382 0.663157
+v 0.240757 -0.435635 0.421981
+v 0.385825 -0.174501 0.180766
+v 0.358602 -0.37797 0.17897
+v 0.345953 -0.229705 0.664126
+v 0.316102 -0.419396 0.649153
+v 0.131961 -0.201382 0.951731
+v 0.0876766 -0.391182 0.901606
+v 0.161859 0.285356 0.303098
+v 0.177264 0.282839 0.0875537
+v 0.158786 0.0751889 -0.141343
+v 0.310974 -0.0974099 -0.0697762
+v 0.138236 0.191142 0.700393
+v 0.13061 0.109881 0.873657
+v 0.287544 -0.214549 0.864938
+v 0.25445 0.101336 0.680112
+v 0.34304 -0.0541165 0.672286
+v 0.369388 -0.0242775 0.205838
+v 0.273782 0.160332 0.273861
+v 0.36907 -0.228332 0.435279
+v 0.146074 0.206063 0.488052
+v 0.15689 0.317253 -0.0179554
+v 0.184529 0.181235 -0.238574
+v 0.229877 0.446038 -0.0428095
+v 0.326584 0.303676 -0.375494
+v 0.257915 0.488969 -0.517721
+v 0.187711 0.730295 -0.437134
+v 0.190442 0.122856 -0.437696
+v 0.271501 0.201979 -0.579853
+v 0.227961 0.337016 -0.641468
+v 0.323288 -0.645898 -0.0650069
+v 0.277559 -0.660659 0.872426
+v 0.15321 -0.671553 0.116597
+v 0.153343 -0.652512 -0.0628489
+v 0.129752 -0.680413 0.714233
+v 0.310744 -0.658159 0.12474
+v 0.270197 -0.670204 0.705429
+v 0.125665 -0.664371 0.877017
+v 0.264169 0.646946 -0.369897
+v 0.180359 0.68646 -0.148279
+v 0.263204 0.560375 -0.137191
+v 0.371765 0.683869 -0.321196
+v 0.432195 0.751246 -0.233457
+v 0.424574 0.680439 -0.150855
+v 0.379346 0.604049 -0.213075
+v 0 -0.192084 -0.196407
+v 0 0.505678 -0.563016
+v 0 0.340641 -0.665066
+v 0 0.0414775 -0.241591
+v 0 0.117615 -0.305384
+v 0 0.100384 -0.450564
+v 0 0.182587 -0.616155
+v -4.33681e-19 0.765067 -0.449072
+v -4.33681e-19 0.834757 -0.307668
+v 0 -0.434774 0.0615192
+v 0 -0.354126 -0.0677048
+v 0 0.326124 0.139538
+v 0 0.352207 0.0388631
+v 0 0.499175 0.0586613
+v -4.33681e-19 0.723906 -0.0597107
+v 0 0.816061 -0.189369
+v 0 -0.435249 0.726548
+v 0 -0.533724 0.415511
+v 0 0.301194 0.312719
+v 0 0.233342 0.49566
+v 0 -0.340037 0.891691
+v 0 -0.194995 0.971379
+v 0 0.220553 0.711025
+v 0 0.130478 0.891171
+v 0.164015 0.929339 -0.232094
+v 0.205905 0.915741 -0.313374
+v 0.15338 0.935554 -0.282029
+v 0.221786 0.905461 -0.256021
+v 0.115236 0.837896 -0.222054
+v 0.086327 0.812209 -0.211465
+v 0.172231 0.819312 -0.334104
+v 0.162464 0.791078 -0.351846
+v 0.0787362 0.822102 -0.294366
+v 0.102999 0.844736 -0.288229
+v 0.219399 0.754552 -0.28348
+v 0.199751 0.790382 -0.244881
+v 0.184684 0.762009 -0.196152
+v 0.279703 0.762439 -0.233186
+v 0.120311 -0.456517 0.243094
+v 0.111282 -0.477316 0.58188
+v 0.186164 -0.487701 0.417533
+v 0 -0.462404 0.208739
+v 0 -0.483948 0.606421
+v 0.17185 -0.0410977 0.94527
+v 0.106249 -0.0142167 0.968533
+v 0.107395 -0.0653463 0.975253
+v 0 0.00228925 0.987177
+v 0 -0.0756163 0.995507
+v 0.0517509 -0.0608359 1.02473
+v 0.0517951 -0.0798093 1.0112
+v 0 -0.0688251 1.04807
+v 0 -0.0952192 1.02642
+v -0.348799 -0.334989 -0.0832331
+v -0.313132 -0.399051 0.881192
+v -0.266758 0.181628 0.122726
+v -0.229555 0.0663178 0.828702
+v -0.353623 -0.0486456 0.443969
+v -0.335878 -0.384495 0.425693
+v -0.305362 0.0307983 -0.00655663
+v -0.279611 -0.0552387 0.858547
+v -0.266719 0.10578 0.46681
+v -0.149341 -0.451522 0.166423
+v -0.12606 -0.162036 -0.185668
+v -0.114009 -0.358339 -0.0759521
+v -0.101546 -0.475382 0.663157
+v -0.240757 -0.435635 0.421981
+v -0.385825 -0.174501 0.180766
+v -0.358602 -0.37797 0.17897
+v -0.345953 -0.229705 0.664126
+v -0.316102 -0.419396 0.649153
+v -0.131961 -0.201382 0.951731
+v -0.0876766 -0.391182 0.901606
+v -0.161859 0.285356 0.303098
+v -0.177264 0.282839 0.0875537
+v -0.158786 0.0751889 -0.141343
+v -0.310974 -0.0974099 -0.0697762
+v -0.138236 0.191142 0.700393
+v -0.13061 0.109881 0.873657
+v -0.287544 -0.214549 0.864938
+v -0.25445 0.101336 0.680112
+v -0.34304 -0.0541165 0.672286
+v -0.369388 -0.0242775 0.205838
+v -0.273782 0.160332 0.273861
+v -0.36907 -0.228332 0.435279
+v -0.146074 0.206063 0.488052
+v -0.15689 0.317253 -0.0179554
+v -0.184529 0.181235 -0.238574
+v -0.229877 0.446038 -0.0428095
+v -0.326584 0.303676 -0.375494
+v -0.257915 0.488969 -0.517721
+v -0.187711 0.730295 -0.437134
+v -0.190442 0.122856 -0.437696
+v -0.271501 0.201979 -0.579853
+v -0.227961 0.337016 -0.641468
+v -0.323288 -0.645898 -0.0650069
+v -0.277559 -0.660659 0.872426
+v -0.15321 -0.671553 0.116597
+v -0.153343 -0.652512 -0.0628489
+v -0.129752 -0.680413 0.714233
+v -0.310744 -0.658159 0.12474
+v -0.270197 -0.670204 0.705429
+v -0.125665 -0.664371 0.877017
+v -0.264169 0.646946 -0.369897
+v -0.180359 0.68646 -0.148279
+v -0.263204 0.560375 -0.137191
+v -0.371765 0.683869 -0.321196
+v -0.432195 0.751246 -0.233457
+v -0.424574 0.680439 -0.150855
+v -0.379346 0.604049 -0.213075
+v -0.164015 0.929339 -0.232094
+v -0.205905 0.915741 -0.313374
+v -0.15338 0.935554 -0.282029
+v -0.221786 0.905461 -0.256021
+v -0.115236 0.837896 -0.222054
+v -0.086327 0.812209 -0.211465
+v -0.172231 0.819312 -0.334104
+v -0.162464 0.791078 -0.351846
+v -0.0787362 0.822102 -0.294366
+v -0.102999 0.844736 -0.288229
+v -0.219399 0.754552 -0.28348
+v -0.199751 0.790382 -0.244881
+v -0.184684 0.762009 -0.196152
+v -0.279703 0.762439 -0.233186
+v -0.120311 -0.456517 0.243094
+v -0.111282 -0.477316 0.58188
+v -0.186164 -0.487701 0.417533
+v -0.17185 -0.0410977 0.94527
+v -0.106249 -0.0142167 0.968533
+v -0.107395 -0.0653463 0.975253
+v -0.0517509 -0.0608359 1.02473
+v -0.0517951 -0.0798093 1.0112
+v 0.287063 -0.417912 0.42339
+v 0.223323 -0.431749 0.300839
+v 0.262599 -0.446574 0.214165
+v 0.345683 -0.367458 0.305622
+v 0.284862 -0.410239 0.304857
+v 0.320467 -0.390454 0.538527
+v 0.218649 -0.474853 0.619945
+v 0.19621 -0.447285 0.538732
+v 0.257025 -0.429064 0.536591
+v 0.240602 -0.341214 -0.117452
+v 0.12147 -0.249274 -0.145022
+v 0.221157 -0.120393 -0.146058
+v 0.328715 -0.192761 -0.0778193
+v 0.229478 -0.212011 -0.133147
+v 0.307987 -0.0331195 -0.049691
+v 0.139893 -0.0548182 -0.181103
+v 0.237075 0.0521949 -0.0759725
+v 0.227173 -0.0372356 -0.128413
+v 0.301492 -0.292885 0.870674
+v 0.213279 -0.204742 0.922518
+v 0.117499 -0.281642 0.935739
+v 0.211816 -0.394732 0.922409
+v 0.215547 -0.286415 0.919674
+v 0.364968 -0.12541 0.046235
+v 0.382458 -0.25968 0.176197
+v 0.385194 -0.351205 0.0306159
+v 0.375418 -0.216313 0.0359591
+v 0.382575 -0.209764 0.311653
+v 0.361147 -0.316624 0.430338
+v 0.374384 -0.294532 0.308248
+v 0.368668 -0.137134 0.438729
+v 0.385132 -0.101185 0.188168
+v 0.365146 -0.0406368 0.324829
+v 0.381949 -0.125584 0.315412
+v 0.350232 0.00253885 0.0908221
+v 0.361458 -0.0619596 0.0609153
+v 0.343331 -0.409171 0.778577
+v 0.341862 -0.313907 0.657949
+v 0.328019 -0.223318 0.773747
+v 0.336544 -0.303107 0.775339
+v 0.283052 -0.132631 0.863571
+v 0.351001 -0.142438 0.668828
+v 0.322487 -0.0565049 0.774019
+v 0.329 -0.140489 0.774308
+v 0.349301 -0.0521301 0.560904
+v 0.357572 -0.233122 0.551664
+v 0.360647 -0.141814 0.556564
+v 0.349272 -0.319879 0.544579
+v 0.23281 0.229543 0.110217
+v 0.170184 0.299829 0.20555
+v 0.221341 0.236481 0.293418
+v 0.272682 0.181352 0.189589
+v 0.227226 0.252859 0.204008
+v 0.271155 0.129527 0.366223
+v 0.153276 0.244216 0.39384
+v 0.20969 0.164574 0.478658
+v 0.215375 0.198113 0.382954
+v 0.24282 0.0875955 0.770725
+v 0.198521 0.154682 0.689709
+v 0.134533 0.161682 0.796578
+v 0.187262 0.086887 0.854887
+v 0.191558 0.132026 0.78245
+v 0.141545 0.196582 0.593509
+v 0.26179 0.101674 0.574769
+v 0.204392 0.157257 0.584394
+v 0.327755 0.0676272 0.239516
+v 0.317321 0.0335117 0.454173
+v 0.324726 0.0469262 0.344192
+v 0.307072 0.0296092 0.675079
+v 0.253941 0.0157408 0.848168
+v 0.289863 0.0225221 0.77107
+v 0.190901 0.188016 -0.0300482
+v 0.288174 0.116771 0.0639197
+v 0.239299 0.151564 0.0118342
+v 0.313467 0.030566 0.566851
+v 0.317033 0.0909783 0.142973
+v 0.150018 0.148066 -0.170355
+v 0.192003 0.257627 -0.114007
+v 0.150923 0.290686 0.0162651
+v 0.172619 0.221245 -0.0722104
+v 0.274031 0.23831 -0.301318
+v 0.294876 0.383503 -0.203773
+v 0.194068 0.37243 -0.0269037
+v 0.248629 0.311098 -0.153862
+v 0.196806 0.141274 -0.362924
+v 0.23911 0.14497 -0.502892
+v 0.324701 0.24108 -0.502727
+v 0.278147 0.181024 -0.423508
+v 0.263786 0.27805 -0.630217
+v 0.250979 0.388662 -0.601302
+v 0.318198 0.390558 -0.446528
+v 0.314593 0.32071 -0.563292
+v 0.338046 -0.680095 0.0309463
+v 0.228165 -0.692422 0.147407
+v 0.12828 -0.689708 0.0245746
+v 0.238161 -0.677382 -0.0918856
+v 0.229681 -0.733842 0.0266899
+v 0.20056 -0.687393 0.898863
+v 0.107919 -0.696884 0.796995
+v 0.199176 -0.699686 0.68654
+v 0.291818 -0.690195 0.788408
+v 0.198244 -0.736784 0.793448
+v 0.307855 -0.55258 0.89062
+v 0.294592 -0.571581 0.678753
+v 0.335563 -0.561971 0.78431
+v 0.0593616 -0.458076 0.795932
+v 0.110927 -0.595142 0.691599
+v 0.102733 -0.558625 0.896274
+v 0.0716769 -0.583028 0.796447
+v 0.203007 -0.591569 0.647127
+v 0.206666 -0.553636 0.930804
+v 0.339121 -0.546516 0.150832
+v 0.352933 -0.520557 -0.0885122
+v 0.383244 -0.532332 0.0297109
+v 0.132569 -0.534652 -0.0767316
+v 0.24402 -0.524662 -0.124285
+v 0.143499 -0.577512 0.141914
+v 0.241073 -0.571789 0.188527
+v 0.0964942 -0.434904 0.0342915
+v 0.0999951 -0.56397 0.0317575
+v 0.271576 0.561647 -0.416318
+v 0.282894 0.56989 -0.257283
+v 0.23851 0.520387 -0.0907886
+v 0.293195 0.482084 -0.276654
+v 0.213045 0.609177 -0.109989
+v 0.321376 0.576724 -0.185683
+v 0.422878 0.632073 -0.168661
+v 0.32688 0.680784 -0.145207
+v 0.326345 0.612341 -0.141546
+v 0.312594 0.674309 -0.341185
+v 0.379053 0.63587 -0.283895
+v 0.3154 0.606871 -0.278418
+v 0.417773 0.731266 -0.288914
+v 0.460287 0.727236 -0.177367
+v 0.454185 0.684351 -0.230461
+v 0 0.320051 0.223298
+v 0.0857686 0.301184 0.309872
+v 0.0922474 0.320203 0.127906
+v 0.0910385 0.318251 0.216664
+v 0 0.227033 0.602086
+v 0.0710686 0.213115 0.708099
+v 0.0750596 0.22755 0.493766
+v 0.0724773 0.219588 0.599773
+v 0 0.186603 0.810651
+v 0.0670456 0.125156 0.886255
+v 0.0695062 0.18012 0.806775
+v 0 -0.40432 -0.00187701
+v 0.0502685 -0.34467 -0.0665397
+v 0.0661753 -0.432919 0.0879789
+v 0.056363 -0.399912 0.0126968
+v 0 -0.395546 0.810009
+v 0.0398588 -0.442711 0.714357
+v 0.0361117 -0.34665 0.891624
+v 0.0321101 -0.404726 0.804765
+v 0 0.264692 0.401284
+v 0.0798727 0.262384 0.399618
+v 0 0.330325 0.0716557
+v 0.0874752 0.345901 0.0284499
+v 0.0858416 0.32349 0.0605597
+v 0 0.409384 0.0515977
+v 0.124296 0.487548 0.0340513
+v 0.105997 0.402715 0.0360884
+v 0 0.641579 -0.507325
+v 0.104733 0.755088 -0.444831
+v 0.214286 0.629383 -0.484754
+v 0.136392 0.50382 -0.556974
+v 0.121817 0.636846 -0.502061
+v 0 0.412943 -0.623842
+v 0.139278 0.340909 -0.661549
+v 0.138945 0.411476 -0.620153
+v 0 -0.271137 0.943676
+v 0.0622199 -0.197393 0.966789
+v 0.0501277 -0.274014 0.941915
+v 0 -0.286073 -0.138523
+v 0.0553298 -0.185801 -0.197022
+v 0.050202 -0.275405 -0.142323
+v 0 -0.0695404 -0.225378
+v 0.0766662 0.0457919 -0.217551
+v 0.0655056 -0.0670069 -0.214493
+v 0 0.101519 -0.264918
+v 0.0933959 0.130309 -0.285229
+v 0.0838463 0.11062 -0.238883
+v 0 0.106874 -0.369301
+v 0.114603 0.104637 -0.445501
+v 0.102596 0.115199 -0.359834
+v 0 0.126441 -0.539369
+v 0.153665 0.185489 -0.611191
+v 0.137484 0.129002 -0.534091
+v 0 0.259825 -0.661348
+v 0.150371 0.262453 -0.657355
+v 0.0819636 0.707011 -0.0978848
+v 0 0.613137 0.0118372
+v 0.128934 0.593549 -0.0276065
+v 0.230899 0.696193 -0.415496
+v 0.2462 0.632058 -0.451998
+v 0.2255 0.918412 -0.287909
+v 0.182289 0.937359 -0.304334
+v 0.155831 0.94592 -0.253016
+v 0.199042 0.926972 -0.236591
+v 0.198381 0.950853 -0.26973
+v 0.195536 0.768708 -0.323732
+v 0.162591 0.796641 -0.337706
+v 0.198135 0.804412 -0.300184
+v 0.204856 0.767832 -0.258627
+v 0.190344 0.780419 -0.305321
+v 0.158595 0.817014 -0.220186
+v 0.102838 0.815304 -0.21903
+v 0.12548 0.786281 -0.190531
+v 0.18859 0.771476 -0.215131
+v 0.14521 0.793415 -0.208712
+v 0.113814 0.810725 -0.339255
+v 0.0911506 0.822276 -0.28966
+v 0.134346 0.834135 -0.324328
+v 0.12321 0.811864 -0.327105
+v 0.0699464 0.823757 -0.248502
+v 0.0956349 0.846841 -0.248877
+v 0.0836698 0.824545 -0.249204
+v 0.24364 0.757848 -0.207433
+v 0.246443 0.756953 -0.257453
+v 0.21289 0.762245 -0.22883
+v 0.355751 0.762428 -0.238497
+v 0.249966 0.723503 -0.332259
+v 0.32003 0.739642 -0.304714
+v 0.181848 0.741315 -0.180043
+v 0.328516 0.741977 -0.181175
+v 0.0499681 0.812176 -0.199395
+v 0 0.785709 -0.129116
+v 0.0788354 0.774284 -0.158189
+v 0.0505691 0.828853 -0.304368
+v 0.168022 0.778755 -0.386065
+v 0 0.821114 -0.378707
+v 0.0897195 0.811796 -0.373587
+v 0 0.831803 -0.246303
+v 0.0404745 0.828353 -0.247459
+v 0.211302 0.744371 -0.365196
+v 0.193702 0.87269 -0.329392
+v 0.216553 0.856661 -0.251136
+v 0.218551 0.860755 -0.297102
+v 0.126841 0.897388 -0.285836
+v 0.157513 0.887986 -0.320358
+v 0.138938 0.890583 -0.222912
+v 0.120161 0.900194 -0.247801
+v 0.181137 0.872948 -0.224154
+v 0.106299 -0.467348 0.612794
+v 0.156751 -0.485651 0.517662
+v 0.218865 -0.458568 0.419816
+v 0.177713 -0.464461 0.527871
+v 0 -0.459742 0.666605
+v 0.0640756 -0.481225 0.60015
+v 0.0486808 -0.462533 0.655449
+v 0.129769 -0.445891 0.210992
+v 0.0674919 -0.459184 0.218582
+v 0 -0.445956 0.12884
+v 0.0638357 -0.444618 0.15006
+v 0.160168 -0.471758 0.313909
+v 0.192938 -0.45045 0.307624
+v 0 -0.518477 0.519852
+v 0.106971 -0.518553 0.416032
+v 0.0913755 -0.507594 0.518132
+v 0 -0.503509 0.307563
+v 0.0906862 -0.492986 0.310407
+v 0.148938 -0.0188178 0.950827
+v 0.109744 -0.0412763 0.976341
+v 0.152295 -0.0598695 0.957881
+v 0.133131 -0.0374762 0.965091
+v 0.129144 -0.123186 0.961976
+v 0.214922 -0.0472768 0.917853
+v 0.202946 -0.113321 0.930645
+v 0.124777 0.0448234 0.926767
+v 0.189431 0.0176502 0.907717
+v 0.0541294 -0.000582154 0.980957
+v 0 0.0636729 0.946137
+v 0.0627521 0.0592134 0.940397
+v 0.0544734 -0.0758718 0.989219
+v 0 -0.121134 0.98554
+v 0.0631528 -0.123094 0.979468
+v 0.0716307 -0.0432 1.00541
+v 0.052795 -0.0725924 1.01945
+v 0.0717681 -0.0690106 0.994894
+v 0.0809297 -0.0558501 0.997263
+v 0 -0.0400118 1.02513
+v 0.0297219 -0.0653048 1.04059
+v 0.0406958 -0.0395224 1.01839
+v 0.0297587 -0.0908237 1.02001
+v 0 -0.0782912 1.00911
+v 0.0407812 -0.0765567 1.00299
+v 0 -0.0903561 1.04431
+v 0.0264068 -0.0851806 1.03686
+v -0.345683 -0.367458 0.305622
+v -0.262599 -0.446574 0.214165
+v -0.223323 -0.431749 0.300839
+v -0.287063 -0.417912 0.42339
+v -0.284862 -0.410239 0.304857
+v -0.19621 -0.447285 0.538732
+v -0.218649 -0.474853 0.619945
+v -0.320467 -0.390454 0.538527
+v -0.257025 -0.429064 0.536591
+v -0.328715 -0.192761 -0.0778193
+v -0.221157 -0.120393 -0.146058
+v -0.12147 -0.249274 -0.145022
+v -0.240602 -0.341214 -0.117452
+v -0.229478 -0.212011 -0.133147
+v -0.237075 0.0521949 -0.0759725
+v -0.139893 -0.0548182 -0.181103
+v -0.307987 -0.0331195 -0.049691
+v -0.227173 -0.0372356 -0.128413
+v -0.211816 -0.394732 0.922409
+v -0.117499 -0.281642 0.935739
+v -0.213279 -0.204742 0.922518
+v -0.301492 -0.292885 0.870674
+v -0.215547 -0.286415 0.919674
+v -0.385194 -0.351205 0.0306159
+v -0.382458 -0.25968 0.176197
+v -0.364968 -0.12541 0.046235
+v -0.375418 -0.216313 0.0359591
+v -0.361147 -0.316624 0.430338
+v -0.382575 -0.209764 0.311653
+v -0.374384 -0.294532 0.308248
+v -0.365146 -0.0406368 0.324829
+v -0.385132 -0.101185 0.188168
+v -0.368668 -0.137134 0.438729
+v -0.381949 -0.125584 0.315412
+v -0.350232 0.00253885 0.0908221
+v -0.361458 -0.0619596 0.0609153
+v -0.328019 -0.223318 0.773747
+v -0.341862 -0.313907 0.657949
+v -0.343331 -0.409171 0.778577
+v -0.336544 -0.303107 0.775339
+v -0.322487 -0.0565049 0.774019
+v -0.351001 -0.142438 0.668828
+v -0.283052 -0.132631 0.863571
+v -0.329 -0.140489 0.774308
+v -0.357572 -0.233122 0.551664
+v -0.349301 -0.0521301 0.560904
+v -0.360647 -0.141814 0.556564
+v -0.349272 -0.319879 0.544579
+v -0.272682 0.181352 0.189589
+v -0.221341 0.236481 0.293418
+v -0.170184 0.299829 0.20555
+v -0.23281 0.229543 0.110217
+v -0.227226 0.252859 0.204008
+v -0.20969 0.164574 0.478658
+v -0.153276 0.244216 0.39384
+v -0.271155 0.129527 0.366223
+v -0.215375 0.198113 0.382954
+v -0.187262 0.086887 0.854887
+v -0.134533 0.161682 0.796578
+v -0.198521 0.154682 0.689709
+v -0.24282 0.0875955 0.770725
+v -0.191558 0.132026 0.78245
+v -0.26179 0.101674 0.574769
+v -0.141545 0.196582 0.593509
+v -0.204392 0.157257 0.584394
+v -0.317321 0.0335117 0.454173
+v -0.327755 0.0676272 0.239516
+v -0.324726 0.0469262 0.344192
+v -0.253941 0.0157408 0.848168
+v -0.307072 0.0296092 0.675079
+v -0.289863 0.0225221 0.77107
+v -0.288174 0.116771 0.0639197
+v -0.190901 0.188016 -0.0300482
+v -0.239299 0.151564 0.0118342
+v -0.313467 0.030566 0.566851
+v -0.317033 0.0909783 0.142973
+v -0.150923 0.290686 0.0162651
+v -0.192003 0.257627 -0.114007
+v -0.150018 0.148066 -0.170355
+v -0.172619 0.221245 -0.0722104
+v -0.194068 0.37243 -0.0269037
+v -0.294876 0.383503 -0.203773
+v -0.274031 0.23831 -0.301318
+v -0.248629 0.311098 -0.153862
+v -0.324701 0.24108 -0.502727
+v -0.23911 0.14497 -0.502892
+v -0.196806 0.141274 -0.362924
+v -0.278147 0.181024 -0.423508
+v -0.318198 0.390558 -0.446528
+v -0.250979 0.388662 -0.601302
+v -0.263786 0.27805 -0.630217
+v -0.314593 0.32071 -0.563292
+v -0.238161 -0.677382 -0.0918856
+v -0.12828 -0.689708 0.0245746
+v -0.228165 -0.692422 0.147407
+v -0.338046 -0.680095 0.0309463
+v -0.229681 -0.733842 0.0266899
+v -0.291818 -0.690195 0.788408
+v -0.199176 -0.699686 0.68654
+v -0.107919 -0.696884 0.796995
+v -0.20056 -0.687393 0.898863
+v -0.198244 -0.736784 0.793448
+v -0.294592 -0.571581 0.678753
+v -0.307855 -0.55258 0.89062
+v -0.335563 -0.561971 0.78431
+v -0.102733 -0.558625 0.896274
+v -0.110927 -0.595142 0.691599
+v -0.0593616 -0.458076 0.795932
+v -0.0716769 -0.583028 0.796447
+v -0.203007 -0.591569 0.647127
+v -0.206666 -0.553636 0.930804
+v -0.352933 -0.520557 -0.0885122
+v -0.339121 -0.546516 0.150832
+v -0.383244 -0.532332 0.0297109
+v -0.132569 -0.534652 -0.0767316
+v -0.24402 -0.524662 -0.124285
+v -0.143499 -0.577512 0.141914
+v -0.241073 -0.571789 0.188527
+v -0.0964942 -0.434904 0.0342915
+v -0.0999951 -0.56397 0.0317575
+v -0.23851 0.520387 -0.0907886
+v -0.282894 0.56989 -0.257283
+v -0.271576 0.561647 -0.416318
+v -0.293195 0.482084 -0.276654
+v -0.32688 0.680784 -0.145207
+v -0.422878 0.632073 -0.168661
+v -0.321376 0.576724 -0.185683
+v -0.213045 0.609177 -0.109989
+v -0.326345 0.612341 -0.141546
+v -0.379053 0.63587 -0.283895
+v -0.312594 0.674309 -0.341185
+v -0.3154 0.606871 -0.278418
+v -0.460287 0.727236 -0.177367
+v -0.417773 0.731266 -0.288914
+v -0.454185 0.684351 -0.230461
+v -0.0922474 0.320203 0.127906
+v -0.0857686 0.301184 0.309872
+v -0.0910385 0.318251 0.216664
+v -0.0750596 0.22755 0.493766
+v -0.0710686 0.213115 0.708099
+v -0.0724773 0.219588 0.599773
+v -0.0670456 0.125156 0.886255
+v -0.0695062 0.18012 0.806775
+v -0.0661753 -0.432919 0.0879789
+v -0.0502685 -0.34467 -0.0665397
+v -0.056363 -0.399912 0.0126968
+v -0.0361117 -0.34665 0.891624
+v -0.0398588 -0.442711 0.714357
+v -0.0321101 -0.404726 0.804765
+v -0.0798727 0.262384 0.399618
+v -0.0874752 0.345901 0.0284499
+v -0.0858416 0.32349 0.0605597
+v -0.124296 0.487548 0.0340513
+v -0.105997 0.402715 0.0360884
+v -0.136392 0.50382 -0.556974
+v -0.214286 0.629383 -0.484754
+v -0.104733 0.755088 -0.444831
+v -0.121817 0.636846 -0.502061
+v -0.139278 0.340909 -0.661549
+v -0.138945 0.411476 -0.620153
+v -0.0622199 -0.197393 0.966789
+v -0.0501277 -0.274014 0.941915
+v -0.0553298 -0.185801 -0.197022
+v -0.050202 -0.275405 -0.142323
+v -0.0766662 0.0457919 -0.217551
+v -0.0655056 -0.0670069 -0.214493
+v -0.0933959 0.130309 -0.285229
+v -0.0838463 0.11062 -0.238883
+v -0.114603 0.104637 -0.445501
+v -0.102596 0.115199 -0.359834
+v -0.153665 0.185489 -0.611191
+v -0.137484 0.129002 -0.534091
+v -0.150371 0.262453 -0.657355
+v -0.0819636 0.707011 -0.0978848
+v -0.128934 0.593549 -0.0276065
+v -0.230899 0.696193 -0.415496
+v -0.2462 0.632058 -0.451998
+v -0.199042 0.926972 -0.236591
+v -0.155831 0.94592 -0.253016
+v -0.182289 0.937359 -0.304334
+v -0.2255 0.918412 -0.287909
+v -0.198381 0.950853 -0.26973
+v -0.204856 0.767832 -0.258627
+v -0.198135 0.804412 -0.300184
+v -0.162591 0.796641 -0.337706
+v -0.195536 0.768708 -0.323732
+v -0.190344 0.780419 -0.305321
+v -0.18859 0.771476 -0.215131
+v -0.12548 0.786281 -0.190531
+v -0.102838 0.815304 -0.21903
+v -0.158595 0.817014 -0.220186
+v -0.14521 0.793415 -0.208712
+v -0.134346 0.834135 -0.324328
+v -0.0911506 0.822276 -0.28966
+v -0.113814 0.810725 -0.339255
+v -0.12321 0.811864 -0.327105
+v -0.0956349 0.846841 -0.248877
+v -0.0699464 0.823757 -0.248502
+v -0.0836698 0.824545 -0.249204
+v -0.246443 0.756953 -0.257453
+v -0.24364 0.757848 -0.207433
+v -0.21289 0.762245 -0.22883
+v -0.249966 0.723503 -0.332259
+v -0.355751 0.762428 -0.238497
+v -0.32003 0.739642 -0.304714
+v -0.181848 0.741315 -0.180043
+v -0.328516 0.741977 -0.181175
+v -0.0499681 0.812176 -0.199395
+v -0.0788354 0.774284 -0.158189
+v -0.168022 0.778755 -0.386065
+v -0.0505691 0.828853 -0.304368
+v -0.0897195 0.811796 -0.373587
+v -0.0404745 0.828353 -0.247459
+v -0.211302 0.744371 -0.365196
+v -0.216553 0.856661 -0.251136
+v -0.193702 0.87269 -0.329392
+v -0.218551 0.860755 -0.297102
+v -0.126841 0.897388 -0.285836
+v -0.157513 0.887986 -0.320358
+v -0.138938 0.890583 -0.222912
+v -0.120161 0.900194 -0.247801
+v -0.181137 0.872948 -0.224154
+v -0.218865 -0.458568 0.419816
+v -0.156751 -0.485651 0.517662
+v -0.106299 -0.467348 0.612794
+v -0.177713 -0.464461 0.527871
+v -0.0640756 -0.481225 0.60015
+v -0.0486808 -0.462533 0.655449
+v -0.0674919 -0.459184 0.218582
+v -0.129769 -0.445891 0.210992
+v -0.0638357 -0.444618 0.15006
+v -0.160168 -0.471758 0.313909
+v -0.192938 -0.45045 0.307624
+v -0.106971 -0.518553 0.416032
+v -0.0913755 -0.507594 0.518132
+v -0.0906862 -0.492986 0.310407
+v -0.152295 -0.0598695 0.957881
+v -0.109744 -0.0412763 0.976341
+v -0.148938 -0.0188178 0.950827
+v -0.133131 -0.0374762 0.965091
+v -0.214922 -0.0472768 0.917853
+v -0.129144 -0.123186 0.961976
+v -0.202946 -0.113321 0.930645
+v -0.124777 0.0448234 0.926767
+v -0.189431 0.0176502 0.907717
+v -0.0541294 -0.000582154 0.980957
+v -0.0627521 0.0592134 0.940397
+v -0.0544734 -0.0758718 0.989219
+v -0.0631528 -0.123094 0.979468
+v -0.0717681 -0.0690106 0.994894
+v -0.052795 -0.0725924 1.01945
+v -0.0716307 -0.0432 1.00541
+v -0.0809297 -0.0558501 0.997263
+v -0.0297219 -0.0653048 1.04059
+v -0.0406958 -0.0395224 1.01839
+v -0.0297587 -0.0908237 1.02001
+v -0.0407812 -0.0765567 1.00299
+v -0.0264068 -0.0851806 1.03686
+v 0.313121 -0.40468 0.424303
+v 0.289638 -0.411984 0.363044
+v 0.317818 -0.392423 0.305485
+v 0.341299 -0.375944 0.366232
+v 0.317288 -0.397295 0.364448
+v 0.23935 -0.43199 0.361842
+v 0.251544 -0.422435 0.302332
+v 0.261632 -0.427221 0.422701
+v 0.262332 -0.422664 0.362137
+v 0.2099 -0.457622 0.202318
+v 0.274846 -0.418754 0.252658
+v 0.194817 -0.437402 0.239358
+v 0.231211 -0.430966 0.245486
+v 0.350284 -0.364453 0.244783
+v 0.312495 -0.42258 0.208415
+v 0.316601 -0.396416 0.250199
+v 0.328162 -0.387696 0.483083
+v 0.291362 -0.413379 0.536815
+v 0.275025 -0.421949 0.482067
+v 0.303426 -0.408222 0.482265
+v 0.26979 -0.455493 0.624003
+v 0.236564 -0.444228 0.584476
+v 0.315111 -0.397679 0.592146
+v 0.280089 -0.425608 0.586435
+v 0.156119 -0.458526 0.595789
+v 0.222989 -0.439421 0.53835
+v 0.164011 -0.483033 0.631406
+v 0.191143 -0.453861 0.591118
+v 0.225399 -0.440193 0.480871
+v 0.247502 -0.431674 0.481825
+v 0.300873 -0.335808 -0.11045
+v 0.23546 -0.269794 -0.12322
+v 0.283535 -0.198083 -0.11242
+v 0.339519 -0.257182 -0.0802831
+v 0.292915 -0.260195 -0.110308
+v 0.119478 -0.295776 -0.113872
+v 0.173106 -0.230478 -0.142628
+v 0.176383 -0.34915 -0.10631
+v 0.174532 -0.282877 -0.121531
+v 0.172014 -0.141551 -0.170002
+v 0.224047 -0.163495 -0.142015
+v 0.123113 -0.206388 -0.170045
+v 0.171621 -0.184999 -0.160037
+v 0.318497 -0.140059 -0.0746981
+v 0.26898 -0.103901 -0.112945
+v 0.274851 -0.146856 -0.114171
+v 0.307808 -0.00318692 -0.0317095
+v 0.269878 -0.0318881 -0.0928838
+v 0.232462 0.00608204 -0.106821
+v 0.273281 0.0420733 -0.0438498
+v 0.272342 0.00331308 -0.0724007
+v 0.222802 -0.0788504 -0.141507
+v 0.308256 -0.0631453 -0.0619102
+v 0.26804 -0.0666021 -0.106124
+v 0.131964 -0.111136 -0.188613
+v 0.182883 -0.0457092 -0.157469
+v 0.176218 -0.0952439 -0.168768
+v 0.199052 0.0623656 -0.105989
+v 0.148915 0.00578811 -0.165362
+v 0.190658 0.0074224 -0.137241
+v 0.30851 -0.340088 0.875405
+v 0.263105 -0.289222 0.900577
+v 0.214141 -0.334681 0.919903
+v 0.26887 -0.396352 0.909752
+v 0.266995 -0.336923 0.904292
+v 0.253262 -0.209214 0.897882
+v 0.215216 -0.244729 0.920604
+v 0.293898 -0.252639 0.867139
+v 0.258195 -0.248301 0.898482
+v 0.126656 -0.240414 0.945024
+v 0.164801 -0.284207 0.93026
+v 0.172003 -0.202637 0.93974
+v 0.169955 -0.242358 0.935335
+v 0.149733 -0.3934 0.920339
+v 0.104904 -0.328487 0.922781
+v 0.15673 -0.332639 0.924128
+v 0.358457 -0.200112 -0.0267162
+v 0.380901 -0.277243 0.032671
+v 0.375987 -0.340814 -0.0337268
+v 0.367895 -0.263853 -0.0306343
+v 0.342627 -0.106257 -0.015664
+v 0.369603 -0.166291 0.0406057
+v 0.349444 -0.148083 -0.0217878
+v 0.385185 -0.214712 0.178025
+v 0.382253 -0.237542 0.105277
+v 0.379025 -0.149836 0.112764
+v 0.38088 -0.190046 0.108528
+v 0.379176 -0.364086 0.102343
+v 0.375596 -0.311145 0.175939
+v 0.381399 -0.294389 0.103665
+v 0.363729 -0.333817 0.306674
+v 0.367965 -0.307651 0.370431
+v 0.351404 -0.354331 0.427814
+v 0.35769 -0.345276 0.36828
+v 0.379525 -0.278609 0.243679
+v 0.369702 -0.323364 0.243084
+v 0.386398 -0.194374 0.247084
+v 0.380079 -0.252636 0.309953
+v 0.384425 -0.235652 0.245143
+v 0.366553 -0.273812 0.432936
+v 0.376188 -0.22098 0.374406
+v 0.373629 -0.265433 0.372527
+v 0.363531 -0.0923241 0.440867
+v 0.375379 -0.132559 0.377674
+v 0.376221 -0.0834201 0.319025
+v 0.359143 -0.0454376 0.384517
+v 0.369754 -0.0887861 0.380331
+v 0.383633 -0.167556 0.313214
+v 0.370145 -0.182621 0.437038
+v 0.377148 -0.176656 0.375911
+v 0.386429 -0.137312 0.183762
+v 0.385994 -0.115397 0.252169
+v 0.387444 -0.154662 0.24911
+v 0.369328 -0.0337485 0.265179
+v 0.380073 -0.0641595 0.195141
+v 0.380479 -0.0754649 0.257209
+v 0.331012 0.0171674 0.0389716
+v 0.357964 -0.0317511 0.0731655
+v 0.338487 -0.0443826 0.00235778
+v 0.336366 -0.0157204 0.0175237
+v 0.37698 -0.0821329 0.123634
+v 0.363023 -0.0117289 0.147082
+v 0.372551 -0.0487428 0.133083
+v 0.363023 -0.0919062 0.0524717
+v 0.378431 -0.114848 0.117331
+v 0.339774 -0.073126 -0.00816016
+v 0.336842 -0.40362 0.835552
+v 0.340911 -0.350598 0.77666
+v 0.324728 -0.29766 0.82767
+v 0.331904 -0.344898 0.831349
+v 0.334716 -0.360298 0.654176
+v 0.340676 -0.308781 0.717453
+v 0.335933 -0.414813 0.716014
+v 0.339263 -0.356428 0.715872
+v 0.338685 -0.226798 0.720092
+v 0.331546 -0.262105 0.774295
+v 0.344609 -0.271269 0.661246
+v 0.339619 -0.266826 0.718796
+v 0.311718 -0.219243 0.822794
+v 0.317327 -0.257308 0.824743
+v 0.282982 -0.0932074 0.86163
+v 0.309753 -0.137557 0.821642
+v 0.32815 -0.0982774 0.774453
+v 0.304296 -0.0566442 0.818942
+v 0.309064 -0.0966912 0.82069
+v 0.328063 -0.182474 0.773912
+v 0.284251 -0.173813 0.864211
+v 0.309915 -0.178867 0.822052
+v 0.348891 -0.186539 0.666645
+v 0.342261 -0.141944 0.72292
+v 0.340465 -0.184932 0.72153
+v 0.335097 -0.0554519 0.724795
+v 0.349859 -0.0980743 0.670701
+v 0.341372 -0.0985454 0.724072
+v 0.350887 -0.0507559 0.502911
+v 0.3576 -0.096426 0.55865
+v 0.364199 -0.140124 0.498352
+v 0.360004 -0.0947951 0.500374
+v 0.356689 -0.14249 0.613383
+v 0.34723 -0.0531297 0.617509
+v 0.354744 -0.0974434 0.615492
+v 0.35206 -0.232064 0.608144
+v 0.360088 -0.187609 0.554338
+v 0.355112 -0.187591 0.610995
+v 0.363053 -0.232129 0.494204
+v 0.364779 -0.186112 0.496433
+v 0.354812 -0.320109 0.488079
+v 0.338808 -0.358381 0.541229
+v 0.344689 -0.357576 0.485189
+v 0.354746 -0.277669 0.548235
+v 0.360332 -0.277544 0.491251
+v 0.344841 -0.317709 0.600763
+v 0.349578 -0.27523 0.604646
+v 0.335024 -0.360119 0.596759
+v 0.253104 0.203435 0.118078
+v 0.22988 0.247545 0.157829
+v 0.250535 0.219935 0.200112
+v 0.270021 0.183796 0.151405
+v 0.251096 0.217673 0.158942
+v 0.173937 0.295795 0.151795
+v 0.201028 0.279793 0.204606
+v 0.208445 0.255016 0.0997486
+v 0.204742 0.274117 0.153286
+v 0.19325 0.264928 0.298974
+v 0.224342 0.248556 0.249012
+v 0.166122 0.2964 0.255682
+v 0.197174 0.276415 0.252836
+v 0.273837 0.172969 0.230557
+v 0.247672 0.201215 0.285394
+v 0.249328 0.213762 0.242242
+v 0.269047 0.115689 0.415447
+v 0.243641 0.166107 0.375362
+v 0.21248 0.178956 0.429644
+v 0.238934 0.137098 0.472953
+v 0.241351 0.149381 0.423
+v 0.218341 0.218477 0.337805
+v 0.272811 0.14513 0.319056
+v 0.245762 0.184358 0.32963
+v 0.157504 0.266542 0.348704
+v 0.185529 0.224498 0.389084
+v 0.189327 0.246276 0.344018
+v 0.178799 0.187723 0.48375
+v 0.149393 0.222637 0.439843
+v 0.181979 0.203634 0.435289
+v 0.235856 0.0765799 0.805621
+v 0.217701 0.11212 0.775527
+v 0.188626 0.112308 0.821548
+v 0.212012 0.0755801 0.841201
+v 0.213324 0.0965225 0.811853
+v 0.226814 0.130295 0.684497
+v 0.195015 0.146226 0.738197
+v 0.249108 0.0961373 0.728207
+v 0.222367 0.12354 0.732548
+v 0.136442 0.179619 0.750387
+v 0.163968 0.1484 0.789706
+v 0.169138 0.174847 0.695193
+v 0.166543 0.164739 0.744396
+v 0.159999 0.0987976 0.865373
+v 0.13257 0.138159 0.837992
+v 0.161693 0.126272 0.830304
+v 0.207042 0.158534 0.530777
+v 0.233686 0.131486 0.579487
+v 0.264402 0.101968 0.520397
+v 0.236433 0.132176 0.525634
+v 0.143538 0.198754 0.539805
+v 0.173735 0.178964 0.589175
+v 0.176115 0.180806 0.535582
+v 0.201599 0.157166 0.637907
+v 0.139858 0.195421 0.647569
+v 0.171473 0.178284 0.642915
+v 0.258575 0.102323 0.628487
+v 0.230534 0.131902 0.632959
+v 0.34742 0.00319315 0.333602
+v 0.320929 0.0389878 0.398635
+v 0.337836 -0.00655256 0.448549
+v 0.342419 -0.00261151 0.390867
+v 0.351215 0.0204208 0.221415
+v 0.327429 0.0566863 0.291075
+v 0.350974 0.0108683 0.277027
+v 0.298744 0.0894187 0.355449
+v 0.30121 0.115034 0.257783
+v 0.300703 0.102007 0.305644
+v 0.293232 0.0711038 0.460405
+v 0.296022 0.0786559 0.407077
+v 0.328122 -0.0112347 0.673608
+v 0.30029 0.0266191 0.725054
+v 0.308985 -0.0158364 0.772679
+v 0.320738 -0.0133772 0.724898
+v 0.267341 0.0573644 0.769829
+v 0.281859 0.0674534 0.67711
+v 0.275748 0.0634772 0.725933
+v 0.24017 0.0457536 0.838346
+v 0.274888 0.0183198 0.812257
+v 0.256292 0.0501447 0.807253
+v 0.268509 -0.018423 0.854481
+v 0.291897 -0.0177918 0.816142
+v 0.181753 0.135303 -0.0848536
+v 0.213035 0.170022 -0.00953204
+v 0.239418 0.102195 -0.0351077
+v 0.20757 0.117335 -0.0616182
+v 0.236523 0.195781 0.0609139
+v 0.190293 0.237055 0.0258931
+v 0.212268 0.218116 0.0448664
+v 0.276588 0.155574 0.0981021
+v 0.265278 0.133372 0.0353559
+v 0.258648 0.173682 0.0768666
+v 0.298292 0.0729821 0.0271766
+v 0.270541 0.0874645 -0.00640695
+v 0.311113 0.0304905 0.622016
+v 0.288873 0.0678482 0.57056
+v 0.286019 0.0682635 0.624906
+v 0.334099 -0.00961306 0.563635
+v 0.332102 -0.0102295 0.619617
+v 0.315185 0.0311388 0.510574
+v 0.335537 -0.00863692 0.506376
+v 0.291063 0.0681481 0.515313
+v 0.33595 0.0447295 0.115485
+v 0.324423 0.0791082 0.189745
+v 0.346277 0.0318576 0.16704
+v 0.305188 0.103086 0.100412
+v 0.320025 0.0585873 0.0683354
+v 0.295396 0.137443 0.169109
+v 0.288286 0.145212 0.129998
+v 0.299573 0.127115 0.211987
+v 0.149034 0.119255 -0.152767
+v 0.167034 0.18223 -0.122798
+v 0.177314 0.204818 -0.0510199
+v 0.168725 0.158519 -0.103729
+v 0.194921 0.221357 -0.173014
+v 0.177164 0.238112 -0.093383
+v 0.160058 0.168229 -0.195838
+v 0.175228 0.201681 -0.146613
+v 0.149721 0.3008 -0.00470029
+v 0.167426 0.259011 -0.0241111
+v 0.17918 0.290497 -0.0609743
+v 0.168557 0.27236 -0.0447148
+v 0.159665 0.285267 0.0452783
+v 0.175009 0.248965 0.00116269
+v 0.233098 0.21084 -0.267301
+v 0.26422 0.274293 -0.228207
+v 0.21819 0.281995 -0.133552
+v 0.227725 0.245742 -0.199837
+v 0.314639 0.343639 -0.292523
+v 0.27622 0.344814 -0.176785
+v 0.306638 0.268641 -0.338166
+v 0.294985 0.306469 -0.259249
+v 0.21453 0.407775 -0.0317089
+v 0.225521 0.344757 -0.0845003
+v 0.267222 0.417414 -0.116444
+v 0.249809 0.379204 -0.0978893
+v 0.173263 0.341693 -0.0238379
+v 0.200073 0.314908 -0.0732157
+v 0.192666 0.159829 -0.308601
+v 0.240251 0.15911 -0.387649
+v 0.279797 0.207117 -0.366981
+v 0.238726 0.181983 -0.331175
+v 0.213461 0.12942 -0.465108
+v 0.265715 0.160342 -0.469212
+v 0.195451 0.128497 -0.407372
+v 0.233311 0.142158 -0.432916
+v 0.305787 0.218563 -0.547441
+v 0.307399 0.208213 -0.46356
+v 0.259412 0.1698 -0.542722
+v 0.290901 0.186126 -0.509143
+v 0.330786 0.26947 -0.445487
+v 0.31171 0.235804 -0.406418
+v 0.32675 0.280027 -0.535933
+v 0.321008 0.352655 -0.511417
+v 0.329541 0.345256 -0.410956
+v 0.333345 0.309542 -0.480189
+v 0.272516 0.239582 -0.609537
+v 0.296456 0.295786 -0.60247
+v 0.307295 0.256707 -0.578835
+v 0.240548 0.35766 -0.62796
+v 0.289278 0.358786 -0.584918
+v 0.246645 0.312827 -0.640337
+v 0.274305 0.331026 -0.617491
+v 0.295245 0.436762 -0.484187
+v 0.255976 0.429549 -0.563622
+v 0.295399 0.395443 -0.540067
+v 0.335047 -0.66747 -0.026194
+v 0.288313 -0.71953 0.0292002
+v 0.233667 -0.718383 -0.0394682
+v 0.288912 -0.666279 -0.0831981
+v 0.288523 -0.704927 -0.0329901
+v 0.276152 -0.679477 0.14116
+v 0.227442 -0.724741 0.0933852
+v 0.327044 -0.674066 0.0872825
+v 0.281161 -0.710704 0.0910758
+v 0.136631 -0.686121 0.0787634
+v 0.172796 -0.723594 0.0247169
+v 0.182766 -0.687312 0.135767
+v 0.176191 -0.716151 0.0857864
+v 0.187443 -0.669698 -0.0818819
+v 0.136552 -0.67511 -0.0280042
+v 0.179633 -0.708233 -0.0350267
+v 0.2461 -0.678205 0.889508
+v 0.198896 -0.723335 0.85193
+v 0.248642 -0.724519 0.790895
+v 0.288217 -0.679435 0.838379
+v 0.246831 -0.711912 0.845432
+v 0.113033 -0.684171 0.844965
+v 0.148661 -0.727347 0.795626
+v 0.15574 -0.680004 0.892121
+v 0.151829 -0.713932 0.849146
+v 0.157976 -0.694713 0.696986
+v 0.198408 -0.728532 0.734591
+v 0.115859 -0.693483 0.748115
+v 0.153047 -0.720555 0.741401
+v 0.283587 -0.684515 0.738785
+v 0.24088 -0.688682 0.691628
+v 0.244434 -0.716483 0.736358
+v 0.342489 -0.483419 0.78141
+v 0.32282 -0.565952 0.726198
+v 0.30391 -0.498066 0.668956
+v 0.331237 -0.488983 0.719836
+v 0.31354 -0.474577 0.887895
+v 0.331331 -0.556659 0.842236
+v 0.337524 -0.478425 0.840051
+v 0.319732 -0.633454 0.786428
+v 0.294197 -0.618974 0.884469
+v 0.315639 -0.625673 0.839496
+v 0.28283 -0.632563 0.691009
+v 0.308326 -0.632734 0.733956
+v 0.0634711 -0.431765 0.851899
+v 0.0654567 -0.515878 0.79543
+v 0.0774719 -0.570173 0.85133
+v 0.0969194 -0.478778 0.897495
+v 0.0707234 -0.496321 0.852392
+v 0.106684 -0.534636 0.682874
+v 0.0832655 -0.591144 0.740087
+v 0.069956 -0.47422 0.735292
+v 0.0773661 -0.528459 0.735056
+v 0.0840444 -0.64607 0.797118
+v 0.119063 -0.647679 0.702012
+v 0.0943475 -0.648249 0.746058
+v 0.112805 -0.624088 0.888986
+v 0.0899111 -0.634521 0.847274
+v 0.252371 -0.581833 0.652832
+v 0.208287 -0.52835 0.637223
+v 0.260651 -0.513618 0.642398
+v 0.20034 -0.651356 0.660475
+v 0.244653 -0.641994 0.666514
+v 0.15335 -0.595644 0.66033
+v 0.156436 -0.650983 0.672896
+v 0.153645 -0.534978 0.650489
+v 0.149543 -0.554429 0.922812
+v 0.209392 -0.47177 0.928314
+v 0.147798 -0.472236 0.922294
+v 0.203436 -0.628169 0.923071
+v 0.153584 -0.625261 0.914716
+v 0.262868 -0.552296 0.920108
+v 0.253551 -0.622939 0.912658
+v 0.267734 -0.472455 0.917082
+v 0.348561 -0.464054 0.158716
+v 0.369537 -0.538419 0.0961211
+v 0.387436 -0.440321 0.0296682
+v 0.376444 -0.45013 0.0999599
+v 0.368236 -0.614976 0.0306267
+v 0.325558 -0.615423 0.139592
+v 0.354786 -0.614851 0.0904298
+v 0.340742 -0.597899 -0.0798229
+v 0.378455 -0.525308 -0.0356201
+v 0.364087 -0.605614 -0.0296813
+v 0.354445 -0.427851 -0.0878125
+v 0.380964 -0.432504 -0.0361787
+v 0.243518 -0.430642 -0.121062
+v 0.182701 -0.527846 -0.109937
+v 0.12704 -0.448099 -0.0735785
+v 0.179139 -0.436248 -0.106528
+v 0.304443 -0.521287 -0.117606
+v 0.305291 -0.427537 -0.115465
+v 0.241923 -0.609859 -0.117359
+v 0.296601 -0.602967 -0.109127
+v 0.141316 -0.607217 -0.0730372
+v 0.186967 -0.607615 -0.104287
+v 0.148115 -0.512563 0.149929
+v 0.188536 -0.577394 0.174934
+v 0.25113 -0.502745 0.197094
+v 0.196177 -0.511769 0.183546
+v 0.233064 -0.638226 0.175543
+v 0.145237 -0.635178 0.13052
+v 0.185474 -0.638484 0.162245
+v 0.293784 -0.559568 0.181063
+v 0.282371 -0.626877 0.167977
+v 0.304357 -0.484216 0.189956
+v 0.105712 -0.548895 -0.0268432
+v 0.0997946 -0.492468 0.0352712
+v 0.0953701 -0.405611 -0.0214782
+v 0.102107 -0.469936 -0.0239275
+v 0.106751 -0.633108 0.0275567
+v 0.114552 -0.619641 -0.0267392
+v 0.113288 -0.573273 0.0910993
+v 0.117988 -0.635837 0.0828
+v 0.112515 -0.451919 0.0935688
+v 0.115559 -0.506404 0.0968639
+v 0.271184 0.525759 -0.460079
+v 0.285353 0.520853 -0.361551
+v 0.308658 0.43336 -0.368305
+v 0.291391 0.478507 -0.418527
+v 0.278055 0.600765 -0.320108
+v 0.282577 0.535883 -0.259681
+v 0.267 0.601931 -0.388185
+v 0.278645 0.563976 -0.33172
+v 0.247008 0.54474 -0.114114
+v 0.271214 0.498067 -0.167977
+v 0.277875 0.55689 -0.190965
+v 0.271665 0.531425 -0.180446
+v 0.298509 0.427522 -0.236282
+v 0.235335 0.485531 -0.0647615
+v 0.272045 0.45855 -0.143225
+v 0.317802 0.38717 -0.32759
+v 0.192626 0.647576 -0.125774
+v 0.267679 0.610284 -0.129549
+v 0.326452 0.645309 -0.138504
+v 0.252363 0.682167 -0.15033
+v 0.258116 0.645451 -0.13607
+v 0.289471 0.569074 -0.162469
+v 0.324346 0.587714 -0.15672
+v 0.239082 0.578957 -0.111996
+v 0.279361 0.583208 -0.137022
+v 0.401583 0.612716 -0.19176
+v 0.380321 0.61857 -0.152413
+v 0.35448 0.589213 -0.202564
+v 0.367842 0.597467 -0.17194
+v 0.388682 0.680406 -0.144893
+v 0.430349 0.658277 -0.154382
+v 0.384792 0.647908 -0.143704
+v 0.294439 0.591849 -0.266039
+v 0.318359 0.584666 -0.230094
+v 0.294318 0.572729 -0.209871
+v 0.28588 0.662049 -0.349089
+v 0.312606 0.638668 -0.319124
+v 0.290372 0.624108 -0.317788
+v 0.375881 0.662505 -0.310821
+v 0.343967 0.620068 -0.286887
+v 0.345079 0.681327 -0.333156
+v 0.34177 0.649107 -0.318477
+v 0.380591 0.614582 -0.245535
+v 0.347363 0.598425 -0.244565
+v 0.393743 0.706765 -0.3123
+v 0.444197 0.711394 -0.263256
+v 0.419572 0.658706 -0.261892
+v 0.412958 0.685943 -0.291128
+v 0.452589 0.744444 -0.206816
+v 0.471552 0.708579 -0.199184
+v 0.43241 0.746455 -0.258303
+v 0.460792 0.730831 -0.231063
+v 0.447289 0.655956 -0.196803
+v 0.447397 0.702392 -0.15815
+v 0.460337 0.681448 -0.17325
+v 0.417303 0.633713 -0.225256
+v 0 0.324094 0.180165
+v 0.0462436 0.320164 0.221337
+v 0.0925539 0.320139 0.170699
+v 0.0467382 0.325284 0.136739
+v 0.0468991 0.323707 0.17766
+v 0.0434892 0.30185 0.311934
+v 0.0886491 0.312396 0.263435
+v 0 0.312812 0.267846
+v 0.0450231 0.313323 0.266515
+v 0.132936 0.312061 0.210485
+v 0.125628 0.296564 0.306829
+v 0.129481 0.307575 0.259519
+v 0.135299 0.30832 0.112598
+v 0.13572 0.310986 0.159838
+v 0 0.228164 0.547798
+v 0.0364517 0.2252 0.601479
+v 0.0735327 0.221293 0.545706
+v 0.0377827 0.232065 0.495188
+v 0.0369868 0.226526 0.54726
+v 0.0357781 0.218679 0.710249
+v 0.0717153 0.218255 0.65442
+v 0 0.22586 0.657001
+v 0.0360809 0.22396 0.656319
+v 0.107651 0.210086 0.597079
+v 0.105384 0.203917 0.704755
+v 0.106456 0.208743 0.651454
+v 0.111325 0.219111 0.491389
+v 0.109197 0.212143 0.543201
+v 0 0.20702 0.762635
+v 0.035033 0.18494 0.809623
+v 0.0703592 0.199987 0.759265
+v 0.0354469 0.205232 0.761742
+v 0.0337403 0.129119 0.889852
+v 0.0684286 0.154761 0.849486
+v 0 0.160642 0.853891
+v 0.0344822 0.159128 0.852718
+v 0.10286 0.172311 0.802347
+v 0.0994805 0.118705 0.880737
+v 0.101303 0.147713 0.844481
+v 0.104202 0.191409 0.755412
+v 0 -0.422236 0.0297518
+v 0.0290027 -0.401336 0.00260959
+v 0.0619855 -0.419454 0.0516405
+v 0.032544 -0.433336 0.0684456
+v 0.0312142 -0.420036 0.03584
+v 0.024587 -0.350255 -0.0669927
+v 0.0524989 -0.374486 -0.0268658
+v 0 -0.38147 -0.0341137
+v 0.0265726 -0.377866 -0.0315624
+v 0.0804386 -0.407577 0.025012
+v 0.0781392 -0.343402 -0.0680291
+v 0.0771327 -0.378765 -0.022981
+v 0.101981 -0.43742 0.118873
+v 0.0918712 -0.426509 0.0746889
+v 0 -0.369655 0.853165
+v 0.0162142 -0.396858 0.808469
+v 0.032518 -0.377955 0.851217
+v 0.0173499 -0.340897 0.891348
+v 0.0161103 -0.370733 0.852319
+v 0.0190571 -0.436735 0.723869
+v 0.0352263 -0.426311 0.757202
+v 0 -0.417485 0.766409
+v 0.0173127 -0.418962 0.764189
+v 0.0473693 -0.423079 0.799815
+v 0.0641499 -0.454693 0.696538
+v 0.0543417 -0.44245 0.745122
+v 0.0576974 -0.360469 0.893822
+v 0.0495204 -0.395304 0.851292
+v 0 0.284012 0.356829
+v 0.0403407 0.264584 0.4009
+v 0.08278 0.283223 0.354838
+v 0.0418935 0.284411 0.356338
+v 0.0772361 0.242537 0.445498
+v 0 0.24666 0.447191
+v 0.0389355 0.245943 0.446792
+v 0.117787 0.25622 0.397308
+v 0.114267 0.235186 0.443214
+v 0.121652 0.278062 0.352302
+v 0 0.337744 0.0495788
+v 0.0441574 0.329119 0.0693112
+v 0.084777 0.331134 0.0392602
+v 0.0448664 0.351036 0.0368917
+v 0.0436281 0.336529 0.047541
+v 0.0891709 0.320583 0.0906503
+v 0 0.327327 0.102505
+v 0.0455092 0.326229 0.0998335
+v 0.122579 0.311429 0.0436831
+v 0.129137 0.308034 0.0737864
+v 0.125569 0.33518 0.0110101
+v 0.120968 0.319807 0.0225689
+v 0 0.450581 0.0596803
+v 0.0541098 0.40816 0.0483714
+v 0.116503 0.442256 0.0399308
+v 0.0632284 0.496693 0.0522852
+v 0.0593708 0.448955 0.0551193
+v 0.0954346 0.370179 0.029827
+v 0 0.37634 0.0420973
+v 0.0488177 0.375217 0.039732
+v 0.153438 0.391277 0.0121446
+v 0.13765 0.35956 0.0095734
+v 0.18104 0.470039 0.00485434
+v 0.169159 0.42866 0.0126096
+v 0 0.570891 -0.534672
+v 0.0626485 0.640189 -0.506046
+v 0.130136 0.568004 -0.528764
+v 0.0692761 0.505582 -0.562042
+v 0.0664738 0.570216 -0.533525
+v 0.0536794 0.762368 -0.447664
+v 0.113201 0.701726 -0.474505
+v 0 0.709164 -0.479338
+v 0.0582379 0.707055 -0.477971
+v 0.199905 0.685497 -0.462496
+v 0.174025 0.632377 -0.495223
+v 0.150533 0.744043 -0.44196
+v 0.161614 0.694175 -0.469576
+v 0.199187 0.49892 -0.545668
+v 0.232047 0.565216 -0.501918
+v 0.188173 0.564066 -0.51907
+v 0 0.376475 -0.648788
+v 0.0711811 0.413275 -0.623292
+v 0.138282 0.375681 -0.645424
+v 0.0726958 0.340957 -0.664799
+v 0.0716465 0.376815 -0.648416
+v 0.138821 0.452917 -0.589054
+v 0 0.45452 -0.593995
+v 0.0706181 0.454733 -0.593233
+v 0.199873 0.404748 -0.612935
+v 0.202195 0.446614 -0.579567
+v 0.193634 0.339499 -0.652866
+v 0.194895 0.370921 -0.637938
+v 0 -0.233549 0.959877
+v 0.0240468 -0.271835 0.943264
+v 0.057123 -0.235922 0.956621
+v 0.0306399 -0.195728 0.970285
+v 0.0277858 -0.234252 0.959139
+v 0.0425527 -0.311463 0.921053
+v 0 -0.306916 0.921405
+v 0.0202817 -0.307657 0.921165
+v 0.0802769 -0.277764 0.939515
+v 0.0688023 -0.319918 0.921674
+v 0.0956803 -0.199455 0.960676
+v 0.0895627 -0.238122 0.952018
+v 0 -0.322733 -0.103397
+v 0.0238367 -0.283103 -0.139843
+v 0.049547 -0.311956 -0.105817
+v 0.023709 -0.319134 -0.103998
+v 0.0267024 -0.190718 -0.197111
+v 0.0521008 -0.23408 -0.173683
+v 0 -0.242929 -0.170415
+v 0.0248684 -0.240765 -0.171792
+v 0.0816243 -0.264189 -0.144485
+v 0.0878074 -0.176513 -0.193941
+v 0.0840611 -0.222685 -0.173847
+v 0.0796433 -0.304819 -0.108871
+v 0 -0.132321 -0.213832
+v 0.0321871 -0.0691596 -0.222731
+v 0.0599751 -0.128388 -0.209467
+v 0.0292367 -0.131565 -0.213064
+v 0.0381981 0.0419961 -0.235526
+v 0.0713902 -0.00682142 -0.215572
+v 0 -0.00964083 -0.233734
+v 0.035269 -0.00935459 -0.229192
+v 0.101087 -0.0620719 -0.200364
+v 0.115674 0.055105 -0.187883
+v 0.109216 -0.00036684 -0.192884
+v 0.0937169 -0.121882 -0.201748
+v 0 0.0779149 -0.251636
+v 0.0428926 0.103386 -0.257977
+v 0.0803711 0.0844567 -0.225271
+v 0.0406901 0.0790388 -0.244814
+v 0.0470212 0.120443 -0.299626
+v 0.0884331 0.12573 -0.258533
+v 0 0.114136 -0.282485
+v 0.0449537 0.11665 -0.27594
+v 0.120922 0.124855 -0.209363
+v 0.138478 0.148595 -0.265069
+v 0.128964 0.142918 -0.232488
+v 0.118034 0.096215 -0.193935
+v 0 0.113801 -0.334664
+v 0.0518947 0.108853 -0.365753
+v 0.097999 0.124878 -0.319976
+v 0.0492429 0.116377 -0.329956
+v 0.0595919 0.101024 -0.449026
+v 0.107541 0.107031 -0.401861
+v 0 0.101009 -0.408276
+v 0.0552525 0.102268 -0.405836
+v 0.150911 0.126322 -0.356271
+v 0.16045 0.112915 -0.441079
+v 0.153901 0.116281 -0.399697
+v 0.145782 0.140078 -0.308863
+v 0 0.109177 -0.495145
+v 0.0709982 0.126709 -0.538642
+v 0.125551 0.112279 -0.490196
+v 0.065189 0.10952 -0.494143
+v 0.0790718 0.182988 -0.615599
+v 0.1475 0.153854 -0.575332
+v 0 0.151226 -0.580589
+v 0.0759743 0.151547 -0.579976
+v 0.194944 0.134806 -0.523346
+v 0.219301 0.191387 -0.600189
+v 0.210129 0.15949 -0.56385
+v 0.176258 0.119186 -0.482359
+v 0 0.219576 -0.64342
+v 0.0775425 0.260206 -0.661002
+v 0.154044 0.222564 -0.638941
+v 0.0792455 0.219987 -0.642965
+v 0.144378 0.302534 -0.665208
+v 0 0.300969 -0.668909
+v 0.0750102 0.301309 -0.668637
+v 0.213772 0.26767 -0.647803
+v 0.202463 0.304851 -0.656009
+v 0.21995 0.228648 -0.628685
+v 0.126715 0.537337 0.0111468
+v 0.0606884 0.60785 0.00104436
+v 0 0.554408 0.0408567
+v 0.0643107 0.550463 0.0321785
+v 0.191637 0.550233 -0.0477524
+v 0.185308 0.513743 -0.0172354
+v 0.171165 0.605799 -0.0764289
+v 0.209701 0.569968 -0.0807895
+v 0.126096 0.69557 -0.127071
+v 0.0987466 0.656786 -0.062447
+v 0.142824 0.650926 -0.0969906
+v 0 0.672219 -0.0228265
+v 0.0403517 0.7183 -0.0719016
+v 0.0504326 0.666442 -0.034872
+v 0.237614 0.630468 -0.469873
+v 0.258514 0.602197 -0.445199
+v 0.253217 0.578269 -0.474783
+v 0.212896 0.714905 -0.428435
+v 0.240943 0.662224 -0.43894
+v 0.224501 0.676789 -0.452578
+v 0.246529 0.67248 -0.397952
+v 0.254771 0.635976 -0.4207
+v 0.226436 0.912723 -0.26973
+v 0.21544 0.938681 -0.279423
+v 0.201014 0.943338 -0.25145
+v 0.214224 0.916672 -0.246046
+v 0.216965 0.931825 -0.261167
+v 0.196917 0.927676 -0.311546
+v 0.191876 0.948989 -0.288378
+v 0.216862 0.918936 -0.303965
+v 0.207634 0.937732 -0.29671
+v 0.1537 0.943252 -0.269054
+v 0.17745 0.953646 -0.260405
+v 0.165912 0.939303 -0.292739
+v 0.173109 0.951035 -0.277835
+v 0.179801 0.930945 -0.23207
+v 0.159856 0.939686 -0.239651
+v 0.180783 0.946411 -0.244635
+v 0.207814 0.759869 -0.30299
+v 0.191438 0.774734 -0.312022
+v 0.199084 0.77337 -0.281405
+v 0.209996 0.761876 -0.268358
+v 0.201734 0.767127 -0.28955
+v 0.161743 0.793141 -0.342758
+v 0.178221 0.788374 -0.325575
+v 0.180956 0.7796 -0.34144
+v 0.178357 0.783728 -0.331262
+v 0.18742 0.811743 -0.321532
+v 0.192387 0.789092 -0.301842
+v 0.165819 0.804418 -0.335288
+v 0.181068 0.796668 -0.322844
+v 0.202175 0.775482 -0.251997
+v 0.203233 0.797472 -0.273932
+v 0.199221 0.78193 -0.2764
+v 0.181336 0.804317 -0.228606
+v 0.151415 0.801648 -0.215482
+v 0.168503 0.781447 -0.210541
+v 0.192516 0.777938 -0.227759
+v 0.174268 0.789156 -0.220565
+v 0.108345 0.822919 -0.221075
+v 0.12229 0.805309 -0.211386
+v 0.135327 0.828572 -0.217894
+v 0.128262 0.813355 -0.215464
+v 0.103071 0.800257 -0.198013
+v 0.137434 0.789176 -0.20042
+v 0.0963023 0.812563 -0.21585
+v 0.114773 0.801816 -0.205592
+v 0.186175 0.766882 -0.204739
+v 0.153638 0.772539 -0.191376
+v 0.162292 0.776737 -0.200861
+v 0.13845 0.801677 -0.350685
+v 0.119159 0.809804 -0.331351
+v 0.143329 0.804627 -0.337252
+v 0.140844 0.802002 -0.341998
+v 0.086003 0.820458 -0.291304
+v 0.10501 0.817882 -0.310157
+v 0.0934554 0.817549 -0.318853
+v 0.100185 0.816042 -0.313185
+v 0.116614 0.840248 -0.308051
+v 0.127696 0.819206 -0.325202
+v 0.0961567 0.829666 -0.288835
+v 0.109848 0.825231 -0.308777
+v 0.153713 0.826966 -0.334024
+v 0.147191 0.8121 -0.335056
+v 0.0710179 0.824335 -0.270682
+v 0.0780919 0.822576 -0.248928
+v 0.084054 0.824638 -0.268864
+v 0.0786961 0.822795 -0.269513
+v 0.0899479 0.821575 -0.232115
+v 0.0751675 0.819879 -0.228529
+v 0.084018 0.819324 -0.230668
+v 0.102029 0.843892 -0.232989
+v 0.0888081 0.831892 -0.249223
+v 0.095203 0.828981 -0.232883
+v 0.0959818 0.847028 -0.267898
+v 0.0891348 0.832003 -0.268429
+v 0.208076 0.764415 -0.241789
+v 0.226075 0.759994 -0.240858
+v 0.231902 0.754223 -0.26947
+v 0.216972 0.759952 -0.252956
+v 0.20189 0.765574 -0.22074
+v 0.201854 0.770051 -0.234112
+v 0.215759 0.757673 -0.200462
+v 0.223444 0.760451 -0.217689
+v 0.205912 0.761992 -0.209879
+v 0.264144 0.760816 -0.243923
+v 0.265103 0.760852 -0.220189
+v 0.240714 0.760641 -0.229576
+v 0.278995 0.751255 -0.279238
+v 0.277858 0.727498 -0.319523
+v 0.234872 0.742799 -0.306286
+v 0.256224 0.744786 -0.293184
+v 0.311167 0.763685 -0.236173
+v 0.343965 0.756617 -0.269721
+v 0.300161 0.759231 -0.259113
+v 0.37536 0.738148 -0.30116
+v 0.402685 0.758643 -0.236201
+v 0.390758 0.753705 -0.27032
+v 0.317986 0.708047 -0.333577
+v 0.358205 0.712515 -0.324745
+v 0.258236 0.693552 -0.356883
+v 0.284726 0.698216 -0.343913
+v 0.183529 0.754714 -0.188935
+v 0.248651 0.737773 -0.18066
+v 0.277376 0.753251 -0.197126
+v 0.23327 0.750984 -0.191422
+v 0.329845 0.712932 -0.159268
+v 0.178779 0.718135 -0.16643
+v 0.252112 0.713887 -0.166078
+v 0.409003 0.736167 -0.174314
+v 0.39942 0.710994 -0.155057
+v 0.352617 0.756796 -0.208565
+v 0.409429 0.752424 -0.202781
+v 0.303456 0.759588 -0.213776
+v 0.0705006 0.811753 -0.205807
+v 0.0602661 0.797425 -0.177797
+v 0.106741 0.782082 -0.178969
+v 0.0845451 0.798015 -0.188581
+v 0 0.80352 -0.16013
+v 0.0359638 0.781745 -0.13792
+v 0.0258744 0.814456 -0.19275
+v 0.0313039 0.800824 -0.16616
+v 0.0771688 0.744134 -0.128582
+v 0 0.760454 -0.0953915
+v 0.0372542 0.755486 -0.106376
+v 0.128352 0.75368 -0.16956
+v 0.122404 0.729951 -0.151266
+v 0.140544 0.766364 -0.181936
+v 0.0269807 0.832763 -0.306956
+v 0.0647564 0.824008 -0.337021
+v 0.0438017 0.818098 -0.377412
+v 0 0.831312 -0.342444
+v 0.0352238 0.828719 -0.341387
+v 0.105446 0.812324 -0.352131
+v 0.0673729 0.8251 -0.299446
+v 0.0829065 0.820245 -0.328154
+v 0.163948 0.787613 -0.366372
+v 0.134148 0.794627 -0.38492
+v 0.134862 0.801108 -0.364702
+v 0.0968568 0.789376 -0.411772
+v 0.176513 0.760515 -0.410655
+v 0.140375 0.776566 -0.412207
+v 0 0.800708 -0.414889
+v 0.0494105 0.797681 -0.413491
+v 0.0431316 0.822381 -0.222498
+v 0.0209585 0.83068 -0.246717
+v 0 0.825505 -0.217769
+v 0.0222752 0.824379 -0.219341
+v 0.0571055 0.825864 -0.248034
+v 0.0611506 0.820889 -0.225684
+v 0.0427143 0.830406 -0.274631
+v 0.0589761 0.827121 -0.27268
+v 0 0.834904 -0.275945
+v 0.0223763 0.833445 -0.275661
+v 0.192333 0.761236 -0.378584
+v 0.220893 0.723503 -0.390265
+v 0.20182 0.742487 -0.402976
+v 0.202503 0.759013 -0.342238
+v 0.185497 0.772855 -0.357643
+v 0.229147 0.73046 -0.348619
+v 0.217964 0.748268 -0.323878
+v 0.238328 0.704586 -0.373617
+v 0.182638 0.844163 -0.332751
+v 0.20855 0.866089 -0.317157
+v 0.208154 0.830042 -0.298948
+v 0.197796 0.836731 -0.320106
+v 0.225435 0.891324 -0.293612
+v 0.202088 0.898609 -0.32219
+v 0.216036 0.894273 -0.311279
+v 0.221713 0.885753 -0.253374
+v 0.22169 0.856724 -0.273536
+v 0.227222 0.886884 -0.273083
+v 0.207473 0.823979 -0.249259
+v 0.212209 0.824662 -0.273398
+v 0.113654 0.869597 -0.287244
+v 0.140188 0.893495 -0.304854
+v 0.144889 0.85895 -0.323167
+v 0.127225 0.865094 -0.306986
+v 0.170407 0.915574 -0.314537
+v 0.141278 0.921251 -0.283964
+v 0.154011 0.919177 -0.300925
+v 0.17602 0.88052 -0.329499
+v 0.186655 0.907214 -0.322451
+v 0.164181 0.851772 -0.332764
+v 0.125921 0.862724 -0.222037
+v 0.126235 0.896792 -0.232757
+v 0.106278 0.87162 -0.248059
+v 0.112672 0.868658 -0.232446
+v 0.136706 0.926532 -0.249134
+v 0.152825 0.914652 -0.226568
+v 0.141818 0.921807 -0.235425
+v 0.120264 0.900007 -0.266157
+v 0.136018 0.925025 -0.266007
+v 0.106638 0.871852 -0.266963
+v 0.201725 0.863227 -0.234578
+v 0.169297 0.842653 -0.222278
+v 0.191458 0.831381 -0.232334
+v 0.191735 0.902282 -0.228209
+v 0.209229 0.892731 -0.238166
+v 0.158724 0.882351 -0.220155
+v 0.171199 0.908866 -0.224253
+v 0.146124 0.853579 -0.218747
+v 0.183641 -0.455489 0.534379
+v 0.20511 -0.461837 0.475502
+v 0.228365 -0.446181 0.420977
+v 0.21326 -0.450303 0.478539
+v 0.102981 -0.465723 0.632974
+v 0.142559 -0.466669 0.57469
+v 0.143787 -0.462243 0.587914
+v 0.132033 -0.481167 0.556781
+v 0.17086 -0.474631 0.521789
+v 0.109552 -0.472057 0.595485
+v 0.140849 -0.473458 0.563316
+v 0.206663 -0.472505 0.418626
+v 0.176881 -0.48852 0.469991
+v 0.194975 -0.474631 0.472428
+v 0.0439995 -0.453929 0.682063
+v 0.0755892 -0.465387 0.639018
+v 0.0705061 -0.461083 0.663389
+v 0 -0.448613 0.69461
+v 0.0238268 -0.46053 0.664171
+v 0.0210984 -0.44987 0.691901
+v 0.0329053 -0.483094 0.604968
+v 0.0549351 -0.471091 0.629642
+v 0 -0.470799 0.638539
+v 0.0276324 -0.470883 0.636565
+v 0.0917758 -0.479034 0.591505
+v 0.0815781 -0.471375 0.616771
+v 0.065741 -0.440111 0.119726
+v 0.0317646 -0.445352 0.134276
+v 0 -0.441491 0.0941719
+v 0.0323291 -0.440606 0.100743
+v 0.138015 -0.443895 0.192281
+v 0.0965198 -0.444834 0.175662
+v 0.101319 -0.442167 0.150389
+v 0.0981278 -0.457279 0.230677
+v 0.0636123 -0.450028 0.182161
+v 0.124102 -0.450719 0.228684
+v 0.0942201 -0.449501 0.201118
+v 0 -0.451738 0.166652
+v 0.034368 -0.461296 0.211229
+v 0.0320458 -0.45108 0.17059
+v 0.205661 -0.440923 0.30331
+v 0.163536 -0.447685 0.255959
+v 0.174785 -0.442414 0.24495
+v 0.213095 -0.454425 0.36305
+v 0.224677 -0.442616 0.362277
+v 0.178344 -0.481123 0.364052
+v 0.179537 -0.460826 0.31166
+v 0.199389 -0.4673 0.363772
+v 0.138385 -0.462583 0.271465
+v 0.153491 -0.454897 0.266081
+v 0 -0.501354 0.566258
+v 0.0472884 -0.515301 0.519274
+v 0.0774159 -0.49456 0.56248
+v 0.0400356 -0.499329 0.565268
+v 0.0552219 -0.529321 0.415644
+v 0.102374 -0.516827 0.468605
+v 0 -0.53078 0.469003
+v 0.0529288 -0.526727 0.468796
+v 0.12906 -0.497175 0.517019
+v 0.151776 -0.503865 0.416665
+v 0.144853 -0.503338 0.468859
+v 0.109485 -0.488354 0.558077
+v 0 -0.522771 0.361177
+v 0.046641 -0.500432 0.308431
+v 0.101727 -0.509066 0.362546
+v 0.0524328 -0.518796 0.361628
+v 0.0778956 -0.474931 0.261875
+v 0 -0.481524 0.256229
+v 0.0399262 -0.479506 0.257737
+v 0.12954 -0.482953 0.312862
+v 0.111951 -0.469275 0.268257
+v 0.144743 -0.495777 0.363499
+v 0.16267 -0.0297463 0.947597
+v 0.137897 -0.0298265 0.961061
+v 0.139042 -0.0451633 0.963659
+v 0.164631 -0.0504489 0.951176
+v 0.150007 -0.038161 0.957034
+v 0.109331 -0.0304408 0.974293
+v 0.122462 -0.037914 0.969963
+v 0.130968 -0.0135459 0.956851
+v 0.124172 -0.0273895 0.966259
+v 0.133714 -0.0642499 0.965416
+v 0.109851 -0.0516794 0.976283
+v 0.125245 -0.0487781 0.969619
+v 0.172782 -0.0812671 0.947651
+v 0.213918 -0.078022 0.922841
+v 0.18873 -0.0434748 0.935764
+v 0.185903 -0.064746 0.940486
+v 0.119758 -0.0911109 0.968067
+v 0.163882 -0.120989 0.949271
+v 0.149065 -0.0885975 0.957545
+v 0.209867 -0.161573 0.925244
+v 0.133001 -0.161124 0.956956
+v 0.170754 -0.160591 0.943861
+v 0.24743 -0.125194 0.898745
+v 0.2493 -0.167014 0.898651
+v 0.247517 -0.0516572 0.891815
+v 0.248772 -0.0872467 0.896118
+v 0.206641 -0.0168625 0.913183
+v 0.165953 -0.00373407 0.934179
+v 0.181563 -0.0218941 0.9332
+v 0.225597 0.0165132 0.878354
+v 0.237857 -0.0169093 0.886885
+v 0.188508 0.055577 0.881861
+v 0.216542 0.0481267 0.863247
+v 0.128713 0.0776757 0.9026
+v 0.155037 0.0342159 0.918365
+v 0.159167 0.0664178 0.894094
+v 0.116698 0.0146029 0.948102
+v 0.143447 0.00727528 0.940085
+v 0.0272441 0.00174637 0.985368
+v 0.0591053 0.0273947 0.96148
+v 0.0314129 0.062618 0.944549
+v 0 0.031249 0.967483
+v 0.0296124 0.0304081 0.965777
+v 0.0939443 0.0532189 0.93429
+v 0.0802969 -0.0053962 0.974961
+v 0.0883591 0.0217178 0.955411
+v 0.0652762 0.0925539 0.91594
+v 0.0973395 0.0863776 0.910036
+v 0 0.0974522 0.921309
+v 0.032753 0.0962353 0.919852
+v 0.0640997 -0.158632 0.974036
+v 0.0313918 -0.121887 0.983992
+v 0 -0.156323 0.979553
+v 0.0317497 -0.157085 0.978196
+v 0.0956523 -0.123707 0.972085
+v 0.0976501 -0.160225 0.966985
+v 0.0810019 -0.0733797 0.982855
+v 0.05977 -0.0942344 0.98419
+v 0.0898671 -0.0931811 0.977114
+v 0 -0.0930316 0.990478
+v 0.0273784 -0.0760818 0.993721
+v 0.0298425 -0.0936976 0.988793
+v 0.0955999 -0.0476973 0.985969
+v 0.0789269 -0.06284 0.994959
+v 0.0864529 -0.0640166 0.98579
+v 0.0940951 -0.0565412 0.98502
+v 0.0861068 -0.0306433 0.989153
+v 0.0788425 -0.0488271 1.00064
+v 0.0938769 -0.0385712 0.986828
+v 0.0525009 -0.0662129 1.0231
+v 0.0666347 -0.0644079 1.00864
+v 0.0598411 -0.0536198 1.01768
+v 0.0652608 -0.0582549 1.01251
+v 0.0598989 -0.0753391 1.00401
+v 0.0525276 -0.0775374 1.01483
+v 0.0652946 -0.0700523 1.00523
+v 0.0476184 -0.0223971 1.0006
+v 0.0582598 -0.0403976 1.0119
+v 0.0694706 -0.025394 0.994502
+v 0 -0.0210975 1.00705
+v 0.0209047 -0.039677 1.02317
+v 0.0242006 -0.0212264 1.00515
+v 0.0152462 -0.0676801 1.04602
+v 0.0344846 -0.0534302 1.03232
+v 0 -0.0555541 1.03946
+v 0.0177789 -0.0547726 1.03745
+v 0.0426567 -0.0627588 1.03251
+v 0.0490439 -0.0525287 1.02501
+v 0.0477856 -0.0718253 0.995571
+v 0.0209476 -0.0779737 1.00716
+v 0 -0.0724905 1.00177
+v 0.0242757 -0.0726037 0.999895
+v 0.0583871 -0.073576 0.998233
+v 0.0697639 -0.0690371 0.990088
+v 0.0427012 -0.0854758 1.01505
+v 0.0345351 -0.0845283 1.01121
+v 0.0491057 -0.0804193 1.00671
+v 0 -0.0876552 1.01738
+v 0.0152667 -0.0940994 1.02438
+v 0.0178069 -0.0869021 1.01538
+v 0.0271444 -0.0763309 1.04139
+v 0.0394909 -0.0796254 1.02906
+v 0.0399318 -0.0720589 1.03309
+v 0 -0.0809251 1.049
+v 0.0132272 -0.0889316 1.04223
+v 0.0137291 -0.0795664 1.04692
+v 0.0271637 -0.0905263 1.02914
+v 0 -0.0956202 1.03609
+v 0.01374 -0.0942759 1.03402
+v 0.0399551 -0.0846544 1.02277
+v -0.341299 -0.375944 0.366232
+v -0.317818 -0.392423 0.305485
+v -0.289638 -0.411984 0.363044
+v -0.313121 -0.40468 0.424303
+v -0.317288 -0.397295 0.364448
+v -0.312495 -0.42258 0.208415
+v -0.274846 -0.418754 0.252658
+v -0.350284 -0.364453 0.244783
+v -0.316601 -0.396416 0.250199
+v -0.194817 -0.437402 0.239358
+v -0.251544 -0.422435 0.302332
+v -0.2099 -0.457622 0.202318
+v -0.231211 -0.430966 0.245486
+v -0.261632 -0.427221 0.422701
+v -0.23935 -0.43199 0.361842
+v -0.262332 -0.422664 0.362137
+v -0.275025 -0.421949 0.482067
+v -0.291362 -0.413379 0.536815
+v -0.328162 -0.387696 0.483083
+v -0.303426 -0.408222 0.482265
+v -0.225399 -0.440193 0.480871
+v -0.222989 -0.439421 0.53835
+v -0.247502 -0.431674 0.481825
+v -0.164011 -0.483033 0.631406
+v -0.236564 -0.444228 0.584476
+v -0.156119 -0.458526 0.595789
+v -0.191143 -0.453861 0.591118
+v -0.315111 -0.397679 0.592146
+v -0.26979 -0.455493 0.624003
+v -0.280089 -0.425608 0.586435
+v -0.339519 -0.257182 -0.0802831
+v -0.283535 -0.198083 -0.11242
+v -0.23546 -0.269794 -0.12322
+v -0.300873 -0.335808 -0.11045
+v -0.292915 -0.260195 -0.110308
+v -0.26898 -0.103901 -0.112945
+v -0.224047 -0.163495 -0.142015
+v -0.318497 -0.140059 -0.0746981
+v -0.274851 -0.146856 -0.114171
+v -0.123113 -0.206388 -0.170045
+v -0.173106 -0.230478 -0.142628
+v -0.172014 -0.141551 -0.170002
+v -0.171621 -0.184999 -0.160037
+v -0.176383 -0.34915 -0.10631
+v -0.119478 -0.295776 -0.113872
+v -0.174532 -0.282877 -0.121531
+v -0.273281 0.0420733 -0.0438498
+v -0.232462 0.00608204 -0.106821
+v -0.269878 -0.0318881 -0.0928838
+v -0.307808 -0.00318692 -0.0317095
+v -0.272342 0.00331308 -0.0724007
+v -0.148915 0.00578811 -0.165362
+v -0.182883 -0.0457092 -0.157469
+v -0.199052 0.0623656 -0.105989
+v -0.190658 0.0074224 -0.137241
+v -0.222802 -0.0788504 -0.141507
+v -0.131964 -0.111136 -0.188613
+v -0.176218 -0.0952439 -0.168768
+v -0.308256 -0.0631453 -0.0619102
+v -0.26804 -0.0666021 -0.106124
+v -0.26887 -0.396352 0.909752
+v -0.214141 -0.334681 0.919903
+v -0.263105 -0.289222 0.900577
+v -0.30851 -0.340088 0.875405
+v -0.266995 -0.336923 0.904292
+v -0.104904 -0.328487 0.922781
+v -0.164801 -0.284207 0.93026
+v -0.149733 -0.3934 0.920339
+v -0.15673 -0.332639 0.924128
+v -0.172003 -0.202637 0.93974
+v -0.215216 -0.244729 0.920604
+v -0.126656 -0.240414 0.945024
+v -0.169955 -0.242358 0.935335
+v -0.293898 -0.252639 0.867139
+v -0.253262 -0.209214 0.897882
+v -0.258195 -0.248301 0.898482
+v -0.375987 -0.340814 -0.0337268
+v -0.380901 -0.277243 0.032671
+v -0.358457 -0.200112 -0.0267162
+v -0.367895 -0.263853 -0.0306343
+v -0.375596 -0.311145 0.175939
+v -0.382253 -0.237542 0.105277
+v -0.379176 -0.364086 0.102343
+v -0.381399 -0.294389 0.103665
+v -0.379025 -0.149836 0.112764
+v -0.369603 -0.166291 0.0406057
+v -0.385185 -0.214712 0.178025
+v -0.38088 -0.190046 0.108528
+v -0.342627 -0.106257 -0.015664
+v -0.349444 -0.148083 -0.0217878
+v -0.351404 -0.354331 0.427814
+v -0.367965 -0.307651 0.370431
+v -0.363729 -0.333817 0.306674
+v -0.35769 -0.345276 0.36828
+v -0.376188 -0.22098 0.374406
+v -0.380079 -0.252636 0.309953
+v -0.366553 -0.273812 0.432936
+v -0.373629 -0.265433 0.372527
+v -0.379525 -0.278609 0.243679
+v -0.386398 -0.194374 0.247084
+v -0.384425 -0.235652 0.245143
+v -0.369702 -0.323364 0.243084
+v -0.359143 -0.0454376 0.384517
+v -0.376221 -0.0834201 0.319025
+v -0.375379 -0.132559 0.377674
+v -0.363531 -0.0923241 0.440867
+v -0.369754 -0.0887861 0.380331
+v -0.380073 -0.0641595 0.195141
+v -0.385994 -0.115397 0.252169
+v -0.369328 -0.0337485 0.265179
+v -0.380479 -0.0754649 0.257209
+v -0.383633 -0.167556 0.313214
+v -0.386429 -0.137312 0.183762
+v -0.387444 -0.154662 0.24911
+v -0.370145 -0.182621 0.437038
+v -0.377148 -0.176656 0.375911
+v -0.338487 -0.0443826 0.00235778
+v -0.357964 -0.0317511 0.0731655
+v -0.331012 0.0171674 0.0389716
+v -0.336366 -0.0157204 0.0175237
+v -0.363023 -0.0919062 0.0524717
+v -0.339774 -0.073126 -0.00816016
+v -0.37698 -0.0821329 0.123634
+v -0.378431 -0.114848 0.117331
+v -0.363023 -0.0117289 0.147082
+v -0.372551 -0.0487428 0.133083
+v -0.324728 -0.29766 0.82767
+v -0.340911 -0.350598 0.77666
+v -0.336842 -0.40362 0.835552
+v -0.331904 -0.344898 0.831349
+v -0.311718 -0.219243 0.822794
+v -0.331546 -0.262105 0.774295
+v -0.317327 -0.257308 0.824743
+v -0.344609 -0.271269 0.661246
+v -0.340676 -0.308781 0.717453
+v -0.338685 -0.226798 0.720092
+v -0.339619 -0.266826 0.718796
+v -0.335933 -0.414813 0.716014
+v -0.334716 -0.360298 0.654176
+v -0.339263 -0.356428 0.715872
+v -0.304296 -0.0566442 0.818942
+v -0.32815 -0.0982774 0.774453
+v -0.309753 -0.137557 0.821642
+v -0.282982 -0.0932074 0.86163
+v -0.309064 -0.0966912 0.82069
+v -0.349859 -0.0980743 0.670701
+v -0.342261 -0.141944 0.72292
+v -0.335097 -0.0554519 0.724795
+v -0.341372 -0.0985454 0.724072
+v -0.328063 -0.182474 0.773912
+v -0.348891 -0.186539 0.666645
+v -0.340465 -0.184932 0.72153
+v -0.284251 -0.173813 0.864211
+v -0.309915 -0.178867 0.822052
+v -0.364199 -0.140124 0.498352
+v -0.3576 -0.096426 0.55865
+v -0.350887 -0.0507559 0.502911
+v -0.360004 -0.0947951 0.500374
+v -0.363053 -0.232129 0.494204
+v -0.360088 -0.187609 0.554338
+v -0.364779 -0.186112 0.496433
+v -0.356689 -0.14249 0.613383
+v -0.35206 -0.232064 0.608144
+v -0.355112 -0.187591 0.610995
+v -0.34723 -0.0531297 0.617509
+v -0.354744 -0.0974434 0.615492
+v -0.338808 -0.358381 0.541229
+v -0.354812 -0.320109 0.488079
+v -0.344689 -0.357576 0.485189
+v -0.344841 -0.317709 0.600763
+v -0.335024 -0.360119 0.596759
+v -0.354746 -0.277669 0.548235
+v -0.349578 -0.27523 0.604646
+v -0.360332 -0.277544 0.491251
+v -0.270021 0.183796 0.151405
+v -0.250535 0.219935 0.200112
+v -0.22988 0.247545 0.157829
+v -0.253104 0.203435 0.118078
+v -0.251096 0.217673 0.158942
+v -0.247672 0.201215 0.285394
+v -0.224342 0.248556 0.249012
+v -0.273837 0.172969 0.230557
+v -0.249328 0.213762 0.242242
+v -0.166122 0.2964 0.255682
+v -0.201028 0.279793 0.204606
+v -0.19325 0.264928 0.298974
+v -0.197174 0.276415 0.252836
+v -0.208445 0.255016 0.0997486
+v -0.173937 0.295795 0.151795
+v -0.204742 0.274117 0.153286
+v -0.238934 0.137098 0.472953
+v -0.21248 0.178956 0.429644
+v -0.243641 0.166107 0.375362
+v -0.269047 0.115689 0.415447
+v -0.241351 0.149381 0.423
+v -0.149393 0.222637 0.439843
+v -0.185529 0.224498 0.389084
+v -0.178799 0.187723 0.48375
+v -0.181979 0.203634 0.435289
+v -0.218341 0.218477 0.337805
+v -0.157504 0.266542 0.348704
+v -0.189327 0.246276 0.344018
+v -0.272811 0.14513 0.319056
+v -0.245762 0.184358 0.32963
+v -0.212012 0.0755801 0.841201
+v -0.188626 0.112308 0.821548
+v -0.217701 0.11212 0.775527
+v -0.235856 0.0765799 0.805621
+v -0.213324 0.0965225 0.811853
+v -0.13257 0.138159 0.837992
+v -0.163968 0.1484 0.789706
+v -0.159999 0.0987976 0.865373
+v -0.161693 0.126272 0.830304
+v -0.169138 0.174847 0.695193
+v -0.195015 0.146226 0.738197
+v -0.136442 0.179619 0.750387
+v -0.166543 0.164739 0.744396
+v -0.249108 0.0961373 0.728207
+v -0.226814 0.130295 0.684497
+v -0.222367 0.12354 0.732548
+v -0.264402 0.101968 0.520397
+v -0.233686 0.131486 0.579487
+v -0.207042 0.158534 0.530777
+v -0.236433 0.132176 0.525634
+v -0.201599 0.157166 0.637907
+v -0.258575 0.102323 0.628487
+v -0.230534 0.131902 0.632959
+v -0.139858 0.195421 0.647569
+v -0.173735 0.178964 0.589175
+v -0.171473 0.178284 0.642915
+v -0.143538 0.198754 0.539805
+v -0.176115 0.180806 0.535582
+v -0.337836 -0.00655256 0.448549
+v -0.320929 0.0389878 0.398635
+v -0.34742 0.00319315 0.333602
+v -0.342419 -0.00261151 0.390867
+v -0.298744 0.0894187 0.355449
+v -0.293232 0.0711038 0.460405
+v -0.296022 0.0786559 0.407077
+v -0.30121 0.115034 0.257783
+v -0.327429 0.0566863 0.291075
+v -0.300703 0.102007 0.305644
+v -0.351215 0.0204208 0.221415
+v -0.350974 0.0108683 0.277027
+v -0.308985 -0.0158364 0.772679
+v -0.30029 0.0266191 0.725054
+v -0.328122 -0.0112347 0.673608
+v -0.320738 -0.0133772 0.724898
+v -0.268509 -0.018423 0.854481
+v -0.274888 0.0183198 0.812257
+v -0.291897 -0.0177918 0.816142
+v -0.267341 0.0573644 0.769829
+v -0.24017 0.0457536 0.838346
+v -0.256292 0.0501447 0.807253
+v -0.281859 0.0674534 0.67711
+v -0.275748 0.0634772 0.725933
+v -0.239418 0.102195 -0.0351077
+v -0.213035 0.170022 -0.00953204
+v -0.181753 0.135303 -0.0848536
+v -0.20757 0.117335 -0.0616182
+v -0.298292 0.0729821 0.0271766
+v -0.265278 0.133372 0.0353559
+v -0.270541 0.0874645 -0.00640695
+v -0.236523 0.195781 0.0609139
+v -0.276588 0.155574 0.0981021
+v -0.258648 0.173682 0.0768666
+v -0.190293 0.237055 0.0258931
+v -0.212268 0.218116 0.0448664
+v -0.288873 0.0678482 0.57056
+v -0.311113 0.0304905 0.622016
+v -0.286019 0.0682635 0.624906
+v -0.315185 0.0311388 0.510574
+v -0.291063 0.0681481 0.515313
+v -0.334099 -0.00961306 0.563635
+v -0.335537 -0.00863692 0.506376
+v -0.332102 -0.0102295 0.619617
+v -0.324423 0.0791082 0.189745
+v -0.33595 0.0447295 0.115485
+v -0.346277 0.0318576 0.16704
+v -0.295396 0.137443 0.169109
+v -0.299573 0.127115 0.211987
+v -0.305188 0.103086 0.100412
+v -0.288286 0.145212 0.129998
+v -0.320025 0.0585873 0.0683354
+v -0.177314 0.204818 -0.0510199
+v -0.167034 0.18223 -0.122798
+v -0.149034 0.119255 -0.152767
+v -0.168725 0.158519 -0.103729
+v -0.159665 0.285267 0.0452783
+v -0.167426 0.259011 -0.0241111
+v -0.175009 0.248965 0.00116269
+v -0.17918 0.290497 -0.0609743
+v -0.177164 0.238112 -0.093383
+v -0.149721 0.3008 -0.00470029
+v -0.168557 0.27236 -0.0447148
+v -0.160058 0.168229 -0.195838
+v -0.194921 0.221357 -0.173014
+v -0.175228 0.201681 -0.146613
+v -0.21819 0.281995 -0.133552
+v -0.26422 0.274293 -0.228207
+v -0.233098 0.21084 -0.267301
+v -0.227725 0.245742 -0.199837
+v -0.173263 0.341693 -0.0238379
+v -0.225521 0.344757 -0.0845003
+v -0.200073 0.314908 -0.0732157
+v -0.267222 0.417414 -0.116444
+v -0.27622 0.344814 -0.176785
+v -0.21453 0.407775 -0.0317089
+v -0.249809 0.379204 -0.0978893
+v -0.306638 0.268641 -0.338166
+v -0.314639 0.343639 -0.292523
+v -0.294985 0.306469 -0.259249
+v -0.279797 0.207117 -0.366981
+v -0.240251 0.15911 -0.387649
+v -0.192666 0.159829 -0.308601
+v -0.238726 0.181983 -0.331175
+v -0.330786 0.26947 -0.445487
+v -0.307399 0.208213 -0.46356
+v -0.31171 0.235804 -0.406418
+v -0.259412 0.1698 -0.542722
+v -0.265715 0.160342 -0.469212
+v -0.305787 0.218563 -0.547441
+v -0.290901 0.186126 -0.509143
+v -0.195451 0.128497 -0.407372
+v -0.213461 0.12942 -0.465108
+v -0.233311 0.142158 -0.432916
+v -0.329541 0.345256 -0.410956
+v -0.321008 0.352655 -0.511417
+v -0.32675 0.280027 -0.535933
+v -0.333345 0.309542 -0.480189
+v -0.255976 0.429549 -0.563622
+v -0.289278 0.358786 -0.584918
+v -0.295245 0.436762 -0.484187
+v -0.295399 0.395443 -0.540067
+v -0.246645 0.312827 -0.640337
+v -0.296456 0.295786 -0.60247
+v -0.240548 0.35766 -0.62796
+v -0.274305 0.331026 -0.617491
+v -0.272516 0.239582 -0.609537
+v -0.307295 0.256707 -0.578835
+v -0.288912 -0.666279 -0.0831981
+v -0.233667 -0.718383 -0.0394682
+v -0.288313 -0.71953 0.0292002
+v -0.335047 -0.66747 -0.026194
+v -0.288523 -0.704927 -0.0329901
+v -0.136552 -0.67511 -0.0280042
+v -0.172796 -0.723594 0.0247169
+v -0.187443 -0.669698 -0.0818819
+v -0.179633 -0.708233 -0.0350267
+v -0.182766 -0.687312 0.135767
+v -0.227442 -0.724741 0.0933852
+v -0.136631 -0.686121 0.0787634
+v -0.176191 -0.716151 0.0857864
+v -0.327044 -0.674066 0.0872825
+v -0.276152 -0.679477 0.14116
+v -0.281161 -0.710704 0.0910758
+v -0.288217 -0.679435 0.838379
+v -0.248642 -0.724519 0.790895
+v -0.198896 -0.723335 0.85193
+v -0.2461 -0.678205 0.889508
+v -0.246831 -0.711912 0.845432
+v -0.24088 -0.688682 0.691628
+v -0.198408 -0.728532 0.734591
+v -0.283587 -0.684515 0.738785
+v -0.244434 -0.716483 0.736358
+v -0.115859 -0.693483 0.748115
+v -0.148661 -0.727347 0.795626
+v -0.157976 -0.694713 0.696986
+v -0.153047 -0.720555 0.741401
+v -0.15574 -0.680004 0.892121
+v -0.113033 -0.684171 0.844965
+v -0.151829 -0.713932 0.849146
+v -0.30391 -0.498066 0.668956
+v -0.32282 -0.565952 0.726198
+v -0.342489 -0.483419 0.78141
+v -0.331237 -0.488983 0.719836
+v -0.319732 -0.633454 0.786428
+v -0.28283 -0.632563 0.691009
+v -0.308326 -0.632734 0.733956
+v -0.294197 -0.618974 0.884469
+v -0.331331 -0.556659 0.842236
+v -0.315639 -0.625673 0.839496
+v -0.31354 -0.474577 0.887895
+v -0.337524 -0.478425 0.840051
+v -0.0969194 -0.478778 0.897495
+v -0.0774719 -0.570173 0.85133
+v -0.0654567 -0.515878 0.79543
+v -0.0634711 -0.431765 0.851899
+v -0.0707234 -0.496321 0.852392
+v -0.0840444 -0.64607 0.797118
+v -0.112805 -0.624088 0.888986
+v -0.0899111 -0.634521 0.847274
+v -0.119063 -0.647679 0.702012
+v -0.0832655 -0.591144 0.740087
+v -0.0943475 -0.648249 0.746058
+v -0.069956 -0.47422 0.735292
+v -0.106684 -0.534636 0.682874
+v -0.0773661 -0.528459 0.735056
+v -0.208287 -0.52835 0.637223
+v -0.252371 -0.581833 0.652832
+v -0.260651 -0.513618 0.642398
+v -0.15335 -0.595644 0.66033
+v -0.153645 -0.534978 0.650489
+v -0.20034 -0.651356 0.660475
+v -0.156436 -0.650983 0.672896
+v -0.244653 -0.641994 0.666514
+v -0.209392 -0.47177 0.928314
+v -0.149543 -0.554429 0.922812
+v -0.147798 -0.472236 0.922294
+v -0.262868 -0.552296 0.920108
+v -0.267734 -0.472455 0.917082
+v -0.203436 -0.628169 0.923071
+v -0.253551 -0.622939 0.912658
+v -0.153584 -0.625261 0.914716
+v -0.387436 -0.440321 0.0296682
+v -0.369537 -0.538419 0.0961211
+v -0.348561 -0.464054 0.158716
+v -0.376444 -0.45013 0.0999599
+v -0.354445 -0.427851 -0.0878125
+v -0.378455 -0.525308 -0.0356201
+v -0.380964 -0.432504 -0.0361787
+v -0.368236 -0.614976 0.0306267
+v -0.340742 -0.597899 -0.0798229
+v -0.364087 -0.605614 -0.0296813
+v -0.325558 -0.615423 0.139592
+v -0.354786 -0.614851 0.0904298
+v -0.12704 -0.448099 -0.0735785
+v -0.182701 -0.527846 -0.109937
+v -0.243518 -0.430642 -0.121062
+v -0.179139 -0.436248 -0.106528
+v -0.241923 -0.609859 -0.117359
+v -0.141316 -0.607217 -0.0730372
+v -0.186967 -0.607615 -0.104287
+v -0.304443 -0.521287 -0.117606
+v -0.296601 -0.602967 -0.109127
+v -0.305291 -0.427537 -0.115465
+v -0.25113 -0.502745 0.197094
+v -0.188536 -0.577394 0.174934
+v -0.148115 -0.512563 0.149929
+v -0.196177 -0.511769 0.183546
+v -0.293784 -0.559568 0.181063
+v -0.304357 -0.484216 0.189956
+v -0.233064 -0.638226 0.175543
+v -0.282371 -0.626877 0.167977
+v -0.145237 -0.635178 0.13052
+v -0.185474 -0.638484 0.162245
+v -0.0953701 -0.405611 -0.0214782
+v -0.0997946 -0.492468 0.0352712
+v -0.105712 -0.548895 -0.0268432
+v -0.102107 -0.469936 -0.0239275
+v -0.113288 -0.573273 0.0910993
+v -0.112515 -0.451919 0.0935688
+v -0.115559 -0.506404 0.0968639
+v -0.106751 -0.633108 0.0275567
+v -0.117988 -0.635837 0.0828
+v -0.114552 -0.619641 -0.0267392
+v -0.308658 0.43336 -0.368305
+v -0.285353 0.520853 -0.361551
+v -0.271184 0.525759 -0.460079
+v -0.291391 0.478507 -0.418527
+v -0.298509 0.427522 -0.236282
+v -0.317802 0.38717 -0.32759
+v -0.235335 0.485531 -0.0647615
+v -0.271214 0.498067 -0.167977
+v -0.272045 0.45855 -0.143225
+v -0.277875 0.55689 -0.190965
+v -0.282577 0.535883 -0.259681
+v -0.247008 0.54474 -0.114114
+v -0.271665 0.531425 -0.180446
+v -0.267 0.601931 -0.388185
+v -0.278055 0.600765 -0.320108
+v -0.278645 0.563976 -0.33172
+v -0.252363 0.682167 -0.15033
+v -0.326452 0.645309 -0.138504
+v -0.267679 0.610284 -0.129549
+v -0.192626 0.647576 -0.125774
+v -0.258116 0.645451 -0.13607
+v -0.430349 0.658277 -0.154382
+v -0.380321 0.61857 -0.152413
+v -0.388682 0.680406 -0.144893
+v -0.384792 0.647908 -0.143704
+v -0.35448 0.589213 -0.202564
+v -0.324346 0.587714 -0.15672
+v -0.401583 0.612716 -0.19176
+v -0.367842 0.597467 -0.17194
+v -0.239082 0.578957 -0.111996
+v -0.289471 0.569074 -0.162469
+v -0.279361 0.583208 -0.137022
+v -0.318359 0.584666 -0.230094
+v -0.294439 0.591849 -0.266039
+v -0.294318 0.572729 -0.209871
+v -0.380591 0.614582 -0.245535
+v -0.343967 0.620068 -0.286887
+v -0.347363 0.598425 -0.244565
+v -0.345079 0.681327 -0.333156
+v -0.312606 0.638668 -0.319124
+v -0.375881 0.662505 -0.310821
+v -0.34177 0.649107 -0.318477
+v -0.28588 0.662049 -0.349089
+v -0.290372 0.624108 -0.317788
+v -0.419572 0.658706 -0.261892
+v -0.444197 0.711394 -0.263256
+v -0.393743 0.706765 -0.3123
+v -0.412958 0.685943 -0.291128
+v -0.447289 0.655956 -0.196803
+v -0.417303 0.633713 -0.225256
+v -0.447397 0.702392 -0.15815
+v -0.471552 0.708579 -0.199184
+v -0.460337 0.681448 -0.17325
+v -0.43241 0.746455 -0.258303
+v -0.452589 0.744444 -0.206816
+v -0.460792 0.730831 -0.231063
+v -0.0467382 0.325284 0.136739
+v -0.0925539 0.320139 0.170699
+v -0.0462436 0.320164 0.221337
+v -0.0468991 0.323707 0.17766
+v -0.132936 0.312061 0.210485
+v -0.135299 0.30832 0.112598
+v -0.13572 0.310986 0.159838
+v -0.125628 0.296564 0.306829
+v -0.0886491 0.312396 0.263435
+v -0.129481 0.307575 0.259519
+v -0.0434892 0.30185 0.311934
+v -0.0450231 0.313323 0.266515
+v -0.0377827 0.232065 0.495188
+v -0.0735327 0.221293 0.545706
+v -0.0364517 0.2252 0.601479
+v -0.0369868 0.226526 0.54726
+v -0.107651 0.210086 0.597079
+v -0.111325 0.219111 0.491389
+v -0.109197 0.212143 0.543201
+v -0.105384 0.203917 0.704755
+v -0.0717153 0.218255 0.65442
+v -0.106456 0.208743 0.651454
+v -0.0357781 0.218679 0.710249
+v -0.0360809 0.22396 0.656319
+v -0.0703592 0.199987 0.759265
+v -0.035033 0.18494 0.809623
+v -0.0354469 0.205232 0.761742
+v -0.10286 0.172311 0.802347
+v -0.104202 0.191409 0.755412
+v -0.0994805 0.118705 0.880737
+v -0.0684286 0.154761 0.849486
+v -0.101303 0.147713 0.844481
+v -0.0337403 0.129119 0.889852
+v -0.0344822 0.159128 0.852718
+v -0.032544 -0.433336 0.0684456
+v -0.0619855 -0.419454 0.0516405
+v -0.0290027 -0.401336 0.00260959
+v -0.0312142 -0.420036 0.03584
+v -0.0804386 -0.407577 0.025012
+v -0.101981 -0.43742 0.118873
+v -0.0918712 -0.426509 0.0746889
+v -0.0781392 -0.343402 -0.0680291
+v -0.0524989 -0.374486 -0.0268658
+v -0.0771327 -0.378765 -0.022981
+v -0.024587 -0.350255 -0.0669927
+v -0.0265726 -0.377866 -0.0315624
+v -0.0173499 -0.340897 0.891348
+v -0.032518 -0.377955 0.851217
+v -0.0162142 -0.396858 0.808469
+v -0.0161103 -0.370733 0.852319
+v -0.0473693 -0.423079 0.799815
+v -0.0576974 -0.360469 0.893822
+v -0.0495204 -0.395304 0.851292
+v -0.0641499 -0.454693 0.696538
+v -0.0352263 -0.426311 0.757202
+v -0.0543417 -0.44245 0.745122
+v -0.0190571 -0.436735 0.723869
+v -0.0173127 -0.418962 0.764189
+v -0.08278 0.283223 0.354838
+v -0.0403407 0.264584 0.4009
+v -0.0418935 0.284411 0.356338
+v -0.117787 0.25622 0.397308
+v -0.121652 0.278062 0.352302
+v -0.0772361 0.242537 0.445498
+v -0.114267 0.235186 0.443214
+v -0.0389355 0.245943 0.446792
+v -0.0448664 0.351036 0.0368917
+v -0.084777 0.331134 0.0392602
+v -0.0441574 0.329119 0.0693112
+v -0.0436281 0.336529 0.047541
+v -0.122579 0.311429 0.0436831
+v -0.125569 0.33518 0.0110101
+v -0.120968 0.319807 0.0225689
+v -0.0891709 0.320583 0.0906503
+v -0.129137 0.308034 0.0737864
+v -0.0455092 0.326229 0.0998335
+v -0.0632284 0.496693 0.0522852
+v -0.116503 0.442256 0.0399308
+v -0.0541098 0.40816 0.0483714
+v -0.0593708 0.448955 0.0551193
+v -0.153438 0.391277 0.0121446
+v -0.18104 0.470039 0.00485434
+v -0.169159 0.42866 0.0126096
+v -0.0954346 0.370179 0.029827
+v -0.13765 0.35956 0.0095734
+v -0.0488177 0.375217 0.039732
+v -0.0692761 0.505582 -0.562042
+v -0.130136 0.568004 -0.528764
+v -0.0626485 0.640189 -0.506046
+v -0.0664738 0.570216 -0.533525
+v -0.232047 0.565216 -0.501918
+v -0.174025 0.632377 -0.495223
+v -0.199187 0.49892 -0.545668
+v -0.188173 0.564066 -0.51907
+v -0.150533 0.744043 -0.44196
+v -0.113201 0.701726 -0.474505
+v -0.199905 0.685497 -0.462496
+v -0.161614 0.694175 -0.469576
+v -0.0536794 0.762368 -0.447664
+v -0.0582379 0.707055 -0.477971
+v -0.0726958 0.340957 -0.664799
+v -0.138282 0.375681 -0.645424
+v -0.0711811 0.413275 -0.623292
+v -0.0716465 0.376815 -0.648416
+v -0.199873 0.404748 -0.612935
+v -0.193634 0.339499 -0.652866
+v -0.194895 0.370921 -0.637938
+v -0.138821 0.452917 -0.589054
+v -0.202195 0.446614 -0.579567
+v -0.0706181 0.454733 -0.593233
+v -0.0306399 -0.195728 0.970285
+v -0.057123 -0.235922 0.956621
+v -0.0240468 -0.271835 0.943264
+v -0.0277858 -0.234252 0.959139
+v -0.0802769 -0.277764 0.939515
+v -0.0956803 -0.199455 0.960676
+v -0.0895627 -0.238122 0.952018
+v -0.0425527 -0.311463 0.921053
+v -0.0688023 -0.319918 0.921674
+v -0.0202817 -0.307657 0.921165
+v -0.049547 -0.311956 -0.105817
+v -0.0238367 -0.283103 -0.139843
+v -0.023709 -0.319134 -0.103998
+v -0.0816243 -0.264189 -0.144485
+v -0.0796433 -0.304819 -0.108871
+v -0.0878074 -0.176513 -0.193941
+v -0.0521008 -0.23408 -0.173683
+v -0.0840611 -0.222685 -0.173847
+v -0.0267024 -0.190718 -0.197111
+v -0.0248684 -0.240765 -0.171792
+v -0.0599751 -0.128388 -0.209467
+v -0.0321871 -0.0691596 -0.222731
+v -0.0292367 -0.131565 -0.213064
+v -0.101087 -0.0620719 -0.200364
+v -0.0937169 -0.121882 -0.201748
+v -0.115674 0.055105 -0.187883
+v -0.0713902 -0.00682142 -0.215572
+v -0.109216 -0.00036684 -0.192884
+v -0.0381981 0.0419961 -0.235526
+v -0.035269 -0.00935459 -0.229192
+v -0.0803711 0.0844567 -0.225271
+v -0.0428926 0.103386 -0.257977
+v -0.0406901 0.0790388 -0.244814
+v -0.120922 0.124855 -0.209363
+v -0.118034 0.096215 -0.193935
+v -0.138478 0.148595 -0.265069
+v -0.0884331 0.12573 -0.258533
+v -0.128964 0.142918 -0.232488
+v -0.0470212 0.120443 -0.299626
+v -0.0449537 0.11665 -0.27594
+v -0.097999 0.124878 -0.319976
+v -0.0518947 0.108853 -0.365753
+v -0.0492429 0.116377 -0.329956
+v -0.150911 0.126322 -0.356271
+v -0.145782 0.140078 -0.308863
+v -0.16045 0.112915 -0.441079
+v -0.107541 0.107031 -0.401861
+v -0.153901 0.116281 -0.399697
+v -0.0595919 0.101024 -0.449026
+v -0.0552525 0.102268 -0.405836
+v -0.125551 0.112279 -0.490196
+v -0.0709982 0.126709 -0.538642
+v -0.065189 0.10952 -0.494143
+v -0.194944 0.134806 -0.523346
+v -0.176258 0.119186 -0.482359
+v -0.219301 0.191387 -0.600189
+v -0.1475 0.153854 -0.575332
+v -0.210129 0.15949 -0.56385
+v -0.0790718 0.182988 -0.615599
+v -0.0759743 0.151547 -0.579976
+v -0.154044 0.222564 -0.638941
+v -0.0775425 0.260206 -0.661002
+v -0.0792455 0.219987 -0.642965
+v -0.213772 0.26767 -0.647803
+v -0.21995 0.228648 -0.628685
+v -0.144378 0.302534 -0.665208
+v -0.202463 0.304851 -0.656009
+v -0.0750102 0.301309 -0.668637
+v -0.0606884 0.60785 0.00104436
+v -0.126715 0.537337 0.0111468
+v -0.0643107 0.550463 0.0321785
+v -0.0403517 0.7183 -0.0719016
+v -0.0987466 0.656786 -0.062447
+v -0.0504326 0.666442 -0.034872
+v -0.171165 0.605799 -0.0764289
+v -0.126096 0.69557 -0.127071
+v -0.142824 0.650926 -0.0969906
+v -0.191637 0.550233 -0.0477524
+v -0.209701 0.569968 -0.0807895
+v -0.185308 0.513743 -0.0172354
+v -0.258514 0.602197 -0.445199
+v -0.237614 0.630468 -0.469873
+v -0.253217 0.578269 -0.474783
+v -0.246529 0.67248 -0.397952
+v -0.240943 0.662224 -0.43894
+v -0.254771 0.635976 -0.4207
+v -0.212896 0.714905 -0.428435
+v -0.224501 0.676789 -0.452578
+v -0.214224 0.916672 -0.246046
+v -0.201014 0.943338 -0.25145
+v -0.21544 0.938681 -0.279423
+v -0.226436 0.912723 -0.26973
+v -0.216965 0.931825 -0.261167
+v -0.159856 0.939686 -0.239651
+v -0.17745 0.953646 -0.260405
+v -0.179801 0.930945 -0.23207
+v -0.180783 0.946411 -0.244635
+v -0.165912 0.939303 -0.292739
+v -0.191876 0.948989 -0.288378
+v -0.1537 0.943252 -0.269054
+v -0.173109 0.951035 -0.277835
+v -0.216862 0.918936 -0.303965
+v -0.196917 0.927676 -0.311546
+v -0.207634 0.937732 -0.29671
+v -0.209996 0.761876 -0.268358
+v -0.199084 0.77337 -0.281405
+v -0.191438 0.774734 -0.312022
+v -0.207814 0.759869 -0.30299
+v -0.201734 0.767127 -0.28955
+v -0.203233 0.797472 -0.273932
+v -0.192387 0.789092 -0.301842
+v -0.202175 0.775482 -0.251997
+v -0.199221 0.78193 -0.2764
+v -0.165819 0.804418 -0.335288
+v -0.178221 0.788374 -0.325575
+v -0.18742 0.811743 -0.321532
+v -0.181068 0.796668 -0.322844
+v -0.180956 0.7796 -0.34144
+v -0.161743 0.793141 -0.342758
+v -0.178357 0.783728 -0.331262
+v -0.192516 0.777938 -0.227759
+v -0.168503 0.781447 -0.210541
+v -0.151415 0.801648 -0.215482
+v -0.181336 0.804317 -0.228606
+v -0.174268 0.789156 -0.220565
+v -0.153638 0.772539 -0.191376
+v -0.137434 0.789176 -0.20042
+v -0.186175 0.766882 -0.204739
+v -0.162292 0.776737 -0.200861
+v -0.0963023 0.812563 -0.21585
+v -0.12229 0.805309 -0.211386
+v -0.103071 0.800257 -0.198013
+v -0.114773 0.801816 -0.205592
+v -0.135327 0.828572 -0.217894
+v -0.108345 0.822919 -0.221075
+v -0.128262 0.813355 -0.215464
+v -0.143329 0.804627 -0.337252
+v -0.119159 0.809804 -0.331351
+v -0.13845 0.801677 -0.350685
+v -0.140844 0.802002 -0.341998
+v -0.153713 0.826966 -0.334024
+v -0.127696 0.819206 -0.325202
+v -0.147191 0.8121 -0.335056
+v -0.0961567 0.829666 -0.288835
+v -0.10501 0.817882 -0.310157
+v -0.116614 0.840248 -0.308051
+v -0.109848 0.825231 -0.308777
+v -0.0934554 0.817549 -0.318853
+v -0.086003 0.820458 -0.291304
+v -0.100185 0.816042 -0.313185
+v -0.084054 0.824638 -0.268864
+v -0.0780919 0.822576 -0.248928
+v -0.0710179 0.824335 -0.270682
+v -0.0786961 0.822795 -0.269513
+v -0.0959818 0.847028 -0.267898
+v -0.0888081 0.831892 -0.249223
+v -0.0891348 0.832003 -0.268429
+v -0.0899479 0.821575 -0.232115
+v -0.102029 0.843892 -0.232989
+v -0.095203 0.828981 -0.232883
+v -0.0751675 0.819879 -0.228529
+v -0.084018 0.819324 -0.230668
+v -0.231902 0.754223 -0.26947
+v -0.226075 0.759994 -0.240858
+v -0.208076 0.764415 -0.241789
+v -0.216972 0.759952 -0.252956
+v -0.265103 0.760852 -0.220189
+v -0.223444 0.760451 -0.217689
+v -0.264144 0.760816 -0.243923
+v -0.240714 0.760641 -0.229576
+v -0.20189 0.765574 -0.22074
+v -0.215759 0.757673 -0.200462
+v -0.205912 0.761992 -0.209879
+v -0.201854 0.770051 -0.234112
+v -0.234872 0.742799 -0.306286
+v -0.277858 0.727498 -0.319523
+v -0.278995 0.751255 -0.279238
+v -0.256224 0.744786 -0.293184
+v -0.317986 0.708047 -0.333577
+v -0.258236 0.693552 -0.356883
+v -0.284726 0.698216 -0.343913
+v -0.37536 0.738148 -0.30116
+v -0.358205 0.712515 -0.324745
+v -0.402685 0.758643 -0.236201
+v -0.343965 0.756617 -0.269721
+v -0.390758 0.753705 -0.27032
+v -0.311167 0.763685 -0.236173
+v -0.300161 0.759231 -0.259113
+v -0.277376 0.753251 -0.197126
+v -0.248651 0.737773 -0.18066
+v -0.183529 0.754714 -0.188935
+v -0.23327 0.750984 -0.191422
+v -0.352617 0.756796 -0.208565
+v -0.303456 0.759588 -0.213776
+v -0.409003 0.736167 -0.174314
+v -0.409429 0.752424 -0.202781
+v -0.329845 0.712932 -0.159268
+v -0.39942 0.710994 -0.155057
+v -0.178779 0.718135 -0.16643
+v -0.252112 0.713887 -0.166078
+v -0.106741 0.782082 -0.178969
+v -0.0602661 0.797425 -0.177797
+v -0.0705006 0.811753 -0.205807
+v -0.0845451 0.798015 -0.188581
+v -0.128352 0.75368 -0.16956
+v -0.140544 0.766364 -0.181936
+v -0.0771688 0.744134 -0.128582
+v -0.122404 0.729951 -0.151266
+v -0.0359638 0.781745 -0.13792
+v -0.0372542 0.755486 -0.106376
+v -0.0258744 0.814456 -0.19275
+v -0.0313039 0.800824 -0.16616
+v -0.0438017 0.818098 -0.377412
+v -0.0647564 0.824008 -0.337021
+v -0.0269807 0.832763 -0.306956
+v -0.0352238 0.828719 -0.341387
+v -0.0968568 0.789376 -0.411772
+v -0.0494105 0.797681 -0.413491
+v -0.176513 0.760515 -0.410655
+v -0.134148 0.794627 -0.38492
+v -0.140375 0.776566 -0.412207
+v -0.105446 0.812324 -0.352131
+v -0.163948 0.787613 -0.366372
+v -0.134862 0.801108 -0.364702
+v -0.0673729 0.8251 -0.299446
+v -0.0829065 0.820245 -0.328154
+v -0.0209585 0.83068 -0.246717
+v -0.0431316 0.822381 -0.222498
+v -0.0222752 0.824379 -0.219341
+v -0.0427143 0.830406 -0.274631
+v -0.0223763 0.833445 -0.275661
+v -0.0571055 0.825864 -0.248034
+v -0.0589761 0.827121 -0.27268
+v -0.0611506 0.820889 -0.225684
+v -0.220893 0.723503 -0.390265
+v -0.192333 0.761236 -0.378584
+v -0.20182 0.742487 -0.402976
+v -0.229147 0.73046 -0.348619
+v -0.238328 0.704586 -0.373617
+v -0.202503 0.759013 -0.342238
+v -0.217964 0.748268 -0.323878
+v -0.185497 0.772855 -0.357643
+v -0.208154 0.830042 -0.298948
+v -0.20855 0.866089 -0.317157
+v -0.182638 0.844163 -0.332751
+v -0.197796 0.836731 -0.320106
+v -0.207473 0.823979 -0.249259
+v -0.22169 0.856724 -0.273536
+v -0.212209 0.824662 -0.273398
+v -0.225435 0.891324 -0.293612
+v -0.221713 0.885753 -0.253374
+v -0.227222 0.886884 -0.273083
+v -0.202088 0.898609 -0.32219
+v -0.216036 0.894273 -0.311279
+v -0.144889 0.85895 -0.323167
+v -0.140188 0.893495 -0.304854
+v -0.113654 0.869597 -0.287244
+v -0.127225 0.865094 -0.306986
+v -0.17602 0.88052 -0.329499
+v -0.164181 0.851772 -0.332764
+v -0.170407 0.915574 -0.314537
+v -0.186655 0.907214 -0.322451
+v -0.141278 0.921251 -0.283964
+v -0.154011 0.919177 -0.300925
+v -0.106278 0.87162 -0.248059
+v -0.126235 0.896792 -0.232757
+v -0.125921 0.862724 -0.222037
+v -0.112672 0.868658 -0.232446
+v -0.120264 0.900007 -0.266157
+v -0.106638 0.871852 -0.266963
+v -0.136706 0.926532 -0.249134
+v -0.136018 0.925025 -0.266007
+v -0.152825 0.914652 -0.226568
+v -0.141818 0.921807 -0.235425
+v -0.169297 0.842653 -0.222278
+v -0.201725 0.863227 -0.234578
+v -0.191458 0.831381 -0.232334
+v -0.158724 0.882351 -0.220155
+v -0.146124 0.853579 -0.218747
+v -0.191735 0.902282 -0.228209
+v -0.171199 0.908866 -0.224253
+v -0.209229 0.892731 -0.238166
+v -0.228365 -0.446181 0.420977
+v -0.20511 -0.461837 0.475502
+v -0.183641 -0.455489 0.534379
+v -0.21326 -0.450303 0.478539
+v -0.176881 -0.48852 0.469991
+v -0.17086 -0.474631 0.521789
+v -0.206663 -0.472505 0.418626
+v -0.194975 -0.474631 0.472428
+v -0.109552 -0.472057 0.595485
+v -0.142559 -0.466669 0.57469
+v -0.132033 -0.481167 0.556781
+v -0.140849 -0.473458 0.563316
+v -0.102981 -0.465723 0.632974
+v -0.143787 -0.462243 0.587914
+v -0.0755892 -0.465387 0.639018
+v -0.0439995 -0.453929 0.682063
+v -0.0705061 -0.461083 0.663389
+v -0.0917758 -0.479034 0.591505
+v -0.0549351 -0.471091 0.629642
+v -0.0815781 -0.471375 0.616771
+v -0.0238268 -0.46053 0.664171
+v -0.0329053 -0.483094 0.604968
+v -0.0276324 -0.470883 0.636565
+v -0.0210984 -0.44987 0.691901
+v -0.0317646 -0.445352 0.134276
+v -0.065741 -0.440111 0.119726
+v -0.0323291 -0.440606 0.100743
+v -0.034368 -0.461296 0.211229
+v -0.0636123 -0.450028 0.182161
+v -0.0320458 -0.45108 0.17059
+v -0.124102 -0.450719 0.228684
+v -0.0965198 -0.444834 0.175662
+v -0.0981278 -0.457279 0.230677
+v -0.0942201 -0.449501 0.201118
+v -0.138015 -0.443895 0.192281
+v -0.101319 -0.442167 0.150389
+v -0.163536 -0.447685 0.255959
+v -0.205661 -0.440923 0.30331
+v -0.174785 -0.442414 0.24495
+v -0.138385 -0.462583 0.271465
+v -0.179537 -0.460826 0.31166
+v -0.153491 -0.454897 0.266081
+v -0.213095 -0.454425 0.36305
+v -0.178344 -0.481123 0.364052
+v -0.199389 -0.4673 0.363772
+v -0.224677 -0.442616 0.362277
+v -0.0774159 -0.49456 0.56248
+v -0.0472884 -0.515301 0.519274
+v -0.0400356 -0.499329 0.565268
+v -0.12906 -0.497175 0.517019
+v -0.109485 -0.488354 0.558077
+v -0.151776 -0.503865 0.416665
+v -0.102374 -0.516827 0.468605
+v -0.144853 -0.503338 0.468859
+v -0.0552219 -0.529321 0.415644
+v -0.0529288 -0.526727 0.468796
+v -0.101727 -0.509066 0.362546
+v -0.046641 -0.500432 0.308431
+v -0.0524328 -0.518796 0.361628
+v -0.12954 -0.482953 0.312862
+v -0.144743 -0.495777 0.363499
+v -0.0778956 -0.474931 0.261875
+v -0.111951 -0.469275 0.268257
+v -0.0399262 -0.479506 0.257737
+v -0.164631 -0.0504489 0.951176
+v -0.139042 -0.0451633 0.963659
+v -0.137897 -0.0298265 0.961061
+v -0.16267 -0.0297463 0.947597
+v -0.150007 -0.038161 0.957034
+v -0.109851 -0.0516794 0.976283
+v -0.122462 -0.037914 0.969963
+v -0.133714 -0.0642499 0.965416
+v -0.125245 -0.0487781 0.969619
+v -0.130968 -0.0135459 0.956851
+v -0.109331 -0.0304408 0.974293
+v -0.124172 -0.0273895 0.966259
+v -0.18873 -0.0434748 0.935764
+v -0.213918 -0.078022 0.922841
+v -0.172782 -0.0812671 0.947651
+v -0.185903 -0.064746 0.940486
+v -0.24743 -0.125194 0.898745
+v -0.247517 -0.0516572 0.891815
+v -0.248772 -0.0872467 0.896118
+v -0.209867 -0.161573 0.925244
+v -0.2493 -0.167014 0.898651
+v -0.133001 -0.161124 0.956956
+v -0.163882 -0.120989 0.949271
+v -0.170754 -0.160591 0.943861
+v -0.119758 -0.0911109 0.968067
+v -0.149065 -0.0885975 0.957545
+v -0.165953 -0.00373407 0.934179
+v -0.206641 -0.0168625 0.913183
+v -0.181563 -0.0218941 0.9332
+v -0.116698 0.0146029 0.948102
+v -0.155037 0.0342159 0.918365
+v -0.143447 0.00727528 0.940085
+v -0.188508 0.055577 0.881861
+v -0.128713 0.0776757 0.9026
+v -0.159167 0.0664178 0.894094
+v -0.225597 0.0165132 0.878354
+v -0.216542 0.0481267 0.863247
+v -0.237857 -0.0169093 0.886885
+v -0.0314129 0.062618 0.944549
+v -0.0591053 0.0273947 0.96148
+v -0.0272441 0.00174637 0.985368
+v -0.0296124 0.0304081 0.965777
+v -0.0652762 0.0925539 0.91594
+v -0.032753 0.0962353 0.919852
+v -0.0939443 0.0532189 0.93429
+v -0.0973395 0.0863776 0.910036
+v -0.0802969 -0.0053962 0.974961
+v -0.0883591 0.0217178 0.955411
+v -0.0313918 -0.121887 0.983992
+v -0.0640997 -0.158632 0.974036
+v -0.0317497 -0.157085 0.978196
+v -0.0273784 -0.0760818 0.993721
+v -0.05977 -0.0942344 0.98419
+v -0.0298425 -0.0936976 0.988793
+v -0.0956523 -0.123707 0.972085
+v -0.0810019 -0.0733797 0.982855
+v -0.0898671 -0.0931811 0.977114
+v -0.0976501 -0.160225 0.966985
+v -0.0864529 -0.0640166 0.98579
+v -0.0789269 -0.06284 0.994959
+v -0.0955999 -0.0476973 0.985969
+v -0.0940951 -0.0565412 0.98502
+v -0.0525276 -0.0775374 1.01483
+v -0.0666347 -0.0644079 1.00864
+v -0.0598989 -0.0753391 1.00401
+v -0.0652946 -0.0700523 1.00523
+v -0.0598411 -0.0536198 1.01768
+v -0.0788425 -0.0488271 1.00064
+v -0.0525009 -0.0662129 1.0231
+v -0.0652608 -0.0582549 1.01251
+v -0.0861068 -0.0306433 0.989153
+v -0.0938769 -0.0385712 0.986828
+v -0.0582598 -0.0403976 1.0119
+v -0.0476184 -0.0223971 1.0006
+v -0.0694706 -0.025394 0.994502
+v -0.0426567 -0.0627588 1.03251
+v -0.0344846 -0.0534302 1.03232
+v -0.0490439 -0.0525287 1.02501
+v -0.0209047 -0.039677 1.02317
+v -0.0152462 -0.0676801 1.04602
+v -0.0177789 -0.0547726 1.03745
+v -0.0242006 -0.0212264 1.00515
+v -0.0209476 -0.0779737 1.00716
+v -0.0477856 -0.0718253 0.995571
+v -0.0242757 -0.0726037 0.999895
+v -0.0152667 -0.0940994 1.02438
+v -0.0345351 -0.0845283 1.01121
+v -0.0178069 -0.0869021 1.01538
+v -0.0583871 -0.073576 0.998233
+v -0.0427012 -0.0854758 1.01505
+v -0.0491057 -0.0804193 1.00671
+v -0.0697639 -0.0690371 0.990088
+v -0.0394909 -0.0796254 1.02906
+v -0.0271444 -0.0763309 1.04139
+v -0.0399318 -0.0720589 1.03309
+v -0.0271637 -0.0905263 1.02914
+v -0.0399551 -0.0846544 1.02277
+v -0.0132272 -0.0889316 1.04223
+v -0.01374 -0.0942759 1.03402
+v -0.0137291 -0.0795664 1.04692
+vt 0.800375 0.667457
+vt 0.789584 0.668215
+vt 0.799923 0.663933
+vt 0.789057 0.664897
+vt 0.811842 0.670848
+vt 0.80103 0.671718
+vt 0.811217 0.666381
+vt 0.810346 0.655189
+vt 0.810548 0.658683
+vt 0.799442 0.658809
+vt 0.799641 0.661178
+vt 0.788653 0.662549
+vt 0.821717 0.660492
+vt 0.821466 0.655998
+vt 0.832622 0.658487
+vt 0.832389 0.653218
+vt 0.821251 0.651569
+vt 0.822539 0.669852
+vt 0.822041 0.665116
+vt 0.832855 0.663756
+vt 0.768389 0.668485
+vt 0.778915 0.668524
+vt 0.769427 0.673433
+vt 0.779732 0.673035
+vt 0.778165 0.664791
+vt 0.74577 0.661865
+vt 0.756532 0.663039
+vt 0.747747 0.667793
+vt 0.758027 0.6682
+vt 0.759367 0.673605
+vt 0.753496 0.653114
+vt 0.765177 0.656219
+vt 0.755019 0.658037
+vt 0.76625 0.660042
+vt 0.743793 0.655937
+vt 0.777492 0.661858
+vt 0.776857 0.659324
+vt 0.860743 0.66105
+vt 0.85437 0.658994
+vt 0.861885 0.655612
+vt 0.85471 0.654338
+vt 0.858253 0.665619
+vt 0.86356 0.668573
+vt 0.867109 0.663417
+vt 0.870052 0.650753
+vt 0.862482 0.650015
+vt 0.870421 0.644717
+vt 0.862589 0.644612
+vt 0.85505 0.649682
+vt 0.883125 0.662497
+vt 0.876218 0.659429
+vt 0.885992 0.655087
+vt 0.877897 0.652301
+vt 0.878895 0.645534
+vt 0.869196 0.672347
+vt 0.87346 0.666407
+vt 0.879453 0.669777
+vt 0.889305 0.677736
+vt 0.893116 0.683944
+vt 0.881403 0.682863
+vt 0.884277 0.688027
+vt 0.896144 0.692562
+vt 0.90593 0.689794
+vt 0.901798 0.679591
+vt 0.889995 0.666526
+vt 0.884742 0.673283
+vt 0.878268 0.679148
+vt 0.902596 0.665971
+vt 0.89431 0.659538
+vt 0.908133 0.659549
+vt 0.89794 0.652608
+vt 0.916609 0.686107
+vt 0.910596 0.674701
+vt 0.918591 0.668435
+vt 0.713434 0.678889
+vt 0.718232 0.673865
+vt 0.719971 0.68226
+vt 0.723135 0.677582
+vt 0.720227 0.663887
+vt 0.722594 0.669482
+vt 0.714094 0.668877
+vt 0.690854 0.689174
+vt 0.699881 0.681004
+vt 0.701094 0.693026
+vt 0.707762 0.685198
+vt 0.716169 0.688193
+vt 0.697269 0.662673
+vt 0.702107 0.668949
+vt 0.686646 0.668347
+vt 0.692886 0.67536
+vt 0.6822 0.683174
+vt 0.710252 0.663404
+vt 0.717861 0.658291
+vt 0.706788 0.657767
+vt 0.858365 0.672357
+vt 0.853611 0.668611
+vt 0.843559 0.666338
+vt 0.848795 0.664994
+vt 0.847566 0.670615
+vt 0.855227 0.679121
+vt 0.862995 0.676364
+vt 0.858343 0.683022
+vt 0.867102 0.680206
+vt 0.834415 0.678265
+vt 0.843484 0.67676
+vt 0.836434 0.683129
+vt 0.846261 0.68128
+vt 0.848914 0.685681
+vt 0.840865 0.672225
+vt 0.838324 0.667682
+vt 0.832962 0.673545
+vt 0.812703 0.675707
+vt 0.801925 0.676679
+vt 0.791163 0.677558
+vt 0.792268 0.683394
+vt 0.803066 0.682261
+vt 0.82446 0.679693
+vt 0.823312 0.674685
+vt 0.815369 0.686723
+vt 0.826081 0.684957
+vt 0.817113 0.692704
+vt 0.827981 0.690382
+vt 0.79352 0.689825
+vt 0.804461 0.688386
+vt 0.806115 0.694921
+vt 0.798864 0.71184
+vt 0.810186 0.708981
+vt 0.801046 0.72017
+vt 0.812549 0.716812
+vt 0.823667 0.712859
+vt 0.826337 0.721153
+vt 0.81509 0.725328
+vt 0.819076 0.698906
+vt 0.808032 0.701735
+vt 0.796857 0.704106
+vt 0.829963 0.69587
+vt 0.840668 0.692798
+vt 0.832092 0.701818
+vt 0.842717 0.698135
+vt 0.834432 0.708622
+vt 0.837107 0.716573
+vt 0.844937 0.704332
+vt 0.867471 0.703655
+vt 0.86533 0.696176
+vt 0.877095 0.699305
+vt 0.875007 0.692109
+vt 0.872828 0.687102
+vt 0.853212 0.694487
+vt 0.855272 0.700217
+vt 0.857607 0.707609
+vt 0.851162 0.689863
+vt 0.861137 0.686828
+vt 0.870286 0.68346
+vt 0.737342 0.674399
+vt 0.736161 0.679693
+vt 0.731151 0.674738
+vt 0.729268 0.679242
+vt 0.727195 0.684244
+vt 0.743049 0.685744
+vt 0.743345 0.679784
+vt 0.751789 0.685777
+vt 0.751276 0.679628
+vt 0.743533 0.67406
+vt 0.73204 0.698591
+vt 0.733516 0.691513
+vt 0.741945 0.699272
+vt 0.742539 0.692176
+vt 0.751932 0.692359
+vt 0.72474 0.690242
+vt 0.722029 0.697328
+vt 0.716708 0.714828
+vt 0.715092 0.72481
+vt 0.702617 0.714132
+vt 0.700538 0.724407
+vt 0.728191 0.725265
+vt 0.727285 0.735883
+vt 0.713954 0.735509
+vt 0.730575 0.706575
+vt 0.719186 0.705595
+vt 0.70688 0.704276
+vt 0.740902 0.715992
+vt 0.741399 0.707225
+vt 0.75229 0.716217
+vt 0.752093 0.707508
+vt 0.740453 0.725533
+vt 0.740092 0.735897
+vt 0.752425 0.725529
+vt 0.789126 0.722623
+vt 0.790991 0.731868
+vt 0.776943 0.72426
+vt 0.778298 0.733889
+vt 0.787299 0.713921
+vt 0.76386 0.71596
+vt 0.764649 0.725167
+vt 0.765442 0.735096
+vt 0.773153 0.698992
+vt 0.774288 0.706826
+vt 0.762394 0.699466
+vt 0.763091 0.707391
+vt 0.785576 0.705767
+vt 0.784024 0.698116
+vt 0.781583 0.684283
+vt 0.780606 0.678305
+vt 0.77033 0.678903
+vt 0.772112 0.691699
+vt 0.782707 0.69092
+vt 0.761214 0.685501
+vt 0.761821 0.692189
+vt 0.760412 0.679339
+vt 0.899727 0.753469
+vt 0.893091 0.747999
+vt 0.905106 0.743418
+vt 0.897762 0.739947
+vt 0.887783 0.741206
+vt 0.880731 0.745814
+vt 0.886161 0.754214
+vt 0.904716 0.775474
+vt 0.898651 0.768079
+vt 0.914323 0.765617
+vt 0.907202 0.758849
+vt 0.913707 0.74723
+vt 0.877978 0.781668
+vt 0.872582 0.772759
+vt 0.888929 0.775502
+vt 0.883177 0.767576
+vt 0.894526 0.783756
+vt 0.872228 0.750111
+vt 0.877492 0.759308
+vt 0.867387 0.763696
+vt 0.843778 0.771729
+vt 0.83028 0.775596
+vt 0.839713 0.76143
+vt 0.826842 0.765165
+vt 0.83386 0.786091
+vt 0.815809 0.779609
+vt 0.818643 0.790465
+vt 0.860927 0.77752
+vt 0.856148 0.767799
+vt 0.851602 0.757722
+vt 0.852231 0.792074
+vt 0.865736 0.787127
+vt 0.856409 0.802551
+vt 0.870501 0.796949
+vt 0.837491 0.796705
+vt 0.821654 0.801351
+vt 0.841215 0.807639
+vt 0.724356 0.780932
+vt 0.721897 0.791331
+vt 0.712133 0.77893
+vt 0.708432 0.787628
+vt 0.688991 0.788336
+vt 0.696924 0.781352
+vt 0.7032 0.796344
+vt 0.734947 0.804179
+vt 0.736823 0.793158
+vt 0.751835 0.804371
+vt 0.752608 0.793134
+vt 0.738282 0.782037
+vt 0.732844 0.815397
+vt 0.730608 0.827229
+vt 0.715066 0.812163
+vt 0.711339 0.823597
+vt 0.751007 0.816099
+vt 0.681689 0.797702
+vt 0.697958 0.806263
+vt 0.692594 0.817566
+vt 0.802671 0.795049
+vt 0.800517 0.783977
+vt 0.798447 0.773092
+vt 0.783398 0.776846
+vt 0.784699 0.788076
+vt 0.788691 0.822864
+vt 0.787323 0.810943
+vt 0.807157 0.818048
+vt 0.804858 0.806345
+vt 0.768947 0.80275
+vt 0.769269 0.814405
+vt 0.769629 0.82649
+vt 0.768262 0.779889
+vt 0.768648 0.791284
+vt 0.829319 0.730383
+vt 0.817782 0.734631
+vt 0.807922 0.748352
+vt 0.80562 0.738467
+vt 0.820629 0.744508
+vt 0.843731 0.736281
+vt 0.840238 0.725959
+vt 0.854152 0.732184
+vt 0.850604 0.721518
+vt 0.847491 0.747042
+vt 0.836031 0.750919
+vt 0.858049 0.743181
+vt 0.823637 0.754744
+vt 0.810425 0.758547
+vt 0.739591 0.758816
+vt 0.739858 0.747133
+vt 0.753132 0.758368
+vt 0.752889 0.746823
+vt 0.726681 0.747058
+vt 0.739135 0.77052
+vt 0.725558 0.770057
+vt 0.753262 0.7701
+vt 0.69786 0.758579
+vt 0.712375 0.758296
+vt 0.700367 0.76912
+vt 0.712782 0.769064
+vt 0.712907 0.746891
+vt 0.698005 0.747078
+vt 0.929639 0.699595
+vt 0.928443 0.716654
+vt 0.918934 0.700571
+vt 0.918764 0.716584
+vt 0.908376 0.702434
+vt 0.917291 0.732639
+vt 0.908091 0.730817
+vt 0.926839 0.733706
+vt 0.899583 0.717133
+vt 0.899655 0.729345
+vt 0.890467 0.718513
+vt 0.891789 0.729325
+vt 0.898356 0.704249
+vt 0.888732 0.70646
+vt 0.766981 0.757058
+vt 0.767683 0.768445
+vt 0.782175 0.765786
+vt 0.779622 0.744124
+vt 0.766227 0.745834
+vt 0.794659 0.751851
+vt 0.792827 0.741649
+vt 0.796513 0.762357
+vt 0.870023 0.713177
+vt 0.860485 0.717218
+vt 0.863872 0.728122
+vt 0.881776 0.720957
+vt 0.879363 0.709514
+vt 0.876505 0.735646
+vt 0.884297 0.731857
+vt 0.867737 0.739401
+vt 0.942885 0.685053
+vt 0.953367 0.687709
+vt 0.940102 0.70044
+vt 0.950147 0.702062
+vt 0.938165 0.716742
+vt 0.968754 0.704629
+vt 0.964644 0.717556
+vt 0.959599 0.703413
+vt 0.956139 0.717201
+vt 0.963128 0.689798
+vt 0.953145 0.730828
+vt 0.951463 0.743182
+vt 0.945243 0.731521
+vt 0.944286 0.744982
+vt 0.960533 0.730482
+vt 0.936315 0.732906
+vt 0.935707 0.747908
+vt 0.168776 0.186925
+vt 0.153188 0.186399
+vt 0.169355 0.173108
+vt 0.15372 0.172537
+vt 0.137511 0.174005
+vt 0.154886 0.15836
+vt 0.139434 0.160894
+vt 0.135878 0.200202
+vt 0.153921 0.199631
+vt 0.13741 0.213278
+vt 0.156324 0.212569
+vt 0.169774 0.199674
+vt 0.115769 0.200908
+vt 0.0946273 0.201684
+vt 0.11798 0.188318
+vt 0.0992616 0.189776
+vt 0.115489 0.212947
+vt 0.120823 0.175844
+vt 0.123982 0.163427
+vt 0.103896 0.177868
+vt 0.197705 0.18287
+vt 0.184318 0.177386
+vt 0.204286 0.174321
+vt 0.191177 0.166862
+vt 0.182759 0.189357
+vt 0.218051 0.196712
+vt 0.206672 0.195261
+vt 0.218754 0.189245
+vt 0.208611 0.187062
+vt 0.213838 0.179979
+vt 0.195322 0.2024
+vt 0.206572 0.20383
+vt 0.197221 0.212029
+vt 0.207879 0.212331
+vt 0.217851 0.204542
+vt 0.183316 0.200806
+vt 0.185616 0.211745
+vt 0.200177 0.221191
+vt 0.189289 0.222187
+vt 0.176497 0.223558
+vt 0.183072 0.235218
+vt 0.194814 0.232099
+vt 0.222696 0.225794
+vt 0.213531 0.227462
+vt 0.220244 0.219432
+vt 0.210159 0.220329
+vt 0.219048 0.242324
+vt 0.210793 0.236729
+vt 0.224444 0.237674
+vt 0.218111 0.23337
+vt 0.225701 0.231231
+vt 0.192704 0.246651
+vt 0.20267 0.241446
+vt 0.21291 0.248905
+vt 0.147552 0.942756
+vt 0.13896 0.937482
+vt 0.15442 0.936036
+vt 0.145004 0.930904
+vt 0.131991 0.943023
+vt 0.122815 0.936756
+vt 0.130641 0.931218
+vt 0.148962 0.952945
+vt 0.155644 0.947301
+vt 0.157149 0.955791
+vt 0.164222 0.950166
+vt 0.162965 0.941085
+vt 0.125551 0.956525
+vt 0.133402 0.953212
+vt 0.134656 0.9604
+vt 0.141677 0.957166
+vt 0.14902 0.960085
+vt 0.115375 0.942445
+vt 0.124894 0.947972
+vt 0.116632 0.951525
+vt 0.871579 0.942636
+vt 0.878596 0.946725
+vt 0.86416 0.948207
+vt 0.872055 0.95214
+vt 0.886187 0.942108
+vt 0.885478 0.951547
+vt 0.893652 0.946846
+vt 0.872605 0.934139
+vt 0.865133 0.939046
+vt 0.865671 0.931509
+vt 0.858017 0.93669
+vt 0.857275 0.944342
+vt 0.895145 0.930139
+vt 0.887213 0.93361
+vt 0.887365 0.926738
+vt 0.880339 0.930202
+vt 0.873942 0.927331
+vt 0.901581 0.942011
+vt 0.893918 0.937827
+vt 0.902324 0.934359
+vt 0.937778 0.969427
+vt 0.924742 0.963135
+vt 0.963417 0.953814
+vt 0.94277 0.951002
+vt 0.929636 0.938971
+vt 0.950985 0.939207
+vt 0.924587 0.948175
+vt 0.897606 0.964725
+vt 0.904637 0.974329
+vt 0.880983 0.968761
+vt 0.883219 0.980673
+vt 0.91214 0.985041
+vt 0.902437 0.951808
+vt 0.891517 0.95734
+vt 0.87954 0.960235
+vt 0.911331 0.94532
+vt 0.915423 0.938551
+vt 0.837802 0.927994
+vt 0.819804 0.940196
+vt 0.826204 0.927352
+vt 0.800609 0.943494
+vt 0.811605 0.952025
+vt 0.831685 0.94681
+vt 0.836697 0.937574
+vt 0.857877 0.916732
+vt 0.87928 0.910354
+vt 0.863614 0.921492
+vt 0.880267 0.917735
+vt 0.851798 0.911211
+vt 0.857703 0.929735
+vt 0.868668 0.924726
+vt 0.880724 0.922242
+vt 0.845039 0.94326
+vt 0.848987 0.936302
+vt 0.924475 0.930625
+vt 0.941648 0.929031
+vt 0.96114 0.927417
+vt 0.933224 0.916635
+vt 0.922496 0.92029
+vt 0.903495 0.926861
+vt 0.911938 0.93218
+vt 0.897103 0.91889
+vt 0.892712 0.923251
+vt 0.905308 0.905852
+vt 0.901263 0.912802
+vt 0.820926 0.962167
+vt 0.836809 0.955124
+vt 0.840048 0.970839
+vt 0.802886 0.96989
+vt 0.830758 0.980145
+vt 0.856645 0.954682
+vt 0.848379 0.949442
+vt 0.864116 0.967327
+vt 0.867473 0.958814
+vt 0.86125 0.978259
+vt 0.85863 0.990399
+vt 0.191842 0.938528
+vt 0.18483 0.928669
+vt 0.214023 0.932819
+vt 0.202519 0.920832
+vt 0.190621 0.899112
+vt 0.222723 0.912315
+vt 0.180131 0.909732
+vt 0.161958 0.928403
+vt 0.17217 0.935147
+vt 0.177196 0.942628
+vt 0.133862 0.910183
+vt 0.152639 0.912726
+vt 0.135915 0.920575
+vt 0.149326 0.922843
+vt 0.155653 0.899975
+vt 0.158518 0.885909
+vt 0.131 0.896051
+vt 0.0727151 0.905945
+vt 0.0872616 0.914579
+vt 0.045865 0.922565
+vt 0.0688679 0.927864
+vt 0.0891699 0.932953
+vt 0.0611106 0.940896
+vt 0.0850445 0.943365
+vt 0.116168 0.914322
+vt 0.108088 0.902447
+vt 0.0995652 0.889326
+vt 0.112481 0.9301
+vt 0.123364 0.923705
+vt 0.10407 0.937626
+vt 0.101008 0.945485
+vt 0.141749 0.967835
+vt 0.159735 0.964585
+vt 0.141615 0.973986
+vt 0.164947 0.968203
+vt 0.198452 0.956803
+vt 0.170264 0.972446
+vt 0.186612 0.957297
+vt 0.165935 0.957311
+vt 0.154732 0.962219
+vt 0.141924 0.964387
+vt 0.187555 0.948567
+vt 0.17427 0.950399
+vt 0.205846 0.945238
+vt 0.226639 0.94116
+vt 0.0918948 0.952851
+vt 0.072194 0.95227
+vt 0.0497805 0.951411
+vt 0.080546 0.963637
+vt 0.0937428 0.962144
+vt 0.116457 0.959008
+vt 0.10617 0.952878
+vt 0.123264 0.966181
+vt 0.12877 0.963081
+vt 0.111312 0.975863
+vt 0.117382 0.970674
+vt 0.168188 0.257701
+vt 0.179853 0.251475
+vt 0.181861 0.270696
+vt 0.19145 0.264485
+vt 0.168322 0.238524
+vt 0.157989 0.278114
+vt 0.139623 0.26474
+vt 0.161775 0.267111
+vt 0.143271 0.254169
+vt 0.176773 0.279775
+vt 0.122129 0.24267
+vt 0.0996677 0.231893
+vt 0.121641 0.233595
+vt 0.0964428 0.225792
+vt 0.121258 0.251367
+vt 0.141637 0.226246
+vt 0.118437 0.223765
+vt 0.0932179 0.219692
+vt 0.160802 0.225544
+vt 0.0419152 0.53618
+vt 0.0472563 0.523503
+vt 0.0332681 0.539408
+vt 0.038828 0.522754
+vt 0.0547796 0.5128
+vt 0.0629257 0.517019
+vt 0.0550589 0.52441
+vt 0.0478604 0.554178
+vt 0.0476885 0.543737
+vt 0.0376346 0.561691
+vt 0.0380519 0.549348
+vt 0.0277081 0.556063
+vt 0.0662616 0.533766
+vt 0.0581 0.532485
+vt 0.0632249 0.540131
+vt 0.0559107 0.540335
+vt 0.055828 0.548702
+vt 0.0616102 0.525629
+vt 0.0679204 0.520553
+vt 0.0687181 0.527585
+vt 0.0509533 0.580146
+vt 0.0426311 0.57189
+vt 0.0442882 0.588789
+vt 0.0332182 0.580753
+vt 0.0513392 0.563763
+vt 0.0757862 0.577556
+vt 0.0655669 0.57722
+vt 0.0718228 0.588438
+vt 0.060513 0.586655
+vt 0.0553583 0.596825
+vt 0.0637107 0.564356
+vt 0.0704192 0.569255
+vt 0.070071 0.55722
+vt 0.0760258 0.562179
+vt 0.0789464 0.569999
+vt 0.0586375 0.557107
+vt 0.0648092 0.550898
+vt 0.0904211 0.548013
+vt 0.0833422 0.555408
+vt 0.0957205 0.55254
+vt 0.0881221 0.560358
+vt 0.0767944 0.549978
+vt 0.090315 0.526886
+vt 0.0877993 0.53464
+vt 0.0982366 0.53011
+vt 0.0953147 0.539062
+vt 0.100791 0.542278
+vt 0.0746039 0.537227
+vt 0.0781908 0.530701
+vt 0.0798442 0.524364
+vt 0.0701373 0.544116
+vt 0.925171 0.815649
+vt 0.920243 0.805051
+vt 0.934775 0.80669
+vt 0.929739 0.796503
+vt 0.93322 0.778201
+vt 0.938087 0.788562
+vt 0.924761 0.786096
+vt 0.904862 0.803051
+vt 0.910168 0.813683
+vt 0.893079 0.810992
+vt 0.898686 0.821929
+vt 0.915568 0.824608
+vt 0.910263 0.784583
+vt 0.899742 0.793007
+vt 0.888113 0.80057
+vt 0.919899 0.77525
+vt 0.928896 0.766724
+vt 0.815787 0.855921
+vt 0.819336 0.868912
+vt 0.793818 0.862058
+vt 0.795798 0.875795
+vt 0.831679 0.83698
+vt 0.836161 0.849417
+vt 0.812478 0.843031
+vt 0.770591 0.852857
+vt 0.771369 0.867508
+vt 0.748599 0.854921
+vt 0.748212 0.870509
+vt 0.772261 0.882678
+vt 0.790259 0.835472
+vt 0.77004 0.839246
+vt 0.749297 0.841006
+vt 0.80965 0.830342
+vt 0.827993 0.824626
+vt 0.724119 0.869298
+vt 0.722193 0.885121
+vt 0.69961 0.865356
+vt 0.695663 0.88068
+vt 0.726136 0.854086
+vt 0.681189 0.844692
+vt 0.675206 0.860163
+vt 0.659402 0.836491
+vt 0.651382 0.852648
+vt 0.669133 0.87624
+vt 0.686995 0.830434
+vt 0.707465 0.836555
+vt 0.667117 0.821916
+vt 0.728335 0.840094
+vt 0.844791 0.630193
+vt 0.844724 0.634228
+vt 0.843982 0.630547
+vt 0.842478 0.634715
+vt 0.837664 0.639425
+vt 0.840419 0.635163
+vt 0.840973 0.638882
+vt 0.848717 0.637567
+vt 0.847346 0.633664
+vt 0.854083 0.636646
+vt 0.851791 0.632782
+vt 0.845599 0.629839
+vt 0.849337 0.641623
+vt 0.849581 0.645756
+vt 0.844215 0.642375
+vt 0.843773 0.646487
+vt 0.855223 0.640762
+vt 0.839469 0.64305
+vt 0.83491 0.643687
+vt 0.837964 0.647218
+vt 0.725656 0.643083
+vt 0.719643 0.64232
+vt 0.724939 0.640251
+vt 0.71996 0.638782
+vt 0.711679 0.641543
+vt 0.711659 0.645946
+vt 0.719749 0.645744
+vt 0.738356 0.646598
+vt 0.732577 0.6462
+vt 0.736627 0.644893
+vt 0.731247 0.643959
+vt 0.729918 0.641719
+vt 0.727515 0.648634
+vt 0.733906 0.64844
+vt 0.728655 0.651352
+vt 0.735235 0.65068
+vt 0.740086 0.648303
+vt 0.712958 0.649959
+vt 0.7207 0.648941
+vt 0.722074 0.652024
+vt 0.884966 0.82932
+vt 0.890192 0.840682
+vt 0.86976 0.836188
+vt 0.87442 0.847798
+vt 0.879899 0.818138
+vt 0.849222 0.830896
+vt 0.853822 0.842863
+vt 0.858647 0.854913
+vt 0.860816 0.813432
+vt 0.845073 0.819095
+vt 0.87515 0.807317
+vt 0.953522 0.784398
+vt 0.950371 0.775653
+vt 0.958094 0.777732
+vt 0.955796 0.769832
+vt 0.959544 0.757237
+vt 0.961105 0.764151
+vt 0.953683 0.761713
+vt 0.940794 0.771842
+vt 0.944716 0.781755
+vt 0.94895 0.791065
+vt 0.945308 0.756461
+vt 0.937496 0.760723
+vt 0.957984 0.750323
+vt 0.951941 0.753153
+vt 0.237985 0.738297
+vt 0.268705 0.737229
+vt 0.241633 0.762787
+vt 0.268705 0.761697
+vt 0.200475 0.715676
+vt 0.234693 0.711537
+vt 0.207029 0.741466
+vt 0.222333 0.788147
+vt 0.245519 0.78554
+vt 0.230341 0.810102
+vt 0.249523 0.807426
+vt 0.268705 0.784364
+vt 0.159946 0.773803
+vt 0.187254 0.769363
+vt 0.175961 0.794629
+vt 0.199147 0.791261
+vt 0.211159 0.812779
+vt 0.165843 0.72393
+vt 0.175598 0.746705
+vt 0.14393 0.752977
+vt 0.253345 0.280308
+vt 0.268706 0.280765
+vt 0.253464 0.295744
+vt 0.268576 0.296249
+vt 0.238317 0.265747
+vt 0.253558 0.266767
+vt 0.238168 0.279086
+vt 0.239577 0.31028
+vt 0.253859 0.311911
+vt 0.240382 0.325349
+vt 0.254334 0.327593
+vt 0.268615 0.312525
+vt 0.21323 0.291052
+vt 0.225134 0.292637
+vt 0.214544 0.305067
+vt 0.22632 0.307825
+vt 0.227134 0.322083
+vt 0.223441 0.277203
+vt 0.223004 0.263413
+vt 0.210121 0.276206
+vt 0.268934 0.241592
+vt 0.268768 0.248379
+vt 0.255927 0.241503
+vt 0.254998 0.248112
+vt 0.243677 0.241038
+vt 0.256594 0.23557
+vt 0.244966 0.235422
+vt 0.239676 0.25526
+vt 0.254158 0.256284
+vt 0.268799 0.256538
+vt 0.225586 0.253155
+vt 0.229504 0.24562
+vt 0.233074 0.239996
+vt 0.235164 0.235206
+vt 0.672084 0.639787
+vt 0.66845 0.628475
+vt 0.686608 0.639919
+vt 0.68396 0.631421
+vt 0.660815 0.654442
+vt 0.655577 0.64149
+vt 0.676137 0.650509
+vt 0.700237 0.640623
+vt 0.701506 0.646666
+vt 0.69947 0.634368
+vt 0.693146 0.655714
+vt 0.703781 0.652284
+vt 0.681027 0.660046
+vt 0.66725 0.665722
+vt 0.869213 0.629853
+vt 0.859507 0.631337
+vt 0.868187 0.625508
+vt 0.857298 0.627496
+vt 0.861301 0.635363
+vt 0.891113 0.635857
+vt 0.879896 0.634217
+vt 0.89156 0.629458
+vt 0.879626 0.628853
+vt 0.879077 0.62352
+vt 0.870406 0.639191
+vt 0.879607 0.639641
+vt 0.889853 0.641715
+vt 0.862262 0.639758
+vt 0.92121 0.637592
+vt 0.905828 0.63279
+vt 0.925118 0.630408
+vt 0.907542 0.62597
+vt 0.903798 0.639508
+vt 0.942567 0.656273
+vt 0.930472 0.650822
+vt 0.949711 0.646823
+vt 0.936482 0.642606
+vt 0.942694 0.634846
+vt 0.912967 0.652217
+vt 0.924862 0.659954
+vt 0.936428 0.667127
+vt 0.901136 0.646022
+vt 0.966001 0.650564
+vt 0.958961 0.649232
+vt 0.970644 0.639264
+vt 0.965457 0.639274
+vt 0.952791 0.65981
+vt 0.969418 0.663982
+vt 0.972601 0.651548
+vt 0.976942 0.665473
+vt 0.97898 0.652359
+vt 0.975831 0.639254
+vt 0.957281 0.67448
+vt 0.966331 0.676693
+vt 0.974903 0.678588
+vt 0.947274 0.67163
+vt 0.269766 0.158525
+vt 0.250658 0.159766
+vt 0.270029 0.149052
+vt 0.248065 0.150831
+vt 0.220307 0.146351
+vt 0.245291 0.141613
+vt 0.226699 0.154829
+vt 0.24059 0.178843
+vt 0.237237 0.170956
+vt 0.254532 0.176342
+vt 0.25289 0.168134
+vt 0.269586 0.166899
+vt 0.216438 0.167843
+vt 0.223799 0.174757
+vt 0.228354 0.181409
+vt 0.20652 0.159758
+vt 0.195322 0.151089
+vt 0.255674 0.193252
+vt 0.255353 0.184799
+vt 0.26904 0.192481
+vt 0.269203 0.183716
+vt 0.242017 0.186776
+vt 0.242969 0.210229
+vt 0.242663 0.202753
+vt 0.255922 0.209205
+vt 0.255817 0.201447
+vt 0.269103 0.200716
+vt 0.229862 0.196333
+vt 0.229944 0.203984
+vt 0.230377 0.211402
+vt 0.229853 0.188704
+vt 0.256133 0.216346
+vt 0.269011 0.215922
+vt 0.256393 0.223033
+vt 0.26887 0.222737
+vt 0.243486 0.217235
+vt 0.244951 0.229727
+vt 0.256648 0.229427
+vt 0.268973 0.2293
+vt 0.232926 0.224635
+vt 0.234642 0.230172
+vt 0.231438 0.218332
+vt 0.231875 0.681974
+vt 0.195405 0.687835
+vt 0.268705 0.681041
+vt 0.268707 0.649795
+vt 0.231298 0.650222
+vt 0.159651 0.701684
+vt 0.124255 0.719528
+vt 0.152896 0.681504
+vt 0.120595 0.706905
+vt 0.162996 0.634435
+vt 0.141452 0.664926
+vt 0.142052 0.61864
+vt 0.127664 0.650149
+vt 0.116935 0.694282
+vt 0.211524 0.580428
+vt 0.203702 0.612749
+vt 0.187831 0.570739
+vt 0.178593 0.601672
+vt 0.15644 0.58713
+vt 0.268705 0.617234
+vt 0.234725 0.616894
+vt 0.239135 0.585332
+vt 0.199167 0.27764
+vt 0.203439 0.290476
+vt 0.191338 0.282164
+vt 0.195938 0.298294
+vt 0.204702 0.30253
+vt 0.194745 0.309006
+vt 0.204397 0.314025
+vt 0.187394 0.290439
+vt 0.185378 0.30188
+vt 0.158413 0.418134
+vt 0.159054 0.422514
+vt 0.151701 0.418126
+vt 0.153357 0.422511
+vt 0.148767 0.427196
+vt 0.147324 0.422254
+vt 0.154331 0.42702
+vt 0.164378 0.426932
+vt 0.164461 0.422716
+vt 0.169254 0.42687
+vt 0.169733 0.422548
+vt 0.164807 0.418553
+vt 0.159617 0.435925
+vt 0.159655 0.431437
+vt 0.163693 0.43525
+vt 0.1639 0.431123
+vt 0.16807 0.431122
+vt 0.15511 0.43158
+vt 0.150273 0.432153
+vt 0.155198 0.436148
+vt 0.137897 0.391939
+vt 0.139469 0.389504
+vt 0.17063 0.392906
+vt 0.177579 0.390684
+vt 0.108525 0.396264
+vt 0.10962 0.392167
+vt 0.136029 0.394644
+vt 0.189135 0.398093
+vt 0.201386 0.395105
+vt 0.205359 0.404085
+vt 0.221392 0.400547
+vt 0.215689 0.391863
+vt 0.159402 0.397834
+vt 0.180988 0.400577
+vt 0.156251 0.401361
+vt 0.175531 0.40322
+vt 0.1956 0.40625
+vt 0.133566 0.39789
+vt 0.106177 0.400376
+vt 0.13333 0.401583
+vt 0.105778 0.419582
+vt 0.112909 0.431692
+vt 0.0857539 0.42372
+vt 0.0978159 0.435407
+vt 0.0792543 0.412786
+vt 0.0493589 0.420043
+vt 0.0604704 0.429958
+vt 0.113824 0.445537
+vt 0.130697 0.452864
+vt 0.104492 0.449389
+vt 0.127195 0.455721
+vt 0.123421 0.442151
+vt 0.0620736 0.445754
+vt 0.0967788 0.453971
+vt 0.042503 0.451596
+vt 0.0898755 0.458919
+vt 0.12693 0.459685
+vt 0.0109524 0.427572
+vt 0.0292777 0.436932
+vt -0.00486957 0.444274
+vt 0.218498 0.42425
+vt 0.211205 0.424626
+vt 0.236149 0.408646
+vt 0.221871 0.411246
+vt 0.209393 0.413479
+vt 0.194343 0.436667
+vt 0.197771 0.438115
+vt 0.182271 0.446693
+vt 0.18375 0.449328
+vt 0.200848 0.439853
+vt 0.190212 0.435797
+vt 0.185733 0.435099
+vt 0.197971 0.425316
+vt 0.191608 0.425692
+vt 0.179819 0.445093
+vt 0.200514 0.414982
+vt 0.193566 0.416303
+vt 0.160223 0.460849
+vt 0.159192 0.459271
+vt 0.17171 0.458153
+vt 0.171322 0.455881
+vt 0.17064 0.453743
+vt 0.14455 0.458507
+vt 0.146064 0.460995
+vt 0.148735 0.463545
+vt 0.158128 0.455703
+vt 0.14535 0.456142
+vt 0.158194 0.452773
+vt 0.147456 0.453335
+vt 0.169371 0.451874
+vt 0.167924 0.449711
+vt 0.0851413 0.39891
+vt 0.0922294 0.393554
+vt 0.0991561 0.388722
+vt 0.100257 0.388524
+vt 0.0806903 0.395723
+vt 0.0520286 0.41091
+vt 0.0799332 0.404918
+vt 0.0223556 0.413036
+vt 0.0433475 0.407848
+vt -0.0149432 0.424994
+vt 0.019659 0.417742
+vt 0.0699681 0.398298
+vt 0.0980548 0.388921
+vt 0.0596545 0.401078
+vt 0.112325 0.545469
+vt 0.112227 0.562147
+vt 0.121292 0.533311
+vt 0.124355 0.551553
+vt 0.0994555 0.572536
+vt 0.0969232 0.587328
+vt 0.112171 0.578561
+vt 0.106779 0.524557
+vt 0.107589 0.538443
+vt 0.110423 0.513527
+vt 0.112506 0.528264
+vt 0.118228 0.515069
+vt 0.0989423 0.555133
+vt 0.104283 0.542593
+vt 0.104701 0.530629
+vt 0.0882754 0.571292
+vt 0.0894754 0.56478
+vt 0.0853955 0.582515
+vt 0.0816758 0.596094
+vt 0.0753139 0.485928
+vt 0.0650052 0.492652
+vt 0.0850253 0.491819
+vt 0.076075 0.499356
+vt 0.0930081 0.48361
+vt 0.100394 0.488016
+vt 0.0945577 0.497873
+vt 0.0651895 0.505551
+vt 0.073766 0.51176
+vt 0.0546966 0.499376
+vt 0.0900973 0.518593
+vt 0.0785101 0.518041
+vt 0.0994002 0.522709
+vt 0.100625 0.514782
+vt 0.107779 0.492421
+vt 0.103732 0.504253
+vt 0.23946 0.508287
+vt 0.230571 0.517622
+vt 0.228964 0.503271
+vt 0.217261 0.507781
+vt 0.188396 0.491269
+vt 0.206255 0.496224
+vt 0.200655 0.50824
+vt 0.268702 0.540749
+vt 0.243925 0.538213
+vt 0.268704 0.526377
+vt 0.248486 0.524031
+vt 0.253257 0.51249
+vt 0.215104 0.553361
+vt 0.241505 0.558876
+vt 0.268704 0.561105
+vt 0.190298 0.545736
+vt 0.166291 0.537055
+vt 0.187254 0.524533
+vt 0.161754 0.51849
+vt 0.170538 0.486314
+vt 0.179959 0.505005
+vt 0.157216 0.499924
+vt 0.24946 0.367438
+vt 0.258636 0.367175
+vt 0.255765 0.379731
+vt 0.262283 0.37965
+vt 0.255826 0.354536
+vt 0.268674 0.354878
+vt 0.268728 0.367451
+vt 0.234227 0.359499
+vt 0.242109 0.369275
+vt 0.228611 0.363909
+vt 0.23567 0.3719
+vt 0.249247 0.379811
+vt 0.215158 0.338968
+vt 0.227222 0.344776
+vt 0.214279 0.345817
+vt 0.224857 0.351242
+vt 0.221552 0.355917
+vt 0.240937 0.339238
+vt 0.227708 0.334728
+vt 0.215289 0.329543
+vt 0.268674 0.342201
+vt 0.254691 0.341571
+vt 0.244771 0.497304
+vt 0.256308 0.499747
+vt 0.268704 0.500841
+vt 0.268703 0.486337
+vt 0.258069 0.48605
+vt 0.225998 0.491141
+vt 0.227882 0.481103
+vt 0.234955 0.49435
+vt 0.237658 0.483058
+vt 0.249236 0.471672
+vt 0.239501 0.471434
+vt 0.250178 0.458265
+vt 0.240915 0.459645
+vt 0.229767 0.471064
+vt 0.25897 0.471644
+vt 0.268704 0.471564
+vt 0.259441 0.456884
+vt 0.203423 0.33552
+vt 0.203968 0.325186
+vt 0.193422 0.320894
+vt 0.18965 0.357864
+vt 0.190908 0.345555
+vt 0.202072 0.352895
+vt 0.202774 0.344537
+vt 0.18128 0.331343
+vt 0.179249 0.347026
+vt 0.17058 0.329645
+vt 0.167693 0.348723
+vt 0.177228 0.362833
+vt 0.183331 0.315904
+vt 0.173467 0.310566
+vt 0.17135 0.406685
+vt 0.182441 0.411169
+vt 0.16826 0.410583
+vt 0.176559 0.414437
+vt 0.155839 0.404764
+vt 0.157521 0.413533
+vt 0.166076 0.414524
+vt 0.172571 0.417098
+vt 0.135724 0.413665
+vt 0.144491 0.409813
+vt 0.142544 0.416326
+vt 0.148879 0.413937
+vt 0.13814 0.405626
+vt 0.124506 0.410074
+vt 0.174252 0.441387
+vt 0.171559 0.438762
+vt 0.181259 0.434168
+vt 0.176807 0.433082
+vt 0.186036 0.426037
+vt 0.174588 0.426678
+vt 0.172391 0.431921
+vt 0.168862 0.436383
+vt 0.180644 0.419795
+vt 0.175028 0.421443
+vt 0.186883 0.417995
+vt 0.149859 0.449516
+vt 0.140851 0.446115
+vt 0.152195 0.445145
+vt 0.145413 0.441926
+vt 0.158597 0.449574
+vt 0.159394 0.440679
+vt 0.154101 0.440679
+vt 0.14915 0.438598
+vt 0.165593 0.443116
+vt 0.164422 0.439274
+vt 0.166713 0.446693
+vt 0.121193 0.418911
+vt 0.132677 0.420037
+vt 0.1248 0.429441
+vt 0.142327 0.427533
+vt 0.140909 0.42129
+vt 0.13921 0.436123
+vt 0.145149 0.43376
+vt 0.131925 0.438966
+vt 0.718331 0.590305
+vt 0.746935 0.609408
+vt 0.714187 0.592962
+vt 0.74538 0.618837
+vt 0.776045 0.619539
+vt 0.775836 0.606046
+vt 0.748316 0.600181
+vt 0.698297 0.570265
+vt 0.691016 0.56935
+vt 0.678793 0.553513
+vt 0.668258 0.550016
+vt 0.682993 0.567088
+vt 0.724973 0.581367
+vt 0.704095 0.568485
+vt 0.726629 0.571953
+vt 0.707052 0.562689
+vt 0.687534 0.554647
+vt 0.775571 0.595734
+vt 0.74935 0.591359
+vt 0.749786 0.580241
+vt 0.631527 0.521919
+vt 0.643349 0.524282
+vt 0.641663 0.531566
+vt 0.653323 0.535777
+vt 0.66485 0.539385
+vt 0.649849 0.514184
+vt 0.635476 0.513181
+vt 0.647883 0.501604
+vt 0.631937 0.501954
+vt 0.62139 0.512271
+vt 0.684619 0.531929
+vt 0.668859 0.529
+vt 0.681508 0.516892
+vt 0.664797 0.515372
+vt 0.663468 0.501153
+vt 0.676113 0.541788
+vt 0.689104 0.545131
+vt 0.919101 0.529512
+vt 0.932277 0.513107
+vt 0.933894 0.53169
+vt 0.952549 0.514234
+vt 0.938271 0.4968
+vt 0.915407 0.496828
+vt 0.912378 0.512315
+vt 0.874997 0.559455
+vt 0.89162 0.543518
+vt 0.884334 0.563198
+vt 0.903546 0.546094
+vt 0.915238 0.549145
+vt 0.888008 0.52727
+vt 0.879227 0.541892
+vt 0.86975 0.528566
+vt 0.864702 0.54298
+vt 0.866281 0.556104
+vt 0.894945 0.496872
+vt 0.893224 0.512192
+vt 0.87312 0.513029
+vt 0.860186 0.581167
+vt 0.86658 0.586129
+vt 0.833052 0.598471
+vt 0.836578 0.605656
+vt 0.854086 0.575883
+vt 0.804878 0.613577
+vt 0.803304 0.602321
+vt 0.806576 0.625184
+vt 0.801979 0.591764
+vt 0.80095 0.579355
+vt 0.826879 0.582241
+vt 0.824828 0.570521
+vt 0.848572 0.569954
+vt 0.845415 0.561476
+vt 0.723056 0.499144
+vt 0.723365 0.520768
+vt 0.700461 0.499917
+vt 0.701169 0.518891
+vt 0.702985 0.536491
+vt 0.773996 0.544472
+vt 0.748527 0.544271
+vt 0.773366 0.521907
+vt 0.747679 0.521923
+vt 0.747248 0.498271
+vt 0.725988 0.558685
+vt 0.74937 0.564127
+vt 0.774586 0.566031
+vt 0.705814 0.551558
+vt 0.799676 0.520274
+vt 0.799659 0.497042
+vt 0.825661 0.517765
+vt 0.825711 0.49681
+vt 0.799857 0.542467
+vt 0.848896 0.532613
+vt 0.850371 0.515118
+vt 0.850862 0.496842
+vt 0.824745 0.55595
+vt 0.846383 0.548548
+vt 0.800262 0.562542
+vt 0.622835 0.741898
+vt 0.629822 0.737341
+vt 0.630937 0.748106
+vt 0.637348 0.741916
+vt 0.638168 0.732456
+vt 0.632743 0.72671
+vt 0.623594 0.732979
+vt 0.601364 0.736825
+vt 0.612351 0.737387
+vt 0.601727 0.746549
+vt 0.614012 0.74515
+vt 0.621199 0.753622
+vt 0.623692 0.721651
+vt 0.615298 0.729693
+vt 0.603694 0.727299
+vt 0.64705 0.720202
+vt 0.651064 0.728223
+vt 0.666861 0.725286
+vt 0.666356 0.736499
+vt 0.651523 0.736804
+vt 0.655097 0.70327
+vt 0.638367 0.712843
+vt 0.644545 0.693632
+vt 0.627328 0.703786
+vt 0.680292 0.699272
+vt 0.669656 0.692929
+vt 0.66043 0.68407
+vt 0.686784 0.713263
+vt 0.693307 0.702612
+vt 0.683906 0.72429
+vt 0.682763 0.73584
+vt 0.664776 0.7482
+vt 0.649538 0.745843
+vt 0.644256 0.754878
+vt 0.68235 0.760278
+vt 0.681446 0.748063
+vt 0.67624 0.777475
+vt 0.68787 0.771805
+vt 0.657375 0.798004
+vt 0.64028 0.785894
+vt 0.666427 0.786487
+vt 0.651148 0.774455
+vt 0.634824 0.763445
+vt 0.622928 0.773335
+vt 0.597165 0.801047
+vt 0.584037 0.816073
+vt 0.575655 0.789046
+vt 0.561282 0.802854
+vt 0.606349 0.829025
+vt 0.594793 0.84477
+vt 0.570888 0.831255
+vt 0.629233 0.799056
+vt 0.610251 0.786335
+vt 0.590115 0.774943
+vt 0.648026 0.811186
+vt 0.638424 0.825833
+vt 0.628613 0.841743
+vt 0.618698 0.858285
+vt 0.643447 0.660513
+vt 0.635997 0.646287
+vt 0.6049 0.637043
+vt 0.628919 0.631286
+vt 0.614692 0.652678
+vt 0.634531 0.681735
+vt 0.651642 0.673186
+vt 0.584936 0.681759
+vt 0.604883 0.675219
+vt 0.597105 0.697898
+vt 0.616244 0.690669
+vt 0.593009 0.659162
+vt 0.58088 0.642801
+vt 0.571868 0.665802
+vt 0.586302 0.735665
+vt 0.587541 0.725613
+vt 0.566532 0.713946
+vt 0.585456 0.714475
+vt 0.569532 0.724249
+vt 0.584349 0.745549
+vt 0.579071 0.755761
+vt 0.565094 0.743135
+vt 0.557994 0.751956
+vt 0.550572 0.731836
+vt 0.547179 0.740294
+vt 0.532926 0.729581
+vt 0.530226 0.737883
+vt 0.540511 0.747664
+vt 0.550413 0.713789
+vt 0.552359 0.722818
+vt 0.536163 0.720718
+vt 0.553225 0.778158
+vt 0.567858 0.766798
+vt 0.546459 0.760192
+vt 0.496408 0.78312
+vt 0.5145 0.775693
+vt 0.521696 0.80043
+vt 0.537687 0.789339
+vt 0.512875 0.758052
+vt 0.492969 0.762083
+vt 0.500324 0.747435
+vt 0.478432 0.748437
+vt 0.471121 0.76581
+vt 0.5289 0.753414
+vt 0.51732 0.74582
+vt 0.55219 0.672662
+vt 0.56563 0.687095
+vt 0.519008 0.667707
+vt 0.537934 0.658133
+vt 0.533662 0.679776
+vt 0.558759 0.703094
+vt 0.577435 0.701334
+vt 0.516448 0.703502
+vt 0.530573 0.696655
+vt 0.529774 0.709521
+vt 0.542602 0.705297
+vt 0.515971 0.687177
+vt 0.500082 0.677282
+vt 0.500703 0.696106
+vt 0.496419 0.736002
+vt 0.513862 0.73676
+vt 0.515969 0.727011
+vt 0.476228 0.734685
+vt 0.454664 0.733089
+vt 0.481514 0.721061
+vt 0.463494 0.717678
+vt 0.505915 0.712954
+vt 0.489445 0.707801
+vt 0.472325 0.702267
+vt 0.521085 0.717345
+vt 0.817772 0.32972
+vt 0.817268 0.334177
+vt 0.807098 0.329266
+vt 0.806518 0.333516
+vt 0.806012 0.337004
+vt 0.795796 0.333142
+vt 0.795184 0.336418
+vt 0.838686 0.341177
+vt 0.827785 0.339607
+vt 0.838685 0.335915
+vt 0.827985 0.335004
+vt 0.82832 0.330282
+vt 0.827498 0.348469
+vt 0.816308 0.345239
+vt 0.82763 0.344073
+vt 0.816582 0.341797
+vt 0.838688 0.346438
+vt 0.805553 0.339703
+vt 0.794562 0.338721
+vt 0.805117 0.342008
+vt 0.784417 0.336886
+vt 0.785167 0.333173
+vt 0.774651 0.33352
+vt 0.775476 0.328538
+vt 0.785864 0.328653
+vt 0.771718 0.345756
+vt 0.772735 0.341953
+vt 0.782822 0.342267
+vt 0.783629 0.339781
+vt 0.763085 0.339283
+vt 0.761859 0.344306
+vt 0.752475 0.340764
+vt 0.750993 0.34675
+vt 0.760614 0.349246
+vt 0.765302 0.328633
+vt 0.764271 0.334092
+vt 0.753958 0.334778
+vt 0.872696 0.335583
+vt 0.866407 0.338084
+vt 0.86882 0.330441
+vt 0.863658 0.333516
+vt 0.867867 0.343569
+vt 0.860064 0.340257
+vt 0.860616 0.34494
+vt 0.882136 0.339457
+vt 0.878875 0.332422
+vt 0.888775 0.336165
+vt 0.88462 0.328842
+vt 0.874176 0.326498
+vt 0.876903 0.354499
+vt 0.876407 0.348394
+vt 0.885368 0.353577
+vt 0.88421 0.346688
+vt 0.892061 0.343631
+vt 0.868751 0.349227
+vt 0.861168 0.349623
+vt 0.869036 0.354668
+vt 0.905832 0.318045
+vt 0.897126 0.314006
+vt 0.909273 0.307792
+vt 0.899606 0.305356
+vt 0.888216 0.310241
+vt 0.885668 0.315488
+vt 0.89379 0.320385
+vt 0.912856 0.338167
+vt 0.907418 0.331814
+vt 0.922353 0.328655
+vt 0.914539 0.322618
+vt 0.91968 0.310994
+vt 0.895263 0.331765
+vt 0.899861 0.338745
+vt 0.903538 0.34573
+vt 0.882771 0.319356
+vt 0.889606 0.32508
+vt 0.726761 0.338632
+vt 0.720269 0.333665
+vt 0.728661 0.332789
+vt 0.724048 0.328479
+vt 0.728685 0.324646
+vt 0.725283 0.320091
+vt 0.718943 0.323554
+vt 0.708141 0.33392
+vt 0.716815 0.339387
+vt 0.703619 0.340611
+vt 0.713734 0.345346
+vt 0.724861 0.344474
+vt 0.705092 0.321759
+vt 0.698212 0.327693
+vt 0.695328 0.313899
+vt 0.68654 0.320218
+vt 0.692159 0.335172
+vt 0.721158 0.3143
+vt 0.712853 0.317398
+vt 0.705649 0.309794
+vt 0.849098 0.333114
+vt 0.852849 0.328742
+vt 0.854305 0.334344
+vt 0.858906 0.330607
+vt 0.863427 0.326736
+vt 0.848559 0.322731
+vt 0.846187 0.327308
+vt 0.839608 0.321509
+vt 0.838379 0.326237
+vt 0.84389 0.331884
+vt 0.862887 0.316059
+vt 0.859996 0.320044
+vt 0.853481 0.313684
+vt 0.851078 0.31815
+vt 0.841341 0.316617
+vt 0.867789 0.322599
+vt 0.871643 0.31863
+vt 0.808609 0.31864
+vt 0.807782 0.324278
+vt 0.797841 0.317858
+vt 0.797082 0.323766
+vt 0.818436 0.324849
+vt 0.821851 0.307693
+vt 0.820468 0.313742
+vt 0.810856 0.305827
+vt 0.809616 0.312441
+vt 0.79869 0.311344
+vt 0.829771 0.320444
+vt 0.831091 0.315141
+vt 0.832672 0.309663
+vt 0.828882 0.32546
+vt 0.827295 0.287378
+vt 0.81609 0.283741
+vt 0.829537 0.279056
+vt 0.81819 0.275174
+vt 0.802762 0.289081
+vt 0.80451 0.280694
+vt 0.814142 0.291631
+vt 0.836158 0.298133
+vt 0.838143 0.291297
+vt 0.84681 0.301473
+vt 0.848714 0.295264
+vt 0.84041 0.283327
+vt 0.823454 0.301435
+vt 0.834348 0.304125
+vt 0.84504 0.30685
+vt 0.801169 0.296892
+vt 0.812381 0.298942
+vt 0.876986 0.311536
+vt 0.878898 0.306488
+vt 0.870963 0.295312
+vt 0.88062 0.29932
+vt 0.869186 0.302754
+vt 0.865464 0.312169
+vt 0.874638 0.315263
+vt 0.855505 0.309448
+vt 0.857319 0.304788
+vt 0.859093 0.299048
+vt 0.861066 0.291675
+vt 0.732418 0.318062
+vt 0.734712 0.322982
+vt 0.743001 0.327869
+vt 0.736781 0.327408
+vt 0.741594 0.322579
+vt 0.738426 0.310793
+vt 0.729673 0.312154
+vt 0.736628 0.303749
+vt 0.726615 0.305165
+vt 0.757036 0.316401
+vt 0.748245 0.31651
+vt 0.75687 0.309754
+vt 0.747447 0.310045
+vt 0.746533 0.302937
+vt 0.748795 0.322531
+vt 0.749221 0.328331
+vt 0.756792 0.322664
+vt 0.731654 0.277237
+vt 0.718591 0.277987
+vt 0.730298 0.26671
+vt 0.717089 0.267402
+vt 0.704046 0.278768
+vt 0.706282 0.288897
+vt 0.720525 0.287868
+vt 0.756105 0.285792
+vt 0.744751 0.286232
+vt 0.755817 0.276484
+vt 0.743878 0.276719
+vt 0.743048 0.266393
+vt 0.734816 0.295809
+vt 0.745635 0.294985
+vt 0.756313 0.294527
+vt 0.710968 0.298566
+vt 0.723383 0.296998
+vt 0.792536 0.278545
+vt 0.791154 0.28731
+vt 0.780323 0.277204
+vt 0.793935 0.269245
+vt 0.781204 0.267531
+vt 0.78873 0.303265
+vt 0.777827 0.302657
+vt 0.789858 0.295534
+vt 0.778545 0.294752
+vt 0.767676 0.285819
+vt 0.767323 0.294432
+vt 0.767022 0.302415
+vt 0.768024 0.276581
+vt 0.768346 0.266629
+vt 0.776124 0.323009
+vt 0.786496 0.323337
+vt 0.787134 0.317284
+vt 0.766566 0.316536
+vt 0.766062 0.322804
+vt 0.777196 0.310038
+vt 0.766828 0.309761
+vt 0.78785 0.310554
+vt 0.887192 0.245173
+vt 0.894254 0.251226
+vt 0.882204 0.253513
+vt 0.889316 0.257984
+vt 0.899158 0.259059
+vt 0.906216 0.25556
+vt 0.900579 0.245749
+vt 0.873068 0.226854
+vt 0.883747 0.231913
+vt 0.868263 0.235954
+vt 0.878436 0.240212
+vt 0.873612 0.249405
+vt 0.899039 0.231269
+vt 0.889175 0.22393
+vt 0.904851 0.223823
+vt 0.894483 0.215603
+vt 0.878092 0.21784
+vt 0.907771 0.240362
+vt 0.914548 0.251706
+vt 0.914646 0.233613
+vt 0.83088 0.224634
+vt 0.816277 0.220886
+vt 0.83396 0.214024
+vt 0.818586 0.209908
+vt 0.827947 0.235154
+vt 0.840898 0.238644
+vt 0.844486 0.228275
+vt 0.855747 0.197071
+vt 0.852027 0.207701
+vt 0.840323 0.192176
+vt 0.837098 0.203273
+vt 0.821041 0.19888
+vt 0.861296 0.22221
+vt 0.8657 0.212494
+vt 0.870075 0.20254
+vt 0.852868 0.242144
+vt 0.856947 0.232012
+vt 0.689623 0.215775
+vt 0.70332 0.207275
+vt 0.697939 0.222493
+vt 0.709017 0.215819
+vt 0.725203 0.22199
+vt 0.713143 0.224363
+vt 0.722188 0.211648
+vt 0.691318 0.186238
+vt 0.709695 0.179461
+vt 0.697458 0.197483
+vt 0.714155 0.190926
+vt 0.68169 0.206684
+vt 0.751207 0.197567
+vt 0.734425 0.198302
+vt 0.74968 0.185753
+vt 0.731649 0.18705
+vt 0.728678 0.175143
+vt 0.738996 0.220423
+vt 0.736932 0.209325
+vt 0.752641 0.20886
+vt 0.784858 0.21308
+vt 0.800826 0.216826
+vt 0.784162 0.2244
+vt 0.799325 0.227809
+vt 0.802394 0.205632
+vt 0.768345 0.19871
+vt 0.768707 0.210268
+vt 0.768953 0.22173
+vt 0.786206 0.189958
+vt 0.767985 0.186934
+vt 0.786915 0.177877
+vt 0.767639 0.174701
+vt 0.803984 0.194191
+vt 0.805673 0.182317
+vt 0.809955 0.252364
+vt 0.822756 0.255918
+vt 0.80815 0.262293
+vt 0.820407 0.265831
+vt 0.832047 0.269793
+vt 0.837735 0.249207
+vt 0.825254 0.245637
+vt 0.811949 0.242115
+vt 0.856542 0.267463
+vt 0.846044 0.263615
+vt 0.859897 0.256492
+vt 0.849272 0.252848
+vt 0.843066 0.273938
+vt 0.853522 0.278101
+vt 0.742291 0.255201
+vt 0.729185 0.255607
+vt 0.755289 0.255203
+vt 0.754976 0.243665
+vt 0.741468 0.243564
+vt 0.715531 0.256156
+vt 0.700801 0.256437
+vt 0.714427 0.244869
+vt 0.700028 0.24508
+vt 0.72694 0.232765
+vt 0.714289 0.234159
+vt 0.701971 0.234531
+vt 0.754532 0.231935
+vt 0.740439 0.231904
+vt 0.911041 0.2953
+vt 0.921301 0.296781
+vt 0.93161 0.297423
+vt 0.929903 0.281016
+vt 0.920542 0.281282
+vt 0.901221 0.293843
+vt 0.891715 0.291968
+vt 0.901867 0.281252
+vt 0.892897 0.280142
+vt 0.909625 0.267746
+vt 0.901437 0.269362
+vt 0.893739 0.269552
+vt 0.927824 0.264632
+vt 0.918548 0.265802
+vt 0.783513 0.23552
+vt 0.768967 0.233217
+vt 0.768829 0.24463
+vt 0.796609 0.249173
+vt 0.797938 0.238614
+vt 0.782032 0.257262
+vt 0.795284 0.259419
+vt 0.768618 0.255873
+vt 0.866329 0.271283
+vt 0.863474 0.282119
+vt 0.873054 0.28587
+vt 0.878455 0.263629
+vt 0.869642 0.26007
+vt 0.884271 0.277953
+vt 0.886283 0.267215
+vt 0.882397 0.289232
+vt 0.941581 0.296368
+vt 0.939264 0.280761
+vt 0.944658 0.31122
+vt 0.954503 0.308374
+vt 0.951091 0.294591
+vt 0.945764 0.266671
+vt 0.937066 0.26534
+vt 0.944666 0.253926
+vt 0.936188 0.251002
+vt 0.964594 0.279716
+vt 0.956479 0.280128
+vt 0.960523 0.267756
+vt 0.953407 0.267371
+vt 0.951694 0.255808
+vt 0.963661 0.306017
+vt 0.960023 0.293069
+vt 0.968665 0.291676
+vt 0.386764 0.173365
+vt 0.386251 0.159166
+vt 0.403134 0.175347
+vt 0.40173 0.162164
+vt 0.370756 0.187276
+vt 0.370795 0.173446
+vt 0.38669 0.187253
+vt 0.422389 0.190328
+vt 0.419955 0.177717
+vt 0.441316 0.192402
+vt 0.437002 0.180281
+vt 0.417209 0.165162
+vt 0.401826 0.214817
+vt 0.403793 0.201683
+vt 0.424104 0.215186
+vt 0.424198 0.203052
+vt 0.44563 0.204522
+vt 0.385442 0.200518
+vt 0.36925 0.200049
+vt 0.382596 0.213492
+vt 0.356262 0.189263
+vt 0.355175 0.177265
+vt 0.348714 0.166736
+vt 0.334762 0.173858
+vt 0.341079 0.182428
+vt 0.340863 0.211835
+vt 0.342946 0.202073
+vt 0.352708 0.211778
+vt 0.355322 0.200767
+vt 0.331414 0.194664
+vt 0.331412 0.203371
+vt 0.319782 0.196009
+vt 0.319946 0.204047
+vt 0.330031 0.212063
+vt 0.324674 0.179566
+vt 0.32968 0.18654
+vt 0.319256 0.188731
+vt 0.34302 0.232291
+vt 0.34877 0.222299
+vt 0.354926 0.235741
+vt 0.361782 0.224028
+vt 0.337711 0.221083
+vt 0.32682 0.236795
+vt 0.334971 0.241716
+vt 0.31849 0.242434
+vt 0.324587 0.249202
+vt 0.345001 0.247217
+vt 0.32414 0.227342
+vt 0.319507 0.233375
+vt 0.314915 0.225618
+vt 0.311922 0.231227
+vt 0.313143 0.237776
+vt 0.327627 0.220146
+vt 0.317481 0.219239
+vt 0.63785 0.953205
+vt 0.630881 0.947663
+vt 0.647016 0.946972
+vt 0.6392 0.9414
+vt 0.622289 0.952938
+vt 0.615431 0.946184
+vt 0.624837 0.941085
+vt 0.636439 0.963394
+vt 0.644947 0.958154
+vt 0.644288 0.966733
+vt 0.653209 0.961707
+vt 0.654465 0.952626
+vt 0.612697 0.965958
+vt 0.620878 0.963127
+vt 0.620821 0.970267
+vt 0.628164 0.967348
+vt 0.635184 0.970582
+vt 0.606875 0.951267
+vt 0.614197 0.957483
+vt 0.605619 0.960347
+vt 0.367121 0.917394
+vt 0.374142 0.923099
+vt 0.360165 0.922438
+vt 0.367724 0.92812
+vt 0.381201 0.91911
+vt 0.388569 0.925813
+vt 0.380653 0.929651
+vt 0.367048 0.90811
+vt 0.360477 0.912329
+vt 0.360243 0.904335
+vt 0.353385 0.908573
+vt 0.353375 0.916883
+vt 0.388646 0.907697
+vt 0.381128 0.909826
+vt 0.380927 0.902848
+vt 0.374115 0.905116
+vt 0.367999 0.901318
+vt 0.396044 0.921905
+vt 0.388216 0.915684
+vt 0.396055 0.913595
+vt 0.328818 0.915038
+vt 0.333156 0.924854
+vt 0.309844 0.916921
+vt 0.317341 0.929308
+vt 0.32345 0.950038
+vt 0.299401 0.934005
+vt 0.334074 0.942122
+vt 0.352513 0.92816
+vt 0.344718 0.920885
+vt 0.341477 0.913527
+vt 0.373801 0.947808
+vt 0.357974 0.943112
+vt 0.374058 0.938334
+vt 0.362633 0.93464
+vt 0.352852 0.95399
+vt 0.347499 0.966072
+vt 0.373169 0.960769
+vt 0.418321 0.915362
+vt 0.435775 0.918439
+vt 0.422947 0.925132
+vt 0.443681 0.930756
+vt 0.455589 0.922184
+vt 0.430406 0.905691
+vt 0.418226 0.905767
+vt 0.397194 0.906411
+vt 0.405585 0.913617
+vt 0.409188 0.921144
+vt 0.376316 0.893935
+vt 0.392361 0.898152
+vt 0.375205 0.897753
+vt 0.386735 0.900808
+vt 0.405223 0.889198
+vt 0.398631 0.894039
+vt 0.377906 0.887331
+vt 0.336894 0.897334
+vt 0.318775 0.906333
+vt 0.327696 0.895339
+vt 0.301524 0.906655
+vt 0.333962 0.906288
+vt 0.360174 0.89468
+vt 0.356966 0.889714
+vt 0.353868 0.884022
+vt 0.353137 0.901438
+vt 0.363602 0.898194
+vt 0.345023 0.906797
+vt 0.434342 0.941415
+vt 0.453466 0.949534
+vt 0.415406 0.950555
+vt 0.42616 0.960391
+vt 0.417515 0.933928
+vt 0.390161 0.946583
+vt 0.394517 0.958315
+vt 0.398853 0.971248
+vt 0.39657 0.933133
+vt 0.385766 0.937253
+vt 0.40528 0.927706
+vt 0.57922 0.909293
+vt 0.58971 0.919914
+vt 0.547117 0.922496
+vt 0.567321 0.931013
+vt 0.577999 0.948709
+vt 0.555818 0.943001
+vt 0.585011 0.938851
+vt 0.617202 0.922908
+vt 0.614188 0.910157
+vt 0.635979 0.920365
+vt 0.638841 0.906233
+vt 0.611323 0.89609
+vt 0.607883 0.938585
+vt 0.620514 0.933025
+vt 0.633926 0.930757
+vt 0.597671 0.945329
+vt 0.592645 0.95281
+vt 0.700973 0.938045
+vt 0.70873 0.951078
+vt 0.680671 0.943135
+vt 0.684796 0.953548
+vt 0.697126 0.916127
+vt 0.723976 0.932747
+vt 0.682579 0.924761
+vt 0.65736 0.940282
+vt 0.665771 0.947808
+vt 0.668833 0.955667
+vt 0.653673 0.924504
+vt 0.646476 0.933887
+vt 0.670275 0.899507
+vt 0.661753 0.912628
+vt 0.571389 0.966985
+vt 0.583228 0.967478
+vt 0.599576 0.982628
+vt 0.604894 0.978385
+vt 0.628092 0.978017
+vt 0.628226 0.984167
+vt 0.610106 0.974767
+vt 0.582286 0.958749
+vt 0.563995 0.95542
+vt 0.543202 0.951342
+vt 0.603906 0.967493
+vt 0.595571 0.96058
+vt 0.615108 0.972401
+vt 0.627917 0.974569
+vt 0.676098 0.972325
+vt 0.697647 0.962452
+vt 0.689295 0.973819
+vt 0.72006 0.961593
+vt 0.677946 0.963033
+vt 0.646577 0.976363
+vt 0.652459 0.980856
+vt 0.658529 0.986044
+vt 0.653384 0.96919
+vt 0.64107 0.973262
+vt 0.663671 0.963059
+vt 0.36984 0.23949
+vt 0.357948 0.252377
+vt 0.346028 0.26526
+vt 0.355607 0.271672
+vt 0.369662 0.258935
+vt 0.397205 0.227812
+vt 0.377725 0.226506
+vt 0.417354 0.23588
+vt 0.420828 0.226027
+vt 0.4431 0.22875
+vt 0.446522 0.222697
+vt 0.398497 0.266882
+vt 0.394993 0.256063
+vt 0.417376 0.253869
+vt 0.416668 0.245046
+vt 0.439677 0.234803
+vt 0.375987 0.26863
+vt 0.360587 0.280948
+vt 0.379618 0.279895
+vt 0.470993 0.595329
+vt 0.466212 0.581429
+vt 0.482679 0.589544
+vt 0.476881 0.579741
+vt 0.489311 0.600458
+vt 0.499356 0.579702
+vt 0.49224 0.574945
+vt 0.479771 0.564752
+vt 0.472749 0.572159
+vt 0.474256 0.558067
+vt 0.468458 0.56471
+vt 0.46434 0.573191
+vt 0.492648 0.557082
+vt 0.486003 0.554296
+vt 0.497062 0.544746
+vt 0.490231 0.543792
+vt 0.480758 0.549772
+vt 0.509401 0.558946
+vt 0.500678 0.558385
+vt 0.505852 0.543851
+vt 0.497996 0.536049
+vt 0.505621 0.535332
+vt 0.501948 0.53012
+vt 0.507915 0.530515
+vt 0.51368 0.534353
+vt 0.482316 0.532549
+vt 0.489579 0.530568
+vt 0.484501 0.537962
+vt 0.491239 0.536243
+vt 0.491373 0.524968
+vt 0.485801 0.52569
+vt 0.484285 0.519868
+vt 0.479547 0.521219
+vt 0.479193 0.527051
+vt 0.496798 0.525508
+vt 0.50215 0.526678
+vt 0.490005 0.520085
+vt 0.473801 0.535793
+vt 0.470659 0.528429
+vt 0.469034 0.521857
+vt 0.460157 0.521751
+vt 0.461514 0.530505
+vt 0.469248 0.549809
+vt 0.476929 0.543255
+vt 0.456268 0.544777
+vt 0.462181 0.555305
+vt 0.449568 0.548141
+vt 0.456741 0.560093
+vt 0.451562 0.524114
+vt 0.453071 0.533958
+vt 0.446261 0.536501
+vt 0.933404 0.221309
+vt 0.924739 0.213284
+vt 0.938258 0.211078
+vt 0.929637 0.202912
+vt 0.924585 0.183534
+vt 0.934625 0.192739
+vt 0.919804 0.19418
+vt 0.910176 0.214711
+vt 0.920022 0.224053
+vt 0.929173 0.232598
+vt 0.892208 0.188152
+vt 0.904301 0.196133
+vt 0.88756 0.198715
+vt 0.899431 0.206272
+vt 0.909372 0.185384
+vt 0.914544 0.174329
+vt 0.89751 0.177035
+vt 0.829357 0.162643
+vt 0.809729 0.156901
+vt 0.833272 0.14992
+vt 0.812406 0.143745
+vt 0.789947 0.13803
+vt 0.815325 0.130471
+vt 0.791208 0.124039
+vt 0.787797 0.165074
+vt 0.807536 0.169823
+vt 0.826253 0.175235
+vt 0.767318 0.161769
+vt 0.746459 0.160576
+vt 0.767097 0.147949
+vt 0.744921 0.146466
+vt 0.767051 0.133048
+vt 0.767091 0.117607
+vt 0.743596 0.130619
+vt 0.722473 0.147996
+vt 0.719464 0.132562
+vt 0.695065 0.137329
+vt 0.716504 0.116494
+vt 0.690034 0.121812
+vt 0.725582 0.162157
+vt 0.70494 0.166437
+vt 0.656395 0.167991
+vt 0.677929 0.158979
+vt 0.665245 0.182639
+vt 0.684786 0.173336
+vt 0.670811 0.143368
+vt 0.663563 0.127131
+vt 0.647183 0.151721
+vt 0.844502 0.359871
+vt 0.847714 0.360296
+vt 0.847408 0.363957
+vt 0.849348 0.364269
+vt 0.851648 0.36844
+vt 0.850981 0.368241
+vt 0.851475 0.364632
+vt 0.850753 0.356913
+vt 0.846081 0.356324
+vt 0.850205 0.353003
+vt 0.844447 0.352351
+vt 0.841595 0.355786
+vt 0.855799 0.357604
+vt 0.861641 0.358449
+vt 0.855262 0.361451
+vt 0.860512 0.36239
+vt 0.855963 0.353655
+vt 0.853976 0.365096
+vt 0.852315 0.368639
+vt 0.858224 0.366005
+vt 0.720036 0.362311
+vt 0.71959 0.357661
+vt 0.728015 0.361123
+vt 0.727779 0.35756
+vt 0.73391 0.360061
+vt 0.73344 0.362971
+vt 0.728708 0.364784
+vt 0.736235 0.351526
+vt 0.735307 0.354339
+vt 0.729598 0.350921
+vt 0.728459 0.354192
+vt 0.720575 0.353357
+vt 0.740522 0.356645
+vt 0.741697 0.354388
+vt 0.746207 0.356041
+vt 0.747858 0.354388
+vt 0.742872 0.352131
+vt 0.739347 0.358902
+vt 0.738172 0.361159
+vt 0.744555 0.357694
+vt 0.878699 0.181029
+vt 0.883387 0.169647
+vt 0.867761 0.162851
+vt 0.888238 0.158069
+vt 0.871973 0.151014
+vt 0.874335 0.192023
+vt 0.859716 0.186019
+vt 0.843668 0.180528
+vt 0.847301 0.168505
+vt 0.85139 0.156282
+vt 0.855707 0.143958
+vt 0.959959 0.243101
+vt 0.954077 0.238396
+vt 0.961713 0.236753
+vt 0.956323 0.230743
+vt 0.954067 0.216174
+vt 0.958767 0.22329
+vt 0.950811 0.224595
+vt 0.952226 0.24645
+vt 0.945638 0.243001
+vt 0.958206 0.249448
+vt 0.941091 0.227849
+vt 0.937833 0.238648
+vt 0.949366 0.209059
+vt 0.945053 0.218173
+vt 0.336934 0.71568
+vt 0.33038 0.741466
+vt 0.302716 0.711537
+vt 0.299424 0.738297
+vt 0.295776 0.762787
+vt 0.393479 0.752977
+vt 0.377463 0.773803
+vt 0.361811 0.746705
+vt 0.350155 0.769363
+vt 0.371566 0.72393
+vt 0.338262 0.791261
+vt 0.32625 0.812779
+vt 0.315076 0.788147
+vt 0.307068 0.810102
+vt 0.361448 0.794629
+vt 0.29189 0.78554
+vt 0.287886 0.807426
+vt 0.299023 0.2659
+vt 0.299047 0.279209
+vt 0.283956 0.266822
+vt 0.283986 0.280354
+vt 0.283688 0.295775
+vt 0.327087 0.27663
+vt 0.323816 0.291325
+vt 0.313749 0.277461
+vt 0.311916 0.292761
+vt 0.31433 0.263721
+vt 0.296663 0.325331
+vt 0.297456 0.310292
+vt 0.309865 0.321929
+vt 0.31066 0.307783
+vt 0.322425 0.305028
+vt 0.283294 0.311929
+vt 0.282898 0.327607
+vt 0.281812 0.241506
+vt 0.281311 0.235544
+vt 0.29388 0.241063
+vt 0.292617 0.235389
+vt 0.282538 0.248141
+vt 0.307969 0.245729
+vt 0.304463 0.240079
+vt 0.302421 0.235265
+vt 0.297743 0.255375
+vt 0.311831 0.253377
+vt 0.283363 0.256332
+vt 0.665914 0.351378
+vt 0.682493 0.354562
+vt 0.661557 0.365446
+vt 0.679123 0.366239
+vt 0.694271 0.365301
+vt 0.676262 0.378559
+vt 0.692167 0.374571
+vt 0.699871 0.348099
+vt 0.686879 0.344167
+vt 0.6718 0.339046
+vt 0.709168 0.357329
+vt 0.711074 0.351244
+vt 0.708348 0.363848
+vt 0.708071 0.370584
+vt 0.867597 0.363815
+vt 0.865591 0.367703
+vt 0.874845 0.369526
+vt 0.873284 0.373919
+vt 0.863133 0.371378
+vt 0.876827 0.360099
+vt 0.868698 0.3595
+vt 0.885694 0.365176
+vt 0.885881 0.359586
+vt 0.896427 0.363349
+vt 0.895778 0.357262
+vt 0.884753 0.370807
+vt 0.883435 0.376459
+vt 0.896131 0.370165
+vt 0.908527 0.359153
+vt 0.909799 0.366215
+vt 0.924545 0.360491
+vt 0.927731 0.367892
+vt 0.910658 0.373446
+vt 0.917439 0.345549
+vt 0.906434 0.352429
+vt 0.933647 0.34626
+vt 0.928384 0.337086
+vt 0.944982 0.340124
+vt 0.93903 0.329282
+vt 0.939163 0.354526
+vt 0.944804 0.362339
+vt 0.951802 0.349505
+vt 0.954599 0.336095
+vt 0.960613 0.346611
+vt 0.967306 0.344981
+vt 0.971931 0.35627
+vt 0.966904 0.356527
+vt 0.949139 0.32438
+vt 0.958528 0.321259
+vt 0.967014 0.318797
+vt 0.975048 0.316665
+vt 0.970264 0.331384
+vt 0.97736 0.329695
+vt 0.976958 0.356012
+vt 0.973591 0.343753
+vt 0.979672 0.342725
+vt 0.320524 0.146521
+vt 0.313373 0.154734
+vt 0.2954 0.141698
+vt 0.292003 0.150779
+vt 0.28884 0.159589
+vt 0.322801 0.167455
+vt 0.333528 0.159639
+vt 0.345648 0.151344
+vt 0.301572 0.170503
+vt 0.31486 0.174313
+vt 0.297761 0.178302
+vt 0.309891 0.180957
+vt 0.286147 0.16786
+vt 0.284108 0.176013
+vt 0.296019 0.186221
+vt 0.282908 0.18447
+vt 0.282346 0.192959
+vt 0.307895 0.195694
+vt 0.308085 0.188134
+vt 0.295125 0.202337
+vt 0.307775 0.203492
+vt 0.294766 0.209933
+vt 0.307347 0.211107
+vt 0.28222 0.201212
+vt 0.28215 0.209032
+vt 0.294211 0.217025
+vt 0.281754 0.216221
+vt 0.281328 0.222946
+vt 0.304681 0.22447
+vt 0.306231 0.218115
+vt 0.292675 0.229646
+vt 0.302952 0.230124
+vt 0.281165 0.229372
+vt 0.305534 0.681974
+vt 0.306111 0.650222
+vt 0.342004 0.687835
+vt 0.333707 0.612749
+vt 0.302684 0.616894
+vt 0.325885 0.580428
+vt 0.298274 0.585332
+vt 0.395357 0.618639
+vt 0.374414 0.634434
+vt 0.380969 0.58713
+vt 0.358816 0.601672
+vt 0.349578 0.570739
+vt 0.384513 0.681504
+vt 0.395957 0.664926
+vt 0.416814 0.706905
+vt 0.420474 0.694282
+vt 0.409745 0.650149
+vt 0.377758 0.701684
+vt 0.413154 0.719528
+vt 0.338053 0.278233
+vt 0.345863 0.28289
+vt 0.333606 0.290848
+vt 0.34226 0.309133
+vt 0.341074 0.298701
+vt 0.351677 0.302345
+vt 0.349731 0.291219
+vt 0.332267 0.302652
+vt 0.332584 0.313884
+vt 0.387723 0.422423
+vt 0.382162 0.422373
+vt 0.388869 0.417427
+vt 0.382859 0.417822
+vt 0.377549 0.413541
+vt 0.384244 0.41338
+vt 0.377169 0.417955
+vt 0.377116 0.426931
+vt 0.381664 0.426972
+vt 0.377432 0.431442
+vt 0.381856 0.431562
+vt 0.386531 0.427438
+vt 0.367249 0.422554
+vt 0.372123 0.422513
+vt 0.368692 0.426806
+vt 0.372857 0.426712
+vt 0.373317 0.430853
+vt 0.371782 0.41828
+vt 0.37118 0.414106
+vt 0.366505 0.41823
+vt 0.426004 0.390403
+vt 0.398456 0.389437
+vt 0.424679 0.386349
+vt 0.396424 0.386762
+vt 0.363793 0.388475
+vt 0.394705 0.384351
+vt 0.356716 0.386401
+vt 0.378598 0.396645
+vt 0.375308 0.393171
+vt 0.401577 0.396345
+vt 0.401115 0.392641
+vt 0.428621 0.394516
+vt 0.353918 0.396416
+vt 0.339672 0.402446
+vt 0.345629 0.394106
+vt 0.329812 0.40053
+vt 0.35953 0.398947
+vt 0.33321 0.391382
+vt 0.318726 0.38845
+vt 0.313563 0.397303
+vt 0.476084 0.423195
+vt 0.45045 0.417502
+vt 0.486588 0.412986
+vt 0.456271 0.406372
+vt 0.430198 0.413801
+vt 0.423791 0.426121
+vt 0.43912 0.429517
+vt 0.507665 0.429493
+vt 0.542219 0.436093
+vt 0.475452 0.439099
+vt 0.495356 0.444524
+vt 0.525393 0.419675
+vt 0.43331 0.443713
+vt 0.441295 0.448141
+vt 0.411039 0.45059
+vt 0.411532 0.454564
+vt 0.448493 0.452954
+vt 0.423753 0.440055
+vt 0.413961 0.436871
+vt 0.407352 0.447798
+vt 0.31374 0.40806
+vt 0.32634 0.410021
+vt 0.317907 0.421046
+vt 0.299322 0.405773
+vt 0.325214 0.421259
+vt 0.338474 0.421652
+vt 0.3353 0.411329
+vt 0.344846 0.421904
+vt 0.34232 0.412499
+vt 0.355509 0.44277
+vt 0.342793 0.432971
+vt 0.357815 0.441106
+vt 0.346865 0.432004
+vt 0.351296 0.431202
+vt 0.336492 0.43632
+vt 0.339458 0.434504
+vt 0.354149 0.44545
+vt 0.366963 0.45175
+vt 0.367513 0.449587
+vt 0.378353 0.456489
+vt 0.366715 0.454041
+vt 0.379286 0.45488
+vt 0.368666 0.447681
+vt 0.369978 0.445476
+vt 0.38013 0.451272
+vt 0.379885 0.448331
+vt 0.392918 0.451424
+vt 0.393862 0.45378
+vt 0.390643 0.448651
+vt 0.392502 0.456314
+vt 0.389991 0.458936
+vt 0.434919 0.382651
+vt 0.453791 0.389264
+vt 0.433807 0.382477
+vt 0.442133 0.387347
+vt 0.449541 0.392567
+vt 0.49183 0.400597
+vt 0.464657 0.391608
+vt 0.513113 0.405332
+vt 0.475128 0.394166
+vt 0.436031 0.382826
+vt 0.516095 0.409998
+vt 0.483347 0.40387
+vt 0.551098 0.416497
+vt 0.455111 0.398484
+vt 0.472738 0.503366
+vt 0.465757 0.493643
+vt 0.482409 0.502085
+vt 0.475421 0.491708
+vt 0.456009 0.49597
+vt 0.450804 0.485866
+vt 0.459618 0.483598
+vt 0.474669 0.513513
+vt 0.481402 0.512442
+vt 0.489397 0.512463
+vt 0.46027 0.514483
+vt 0.470459 0.516768
+vt 0.449301 0.508912
+vt 0.450715 0.51651
+vt 0.441643 0.516565
+vt 0.44392 0.523436
+vt 0.44199 0.488134
+vt 0.446094 0.499081
+vt 0.437618 0.505763
+vt 0.409641 0.53125
+vt 0.417486 0.517634
+vt 0.424725 0.549489
+vt 0.428372 0.534587
+vt 0.421164 0.563953
+vt 0.440531 0.58304
+vt 0.440131 0.567599
+vt 0.438076 0.530458
+vt 0.43219 0.518805
+vt 0.42533 0.504018
+vt 0.445266 0.55059
+vt 0.442015 0.535846
+vt 0.453943 0.564519
+vt 0.453765 0.571874
+vt 0.456182 0.585446
+vt 0.459898 0.602127
+vt 0.349013 0.491269
+vt 0.336754 0.508239
+vt 0.331154 0.496224
+vt 0.320148 0.507781
+vt 0.297949 0.508264
+vt 0.308444 0.503271
+vt 0.306838 0.517622
+vt 0.350155 0.524533
+vt 0.35745 0.505005
+vt 0.375656 0.518489
+vt 0.380193 0.499924
+vt 0.366871 0.486313
+vt 0.322305 0.553361
+vt 0.347111 0.545735
+vt 0.371118 0.537055
+vt 0.293484 0.538213
+vt 0.295904 0.558876
+vt 0.288923 0.524031
+vt 0.284152 0.512489
+vt 0.281519 0.354585
+vt 0.27883 0.367232
+vt 0.275331 0.37971
+vt 0.281861 0.37985
+vt 0.288051 0.367559
+vt 0.296239 0.339242
+vt 0.282591 0.341601
+vt 0.310112 0.344766
+vt 0.309416 0.334597
+vt 0.322149 0.338741
+vt 0.321809 0.329248
+vt 0.30337 0.359703
+vt 0.312751 0.351414
+vt 0.309216 0.364244
+vt 0.316363 0.356301
+vt 0.323307 0.345678
+vt 0.295502 0.369475
+vt 0.288392 0.37999
+vt 0.302069 0.372187
+vt 0.281101 0.499747
+vt 0.27934 0.48605
+vt 0.292638 0.497304
+vt 0.28723 0.458265
+vt 0.288173 0.471672
+vt 0.277967 0.456884
+vt 0.278439 0.471644
+vt 0.299751 0.483057
+vt 0.297907 0.471434
+vt 0.309526 0.481102
+vt 0.307642 0.471064
+vt 0.296494 0.459645
+vt 0.302454 0.49435
+vt 0.311411 0.491141
+vt 0.343632 0.320416
+vt 0.333105 0.324778
+vt 0.333814 0.334838
+vt 0.363585 0.310039
+vt 0.366431 0.327171
+vt 0.353718 0.315393
+vt 0.355806 0.329495
+vt 0.346397 0.343469
+vt 0.357889 0.343783
+vt 0.347817 0.354895
+vt 0.35997 0.358164
+vt 0.369277 0.344302
+vt 0.334695 0.343565
+vt 0.335664 0.351626
+vt 0.379291 0.400052
+vt 0.363918 0.402333
+vt 0.367243 0.406178
+vt 0.353116 0.407088
+vt 0.35919 0.410239
+vt 0.41091 0.404675
+vt 0.399926 0.408536
+vt 0.397021 0.400516
+vt 0.390935 0.404866
+vt 0.37815 0.408898
+vt 0.386805 0.409108
+vt 0.393279 0.411364
+vt 0.363338 0.412821
+vt 0.369666 0.410088
+vt 0.350438 0.422105
+vt 0.355707 0.430165
+vt 0.363148 0.437257
+vt 0.365676 0.434559
+vt 0.360087 0.428972
+vt 0.349098 0.414046
+vt 0.35544 0.415713
+vt 0.361911 0.422489
+vt 0.36115 0.417241
+vt 0.368224 0.432108
+vt 0.364426 0.427706
+vt 0.379285 0.445126
+vt 0.388009 0.44487
+vt 0.385408 0.440532
+vt 0.396797 0.441249
+vt 0.391983 0.437144
+vt 0.371003 0.442416
+vt 0.371902 0.438797
+vt 0.377944 0.436209
+vt 0.372836 0.434912
+vt 0.388047 0.433887
+vt 0.38323 0.436089
+vt 0.411804 0.424136
+vt 0.414761 0.413476
+vt 0.40336 0.414868
+vt 0.405273 0.433865
+vt 0.397822 0.431174
+vt 0.394183 0.422617
+vt 0.391746 0.428935
+vt 0.395216 0.416313
+vt 0.775924 0.375151
+vt 0.775728 0.388743
+vt 0.746453 0.386032
+vt 0.747928 0.395308
+vt 0.717395 0.406114
+vt 0.713072 0.403611
+vt 0.744823 0.376573
+vt 0.749092 0.40422
+vt 0.749645 0.415483
+vt 0.72449 0.414957
+vt 0.72621 0.424666
+vt 0.7756 0.399248
+vt 0.70368 0.428763
+vt 0.687208 0.443737
+vt 0.697322 0.42707
+vt 0.677635 0.445158
+vt 0.707038 0.434775
+vt 0.68965 0.428163
+vt 0.681322 0.43065
+vt 0.666675 0.448773
+vt 0.663815 0.460707
+vt 0.651924 0.464538
+vt 0.630412 0.480156
+vt 0.639991 0.468922
+vt 0.642366 0.477595
+vt 0.66825 0.472074
+vt 0.675624 0.457981
+vt 0.68415 0.468328
+vt 0.689031 0.45406
+vt 0.649396 0.488552
+vt 0.664408 0.486592
+vt 0.681197 0.484039
+vt 0.634969 0.490079
+vt 0.620833 0.49139
+vt 0.912385 0.481335
+vt 0.932278 0.480511
+vt 0.952544 0.479369
+vt 0.933883 0.461959
+vt 0.919103 0.464149
+vt 0.869741 0.465154
+vt 0.887953 0.466437
+vt 0.873151 0.480724
+vt 0.893241 0.481526
+vt 0.865824 0.437779
+vt 0.874945 0.43437
+vt 0.878921 0.451884
+vt 0.891561 0.450226
+vt 0.864248 0.450831
+vt 0.915223 0.44455
+vt 0.903554 0.447624
+vt 0.884353 0.430602
+vt 0.854087 0.418075
+vt 0.860227 0.412749
+vt 0.833101 0.395604
+vt 0.866565 0.407739
+vt 0.836568 0.388337
+vt 0.826949 0.411924
+vt 0.848341 0.424031
+vt 0.824944 0.4236
+vt 0.84502 0.432484
+vt 0.803376 0.3921
+vt 0.802127 0.402739
+vt 0.801153 0.415145
+vt 0.804904 0.380706
+vt 0.806571 0.368936
+vt 0.702688 0.462561
+vt 0.700952 0.48073
+vt 0.723247 0.47733
+vt 0.725766 0.438352
+vt 0.705712 0.446642
+vt 0.74847 0.451919
+vt 0.749286 0.431814
+vt 0.774044 0.450536
+vt 0.774704 0.42899
+vt 0.747651 0.474504
+vt 0.773403 0.473071
+vt 0.799984 0.45176
+vt 0.799741 0.47385
+vt 0.825727 0.475971
+vt 0.824826 0.438046
+vt 0.800446 0.431839
+vt 0.848903 0.461112
+vt 0.846154 0.445298
+vt 0.850422 0.478567
+vt 0.62364 0.273336
+vt 0.631303 0.268742
+vt 0.633284 0.279074
+vt 0.640023 0.273125
+vt 0.639784 0.263935
+vt 0.632769 0.258173
+vt 0.623739 0.264647
+vt 0.600029 0.280268
+vt 0.598688 0.270903
+vt 0.613376 0.277111
+vt 0.610984 0.269667
+vt 0.621969 0.284647
+vt 0.621719 0.253201
+vt 0.613611 0.261924
+vt 0.599928 0.261159
+vt 0.670072 0.278979
+vt 0.653616 0.276716
+vt 0.669832 0.267969
+vt 0.654614 0.268283
+vt 0.648422 0.284838
+vt 0.690213 0.290189
+vt 0.687377 0.279381
+vt 0.68627 0.268024
+vt 0.683842 0.304224
+vt 0.697203 0.300525
+vt 0.645124 0.311795
+vt 0.656458 0.301386
+vt 0.663002 0.320505
+vt 0.672639 0.311009
+vt 0.637949 0.292669
+vt 0.625515 0.302527
+vt 0.64648 0.25088
+vt 0.652498 0.259521
+vt 0.667905 0.256541
+vt 0.65209 0.231125
+vt 0.635774 0.242815
+vt 0.639961 0.22002
+vt 0.62248 0.233347
+vt 0.677556 0.227123
+vt 0.666981 0.218489
+vt 0.656893 0.207226
+vt 0.684642 0.243963
+vt 0.689474 0.232327
+vt 0.684466 0.25604
+vt 0.602642 0.176918
+vt 0.580529 0.190632
+vt 0.589953 0.161017
+vt 0.566382 0.1753
+vt 0.557833 0.204588
+vt 0.573039 0.21849
+vt 0.594645 0.205765
+vt 0.635672 0.179448
+vt 0.62466 0.163426
+vt 0.613523 0.146733
+vt 0.646437 0.19413
+vt 0.627782 0.207015
+vt 0.588272 0.232651
+vt 0.608699 0.2205
+vt 0.608211 0.373618
+vt 0.616832 0.356454
+vt 0.634284 0.378082
+vt 0.640225 0.361705
+vt 0.646724 0.346174
+vt 0.603899 0.333518
+vt 0.593087 0.35113
+vt 0.582299 0.32801
+vt 0.570201 0.34546
+vt 0.582138 0.369154
+vt 0.635039 0.324678
+vt 0.614437 0.31673
+vt 0.593411 0.310569
+vt 0.65434 0.332333
+vt 0.561644 0.296106
+vt 0.564966 0.285457
+vt 0.580542 0.294418
+vt 0.583116 0.283009
+vt 0.582766 0.272899
+vt 0.546607 0.278466
+vt 0.547908 0.287734
+vt 0.528899 0.281331
+vt 0.53186 0.290461
+vt 0.545646 0.297066
+vt 0.561955 0.266155
+vt 0.543727 0.269714
+vt 0.555322 0.257166
+vt 0.537387 0.262094
+vt 0.526554 0.27266
+vt 0.581699 0.262937
+vt 0.576957 0.252641
+vt 0.565582 0.241532
+vt 0.543702 0.248829
+vt 0.550357 0.230094
+vt 0.496387 0.262785
+vt 0.509284 0.25153
+vt 0.513745 0.264403
+vt 0.525705 0.256225
+vt 0.51058 0.233141
+vt 0.488727 0.247432
+vt 0.491672 0.225616
+vt 0.466102 0.243633
+vt 0.473932 0.261753
+vt 0.534066 0.218808
+vt 0.517242 0.2076
+vt 0.516335 0.345283
+vt 0.530115 0.332372
+vt 0.5362 0.354987
+vt 0.549386 0.339171
+vt 0.561842 0.32354
+vt 0.526052 0.315178
+vt 0.511862 0.325172
+vt 0.511906 0.308506
+vt 0.496234 0.31623
+vt 0.49647 0.33558
+vt 0.537837 0.306011
+vt 0.553916 0.307529
+vt 0.525234 0.302122
+vt 0.572839 0.308275
+vt 0.511827 0.28428
+vt 0.509974 0.274041
+vt 0.492244 0.274874
+vt 0.501435 0.298913
+vt 0.516739 0.294259
+vt 0.458569 0.293763
+vt 0.476889 0.290416
+vt 0.467587 0.30982
+vt 0.484835 0.304206
+vt 0.471621 0.276173
+vt 0.449551 0.277706
+vt 0.79029 0.672484
+vt 0.810842 0.6624
+vt 0.788538 0.66243
+vt 0.832155 0.647948
+vt 0.833088 0.669026
+vt 0.767347 0.664111
+vt 0.749725 0.673721
+vt 0.741816 0.650008
+vt 0.85403 0.66365
+vt 0.868881 0.657298
+vt 0.85539 0.645025
+vt 0.888074 0.648027
+vt 0.874212 0.676013
+vt 0.886607 0.695639
+vt 0.89598 0.672443
+vt 0.929571 0.681021
+vt 0.72496 0.675077
+vt 0.707382 0.674426
+vt 0.711754 0.695525
+vt 0.674442 0.675309
+vt 0.715494 0.652696
+vt 0.851241 0.674754
+vt 0.83861 0.688008
+vt 0.813905 0.681083
+vt 0.795054 0.69679
+vt 0.803314 0.729031
+vt 0.821237 0.705648
+vt 0.847489 0.712022
+vt 0.863108 0.691149
+vt 0.734908 0.685353
+vt 0.751974 0.699565
+vt 0.699285 0.735409
+vt 0.729177 0.71566
+vt 0.752628 0.735708
+vt 0.775609 0.715256
+vt 0.771295 0.685081
+vt 0.892477 0.736968
+vt 0.891992 0.760942
+vt 0.924257 0.752441
+vt 0.883161 0.790753
+vt 0.862406 0.753942
+vt 0.813012 0.768991
+vt 0.847704 0.781855
+vt 0.824711 0.812689
+vt 0.703179 0.776568
+vt 0.71878 0.801121
+vt 0.75314 0.781706
+vt 0.750148 0.828139
+vt 0.674567 0.808938
+vt 0.785894 0.799205
+vt 0.832437 0.740517
+vt 0.726143 0.758511
+vt 0.908808 0.716733
+vt 0.780855 0.754707
+vt 0.872961 0.724451
+vt 0.947275 0.716911
+vt 0.972864 0.691703
+vt 0.956423 0.743409
+vt 0.170337 0.155827
+vt 0.136076 0.187112
+vt 0.172299 0.21177
+vt 0.0899929 0.213591
+vt 0.10853 0.16596
+vt 0.195445 0.192426
+vt 0.220169 0.183556
+vt 0.218642 0.212212
+vt 0.204829 0.229447
+vt 0.228379 0.234767
+vt 0.205687 0.259413
+vt 0.137176 0.927417
+vt 0.140528 0.948093
+vt 0.168383 0.944741
+vt 0.141913 0.961891
+vt 0.11077 0.946594
+vt 0.878967 0.954757
+vt 0.879367 0.938048
+vt 0.853203 0.941375
+vt 0.880712 0.92523
+vt 0.906681 0.938262
+vt 0.989055 0.9382
+vt 0.912651 0.957127
+vt 0.886501 1.00065
+vt 0.775015 0.959635
+vt 0.84864 0.929044
+vt 0.877392 0.895069
+vt 0.91222 0.923978
+vt 0.849069 0.962192
+vt 0.254826 0.925518
+vt 0.170213 0.919654
+vt 0.126415 0.872706
+vt 0.0190149 0.939185
+vt 0.100899 0.922774
+vt 0.142077 0.988088
+vt 0.17534 0.957124
+vt 0.106027 0.960245
+vt 0.148988 0.242126
+vt 0.176354 0.291487
+vt 0.102893 0.237993
+vt 0.044388 0.5061
+vt 0.0504472 0.534124
+vt 0.0221481 0.572717
+vt 0.0611958 0.546374
+vt 0.0719578 0.52396
+vt 0.0576334 0.571748
+vt 0.0664284 0.60486
+vt 0.0810156 0.564522
+vt 0.0825732 0.542567
+vt 0.101769 0.534189
+vt 0.944378 0.797731
+vt 0.915109 0.794589
+vt 0.905965 0.833567
+vt 0.842874 0.862029
+vt 0.791851 0.848437
+vt 0.748723 0.889561
+vt 0.703598 0.850293
+vt 0.642603 0.8718
+vt 0.843173 0.630902
+vt 0.84458 0.638283
+vt 0.846408 0.629484
+vt 0.71498 0.637314
+vt 0.726461 0.64589
+vt 0.734897 0.643187
+vt 0.865004 0.824698
+vt 0.962666 0.771065
+vt 0.94749 0.766441
+vt 0.268705 0.710477
+vt 0.214528 0.765549
+vt 0.268705 0.80475
+vt 0.191977 0.815455
+vt 0.127915 0.732152
+vt 0.268814 0.267129
+vt 0.238957 0.294344
+vt 0.268669 0.328244
+vt 0.215045 0.317962
+vt 0.269051 0.235581
+vt 0.241893 0.247542
+vt 0.652939 0.625528
+vt 0.689461 0.648127
+vt 0.870091 0.634597
+vt 0.889967 0.621532
+vt 0.917129 0.645104
+vt 0.960269 0.639284
+vt 0.9614 0.662237
+vt 0.981019 0.639244
+vt 0.270276 0.136874
+vt 0.232631 0.163206
+vt 0.269409 0.175133
+vt 0.242681 0.194855
+vt 0.269141 0.208639
+vt 0.244466 0.223601
+vt 0.189521 0.653078
+vt 0.113276 0.681659
+vt 0.170828 0.555621
+vt 0.268705 0.586687
+vt 0.197069 0.290309
+vt 0.147015 0.418787
+vt 0.159401 0.426978
+vt 0.169209 0.419346
+vt 0.166865 0.4343
+vt 0.151569 0.435731
+vt 0.101359 0.388325
+vt 0.164076 0.395797
+vt 0.253799 0.393043
+vt 0.188648 0.408382
+vt 0.10719 0.406598
+vt 0.0810976 0.440196
+vt 0.135792 0.449792
+vt 0.137248 0.466241
+vt -0.0522421 0.436952
+vt 0.203585 0.425014
+vt 0.183198 0.455456
+vt 0.176955 0.4435
+vt 0.158284 0.457002
+vt 0.0646922 0.403325
+vt 0.0969534 0.389119
+vt 0.127418 0.569795
+vt 0.101838 0.557019
+vt 0.115165 0.496827
+vt 0.0856225 0.479205
+vt 0.0876161 0.508163
+vt 0.224113 0.501179
+vt 0.216094 0.528531
+vt 0.268704 0.514179
+vt 0.152679 0.481359
+vt 0.268801 0.37957
+vt 0.241478 0.353126
+vt 0.242729 0.379892
+vt 0.214493 0.347926
+vt 0.247642 0.484503
+vt 0.231652 0.461026
+vt 0.268704 0.455503
+vt 0.192219 0.333104
+vt 0.164806 0.367802
+vt 0.156354 0.409665
+vt 0.179537 0.42641
+vt 0.158914 0.444672
+vt 0.1348 0.428233
+vt 0.776574 0.644712
+vt 0.722394 0.585817
+vt 0.651799 0.541213
+vt 0.694305 0.554226
+vt 0.775203 0.583316
+vt 0.656336 0.526524
+vt 0.611254 0.502624
+vt 0.680604 0.50058
+vt 0.971205 0.496778
+vt 0.903217 0.52798
+vt 0.896582 0.566601
+vt 0.859045 0.553539
+vt 0.874064 0.496896
+vt 0.829393 0.590056
+vt 0.725299 0.540094
+vt 0.773136 0.497495
+vt 0.8247 0.537119
+vt 0.64197 0.737033
+vt 0.61964 0.737363
+vt 0.604328 0.759732
+vt 0.608299 0.714718
+vt 0.667128 0.712276
+vt 0.663413 0.762979
+vt 0.546984 0.81774
+vt 0.617742 0.813781
+vt 0.624356 0.66752
+vt 0.55686 0.648558
+vt 0.568421 0.733816
+vt 0.529143 0.743889
+vt 0.538817 0.714517
+vt 0.531598 0.768055
+vt 0.445833 0.7485
+vt 0.547123 0.691597
+vt 0.481156 0.686856
+vt 0.499408 0.724217
+vt 0.796428 0.328877
+vt 0.8169 0.338119
+vt 0.838683 0.330654
+vt 0.838689 0.3517
+vt 0.793926 0.338777
+vt 0.773732 0.337889
+vt 0.74951 0.352736
+vt 0.755441 0.328792
+vt 0.859512 0.335574
+vt 0.874845 0.341757
+vt 0.878929 0.322664
+vt 0.894264 0.350808
+vt 0.86172 0.354306
+vt 0.890121 0.302638
+vt 0.900633 0.325513
+vt 0.932192 0.315548
+vt 0.730562 0.326947
+vt 0.713106 0.328223
+vt 0.72296 0.350316
+vt 0.678736 0.328632
+vt 0.716325 0.307131
+vt 0.856285 0.324517
+vt 0.84324 0.311687
+vt 0.81933 0.31943
+vt 0.799782 0.304286
+vt 0.806322 0.271776
+vt 0.825244 0.294637
+vt 0.850879 0.287575
+vt 0.86722 0.30781
+vt 0.740091 0.316932
+vt 0.756566 0.302501
+vt 0.702583 0.267919
+vt 0.73303 0.286789
+vt 0.75555 0.26631
+vt 0.779434 0.286256
+vt 0.776741 0.316735
+vt 0.894098 0.262064
+vt 0.892695 0.238422
+vt 0.863735 0.245736
+vt 0.882937 0.208649
+vt 0.924796 0.246563
+vt 0.814013 0.231601
+vt 0.847948 0.218052
+vt 0.823544 0.187373
+vt 0.704382 0.227027
+vt 0.718515 0.201921
+vt 0.673705 0.195595
+vt 0.748091 0.173595
+vt 0.753811 0.220319
+vt 0.785435 0.201844
+vt 0.834654 0.259639
+vt 0.728096 0.244245
+vt 0.910872 0.281409
+vt 0.782742 0.246644
+vt 0.87545 0.274707
+vt 0.948004 0.28051
+vt 0.956452 0.255796
+vt 0.972736 0.303635
+vt 0.370772 0.156168
+vt 0.404072 0.188535
+vt 0.432688 0.16816
+vt 0.449944 0.216643
+vt 0.366322 0.212187
+vt 0.343097 0.19204
+vt 0.319164 0.211948
+vt 0.318073 0.183115
+vt 0.332918 0.229442
+vt 0.33175 0.259955
+vt 0.309225 0.234833
+vt 0.632664 0.937599
+vt 0.629313 0.958275
+vt 0.659071 0.956775
+vt 0.627927 0.972073
+vt 0.601457 0.954923
+vt 0.374133 0.932158
+vt 0.374091 0.913509
+vt 0.349193 0.91288
+vt 0.374675 0.900396
+vt 0.400674 0.918841
+vt 0.275352 0.917971
+vt 0.344112 0.934726
+vt 0.371547 0.982105
+vt 0.480772 0.938676
+vt 0.406833 0.906276
+vt 0.38004 0.872706
+vt 0.345731 0.899719
+vt 0.405214 0.941284
+vt 0.515015 0.935699
+vt 0.599629 0.929832
+vt 0.643425 0.882887
+vt 0.750826 0.949367
+vt 0.668941 0.932959
+vt 0.627764 0.99827
+vt 0.594501 0.967304
+vt 0.663814 0.97043
+vt 0.389421 0.243699
+vt 0.436255 0.240856
+vt 0.360739 0.292908
+vt 0.479266 0.621214
+vt 0.485524 0.569542
+vt 0.463254 0.566666
+vt 0.48531 0.54359
+vt 0.519446 0.53819
+vt 0.495549 0.530351
+vt 0.47602 0.523291
+vt 0.496385 0.522841
+vt 0.465093 0.54022
+vt 0.446797 0.527765
+vt 0.944666 0.201943
+vt 0.914837 0.204675
+vt 0.904503 0.165124
+vt 0.839442 0.136903
+vt 0.788702 0.151899
+vt 0.742974 0.111175
+vt 0.700099 0.152559
+vt 0.637093 0.13245
+vt 0.850315 0.368043
+vt 0.851223 0.360798
+vt 0.852982 0.368837
+vt 0.723976 0.366596
+vt 0.734474 0.357174
+vt 0.742904 0.359347
+vt 0.863449 0.174562
+vt 0.963467 0.230405
+vt 0.947856 0.233441
+vt 0.322881 0.76555
+vt 0.409494 0.732152
+vt 0.345432 0.815455
+vt 0.298143 0.294421
+vt 0.321942 0.31772
+vt 0.295585 0.24761
+vt 0.660357 0.382546
+vt 0.69659 0.356392
+vt 0.876132 0.364721
+vt 0.893585 0.379
+vt 0.921054 0.3528
+vt 0.961877 0.356785
+vt 0.962698 0.33339
+vt 0.981984 0.355755
+vt 0.306768 0.162912
+vt 0.295194 0.194359
+vt 0.29317 0.223456
+vt 0.347888 0.653078
+vt 0.366581 0.55562
+vt 0.424133 0.681659
+vt 0.340033 0.290817
+vt 0.388965 0.413938
+vt 0.377097 0.422446
+vt 0.385455 0.431061
+vt 0.370091 0.429971
+vt 0.366833 0.415002
+vt 0.432694 0.382302
+vt 0.370508 0.391231
+vt 0.427991 0.40079
+vt 0.346746 0.40443
+vt 0.280737 0.3905
+vt 0.456111 0.433947
+vt 0.589083 0.427663
+vt 0.401629 0.461384
+vt 0.402075 0.444828
+vt 0.332854 0.421481
+vt 0.360577 0.439441
+vt 0.355077 0.451593
+vt 0.380059 0.45258
+vt 0.470234 0.396534
+vt 0.437143 0.383
+vt 0.468433 0.48133
+vt 0.462264 0.506664
+vt 0.433175 0.490402
+vt 0.401796 0.544866
+vt 0.440576 0.55139
+vt 0.313296 0.501179
+vt 0.321314 0.528529
+vt 0.38473 0.481358
+vt 0.295899 0.353213
+vt 0.32351 0.348357
+vt 0.294922 0.38013
+vt 0.289767 0.484501
+vt 0.305757 0.461026
+vt 0.344952 0.331857
+vt 0.372123 0.361433
+vt 0.379071 0.404986
+vt 0.356951 0.422334
+vt 0.378668 0.44021
+vt 0.40174 0.423148
+vt 0.776574 0.349534
+vt 0.721657 0.410555
+vt 0.77532 0.411729
+vt 0.694232 0.4441
+vt 0.649571 0.457689
+vt 0.655529 0.474973
+vt 0.903209 0.465704
+vt 0.85855 0.440351
+vt 0.896562 0.42714
+vt 0.829462 0.404087
+vt 0.725094 0.457517
+vt 0.824792 0.456782
+vt 0.644474 0.268488
+vt 0.619495 0.269287
+vt 0.604207 0.292645
+vt 0.603235 0.247789
+vt 0.669742 0.291872
+vt 0.665491 0.242026
+vt 0.542812 0.189584
+vt 0.615129 0.192251
+vt 0.625589 0.340224
+vt 0.556065 0.36469
+vt 0.56464 0.275636
+vt 0.534275 0.296732
+vt 0.525704 0.266294
+vt 0.528338 0.24086
+vt 0.440532 0.261649
+vt 0.542934 0.319725
+vt 0.476605 0.325877
+vt 0.495044 0.287182
+f 739/1 735/2 736/3
+f 189/4 736/3 735/2
+f 192/5 738/6 737/7
+f 739/1 737/7 738/6
+f 190/8 741/9 740/10
+f 743/11 740/10 741/9
+f 736/3 189/4 743/11
+f 742/12 743/11 189/4
+f 745/13 747/14 191/15
+f 744/16 191/15 747/14
+f 746/17 747/14 190/8
+f 741/9 190/8 747/14
+f 748/18 192/5 750/19
+f 737/7 750/19 192/5
+f 191/15 749/20 745/13
+f 750/19 745/13 749/20
+f 752/21 754/22 194/23
+f 751/24 194/23 754/22
+f 753/25 189/4 754/22
+f 735/2 754/22 189/4
+f 195/26 756/27 755/28
+f 758/29 755/28 756/27
+f 194/23 757/30 752/21
+f 758/29 752/21 757/30
+f 759/31 196/32 762/33
+f 760/34 762/33 196/32
+f 761/35 762/33 195/26
+f 756/27 195/26 762/33
+f 764/36 742/12 753/25
+f 189/4 753/25 742/12
+f 196/32 763/37 760/34
+f 764/36 760/34 763/37
+f 769/38 765/39 766/40
+f 198/41 766/40 765/39
+f 768/42 769/38 201/43
+f 767/44 201/43 769/38
+f 771/45 773/46 199/47
+f 770/48 199/47 773/46
+f 772/49 773/46 198/41
+f 766/40 198/41 773/46
+f 200/50 775/51 774/52
+f 777/53 774/52 775/51
+f 199/47 776/54 771/45
+f 777/53 771/45 776/54
+f 201/43 767/44 778/55
+f 780/56 778/55 767/44
+f 779/57 780/56 200/50
+f 775/51 200/50 780/56
+f 782/58 785/59 203/60
+f 781/61 203/60 785/59
+f 784/62 785/59 205/63
+f 783/64 205/63 785/59
+f 200/50 786/65 779/57
+f 788/66 779/57 786/65
+f 203/60 787/67 782/58
+f 788/66 782/58 787/67
+f 790/68 791/69 204/70
+f 789/71 204/70 791/69
+f 774/52 791/69 200/50
+f 786/65 200/50 791/69
+f 205/63 783/64 792/72
+f 794/73 792/72 783/64
+f 204/70 793/74 790/68
+f 794/73 790/68 793/74
+f 796/75 799/76 207/77
+f 795/78 207/77 799/76
+f 210/79 798/80 797/81
+f 799/76 797/81 798/80
+f 208/82 801/83 800/84
+f 803/85 800/84 801/83
+f 207/77 802/86 796/75
+f 803/85 796/75 802/86
+f 209/87 805/88 804/89
+f 807/90 804/89 805/88
+f 806/91 807/90 208/82
+f 801/83 208/82 807/90
+f 797/81 810/92 210/79
+f 808/93 210/79 810/92
+f 809/94 810/92 209/87
+f 805/88 209/87 810/92
+f 201/43 811/95 768/42
+f 814/96 768/42 811/95
+f 214/97 813/98 812/99
+f 814/96 812/99 813/98
+f 816/100 817/101 212/102
+f 815/103 212/102 817/101
+f 778/55 817/101 201/43
+f 811/95 201/43 817/101
+f 213/104 819/105 818/106
+f 821/107 818/106 819/105
+f 212/102 820/108 816/100
+f 821/107 816/100 820/108
+f 812/99 824/109 214/97
+f 822/110 214/97 824/109
+f 823/111 824/109 213/104
+f 819/105 213/104 824/109
+f 825/112 828/113 192/5
+f 738/6 192/5 828/113
+f 827/114 828/113 217/115
+f 826/116 217/115 828/113
+f 213/104 829/117 823/111
+f 830/118 823/111 829/117
+f 830/118 825/112 748/18
+f 192/5 748/18 825/112
+f 832/119 833/120 216/121
+f 831/122 216/121 833/120
+f 818/106 833/120 213/104
+f 829/117 213/104 833/120
+f 217/115 826/116 834/123
+f 836/124 834/123 826/116
+f 216/121 835/125 832/119
+f 836/124 832/119 835/125
+f 219/126 838/127 837/128
+f 841/129 837/128 838/127
+f 839/130 221/131 841/129
+f 840/132 841/129 221/131
+f 842/133 844/134 216/121
+f 835/125 216/121 844/134
+f 843/135 844/134 219/126
+f 838/127 219/126 844/134
+f 847/136 845/137 846/138
+f 220/139 846/138 845/137
+f 216/121 831/122 842/133
+f 847/136 842/133 831/122
+f 850/140 848/141 839/130
+f 221/131 839/130 848/141
+f 846/138 220/139 850/140
+f 849/142 850/140 220/139
+f 223/143 852/144 851/145
+f 854/146 851/145 852/144
+f 203/60 781/61 853/147
+f 854/146 853/147 781/61
+f 220/139 855/148 849/142
+f 857/149 849/142 855/148
+f 852/144 223/143 857/149
+f 856/150 857/149 223/143
+f 820/108 212/102 859/151
+f 858/152 859/151 212/102
+f 845/137 859/151 220/139
+f 855/148 220/139 859/151
+f 787/67 203/60 860/153
+f 853/147 860/153 203/60
+f 212/102 815/103 858/152
+f 860/153 858/152 815/103
+f 225/154 862/155 861/156
+f 864/157 861/156 862/155
+f 207/77 795/78 863/158
+f 864/157 863/158 795/78
+f 866/159 868/160 226/161
+f 865/162 226/161 868/160
+f 867/163 868/160 225/154
+f 862/155 225/154 868/160
+f 227/164 870/165 869/166
+f 872/167 869/166 870/165
+f 226/161 871/168 866/159
+f 872/167 866/159 871/168
+f 863/158 874/169 207/77
+f 802/86 207/77 874/169
+f 873/170 874/169 227/164
+f 870/165 227/164 874/169
+f 876/171 879/172 229/173
+f 875/174 229/173 879/172
+f 877/175 231/176 879/172
+f 878/177 879/172 231/176
+f 227/164 880/178 873/170
+f 882/179 873/170 880/178
+f 229/173 881/180 876/171
+f 882/179 876/171 881/180
+f 884/181 885/182 230/183
+f 883/184 230/183 885/182
+f 869/166 885/182 227/164
+f 880/178 227/164 885/182
+f 888/185 886/186 877/175
+f 231/176 877/175 886/186
+f 230/183 887/187 884/181
+f 888/185 884/181 887/187
+f 892/188 889/189 890/190
+f 233/191 890/190 889/189
+f 892/188 891/192 837/128
+f 219/126 837/128 891/192
+f 893/193 895/194 230/183
+f 887/187 230/183 895/194
+f 894/195 895/194 233/191
+f 890/190 233/191 895/194
+f 234/196 897/197 896/198
+f 898/199 896/198 897/197
+f 230/183 883/184 893/193
+f 898/199 893/193 883/184
+f 900/200 843/135 891/192
+f 219/126 891/192 843/135
+f 899/201 900/200 234/196
+f 897/197 234/196 900/200
+f 217/115 901/202 827/114
+f 903/203 827/114 901/202
+f 194/23 751/24 902/204
+f 903/203 902/204 751/24
+f 234/196 904/205 899/201
+f 905/206 899/201 904/205
+f 834/123 905/206 217/115
+f 901/202 217/115 905/206
+f 906/207 907/208 226/161
+f 871/168 226/161 907/208
+f 896/198 907/208 234/196
+f 904/205 234/196 907/208
+f 908/209 757/30 902/204
+f 194/23 902/204 757/30
+f 226/161 865/162 906/207
+f 908/209 906/207 865/162
+f 910/210 913/211 237/212
+f 909/213 237/212 913/211
+f 912/214 913/211 240/215
+f 911/216 240/215 913/211
+f 238/217 915/218 914/219
+f 917/220 914/219 915/218
+f 917/220 910/210 916/221
+f 237/212 916/221 910/210
+f 918/222 239/223 921/224
+f 919/225 921/224 239/223
+f 920/226 921/224 238/217
+f 915/218 238/217 921/224
+f 240/215 911/216 922/227
+f 924/228 922/227 911/216
+f 239/223 923/229 919/225
+f 924/228 919/225 923/229
+f 926/230 929/231 242/232
+f 925/233 242/232 929/231
+f 929/231 927/234 928/235
+f 244/236 928/235 927/234
+f 239/223 930/237 923/229
+f 932/238 923/229 930/237
+f 242/232 931/239 926/230
+f 932/238 926/230 931/239
+f 934/240 935/241 243/242
+f 933/243 243/242 935/241
+f 918/222 935/241 239/223
+f 930/237 239/223 935/241
+f 938/244 936/245 927/234
+f 244/236 927/234 936/245
+f 243/242 937/246 934/240
+f 938/244 934/240 937/246
+f 246/247 940/248 939/249
+f 943/250 939/249 940/248
+f 249/251 942/252 941/253
+f 943/250 941/253 942/252
+f 945/254 947/255 247/256
+f 944/257 247/256 947/255
+f 946/258 947/255 246/247
+f 940/248 246/247 947/255
+f 951/259 948/260 949/261
+f 248/262 949/261 948/260
+f 247/256 950/263 945/254
+f 951/259 945/254 950/263
+f 952/264 249/251 954/265
+f 941/253 954/265 249/251
+f 953/266 954/265 248/262
+f 949/261 248/262 954/265
+f 955/267 958/268 244/236
+f 928/235 244/236 958/268
+f 957/269 958/268 252/270
+f 956/271 252/270 958/268
+f 251/272 960/273 959/274
+f 961/275 959/274 960/273
+f 244/236 936/245 955/267
+f 961/275 955/267 936/245
+f 962/276 964/277 247/256
+f 950/263 247/256 964/277
+f 963/278 964/277 251/272
+f 960/273 251/272 964/277
+f 252/270 956/271 965/279
+f 966/280 965/279 956/271
+f 247/256 944/257 962/276
+f 966/280 962/276 944/257
+f 221/131 967/281 840/132
+f 970/282 840/132 967/281
+f 255/283 969/284 968/285
+f 970/282 968/285 969/284
+f 972/286 973/287 254/288
+f 971/289 254/288 973/287
+f 848/141 973/287 221/131
+f 967/281 221/131 973/287
+f 976/290 931/239 974/291
+f 242/232 974/291 931/239
+f 254/288 975/292 972/286
+f 976/290 972/286 975/292
+f 968/285 978/293 255/283
+f 977/294 255/283 978/293
+f 974/291 242/232 978/293
+f 925/233 978/293 242/232
+f 980/295 982/296 257/297
+f 979/298 257/297 982/296
+f 886/186 982/296 231/176
+f 981/299 231/176 982/296
+f 985/300 946/258 983/301
+f 246/247 983/301 946/258
+f 257/297 984/302 980/295
+f 985/300 980/295 984/302
+f 258/303 987/304 986/305
+f 988/306 986/305 987/304
+f 246/247 939/249 983/301
+f 988/306 983/301 939/249
+f 231/176 981/299 878/177
+f 990/307 878/177 981/299
+f 990/307 987/304 989/308
+f 258/303 989/308 987/304
+f 991/309 260/310 994/311
+f 992/312 994/311 260/310
+f 792/72 994/311 205/63
+f 993/313 205/63 994/311
+f 916/221 237/212 997/314
+f 995/315 997/314 237/212
+f 260/310 996/316 992/312
+f 997/314 992/312 996/316
+f 999/317 1000/318 261/319
+f 998/320 261/319 1000/318
+f 237/212 909/213 995/315
+f 1000/318 995/315 909/213
+f 205/63 993/313 784/62
+f 1002/321 784/62 993/313
+f 1002/321 999/317 1001/322
+f 261/319 1001/322 999/317
+f 1003/323 1005/324 257/297
+f 984/302 257/297 1005/324
+f 965/279 1005/324 252/270
+f 1004/325 252/270 1005/324
+f 233/191 1006/326 894/195
+f 1007/327 894/195 1006/326
+f 257/297 979/298 1003/323
+f 1007/327 1003/323 979/298
+f 1008/328 1009/329 255/283
+f 969/284 255/283 1009/329
+f 889/189 1009/329 233/191
+f 1006/326 233/191 1009/329
+f 252/270 1004/325 957/269
+f 1010/330 957/269 1004/325
+f 255/283 977/294 1008/328
+f 1010/330 1008/328 977/294
+f 223/143 1011/331 856/150
+f 1013/332 856/150 1011/331
+f 254/288 971/289 1012/333
+f 1013/332 1012/333 971/289
+f 1014/334 1015/335 261/319
+f 1001/322 261/319 1015/335
+f 851/145 1015/335 223/143
+f 1011/331 223/143 1015/335
+f 240/215 1016/336 912/214
+f 1017/337 912/214 1016/336
+f 261/319 998/320 1014/334
+f 1017/337 1014/334 998/320
+f 1012/333 1018/338 254/288
+f 975/292 254/288 1018/338
+f 922/227 1018/338 240/215
+f 1016/336 240/215 1018/338
+f 1019/339 265/340 1022/341
+f 1020/342 1022/341 265/340
+f 1022/341 1021/343 991/309
+f 260/310 991/309 1021/343
+f 1023/344 266/345 1026/346
+f 1024/347 1026/346 266/345
+f 1025/348 1026/346 265/340
+f 1020/342 265/340 1026/346
+f 1030/349 1027/350 1028/351
+f 267/352 1028/351 1027/350
+f 266/345 1029/353 1024/347
+f 1030/349 1024/347 1029/353
+f 1021/343 1032/354 260/310
+f 996/316 260/310 1032/354
+f 1028/351 267/352 1032/354
+f 1031/355 1032/354 267/352
+f 269/356 1034/357 1033/358
+f 1036/359 1033/358 1034/357
+f 1036/359 1035/360 1023/361
+f 266/362 1023/361 1035/360
+f 1038/363 1040/364 270/365
+f 1037/366 270/365 1040/364
+f 1039/367 1040/364 269/356
+f 1034/357 269/356 1040/364
+f 1044/368 1041/369 1042/370
+f 271/371 1042/370 1041/369
+f 270/365 1043/372 1038/363
+f 1044/368 1038/363 1043/372
+f 1035/360 1046/373 266/362
+f 1029/374 266/362 1046/373
+f 1042/370 271/371 1046/373
+f 1045/375 1046/373 271/371
+f 1048/376 1050/377 273/378
+f 1047/379 273/378 1050/377
+f 1033/358 1050/377 269/356
+f 1049/380 269/356 1050/377
+f 274/381 1052/382 1051/383
+f 1054/384 1051/383 1052/382
+f 273/378 1053/385 1048/376
+f 1054/384 1048/376 1053/385
+f 1056/386 1058/387 275/388
+f 1055/389 275/388 1058/387
+f 1057/390 1058/387 274/381
+f 1052/382 274/381 1058/387
+f 269/356 1049/380 1039/367
+f 1060/391 1039/367 1049/380
+f 275/388 1059/392 1056/386
+f 1060/391 1056/386 1059/392
+f 1061/393 1064/394 275/388
+f 1059/392 275/388 1064/394
+f 1063/395 1064/394 279/396
+f 1062/397 279/396 1064/394
+f 277/398 1066/399 1065/400
+f 1067/401 1065/400 1066/399
+f 275/388 1055/389 1061/393
+f 1067/401 1061/393 1055/389
+f 278/402 1069/403 1068/404
+f 1071/405 1068/404 1069/403
+f 1070/406 1071/405 277/398
+f 1066/399 277/398 1071/405
+f 279/396 1062/397 1072/407
+f 1074/408 1072/407 1062/397
+f 1073/409 1074/408 278/402
+f 1069/403 278/402 1074/408
+f 1076/410 1079/411 281/412
+f 1075/413 281/412 1079/411
+f 1077/414 284/415 1079/411
+f 1078/416 1079/411 284/415
+f 1081/417 1083/418 282/419
+f 1080/420 282/419 1083/418
+f 281/412 1082/421 1076/410
+f 1083/418 1076/410 1082/421
+f 283/422 1085/423 1084/424
+f 1087/425 1084/424 1085/423
+f 282/419 1086/426 1081/417
+f 1087/425 1081/417 1086/426
+f 284/415 1077/414 1088/427
+f 1090/428 1088/427 1077/414
+f 1090/428 1085/423 1089/429
+f 283/422 1089/429 1085/423
+f 1092/430 1095/431 286/432
+f 1091/433 286/432 1095/431
+f 1095/431 1093/434 1094/435
+f 289/436 1094/435 1093/434
+f 1097/437 1099/438 287/439
+f 1096/440 287/439 1099/438
+f 286/432 1098/441 1092/430
+f 1099/438 1092/430 1098/441
+f 288/442 1101/443 1100/444
+f 1103/445 1100/444 1101/443
+f 287/439 1102/446 1097/437
+f 1103/445 1097/437 1102/446
+f 289/436 1093/434 1104/447
+f 1106/448 1104/447 1093/434
+f 1101/443 288/442 1106/448
+f 1105/449 1106/448 288/442
+f 225/450 1107/451 867/452
+f 1110/453 867/452 1107/451
+f 292/454 1109/455 1108/456
+f 1110/453 1108/456 1109/455
+f 1112/457 1113/458 291/459
+f 1111/460 291/459 1113/458
+f 861/461 1113/458 225/450
+f 1107/451 225/450 1113/458
+f 289/436 1114/462 1094/435
+f 1116/463 1094/435 1114/462
+f 291/459 1115/464 1112/457
+f 1116/463 1112/457 1115/464
+f 1108/456 1118/465 292/454
+f 1117/466 292/454 1118/465
+f 289/436 1104/447 1114/462
+f 1118/465 1114/462 1104/447
+f 1120/467 1123/468 294/469
+f 1119/470 294/469 1123/468
+f 1122/471 1123/468 296/472
+f 1121/473 296/472 1123/468
+f 1127/474 1124/475 1125/476
+f 295/477 1125/476 1124/475
+f 294/469 1126/478 1120/467
+f 1127/474 1120/467 1126/478
+f 1128/479 1130/480 287/439
+f 1102/446 287/439 1130/480
+f 295/477 1129/481 1125/476
+f 1130/480 1125/476 1129/481
+f 296/472 1121/473 1131/482
+f 1132/483 1131/482 1121/473
+f 287/439 1096/440 1128/479
+f 1132/483 1128/479 1096/440
+f 1133/484 1135/485 292/454
+f 1109/455 292/454 1135/485
+f 755/486 1135/485 195/487
+f 1134/488 195/487 1135/485
+f 288/442 1136/489 1105/449
+f 1137/490 1105/449 1136/489
+f 292/454 1117/466 1133/484
+f 1137/490 1133/484 1117/466
+f 295/477 1138/491 1129/481
+f 1139/492 1129/481 1138/491
+f 1136/489 288/442 1139/492
+f 1100/444 1139/492 288/442
+f 195/487 1134/488 761/493
+f 1140/494 761/493 1134/488
+f 295/477 1124/475 1138/491
+f 1140/494 1138/491 1124/475
+f 1143/495 1122/471 1141/496
+f 296/472 1141/496 1122/471
+f 1143/495 1142/497 808/498
+f 210/499 808/498 1142/497
+f 1144/500 1145/501 286/432
+f 1098/441 286/432 1145/501
+f 1141/496 296/472 1145/501
+f 1131/482 1145/501 296/472
+f 291/459 1146/502 1115/464
+f 1147/503 1115/464 1146/502
+f 286/432 1091/433 1144/500
+f 1147/503 1144/500 1091/433
+f 1142/497 1148/504 210/499
+f 798/505 210/499 1148/504
+f 1146/502 291/459 1148/504
+f 1111/460 1148/504 291/459
+f 300/506 1150/507 1149/508
+f 1152/509 1149/508 1150/507
+f 214/510 822/511 1151/512
+f 1152/509 1151/512 822/511
+f 1153/513 1155/514 281/412
+f 1082/421 281/412 1155/514
+f 1150/507 300/506 1155/514
+f 1154/515 1155/514 300/506
+f 301/516 1157/517 1156/518
+f 1158/519 1156/518 1157/517
+f 281/412 1075/413 1153/513
+f 1158/519 1153/513 1075/413
+f 1151/512 1160/520 214/510
+f 813/521 214/510 1160/520
+f 1157/517 301/516 1160/520
+f 1159/522 1160/520 301/516
+f 198/523 1161/524 772/525
+f 1164/526 772/525 1161/524
+f 1164/526 1162/527 1163/528
+f 303/529 1163/528 1162/527
+f 1165/530 1166/531 301/516
+f 1159/522 301/516 1166/531
+f 765/532 1166/531 198/523
+f 1161/524 198/523 1166/531
+f 284/415 1167/533 1078/416
+f 1168/534 1078/416 1167/533
+f 301/516 1156/518 1165/530
+f 1168/534 1165/530 1156/518
+f 1162/527 1170/535 303/529
+f 1169/536 303/529 1170/535
+f 1167/533 284/415 1170/535
+f 1088/427 1170/535 284/415
+f 305/537 1172/538 1171/539
+f 1174/540 1171/539 1172/538
+f 191/541 744/542 1173/543
+f 1174/540 1173/543 744/542
+f 1175/544 1177/545 282/419
+f 1086/426 282/419 1177/545
+f 1176/546 1177/545 305/537
+f 1172/538 305/537 1177/545
+f 300/506 1178/547 1154/515
+f 1179/548 1154/515 1178/547
+f 282/419 1080/420 1175/544
+f 1179/548 1175/544 1080/420
+f 1173/543 1180/549 191/541
+f 749/550 191/541 1180/549
+f 1149/508 1180/549 300/506
+f 1178/547 300/506 1180/549
+f 1181/551 1184/552 303/529
+f 1163/528 303/529 1184/552
+f 1183/553 1184/552 307/554
+f 1182/555 307/554 1184/552
+f 283/422 1185/556 1089/429
+f 1186/557 1089/429 1185/556
+f 303/529 1169/536 1181/551
+f 1186/557 1181/551 1169/536
+f 305/537 1187/558 1176/546
+f 1188/559 1176/546 1187/558
+f 283/422 1084/424 1185/556
+f 1188/559 1185/556 1084/424
+f 307/554 1182/555 1189/560
+f 1190/561 1189/560 1182/555
+f 1171/539 1190/561 305/537
+f 1187/558 305/537 1190/561
+f 1192/562 1194/563 309/564
+f 1191/565 309/564 1194/563
+f 1072/407 1194/563 279/396
+f 1193/566 279/396 1194/563
+f 1195/567 310/568 1198/569
+f 1196/570 1198/569 310/568
+f 1192/562 309/564 1198/569
+f 1197/571 1198/569 309/564
+f 1202/572 1199/573 1200/574
+f 311/575 1200/574 1199/573
+f 310/568 1201/576 1196/570
+f 1202/572 1196/570 1201/576
+f 1203/577 1205/578 270/365
+f 1043/372 270/365 1205/578
+f 1200/574 311/575 1205/578
+f 1204/579 1205/578 311/575
+f 279/396 1193/566 1063/395
+f 1206/580 1063/395 1193/566
+f 270/365 1037/366 1203/577
+f 1206/580 1203/577 1037/366
+f 1208/581 1211/582 313/583
+f 1207/584 313/583 1211/582
+f 1210/585 1211/582 316/586
+f 1209/587 316/586 1211/582
+f 314/588 1213/589 1212/590
+f 1215/591 1212/590 1213/589
+f 313/583 1214/592 1208/581
+f 1215/591 1208/581 1214/592
+f 315/593 1217/594 1216/595
+f 1219/596 1216/595 1217/594
+f 1219/596 1213/589 1218/597
+f 314/588 1218/597 1213/589
+f 1209/587 1222/598 316/586
+f 1220/599 316/586 1222/598
+f 1221/600 1222/598 315/593
+f 1217/594 315/593 1222/598
+f 1223/601 1225/602 310/603
+f 1201/604 310/603 1225/602
+f 1212/590 1225/602 314/588
+f 1224/605 314/588 1225/602
+f 318/606 1227/607 1226/608
+f 1228/609 1226/608 1227/607
+f 1228/609 1223/601 1195/610
+f 310/603 1195/610 1223/601
+f 1230/611 1232/612 319/613
+f 1229/614 319/613 1232/612
+f 1231/615 1232/612 318/606
+f 1227/607 318/606 1232/612
+f 314/588 1224/605 1218/597
+f 1234/616 1218/597 1224/605
+f 319/613 1233/617 1230/611
+f 1234/616 1230/611 1233/617
+f 1236/618 1238/619 321/620
+f 1235/621 321/620 1238/619
+f 1237/622 319/613 1238/619
+f 1229/614 1238/619 319/613
+f 322/623 1240/624 1239/625
+f 1242/626 1239/625 1240/624
+f 321/620 1241/627 1236/618
+f 1242/626 1236/618 1241/627
+f 315/593 1243/628 1221/600
+f 1245/629 1221/600 1243/628
+f 1240/624 322/623 1245/629
+f 1244/630 1245/629 322/623
+f 319/613 1237/622 1233/617
+f 1246/631 1233/617 1237/622
+f 1246/631 1243/628 1216/595
+f 315/593 1216/595 1243/628
+f 324/632 1248/633 1247/634
+f 1251/635 1247/634 1248/633
+f 326/636 1250/637 1249/638
+f 1251/635 1249/638 1250/637
+f 1253/639 1255/640 325/641
+f 1252/642 325/641 1255/640
+f 1254/643 1255/640 324/632
+f 1248/633 324/632 1255/640
+f 238/217 1256/644 920/226
+f 1258/645 920/226 1256/644
+f 325/641 1257/646 1253/639
+f 1258/645 1253/639 1257/646
+f 1249/638 1260/647 326/636
+f 1259/648 326/636 1260/647
+f 914/219 1260/647 238/217
+f 1256/644 238/217 1260/647
+f 1265/649 1261/650 1262/651
+f 328/652 1262/651 1261/650
+f 330/653 1264/654 1263/655
+f 1265/649 1263/655 1264/654
+f 1267/656 1269/657 329/658
+f 1266/659 329/658 1269/657
+f 1262/651 328/652 1269/657
+f 1268/660 1269/657 328/652
+f 251/272 1270/661 963/278
+f 1272/662 963/278 1270/661
+f 1272/662 1267/656 1271/663
+f 329/658 1271/663 1267/656
+f 1263/655 1274/664 330/653
+f 1273/665 330/653 1274/664
+f 959/274 1274/664 251/272
+f 1270/661 251/272 1274/664
+f 1278/666 1275/667 1276/668
+f 332/669 1276/668 1275/667
+f 329/658 1266/659 1277/670
+f 1278/666 1277/670 1266/659
+f 1280/671 1282/672 333/673
+f 1279/674 333/673 1282/672
+f 1276/668 332/669 1282/672
+f 1281/675 1282/672 332/669
+f 1285/676 953/266 1283/677
+f 248/262 1283/677 953/266
+f 333/673 1284/678 1280/671
+f 1285/676 1280/671 1284/678
+f 1271/663 329/658 1286/679
+f 1277/670 1286/679 329/658
+f 948/260 1286/679 248/262
+f 1283/677 248/262 1286/679
+f 335/680 1288/681 1287/682
+f 1291/683 1287/682 1288/681
+f 337/684 1290/685 1289/686
+f 1291/683 1289/686 1290/685
+f 1293/687 1295/688 336/689
+f 1292/690 336/689 1295/688
+f 1288/681 335/680 1295/688
+f 1294/691 1295/688 335/680
+f 1298/692 1183/693 1296/694
+f 307/695 1296/694 1183/693
+f 1298/692 1293/687 1297/696
+f 336/689 1297/696 1293/687
+f 1289/686 1300/697 337/684
+f 1299/698 337/684 1300/697
+f 1296/694 307/695 1300/697
+f 1189/699 1300/697 307/695
+f 1302/700 1305/701 339/702
+f 1301/703 339/702 1305/701
+f 1304/704 1305/701 341/705
+f 1303/706 341/705 1305/701
+f 340/707 1307/708 1306/709
+f 1309/710 1306/709 1307/708
+f 1309/710 1302/700 1308/711
+f 339/702 1308/711 1302/700
+f 1310/712 1312/713 294/714
+f 1126/715 294/714 1312/713
+f 1307/708 340/707 1312/713
+f 1311/716 1312/713 340/707
+f 341/705 1303/706 1313/717
+f 1314/718 1313/717 1303/706
+f 294/714 1119/719 1310/712
+f 1314/718 1310/712 1119/719
+f 1318/720 1315/721 1316/722
+f 343/723 1316/722 1315/721
+f 325/641 1252/642 1317/724
+f 1318/720 1317/724 1252/642
+f 1319/725 1321/726 330/653
+f 1264/654 330/653 1321/726
+f 1316/722 343/723 1321/726
+f 1320/727 1321/726 343/723
+f 243/242 1322/728 937/246
+f 1323/729 937/246 1322/728
+f 1323/729 1319/725 1273/665
+f 330/653 1273/665 1319/725
+f 1317/724 1324/730 325/641
+f 1257/646 325/641 1324/730
+f 933/243 1324/730 243/242
+f 1322/728 243/242 1324/730
+f 345/731 1326/732 1325/733
+f 1329/734 1325/733 1326/732
+f 346/735 1328/736 1327/737
+f 1329/734 1327/737 1328/736
+f 1330/738 1332/739 326/636
+f 1250/637 326/636 1332/739
+f 1331/740 1332/739 345/731
+f 1326/732 345/731 1332/739
+f 267/352 1333/741 1031/355
+f 1334/742 1031/355 1333/741
+f 326/636 1259/648 1330/738
+f 1334/742 1330/738 1259/648
+f 1335/743 346/735 1336/744
+f 1327/737 1336/744 346/735
+f 1027/350 1336/744 267/352
+f 1333/741 267/352 1336/744
+f 1341/745 1337/746 1338/747
+f 348/748 1338/747 1337/746
+f 349/749 1340/750 1339/751
+f 1341/745 1339/751 1340/750
+f 1342/752 1344/753 346/754
+f 1328/755 346/754 1344/753
+f 1338/747 348/748 1344/753
+f 1343/756 1344/753 348/748
+f 271/757 1345/758 1045/759
+f 1346/760 1045/759 1345/758
+f 1346/760 1342/752 1335/761
+f 346/754 1335/761 1342/752
+f 349/749 1339/751 1347/762
+f 1348/763 1347/762 1339/751
+f 1041/764 1348/763 271/757
+f 1345/758 271/757 1348/763
+f 1353/765 1349/766 1350/767
+f 351/768 1350/767 1349/766
+f 354/769 1352/770 1351/771
+f 1353/765 1351/771 1352/770
+f 1355/772 1357/773 352/774
+f 1354/775 352/774 1357/773
+f 1350/767 351/768 1357/773
+f 1356/776 1357/773 351/768
+f 353/777 1359/778 1358/779
+f 1361/780 1358/779 1359/778
+f 352/774 1360/781 1355/772
+f 1361/780 1355/772 1360/781
+f 1351/771 1364/782 354/769
+f 1362/783 354/769 1364/782
+f 1363/784 1364/782 353/777
+f 1359/778 353/777 1364/782
+f 1365/785 356/786 1369/787
+f 1366/788 1369/787 356/786
+f 1369/787 1367/789 1368/790
+f 357/791 1368/790 1367/789
+f 1370/792 1372/793 354/769
+f 1352/770 354/769 1372/793
+f 356/786 1371/794 1366/788
+f 1372/793 1366/788 1371/794
+f 1374/795 1073/409 1373/796
+f 278/402 1373/796 1073/409
+f 354/769 1362/783 1370/792
+f 1374/795 1370/792 1362/783
+f 1367/789 1376/797 357/791
+f 1375/798 357/791 1376/797
+f 278/402 1068/404 1373/796
+f 1376/797 1373/796 1068/404
+f 1381/799 1377/800 1378/801
+f 359/802 1378/801 1377/800
+f 360/803 1380/804 1379/805
+f 1381/799 1379/805 1380/804
+f 1384/806 1304/704 1382/807
+f 341/705 1382/807 1304/704
+f 1378/801 359/802 1384/806
+f 1383/808 1384/806 359/802
+f 209/87 1385/809 809/94
+f 1386/810 809/94 1385/809
+f 1382/807 341/705 1386/810
+f 1313/717 1386/810 341/705
+f 1379/805 1388/811 360/803
+f 1387/812 360/803 1388/811
+f 804/89 1388/811 209/87
+f 1385/809 209/87 1388/811
+f 1390/813 1392/814 362/815
+f 1389/816 362/815 1392/814
+f 1391/817 336/689 1392/814
+f 1292/690 1392/814 336/689
+f 363/818 1394/819 1393/820
+f 1396/821 1393/820 1394/819
+f 1396/821 1390/813 1395/822
+f 362/815 1395/822 1390/813
+f 1397/823 1399/824 199/47
+f 776/54 199/47 1399/824
+f 1398/825 1399/824 363/818
+f 1394/819 363/818 1399/824
+f 1400/826 1297/696 1391/817
+f 336/689 1391/817 1297/696
+f 199/47 770/48 1397/823
+f 1400/826 1397/823 770/48
+f 1402/827 1404/828 365/829
+f 1401/830 365/829 1404/828
+f 1403/831 363/818 1404/828
+f 1393/820 1404/828 363/818
+f 366/832 1406/833 1405/834
+f 1408/835 1405/834 1406/833
+f 1408/835 1402/827 1407/836
+f 365/829 1407/836 1402/827
+f 1409/837 1411/838 204/70
+f 793/74 204/70 1411/838
+f 1410/839 1411/838 366/832
+f 1406/833 366/832 1411/838
+f 363/818 1403/831 1398/825
+f 1412/840 1398/825 1403/831
+f 204/70 789/71 1409/837
+f 1412/840 1409/837 789/71
+f 1414/841 1416/842 368/843
+f 1413/844 368/843 1416/842
+f 1405/834 1416/842 366/832
+f 1415/845 366/832 1416/842
+f 1418/846 1420/847 369/848
+f 1417/849 369/848 1420/847
+f 1419/850 1420/847 368/843
+f 1414/841 368/843 1420/847
+f 265/340 1421/851 1025/348
+f 1423/852 1025/348 1421/851
+f 1423/852 1418/846 1422/853
+f 369/848 1422/853 1418/846
+f 366/832 1415/845 1410/839
+f 1424/854 1410/839 1415/845
+f 1424/854 1421/851 1019/339
+f 265/340 1019/339 1421/851
+f 371/855 1426/856 1425/857
+f 1428/858 1425/857 1426/856
+f 369/859 1417/860 1427/861
+f 1428/858 1427/861 1417/860
+f 372/862 1430/863 1429/864
+f 1432/865 1429/864 1430/863
+f 1431/866 1432/865 371/855
+f 1426/856 371/855 1432/865
+f 1433/867 1435/868 273/378
+f 1053/385 273/378 1435/868
+f 1434/869 1435/868 372/862
+f 1430/863 372/862 1435/868
+f 1427/861 1436/870 369/859
+f 1422/871 369/859 1436/870
+f 273/378 1047/379 1433/867
+f 1436/870 1433/867 1047/379
+f 1438/872 1440/873 374/874
+f 1437/875 374/874 1440/873
+f 1429/864 1440/873 372/862
+f 1439/876 372/862 1440/873
+f 375/877 1442/878 1441/879
+f 1444/880 1441/879 1442/878
+f 1444/880 1438/872 1443/881
+f 374/874 1443/881 1438/872
+f 1445/882 1447/883 274/381
+f 1057/390 274/381 1447/883
+f 1446/884 1447/883 375/877
+f 1442/878 375/877 1447/883
+f 372/862 1439/876 1434/869
+f 1448/885 1434/869 1439/876
+f 274/381 1051/383 1445/882
+f 1448/885 1445/882 1051/383
+f 1452/886 1449/887 1450/888
+f 377/889 1450/888 1449/887
+f 1451/890 375/877 1452/886
+f 1441/879 1452/886 375/877
+f 1453/891 1455/892 357/791
+f 1368/790 357/791 1455/892
+f 1450/888 377/889 1455/892
+f 1454/893 1455/892 377/889
+f 277/398 1456/894 1070/406
+f 1457/895 1070/406 1456/894
+f 357/791 1375/798 1453/891
+f 1457/895 1453/891 1375/798
+f 1446/884 375/877 1458/896
+f 1451/890 1458/896 375/877
+f 1065/400 1458/896 277/398
+f 1456/894 277/398 1458/896
+f 1462/897 1340/750 1459/898
+f 349/749 1459/898 1340/750
+f 1461/899 1462/897 380/900
+f 1460/901 380/900 1462/897
+f 1464/902 1204/903 1463/904
+f 311/905 1463/904 1204/903
+f 1459/898 349/749 1464/902
+f 1347/762 1464/902 349/749
+f 1465/906 1466/907 313/908
+f 1214/909 313/908 1466/907
+f 1199/910 1466/907 311/905
+f 1463/904 311/905 1466/907
+f 379/911 1468/912 1467/913
+f 1469/914 1467/913 1468/912
+f 313/908 1207/915 1465/906
+f 1469/914 1465/906 1207/915
+f 380/900 1460/901 1470/916
+f 1472/917 1470/916 1460/901
+f 1472/917 1468/912 1471/918
+f 379/911 1471/918 1468/912
+f 1363/784 353/777 1475/919
+f 1473/920 1475/919 353/777
+f 1191/565 1475/919 309/564
+f 1474/921 309/564 1475/919
+f 1477/922 1478/923 382/924
+f 1476/925 382/924 1478/923
+f 1358/779 1478/923 353/777
+f 1473/920 353/777 1478/923
+f 309/564 1474/921 1197/571
+f 1480/926 1197/571 1474/921
+f 382/924 1479/927 1477/922
+f 1480/926 1477/922 1479/927
+f 384/928 1482/929 1481/930
+f 1485/931 1481/930 1482/929
+f 387/932 1484/933 1483/934
+f 1485/931 1483/934 1484/933
+f 1487/935 1489/936 385/937
+f 1486/938 385/937 1489/936
+f 1488/939 1489/936 384/928
+f 1482/929 384/928 1489/936
+f 386/940 1491/941 1490/942
+f 1493/943 1490/942 1491/941
+f 385/937 1492/944 1487/935
+f 1493/943 1487/935 1492/944
+f 1483/934 1496/945 387/932
+f 1494/946 387/932 1496/945
+f 1491/941 386/940 1496/945
+f 1495/947 1496/945 386/940
+f 1501/948 1497/949 1498/950
+f 389/951 1498/950 1497/949
+f 392/952 1500/953 1499/954
+f 1501/948 1499/954 1500/953
+f 1503/955 1505/956 390/957
+f 1502/958 390/957 1505/956
+f 1498/950 389/951 1505/956
+f 1504/959 1505/956 389/951
+f 1507/960 1509/961 391/962
+f 1506/963 391/962 1509/961
+f 1509/961 1503/955 1508/964
+f 390/957 1508/964 1503/955
+f 1499/954 1512/965 392/952
+f 1510/966 392/952 1512/965
+f 391/962 1511/967 1507/960
+f 1512/965 1507/960 1511/967
+f 1513/968 394/969 1517/970
+f 1514/971 1517/970 394/969
+f 1516/972 1517/970 397/973
+f 1515/974 397/973 1517/970
+f 1521/975 1518/976 1519/977
+f 395/978 1519/977 1518/976
+f 1514/971 394/969 1521/975
+f 1520/979 1521/975 394/969
+f 1523/980 1525/981 396/982
+f 1522/983 396/982 1525/981
+f 1519/977 395/978 1525/981
+f 1524/984 1525/981 395/978
+f 397/973 1515/974 1526/985
+f 1528/986 1526/985 1515/974
+f 1528/986 1523/980 1527/987
+f 396/982 1527/987 1523/980
+f 399/988 1530/989 1529/990
+f 1532/991 1529/990 1530/989
+f 1502/958 1532/991 390/957
+f 1531/992 390/957 1532/991
+f 1534/993 1536/994 400/995
+f 1533/996 400/995 1536/994
+f 399/988 1535/997 1530/989
+f 1536/994 1530/989 1535/997
+f 1540/998 1537/999 1538/1000
+f 401/1001 1538/1000 1537/999
+f 400/995 1539/1002 1534/993
+f 1540/998 1534/993 1539/1002
+f 1531/992 1542/1003 390/957
+f 1508/964 390/957 1542/1003
+f 1538/1000 401/1001 1542/1003
+f 1541/1004 1542/1003 401/1001
+f 403/1005 1544/1006 1543/1007
+f 1546/1008 1543/1007 1544/1006
+f 1546/1008 1545/1009 1533/996
+f 400/995 1533/996 1545/1009
+f 395/978 1547/1010 1524/984
+f 1549/1011 1524/984 1547/1010
+f 1549/1011 1544/1006 1548/1012
+f 403/1005 1548/1012 1544/1006
+f 1551/1013 1552/1014 404/1015
+f 1550/1016 404/1015 1552/1014
+f 1518/976 1552/1014 395/978
+f 1547/1010 395/978 1552/1014
+f 1545/1009 1554/1017 400/995
+f 1539/1002 400/995 1554/1017
+f 1554/1017 1551/1013 1553/1018
+f 404/1015 1553/1018 1551/1013
+f 1555/1019 1558/1020 392/952
+f 1500/953 392/952 1558/1020
+f 407/1021 1557/1022 1556/1023
+f 1558/1020 1556/1023 1557/1022
+f 397/973 1559/1024 1516/972
+f 1560/1025 1516/972 1559/1024
+f 392/952 1510/966 1555/1019
+f 1560/1025 1555/1019 1510/966
+f 406/1026 1562/1027 1561/1028
+f 1563/1029 1561/1028 1562/1027
+f 1526/985 1563/1029 397/973
+f 1559/1024 397/973 1563/1029
+f 1556/1023 1566/1030 407/1021
+f 1564/1031 407/1021 1566/1030
+f 1565/1032 1566/1030 406/1026
+f 1562/1027 406/1026 1566/1030
+f 1567/1033 1570/1034 407/1035
+f 1557/1036 407/1035 1570/1034
+f 1568/1037 410/1038 1570/1034
+f 1569/1039 1570/1034 410/1038
+f 409/1040 1572/1041 1571/1042
+f 1573/1043 1571/1042 1572/1041
+f 407/1035 1564/1044 1567/1033
+f 1573/1043 1567/1033 1564/1044
+f 1574/1045 1576/1046 321/620
+f 1241/627 321/620 1576/1046
+f 1575/1047 1576/1046 409/1040
+f 1572/1041 409/1040 1576/1046
+f 318/606 1577/1048 1231/615
+f 1578/1049 1231/615 1577/1048
+f 321/620 1235/621 1574/1045
+f 1578/1049 1574/1045 1235/621
+f 1568/1037 1580/1050 410/1038
+f 1579/1051 410/1038 1580/1050
+f 1226/608 1580/1050 318/606
+f 1577/1048 318/606 1580/1050
+f 1581/1052 412/1053 1584/1054
+f 1582/1055 1584/1054 412/1053
+f 1561/1056 1584/1054 406/1057
+f 1583/1058 406/1057 1584/1054
+f 1587/1059 1210/585 1585/1060
+f 316/586 1585/1060 1210/585
+f 1587/1059 1582/1055 1586/1061
+f 412/1053 1586/1061 1582/1055
+f 322/623 1588/1062 1244/630
+f 1589/1063 1244/630 1588/1062
+f 1589/1063 1585/1060 1220/599
+f 316/586 1220/599 1585/1060
+f 1591/1064 1575/1047 1590/1065
+f 409/1040 1590/1065 1575/1047
+f 322/623 1239/625 1588/1062
+f 1591/1064 1588/1062 1239/625
+f 406/1057 1583/1058 1565/1066
+f 1592/1067 1565/1066 1583/1058
+f 409/1040 1571/1042 1590/1065
+f 1592/1067 1590/1065 1571/1042
+f 414/1068 1594/1069 1593/1070
+f 1596/1071 1593/1070 1594/1069
+f 396/1072 1522/1073 1595/1074
+f 1596/1071 1595/1074 1522/1073
+f 415/1075 1598/1076 1597/1077
+f 1600/1078 1597/1077 1598/1076
+f 1600/1078 1594/1069 1599/1079
+f 414/1068 1599/1079 1594/1069
+f 1601/1080 1603/1081 379/911
+f 1471/918 379/911 1603/1081
+f 1602/1082 1603/1081 415/1075
+f 1598/1076 415/1075 1603/1081
+f 1605/1083 1586/1084 1604/1085
+f 412/1086 1604/1085 1586/1084
+f 379/911 1467/913 1601/1080
+f 1605/1083 1601/1080 1467/913
+f 1527/1087 396/1072 1606/1088
+f 1595/1074 1606/1088 396/1072
+f 1604/1085 412/1086 1606/1088
+f 1581/1089 1606/1088 412/1086
+f 1608/1090 1611/1091 417/1092
+f 1607/1093 417/1092 1611/1091
+f 1609/1094 419/1095 1611/1091
+f 1610/1096 1611/1091 419/1095
+f 1612/1097 1614/1098 399/1099
+f 1535/1100 399/1099 1614/1098
+f 1614/1098 1608/1090 1613/1101
+f 417/1092 1613/1101 1608/1090
+f 418/1102 1616/1103 1615/1104
+f 1617/1105 1615/1104 1616/1103
+f 1612/1097 399/1099 1617/1105
+f 1529/1106 1617/1105 399/1099
+f 1618/1107 1620/1108 352/774
+f 1360/781 352/774 1620/1108
+f 1619/1109 1620/1108 418/1102
+f 1616/1103 418/1102 1620/1108
+f 419/1095 1609/1094 1621/1110
+f 1622/1111 1621/1110 1609/1094
+f 352/774 1354/775 1618/1107
+f 1622/1111 1618/1107 1354/775
+f 414/1068 1623/1112 1599/1079
+f 1626/1113 1599/1079 1623/1112
+f 1625/1114 1626/1113 421/1115
+f 1624/1116 421/1115 1626/1113
+f 1548/1117 403/1118 1628/1119
+f 1627/1120 1628/1119 403/1118
+f 1593/1070 1628/1119 414/1068
+f 1623/1112 414/1068 1628/1119
+f 1629/1121 1630/1122 417/1123
+f 1613/1124 417/1123 1630/1122
+f 1627/1120 403/1118 1630/1122
+f 1543/1125 1630/1122 403/1118
+f 1632/1126 1631/1127 1624/1116
+f 421/1115 1624/1116 1631/1127
+f 417/1123 1607/1128 1629/1121
+f 1632/1126 1629/1121 1607/1128
+f 418/1102 1633/1129 1619/1109
+f 1635/1130 1619/1109 1633/1129
+f 382/924 1476/925 1634/1131
+f 1635/1130 1634/1131 1476/925
+f 389/1132 1636/1133 1504/1134
+f 1637/1135 1504/1134 1636/1133
+f 1615/1104 1637/1135 418/1102
+f 1633/1129 418/1102 1637/1135
+f 1638/1136 1639/1137 410/1138
+f 1569/1139 410/1138 1639/1137
+f 1497/1140 1639/1137 389/1132
+f 1636/1133 389/1132 1639/1137
+f 1634/1131 1640/1141 382/924
+f 1479/927 382/924 1640/1141
+f 1579/1142 1640/1141 410/1138
+f 1638/1136 410/1138 1640/1141
+f 1644/1143 1641/1144 1642/1145
+f 424/1146 1642/1145 1641/1144
+f 391/962 1506/963 1643/1147
+f 1644/1143 1643/1147 1506/963
+f 384/928 1645/1148 1488/939
+f 1647/1149 1488/939 1645/1148
+f 424/1146 1646/1150 1642/1145
+f 1647/1149 1642/1145 1646/1150
+f 425/1151 1649/1152 1648/1153
+f 1650/1154 1648/1153 1649/1152
+f 384/928 1481/930 1645/1148
+f 1650/1154 1645/1148 1481/930
+f 1643/1147 1652/1155 391/962
+f 1511/967 391/962 1652/1155
+f 1651/1156 1652/1155 425/1151
+f 1649/1152 425/1151 1652/1155
+f 1653/1157 427/1158 1656/1159
+f 1654/1160 1656/1159 427/1158
+f 1537/999 1656/1159 401/1001
+f 1655/1161 401/1001 1656/1159
+f 385/937 1657/1162 1492/944
+f 1659/1163 1492/944 1657/1162
+f 427/1158 1658/1164 1654/1160
+f 1659/1163 1654/1160 1658/1164
+f 424/1146 1660/1165 1646/1150
+f 1661/1166 1646/1150 1660/1165
+f 385/937 1486/938 1657/1162
+f 1661/1166 1657/1162 1486/938
+f 401/1001 1655/1161 1541/1004
+f 1662/1167 1541/1004 1655/1161
+f 1641/1144 1662/1167 424/1146
+f 1660/1165 424/1146 1662/1167
+f 1666/1168 1663/1169 1664/1170
+f 429/1171 1664/1170 1663/1169
+f 404/1015 1550/1016 1665/1172
+f 1666/1168 1665/1172 1550/1016
+f 386/940 1667/1173 1495/947
+f 1669/1174 1495/947 1667/1173
+f 429/1171 1668/1175 1664/1170
+f 1669/1174 1664/1170 1668/1175
+f 1670/1176 1671/1177 427/1158
+f 1658/1164 427/1158 1671/1177
+f 1667/1173 386/940 1671/1177
+f 1490/942 1671/1177 386/940
+f 1553/1018 404/1015 1672/1178
+f 1665/1172 1672/1178 404/1015
+f 1672/1178 1670/1176 1653/1157
+f 427/1158 1653/1157 1670/1176
+f 1651/1156 425/1151 1675/1179
+f 1673/1180 1675/1179 425/1151
+f 1513/968 1675/1179 394/969
+f 1674/1181 394/969 1675/1179
+f 387/932 1676/1182 1484/933
+f 1677/1183 1484/933 1676/1182
+f 425/1151 1648/1153 1673/1180
+f 1677/1183 1673/1180 1648/1153
+f 429/1171 1678/1184 1668/1175
+f 1679/1185 1668/1175 1678/1184
+f 387/932 1494/946 1676/1182
+f 1679/1185 1676/1182 1494/946
+f 394/969 1674/1181 1520/979
+f 1680/1186 1520/979 1674/1181
+f 1680/1186 1678/1184 1663/1169
+f 429/1171 1663/1169 1678/1184
+f 1681/1187 1684/1188 196/1189
+f 763/1190 196/1189 1684/1188
+f 1683/1191 1684/1188 434/1192
+f 1682/1193 434/1192 1684/1188
+f 1686/1194 1687/1195 432/1196
+f 1685/1197 432/1196 1687/1195
+f 1687/1195 1681/1187 759/1198
+f 196/1189 759/1198 1681/1187
+f 1689/1199 1691/1200 433/1201
+f 1688/1202 433/1201 1691/1200
+f 1691/1200 1686/1194 1690/1203
+f 432/1196 1690/1203 1686/1194
+f 434/1192 1682/1193 1692/1204
+f 1694/1205 1692/1204 1682/1193
+f 1694/1205 1689/1199 1693/1206
+f 433/1201 1693/1206 1689/1199
+f 340/1207 1695/1208 1311/1209
+f 1697/1210 1311/1209 1695/1208
+f 432/1196 1685/1197 1696/1211
+f 1697/1210 1696/1211 1685/1197
+f 1699/1212 1700/1213 436/1214
+f 1698/1215 436/1214 1700/1213
+f 1306/1216 1700/1213 340/1207
+f 1695/1208 340/1207 1700/1213
+f 437/1217 1702/1218 1701/1219
+f 1704/1220 1701/1219 1702/1218
+f 436/1214 1703/1221 1699/1212
+f 1704/1220 1699/1212 1703/1221
+f 1690/1203 432/1196 1706/1222
+f 1696/1211 1706/1222 432/1196
+f 1705/1223 1706/1222 437/1217
+f 1702/1218 437/1217 1706/1222
+f 1707/1224 1710/1225 337/1226
+f 1290/1227 337/1226 1710/1225
+f 1709/1228 1710/1225 441/1229
+f 1708/1230 441/1229 1710/1225
+f 439/1231 1712/1232 1711/1233
+f 1713/1234 1711/1233 1712/1232
+f 337/1226 1299/1235 1707/1224
+f 1713/1234 1707/1224 1299/1235
+f 1715/1236 1717/1237 440/1238
+f 1714/1239 440/1238 1717/1237
+f 1716/1240 1717/1237 439/1231
+f 1712/1232 439/1231 1717/1237
+f 441/1229 1708/1230 1718/1241
+f 1720/1242 1718/1241 1708/1230
+f 440/1238 1719/1243 1715/1236
+f 1720/1242 1715/1236 1719/1243
+f 1723/1244 746/1245 1721/1246
+f 190/1247 1721/1246 746/1245
+f 1722/1248 439/1231 1723/1244
+f 1711/1233 1723/1244 439/1231
+f 1725/1249 1683/1191 1724/1250
+f 434/1192 1724/1250 1683/1191
+f 1721/1246 190/1247 1725/1249
+f 740/1251 1725/1249 190/1247
+f 1728/1252 1726/1253 1727/1254
+f 443/1255 1727/1254 1726/1253
+f 434/1192 1692/1204 1724/1250
+f 1728/1252 1724/1250 1692/1204
+f 1730/1256 1716/1240 1722/1248
+f 439/1231 1722/1248 1716/1240
+f 1727/1254 443/1255 1730/1256
+f 1729/1257 1730/1256 443/1255
+f 445/1258 1732/1259 1731/1260
+f 1734/1261 1731/1260 1732/1259
+f 1701/1219 1734/1261 437/1217
+f 1733/1262 437/1217 1734/1261
+f 446/1263 1736/1264 1735/1265
+f 1738/1266 1735/1265 1736/1264
+f 1737/1267 1738/1266 445/1258
+f 1732/1259 445/1258 1738/1266
+f 1739/1268 1741/1269 433/1201
+f 1693/1206 433/1201 1741/1269
+f 1740/1270 1741/1269 446/1263
+f 1736/1264 446/1263 1741/1269
+f 437/1217 1733/1262 1705/1223
+f 1742/1271 1705/1223 1733/1262
+f 433/1201 1688/1202 1739/1268
+f 1742/1271 1739/1268 1688/1202
+f 1746/1272 1743/1273 1744/1274
+f 448/1275 1744/1274 1743/1273
+f 446/1263 1735/1265 1745/1276
+f 1746/1272 1745/1276 1735/1265
+f 1747/1277 1749/1278 440/1238
+f 1719/1243 440/1238 1749/1278
+f 1744/1274 448/1275 1749/1278
+f 1748/1279 1749/1278 448/1275
+f 443/1255 1750/1280 1729/1257
+f 1751/1281 1729/1257 1750/1280
+f 440/1238 1714/1239 1747/1277
+f 1751/1281 1747/1277 1714/1239
+f 1745/1276 1752/1282 446/1263
+f 1740/1270 446/1263 1752/1282
+f 1726/1253 1752/1282 443/1255
+f 1750/1280 443/1255 1752/1282
+f 1754/1283 1757/1284 450/1285
+f 1753/1286 450/1285 1757/1284
+f 1756/1287 1757/1284 452/1288
+f 1755/1289 452/1288 1757/1284
+f 451/1290 1759/1291 1758/1292
+f 1761/1293 1758/1292 1759/1291
+f 450/1285 1760/1294 1754/1283
+f 1761/1293 1754/1283 1760/1294
+f 452/1288 1755/1289 1762/1295
+f 1764/1296 1762/1295 1755/1289
+f 1763/1297 1764/1296 451/1290
+f 1759/1291 451/1290 1764/1296
+f 452/1288 1765/1298 1756/1287
+f 1768/1299 1756/1287 1765/1298
+f 1766/1300 455/1301 1768/1299
+f 1767/1302 1768/1299 455/1301
+f 1770/1303 1771/1304 454/1305
+f 1769/1306 454/1305 1771/1304
+f 1762/1295 1771/1304 452/1288
+f 1765/1298 452/1288 1771/1304
+f 208/82 1772/1307 806/91
+f 1774/1308 806/91 1772/1307
+f 454/1305 1773/1309 1770/1303
+f 1774/1308 1770/1303 1773/1309
+f 1775/1310 1776/1311 229/173
+f 881/180 229/173 1776/1311
+f 800/84 1776/1311 208/82
+f 1772/1307 208/82 1776/1311
+f 1778/1312 1777/1313 1766/1300
+f 455/1301 1766/1300 1777/1313
+f 229/173 875/174 1775/1310
+f 1778/1312 1775/1310 875/174
+f 455/1301 1779/1314 1767/1302
+f 1781/1315 1767/1302 1779/1314
+f 450/1285 1753/1286 1780/1316
+f 1781/1315 1780/1316 1753/1286
+f 1782/1317 1783/1318 258/303
+f 989/308 258/303 1783/1318
+f 1777/1313 1783/1318 455/1301
+f 1779/1314 455/1301 1783/1318
+f 1784/1319 1785/1320 249/251
+f 942/252 249/251 1785/1320
+f 258/303 986/305 1782/1317
+f 1785/1320 1782/1317 986/305
+f 1786/1321 457/1322 1788/1323
+f 1787/1324 1788/1323 457/1322
+f 1784/1319 249/251 1788/1323
+f 952/264 1788/1323 249/251
+f 1780/1316 1790/1325 450/1285
+f 1760/1294 450/1285 1790/1325
+f 1789/1326 1790/1325 457/1322
+f 1787/1324 457/1322 1790/1325
+f 1792/1327 1795/1328 459/1329
+f 1791/1330 459/1329 1795/1328
+f 1793/1331 460/1332 1795/1328
+f 1794/1333 1795/1328 460/1332
+f 457/1322 1796/1334 1789/1326
+f 1798/1335 1789/1326 1796/1334
+f 459/1329 1797/1336 1792/1327
+f 1798/1335 1792/1327 1797/1336
+f 1284/678 333/673 1800/1337
+f 1799/1338 1800/1337 333/673
+f 1786/1321 1800/1337 457/1322
+f 1796/1334 457/1322 1800/1337
+f 1802/1339 1801/1340 1793/1331
+f 460/1332 1793/1331 1801/1340
+f 333/673 1279/674 1799/1338
+f 1802/1339 1799/1338 1279/674
+f 360/803 1803/1341 1380/804
+f 1806/1342 1380/804 1803/1341
+f 463/1343 1805/1344 1804/1345
+f 1806/1342 1804/1345 1805/1344
+f 1807/1346 1808/1347 454/1305
+f 1773/1309 454/1305 1808/1347
+f 1387/812 1808/1347 360/803
+f 1803/1341 360/803 1808/1347
+f 462/1348 1810/1349 1809/1350
+f 1811/1351 1809/1350 1810/1349
+f 454/1305 1769/1306 1807/1346
+f 1811/1351 1807/1346 1769/1306
+f 1804/1345 1814/1352 463/1343
+f 1812/1353 463/1343 1814/1352
+f 1813/1354 1814/1352 462/1348
+f 1810/1349 462/1348 1814/1352
+f 451/1290 1815/1355 1763/1297
+f 1818/1356 1763/1297 1815/1355
+f 467/1357 1817/1358 1816/1359
+f 1818/1356 1816/1359 1817/1358
+f 1821/1360 1819/1361 1820/1362
+f 465/1363 1820/1362 1819/1361
+f 1758/1292 1821/1360 451/1290
+f 1815/1355 451/1290 1821/1360
+f 1823/1364 1825/1365 466/1366
+f 1822/1367 466/1366 1825/1365
+f 1820/1362 465/1363 1825/1365
+f 1824/1368 1825/1365 465/1363
+f 1826/1369 467/1357 1828/1370
+f 1816/1359 1828/1370 467/1357
+f 1823/1364 466/1366 1828/1370
+f 1827/1371 1828/1370 466/1366
+f 1829/1372 1831/1373 459/1329
+f 1797/1336 459/1329 1831/1373
+f 1819/1361 1831/1373 465/1363
+f 1830/1374 465/1363 1831/1373
+f 469/1375 1833/1376 1832/1377
+f 1834/1378 1832/1377 1833/1376
+f 459/1329 1791/1330 1829/1372
+f 1834/1378 1829/1372 1791/1330
+f 1836/1379 1838/1380 470/1381
+f 1835/1382 470/1381 1838/1380
+f 1837/1383 1838/1380 469/1375
+f 1833/1376 469/1375 1838/1380
+f 465/1363 1830/1374 1824/1368
+f 1840/1384 1824/1368 1830/1374
+f 470/1381 1839/1385 1836/1379
+f 1840/1384 1836/1379 1839/1385
+f 1844/1386 1813/1354 1841/1387
+f 462/1348 1841/1387 1813/1354
+f 473/1388 1843/1389 1842/1390
+f 1844/1386 1842/1390 1843/1389
+f 1845/1391 1846/1392 467/1357
+f 1817/1358 467/1357 1846/1392
+f 1841/1387 462/1348 1846/1392
+f 1809/1350 1846/1392 462/1348
+f 472/1393 1848/1394 1847/1395
+f 1849/1396 1847/1395 1848/1394
+f 1849/1396 1845/1391 1826/1369
+f 467/1357 1826/1369 1845/1391
+f 1842/1390 1852/1397 473/1388
+f 1850/1398 473/1388 1852/1397
+f 1851/1399 1852/1397 472/1393
+f 1848/1394 472/1393 1852/1397
+f 1853/1400 1855/1401 470/1381
+f 1839/1385 470/1381 1855/1401
+f 1854/1402 466/1366 1855/1401
+f 1822/1367 1855/1401 466/1366
+f 1858/1403 1856/1404 1857/1405
+f 475/1406 1857/1405 1856/1404
+f 470/1381 1835/1382 1853/1400
+f 1858/1403 1853/1400 1835/1382
+f 472/1393 1859/1407 1851/1399
+f 1861/1408 1851/1399 1859/1407
+f 475/1406 1860/1409 1857/1405
+f 1861/1408 1857/1405 1860/1409
+f 1854/1402 1862/1410 466/1366
+f 1827/1371 466/1366 1862/1410
+f 1859/1407 472/1393 1862/1410
+f 1847/1395 1862/1410 472/1393
+f 477/1411 1864/1412 1863/1413
+f 1867/1414 1863/1413 1864/1412
+f 1867/1414 1865/1415 1866/1416
+f 480/1417 1866/1416 1865/1415
+f 478/1418 1869/1419 1868/1420
+f 1871/1421 1868/1420 1869/1419
+f 1870/1422 1871/1421 477/1411
+f 1864/1412 477/1411 1871/1421
+f 1872/1423 479/1424 1875/1425
+f 1873/1426 1875/1425 479/1424
+f 1874/1427 1875/1425 478/1418
+f 1869/1419 478/1418 1875/1425
+f 1865/1415 1878/1428 480/1417
+f 1876/1429 480/1417 1878/1428
+f 479/1424 1877/1430 1873/1426
+f 1878/1428 1873/1426 1877/1430
+f 1879/1431 1882/1432 480/1417
+f 1866/1416 480/1417 1882/1432
+f 1880/1433 484/1434 1882/1432
+f 1881/1435 1882/1432 484/1434
+f 482/1436 1884/1437 1883/1438
+f 1885/1439 1883/1438 1884/1437
+f 1885/1439 1879/1431 1876/1429
+f 480/1417 1876/1429 1879/1431
+f 1887/1440 1889/1441 483/1442
+f 1886/1443 483/1442 1889/1441
+f 1888/1444 1889/1441 482/1436
+f 1884/1437 482/1436 1889/1441
+f 484/1434 1880/1433 1890/1445
+f 1892/1446 1890/1445 1880/1433
+f 483/1442 1891/1447 1887/1440
+f 1892/1446 1887/1440 1891/1447
+f 1894/1448 1897/1449 486/1450
+f 1893/1451 486/1450 1897/1449
+f 1897/1449 1895/1452 1896/1453
+f 489/1454 1896/1453 1895/1452
+f 1899/1455 1901/1456 487/1457
+f 1898/1458 487/1457 1901/1456
+f 486/1450 1900/1459 1894/1448
+f 1901/1456 1894/1448 1900/1459
+f 488/1460 1903/1461 1902/1462
+f 1905/1463 1902/1462 1903/1461
+f 487/1457 1904/1464 1899/1455
+f 1905/1463 1899/1455 1904/1464
+f 1895/1452 1908/1465 489/1454
+f 1906/1466 489/1454 1908/1465
+f 1907/1467 1908/1465 488/1460
+f 1903/1461 488/1460 1908/1465
+f 1910/1468 1913/1469 491/1470
+f 1909/1471 491/1470 1913/1469
+f 1912/1472 1913/1469 493/1473
+f 1911/1474 493/1473 1913/1469
+f 492/1475 1915/1476 1914/1477
+f 1917/1478 1914/1477 1915/1476
+f 491/1470 1916/1479 1910/1468
+f 1917/1478 1910/1468 1916/1479
+f 1918/1480 1920/1481 487/1457
+f 1904/1464 487/1457 1920/1481
+f 1919/1482 1920/1481 492/1475
+f 1915/1476 492/1475 1920/1481
+f 493/1473 1911/1474 1921/1483
+f 1922/1484 1921/1483 1911/1474
+f 487/1457 1898/1458 1918/1480
+f 1922/1484 1918/1480 1898/1458
+f 495/1485 1924/1486 1923/1487
+f 1927/1488 1923/1487 1924/1486
+f 1926/1489 1927/1488 498/1490
+f 1925/1491 498/1490 1927/1488
+f 1929/1492 1931/1493 496/1494
+f 1928/1495 496/1494 1931/1493
+f 1930/1496 1931/1493 495/1485
+f 1924/1486 495/1485 1931/1493
+f 1933/1497 1935/1498 497/1499
+f 1932/1500 497/1499 1935/1498
+f 496/1494 1934/1501 1929/1492
+f 1935/1498 1929/1492 1934/1501
+f 498/1490 1925/1491 1936/1502
+f 1938/1503 1936/1502 1925/1491
+f 497/1499 1937/1504 1933/1497
+f 1938/1503 1933/1497 1937/1504
+f 500/1505 1940/1506 1939/1507
+f 1942/1508 1939/1507 1940/1506
+f 486/1450 1893/1451 1941/1509
+f 1942/1508 1941/1509 1893/1451
+f 1944/1510 1946/1511 501/1512
+f 1943/1513 501/1512 1946/1511
+f 1945/1514 1946/1511 500/1505
+f 1940/1506 500/1505 1946/1511
+f 502/1515 1948/1516 1947/1517
+f 1950/1518 1947/1517 1948/1516
+f 501/1512 1949/1519 1944/1510
+f 1950/1518 1944/1510 1949/1519
+f 1941/1509 1952/1520 486/1450
+f 1900/1459 486/1450 1952/1520
+f 1951/1521 1952/1520 502/1515
+f 1948/1516 502/1515 1952/1520
+f 1954/1522 1956/1523 504/1524
+f 1953/1525 504/1524 1956/1523
+f 1955/1526 477/1411 1956/1523
+f 1863/1413 1956/1523 477/1411
+f 505/1527 1958/1528 1957/1529
+f 1960/1530 1957/1529 1958/1528
+f 504/1524 1959/1531 1954/1522
+f 1960/1530 1954/1522 1959/1531
+f 1961/1532 1963/1533 501/1512
+f 1949/1519 501/1512 1963/1533
+f 1962/1534 1963/1533 505/1527
+f 1958/1528 505/1527 1963/1533
+f 1964/1535 1870/1422 1955/1526
+f 477/1411 1955/1526 1870/1422
+f 501/1512 1943/1513 1961/1532
+f 1964/1535 1961/1532 1943/1513
+f 1966/1536 1969/1537 507/1538
+f 1965/1539 507/1538 1969/1537
+f 509/1540 1968/1541 1967/1542
+f 1969/1537 1967/1542 1968/1541
+f 1971/1543 1973/1544 508/1545
+f 1970/1546 508/1545 1973/1544
+f 1973/1544 1966/1536 1972/1547
+f 507/1538 1972/1547 1966/1536
+f 505/1527 1974/1548 1962/1534
+f 1976/1549 1962/1534 1974/1548
+f 1976/1549 1971/1543 1975/1550
+f 508/1545 1975/1550 1971/1543
+f 1977/1551 509/1540 1978/1552
+f 1967/1542 1978/1552 509/1540
+f 1957/1529 1978/1552 505/1527
+f 1974/1548 505/1527 1978/1552
+f 493/1473 1979/1553 1912/1472
+f 1982/1554 1912/1472 1979/1553
+f 511/1555 1981/1556 1980/1557
+f 1982/1554 1980/1557 1981/1556
+f 502/1515 1983/1558 1951/1521
+f 1984/1559 1951/1521 1983/1558
+f 1921/1483 1984/1559 493/1473
+f 1979/1553 493/1473 1984/1559
+f 1975/1550 508/1545 1986/1560
+f 1985/1561 1986/1560 508/1545
+f 1947/1517 1986/1560 502/1515
+f 1983/1558 502/1515 1986/1560
+f 1980/1557 1988/1562 511/1555
+f 1987/1563 511/1555 1988/1562
+f 508/1545 1970/1546 1985/1561
+f 1988/1562 1985/1561 1970/1546
+f 498/1490 1989/1564 1926/1489
+f 1992/1565 1926/1489 1989/1564
+f 515/1566 1991/1567 1990/1568
+f 1992/1565 1990/1568 1991/1567
+f 1994/1569 1995/1570 513/1571
+f 1993/1572 513/1571 1995/1570
+f 1936/1502 1995/1570 498/1490
+f 1989/1564 498/1490 1995/1570
+f 514/1573 1997/1574 1996/1575
+f 1999/1576 1996/1575 1997/1574
+f 513/1571 1998/1577 1994/1569
+f 1999/1576 1994/1569 1998/1577
+f 1990/1568 2002/1578 515/1566
+f 2000/1579 515/1566 2002/1578
+f 2001/1580 2002/1578 514/1573
+f 1997/1574 514/1573 2002/1578
+f 2004/1581 2007/1582 517/1583
+f 2003/1584 517/1583 2007/1582
+f 2006/1585 2007/1582 519/1586
+f 2005/1587 519/1586 2007/1582
+f 518/1588 2009/1589 2008/1590
+f 2011/1591 2008/1590 2009/1589
+f 2011/1591 2004/1581 2010/1592
+f 517/1583 2010/1592 2004/1581
+f 2012/1593 2014/1594 513/1571
+f 1998/1577 513/1571 2014/1594
+f 2013/1595 2014/1594 518/1588
+f 2009/1589 518/1588 2014/1594
+f 519/1586 2005/1587 2015/1596
+f 2016/1597 2015/1596 2005/1587
+f 513/1571 1993/1572 2012/1593
+f 2016/1597 2012/1593 1993/1572
+f 2020/1598 1968/1541 2017/1599
+f 509/1540 2017/1599 1968/1541
+f 2020/1598 2018/1600 2019/1601
+f 522/1602 2019/1601 2018/1600
+f 2021/1603 521/1604 2023/1605
+f 2022/1606 2023/1605 521/1604
+f 2023/1605 2017/1599 1977/1551
+f 509/1540 1977/1551 2017/1599
+f 518/1588 2024/1607 2013/1595
+f 2026/1608 2013/1595 2024/1607
+f 521/1604 2025/1609 2022/1606
+f 2026/1608 2022/1606 2025/1609
+f 2018/1600 2028/1610 522/1602
+f 2027/1611 522/1602 2028/1610
+f 2008/1590 2028/1610 518/1588
+f 2024/1607 518/1588 2028/1610
+f 484/1434 2029/1612 1881/1435
+f 2031/1613 1881/1435 2029/1612
+f 504/1524 1953/1525 2030/1614
+f 2031/1613 2030/1614 1953/1525
+f 514/1573 2032/1615 2001/1580
+f 2033/1616 2001/1580 2032/1615
+f 1890/1445 2033/1616 484/1434
+f 2029/1612 484/1434 2033/1616
+f 2034/1617 2035/1618 521/1604
+f 2025/1609 521/1604 2035/1618
+f 1996/1575 2035/1618 514/1573
+f 2032/1615 514/1573 2035/1618
+f 1959/1531 504/1524 2036/1619
+f 2030/1614 2036/1619 504/1524
+f 521/1604 2021/1603 2034/1617
+f 2036/1619 2034/1617 2021/1603
+f 2038/1620 2041/1621 525/1622
+f 2037/1623 525/1622 2041/1621
+f 2040/1624 2041/1621 528/1625
+f 2039/1626 528/1625 2041/1621
+f 526/1627 2043/1628 2042/1629
+f 2045/1630 2042/1629 2043/1628
+f 525/1622 2044/1631 2038/1620
+f 2045/1630 2038/1620 2044/1631
+f 2047/1632 2049/1633 527/1634
+f 2046/1635 527/1634 2049/1633
+f 2048/1636 2049/1633 526/1627
+f 2043/1628 526/1627 2049/1633
+f 2052/1637 2050/1638 2039/1626
+f 528/1625 2039/1626 2050/1638
+f 527/1634 2051/1639 2047/1632
+f 2052/1637 2047/1632 2051/1639
+f 2057/1640 2053/1641 2054/1642
+f 530/1643 2054/1642 2053/1641
+f 2056/1644 2057/1640 532/1645
+f 2055/1646 532/1645 2057/1640
+f 531/1647 2059/1648 2058/1649
+f 2061/1650 2058/1649 2059/1648
+f 2061/1650 2054/1642 2060/1651
+f 530/1643 2060/1651 2054/1642
+f 2062/1652 2064/1653 526/1627
+f 2048/1636 526/1627 2064/1653
+f 2063/1654 2064/1653 531/1647
+f 2059/1648 531/1647 2064/1653
+f 532/1645 2055/1646 2065/1655
+f 2066/1656 2065/1655 2055/1646
+f 526/1627 2042/1629 2062/1652
+f 2066/1656 2062/1652 2042/1629
+f 534/1657 2068/1658 2067/1659
+f 2071/1660 2067/1659 2068/1658
+f 537/1661 2070/1662 2069/1663
+f 2071/1660 2069/1663 2070/1662
+f 2072/1664 535/1665 2075/1666
+f 2073/1667 2075/1666 535/1665
+f 2074/1668 2075/1666 534/1657
+f 2068/1658 534/1657 2075/1666
+f 536/1669 2077/1670 2076/1671
+f 2079/1672 2076/1671 2077/1670
+f 2079/1672 2073/1667 2078/1673
+f 535/1665 2078/1673 2073/1667
+f 2080/1674 537/1661 2082/1675
+f 2069/1663 2082/1675 537/1661
+f 2081/1676 2082/1675 536/1669
+f 2077/1670 536/1669 2082/1675
+f 2084/1677 2086/1678 539/1679
+f 2083/1680 539/1679 2086/1678
+f 2053/1641 2086/1678 530/1643
+f 2085/1681 530/1643 2086/1678
+f 536/1669 2087/1682 2081/1676
+f 2089/1683 2081/1676 2087/1682
+f 539/1679 2088/1684 2084/1677
+f 2089/1683 2084/1677 2088/1684
+f 2091/1685 2092/1686 540/1687
+f 2090/1688 540/1687 2092/1686
+f 2076/1671 2092/1686 536/1669
+f 2087/1682 536/1669 2092/1686
+f 530/1643 2085/1681 2060/1651
+f 2094/1689 2060/1651 2085/1681
+f 540/1687 2093/1690 2091/1685
+f 2094/1689 2091/1685 2093/1690
+f 542/1691 2096/1692 2095/1693
+f 2098/1694 2095/1693 2096/1692
+f 507/1538 1965/1539 2097/1695
+f 2098/1694 2097/1695 1965/1539
+f 2099/1696 2101/1697 532/1645
+f 2056/1644 532/1645 2101/1697
+f 2100/1698 2101/1697 542/1691
+f 2096/1692 542/1691 2101/1697
+f 543/1699 2103/1700 2102/1701
+f 2104/1702 2102/1701 2103/1700
+f 2104/1702 2099/1696 2065/1655
+f 532/1645 2065/1655 2099/1696
+f 2097/1695 2106/1703 507/1538
+f 1972/1547 507/1538 2106/1703
+f 2105/1704 2106/1703 543/1699
+f 2103/1700 543/1699 2106/1703
+f 2010/1592 517/1583 2110/1705
+f 2107/1706 2110/1705 517/1583
+f 2109/1707 2110/1705 546/1708
+f 2108/1709 546/1708 2110/1705
+f 2113/1710 2111/1711 2112/1712
+f 545/1713 2112/1712 2111/1711
+f 517/1583 2003/1584 2107/1706
+f 2113/1710 2107/1706 2003/1584
+f 537/1661 2114/1714 2070/1662
+f 2116/1715 2070/1662 2114/1714
+f 545/1713 2115/1716 2112/1712
+f 2116/1715 2112/1712 2115/1716
+f 546/1708 2108/1709 2117/1717
+f 2118/1718 2117/1717 2108/1709
+f 2080/1674 2118/1718 537/1661
+f 2114/1714 537/1661 2118/1718
+f 2119/1719 2122/1720 491/1470
+f 1916/1479 491/1470 2122/1720
+f 2121/1721 2122/1720 549/1722
+f 2120/1723 549/1722 2122/1720
+f 2125/1724 2123/1725 2124/1726
+f 548/1727 2124/1726 2123/1725
+f 491/1470 1909/1471 2119/1719
+f 2125/1724 2119/1719 1909/1471
+f 528/1625 2126/1728 2040/1624
+f 2128/1729 2040/1624 2126/1728
+f 2124/1726 548/1727 2128/1729
+f 2127/1730 2128/1729 548/1727
+f 549/1722 2120/1723 2129/1731
+f 2130/1732 2129/1731 2120/1723
+f 2050/1638 2130/1732 528/1625
+f 2126/1728 528/1625 2130/1732
+f 2131/1733 2133/1734 539/1679
+f 2088/1684 539/1679 2133/1734
+f 2117/1717 2133/1734 546/1708
+f 2132/1735 546/1708 2133/1734
+f 542/1691 2134/1736 2100/1698
+f 2135/1737 2100/1698 2134/1736
+f 539/1679 2083/1680 2131/1733
+f 2135/1737 2131/1733 2083/1680
+f 2136/1738 2137/1739 522/1602
+f 2019/1601 522/1602 2137/1739
+f 2095/1693 2137/1739 542/1691
+f 2134/1736 542/1691 2137/1739
+f 546/1708 2132/1735 2109/1707
+f 2138/1740 2109/1707 2132/1735
+f 522/1602 2027/1611 2136/1738
+f 2138/1740 2136/1738 2027/1611
+f 543/1699 2139/1741 2105/1704
+f 2141/1742 2105/1704 2139/1741
+f 511/1555 1987/1563 2140/1743
+f 2141/1742 2140/1743 1987/1563
+f 2142/1744 2143/1745 525/1622
+f 2044/1631 525/1622 2143/1745
+f 2102/1701 2143/1745 543/1699
+f 2139/1741 543/1699 2143/1745
+f 548/1727 2144/1746 2127/1730
+f 2145/1747 2127/1730 2144/1746
+f 525/1622 2037/1623 2142/1744
+f 2145/1747 2142/1744 2037/1623
+f 2140/1743 2146/1748 511/1555
+f 1981/1556 511/1555 2146/1748
+f 2123/1725 2146/1748 548/1727
+f 2144/1746 548/1727 2146/1748
+f 2150/1749 2121/1721 2147/1750
+f 549/1722 2147/1750 2121/1721
+f 2149/1751 2150/1749 555/1752
+f 2148/1753 555/1752 2150/1749
+f 2152/1754 2153/1755 553/1756
+f 2151/1757 553/1756 2153/1755
+f 2147/1750 549/1722 2153/1755
+f 2129/1731 2153/1755 549/1722
+f 554/1758 2155/1759 2154/1760
+f 2157/1761 2154/1760 2155/1759
+f 2157/1761 2152/1754 2156/1762
+f 553/1756 2156/1762 2152/1754
+f 2158/1763 555/1752 2160/1764
+f 2148/1753 2160/1764 555/1752
+f 2159/1765 2160/1764 554/1758
+f 2155/1759 554/1758 2160/1764
+f 2164/1766 2159/1767 2161/1768
+f 554/1769 2161/1768 2159/1767
+f 559/1770 2163/1771 2162/1772
+f 2164/1766 2162/1772 2163/1771
+f 2166/1773 2167/1774 557/1775
+f 2165/1776 557/1775 2167/1774
+f 2161/1768 554/1769 2167/1774
+f 2154/1777 2167/1774 554/1769
+f 558/1778 2169/1779 2168/1780
+f 2171/1781 2168/1780 2169/1779
+f 2171/1781 2166/1773 2170/1782
+f 557/1775 2170/1782 2166/1773
+f 2162/1772 2174/1783 559/1770
+f 2172/1784 559/1770 2174/1783
+f 2173/1785 2174/1783 558/1778
+f 2169/1779 558/1778 2174/1783
+f 2175/1786 2178/1787 559/1770
+f 2163/1771 559/1770 2178/1787
+f 2177/1788 2178/1787 563/1789
+f 2176/1790 563/1789 2178/1787
+f 561/1791 2180/1792 2179/1793
+f 2181/1794 2179/1793 2180/1792
+f 559/1770 2172/1784 2175/1786
+f 2181/1794 2175/1786 2172/1784
+f 2183/1795 2185/1796 562/1797
+f 2182/1798 562/1797 2185/1796
+f 2184/1799 2185/1796 561/1791
+f 2180/1792 561/1791 2185/1796
+f 563/1789 2176/1790 2186/1800
+f 2188/1801 2186/1800 2176/1790
+f 562/1797 2187/1802 2183/1795
+f 2188/1801 2183/1795 2187/1802
+f 2190/1803 2192/1804 565/1805
+f 2189/1806 565/1805 2192/1804
+f 2179/1793 2192/1804 561/1791
+f 2191/1807 561/1791 2192/1804
+f 2194/1808 2196/1809 566/1810
+f 2193/1811 566/1810 2196/1809
+f 565/1805 2195/1812 2190/1803
+f 2196/1809 2190/1803 2195/1812
+f 2198/1813 2200/1814 567/1815
+f 2197/1816 567/1815 2200/1814
+f 566/1810 2199/1817 2194/1808
+f 2200/1814 2194/1808 2199/1817
+f 561/1791 2191/1807 2184/1799
+f 2202/1818 2184/1799 2191/1807
+f 567/1815 2201/1819 2198/1813
+f 2202/1818 2198/1813 2201/1819
+f 2204/1820 2207/1821 569/1822
+f 2203/1823 569/1822 2207/1821
+f 2205/1824 572/1825 2207/1821
+f 2206/1826 2207/1821 572/1825
+f 2209/1827 2211/1828 570/1829
+f 2208/1830 570/1829 2211/1828
+f 569/1822 2210/1831 2204/1820
+f 2211/1828 2204/1820 2210/1831
+f 571/1832 2213/1833 2212/1834
+f 2215/1835 2212/1834 2213/1833
+f 570/1829 2214/1836 2209/1827
+f 2215/1835 2209/1827 2214/1836
+f 572/1825 2205/1824 2216/1837
+f 2218/1838 2216/1837 2205/1824
+f 2213/1833 571/1832 2218/1838
+f 2217/1839 2218/1838 571/1832
+f 2220/1840 2223/1841 574/1842
+f 2219/1843 574/1842 2223/1841
+f 2221/1844 577/1845 2223/1841
+f 2222/1846 2223/1841 577/1845
+f 2225/1847 2227/1848 575/1849
+f 2224/1850 575/1849 2227/1848
+f 574/1842 2226/1851 2220/1840
+f 2227/1848 2220/1840 2226/1851
+f 576/1852 2229/1853 2228/1854
+f 2231/1855 2228/1854 2229/1853
+f 575/1849 2230/1856 2225/1847
+f 2231/1855 2225/1847 2230/1856
+f 577/1845 2221/1844 2232/1857
+f 2234/1858 2232/1857 2221/1844
+f 2229/1853 576/1852 2234/1858
+f 2233/1859 2234/1858 576/1852
+f 579/1860 2236/1861 2235/1862
+f 2238/1863 2235/1862 2236/1861
+f 515/1864 2000/1865 2237/1866
+f 2238/1863 2237/1866 2000/1865
+f 574/1842 2239/1867 2226/1851
+f 2241/1868 2226/1851 2239/1867
+f 2236/1861 579/1860 2241/1868
+f 2240/1869 2241/1868 579/1860
+f 580/1870 2243/1871 2242/1872
+f 2244/1873 2242/1872 2243/1871
+f 574/1842 2219/1843 2239/1867
+f 2244/1873 2239/1867 2219/1843
+f 2237/1866 2246/1874 515/1864
+f 1991/1875 515/1864 2246/1874
+f 2243/1871 580/1870 2246/1874
+f 2245/1876 2246/1874 580/1870
+f 2248/1877 2251/1878 582/1879
+f 2247/1880 582/1879 2251/1878
+f 2250/1881 2251/1878 584/1882
+f 2249/1883 584/1882 2251/1878
+f 576/1852 2252/1884 2233/1859
+f 2254/1885 2233/1859 2252/1884
+f 582/1879 2253/1886 2248/1877
+f 2254/1885 2248/1877 2253/1886
+f 583/1887 2256/1888 2255/1889
+f 2257/1890 2255/1889 2256/1888
+f 2252/1884 576/1852 2257/1890
+f 2228/1854 2257/1890 576/1852
+f 584/1882 2249/1883 2258/1891
+f 2260/1892 2258/1891 2249/1883
+f 2260/1892 2256/1888 2259/1893
+f 583/1887 2259/1893 2256/1888
+f 2261/1894 2263/1895 483/1896
+f 1891/1897 483/1896 2263/1895
+f 2235/1862 2263/1895 579/1860
+f 2262/1898 579/1860 2263/1895
+f 583/1887 2264/1899 2259/1893
+f 2265/1900 2259/1893 2264/1899
+f 483/1896 1886/1901 2261/1894
+f 2265/1900 2261/1894 1886/1901
+f 2266/1902 2267/1903 575/1849
+f 2230/1856 575/1849 2267/1903
+f 583/1887 2255/1889 2264/1899
+f 2267/1903 2264/1899 2255/1889
+f 579/1860 2262/1898 2240/1869
+f 2268/1904 2240/1869 2262/1898
+f 575/1849 2224/1850 2266/1902
+f 2268/1904 2266/1902 2224/1850
+f 2271/1905 1930/1906 2269/1907
+f 495/1908 2269/1907 1930/1906
+f 2271/1905 2270/1909 2247/1880
+f 582/1879 2247/1880 2270/1909
+f 2272/1910 2273/1911 580/1870
+f 2245/1876 580/1870 2273/1911
+f 2269/1907 495/1908 2273/1911
+f 1923/1912 2273/1911 495/1908
+f 577/1845 2274/1913 2222/1846
+f 2275/1914 2222/1846 2274/1913
+f 580/1870 2242/1872 2272/1910
+f 2275/1914 2272/1910 2242/1872
+f 2270/1909 2276/1915 582/1879
+f 2253/1886 582/1879 2276/1915
+f 2276/1915 2274/1913 2232/1857
+f 577/1845 2232/1857 2274/1913
+f 500/1916 2277/1917 1945/1918
+f 2280/1919 1945/1918 2277/1917
+f 589/1920 2279/1921 2278/1922
+f 2280/1919 2278/1922 2279/1921
+f 2282/1923 2283/1924 588/1925
+f 2281/1926 588/1925 2283/1924
+f 1939/1927 2283/1924 500/1916
+f 2277/1917 500/1916 2283/1924
+f 572/1825 2284/1928 2206/1826
+f 2286/1929 2206/1826 2284/1928
+f 588/1925 2285/1930 2282/1923
+f 2286/1929 2282/1923 2285/1930
+f 2278/1922 2288/1931 589/1920
+f 2287/1932 589/1920 2288/1931
+f 2284/1928 572/1825 2288/1931
+f 2216/1837 2288/1931 572/1825
+f 2292/1933 2289/1934 2290/1935
+f 591/1936 2290/1935 2289/1934
+f 489/1937 1906/1938 2291/1939
+f 2292/1933 2291/1939 1906/1938
+f 2293/1940 2295/1941 569/1822
+f 2210/1831 569/1822 2295/1941
+f 2290/1935 591/1936 2295/1941
+f 2294/1942 2295/1941 591/1936
+f 588/1925 2296/1943 2285/1930
+f 2297/1944 2285/1930 2296/1943
+f 569/1822 2203/1823 2293/1940
+f 2297/1944 2293/1940 2203/1823
+f 1896/1945 489/1937 2298/1946
+f 2291/1939 2298/1946 489/1937
+f 2296/1943 588/1925 2298/1946
+f 2281/1926 2298/1946 588/1925
+f 478/1947 2299/1948 1874/1949
+f 2302/1950 1874/1949 2299/1948
+f 593/1951 2301/1952 2300/1953
+f 2302/1950 2300/1953 2301/1952
+f 2303/1954 2304/1955 589/1920
+f 2279/1921 589/1920 2304/1955
+f 1868/1956 2304/1955 478/1947
+f 2299/1948 478/1947 2304/1955
+f 571/1832 2305/1957 2217/1839
+f 2306/1958 2217/1839 2305/1957
+f 589/1920 2287/1932 2303/1954
+f 2306/1958 2303/1954 2287/1932
+f 2300/1953 2308/1959 593/1951
+f 2307/1960 593/1951 2308/1959
+f 2212/1834 2308/1959 571/1832
+f 2305/1957 571/1832 2308/1959
+f 2310/1961 2312/1962 595/1963
+f 2309/1964 595/1963 2312/1962
+f 2289/1934 2312/1962 591/1936
+f 2311/1965 591/1936 2312/1962
+f 2313/1966 2315/1967 593/1951
+f 2301/1952 593/1951 2315/1967
+f 595/1963 2314/1968 2310/1961
+f 2315/1967 2310/1961 2314/1968
+f 570/1829 2316/1969 2214/1836
+f 2317/1970 2214/1836 2316/1969
+f 593/1951 2307/1960 2313/1966
+f 2317/1970 2313/1966 2307/1960
+f 591/1936 2311/1965 2294/1942
+f 2318/1971 2294/1942 2311/1965
+f 570/1829 2208/1830 2316/1969
+f 2318/1971 2316/1969 2208/1830
+f 2319/1972 2322/1973 565/1805
+f 2195/1812 565/1805 2322/1973
+f 2321/1974 2322/1973 599/1975
+f 2320/1976 599/1975 2322/1973
+f 558/1778 2323/1977 2173/1785
+f 2324/1978 2173/1785 2323/1977
+f 565/1805 2189/1806 2319/1972
+f 2324/1978 2319/1972 2189/1806
+f 2326/1979 2327/1980 597/1981
+f 2325/1982 597/1981 2327/1980
+f 2323/1977 558/1778 2327/1980
+f 2168/1780 2327/1980 558/1778
+f 598/1983 2329/1984 2328/1985
+f 2331/1986 2328/1985 2329/1984
+f 2331/1986 2326/1979 2330/1987
+f 597/1981 2330/1987 2326/1979
+f 2320/1976 2334/1988 599/1975
+f 2332/1989 599/1975 2334/1988
+f 2333/1990 2334/1988 598/1983
+f 2329/1984 598/1983 2334/1988
+f 2335/1991 601/1992 2339/1993
+f 2336/1994 2339/1993 601/1992
+f 2338/1995 2339/1993 604/1996
+f 2337/1997 604/1996 2339/1993
+f 2341/1998 2343/1999 602/2000
+f 2340/2001 602/2000 2343/1999
+f 2336/1994 601/1992 2343/1999
+f 2342/2002 2343/1999 601/1992
+f 2345/2003 2347/2004 603/2005
+f 2344/2006 603/2005 2347/2004
+f 602/2000 2346/2007 2341/1998
+f 2347/2004 2341/1998 2346/2007
+f 604/1996 2337/1997 2348/2008
+f 2350/2009 2348/2008 2337/1997
+f 603/2005 2349/2010 2345/2003
+f 2350/2009 2345/2003 2349/2010
+f 2351/2011 2353/2012 603/2005
+f 2349/2010 603/2005 2353/2012
+f 2352/2013 598/2014 2353/2012
+f 2328/2015 2353/2012 598/2014
+f 606/2016 2355/2017 2354/2018
+f 2356/2019 2354/2018 2355/2017
+f 603/2005 2344/2006 2351/2011
+f 2356/2019 2351/2011 2344/2006
+f 2358/2020 2360/2021 607/2022
+f 2357/2023 607/2022 2360/2021
+f 2359/2024 2360/2021 606/2016
+f 2355/2017 606/2016 2360/2021
+f 2362/2025 2333/2026 2352/2013
+f 598/2014 2352/2013 2333/2026
+f 607/2022 2361/2027 2358/2020
+f 2362/2025 2358/2020 2361/2027
+f 2363/2028 2366/2029 606/2016
+f 2359/2024 606/2016 2366/2029
+f 2365/2030 2366/2029 610/2031
+f 2364/2032 610/2031 2366/2029
+f 2367/2033 2368/2034 602/2000
+f 2346/2007 602/2000 2368/2034
+f 606/2016 2354/2018 2363/2028
+f 2368/2034 2363/2028 2354/2018
+f 2370/2035 2371/2036 609/2037
+f 2369/2038 609/2037 2371/2036
+f 602/2000 2340/2001 2367/2033
+f 2371/2036 2367/2033 2340/2001
+f 610/2031 2364/2032 2372/2039
+f 2374/2040 2372/2039 2364/2032
+f 609/2037 2373/2041 2370/2035
+f 2374/2040 2370/2035 2373/2041
+f 612/2042 2376/2043 2375/2044
+f 2378/2045 2375/2044 2376/2043
+f 324/2046 1247/2047 2377/2048
+f 2378/2045 2377/2048 1247/2047
+f 2379/2049 2381/2050 527/1634
+f 2051/1639 527/1634 2381/2050
+f 2380/2051 2381/2050 612/2042
+f 2376/2043 612/2042 2381/2050
+f 613/2052 2383/2053 2382/2054
+f 2384/2055 2382/2054 2383/2053
+f 527/1634 2046/1635 2379/2049
+f 2384/2055 2379/2049 2046/1635
+f 2377/2048 2386/2056 324/2046
+f 1254/2057 324/2046 2386/2056
+f 2385/2058 2386/2056 613/2052
+f 2383/2053 613/2052 2386/2056
+f 615/2059 2388/2060 2387/2061
+f 2390/2062 2387/2061 2388/2060
+f 2390/2062 2389/2063 1261/2064
+f 328/2065 1261/2064 2389/2063
+f 2391/2066 2393/2067 540/1687
+f 2093/1690 540/1687 2393/2067
+f 2392/2068 2393/2067 615/2059
+f 2388/2060 615/2059 2393/2067
+f 2396/2069 2394/2070 2395/2071
+f 616/2072 2395/2071 2394/2070
+f 540/1687 2090/1688 2391/2066
+f 2396/2069 2391/2066 2090/1688
+f 2389/2063 2398/2073 328/2065
+f 1268/2074 328/2065 2398/2073
+f 2395/2071 616/2072 2398/2073
+f 2397/2075 2398/2073 616/2072
+f 616/2072 2399/2076 2397/2075
+f 2401/2077 2397/2075 2399/2076
+f 2401/2077 2400/2078 1275/2079
+f 332/2080 1275/2079 2400/2078
+f 2078/1673 535/1665 2403/2081
+f 2402/2082 2403/2081 535/1665
+f 2394/2070 2403/2081 616/2072
+f 2399/2076 616/2072 2403/2081
+f 618/2083 2405/2084 2404/2085
+f 2406/2086 2404/2085 2405/2084
+f 2406/2086 2402/2082 2072/1664
+f 535/1665 2072/1664 2402/2082
+f 2400/2078 2408/2087 332/2080
+f 1281/2088 332/2080 2408/2087
+f 2405/2084 618/2083 2408/2087
+f 2407/2089 2408/2087 618/2083
+f 620/2090 2410/2091 2409/2092
+f 2412/2093 2409/2092 2410/2091
+f 335/2094 1287/2095 2411/2096
+f 2412/2093 2411/2096 1287/2095
+f 2413/2097 2415/2098 595/2099
+f 2314/2100 595/2099 2415/2098
+f 2414/2101 2415/2098 620/2090
+f 2410/2091 620/2090 2415/2098
+f 2418/2102 2416/2103 2417/2104
+f 621/2105 2417/2104 2416/2103
+f 2418/2102 2413/2097 2309/2106
+f 595/2099 2309/2106 2413/2097
+f 2411/2096 2420/2107 335/2094
+f 1294/2108 335/2094 2420/2107
+f 2417/2104 621/2105 2420/2107
+f 2419/2109 2420/2107 621/2105
+f 2421/2110 623/2111 2424/2112
+f 2422/2113 2424/2112 623/2111
+f 2423/2114 339/2115 2424/2112
+f 1301/2116 2424/2112 339/2115
+f 584/2117 2425/2118 2250/2119
+f 2427/2120 2250/2119 2425/2118
+f 623/2111 2426/2121 2422/2113
+f 2427/2120 2422/2113 2426/2121
+f 2429/2122 2430/2123 624/2124
+f 2428/2125 624/2124 2430/2123
+f 2425/2118 584/2117 2430/2123
+f 2258/2126 2430/2123 584/2117
+f 2432/2127 1308/2128 2423/2114
+f 339/2115 2423/2114 1308/2128
+f 624/2124 2431/2129 2429/2122
+f 2432/2127 2429/2122 2431/2129
+f 613/2052 2433/2130 2385/2058
+f 2435/2131 2385/2058 2433/2130
+f 2435/2131 2434/2132 1315/2133
+f 343/2134 1315/2133 2434/2132
+f 2063/1654 531/1647 2437/2135
+f 2436/2136 2437/2135 531/1647
+f 2382/2054 2437/2135 613/2052
+f 2433/2130 613/2052 2437/2135
+f 2439/2137 2392/2068 2438/2138
+f 615/2059 2438/2138 2392/2068
+f 531/1647 2058/1649 2436/2136
+f 2439/2137 2436/2136 2058/1649
+f 2434/2132 2440/2139 343/2134
+f 1320/2140 343/2134 2440/2139
+f 2438/2138 615/2059 2440/2139
+f 2387/2061 2440/2139 615/2059
+f 627/2141 2442/2142 2441/2143
+f 2444/2144 2441/2143 2442/2142
+f 345/2145 1325/2146 2443/2147
+f 2444/2144 2443/2147 1325/2146
+f 2156/1762 553/1756 2447/2148
+f 2445/2149 2447/2148 553/1756
+f 2446/2150 2447/2148 627/2141
+f 2442/2142 627/2141 2447/2148
+f 612/2042 2448/2151 2380/2051
+f 2449/2152 2380/2051 2448/2151
+f 553/1756 2151/1757 2445/2149
+f 2449/2152 2445/2149 2151/1757
+f 1331/2153 345/2145 2450/2154
+f 2443/2147 2450/2154 345/2145
+f 2375/2044 2450/2154 612/2042
+f 2448/2151 612/2042 2450/2154
+f 629/2155 2452/2156 2451/2157
+f 2454/2158 2451/2157 2452/2156
+f 2454/2158 2453/2159 1337/746
+f 348/748 1337/746 2453/2159
+f 2170/2160 557/2161 2457/2162
+f 2455/2163 2457/2162 557/2161
+f 629/2155 2456/2164 2452/2156
+f 2457/2162 2452/2156 2456/2164
+f 2459/2165 2446/2166 2458/2167
+f 627/2168 2458/2167 2446/2166
+f 557/2161 2165/2169 2455/2163
+f 2459/2165 2455/2163 2165/2169
+f 2453/2159 2460/2170 348/748
+f 1343/756 348/748 2460/2170
+f 2458/2167 627/2168 2460/2170
+f 2441/2171 2460/2170 627/2168
+f 631/2172 2462/2173 2461/2174
+f 2464/2175 2461/2174 2462/2173
+f 2464/2175 2463/2176 1349/766
+f 351/768 1349/766 2463/2176
+f 2465/2177 632/2178 2468/2179
+f 2466/2180 2468/2179 632/2178
+f 2467/2181 2468/2179 631/2172
+f 2462/2173 631/2172 2468/2179
+f 633/2182 2470/2183 2469/2184
+f 2472/2185 2469/2184 2470/2183
+f 632/2178 2471/2186 2466/2180
+f 2472/2185 2466/2180 2471/2186
+f 2463/2176 2474/2187 351/768
+f 1356/776 351/768 2474/2187
+f 2473/2188 2474/2187 633/2182
+f 2470/2183 633/2182 2474/2187
+f 2478/2189 2475/2190 2476/2191
+f 635/2192 2476/2191 2475/2190
+f 1365/785 2478/2189 356/786
+f 2477/2193 356/786 2478/2189
+f 566/1810 2479/2194 2199/1817
+f 2481/2195 2199/1817 2479/2194
+f 2476/2191 635/2192 2481/2195
+f 2480/2196 2481/2195 635/2192
+f 631/2172 2482/2197 2467/2181
+f 2483/2198 2467/2181 2482/2197
+f 2193/1811 2483/2198 566/1810
+f 2479/2194 566/1810 2483/2198
+f 356/786 2477/2193 1371/794
+f 2484/2199 1371/794 2477/2193
+f 2482/2197 631/2172 2484/2199
+f 2461/2174 2484/2199 631/2172
+f 637/2200 2486/2201 2485/2202
+f 2488/2203 2485/2202 2486/2201
+f 2488/2203 2487/2204 1377/2205
+f 359/2206 1377/2205 2487/2204
+f 2489/2207 2491/2208 496/1494
+f 1934/1501 496/1494 2491/2208
+f 2490/2209 2491/2208 637/2200
+f 2486/2201 637/2200 2491/2208
+f 2492/2210 2493/2211 623/2111
+f 2426/2121 623/2111 2493/2211
+f 496/1494 1928/1495 2489/2207
+f 2493/2211 2489/2207 1928/1495
+f 2487/2204 2494/2212 359/2206
+f 1383/2213 359/2206 2494/2212
+f 2494/2212 2492/2210 2421/2110
+f 623/2111 2421/2110 2492/2210
+f 2495/2214 2497/2215 621/2105
+f 2419/2109 621/2105 2497/2215
+f 2496/2216 362/2217 2497/2215
+f 1389/2218 2497/2215 362/2217
+f 488/1460 2498/2219 1907/1467
+f 2499/2220 1907/1467 2498/2219
+f 2499/2220 2495/2214 2416/2103
+f 621/2105 2416/2103 2495/2214
+f 2501/2221 2502/2222 639/2223
+f 2500/2224 639/2223 2502/2222
+f 1902/1462 2502/2222 488/1460
+f 2498/2219 488/1460 2502/2222
+f 2504/2225 1395/2226 2496/2216
+f 362/2217 2496/2216 1395/2226
+f 639/2223 2503/2227 2501/2221
+f 2504/2225 2501/2221 2503/2227
+f 2505/2228 2507/2229 639/2223
+f 2503/2227 639/2223 2507/2229
+f 2506/2230 365/2231 2507/2229
+f 1401/2232 2507/2229 365/2231
+f 492/1475 2508/2233 1919/1482
+f 2509/2234 1919/1482 2508/2233
+f 639/2223 2500/2224 2505/2228
+f 2509/2234 2505/2228 2500/2224
+f 2511/2235 2512/2236 641/2237
+f 2510/2238 641/2237 2512/2236
+f 1914/1477 2512/2236 492/1475
+f 2508/2233 492/1475 2512/2236
+f 2514/2239 1407/2240 2506/2230
+f 365/2231 2506/2230 1407/2240
+f 641/2237 2513/2241 2511/2235
+f 2514/2239 2511/2235 2513/2241
+f 2515/2242 2517/2243 641/2237
+f 2513/2241 641/2237 2517/2243
+f 2516/2244 368/2245 2517/2243
+f 1413/2246 2517/2243 368/2245
+f 2519/2247 2149/1751 2518/2248
+f 555/1752 2518/2248 2149/1751
+f 641/2237 2510/2238 2515/2242
+f 2519/2247 2515/2242 2510/2238
+f 2522/2249 2520/2250 2521/2251
+f 643/2252 2521/2251 2520/2250
+f 555/1752 2158/1763 2518/2248
+f 2522/2249 2518/2248 2158/1763
+f 1419/2253 368/2245 2524/2254
+f 2516/2244 2524/2254 368/2245
+f 2521/2251 643/2252 2524/2254
+f 2523/2255 2524/2254 643/2252
+f 643/2256 2525/2257 2523/2258
+f 2527/2259 2523/2258 2525/2257
+f 371/855 1425/857 2526/2260
+f 2527/2259 2526/2260 1425/857
+f 563/1789 2528/2261 2177/1788
+f 2529/2262 2177/1788 2528/2261
+f 2520/2263 2529/2262 643/2256
+f 2525/2257 643/2256 2529/2262
+f 2531/2264 2532/2265 645/2266
+f 2530/2267 645/2266 2532/2265
+f 2186/1800 2532/2265 563/1789
+f 2528/2261 563/1789 2532/2265
+f 1431/866 371/855 2534/2268
+f 2526/2260 2534/2268 371/855
+f 645/2266 2533/2269 2531/2264
+f 2534/2268 2531/2264 2533/2269
+f 2535/2270 2537/2271 645/2266
+f 2533/2269 645/2266 2537/2271
+f 2536/2272 374/874 2537/2271
+f 1437/875 2537/2271 374/874
+f 562/1797 2538/2273 2187/1802
+f 2539/2274 2187/1802 2538/2273
+f 645/2266 2530/2267 2535/2270
+f 2539/2274 2535/2270 2530/2267
+f 2541/2275 2542/2276 647/2277
+f 2540/2278 647/2277 2542/2276
+f 2182/1798 2542/2276 562/1797
+f 2538/2273 562/1797 2542/2276
+f 2544/2279 1443/881 2536/2272
+f 374/874 2536/2272 1443/881
+f 647/2277 2543/2280 2541/2275
+f 2544/2279 2541/2275 2543/2280
+f 2545/2281 2547/2282 647/2277
+f 2543/2280 647/2277 2547/2282
+f 2547/2282 2546/2283 1449/887
+f 377/889 1449/887 2546/2283
+f 2548/2284 2549/2285 567/1815
+f 2201/1819 567/1815 2549/2285
+f 2540/2278 2549/2285 647/2277
+f 2545/2281 647/2277 2549/2285
+f 635/2192 2550/2286 2480/2196
+f 2551/2287 2480/2196 2550/2286
+f 567/1815 2197/1816 2548/2284
+f 2551/2287 2548/2284 2197/1816
+f 2546/2283 2552/2288 377/889
+f 1454/893 377/889 2552/2288
+f 2550/2286 635/2192 2552/2288
+f 2475/2190 2552/2288 635/2192
+f 2555/2289 1461/899 2553/2290
+f 380/900 2553/2290 1461/899
+f 2555/2289 2554/2291 2451/2157
+f 629/2155 2451/2157 2554/2291
+f 2557/2292 2558/2293 650/2294
+f 2556/2295 650/2294 2558/2293
+f 380/900 1470/916 2553/2290
+f 2558/2293 2553/2290 1470/916
+f 604/2296 2559/2297 2338/2298
+f 2561/2299 2338/2298 2559/2297
+f 650/2294 2560/2300 2557/2292
+f 2561/2299 2557/2292 2560/2300
+f 2562/2301 2563/2302 597/2303
+f 2330/2304 597/2303 2563/2302
+f 2348/2305 2563/2302 604/2296
+f 2559/2297 604/2296 2563/2302
+f 2554/2291 2564/2306 629/2155
+f 2456/2164 629/2155 2564/2306
+f 2564/2306 2562/2301 2325/2307
+f 597/2303 2325/2307 2562/2301
+f 2321/1974 599/1975 2567/2308
+f 2565/2309 2567/2308 599/1975
+f 2465/2177 2567/2308 632/2178
+f 2566/2310 632/2178 2567/2308
+f 652/2311 2569/2312 2568/2313
+f 2570/2314 2568/2313 2569/2312
+f 599/1975 2332/1989 2565/2309
+f 2570/2314 2565/2309 2332/1989
+f 2471/2186 632/2178 2572/2315
+f 2566/2310 2572/2315 632/2178
+f 2571/2316 2572/2315 652/2311
+f 2569/2312 652/2311 2572/2315
+f 654/2317 2574/2318 2573/2319
+f 2577/2320 2573/2319 2574/2318
+f 657/2321 2576/2322 2575/2323
+f 2577/2320 2575/2323 2576/2322
+f 2579/2324 2581/2325 655/2326
+f 2578/2327 655/2326 2581/2325
+f 2580/2328 2581/2325 654/2317
+f 2574/2318 654/2317 2581/2325
+f 656/2329 2583/2330 2582/2331
+f 2585/2332 2582/2331 2583/2330
+f 655/2326 2584/2333 2579/2324
+f 2585/2332 2579/2324 2584/2333
+f 2575/2323 2588/2334 657/2321
+f 2586/2335 657/2321 2588/2334
+f 2587/2336 2588/2334 656/2329
+f 2583/2330 656/2329 2588/2334
+f 659/2337 2590/2338 2589/2339
+f 2593/2340 2589/2339 2590/2338
+f 2593/2340 2591/2341 2592/2342
+f 662/2343 2592/2342 2591/2341
+f 660/2344 2595/2345 2594/2346
+f 2597/2347 2594/2346 2595/2345
+f 2590/2338 659/2337 2597/2347
+f 2596/2348 2597/2347 659/2337
+f 2601/2349 2598/2350 2599/2351
+f 661/2352 2599/2351 2598/2350
+f 2595/2345 660/2344 2601/2349
+f 2600/2353 2601/2349 660/2344
+f 2591/2341 2604/2354 662/2343
+f 2602/2355 662/2343 2604/2354
+f 2599/2351 661/2352 2604/2354
+f 2603/2356 2604/2354 661/2352
+f 2606/2357 2609/2358 664/2359
+f 2605/2360 664/2359 2609/2358
+f 2608/2361 2609/2358 667/2362
+f 2607/2363 667/2362 2609/2358
+f 2613/2364 2610/2365 2611/2366
+f 665/2367 2611/2366 2610/2365
+f 664/2359 2612/2368 2606/2357
+f 2613/2364 2606/2357 2612/2368
+f 2615/2369 2617/2370 666/2371
+f 2614/2372 666/2371 2617/2370
+f 2611/2366 665/2367 2617/2370
+f 2616/2373 2617/2370 665/2367
+f 2607/2363 2620/2374 667/2362
+f 2618/2375 667/2362 2620/2374
+f 2620/2374 2615/2369 2619/2376
+f 666/2371 2619/2376 2615/2369
+f 2603/2356 661/2352 2624/2377
+f 2621/2378 2624/2377 661/2352
+f 671/2379 2623/2380 2622/2381
+f 2624/2377 2622/2381 2623/2380
+f 2626/2382 2627/2383 669/2384
+f 2625/2385 669/2384 2627/2383
+f 2621/2378 661/2352 2627/2383
+f 2598/2350 2627/2383 661/2352
+f 670/2386 2629/2387 2628/2388
+f 2631/2389 2628/2388 2629/2387
+f 2631/2389 2626/2382 2630/2390
+f 669/2384 2630/2390 2626/2382
+f 671/2379 2622/2381 2632/2391
+f 2634/2392 2632/2391 2622/2381
+f 2629/2387 670/2386 2634/2392
+f 2633/2393 2634/2392 670/2386
+f 2638/2394 2633/2393 2635/2395
+f 670/2386 2635/2395 2633/2393
+f 674/2396 2637/2397 2636/2398
+f 2638/2394 2636/2398 2637/2397
+f 2641/2399 2639/2400 2640/2401
+f 673/2402 2640/2401 2639/2400
+f 2635/2395 670/2386 2641/2399
+f 2628/2388 2641/2399 670/2386
+f 2619/2376 666/2371 2644/2403
+f 2642/2404 2644/2403 666/2371
+f 2644/2403 2640/2401 2643/2405
+f 673/2402 2643/2405 2640/2401
+f 2646/2406 2645/2407 2636/2398
+f 674/2396 2636/2398 2645/2407
+f 666/2371 2614/2372 2642/2404
+f 2646/2406 2642/2404 2614/2372
+f 676/2408 2648/2409 2647/2410
+f 2650/2411 2647/2410 2648/2409
+f 2589/2339 2650/2411 659/2337
+f 2649/2412 659/2337 2650/2411
+f 2652/2413 2654/2414 677/2415
+f 2651/2416 677/2415 2654/2414
+f 2653/2417 2654/2414 676/2408
+f 2648/2409 676/2408 2654/2414
+f 2612/2368 664/2359 2657/2418
+f 2655/2419 2657/2418 664/2359
+f 677/2415 2656/2420 2652/2413
+f 2657/2418 2652/2413 2656/2420
+f 659/2337 2649/2412 2596/2348
+f 2658/2421 2596/2348 2649/2412
+f 664/2359 2605/2360 2655/2419
+f 2658/2421 2655/2419 2605/2360
+f 2660/2422 2662/2423 679/2424
+f 2659/2425 679/2424 2662/2423
+f 2661/2426 676/2427 2662/2423
+f 2647/2428 2662/2423 676/2427
+f 2663/2429 2665/2430 607/2022
+f 2361/2027 607/2022 2665/2430
+f 2664/2431 2665/2430 679/2424
+f 2660/2422 679/2424 2665/2430
+f 610/2031 2666/2432 2365/2030
+f 2667/2433 2365/2030 2666/2432
+f 607/2022 2357/2023 2663/2429
+f 2667/2433 2663/2429 2357/2023
+f 2669/2434 2670/2435 680/2436
+f 2668/2437 680/2436 2670/2435
+f 2372/2039 2670/2435 610/2031
+f 2666/2432 610/2031 2670/2435
+f 676/2427 2661/2426 2653/2438
+f 2672/2439 2653/2438 2661/2426
+f 680/2436 2671/2440 2669/2434
+f 2672/2439 2669/2434 2671/2440
+f 2656/2441 677/2442 2676/2443
+f 2673/2444 2676/2443 677/2442
+f 2675/2445 2676/2443 682/2446
+f 2674/2447 682/2446 2676/2443
+f 680/2436 2677/2448 2671/2440
+f 2678/2449 2671/2440 2677/2448
+f 677/2442 2651/2450 2673/2444
+f 2678/2449 2673/2444 2651/2450
+f 609/2037 2679/2451 2373/2041
+f 2680/2452 2373/2041 2679/2451
+f 2680/2452 2677/2448 2668/2437
+f 680/2436 2668/2437 2677/2448
+f 2682/2453 2342/2002 2681/2454
+f 601/1992 2681/2454 2342/2002
+f 609/2037 2369/2038 2679/2451
+f 2682/2453 2679/2451 2369/2038
+f 2684/2455 2683/2456 2674/2447
+f 682/2446 2674/2447 2683/2456
+f 2684/2455 2681/2454 2335/1991
+f 601/1992 2335/1991 2681/2454
+f 665/2457 2685/2458 2616/2459
+f 2688/2460 2616/2459 2685/2458
+f 684/2461 2687/2462 2686/2463
+f 2688/2460 2686/2463 2687/2462
+f 2689/2464 2690/2465 682/2466
+f 2675/2467 682/2466 2690/2465
+f 2610/2468 2690/2465 665/2457
+f 2685/2458 665/2457 2690/2465
+f 650/2294 2691/2469 2560/2300
+f 2692/2470 2560/2300 2691/2469
+f 2692/2470 2689/2464 2683/2471
+f 682/2466 2683/2471 2689/2464
+f 2693/2472 2694/2473 415/1075
+f 1602/1082 415/1075 2694/2473
+f 2556/2295 2694/2473 650/2294
+f 2691/2469 650/2294 2694/2473
+f 2686/2463 2696/2474 684/2461
+f 2695/2475 684/2461 2696/2474
+f 415/1075 1597/1077 2693/2472
+f 2696/2474 2693/2472 1597/1077
+f 2697/2476 2700/2477 419/1095
+f 1610/1096 419/1095 2700/2477
+f 2699/2478 2700/2477 687/2479
+f 2698/2480 687/2479 2700/2477
+f 633/2182 2701/2481 2473/2188
+f 2702/2482 2473/2188 2701/2481
+f 419/1095 1621/1110 2697/2476
+f 2702/2482 2697/2476 1621/1110
+f 2704/2483 2705/2484 686/2485
+f 2703/2486 686/2485 2705/2484
+f 2469/2184 2705/2484 633/2182
+f 2701/2481 633/2182 2705/2484
+f 2706/2487 2708/2488 671/2489
+f 2623/2490 671/2489 2708/2488
+f 686/2485 2707/2491 2704/2483
+f 2708/2488 2704/2483 2707/2491
+f 2710/2492 2709/2493 2698/2480
+f 687/2479 2698/2480 2709/2493
+f 2706/2487 671/2489 2710/2492
+f 2632/2494 2710/2492 671/2489
+f 1625/1114 421/1115 2713/2495
+f 2711/2496 2713/2495 421/1115
+f 684/2461 2695/2475 2712/2497
+f 2713/2495 2712/2497 2695/2475
+f 687/2498 2714/2499 2699/2500
+f 2715/2501 2699/2500 2714/2499
+f 2715/2501 2711/2496 1631/1127
+f 421/1115 1631/1127 2711/2496
+f 2716/2502 2717/2503 674/2504
+f 2637/2505 674/2504 2717/2503
+f 2714/2499 687/2498 2717/2503
+f 2709/2506 2717/2503 687/2498
+f 2712/2497 2718/2507 684/2461
+f 2687/2462 684/2461 2718/2507
+f 2645/2508 2718/2507 674/2504
+f 2716/2502 674/2504 2718/2507
+f 652/2311 2719/2509 2571/2316
+f 2721/2510 2571/2316 2719/2509
+f 686/2485 2703/2486 2720/2511
+f 2721/2510 2720/2511 2703/2486
+f 2664/2512 679/2513 2723/2514
+f 2722/2515 2723/2514 679/2513
+f 2568/2313 2723/2514 652/2311
+f 2719/2509 652/2311 2723/2514
+f 2724/2516 2725/2517 662/2518
+f 2592/2519 662/2518 2725/2517
+f 2659/2520 2725/2517 679/2513
+f 2722/2515 679/2513 2725/2517
+f 2720/2511 2726/2521 686/2485
+f 2707/2491 686/2485 2726/2521
+f 662/2518 2602/2522 2724/2516
+f 2726/2521 2724/2516 2602/2522
+f 660/2344 2727/2523 2600/2353
+f 2730/2524 2600/2353 2727/2523
+f 2730/2524 2728/2525 2729/2526
+f 692/2527 2729/2526 2728/2525
+f 2731/2528 691/2529 2733/2530
+f 2732/2531 2733/2530 691/2529
+f 2594/2346 2733/2530 660/2344
+f 2727/2523 660/2344 2733/2530
+f 657/2321 2734/2532 2576/2322
+f 2736/2533 2576/2322 2734/2532
+f 691/2529 2735/2534 2732/2531
+f 2736/2533 2732/2531 2735/2534
+f 692/2527 2728/2525 2737/2535
+f 2738/2536 2737/2535 2728/2525
+f 657/2321 2586/2335 2734/2532
+f 2738/2536 2734/2532 2586/2335
+f 2739/2537 2742/2538 669/2384
+f 2630/2390 669/2384 2742/2538
+f 2741/2539 2742/2538 694/2540
+f 2740/2541 694/2540 2742/2538
+f 2729/2526 692/2527 2744/2542
+f 2743/2543 2744/2542 692/2527
+f 669/2384 2625/2385 2739/2537
+f 2744/2542 2739/2537 2625/2385
+f 656/2329 2745/2544 2587/2336
+f 2746/2545 2587/2336 2745/2544
+f 692/2527 2737/2535 2743/2543
+f 2746/2545 2743/2543 2737/2535
+f 694/2540 2740/2541 2747/2546
+f 2748/2547 2747/2546 2740/2541
+f 656/2329 2582/2331 2745/2544
+f 2748/2547 2745/2544 2582/2331
+f 673/2402 2749/2548 2643/2405
+f 2752/2549 2643/2405 2749/2548
+f 2752/2549 2750/2550 2751/2551
+f 696/2552 2751/2551 2750/2550
+f 2754/2553 2741/2539 2753/2554
+f 694/2540 2753/2554 2741/2539
+f 2639/2400 2754/2553 673/2402
+f 2749/2548 673/2402 2754/2553
+f 2755/2555 2756/2556 655/2326
+f 2584/2333 655/2326 2756/2556
+f 2753/2554 694/2540 2756/2556
+f 2747/2546 2756/2556 694/2540
+f 696/2552 2750/2550 2757/2557
+f 2758/2558 2757/2557 2750/2550
+f 655/2326 2578/2327 2755/2555
+f 2758/2558 2755/2555 2578/2327
+f 2759/2559 2761/2560 667/2362
+f 2608/2361 667/2362 2761/2560
+f 2731/2528 2761/2560 691/2529
+f 2760/2561 691/2529 2761/2560
+f 2763/2562 2751/2551 2762/2563
+f 696/2552 2762/2563 2751/2551
+f 667/2362 2618/2375 2759/2559
+f 2763/2562 2759/2559 2618/2375
+f 654/2317 2764/2564 2580/2328
+f 2765/2565 2580/2328 2764/2564
+f 696/2552 2757/2557 2762/2563
+f 2765/2565 2762/2563 2757/2557
+f 691/2529 2760/2561 2735/2534
+f 2766/2566 2735/2534 2760/2561
+f 654/2317 2573/2319 2764/2564
+f 2766/2566 2764/2564 2573/2319
+f 2767/2567 699/2568 2770/2569
+f 2768/2570 2770/2569 699/2568
+f 2769/2571 482/2572 2770/2569
+f 1883/2573 2770/2569 482/2572
+f 2774/2574 2771/2575 2772/2576
+f 700/2577 2772/2576 2771/2575
+f 699/2568 2773/2578 2768/2570
+f 2774/2574 2768/2570 2773/2578
+f 2778/2579 2775/2580 2776/2581
+f 701/2582 2776/2581 2775/2580
+f 2772/2576 700/2577 2778/2579
+f 2777/2583 2778/2579 700/2577
+f 2780/2584 1888/2585 2769/2571
+f 482/2572 2769/2571 1888/2585
+f 2776/2581 701/2582 2780/2584
+f 2779/2586 2780/2584 701/2582
+f 701/2582 2781/2587 2779/2586
+f 2783/2588 2779/2586 2781/2587
+f 624/2589 2428/2590 2782/2591
+f 2783/2588 2782/2591 2428/2590
+f 2785/2592 2786/2593 703/2594
+f 2784/2595 703/2594 2786/2593
+f 2775/2580 2786/2593 701/2582
+f 2781/2587 701/2582 2786/2593
+f 436/1214 2787/2596 1703/1221
+f 2789/2597 1703/1221 2787/2596
+f 703/2594 2788/2598 2785/2592
+f 2789/2597 2785/2592 2788/2598
+f 2782/2591 2790/2599 624/2589
+f 2431/2600 624/2589 2790/2599
+f 1698/1215 2790/2599 436/1214
+f 2787/2596 436/1214 2790/2599
+f 2791/2601 2793/2602 441/1229
+f 1709/1228 441/1229 2793/2602
+f 2409/2603 2793/2602 620/2604
+f 2792/2605 620/2604 2793/2602
+f 705/2606 2795/2607 2794/2608
+f 2796/2609 2794/2608 2795/2607
+f 441/1229 1718/1241 2791/2601
+f 2796/2609 2791/2601 1718/1241
+f 2797/2610 706/2611 2800/2612
+f 2798/2613 2800/2612 706/2611
+f 2799/2614 2800/2612 705/2606
+f 2795/2607 705/2606 2800/2612
+f 620/2604 2792/2605 2414/2615
+f 2802/2616 2414/2615 2792/2605
+f 706/2611 2801/2617 2798/2613
+f 2802/2616 2798/2613 2801/2617
+f 2803/2618 2805/2619 706/2611
+f 2801/2617 706/2611 2805/2619
+f 2805/2619 2804/2620 1872/2621
+f 479/2622 1872/2621 2804/2620
+f 2807/2623 2808/2624 708/2625
+f 2806/2626 708/2625 2808/2624
+f 2808/2624 2803/2618 2797/2610
+f 706/2611 2797/2610 2803/2618
+f 699/2568 2809/2627 2773/2578
+f 2811/2628 2773/2578 2809/2627
+f 2811/2628 2807/2623 2810/2629
+f 708/2625 2810/2629 2807/2623
+f 2804/2620 2812/2630 479/2622
+f 1877/2631 479/2622 2812/2630
+f 2812/2630 2809/2627 2767/2567
+f 699/2568 2767/2567 2809/2627
+f 2813/2632 2815/2633 703/2594
+f 2788/2598 703/2594 2815/2633
+f 445/1258 1731/1260 2814/2634
+f 2815/2633 2814/2634 1731/1260
+f 700/2577 2816/2635 2777/2583
+f 2817/2636 2777/2583 2816/2635
+f 703/2594 2784/2595 2813/2632
+f 2817/2636 2813/2632 2784/2595
+f 2819/2637 2820/2638 710/2639
+f 2818/2640 710/2639 2820/2638
+f 2771/2575 2820/2638 700/2577
+f 2816/2635 700/2577 2820/2638
+f 2822/2641 1737/1267 2814/2634
+f 445/1258 2814/2634 1737/1267
+f 710/2639 2821/2642 2819/2637
+f 2822/2641 2819/2637 2821/2642
+f 710/2639 2823/2643 2821/2642
+f 2825/2644 2821/2642 2823/2643
+f 1743/1273 2825/2644 448/1275
+f 2824/2645 448/1275 2825/2644
+f 2826/2646 2827/2647 708/2625
+f 2810/2629 708/2625 2827/2647
+f 2818/2640 2827/2647 710/2639
+f 2823/2643 710/2639 2827/2647
+f 705/2606 2828/2648 2799/2614
+f 2829/2649 2799/2614 2828/2648
+f 708/2625 2806/2626 2826/2646
+f 2829/2649 2826/2646 2806/2626
+f 2824/2645 2830/2650 448/1275
+f 1748/1279 448/1275 2830/2650
+f 2794/2608 2830/2650 705/2606
+f 2828/2648 705/2606 2830/2650
+f 2832/2651 2835/2652 713/2653
+f 2831/2654 713/2653 2835/2652
+f 2834/2655 2835/2652 715/2656
+f 2833/2657 715/2656 2835/2652
+f 2836/2658 714/2659 2839/2660
+f 2837/2661 2839/2660 714/2659
+f 713/2653 2838/2662 2832/2651
+f 2839/2660 2832/2651 2838/2662
+f 715/2656 2833/2657 2840/2663
+f 2842/2664 2840/2663 2833/2657
+f 714/2659 2841/2665 2837/2661
+f 2842/2664 2837/2661 2841/2665
+f 2844/2666 2846/2667 717/2668
+f 2843/2669 717/2668 2846/2667
+f 713/2653 2831/2654 2845/2670
+f 2846/2667 2845/2670 2831/2654
+f 519/1586 2847/2671 2006/1585
+f 2849/2672 2006/1585 2847/2671
+f 2849/2672 2844/2666 2848/2673
+f 717/2668 2848/2673 2844/2666
+f 2850/2674 2851/2675 497/1499
+f 1937/1504 497/1499 2851/2675
+f 2015/1596 2851/2675 519/1586
+f 2847/2671 519/1586 2851/2675
+f 718/2676 2853/2677 2852/2678
+f 2854/2679 2852/2678 2853/2677
+f 497/1499 1932/1500 2850/2674
+f 2854/2679 2850/2674 1932/1500
+f 2845/2670 2856/2680 713/2653
+f 2838/2662 713/2653 2856/2680
+f 2855/2681 2856/2680 718/2676
+f 2853/2677 718/2676 2856/2680
+f 715/2656 2857/2682 2834/2655
+f 2859/2683 2834/2655 2857/2682
+f 717/2668 2843/2669 2858/2684
+f 2859/2683 2858/2684 2843/2669
+f 2861/2685 2862/2686 720/2687
+f 2860/2688 720/2687 2862/2686
+f 2840/2663 2862/2686 715/2656
+f 2857/2682 715/2656 2862/2686
+f 2863/2689 2865/2690 534/1657
+f 2074/1668 534/1657 2865/2690
+f 2864/2691 2865/2690 720/2687
+f 2861/2685 720/2687 2865/2690
+f 545/1713 2866/2692 2115/1716
+f 2867/2693 2115/1716 2866/2692
+f 2863/2689 534/1657 2867/2693
+f 2067/1659 2867/2693 534/1657
+f 2858/2684 2868/2694 717/2668
+f 2848/2673 717/2668 2868/2694
+f 2111/1711 2868/2694 545/1713
+f 2866/2692 545/1713 2868/2694
+f 2869/2695 2872/2696 460/2697
+f 1794/2698 460/2697 2872/2696
+f 2871/2699 2872/2696 722/2700
+f 2870/2701 722/2700 2872/2696
+f 618/2083 2873/2702 2407/2089
+f 2874/2703 2407/2089 2873/2702
+f 2874/2703 2869/2695 1801/2704
+f 460/2697 1801/2704 2869/2695
+f 2864/2691 720/2687 2876/2705
+f 2875/2706 2876/2705 720/2687
+f 2404/2085 2876/2705 618/2083
+f 2873/2702 618/2083 2876/2705
+f 722/2700 2870/2701 2877/2707
+f 2878/2708 2877/2707 2870/2701
+f 720/2687 2860/2688 2875/2706
+f 2878/2708 2875/2706 2860/2688
+f 463/2709 2879/2710 1805/2711
+f 2881/2712 1805/2711 2879/2710
+f 637/2200 2485/2202 2880/2713
+f 2881/2712 2880/2713 2485/2202
+f 2883/2714 2884/2715 724/2716
+f 2882/2717 724/2716 2884/2715
+f 1812/2718 2884/2715 463/2709
+f 2879/2710 463/2709 2884/2715
+f 718/2676 2885/2719 2855/2681
+f 2887/2720 2855/2681 2885/2719
+f 724/2716 2886/2721 2883/2714
+f 2887/2720 2883/2714 2886/2721
+f 2880/2713 2888/2722 637/2200
+f 2490/2209 637/2200 2888/2722
+f 2852/2678 2888/2722 718/2676
+f 2885/2719 718/2676 2888/2722
+f 726/2723 2890/2724 2889/2725
+f 2892/2726 2889/2725 2890/2724
+f 714/2659 2836/2658 2891/2727
+f 2892/2726 2891/2727 2836/2658
+f 2894/2728 2896/2729 727/2730
+f 2893/2731 727/2730 2896/2729
+f 2895/2732 2896/2729 726/2723
+f 2890/2724 726/2723 2896/2729
+f 2898/2733 2900/2734 728/2735
+f 2897/2736 728/2735 2900/2734
+f 2894/2728 727/2730 2900/2734
+f 2899/2737 2900/2734 727/2730
+f 2841/2665 714/2659 2902/2738
+f 2891/2727 2902/2738 714/2659
+f 2902/2738 2898/2733 2901/2739
+f 728/2735 2901/2739 2898/2733
+f 2901/2739 728/2735 2905/2740
+f 2903/2741 2905/2740 728/2735
+f 2877/2707 2905/2740 722/2700
+f 2904/2742 722/2700 2905/2740
+f 730/2743 2907/2744 2906/2745
+f 2908/2746 2906/2745 2907/2744
+f 728/2735 2897/2736 2903/2741
+f 2908/2746 2903/2741 2897/2736
+f 2909/2747 2911/2748 469/2749
+f 1837/2750 469/2749 2911/2748
+f 2910/2751 2911/2748 730/2743
+f 2907/2744 730/2743 2911/2748
+f 722/2700 2904/2742 2871/2699
+f 2912/2752 2871/2699 2904/2742
+f 469/2749 1832/2753 2909/2747
+f 2912/2752 2909/2747 1832/2753
+f 473/2754 2913/2755 1843/2756
+f 2915/2757 1843/2756 2913/2755
+f 2915/2757 2914/2758 2882/2717
+f 724/2716 2882/2717 2914/2758
+f 2917/2759 2918/2760 732/2761
+f 2916/2762 732/2761 2918/2760
+f 1850/2763 2918/2760 473/2754
+f 2913/2755 473/2754 2918/2760
+f 2921/2764 2895/2732 2919/2765
+f 726/2723 2919/2765 2895/2732
+f 732/2761 2920/2766 2917/2759
+f 2921/2764 2917/2759 2920/2766
+f 2914/2758 2922/2767 724/2716
+f 2886/2721 724/2716 2922/2767
+f 2919/2765 726/2723 2922/2767
+f 2889/2725 2922/2767 726/2723
+f 2923/2768 2925/2769 727/2730
+f 2899/2737 727/2730 2925/2769
+f 2906/2745 2925/2769 730/2743
+f 2924/2770 730/2743 2925/2769
+f 2926/2771 2927/2772 732/2761
+f 2920/2766 732/2761 2927/2772
+f 2923/2768 727/2730 2927/2772
+f 2893/2731 2927/2772 727/2730
+f 475/2773 2928/2774 1860/2775
+f 2929/2776 1860/2775 2928/2774
+f 732/2761 2916/2762 2926/2771
+f 2929/2776 2926/2771 2916/2762
+f 730/2743 2924/2770 2910/2751
+f 2930/2777 2910/2751 2924/2770
+f 2930/2777 2928/2774 1856/2778
+f 475/2773 1856/2778 2928/2774
+f 738/6 6/2779 739/1
+f 735/2 739/1 6/2779
+f 193/2780 737/7 736/3
+f 739/1 736/3 737/7
+f 743/11 742/12 740/10
+f 14/2781 740/10 742/12
+f 741/9 193/2780 743/11
+f 736/3 743/11 193/2780
+f 747/14 746/17 744/16
+f 10/2782 744/16 746/17
+f 193/2780 741/9 745/13
+f 747/14 745/13 741/9
+f 16/2783 748/18 749/20
+f 750/19 749/20 748/18
+f 745/13 750/19 193/2780
+f 737/7 193/2780 750/19
+f 754/22 735/2 751/24
+f 6/2779 751/24 735/2
+f 197/2784 753/25 752/21
+f 754/22 752/21 753/25
+f 18/2785 755/28 757/30
+f 758/29 757/30 755/28
+f 752/21 758/29 197/2784
+f 756/27 197/2784 758/29
+f 762/33 761/35 759/31
+f 13/2786 759/31 761/35
+f 197/2784 756/27 760/34
+f 762/33 760/34 756/27
+f 763/37 14/2781 764/36
+f 742/12 764/36 14/2781
+f 760/34 764/36 197/2784
+f 753/25 197/2784 764/36
+f 1/2787 765/39 768/42
+f 769/38 768/42 765/39
+f 767/44 769/38 202/2788
+f 766/40 202/2788 769/38
+f 12/2789 770/48 772/49
+f 773/46 772/49 770/48
+f 202/2788 766/40 771/45
+f 773/46 771/45 766/40
+f 774/52 777/53 11/2790
+f 776/54 11/2790 777/53
+f 771/45 777/53 202/2788
+f 775/51 202/2788 777/53
+f 24/2791 778/55 779/57
+f 780/56 779/57 778/55
+f 202/2788 775/51 767/44
+f 780/56 767/44 775/51
+f 7/2792 781/61 784/62
+f 785/59 784/62 781/61
+f 206/2793 783/64 782/58
+f 785/59 782/58 783/64
+f 779/57 788/66 24/2791
+f 787/67 24/2791 788/66
+f 782/58 788/66 206/2793
+f 786/65 206/2793 788/66
+f 11/2790 789/71 774/52
+f 791/69 774/52 789/71
+f 206/2793 786/65 790/68
+f 791/69 790/68 786/65
+f 792/72 794/73 23/2794
+f 793/74 23/2794 794/73
+f 790/68 794/73 206/2793
+f 783/64 206/2793 794/73
+f 2/2795 795/78 798/80
+f 799/76 798/80 795/78
+f 211/2796 797/81 796/75
+f 799/76 796/75 797/81
+f 800/84 803/85 27/2797
+f 802/86 27/2797 803/85
+f 796/75 803/85 211/2796
+f 801/83 211/2796 803/85
+f 804/89 807/90 19/2798
+f 806/91 19/2798 807/90
+f 211/2796 801/83 805/88
+f 807/90 805/88 801/83
+f 20/2799 808/93 809/94
+f 810/92 809/94 808/93
+f 810/92 797/81 805/88
+f 211/2796 805/88 797/81
+f 768/42 814/96 1/2787
+f 813/98 1/2787 814/96
+f 812/99 814/96 215/2800
+f 811/95 215/2800 814/96
+f 24/2791 815/103 778/55
+f 817/101 778/55 815/103
+f 215/2800 811/95 816/100
+f 817/101 816/100 811/95
+f 818/106 821/107 15/2801
+f 820/108 15/2801 821/107
+f 816/100 821/107 215/2800
+f 819/105 215/2800 821/107
+f 16/2783 822/110 823/111
+f 824/109 823/111 822/110
+f 215/2800 819/105 812/99
+f 824/109 812/99 819/105
+f 828/113 827/114 738/6
+f 6/2779 738/6 827/114
+f 218/2802 826/116 825/112
+f 828/113 825/112 826/116
+f 16/2783 823/111 748/18
+f 830/118 748/18 823/111
+f 825/112 830/118 218/2802
+f 829/117 218/2802 830/118
+f 15/2801 831/122 818/106
+f 833/120 818/106 831/122
+f 218/2802 829/117 832/119
+f 833/120 832/119 829/117
+f 834/123 836/124 32/2803
+f 835/125 32/2803 836/124
+f 832/119 836/124 218/2802
+f 826/116 218/2802 836/124
+f 841/129 840/132 837/128
+f 5/2804 837/128 840/132
+f 838/127 222/2805 841/129
+f 839/130 841/129 222/2805
+f 32/2803 835/125 843/135
+f 844/134 843/135 835/125
+f 844/134 842/133 838/127
+f 222/2805 838/127 842/133
+f 831/122 15/2801 847/136
+f 845/137 847/136 15/2801
+f 842/133 847/136 222/2805
+f 846/138 222/2805 847/136
+f 849/142 30/2806 850/140
+f 848/141 850/140 30/2806
+f 222/2805 846/138 839/130
+f 850/140 839/130 846/138
+f 851/145 854/146 7/2792
+f 781/61 7/2792 854/146
+f 853/147 854/146 224/2807
+f 852/144 224/2807 854/146
+f 857/149 856/150 849/142
+f 30/2806 849/142 856/150
+f 855/148 224/2807 857/149
+f 852/144 857/149 224/2807
+f 15/2801 820/108 845/137
+f 859/151 845/137 820/108
+f 859/151 858/152 855/148
+f 224/2807 855/148 858/152
+f 24/2791 787/67 815/103
+f 860/153 815/103 787/67
+f 858/152 860/153 224/2807
+f 853/147 224/2807 860/153
+f 861/156 864/157 2/2795
+f 795/78 2/2795 864/157
+f 863/158 864/157 228/2808
+f 862/155 228/2808 864/157
+f 18/2785 865/162 867/163
+f 868/160 867/163 865/162
+f 228/2808 862/155 866/159
+f 868/160 866/159 862/155
+f 869/166 872/167 17/2809
+f 871/168 17/2809 872/167
+f 866/159 872/167 228/2808
+f 870/165 228/2808 872/167
+f 27/2797 802/86 873/170
+f 874/169 873/170 802/86
+f 228/2808 870/165 863/158
+f 874/169 863/158 870/165
+f 879/172 878/177 875/174
+f 8/2810 875/174 878/177
+f 232/2811 877/175 876/171
+f 879/172 876/171 877/175
+f 873/170 882/179 27/2797
+f 881/180 27/2797 882/179
+f 876/171 882/179 232/2811
+f 880/178 232/2811 882/179
+f 17/2809 883/184 869/166
+f 885/182 869/166 883/184
+f 232/2811 880/178 884/181
+f 885/182 884/181 880/178
+f 887/187 29/2812 888/185
+f 886/186 888/185 29/2812
+f 884/181 888/185 232/2811
+f 877/175 232/2811 888/185
+f 837/128 5/2804 892/188
+f 889/189 892/188 5/2804
+f 891/192 892/188 235/2813
+f 890/190 235/2813 892/188
+f 29/2812 887/187 894/195
+f 895/194 894/195 887/187
+f 235/2813 890/190 893/193
+f 895/194 893/193 890/190
+f 896/198 898/199 17/2809
+f 883/184 17/2809 898/199
+f 893/193 898/199 235/2813
+f 897/197 235/2813 898/199
+f 899/201 32/2803 900/200
+f 843/135 900/200 32/2803
+f 235/2813 897/197 891/192
+f 900/200 891/192 897/197
+f 751/24 6/2779 903/203
+f 827/114 903/203 6/2779
+f 236/2814 902/204 901/202
+f 903/203 901/202 902/204
+f 32/2803 899/201 834/123
+f 905/206 834/123 899/201
+f 901/202 905/206 236/2814
+f 904/205 236/2814 905/206
+f 17/2809 871/168 896/198
+f 907/208 896/198 871/168
+f 236/2814 904/205 906/207
+f 907/208 906/207 904/205
+f 18/2785 757/30 865/162
+f 908/209 865/162 757/30
+f 906/207 908/209 236/2814
+f 902/204 236/2814 908/209
+f 913/211 912/214 909/213
+f 3/2815 909/213 912/214
+f 241/2816 911/216 910/210
+f 913/211 910/210 911/216
+f 22/2817 914/219 916/221
+f 917/220 916/221 914/219
+f 915/218 241/2816 917/220
+f 910/210 917/220 241/2816
+f 21/2818 918/222 920/226
+f 921/224 920/226 918/222
+f 921/224 919/225 915/218
+f 241/2816 915/218 919/225
+f 31/2819 922/227 923/229
+f 924/228 923/229 922/227
+f 919/225 924/228 241/2816
+f 911/216 241/2816 924/228
+f 925/233 929/231 9/2820
+f 928/235 9/2820 929/231
+f 245/2821 927/234 926/230
+f 929/231 926/230 927/234
+f 923/229 932/238 31/2819
+f 931/239 31/2819 932/238
+f 926/230 932/238 245/2821
+f 930/237 245/2821 932/238
+f 21/2818 933/243 918/222
+f 935/241 918/222 933/243
+f 245/2821 930/237 934/240
+f 935/241 934/240 930/237
+f 938/244 937/246 936/245
+f 33/2822 936/245 937/246
+f 934/240 938/244 245/2821
+f 927/234 245/2821 938/244
+f 939/249 943/250 4/2823
+f 942/252 4/2823 943/250
+f 941/253 943/250 250/2824
+f 940/248 250/2824 943/250
+f 28/2825 944/257 946/258
+f 947/255 946/258 944/257
+f 250/2824 940/248 945/254
+f 947/255 945/254 940/248
+f 948/260 951/259 25/2826
+f 950/263 25/2826 951/259
+f 945/254 951/259 250/2824
+f 949/261 250/2824 951/259
+f 26/2827 952/264 953/266
+f 954/265 953/266 952/264
+f 954/265 941/253 949/261
+f 250/2824 949/261 941/253
+f 9/2820 928/235 957/269
+f 958/268 957/269 928/235
+f 253/2828 956/271 955/267
+f 958/268 955/267 956/271
+f 959/274 961/275 33/2822
+f 936/245 33/2822 961/275
+f 955/267 961/275 253/2828
+f 960/273 253/2828 961/275
+f 25/2826 950/263 963/278
+f 964/277 963/278 950/263
+f 253/2828 960/273 962/276
+f 964/277 962/276 960/273
+f 965/279 966/280 28/2825
+f 944/257 28/2825 966/280
+f 962/276 966/280 253/2828
+f 956/271 253/2828 966/280
+f 840/132 970/282 5/2804
+f 969/284 5/2804 970/282
+f 968/285 970/282 256/2829
+f 967/281 256/2829 970/282
+f 30/2806 971/289 848/141
+f 973/287 848/141 971/289
+f 256/2829 967/281 972/286
+f 973/287 972/286 967/281
+f 975/292 31/2819 976/290
+f 931/239 976/290 31/2819
+f 972/286 976/290 256/2829
+f 974/291 256/2829 976/290
+f 9/2820 977/294 925/233
+f 978/293 925/233 977/294
+f 256/2829 974/291 968/285
+f 978/293 968/285 974/291
+f 29/2812 979/298 886/186
+f 982/296 886/186 979/298
+f 982/296 980/295 981/299
+f 259/2830 981/299 980/295
+f 946/258 985/300 28/2825
+f 984/302 28/2825 985/300
+f 980/295 985/300 259/2830
+f 983/301 259/2830 985/300
+f 986/305 988/306 4/2823
+f 939/249 4/2823 988/306
+f 259/2830 983/301 987/304
+f 988/306 987/304 983/301
+f 878/177 990/307 8/2810
+f 989/308 8/2810 990/307
+f 981/299 259/2830 990/307
+f 987/304 990/307 259/2830
+f 23/2794 991/309 792/72
+f 994/311 792/72 991/309
+f 994/311 992/312 993/313
+f 262/2831 993/313 992/312
+f 22/2817 916/221 996/316
+f 997/314 996/316 916/221
+f 992/312 997/314 262/2831
+f 995/315 262/2831 997/314
+f 998/320 1000/318 3/2815
+f 909/213 3/2815 1000/318
+f 262/2831 995/315 999/317
+f 1000/318 999/317 995/315
+f 784/62 1002/321 7/2792
+f 1001/322 7/2792 1002/321
+f 993/313 262/2831 1002/321
+f 999/317 1002/321 262/2831
+f 28/2825 984/302 965/279
+f 1005/324 965/279 984/302
+f 263/2832 1004/325 1003/323
+f 1005/324 1003/323 1004/325
+f 894/195 1007/327 29/2812
+f 979/298 29/2812 1007/327
+f 1003/323 1007/327 263/2832
+f 1006/326 263/2832 1007/327
+f 5/2804 969/284 889/189
+f 1009/329 889/189 969/284
+f 263/2832 1006/326 1008/328
+f 1009/329 1008/328 1006/326
+f 957/269 1010/330 9/2820
+f 977/294 9/2820 1010/330
+f 1008/328 1010/330 263/2832
+f 1004/325 263/2832 1010/330
+f 856/150 1013/332 30/2806
+f 971/289 30/2806 1013/332
+f 1012/333 1013/332 264/2833
+f 1011/331 264/2833 1013/332
+f 7/2792 1001/322 851/145
+f 1015/335 851/145 1001/322
+f 264/2833 1011/331 1014/334
+f 1015/335 1014/334 1011/331
+f 912/214 1017/337 3/2815
+f 998/320 3/2815 1017/337
+f 1014/334 1017/337 264/2833
+f 1016/336 264/2833 1017/337
+f 31/2819 975/292 922/227
+f 1018/338 922/227 975/292
+f 264/2833 1016/336 1012/333
+f 1018/338 1012/333 1016/336
+f 23/2794 1019/339 991/309
+f 1022/341 991/309 1019/339
+f 1020/342 268/2834 1022/341
+f 1021/343 1022/341 268/2834
+f 35/2835 1023/344 1025/348
+f 1026/346 1025/348 1023/344
+f 1026/346 1024/347 1020/342
+f 268/2834 1020/342 1024/347
+f 1029/353 34/2836 1030/349
+f 1027/350 1030/349 34/2836
+f 1024/347 1030/349 268/2834
+f 1028/351 268/2834 1030/349
+f 1032/354 1031/355 996/316
+f 22/2817 996/316 1031/355
+f 268/2834 1028/351 1021/343
+f 1032/354 1021/343 1028/351
+f 1033/358 1036/359 35/2837
+f 1023/361 35/2837 1036/359
+f 1035/360 1036/359 272/2838
+f 1034/357 272/2838 1036/359
+f 37/2839 1037/366 1039/367
+f 1040/364 1039/367 1037/366
+f 272/2838 1034/357 1038/363
+f 1040/364 1038/363 1034/357
+f 1043/372 36/2840 1044/368
+f 1041/369 1044/368 36/2840
+f 1038/363 1044/368 272/2838
+f 1042/370 272/2838 1044/368
+f 1046/373 1045/375 1029/374
+f 34/2841 1029/374 1045/375
+f 272/2838 1042/370 1035/360
+f 1046/373 1035/360 1042/370
+f 35/2837 1047/379 1033/358
+f 1050/377 1033/358 1047/379
+f 276/2842 1049/380 1048/376
+f 1050/377 1048/376 1049/380
+f 1051/383 1054/384 40/2843
+f 1053/385 40/2843 1054/384
+f 1048/376 1054/384 276/2842
+f 1052/382 276/2842 1054/384
+f 41/2844 1055/389 1057/390
+f 1058/387 1057/390 1055/389
+f 276/2842 1052/382 1056/386
+f 1058/387 1056/386 1052/382
+f 1039/367 1060/391 37/2839
+f 1059/392 37/2839 1060/391
+f 1056/386 1060/391 276/2842
+f 1049/380 276/2842 1060/391
+f 37/2839 1059/392 1063/395
+f 1064/394 1063/395 1059/392
+f 280/2845 1062/397 1061/393
+f 1064/394 1061/393 1062/397
+f 1065/400 1067/401 41/2844
+f 1055/389 41/2844 1067/401
+f 1061/393 1067/401 280/2845
+f 1066/399 280/2845 1067/401
+f 1071/405 1070/406 1068/404
+f 42/2846 1068/404 1070/406
+f 280/2845 1066/399 1069/403
+f 1071/405 1069/403 1066/399
+f 38/2847 1072/407 1073/409
+f 1074/408 1073/409 1072/407
+f 1074/408 1062/397 1069/403
+f 280/2845 1069/403 1062/397
+f 1079/411 1078/416 1075/413
+f 43/2848 1075/413 1078/416
+f 285/2849 1077/414 1076/410
+f 1079/411 1076/410 1077/414
+f 1080/420 1083/418 48/2850
+f 1082/421 48/2850 1083/418
+f 285/2849 1076/410 1081/417
+f 1083/418 1081/417 1076/410
+f 1084/424 1087/425 45/2851
+f 1086/426 45/2851 1087/425
+f 285/2849 1081/417 1085/423
+f 1087/425 1085/423 1081/417
+f 1088/427 1090/428 46/2852
+f 1089/429 46/2852 1090/428
+f 285/2849 1085/423 1077/414
+f 1090/428 1077/414 1085/423
+f 1091/433 1095/431 44/2853
+f 1094/435 44/2853 1095/431
+f 290/2854 1093/434 1092/430
+f 1095/431 1092/430 1093/434
+f 1099/438 1098/441 1096/440
+f 50/2855 1096/440 1098/441
+f 290/2854 1092/430 1097/437
+f 1099/438 1097/437 1092/430
+f 1100/444 1103/445 47/2856
+f 1102/446 47/2856 1103/445
+f 1097/437 1103/445 290/2854
+f 1101/443 290/2854 1103/445
+f 1104/447 1106/448 49/2857
+f 1105/449 49/2857 1106/448
+f 290/2854 1101/443 1093/434
+f 1106/448 1093/434 1101/443
+f 18/2858 867/452 1109/455
+f 1110/453 1109/455 867/452
+f 1107/451 293/2859 1110/453
+f 1108/456 1110/453 293/2859
+f 1113/458 861/461 1111/460
+f 2/2860 1111/460 861/461
+f 293/2859 1107/451 1112/457
+f 1113/458 1112/457 1107/451
+f 1094/435 1116/463 44/2853
+f 1115/464 44/2853 1116/463
+f 1112/457 1116/463 293/2859
+f 1114/462 293/2859 1116/463
+f 1117/466 1118/465 49/2857
+f 1104/447 49/2857 1118/465
+f 293/2859 1114/462 1108/456
+f 1118/465 1108/456 1114/462
+f 20/2861 1119/470 1122/471
+f 1123/468 1122/471 1119/470
+f 297/2862 1121/473 1120/467
+f 1123/468 1120/467 1121/473
+f 13/2863 1124/475 1126/478
+f 1127/474 1126/478 1124/475
+f 1120/467 1127/474 297/2862
+f 1125/476 297/2862 1127/474
+f 1102/446 1130/480 47/2856
+f 1129/481 47/2856 1130/480
+f 297/2862 1125/476 1128/479
+f 1130/480 1128/479 1125/476
+f 1131/482 1132/483 50/2855
+f 1096/440 50/2855 1132/483
+f 1128/479 1132/483 297/2862
+f 1121/473 297/2862 1132/483
+f 18/2858 1109/455 755/486
+f 1135/485 755/486 1109/455
+f 298/2864 1134/488 1133/484
+f 1135/485 1133/484 1134/488
+f 1105/449 1137/490 49/2857
+f 1117/466 49/2857 1137/490
+f 1133/484 1137/490 298/2864
+f 1136/489 298/2864 1137/490
+f 1129/481 1139/492 47/2856
+f 1100/444 47/2856 1139/492
+f 298/2864 1136/489 1138/491
+f 1139/492 1138/491 1136/489
+f 13/2863 761/493 1124/475
+f 1140/494 1124/475 761/493
+f 1138/491 1140/494 298/2864
+f 1134/488 298/2864 1140/494
+f 20/2861 1122/471 808/498
+f 1143/495 808/498 1122/471
+f 1142/497 1143/495 299/2865
+f 1141/496 299/2865 1143/495
+f 1098/441 1145/501 50/2855
+f 1131/482 50/2855 1145/501
+f 299/2865 1141/496 1144/500
+f 1145/501 1144/500 1141/496
+f 1115/464 1147/503 44/2853
+f 1091/433 44/2853 1147/503
+f 1144/500 1147/503 299/2865
+f 1146/502 299/2865 1147/503
+f 1148/504 1111/460 798/505
+f 2/2860 798/505 1111/460
+f 1148/504 1142/497 1146/502
+f 299/2865 1146/502 1142/497
+f 16/2866 1149/508 822/511
+f 1152/509 822/511 1149/508
+f 1151/512 1152/509 302/2867
+f 1150/507 302/2867 1152/509
+f 1082/421 1155/514 48/2850
+f 1154/515 48/2850 1155/514
+f 302/2867 1150/507 1153/513
+f 1155/514 1153/513 1150/507
+f 1156/518 1158/519 43/2848
+f 1075/413 43/2848 1158/519
+f 1153/513 1158/519 302/2867
+f 1157/517 302/2867 1158/519
+f 1160/520 1159/522 813/521
+f 1/2868 813/521 1159/522
+f 302/2867 1157/517 1151/512
+f 1160/520 1151/512 1157/517
+f 772/525 1164/526 12/2869
+f 1163/528 12/2869 1164/526
+f 1161/524 304/2870 1164/526
+f 1162/527 1164/526 304/2870
+f 1159/522 1166/531 1/2868
+f 765/532 1/2868 1166/531
+f 1166/531 1165/530 1161/524
+f 304/2870 1161/524 1165/530
+f 1078/416 1168/534 43/2848
+f 1156/518 43/2848 1168/534
+f 1165/530 1168/534 304/2870
+f 1167/533 304/2870 1168/534
+f 1169/536 1170/535 46/2852
+f 1088/427 46/2852 1170/535
+f 304/2870 1167/533 1162/527
+f 1170/535 1162/527 1167/533
+f 1171/539 1174/540 10/2871
+f 744/542 10/2871 1174/540
+f 1173/543 1174/540 306/2872
+f 1172/538 306/2872 1174/540
+f 1086/426 1177/545 45/2851
+f 1176/546 45/2851 1177/545
+f 306/2872 1172/538 1175/544
+f 1177/545 1175/544 1172/538
+f 1154/515 1179/548 48/2850
+f 1080/420 48/2850 1179/548
+f 1175/544 1179/548 306/2872
+f 1178/547 306/2872 1179/548
+f 16/2866 749/550 1149/508
+f 1180/549 1149/508 749/550
+f 306/2872 1178/547 1173/543
+f 1180/549 1173/543 1178/547
+f 12/2869 1163/528 1183/553
+f 1184/552 1183/553 1163/528
+f 308/2873 1182/555 1181/551
+f 1184/552 1181/551 1182/555
+f 1089/429 1186/557 46/2852
+f 1169/536 46/2852 1186/557
+f 1181/551 1186/557 308/2873
+f 1185/556 308/2873 1186/557
+f 1176/546 1188/559 45/2851
+f 1084/424 45/2851 1188/559
+f 308/2873 1185/556 1187/558
+f 1188/559 1187/558 1185/556
+f 10/2871 1189/560 1171/539
+f 1190/561 1171/539 1189/560
+f 1187/558 1190/561 308/2873
+f 1182/555 308/2873 1190/561
+f 38/2847 1191/565 1072/407
+f 1194/563 1072/407 1191/565
+f 312/2874 1193/566 1192/562
+f 1194/563 1192/562 1193/566
+f 51/2875 1195/567 1197/571
+f 1198/569 1197/571 1195/567
+f 312/2874 1192/562 1196/570
+f 1198/569 1196/570 1192/562
+f 1201/576 53/2876 1202/572
+f 1199/573 1202/572 53/2876
+f 312/2874 1196/570 1200/574
+f 1202/572 1200/574 1196/570
+f 1205/578 1204/579 1043/372
+f 36/2840 1043/372 1204/579
+f 312/2874 1200/574 1203/577
+f 1205/578 1203/577 1200/574
+f 1063/395 1206/580 37/2839
+f 1037/366 37/2839 1206/580
+f 1203/577 1206/580 312/2874
+f 1193/566 312/2874 1206/580
+f 52/2877 1207/584 1210/585
+f 1211/582 1210/585 1207/584
+f 1211/582 1208/581 1209/587
+f 317/2878 1209/587 1208/581
+f 1212/590 1215/591 53/2879
+f 1214/592 53/2879 1215/591
+f 1208/581 1215/591 317/2878
+f 1213/589 317/2878 1215/591
+f 1216/595 1219/596 57/2880
+f 1218/597 57/2880 1219/596
+f 1217/594 317/2878 1219/596
+f 1213/589 1219/596 317/2878
+f 1222/598 1221/600 1220/599
+f 56/2881 1220/599 1221/600
+f 317/2878 1217/594 1209/587
+f 1222/598 1209/587 1217/594
+f 1225/602 1212/590 1201/604
+f 53/2879 1201/604 1212/590
+f 320/2882 1224/605 1223/601
+f 1225/602 1223/601 1224/605
+f 51/2883 1226/608 1195/610
+f 1228/609 1195/610 1226/608
+f 1227/607 320/2882 1228/609
+f 1223/601 1228/609 320/2882
+f 1232/612 1231/615 1229/614
+f 54/2884 1229/614 1231/615
+f 320/2882 1227/607 1230/611
+f 1232/612 1230/611 1227/607
+f 1218/597 1234/616 57/2880
+f 1233/617 57/2880 1234/616
+f 1230/611 1234/616 320/2882
+f 1224/605 320/2882 1234/616
+f 1238/619 1229/614 1235/621
+f 54/2884 1235/621 1229/614
+f 323/2885 1237/622 1236/618
+f 1238/619 1236/618 1237/622
+f 1239/625 1242/626 55/2886
+f 1241/627 55/2886 1242/626
+f 1236/618 1242/626 323/2885
+f 1240/624 323/2885 1242/626
+f 1221/600 1245/629 56/2881
+f 1244/630 56/2881 1245/629
+f 323/2885 1240/624 1243/628
+f 1245/629 1243/628 1240/624
+f 1233/617 1246/631 57/2880
+f 1216/595 57/2880 1246/631
+f 1243/628 1246/631 323/2885
+f 1237/622 323/2885 1246/631
+f 1247/634 1251/635 69/2887
+f 1250/637 69/2887 1251/635
+f 1249/638 1251/635 327/2888
+f 1248/633 327/2888 1251/635
+f 76/2889 1252/642 1254/643
+f 1255/640 1254/643 1252/642
+f 327/2888 1248/633 1253/639
+f 1255/640 1253/639 1248/633
+f 920/226 1258/645 21/2818
+f 1257/646 21/2818 1258/645
+f 1253/639 1258/645 327/2888
+f 1256/644 327/2888 1258/645
+f 22/2817 1259/648 914/219
+f 1260/647 914/219 1259/648
+f 327/2888 1256/644 1249/638
+f 1260/647 1249/638 1256/644
+f 1264/654 77/2890 1265/649
+f 1261/650 1265/649 77/2890
+f 1263/655 1265/649 331/2891
+f 1262/651 331/2891 1265/649
+f 1269/657 1268/660 1266/659
+f 80/2892 1266/659 1268/660
+f 331/2891 1262/651 1267/656
+f 1269/657 1267/656 1262/651
+f 963/278 1272/662 25/2826
+f 1271/663 25/2826 1272/662
+f 1270/661 331/2891 1272/662
+f 1267/656 1272/662 331/2891
+f 33/2822 1273/665 959/274
+f 1274/664 959/274 1273/665
+f 331/2891 1270/661 1263/655
+f 1274/664 1263/655 1270/661
+f 1266/659 80/2892 1278/666
+f 1275/667 1278/666 80/2892
+f 1277/670 1278/666 334/2893
+f 1276/668 334/2893 1278/666
+f 1282/672 1281/675 1279/674
+f 81/2894 1279/674 1281/675
+f 334/2893 1276/668 1280/671
+f 1282/672 1280/671 1276/668
+f 1284/678 26/2827 1285/676
+f 953/266 1285/676 26/2827
+f 1280/671 1285/676 334/2893
+f 1283/677 334/2893 1285/676
+f 25/2826 1271/663 948/260
+f 1286/679 948/260 1271/663
+f 1286/679 1277/670 1283/677
+f 334/2893 1283/677 1277/670
+f 1287/682 1291/683 67/2895
+f 1290/685 67/2895 1291/683
+f 1289/686 1291/683 338/2896
+f 1288/681 338/2896 1291/683
+f 1295/688 1294/691 1292/690
+f 68/2897 1292/690 1294/691
+f 338/2896 1288/681 1293/687
+f 1295/688 1293/687 1288/681
+f 1297/696 12/2789 1298/692
+f 1183/693 1298/692 12/2789
+f 1293/687 1298/692 338/2896
+f 1296/694 338/2896 1298/692
+f 1300/697 1189/699 1299/698
+f 10/2782 1299/698 1189/699
+f 338/2896 1296/694 1289/686
+f 1300/697 1289/686 1296/694
+f 78/2898 1301/703 1304/704
+f 1305/701 1304/704 1301/703
+f 1305/701 1302/700 1303/706
+f 342/2899 1303/706 1302/700
+f 1306/709 1309/710 74/2900
+f 1308/711 74/2900 1309/710
+f 1307/708 342/2899 1309/710
+f 1302/700 1309/710 342/2899
+f 1312/713 1311/716 1126/715
+f 13/2786 1126/715 1311/716
+f 342/2899 1307/708 1310/712
+f 1312/713 1310/712 1307/708
+f 1313/717 1314/718 20/2799
+f 1119/719 20/2799 1314/718
+f 1303/706 342/2899 1314/718
+f 1310/712 1314/718 342/2899
+f 1315/721 1318/720 76/2889
+f 1252/642 76/2889 1318/720
+f 1317/724 1318/720 344/2901
+f 1316/722 344/2901 1318/720
+f 1321/726 1320/727 1264/654
+f 77/2890 1264/654 1320/727
+f 344/2901 1316/722 1319/725
+f 1321/726 1319/725 1316/722
+f 937/246 1323/729 33/2822
+f 1273/665 33/2822 1323/729
+f 1322/728 344/2901 1323/729
+f 1319/725 1323/729 344/2901
+f 21/2818 1257/646 933/243
+f 1324/730 933/243 1257/646
+f 344/2901 1322/728 1317/724
+f 1324/730 1317/724 1322/728
+f 1328/736 70/2902 1329/734
+f 1325/733 1329/734 70/2902
+f 1326/732 347/2903 1329/734
+f 1327/737 1329/734 347/2903
+f 69/2887 1250/637 1331/740
+f 1332/739 1331/740 1250/637
+f 347/2903 1326/732 1330/738
+f 1332/739 1330/738 1326/732
+f 1031/355 1334/742 22/2817
+f 1259/648 22/2817 1334/742
+f 1330/738 1334/742 347/2903
+f 1333/741 347/2903 1334/742
+f 34/2836 1335/743 1027/350
+f 1336/744 1027/350 1335/743
+f 347/2903 1333/741 1327/737
+f 1336/744 1327/737 1333/741
+f 1340/750 71/2904 1341/745
+f 1337/746 1341/745 71/2904
+f 1339/751 1341/745 350/2905
+f 1338/747 350/2905 1341/745
+f 1344/753 1343/756 1328/755
+f 70/2906 1328/755 1343/756
+f 350/2905 1338/747 1342/752
+f 1344/753 1342/752 1338/747
+f 1045/759 1346/760 34/2907
+f 1335/761 34/2907 1346/760
+f 1345/758 350/2905 1346/760
+f 1342/752 1346/760 350/2905
+f 36/2908 1347/762 1041/764
+f 1348/763 1041/764 1347/762
+f 1348/763 1339/751 1345/758
+f 350/2905 1345/758 1339/751
+f 59/2909 1349/766 1352/770
+f 1353/765 1352/770 1349/766
+f 1351/771 1353/765 355/2910
+f 1350/767 355/2910 1353/765
+f 65/2911 1354/775 1356/776
+f 1357/773 1356/776 1354/775
+f 355/2910 1350/767 1355/772
+f 1357/773 1355/772 1350/767
+f 1358/779 1361/780 39/2912
+f 1360/781 39/2912 1361/780
+f 1355/772 1361/780 355/2910
+f 1359/778 355/2910 1361/780
+f 38/2847 1362/783 1363/784
+f 1364/782 1363/784 1362/783
+f 355/2910 1359/778 1351/771
+f 1364/782 1351/771 1359/778
+f 1368/790 60/2913 1369/787
+f 1365/785 1369/787 60/2913
+f 358/2914 1367/789 1366/788
+f 1369/787 1366/788 1367/789
+f 1372/793 1371/794 1352/770
+f 59/2909 1352/770 1371/794
+f 1370/792 358/2914 1372/793
+f 1366/788 1372/793 358/2914
+f 38/2847 1073/409 1362/783
+f 1374/795 1362/783 1073/409
+f 1374/795 1373/796 1370/792
+f 358/2914 1370/792 1373/796
+f 1375/798 1376/797 42/2846
+f 1068/404 42/2846 1376/797
+f 1373/796 1376/797 358/2914
+f 1367/789 358/2914 1376/797
+f 1380/804 79/2915 1381/799
+f 1377/800 1381/799 79/2915
+f 1379/805 1381/799 361/2916
+f 1378/801 361/2916 1381/799
+f 1383/808 78/2898 1384/806
+f 1304/704 1384/806 78/2898
+f 361/2916 1378/801 1382/807
+f 1384/806 1382/807 1378/801
+f 20/2799 809/94 1313/717
+f 1386/810 1313/717 809/94
+f 1385/809 361/2916 1386/810
+f 1382/807 1386/810 361/2916
+f 19/2798 1387/812 804/89
+f 1388/811 804/89 1387/812
+f 1388/811 1379/805 1385/809
+f 361/2916 1385/809 1379/805
+f 68/2897 1389/816 1292/690
+f 1392/814 1292/690 1389/816
+f 364/2917 1391/817 1390/813
+f 1392/814 1390/813 1391/817
+f 1393/820 1396/821 58/2918
+f 1395/822 58/2918 1396/821
+f 1394/819 364/2917 1396/821
+f 1390/813 1396/821 364/2917
+f 11/2790 776/54 1398/825
+f 1399/824 1398/825 776/54
+f 364/2917 1394/819 1397/823
+f 1399/824 1397/823 1394/819
+f 770/48 12/2789 1400/826
+f 1297/696 1400/826 12/2789
+f 1397/823 1400/826 364/2917
+f 1391/817 364/2917 1400/826
+f 1404/828 1393/820 1401/830
+f 58/2918 1401/830 1393/820
+f 367/2919 1403/831 1402/827
+f 1404/828 1402/827 1403/831
+f 1405/834 1408/835 61/2920
+f 1407/836 61/2920 1408/835
+f 1406/833 367/2919 1408/835
+f 1402/827 1408/835 367/2919
+f 23/2794 793/74 1410/839
+f 1411/838 1410/839 793/74
+f 367/2919 1406/833 1409/837
+f 1411/838 1409/837 1406/833
+f 1398/825 1412/840 11/2790
+f 789/71 11/2790 1412/840
+f 1409/837 1412/840 367/2919
+f 1403/831 367/2919 1412/840
+f 1413/844 1416/842 61/2920
+f 1405/834 61/2920 1416/842
+f 370/2921 1415/845 1414/841
+f 1416/842 1414/841 1415/845
+f 1420/847 1419/850 1417/849
+f 62/2922 1417/849 1419/850
+f 370/2921 1414/841 1418/846
+f 1420/847 1418/846 1414/841
+f 35/2835 1025/348 1422/853
+f 1423/852 1422/853 1025/348
+f 1421/851 370/2921 1423/852
+f 1418/846 1423/852 370/2921
+f 1410/839 1424/854 23/2794
+f 1019/339 23/2794 1424/854
+f 1421/851 1424/854 370/2921
+f 1415/845 370/2921 1424/854
+f 1425/857 1428/858 62/2923
+f 1417/860 62/2923 1428/858
+f 1427/861 1428/858 373/2924
+f 1426/856 373/2924 1428/858
+f 1429/864 1432/865 63/2925
+f 1431/866 63/2925 1432/865
+f 373/2924 1426/856 1430/863
+f 1432/865 1430/863 1426/856
+f 1435/868 1434/869 1053/385
+f 40/2843 1053/385 1434/869
+f 1433/867 373/2924 1435/868
+f 1430/863 1435/868 373/2924
+f 35/2837 1422/871 1047/379
+f 1436/870 1047/379 1422/871
+f 1436/870 1427/861 1433/867
+f 373/2924 1433/867 1427/861
+f 1440/873 1429/864 1437/875
+f 63/2925 1437/875 1429/864
+f 376/2926 1439/876 1438/872
+f 1440/873 1438/872 1439/876
+f 1441/879 1444/880 64/2927
+f 1443/881 64/2927 1444/880
+f 1438/872 1444/880 376/2926
+f 1442/878 376/2926 1444/880
+f 41/2844 1057/390 1446/884
+f 1447/883 1446/884 1057/390
+f 376/2926 1442/878 1445/882
+f 1447/883 1445/882 1442/878
+f 1434/869 1448/885 40/2843
+f 1051/383 40/2843 1448/885
+f 1445/882 1448/885 376/2926
+f 1439/876 376/2926 1448/885
+f 1452/886 1441/879 1449/887
+f 64/2927 1449/887 1441/879
+f 1451/890 1452/886 378/2928
+f 1450/888 378/2928 1452/886
+f 1455/892 1454/893 1368/790
+f 60/2913 1368/790 1454/893
+f 378/2928 1450/888 1453/891
+f 1455/892 1453/891 1450/888
+f 1070/406 1457/895 42/2846
+f 1375/798 42/2846 1457/895
+f 1453/891 1457/895 378/2928
+f 1456/894 378/2928 1457/895
+f 41/2844 1446/884 1065/400
+f 1458/896 1065/400 1446/884
+f 1458/896 1451/890 1456/894
+f 378/2928 1456/894 1451/890
+f 1462/897 1461/899 1340/750
+f 71/2904 1340/750 1461/899
+f 381/2929 1460/901 1459/898
+f 1462/897 1459/898 1460/901
+f 1347/762 36/2908 1464/902
+f 1204/903 1464/902 36/2908
+f 381/2929 1459/898 1463/904
+f 1464/902 1463/904 1459/898
+f 53/2930 1214/909 1199/910
+f 1466/907 1199/910 1214/909
+f 381/2929 1463/904 1465/906
+f 1466/907 1465/906 1463/904
+f 1467/913 1469/914 52/2931
+f 1207/915 52/2931 1469/914
+f 381/2929 1465/906 1468/912
+f 1469/914 1468/912 1465/906
+f 1470/916 1472/917 72/2932
+f 1471/918 72/2932 1472/917
+f 381/2929 1468/912 1460/901
+f 1472/917 1460/901 1468/912
+f 38/2847 1363/784 1191/565
+f 1475/919 1191/565 1363/784
+f 1475/919 1473/920 1474/921
+f 383/2933 1474/921 1473/920
+f 39/2912 1476/925 1358/779
+f 1478/923 1358/779 1476/925
+f 1478/923 1477/922 1473/920
+f 383/2933 1473/920 1477/922
+f 1197/571 1480/926 51/2875
+f 1479/927 51/2875 1480/926
+f 1477/922 1480/926 383/2933
+f 1474/921 383/2933 1480/926
+f 1485/931 1484/933 1481/930
+f 85/2934 1481/930 1484/933
+f 1483/934 1485/931 388/2935
+f 1482/929 388/2935 1485/931
+f 1489/936 1488/939 1486/938
+f 83/2936 1486/938 1488/939
+f 388/2935 1482/929 1487/935
+f 1489/936 1487/935 1482/929
+f 1490/942 1493/943 84/2937
+f 1492/944 84/2937 1493/943
+f 1487/935 1493/943 388/2935
+f 1491/941 388/2935 1493/943
+f 1496/945 1495/947 1494/946
+f 82/2938 1494/946 1495/947
+f 388/2935 1491/941 1483/934
+f 1496/945 1483/934 1491/941
+f 1497/949 1501/948 92/2939
+f 1500/953 92/2939 1501/948
+f 1499/954 1501/948 393/2940
+f 1498/950 393/2940 1501/948
+f 1505/956 1504/959 1502/958
+f 89/2941 1502/958 1504/959
+f 393/2940 1498/950 1503/955
+f 1505/956 1503/955 1498/950
+f 1509/961 1508/964 1506/963
+f 88/2942 1506/963 1508/964
+f 1507/960 393/2940 1509/961
+f 1503/955 1509/961 393/2940
+f 93/2943 1510/966 1511/967
+f 1512/965 1511/967 1510/966
+f 1512/965 1499/954 1507/960
+f 393/2940 1507/960 1499/954
+f 93/2943 1513/968 1516/972
+f 1517/970 1516/972 1513/968
+f 1517/970 1514/971 1515/974
+f 398/2944 1515/974 1514/971
+f 1521/975 1520/979 1518/976
+f 86/2945 1518/976 1520/979
+f 1514/971 1521/975 398/2944
+f 1519/977 398/2944 1521/975
+f 1525/981 1524/984 1522/983
+f 87/2946 1522/983 1524/984
+f 398/2944 1519/977 1523/980
+f 1525/981 1523/980 1519/977
+f 1526/985 1528/986 94/2947
+f 1527/987 94/2947 1528/986
+f 1515/974 398/2944 1528/986
+f 1523/980 1528/986 398/2944
+f 89/2941 1529/990 1502/958
+f 1532/991 1502/958 1529/990
+f 1530/989 402/2948 1532/991
+f 1531/992 1532/991 402/2948
+f 1535/997 90/2949 1536/994
+f 1533/996 1536/994 90/2949
+f 1530/989 1536/994 402/2948
+f 1534/993 402/2948 1536/994
+f 1539/1002 91/2950 1540/998
+f 1537/999 1540/998 91/2950
+f 1534/993 1540/998 402/2948
+f 1538/1000 402/2948 1540/998
+f 1542/1003 1541/1004 1508/964
+f 88/2942 1508/964 1541/1004
+f 402/2948 1538/1000 1531/992
+f 1542/1003 1531/992 1538/1000
+f 1543/1007 1546/1008 90/2949
+f 1533/996 90/2949 1546/1008
+f 1544/1006 405/2951 1546/1008
+f 1545/1009 1546/1008 405/2951
+f 1524/984 1549/1011 87/2946
+f 1548/1012 87/2946 1549/1011
+f 1547/1010 405/2951 1549/1011
+f 1544/1006 1549/1011 405/2951
+f 1518/976 86/2945 1552/1014
+f 1550/1016 1552/1014 86/2945
+f 405/2951 1547/1010 1551/1013
+f 1552/1014 1551/1013 1547/1010
+f 1554/1017 1553/1018 1539/1002
+f 91/2950 1539/1002 1553/1018
+f 1545/1009 405/2951 1554/1017
+f 1551/1013 1554/1017 405/2951
+f 92/2939 1500/953 1557/1022
+f 1558/1020 1557/1022 1500/953
+f 408/2952 1556/1023 1555/1019
+f 1558/1020 1555/1019 1556/1023
+f 93/2943 1516/972 1510/966
+f 1560/1025 1510/966 1516/972
+f 1555/1019 1560/1025 408/2952
+f 1559/1024 408/2952 1560/1025
+f 1561/1028 1563/1029 94/2947
+f 1526/985 94/2947 1563/1029
+f 1562/1027 408/2952 1563/1029
+f 1559/1024 1563/1029 408/2952
+f 95/2953 1564/1031 1565/1032
+f 1566/1030 1565/1032 1564/1031
+f 408/2952 1562/1027 1556/1023
+f 1566/1030 1556/1023 1562/1027
+f 1570/1034 1569/1039 1557/1036
+f 92/2954 1557/1036 1569/1039
+f 411/2955 1568/1037 1567/1033
+f 1570/1034 1567/1033 1568/1037
+f 1571/1042 1573/1043 95/2956
+f 1564/1044 95/2956 1573/1043
+f 411/2955 1567/1033 1572/1041
+f 1573/1043 1572/1041 1567/1033
+f 55/2886 1241/627 1575/1047
+f 1576/1046 1575/1047 1241/627
+f 411/2955 1572/1041 1574/1045
+f 1576/1046 1574/1045 1572/1041
+f 1231/615 1578/1049 54/2884
+f 1235/621 54/2884 1578/1049
+f 1574/1045 1578/1049 411/2955
+f 1577/1048 411/2955 1578/1049
+f 51/2883 1579/1051 1226/608
+f 1580/1050 1226/608 1579/1051
+f 411/2955 1577/1048 1568/1037
+f 1580/1050 1568/1037 1577/1048
+f 94/2957 1581/1052 1561/1056
+f 1584/1054 1561/1056 1581/1052
+f 1584/1054 1582/1055 1583/1058
+f 413/2958 1583/1058 1582/1055
+f 1586/1061 52/2877 1587/1059
+f 1210/585 1587/1059 52/2877
+f 1585/1060 413/2958 1587/1059
+f 1582/1055 1587/1059 413/2958
+f 1244/630 1589/1063 56/2881
+f 1220/599 56/2881 1589/1063
+f 413/2958 1585/1060 1588/1062
+f 1589/1063 1588/1062 1585/1060
+f 1575/1047 1591/1064 55/2886
+f 1239/625 55/2886 1591/1064
+f 413/2958 1588/1062 1590/1065
+f 1591/1064 1590/1065 1588/1062
+f 1571/1042 95/2956 1592/1067
+f 1565/1066 1592/1067 95/2956
+f 1590/1065 1592/1067 413/2958
+f 1583/1058 413/2958 1592/1067
+f 1593/1070 1596/1071 87/2959
+f 1522/1073 87/2959 1596/1071
+f 416/2960 1595/1074 1594/1069
+f 1596/1071 1594/1069 1595/1074
+f 1597/1077 1600/1078 73/2961
+f 1599/1079 73/2961 1600/1078
+f 416/2960 1594/1069 1598/1076
+f 1600/1078 1598/1076 1594/1069
+f 72/2932 1471/918 1602/1082
+f 1603/1081 1602/1082 1471/918
+f 416/2960 1598/1076 1601/1080
+f 1603/1081 1601/1080 1598/1076
+f 1605/1083 1467/913 1586/1084
+f 52/2931 1586/1084 1467/913
+f 416/2960 1601/1080 1604/1085
+f 1605/1083 1604/1085 1601/1080
+f 94/2962 1527/1087 1581/1089
+f 1606/1088 1581/1089 1527/1087
+f 1606/1088 1595/1074 1604/1085
+f 416/2960 1604/1085 1595/1074
+f 1611/1091 1610/1096 1607/1093
+f 66/2963 1607/1093 1610/1096
+f 420/2964 1609/1094 1608/1090
+f 1611/1091 1608/1090 1609/1094
+f 1614/1098 1613/1101 1535/1100
+f 90/2965 1535/1100 1613/1101
+f 420/2964 1608/1090 1612/1097
+f 1614/1098 1612/1097 1608/1090
+f 1615/1104 1617/1105 89/2966
+f 1529/1106 89/2966 1617/1105
+f 420/2964 1612/1097 1616/1103
+f 1617/1105 1616/1103 1612/1097
+f 39/2912 1360/781 1619/1109
+f 1620/1108 1619/1109 1360/781
+f 420/2964 1616/1103 1618/1107
+f 1620/1108 1618/1107 1616/1103
+f 1621/1110 1622/1111 65/2911
+f 1354/775 65/2911 1622/1111
+f 1618/1107 1622/1111 420/2964
+f 1609/1094 420/2964 1622/1111
+f 73/2961 1599/1079 1625/1114
+f 1626/1113 1625/1114 1599/1079
+f 1626/1113 1623/1112 1624/1116
+f 422/2967 1624/1116 1623/1112
+f 87/2959 1548/1117 1593/1070
+f 1628/1119 1593/1070 1548/1117
+f 422/2967 1623/1112 1627/1120
+f 1628/1119 1627/1120 1623/1112
+f 1630/1122 1543/1125 1613/1124
+f 90/2968 1613/1124 1543/1125
+f 422/2967 1627/1120 1629/1121
+f 1630/1122 1629/1121 1627/1120
+f 66/2969 1631/1127 1607/1128
+f 1632/1126 1607/1128 1631/1127
+f 1624/1116 422/2967 1632/1126
+f 1629/1121 1632/1126 422/2967
+f 1619/1109 1635/1130 39/2912
+f 1476/925 39/2912 1635/1130
+f 1634/1131 1635/1130 423/2970
+f 1633/1129 423/2970 1635/1130
+f 1637/1135 1615/1104 1504/1134
+f 89/2966 1504/1134 1615/1104
+f 423/2970 1633/1129 1636/1133
+f 1637/1135 1636/1133 1633/1129
+f 92/2971 1569/1139 1497/1140
+f 1639/1137 1497/1140 1569/1139
+f 423/2970 1636/1133 1638/1136
+f 1639/1137 1638/1136 1636/1133
+f 51/2875 1479/927 1579/1142
+f 1640/1141 1579/1142 1479/927
+f 423/2970 1638/1136 1634/1131
+f 1640/1141 1634/1131 1638/1136
+f 1506/963 88/2942 1644/1143
+f 1641/1144 1644/1143 88/2942
+f 1643/1147 1644/1143 426/2972
+f 1642/1145 426/2972 1644/1143
+f 1488/939 1647/1149 83/2936
+f 1646/1150 83/2936 1647/1149
+f 1642/1145 1647/1149 426/2972
+f 1645/1148 426/2972 1647/1149
+f 1648/1153 1650/1154 85/2934
+f 1481/930 85/2934 1650/1154
+f 1649/1152 426/2972 1650/1154
+f 1645/1148 1650/1154 426/2972
+f 93/2943 1511/967 1651/1156
+f 1652/1155 1651/1156 1511/967
+f 426/2972 1649/1152 1643/1147
+f 1652/1155 1643/1147 1649/1152
+f 91/2950 1653/1157 1537/999
+f 1656/1159 1537/999 1653/1157
+f 1656/1159 1654/1160 1655/1161
+f 428/2973 1655/1161 1654/1160
+f 1492/944 1659/1163 84/2937
+f 1658/1164 84/2937 1659/1163
+f 1654/1160 1659/1163 428/2973
+f 1657/1162 428/2973 1659/1163
+f 1646/1150 1661/1166 83/2936
+f 1486/938 83/2936 1661/1166
+f 1657/1162 1661/1166 428/2973
+f 1660/1165 428/2973 1661/1166
+f 1541/1004 1662/1167 88/2942
+f 1641/1144 88/2942 1662/1167
+f 1662/1167 1655/1161 1660/1165
+f 428/2973 1660/1165 1655/1161
+f 86/2945 1663/1169 1550/1016
+f 1666/1168 1550/1016 1663/1169
+f 1665/1172 1666/1168 430/2974
+f 1664/1170 430/2974 1666/1168
+f 1668/1175 82/2938 1669/1174
+f 1495/947 1669/1174 82/2938
+f 1664/1170 1669/1174 430/2974
+f 1667/1173 430/2974 1669/1174
+f 1658/1164 1671/1177 84/2937
+f 1490/942 84/2937 1671/1177
+f 1670/1176 430/2974 1671/1177
+f 1667/1173 1671/1177 430/2974
+f 91/2950 1553/1018 1653/1157
+f 1672/1178 1653/1157 1553/1018
+f 1672/1178 1665/1172 1670/1176
+f 430/2974 1670/1176 1665/1172
+f 93/2943 1651/1156 1513/968
+f 1675/1179 1513/968 1651/1156
+f 431/2975 1674/1181 1673/1180
+f 1675/1179 1673/1180 1674/1181
+f 1484/933 1677/1183 85/2934
+f 1648/1153 85/2934 1677/1183
+f 1673/1180 1677/1183 431/2975
+f 1676/1182 431/2975 1677/1183
+f 1668/1175 1679/1185 82/2938
+f 1494/946 82/2938 1679/1185
+f 1678/1184 431/2975 1679/1185
+f 1676/1182 1679/1185 431/2975
+f 1520/979 1680/1186 86/2945
+f 1663/1169 86/2945 1680/1186
+f 1674/1181 431/2975 1680/1186
+f 1678/1184 1680/1186 431/2975
+f 14/2976 763/1190 1683/1191
+f 1684/1188 1683/1191 763/1190
+f 1684/1188 1681/1187 1682/1193
+f 435/2977 1682/1193 1681/1187
+f 1687/1195 759/1198 1685/1197
+f 13/2978 1685/1197 759/1198
+f 435/2977 1681/1187 1686/1194
+f 1687/1195 1686/1194 1681/1187
+f 1688/1202 1691/1200 97/2979
+f 1690/1203 97/2979 1691/1200
+f 1689/1199 435/2977 1691/1200
+f 1686/1194 1691/1200 435/2977
+f 1692/1204 1694/1205 98/2980
+f 1693/1206 98/2980 1694/1205
+f 1682/1193 435/2977 1694/1205
+f 1689/1199 1694/1205 435/2977
+f 1685/1197 13/2978 1697/1210
+f 1311/1209 1697/1210 13/2978
+f 1696/1211 1697/1210 438/2981
+f 1695/1208 438/2981 1697/1210
+f 1700/1213 1306/1216 1698/1215
+f 74/2982 1698/1215 1306/1216
+f 438/2981 1695/1208 1699/1212
+f 1700/1213 1699/1212 1695/1208
+f 1701/1219 1704/1220 100/2983
+f 1703/1221 100/2983 1704/1220
+f 1699/1212 1704/1220 438/2981
+f 1702/1218 438/2981 1704/1220
+f 97/2979 1690/1203 1705/1223
+f 1706/1222 1705/1223 1690/1203
+f 438/2981 1702/1218 1696/1211
+f 1706/1222 1696/1211 1702/1218
+f 67/2984 1290/1227 1709/1228
+f 1710/1225 1709/1228 1290/1227
+f 442/2985 1708/1230 1707/1224
+f 1710/1225 1707/1224 1708/1230
+f 1711/1233 1713/1234 10/2986
+f 1299/1235 10/2986 1713/1234
+f 1707/1224 1713/1234 442/2985
+f 1712/1232 442/2985 1713/1234
+f 96/2987 1714/1239 1716/1240
+f 1717/1237 1716/1240 1714/1239
+f 442/2985 1712/1232 1715/1236
+f 1717/1237 1715/1236 1712/1232
+f 1718/1241 1720/1242 99/2988
+f 1719/1243 99/2988 1720/1242
+f 1715/1236 1720/1242 442/2985
+f 1708/1230 442/2985 1720/1242
+f 10/2986 746/1245 1711/1233
+f 1723/1244 1711/1233 746/1245
+f 1721/1246 444/2989 1723/1244
+f 1722/1248 1723/1244 444/2989
+f 740/1251 14/2976 1725/1249
+f 1683/1191 1725/1249 14/2976
+f 1725/1249 1724/1250 1721/1246
+f 444/2989 1721/1246 1724/1250
+f 1692/1204 98/2980 1728/1252
+f 1726/1253 1728/1252 98/2980
+f 1724/1250 1728/1252 444/2989
+f 1727/1254 444/2989 1728/1252
+f 1729/1257 96/2987 1730/1256
+f 1716/1240 1730/1256 96/2987
+f 444/2989 1727/1254 1722/1248
+f 1730/1256 1722/1248 1727/1254
+f 1734/1261 1701/1219 1731/1260
+f 100/2983 1731/1260 1701/1219
+f 447/2990 1733/1262 1732/1259
+f 1734/1261 1732/1259 1733/1262
+f 75/2991 1735/1265 1737/1267
+f 1738/1266 1737/1267 1735/1265
+f 1732/1259 1738/1266 447/2990
+f 1736/1264 447/2990 1738/1266
+f 98/2980 1693/1206 1740/1270
+f 1741/1269 1740/1270 1693/1206
+f 447/2990 1736/1264 1739/1268
+f 1741/1269 1739/1268 1736/1264
+f 1688/1202 97/2979 1742/1271
+f 1705/1223 1742/1271 97/2979
+f 1739/1268 1742/1271 447/2990
+f 1733/1262 447/2990 1742/1271
+f 75/2991 1743/1273 1735/1265
+f 1746/1272 1735/1265 1743/1273
+f 1745/1276 1746/1272 449/2992
+f 1744/1274 449/2992 1746/1272
+f 1749/1278 1748/1279 1719/1243
+f 99/2988 1719/1243 1748/1279
+f 449/2992 1744/1274 1747/1277
+f 1749/1278 1747/1277 1744/1274
+f 1729/1257 1751/1281 96/2987
+f 1714/1239 96/2987 1751/1281
+f 1747/1277 1751/1281 449/2992
+f 1750/1280 449/2992 1751/1281
+f 98/2980 1740/1270 1726/1253
+f 1752/1282 1726/1253 1740/1270
+f 449/2992 1750/1280 1745/1276
+f 1752/1282 1745/1276 1750/1280
+f 1757/1284 1756/1287 1753/1286
+f 101/2993 1753/1286 1756/1287
+f 1757/1284 1754/1283 1755/1289
+f 453/2994 1755/1289 1754/1283
+f 1758/1292 1761/1293 102/2995
+f 1760/1294 102/2995 1761/1293
+f 1754/1283 1761/1293 453/2994
+f 1759/1291 453/2994 1761/1293
+f 1762/1295 1764/1296 103/2996
+f 1763/1297 103/2996 1764/1296
+f 1759/1291 1764/1296 453/2994
+f 1755/1289 453/2994 1764/1296
+f 1768/1299 1767/1302 1756/1287
+f 101/2993 1756/1287 1767/1302
+f 456/2997 1766/1300 1765/1298
+f 1768/1299 1765/1298 1766/1300
+f 103/2996 1769/1306 1762/1295
+f 1771/1304 1762/1295 1769/1306
+f 456/2997 1765/1298 1770/1303
+f 1771/1304 1770/1303 1765/1298
+f 806/91 1774/1308 19/2798
+f 1773/1309 19/2798 1774/1308
+f 1770/1303 1774/1308 456/2997
+f 1772/1307 456/2997 1774/1308
+f 27/2797 881/180 800/84
+f 1776/1311 800/84 881/180
+f 456/2997 1772/1307 1775/1310
+f 1776/1311 1775/1310 1772/1307
+f 875/174 8/2810 1778/1312
+f 1777/1313 1778/1312 8/2810
+f 456/2997 1775/1310 1766/1300
+f 1778/1312 1766/1300 1775/1310
+f 1767/1302 1781/1315 101/2993
+f 1753/1286 101/2993 1781/1315
+f 458/2998 1780/1316 1779/1314
+f 1781/1315 1779/1314 1780/1316
+f 8/2810 989/308 1777/1313
+f 1783/1318 1777/1313 989/308
+f 458/2998 1779/1314 1782/1317
+f 1783/1318 1782/1317 1779/1314
+f 1785/1320 986/305 942/252
+f 4/2823 942/252 986/305
+f 458/2998 1782/1317 1784/1319
+f 1785/1320 1784/1319 1782/1317
+f 26/2827 1786/1321 952/264
+f 1788/1323 952/264 1786/1321
+f 458/2998 1784/1319 1787/1324
+f 1788/1323 1787/1324 1784/1319
+f 102/2995 1760/1294 1789/1326
+f 1790/1325 1789/1326 1760/1294
+f 458/2998 1787/1324 1780/1316
+f 1790/1325 1780/1316 1787/1324
+f 1795/1328 1794/1333 1791/1330
+f 104/2999 1791/1330 1794/1333
+f 461/3000 1793/1331 1792/1327
+f 1795/1328 1792/1327 1793/1331
+f 1789/1326 1798/1335 102/2995
+f 1797/1336 102/2995 1798/1335
+f 1792/1327 1798/1335 461/3000
+f 1796/1334 461/3000 1798/1335
+f 1786/1321 26/2827 1800/1337
+f 1284/678 1800/1337 26/2827
+f 1800/1337 1799/1338 1796/1334
+f 461/3000 1796/1334 1799/1338
+f 1279/674 81/2894 1802/1339
+f 1801/1340 1802/1339 81/2894
+f 1799/1338 1802/1339 461/3000
+f 1793/1331 461/3000 1802/1339
+f 1805/1344 79/2915 1806/1342
+f 1380/804 1806/1342 79/2915
+f 1804/1345 1806/1342 464/3001
+f 1803/1341 464/3001 1806/1342
+f 19/2798 1773/1309 1387/812
+f 1808/1347 1387/812 1773/1309
+f 464/3001 1803/1341 1807/1346
+f 1808/1347 1807/1346 1803/1341
+f 1809/1350 1811/1351 103/2996
+f 1769/1306 103/2996 1811/1351
+f 1807/1346 1811/1351 464/3001
+f 1810/1349 464/3001 1811/1351
+f 1814/1352 1813/1354 1812/1353
+f 105/3002 1812/1353 1813/1354
+f 464/3001 1810/1349 1804/1345
+f 1814/1352 1804/1345 1810/1349
+f 1763/1297 1818/1356 103/2996
+f 1817/1358 103/2996 1818/1356
+f 1815/1355 468/3003 1818/1356
+f 1816/1359 1818/1356 468/3003
+f 102/2995 1819/1361 1758/1292
+f 1821/1360 1758/1292 1819/1361
+f 1821/1360 1820/1362 1815/1355
+f 468/3003 1815/1355 1820/1362
+f 1825/1365 1824/1368 1822/1367
+f 106/3004 1822/1367 1824/1368
+f 468/3003 1820/1362 1823/1364
+f 1825/1365 1823/1364 1820/1362
+f 1828/1370 1827/1371 1826/1369
+f 107/3005 1826/1369 1827/1371
+f 1828/1370 1816/1359 1823/1364
+f 468/3003 1823/1364 1816/1359
+f 102/2995 1797/1336 1819/1361
+f 1831/1373 1819/1361 1797/1336
+f 471/3006 1830/1374 1829/1372
+f 1831/1373 1829/1372 1830/1374
+f 1832/1377 1834/1378 104/2999
+f 1791/1330 104/2999 1834/1378
+f 1829/1372 1834/1378 471/3006
+f 1833/1376 471/3006 1834/1378
+f 108/3007 1835/1382 1837/1383
+f 1838/1380 1837/1383 1835/1382
+f 471/3006 1833/1376 1836/1379
+f 1838/1380 1836/1379 1833/1376
+f 1824/1368 1840/1384 106/3004
+f 1839/1385 106/3004 1840/1384
+f 1836/1379 1840/1384 471/3006
+f 1830/1374 471/3006 1840/1384
+f 1843/1389 105/3002 1844/1386
+f 1813/1354 1844/1386 105/3002
+f 1842/1390 1844/1386 474/3008
+f 1841/1387 474/3008 1844/1386
+f 1846/1392 1809/1350 1817/1358
+f 103/2996 1817/1358 1809/1350
+f 474/3008 1841/1387 1845/1391
+f 1846/1392 1845/1391 1841/1387
+f 1847/1395 1849/1396 107/3005
+f 1826/1369 107/3005 1849/1396
+f 1848/1394 474/3008 1849/1396
+f 1845/1391 1849/1396 474/3008
+f 109/3009 1850/1398 1851/1399
+f 1852/1397 1851/1399 1850/1398
+f 1852/1397 1842/1390 1848/1394
+f 474/3008 1848/1394 1842/1390
+f 1855/1401 1822/1367 1839/1385
+f 106/3004 1839/1385 1822/1367
+f 476/3010 1854/1402 1853/1400
+f 1855/1401 1853/1400 1854/1402
+f 1856/1404 1858/1403 108/3007
+f 1835/1382 108/3007 1858/1403
+f 1853/1400 1858/1403 476/3010
+f 1857/1405 476/3010 1858/1403
+f 1851/1399 1861/1408 109/3009
+f 1860/1409 109/3009 1861/1408
+f 1857/1405 1861/1408 476/3010
+f 1859/1407 476/3010 1861/1408
+f 1862/1410 1847/1395 1827/1371
+f 107/3005 1827/1371 1847/1395
+f 476/3010 1859/1407 1854/1402
+f 1862/1410 1854/1402 1859/1407
+f 1863/1413 1867/1414 115/3011
+f 1866/1416 115/3011 1867/1414
+f 481/3012 1865/1415 1864/1412
+f 1867/1414 1864/1412 1865/1415
+f 125/3013 1868/1420 1870/1422
+f 1871/1421 1870/1422 1868/1420
+f 1869/1419 481/3012 1871/1421
+f 1864/1412 1871/1421 481/3012
+f 1875/1425 1874/1427 1872/1423
+f 119/3014 1872/1423 1874/1427
+f 481/3012 1869/1419 1873/1426
+f 1875/1425 1873/1426 1869/1419
+f 1878/1428 1877/1430 1876/1429
+f 123/3015 1876/1429 1877/1430
+f 1873/1426 1878/1428 481/3012
+f 1865/1415 481/3012 1878/1428
+f 1882/1432 1881/1435 1866/1416
+f 115/3011 1866/1416 1881/1435
+f 485/3016 1880/1433 1879/1431
+f 1882/1432 1879/1431 1880/1433
+f 1883/1438 1885/1439 123/3015
+f 1876/1429 123/3015 1885/1439
+f 1884/1437 485/3016 1885/1439
+f 1879/1431 1885/1439 485/3016
+f 1889/1441 1888/1444 1886/1443
+f 122/3017 1886/1443 1888/1444
+f 485/3016 1884/1437 1887/1440
+f 1889/1441 1887/1440 1884/1437
+f 127/3018 1890/1445 1891/1447
+f 1892/1446 1891/1447 1890/1445
+f 1887/1440 1892/1446 485/3016
+f 1880/1433 485/3016 1892/1446
+f 110/3019 1893/1451 1896/1453
+f 1897/1449 1896/1453 1893/1451
+f 1895/1452 1897/1449 490/3020
+f 1894/1448 490/3020 1897/1449
+f 133/3021 1898/1458 1900/1459
+f 1901/1456 1900/1459 1898/1458
+f 490/3020 1894/1448 1899/1455
+f 1901/1456 1899/1455 1894/1448
+f 1902/1462 1905/1463 120/3022
+f 1904/1464 120/3022 1905/1463
+f 1899/1455 1905/1463 490/3020
+f 1903/1461 490/3020 1905/1463
+f 121/3023 1906/1466 1907/1467
+f 1908/1465 1907/1467 1906/1466
+f 490/3020 1903/1461 1895/1452
+f 1908/1465 1895/1452 1903/1461
+f 116/3024 1909/1471 1912/1472
+f 1913/1469 1912/1472 1909/1471
+f 494/3025 1911/1474 1910/1468
+f 1913/1469 1910/1468 1911/1474
+f 1914/1477 1917/1478 132/3026
+f 1916/1479 132/3026 1917/1478
+f 1910/1468 1917/1478 494/3025
+f 1915/1476 494/3025 1917/1478
+f 120/3022 1904/1464 1919/1482
+f 1920/1481 1919/1482 1904/1464
+f 494/3025 1915/1476 1918/1480
+f 1920/1481 1918/1480 1915/1476
+f 1921/1483 1922/1484 133/3021
+f 1898/1458 133/3021 1922/1484
+f 1918/1480 1922/1484 494/3025
+f 1911/1474 494/3025 1922/1484
+f 111/3027 1923/1487 1926/1489
+f 1927/1488 1926/1489 1923/1487
+f 499/3028 1925/1491 1924/1486
+f 1927/1488 1924/1486 1925/1491
+f 129/3029 1928/1495 1930/1496
+f 1931/1493 1930/1496 1928/1495
+f 1931/1493 1929/1492 1924/1486
+f 499/3028 1924/1486 1929/1492
+f 1935/1498 1934/1501 1932/1500
+f 128/3030 1932/1500 1934/1501
+f 499/3028 1929/1492 1933/1497
+f 1935/1498 1933/1497 1929/1492
+f 1936/1502 1938/1503 136/3031
+f 1937/1504 136/3031 1938/1503
+f 1933/1497 1938/1503 499/3028
+f 1925/1491 499/3028 1938/1503
+f 1939/1507 1942/1508 110/3019
+f 1893/1451 110/3019 1942/1508
+f 1941/1509 1942/1508 503/3032
+f 1940/1506 503/3032 1942/1508
+f 125/3013 1943/1513 1945/1514
+f 1946/1511 1945/1514 1943/1513
+f 503/3032 1940/1506 1944/1510
+f 1946/1511 1944/1510 1940/1506
+f 1947/1517 1950/1518 124/3033
+f 1949/1519 124/3033 1950/1518
+f 1944/1510 1950/1518 503/3032
+f 1948/1516 503/3032 1950/1518
+f 133/3021 1900/1459 1951/1521
+f 1952/1520 1951/1521 1900/1459
+f 503/3032 1948/1516 1941/1509
+f 1952/1520 1941/1509 1948/1516
+f 1956/1523 1863/1413 1953/1525
+f 115/3011 1953/1525 1863/1413
+f 506/3034 1955/1526 1954/1522
+f 1956/1523 1954/1522 1955/1526
+f 1957/1529 1960/1530 141/3035
+f 1959/1531 141/3035 1960/1530
+f 1954/1522 1960/1530 506/3034
+f 1958/1528 506/3034 1960/1530
+f 124/3033 1949/1519 1962/1534
+f 1963/1533 1962/1534 1949/1519
+f 506/3034 1958/1528 1961/1532
+f 1963/1533 1961/1532 1958/1528
+f 125/3013 1870/1422 1943/1513
+f 1964/1535 1943/1513 1870/1422
+f 1961/1532 1964/1535 506/3034
+f 1955/1526 506/3034 1964/1535
+f 1969/1537 1968/1541 1965/1539
+f 114/3036 1965/1539 1968/1541
+f 1967/1542 1969/1537 510/3037
+f 1966/1536 510/3037 1969/1537
+f 1970/1546 1973/1544 139/3038
+f 1972/1547 139/3038 1973/1544
+f 510/3037 1966/1536 1971/1543
+f 1973/1544 1971/1543 1966/1536
+f 1962/1534 1976/1549 124/3033
+f 1975/1550 124/3033 1976/1549
+f 1974/1548 510/3037 1976/1549
+f 1971/1543 1976/1549 510/3037
+f 141/3035 1977/1551 1957/1529
+f 1978/1552 1957/1529 1977/1551
+f 1978/1552 1967/1542 1974/1548
+f 510/3037 1974/1548 1967/1542
+f 1912/1472 1982/1554 116/3024
+f 1981/1556 116/3024 1982/1554
+f 1979/1553 512/3039 1982/1554
+f 1980/1557 1982/1554 512/3039
+f 133/3021 1951/1521 1921/1483
+f 1984/1559 1921/1483 1951/1521
+f 1983/1558 512/3039 1984/1559
+f 1979/1553 1984/1559 512/3039
+f 124/3033 1975/1550 1947/1517
+f 1986/1560 1947/1517 1975/1550
+f 1986/1560 1985/1561 1983/1558
+f 512/3039 1983/1558 1985/1561
+f 1988/1562 1970/1546 1987/1563
+f 139/3038 1987/1563 1970/1546
+f 1985/1561 1988/1562 512/3039
+f 1980/1557 512/3039 1988/1562
+f 1926/1489 1992/1565 111/3027
+f 1991/1567 111/3027 1992/1565
+f 1990/1568 1992/1565 516/3040
+f 1989/1564 516/3040 1992/1565
+f 136/3031 1993/1572 1936/1502
+f 1995/1570 1936/1502 1993/1572
+f 516/3040 1989/1564 1994/1569
+f 1995/1570 1994/1569 1989/1564
+f 1996/1575 1999/1576 126/3041
+f 1998/1577 126/3041 1999/1576
+f 1994/1569 1999/1576 516/3040
+f 1997/1574 516/3040 1999/1576
+f 127/3018 2000/1579 2001/1580
+f 2002/1578 2001/1580 2000/1579
+f 516/3040 1997/1574 1990/1568
+f 2002/1578 1990/1568 1997/1574
+f 2007/1582 2006/1585 2003/1584
+f 117/3042 2003/1584 2006/1585
+f 520/3043 2005/1587 2004/1581
+f 2007/1582 2004/1581 2005/1587
+f 2008/1590 2011/1591 138/3044
+f 2010/1592 138/3044 2011/1591
+f 2009/1589 520/3043 2011/1591
+f 2004/1581 2011/1591 520/3043
+f 126/3041 1998/1577 2013/1595
+f 2014/1594 2013/1595 1998/1577
+f 520/3043 2009/1589 2012/1593
+f 2014/1594 2012/1593 2009/1589
+f 2015/1596 2016/1597 136/3031
+f 1993/1572 136/3031 2016/1597
+f 2012/1593 2016/1597 520/3043
+f 2005/1587 520/3043 2016/1597
+f 1968/1541 2020/1598 114/3036
+f 2019/1601 114/3036 2020/1598
+f 2017/1599 523/3045 2020/1598
+f 2018/1600 2020/1598 523/3045
+f 2021/1603 2023/1605 141/3035
+f 1977/1551 141/3035 2023/1605
+f 523/3045 2017/1599 2022/1606
+f 2023/1605 2022/1606 2017/1599
+f 2013/1595 2026/1608 126/3041
+f 2025/1609 126/3041 2026/1608
+f 2022/1606 2026/1608 523/3045
+f 2024/1607 523/3045 2026/1608
+f 138/3044 2027/1611 2008/1590
+f 2028/1610 2008/1590 2027/1611
+f 523/3045 2024/1607 2018/1600
+f 2028/1610 2018/1600 2024/1607
+f 1881/1435 2031/1613 115/3011
+f 1953/1525 115/3011 2031/1613
+f 524/3046 2030/1614 2029/1612
+f 2031/1613 2029/1612 2030/1614
+f 127/3018 2001/1580 1890/1445
+f 2033/1616 1890/1445 2001/1580
+f 2032/1615 524/3046 2033/1616
+f 2029/1612 2033/1616 524/3046
+f 126/3041 2025/1609 1996/1575
+f 2035/1618 1996/1575 2025/1609
+f 524/3046 2032/1615 2034/1617
+f 2035/1618 2034/1617 2032/1615
+f 141/3035 1959/1531 2021/1603
+f 2036/1619 2021/1603 1959/1531
+f 2034/1617 2036/1619 524/3046
+f 2030/1614 524/3046 2036/1619
+f 2041/1621 2040/1624 2037/1623
+f 112/3047 2037/1623 2040/1624
+f 529/3048 2039/1626 2038/1620
+f 2041/1621 2038/1620 2039/1626
+f 140/3049 2042/1629 2044/1631
+f 2045/1630 2044/1631 2042/1629
+f 2043/1628 529/3048 2045/1630
+f 2038/1620 2045/1630 529/3048
+f 130/3050 2046/1635 2048/1636
+f 2049/1633 2048/1636 2046/1635
+f 2049/1633 2047/1632 2043/1628
+f 529/3048 2043/1628 2047/1632
+f 131/3051 2050/1638 2051/1639
+f 2052/1637 2051/1639 2050/1638
+f 2047/1632 2052/1637 529/3048
+f 2039/1626 529/3048 2052/1637
+f 2056/1644 118/3052 2057/1640
+f 2053/1641 2057/1640 118/3052
+f 533/3053 2055/1646 2054/1642
+f 2057/1640 2054/1642 2055/1646
+f 2061/1650 2060/1651 2058/1649
+f 142/3054 2058/1649 2060/1651
+f 2054/1642 2061/1650 533/3053
+f 2059/1648 533/3053 2061/1650
+f 130/3050 2048/1636 2063/1654
+f 2064/1653 2063/1654 2048/1636
+f 533/3053 2059/1648 2062/1652
+f 2064/1653 2062/1652 2059/1648
+f 2065/1655 2066/1656 140/3049
+f 2042/1629 140/3049 2066/1656
+f 2062/1652 2066/1656 533/3053
+f 2055/1646 533/3053 2066/1656
+f 2067/1659 2071/1660 113/3055
+f 2070/1662 113/3055 2071/1660
+f 2069/1663 2071/1660 538/3056
+f 2068/1658 538/3056 2071/1660
+f 135/3057 2072/1664 2074/1668
+f 2075/1666 2074/1668 2072/1664
+f 2075/1666 2073/1667 2068/1658
+f 538/3056 2068/1658 2073/1667
+f 2076/1671 2079/1672 134/3058
+f 2078/1673 134/3058 2079/1672
+f 2073/1667 2079/1672 538/3056
+f 2077/1670 538/3056 2079/1672
+f 137/3059 2080/1674 2081/1676
+f 2082/1675 2081/1676 2080/1674
+f 538/3056 2077/1670 2069/1663
+f 2082/1675 2069/1663 2077/1670
+f 118/3052 2083/1680 2053/1641
+f 2086/1678 2053/1641 2083/1680
+f 541/3060 2085/1681 2084/1677
+f 2086/1678 2084/1677 2085/1681
+f 2081/1676 2089/1683 137/3059
+f 2088/1684 137/3059 2089/1683
+f 2084/1677 2089/1683 541/3060
+f 2087/1682 541/3060 2089/1683
+f 134/3058 2090/1688 2076/1671
+f 2092/1686 2076/1671 2090/1688
+f 541/3060 2087/1682 2091/1685
+f 2092/1686 2091/1685 2087/1682
+f 2060/1651 2094/1689 142/3054
+f 2093/1690 142/3054 2094/1689
+f 2091/1685 2094/1689 541/3060
+f 2085/1681 541/3060 2094/1689
+f 2095/1693 2098/1694 114/3036
+f 1965/1539 114/3036 2098/1694
+f 2097/1695 2098/1694 544/3061
+f 2096/1692 544/3061 2098/1694
+f 118/3052 2056/1644 2100/1698
+f 2101/1697 2100/1698 2056/1644
+f 544/3061 2096/1692 2099/1696
+f 2101/1697 2099/1696 2096/1692
+f 2102/1701 2104/1702 140/3049
+f 2065/1655 140/3049 2104/1702
+f 2099/1696 2104/1702 544/3061
+f 2103/1700 544/3061 2104/1702
+f 139/3038 1972/1547 2105/1704
+f 2106/1703 2105/1704 1972/1547
+f 544/3061 2103/1700 2097/1695
+f 2106/1703 2097/1695 2103/1700
+f 138/3044 2010/1592 2109/1707
+f 2110/1705 2109/1707 2010/1592
+f 2110/1705 2107/1706 2108/1709
+f 547/3062 2108/1709 2107/1706
+f 2111/1711 2113/1710 117/3042
+f 2003/1584 117/3042 2113/1710
+f 2107/1706 2113/1710 547/3062
+f 2112/1712 547/3062 2113/1710
+f 2070/1662 2116/1715 113/3055
+f 2115/1716 113/3055 2116/1715
+f 547/3062 2112/1712 2114/1714
+f 2116/1715 2114/1714 2112/1712
+f 2117/1717 2118/1718 137/3059
+f 2080/1674 137/3059 2118/1718
+f 2108/1709 547/3062 2118/1718
+f 2114/1714 2118/1718 547/3062
+f 132/3026 1916/1479 2121/1721
+f 2122/1720 2121/1721 1916/1479
+f 2122/1720 2119/1719 2120/1723
+f 550/3063 2120/1723 2119/1719
+f 2123/1725 2125/1724 116/3024
+f 1909/1471 116/3024 2125/1724
+f 2119/1719 2125/1724 550/3063
+f 2124/1726 550/3063 2125/1724
+f 2128/1729 2127/1730 2040/1624
+f 112/3047 2040/1624 2127/1730
+f 550/3063 2124/1726 2126/1728
+f 2128/1729 2126/1728 2124/1726
+f 131/3051 2129/1731 2050/1638
+f 2130/1732 2050/1638 2129/1731
+f 2120/1723 550/3063 2130/1732
+f 2126/1728 2130/1732 550/3063
+f 137/3059 2088/1684 2117/1717
+f 2133/1734 2117/1717 2088/1684
+f 551/3064 2132/1735 2131/1733
+f 2133/1734 2131/1733 2132/1735
+f 2100/1698 2135/1737 118/3052
+f 2083/1680 118/3052 2135/1737
+f 2131/1733 2135/1737 551/3064
+f 2134/1736 551/3064 2135/1737
+f 114/3036 2019/1601 2095/1693
+f 2137/1739 2095/1693 2019/1601
+f 551/3064 2134/1736 2136/1738
+f 2137/1739 2136/1738 2134/1736
+f 2109/1707 2138/1740 138/3044
+f 2027/1611 138/3044 2138/1740
+f 2136/1738 2138/1740 551/3064
+f 2132/1735 551/3064 2138/1740
+f 2105/1704 2141/1742 139/3038
+f 1987/1563 139/3038 2141/1742
+f 2140/1743 2141/1742 552/3065
+f 2139/1741 552/3065 2141/1742
+f 140/3049 2044/1631 2102/1701
+f 2143/1745 2102/1701 2044/1631
+f 552/3065 2139/1741 2142/1744
+f 2143/1745 2142/1744 2139/1741
+f 2127/1730 2145/1747 112/3047
+f 2037/1623 112/3047 2145/1747
+f 2142/1744 2145/1747 552/3065
+f 2144/1746 552/3065 2145/1747
+f 116/3024 1981/1556 2123/1725
+f 2146/1748 2123/1725 1981/1556
+f 552/3065 2144/1746 2140/1743
+f 2146/1748 2140/1743 2144/1746
+f 132/3026 2121/1721 2149/1751
+f 2150/1749 2149/1751 2121/1721
+f 2148/1753 2150/1749 556/3066
+f 2147/1750 556/3066 2150/1749
+f 2153/1755 2129/1731 2151/1757
+f 131/3051 2151/1757 2129/1731
+f 556/3066 2147/1750 2152/1754
+f 2153/1755 2152/1754 2147/1750
+f 2154/1760 2157/1761 143/3067
+f 2156/1762 143/3067 2157/1761
+f 2155/1759 556/3066 2157/1761
+f 2152/1754 2157/1761 556/3066
+f 144/3068 2158/1763 2159/1765
+f 2160/1764 2159/1765 2158/1763
+f 2160/1764 2148/1753 2155/1759
+f 556/3066 2155/1759 2148/1753
+f 2159/1767 2164/1766 144/3069
+f 2163/1771 144/3069 2164/1766
+f 2162/1772 2164/1766 560/3070
+f 2161/1768 560/3070 2164/1766
+f 2167/1774 2154/1777 2165/1776
+f 143/3071 2165/1776 2154/1777
+f 560/3070 2161/1768 2166/1773
+f 2167/1774 2166/1773 2161/1768
+f 2168/1780 2171/1781 145/3072
+f 2170/1782 145/3072 2171/1781
+f 2169/1779 560/3070 2171/1781
+f 2166/1773 2171/1781 560/3070
+f 146/3073 2172/1784 2173/1785
+f 2174/1783 2173/1785 2172/1784
+f 560/3070 2169/1779 2162/1772
+f 2174/1783 2162/1772 2169/1779
+f 144/3069 2163/1771 2177/1788
+f 2178/1787 2177/1788 2163/1771
+f 564/3074 2176/1790 2175/1786
+f 2178/1787 2175/1786 2176/1790
+f 2179/1793 2181/1794 146/3073
+f 2172/1784 146/3073 2181/1794
+f 2175/1786 2181/1794 564/3074
+f 2180/1792 564/3074 2181/1794
+f 150/3075 2182/1798 2184/1799
+f 2185/1796 2184/1799 2182/1798
+f 564/3074 2180/1792 2183/1795
+f 2185/1796 2183/1795 2180/1792
+f 2186/1800 2188/1801 149/3076
+f 2187/1802 149/3076 2188/1801
+f 2183/1795 2188/1801 564/3074
+f 2176/1790 564/3074 2188/1801
+f 146/3073 2189/1806 2179/1793
+f 2192/1804 2179/1793 2189/1806
+f 568/3077 2191/1807 2190/1803
+f 2192/1804 2190/1803 2191/1807
+f 147/3078 2193/1811 2195/1812
+f 2196/1809 2195/1812 2193/1811
+f 2190/1803 2196/1809 568/3077
+f 2194/1808 568/3077 2196/1809
+f 2200/1814 2199/1817 2197/1816
+f 151/3079 2197/1816 2199/1817
+f 568/3077 2194/1808 2198/1813
+f 2200/1814 2198/1813 2194/1808
+f 2184/1799 2202/1818 150/3075
+f 2201/1819 150/3075 2202/1818
+f 2198/1813 2202/1818 568/3077
+f 2191/1807 568/3077 2202/1818
+f 2207/1821 2206/1826 2203/1823
+f 152/3080 2203/1823 2206/1826
+f 573/3081 2205/1824 2204/1820
+f 2207/1821 2204/1820 2205/1824
+f 2208/1830 2211/1828 155/3082
+f 2210/1831 155/3082 2211/1828
+f 573/3081 2204/1820 2209/1827
+f 2211/1828 2209/1827 2204/1820
+f 2212/1834 2215/1835 154/3083
+f 2214/1836 154/3083 2215/1835
+f 573/3081 2209/1827 2213/1833
+f 2215/1835 2213/1833 2209/1827
+f 2216/1837 2218/1838 157/3084
+f 2217/1839 157/3084 2218/1838
+f 573/3081 2213/1833 2205/1824
+f 2218/1838 2205/1824 2213/1833
+f 2223/1841 2222/1846 2219/1843
+f 153/3085 2219/1843 2222/1846
+f 578/3086 2221/1844 2220/1840
+f 2223/1841 2220/1840 2221/1844
+f 2224/1850 2227/1848 158/3087
+f 2226/1851 158/3087 2227/1848
+f 578/3086 2220/1840 2225/1847
+f 2227/1848 2225/1847 2220/1840
+f 2228/1854 2231/1855 156/3088
+f 2230/1856 156/3088 2231/1855
+f 2225/1847 2231/1855 578/3086
+f 2229/1853 578/3086 2231/1855
+f 2232/1857 2234/1858 159/3089
+f 2233/1859 159/3089 2234/1858
+f 578/3086 2229/1853 2221/1844
+f 2234/1858 2221/1844 2229/1853
+f 127/3090 2235/1862 2000/1865
+f 2238/1863 2000/1865 2235/1862
+f 2237/1866 2238/1863 581/3091
+f 2236/1861 581/3091 2238/1863
+f 2226/1851 2241/1868 158/3087
+f 2240/1869 158/3087 2241/1868
+f 581/3091 2236/1861 2239/1867
+f 2241/1868 2239/1867 2236/1861
+f 2242/1872 2244/1873 153/3085
+f 2219/1843 153/3085 2244/1873
+f 2239/1867 2244/1873 581/3091
+f 2243/1871 581/3091 2244/1873
+f 2246/1874 2245/1876 1991/1875
+f 111/3092 1991/1875 2245/1876
+f 581/3091 2243/1871 2237/1866
+f 2246/1874 2237/1866 2243/1871
+f 129/3093 2247/1880 2250/1881
+f 2251/1878 2250/1881 2247/1880
+f 585/3094 2249/1883 2248/1877
+f 2251/1878 2248/1877 2249/1883
+f 2233/1859 2254/1885 159/3089
+f 2253/1886 159/3089 2254/1885
+f 2248/1877 2254/1885 585/3094
+f 2252/1884 585/3094 2254/1885
+f 2255/1889 2257/1890 156/3088
+f 2228/1854 156/3088 2257/1890
+f 585/3094 2252/1884 2256/1888
+f 2257/1890 2256/1888 2252/1884
+f 122/3095 2258/1891 2259/1893
+f 2260/1892 2259/1893 2258/1891
+f 2256/1888 2260/1892 585/3094
+f 2249/1883 585/3094 2260/1892
+f 127/3090 1891/1897 2235/1862
+f 2263/1895 2235/1862 1891/1897
+f 586/3096 2262/1898 2261/1894
+f 2263/1895 2261/1894 2262/1898
+f 122/3095 2259/1893 1886/1901
+f 2265/1900 1886/1901 2259/1893
+f 2261/1894 2265/1900 586/3096
+f 2264/1899 586/3096 2265/1900
+f 2230/1856 2267/1903 156/3088
+f 2255/1889 156/3088 2267/1903
+f 586/3096 2264/1899 2266/1902
+f 2267/1903 2266/1902 2264/1899
+f 2240/1869 2268/1904 158/3087
+f 2224/1850 158/3087 2268/1904
+f 2266/1902 2268/1904 586/3096
+f 2262/1898 586/3096 2268/1904
+f 129/3093 1930/1906 2247/1880
+f 2271/1905 2247/1880 1930/1906
+f 2269/1907 587/3097 2271/1905
+f 2270/1909 2271/1905 587/3097
+f 2273/1911 1923/1912 2245/1876
+f 111/3092 2245/1876 1923/1912
+f 2273/1911 2272/1910 2269/1907
+f 587/3097 2269/1907 2272/1910
+f 2222/1846 2275/1914 153/3085
+f 2242/1872 153/3085 2275/1914
+f 2272/1910 2275/1914 587/3097
+f 2274/1913 587/3097 2275/1914
+f 2253/1886 2276/1915 159/3089
+f 2232/1857 159/3089 2276/1915
+f 587/3097 2274/1913 2270/1909
+f 2276/1915 2270/1909 2274/1913
+f 125/3098 1945/1918 2279/1921
+f 2280/1919 2279/1921 1945/1918
+f 2278/1922 2280/1919 590/3099
+f 2277/1917 590/3099 2280/1919
+f 2283/1924 1939/1927 2281/1926
+f 110/3100 2281/1926 1939/1927
+f 590/3099 2277/1917 2282/1923
+f 2283/1924 2282/1923 2277/1917
+f 2206/1826 2286/1929 152/3080
+f 2285/1930 152/3080 2286/1929
+f 2282/1923 2286/1929 590/3099
+f 2284/1928 590/3099 2286/1929
+f 2287/1932 2288/1931 157/3084
+f 2216/1837 157/3084 2288/1931
+f 590/3099 2284/1928 2278/1922
+f 2288/1931 2278/1922 2284/1928
+f 2289/1934 2292/1933 121/3101
+f 1906/1938 121/3101 2292/1933
+f 2291/1939 2292/1933 592/3102
+f 2290/1935 592/3102 2292/1933
+f 2210/1831 2295/1941 155/3082
+f 2294/1942 155/3082 2295/1941
+f 592/3102 2290/1935 2293/1940
+f 2295/1941 2293/1940 2290/1935
+f 2285/1930 2297/1944 152/3080
+f 2203/1823 152/3080 2297/1944
+f 2293/1940 2297/1944 592/3102
+f 2296/1943 592/3102 2297/1944
+f 2281/1926 110/3100 2298/1946
+f 1896/1945 2298/1946 110/3100
+f 2298/1946 2291/1939 2296/1943
+f 592/3102 2296/1943 2291/1939
+f 1874/1949 2302/1950 119/3103
+f 2301/1952 119/3103 2302/1950
+f 2300/1953 2302/1950 594/3104
+f 2299/1948 594/3104 2302/1950
+f 125/3098 2279/1921 1868/1956
+f 2304/1955 1868/1956 2279/1921
+f 594/3104 2299/1948 2303/1954
+f 2304/1955 2303/1954 2299/1948
+f 2217/1839 2306/1958 157/3084
+f 2287/1932 157/3084 2306/1958
+f 2303/1954 2306/1958 594/3104
+f 2305/1957 594/3104 2306/1958
+f 2308/1959 2212/1834 2307/1960
+f 154/3083 2307/1960 2212/1834
+f 594/3104 2305/1957 2300/1953
+f 2308/1959 2300/1953 2305/1957
+f 121/3101 2309/1964 2289/1934
+f 2312/1962 2289/1934 2309/1964
+f 596/3105 2311/1965 2310/1961
+f 2312/1962 2310/1961 2311/1965
+f 119/3103 2301/1952 2314/1968
+f 2315/1967 2314/1968 2301/1952
+f 2310/1961 2315/1967 596/3105
+f 2313/1966 596/3105 2315/1967
+f 2214/1836 2317/1970 154/3083
+f 2307/1960 154/3083 2317/1970
+f 596/3105 2313/1966 2316/1969
+f 2317/1970 2316/1969 2313/1966
+f 2294/1942 2318/1971 155/3082
+f 2208/1830 155/3082 2318/1971
+f 2316/1969 2318/1971 596/3105
+f 2311/1965 596/3105 2318/1971
+f 147/3078 2195/1812 2321/1974
+f 2322/1973 2321/1974 2195/1812
+f 600/3106 2320/1976 2319/1972
+f 2322/1973 2319/1972 2320/1976
+f 2173/1785 2324/1978 146/3073
+f 2189/1806 146/3073 2324/1978
+f 2319/1972 2324/1978 600/3106
+f 2323/1977 600/3106 2324/1978
+f 2327/1980 2168/1780 2325/1982
+f 145/3072 2325/1982 2168/1780
+f 600/3106 2323/1977 2326/1979
+f 2327/1980 2326/1979 2323/1977
+f 2328/1985 2331/1986 162/3107
+f 2330/1987 162/3107 2331/1986
+f 600/3106 2326/1979 2329/1984
+f 2331/1986 2329/1984 2326/1979
+f 160/3108 2332/1989 2333/1990
+f 2334/1988 2333/1990 2332/1989
+f 600/3106 2329/1984 2320/1976
+f 2334/1988 2320/1976 2329/1984
+f 161/3109 2335/1991 2338/1995
+f 2339/1993 2338/1995 2335/1991
+f 2339/1993 2336/1994 2337/1997
+f 605/3110 2337/1997 2336/1994
+f 2343/1999 2342/2002 2340/2001
+f 165/3111 2340/2001 2342/2002
+f 605/3110 2336/1994 2341/1998
+f 2343/1999 2341/1998 2336/1994
+f 2344/2006 2347/2004 166/3112
+f 2346/2007 166/3112 2347/2004
+f 2341/1998 2347/2004 605/3110
+f 2345/2003 605/3110 2347/2004
+f 2349/2010 162/3113 2350/2009
+f 2348/2008 2350/2009 162/3113
+f 2337/1997 605/3110 2350/2009
+f 2345/2003 2350/2009 605/3110
+f 2353/2012 2328/2015 2349/2010
+f 162/3113 2349/2010 2328/2015
+f 608/3114 2352/2013 2351/2011
+f 2353/2012 2351/2011 2352/2013
+f 2354/2018 2356/2019 166/3112
+f 2344/2006 166/3112 2356/2019
+f 2351/2011 2356/2019 608/3114
+f 2355/2017 608/3114 2356/2019
+f 2360/2021 2359/2024 2357/2023
+f 163/3115 2357/2023 2359/2024
+f 608/3114 2355/2017 2358/2020
+f 2360/2021 2358/2020 2355/2017
+f 160/3116 2333/2026 2361/2027
+f 2362/2025 2361/2027 2333/2026
+f 2358/2020 2362/2025 608/3114
+f 2352/2013 608/3114 2362/2025
+f 2366/2029 2365/2030 2359/2024
+f 163/3115 2359/2024 2365/2030
+f 611/3117 2364/2032 2363/2028
+f 2366/2029 2363/2028 2364/2032
+f 2346/2007 2368/2034 166/3112
+f 2354/2018 166/3112 2368/2034
+f 2363/2028 2368/2034 611/3117
+f 2367/2033 611/3117 2368/2034
+f 2369/2038 2371/2036 165/3111
+f 2340/2001 165/3111 2371/2036
+f 611/3117 2367/2033 2370/2035
+f 2371/2036 2370/2035 2367/2033
+f 2373/2041 164/3118 2374/2040
+f 2372/2039 2374/2040 164/3118
+f 2370/2035 2374/2040 611/3117
+f 2364/2032 611/3117 2374/2040
+f 2375/2044 2378/2045 69/3119
+f 1247/2047 69/3119 2378/2045
+f 2377/2048 2378/2045 614/3120
+f 2376/2043 614/3120 2378/2045
+f 131/3051 2051/1639 2380/2051
+f 2381/2050 2380/2051 2051/1639
+f 614/3120 2376/2043 2379/2049
+f 2381/2050 2379/2049 2376/2043
+f 2382/2054 2384/2055 130/3050
+f 2046/1635 130/3050 2384/2055
+f 2379/2049 2384/2055 614/3120
+f 2383/2053 614/3120 2384/2055
+f 76/3121 1254/2057 2385/2058
+f 2386/2056 2385/2058 1254/2057
+f 614/3120 2383/2053 2377/2048
+f 2386/2056 2377/2048 2383/2053
+f 2387/2061 2390/2062 77/3122
+f 1261/2064 77/3122 2390/2062
+f 2388/2060 617/3123 2390/2062
+f 2389/2063 2390/2062 617/3123
+f 142/3054 2093/1690 2392/2068
+f 2393/2067 2392/2068 2093/1690
+f 617/3123 2388/2060 2391/2066
+f 2393/2067 2391/2066 2388/2060
+f 2394/2070 2396/2069 134/3058
+f 2090/1688 134/3058 2396/2069
+f 2391/2066 2396/2069 617/3123
+f 2395/2071 617/3123 2396/2069
+f 2398/2073 2397/2075 1268/2074
+f 80/3124 1268/2074 2397/2075
+f 617/3123 2395/2071 2389/2063
+f 2398/2073 2389/2063 2395/2071
+f 2397/2075 2401/2077 80/3124
+f 1275/2079 80/3124 2401/2077
+f 2399/2076 619/3125 2401/2077
+f 2400/2078 2401/2077 619/3125
+f 134/3058 2078/1673 2394/2070
+f 2403/2081 2394/2070 2078/1673
+f 2403/2081 2402/2082 2399/2076
+f 619/3125 2399/2076 2402/2082
+f 2404/2085 2406/2086 135/3057
+f 2072/1664 135/3057 2406/2086
+f 2405/2084 619/3125 2406/2086
+f 2402/2082 2406/2086 619/3125
+f 2408/2087 2407/2089 1281/2088
+f 81/3126 1281/2088 2407/2089
+f 619/3125 2405/2084 2400/2078
+f 2408/2087 2400/2078 2405/2084
+f 2409/2092 2412/2093 67/3127
+f 1287/2095 67/3127 2412/2093
+f 2411/2096 2412/2093 622/3128
+f 2410/2091 622/3128 2412/2093
+f 2415/2098 2414/2101 2314/2100
+f 119/3014 2314/2100 2414/2101
+f 622/3128 2410/2091 2413/2097
+f 2415/2098 2413/2097 2410/2091
+f 2416/2103 2418/2102 121/3023
+f 2309/2106 121/3023 2418/2102
+f 2417/2104 622/3128 2418/2102
+f 2413/2097 2418/2102 622/3128
+f 2420/2107 2419/2109 1294/2108
+f 68/3129 1294/2108 2419/2109
+f 622/3128 2417/2104 2411/2096
+f 2420/2107 2411/2096 2417/2104
+f 78/3130 2421/2110 1301/2116
+f 2424/2112 1301/2116 2421/2110
+f 2424/2112 2422/2113 2423/2114
+f 625/3131 2423/2114 2422/2113
+f 2250/2119 2427/2120 129/3029
+f 2426/2121 129/3029 2427/2120
+f 2422/2113 2427/2120 625/3131
+f 2425/2118 625/3131 2427/2120
+f 2430/2123 2258/2126 2428/2125
+f 122/3017 2428/2125 2258/2126
+f 625/3131 2425/2118 2429/2122
+f 2430/2123 2429/2122 2425/2118
+f 2431/2129 74/3132 2432/2127
+f 1308/2128 2432/2127 74/3132
+f 2429/2122 2432/2127 625/3131
+f 2423/2114 625/3131 2432/2127
+f 2385/2058 2435/2131 76/3121
+f 1315/2133 76/3121 2435/2131
+f 2434/2132 2435/2131 626/3133
+f 2433/2130 626/3133 2435/2131
+f 130/3050 2063/1654 2382/2054
+f 2437/2135 2382/2054 2063/1654
+f 626/3133 2433/2130 2436/2136
+f 2437/2135 2436/2136 2433/2130
+f 2058/1649 142/3054 2439/2137
+f 2392/2068 2439/2137 142/3054
+f 2436/2136 2439/2137 626/3133
+f 2438/2138 626/3133 2439/2137
+f 2440/2139 2387/2061 1320/2140
+f 77/3122 1320/2140 2387/2061
+f 626/3133 2438/2138 2434/2132
+f 2440/2139 2434/2132 2438/2138
+f 2441/2143 2444/2144 70/3134
+f 1325/2146 70/3134 2444/2144
+f 2443/2147 2444/2144 628/3135
+f 2442/2142 628/3135 2444/2144
+f 143/3067 2156/1762 2446/2150
+f 2447/2148 2446/2150 2156/1762
+f 628/3135 2442/2142 2445/2149
+f 2447/2148 2445/2149 2442/2142
+f 2380/2051 2449/2152 131/3051
+f 2151/1757 131/3051 2449/2152
+f 2445/2149 2449/2152 628/3135
+f 2448/2151 628/3135 2449/2152
+f 69/3119 1331/2153 2375/2044
+f 2450/2154 2375/2044 1331/2153
+f 628/3135 2448/2151 2443/2147
+f 2450/2154 2443/2147 2448/2151
+f 2451/2157 2454/2158 71/2904
+f 1337/746 71/2904 2454/2158
+f 2452/2156 630/3136 2454/2158
+f 2453/2159 2454/2158 630/3136
+f 145/3137 2170/2160 2456/2164
+f 2457/2162 2456/2164 2170/2160
+f 2457/2162 2455/2163 2452/2156
+f 630/3136 2452/2156 2455/2163
+f 2165/2169 143/3138 2459/2165
+f 2446/2166 2459/2165 143/3138
+f 2455/2163 2459/2165 630/3136
+f 2458/2167 630/3136 2459/2165
+f 2460/2170 2441/2171 1343/756
+f 70/2906 1343/756 2441/2171
+f 630/3136 2458/2167 2453/2159
+f 2460/2170 2453/2159 2458/2167
+f 59/2909 2461/2174 1349/766
+f 2464/2175 1349/766 2461/2174
+f 2463/2176 2464/2175 634/3139
+f 2462/2173 634/3139 2464/2175
+f 147/3078 2465/2177 2467/2181
+f 2468/2179 2467/2181 2465/2177
+f 634/3139 2462/2173 2466/2180
+f 2468/2179 2466/2180 2462/2173
+f 2469/2184 2472/2185 148/3140
+f 2471/2186 148/3140 2472/2185
+f 2466/2180 2472/2185 634/3139
+f 2470/2183 634/3139 2472/2185
+f 65/2911 1356/776 2473/2188
+f 2474/2187 2473/2188 1356/776
+f 634/3139 2470/2183 2463/2176
+f 2474/2187 2463/2176 2470/2183
+f 2475/2190 2478/2189 60/2913
+f 1365/785 60/2913 2478/2189
+f 636/3141 2477/2193 2476/2191
+f 2478/2189 2476/2191 2477/2193
+f 2199/1817 2481/2195 151/3079
+f 2480/2196 151/3079 2481/2195
+f 2479/2194 636/3141 2481/2195
+f 2476/2191 2481/2195 636/3141
+f 147/3078 2467/2181 2193/1811
+f 2483/2198 2193/1811 2467/2181
+f 2483/2198 2482/2197 2479/2194
+f 636/3141 2479/2194 2482/2197
+f 2484/2199 2461/2174 1371/794
+f 59/2909 1371/794 2461/2174
+f 2482/2197 2484/2199 636/3141
+f 2477/2193 636/3141 2484/2199
+f 2485/2202 2488/2203 79/3142
+f 1377/2205 79/3142 2488/2203
+f 2486/2201 638/3143 2488/2203
+f 2487/2204 2488/2203 638/3143
+f 128/3030 1934/1501 2490/2209
+f 2491/2208 2490/2209 1934/1501
+f 2491/2208 2489/2207 2486/2201
+f 638/3143 2486/2201 2489/2207
+f 129/3029 2426/2121 1928/1495
+f 2493/2211 1928/1495 2426/2121
+f 2489/2207 2493/2211 638/3143
+f 2492/2210 638/3143 2493/2211
+f 1383/2213 2494/2212 78/3130
+f 2421/2110 78/3130 2494/2212
+f 638/3143 2492/2210 2487/2204
+f 2494/2212 2487/2204 2492/2210
+f 68/3129 2419/2109 1389/2218
+f 2497/2215 1389/2218 2419/2109
+f 640/3144 2496/2216 2495/2214
+f 2497/2215 2495/2214 2496/2216
+f 1907/1467 2499/2220 121/3023
+f 2416/2103 121/3023 2499/2220
+f 2495/2214 2499/2220 640/3144
+f 2498/2219 640/3144 2499/2220
+f 120/3022 2500/2224 1902/1462
+f 2502/2222 1902/1462 2500/2224
+f 640/3144 2498/2219 2501/2221
+f 2502/2222 2501/2221 2498/2219
+f 2503/2227 58/3145 2504/2225
+f 1395/2226 2504/2225 58/3145
+f 2501/2221 2504/2225 640/3144
+f 2496/2216 640/3144 2504/2225
+f 2507/2229 1401/2232 2503/2227
+f 58/3145 2503/2227 1401/2232
+f 642/3146 2506/2230 2505/2228
+f 2507/2229 2505/2228 2506/2230
+f 1919/1482 2509/2234 120/3022
+f 2500/2224 120/3022 2509/2234
+f 2505/2228 2509/2234 642/3146
+f 2508/2233 642/3146 2509/2234
+f 132/3026 2510/2238 1914/1477
+f 2512/2236 1914/1477 2510/2238
+f 642/3146 2508/2233 2511/2235
+f 2512/2236 2511/2235 2508/2233
+f 2513/2241 61/3147 2514/2239
+f 1407/2240 2514/2239 61/3147
+f 2511/2235 2514/2239 642/3146
+f 2506/2230 642/3146 2514/2239
+f 2517/2243 1413/2246 2513/2241
+f 61/3147 2513/2241 1413/2246
+f 644/3148 2516/2244 2515/2242
+f 2517/2243 2515/2242 2516/2244
+f 2149/1751 2519/2247 132/3026
+f 2510/2238 132/3026 2519/2247
+f 2518/2248 644/3148 2519/2247
+f 2515/2242 2519/2247 644/3148
+f 144/3068 2520/2250 2158/1763
+f 2522/2249 2158/1763 2520/2250
+f 2518/2248 2522/2249 644/3148
+f 2521/2251 644/3148 2522/2249
+f 2524/2254 2523/2255 1419/2253
+f 62/3149 1419/2253 2523/2255
+f 644/3148 2521/2251 2516/2244
+f 2524/2254 2516/2244 2521/2251
+f 1425/857 62/2923 2527/2259
+f 2523/2258 2527/2259 62/2923
+f 2526/2260 2527/2259 646/3150
+f 2525/2257 646/3150 2527/2259
+f 144/3069 2177/1788 2520/2263
+f 2529/2262 2520/2263 2177/1788
+f 2529/2262 2528/2261 2525/2257
+f 646/3150 2525/2257 2528/2261
+f 2532/2265 2186/1800 2530/2267
+f 149/3076 2530/2267 2186/1800
+f 2528/2261 2532/2265 646/3150
+f 2531/2264 646/3150 2532/2265
+f 2533/2269 63/2925 2534/2268
+f 1431/866 2534/2268 63/2925
+f 646/3150 2531/2264 2526/2260
+f 2534/2268 2526/2260 2531/2264
+f 2537/2271 1437/875 2533/2269
+f 63/2925 2533/2269 1437/875
+f 648/3151 2536/2272 2535/2270
+f 2537/2271 2535/2270 2536/2272
+f 2187/1802 2539/2274 149/3076
+f 2530/2267 149/3076 2539/2274
+f 2535/2270 2539/2274 648/3151
+f 2538/2273 648/3151 2539/2274
+f 150/3075 2540/2278 2182/1798
+f 2542/2276 2182/1798 2540/2278
+f 648/3151 2538/2273 2541/2275
+f 2542/2276 2541/2275 2538/2273
+f 2543/2280 64/2927 2544/2279
+f 1443/881 2544/2279 64/2927
+f 2541/2275 2544/2279 648/3151
+f 2536/2272 648/3151 2544/2279
+f 2547/2282 1449/887 2543/2280
+f 64/2927 2543/2280 1449/887
+f 2545/2281 649/3152 2547/2282
+f 2546/2283 2547/2282 649/3152
+f 150/3075 2201/1819 2540/2278
+f 2549/2285 2540/2278 2201/1819
+f 2549/2285 2548/2284 2545/2281
+f 649/3152 2545/2281 2548/2284
+f 2480/2196 2551/2287 151/3079
+f 2197/1816 151/3079 2551/2287
+f 2548/2284 2551/2287 649/3152
+f 2550/2286 649/3152 2551/2287
+f 2552/2288 2475/2190 1454/893
+f 60/2913 1454/893 2475/2190
+f 649/3152 2550/2286 2546/2283
+f 2552/2288 2546/2283 2550/2286
+f 1461/899 2555/2289 71/2904
+f 2451/2157 71/2904 2555/2289
+f 651/3153 2554/2291 2553/2290
+f 2555/2289 2553/2290 2554/2291
+f 2558/2293 1470/916 2556/2295
+f 72/2932 2556/2295 1470/916
+f 651/3153 2553/2290 2557/2292
+f 2558/2293 2557/2292 2553/2290
+f 2338/2298 2561/2299 161/3154
+f 2560/2300 161/3154 2561/2299
+f 651/3153 2557/2292 2559/2297
+f 2561/2299 2559/2297 2557/2292
+f 162/3155 2330/2304 2348/2305
+f 2563/2302 2348/2305 2330/2304
+f 651/3153 2559/2297 2562/2301
+f 2563/2302 2562/2301 2559/2297
+f 2456/2164 2564/2306 145/3137
+f 2325/2307 145/3137 2564/2306
+f 651/3153 2562/2301 2554/2291
+f 2564/2306 2554/2291 2562/2301
+f 147/3078 2321/1974 2465/2177
+f 2567/2308 2465/2177 2321/1974
+f 2567/2308 2565/2309 2566/2310
+f 653/3156 2566/2310 2565/2309
+f 2568/2313 2570/2314 160/3108
+f 2332/1989 160/3108 2570/2314
+f 2565/2309 2570/2314 653/3156
+f 2569/2312 653/3156 2570/2314
+f 148/3140 2471/2186 2571/2316
+f 2572/2315 2571/2316 2471/2186
+f 2572/2315 2566/2310 2569/2312
+f 653/3156 2569/2312 2566/2310
+f 2577/2320 2576/2322 2573/2319
+f 170/3157 2573/2319 2576/2322
+f 2575/2323 2577/2320 658/3158
+f 2574/2318 658/3158 2577/2320
+f 2581/2325 2580/2328 2578/2327
+f 167/3159 2578/2327 2580/2328
+f 658/3158 2574/2318 2579/2324
+f 2581/2325 2579/2324 2574/2318
+f 2582/2331 2585/2332 169/3160
+f 2584/2333 169/3160 2585/2332
+f 2579/2324 2585/2332 658/3158
+f 2583/2330 658/3158 2585/2332
+f 2588/2334 2587/2336 2586/2335
+f 168/3161 2586/2335 2587/2336
+f 658/3158 2583/2330 2575/2323
+f 2588/2334 2575/2323 2583/2330
+f 2589/2339 2593/2340 177/3162
+f 2592/2342 177/3162 2593/2340
+f 2590/2338 663/3163 2593/2340
+f 2591/2341 2593/2340 663/3163
+f 178/3164 2594/2346 2596/2348
+f 2597/2347 2596/2348 2594/2346
+f 2597/2347 2595/2345 2590/2338
+f 663/3163 2590/2338 2595/2345
+f 2601/2349 2600/2353 2598/2350
+f 173/3165 2598/2350 2600/2353
+f 2595/2345 2601/2349 663/3163
+f 2599/2351 663/3163 2601/2349
+f 2604/2354 2603/2356 2602/2355
+f 174/3166 2602/2355 2603/2356
+f 663/3163 2599/2351 2591/2341
+f 2604/2354 2591/2341 2599/2351
+f 178/3164 2605/2360 2608/2361
+f 2609/2358 2608/2361 2605/2360
+f 2609/2358 2606/2357 2607/2363
+f 668/3167 2607/2363 2606/2357
+f 2612/2368 179/3168 2613/2364
+f 2610/2365 2613/2364 179/3168
+f 2606/2357 2613/2364 668/3167
+f 2611/2366 668/3167 2613/2364
+f 2617/2370 2616/2373 2614/2372
+f 172/3169 2614/2372 2616/2373
+f 668/3167 2611/2366 2615/2369
+f 2617/2370 2615/2369 2611/2366
+f 2620/2374 2619/2376 2618/2375
+f 171/3170 2618/2375 2619/2376
+f 2607/2363 668/3167 2620/2374
+f 2615/2369 2620/2374 668/3167
+f 174/3166 2603/2356 2623/2380
+f 2624/2377 2623/2380 2603/2356
+f 2622/2381 2624/2377 672/3171
+f 2621/2378 672/3171 2624/2377
+f 2627/2383 2598/2350 2625/2385
+f 173/3165 2625/2385 2598/2350
+f 672/3171 2621/2378 2626/2382
+f 2627/2383 2626/2382 2621/2378
+f 2628/2388 2631/2389 176/3172
+f 2630/2390 176/3172 2631/2389
+f 2629/2387 672/3171 2631/2389
+f 2626/2382 2631/2389 672/3171
+f 2632/2391 2634/2392 175/3173
+f 2633/2393 175/3173 2634/2392
+f 2622/2381 672/3171 2634/2392
+f 2629/2387 2634/2392 672/3171
+f 2637/2397 175/3173 2638/2394
+f 2633/2393 2638/2394 175/3173
+f 2636/2398 2638/2394 675/3174
+f 2635/2395 675/3174 2638/2394
+f 2641/2399 2628/2388 2639/2400
+f 176/3172 2639/2400 2628/2388
+f 2635/2395 2641/2399 675/3174
+f 2640/2401 675/3174 2641/2399
+f 2619/2376 2644/2403 171/3170
+f 2643/2405 171/3170 2644/2403
+f 675/3174 2640/2401 2642/2404
+f 2644/2403 2642/2404 2640/2401
+f 2614/2372 172/3169 2646/2406
+f 2645/2407 2646/2406 172/3169
+f 2642/2404 2646/2406 675/3174
+f 2636/2398 675/3174 2646/2406
+f 177/3162 2647/2410 2589/2339
+f 2650/2411 2589/2339 2647/2410
+f 678/3175 2649/2412 2648/2409
+f 2650/2411 2648/2409 2649/2412
+f 180/3176 2651/2416 2653/2417
+f 2654/2414 2653/2417 2651/2416
+f 678/3175 2648/2409 2652/2413
+f 2654/2414 2652/2413 2648/2409
+f 2656/2420 179/3168 2657/2418
+f 2612/2368 2657/2418 179/3168
+f 2652/2413 2657/2418 678/3175
+f 2655/2419 678/3175 2657/2418
+f 178/3164 2596/2348 2605/2360
+f 2658/2421 2605/2360 2596/2348
+f 2655/2419 2658/2421 678/3175
+f 2649/2412 678/3175 2658/2421
+f 2662/2423 2647/2428 2659/2425
+f 177/3177 2659/2425 2647/2428
+f 681/3178 2661/2426 2660/2422
+f 2662/2423 2660/2422 2661/2426
+f 160/3116 2361/2027 2664/2431
+f 2665/2430 2664/2431 2361/2027
+f 681/3178 2660/2422 2663/2429
+f 2665/2430 2663/2429 2660/2422
+f 2365/2030 2667/2433 163/3115
+f 2357/2023 163/3115 2667/2433
+f 2663/2429 2667/2433 681/3178
+f 2666/2432 681/3178 2667/2433
+f 164/3118 2668/2437 2372/2039
+f 2670/2435 2372/2039 2668/2437
+f 681/3178 2666/2432 2669/2434
+f 2670/2435 2669/2434 2666/2432
+f 2671/2440 180/3179 2672/2439
+f 2653/2438 2672/2439 180/3179
+f 681/3178 2669/2434 2661/2426
+f 2672/2439 2661/2426 2669/2434
+f 179/3180 2656/2441 2675/2445
+f 2676/2443 2675/2445 2656/2441
+f 2676/2443 2673/2444 2674/2447
+f 683/3181 2674/2447 2673/2444
+f 2671/2440 2678/2449 180/3179
+f 2651/2450 180/3179 2678/2449
+f 2677/2448 683/3181 2678/2449
+f 2673/2444 2678/2449 683/3181
+f 2373/2041 2680/2452 164/3118
+f 2668/2437 164/3118 2680/2452
+f 683/3181 2677/2448 2679/2451
+f 2680/2452 2679/2451 2677/2448
+f 2342/2002 2682/2453 165/3111
+f 2369/2038 165/3111 2682/2453
+f 683/3181 2679/2451 2681/2454
+f 2682/2453 2681/2454 2679/2451
+f 2683/2456 2684/2455 161/3109
+f 2335/1991 161/3109 2684/2455
+f 2681/2454 2684/2455 683/3181
+f 2674/2447 683/3181 2684/2455
+f 2616/2459 2688/2460 172/3182
+f 2687/2462 172/3182 2688/2460
+f 685/3183 2686/2463 2685/2458
+f 2688/2460 2685/2458 2686/2463
+f 179/3184 2675/2467 2610/2468
+f 2690/2465 2610/2468 2675/2467
+f 2690/2465 2689/2464 2685/2458
+f 685/3183 2685/2458 2689/2464
+f 2692/2470 2683/2471 2560/2300
+f 161/3154 2560/2300 2683/2471
+f 685/3183 2689/2464 2691/2469
+f 2692/2470 2691/2469 2689/2464
+f 72/2932 1602/1082 2556/2295
+f 2694/2473 2556/2295 1602/1082
+f 685/3183 2691/2469 2693/2472
+f 2694/2473 2693/2472 2691/2469
+f 2696/2474 1597/1077 2695/2475
+f 73/2961 2695/2475 1597/1077
+f 685/3183 2693/2472 2686/2463
+f 2696/2474 2686/2463 2693/2472
+f 1610/1096 2700/2477 66/2963
+f 2699/2478 66/2963 2700/2477
+f 688/3185 2698/2480 2697/2476
+f 2700/2477 2697/2476 2698/2480
+f 2473/2188 2702/2482 65/2911
+f 1621/1110 65/2911 2702/2482
+f 2697/2476 2702/2482 688/3185
+f 2701/2481 688/3185 2702/2482
+f 148/3140 2703/2486 2469/2184
+f 2705/2484 2469/2184 2703/2486
+f 688/3185 2701/2481 2704/2483
+f 2705/2484 2704/2483 2701/2481
+f 2707/2491 174/3186 2708/2488
+f 2623/2490 2708/2488 174/3186
+f 688/3185 2704/2483 2706/2487
+f 2708/2488 2706/2487 2704/2483
+f 2710/2492 2632/2494 2709/2493
+f 175/3187 2709/2493 2632/2494
+f 688/3185 2706/2487 2698/2480
+f 2710/2492 2698/2480 2706/2487
+f 73/2961 1625/1114 2695/2475
+f 2713/2495 2695/2475 1625/1114
+f 2713/2495 2711/2496 2712/2497
+f 689/3188 2712/2497 2711/2496
+f 66/2969 2699/2500 1631/1127
+f 2715/2501 1631/1127 2699/2500
+f 2711/2496 2715/2501 689/3188
+f 2714/2499 689/3188 2715/2501
+f 2717/2503 2709/2506 2637/2505
+f 175/3189 2637/2505 2709/2506
+f 689/3188 2714/2499 2716/2502
+f 2717/2503 2716/2502 2714/2499
+f 172/3182 2687/2462 2645/2508
+f 2718/2507 2645/2508 2687/2462
+f 689/3188 2716/2502 2712/2497
+f 2718/2507 2712/2497 2716/2502
+f 2571/2316 2721/2510 148/3140
+f 2703/2486 148/3140 2721/2510
+f 2720/2511 2721/2510 690/3190
+f 2719/2509 690/3190 2721/2510
+f 160/3108 2664/2512 2568/2313
+f 2723/2514 2568/2313 2664/2512
+f 690/3190 2719/2509 2722/2515
+f 2723/2514 2722/2515 2719/2509
+f 177/3191 2592/2519 2659/2520
+f 2725/2517 2659/2520 2592/2519
+f 690/3190 2722/2515 2724/2516
+f 2725/2517 2724/2516 2722/2515
+f 2726/2521 2602/2522 2707/2491
+f 174/3186 2707/2491 2602/2522
+f 690/3190 2724/2516 2720/2511
+f 2726/2521 2720/2511 2724/2516
+f 2600/2353 2730/2524 173/3165
+f 2729/2526 173/3165 2730/2524
+f 2727/2523 693/3192 2730/2524
+f 2728/2525 2730/2524 693/3192
+f 178/3164 2731/2528 2594/2346
+f 2733/2530 2594/2346 2731/2528
+f 693/3192 2727/2523 2732/2531
+f 2733/2530 2732/2531 2727/2523
+f 2576/2322 2736/2533 170/3157
+f 2735/2534 170/3157 2736/2533
+f 2732/2531 2736/2533 693/3192
+f 2734/2532 693/3192 2736/2533
+f 2737/2535 2738/2536 168/3161
+f 2586/2335 168/3161 2738/2536
+f 2728/2525 693/3192 2738/2536
+f 2734/2532 2738/2536 693/3192
+f 176/3172 2630/2390 2741/2539
+f 2742/2538 2741/2539 2630/2390
+f 2742/2538 2739/2537 2740/2541
+f 695/3193 2740/2541 2739/2537
+f 2625/2385 173/3165 2744/2542
+f 2729/2526 2744/2542 173/3165
+f 2744/2542 2743/2543 2739/2537
+f 695/3193 2739/2537 2743/2543
+f 2587/2336 2746/2545 168/3161
+f 2737/2535 168/3161 2746/2545
+f 2743/2543 2746/2545 695/3193
+f 2745/2544 695/3193 2746/2545
+f 2747/2546 2748/2547 169/3160
+f 2582/2331 169/3160 2748/2547
+f 2740/2541 695/3193 2748/2547
+f 2745/2544 2748/2547 695/3193
+f 171/3170 2643/2405 2751/2551
+f 2752/2549 2751/2551 2643/2405
+f 2749/2548 697/3194 2752/2549
+f 2750/2550 2752/2549 697/3194
+f 176/3172 2741/2539 2639/2400
+f 2754/2553 2639/2400 2741/2539
+f 2754/2553 2753/2554 2749/2548
+f 697/3194 2749/2548 2753/2554
+f 2584/2333 2756/2556 169/3160
+f 2747/2546 169/3160 2756/2556
+f 2753/2554 2756/2556 697/3194
+f 2755/2555 697/3194 2756/2556
+f 2757/2557 2758/2558 167/3159
+f 2578/2327 167/3159 2758/2558
+f 2750/2550 697/3194 2758/2558
+f 2755/2555 2758/2558 697/3194
+f 178/3164 2608/2361 2731/2528
+f 2761/2560 2731/2528 2608/2361
+f 698/3195 2760/2561 2759/2559
+f 2761/2560 2759/2559 2760/2561
+f 2751/2551 2763/2562 171/3170
+f 2618/2375 171/3170 2763/2562
+f 2759/2559 2763/2562 698/3195
+f 2762/2563 698/3195 2763/2562
+f 2580/2328 2765/2565 167/3159
+f 2757/2557 167/3159 2765/2565
+f 2762/2563 2765/2565 698/3195
+f 2764/2564 698/3195 2765/2565
+f 2735/2534 2766/2566 170/3157
+f 2573/2319 170/3157 2766/2566
+f 2764/2564 2766/2566 698/3195
+f 2760/2561 698/3195 2766/2566
+f 123/3196 2767/2567 1883/2573
+f 2770/2569 1883/2573 2767/2567
+f 2770/2569 2768/2570 2769/2571
+f 702/3197 2769/2571 2768/2570
+f 2773/2578 183/3198 2774/2574
+f 2771/2575 2774/2574 183/3198
+f 2768/2570 2774/2574 702/3197
+f 2772/2576 702/3197 2774/2574
+f 2777/2583 182/3199 2778/2579
+f 2775/2580 2778/2579 182/3199
+f 2772/2576 2778/2579 702/3197
+f 2776/2581 702/3197 2778/2579
+f 2780/2584 2779/2586 1888/2585
+f 122/3200 1888/2585 2779/2586
+f 702/3197 2776/2581 2769/2571
+f 2780/2584 2769/2571 2776/2581
+f 2779/2586 2783/2588 122/3200
+f 2428/2590 122/3200 2783/2588
+f 2782/2591 2783/2588 704/3201
+f 2781/2587 704/3201 2783/2588
+f 182/3199 2784/2595 2775/2580
+f 2786/2593 2775/2580 2784/2595
+f 704/3201 2781/2587 2785/2592
+f 2786/2593 2785/2592 2781/2587
+f 1703/1221 2789/2597 100/2983
+f 2788/2598 100/2983 2789/2597
+f 2785/2592 2789/2597 704/3201
+f 2787/2596 704/3201 2789/2597
+f 2790/2599 1698/1215 2431/2600
+f 74/2982 2431/2600 1698/1215
+f 704/3201 2787/2596 2782/2591
+f 2790/2599 2782/2591 2787/2596
+f 67/2984 1709/1228 2409/2603
+f 2793/2602 2409/2603 1709/1228
+f 707/3202 2792/2605 2791/2601
+f 2793/2602 2791/2601 2792/2605
+f 2794/2608 2796/2609 99/2988
+f 1718/1241 99/2988 2796/2609
+f 2791/2601 2796/2609 707/3202
+f 2795/2607 707/3202 2796/2609
+f 181/3203 2797/2610 2799/2614
+f 2800/2612 2799/2614 2797/2610
+f 707/3202 2795/2607 2798/2613
+f 2800/2612 2798/2613 2795/2607
+f 2801/2617 119/3204 2802/2616
+f 2414/2615 2802/2616 119/3204
+f 2798/2613 2802/2616 707/3202
+f 2792/2605 707/3202 2802/2616
+f 119/3204 2801/2617 1872/2621
+f 2805/2619 1872/2621 2801/2617
+f 2804/2620 2805/2619 709/3205
+f 2803/2618 709/3205 2805/2619
+f 2806/2626 2808/2624 181/3203
+f 2797/2610 181/3203 2808/2624
+f 709/3205 2803/2618 2807/2623
+f 2808/2624 2807/2623 2803/2618
+f 2773/2578 2811/2628 183/3198
+f 2810/2629 183/3198 2811/2628
+f 2809/2627 709/3205 2811/2628
+f 2807/2623 2811/2628 709/3205
+f 1877/2631 2812/2630 123/3196
+f 2767/2567 123/3196 2812/2630
+f 2812/2630 2804/2620 2809/2627
+f 709/3205 2809/2627 2804/2620
+f 2815/2633 1731/1260 2788/2598
+f 100/2983 2788/2598 1731/1260
+f 711/3206 2814/2634 2813/2632
+f 2815/2633 2813/2632 2814/2634
+f 2777/2583 2817/2636 182/3199
+f 2784/2595 182/3199 2817/2636
+f 2813/2632 2817/2636 711/3206
+f 2816/2635 711/3206 2817/2636
+f 183/3198 2818/2640 2771/2575
+f 2820/2638 2771/2575 2818/2640
+f 711/3206 2816/2635 2819/2637
+f 2820/2638 2819/2637 2816/2635
+f 75/2991 1737/1267 2821/2642
+f 2822/2641 2821/2642 1737/1267
+f 2819/2637 2822/2641 711/3206
+f 2814/2634 711/3206 2822/2641
+f 75/2991 2821/2642 1743/1273
+f 2825/2644 1743/1273 2821/2642
+f 2824/2645 2825/2644 712/3207
+f 2823/2643 712/3207 2825/2644
+f 183/3198 2810/2629 2818/2640
+f 2827/2647 2818/2640 2810/2629
+f 712/3207 2823/2643 2826/2646
+f 2827/2647 2826/2646 2823/2643
+f 2806/2626 181/3203 2829/2649
+f 2799/2614 2829/2649 181/3203
+f 2826/2646 2829/2649 712/3207
+f 2828/2648 712/3207 2829/2649
+f 2830/2650 2794/2608 1748/1279
+f 99/2988 1748/1279 2794/2608
+f 712/3207 2828/2648 2824/2645
+f 2830/2650 2824/2645 2828/2648
+f 2835/2652 2834/2655 2831/2654
+f 184/3208 2831/2654 2834/2655
+f 2835/2652 2832/2651 2833/2657
+f 716/3209 2833/2657 2832/2651
+f 2838/2662 186/3210 2839/2660
+f 2836/2658 2839/2660 186/3210
+f 2832/2651 2839/2660 716/3209
+f 2837/2661 716/3209 2839/2660
+f 2840/2663 2842/2664 185/3211
+f 2841/2665 185/3211 2842/2664
+f 2837/2661 2842/2664 716/3209
+f 2833/2657 716/3209 2842/2664
+f 2843/2669 2846/2667 184/3208
+f 2831/2654 184/3208 2846/2667
+f 719/3212 2845/2670 2844/2666
+f 2846/2667 2844/2666 2845/2670
+f 2006/1585 2849/2672 117/3042
+f 2848/2673 117/3042 2849/2672
+f 719/3212 2844/2666 2847/2671
+f 2849/2672 2847/2671 2844/2666
+f 136/3031 1937/1504 2015/1596
+f 2851/2675 2015/1596 1937/1504
+f 719/3212 2847/2671 2850/2674
+f 2851/2675 2850/2674 2847/2671
+f 2852/2678 2854/2679 128/3030
+f 1932/1500 128/3030 2854/2679
+f 2850/2674 2854/2679 719/3212
+f 2853/2677 719/3212 2854/2679
+f 186/3210 2838/2662 2855/2681
+f 2856/2680 2855/2681 2838/2662
+f 719/3212 2853/2677 2845/2670
+f 2856/2680 2845/2670 2853/2677
+f 2834/2655 2859/2683 184/3208
+f 2843/2669 184/3208 2859/2683
+f 721/3213 2858/2684 2857/2682
+f 2859/2683 2857/2682 2858/2684
+f 185/3211 2860/2688 2840/2663
+f 2862/2686 2840/2663 2860/2688
+f 721/3213 2857/2682 2861/2685
+f 2862/2686 2861/2685 2857/2682
+f 135/3057 2074/1668 2864/2691
+f 2865/2690 2864/2691 2074/1668
+f 721/3213 2861/2685 2863/2689
+f 2865/2690 2863/2689 2861/2685
+f 2115/1716 2867/2693 113/3055
+f 2067/1659 113/3055 2867/2693
+f 721/3213 2863/2689 2866/2692
+f 2867/2693 2866/2692 2863/2689
+f 117/3042 2848/2673 2111/1711
+f 2868/2694 2111/1711 2848/2673
+f 721/3213 2866/2692 2858/2684
+f 2868/2694 2858/2684 2866/2692
+f 2872/2696 2871/2699 1794/2698
+f 104/3214 1794/2698 2871/2699
+f 723/3215 2870/2701 2869/2695
+f 2872/2696 2869/2695 2870/2701
+f 2407/2089 2874/2703 81/3126
+f 1801/2704 81/3126 2874/2703
+f 2873/2702 723/3215 2874/2703
+f 2869/2695 2874/2703 723/3215
+f 2864/2691 2876/2705 135/3057
+f 2404/2085 135/3057 2876/2705
+f 2876/2705 2875/2706 2873/2702
+f 723/3215 2873/2702 2875/2706
+f 2877/2707 2878/2708 185/3211
+f 2860/2688 185/3211 2878/2708
+f 2875/2706 2878/2708 723/3215
+f 2870/2701 723/3215 2878/2708
+f 1805/2711 2881/2712 79/3142
+f 2485/2202 79/3142 2881/2712
+f 2880/2713 2881/2712 725/3216
+f 2879/2710 725/3216 2881/2712
+f 2884/2715 1812/2718 2882/2717
+f 105/3217 2882/2717 1812/2718
+f 725/3216 2879/2710 2883/2714
+f 2884/2715 2883/2714 2879/2710
+f 2855/2681 2887/2720 186/3210
+f 2886/2721 186/3210 2887/2720
+f 2883/2714 2887/2720 725/3216
+f 2885/2719 725/3216 2887/2720
+f 128/3030 2490/2209 2852/2678
+f 2888/2722 2852/2678 2490/2209
+f 725/3216 2885/2719 2880/2713
+f 2888/2722 2880/2713 2885/2719
+f 2836/2658 186/3210 2892/2726
+f 2889/2725 2892/2726 186/3210
+f 2891/2727 2892/2726 729/3218
+f 2890/2724 729/3218 2892/2726
+f 2896/2729 2895/2732 2893/2731
+f 188/3219 2893/2731 2895/2732
+f 2896/2729 2894/2728 2890/2724
+f 729/3218 2890/2724 2894/2728
+f 2900/2734 2899/2737 2897/2736
+f 187/3220 2897/2736 2899/2737
+f 729/3218 2894/2728 2898/2733
+f 2900/2734 2898/2733 2894/2728
+f 185/3211 2841/2665 2901/2739
+f 2902/2738 2901/2739 2841/2665
+f 2902/2738 2891/2727 2898/2733
+f 729/3218 2898/2733 2891/2727
+f 185/3211 2901/2739 2877/2707
+f 2905/2740 2877/2707 2901/2739
+f 731/3221 2904/2742 2903/2741
+f 2905/2740 2903/2741 2904/2742
+f 2906/2745 2908/2746 187/3220
+f 2897/2736 187/3220 2908/2746
+f 2903/2741 2908/2746 731/3221
+f 2907/2744 731/3221 2908/2746
+f 108/3222 1837/2750 2910/2751
+f 2911/2748 2910/2751 1837/2750
+f 731/3221 2907/2744 2909/2747
+f 2911/2748 2909/2747 2907/2744
+f 2871/2699 2912/2752 104/3214
+f 1832/2753 104/3214 2912/2752
+f 2909/2747 2912/2752 731/3221
+f 2904/2742 731/3221 2912/2752
+f 1843/2756 2915/2757 105/3217
+f 2882/2717 105/3217 2915/2757
+f 2913/2755 733/3223 2915/2757
+f 2914/2758 2915/2757 733/3223
+f 109/3224 2916/2762 1850/2763
+f 2918/2760 1850/2763 2916/2762
+f 2918/2760 2917/2759 2913/2755
+f 733/3223 2913/2755 2917/2759
+f 2920/2766 188/3219 2921/2764
+f 2895/2732 2921/2764 188/3219
+f 2917/2759 2921/2764 733/3223
+f 2919/2765 733/3223 2921/2764
+f 2922/2767 2889/2725 2886/2721
+f 186/3210 2886/2721 2889/2725
+f 733/3223 2919/2765 2914/2758
+f 2922/2767 2914/2758 2919/2765
+f 2925/2769 2906/2745 2899/2737
+f 187/3220 2899/2737 2906/2745
+f 734/3225 2924/2770 2923/2768
+f 2925/2769 2923/2768 2924/2770
+f 2927/2772 2893/2731 2920/2766
+f 188/3219 2920/2766 2893/2731
+f 734/3225 2923/2768 2926/2771
+f 2927/2772 2926/2771 2923/2768
+f 1860/2775 2929/2776 109/3224
+f 2916/2762 109/3224 2929/2776
+f 2926/2771 2929/2776 734/3225
+f 2928/2774 734/3225 2929/2776
+f 2910/2751 2930/2777 108/3222
+f 1856/2778 108/3222 2930/2777
+f 2928/2774 2930/2777 734/3225
+f 2924/2770 734/3225 2930/2777
diff --git a/pytorch3d/tests/data/obj_mtl_no_image/model.mtl b/pytorch3d/tests/data/obj_mtl_no_image/model.mtl
new file mode 100644
index 0000000000000000000000000000000000000000..ddb3197ce4c4b4f685381f296cc6cc44fd19a3ad
--- /dev/null
+++ b/pytorch3d/tests/data/obj_mtl_no_image/model.mtl
@@ -0,0 +1,7 @@
+# Material Count: 1
+
+newmtl material_1
+Ns 96.078431
+Ka 0.000000 0.000000 0.000000
+Kd 0.500000 0.000000 0.000000
+Ks 0.500000 0.500000 0.500000
diff --git a/pytorch3d/tests/data/obj_mtl_no_image/model.obj b/pytorch3d/tests/data/obj_mtl_no_image/model.obj
new file mode 100644
index 0000000000000000000000000000000000000000..cf411442fd4d6e5364e0e49aea0e3bbd570d6b06
--- /dev/null
+++ b/pytorch3d/tests/data/obj_mtl_no_image/model.obj
@@ -0,0 +1,10 @@
+
+mtllib model.mtl
+
+v 0.1 0.2 0.3
+v 0.2 0.3 0.4
+v 0.3 0.4 0.5
+v 0.4 0.5 0.6
+usemtl material_1
+f 1 2 3
+f 1 2 4
diff --git a/pytorch3d/tests/data/real_boxes.pkl b/pytorch3d/tests/data/real_boxes.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..36d3ca1601d1d9adbe28a992797ff2817ffd7ec6
--- /dev/null
+++ b/pytorch3d/tests/data/real_boxes.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ada3bb60348a76bf9af95b8901d76ebbe1f97f1e917179dc2ccf37f2f4760cd
+size 1428
diff --git a/pytorch3d/tests/data/test_marching_cubes_data/double_ellipsoid.pickle b/pytorch3d/tests/data/test_marching_cubes_data/double_ellipsoid.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..c8cf127cc5aac73660e18492963635c2c7c5dd7c
--- /dev/null
+++ b/pytorch3d/tests/data/test_marching_cubes_data/double_ellipsoid.pickle
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9bdf03c6c7e3b949cbe9a2fd76ad41414ba42f7f3243ad075679a13bf153cd3
+size 233311
diff --git a/pytorch3d/tests/data/test_marching_cubes_data/sphere_level64.pickle b/pytorch3d/tests/data/test_marching_cubes_data/sphere_level64.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..be41b99c7a051da614d009f56ce445d90c0b09c9
--- /dev/null
+++ b/pytorch3d/tests/data/test_marching_cubes_data/sphere_level64.pickle
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9eda4a7e251736520fa05b4598ccad2362161f06401fee18bc2ae1851959f69d
+size 70111
diff --git a/pytorch3d/tests/data/uvs.ply b/pytorch3d/tests/data/uvs.ply
new file mode 100644
index 0000000000000000000000000000000000000000..2e8532e1edd6cd583c6ba83fe6309b73b39c9c7e
--- /dev/null
+++ b/pytorch3d/tests/data/uvs.ply
@@ -0,0 +1,28 @@
+ply
+format ascii 1.0
+comment made by Greg Turk
+comment this file is a cube
+comment TextureFile test_nd_sphere.png
+element vertex 8
+property float x
+property float y
+property float z
+property float texture_u
+property float texture_v
+element face 6
+property list uchar int vertex_index
+end_header
+0 0 0 0 0
+0 0 1 0.2 0.3
+0 1 1 0.2 0.3
+0 1 0 0.2 0.3
+1 0 0 0.2 0.3
+1 0 1 0.2 0.3
+1 1 1 0.2 0.3
+1 1 0 0.4 0.5
+4 0 1 2 3
+4 7 6 5 4
+4 0 4 5 1
+4 1 5 6 2
+4 2 6 7 3
+4 3 7 4 0
diff --git a/pytorch3d/tests/implicitron/__init__.py b/pytorch3d/tests/implicitron/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/tests/implicitron/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/tests/implicitron/common_resources.py b/pytorch3d/tests/implicitron/common_resources.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8da89f62aefc3f586a86456b0891e09a9fe14fe
--- /dev/null
+++ b/pytorch3d/tests/implicitron/common_resources.py
@@ -0,0 +1,161 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import contextlib
+import logging
+import os
+import tempfile
+import unittest
+from pathlib import Path
+from typing import Generator, Tuple
+from zipfile import ZipFile
+
+from iopath.common.file_io import PathManager
+
+
+CO3D_MANIFOLD_PATH: str = "manifold://co3d/tree/extracted"
+CO3DV2_MANIFOLD_PATH: str = "manifold://co3d/tree/v2/extracted"
+# True when the INSIDE_RE_WORKER environment variable is set and non-empty.
+INSIDE_RE_WORKER: bool = bool(os.environ.get("INSIDE_RE_WORKER"))
+
+
+def get_path_manager(silence_logs: bool = False) -> PathManager:
+    """
+    Create a PathManager, with manifold access when FB_TEST is set.
+    Raises ValueError when INSIDE_RE_WORKER is set.
+
+    Args:
+        silence_logs: Whether to reduce log output from iopath library.
+    """
+    if silence_logs:
+        for logger_name in ("iopath.fb.manifold", "iopath.common.file_io"):
+            logging.getLogger(logger_name).setLevel(logging.CRITICAL)
+
+    if INSIDE_RE_WORKER:
+        raise ValueError("Cannot get to manifold from RE")
+
+    manager = PathManager()
+    if os.environ.get("FB_TEST", False):
+        from iopath.fb.manifold import ManifoldPathHandler
+
+        manager.register_handler(ManifoldPathHandler())
+
+    return manager
+
+
+@contextlib.contextmanager
+def get_skateboard_data(
+    avoid_manifold: bool = False, silence_logs: bool = False
+) -> Generator[Tuple[str, PathManager], None, None]:
+    """
+    Context manager giving tests access to the Co3D dataset, at least
+    the first 5 skateboards. Internally the data is normally read
+    directly from manifold; an RE worker cannot do that, so there the
+    data is unpacked from a zip archive located via get_file_path.
+
+    Args:
+        avoid_manifold: Use the method used by RE workers even locally.
+        silence_logs: Whether to reduce log output from iopath library.
+
+    Yields:
+        dataset_root: (str) path to dataset root.
+        path_manager: path_manager to access it with.
+    """
+    if silence_logs:
+        for logger_name in ("iopath.fb.manifold", "iopath.common.file_io"):
+            logging.getLogger(logger_name).setLevel(logging.CRITICAL)
+
+    if not os.environ.get("FB_TEST", False):
+        if os.getenv("FAIR_ENV_CLUSTER", "") == "":
+            raise unittest.SkipTest("Unknown environment. Data not available.")
+        yield "/datasets01/co3d/081922", PathManager()
+        return
+    if not (avoid_manifold or INSIDE_RE_WORKER):
+        yield CO3D_MANIFOLD_PATH, get_path_manager()
+        return
+
+    from libfb.py.parutil import get_file_path
+
+    archive = get_file_path("skateboard_first_5")
+    assert Path(archive).is_file()
+    with tempfile.TemporaryDirectory() as unpack_dir:
+        with ZipFile(archive) as zip_file:
+            zip_file.extractall(unpack_dir)
+        yield os.path.join(unpack_dir, "extracted"), PathManager()
+
+
+def _provide_torchvision_weights(par_path: str, filename: str) -> None:
+    """
+    Ensure the weights files are available for a torchvision model.
+
+    Args:
+        par_path: Name given to get_file_path to locate the weights file.
+        filename: Name of the symlink to create in the iopath cache.
+    """
+    # In OSS, torchvision looks for vgg16 weights in
+    #   https://download.pytorch.org/models/vgg16-397923af.pth
+    # Inside fbcode, this is replaced by asking iopath for
+    #   manifold://torchvision/tree/models/vgg16-397923af.pth
+    # (code: fbcode/pytorch/vision/fb/_internally_replaced_utils.py )
+    #
+    # iopath does this by looking for the file at the cache location
+    # and if it is not there getting it from manifold.
+    # (code: fbcode/fair_infra/data/iopath/iopath/fb/manifold.py )
+    #
+    # On the remote execution worker, manifold is inaccessible, so we
+    # make the cached file available before iopath looks.
+    #
+    # By default the cache location is
+    #   ~/.torch/iopath_cache/manifold_cache/tree/models/vgg16-397923af.pth
+    # But we can't write to the home directory on the RE worker.
+    # We define FVCORE_CACHE to change the cache location to
+    #  iopath_cache/manifold_cache/tree/models/vgg16-397923af.pth
+    # (Without it, manifold caches in unstable temporary locations on RE.)
+    #
+    # The file we want has been copied from
+    #    tree/models/vgg16-397923af.pth in the torchvision bucket
+    # to
+    #    tree/testing/vgg16-397923af.pth in the co3d bucket
+    # and the TARGETS file copies it somewhere in the PAR which we
+    # recover with get_file_path.
+    # (It can't copy straight to a nested location, see
+    #    https://fb.workplace.com/groups/askbuck/posts/2644615728920359/)
+    # Here we symlink it to the new cache location.
+    if INSIDE_RE_WORKER:
+        from libfb.py.parutil import get_file_path
+
+        os.environ["FVCORE_CACHE"] = "iopath_cache"
+        source = Path(get_file_path(par_path))
+        assert source.is_file()
+
+        dest = Path("iopath_cache/manifold_cache/tree/models")
+        # exist_ok avoids a check-then-create race on the directory.
+        dest.mkdir(parents=True, exist_ok=True)
+
+        if not (dest / filename).is_symlink():
+            try:
+                (dest / filename).symlink_to(source)
+            except FileExistsError:
+                print("FileExistsError: no symlink created.")
+
+
+def provide_lpips_vgg() -> None:
+    """Make torchvision's vgg16 weights available for lpips.LPIPS(net="vgg")."""
+    _provide_torchvision_weights(
+        par_path="vgg_weights_for_lpips",
+        filename="vgg16-397923af.pth",
+    )
+
+
+def provide_resnet34() -> None:
+    """
+    Make the weights file available so that
+    torchvision.models.resnet34(pretrained=True) can be called.
+    """
+    _provide_torchvision_weights(
+        par_path="resnet34_weights",
+        filename="resnet34-b627a593.pth",
+    )
diff --git a/pytorch3d/tests/implicitron/data/data_source.yaml b/pytorch3d/tests/implicitron/data/data_source.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a444309cd99bf7300d07f9d39b37cc800105ecbf
--- /dev/null
+++ b/pytorch3d/tests/implicitron/data/data_source.yaml
@@ -0,0 +1,131 @@
+dataset_map_provider_class_type: ???
+data_loader_map_provider_class_type: SequenceDataLoaderMapProvider
+dataset_map_provider_BlenderDatasetMapProvider_args:
+  base_dir: ???
+  object_name: ???
+  path_manager_factory_class_type: PathManagerFactory
+  n_known_frames_for_test: null
+  path_manager_factory_PathManagerFactory_args:
+    silence_logs: true
+dataset_map_provider_JsonIndexDatasetMapProvider_args:
+  category: ???
+  task_str: singlesequence
+  dataset_root: ''
+  n_frames_per_sequence: -1
+  test_on_train: false
+  restrict_sequence_name: []
+  test_restrict_sequence_id: -1
+  assert_single_seq: false
+  only_test_set: false
+  dataset_class_type: JsonIndexDataset
+  path_manager_factory_class_type: PathManagerFactory
+  dataset_JsonIndexDataset_args:
+    limit_to: 0
+    limit_sequences_to: 0
+    exclude_sequence: []
+    limit_category_to: []
+    load_images: true
+    load_depths: true
+    load_depth_masks: true
+    load_masks: true
+    load_point_clouds: false
+    max_points: 0
+    mask_images: false
+    mask_depths: false
+    image_height: 800
+    image_width: 800
+    box_crop: true
+    box_crop_mask_thr: 0.4
+    box_crop_context: 0.3
+    remove_empty_masks: true
+    seed: 0
+    sort_frames: false
+  path_manager_factory_PathManagerFactory_args:
+    silence_logs: true
+dataset_map_provider_JsonIndexDatasetMapProviderV2_args:
+  category: ???
+  subset_name: ???
+  dataset_root: ''
+  test_on_train: false
+  only_test_set: false
+  load_eval_batches: true
+  num_load_workers: 4
+  n_known_frames_for_test: 0
+  dataset_class_type: JsonIndexDataset
+  path_manager_factory_class_type: PathManagerFactory
+  dataset_JsonIndexDataset_args:
+    limit_to: 0
+    limit_sequences_to: 0
+    pick_sequence: []
+    exclude_sequence: []
+    limit_category_to: []
+    load_images: true
+    load_depths: true
+    load_depth_masks: true
+    load_masks: true
+    load_point_clouds: false
+    max_points: 0
+    mask_images: false
+    mask_depths: false
+    image_height: 800
+    image_width: 800
+    box_crop: true
+    box_crop_mask_thr: 0.4
+    box_crop_context: 0.3
+    remove_empty_masks: true
+    n_frames_per_sequence: -1
+    seed: 0
+    sort_frames: false
+  path_manager_factory_PathManagerFactory_args:
+    silence_logs: true
+dataset_map_provider_LlffDatasetMapProvider_args:
+  base_dir: ???
+  object_name: ???
+  path_manager_factory_class_type: PathManagerFactory
+  n_known_frames_for_test: null
+  path_manager_factory_PathManagerFactory_args:
+    silence_logs: true
+  downscale_factor: 4
+dataset_map_provider_RenderedMeshDatasetMapProvider_args:
+  num_views: 40
+  data_file: null
+  azimuth_range: 180.0
+  distance: 2.7
+  resolution: 128
+  use_point_light: true
+  gpu_idx: 0
+  path_manager_factory_class_type: PathManagerFactory
+  path_manager_factory_PathManagerFactory_args:
+    silence_logs: true
+data_loader_map_provider_SequenceDataLoaderMapProvider_args:
+  batch_size: 1
+  num_workers: 0
+  dataset_length_train: 0
+  dataset_length_val: 0
+  dataset_length_test: 0
+  train_conditioning_type: SAME
+  val_conditioning_type: SAME
+  test_conditioning_type: KNOWN
+  images_per_seq_options: []
+  sample_consecutive_frames: false
+  consecutive_frames_max_gap: 0
+  consecutive_frames_max_gap_seconds: 0.1
+data_loader_map_provider_SimpleDataLoaderMapProvider_args:
+  batch_size: 1
+  num_workers: 0
+  dataset_length_train: 0
+  dataset_length_val: 0
+  dataset_length_test: 0
+data_loader_map_provider_TrainEvalDataLoaderMapProvider_args:
+  batch_size: 1
+  num_workers: 0
+  dataset_length_train: 0
+  dataset_length_val: 0
+  dataset_length_test: 0
+  train_conditioning_type: SAME
+  val_conditioning_type: SAME
+  test_conditioning_type: KNOWN
+  images_per_seq_options: []
+  sample_consecutive_frames: false
+  consecutive_frames_max_gap: 0
+  consecutive_frames_max_gap_seconds: 0.1
diff --git a/pytorch3d/tests/implicitron/data/overrides.yaml b/pytorch3d/tests/implicitron/data/overrides.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..95899d898b9d05aada3ed22b87058c9ade4e93fc
--- /dev/null
+++ b/pytorch3d/tests/implicitron/data/overrides.yaml
@@ -0,0 +1,126 @@
+log_vars:
+- loss_rgb_psnr_fg
+- loss_rgb_psnr
+- loss_rgb_mse
+- loss_rgb_huber
+- loss_depth_abs
+- loss_depth_abs_fg
+- loss_mask_neg_iou
+- loss_mask_bce
+- loss_mask_beta_prior
+- loss_eikonal
+- loss_density_tv
+- loss_depth_neg_penalty
+- loss_autodecoder_norm
+- loss_prev_stage_rgb_mse
+- loss_prev_stage_rgb_psnr_fg
+- loss_prev_stage_rgb_psnr
+- loss_prev_stage_mask_bce
+- objective
+- epoch
+- sec/it
+mask_images: true
+mask_depths: true
+render_image_width: 400
+render_image_height: 400
+mask_threshold: 0.5
+output_rasterized_mc: false
+bg_color:
+- 0.0
+- 0.0
+- 0.0
+num_passes: 1
+chunk_size_grid: 4096
+render_features_dimensions: 3
+tqdm_trigger_threshold: 16
+n_train_target_views: 1
+sampling_mode_training: mask_sample
+sampling_mode_evaluation: full_grid
+global_encoder_class_type: SequenceAutodecoder
+raysampler_class_type: AdaptiveRaySampler
+renderer_class_type: LSTMRenderer
+image_feature_extractor_class_type: ResNetFeatureExtractor
+view_pooler_enabled: true
+implicit_function_class_type: IdrFeatureField
+view_metrics_class_type: ViewMetrics
+regularization_metrics_class_type: RegularizationMetrics
+loss_weights:
+  loss_rgb_mse: 1.0
+  loss_prev_stage_rgb_mse: 1.0
+  loss_mask_bce: 0.0
+  loss_prev_stage_mask_bce: 0.0
+global_encoder_SequenceAutodecoder_args:
+  autodecoder_args:
+    encoding_dim: 0
+    n_instances: 1
+    init_scale: 1.0
+    ignore_input: false
+raysampler_AdaptiveRaySampler_args:
+  n_pts_per_ray_training: 64
+  n_pts_per_ray_evaluation: 64
+  n_rays_per_image_sampled_from_mask: 1024
+  n_rays_total_training: null
+  stratified_point_sampling_training: true
+  stratified_point_sampling_evaluation: false
+  cast_ray_bundle_as_cone: false
+  scene_extent: 8.0
+  scene_center:
+  - 0.0
+  - 0.0
+  - 0.0
+renderer_LSTMRenderer_args:
+  num_raymarch_steps: 10
+  init_depth: 17.0
+  init_depth_noise_std: 0.0005
+  hidden_size: 16
+  n_feature_channels: 256
+  bg_color: null
+  verbose: false
+image_feature_extractor_ResNetFeatureExtractor_args:
+  name: resnet34
+  pretrained: true
+  stages:
+  - 1
+  - 2
+  - 3
+  - 4
+  normalize_image: true
+  image_rescale: 0.16
+  first_max_pool: true
+  proj_dim: 32
+  l2_norm: true
+  add_masks: true
+  add_images: true
+  global_average_pool: false
+  feature_rescale: 1.0
+view_pooler_args:
+  feature_aggregator_class_type: AngleWeightedIdentityFeatureAggregator
+  view_sampler_args:
+    masked_sampling: false
+    sampling_mode: bilinear
+  feature_aggregator_AngleWeightedIdentityFeatureAggregator_args:
+    exclude_target_view: true
+    exclude_target_view_mask_features: true
+    concatenate_output: true
+    weight_by_ray_angle_gamma: 1.0
+    min_ray_angle_weight: 0.1
+implicit_function_IdrFeatureField_args:
+  d_in: 3
+  d_out: 1
+  dims:
+  - 512
+  - 512
+  - 512
+  - 512
+  - 512
+  - 512
+  - 512
+  - 512
+  geometric_init: true
+  bias: 1.0
+  skip_in: []
+  weight_norm: true
+  n_harmonic_functions_xyz: 1729
+  pooled_feature_dim: 0
+view_metrics_ViewMetrics_args: {}
+regularization_metrics_RegularizationMetrics_args: {}
diff --git a/pytorch3d/tests/implicitron/data/sql_dataset/set_lists_100.json b/pytorch3d/tests/implicitron/data/sql_dataset/set_lists_100.json
new file mode 100644
index 0000000000000000000000000000000000000000..96dbe2b4010e4db93f3d7fd0bf84145691c9cee0
--- /dev/null
+++ b/pytorch3d/tests/implicitron/data/sql_dataset/set_lists_100.json
@@ -0,0 +1 @@
+{"train": [["cat0_seq0", 0, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq0/frame000000.jpg"], ["cat0_seq0", 2, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq0/frame000002.jpg"], ["cat0_seq0", 4, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq0/frame000004.jpg"], ["cat0_seq0", 6, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq0/frame000006.jpg"], ["cat0_seq0", 8, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq0/frame000008.jpg"], ["cat0_seq1", 0, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq1/frame000000.jpg"], ["cat0_seq1", 2, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq1/frame000002.jpg"], ["cat0_seq1", 4, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq1/frame000004.jpg"], ["cat0_seq1", 6, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq1/frame000006.jpg"], ["cat0_seq1", 8, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq1/frame000008.jpg"], ["cat0_seq2", 0, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq2/frame000000.jpg"], ["cat0_seq2", 2, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq2/frame000002.jpg"], ["cat0_seq2", 4, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq2/frame000004.jpg"], ["cat0_seq2", 6, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq2/frame000006.jpg"], ["cat0_seq2", 8, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq2/frame000008.jpg"], ["cat0_seq3", 0, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq3/frame000000.jpg"], ["cat0_seq3", 2, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq3/frame000002.jpg"], ["cat0_seq3", 4, 
"kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq3/frame000004.jpg"], ["cat0_seq3", 6, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq3/frame000006.jpg"], ["cat0_seq3", 8, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq3/frame000008.jpg"], ["cat0_seq4", 0, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq4/frame000000.jpg"], ["cat0_seq4", 2, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq4/frame000002.jpg"], ["cat0_seq4", 4, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq4/frame000004.jpg"], ["cat0_seq4", 6, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq4/frame000006.jpg"], ["cat0_seq4", 8, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq4/frame000008.jpg"], ["cat1_seq0", 0, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq0/frame000000.jpg"], ["cat1_seq0", 2, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq0/frame000002.jpg"], ["cat1_seq0", 4, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq0/frame000004.jpg"], ["cat1_seq0", 6, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq0/frame000006.jpg"], ["cat1_seq0", 8, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq0/frame000008.jpg"], ["cat1_seq1", 0, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq1/frame000000.jpg"], ["cat1_seq1", 2, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq1/frame000002.jpg"], ["cat1_seq1", 4, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq1/frame000004.jpg"], ["cat1_seq1", 6, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq1/frame000006.jpg"], ["cat1_seq1", 8, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq1/frame000008.jpg"], 
["cat1_seq2", 0, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq2/frame000000.jpg"], ["cat1_seq2", 2, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq2/frame000002.jpg"], ["cat1_seq2", 4, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq2/frame000004.jpg"], ["cat1_seq2", 6, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq2/frame000006.jpg"], ["cat1_seq2", 8, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq2/frame000008.jpg"], ["cat1_seq3", 0, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq3/frame000000.jpg"], ["cat1_seq3", 2, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq3/frame000002.jpg"], ["cat1_seq3", 4, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq3/frame000004.jpg"], ["cat1_seq3", 6, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq3/frame000006.jpg"], ["cat1_seq3", 8, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq3/frame000008.jpg"], ["cat1_seq4", 0, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq4/frame000000.jpg"], ["cat1_seq4", 2, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq4/frame000002.jpg"], ["cat1_seq4", 4, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq4/frame000004.jpg"], ["cat1_seq4", 6, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq4/frame000006.jpg"], ["cat1_seq4", 8, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq4/frame000008.jpg"]], "test": [["cat0_seq0", 1, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq0/frame000001.jpg"], ["cat0_seq0", 3, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq0/frame000003.jpg"], ["cat0_seq0", 5, 
"kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq0/frame000005.jpg"], ["cat0_seq0", 7, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq0/frame000007.jpg"], ["cat0_seq0", 9, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq0/frame000009.jpg"], ["cat0_seq1", 1, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq1/frame000001.jpg"], ["cat0_seq1", 3, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq1/frame000003.jpg"], ["cat0_seq1", 5, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq1/frame000005.jpg"], ["cat0_seq1", 7, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq1/frame000007.jpg"], ["cat0_seq1", 9, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq1/frame000009.jpg"], ["cat0_seq2", 1, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq2/frame000001.jpg"], ["cat0_seq2", 3, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq2/frame000003.jpg"], ["cat0_seq2", 5, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq2/frame000005.jpg"], ["cat0_seq2", 7, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq2/frame000007.jpg"], ["cat0_seq2", 9, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq2/frame000009.jpg"], ["cat0_seq3", 1, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq3/frame000001.jpg"], ["cat0_seq3", 3, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq3/frame000003.jpg"], ["cat0_seq3", 5, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq3/frame000005.jpg"], ["cat0_seq3", 7, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq3/frame000007.jpg"], ["cat0_seq3", 9, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq3/frame000009.jpg"], 
["cat0_seq4", 1, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq4/frame000001.jpg"], ["cat0_seq4", 3, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq4/frame000003.jpg"], ["cat0_seq4", 5, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq4/frame000005.jpg"], ["cat0_seq4", 7, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq4/frame000007.jpg"], ["cat0_seq4", 9, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat0_seq4/frame000009.jpg"], ["cat1_seq0", 1, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq0/frame000001.jpg"], ["cat1_seq0", 3, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq0/frame000003.jpg"], ["cat1_seq0", 5, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq0/frame000005.jpg"], ["cat1_seq0", 7, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq0/frame000007.jpg"], ["cat1_seq0", 9, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq0/frame000009.jpg"], ["cat1_seq1", 1, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq1/frame000001.jpg"], ["cat1_seq1", 3, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq1/frame000003.jpg"], ["cat1_seq1", 5, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq1/frame000005.jpg"], ["cat1_seq1", 7, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq1/frame000007.jpg"], ["cat1_seq1", 9, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq1/frame000009.jpg"], ["cat1_seq2", 1, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq2/frame000001.jpg"], ["cat1_seq2", 3, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq2/frame000003.jpg"], ["cat1_seq2", 5, 
"kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq2/frame000005.jpg"], ["cat1_seq2", 7, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq2/frame000007.jpg"], ["cat1_seq2", 9, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq2/frame000009.jpg"], ["cat1_seq3", 1, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq3/frame000001.jpg"], ["cat1_seq3", 3, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq3/frame000003.jpg"], ["cat1_seq3", 5, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq3/frame000005.jpg"], ["cat1_seq3", 7, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq3/frame000007.jpg"], ["cat1_seq3", 9, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq3/frame000009.jpg"], ["cat1_seq4", 1, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq4/frame000001.jpg"], ["cat1_seq4", 3, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq4/frame000003.jpg"], ["cat1_seq4", 5, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq4/frame000005.jpg"], ["cat1_seq4", 7, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq4/frame000007.jpg"], ["cat1_seq4", 9, "kfcdtsiagiruwsuplqemogkmqyqhfvpwbvdrikpjlnegagzxhwxrguehparmirtk/cat1_seq4/frame000009.jpg"]]}
\ No newline at end of file
diff --git a/pytorch3d/tests/implicitron/data/sql_dataset/sql_dataset_100.sqlite b/pytorch3d/tests/implicitron/data/sql_dataset/sql_dataset_100.sqlite
new file mode 100644
index 0000000000000000000000000000000000000000..2d8ea3b2bb10d14175b1f8d87a4cebffa0210410
Binary files /dev/null and b/pytorch3d/tests/implicitron/data/sql_dataset/sql_dataset_100.sqlite differ
diff --git a/pytorch3d/tests/implicitron/models/__init__.py b/pytorch3d/tests/implicitron/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/tests/implicitron/models/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/tests/implicitron/models/test_overfit_model.py b/pytorch3d/tests/implicitron/models/test_overfit_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..8012e214c3cd8fadfa443883cc15f5f80a7ecc7e
--- /dev/null
+++ b/pytorch3d/tests/implicitron/models/test_overfit_model.py
@@ -0,0 +1,217 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from typing import Any, Dict
+from unittest.mock import patch
+
+import torch
+from pytorch3d.implicitron.models.generic_model import GenericModel
+from pytorch3d.implicitron.models.overfit_model import OverfitModel
+from pytorch3d.implicitron.models.renderer.base import EvaluationMode
+from pytorch3d.implicitron.tools.config import expand_args_fields
+from pytorch3d.renderer.cameras import look_at_view_transform, PerspectiveCameras
+
+DEVICE = torch.device("cuda:0")
+
+
+def _generate_fake_inputs(N: int, H: int, W: int) -> Dict[str, Any]:
+    """Build random keyword inputs on DEVICE for a batch of N views of size HxW."""
+    shape = (N, 1, H, W)  # every image-like tensor below is single-channel
+    R, T = look_at_view_transform(azim=torch.rand(N) * 360)
+    return {
+        "camera": PerspectiveCameras(R=R, T=T, device=DEVICE),
+        "fg_probability": torch.randint(high=2, size=shape, device=DEVICE).float(),
+        "depth_map": torch.rand(shape, device=DEVICE) + 0.1,
+        "mask_crop": torch.randint(high=2, size=shape, device=DEVICE).float(),
+        "sequence_name": ["sequence"] * N,
+        "image_rgb": torch.rand(shape, device=DEVICE),
+    }
+
+
+def mock_safe_multinomial(input: torch.Tensor, num_samples: int) -> torch.Tensor:
+    """Return deterministic indexes to mock safe_multinomial.
+
+    Args:
+        input: tensor of shape [B, n] containing non-negative values;
+                rows are interpreted as unnormalized event probabilities
+                in categorical distributions. Only B is used by this mock.
+        num_samples: number of samples to take.
+
+    Returns:
+        Tensor of shape [B, num_samples]; every row is arange(num_samples).
+    """
+    batch_size = input.shape[0]
+    return torch.arange(num_samples).repeat(batch_size, 1).to(DEVICE)
+
+
+class TestOverfitModel(unittest.TestCase):
+    def setUp(self):
+        torch.manual_seed(42)
+
+    def test_overfit_model_vs_generic_model_with_batch_size_one(self):
+        """In this test we compare OverfitModel to GenericModel behavior.
+
+        We use a Nerf setup (2 rendering passes).
+
+        OverfitModel is a specific case of GenericModel. Hence, with the same inputs,
+        they should provide the exact same results.
+        """
+        expand_args_fields(OverfitModel)
+        expand_args_fields(GenericModel)
+        batch_size, image_height, image_width = 1, 80, 80
+        self.assertEqual(batch_size, 1)
+        overfit_model = OverfitModel(
+            render_image_height=image_height,
+            render_image_width=image_width,
+            coarse_implicit_function_class_type="NeuralRadianceFieldImplicitFunction",
+            # To avoid randomization to compare the outputs of our model
+            # we deactivate the stratified_point_sampling_training
+            raysampler_AdaptiveRaySampler_args={
+                "stratified_point_sampling_training": False
+            },
+            global_encoder_class_type="SequenceAutodecoder",
+            global_encoder_SequenceAutodecoder_args={
+                "autodecoder_args": {
+                    "n_instances": 1000,
+                    "init_scale": 1.0,
+                    "encoding_dim": 64,
+                }
+            },
+        )
+        generic_model = GenericModel(
+            render_image_height=image_height,
+            render_image_width=image_width,
+            n_train_target_views=batch_size,
+            num_passes=2,
+            # To avoid randomization to compare the outputs of our model
+            # we deactivate the stratified_point_sampling_training
+            raysampler_AdaptiveRaySampler_args={
+                "stratified_point_sampling_training": False
+            },
+            global_encoder_class_type="SequenceAutodecoder",
+            global_encoder_SequenceAutodecoder_args={
+                "autodecoder_args": {
+                    "n_instances": 1000,
+                    "init_scale": 1.0,
+                    "encoding_dim": 64,
+                }
+            },
+        )
+
+        # Both models must hold the same total number of parameters
+        num_params_om = sum(p.numel() for p in overfit_model.parameters())
+        num_params_gm = sum(p.numel() for p in generic_model.parameters())
+        self.assertEqual(num_params_om, num_params_gm)
+
+        # Rename the generic model's state-dict keys to the overfit model's names
+        mapping_om_from_gm = {
+            key.replace(
+                "_implicit_functions.0._fn", "coarse_implicit_function"
+            ).replace("_implicit_functions.1._fn", "implicit_function"): val
+            for key, val in generic_model.state_dict().items()
+        }
+        # Copy parameters from generic_model to overfit_model
+        overfit_model.load_state_dict(mapping_om_from_gm)
+
+        overfit_model.to(DEVICE)
+        generic_model.to(DEVICE)
+        inputs_ = _generate_fake_inputs(batch_size, image_height, image_width)
+
+        # training forward pass
+        overfit_model.train()
+        generic_model.train()
+
+        with patch(
+            "pytorch3d.renderer.implicit.raysampling._safe_multinomial",
+            side_effect=mock_safe_multinomial,
+        ):
+            train_preds_om = overfit_model(
+                **inputs_,
+                evaluation_mode=EvaluationMode.TRAINING,
+            )
+            train_preds_gm = generic_model(
+                **inputs_,
+                evaluation_mode=EvaluationMode.TRAINING,
+            )
+
+        self.assertEqual(len(train_preds_om), len(train_preds_gm))
+
+        self.assertTrue(train_preds_om["objective"].isfinite().item())
+        # We avoid all the randomization and the weights are the same
+        # The objective should be the same
+        self.assertTrue(
+            torch.allclose(train_preds_om["objective"], train_preds_gm["objective"])
+        )
+
+        # Test if the evaluation works
+        overfit_model.eval()
+        generic_model.eval()
+        with torch.no_grad():
+            eval_preds_om = overfit_model(
+                **inputs_,
+                evaluation_mode=EvaluationMode.EVALUATION,
+            )
+            eval_preds_gm = generic_model(
+                **inputs_,
+                evaluation_mode=EvaluationMode.EVALUATION,
+            )
+
+        self.assertEqual(
+            eval_preds_om["images_render"].shape,
+            (batch_size, 3, image_height, image_width),
+        )
+        self.assertTrue(
+            torch.allclose(eval_preds_om["objective"], eval_preds_gm["objective"])
+        )
+        self.assertTrue(
+            torch.allclose(
+                eval_preds_om["images_render"], eval_preds_gm["images_render"]
+            )
+        )
+
+    def test_overfit_model_check_share_weights(self):
+        """Shared passes must reuse the very same parameter objects."""
+        model = OverfitModel(share_implicit_function_across_passes=True)
+        fine_params = model.implicit_function.parameters()
+        coarse_params = model.coarse_implicit_function.parameters()
+        for p1, p2 in zip(fine_params, coarse_params):
+            self.assertIs(p1, p2)
+
+        model.to(DEVICE)
+        inputs_ = _generate_fake_inputs(2, 80, 80)
+        model(**inputs_, evaluation_mode=EvaluationMode.TRAINING)
+
+    def test_overfit_model_check_no_share_weights(self):
+        """Unshared passes must hold distinct parameter objects."""
+        model = OverfitModel(
+            share_implicit_function_across_passes=False,
+            coarse_implicit_function_class_type="NeuralRadianceFieldImplicitFunction",
+            coarse_implicit_function_NeuralRadianceFieldImplicitFunction_args={
+                "transformer_dim_down_factor": 1.0,
+                "n_hidden_neurons_xyz": 256,
+                "n_layers_xyz": 8,
+                "append_xyz": (5,),
+            },
+        )
+        fine_params = model.implicit_function.parameters()
+        coarse_params = model.coarse_implicit_function.parameters()
+        for p1, p2 in zip(fine_params, coarse_params):
+            self.assertIsNot(p1, p2)
+
+        model.to(DEVICE)
+        inputs_ = _generate_fake_inputs(2, 80, 80)
+        model(**inputs_, evaluation_mode=EvaluationMode.TRAINING)
+
+    def test_overfit_model_coarse_implicit_function_is_none(self):
+        model = OverfitModel(
+            share_implicit_function_across_passes=False,
+            coarse_implicit_function_NeuralRadianceFieldImplicitFunction_args=None,
+        )
+        self.assertIsNone(model.coarse_implicit_function)
+        model.to(DEVICE)
+        inputs_ = _generate_fake_inputs(2, 80, 80)
+        model(**inputs_, evaluation_mode=EvaluationMode.TRAINING)  # must not raise
diff --git a/pytorch3d/tests/implicitron/models/test_utils.py b/pytorch3d/tests/implicitron/models/test_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..17c21c0ebecabfafa0d7801a0e06856d2144c091
--- /dev/null
+++ b/pytorch3d/tests/implicitron/models/test_utils.py
@@ -0,0 +1,65 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import torch
+
+from pytorch3d.implicitron.models.utils import preprocess_input, weighted_sum_losses
+
+
+class TestUtils(unittest.TestCase):
+    def test_prepare_inputs_wrong_num_dim(self):
+        unbatched_img = torch.randn(3, 3, 3)
+        expected_msg = (
+            "Model received unbatched inputs. "
+            + "Perhaps they came from a FrameData which had not been collated."
+        )
+        with self.assertRaisesRegex(ValueError, expected_msg):
+            preprocess_input(
+                unbatched_img, None, None, True, True, 0.5, (0.0, 0.0, 0.0)
+            )
+
+    def test_prepare_inputs_mask_image_true(self):
+        """Expect a tril-masked image, a thresholded mask and no depth map."""
+        n, c, h, w = 2, 3, 10, 10
+        image = torch.ones(n, c, h, w)
+        # Create a mask on the lower triangular matrix
+        fg_probability = torch.tril(torch.ones(n, 1, h, w)) * 0.3
+        out_img, out_fg_prob, out_depth_map = preprocess_input(
+            image, fg_probability, None, True, False, 0.3, (0.0, 0.0, 0.0)
+        )
+
+        self.assertTrue(torch.equal(out_img, torch.tril(image)))
+        self.assertTrue(torch.equal(out_fg_prob, fg_probability >= 0.3))
+        self.assertIsNone(out_depth_map)
+
+    def test_prepare_inputs_mask_depth_true(self):
+        """Expect an untouched image, a thresholded mask and a tril-masked depth."""
+        n, c, h, w = 2, 3, 10, 10
+        image = torch.ones(n, c, h, w)
+        depth = torch.randn(n, c, h, w)
+        # Create a mask on the lower triangular matrix
+        fg_probability = torch.tril(torch.ones(n, 1, h, w)) * 0.3
+        out_img, out_fg_prob, out_depth_map = preprocess_input(
+            image, fg_probability, depth, False, True, 0.3, (0.0, 0.0, 0.0)
+        )
+
+        self.assertTrue(torch.equal(out_img, image))
+        self.assertTrue(torch.equal(out_fg_prob, fg_probability >= 0.3))
+        self.assertTrue(torch.equal(out_depth_map, torch.tril(depth)))
+
+    def test_weighted_sum_losses(self):
+        """Only "a" carries a non-zero weight, so the expected total is 4.0."""
+        losses = {"a": torch.tensor(2), "b": torch.tensor(2)}
+        total = weighted_sum_losses(losses, {"a": 2.0, "b": 0.0})
+        self.assertEqual(total, 4.0)
+
+    def test_weighted_sum_losses_raise_warning(self):
+        """No weight key matches a prediction key, so None is expected."""
+        losses = {"a": torch.tensor(2), "b": torch.tensor(2)}
+        self.assertIsNone(weighted_sum_losses(losses, {"c": 2.0, "d": 2.0}))
diff --git a/pytorch3d/tests/implicitron/test_batch_sampler.py b/pytorch3d/tests/implicitron/test_batch_sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..f2ac4a9655f9b0ffcefd72331cf5c8bcca5243f6
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_batch_sampler.py
@@ -0,0 +1,268 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+from collections import defaultdict
+from dataclasses import dataclass
+from itertools import product
+
+import numpy as np
+
+import torch
+from pytorch3d.implicitron.dataset.data_loader_map_provider import (
+    DoublePoolBatchSampler,
+)
+
+from pytorch3d.implicitron.dataset.dataset_base import DatasetBase
+from pytorch3d.implicitron.dataset.frame_data import FrameData
+from pytorch3d.implicitron.dataset.scene_batch_sampler import SceneBatchSampler
+
+
+@dataclass
+class MockFrameAnnotation:
+    frame_number: int  # frame number within its sequence
+    sequence_name: str = "sequence"  # placeholder; MockDataset overwrites it
+    frame_timestamp: float = 0.0  # MockDataset leaves this at the default
+
+
+class MockDataset(DatasetBase):
+    """In-memory dataset of `num_seq` mock sequences with 10 frames each."""
+
+    def __init__(self, num_seq, max_frame_gap=1):
+        """
+        Makes a gap of max_frame_gap frame numbers in the middle of each sequence
+        """
+        seq_names = [f"seq_{i}" for i in range(num_seq)]
+        self.seq_annots = dict.fromkeys(seq_names)
+        # sequence i owns the dataset indices [10 * i, 10 * i + 10)
+        self._seq_to_idx = {
+            name: list(range(i * 10, i * 10 + 10)) for i, name in enumerate(seq_names)
+        }
+
+        # frame numbers within sequence: [0, ..., 4, n, ..., n+4]
+        # where n - 4 == max_frame_gap
+        frame_nos = list(range(5)) + list(range(4 + max_frame_gap, 9 + max_frame_gap))
+        self.frame_annots = [
+            {"frame_annotation": MockFrameAnnotation(no)} for no in frame_nos * num_seq
+        ]
+        for seq_name, idx in self._seq_to_idx.items():
+            for i in idx:
+                self.frame_annots[i]["frame_annotation"].sequence_name = seq_name
+
+    def get_frame_numbers_and_timestamps(self, idxs, subset_filter=None):
+        """Return (frame_number, frame_timestamp) pairs for the given indices."""
+        assert subset_filter is None
+        annotations = (self.frame_annots[idx]["frame_annotation"] for idx in idxs)
+        return [(fa.frame_number, fa.frame_timestamp) for fa in annotations]
+
+    def __getitem__(self, index: int):
+        """Wrap the mock annotation stored at `index` into a FrameData."""
+        fa = self.frame_annots[index]["frame_annotation"]
+        return FrameData(
+            sequence_name=fa.sequence_name,
+            sequence_category="default_category",
+            frame_number=torch.LongTensor([fa.frame_number]),
+            frame_timestamp=torch.LongTensor([fa.frame_timestamp]),
+        )
+
+
+class TestSceneBatchSampler(unittest.TestCase):
+    def setUp(self):
+        np.random.seed(42)
+        self.dataset_overfit = MockDataset(1)
+
+    def test_overfit(self):
+        """A single-sequence dataset must yield full batches from that sequence."""
+        num_batches, batch_size = 3, 10
+        sampler = SceneBatchSampler(
+            self.dataset_overfit,
+            batch_size=batch_size,
+            num_batches=num_batches,
+            images_per_seq_options=[10],  # will try to sample batch_size anyway
+        )
+
+        self.assertEqual(len(sampler), num_batches)
+
+        batch_iter = iter(sampler)
+        for _ in range(num_batches):
+            batch = next(batch_iter)
+            self.assertIsNotNone(batch)
+            self.assertEqual(len(batch), batch_size)  # true for our examples
+            self.assertTrue(all(idx // 10 == 0 for idx in batch))
+
+        with self.assertRaises(StopIteration):
+            next(batch_iter)
+
+    def test_multiseq(self):
+        """Run the multi-sequence check over every combination of options."""
+        ips_choices = [[10], [2], [3], [2, 3, 4]]
+        consecutive_choices = [True, False]
+        max_gap_choices = [0, 1, 3]
+        flavours = product(ips_choices, consecutive_choices, max_gap_choices)
+        # each flavour is (ips_options, sample_consecutive_frames, max_gap)
+        for flavour in flavours:
+            self._test_multiseq_flavour(*flavour)
+
+    def test_multiseq_gaps(self):
+        """Sample consecutive frames when the data gap (3) exceeds the max gap (1)."""
+        num_batches = 16
+        batch_size = 10
+        dataset_multiseq = MockDataset(5, max_frame_gap=3)
+        for ips_options in [[10], [2], [3], [2, 3, 4]]:
+            debug_info = f" Images per sequence: {ips_options}."
+            # true for our examples: asking for more than 5 images per
+            # sequence yields batches of 5, otherwise full batches
+            expected_len = 5 if max(ips_options) > 5 else batch_size
+
+            sampler = SceneBatchSampler(
+                dataset_multiseq,
+                batch_size=batch_size,
+                num_batches=num_batches,
+                images_per_seq_options=ips_options,
+                sample_consecutive_frames=True,
+                consecutive_frames_max_gap=1,
+            )
+
+            self.assertEqual(len(sampler), num_batches, msg=debug_info)
+
+            batch_iter = iter(sampler)
+            for _ in range(num_batches):
+                batch = next(batch_iter)
+                self.assertIsNotNone(batch, "batch is None in" + debug_info)
+                self.assertEqual(len(batch), expected_len, msg=debug_info)
+
+                # the default max_gap=1 matches consecutive_frames_max_gap above
+                self._check_frames_are_consecutive(
+                    batch, dataset_multiseq.frame_annots, debug_info
+                )
+
+    def _test_multiseq_flavour(
+        self,
+        ips_options,
+        sample_consecutive_frames,
+        consecutive_frames_max_gap,
+        num_batches=16,
+        batch_size=10,
+    ):
+        """Check batch composition for one combination of sampler options."""
+        debug_info = (
+            f" Images per sequence: {ips_options}, "
+            f"sample_consecutive_frames: {sample_consecutive_frames}, "
+            f"consecutive_frames_max_gap: {consecutive_frames_max_gap}, "
+        )
+        # in this test, either consecutive_frames_max_gap == max_frame_gap,
+        # or consecutive_frames_max_gap == 0, so segments consist of full sequences
+        if consecutive_frames_max_gap > 0:
+            frame_gap = consecutive_frames_max_gap
+        else:
+            frame_gap = 3
+        dataset_multiseq = MockDataset(5, max_frame_gap=frame_gap)
+        sampler = SceneBatchSampler(
+            dataset_multiseq,
+            batch_size=batch_size,
+            num_batches=num_batches,
+            images_per_seq_options=ips_options,
+            sample_consecutive_frames=sample_consecutive_frames,
+            consecutive_frames_max_gap=consecutive_frames_max_gap,
+        )
+
+        self.assertEqual(len(sampler), num_batches, msg=debug_info)
+
+        seen_counts = set()
+        batch_iter = iter(sampler)
+        for _ in range(num_batches):
+            batch = next(batch_iter)
+            self.assertIsNotNone(batch, "batch is None in" + debug_info)
+            # true for our examples
+            self.assertEqual(len(batch), batch_size, msg=debug_info)
+            # find distribution over sequences
+            frames_per_seq = _count_by_quotient(batch, 10)
+            freqs = _count_by_quotient(frames_per_seq.values(), 1)
+            too_many_msg = (
+                "We should have maximum of 2 different "
+                "frequences of sequences in the batch." + debug_info
+            )
+            self.assertLessEqual(len(freqs), 2, msg=too_many_msg)
+            # two frequencies: the larger count plus one odd, smaller sequence
+            if len(freqs) == 2:
+                most_seq_count, last_seq = max(freqs), min(freqs)
+                odd_msg = "Only one odd sequence allowed." + debug_info
+                self.assertEqual(freqs[last_seq], 1, msg=odd_msg)
+            else:
+                self.assertEqual(len(freqs), 1)
+                most_seq_count = next(iter(freqs))
+
+            self.assertIn(most_seq_count, ips_options)
+            seen_counts.add(most_seq_count)
+
+            if sample_consecutive_frames:
+                self._check_frames_are_consecutive(
+                    batch,
+                    dataset_multiseq.frame_annots,
+                    debug_info,
+                    max_gap=consecutive_frames_max_gap,
+                )
+
+        self.assertTrue(
+            seen_counts.issuperset(ips_options),
+            "Some of the frequency options did not occur among "
+            f"the {num_batches} batches (could be just bad luck)." + debug_info,
+        )
+
+        with self.assertRaises(StopIteration):
+            next(batch_iter)
+
+    def _check_frames_are_consecutive(self, batch, annots, debug_info, max_gap=1):
+        """Assert that neighbouring same-sequence entries of `batch` are close."""
+        for curr_idx, next_idx in zip(batch, batch[1:]):
+            if curr_idx // 10 != next_idx // 10:
+                continue  # different sequences: nothing to compare
+            if max_gap > 0:
+                # compare frame numbers rather than dataset indices
+                curr_no = annots[curr_idx]["frame_annotation"].frame_number
+                next_no = annots[next_idx]["frame_annotation"].frame_number
+                allowed = max_gap
+            else:
+                # we'll check that raw dataset indices are consecutive
+                curr_no, next_no, allowed = curr_idx, next_idx, 1
+
+            self.assertLessEqual(next_no - curr_no, allowed, msg=debug_info)
+
+
+def _count_by_quotient(indices, divisor):
+    """Count how many of `indices` fall on each value of `index // divisor`."""
+    tally = defaultdict(int)
+    for index in indices:
+        quotient = index // divisor
+        tally[quotient] += 1
+    return tally
+
+
+class TestRandomSampling(unittest.TestCase):
+    def test_double_pool_batch_sampler(self):
+        """Each batch leads with a first-pool index followed by rest-pool indices."""
+        batch_size = 4
+        unknown_idxs = [2, 3, 4, 5, 8]
+        known_idxs = [2, 9, 10, 11, 12, 13, 14, 15, 16, 17]
+        for replacement, num_batches in product([True, False], [None, 4, 5, 6, 30]):
+            with self.subTest(f"{replacement}, {num_batches}"):
+                sampler = DoublePoolBatchSampler(
+                    first_indices=unknown_idxs,
+                    rest_indices=known_idxs,
+                    batch_size=batch_size,
+                    replacement=replacement,
+                    num_batches=num_batches,
+                )
+                for _ in range(6):
+                    epoch = list(sampler)
+                    self.assertEqual(len(epoch), num_batches or len(unknown_idxs))
+                    for batch in epoch:
+                        self.assertEqual(len(batch), batch_size)
+                        self.assertIn(batch[0], unknown_idxs)
+                        for i in batch[1:]:
+                            self.assertIn(i, known_idxs)
+                    if not replacement and num_batches != 4:
+                        self.assertEqual({b[0] for b in epoch}, set(unknown_idxs))
diff --git a/pytorch3d/tests/implicitron/test_bbox.py b/pytorch3d/tests/implicitron/test_bbox.py
new file mode 100644
index 0000000000000000000000000000000000000000..08dc119fe06237df7ab4fed65c57bd5b90957825
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_bbox.py
@@ -0,0 +1,142 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import numpy as np
+
+import torch
+
+from pytorch3d.implicitron.dataset.utils import (
+    bbox_xywh_to_xyxy,
+    bbox_xyxy_to_xywh,
+    clamp_box_to_image_bounds_and_round,
+    crop_around_box,
+    get_1d_bounds,
+    get_bbox_from_mask,
+    get_clamp_bbox,
+    rescale_bbox,
+    resize_image,
+)
+
+from tests.common_testing import TestCaseMixin
+
+
+class TestBBox(TestCaseMixin, unittest.TestCase):
+    def setUp(self):
+        torch.manual_seed(42)  # seeds torch only; np.random below stays unseeded
+
+    def test_bbox_conversion(self):  # xywh -> xyxy -> xywh -> xyxy is lossless
+        bbox_xywh_list = torch.LongTensor(
+            [
+                [0, 0, 10, 20],
+                [10, 20, 5, 1],
+                [10, 20, 1, 1],
+                [5, 4, 0, 1],  # zero-width box
+            ]
+        )
+        for bbox_xywh in bbox_xywh_list:
+            bbox_xyxy = bbox_xywh_to_xyxy(bbox_xywh)
+            bbox_xywh_ = bbox_xyxy_to_xywh(bbox_xyxy)
+            bbox_xyxy_ = bbox_xywh_to_xyxy(bbox_xywh_)
+            self.assertClose(bbox_xywh_, bbox_xywh)
+            self.assertClose(bbox_xyxy, bbox_xyxy_)
+
+    def test_compare_to_expected(self):
+        bbox_xywh_to_xyxy_expected = torch.LongTensor(  # rows: [xywh, xyxy]
+            [
+                [[0, 0, 10, 20], [0, 0, 10, 20]],
+                [[10, 20, 5, 1], [10, 20, 15, 21]],
+                [[10, 20, 1, 1], [10, 20, 11, 21]],
+                [[5, 4, 0, 1], [5, 4, 5, 5]],
+            ]
+        )
+        for bbox_xywh, bbox_xyxy_expected in bbox_xywh_to_xyxy_expected:
+            self.assertClose(bbox_xywh_to_xyxy(bbox_xywh), bbox_xyxy_expected)
+            self.assertClose(bbox_xyxy_to_xywh(bbox_xyxy_expected), bbox_xywh)
+
+        clamp_amnt = 3  # sides shorter than this are expected to grow to it
+        bbox_xywh_to_xyxy_clamped_expected = torch.LongTensor(
+            [
+                [[0, 0, 10, 20], [0, 0, 10, 20]],
+                [[10, 20, 5, 1], [10, 20, 15, 20 + clamp_amnt]],
+                [[10, 20, 1, 1], [10, 20, 10 + clamp_amnt, 20 + clamp_amnt]],
+                [[5, 4, 0, 1], [5, 4, 5 + clamp_amnt, 4 + clamp_amnt]],
+            ]
+        )
+        for bbox_xywh, bbox_xyxy_expected in bbox_xywh_to_xyxy_clamped_expected:
+            self.assertClose(
+                bbox_xywh_to_xyxy(bbox_xywh, clamp_size=clamp_amnt),
+                bbox_xyxy_expected,
+            )
+
+    def test_mask_to_bbox(self):
+        mask = np.array(
+            [
+                [0, 0, 0, 0, 0, 0],
+                [0, 0, 1, 1, 0, 0],
+                [0, 0, 0, 0, 0, 0],
+            ]
+        ).astype(np.float32)
+        expected_bbox_xywh = [2, 1, 2, 1]  # ones sit in row 1, columns 2-3
+        bbox_xywh = get_bbox_from_mask(mask, 0.5)
+        self.assertClose(bbox_xywh, expected_bbox_xywh)
+
+    def test_crop_around_box(self):
+        bbox = torch.LongTensor([0, 1, 2, 3])  # (x_min, y_min, x_max, y_max)
+        image = torch.LongTensor(
+            [
+                [0, 0, 10, 20],
+                [10, 20, 5, 1],
+                [10, 20, 1, 1],
+                [5, 4, 0, 1],
+            ]
+        )
+        cropped = crop_around_box(image, bbox)
+        self.assertClose(cropped, image[1:3, 0:2])  # rows y_min:y_max, cols x_min:x_max
+
+    def test_clamp_box_to_image_bounds_and_round(self):
+        bbox = torch.LongTensor([0, 1, 10, 12])
+        image_size = (5, 6)  # (height, width), judging by the expected box below
+        expected_clamped_bbox = torch.LongTensor([0, 1, image_size[1], image_size[0]])
+        clamped_bbox = clamp_box_to_image_bounds_and_round(bbox, image_size)
+        self.assertClose(clamped_bbox, expected_clamped_bbox)
+
+    def test_get_clamp_bbox(self):
+        bbox_xywh = torch.LongTensor([1, 1, 4, 5])
+        clamped_bbox_xyxy = get_clamp_bbox(bbox_xywh, box_crop_context=2)
+        # expected: each side moves outwards by box_crop_context / 2 * (w, h) = (4, 5)
+        self.assertClose(clamped_bbox_xyxy, torch.Tensor([-3, -4, 9, 11]))
+
+    def test_rescale_bbox(self):
+        bbox = torch.Tensor([0.0, 1.0, 3.0, 4.0])
+        original_resolution = (4, 4)
+        new_resolution = (8, 8)  # twice bigger
+        rescaled_bbox = rescale_bbox(bbox, original_resolution, new_resolution)
+        self.assertClose(bbox * 2, rescaled_bbox)
+
+    def test_get_1d_bounds(self):
+        array = [0, 1, 2]
+        bounds = get_1d_bounds(array)
+        # bounds span the nonzero entries: first index (1) to one past the last (3)
+        self.assertClose(bounds, [1, 3])
+
+    def test_resize_image(self):
+        image = np.random.rand(3, 300, 500)  # rgb image 300x500
+        expected_shape = (150, 250)
+
+        resized_image, scale, mask_crop = resize_image(
+            image, image_height=expected_shape[0], image_width=expected_shape[1]
+        )
+
+        original_shape = image.shape[-2:]
+        expected_scale = min(
+            expected_shape[0] / original_shape[0], expected_shape[1] / original_shape[1]
+        )
+
+        self.assertEqual(scale, expected_scale)  # both ratios are exactly 0.5
+        self.assertEqual(resized_image.shape[-2:], expected_shape)
+        self.assertEqual(mask_crop.shape[-2:], expected_shape)
diff --git a/pytorch3d/tests/implicitron/test_build.py b/pytorch3d/tests/implicitron/test_build.py
new file mode 100644
index 0000000000000000000000000000000000000000..646497f5ddc765e7015b31ab396dc635ebb9c1b8
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_build.py
@@ -0,0 +1,51 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import importlib
+import os
+import sys
+import unittest
+import unittest.mock
+
+from tests.common_testing import get_pytorch3d_dir
+
+
+# This file groups together tests which look at the code without running it.
+class TestBuild(unittest.TestCase):
+    def test_no_import_cycles(self):
+        # Check each module of pytorch3d imports cleanly,
+        # which may fail if there are import cycles.
+        with unittest.mock.patch.dict(sys.modules):
+            for module in list(sys.modules):
+                # If any of pytorch3d is already imported,
+                # the test would be pointless.
+                if module.startswith("pytorch3d"):
+                    sys.modules.pop(module, None)
+
+            # torchvision seems to cause problems if re-imported,
+            # so make sure it has been imported here.
+            import torchvision.utils  # noqa
+
+            root_dir = get_pytorch3d_dir() / "pytorch3d"
+            # Exclude opengl-related files, as Implicitron is decoupled from opengl
+            # components which will not work without adding a dep on pytorch3d_opengl.
+            ignored_modules = (
+                "__init__",
+                "plotly_vis",
+                "opengl_utils",
+                "rasterizer_opengl",
+            )
+            if os.environ.get("FB_TEST", False):
+                ignored_modules += ("orm_types", "sql_dataset", "sql_dataset_provider")
+            for module_file in root_dir.glob("**/*.py"):
+                if module_file.stem in ignored_modules:
+                    continue
+                # Join path parts so this works whatever the path separator is.
+                module_path = module_file.relative_to(root_dir).with_suffix("")
+                module = ".".join(("pytorch3d", *module_path.parts))
+                with self.subTest(name=module):
+                    with unittest.mock.patch.dict(sys.modules):
+                        importlib.import_module(module)
diff --git a/pytorch3d/tests/implicitron/test_circle_fitting.py b/pytorch3d/tests/implicitron/test_circle_fitting.py
new file mode 100644
index 0000000000000000000000000000000000000000..479e692cadd748fa193f974bb3b942522183055d
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_circle_fitting.py
@@ -0,0 +1,198 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from math import pi
+
+import torch
+from pytorch3d.implicitron.tools.circle_fitting import (
+    _signed_area,
+    fit_circle_in_2d,
+    fit_circle_in_3d,
+    get_rotation_to_best_fit_xy,
+)
+from pytorch3d.transforms import random_rotation, random_rotations
+from tests.common_testing import TestCaseMixin
+
+
+class TestCircleFitting(TestCaseMixin, unittest.TestCase):
+    def setUp(self):
+        torch.manual_seed(42)
+
+    def _assertParallel(self, a, b, **kwargs):
+        """
+        Given a and b of shape (..., 3) each containing 3D vectors,
+        assert that corresponding vectors are parallel. Changed sign is ok.
+        """
+        self.assertClose(torch.cross(a, b, dim=-1), torch.zeros_like(a), **kwargs)
+
+    def test_plane_levelling(self):
+        device = torch.device("cuda:0")  # NOTE: needs a CUDA device
+        B = 16
+        N = 1024
+        random = torch.randn((B, N, 3), device=device)
+
+        # first, check that we always return a valid rotation
+        rot = get_rotation_to_best_fit_xy(random)
+        self.assertClose(rot.det(), torch.ones_like(rot[:, 0, 0]))
+        self.assertClose(rot.norm(dim=-1), torch.ones_like(rot[:, 0]))
+
+        # then, check the result is what we expect
+        z_squeeze = 0.1
+        random[..., -1] *= z_squeeze
+        rot_gt = random_rotations(B, device=device)
+        rotated = random @ rot_gt.transpose(-1, -2)
+        rot_hat = get_rotation_to_best_fit_xy(rotated)
+        self.assertClose(rot_hat.det(), torch.ones_like(rot_hat[:, 0, 0]))
+        self.assertClose(rot_hat.norm(dim=-1), torch.ones_like(rot_hat[:, 0]))
+        # covariance matrix of the levelled points is by design diag(1, 1, z_squeeze²)
+        self.assertClose(
+            (rotated @ rot_hat)[..., -1].std(dim=-1),
+            torch.ones_like(rot_hat[:, 0, 0]) * z_squeeze,
+            rtol=0.1,
+        )
+
+    def test_simple_3d(self):
+        device = torch.device("cuda:0")  # NOTE: needs a CUDA device
+        for _ in range(7):
+            radius = 10 * torch.rand(1, device=device)[0]
+            center = 10 * torch.rand(3, device=device)
+            rot = random_rotation(device=device)
+            offset = torch.rand(3, device=device)
+            up = torch.rand(3, device=device)
+            self._simple_3d_test(radius, center, rot, offset, up)
+
+    def _simple_3d_test(self, radius, center, rot, offset, up):
+        # angles are increasing so the points move in a well defined direction.
+        angles = torch.cumsum(torch.rand(17, device=rot.device), dim=0)
+        many = torch.stack(
+            [torch.cos(angles), torch.sin(angles), torch.zeros_like(angles)], dim=1
+        )
+        source_points = (many * radius) @ rot + center[None]
+
+        # case with no generation
+        result = fit_circle_in_3d(source_points)
+        self.assertClose(result.radius, radius)
+        self.assertClose(result.center, center)
+        self._assertParallel(result.normal, rot[2], atol=1e-5)
+        self.assertEqual(result.generated_points.shape, (0, 3))
+
+        # Generate 5 points around the circle
+        n_new_points = 5
+        result2 = fit_circle_in_3d(source_points, n_points=n_new_points)
+        self.assertClose(result2.radius, radius)
+        self.assertClose(result2.center, center)
+        self.assertClose(result2.normal, result.normal)
+        self.assertEqual(result2.generated_points.shape, (5, 3))
+
+        observed_points = result2.generated_points
+        self.assertClose(observed_points[0], observed_points[4], atol=1e-4)
+        self.assertClose(observed_points[0], source_points[0], atol=1e-5)
+        observed_normal = torch.cross(
+            observed_points[0] - observed_points[2],
+            observed_points[1] - observed_points[3],
+            dim=-1,
+        )
+        self._assertParallel(observed_normal, result.normal, atol=1e-4)
+        diameters = observed_points[:2] - observed_points[2:4]
+        self.assertClose(
+            torch.norm(diameters, dim=1), diameters.new_full((2,), 2 * radius)
+        )
+
+        # Regenerate the input points
+        result3 = fit_circle_in_3d(source_points, angles=angles - angles[0])
+        self.assertClose(result3.radius, radius)
+        self.assertClose(result3.center, center)
+        self.assertClose(result3.normal, result.normal)
+        self.assertClose(result3.generated_points, source_points, atol=1e-5)
+
+        # Test with offset
+        result4 = fit_circle_in_3d(
+            source_points, angles=angles - angles[0], offset=offset, up=up
+        )
+        self.assertClose(result4.radius, radius)
+        self.assertClose(result4.center, center)
+        self.assertClose(result4.normal, result.normal)
+        observed_offsets = result4.generated_points - source_points
+
+        # observed_offset is constant
+        self.assertClose(
+            observed_offsets.min(0).values, observed_offsets.max(0).values, atol=1e-5
+        )
+        # observed_offset has the right length
+        self.assertClose(observed_offsets[0].norm(), offset.norm())
+
+        self.assertClose(result.normal.norm(), torch.ones(()))
+        # component of observed_offset along normal
+        component = torch.dot(observed_offsets[0], result.normal)
+        self.assertClose(component.abs(), offset[2].abs(), atol=1e-5)
+        agree_normal = torch.dot(result.normal, up) > 0
+        agree_signs = component * offset[2] > 0
+        self.assertEqual(agree_normal, agree_signs)
+
+    def test_simple_2d(self):
+        radius = 7.0
+        center = torch.tensor([9, 2.5])
+        angles = torch.cumsum(torch.rand(17), dim=0)
+        many = torch.stack([torch.cos(angles), torch.sin(angles)], dim=1)
+        source_points = (many * radius) + center[None]
+
+        result = fit_circle_in_2d(source_points)
+        self.assertClose(result.radius, torch.tensor(radius))
+        self.assertClose(result.center, center)
+        self.assertEqual(result.generated_points.shape, (0, 2))
+
+        # Generate 5 points around the circle
+        n_new_points = 5
+        result2 = fit_circle_in_2d(source_points, n_points=n_new_points)
+        self.assertClose(result2.radius, torch.tensor(radius))
+        self.assertClose(result2.center, center)
+        self.assertEqual(result2.generated_points.shape, (5, 2))
+
+        observed_points = result2.generated_points
+        self.assertClose(observed_points[0], observed_points[4])
+        self.assertClose(observed_points[0], source_points[0], atol=1e-5)
+        diameters = observed_points[:2] - observed_points[2:4]
+        self.assertClose(torch.norm(diameters, dim=1), torch.full((2,), 2 * radius))
+
+        # Regenerate the input points
+        result3 = fit_circle_in_2d(source_points, angles=angles - angles[0])
+        self.assertClose(result3.radius, torch.tensor(radius))
+        self.assertClose(result3.center, center)
+        self.assertClose(result3.generated_points, source_points, atol=1e-5)
+
+    def test_minimum_inputs(self):
+        fit_circle_in_3d(torch.rand(3, 3), n_points=10)
+
+        with self.assertRaisesRegex(
+            ValueError, "2 points are not enough to determine a circle"
+        ):
+            fit_circle_in_3d(torch.rand(2, 3))
+
+    def test_signed_area(self):
+        n_points = 1001
+        angles = torch.linspace(0, 2 * pi, n_points)
+        radius = 0.85
+        center = torch.rand(2)
+        circle = center + radius * torch.stack(
+            [torch.cos(angles), torch.sin(angles)], dim=1
+        )
+        circle_area = torch.tensor(pi * radius * radius)
+        self.assertClose(_signed_area(circle), circle_area)
+        # clockwise is negative
+        self.assertClose(_signed_area(circle.flip(0)), -circle_area)
+
+        # Semicircles
+        self.assertClose(_signed_area(circle[: (n_points + 1) // 2]), circle_area / 2)
+        self.assertClose(_signed_area(circle[n_points // 2 :]), circle_area / 2)
+
+        # A straight line bounds no area
+        self.assertClose(_signed_area(torch.rand(2, 2)), torch.tensor(0.0))
+
+        # Letter 'L' written anticlockwise.
+        L_shape = [[0, 1], [0, 0], [1, 0]]
+        # Triangle area is 0.5 * b * h.
+        self.assertClose(_signed_area(torch.tensor(L_shape)), torch.tensor(0.5))
diff --git a/pytorch3d/tests/implicitron/test_co3d_sql.py b/pytorch3d/tests/implicitron/test_co3d_sql.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f873cfc3466c483b3025ade5c752fa8bb64d131
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_co3d_sql.py
@@ -0,0 +1,246 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+import os
+import unittest
+
+import torch
+
+from pytorch3d.implicitron.dataset.data_loader_map_provider import (  # noqa
+    SequenceDataLoaderMapProvider,
+    SimpleDataLoaderMapProvider,
+)
+from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource
+from pytorch3d.implicitron.dataset.sql_dataset import SqlIndexDataset  # noqa
+from pytorch3d.implicitron.dataset.sql_dataset_provider import (  # noqa
+    SqlIndexDatasetMapProvider,
+)
+from pytorch3d.implicitron.dataset.train_eval_data_loader_provider import (
+    TrainEvalDataLoaderMapProvider,
+)
+from pytorch3d.implicitron.tools.config import get_default_args
+
+logger = logging.getLogger("pytorch3d.implicitron.dataset.sql_dataset")
+sh = logging.StreamHandler()  # no stream given, so it writes to sys.stderr
+logger.addHandler(sh)  # runs at import time, even if the tests are skipped
+logger.setLevel(logging.DEBUG)
+# Empty string (falsy) when the variable is unset; gates the test class below.
+_CO3D_SQL_DATASET_ROOT: str = os.getenv("CO3D_SQL_DATASET_ROOT", "")
+
+
+@unittest.skipUnless(_CO3D_SQL_DATASET_ROOT, "Run only if CO3D is available")
+class TestCo3dSqlDataSource(unittest.TestCase):
+    def test_no_subsets(self):
+        args = get_default_args(ImplicitronDataSource)
+        args.dataset_map_provider_class_type = "SqlIndexDatasetMapProvider"
+        args.data_loader_map_provider_class_type = "TrainEvalDataLoaderMapProvider"
+        provider_args = args.dataset_map_provider_SqlIndexDatasetMapProvider_args
+        provider_args.ignore_subsets = True
+
+        dataset_args = provider_args.dataset_SqlIndexDataset_args
+        dataset_args.pick_categories = ["skateboard"]
+        dataset_args.limit_sequences_to = 1
+
+        data_source = ImplicitronDataSource(**args)
+        self.assertIsInstance(
+            data_source.data_loader_map_provider, TrainEvalDataLoaderMapProvider
+        )
+        _, data_loaders = data_source.get_datasets_and_dataloaders()
+        self.assertEqual(len(data_loaders.train), 202)
+        for frame in data_loaders.train:
+            self.assertIsNone(frame.frame_type)  # subsets ignored: no frame type
+            self.assertEqual(frame.image_rgb.shape[-1], 800)  # check loading blobs
+            break  # only the first batch is inspected
+
+    def test_subsets(self):
+        args = get_default_args(ImplicitronDataSource)
+        args.dataset_map_provider_class_type = "SqlIndexDatasetMapProvider"
+        provider_args = args.dataset_map_provider_SqlIndexDatasetMapProvider_args
+        provider_args.subset_lists_path = (
+            "skateboard/set_lists/set_lists_manyview_dev_0.json"
+        )
+        # this will naturally limit to one sequence (no need to limit by cat/sequence)
+
+        dataset_args = provider_args.dataset_SqlIndexDataset_args
+        dataset_args.remove_empty_masks = True
+
+        for sampler_type in [  # every provider must yield the same split sizes
+            "SimpleDataLoaderMapProvider",
+            "SequenceDataLoaderMapProvider",
+            "TrainEvalDataLoaderMapProvider",
+        ]:
+            args.data_loader_map_provider_class_type = sampler_type
+            data_source = ImplicitronDataSource(**args)
+            _, data_loaders = data_source.get_datasets_and_dataloaders()
+            self.assertEqual(len(data_loaders.train), 102)
+            self.assertEqual(len(data_loaders.val), 100)
+            self.assertEqual(len(data_loaders.test), 100)
+            for split in ["train", "val", "test"]:
+                for frame in data_loaders[split]:
+                    self.assertEqual(frame.frame_type, [split])
+                    # check loading blobs
+                    self.assertEqual(frame.image_rgb.shape[-1], 800)
+                    break  # only the first batch is inspected
+
+    def test_sql_subsets(self):
+        args = get_default_args(ImplicitronDataSource)
+        args.dataset_map_provider_class_type = "SqlIndexDatasetMapProvider"
+        provider_args = args.dataset_map_provider_SqlIndexDatasetMapProvider_args
+        provider_args.subset_lists_path = "set_lists/set_lists_manyview_dev_0.sqlite"
+
+        dataset_args = provider_args.dataset_SqlIndexDataset_args
+        dataset_args.remove_empty_masks = True
+        dataset_args.pick_categories = ["skateboard"]
+
+        for sampler_type in [  # every provider must yield the same split sizes
+            "SimpleDataLoaderMapProvider",
+            "SequenceDataLoaderMapProvider",
+            "TrainEvalDataLoaderMapProvider",
+        ]:
+            args.data_loader_map_provider_class_type = sampler_type
+            data_source = ImplicitronDataSource(**args)
+            _, data_loaders = data_source.get_datasets_and_dataloaders()
+            self.assertEqual(len(data_loaders.train), 102)
+            self.assertEqual(len(data_loaders.val), 100)
+            self.assertEqual(len(data_loaders.test), 100)
+            for split in ["train", "val", "test"]:
+                for frame in data_loaders[split]:
+                    self.assertEqual(frame.frame_type, [split])
+                    self.assertEqual(
+                        frame.image_rgb.shape[-1], 800
+                    )  # check loading blobs
+                    break  # only the first batch is inspected
+
+    @unittest.skip("It takes 75 seconds; skipping by default")
+    def test_huge_subsets(self):
+        args = get_default_args(ImplicitronDataSource)
+        args.dataset_map_provider_class_type = "SqlIndexDatasetMapProvider"
+        args.data_loader_map_provider_class_type = "TrainEvalDataLoaderMapProvider"
+        provider_args = args.dataset_map_provider_SqlIndexDatasetMapProvider_args
+        provider_args.subset_lists_path = "set_lists/set_lists_fewview_dev.sqlite"
+
+        dataset_args = provider_args.dataset_SqlIndexDataset_args
+        dataset_args.remove_empty_masks = True
+
+        data_source = ImplicitronDataSource(**args)
+        _, data_loaders = data_source.get_datasets_and_dataloaders()
+        self.assertEqual(len(data_loaders.train), 3158974)
+        self.assertEqual(len(data_loaders.val), 518417)
+        self.assertEqual(len(data_loaders.test), 518417)
+        for split in ["train", "val", "test"]:
+            for frame in data_loaders[split]:
+                self.assertEqual(frame.frame_type, [split])
+                self.assertEqual(frame.image_rgb.shape[-1], 800)  # check loading blobs
+                break  # only the first batch is inspected
+
+    def test_broken_subsets(self):
+        args = get_default_args(ImplicitronDataSource)
+        args.dataset_map_provider_class_type = "SqlIndexDatasetMapProvider"
+        args.data_loader_map_provider_class_type = "TrainEvalDataLoaderMapProvider"
+        provider_args = args.dataset_map_provider_SqlIndexDatasetMapProvider_args
+        provider_args.subset_lists_path = "et_non_est"  # deliberately missing path
+        provider_args.dataset_SqlIndexDataset_args.pick_categories = ["skateboard"]
+        with self.assertRaises(FileNotFoundError) as err:
+            ImplicitronDataSource(**args)
+
+        # check the hint text
+        self.assertIn("Subset lists path given but not found", str(err.exception))
+
+    def test_eval_batches(self):
+        args = get_default_args(ImplicitronDataSource)
+        args.dataset_map_provider_class_type = "SqlIndexDatasetMapProvider"
+        args.data_loader_map_provider_class_type = "TrainEvalDataLoaderMapProvider"
+        provider_args = args.dataset_map_provider_SqlIndexDatasetMapProvider_args
+        provider_args.subset_lists_path = "set_lists/set_lists_manyview_dev_0.sqlite"
+        provider_args.eval_batches_path = (
+            "skateboard/eval_batches/eval_batches_manyview_dev_0.json"
+        )
+
+        dataset_args = provider_args.dataset_SqlIndexDataset_args
+        dataset_args.remove_empty_masks = True
+        dataset_args.pick_categories = ["skateboard"]
+
+        data_source = ImplicitronDataSource(**args)
+        _, data_loaders = data_source.get_datasets_and_dataloaders()
+        self.assertEqual(len(data_loaders.train), 102)
+        self.assertEqual(len(data_loaders.val), 100)
+        self.assertEqual(len(data_loaders.test), 50)  # 100 in test_sql_subsets
+        for split in ["train", "val", "test"]:
+            for frame in data_loaders[split]:
+                self.assertEqual(frame.frame_type, [split])
+                self.assertEqual(frame.image_rgb.shape[-1], 800)  # check loading blobs
+                break  # only the first batch is inspected
+
+    def test_eval_batches_from_subset_list_name(self):
+        args = get_default_args(ImplicitronDataSource)
+        args.dataset_map_provider_class_type = "SqlIndexDatasetMapProvider"
+        args.data_loader_map_provider_class_type = "TrainEvalDataLoaderMapProvider"
+        provider_args = args.dataset_map_provider_SqlIndexDatasetMapProvider_args
+        provider_args.subset_list_name = "manyview_dev_0"
+        provider_args.category = "skateboard"
+
+        dataset_args = provider_args.dataset_SqlIndexDataset_args
+        dataset_args.remove_empty_masks = True
+
+        data_source = ImplicitronDataSource(**args)
+        dataset, data_loaders = data_source.get_datasets_and_dataloaders()
+        self.assertListEqual(list(dataset.train.pick_categories), ["skateboard"])
+        self.assertEqual(len(data_loaders.train), 102)
+        self.assertEqual(len(data_loaders.val), 100)
+        self.assertEqual(len(data_loaders.test), 50)  # same sizes as test_eval_batches
+        for split in ["train", "val", "test"]:
+            for frame in data_loaders[split]:
+                self.assertEqual(frame.frame_type, [split])
+                self.assertEqual(frame.image_rgb.shape[-1], 800)  # check loading blobs
+                break  # only the first batch is inspected
+
+    def test_frame_access(self):
+        args = get_default_args(ImplicitronDataSource)
+        args.dataset_map_provider_class_type = "SqlIndexDatasetMapProvider"
+        args.data_loader_map_provider_class_type = "TrainEvalDataLoaderMapProvider"
+        provider_args = args.dataset_map_provider_SqlIndexDatasetMapProvider_args
+        provider_args.subset_lists_path = "set_lists/set_lists_manyview_dev_0.sqlite"
+
+        dataset_args = provider_args.dataset_SqlIndexDataset_args
+        dataset_args.remove_empty_masks = True
+        dataset_args.pick_categories = ["skateboard"]
+        frame_builder_args = dataset_args.frame_data_builder_FrameDataBuilder_args
+        frame_builder_args.load_point_clouds = True
+        frame_builder_args.box_crop = False  # required for .meta
+
+        data_source = ImplicitronDataSource(**args)
+        dataset_map, _ = data_source.get_datasets_and_dataloaders()
+        dataset = dataset_map["train"]
+
+        # an integer index, and a tuple key (presumably sequence name + frame number)
+        for idx in [10, ("245_26182_52130", 22)]:
+            example_meta = dataset.meta[idx]
+            example = dataset[idx]
+
+            self.assertIsNone(example_meta.image_rgb)
+            self.assertIsNone(example_meta.fg_probability)
+            self.assertIsNone(example_meta.depth_map)
+            self.assertIsNone(example_meta.sequence_point_cloud)
+            self.assertIsNotNone(example_meta.camera)
+
+            self.assertIsNotNone(example.image_rgb)
+            self.assertIsNotNone(example.fg_probability)
+            self.assertIsNotNone(example.depth_map)
+            self.assertIsNotNone(example.sequence_point_cloud)
+            self.assertIsNotNone(example.camera)
+
+            self.assertEqual(example_meta.sequence_name, example.sequence_name)
+            self.assertEqual(example_meta.frame_number, example.frame_number)
+            self.assertEqual(example_meta.frame_timestamp, example.frame_timestamp)
+            self.assertEqual(example_meta.sequence_category, example.sequence_category)
+            torch.testing.assert_close(example_meta.camera.R, example.camera.R)
+            torch.testing.assert_close(example_meta.camera.T, example.camera.T)
+            torch.testing.assert_close(
+                example_meta.camera.focal_length, example.camera.focal_length
+            )
+            torch.testing.assert_close(
+                example_meta.camera.principal_point, example.camera.principal_point
+            )
diff --git a/pytorch3d/tests/implicitron/test_config.py b/pytorch3d/tests/implicitron/test_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..471451b450e4704f4dbd6ad40b2dd9269c1561b9
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_config.py
@@ -0,0 +1,921 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import pickle
+import textwrap
+import unittest
+from dataclasses import dataclass, field, is_dataclass
+from enum import Enum
+from typing import Any, Dict, List, Optional, Tuple
+from unittest.mock import Mock
+
+from omegaconf import DictConfig, ListConfig, OmegaConf, ValidationError
+from pytorch3d.implicitron.tools.config import (
+    _get_type_to_process,
+    _is_actually_dataclass,
+    _ProcessType,
+    _Registry,
+    Configurable,
+    enable_get_default_args,
+    expand_args_fields,
+    get_default_args,
+    get_default_args_field,
+    registry,
+    remove_unused_components,
+    ReplaceableBase,
+    run_auto_creation,
+)
+
+
+@dataclass
+class Animal(ReplaceableBase):
+    pass
+
+
+class Fruit(ReplaceableBase):
+    pass
+
+
+@registry.register
+class Banana(Fruit):
+    pips: int
+    spots: int
+    bananame: str
+
+
+@registry.register
+class Pear(Fruit):
+    n_pips: int = 13
+
+
+class Pineapple(Fruit):
+    pass
+
+
+@registry.register
+class Orange(Fruit):
+    pass
+
+
+@registry.register
+class Kiwi(Fruit):
+    pass
+
+
+@registry.register
+class LargePear(Pear):
+    pass
+
+
+class BoringConfigurable(Configurable):
+    pass
+
+
+class MainTest(Configurable):
+    the_fruit: Fruit
+    n_ids: int
+    n_reps: int = 8
+    the_second_fruit: Fruit
+
+    def create_the_second_fruit(self):
+        expand_args_fields(Pineapple)
+        self.the_second_fruit = Pineapple()
+
+    def __post_init__(self):
+        run_auto_creation(self)
+
+
+class TestConfig(unittest.TestCase):
+    def test_is_actually_dataclass(self):
+        @dataclass
+        class A:
+            pass
+
+        self.assertTrue(_is_actually_dataclass(A))
+        self.assertTrue(is_dataclass(A))
+
+        class B(A):
+            a: int
+
+        self.assertFalse(_is_actually_dataclass(B))
+        self.assertTrue(is_dataclass(B))
+
+    def test_get_type_to_process(self):
+        gt = _get_type_to_process
+        self.assertIsNone(gt(int))
+        self.assertEqual(gt(Fruit), (Fruit, _ProcessType.REPLACEABLE))
+        self.assertEqual(
+            gt(Optional[Fruit]), (Fruit, _ProcessType.OPTIONAL_REPLACEABLE)
+        )
+        self.assertEqual(gt(MainTest), (MainTest, _ProcessType.CONFIGURABLE))
+        self.assertEqual(
+            gt(Optional[MainTest]), (MainTest, _ProcessType.OPTIONAL_CONFIGURABLE)
+        )
+        self.assertIsNone(gt(Optional[int]))
+        self.assertIsNone(gt(Tuple[Fruit]))
+        self.assertIsNone(gt(Tuple[Fruit, Animal]))
+        self.assertIsNone(gt(Optional[List[int]]))
+
+    def test_simple_replacement(self):
+        struct = get_default_args(MainTest)
+        struct.n_ids = 9780
+        struct.the_fruit_Pear_args.n_pips = 3
+        struct.the_fruit_class_type = "Pear"
+        struct.the_second_fruit_class_type = "Pear"
+
+        main = MainTest(**struct)
+        self.assertIsInstance(main.the_fruit, Pear)
+        self.assertEqual(main.n_reps, 8)
+        self.assertEqual(main.n_ids, 9780)
+        self.assertEqual(main.the_fruit.n_pips, 3)
+        self.assertIsInstance(main.the_second_fruit, Pineapple)
+
+        struct2 = get_default_args(MainTest)
+        self.assertEqual(struct2.the_fruit_Pear_args.n_pips, 13)
+
+        self.assertEqual(
+            MainTest._creation_functions,
+            ("create_the_fruit", "create_the_second_fruit"),
+        )
+
+    def test_detect_bases(self):
+        # testing the _base_class_from_class function
+        self.assertIsNone(_Registry._base_class_from_class(ReplaceableBase))
+        self.assertIsNone(_Registry._base_class_from_class(MainTest))
+        self.assertIs(_Registry._base_class_from_class(Fruit), Fruit)
+        self.assertIs(_Registry._base_class_from_class(Pear), Fruit)
+
+        class PricklyPear(Pear):
+            pass
+
+        self.assertIs(_Registry._base_class_from_class(PricklyPear), Fruit)
+
+    def test_registry_entries(self):
+        self.assertIs(registry.get(Fruit, "Banana"), Banana)
+        with self.assertRaisesRegex(ValueError, "Banana has not been registered."):
+            registry.get(Animal, "Banana")
+        with self.assertRaisesRegex(ValueError, "PricklyPear has not been registered."):
+            registry.get(Fruit, "PricklyPear")
+
+        self.assertIs(registry.get(Pear, "Pear"), Pear)
+        self.assertIs(registry.get(Pear, "LargePear"), LargePear)
+        with self.assertRaisesRegex(ValueError, "Banana resolves to"):
+            registry.get(Pear, "Banana")
+
+        all_fruit = set(registry.get_all(Fruit))
+        self.assertIn(Banana, all_fruit)
+        self.assertIn(Pear, all_fruit)
+        self.assertIn(LargePear, all_fruit)
+        self.assertEqual(registry.get_all(Pear), [LargePear])
+
+        @registry.register
+        class Apple(Fruit):
+            pass
+
+        @registry.register
+        class CrabApple(Apple):
+            pass
+
+        self.assertEqual(registry.get_all(Apple), [CrabApple])
+
+        self.assertIs(registry.get(Fruit, "CrabApple"), CrabApple)
+
+        with self.assertRaisesRegex(ValueError, "Cannot tell what it is."):
+
+            @registry.register
+            class NotAFruit:
+                pass
+
+    def test_recursion(self):
+        class Shape(ReplaceableBase):
+            pass
+
+        @registry.register
+        class Triangle(Shape):
+            a: float = 5.0
+
+        @registry.register
+        class Square(Shape):
+            a: float = 3.0
+
+        @registry.register
+        class LargeShape(Shape):
+            inner: Shape
+
+            def __post_init__(self):
+                run_auto_creation(self)
+
+        class ShapeContainer(Configurable):
+            shape: Shape
+
+        container = ShapeContainer(**get_default_args(ShapeContainer))
+        # This is because ShapeContainer is missing __post_init__
+        with self.assertRaises(AttributeError):
+            container.shape
+
+        class ShapeContainer2(Configurable):
+            x: Shape
+            x_class_type: str = "LargeShape"
+
+            def __post_init__(self):
+                self.x_LargeShape_args.inner_class_type = "Triangle"
+                run_auto_creation(self)
+
+        container2_args = get_default_args(ShapeContainer2)
+        container2_args.x_LargeShape_args.inner_Triangle_args.a += 10
+        self.assertIn("inner_Square_args", container2_args.x_LargeShape_args)
+        # We do not perform expansion that would result in an infinite recursion,
+        # so this member is not present.
+        self.assertNotIn("inner_LargeShape_args", container2_args.x_LargeShape_args)
+        container2_args.x_LargeShape_args.inner_Square_args.a += 100
+        container2 = ShapeContainer2(**container2_args)
+        self.assertIsInstance(container2.x, LargeShape)
+        self.assertIsInstance(container2.x.inner, Triangle)
+        self.assertEqual(container2.x.inner.a, 15.0)
+
+    def test_simpleclass_member(self):
+        # Members which are plain (non-Configurable) dataclasses are
+        # tolerated as ordinary fields. It would be nice to be able to
+        # configure plain classes like Foo as members too.
+        class Foo:
+            def __init__(self, a: Any = 1, b: Any = 2):
+                self.a, self.b = a, b
+
+        enable_get_default_args(Foo)
+
+        @dataclass()
+        class Bar:
+            aa: int = 9
+            bb: int = 9
+
+        class Container(Configurable):
+            bar: Bar = field(default_factory=Bar)  # Bar() default is rejected on 3.11+
+            # TODO make this work?
+            # foo: Foo = Foo()
+            fruit: Fruit
+            fruit_class_type: str = "Orange"
+
+            def __post_init__(self):
+                run_auto_creation(self)
+
+        self.assertEqual(get_default_args(Foo), {"a": 1, "b": 2})
+        container_args = get_default_args(Container)
+        container = Container(**container_args)
+        self.assertIsInstance(container.fruit, Orange)
+        self.assertEqual(Container._processed_members, {"fruit": Fruit})
+        self.assertEqual(container._processed_members, {"fruit": Fruit})
+
+        container_defaulted = Container()
+        container_defaulted.fruit_Pear_args.n_pips += 4
+
+        container_args2 = get_default_args(Container)
+        container = Container(**container_args2)
+        self.assertEqual(container.fruit_Pear_args.n_pips, 13)
+
+    def test_inheritance(self):
+        # Also exercises optional replaceables
+        class FruitBowl(ReplaceableBase):
+            main_fruit: Fruit
+            main_fruit_class_type: str = "Orange"
+
+            def __post_init__(self):
+                raise ValueError("This doesn't get called")
+
+        class LargeFruitBowl(FruitBowl):
+            extra_fruit: Optional[Fruit]
+            extra_fruit_class_type: str = "Kiwi"
+            no_fruit: Optional[Fruit]
+            no_fruit_class_type: Optional[str] = None
+
+            def __post_init__(self):
+                run_auto_creation(self)
+
+        large_args = get_default_args(LargeFruitBowl)
+        self.assertNotIn("extra_fruit", large_args)
+        self.assertNotIn("main_fruit", large_args)
+        large = LargeFruitBowl(**large_args)
+        self.assertIsInstance(large.main_fruit, Orange)
+        self.assertIsInstance(large.extra_fruit, Kiwi)
+        self.assertIsNone(large.no_fruit)
+        self.assertIn("no_fruit_Kiwi_args", large_args)
+
+        remove_unused_components(large_args)
+        large2 = LargeFruitBowl(**large_args)
+        self.assertIsInstance(large2.main_fruit, Orange)
+        self.assertIsInstance(large2.extra_fruit, Kiwi)
+        self.assertIsNone(large2.no_fruit)
+        needed_args = [
+            "extra_fruit_Kiwi_args",
+            "extra_fruit_class_type",
+            "main_fruit_Orange_args",
+            "main_fruit_class_type",
+            "no_fruit_class_type",
+        ]
+        self.assertEqual(sorted(large_args.keys()), needed_args)
+
+        with self.assertRaisesRegex(ValueError, "NotAFruit has not been registered."):
+            LargeFruitBowl(extra_fruit_class_type="NotAFruit")
+
+    def test_inheritance2(self):
+        # This is a case where a class could contain an instance
+        # of a subclass, which is ignored.
+        class Parent(ReplaceableBase):
+            pass
+
+        class Main(Configurable):
+            parent: Parent
+            # Note - no __post_init__, so run_auto_creation is called by hand below.
+
+        @registry.register
+        class Derived(Parent, Main):
+            pass
+
+        args = get_default_args(Main)
+        # Derived has been ignored in processing Main.
+        self.assertCountEqual(args.keys(), ["parent_class_type"])
+
+        main = Main(**args)
+
+        with self.assertRaisesRegex(ValueError, "UNDEFAULTED has not been registered."):
+            run_auto_creation(main)
+
+        main.parent_class_type = "Derived"
+        # Illustrates that a dict works fine instead of a DictConfig.
+        main.parent_Derived_args = {}
+        with self.assertRaises(AttributeError):
+            main.parent
+        run_auto_creation(main)
+        self.assertIsInstance(main.parent, Derived)
+
+    def test_redefine(self):
+        class FruitBowl(ReplaceableBase):
+            main_fruit: Fruit
+            main_fruit_class_type: str = "Grape"
+
+            def __post_init__(self):
+                run_auto_creation(self)
+
+        @registry.register
+        @dataclass
+        class Grape(Fruit):
+            large: bool = False
+
+            def get_color(self):
+                return "red"
+
+            def __post_init__(self):
+                raise ValueError("This doesn't get called")
+
+        bowl_args = get_default_args(FruitBowl)
+
+        @registry.register
+        @dataclass
+        class Grape(Fruit):  # noqa: F811
+            large: bool = True
+
+            def get_color(self):
+                return "green"
+
+        with self.assertWarnsRegex(
+            UserWarning, "New implementation of Grape is being chosen."
+        ):
+            defaulted_bowl = FruitBowl()
+        self.assertIsInstance(defaulted_bowl.main_fruit, Grape)
+        self.assertEqual(defaulted_bowl.main_fruit.large, True)
+        self.assertEqual(defaulted_bowl.main_fruit.get_color(), "green")
+
+        with self.assertWarnsRegex(
+            UserWarning, "New implementation of Grape is being chosen."
+        ):
+            args_bowl = FruitBowl(**bowl_args)
+        self.assertIsInstance(args_bowl.main_fruit, Grape)
+        # Redefining the same class won't help with defaults because encoded in args
+        self.assertEqual(args_bowl.main_fruit.large, False)
+        # But the override worked.
+        self.assertEqual(args_bowl.main_fruit.get_color(), "green")
+
+        # 2. Try redefining without the dataclass modifier
+        # This relies on the fact that default creation processes the class.
+        # (otherwise incomprehensible messages)
+        @registry.register
+        class Grape(Fruit):  # noqa: F811
+            large: bool = True
+
+        with self.assertWarnsRegex(
+            UserWarning, "New implementation of Grape is being chosen."
+        ):
+            FruitBowl(**bowl_args)
+
+        # 3. Adding a new class doesn't get picked up, because the first
+        # get_default_args call has frozen FruitBowl. This is intrinsic to
+        # the way dataclass and expand_args_fields work in-place but
+        # expand_args_fields is not pure - it depends on the registry.
+        @registry.register
+        class Fig(Fruit):
+            pass
+
+        bowl_args2 = get_default_args(FruitBowl)
+        self.assertIn("main_fruit_Grape_args", bowl_args2)
+        self.assertNotIn("main_fruit_Fig_args", bowl_args2)
+
+        # TODO Is it possible to make this work?
+        # bowl_args2["main_fruit_Fig_args"] = get_default_args(Fig)
+        # bowl_args2.main_fruit_class_type = "Fig"
+        # bowl2 = FruitBowl(**bowl_args2)  <= unexpected argument
+
+        # Note that it is possible to use Fig if you can set
+        # bowl2.main_fruit_Fig_args explicitly (not in bowl_args2)
+        # before run_auto_creation happens. See test_inheritance2
+        # for an example.
+
+    def test_no_replacement(self):
+        # Test of Configurables without ReplaceableBase
+        class A(Configurable):
+            n: int = 9
+
+        class B(Configurable):
+            a: A
+
+            def __post_init__(self):
+                run_auto_creation(self)
+
+        class C(Configurable):
+            b1: B
+            b2: Optional[B]
+            b3: Optional[B]
+            b2_enabled: bool = True
+            b3_enabled: bool = False
+
+            def __post_init__(self):
+                run_auto_creation(self)
+
+        c_args = get_default_args(C)
+        c = C(**c_args)
+        self.assertIsInstance(c.b1.a, A)
+        self.assertEqual(c.b1.a.n, 9)
+        self.assertFalse(hasattr(c, "b1_enabled"))
+        self.assertIsInstance(c.b2.a, A)
+        self.assertEqual(c.b2.a.n, 9)
+        self.assertTrue(c.b2_enabled)
+        self.assertIsNone(c.b3)
+        self.assertFalse(c.b3_enabled)
+
+    def test_doc(self):
+        # The case in the docstring.
+        class A(ReplaceableBase):
+            k: int = 1
+
+        @registry.register
+        class A1(A):
+            m: int = 3
+
+        @registry.register
+        class A2(A):
+            n: str = "2"
+
+        class B(Configurable):
+            a: A
+            a_class_type: str = "A2"
+
+            def __post_init__(self):
+                run_auto_creation(self)
+
+        b_args = get_default_args(B)
+        self.assertNotIn("a", b_args)
+        b = B(**b_args)
+        self.assertEqual(b.a.n, "2")
+
+    def test_raw_types(self):
+        @dataclass
+        class MyDataclass:
+            int_field: int = 0
+            none_field: Optional[int] = None
+            float_field: float = 9.3
+            bool_field: bool = True
+            tuple_field: Tuple[int, ...] = (3,)
+
+        class SimpleClass:
+            def __init__(
+                self,
+                tuple_member_: Tuple[int, int] = (3, 4),
+            ):
+                self.tuple_member = tuple_member_
+
+            def get_tuple(self):
+                return self.tuple_member
+
+        enable_get_default_args(SimpleClass)
+
+        def f(*, a: int = 3, b: str = "kj"):
+            self.assertEqual(a, 3)
+            self.assertEqual(b, "kj")
+
+        enable_get_default_args(f)
+
+        class C(Configurable):
+            simple: DictConfig = get_default_args_field(SimpleClass)
+            # simple2: SimpleClass2 = SimpleClass2()
+            mydata: DictConfig = get_default_args_field(MyDataclass)
+            a_tuple: Tuple[float] = (4.0, 3.0)
+            f_args: DictConfig = get_default_args_field(f)
+
+        args = get_default_args(C)
+        c = C(**args)
+        self.assertCountEqual(args.keys(), ["simple", "mydata", "a_tuple", "f_args"])
+
+        mydata = MyDataclass(**c.mydata)
+        simple = SimpleClass(**c.simple)
+
+        # OmegaConf converts tuples to ListConfigs (which act like lists).
+        self.assertEqual(simple.get_tuple(), [3, 4])
+        self.assertTrue(isinstance(simple.get_tuple(), ListConfig))
+        # get_default_args converts tuples to ListConfigs (which act like lists).
+        self.assertEqual(c.a_tuple, [4.0, 3.0])
+        self.assertTrue(isinstance(c.a_tuple, ListConfig))
+        self.assertEqual(mydata.tuple_field, (3,))
+        self.assertTrue(isinstance(mydata.tuple_field, ListConfig))
+        f(**c.f_args)
+
+    def test_irrelevant_bases(self):
+        class NotADataclass:
+            # Like torch.nn.Module, this class contains annotations
+            # but is not designed to be dataclass'd.
+            # This test ensures that such classes, when inherited from,
+            # are not accidentally affected by expand_args_fields.
+            a: int = 9
+            b: int
+
+        class LeftConfigured(Configurable, NotADataclass):
+            left: int = 1
+
+        class RightConfigured(NotADataclass, Configurable):
+            right: int = 2
+
+        class Outer(Configurable):
+            left: LeftConfigured
+            right: RightConfigured
+
+            def __post_init__(self):
+                run_auto_creation(self)
+
+        outer = Outer(**get_default_args(Outer))
+        self.assertEqual(outer.left.left, 1)
+        self.assertEqual(outer.right.right, 2)
+        with self.assertRaisesRegex(TypeError, "non-default argument"):
+            dataclass(NotADataclass)
+
+    def test_unprocessed(self):
+        # Instantiating a not-yet-processed class turns it into a dataclass.
+        class UnprocessedConfigurable(Configurable):
+            a: int = 9
+
+        class UnprocessedReplaceable(ReplaceableBase):
+            a: int = 9
+
+        for Unprocessed in [UnprocessedConfigurable, UnprocessedReplaceable]:
+
+            self.assertFalse(_is_actually_dataclass(Unprocessed))
+            unprocessed = Unprocessed()
+            self.assertTrue(_is_actually_dataclass(Unprocessed))
+            self.assertTrue(isinstance(unprocessed, Unprocessed))
+            self.assertEqual(unprocessed.a, 9)
+
+    def test_enum(self):
+        # Test that enum values are kept, i.e. that OmegaConf's runtime checks
+        # are in use.
+
+        class A(Enum):
+            B1 = "b1"
+            B2 = "b2"
+
+        # Test for a Configurable class, a function, and a regular class.
+        class C(Configurable):
+            a: A = A.B1
+
+        # Also test for a callable with enum arguments.
+        def C_fn(a: A = A.B1):
+            pass
+
+        enable_get_default_args(C_fn)
+
+        class C_cl:
+            def __init__(self, a: A = A.B1) -> None:
+                pass
+
+        enable_get_default_args(C_cl)
+
+        for C_ in [C, C_fn, C_cl]:
+            base = get_default_args(C_)
+            self.assertEqual(OmegaConf.to_yaml(base), "a: B1\n")
+            self.assertEqual(base.a, A.B1)
+            replaced = OmegaConf.merge(base, {"a": "B2"})
+            self.assertEqual(replaced.a, A.B2)
+            with self.assertRaises(ValidationError):
+                # You can't use a value which is not one of the
+                # choices, even if it is the str representation
+                # of one of the choices.
+                OmegaConf.merge(base, {"a": "b2"})
+
+            remerged = OmegaConf.merge(base, OmegaConf.create(OmegaConf.to_yaml(base)))
+            self.assertEqual(remerged.a, A.B1)
+
+    def test_pickle(self):
+        def func(a: int = 1, b: str = "3"):
+            pass
+
+        enable_get_default_args(func)
+
+        args = get_default_args(func)
+        args2 = pickle.loads(pickle.dumps(args))
+        self.assertEqual(args2.a, 1)
+        self.assertEqual(args2.b, "3")
+
+        args_regenerated = get_default_args(func)
+        pickle.dumps(args_regenerated)
+        pickle.dumps(args)
+
+    def test_remove_unused_components(self):
+        struct = get_default_args(MainTest)
+        struct.n_ids = 32
+        struct.the_fruit_class_type = "Pear"
+        struct.the_second_fruit_class_type = "Banana"
+        remove_unused_components(struct)
+        expected_keys = [
+            "n_ids",
+            "n_reps",
+            "the_fruit_Pear_args",
+            "the_fruit_class_type",
+            "the_second_fruit_Banana_args",
+            "the_second_fruit_class_type",
+        ]
+        expected_yaml = textwrap.dedent(
+            """\
+            n_ids: 32
+            n_reps: 8
+            the_fruit_class_type: Pear
+            the_fruit_Pear_args:
+              n_pips: 13
+            the_second_fruit_class_type: Banana
+            the_second_fruit_Banana_args:
+              pips: ???
+              spots: ???
+              bananame: ???
+            """
+        )
+        self.assertEqual(sorted(struct.keys()), expected_keys)
+
+        # Check that struct is what we expect
+        expected = OmegaConf.create(expected_yaml)
+        self.assertEqual(struct, expected)
+
+        # Check that we get what we expect when writing to yaml.
+        self.assertEqual(OmegaConf.to_yaml(struct, sort_keys=False), expected_yaml)
+
+        main = MainTest(**struct)
+        instance_data = OmegaConf.structured(main)
+        remove_unused_components(instance_data)
+        self.assertEqual(sorted(instance_data.keys()), expected_keys)
+        self.assertEqual(instance_data, expected)
+
+    def test_remove_unused_components_optional(self):
+        class MainTestWrapper(Configurable):
+            mt: Optional[MainTest]
+            mt_enabled: bool = False
+
+        args = get_default_args(MainTestWrapper)
+        self.assertEqual(list(args.keys()), ["mt_enabled", "mt_args"])
+        remove_unused_components(args)
+        self.assertEqual(OmegaConf.to_yaml(args), "mt_enabled: false\n")
+
+    def test_get_instance_args(self):
+        mt1, mt2 = [
+            MainTest(
+                n_ids=0,
+                n_reps=909,
+                the_fruit_class_type="Pear",
+                the_second_fruit_class_type="Pear",
+                the_fruit_Pear_args=DictConfig({}),
+                the_second_fruit_Pear_args={},
+            )
+            for _ in range(2)
+        ]
+        # Two equivalent ways to get the DictConfig back out of an instance.
+        cfg1 = OmegaConf.structured(mt1)
+        cfg2 = get_default_args(mt2)
+        self.assertEqual(cfg1, cfg2)
+        self.assertEqual(len(cfg1.the_second_fruit_Pear_args), 0)
+        self.assertEqual(len(mt2.the_second_fruit_Pear_args), 0)
+
+        from_cfg = MainTest(**cfg2)
+        self.assertEqual(len(from_cfg.the_second_fruit_Pear_args), 0)
+
+        # If you want the complete args, merge with the defaults.
+        merged_args = OmegaConf.merge(get_default_args(MainTest), cfg2)
+        from_merged = MainTest(**merged_args)
+        self.assertEqual(len(from_merged.the_second_fruit_Pear_args), 1)
+        self.assertEqual(from_merged.n_reps, 909)
+
+    def test_tweak_hook(self):
+        class A(Configurable):
+            n: int = 9
+
+        class Wrapper(Configurable):
+            fruit: Fruit
+            fruit_class_type: str = "Pear"
+            fruit2: Fruit
+            fruit2_class_type: str = "Pear"
+            a: A
+            a2: A
+            a3: A
+
+            @classmethod
+            def a_tweak_args(cls, type, args):
+                assert type == A
+                args.n = 993
+
+            @classmethod
+            def a3_tweak_args(cls, type, args):
+                del args["n"]
+
+            @classmethod
+            def fruit_tweak_args(cls, type, args):
+                assert issubclass(type, Fruit)
+                if type == Pear:
+                    assert args.n_pips == 13
+                    args.n_pips = 19
+
+        args = get_default_args(Wrapper)
+        self.assertEqual(args.a_args.n, 993)
+        self.assertEqual(args.a2_args.n, 9)
+        self.assertEqual(args.a3_args, {})
+        self.assertEqual(args.fruit_Pear_args.n_pips, 19)
+        self.assertEqual(args.fruit2_Pear_args.n_pips, 13)
+
+    def test_impls(self):
+        # Check that create_x actually uses create_x_impl to do its work
+        # by using all the member types, both with a faked impl function
+        # and without.
+        # members with _0 are optional and absent, those with _o are
+        # optional and present.
+        control_args = []
+
+        def fake_impl(self, control, args):
+            control_args.append(control)
+
+        for fake in [False, True]:
+
+            class MyClass(Configurable):
+                fruit: Fruit
+                fruit_class_type: str = "Orange"
+                fruit_o: Optional[Fruit]
+                fruit_o_class_type: str = "Orange"
+                fruit_0: Optional[Fruit]
+                fruit_0_class_type: Optional[str] = None
+                boring: BoringConfigurable
+                boring_o: Optional[BoringConfigurable]
+                boring_o_enabled: bool = True
+                boring_0: Optional[BoringConfigurable]
+                boring_0_enabled: bool = False
+
+                def __post_init__(self):
+                    run_auto_creation(self)
+
+            if fake:
+                MyClass.create_fruit_impl = fake_impl
+                MyClass.create_fruit_o_impl = fake_impl
+                MyClass.create_boring_impl = fake_impl
+                MyClass.create_boring_o_impl = fake_impl
+
+            expand_args_fields(MyClass)
+            instance = MyClass()
+            for name in ["fruit", "fruit_o", "boring", "boring_o"]:
+                self.assertEqual(
+                    hasattr(instance, name), not fake, msg=f"{name} {fake}"
+                )
+
+            self.assertIsNone(instance.fruit_0)
+            self.assertIsNone(instance.boring_0)
+            if not fake:
+                self.assertIsInstance(instance.fruit, Orange)
+                self.assertIsInstance(instance.fruit_o, Orange)
+                self.assertIsInstance(instance.boring, BoringConfigurable)
+                self.assertIsInstance(instance.boring_o, BoringConfigurable)
+
+        self.assertEqual(control_args, ["Orange", "Orange", True, True])
+
+    def test_pre_expand(self):
+        # Check that the pre_expand classmethod of a class is called
+        # when expand_args_fields is called on the class.
+
+        class A(Configurable):
+            n: int = 9
+
+            @classmethod
+            def pre_expand(cls):
+                pass
+
+        A.pre_expand = Mock()
+        expand_args_fields(A)
+        A.pre_expand.assert_called()
+
+    def test_pre_expand_replaceable(self):
+        # Check that the pre_expand classmethod of a replaceable base class
+        # is called when expand_args_fields is called on a subclass of it.
+
+        class A(ReplaceableBase):
+            pass
+
+            @classmethod
+            def pre_expand(cls):
+                pass
+
+        class A1(A):
+            n: int = 9
+
+        A.pre_expand = Mock()
+        expand_args_fields(A1)
+        A.pre_expand.assert_called()
+
+
+@dataclass(eq=False)
+class MockDataclass:
+    field_no_default: int
+    field_primitive_type: int = 42
+    field_optional_none: Optional[int] = None
+    field_optional_dict_none: Optional[Dict] = None
+    field_optional_with_value: Optional[int] = 42
+    field_list_type: List[int] = field(default_factory=lambda: [])
+
+
+class RefObject:
+    pass
+
+
+REF_OBJECT = RefObject()
+
+
+class MockClassWithInit:  # noqa: B903
+    def __init__(
+        self,
+        field_no_nothing,
+        field_no_default: int,
+        field_primitive_type: int = 42,
+        field_optional_none: Optional[int] = None,
+        field_optional_dict_none: Optional[Dict] = None,
+        field_optional_with_value: Optional[int] = 42,
+        field_list_type: List[int] = [],  # noqa: B006
+        field_reference_type: RefObject = REF_OBJECT,
+    ):
+        self.field_no_nothing = field_no_nothing
+        self.field_no_default = field_no_default
+        self.field_primitive_type = field_primitive_type
+        self.field_optional_none = field_optional_none
+        self.field_optional_dict_none = field_optional_dict_none
+        self.field_optional_with_value = field_optional_with_value
+        self.field_list_type = field_list_type
+        self.field_reference_type = field_reference_type
+
+
+enable_get_default_args(MockClassWithInit)
+
+
+class TestRawClasses(unittest.TestCase):
+    def setUp(self) -> None:
+        self._instances = {
+            MockDataclass: MockDataclass(field_no_default=0),
+            MockClassWithInit: MockClassWithInit(
+                field_no_nothing="tratata", field_no_default=0
+            ),
+        }
+
+    def test_get_default_args(self):
+        for cls in [MockDataclass, MockClassWithInit]:
+            dataclass_defaults = get_default_args(cls)
+            # DictConfig fields with missing values are `not in`
+            self.assertNotIn("field_no_default", dataclass_defaults)
+            self.assertNotIn("field_no_nothing", dataclass_defaults)
+            self.assertNotIn("field_reference_type", dataclass_defaults)
+            expected_defaults = [
+                "field_primitive_type",
+                "field_optional_none",
+                "field_optional_dict_none",
+                "field_optional_with_value",
+                "field_list_type",
+            ]
+
+            if cls == MockDataclass:  # we don't remove undefaulted from dataclasses
+                dataclass_defaults.field_no_default = 0
+                expected_defaults.insert(0, "field_no_default")
+            self.assertEqual(list(dataclass_defaults), expected_defaults)
+            for name, val in dataclass_defaults.items():
+                self.assertTrue(hasattr(self._instances[cls], name))
+                self.assertEqual(val, getattr(self._instances[cls], name))
+
+    def test_get_default_args_readonly(self):
+        for cls in [MockDataclass, MockClassWithInit]:
+            dataclass_defaults = get_default_args(cls)
+            dataclass_defaults["field_list_type"].append(13)
+            self.assertEqual(self._instances[cls].field_list_type, [])
diff --git a/pytorch3d/tests/implicitron/test_config_use.py b/pytorch3d/tests/implicitron/test_config_use.py
new file mode 100644
index 0000000000000000000000000000000000000000..9cb455da2f5f8d3fbcb3f829540adf65f1e67a5e
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_config_use.py
@@ -0,0 +1,94 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+from omegaconf import OmegaConf
+from pytorch3d.implicitron.models.feature_extractor.resnet_feature_extractor import (
+    ResNetFeatureExtractor,
+)
+from pytorch3d.implicitron.models.generic_model import GenericModel
+from pytorch3d.implicitron.models.global_encoder.global_encoder import (
+    SequenceAutodecoder,
+)
+from pytorch3d.implicitron.models.implicit_function.idr_feature_field import (
+    IdrFeatureField,
+)
+from pytorch3d.implicitron.models.implicit_function.neural_radiance_field import (
+    NeuralRadianceFieldImplicitFunction,
+)
+from pytorch3d.implicitron.models.renderer.lstm_renderer import LSTMRenderer
+from pytorch3d.implicitron.models.renderer.multipass_ea import (
+    MultiPassEmissionAbsorptionRenderer,
+)
+from pytorch3d.implicitron.models.view_pooler.feature_aggregator import (
+    AngleWeightedIdentityFeatureAggregator,
+)
+from pytorch3d.implicitron.tools.config import (
+    get_default_args,
+    remove_unused_components,
+)
+from tests.common_testing import get_tests_dir
+
+from .common_resources import provide_resnet34
+
+DATA_DIR = get_tests_dir() / "implicitron/data"
+DEBUG: bool = False
+
+# Tests the use of the config system in implicitron
+
+
+class TestGenericModel(unittest.TestCase):
+    def setUp(self):
+        self.maxDiff = None  # no length limit on the diff shown when assertEqual fails
+
+    def test_create_gm(self):
+        args = get_default_args(GenericModel)  # defaults, left untouched
+        gm = GenericModel(**args)
+        self.assertIsInstance(gm.renderer, MultiPassEmissionAbsorptionRenderer)
+        self.assertIsInstance(
+            gm._implicit_functions[0]._fn, NeuralRadianceFieldImplicitFunction
+        )
+        self.assertIsNone(gm.global_encoder)
+        self.assertFalse(hasattr(gm, "implicit_function"))
+        self.assertIsNone(gm.view_pooler)
+        self.assertIsNone(gm.image_feature_extractor)
+
+    def test_create_gm_overrides(self):
+        provide_resnet34()
+        args = get_default_args(GenericModel)
+        args.view_pooler_enabled = True
+        args.view_pooler_args.feature_aggregator_class_type = (
+            "AngleWeightedIdentityFeatureAggregator"
+        )
+        args.image_feature_extractor_class_type = "ResNetFeatureExtractor"
+        args.implicit_function_class_type = "IdrFeatureField"
+        args.global_encoder_class_type = "SequenceAutodecoder"
+        idr_args = args.implicit_function_IdrFeatureField_args
+        idr_args.n_harmonic_functions_xyz = 1729  # distinctive value, checked on the model below
+
+        args.renderer_class_type = "LSTMRenderer"
+        gm = GenericModel(**args)
+        self.assertIsInstance(gm.renderer, LSTMRenderer)
+        self.assertIsInstance(
+            gm.view_pooler.feature_aggregator,
+            AngleWeightedIdentityFeatureAggregator,
+        )
+        self.assertIsInstance(gm._implicit_functions[0]._fn, IdrFeatureField)
+        self.assertEqual(gm._implicit_functions[0]._fn.n_harmonic_functions_xyz, 1729)
+        self.assertIsInstance(gm.global_encoder, SequenceAutodecoder)
+        self.assertIsInstance(gm.image_feature_extractor, ResNetFeatureExtractor)
+        self.assertFalse(hasattr(gm, "implicit_function"))
+
+        instance_args = OmegaConf.structured(gm)  # config built from the live model object
+        if DEBUG:  # dump the config before remove_unused_components is applied
+            full_yaml = OmegaConf.to_yaml(instance_args, sort_keys=False)
+            (DATA_DIR / "overrides_full.yaml").write_text(full_yaml)
+        remove_unused_components(instance_args)  # presumably prunes unselected components - confirm
+        yaml = OmegaConf.to_yaml(instance_args, sort_keys=False)
+        if DEBUG:  # NOTE(review): writes overrides_.yaml, not the overrides.yaml read below
+            (DATA_DIR / "overrides_.yaml").write_text(yaml)
+        self.assertEqual(yaml, (DATA_DIR / "overrides.yaml").read_text())
diff --git a/pytorch3d/tests/implicitron/test_data_cow.py b/pytorch3d/tests/implicitron/test_data_cow.py
new file mode 100644
index 0000000000000000000000000000000000000000..801863e9bcadf7f469ce75a0c6ccf15dc8e6a784
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_data_cow.py
@@ -0,0 +1,57 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import unittest
+
+import torch
+from pytorch3d.implicitron.dataset.frame_data import FrameData
+from pytorch3d.implicitron.dataset.rendered_mesh_dataset_map_provider import (
+    RenderedMeshDatasetMapProvider,
+)
+from pytorch3d.implicitron.tools.config import expand_args_fields
+from pytorch3d.renderer import FoVPerspectiveCameras
+from tests.common_testing import TestCaseMixin
+
+
+inside_re_worker = os.environ.get("INSIDE_RE_WORKER", False)
+
+
+class TestDataCow(TestCaseMixin, unittest.TestCase):
+    def test_simple(self):
+        if inside_re_worker:  # early return: reported as passed rather than skipped
+            return
+        expand_args_fields(RenderedMeshDatasetMapProvider)
+        self._runtest(use_point_light=True, num_views=4)
+        self._runtest(use_point_light=False, num_views=4)
+
+    def _runtest(self, **kwargs):
+        provider = RenderedMeshDatasetMapProvider(**kwargs)
+        dataset_map = provider.get_dataset_map()
+        known_matrix = torch.zeros(1, 4, 4)  # expected projection matrix, filled in below
+        known_matrix[0, 0, 0] = 1.7321
+        known_matrix[0, 1, 1] = 1.7321
+        known_matrix[0, 2, 2] = 1.0101
+        known_matrix[0, 3, 2] = -1.0101
+        known_matrix[0, 2, 3] = 1
+
+        self.assertIsNone(dataset_map.val)  # only the train split is populated
+        self.assertIsNone(dataset_map.test)
+        self.assertEqual(len(dataset_map.train), provider.num_views)
+
+        value = dataset_map.train[0]
+        self.assertIsInstance(value, FrameData)
+
+        self.assertEqual(value.image_rgb.shape, (3, 128, 128))
+        self.assertEqual(value.fg_probability.shape, (1, 128, 128))
+        # corner of image is background
+        self.assertEqual(value.fg_probability[0, 0, 0], 0)
+        self.assertEqual(value.fg_probability.max(), 1.0)
+        self.assertIsInstance(value.camera, FoVPerspectiveCameras)
+        self.assertEqual(len(value.camera), 1)
+        self.assertIsNone(value.camera.K)
+        matrix = value.camera.get_projection_transform().get_matrix()
+        self.assertClose(matrix, known_matrix, atol=1e-4)  # tolerance matches the 4-decimal constants
diff --git a/pytorch3d/tests/implicitron/test_data_json_index.py b/pytorch3d/tests/implicitron/test_data_json_index.py
new file mode 100644
index 0000000000000000000000000000000000000000..e11e7b449fe31bc04655540eef559b6ba6d6bb58
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_data_json_index.py
@@ -0,0 +1,80 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import unittest
+
+from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource
+from pytorch3d.implicitron.tools.config import get_default_args
+from pytorch3d.renderer import PerspectiveCameras
+from tests.common_testing import TestCaseMixin
+
+# These tests are only run internally, where the data is available.
+internal = os.environ.get("FB_TEST", False)
+inside_re_worker = os.environ.get("INSIDE_RE_WORKER", False)
+skip_tests = not internal or inside_re_worker
+
+
+@unittest.skipIf(skip_tests, "no data")  # runs only with FB_TEST set and INSIDE_RE_WORKER unset
+class TestDataJsonIndex(TestCaseMixin, unittest.TestCase):
+    def test_loaders(self):
+        args = get_default_args(ImplicitronDataSource)
+        args.dataset_map_provider_class_type = "JsonIndexDatasetMapProvider"
+        dataset_args = args.dataset_map_provider_JsonIndexDatasetMapProvider_args
+        dataset_args.category = "skateboard"
+        dataset_args.dataset_root = "manifold://co3d/tree/extracted"
+        dataset_args.test_restrict_sequence_id = 0
+        dataset_args.dataset_JsonIndexDataset_args.limit_sequences_to = 1
+
+        data_source = ImplicitronDataSource(**args)
+
+        cameras = data_source.all_train_cameras
+        self.assertIsInstance(cameras, PerspectiveCameras)
+        self.assertEqual(len(cameras), 81)  # one camera per train frame (81 below)
+
+        data_sets, data_loaders = data_source.get_datasets_and_dataloaders()
+
+        self.assertEqual(len(data_sets.train), 81)
+        self.assertEqual(len(data_sets.val), 102)
+        self.assertEqual(len(data_sets.test), 102)
+
+    def test_visitor_subsets(self):
+        args = get_default_args(ImplicitronDataSource)
+        args.dataset_map_provider_class_type = "JsonIndexDatasetMapProvider"
+        dataset_args = args.dataset_map_provider_JsonIndexDatasetMapProvider_args
+        dataset_args.category = "skateboard"
+        dataset_args.dataset_root = "manifold://co3d/tree/extracted"
+        dataset_args.test_restrict_sequence_id = 0
+        dataset_args.dataset_JsonIndexDataset_args.limit_sequences_to = 1
+
+        data_source = ImplicitronDataSource(**args)
+        datasets, _ = data_source.get_datasets_and_dataloaders()
+        dataset = datasets.test
+
+        sequences = list(dataset.sequence_names())
+        self.assertEqual(len(sequences), 1)
+        i = 0  # dataset index expected for the next frame
+        for seq in sequences:
+            last_ts = float("-Inf")
+            seq_frames = list(dataset.sequence_frames_in_order(seq))
+            self.assertEqual(len(seq_frames), 102)
+            for ts, _, idx in seq_frames:
+                self.assertEqual(i, idx)  # indices run consecutively from 0
+                i += 1
+                self.assertGreaterEqual(ts, last_ts)  # timestamps are non-decreasing
+                last_ts = ts
+
+            last_ts = float("-Inf")
+            known_frames = list(dataset.sequence_frames_in_order(seq, "test_known"))
+            self.assertEqual(len(known_frames), 81)
+            for ts, _, _ in known_frames:
+                self.assertGreaterEqual(ts, last_ts)  # subset keeps the timestamp order
+                last_ts = ts
+
+            known_indices = list(dataset.sequence_indices_in_order(seq, "test_known"))
+            self.assertEqual(len(known_indices), 81)
+
+            break  # testing only the first sequence
diff --git a/pytorch3d/tests/implicitron/test_data_llff.py b/pytorch3d/tests/implicitron/test_data_llff.py
new file mode 100644
index 0000000000000000000000000000000000000000..040a720f1238402d4c1c046f9d8e6171917c136c
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_data_llff.py
@@ -0,0 +1,158 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import unittest
+
+import torch
+from pytorch3d.implicitron.dataset.blender_dataset_map_provider import (
+    BlenderDatasetMapProvider,
+)
+from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource
+from pytorch3d.implicitron.dataset.dataset_base import FrameData
+from pytorch3d.implicitron.dataset.llff_dataset_map_provider import (
+    LlffDatasetMapProvider,
+)
+from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args
+from pytorch3d.renderer import PerspectiveCameras
+from tests.common_testing import TestCaseMixin
+
+
+# These tests are only run internally, where the data is available.
+internal = os.environ.get("FB_TEST", False)
+inside_re_worker = os.environ.get("INSIDE_RE_WORKER", False)
+
+
+@unittest.skipUnless(internal, "no data")  # runs only when FB_TEST is set to a non-empty value
+class TestDataLlff(TestCaseMixin, unittest.TestCase):
+    def test_synthetic(self):
+        if inside_re_worker:  # early return: reported as passed rather than skipped
+            return
+        expand_args_fields(BlenderDatasetMapProvider)
+
+        provider = BlenderDatasetMapProvider(
+            base_dir="manifold://co3d/tree/nerf_data/nerf_synthetic/lego",
+            object_name="lego",
+        )
+        dataset_map = provider.get_dataset_map()
+        known_matrix = torch.zeros(1, 4, 4)  # expected projection matrix, filled in below
+        known_matrix[0, 0, 0] = 2.7778
+        known_matrix[0, 1, 1] = 2.7778
+        known_matrix[0, 2, 3] = 1
+        known_matrix[0, 3, 2] = 1
+
+        for name, length in [("train", 100), ("val", 100), ("test", 200)]:
+            dataset = getattr(dataset_map, name)
+            self.assertEqual(len(dataset), length)
+            # try getting a value
+            value = dataset[0]
+            self.assertEqual(value.image_rgb.shape, (3, 800, 800))
+            self.assertEqual(value.fg_probability.shape, (1, 800, 800))
+            # corner of image is background
+            self.assertEqual(value.fg_probability[0, 0, 0], 0)
+            self.assertEqual(value.fg_probability.max(), 1.0)
+            self.assertIsInstance(value.camera, PerspectiveCameras)
+            self.assertEqual(len(value.camera), 1)
+            self.assertIsNone(value.camera.K)
+            matrix = value.camera.get_projection_transform().get_matrix()
+            self.assertClose(matrix, known_matrix, atol=1e-4)
+            self.assertIsInstance(value, FrameData)
+
+    def test_llff(self):
+        if inside_re_worker:  # early return: reported as passed rather than skipped
+            return
+        expand_args_fields(LlffDatasetMapProvider)
+
+        provider = LlffDatasetMapProvider(
+            base_dir="manifold://co3d/tree/nerf_data/nerf_llff_data/fern",
+            object_name="fern",
+            downscale_factor=8,
+        )
+        dataset_map = provider.get_dataset_map()
+        known_matrix = torch.zeros(1, 4, 4)  # expected projection matrix, filled in below
+        known_matrix[0, 0, 0] = 2.1564
+        known_matrix[0, 1, 1] = 2.1564
+        known_matrix[0, 2, 3] = 1
+        known_matrix[0, 3, 2] = 1
+
+        for name, length, frame_type in [
+            ("train", 17, "known"),
+            ("test", 3, "unseen"),
+            ("val", 3, "unseen"),
+        ]:
+            dataset = getattr(dataset_map, name)
+            self.assertEqual(len(dataset), length)
+            # try getting a value
+            value = dataset[0]
+            self.assertIsInstance(value, FrameData)
+            self.assertEqual(value.frame_type, frame_type)
+            self.assertEqual(value.image_rgb.shape, (3, 378, 504))
+            self.assertIsInstance(value.camera, PerspectiveCameras)
+            self.assertEqual(len(value.camera), 1)
+            self.assertIsNone(value.camera.K)
+            matrix = value.camera.get_projection_transform().get_matrix()
+            self.assertClose(matrix, known_matrix, atol=1e-4)
+
+        self.assertEqual(len(dataset_map.test.get_eval_batches()), 3)
+        for batch in dataset_map.test.get_eval_batches():
+            self.assertEqual(len(batch), 1)  # here each eval batch is a single unseen frame
+            self.assertEqual(dataset_map.test[batch[0]].frame_type, "unseen")
+
+    def test_include_known_frames(self):
+        if inside_re_worker:  # early return: reported as passed rather than skipped
+            return
+        expand_args_fields(LlffDatasetMapProvider)
+
+        provider = LlffDatasetMapProvider(
+            base_dir="manifold://co3d/tree/nerf_data/nerf_llff_data/fern",
+            object_name="fern",
+            n_known_frames_for_test=2,
+        )
+        dataset_map = provider.get_dataset_map()
+
+        for name, types in [
+            ("train", ["known"] * 17),
+            ("val", ["unseen"] * 3 + ["known"] * 17),
+            ("test", ["unseen"] * 3 + ["known"] * 17),
+        ]:
+            dataset = getattr(dataset_map, name)
+            self.assertEqual(len(dataset), len(types))
+            for i, frame_type in enumerate(types):  # unseen frames come first, then known
+                value = dataset[i]
+                self.assertEqual(value.frame_type, frame_type)
+                self.assertIsNone(value.fg_probability)
+
+        self.assertEqual(len(dataset_map.test.get_eval_batches()), 3)
+        for batch in dataset_map.test.get_eval_batches():
+            self.assertEqual(len(batch), 3)  # 1 unseen frame + n_known_frames_for_test=2 known
+            self.assertEqual(dataset_map.test[batch[0]].frame_type, "unseen")
+            for i in batch[1:]:
+                self.assertEqual(dataset_map.test[i].frame_type, "known")
+
+    def test_loaders(self):
+        if inside_re_worker:  # early return: reported as passed rather than skipped
+            return
+        args = get_default_args(ImplicitronDataSource)
+        args.dataset_map_provider_class_type = "BlenderDatasetMapProvider"
+        dataset_args = args.dataset_map_provider_BlenderDatasetMapProvider_args
+        dataset_args.object_name = "lego"
+        dataset_args.base_dir = "manifold://co3d/tree/nerf_data/nerf_synthetic/lego"
+
+        data_source = ImplicitronDataSource(**args)
+        _, data_loaders = data_source.get_datasets_and_dataloaders()
+        for i in data_loaders.train:  # every batch holds exactly one frame
+            self.assertEqual(i.frame_type, ["known"])
+            self.assertEqual(i.image_rgb.shape, (1, 3, 800, 800))
+        for i in data_loaders.val:
+            self.assertEqual(i.frame_type, ["unseen"])
+            self.assertEqual(i.image_rgb.shape, (1, 3, 800, 800))
+        for i in data_loaders.test:
+            self.assertEqual(i.frame_type, ["unseen"])
+            self.assertEqual(i.image_rgb.shape, (1, 3, 800, 800))
+
+        cameras = data_source.all_train_cameras
+        self.assertIsInstance(cameras, PerspectiveCameras)
+        self.assertEqual(len(cameras), 100)
diff --git a/pytorch3d/tests/implicitron/test_data_source.py b/pytorch3d/tests/implicitron/test_data_source.py
new file mode 100644
index 0000000000000000000000000000000000000000..67c79fc5788f1069094bb499726b9c2ae85fd02f
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_data_source.py
@@ -0,0 +1,123 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import contextlib
+import os
+import unittest
+import unittest.mock
+
+import torch
+from omegaconf import OmegaConf
+from pytorch3d.implicitron.dataset.data_loader_map_provider import (
+    SequenceDataLoaderMapProvider,
+    SimpleDataLoaderMapProvider,
+)
+from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource
+from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
+from pytorch3d.implicitron.tools.config import get_default_args
+from tests.common_testing import get_tests_dir
+from tests.implicitron.common_resources import get_skateboard_data
+
+DATA_DIR = get_tests_dir() / "implicitron/data"
+DEBUG: bool = False
+
+
+class TestDataSource(unittest.TestCase):
+    def setUp(self):
+        self.maxDiff = None  # no length limit on the diff shown when assertEqual fails
+        torch.manual_seed(42)
+
+        stack = contextlib.ExitStack()  # holds the data context open for the whole test
+        self.dataset_root, self.path_manager = stack.enter_context(
+            get_skateboard_data()
+        )
+        self.addCleanup(stack.close)  # leaves the context once the test has finished
+
+    def _test_omegaconf_generic_failure(self):  # name lacks the "test" prefix: not collected
+        # OmegaConf possible bug - this is why we need _GenericWorkaround
+        from dataclasses import dataclass
+
+        import torch
+
+        @dataclass
+        class D(torch.utils.data.Dataset[int]):
+            a: int = 3
+
+        OmegaConf.structured(D)
+
+    def _test_omegaconf_ListList(self):  # name lacks the "test" prefix: not collected
+        # Demo that OmegaConf doesn't support nested lists
+        from dataclasses import dataclass
+        from typing import Sequence
+
+        @dataclass
+        class A:
+            a: Sequence[Sequence[int]] = ((32,),)
+
+        OmegaConf.structured(A)
+
+    def test_JsonIndexDataset_args(self):
+        # test that JsonIndexDataset works with get_default_args
+        get_default_args(JsonIndexDataset)
+
+    def test_one(self):
+        cfg = get_default_args(ImplicitronDataSource)
+        # making the test invariant to env variables
+        cfg.dataset_map_provider_JsonIndexDatasetMapProvider_args.dataset_root = ""
+        cfg.dataset_map_provider_JsonIndexDatasetMapProviderV2_args.dataset_root = ""
+        # making the test invariant to the presence of SQL dataset
+        if "dataset_map_provider_SqlIndexDatasetMapProvider_args" in cfg:
+            del cfg.dataset_map_provider_SqlIndexDatasetMapProvider_args
+        yaml = OmegaConf.to_yaml(cfg, sort_keys=False)
+        if DEBUG:  # overwrites the reference file that is compared on the next line
+            (DATA_DIR / "data_source.yaml").write_text(yaml)
+        self.assertEqual(yaml, (DATA_DIR / "data_source.yaml").read_text())
+
+    def test_default(self):
+        if os.environ.get("INSIDE_RE_WORKER") is not None:  # early return, not a skip
+            return
+        args = get_default_args(ImplicitronDataSource)
+        args.dataset_map_provider_class_type = "JsonIndexDatasetMapProvider"
+        dataset_args = args.dataset_map_provider_JsonIndexDatasetMapProvider_args
+        dataset_args.category = "skateboard"
+        dataset_args.test_restrict_sequence_id = 0
+        dataset_args.n_frames_per_sequence = -1
+
+        dataset_args.dataset_root = self.dataset_root
+
+        data_source = ImplicitronDataSource(**args)
+        self.assertIsInstance(  # no loader type was chosen, so this is the default one
+            data_source.data_loader_map_provider, SequenceDataLoaderMapProvider
+        )
+        _, data_loaders = data_source.get_datasets_and_dataloaders()
+        self.assertEqual(len(data_loaders.train), 81)
+        for i in data_loaders.train:
+            self.assertEqual(i.frame_type, ["test_known"])
+            break  # only the first batch is inspected
+
+    def test_simple(self):
+        if os.environ.get("INSIDE_RE_WORKER") is not None:  # early return, not a skip
+            return
+        args = get_default_args(ImplicitronDataSource)
+        args.dataset_map_provider_class_type = "JsonIndexDatasetMapProvider"
+        args.data_loader_map_provider_class_type = "SimpleDataLoaderMapProvider"
+        dataset_args = args.dataset_map_provider_JsonIndexDatasetMapProvider_args
+        dataset_args.category = "skateboard"
+        dataset_args.test_restrict_sequence_id = 0
+        dataset_args.n_frames_per_sequence = -1
+
+        dataset_args.dataset_root = self.dataset_root
+
+        data_source = ImplicitronDataSource(**args)
+        self.assertIsInstance(  # the loader type selected by name above
+            data_source.data_loader_map_provider, SimpleDataLoaderMapProvider
+        )
+        _, data_loaders = data_source.get_datasets_and_dataloaders()
+
+        self.assertEqual(len(data_loaders.train), 81)
+        for i in data_loaders.train:
+            self.assertEqual(i.frame_type, ["test_known"])
+            break  # only the first batch is inspected
diff --git a/pytorch3d/tests/implicitron/test_dataset_visualize.py b/pytorch3d/tests/implicitron/test_dataset_visualize.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7af5b035cf3b4ea6e7b61ac638cd3049c611dda
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_dataset_visualize.py
@@ -0,0 +1,199 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import contextlib
+import copy
+import os
+import unittest
+
+import torch
+import torchvision
+from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
+from pytorch3d.implicitron.dataset.visualize import get_implicitron_sequence_pointcloud
+from pytorch3d.implicitron.tools.config import expand_args_fields
+from pytorch3d.implicitron.tools.point_cloud_utils import render_point_cloud_pytorch3d
+from pytorch3d.vis.plotly_vis import plot_scene
+
+
+if os.environ.get("INSIDE_RE_WORKER") is None:
+    from visdom import Visdom
+
+from tests.common_testing import interactive_testing_requested
+
+from .common_resources import get_skateboard_data
+
+VISDOM_PORT = int(os.environ.get("VISDOM_PORT", 8097))
+
+
+class TestDatasetVisualize(unittest.TestCase):
+    def setUp(self):
+        if not interactive_testing_requested():  # nothing is prepared for non-interactive runs
+            return
+        category = "skateboard"
+        stack = contextlib.ExitStack()  # holds the data context open for the whole test
+        dataset_root, path_manager = stack.enter_context(get_skateboard_data())
+        self.addCleanup(stack.close)
+        frame_file = os.path.join(dataset_root, category, "frame_annotations.jgz")
+        sequence_file = os.path.join(dataset_root, category, "sequence_annotations.jgz")
+        self.image_size = 256
+        expand_args_fields(JsonIndexDataset)
+        self.datasets = {  # same annotations, three image-size / cropping variants
+            "simple": JsonIndexDataset(
+                frame_annotations_file=frame_file,
+                sequence_annotations_file=sequence_file,
+                dataset_root=dataset_root,
+                image_height=self.image_size,
+                image_width=self.image_size,
+                box_crop=True,
+                load_point_clouds=True,
+                path_manager=path_manager,
+            ),
+            "nonsquare": JsonIndexDataset(
+                frame_annotations_file=frame_file,
+                sequence_annotations_file=sequence_file,
+                dataset_root=dataset_root,
+                image_height=self.image_size,
+                image_width=self.image_size // 2,
+                box_crop=True,
+                load_point_clouds=True,
+                path_manager=path_manager,
+            ),
+            "nocrop": JsonIndexDataset(
+                frame_annotations_file=frame_file,
+                sequence_annotations_file=sequence_file,
+                dataset_root=dataset_root,
+                image_height=self.image_size,
+                image_width=self.image_size // 2,
+                box_crop=False,
+                load_point_clouds=True,
+                path_manager=path_manager,
+            ),
+        }
+        self.datasets.update(  # add a "<key>_newndc" copy of every dataset above
+            {
+                k + "_newndc": _change_annotations_to_new_ndc(dataset)
+                for k, dataset in self.datasets.items()
+            }
+        )
+        self.visdom = Visdom(port=VISDOM_PORT)
+        if not self.visdom.check_connection():
+            print("Visdom server not running! Disabling visdom visualizations.")
+            self.visdom = None  # later code checks for None before using visdom
+
+    def _render_one_pointcloud(self, point_cloud, cameras, render_size):
+        (_image_render, _, _) = render_point_cloud_pytorch3d(  # only the image is used
+            cameras,
+            point_cloud,
+            render_size=render_size,
+            point_radius=1e-2,
+            topk=10,
+            bg_color=0.0,
+        )
+        return _image_render.clamp(0.0, 1.0)  # keep values in [0, 1]
+
+    def test_one(self):
+        """Test dataset visualization."""
+        if not interactive_testing_requested():  # no-op unless run interactively
+            return
+        for max_frames in (16, -1):
+            for load_dataset_point_cloud in (True, False):
+                for dataset_key in self.datasets:
+                    self._gen_and_render_pointcloud(
+                        max_frames, load_dataset_point_cloud, dataset_key
+                    )
+
+    def _gen_and_render_pointcloud(
+        self, max_frames, load_dataset_point_cloud, dataset_key
+    ):
+        """Render the first sequence's point cloud next to the loaded frame images."""
+        # The comparison image is saved beside this file, one PNG per combination of
+        # max_frames / load_dataset_point_cloud / dataset_key, and sent to visdom if up.
+        dataset = self.datasets[dataset_key]
+        # load the point cloud of the first sequence
+        sequence_show = list(dataset.seq_annots.keys())[0]
+        device = torch.device("cuda:0")  # rendering below needs a CUDA device
+
+        point_cloud, sequence_frame_data = get_implicitron_sequence_pointcloud(
+            dataset,
+            sequence_name=sequence_show,
+            mask_points=True,
+            max_frames=max_frames,
+            num_workers=10,
+            load_dataset_point_cloud=load_dataset_point_cloud,
+        )
+
+        # render on gpu
+        point_cloud = point_cloud.to(device)
+        cameras = sequence_frame_data.camera.to(device)
+
+        # render the point_cloud from the viewpoint of loaded cameras
+        images_render = torch.cat(
+            [
+                self._render_one_pointcloud(
+                    point_cloud,
+                    cameras[frame_i],
+                    (dataset.image_height, dataset.image_width),
+                )
+                for frame_i in range(len(cameras))
+            ]
+        ).cpu()
+        images_gt_and_render = torch.cat(  # loaded image and render side by side
+            [sequence_frame_data.image_rgb, images_render], dim=3
+        )
+
+        imfile = os.path.join(
+            os.path.split(os.path.abspath(__file__))[0],
+            "test_dataset_visualize"
+            + f"_max_frames={max_frames}"
+            + f"_load_pcl={load_dataset_point_cloud}_{dataset_key}.png",
+        )
+        print(f"Exporting image {imfile}.")
+        torchvision.utils.save_image(images_gt_and_render, imfile, nrow=2)
+
+        if self.visdom is not None:
+            test_name = f"{max_frames}_{load_dataset_point_cloud}_{dataset_key}"
+            self.visdom.images(
+                images_gt_and_render,
+                env="test_dataset_visualize",
+                win=f"pcl_renders_{test_name}",
+                opts={"title": f"pcl_renders_{test_name}"},
+            )
+            plotlyplot = plot_scene(
+                {
+                    "scene_batch": {
+                        "cameras": cameras,
+                        "point_cloud": point_cloud,
+                    }
+                },
+                camera_scale=1.0,
+                pointcloud_max_points=10000,
+                pointcloud_marker_size=1.0,
+            )
+            self.visdom.plotlyplot(
+                plotlyplot,
+                env="test_dataset_visualize",
+                win=f"pcl_{test_name}",
+            )
+
+
+def _change_annotations_to_new_ndc(dataset):
+    """Return a deep copy of `dataset` whose viewpoints are marked "ndc_isotropic"."""
+    converted = copy.deepcopy(dataset)
+    for entry in converted.frame_annots:
+        viewpoint = entry["frame_annotation"].viewpoint
+        viewpoint.intrinsics_format = "ndc_isotropic"
+        # this assumes the focal length to be equal on x and y (ok for a test)
+        largest_focal = max(viewpoint.focal_length)
+        old_fx, old_fy = viewpoint.focal_length[0], viewpoint.focal_length[1]
+        old_px, old_py = viewpoint.principal_point[0], viewpoint.principal_point[1]
+        viewpoint.principal_point = (
+            old_px * largest_focal / old_fx,
+            old_py * largest_focal / old_fy,
+        )
+        viewpoint.focal_length = (largest_focal, largest_focal)
+
+    return converted
diff --git a/pytorch3d/tests/implicitron/test_eval_cameras.py b/pytorch3d/tests/implicitron/test_eval_cameras.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ec795251ee05bb3bff9425f9d9a965e4d9abffa
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_eval_cameras.py
@@ -0,0 +1,42 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.implicitron.tools.eval_video_trajectory import (
+    generate_eval_video_cameras,
+)
+from pytorch3d.renderer.cameras import look_at_view_transform, PerspectiveCameras
+from pytorch3d.transforms import axis_angle_to_matrix
+from tests.common_testing import TestCaseMixin
+
+
+class TestEvalCameras(TestCaseMixin, unittest.TestCase):
+    def setUp(self):
+        torch.manual_seed(42)  # fixed seed: the test draws random angles below
+
+    def test_circular(self):
+        n_train_cameras = 10
+        n_test_cameras = 100  # NOTE(review): never passed on; presumably the callee default - confirm
+        R, T = look_at_view_transform(azim=torch.rand(n_train_cameras) * 360)
+        amplitude = 0.01  # scale of the random axis-angle perturbation applied to R
+        R_jiggled = torch.bmm(
+            R, axis_angle_to_matrix(torch.rand(n_train_cameras, 3) * amplitude)
+        )
+        cameras_train = PerspectiveCameras(R=R_jiggled, T=T)
+        cameras_test = generate_eval_video_cameras(
+            cameras_train, trajectory_type="circular_lsq_fit", trajectory_scale=1.0
+        )
+
+        positions_test = cameras_test.get_camera_center()
+        center = positions_test.mean(0)
+        self.assertClose(center, torch.zeros(3), atol=0.1)  # trajectory centred near the origin
+        self.assertClose(  # every test camera lies about 1 unit from that centre
+            (positions_test - center).norm(dim=[1]),
+            torch.ones(n_test_cameras),
+            atol=0.1,
+        )
diff --git a/pytorch3d/tests/implicitron/test_eval_demo.py b/pytorch3d/tests/implicitron/test_eval_demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..37df395db1cb871ae2e0f54f224d404056079fb1
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_eval_demo.py
@@ -0,0 +1,29 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import unittest
+
+from pytorch3d.implicitron import eval_demo
+
+from tests.common_testing import interactive_testing_requested
+
+from .common_resources import CO3D_MANIFOLD_PATH
+
+"""
+This test runs a single sequence eval_demo, useful for debugging datasets.
+It only runs interactively.
+"""
+
+
+class TestEvalDemo(unittest.TestCase):
+    def test_a(self):
+        if not interactive_testing_requested():
+            return
+
+        os.environ["CO3D_DATASET_ROOT"] = CO3D_MANIFOLD_PATH
+
+        eval_demo.evaluate_dbir_for_category("donut", single_sequence_id=0)
diff --git a/pytorch3d/tests/implicitron/test_evaluation.py b/pytorch3d/tests/implicitron/test_evaluation.py
new file mode 100644
index 0000000000000000000000000000000000000000..400d7835ff09f489a93f4fb4375f3ee403db4084
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_evaluation.py
@@ -0,0 +1,323 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import contextlib
+import dataclasses
+import itertools
+import math
+import os
+import unittest
+
+import lpips
+import numpy as np
+import torch
+
+from pytorch3d.implicitron.dataset.frame_data import FrameData
+from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
+from pytorch3d.implicitron.evaluation.evaluate_new_view_synthesis import eval_batch
+from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
+from pytorch3d.implicitron.models.generic_model import GenericModel  # noqa
+from pytorch3d.implicitron.models.model_dbir import ModelDBIR  # noqa
+from pytorch3d.implicitron.tools.config import expand_args_fields, registry
+from pytorch3d.implicitron.tools.metric_utils import calc_psnr, eval_depth
+from pytorch3d.implicitron.tools.utils import dataclass_to_cuda_
+
+from .common_resources import get_skateboard_data, provide_lpips_vgg
+
+
+class TestEvaluation(unittest.TestCase):
+    def setUp(self):
+        # initialize evaluation dataset/dataloader
+        torch.manual_seed(42)
+
+        stack = contextlib.ExitStack()
+        dataset_root, path_manager = stack.enter_context(get_skateboard_data())
+        self.addCleanup(stack.close)
+
+        category = "skateboard"
+        frame_file = os.path.join(dataset_root, category, "frame_annotations.jgz")
+        sequence_file = os.path.join(dataset_root, category, "sequence_annotations.jgz")
+        self.image_size = 64
+        expand_args_fields(JsonIndexDataset)
+        self.dataset = JsonIndexDataset(
+            frame_annotations_file=frame_file,
+            sequence_annotations_file=sequence_file,
+            dataset_root=dataset_root,
+            image_height=self.image_size,
+            image_width=self.image_size,
+            box_crop=True,
+            remove_empty_masks=False,
+            path_manager=path_manager,
+        )
+        self.bg_color = (0.0, 0.0, 0.0)
+
+        # init the lpips model for eval
+        provide_lpips_vgg()
+        self.lpips_model = lpips.LPIPS(net="vgg").cuda()
+
+    def test_eval_depth(self):
+        """
+        Check that eval_depth correctly masks errors and that, for get_best_scale=True,
+        the error with scaled prediction equals the error without scaling the
+        predicted depth. Finally, test that the error values are as expected
+        for prediction and gt differing by a constant offset.
+        """
+        # ground-truth depth on the GPU, clamped from below at zero
+        gt = (torch.randn(10, 1, 300, 400, device="cuda") * 5.0).clamp(0.0)
+        # random binary mask with the same shape, dtype and device as gt
+        mask = (torch.rand_like(gt) > 0.5).type_as(gt)
+
+        # sweep the error magnitude over 1e-5, 1e-4, ..., 1, and run every
+        # check both without (crop=0) and with (crop=5) cropping
+        for diff in 10 ** torch.linspace(-5, 0, 6):
+            for crop in (0, 5):
+
+                # prediction = gt plus uniform noise in [-diff, diff)
+                pred = gt + (torch.rand_like(gt) - 0.5) * 2 * diff
+
+                # scaled prediction test
+                # with get_best_scale=True, multiplying the prediction by a
+                # constant must not change the reported errors
+                mse_depth, abs_depth = eval_depth(
+                    pred,
+                    gt,
+                    crop=crop,
+                    mask=mask,
+                    get_best_scale=True,
+                )
+                mse_depth_scale, abs_depth_scale = eval_depth(
+                    pred * 10.0,
+                    gt,
+                    crop=crop,
+                    mask=mask,
+                    get_best_scale=True,
+                )
+                self.assertAlmostEqual(
+                    float(mse_depth.sum()), float(mse_depth_scale.sum()), delta=1e-4
+                )
+                self.assertAlmostEqual(
+                    float(abs_depth.sum()), float(abs_depth_scale.sum()), delta=1e-4
+                )
+
+                # error masking test
+                # the prediction differs from gt only where mask == 0, so the
+                # error evaluated inside the mask has to vanish
+                pred_masked_err = gt + (torch.rand_like(gt) + diff) * (1 - mask)
+                mse_depth_masked, abs_depth_masked = eval_depth(
+                    pred_masked_err,
+                    gt,
+                    crop=crop,
+                    mask=mask,
+                    get_best_scale=True,
+                )
+                self.assertAlmostEqual(
+                    float(mse_depth_masked.sum()), float(0.0), delta=1e-4
+                )
+                self.assertAlmostEqual(
+                    float(abs_depth_masked.sum()), float(0.0), delta=1e-4
+                )
+                # with the inverted mask, the very same errors must be picked up
+                mse_depth_unmasked, abs_depth_unmasked = eval_depth(
+                    pred_masked_err,
+                    gt,
+                    crop=crop,
+                    mask=1 - mask,
+                    get_best_scale=True,
+                )
+                self.assertGreater(
+                    float(mse_depth_unmasked.sum()),
+                    float(diff**2),
+                )
+                self.assertGreater(
+                    float(abs_depth_unmasked.sum()),
+                    float(diff),
+                )
+
+                # tests with constant error
+                # the prediction is offset by exactly `diff` inside the mask
+                pred_fix_diff = gt + diff * mask
+                for _mask_gt in (mask, None):
+                    mse_depth_fix_diff, abs_depth_fix_diff = eval_depth(
+                        pred_fix_diff,
+                        gt,
+                        crop=crop,
+                        mask=_mask_gt,
+                        get_best_scale=False,
+                    )
+                    if _mask_gt is not None:
+                        # every evaluated pixel is off by exactly `diff`
+                        expected_err_abs = diff
+                        expected_err_mse = diff**2
+                    else:
+                        # NOTE(review): the expectation below presumes that,
+                        # without a mask, eval_depth averages over the pixels
+                        # with gt > 0 -- confirm against eval_depth.
+                        # Only the pixels with gt > 0 that are also inside
+                        # `mask` carry the offset.
+                        err_mask = (gt > 0.0).float() * mask
+                        if crop > 0:
+                            err_mask = err_mask[:, :, crop:-crop, crop:-crop]
+                            gt_cropped = gt[:, :, crop:-crop, crop:-crop]
+                        else:
+                            gt_cropped = gt
+                        # per-batch-element count of pixels with gt > 0
+                        gt_mass = (gt_cropped > 0.0).float().sum(dim=(1, 2, 3))
+                        expected_err_abs = (
+                            diff * err_mask.sum(dim=(1, 2, 3)) / (gt_mass)
+                        )
+                        expected_err_mse = diff * expected_err_abs
+                    self.assertTrue(
+                        torch.allclose(
+                            abs_depth_fix_diff,
+                            expected_err_abs * torch.ones_like(abs_depth_fix_diff),
+                            atol=1e-4,
+                        )
+                    )
+                    self.assertTrue(
+                        torch.allclose(
+                            mse_depth_fix_diff,
+                            expected_err_mse * torch.ones_like(mse_depth_fix_diff),
+                            atol=1e-4,
+                        )
+                    )
+
+    def test_psnr(self):
+        """
+        Compare against opencv and check that the psnr is above
+        the minimum possible value.
+        """
+        import cv2
+
+        im1 = torch.rand(100, 3, 256, 256).cuda()
+        im1_uint8 = (im1 * 255).to(torch.uint8)
+        im1_rounded = im1_uint8.float() / 255
+        for max_diff in 10 ** torch.linspace(-5, 0, 6):
+            im2 = im1 + (torch.rand_like(im1) - 0.5) * 2 * max_diff
+            im2 = im2.clamp(0.0, 1.0)
+            im2_uint8 = (im2 * 255).to(torch.uint8)
+            im2_rounded = im2_uint8.float() / 255
+            # check that our psnr matches the output of opencv
+            psnr = calc_psnr(im1_rounded, im2_rounded)
+            # some versions of cv2 can only take uint8 input
+            psnr_cv2 = cv2.PSNR(
+                im1_uint8.cpu().numpy(),
+                im2_uint8.cpu().numpy(),
+            )
+            self.assertAlmostEqual(float(psnr), float(psnr_cv2), delta=1e-4)
+            # check that all PSNRs are bigger than the minimum possible PSNR
+            max_mse = max_diff**2
+            min_psnr = 10 * math.log10(1.0 / max_mse)
+            for _im1, _im2 in zip(im1, im2):
+                _psnr = calc_psnr(_im1, _im2)
+                self.assertGreaterEqual(float(_psnr) + 1e-6, min_psnr)
+
+    def _one_sequence_test(
+        self,
+        seq_dataset,
+        model,
+        batch_indices,
+        check_metrics=False,
+    ):
+        loader = torch.utils.data.DataLoader(
+            seq_dataset,
+            shuffle=False,
+            batch_sampler=batch_indices,
+            collate_fn=FrameData.collate,
+        )
+
+        for frame_data in loader:
+            self.assertIsNone(frame_data.frame_type)
+            self.assertIsNotNone(frame_data.image_rgb)
+            # override the frame_type
+            frame_data.frame_type = [
+                "train_unseen",
+                *(["train_known"] * (len(frame_data.image_rgb) - 1)),
+            ]
+
+            frame_data = dataclass_to_cuda_(frame_data)
+            preds = model(**dataclasses.asdict(frame_data))
+
+            eval_result = eval_batch(
+                frame_data,
+                preds["implicitron_render"],
+                bg_color=self.bg_color,
+                lpips_model=self.lpips_model,
+            )
+
+            if check_metrics:
+                self._check_metrics(
+                    frame_data, preds["implicitron_render"], eval_result
+                )
+
+    def _check_metrics(self, frame_data, implicitron_render, eval_result):
+        # Make a terribly bad NVS prediction and check that this is worse
+        # than the DBIR prediction.
+        implicitron_render_bad = implicitron_render.clone()
+        implicitron_render_bad.depth_render += (
+            torch.randn_like(implicitron_render_bad.depth_render) * 100.0
+        )
+        implicitron_render_bad.image_render += (
+            torch.randn_like(implicitron_render_bad.image_render) * 100.0
+        )
+        implicitron_render_bad.mask_render = (
+            torch.randn_like(implicitron_render_bad.mask_render) > 0.0
+        ).float()
+        eval_result_bad = eval_batch(
+            frame_data,
+            implicitron_render_bad,
+            bg_color=self.bg_color,
+            lpips_model=self.lpips_model,
+        )
+
+        lower_better = {
+            "psnr_masked": False,
+            "psnr_fg": False,
+            "psnr_full_image": False,
+            "depth_abs_fg": True,
+            "iou": False,
+            "rgb_l1_masked": True,
+            "rgb_l1_fg": True,
+            "lpips_masked": True,
+            "lpips_full_image": True,
+        }
+
+        for metric in lower_better:
+            m_better = eval_result[metric]
+            m_worse = eval_result_bad[metric]
+            if np.isnan(m_better) or np.isnan(m_worse):
+                continue  # metric is missing, i.e. NaN
+            _assert = (
+                self.assertLessEqual
+                if lower_better[metric]
+                else self.assertGreaterEqual
+            )
+            _assert(m_better, m_worse)
+
+    def _get_random_batch_indices(
+        self, seq_dataset, n_batches=2, min_batch_size=5, max_batch_size=10
+    ):
+        """
+        Draw `n_batches` random batches of indices into `seq_dataset`.
+
+        Args:
+            seq_dataset: The dataset to index; only its length is used.
+            n_batches: Number of batches to generate.
+            min_batch_size: Lower bound passed to torch.randint as `low`.
+            max_batch_size: Upper bound passed to torch.randint as `high`;
+                torch.randint excludes `high`, so the largest batch drawn has
+                `max_batch_size - 1` elements.
+
+        Returns:
+            A list of `n_batches` index tensors, each a prefix of a fresh
+            random permutation of `range(len(seq_dataset))`.
+        """
+        batch_indices = []
+        for _ in range(n_batches):
+            # one-element tensor; it is used directly as the slice bound below
+            batch_size = torch.randint(
+                low=min_batch_size, high=max_batch_size, size=(1,)
+            )
+            # slicing yields fewer indices if the dataset is smaller than batch_size
+            batch_indices.append(torch.randperm(len(seq_dataset))[:batch_size])
+
+        return batch_indices
+
+    def test_full_eval(self, n_sequences=5):
+        """Test evaluation."""
+
+        # caching batch indices first to preserve RNG state
+        seq_datasets = {}
+        batch_indices = {}
+        for seq in itertools.islice(self.dataset.sequence_names(), n_sequences):
+            idx = list(self.dataset.sequence_indices_in_order(seq))
+            seq_dataset = torch.utils.data.Subset(self.dataset, idx)
+            seq_datasets[seq] = seq_dataset
+            batch_indices[seq] = self._get_random_batch_indices(seq_dataset)
+
+        for model_class_type in ["ModelDBIR", "GenericModel"]:
+            ModelClass = registry.get(ImplicitronModelBase, model_class_type)
+            expand_args_fields(ModelClass)
+            model = ModelClass(
+                render_image_width=self.image_size,
+                render_image_height=self.image_size,
+                bg_color=self.bg_color,
+            )
+            model.eval()
+            model.cuda()
+
+            for seq in itertools.islice(self.dataset.sequence_names(), n_sequences):
+                self._one_sequence_test(
+                    seq_datasets[seq],
+                    model,
+                    batch_indices[seq],
+                    check_metrics=(model_class_type == "ModelDBIR"),
+                )
diff --git a/pytorch3d/tests/implicitron/test_extending_orm_types.py b/pytorch3d/tests/implicitron/test_extending_orm_types.py
new file mode 100644
index 0000000000000000000000000000000000000000..abfb3aabfaf3d2fab3459d1bbe193e1315485240
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_extending_orm_types.py
@@ -0,0 +1,230 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import dataclasses
+import logging
+import os
+import tempfile
+import unittest
+from typing import ClassVar, Optional, Type
+
+import pandas as pd
+import pkg_resources
+import sqlalchemy as sa
+
+from pytorch3d.implicitron.dataset import types
+from pytorch3d.implicitron.dataset.frame_data import FrameData, GenericFrameDataBuilder
+from pytorch3d.implicitron.dataset.orm_types import (
+    SqlFrameAnnotation,
+    SqlSequenceAnnotation,
+)
+from pytorch3d.implicitron.dataset.sql_dataset import SqlIndexDataset
+from pytorch3d.implicitron.dataset.utils import GenericWorkaround
+from pytorch3d.implicitron.tools.config import registry
+from sqlalchemy.orm import composite, Mapped, mapped_column, Session
+
+# Frame-data-builder arguments that switch off every `load_*` flag and the box
+# cropping (presumably so that no image files are needed -- the dataset root
+# is left empty).
+NO_BLOBS_KWARGS = {
+    "dataset_root": "",
+    "load_images": False,
+    "load_depths": False,
+    "load_masks": False,
+    "load_depth_masks": False,
+    "box_crop": False,
+}
+
+# The SQLite metadata file used as the source database, resolved relative to
+# this package.
+DATASET_ROOT = pkg_resources.resource_filename(__name__, "data/sql_dataset")
+METADATA_FILE = os.path.join(DATASET_ROOT, "sql_dataset_100.sqlite")
+
+# Emit DEBUG-level records of the sql_dataset logger through a stream handler.
+# NOTE: this runs at import time and adds a handler to a library logger.
+logger = logging.getLogger("pytorch3d.implicitron.dataset.sql_dataset")
+sh = logging.StreamHandler()
+logger.addHandler(sh)
+logger.setLevel(logging.DEBUG)
+
+
+@dataclasses.dataclass
+class MagneticFieldAnnotation:
+    """
+    Example annotation type with two fields; it is mapped to a pair of
+    database columns by ExtendedSqlFrameAnnotation.magnetic_field.
+    """
+
+    # required field
+    path: str
+    # optional field; defaults to None
+    average_flux_density: Optional[float] = None
+
+
+class ExtendedSqlFrameAnnotation(SqlFrameAnnotation):
+    """
+    SqlFrameAnnotation extended with one scalar column and one composite
+    attribute, used to test extending the ORM schema.
+    """
+
+    # extra optional integer column; defaults to None
+    num_dogs: Mapped[Optional[int]] = mapped_column(default=None)
+
+    # composite attribute backed by two nullable columns, one for each field
+    # of MagneticFieldAnnotation; the default factory yields None
+    magnetic_field: Mapped[MagneticFieldAnnotation] = composite(
+        mapped_column("_magnetic_field_path", nullable=True),
+        mapped_column("_magnetic_field_average_flux_density", nullable=True),
+        default_factory=lambda: None,
+    )
+
+
+class ExtendedSqlIndexDataset(SqlIndexDataset):
+    """SqlIndexDataset that uses ExtendedSqlFrameAnnotation for its frames."""
+
+    # overrides the frame annotation type of the base dataset
+    frame_annotations_type: ClassVar[
+        Type[SqlFrameAnnotation]
+    ] = ExtendedSqlFrameAnnotation
+
+
+class CanineFrameData(FrameData):
+    """FrameData with two extra fields that CanineFrameDataBuilder fills in."""
+
+    # set from ExtendedSqlFrameAnnotation.num_dogs by the builder
+    num_dogs: Optional[int] = None
+    # set from the annotation's magnetic_field.average_flux_density
+    magnetic_field_average_flux_density: Optional[float] = None
+
+
+@registry.register
+class CanineFrameDataBuilder(
+    GenericWorkaround, GenericFrameDataBuilder[CanineFrameData]
+):
+    """
+    A concrete class to build an extended FrameData object
+    """
+
+    # the type of the objects this builder produces
+    frame_data_type: ClassVar[Type[FrameData]] = CanineFrameData
+
+    def build(
+        self,
+        frame_annotation: ExtendedSqlFrameAnnotation,
+        sequence_annotation: types.SequenceAnnotation,
+        load_blobs: bool = True,
+    ) -> CanineFrameData:
+        """
+        Build the frame data with the parent builder and then copy the
+        extended annotation fields onto it.
+
+        Args:
+            frame_annotation: Frame annotation carrying the extra fields.
+            sequence_annotation: Forwarded unchanged to the parent builder.
+            load_blobs: Forwarded unchanged to the parent builder.
+
+        Returns:
+            The frame data with `num_dogs` and
+            `magnetic_field_average_flux_density` set.
+        """
+        frame_data = super().build(frame_annotation, sequence_annotation, load_blobs)
+        # `or` replaces any falsy value (None, but also 0) with 101
+        frame_data.num_dogs = frame_annotation.num_dogs or 101
+        # NOTE(review): assumes magnetic_field is never None here -- confirm
+        frame_data.magnetic_field_average_flux_density = (
+            frame_annotation.magnetic_field.average_flux_density
+        )
+        return frame_data
+
+
+class CanineSqlIndexDataset(SqlIndexDataset):
+    """
+    SqlIndexDataset that reads ExtendedSqlFrameAnnotation rows and selects
+    CanineFrameDataBuilder as its frame data builder.
+    """
+
+    # overrides the frame annotation type of the base dataset
+    frame_annotations_type: ClassVar[
+        Type[SqlFrameAnnotation]
+    ] = ExtendedSqlFrameAnnotation
+
+    # name under which CanineFrameDataBuilder is registered
+    frame_data_builder_class_type: str = "CanineFrameDataBuilder"
+
+
+class TestExtendingOrmTypes(unittest.TestCase):
+    def setUp(self):
+        """
+        Copy the reference database into a temporary SQLite file whose frame
+        table uses the extended schema, and verify the copy.
+        """
+        # create a temporary copy of the DB with an extended schema
+        engine = sa.create_engine(f"sqlite:///{METADATA_FILE}")
+        with Session(engine) as session:
+            # re-create every frame annotation as an ExtendedSqlFrameAnnotation
+            # from the attributes that do not start with an underscore
+            extended_annots = [
+                ExtendedSqlFrameAnnotation(
+                    **{
+                        k: v
+                        for k, v in frame_annot.__dict__.items()
+                        if not k.startswith("_")  # remove mapped fields and SA metadata
+                    }
+                )
+                for frame_annot in session.scalars(sa.select(SqlFrameAnnotation))
+            ]
+            # presumably prebuffered so that the rows stay readable once the
+            # session has been closed -- they are iterated further below
+            seq_annots = session.scalars(
+                sa.select(SqlSequenceAnnotation),
+                execution_options={"prebuffer_rows": True},
+            )
+            # detach the loaded objects from the source session
+            session.expunge_all()
+
+        # delete=False: the file is removed explicitly in tearDown
+        self._temp_db = tempfile.NamedTemporaryFile(delete=False)
+        engine_ext = sa.create_engine(f"sqlite:///{self._temp_db.name}")
+        ExtendedSqlFrameAnnotation.metadata.create_all(engine_ext, checkfirst=True)
+        with Session(engine_ext, expire_on_commit=False) as session_ext:
+            session_ext.add_all(extended_annots)
+            for instance in seq_annots:
+                session_ext.merge(instance)
+            session_ext.commit()
+
+        # check the setup is correct
+        with engine_ext.connect() as connection_ext:
+            df = pd.read_sql_query(
+                sa.select(ExtendedSqlFrameAnnotation), connection_ext
+            )
+            self.assertEqual(len(df), 100)
+            self.assertIn("_magnetic_field_average_flux_density", df.columns)
+
+            df_seq = pd.read_sql_query(sa.select(SqlSequenceAnnotation), connection_ext)
+            self.assertEqual(len(df_seq), 10)
+
+    def tearDown(self):
+        self._temp_db.close()
+        os.remove(self._temp_db.name)
+
+    def test_basic(self, sequence="cat1_seq2", frame_number=4):
+        dataset = ExtendedSqlIndexDataset(
+            sqlite_metadata_file=self._temp_db.name,
+            remove_empty_masks=False,
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        self.assertEqual(len(dataset), 100)
+
+        # check the items are consecutive
+        past_sequences = set()
+        last_frame_number = -1
+        last_sequence = ""
+        for i in range(len(dataset)):
+            item = dataset[i]
+
+            if item.frame_number == 0:
+                self.assertNotIn(item.sequence_name, past_sequences)
+                past_sequences.add(item.sequence_name)
+                last_sequence = item.sequence_name
+            else:
+                self.assertEqual(item.sequence_name, last_sequence)
+                self.assertEqual(item.frame_number, last_frame_number + 1)
+
+            last_frame_number = item.frame_number
+
+        # test indexing
+        with self.assertRaises(IndexError):
+            dataset[len(dataset) + 1]
+
+        # test sequence-frame indexing
+        item = dataset[sequence, frame_number]
+        self.assertEqual(item.sequence_name, sequence)
+        self.assertEqual(item.frame_number, frame_number)
+
+        with self.assertRaises(IndexError):
+            dataset[sequence, 13]
+
+    def test_extending_frame_data(self, sequence="cat1_seq2", frame_number=4):
+        dataset = CanineSqlIndexDataset(
+            sqlite_metadata_file=self._temp_db.name,
+            remove_empty_masks=False,
+            frame_data_builder_CanineFrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        self.assertEqual(len(dataset), 100)
+
+        # check the items are consecutive
+        past_sequences = set()
+        last_frame_number = -1
+        last_sequence = ""
+        for i in range(len(dataset)):
+            item = dataset[i]
+            self.assertIsInstance(item, CanineFrameData)
+            self.assertEqual(item.num_dogs, 101)
+            self.assertIsNone(item.magnetic_field_average_flux_density)
+
+            if item.frame_number == 0:
+                self.assertNotIn(item.sequence_name, past_sequences)
+                past_sequences.add(item.sequence_name)
+                last_sequence = item.sequence_name
+            else:
+                self.assertEqual(item.sequence_name, last_sequence)
+                self.assertEqual(item.frame_number, last_frame_number + 1)
+
+            last_frame_number = item.frame_number
+
+        # test indexing
+        with self.assertRaises(IndexError):
+            dataset[len(dataset) + 1]
+
+        # test sequence-frame indexing
+        item = dataset[sequence, frame_number]
+        self.assertIsInstance(item, CanineFrameData)
+        self.assertEqual(item.sequence_name, sequence)
+        self.assertEqual(item.frame_number, frame_number)
+        self.assertEqual(item.num_dogs, 101)
+
+        with self.assertRaises(IndexError):
+            dataset[sequence, 13]
diff --git a/pytorch3d/tests/implicitron/test_forward_pass.py b/pytorch3d/tests/implicitron/test_forward_pass.py
new file mode 100644
index 0000000000000000000000000000000000000000..64c9a1d574033b844a70427ab9642eb5c0db8576
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_forward_pass.py
@@ -0,0 +1,227 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import unittest
+
+import torch
+from omegaconf import DictConfig, OmegaConf
+from pytorch3d.implicitron.models.generic_model import GenericModel
+from pytorch3d.implicitron.models.renderer.base import EvaluationMode
+from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args
+from pytorch3d.renderer.cameras import look_at_view_transform, PerspectiveCameras
+from tests.common_testing import get_pytorch3d_dir
+
+from .common_resources import provide_resnet34
+
+# Directory with the implicitron_trainer yaml configs; test_all_gm_configs
+# globs it for the model settings to test.
+IMPLICITRON_CONFIGS_DIR = (
+    get_pytorch3d_dir() / "projects" / "implicitron_trainer" / "configs"
+)
+
+
+class TestGenericModel(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:
+        # Called once before the tests of this class; presumably makes the
+        # ResNet34 weights available to the ResNet feature extractor.
+        provide_resnet34()
+
+    def setUp(self):
+        # fixed seed so that the random inputs of every test are reproducible
+        torch.manual_seed(42)
+
+    def test_gm(self):
+        # Simple test of a forward and backward pass of the default GenericModel.
+        device = torch.device("cuda:0")
+        expand_args_fields(GenericModel)
+        model = GenericModel(render_image_height=80, render_image_width=80)
+        model.to(device)
+        self._one_model_test(model, device)
+
+    def test_all_gm_configs(self):
+        # Tests all model settings in the implicitron_trainer config folder.
+        device = torch.device("cuda:0")
+        config_files = []
+
+        for pattern in ("repro_singleseq*.yaml", "repro_multiseq*.yaml"):
+            config_files.extend(
+                [
+                    f
+                    for f in IMPLICITRON_CONFIGS_DIR.glob(pattern)
+                    if not f.name.endswith("_base.yaml")
+                ]
+            )
+
+        for config_file in config_files:
+            with self.subTest(name=config_file.stem):
+                cfg = _load_model_config_from_yaml(str(config_file))
+                cfg.render_image_height = 80
+                cfg.render_image_width = 80
+                model = GenericModel(**cfg)
+                model.to(device)
+                self._one_model_test(
+                    model,
+                    device,
+                    eval_test=True,
+                    bw_test=True,
+                )
+
+    def _one_model_test(
+        self,
+        model,
+        device,
+        n_train_cameras: int = 5,
+        eval_test: bool = True,
+        bw_test: bool = True,
+    ):
+
+        R, T = look_at_view_transform(azim=torch.rand(n_train_cameras) * 360)
+        cameras = PerspectiveCameras(R=R, T=T, device=device)
+
+        N, H, W = n_train_cameras, model.render_image_height, model.render_image_width
+
+        random_args = {
+            "camera": cameras,
+            "fg_probability": _random_input_tensor(N, 1, H, W, True, device),
+            "depth_map": _random_input_tensor(N, 1, H, W, False, device) + 0.1,
+            "mask_crop": _random_input_tensor(N, 1, H, W, True, device),
+            "sequence_name": ["sequence"] * N,
+            "image_rgb": _random_input_tensor(N, 3, H, W, False, device),
+        }
+
+        # training foward pass
+        model.train()
+        train_preds = model(
+            **random_args,
+            evaluation_mode=EvaluationMode.TRAINING,
+        )
+        self.assertTrue(
+            train_preds["objective"].isfinite().item()
+        )  # check finiteness of the objective
+
+        if bw_test:
+            train_preds["objective"].backward()
+
+        if eval_test:
+            model.eval()
+            with torch.no_grad():
+                eval_preds = model(
+                    **random_args,
+                    evaluation_mode=EvaluationMode.EVALUATION,
+                )
+                self.assertEqual(
+                    eval_preds["images_render"].shape,
+                    (1, 3, model.render_image_height, model.render_image_width),
+                )
+
+    def test_idr(self):
+        # Forward pass of GenericModel with IDR.
+        device = torch.device("cuda:0")
+        args = get_default_args(GenericModel)
+        args.renderer_class_type = "SignedDistanceFunctionRenderer"
+        args.implicit_function_class_type = "IdrFeatureField"
+        args.implicit_function_IdrFeatureField_args.n_harmonic_functions_xyz = 6
+
+        model = GenericModel(**args)
+        model.to(device)
+
+        n_train_cameras = 2
+        R, T = look_at_view_transform(azim=torch.rand(n_train_cameras) * 360)
+        cameras = PerspectiveCameras(R=R, T=T, device=device)
+
+        defaulted_args = {
+            "depth_map": None,
+            "mask_crop": None,
+            "sequence_name": None,
+        }
+
+        target_image_rgb = torch.rand(
+            (n_train_cameras, 3, model.render_image_height, model.render_image_width),
+            device=device,
+        )
+        fg_probability = torch.rand(
+            (n_train_cameras, 1, model.render_image_height, model.render_image_width),
+            device=device,
+        )
+        train_preds = model(
+            camera=cameras,
+            evaluation_mode=EvaluationMode.TRAINING,
+            image_rgb=target_image_rgb,
+            fg_probability=fg_probability,
+            **defaulted_args,
+        )
+        self.assertGreater(train_preds["objective"].item(), 0)
+
+    def test_viewpool(self):
+        device = torch.device("cuda:0")
+        args = get_default_args(GenericModel)
+        args.view_pooler_enabled = True
+        args.image_feature_extractor_class_type = "ResNetFeatureExtractor"
+        args.image_feature_extractor_ResNetFeatureExtractor_args.add_masks = False
+        model = GenericModel(**args)
+        model.to(device)
+
+        n_train_cameras = 2
+        R, T = look_at_view_transform(azim=torch.rand(n_train_cameras) * 360)
+        cameras = PerspectiveCameras(R=R, T=T, device=device)
+
+        defaulted_args = {
+            "fg_probability": None,
+            "depth_map": None,
+            "mask_crop": None,
+        }
+
+        target_image_rgb = torch.rand(
+            (n_train_cameras, 3, model.render_image_height, model.render_image_width),
+            device=device,
+        )
+        train_preds = model(
+            camera=cameras,
+            evaluation_mode=EvaluationMode.TRAINING,
+            image_rgb=target_image_rgb,
+            sequence_name=["a"] * n_train_cameras,
+            **defaulted_args,
+        )
+        self.assertGreater(train_preds["objective"].item(), 0)
+
+
+def _random_input_tensor(
+    N: int,
+    C: int,
+    H: int,
+    W: int,
+    is_binary: bool,
+    device: torch.device,
+) -> torch.Tensor:
+    T = torch.rand(N, C, H, W, device=device)
+    if is_binary:
+        T = (T > 0.5).float()
+    return T
+
+
+def _load_model_config_from_yaml(config_path, strict=True) -> DictConfig:
+    default_cfg = get_default_args(GenericModel)
+    cfg = _load_model_config_from_yaml_rec(default_cfg, config_path)
+    return cfg
+
+
+def _load_model_config_from_yaml_rec(cfg: DictConfig, config_path: str) -> DictConfig:
+    cfg_loaded = OmegaConf.load(config_path)
+    cfg_model_loaded = None
+    if "model_factory_ImplicitronModelFactory_args" in cfg_loaded:
+        factory_args = cfg_loaded.model_factory_ImplicitronModelFactory_args
+        if "model_GenericModel_args" in factory_args:
+            cfg_model_loaded = factory_args.model_GenericModel_args
+    defaults = cfg_loaded.pop("defaults", None)
+    if defaults is not None:
+        for default_name in defaults:
+            if default_name in ("_self_", "default_config"):
+                continue
+            default_name = os.path.splitext(default_name)[0]
+            defpath = os.path.join(os.path.dirname(config_path), default_name + ".yaml")
+            cfg = _load_model_config_from_yaml_rec(cfg, defpath)
+            if cfg_model_loaded is not None:
+                cfg = OmegaConf.merge(cfg, cfg_model_loaded)
+    elif cfg_model_loaded is not None:
+        cfg = OmegaConf.merge(cfg, cfg_model_loaded)
+    return cfg
diff --git a/pytorch3d/tests/implicitron/test_frame_data_builder.py b/pytorch3d/tests/implicitron/test_frame_data_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..73145815aa7c28cd6781dc6cb3a539df6c6af990
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_frame_data_builder.py
@@ -0,0 +1,263 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import contextlib
+import gzip
+import os
+import unittest
+from typing import List
+
+import numpy as np
+import torch
+
+from pytorch3d.implicitron.dataset import types
+from pytorch3d.implicitron.dataset.dataset_base import FrameData
+from pytorch3d.implicitron.dataset.frame_data import FrameDataBuilder
+from pytorch3d.implicitron.dataset.utils import (
+    get_bbox_from_mask,
+    load_16big_png_depth,
+    load_1bit_png_mask,
+    load_depth,
+    load_depth_mask,
+    load_image,
+    load_mask,
+    safe_as_tensor,
+    transpose_normalize_image,
+)
+from pytorch3d.implicitron.tools.config import get_default_args
+from pytorch3d.renderer.cameras import PerspectiveCameras
+
+from tests.common_testing import TestCaseMixin
+from tests.implicitron.common_resources import get_skateboard_data
+
+
+class TestFrameDataBuilder(TestCaseMixin, unittest.TestCase):
+    def setUp(self):  # shared fixtures: builder, annotations, metadata-only FrameData
+        torch.manual_seed(42)
+        # the skateboard data context is entered here and closed again on cleanup
+        category = "skateboard"
+        stack = contextlib.ExitStack()
+        self.dataset_root, self.path_manager = stack.enter_context(
+            get_skateboard_data()
+        )
+        self.addCleanup(stack.close)
+        self.image_height = 768
+        self.image_width = 512
+
+        self.frame_data_builder = FrameDataBuilder(
+            image_height=self.image_height,
+            image_width=self.image_width,
+            dataset_root=self.dataset_root,
+            path_manager=self.path_manager,
+        )
+
+        # load the first frame annotation (see JsonIndexDataset._load_frames())
+        frame_file = os.path.join(self.dataset_root, category, "frame_annotations.jgz")
+        local_file = self.path_manager.get_local_path(frame_file)
+        with gzip.open(local_file, "rt", encoding="utf8") as zipfile:
+            frame_annots_list = types.load_dataclass(
+                zipfile, List[types.FrameAnnotation]
+            )
+            self.frame_annotation = frame_annots_list[0]
+        # look up the sequence annotation that this frame belongs to
+        sequence_annotations_file = os.path.join(
+            self.dataset_root, category, "sequence_annotations.jgz"
+        )
+        local_file = self.path_manager.get_local_path(sequence_annotations_file)
+        with gzip.open(local_file, "rt", encoding="utf8") as zipfile:
+            seq_annots_list = types.load_dataclass(
+                zipfile, List[types.SequenceAnnotation]
+            )
+            seq_annots = {entry.sequence_name: entry for entry in seq_annots_list}
+            self.seq_annotation = seq_annots[self.frame_annotation.sequence_name]
+        # FrameData with metadata only; the tests below fill in the remaining fields
+        point_cloud = self.seq_annotation.point_cloud
+        self.frame_data = FrameData(
+            frame_number=safe_as_tensor(self.frame_annotation.frame_number, torch.long),
+            frame_timestamp=safe_as_tensor(
+                self.frame_annotation.frame_timestamp, torch.float
+            ),
+            sequence_name=self.frame_annotation.sequence_name,
+            sequence_category=self.seq_annotation.category,
+            camera_quality_score=safe_as_tensor(
+                self.seq_annotation.viewpoint_quality_score, torch.float
+            ),
+            point_cloud_quality_score=safe_as_tensor(
+                point_cloud.quality_score, torch.float
+            )
+            if point_cloud is not None
+            else None,
+        )
+
+    def test_frame_data_builder_args(self):
+        # test that FrameDataBuilder works with get_default_args
+        get_default_args(FrameDataBuilder)
+
+    def test_fix_point_cloud_path(self):
+        """Some files in Co3Dv2 have an accidental absolute path stored."""
+        original_path = "some_file_path"
+        modified_path = self.frame_data_builder._fix_point_cloud_path(original_path)
+        self.assertIn(original_path, modified_path)
+        self.assertIn(self.frame_data_builder.dataset_root, modified_path)
+
+    def test_load_and_adjust_frame_data(self):
+        self.frame_data.image_size_hw = safe_as_tensor(
+            self.frame_annotation.image.size, torch.long
+        )
+        self.frame_data.effective_image_size_hw = self.frame_data.image_size_hw
+        # foreground probability mask, and the bounding box derived from it
+        fg_mask_np, mask_path = self.frame_data_builder._load_fg_probability(
+            self.frame_annotation
+        )
+        self.frame_data.mask_path = mask_path
+        self.frame_data.fg_probability = safe_as_tensor(fg_mask_np, torch.float)
+        mask_thr = self.frame_data_builder.box_crop_mask_thr
+        bbox_xywh = get_bbox_from_mask(fg_mask_np, mask_thr)
+        self.frame_data.bbox_xywh = safe_as_tensor(bbox_xywh, torch.long)
+
+        self.assertIsNotNone(self.frame_data.mask_path)
+        self.assertTrue(torch.is_tensor(self.frame_data.fg_probability))
+        self.assertTrue(torch.is_tensor(self.frame_data.bbox_xywh))
+        # assert bboxes shape
+        self.assertEqual(self.frame_data.bbox_xywh.shape, torch.Size([4]))
+        # RGB image, post-processed together with the foreground mask
+        image_path = os.path.join(
+            self.frame_data_builder.dataset_root, self.frame_annotation.image.path
+        )
+        image_np = load_image(self.frame_data_builder._local_path(image_path))
+        self.assertIsInstance(image_np, np.ndarray)
+        self.frame_data.image_rgb = self.frame_data_builder._postprocess_image(
+            image_np, self.frame_annotation.image.size, self.frame_data.fg_probability
+        )
+        self.assertIsInstance(self.frame_data.image_rgb, torch.Tensor)
+        # depth map, its source path and the accompanying depth mask
+        (
+            self.frame_data.depth_map,
+            depth_path,
+            self.frame_data.depth_mask,
+        ) = self.frame_data_builder._load_mask_depth(
+            self.frame_annotation,
+            self.frame_data.fg_probability,
+        )
+        self.assertTrue(torch.is_tensor(self.frame_data.depth_map))
+        self.assertIsNotNone(depth_path)
+        self.assertTrue(torch.is_tensor(self.frame_data.depth_mask))
+
+        new_size = (self.image_height, self.image_width)
+        # cropping only happens when the builder is configured with box_crop
+        if self.frame_data_builder.box_crop:
+            self.frame_data.crop_by_metadata_bbox_(
+                self.frame_data_builder.box_crop_context,
+            )
+
+        # assert image and mask shapes after resize
+        self.frame_data.resize_frame_(
+            new_size_hw=torch.tensor(new_size, dtype=torch.long),
+        )
+        self.assertEqual(
+            self.frame_data.mask_crop.shape,
+            torch.Size([1, self.image_height, self.image_width]),
+        )
+        self.assertEqual(
+            self.frame_data.image_rgb.shape,
+            torch.Size([3, self.image_height, self.image_width]),
+        )
+        self.assertEqual(  # NOTE(review): repeats the mask_crop check above
+            self.frame_data.mask_crop.shape,
+            torch.Size([1, self.image_height, self.image_width]),
+        )
+        self.assertEqual(
+            self.frame_data.fg_probability.shape,
+            torch.Size([1, self.image_height, self.image_width]),
+        )
+        self.assertEqual(
+            self.frame_data.depth_map.shape,
+            torch.Size([1, self.image_height, self.image_width]),
+        )
+        self.assertEqual(
+            self.frame_data.depth_mask.shape,
+            torch.Size([1, self.image_height, self.image_width]),
+        )
+        self.frame_data.camera = self.frame_data_builder._get_pytorch3d_camera(
+            self.frame_annotation,
+        )
+        self.assertEqual(type(self.frame_data.camera), PerspectiveCameras)
+
+    def test_transpose_normalize_image(self):
+        def inverse_transpose_normalize_image(image: np.ndarray) -> np.ndarray:
+            im = image * 255.0
+            return im.transpose((1, 2, 0)).astype(np.uint8)  # axis 0 moves to the end
+
+        # Test 2D input (the round trip is compared against input[..., None])
+        input_image = np.array(
+            [[10, 20, 30], [40, 50, 60], [70, 80, 90]], dtype=np.uint8
+        )
+        expected_input = inverse_transpose_normalize_image(
+            transpose_normalize_image(input_image)
+        )
+        self.assertClose(input_image[..., None], expected_input)
+
+        # Test 3D input (the round trip must reproduce the input unchanged)
+        input_image = np.array(
+            [
+                [[10, 20, 30], [40, 50, 60], [70, 80, 90]],
+                [[100, 110, 120], [130, 140, 150], [160, 170, 180]],
+                [[190, 200, 210], [220, 230, 240], [250, 255, 255]],
+            ],
+            dtype=np.uint8,
+        )
+        expected_input = inverse_transpose_normalize_image(
+            transpose_normalize_image(input_image)
+        )
+        self.assertClose(input_image, expected_input)
+
+    def test_load_image(self):
+        path = os.path.join(self.dataset_root, self.frame_annotation.image.path)
+        local_path = self.path_manager.get_local_path(path)
+        image = load_image(local_path)
+        self.assertEqual(image.dtype, np.float32)
+        self.assertLessEqual(np.max(image), 1.0)
+        self.assertGreaterEqual(np.min(image), 0.0)
+
+    def test_load_mask(self):
+        path = os.path.join(self.dataset_root, self.frame_annotation.mask.path)
+        path = self.path_manager.get_local_path(path)
+        mask = load_mask(path)
+        self.assertEqual(mask.dtype, np.float32)
+        self.assertLessEqual(np.max(mask), 1.0)
+        self.assertGreaterEqual(np.min(mask), 0.0)
+
+    def test_load_depth(self):
+        path = os.path.join(self.dataset_root, self.frame_annotation.depth.path)
+        path = self.path_manager.get_local_path(path)
+        depth_map = load_depth(path, self.frame_annotation.depth.scale_adjustment)
+        self.assertEqual(depth_map.dtype, np.float32)
+        self.assertEqual(len(depth_map.shape), 3)
+
+    def test_load_16big_png_depth(self):
+        path = os.path.join(self.dataset_root, self.frame_annotation.depth.path)
+        path = self.path_manager.get_local_path(path)
+        depth_map = load_16big_png_depth(path)
+        self.assertEqual(depth_map.dtype, np.float32)
+        self.assertEqual(len(depth_map.shape), 2)
+
+    def test_load_1bit_png_mask(self):
+        mask_path = os.path.join(
+            self.dataset_root, self.frame_annotation.depth.mask_path
+        )
+        mask_path = self.path_manager.get_local_path(mask_path)
+        mask = load_1bit_png_mask(mask_path)
+        self.assertEqual(mask.dtype, np.float32)
+        self.assertEqual(len(mask.shape), 2)
+
+    def test_load_depth_mask(self):
+        mask_path = os.path.join(
+            self.dataset_root, self.frame_annotation.depth.mask_path
+        )
+        mask_path = self.path_manager.get_local_path(mask_path)
+        mask = load_depth_mask(mask_path)
+        self.assertEqual(mask.dtype, np.float32)
+        self.assertEqual(len(mask.shape), 3)
diff --git a/pytorch3d/tests/implicitron/test_implicit_function_neural_radiance_field.py b/pytorch3d/tests/implicitron/test_implicit_function_neural_radiance_field.py
new file mode 100644
index 0000000000000000000000000000000000000000..f31dfcd11f0e481f58341d69448a5bda3d82ed55
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_implicit_function_neural_radiance_field.py
@@ -0,0 +1,66 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.implicitron.models.implicit_function.base import ImplicitronRayBundle
+from pytorch3d.implicitron.models.implicit_function.neural_radiance_field import (
+    NeuralRadianceFieldImplicitFunction,
+)
+
+
+class TestNeuralRadianceFieldImplicitFunction(unittest.TestCase):
+    def setUp(self):
+        torch.manual_seed(42)
+
+    def test_forward_with_integrated_positionial_embedding(self):
+        # bins are supplied, so the integrated encoding path must run end to end
+        batch_dims = (2, 4, 4)
+        bundle = ImplicitronRayBundle(
+            origins=torch.randn(*batch_dims, 3),
+            directions=torch.randn(*batch_dims, 3),
+            bins=torch.randn(*batch_dims, 6 + 1),
+            lengths=torch.randn(*batch_dims, 6),
+            pixel_radii_2d=torch.randn(*batch_dims, 1),
+            xys=None,
+        )
+        implicit_fn = NeuralRadianceFieldImplicitFunction(
+            n_hidden_neurons_dir=32, use_integrated_positional_encoding=True
+        )
+        densities, colors, _ = implicit_fn(ray_bundle=bundle)
+        self.assertEqual(densities.shape, (*batch_dims, bundle.lengths.shape[-1], 1))
+        self.assertEqual(colors.shape, (*batch_dims, bundle.lengths.shape[-1], 3))
+
+    def test_forward_with_integrated_positionial_embedding_raise_exception(self):
+        batch_dims = (2, 4, 4)
+        bundle = ImplicitronRayBundle(
+            origins=torch.randn(*batch_dims, 3),
+            directions=torch.randn(*batch_dims, 3),
+            bins=None,  # no bins: the forward pass below is expected to raise
+            lengths=torch.randn(*batch_dims, 6),
+            pixel_radii_2d=torch.randn(*batch_dims, 1),
+            xys=None,
+        )
+        implicit_fn = NeuralRadianceFieldImplicitFunction(
+            n_hidden_neurons_dir=32, use_integrated_positional_encoding=True
+        )
+        with self.assertRaises(ValueError):
+            _ = implicit_fn(ray_bundle=bundle)
+
+    def test_forward(self):  # integrated positional encoding is not requested here
+        batch_dims = (2, 4, 4)
+        bundle = ImplicitronRayBundle(
+            origins=torch.randn(*batch_dims, 3),
+            directions=torch.randn(*batch_dims, 3),
+            lengths=torch.randn(*batch_dims, 6),
+            pixel_radii_2d=torch.randn(*batch_dims, 1),
+            xys=None,
+        )
+        implicit_fn = NeuralRadianceFieldImplicitFunction(n_hidden_neurons_dir=32)
+        densities, colors, _ = implicit_fn(ray_bundle=bundle)
+        self.assertEqual(densities.shape, (*batch_dims, 6, 1))
+        self.assertEqual(colors.shape, (*batch_dims, 6, 3))
diff --git a/pytorch3d/tests/implicitron/test_json_index_dataset_provider_v2.py b/pytorch3d/tests/implicitron/test_json_index_dataset_provider_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..c99481a485132f0abfeab8e0f23f1000cca072b4
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_json_index_dataset_provider_v2.py
@@ -0,0 +1,247 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import json
+import os
+import random
+import tempfile
+import unittest
+from typing import List
+
+import numpy as np
+
+import torch
+import torchvision
+from PIL import Image
+from pytorch3d.implicitron.dataset.frame_data import FrameData
+from pytorch3d.implicitron.dataset.json_index_dataset_map_provider_v2 import (
+    JsonIndexDatasetMapProviderV2,
+)
+from pytorch3d.implicitron.dataset.types import (
+    dump_dataclass_jgzip,
+    FrameAnnotation,
+    ImageAnnotation,
+    MaskAnnotation,
+    SequenceAnnotation,
+)
+from pytorch3d.implicitron.tools.config import expand_args_fields
+from tests.common_testing import interactive_testing_requested
+
+from .common_resources import CO3DV2_MANIFOLD_PATH
+
+
+class TestJsonIndexDatasetProviderV2(unittest.TestCase):
+    def test_random_dataset(self):
+        # write a small random dataset to disk, then load it back per category
+        expand_args_fields(JsonIndexDatasetMapProviderV2)
+        categories = ["A", "B"]
+        subset_name = "test"
+        eval_batch_size = 5
+        n_frames = 8 * 3
+        n_sequences = 5
+        n_eval_batches = 10
+        with tempfile.TemporaryDirectory() as tmpd:
+            _make_random_json_dataset_map_provider_v2_data(
+                tmpd,
+                categories,
+                eval_batch_size=eval_batch_size,
+                n_frames=n_frames,
+                n_sequences=n_sequences,
+                n_eval_batches=n_eval_batches,
+                subset_name=subset_name,
+            )
+            for n_known_frames_for_test in [0, 2]:
+                dataset_providers = {
+                    category: JsonIndexDatasetMapProviderV2(
+                        category=category,
+                        subset_name=subset_name,
+                        dataset_root=tmpd,
+                        n_known_frames_for_test=n_known_frames_for_test,
+                    )
+                    for category in [*categories, ",".join(sorted(categories))]
+                }
+                for category, dataset_provider in dataset_providers.items():
+                    dataset_map = dataset_provider.get_dataset_map()
+                    for set_ in ["train", "val", "test"]:
+                        dataset = getattr(dataset_map, set_)
+                        cat2seq = dataset.category_to_sequence_names()
+                        self.assertEqual(",".join(sorted(cat2seq.keys())), category)
+
+                        if not (n_known_frames_for_test != 0 and set_ == "test"):
+                            # check the lengths only in case we do not have the
+                            # n_known_frames_for_test set
+                            expected_dataset_len = n_frames * n_sequences // 3
+                            if "," in category:
+                                # multicategory json index dataset, sum the lengths of
+                                # category-specific ones
+                                expected_dataset_len = sum(
+                                    len(
+                                        getattr(
+                                            dataset_providers[c].get_dataset_map(), set_
+                                        )
+                                    )
+                                    for c in categories
+                                )
+                                self.assertEqual(
+                                    sum(len(s) for s in cat2seq.values()),
+                                    n_sequences * len(categories),
+                                )
+                                self.assertEqual(len(cat2seq), len(categories))
+                            else:
+                                self.assertEqual(
+                                    len(cat2seq[category]),
+                                    n_sequences,
+                                )
+                                self.assertEqual(len(cat2seq), 1)
+                            self.assertEqual(len(dataset), expected_dataset_len)
+
+                        if set_ == "test":
+                            # check the number of eval batches
+                            expected_n_eval_batches = n_eval_batches
+                            if "," in category:
+                                expected_n_eval_batches *= len(categories)
+                            self.assertEqual(
+                                len(dataset.get_eval_batches()),
+                                expected_n_eval_batches,
+                            )
+                        if set_ in ["train", "val"]:
+                            dataloader = torch.utils.data.DataLoader(
+                                dataset,
+                                batch_size=3,
+                                shuffle=True,
+                                collate_fn=FrameData.collate,
+                            )
+                        else:
+                            dataloader = torch.utils.data.DataLoader(
+                                dataset,
+                                batch_sampler=dataset_map[set_].get_eval_batches(),
+                                collate_fn=FrameData.collate,
+                            )
+                        for batch in dataloader:
+                            if set_ == "test":
+                                self.assertEqual(
+                                    batch.image_rgb.shape[0],
+                                    n_known_frames_for_test + eval_batch_size,
+                                )
+                    category_to_subset_list = (
+                        dataset_provider.get_category_to_subset_name_list()
+                    )
+                    category_to_subset_list_ = {c: [subset_name] for c in categories}
+                    # each generated category must list exactly the generated subset
+                    self.assertEqual(category_to_subset_list, category_to_subset_list_)
+
+
+def _make_random_json_dataset_map_provider_v2_data(
+    root: str,
+    categories: List[str],
+    n_frames: int = 8,
+    n_sequences: int = 5,
+    n_eval_batches: int = 10,
+    H: int = 50,
+    W: int = 30,
+    subset_name: str = "test",
+    eval_batch_size: int = 5,
+):
+    """
+    Write a random dataset for JsonIndexDatasetMapProviderV2 under `root`: per
+    category, images, masks, frame/sequence annotations, a train/val/test set list
+    and `n_eval_batches` eval batches of `eval_batch_size` test frames each.
+    """
+    os.makedirs(root, exist_ok=True)
+    category_to_subset_list = {}
+    for category in categories:
+        frame_annotations = []
+        sequence_annotations = []
+        frame_index = []
+        for seq_i in range(n_sequences):
+            seq_name = category + str(seq_i)
+            # the output folders do not depend on the frame: create them once
+            imdir = os.path.join(root, category, seq_name, "images")
+            maskdir = os.path.join(root, category, seq_name, "masks")
+            os.makedirs(imdir, exist_ok=True)
+            os.makedirs(maskdir, exist_ok=True)
+            for i in range(n_frames):
+                # generate and store image
+                img_path = os.path.join(imdir, f"frame{i:05d}.jpg")
+                img = torch.rand(3, H, W)
+                torchvision.utils.save_image(img, img_path)
+
+                # generate and store mask (ones from row H // 2 and column W // 2 on)
+                mask_path = os.path.join(maskdir, f"frame{i:05d}.png")
+                mask = np.zeros((H, W))
+                mask[H // 2 :, W // 2 :] = 1
+                mask_u8 = (mask * 255.0).astype(np.uint8)
+                Image.fromarray(mask_u8, mode="L").convert("L").save(mask_path)
+                # annotations store the file paths relative to `root`
+                fa = FrameAnnotation(
+                    sequence_name=seq_name,
+                    frame_number=i,
+                    frame_timestamp=float(i),
+                    image=ImageAnnotation(
+                        path=os.path.relpath(img_path, root),
+                        size=list(img.shape[-2:]),
+                    ),
+                    mask=MaskAnnotation(
+                        path=os.path.relpath(mask_path, root),
+                        mass=mask.sum().item(),
+                    ),
+                )
+                frame_annotations.append(fa)
+                frame_index.append((seq_name, i, fa.image.path))
+            seq_annot = SequenceAnnotation(sequence_name=seq_name, category=category)
+            sequence_annotations.append(seq_annot)
+
+        dump_dataclass_jgzip(
+            os.path.join(root, category, "frame_annotations.jgz"),
+            frame_annotations,
+        )
+        dump_dataclass_jgzip(
+            os.path.join(root, category, "sequence_annotations.jgz"),
+            sequence_annotations,
+        )
+
+        test_frame_index = frame_index[2::3]
+        # frames are dealt round-robin into the train / val / test sets
+        set_list = {
+            "train": frame_index[0::3],
+            "val": frame_index[1::3],
+            "test": test_frame_index,
+        }
+        set_lists_dir = os.path.join(root, category, "set_lists")
+        os.makedirs(set_lists_dir, exist_ok=True)
+        set_list_file = os.path.join(set_lists_dir, f"set_lists_{subset_name}.json")
+        with open(set_list_file, "w") as f:
+            json.dump(set_list, f)
+        # each eval batch is a random sample (without replacement) of test frames
+        eval_batches = [
+            random.sample(test_frame_index, eval_batch_size)
+            for _ in range(n_eval_batches)
+        ]
+
+        eval_b_dir = os.path.join(root, category, "eval_batches")
+        os.makedirs(eval_b_dir, exist_ok=True)
+        eval_b_file = os.path.join(eval_b_dir, f"eval_batches_{subset_name}.json")
+        with open(eval_b_file, "w") as f:
+            json.dump(eval_batches, f)
+
+        category_to_subset_list[category] = [subset_name]
+
+    with open(os.path.join(root, "category_to_subset_name_list.json"), "w") as f:
+        json.dump(category_to_subset_list, f)
+
+
+class TestCo3dv2(unittest.TestCase):
+    def test_simple(self):
+        # Reads from CO3DV2_MANIFOLD_PATH, so it only runs in interactive mode.
+        if interactive_testing_requested():
+            provider = JsonIndexDatasetMapProviderV2(
+                category="apple",
+                subset_name="manyview_dev_0",
+                dataset_root=CO3DV2_MANIFOLD_PATH,
+                dataset_JsonIndexDataset_args={"load_point_clouds": True},
+            )
+            provider.get_dataset_map().train[0]
diff --git a/pytorch3d/tests/implicitron/test_model_visualize.py b/pytorch3d/tests/implicitron/test_model_visualize.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c9b1f9a5343c6b126f198cf2f90a3724a168748
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_model_visualize.py
@@ -0,0 +1,155 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import contextlib
+import math
+import os
+import unittest
+from typing import Tuple
+
+import torch
+from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
+from pytorch3d.implicitron.dataset.visualize import get_implicitron_sequence_pointcloud
+
+from pytorch3d.implicitron.models.visualization.render_flyaround import render_flyaround
+from pytorch3d.implicitron.tools.config import expand_args_fields
+from pytorch3d.implicitron.tools.point_cloud_utils import render_point_cloud_pytorch3d
+from pytorch3d.renderer.cameras import CamerasBase
+from tests.common_testing import interactive_testing_requested
+from visdom import Visdom
+
+from .common_resources import get_skateboard_data
+
+
+class TestModelVisualize(unittest.TestCase):
+    def test_flyaround_one_sequence(
+        self,
+        image_size: int = 256,
+    ):
+        # Uses "cuda:0" and tries to reach a visdom server; interactive runs only.
+        if not interactive_testing_requested():
+            return
+
+        # the skateboard data context stays open until test cleanup
+        category = "skateboard"
+        stack = contextlib.ExitStack()
+        dataset_root, path_manager = stack.enter_context(get_skateboard_data())
+        self.addCleanup(stack.close)
+
+        category_dir = os.path.join(dataset_root, category)
+        frame_file = os.path.join(category_dir, "frame_annotations.jgz")
+        sequence_file = os.path.join(category_dir, "sequence_annotations.jgz")
+        subset_lists_file = os.path.join(category_dir, "set_lists.json")
+
+        expand_args_fields(JsonIndexDataset)
+        train_dataset = JsonIndexDataset(
+            frame_annotations_file=frame_file,
+            sequence_annotations_file=sequence_file,
+            subset_lists_file=subset_lists_file,
+            dataset_root=dataset_root,
+            image_height=image_size,
+            image_width=image_size,
+            box_crop=True,
+            load_point_clouds=True,
+            path_manager=path_manager,
+            subsets=["train_known"],
+        )
+
+        # visualize the first sequence found in the dataset
+        sequence_names = list(train_dataset.seq_annots.keys())
+        show_sequence_name = sequence_names[0]
+
+        # results are written next to this test file
+        output_dir = os.path.dirname(os.path.abspath(__file__))
+
+        visdom_show_preds = Visdom().check_connection()
+
+        # exercise both settings of `load_dataset_pointcloud`
+        for load_dataset_pointcloud in [True, False]:
+            model = _PointcloudRenderingModel(
+                train_dataset,
+                show_sequence_name,
+                device="cuda:0",
+                load_dataset_pointcloud=load_dataset_pointcloud,
+            )
+
+            video_path = os.path.join(output_dir, f"load_pcl_{load_dataset_pointcloud}")
+            os.makedirs(output_dir, exist_ok=True)
+
+            # render once without and once with a directory for the video frames
+            for output_video_frames_dir in [None, video_path]:
+                render_flyaround(
+                    train_dataset,
+                    show_sequence_name,
+                    model,
+                    video_path,
+                    n_flyaround_poses=10,
+                    fps=5,
+                    max_angle=2 * math.pi,
+                    trajectory_type="circular_lsq_fit",
+                    trajectory_scale=1.1,
+                    scene_center=(0.0, 0.0, 0.0),
+                    up=(0.0, 1.0, 0.0),
+                    traj_offset=1.0,
+                    n_source_views=1,
+                    visdom_show_preds=visdom_show_preds,
+                    visdom_environment="test_model_visalize",
+                    visdom_server="http://127.0.0.1",
+                    visdom_port=8097,
+                    num_workers=10,
+                    seed=None,
+                    video_resize=None,
+                    visualize_preds_keys=[
+                        "images_render",
+                        "depths_render",
+                        "masks_render",
+                        "_all_source_images",
+                    ],
+                    output_video_frames_dir=output_video_frames_dir,
+                )
+
+
+class _PointcloudRenderingModel(torch.nn.Module):
+    """Stand-in model that renders one stored sequence point cloud for a camera."""
+
+    def __init__(
+        self,
+        train_dataset: JsonIndexDataset,
+        sequence_name: str,
+        render_size: Tuple[int, int] = (400, 400),
+        device=None,
+        load_dataset_pointcloud: bool = False,
+        max_frames: int = 30,
+        num_workers: int = 10,
+    ):
+        super().__init__()
+        self._render_size = render_size
+        # the second value returned alongside the point cloud is not needed here
+        pcl, _unused = get_implicitron_sequence_pointcloud(
+            train_dataset,
+            sequence_name=sequence_name,
+            mask_points=True,
+            max_frames=max_frames,
+            num_workers=num_workers,
+            load_dataset_point_cloud=load_dataset_pointcloud,
+        )
+        self._point_cloud = pcl.to(device)
+
+    def forward(self, camera: CamerasBase, **kwargs):
+        # renders from `camera[0]`; any extra keyword arguments are ignored
+        rgb, mask, depth = render_point_cloud_pytorch3d(
+            camera[0],
+            self._point_cloud,
+            render_size=self._render_size,
+            point_radius=1e-2,
+            topk=10,
+            bg_color=0.0,
+        )
+        return {
+            "images_render": rgb.clamp(0.0, 1.0),
+            "masks_render": mask,
+            "depths_render": depth,
+        }
diff --git a/pytorch3d/tests/implicitron/test_models_renderer_base.py b/pytorch3d/tests/implicitron/test_models_renderer_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a4ff16df6173e983c00bb02010d353619a5f5b4
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_models_renderer_base.py
@@ -0,0 +1,288 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import numpy as np
+
+import torch
+
+from pytorch3d.implicitron.models.renderer.base import (
+    approximate_conical_frustum_as_gaussians,
+    compute_3d_diagonal_covariance_gaussian,
+    conical_frustum_to_gaussian,
+    ImplicitronRayBundle,
+)
+from pytorch3d.implicitron.models.renderer.ray_sampler import AbstractMaskRaySampler
+
+from tests.common_testing import TestCaseMixin
+
+
+class TestRendererBase(TestCaseMixin, unittest.TestCase):
+    def test_implicitron_from_bins(self) -> None:
+        # When only `bins` is provided, `lengths` must be the midpoints of
+        # consecutive bin edges and `bins` must be stored unchanged.
+        bin_edges = torch.randn(2, 3, 4, 5)
+        bundle = ImplicitronRayBundle(
+            origins=None, directions=None, lengths=None, xys=None, bins=bin_edges
+        )
+
+        midpoints = 0.5 * (bin_edges[..., 1:] + bin_edges[..., :-1])
+        self.assertClose(bundle.lengths, midpoints)
+        self.assertClose(bundle.bins, bin_edges)
+
+    def test_implicitron_raise_value_error_bins_is_set_and_try_to_set_lengths(
+        self,
+    ) -> None:
+        expected_error = (
+            "If the bins attribute is not None you cannot set the lengths attribute."
+        )
+        bundle = ImplicitronRayBundle(
+            origins=torch.rand(2, 3, 4, 3),
+            directions=torch.rand(2, 3, 4, 3),
+            lengths=None,
+            xys=torch.rand(2, 3, 4, 2),
+            bins=torch.rand(2, 3, 4, 14),
+        )
+        with self.assertRaisesRegex(ValueError, expected_error):
+            bundle.lengths = torch.empty(2)  # must be rejected: bins is set
+
+    def test_implicitron_raise_value_error_if_bins_dim_equal_1(self) -> None:
+        # Bins whose last dimension has size 1 must be rejected at construction.
+        message = "The last dim of bins must be at least superior or equal to 2."
+        with self.assertRaisesRegex(ValueError, message):
+            ImplicitronRayBundle(
+                origins=torch.rand(2, 3, 4, 3),
+                directions=torch.rand(2, 3, 4, 3),
+                lengths=None,
+                xys=torch.rand(2, 3, 4, 2),
+                bins=torch.rand(2, 3, 4, 1),
+            )
+
+    def test_implicitron_raise_value_error_if_neither_bins_or_lengths_provided(
+        self,
+    ) -> None:
+        message = (
+            "Please set either bins or lengths to initialize an ImplicitronRayBundle."
+        )
+        with self.assertRaisesRegex(ValueError, message):  # both are None below
+            ImplicitronRayBundle(
+                origins=torch.rand(2, 3, 4, 3),
+                directions=torch.rand(2, 3, 4, 3),
+                lengths=None,
+                xys=torch.rand(2, 3, 4, 2),
+                bins=None,
+            )
+
+    def test_conical_frustum_to_gaussian(self) -> None:
+        origins = torch.zeros(3, 3, 3)
+        directions = torch.tensor(
+            [
+                [[0, 0, 0], [1, 0, 0], [3, 0, 0]],
+                [[0, 0.25, 0], [1, 0.25, 0], [3, 0.25, 0]],
+                [[0, 1, 0], [1, 1, 0], [3, 1, 0]],
+            ]
+        )
+        bins = torch.tensor(
+            [
+                [[0.5, 1.5], [0.3, 0.7], [0.3, 0.7]],
+                [[0.5, 1.5], [0.3, 0.7], [0.3, 0.7]],
+                [[0.5, 1.5], [0.3, 0.7], [0.3, 0.7]],
+            ]
+        )
+        # see test_compute_pixel_radii_from_grid in test_models_renderer_ray_sampler.py
+        radii = torch.tensor(
+            [
+                [1.25, 2.25, 2.25],
+                [1.75, 2.75, 2.75],
+                [1.75, 2.75, 2.75],
+            ]
+        )
+        radii = radii[..., None] / 12**0.5  # add a trailing dim and rescale
+
+        # The expected mean and diagonal covariance have been computed
+        # by hand from the official code of MipNerf.
+        # https://github.com/google/mipnerf/blob/84c969e0a623edd183b75693aed72a7e7c22902d/internal/mip.py#L125
+        # mean, cov_diag = cast_rays(length, origins, directions, radii, 'cone', diag=True)
+
+        expected_mean = torch.tensor(
+            [
+                [
+                    [[0.0, 0.0, 0.0]],
+                    [[0.5506329, 0.0, 0.0]],
+                    [[1.6518986, 0.0, 0.0]],
+                ],
+                [
+                    [[0.0, 0.28846154, 0.0]],
+                    [[0.5506329, 0.13765822, 0.0]],
+                    [[1.6518986, 0.13765822, 0.0]],
+                ],
+                [
+                    [[0.0, 1.1538461, 0.0]],
+                    [[0.5506329, 0.5506329, 0.0]],
+                    [[1.6518986, 0.5506329, 0.0]],
+                ],
+            ]
+        )
+        expected_diag_cov = torch.tensor(
+            [
+                [
+                    [[0.04544772, 0.04544772, 0.04544772]],
+                    [[0.01130973, 0.03317059, 0.03317059]],
+                    [[0.10178753, 0.03317059, 0.03317059]],
+                ],
+                [
+                    [[0.08907752, 0.00404956, 0.08907752]],
+                    [[0.0142245, 0.04734321, 0.04955113]],
+                    [[0.10212927, 0.04991625, 0.04955113]],
+                ],
+                [
+                    [[0.08907752, 0.0647929, 0.08907752]],
+                    [[0.03608529, 0.03608529, 0.04955113]],
+                    [[0.10674264, 0.05590574, 0.04955113]],
+                ],
+            ]
+        )
+
+        ray = ImplicitronRayBundle(
+            origins=origins,
+            directions=directions,
+            bins=bins,
+            lengths=None,
+            pixel_radii_2d=radii,
+            xys=None,
+        )
+        mean, diag_cov = conical_frustum_to_gaussian(ray)
+
+        self.assertClose(mean, expected_mean)
+        self.assertClose(diag_cov, expected_diag_cov)
+
+    def test_scale_conical_frustum_to_gaussian(self) -> None:
+        """
+        Scaling the bins by `s` must scale the Gaussian mean by `s` and its
+        diagonal covariance by `s**2`.
+        """
+        zero_origins = torch.zeros(2, 2, 3)
+        axis_directions = torch.Tensor(
+            [
+                [[0, 1, 0], [0, 0, 1]],
+                [[0, 1, 0], [0, 0, 1]],
+            ]
+        )
+        base_bins = torch.Tensor(
+            [
+                [[0.5, 1.5], [0.3, 0.7]],
+                [[0.5, 1.5], [0.3, 0.7]],
+            ]
+        )
+        unit_radii = torch.ones(2, 2, 1)
+
+        # Wrap the given bins in a ray bundle and convert it to a Gaussian.
+        def gaussian_from_bins(ray_bins):
+            bundle = ImplicitronRayBundle(
+                origins=zero_origins,
+                directions=axis_directions,
+                bins=ray_bins,
+                pixel_radii_2d=unit_radii,
+                lengths=None,
+                xys=None,
+            )
+            return conical_frustum_to_gaussian(bundle)
+
+        scale = 2.5
+        mean, diag_cov = gaussian_from_bins(base_bins)
+        mean_scaled, diag_cov_scaled = gaussian_from_bins(base_bins * scale)
+
+        # The scaled result is compared with the rescaled unscaled result.
+        np.testing.assert_allclose(mean * scale, mean_scaled)
+        np.testing.assert_allclose(
+            diag_cov * scale**2, diag_cov_scaled, atol=1e-6
+        )
+
+    def test_approximate_conical_frustum_as_gaussian(self) -> None:
+        """Check the shapes and values of t_mean, t_var and r_var for two bins."""
+        bins = torch.Tensor([[0.5, 1.5], [0.3, 0.7]])
+        radii = torch.Tensor([[1.0], [1.0]])
+        t_mean, t_var, r_var = approximate_conical_frustum_as_gaussians(bins, radii)
+
+        self.assertEqual(t_mean.shape, (2, 1))
+        self.assertEqual(t_var.shape, (2, 1))
+        self.assertEqual(r_var.shape, (2, 1))
+
+        mu = np.array([[1.0], [0.5]])  # midpoint of each bin
+        delta = np.array([[0.5], [0.2]])  # half-width of each bin
+
+        np.testing.assert_allclose(
+            mu + (2 * mu * delta**2) / (3 * mu**2 + delta**2), t_mean.numpy()
+        )
+        np.testing.assert_allclose(
+            (delta**2) / 3
+            - (4 / 15)
+            * (
+                (delta**4 * (12 * mu**2 - delta**2))
+                / (3 * mu**2 + delta**2) ** 2
+            ),
+            t_var.numpy(),
+        )
+        np.testing.assert_allclose(
+            radii**2
+            * (
+                (mu**2) / 4
+                + (5 / 12) * delta**2
+                - 4 / 15 * (delta**4) / (3 * mu**2 + delta**2)
+            ),
+            r_var.numpy(),
+        )
+
+    def test_compute_3d_diagonal_covariance_gaussian(self) -> None:
+        # With a +z direction the expected z entries equal t_var, the x, y ones r_var.
+        direction_z = torch.Tensor([[0, 0, 1]])
+        t_variance = torch.Tensor([0.5, 0.5, 1])
+        r_variance = torch.Tensor([0.6, 0.3, 0.4])
+        # Each row is t_cov_diag + xy_cov_diag for one (t_var, r_var) pair.
+        expected_rows = [
+            [0.0 + 0.6, 0.0 + 0.6, 0.5 + 0.0],
+            [0.0 + 0.3, 0.0 + 0.3, 0.5 + 0.0],
+            [0.0 + 0.4, 0.0 + 0.4, 1.0 + 0.0],
+        ]
+        expected_diag_cov = np.array([expected_rows])
+        diag_cov = compute_3d_diagonal_covariance_gaussian(
+            direction_z, t_variance, r_variance
+        )
+        np.testing.assert_allclose(diag_cov.numpy(), expected_diag_cov)
+
+    def test_conical_frustum_to_gaussian_raise_valueerror(self) -> None:
+        lengths = torch.linspace(0, 1, steps=6)
+        directions = torch.tensor([0, 0, 1])
+        origins = torch.tensor([1, 1, 1])
+        ray = ImplicitronRayBundle(
+            origins=origins, directions=directions, lengths=lengths, xys=None
+        )
+        # The bundle has neither bins nor pixel radii, so the conversion must fail.
+        expected_error_message = (
+            "RayBundle pixel_radii_2d or bins have not been provided."
+            " Look at pytorch3d.renderer.implicit.renderer.ray_sampler::"
+            "AbstractMaskRaySampler to see how to compute them. Have you forgot to set"
+            "`cast_ray_bundle_as_cone` to True?"
+        )
+
+        with self.assertRaisesRegex(ValueError, expected_error_message):
+            _ = conical_frustum_to_gaussian(ray)
+
+        # Ensure message is coherent with AbstractMaskRaySampler
+        class FakeRaySampler(AbstractMaskRaySampler):
+            def _get_min_max_depth_bounds(self, *args):
+                return None
+
+        message_assertion = (
+            "If cast_ray_bundle_as_cone has been removed please update the doc "
+            "of conical_frustum_to_gaussian"
+        )
+        self.assertIsNotNone(
+            getattr(FakeRaySampler(), "cast_ray_bundle_as_cone", None),
+            message_assertion,
+        )
diff --git a/pytorch3d/tests/implicitron/test_models_renderer_ray_sampler.py b/pytorch3d/tests/implicitron/test_models_renderer_ray_sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..17c1d1326a1911e7fbeff5bc636962fb13437b13
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_models_renderer_ray_sampler.py
@@ -0,0 +1,290 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from itertools import product
+from typing import Tuple
+
+from unittest.mock import patch
+
+import torch
+from pytorch3d.common.compat import meshgrid_ij
+from pytorch3d.implicitron.models.renderer.base import EvaluationMode
+from pytorch3d.implicitron.models.renderer.ray_sampler import (
+    AdaptiveRaySampler,
+    compute_radii,
+    NearFarRaySampler,
+)
+
+from pytorch3d.renderer.cameras import (
+    CamerasBase,
+    FoVOrthographicCameras,
+    FoVPerspectiveCameras,
+    OrthographicCameras,
+    PerspectiveCameras,
+)
+from pytorch3d.renderer.implicit.utils import HeterogeneousRayBundle
+from tests.common_camera_utils import init_random_cameras
+
+from tests.common_testing import TestCaseMixin
+
+CAMERA_TYPES = (
+    FoVPerspectiveCameras,
+    FoVOrthographicCameras,
+    OrthographicCameras,
+    PerspectiveCameras,
+)
+
+
+def unproject_xy_grid_from_ndc_to_world_coord(
+    cameras: CamerasBase, xy_grid: torch.Tensor
+) -> torch.Tensor:
+    """
+    Unproject a xy_grid from NDC coordinates to world coordinates.
+
+    Args:
+        cameras: CamerasBase.
+        xy_grid: A tensor of shape `(..., H*W, 2)` representing the
+            x, y coords.
+
+    Returns:
+        A tensor of shape `(..., H*W, 3)` representing the xy_grid points
+        unprojected by `cameras` from a depth of 1.
+    """
+    batch_size = xy_grid.shape[0]
+    n_rays_per_image = xy_grid.shape[1:-1].numel()
+    xy = xy_grid.view(batch_size, -1, 2)
+    xyz = torch.cat([xy, xy_grid.new_ones(batch_size, n_rays_per_image, 1)], dim=-1)
+    plane_at_depth1 = cameras.unproject_points(xyz, from_ndc=True)
+    output_shape: Tuple[int, ...] = (*xy_grid.shape[:-1], 3)
+    return plane_at_depth1.view(output_shape)
+
+
+class TestRaysampler(TestCaseMixin, unittest.TestCase):
+    def test_ndc_raysampler_n_ray_total_is_none(self):
+        """
+        The training and evaluation raysamplers of both sampler types must
+        leave `_n_rays_total` unset.
+
+        The failure message explains what has to be handled otherwise.
+        """
+        message = (
+            "If you introduce the support of `n_rays_total` for {0}, please handle the "
+            "packing and unpacking logic for the radii and lengths computation."
+        )
+        raysampler_attributes = ("_training_raysampler", "_evaluation_raysampler")
+
+        for sampler_type in (NearFarRaySampler, AdaptiveRaySampler):
+            sampler = sampler_type()
+            # Shown on failure; names the concrete sampler class.
+            failure_hint = message.format(type(sampler))
+            for attribute in raysampler_attributes:
+                raysampler = getattr(sampler, attribute)
+                self.assertIsNone(raysampler._n_rays_total, failure_hint)
+
+    def test_catch_heterogeneous_exception(self):
+        cameras = init_random_cameras(FoVPerspectiveCameras, 1, random_z=True)
+
+        class FakeSampler:  # always returns a HeterogeneousRayBundle
+            def __init__(self):
+                self.min_x, self.max_x = 1, 2
+                self.min_y, self.max_y = 1, 2
+
+            def __call__(self, **kwargs):
+                return HeterogeneousRayBundle(
+                    torch.rand(3), torch.rand(3), torch.rand(3), torch.rand(1)
+                )
+
+        with patch(
+            "pytorch3d.implicitron.models.renderer.ray_sampler.NDCMultinomialRaysampler",
+            return_value=FakeSampler(),
+        ):
+            for sampler in [
+                AdaptiveRaySampler(cast_ray_bundle_as_cone=True),
+                NearFarRaySampler(cast_ray_bundle_as_cone=True),
+            ]:
+                with self.assertRaises(TypeError):  # cone casting must reject it
+                    _ = sampler(cameras, EvaluationMode.TRAINING)
+            for sampler in [
+                AdaptiveRaySampler(cast_ray_bundle_as_cone=False),
+                NearFarRaySampler(cast_ray_bundle_as_cone=False),
+            ]:
+                _ = sampler(cameras, EvaluationMode.TRAINING)  # must not raise
+
+    def test_compute_radii(self):
+        """compute_radii must match the radii derived from the full pixel grid."""
+        batch_size = 1
+        image_height, image_width = 20, 10
+        min_y, max_y, min_x, max_x = -1.0, 1.0, -1.0, 1.0
+        ys = torch.linspace(min_y, max_y, image_height, dtype=torch.float32)
+        xs = torch.linspace(min_x, max_x, image_width, dtype=torch.float32)
+        y, x = meshgrid_ij(ys, xs)
+        xy_grid = torch.stack([x, y], dim=-1).view(-1, 2)
+
+        # Step between two consecutive grid coordinates along y and along x.
+        pixel_hw_ndc = (
+            (max_y - min_y) / (image_height - 1),
+            (max_x - min_x) / (image_width - 1),
+        )
+
+        for cam_type in CAMERA_TYPES:
+            # Init a batch of random cameras.
+            cameras = init_random_cameras(cam_type, batch_size, random_z=True)
+            # This method allows us to compute the radii without having
+            # access to the full grid. Raysamplers during the training
+            # will sample random rays from the grid.
+            radii = compute_radii(cameras, xy_grid, pixel_hw_ndc=pixel_hw_ndc)
+            # This method absolutely needs the full grid to work.
+            full_grid = unproject_xy_grid_from_ndc_to_world_coord(cameras, xy_grid)
+            expected_radii = compute_pixel_radii_from_grid(
+                full_grid.reshape(1, image_height, image_width, 3)
+            )
+            self.assertClose(expected_radii.reshape(-1, 1), radii)
+
+    def test_forward(self):
+        """
+        Without cone casting the ray bundles carry lengths of the expected
+        shape, but neither bins nor pixel radii.
+        """
+        n_rays_per_image = 16
+        image_height, image_width = 20, 20
+        kwargs = {
+            "image_width": image_width,
+            "image_height": image_height,
+            "n_pts_per_ray_training": 32,
+            "n_pts_per_ray_evaluation": 32,
+            "n_rays_per_image_sampled_from_mask": n_rays_per_image,
+            "cast_ray_bundle_as_cone": False,
+        }
+
+        batch_size = 2
+        samplers = [NearFarRaySampler(**kwargs), AdaptiveRaySampler(**kwargs)]
+        evaluation_modes = [EvaluationMode.TRAINING, EvaluationMode.EVALUATION]
+
+        for cam_type, sampler, evaluation_mode in product(
+            CAMERA_TYPES, samplers, evaluation_modes
+        ):
+            cameras = init_random_cameras(cam_type, batch_size, random_z=True)
+            ray_bundle = sampler(cameras, evaluation_mode)
+
+            if evaluation_mode == EvaluationMode.EVALUATION:
+                # NOTE(review): assumes the full grid is laid out as
+                # (batch, height, width) -- confirm against the raysampler docs.
+                shape_out = (batch_size, image_height, image_width)
+                n_pts_per_ray = kwargs["n_pts_per_ray_evaluation"]
+            else:
+                shape_out = (batch_size, n_rays_per_image, 1)
+                n_pts_per_ray = kwargs["n_pts_per_ray_training"]
+
+            self.assertIsNone(ray_bundle.bins)
+            self.assertIsNone(ray_bundle.pixel_radii_2d)
+            self.assertEqual(ray_bundle.lengths.shape, (*shape_out, n_pts_per_ray))
+            self.assertEqual(ray_bundle.directions.shape, (*shape_out, 3))
+            self.assertEqual(ray_bundle.origins.shape, (*shape_out, 3))
+
+    def test_forward_with_use_bins(self):
+        """With cone casting the bundles carry bins and per-ray pixel radii."""
+        n_rays_per_image = 16
+        image_height, image_width = 20, 20
+        kwargs = {
+            "image_width": image_width,
+            "image_height": image_height,
+            "n_pts_per_ray_training": 32,
+            "n_pts_per_ray_evaluation": 32,
+            "n_rays_per_image_sampled_from_mask": n_rays_per_image,
+            "cast_ray_bundle_as_cone": True,
+        }
+
+        batch_size = 1
+        samplers = [NearFarRaySampler(**kwargs), AdaptiveRaySampler(**kwargs)]
+        evaluation_modes = [EvaluationMode.TRAINING, EvaluationMode.EVALUATION]
+        for cam_type, sampler, evaluation_mode in product(
+            CAMERA_TYPES, samplers, evaluation_modes
+        ):
+            cameras = init_random_cameras(cam_type, batch_size, random_z=True)
+            ray_bundle = sampler(cameras, evaluation_mode)
+
+            lengths = 0.5 * (ray_bundle.bins[..., :-1] + ray_bundle.bins[..., 1:])
+            self.assertClose(ray_bundle.lengths, lengths)
+            # NOTE(review): full-grid layout assumed to be (batch, height, width).
+            if evaluation_mode == EvaluationMode.EVALUATION:
+                shape_out = (batch_size, image_height, image_width)
+            else:
+                shape_out = (batch_size, n_rays_per_image, 1)
+            self.assertEqual(ray_bundle.pixel_radii_2d.shape, (*shape_out, 1))
+            self.assertEqual(ray_bundle.directions.shape, (*shape_out, 3))
+            self.assertEqual(ray_bundle.origins.shape, (*shape_out, 3))
+
+
+# Helper to test compute_radii
+def compute_pixel_radii_from_grid(pixel_grid: torch.Tensor) -> torch.Tensor:
+    """
+    Compute the radii of a conical frustum given the pixel grid.
+
+    To compute the radii we first compute the translation from a pixel
+    to its neighbors along the x and y axis. Then, we compute the norm
+    of each translation along the x and y axis.
+    The radii are then obtained by the following formula:
+
+    (dx_norm + dy_norm) * 0.5 * 2 / 12**0.5
+
+    where 2/12**0.5 is a scaling factor to match
+    the variance of the pixel’s footprint.
+
+    Args:
+        pixel_grid: A tensor of shape `(..., H, W, dim)` representing the
+            full grid of rays pixel_grid.
+
+    Returns:
+        The radii for each pixel, with shape `(..., H, W, 1)`.
+    """
+    # [B, H, W - 1, 3]
+    x_translation = torch.diff(pixel_grid, dim=-2)
+    # [B, H - 1, W, 3]
+    y_translation = torch.diff(pixel_grid, dim=-3)
+    # [B, H, W - 1, 1]
+    dx_norm = torch.linalg.norm(x_translation, dim=-1, keepdim=True)
+    # [B, H - 1, W, 1]
+    dy_norm = torch.linalg.norm(y_translation, dim=-1, keepdim=True)
+
+    # Fill the missing value [B, H, W, 1] by repeating the last column / row.
+    dx_norm = torch.cat([dx_norm, dx_norm[..., -1:, :]], -2)
+    dy_norm = torch.cat([dy_norm, dy_norm[..., -1:, :, :]], -3)
+
+    # Cut the distance in half to obtain the base radius: (dx_norm + dy_norm) * 0.5
+    # and multiply it by the scaling factor: * 2 / 12**0.5
+    radii = (dx_norm + dy_norm) / 12**0.5
+    return radii
+
+
+class TestRadiiComputationOnFullGrid(TestCaseMixin, unittest.TestCase):
+    def test_compute_pixel_radii_from_grid(self):
+        # 3 x 3 grid whose points have x in {0, 1, 3} and y in {0, 0.25, 1}.
+        grid_points = torch.tensor(
+            [
+                [[0.0, 0, 0], [1.0, 0.0, 0], [3.0, 0.0, 0.0]],
+                [[0.0, 0.25, 0], [1.0, 0.25, 0], [3.0, 0.25, 0]],
+                [[0.0, 1, 0], [1.0, 1.0, 0], [3.0000, 1.0, 0]],
+            ]
+        )
+        x_step_norms = torch.tensor(
+            [
+                # 3rd column is duplicated from 2nd
+                [1.0, 2.0, 2.0],
+                [1.0, 2.0, 2.0],
+                [1.0, 2.0, 2.0],
+            ]
+        )
+        y_step_norms = torch.tensor(
+            [
+                [0.25, 0.25, 0.25],
+                [0.75, 0.75, 0.75],
+                [0.75, 0.75, 0.75],  # duplicated from previous row
+            ]
+        )
+
+        expected_radii = (x_step_norms + y_step_norms)[..., None] / 12**0.5
+        self.assertClose(compute_pixel_radii_from_grid(grid_points), expected_radii)
diff --git a/pytorch3d/tests/implicitron/test_orm_types.py b/pytorch3d/tests/implicitron/test_orm_types.py
new file mode 100644
index 0000000000000000000000000000000000000000..e6f94c0100ffc5d8ca111a553b21861b881c206a
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_orm_types.py
@@ -0,0 +1,62 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import numpy as np
+
+from pytorch3d.implicitron.dataset.orm_types import ArrayTypeFactory, TupleTypeFactory
+
+
+class TestOrmTypes(unittest.TestCase):
+    """Round-trip tests for the types built by the tuple and array factories."""
+
+    def test_tuple_serialization_none(self):
+        column_type = TupleTypeFactory()()
+        bound = column_type.process_bind_param(None, None)
+        self.assertIsNone(bound)
+        restored = column_type.process_result_value(bound, None)
+        self.assertIsNone(restored)
+
+    def test_tuple_serialization_1d(self):
+        """1D tuples must round-trip and keep their element type."""
+        for input_tuple in [(1, 2, 3), (4.5, 6.7)]:
+            element_type = type(input_tuple[0])
+            column_type = TupleTypeFactory(element_type, (len(input_tuple),))()
+            bound = column_type.process_bind_param(input_tuple, None)
+            restored = column_type.process_result_value(bound, None)
+            self.assertEqual(type(restored[0]), element_type)
+            np.testing.assert_almost_equal(restored, input_tuple, decimal=6)
+
+    def test_tuple_serialization_2d(self):
+        """Nested tuples must round-trip and keep their element type."""
+        input_tuple = ((1.0, 2.0, 3.0), (4.5, 5.5, 6.6))
+        element_type = type(input_tuple[0][0])
+        column_type = TupleTypeFactory(element_type, (2, 3))()
+        bound = column_type.process_bind_param(input_tuple, None)
+        restored = column_type.process_result_value(bound, None)
+        self.assertEqual(type(restored[0][0]), element_type)
+        # we use float32 to serialise
+        np.testing.assert_almost_equal(restored, input_tuple, decimal=6)
+
+    def test_array_serialization_none(self):
+        column_type = ArrayTypeFactory((3, 3))()
+        bound = column_type.process_bind_param(None, None)
+        self.assertIsNone(bound)
+        restored = column_type.process_result_value(bound, None)
+        self.assertIsNone(restored)
+
+    def test_array_serialization(self):
+        """Arrays must round-trip as float32 for both array type variants."""
+        for input_list in [[1, 2, 3], [[4.5, 6.7], [8.9, 10.0]]]:
+            input_array = np.array(input_list)
+            # first a dynamic-size array type, then a fixed-size one
+            for factory_args in ((), (tuple(input_array.shape),)):
+                column_type = ArrayTypeFactory(*factory_args)()
+                bound = column_type.process_bind_param(input_array, None)
+                restored = column_type.process_result_value(bound, None)
+                self.assertEqual(restored.dtype, np.float32)
+                np.testing.assert_almost_equal(restored, input_array, decimal=6)
diff --git a/pytorch3d/tests/implicitron/test_pointcloud_utils.py b/pytorch3d/tests/implicitron/test_pointcloud_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc3176cc6717a9e548e2eb1fade44a43255008dd
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_pointcloud_utils.py
@@ -0,0 +1,72 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.implicitron.tools.point_cloud_utils import get_rgbd_point_cloud
+
+from pytorch3d.renderer.cameras import PerspectiveCameras
+from tests.common_testing import TestCaseMixin
+
+
+class TestPointCloudUtils(TestCaseMixin, unittest.TestCase):
+    def setUp(self):
+        torch.manual_seed(42)
+
+    def test_unproject(self):
+        """Unproject a constant-depth RGBD image with z-depth and Euclidean depth."""
+        H, W = 50, 100
+
+        # Random RGBD image with depth 3
+        # (depth 0 = at the camera)
+        # and channel 1 scaled down in the quadrant of rows >= H // 2
+        # and columns >= W // 2
+        image = torch.rand(4, H, W)
+        depth = 3
+        image[3] = depth
+        image[1, H // 2 :, W // 2 :] *= 0.4
+        # Batched views shared by all the calls below.
+        rgb_batch = image[:3][None]
+        depth_batch = image[3:][None]
+
+        # two ways to define the same camera:
+        # at the origin facing the positive z axis
+        ndc_camera = PerspectiveCameras(focal_length=1.0)
+        screen_camera = PerspectiveCameras(
+            focal_length=H // 2,
+            in_ndc=False,
+            image_size=((H, W),),
+            principal_point=((W / 2, H / 2),),
+        )
+
+        # Expected largest |x| and |y| of the unprojected points at z-depth `depth`.
+        extremes = depth * torch.tensor([W / H - 1 / H, 1 - 1 / H])
+
+        for camera in (ndc_camera, screen_camera):
+            # 1. z-depth
+            cloud = get_rgbd_point_cloud(
+                camera, image_rgb=rgb_batch, depth_map=depth_batch, euclidean=False
+            )
+            [points] = cloud.points_list()
+            self.assertConstant(points[:, 2], depth)  # constant depth
+            self.assertClose(points[:, :2].min(0).values, -extremes)
+            self.assertClose(points[:, :2].max(0).values, extremes)
+
+            # 2. euclidean
+            cloud = get_rgbd_point_cloud(
+                camera, image_rgb=rgb_batch, depth_map=depth_batch, euclidean=True
+            )
+            [points] = cloud.points_list()
+            self.assertConstant(torch.norm(points, dim=1), depth, atol=1e-5)
+
+            # 3. four channels
+            get_rgbd_point_cloud(
+                camera,
+                image_rgb=image[None],
+                depth_map=depth_batch,
+                euclidean=True,
+            )
diff --git a/pytorch3d/tests/implicitron/test_ray_point_refiner.py b/pytorch3d/tests/implicitron/test_ray_point_refiner.py
new file mode 100644
index 0000000000000000000000000000000000000000..8a6ab36126cf8baab4e0854ba7f4e9574ceaf43f
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_ray_point_refiner.py
@@ -0,0 +1,157 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from itertools import product
+
+import torch
+
+from pytorch3d.implicitron.models.renderer.ray_point_refiner import (
+    apply_blurpool_on_weights,
+    RayPointRefiner,
+)
+from pytorch3d.implicitron.models.renderer.ray_sampler import ImplicitronRayBundle
+from tests.common_testing import TestCaseMixin
+
+
+class TestRayPointRefiner(TestCaseMixin, unittest.TestCase):
+    def test_simple(self):
+        """
+        Check the refined lengths obtained from uniform weights, first with
+        deterministic sampling and then with random sampling.
+        """
+        length = 15
+        n_pts_per_ray = 10
+
+        for add_input_samples, use_blurpool in product([False, True], [False, True]):
+            common_args = {
+                "n_pts_per_ray": n_pts_per_ray,
+                "add_input_samples": add_input_samples,
+                "blurpool_weights": use_blurpool,
+            }
+            # Samples at 0, 1, ..., length - 1 on each of the 3 x 25 rays.
+            lengths = torch.arange(length, dtype=torch.float32).expand(3, 25, length)
+            bundle = ImplicitronRayBundle(
+                lengths=lengths,
+                origins=None,
+                directions=None,
+                xys=None,
+                camera_ids=None,
+                camera_counts=None,
+            )
+            weights = torch.ones(3, 25, length)
+
+            # Deterministic sampling: the new samples are evenly spaced.
+            refiner = RayPointRefiner(random_sampling=False, **common_args)
+            refined = refiner(bundle, weights)
+            self.assertIsNone(refined.directions)
+            self.assertIsNone(refined.origins)
+            self.assertIsNone(refined.xys)
+
+            expected = torch.linspace(0.5, length - 1.5, n_pts_per_ray)
+            expected = expected.expand(3, 25, n_pts_per_ray)
+            full_expected = expected
+            if add_input_samples:
+                full_expected = torch.cat((lengths, expected), dim=-1).sort()[0]
+            self.assertClose(refined.lengths, full_expected)
+
+            # Random sampling: only shape, range and ordering can be checked.
+            random_refiner = RayPointRefiner(random_sampling=True, **common_args)
+            lengths_random = random_refiner(bundle, weights).lengths
+            self.assertEqual(lengths_random.shape, full_expected.shape)
+            if not add_input_samples:
+                self.assertGreater(lengths_random.min().item(), 0.5)
+                self.assertLess(lengths_random.max().item(), length - 1.5)
+
+            # Check sorted
+            increments = lengths_random[..., 1:] - lengths_random[..., :-1]
+            self.assertTrue((increments > 0).all())
+
+    def test_simple_use_bins(self):
+        """
+        Same spirit as test_simple but uses bins in the ImplicitronRayBundle.
+        It has been duplicated to avoid cognitive overload while reading the
+        test (lots of if/else).
+        """
+        length = 15
+        n_pts_per_ray = 10
+
+        for add_input_samples, use_blurpool in product([False, True], [False, True]):
+            ray_point_refiner = RayPointRefiner(
+                n_pts_per_ray=n_pts_per_ray,
+                random_sampling=False,
+                add_input_samples=add_input_samples,
+            )
+
+            # Bin edges 0, 1, ..., length on each of the 3 x 25 rays.
+            input_bins = torch.arange(length + 1, dtype=torch.float32)
+            bundle = ImplicitronRayBundle(
+                lengths=None,
+                bins=input_bins.expand(3, 25, length + 1),
+                origins=None,
+                directions=None,
+                xys=None,
+                camera_ids=None,
+                camera_counts=None,
+            )
+            weights = torch.ones(3, 25, length)
+            refined = ray_point_refiner(bundle, weights, blurpool_weights=use_blurpool)
+
+            self.assertIsNone(refined.directions)
+            self.assertIsNone(refined.origins)
+            self.assertIsNone(refined.xys)
+
+            expected_bins = torch.linspace(0, length, n_pts_per_ray + 1)
+            expected_bins = expected_bins.expand(3, 25, n_pts_per_ray + 1)
+            if add_input_samples:
+                merged_bins = torch.cat((bundle.bins, expected_bins), dim=-1)
+                expected_bins = merged_bins.sort()[0]
+            # The expected lengths are the midpoints of the expected bins.
+            lower_edges, upper_edges = expected_bins[..., :-1], expected_bins[..., 1:]
+            full_expected = torch.lerp(lower_edges, upper_edges, 0.5)
+
+            self.assertClose(refined.lengths, full_expected)
+
+            # Random sampling: only shape, range and ordering are checked.
+            ray_point_refiner_random = RayPointRefiner(
+                n_pts_per_ray=n_pts_per_ray,
+                random_sampling=True,
+                add_input_samples=add_input_samples,
+            )
+
+            refined_random = ray_point_refiner_random(
+                bundle, weights, blurpool_weights=use_blurpool
+            )
+            lengths_random = refined_random.lengths
+            self.assertEqual(lengths_random.shape, full_expected.shape)
+            if not add_input_samples:
+                self.assertGreater(lengths_random.min().item(), 0)
+                self.assertLess(lengths_random.max().item(), length)
+
+            # Check sorted
+            increments = lengths_random[..., 1:] - lengths_random[..., :-1]
+            self.assertTrue((increments > 0).all())
+
+    def test_apply_blurpool_on_weights(self):
+        weights = torch.tensor(
+            [
+                [0.5, 0.6, 0.7],
+                [0.5, 0.3, 0.9],
+            ]
+        )
+        # Hand-computed reference, written as half the sum of two input weights.
+        expected_pairs = [
+            [0.5 + 0.6, 0.6 + 0.7, 0.7 + 0.7],
+            [0.5 + 0.5, 0.5 + 0.9, 0.9 + 0.9],
+        ]
+        expected_weights = 0.5 * torch.tensor(expected_pairs)
+        blurred = apply_blurpool_on_weights(weights)
+        self.assertTrue(torch.allclose(blurred, expected_weights))
+
+    def test_shapes_apply_blurpool_on_weights(self):
+        shape = (5, 4, 3, 2, 1)  # the output must keep the input shape
+        blurred = apply_blurpool_on_weights(torch.randn(shape))
+        self.assertEqual(shape, blurred.shape)
diff --git a/pytorch3d/tests/implicitron/test_sql_dataset.py b/pytorch3d/tests/implicitron/test_sql_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..f5baf505752545579a7e069ef6d00b17ccc315a8
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_sql_dataset.py
@@ -0,0 +1,546 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+import os
+import unittest
+from collections import Counter
+
+import pkg_resources
+
+import torch
+
+from pytorch3d.implicitron.dataset.sql_dataset import SqlIndexDataset
+
+NO_BLOBS_KWARGS = {
+    "dataset_root": "",
+    "load_images": False,
+    "load_depths": False,
+    "load_masks": False,
+    "load_depth_masks": False,
+    "box_crop": False,
+}
+
+logger = logging.getLogger("pytorch3d.implicitron.dataset.sql_dataset")
+sh = logging.StreamHandler()
+logger.addHandler(sh)
+logger.setLevel(logging.DEBUG)
+
+
+DATASET_ROOT = pkg_resources.resource_filename(__name__, "data/sql_dataset")
+METADATA_FILE = os.path.join(DATASET_ROOT, "sql_dataset_100.sqlite")
+SET_LIST_FILE = os.path.join(DATASET_ROOT, "set_lists_100.json")
+
+
+class TestSqlDataset(unittest.TestCase):
+    def test_basic(self, sequence="cat1_seq2", frame_number=4):
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        self.assertEqual(len(dataset), 100)
+
+        # check the items are consecutive
+        past_sequences = set()
+        last_frame_number = -1
+        last_sequence = ""
+        for i in range(len(dataset)):
+            item = dataset[i]
+
+            if item.frame_number == 0:
+                self.assertNotIn(item.sequence_name, past_sequences)
+                past_sequences.add(item.sequence_name)
+                last_sequence = item.sequence_name
+            else:
+                self.assertEqual(item.sequence_name, last_sequence)
+                self.assertEqual(item.frame_number, last_frame_number + 1)
+
+            last_frame_number = item.frame_number
+
+        # test indexing
+        with self.assertRaises(IndexError):
+            dataset[len(dataset) + 1]
+
+        # test sequence-frame indexing
+        item = dataset[sequence, frame_number]
+        self.assertEqual(item.sequence_name, sequence)
+        self.assertEqual(item.frame_number, frame_number)
+
+        with self.assertRaises(IndexError):
+            dataset[sequence, 13]
+
+    def test_filter_empty_masks(self):
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=True,
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        self.assertEqual(len(dataset), 78)
+
+    def test_pick_frames_sql_clause(self):
+        dataset_no_empty_masks = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=True,
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            pick_frames_sql_clause="_mask_mass IS NULL OR _mask_mass > 0",
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        # check the datasets are equal
+        self.assertEqual(len(dataset), len(dataset_no_empty_masks))
+        for i in range(len(dataset)):
+            item_nem = dataset_no_empty_masks[i]
+            item = dataset[i]
+            self.assertEqual(item_nem.image_path, item.image_path)
+
+        # remove_empty_masks together with the custom criterion
+        dataset_ts = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=True,
+            pick_frames_sql_clause="frame_timestamp < 0.15",
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+        self.assertEqual(len(dataset_ts), 19)
+
+    def test_limit_categories(self, category="cat0"):
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            pick_categories=[category],
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        self.assertEqual(len(dataset), 50)
+        for i in range(len(dataset)):
+            self.assertEqual(dataset[i].sequence_category, category)
+
+    def test_limit_sequences(self, num_sequences=3):
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            limit_sequences_to=num_sequences,
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        self.assertEqual(len(dataset), 10 * num_sequences)
+
+        def delist(sequence_name):
+            return sequence_name if isinstance(sequence_name, str) else sequence_name[0]
+
+        unique_seqs = {delist(dataset[i].sequence_name) for i in range(len(dataset))}
+        self.assertEqual(len(unique_seqs), num_sequences)
+
+    def test_pick_exclude_sequencess(self, sequence="cat1_seq2"):
+        # pick sequence
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            pick_sequences=[sequence],
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        self.assertEqual(len(dataset), 10)
+        unique_seqs = {dataset[i].sequence_name for i in range(len(dataset))}
+        self.assertCountEqual(unique_seqs, {sequence})
+
+        item = dataset[sequence, 0]
+        self.assertEqual(item.sequence_name, sequence)
+        self.assertEqual(item.frame_number, 0)
+
+        # exclude sequence
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            exclude_sequences=[sequence],
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        self.assertEqual(len(dataset), 90)
+        unique_seqs = {dataset[i].sequence_name for i in range(len(dataset))}
+        self.assertNotIn(sequence, unique_seqs)
+
+        with self.assertRaises(IndexError):
+            dataset[sequence, 0]
+
+    def test_limit_frames(self, num_frames=13):
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            limit_to=num_frames,
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        self.assertEqual(len(dataset), num_frames)
+        unique_seqs = {dataset[i].sequence_name for i in range(len(dataset))}
+        self.assertEqual(len(unique_seqs), 2)
+
+        # test when the limit is not binding
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            limit_to=1000,
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        self.assertEqual(len(dataset), 100)
+
+    def test_limit_frames_per_sequence(self, num_frames=2):
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            n_frames_per_sequence=num_frames,
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+        # the unfiltered fixture is asserted to hold 10 sequences (see below)
+        self.assertEqual(len(dataset), num_frames * 10)
+        seq_counts = Counter(dataset[i].sequence_name for i in range(len(dataset)))
+        self.assertEqual(len(seq_counts), 10)
+        self.assertCountEqual(
+            set(seq_counts.values()), {num_frames}
+        )  # all counts are num_frames
+
+        with self.assertRaises(IndexError):
+            dataset[next(iter(seq_counts)), num_frames + 1]
+
+        # test when the limit is not binding
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            n_frames_per_sequence=13,
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+        self.assertEqual(len(dataset), 100)
+
+    def test_limit_sequence_per_category(self, num_sequences=2):
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            limit_sequences_per_category_to=num_sequences,
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        self.assertEqual(len(dataset), num_sequences * 10 * 2)
+        seq_names = list(dataset.sequence_names())
+        self.assertEqual(len(seq_names), num_sequences * 2)
+        # check that we respect the row order
+        for seq_name in seq_names:
+            self.assertLess(int(seq_name[-1]), num_sequences)
+
+        # test when the limit is not binding
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            limit_sequences_per_category_to=13,
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+        self.assertEqual(len(dataset), 100)
+
+    def test_filter_medley(self):
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=True,
+            pick_categories=["cat1"],
+            exclude_sequences=["cat1_seq0"],  # retaining "cat1_seq1" and on
+            limit_sequences_to=2,  # retaining "cat1_seq1" and "cat1_seq2"
+            limit_to=14,  # retaining full "cat1_seq1" and 4 from "cat1_seq2"
+            n_frames_per_sequence=6,  # cutting "cat1_seq1" to 6 frames
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        # result: preserved 6 frames from cat1_seq1 and 4 from cat1_seq2
+        seq_counts = Counter(dataset[i].sequence_name for i in range(len(dataset)))
+        self.assertCountEqual(seq_counts.keys(), ["cat1_seq1", "cat1_seq2"])
+        self.assertEqual(seq_counts["cat1_seq1"], 6)
+        self.assertEqual(seq_counts["cat1_seq2"], 4)
+
+    def test_subsets_trivial(self):
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            subset_lists_file=SET_LIST_FILE,
+            limit_to=100,  # force sorting
+            subsets=["train", "test"],
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        self.assertEqual(len(dataset), 100)
+
+        # check the items are consecutive
+        past_sequences = set()
+        last_frame_number = -1
+        last_sequence = ""
+        for i in range(len(dataset)):
+            item = dataset[i]
+
+            if item.frame_number == 0:
+                self.assertNotIn(item.sequence_name, past_sequences)
+                past_sequences.add(item.sequence_name)
+                last_sequence = item.sequence_name
+            else:
+                self.assertEqual(item.sequence_name, last_sequence)
+                self.assertEqual(item.frame_number, last_frame_number + 1)
+
+            last_frame_number = item.frame_number
+
+    def test_subsets_filter_empty_masks(self):
+        # we need to test this case as it uses quite different logic with `df.drop()`
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=True,
+            subset_lists_file=SET_LIST_FILE,
+            subsets=["train", "test"],
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        self.assertEqual(len(dataset), 78)
+
+    def test_subsets_pick_frames_sql_clause(self):
+        dataset_no_empty_masks = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=True,
+            subset_lists_file=SET_LIST_FILE,
+            subsets=["train", "test"],
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            pick_frames_sql_clause="_mask_mass IS NULL OR _mask_mass > 0",
+            subset_lists_file=SET_LIST_FILE,
+            subsets=["train", "test"],
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        # check the datasets are equal
+        self.assertEqual(len(dataset), len(dataset_no_empty_masks))
+        for i in range(len(dataset)):
+            item_nem = dataset_no_empty_masks[i]
+            item = dataset[i]
+            self.assertEqual(item_nem.image_path, item.image_path)
+
+        # remove_empty_masks together with the custom criterion
+        dataset_ts = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=True,
+            pick_frames_sql_clause="frame_timestamp < 0.15",
+            subset_lists_file=SET_LIST_FILE,
+            subsets=["train", "test"],
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        self.assertEqual(len(dataset_ts), 19)
+
+    def test_single_subset(self):
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            subset_lists_file=SET_LIST_FILE,
+            subsets=["train"],
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        self.assertEqual(len(dataset), 50)
+
+        with self.assertRaises(IndexError):
+            dataset[51]
+
+        # check each sequence is contiguous and its frame numbers step by 2
+        past_sequences = set()
+        last_frame_number = -1
+        last_sequence = ""
+        for i in range(len(dataset)):
+            item = dataset[i]
+
+            if item.frame_number < 2:
+                self.assertNotIn(item.sequence_name, past_sequences)
+                past_sequences.add(item.sequence_name)
+                last_sequence = item.sequence_name
+            else:
+                self.assertEqual(item.sequence_name, last_sequence)
+                self.assertEqual(item.frame_number, last_frame_number + 2)
+
+            last_frame_number = item.frame_number
+
+        item = dataset[last_sequence, 0]
+        self.assertEqual(item.sequence_name, last_sequence)
+
+        with self.assertRaises(IndexError):
+            dataset[last_sequence, 1]
+
+    def test_subset_with_filters(self):
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=True,
+            subset_lists_file=SET_LIST_FILE,
+            subsets=["train"],
+            pick_categories=["cat1"],
+            exclude_sequences=["cat1_seq0"],  # retaining "cat1_seq1" and on
+            limit_sequences_to=2,  # retaining "cat1_seq1" and "cat1_seq2"
+            limit_to=7,  # retaining full train set of "cat1_seq1" and 2 from "cat1_seq2"
+            n_frames_per_sequence=3,  # cutting "cat1_seq1" to 3 frames
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        # result: preserved 3 frames from cat1_seq1 and 2 from cat1_seq2
+        seq_counts = Counter(dataset[i].sequence_name for i in range(len(dataset)))
+        self.assertCountEqual(seq_counts.keys(), ["cat1_seq1", "cat1_seq2"])
+        self.assertEqual(seq_counts["cat1_seq1"], 3)
+        self.assertEqual(seq_counts["cat1_seq2"], 2)
+
+    def test_visitor(self):
+        dataset_sorted = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        sequences = dataset_sorted.sequence_names()
+        i = 0
+        for seq in sequences:
+            last_ts = float("-Inf")
+            for ts, _, idx in dataset_sorted.sequence_frames_in_order(seq):
+                self.assertEqual(i, idx)
+                i += 1
+                self.assertGreaterEqual(ts, last_ts)
+                last_ts = ts
+
+        # test legacy visitor
+        old_indices = None
+        for seq in sequences:
+            # `rows` is consumed as a slice (start/stop/step) into the frame index
+            rows = dataset_sorted._index.index.get_loc(seq)
+            indices = list(range(rows.start or 0, rows.stop, rows.step or 1))
+            fn_ts_list = dataset_sorted.get_frame_numbers_and_timestamps(indices)
+            self.assertEqual(len(fn_ts_list), len(indices))
+
+            if old_indices:
+                # check raising if we ask for multiple sequences
+                with self.assertRaises(ValueError):
+                    dataset_sorted.get_frame_numbers_and_timestamps(
+                        indices + old_indices
+                    )
+
+            old_indices = indices
+
+    def test_visitor_subsets(self):
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            limit_to=100,  # force sorting
+            subset_lists_file=SET_LIST_FILE,
+            subsets=["train", "test"],
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        sequences = dataset.sequence_names()
+        i = 0
+        for seq in sequences:
+            last_ts = float("-Inf")
+            seq_frames = list(dataset.sequence_frames_in_order(seq))
+            self.assertEqual(len(seq_frames), 10)
+            for ts, _, idx in seq_frames:
+                self.assertEqual(i, idx)
+                i += 1
+                self.assertGreaterEqual(ts, last_ts)
+                last_ts = ts
+
+            last_ts = float("-Inf")
+            train_frames = list(dataset.sequence_frames_in_order(seq, "train"))
+            self.assertEqual(len(train_frames), 5)
+            for ts, _, _ in train_frames:
+                self.assertGreaterEqual(ts, last_ts)
+                last_ts = ts
+
+    def test_category_to_sequence_names(self):
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            subset_lists_file=SET_LIST_FILE,
+            subsets=["train", "test"],
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        cat_to_seqs = dataset.category_to_sequence_names()
+        self.assertEqual(len(cat_to_seqs), 2)
+        self.assertIn("cat1", cat_to_seqs)
+        self.assertEqual(len(cat_to_seqs["cat1"]), 5)
+
+        # check that override preserves the behavior
+        cat_to_seqs_base = super(SqlIndexDataset, dataset).category_to_sequence_names()
+        self.assertDictEqual(cat_to_seqs, cat_to_seqs_base)
+
+    def test_category_to_sequence_names_filters(self):
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=True,
+            subset_lists_file=SET_LIST_FILE,
+            exclude_sequences=["cat1_seq0"],
+            subsets=["train", "test"],
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        cat_to_seqs = dataset.category_to_sequence_names()
+        self.assertEqual(len(cat_to_seqs), 2)
+        self.assertIn("cat1", cat_to_seqs)
+        self.assertEqual(len(cat_to_seqs["cat1"]), 4)  # minus one
+
+        # check that override preserves the behavior
+        cat_to_seqs_base = super(SqlIndexDataset, dataset).category_to_sequence_names()
+        self.assertDictEqual(cat_to_seqs, cat_to_seqs_base)
+
+    def test_meta_access(self):
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            subset_lists_file=SET_LIST_FILE,
+            subsets=["train"],
+            frame_data_builder_FrameDataBuilder_args=NO_BLOBS_KWARGS,
+        )
+
+        self.assertEqual(len(dataset), 50)
+
+        for idx in [10, ("cat0_seq2", 2)]:
+            example_meta = dataset.meta[idx]
+            example = dataset[idx]
+            self.assertEqual(example_meta.sequence_name, example.sequence_name)
+            self.assertEqual(example_meta.frame_number, example.frame_number)
+            self.assertEqual(example_meta.frame_timestamp, example.frame_timestamp)
+            self.assertEqual(example_meta.sequence_category, example.sequence_category)
+            torch.testing.assert_close(example_meta.camera.R, example.camera.R)
+            torch.testing.assert_close(example_meta.camera.T, example.camera.T)
+            torch.testing.assert_close(
+                example_meta.camera.focal_length, example.camera.focal_length
+            )
+            torch.testing.assert_close(
+                example_meta.camera.principal_point, example.camera.principal_point
+            )
+
+    def test_meta_access_no_blobs(self):
+        dataset = SqlIndexDataset(
+            sqlite_metadata_file=METADATA_FILE,
+            remove_empty_masks=False,
+            subset_lists_file=SET_LIST_FILE,
+            subsets=["train"],
+            frame_data_builder_FrameDataBuilder_args={
+                "dataset_root": ".",
+                "box_crop": False,  # required by blob-less accessor
+            },
+        )
+
+        self.assertIsNone(dataset.meta[0].image_rgb)
+        self.assertIsNone(dataset.meta[0].fg_probability)
+        self.assertIsNone(dataset.meta[0].depth_map)
+        self.assertIsNone(dataset.meta[0].sequence_point_cloud)
+        self.assertIsNotNone(dataset.meta[0].camera)
diff --git a/pytorch3d/tests/implicitron/test_srn.py b/pytorch3d/tests/implicitron/test_srn.py
new file mode 100644
index 0000000000000000000000000000000000000000..311bbaa6dbfa86e579754a31a45adccacd3cc34e
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_srn.py
@@ -0,0 +1,121 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.implicitron.models.generic_model import GenericModel
+from pytorch3d.implicitron.models.implicit_function.scene_representation_networks import (
+    SRNHyperNetImplicitFunction,
+    SRNImplicitFunction,
+    SRNPixelGenerator,
+)
+from pytorch3d.implicitron.models.renderer.ray_sampler import ImplicitronRayBundle
+from pytorch3d.implicitron.tools.config import get_default_args
+from pytorch3d.renderer import PerspectiveCameras
+
+from tests.common_testing import TestCaseMixin
+
+_BATCH_SIZE: int = 3
+_N_RAYS: int = 100
+_N_POINTS_ON_RAY: int = 10
+
+
+class TestSRN(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        torch.manual_seed(42)
+        get_default_args(SRNHyperNetImplicitFunction)
+        get_default_args(SRNImplicitFunction)
+
+    def test_pixel_generator(self):
+        SRNPixelGenerator()
+
+    def _get_bundle(self, *, device) -> ImplicitronRayBundle:
+        origins = torch.rand(_BATCH_SIZE, _N_RAYS, 3, device=device)
+        directions = torch.rand(_BATCH_SIZE, _N_RAYS, 3, device=device)
+        lengths = torch.rand(_BATCH_SIZE, _N_RAYS, _N_POINTS_ON_RAY, device=device)
+        bundle = ImplicitronRayBundle(
+            lengths=lengths,
+            origins=origins,
+            directions=directions,
+            xys=None,
+            camera_ids=None,
+            camera_counts=None,
+        )
+        return bundle
+
+    def test_srn_implicit_function(self):
+        implicit_function = SRNImplicitFunction()
+        device = torch.device("cpu")
+        bundle = self._get_bundle(device=device)
+        rays_densities, rays_colors = implicit_function(ray_bundle=bundle)
+        out_features = implicit_function.raymarch_function.out_features
+        self.assertEqual(
+            rays_densities.shape,
+            (_BATCH_SIZE, _N_RAYS, _N_POINTS_ON_RAY, out_features),
+        )
+        self.assertIsNone(rays_colors)
+
+    def test_srn_hypernet_implicit_function(self):
+        # TODO investigate: If latent_dim_hypernet=0, why does this crash and dump core?
+        latent_dim_hypernet = 39
+        device = torch.device("cuda:0")
+        implicit_function = SRNHyperNetImplicitFunction(
+            latent_dim_hypernet=latent_dim_hypernet
+        )
+        implicit_function.to(device)
+        global_code = torch.rand(_BATCH_SIZE, latent_dim_hypernet, device=device)
+        bundle = self._get_bundle(device=device)
+        rays_densities, rays_colors = implicit_function(
+            ray_bundle=bundle, global_code=global_code
+        )
+        out_features = implicit_function.hypernet.out_features
+        self.assertEqual(
+            rays_densities.shape,
+            (_BATCH_SIZE, _N_RAYS, _N_POINTS_ON_RAY, out_features),
+        )
+        self.assertIsNone(rays_colors)
+
+    @torch.no_grad()
+    def test_lstm(self):
+        args = get_default_args(GenericModel)
+        args.render_image_height = 80
+        args.render_image_width = 80
+        args.implicit_function_class_type = "SRNImplicitFunction"
+        args.renderer_class_type = "LSTMRenderer"
+        args.raysampler_class_type = "NearFarRaySampler"
+        args.raysampler_NearFarRaySampler_args.n_pts_per_ray_training = 1
+        args.raysampler_NearFarRaySampler_args.n_pts_per_ray_evaluation = 1
+        args.renderer_LSTMRenderer_args.bg_color = [0.4, 0.4, 0.2]
+        gm = GenericModel(**args)
+
+        camera = PerspectiveCameras()
+        image = gm.forward(
+            camera=camera,
+            image_rgb=None,
+            fg_probability=None,
+            sequence_name="",
+            mask_crop=None,
+            depth_map=None,
+        )["images_render"]
+        self.assertEqual(image.shape, (1, 3, 80, 80))
+        self.assertGreater(image.max(), 0.8)
+
+        # Force everything to be background
+        pixel_generator = gm._implicit_functions[0]._fn.pixel_generator
+        pixel_generator._density_layer.weight.zero_()
+        pixel_generator._density_layer.bias.fill_(-1.0e6)
+
+        image = gm.forward(
+            camera=camera,
+            image_rgb=None,
+            fg_probability=None,
+            sequence_name="",
+            mask_crop=None,
+            depth_map=None,
+        )["images_render"]
+        self.assertConstant(image[:, :2], 0.4)
+        self.assertConstant(image[:, 2], 0.2)
diff --git a/pytorch3d/tests/implicitron/test_types.py b/pytorch3d/tests/implicitron/test_types.py
new file mode 100644
index 0000000000000000000000000000000000000000..aff749be56c77353caa3c9cffae8e19a257007e3
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_types.py
@@ -0,0 +1,97 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import dataclasses
+import unittest
+from typing import Dict, List, NamedTuple, Tuple
+
+from pytorch3d.implicitron.dataset import types
+from pytorch3d.implicitron.dataset.types import FrameAnnotation
+
+
+class _NT(NamedTuple):
+    annot: FrameAnnotation
+
+
+class TestDatasetTypes(unittest.TestCase):
+    def setUp(self):
+        self.entry = FrameAnnotation(
+            frame_number=23,
+            sequence_name="1",
+            frame_timestamp=1.2,
+            image=types.ImageAnnotation(path="/tmp/1.jpg", size=(224, 224)),
+            mask=types.MaskAnnotation(path="/tmp/1.png", mass=42.0),
+            viewpoint=types.ViewpointAnnotation(
+                R=(
+                    (1, 0, 0),
+                    (1, 0, 0),
+                    (1, 0, 0),
+                ),
+                T=(0, 0, 0),
+                principal_point=(100, 100),
+                focal_length=(200, 200),
+            ),
+        )
+
+    def test_asdict_rec(self):
+        first = [dataclasses.asdict(self.entry)]
+        second = types._asdict_rec([self.entry])
+        self.assertEqual(first, second)
+
+    def test_parsing(self):
+        """Test that we handle collections enclosing dataclasses."""
+
+        dct = dataclasses.asdict(self.entry)
+
+        parsed = types._dataclass_from_dict(dct, FrameAnnotation)
+        self.assertEqual(parsed, self.entry)
+
+        # namedtuple
+        parsed = types._dataclass_from_dict(_NT(dct), _NT)
+        self.assertEqual(parsed.annot, self.entry)
+
+        # tuple
+        parsed = types._dataclass_from_dict((dct,), Tuple[FrameAnnotation])
+        self.assertEqual(parsed, (self.entry,))
+
+        # list
+        parsed = types._dataclass_from_dict(
+            [
+                dct,
+            ],
+            List[FrameAnnotation],
+        )
+        self.assertEqual(
+            parsed,
+            [
+                self.entry,
+            ],
+        )
+
+        # dict
+        parsed = types._dataclass_from_dict({"key": dct}, Dict[str, FrameAnnotation])
+        self.assertEqual(parsed, {"key": self.entry})
+
+    def test_parsing_vectorized(self):
+        dct = dataclasses.asdict(self.entry)
+
+        self._compare_with_scalar(dct, FrameAnnotation)
+        self._compare_with_scalar(_NT(dct), _NT)
+        self._compare_with_scalar((dct,), Tuple[FrameAnnotation])
+        self._compare_with_scalar([dct], List[FrameAnnotation])
+        self._compare_with_scalar({"key": dct}, Dict[str, FrameAnnotation])
+
+        dct2 = dct.copy()
+        dct2["meta"] = {"aux": 76}
+        self._compare_with_scalar(dct2, FrameAnnotation)
+
+    def _compare_with_scalar(self, obj, typeannot, repeat=3):
+        dicts = [obj] * repeat  # honor `repeat`; do not shadow the builtin `input`
+        vect_output = types._dataclass_list_from_dict_list(dicts, typeannot)
+        self.assertEqual(len(vect_output), repeat)  # keeps all() below non-vacuous
+        gt = types._dataclass_from_dict(obj, typeannot)  # scalar-parser reference
+        self.assertTrue(all(res == gt for res in vect_output))
diff --git a/pytorch3d/tests/implicitron/test_viewsampling.py b/pytorch3d/tests/implicitron/test_viewsampling.py
new file mode 100644
index 0000000000000000000000000000000000000000..4094bf4ab7f762da027bdf7511feda23b72af119
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_viewsampling.py
@@ -0,0 +1,270 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import pytorch3d as pt3d
+import torch
+from pytorch3d.implicitron.models.view_pooler.view_sampler import ViewSampler
+from pytorch3d.implicitron.tools.config import expand_args_fields
+
+
+class TestViewsampling(unittest.TestCase):
+    def setUp(self):
+        torch.manual_seed(42)  # fixed seed: the tests draw random points and masks
+        expand_args_fields(ViewSampler)  # generate the constructor for ViewSampler
+
+    def _init_view_sampler_problem(self, random_masks):
+        """
+        Generates a view-sampling problem (on the GPU if available, else on the CPU):
+        - 4 source views, 1st/2nd from the first sequence 'seq1', the rest from 'seq2'
+        - 3 sets of 3D points from sequences 'seq1', 'seq2', 'seq2' respectively.
+            - first 50 points in each batch correctly project to the source views,
+                while the remaining 50 do not land in any projection plane.
+        - each source view is labeled with image feature tensors of shape 7x100x50,
+            where all elements of the n-th tensor are set to `n+1`.
+        - the elements of the source view masks are either set to random binary number
+            (if `random_masks==True`), or all set to 1 (`random_masks==False`).
+        - the source view cameras are uniformly distributed on a unit circle
+            in the x-z plane and look at (0,0,0).
+        Returns `(pts, camera, feats, masks, seq_id_camera, seq_id_pts)`.
+        """
+        seq_id_camera = ["seq1", "seq1", "seq2", "seq2"]
+        seq_id_pts = ["seq1", "seq2", "seq2"]
+        pts_batch = 3
+        n_pts = 100
+        n_views = 4
+        fdim = 7
+        H = 100
+        W = 50
+
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        # points that land into the projection planes of all cameras
+        pts_inside = 0.1 * torch.nn.functional.normalize(
+            torch.randn(pts_batch, n_pts // 2, 3, device=device),
+            dim=-1,
+        )
+
+        # move the outside points far above the scene
+        pts_outside = pts_inside.clone()
+        pts_outside[:, :, 1] += 1e8
+        pts = torch.cat([pts_inside, pts_outside], dim=1)
+
+        R, T = pt3d.renderer.look_at_view_transform(
+            dist=1.0,
+            elev=0.0,
+            azim=torch.linspace(0, 360, n_views + 1)[:n_views],
+            degrees=True,
+            device=pts.device,
+        )
+        focal_length = R.new_ones(n_views, 2)
+        principal_point = R.new_zeros(n_views, 2)
+        camera = pt3d.renderer.PerspectiveCameras(
+            R=R,
+            T=T,
+            focal_length=focal_length,
+            principal_point=principal_point,
+            device=pts.device,
+        )
+
+        feats_map = torch.arange(n_views, device=pts.device, dtype=pts.dtype) + 1
+        feats = {"feats": feats_map[:, None, None, None].repeat(1, fdim, H, W)}
+
+        masks = (
+            torch.rand(n_views, 1, H, W, device=pts.device, dtype=pts.dtype) > 0.5
+        ).type_as(R)
+
+        if not random_masks:
+            masks[:] = 1.0
+
+        return pts, camera, feats, masks, seq_id_camera, seq_id_pts
+
+    def test_compare_with_naive(self):
+        """
+        Checks that the efficient ViewSampler module and the naive reference
+        implementation return matching features and masks.
+        """
+
+        # problem with random binary source-view masks
+        problem = self._init_view_sampler_problem(True)
+        pts, camera, feats, masks, seq_id_camera, seq_id_pts = problem
+
+        # arguments shared by the naive and the efficient implementations
+        shared_kwargs = {
+            "pts": pts,
+            "seq_id_pts": seq_id_pts,
+            "camera": camera,
+            "seq_id_camera": seq_id_camera,
+            "feats": feats,
+            "masks": masks,
+        }
+
+        for use_masks in (True, False):
+            # reference result of the naive implementation
+            feats_ref, masks_ref = _view_sample_naive(
+                masked_sampling=use_masks,
+                **shared_kwargs,
+            )
+
+            # make sure we generate the constructor for ViewSampler
+            expand_args_fields(ViewSampler)
+            sampler = ViewSampler(masked_sampling=use_masks)
+
+            feats_out, masks_out = sampler(**shared_kwargs)
+
+            # every sampled feature tensor has to match its naive counterpart ...
+            for name, sampled in feats_out.items():
+                self.assertTrue(torch.allclose(sampled, feats_ref[name]))
+
+            # ... and so do the sampled masks
+            self.assertTrue(torch.allclose(masks_out, masks_ref))
+
+    def test_viewsampling(self):
+        """
+        Builds a view-sampling problem whose outcome is known in advance, and
+        checks the ViewSampler's output against that expected result.
+        """
+
+        (
+            pts,
+            camera,
+            feats,
+            masks,
+            seq_id_camera,
+            seq_id_pts,
+        ) = self._init_view_sampler_problem(False)
+
+        expand_args_fields(ViewSampler)
+
+        # problem sizes; the second half of the points of each batch element
+        # does not land in the projection plane of any source view
+        pts_batch, n_pts = pts.shape[:2]
+        n_views = camera.R.shape[0]
+        feat_dim = feats["feats"].shape[1]
+        n_pts_away = n_pts // 2
+
+        for masked_sampling in (True, False):
+
+            view_sampler = ViewSampler(masked_sampling=masked_sampling)
+
+            feats_sampled, masks_sampled = view_sampler(
+                pts=pts,
+                seq_id_pts=seq_id_pts,
+                camera=camera,
+                seq_id_camera=seq_id_camera,
+                feats=feats,
+                masks=masks,
+            )
+
+            for pts_i in range(pts_batch):
+                for view_i in range(n_views):
+                    same_sequence = seq_id_pts[pts_i] == seq_id_camera[view_i]
+
+                    # start from the all-zero result, which is what is expected
+                    # when points / cameras come from different sequences
+                    gt_masks = pts.new_zeros(n_pts, 1)
+                    gt_feats = pts.new_zeros(n_pts, feat_dim)
+
+                    if same_sequence:
+                        # the first half of the points samples the constant
+                        # feature value `view_i + 1` of this view
+                        gt_feats[:n_pts_away] = view_i + 1
+                        # without masked sampling the mask is one for all
+                        # points, otherwise only for the first half
+                        n_valid = n_pts_away if masked_sampling else n_pts
+                        gt_masks[:n_valid] = 1.0
+
+                    for sampled in feats_sampled.values():
+                        self.assertTrue(
+                            torch.allclose(sampled[pts_i, view_i], gt_feats)
+                        )
+                    self.assertTrue(
+                        torch.allclose(masks_sampled[pts_i, view_i], gt_masks)
+                    )
+
+
+def _view_sample_naive(
+    pts,
+    seq_id_pts,
+    camera,
+    seq_id_camera,
+    feats,
+    masks,
+    masked_sampling,
+):
+    """
+    Reference (slow) implementation of ViewSampler's forward pass.
+    See ViewSampler's docstring for the meaning of the arguments.
+    """
+    n_views = camera.R.shape[0]
+    n_pts = pts.shape[1]
+
+    # one list per point batch, holding one entry for each source view
+    feats_rows, masks_rows = [], []
+
+    for pts_i in range(pts.shape[0]):
+        feats_row, masks_row = [], []
+        for view_i in range(n_views):
+            if seq_id_pts[pts_i] == seq_id_camera[view_i]:
+                # same sequence of pts and cameras -> sample
+                feats_view, masks_view = _sample_one_view_naive(
+                    camera[view_i],
+                    pts[pts_i],
+                    {k: f[view_i] for k, f in feats.items()},
+                    masks[view_i],
+                    masked_sampling,
+                    sampling_mode="bilinear",
+                )
+            else:
+                # points/cameras come from different sequences
+                feats_view = {
+                    k: f.new_zeros(n_pts, f.shape[1]) for k, f in feats.items()
+                }
+                masks_view = masks.new_zeros(n_pts, 1)
+            feats_row.append(feats_view)
+            masks_row.append(masks_view)
+        feats_rows.append(feats_row)
+        masks_rows.append(masks_row)
+
+    masks_sampled_cat = torch.stack([torch.stack(row) for row in masks_rows])
+    feats_sampled_cat = {
+        k: torch.stack([torch.stack([fv[k] for fv in row]) for row in feats_rows])
+        for k in feats_rows[0][0]
+    }
+    return feats_sampled_cat, masks_sampled_cat
+
+
+def _sample_one_view_naive(
+    camera,
+    pts,
+    feats,
+    masks,
+    masked_sampling,
+    sampling_mode="bilinear",
+):
+    """
+    Samples `feats` (and, with `masked_sampling`, `masks`) of a single source
+    view at the projections of `pts`.
+    """
+    # project the points and drop the last coordinate: 1 x 1 x n_pts x 2
+    proj_ndc = camera.transform_points(pts[None])[None, ..., :-1]
+    feats_sampled = {}
+    for k, f in feats.items():
+        sampled = pt3d.renderer.ndc_grid_sample(f[None], proj_ndc, mode=sampling_mode)
+        feats_sampled[k] = sampled.permute(0, 3, 1, 2)[0, :, :, 0]  # n_pts x dim
+    if masked_sampling:
+        masks_sampled = pt3d.renderer.ndc_grid_sample(
+            masks[None],
+            proj_ndc,
+            mode=sampling_mode,
+            align_corners=False,
+        )[0, 0, 0, :][:, None]
+    else:
+        # the masks are ignored: every point gets a mask value of one
+        masks_sampled = proj_ndc.new_ones(pts.shape[0], 1)
+    return feats_sampled, masks_sampled
diff --git a/pytorch3d/tests/implicitron/test_voxel_grid_implicit_function.py b/pytorch3d/tests/implicitron/test_voxel_grid_implicit_function.py
new file mode 100644
index 0000000000000000000000000000000000000000..9727ba982adcfca916728d51bf280be59735988c
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_voxel_grid_implicit_function.py
@@ -0,0 +1,227 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import torch
+
+from omegaconf import DictConfig, OmegaConf
+from pytorch3d.implicitron.models.implicit_function.voxel_grid_implicit_function import (
+    VoxelGridImplicitFunction,
+)
+from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
+
+from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args
+from pytorch3d.renderer import ray_bundle_to_ray_points
+from tests.common_testing import TestCaseMixin
+
+
+class TestVoxelGridImplicitFunction(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        torch.manual_seed(42)  # fixed seed: the tests draw random ray bundles
+        expand_args_fields(VoxelGridImplicitFunction)  # expand the class's config fields
+
+    def _get_simple_implicit_function(self, scaffold_res=16):
+        """Builds a small VoxelGridImplicitFunction with a cubic scaffold."""
+        color_network_args = {"n_layers": 2, "output_dim": 3, "hidden_dim": 128}
+        density_grid_args = {
+            "voxel_grid_FullResolutionVoxelGrid_args": {"n_features": 7}
+        }
+        overrides = DictConfig(
+            {
+                "voxel_grid_density_args": density_grid_args,
+                "decoder_density_class_type": "ElementwiseDecoder",
+                "decoder_color_class_type": "MLPDecoder",
+                "decoder_color_MLPDecoder_args": {"network_args": color_network_args},
+                "scaffold_resolution": (scaffold_res,) * 3,
+            }
+        )
+        # the overrides are merged on top of the defaults of the class
+        merged = OmegaConf.merge(
+            get_default_args(VoxelGridImplicitFunction),
+            overrides,
+        )
+        return VoxelGridImplicitFunction(**merged)
+
+    def test_forward(self) -> None:
+        """
+        Smoke test: a single forward pass of VoxelGridImplicitFunction runs.
+        """
+        func = self._get_simple_implicit_function()
+        n_grids, n_points = 10, 9
+        leading = (n_grids, 2, 3)  # leading shape shared by all ray tensors
+        raybundle = ImplicitronRayBundle(
+            origins=torch.randn(*leading, 3),
+            directions=torch.randn(*leading, 3),
+            lengths=torch.randn(*leading, n_points),
+            xys=0,
+        )
+        func(raybundle)
+
+    def test_scaffold_formation(self):
+        """
+        Test calculating the scaffold.
+
+        A custom density function is plugged into the implicit function, the
+        scaffold is calculated from it, and the densities stored in the scaffold
+        are then compared with the ones given by the custom density function.
+        """
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        func = self._get_simple_implicit_function().to(device)
+        func.scaffold_max_pool_kernel_size = 1
+
+        def new_density(points):
+            """
+            Density function which returns 1 if p>(0.5, 0.5, 0.5) or
+            p < (-0.5, -0.5, -0.5) else 0
+            """
+
+            def is_dense(p):
+                # all coordinates above 0.5, or all of them below -0.5
+                return bool(torch.all(p > 0.5) or torch.all(p < -0.5))
+
+            # one (1, 1) tensor per point, reshaped to `points.shape[:-1] + (1,)`
+            flat = points.view(-1, 3)
+            out = [torch.tensor([[1.0 if is_dense(p) else 0.0]]) for p in flat]
+            return torch.cat(out).view(*points.shape[:-1], 1).to(device)
+
+        func._get_density = new_density
+        func._get_scaffold(0)
+
+        query_points = torch.tensor(
+            [
+                [0, 0, 0],
+                [1, 1, 1],
+                [1, 0, 0],
+                [0.1, 0, 0],
+                [10, 1, -1],
+                [-0.8, -0.7, -0.9],
+            ]
+        ).to(device)
+        expected = new_density(query_points).float().to(device)
+        # the scaffold has to reproduce the custom density at the query points
+        from_scaffold = func.voxel_grid_scaffold(query_points)
+        matches = torch.allclose(from_scaffold, expected)
+        assert matches, (from_scaffold, expected)
+
+    def test_scaffold_filtering(self, n_test_points=100):
+        """
+        Test that filtering points with scaffold works.
+
+        We define a scaffold and make the implicit function use it. We also
+        define new density and color functions which check that all passed
+        points are not in empty space (with scaffold function). In the end
+        we compare the result from the implicit function with one calculated
+        in plain Python, which checks that the points were merged correctly.
+        """
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        func = self._get_simple_implicit_function().to(device)
+
+        def scaffold(points):
+            """
+            Function to deterministically and randomly enough assign a point
+            to empty or occupied space.
+            Return 1 if the second decimal digit of the coordinate sum is odd else 0
+            """
+            return (
+                ((points.sum(dim=-1, keepdim=True) * 10**2 % 10).long() % 2) == 1
+            ).float()
+
+        def new_density(points):
+            # check if all passed points should be passed here
+            assert torch.all(scaffold(points)), (scaffold(points), points.shape)
+            return points.sum(dim=-1, keepdim=True)
+
+        def new_color(points, camera, directions, non_empty_points, num_points_per_ray):
+            # check if all passed points should be passed here
+            assert torch.all(scaffold(points)), (scaffold(points), points.shape)
+            return points * 2
+
+        # check both computation paths that they contain only points
+        # which are not in empty space
+        func._get_density = new_density
+        func._get_color = new_color
+        func.voxel_grid_scaffold.forward = scaffold
+        func._scaffold_ready = True
+
+        bundle = ImplicitronRayBundle(
+            origins=torch.rand((n_test_points, 2, 1, 3), device=device),
+            directions=torch.rand((n_test_points, 2, 1, 3), device=device),
+            lengths=torch.rand((n_test_points, 2, 1, 4), device=device),
+            xys=None,
+        )
+        points = ray_bundle_to_ray_points(bundle)
+        result_density, result_color, _ = func(bundle)
+
+        # construct the wanted result 'by hand'
+        flat_points = points.view(-1, 3)
+        expected_result_density, expected_result_color = [], []
+        for point in flat_points:
+            if scaffold(point) == 1:
+                expected_result_density.append(point.sum(dim=-1, keepdim=True))
+                expected_result_color.append(point * 2)
+            else:
+                expected_result_density.append(point.new_zeros((1,)))
+                expected_result_color.append(point.new_zeros((3,)))
+        expected_result_density = torch.stack(expected_result_density, dim=0).view(
+            *points.shape[:-1], 1
+        )
+        expected_result_color = torch.stack(expected_result_color, dim=0).view(
+            *points.shape[:-1], 3
+        )
+
+        # check that the result is expected
+        assert torch.allclose(result_density, expected_result_density), (
+            result_density,
+            expected_result_density,
+        )
+        assert torch.allclose(result_color, expected_result_color), (
+            result_color,
+            expected_result_color,
+        )
+
+    def test_cropping(self, scaffold_res=9):
+        """
+        Checks that the implicit function determines the bounding box of the
+        object and passes the right min and max points to the voxel grids.
+        """
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        func = self._get_simple_implicit_function(scaffold_res=scaffold_res).to(device)
+
+        assert scaffold_res >= 8
+        # the corners of the occupied box are multiples of 1 / div
+        div = (scaffold_res - 1) / 2
+        true_min_point, true_max_point = (
+            torch.tensor(corner, device=device)
+            for corner in (
+                [-3 / div, 0 / div, -3 / div],
+                [1 / div, 2 / div, 3 / div],
+            )
+        )
+
+        def new_scaffold(points):
+            # 1 if between true_min and true_max point else 0
+            above_min = true_min_point <= points
+            below_max = points <= true_max_point
+            inside = (above_min & below_max).all(dim=-1, keepdim=True)
+            return inside.float()
+
+        called_crop = []
+
+        def assert_min_max_points(min_point, max_point):
+            called_crop.append(1)
+            self.assertClose(min_point, true_min_point)
+            self.assertClose(max_point, true_max_point)
+
+        # patch both voxel grids and the scaffold, then trigger the cropping
+        func.voxel_grid_density.crop_self = assert_min_max_points
+        func.voxel_grid_color.crop_self = assert_min_max_points
+        func.voxel_grid_scaffold.forward = new_scaffold
+        func._scaffold_ready = True
+        func._crop(epoch=0)
+        # the patched `crop_self` has to be called twice in total
+        assert len(called_crop) == 2
diff --git a/pytorch3d/tests/implicitron/test_voxel_grids.py b/pytorch3d/tests/implicitron/test_voxel_grids.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b47de7f952e7848a33898457936516c82bf92f8
--- /dev/null
+++ b/pytorch3d/tests/implicitron/test_voxel_grids.py
@@ -0,0 +1,860 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+from typing import Optional, Tuple
+
+import torch
+from omegaconf import DictConfig, OmegaConf
+
+from pytorch3d.implicitron.models.implicit_function.utils import (
+    interpolate_line,
+    interpolate_plane,
+    interpolate_volume,
+)
+from pytorch3d.implicitron.models.implicit_function.voxel_grid import (
+    CPFactorizedVoxelGrid,
+    FullResolutionVoxelGrid,
+    VMFactorizedVoxelGrid,
+    VoxelGridModule,
+)
+
+from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args
+from tests.common_testing import TestCaseMixin
+
+
+class TestVoxelGrids(TestCaseMixin, unittest.TestCase):
+    """
+    Tests Voxel grids, tests them by setting all elements to zero (after retrieving
+    they should also return zero) and by setting all of the elements to one and
+    getting the result. Also tests the interpolation by 'manually' interpolating
+    one by one sample and comparing with the batched implementation.
+    """
+
+    def get_random_normalized_points(
+        self, n_grids, n_points=None, dimension=3
+    ) -> torch.Tensor:
+        """Returns random points with coordinates in [-1, 1)."""
+        middle_rank = tuple(torch.randint(1, 5, size=(1,)))
+        # drawn even when `n_points` is given, as in every call of this helper
+        middle_shape = torch.randint(1, 4, middle_rank)
+        if n_points is not None:
+            middle_shape = [n_points]
+        # create random query points
+        points = torch.rand(n_grids, *middle_shape, dimension)
+        return points * 2 - 1
+
+    def _test_query_with_constant_init_cp(
+        self,
+        n_grids: int,
+        n_features: int,
+        n_components: int,
+        resolution: Tuple[int],
+        value: float = 1,
+    ) -> None:
+        # Fills every parameter with 'value' and queries random points. The result
+        # should be of shape (n_grids, ..., n_features) and be filled with
+        # n_components * value.
+        grid = CPFactorizedVoxelGrid(
+            resolution_changes={0: resolution},
+            n_components=n_components,
+            n_features=n_features,
+        )
+
+        constant_params = {
+            name: torch.ones(n_grids, *shape) * value
+            for name, shape in grid.get_shapes(epoch=0).items()
+        }
+        params = grid.values_type(**constant_params)
+        points = self.get_random_normalized_points(n_grids)
+        queried = grid.evaluate_local(points, params)
+        expected = torch.ones(n_grids, *points.shape[1:-1], n_features)
+        expected = expected * n_components * value
+        assert torch.allclose(queried, expected, rtol=0.0001)
+
+    def _test_query_with_constant_init_vm(
+        self,
+        n_grids: int,
+        n_features: int,
+        resolution: Tuple[int],
+        n_components: Optional[int] = None,
+        distribution: Optional[Tuple[int]] = None,
+        value: float = 1,
+        n_points: int = 1,
+    ) -> None:
+        # set everything to 'value' and do query for elements (`n_points` is unused)
+        grid = VMFactorizedVoxelGrid(
+            n_features=n_features,
+            resolution_changes={0: resolution},
+            n_components=n_components,
+            distribution_of_components=distribution,
+        )
+        constant_params = {
+            name: torch.ones(n_grids, *shape) * value
+            for name, shape in grid.get_shapes(epoch=0).items()
+        }
+        params = grid.values_type(**constant_params)
+        if distribution is None:
+            expected_element = n_components * value
+        else:
+            expected_element = sum(distribution) * value
+        points = self.get_random_normalized_points(n_grids)
+        queried = grid.evaluate_local(points, params)
+        expected = torch.ones(n_grids, *points.shape[1:-1], n_features)
+        assert torch.allclose(queried, expected * expected_element)
+
+    def _test_query_with_constant_init_full(
+        self,
+        n_grids: int,
+        n_features: int,
+        resolution: Tuple[int],
+        value: int = 1,
+        n_points: int = 1,
+    ) -> None:
+        # set everything to 'value' and do query for elements
+        grid = FullResolutionVoxelGrid(
+            n_features=n_features, resolution_changes={0: resolution}
+        )
+        constant_params = {
+            name: torch.ones(n_grids, *shape) * value
+            for name, shape in grid.get_shapes(epoch=0).items()
+        }
+        params = grid.values_type(**constant_params)
+
+        # the constant grid is expected to return `value` at every query point
+        points = self.get_random_normalized_points(n_grids)
+        queried = grid.evaluate_local(points, params)
+        expected = torch.ones(n_grids, *points.shape[1:-1], n_features) * value
+        assert torch.allclose(queried, expected)
+
+    def test_query_with_constant_init(self):
+        full = self._test_query_with_constant_init_full
+        cp = self._test_query_with_constant_init_cp
+        vm = self._test_query_with_constant_init_vm
+        dist = (33, 41, 1)
+        # (sub-test name, helper, keyword arguments of the helper)
+        cases = (
+            ("Full", full, dict(n_grids=5, n_features=6, resolution=(3, 4, 5))),
+            (
+                "Full with 1 in dimensions",
+                full,
+                dict(n_grids=5, n_features=1, resolution=(33, 41, 1)),
+            ),
+            (
+                "CP",
+                cp,
+                dict(n_grids=5, n_features=6, n_components=7, resolution=(3, 4, 5)),
+            ),
+            (
+                "CP with 1 in dimensions",
+                cp,
+                dict(n_grids=2, n_features=1, n_components=3, resolution=(3, 1, 1)),
+            ),
+            (
+                "VM with symetric distribution",
+                vm,
+                dict(n_grids=6, n_features=9, resolution=(2, 12, 2), n_components=12),
+            ),
+            (
+                "VM with distribution",
+                vm,
+                dict(n_grids=5, n_features=1, resolution=(5, 9, 7), distribution=dist),
+            ),
+        )
+        for name, helper, kwargs in cases:
+            with self.subTest(name):
+                helper(**kwargs)
+
+    def test_query_with_zero_init(self):
+        cases = (  # (sub-test name, helper, keyword arguments of the helper)
+            (
+                "Query testing with zero init CPFactorizedVoxelGrid",
+                self._test_query_with_constant_init_cp,
+                dict(n_grids=5, n_features=6, n_components=7, resolution=(3, 2, 5)),
+            ),
+            (
+                "Query testing with zero init VMFactorizedVoxelGrid",
+                self._test_query_with_constant_init_vm,
+                dict(n_grids=2, n_features=9, resolution=(2, 11, 3), n_components=3),
+            ),
+            (
+                "Query testing with zero init FullResolutionVoxelGrid",
+                self._test_query_with_constant_init_full,
+                dict(n_grids=4, n_features=2, resolution=(3, 3, 5)),
+            ),
+        )
+        for name, helper, kwargs in cases:
+            with self.subTest(name):
+                helper(value=0, **kwargs)
+
+    def setUp(self):
+        torch.manual_seed(42)  # fixed seed: the tests draw random points and grids
+        expand_args_fields(FullResolutionVoxelGrid)  # expand the config fields of
+        expand_args_fields(CPFactorizedVoxelGrid)  # each voxel grid class listed here
+        expand_args_fields(VMFactorizedVoxelGrid)
+        expand_args_fields(VoxelGridModule)
+
+    def _interpolate_1D(
+        self, points: torch.Tensor, vectors: torch.Tensor
+    ) -> torch.Tensor:
+        """
+        interpolate vectors at points, which are (batch, n_points, 1), each value in [-1, 1]
+        """
+        result = []
+        _, _, width = vectors.shape
+        # transform from [-1, 1] to [0, width-1]
+        points = (points + 1) / 2 * (width - 1)
+        for vector, row in zip(vectors, points):
+            newrow = []
+            for x in row:
+                xf, xc = int(torch.floor(x)), int(torch.ceil(x))
+                wc = x - xf  # weight of vector[:, xc]; 0 on a grid node (xf == xc)
+                tmp = vector[:, xf] * (1 - wc) + vector[:, xc] * wc
+                newrow.append(tmp[None, None, :])
+            result.append(torch.cat(newrow, dim=1))
+        return torch.cat(result)
+
+    def _interpolate_2D(
+        self, points: torch.Tensor, matrices: torch.Tensor
+    ) -> torch.Tensor:
+        """
+        interpolate matrices at points, which are (batch, n_points, 2), each value in [-1, 1]
+        """
+        result = []
+        _, _, width, height = matrices.shape
+        points = (points + 1) / 2 * (torch.tensor([[[width, height]]]) - 1)
+        for matrix, row in zip(matrices, points):
+            newrow = []
+            for x, y in row:
+                xf, xc = int(torch.floor(x)), int(torch.ceil(x))
+                yf, yc = int(torch.floor(y)), int(torch.ceil(y))
+                # weights of the upper neighbours; 0 on a grid node (floor == ceil)
+                wx, wy = x - xf, y - yf
+                itemf = matrix[:, xf, yf] * (1 - wx) + matrix[:, xc, yf] * wx
+                itemc = matrix[:, xf, yc] * (1 - wx) + matrix[:, xc, yc] * wx
+                tmp = itemf * (1 - wy) + itemc * wy
+                newrow.append(tmp[None, None, :])
+            result.append(torch.cat(newrow, dim=1))
+        return torch.cat(result)
+
+    def _interpolate_3D(
+        self, points: torch.Tensor, tensors: torch.Tensor
+    ) -> torch.Tensor:
+        """
+        interpolate tensors at points, which are (batch, n_points, 3), each value in [-1, 1]
+        """
+        result = []
+        _, _, width, height, depth = tensors.shape
+        # positions in voxel units, i.e. in [0, size - 1] along each axis
+        sizes = torch.tensor([[[width, height, depth]]])
+        batch_normalized_points = (points + 1) / 2 * (sizes - 1)
+        batch_points = points
+
+        for tensor, points, normalized_points in zip(
+            tensors, batch_points, batch_normalized_points
+        ):
+            newrow = []
+            for (x, y, _), (_, _, nz) in zip(points, normalized_points):
+                zf, zc = int(torch.floor(nz)), int(torch.ceil(nz))
+                xy = torch.tensor([[[x, y]]])
+                itemf = self._interpolate_2D(xy, tensor[None, :, :, :, zf])
+                itemc = self._interpolate_2D(xy, tensor[None, :, :, :, zc])
+                # position between the two slices, mapped to the [-1, 1] range
+                # that `_interpolate_1D` expects for a vector of width 2
+                local_z = (2 * (nz - zf) - 1).reshape(1, 1, 1)
+                tmp = self._interpolate_1D(
+                    points=local_z,
+                    vectors=torch.cat((itemf, itemc), dim=1).permute(0, 2, 1),
+                )
+                newrow.append(tmp)
+            result.append(torch.cat(newrow, dim=1))
+        return torch.cat(result)
+
+    def test_interpolation(self):
+        """Compares the interpolate_* utilities with per-point reference code."""
+        with self.subTest("1D interpolation"):
+            points = self.get_random_normalized_points(
+                n_grids=4, n_points=5, dimension=1
+            )
+            vector = torch.randn(size=(4, 3, 2))  # last dimension is the width
+            assert torch.allclose(
+                self._interpolate_1D(points, vector),
+                interpolate_line(
+                    points,
+                    vector,
+                    align_corners=True,
+                    padding_mode="zeros",
+                    mode="bilinear",
+                ),
+                rtol=0.0001,
+                atol=0.0001,
+            )
+        with self.subTest("2D interpolation"):
+            points = self.get_random_normalized_points(
+                n_grids=4, n_points=5, dimension=2
+            )
+            matrix = torch.randn(size=(4, 2, 3, 5))  # last two are width, height
+            assert torch.allclose(
+                self._interpolate_2D(points, matrix),
+                interpolate_plane(
+                    points,
+                    matrix,
+                    align_corners=True,
+                    padding_mode="zeros",
+                    mode="bilinear",
+                ),
+                rtol=0.0001,
+                atol=0.0001,
+            )
+
+        with self.subTest("3D interpolation"):
+            points = self.get_random_normalized_points(
+                n_grids=4, n_points=5, dimension=3
+            )
+            tensor = torch.randn(size=(4, 5, 2, 7, 2))  # last three: width, height, depth
+            assert torch.allclose(
+                self._interpolate_3D(points, tensor),
+                interpolate_volume(
+                    points,
+                    tensor,
+                    align_corners=True,
+                    padding_mode="zeros",
+                    mode="bilinear",
+                ),
+                rtol=0.0001,
+                atol=0.0001,
+            )
+
+    def test_floating_point_query(self):
+        """Evaluate FullResolution, CP and VM grids at hand-picked (partly
+        fractional) positions and compare with precomputed expected values.
+        """
+        with self.subTest("FullResolution"):
+            grid = FullResolutionVoxelGrid(
+                n_features=1, resolution_changes={0: (1, 1, 1)}
+            )
+            params = grid.values_type(**grid.get_shapes(epoch=0))
+            params.voxel_grid = torch.tensor(
+                [
+                    [
+                        [[[1, 3], [5, 7]], [[9, 11], [13, 15]]],
+                        [[[2, 4], [6, 8]], [[10, 12], [14, 16]]],
+                    ],
+                    [
+                        [[[17, 18], [19, 20]], [[21, 22], [23, 24]]],
+                        [[[25, 26], [27, 28]], [[29, 30], [31, 32]]],
+                    ],
+                ],
+                dtype=torch.float,
+            )
+            points = (
+                torch.tensor(
+                    [
+                        [
+                            [1, 0, 1],
+                            [0.5, 1, 1],
+                            [1 / 3, 1 / 3, 2 / 3],
+                        ],
+                        [
+                            [0, 1, 1],
+                            [0, 0.5, 1],
+                            [1 / 4, 1 / 4, 3 / 4],
+                        ],
+                    ]
+                )
+                / torch.tensor([[1.0, 1, 1]])
+                * 2
+                - 1  # rescale from [0, 1] to [-1, 1]
+            )
+            expected_result = torch.tensor(
+                [
+                    [[11, 12], [11, 12], [6.333333, 7.3333333]],
+                    [[20, 28], [19, 27], [19.25, 27.25]],
+                ]
+            )
+
+            assert torch.allclose(
+                grid.evaluate_local(points, params),
+                expected_result,
+                rtol=0.0001,
+                atol=0.0001,
+            ), grid.evaluate_local(points, params)
+        with self.subTest("CP"):
+            grid = CPFactorizedVoxelGrid(
+                n_features=1, resolution_changes={0: (1, 1, 1)}, n_components=3
+            )
+            params = grid.values_type(**grid.get_shapes(epoch=0))
+            params.vector_components_x = torch.tensor(
+                [
+                    [[1, 2], [10.5, 20.5]],
+                    [[10, 20], [2, 4]],
+                ]
+            )
+            params.vector_components_y = torch.tensor(
+                [
+                    [[3, 4, 5], [30.5, 40.5, 50.5]],
+                    [[30, 40, 50], [1, 3, 5]],
+                ]
+            )
+            params.vector_components_z = torch.tensor(
+                [
+                    [[6, 7, 8, 9], [60.5, 70.5, 80.5, 90.5]],
+                    [[60, 70, 80, 90], [6, 7, 8, 9]],
+                ]
+            )
+            params.basis_matrix = torch.tensor(
+                [
+                    [[2.0], [2.0]],
+                    [[1.0], [2.0]],
+                ]
+            )
+            points = (
+                torch.tensor(
+                    [
+                        [
+                            [0, 2, 2],
+                            [1, 2, 0.25],
+                            [0.5, 0.5, 1],
+                            [1 / 3, 2 / 3, 2 + 1 / 3],
+                        ],
+                        [
+                            [1, 0, 1],
+                            [0.5, 2, 2],
+                            [1, 0.5, 0.5],
+                            [1 / 4, 3 / 4, 2 + 1 / 4],
+                        ],
+                    ]
+                )
+                / torch.tensor([[[1.0, 2, 3]]])  # per-axis divisor, maps into [0, 1]
+                * 2
+                - 1  # rescale from [0, 1] to [-1, 1]
+            )
+            expected_result_matrix = torch.tensor(
+                [
+                    [[85450.25], [130566.5], [77658.75], [86285.422]],
+                    [[42056], [60240], [45604], [38775]],
+                ]
+            )
+            expected_result_sum = torch.tensor(
+                [
+                    [[42725.125], [65283.25], [38829.375], [43142.711]],
+                    [[42028], [60120], [45552], [38723.4375]],
+                ]
+            )
+            with self.subTest("CP with basis_matrix reduction"):
+                assert torch.allclose(
+                    grid.evaluate_local(points, params),
+                    expected_result_matrix,
+                    rtol=0.0001,
+                    atol=0.0001,
+                )
+            del params.basis_matrix  # the next subtest covers the sum reduction
+            with self.subTest("CP with sum reduction"):
+                assert torch.allclose(
+                    grid.evaluate_local(points, params),
+                    expected_result_sum,
+                    rtol=0.0001,
+                    atol=0.0001,
+                )
+
+        with self.subTest("VM"):
+            grid = VMFactorizedVoxelGrid(
+                n_features=1, resolution_changes={0: (1, 1, 1)}, n_components=3
+            )
+            params = VMFactorizedVoxelGrid.values_type(**grid.get_shapes(epoch=0))
+            params.matrix_components_xy = torch.tensor(
+                [
+                    [[[1, 2], [3, 4]], [[19, 20], [21, 22.0]]],
+                    [[[35, 36], [37, 38]], [[39, 40], [41, 42]]],
+                ]
+            )
+            params.matrix_components_xz = torch.tensor(
+                [
+                    [[[7, 8], [9, 10]], [[25, 26], [27, 28.0]]],
+                    [[[43, 44], [45, 46]], [[47, 48], [49, 50]]],
+                ]
+            )
+            params.matrix_components_yz = torch.tensor(
+                [
+                    [[[13, 14], [15, 16]], [[31, 32], [33, 34.0]]],
+                    [[[51, 52], [53, 54]], [[55, 56], [57, 58.0]]],
+                ]
+            )
+
+            params.vector_components_z = torch.tensor(
+                [
+                    [[5, 6], [23, 24.0]],
+                    [[59, 60], [61, 62]],
+                ]
+            )
+            params.vector_components_y = torch.tensor(
+                [
+                    [[11, 12], [29, 30.0]],
+                    [[63, 64], [65, 66]],
+                ]
+            )
+            params.vector_components_x = torch.tensor(
+                [
+                    [[17, 18], [35, 36.0]],
+                    [[67, 68], [69, 70.0]],
+                ]
+            )
+
+            params.basis_matrix = torch.tensor(
+                [
+                    [2, 2, 2, 2, 2, 2.0],
+                    [1, 2, 1, 2, 1, 2.0],
+                ]
+            )[:, :, None]
+            points = (
+                torch.tensor(
+                    [
+                        [
+                            [1, 0, 1],
+                            [0.5, 1, 1],
+                            [1 / 3, 1 / 3, 2 / 3],
+                        ],
+                        [
+                            [0, 1, 0],
+                            [0, 0, 0],
+                            [0, 1, 0],
+                        ],
+                    ]
+                )
+                / torch.tensor([[[1.0, 1, 1]]])
+                * 2
+                - 1  # rescale from [0, 1] to [-1, 1]
+            )
+            expected_result_matrix = torch.tensor(
+                [
+                    [[5696], [5854], [5484.888]],
+                    [[27377], [26649], [27377]],
+                ]
+            )
+            expected_result_sum = torch.tensor(
+                [
+                    [[2848], [2927], [2742.444]],
+                    [[17902], [17420], [17902]],
+                ]
+            )
+            with self.subTest("VM with basis_matrix reduction"):
+                assert torch.allclose(
+                    grid.evaluate_local(points, params),
+                    expected_result_matrix,
+                    rtol=0.0001,
+                    atol=0.0001,
+                )
+            del params.basis_matrix  # the next subtest covers the sum reduction
+            with self.subTest("VM with sum reduction"):
+                assert torch.allclose(
+                    grid.evaluate_local(points, params),
+                    expected_result_sum,
+                    rtol=0.0001,
+                    atol=0.0001,
+                ), grid.evaluate_local(points, params)
+
+    def test_forward_with_small_init_std(self):
+        """Test that the grid returns values of small magnitude when its
+        parameters are drawn with zero mean and a small standard deviation.
+        The absolute value is checked so large negative outputs fail as well.
+        """
+
+        def test(cls, **kwargs):
+            with self.subTest(cls.__name__):
+                n_grids = 3
+                grid = cls(**kwargs)
+                shapes = grid.get_shapes(epoch=0)
+                params = cls.values_type(
+                    **{
+                        k: torch.normal(mean=torch.zeros(n_grids, *shape), std=0.0001)
+                        for k, shape in shapes.items()
+                    }
+                )
+                points = self.get_random_normalized_points(n_grids=n_grids, n_points=3)
+                max_expected_result = torch.zeros((len(points), 10)) + 1e-2
+                assert torch.all(
+                    grid.evaluate_local(points, params).abs() < max_expected_result
+                )
+
+        test(
+            FullResolutionVoxelGrid,
+            resolution_changes={0: (4, 6, 9)},
+            n_features=10,
+        )
+        test(
+            CPFactorizedVoxelGrid,
+            resolution_changes={0: (4, 6, 9)},
+            n_features=10,
+            n_components=3,
+        )
+        test(
+            VMFactorizedVoxelGrid,
+            resolution_changes={0: (4, 6, 9)},
+            n_features=10,
+            n_components=3,
+        )
+
+    def test_voxel_grid_module_location(self, n_times=10):
+        """
+        Check that VoxelGridModule maps world coordinates to grid-local ones
+        without permuting the axes.
+
+        The voxel grids are taken as verified for local (x, y, z) coordinates,
+        so a random local point is evaluated on the grid directly and its
+        world-space counterpart is passed through the VoxelGridModule; both
+        results must agree.
+        """
+        for _ in range(n_times):
+            sizes = tuple(torch.randint(1, 50, size=(3,)).tolist())
+            module = VoxelGridModule(extents=sizes)
+
+            # Local coordinates lie in [-1, 1); world = local * extents / 2.
+            p_local = torch.rand(1, 3) * 2 - 1
+            p_world = p_local * torch.tensor(sizes) / 2
+
+            values = module.voxel_grid.values_type(**module.params)
+            via_module = module(p_world)[0, 0]
+            direct = module.voxel_grid.evaluate_local(p_local[None], values)[0, 0, 0]
+            assert torch.allclose(
+                via_module, direct, rtol=0.0001, atol=0.0001
+            )
+
+    def test_resolution_change(self, n_times=10):
+        """
+        Check that changing the resolution resizes every grid parameter.
+
+        Each voxel grid class is built with two random resolutions (epochs 0
+        and 1) and filled with random values; after `change_resolution` at
+        epoch 1 every parameter must have the shape from `get_shapes(epoch=1)`.
+        A VoxelGridModule must also signal a change at each subscribed epoch.
+        """
+        for _ in range(n_times):
+            n_grids, n_features, n_components = torch.randint(1, 3, (3,)).tolist()
+            first_resolution = torch.randint(3, 10, (3,)).tolist()
+            second_resolution = torch.randint(3, 10, (3,)).tolist()
+            schedule = {0: first_resolution, 1: second_resolution}
+            # NOTE(review): n_components becomes a multiple of 3 — presumably
+            # required by the factorized grids; confirm.
+            n_components *= 3
+
+            dense_kwargs = {
+                "n_features": n_features,
+                "resolution_changes": schedule,
+            }
+            factorized_kwargs = {**dense_kwargs, "n_components": n_components}
+            cases = (
+                (FullResolutionVoxelGrid, dense_kwargs),
+                (CPFactorizedVoxelGrid, factorized_kwargs),
+                (VMFactorizedVoxelGrid, factorized_kwargs),
+            )
+            for cls, kwargs in cases:
+                with self.subTest(cls.__name__):
+                    grid = cls(**kwargs)
+                    self.assertEqual(grid.get_resolution(epoch=0), first_resolution)
+
+                    # Random values shaped for epoch 0, with n_grids leading.
+                    initial_values = grid.values_type(
+                        **{
+                            name: torch.randn((n_grids, *shape))
+                            for name, shape in grid.get_shapes(epoch=0).items()
+                        }
+                    )
+                    resized_values, changed = grid.change_resolution(
+                        epoch=1,
+                        grid_values=initial_values,
+                        mode="linear",
+                    )
+                    assert changed
+                    self.assertEqual(grid.get_resolution(epoch=1), second_resolution)
+
+                    # Compare shapes without the leading n_grids dimension.
+                    for name, wanted in grid.get_shapes(epoch=1).items():
+                        actual = getattr(resized_values, name).shape
+                        self.assertEqual(wanted, actual[1:])
+
+        with self.subTest("VoxelGridModule"):
+            n_changes = 10
+            module = VoxelGridModule()
+            schedule = {i: (i + 2, i + 2, i + 2) for i in range(n_changes)}
+            module.voxel_grid = FullResolutionVoxelGrid(
+                resolution_changes=schedule
+            )
+            epochs, apply_func = module.subscribe_to_epochs()
+            self.assertEqual(list(range(n_changes)), list(epochs))
+            for epoch in epochs:
+                changed = apply_func(epoch)
+                assert changed
+                self.assertEqual(
+                    schedule[epoch],
+                    module.voxel_grid.get_resolution(epoch=epoch),
+                )
+
+    def _get_min_max_tuple(
+        self, n=4, denominator_base=2, max_exponent=6, add_edge_cases=True
+    ):
+        """Yield (low, high) pairs of 3-vectors with low < high element-wise."""
+        n_random = n - 2 if add_edge_cases else n
+
+        def random_fraction():
+            negative = torch.rand((1,)) < 0.5
+            power = int(torch.randint(1, max_exponent, (1,)))
+            denom = denominator_base**power
+            numer = int(torch.randint(1, denom, (1,)))
+            return (-1 if negative else 1) * numer / denom * 1.0
+
+        def ordered_pair():
+            low, high = random_fraction(), random_fraction()
+            while not low < high:
+                low, high = random_fraction(), random_fraction()
+            return low, high
+
+        for _ in range(n_random):
+            corners = [ordered_pair() for _ in range(3)]
+            yield tuple(torch.tensor(side) for side in zip(*corners))
+        if add_edge_cases:
+            yield torch.tensor((-1.0, -1.0, -1.0)), torch.tensor((1.0, 1.0, 1.0))
+            yield torch.tensor([0.0, 0.0, 0.0]), torch.tensor([1.0, 1.0, 1.0])
+
+    def test_cropping_voxel_grids(self, n_times=1):
+        """
+        Check that a cropped grid agrees with the grid it was cropped from.
+
+        In 1D, cropping the grid at A and B
+        ---------A---------B---
+        and picking a point p between them
+        ---------A-----p---B---
+        lets us write
+        p = A + (B-A) * p_c
+        where p_c in [0, 1) locates p inside the crop. The full grid evaluated
+        at p must then match the cropped grid evaluated at p_c, once p_c is
+        rescaled to the [-1, 1) range of local coordinates.
+        """
+        for points_min, points_max in self._get_min_max_tuple(n=10):
+            # Only n_features and n_components are random; one grid is used.
+            _, n_features, n_components = torch.randint(1, 3, (3,)).tolist()
+            n_grids = 1
+            n_components *= 3
+            resolution_changes = {0: (128 + 1, 128 + 1, 128 + 1)}
+
+            # Every grid type shares these settings; the factorized ones
+            # additionally take n_components.
+            dense_kwargs = {
+                "n_features": n_features,
+                "resolution_changes": resolution_changes,
+            }
+            cases = (
+                (FullResolutionVoxelGrid, dense_kwargs),
+                (
+                    CPFactorizedVoxelGrid,
+                    {**dense_kwargs, "n_components": n_components},
+                ),
+                (
+                    VMFactorizedVoxelGrid,
+                    {**dense_kwargs, "n_components": n_components},
+                ),
+            )
+
+            for cls, kwargs in cases:
+                label = cls.__name__ + f" points {points_min} and {points_max}"
+                with self.subTest(label):
+                    grid = cls(**kwargs)
+
+                    # Random values with a leading n_grids dimension.
+                    full_values = grid.values_type(
+                        **{
+                            name: torch.normal(
+                                mean=torch.zeros((n_grids, *shape)),
+                                std=1,
+                            )
+                            for name, shape in grid.get_shapes(epoch=0).items()
+                        }
+                    )
+                    cropped_values = grid.crop_local(
+                        points_min, points_max, full_values
+                    )
+
+                    # Sample p_c in [0, 1), place it in the full grid via
+                    # p = A + (B-A) * p_c, then rescale p_c to [-1, 1).
+                    unit_points = torch.rand((1, n_times, 3))
+                    span = (points_max - points_min)[None, None]
+                    points_full = points_min[None, None] + span * unit_points
+                    points_cropped = (unit_points - 0.5) * 2
+
+                    # The same location is queried in both grids.
+                    pred = grid.evaluate_local(points_full, full_values)
+                    pred_cropped = grid.evaluate_local(
+                        points_cropped, cropped_values
+                    )
+
+                    assert torch.allclose(pred, pred_cropped, rtol=1e-4, atol=1e-4), (
+                        pred,
+                        pred_cropped,
+                        points_full,
+                        points_cropped,
+                    )
+
+    def test_cropping_voxel_grid_module(self, n_times=1):
+        """Check that cropping the module keeps its output inside the crop box."""
+        for points_min, points_max in self._get_min_max_tuple(n=5, max_exponent=5):
+            extents = torch.ones((3,)) * 2
+            translation = torch.ones((3,)) * 0.2
+            points_min += translation
+            points_max += translation
+
+            overrides = DictConfig(
+                {
+                    "extents": tuple(float(e) for e in extents),
+                    "translation": tuple(float(t) for t in translation),
+                    "voxel_grid_FullResolutionVoxelGrid_args": {
+                        "resolution_changes": {0: (128 + 1, 128 + 1, 128 + 1)}
+                    },
+                }
+            )
+            cfg = OmegaConf.merge(get_default_args(VoxelGridModule), overrides)
+            module = VoxelGridModule(**cfg)
+
+            # One random query point between points_min and points_max.
+            points = (torch.rand(3) * (points_max - points_min) + points_min)[None]
+            before = module(points)
+            module.crop_self(points_min, points_max)
+            after = module(points)
+            assert torch.allclose(before, after, rtol=0.001, atol=0.001), (
+                before,
+                after,
+            )
+
+    def test_loading_state_dict(self):
+        """
+        Test loading state dict after rescaling.
+
+        Create a voxel grid, apply all its resolution changes and take its
+        state_dict. Load it into a fresh voxel grid built from the same args
+        and check that every parameter matches the original one.
+        """
+        n_changes = 10
+
+        resolution_changes = {i: (i + 2, i + 2, i + 2) for i in range(n_changes)}
+        cfg = DictConfig(
+            {
+                "voxel_grid_class_type": "VMFactorizedVoxelGrid",
+                "voxel_grid_VMFactorizedVoxelGrid_args": {
+                    "resolution_changes": resolution_changes,
+                    "n_components": 48,
+                },
+            }
+        )
+        grid = VoxelGridModule(**cfg)
+        epochs, apply_func = grid.subscribe_to_epochs()
+        for epoch in epochs:
+            apply_func(epoch)
+
+        loaded_grid = VoxelGridModule(**cfg)
+        loaded_grid.load_state_dict(grid.state_dict())
+        for name_loaded, param_loaded in loaded_grid.named_parameters():
+            for name, param in grid.named_parameters():
+                if name_loaded == name:
+                    assert torch.allclose(param_loaded, param)
diff --git a/pytorch3d/tests/pulsar/__init__.py b/pytorch3d/tests/pulsar/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a
--- /dev/null
+++ b/pytorch3d/tests/pulsar/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/pytorch3d/tests/pulsar/create_multiview.py b/pytorch3d/tests/pulsar/create_multiview.py
new file mode 100644
index 0000000000000000000000000000000000000000..7dfc9c5b77c116d73445a39729b1ee68c0a53e08
--- /dev/null
+++ b/pytorch3d/tests/pulsar/create_multiview.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Create multiview data."""
+import sys
+from os import path
+
+
+# Making sure you can run this, even if pulsar hasn't been installed yet.
+sys.path.insert(0, path.join(path.dirname(__file__), "..", ".."))
+
+
+def create_multiview():
+    """Render one random point cloud from eight angles and save the images."""
+    import imageio
+
+    # Dependencies are imported here, i.e. only when the function is
+    # actually called.
+    import numpy as np
+    import torch
+    from pytorch3d.renderer.points.pulsar import Renderer
+    from torch import nn
+    from torch.autograd import Variable
+
+    # Number of points and image resolution.
+    n_points = 10
+    width = 1000
+    height = 1000
+
+    class Model(nn.Module):
+        """Thin module wrapping the renderer with a fixed gamma."""
+
+        def __init__(self):
+            super().__init__()
+            self.gamma = 0.1
+            self.renderer = Renderer(width, height, n_points)
+
+        def forward(self, vp, vc, vr, cam_params):
+            # vp, vc, vr: vertex positions, colors and radii.
+            rendered = self.renderer.forward(vp, vc, vr, cam_params, self.gamma, 45.0)
+            return rendered
+
+    # Seeded random sample data.
+    torch.manual_seed(1)
+    vert_pos = torch.rand(n_points, 3, dtype=torch.float32) * 10.0
+    vert_pos[:, 2] += 25.0
+    vert_pos[:, :2] -= 5.0
+    vert_col = torch.rand(n_points, 3, dtype=torch.float32)
+    vert_rad = torch.rand(n_points, dtype=torch.float32)
+
+    # Halve the color values.
+    vert_col *= 0.5
+
+    angles = [-1.5, -0.8, -0.4, -0.1, 0.1, 0.4, 0.8, 1.5]
+    for device in [torch.device("cuda")]:
+        model = Model().to(device)
+        vert_pos = vert_pos.to(device)
+        vert_col = vert_col.to(device)
+        vert_rad = vert_rad.to(device)
+        for angle_idx, angle in enumerate(angles):
+            # NOTE(review): presumably position (3), rotation (3), focal
+            # length and sensor width — confirm against the pulsar docs.
+            cam_values = [
+                np.sin(angle) * 35.0,
+                0.0,
+                30.0 - np.cos(angle) * 35.0,
+                0.0,
+                -angle,
+                0.0,
+                5.0,
+                2.0,
+            ]
+            cam_params = torch.tensor(cam_values, dtype=torch.float32).to(device)
+            result = model.forward(
+                Variable(vert_pos, requires_grad=False),
+                Variable(vert_col, requires_grad=False),
+                Variable(vert_rad, requires_grad=False),
+                Variable(cam_params, requires_grad=False),
+            )
+            # Convert to an 8-bit image before saving.
+            result_im = (result.cpu().detach().numpy() * 255).astype(np.uint8)
+            imageio.imsave(
+                "reference/examples_TestRenderer_test_multiview_%d.png" % (angle_idx),
+                result_im,
+            )
+
+
+if __name__ == "__main__":
+    create_multiview()
diff --git a/pytorch3d/tests/pulsar/test_channels.py b/pytorch3d/tests/pulsar/test_channels.py
new file mode 100644
index 0000000000000000000000000000000000000000..adbf4e56a9880e5b89db9e34f5424744f473d388
--- /dev/null
+++ b/pytorch3d/tests/pulsar/test_channels.py
@@ -0,0 +1,149 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Test number of channels."""
+import logging
+import sys
+import unittest
+from os import path
+
+import torch
+
+from ..common_testing import TestCaseMixin
+
+
+sys.path.insert(0, path.join(path.dirname(__file__), "..", ".."))
+devices = [torch.device("cuda"), torch.device("cpu")]
+
+
+class TestChannels(TestCaseMixin, unittest.TestCase):
+    """Test rendering with different numbers of color channels."""
+
+    def test_basic(self):
+        """Render with 1, 3 and 8 channels; shared channels and hits must agree."""
+        import torch  # NOTE(review): torch is already imported at module level
+        from pytorch3d.renderer.points.pulsar import Renderer
+
+        n_points = 10
+        width = 1_000
+        height = 1_000
+        renderer_1 = Renderer(width, height, n_points, n_channels=1)
+        renderer_3 = Renderer(width, height, n_points, n_channels=3)
+        renderer_8 = Renderer(width, height, n_points, n_channels=8)
+        # Generate sample data (8 color channels; renderers use a prefix of them).
+        torch.manual_seed(1)
+        vert_pos = torch.rand(n_points, 3, dtype=torch.float32) * 10.0
+        vert_pos[:, 2] += 25.0
+        vert_pos[:, :2] -= 5.0
+        vert_col = torch.rand(n_points, 8, dtype=torch.float32)
+        vert_rad = torch.rand(n_points, dtype=torch.float32)
+        cam_params = torch.tensor(
+            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0], dtype=torch.float32
+        )
+        for device in devices:
+            vert_pos = vert_pos.to(device)
+            vert_col = vert_col.to(device)
+            vert_rad = vert_rad.to(device)
+            cam_params = cam_params.to(device)
+            renderer_1 = renderer_1.to(device)
+            renderer_3 = renderer_3.to(device)
+            renderer_8 = renderer_8.to(device)
+            result_1 = (
+                renderer_1.forward(
+                    vert_pos,
+                    vert_col[:, :1],
+                    vert_rad,
+                    cam_params,
+                    1.0e-1,
+                    45.0,
+                    percent_allowed_difference=0.01,
+                )
+                .cpu()
+                .detach()
+                .numpy()
+            )
+            hits_1 = (
+                renderer_1.forward(
+                    vert_pos,
+                    vert_col[:, :1],
+                    vert_rad,
+                    cam_params,
+                    1.0e-1,
+                    45.0,
+                    percent_allowed_difference=0.01,
+                    mode=1,  # NOTE(review): presumably returns a hit map — confirm
+                )
+                .cpu()
+                .detach()
+                .numpy()
+            )
+            result_3 = (
+                renderer_3.forward(
+                    vert_pos,
+                    vert_col[:, :3],
+                    vert_rad,
+                    cam_params,
+                    1.0e-1,
+                    45.0,
+                    percent_allowed_difference=0.01,
+                )
+                .cpu()
+                .detach()
+                .numpy()
+            )
+            hits_3 = (
+                renderer_3.forward(
+                    vert_pos,
+                    vert_col[:, :3],
+                    vert_rad,
+                    cam_params,
+                    1.0e-1,
+                    45.0,
+                    percent_allowed_difference=0.01,
+                    mode=1,
+                )
+                .cpu()
+                .detach()
+                .numpy()
+            )
+            result_8 = (
+                renderer_8.forward(
+                    vert_pos,
+                    vert_col,
+                    vert_rad,
+                    cam_params,
+                    1.0e-1,
+                    45.0,
+                    percent_allowed_difference=0.01,
+                )
+                .cpu()
+                .detach()
+                .numpy()
+            )
+            hits_8 = (
+                renderer_8.forward(
+                    vert_pos,
+                    vert_col,
+                    vert_rad,
+                    cam_params,
+                    1.0e-1,
+                    45.0,
+                    percent_allowed_difference=0.01,
+                    mode=1,
+                )
+                .cpu()
+                .detach()
+                .numpy()
+            )
+            self.assertClose(result_1, result_3[:, :, :1])  # first channel matches
+            self.assertClose(result_3, result_8[:, :, :3])  # first three match
+            self.assertClose(hits_1, hits_3)
+            self.assertClose(hits_8, hits_3)
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    unittest.main()
diff --git a/pytorch3d/tests/pulsar/test_depth.py b/pytorch3d/tests/pulsar/test_depth.py
new file mode 100644
index 0000000000000000000000000000000000000000..023571ac7d5de7d239194b349cd8afbd3bfde1ed
--- /dev/null
+++ b/pytorch3d/tests/pulsar/test_depth.py
@@ -0,0 +1,94 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Test the sorting of the closest spheres."""
+import logging
+import os
+import sys
+import unittest
+from os import path
+
+import imageio
+import numpy as np
+import torch
+
+from ..common_testing import TestCaseMixin
+
+# Making sure you can run this, even if pulsar hasn't been installed yet.
+sys.path.insert(0, path.join(path.dirname(__file__), "..", ".."))
+
+devices = [torch.device("cuda"), torch.device("cpu")]
+IN_REF_FP = path.join(path.dirname(__file__), "reference", "nr0000-in.pth")
+OUT_REF_FP = path.join(path.dirname(__file__), "reference", "nr0000-out.pth")
+
+
+class TestDepth(TestCaseMixin, unittest.TestCase):
+    """Test the depth map derived from the forward info of the renderer."""
+
+    def test_basic(self):
+        from pytorch3d.renderer.points.pulsar import Renderer
+
+        for device in devices:
+            gamma = 1e-5
+            max_depth = 15.0
+            min_depth = 5.0
+            renderer = Renderer(
+                256,
+                256,
+                10000,
+                orthogonal_projection=True,
+                right_handed_system=False,
+                n_channels=1,
+            ).to(device)
+            data = torch.load(IN_REF_FP, map_location="cpu")
+            # For creating the reference files.
+            # Use in case of updates.
+            # data["pos"] = torch.rand_like(data["pos"])
+            # data["pos"][:, 0] = data["pos"][:, 0] * 2. - 1.
+            # data["pos"][:, 1] = data["pos"][:, 1] * 2. - 1.
+            # data["pos"][:, 2] = data["pos"][:, 2] + 9.5
+            result, result_info = renderer.forward(
+                data["pos"].to(device),
+                data["col"].to(device),
+                data["rad"].to(device),
+                data["cam_params"].to(device),
+                gamma,
+                min_depth=min_depth,
+                max_depth=max_depth,
+                return_forward_info=True,
+                bg_col=torch.zeros(1, device=device, dtype=torch.float32),
+                percent_allowed_difference=0.01,
+            )
+            depth_map = Renderer.depth_map_from_result_info_nograd(result_info)
+            depth_vis = (depth_map - depth_map[depth_map > 0].min()) * 200 / (
+                depth_map.max() - depth_map[depth_map > 0.0].min()
+            ) + 50  # maps the positive depths onto [50, 250] for visualization
+            if not os.environ.get("FB_TEST", False):  # skipped when FB_TEST is set
+                imageio.imwrite(
+                    path.join(
+                        path.dirname(__file__),
+                        "test_out",
+                        "test_depth_test_basic_depth.png",
+                    ),
+                    depth_vis.cpu().numpy().astype(np.uint8),
+                )
+            # For creating the reference files.
+            # Use in case of updates.
+            # torch.save(
+            #     data, path.join(path.dirname(__file__), "reference", "nr0000-in.pth")
+            # )
+            # torch.save(
+            #     {"sphere_ids": sphere_ids, "depth_map": depth_map},
+            #     path.join(path.dirname(__file__), "reference", "nr0000-out.pth"),
+            # )
+            # sys.exit(0)
+            reference = torch.load(OUT_REF_FP, map_location="cpu")
+            self.assertClose(reference["depth_map"].to(device), depth_map)
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    unittest.main()
diff --git a/pytorch3d/tests/pulsar/test_forward.py b/pytorch3d/tests/pulsar/test_forward.py
new file mode 100644
index 0000000000000000000000000000000000000000..da79028cc5f192849dfc5b05fec430a7a005fab6
--- /dev/null
+++ b/pytorch3d/tests/pulsar/test_forward.py
@@ -0,0 +1,358 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Basic rendering test."""
+import logging
+import os
+import sys
+import unittest
+from os import path
+
+import imageio
+import numpy as np
+import torch
+
+
+# Making sure you can run this, even if pulsar hasn't been installed yet.
+sys.path.insert(0, path.join(path.dirname(__file__), "..", ".."))
+LOGGER = logging.getLogger(__name__)
+devices = [torch.device("cuda"), torch.device("cpu")]
+
+
+class TestForward(unittest.TestCase):
+    """Rendering tests."""
+
+    def test_bg_weight(self):
+        """Test background reweighting."""
+        from pytorch3d.renderer.points.pulsar import Renderer
+
+        LOGGER.info("Setting up rendering test for 3 channels...")
+        n_points = 1
+        width = 1_000
+        height = 1_000
+        renderer = Renderer(width, height, n_points, background_normalized_depth=0.999)
+        vert_pos = torch.tensor([[0.0, 0.0, 25.0]], dtype=torch.float32)
+        vert_col = torch.tensor([[0.3, 0.5, 0.7]], dtype=torch.float32)
+        vert_rad = torch.tensor([1.0], dtype=torch.float32)
+        cam_params = torch.tensor(
+            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0], dtype=torch.float32
+        )
+        for device in devices:
+            vert_pos = vert_pos.to(device)
+            vert_col = vert_col.to(device)
+            vert_rad = vert_rad.to(device)
+            cam_params = cam_params.to(device)
+            renderer = renderer.to(device)
+            LOGGER.info("Rendering...")
+            # Measurements.
+            result = renderer.forward(
+                vert_pos, vert_col, vert_rad, cam_params, 1.0e-1, 45.0
+            )
+            hits = renderer.forward(
+                vert_pos,
+                vert_col,
+                vert_rad,
+                cam_params,
+                1.0e-1,
+                45.0,
+                percent_allowed_difference=0.01,
+                mode=1,
+            )
+            if not os.environ.get("FB_TEST", False):
+                imageio.imsave(
+                    path.join(
+                        path.dirname(__file__),
+                        "test_out",
+                        "test_forward_TestForward_test_bg_weight.png",
+                    ),
+                    (result * 255.0).cpu().to(torch.uint8).numpy(),
+                )
+                imageio.imsave(
+                    path.join(
+                        path.dirname(__file__),
+                        "test_out",
+                        "test_forward_TestForward_test_bg_weight_hits.png",
+                    ),
+                    (hits * 255.0).cpu().to(torch.uint8).numpy(),
+                )
+            self.assertEqual(hits[500, 500, 0].item(), 1.0)
+            self.assertTrue(
+                np.allclose(
+                    result[500, 500, :].cpu().numpy(),
+                    [1.0, 1.0, 1.0],
+                    rtol=1e-2,
+                    atol=1e-2,
+                )
+            )
+
+    def test_basic_3chan(self):
+        """Test rendering one image with one sphere, 3 channels."""
+        from pytorch3d.renderer.points.pulsar import Renderer
+
+        LOGGER.info("Setting up rendering test for 3 channels...")
+        n_points = 1
+        width = 1_000
+        height = 1_000
+        renderer = Renderer(width, height, n_points)
+        vert_pos = torch.tensor([[0.0, 0.0, 25.0]], dtype=torch.float32)
+        vert_col = torch.tensor([[0.3, 0.5, 0.7]], dtype=torch.float32)
+        vert_rad = torch.tensor([1.0], dtype=torch.float32)
+        cam_params = torch.tensor(
+            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0], dtype=torch.float32
+        )
+        for device in devices:
+            vert_pos = vert_pos.to(device)
+            vert_col = vert_col.to(device)
+            vert_rad = vert_rad.to(device)
+            cam_params = cam_params.to(device)
+            renderer = renderer.to(device)
+            LOGGER.info("Rendering...")
+            # Measurements.
+            result = renderer.forward(
+                vert_pos, vert_col, vert_rad, cam_params, 1.0e-1, 45.0
+            )
+            hits = renderer.forward(
+                vert_pos,
+                vert_col,
+                vert_rad,
+                cam_params,
+                1.0e-1,
+                45.0,
+                percent_allowed_difference=0.01,
+                mode=1,
+            )
+            if not os.environ.get("FB_TEST", False):
+                imageio.imsave(
+                    path.join(
+                        path.dirname(__file__),
+                        "test_out",
+                        "test_forward_TestForward_test_basic_3chan.png",
+                    ),
+                    (result * 255.0).cpu().to(torch.uint8).numpy(),
+                )
+                imageio.imsave(
+                    path.join(
+                        path.dirname(__file__),
+                        "test_out",
+                        "test_forward_TestForward_test_basic_3chan_hits.png",
+                    ),
+                    (hits * 255.0).cpu().to(torch.uint8).numpy(),
+                )
+            self.assertEqual(hits[500, 500, 0].item(), 1.0)
+            self.assertTrue(
+                np.allclose(
+                    result[500, 500, :].cpu().numpy(),
+                    [0.3, 0.5, 0.7],
+                    rtol=1e-2,
+                    atol=1e-2,
+                )
+            )
+
+    def test_basic_1chan(self):
+        """Test rendering one image with one sphere, 1 channel."""
+        from pytorch3d.renderer.points.pulsar import Renderer
+
+        LOGGER.info("Setting up rendering test for 1 channel...")
+        n_points = 1
+        width = 1_000
+        height = 1_000
+        renderer = Renderer(width, height, n_points, n_channels=1)
+        vert_pos = torch.tensor([[0.0, 0.0, 25.0]], dtype=torch.float32)
+        vert_col = torch.tensor([[0.3]], dtype=torch.float32)
+        vert_rad = torch.tensor([1.0], dtype=torch.float32)
+        cam_params = torch.tensor(
+            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0], dtype=torch.float32
+        )
+        for device in devices:
+            vert_pos = vert_pos.to(device)
+            vert_col = vert_col.to(device)
+            vert_rad = vert_rad.to(device)
+            cam_params = cam_params.to(device)
+            renderer = renderer.to(device)
+            LOGGER.info("Rendering...")
+            # Measurements.
+            result = renderer.forward(
+                vert_pos, vert_col, vert_rad, cam_params, 1.0e-1, 45.0
+            )
+            hits = renderer.forward(
+                vert_pos,
+                vert_col,
+                vert_rad,
+                cam_params,
+                1.0e-1,
+                45.0,
+                percent_allowed_difference=0.01,
+                mode=1,
+            )
+            if not os.environ.get("FB_TEST", False):
+                imageio.imsave(
+                    path.join(
+                        path.dirname(__file__),
+                        "test_out",
+                        "test_forward_TestForward_test_basic_1chan.png",
+                    ),
+                    (result * 255.0).cpu().to(torch.uint8).numpy(),
+                )
+                imageio.imsave(
+                    path.join(
+                        path.dirname(__file__),
+                        "test_out",
+                        "test_forward_TestForward_test_basic_1chan_hits.png",
+                    ),
+                    (hits * 255.0).cpu().to(torch.uint8).numpy(),
+                )
+            self.assertEqual(hits[500, 500, 0].item(), 1.0)
+            self.assertTrue(
+                np.allclose(
+                    result[500, 500, :].cpu().numpy(), [0.3], rtol=1e-2, atol=1e-2
+                )
+            )
+
+    def test_basic_8chan(self):
+        """Test rendering one image with one sphere, 8 channels."""
+        from pytorch3d.renderer.points.pulsar import Renderer
+
+        LOGGER.info("Setting up rendering test for 8 channels...")
+        n_points = 1
+        width = 1_000
+        height = 1_000
+        renderer = Renderer(width, height, n_points, n_channels=8)
+        vert_pos = torch.tensor([[0.0, 0.0, 25.0]], dtype=torch.float32)
+        vert_col = torch.tensor(
+            [[1.0, 1.0, 1.0, 1.0, 1.0, 0.3, 0.5, 0.7]], dtype=torch.float32
+        )
+        vert_rad = torch.tensor([1.0], dtype=torch.float32)
+        cam_params = torch.tensor(
+            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0], dtype=torch.float32
+        )
+        for device in devices:
+            vert_pos = vert_pos.to(device)
+            vert_col = vert_col.to(device)
+            vert_rad = vert_rad.to(device)
+            cam_params = cam_params.to(device)
+            renderer = renderer.to(device)
+            LOGGER.info("Rendering...")
+            # Measurements.
+            result = renderer.forward(
+                vert_pos, vert_col, vert_rad, cam_params, 1.0e-1, 45.0
+            )
+            hits = renderer.forward(
+                vert_pos,
+                vert_col,
+                vert_rad,
+                cam_params,
+                1.0e-1,
+                45.0,
+                percent_allowed_difference=0.01,
+                mode=1,
+            )
+            if not os.environ.get("FB_TEST", False):
+                imageio.imsave(
+                    path.join(
+                        path.dirname(__file__),
+                        "test_out",
+                        "test_forward_TestForward_test_basic_8chan.png",
+                    ),
+                    (result[:, :, 5:8] * 255.0).cpu().to(torch.uint8).numpy(),
+                )
+                imageio.imsave(
+                    path.join(
+                        path.dirname(__file__),
+                        "test_out",
+                        "test_forward_TestForward_test_basic_8chan_hits.png",
+                    ),
+                    (hits * 255.0).cpu().to(torch.uint8).numpy(),
+                )
+            self.assertEqual(hits[500, 500, 0].item(), 1.0)
+            self.assertTrue(
+                np.allclose(
+                    result[500, 500, 5:8].cpu().numpy(),
+                    [0.3, 0.5, 0.7],
+                    rtol=1e-2,
+                    atol=1e-2,
+                )
+            )
+            self.assertTrue(
+                np.allclose(
+                    result[500, 500, :5].cpu().numpy(), 1.0, rtol=1e-2, atol=1e-2
+                )
+            )
+
+    def test_principal_point(self):
+        """Test shifting the principal point."""
+        from pytorch3d.renderer.points.pulsar import Renderer
+
+        LOGGER.info("Setting up rendering test for shifted principal point...")
+        n_points = 1
+        width = 1_000
+        height = 1_000
+        renderer = Renderer(width, height, n_points, n_channels=1)
+        vert_pos = torch.tensor([[0.0, 0.0, 25.0]], dtype=torch.float32)
+        vert_col = torch.tensor([[0.0]], dtype=torch.float32)
+        vert_rad = torch.tensor([1.0], dtype=torch.float32)
+        cam_params = torch.tensor(
+            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0, 0.0, 0.0], dtype=torch.float32
+        )
+        for device in devices:
+            vert_pos = vert_pos.to(device)
+            vert_col = vert_col.to(device)
+            vert_rad = vert_rad.to(device)
+            cam_params = cam_params.to(device)
+            cam_params[-2] = -250.0
+            cam_params[-1] = -250.0
+            renderer = renderer.to(device)
+            LOGGER.info("Rendering...")
+            # Measurements.
+            result = renderer.forward(
+                vert_pos, vert_col, vert_rad, cam_params, 1.0e-1, 45.0
+            )
+            if not os.environ.get("FB_TEST", False):
+                imageio.imsave(
+                    path.join(
+                        path.dirname(__file__),
+                        "test_out",
+                        "test_forward_TestForward_test_principal_point.png",
+                    ),
+                    (result * 255.0).cpu().to(torch.uint8).numpy(),
+                )
+            self.assertTrue(
+                np.allclose(
+                    result[750, 750, :].cpu().numpy(), [0.0], rtol=1e-2, atol=1e-2
+                )
+            )
+        for device in devices:
+            vert_pos = vert_pos.to(device)
+            vert_col = vert_col.to(device)
+            vert_rad = vert_rad.to(device)
+            cam_params = cam_params.to(device)
+            cam_params[-2] = 250.0
+            cam_params[-1] = 250.0
+            renderer = renderer.to(device)
+            LOGGER.info("Rendering...")
+            # Measurements.
+            result = renderer.forward(
+                vert_pos, vert_col, vert_rad, cam_params, 1.0e-1, 45.0
+            )
+            if not os.environ.get("FB_TEST", False):
+                imageio.imsave(
+                    path.join(
+                        path.dirname(__file__),
+                        "test_out",
+                        "test_forward_TestForward_test_principal_point.png",
+                    ),
+                    (result * 255.0).cpu().to(torch.uint8).numpy(),
+                )
+            self.assertTrue(
+                np.allclose(
+                    result[250, 250, :].cpu().numpy(), [0.0], rtol=1e-2, atol=1e-2
+                )
+            )
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    logging.getLogger("pulsar.renderer").setLevel(logging.WARN)
+    unittest.main()
diff --git a/pytorch3d/tests/pulsar/test_hands.py b/pytorch3d/tests/pulsar/test_hands.py
new file mode 100644
index 0000000000000000000000000000000000000000..128b228553514781cd9d4a019f12844115328929
--- /dev/null
+++ b/pytorch3d/tests/pulsar/test_hands.py
@@ -0,0 +1,120 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Test right hand/left hand system compatibility."""
+import logging
+import sys
+import unittest
+from os import path
+
+import torch
+
+from ..common_testing import TestCaseMixin
+
+
+# Making sure you can run this, even if pulsar hasn't been installed yet.
+sys.path.insert(0, path.join(path.dirname(__file__), "..", ".."))
+devices = [torch.device("cuda"), torch.device("cpu")]
+
+
+class TestHands(TestCaseMixin, unittest.TestCase):
+    """Test right hand/left hand system compatibility."""
+
+    def test_basic(self):
+        """Basic forward test."""
+        from pytorch3d.renderer.points.pulsar import Renderer
+
+        n_points = 10
+        width = 1000
+        height = 1000
+        renderer_left = Renderer(width, height, n_points, right_handed_system=False)
+        renderer_right = Renderer(width, height, n_points, right_handed_system=True)
+        # Generate sample data.
+        torch.manual_seed(1)
+        vert_pos = torch.rand(n_points, 3, dtype=torch.float32) * 10.0
+        vert_pos[:, 2] += 25.0
+        vert_pos[:, :2] -= 5.0
+        vert_pos_neg = vert_pos.clone()
+        vert_pos_neg[:, 2] *= -1.0
+        vert_col = torch.rand(n_points, 3, dtype=torch.float32)
+        vert_rad = torch.rand(n_points, dtype=torch.float32)
+        cam_params = torch.tensor(
+            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0], dtype=torch.float32
+        )
+        for device in devices:
+            vert_pos = vert_pos.to(device)
+            vert_pos_neg = vert_pos_neg.to(device)
+            vert_col = vert_col.to(device)
+            vert_rad = vert_rad.to(device)
+            cam_params = cam_params.to(device)
+            renderer_left = renderer_left.to(device)
+            renderer_right = renderer_right.to(device)
+            result_left = (
+                renderer_left.forward(
+                    vert_pos,
+                    vert_col,
+                    vert_rad,
+                    cam_params,
+                    1.0e-1,
+                    45.0,
+                    percent_allowed_difference=0.01,
+                )
+                .cpu()
+                .detach()
+                .numpy()
+            )
+            hits_left = (
+                renderer_left.forward(
+                    vert_pos,
+                    vert_col,
+                    vert_rad,
+                    cam_params,
+                    1.0e-1,
+                    45.0,
+                    percent_allowed_difference=0.01,
+                    mode=1,
+                )
+                .cpu()
+                .detach()
+                .numpy()
+            )
+            result_right = (
+                renderer_right.forward(
+                    vert_pos_neg,
+                    vert_col,
+                    vert_rad,
+                    cam_params,
+                    1.0e-1,
+                    45.0,
+                    percent_allowed_difference=0.01,
+                )
+                .cpu()
+                .detach()
+                .numpy()
+            )
+            hits_right = (
+                renderer_right.forward(
+                    vert_pos_neg,
+                    vert_col,
+                    vert_rad,
+                    cam_params,
+                    1.0e-1,
+                    45.0,
+                    percent_allowed_difference=0.01,
+                    mode=1,
+                )
+                .cpu()
+                .detach()
+                .numpy()
+            )
+            self.assertClose(result_left, result_right)
+            self.assertClose(hits_left, hits_right)
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    logging.getLogger("pulsar.renderer").setLevel(logging.WARN)
+    unittest.main()
diff --git a/pytorch3d/tests/pulsar/test_ortho.py b/pytorch3d/tests/pulsar/test_ortho.py
new file mode 100644
index 0000000000000000000000000000000000000000..3da5ea7037af37184ee14dadc57722ca29d48939
--- /dev/null
+++ b/pytorch3d/tests/pulsar/test_ortho.py
@@ -0,0 +1,131 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Tests for the orthogonal projection."""
+import logging
+import sys
+import unittest
+from os import path
+
+import numpy as np
+import torch
+
+
+# Making sure you can run this, even if pulsar hasn't been installed yet.
+sys.path.insert(0, path.join(path.dirname(__file__), "..", ".."))
+devices = [torch.device("cuda"), torch.device("cpu")]
+
+
+class TestOrtho(unittest.TestCase):
+    """Test the orthogonal projection."""
+
+    def test_basic(self):
+        """Basic forward test of the orthogonal projection."""
+        from pytorch3d.renderer.points.pulsar import Renderer
+
+        n_points = 10
+        width = 1000
+        height = 1000
+        renderer_left = Renderer(
+            width,
+            height,
+            n_points,
+            right_handed_system=False,
+            orthogonal_projection=True,
+        )
+        renderer_right = Renderer(
+            width,
+            height,
+            n_points,
+            right_handed_system=True,
+            orthogonal_projection=True,
+        )
+        # Generate sample data.
+        torch.manual_seed(1)
+        vert_pos = torch.rand(n_points, 3, dtype=torch.float32) * 10.0
+        vert_pos[:, 2] += 25.0
+        vert_pos[:, :2] -= 5.0
+        vert_pos_neg = vert_pos.clone()
+        vert_pos_neg[:, 2] *= -1.0
+        vert_col = torch.rand(n_points, 3, dtype=torch.float32)
+        vert_rad = torch.rand(n_points, dtype=torch.float32)
+        cam_params = torch.tensor(
+            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0], dtype=torch.float32
+        )
+        for device in devices:
+            vert_pos = vert_pos.to(device)
+            vert_pos_neg = vert_pos_neg.to(device)
+            vert_col = vert_col.to(device)
+            vert_rad = vert_rad.to(device)
+            cam_params = cam_params.to(device)
+            renderer_left = renderer_left.to(device)
+            renderer_right = renderer_right.to(device)
+            result_left = (
+                renderer_left.forward(
+                    vert_pos,
+                    vert_col,
+                    vert_rad,
+                    cam_params,
+                    1.0e-1,
+                    45.0,
+                    percent_allowed_difference=0.01,
+                )
+                .cpu()
+                .detach()
+                .numpy()
+            )
+            hits_left = (
+                renderer_left.forward(
+                    vert_pos,
+                    vert_col,
+                    vert_rad,
+                    cam_params,
+                    1.0e-1,
+                    45.0,
+                    percent_allowed_difference=0.01,
+                    mode=1,
+                )
+                .cpu()
+                .detach()
+                .numpy()
+            )
+            result_right = (
+                renderer_right.forward(
+                    vert_pos_neg,
+                    vert_col,
+                    vert_rad,
+                    cam_params,
+                    1.0e-1,
+                    45.0,
+                    percent_allowed_difference=0.01,
+                )
+                .cpu()
+                .detach()
+                .numpy()
+            )
+            hits_right = (
+                renderer_right.forward(
+                    vert_pos_neg,
+                    vert_col,
+                    vert_rad,
+                    cam_params,
+                    1.0e-1,
+                    45.0,
+                    percent_allowed_difference=0.01,
+                    mode=1,
+                )
+                .cpu()
+                .detach()
+                .numpy()
+            )
+            self.assertTrue(np.allclose(result_left, result_right))
+            self.assertTrue(np.allclose(hits_left, hits_right))
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    logging.getLogger("pulsar.renderer").setLevel(logging.WARN)
+    unittest.main()
diff --git a/pytorch3d/tests/pulsar/test_out/empty.txt b/pytorch3d/tests/pulsar/test_out/empty.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/pytorch3d/tests/pulsar/test_small_spheres.py b/pytorch3d/tests/pulsar/test_small_spheres.py
new file mode 100644
index 0000000000000000000000000000000000000000..d1282c70a863abbcf024333c99887db42069f9a6
--- /dev/null
+++ b/pytorch3d/tests/pulsar/test_small_spheres.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Test small sphere rendering and gradients."""
+import sys
+import unittest
+from os import path
+
+import numpy as np
+import torch
+from torch import nn
+
+
+sys.path.insert(0, path.join(path.dirname(__file__), "..", ".."))
+devices = [torch.device("cuda"), torch.device("cpu")]
+
+
+n_points = 10
+width = 1_000
+height = 1_000
+
+
+class SceneModel(nn.Module):
+    """Toy scene of `n_points` spheres wrapped in an `nn.Module`.
+
+    Only `vert_col` is created with `requires_grad=True`; positions, radii and
+    `vert_opy` use `requires_grad=False`. Eight camera rows sit in `cam_params`.
+    """
+
+    def __init__(self):
+        super().__init__()
+        from pytorch3d.renderer.points.pulsar import Renderer
+
+        self.gamma = 1.0
+        # Seeded random positions: x/y in [-5, 5), z in [25, 35).
+        torch.manual_seed(1)
+        positions = torch.rand((1, n_points, 3), dtype=torch.float32) * 10.0
+        positions[:, :, 2] += 25.0
+        positions[:, :, :2] -= 5.0
+        self.vert_pos = nn.Parameter(positions, requires_grad=False)
+        self.vert_col = nn.Parameter(
+            torch.zeros(1, n_points, 3, dtype=torch.float32), requires_grad=True
+        )
+        self.vert_rad = nn.Parameter(
+            torch.ones(1, n_points, dtype=torch.float32) * 0.001,
+            requires_grad=False,
+        )
+        self.vert_opy = nn.Parameter(
+            torch.ones(1, n_points, dtype=torch.float32), requires_grad=False
+        )
+        # One row of camera parameters per viewing angle.
+        view_angles = [-1.5, -0.8, -0.4, -0.1, 0.1, 0.4, 0.8, 1.5]
+        camera_rows = []
+        for angle in view_angles:
+            camera_rows.append(
+                [
+                    np.sin(angle) * 35.0,
+                    0.0,
+                    30.0 - np.cos(angle) * 35.0,
+                    0.0,
+                    -angle,
+                    0.0,
+                    5.0,
+                    2.0,
+                ]
+            )
+        self.register_buffer(
+            "cam_params", torch.tensor(camera_rows, dtype=torch.float32)
+        )
+        self.renderer = Renderer(width, height, n_points)
+
+    def forward(self, cam=None):
+        """Render the spheres for `cam`, or for all stored cameras.
+
+        With `cam=None` the `cam_params` buffer is used and the sphere
+        tensors are expanded to 8 views; otherwise they are expanded to 1.
+        Returns whatever `Renderer.forward` yields with
+        `return_forward_info=True`.
+        """
+        use_stored = cam is None
+        n_views = 8 if use_stored else 1
+        cameras = self.cam_params if use_stored else cam
+        return self.renderer.forward(
+            self.vert_pos.expand(n_views, -1, -1),
+            self.vert_col.expand(n_views, -1, -1),
+            self.vert_rad.expand(n_views, -1),
+            cameras,
+            self.gamma,
+            45.0,
+            return_forward_info=True,
+        )
+
+
+class TestSmallSpheres(unittest.TestCase):
+    """Test small sphere rendering and gradients."""
+
+    def test_basic(self):
+        """Render 50 views per device; check sphere visibility and gradients.
+
+        For every view, each of the `n_points` sphere ids must appear in the
+        ids read from the forward info, and entry 0 of the last axis of
+        `vert_col.grad` must be non-zero for every sphere from that view alone.
+        """
+        for device in devices:
+            # Set up model.
+            model = SceneModel().to(device)
+            angle = 0.0
+            for _ in range(50):
+                # Camera parameters for this angle, laid out like the rows of
+                # SceneModel.cam_params.
+                cam_row = [np.sin(angle) * 35.0, 0.0, 30.0 - np.cos(angle) * 35.0]
+                cam_row += [0.0, -angle, 0.0, 5.0, 2.0]
+                cam_control = torch.tensor([cam_row], dtype=torch.float32).to(device)
+                result, forw_info = model(cam=cam_control)
+                sphere_ids = model.renderer.sphere_ids_from_result_info_nograd(
+                    forw_info
+                )
+                # Assert all spheres are rendered.
+                for idx in range(n_points):
+                    self.assertTrue(
+                        (sphere_ids == idx).sum() > 0, "Sphere ID %d missing!" % (idx)
+                    )
+                # Visualization code. Activate for debugging.
+                # result_im = (result.cpu().detach().numpy() * 255).astype(np.uint8)
+                # cv2.imshow("res", result_im[0, :, :, ::-1])
+                # cv2.waitKey(0)
+                # Drop gradients left over from the previous view: `backward`
+                # accumulates into `.grad`, which would otherwise let a sphere
+                # pass the check below on the strength of an earlier view.
+                model.zero_grad()
+                # Back-propagate some dummy gradients.
+                loss = ((result - torch.ones_like(result)).abs()).sum()
+                loss.backward()
+                # Now check whether the gradient arrives at every sphere.
+                self.assertTrue(torch.all(model.vert_col.grad[:, :, 0].abs() > 0.0))
+                angle += 0.15
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/pytorch3d/tests/test_acos_linear_extrapolation.py b/pytorch3d/tests/test_acos_linear_extrapolation.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a4513d19f95c7e0107f07c008217c7d1de35e2a
--- /dev/null
+++ b/pytorch3d/tests/test_acos_linear_extrapolation.py
@@ -0,0 +1,138 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import numpy as np
+import torch
+from pytorch3d.transforms import acos_linear_extrapolation
+
+from .common_testing import TestCaseMixin
+
+
+class TestAcosLinearExtrapolation(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)
+        np.random.seed(42)
+
+    @staticmethod
+    def init_acos_boundary_values(batch_size: int = 10000):
+        """
+        Initialize a tensor containing values close to the bounds of the
+        domain of `acos`, i.e. close to -1 or 1; and random values between (-1, 1).
+        """
+        device = torch.device("cuda:0")
+        # one quarter are random values between -1 and 1
+        x_rand = 2 * torch.rand(batch_size // 4, dtype=torch.float32, device=device) - 1
+        x = [x_rand]
+        for bound in [-1, 1]:
+            for above_bound in [True, False]:
+                for noise_std in [1e-4, 1e-2]:
+                    n_generate = (batch_size - batch_size // 4) // 8
+                    x_add = (
+                        bound
+                        + (2 * float(above_bound) - 1)
+                        * torch.randn(
+                            n_generate, device=device, dtype=torch.float32
+                        ).abs()
+                        * noise_std
+                    )
+                    x.append(x_add)
+        x = torch.cat(x)
+        return x
+
+    @staticmethod
+    def acos_linear_extrapolation(batch_size: int):
+        x = TestAcosLinearExtrapolation.init_acos_boundary_values(batch_size)
+        torch.cuda.synchronize()
+
+        def compute_acos():
+            acos_linear_extrapolation(x)
+            torch.cuda.synchronize()
+
+        return compute_acos
+
+    def _test_acos_outside_bounds(self, x, y, dydx, bound):
+        """
+        Check that `acos_linear_extrapolation` yields points on a line with correct
+        slope, and that the function is continuous around `bound`.
+        """
+        bound_t = torch.tensor(bound, device=x.device, dtype=x.dtype)
+        # fit a line: slope * x + bias = y
+        x_1 = torch.stack([x, torch.ones_like(x)], dim=-1)
+        slope, bias = torch.linalg.lstsq(x_1, y[:, None]).solution.view(-1)[:2]
+        desired_slope = (-1.0) / torch.sqrt(1.0 - bound_t**2)
+        # test that the desired slope is the same as the fitted one
+        self.assertClose(desired_slope.view(1), slope.view(1), atol=1e-2)
+        # test that the autograd's slope is the same as the desired one
+        self.assertClose(desired_slope.expand_as(dydx), dydx, atol=1e-2)
+        # test that the value of the fitted line at x=bound equals
+        # arccos(x), i.e. the function is continuous around the bound
+        y_bound_lin = (slope * bound_t + bias).view(1)
+        y_bound_acos = bound_t.acos().view(1)
+        self.assertClose(y_bound_lin, y_bound_acos, atol=1e-2)
+
+    def _one_acos_test(self, x: torch.Tensor, lower_bound: float, upper_bound: float):
+        """
+        Test that `acos_linear_extrapolation` returns correct values for
+        `x` between/above/below `lower_bound`/`upper_bound`.
+        """
+        x.requires_grad = True
+        x.grad = None
+        y = acos_linear_extrapolation(x, [lower_bound, upper_bound])
+        # compute the gradient of the acos w.r.t. x
+        y.backward(torch.ones_like(y))
+        dacos_dx = x.grad
+        x_lower = x <= lower_bound
+        x_upper = x >= upper_bound
+        x_mid = (~x_lower) & (~x_upper)
+        # test that between bounds, the function returns plain acos
+        self.assertClose(x[x_mid].acos(), y[x_mid])
+        # test that outside the bounds, the function is linear with the right
+        # slope and continuous around the bound
+        self._test_acos_outside_bounds(
+            x[x_upper], y[x_upper], dacos_dx[x_upper], upper_bound
+        )
+        self._test_acos_outside_bounds(
+            x[x_lower], y[x_lower], dacos_dx[x_lower], lower_bound
+        )
+
+    def test_acos(self, batch_size: int = 10000):
+        """
+        Tests whether the function returns correct outputs
+        inside/outside the bounds.
+        """
+        x = TestAcosLinearExtrapolation.init_acos_boundary_values(batch_size)
+        bounds = 1 - 10.0 ** torch.linspace(-1, -5, 5)
+        for lower_bound in -bounds:
+            for upper_bound in bounds:
+                if upper_bound < lower_bound:
+                    continue
+                self._one_acos_test(x, float(lower_bound), float(upper_bound))
+
+    def test_finite_gradient(self, batch_size: int = 10000):
+        """
+        Tests whether gradients stay finite close to the bounds.
+        """
+        x = TestAcosLinearExtrapolation.init_acos_boundary_values(batch_size)
+        x.requires_grad = True
+        bounds = 1 - 10.0 ** torch.linspace(-1, -5, 5)
+        for lower_bound in -bounds:
+            for upper_bound in bounds:
+                if upper_bound < lower_bound:
+                    continue
+                x.grad = None
+                y = acos_linear_extrapolation(
+                    x,
+                    [float(lower_bound), float(upper_bound)],
+                )
+                self.assertTrue(torch.isfinite(y).all())
+                loss = y.mean()
+                loss.backward()
+                self.assertIsNotNone(x.grad)
+                self.assertTrue(torch.isfinite(x.grad).all())
diff --git a/pytorch3d/tests/test_ball_query.py b/pytorch3d/tests/test_ball_query.py
new file mode 100644
index 0000000000000000000000000000000000000000..9210cbeb3cfe3426082c2f4f87b1d47f491c81d9
--- /dev/null
+++ b/pytorch3d/tests/test_ball_query.py
@@ -0,0 +1,231 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from itertools import product
+
+import torch
+from pytorch3d.ops import sample_points_from_meshes
+from pytorch3d.ops.ball_query import ball_query
+from pytorch3d.ops.knn import _KNN
+from pytorch3d.utils import ico_sphere
+
+from .common_testing import get_random_cuda_device, TestCaseMixin
+
+
+class TestBallQuery(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(1)  # fixed seed: the random test inputs are reproducible
+
+    @staticmethod
+    def _ball_query_naive(
+        p1, p2, lengths1, lengths2, K: int, radius: float
+    ) -> torch.Tensor:
+        """
+        Loop-based reference ball query. For each valid point of p1 it stores
+        the first (up to) K points of p2, in index order, whose squared
+        distance is below radius**2; unused slots keep dist 0 and idx -1.
+        The result is a `_KNN` with `dists`, `idx` and `knn=None`.
+        """
+        N, P1, D = p1.shape
+        N_other, P2, D_other = p2.shape
+        assert N == N_other and D == D_other
+        dev = p1.device
+
+        # Missing lengths mean that every cloud uses all of its points.
+        if lengths1 is None:
+            lengths1 = torch.full((N,), P1, dtype=torch.int64, device=dev)
+        if lengths2 is None:
+            lengths2 = torch.full((N,), P2, dtype=torch.int64, device=dev)
+
+        sq_radius = radius * radius
+        dists = torch.zeros((N, P1, K), dtype=torch.float32, device=dev)
+        idx = torch.full((N, P1, K), fill_value=-1, dtype=torch.int64, device=dev)
+
+        # Batch elements -> points of p1 -> candidate points of p2.
+        for n in range(N):
+            n_query, n_cand = lengths1[n].item(), lengths2[n].item()
+            for i in range(n_query):
+                found = 0  # neighbors stored so far for point i
+                for j in range(n_cand):
+                    offset = p2[n, j] - p1[n, i]
+                    sq_dist = (offset * offset).sum()
+                    if sq_dist < sq_radius and found < K:
+                        dists[n, i, found] = sq_dist
+                        idx[n, i, found] = j
+                        found += 1
+
+        return _KNN(dists=dists, idx=idx, knn=None)
+
+    def _ball_query_vs_python_square_helper(self, device):
+        """
+        Compares `ball_query` with the naive loop version on dense batches
+        (no lengths given): the distances, the indices and the gradients
+        w.r.t. both point sets must agree for a grid of sizes, Ks and radii.
+        """
+        Ns = [1, 4]
+        Ds = [3, 5, 8]
+        P1s = [8, 24]
+        P2s = [8, 16, 32]
+        Ks = [1, 5]
+        radii = [3, 5]
+        for N, D, P1, P2, K, R in product(Ns, Ds, P1s, P2s, Ks, radii):
+            # Random dense inputs shared by both implementations.
+            x = torch.randn(N, P1, D, device=device, requires_grad=True)
+            y = torch.randn(N, P2, D, device=device, requires_grad=True)
+            # Separate leaf copies, so each implementation fills its own .grad.
+            x_op = x.clone().detach().requires_grad_(True)
+            y_op = y.clone().detach().requires_grad_(True)
+
+            # forward
+            ref = self._ball_query_naive(
+                x, y, lengths1=None, lengths2=None, K=K, radius=R
+            )
+            out = ball_query(x_op, y_op, K=K, radius=R)
+
+            self.assertClose(ref.dists, out.dists)
+            self.assertTrue(torch.all(ref.idx == out.idx))
+
+            # backward: push the same all-ones upstream gradient through both
+            grad_dist = torch.ones((N, P1, K), dtype=torch.float32, device=device)
+            (ref.dists * grad_dist).sum().backward()
+            (out.dists * grad_dist).sum().backward()
+
+            self.assertClose(x_op.grad, x.grad, atol=5e-6)
+            self.assertClose(y_op.grad, y.grad, atol=5e-6)
+
+    def test_ball_query_vs_python_square_cpu(self):
+        """Dense comparison against the naive implementation on the CPU."""
+        self._ball_query_vs_python_square_helper(torch.device("cpu"))
+
+    def test_ball_query_vs_python_square_cuda(self):
+        """Dense comparison against the naive implementation on a CUDA device."""
+        self._ball_query_vs_python_square_helper(get_random_cuda_device())
+
+    def _ball_query_vs_python_ragged_helper(self, device):
+        """
+        Compares `ball_query` with the naive loop version on padded batches
+        where each cloud has a random number of valid points: the distances,
+        the indices and the gradients w.r.t. both point sets must agree.
+        """
+        Ns = [1, 4]
+        Ds = [3, 5, 8]
+        P1s = [8, 24]
+        P2s = [8, 16, 32]
+        Ks = [2, 3, 10]
+        radii = [1.4, 5]
+
+        for N, D, P1, P2, K, R in product(Ns, Ds, P1s, P2s, Ks, radii):
+            # Random clouds plus, per batch element, a random number of valid points.
+            x = torch.rand((N, P1, D), device=device, requires_grad=True)
+            y = torch.rand((N, P2, D), device=device, requires_grad=True)
+            # randint excludes `high`, so the lengths lie in [1, P - 1].
+            lengths1 = torch.randint(low=1, high=P1, size=(N,), device=device)
+            lengths2 = torch.randint(low=1, high=P2, size=(N,), device=device)
+
+            # Separate leaf copies, so each implementation fills its own .grad.
+            x_csrc = x.clone().detach().requires_grad_(True)
+            y_csrc = y.clone().detach().requires_grad_(True)
+
+            # forward
+            ref = self._ball_query_naive(
+                x, y, lengths1=lengths1, lengths2=lengths2, K=K, radius=R
+            )
+            out = ball_query(
+                x_csrc, y_csrc, lengths1=lengths1, lengths2=lengths2, K=K, radius=R
+            )
+
+            # Compared over all K slots, including the unfilled ones.
+            self.assertClose(ref.idx, out.idx)
+            self.assertClose(ref.dists, out.dists)
+
+            # backward: push the same all-ones upstream gradient through both
+            grad_dist = torch.ones((N, P1, K), dtype=torch.float32, device=device)
+            (ref.dists * grad_dist).sum().backward()
+            (out.dists * grad_dist).sum().backward()
+
+            self.assertClose(x_csrc.grad, x.grad, atol=5e-6)
+            self.assertClose(y_csrc.grad, y.grad, atol=5e-6)
+
+    def test_ball_query_vs_python_ragged_cpu(self):
+        """Ragged comparison against the naive implementation on the CPU."""
+        self._ball_query_vs_python_ragged_helper(torch.device("cpu"))
+
+    def test_ball_query_vs_python_ragged_cuda(self):
+        """Ragged comparison against the naive implementation on a CUDA device."""
+        self._ball_query_vs_python_ragged_helper(get_random_cuda_device())
+
+    def test_ball_query_output_simple(self):
+        """Sanity-checks the outputs for a very large and a very small radius."""
+        device = get_random_cuda_device()
+        N, P1, P2, K = 5, 8, 16, 4
+        sphere = ico_sphere(level=2, device=device).extend(N)
+        points_1 = sample_points_from_meshes(sphere, P1)
+        points_2 = sample_points_from_meshes(sphere, P2) * 5.0
+
+        radius = 6.0
+        naive_out = self._ball_query_naive(
+            points_1, points_2, lengths1=None, lengths2=None, K=K, radius=radius
+        )
+        cuda_out = ball_query(points_1, points_2, K=K, radius=radius)
+
+        # All points should have K sample neighbors as radius is large
+        # Zero is a valid index but can only be present once (i.e. no zero padding)
+        naive_out_zeros = (naive_out.idx == 0).sum(dim=-1).max()
+        cuda_out_zeros = (cuda_out.idx == 0).sum(dim=-1).max()
+        self.assertTrue(naive_out_zeros == 0 or naive_out_zeros == 1)
+        self.assertTrue(cuda_out_zeros == 0 or cuda_out_zeros == 1)
+
+        # All points should now have zero sample neighbors as radius is small,
+        # so every index must be -1: all() is required, sum() passes on a single -1.
+        radius = 0.5
+        naive_out = self._ball_query_naive(
+            points_1, points_2, lengths1=None, lengths2=None, K=K, radius=radius
+        )
+        cuda_out = ball_query(points_1, points_2, K=K, radius=radius)
+        self.assertTrue((naive_out.idx == -1).all())
+        self.assertTrue((cuda_out.idx == -1).all())
+
+    @staticmethod
+    def ball_query_square(
+        N: int, P1: int, P2: int, D: int, K: int, radius: float, device: str
+    ):
+        """Returns a closure running one forward + backward pass on dense inputs."""
+        device = torch.device(device)
+        pts1 = torch.randn(N, P1, D, device=device, requires_grad=True)
+        pts2 = torch.randn(N, P2, D, device=device, requires_grad=True)
+        grad_dists = torch.randn(N, P1, K, device=device)
+        torch.cuda.synchronize()
+
+        def output():
+            result = ball_query(pts1, pts2, K=K, radius=radius)
+            (result.dists * grad_dists).sum().backward()
+            torch.cuda.synchronize()
+
+        return output
+
+    @staticmethod
+    def ball_query_ragged(
+        N: int, P1: int, P2: int, D: int, K: int, radius: float, device: str
+    ):
+        """Returns a closure running one forward + backward pass on ragged inputs."""
+        device = torch.device(device)
+        pts1 = torch.rand((N, P1, D), device=device, requires_grad=True)
+        pts2 = torch.rand((N, P2, D), device=device, requires_grad=True)
+        lengths1 = torch.randint(low=1, high=P1, size=(N,), device=device)
+        lengths2 = torch.randint(low=1, high=P2, size=(N,), device=device)
+        grad_dists = torch.randn(N, P1, K, device=device)
+        torch.cuda.synchronize()
+
+        def output():
+            result = ball_query(
+                pts1, pts2, lengths1=lengths1, lengths2=lengths2, K=K, radius=radius
+            )
+            (result.dists * grad_dists).sum().backward()
+            torch.cuda.synchronize()
+
+        return output
diff --git a/pytorch3d/tests/test_blending.py b/pytorch3d/tests/test_blending.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f73ae9dd8d970b2e4a33ed28e1894e7c40c37aa
--- /dev/null
+++ b/pytorch3d/tests/test_blending.py
@@ -0,0 +1,473 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.renderer.blending import (
+    BlendParams,
+    hard_rgb_blend,
+    sigmoid_alpha_blend,
+    softmax_rgb_blend,
+)
+from pytorch3d.renderer.cameras import FoVPerspectiveCameras
+from pytorch3d.renderer.mesh.rasterizer import Fragments
+from pytorch3d.renderer.splatter_blend import SplatterBlender
+
+from .common_testing import TestCaseMixin
+
+
+def sigmoid_blend_naive_loop(colors, fragments, blend_params):
+    """
+    Reference sigmoid alpha blending written with explicit Python loops.
+    RGB is the color of the first face slot of each pixel; alpha is
+    1 - prod_k (1 - sigmoid(-dist_k / sigma)) over the valid faces.
+    Only for test purposes.
+    """
+    face_ids = fragments.pix_to_face
+    face_dists = fragments.dists
+    sigma = blend_params.sigma
+
+    N, H, W, K = face_ids.shape
+    out = torch.ones((N, H, W, 4), dtype=colors.dtype, device=face_ids.device)
+
+    for n in range(N):
+        for h in range(H):
+            for w in range(W):
+                # Running product of (1 - prob) over the faces of the pixel.
+                remaining = 1.0
+                for k in range(K):
+                    if face_ids[n, h, w, k] < 0:
+                        continue  # slot without a face
+                    prob = torch.sigmoid(-face_dists[n, h, w, k] / sigma)
+                    remaining *= 1.0 - prob
+                out[n, h, w, :3] = colors[n, h, w, 0, :]
+                out[n, h, w, 3] = 1.0 - remaining
+
+    return out
+
+
+def sigmoid_alpha_blend_vectorized(colors, fragments, blend_params) -> torch.Tensor:
+    """Tensorised sigmoid alpha blending; face slots with index < 0 are ignored."""
+    N, H, W, _ = fragments.pix_to_face.shape
+    valid = fragments.pix_to_face >= 0
+    miss_prob = 1.0 - torch.sigmoid(-fragments.dists / blend_params.sigma) * valid
+    out = torch.ones((N, H, W, 4), dtype=colors.dtype, device=colors.device)
+    out[..., :3], out[..., 3] = colors[..., 0, :], 1.0 - torch.prod(miss_prob, dim=-1)
+    return out
+
+
+def sigmoid_blend_naive_loop_backward(grad_images, images, fragments, blend_params):
+    """
+    Hand-written backward pass of the loop-based sigmoid alpha blending.
+    Returns the gradient w.r.t. `fragments.dists`, which for every valid
+    face is grad_alpha * (-1 / sigma) * prob * (1 - alpha).
+    """
+    face_ids = fragments.pix_to_face
+    face_dists = fragments.dists
+    sigma = blend_params.sigma
+    N, H, W, K = face_ids.shape
+    grads = torch.zeros((N, H, W, K), dtype=face_dists.dtype, device=face_ids.device)
+    for n in range(N):
+        for h in range(H):
+            for w in range(W):
+                # 1 - alpha of the blended pixel and its upstream alpha gradient
+                remaining = 1.0 - images[n, h, w, 3]
+                upstream = grad_images[n, h, w, 3]
+                for k in range(K):
+                    if face_ids[n, h, w, k] < 0:
+                        continue  # slot without a face keeps a zero gradient
+                    prob = torch.sigmoid(-face_dists[n, h, w, k] / sigma)
+                    grads[n, h, w, k] = upstream * (-1.0 / sigma) * prob * remaining
+    return grads
+
+
+def softmax_blend_naive(colors, fragments, blend_params):
+    """
+    Reference softmax blending written with explicit Python loops: each valid
+    face k of a pixel is weighted by prob_k * exp((zinv_k - zmax) / gamma) and
+    the background color by delta. Only for test purposes.
+    """
+    face_ids = fragments.pix_to_face
+    face_dists = fragments.dists
+    zbuf = fragments.zbuf
+    sigma, gamma = blend_params.sigma, blend_params.gamma
+
+    N, H, W, K = face_ids.shape
+    device = face_ids.device
+    out = torch.ones((N, H, W, 4), dtype=colors.dtype, device=device)
+
+    # Near and far clipping planes
+    znear, zfar = 1.0, 100.0
+    eps = 1e-10
+
+    background = blend_params.background_color
+    if not torch.is_tensor(background):
+        background = torch.tensor(background, dtype=colors.dtype, device=device)
+
+    for n in range(N):
+        for h in range(H):
+            for w in range(W):
+                remaining = 1.0
+                weights = torch.zeros(K, device=device)
+                zmax = torch.tensor(0.0, device=device)
+
+                # First pass: largest normalised inverse depth of a valid face.
+                for k in range(K):
+                    if face_ids[n, h, w, k] < 0:
+                        continue
+                    zinv = (zfar - zbuf[n, h, w, k]) / (zfar - znear)
+                    if zinv > zmax:
+                        zmax = zinv
+
+                # Second pass: distance based probability of every valid face
+                # and its zbuf based color weight.
+                for k in range(K):
+                    if face_ids[n, h, w, k] < 0:
+                        continue
+                    zinv = (zfar - zbuf[n, h, w, k]) / (zfar - znear)
+                    prob = torch.sigmoid(-face_dists[n, h, w, k] / sigma)
+                    remaining *= 1.0 - prob  # cumulative product
+                    weights[k] = prob * torch.exp((zinv - zmax) / gamma)
+
+                # Clamp to ensure delta is never 0
+                delta = torch.exp((eps - zmax) / gamma).clamp(min=eps).to(device)
+                denom = weights.sum() + delta
+                blended = (weights[..., None] * colors[n, h, w, :, :]).sum(dim=0)
+                out[n, h, w, :3] = blended + delta * background
+                out[n, h, w, :3] /= denom
+                out[n, h, w, 3] = 1.0 - remaining
+
+    return out
+
+
+class TestBlending(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        torch.manual_seed(42)  # fixed seed: the random test inputs are reproducible
+
+    def _compare_impls(
+        self, fn1, fn2, args1, args2, grad_var1=None, grad_var2=None, compare_grads=True
+    ):
+        """
+        Asserts that fn1(*args1) and fn2(*args2) agree on the alpha channel
+        and, if compare_grads is set, on the gradients that the same random
+        upstream gradient leaves in grad_var1.grad and grad_var2.grad.
+        """
+        out1 = fn1(*args1)
+        out2 = fn2(*args2)
+        self.assertClose(out1.cpu()[..., 3], out2.cpu()[..., 3], atol=1e-7)
+        if not compare_grads:
+            return
+        grad_out = torch.randn_like(out1)
+        for out, grad_var in ((out1, grad_var1), (out2, grad_var2)):
+            (out * grad_out).sum().backward()
+            # hasattr(t, "grad") holds for every tensor; check it was populated.
+            self.assertIsNotNone(getattr(grad_var, "grad", None))
+        self.assertClose(grad_var1.grad.cpu(), grad_var2.grad.cpu(), atol=2e-5)
+
+    def test_hard_rgb_blend(self):
+        """
+        hard_rgb_blend should return the color of the first face slot with alpha
+        1 on covered pixels, and the background color with alpha 0 elsewhere.
+        """
+        N, H, W, K = 5, 10, 10, 20
+        pix_to_face = torch.randint(low=-1, high=100, size=(N, H, W, K))
+        fragments = Fragments(
+            pix_to_face=pix_to_face,
+            bary_coords=torch.ones((N, H, W, K, 3)),
+            zbuf=pix_to_face,  # dummy
+            dists=pix_to_face,  # dummy
+        )
+        colors = torch.randn((N, H, W, K, 3))
+        blend_params = BlendParams(1e-4, 1e-4, (0.5, 0.5, 1))
+        images = hard_rgb_blend(colors, fragments, blend_params)
+
+        # A pixel is foreground when its first face slot holds a valid index.
+        covered = pix_to_face[..., 0] >= 0
+        self.assertClose(images[covered][:, :3], colors[covered][..., 0, :])
+        # Background pixels must match the background color in each RGB channel.
+        for i, channel_color in enumerate(blend_params.background_color):
+            self.assertTrue(images[~covered][..., i].eq(channel_color).all())
+        # The alpha channel is the foreground mask itself.
+        self.assertClose(images[..., 3], covered.float())
+
+    def test_sigmoid_alpha_blend_manual_gradients(self):
+        """Hand-written backward of the naive sigmoid blend must match autograd."""
+        # Create dummy outputs of rasterization
+        torch.manual_seed(231)
+        F = 32  # number of faces in the mesh
+        # The python loop version is really slow so only using small input sizes.
+        N, S, K = 2, 3, 2
+        device = torch.device("cuda")
+        pix_to_face = torch.randint(F + 1, size=(N, S, S, K), device=device) - 1
+        colors = torch.randn((N, S, S, K, 3), device=device)
+        empty = torch.tensor([], device=device)
+
+        # No explicit sign flip is needed: randn already yields distances of
+        # both signs.
+        # (-) means inside triangle, (+) means outside triangle.
+        dists = torch.randn(size=(N, S, S, K), requires_grad=True, device=device)
+        fragments = Fragments(
+            pix_to_face=pix_to_face,
+            bary_coords=empty,  # dummy
+            zbuf=empty,  # dummy
+            dists=dists,
+        )
+        blend_params = BlendParams(sigma=1e-3)
+        pix_cols = sigmoid_blend_naive_loop(colors, fragments, blend_params)
+        grad_out = torch.randn_like(pix_cols)
+
+        # Backward pass
+        pix_cols.backward(grad_out)
+        grad_dists = sigmoid_blend_naive_loop_backward(
+            grad_out, pix_cols, fragments, blend_params
+        )
+        self.assertTrue(torch.allclose(dists.grad, grad_dists, atol=1e-7))
+
+    def test_sigmoid_alpha_blend_python(self):
+        """
+        The library `sigmoid_alpha_blend` and the tensorised reference
+        `sigmoid_alpha_blend_vectorized` must produce the same alpha channel
+        and the same gradients w.r.t. the distances.
+        """
+
+        # Create dummy outputs of rasterization
+        torch.manual_seed(231)
+        F = 32  # number of faces in the mesh
+        N, S, K = 1, 4, 1
+        device = torch.device("cuda")
+        # Face indices lie in [-1, F - 1]; negative slots are skipped when blending.
+        pix_to_face = torch.randint(low=-1, high=F, size=(N, S, S, K), device=device)
+        colors = torch.randn((N, S, S, K, 3), device=device)
+        empty = torch.tensor([], device=device)
+        blend_params = BlendParams(sigma=1e-2)
+
+        # Equal values in two independent leaf tensors, so that each
+        # implementation accumulates its gradient separately.
+        dists1 = torch.randn(size=(N, S, S, K), device=device)
+        dists2 = dists1.clone()
+
+        # One argument tuple per implementation, differing only in `dists`.
+        args = []
+        for dists in (dists1, dists2):
+            dists.requires_grad = True
+            fragments = Fragments(
+                pix_to_face=pix_to_face,
+                bary_coords=empty,  # dummy
+                zbuf=empty,  # dummy
+                dists=dists,
+            )
+            args.append((colors, fragments, blend_params))
+        args1, args2 = args
+
+        # Outputs and gradients of the two implementations must agree.
+        self._compare_impls(
+            sigmoid_alpha_blend,
+            sigmoid_alpha_blend_vectorized,
+            args1,
+            args2,
+            dists1,
+            dists2,
+            compare_grads=True,
+        )
+
+    def test_softmax_rgb_blend(self):
+        """
+        The library `softmax_rgb_blend` and the loop-based `softmax_blend_naive`
+        must produce the same alpha channel and the same gradients w.r.t. the
+        distances.
+        """
+        # Create dummy outputs of rasterization simulating a cube in the center
+        # of the image with surrounding padded values.
+        N, S, K = 1, 8, 2
+        device = torch.device("cuda")
+        pix_to_face = torch.full(
+            (N, S, S, K), fill_value=-1, dtype=torch.int64, device=device
+        )
+        start, stop = int(S / 4), int(0.75 * S)
+        side = int(S / 2)
+        pix_to_face[:, start:stop, start:stop, :] = torch.randint(
+            size=(N, side, side, K), low=0, high=100, device=device
+        )
+        empty = torch.tensor([], device=device)
+
+        # Factors in [0, 1) whose entries above 0.5 are made negative.
+        random_sign_flip = torch.rand((N, S, S, K), device=device)
+        random_sign_flip[random_sign_flip > 0.5] *= -1.0
+        zbuf1 = torch.randn(size=(N, S, S, K), device=device)
+
+        # randomly flip the sign of the distance
+        # (-) means inside triangle, (+) means outside triangle.
+        dists1 = torch.randn(size=(N, S, S, K), device=device) * random_sign_flip
+        colors = torch.randn((N, S, S, K, 3), device=device)
+        blend_params = BlendParams(sigma=1e-3)
+
+        # Each implementation gets its own copy of the distances and depths.
+        dists2, zbuf2 = dists1.clone(), zbuf1.clone()
+        args = []
+        for dists, zbuf in ((dists1, zbuf1), (dists2, zbuf2)):
+            dists.requires_grad = True
+            fragments = Fragments(
+                pix_to_face=pix_to_face,
+                bary_coords=empty,  # dummy
+                zbuf=zbuf,
+                dists=dists,
+            )
+            args.append((colors, fragments, blend_params))
+        args1, args2 = args
+
+        self._compare_impls(
+            softmax_rgb_blend,
+            softmax_blend_naive,
+            args1,
+            args2,
+            dists1,
+            dists2,
+            compare_grads=True,
+        )
+
+    @staticmethod
+    def bm_sigmoid_alpha_blending(
+        num_meshes: int = 16,
+        image_size: int = 128,
+        faces_per_pixel: int = 100,
+        device="cuda",
+        backend: str = "pytorch",
+    ):
+        """
+        Returns a closure running one forward + backward pass of sigmoid alpha
+        blending: the tensorised reference from this file when backend is
+        "pytorch", the library `sigmoid_alpha_blend` otherwise.
+        """
+        device = torch.device(device)
+        torch.manual_seed(231)
+        # Create dummy outputs of rasterization
+        N, S, K = num_meshes, image_size, faces_per_pixel
+        F = 32  # num faces in the mesh
+        pix_to_face = torch.randint(
+            low=-1, high=F + 1, size=(N, S, S, K), device=device
+        )
+        colors = torch.randn((N, S, S, K, 3), device=device)
+        empty = torch.tensor([], device=device)
+        dists = torch.randn(size=(N, S, S, K), requires_grad=True, device=device)
+        fragments = Fragments(
+            pix_to_face=pix_to_face,
+            bary_coords=empty,  # dummy
+            zbuf=empty,  # dummy
+            dists=dists,
+        )
+        blend_params = BlendParams(sigma=1e-3)
+
+        if backend == "pytorch":
+            blend_fn = sigmoid_alpha_blend_vectorized
+        else:
+            blend_fn = sigmoid_alpha_blend
+        torch.cuda.synchronize()
+
+        def fn():
+            # test forward and backward pass
+            blend_fn(colors, fragments, blend_params).sum().backward()
+            torch.cuda.synchronize()
+
+        return fn
+
+    @staticmethod
+    def bm_softmax_blending(
+        num_meshes: int = 16,
+        image_size: int = 128,
+        faces_per_pixel: int = 100,
+        device: str = "cpu",
+        backend: str = "pytorch",
+    ):
+        """
+        Returns a closure running one forward + backward pass of
+        `softmax_rgb_blend` on random inputs. `backend` is not used here.
+        """
+        if torch.cuda.is_available() and "cuda:" in device:
+            # If a device other than the default is used, set the device explicitly.
+            torch.cuda.set_device(device)
+        device = torch.device(device)
+        torch.manual_seed(231)
+
+        # Create dummy outputs of rasterization
+        N, S, K = num_meshes, image_size, faces_per_pixel
+        F = 32  # num faces in the mesh
+        pix_to_face = torch.randint(
+            low=-1, high=F + 1, size=(N, S, S, K), device=device
+        )
+        colors = torch.randn((N, S, S, K, 3), device=device)
+        empty = torch.tensor([], device=device)  # dummy bary_coords
+        dists = torch.randn(size=(N, S, S, K), requires_grad=True, device=device)
+        zbuf = torch.randn(size=(N, S, S, K), requires_grad=True, device=device)
+        fragments = Fragments(
+            pix_to_face=pix_to_face, bary_coords=empty, zbuf=zbuf, dists=dists
+        )
+        blend_params = BlendParams(sigma=1e-3)
+        torch.cuda.synchronize()
+
+        def fn():
+            # test forward and backward pass
+            softmax_rgb_blend(colors, fragments, blend_params).sum().backward()
+            torch.cuda.synchronize()
+
+        return fn
+
+    @staticmethod
+    def bm_splatter_blending(
+        num_meshes: int = 16,
+        image_size: int = 128,
+        faces_per_pixel: int = 2,
+        use_jit: bool = False,
+        device: str = "cpu",
+        backend: str = "pytorch",
+    ):
+        """
+        Returns a closure running one forward + backward pass of SplatterBlender
+        with an all-False `background_mask`. `use_jit` and `backend` are unused.
+        """
+        if torch.cuda.is_available() and "cuda:" in device:
+            # If a device other than the default is used, set the device explicitly.
+            torch.cuda.set_device(device)
+
+        device = torch.device(device)
+        torch.manual_seed(231)
+
+        # Create dummy outputs of rasterization
+        N, S, K = num_meshes, image_size, faces_per_pixel
+        pixel_coords_camera = torch.randn(
+            (N, S, S, K, 3), device=device, requires_grad=True
+        )
+        cameras = FoVPerspectiveCameras(device=device)
+        colors = torch.randn((N, S, S, K, 3), device=device)
+        # The blender receives a mask in which every entry is False.
+        background_mask = torch.full((N, S, S, K), False, dtype=bool, device=device)
+        blend_params = BlendParams(sigma=0.5)
+
+        torch.cuda.synchronize()
+        splatter_blender = SplatterBlender((N, S, S, K), colors.device)
+
+        def fn():
+            # test forward and backward pass
+            images = splatter_blender(
+                colors,
+                pixel_coords_camera,
+                cameras,
+                background_mask,
+                blend_params,
+            )
+            images.sum().backward()
+            torch.cuda.synchronize()
+
+        return fn
+
+    def test_blend_params(self):
+        """
+        BlendParams() defaults to a (1.0, 1.0, 1.0) background, which an
+        explicitly passed background_color must override.
+        """
+        grey = (0.5, 0.5, 0.5)
+        self.assertEqual(BlendParams(background_color=grey).background_color, grey)
+        self.assertEqual(BlendParams().background_color, (1.0, 1.0, 1.0))
diff --git a/pytorch3d/tests/test_build.py b/pytorch3d/tests/test_build.py
new file mode 100644
index 0000000000000000000000000000000000000000..e1d222ebf2542ca2f5f823a584ae1647468333e3
--- /dev/null
+++ b/pytorch3d/tests/test_build.py
@@ -0,0 +1,102 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import importlib
+import json
+import os
+import sys
+import unittest
+import unittest.mock
+from collections import Counter
+
+from .common_testing import get_pytorch3d_dir
+
+
+# This file groups together tests which look at the code without running it.
+in_conda_build = os.environ.get("CONDA_BUILD_STATE", "") == "TEST"  # skipIf flag
+in_re_worker = os.environ.get("INSIDE_RE_WORKER") is not None  # skipIf flag
+
+
+class TestBuild(unittest.TestCase):
+    def test_name_clash(self):
+        """
+        For setup.py, all translation units need distinct names, so we
+        cannot have foo.cu and foo.cpp, even in different directories.
+        """
+        source_dir = get_pytorch3d_dir() / "pytorch3d"
+        stem_counts = Counter(
+            source.stem
+            for pattern in ("**/*.cu", "**/*.cpp")
+            for source in source_dir.glob(pattern)
+        )
+        for stem, count in stem_counts.items():
+            self.assertEqual(count, 1, f"Too many files with stem {stem}.")
+
+    @unittest.skipIf(in_re_worker, "In RE worker")
+    def test_valid_ipynbs(self):
+        """Checks that each notebook in docs/tutorials parses as JSON."""
+        tutorials_dir = get_pytorch3d_dir() / "docs" / "tutorials"
+        for tutorial in sorted(tutorials_dir.glob("*.ipynb")):
+            # subTest names the offending notebook; JSON text is UTF-8, so do
+            # not depend on the locale's default encoding when decoding it.
+            with self.subTest(name=tutorial.name):
+                with open(tutorial, encoding="utf-8") as f:
+                    json.load(f)
+
+    @unittest.skipIf(in_conda_build or in_re_worker, "In conda build, or RE worker")
+    def test_enumerated_ipynbs(self):
+        """
+        The ids listed in website/tutorials.json must be exactly the stems of
+        the notebooks found in docs/tutorials.
+        """
+        root_dir = get_pytorch3d_dir()
+        notebooks = (root_dir / "docs" / "tutorials").glob("*.ipynb")
+        tutorials_on_disk = sorted(notebook.stem for notebook in notebooks)
+
+        with open(root_dir / "website" / "tutorials.json") as f:
+            sections = json.load(f).values()
+        listed_in_json = sorted(item["id"] for section in sections for item in section)
+
+        self.assertListEqual(listed_in_json, tutorials_on_disk)
+
+    @unittest.skipIf(in_conda_build or in_re_worker, "In conda build, or RE worker")
+    def test_enumerated_notes(self):
+        """
+        The entries listed under "docs" in website/sidebars.json must be exactly
+        the stems of the markdown notes found in docs/notes.
+        """
+        root_dir = get_pytorch3d_dir()
+        notes = (root_dir / "docs" / "notes").glob("*.md")
+        notes_on_disk = sorted(note.stem for note in notes)
+
+        with open(root_dir / "website" / "sidebars.json") as f:
+            sections = json.load(f)["docs"].values()
+        listed_in_json = sorted(entry for section in sections for entry in section)
+
+        self.assertListEqual(listed_in_json, notes_on_disk)
+
+    def test_no_import_cycles(self):
+        # Check each module of pytorch3d imports cleanly,
+        # which may fail if there are import cycles.
+        with unittest.mock.patch.dict(sys.modules):
+            for module in list(sys.modules):
+                # If any of pytorch3d is already imported,
+                # the test would be pointless.
+                if module.startswith("pytorch3d"):
+                    sys.modules.pop(module, None)
+
+            root_dir = get_pytorch3d_dir() / "pytorch3d"
+            for module_file in root_dir.glob("**/*.py"):
+                if module_file.stem in ("__init__", "plotly_vis"):
+                    continue
+                # Work on the path below root_dir so that neither the location of
+                # the checkout nor the OS path separator affects the result.
+                relative = module_file.relative_to(root_dir).with_suffix("")
+                if "implicitron" in str(relative):
+                    continue
+                module = ".".join(("pytorch3d", *relative.parts))
+                with self.subTest(name=module), unittest.mock.patch.dict(sys.modules):
+                    importlib.import_module(module)
diff --git a/pytorch3d/tests/test_camera_conversions.py b/pytorch3d/tests/test_camera_conversions.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9b208ac06594f4695d502aa695590981e820892
--- /dev/null
+++ b/pytorch3d/tests/test_camera_conversions.py
@@ -0,0 +1,235 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import json
+import unittest
+
+import numpy as np
+import torch
+from pytorch3d.ops import eyes
+from pytorch3d.renderer.points.pulsar import Renderer as PulsarRenderer
+from pytorch3d.transforms import so3_exp_map, so3_log_map
+from pytorch3d.utils import (
+    cameras_from_opencv_projection,
+    opencv_from_cameras_projection,
+    pulsar_from_opencv_projection,
+)
+
+from .common_testing import get_tests_dir, TestCaseMixin
+
+
+DATA_DIR = get_tests_dir() / "data"
+
+
+def cv2_project_points(pts, rvec, tvec, camera_matrix):
+    """
+    Reproduces the `cv2.projectPoints` function from OpenCV using PyTorch.
+    """
+    R = so3_exp_map(rvec)
+    pts_proj_3d = (
+        camera_matrix.bmm(R.bmm(pts.permute(0, 2, 1)) + tvec[:, :, None])
+    ).permute(0, 2, 1)
+    depth = pts_proj_3d[..., 2:]
+    pts_proj_2d = pts_proj_3d[..., :2] / depth
+    return pts_proj_2d
+
+
+class TestCameraConversions(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)
+        np.random.seed(42)
+
+    def test_cv2_project_points(self):
+        """
+        Tests that the local implementation of cv2_project_points gives the same
+        results as OpenCV's `cv2.projectPoints`. The check is done against a set
+        of precomputed results `cv_project_points_precomputed`.
+        """
+        with open(DATA_DIR / "cv_project_points_precomputed.json", "r") as f:
+            cv_project_points_precomputed = json.load(f)
+
+        for test_case in cv_project_points_precomputed:
+            _pts_proj = cv2_project_points(
+                **{
+                    k: torch.tensor(test_case[k])[None]
+                    for k in ("pts", "rvec", "tvec", "camera_matrix")
+                }
+            )
+            pts_proj = torch.tensor(test_case["pts_proj"])[None]
+            self.assertClose(_pts_proj, pts_proj, atol=1e-4)
+
+    def test_opencv_conversion(self):
+        """
+        Tests that the cameras converted from opencv to pytorch3d convention
+        return correct projections of random 3D points. The check is done
+        against projections computed by the local `cv2_project_points` function.
+        """
+        device = torch.device("cuda:0")
+        image_size = [[480, 640]] * 4
+        R = [
+            [
+                [1.0, 0.0, 0.0],
+                [0.0, 1.0, 0.0],
+                [0.0, 0.0, 1.0],
+            ],
+            [
+                [1.0, 0.0, 0.0],
+                [0.0, 0.0, -1.0],
+                [0.0, 1.0, 0.0],
+            ],
+            [
+                [0.0, 0.0, 1.0],
+                [1.0, 0.0, 0.0],
+                [0.0, 1.0, 0.0],
+            ],
+            [
+                [0.0, 0.0, 1.0],
+                [1.0, 0.0, 0.0],
+                [0.0, 1.0, 0.0],
+            ],
+        ]
+
+        tvec = [
+            [0.0, 0.0, 3.0],
+            [0.3, -0.3, 3.0],
+            [-0.15, 0.1, 4.0],
+            [0.0, 0.0, 4.0],
+        ]
+        focal_length = [
+            [100.0, 100.0],
+            [115.0, 115.0],
+            [105.0, 105.0],
+            [120.0, 120.0],
+        ]
+        # These values are in y, x format, but they should be in x, y format.
+        # The tests work like this because they only test for consistency,
+        # but this format is misleading.
+        principal_point = [
+            [240, 320],
+            [240.5, 320.3],
+            [241, 318],
+            [242, 322],
+        ]
+
+        principal_point, focal_length, R, tvec, image_size = [
+            torch.tensor(x, device=device)
+            for x in (principal_point, focal_length, R, tvec, image_size)
+        ]
+        camera_matrix = eyes(dim=3, N=4, device=device)
+        camera_matrix[:, 0, 0], camera_matrix[:, 1, 1] = (
+            focal_length[:, 0],
+            focal_length[:, 1],
+        )
+        camera_matrix[:, :2, 2] = principal_point
+
+        pts = torch.nn.functional.normalize(
+            torch.randn(4, 1000, 3, device=device), dim=-1
+        )
+
+        # project the 3D points with the opencv projection function
+        rvec = so3_log_map(R)
+        pts_proj_opencv = cv2_project_points(pts, rvec, tvec, camera_matrix)
+
+        # make the pytorch3d cameras
+        cameras_opencv_to_pytorch3d = cameras_from_opencv_projection(
+            R, tvec, camera_matrix, image_size
+        )
+        self.assertEqual(cameras_opencv_to_pytorch3d.device, device)
+
+        # project the 3D points with converted cameras to screen space.
+        pts_proj_pytorch3d_screen = cameras_opencv_to_pytorch3d.transform_points_screen(
+            pts
+        )[..., :2]
+
+        # compare to the points projected with cv2_project_points above
+        self.assertClose(pts_proj_opencv, pts_proj_pytorch3d_screen, atol=1e-5)
+
+        # Check the inverse.
+        R_i, tvec_i, camera_matrix_i = opencv_from_cameras_projection(
+            cameras_opencv_to_pytorch3d, image_size
+        )
+        self.assertClose(R, R_i)
+        self.assertClose(tvec, tvec_i)
+        self.assertClose(camera_matrix, camera_matrix_i)
+
+    def test_pulsar_conversion(self):
+        """
+        Tests that the cameras converted from opencv to pulsar convention
+        return correct projections of a fixed 3D point. The check is done
+        against projections computed by the local `cv2_project_points` function.
+        """
+        image_size = [[480, 640]]
+        R = [
+            [
+                [1.0, 0.0, 0.0],
+                [0.0, 1.0, 0.0],
+                [0.0, 0.0, 1.0],
+            ],
+            [
+                [0.1968, -0.6663, -0.7192],
+                [0.7138, -0.4055, 0.5710],
+                [-0.6721, -0.6258, 0.3959],
+            ],
+        ]
+        tvec = [
+            [10.0, 10.0, 3.0],
+            [-0.0, -0.0, 20.0],
+        ]
+        focal_length = [
+            [100.0, 100.0],
+            [10.0, 10.0],
+        ]
+        principal_point = [
+            [320, 240],
+            [320, 240],
+        ]
+
+        principal_point, focal_length, R, tvec, image_size = [
+            torch.FloatTensor(x)
+            for x in (principal_point, focal_length, R, tvec, image_size)
+        ]
+        camera_matrix = eyes(dim=3, N=2)
+        camera_matrix[:, 0, 0] = focal_length[:, 0]
+        camera_matrix[:, 1, 1] = focal_length[:, 1]
+        camera_matrix[:, :2, 2] = principal_point
+        rvec = so3_log_map(R)
+        pts = torch.tensor(
+            [[[0.0, 0.0, 120.0]], [[0.0, 0.0, 120.0]]], dtype=torch.float32
+        )
+        radii = torch.tensor([[1e-5], [1e-5]], dtype=torch.float32)
+        col = torch.zeros((2, 1, 1), dtype=torch.float32)
+
+        # project the 3D points with the opencv projection function
+        pts_proj_opencv = cv2_project_points(pts, rvec, tvec, camera_matrix)
+        pulsar_cam = pulsar_from_opencv_projection(
+            R, tvec, camera_matrix, image_size, znear=100.0
+        )
+        pulsar_rend = PulsarRenderer(
+            640, 480, 1, right_handed_system=False, n_channels=1
+        )
+        rendered = torch.flip(
+            pulsar_rend(
+                pts,
+                col,
+                radii,
+                pulsar_cam,
+                1e-5,
+                max_depth=150.0,
+                min_depth=100.0,
+            ),
+            dims=(1,),
+        )
+        for batch_id in range(2):
+            point_pos = torch.where(rendered[batch_id] == rendered[batch_id].min())
+            point_pos = point_pos[1][0], point_pos[0][0]
+            self.assertLess(
+                torch.abs(point_pos[0] - pts_proj_opencv[batch_id, 0, 0]), 2
+            )
+            self.assertLess(
+                torch.abs(point_pos[1] - pts_proj_opencv[batch_id, 0, 1]), 2
+            )
diff --git a/pytorch3d/tests/test_camera_pixels.py b/pytorch3d/tests/test_camera_pixels.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f7e428d018fdce7cf8f72d2d35efebf31077688
--- /dev/null
+++ b/pytorch3d/tests/test_camera_pixels.py
@@ -0,0 +1,264 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.renderer import (
+    MeshRasterizer,
+    NDCMultinomialRaysampler,
+    PerspectiveCameras,
+    PointsRasterizationSettings,
+    PointsRasterizer,
+    PulsarPointsRenderer,
+    RasterizationSettings,
+)
+from pytorch3d.structures import Meshes, Pointclouds
+
+from .common_testing import TestCaseMixin
+
+
+"""
+PyTorch3D renderers operate in an align_corners=False manner.
+This file demonstrates the pixel-perfect calculation by very simple
+examples.
+"""
+
+
+class _CommonData:
+    """
+    Contains data for all these tests.
+
+    - Firstly, a non-square camera at the origin specified in ndc space and
+    screen space. Principal point is in the center of the image.
+    Focal length is 1.0 in world space.
+    This camera has the identity as its world to view transformation, so
+    it is facing down the positive z axis with y being up and x being left.
+    A point on the z=1.0 focal plane has its x,y world coordinate equal to
+    its NDC.
+
+    - Secondly, batched together with that, is a camera with the same
+    focal length facing in the same direction but located so that it faces
+    the corner of the corner pixel of the first image, with its principal
+    point located at its corner, so that it maps the z=1 plane to NDC just
+    like the first.
+
+    - a single point self.point in world space which is located on a plane 1.0
+    in front from the camera which is located exactly in the center
+    of a known pixel (self.x, self.y), specifically with negative x and slightly
+    positive y, so it is in the top right quadrant of the image.
+
+    - A second batch of cameras defined in screen space which exactly match the
+    first ones.
+
+    So that this data can be copied for making demos, it is easiest to leave
+    it as a freestanding class.
+    """
+
+    def __init__(self):
+        self.H, self.W = 249, 125
+        self.image_size = (self.H, self.W)
+        self.camera_ndc = PerspectiveCameras(
+            focal_length=1.0,
+            image_size=(self.image_size,),
+            in_ndc=True,
+            T=torch.tensor([[0.0, 0.0, 0.0], [-1.0, self.H / self.W, 0.0]]),
+            principal_point=((-0.0, -0.0), (1.0, -self.H / self.W)),
+        )
+        # Note how principal point is specified
+        self.camera_screen = PerspectiveCameras(
+            focal_length=self.W / 2.0,
+            principal_point=((self.W / 2.0, self.H / 2.0), (0.0, self.H)),
+            image_size=(self.image_size,),
+            T=torch.tensor([[0.0, 0.0, 0.0], [-1.0, self.H / self.W, 0.0]]),
+            in_ndc=False,
+        )
+
+        # 81 is more than half of 125, 113 is a bit less than half of 249
+        self.x, self.y = 81, 113
+        self.point = [-0.304, 0.176, 1]
+        # The point is in the center of pixel (81, 113)
+        # where pixel (0,0) is the top left.
+        # 81 is 38/2 pixels over the midpoint (125-1)/2=62
+        # and 38/125=0.304
+        # 113 is 22/2 pixels under the midpoint (249-1)/2=124
+        # and 22/125=0.176
+
+
+class TestPixels(TestCaseMixin, unittest.TestCase):
+    def test_mesh(self):
+        data = _CommonData()
+        # Three points on the plane at unit 1 from the camera in
+        # world space, whose mean is the known point.
+        verts = torch.tensor(
+            [[-0.288, 0.192, 1], [-0.32, 0.192, 1], [-0.304, 0.144, 1]]
+        )
+        self.assertClose(verts.mean(0), torch.tensor(data.point))
+        faces = torch.LongTensor([[0, 1, 2]])
+        # A mesh of one triangular face whose centroid is the known point
+        # duplicated so it can be rendered from two cameras.
+        meshes = Meshes(verts=[verts], faces=[faces]).extend(2)
+        faces_per_pixel = 2
+        for camera in (data.camera_ndc, data.camera_screen):
+            rasterizer = MeshRasterizer(
+                cameras=camera,
+                raster_settings=RasterizationSettings(
+                    image_size=data.image_size, faces_per_pixel=faces_per_pixel
+                ),
+            )
+            barycentric_coords_found = rasterizer(meshes).bary_coords
+            self.assertTupleEqual(
+                barycentric_coords_found.shape,
+                (2,) + data.image_size + (faces_per_pixel, 3),
+            )
+            # We see that the barycentric coordinates at the expected
+            # pixel are (1/3, 1/3, 1/3), indicating that this pixel
+            # hits the centroid of the triangle.
+            self.assertClose(
+                barycentric_coords_found[:, data.y, data.x, 0],
+                torch.full((2, 3), 1 / 3.0),
+                atol=1e-5,
+            )
+
+    def test_pointcloud(self):
+        data = _CommonData()
+        clouds = Pointclouds(points=torch.tensor([[data.point]])).extend(2)
+        colorful_cloud = Pointclouds(
+            points=torch.tensor([[data.point]]), features=torch.ones(1, 1, 3)
+        ).extend(2)
+        points_per_pixel = 2
+        # for camera in [data.camera_screen]:
+        for camera in (data.camera_ndc, data.camera_screen):
+            rasterizer = PointsRasterizer(
+                cameras=camera,
+                raster_settings=PointsRasterizationSettings(
+                    image_size=data.image_size,
+                    radius=0.0001,
+                    points_per_pixel=points_per_pixel,
+                ),
+            )
+            # when rasterizing we expect only one pixel to be occupied
+            rasterizer_output = rasterizer(clouds).idx
+            self.assertTupleEqual(
+                rasterizer_output.shape, (2,) + data.image_size + (points_per_pixel,)
+            )
+            found = torch.nonzero(rasterizer_output != -1)
+            self.assertTupleEqual(found.shape, (2, 4))
+            self.assertListEqual(found[0].tolist(), [0, data.y, data.x, 0])
+            self.assertListEqual(found[1].tolist(), [1, data.y, data.x, 0])
+
+            if camera.in_ndc():
+                # Pulsar not currently working in screen space.
+                pulsar_renderer = PulsarPointsRenderer(rasterizer=rasterizer)
+                pulsar_output = pulsar_renderer(
+                    colorful_cloud, gamma=(0.1, 0.1), znear=(0.1, 0.1), zfar=(70, 70)
+                )
+                self.assertTupleEqual(
+                    pulsar_output.shape, (2,) + data.image_size + (3,)
+                )
+                # Look for points rendered in the red channel only, expecting our one.
+                # Check the first batch element only.
+                # TODO: Something is odd with the second.
+                found = torch.nonzero(pulsar_output[0, :, :, 0])
+                self.assertTupleEqual(found.shape, (1, 2))
+                self.assertListEqual(found[0].tolist(), [data.y, data.x])
+                # Should be:
+                # found = torch.nonzero(pulsar_output[:, :, :, 0])
+                # self.assertTupleEqual(found.shape, (2, 3))
+                # self.assertListEqual(found[0].tolist(), [0, data.y, data.x])
+                # self.assertListEqual(found[1].tolist(), [1, data.y, data.x])
+
+    def test_raysampler(self):
+        data = _CommonData()
+        gridsampler = NDCMultinomialRaysampler(
+            image_width=data.W,
+            image_height=data.H,
+            n_pts_per_ray=2,
+            min_depth=1.0,
+            max_depth=2.0,
+        )
+        for camera in (data.camera_ndc, data.camera_screen):
+            bundle = gridsampler(camera)
+            self.assertTupleEqual(bundle.xys.shape, (2,) + data.image_size + (2,))
+            self.assertTupleEqual(
+                bundle.directions.shape, (2,) + data.image_size + (3,)
+            )
+            self.assertClose(
+                bundle.xys[:, data.y, data.x],
+                torch.tensor(data.point[:2]).expand(2, -1),
+            )
+            # We check only the first batch element.
+            # Second element varies because of camera location.
+            self.assertClose(
+                bundle.directions[0, data.y, data.x],
+                torch.tensor(data.point),
+            )
+
+    def test_camera(self):
+        data = _CommonData()
+        # Our point, plus the image center, and a corner of the image.
+        # Located at the focal-length distance away
+        points = torch.tensor([data.point, [0, 0, 1], [1, data.H / data.W, 1]])
+        for cameras in (data.camera_ndc, data.camera_screen):
+            ndc_points = cameras.transform_points_ndc(points)
+            screen_points = cameras.transform_points_screen(points)
+            screen_points_without_xyflip = cameras.transform_points_screen(
+                points, with_xyflip=False
+            )
+            camera_points = cameras.transform_points(points)
+            for batch_idx in range(2):
+                # NDC space agrees with the original
+                self.assertClose(ndc_points[batch_idx], points, atol=1e-5)
+                # First point in screen space is the center of our expected pixel
+                self.assertClose(
+                    screen_points[batch_idx][0],
+                    torch.tensor([data.x + 0.5, data.y + 0.5, 1.0]),
+                    atol=1e-5,
+                )
+                # Screen coords without xyflip should have x, y that negate the
+                # values from the default call above, and unchanged z.
+                self.assertClose(
+                    screen_points_without_xyflip[batch_idx][0],
+                    torch.tensor([-(data.x + 0.5), -(data.y + 0.5), 1.0]),
+                    atol=1e-5,
+                )
+                # Second point in screen space is the center of the screen
+                self.assertClose(
+                    screen_points[batch_idx][1],
+                    torch.tensor([data.W / 2.0, data.H / 2.0, 1.0]),
+                    atol=1e-5,
+                )
+                # Third point in screen space is the corner of the screen
+                # (corner of corner pixels)
+                self.assertClose(
+                    screen_points[batch_idx][2],
+                    torch.tensor([0.0, 0.0, 1.0]),
+                    atol=1e-5,
+                )
+
+                if cameras.in_ndc():
+                    self.assertClose(camera_points[batch_idx], ndc_points[batch_idx])
+                else:
+                    # transform_points does something strange for screen cameras
+                    if batch_idx == 0:
+                        wanted = torch.stack(
+                            [
+                                data.W - screen_points[batch_idx, :, 0],
+                                data.H - screen_points[batch_idx, :, 1],
+                                torch.ones(3),
+                            ],
+                            dim=1,
+                        )
+                    else:
+                        wanted = torch.stack(
+                            [
+                                -screen_points[batch_idx, :, 0],
+                                2 * data.H - screen_points[batch_idx, :, 1],
+                                torch.ones(3),
+                            ],
+                            dim=1,
+                        )
+                    self.assertClose(camera_points[batch_idx], wanted)
diff --git a/pytorch3d/tests/test_camera_utils.py b/pytorch3d/tests/test_camera_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8cdcfab3ba8860d7377c937f17575c249fe5547
--- /dev/null
+++ b/pytorch3d/tests/test_camera_utils.py
@@ -0,0 +1,168 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from math import radians
+
+import torch
+from pytorch3d.renderer.camera_utils import camera_to_eye_at_up, rotate_on_spot
+from pytorch3d.renderer.cameras import (
+    get_world_to_view_transform,
+    look_at_view_transform,
+    PerspectiveCameras,
+)
+from pytorch3d.transforms import axis_angle_to_matrix
+from torch.nn.functional import normalize
+
+from .common_testing import TestCaseMixin
+
+
+def _batched_dotprod(x: torch.Tensor, y: torch.Tensor):
+    """
+    Takes two tensors of shape (N,3) and returns their batched
+    dot product along the last dimension as a tensor of shape
+    (N,).
+    """
+    return torch.einsum("ij,ij->i", x, y)
+
+
+class TestCameraUtils(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        torch.manual_seed(42)
+
+    def test_invert_eye_at_up(self):
+        # Generate random cameras and check we can reconstruct their eye, at,
+        # and up vectors.
+        N = 13
+        eye = torch.rand(N, 3)
+        at = torch.rand(N, 3)
+        up = torch.rand(N, 3)
+
+        R, T = look_at_view_transform(eye=eye, at=at, up=up)
+        cameras = PerspectiveCameras(R=R, T=T)
+
+        eye2, at2, up2 = camera_to_eye_at_up(cameras.get_world_to_view_transform())
+
+        # The retrieved eye matches
+        self.assertClose(eye, eye2, atol=1e-5)
+        self.assertClose(cameras.get_camera_center(), eye)
+
+        # at-eye as retrieved must be a vector in the same direction as
+        # the original.
+        self.assertClose(normalize(at - eye), normalize(at2 - eye2))
+
+        # The up vector as retrieved should be rotated the same amount
+        # around at-eye as the original. The component in the at-eye
+        # direction is unimportant, as is the length.
+        # So check that (up x (at-eye)) as retrieved is in the same
+        # direction as its original value.
+        up_check = torch.cross(up, at - eye, dim=-1)
+        up_check2 = torch.cross(up2, at - eye, dim=-1)
+        self.assertClose(normalize(up_check), normalize(up_check2))
+
+        # Master check that we get the same camera if we reinitialise.
+        R2, T2 = look_at_view_transform(eye=eye2, at=at2, up=up2)
+        cameras2 = PerspectiveCameras(R=R2, T=T2)
+        cam_trans = cameras.get_world_to_view_transform()
+        cam_trans2 = cameras2.get_world_to_view_transform()
+
+        self.assertClose(cam_trans.get_matrix(), cam_trans2.get_matrix(), atol=1e-5)
+
+    def test_rotate_on_spot_yaw(self):
+        N = 14
+        eye = torch.rand(N, 3)
+        at = torch.rand(N, 3)
+        up = torch.rand(N, 3)
+
+        R, T = look_at_view_transform(eye=eye, at=at, up=up)
+
+        # Moving around the y axis looks left.
+        angles = torch.FloatTensor([0, -radians(10), 0])
+        rotation = axis_angle_to_matrix(angles)
+        R_rot, T_rot = rotate_on_spot(R, T, rotation)
+
+        eye_rot, at_rot, up_rot = camera_to_eye_at_up(
+            get_world_to_view_transform(R=R_rot, T=T_rot)
+        )
+        self.assertClose(eye, eye_rot, atol=1e-5)
+
+        # Make vectors pointing exactly left and up
+        left = torch.cross(up, at - eye, dim=-1)
+        left_rot = torch.cross(up_rot, at_rot - eye_rot, dim=-1)
+        fully_up = torch.cross(at - eye, left, dim=-1)
+        fully_up_rot = torch.cross(at_rot - eye_rot, left_rot, dim=-1)
+
+        # The up direction is unchanged
+        self.assertClose(normalize(fully_up), normalize(fully_up_rot), atol=1e-5)
+
+        # The camera has moved left
+        agree = _batched_dotprod(torch.cross(left, left_rot, dim=1), fully_up)
+        self.assertGreater(agree.min(), 0)
+
+        # Batch dimension for rotation
+        R_rot2, T_rot2 = rotate_on_spot(R, T, rotation.expand(N, 3, 3))
+        self.assertClose(R_rot, R_rot2)
+        self.assertClose(T_rot, T_rot2)
+
+        # No batch dimension for either
+        R_rot3, T_rot3 = rotate_on_spot(R[0], T[0], rotation)
+        self.assertClose(R_rot[:1], R_rot3)
+        self.assertClose(T_rot[:1], T_rot3)
+
+        # No batch dimension for R, T
+        R_rot4, T_rot4 = rotate_on_spot(R[0], T[0], rotation.expand(N, 3, 3))
+        self.assertClose(R_rot[:1].expand(N, 3, 3), R_rot4)
+        self.assertClose(T_rot[:1].expand(N, 3), T_rot4)
+
+    def test_rotate_on_spot_pitch(self):
+        N = 14
+        eye = torch.rand(N, 3)
+        at = torch.rand(N, 3)
+        up = torch.rand(N, 3)
+
+        R, T = look_at_view_transform(eye=eye, at=at, up=up)
+
+        # Moving around the x axis looks down.
+        angles = torch.FloatTensor([-radians(10), 0, 0])
+        rotation = axis_angle_to_matrix(angles)
+        R_rot, T_rot = rotate_on_spot(R, T, rotation)
+        eye_rot, at_rot, up_rot = camera_to_eye_at_up(
+            get_world_to_view_transform(R=R_rot, T=T_rot)
+        )
+        self.assertClose(eye, eye_rot, atol=1e-5)
+
+        # A vector pointing left is unchanged
+        left = torch.cross(up, at - eye, dim=-1)
+        left_rot = torch.cross(up_rot, at_rot - eye_rot, dim=-1)
+        self.assertClose(normalize(left), normalize(left_rot), atol=1e-5)
+
+        # The camera has moved down
+        fully_up = torch.cross(at - eye, left, dim=-1)
+        fully_up_rot = torch.cross(at_rot - eye_rot, left_rot, dim=-1)
+        agree = _batched_dotprod(torch.cross(fully_up, fully_up_rot, dim=1), left)
+        self.assertGreater(agree.min(), 0)
+
+    def test_rotate_on_spot_roll(self):
+        N = 14
+        eye = torch.rand(N, 3)
+        at = torch.rand(N, 3)
+        up = torch.rand(N, 3)
+
+        R, T = look_at_view_transform(eye=eye, at=at, up=up)
+
+        # Moving around the z axis rotates the image.
+        angles = torch.FloatTensor([0, 0, -radians(10)])
+        rotation = axis_angle_to_matrix(angles)
+        R_rot, T_rot = rotate_on_spot(R, T, rotation)
+        eye_rot, at_rot, up_rot = camera_to_eye_at_up(
+            get_world_to_view_transform(R=R_rot, T=T_rot)
+        )
+        self.assertClose(eye, eye_rot, atol=1e-5)
+        self.assertClose(normalize(at - eye), normalize(at_rot - eye), atol=1e-5)
+
+        # The camera has moved clockwise
+        agree = _batched_dotprod(torch.cross(up, up_rot, dim=1), at - eye)
+        self.assertGreater(agree.min(), 0)
diff --git a/pytorch3d/tests/test_cameras.py b/pytorch3d/tests/test_cameras.py
new file mode 100644
index 0000000000000000000000000000000000000000..c953e4572afe50a25b32efd912d7b35a18d1fcea
--- /dev/null
+++ b/pytorch3d/tests/test_cameras.py
@@ -0,0 +1,1679 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# @licenselint-loose-mode
+
+# Some of the code below is adapted from Soft Rasterizer (SoftRas)
+#
+# Copyright (c) 2017 Hiroharu Kato
+# Copyright (c) 2018 Nikos Kolotouros
+# Copyright (c) 2019 Shichen Liu
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import math
+import pickle
+import unittest
+from itertools import product
+
+import numpy as np
+import torch
+from pytorch3d.common.datatypes import Device
+from pytorch3d.renderer.camera_utils import join_cameras_as_batch
+from pytorch3d.renderer.cameras import (
+    camera_position_from_spherical_angles,
+    CamerasBase,
+    FoVOrthographicCameras,
+    FoVPerspectiveCameras,
+    get_world_to_view_transform,
+    look_at_rotation,
+    look_at_view_transform,
+    OpenGLOrthographicCameras,
+    OpenGLPerspectiveCameras,
+    OrthographicCameras,
+    PerspectiveCameras,
+    SfMOrthographicCameras,
+    SfMPerspectiveCameras,
+)
+from pytorch3d.renderer.fisheyecameras import FishEyeCameras
+from pytorch3d.transforms import Transform3d
+from pytorch3d.transforms.rotation_conversions import random_rotations
+from pytorch3d.transforms.so3 import so3_exp_map
+
+from .common_camera_utils import init_random_cameras
+
+from .common_testing import TestCaseMixin
+
+
+# Naive function adapted from SoftRasterizer for test purposes.
+def perspective_project_naive(points, fov=60.0):
+    """
+    Reference pinhole projection parameterised by a field of view.
+    Args:
+        points: (N, V, 3) representing the padded points.
+        fov: full viewing angle, in degrees.
+    Returns:
+        (N, V, 3) tensor holding x and y divided by z * tan(fov / 2), and the
+        unchanged view space z coordinate (no z renormalization).
+    """
+    half_angle = torch.tensor(
+        (fov / 2) / 180 * np.pi, dtype=torch.float32, device=points.device
+    )
+    tan_half = torch.tan(half_angle[None])[:, None]
+    depth = points[:, :, 2]
+    x_proj = points[:, :, 0] / depth / tan_half
+    y_proj = points[:, :, 1] / depth / tan_half
+    projected = torch.stack((x_proj, y_proj, depth), dim=2)
+    return projected
+
+
+def sfm_perspective_project_naive(points, fx=1.0, fy=1.0, p0x=0.0, p0y=0.0):
+    """
+    Reference projection driven by a focal length and a principal point.
+
+    Args:
+        points: (N, V, 3) representing the padded points.
+        fx: focal length along x, world units
+        fy: focal length along y, world units
+        p0x: principal point x offset, pixels
+        p0y: principal point y offset, pixels
+    Returns:
+        (N, V, 3) tensor of projected points whose last channel is 1 / z.
+    """
+    x_in, y_in, depth = points[:, :, 0], points[:, :, 1], points[:, :, 2]
+    x_out = (x_in * fx) / depth + p0x
+    y_out = (y_in * fy) / depth + p0y
+    projected = torch.stack((x_out, y_out, 1.0 / depth), dim=2)
+    return projected
+
+
+# Naive function adapted from SoftRasterizer for test purposes.
+def orthographic_project_naive(points, scale_xyz=(1.0, 1.0, 1.0)):
+    """
+    Compute orthographic projection with per-axis scaling.
+    Args:
+        points: (N, V, 3) representing the padded points.
+        scale_xyz: (N, 3) or (1, 3) scaling factors for each of xyz directions
+    Returns:
+        (N, V, 3) tensor of projected points preserving the view space z
+        coordinate (no z renormalization).
+    """
+    if not torch.is_tensor(scale_xyz):
+        scale_xyz = torch.tensor(scale_xyz)
+    scale_xyz = scale_xyz.view(-1, 3)
+    z = points[:, :, 2]
+    # Keep a trailing axis on the scales so they broadcast over the V points.
+    x = points[:, :, 0] * scale_xyz[:, 0:1]
+    y = points[:, :, 1] * scale_xyz[:, 1:2]
+    return torch.stack((x, y, z), dim=2)
+
+
+def ndc_to_screen_points_naive(points, imsize):
+    """
+    Reference conversion of points from PyTorch3D's NDC space to screen space.
+    Args:
+        points: (N, V, 3) representing padded points
+        imsize: (N, 2) image size = (height, width)
+    Returns:
+        (N, V, 3) tensor with x and y in screen space and z left untouched
+    """
+    img_h, img_w = (side.view(-1, 1) for side in imsize.unbind(1))
+    half_w = img_w / 2.0
+    half_h = img_h / 2.0
+
+    # The NDC unit is half of the shorter image side: half the width for
+    # portrait images, half the height otherwise.
+    portrait = (img_h > img_w).float()
+    landscape = (img_h <= img_w).float()
+    ndc_scale = half_w * portrait + half_h * landscape
+
+    x_ndc, y_ndc, depth = points.unbind(2)
+    x_screen = half_w - ndc_scale * x_ndc
+    y_screen = half_h - ndc_scale * y_ndc
+    return torch.stack((x_screen, y_screen, depth), dim=2)
+
+
+class TestCameraHelpers(TestCaseMixin, unittest.TestCase):
+    """Tests for the look-at and spherical-angle camera helper functions."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)
+
+    def test_look_at_view_transform_from_eye_point_tuple(self):
+        dist = math.sqrt(2)
+        elev = math.pi / 4
+        azim = 0.0
+        eye = ((0.0, 1.0, 1.0),)
+        # using passed values for dist, elev, azim
+        R, t = look_at_view_transform(dist, elev, azim, degrees=False)
+        # using other values for dist, elev, azim - eye overrides
+        R_eye, t_eye = look_at_view_transform(dist=3, elev=2, azim=1, eye=eye)
+        # using only eye value
+        R_eye_only, t_eye_only = look_at_view_transform(eye=eye)
+        self.assertTrue(torch.allclose(R, R_eye, atol=2e-7))
+        self.assertTrue(torch.allclose(t, t_eye, atol=2e-7))
+        self.assertTrue(torch.allclose(R, R_eye_only, atol=2e-7))
+        self.assertTrue(torch.allclose(t, t_eye_only, atol=2e-7))
+
+    def test_look_at_view_transform_default_values(self):
+        dist = 1.0
+        elev = 0.0
+        azim = 0.0
+        # Using passed values for dist, elev, azim
+        R, t = look_at_view_transform(dist, elev, azim)
+        # Using default dist=1.0, elev=0.0, azim=0.0
+        R_default, t_default = look_at_view_transform()
+        # test default = passed = expected
+        self.assertTrue(torch.allclose(R, R_default, atol=2e-7))
+        self.assertTrue(torch.allclose(t, t_default, atol=2e-7))
+
+    def test_look_at_view_transform_non_default_at_position(self):
+        dist = 1.0
+        elev = 0.0
+        azim = 0.0
+        at = ((1, 1, 1),)
+        # Using passed values for dist, elev, azim, at
+        R, t = look_at_view_transform(dist, elev, azim, at=at)
+        # Using default dist=1.0, elev=0.0, azim=0.0
+        R_default, t_default = look_at_view_transform()
+        # test default = passed = expected
+        # R must be the same, t must be translated by (1,-1,1) with respect to t_default
+        t_trans = torch.tensor([1, -1, 1], dtype=torch.float32).view(1, 3)
+        self.assertTrue(torch.allclose(R, R_default, atol=2e-7))
+        self.assertTrue(torch.allclose(t, t_default + t_trans, atol=2e-7))
+
+    def test_camera_position_from_angles_python_scalar(self):
+        dist = 2.7
+        elev = 90.0
+        azim = 0.0
+        expected_position = torch.tensor([0.0, 2.7, 0.0], dtype=torch.float32)
+        expected_position = expected_position.view(1, 3)
+        position = camera_position_from_spherical_angles(dist, elev, azim)
+        self.assertClose(position, expected_position, atol=2e-7)
+
+    def test_camera_position_from_angles_python_scalar_radians(self):
+        dist = 2.7
+        elev = math.pi / 2
+        azim = 0.0
+        expected_position = torch.tensor([0.0, 2.7, 0.0], dtype=torch.float32)
+        expected_position = expected_position.view(1, 3)
+        position = camera_position_from_spherical_angles(
+            dist, elev, azim, degrees=False
+        )
+        self.assertClose(position, expected_position, atol=2e-7)
+
+    def test_camera_position_from_angles_torch_scalars(self):
+        dist = torch.tensor(2.7)
+        elev = torch.tensor(0.0)
+        azim = torch.tensor(90.0)
+        expected_position = torch.tensor([2.7, 0.0, 0.0], dtype=torch.float32).view(
+            1, 3
+        )
+        position = camera_position_from_spherical_angles(dist, elev, azim)
+        self.assertClose(position, expected_position, atol=2e-7)
+
+    def test_camera_position_from_angles_mixed_scalars(self):
+        dist = 2.7
+        elev = torch.tensor(0.0)
+        azim = 90.0
+        expected_position = torch.tensor([2.7, 0.0, 0.0], dtype=torch.float32).view(
+            1, 3
+        )
+        position = camera_position_from_spherical_angles(dist, elev, azim)
+        self.assertClose(position, expected_position, atol=2e-7)
+
+    def test_camera_position_from_angles_torch_scalar_grads(self):
+        dist = torch.tensor(2.7, requires_grad=True)
+        elev = torch.tensor(45.0, requires_grad=True)
+        azim = torch.tensor(45.0)
+        position = camera_position_from_spherical_angles(dist, elev, azim)
+        position.sum().backward()
+        self.assertIsNotNone(elev.grad)
+        self.assertIsNotNone(dist.grad)
+        elev_grad = elev.grad.clone()
+        dist_grad = dist.grad.clone()
+        elev = math.pi / 180.0 * elev.detach()
+        azim = math.pi / 180.0 * azim
+        grad_dist = (
+            torch.cos(elev) * torch.sin(azim)
+            + torch.sin(elev)
+            + torch.cos(elev) * torch.cos(azim)
+        )
+        grad_elev = (
+            -(torch.sin(elev)) * torch.sin(azim)
+            + torch.cos(elev)
+            - torch.sin(elev) * torch.cos(azim)
+        )
+        grad_elev = dist * (math.pi / 180.0) * grad_elev
+        self.assertClose(elev_grad, grad_elev)
+        self.assertClose(dist_grad, grad_dist)
+
+    def test_camera_position_from_angles_vectors(self):
+        dist = torch.tensor([2.0, 2.0])
+        elev = torch.tensor([0.0, 90.0])
+        azim = torch.tensor([90.0, 0.0])
+        expected_position = torch.tensor(
+            [[2.0, 0.0, 0.0], [0.0, 2.0, 0.0]], dtype=torch.float32
+        )
+        position = camera_position_from_spherical_angles(dist, elev, azim)
+        self.assertClose(position, expected_position, atol=2e-7)
+
+    def test_camera_position_from_angles_vectors_broadcast(self):
+        dist = torch.tensor([2.0, 3.0, 5.0])
+        elev = torch.tensor([0.0])
+        azim = torch.tensor([90.0])
+        expected_position = torch.tensor(
+            [[2.0, 0.0, 0.0], [3.0, 0.0, 0.0], [5.0, 0.0, 0.0]], dtype=torch.float32
+        )
+        position = camera_position_from_spherical_angles(dist, elev, azim)
+        self.assertClose(position, expected_position, atol=3e-7)
+
+    def test_camera_position_from_angles_vectors_mixed_broadcast(self):
+        dist = torch.tensor([2.0, 3.0, 5.0])
+        elev = 0.0
+        azim = torch.tensor(90.0)
+        expected_position = torch.tensor(
+            [[2.0, 0.0, 0.0], [3.0, 0.0, 0.0], [5.0, 0.0, 0.0]], dtype=torch.float32
+        )
+        position = camera_position_from_spherical_angles(dist, elev, azim)
+        self.assertClose(position, expected_position, atol=3e-7)
+
+    def test_camera_position_from_angles_vectors_mixed_broadcast_grads(self):
+        dist = torch.tensor([2.0, 3.0, 5.0], requires_grad=True)
+        elev = torch.tensor(45.0, requires_grad=True)
+        azim = 45.0
+        position = camera_position_from_spherical_angles(dist, elev, azim)
+        position.sum().backward()
+        self.assertIsNotNone(elev.grad)
+        self.assertIsNotNone(dist.grad)
+        elev_grad = elev.grad.clone()
+        dist_grad = dist.grad.clone()
+        azim = torch.tensor(azim)
+        elev = math.pi / 180.0 * elev.detach()
+        azim = math.pi / 180.0 * azim
+        grad_dist = (
+            torch.cos(elev) * torch.sin(azim)
+            + torch.sin(elev)
+            + torch.cos(elev) * torch.cos(azim)
+        )
+        grad_elev = (
+            -(torch.sin(elev)) * torch.sin(azim)
+            + torch.cos(elev)
+            - torch.sin(elev) * torch.cos(azim)
+        )
+        grad_elev = (dist * (math.pi / 180.0) * grad_elev).sum()
+        self.assertClose(elev_grad, grad_elev)
+        self.assertClose(dist_grad, torch.full([3], grad_dist))
+
+    def test_camera_position_from_angles_vectors_bad_broadcast(self):
+        # Batch dim for broadcast must be N or 1
+        dist = torch.tensor([2.0, 3.0, 5.0])
+        elev = torch.tensor([0.0, 90.0])
+        azim = torch.tensor([90.0])
+        with self.assertRaises(ValueError):
+            camera_position_from_spherical_angles(dist, elev, azim)
+
+    def test_look_at_rotation_python_list(self):
+        camera_position = [[0.0, 0.0, -1.0]]  # camera pointing along negative z
+        rot_mat = look_at_rotation(camera_position)
+        self.assertClose(rot_mat, torch.eye(3)[None], atol=2e-7)
+
+    def test_look_at_rotation_input_fail(self):
+        camera_position = [-1.0]  # expected to have xyz positions
+        with self.assertRaises(ValueError):
+            look_at_rotation(camera_position)
+
+    def test_look_at_rotation_list_broadcast(self):
+        # fmt: off
+        camera_positions = [[0.0, 0.0, -1.0], [0.0, 0.0, 1.0]]
+        rot_mats_expected = torch.tensor(
+            [
+                [
+                    [1.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0],
+                    [0.0, 0.0, 1.0]
+                ],
+                [
+                    [-1.0, 0.0,  0.0],  # noqa: E241, E201
+                    [ 0.0, 1.0,  0.0],  # noqa: E241, E201
+                    [ 0.0, 0.0, -1.0]   # noqa: E241, E201
+                ],
+            ],
+            dtype=torch.float32
+        )
+        # fmt: on
+        rot_mats = look_at_rotation(camera_positions)
+        self.assertClose(rot_mats, rot_mats_expected, atol=2e-7)
+
+    def test_look_at_rotation_tensor_broadcast(self):
+        # fmt: off
+        camera_positions = torch.tensor([
+            [0.0, 0.0, -1.0],
+            [0.0, 0.0,  1.0]   # noqa: E241, E201
+        ], dtype=torch.float32)
+        rot_mats_expected = torch.tensor(
+            [
+                [
+                    [1.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0],
+                    [0.0, 0.0, 1.0]
+                ],
+                [
+                    [-1.0, 0.0,  0.0],  # noqa: E241, E201
+                    [ 0.0, 1.0,  0.0],  # noqa: E241, E201
+                    [ 0.0, 0.0, -1.0]   # noqa: E241, E201
+                ],
+            ],
+            dtype=torch.float32
+        )
+        # fmt: on
+        rot_mats = look_at_rotation(camera_positions)
+        self.assertClose(rot_mats, rot_mats_expected, atol=2e-7)
+
+    def test_look_at_rotation_tensor_grad(self):
+        camera_position = torch.tensor([[0.0, 0.0, -1.0]], requires_grad=True)
+        rot_mat = look_at_rotation(camera_position)
+        rot_mat.sum().backward()
+        self.assertIsNotNone(camera_position.grad)
+        self.assertClose(
+            camera_position.grad, torch.zeros_like(camera_position), atol=2e-7
+        )
+
+    def test_view_transform(self):
+        T = torch.tensor([0.0, 0.0, -1.0], requires_grad=True).view(1, -1)
+        R = look_at_rotation(T)
+        RT = get_world_to_view_transform(R=R, T=T)
+        self.assertIsInstance(RT, Transform3d)
+
+    def test_look_at_view_transform_corner_case(self):
+        dist = 2.7
+        elev = 90
+        azim = 90
+        expected_position = torch.tensor([0.0, 2.7, 0.0], dtype=torch.float32).view(
+            1, 3
+        )
+        position = camera_position_from_spherical_angles(dist, elev, azim)
+        self.assertClose(position, expected_position, atol=2e-7)
+        R, _ = look_at_view_transform(eye=position)
+        x_axis = R[:, :, 0]
+        expected_x_axis = torch.tensor([0.0, 0.0, -1.0], dtype=torch.float32).view(1, 3)
+        self.assertClose(x_axis, expected_x_axis, atol=5e-3)
+
+
+class TestCamerasCommon(TestCaseMixin, unittest.TestCase):
+    """Checks run against several camera classes: transforms, clone and join."""
+
+    def test_K(self, batch_size=10):
+        T = torch.randn(batch_size, 3)
+        R = random_rotations(batch_size)
+        K = torch.randn(batch_size, 4, 4)
+        for cam_type in (
+            FoVOrthographicCameras,
+            FoVPerspectiveCameras,
+            OrthographicCameras,
+            PerspectiveCameras,
+        ):
+            cam = cam_type(R=R, T=T, K=K)
+            cam.get_projection_transform()
+            # Just checking that we don't crash or anything
+
+    def test_view_transform_class_method(self):
+        T = torch.tensor([0.0, 0.0, -1.0], requires_grad=True).view(1, -1)
+        R = look_at_rotation(T)
+        RT = get_world_to_view_transform(R=R, T=T)
+        for cam_type in (
+            OpenGLPerspectiveCameras,
+            OpenGLOrthographicCameras,
+            SfMOrthographicCameras,
+            SfMPerspectiveCameras,
+            FoVOrthographicCameras,
+            FoVPerspectiveCameras,
+            OrthographicCameras,
+            PerspectiveCameras,
+        ):
+            cam = cam_type(R=R, T=T)
+            RT_class = cam.get_world_to_view_transform()
+            self.assertTrue(torch.allclose(RT.get_matrix(), RT_class.get_matrix()))
+
+        self.assertIsInstance(RT, Transform3d)
+
+    def test_get_camera_center(self, batch_size=10):
+        T = torch.randn(batch_size, 3)
+        R = random_rotations(batch_size)
+        for cam_type in (
+            OpenGLPerspectiveCameras,
+            OpenGLOrthographicCameras,
+            SfMOrthographicCameras,
+            SfMPerspectiveCameras,
+            FoVOrthographicCameras,
+            FoVPerspectiveCameras,
+            OrthographicCameras,
+            PerspectiveCameras,
+        ):
+            cam = cam_type(R=R, T=T)
+            C = cam.get_camera_center()
+            C_ = -torch.bmm(R, T[:, :, None])[:, :, 0]
+            self.assertTrue(torch.allclose(C, C_, atol=1e-05))
+
+    @staticmethod
+    def init_equiv_cameras_ndc_screen(cam_type: CamerasBase, batch_size: int):
+        T = torch.randn(batch_size, 3) * 0.03
+        T[:, 2] = 4
+        R = so3_exp_map(torch.randn(batch_size, 3) * 3.0)
+        screen_cam_params = {"R": R, "T": T}
+        ndc_cam_params = {"R": R, "T": T}
+        if cam_type in (OrthographicCameras, PerspectiveCameras):
+            fcl = torch.rand((batch_size, 2)) * 3.0 + 0.1
+            prc = torch.randn((batch_size, 2)) * 0.2
+            # (height, width)
+            image_size = torch.randint(low=2, high=64, size=(batch_size, 2))
+            # scale
+            scale = (image_size.min(dim=1, keepdim=True).values) / 2.0
+
+            ndc_cam_params["focal_length"] = fcl
+            ndc_cam_params["principal_point"] = prc
+            ndc_cam_params["image_size"] = image_size
+
+            screen_cam_params["image_size"] = image_size
+            screen_cam_params["focal_length"] = fcl * scale
+            screen_cam_params["principal_point"] = (
+                image_size[:, [1, 0]]
+            ) / 2.0 - prc * scale
+            screen_cam_params["in_ndc"] = False
+        else:
+            raise ValueError(str(cam_type))
+        return cam_type(**ndc_cam_params), cam_type(**screen_cam_params)
+
+    def test_unproject_points(self, batch_size=50, num_points=100):
+        """
+        Checks that an unprojection of a randomly projected point cloud
+        stays the same.
+        """
+
+        for cam_type in (
+            SfMOrthographicCameras,
+            OpenGLPerspectiveCameras,
+            OpenGLOrthographicCameras,
+            SfMPerspectiveCameras,
+            FoVOrthographicCameras,
+            FoVPerspectiveCameras,
+            OrthographicCameras,
+            PerspectiveCameras,
+        ):
+            # init the cameras
+            cameras = init_random_cameras(cam_type, batch_size)
+            # xyz - the ground truth point cloud
+            xyz = torch.randn(batch_size, num_points, 3) * 0.3
+            # xyz in camera coordinates
+            xyz_cam = cameras.get_world_to_view_transform().transform_points(xyz)
+            # depth = z-component of xyz_cam
+            depth = xyz_cam[:, :, 2:]
+            # project xyz
+            xyz_proj = cameras.transform_points(xyz)
+            xy, _ = xyz_proj.split(2, dim=2)
+            # input to the unprojection function
+            xy_depth = torch.cat((xy, depth), dim=2)
+
+            for to_world in (False, True):
+                if to_world:
+                    matching_xyz = xyz
+                else:
+                    matching_xyz = xyz_cam
+
+                # if we have FoV (= OpenGL) cameras
+                # test for scaled_depth_input=True/False
+                if cam_type in (
+                    OpenGLPerspectiveCameras,
+                    OpenGLOrthographicCameras,
+                    FoVPerspectiveCameras,
+                    FoVOrthographicCameras,
+                ):
+                    for scaled_depth_input in (True, False):
+                        if scaled_depth_input:
+                            xy_depth_ = xyz_proj
+                        else:
+                            xy_depth_ = xy_depth
+                        xyz_unproj = cameras.unproject_points(
+                            xy_depth_,
+                            world_coordinates=to_world,
+                            scaled_depth_input=scaled_depth_input,
+                        )
+                        self.assertTrue(
+                            torch.allclose(xyz_unproj, matching_xyz, atol=1e-4)
+                        )
+                else:
+                    xyz_unproj = cameras.unproject_points(
+                        xy_depth, world_coordinates=to_world
+                    )
+                    self.assertTrue(torch.allclose(xyz_unproj, matching_xyz, atol=1e-4))
+
+    @staticmethod
+    def unproject_points(
+        cam_type, batch_size=50, num_points=100, device: Device = "cpu"
+    ):
+        """
+        Returns a closure which builds random cameras of the class named `cam_type`
+        and unprojects random points; only device="cuda" selects cuda:0, else CPU.
+        """
+        if device == "cuda":
+            device = torch.device("cuda:0")
+        else:
+            device = torch.device("cpu")
+
+        str2cls = {  # noqa
+            "OpenGLOrthographicCameras": OpenGLOrthographicCameras,
+            "OpenGLPerspectiveCameras": OpenGLPerspectiveCameras,
+            "SfMOrthographicCameras": SfMOrthographicCameras,
+            "SfMPerspectiveCameras": SfMPerspectiveCameras,
+            "FoVOrthographicCameras": FoVOrthographicCameras,
+            "FoVPerspectiveCameras": FoVPerspectiveCameras,
+            "OrthographicCameras": OrthographicCameras,
+            "PerspectiveCameras": PerspectiveCameras,
+            "FishEyeCameras": FishEyeCameras,
+        }
+
+        def run_cameras():
+            # init the cameras
+            cameras = init_random_cameras(str2cls[cam_type], batch_size, device=device)
+            # random points, created on the same device as the cameras
+            xyz = torch.randn(num_points, 3, device=device) * 0.3
+            cameras.unproject_points(xyz, scaled_depth_input=True)
+
+        return run_cameras
+
+    def test_project_points_screen(self, batch_size=50, num_points=100):
+        """
+        Checks that transform_points_screen matches a naive conversion of the
+        transform_points_ndc output to screen space.
+        """
+
+        for cam_type in (
+            OpenGLOrthographicCameras,
+            OpenGLPerspectiveCameras,
+            SfMOrthographicCameras,
+            SfMPerspectiveCameras,
+            FoVOrthographicCameras,
+            FoVPerspectiveCameras,
+            OrthographicCameras,
+            PerspectiveCameras,
+        ):
+            # init the cameras
+            cameras = init_random_cameras(cam_type, batch_size)
+            # xyz - the ground truth point cloud
+            xy = torch.randn(batch_size, num_points, 2) * 2.0 - 1.0
+            z = torch.randn(batch_size, num_points, 1) * 3.0 + 1.0
+            xyz = torch.cat((xy, z), dim=2)
+            # image size
+            image_size = torch.randint(low=32, high=64, size=(batch_size, 2))
+            # project points
+            xyz_project_ndc = cameras.transform_points_ndc(xyz)
+            xyz_project_screen = cameras.transform_points_screen(
+                xyz, image_size=image_size
+            )
+            # naive
+            xyz_project_screen_naive = ndc_to_screen_points_naive(
+                xyz_project_ndc, image_size
+            )
+            # we set atol to 1e-4, remember that screen points are in [0, W]x[0, H] space
+            self.assertClose(xyz_project_screen, xyz_project_screen_naive, atol=1e-4)
+
+    @staticmethod
+    def transform_points(
+        cam_type, batch_size=50, num_points=100, device: Device = "cpu"
+    ):
+        """
+        Returns a closure which builds random cameras of the class named `cam_type`
+        and projects random points; only device="cuda" selects cuda:0, else CPU.
+        """
+
+        if device == "cuda":
+            device = torch.device("cuda:0")
+        else:
+            device = torch.device("cpu")
+        str2cls = {  # noqa
+            "OpenGLOrthographicCameras": OpenGLOrthographicCameras,
+            "OpenGLPerspectiveCameras": OpenGLPerspectiveCameras,
+            "SfMOrthographicCameras": SfMOrthographicCameras,
+            "SfMPerspectiveCameras": SfMPerspectiveCameras,
+            "FoVOrthographicCameras": FoVOrthographicCameras,
+            "FoVPerspectiveCameras": FoVPerspectiveCameras,
+            "OrthographicCameras": OrthographicCameras,
+            "PerspectiveCameras": PerspectiveCameras,
+            "FishEyeCameras": FishEyeCameras,
+        }
+
+        def run_cameras():
+            # init the cameras
+            cameras = init_random_cameras(str2cls[cam_type], batch_size, device=device)
+            # random points, created on the same device as the cameras
+            xy = torch.randn(num_points, 2, device=device) * 2.0 - 1.0
+            z = torch.randn(num_points, 1, device=device) * 3.0 + 1.0
+            xyz = torch.cat((xy, z), dim=-1)
+            cameras.transform_points(xyz)
+
+        return run_cameras
+
+    def test_equiv_project_points(self, batch_size=50, num_points=100):
+        """
+        Checks that NDC and screen cameras project points to ndc correctly.
+        Applies only to OrthographicCameras and PerspectiveCameras.
+        """
+        for cam_type in (OrthographicCameras, PerspectiveCameras):
+            # init the cameras
+            ndc_cameras, screen_cameras = self.init_equiv_cameras_ndc_screen(
+                cam_type, batch_size
+            )
+            # xyz - the ground truth point cloud in Py3D space
+            xy = torch.randn(batch_size, num_points, 2) * 0.3
+            z = torch.rand(batch_size, num_points, 1) + 3.0 + 0.1
+            xyz = torch.cat((xy, z), dim=2)
+            # project points
+            xyz_ndc = ndc_cameras.transform_points_ndc(xyz)
+            xyz_screen = screen_cameras.transform_points_ndc(xyz)
+            # check correctness
+            self.assertClose(xyz_ndc, xyz_screen, atol=1e-5)
+
+    def test_clone(self, batch_size: int = 10):
+        """
+        Checks the clone function of the cameras.
+        """
+        for cam_type in (
+            SfMOrthographicCameras,
+            OpenGLPerspectiveCameras,
+            OpenGLOrthographicCameras,
+            SfMPerspectiveCameras,
+            FoVOrthographicCameras,
+            FoVPerspectiveCameras,
+            OrthographicCameras,
+            PerspectiveCameras,
+        ):
+            cameras = init_random_cameras(cam_type, batch_size)
+            cameras = cameras.to(torch.device("cpu"))
+            cameras_clone = cameras.clone()
+
+            for var in cameras.__dict__:
+                val = getattr(cameras, var)
+                val_clone = getattr(cameras_clone, var)
+                if torch.is_tensor(val):
+                    self.assertClose(val, val_clone)
+                    self.assertSeparate(val, val_clone)
+                else:
+                    self.assertTrue(val == val_clone)
+
+    def test_join_cameras_as_batch_errors(self):
+        cam0 = PerspectiveCameras(device="cuda:0")
+        cam1 = OrthographicCameras(device="cuda:0")
+
+        # Cameras not of the same type
+        with self.assertRaisesRegex(ValueError, "same type"):
+            join_cameras_as_batch([cam0, cam1])
+
+        cam2 = OrthographicCameras(device="cpu")
+        # Cameras not on the same device
+        with self.assertRaisesRegex(ValueError, "same device"):
+            join_cameras_as_batch([cam1, cam2])
+
+        cam3 = OrthographicCameras(in_ndc=False, device="cuda:0")
+        # Different coordinate systems -- all should be in ndc or in screen
+        with self.assertRaisesRegex(
+            ValueError, "Attribute _in_ndc is not constant across inputs"
+        ):
+            join_cameras_as_batch([cam1, cam3])
+
+    def join_cameras_as_batch_fov(self, camera_cls):
+        R0 = torch.randn((6, 3, 3))
+        R1 = torch.randn((3, 3, 3))
+        cam0 = camera_cls(znear=10.0, zfar=100.0, R=R0, device="cuda:0")
+        cam1 = camera_cls(znear=10.0, zfar=200.0, R=R1, device="cuda:0")
+
+        cam_batch = join_cameras_as_batch([cam0, cam1])
+
+        self.assertEqual(cam_batch._N, cam0._N + cam1._N)
+        self.assertEqual(cam_batch.device, cam0.device)
+        self.assertClose(cam_batch.R, torch.cat((R0, R1), dim=0).to(device="cuda:0"))
+
+    def join_cameras_as_batch(self, camera_cls):
+        R0 = torch.randn((6, 3, 3))
+        R1 = torch.randn((3, 3, 3))
+        p0 = torch.randn((6, 2, 1))
+        p1 = torch.randn((3, 2, 1))
+        f0 = 5.0
+        f1 = torch.randn(3, 2)
+        f2 = torch.randn(3, 1)
+        cam0 = camera_cls(
+            R=R0,
+            focal_length=f0,
+            principal_point=p0,
+        )
+        cam1 = camera_cls(
+            R=R1,
+            focal_length=f0,
+            principal_point=p1,
+        )
+        cam2 = camera_cls(
+            R=R1,
+            focal_length=f1,
+            principal_point=p1,
+        )
+        cam3 = camera_cls(
+            R=R1,
+            focal_length=f2,
+            principal_point=p1,
+        )
+        cam_batch = join_cameras_as_batch([cam0, cam1])
+
+        self.assertEqual(cam_batch._N, cam0._N + cam1._N)
+        self.assertEqual(cam_batch.device, cam0.device)
+        self.assertClose(cam_batch.R, torch.cat((R0, R1), dim=0))
+        self.assertClose(cam_batch.principal_point, torch.cat((p0, p1), dim=0))
+        self.assertEqual(cam_batch._in_ndc, cam0._in_ndc)
+
+        # Test one broadcasted value and one fixed value
+        # Focal length as (N,) in one camera and (N, 2) in the other
+        cam_batch = join_cameras_as_batch([cam0, cam2])
+        self.assertEqual(cam_batch._N, cam0._N + cam2._N)
+        self.assertClose(cam_batch.R, torch.cat((R0, R1), dim=0))
+        self.assertClose(
+            cam_batch.focal_length,
+            torch.cat([torch.tensor([[f0, f0]]).expand(6, -1), f1], dim=0),
+        )
+
+        # Focal length as (N, 1) in one camera and (N, 2) in the other
+        cam_batch = join_cameras_as_batch([cam2, cam3])
+        self.assertClose(
+            cam_batch.focal_length,
+            torch.cat([f1, f2.expand(-1, 2)], dim=0),
+        )
+
+    def test_join_batch_perspective(self):
+        self.join_cameras_as_batch_fov(FoVPerspectiveCameras)
+        self.join_cameras_as_batch(PerspectiveCameras)
+
+    def test_join_batch_orthographic(self):
+        self.join_cameras_as_batch_fov(FoVOrthographicCameras)
+        self.join_cameras_as_batch(OrthographicCameras)
+
+    def test_iterable(self):
+        for camera_type in [PerspectiveCameras, OrthographicCameras]:
+            a_list = list(camera_type())
+            self.assertEqual(len(a_list), 1)
+
+
+############################################################
+#                FoVPerspective Camera                     #
+############################################################
+
+
+class TestFoVPerspectiveProjection(TestCaseMixin, unittest.TestCase):
+    def test_perspective(self):
+        far = 10.0
+        near = 1.0
+        cameras = FoVPerspectiveCameras(znear=near, zfar=far, fov=60.0)
+        P = cameras.get_projection_transform()
+        # vertices are at the far clipping plane so z gets mapped to 1.
+        vertices = torch.tensor([1, 2, far], dtype=torch.float32)
+        projected_verts = torch.tensor(
+            [np.sqrt(3) / far, 2 * np.sqrt(3) / far, 1.0], dtype=torch.float32
+        )
+        vertices = vertices[None, None, :]
+        v1 = P.transform_points(vertices)
+        v2 = perspective_project_naive(vertices, fov=60.0)
+        self.assertClose(v1[..., :2], v2[..., :2])
+        self.assertClose(far * v1[..., 2], v2[..., 2])
+        self.assertClose(v1.squeeze(), projected_verts)
+
+        # vertices are at the near clipping plane so z gets mapped to 0.0.
+        vertices[..., 2] = near
+        projected_verts = torch.tensor(
+            [np.sqrt(3) / near, 2 * np.sqrt(3) / near, 0.0], dtype=torch.float32
+        )
+        v1 = P.transform_points(vertices)
+        v2 = perspective_project_naive(vertices, fov=60.0)
+        self.assertClose(v1[..., :2], v2[..., :2])
+        self.assertClose(v1.squeeze(), projected_verts)
+
+    def test_perspective_kwargs(self):
+        cameras = FoVPerspectiveCameras(znear=5.0, zfar=100.0, fov=0.0)
+        # Override defaults by passing in values to get_projection_transform
+        far = 10.0
+        P = cameras.get_projection_transform(znear=1.0, zfar=far, fov=60.0)
+        vertices = torch.tensor([1, 2, far], dtype=torch.float32)
+        projected_verts = torch.tensor(
+            [np.sqrt(3) / far, 2 * np.sqrt(3) / far, 1.0], dtype=torch.float32
+        )
+        vertices = vertices[None, None, :]
+        v1 = P.transform_points(vertices)
+        self.assertClose(v1.squeeze(), projected_verts)
+
+    def test_perspective_mixed_inputs_broadcast(self):
+        far = torch.tensor([10.0, 20.0], dtype=torch.float32)
+        near = 1.0
+        fov = torch.tensor(60.0)
+        cameras = FoVPerspectiveCameras(znear=near, zfar=far, fov=fov)
+        P = cameras.get_projection_transform()
+        vertices = torch.tensor([1, 2, 10], dtype=torch.float32)
+        z1 = 1.0  # vertices at far clipping plane so z = 1.0
+        z2 = (20.0 / (20.0 - 1.0) * 10.0 + -20.0 / (20.0 - 1.0)) / 10.0
+        projected_verts = torch.tensor(
+            [
+                [np.sqrt(3) / 10.0, 2 * np.sqrt(3) / 10.0, z1],
+                [np.sqrt(3) / 10.0, 2 * np.sqrt(3) / 10.0, z2],
+            ],
+            dtype=torch.float32,
+        )
+        vertices = vertices[None, None, :]
+        v1 = P.transform_points(vertices)
+        v2 = perspective_project_naive(vertices, fov=60.0)
+        self.assertClose(v1[..., :2], torch.cat([v2, v2])[..., :2])
+        self.assertClose(v1.squeeze(), projected_verts)
+
+    def test_perspective_mixed_inputs_grad(self):
+        """Check the autograd gradient w.r.t. fov against a hand-derived value."""
+        far = torch.tensor([10.0])
+        fov = torch.tensor(60.0, requires_grad=True)
+        cameras = FoVPerspectiveCameras(znear=1.0, zfar=far, fov=fov)
+        P = cameras.get_projection_transform()
+        vertices = torch.tensor([1, 2, 10], dtype=torch.float32)
+        v1 = P.transform_points(vertices[None, None, :]).squeeze()
+        v1.sum().backward()
+        # hasattr(fov, "grad") holds for any tensor; check backward() filled it in.
+        self.assertIsNotNone(fov.grad)
+        fov_grad = fov.grad.clone()
+        half_fov_rad = (math.pi / 180.0) * fov.detach() / 2.0
+        grad_cotan = -(1.0 / (torch.sin(half_fov_rad) ** 2.0) * 1 / 2.0)
+        grad_fov = (math.pi / 180.0) * grad_cotan
+        grad_fov = (vertices[0] + vertices[1]) * grad_fov / 10.0
+        self.assertClose(fov_grad, grad_fov)
+
+    def test_camera_class_init(self):
+        """Check znear/zfar broadcasting and .to(device); moves to cuda:0."""
+        device = torch.device("cuda:0")
+        cam = FoVPerspectiveCameras(znear=10.0, zfar=(100.0, 200.0))
+
+        # Check broadcasting (assertEqual reports both shapes on failure)
+        self.assertEqual(cam.znear.shape, (2,))
+        self.assertEqual(cam.zfar.shape, (2,))
+        # Test to
+        new_cam = cam.to(device=device)
+        self.assertEqual(new_cam.device, device)
+
+    def test_getitem(self):
+        """Check int, list, LongTensor and BoolTensor indexing plus invalid indices."""
+        N_CAMERAS = 6
+        R_matrix = torch.randn((N_CAMERAS, 3, 3))
+        cam = FoVPerspectiveCameras(znear=10.0, zfar=100.0, R=R_matrix)
+
+        # An int index returns the same class with the same attribute keys
+        c0 = cam[0]
+        self.assertIsInstance(c0, FoVPerspectiveCameras)
+        self.assertEqual(cam.__dict__.keys(), c0.__dict__.keys())
+
+        # Check all fields correct in get item with int index
+        self.assertEqual(len(c0), 1)
+        self.assertClose(c0.zfar, torch.tensor([100.0]))
+        self.assertClose(c0.znear, torch.tensor([10.0]))
+        self.assertClose(c0.R, R_matrix[0:1, ...])
+        self.assertEqual(c0.device, torch.device("cpu"))
+
+        # Check list(int) index
+        c012 = cam[[0, 1, 2]]
+        self.assertEqual(len(c012), 3)
+        self.assertClose(c012.zfar, torch.tensor([100.0] * 3))
+        self.assertClose(c012.znear, torch.tensor([10.0] * 3))
+        self.assertClose(c012.R, R_matrix[0:3, ...])
+
+        # Check torch.LongTensor index
+        SLICE = [1, 3, 5]
+        index = torch.tensor(SLICE, dtype=torch.int64)
+        c135 = cam[index]
+        self.assertEqual(len(c135), 3)
+        self.assertClose(c135.zfar, torch.tensor([100.0] * 3))
+        self.assertClose(c135.znear, torch.tensor([10.0] * 3))
+        self.assertClose(c135.R, R_matrix[SLICE, ...])
+
+        # Check torch.BoolTensor index
+        bool_slice = [i in SLICE for i in range(N_CAMERAS)]
+        index = torch.tensor(bool_slice, dtype=torch.bool)
+        c135 = cam[index]
+        self.assertEqual(len(c135), 3)
+        self.assertClose(c135.zfar, torch.tensor([100.0] * 3))
+        self.assertClose(c135.znear, torch.tensor([10.0] * 3))
+        self.assertClose(c135.R, R_matrix[SLICE, ...])
+
+        # Check errors with get item
+        with self.assertRaisesRegex(IndexError, "out of bounds"):
+            cam[N_CAMERAS]
+
+        index = torch.tensor([1, 0, 1], dtype=torch.bool)
+        with self.assertRaisesRegex(ValueError, "does not match cameras"):
+            cam[index]
+
+        with self.assertRaisesRegex(ValueError, "Invalid index type"):
+            cam[slice(0, 1)]
+
+        with self.assertRaisesRegex(ValueError, "Invalid index type"):
+            cam[[True, False]]
+
+        index = torch.tensor(SLICE, dtype=torch.float32)
+        with self.assertRaisesRegex(ValueError, "Invalid index type"):
+            cam[index]
+
+    def test_get_full_transform(self):
+        cam = FoVPerspectiveCameras()
+        T = torch.tensor([0.0, 0.0, 1.0]).view(1, -1)
+        R = look_at_rotation(T)
+        P = cam.get_full_projection_transform(R=R, T=T)
+        self.assertTrue(isinstance(P, Transform3d))
+        self.assertClose(cam.R, R)
+        self.assertClose(cam.T, T)
+
+    def test_transform_points(self):
+        # Check transform_points methods works with default settings for
+        # RT and P
+        far = 10.0
+        cam = FoVPerspectiveCameras(znear=1.0, zfar=far, fov=60.0)
+        points = torch.tensor([1, 2, far], dtype=torch.float32)
+        points = points.view(1, 1, 3).expand(5, 10, -1)
+        projected_points = torch.tensor(
+            [np.sqrt(3) / far, 2 * np.sqrt(3) / far, 1.0], dtype=torch.float32
+        )
+        projected_points = projected_points.view(1, 1, 3).expand(5, 10, -1)
+        new_points = cam.transform_points(points)
+        self.assertClose(new_points, projected_points)
+
+    def test_perspective_type(self):
+        cam = FoVPerspectiveCameras(znear=1.0, zfar=10.0, fov=60.0)
+        self.assertTrue(cam.is_perspective())
+        self.assertEqual(cam.get_znear(), 1.0)
+
+
+############################################################
+#                FoVOrthographic Camera                    #
+############################################################
+
+
+class TestFoVOrthographicProjection(TestCaseMixin, unittest.TestCase):
+    def test_orthographic(self):
+        far = 10.0
+        near = 1.0
+        cameras = FoVOrthographicCameras(znear=near, zfar=far)
+        P = cameras.get_projection_transform()
+
+        vertices = torch.tensor([1, 2, far], dtype=torch.float32)
+        projected_verts = torch.tensor([1, 2, 1], dtype=torch.float32)
+        vertices = vertices[None, None, :]
+        v1 = P.transform_points(vertices)
+        v2 = orthographic_project_naive(vertices)
+        self.assertClose(v1[..., :2], v2[..., :2])
+        self.assertClose(v1.squeeze(), projected_verts)
+
+        vertices[..., 2] = near
+        projected_verts[2] = 0.0
+        v1 = P.transform_points(vertices)
+        v2 = orthographic_project_naive(vertices)
+        self.assertClose(v1[..., :2], v2[..., :2])
+        self.assertClose(v1.squeeze(), projected_verts)
+
+    def test_orthographic_scaled(self):
+        vertices = torch.tensor([1, 2, 0.5], dtype=torch.float32)
+        vertices = vertices[None, None, :]
+        scale = torch.tensor([[2.0, 0.5, 20]])
+        # applying the scale puts the z coordinate at the far clipping plane
+        # so the z is mapped to 1.0
+        projected_verts = torch.tensor([2, 1, 1], dtype=torch.float32)
+        cameras = FoVOrthographicCameras(znear=1.0, zfar=10.0, scale_xyz=scale)
+        P = cameras.get_projection_transform()
+        v1 = P.transform_points(vertices)
+        v2 = orthographic_project_naive(vertices, scale)
+        self.assertClose(v1[..., :2], v2[..., :2])
+        self.assertClose(v1, projected_verts[None, None])
+
+    def test_orthographic_kwargs(self):
+        cameras = FoVOrthographicCameras(znear=5.0, zfar=100.0)
+        far = 10.0
+        P = cameras.get_projection_transform(znear=1.0, zfar=far)
+        vertices = torch.tensor([1, 2, far], dtype=torch.float32)
+        projected_verts = torch.tensor([1, 2, 1], dtype=torch.float32)
+        vertices = vertices[None, None, :]
+        v1 = P.transform_points(vertices)
+        self.assertClose(v1.squeeze(), projected_verts)
+
+    def test_orthographic_mixed_inputs_broadcast(self):
+        far = torch.tensor([10.0, 20.0])
+        near = 1.0
+        cameras = FoVOrthographicCameras(znear=near, zfar=far)
+        P = cameras.get_projection_transform()
+        vertices = torch.tensor([1.0, 2.0, 10.0], dtype=torch.float32)
+        z2 = 1.0 / (20.0 - 1.0) * 10.0 + -1.0 / (20.0 - 1.0)
+        projected_verts = torch.tensor(
+            [[1.0, 2.0, 1.0], [1.0, 2.0, z2]], dtype=torch.float32
+        )
+        vertices = vertices[None, None, :]
+        v1 = P.transform_points(vertices)
+        v2 = orthographic_project_naive(vertices)
+        self.assertClose(v1[..., :2], torch.cat([v2, v2])[..., :2])
+        self.assertClose(v1.squeeze(), projected_verts)
+
+    def test_orthographic_mixed_inputs_grad(self):
+        """Check the autograd gradient w.r.t. scale_xyz against a hand-built value."""
+        far = torch.tensor([10.0])
+        scale = torch.tensor([[1.0, 1.0, 1.0]], requires_grad=True)
+        cameras = FoVOrthographicCameras(znear=1.0, zfar=far, scale_xyz=scale)
+        P = cameras.get_projection_transform()
+        vertices = torch.tensor([1.0, 2.0, 10.0], dtype=torch.float32)
+        v1 = P.transform_points(vertices[None, None, :])
+        v1.sum().backward()
+        # hasattr(scale, "grad") holds for any tensor; check backward() filled it in.
+        self.assertIsNotNone(scale.grad)
+        scale_grad = scale.grad.clone()
+        grad_scale = torch.tensor(
+            [
+                [
+                    vertices[0] * P._matrix[:, 0, 0],
+                    vertices[1] * P._matrix[:, 1, 1],
+                    vertices[2] * P._matrix[:, 2, 2],
+                ]
+            ]
+        )
+        self.assertClose(scale_grad, grad_scale)
+
+    def test_perspective_type(self):
+        cam = FoVOrthographicCameras(znear=1.0, zfar=10.0)
+        self.assertFalse(cam.is_perspective())
+        self.assertEqual(cam.get_znear(), 1.0)
+
+    def test_getitem(self):
+        """Indexing must keep the camera class, its attribute keys and its values."""
+        R_matrix = torch.randn((6, 3, 3))
+        scale = torch.tensor([[1.0, 1.0, 1.0]], requires_grad=True)
+        cam = FoVOrthographicCameras(
+            znear=10.0, zfar=100.0, R=R_matrix, scale_xyz=scale
+        )
+
+        # An int index returns the same class with the same attribute keys
+        c0 = cam[0]
+        self.assertIsInstance(c0, FoVOrthographicCameras)
+        self.assertEqual(cam.__dict__.keys(), c0.__dict__.keys())
+
+        # Check torch.LongTensor index
+        index = torch.tensor([1, 3, 5], dtype=torch.int64)
+        c135 = cam[index]
+        self.assertEqual(len(c135), 3)
+        self.assertClose(c135.zfar, torch.tensor([100.0] * 3))
+        self.assertClose(c135.znear, torch.tensor([10.0] * 3))
+        self.assertClose(c135.min_x, torch.tensor([-1.0] * 3))
+        self.assertClose(c135.max_x, torch.tensor([1.0] * 3))
+        self.assertClose(c135.R, R_matrix[[1, 3, 5], ...])
+        self.assertClose(c135.scale_xyz, scale.expand(3, -1))
+
+
+############################################################
+#                Orthographic Camera                       #
+############################################################
+
+
+class TestOrthographicProjection(TestCaseMixin, unittest.TestCase):
+    def test_orthographic(self):
+        cameras = OrthographicCameras()
+        P = cameras.get_projection_transform()
+
+        vertices = torch.randn([3, 4, 3], dtype=torch.float32)
+        projected_verts = vertices.clone()
+        v1 = P.transform_points(vertices)
+        v2 = orthographic_project_naive(vertices)
+
+        self.assertClose(v1[..., :2], v2[..., :2])
+        self.assertClose(v1, projected_verts)
+
+    def test_orthographic_scaled(self):
+        focal_length_x = 10.0
+        focal_length_y = 15.0
+
+        cameras = OrthographicCameras(focal_length=((focal_length_x, focal_length_y),))
+        P = cameras.get_projection_transform()
+
+        vertices = torch.randn([3, 4, 3], dtype=torch.float32)
+        projected_verts = vertices.clone()
+        projected_verts[:, :, 0] *= focal_length_x
+        projected_verts[:, :, 1] *= focal_length_y
+        v1 = P.transform_points(vertices)
+        v2 = orthographic_project_naive(
+            vertices, scale_xyz=(focal_length_x, focal_length_y, 1.0)
+        )
+        v3 = cameras.transform_points(vertices)
+        self.assertClose(v1[..., :2], v2[..., :2])
+        self.assertClose(v3[..., :2], v2[..., :2])
+        self.assertClose(v1, projected_verts)
+
+    def test_orthographic_kwargs(self):
+        cameras = OrthographicCameras(focal_length=5.0, principal_point=((2.5, 2.5),))
+        P = cameras.get_projection_transform(
+            focal_length=2.0, principal_point=((2.5, 3.5),)
+        )
+        vertices = torch.randn([3, 4, 3], dtype=torch.float32)
+        projected_verts = vertices.clone()
+        projected_verts[:, :, :2] *= 2.0
+        projected_verts[:, :, 0] += 2.5
+        projected_verts[:, :, 1] += 3.5
+        v1 = P.transform_points(vertices)
+        self.assertClose(v1, projected_verts)
+
+    def test_perspective_type(self):
+        cam = OrthographicCameras(focal_length=5.0, principal_point=((2.5, 2.5),))
+        self.assertFalse(cam.is_perspective())
+        self.assertIsNone(cam.get_znear())
+
+    def test_getitem(self):
+        """Indexing must keep the camera class, its attribute keys and its values."""
+        R_matrix = torch.randn((6, 3, 3))
+        principal_point = torch.randn((6, 2, 1))
+        focal_length = 5.0
+        cam = OrthographicCameras(
+            R=R_matrix,
+            focal_length=focal_length,
+            principal_point=principal_point,
+        )
+
+        # An int index returns the same class with the same attribute keys
+        c0 = cam[0]
+        self.assertIsInstance(c0, OrthographicCameras)
+        self.assertEqual(cam.__dict__.keys(), c0.__dict__.keys())
+
+        # Check torch.LongTensor index
+        index = torch.tensor([1, 3, 5], dtype=torch.int64)
+        c135 = cam[index]
+        self.assertEqual(len(c135), 3)
+        self.assertClose(c135.focal_length, torch.tensor([[5.0, 5.0]] * 3))
+        self.assertClose(c135.R, R_matrix[[1, 3, 5], ...])
+        self.assertClose(c135.principal_point, principal_point[[1, 3, 5], ...])
+
+
+############################################################
+#                Perspective Camera                        #
+############################################################
+
+
+class TestPerspectiveProjection(TestCaseMixin, unittest.TestCase):
+    def test_perspective(self):
+        cameras = PerspectiveCameras()
+        P = cameras.get_projection_transform()
+
+        vertices = torch.randn([3, 4, 3], dtype=torch.float32)
+        v1 = P.transform_points(vertices)
+        v2 = sfm_perspective_project_naive(vertices)
+        self.assertClose(v1, v2)
+
+    def test_perspective_scaled(self):
+        focal_length_x = 10.0
+        focal_length_y = 15.0
+        p0x = 15.0
+        p0y = 30.0
+
+        cameras = PerspectiveCameras(
+            focal_length=((focal_length_x, focal_length_y),),
+            principal_point=((p0x, p0y),),
+        )
+        P = cameras.get_projection_transform()
+
+        vertices = torch.randn([3, 4, 3], dtype=torch.float32)
+        v1 = P.transform_points(vertices)
+        v2 = sfm_perspective_project_naive(
+            vertices, fx=focal_length_x, fy=focal_length_y, p0x=p0x, p0y=p0y
+        )
+        v3 = cameras.transform_points(vertices)
+        self.assertClose(v1, v2)
+        self.assertClose(v3[..., :2], v2[..., :2])
+
+    def test_perspective_kwargs(self):
+        cameras = PerspectiveCameras(focal_length=5.0, principal_point=((2.5, 2.5),))
+        P = cameras.get_projection_transform(
+            focal_length=2.0, principal_point=((2.5, 3.5),)
+        )
+        vertices = torch.randn([3, 4, 3], dtype=torch.float32)
+        v1 = P.transform_points(vertices)
+        v2 = sfm_perspective_project_naive(vertices, fx=2.0, fy=2.0, p0x=2.5, p0y=3.5)
+        self.assertClose(v1, v2, atol=1e-6)
+
+    def test_perspective_type(self):
+        cam = PerspectiveCameras(focal_length=5.0, principal_point=((2.5, 2.5),))
+        self.assertTrue(cam.is_perspective())
+        self.assertIsNone(cam.get_znear())
+
+    def test_getitem(self):
+        """Indexing must keep the camera class, its attribute keys and its values."""
+        R_matrix = torch.randn((6, 3, 3))
+        principal_point = torch.randn((6, 2, 1))
+        focal_length = 5.0
+        cam = PerspectiveCameras(
+            R=R_matrix,
+            focal_length=focal_length,
+            principal_point=principal_point,
+        )
+
+        # An int index returns the same class with the same attribute keys
+        c0 = cam[0]
+        self.assertIsInstance(c0, PerspectiveCameras)
+        self.assertEqual(cam.__dict__.keys(), c0.__dict__.keys())
+
+        # Check torch.LongTensor index
+        index = torch.tensor([1, 3, 5], dtype=torch.int64)
+        c135 = cam[index]
+        self.assertEqual(len(c135), 3)
+        self.assertClose(c135.focal_length, torch.tensor([[5.0, 5.0]] * 3))
+        self.assertClose(c135.R, R_matrix[[1, 3, 5], ...])
+        self.assertClose(c135.principal_point, principal_point[[1, 3, 5], ...])
+
+        # Check in_ndc is handled correctly
+        self.assertEqual(cam._in_ndc, c0._in_ndc)
+
+    def test_clone_picklable(self):
+        camera = PerspectiveCameras()
+        pickle.dumps(camera)
+        pickle.dumps(camera.clone())
+
+
+############################################################
+#                FishEye Camera                            #
+############################################################
+
+
+class TestFishEyeProjection(TestCaseMixin, unittest.TestCase):
+    def setUpSimpleCase(self) -> tuple:
+        """
+        Build the inputs shared by the simple fisheye tests.
+
+        Returns (focal, principal_point, p_3d) of shape (1, 1), (1, 2) and (2, 3).
+        """
+        super().setUp()
+        focal = torch.tensor([[240]], dtype=torch.float32)
+        # NOTE: no dtype is given here, so principal_point is an integer tensor.
+        principal_point = torch.tensor([[320, 240]])
+        p_3d = torch.tensor([[2.0, 3.0, 1.0], [3.0, 2.0, 1.0]], dtype=torch.float32)
+        return focal, principal_point, p_3d
+
+    def setUpAriaCase(self) -> tuple:
+        """Return (focal, principal_point, radial, tangential, thin_prism) tensors."""
+        super().setUp()
+        torch.manual_seed(42)  # side effect: reseeds torch's global RNG
+        # Every tensor below keeps a leading batch dimension of size 1.
+        focal = torch.tensor([[608.9255557152]], dtype=torch.float32)
+        principal_point = torch.tensor(
+            [[712.0114821205, 706.8666571177]], dtype=torch.float32
+        )
+        radial_params = torch.tensor(
+            [
+                [
+                    0.3877090026,
+                    -0.315613384,
+                    -0.3434984955,
+                    1.8565874201,
+                    -2.1799372221,
+                    0.7713834763,
+                ],
+            ],
+            dtype=torch.float32,
+        )
+        tangential_params = torch.tensor(
+            [[-0.0002747019, 0.0005228974]], dtype=torch.float32
+        )
+        thin_prism_params = torch.tensor(
+            [[0.000134884, -0.000084822, -0.0009420014, -0.0001276838]],
+            dtype=torch.float32,
+        )
+        return (
+            focal,
+            principal_point,
+            radial_params,
+            tangential_params,
+            thin_prism_params,
+        )
+
+    def setUpBatchCameras(self, combination=None) -> FishEyeCameras:
+        """
+        Build a two-camera FishEyeCameras batch: the simple case with zeroed
+        distortion parameters, then the Aria case.
+
+        Args:
+            combination: (use_radial, use_tangential, use_thin_prism); None = all on.
+        """
+        super().setUp()
+        focal, principal_point, _ = self.setUpSimpleCase()
+        radial_params = torch.tensor([[0, 0, 0, 0, 0, 0]], dtype=torch.float32)
+        tangential_params = torch.tensor([[0, 0]], dtype=torch.float32)
+        thin_prism_params = torch.tensor([[0, 0, 0, 0]], dtype=torch.float32)
+        (
+            focal1,
+            principal_point1,
+            radial_params1,
+            tangential_params1,
+            thin_prism_params1,
+        ) = self.setUpAriaCase()
+        focal = torch.cat([focal, focal1], dim=0)
+        principal_point = torch.cat([principal_point, principal_point1], dim=0)
+        radial_params = torch.cat([radial_params, radial_params1], dim=0)
+        tangential_params = torch.cat([tangential_params, tangential_params1], dim=0)
+        thin_prism_params = torch.cat([thin_prism_params, thin_prism_params1], dim=0)
+        if combination is None:
+            combination = [True, True, True]
+        return FishEyeCameras(
+            use_radial=combination[0],
+            use_tangential=combination[1],
+            use_thin_prism=combination[2],
+            focal_length=focal,
+            principal_point=principal_point,
+            radial_params=radial_params,
+            tangential_params=tangential_params,
+            thin_prism_params=thin_prism_params,
+        )
+
+    def test_distortion_params_set_to_zeors(self):
+        # test case 1: all distortion params are 0. Note that
+        # setting radial_params to zeros is not equivalent to
+        # disabling radial distortions, set use_radial=False does
+        focal, principal_point, p_3d = self.setUpSimpleCase()
+        cameras = FishEyeCameras(
+            focal_length=focal,
+            principal_point=principal_point,
+        )
+        uv_case1 = cameras.transform_points(p_3d)
+        self.assertClose(
+            uv_case1,
+            torch.tensor(
+                [[493.0993, 499.6489, 1.0], [579.6489, 413.0993, 1.0]],
+            ),
+        )
+        # test case 2: equivalent of test case 1 by
+        # disabling use_tangential and use_thin_prism
+        cameras = FishEyeCameras(
+            focal_length=focal,
+            principal_point=principal_point,
+            use_tangential=False,
+            use_thin_prism=False,
+        )
+        uv_case2 = cameras.transform_points(p_3d)
+        self.assertClose(uv_case2, uv_case1)
+
+    def test_fisheye_against_perspective_cameras(self):
+        # test case: check equivalence with PerspectiveCameras
+        # by disabling all distortions
+        focal, principal_point, p_3d = self.setUpSimpleCase()
+        cameras = PerspectiveCameras(
+            focal_length=focal,
+            principal_point=principal_point,
+        )
+        P = cameras.get_projection_transform()
+        uv_perspective = P.transform_points(p_3d)
+
+        # disable all distortions
+        cameras = FishEyeCameras(
+            focal_length=focal,
+            principal_point=principal_point,
+            use_radial=False,
+            use_tangential=False,
+            use_thin_prism=False,
+        )
+        uv = cameras.transform_points(p_3d)
+        self.assertClose(uv, uv_perspective)
+
+    def test_project_shape_broadcasts(self):  # output shapes of transform_points
+        focal, principal_point, p_3d = self.setUpSimpleCase()
+        torch.set_printoptions(precision=6)  # NOTE(review): global, not restored here
+        combinations = product([0, 1], repeat=3)
+        for combination in combinations:
+            cameras = FishEyeCameras(  # NOTE(review): overwritten below before use
+                use_radial=combination[0],
+                use_tangential=combination[1],
+                use_thin_prism=combination[2],
+                focal_length=focal,
+                principal_point=principal_point,
+            )
+            # test case 1:
+            # 1 transform with points of shape (P, 3) -> (P, 3)
+            # 1 transform with points of shape (1, P, 3) -> (1, P, 3)
+            # 1 transform with points of shape (M, P, 3) -> (M, P, 3)
+            points = p_3d.repeat(1, 1, 1)
+            cameras = FishEyeCameras(  # all distortions off, whatever combination is
+                focal_length=focal,
+                principal_point=principal_point,
+                use_radial=False,
+                use_tangential=False,
+                use_thin_prism=False,
+            )
+            uv = cameras.transform_points(p_3d)
+            uv_point_batch = cameras.transform_points(points)
+            self.assertClose(uv_point_batch, uv.repeat(1, 1, 1))
+
+        points = p_3d.repeat(3, 1, 1)  # after the loop: reuses its last cameras/uv
+        uv_point_batch = cameras.transform_points(points)
+        self.assertClose(uv_point_batch, uv.repeat(3, 1, 1))
+
+        # test case 2
+        # test with N transforms and points of shape (P, 3) -> (N, P, 3)
+        # test with N transforms and points of shape (1, P, 3) -> (N, P, 3)
+        torch.set_printoptions(sci_mode=False)
+        p_3d = torch.tensor(
+            [
+                [2.0, 3.0, 1.0],
+                [3.0, 2.0, 1.0],
+            ]
+        )
+        expected_res = torch.tensor(  # indexed below by position in product([0, 1], repeat=3)
+            [
+                [
+                    [
+                        [800.000000, 960.000000, 1.000000],
+                        [1040.000000, 720.000000, 1.000000],
+                    ],
+                    [
+                        [1929.862549, 2533.643311, 1.000000],
+                        [2538.788086, 1924.717773, 1.000000],
+                    ],
+                ],
+                [
+                    [
+                        [800.000000, 960.000000, 1.000000],
+                        [1040.000000, 720.000000, 1.000000],
+                    ],
+                    [
+                        [1927.272095, 2524.220459, 1.000000],
+                        [2536.197754, 1915.295166, 1.000000],
+                    ],
+                ],
+                [
+                    [
+                        [800.000000, 960.000000, 1.000000],
+                        [1040.000000, 720.000000, 1.000000],
+                    ],
+                    [
+                        [1930.050293, 2538.434814, 1.000000],
+                        [2537.956543, 1927.569092, 1.000000],
+                    ],
+                ],
+                [
+                    [
+                        [800.000000, 960.000000, 1.000000],
+                        [1040.000000, 720.000000, 1.000000],
+                    ],
+                    [
+                        [1927.459839, 2529.011963, 1.000000],
+                        [2535.366211, 1918.146484, 1.000000],
+                    ],
+                ],
+                [
+                    [
+                        [493.099304, 499.648926, 1.000000],
+                        [579.648926, 413.099304, 1.000000],
+                    ],
+                    [
+                        [1662.673950, 2132.860352, 1.000000],
+                        [2138.005127, 1657.529053, 1.000000],
+                    ],
+                ],
+                [
+                    [
+                        [493.099304, 499.648926, 1.000000],
+                        [579.648926, 413.099304, 1.000000],
+                    ],
+                    [
+                        [1660.083496, 2123.437744, 1.000000],
+                        [2135.414795, 1648.106445, 1.000000],
+                    ],
+                ],
+                [
+                    [
+                        [493.099304, 499.648926, 1.000000],
+                        [579.648926, 413.099304, 1.000000],
+                    ],
+                    [
+                        [1662.861816, 2137.651855, 1.000000],
+                        [2137.173828, 1660.380371, 1.000000],
+                    ],
+                ],
+                [
+                    [
+                        [493.099304, 499.648926, 1.000000],
+                        [579.648926, 413.099304, 1.000000],
+                    ],
+                    [
+                        [1660.271240, 2128.229248, 1.000000],
+                        [2134.583496, 1650.957764, 1.000000],
+                    ],
+                ],
+            ]
+        )
+        combinations = product([0, 1], repeat=3)
+        for i, combination in enumerate(combinations):
+            cameras = self.setUpBatchCameras(combination)
+            uv_point_batch = cameras.transform_points(p_3d)
+            self.assertClose(uv_point_batch, expected_res[i])
+
+            uv_point_batch = cameras.transform_points(p_3d.repeat(1, 1, 1))
+            self.assertClose(uv_point_batch, expected_res[i].repeat(1, 1, 1))
+
+    def test_cuda(self):
+        """
+        Test cuda device
+        """
+        focal, principal_point, p_3d = self.setUpSimpleCase()
+        cameras_cuda = FishEyeCameras(
+            focal_length=focal,
+            principal_point=principal_point,
+            device="cuda:0",
+        )
+        uv = cameras_cuda.transform_points(p_3d)
+        expected_res = torch.tensor(
+            [[493.0993, 499.6489, 1.0], [579.6489, 413.0993, 1.0]],
+        )
+        self.assertClose(uv, expected_res.to("cuda:0"))
+
+        rep_3d = cameras_cuda.unproject_points(uv)
+        self.assertClose(rep_3d, p_3d.to("cuda:0"))
+
+    def test_unproject_shape_broadcasts(self):
+        # test case 1:
+        # 1 transform with points of (P, 3) -> (P, 3)
+        # 1 transform with points of (M, P, 3) -> (M, P, 3)
+        (
+            focal,
+            principal_point,
+            radial_params,
+            tangential_params,
+            thin_prism_params,
+        ) = self.setUpAriaCase()
+        xy_depth = torch.tensor(
+            [
+                [2134.5814033, 1650.95653328, 1.0],
+                [1074.25442904, 1159.52461285, 1.0],
+            ]
+        )
+        cameras = FishEyeCameras(
+            focal_length=focal,
+            principal_point=principal_point,
+            radial_params=radial_params,
+            tangential_params=tangential_params,
+            thin_prism_params=thin_prism_params,
+        )
+        rep_3d = cameras.unproject_points(xy_depth)
+        expected_res = torch.tensor(
+            [
+                [[2.999442, 1.990583, 1.000000], [0.666728, 0.833142, 1.000000]],
+                [[2.997338, 2.005411, 1.000000], [0.666859, 0.834456, 1.000000]],
+                [[3.002090, 1.985229, 1.000000], [0.666537, 0.832025, 1.000000]],
+                [[2.999999, 2.000000, 1.000000], [0.666667, 0.833333, 1.000000]],
+                [[2.999442, 1.990583, 1.000000], [0.666728, 0.833142, 1.000000]],
+                [[2.997338, 2.005411, 1.000000], [0.666859, 0.834456, 1.000000]],
+                [[3.002090, 1.985229, 1.000000], [0.666537, 0.832025, 1.000000]],
+                [[2.999999, 2.000000, 1.000000], [0.666667, 0.833333, 1.000000]],
+            ]
+        )
+        torch.set_printoptions(precision=6)
+        combinations = product([0, 1], repeat=3)
+        for i, combination in enumerate(combinations):
+            cameras = FishEyeCameras(
+                use_radial=combination[0],
+                use_tangential=combination[1],
+                use_thin_prism=combination[2],
+                focal_length=focal,
+                principal_point=principal_point,
+                radial_params=radial_params,
+                tangential_params=tangential_params,
+                thin_prism_params=thin_prism_params,
+            )
+            rep_3d = cameras.unproject_points(xy_depth)
+            self.assertClose(rep_3d, expected_res[i])
+            rep_3d = cameras.unproject_points(xy_depth.repeat(3, 1, 1))
+            self.assertClose(rep_3d, expected_res[i].repeat(3, 1, 1))
+
+            # test case 2:
+            # N transforms with points of (P, 3) -> (N, P, 3)
+            # N transforms with points of (1, P, 3) -> (N, P, 3)
+            cameras = FishEyeCameras(
+                use_radial=combination[0],
+                use_tangential=combination[1],
+                use_thin_prism=combination[2],
+                focal_length=focal.repeat(2, 1),
+                principal_point=principal_point.repeat(2, 1),
+                radial_params=radial_params.repeat(2, 1),
+                tangential_params=tangential_params.repeat(2, 1),
+                thin_prism_params=thin_prism_params.repeat(2, 1),
+            )
+            rep_3d = cameras.unproject_points(xy_depth)
+            self.assertClose(rep_3d, expected_res[i].repeat(2, 1, 1))
+
+    def test_unhandled_shape(self):
+        """
+        Test error handling when shape of transforms
+        and points are not expected.
+        """
+        cameras = self.setUpBatchCameras(None)
+        points = torch.rand(3, 3, 1)
+        with self.assertRaises(ValueError):
+            cameras.transform_points(points)
+
+    def test_getitem(self):
+        """Indexing a camera batch must keep the class and its attribute keys."""
+        # Index the camera batch built by setUpBatchCameras with all flags on
+        cam = self.setUpBatchCameras(None)
+        c0 = cam[0]
+        self.assertIsInstance(c0, FishEyeCameras)
+        self.assertEqual(cam.__dict__.keys(), c0.__dict__.keys())
diff --git a/pytorch3d/tests/test_cameras_alignment.py b/pytorch3d/tests/test_cameras_alignment.py
new file mode 100644
index 0000000000000000000000000000000000000000..3481064807195dacdbd6b4bd8878c2c869afba0c
--- /dev/null
+++ b/pytorch3d/tests/test_cameras_alignment.py
@@ -0,0 +1,177 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import numpy as np
+import torch
+from pytorch3d.ops import corresponding_cameras_alignment
+from pytorch3d.renderer.cameras import (
+    OpenGLOrthographicCameras,
+    OpenGLPerspectiveCameras,
+    SfMOrthographicCameras,
+    SfMPerspectiveCameras,
+)
+from pytorch3d.transforms.rotation_conversions import random_rotations
+from pytorch3d.transforms.so3 import so3_exp_map, so3_relative_angle
+
+from .common_testing import TestCaseMixin
+from .test_cameras import init_random_cameras
+
+
+class TestCamerasAlignment(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)  # fixed seeds: the random setups below are reproducible
+        np.random.seed(42)
+
+    def test_corresponding_cameras_alignment(self):
+        """
+        Checks corresponding_cameras_alignment over a grid of random setups.
+        """
+        device = torch.device("cuda:0")  # hard-coded: this test needs a CUDA device
+
+        # try a few different random setups
+        for _ in range(3):
+            for estimate_scale in (True, False):
+                # init the ground-truth alignment transform (rotation, translation)
+                R_align_gt = random_rotations(1, device=device)[0]
+                T_align_gt = torch.randn(3, dtype=torch.float32, device=device)
+
+                # init the ground-truth scale: exp(randn) if it is estimated, else 1
+                if estimate_scale:
+                    s_align_gt = torch.randn(
+                        1, dtype=torch.float32, device=device
+                    ).exp()
+                else:
+                    s_align_gt = torch.tensor(1.0, dtype=torch.float32, device=device)
+
+                for cam_type in (
+                    SfMOrthographicCameras,
+                    OpenGLPerspectiveCameras,
+                    OpenGLOrthographicCameras,
+                    SfMPerspectiveCameras,
+                ):
+                    # try well-determined and underdetermined cases
+                    for batch_size in (10, 4, 3, 2, 1):
+                        # get random cameras of the current type on the test device
+                        cameras = init_random_cameras(
+                            cam_type, batch_size, random_z=True
+                        ).to(device)
+                        # try all alignment modes
+                        for mode in ("extrinsics", "centers"):
+                            # try a noise-free case and two noise levels
+                            for add_noise in (0.0, 0.01, 1e-4):
+                                self._corresponding_cameras_alignment_test_case(
+                                    cameras,
+                                    R_align_gt,
+                                    T_align_gt,
+                                    s_align_gt,
+                                    estimate_scale,
+                                    mode,
+                                    add_noise,
+                                )
+
+    def _corresponding_cameras_alignment_test_case(
+        self,
+        cameras,
+        R_align_gt,
+        T_align_gt,
+        s_align_gt,
+        estimate_scale,
+        mode,
+        add_noise,
+    ):
+        batch_size = cameras.R.shape[0]
+        # Runs one configuration of the sweep: build targets, align, compare.
+        # get the target extrinsics by applying the ground-truth alignment
+        R_new = torch.bmm(R_align_gt[None].expand_as(cameras.R), cameras.R)
+        T_new = (
+            torch.bmm(T_align_gt[None, None].repeat(batch_size, 1, 1), cameras.R)[:, 0]
+            + cameras.T
+        ) * s_align_gt
+
+        if add_noise != 0.0:  # perturb the target rotations and translations
+            R_new = torch.bmm(R_new, so3_exp_map(torch.randn_like(T_new) * add_noise))
+            T_new += torch.randn_like(T_new) * add_noise
+
+        # create new cameras from R_new and T_new
+        cameras_tgt = cameras.clone()
+        cameras_tgt.R = R_new
+        cameras_tgt.T = T_new
+
+        # align cameras and cameras_tgt
+        cameras_aligned = corresponding_cameras_alignment(
+            cameras, cameras_tgt, estimate_scale=estimate_scale, mode=mode
+        )
+
+        if batch_size <= 2 and mode == "centers":
+            # underdetermined case - check only the center alignment error
+            # since the rotation and translation are ambiguous here
+            self.assertClose(
+                cameras_aligned.get_camera_center(),
+                cameras_tgt.get_camera_center(),
+                atol=max(add_noise * 7.0, 1e-4),
+            )
+
+        else:
+
+            def _rmse(a):  # root of the mean squared L2 norm taken along dim 1
+                return (torch.norm(a, dim=1, p=2) ** 2).mean().sqrt()
+
+            if add_noise != 0.0:
+                # in the noisy case check the mean rotation/translation error for
+                # extrinsic alignment and the RMS center error for center alignment
+                if mode == "centers":
+                    self.assertNormsClose(
+                        cameras_aligned.get_camera_center(),
+                        cameras_tgt.get_camera_center(),
+                        _rmse,
+                        atol=max(add_noise * 10.0, 1e-4),
+                    )
+                elif mode == "extrinsics":
+                    angle_err = so3_relative_angle(
+                        cameras_aligned.R, cameras_tgt.R, cos_angle=True
+                    ).mean()  # cos_angle=True, hence the comparison against 1 below
+                    self.assertClose(
+                        angle_err, torch.ones_like(angle_err), atol=add_noise * 0.03
+                    )
+                    self.assertNormsClose(
+                        cameras_aligned.T, cameras_tgt.T, _rmse, atol=add_noise * 7.0
+                    )
+                else:
+                    raise ValueError(mode)
+
+            else:
+                # noise-free case: compare the rotations and translations of cameras
+                self.assertClose(cameras_aligned.R, cameras_tgt.R, atol=3e-4)
+                self.assertClose(cameras_aligned.T, cameras_tgt.T, atol=3e-4)
+                # compare the centers
+                self.assertClose(
+                    cameras_aligned.get_camera_center(),
+                    cameras_tgt.get_camera_center(),
+                    atol=3e-4,
+                )
+
+    @staticmethod
+    def corresponding_cameras_alignment(
+        batch_size: int, estimate_scale: bool, mode: str, cam_type=SfMPerspectiveCameras
+    ):
+        device = torch.device("cuda:0")
+        cameras_src, cameras_tgt = [
+            init_random_cameras(cam_type, batch_size, random_z=True).to(device)
+            for _ in range(2)
+        ]
+        # Returns a zero-argument closure; sync first so setup work is finished.
+        torch.cuda.synchronize()
+
+        def compute_corresponding_cameras_alignment():
+            corresponding_cameras_alignment(  # the imported function, not this method
+                cameras_src, cameras_tgt, estimate_scale=estimate_scale, mode=mode
+            )
+            torch.cuda.synchronize()
+
+        return compute_corresponding_cameras_alignment
diff --git a/pytorch3d/tests/test_chamfer.py b/pytorch3d/tests/test_chamfer.py
new file mode 100644
index 0000000000000000000000000000000000000000..142fbcd873c82fdaa8219f4acc5c2f82b4cc042b
--- /dev/null
+++ b/pytorch3d/tests/test_chamfer.py
@@ -0,0 +1,1159 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from collections import namedtuple
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from pytorch3d.loss import chamfer_distance
+from pytorch3d.structures.pointclouds import Pointclouds
+
+from .common_testing import get_random_cuda_device, TestCaseMixin
+
+
+# Record returned by TestChamfer.init_pointclouds (lengths, clouds, tensors, weights)
+points_normals = namedtuple(
+    "points_normals", "p1_lengths p2_lengths cloud1 cloud2 p1 p2 n1 n2 weights"
+)
+
+
+class TestChamfer(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(1)  # fixed torch seed so the random clouds are reproducible
+
+    @staticmethod
+    def init_pointclouds(
+        N, P1, P2, device, requires_grad: bool = True, allow_empty: bool = True
+    ):
+        """
+        Create 2 Pointclouds objects and the associated padded points/normals
+        tensors by starting from lists. The clouds and tensors hold the same data
+        (each list entry is a cloned slice of a padded tensor). The leaf nodes of
+        the clouds are the list tensors; the padded tensors are leaf nodes too.
+        """
+        low = 0 if allow_empty else 1  # smallest cloud length that may be drawn
+        p1_lengths = torch.randint(low, P1, size=(N,), dtype=torch.int64, device=device)
+        p2_lengths = torch.randint(low, P2, size=(N,), dtype=torch.int64, device=device)
+        P1 = p1_lengths.max().item()  # padded sizes shrink to the longest cloud drawn
+        P2 = p2_lengths.max().item()
+        weights = torch.rand((N,), dtype=torch.float32, device=device)
+
+        # padded points tensors and unit-length normals tensors
+        p1 = torch.rand((N, P1, 3), dtype=torch.float32, device=device)
+        p2 = torch.rand((N, P2, 3), dtype=torch.float32, device=device)
+        n1 = torch.rand((N, P1, 3), dtype=torch.float32, device=device)
+        n2 = torch.rand((N, P2, 3), dtype=torch.float32, device=device)
+        n1 /= n1.norm(dim=-1, p=2, keepdim=True)
+        n2 /= n2.norm(dim=-1, p=2, keepdim=True)
+
+        p1_list = []
+        p2_list = []
+        n1_list = []
+        n2_list = []
+        for i in range(N):  # keep only the first l1 / l2 rows of each padded entry
+            l1 = p1_lengths[i]
+            l2 = p2_lengths[i]
+            p1_list.append(p1[i, :l1].clone())
+            p2_list.append(p2[i, :l2].clone())
+            n1_list.append(n1[i, :l1].clone())
+            n2_list.append(n2[i, :l2].clone())
+
+        # Set requires_grad for all tensors in the lists and
+        # for the padded tensors.
+        if requires_grad:
+            for p in p2_list + p1_list + n1_list + n2_list + [p1, p2, n1, n2]:
+                p.requires_grad = True
+
+        # Create pointclouds objects from the per-cloud lists
+        cloud1 = Pointclouds(points=p1_list, normals=n1_list)
+        cloud2 = Pointclouds(points=p2_list, normals=n2_list)
+
+        # Return lengths, pointclouds objects, padded tensors and batch weights
+        return points_normals(
+            p1_lengths=p1_lengths,
+            p2_lengths=p2_lengths,
+            cloud1=cloud1,
+            cloud2=cloud2,
+            p1=p1,
+            p2=p2,
+            n1=n1,
+            n2=n2,
+            weights=weights,
+        )
+
+    @staticmethod
+    def chamfer_distance_naive_pointclouds(
+        p1, p2, norm: int = 2, device="cpu", abs_cosine=True
+    ):
+        """
+        Naive iterative implementation of nearest neighbor and chamfer distance.
+        p1 and p2 are assumed to be pointclouds objects with points and optionally
+        normals. This function supports heterogeneous pointclouds in a batch.
+        Returns lists of the unreduced loss and loss_normals.
+        """
+        x = p1.points_padded()
+        y = p2.points_padded()
+        N, P1, D = x.shape
+        P2 = y.size(1)
+        x_lengths = p1.num_points_per_cloud()
+        y_lengths = p2.num_points_per_cloud()
+        x_normals = p1.normals_padded()
+        y_normals = p2.normals_padded()
+
+        return_normals = x_normals is not None and y_normals is not None
+
+        # Initialize all distances to +inf (on the device argument, not x.device)
+        dist = torch.ones((N, P1, P2), dtype=torch.float32, device=device) * np.inf
+
+        x_mask = (
+            torch.arange(P1, device=x.device)[None] >= x_lengths[:, None]
+        )  # shape [N, P1], True at padded positions
+        y_mask = (
+            torch.arange(P2, device=y.device)[None] >= y_lengths[:, None]
+        )  # shape [N, P2], True at padded positions
+
+        is_x_heterogeneous = (x_lengths != P1).any()
+        is_y_heterogeneous = (y_lengths != P2).any()
+        # Only calculate the distances for the points which are not masked
+        for n in range(N):
+            for i1 in range(x_lengths[n]):
+                for i2 in range(y_lengths[n]):
+                    if norm == 2:
+                        dist[n, i1, i2] = torch.sum((x[n, i1, :] - y[n, i2, :]) ** 2)
+                    elif norm == 1:
+                        dist[n, i1, i2] = torch.sum(
+                            torch.abs(x[n, i1, :] - y[n, i2, :])
+                        )
+                    else:
+                        raise ValueError("No support for norm %d" % (norm))
+
+        x_dist = torch.min(dist, dim=2)[0]  # (N, P1)
+        y_dist = torch.min(dist, dim=1)[0]  # (N, P2)
+        # entries at padded positions are still +inf here; overwrite them with 0
+        if is_x_heterogeneous:
+            x_dist[x_mask] = 0.0
+        if is_y_heterogeneous:
+            y_dist[y_mask] = 0.0
+
+        loss = [x_dist, y_dist]
+
+        lnorm = [x.new_zeros(()), x.new_zeros(())]  # placeholders without normals
+
+        if return_normals:
+            x_index = dist.argmin(2).view(N, P1, 1).expand(N, P1, 3)
+            y_index = dist.argmin(1).view(N, P2, 1).expand(N, P2, 3)
+            cosine_sim1 = F.cosine_similarity(
+                x_normals, y_normals.gather(1, x_index), dim=2, eps=1e-6
+            )
+            cosine_sim2 = F.cosine_similarity(
+                y_normals, x_normals.gather(1, y_index), dim=2, eps=1e-6
+            )
+
+            if abs_cosine:
+                lnorm1 = 1 - torch.abs(cosine_sim1)
+                lnorm2 = 1 - torch.abs(cosine_sim2)
+            else:
+                lnorm1 = 1 - cosine_sim1
+                lnorm2 = 1 - cosine_sim2
+
+            if is_x_heterogeneous:
+                lnorm1[x_mask] = 0.0
+            if is_y_heterogeneous:
+                lnorm2[y_mask] = 0.0
+
+            lnorm = [lnorm1, lnorm2]  # [(N, P1), (N, P2)]
+
+        return loss, lnorm
+
+    @staticmethod
+    def chamfer_distance_naive(
+        x, y, x_normals=None, y_normals=None, norm: int = 2, abs_cosine=True
+    ):
+        """
+        Reference chamfer distance computed one point pair at a time. Only
+        batches whose clouds all share the same number of points are handled.
+        Returns lists of the unreduced loss and loss_normals.
+        """
+        batch, num_x, _ = x.shape
+        num_y = y.size(1)
+        have_normals = not (x_normals is None or y_normals is None)
+
+        # pairwise[b, i, j]: distance between x[b, i] and y[b, j]
+        pairwise = torch.zeros(
+            (batch, num_x, num_y), dtype=torch.float32, device=x.device
+        )
+        for b in range(batch):
+            for i in range(num_x):
+                for j in range(num_y):
+                    delta = x[b, i, :] - y[b, j, :]
+                    if norm == 2:
+                        pairwise[b, i, j] = torch.sum(delta**2)
+                    elif norm == 1:
+                        pairwise[b, i, j] = torch.sum(torch.abs(delta))
+                    else:
+                        raise ValueError("No support for norm %d" % (norm))
+
+        # nearest-neighbour distance in each direction
+        x_to_y = torch.min(pairwise, dim=2)[0]  # (N, P1)
+        y_to_x = torch.min(pairwise, dim=1)[0]  # (N, P2)
+        loss = [x_to_y, y_to_x]
+
+        if not have_normals:
+            # scalar zero placeholders when normals were not supplied for both
+            return loss, [x.new_zeros(()), x.new_zeros(())]
+
+        # index of the nearest neighbour, expanded over the 3 normal coordinates
+        nn_of_x = pairwise.argmin(2).view(batch, num_x, 1).expand(batch, num_x, 3)
+        nn_of_y = pairwise.argmin(1).view(batch, num_y, 1).expand(batch, num_y, 3)
+        cos_x = F.cosine_similarity(
+            x_normals, y_normals.gather(1, nn_of_x), dim=2, eps=1e-6
+        )
+        cos_y = F.cosine_similarity(
+            y_normals, x_normals.gather(1, nn_of_y), dim=2, eps=1e-6
+        )
+
+        if abs_cosine:
+            # sign-agnostic variant: only the magnitude of the cosine counts
+            cos_x, cos_y = torch.abs(cos_x), torch.abs(cos_y)
+
+        lnorm = [1 - cos_x, 1 - cos_y]  # [(N, P1), (N, P2)]
+
+        return loss, lnorm
+
+    def test_chamfer_point_batch_reduction_mean(self):
+        """
+        Check the vectorized chamfer loss against the naive reference with the
+        default reductions (point_reduction = "mean", batch_reduction = "mean")
+        and without normals, for norm 1 and norm 2.
+        No lengths are passed, so every cloud uses the full padded size.
+        """
+        N, max_P1, max_P2 = 7, 10, 18
+        device = get_random_cuda_device()
+
+        for norm in [1, 2]:
+            data = TestChamfer.init_pointclouds(N, max_P1, max_P2, device)
+            p1, p2 = data.p1, data.p2
+            weights = data.weights
+            P1, P2 = p1.shape[1], p2.shape[1]
+
+            # Separate leaf copies of the points: the vectorized path gets its
+            # own inputs, which are handed to the gradient check further down.
+            p11 = p1.detach().clone()
+            p22 = p2.detach().clone()
+            p11.requires_grad = True
+            p22.requires_grad = True
+
+            naive, _ = TestChamfer.chamfer_distance_naive(p1, p2, norm=norm)
+            loss, loss_norm = chamfer_distance(p11, p22, weights=weights, norm=norm)
+
+            # Reduce the naive per-point distances: mean over the points, weight
+            # each batch element, then divide the sum by the total weight.
+            pred_loss = naive[0].sum(1) / P1 + naive[1].sum(1) / P2
+            pred_loss *= weights
+            pred_loss = pred_loss.sum() / weights.sum()
+
+            self.assertClose(loss, pred_loss)
+            self.assertIsNone(loss_norm)
+
+            # Check gradients of both paths
+            self._check_gradients(loss, None, pred_loss, None, p1, p11, p2, p22)
+
+    def test_chamfer_vs_naive_pointcloud(self):
+        """
+        Test the default settings for chamfer_distance
+        (point reduction = "mean" and batch_reduction="mean") but with heterogeneous
+        pointclouds as input. Compare with the naive implementation of chamfer
+        which supports heterogeneous pointcloud objects.
+        """
+        N, max_P1, max_P2 = 3, 70, 70
+        device = get_random_cuda_device()
+
+        for norm in [1, 2]:
+            points_normals = TestChamfer.init_pointclouds(N, max_P1, max_P2, device)
+            weights = points_normals.weights
+            x_lengths = points_normals.p1_lengths
+            y_lengths = points_normals.p2_lengths
+
+            # Chamfer with padded tensors plus explicit lengths as input.
+            cham_tensor, norm_tensor = chamfer_distance(
+                points_normals.p1,
+                points_normals.p2,
+                x_normals=points_normals.n1,
+                y_normals=points_normals.n2,
+                x_lengths=points_normals.p1_lengths,
+                y_lengths=points_normals.p2_lengths,
+                weights=weights,
+                norm=norm,
+            )
+
+            # Naive reference with pointclouds objects as input.
+            pred_loss, pred_norm_loss = TestChamfer.chamfer_distance_naive_pointclouds(
+                points_normals.cloud1, points_normals.cloud2, norm=norm, device=device
+            )
+
+            # Mean reduction point loss; divides by the true lengths (assumed > 0).
+            pred_loss[0] *= weights.view(N, 1)
+            pred_loss[1] *= weights.view(N, 1)
+            pred_loss_mean = (
+                pred_loss[0].sum(1) / x_lengths + pred_loss[1].sum(1) / y_lengths
+            )
+            pred_loss_mean = pred_loss_mean.sum()
+            pred_loss_mean /= weights.sum()
+
+            # Mean reduction norm loss, reduced the same way as the point loss.
+            pred_norm_loss[0] *= weights.view(N, 1)
+            pred_norm_loss[1] *= weights.view(N, 1)
+            pred_norm_loss_mean = (
+                pred_norm_loss[0].sum(1) / x_lengths
+                + pred_norm_loss[1].sum(1) / y_lengths
+            )
+            pred_norm_loss_mean = pred_norm_loss_mean.sum() / weights.sum()
+
+            self.assertClose(pred_loss_mean, cham_tensor)
+            self.assertClose(pred_norm_loss_mean, norm_tensor)
+
+            self._check_gradients(
+                cham_tensor,
+                norm_tensor,
+                pred_loss_mean,
+                pred_norm_loss_mean,
+                points_normals.cloud1.points_list(),
+                points_normals.p1,
+                points_normals.cloud2.points_list(),
+                points_normals.p2,
+                points_normals.cloud1.normals_list(),
+                points_normals.n1,
+                points_normals.cloud2.normals_list(),
+                points_normals.n2,
+                x_lengths,
+                y_lengths,
+            )
+
+    def test_single_directional_chamfer_vs_naive_pointcloud(self):
+        """
+        Test the single directional settings for chamfer_distance
+        (point reduction = "mean" and batch_reduction="mean") but with heterogeneous
+        pointclouds as input. Compare with the naive implementation of chamfer
+        which supports heterogeneous pointcloud objects.
+        """
+        N, max_P1, max_P2 = 3, 70, 70
+        device = get_random_cuda_device()
+
+        for norm in [1, 2]:
+            for abs_cosine in [True, False]:
+                points_normals = TestChamfer.init_pointclouds(N, max_P1, max_P2, device)
+                weights = points_normals.weights
+                x_lengths = points_normals.p1_lengths
+                y_lengths = points_normals.p2_lengths
+
+                # Chamfer with padded tensors plus explicit lengths as input.
+                cham_tensor, norm_tensor = chamfer_distance(
+                    points_normals.p1,
+                    points_normals.p2,
+                    x_normals=points_normals.n1,
+                    y_normals=points_normals.n2,
+                    x_lengths=points_normals.p1_lengths,
+                    y_lengths=points_normals.p2_lengths,
+                    weights=weights,
+                    norm=norm,
+                    single_directional=True,
+                    abs_cosine=abs_cosine,
+                )
+
+                # Naive reference with pointclouds objects as input.
+                (
+                    pred_loss,
+                    pred_norm_loss,
+                ) = TestChamfer.chamfer_distance_naive_pointclouds(
+                    points_normals.cloud1,
+                    points_normals.cloud2,
+                    norm=norm,
+                    device=device,
+                    abs_cosine=abs_cosine,
+                )
+
+                # Mean reduction point loss; only the first (x to y) term is used.
+                pred_loss[0] *= weights.view(N, 1)
+                pred_loss_mean = pred_loss[0].sum(1) / x_lengths
+                pred_loss_mean = pred_loss_mean.sum()
+                pred_loss_mean /= weights.sum()
+
+                # Mean reduction norm loss; only the first (x to y) term is used.
+                pred_norm_loss[0] *= weights.view(N, 1)
+                pred_norm_loss_mean = pred_norm_loss[0].sum(1) / x_lengths
+                pred_norm_loss_mean = pred_norm_loss_mean.sum() / weights.sum()
+
+                self.assertClose(pred_loss_mean, cham_tensor)
+                self.assertClose(pred_norm_loss_mean, norm_tensor)
+
+                self._check_gradients(
+                    cham_tensor,
+                    norm_tensor,
+                    pred_loss_mean,
+                    pred_norm_loss_mean,
+                    points_normals.cloud1.points_list(),
+                    points_normals.p1,
+                    points_normals.cloud2.points_list(),
+                    points_normals.p2,
+                    points_normals.cloud1.normals_list(),
+                    points_normals.n1,
+                    points_normals.cloud2.normals_list(),
+                    points_normals.n2,
+                    x_lengths,
+                    y_lengths,
+                )
+
+    def test_chamfer_pointcloud_object_withnormals(self):
+        N = 5
+        P1, P2 = 100, 100
+        device = get_random_cuda_device()
+        # Pointclouds inputs must give the same result as padded tensors plus lengths.
+        reductions = [
+            ("sum", "sum"),
+            ("mean", "sum"),
+            ("sum", "mean"),
+            ("mean", "mean"),
+            ("sum", None),
+            ("mean", None),
+            (None, None),
+        ]
+        for point_reduction, batch_reduction in reductions:
+            # Reinitialize all the tensors so that the
+            # backward pass can be computed.
+            points_normals = TestChamfer.init_pointclouds(
+                N, P1, P2, device, allow_empty=False
+            )
+
+            # Chamfer with pointclouds as input.
+            cham_cloud, norm_cloud = chamfer_distance(
+                points_normals.cloud1,
+                points_normals.cloud2,
+                point_reduction=point_reduction,
+                batch_reduction=batch_reduction,
+            )
+
+            # Chamfer with tensors (plus lengths and normals) as input.
+            cham_tensor, norm_tensor = chamfer_distance(
+                points_normals.p1,
+                points_normals.p2,
+                x_lengths=points_normals.p1_lengths,
+                y_lengths=points_normals.p2_lengths,
+                x_normals=points_normals.n1,
+                y_normals=points_normals.n2,
+                point_reduction=point_reduction,
+                batch_reduction=batch_reduction,
+            )
+
+            if point_reduction is None:  # outputs are pairs; join them to compare
+                cham_tensor_bidirectional = torch.hstack(
+                    [cham_tensor[0], cham_tensor[1]]
+                )
+                norm_tensor_bidirectional = torch.hstack(
+                    [norm_tensor[0], norm_tensor[1]]
+                )
+                cham_cloud_bidirectional = torch.hstack([cham_cloud[0], cham_cloud[1]])
+                norm_cloud_bidirectional = torch.hstack([norm_cloud[0], norm_cloud[1]])
+                self.assertClose(cham_cloud_bidirectional, cham_tensor_bidirectional)
+                self.assertClose(norm_cloud_bidirectional, norm_tensor_bidirectional)
+                self._check_gradients(
+                    cham_tensor_bidirectional,
+                    norm_tensor_bidirectional,
+                    cham_cloud_bidirectional,
+                    norm_cloud_bidirectional,
+                    points_normals.cloud1.points_list(),
+                    points_normals.p1,
+                    points_normals.cloud2.points_list(),
+                    points_normals.p2,
+                    points_normals.cloud1.normals_list(),
+                    points_normals.n1,
+                    points_normals.cloud2.normals_list(),
+                    points_normals.n2,
+                    points_normals.p1_lengths,
+                    points_normals.p2_lengths,
+                )
+            else:
+                self.assertClose(cham_cloud, cham_tensor)
+                self.assertClose(norm_cloud, norm_tensor)
+                self._check_gradients(
+                    cham_tensor,
+                    norm_tensor,
+                    cham_cloud,
+                    norm_cloud,
+                    points_normals.cloud1.points_list(),
+                    points_normals.p1,
+                    points_normals.cloud2.points_list(),
+                    points_normals.p2,
+                    points_normals.cloud1.normals_list(),
+                    points_normals.n1,
+                    points_normals.cloud2.normals_list(),
+                    points_normals.n2,
+                    points_normals.p1_lengths,
+                    points_normals.p2_lengths,
+                )
+
+    def test_chamfer_pointcloud_object_nonormals(self):
+        N = 5
+        P1, P2 = 100, 100
+        device = get_random_cuda_device()
+        # Same comparison as the with-normals test, but only the point loss is checked.
+        reductions = [
+            ("sum", "sum"),
+            ("mean", "sum"),
+            ("sum", "mean"),
+            ("mean", "mean"),
+            ("sum", None),
+            ("mean", None),
+            (None, None),
+        ]
+        for point_reduction, batch_reduction in reductions:
+            # Reinitialize all the tensors so that the
+            # backward pass can be computed.
+            points_normals = TestChamfer.init_pointclouds(
+                N, P1, P2, device, allow_empty=False
+            )
+
+            # Chamfer with pointclouds as input; the normals output is ignored.
+            cham_cloud, _ = chamfer_distance(
+                points_normals.cloud1,
+                points_normals.cloud2,
+                point_reduction=point_reduction,
+                batch_reduction=batch_reduction,
+            )
+
+            # Chamfer with tensors (plus lengths, no normals) as input.
+            cham_tensor, _ = chamfer_distance(
+                points_normals.p1,
+                points_normals.p2,
+                x_lengths=points_normals.p1_lengths,
+                y_lengths=points_normals.p2_lengths,
+                point_reduction=point_reduction,
+                batch_reduction=batch_reduction,
+            )
+
+            if point_reduction is None:  # outputs are pairs; join them to compare
+                cham_tensor_bidirectional = torch.hstack(
+                    [cham_tensor[0], cham_tensor[1]]
+                )
+                cham_cloud_bidirectional = torch.hstack([cham_cloud[0], cham_cloud[1]])
+                self.assertClose(cham_cloud_bidirectional, cham_tensor_bidirectional)
+                self._check_gradients(
+                    cham_tensor_bidirectional,
+                    None,
+                    cham_cloud_bidirectional,
+                    None,
+                    points_normals.cloud1.points_list(),
+                    points_normals.p1,
+                    points_normals.cloud2.points_list(),
+                    points_normals.p2,
+                    lengths1=points_normals.p1_lengths,
+                    lengths2=points_normals.p2_lengths,
+                )
+            else:
+                self.assertClose(cham_cloud, cham_tensor)
+                self._check_gradients(
+                    cham_tensor,
+                    None,
+                    cham_cloud,
+                    None,
+                    points_normals.cloud1.points_list(),
+                    points_normals.p1,
+                    points_normals.cloud2.points_list(),
+                    points_normals.p2,
+                    lengths1=points_normals.p1_lengths,
+                    lengths2=points_normals.p2_lengths,
+                )
+
+    def test_chamfer_point_reduction_mean(self):
+        """
+        Check the vectorized chamfer loss and normal loss against the naive
+        reference for point_reduction = "mean" with batch_reduction = None.
+        """
+        N, max_P1, max_P2 = 7, 10, 18
+        device = get_random_cuda_device()
+        data = TestChamfer.init_pointclouds(N, max_P1, max_P2, device)
+        p1, p2 = data.p1, data.p2
+        p1_normals, p2_normals = data.n1, data.n2
+        weights = data.weights
+        P1, P2 = p1.shape[1], p2.shape[1]
+
+        # Separate leaf copies of the points for the vectorized path.
+        p11 = p1.detach().clone()
+        p22 = p2.detach().clone()
+        p11.requires_grad = True
+        p22.requires_grad = True
+
+        naive_dist, naive_norm = TestChamfer.chamfer_distance_naive(
+            p1, p2, x_normals=p1_normals, y_normals=p2_normals
+        )
+
+        vectorized_kwargs = dict(
+            x_normals=p1_normals,
+            y_normals=p2_normals,
+            weights=weights,
+            batch_reduction=None,
+            point_reduction="mean",
+        )
+        loss, loss_norm = chamfer_distance(p11, p22, **vectorized_kwargs)
+
+        # Expected distances: per-cloud mean in both directions, then weighted.
+        pred_loss_mean = naive_dist[0].sum(1) / P1 + naive_dist[1].sum(1) / P2
+        pred_loss_mean *= weights
+        self.assertClose(loss, pred_loss_mean)
+
+        # Expected normal loss, reduced the same way.
+        pred_loss_norm_mean = (
+            naive_norm[0].sum(1) / P1 + naive_norm[1].sum(1) / P2
+        )
+        pred_loss_norm_mean *= weights
+        self.assertClose(loss_norm, pred_loss_norm_mean)
+
+        # Check gradients of both paths
+        self._check_gradients(
+            loss, loss_norm, pred_loss_mean, pred_loss_norm_mean, p1, p11, p2, p22
+        )
+
+    def test_single_direction_chamfer_point_reduction_mean(self):
+        """
+        Check the single-directional vectorized chamfer loss and normal loss against
+        the naive reference for point_reduction = "mean", batch_reduction = None.
+        """
+        N, max_P1, max_P2 = 7, 10, 18
+        device = get_random_cuda_device()
+        data = TestChamfer.init_pointclouds(N, max_P1, max_P2, device)
+        p1, p2 = data.p1, data.p2
+        p1_normals, p2_normals = data.n1, data.n2
+        weights = data.weights
+        P1 = p1.shape[1]
+
+        # Separate leaf copies of the points for the vectorized path.
+        p11 = p1.detach().clone()
+        p22 = p2.detach().clone()
+        p11.requires_grad = True
+        p22.requires_grad = True
+
+        naive_dist, naive_norm = TestChamfer.chamfer_distance_naive(
+            p1, p2, x_normals=p1_normals, y_normals=p2_normals
+        )
+
+        vectorized_kwargs = dict(
+            x_normals=p1_normals,
+            y_normals=p2_normals,
+            weights=weights,
+            batch_reduction=None,
+            point_reduction="mean",
+            single_directional=True,
+        )
+        loss, loss_norm = chamfer_distance(p11, p22, **vectorized_kwargs)
+
+        # Only the first (x to y) term of the naive output enters the expectation.
+        pred_loss_mean = naive_dist[0].sum(1) / P1
+        pred_loss_mean *= weights
+        self.assertClose(loss, pred_loss_mean)
+
+        pred_loss_norm_mean = naive_norm[0].sum(1) / P1
+        pred_loss_norm_mean *= weights
+        self.assertClose(loss_norm, pred_loss_norm_mean)
+
+        # Check gradients of both paths
+        self._check_gradients(
+            loss, loss_norm, pred_loss_mean, pred_loss_norm_mean, p1, p11, p2, p22
+        )
+
+    def test_chamfer_point_reduction_sum(self):
+        """
+        Check the vectorized chamfer loss against the naive implementation
+        for point_reduction = "sum" and batch_reduction = None.
+        """
+        batch, num_x, num_y = 7, 10, 18
+        device = get_random_cuda_device()
+        clouds = TestChamfer.init_pointclouds(batch, num_x, num_y, device)
+        x, y = clouds.p1, clouds.p2
+        x_normals, y_normals = clouds.n1, clouds.n2
+        weights = clouds.weights
+        # Detached leaf copies are fed to chamfer_distance; their gradients
+        # are later compared with those of x and y.
+        x_leaf = x.detach().clone()
+        y_leaf = y.detach().clone()
+        x_leaf.requires_grad = True
+        y_leaf.requires_grad = True
+
+        naive_loss, naive_loss_norm = TestChamfer.chamfer_distance_naive(
+            x, y, x_normals=x_normals, y_normals=y_normals
+        )
+
+        loss, loss_norm = chamfer_distance(
+            x_leaf,
+            y_leaf,
+            x_normals=x_normals,
+            y_normals=y_normals,
+            weights=weights,
+            batch_reduction=None,
+            point_reduction="sum",
+        )
+        expected_loss = naive_loss[0].sum(1) + naive_loss[1].sum(1)
+        expected_loss *= weights
+        self.assertClose(loss, expected_loss)
+
+        expected_loss_norm = naive_loss_norm[0].sum(1) + naive_loss_norm[1].sum(1)
+        expected_loss_norm *= weights
+        self.assertClose(loss_norm, expected_loss_norm)
+
+        # Gradients w.r.t. the inputs must agree as well.
+        self._check_gradients(
+            loss, loss_norm, expected_loss, expected_loss_norm, x, x_leaf, y, y_leaf
+        )
+
+    def test_single_directional_chamfer_point_reduction_sum(self):
+        """
+        Check the single directional vectorized chamfer loss against the naive
+        implementation for point_reduction = "sum" and batch_reduction = None.
+        """
+        batch, num_x, num_y = 7, 10, 18
+        device = get_random_cuda_device()
+        clouds = TestChamfer.init_pointclouds(batch, num_x, num_y, device)
+        x, y = clouds.p1, clouds.p2
+        x_normals, y_normals = clouds.n1, clouds.n2
+        weights = clouds.weights
+        # Detached leaf copies are fed to chamfer_distance; their gradients
+        # are later compared with those of x and y.
+        x_leaf = x.detach().clone()
+        y_leaf = y.detach().clone()
+        x_leaf.requires_grad = True
+        y_leaf.requires_grad = True
+
+        naive_loss, naive_loss_norm = TestChamfer.chamfer_distance_naive(
+            x, y, x_normals=x_normals, y_normals=y_normals
+        )
+
+        loss, loss_norm = chamfer_distance(
+            x_leaf,
+            y_leaf,
+            x_normals=x_normals,
+            y_normals=y_normals,
+            weights=weights,
+            batch_reduction=None,
+            point_reduction="sum",
+            single_directional=True,
+        )
+        expected_loss = naive_loss[0].sum(1)
+        expected_loss *= weights
+        self.assertClose(loss, expected_loss)
+
+        expected_loss_norm = naive_loss_norm[0].sum(1)
+        expected_loss_norm *= weights
+        self.assertClose(loss_norm, expected_loss_norm)
+
+        # Gradients w.r.t. the inputs must agree as well.
+        self._check_gradients(
+            loss, loss_norm, expected_loss, expected_loss_norm, x, x_leaf, y, y_leaf
+        )
+
+    def test_chamfer_point_reduction_none(self):
+        """
+        Check the unreduced output of the vectorized chamfer loss against the
+        naive implementation for point_reduction = None and batch_reduction = None.
+        """
+        batch, max_x, max_y = 7, 10, 18
+        device = get_random_cuda_device()
+        clouds = TestChamfer.init_pointclouds(batch, max_x, max_y, device)
+        x, y = clouds.p1, clouds.p2
+        x_normals, y_normals = clouds.n1, clouds.n2
+        # Detached leaf copies are fed to chamfer_distance; their gradients
+        # are later compared with those of x and y.
+        x_leaf = x.detach().clone()
+        y_leaf = y.detach().clone()
+        x_leaf.requires_grad = True
+        y_leaf.requires_grad = True
+
+        naive_loss, naive_loss_norm = TestChamfer.chamfer_distance_naive(
+            x, y, x_normals=x_normals, y_normals=y_normals
+        )
+
+        # No reduction at all: the results are indexed per direction below.
+        loss, loss_norm = chamfer_distance(
+            x_leaf,
+            y_leaf,
+            x_normals=x_normals,
+            y_normals=y_normals,
+            batch_reduction=None,
+            point_reduction=None,
+        )
+
+        # Stack the terms of both directions side by side so that each
+        # quantity is compared, and backpropagated, as a single tensor.
+        loss_both = torch.hstack([loss[0], loss[1]])
+        naive_loss_both = torch.hstack([naive_loss[0], naive_loss[1]])
+        loss_norm_both = torch.hstack([loss_norm[0], loss_norm[1]])
+        naive_norm_both = torch.hstack([naive_loss_norm[0], naive_loss_norm[1]])
+
+        self.assertClose(loss_both, naive_loss_both)
+        self.assertClose(loss_norm_both, naive_norm_both)
+
+        # Gradients w.r.t. the inputs must agree as well.
+        self._check_gradients(
+            loss_both,
+            loss_norm_both,
+            naive_loss_both,
+            naive_norm_both,
+            x,
+            x_leaf,
+            y,
+            y_leaf,
+        )
+
+    def test_single_direction_chamfer_point_reduction_none(self):
+        """
+        Check the single directional vectorized chamfer loss against the naive
+        implementation for point_reduction = None and batch_reduction = None.
+        """
+        batch, max_x, max_y = 7, 10, 18
+        device = get_random_cuda_device()
+        clouds = TestChamfer.init_pointclouds(batch, max_x, max_y, device)
+        x, y = clouds.p1, clouds.p2
+        x_normals, y_normals = clouds.n1, clouds.n2
+        # Detached leaf copies are fed to chamfer_distance; their gradients
+        # are later compared with those of x and y.
+        x_leaf = x.detach().clone()
+        y_leaf = y.detach().clone()
+        x_leaf.requires_grad = True
+        y_leaf.requires_grad = True
+
+        naive_loss, naive_loss_norm = TestChamfer.chamfer_distance_naive(
+            x, y, x_normals=x_normals, y_normals=y_normals
+        )
+
+        # No reduction; expected values are the first term of the naive loss.
+        loss, loss_norm = chamfer_distance(
+            x_leaf,
+            y_leaf,
+            x_normals=x_normals,
+            y_normals=y_normals,
+            batch_reduction=None,
+            point_reduction=None,
+            single_directional=True,
+        )
+
+        self.assertClose(loss, naive_loss[0])
+        self.assertClose(loss_norm, naive_loss_norm[0])
+
+        # Gradients w.r.t. the inputs must agree as well.
+        self._check_gradients(
+            loss, loss_norm, naive_loss[0], naive_loss_norm[0], x, x_leaf, y, y_leaf
+        )
+
+    def _check_gradients(
+        self,
+        loss,
+        loss_norm,
+        pred_loss,
+        pred_loss_norm,
+        x1,
+        x2,
+        y1,
+        y2,
+        xn1=None,  # normals
+        xn2=None,  # normals
+        yn1=None,  # normals
+        yn2=None,  # normals
+        lengths1=None,
+        lengths2=None,
+    ):
+        """
+        Backpropagate both losses with identical random upstream gradients and
+        compare the gradients of the leaf pairs (x1, x2), (y1, y2) and, when all
+        are given, (xn1, xn2), (yn1, yn2). Each leaf is a tensor or list of tensors.
+        """
+        grad_loss = torch.rand(loss.shape, device=loss.device, dtype=loss.dtype)
+
+        # The loss on normals is optional: its terms default to zero.
+        norm_loss_term, pred_norm_loss_term = 0.0, 0.0
+        if not (loss_norm is None or pred_loss_norm is None):
+            grad_normals = torch.rand(
+                loss_norm.shape, device=loss.device, dtype=loss.dtype
+            )
+            norm_loss_term = loss_norm * grad_normals
+            pred_norm_loss_term = pred_loss_norm * grad_normals
+
+        # Same upstream gradients for the tested and the reference loss.
+        ((loss * grad_loss) + norm_loss_term).sum().backward()
+        ((pred_loss * grad_loss) + pred_norm_loss_term).sum().backward()
+
+        self._check_grad_by_type(x1, x2, lengths1)
+        self._check_grad_by_type(y1, y2, lengths2)
+
+        # Gradients of the normals are only compared when every one of the
+        # four leaf nodes for normals has been passed in.
+        if not any(n is None for n in (xn1, xn2, yn1, yn2)):
+            self._check_grad_by_type(xn1, xn2, lengths1)
+            self._check_grad_by_type(yn1, yn2, lengths2)
+
+    def _check_grad_by_type(self, x1, x2, lengths=None):
+        """
+        Assert that the gradients of x1 and x2 match. Each argument may be a
+        tensor or a list of tensors; the comparison is chosen accordingly.
+        """
+        error_msg = "All values for gradient checks must be tensors or lists of tensors"
+
+        if isinstance(x1, list) and isinstance(x2, list):
+            # List vs list: compare element by element.
+            for i, item in enumerate(x1):
+                self.assertClose(item.grad, x2[i].grad)
+        elif isinstance(x1, list) and torch.is_tensor(x2):
+            self.assertIsNotNone(lengths)  # needed to slice off the padding
+
+            # List vs padded tensor: valid rows match, padded rows sum to zero.
+            for i, item in enumerate(x1):
+                self.assertClose(item.grad, x2.grad[i, : lengths[i]], atol=1e-7)
+                self.assertTrue(x2.grad[i, lengths[i] :].sum().item() == 0.0)
+        elif torch.is_tensor(x1) and torch.is_tensor(x2):
+            # Tensor vs tensor: compare directly.
+            self.assertClose(x1.grad, x2.grad)
+        else:
+            raise ValueError(error_msg)
+
+    def test_chamfer_joint_reduction(self):
+        """
+        Compare the vectorized chamfer loss with the naive implementation for
+        every combination of batch_reduction in ["mean", "sum"] and
+        point_reduction in ["mean", "sum"]; invalid reductions must raise.
+        """
+        N, max_P1, max_P2 = 7, 10, 18
+        device = get_random_cuda_device()
+
+        points_normals = TestChamfer.init_pointclouds(N, max_P1, max_P2, device)
+        p1 = points_normals.p1
+        p2 = points_normals.p2
+        p1_normals = points_normals.n1
+        p2_normals = points_normals.n2
+        weights = points_normals.weights
+
+        P1 = p1.shape[1]
+        P2 = p2.shape[1]
+
+        pred_loss, pred_loss_norm = TestChamfer.chamfer_distance_naive(
+            p1, p2, x_normals=p1_normals, y_normals=p2_normals
+        )
+
+        # batch_reduction = "sum", point_reduction = "sum".
+        loss, loss_norm = chamfer_distance(
+            p1,
+            p2,
+            x_normals=p1_normals,
+            y_normals=p2_normals,
+            weights=weights,
+            batch_reduction="sum",
+            point_reduction="sum",
+        )
+        pred_loss[0] *= weights.view(N, 1)  # in-place: weighted values are reused below
+        pred_loss[1] *= weights.view(N, 1)  # in-place: weighted values are reused below
+        pred_loss_sum = pred_loss[0].sum(1) + pred_loss[1].sum(1)  # point sum
+        pred_loss_sum = pred_loss_sum.sum()  # batch sum
+        self.assertClose(loss, pred_loss_sum)
+
+        pred_loss_norm[0] *= weights.view(N, 1)  # in-place, as for pred_loss
+        pred_loss_norm[1] *= weights.view(N, 1)  # in-place, as for pred_loss
+        pred_loss_norm_sum = pred_loss_norm[0].sum(1) + pred_loss_norm[1].sum(
+            1
+        )  # point sum.
+        pred_loss_norm_sum = pred_loss_norm_sum.sum()  # batch sum
+        self.assertClose(loss_norm, pred_loss_norm_sum)
+
+        # batch_reduction = "mean", point_reduction = "sum".
+        loss, loss_norm = chamfer_distance(
+            p1,
+            p2,
+            x_normals=p1_normals,
+            y_normals=p2_normals,
+            weights=weights,
+            batch_reduction="mean",
+            point_reduction="sum",
+        )
+        pred_loss_sum /= weights.sum()  # batch sum divided by the total weight
+        self.assertClose(loss, pred_loss_sum)
+
+        pred_loss_norm_sum /= weights.sum()  # batch sum divided by the total weight
+        self.assertClose(loss_norm, pred_loss_norm_sum)
+
+        # batch_reduction = "sum", point_reduction = "mean".
+        loss, loss_norm = chamfer_distance(
+            p1,
+            p2,
+            x_normals=p1_normals,
+            y_normals=p2_normals,
+            weights=weights,
+            batch_reduction="sum",
+            point_reduction="mean",
+        )
+        pred_loss_mean = pred_loss[0].sum(1) / P1 + pred_loss[1].sum(1) / P2
+        pred_loss_mean = pred_loss_mean.sum()
+        self.assertClose(loss, pred_loss_mean)
+
+        pred_loss_norm_mean = (
+            pred_loss_norm[0].sum(1) / P1 + pred_loss_norm[1].sum(1) / P2
+        )
+        pred_loss_norm_mean = pred_loss_norm_mean.sum()
+        self.assertClose(loss_norm, pred_loss_norm_mean)
+
+        # batch_reduction = "mean", point_reduction = "mean". This is the default.
+        loss, loss_norm = chamfer_distance(
+            p1,
+            p2,
+            x_normals=p1_normals,
+            y_normals=p2_normals,
+            weights=weights,
+            batch_reduction="mean",
+            point_reduction="mean",
+        )
+        pred_loss_mean /= weights.sum()  # batch sum divided by the total weight
+        self.assertClose(loss, pred_loss_mean)
+
+        pred_loss_norm_mean /= weights.sum()  # batch sum divided by the total weight
+        self.assertClose(loss_norm, pred_loss_norm_mean)
+
+        # An unsupported batch_reduction (here "max") must raise a ValueError.
+        with self.assertRaisesRegex(ValueError, "batch_reduction must be one of"):
+            chamfer_distance(p1, p2, weights=weights, batch_reduction="max")
+
+        # An unsupported point_reduction (here "max") must raise a ValueError.
+        with self.assertRaisesRegex(ValueError, "point_reduction must be one of"):
+            chamfer_distance(p1, p2, weights=weights, point_reduction="max")
+
+    def test_incorrect_weights(self):
+        # Covers all-zero, negative and wrongly sized per-batch weights.
+        batch, num_x, num_y = 16, 64, 128
+        device = get_random_cuda_device()
+        float_opts = {"dtype": torch.float32, "device": device}
+        x = torch.rand((batch, num_x, 3), requires_grad=True, **float_opts)
+        y = torch.rand((batch, num_y, 3), requires_grad=True, **float_opts)
+
+        # All-zero weights: the losses are zero but must still require grad.
+        zero_weights = torch.zeros((batch,), **float_opts)
+        loss, loss_norm = chamfer_distance(
+            x, y, weights=zero_weights, batch_reduction="mean"
+        )
+        self.assertClose(loss.cpu(), torch.zeros(()))
+        self.assertTrue(loss.requires_grad)
+        self.assertClose(loss_norm.cpu(), torch.zeros(()))
+        self.assertTrue(loss_norm.requires_grad)
+
+        # The same is checked when the batch is not reduced.
+        loss, loss_norm = chamfer_distance(
+            x, y, weights=zero_weights, batch_reduction=None
+        )
+        self.assertClose(loss.cpu(), torch.zeros((batch, batch)))
+        self.assertTrue(loss.requires_grad)
+        self.assertClose(loss_norm.cpu(), torch.zeros((batch, batch)))
+        self.assertTrue(loss_norm.requires_grad)
+
+        # Negative weights are rejected.
+        with self.assertRaises(ValueError):
+            chamfer_distance(x, y, weights=-torch.ones((batch,), **float_opts))
+
+        # So is a weights tensor whose length differs from the batch size.
+        with self.assertRaises(ValueError):
+            chamfer_distance(x, y, weights=torch.zeros((batch - 1,), **float_opts))
+
+    def test_incorrect_inputs(self):
+        # Each malformed argument must raise a ValueError with a matching message.
+        batch, num_x, num_y = 7, 10, 18
+        device = get_random_cuda_device()
+        clouds = TestChamfer.init_pointclouds(batch, num_x, num_y, device)
+        x, y = clouds.p1, clouds.p2
+        x_normals = clouds.n1
+
+        # Normals with an extra leading dimension
+        with self.assertRaisesRegex(ValueError, "Expected normals to be of shape"):
+            chamfer_distance(x, y, x_normals=x_normals[None])
+
+        # Points with an extra leading dimension
+        with self.assertRaisesRegex(ValueError, "Expected points to be of shape"):
+            chamfer_distance(x[None], y)
+
+        # Lengths with 3 entries for a batch of 7
+        with self.assertRaisesRegex(ValueError, "Expected lengths to be of shape"):
+            chamfer_distance(x, y, x_lengths=torch.tensor([1, 2, 3], device=device))
+
+        # Inputs that are neither tensors nor Pointclouds
+        with self.assertRaisesRegex(ValueError, "Pointclouds objects or torch.Tensor"):
+            chamfer_distance(x=[1, 1, 1], y=[1, 1, 1])
+
+    def test_invalid_norm(self):
+        # Norm degrees other than 1 or 2 must be rejected with a ValueError
+        # whose message names the supported values.
+        batch, num_x, num_y = 7, 10, 18
+        device = get_random_cuda_device()
+        clouds = TestChamfer.init_pointclouds(batch, num_x, num_y, device)
+        x, y = clouds.p1, clouds.p2
+
+        # One value below and one above the supported degrees.
+        for bad_norm in (0, 3):
+            with self.assertRaisesRegex(ValueError, "Support for 1 or 2 norm."):
+                chamfer_distance(x, y, norm=bad_norm)
+
+    def test_empty_clouds(self):
+        # An empty cloud in the batch: point_reduction must not divide by zero.
+        clouds_x = Pointclouds(points=[torch.zeros(0, 3), torch.zeros(10, 3)])
+        clouds_y = Pointclouds(points=torch.ones(2, 40, 3))
+        per_batch_loss, _ = chamfer_distance(clouds_x, clouds_y, batch_reduction=None)
+        self.assertClose(per_batch_loss, torch.tensor([0.0, 6.0]))
+
+        # An empty batch: batch_reduction must not divide by zero.
+        empty_loss, _ = chamfer_distance(Pointclouds([]), Pointclouds([]))
+        self.assertClose(empty_loss, torch.tensor(0.0))
+
+    @staticmethod
+    def chamfer_with_init(
+        batch_size: int,
+        P1: int,
+        P2: int,
+        return_normals: bool,
+        homogeneous: bool,
+        device="cpu",
+    ):
+        # All inputs are created here, so the returned closure only evaluates
+        # chamfer_distance (followed by a CUDA synchronization).
+        clouds = TestChamfer.init_pointclouds(batch_size, P1, P2, device=device)
+        # For homogeneous clouds no lengths are passed, so chamfer_distance
+        # assumes that there is no padding.
+        x_lengths = None if homogeneous else clouds.p1_lengths
+        y_lengths = None if homogeneous else clouds.p2_lengths
+
+        torch.cuda.synchronize()
+
+        def loss():
+            _loss, _loss_normals = chamfer_distance(
+                clouds.p1,
+                clouds.p2,
+                x_lengths=x_lengths,
+                y_lengths=y_lengths,
+                x_normals=clouds.n1,
+                y_normals=clouds.n2,
+                weights=clouds.weights,
+            )
+            torch.cuda.synchronize()
+
+        return loss
+
+    @staticmethod
+    def chamfer_naive_with_init(
+        batch_size: int, P1: int, P2: int, return_normals: bool, device="cpu"
+    ):
+        clouds = TestChamfer.init_pointclouds(batch_size, P1, P2, device=device)
+        torch.cuda.synchronize()  # inputs are built before the closure is returned
+
+        def loss():
+            _loss, _loss_normals = TestChamfer.chamfer_distance_naive(
+                clouds.p1,
+                clouds.p2,
+                x_normals=clouds.n1,
+                y_normals=clouds.n2,
+            )
+            torch.cuda.synchronize()
+
+        return loss
diff --git a/pytorch3d/tests/test_checkerboard.py b/pytorch3d/tests/test_checkerboard.py
new file mode 100644
index 0000000000000000000000000000000000000000..7da0dbacefa96365fe97a5bc1ac28c06b327de61
--- /dev/null
+++ b/pytorch3d/tests/test_checkerboard.py
@@ -0,0 +1,21 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.utils import checkerboard
+
+from .common_testing import TestCaseMixin
+
+
+class TestCheckerboard(TestCaseMixin, unittest.TestCase):
+    def test_simple(self):
+        # Packed vertices of checkerboard(5): extremes are -/+ [5, 5, 0] per axis.
+        packed_verts = checkerboard(5).verts_packed()
+        upper = torch.tensor([5.0, 5.0, 0])
+        self.assertClose(packed_verts.min(dim=0).values, -upper)
+        self.assertClose(packed_verts.max(dim=0).values, upper)
diff --git a/pytorch3d/tests/test_common_linear_with_repeat.py b/pytorch3d/tests/test_common_linear_with_repeat.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b7cb63f4dc5bc139931bf657cedaa88368d7c79
--- /dev/null
+++ b/pytorch3d/tests/test_common_linear_with_repeat.py
@@ -0,0 +1,33 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.common.linear_with_repeat import LinearWithRepeat
+
+from .common_testing import TestCaseMixin
+
+
+class TestLinearWithRepeat(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)
+
+    def test_simple(self):
+        # LinearWithRepeat applied to (x, y) must match a plain Linear applied
+        # to x concatenated with y expanded along the second-to-last dimension.
+        x = torch.rand(4, 6, 7, 3)
+        y = torch.rand(4, 6, 4)
+        reference = torch.nn.Linear(7, 8)
+        torch.nn.init.xavier_uniform_(reference.weight.data)
+        reference.bias.data.uniform_()
+        y_expanded = y.unsqueeze(-2).expand(4, 6, 7, 4)
+        expected = reference.forward(torch.cat([x, y_expanded], dim=-1))
+
+        module = LinearWithRepeat(7, 8)
+        module.load_state_dict(reference.state_dict())
+        self.assertClose(module.forward((x, y)), expected, rtol=1e-4)
diff --git a/pytorch3d/tests/test_common_testing.py b/pytorch3d/tests/test_common_testing.py
new file mode 100644
index 0000000000000000000000000000000000000000..e57f4c5c25031f38f70acfff9c433fca4702063a
--- /dev/null
+++ b/pytorch3d/tests/test_common_testing.py
@@ -0,0 +1,62 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import numpy as np
+import torch
+
+from .common_testing import TestCaseMixin
+
+
+class TestOpsUtils(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)
+        np.random.seed(42)
+
+    def test_all_close(self):
+        # Exercise assertClose / assertNormsClose on tensors and numpy arrays,
+        # cross-checking the chosen tolerances against torch.allclose.
+        device = torch.device("cuda:0")
+        n_points, noise_std, msg = 20, 1e-3, "tratata"
+
+        # test absolute tolerance
+        x = torch.rand(n_points, 3, device=device)
+        x_noise = x + noise_std * torch.rand(n_points, 3, device=device)
+        self.assertTrue(torch.allclose(x, x_noise, atol=10 * noise_std))
+        self.assertFalse(torch.allclose(x, x_noise, atol=0.1 * noise_std))
+        self.assertClose(x, x_noise, atol=10 * noise_std)
+        with self.assertRaises(AssertionError) as context:
+            self.assertClose(x, x_noise, atol=0.1 * noise_std, msg=msg)
+        self.assertIn(msg, str(context.exception))
+
+        # test numpy
+        def to_np(t):
+            return t.data.cpu().numpy()
+
+        self.assertClose(to_np(x), to_np(x_noise), atol=10 * noise_std)
+        with self.assertRaises(AssertionError) as context:
+            self.assertClose(to_np(x), to_np(x_noise), atol=0.1 * noise_std, msg=msg)
+        self.assertIn(msg, str(context.exception))
+        self.assertIn("Not close", str(context.exception))
+
+        # test relative tolerance
+        self.assertTrue(torch.allclose(x, x_noise, rtol=100 * noise_std))
+        self.assertFalse(torch.allclose(x, x_noise, rtol=noise_std))
+        self.assertClose(x, x_noise, rtol=100 * noise_std)
+        with self.assertRaises(AssertionError) as context:
+            self.assertClose(x, x_noise, rtol=noise_std, msg=msg)
+        self.assertIn(msg, str(context.exception))
+
+        # test norm aggregation
+        # if one of the spatial dimensions is small, norm aggregation helps
+        x_noise[:, 0] = x_noise[:, 0] - x[:, 0]
+        x[:, 0] = 0.0
+        self.assertFalse(torch.allclose(x, x_noise, rtol=100 * noise_std))
+        self.assertNormsClose(
+            x, x_noise, rtol=100 * noise_std, norm_fn=lambda t: t.norm(dim=-1)
+        )
diff --git a/pytorch3d/tests/test_common_workaround.py b/pytorch3d/tests/test_common_workaround.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f8e3d5d97fef870e819ad897c238cc4376233d4
--- /dev/null
+++ b/pytorch3d/tests/test_common_workaround.py
@@ -0,0 +1,57 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import numpy as np
+import torch
+from pytorch3d.common.workaround import _safe_det_3x3
+
+from .common_testing import TestCaseMixin
+
+
+class TestSafeDet3x3(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)
+        np.random.seed(42)
+
+    def _test_det_3x3(self, batch_size, device):
+        # Compare _safe_det_3x3 with Tensor.det() on a random batch of matrices.
+        matrices = torch.rand((batch_size, 3, 3), dtype=torch.float32, device=device)
+        actual_det = _safe_det_3x3(matrices)
+        self.assertClose(actual_det, matrices.det(), atol=1e-7)
+
+    def test_empty_batch(self):
+        for device_name in ("cpu", "cuda:0"):
+            self._test_det_3x3(0, torch.device(device_name))
+
+    def test_manual(self):
+        # Fixed matrices with determinants 1, 41 and -306; CPU first, then GPU.
+        matrices = torch.tensor(
+            [
+                [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+                [[2, -5, 3], [0, 7, -2], [-1, 4, 1]],
+                [[6, 1, 1], [4, -2, 5], [2, 8, 7]],
+            ],
+            dtype=torch.float32,
+        )
+        expected_det = torch.tensor([1, 41, -306], dtype=torch.float32)
+        self.assertClose(_safe_det_3x3(matrices), expected_det)
+
+        gpu = torch.device("cuda:0")
+        self.assertClose(_safe_det_3x3(matrices.to(gpu)), expected_det.to(gpu))
+
+    def test_regression(self):
+        # Random batch sizes, each checked on the CPU and then on "cuda:0".
+        tries = 32
+        devices = (torch.device("cpu"), torch.device("cuda:0"))
+        batch_sizes = np.random.randint(low=1, high=128, size=tries)
+
+        for batch_size in batch_sizes:
+            for device in devices:
+                self._test_det_3x3(batch_size, device)
diff --git a/pytorch3d/tests/test_compositing.py b/pytorch3d/tests/test_compositing.py
new file mode 100644
index 0000000000000000000000000000000000000000..5101ad1eab09234894fde9f0ae339d531d7d5f50
--- /dev/null
+++ b/pytorch3d/tests/test_compositing.py
@@ -0,0 +1,428 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.renderer.compositing import (
+    alpha_composite,
+    norm_weighted_sum,
+    weighted_sum,
+)
+
+from .common_testing import get_random_cuda_device, TestCaseMixin
+
+
+class TestAccumulatePoints(TestCaseMixin, unittest.TestCase):
+
+    # NAIVE PYTHON IMPLEMENTATIONS (USED FOR TESTING)
+    @staticmethod
+    def accumulate_alphacomposite_python(points_idx, alphas, features):
+        """
+        Reference alpha compositing written as explicit Python loops.
+
+        For each pixel the K point slots are visited in order and slots with a
+        negative index are skipped. Every remaining point adds
+        feature * alpha * transmittance, after which the transmittance is
+        multiplied by (1 - alpha).
+
+        Returns a (B, C, H, W) tensor with the dtype of ``alphas``.
+        """
+        batch, num_slots, height, width = points_idx.size()
+        channels = features.size(0)
+        out = torch.zeros(batch, channels, height, width, dtype=alphas.dtype)
+
+        for b in range(batch):
+            for c in range(channels):
+                for x in range(width):
+                    for y in range(height):
+                        remaining = 1
+                        for k in range(num_slots):
+                            point = points_idx[b, k, y, x]
+                            if point >= 0:
+                                a = alphas[b, k, y, x]
+                                out[b, c, y, x] += features[c, point] * a * remaining
+                                remaining = (1 - a) * remaining
+        return out
+
+    @staticmethod
+    def accumulate_weightedsum_python(points_idx, alphas, features):
+        """
+        Reference weighted-sum compositing written as explicit Python loops.
+
+        Each pixel accumulates feature * alpha over its K point slots; slots
+        holding a negative point index are skipped.
+
+        Returns a (B, C, H, W) tensor with the dtype of ``alphas``.
+        """
+        batch, num_slots, height, width = points_idx.size()
+        channels = features.size(0)
+        out = torch.zeros(batch, channels, height, width, dtype=alphas.dtype)
+
+        for b in range(batch):
+            for c in range(channels):
+                for x in range(width):
+                    for y in range(height):
+                        for k in range(num_slots):
+                            point = points_idx[b, k, y, x]
+                            if point >= 0:
+                                # Plain alpha weighting: slots do not attenuate each other.
+                                a = alphas[b, k, y, x]
+                                out[b, c, y, x] += features[c, point] * a
+
+        return out
+
+    @staticmethod
+    def accumulate_weightedsumnorm_python(points_idx, alphas, features):
+        """
+        Reference normalized weighted sum written as explicit Python loops.
+
+        For each pixel the alphas of all valid slots (non-negative point index)
+        are summed, the sum is clamped from below at 1e-4, and every valid
+        point then contributes feature * alpha / clamped_sum.
+
+        Returns a (B, C, H, W) tensor with the dtype of ``alphas``.
+        """
+
+        batch, num_slots, height, width = points_idx.size()
+        channels = features.size(0)
+        out = torch.zeros(batch, channels, height, width, dtype=alphas.dtype)
+
+        for b in range(batch):
+            for c in range(channels):
+                for x in range(width):
+                    for y in range(height):
+                        # First pass: total alpha of the valid slots.
+                        total = 0
+                        for k in range(num_slots):
+                            if points_idx[b, k, y, x] >= 0:
+                                total += alphas[b, k, y, x]
+
+                        # Guard against division by (almost) zero.
+                        total = max(total, 1e-4)
+
+                        # Second pass: accumulate the normalized contributions.
+                        for k in range(num_slots):
+                            point = points_idx[b, k, y, x]
+                            if point >= 0:
+                                a = alphas[b, k, y, x]
+                                out[b, c, y, x] += features[c, point] * a / total
+
+        return out
+
+    def test_python(self):  # sanity-check the naive references themselves
+        device = torch.device("cpu")
+        self._simple_alphacomposite(self.accumulate_alphacomposite_python, device)
+        self._simple_wsum(self.accumulate_weightedsum_python, device)
+        self._simple_wsumnorm(self.accumulate_weightedsumnorm_python, device)
+
+    def test_cpu(self):  # library ops on CPU vs. the hand-computed results
+        device = torch.device("cpu")
+        self._simple_alphacomposite(alpha_composite, device)
+        self._simple_wsum(weighted_sum, device)
+        self._simple_wsumnorm(norm_weighted_sum, device)
+
+    def test_cuda(self):  # library ops on a CUDA device vs. the same results
+        device = get_random_cuda_device()
+        self._simple_alphacomposite(alpha_composite, device)
+        self._simple_wsum(weighted_sum, device)
+        self._simple_wsumnorm(norm_weighted_sum, device)
+
+    def test_python_vs_cpu_vs_cuda(self):  # pair each op with its naive reference
+        self._python_vs_cpu_vs_cuda(
+            self.accumulate_alphacomposite_python, alpha_composite
+        )
+        self._python_vs_cpu_vs_cuda(
+            self.accumulate_weightedsumnorm_python, norm_weighted_sum
+        )
+        self._python_vs_cpu_vs_cuda(self.accumulate_weightedsum_python, weighted_sum)
+
+    def _python_vs_cpu_vs_cuda(self, accumulate_func_python, accumulate_func):
+        """Compare `accumulate_func` on CPU and CUDA with the naive reference.
+
+        The reference always receives CPU copies of the randomly drawn inputs.
+        """
+        torch.manual_seed(231)
+        W, C, P = 8, 3, 32
+
+        for d in ["cpu", get_random_cuda_device()]:
+            # TODO(gkioxari) add torch.float64 to types after double precision
+            # support is added to atomicAdd
+            for t in [torch.float32]:
+                device = torch.device(d)
+
+                # Create values
+                alphas = torch.rand(2, 4, W, W, dtype=t).to(device)
+                alphas.requires_grad = True
+                alphas_cpu = alphas.detach().cpu()
+                alphas_cpu.requires_grad = True
+
+                features = torch.randn(C, P, dtype=t).to(device)
+                features.requires_grad = True
+                features_cpu = features.detach().cpu()
+                features_cpu.requires_grad = True
+                # Indices lie in [-1, P - 1]; the references skip negative ones.
+                inds = torch.randint(P + 1, size=(2, 4, W, W)).to(device) - 1
+                inds_cpu = inds.detach().cpu()
+
+                args_device = (inds, alphas, features)
+                args_cpu = (inds_cpu, alphas_cpu, features_cpu)
+
+                self._compare_impls(
+                    accumulate_func_python,
+                    accumulate_func,
+                    args_cpu,
+                    args_device,
+                    (alphas_cpu, features_cpu),
+                    (alphas, features),
+                    compare_grads=True,
+                )
+
+    def _compare_impls(
+        self, fn1, fn2, args1, args2, grads1, grads2, compare_grads=False
+    ):
+        """Assert fn1(*args1) and fn2(*args2) agree, optionally with gradients.
+
+        `grads1` / `grads2` are the input tensors whose `.grad` is compared.
+        """
+        res1 = fn1(*args1)
+        res2 = fn2(*args2)
+        self.assertClose(res1.cpu(), res2.cpu(), atol=1e-6)
+
+        if not compare_grads:
+            return
+
+        # Compare gradients
+        torch.manual_seed(231)
+        grad_res = torch.randn_like(res1)
+        loss1 = (res1 * grad_res).sum()
+        loss1.backward()
+        grads1 = [gradsi.grad.data.clone().cpu() for gradsi in grads1]
+        grad_res = grad_res.to(res2)  # reuse the same weights on res2's device/dtype
+        loss2 = (res2 * grad_res).sum()
+        loss2.backward()
+        grads2 = [gradsi.grad.data.clone().cpu() for gradsi in grads2]
+        for i in range(0, len(grads1)):
+            self.assertClose(grads1[i].cpu(), grads2[i].cpu(), atol=1e-6)
+
+    def _simple_wsum(self, accum_func, device):
+        """Check `accum_func` against a hand-computed weighted-sum result."""
+        # Initialise variables
+        features = torch.Tensor([[0.1, 0.4, 0.6, 0.9], [0.1, 0.4, 0.6, 0.9]]).to(device)
+        alphas = torch.Tensor(
+            [
+                [
+                    [
+                        [0.5, 0.5, 0.5, 0.5],
+                        [0.5, 1.0, 1.0, 0.5],
+                        [0.5, 1.0, 1.0, 0.5],
+                        [0.5, 0.5, 0.5, 0.5],
+                    ],
+                    [
+                        [0.5, 0.5, 0.5, 0.5],
+                        [0.5, 1.0, 1.0, 0.5],
+                        [0.5, 1.0, 1.0, 0.5],
+                        [0.5, 0.5, 0.5, 0.5],
+                    ],
+                ]
+            ]
+        ).to(device)
+        # Entries of -1 contribute nothing to the expected result below.
+        points_idx = (
+            torch.Tensor(
+                [
+                    [
+                        # fmt: off
+                        [
+                            [0,  0,  0,  0],  # noqa: E241, E201
+                            [0, -1, -1, -1],  # noqa: E241, E201
+                            [0,  1,  1,  0],  # noqa: E241, E201
+                            [0,  0,  0,  0],  # noqa: E241, E201
+                        ],
+                        [
+                            [2,  2,  2,  2],  # noqa: E241, E201
+                            [2,  3,  3,  2],  # noqa: E241, E201
+                            [2,  3,  3,  2],  # noqa: E241, E201
+                            [2,  2, -1,  2],  # noqa: E241, E201
+                        ],
+                        # fmt: on
+                    ]
+                ]
+            )
+            .long()
+            .to(device)
+        )
+
+        result = accum_func(points_idx, alphas, features)
+
+        self.assertTrue(result.shape == (1, 2, 4, 4))
+
+        true_result = torch.Tensor(
+            [
+                [
+                    [
+                        [0.35, 0.35, 0.35, 0.35],
+                        [0.35, 0.90, 0.90, 0.30],
+                        [0.35, 1.30, 1.30, 0.35],
+                        [0.35, 0.35, 0.05, 0.35],
+                    ],
+                    [
+                        [0.35, 0.35, 0.35, 0.35],
+                        [0.35, 0.90, 0.90, 0.30],
+                        [0.35, 1.30, 1.30, 0.35],
+                        [0.35, 0.35, 0.05, 0.35],
+                    ],
+                ]
+            ]
+        ).to(device)
+
+        self.assertClose(result.cpu(), true_result.cpu(), rtol=1e-3)
+
+    def _simple_wsumnorm(self, accum_func, device):
+        """Check `accum_func` against a hand-computed normalized weighted sum."""
+        # Initialise variables
+        features = torch.Tensor([[0.1, 0.4, 0.6, 0.9], [0.1, 0.4, 0.6, 0.9]]).to(device)
+        alphas = torch.Tensor(
+            [
+                [
+                    [
+                        [0.5, 0.5, 0.5, 0.5],
+                        [0.5, 1.0, 1.0, 0.5],
+                        [0.5, 1.0, 1.0, 0.5],
+                        [0.5, 0.5, 0.5, 0.5],
+                    ],
+                    [
+                        [0.5, 0.5, 0.5, 0.5],
+                        [0.5, 1.0, 1.0, 0.5],
+                        [0.5, 1.0, 1.0, 0.5],
+                        [0.5, 0.5, 0.5, 0.5],
+                    ],
+                ]
+            ]
+        ).to(device)
+        # Entries of -1 contribute nothing to the expected result below.
+        # fmt: off
+        points_idx = (
+            torch.Tensor(
+                [
+                    [
+                        [
+                            [0,  0,  0,  0],  # noqa: E241, E201
+                            [0, -1, -1, -1],  # noqa: E241, E201
+                            [0,  1,  1,  0],  # noqa: E241, E201
+                            [0,  0,  0,  0],  # noqa: E241, E201
+                        ],
+                        [
+                            [2, 2,  2, 2],  # noqa: E241, E201
+                            [2, 3,  3, 2],  # noqa: E241, E201
+                            [2, 3,  3, 2],  # noqa: E241, E201
+                            [2, 2, -1, 2],  # noqa: E241, E201
+                        ],
+                    ]
+                ]
+            )
+            .long()
+            .to(device)
+        )
+        # fmt: on
+
+        result = accum_func(points_idx, alphas, features)
+
+        self.assertTrue(result.shape == (1, 2, 4, 4))
+
+        true_result = torch.Tensor(
+            [
+                [
+                    [
+                        [0.35, 0.35, 0.35, 0.35],
+                        [0.35, 0.90, 0.90, 0.60],
+                        [0.35, 0.65, 0.65, 0.35],
+                        [0.35, 0.35, 0.10, 0.35],
+                    ],
+                    [
+                        [0.35, 0.35, 0.35, 0.35],
+                        [0.35, 0.90, 0.90, 0.60],
+                        [0.35, 0.65, 0.65, 0.35],
+                        [0.35, 0.35, 0.10, 0.35],
+                    ],
+                ]
+            ]
+        ).to(device)
+
+        self.assertClose(result.cpu(), true_result.cpu(), rtol=1e-3)
+
+    def _simple_alphacomposite(self, accum_func, device):
+        """Check `accum_func` against a hand-computed alpha-composited result."""
+        # Initialise variables
+        features = torch.Tensor([[0.1, 0.4, 0.6, 0.9], [0.1, 0.4, 0.6, 0.9]]).to(device)
+        alphas = torch.Tensor(
+            [
+                [
+                    [
+                        [0.5, 0.5, 0.5, 0.5],
+                        [0.5, 1.0, 1.0, 0.5],
+                        [0.5, 1.0, 1.0, 0.5],
+                        [0.5, 0.5, 0.5, 0.5],
+                    ],
+                    [
+                        [0.5, 0.5, 0.5, 0.5],
+                        [0.5, 1.0, 1.0, 0.5],
+                        [0.5, 1.0, 1.0, 0.5],
+                        [0.5, 0.5, 0.5, 0.5],
+                    ],
+                ]
+            ]
+        ).to(device)
+        # Entries of -1 contribute nothing to the expected result below.
+        # fmt: off
+        points_idx = (
+            torch.Tensor(
+                [
+                    [
+                        [
+                            [0,  0,  0,  0],  # noqa: E241, E201
+                            [0, -1, -1, -1],  # noqa: E241, E201
+                            [0,  1,  1,  0],  # noqa: E241, E201
+                            [0,  0,  0,  0],  # noqa: E241, E201
+                        ],
+                        [
+                            [2, 2,  2, 2],  # noqa: E241, E201
+                            [2, 3,  3, 2],  # noqa: E241, E201
+                            [2, 3,  3, 2],  # noqa: E241, E201
+                            [2, 2, -1, 2],  # noqa: E241, E201
+                        ],
+                    ]
+                ]
+            )
+            .long()
+            .to(device)
+        )
+        # fmt: on
+
+        result = accum_func(points_idx, alphas, features)
+
+        self.assertTrue(result.shape == (1, 2, 4, 4))
+
+        true_result = torch.Tensor(
+            [
+                [
+                    [
+                        [0.20, 0.20, 0.20, 0.20],
+                        [0.20, 0.90, 0.90, 0.30],
+                        [0.20, 0.40, 0.40, 0.20],
+                        [0.20, 0.20, 0.05, 0.20],
+                    ],
+                    [
+                        [0.20, 0.20, 0.20, 0.20],
+                        [0.20, 0.90, 0.90, 0.30],
+                        [0.20, 0.40, 0.40, 0.20],
+                        [0.20, 0.20, 0.05, 0.20],
+                    ],
+                ]
+            ]
+        ).to(device)
+        # Compare with a tolerance: exact float equality is brittle across devices.
+        self.assertClose(result.cpu(), true_result.cpu(), rtol=1e-3)
diff --git a/pytorch3d/tests/test_cubify.py b/pytorch3d/tests/test_cubify.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c82919b5cb58a49292a3a006dfd34b6fd5f018a
--- /dev/null
+++ b/pytorch3d/tests/test_cubify.py
@@ -0,0 +1,315 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.ops import cubify
+
+from .common_testing import TestCaseMixin
+
+
+class TestCubify(TestCaseMixin, unittest.TestCase):
+    def test_allempty(self):  # all-zero voxel grids must yield empty meshes
+        N, V = 32, 14
+        device = torch.device("cuda:0")
+        voxels = torch.zeros((N, V, V, V), dtype=torch.float32, device=device)
+        meshes = cubify(voxels, 0.5)  # 0.5 is presumably the occupancy threshold
+        self.assertTrue(meshes.isempty())
+
+    def test_cubify(self):
+        """Check cubify vertices and faces for four small 2x2x2 occupancy grids."""
+        N, V = 4, 2
+        device = torch.device("cuda:0")
+        voxels = torch.zeros((N, V, V, V), dtype=torch.float32, device=device)
+        # 1st example: (top left corner, znear) is on
+        voxels[0, 0, 0, 0] = 1.0
+        # 2nd example: all are on
+        voxels[1] = 1.0
+        # 3rd example: empty
+        # 4th example
+        voxels[3, :, :, 1] = 1.0
+        voxels[3, 1, 1, 0] = 1.0
+
+        # compute cubify
+        meshes = cubify(voxels, 0.5)
+
+        # 1st-check: a single voxel gives one cube (8 vertices, 12 triangles)
+        verts, faces = meshes.get_mesh_verts_faces(0)
+        self.assertClose(faces.max().cpu(), torch.tensor(verts.size(0) - 1))
+        self.assertClose(
+            verts,
+            torch.tensor(
+                [
+                    [-1.0, -1.0, -1.0],
+                    [-1.0, -1.0, 1.0],
+                    [1.0, -1.0, -1.0],
+                    [1.0, -1.0, 1.0],
+                    [-1.0, 1.0, -1.0],
+                    [-1.0, 1.0, 1.0],
+                    [1.0, 1.0, -1.0],
+                    [1.0, 1.0, 1.0],
+                ],
+                dtype=torch.float32,
+                device=device,
+            ),
+        )
+        self.assertClose(
+            faces,
+            torch.tensor(
+                [
+                    [0, 1, 4],
+                    [1, 5, 4],
+                    [4, 5, 6],
+                    [5, 7, 6],
+                    [0, 4, 6],
+                    [0, 6, 2],
+                    [0, 3, 1],
+                    [0, 2, 3],
+                    [6, 7, 3],
+                    [6, 3, 2],
+                    [1, 7, 5],
+                    [1, 3, 7],
+                ],
+                dtype=torch.int64,
+                device=device,
+            ),
+        )
+        # 2nd-check: full grid; 26 surface vertices, (1, 1, 1) is interior and absent
+        verts, faces = meshes.get_mesh_verts_faces(1)
+        self.assertClose(faces.max().cpu(), torch.tensor(verts.size(0) - 1))
+        self.assertClose(
+            verts,
+            torch.tensor(
+                [
+                    [-1.0, -1.0, -1.0],
+                    [-1.0, -1.0, 1.0],
+                    [-1.0, -1.0, 3.0],
+                    [1.0, -1.0, -1.0],
+                    [1.0, -1.0, 1.0],
+                    [1.0, -1.0, 3.0],
+                    [3.0, -1.0, -1.0],
+                    [3.0, -1.0, 1.0],
+                    [3.0, -1.0, 3.0],
+                    [-1.0, 1.0, -1.0],
+                    [-1.0, 1.0, 1.0],
+                    [-1.0, 1.0, 3.0],
+                    [1.0, 1.0, -1.0],
+                    [1.0, 1.0, 3.0],
+                    [3.0, 1.0, -1.0],
+                    [3.0, 1.0, 1.0],
+                    [3.0, 1.0, 3.0],
+                    [-1.0, 3.0, -1.0],
+                    [-1.0, 3.0, 1.0],
+                    [-1.0, 3.0, 3.0],
+                    [1.0, 3.0, -1.0],
+                    [1.0, 3.0, 1.0],
+                    [1.0, 3.0, 3.0],
+                    [3.0, 3.0, -1.0],
+                    [3.0, 3.0, 1.0],
+                    [3.0, 3.0, 3.0],
+                ],
+                dtype=torch.float32,
+                device=device,
+            ),
+        )
+        self.assertClose(
+            faces,
+            torch.tensor(
+                [
+                    [0, 1, 9],
+                    [1, 10, 9],
+                    [0, 9, 12],
+                    [0, 12, 3],
+                    [0, 4, 1],
+                    [0, 3, 4],
+                    [1, 2, 10],
+                    [2, 11, 10],
+                    [1, 5, 2],
+                    [1, 4, 5],
+                    [2, 13, 11],
+                    [2, 5, 13],
+                    [3, 12, 14],
+                    [3, 14, 6],
+                    [3, 7, 4],
+                    [3, 6, 7],
+                    [14, 15, 7],
+                    [14, 7, 6],
+                    [4, 8, 5],
+                    [4, 7, 8],
+                    [15, 16, 8],
+                    [15, 8, 7],
+                    [5, 16, 13],
+                    [5, 8, 16],
+                    [9, 10, 17],
+                    [10, 18, 17],
+                    [17, 18, 20],
+                    [18, 21, 20],
+                    [9, 17, 20],
+                    [9, 20, 12],
+                    [10, 11, 18],
+                    [11, 19, 18],
+                    [18, 19, 21],
+                    [19, 22, 21],
+                    [11, 22, 19],
+                    [11, 13, 22],
+                    [20, 21, 23],
+                    [21, 24, 23],
+                    [12, 20, 23],
+                    [12, 23, 14],
+                    [23, 24, 15],
+                    [23, 15, 14],
+                    [21, 22, 24],
+                    [22, 25, 24],
+                    [24, 25, 16],
+                    [24, 16, 15],
+                    [13, 25, 22],
+                    [13, 16, 25],
+                ],
+                dtype=torch.int64,
+                device=device,
+            ),
+        )
+
+        # 3rd-check: the all-zero grid produces no vertices and no faces
+        verts, faces = meshes.get_mesh_verts_faces(2)
+        self.assertTrue(verts.size(0) == 0)
+        self.assertTrue(faces.size(0) == 0)
+
+        # 4th-check: five occupied voxels (the [:, :, 1] slab plus [1, 1, 0])
+        verts, faces = meshes.get_mesh_verts_faces(3)
+        self.assertClose(
+            verts,
+            torch.tensor(
+                [
+                    [1.0, -1.0, -1.0],
+                    [1.0, -1.0, 1.0],
+                    [1.0, -1.0, 3.0],
+                    [3.0, -1.0, -1.0],
+                    [3.0, -1.0, 1.0],
+                    [3.0, -1.0, 3.0],
+                    [-1.0, 1.0, 1.0],
+                    [-1.0, 1.0, 3.0],
+                    [1.0, 1.0, -1.0],
+                    [1.0, 1.0, 1.0],
+                    [1.0, 1.0, 3.0],
+                    [3.0, 1.0, -1.0],
+                    [3.0, 1.0, 1.0],
+                    [3.0, 1.0, 3.0],
+                    [-1.0, 3.0, 1.0],
+                    [-1.0, 3.0, 3.0],
+                    [1.0, 3.0, -1.0],
+                    [1.0, 3.0, 1.0],
+                    [1.0, 3.0, 3.0],
+                    [3.0, 3.0, -1.0],
+                    [3.0, 3.0, 1.0],
+                    [3.0, 3.0, 3.0],
+                ],
+                dtype=torch.float32,
+                device=device,
+            ),
+        )
+        self.assertClose(
+            faces,
+            torch.tensor(
+                [
+                    [0, 1, 8],
+                    [1, 9, 8],
+                    [0, 8, 11],
+                    [0, 11, 3],
+                    [0, 4, 1],
+                    [0, 3, 4],
+                    [11, 12, 4],
+                    [11, 4, 3],
+                    [1, 2, 9],
+                    [2, 10, 9],
+                    [1, 5, 2],
+                    [1, 4, 5],
+                    [12, 13, 5],
+                    [12, 5, 4],
+                    [2, 13, 10],
+                    [2, 5, 13],
+                    [6, 7, 14],
+                    [7, 15, 14],
+                    [14, 15, 17],
+                    [15, 18, 17],
+                    [6, 14, 17],
+                    [6, 17, 9],
+                    [6, 10, 7],
+                    [6, 9, 10],
+                    [7, 18, 15],
+                    [7, 10, 18],
+                    [8, 9, 16],
+                    [9, 17, 16],
+                    [16, 17, 19],
+                    [17, 20, 19],
+                    [8, 16, 19],
+                    [8, 19, 11],
+                    [19, 20, 12],
+                    [19, 12, 11],
+                    [17, 18, 20],
+                    [18, 21, 20],
+                    [20, 21, 13],
+                    [20, 13, 12],
+                    [10, 21, 18],
+                    [10, 13, 21],
+                ],
+                dtype=torch.int64,
+                device=device,
+            ),
+        )
+
+    def test_align(self):
+        """Check the vertex extent produced by each `align` mode of cubify."""
+        N, V = 1, 2
+        device = torch.device("cuda:0")
+        voxels = torch.ones((N, V, V, V), dtype=torch.float32, device=device)
+        # topleft align (no `align` argument is passed): extent is [-1, 3]
+        mesh = cubify(voxels, 0.5)
+        verts, faces = mesh.get_mesh_verts_faces(0)
+        self.assertClose(verts.min(), torch.tensor(-1.0, device=device))
+        self.assertClose(verts.max(), torch.tensor(3.0, device=device))
+
+        # corner align: extent is [-1, 1]
+        mesh = cubify(voxels, 0.5, align="corner")
+        verts, faces = mesh.get_mesh_verts_faces(0)
+        self.assertClose(verts.min(), torch.tensor(-1.0, device=device))
+        self.assertClose(verts.max(), torch.tensor(1.0, device=device))
+
+        # center align: extent is [-2, 2]
+        mesh = cubify(voxels, 0.5, align="center")
+        verts, faces = mesh.get_mesh_verts_faces(0)
+        self.assertClose(verts.min(), torch.tensor(-2.0, device=device))
+        self.assertClose(verts.max(), torch.tensor(2.0, device=device))
+
+        # invalid align: empty string
+        with self.assertRaisesRegex(ValueError, "Align mode must be one of"):
+            cubify(voxels, 0.5, align="")
+
+        # invalid align: unknown mode name
+        with self.assertRaisesRegex(ValueError, "Align mode must be one of"):
+            cubify(voxels, 0.5, align="topright")
+
+        # inside occupancy, similar to GH#185 use case
+        N, V = 1, 4
+        voxels = torch.zeros((N, V, V, V), dtype=torch.float32, device=device)
+        voxels[0, : V // 2, : V // 2, : V // 2] = 1.0
+        mesh = cubify(voxels, 0.5, align="corner")
+        verts, faces = mesh.get_mesh_verts_faces(0)
+        self.assertClose(verts.min(), torch.tensor(-1.0, device=device))
+        self.assertClose(verts.max(), torch.tensor(0.0, device=device))
+
+    @staticmethod
+    def cubify_with_init(batch_size: int, V: int):
+        device = torch.device("cuda:0")
+        voxels = torch.rand((batch_size, V, V, V), dtype=torch.float32, device=device)
+        torch.cuda.synchronize()  # wait for the setup work before returning
+
+        def convert():
+            cubify(voxels, 0.5)
+            torch.cuda.synchronize()  # wait for the queued CUDA work to finish
+
+        return convert  # closure over the pre-built random voxel grid
diff --git a/pytorch3d/tests/test_face_areas_normals.py b/pytorch3d/tests/test_face_areas_normals.py
new file mode 100644
index 0000000000000000000000000000000000000000..1efe263d1b328cf518db0c179100b3cedc0f854c
--- /dev/null
+++ b/pytorch3d/tests/test_face_areas_normals.py
@@ -0,0 +1,144 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import torch
+from pytorch3d.ops import mesh_face_areas_normals
+from pytorch3d.structures.meshes import Meshes
+
+from .common_testing import get_random_cuda_device, TestCaseMixin
+
+
+class TestFaceAreasNormals(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(1)  # makes the random meshes and gradients reproducible
+
+    @staticmethod
+    def init_meshes(
+        num_meshes: int = 10,
+        num_verts: int = 1000,
+        num_faces: int = 3000,
+        device: str = "cpu",
+    ):
+        """Build a Meshes batch from random vertices and random face indices."""
+        device = torch.device(device)
+        verts_list = []
+        faces_list = []
+        for _ in range(num_meshes):
+            verts = torch.rand(
+                (num_verts, 3), dtype=torch.float32, device=device, requires_grad=True
+            )
+            faces = torch.randint(
+                num_verts, size=(num_faces, 3), dtype=torch.int64, device=device
+            )
+            verts_list.append(verts)
+            faces_list.append(faces)
+        meshes = Meshes(verts_list, faces_list)
+        return meshes
+
+    @staticmethod
+    def face_areas_normals_python(verts, faces):
+        """
+        Reference face areas and unit normals computed with plain PyTorch ops.
+
+        Area is half the edge cross-product norm; the normal is its unit vector.
+        """
+        # TODO(gkioxari) Change cast to floats once we add support for doubles.
+        corners = verts.float()[faces]  # (F, 3, 3)
+        v0 = corners[:, 0]
+        edge01 = corners[:, 1] - v0
+        edge02 = corners[:, 2] - v0
+        cross = torch.cross(edge01, edge02, dim=1)  # (F, 3)
+        areas = cross.norm(dim=-1) / 2
+        unit_normals = torch.nn.functional.normalize(cross, p=2, dim=1, eps=1e-6)
+        return areas, unit_normals
+
+    def _test_face_areas_normals_helper(self, device, dtype=torch.float32):
+        """
+        Check the results from face_areas cuda/cpp and PyTorch implementation are
+        the same.
+        """
+        meshes = self.init_meshes(10, 200, 400, device=device)
+        # make them leaf nodes
+        verts = meshes.verts_packed().detach().clone().to(dtype)
+        verts.requires_grad = True
+        faces = meshes.faces_packed().detach().clone()
+        # forward
+        areas, normals = mesh_face_areas_normals(verts, faces)
+        verts_torch = verts.detach().clone().to(dtype)
+        verts_torch.requires_grad = True
+        faces_torch = faces.detach().clone()
+        (areas_torch, normals_torch) = TestFaceAreasNormals.face_areas_normals_python(
+            verts_torch, faces_torch
+        )
+        self.assertClose(areas_torch, areas, atol=1e-7)
+        # normals get normalized by area thus sensitivity increases as areas
+        # in our tests can be arbitrarily small. Thus we compare normals after
+        # multiplying with areas
+        unnormals = normals * areas.view(-1, 1)
+        unnormals_torch = normals_torch * areas_torch.view(-1, 1)
+        self.assertClose(unnormals_torch, unnormals, atol=1e-6)
+        # NOTE(review): backward() is called on the areas tensor only, with a
+        # 2-tuple gradient; confirm grad_normals really reaches the normals.
+        # backward
+        grad_areas = torch.rand(areas.shape, device=device, dtype=dtype)
+        grad_normals = torch.rand(normals.shape, device=device, dtype=dtype)
+        areas.backward((grad_areas, grad_normals))
+        grad_verts = verts.grad
+        areas_torch.backward((grad_areas, grad_normals))
+        grad_verts_torch = verts_torch.grad
+        self.assertClose(grad_verts_torch, grad_verts, atol=1e-6)
+
+    def test_face_areas_normals_cpu(self):  # float32 inputs on CPU
+        self._test_face_areas_normals_helper("cpu")
+
+    def test_face_areas_normals_cuda(self):  # float32 inputs on a CUDA device
+        device = get_random_cuda_device()
+        self._test_face_areas_normals_helper(device)
+
+    def test_nonfloats_cpu(self):  # double-precision inputs on CPU
+        self._test_face_areas_normals_helper("cpu", dtype=torch.double)
+
+    def test_nonfloats_cuda(self):  # double-precision inputs on a CUDA device
+        device = get_random_cuda_device()
+        self._test_face_areas_normals_helper(device, dtype=torch.double)
+
+    @staticmethod
+    def face_areas_normals_with_init(
+        num_meshes: int, num_verts: int, num_faces: int, device: str = "cpu"
+    ):
+        meshes = TestFaceAreasNormals.init_meshes(
+            num_meshes, num_verts, num_faces, device
+        )
+        verts = meshes.verts_packed()
+        faces = meshes.faces_packed()
+        torch.cuda.synchronize()  # NOTE(review): runs even when device is "cpu"
+
+        def face_areas_normals():
+            mesh_face_areas_normals(verts, faces)
+            torch.cuda.synchronize()
+
+        return face_areas_normals  # closure over the pre-built verts/faces
+
+    @staticmethod
+    def face_areas_normals_with_init_torch(
+        num_meshes: int, num_verts: int, num_faces: int, device: str = "cpu"
+    ):
+        meshes = TestFaceAreasNormals.init_meshes(
+            num_meshes, num_verts, num_faces, device
+        )
+        verts = meshes.verts_packed()
+        faces = meshes.faces_packed()
+        torch.cuda.synchronize()  # NOTE(review): runs even when device is "cpu"
+
+        def face_areas_normals():
+            TestFaceAreasNormals.face_areas_normals_python(verts, faces)
+            torch.cuda.synchronize()
+
+        return face_areas_normals  # closure running the pure-PyTorch reference
diff --git a/pytorch3d/tests/test_graph_conv.py b/pytorch3d/tests/test_graph_conv.py
new file mode 100644
index 0000000000000000000000000000000000000000..94856ebda47a7043a8f5b2a42bf00fc1d53715e2
--- /dev/null
+++ b/pytorch3d/tests/test_graph_conv.py
@@ -0,0 +1,204 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+import torch.nn as nn
+from pytorch3d import _C
+from pytorch3d.ops.graph_conv import gather_scatter, gather_scatter_python, GraphConv
+from pytorch3d.structures.meshes import Meshes
+from pytorch3d.utils import ico_sphere
+
+from .common_testing import get_random_cuda_device, TestCaseMixin
+
+
+class TestGraphConv(TestCaseMixin, unittest.TestCase):
+    def test_undirected(self):
+        dtype = torch.float32
+        device = get_random_cuda_device()
+        verts = torch.tensor(
+            [[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=dtype, device=device
+        )
+        edges = torch.tensor([[0, 1], [0, 2]], device=device)
+        w0 = torch.tensor([[1, 1, 1]], dtype=dtype, device=device)
+        w1 = torch.tensor([[-1, -1, -1]], dtype=dtype, device=device)
+
+        expected_y = torch.tensor(
+            [
+                [1 + 2 + 3 - 4 - 5 - 6 - 7 - 8 - 9],
+                [4 + 5 + 6 - 1 - 2 - 3],
+                [7 + 8 + 9 - 1 - 2 - 3],
+            ],
+            dtype=dtype,
+            device=device,
+        )
+
+        conv = GraphConv(3, 1, directed=False).to(device)
+        conv.w0.weight.data.copy_(w0)
+        conv.w0.bias.data.zero_()
+        conv.w1.weight.data.copy_(w1)
+        conv.w1.bias.data.zero_()
+
+        y = conv(verts, edges)
+        self.assertClose(y, expected_y)
+
+    def test_no_edges(self):
+        dtype = torch.float32
+        verts = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=dtype)
+        edges = torch.zeros(0, 2, dtype=torch.int64)
+        w0 = torch.tensor([[1, -1, -2]], dtype=dtype)
+        expected_y = torch.tensor(
+            [[1 - 2 - 2 * 3], [4 - 5 - 2 * 6], [7 - 8 - 2 * 9]], dtype=dtype
+        )
+        conv = GraphConv(3, 1).to(dtype)
+        conv.w0.weight.data.copy_(w0)
+        conv.w0.bias.data.zero_()
+
+        y = conv(verts, edges)
+        self.assertClose(y, expected_y)
+
+    def test_no_verts_and_edges(self):
+        dtype = torch.float32
+        verts = torch.tensor([], dtype=dtype, requires_grad=True)
+        edges = torch.tensor([], dtype=dtype)
+        w0 = torch.tensor([[1, -1, -2]], dtype=dtype)
+
+        conv = GraphConv(3, 1).to(dtype)
+        conv.w0.weight.data.copy_(w0)
+        conv.w0.bias.data.zero_()
+        y = conv(verts, edges)
+        self.assertClose(y, torch.zeros((0, 1)))
+        self.assertTrue(y.requires_grad)
+
+        conv2 = GraphConv(3, 2).to(dtype)
+        conv2.w0.weight.data.copy_(w0.repeat(2, 1))
+        conv2.w0.bias.data.zero_()
+        y = conv2(verts, edges)
+        self.assertClose(y, torch.zeros((0, 2)))
+        self.assertTrue(y.requires_grad)
+
+    def test_directed(self):
+        dtype = torch.float32
+        verts = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=dtype)
+        edges = torch.tensor([[0, 1], [0, 2]])
+        w0 = torch.tensor([[1, 1, 1]], dtype=dtype)
+        w1 = torch.tensor([[-1, -1, -1]], dtype=dtype)
+
+        expected_y = torch.tensor(
+            [[1 + 2 + 3 - 4 - 5 - 6 - 7 - 8 - 9], [4 + 5 + 6], [7 + 8 + 9]], dtype=dtype
+        )
+
+        conv = GraphConv(3, 1, directed=True).to(dtype)
+        conv.w0.weight.data.copy_(w0)
+        conv.w0.bias.data.zero_()
+        conv.w1.weight.data.copy_(w1)
+        conv.w1.bias.data.zero_()
+
+        y = conv(verts, edges)
+        self.assertClose(y, expected_y)
+
+    def test_backward(self):
+        device = get_random_cuda_device()
+        mesh = ico_sphere()
+        verts = mesh.verts_packed()
+        edges = mesh.edges_packed()
+        verts_cpu = verts.clone()
+        edges_cpu = edges.clone()
+        verts_cuda = verts.clone().to(device)
+        edges_cuda = edges.clone().to(device)
+        verts.requires_grad = True
+        verts_cpu.requires_grad = True
+        verts_cuda.requires_grad = True
+
+        neighbor_sums_cuda = gather_scatter(verts_cuda, edges_cuda, False)
+        neighbor_sums_cpu = gather_scatter(verts_cpu, edges_cpu, False)
+        neighbor_sums = gather_scatter_python(verts, edges, False)
+        randoms = torch.rand_like(neighbor_sums)
+        (neighbor_sums_cuda * randoms.to(device)).sum().backward()
+        (neighbor_sums_cpu * randoms).sum().backward()
+        (neighbor_sums * randoms).sum().backward()
+
+        self.assertClose(verts.grad, verts_cuda.grad.cpu())
+        self.assertClose(verts.grad, verts_cpu.grad)
+
+    def test_repr(self):
+        conv = GraphConv(32, 64, directed=True)
+        self.assertEqual(repr(conv), "GraphConv(32 -> 64, directed=True)")
+
+    def test_cpu_cuda_tensor_error(self):
+        """GraphConv must reject verts and edges that live on different devices."""
+        device = get_random_cuda_device()
+        rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
+        verts = torch.tensor(rows, dtype=torch.float32, device=device)
+        edges = torch.tensor([[0, 1], [0, 2]])  # no device given: not on `device`
+        conv = GraphConv(3, 1, directed=True).to(torch.float32)
+        with self.assertRaises(Exception) as err:
+            conv(verts, edges)
+        self.assertIn("tensors must be on the same device.", str(err.exception))
+
+    def test_gather_scatter(self):
+        """
+        Check gather_scatter cuda and python versions give the same results.
+        Check that the same _C.gather_scatter call on cpu tensors also matches
+        the python version, for both undirected and directed edges.
+        """
+        device = get_random_cuda_device()
+        mesh = ico_sphere()
+        verts = mesh.verts_packed()
+        edges = mesh.edges_packed()
+        w0 = nn.Linear(3, 1)
+        input = w0(verts)
+
+        # undirected
+        output_python = gather_scatter_python(input, edges, False)
+        output_cuda = _C.gather_scatter(
+            input.to(device=device), edges.to(device=device), False, False
+        )
+        self.assertClose(output_cuda.cpu(), output_python)
+
+        output_cpu = _C.gather_scatter(input.cpu(), edges.cpu(), False, False)
+        self.assertClose(output_cpu, output_python)
+
+        # directed
+        output_python = gather_scatter_python(input, edges, True)
+        output_cuda = _C.gather_scatter(
+            input.to(device=device), edges.to(device=device), True, False
+        )
+        self.assertClose(output_cuda.cpu(), output_python)
+        output_cpu = _C.gather_scatter(input.cpu(), edges.cpu(), True, False)
+        self.assertClose(output_cpu, output_python)
+
+    @staticmethod
+    def graph_conv_forward_backward(
+        gconv_dim,
+        num_meshes,
+        num_verts,
+        num_faces,
+        directed: bool,
+        backend: str = "cuda",
+    ):
+        device = torch.device("cuda") if backend == "cuda" else "cpu"
+        verts_list = torch.tensor(num_verts * [[0.11, 0.22, 0.33]], device=device).view(
+            -1, 3
+        )
+        faces_list = torch.tensor(num_faces * [[1, 2, 3]], device=device).view(-1, 3)
+        meshes = Meshes(num_meshes * [verts_list], num_meshes * [faces_list])
+        gconv = GraphConv(gconv_dim, gconv_dim, directed=directed)
+        gconv.to(device)
+        edges = meshes.edges_packed()
+        total_verts = meshes.verts_packed().shape[0]
+
+        # Features.
+        x = torch.randn(total_verts, gconv_dim, device=device, requires_grad=True)
+        torch.cuda.synchronize()
+
+        def run_graph_conv():
+            y1 = gconv(x, edges)
+            y1.sum().backward()
+            torch.cuda.synchronize()
+
+        return run_graph_conv
diff --git a/pytorch3d/tests/test_harmonic_embedding.py b/pytorch3d/tests/test_harmonic_embedding.py
new file mode 100644
index 0000000000000000000000000000000000000000..91e1ba8bbaad747e442ed7b0f38caa367e869710
--- /dev/null
+++ b/pytorch3d/tests/test_harmonic_embedding.py
@@ -0,0 +1,153 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.renderer.implicit import HarmonicEmbedding
+from torch.distributions import MultivariateNormal
+
+from .common_testing import TestCaseMixin
+
+
+class TestHarmonicEmbedding(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(1)
+
+    def test_correct_output_dim(self):
+        embed_fun = HarmonicEmbedding(n_harmonic_functions=2, append_input=False)
+        # input_dims * (2 * n_harmonic_functions + int(append_input))
+        output_dim = 3 * (2 * 2 + int(False))
+        self.assertEqual(
+            output_dim,
+            embed_fun.get_output_dim_static(
+                input_dims=3, n_harmonic_functions=2, append_input=False
+            ),
+        )
+        self.assertEqual(output_dim, embed_fun.get_output_dim())
+
+    def test_correct_frequency_range(self):
+        embed_fun_log = HarmonicEmbedding(n_harmonic_functions=3)
+        embed_fun_lin = HarmonicEmbedding(n_harmonic_functions=3, logspace=False)
+        self.assertClose(embed_fun_log._frequencies, torch.FloatTensor((1.0, 2.0, 4.0)))
+        self.assertClose(embed_fun_lin._frequencies, torch.FloatTensor((1.0, 2.5, 4.0)))
+
+    def test_correct_embed_out(self):
+        """Check output shape, the sin/cos identity, and the append_input layout."""
+        n_harmonic_functions = 2
+        x = torch.randn((1, 5))
+        D = 5 * n_harmonic_functions * 2  # sin + cos
+
+        embed_fun = HarmonicEmbedding(
+            n_harmonic_functions=n_harmonic_functions, append_input=False
+        )
+        embed_out = embed_fun(x)
+
+        self.assertEqual(embed_out.shape, (1, D))
+        # Sum the squares of the respective frequencies
+        # cos^2(x) + sin^2(x) = 1
+        sum_squares = embed_out[0, : D // 2] ** 2 + embed_out[0, D // 2 :] ** 2
+        self.assertClose(sum_squares, torch.ones((D // 2)))
+
+        # Test append input
+        embed_fun = HarmonicEmbedding(
+            n_harmonic_functions=n_harmonic_functions, append_input=True
+        )
+        embed_out_appended_input = embed_fun(x)
+        # Shapes are exact integers: compare with assertEqual, as for embed_out.
+        self.assertEqual(embed_out_appended_input.shape, (1, D + x.shape[-1]))
+        # Last plane in output is the input
+        self.assertClose(embed_out_appended_input[..., -x.shape[-1] :], x)
+        self.assertClose(embed_out_appended_input[..., : -x.shape[-1]], embed_out)
+
+    def test_correct_embed_out_with_diag_cov(self):
+        """Check the diag_cov (MipNerf-style) embedding shape, scaling and layout."""
+        n_harmonic_functions = 2
+        x = torch.randn((1, 3))
+        diag_cov = torch.randn((1, 3))
+        D = 3 * n_harmonic_functions * 2  # sin + cos
+
+        embed_fun = HarmonicEmbedding(
+            n_harmonic_functions=n_harmonic_functions, append_input=False
+        )
+        embed_out = embed_fun(x, diag_cov=diag_cov)
+
+        self.assertEqual(embed_out.shape, (1, D))
+
+        # Compute the scaling factor introduced in MipNerf
+        scale_factor = (
+            -0.5 * diag_cov[..., None] * torch.pow(embed_fun._frequencies[None, :], 2)
+        )
+        scale_factor = torch.exp(scale_factor).reshape(1, -1).tile((1, 2))
+        # If we remove this scaling factor, we should go back to the
+        # classical harmonic embedding:
+        # Sum the squares of the respective frequencies
+        # cos^2(x) + sin^2(x) = 1
+        embed_out_without_cov = embed_out / scale_factor
+        sum_squares = (
+            embed_out_without_cov[0, : D // 2] ** 2
+            + embed_out_without_cov[0, D // 2 :] ** 2
+        )
+        self.assertClose(sum_squares, torch.ones((D // 2)))
+
+        # Test append input
+        embed_fun = HarmonicEmbedding(
+            n_harmonic_functions=n_harmonic_functions, append_input=True
+        )
+        embed_out_appended_input = embed_fun(x, diag_cov=diag_cov)
+        # Shapes are exact integers: compare with assertEqual, as for embed_out.
+        self.assertEqual(embed_out_appended_input.shape, (1, D + x.shape[-1]))
+        # Last plane in output is the input
+        self.assertClose(embed_out_appended_input[..., -x.shape[-1] :], x)
+        self.assertClose(embed_out_appended_input[..., : -x.shape[-1]], embed_out)
+
+    def test_correct_behavior_between_ipe_and_its_estimation_from_harmonic_embedding(
+        self,
+    ):
+        """
+        Check that the HarmonicEmbedding with integrated_position_encoding (IPE) set to
+        True is coherent with the HarmonicEmbedding.
+
+        What is the idea behind this test?
+
+        We wish to produce an IPE that is the expectation
+        of our lifted multivariate gaussian, modulated by the sine and cosine of
+        the coordinates. This expectation has a closed form
+        (see equations 11, 12, 13, 14 of [1]).
+
+        We sample N elements from the multivariate gaussian defined by its mean and covariance
+        and compute the HarmonicEmbedding. The expected value of those embeddings should be
+        equal to our IPE.
+
+        Inspired from:
+        https://github.com/google/mipnerf/blob/84c969e0a623edd183b75693aed72a7e7c22902d/internal/mip_test.py#L359
+
+        References:
+            [1] `MIP-NeRF <https://arxiv.org/abs/2103.13415>`_.
+        """
+        num_dims = 3
+        n_harmonic_functions = 6
+        mean = torch.randn(num_dims)
+        diag_cov = torch.rand(num_dims)
+
+        he_fun = HarmonicEmbedding(
+            n_harmonic_functions=n_harmonic_functions, logspace=True, append_input=False
+        )
+        ipe_fun = HarmonicEmbedding(
+            n_harmonic_functions=n_harmonic_functions,
+            append_input=False,
+        )
+
+        embedding_ipe = ipe_fun(mean, diag_cov=diag_cov)
+
+        rand_mvn = MultivariateNormal(mean, torch.eye(num_dims) * diag_cov)
+
+        # Providing a large enough number of samples
+        # we should obtain an estimation close to our IPE
+        num_samples = 100000
+        embedding_he = he_fun(rand_mvn.sample((num_samples,)))
+        self.assertClose(embedding_he.mean(0), embedding_ipe, rtol=1e-2, atol=1e-2)
diff --git a/pytorch3d/tests/test_interpolate_face_attributes.py b/pytorch3d/tests/test_interpolate_face_attributes.py
new file mode 100644
index 0000000000000000000000000000000000000000..8db7f2925d7c7b3e353318670bce6b6dc9ecac5e
--- /dev/null
+++ b/pytorch3d/tests/test_interpolate_face_attributes.py
@@ -0,0 +1,194 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.ops.interp_face_attrs import (
+    interpolate_face_attributes,
+    interpolate_face_attributes_python,
+)
+from pytorch3d.renderer.mesh import TexturesVertex
+from pytorch3d.renderer.mesh.rasterizer import Fragments
+from pytorch3d.structures import Meshes
+
+from .common_testing import get_random_cuda_device, TestCaseMixin
+
+
+class TestInterpolateFaceAttributes(TestCaseMixin, unittest.TestCase):
+    def _test_interp_face_attrs(self, interp_fun, device):
+        pix_to_face = [0, 2, -1, 0, 1, -1]
+        barycentric_coords = [
+            [1.0, 0.0, 0.0],
+            [0.0, 1.0, 0.0],
+            [0.0, 0.0, 1.0],
+            [0.5, 0.5, 0.0],
+            [0.8, 0.0, 0.2],
+            [0.25, 0.5, 0.25],
+        ]
+        face_attrs = [
+            [[1, 2], [3, 4], [5, 6]],
+            [[7, 8], [9, 10], [11, 12]],
+            [[13, 14], [15, 16], [17, 18]],
+        ]
+        pix_attrs = [
+            [1, 2],
+            [15, 16],
+            [0, 0],
+            [2, 3],
+            [0.8 * 7 + 0.2 * 11, 0.8 * 8 + 0.2 * 12],
+            [0, 0],
+        ]
+        N, H, W, K, D = 1, 2, 1, 3, 2
+        pix_to_face = torch.tensor(pix_to_face, dtype=torch.int64, device=device)
+        pix_to_face = pix_to_face.view(N, H, W, K)
+        barycentric_coords = torch.tensor(
+            barycentric_coords, dtype=torch.float32, device=device
+        )
+        barycentric_coords = barycentric_coords.view(N, H, W, K, 3)
+        face_attrs = torch.tensor(face_attrs, dtype=torch.float32, device=device)
+        pix_attrs = torch.tensor(pix_attrs, dtype=torch.float32, device=device)
+        pix_attrs = pix_attrs.view(N, H, W, K, D)
+
+        args = (pix_to_face, barycentric_coords, face_attrs)
+        pix_attrs_actual = interp_fun(*args)
+        self.assertClose(pix_attrs_actual, pix_attrs)
+
+    def test_python(self):
+        device = torch.device("cuda:0")
+        self._test_interp_face_attrs(interpolate_face_attributes_python, device)
+
+    def test_cuda(self):
+        device = torch.device("cuda:0")
+        self._test_interp_face_attrs(interpolate_face_attributes, device)
+
+    def test_python_vs_cuda(self):
+        N, H, W, K = 2, 32, 32, 5
+        F = 1000
+        D = 3
+        device = get_random_cuda_device()
+        torch.manual_seed(598)
+        pix_to_face = torch.randint(-F, F, (N, H, W, K), device=device)
+        barycentric_coords = torch.randn(
+            N, H, W, K, 3, device=device, requires_grad=True
+        )
+        face_attrs = torch.randn(F, 3, D, device=device, requires_grad=True)
+        grad_pix_attrs = torch.randn(N, H, W, K, D, device=device)
+        args = (pix_to_face, barycentric_coords, face_attrs)
+
+        # Run the python version
+        pix_attrs_py = interpolate_face_attributes_python(*args)
+        pix_attrs_py.backward(gradient=grad_pix_attrs)
+        grad_bary_py = barycentric_coords.grad.clone()
+        grad_face_attrs_py = face_attrs.grad.clone()
+
+        # Clear gradients
+        barycentric_coords.grad.zero_()
+        face_attrs.grad.zero_()
+
+        # Run the CUDA version
+        pix_attrs_cu = interpolate_face_attributes(*args)
+        pix_attrs_cu.backward(gradient=grad_pix_attrs)
+        grad_bary_cu = barycentric_coords.grad.clone()
+        grad_face_attrs_cu = face_attrs.grad.clone()
+
+        # Check they are the same
+        self.assertClose(pix_attrs_py, pix_attrs_cu, rtol=2e-3)
+        self.assertClose(grad_bary_py, grad_bary_cu, rtol=1e-4)
+        self.assertClose(grad_face_attrs_py, grad_face_attrs_cu, rtol=1e-3)
+
+    def test_interpolate_attributes(self):
+        verts = torch.randn((4, 3), dtype=torch.float32)
+        faces = torch.tensor([[2, 1, 0], [3, 1, 0]], dtype=torch.int64)
+        vert_tex = torch.tensor(
+            [[0, 1, 0], [0, 1, 1], [1, 1, 0], [1, 1, 1]], dtype=torch.float32
+        )
+        tex = TexturesVertex(verts_features=vert_tex[None, :])
+        mesh = Meshes(verts=[verts], faces=[faces], textures=tex)
+        pix_to_face = torch.tensor([0, 1], dtype=torch.int64).view(1, 1, 1, 2)
+        barycentric_coords = torch.tensor(
+            [[0.5, 0.3, 0.2], [0.3, 0.6, 0.1]], dtype=torch.float32
+        ).view(1, 1, 1, 2, -1)
+        expected_vals = torch.tensor(
+            [[0.5, 1.0, 0.3], [0.3, 1.0, 0.9]], dtype=torch.float32
+        ).view(1, 1, 1, 2, -1)
+        fragments = Fragments(
+            pix_to_face=pix_to_face,
+            bary_coords=barycentric_coords,
+            zbuf=torch.ones_like(pix_to_face),
+            dists=torch.ones_like(pix_to_face),
+        )
+
+        verts_features_packed = mesh.textures.verts_features_packed()
+        faces_verts_features = verts_features_packed[mesh.faces_packed()]
+
+        texels = interpolate_face_attributes(
+            fragments.pix_to_face, fragments.bary_coords, faces_verts_features
+        )
+        self.assertTrue(torch.allclose(texels, expected_vals[None, :]))
+
+    def test_interpolate_attributes_grad(self):
+        """Gradients of summed texels must flow back to the vertex features."""
+        verts = torch.randn((4, 3), dtype=torch.float32)
+        faces = torch.tensor([[2, 1, 0], [3, 1, 0]], dtype=torch.int64)
+        vert_tex = torch.tensor(
+            [[0, 1, 0], [0, 1, 1], [1, 1, 0], [1, 1, 1]],
+            dtype=torch.float32,
+            requires_grad=True,
+        )
+        tex = TexturesVertex(verts_features=vert_tex[None, :])
+        mesh = Meshes(verts=[verts], faces=[faces], textures=tex)
+        pix_to_face = torch.tensor([0, 1], dtype=torch.int64).view(1, 1, 1, 2)
+        barycentric_coords = torch.tensor(
+            [[0.5, 0.3, 0.2], [0.3, 0.6, 0.1]], dtype=torch.float32
+        ).view(1, 1, 1, 2, -1)
+        fragments = Fragments(
+            pix_to_face=pix_to_face,
+            bary_coords=barycentric_coords,
+            zbuf=torch.ones_like(pix_to_face),
+            dists=torch.ones_like(pix_to_face),
+        )
+        # Per vertex: the sum of the barycentric weights it gets over both pixels.
+        grad_rows = [[0.3] * 3, [0.9] * 3, [0.5] * 3, [0.3] * 3]
+        grad_vert_tex = torch.tensor(grad_rows, dtype=torch.float32)
+        verts_features_packed = mesh.textures.verts_features_packed()
+        faces_verts_features = verts_features_packed[mesh.faces_packed()]
+
+        texels = interpolate_face_attributes(
+            fragments.pix_to_face, fragments.bary_coords, faces_verts_features
+        )
+        texels.sum().backward()
+        self.assertIsNotNone(vert_tex.grad)  # hasattr() is always true for tensors
+        self.assertTrue(torch.allclose(vert_tex.grad, grad_vert_tex[None, :]))
+
+    def test_interpolate_face_attributes_fail(self):
+        # 1. A face can only have 3 verts
+        #   i.e. face_attributes must have shape (F, 3, D)
+        face_attributes = torch.ones(1, 4, 3)
+        pix_to_face = torch.ones((1, 1, 1, 1))
+        fragments = Fragments(
+            pix_to_face=pix_to_face,
+            bary_coords=pix_to_face[..., None].expand(-1, -1, -1, -1, 3),
+            zbuf=pix_to_face,
+            dists=pix_to_face,
+        )
+        with self.assertRaises(ValueError):
+            interpolate_face_attributes(
+                fragments.pix_to_face, fragments.bary_coords, face_attributes
+            )
+
+        # 2. pix_to_face must have shape (N, H, W, K)
+        pix_to_face = torch.ones((1, 1, 1, 1, 3))
+        fragments = Fragments(
+            pix_to_face=pix_to_face,
+            bary_coords=pix_to_face,
+            zbuf=pix_to_face,
+            dists=pix_to_face,
+        )
+        with self.assertRaises(ValueError):
+            interpolate_face_attributes(
+                fragments.pix_to_face, fragments.bary_coords, face_attributes
+            )
diff --git a/pytorch3d/tests/test_io_gltf.py b/pytorch3d/tests/test_io_gltf.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b13c31c044951d4895317eb0fc81ebe35cced23
--- /dev/null
+++ b/pytorch3d/tests/test_io_gltf.py
@@ -0,0 +1,383 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os.path
+import unittest
+from math import radians
+
+import numpy as np
+import torch
+from PIL import Image
+from pytorch3d.io import IO
+from pytorch3d.io.experimental_gltf_io import _read_header, MeshGlbFormat
+from pytorch3d.renderer import (
+    AmbientLights,
+    BlendParams,
+    FoVPerspectiveCameras,
+    look_at_view_transform,
+    PointLights,
+    RasterizationSettings,
+    rotate_on_spot,
+)
+from pytorch3d.renderer.mesh import (
+    HardPhongShader,
+    MeshRasterizer,
+    MeshRenderer,
+    TexturesVertex,
+)
+from pytorch3d.structures import Meshes
+from pytorch3d.transforms import axis_angle_to_matrix
+from pytorch3d.utils import ico_sphere
+from pytorch3d.vis.texture_vis import texturesuv_image_PIL
+
+from .common_testing import get_pytorch3d_dir, get_tests_dir, TestCaseMixin
+
+
+DATA_DIR = get_tests_dir() / "data"
+TUTORIAL_DATA_DIR = get_pytorch3d_dir() / "docs/tutorials/data"
+DEBUG = False
+
+
+def _load(path, **kwargs) -> Meshes:
+    io = IO()
+    io.register_meshes_format(MeshGlbFormat())
+    return io.load_mesh(path, **kwargs)
+
+
+def _write(mesh, path, **kwargs) -> None:
+    io = IO()
+    io.register_meshes_format(MeshGlbFormat())
+    io.save_mesh(mesh, path, **kwargs)
+
+    with open(path, "rb") as f:
+        _, stored_length = _read_header(f)
+    assert stored_length == os.path.getsize(path)
+
+
+def _render(
+    mesh: Meshes,
+    name: str,
+    dist: float = 3.0,
+    elev: float = 10.0,
+    azim: float = 0,
+    image_size: int = 256,
+    pan=None,
+    RT=None,
+    use_ambient=False,
+):
+    device = mesh.device
+    if RT is not None:
+        R, T = RT
+    else:
+        R, T = look_at_view_transform(dist, elev, azim)
+        if pan is not None:
+            R, T = rotate_on_spot(R, T, pan)
+    cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+
+    raster_settings = RasterizationSettings(
+        image_size=image_size, blur_radius=0.0, faces_per_pixel=1
+    )
+
+    # Init shader settings
+    if use_ambient:
+        lights = AmbientLights(device=device)
+    else:
+        lights = PointLights(device=device)
+        lights.location = torch.tensor([0.0, 0.0, 2.0], device=device)[None]
+
+    blend_params = BlendParams(
+        sigma=1e-1,
+        gamma=1e-4,
+        background_color=torch.tensor([1.0, 1.0, 1.0], device=device),
+    )
+    # Init renderer
+    renderer = MeshRenderer(
+        rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
+        shader=HardPhongShader(
+            device=device, lights=lights, cameras=cameras, blend_params=blend_params
+        ),
+    )
+
+    output = renderer(mesh)
+
+    image = (output[0, ..., :3].cpu().numpy() * 255).astype(np.uint8)
+
+    if DEBUG:
+        Image.fromarray(image).save(DATA_DIR / f"glb_{name}_.png")
+
+    return image
+
+
+class TestMeshGltfIO(TestCaseMixin, unittest.TestCase):
+    def test_load_apartment(self):
+        """
+        This is the example habitat scene from inside
+        http://dl.fbaipublicfiles.com/habitat/habitat-test-scenes.zip
+
+        The scene is "already lit", i.e. the textures reflect the lighting
+        already, so we want to render them with full ambient light.
+        """
+
+        self.skipTest("Data not available")
+
+        glb = DATA_DIR / "apartment_1.glb"
+        self.assertTrue(glb.is_file())
+        device = torch.device("cuda:0")
+        mesh = _load(glb, device=device)
+
+        if DEBUG:
+            texturesuv_image_PIL(mesh.textures).save(DATA_DIR / "out_apartment.png")
+
+        for i in range(19):
+            # random locations in the apartment
+            eye = ((np.random.uniform(-6, 0.5), np.random.uniform(-8, 2), 0),)
+            at = ((np.random.uniform(-6, 0.5), np.random.uniform(-8, 2), 0),)
+            up = ((0, 0, -1),)
+            RT = look_at_view_transform(eye=eye, at=at, up=up)
+            _render(mesh, f"apartment_eau{i}", RT=RT, use_ambient=True)
+
+        for i in range(12):
+            # panning around the inner room from one location
+            pan = axis_angle_to_matrix(torch.FloatTensor([0, radians(30 * i), 0]))
+            _render(mesh, f"apartment{i}", 1.0, -90, pan=pan, use_ambient=True)
+
+    def test_load_cow(self):
+        """
+        Load the cow as converted to a single mesh in a glb file.
+        """
+        glb = DATA_DIR / "cow.glb"
+        self.assertTrue(glb.is_file())
+        device = torch.device("cuda:0")
+        mesh = _load(glb, device=device)
+        self.assertEqual(mesh.device, device)
+
+        self.assertEqual(mesh.faces_packed().shape, (5856, 3))
+        self.assertEqual(mesh.verts_packed().shape, (3225, 3))
+        mesh_obj = _load(TUTORIAL_DATA_DIR / "cow_mesh/cow.obj")
+        self.assertClose(mesh.get_bounding_boxes().cpu(), mesh_obj.get_bounding_boxes())
+
+        self.assertClose(
+            mesh.textures.verts_uvs_padded().cpu(), mesh_obj.textures.verts_uvs_padded()
+        )
+
+        self.assertClose(
+            mesh.textures.faces_uvs_padded().cpu(), mesh_obj.textures.faces_uvs_padded()
+        )
+
+        self.assertClose(
+            mesh.textures.maps_padded().cpu(), mesh_obj.textures.maps_padded()
+        )
+
+        if DEBUG:
+            texturesuv_image_PIL(mesh.textures).save(DATA_DIR / "out_cow.png")
+
+            image = _render(mesh, "cow", azim=4)
+            with Image.open(DATA_DIR / "glb_cow.png") as f:
+                expected = np.array(f)
+
+            self.assertClose(image, expected)
+
+    def test_save_cow(self):
+        """
+        Save the cow mesh to a glb file
+        """
+        # load cow mesh from a glb file
+        glb = DATA_DIR / "cow.glb"
+        self.assertTrue(glb.is_file())
+        device = torch.device("cuda:0")
+        mesh = _load(glb, device=device)
+
+        # save the mesh to a glb file
+        glb = DATA_DIR / "cow_write.glb"
+        _write(mesh, glb)
+
+        # load again
+        glb_reload = DATA_DIR / "cow_write.glb"
+        self.assertTrue(glb_reload.is_file())
+        device = torch.device("cuda:0")
+        mesh_reload = _load(glb_reload, device=device)
+
+        # assertions
+        self.assertEqual(mesh_reload.faces_packed().shape, (5856, 3))
+        self.assertEqual(mesh_reload.verts_packed().shape, (3225, 3))
+        self.assertClose(
+            mesh_reload.get_bounding_boxes().cpu(), mesh.get_bounding_boxes().cpu()
+        )
+
+        self.assertClose(
+            mesh_reload.textures.verts_uvs_padded().cpu(),
+            mesh.textures.verts_uvs_padded().cpu(),
+        )
+
+        self.assertClose(
+            mesh_reload.textures.faces_uvs_padded().cpu(),
+            mesh.textures.faces_uvs_padded().cpu(),
+        )
+
+        self.assertClose(
+            mesh_reload.textures.maps_padded().cpu(), mesh.textures.maps_padded().cpu()
+        )
+
+    def test_save_ico_sphere(self):
+        """
+        save the ico_sphere mesh in a glb file
+        """
+        ico_sphere_mesh = ico_sphere(level=3)
+        glb = DATA_DIR / "ico_sphere.glb"
+        _write(ico_sphere_mesh, glb)
+
+        # reload the ico_sphere
+        device = torch.device("cuda:0")
+        mesh_reload = _load(glb, device=device, include_textures=False)
+
+        self.assertClose(
+            ico_sphere_mesh.verts_padded().cpu(),
+            mesh_reload.verts_padded().cpu(),
+        )
+
+        self.assertClose(
+            ico_sphere_mesh.faces_padded().cpu(),
+            mesh_reload.faces_padded().cpu(),
+        )
+
+    def test_load_cow_no_texture(self):
+        """
+        Load the cow glb without its textures, then render it with gray vertex colors.
+        """
+        glb = DATA_DIR / "cow.glb"
+        self.assertTrue(glb.is_file())
+        device = torch.device("cuda:0")
+        mesh = _load(glb, device=device, include_textures=False)
+        self.assertEqual(len(mesh), 1)
+        self.assertIsNone(mesh.textures)
+
+        self.assertEqual(mesh.faces_packed().shape, (5856, 3))
+        self.assertEqual(mesh.verts_packed().shape, (3225, 3))
+        mesh_obj = _load(TUTORIAL_DATA_DIR / "cow_mesh/cow.obj")
+        self.assertClose(mesh.get_bounding_boxes().cpu(), mesh_obj.get_bounding_boxes())
+
+        mesh.textures = TexturesVertex(0.5 * torch.ones_like(mesh.verts_padded()))
+
+        image = _render(mesh, "cow_gray")
+
+        with Image.open(DATA_DIR / "glb_cow_gray.png") as f:
+            expected = np.array(f)
+
+        self.assertClose(image, expected)
+
+    def test_load_save_load_cow_texturesvertex(self):
+        """
+        Load the cow as converted to a single mesh in a glb file and then save it to a glb file.
+        """
+
+        glb = DATA_DIR / "cow.glb"
+        self.assertTrue(glb.is_file())
+        device = torch.device("cuda:0")
+        mesh = _load(glb, device=device, include_textures=False)
+        self.assertEqual(len(mesh), 1)
+        self.assertIsNone(mesh.textures)
+
+        self.assertEqual(mesh.faces_packed().shape, (5856, 3))
+        self.assertEqual(mesh.verts_packed().shape, (3225, 3))
+        mesh_obj = _load(TUTORIAL_DATA_DIR / "cow_mesh/cow.obj")
+        self.assertClose(mesh.get_bounding_boxes().cpu(), mesh_obj.get_bounding_boxes())
+
+        mesh.textures = TexturesVertex(0.5 * torch.ones_like(mesh.verts_padded()))
+
+        image = _render(mesh, "cow_gray")
+
+        with Image.open(DATA_DIR / "glb_cow_gray.png") as f:
+            expected = np.array(f)
+
+        self.assertClose(image, expected)
+
+        # save the mesh to a glb file
+        glb = DATA_DIR / "cow_write_texturesvertex.glb"
+        _write(mesh, glb)
+
+        # reload the mesh glb file saved in TexturesVertex format
+        glb = DATA_DIR / "cow_write_texturesvertex.glb"
+        self.assertTrue(glb.is_file())
+        mesh_dash = _load(glb, device=device)
+        self.assertEqual(len(mesh_dash), 1)
+
+        self.assertEqual(mesh_dash.faces_packed().shape, (5856, 3))
+        self.assertEqual(mesh_dash.verts_packed().shape, (3225, 3))
+        self.assertEqual(mesh_dash.textures.verts_features_list()[0].shape, (3225, 3))
+
+        # render the reloaded mesh and compare it with the expected image
+        image_dash = _render(mesh_dash, "cow_gray_texturesvertex")
+        self.assertClose(image_dash, expected)
+
+    def test_save_toy(self):
+        """
+        Construct a simple mesh and save it to a glb file in TexturesVertex mode.
+        """
+
+        example = {}
+        example["POSITION"] = torch.tensor(
+            [
+                [
+                    [0.0, 0.0, 0.0],
+                    [-1.0, 0.0, 0.0],
+                    [-1.0, 0.0, 1.0],
+                    [0.0, 0.0, 1.0],
+                    [0.0, 1.0, 0.0],
+                    [-1.0, 1.0, 0.0],
+                    [-1.0, 1.0, 1.0],
+                    [0.0, 1.0, 1.0],
+                ]
+            ]
+        )
+        example["indices"] = torch.tensor(
+            [
+                [
+                    [1, 4, 2],
+                    [4, 3, 2],
+                    [3, 7, 2],
+                    [7, 6, 2],
+                    [3, 4, 7],
+                    [4, 8, 7],
+                    [8, 5, 7],
+                    [5, 6, 7],
+                    [5, 2, 6],
+                    [5, 1, 2],
+                    [1, 5, 4],
+                    [5, 8, 4],
+                ]
+            ]
+        )
+        example["indices"] -= 1
+        example["COLOR_0"] = torch.tensor(
+            [
+                [
+                    [1.0, 0.0, 0.0],
+                    [1.0, 0.0, 0.0],
+                    [1.0, 0.0, 0.0],
+                    [1.0, 0.0, 0.0],
+                    [1.0, 0.0, 0.0],
+                    [1.0, 0.0, 0.0],
+                    [1.0, 0.0, 0.0],
+                    [1.0, 0.0, 0.0],
+                ]
+            ]
+        )
+        # example['prop'] = {'material':
+        #                       {'pbrMetallicRoughness':
+        #                           {'baseColorFactor':
+        #                                torch.tensor([[0.7, 0.7, 1, 0.5]]),
+        #                            'metallicFactor': torch.tensor([1]),
+        #                            'roughnessFactor': torch.tensor([0.1])},
+        #                    'alphaMode': 'BLEND',
+        #                    'doubleSided': True}}
+
+        texture = TexturesVertex(example["COLOR_0"])
+        mesh = Meshes(
+            verts=example["POSITION"], faces=example["indices"], textures=texture
+        )
+
+        glb = DATA_DIR / "example_write_texturesvertex.glb"
+        _write(mesh, glb)
diff --git a/pytorch3d/tests/test_io_obj.py b/pytorch3d/tests/test_io_obj.py
new file mode 100644
index 0000000000000000000000000000000000000000..3668be4ea2547a5075cdb2be7ec2a550ba3aac67
--- /dev/null
+++ b/pytorch3d/tests/test_io_obj.py
@@ -0,0 +1,1340 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import unittest
+import warnings
+from collections import Counter
+from io import StringIO
+from pathlib import Path
+from tempfile import NamedTemporaryFile, TemporaryDirectory
+
+import torch
+from iopath.common.file_io import PathManager
+from pytorch3d.io import IO, load_obj, load_objs_as_meshes, save_obj
+from pytorch3d.io.mtl_io import (
+    _bilinear_interpolation_grid_sample,
+    _bilinear_interpolation_vectorized,
+    _parse_mtl,
+)
+from pytorch3d.renderer import TexturesAtlas, TexturesUV, TexturesVertex
+from pytorch3d.structures import join_meshes_as_batch, Meshes
+from pytorch3d.utils import torus
+
+from .common_testing import (
+    get_pytorch3d_dir,
+    get_tests_dir,
+    load_rgb_image,
+    TestCaseMixin,
+)
+
+
+DATA_DIR = get_tests_dir() / "data"
+TUTORIAL_DATA_DIR = get_pytorch3d_dir() / "docs/tutorials/data"
+
+
+class TestMeshObjIO(TestCaseMixin, unittest.TestCase):
+    def test_load_obj_simple(self):
+        """
+        Parse an obj that only has vertices and faces and check the result.
+        """
+        obj_lines = [
+            "# this is a comment",  # Comments should be ignored.
+            "v 0.1 0.2 0.3",
+            "v 0.2 0.3 0.4",
+            "v 0.3 0.4 0.5",
+            "v  0.4 0.5 0.6",  # some obj files have multiple spaces after v
+            "f 1 2 3",
+            "f 1 2 4 3 1",  # Polygons should be split into triangles
+        ]
+
+        want_verts = torch.tensor(
+            [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4], [0.3, 0.4, 0.5], [0.4, 0.5, 0.6]],
+            dtype=torch.float32,
+        )
+        want_faces = torch.tensor(
+            [
+                [0, 1, 2],  # First face
+                [0, 1, 3],  # Second face (polygon)
+                [0, 3, 2],  # Second face (polygon)
+                [0, 2, 0],  # Second face (polygon)
+            ],
+            dtype=torch.int64,
+        )
+
+        with NamedTemporaryFile(mode="w", suffix=".obj") as tmp:
+            tmp.write("\n".join(obj_lines))
+            tmp.flush()
+            verts, faces, aux = load_obj(Path(tmp.name))
+
+            self.assertTrue(torch.all(verts == want_verts))
+            self.assertTrue(torch.all(faces.verts_idx == want_faces))
+            # The file has no vn/vt/usemtl lines, so every per-face normal,
+            # texture and material index is expected to be -1.
+            minus_ones = -(torch.ones_like(faces.verts_idx))
+            self.assertTrue(torch.all(faces.normals_idx == minus_ones))
+            self.assertTrue(torch.all(faces.textures_idx == minus_ones))
+            no_material = -(torch.ones(len(want_faces)))
+            self.assertTrue(torch.all(faces.materials_idx == no_material))
+
+            self.assertIsNone(aux.normals)
+            self.assertIsNone(aux.verts_uvs)
+            self.assertIsNone(aux.material_colors)
+            self.assertIsNone(aux.texture_images)
+
+    def test_load_obj_complex(self):
+        """
+        Parse an obj that mixes normals, uvs, a polygon face and negative
+        indices, and compare every returned tensor with hand-written values.
+        """
+        obj_lines = [
+            "# this is a comment",  # Comments should be ignored.
+            "v 0.1 0.2 0.3",
+            "v 0.2 0.3 0.4",
+            "v 0.3 0.4 0.5",
+            "v 0.4 0.5 0.6",
+            "vn 0.000000 0.000000 -1.000000",
+            "vn -1.000000 -0.000000 -0.000000",
+            "vn -0.000000 -0.000000 1.000000",  # Normals should not be ignored.
+            "v 0.5 0.6 0.7",
+            "vt 0.749279 0.501284 0.0",  # Some files add 0.0 - ignore this.
+            "vt 0.999110 0.501077",
+            "vt 0.999455 0.750380",
+            "f 1 2 3",
+            "f 1 2 4 3 5",  # Polygons should be split into triangles
+            "f 2/1/2 3/1/2 4/2/2",  # Texture/normals are loaded correctly.
+            "f -1 -2 1",  # Negative indexing counts from the end.
+        ]
+
+        want_verts = torch.tensor(
+            [
+                [0.1, 0.2, 0.3],
+                [0.2, 0.3, 0.4],
+                [0.3, 0.4, 0.5],
+                [0.4, 0.5, 0.6],
+                [0.5, 0.6, 0.7],
+            ],
+            dtype=torch.float32,
+        )
+
+        # "f 1 2 4 3 5" has five corners; it is expected to come back as the
+        # three triangles marked "Second face (polygon)" below.
+        want_faces = torch.tensor(
+            [
+                [0, 1, 2],  # First face
+                [0, 1, 3],  # Second face (polygon)
+                [0, 3, 2],  # Second face (polygon)
+                [0, 2, 4],  # Second face (polygon)
+                [1, 2, 3],  # Third face (normals / texture)
+                [4, 3, 0],  # Fourth face (negative indices)
+            ],
+            dtype=torch.int64,
+        )
+
+        want_normals = torch.tensor(
+            [
+                [0.000000, 0.000000, -1.000000],
+                [-1.000000, -0.000000, -0.000000],
+                [-0.000000, -0.000000, 1.000000],
+            ],
+            dtype=torch.float32,
+        )
+        want_uvs = torch.tensor(
+            [[0.749279, 0.501284], [0.999110, 0.501077], [0.999455, 0.750380]],
+            dtype=torch.float32,
+        )
+
+        # Only row 4 (from "f 2/1/2 3/1/2 4/2/2") references uvs and normals;
+        # the remaining rows are expected to hold -1.
+        want_normals_idx = -(torch.ones_like(want_faces, dtype=torch.int64))
+        want_normals_idx[4, :] = torch.tensor([1, 1, 1], dtype=torch.int64)
+        want_uvs_idx = -(torch.ones_like(want_faces, dtype=torch.int64))
+        want_uvs_idx[4, :] = torch.tensor([0, 0, 1], dtype=torch.int64)
+
+        with NamedTemporaryFile(mode="w", suffix=".obj") as tmp:
+            tmp.write("\n".join(obj_lines))
+            tmp.flush()
+            verts, faces, aux = load_obj(Path(tmp.name))
+            got_normals = aux.normals
+            got_uvs = aux.verts_uvs
+            got_materials = aux.material_colors
+            got_tex_maps = aux.texture_images
+
+            self.assertTrue(torch.all(verts == want_verts))
+            self.assertTrue(torch.all(faces.verts_idx == want_faces))
+            self.assertClose(got_normals, want_normals)
+            self.assertClose(got_uvs, want_uvs)
+            self.assertClose(faces.normals_idx, want_normals_idx)
+            self.assertClose(faces.textures_idx, want_uvs_idx)
+            self.assertIsNone(got_materials)
+            self.assertIsNone(got_tex_maps)
+
+    def test_load_obj_complex_pluggable(self):
+        """
+        Load the obj through IO().load_mesh from both a str and a Path.
+        This won't work on Windows due to the behavior of NamedTemporaryFile
+        """
+        obj_lines = [
+            "# this is a comment",  # Comments should be ignored.
+            "v 0.1 0.2 0.3",
+            "v 0.2 0.3 0.4",
+            "v 0.3 0.4 0.5",
+            "v 0.4 0.5 0.6",
+            "vn 0.000000 0.000000 -1.000000",
+            "vn -1.000000 -0.000000 -0.000000",
+            "vn -0.000000 -0.000000 1.000000",  # Normals should not be ignored.
+            "v 0.5 0.6 0.7",
+            "vt 0.749279 0.501284 0.0",  # Some files add 0.0 - ignore this.
+            "vt 0.999110 0.501077",
+            "vt 0.999455 0.750380",
+            "f 1 2 3",
+            "f 1 2 4 3 5",  # Polygons should be split into triangles
+            "f 2/1/2 3/1/2 4/2/2",  # Texture/normals are loaded correctly.
+            "f -1 -2 1",  # Negative indexing counts from the end.
+        ]
+        want_verts = torch.tensor(
+            [
+                [0.1, 0.2, 0.3],
+                [0.2, 0.3, 0.4],
+                [0.3, 0.4, 0.5],
+                [0.4, 0.5, 0.6],
+                [0.5, 0.6, 0.7],
+            ],
+            dtype=torch.float32,
+        )
+        want_faces = torch.tensor(
+            [
+                [0, 1, 2],  # First face
+                [0, 1, 3],  # Second face (polygon)
+                [0, 3, 2],  # Second face (polygon)
+                [0, 2, 4],  # Second face (polygon)
+                [1, 2, 3],  # Third face (normals / texture)
+                [4, 3, 0],  # Fourth face (negative indices)
+            ],
+            dtype=torch.int64,
+        )
+        loader = IO()
+        with NamedTemporaryFile(mode="w", suffix=".obj") as tmp:
+            tmp.write("\n".join(obj_lines))
+            tmp.flush()
+            mesh = loader.load_mesh(tmp.name)
+            mesh_from_path = loader.load_mesh(Path(tmp.name))
+
+        # The same text under a ".ply" suffix is expected to be rejected.
+        with NamedTemporaryFile(mode="w", suffix=".ply") as tmp:
+            tmp.write("\n".join(obj_lines))
+            tmp.flush()
+            with self.assertRaisesRegex(ValueError, "Invalid file header."):
+                loader.load_mesh(tmp.name)
+
+        self.assertClose(mesh.verts_padded(), want_verts[None])
+        self.assertClose(mesh.faces_padded(), want_faces[None])
+        self.assertClose(mesh_from_path.verts_padded(), want_verts[None])
+        self.assertClose(mesh_from_path.faces_padded(), want_faces[None])
+        self.assertIsNone(mesh.textures)
+
+    def test_load_obj_normals_only(self):
+        """
+        Faces written as "v//vn" should yield normals but no texture data.
+        """
+        obj_lines = [
+            "v 0.1 0.2 0.3",
+            "v 0.2 0.3 0.4",
+            "v 0.3 0.4 0.5",
+            "v 0.4 0.5 0.6",
+            "vn 0.000000 0.000000 -1.000000",
+            "vn -1.000000 -0.000000 -0.000000",
+            "f 2//1 3//1 4//2",
+        ]
+
+        want_verts = torch.tensor(
+            [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4], [0.3, 0.4, 0.5], [0.4, 0.5, 0.6]],
+            dtype=torch.float32,
+        )
+        want_normals = torch.tensor(
+            [[0.000000, 0.000000, -1.000000], [-1.000000, -0.000000, -0.000000]],
+            dtype=torch.float32,
+        )
+        want_normals_idx = torch.tensor([[0, 0, 1]], dtype=torch.int64)
+
+        with NamedTemporaryFile(mode="w", suffix=".obj") as tmp:
+            tmp.write("\n".join(obj_lines))
+            tmp.flush()
+            verts, faces, aux = load_obj(Path(tmp.name))
+
+            self.assertClose(faces.normals_idx, want_normals_idx)
+            self.assertClose(aux.normals, want_normals)
+            self.assertClose(verts, want_verts)
+            # Textures idx padded  with -1.
+            self.assertClose(faces.textures_idx, torch.ones_like(faces.verts_idx) * -1)
+
+            # No "vt" lines and no mtllib reference: these aux fields are expected
+            # to be None.
+            self.assertIsNone(aux.verts_uvs)
+            self.assertIsNone(aux.material_colors)
+            self.assertIsNone(aux.texture_images)
+
+    def test_load_obj_textures_only(self):
+        """
+        Faces written as "v/vt" should yield uvs and uv indices, while normals
+        and their indices stay unset.
+        """
+        obj_lines = [
+            "v 0.1 0.2 0.3",
+            "v 0.2 0.3 0.4",
+            "v 0.3 0.4 0.5",
+            "v 0.4 0.5 0.6",
+            "vt 0.999110 0.501077",
+            "vt 0.999455 0.750380",
+            "f 2/1 3/1 4/2",
+        ]
+
+        want_verts = torch.tensor(
+            [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4], [0.3, 0.4, 0.5], [0.4, 0.5, 0.6]],
+            dtype=torch.float32,
+        )
+        want_uvs = torch.tensor(
+            [[0.999110, 0.501077], [0.999455, 0.750380]], dtype=torch.float32
+        )
+        want_uvs_idx = torch.tensor([[0, 0, 1]], dtype=torch.int64)
+
+        with NamedTemporaryFile(mode="w", suffix=".obj") as tmp:
+            tmp.write("\n".join(obj_lines))
+            tmp.flush()
+            verts, faces, aux = load_obj(Path(tmp.name))
+
+            self.assertClose(faces.textures_idx, want_uvs_idx)
+            self.assertClose(want_uvs, aux.verts_uvs)
+            self.assertClose(want_verts, verts)
+            # Normal indices are expected to be padded with -1.
+            minus_ones = -(torch.ones_like(faces.textures_idx))
+            self.assertTrue(torch.all(faces.normals_idx == minus_ones))
+
+            # No "vn" lines and no mtllib reference: these aux fields are expected
+            # to be None.
+            self.assertIsNone(aux.normals)
+            self.assertIsNone(aux.material_colors)
+            self.assertIsNone(aux.texture_images)
+
+    def test_load_obj_error_textures(self):
+        # A "vt" line with a single value is expected to be rejected.
+        with NamedTemporaryFile(mode="w", suffix=".obj") as tmp:
+            tmp.write("vt 0.1")
+            tmp.flush()
+            obj_path = Path(tmp.name)
+            with self.assertRaises(ValueError) as ctx:
+                load_obj(obj_path)
+            self.assertIn("does not have 2 values", str(ctx.exception))
+
+    def test_load_obj_error_normals(self):
+        # A "vn" line with a single value is expected to be rejected.
+        with NamedTemporaryFile(mode="w", suffix=".obj") as tmp:
+            tmp.write("vn 0.1")
+            tmp.flush()
+            obj_path = Path(tmp.name)
+            with self.assertRaises(ValueError) as ctx:
+                load_obj(obj_path)
+            self.assertIn("does not have 3 values", str(ctx.exception))
+
+    def test_load_obj_error_vertices(self):
+        # A "v" line with a single value is expected to be rejected.
+        with NamedTemporaryFile(mode="w", suffix=".obj") as tmp:
+            tmp.write("v 1")
+            tmp.flush()
+            obj_path = Path(tmp.name)
+            with self.assertRaises(ValueError) as ctx:
+                load_obj(obj_path)
+            self.assertIn("does not have 3 values", str(ctx.exception))
+
+    def test_load_obj_error_inconsistent_triplets(self):
+        # One face mixing "v//vn", "v/vt" and "v/vt/vn" corners must be rejected.
+        with NamedTemporaryFile(mode="w", suffix=".obj") as tmp:
+            tmp.write("f 2//1 3/1 4/1/2")
+            tmp.flush()
+            obj_path = Path(tmp.name)
+            with self.assertRaises(ValueError) as ctx:
+                load_obj(obj_path)
+            self.assertIn("Vertex properties are inconsistent", str(ctx.exception))
+
+    def test_load_obj_error_too_many_vertex_properties(self):
+        # A face corner with four slash-separated values must be rejected.
+        with NamedTemporaryFile(mode="w", suffix=".obj") as tmp:
+            tmp.write("f 2/1/1/3")
+            tmp.flush()
+            obj_path = Path(tmp.name)
+            with self.assertRaises(ValueError) as ctx:
+                load_obj(obj_path)
+            self.assertIn(
+                "Face vertices can only have 3 properties", str(ctx.exception)
+            )
+
+    def test_load_obj_error_invalid_vertex_indices(self):
+        # Vertex index 5 points past the three vertices defined in the file.
+        obj_lines = ["v 0.1 0.2 0.3"] * 3 + ["f -2 5 1"]
+        with NamedTemporaryFile(mode="w", suffix=".obj") as tmp:
+            tmp.write("\n".join(obj_lines))
+            tmp.flush()
+            obj_path = Path(tmp.name)
+
+            with self.assertWarnsRegex(UserWarning, "Faces have invalid indices"):
+                load_obj(obj_path)
+
+    def test_load_obj_error_invalid_normal_indices(self):
+        # Faces use the "v//vn" form so that the out-of-range value (4, with
+        # only three "vn" lines) is a normal index, not a texture index.
+        obj_lines = [
+            "v 0.1 0.2 0.3",
+            "v 0.1 0.2 0.3",
+            "v 0.1 0.2 0.3",
+            "vn 0.1 0.2 0.3",
+            "vn 0.1 0.2 0.3",
+            "vn 0.1 0.2 0.3",
+            "f -2//2 2//4 1//1",
+        ]
+        with NamedTemporaryFile(mode="w", suffix=".obj") as f:
+            f.write("\n".join(obj_lines))
+            f.flush()
+
+            with self.assertWarnsRegex(UserWarning, "Faces have invalid indices"):
+                load_obj(Path(f.name))
+
+    def test_load_obj_error_invalid_texture_indices(self):
+        # Faces use the "v/vt" form so that the out-of-range value (6, with
+        # only three "vt" lines) is a texture index, not a normal index.
+        obj_lines = [
+            "v 0.1 0.2 0.3",
+            "v 0.1 0.2 0.3",
+            "v 0.1 0.2 0.3",
+            "vt 0.1 0.2",
+            "vt 0.1 0.2",
+            "vt 0.1 0.2",
+            "f -2/2 2/6 1/1",
+        ]
+        with NamedTemporaryFile(mode="w", suffix=".obj") as f:
+            f.write("\n".join(obj_lines))
+            f.flush()
+
+            with self.assertWarnsRegex(UserWarning, "Faces have invalid indices"):
+                load_obj(Path(f.name))
+
+    def test_save_obj_invalid_shapes(self):
+        """save_obj should raise a ValueError that names the mis-shaped input."""
+        # Invalid vertices shape
+        verts = torch.FloatTensor([[0.1, 0.2, 0.3, 0.4]])  # (V, 4)
+        faces = torch.LongTensor([[0, 1, 2]])
+        with self.assertRaises(ValueError) as error:
+            with NamedTemporaryFile(mode="w", suffix=".obj") as f:
+                save_obj(Path(f.name), verts, faces)
+        # assertTrue(msg, exc) treats the second argument as the failure text
+        # and always passes for a non-empty msg, so compare with assertIn. Only
+        # the part naming the argument and the required shape is matched.
+        expected_message = "'verts' should either be empty or of shape (num_verts, 3)."
+        self.assertIn(expected_message, str(error.exception))
+
+        # Invalid faces shape
+        verts = torch.FloatTensor([[0.1, 0.2, 0.3]])
+        faces = torch.LongTensor([[0, 1, 2, 3]])  # (F, 4)
+        with self.assertRaises(ValueError) as error:
+            with NamedTemporaryFile(mode="w", suffix=".obj") as f:
+                save_obj(Path(f.name), verts, faces)
+        expected_message = "'faces' should either be empty or of shape (num_faces, 3)."
+        self.assertIn(expected_message, str(error.exception))
+
+    def test_save_obj_invalid_indices(self):
+        """
+        save_obj should warn when faces refer to vertices that do not exist.
+        """
+        warning_regex = "Faces have invalid indices"
+        single_vert = torch.FloatTensor([[0.1, 0.2, 0.3]])
+        # Index values 1 and 2 exceed the single vertex; -1 is negative.
+        for bad_face in ([0, 1, 2], [-1, 0, 1]):
+            faces = torch.LongTensor([bad_face])
+            with self.assertWarnsRegex(UserWarning, warning_regex):
+                with NamedTemporaryFile(mode="w", suffix=".obj") as tmp:
+                    save_obj(Path(tmp.name), single_vert, faces)
+
+    def _test_save_load(self, verts, faces):
+        """
+        Save verts/faces to a temporary obj file and check that load_obj
+        returns equal tensors; empty inputs are compared as (0, 3) tensors.
+        """
+        want_verts = verts if len(verts) else torch.zeros((0, 3), dtype=torch.float32)
+        want_faces = faces if len(faces) else torch.zeros((0, 3), dtype=torch.int64)
+        with NamedTemporaryFile(mode="w", suffix=".obj") as tmp:
+            obj_path = Path(tmp.name)
+            save_obj(obj_path, verts, faces)
+            tmp.flush()
+            got_verts, got_faces, _ = load_obj(obj_path)
+            self.assertClose(want_verts, got_verts)
+            self.assertClose(want_faces, got_faces.verts_idx)
+
+    def test_empty_save_load_obj(self):
+        """
+        Round-trip meshes where verts, faces or both are empty, using both a
+        1-D empty tensor and a (0, 3) tensor as the "empty" value.
+        """
+        # The two representations of "empty" exercised below.
+        empty_verts = (
+            torch.FloatTensor([]),
+            torch.zeros(size=(0, 3), dtype=torch.float32),
+        )
+
+        empty_faces = (
+            torch.LongTensor([]),
+            torch.zeros(size=(0, 3), dtype=torch.int64),
+        )
+
+        one_vert = torch.FloatTensor([[0.1, 0.2, 0.3]])
+        one_face = torch.LongTensor([[0, 1, 2]])
+
+        # Vertices + empty faces
+        for faces in empty_faces:
+            self._test_save_load(one_vert, faces)
+
+        # Faces + empty vertices
+        # The face refers to vertices that are not there, hence the warning.
+        message_regex = "Faces have invalid indices"
+        for verts in empty_verts:
+            with self.assertWarnsRegex(UserWarning, message_regex):
+                self._test_save_load(verts, one_face)
+
+        # Empty vertices + empty faces
+        # Every (verts, faces) pairing of the two empty forms is tried.
+        message_regex = "Empty 'verts' and 'faces' arguments provided"
+        for verts in empty_verts:
+            for faces in empty_faces:
+                with self.assertWarnsRegex(UserWarning, message_regex):
+                    self._test_save_load(verts, faces)
+
+    def test_save_obj(self):
+        """Check the exact text written by save_obj with decimal_places=2."""
+        verts = torch.tensor(
+            [[0.01, 0.2, 0.301], [0.2, 0.03, 0.408], [0.3, 0.4, 0.05], [0.6, 0.7, 0.8]],
+            dtype=torch.float32,
+        )
+        faces = torch.tensor(
+            [[0, 2, 1], [0, 1, 2], [3, 2, 1], [3, 1, 0]], dtype=torch.int64
+        )
+        expected_lines = [
+            "v 0.01 0.20 0.30",
+            "v 0.20 0.03 0.41",
+            "v 0.30 0.40 0.05",
+            "v 0.60 0.70 0.80",
+            "f 1 3 2",
+            "f 1 2 3",
+            "f 4 3 2",
+            "f 4 2 1",
+        ]
+
+        with NamedTemporaryFile(mode="w", suffix=".obj") as tmp:
+            obj_path = Path(tmp.name)
+            save_obj(obj_path, verts, faces, decimal_places=2)
+            self.assertEqual(obj_path.read_text(), "\n".join(expected_lines))
+
+    def test_load_mtl(self):
+        """Load the tutorial cow mesh and check its material and texture data."""
+        filename = os.path.join(TUTORIAL_DATA_DIR, "cow_mesh/cow.obj")
+        verts, faces, aux = load_obj(filename)
+
+        f32 = torch.float32
+        expected_materials = {
+            "material_1": {
+                "ambient_color": torch.tensor([1.0, 1.0, 1.0], dtype=f32),
+                "diffuse_color": torch.tensor([1.0, 1.0, 1.0], dtype=f32),
+                "specular_color": torch.tensor([0.0, 0.0, 0.0], dtype=f32),
+                "shininess": torch.tensor([10.0], dtype=f32),
+            }
+        }
+        # Texture atlas is not created as `create_texture_atlas=True` was
+        # not set in the load_obj args
+        self.assertIsNone(aux.texture_atlas)
+        # Check that there is an image with material name material_1.
+        tex_maps = aux.texture_images
+        self.assertTrue(tuple(tex_maps.keys()) == ("material_1",))
+        self.assertTrue(torch.is_tensor(tuple(tex_maps.values())[0]))
+        # Every face is expected to use material index 0.
+        all_zero = torch.zeros(len(faces.verts_idx))
+        self.assertTrue(torch.all(faces.materials_idx == all_zero))
+
+        # Check all keys and values in dictionary are the same.
+        materials = aux.material_colors
+        for got_name, want_name in zip(materials.keys(), expected_materials.keys()):
+            self.assertTrue(got_name == want_name)
+            got_props, want_props = materials[got_name], expected_materials[want_name]
+            for got_key, want_key in zip(got_props.keys(), want_props.keys()):
+                close = torch.allclose(got_props[got_key], want_props[want_key])
+                self.assertTrue(close)
+
+    def test_load_mtl_with_spaces_in_resource_filename(self):
+        """
+        A "map_Kd" filename containing a space (here "material 1.png") should
+        be returned whole by _parse_mtl, together with the material colors.
+        """
+        mtl_lines = [
+            "newmtl material_1",
+            "map_Kd material 1.png",
+            "Ka 1.000 1.000 1.000",  # white
+            "Kd 1.000 1.000 1.000",  # white
+            "Ks 0.000 0.000 0.000",  # black
+            "Ns 10.0",
+        ]
+
+        # Values mirror the Ka / Kd / Ks / Ns lines of the mtl text above;
+        # the loop below compares them position by position.
+        f32 = torch.float32
+        expected_materials = {
+            "material_1": {
+                "ambient_color": torch.tensor([1.0, 1.0, 1.0], dtype=f32),
+                "diffuse_color": torch.tensor([1.0, 1.0, 1.0], dtype=f32),
+                "specular_color": torch.tensor([0.0, 0.0, 0.0], dtype=f32),
+                "shininess": torch.tensor([10.0], dtype=f32),
+            }
+        }
+
+        with NamedTemporaryFile(mode="w", suffix=".mtl") as tmp:
+            tmp.write("\n".join(mtl_lines))
+            tmp.flush()
+
+            material_properties, texture_files = _parse_mtl(
+                Path(tmp.name), path_manager=PathManager(), device="cpu"
+            )
+
+            # Check that there is a material with name material_1
+            self.assertTrue(tuple(texture_files.keys()) == ("material_1",))
+            # Check that there is an image with name material 1.png
+            self.assertTrue(texture_files["material_1"] == "material 1.png")
+
+            # Check all keys and values in dictionary are the same.
+            for got_name, want_name in zip(
+                material_properties.keys(), expected_materials.keys()
+            ):
+                self.assertTrue(got_name == want_name)
+                got_props = material_properties[got_name]
+                want_props = expected_materials[want_name]
+                for got_key, want_key in zip(got_props.keys(), want_props.keys()):
+                    close = torch.allclose(got_props[got_key], want_props[want_key])
+                    self.assertTrue(close)
+
+    def test_load_mtl_texture_atlas_compare_softras(self):
+        """Compare the atlas built by load_obj with a reference saved from SoftRas."""
+        device = torch.device("cuda:0")
+        obj_filename = TUTORIAL_DATA_DIR / "cow_mesh/cow.obj"
+        expected_atlas_fname = DATA_DIR / "cow_texture_atlas_softras.pt"
+
+        # Note, the reference texture atlas generated using SoftRas load_obj function
+        # is too large to check in to the repo. Download the file to run the test locally.
+        if not os.path.exists(expected_atlas_fname):
+            url = (
+                "https://dl.fbaipublicfiles.com/pytorch3d/data/"
+                "tests/cow_texture_atlas_softras.pt"
+            )
+            msg = (
+                "cow_texture_atlas_softras.pt not found, download from %s, "
+                "save it at the path %s, and rerun" % (url, expected_atlas_fname)
+            )
+            warnings.warn(msg)
+            self.skipTest(msg)  # unittest deprecates returning a value from a test
+
+        expected_atlas = torch.load(expected_atlas_fname)
+        _, _, aux = load_obj(
+            obj_filename,
+            load_textures=True,
+            device=device,
+            create_texture_atlas=True,
+            texture_atlas_size=15,
+            texture_wrap="repeat",
+        )
+
+        self.assertClose(expected_atlas, aux.texture_atlas, atol=5e-5)
+
+    def test_load_mtl_noload(self):
+        """load_textures=False should leave materials and texture images unset."""
+        filename = os.path.join(TUTORIAL_DATA_DIR, "cow_mesh/cow.obj")
+        _, _, aux = load_obj(filename, load_textures=False)
+
+        self.assertIsNone(aux.material_colors)
+        self.assertIsNone(aux.texture_images)
+
+    def test_load_no_usemtl(self):
+        # The obj under missing_usemtl/ has no "usemtl material_1" line; the
+        # material and its texture are still expected to be picked up.
+        filename = os.path.join(DATA_DIR, "missing_usemtl/cow.obj")
+        # TexturesUV type
+        textured_mesh = IO().load_mesh(filename)
+        self.assertIsNotNone(textured_mesh.textures)
+
+        _, _, aux = load_obj(filename)
+        self.assertIn("material_1", aux.material_colors)
+        self.assertIn("material_1", aux.texture_images)
+
+    def test_load_mtl_fail(self):
+        """
+        A "usemtl" line with no mtl file should warn but still load the geometry.
+        """
+        # Faces have a material
+        obj_lines = [
+            "v 0.1 0.2 0.3",
+            "v 0.2 0.3 0.4",
+            "v 0.3 0.4 0.5",
+            "v 0.4 0.5 0.6",
+            "usemtl material_1",
+            "f 1 2 3",
+            "f 1 2 4",
+        ]
+        want_verts = torch.tensor(
+            [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4], [0.3, 0.4, 0.5], [0.4, 0.5, 0.6]],
+            dtype=torch.float32,
+        )
+        want_faces = torch.tensor([[0, 1, 2], [0, 1, 3]], dtype=torch.int64)
+
+        with NamedTemporaryFile(mode="w", suffix=".obj") as tmp:
+            tmp.write("\n".join(obj_lines))
+            tmp.flush()
+            with self.assertWarnsRegex(UserWarning, "No mtl file provided"):
+                verts, faces, aux = load_obj(Path(tmp.name))
+
+            self.assertTrue(torch.allclose(verts, want_verts))
+            self.assertTrue(torch.allclose(faces.verts_idx, want_faces))
+            self.assertIsNone(aux.material_colors)
+            self.assertIsNone(aux.texture_images)
+            self.assertIsNone(aux.normals)
+            self.assertIsNone(aux.verts_uvs)
+
+    def test_load_obj_mtl_no_image(self):
+        """Atlas for the obj_mtl_no_image sample should be one flat color."""
+        atlas_size = 8
+        filename = os.path.join(DATA_DIR, "obj_mtl_no_image/model.obj")
+        verts, faces, aux = load_obj(
+            filename,
+            load_textures=True,
+            create_texture_atlas=True,
+            texture_atlas_size=atlas_size,
+            texture_wrap=None,
+        )
+        want_verts = torch.tensor(
+            [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4], [0.3, 0.4, 0.5], [0.4, 0.5, 0.6]],
+            dtype=torch.float32,
+        )
+        want_faces = torch.tensor([[0, 1, 2], [0, 1, 3]], dtype=torch.int64)
+        self.assertTrue(torch.allclose(verts, want_verts))
+        self.assertTrue(torch.allclose(faces.verts_idx, want_faces))
+
+        # Check that the material diffuse color has been assigned to all the
+        # values in the texture atlas.
+        diffuse = torch.tensor([0.5, 0.0, 0.0], dtype=torch.float32)
+        want_atlas = diffuse[None, None, None, :].expand(2, atlas_size, atlas_size, -1)
+        self.assertTrue(torch.allclose(aux.texture_atlas, want_atlas))
+        material_names = list(aux.material_colors.keys())
+        self.assertEqual(len(material_names), 1)
+        self.assertEqual(material_names, ["material_1"])
+
+    def test_load_obj_missing_texture(self):
+        # A missing texture image should only warn; geometry is still checked.
+        filename = os.path.join(DATA_DIR, "missing_files_obj/model.obj")
+        with self.assertWarnsRegex(UserWarning, "Texture file does not exist"):
+            verts, faces, _ = load_obj(filename)
+
+        want_verts = torch.tensor(
+            [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4], [0.3, 0.4, 0.5], [0.4, 0.5, 0.6]],
+            dtype=torch.float32,
+        )
+        want_faces = torch.tensor([[0, 1, 2], [0, 1, 3]], dtype=torch.int64)
+        self.assertTrue(torch.allclose(verts, want_verts))
+        self.assertTrue(torch.allclose(faces.verts_idx, want_faces))
+
+    def test_load_obj_missing_texture_noload(self):
+        # With load_textures=False the same file loads without material data.
+        filename = os.path.join(DATA_DIR, "missing_files_obj/model.obj")
+        verts, faces, aux = load_obj(filename, load_textures=False)
+
+        want_verts = torch.tensor(
+            [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4], [0.3, 0.4, 0.5], [0.4, 0.5, 0.6]],
+            dtype=torch.float32,
+        )
+        want_faces = torch.tensor([[0, 1, 2], [0, 1, 3]], dtype=torch.int64)
+        self.assertTrue(torch.allclose(verts, want_verts))
+        self.assertTrue(torch.allclose(faces.verts_idx, want_faces))
+        self.assertIsNone(aux.material_colors)
+        self.assertIsNone(aux.texture_images)
+
+    def test_load_obj_missing_mtl(self):
+        # A missing mtl file should only warn; geometry is still checked.
+        filename = os.path.join(DATA_DIR, "missing_files_obj/model2.obj")
+        with self.assertWarnsRegex(UserWarning, "Mtl file does not exist"):
+            verts, faces, _ = load_obj(filename)
+
+        want_verts = torch.tensor(
+            [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4], [0.3, 0.4, 0.5], [0.4, 0.5, 0.6]],
+            dtype=torch.float32,
+        )
+        want_faces = torch.tensor([[0, 1, 2], [0, 1, 3]], dtype=torch.int64)
+        self.assertTrue(torch.allclose(verts, want_verts))
+        self.assertTrue(torch.allclose(faces.verts_idx, want_faces))
+
+    def test_load_obj_missing_mtl_noload(self):
+        # With load_textures=False the same file loads without material data.
+        filename = os.path.join(DATA_DIR, "missing_files_obj/model2.obj")
+        verts, faces, aux = load_obj(filename, load_textures=False)
+
+        want_verts = torch.tensor(
+            [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4], [0.3, 0.4, 0.5], [0.4, 0.5, 0.6]],
+            dtype=torch.float32,
+        )
+        want_faces = torch.tensor([[0, 1, 2], [0, 1, 3]], dtype=torch.int64)
+        self.assertTrue(torch.allclose(verts, want_verts))
+        self.assertTrue(torch.allclose(faces.verts_idx, want_faces))
+        self.assertIsNone(aux.material_colors)
+        self.assertIsNone(aux.texture_images)
+
+    def test_join_meshes_as_batch(self):
+        """
+        Test that join_meshes_as_batch and load_objs_as_meshes are consistent
+        with single meshes. Some steps run on "cuda:0", so a GPU is needed.
+        """
+
+        def check_triple(mesh, mesh3):
+            """
+            Verify that mesh3 is three copies of mesh (verts, faces, textures).
+            """
+            # Either both are None, or y equals x repeated three times on dim 0.
+            def check_item(x, y):
+                self.assertEqual(x is None, y is None)
+                if x is not None:
+                    self.assertClose(torch.cat([x, x, x]), y)
+
+            check_item(mesh.verts_padded(), mesh3.verts_padded())
+            check_item(mesh.faces_padded(), mesh3.faces_padded())
+
+            if mesh.textures is not None:
+                if isinstance(mesh.textures, TexturesUV):
+                    check_item(
+                        mesh.textures.faces_uvs_padded(),
+                        mesh3.textures.faces_uvs_padded(),
+                    )
+                    check_item(
+                        mesh.textures.verts_uvs_padded(),
+                        mesh3.textures.verts_uvs_padded(),
+                    )
+                    check_item(
+                        mesh.textures.maps_padded(), mesh3.textures.maps_padded()
+                    )
+                elif isinstance(mesh.textures, TexturesVertex):
+                    check_item(
+                        mesh.textures.verts_features_padded(),
+                        mesh3.textures.verts_features_padded(),
+                    )
+                elif isinstance(mesh.textures, TexturesAtlas):
+                    check_item(
+                        mesh.textures.atlas_padded(), mesh3.textures.atlas_padded()
+                    )
+
+        obj_filename = TUTORIAL_DATA_DIR / "cow_mesh/cow.obj"
+
+        mesh = load_objs_as_meshes([obj_filename])
+        mesh3 = load_objs_as_meshes([obj_filename, obj_filename, obj_filename])
+        check_triple(mesh, mesh3)
+        self.assertTupleEqual(mesh.textures.maps_padded().shape, (1, 1024, 1024, 3))
+
+        # Mismatched texture map sizes need interpolate(); the result is not checked.
+        mesh2048 = mesh.clone()
+        maps = mesh.textures.maps_padded()
+        mesh2048.textures._maps_padded = torch.cat([maps, maps], dim=1)
+        join_meshes_as_batch([mesh.to("cuda:0"), mesh2048.to("cuda:0")])
+
+        mesh_notex = load_objs_as_meshes([obj_filename], load_textures=False)
+        mesh3_notex = load_objs_as_meshes(
+            [obj_filename, obj_filename, obj_filename], load_textures=False
+        )
+        check_triple(mesh_notex, mesh3_notex)
+        self.assertIsNone(mesh_notex.textures)
+
+        # meshes with vertex texture, join into a batch.
+        verts = torch.randn((4, 3), dtype=torch.float32)
+        faces = torch.tensor([[2, 1, 0], [3, 1, 0]], dtype=torch.int64)
+        vert_tex = torch.ones_like(verts)
+        rgb_tex = TexturesVertex(verts_features=[vert_tex])
+        mesh_rgb = Meshes(verts=[verts], faces=[faces], textures=rgb_tex)
+        mesh_rgb3 = join_meshes_as_batch([mesh_rgb, mesh_rgb, mesh_rgb])
+        check_triple(mesh_rgb, mesh_rgb3)
+        nums_rgb = mesh_rgb.textures._num_verts_per_mesh
+        nums_rgb3 = mesh_rgb3.textures._num_verts_per_mesh
+        self.assertEqual(type(nums_rgb), list)
+        self.assertEqual(type(nums_rgb3), list)
+        self.assertListEqual(nums_rgb * 3, nums_rgb3)
+
+        # meshes with texture atlas, join into a batch.
+        device = "cuda:0"
+        atlas = torch.rand((2, 4, 4, 3), dtype=torch.float32, device=device)
+        atlas_tex = TexturesAtlas(atlas=[atlas])
+        mesh_atlas = Meshes(verts=[verts], faces=[faces], textures=atlas_tex)
+        mesh_atlas3 = join_meshes_as_batch([mesh_atlas, mesh_atlas, mesh_atlas])
+        check_triple(mesh_atlas, mesh_atlas3)
+
+        # Test load multiple meshes with textures into a batch.
+        teapot_obj = TUTORIAL_DATA_DIR / "teapot.obj"
+        mesh_teapot = load_objs_as_meshes([teapot_obj])
+        teapot_verts, teapot_faces = mesh_teapot.get_mesh_verts_faces(0)
+        mix_mesh = load_objs_as_meshes([obj_filename, teapot_obj], load_textures=False)
+        self.assertEqual(len(mix_mesh), 2)
+        self.assertClose(mix_mesh.verts_list()[0], mesh.verts_list()[0])
+        self.assertClose(mix_mesh.faces_list()[0], mesh.faces_list()[0])
+        self.assertClose(mix_mesh.verts_list()[1], teapot_verts)
+        self.assertClose(mix_mesh.faces_list()[1], teapot_faces)
+
+        cow3_tea = join_meshes_as_batch([mesh3, mesh_teapot], include_textures=False)
+        self.assertEqual(len(cow3_tea), 4)
+        check_triple(mesh_notex, cow3_tea[:3])
+        self.assertClose(cow3_tea.verts_list()[3], mesh_teapot.verts_list()[0])
+        self.assertClose(cow3_tea.faces_list()[3], mesh_teapot.faces_list()[0])
+
+        # Check error raised if all meshes in the batch don't have the same texture type
+        with self.assertRaisesRegex(ValueError, "same type of texture"):
+            join_meshes_as_batch([mesh_atlas, mesh_rgb, mesh_atlas])
+
+    def test_save_obj_with_normal(self):
+        verts = torch.tensor(
+            [[0.01, 0.2, 0.301], [0.2, 0.03, 0.408], [0.3, 0.4, 0.05], [0.6, 0.7, 0.8]],
+            dtype=torch.float32,
+        )
+        faces = torch.tensor(
+            [[0, 2, 1], [0, 1, 2], [3, 2, 1], [3, 1, 0]], dtype=torch.int64
+        )
+        normals = torch.tensor(
+            [
+                [0.02, 0.5, 0.73],
+                [0.3, 0.03, 0.361],
+                [0.32, 0.12, 0.47],
+                [0.36, 0.17, 0.9],
+                [0.40, 0.7, 0.19],
+                [1.0, 0.00, 0.000],
+                [0.00, 1.00, 0.00],
+                [0.00, 0.00, 1.0],
+            ],
+            dtype=torch.float32,
+        )
+        faces_normals_idx = torch.tensor(
+            [[0, 1, 2], [2, 3, 4], [4, 5, 6], [6, 7, 0]], dtype=torch.int64
+        )
+        # Normals only (no UVs): faces are written as 1-indexed "f v//vn" triplets.
+        with TemporaryDirectory() as temp_dir:
+            obj_file = os.path.join(temp_dir, "mesh.obj")
+            save_obj(
+                obj_file,
+                verts,
+                faces,
+                decimal_places=2,
+                normals=normals,
+                faces_normals_idx=faces_normals_idx,
+            )
+
+            expected_obj_file = "\n".join(
+                [
+                    "v 0.01 0.20 0.30",
+                    "v 0.20 0.03 0.41",
+                    "v 0.30 0.40 0.05",
+                    "v 0.60 0.70 0.80",
+                    "vn 0.02 0.50 0.73",
+                    "vn 0.30 0.03 0.36",
+                    "vn 0.32 0.12 0.47",
+                    "vn 0.36 0.17 0.90",
+                    "vn 0.40 0.70 0.19",
+                    "vn 1.00 0.00 0.00",
+                    "vn 0.00 1.00 0.00",
+                    "vn 0.00 0.00 1.00",
+                    "f 1//1 3//2 2//3",
+                    "f 1//3 2//4 3//5",
+                    "f 4//5 3//6 2//7",
+                    "f 4//7 2//8 1//1",
+                ]
+            )
+
+            # Check the obj file is saved correctly
+            with open(obj_file, "r") as actual_file:
+                self.assertEqual(actual_file.read(), expected_obj_file)
+
+    def test_save_obj_with_texture(self):
+        verts = torch.tensor(
+            [[0.01, 0.2, 0.301], [0.2, 0.03, 0.408], [0.3, 0.4, 0.05], [0.6, 0.7, 0.8]],
+            dtype=torch.float32,
+        )
+        faces = torch.tensor(
+            [[0, 2, 1], [0, 1, 2], [3, 2, 1], [3, 1, 0]], dtype=torch.int64
+        )
+        verts_uvs = torch.tensor(
+            [[0.02, 0.5], [0.3, 0.03], [0.32, 0.12], [0.36, 0.17]],
+            dtype=torch.float32,
+        )
+        faces_uvs = faces
+        texture_map = torch.randint(size=(2, 2, 3), high=255) / 255.0
+        # Saving with UVs and a texture map should also write mesh.mtl and mesh.png.
+        with TemporaryDirectory() as temp_dir:
+            obj_file = os.path.join(temp_dir, "mesh.obj")
+            save_obj(
+                obj_file,
+                verts,
+                faces,
+                decimal_places=2,
+                verts_uvs=verts_uvs,
+                faces_uvs=faces_uvs,
+                texture_map=texture_map,
+            )
+
+            expected_obj_file = "\n".join(
+                [
+                    "",
+                    "mtllib mesh.mtl",
+                    "usemtl mesh",
+                    "",
+                    "v 0.01 0.20 0.30",
+                    "v 0.20 0.03 0.41",
+                    "v 0.30 0.40 0.05",
+                    "v 0.60 0.70 0.80",
+                    "vt 0.02 0.50",
+                    "vt 0.30 0.03",
+                    "vt 0.32 0.12",
+                    "vt 0.36 0.17",
+                    "f 1/1 3/3 2/2",
+                    "f 1/1 2/2 3/3",
+                    "f 4/4 3/3 2/2",
+                    "f 4/4 2/2 1/1",
+                ]
+            )
+            expected_mtl_file = "\n".join(["newmtl mesh", "map_Kd mesh.png", ""])
+
+            # Check there are only 3 files in the temp dir
+            tempfiles = ["mesh.obj", "mesh.png", "mesh.mtl"]
+            tempfiles_dir = os.listdir(temp_dir)
+            self.assertEqual(Counter(tempfiles), Counter(tempfiles_dir))
+
+            # Check the obj file is saved correctly
+            with open(obj_file, "r") as actual_file:
+                self.assertEqual(actual_file.read(), expected_obj_file)
+
+            # Check the mtl file is saved correctly
+            mtl_file_name = os.path.join(temp_dir, "mesh.mtl")
+            with open(mtl_file_name, "r") as mtl_file:
+                self.assertEqual(mtl_file.read(), expected_mtl_file)
+
+            # Check the texture image file is saved correctly
+            texture_image = load_rgb_image("mesh.png", temp_dir)
+            self.assertClose(texture_image, texture_map)
+
+    def test_save_obj_with_normal_and_texture(self):
+        verts = torch.tensor(
+            [[0.01, 0.2, 0.301], [0.2, 0.03, 0.408], [0.3, 0.4, 0.05], [0.6, 0.7, 0.8]],
+            dtype=torch.float32,
+        )
+        faces = torch.tensor(
+            [[0, 2, 1], [0, 1, 2], [3, 2, 1], [3, 1, 0]], dtype=torch.int64
+        )
+        normals = torch.tensor(
+            [
+                [0.02, 0.5, 0.73],
+                [0.3, 0.03, 0.361],
+                [0.32, 0.12, 0.47],
+                [0.36, 0.17, 0.9],
+            ],
+            dtype=torch.float32,
+        )
+        faces_normals_idx = faces
+        verts_uvs = torch.tensor(
+            [[0.02, 0.5], [0.3, 0.03], [0.32, 0.12], [0.36, 0.17]],
+            dtype=torch.float32,
+        )
+        faces_uvs = faces
+        texture_map = torch.randint(size=(2, 2, 3), high=255) / 255.0
+        # With normals and UVs together, faces are written as "f v/vt/vn" triplets.
+        with TemporaryDirectory() as temp_dir:
+            obj_file = os.path.join(temp_dir, "mesh.obj")
+            save_obj(
+                obj_file,
+                verts,
+                faces,
+                decimal_places=2,
+                normals=normals,
+                faces_normals_idx=faces_normals_idx,
+                verts_uvs=verts_uvs,
+                faces_uvs=faces_uvs,
+                texture_map=texture_map,
+            )
+
+            expected_obj_file = "\n".join(
+                [
+                    "",
+                    "mtllib mesh.mtl",
+                    "usemtl mesh",
+                    "",
+                    "v 0.01 0.20 0.30",
+                    "v 0.20 0.03 0.41",
+                    "v 0.30 0.40 0.05",
+                    "v 0.60 0.70 0.80",
+                    "vn 0.02 0.50 0.73",
+                    "vn 0.30 0.03 0.36",
+                    "vn 0.32 0.12 0.47",
+                    "vn 0.36 0.17 0.90",
+                    "vt 0.02 0.50",
+                    "vt 0.30 0.03",
+                    "vt 0.32 0.12",
+                    "vt 0.36 0.17",
+                    "f 1/1/1 3/3/3 2/2/2",
+                    "f 1/1/1 2/2/2 3/3/3",
+                    "f 4/4/4 3/3/3 2/2/2",
+                    "f 4/4/4 2/2/2 1/1/1",
+                ]
+            )
+            expected_mtl_file = "\n".join(["newmtl mesh", "map_Kd mesh.png", ""])
+
+            # Check there are only 3 files in the temp dir
+            tempfiles = ["mesh.obj", "mesh.png", "mesh.mtl"]
+            tempfiles_dir = os.listdir(temp_dir)
+            self.assertEqual(Counter(tempfiles), Counter(tempfiles_dir))
+
+            # Check the obj file is saved correctly
+            with open(obj_file, "r") as actual_file:
+                self.assertEqual(actual_file.read(), expected_obj_file)
+
+            # Check the mtl file is saved correctly
+            mtl_file_name = os.path.join(temp_dir, "mesh.mtl")
+            with open(mtl_file_name, "r") as mtl_file:
+                self.assertEqual(mtl_file.read(), expected_mtl_file)
+
+            # Check the texture image file is saved correctly
+            texture_image = load_rgb_image("mesh.png", temp_dir)
+            self.assertClose(texture_image, texture_map)
+
+    def test_save_obj_with_texture_errors(self):
+        verts = torch.tensor(
+            [[0.01, 0.2, 0.301], [0.2, 0.03, 0.408], [0.3, 0.4, 0.05], [0.6, 0.7, 0.8]],
+            dtype=torch.float32,
+        )
+        faces = torch.tensor(
+            [[0, 2, 1], [0, 1, 2], [3, 2, 1], [3, 1, 0]], dtype=torch.int64
+        )
+        verts_uvs = torch.tensor(
+            [[0.02, 0.5], [0.3, 0.03], [0.32, 0.12], [0.36, 0.17]],
+            dtype=torch.float32,
+        )
+        faces_uvs = faces
+        texture_map = torch.randint(size=(2, 2, 3), high=255)
+        # Expected file when the texture arguments are incomplete: verts and faces only.
+        expected_obj_file = "\n".join(
+            [
+                "v 0.01 0.20 0.30",
+                "v 0.20 0.03 0.41",
+                "v 0.30 0.40 0.05",
+                "v 0.60 0.70 0.80",
+                "f 1 3 2",
+                "f 1 2 3",
+                "f 4 3 2",
+                "f 4 2 1",
+            ]
+        )
+        with TemporaryDirectory() as temp_dir:
+            obj_file = os.path.join(temp_dir, "mesh.obj")
+
+            # If only one of verts_uvs/faces_uvs/texture_map is provided
+            # then textures are not saved
+            for arg in [
+                {"verts_uvs": verts_uvs},
+                {"faces_uvs": faces_uvs},
+                {"texture_map": texture_map},
+            ]:
+                save_obj(
+                    obj_file,
+                    verts,
+                    faces,
+                    decimal_places=2,
+                    **arg,
+                )
+
+                # Check there is only 1 file in the temp dir
+                tempfiles = ["mesh.obj"]
+                tempfiles_dir = os.listdir(temp_dir)
+                self.assertEqual(tempfiles, tempfiles_dir)
+
+                # Check the obj file is saved correctly
+                with open(obj_file, "r") as actual_file:
+                    self.assertEqual(actual_file.read(), expected_obj_file)
+        # Wrongly shaped texture arguments must raise ValueError.
+        obj_file = StringIO()
+        with self.assertRaises(ValueError):
+            save_obj(
+                obj_file,
+                verts,
+                faces,
+                decimal_places=2,
+                verts_uvs=verts_uvs,
+                faces_uvs=faces_uvs[..., 2],  # Incorrect shape
+                texture_map=texture_map,
+            )
+
+        with self.assertRaises(ValueError):
+            save_obj(
+                obj_file,
+                verts,
+                faces,
+                decimal_places=2,
+                verts_uvs=verts_uvs[..., 0],  # Incorrect shape
+                faces_uvs=faces_uvs,
+                texture_map=texture_map,
+            )
+
+        with self.assertRaises(ValueError):
+            save_obj(
+                obj_file,
+                verts,
+                faces,
+                decimal_places=2,
+                verts_uvs=verts_uvs,
+                faces_uvs=faces_uvs,
+                texture_map=texture_map[..., 1],  # Incorrect shape
+            )
+
+    def test_save_obj_with_texture_IO(self):
+        verts = torch.tensor(
+            [[0.01, 0.2, 0.301], [0.2, 0.03, 0.408], [0.3, 0.4, 0.05], [0.6, 0.7, 0.8]],
+            dtype=torch.float32,
+        )
+        faces = torch.tensor(
+            [[0, 2, 1], [0, 1, 2], [3, 2, 1], [3, 1, 0]], dtype=torch.int64
+        )
+        verts_uvs = torch.tensor(
+            [[0.02, 0.5], [0.3, 0.03], [0.32, 0.12], [0.36, 0.17]],
+            dtype=torch.float32,
+        )
+        faces_uvs = faces
+        texture_map = torch.randint(size=(2, 2, 3), high=255) / 255.0
+        # Same expectations as test_save_obj_with_texture, saved via IO().save_mesh.
+        with TemporaryDirectory() as temp_dir:
+            obj_file = os.path.join(temp_dir, "mesh.obj")
+            textures_uv = TexturesUV([texture_map], [faces_uvs], [verts_uvs])
+            test_mesh = Meshes(verts=[verts], faces=[faces], textures=textures_uv)
+
+            IO().save_mesh(data=test_mesh, path=obj_file, decimal_places=2)
+
+            expected_obj_file = "\n".join(
+                [
+                    "",
+                    "mtllib mesh.mtl",
+                    "usemtl mesh",
+                    "",
+                    "v 0.01 0.20 0.30",
+                    "v 0.20 0.03 0.41",
+                    "v 0.30 0.40 0.05",
+                    "v 0.60 0.70 0.80",
+                    "vt 0.02 0.50",
+                    "vt 0.30 0.03",
+                    "vt 0.32 0.12",
+                    "vt 0.36 0.17",
+                    "f 1/1 3/3 2/2",
+                    "f 1/1 2/2 3/3",
+                    "f 4/4 3/3 2/2",
+                    "f 4/4 2/2 1/1",
+                ]
+            )
+            expected_mtl_file = "\n".join(["newmtl mesh", "map_Kd mesh.png", ""])
+
+            # Check there are only 3 files in the temp dir
+            tempfiles = ["mesh.obj", "mesh.png", "mesh.mtl"]
+            tempfiles_dir = os.listdir(temp_dir)
+            self.assertEqual(Counter(tempfiles), Counter(tempfiles_dir))
+
+            # Check the obj file is saved correctly
+            with open(obj_file, "r") as actual_file:
+                self.assertEqual(actual_file.read(), expected_obj_file)
+
+            # Check the mtl file is saved correctly
+            mtl_file_name = os.path.join(temp_dir, "mesh.mtl")
+            with open(mtl_file_name, "r") as mtl_file:
+                self.assertEqual(mtl_file.read(), expected_mtl_file)
+
+            # Check the texture image file is saved correctly
+            texture_image = load_rgb_image("mesh.png", temp_dir)
+            self.assertClose(texture_image, texture_map)
+
+    @staticmethod  # returns a zero-arg callable; each call saves to a new StringIO
+    def _bm_save_obj(verts: torch.Tensor, faces: torch.Tensor, decimal_places: int):
+        return lambda: save_obj(StringIO(), verts, faces, decimal_places)
+
+    @staticmethod
+    def _bm_load_obj(verts: torch.Tensor, faces: torch.Tensor, decimal_places: int):
+        f = StringIO()
+        save_obj(f, verts, faces, decimal_places)
+        s = f.getvalue()
+        # Recreate stream so it's unaffected by how it was created.
+        return lambda: load_obj(StringIO(s))
+
+    @staticmethod  # benchmark: save V repeated vertex rows and F repeated faces
+    def bm_save_simple_obj_with_init(V: int, F: int):
+        verts = torch.tensor(V * [[0.11, 0.22, 0.33]]).view(-1, 3)
+        faces = torch.tensor(F * [[1, 2, 3]]).view(-1, 3)
+        return TestMeshObjIO._bm_save_obj(verts, faces, decimal_places=2)
+
+    @staticmethod  # benchmark: load an obj of V repeated vertex rows, F repeated faces
+    def bm_load_simple_obj_with_init(V: int, F: int):
+        verts = torch.tensor(V * [[0.1, 0.2, 0.3]]).view(-1, 3)
+        faces = torch.tensor(F * [[1, 2, 3]]).view(-1, 3)
+        return TestMeshObjIO._bm_load_obj(verts, faces, decimal_places=2)
+
+    @staticmethod  # benchmark: save a torus with N sides and 2 * N rings
+    def bm_save_complex_obj(N: int):
+        meshes = torus(r=0.25, R=1.0, sides=N, rings=2 * N)
+        [verts], [faces] = meshes.verts_list(), meshes.faces_list()
+        return TestMeshObjIO._bm_save_obj(verts, faces, decimal_places=5)
+
+    @staticmethod  # benchmark: load a saved torus with N sides and 2 * N rings
+    def bm_load_complex_obj(N: int):
+        meshes = torus(r=0.25, R=1.0, sides=N, rings=2 * N)
+        [verts], [faces] = meshes.verts_list(), meshes.faces_list()
+        return TestMeshObjIO._bm_load_obj(verts, faces, decimal_places=5)
+
+    @staticmethod
+    def bm_load_texture_atlas(R: int):
+        device = torch.device("cuda:0")
+        torch.cuda.set_device(device)
+        data_dir = "/data/users/nikhilar/fbsource/fbcode/vision/fair/pytorch3d/docs/"
+        obj_filename = os.path.join(data_dir, "tutorials/data/cow_mesh/cow.obj")
+        torch.cuda.synchronize()
+
+        def load():
+            load_obj(
+                obj_filename,
+                load_textures=True,
+                device=device,
+                create_texture_atlas=True,
+                texture_atlas_size=R,
+            )
+            torch.cuda.synchronize()
+
+        return load
+
+    @staticmethod
+    def bm_bilinear_sampling_vectorized(S: int, F: int, R: int):
+        device = torch.device("cuda:0")
+        torch.cuda.set_device(device)
+        image = torch.rand((S, S, 3))
+        grid = torch.rand((F, R, R, 2))
+        torch.cuda.synchronize()
+
+        def load():
+            _bilinear_interpolation_vectorized(image, grid)
+            torch.cuda.synchronize()
+
+        return load
+
+    @staticmethod
+    def bm_bilinear_sampling_grid_sample(S: int, F: int, R: int):
+        device = torch.device("cuda:0")
+        torch.cuda.set_device(device)
+        image = torch.rand((S, S, 3))
+        grid = torch.rand((F, R, R, 2))
+        torch.cuda.synchronize()
+
+        def load():
+            _bilinear_interpolation_grid_sample(image, grid)
+            torch.cuda.synchronize()
+
+        return load
diff --git a/pytorch3d/tests/test_io_off.py b/pytorch3d/tests/test_io_off.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a5dc669a85ff9f6df2c3a3159bf48d52df2e782
--- /dev/null
+++ b/pytorch3d/tests/test_io_off.py
@@ -0,0 +1,335 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from tempfile import NamedTemporaryFile
+
+import torch
+from pytorch3d.io import IO
+from pytorch3d.renderer import TexturesAtlas, TexturesVertex
+from pytorch3d.utils import ico_sphere
+
+from .common_testing import TestCaseMixin
+
+
+CUBE_FACES = [  # cube quads (a, b, c, d): every (a, b, c), then every (a, c, d)
+    [0, 1, 2],
+    [7, 4, 0],
+    [4, 5, 1],
+    [5, 6, 2],
+    [3, 2, 6],
+    [6, 5, 4],
+    [0, 2, 3],
+    [7, 0, 3],
+    [4, 1, 0],
+    [5, 2, 1],
+    [3, 6, 7],
+    [6, 4, 7],
+]
+
+
+class TestMeshOffIO(TestCaseMixin, unittest.TestCase):
+    def test_load_face_colors(self):
+        # Cube example from Wikipedia: 8 vertices and 6 colored quad faces.
+        off_file_lines = [
+            "OFF",
+            "# cube.off",
+            "# A cube",
+            " ",
+            "8 6 12",
+            " 1.0  0.0 1.4142",
+            " 0.0  1.0 1.4142",
+            "-1.0  0.0 1.4142",
+            " 0.0 -1.0 1.4142",
+            " 1.0  0.0 0.0",
+            " 0.0  1.0 0.0",
+            "-1.0  0.0 0.0",
+            " 0.0 -1.0 0.0",
+            "4  0 1 2 3  255 0 0 #red",
+            "4  7 4 0 3  0 255 0 #green",
+            "4  4 5 1 0  0 0 255 #blue",
+            "4  5 6 2 1  0 255 0 ",
+            "4  3 2 6 7  0 0 255",
+            "4  6 5 4 7  255 0 0",
+        ]
+        off_file = "\n".join(off_file_lines)
+        io = IO()
+        with NamedTemporaryFile(mode="w", suffix=".off") as f:
+            f.write(off_file)
+            f.flush()
+            mesh = io.load_mesh(f.name)
+        # The 6 quads load as 12 triangles, each with a 1x1 atlas entry for its color.
+        self.assertEqual(mesh.verts_padded().shape, (1, 8, 3))
+        verts_str = " ".join(off_file_lines[5:13])
+        verts_data = torch.tensor([float(i) for i in verts_str.split()])
+        self.assertClose(mesh.verts_padded().flatten(), verts_data)
+        self.assertClose(mesh.faces_padded(), torch.tensor(CUBE_FACES)[None])
+
+        faces_colors_full = mesh.textures.atlas_padded()
+        self.assertEqual(faces_colors_full.shape, (1, 12, 1, 1, 3))
+        faces_colors = faces_colors_full[0, :, 0, 0]
+        max_color = faces_colors.max()
+        self.assertEqual(max_color, 1)
+
+        # Every face has one color 1, the rest 0.
+        total_color = faces_colors.sum(dim=1)
+        self.assertEqual(total_color.max(), max_color)
+        self.assertEqual(total_color.min(), max_color)
+
+    def test_load_vertex_colors(self):
+        # Example with a single face and with integer vertex colors
+        off_file_lines = [
+            "8 1 12",
+            " 1.0  0.0 1.4142 0 1 0",
+            " 0.0  1.0 1.4142 0 1 0",
+            "-1.0  0.0 1.4142 0 1 0",
+            " 0.0 -1.0 1.4142 0 1 0",
+            " 1.0  0.0 0.0 0 1 0",
+            " 0.0  1.0 0.0 0 1 0",
+            "-1.0  0.0 0.0 0 1 0",
+            " 0.0 -1.0 0.0 0 1 0",
+            "3 0 1 2",
+        ]
+        off_file = "\n".join(off_file_lines)
+        io = IO()
+        with NamedTemporaryFile(mode="w", suffix=".off") as f:
+            f.write(off_file)
+            f.flush()
+            mesh = io.load_mesh(f.name)
+
+        self.assertEqual(mesh.verts_padded().shape, (1, 8, 3))
+        verts_lines = (line.split()[:3] for line in off_file_lines[1:9])
+        verts_data = [[[float(x) for x in line] for line in verts_lines]]
+        self.assertClose(mesh.verts_padded(), torch.tensor(verts_data))
+        self.assertClose(mesh.faces_padded(), torch.tensor([[[0, 1, 2]]]))
+
+        self.assertIsInstance(mesh.textures, TexturesVertex)
+        colors = mesh.textures.verts_features_padded()
+        # Integer colors are scaled by 1/255, so the green value 1 loads as 1.0 / 255.
+        self.assertEqual(colors.shape, (1, 8, 3))
+        self.assertClose(colors[0, :, [0, 2]], torch.zeros(8, 2))
+        self.assertClose(colors[0, :, 1], torch.full((8,), 1.0 / 255))
+
+    def test_load_lumpy(self):
+        # Example off file whose faces have different numbers of vertices.
+        off_file_lines = [
+            "8 3 12",
+            " 1.0  0.0 1.4142",
+            " 0.0  1.0 1.4142",
+            "-1.0  0.0 1.4142",
+            " 0.0 -1.0 1.4142",
+            " 1.0  0.0 0.0",
+            " 0.0  1.0 0.0",
+            "-1.0  0.0 0.0",
+            " 0.0 -1.0 0.0",
+            "3  0 1 2    255 0 0 #red",
+            "4  7 4 0 3  0 255 0 #green",
+            "4  4 5 1 0  0 0 255 #blue",
+        ]
+        off_file = "\n".join(off_file_lines)
+        io = IO()
+        with NamedTemporaryFile(mode="w", suffix=".off") as f:
+            f.write(off_file)
+            f.flush()
+            mesh = io.load_mesh(f.name)
+
+        self.assertEqual(mesh.verts_padded().shape, (1, 8, 3))
+        verts_str = " ".join(off_file_lines[1:9])
+        verts_data = torch.tensor([float(i) for i in verts_str.split()])
+        self.assertClose(mesh.verts_padded().flatten(), verts_data)
+        # The triangle is kept and each quad becomes two triangles: 1 + 2 * 2 = 5.
+        self.assertEqual(mesh.faces_padded().shape, (1, 5, 3))
+        faces_expected = [[0, 1, 2], [7, 4, 0], [7, 0, 3], [4, 5, 1], [4, 1, 0]]
+        self.assertClose(mesh.faces_padded()[0], torch.tensor(faces_expected))
+
+    def test_save_load_icosphere(self):
+        # Test that saving a mesh as an off file and loading it results in the
+        # same data on the correct device, for all permitted types of textures.
+        # Standard test is for random colors, but also check totally white,
+        # because there's a difference in OFF semantics between "1.0" color (=full)
+        # and "1" (= 1/255 color)
+        sphere = ico_sphere(0)
+        io = IO()
+        device = torch.device("cuda:0")
+
+        atlas_padded = torch.rand(1, sphere.faces_list()[0].shape[0], 1, 1, 3)
+        atlas = TexturesAtlas(atlas_padded)
+
+        atlas_padded_white = torch.ones(1, sphere.faces_list()[0].shape[0], 1, 1, 3)
+        atlas_white = TexturesAtlas(atlas_padded_white)
+
+        verts_colors_padded = torch.rand(1, sphere.verts_list()[0].shape[0], 3)
+        vertex_texture = TexturesVertex(verts_colors_padded)
+
+        verts_colors_padded_white = torch.ones(1, sphere.verts_list()[0].shape[0], 3)
+        vertex_texture_white = TexturesVertex(verts_colors_padded_white)
+
+        # No colors case
+        with NamedTemporaryFile(mode="w", suffix=".off") as f:
+            io.save_mesh(sphere, f.name)
+            f.flush()
+            mesh1 = io.load_mesh(f.name, device=device)
+        self.assertEqual(mesh1.device, device)
+        mesh1 = mesh1.cpu()
+        self.assertClose(mesh1.verts_padded(), sphere.verts_padded())
+        self.assertClose(mesh1.faces_padded(), sphere.faces_padded())
+        self.assertIsNone(mesh1.textures)
+
+        # Atlas case
+        sphere.textures = atlas
+        with NamedTemporaryFile(mode="w", suffix=".off") as f:
+            io.save_mesh(sphere, f.name)
+            f.flush()
+            mesh2 = io.load_mesh(f.name, device=device)
+
+        self.assertEqual(mesh2.device, device)
+        mesh2 = mesh2.cpu()
+        self.assertClose(mesh2.verts_padded(), sphere.verts_padded())
+        self.assertClose(mesh2.faces_padded(), sphere.faces_padded())
+        self.assertClose(mesh2.textures.atlas_padded(), atlas_padded, atol=1e-4)
+
+        # White atlas case
+        sphere.textures = atlas_white
+        with NamedTemporaryFile(mode="w", suffix=".off") as f:
+            io.save_mesh(sphere, f.name)
+            f.flush()
+            mesh3 = io.load_mesh(f.name)
+
+        self.assertClose(mesh3.textures.atlas_padded(), atlas_padded_white, atol=1e-4)
+
+        # TexturesVertex case
+        sphere.textures = vertex_texture
+        with NamedTemporaryFile(mode="w", suffix=".off") as f:
+            io.save_mesh(sphere, f.name)
+            f.flush()
+            mesh4 = io.load_mesh(f.name, device=device)
+
+        self.assertEqual(mesh4.device, device)
+        mesh4 = mesh4.cpu()
+        self.assertClose(mesh4.verts_padded(), sphere.verts_padded())
+        self.assertClose(mesh4.faces_padded(), sphere.faces_padded())
+        self.assertClose(
+            mesh4.textures.verts_features_padded(), verts_colors_padded, atol=1e-4
+        )
+
+        # white TexturesVertex case
+        sphere.textures = vertex_texture_white
+        with NamedTemporaryFile(mode="w", suffix=".off") as f:
+            io.save_mesh(sphere, f.name)
+            f.flush()
+            mesh5 = io.load_mesh(f.name)
+
+        self.assertClose(
+            mesh5.textures.verts_features_padded(), verts_colors_padded_white, atol=1e-4
+        )
+
+    def test_bad(self):
+        # Test errors from various invalid OFF files.
+        io = IO()
+        # Helper: write the lines to a temporary .off file and load it with io.
+        def load(lines):
+            off_file = "\n".join(lines)
+            with NamedTemporaryFile(mode="w", suffix=".off") as f:
+                f.write(off_file)
+                f.flush()
+                io.load_mesh(f.name)
+
+        # First a good example
+        lines = [
+            "4 2 12",
+            " 1.0  0.0 1.4142",
+            " 0.0  1.0 1.4142",
+            " 1.0  0.0 0.4142",
+            " 0.0  1.0 0.4142",
+            "3  0 1 2 ",
+            "3  1 3 0 ",
+        ]
+
+        # This example passes.
+        load(lines)
+
+        # OFF can occur on the first line separately
+        load(["OFF"] + lines)
+
+        # OFF line can be merged into the first line
+        lines2 = lines.copy()
+        lines2[0] = "OFF " + lines[0]
+        load(lines2)
+
+        # OFF line can be merged into the first line with no space
+        lines2 = lines.copy()
+        lines2[0] = "OFF" + lines[0]
+        load(lines2)
+
+        with self.assertRaisesRegex(ValueError, "Not enough face data."):
+            load(lines[:-1])
+
+        lines2 = lines.copy()
+        lines2[0] = "4 1 12"
+        with self.assertRaisesRegex(ValueError, "Extra data at end of file:"):
+            load(lines2)
+
+        lines2 = lines.copy()
+        lines2[-1] = "2 1 3"
+        with self.assertRaisesRegex(ValueError, "Faces must have at least 3 vertices."):
+            load(lines2)
+
+        lines2 = lines.copy()
+        lines2[-1] = "4 1 3 0"
+        with self.assertRaisesRegex(
+            ValueError, "A line of face data did not have the specified length."
+        ):
+            load(lines2)
+
+        lines2 = lines.copy()
+        lines2[0] = "6 2 0"
+        with self.assertRaisesRegex(ValueError, "Wrong number of columns at line 5"):
+            load(lines2)
+
+        lines2[0] = "5 1 0"
+        with self.assertRaisesRegex(ValueError, "Wrong number of columns at line 5"):
+            load(lines2)
+
+        lines2[0] = "16 2 0"
+        with self.assertRaisesRegex(ValueError, "Wrong number of columns at line 5"):
+            load(lines2)
+
+        lines2[0] = "3 3 0"
+        # This is a bit of a special case because the last vertex could be a face
+        with self.assertRaisesRegex(ValueError, "Faces must have at least 3 vertices."):
+            load(lines2)
+
+        lines2[4] = "7.3 4.2 8.3"
+        with self.assertRaisesRegex(
+            ValueError, "A line of face data did not have the specified length."
+        ):
+            load(lines2)
+
+        # Now try bad number of colors
+
+        lines2 = lines.copy()
+        lines2[2] = "7.3 4.2 8.3 932"
+        with self.assertRaisesRegex(ValueError, "Wrong number of columns at line 2"):
+            load(lines2)
+
+        lines2[1] = "7.3 4.2 8.3 932"
+        lines2[3] = "7.3 4.2 8.3 932"
+        lines2[4] = "7.3 4.2 8.3 932"
+        with self.assertRaisesRegex(ValueError, "Bad vertex data."):
+            load(lines2)
+
+        lines2 = lines.copy()
+        lines2[5] = "3  0 1 2 0.9"
+        lines2[6] = "3  0 3 0 0.9"
+        with self.assertRaisesRegex(ValueError, "Unexpected number of colors."):
+            load(lines2)
+
+        lines2 = lines.copy()
+        for i in range(1, 7):
+            lines2[i] = lines2[i] + " 4 4 4 4"
+        msg = "Faces colors ignored because vertex colors provided too."
+        with self.assertWarnsRegex(UserWarning, msg):
+            load(lines2)
diff --git a/pytorch3d/tests/test_io_ply.py b/pytorch3d/tests/test_io_ply.py
new file mode 100644
index 0000000000000000000000000000000000000000..7bd2bc79d048fab7d7802658ce56740b59b57a57
--- /dev/null
+++ b/pytorch3d/tests/test_io_ply.py
@@ -0,0 +1,965 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import itertools
+import struct
+import unittest
+from io import BytesIO, StringIO
+from tempfile import NamedTemporaryFile, TemporaryFile
+
+import numpy as np
+import pytorch3d.io.ply_io
+import torch
+from iopath.common.file_io import PathManager
+from pytorch3d.io import IO
+from pytorch3d.io.ply_io import load_ply, save_ply
+from pytorch3d.renderer.mesh import TexturesVertex
+from pytorch3d.structures import Meshes, Pointclouds
+from pytorch3d.utils import torus
+
+from .common_testing import get_tests_dir, TestCaseMixin
+
+
+# Shared PathManager instance which the _load_ply_raw helper in this module
+# passes on to pytorch3d.io.ply_io._load_ply_raw.
+global_path_manager = PathManager()
+# Directory containing the data files used by these tests.
+DATA_DIR = get_tests_dir() / "data"
+
+
+def _load_ply_raw(stream):
+    """
+    Parse ``stream`` with pytorch3d's private raw ply loader.
+
+    Thin wrapper which supplies the module-level ``global_path_manager``
+    so that the tests only have to pass the stream itself.
+    """
+    raw_loader = pytorch3d.io.ply_io._load_ply_raw
+    return raw_loader(stream, global_path_manager)
+
+
+# Lines of an ascii ply file describing a cube: a header declaring 8
+# vertices (float x, y, z) and 6 faces, followed by the 8 vertex rows and
+# the 6 four-sided face rows.
+CUBE_PLY_LINES = [
+    "ply",
+    "format ascii 1.0",
+    "comment made by Greg Turk",
+    "comment this file is a cube",
+    "element vertex 8",
+    "property float x",
+    "property float y",
+    "property float z",
+    "element face 6",
+    "property list uchar int vertex_index",
+    "end_header",
+    "0 0 0",
+    "0 0 1",
+    "0 1 1",
+    "0 1 0",
+    "1 0 0",
+    "1 0 1",
+    "1 1 1",
+    "1 1 0",
+    "4 0 1 2 3",
+    "4 7 6 5 4",
+    "4 0 4 5 1",
+    "4 1 5 6 2",
+    "4 2 6 7 3",
+    "4 3 7 4 0",
+]
+
+# The 8 vertex positions listed in CUBE_PLY_LINES, in the same order.
+CUBE_VERTS = [
+    [0, 0, 0],
+    [0, 0, 1],
+    [0, 1, 1],
+    [0, 1, 0],
+    [1, 0, 0],
+    [1, 0, 1],
+    [1, 1, 1],
+    [1, 1, 0],
+]
+# Triangles which the tests expect load_ply to return for CUBE_PLY_LINES.
+# For each quad (a, b, c, d) of the file, the first six rows hold (a, b, c)
+# and the last six rows hold (a, c, d).
+CUBE_FACES = [
+    [0, 1, 2],
+    [7, 6, 5],
+    [0, 4, 5],
+    [1, 5, 6],
+    [2, 6, 7],
+    [3, 7, 4],
+    [0, 2, 3],
+    [7, 5, 4],
+    [0, 5, 1],
+    [1, 6, 2],
+    [2, 7, 3],
+    [3, 4, 0],
+]
+
+
+class TestMeshPlyIO(TestCaseMixin, unittest.TestCase):
+    def test_raw_load_simple_ascii(self):
+        """
+        Raw-load an ascii cube which also declares an extra list element.
+
+        The same text is parsed from a StringIO and from BytesIO streams
+        using LF and CRLF line endings; all three must give the same data.
+        """
+        header_lines = [
+            "ply",
+            "format ascii 1.0",
+            "comment made by Greg Turk",
+            "comment this file is a cube",
+            "element vertex 8",
+            "property float x",
+            "property float y",
+            "property float z",
+            "element face 6",
+            "property list uchar int vertex_index",
+            "element irregular_list 3",
+            "property list uchar int vertex_index",
+            "end_header",
+        ]
+        vertex_lines = [
+            "0 0 0",
+            "0 0 1",
+            "0 1 1",
+            "0 1 0",
+            "1 0 0",
+            "1 0 1",
+            "1 1 1",
+            "1 1 0",
+        ]
+        face_lines = [
+            "4 0 1 2 3",
+            "4 7 6 5 4",
+            "4 0 4 5 1",
+            "4 1 5 6 2",
+            "4 2 6 7 3",
+            "4 3 7 4 0",
+        ]
+        # Rows of differing length, so this element cannot be a 2D array.
+        irregular_lines = [
+            "4 0 1 2 3",
+            "4 7 6 5 4",
+            "3 4 5 1",
+        ]
+        all_lines = header_lines + vertex_lines + face_lines + irregular_lines
+        ply_text = "\n".join(all_lines)
+        ply_bytes = ply_text.encode("ascii")
+        streams = [
+            StringIO(ply_text),
+            BytesIO(ply_bytes),
+            BytesIO(ply_bytes.replace(b"\n", b"\r\n")),
+        ]
+        expected_irregular = ([0, 1, 2, 3], [7, 6, 5, 4], [4, 5, 1])
+        for stream in streams:
+            header, elements = _load_ply_raw(stream)
+            self.assertTrue(header.ascii)
+            self.assertEqual(len(elements), 3)
+            face_array = elements["face"]
+            self.assertTupleEqual(face_array.shape, (6, 4))
+            self.assertClose([0, 1, 2, 3], face_array[0])
+            self.assertClose([3, 7, 4, 0], face_array[5])
+            [vertex_array] = elements["vertex"]
+            self.assertTupleEqual(vertex_array.shape, (8, 3))
+            irregular = elements["irregular_list"]
+            self.assertEqual(len(irregular), 3)
+            self.assertEqual(type(irregular), list)
+            for row, expected in zip(irregular, expected_irregular):
+                [values] = row
+                self.assertClose(values, expected)
+
+    def test_load_simple_ascii(self):
+        """
+        load_ply on the ascii cube gives CUBE_VERTS and CUBE_FACES.
+
+        The cube is read from a StringIO and from BytesIO streams using LF
+        and CRLF line endings.
+        """
+        ply_text = "\n".join(CUBE_PLY_LINES)
+        ply_bytes = ply_text.encode("ascii")
+        expected_verts = torch.FloatTensor(CUBE_VERTS)
+        expected_faces = torch.LongTensor(CUBE_FACES)
+        streams = (
+            StringIO(ply_text),
+            BytesIO(ply_bytes),
+            BytesIO(ply_bytes.replace(b"\n", b"\r\n")),
+        )
+        for stream in streams:
+            verts, faces = load_ply(stream)
+            self.assertEqual(verts.shape, (8, 3))
+            self.assertEqual(faces.shape, (12, 3))
+            self.assertClose(verts, expected_verts)
+            self.assertClose(faces, expected_faces)
+
+    def test_pluggable_load_cube(self):
+        """
+        Load, save and reload the cube through the pluggable IO object.
+
+        This won't work on Windows due to NamedTemporaryFile being reopened.
+        Use the testpath package instead?
+        """
+        io = IO()
+        cube_text = "\n".join(CUBE_PLY_LINES)
+        with NamedTemporaryFile(mode="w", suffix=".ply") as cube_file:
+            cube_file.write(cube_text)
+            cube_file.flush()
+            mesh = io.load_mesh(cube_file.name)
+        expected_verts = torch.FloatTensor(CUBE_VERTS)[None]
+        expected_faces = torch.LongTensor(CUBE_FACES)[None]
+        self.assertClose(mesh.verts_padded(), expected_verts)
+        self.assertClose(mesh.faces_padded(), expected_faces)
+
+        device = torch.device("cuda:0")
+
+        # Save the loaded mesh and read it back directly onto the GPU.
+        with NamedTemporaryFile(mode="w", suffix=".ply") as saved_file:
+            io.save_mesh(mesh, saved_file.name)
+            saved_file.flush()
+            reloaded = io.load_mesh(saved_file.name, device=device)
+        self.assertEqual(reloaded.verts_padded().device, device)
+        self.assertClose(reloaded.verts_padded().cpu(), mesh.verts_padded())
+        self.assertClose(reloaded.faces_padded().cpu(), mesh.faces_padded())
+
+        # This temporary file is created without a suffix; both saving and
+        # loading are expected to fail to find a mesh interpreter for it.
+        write_error = "No mesh interpreter found to write to"
+        read_error = "No mesh interpreter found to read "
+        with NamedTemporaryFile(mode="w") as no_suffix_file:
+            with self.assertRaisesRegex(ValueError, write_error):
+                io.save_mesh(mesh, no_suffix_file.name)
+            with self.assertRaisesRegex(ValueError, read_error):
+                io.load_mesh(no_suffix_file.name)
+
+    def test_heterogenous_verts_per_face(self):
+        """Load the cube with its last face replaced by a five-sided one."""
+        pentagon_face = "5 3 7 4 0 1"
+        ply_lines = CUBE_PLY_LINES[:-1] + [pentagon_face]
+        verts, faces = load_ply(StringIO("\n".join(ply_lines)))
+        self.assertEqual(verts.shape, (8, 3))
+        # 13 is consistent with two triangles for each of the five remaining
+        # quads plus three triangles for the pentagon.
+        self.assertEqual(faces.shape, (13, 3))
+
+    def test_save_too_many_colors(self):
+        """Saving a mesh whose vertex texture has 7 channels must warn."""
+        verts = torch.tensor(
+            [[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=torch.float32
+        )
+        faces = torch.tensor([[0, 1, 2], [0, 2, 3]])
+        seven_channel_colors = torch.rand((4, 7))
+        mesh = Meshes(
+            verts=[verts],
+            faces=[faces],
+            textures=TexturesVertex(verts_features=[seven_channel_colors]),
+        )
+
+        expected_warning = "Texture will not be saved as it has 7 colors, not 3."
+        io = IO()
+        with NamedTemporaryFile(mode="w", suffix=".ply") as out_file:
+            with self.assertWarnsRegex(UserWarning, expected_warning):
+                io.save_mesh(mesh.cuda(), out_file.name)
+
+    def test_save_load_meshes(self):
+        """
+        Save and reload a two-triangle mesh for every combination of vertex
+        textures and vertex normals being present or absent.
+        """
+        verts = torch.tensor(
+            [[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=torch.float32
+        )
+        faces = torch.tensor([[0, 1, 2], [0, 2, 3]])
+        normals = torch.tensor(
+            [[0, 1, 0], [1, 0, 0], [1, 4, 1], [1, 0, 0]], dtype=torch.float32
+        )
+        vert_colors = torch.rand_like(verts)
+        texture = TexturesVertex(verts_features=[vert_colors])
+        device = torch.device("cuda:0")
+
+        for with_textures, with_normals in itertools.product([True, False], repeat=2):
+            mesh_textures = texture if with_textures else None
+            mesh_normals = [normals] if with_normals else None
+            mesh = Meshes(
+                verts=[verts],
+                faces=[faces],
+                textures=mesh_textures,
+                verts_normals=mesh_normals,
+            )
+
+            io = IO()
+            with NamedTemporaryFile(mode="w", suffix=".ply") as tmp:
+                io.save_mesh(mesh.cuda(), tmp.name)
+                tmp.flush()
+                loaded = io.load_mesh(tmp.name, device=device)
+            self.assertEqual(loaded.device, device)
+            loaded = loaded.cpu()
+            self.assertClose(loaded.verts_padded(), mesh.verts_padded())
+            self.assertClose(loaded.faces_padded(), mesh.faces_padded())
+
+            if not with_normals:
+                self.assertFalse(mesh.has_verts_normals())
+                self.assertFalse(loaded.has_verts_normals())
+                # Normals requested from the reloaded mesh must not be the
+                # ones which were deliberately left out of the file.
+                fallback_normals = loaded.verts_normals_padded()
+                self.assertFalse(torch.allclose(fallback_normals, normals))
+            else:
+                self.assertTrue(mesh.has_verts_normals())
+                self.assertTrue(loaded.has_verts_normals())
+                self.assertClose(
+                    loaded.verts_normals_padded(), mesh.verts_normals_padded()
+                )
+
+            if not with_textures:
+                self.assertIsNone(loaded.textures)
+            else:
+                self.assertIsInstance(loaded.textures, TexturesVertex)
+                loaded_colors = loaded.textures.verts_features_list()[0]
+                self.assertClose(loaded_colors, vert_colors)
+
+    def test_save_load_with_normals(self):
+        """
+        Save and reload a four-point cloud for every combination of
+        features and normals being present or absent.
+        """
+        points = torch.tensor(
+            [[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=torch.float32
+        )
+        normals = torch.tensor(
+            [[0, 1, 0], [1, 0, 0], [1, 4, 1], [1, 0, 0]], dtype=torch.float32
+        )
+        features = torch.rand_like(points)
+        device = torch.device("cuda:0")
+
+        for with_features, with_normals in itertools.product([True, False], repeat=2):
+            cloud_features = [features] if with_features else None
+            cloud_normals = [normals] if with_normals else None
+            cloud = Pointclouds(
+                points=[points], features=cloud_features, normals=cloud_normals
+            )
+
+            io = IO()
+            with NamedTemporaryFile(mode="w", suffix=".ply") as tmp:
+                io.save_pointcloud(cloud.cuda(), tmp.name)
+                tmp.flush()
+                loaded = io.load_pointcloud(tmp.name, device=device)
+            self.assertEqual(loaded.device, device)
+            loaded = loaded.cpu()
+            self.assertClose(loaded.points_padded(), cloud.points_padded())
+
+            if not with_normals:
+                self.assertIsNone(cloud.normals_padded())
+                self.assertIsNone(loaded.normals_padded())
+            else:
+                self.assertClose(loaded.normals_padded(), cloud.normals_padded())
+
+            if not with_features:
+                self.assertIsNone(loaded.features_packed())
+            else:
+                self.assertClose(loaded.features_packed(), features)
+
+    def test_save_ply_invalid_shapes(self):
+        """
+        save_ply must raise ValueError with a descriptive message when
+        verts or faces are non-empty but do not have three columns.
+        """
+        # Invalid vertices shape
+        verts = torch.FloatTensor([[0.1, 0.2, 0.3, 0.4]])  # (V, 4)
+        faces = torch.LongTensor([[0, 1, 2]])
+        with self.assertRaises(ValueError) as error:
+            save_ply(BytesIO(), verts, faces)
+        expected_message = (
+            "Argument 'verts' should either be empty or of shape (num_verts, 3)."
+        )
+        # assertTrue(expected_message, ...) only checked that the string was
+        # truthy; compare it with the text of the raised exception instead.
+        self.assertIn(expected_message, str(error.exception))
+
+        # Invalid faces shape
+        verts = torch.FloatTensor([[0.1, 0.2, 0.3]])
+        faces = torch.LongTensor([[0, 1, 2, 3]])  # (F, 4)
+        with self.assertRaises(ValueError) as error:
+            save_ply(BytesIO(), verts, faces)
+        expected_message = (
+            "Argument 'faces' should either be empty or of shape (num_faces, 3)."
+        )
+        self.assertIn(expected_message, str(error.exception))
+
+    def test_save_ply_invalid_indices(self):
+        """save_ply warns when faces use indices which are not valid."""
+        verts = torch.FloatTensor([[0.1, 0.2, 0.3]])
+        # Only one vertex is supplied: the first face uses indices 1 and 2,
+        # the second uses -1 and 1.
+        bad_faces = ([[0, 1, 2]], [[-1, 0, 1]])
+        for face_indices in bad_faces:
+            faces = torch.LongTensor(face_indices)
+            with self.assertWarnsRegex(UserWarning, "Faces have invalid indices"):
+                save_ply(BytesIO(), verts, faces)
+
+    def _test_save_load(self, verts, faces):
+        """
+        Save verts and faces to an in-memory ply and compare what load_ply
+        reads back.
+
+        Empty inputs are compared as (0, 3) tensors. If no vertices are
+        loaded back, the loaded faces are only required to be empty.
+        """
+        buffer = BytesIO()
+        save_ply(buffer, verts, faces)
+        buffer.seek(0)
+
+        # Always compare with a (V, 3) tensor
+        if len(verts) == 0:
+            expected_verts = torch.zeros(size=(0, 3), dtype=torch.float32)
+        else:
+            expected_verts = verts
+        # Always compare with an (F, 3) tensor
+        if len(faces) == 0:
+            expected_faces = torch.zeros(size=(0, 3), dtype=torch.int64)
+        else:
+            expected_faces = faces
+
+        actual_verts, actual_faces = load_ply(buffer)
+        self.assertClose(expected_verts, actual_verts)
+        if len(actual_verts) == 0:
+            self.assertEqual(actual_faces.numel(), 0)
+        else:
+            self.assertClose(expected_faces, actual_faces)
+
+    def test_normals_save(self):
+        """save_ply accepts per-vertex normals without raising."""
+        verts = torch.tensor(
+            [[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=torch.float32
+        )
+        faces = torch.tensor([[0, 1, 2], [0, 2, 3]])
+        verts_normals = torch.tensor(
+            [[0, 1, 0], [1, 0, 0], [0, 0, 1], [1, 0, 0]], dtype=torch.float32
+        )
+        buffer = BytesIO()
+        save_ply(buffer, verts=verts, faces=faces, verts_normals=verts_normals)
+        buffer.close()
+
+    def test_contiguity_unimportant(self):
+        """Round-trip faces given as a plain (10, 3) tensor and as a transpose."""
+        verts = torch.rand(32, 3)
+        plain_faces = torch.randint(30, size=(10, 3))
+        self._test_save_load(verts, plain_faces)
+        # Same (10, 3) shape, but obtained by transposing a (3, 10) tensor.
+        transposed_faces = torch.randint(30, size=(3, 10)).T
+        self._test_save_load(verts, transposed_faces)
+
+    def test_empty_save_load(self):
+        """
+        Round-trip meshes in which the verts, the faces or both are empty,
+        with each empty tensor given both as 1D and as shape (0, 3).
+        """
+        message_regex = "Empty 'verts' provided"
+        all_empty_verts = (
+            torch.FloatTensor([]),
+            torch.zeros(size=(0, 3), dtype=torch.float32),
+        )
+        all_empty_faces = (
+            torch.LongTensor([]),
+            torch.zeros(size=(0, 3), dtype=torch.int64),
+        )
+
+        # Vertices + empty faces
+        one_vert = torch.tensor([[0.1, 0.2, 0.3]])
+        for faces in all_empty_faces:
+            self._test_save_load(one_vert, faces)
+
+        # Faces + empty vertices
+        # => We don't save the faces
+        one_face = torch.LongTensor([[0, 1, 2]])
+        for verts in all_empty_verts:
+            with self.assertWarnsRegex(UserWarning, message_regex):
+                self._test_save_load(verts, one_face)
+
+        # Empty vertices + empty faces
+        for verts, faces in itertools.product(all_empty_verts, all_empty_faces):
+            with self.assertWarnsRegex(UserWarning, message_regex):
+                self._test_save_load(verts, faces)
+
+    def test_simple_save(self):
+        """
+        Save a small mesh as ascii and as binary, to both a BytesIO and a
+        TemporaryFile, reload it, and check the binary output is shorter.
+        """
+        verts = torch.tensor(
+            [[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [1, 2, 0]], dtype=torch.float32
+        )
+        faces = torch.tensor([[0, 1, 2], [0, 3, 4]])
+        for make_file in (BytesIO, TemporaryFile):
+            bytes_written = {}
+            for use_ascii in (True, False):
+                out = make_file()
+                save_ply(out, verts=verts, faces=faces, ascii=use_ascii)
+                bytes_written[use_ascii] = out.tell()
+
+                out.seek(0)
+                loaded_verts, loaded_faces = load_ply(out)
+                self.assertClose(verts, loaded_verts)
+                self.assertClose(faces, loaded_faces)
+
+                # Ascii output must decode as ascii; binary output must not.
+                out.seek(0)
+                raw = out.read()
+                if use_ascii:
+                    raw.decode("ascii")
+                else:
+                    with self.assertRaises(UnicodeDecodeError):
+                        raw.decode("ascii")
+
+                if make_file is TemporaryFile:
+                    out.close()
+            self.assertLess(
+                bytes_written[False], bytes_written[True], "ascii should be longer"
+            )
+
+    def test_heterogeneous_property(self):
+        """
+        Raw-load a vertex element whose first property has a different type
+        from the other two, from both an ascii and a binary file.
+
+        The loader is expected to return the element in two pieces: one
+        column for x and a two-column piece for y and z.
+        """
+        vertex_rows = [
+            (0, 0, 0),
+            (0, 0, 1),
+            (0, 1, 1),
+            (0, 1, 0),
+            (1, 0, 0),
+            (1, 0, 1),
+            (1, 1, 1),
+            (1, 1, 0),
+        ]
+        ascii_header = [
+            "ply",
+            "format ascii 1.0",
+            "element vertex 8",
+            "property float x",
+            "property int y",
+            "property int z",
+            "end_header",
+        ]
+        ascii_rows = [" ".join(str(value) for value in row) for row in vertex_rows]
+        stream_ascii = StringIO("\n".join(ascii_header + ascii_rows))
+
+        binary_header = [
+            "ply",
+            "format binary_little_endian 1.0",
+            "element vertex 8",
+            "property uchar x",
+            "property char y",
+            "property char z",
+            "end_header",
+            "",
+        ]
+        # Every property is a single byte, so the payload is the flat values.
+        payload = bytes(value for row in vertex_rows for value in row)
+        stream_binary = BytesIO("\n".join(binary_header).encode("ascii") + payload)
+
+        expected_x = np.array([[row[0]] for row in vertex_rows])
+        expected_yz = np.array([row[1:] for row in vertex_rows])
+        for stream in (stream_ascii, stream_binary):
+            header, elements = _load_ply_raw(stream)
+            [x, yz] = elements["vertex"]
+            self.assertClose(x, expected_x)
+            self.assertClose(yz, expected_yz)
+
+    def test_load_cloudcompare_pointcloud(self):
+        """
+        Test loading a pointcloud styled like some cloudcompare output.
+        cloudcompare is an open source 3D point cloud processing software.
+
+        Each vertex row has three doubles for the position, three uchars
+        for the color and one extra float property.
+        """
+        header_lines = [
+            "ply",
+            "format binary_little_endian 1.0",
+            "obj_info Not a key-value pair!",
+            "element vertex 8",
+            "property double x",
+            "property double y",
+            "property double z",
+            "property uchar red",
+            "property uchar green",
+            "property uchar blue",
+            "property float my_Favorite",
+            "end_header",
+            "",
+        ]
+        header_bytes = "\n".join(header_lines).encode("ascii")
+        row_format = "dddBBBf"
+        payload = struct.pack("<" + row_format * 8, *range(56))
+        io = IO()
+        with NamedTemporaryFile(mode="wb", suffix=".ply") as ply_file:
+            ply_file.write(header_bytes)
+            ply_file.write(payload)
+            ply_file.flush()
+            pointcloud = io.load_pointcloud(ply_file.name)
+
+        # Row k of the payload holds the seven values 7k, ..., 7k + 6.
+        row_starts = 7 * torch.arange(8)[:, None]
+        expected_points = torch.FloatTensor([0, 1, 2]) + row_starts
+        expected_colors = torch.FloatTensor([3, 4, 5]) + row_starts
+        self.assertClose(pointcloud.points_padded()[0], expected_points)
+        self.assertClose(pointcloud.features_padded()[0] * 255, expected_colors)
+
+    def test_load_open3d_mesh(self):
+        """
+        Load a binary mesh whose header is styled like Open3D output, with
+        double-precision positions and normals plus uchar colors.
+        """
+        # Header based on issue #1104
+        header_lines = [
+            "ply",
+            "format binary_little_endian 1.0",
+            "comment Created by Open3D",
+            "element vertex 3",
+            "property double x",
+            "property double y",
+            "property double z",
+            "property double nx",
+            "property double ny",
+            "property double nz",
+            "property uchar red",
+            "property uchar green",
+            "property uchar blue",
+            "element face 1",
+            "property list uchar uint vertex_indices",
+            "end_header",
+            "",
+        ]
+        header_bytes = "\n".join(header_lines).encode("ascii")
+        vertex_row_format = "ddddddBBB"
+        vert_bytes = struct.pack("<" + vertex_row_format * 3, *range(9 * 3))
+        face_bytes = struct.pack("<BIII", 3, 0, 1, 2)
+        io = IO()
+        with NamedTemporaryFile(mode="wb", suffix=".ply") as ply_file:
+            for chunk in (header_bytes, vert_bytes, face_bytes):
+                ply_file.write(chunk)
+            ply_file.flush()
+            mesh = io.load_mesh(ply_file.name)
+
+        self.assertClose(mesh.faces_padded(), torch.arange(3)[None, None])
+        # Vertex row k starts at 9k, and its first three values are x, y, z.
+        row_starts = 9.0 * torch.arange(3)[:, None]
+        expected_verts = (torch.arange(3) + row_starts)[None]
+        self.assertClose(mesh.verts_padded(), expected_verts)
+
+    def test_save_pointcloud(self):
+        """
+        Save point clouds with IO.save_pointcloud and check the bytes
+        written, the reloaded data, and that a load-save cycle reproduces
+        the file, for float colors and for colors_as_uint8=True.
+        """
+        # Exact header expected for the default save of a cloud with three
+        # feature channels.
+        header = "\n".join(
+            [
+                "ply",
+                "format binary_little_endian 1.0",
+                "element vertex 8",
+                "property float x",
+                "property float y",
+                "property float z",
+                "property float red",
+                "property float green",
+                "property float blue",
+                "end_header",
+                "",
+            ]
+        ).encode("ascii")
+        # Expected payload: the floats 0..47, little-endian, i.e. 8 rows of 6.
+        data = struct.pack("<" + "f" * 48, *range(48))
+        # Row k is (6k, 6k+1, 6k+2) for the points and (6k+3, 6k+4, 6k+5)
+        # for the features, matching the layout of `data`.
+        points = torch.FloatTensor([0, 1, 2]) + 6 * torch.arange(8)[:, None]
+        features_large = torch.FloatTensor([3, 4, 5]) + 6 * torch.arange(8)[:, None]
+        features = features_large / 255.0
+        pointcloud_largefeatures = Pointclouds(
+            points=[points], features=[features_large]
+        )
+        pointcloud = Pointclouds(points=[points], features=[features])
+
+        io = IO()
+        # The temporary file is opened for reading only; the save goes
+        # through its path and the bytes are then read back via the handle.
+        with NamedTemporaryFile(mode="rb", suffix=".ply") as f:
+            io.save_pointcloud(data=pointcloud_largefeatures, path=f.name)
+            f.flush()
+            f.seek(0)
+            actual_data = f.read()
+            reloaded_pointcloud = io.load_pointcloud(f.name)
+
+        self.assertEqual(header + data, actual_data)
+        self.assertClose(reloaded_pointcloud.points_list()[0], points)
+        self.assertClose(reloaded_pointcloud.features_list()[0], features_large)
+        # Test the load-save cycle leaves file completely unchanged
+        with NamedTemporaryFile(mode="rb", suffix=".ply") as f:
+            io.save_pointcloud(
+                data=reloaded_pointcloud,
+                path=f.name,
+            )
+            f.flush()
+            f.seek(0)
+            data2 = f.read()
+            self.assertEqual(data2, actual_data)
+
+        # Ascii output: check the first two header lines and the reloaded
+        # values.
+        with NamedTemporaryFile(mode="r", suffix=".ply") as f:
+            io.save_pointcloud(
+                data=pointcloud, path=f.name, binary=False, decimal_places=9
+            )
+            reloaded_pointcloud2 = io.load_pointcloud(f.name)
+            self.assertEqual(f.readline(), "ply\n")
+            self.assertEqual(f.readline(), "format ascii 1.0\n")
+        self.assertClose(reloaded_pointcloud2.points_list()[0], points)
+        self.assertClose(reloaded_pointcloud2.features_list()[0], features)
+
+        # Colors saved as uint8, in both binary and ascii form.
+        for binary in [True, False]:
+            with NamedTemporaryFile(mode="rb", suffix=".ply") as f:
+                io.save_pointcloud(
+                    data=pointcloud, path=f.name, colors_as_uint8=True, binary=binary
+                )
+                f.flush()
+                f.seek(0)
+                actual_data = f.read()
+                reloaded_pointcloud3 = io.load_pointcloud(f.name)
+            self.assertClose(reloaded_pointcloud3.features_list()[0], features)
+            self.assertIn(b"property uchar green", actual_data)
+
+            # Test the load-save cycle leaves file completely unchanged
+            with NamedTemporaryFile(mode="rb", suffix=".ply") as f:
+                io.save_pointcloud(
+                    data=reloaded_pointcloud3,
+                    path=f.name,
+                    binary=binary,
+                    colors_as_uint8=True,
+                )
+                f.flush()
+                f.seek(0)
+                data2 = f.read()
+                self.assertEqual(data2, actual_data)
+
+    def test_load_pointcloud_bad_order(self):
+        """
+        Ply file with a strange property order
+
+        Position and color properties are interleaved in the header, so
+        the loader has to pick them out by name.
+        """
+        ply_lines = [
+            "ply",
+            "format ascii 1.0",
+            "element vertex 1",
+            "property uchar green",
+            "property float x",
+            "property float z",
+            "property uchar red",
+            "property float y",
+            "property uchar blue",
+            "end_header",
+            "1 2 3 4 5 6",
+        ]
+        stream = StringIO("\n".join(ply_lines))
+
+        pointcloud_gpu = IO().load_pointcloud(stream, device="cuda:0")
+        self.assertEqual(pointcloud_gpu.device, torch.device("cuda:0"))
+        pointcloud = pointcloud_gpu.to(torch.device("cpu"))
+
+        # From the single data row: x=2, y=5, z=3 and red=4, green=1, blue=6.
+        xyz = [[[2, 5, 3]]]
+        rgb = [[[4, 1, 6]]]
+        expected_points = torch.tensor(xyz, dtype=torch.float32)
+        expected_features = torch.tensor(rgb, dtype=torch.float32) / 255.0
+        self.assertClose(pointcloud.points_padded(), expected_points)
+        self.assertClose(pointcloud.features_padded(), expected_features)
+
+    def test_load_simple_binary(self):
+        """
+        Raw-load a hand-assembled binary ply in both byte orders.
+
+        The file declares six elements (vertex, vertex1, face,
+        irregular_list, mixed, minus_ones) and the payload for each one is
+        packed below in that order.
+        """
+        for big_endian in [True, False]:
+            # 24 tokens: the 8 cube vertices, three coordinates each.
+            verts = (
+                "0 0 0 " "0 0 1 " "0 1 1 " "0 1 0 " "1 0 0 " "1 0 1 " "1 1 1 " "1 1 0"
+            ).split()
+            # 44 tokens: six count-prefixed quads for the "face" element,
+            # then three count-prefixed rows for "irregular_list".
+            faces = (
+                "4 0 1 2 3 "
+                "4 7 6 5 4 "
+                "4 0 4 5 1 "
+                "4 1 5 6 2 "
+                "4 2 6 7 3 "
+                "4 3 7 4 0 "  # end of first 6
+                "4 0 1 2 3 "
+                "4 7 6 5 4 "
+                "3 4 5 1"
+            ).split()
+            # The value 1 as a two-byte integer in the current byte order.
+            short_one = b"\00\01" if big_endian else b"\01\00"
+            # Two "mixed" items, each a short list count, the uint list
+            # entries, then the short "bar":
+            #   item 0: count 0, no entries, bar bytes 03 03
+            #   item 1: count 1, one entry with bytes 00 01 01 01, bar bytes 00 02
+            # These literal bytes are the same for both byte orders, which
+            # is why the values asserted at the end depend on big_endian.
+            mixed_data = b"\00\00" b"\03\03" + (short_one + b"\00\01\01\01" b"\00\02")
+            # 14 bytes of 0xff cover the six "minus_ones" properties
+            # (1 + 1 + 2 + 2 + 4 + 4 bytes).
+            minus_one_data = b"\xff" * 14
+            endian_char = ">" if big_endian else "<"
+            format = (
+                "format binary_big_endian 1.0"
+                if big_endian
+                else "format binary_little_endian 1.0"
+            )
+            # "vertex": all three properties are 4-byte floats.
+            vertex_pattern = endian_char + "24f"
+            vertex_data = struct.pack(vertex_pattern, *map(float, verts))
+            # "vertex1": same values, but y is packed as a double.
+            vertex1_pattern = endian_char + "fdffdffdffdffdffdffdffdf"
+            vertex1_data = struct.pack(vertex1_pattern, *map(float, verts))
+            # "face" and "irregular_list": every count and index is one byte.
+            face_char_pattern = endian_char + "44b"
+            face_char_data = struct.pack(face_char_pattern, *map(int, faces))
+            header = "\n".join(
+                [
+                    "ply",
+                    format,
+                    "element vertex 8",
+                    "property float x",
+                    "property float32 y",
+                    "property float z",
+                    "element vertex1 8",
+                    "property float x",
+                    "property double y",
+                    "property float z",
+                    "element face 6",
+                    "property list uchar uchar vertex_index",
+                    "element irregular_list 3",
+                    "property list uchar uchar vertex_index",
+                    "element mixed 2",
+                    "property list short uint foo",
+                    "property short bar",
+                    "element minus_ones 1",
+                    "property char 1",
+                    "property uchar 2",
+                    "property short 3",
+                    "property ushort 4",
+                    "property int 5",
+                    "property uint 6",
+                    "end_header\n",
+                ]
+            )
+            # Payloads are concatenated in the order the header declares them.
+            ply_file = b"".join(
+                [
+                    header.encode("ascii"),
+                    vertex_data,
+                    vertex1_data,
+                    face_char_data,
+                    mixed_data,
+                    minus_one_data,
+                ]
+            )
+            metadata, data = _load_ply_raw(BytesIO(ply_file))
+            self.assertFalse(metadata.ascii)
+            self.assertEqual(len(data), 6)
+            self.assertTupleEqual(data["face"].shape, (6, 4))
+            self.assertClose([0, 1, 2, 3], data["face"][0])
+            self.assertClose([3, 7, 4, 0], data["face"][5])
+
+            # "vertex" is expected as a single (8, 3) piece, whereas
+            # "vertex1" is expected as three pieces which together hold the
+            # same values.
+            [vertex0] = data["vertex"]
+            self.assertTupleEqual(vertex0.shape, (8, 3))
+            self.assertEqual(len(data["vertex1"]), 3)
+            self.assertClose(vertex0, np.column_stack(data["vertex1"]))
+            self.assertClose(vertex0.flatten(), list(map(float, verts)))
+
+            irregular = data["irregular_list"]
+            self.assertEqual(len(irregular), 3)
+            self.assertEqual(type(irregular), list)
+            [x] = irregular[0]
+            self.assertClose(x, [0, 1, 2, 3])
+            [x] = irregular[1]
+            self.assertClose(x, [7, 6, 5, 4])
+            [x] = irregular[2]
+            self.assertClose(x, [4, 5, 1])
+
+            # Each "mixed" item is a (foo list, bar) pair.
+            mixed = data["mixed"]
+            self.assertEqual(len(mixed), 2)
+            self.assertEqual(len(mixed[0]), 2)
+            self.assertEqual(len(mixed[1]), 2)
+            # Bytes 03 03 read as 3 * 256 + 3 in either byte order.
+            self.assertEqual(mixed[0][1], 3 * 256 + 3)
+            self.assertEqual(len(mixed[0][0]), 0)
+            # Bytes 00 02 read as 2 big-endian and as 2 * 256 little-endian.
+            self.assertEqual(mixed[1][1], (2 if big_endian else 2 * 256))
+            # Bytes 00 01 01 01 read as `base` big-endian and as 256 * base
+            # little-endian.
+            base = 1 + 256 + 256 * 256
+            self.assertEqual(len(mixed[1][0]), 1)
+            self.assertEqual(mixed[1][0][0], base if big_endian else 256 * base)
+
+            # All-ones bytes: -1 for the signed types, the maximum value for
+            # the unsigned ones.
+            self.assertListEqual(
+                data["minus_ones"], [-1, 255, -1, 65535, -1, 4294967295]
+            )
+
+    def test_load_uvs(self):
+        """Load data/uvs.ply and check the UV coordinates and texture map."""
+        mesh = IO().load_mesh(DATA_DIR / "uvs.ply")
+        textures = mesh.textures
+
+        verts_uvs = textures.verts_uvs_padded()
+        self.assertEqual(verts_uvs.shape, (1, 8, 2))
+        expected_uvs = torch.tensor([[0, 0]] + [[0.2, 0.3]] * 6 + [[0.4, 0.5]])
+        self.assertClose(verts_uvs[0], expected_uvs)
+
+        faces_uvs_shape = textures.faces_uvs_padded().shape
+        self.assertEqual(faces_uvs_shape, mesh.faces_padded().shape)
+        self.assertEqual(textures.maps_padded().shape, (1, 512, 512, 3))
+
+    def test_bad_ply_syntax(self):
+        """Malformed PLY text raises specific ValueErrors; valid variants still load."""
+        lines = [
+            "ply",
+            "format ascii 1.0",
+            "comment dashfadskfj;k",
+            "element vertex 1",
+            "property float x",
+            "element listy 1",
+            "property list uint int x",
+            "end_header",
+            "0",
+            "0",
+        ]
+        lines2 = lines.copy()
+        # The unmodified template parses without error.
+        _load_ply_raw(StringIO("\n".join(lines2)))
+
+        lines2 = lines.copy()
+        lines2[0] = "PLY"  # upper-case magic word
+        with self.assertRaisesRegex(ValueError, "Invalid file header."):
+            _load_ply_raw(StringIO("\n".join(lines2)))
+
+        lines2 = lines.copy()
+        lines2[2] = "#this is a comment"  # not a "comment ..." header line
+        with self.assertRaisesRegex(ValueError, "Invalid line.*"):
+            _load_ply_raw(StringIO("\n".join(lines2)))
+
+        lines2 = lines.copy()
+        lines2[3] = lines[4]  # swap: the property now precedes its element
+        lines2[4] = lines[3]
+        with self.assertRaisesRegex(
+            ValueError, "Encountered property before any element."
+        ):
+            _load_ply_raw(StringIO("\n".join(lines2)))
+
+        lines2 = lines.copy()
+        lines2[8] = "1 2"  # two values for the single vertex property
+        with self.assertRaisesRegex(ValueError, "Inconsistent data for vertex."):
+            _load_ply_raw(StringIO("\n".join(lines2)))
+
+        lines2 = lines[:-1]  # drop the only listy row
+        with self.assertRaisesRegex(ValueError, "Not enough data for listy."):
+            _load_ply_raw(StringIO("\n".join(lines2)))
+
+        lines2 = lines.copy()
+        lines2[5] = "element listy 2"  # two rows declared, one supplied
+        with self.assertRaisesRegex(ValueError, "Not enough data for listy."):
+            _load_ply_raw(StringIO("\n".join(lines2)))
+
+        lines2 = lines.copy()
+        lines2.insert(4, "property short x")  # duplicates vertex property x
+        with self.assertRaisesRegex(
+            ValueError, "Cannot have two properties called x in vertex."
+        ):
+            _load_ply_raw(StringIO("\n".join(lines2)))
+
+        lines2 = lines.copy()
+        lines2.insert(4, "property zz short")  # zz is in the datatype position
+        with self.assertRaisesRegex(ValueError, "Invalid datatype: zz"):
+            _load_ply_raw(StringIO("\n".join(lines2)))
+
+        lines2 = lines.copy()
+        lines2.append("3")  # one row more than the header declares
+        with self.assertRaisesRegex(ValueError, "Extra data at end of file."):
+            _load_ply_raw(StringIO("\n".join(lines2)))
+
+        lines2 = lines.copy()
+        lines2.append("comment foo")  # trailing text after the data is extra too
+        with self.assertRaisesRegex(ValueError, "Extra data at end of file."):
+            _load_ply_raw(StringIO("\n".join(lines2)))
+
+        lines2 = lines.copy()
+        lines2.insert(4, "element bad 1")  # vertex is left without properties
+        with self.assertRaisesRegex(ValueError, "Found an element with no properties."):
+            _load_ply_raw(StringIO("\n".join(lines2)))
+
+        lines2 = lines.copy()
+        lines2[-1] = "3 2 3 3"  # count 3 followed by three items: valid
+        _load_ply_raw(StringIO("\n".join(lines2)))
+
+        lines2 = lines.copy()
+        lines2[-1] = "3 1 2 3 4"  # count 3 followed by four items
+        msg = "A line of listy data did not have the specified length."
+        with self.assertRaisesRegex(ValueError, msg):
+            _load_ply_raw(StringIO("\n".join(lines2)))
+
+        lines2 = lines.copy()
+        lines2[3] = "element vertex one"  # non-numeric element count
+        msg = "Number of items for vertex was not a number."
+        with self.assertRaisesRegex(ValueError, msg):
+            _load_ply_raw(StringIO("\n".join(lines2)))
+
+        # Heterogeneous cases: lines2 is mutated in place from here on.
+        lines2 = lines.copy()
+        lines2.insert(4, "property double y")  # vertex: double y, then float x
+
+        with self.assertRaisesRegex(ValueError, "Inconsistent data for vertex."):
+            _load_ply_raw(StringIO("\n".join(lines2)))
+
+        lines2[-2] = "3.3 4.2"  # one value per vertex property: valid
+        _load_ply_raw(StringIO("\n".join(lines2)))
+
+        lines2[-2] = "3.3 4.3 2"  # three values for two properties
+        with self.assertRaisesRegex(ValueError, "Inconsistent data for vertex."):
+            _load_ply_raw(StringIO("\n".join(lines2)))
+
+        with self.assertRaisesRegex(ValueError, "Invalid vertices in file."):
+            load_ply(StringIO("\n".join(lines)))  # raw-parseable, rejected by load_ply
+
+        lines2 = lines.copy()
+        lines2[5] = "element face 1"  # listy renamed to face; vertex still only has x
+        with self.assertRaisesRegex(ValueError, "Invalid vertices in file."):
+            load_ply(StringIO("\n".join(lines2)))
+
+        lines2.insert(5, "property float z")
+        lines2.insert(5, "property float y")  # vertex properties now read x, y, z
+        lines2[-2] = "0 0 0"
+        lines2[-1] = ""  # empty face row
+        with self.assertRaisesRegex(ValueError, "Not enough data for face."):
+            load_ply(StringIO("\n".join(lines2)))
+
+        lines2[-1] = "2 0 0"  # face with only two indices
+        with self.assertRaisesRegex(ValueError, "Faces must have at least 3 vertices."):
+            load_ply(StringIO("\n".join(lines2)))
+
+        # Good one
+        lines2[-1] = "3 0 0 0"
+        load_ply(StringIO("\n".join(lines2)))
+
+    @staticmethod
+    def _bm_save_ply(verts: torch.Tensor, faces: torch.Tensor, decimal_places: int):
+        """Return a callable that saves the mesh as ASCII PLY to a fresh BytesIO."""
+        options = {"ascii": True, "decimal_places": decimal_places}
+
+        def run_save():
+            return save_ply(BytesIO(), verts=verts, faces=faces, **options)
+
+        return run_save
+
+    @staticmethod
+    def _bm_load_ply(verts: torch.Tensor, faces: torch.Tensor, decimal_places: int):
+        """Save the mesh as ASCII PLY once; return a callable that re-loads it."""
+        buffer, fmt = BytesIO(), {"ascii": True, "decimal_places": decimal_places}
+        save_ply(buffer, verts=verts, faces=faces, **fmt)
+        ply_bytes = buffer.getvalue()
+        return lambda: load_ply(BytesIO(ply_bytes))  # new stream for every load
+
+    @staticmethod
+    def bm_save_simple_ply_with_init(V: int, F: int):
+        vert_rows, face_rows = [[0.11, 0.22, 0.33]] * V, [[0, 1, 2]] * F  # V / F copies
+        verts, faces = (torch.tensor(r).view(-1, 3) for r in (vert_rows, face_rows))
+        return TestMeshPlyIO._bm_save_ply(verts=verts, faces=faces, decimal_places=2)
+
+    @staticmethod
+    def bm_load_simple_ply_with_init(V: int, F: int):
+        verts = torch.tensor([[0.1, 0.2, 0.3]]).expand(V, -1)  # one row viewed V times
+        faces = torch.tensor([[0, 1, 2]], dtype=torch.int64).expand(F, -1)
+        return TestMeshPlyIO._bm_load_ply(verts=verts, faces=faces, decimal_places=2)
+
+    @staticmethod
+    def bm_save_complex_ply(N: int):
+        torus_mesh = torus(r=0.25, R=1.0, sides=N, rings=N * 2)  # rings = 2 * sides
+        (verts,), (faces,) = torus_mesh.verts_list(), torus_mesh.faces_list()
+        return TestMeshPlyIO._bm_save_ply(verts=verts, faces=faces, decimal_places=5)
+
+    @staticmethod
+    def bm_load_complex_ply(N: int):
+        torus_mesh = torus(r=0.25, R=1.0, sides=N, rings=N * 2)  # rings = 2 * sides
+        (verts,), (faces,) = torus_mesh.verts_list(), torus_mesh.faces_list()
+        return TestMeshPlyIO._bm_load_ply(verts=verts, faces=faces, decimal_places=5)
diff --git a/pytorch3d/tests/test_iou_box3d.py b/pytorch3d/tests/test_iou_box3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..f5f2b9e4483974936b3147743177add42f142f58
--- /dev/null
+++ b/pytorch3d/tests/test_iou_box3d.py
@@ -0,0 +1,1642 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import pickle
+import random
+import unittest
+from typing import List, Tuple, Union
+
+import torch
+import torch.nn.functional as F
+from pytorch3d.io import save_obj
+from pytorch3d.ops.iou_box3d import _box_planes, _box_triangles, box3d_overlap
+from pytorch3d.transforms.rotation_conversions import random_rotation
+
+from .common_testing import get_random_cuda_device, get_tests_dir, TestCaseMixin
+
+
+OBJECTRON_TO_PYTORCH3D_FACE_IDX = [0, 4, 6, 2, 1, 5, 7, 3]  # Objectron->PyTorch3D order
+DATA_DIR = get_tests_dir() / "data"  # fixture files loaded by the tests
+DEBUG = False
+DOT_EPS = 1e-3
+AREA_EPS = 1e-4
+# Corner coordinates of the unit cube: the four z=0 corners, then the four z=1.
+UNIT_BOX = [
+    [0, 0, 0],
+    [1, 0, 0],
+    [1, 1, 0],
+    [0, 1, 0],
+    [0, 0, 1],
+    [1, 0, 1],
+    [1, 1, 1],
+    [0, 1, 1],
+]
+
+
+class TestIoU3D(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(1)  # seeds torch's RNG only, not the `random` module
+
+    @staticmethod
+    def create_box(xyz, whl):
+        """
+        Build the (8, 3) float32 corner tensor of an axis-aligned box centred at
+        ``xyz`` with extents ``whl`` along x, y and z, on the device of ``xyz``.
+        The four corners at ``z - le / 2`` come first, then those at ``z + le / 2``.
+        """
+        cx, cy, cz = xyz
+        half_w, half_h, half_l = (extent / 2.0 for extent in whl)
+
+        # (x, y) sign pattern walked around each z-face of the box.
+        ring = ((-1, -1), (1, -1), (1, 1), (-1, 1))
+        corners = [
+            [cx + sx * half_w, cy + sy * half_h, cz + sz * half_l]
+            for sz in (-1, 1)
+            for sx, sy in ring
+        ]
+
+        # Same device as the centre, always float32.
+        return torch.tensor(corners, device=xyz.device, dtype=torch.float32)
+
+    @staticmethod
+    def _box3d_overlap_naive_batched(boxes1, boxes2):
+        """
+        Apply box3d_overlap_naive to every (boxes1[n], boxes2[m]) pair and
+        collect the results into (N, M) float32 volume and IoU matrices.
+        """
+        out_shape = (boxes1.shape[0], boxes2.shape[0])
+        like = {"dtype": torch.float32, "device": boxes1.device}
+        vols = torch.zeros(out_shape, **like)
+        ious = torch.zeros(out_shape, **like)
+
+        for n, box_a in enumerate(boxes1):
+            for m, box_b in enumerate(boxes2):
+                # The naive helper returns a (volume, iou) pair.
+                vols[n, m], ious[n, m] = box3d_overlap_naive(box_a, box_b)
+        return vols, ious
+
+    @staticmethod
+    def _box3d_overlap_sampling_batched(boxes1, boxes2, num_samples: int):
+        """
+        Run box3d_overlap_sampling on every (boxes1[n], boxes2[m]) pair, forwarding
+        num_samples (previously accepted but ignored); returns the (N, M) IoU matrix.
+        """
+        N, M = boxes1.shape[0], boxes2.shape[0]
+        ious = torch.zeros((N, M), dtype=torch.float32, device=boxes1.device)
+        for n in range(N):
+            for m in range(M):
+                ious[n, m] = box3d_overlap_sampling(
+                    boxes1[n], boxes2[m], num_samples=num_samples
+                )
+        return ious
+
+    def _test_iou(self, overlap_fn, device):
+        """Run the shared (volume, IoU) cases against ``overlap_fn`` on ``device``."""
+        box1 = torch.tensor(
+            UNIT_BOX,
+            dtype=torch.float32,
+            device=device,
+        )
+
+        # 1st test: same box, iou = 1.0
+        vol, iou = overlap_fn(box1[None], box1[None])
+        self.assertClose(vol, torch.tensor([[1.0]], device=vol.device, dtype=vol.dtype))
+        self.assertClose(iou, torch.tensor([[1.0]], device=vol.device, dtype=vol.dtype))
+
+        # 2nd test: unit box shifted by dd along y, overlap volume is 1 - dd
+        dd = random.random()
+        box2 = box1 + torch.tensor([[0.0, dd, 0.0]], device=device)
+        vol, iou = overlap_fn(box1[None], box2[None])
+        self.assertClose(
+            vol, torch.tensor([[1 - dd]], device=vol.device, dtype=vol.dtype)
+        )
+        # symmetry
+        vol, iou = overlap_fn(box2[None], box1[None])
+        self.assertClose(
+            vol, torch.tensor([[1 - dd]], device=vol.device, dtype=vol.dtype)
+        )
+
+        # 3rd test: unit box shifted by dd along x, overlap volume is 1 - dd
+        dd = random.random()
+        box2 = box1 + torch.tensor([[dd, 0.0, 0.0]], device=device)
+        vol, _ = overlap_fn(box1[None], box2[None])
+        self.assertClose(
+            vol, torch.tensor([[1 - dd]], device=vol.device, dtype=vol.dtype)
+        )
+        # symmetry
+        vol, _ = overlap_fn(box2[None], box1[None])
+        self.assertClose(
+            vol, torch.tensor([[1 - dd]], device=vol.device, dtype=vol.dtype)
+        )
+
+        # 4th test: unit box shifted along all three axes
+        ddx, ddy, ddz = random.random(), random.random(), random.random()
+        box2 = box1 + torch.tensor([[ddx, ddy, ddz]], device=device)
+        vol, _ = overlap_fn(box1[None], box2[None])
+        self.assertClose(
+            vol,
+            torch.tensor(
+                [[(1 - ddx) * (1 - ddy) * (1 - ddz)]],
+                device=vol.device,
+                dtype=vol.dtype,
+            ),
+        )
+        # symmetry
+        vol, _ = overlap_fn(box2[None], box1[None])
+        self.assertClose(
+            vol,
+            torch.tensor(
+                [[(1 - ddx) * (1 - ddy) * (1 - ddz)]],
+                device=vol.device,
+                dtype=vol.dtype,
+            ),
+        )
+
+        # Also check IoU is 1 when computing overlap with the same shifted box
+        vol, iou = overlap_fn(box2[None], box2[None])
+        self.assertClose(iou, torch.tensor([[1.0]], device=vol.device, dtype=vol.dtype))
+
+        # 5th test: as the 4th, with one random rotation applied to both boxes
+        ddx, ddy, ddz = random.random(), random.random(), random.random()
+        box2 = box1 + torch.tensor([[ddx, ddy, ddz]], device=device)
+        RR = random_rotation(dtype=torch.float32, device=device)
+        box1r = box1 @ RR.transpose(0, 1)
+        box2r = box2 @ RR.transpose(0, 1)
+        vol, _ = overlap_fn(box1r[None], box2r[None])
+        self.assertClose(
+            vol,
+            torch.tensor(
+                [[(1 - ddx) * (1 - ddy) * (1 - ddz)]],
+                device=vol.device,
+                dtype=vol.dtype,
+            ),
+        )
+        # symmetry
+        vol, _ = overlap_fn(box2r[None], box1r[None])
+        self.assertClose(
+            vol,
+            torch.tensor(
+                [[(1 - ddx) * (1 - ddy) * (1 - ddz)]],
+                device=vol.device,
+                dtype=vol.dtype,
+            ),
+        )
+
+        # 6th test: as the 5th, plus one random translation applied to both boxes
+        ddx, ddy, ddz = random.random(), random.random(), random.random()
+        box2 = box1 + torch.tensor([[ddx, ddy, ddz]], device=device)
+        RR = random_rotation(dtype=torch.float32, device=device)
+        TT = torch.rand((1, 3), dtype=torch.float32, device=device)
+        box1r = box1 @ RR.transpose(0, 1) + TT
+        box2r = box2 @ RR.transpose(0, 1) + TT
+        vol, _ = overlap_fn(box1r[None], box2r[None])
+        self.assertClose(
+            vol,
+            torch.tensor(
+                [[(1 - ddx) * (1 - ddy) * (1 - ddz)]],
+                device=vol.device,
+                dtype=vol.dtype,
+            ),
+            atol=1e-7,
+        )
+        # symmetry
+        vol, _ = overlap_fn(box2r[None], box1r[None])
+        self.assertClose(
+            vol,
+            torch.tensor(
+                [[(1 - ddx) * (1 - ddy) * (1 - ddz)]],
+                device=vol.device,
+                dtype=vol.dtype,
+            ),
+            atol=1e-7,
+        )
+
+        # 7th test: hand coded example and test with meshlab output
+
+        # Meshlab procedure to compute volumes of shapes
+        # 1. Load a shape, then Filters
+        #       -> Remeshing, Simplification, Reconstruction -> Convex Hull
+        # 2. Select the convex hull shape (This is important!)
+        # 3. Then Filters -> Quality Measure and Computation -> Compute Geometric Measures
+        # 4. Check for "Mesh Volume" in the stdout
+        box1r = torch.tensor(
+            [
+                [3.1673, -2.2574, 0.4817],
+                [4.6470, 0.2223, 2.4197],
+                [5.2200, 1.1844, 0.7510],
+                [3.7403, -1.2953, -1.1869],
+                [-4.9316, 2.5724, 0.4856],
+                [-3.4519, 5.0521, 2.4235],
+                [-2.8789, 6.0142, 0.7549],
+                [-4.3586, 3.5345, -1.1831],
+            ],
+            device=device,
+        )
+        box2r = torch.tensor(
+            [
+                [0.5623, 4.0647, 3.4334],
+                [3.3584, 4.3191, 1.1791],
+                [3.0724, -5.9235, -0.3315],
+                [0.2763, -6.1779, 1.9229],
+                [-2.0773, 4.6121, 0.2213],
+                [0.7188, 4.8665, -2.0331],
+                [0.4328, -5.3761, -3.5436],
+                [-2.3633, -5.6305, -1.2893],
+            ],
+            device=device,
+        )
+        # from Meshlab:
+        vol_inters = 33.558529
+        vol_box1 = 65.899010
+        vol_box2 = 156.386719
+        iou_mesh = vol_inters / (vol_box1 + vol_box2 - vol_inters)
+
+        vol, iou = overlap_fn(box1r[None], box2r[None])
+        self.assertClose(vol, torch.tensor([[vol_inters]], device=device), atol=1e-1)
+        self.assertClose(iou, torch.tensor([[iou_mesh]], device=device), atol=1e-1)
+        # symmetry
+        vol, iou = overlap_fn(box2r[None], box1r[None])
+        self.assertClose(vol, torch.tensor([[vol_inters]], device=device), atol=1e-1)
+        self.assertClose(iou, torch.tensor([[iou_mesh]], device=device), atol=1e-1)
+
+        # 8th test: compare with sampling
+        # random centres and extents for two axis-aligned boxes
+        ctrs = torch.rand((2, 3), device=device)
+        whl = torch.rand((2, 3), device=device) * 10.0 + 1.0
+        # box8a & box8b
+        box8a = self.create_box(ctrs[0], whl[0])
+        box8b = self.create_box(ctrs[1], whl[1])
+        RR1 = random_rotation(dtype=torch.float32, device=device)
+        TT1 = torch.rand((1, 3), dtype=torch.float32, device=device)
+        RR2 = random_rotation(dtype=torch.float32, device=device)
+        TT2 = torch.rand((1, 3), dtype=torch.float32, device=device)
+        box1r = box8a @ RR1.transpose(0, 1) + TT1
+        box2r = box8b @ RR2.transpose(0, 1) + TT2
+        vol, iou = overlap_fn(box1r[None], box2r[None])
+        iou_sampling = self._box3d_overlap_sampling_batched(
+            box1r[None], box2r[None], num_samples=10000
+        )
+        self.assertClose(iou, iou_sampling, atol=1e-2)
+        # symmetry
+        vol, iou = overlap_fn(box2r[None], box1r[None])
+        self.assertClose(iou, iou_sampling, atol=1e-2)
+
+        # 9th test: non overlapping boxes, iou = 0.0
+        box2 = box1 + torch.tensor([[0.0, 100.0, 0.0]], device=device)
+        vol, iou = overlap_fn(box1[None], box2[None])
+        self.assertClose(vol, torch.tensor([[0.0]], device=vol.device, dtype=vol.dtype))
+        self.assertClose(iou, torch.tensor([[0.0]], device=vol.device, dtype=vol.dtype))
+        # symmetry
+        vol, iou = overlap_fn(box2[None], box1[None])
+        self.assertClose(vol, torch.tensor([[0.0]], device=vol.device, dtype=vol.dtype))
+        self.assertClose(iou, torch.tensor([[0.0]], device=vol.device, dtype=vol.dtype))
+
+        # 10th test: Non coplanar verts in a plane
+        box10 = box1 + torch.rand((8, 3), dtype=torch.float32, device=device)
+        msg = "Plane vertices are not coplanar"
+        with self.assertRaisesRegex(ValueError, msg):
+            overlap_fn(box10[None], box10[None])
+
+        # 11th test: Skewed bounding boxes but all verts are coplanar
+        box_skew_1 = torch.tensor(
+            [
+                [0, 0, 0],
+                [1, 0, 0],
+                [1, 1, 0],
+                [0, 1, 0],
+                [-2, -2, 2],
+                [2, -2, 2],
+                [2, 2, 2],
+                [-2, 2, 2],
+            ],
+            dtype=torch.float32,
+            device=device,
+        )
+        box_skew_2 = torch.tensor(
+            [
+                [2.015995, 0.695233, 2.152806],
+                [2.832533, 0.663448, 1.576389],
+                [2.675445, -0.309592, 1.407520],
+                [1.858907, -0.277806, 1.983936],
+                [-0.413922, 3.161758, 2.044343],
+                [2.852230, 3.034615, -0.261321],
+                [2.223878, -0.857545, -0.936800],
+                [-1.042273, -0.730402, 1.368864],
+            ],
+            dtype=torch.float32,
+            device=device,
+        )
+        vol1 = 14.000
+        vol2 = 14.000005
+        vol_inters = 5.431122
+        iou = vol_inters / (vol1 + vol2 - vol_inters)
+
+        vols, ious = overlap_fn(box_skew_1[None], box_skew_2[None])
+        self.assertClose(vols, torch.tensor([[vol_inters]], device=device), atol=1e-1)
+        self.assertClose(ious, torch.tensor([[iou]], device=device), atol=1e-1)
+        # symmetry
+        vols, ious = overlap_fn(box_skew_2[None], box_skew_1[None])
+        self.assertClose(vols, torch.tensor([[vol_inters]], device=device), atol=1e-1)
+        self.assertClose(ious, torch.tensor([[iou]], device=device), atol=1e-1)
+
+        # 12th test: Zero area bounding box (from GH issue #992)
+        box12a = torch.tensor(
+            [
+                [-1.0000, -1.0000, -0.5000],
+                [1.0000, -1.0000, -0.5000],
+                [1.0000, 1.0000, -0.5000],
+                [-1.0000, 1.0000, -0.5000],
+                [-1.0000, -1.0000, 0.5000],
+                [1.0000, -1.0000, 0.5000],
+                [1.0000, 1.0000, 0.5000],
+                [-1.0000, 1.0000, 0.5000],
+            ],
+            device=device,
+            dtype=torch.float32,
+        )
+
+        box12b = torch.tensor(
+            [
+                [0.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0],
+            ],
+            device=device,
+            dtype=torch.float32,
+        )
+        msg = "Planes have zero areas"
+        with self.assertRaisesRegex(ValueError, msg):
+            overlap_fn(box12a[None], box12b[None])
+        # symmetry
+        with self.assertRaisesRegex(ValueError, msg):
+            overlap_fn(box12b[None], box12a[None])
+
+        # 13th test: From GH issue #992
+        # Zero area coplanar face after intersection
+        ctrs = torch.tensor([[0.0, 0.0, 0.0], [-1.0, 1.0, 0.0]], device=device)
+        whl = torch.tensor([[2.0, 2.0, 2.0], [2.0, 2, 2]], device=device)
+        box13a = TestIoU3D.create_box(ctrs[0], whl[0])
+        box13b = TestIoU3D.create_box(ctrs[1], whl[1])
+        vol, iou = overlap_fn(box13a[None], box13b[None])
+        self.assertClose(vol, torch.tensor([[2.0]], device=vol.device, dtype=vol.dtype))
+
+        # 14th test: From GH issue #992
+        # Random rotation, same boxes, iou should be 1.0
+        corners = (
+            torch.tensor(
+                [
+                    [-1.0, -1.0, -1.0],
+                    [1.0, -1.0, -1.0],
+                    [1.0, 1.0, -1.0],
+                    [-1.0, 1.0, -1.0],
+                    [-1.0, -1.0, 1.0],
+                    [1.0, -1.0, 1.0],
+                    [1.0, 1.0, 1.0],
+                    [-1.0, 1.0, 1.0],
+                ],
+                device=device,
+                dtype=torch.float32,
+            )
+            * 0.5
+        )
+        yaw = torch.tensor(0.185)
+        Rot = torch.tensor(
+            [
+                [torch.cos(yaw), 0.0, torch.sin(yaw)],
+                [0.0, 1.0, 0.0],
+                [-torch.sin(yaw), 0.0, torch.cos(yaw)],
+            ],
+            dtype=torch.float32,
+            device=device,
+        )
+        corners = (Rot.mm(corners.t())).t()
+        vol, iou = overlap_fn(corners[None], corners[None])
+        self.assertClose(
+            iou, torch.tensor([[1.0]], device=vol.device, dtype=vol.dtype), atol=1e-2
+        )
+
+        # 15th test: From GH issue #1082
+        box15a = torch.tensor(
+            [
+                [-2.5629019, 4.13995749, -1.76344576],
+                [1.92329434, 4.28127117, -1.86155124],
+                [1.86994571, 5.97489644, -1.86155124],
+                [-2.61625053, 5.83358276, -1.76344576],
+                [-2.53123587, 4.14095496, -0.31397536],
+                [1.95496037, 4.28226864, -0.41208084],
+                [1.90161174, 5.97589391, -0.41208084],
+                [-2.5845845, 5.83458023, -0.31397536],
+            ],
+            device=device,
+            dtype=torch.float32,
+        )
+
+        box15b = torch.tensor(
+            [
+                [-2.6256125, 4.13036357, -1.82893437],
+                [1.87201008, 4.25296695, -1.82893437],
+                [1.82562476, 5.95458116, -1.82893437],
+                [-2.67199782, 5.83197777, -1.82893437],
+                [-2.6256125, 4.13036357, -0.40095884],
+                [1.87201008, 4.25296695, -0.40095884],
+                [1.82562476, 5.95458116, -0.40095884],
+                [-2.67199782, 5.83197777, -0.40095884],
+            ],
+            device=device,
+            dtype=torch.float32,
+        )
+        vol, iou = overlap_fn(box15a[None], box15b[None])
+        self.assertClose(
+            iou, torch.tensor([[0.91]], device=vol.device, dtype=vol.dtype), atol=1e-2
+        )
+        # symmetry
+        vol, iou = overlap_fn(box15b[None], box15a[None])
+        self.assertClose(
+            iou, torch.tensor([[0.91]], device=vol.device, dtype=vol.dtype), atol=1e-2
+        )
+
+        # 16th test: From GH issue 1287
+        box16a = torch.tensor(
+            [
+                [-167.5847, -70.6167, -2.7927],
+                [-166.7333, -72.4264, -2.7927],
+                [-166.7333, -72.4264, -4.5927],
+                [-167.5847, -70.6167, -4.5927],
+                [-163.0605, -68.4880, -2.7927],
+                [-162.2090, -70.2977, -2.7927],
+                [-162.2090, -70.2977, -4.5927],
+                [-163.0605, -68.4880, -4.5927],
+            ],
+            device=device,
+            dtype=torch.float32,
+        )
+
+        box16b = torch.tensor(
+            [
+                [-167.5847, -70.6167, -2.7927],
+                [-166.7333, -72.4264, -2.7927],
+                [-166.7333, -72.4264, -4.5927],
+                [-167.5847, -70.6167, -4.5927],
+                [-163.0605, -68.4880, -2.7927],
+                [-162.2090, -70.2977, -2.7927],
+                [-162.2090, -70.2977, -4.5927],
+                [-163.0605, -68.4880, -4.5927],
+            ],
+            device=device,
+            dtype=torch.float32,
+        )
+        vol, iou = overlap_fn(box16a[None], box16b[None])
+        self.assertClose(
+            iou, torch.tensor([[1.0]], device=vol.device, dtype=vol.dtype), atol=1e-2
+        )
+        # symmetry
+        vol, iou = overlap_fn(box16b[None], box16a[None])
+        self.assertClose(
+            iou, torch.tensor([[1.0]], device=vol.device, dtype=vol.dtype), atol=1e-2
+        )
+
+        # 17th test: From GH issue 1287
+        box17a = torch.tensor(
+            [
+                [-33.94158, -4.51639, 0.96941],
+                [-34.67156, -2.65437, 0.96941],
+                [-34.67156, -2.65437, -0.95367],
+                [-33.94158, -4.51639, -0.95367],
+                [-38.75954, -6.40521, 0.96941],
+                [-39.48952, -4.54319, 0.96941],
+                [-39.48952, -4.54319, -0.95367],
+                [-38.75954, -6.40521, -0.95367],
+            ],
+            device=device,
+            dtype=torch.float32,
+        )
+
+        box17b = torch.tensor(
+            [
+                [-33.94159, -4.51638, 0.96939],
+                [-34.67158, -2.65437, 0.96939],
+                [-34.67158, -2.65437, -0.95368],
+                [-33.94159, -4.51638, -0.95368],
+                [-38.75954, -6.40523, 0.96939],
+                [-39.48953, -4.54321, 0.96939],
+                [-39.48953, -4.54321, -0.95368],
+                [-38.75954, -6.40523, -0.95368],
+            ],
+            device=device,
+            dtype=torch.float32,
+        )
+        vol, iou = overlap_fn(box17a[None], box17b[None])
+        self.assertClose(
+            iou, torch.tensor([[1.0]], device=vol.device, dtype=vol.dtype), atol=1e-2
+        )
+        # symmetry
+        vol, iou = overlap_fn(box17b[None], box17a[None])
+        self.assertClose(
+            iou, torch.tensor([[1.0]], device=vol.device, dtype=vol.dtype), atol=1e-2
+        )
+
+        # 18th test: From GH issue 1287
+        box18a = torch.tensor(
+            [
+                [-105.6248, -32.7026, -1.2279],
+                [-106.4690, -30.8895, -1.2279],
+                [-106.4690, -30.8895, -3.0279],
+                [-105.6248, -32.7026, -3.0279],
+                [-110.1575, -34.8132, -1.2279],
+                [-111.0017, -33.0001, -1.2279],
+                [-111.0017, -33.0001, -3.0279],
+                [-110.1575, -34.8132, -3.0279],
+            ],
+            device=device,
+            dtype=torch.float32,
+        )
+        box18b = torch.tensor(
+            [
+                [-105.5094, -32.9504, -1.0641],
+                [-106.4272, -30.9793, -1.0641],
+                [-106.4272, -30.9793, -3.1916],
+                [-105.5094, -32.9504, -3.1916],
+                [-110.0421, -35.0609, -1.0641],
+                [-110.9599, -33.0899, -1.0641],
+                [-110.9599, -33.0899, -3.1916],
+                [-110.0421, -35.0609, -3.1916],
+            ],
+            device=device,
+            dtype=torch.float32,
+        )
+        # from Meshlab
+        vol_inters = 17.108501
+        vol_box1 = 18.000067
+        vol_box2 = 23.128527
+        iou_mesh = vol_inters / (vol_box1 + vol_box2 - vol_inters)
+        vol, iou = overlap_fn(box18a[None], box18b[None])
+        self.assertClose(
+            iou,
+            torch.tensor([[iou_mesh]], device=vol.device, dtype=vol.dtype),
+            atol=1e-2,
+        )
+        self.assertClose(
+            vol,
+            torch.tensor([[vol_inters]], device=vol.device, dtype=vol.dtype),
+            atol=1e-2,
+        )
+        # symmetry
+        vol, iou = overlap_fn(box18b[None], box18a[None])
+        self.assertClose(
+            iou,
+            torch.tensor([[iou_mesh]], device=vol.device, dtype=vol.dtype),
+            atol=1e-2,
+        )
+        self.assertClose(
+            vol,
+            torch.tensor([[vol_inters]], device=vol.device, dtype=vol.dtype),
+            atol=1e-2,
+        )
+
+        # 19th test: From GH issue 1287
+        box19a = torch.tensor(
+            [
+                [-59.4785, -15.6003, 0.4398],
+                [-60.2263, -13.6928, 0.4398],
+                [-60.2263, -13.6928, -1.3909],
+                [-59.4785, -15.6003, -1.3909],
+                [-64.1743, -17.4412, 0.4398],
+                [-64.9221, -15.5337, 0.4398],
+                [-64.9221, -15.5337, -1.3909],
+                [-64.1743, -17.4412, -1.3909],
+            ],
+            device=device,
+            dtype=torch.float32,
+        )
+        box19b = torch.tensor(
+            [
+                [-59.4874, -15.5775, -0.1512],
+                [-60.2174, -13.7155, -0.1512],
+                [-60.2174, -13.7155, -1.9820],
+                [-59.4874, -15.5775, -1.9820],
+                [-64.1832, -17.4185, -0.1512],
+                [-64.9132, -15.5564, -0.1512],
+                [-64.9132, -15.5564, -1.9820],
+                [-64.1832, -17.4185, -1.9820],
+            ],
+            device=device,
+            dtype=torch.float32,
+        )
+        # from Meshlab
+        vol_inters = 12.505723
+        vol_box1 = 18.918238
+        vol_box2 = 18.468531
+        iou_mesh = vol_inters / (vol_box1 + vol_box2 - vol_inters)
+        vol, iou = overlap_fn(box19a[None], box19b[None])
+        self.assertClose(
+            iou,
+            torch.tensor([[iou_mesh]], device=vol.device, dtype=vol.dtype),
+            atol=1e-2,
+        )
+        self.assertClose(
+            vol,
+            torch.tensor([[vol_inters]], device=vol.device, dtype=vol.dtype),
+            atol=1e-2,
+        )
+        # symmetry
+        vol, iou = overlap_fn(box19b[None], box19a[None])
+        self.assertClose(
+            iou,
+            torch.tensor([[iou_mesh]], device=vol.device, dtype=vol.dtype),
+            atol=1e-2,
+        )
+        self.assertClose(
+            vol,
+            torch.tensor([[vol_inters]], device=vol.device, dtype=vol.dtype),
+            atol=1e-2,
+        )
+
+    def _test_real_boxes(self, overlap_fn, device):
+        """Two boxes from real_boxes.pkl: their pairwise IoU must be the identity."""
+        # pickle.load is fine here: the file is a fixture under the tests' data dir.
+        with open(DATA_DIR / "./real_boxes.pkl", "rb") as fixture:
+            example = pickle.load(fixture)
+
+        keys = ("verts1", "verts2")
+        boxes = torch.stack([torch.FloatTensor(example[k]) for k in keys]).to(device)
+
+        identity = torch.eye(2).to(device)
+        _, iou = overlap_fn(boxes, boxes)
+        self.assertClose(iou, identity)
+
+    def test_iou_naive(self):
+        naive, dev = self._box3d_overlap_naive_batched, get_random_cuda_device()
+        checks = (self._test_iou, self._test_compare_objectron, self._test_real_boxes)
+        for check in checks:  # same three suites as the box3d_overlap tests
+            check(naive, dev)
+
+    def test_iou_cpu(self):
+        """Run the three shared check suites against box3d_overlap on the CPU."""
+        checks = (self._test_iou, self._test_compare_objectron, self._test_real_boxes)
+        for check in checks:
+            check(box3d_overlap, torch.device("cpu"))
+
+    def test_iou_cuda(self):
+        """Run the three shared check suites against box3d_overlap on cuda:0."""
+        checks = (self._test_iou, self._test_compare_objectron, self._test_real_boxes)
+        for check in checks:
+            check(box3d_overlap, torch.device("cuda:0"))
+
+    def _test_compare_objectron(self, overlap_fn, device):
+        """
+        Check overlap_fn against the volumes and IoUs stored alongside the boxes
+        in the objectron_vols_ious.pt fixture.
+        """
+        saved = torch.load(DATA_DIR / "./objectron_vols_ious.pt")
+        boxes1, boxes2 = saved["boxes1"], saved["boxes2"]
+        expected_vols, expected_ious = saved["vols"], saved["ious"]
+
+        # Permutation that maps Objectron's vertex order to PyTorch3D's.
+        order = torch.tensor(
+            OBJECTRON_TO_PYTORCH3D_FACE_IDX, dtype=torch.int64, device=device
+        )
+
+        def prepare(boxes):
+            # Move to the target device as float32, then reorder the 8 vertices.
+            boxes = boxes.to(device=device, dtype=torch.float32)
+            return boxes.index_select(index=order, dim=1)
+
+        boxes1, boxes2 = prepare(boxes1), prepare(boxes2)
+
+        # Results are brought back to the CPU before comparing with the fixture.
+        vols, ious = overlap_fn(boxes1, boxes2)
+        self.assertClose(expected_vols, vols.cpu())
+        self.assertClose(expected_ious, ious.cpu())
+
+    def test_batched_errors(self):
+        N, M = 5, 10
+        boxes1 = torch.randn((N, 8, 3))
+        boxes2 = torch.randn((M, 10, 3))
+        with self.assertRaisesRegex(ValueError, "(8, 3)"):
+            box3d_overlap(boxes1, boxes2)
+
+    def test_box_volume(self):
+        device = torch.device("cuda:0")
+        box1 = torch.tensor(
+            [
+                [3.1673, -2.2574, 0.4817],
+                [4.6470, 0.2223, 2.4197],
+                [5.2200, 1.1844, 0.7510],
+                [3.7403, -1.2953, -1.1869],
+                [-4.9316, 2.5724, 0.4856],
+                [-3.4519, 5.0521, 2.4235],
+                [-2.8789, 6.0142, 0.7549],
+                [-4.3586, 3.5345, -1.1831],
+            ],
+            dtype=torch.float32,
+            device=device,
+        )
+        box2 = torch.tensor(
+            [
+                [0.5623, 4.0647, 3.4334],
+                [3.3584, 4.3191, 1.1791],
+                [3.0724, -5.9235, -0.3315],
+                [0.2763, -6.1779, 1.9229],
+                [-2.0773, 4.6121, 0.2213],
+                [0.7188, 4.8665, -2.0331],
+                [0.4328, -5.3761, -3.5436],
+                [-2.3633, -5.6305, -1.2893],
+            ],
+            dtype=torch.float32,
+            device=device,
+        )
+
+        box3 = torch.tensor(
+            [
+                [0, 0, 0],
+                [1, 0, 0],
+                [1, 1, 0],
+                [0, 1, 0],
+                [0, 0, 1],
+                [1, 0, 1],
+                [1, 1, 1],
+                [0, 1, 1],
+            ],
+            dtype=torch.float32,
+            device=device,
+        )
+
+        RR = random_rotation(dtype=torch.float32, device=device)
+        TT = torch.rand((1, 3), dtype=torch.float32, device=device)
+        box4 = box3 @ RR.transpose(0, 1) + TT
+
+        self.assertClose(box_volume(box1).cpu(), torch.tensor(65.899010), atol=1e-3)
+        self.assertClose(box_volume(box2).cpu(), torch.tensor(156.386719), atol=1e-3)
+        self.assertClose(box_volume(box3).cpu(), torch.tensor(1.0), atol=1e-3)
+        self.assertClose(box_volume(box4).cpu(), torch.tensor(1.0), atol=1e-3)
+
+    def test_box_planar_dir(self):
+        device = torch.device("cuda:0")
+        box1 = torch.tensor(
+            UNIT_BOX,
+            dtype=torch.float32,
+            device=device,
+        )
+
+        n1 = torch.tensor(
+            [
+                [0.0, 0.0, 1.0],
+                [0.0, -1.0, 0.0],
+                [0.0, 1.0, 0.0],
+                [1.0, 0.0, 0.0],
+                [-1.0, 0.0, 0.0],
+                [0.0, 0.0, -1.0],
+            ],
+            device=device,
+            dtype=torch.float32,
+        )
+
+        RR = random_rotation(dtype=torch.float32, device=device)
+        TT = torch.rand((1, 3), dtype=torch.float32, device=device)
+        box2 = box1 @ RR.transpose(0, 1) + TT
+        n2 = n1 @ RR.transpose(0, 1)
+
+        self.assertClose(box_planar_dir(box1), n1)
+        self.assertClose(box_planar_dir(box2), n2)
+
+    @staticmethod
+    def iou_naive(N: int, M: int, device="cpu"):
+        box = torch.tensor(
+            [UNIT_BOX],
+            dtype=torch.float32,
+            device=device,
+        )
+        boxes1 = box + torch.randn((N, 1, 3), device=device)
+        boxes2 = box + torch.randn((M, 1, 3), device=device)
+
+        def output():
+            vol, iou = TestIoU3D._box3d_overlap_naive_batched(boxes1, boxes2)
+
+        return output
+
+    @staticmethod
+    def iou(N: int, M: int, device="cpu"):
+        box = torch.tensor(
+            [UNIT_BOX],
+            dtype=torch.float32,
+            device=device,
+        )
+        boxes1 = box + torch.randn((N, 1, 3), device=device)
+        boxes2 = box + torch.randn((M, 1, 3), device=device)
+
+        def output():
+            vol, iou = box3d_overlap(boxes1, boxes2)
+
+        return output
+
+    @staticmethod
+    def iou_sampling(N: int, M: int, num_samples: int, device="cpu"):
+        box = torch.tensor(
+            [UNIT_BOX],
+            dtype=torch.float32,
+            device=device,
+        )
+        boxes1 = box + torch.randn((N, 1, 3), device=device)
+        boxes2 = box + torch.randn((M, 1, 3), device=device)
+
+        def output():
+            _ = TestIoU3D._box3d_overlap_sampling_batched(boxes1, boxes2, num_samples)
+
+        return output
+
+
+# -------------------------------------------------- #
+#                NAIVE IMPLEMENTATION                #
+# -------------------------------------------------- #
+
+"""
+The main functions below are:
+* box3d_overlap_naive: which computes the exact IoU of box1 and box2
+* box3d_overlap_sampling: which computes an approximate IoU of box1 and box2
+    by sampling points within the boxes
+
+Note that both implementations currently do not support batching.
+"""
+# -------------------------------------------------- #
+# Throughout this implementation, we assume that boxes
+# are defined by their 8 corners in the following order
+#
+#        (4) +---------+. (5)
+#            | ` .     |  ` .
+#            | (0) +---+-----+ (1)
+#            |     |   |     |
+#        (7) +-----+---+. (6)|
+#            ` .   |     ` . |
+#            (3) ` +---------+ (2)
+#
+# -------------------------------------------------- #
+
+# -------------------------------------------------- #
+#       HELPER FUNCTIONS FOR EXACT SOLUTION          #
+# -------------------------------------------------- #
+
+
+def get_tri_verts(box: torch.Tensor) -> torch.Tensor:
+    """
+    Return the vertex coordinates forming the triangles of the box.
+    The computation here resembles the Meshes data structure.
+    But since we only want this tiny functionality, we abstract it out.
+    Args:
+        box: tensor of shape (8, 3)
+    Returns:
+        tri_verts: tensor of shape (12, 3, 3)
+    """
+    device = box.device
+    faces = torch.tensor(_box_triangles, device=device, dtype=torch.int64)  # (12, 3)
+    tri_verts = box[faces]  # (12, 3, 3)
+    return tri_verts
+
+
+def get_plane_verts(box: torch.Tensor) -> torch.Tensor:
+    """
+    Return the vertex coordinates forming the planes of the box.
+    The computation here resembles the Meshes data structure.
+    But since we only want this tiny functionality, we abstract it out.
+    Args:
+        box: tensor of shape (8, 3)
+    Returns:
+        plane_verts: tensor of shape (6, 4, 3)
+    """
+    device = box.device
+    faces = torch.tensor(_box_planes, device=device, dtype=torch.int64)  # (6, 4)
+    plane_verts = box[faces]  # (6, 4, 3)
+    return plane_verts
+
+
+def get_tri_center_normal(tris: torch.Tensor) -> torch.Tensor:
+    """
+    Returns the center and normal of triangles
+    Args:
+        tris: tensor of shape (T, 3, 3)
+    Returns:
+        center: tensor of shape (T, 3)
+        normal: tensor of shape (T, 3)
+    """
+    add_dim0 = False
+    if tris.ndim == 2:
+        tris = tris.unsqueeze(0)
+        add_dim0 = True
+
+    ctr = tris.mean(1)  # (T, 3)
+    normals = torch.zeros_like(ctr)
+
+    v0, v1, v2 = tris.unbind(1)  # 3 x (T, 3)
+
+    # unvectorized solution
+    T = tris.shape[0]
+    for t in range(T):
+        ns = torch.zeros((3, 3), device=tris.device)
+        ns[0] = torch.cross(v0[t] - ctr[t], v1[t] - ctr[t], dim=-1)
+        ns[1] = torch.cross(v0[t] - ctr[t], v2[t] - ctr[t], dim=-1)
+        ns[2] = torch.cross(v1[t] - ctr[t], v2[t] - ctr[t], dim=-1)
+
+        i = torch.norm(ns, dim=-1).argmax()
+        normals[t] = ns[i]
+
+    if add_dim0:
+        ctr = ctr[0]
+        normals = normals[0]
+    normals = F.normalize(normals, dim=-1)
+    return ctr, normals
+
+
+def get_plane_center_normal(planes: torch.Tensor) -> torch.Tensor:
+    """
+    Returns the center and normal of planes
+    Args:
+        planes: tensor of shape (P, 4, 3)
+    Returns:
+        center: tensor of shape (P, 3)
+        normal: tensor of shape (P, 3)
+    """
+    add_dim0 = False
+    if planes.ndim == 2:
+        planes = planes.unsqueeze(0)
+        add_dim0 = True
+
+    ctr = planes.mean(1)  # (P, 3)
+    normals = torch.zeros_like(ctr)
+
+    v0, v1, v2, v3 = planes.unbind(1)  # 4 x (P, 3)
+
+    # unvectorized solution
+    P = planes.shape[0]
+    for t in range(P):
+        ns = torch.zeros((6, 3), device=planes.device)
+        ns[0] = torch.cross(v0[t] - ctr[t], v1[t] - ctr[t], dim=-1)
+        ns[1] = torch.cross(v0[t] - ctr[t], v2[t] - ctr[t], dim=-1)
+        ns[2] = torch.cross(v0[t] - ctr[t], v3[t] - ctr[t], dim=-1)
+        ns[3] = torch.cross(v1[t] - ctr[t], v2[t] - ctr[t], dim=-1)
+        ns[4] = torch.cross(v1[t] - ctr[t], v3[t] - ctr[t], dim=-1)
+        ns[5] = torch.cross(v2[t] - ctr[t], v3[t] - ctr[t], dim=-1)
+
+        i = torch.norm(ns, dim=-1).argmax()
+        normals[t] = ns[i]
+
+    if add_dim0:
+        ctr = ctr[0]
+        normals = normals[0]
+    normals = F.normalize(normals, dim=-1)
+    return ctr, normals
+
+
+def box_planar_dir(
+    box: torch.Tensor, dot_eps: float = DOT_EPS, area_eps: float = AREA_EPS
+) -> torch.Tensor:
+    """
+    Finds the unit vector n which is perpendicular to each plane in the box
+    and points towards the inside of the box.
+    The planes are defined by `_box_planes`.
+    Since the shape is convex, we define the interior to be the direction
+    pointing to the center of the shape.
+    Args:
+       box: tensor of shape (8, 3) of the vertices of the 3D box
+       dot_eps: a plane counts as coplanar when the absolute dot product of
+          its normal with the unit edge direction (v3 - v0) is below this value
+       area_eps: minimum area required of each of the two triangles
+          (v0, v1, v2) and (v0, v3, v2) of every plane
+    Returns:
+       n: tensor of shape (6, 3) holding, for each plane, the unit vector
+          orthogonal to the face pointing towards the interior of the shape
+    Raises:
+       AssertionError: if box is not of shape (8, 3)
+       ValueError: if the vertices of a plane are not coplanar, or if a plane
+          has (near) zero area
+    """
+    assert box.shape[0] == 8 and box.shape[1] == 3
+
+    # center point of each box
+    box_ctr = box.mean(0).view(1, 3)
+
+    # box planes
+    plane_verts = get_plane_verts(box)  # (6, 4, 3)
+    v0, v1, v2, v3 = plane_verts.unbind(1)
+    plane_ctr, n = get_plane_center_normal(plane_verts)
+
+    # Check all verts are coplanar
+    # (batched dot product of the unit edge v3 - v0 with the plane normal)
+    if (
+        not (
+            F.normalize(v3 - v0, dim=-1).unsqueeze(1).bmm(n.unsqueeze(2)).abs()
+            < dot_eps
+        )
+        .all()
+        .item()
+    ):
+        msg = "Plane vertices are not coplanar"
+        raise ValueError(msg)
+
+    # Check all faces have non zero area
+    area1 = torch.cross(v1 - v0, v2 - v0, dim=-1).norm(dim=-1) / 2
+    area2 = torch.cross(v3 - v0, v2 - v0, dim=-1).norm(dim=-1) / 2
+    if (area1 < area_eps).any().item() or (area2 < area_eps).any().item():
+        msg = "Planes have zero areas"
+        raise ValueError(msg)
+
+    # We can write:  `box_ctr = plane_ctr + a * e0 + b * e1 + c * n`, (1).
+    # With <e0, n> = 0 and <e1, n> = 0, where <.,.> refers to the dot product,
+    # since that e0 is orthogonal to n. Same for e1.
+    # The string literal below is not executed as code; it only records the
+    # explicit derivation of (a, b, c).
+    """
+    # Below is how one would solve for (a, b, c)
+    # Solving for (a, b)
+    numF = verts.shape[0]
+    A = torch.ones((numF, 2, 2), dtype=torch.float32, device=device)
+    B = torch.ones((numF, 2), dtype=torch.float32, device=device)
+    A[:, 0, 1] = (e0 * e1).sum(-1)
+    A[:, 1, 0] = (e0 * e1).sum(-1)
+    B[:, 0] = ((box_ctr - plane_ctr) * e0).sum(-1)
+    B[:, 1] = ((box_ctr - plane_ctr) * e1).sum(-1)
+    ab = torch.linalg.solve(A, B)  # (numF, 2)
+    a, b = ab.unbind(1)
+    # solving for c
+    c = ((box_ctr - plane_ctr - a.view(numF, 1) * e0 - b.view(numF, 1) * e1) * n).sum(-1)
+    """
+    # Since we know that <e0, n> = 0 and <e1, n> = 0 (e0 and e1 are orthogonal to n),
+    # the above solution is equivalent to
+    direc = F.normalize(box_ctr - plane_ctr, dim=-1)  # (6, 3)
+    c = (direc * n).sum(-1)
+    # If c is negative, then we revert the direction of n such that n points "inside"
+    negc = c < 0.0
+    # in-place flip of the rows of n selected by the boolean mask
+    n[negc] *= -1.0
+    # c[negc] *= -1.0
+    # Now (a, b, c) is the solution to (1)
+
+    return n
+
+
+def tri_verts_area(tri_verts: torch.Tensor) -> torch.Tensor:
+    """
+    Computes the area of the triangle faces in tri_verts
+    Args:
+        tri_verts: tensor of shape (T, 3, 3)
+    Returns:
+        areas: the area of the triangles (T, 1)
+    """
+    add_dim = False
+    if tri_verts.ndim == 2:
+        tri_verts = tri_verts.unsqueeze(0)
+        add_dim = True
+
+    v0, v1, v2 = tri_verts.unbind(1)
+    areas = torch.cross(v1 - v0, v2 - v0, dim=-1).norm(dim=-1) / 2.0
+
+    if add_dim:
+        areas = areas[0]
+    return areas
+
+
+def box_volume(box: torch.Tensor) -> torch.Tensor:
+    """
+    Computes the volume of each box in boxes.
+    The volume of each box is the sum of all the tetrahedrons
+    formed by the faces of the box. The face of the box is the base of
+    that tetrahedron and the center point of the box is the apex.
+    In other words, vol(box) = sum_i A_i * d_i / 3,
+    where A_i is the area of the i-th face and d_i is the
+    distance of the apex from the face.
+    We use the equivalent dot/cross product formulation.
+    Read https://en.wikipedia.org/wiki/Tetrahedron#Volume
+
+    Args:
+        box: tensor of shape (8, 3) containing the vertices
+            of the 3D box
+    Returns:
+        vols: the volume of the box
+    """
+    assert box.shape[0] == 8 and box.shape[1] == 3
+
+    # Compute the center point of each box
+    ctr = box.mean(0).view(1, 1, 3)
+
+    # Extract the coordinates of the faces for each box
+    tri_verts = get_tri_verts(box)
+    # Set the origin of the coordinate system to coincide
+    # with the apex of the tetrahedron to simplify the volume calculation
+    # See https://en.wikipedia.org/wiki/Tetrahedron#Volume
+    tri_verts = tri_verts - ctr
+
+    # Compute the volume of each box using the dot/cross product formula
+    vols = torch.sum(
+        tri_verts[:, 0] * torch.cross(tri_verts[:, 1], tri_verts[:, 2], dim=-1),
+        dim=-1,
+    )
+    vols = (vols.abs() / 6.0).sum()
+
+    return vols
+
+
+def coplanar_tri_faces(tri1: torch.Tensor, tri2: torch.Tensor, eps: float = DOT_EPS):
+    """
+    Determines whether two triangle faces in 3D are coplanar
+    Args:
+        tri1: tensor of shape (3, 3) of the vertices of the 1st triangle
+        tri2: tensor of shape (3, 3) of the vertices of the 2nd triangle
+    Returns:
+        is_coplanar: bool
+    """
+    tri1_ctr, tri1_n = get_tri_center_normal(tri1)
+    tri2_ctr, tri2_n = get_tri_center_normal(tri2)
+
+    check1 = tri1_n.dot(tri2_n).abs() > 1 - eps  # checks if parallel
+
+    dist12 = torch.norm(tri1.unsqueeze(1) - tri2.unsqueeze(0), dim=-1)
+    dist12_argmax = dist12.argmax()
+    i1 = dist12_argmax // 3
+    i2 = dist12_argmax % 3
+    assert dist12[i1, i2] == dist12.max()
+
+    check2 = (
+        F.normalize(tri1[i1] - tri2[i2], dim=0).dot(tri1_n).abs() < eps
+    ) or F.normalize(tri1[i1] - tri2[i2], dim=0).dot(tri2_n).abs() < eps
+
+    return check1 and check2
+
+
+def coplanar_tri_plane(
+    tri: torch.Tensor, plane: torch.Tensor, n: torch.Tensor, eps: float = DOT_EPS
+):
+    """
+    Determines whether two triangle faces in 3D are coplanar
+    Args:
+            tri: tensor of shape (3, 3) of the vertices of the triangle
+            plane: tensor of shape (4, 3) of the vertices of the plane
+            n: tensor of shape (3,) of the unit "inside" direction on the plane
+    Returns:
+            is_coplanar: bool
+    """
+    tri_ctr, tri_n = get_tri_center_normal(tri)
+
+    check1 = tri_n.dot(n).abs() > 1 - eps  # checks if parallel
+
+    dist12 = torch.norm(tri.unsqueeze(1) - plane.unsqueeze(0), dim=-1)
+    dist12_argmax = dist12.argmax()
+    i1 = dist12_argmax // 4
+    i2 = dist12_argmax % 4
+    assert dist12[i1, i2] == dist12.max()
+
+    check2 = F.normalize(tri[i1] - plane[i2], dim=0).dot(n).abs() < eps
+
+    return check1 and check2
+
+
+def is_inside(
+    plane: torch.Tensor,
+    n: torch.Tensor,
+    points: torch.Tensor,
+    return_proj: bool = True,
+):
+    """
+    Computes whether point is "inside" the plane.
+    The definition of "inside" means that the point
+    has a positive component in the direction of the plane normal defined by n.
+    For example,
+                  plane
+                    |
+                    |         . (A)
+                    |--> n
+                    |
+         .(B)       |
+
+    Point (A) is "inside" the plane, while point (B) is "outside" the plane.
+    Args:
+      plane: tensor of shape (4,3) of vertices of a box plane
+      n: tensor of shape (3,) of the unit "inside" direction on the plane
+      points: tensor of shape (P, 3) of coordinates of a point
+      return_proj: bool whether to return the projected point on the plane
+    Returns:
+      is_inside: bool of shape (P,) of whether point is inside
+      p_proj: tensor of shape (P, 3) of the projected point on plane
+    """
+    device = plane.device
+    v0, v1, v2, v3 = plane.unbind(0)
+    plane_ctr = plane.mean(0)
+    e0 = F.normalize(v0 - plane_ctr, dim=0)
+    e1 = F.normalize(v1 - plane_ctr, dim=0)
+    if not torch.allclose(e0.dot(n), torch.zeros((1,), device=device), atol=1e-2):
+        raise ValueError("Input n is not perpendicular to the plane")
+    if not torch.allclose(e1.dot(n), torch.zeros((1,), device=device), atol=1e-2):
+        raise ValueError("Input n is not perpendicular to the plane")
+
+    add_dim = False
+    if points.ndim == 1:
+        points = points.unsqueeze(0)
+        add_dim = True
+
+    assert points.shape[1] == 3
+    # Every point p can be written as p = ctr + a e0 + b e1 + c n
+
+    # If return_proj is True, we need to solve for (a, b)
+    p_proj = None
+    if return_proj:
+        # solving for (a, b)
+        A = torch.tensor(
+            [[1.0, e0.dot(e1)], [e0.dot(e1), 1.0]], dtype=torch.float32, device=device
+        )
+        B = torch.zeros((2, points.shape[0]), dtype=torch.float32, device=device)
+        B[0, :] = torch.sum((points - plane_ctr.view(1, 3)) * e0.view(1, 3), dim=-1)
+        B[1, :] = torch.sum((points - plane_ctr.view(1, 3)) * e1.view(1, 3), dim=-1)
+        ab = A.inverse() @ B  # (2, P)
+        p_proj = plane_ctr.view(1, 3) + ab.transpose(0, 1) @ torch.stack(
+            (e0, e1), dim=0
+        )
+
+    # solving for c
+    # c = (point - ctr - a * e0 - b * e1).dot(n)
+    direc = torch.sum((points - plane_ctr.view(1, 3)) * n.view(1, 3), dim=-1)
+    ins = direc >= 0.0
+
+    if add_dim:
+        assert p_proj.shape[0] == 1
+        p_proj = p_proj[0]
+
+    return ins, p_proj
+
+
+def plane_edge_point_of_intersection(plane, n, p0, p1, eps: float = DOT_EPS):
+    """
+    Finds the point of intersection between a box plane and
+    a line segment connecting (p0, p1).
+    The plane is assumed to be infinite long.
+    Args:
+      plane: tensor of shape (4, 3) of the coordinates of the vertices defining the plane
+      n: tensor of shape (3,) of the unit direction perpendicular on the plane
+          (Note that we could compute n but since it's computed in the main
+          body of the function, we save time by feeding it in. For the purpose
+          of this function, it's not important that n points "inside" the shape.)
+      p0, p1: tensors of shape (3,), (3,)
+    Returns:
+      p: tensor of shape (3,) of the coordinates of the point of intersection
+      a: scalar such that p = p0 + a*(p1-p0)
+    """
+    # The point of intersection can be parametrized
+    # p = p0 + a (p1 - p0) where a in [0, 1]
+    # We want to find a such that p is on plane
+    # <p - ctr, n> = 0
+
+    # if segment (p0, p1) is parallel to plane (it can only be on it)
+    direc = F.normalize(p1 - p0, dim=0)
+    if direc.dot(n).abs() < eps:
+        return (p1 + p0) / 2.0, 0.5
+    else:
+        ctr = plane.mean(0)
+        a = -(p0 - ctr).dot(n) / ((p1 - p0).dot(n))
+        p = p0 + a * (p1 - p0)
+        return p, a
+
+
+"""
+The three following functions support clipping a triangle face by a plane.
+They contain the following cases: (a) the triangle has one point "outside" the plane and
+(b) the triangle has two points "outside" the plane.
+This logic follows the logic of clipping triangles when they intersect the image plane while
+rendering.
+"""
+
+
+def clip_tri_by_plane_oneout(
+    plane: torch.Tensor,
+    n: torch.Tensor,
+    vout: torch.Tensor,
+    vin1: torch.Tensor,
+    vin2: torch.Tensor,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Case (a).
+    Clips triangle by plane when vout is outside plane, and vin1, vin2, is inside
+    In this case, only one vertex of the triangle is outside the plane.
+    Clip the triangle into a quadrilateral, and then split into two triangles
+    Args:
+        plane: tensor of shape (4, 3) of the coordinates of the vertices forming the plane
+        n: tensor of shape (3,) of the unit "inside" direction of the plane
+        vout, vin1, vin2: tensors of shape (3,) of the points forming the triangle, where
+            vout is "outside" the plane and vin1, vin2 are "inside"
+    Returns:
+        verts: tensor of shape (4, 3) containing the new vertices formed after clipping the
+            original intersecting triangle (vout, vin1, vin2)
+        faces: tensor of shape (2, 3) defining the vertex indices forming the two new triangles
+            which are "inside" the plane formed after clipping
+    """
+    device = plane.device
+    # point of intersection between plane and (vin1, vout)
+    pint1, a1 = plane_edge_point_of_intersection(plane, n, vin1, vout)
+    assert a1 >= -0.0001 and a1 <= 1.0001, a1
+    # point of intersection between plane and (vin2, vout)
+    pint2, a2 = plane_edge_point_of_intersection(plane, n, vin2, vout)
+    assert a2 >= -0.0001 and a2 <= 1.0001, a2
+
+    verts = torch.stack((vin1, pint1, pint2, vin2), dim=0)  # 4x3
+    faces = torch.tensor(
+        [[0, 1, 2], [0, 2, 3]], dtype=torch.int64, device=device
+    )  # 2x3
+    return verts, faces
+
+
+def clip_tri_by_plane_twoout(
+    plane: torch.Tensor,
+    n: torch.Tensor,
+    vout1: torch.Tensor,
+    vout2: torch.Tensor,
+    vin: torch.Tensor,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Case (b).
+    Clips face by plane when vout1, vout2 are outside plane, and vin1 is inside
+    In this case, only one vertex of the triangle is inside the plane.
+    Args:
+        plane: tensor of shape (4, 3) of the coordinates of the vertices forming the plane
+        n: tensor of shape (3,) of the unit "inside" direction of the plane
+        vout1, vout2, vin: tensors of shape (3,) of the points forming the triangle, where
+            vin is "inside" the plane and vout1, vout2 are "outside"
+    Returns:
+        verts: tensor of shape (3, 3) containing the new vertices formed after clipping the
+            original intersectiong triangle (vout, vin1, vin2)
+        faces: tensor of shape (1, 3) defining the vertex indices forming
+            the single new triangle which is "inside" the plane formed after clipping
+    """
+    device = plane.device
+    # point of intersection between plane and (vin, vout1)
+    pint1, a1 = plane_edge_point_of_intersection(plane, n, vin, vout1)
+    assert a1 >= -0.0001 and a1 <= 1.0001, a1
+    # point of intersection between plane and (vin, vout2)
+    pint2, a2 = plane_edge_point_of_intersection(plane, n, vin, vout2)
+    assert a2 >= -0.0001 and a2 <= 1.0001, a2
+
+    verts = torch.stack((vin, pint1, pint2), dim=0)  # 3x3
+    faces = torch.tensor(
+        [
+            [0, 1, 2],
+        ],
+        dtype=torch.int64,
+        device=device,
+    )  # 1x3
+    return verts, faces
+
+
+def clip_tri_by_plane(plane, n, tri_verts) -> Union[List, torch.Tensor]:
+    """
+    Clip a triangular face defined by tri_verts with a plane of "inside" direction n.
+    This function computes whether the triangle has none, one or two
+    points "outside" the plane and dispatches to the matching clipping case.
+    A triangle that is coplanar with the plane is returned unchanged.
+    Args:
+       plane: tensor of shape (4, 3) of the vertex coordinates of the plane
+       n: tensor of shape (3,) of the unit "inside" direction of the plane
+       tri_verts: tensor of shape (3, 3) of the vertex coordinates of the triangle face
+    Returns:
+        tri_verts: tensor of shape (K, 3, 3) of the vertex coordinates of the triangles formed
+            after clipping. All K triangles are now "inside" the plane.
+            An empty list is returned when the triangle is entirely "outside".
+    """
+    # Coplanar triangles are kept as is and never clipped
+    if coplanar_tri_plane(tri_verts, plane, n):
+        return tri_verts.view(1, 3, 3)
+
+    v0, v1, v2 = tri_verts.unbind(0)
+    # Each flag is a one-element bool tensor; the projections are not needed
+    isin0, _ = is_inside(plane, n, v0)
+    isin1, _ = is_inside(plane, n, v1)
+    isin2, _ = is_inside(plane, n, v2)
+
+    if isin0 and isin1 and isin2:
+        # all in, no clipping, keep the old triangle face
+        return tri_verts.view(1, 3, 3)
+    elif (not isin0) and (not isin1) and (not isin2):
+        # all out, delete triangle
+        return []
+    else:
+        # Mixed case: verts[faces] expands the clipped result to (K, 3, 3)
+        if isin0:
+            if isin1:  # (isin0, isin1, not isin2)
+                verts, faces = clip_tri_by_plane_oneout(plane, n, v2, v0, v1)
+                return verts[faces]
+            elif isin2:  # (isin0, not isin1, isin2)
+                verts, faces = clip_tri_by_plane_oneout(plane, n, v1, v0, v2)
+                return verts[faces]
+            else:  # (isin0, not isin1, not isin2)
+                verts, faces = clip_tri_by_plane_twoout(plane, n, v1, v2, v0)
+                return verts[faces]
+        else:
+            if isin1 and isin2:  # (not isin0, isin1, isin2)
+                verts, faces = clip_tri_by_plane_oneout(plane, n, v0, v1, v2)
+                return verts[faces]
+            elif isin1:  # (not isin0, isin1, not isin2)
+                verts, faces = clip_tri_by_plane_twoout(plane, n, v0, v2, v1)
+                return verts[faces]
+            elif isin2:  # (not isin0, not isin1, isin2)
+                verts, faces = clip_tri_by_plane_twoout(plane, n, v0, v1, v2)
+                return verts[faces]
+
+    # Should not be reached
+    return []
+
+
+# -------------------------------------------------- #
+#               MAIN: BOX3D_OVERLAP                  #
+# -------------------------------------------------- #
+
+
+def box3d_overlap_naive(box1: torch.Tensor, box2: torch.Tensor):
+    """
+    Computes the intersection volume and IoU of two 3D boxes by clipping the
+    triangulated faces of each box against the planes of the other box.
+    Inputs box1, box2 are tensors of shape (8, 3) containing
+    the 8 corners of the boxes, as follows
+
+        (4) +---------+. (5)
+            | ` .     |  ` .
+            | (0) +---+-----+ (1)
+            |     |   |     |
+        (7) +-----+---+. (6)|
+            ` .   |     ` . |
+            (3) ` +---------+ (2)
+
+    Args:
+        box1: tensor of shape (8, 3) of the coordinates of the 1st box
+        box2: tensor of shape (8, 3) of the coordinates of the 2nd box
+    Returns:
+        vol: the volume of the intersecting convex shape
+        iou: the intersection over union which is simply
+            `iou = vol / (vol1 + vol2 - vol)`
+
+    When the module-level DEBUG flag is set, the two boxes and the
+    intersecting shape are also written as .obj files under /tmp/output.
+    """
+    device = box1.device
+
+    # For boxes1 we compute the unit directions n1 corresponding to quad_faces
+    n1 = box_planar_dir(box1)  # (6, 3)
+    # For boxes2 we compute the unit directions n2 corresponding to quad_faces
+    n2 = box_planar_dir(box2)
+
+    # We define triangle faces
+    vol1 = box_volume(box1)
+    vol2 = box_volume(box2)
+
+    tri_verts1 = get_tri_verts(box1)  # (12, 3, 3)
+    plane_verts1 = get_plane_verts(box1)  # (6, 4, 3)
+    tri_verts2 = get_tri_verts(box2)  # (12, 3, 3)
+    plane_verts2 = get_plane_verts(box2)  # (6, 4, 3)
+
+    num_planes = plane_verts1.shape[0]  # (=6) based on our definition of planes
+
+    # Every triangle in box1 will be compared to each plane in box2.
+    # If the triangle is fully inside the plane, it is kept as is.
+    # If the triangle is fully outside the plane, it is dropped.
+    # If the triangle intersects with the (infinite) plane, it is clipped and only
+    # the subtriangles that are inside the plane are kept.
+
+    # Tris in Box1 -> Planes in Box2
+    for pidx in range(num_planes):
+        plane = plane_verts2[pidx]
+        nplane = n2[pidx]
+        # triangles surviving the clip against this plane; they are the input
+        # of the clip against the next plane
+        tri_verts_updated = torch.zeros((0, 3, 3), dtype=torch.float32, device=device)
+        for i in range(tri_verts1.shape[0]):
+            tri = clip_tri_by_plane(plane, nplane, tri_verts1[i])
+            # an empty result means the triangle was entirely outside
+            if len(tri) > 0:
+                tri_verts_updated = torch.cat((tri_verts_updated, tri), dim=0)
+        tri_verts1 = tri_verts_updated
+
+    # Tris in Box2 -> Planes in Box1
+    for pidx in range(num_planes):
+        plane = plane_verts1[pidx]
+        nplane = n1[pidx]
+        tri_verts_updated = torch.zeros((0, 3, 3), dtype=torch.float32, device=device)
+        for i in range(tri_verts2.shape[0]):
+            tri = clip_tri_by_plane(plane, nplane, tri_verts2[i])
+            if len(tri) > 0:
+                tri_verts_updated = torch.cat((tri_verts_updated, tri), dim=0)
+        tri_verts2 = tri_verts_updated
+
+    # remove triangles that are coplanar from the intersection as
+    # otherwise they would be doublecounting towards the volume
+    # this happens only if the original 3D boxes have common planes
+    # Since the resulting shape is convex and specifically composed of planar segments,
+    # each planar segment can belong either on box1 or box2 but not both.
+    # Without loss of generality, we assign shared planar segments to box1
+    keep2 = torch.ones((tri_verts2.shape[0],), device=device, dtype=torch.bool)
+    for i1 in range(tri_verts1.shape[0]):
+        for i2 in range(tri_verts2.shape[0]):
+            # degenerate (near zero area) box1 triangles do not evict box2 triangles
+            if (
+                coplanar_tri_faces(tri_verts1[i1], tri_verts2[i2])
+                and tri_verts_area(tri_verts1[i1]) > AREA_EPS
+            ):
+                keep2[i2] = 0
+    # convert the boolean mask to the indices of the triangles to keep
+    keep2 = keep2.nonzero()[:, 0]
+    tri_verts2 = tri_verts2[keep2]
+
+    # intersecting shape
+    num_faces = tri_verts1.shape[0] + tri_verts2.shape[0]
+    num_verts = num_faces * 3  # V=F*3
+    # NOTE(review): created without a device argument; it is only used by the
+    # DEBUG export below.
+    overlap_faces = torch.arange(num_verts).view(num_faces, 3)  # Fx3
+    overlap_tri_verts = torch.cat((tri_verts1, tri_verts2), dim=0)  # Fx3x3
+    overlap_verts = overlap_tri_verts.view(num_verts, 3)  # Vx3
+
+    # the volume of the convex hull defined by (overlap_verts, overlap_faces)
+    # can be defined as the sum of all the tetrahedrons formed where for each tetrahedron
+    # the base is the triangle and the apex is the center point of the convex hull
+    # See the math here: https://en.wikipedia.org/wiki/Tetrahedron#Volume
+
+    # we compute the center by computing the center point of each face
+    # and then averaging the face centers
+    ctr = overlap_tri_verts.mean(1).mean(0)
+    tetras = overlap_tri_verts - ctr.view(1, 1, 3)
+    # scalar triple product per triangle = 6 x signed tetrahedron volume
+    vol = torch.sum(
+        tetras[:, 0] * torch.cross(tetras[:, 1], tetras[:, 2], dim=-1), dim=-1
+    )
+    vol = (vol.abs() / 6.0).sum()
+
+    iou = vol / (vol1 + vol2 - vol)
+
+    if DEBUG:
+        # save shapes
+        tri_faces = torch.tensor(_box_triangles, device=device, dtype=torch.int64)
+        save_obj("/tmp/output/shape1.obj", box1, tri_faces)
+        save_obj("/tmp/output/shape2.obj", box2, tri_faces)
+        if len(overlap_verts) > 0:
+            save_obj("/tmp/output/inters_shape.obj", overlap_verts, overlap_faces)
+    return vol, iou
+
+
+# -------------------------------------------------- #
+#       HELPER FUNCTIONS FOR SAMPLING SOLUTION       #
+# -------------------------------------------------- #
+
+
+def is_point_inside_box(box: torch.Tensor, points: torch.Tensor):
+    """
+    Determines whether points are inside the boxes
+    Args:
+        box: tensor of shape (8, 3) of the corners of the boxes
+        points: tensor of shape (P, 3) of the points
+    Returns:
+        inside: bool tensor of shape (P,)
+    """
+    device = box.device
+    P = points.shape[0]
+
+    n = box_planar_dir(box)  # (6, 3)
+    box_planes = get_plane_verts(box)  # (6, 4)
+    num_planes = box_planes.shape[0]  # = 6
+
+    # a point p is inside the box if it "inside" all planes of the box
+    # so we run the checks
+    ins = torch.zeros((P, num_planes), device=device, dtype=torch.bool)
+    for i in range(num_planes):
+        is_in, _ = is_inside(box_planes[i], n[i], points, return_proj=False)
+        ins[:, i] = is_in
+    ins = ins.all(dim=1)
+    return ins
+
+
+def sample_points_within_box(box: torch.Tensor, num_samples: int = 10):
+    """
+    Sample points within a box defined by its 8 coordinates
+    Args:
+        box: tensor of shape (8, 3) of the box coordinates
+        num_samples: int defining the number of samples
+    Returns:
+        points: (num_samples, 3) of points inside the box
+    """
+    assert box.shape[0] == 8 and box.shape[1] == 3
+    xyzmin = box.min(0).values.view(1, 3)
+    xyzmax = box.max(0).values.view(1, 3)
+
+    uvw = torch.rand((num_samples, 3), device=box.device)
+    points = uvw * (xyzmax - xyzmin) + xyzmin
+
+    # because the box is not axis aligned we need to check wether
+    # the points are within the box
+    num_points = 0
+    samples = []
+    while num_points < num_samples:
+        inside = is_point_inside_box(box, points)
+        samples.append(points[inside].view(-1, 3))
+        num_points += inside.sum()
+
+    samples = torch.cat(samples, dim=0)
+    return samples[1:num_samples]
+
+
+# -------------------------------------------------- #
+#          MAIN: BOX3D_OVERLAP_SAMPLING              #
+# -------------------------------------------------- #
+
+
+def box3d_overlap_sampling(
+    box1: torch.Tensor, box2: torch.Tensor, num_samples: int = 10000
+):
+    """
+    Computes the intersection of two boxes by sampling points
+    """
+    vol1 = box_volume(box1)
+    vol2 = box_volume(box2)
+
+    points1 = sample_points_within_box(box1, num_samples=num_samples)
+    points2 = sample_points_within_box(box2, num_samples=num_samples)
+
+    isin21 = is_point_inside_box(box1, points2)
+    num21 = isin21.sum()
+    isin12 = is_point_inside_box(box2, points1)
+    num12 = isin12.sum()
+
+    assert num12 <= num_samples
+    assert num21 <= num_samples
+
+    inters = (vol1 * num12 + vol2 * num21) / 2.0
+    union = vol1 * num_samples + vol2 * num_samples - inters
+    return inters / union
diff --git a/pytorch3d/tests/test_knn.py b/pytorch3d/tests/test_knn.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb77911e715e159606898889375e84e3409fc9f6
--- /dev/null
+++ b/pytorch3d/tests/test_knn.py
@@ -0,0 +1,263 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from itertools import product
+
+import torch
+from pytorch3d.ops.knn import _KNN, knn_gather, knn_points
+
+from .common_testing import get_random_cuda_device, TestCaseMixin
+
+
+class TestKNN(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(1)
+
+    @staticmethod
+    def _knn_points_naive(
+        p1, p2, lengths1, lengths2, K: int, norm: int = 2
+    ) -> _KNN:
+        """
+        Naive PyTorch implementation of K-Nearest Neighbors, returned as a _KNN.
+        Neighbors are sorted by increasing distance (squared for norm=2).
+        """
+        N, P1, D = p1.shape
+        _N, P2, _D = p2.shape
+
+        assert N == _N and D == _D
+
+        if lengths1 is None:
+            lengths1 = torch.full((N,), P1, dtype=torch.int64, device=p1.device)
+        if lengths2 is None:
+            lengths2 = torch.full((N,), P2, dtype=torch.int64, device=p1.device)
+
+        dists = torch.zeros((N, P1, K), dtype=torch.float32, device=p1.device)
+        idx = torch.zeros((N, P1, K), dtype=torch.int64, device=p1.device)
+
+        for n in range(N):
+            num1 = lengths1[n].item()
+            num2 = lengths2[n].item()
+            pp1 = p1[n, :num1].view(num1, 1, D)
+            pp2 = p2[n, :num2].view(1, num2, D)
+            diff = pp1 - pp2  # broadcasts to (num1, num2, D)
+            if norm == 2:
+                diff = (diff * diff).sum(2)  # squared L2 distance, no sqrt
+            elif norm == 1:
+                diff = diff.abs().sum(2)
+            else:
+                raise ValueError("No support for norm %d" % (norm))
+            num2 = min(num2, K)  # with fewer than K points, extra slots stay 0
+            for i in range(num1):
+                dd = diff[i]
+                srt_dd, srt_idx = dd.sort()
+
+                dists[n, i, :num2] = srt_dd[:num2]
+                idx[n, i, :num2] = srt_idx[:num2]
+
+        return _KNN(dists=dists, idx=idx, knn=None)
+
+    def _knn_vs_python_square_helper(self, device, return_sorted):
+        Ns = [1, 4]
+        Ds = [3, 5, 8]
+        P1s = [8, 24]
+        P2s = [8, 16, 32]
+        Ks = [1, 3, 10]
+        norms = [1, 2]
+        versions = [0, 1, 2, 3]
+        factors = [Ns, Ds, P1s, P2s, Ks, norms]
+        for N, D, P1, P2, K, norm in product(*factors):
+            for version in versions:
+                if version == 3 and K > 4:
+                    continue  # version 3 expects K <= 4 (cf. test_knn_check_version)
+                x = torch.randn(N, P1, D, device=device, requires_grad=True)
+                x_cuda = x.clone().detach()
+                x_cuda.requires_grad_(True)
+                y = torch.randn(N, P2, D, device=device, requires_grad=True)
+                y_cuda = y.clone().detach()
+                y_cuda.requires_grad_(True)
+
+                # forward: naive reference (out1) vs knn_points (out2)
+                out1 = self._knn_points_naive(
+                    x, y, lengths1=None, lengths2=None, K=K, norm=norm
+                )
+                out2 = knn_points(
+                    x_cuda,
+                    y_cuda,
+                    K=K,
+                    norm=norm,
+                    version=version,
+                    return_sorted=return_sorted,
+                )
+                if K > 1 and not return_sorted:
+                    # check out2 differs from the sorted reference, i.e. is not sorted
+                    self.assertFalse(torch.allclose(out1[0], out2[0]))
+                    self.assertFalse(torch.allclose(out1[1], out2[1]))
+                    # now sort out2 so it can be compared with the reference
+                    dists, idx, _ = out2
+                    if P2 < K:
+                        dists[..., P2:] = float("inf")  # slots past P2 must sort last
+                        dists, sort_idx = dists.sort(dim=2)
+                        dists[..., P2:] = 0  # back to 0, as in the naive reference
+                    else:
+                        dists, sort_idx = dists.sort(dim=2)
+                    idx = idx.gather(2, sort_idx)
+                    out2 = _KNN(dists, idx, None)
+
+                self.assertClose(out1[0], out2[0])
+                self.assertTrue(torch.all(out1[1] == out2[1]))
+
+                # backward: gradients of both implementations must agree
+                grad_dist = torch.ones((N, P1, K), dtype=torch.float32, device=device)
+                loss1 = (out1.dists * grad_dist).sum()
+                loss1.backward()
+                loss2 = (out2.dists * grad_dist).sum()
+                loss2.backward()
+
+                self.assertClose(x_cuda.grad, x.grad, atol=5e-6)
+                self.assertClose(y_cuda.grad, y.grad, atol=5e-6)
+
+    def test_knn_vs_python_square_cpu(self):
+        device = torch.device("cpu")
+        self._knn_vs_python_square_helper(device, return_sorted=True)
+
+    def test_knn_vs_python_square_cuda(self):
+        device = get_random_cuda_device()
+        # Check both cases where the output is sorted and unsorted
+        self._knn_vs_python_square_helper(device, return_sorted=True)
+        self._knn_vs_python_square_helper(device, return_sorted=False)
+
+    def _knn_vs_python_ragged_helper(self, device):
+        Ns = [1, 4]
+        Ds = [3, 5, 8]
+        P1s = [8, 24]
+        P2s = [8, 16, 32]
+        Ks = [1, 3, 10]
+        norms = [1, 2]
+        factors = [Ns, Ds, P1s, P2s, Ks, norms]
+        for N, D, P1, P2, K, norm in product(*factors):
+            x = torch.rand((N, P1, D), device=device, requires_grad=True)
+            y = torch.rand((N, P2, D), device=device, requires_grad=True)
+            lengths1 = torch.randint(low=1, high=P1, size=(N,), device=device)
+            lengths2 = torch.randint(low=1, high=P2, size=(N,), device=device)
+
+            x_csrc = x.clone().detach()
+            x_csrc.requires_grad_(True)
+            y_csrc = y.clone().detach()
+            y_csrc.requires_grad_(True)
+
+            # forward
+            out1 = self._knn_points_naive(
+                x, y, lengths1=lengths1, lengths2=lengths2, K=K, norm=norm
+            )
+            out2 = knn_points(
+                x_csrc, y_csrc, lengths1=lengths1, lengths2=lengths2, K=K, norm=norm
+            )
+            self.assertClose(out1[0], out2[0])
+            self.assertTrue(torch.all(out1[1] == out2[1]))
+
+            # backward
+            grad_dist = torch.ones((N, P1, K), dtype=torch.float32, device=device)
+            loss1 = (out1.dists * grad_dist).sum()
+            loss1.backward()
+            loss2 = (out2.dists * grad_dist).sum()
+            loss2.backward()
+
+            self.assertClose(x_csrc.grad, x.grad, atol=5e-6)
+            self.assertClose(y_csrc.grad, y.grad, atol=5e-6)
+
+    def test_knn_vs_python_ragged_cpu(self):
+        device = torch.device("cpu")
+        self._knn_vs_python_ragged_helper(device)
+
+    def test_knn_vs_python_ragged_cuda(self):
+        device = get_random_cuda_device()
+        self._knn_vs_python_ragged_helper(device)
+
+    def test_knn_gather(self):
+        device = get_random_cuda_device()
+        N, P1, P2, K, D = 4, 16, 12, 8, 3
+        x = torch.rand((N, P1, D), device=device)
+        y = torch.rand((N, P2, D), device=device)
+        lengths1 = torch.randint(low=1, high=P1, size=(N,), device=device)
+        lengths2 = torch.randint(low=1, high=P2, size=(N,), device=device)
+
+        out = knn_points(x, y, lengths1=lengths1, lengths2=lengths2, K=K)
+        y_nn = knn_gather(y, out.idx, lengths2)
+
+        for n in range(N):
+            for p1 in range(P1):
+                for k in range(K):
+                    if k < lengths2[n]:
+                        self.assertClose(y_nn[n, p1, k], y[n, out.idx[n, p1, k]])
+                    else:
+                        self.assertTrue(torch.all(y_nn[n, p1, k] == 0.0))
+
+    def test_knn_check_version(self):
+        try:
+            from pytorch3d._C import knn_check_version
+        except ImportError:
+            # knn_check_version will only be defined if we compiled with CUDA support
+            return
+        for D in range(-10, 10):
+            for K in range(-10, 20):
+                v0 = True
+                v1 = 1 <= D <= 32
+                v2 = 1 <= D <= 8 and 1 <= K <= 32
+                v3 = 1 <= D <= 8 and 1 <= K <= 4
+                all_expected = [v0, v1, v2, v3]
+                for version in range(-10, 10):
+                    actual = knn_check_version(version, D, K)
+                    expected = False
+                    if 0 <= version < len(all_expected):
+                        expected = all_expected[version]
+                    self.assertEqual(actual, expected)
+
+    def test_invalid_norm(self):
+        device = get_random_cuda_device()
+        N, P1, P2, K, D = 4, 16, 12, 8, 3
+        x = torch.rand((N, P1, D), device=device)
+        y = torch.rand((N, P2, D), device=device)
+        with self.assertRaisesRegex(ValueError, "Support for 1 or 2 norm."):
+            knn_points(x, y, K=K, norm=3)
+
+        with self.assertRaisesRegex(ValueError, "Support for 1 or 2 norm."):
+            knn_points(x, y, K=K, norm=0)
+
+    @staticmethod
+    def knn_square(N: int, P1: int, P2: int, D: int, K: int, device: str):
+        device = torch.device(device)
+        pts1 = torch.randn(N, P1, D, device=device, requires_grad=True)
+        pts2 = torch.randn(N, P2, D, device=device, requires_grad=True)
+        grad_dists = torch.randn(N, P1, K, device=device)
+        torch.cuda.synchronize()
+
+        def output():
+            out = knn_points(pts1, pts2, K=K)
+            loss = (out.dists * grad_dists).sum()
+            loss.backward()
+            torch.cuda.synchronize()
+
+        return output
+
+    @staticmethod
+    def knn_ragged(N: int, P1: int, P2: int, D: int, K: int, device: str):
+        device = torch.device(device)
+        pts1 = torch.rand((N, P1, D), device=device, requires_grad=True)
+        pts2 = torch.rand((N, P2, D), device=device, requires_grad=True)
+        lengths1 = torch.randint(low=1, high=P1, size=(N,), device=device)
+        lengths2 = torch.randint(low=1, high=P2, size=(N,), device=device)
+        grad_dists = torch.randn(N, P1, K, device=device)
+        torch.cuda.synchronize()
+
+        def output():
+            out = knn_points(pts1, pts2, lengths1=lengths1, lengths2=lengths2, K=K)
+            loss = (out.dists * grad_dists).sum()
+            loss.backward()
+            torch.cuda.synchronize()
+
+        return output
diff --git a/pytorch3d/tests/test_laplacian_matrices.py b/pytorch3d/tests/test_laplacian_matrices.py
new file mode 100644
index 0000000000000000000000000000000000000000..f04c6dfebf2ce6a0cebee01e4a7a1100974a33b8
--- /dev/null
+++ b/pytorch3d/tests/test_laplacian_matrices.py
@@ -0,0 +1,120 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.ops import cot_laplacian, laplacian, norm_laplacian
+from pytorch3d.structures.meshes import Meshes
+
+from .common_testing import get_random_cuda_device, TestCaseMixin
+
+
+class TestLaplacianMatrices(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(1)
+
+    def init_mesh(self) -> Meshes:
+        V, F = 32, 64
+        device = get_random_cuda_device()
+        # random vertices
+        verts = torch.rand((V, 3), dtype=torch.float32, device=device)
+        # random valid faces: 3 distinct vertex ids each (never e.g. (v0, v0, v1))
+        faces = torch.stack([torch.randperm(V) for f in range(F)], dim=0)[:, :3]
+        faces = faces.to(device=device)
+        return Meshes(verts=[verts], faces=[faces])
+
+    def test_laplacian(self):
+        mesh = self.init_mesh()
+        verts = mesh.verts_packed()
+        edges = mesh.edges_packed()
+        V, E = verts.shape[0], edges.shape[0]
+
+        L = laplacian(verts, edges)
+
+        Lnaive = torch.zeros((V, V), dtype=torch.float32, device=verts.device)
+        for e in range(E):
+            e0, e1 = edges[e]
+            Lnaive[e0, e1] = 1
+            # symmetric
+            Lnaive[e1, e0] = 1
+
+        deg = Lnaive.sum(1).view(-1, 1)  # row sums of the 0/1 adjacency matrix
+        deg[deg > 0] = 1.0 / deg[deg > 0]  # invert non-zero degrees only
+        Lnaive = Lnaive * deg  # scale row i by 1 / deg(i)
+        diag = torch.eye(V, dtype=torch.float32, device=mesh.device)
+        Lnaive.masked_fill_(diag > 0, -1)  # every diagonal entry becomes -1
+
+        self.assertClose(L.to_dense(), Lnaive)
+
+    def test_cot_laplacian(self):
+        mesh = self.init_mesh()
+        verts = mesh.verts_packed()
+        faces = mesh.faces_packed()
+        V = verts.shape[0]
+
+        eps = 1e-12
+
+        L, inv_areas = cot_laplacian(verts, faces, eps=eps)
+
+        Lnaive = torch.zeros((V, V), dtype=torch.float32, device=verts.device)
+        inv_areas_naive = torch.zeros((V, 1), dtype=torch.float32, device=verts.device)
+
+        for f in faces:
+            v0 = verts[f[0], :]
+            v1 = verts[f[1], :]
+            v2 = verts[f[2], :]
+            A = (v1 - v2).norm()  # length of the edge opposite v0
+            B = (v0 - v2).norm()  # length of the edge opposite v1
+            C = (v0 - v1).norm()  # length of the edge opposite v2
+            s = 0.5 * (A + B + C)  # semi-perimeter, for Heron's formula below
+
+            face_area = (s * (s - A) * (s - B) * (s - C)).clamp_(min=1e-12).sqrt()
+            inv_areas_naive[f[0]] += face_area
+            inv_areas_naive[f[1]] += face_area
+            inv_areas_naive[f[2]] += face_area
+
+            A2, B2, C2 = A * A, B * B, C * C
+            cota = (B2 + C2 - A2) / face_area / 4.0  # cot of the angle at v0
+            cotb = (A2 + C2 - B2) / face_area / 4.0  # cot of the angle at v1
+            cotc = (A2 + B2 - C2) / face_area / 4.0  # cot of the angle at v2
+
+            Lnaive[f[1], f[2]] += cota
+            Lnaive[f[2], f[0]] += cotb
+            Lnaive[f[0], f[1]] += cotc
+            # symmetric
+            Lnaive[f[2], f[1]] += cota
+            Lnaive[f[0], f[2]] += cotb
+            Lnaive[f[1], f[0]] += cotc
+
+        idx = inv_areas_naive > 0
+        inv_areas_naive[idx] = 1.0 / inv_areas_naive[idx]  # invert non-zero sums only
+
+        self.assertClose(inv_areas, inv_areas_naive)
+        self.assertClose(L.to_dense(), Lnaive)
+
+    def test_norm_laplacian(self):
+        mesh = self.init_mesh()
+        verts = mesh.verts_packed()
+        edges = mesh.edges_packed()
+        V, E = verts.shape[0], edges.shape[0]
+
+        eps = 1e-12
+
+        L = norm_laplacian(verts, edges, eps=eps)
+
+        Lnaive = torch.zeros((V, V), dtype=torch.float32, device=verts.device)
+        for e in range(E):
+            e0, e1 = edges[e]
+            v0 = verts[e0]
+            v1 = verts[e1]
+
+            w01 = 1.0 / ((v0 - v1).norm() + eps)
+            Lnaive[e0, e1] += w01
+            Lnaive[e1, e0] += w01
+
+        self.assertClose(L.to_dense(), Lnaive)
diff --git a/pytorch3d/tests/test_lighting.py b/pytorch3d/tests/test_lighting.py
new file mode 100644
index 0000000000000000000000000000000000000000..d886e825442eea746c4d12c687d17d3b8ad34ec5
--- /dev/null
+++ b/pytorch3d/tests/test_lighting.py
@@ -0,0 +1,554 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import numpy as np
+import torch
+from pytorch3d.renderer.lighting import AmbientLights, DirectionalLights, PointLights
+from pytorch3d.transforms import RotateAxisAngle
+
+from .common_testing import TestCaseMixin
+
+
+class TestLights(TestCaseMixin, unittest.TestCase):
+    def test_init_lights(self):
+        """
+        Initialize Lights class with the default values.
+        """
+        device = torch.device("cuda:0")
+        light = DirectionalLights(device=device)
+        keys = ["ambient_color", "diffuse_color", "specular_color", "direction"]
+        for k in keys:
+            prop = getattr(light, k)
+            self.assertTrue(torch.is_tensor(prop))
+            self.assertTrue(prop.device == device)
+            self.assertTrue(prop.shape == (1, 3))
+
+        light = PointLights(device=device)
+        keys = ["ambient_color", "diffuse_color", "specular_color", "location"]
+        for k in keys:
+            prop = getattr(light, k)
+            self.assertTrue(torch.is_tensor(prop))
+            self.assertTrue(prop.device == device)
+            self.assertTrue(prop.shape == (1, 3))
+
+    def test_lights_clone_to(self):
+        device = torch.device("cuda:0")
+        cpu = torch.device("cpu")
+        light = DirectionalLights()
+        new_light = light.clone().to(device)
+        keys = ["ambient_color", "diffuse_color", "specular_color", "direction"]
+        for k in keys:
+            prop = getattr(light, k)
+            new_prop = getattr(new_light, k)
+            self.assertTrue(prop.device == cpu)
+            self.assertTrue(new_prop.device == device)
+            self.assertSeparate(new_prop, prop)
+
+        light = PointLights()
+        new_light = light.clone().to(device)
+        keys = ["ambient_color", "diffuse_color", "specular_color", "location"]
+        for k in keys:
+            prop = getattr(light, k)
+            new_prop = getattr(new_light, k)
+            self.assertTrue(prop.device == cpu)
+            self.assertTrue(new_prop.device == device)
+            self.assertSeparate(new_prop, prop)
+
+    def test_lights_accessor(self):
+        d_light = DirectionalLights(ambient_color=((0.0, 0.0, 0.0), (1.0, 1.0, 1.0)))
+        p_light = PointLights(ambient_color=((0.0, 0.0, 0.0), (1.0, 1.0, 1.0)))
+        for light in [d_light, p_light]:
+            # Update element
+            color = (0.5, 0.5, 0.5)
+            light[1].ambient_color = color
+            self.assertClose(light.ambient_color[1], torch.tensor(color))
+            # Get item and get value
+            l0 = light[0]
+            self.assertClose(l0.ambient_color, torch.tensor((0.0, 0.0, 0.0)))
+
+    def test_initialize_lights_broadcast(self):
+        light = DirectionalLights(
+            ambient_color=torch.randn(10, 3),
+            diffuse_color=torch.randn(1, 3),
+            specular_color=torch.randn(1, 3),
+        )
+        keys = ["ambient_color", "diffuse_color", "specular_color", "direction"]
+        for k in keys:
+            prop = getattr(light, k)
+            self.assertTrue(prop.shape == (10, 3))
+
+        light = PointLights(
+            ambient_color=torch.randn(10, 3),
+            diffuse_color=torch.randn(1, 3),
+            specular_color=torch.randn(1, 3),
+        )
+        keys = ["ambient_color", "diffuse_color", "specular_color", "location"]
+        for k in keys:
+            prop = getattr(light, k)
+            self.assertTrue(prop.shape == (10, 3))
+
+    def test_initialize_lights_broadcast_fail(self):
+        """
+        Batch dims have to be the same or 1.
+        """
+        with self.assertRaises(ValueError):
+            DirectionalLights(
+                ambient_color=torch.randn(10, 3), diffuse_color=torch.randn(15, 3)
+            )
+
+        with self.assertRaises(ValueError):
+            PointLights(
+                ambient_color=torch.randn(10, 3), diffuse_color=torch.randn(15, 3)
+            )
+
+    def test_initialize_lights_dimensions_fail(self):
+        """
+        Color should have shape (N, 3) or (1, 3)
+        """
+        with self.assertRaises(ValueError):
+            DirectionalLights(ambient_color=torch.randn(10, 4))
+
+        with self.assertRaises(ValueError):
+            DirectionalLights(direction=torch.randn(10, 4))
+
+        with self.assertRaises(ValueError):
+            PointLights(ambient_color=torch.randn(10, 4))
+
+        with self.assertRaises(ValueError):
+            PointLights(location=torch.randn(10, 4))
+
+    def test_initialize_ambient(self):
+        N = 13
+        color = 0.8 * torch.ones((N, 3))
+        lights = AmbientLights(ambient_color=color)
+        self.assertEqual(len(lights), N)
+        self.assertClose(lights.ambient_color, color)
+
+        lights = AmbientLights(ambient_color=color[:1])
+        self.assertEqual(len(lights), 1)
+        self.assertClose(lights.ambient_color, color[:1])
+
+
+class TestDiffuseLighting(TestCaseMixin, unittest.TestCase):
+    def test_diffuse_directional_lights(self):
+        """
+        Test with a single point where:
+        1) the normal and light direction are 45 degrees apart.
+        2) the normal and light direction are 90 degrees apart. The output
+           should be zero for this case
+        """
+        color = torch.tensor([1, 1, 1], dtype=torch.float32)
+        direction = torch.tensor(
+            [0, 1 / np.sqrt(2), 1 / np.sqrt(2)], dtype=torch.float32
+        )
+        normals = torch.tensor([0, 0, 1], dtype=torch.float32)
+        normals = normals[None, None, :]
+        expected_output = torch.tensor(
+            [1 / np.sqrt(2), 1 / np.sqrt(2), 1 / np.sqrt(2)], dtype=torch.float32
+        )
+        expected_output = expected_output.view(1, 1, 3).repeat(3, 1, 1)
+        light = DirectionalLights(diffuse_color=color, direction=direction)
+        output_light = light.diffuse(normals=normals)
+        self.assertClose(output_light, expected_output)
+
+        # Change light direction to be 90 degrees apart from normal direction.
+        direction = torch.tensor([0, 1, 0], dtype=torch.float32)
+        light.direction = direction
+        expected_output = torch.zeros_like(expected_output)
+        output_light = light.diffuse(normals=normals)
+        self.assertClose(output_light, expected_output)
+
+    def test_diffuse_point_lights(self):
+        """
+        Test with a single point at the origin. Test two cases:
+        1) the point light is at (0, 1/sqrt(2), 1/sqrt(2)) hence the light
+           direction is 45 degrees apart from the normal direction (0, 0, 1)
+        2) the point light is at (0, 1, 0) hence the light direction is 90
+           degrees apart from the normal direction. The output
+           should be zero for this case
+        """
+        color = torch.tensor([1, 1, 1], dtype=torch.float32)
+        location = torch.tensor(
+            [0, 1 / np.sqrt(2), 1 / np.sqrt(2)], dtype=torch.float32
+        )
+        points = torch.tensor([0, 0, 0], dtype=torch.float32)
+        normals = torch.tensor([0, 0, 1], dtype=torch.float32)
+        expected_output = torch.tensor(
+            [1 / np.sqrt(2), 1 / np.sqrt(2), 1 / np.sqrt(2)], dtype=torch.float32
+        )
+        expected_output = expected_output.view(-1, 1, 3)
+        light = PointLights(diffuse_color=color[None, :], location=location[None, :])
+        output_light = light.diffuse(
+            points=points[None, None, :], normals=normals[None, None, :]
+        )
+        self.assertClose(output_light, expected_output)
+
+        # Move the light so its direction is 90 degrees apart from the normal.
+        location = torch.tensor([0, 1, 0], dtype=torch.float32)
+        expected_output = torch.zeros_like(expected_output)
+        light = PointLights(diffuse_color=color[None, :], location=location[None, :])
+        output_light = light.diffuse(
+            points=points[None, None, :], normals=normals[None, None, :]
+        )
+        self.assertClose(output_light, expected_output)
+
+    def test_diffuse_batched(self):
+        """
+        Test with a batch where each batch element has one point
+        where the normal and light direction are 45 degrees apart.
+        """
+        batch_size = 10
+        color = torch.tensor([1, 1, 1], dtype=torch.float32)
+        direction = torch.tensor(
+            [0, 1 / np.sqrt(2), 1 / np.sqrt(2)], dtype=torch.float32
+        )
+        normals = torch.tensor([0, 0, 1], dtype=torch.float32)
+        expected_out = torch.tensor(
+            [1 / np.sqrt(2), 1 / np.sqrt(2), 1 / np.sqrt(2)], dtype=torch.float32
+        )
+
+        # Reshape
+        direction = direction.view(-1, 3).expand(batch_size, -1)
+        normals = normals.view(-1, 1, 3).expand(batch_size, -1, -1)
+        color = color.view(-1, 3).expand(batch_size, -1)
+        expected_out = expected_out.view(-1, 1, 3).expand(batch_size, 1, 3)
+
+        lights = DirectionalLights(diffuse_color=color, direction=direction)
+        output_light = lights.diffuse(normals=normals)
+        self.assertClose(output_light, expected_out)
+
+    def test_diffuse_batched_broadcast_inputs(self):
+        """
+        Test with a batch where each batch element has one point
+        where the normal and light direction are 45 degrees apart.
+        The color and direction are the same for each batch element.
+        """
+        batch_size = 10
+        color = torch.tensor([1, 1, 1], dtype=torch.float32)
+        direction = torch.tensor(
+            [0, 1 / np.sqrt(2), 1 / np.sqrt(2)], dtype=torch.float32
+        )
+        normals = torch.tensor([0, 0, 1], dtype=torch.float32)
+        expected_out = torch.tensor(
+            [1 / np.sqrt(2), 1 / np.sqrt(2), 1 / np.sqrt(2)], dtype=torch.float32
+        )
+
+        # Reshape
+        normals = normals.view(-1, 1, 3).expand(batch_size, -1, -1)
+        expected_out = expected_out.view(-1, 1, 3).expand(batch_size, 1, 3)
+
+        # Don't expand the direction or color. Broadcasting should happen
+        # in the diffuse function.
+        direction = direction.view(1, 3)
+        color = color.view(1, 3)
+
+        lights = DirectionalLights(diffuse_color=color, direction=direction)
+        output_light = lights.diffuse(normals=normals)
+        self.assertClose(output_light, expected_out)
+
+    def test_diffuse_batched_arbitrary_input_dims(self):
+        """
+        Test with a batch of inputs where shape of the input is mimicking the
+        shape in a shading function i.e. an interpolated normal per pixel for
+        top K faces per pixel.
+        """
+        N, H, W, K = 16, 256, 256, 100
+        device = torch.device("cuda:0")
+        color = torch.tensor([1, 1, 1], dtype=torch.float32, device=device)
+        direction = torch.tensor(
+            [0, 1 / np.sqrt(2), 1 / np.sqrt(2)], dtype=torch.float32, device=device
+        )
+        normals = torch.tensor([0, 0, 1], dtype=torch.float32, device=device)
+        normals = normals.view(1, 1, 1, 1, 3).expand(N, H, W, K, -1)
+        direction = direction.view(1, 3)
+        color = color.view(1, 3)
+        expected_output = torch.tensor(
+            [1 / np.sqrt(2), 1 / np.sqrt(2), 1 / np.sqrt(2)],
+            dtype=torch.float32,
+            device=device,
+        )
+        expected_output = expected_output.view(1, 1, 1, 1, 3)
+        expected_output = expected_output.expand(N, H, W, K, -1)
+
+        lights = DirectionalLights(diffuse_color=color, direction=direction)
+        output_light = lights.diffuse(normals=normals)
+        self.assertClose(output_light, expected_output)
+
+    def test_diffuse_batched_packed(self):
+        """
+        Test with a batch of 2 meshes each of which has faces on a single plane.
+        The normal and light direction are 45 degrees apart for the first mesh
+        and 90 degrees apart for the second mesh.
+
+        The points and normals are in the packed format i.e. no batch dimension.
+        """
+        verts_packed = torch.rand((10, 3))  # points aren't used
+        faces_per_mesh = [6, 4]
+        mesh_to_vert_idx = [0] * faces_per_mesh[0] + [1] * faces_per_mesh[1]
+        mesh_to_vert_idx = torch.tensor(mesh_to_vert_idx, dtype=torch.int64)
+        color = torch.tensor([[1, 1, 1], [1, 1, 1]], dtype=torch.float32)
+        direction = torch.tensor(
+            [
+                [0, 1 / np.sqrt(2), 1 / np.sqrt(2)],
+                [0, 1, 0],  # 90 degrees to normal so zero diffuse light
+            ],
+            dtype=torch.float32,
+        )
+        normals = torch.tensor([[0, 0, 1], [0, 0, 1]], dtype=torch.float32)
+        expected_output = torch.zeros_like(verts_packed, dtype=torch.float32)
+        expected_output[:6, :] += 1 / np.sqrt(2)
+        expected_output[6:, :] = 0.0
+        lights = DirectionalLights(
+            diffuse_color=color[mesh_to_vert_idx, :],
+            direction=direction[mesh_to_vert_idx, :],
+        )
+        output_light = lights.diffuse(normals=normals[mesh_to_vert_idx, :])
+        self.assertClose(output_light, expected_output)
+
+
+class TestSpecularLighting(TestCaseMixin, unittest.TestCase):
+    def test_specular_directional_lights(self):
+        """
+        Specular highlights depend on the camera position as well as the light
+        position/direction.
+        Test with a single point where:
+        1) the normal and light direction are -45 degrees apart and the normal
+           and camera position are +45 degrees apart. The reflected light ray
+           will be perfectly aligned with the camera so the output is 1.0.
+        2) the normal and light direction are -45 degrees apart and the
+           camera position is behind the point. The output should be zero for
+           this case.
+        """
+        color = torch.tensor([1, 0, 1], dtype=torch.float32)
+        direction = torch.tensor(
+            [-1 / np.sqrt(2), 1 / np.sqrt(2), 0], dtype=torch.float32
+        )
+        camera_position = torch.tensor(
+            [+1 / np.sqrt(2), 1 / np.sqrt(2), 0], dtype=torch.float32
+        )
+        points = torch.tensor([0, 0, 0], dtype=torch.float32)
+        normals = torch.tensor([0, 1, 0], dtype=torch.float32)
+        expected_output = torch.tensor([1.0, 0.0, 1.0], dtype=torch.float32)
+        expected_output = expected_output.view(1, 1, 3).repeat(3, 1, 1)
+        lights = DirectionalLights(specular_color=color, direction=direction)
+        output_light = lights.specular(
+            points=points[None, None, :],
+            normals=normals[None, None, :],
+            camera_position=camera_position[None, :],
+            shininess=torch.tensor(10),
+        )
+        self.assertClose(output_light, expected_output)
+
+        # Change camera position to be behind the point.
+        camera_position = torch.tensor(
+            [+1 / np.sqrt(2), -1 / np.sqrt(2), 0], dtype=torch.float32
+        )
+        expected_output = torch.zeros_like(expected_output)
+        output_light = lights.specular(
+            points=points[None, None, :],
+            normals=normals[None, None, :],
+            camera_position=camera_position[None, :],
+            shininess=torch.tensor(10),
+        )
+        self.assertClose(output_light, expected_output)
+
+    def test_specular_point_lights(self):
+        """
+        Replace directional lights with point lights and check the output
+        is the same.
+
+        Test an additional case where the angle between the light reflection
+        direction and the view direction is 30 degrees.
+        """
+        color = torch.tensor([1, 0, 1], dtype=torch.float32)
+        location = torch.tensor([-1, 1, 0], dtype=torch.float32)  # 45 deg off the +y normal
+        camera_position = torch.tensor(  # mirror image of the light direction about +y
+            [+1 / np.sqrt(2), 1 / np.sqrt(2), 0], dtype=torch.float32
+        )
+        points = torch.tensor([0, 0, 0], dtype=torch.float32)  # shaded point at the origin
+        normals = torch.tensor([0, 1, 0], dtype=torch.float32)
+        expected_output = torch.tensor([1.0, 0.0, 1.0], dtype=torch.float32)  # equals `color`
+        expected_output = expected_output.view(-1, 1, 3)  # -> shape (1, 1, 3)
+        lights = PointLights(specular_color=color[None, :], location=location[None, :])
+        output_light = lights.specular(
+            points=points[None, None, :],  # (1, 1, 3)
+            normals=normals[None, None, :],  # (1, 1, 3)
+            camera_position=camera_position[None, :],  # (1, 3)
+            shininess=torch.tensor(10),
+        )
+        self.assertClose(output_light, expected_output)
+
+        # Change camera position to be behind the point (negative y, opposite the normal)
+        camera_position = torch.tensor(
+            [+1 / np.sqrt(2), -1 / np.sqrt(2), 0], dtype=torch.float32
+        )
+        expected_output = torch.zeros_like(expected_output)  # no highlight expected
+        output_light = lights.specular(
+            points=points[None, None, :],
+            normals=normals[None, None, :],
+            camera_position=camera_position[None, :],
+            shininess=torch.tensor(10),
+        )
+        self.assertClose(output_light, expected_output)
+
+        # Change camera direction to be 30 degrees from the reflection direction
+        camera_position = torch.tensor(  # start again from the mirror direction
+            [+1 / np.sqrt(2), 1 / np.sqrt(2), 0], dtype=torch.float32
+        )
+        rotate_30 = RotateAxisAngle(-30, axis="z")  # presumably degrees — confirm against RotateAxisAngle
+        camera_position = rotate_30.transform_points(camera_position[None, :])
+        expected_output = torch.tensor(  # cos(30 deg) in the channels where `color` is 1
+            [np.cos(30.0 * np.pi / 180), 0.0, np.cos(30.0 * np.pi / 180)],
+            dtype=torch.float32,
+        )
+        expected_output = expected_output.view(-1, 1, 3)  # -> shape (1, 1, 3)
+        output_light = lights.specular(
+            points=points[None, None, :],
+            normals=normals[None, None, :],
+            camera_position=camera_position[None, :],
+            shininess=torch.tensor(10),
+        )
+        self.assertClose(output_light, expected_output**10)  # exponent matches shininess=10 above
+
+    def test_specular_batched(self):  # every input is explicitly expanded to batch_size
+        batch_size = 10
+        color = torch.tensor([1, 0, 1], dtype=torch.float32)
+        direction = torch.tensor(
+            [-1 / np.sqrt(2), 1 / np.sqrt(2), 0], dtype=torch.float32
+        )
+        camera_position = torch.tensor(  # mirror image of `direction` about the +y normal
+            [+1 / np.sqrt(2), 1 / np.sqrt(2), 0], dtype=torch.float32
+        )
+        points = torch.tensor([0, 0, 0], dtype=torch.float32)
+        normals = torch.tensor([0, 1, 0], dtype=torch.float32)
+        expected_out = torch.tensor([1.0, 0.0, 1.0], dtype=torch.float32)  # equals `color`
+
+        # Reshape: add leading singleton dims, then expand them to batch_size
+        direction = direction.view(1, 3).expand(batch_size, -1)  # (10, 3)
+        camera_position = camera_position.view(1, 3).expand(batch_size, -1)  # (10, 3)
+        normals = normals.view(1, 1, 3).expand(batch_size, -1, -1)  # (10, 1, 3)
+        points = points.view(1, 1, 3).expand(batch_size, -1, -1)  # (10, 1, 3)
+        color = color.view(1, 3).expand(batch_size, -1)  # (10, 3)
+        expected_out = expected_out.view(1, 1, 3).expand(batch_size, 1, 3)  # (10, 1, 3)
+
+        lights = DirectionalLights(specular_color=color, direction=direction)
+        output_light = lights.specular(
+            points=points,
+            normals=normals,
+            camera_position=camera_position,
+            shininess=torch.tensor(10),
+        )
+        self.assertClose(output_light, expected_out)  # same value expected for every batch row
+
+    def test_specular_batched_broadcast_inputs(self):  # only points/normals carry the batch dim
+        batch_size = 10
+        color = torch.tensor([1, 0, 1], dtype=torch.float32)
+        direction = torch.tensor(
+            [-1 / np.sqrt(2), 1 / np.sqrt(2), 0], dtype=torch.float32
+        )
+        camera_position = torch.tensor(  # mirror image of `direction` about the +y normal
+            [+1 / np.sqrt(2), 1 / np.sqrt(2), 0], dtype=torch.float32
+        )
+        points = torch.tensor([0, 0, 0], dtype=torch.float32)
+        normals = torch.tensor([0, 1, 0], dtype=torch.float32)
+        expected_out = torch.tensor([1.0, 0.0, 1.0], dtype=torch.float32)  # equals `color`
+
+        # Reshape: only normals, points and the expected output get batch_size rows
+        normals = normals.view(1, 1, 3).expand(batch_size, -1, -1)  # (10, 1, 3)
+        points = points.view(1, 1, 3).expand(batch_size, -1, -1)  # (10, 1, 3)
+        expected_out = expected_out.view(1, 1, 3).expand(batch_size, 1, 3)  # (10, 1, 3)
+
+        # Don't expand the direction, color or camera_position.
+        # These should be broadcasted in the specular function
+        direction = direction.view(1, 3)  # stays (1, 3)
+        camera_position = camera_position.view(1, 3)  # stays (1, 3)
+        color = color.view(1, 3)  # stays (1, 3)
+
+        lights = DirectionalLights(specular_color=color, direction=direction)
+        output_light = lights.specular(
+            points=points,
+            normals=normals,
+            camera_position=camera_position,
+            shininess=torch.tensor(10),
+        )
+        self.assertClose(output_light, expected_out)  # output must have the full batch shape
+
+    def test_specular_batched_arbitrary_input_dims(self):
+        """
+        Test with a batch of inputs where shape of the input is mimicking the
+        shape expected after rasterization i.e. a normal per pixel for
+        top K faces per pixel.
+        """
+        device = torch.device("cuda:0")  # NOTE(review): hard-coded GPU; no skip/CPU fallback visible here
+        N, H, W, K = 8, 128, 128, 100  # K = faces per pixel (see docstring); H, W presumably image size
+        color = torch.tensor([1, 0, 1], dtype=torch.float32, device=device)
+        direction = torch.tensor(  # NOTE(review): no device= here (CPU), unlike color/points/normals — confirm intended
+            [-1 / np.sqrt(2), 1 / np.sqrt(2), 0], dtype=torch.float32
+        )
+        camera_position = torch.tensor(  # NOTE(review): also created without device= — confirm intended
+            [+1 / np.sqrt(2), 1 / np.sqrt(2), 0], dtype=torch.float32
+        )
+        points = torch.tensor([0, 0, 0], dtype=torch.float32, device=device)
+        normals = torch.tensor([0, 1, 0], dtype=torch.float32, device=device)
+        points = points.view(1, 1, 1, 1, 3).expand(N, H, W, K, 3)  # (8, 128, 128, 100, 3)
+        normals = normals.view(1, 1, 1, 1, 3).expand(N, H, W, K, 3)  # (8, 128, 128, 100, 3)
+
+        direction = direction.view(1, 3)  # left unexpanded, (1, 3)
+        color = color.view(1, 3)  # left unexpanded, (1, 3)
+        camera_position = camera_position.view(1, 3)  # left unexpanded, (1, 3)
+
+        expected_output = torch.tensor(  # equals `color`
+            [1.0, 0.0, 1.0], dtype=torch.float32, device=device
+        )
+        expected_output = expected_output.view(-1, 1, 1, 1, 3)  # (1, 1, 1, 1, 3)
+        expected_output = expected_output.expand(N, H, W, K, -1)  # same shape as points/normals
+
+        lights = DirectionalLights(specular_color=color, direction=direction)
+        output_light = lights.specular(
+            points=points,
+            normals=normals,
+            camera_position=camera_position,
+            shininess=10.0,  # plain Python float rather than a tensor
+        )
+        self.assertClose(output_light, expected_output)
+
+    def test_specular_batched_packed(self):
+        """
+        Test with a batch of 2 meshes each of which has faces on a single plane.
+        The points and normals are in the packed format i.e. no batch dimension.
+        """
+        faces_per_mesh = [6, 4]
+        mesh_to_vert_idx = [0] * faces_per_mesh[0] + [1] * faces_per_mesh[1]  # six 0s then four 1s
+        mesh_to_vert_idx = torch.tensor(mesh_to_vert_idx, dtype=torch.int64)  # used below to gather per-row values
+        color = torch.tensor([[1, 1, 1], [1, 0, 1]], dtype=torch.float32)  # one row per mesh
+        direction = torch.tensor(  # one row per mesh; the second row is not unit length
+            [[-1 / np.sqrt(2), 1 / np.sqrt(2), 0], [-1, 1, 0]], dtype=torch.float32
+        )
+        camera_position = torch.tensor(  # one row per mesh; only the sign of y differs
+            [
+                [+1 / np.sqrt(2), 1 / np.sqrt(2), 0],
+                [+1 / np.sqrt(2), -1 / np.sqrt(2), 0],
+            ],
+            dtype=torch.float32,
+        )
+        points = torch.tensor([[0, 0, 0]], dtype=torch.float32)  # single point, shape (1, 3)
+        normals = torch.tensor([[0, 1, 0], [0, 1, 0]], dtype=torch.float32)  # same +y normal for both meshes
+        expected_output = torch.zeros((10, 3), dtype=torch.float32)
+        expected_output[:6, :] += 1.0  # first six rows (mesh 0) expect 1s; the mesh 1 rows stay 0
+
+        lights = DirectionalLights(
+            specular_color=color[mesh_to_vert_idx, :],  # (10, 3)
+            direction=direction[mesh_to_vert_idx, :],  # (10, 3)
+        )
+        output_light = lights.specular(
+            points=points.view(-1, 3).expand(10, -1),  # (1, 3) -> (10, 3)
+            normals=normals.view(-1, 3)[mesh_to_vert_idx, :],  # (10, 3)
+            camera_position=camera_position[mesh_to_vert_idx, :],  # (10, 3)
+            shininess=10.0,
+        )
+        self.assertClose(output_light, expected_output)
diff --git a/pytorch3d/tests/test_marching_cubes.py b/pytorch3d/tests/test_marching_cubes.py
new file mode 100644
index 0000000000000000000000000000000000000000..116a18cb52f591e7972a9861c7f85e3e97a06d33
--- /dev/null
+++ b/pytorch3d/tests/test_marching_cubes.py
@@ -0,0 +1,963 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import pickle
+import unittest
+
+import torch
+from pytorch3d.ops.marching_cubes import marching_cubes, marching_cubes_naive
+
+from .common_testing import get_tests_dir, TestCaseMixin
+
+
+USE_SCIKIT = False  # NOTE(review): not read in the nearby tests; presumably gates a scikit-image comparison — confirm
+DATA_DIR = get_tests_dir() / "data"  # "data" entry under whatever get_tests_dir() returns
+
+
+def convert_to_local(verts, volume_dim):  # linear map: 0 -> -1 and (volume_dim - 1) -> +1
+    return (2 * verts) / (volume_dim - 1) - 1  # volume_dim == 1 would divide by zero
+
+
+class TestCubeConfiguration(TestCaseMixin, unittest.TestCase):
+
+    # Test single cubes. Each test below builds the cube vertex configuration
+    # with the same (0-indexed) case number in this figure:
+    # https://en.wikipedia.org/wiki/Marching_cubes#/media/File:MarchingCubes.svg
+
+    def test_empty_volume(self):  # case 0: uniform volume, no surface expected
+        volume_data = torch.ones(1, 2, 2, 2)  # (B, W, H, D)
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=False)
+
+        expected_verts = torch.tensor([[]])  # empty, shape (1, 0)
+        expected_faces = torch.tensor([[]], dtype=torch.int64)  # empty, shape (1, 0)
+        self.assertClose(verts, expected_verts)
+        self.assertClose(faces, expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=False)  # non-naive implementation
+        self.assertClose(verts, expected_verts)
+        self.assertClose(faces, expected_faces)
+
+    def test_case1(self):  # case 1: a single corner set to 0
+        volume_data = torch.ones(1, 2, 2, 2)  # (B, W, H, D)
+        volume_data[0, 0, 0, 0] = 0  # corner (w, h, d) = (0, 0, 0)
+        volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+        expected_verts = torch.tensor(  # midpoints of the three edges leaving that corner
+            [
+                [0.5, 0, 0],
+                [0, 0.5, 0],
+                [0, 0, 0.5],
+            ]
+        )
+        expected_faces = torch.tensor([[0, 1, 2]])  # one triangle, as indices into expected_verts
+
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=False)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=False)  # non-naive implementation
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        expected_verts = convert_to_local(expected_verts, 2)  # rescale golden verts from [0, 1] to [-1, 1]
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+    def test_case2(self):  # case 2: two corners sharing an edge set to 0
+        volume_data = torch.ones(1, 2, 2, 2)  # (B, W, H, D)
+        volume_data[0, 0:2, 0, 0] = 0  # corners (0, 0, 0) and (1, 0, 0), adjacent along W
+        volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=False)
+
+        expected_verts = torch.tensor(  # every vertex lies at the midpoint of a cube edge
+            [
+                [1.0000, 0.0000, 0.5000],
+                [0.0000, 0.5000, 0.0000],
+                [0.0000, 0.0000, 0.5000],
+                [1.0000, 0.5000, 0.0000],
+            ]
+        )
+        expected_faces = torch.tensor([[0, 1, 2], [3, 1, 0]])  # triangles as indices into expected_verts
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=False)  # non-naive implementation
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=True)
+        expected_verts = convert_to_local(expected_verts, 2)  # rescale golden verts from [0, 1] to [-1, 1]
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+    def test_case3(self):  # case 3: two corners diagonal on one face set to 0
+        volume_data = torch.ones(1, 2, 2, 2)  # (B, W, H, D)
+        volume_data[0, 0, 0, 0] = 0  # corner (w, h, d) = (0, 0, 0)
+        volume_data[0, 1, 1, 0] = 0  # corner (1, 1, 0): same d, differs in both w and h
+        volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=False)
+
+        expected_verts = torch.tensor(  # every vertex lies at the midpoint of a cube edge
+            [
+                [1.0000, 0.5000, 0.0000],
+                [1.0000, 1.0000, 0.5000],
+                [0.5000, 1.0000, 0.0000],
+                [0.5000, 0.0000, 0.0000],
+                [0.0000, 0.5000, 0.0000],
+                [0.0000, 0.0000, 0.5000],
+            ]
+        )
+        expected_faces = torch.tensor([[0, 1, 2], [3, 4, 5]])  # two triangles with no shared vertices
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=False)  # non-naive implementation
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=True)
+        expected_verts = convert_to_local(expected_verts, 2)  # rescale golden verts from [0, 1] to [-1, 1]
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+    def test_case4(self):  # case 4: three corners of the h == 0 face set to 0
+        volume_data = torch.ones(1, 2, 2, 2)  # (B, W, H, D)
+        volume_data[0, 1, 0, 0] = 0  # corner (w, h, d) = (1, 0, 0)
+        volume_data[0, 1, 0, 1] = 0  # corner (1, 0, 1)
+        volume_data[0, 0, 0, 1] = 0  # corner (0, 0, 1)
+        volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=False)
+
+        expected_verts = torch.tensor(  # every vertex lies at the midpoint of a cube edge
+            [
+                [0.0000, 0.0000, 0.5000],
+                [1.0000, 0.5000, 0.0000],
+                [0.5000, 0.0000, 0.0000],
+                [0.0000, 0.5000, 1.0000],
+                [1.0000, 0.5000, 1.0000],
+            ]
+        )
+        expected_faces = torch.tensor([[0, 1, 2], [0, 3, 1], [3, 4, 1]])  # indices into expected_verts
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=False)  # non-naive implementation
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=True)
+        expected_verts = convert_to_local(expected_verts, 2)  # rescale golden verts from [0, 1] to [-1, 1]
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+    def test_case5(self):  # case 5: all four corners of the h == 0 face set to 0
+        volume_data = torch.ones(1, 2, 2, 2)  # (B, W, H, D)
+        volume_data[0, 0:2, 0, 0:2] = 0  # every (w, d) combination at h == 0
+        volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=False)
+
+        expected_verts = torch.tensor(  # all four vertices have y == 0.5: a flat quad
+            [
+                [1.0000, 0.5000, 0.0000],
+                [0.0000, 0.5000, 0.0000],
+                [1.0000, 0.5000, 1.0000],
+                [0.0000, 0.5000, 1.0000],
+            ]
+        )
+
+        expected_faces = torch.tensor([[0, 1, 2], [2, 1, 3]])  # the quad split into two triangles
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=False)  # non-naive implementation
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=True)
+        expected_verts = convert_to_local(expected_verts, 2)  # rescale golden verts from [0, 1] to [-1, 1]
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+    def test_case6(self):  # case 6: three corners of the h == 0 face plus one isolated corner
+        volume_data = torch.ones(1, 2, 2, 2)  # (B, W, H, D)
+        volume_data[0, 1, 0, 0] = 0  # corner (w, h, d) = (1, 0, 0)
+        volume_data[0, 1, 0, 1] = 0  # corner (1, 0, 1)
+        volume_data[0, 0, 0, 1] = 0  # corner (0, 0, 1)
+        volume_data[0, 0, 1, 0] = 0  # corner (0, 1, 0): shares no edge with the three above
+        volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=False)
+
+        expected_verts = torch.tensor(  # every vertex lies at the midpoint of a cube edge
+            [
+                [0.5000, 1.0000, 0.0000],
+                [0.0000, 1.0000, 0.5000],
+                [0.0000, 0.5000, 0.0000],
+                [1.0000, 0.5000, 0.0000],
+                [0.5000, 0.0000, 0.0000],
+                [0.0000, 0.5000, 1.0000],
+                [1.0000, 0.5000, 1.0000],
+                [0.0000, 0.0000, 0.5000],
+            ]
+        )
+        expected_faces = torch.tensor([[0, 1, 2], [3, 4, 5], [3, 5, 6], [5, 4, 7]])  # indices into expected_verts
+
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=False)  # non-naive implementation
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=True)
+        expected_verts = convert_to_local(expected_verts, 2)  # rescale golden verts from [0, 1] to [-1, 1]
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+    def test_case7(self):  # case 7: four corners set to 0, no two of them sharing an edge
+        volume_data = torch.ones(1, 2, 2, 2)  # (B, W, H, D)
+        volume_data[0, 0, 0, 0] = 0  # corner (w, h, d) = (0, 0, 0)
+        volume_data[0, 1, 0, 1] = 0  # corner (1, 0, 1)
+        volume_data[0, 1, 1, 0] = 0  # corner (1, 1, 0)
+        volume_data[0, 0, 1, 1] = 0  # corner (0, 1, 1)
+        volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=False)
+
+        expected_verts = torch.tensor(  # every vertex lies at the midpoint of a cube edge
+            [
+                [0.5000, 1.0000, 1.0000],
+                [0.0000, 0.5000, 1.0000],
+                [0.0000, 1.0000, 0.5000],
+                [1.0000, 0.0000, 0.5000],
+                [0.5000, 0.0000, 1.0000],
+                [1.0000, 0.5000, 1.0000],
+                [0.5000, 0.0000, 0.0000],
+                [0.0000, 0.5000, 0.0000],
+                [0.0000, 0.0000, 0.5000],
+                [0.5000, 1.0000, 0.0000],
+                [1.0000, 0.5000, 0.0000],
+                [1.0000, 1.0000, 0.5000],
+            ]
+        )
+
+        expected_faces = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]])  # four disjoint triangles
+
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=False)  # non-naive implementation
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=True)
+        expected_verts = convert_to_local(expected_verts, 2)  # rescale golden verts from [0, 1] to [-1, 1]
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+    def test_case8(self):  # case 8: one corner and its three edge-neighbours set to 0
+        volume_data = torch.ones(1, 2, 2, 2)  # (B, W, H, D)
+        volume_data[0, 0, 0, 0] = 0  # corner (w, h, d) = (0, 0, 0)
+        volume_data[0, 0, 0, 1] = 0  # corner (0, 0, 1): adjacent to the other three
+        volume_data[0, 1, 0, 1] = 0  # corner (1, 0, 1)
+        volume_data[0, 0, 1, 1] = 0  # corner (0, 1, 1)
+        volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=False)
+
+        expected_verts = torch.tensor(  # every vertex lies at the midpoint of a cube edge
+            [
+                [1.0000, 0.5000, 1.0000],
+                [0.0000, 1.0000, 0.5000],
+                [0.5000, 1.0000, 1.0000],
+                [1.0000, 0.0000, 0.5000],
+                [0.0000, 0.5000, 0.0000],
+                [0.5000, 0.0000, 0.0000],
+            ]
+        )
+        expected_faces = torch.tensor([[0, 1, 2], [3, 1, 0], [3, 4, 1], [3, 5, 4]])  # indices into expected_verts
+
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=False)  # non-naive implementation
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=True)
+        expected_verts = convert_to_local(expected_verts, 2)  # rescale golden verts from [0, 1] to [-1, 1]
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+    def test_case9(self):  # case 9: four corners forming a path along three edges set to 0
+        volume_data = torch.ones(1, 2, 2, 2)  # (B, W, H, D)
+        volume_data[0, 1, 0, 0] = 0  # corner (w, h, d) = (1, 0, 0)
+        volume_data[0, 0, 0, 1] = 0  # corner (0, 0, 1)
+        volume_data[0, 1, 0, 1] = 0  # corner (1, 0, 1): adjacent to (1, 0, 0) and (0, 0, 1)
+        volume_data[0, 0, 1, 1] = 0  # corner (0, 1, 1): adjacent to (0, 0, 1)
+        volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=False)
+
+        expected_verts = torch.tensor(  # every vertex lies at the midpoint of a cube edge
+            [
+                [0.5000, 0.0000, 0.0000],
+                [0.0000, 0.0000, 0.5000],
+                [0.0000, 1.0000, 0.5000],
+                [1.0000, 0.5000, 1.0000],
+                [1.0000, 0.5000, 0.0000],
+                [0.5000, 1.0000, 1.0000],
+            ]
+        )
+        expected_faces = torch.tensor([[0, 1, 2], [0, 2, 3], [0, 3, 4], [5, 3, 2]])  # indices into expected_verts
+
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=False)  # non-naive implementation
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=True)
+        expected_verts = convert_to_local(expected_verts, 2)  # rescale golden verts from [0, 1] to [-1, 1]
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+    def test_case10(self):  # case 10: two corners at opposite ends of the body diagonal set to 0
+        volume_data = torch.ones(1, 2, 2, 2)  # (B, W, H, D)
+        volume_data[0, 0, 0, 0] = 0  # corner (w, h, d) = (0, 0, 0)
+        volume_data[0, 1, 1, 1] = 0  # corner (1, 1, 1)
+        volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=False)
+
+        expected_verts = torch.tensor(  # every vertex lies at the midpoint of a cube edge
+            [
+                [0.5000, 0.0000, 0.0000],
+                [0.0000, 0.5000, 0.0000],
+                [0.0000, 0.0000, 0.5000],
+                [1.0000, 1.0000, 0.5000],
+                [1.0000, 0.5000, 1.0000],
+                [0.5000, 1.0000, 1.0000],
+            ]
+        )
+
+        expected_faces = torch.tensor([[0, 1, 2], [3, 4, 5]])  # one triangle around each zeroed corner
+
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=False)  # non-naive implementation
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=True)
+        expected_verts = convert_to_local(expected_verts, 2)  # rescale golden verts from [0, 1] to [-1, 1]
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+    def test_case11(self):  # case 11: an edge-sharing pair plus one isolated corner set to 0
+        volume_data = torch.ones(1, 2, 2, 2)  # (B, W, H, D)
+        volume_data[0, 0, 0, 0] = 0  # corner (w, h, d) = (0, 0, 0)
+        volume_data[0, 1, 0, 0] = 0  # corner (1, 0, 0): adjacent to (0, 0, 0) along W
+        volume_data[0, 1, 1, 1] = 0  # corner (1, 1, 1): shares no edge with the pair
+        volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=False)
+
+        expected_verts = torch.tensor(  # every vertex lies at the midpoint of a cube edge
+            [
+                [1.0000, 0.0000, 0.5000],
+                [0.0000, 0.5000, 0.0000],
+                [0.0000, 0.0000, 0.5000],
+                [1.0000, 0.5000, 0.0000],
+                [1.0000, 1.0000, 0.5000],
+                [1.0000, 0.5000, 1.0000],
+                [0.5000, 1.0000, 1.0000],
+            ]
+        )
+
+        expected_faces = torch.tensor([[0, 1, 2], [0, 3, 1], [4, 5, 6]])  # indices into expected_verts
+
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=False)  # non-naive implementation
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=True)
+        expected_verts = convert_to_local(expected_verts, 2)  # rescale golden verts from [0, 1] to [-1, 1]
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+    def test_case12(self):  # case 12: three corners set to 0, no two of them sharing an edge
+        volume_data = torch.ones(1, 2, 2, 2)  # (B, W, H, D)
+        volume_data[0, 1, 0, 0] = 0  # corner (w, h, d) = (1, 0, 0)
+        volume_data[0, 0, 1, 0] = 0  # corner (0, 1, 0)
+        volume_data[0, 1, 1, 1] = 0  # corner (1, 1, 1)
+        volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=False)
+
+        expected_verts = torch.tensor(  # every vertex lies at the midpoint of a cube edge
+            [
+                [1.0000, 0.0000, 0.5000],
+                [1.0000, 0.5000, 0.0000],
+                [0.5000, 0.0000, 0.0000],
+                [1.0000, 1.0000, 0.5000],
+                [1.0000, 0.5000, 1.0000],
+                [0.5000, 1.0000, 1.0000],
+                [0.0000, 0.5000, 0.0000],
+                [0.5000, 1.0000, 0.0000],
+                [0.0000, 1.0000, 0.5000],
+            ]
+        )
+
+        expected_faces = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])  # three disjoint triangles
+
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=False)  # non-naive implementation
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=True)
+        expected_verts = convert_to_local(expected_verts, 2)  # rescale golden verts from [0, 1] to [-1, 1]
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+    def test_case13(self):  # case 13: two edge-sharing pairs on opposite sides of the cube set to 0
+        volume_data = torch.ones(1, 2, 2, 2)  # (B, W, H, D)
+        volume_data[0, 0, 0, 0] = 0  # corner (w, h, d) = (0, 0, 0)
+        volume_data[0, 0, 1, 0] = 0  # corner (0, 1, 0): adjacent to (0, 0, 0) along H
+        volume_data[0, 1, 0, 1] = 0  # corner (1, 0, 1)
+        volume_data[0, 1, 1, 1] = 0  # corner (1, 1, 1): adjacent to (1, 0, 1) along H
+        volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=False)
+
+        expected_verts = torch.tensor(  # every vertex lies at the midpoint of a cube edge
+            [
+                [1.0000, 0.0000, 0.5000],
+                [0.5000, 0.0000, 1.0000],
+                [1.0000, 1.0000, 0.5000],
+                [0.5000, 1.0000, 1.0000],
+                [0.0000, 0.0000, 0.5000],
+                [0.5000, 0.0000, 0.0000],
+                [0.5000, 1.0000, 0.0000],
+                [0.0000, 1.0000, 0.5000],
+            ]
+        )
+
+        expected_faces = torch.tensor([[0, 1, 2], [2, 1, 3], [4, 5, 6], [4, 6, 7]])  # two separate quads
+
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=False)  # non-naive implementation
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=True)
+        expected_verts = convert_to_local(expected_verts, 2)  # rescale golden verts from [0, 1] to [-1, 1]
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+    def test_case14(self):  # case 14: four corners forming a path along three edges set to 0
+        volume_data = torch.ones(1, 2, 2, 2)  # (B, W, H, D)
+        volume_data[0, 0, 0, 0] = 0  # corner (w, h, d) = (0, 0, 0)
+        volume_data[0, 0, 0, 1] = 0  # corner (0, 0, 1): adjacent to (0, 0, 0) along D
+        volume_data[0, 1, 0, 1] = 0  # corner (1, 0, 1): adjacent to (0, 0, 1) along W
+        volume_data[0, 1, 1, 1] = 0  # corner (1, 1, 1): adjacent to (1, 0, 1) along H
+        volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=False)
+
+        expected_verts = torch.tensor(  # every vertex lies at the midpoint of a cube edge
+            [
+                [0.5000, 0.0000, 0.0000],
+                [0.0000, 0.5000, 0.0000],
+                [0.0000, 0.5000, 1.0000],
+                [1.0000, 1.0000, 0.5000],
+                [1.0000, 0.0000, 0.5000],
+                [0.5000, 1.0000, 1.0000],
+            ]
+        )
+
+        expected_faces = torch.tensor([[0, 1, 2], [0, 2, 3], [0, 3, 4], [3, 2, 5]])  # indices into expected_verts
+
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=False)  # non-naive implementation
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=True)
+        expected_verts = convert_to_local(expected_verts, 2)  # rescale golden verts from [0, 1] to [-1, 1]
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+
+class TestMarchingCubes(TestCaseMixin, unittest.TestCase):
+    def test_single_point(self):  # one voxel set to 1 at the centre of a 3x3x3 volume of zeros
+        volume_data = torch.zeros(1, 3, 3, 3)  # (B, W, H, D)
+        volume_data[0, 1, 1, 1] = 1  # centre voxel
+        volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=False)
+
+        expected_verts = torch.tensor(  # six points, each 0.5 from (1, 1, 1) along one axis
+            [
+                [1.0000, 0.5000, 1.0000],
+                [1.0000, 1.0000, 0.5000],
+                [0.5000, 1.0000, 1.0000],
+                [1.5000, 1.0000, 1.0000],
+                [1.0000, 1.5000, 1.0000],
+                [1.0000, 1.0000, 1.5000],
+            ]
+        )
+        expected_faces = torch.tensor(  # eight triangles as indices into expected_verts
+            [
+                [0, 1, 2],
+                [1, 0, 3],
+                [1, 4, 2],
+                [1, 3, 4],
+                [0, 2, 5],
+                [3, 0, 5],
+                [2, 4, 5],
+                [3, 5, 4],
+            ]
+        )
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=False)  # non-naive implementation
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes_naive(volume_data, return_local_coords=True)
+        expected_verts = convert_to_local(expected_verts, 3)  # rescale golden verts from [0, 2] to [-1, 1]
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+        self.assertTrue(verts[0].ge(-1).all() and verts[0].le(1).all())  # local coords stay within [-1, 1]
+
+        verts, faces = marching_cubes(volume_data, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+        self.assertTrue(verts[0].ge(-1).all() and verts[0].le(1).all())  # local coords stay within [-1, 1]
+
+    def test_cube(self):
+        volume_data = torch.zeros(1, 5, 5, 5)  # (B, W, H, D)
+        volume_data[0, 1, 1, 1] = 1
+        volume_data[0, 1, 1, 2] = 1
+        volume_data[0, 2, 1, 1] = 1
+        volume_data[0, 2, 1, 2] = 1
+        volume_data[0, 1, 2, 1] = 1
+        volume_data[0, 1, 2, 2] = 1
+        volume_data[0, 2, 2, 1] = 1
+        volume_data[0, 2, 2, 2] = 1
+        volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+        expected_verts = torch.tensor(
+            [
+                [1.0000, 0.9000, 1.0000],
+                [1.0000, 1.0000, 0.9000],
+                [0.9000, 1.0000, 1.0000],
+                [2.0000, 0.9000, 1.0000],
+                [2.0000, 1.0000, 0.9000],
+                [2.1000, 1.0000, 1.0000],
+                [1.0000, 2.0000, 0.9000],
+                [0.9000, 2.0000, 1.0000],
+                [2.0000, 2.0000, 0.9000],
+                [2.1000, 2.0000, 1.0000],
+                [1.0000, 2.1000, 1.0000],
+                [2.0000, 2.1000, 1.0000],
+                [1.0000, 0.9000, 2.0000],
+                [0.9000, 1.0000, 2.0000],
+                [2.0000, 0.9000, 2.0000],
+                [2.1000, 1.0000, 2.0000],
+                [0.9000, 2.0000, 2.0000],
+                [2.1000, 2.0000, 2.0000],
+                [1.0000, 2.1000, 2.0000],
+                [2.0000, 2.1000, 2.0000],
+                [1.0000, 1.0000, 2.1000],
+                [2.0000, 1.0000, 2.1000],
+                [1.0000, 2.0000, 2.1000],
+                [2.0000, 2.0000, 2.1000],
+            ]
+        )
+
+        expected_faces = torch.tensor(
+            [
+                [0, 1, 2],
+                [0, 3, 4],
+                [1, 0, 4],
+                [4, 3, 5],
+                [1, 6, 7],
+                [2, 1, 7],
+                [4, 8, 1],
+                [1, 8, 6],
+                [8, 4, 5],
+                [9, 8, 5],
+                [6, 10, 7],
+                [6, 8, 11],
+                [10, 6, 11],
+                [8, 9, 11],
+                [12, 0, 2],
+                [13, 12, 2],
+                [3, 0, 14],
+                [14, 0, 12],
+                [15, 5, 3],
+                [14, 15, 3],
+                [2, 7, 13],
+                [7, 16, 13],
+                [5, 15, 9],
+                [9, 15, 17],
+                [10, 18, 16],
+                [7, 10, 16],
+                [11, 19, 10],
+                [19, 18, 10],
+                [9, 17, 19],
+                [11, 9, 19],
+                [12, 13, 20],
+                [14, 12, 20],
+                [21, 14, 20],
+                [15, 14, 21],
+                [13, 16, 22],
+                [20, 13, 22],
+                [21, 20, 23],
+                [20, 22, 23],
+                [17, 15, 21],
+                [23, 17, 21],
+                [16, 18, 22],
+                [23, 22, 18],
+                [19, 23, 18],
+                [17, 23, 19],
+            ]
+        )
+        verts, faces = marching_cubes_naive(volume_data, 0.9, return_local_coords=False)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, 0.9, return_local_coords=False)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+        verts2, faces2 = marching_cubes(volume_data, 0.9, return_local_coords=False)
+        self.assertClose(verts2[0], expected_verts)  # a repeated call must be stable
+        self.assertClose(faces2[0], expected_faces)
+        verts, faces = marching_cubes_naive(volume_data, 0.9, return_local_coords=True)
+        expected_verts = convert_to_local(expected_verts, 5)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        # Check all values are in the range [-1, 1]
+        self.assertTrue(verts[0].ge(-1).all() and verts[0].le(1).all())
+
+        verts, faces = marching_cubes(volume_data, 0.9, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+        self.assertTrue(verts[0].ge(-1).all() and verts[0].le(1).all())
+
+
+    def test_cube_no_duplicate_verts(self):
+        volume_data = torch.zeros(1, 5, 5, 5)  # (B, W, H, D)
+        volume_data[0, 1, 1, 1] = 1
+        volume_data[0, 1, 1, 2] = 1
+        volume_data[0, 2, 1, 1] = 1
+        volume_data[0, 2, 1, 2] = 1
+        volume_data[0, 1, 2, 1] = 1
+        volume_data[0, 1, 2, 2] = 1
+        volume_data[0, 2, 2, 1] = 1
+        volume_data[0, 2, 2, 2] = 1
+        volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+        verts, faces = marching_cubes_naive(volume_data, 1, return_local_coords=False)
+
+        expected_verts = torch.tensor(
+            [
+                [2.0, 1.0, 1.0],
+                [2.0, 2.0, 1.0],
+                [1.0, 1.0, 1.0],
+                [1.0, 2.0, 1.0],
+                [2.0, 1.0, 1.0],
+                [1.0, 1.0, 1.0],
+                [2.0, 1.0, 2.0],
+                [1.0, 1.0, 2.0],
+                [1.0, 1.0, 1.0],
+                [1.0, 2.0, 1.0],
+                [1.0, 1.0, 2.0],
+                [1.0, 2.0, 2.0],
+                [2.0, 1.0, 1.0],
+                [2.0, 1.0, 2.0],
+                [2.0, 2.0, 1.0],
+                [2.0, 2.0, 2.0],
+                [2.0, 2.0, 1.0],
+                [2.0, 2.0, 2.0],
+                [1.0, 2.0, 1.0],
+                [1.0, 2.0, 2.0],
+                [2.0, 1.0, 2.0],
+                [1.0, 1.0, 2.0],
+                [2.0, 2.0, 2.0],
+                [1.0, 2.0, 2.0],
+            ]
+        )
+
+        expected_faces = torch.tensor(
+            [
+                [0, 1, 2],
+                [2, 1, 3],
+                [4, 5, 6],
+                [6, 5, 7],
+                [8, 9, 10],
+                [9, 11, 10],
+                [12, 13, 14],
+                [14, 13, 15],
+                [16, 17, 18],
+                [17, 19, 18],
+                [20, 21, 22],
+                [21, 23, 22],
+            ]
+        )
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume_data, 1, return_local_coords=False)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes_naive(volume_data, 1, return_local_coords=True)
+        expected_verts = convert_to_local(expected_verts, 5)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+        self.assertTrue(verts[0].ge(-1).all() and verts[0].le(1).all())
+
+    def test_sphere(self):
+        # (B, W, H, D)
+        volume = torch.Tensor(
+            [
+                [
+                    [(x - 10) ** 2 + (y - 10) ** 2 + (z - 10) ** 2 for z in range(20)]
+                    for y in range(20)
+                ]
+                for x in range(20)
+            ]
+        ).unsqueeze(0)
+        volume = volume.permute(0, 3, 2, 1)  # (B, D, H, W)
+        verts, faces = marching_cubes_naive(
+            volume, isolevel=64, return_local_coords=False
+        )
+
+        data_filename = "test_marching_cubes_data/sphere_level64.pickle"
+        filename = os.path.join(DATA_DIR, data_filename)
+        with open(filename, "rb") as file:
+            verts_and_faces = pickle.load(file)
+        expected_verts = verts_and_faces["verts"]
+        expected_faces = verts_and_faces["faces"]
+
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes(volume, 64, return_local_coords=False)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        verts, faces = marching_cubes_naive(
+            volume, isolevel=64, return_local_coords=True
+        )
+
+        expected_verts = convert_to_local(expected_verts, 20)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+
+        # Check all values are in the range [-1, 1]
+        self.assertTrue(verts[0].ge(-1).all() and verts[0].le(1).all())
+
+        verts, faces = marching_cubes(volume, 64, return_local_coords=True)
+        self.assertClose(verts[0], expected_verts)
+        self.assertClose(faces[0], expected_faces)
+        self.assertTrue(verts[0].ge(-1).all() and verts[0].le(1).all())
+
+    # Uses skimage.draw.ellipsoid
+    def test_double_ellipsoid(self):
+        if USE_SCIKIT:
+            import numpy as np
+            from skimage.draw import ellipsoid
+
+            ellip_base = ellipsoid(6, 10, 16, levelset=True)
+            ellip_double = np.concatenate(
+                (ellip_base[:-1, ...], ellip_base[2:, ...]), axis=0
+            )
+            volume = torch.Tensor(ellip_double).unsqueeze(0)
+            volume = volume.permute(0, 3, 2, 1)  # (B, D, H, W)
+            verts, faces = marching_cubes_naive(volume, isolevel=0.001)
+            verts2, faces2 = marching_cubes(volume, isolevel=0.001)
+
+            data_filename = "test_marching_cubes_data/double_ellipsoid.pickle"
+            filename = os.path.join(DATA_DIR, data_filename)
+            with open(filename, "rb") as file:
+                verts_and_faces = pickle.load(file)
+            expected_verts = verts_and_faces["verts"]
+            expected_faces = verts_and_faces["faces"]
+
+            self.assertClose(verts[0], expected_verts)
+            self.assertClose(faces[0], expected_faces)
+            self.assertClose(verts2[0], expected_verts)
+            self.assertClose(faces2[0], expected_faces)
+
+    def test_cube_surface_area(self):
+        if USE_SCIKIT:
+            from skimage.measure import marching_cubes_classic, mesh_surface_area
+
+            volume_data = torch.zeros(1, 5, 5, 5)
+            volume_data[0, 1, 1, 1] = 1
+            volume_data[0, 1, 1, 2] = 1
+            volume_data[0, 2, 1, 1] = 1
+            volume_data[0, 2, 1, 2] = 1
+            volume_data[0, 1, 2, 1] = 1
+            volume_data[0, 1, 2, 2] = 1
+            volume_data[0, 2, 2, 1] = 1
+            volume_data[0, 2, 2, 2] = 1
+            volume_data = volume_data.permute(0, 3, 2, 1)  # (B, D, H, W)
+            verts, faces = marching_cubes_naive(volume_data, return_local_coords=False)
+            verts_c, faces_c = marching_cubes(volume_data, return_local_coords=False)
+            verts_sci, faces_sci = marching_cubes_classic(volume_data[0])
+
+            surf = mesh_surface_area(verts[0], faces[0])
+            surf_c = mesh_surface_area(verts_c[0], faces_c[0])
+            surf_sci = mesh_surface_area(verts_sci, faces_sci)
+
+            self.assertClose(surf, surf_sci)
+            self.assertClose(surf, surf_c)
+
+    def test_sphere_surface_area(self):
+        if USE_SCIKIT:
+            from skimage.measure import marching_cubes_classic, mesh_surface_area
+
+            # (B, W, H, D)
+            volume = torch.Tensor(
+                [
+                    [
+                        [
+                            (x - 10) ** 2 + (y - 10) ** 2 + (z - 10) ** 2
+                            for z in range(20)
+                        ]
+                        for y in range(20)
+                    ]
+                    for x in range(20)
+                ]
+            ).unsqueeze(0)
+            volume = volume.permute(0, 3, 2, 1)  # (B, D, H, W)
+            verts, faces = marching_cubes_naive(volume, isolevel=64)
+            verts_c, faces_c = marching_cubes(volume, isolevel=64)
+            verts_sci, faces_sci = marching_cubes_classic(volume[0], level=64)
+
+            surf = mesh_surface_area(verts[0], faces[0])
+            surf_c = mesh_surface_area(verts_c[0], faces_c[0])
+            surf_sci = mesh_surface_area(verts_sci, faces_sci)
+
+            self.assertClose(surf, surf_sci)
+            self.assertClose(surf, surf_c)
+
+    def test_double_ellipsoid_surface_area(self):
+        if USE_SCIKIT:
+            import numpy as np
+            from skimage.draw import ellipsoid
+            from skimage.measure import marching_cubes_classic, mesh_surface_area
+
+            ellip_base = ellipsoid(6, 10, 16, levelset=True)
+            ellip_double = np.concatenate(
+                (ellip_base[:-1, ...], ellip_base[2:, ...]), axis=0
+            )
+            volume = torch.Tensor(ellip_double).unsqueeze(0)
+            volume = volume.permute(0, 3, 2, 1)  # (B, D, H, W)
+            verts, faces = marching_cubes_naive(volume, isolevel=0)
+            verts_c, faces_c = marching_cubes(volume, isolevel=0)
+            verts_sci, faces_sci = marching_cubes_classic(volume[0], level=0)
+
+            surf = mesh_surface_area(verts[0], faces[0])
+            surf_c = mesh_surface_area(verts_c[0], faces_c[0])
+            surf_sci = mesh_surface_area(verts_sci, faces_sci)
+
+            self.assertClose(surf, surf_sci)
+            self.assertClose(surf, surf_c)
+
+    def test_ball_example(self):
+        N = 30
+        axis_tensor = torch.arange(0, N)
+        X, Y, Z = torch.meshgrid(axis_tensor, axis_tensor, axis_tensor, indexing="ij")
+        u = (X - 15) ** 2 + (Y - 15) ** 2 + (Z - 15) ** 2 - 8**2
+        u = u[None].float()
+        verts, faces = marching_cubes_naive(u, 0, return_local_coords=False)
+        verts2, faces2 = marching_cubes(u, 0, return_local_coords=False)
+        self.assertClose(verts2[0], verts[0])
+        self.assertClose(faces2[0], faces[0])
+        verts3, faces3 = marching_cubes(u.cuda(), 0, return_local_coords=False)
+        self.assertEqual(len(verts3), len(verts))
+        self.assertEqual(len(faces3), len(faces))
+
+    @staticmethod
+    def marching_cubes_with_init(algo_type: str, batch_size: int, V: int, device: str):
+        """Build a benchmark closure running marching cubes on a random volume."""
+        device = torch.device(device)
+        volume_data = torch.rand(
+            (batch_size, V, V, V), dtype=torch.float32, device=device
+        )
+        algo_table = {"naive": marching_cubes_naive, "extension": marching_cubes}
+
+        def convert():
+            algo_table[algo_type](volume_data, return_local_coords=False)
+            # Synchronize only for CUDA devices; CPU runs have nothing to wait for.
+            if device.type == "cuda":
+                torch.cuda.synchronize()
+
+        return convert
diff --git a/pytorch3d/tests/test_materials.py b/pytorch3d/tests/test_materials.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a37fbebb6c10fdf3c48fcc30ba069f64d2bc63c
--- /dev/null
+++ b/pytorch3d/tests/test_materials.py
@@ -0,0 +1,95 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.renderer.materials import Materials
+
+from .common_testing import TestCaseMixin
+
+
+class TestMaterials(TestCaseMixin, unittest.TestCase):
+    def test_init(self):
+        """
+        Initialize Materials class with the default values.
+        """
+        device = torch.device("cuda:0")
+        mat = Materials(device=device)
+        self.assertTrue(torch.is_tensor(mat.ambient_color))
+        self.assertTrue(torch.is_tensor(mat.diffuse_color))
+        self.assertTrue(torch.is_tensor(mat.specular_color))
+        self.assertTrue(torch.is_tensor(mat.shininess))
+        self.assertTrue(mat.ambient_color.device == device)
+        self.assertTrue(mat.diffuse_color.device == device)
+        self.assertTrue(mat.specular_color.device == device)
+        self.assertTrue(mat.shininess.device == device)
+        self.assertTrue(mat.ambient_color.shape == (1, 3))
+        self.assertTrue(mat.diffuse_color.shape == (1, 3))
+        self.assertTrue(mat.specular_color.shape == (1, 3))
+        self.assertTrue(mat.shininess.shape == (1,))
+
+    def test_materials_clone_to(self):
+        device = torch.device("cuda:0")
+        cpu = torch.device("cpu")
+        mat = Materials()
+        new_mat = mat.clone().to(device)
+        self.assertTrue(mat.ambient_color.device == cpu)
+        self.assertTrue(mat.diffuse_color.device == cpu)
+        self.assertTrue(mat.specular_color.device == cpu)
+        self.assertTrue(mat.shininess.device == cpu)
+        self.assertTrue(new_mat.ambient_color.device == device)
+        self.assertTrue(new_mat.diffuse_color.device == device)
+        self.assertTrue(new_mat.specular_color.device == device)
+        self.assertTrue(new_mat.shininess.device == device)
+        self.assertSeparate(new_mat.ambient_color, mat.ambient_color)
+        self.assertSeparate(new_mat.diffuse_color, mat.diffuse_color)
+        self.assertSeparate(new_mat.specular_color, mat.specular_color)
+        self.assertSeparate(new_mat.shininess, mat.shininess)
+
+    def test_initialize_materials_broadcast(self):
+        materials = Materials(
+            ambient_color=torch.randn(10, 3),
+            diffuse_color=torch.randn(1, 3),
+            specular_color=torch.randn(1, 3),
+            shininess=torch.randn(1),
+        )
+        self.assertTrue(materials.ambient_color.shape == (10, 3))
+        self.assertTrue(materials.diffuse_color.shape == (10, 3))
+        self.assertTrue(materials.specular_color.shape == (10, 3))
+        self.assertTrue(materials.shininess.shape == (10,))
+
+    def test_initialize_materials_broadcast_fail(self):
+        """
+        Batch dims have to be the same or 1.
+        """
+        with self.assertRaises(ValueError):
+            Materials(
+                ambient_color=torch.randn(10, 3), diffuse_color=torch.randn(15, 3)
+            )
+
+    def test_initialize_materials_dimensions_fail(self):
+        """
+        Color should have shape (N, 3) or (1, 3), Shininess should have shape
+        (1), (1, 1), (N) or (N, 1)
+        """
+        with self.assertRaises(ValueError):
+            Materials(ambient_color=torch.randn(10, 4))
+
+        with self.assertRaises(ValueError):
+            Materials(shininess=torch.randn(10, 2))
+
+    def test_initialize_materials_mixed_inputs(self):
+        mat = Materials(ambient_color=torch.randn(1, 3), diffuse_color=((1, 1, 1),))
+        self.assertTrue(mat.ambient_color.shape == (1, 3))
+        self.assertTrue(mat.diffuse_color.shape == (1, 3))
+
+    def test_initialize_materials_mixed_inputs_broadcast(self):
+        mat = Materials(ambient_color=torch.randn(10, 3), diffuse_color=((1, 1, 1),))
+        self.assertTrue(mat.ambient_color.shape == (10, 3))
+        self.assertTrue(mat.diffuse_color.shape == (10, 3))
+        self.assertTrue(mat.specular_color.shape == (10, 3))
+        self.assertTrue(mat.shininess.shape == (10,))
diff --git a/pytorch3d/tests/test_mesh_edge_loss.py b/pytorch3d/tests/test_mesh_edge_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..c18cad3bc205e95de4b5a720c092e7612ca5b048
--- /dev/null
+++ b/pytorch3d/tests/test_mesh_edge_loss.py
@@ -0,0 +1,107 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.loss import mesh_edge_loss
+from pytorch3d.structures import Meshes
+
+from .common_testing import TestCaseMixin
+from .test_sample_points_from_meshes import init_meshes
+
+
+class TestMeshEdgeLoss(TestCaseMixin, unittest.TestCase):
+    def test_empty_meshes(self):
+        device = torch.device("cuda:0")
+        target_length = 0
+        N = 10
+        V = 32
+        verts_list = []
+        faces_list = []
+        for _ in range(N):
+            vn = torch.randint(3, high=V, size=(1,))[0].item()
+            verts = torch.rand((vn, 3), dtype=torch.float32, device=device)
+            faces = torch.tensor([], dtype=torch.int64, device=device)
+            verts_list.append(verts)
+            faces_list.append(faces)
+        mesh = Meshes(verts=verts_list, faces=faces_list)
+        loss = mesh_edge_loss(mesh, target_length=target_length)
+
+        self.assertClose(loss, torch.tensor([0.0], dtype=torch.float32, device=device))
+        self.assertTrue(loss.requires_grad)
+
+    @staticmethod
+    def mesh_edge_loss_naive(meshes, target_length: float = 0.0):
+        """
+        Naive iterative implementation of mesh loss calculation.
+        """
+        edges_packed = meshes.edges_packed()
+        verts_packed = meshes.verts_packed()
+        edge_to_mesh = meshes.edges_packed_to_mesh_idx()
+        N = len(meshes)
+        device = meshes.device
+        valid = meshes.valid
+        predlosses = torch.zeros((N,), dtype=torch.float32, device=device)
+
+        for b in range(N):
+            if valid[b] == 0:
+                continue
+            mesh_edges = edges_packed[edge_to_mesh == b]
+            verts_edges = verts_packed[mesh_edges]
+            num_edges = mesh_edges.size(0)
+            for e in range(num_edges):
+                v0, v1 = verts_edges[e, 0], verts_edges[e, 1]
+                predlosses[b] += ((v0 - v1).norm(dim=0, p=2) - target_length) ** 2.0
+
+            if num_edges > 0:
+                predlosses[b] = predlosses[b] / num_edges
+
+        return predlosses.mean()
+
+    def test_mesh_edge_loss_output(self):
+        """
+        Check outputs of tensorized and iterative implementations are the same.
+        """
+        device = torch.device("cuda:0")
+        target_length = 0.5
+        num_meshes = 10
+        num_verts = 32
+        num_faces = 64
+
+        verts_list = []
+        faces_list = []
+        valid = torch.randint(2, size=(num_meshes,))
+
+        for n in range(num_meshes):
+            if valid[n]:
+                vn = torch.randint(3, high=num_verts, size=(1,))[0].item()
+                fn = torch.randint(vn, high=num_faces, size=(1,))[0].item()
+                verts = torch.rand((vn, 3), dtype=torch.float32, device=device)
+                faces = torch.randint(
+                    vn, size=(fn, 3), dtype=torch.int64, device=device
+                )
+            else:
+                verts = torch.tensor([], dtype=torch.float32, device=device)
+                faces = torch.tensor([], dtype=torch.int64, device=device)
+            verts_list.append(verts)
+            faces_list.append(faces)
+        meshes = Meshes(verts=verts_list, faces=faces_list)
+        loss = mesh_edge_loss(meshes, target_length=target_length)
+
+        predloss = TestMeshEdgeLoss.mesh_edge_loss_naive(meshes, target_length)
+        self.assertClose(loss, predloss)
+
+    @staticmethod
+    def mesh_edge_loss(num_meshes: int = 10, max_v: int = 100, max_f: int = 300):
+        meshes = init_meshes(num_meshes, max_v, max_f, device="cuda:0")
+        torch.cuda.synchronize()
+
+        def compute_loss():
+            mesh_edge_loss(meshes, target_length=0.0)
+            torch.cuda.synchronize()
+
+        return compute_loss
diff --git a/pytorch3d/tests/test_mesh_filtering.py b/pytorch3d/tests/test_mesh_filtering.py
new file mode 100644
index 0000000000000000000000000000000000000000..897a5f8a3ba4a924213ea34d11eb5a02b4602a5d
--- /dev/null
+++ b/pytorch3d/tests/test_mesh_filtering.py
@@ -0,0 +1,42 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import torch
+from pytorch3d.ops import taubin_smoothing
+from pytorch3d.structures import Meshes
+from pytorch3d.utils import ico_sphere
+
+from .common_testing import get_random_cuda_device, TestCaseMixin
+
+
+class TestTaubinSmoothing(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(1)
+
+    def test_taubin(self):
+        N = 3
+        device = get_random_cuda_device()
+
+        mesh = ico_sphere(4, device).extend(N)
+        ico_verts = mesh.verts_padded()
+        ico_faces = mesh.faces_padded()
+
+        rand_noise = torch.rand_like(ico_verts) * 0.2 - 0.1
+        z_mask = (ico_verts[:, :, -1] > 0).view(N, -1, 1)
+        rand_noise = rand_noise * z_mask
+        verts = ico_verts + rand_noise
+        mesh = Meshes(verts=verts, faces=ico_faces)
+
+        smooth_mesh = taubin_smoothing(mesh, num_iter=50)
+        smooth_verts = smooth_mesh.verts_padded()
+
+        smooth_dist = (smooth_verts - ico_verts).norm(dim=-1).mean()
+        dist = (verts - ico_verts).norm(dim=-1).mean()
+        self.assertTrue(smooth_dist < dist)
diff --git a/pytorch3d/tests/test_mesh_laplacian_smoothing.py b/pytorch3d/tests/test_mesh_laplacian_smoothing.py
new file mode 100644
index 0000000000000000000000000000000000000000..91bd7fc595ee069e61c64c15f2e023732b63d858
--- /dev/null
+++ b/pytorch3d/tests/test_mesh_laplacian_smoothing.py
@@ -0,0 +1,202 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import torch
+from pytorch3d.loss.mesh_laplacian_smoothing import mesh_laplacian_smoothing
+from pytorch3d.structures.meshes import Meshes
+
+
+class TestLaplacianSmoothing(unittest.TestCase):
+    @staticmethod
+    def laplacian_smoothing_naive_uniform(meshes):
+        """
+        Naive implementation of laplacian smoothing with uniform weights.
+        """
+        verts_packed = meshes.verts_packed()  # (sum(V_n), 3)
+        faces_packed = meshes.faces_packed()  # (sum(F_n), 3)
+        V = verts_packed.shape[0]
+
+        L = torch.zeros((V, V), dtype=torch.float32, device=meshes.device)
+
+        # filling L with the face pairs should be the same as edge pairs
+        for f in faces_packed:
+            L[f[0], f[1]] = 1
+            L[f[0], f[2]] = 1
+            L[f[1], f[2]] = 1
+            # symmetric
+            L[f[1], f[0]] = 1
+            L[f[2], f[0]] = 1
+            L[f[2], f[1]] = 1
+
+        norm_w = L.sum(dim=1, keepdims=True)
+        idx = norm_w > 0
+        norm_w[idx] = 1.0 / norm_w[idx]
+
+        loss = (L.mm(verts_packed) * norm_w - verts_packed).norm(dim=1)
+
+        weights = torch.zeros(V, dtype=torch.float32, device=meshes.device)
+        for v in range(V):
+            weights[v] = meshes.num_verts_per_mesh()[
+                meshes.verts_packed_to_mesh_idx()[v]
+            ]
+        weights = 1.0 / weights
+        loss = loss * weights
+
+        return loss.sum() / len(meshes)
+
+    @staticmethod
+    def laplacian_smoothing_naive_cot(meshes, method: str = "cot"):
+        """
+        Naive implementation of laplacian smoothing with cotangent weights.
+        """
+        verts_packed = meshes.verts_packed()  # (sum(V_n), 3)
+        faces_packed = meshes.faces_packed()  # (sum(F_n), 3)
+        V = verts_packed.shape[0]
+
+        L = torch.zeros((V, V), dtype=torch.float32, device=meshes.device)
+        inv_areas = torch.zeros((V, 1), dtype=torch.float32, device=meshes.device)
+
+        for f in faces_packed:
+            v0 = verts_packed[f[0], :]
+            v1 = verts_packed[f[1], :]
+            v2 = verts_packed[f[2], :]
+            A = (v1 - v2).norm()
+            B = (v0 - v2).norm()
+            C = (v0 - v1).norm()
+            s = 0.5 * (A + B + C)
+
+            face_area = (s * (s - A) * (s - B) * (s - C)).clamp_(min=1e-12).sqrt()
+            inv_areas[f[0]] += face_area
+            inv_areas[f[1]] += face_area
+            inv_areas[f[2]] += face_area
+
+            A2, B2, C2 = A * A, B * B, C * C
+            cota = (B2 + C2 - A2) / face_area / 4.0
+            cotb = (A2 + C2 - B2) / face_area / 4.0
+            cotc = (A2 + B2 - C2) / face_area / 4.0
+
+            L[f[1], f[2]] += cota
+            L[f[2], f[0]] += cotb
+            L[f[0], f[1]] += cotc
+            # symmetric
+            L[f[2], f[1]] += cota
+            L[f[0], f[2]] += cotb
+            L[f[1], f[0]] += cotc
+
+        idx = inv_areas > 0
+        inv_areas[idx] = 1.0 / inv_areas[idx]
+
+        norm_w = L.sum(dim=1, keepdims=True)
+        L_sum = norm_w.clone()
+        idx = norm_w > 0
+        norm_w[idx] = 1.0 / norm_w[idx]
+
+        if method == "cotcurv":
+            loss = (L.mm(verts_packed) - L_sum * verts_packed) * inv_areas * 0.25
+            loss = loss.norm(dim=1)
+        else:
+            loss = L.mm(verts_packed) * norm_w - verts_packed
+            loss = loss.norm(dim=1)
+
+        weights = torch.zeros(V, dtype=torch.float32, device=meshes.device)
+        for v in range(V):
+            weights[v] = meshes.num_verts_per_mesh()[
+                meshes.verts_packed_to_mesh_idx()[v]
+            ]
+        weights = 1.0 / weights
+        loss = loss * weights
+
+        return loss.sum() / len(meshes)
+
+    @staticmethod
+    def init_meshes(num_meshes: int = 10, num_verts: int = 1000, num_faces: int = 3000):
+        device = torch.device("cuda:0")
+        verts_list = []
+        faces_list = []
+        for _ in range(num_meshes):
+            verts = (
+                torch.rand((num_verts, 3), dtype=torch.float32, device=device) * 2.0
+                - 1.0
+            )  # verts in the space of [-1, 1]
+            faces = torch.stack(
+                [
+                    torch.randperm(num_verts, device=device)[:3]
+                    for _ in range(num_faces)
+                ],
+                dim=0,
+            )
+            # randperm(...)[:3] above yields three distinct vertex indices per face
+            verts_list.append(verts)
+            faces_list.append(faces)
+        meshes = Meshes(verts_list, faces_list)
+
+        return meshes
+
+    def test_laplacian_smoothing_uniform(self):
+        """
+        Test Laplacian Smoothing with uniform weights.
+        """
+        meshes = TestLaplacianSmoothing.init_meshes(10, 100, 300)
+
+        # feats in list
+        out = mesh_laplacian_smoothing(meshes, method="uniform")
+        naive_out = TestLaplacianSmoothing.laplacian_smoothing_naive_uniform(meshes)
+
+        self.assertTrue(torch.allclose(out, naive_out))
+
+    def test_laplacian_smoothing_cot(self):
+        """
+        Test Laplacian Smoothing with cot weights.
+        """
+        meshes = TestLaplacianSmoothing.init_meshes(10, 100, 300)
+
+        # feats in list
+        out = mesh_laplacian_smoothing(meshes, method="cot")
+        naive_out = TestLaplacianSmoothing.laplacian_smoothing_naive_cot(
+            meshes, method="cot"
+        )
+
+        self.assertTrue(torch.allclose(out, naive_out))
+
+    def test_laplacian_smoothing_cotcurv(self):
+        """
+        Test Laplacian Smoothing with cotcurv weights.
+        """
+        meshes = TestLaplacianSmoothing.init_meshes(10, 100, 300)
+
+        # feats in list
+        out = mesh_laplacian_smoothing(meshes, method="cotcurv")
+        naive_out = TestLaplacianSmoothing.laplacian_smoothing_naive_cot(
+            meshes, method="cotcurv"
+        )
+
+        self.assertTrue(torch.allclose(out, naive_out))
+
+    @staticmethod
+    def laplacian_smoothing_with_init(
+        num_meshes: int, num_verts: int, num_faces: int, device: str = "cpu"
+    ):
+        device = torch.device(device)
+        sync = torch.cuda.synchronize if device.type == "cuda" else (lambda: None)
+        verts_list, faces_list = [], []
+        for _ in range(num_meshes):
+            verts = torch.rand((num_verts, 3), dtype=torch.float32, device=device)
+            faces = torch.randint(
+                num_verts, size=(num_faces, 3), dtype=torch.int64, device=device
+            )
+            verts_list.append(verts)
+            faces_list.append(faces)
+        meshes = Meshes(verts_list, faces_list)
+        sync()  # no-op unless `device` is CUDA, so the "cpu" default works
+
+        def smooth():
+            mesh_laplacian_smoothing(meshes, method="cotcurv")
+            sync()
+
+        return smooth
diff --git a/pytorch3d/tests/test_mesh_normal_consistency.py b/pytorch3d/tests/test_mesh_normal_consistency.py
new file mode 100644
index 0000000000000000000000000000000000000000..adac9fccf14a94b3d66a31bfc6100e2499d15ac1
--- /dev/null
+++ b/pytorch3d/tests/test_mesh_normal_consistency.py
@@ -0,0 +1,266 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import torch
+from pytorch3d.loss.mesh_normal_consistency import mesh_normal_consistency
+from pytorch3d.structures.meshes import Meshes
+from pytorch3d.utils.ico_sphere import ico_sphere
+
+
+IS_TORCH_1_8 = torch.__version__.startswith("1.8.")
+PROBLEMATIC_CUDA = torch.version.cuda in ("11.0", "11.1")
+# TODO: There are problems with cuda 11.0 and 11.1 here.
+# The symptom can be
+# RuntimeError: radix_sort: failed on 1st step: cudaErrorInvalidDevice: invalid device ordinal
+# or something like
+# operator(): block: [0,0,0], thread: [96,0,0]
+# Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
+AVOID_LARGE_MESH_CUDA = PROBLEMATIC_CUDA and IS_TORCH_1_8
+
+
+class TestMeshNormalConsistency(unittest.TestCase):
+    def setUp(self) -> None:
+        torch.manual_seed(42)  # seed torch's RNG before every test
+
+    @staticmethod
+    def init_faces(num_verts: int = 1000) -> torch.Tensor:
+        faces = []  # chunks of (n, 3) index triples, concatenated at the end
+        for f0 in range(num_verts):
+            for f1 in range(f0 + 1, num_verts):
+                f2 = torch.arange(f1 + 1, num_verts)  # every index above f1
+                n = f2.shape[0]
+                if n == 0:  # no index left above f1, nothing to emit
+                    continue
+                faces.append(
+                    torch.stack(
+                        [
+                            torch.full((n,), f0, dtype=torch.int64),
+                            torch.full((n,), f1, dtype=torch.int64),
+                            f2,
+                        ],
+                        dim=1,
+                    )
+                )
+        faces = torch.cat(faces, 0)  # all triples with f0 < f1 < f2, one per row
+        return faces
+
+    @staticmethod
+    def init_meshes(num_meshes: int = 10, num_verts: int = 1000, num_faces: int = 3000):
+        if AVOID_LARGE_MESH_CUDA:  # CPU fallback, see the TODO at module level
+            device = torch.device("cpu")
+        else:
+            device = torch.device("cuda:0")
+        valid_faces = TestMeshNormalConsistency.init_faces(num_verts).to(device)
+        verts_list = []
+        faces_list = []
+        for _ in range(num_meshes):
+            verts = (
+                torch.rand((num_verts, 3), dtype=torch.float32, device=device) * 2.0
+                - 1.0
+            )  # verts in the space of [-1, 1]
+            """
+            faces = torch.stack(
+                [
+                    torch.randperm(num_verts, device=device)[:3]
+                    for _ in range(num_faces)
+                ],
+                dim=0,
+            )
+            # avoids duplicate vertices in a face
+            """
+            idx = torch.randperm(valid_faces.shape[0], device=device)[
+                : min(valid_faces.shape[0], num_faces)
+            ]  # random selection of distinct rows of valid_faces
+            faces = valid_faces[idx]
+            verts_list.append(verts)
+            faces_list.append(faces)
+        meshes = Meshes(verts_list, faces_list)
+        return meshes
+
+    @staticmethod
+    def mesh_normal_consistency_naive(meshes):
+        """
+        Naive per-edge, per-face-pair implementation of mesh normal consistency.
+        """
+        N = len(meshes)
+        verts_packed = meshes.verts_packed()
+        faces_packed = meshes.faces_packed()
+        edges_packed = meshes.edges_packed()
+        face_to_edge = meshes.faces_packed_to_edges_packed()
+        edges_packed_to_mesh_idx = meshes.edges_packed_to_mesh_idx()
+
+        E = edges_packed.shape[0]
+        loss = []  # one entry per pair of faces sharing an edge
+        mesh_idx = []  # mesh index of each entry in loss
+
+        for e in range(E):
+            face_idx = face_to_edge.eq(e).any(1).nonzero()  # indexed to faces
+            v0 = verts_packed[edges_packed[e, 0]]
+            v1 = verts_packed[edges_packed[e, 1]]
+            normals = []
+            for f in face_idx:
+                v2 = -1  # sentinel: vertex of f that is not on edge e, not found yet
+                for j in range(3):
+                    if (
+                        faces_packed[f, j] != edges_packed[e, 0]
+                        and faces_packed[f, j] != edges_packed[e, 1]
+                    ):
+                        v2 = faces_packed[f, j]
+                assert v2 > -1
+                v2 = verts_packed[v2]
+                normals.append((v1 - v0).view(-1).cross((v2 - v0).view(-1)))
+            for i in range(len(normals) - 1):  # every unordered pair of faces on e
+                for j in range(i + 1, len(normals)):
+                    mesh_idx.append(edges_packed_to_mesh_idx[e])
+                    loss.append(
+                        (
+                            1
+                            - torch.cosine_similarity(
+                                normals[i].view(1, 3), -normals[j].view(1, 3)
+                            )
+                        )
+                    )
+
+        mesh_idx = torch.tensor(mesh_idx, device=meshes.device)
+        num = mesh_idx.bincount(minlength=N)  # number of face pairs in each mesh
+        weights = 1.0 / num[mesh_idx].float()  # only meshes with >= 1 pair are indexed
+
+        loss = torch.cat(loss) * weights
+        return loss.sum() / N  # mean over pairs within a mesh, then over the N meshes
+
+    def test_mesh_normal_consistency_simple(self) -> None:
+        r"""
+        Mesh 1:
+                        v3
+                        /\
+                       /  \
+                   e4 / f1 \ e3
+                     /      \
+                 v2 /___e2___\ v1
+                    \        /
+                     \      /
+                 e1   \ f0 / e0
+                       \  /
+                        \/
+                        v0
+        """
+        device = torch.device("cuda:0")
+        # mesh1 shown above
+        verts1 = torch.rand((4, 3), dtype=torch.float32, device=device)
+        faces1 = torch.tensor([[0, 1, 2], [2, 1, 3]], dtype=torch.int64, device=device)
+
+        # mesh2 is a cuboid with 8 verts, 12 faces and 18 edges
+        verts2 = torch.tensor(
+            [
+                [0, 0, 0],
+                [0, 0, 1],
+                [0, 1, 0],
+                [0, 1, 1],
+                [1, 0, 0],
+                [1, 0, 1],
+                [1, 1, 0],
+                [1, 1, 1],
+            ],
+            dtype=torch.float32,
+            device=device,
+        )
+        faces2 = torch.tensor(
+            [
+                [0, 1, 2],
+                [1, 3, 2],  # left face: 0, 1
+                [2, 3, 6],
+                [3, 7, 6],  # bottom face: 2, 3
+                [0, 2, 6],
+                [0, 6, 4],  # front face: 4, 5
+                [0, 5, 1],
+                [0, 4, 5],  # up face: 6, 7
+                [6, 7, 5],
+                [6, 5, 4],  # right face: 8, 9
+                [1, 7, 3],
+                [1, 5, 7],  # back face: 10, 11
+            ],
+            dtype=torch.int64,
+            device=device,
+        )
+
+        # mesh3 is like mesh1 but with another face added to e2
+        verts3 = torch.rand((5, 3), dtype=torch.float32, device=device)
+        faces3 = torch.tensor(
+            [[0, 1, 2], [2, 1, 3], [2, 1, 4]], dtype=torch.int64, device=device
+        )
+
+        meshes = Meshes(verts=[verts1, verts2, verts3], faces=[faces1, faces2, faces3])
+
+        # mesh1: expected loss for the single pair of faces sharing the edge v1-v2
+        n0 = (verts1[1] - verts1[2]).cross(verts1[3] - verts1[2])
+        n1 = (verts1[1] - verts1[2]).cross(verts1[0] - verts1[2])
+        loss1 = 1.0 - torch.cosine_similarity(n0.view(1, 3), -(n1.view(1, 3)))
+
+        # mesh2: normal consistency computation
+        # In the cube mesh, 6 edges are shared with coplanar faces (loss=0),
+        # 12 edges are shared by perpendicular faces (loss=1)
+        loss2 = 12.0 / 18  # sum of the per-edge losses over the 18 edges
+
+        # mesh3: three faces share the edge v1-v2, giving three face pairs
+        n0 = (verts3[1] - verts3[2]).cross(verts3[3] - verts3[2])
+        n1 = (verts3[1] - verts3[2]).cross(verts3[0] - verts3[2])
+        n2 = (verts3[1] - verts3[2]).cross(verts3[4] - verts3[2])
+        loss3 = (
+            3.0
+            - torch.cosine_similarity(n0.view(1, 3), -(n1.view(1, 3)))
+            - torch.cosine_similarity(n0.view(1, 3), -(n2.view(1, 3)))
+            - torch.cosine_similarity(n1.view(1, 3), -(n2.view(1, 3)))
+        )
+        loss3 /= 3.0  # average over the three face pairs
+
+        loss = (loss1 + loss2 + loss3) / 3.0  # mean over the three meshes
+
+        out = mesh_normal_consistency(meshes)
+
+        self.assertTrue(torch.allclose(out, loss))
+
+    def test_mesh_normal_consistency(self) -> None:
+        """
+        Compare mesh_normal_consistency with the naive version on random meshes.
+        """
+        meshes = TestMeshNormalConsistency.init_meshes(5, 100, 300)
+
+        out1 = mesh_normal_consistency(meshes)  # implementation under test
+        out2 = TestMeshNormalConsistency.mesh_normal_consistency_naive(meshes)
+
+        self.assertTrue(torch.allclose(out1, out2))
+
+    def test_no_intersection(self) -> None:
+        """
+        Test Mesh Normal Consistency for two triangles that share no
+        vertices (and therefore no edge); the loss is expected to be 0.
+        """
+        verts = torch.rand(1, 6, 3)
+        faces = torch.arange(6).reshape(1, 2, 3)  # faces [0, 1, 2] and [3, 4, 5]
+        meshes = Meshes(verts=verts, faces=faces)
+        out = mesh_normal_consistency(meshes)
+        self.assertEqual(out.item(), 0)
+
+    @staticmethod
+    def mesh_normal_consistency_with_ico(
+        num_meshes: int, level: int = 3, device: str = "cpu"
+    ):  # returns a zero-argument callable that evaluates the loss once
+        device = torch.device(device)
+        mesh = ico_sphere(level, device)
+        verts, faces = mesh.get_mesh_verts_faces(0)
+        verts_list = [verts.clone() for _ in range(num_meshes)]  # same sphere repeated
+        faces_list = [faces.clone() for _ in range(num_meshes)]
+        meshes = Meshes(verts_list, faces_list)
+        torch.cuda.synchronize()  # NOTE(review): called even when device is "cpu"
+
+        def loss():
+            mesh_normal_consistency(meshes)
+            torch.cuda.synchronize()  # wait for queued CUDA work before returning
+
+        return loss
diff --git a/pytorch3d/tests/test_mesh_rendering_utils.py b/pytorch3d/tests/test_mesh_rendering_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..4c5b4f5ad5eda1f16d37ad2289fc194157f3ddc4
--- /dev/null
+++ b/pytorch3d/tests/test_mesh_rendering_utils.py
@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import torch
+from pytorch3d.renderer.mesh.utils import _clip_barycentric_coordinates
+
+
+class TestMeshRenderingUtils(unittest.TestCase):
+    def test_bary_clip(self) -> None:
+        N = 10
+        bary = torch.randn(size=(N, 3))
+        # flip the sign of every entry below 0.3 (entries in (0, 0.3) become negative)
+        bary[bary < 0.3] *= -1.0
+        # double every entry above 0.8 so that those values are greater than 1
+        bary[bary > 0.8] *= 2.0
+        negative_mask = bary < 0.0
+        positive_mask = bary > 1.0
+        clipped = _clip_barycentric_coordinates(bary)
+        self.assertTrue(clipped[negative_mask].sum() == 0)
+        self.assertTrue(clipped[positive_mask].gt(1.0).sum() == 0)
+        self.assertTrue(torch.allclose(clipped.sum(dim=-1), torch.ones(N)))
diff --git a/pytorch3d/tests/test_meshes.py b/pytorch3d/tests/test_meshes.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed87437e7e2916ee32a461e85f15651e7acb6bf3
--- /dev/null
+++ b/pytorch3d/tests/test_meshes.py
@@ -0,0 +1,1507 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import itertools
+import random
+import unittest
+
+import numpy as np
+import torch
+from pytorch3d.structures.meshes import Meshes
+
+from .common_testing import TestCaseMixin
+
+
+def init_mesh(
+    num_meshes: int = 10,
+    max_v: int = 100,
+    max_f: int = 300,
+    lists_to_tensors: bool = False,
+    device: str = "cpu",
+    requires_grad: bool = False,
+):
+    """
+    Function to generate a Meshes object of N meshes with
+    random numbers of vertices and faces.
+
+    Args:
+        num_meshes: Number of meshes to generate.
+        max_v: Exclusive upper bound on vertices per mesh (at least 3 are drawn).
+        max_f: Exclusive upper bound on the number of faces per mesh.
+        lists_to_tensors: Build from stacked tensors (True) or from lists (False).
+        device: Device on which the verts and faces tensors are created.
+        requires_grad: Passed through when creating each verts tensor.
+
+    Returns:
+        Meshes object.
+    """
+    device = torch.device(device)
+
+    verts_list = []
+    faces_list = []
+
+    # Randomly generate numbers of faces and vertices in each mesh.
+    if lists_to_tensors:
+        # If we define faces/verts with tensors, f/v has to be the
+        # same for each mesh in the batch.
+        f = torch.randint(1, max_f, size=(1,), dtype=torch.int32)
+        v = torch.randint(3, high=max_v, size=(1,), dtype=torch.int32)
+        f = f.repeat(num_meshes)
+        v = v.repeat(num_meshes)
+    else:
+        # For lists of faces and vertices, we can sample different v/f
+        # per mesh.
+        f = torch.randint(max_f, size=(num_meshes,), dtype=torch.int32)
+        v = torch.randint(3, high=max_v, size=(num_meshes,), dtype=torch.int32)
+
+    # Generate the actual vertices and faces.
+    for i in range(num_meshes):
+        verts = torch.rand(
+            (v[i], 3),
+            dtype=torch.float32,
+            device=device,
+            requires_grad=requires_grad,
+        )
+        faces = torch.randint(v[i], size=(f[i], 3), dtype=torch.int64, device=device)
+        verts_list.append(verts)
+        faces_list.append(faces)
+
+    if lists_to_tensors:
+        verts_list = torch.stack(verts_list)
+        faces_list = torch.stack(faces_list)
+
+    return Meshes(verts=verts_list, faces=faces_list)
+
+
+def init_simple_mesh(device: str = "cpu") -> Meshes:
+    """
+    Returns a Meshes batch of three fixed meshes (3/4/5 verts, 1/2/7 faces).
+
+    Returns:
+        Meshes object.
+    """
+    device = torch.device(device)
+
+    verts = [
+        torch.tensor(
+            [[0.1, 0.3, 0.5], [0.5, 0.2, 0.1], [0.6, 0.8, 0.7]],
+            dtype=torch.float32,
+            device=device,
+        ),
+        torch.tensor(
+            [[0.1, 0.3, 0.3], [0.6, 0.7, 0.8], [0.2, 0.3, 0.4], [0.1, 0.5, 0.3]],
+            dtype=torch.float32,
+            device=device,
+        ),
+        torch.tensor(
+            [
+                [0.7, 0.3, 0.6],
+                [0.2, 0.4, 0.8],
+                [0.9, 0.5, 0.2],
+                [0.2, 0.3, 0.4],
+                [0.9, 0.3, 0.8],
+            ],
+            dtype=torch.float32,
+            device=device,
+        ),
+    ]
+    faces = [
+        torch.tensor([[0, 1, 2]], dtype=torch.int64, device=device),
+        torch.tensor([[0, 1, 2], [1, 2, 3]], dtype=torch.int64, device=device),
+        torch.tensor(
+            [
+                [1, 2, 0],
+                [0, 1, 3],
+                [2, 3, 1],
+                [4, 3, 2],
+                [4, 0, 1],
+                [4, 3, 1],
+                [4, 2, 1],
+            ],
+            dtype=torch.int64,
+            device=device,
+        ),
+    ]
+    return Meshes(verts=verts, faces=faces)
+
+
+def mesh_structures_equal(mesh1, mesh2) -> bool:
+    """
+    Two meshes are equal if they have identical verts, faces and vertex normals.
+
+    Use to_sorted() first to compare up to vertex permutation. Meshes of different
+    classes are unequal; textured meshes raise NotImplementedError. Geometrically
+    identical meshes in different coordinate frames are treated as different.
+    """
+    if mesh1.__class__ != mesh2.__class__:  # was compared against mesh1 itself
+        return False
+
+    if mesh1.textures is not None or mesh2.textures is not None:
+        raise NotImplementedError(
+            "mesh equality is not implemented for textured meshes."
+        )
+
+    if len(mesh1.verts_list()) != len(mesh2.verts_list()) or not all(
+        torch.equal(verts_mesh1, verts_mesh2)
+        for (verts_mesh1, verts_mesh2) in zip(mesh1.verts_list(), mesh2.verts_list())
+    ):
+        return False
+
+    if len(mesh1.faces_list()) != len(mesh2.faces_list()) or not all(
+        torch.equal(faces_mesh1, faces_mesh2)
+        for (faces_mesh1, faces_mesh2) in zip(mesh1.faces_list(), mesh2.faces_list())
+    ):
+        return False
+
+    if len(mesh1.verts_normals_list()) != len(mesh2.verts_normals_list()) or not all(
+        torch.equal(normals_mesh1, normals_mesh2)
+        for (normals_mesh1, normals_mesh2) in zip(
+            mesh1.verts_normals_list(), mesh2.verts_normals_list()
+        )
+    ):
+        return False
+
+    return True
+
+
+def to_sorted(mesh: Meshes) -> "Meshes":
+    """
+    Create a new Meshes object, where each sub-mesh's vertices are sorted
+    lexicographically by their coordinates.
+
+    Returns:
+        A Meshes object with the same topology as this mesh, with vertices sorted
+        lexicographically.
+
+    Example:
+
+    For a mesh with verts [[2.3, .2, .4], [.0, .1, .2], [.0, .0, .1]] and a single
+    face [[0, 1, 2]], to_sorted will create a new mesh with verts [[.0, .0, .1],
+    [.0, .1, .2], [2.3, .2, .4]] and a single face [[2, 1, 0]]. This is useful to
+    create a semi-canonical representation of the mesh that is invariant to vertex
+    permutations, but not invariant to coordinate frame changes.
+    """
+    if mesh.textures is not None:  # textured meshes are not supported
+        raise NotImplementedError(
+            "to_sorted is not implemented for meshes with "
+            f"{type(mesh.textures).__name__} textures."
+        )
+
+    verts_list = mesh.verts_list()
+    faces_list = mesh.faces_list()
+    verts_sorted_list = []
+    faces_sorted_list = []
+
+    for verts, faces in zip(verts_list, faces_list):
+        # Argsort the vertices lexicographically: sort_ids[k] is the id of the
+        # vertex in the non-sorted mesh that should sit at index k in the sorted mesh.
+        sort_ids = torch.tensor(
+            [
+                idx_and_val[0]
+                for idx_and_val in sorted(
+                    enumerate(verts.tolist()),
+                    key=lambda idx_and_val: idx_and_val[1],
+                )
+            ],
+            device=mesh.device,
+        )
+
+        # Reorder the vertices by indexing with sort_ids.
+        verts_sorted = verts[sort_ids]
+        verts_sorted_list.append(verts_sorted)
+
+        # The `faces` tensor contains vertex ids. Substitute old vertex ids for the
+        # new ones. new_vertex_ids is the inverse of sort_ids: new_vertex_ids[k]
+        # corresponds to the id of the vertex in the sorted mesh that is the same as
+        # vertex k in the non-sorted mesh.
+        new_vertex_ids = torch.argsort(sort_ids)
+        faces_sorted = (
+            torch.gather(new_vertex_ids, 0, faces.flatten())
+            .reshape(faces.shape)
+            .clone()
+        )
+        faces_sorted_list.append(faces_sorted)
+
+    other = mesh.__class__(verts=verts_sorted_list, faces=faces_sorted_list)
+    for k in mesh._INTERNAL_TENSORS:  # NOTE(review): copies the unsorted mesh's caches
+        v = getattr(mesh, k)
+        if torch.is_tensor(v):
+            setattr(other, k, v.clone())
+
+    return other
+
+
+def init_cube_meshes(device: str = "cpu"):
+    # Make Meshes with four cubes translated from the origin by varying amounts.
+    verts = torch.FloatTensor(
+        [
+            [0, 0, 0],
+            [1, 0, 0],  # 1->0
+            [1, 1, 0],  # 2->1
+            [0, 1, 0],  # 3->2
+            [0, 1, 1],  # 3
+            [1, 1, 1],  # 4
+            [1, 0, 1],  # 5
+            [0, 0, 1],
+        ],
+        device=device,
+    )
+
+    faces = torch.FloatTensor(  # NOTE(review): face indices as floats - confirm intended
+        [
+            [0, 2, 1],
+            [0, 3, 2],
+            [2, 3, 4],  # 1,2, 3
+            [2, 4, 5],  #
+            [1, 2, 5],  #
+            [1, 5, 6],  #
+            [0, 7, 4],
+            [0, 4, 3],
+            [5, 4, 7],
+            [5, 7, 6],
+            [0, 6, 7],
+            [0, 1, 6],
+        ],
+        device=device,
+    )
+
+    return Meshes(
+        verts=[verts, verts + 1, verts + 2, verts + 3],  # offsets 0..3 on every axis
+        faces=[faces, faces, faces, faces],
+    )
+
+
+class TestMeshes(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        np.random.seed(42)  # seeds numpy and torch only; the `random` module is not
+        torch.manual_seed(42)  # seeded here
+
+    def test_simple(self) -> None:
+        mesh = init_simple_mesh("cuda:0")
+
+        # Check that faces/verts per mesh are set in init:
+        self.assertClose(mesh._num_faces_per_mesh.cpu(), torch.tensor([1, 2, 7]))
+        self.assertClose(mesh._num_verts_per_mesh.cpu(), torch.tensor([3, 4, 5]))
+
+        # Check computed index tensors for meshes with 3/4/5 verts and 1/2/7 faces
+        self.assertClose(
+            mesh.verts_packed_to_mesh_idx().cpu(),
+            torch.tensor([0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2]),
+        )
+        self.assertClose(
+            mesh.mesh_to_verts_packed_first_idx().cpu(), torch.tensor([0, 3, 7])
+        )
+        self.assertClose(
+            mesh.verts_padded_to_packed_idx().cpu(),
+            torch.tensor([0, 1, 2, 5, 6, 7, 8, 10, 11, 12, 13, 14]),
+        )
+        self.assertClose(
+            mesh.faces_packed_to_mesh_idx().cpu(),
+            torch.tensor([0, 1, 1, 2, 2, 2, 2, 2, 2, 2]),
+        )
+        self.assertClose(
+            mesh.mesh_to_faces_packed_first_idx().cpu(), torch.tensor([0, 1, 3])
+        )
+        self.assertClose(
+            mesh.num_edges_per_mesh().cpu(), torch.tensor([3, 5, 10], dtype=torch.int32)
+        )
+        self.assertClose(
+            mesh.mesh_to_edges_packed_first_idx().cpu(),
+            torch.tensor([0, 3, 8], dtype=torch.int64),
+        )
+
+    def test_init_error(self) -> None:
+        # Check if correct errors are raised when verts/faces are on
+        # different devices. Placement alternates so the mix is deterministic.
+
+        mesh = init_mesh(10, 10, 100)
+        verts_list = mesh.verts_list()  # all tensors on cpu
+        verts_list = [
+            v.to("cuda:0") if i % 2 == 0 else v for i, v in enumerate(verts_list)
+        ]
+        faces_list = mesh.faces_list()
+
+        with self.assertRaisesRegex(ValueError, "same device"):
+            Meshes(verts=verts_list, faces=faces_list)
+
+        verts_padded = mesh.verts_padded()  # on cpu
+        verts_padded = verts_padded.to("cuda:0")
+        faces_padded = mesh.faces_padded()
+
+        with self.assertRaisesRegex(ValueError, "same device"):
+            Meshes(verts=verts_padded, faces=faces_padded)
+
+    def test_simple_random_meshes(self) -> None:
+
+        # Define the test mesh object either as a list or tensor of faces/verts.
+        for lists_to_tensors in (False, True):
+            N = 10
+            mesh = init_mesh(N, 100, 300, lists_to_tensors=lists_to_tensors)
+            verts_list = mesh.verts_list()
+            faces_list = mesh.faces_list()
+
+            # Check batch calculations.
+            verts_padded = mesh.verts_padded()
+            faces_padded = mesh.faces_padded()
+            verts_per_mesh = mesh.num_verts_per_mesh()
+            faces_per_mesh = mesh.num_faces_per_mesh()
+            for n in range(N):
+                v = verts_list[n].shape[0]
+                f = faces_list[n].shape[0]
+                self.assertClose(verts_padded[n, :v, :], verts_list[n])
+                if verts_padded.shape[1] > v:
+                    self.assertTrue(verts_padded[n, v:, :].eq(0).all())  # verts pad is 0
+                self.assertClose(faces_padded[n, :f, :], faces_list[n])
+                if faces_padded.shape[1] > f:
+                    self.assertTrue(faces_padded[n, f:, :].eq(-1).all())  # faces pad is -1
+                self.assertEqual(verts_per_mesh[n], v)
+                self.assertEqual(faces_per_mesh[n], f)
+
+            # Check compute packed.
+            verts_packed = mesh.verts_packed()
+            vert_to_mesh = mesh.verts_packed_to_mesh_idx()
+            mesh_to_vert = mesh.mesh_to_verts_packed_first_idx()
+            faces_packed = mesh.faces_packed()
+            face_to_mesh = mesh.faces_packed_to_mesh_idx()
+            mesh_to_face = mesh.mesh_to_faces_packed_first_idx()
+
+            curv, curf = 0, 0  # running offsets into the packed verts / faces
+            for n in range(N):
+                v = verts_list[n].shape[0]
+                f = faces_list[n].shape[0]
+                self.assertClose(verts_packed[curv : curv + v, :], verts_list[n])
+                self.assertClose(faces_packed[curf : curf + f, :] - curv, faces_list[n])
+                self.assertTrue(vert_to_mesh[curv : curv + v].eq(n).all())
+                self.assertTrue(face_to_mesh[curf : curf + f].eq(n).all())
+                self.assertTrue(mesh_to_vert[n] == curv)
+                self.assertTrue(mesh_to_face[n] == curf)
+                curv += v
+                curf += f
+
+            # Check compute edges and compare with numpy unique.
+            edges = mesh.edges_packed().cpu().numpy()
+            edge_to_mesh_idx = mesh.edges_packed_to_mesh_idx().cpu().numpy()
+            num_edges_per_mesh = mesh.num_edges_per_mesh().cpu().numpy()
+
+            npfaces_packed = mesh.faces_packed().cpu().numpy()
+            e01 = npfaces_packed[:, [0, 1]]
+            e12 = npfaces_packed[:, [1, 2]]
+            e20 = npfaces_packed[:, [2, 0]]
+            npedges = np.concatenate((e12, e20, e01), axis=0)
+            npedges = np.sort(npedges, axis=1)  # (min, max) per edge so duplicates match
+
+            unique_edges, unique_idx = np.unique(npedges, return_index=True, axis=0)
+            self.assertTrue(np.allclose(edges, unique_edges))
+            temp = face_to_mesh.cpu().numpy()
+            temp = np.concatenate((temp, temp, temp), axis=0)  # mesh id per candidate edge
+            edge_to_mesh = temp[unique_idx]
+            self.assertTrue(np.allclose(edge_to_mesh_idx, edge_to_mesh))
+            num_edges = np.bincount(edge_to_mesh, minlength=N)
+            self.assertTrue(np.allclose(num_edges_per_mesh, num_edges))
+            mesh_to_edges_packed_first_idx = (
+                mesh.mesh_to_edges_packed_first_idx().cpu().numpy()
+            )
+            self.assertTrue(
+                np.allclose(mesh_to_edges_packed_first_idx[1:], num_edges.cumsum()[:-1])
+            )
+            self.assertTrue(mesh_to_edges_packed_first_idx[0] == 0)
+
+    def test_allempty(self) -> None:
+        mesh = Meshes(verts=[], faces=[])  # batch built from empty verts/faces lists
+        self.assertEqual(len(mesh), 0)
+        self.assertEqual(mesh.verts_padded().shape[0], 0)
+        self.assertEqual(mesh.faces_padded().shape[0], 0)
+        self.assertEqual(mesh.verts_packed().shape[0], 0)
+        self.assertEqual(mesh.faces_packed().shape[0], 0)
+        self.assertEqual(mesh.num_faces_per_mesh().shape[0], 0)
+        self.assertEqual(mesh.num_verts_per_mesh().shape[0], 0)
+
+    def test_empty(self) -> None:
+        N, V, F = 10, 100, 300
+        device = torch.device("cuda:0")
+        verts_list = []
+        faces_list = []
+        valid = torch.randint(2, size=(N,), dtype=torch.uint8, device=device)  # 0/1 flags
+        for n in range(N):
+            if valid[n]:  # flag 1: random mesh; flag 0: empty verts and faces
+                v = torch.randint(
+                    3, high=V, size=(1,), dtype=torch.int32, device=device
+                )[0]
+                f = torch.randint(F, size=(1,), dtype=torch.int32, device=device)[0]
+                verts = torch.rand((v, 3), dtype=torch.float32, device=device)
+                faces = torch.randint(v, size=(f, 3), dtype=torch.int64, device=device)
+            else:
+                verts = torch.tensor([], dtype=torch.float32, device=device)
+                faces = torch.tensor([], dtype=torch.int64, device=device)
+            verts_list.append(verts)
+            faces_list.append(faces)
+
+        mesh = Meshes(verts=verts_list, faces=faces_list)
+        verts_padded = mesh.verts_padded()
+        faces_padded = mesh.faces_padded()
+        verts_per_mesh = mesh.num_verts_per_mesh()
+        faces_per_mesh = mesh.num_faces_per_mesh()
+        for n in range(N):
+            v = len(verts_list[n])
+            f = len(faces_list[n])
+            if v > 0:
+                self.assertClose(verts_padded[n, :v, :], verts_list[n])
+                if verts_padded.shape[1] > v:
+                    self.assertTrue(verts_padded[n, v:, :].eq(0).all())  # verts pad is 0
+            if f > 0:
+                self.assertClose(faces_padded[n, :f, :], faces_list[n])
+                if faces_padded.shape[1] > f:
+                    self.assertTrue(faces_padded[n, f:, :].eq(-1).all())  # faces pad is -1
+            self.assertTrue(verts_per_mesh[n] == v)
+            self.assertTrue(faces_per_mesh[n] == f)
+
+    def test_padding(self) -> None:
+        N, V, F = 10, 100, 300
+        device = torch.device("cuda:0")
+        verts, faces = [], []
+        valid = torch.randint(2, size=(N,), dtype=torch.uint8, device=device)  # 0/1 flags
+        num_verts, num_faces = (
+            torch.zeros(N, dtype=torch.int32),
+            torch.zeros(N, dtype=torch.int32),
+        )
+        for n in range(N):
+            verts.append(torch.rand((V, 3), dtype=torch.float32, device=device))
+            this_faces = torch.full((F, 3), -1, dtype=torch.int64, device=device)
+            if valid[n]:  # flag 1: fill the first f rows; flag 0: leave all rows at -1
+                v = torch.randint(
+                    3, high=V, size=(1,), dtype=torch.int32, device=device
+                )[0]
+                f = torch.randint(F, size=(1,), dtype=torch.int32, device=device)[0]
+                this_faces[:f, :] = torch.randint(
+                    v, size=(f, 3), dtype=torch.int64, device=device
+                )
+                num_verts[n] = v
+                num_faces[n] = f
+            faces.append(this_faces)
+
+        mesh = Meshes(verts=torch.stack(verts), faces=torch.stack(faces))
+
+        # Check verts/faces per mesh are set correctly in init.
+        self.assertListEqual(mesh._num_faces_per_mesh.tolist(), num_faces.tolist())
+        self.assertListEqual(mesh._num_verts_per_mesh.tolist(), [V] * N)
+
+        for n, (vv, ff) in enumerate(zip(mesh.verts_list(), mesh.faces_list())):
+            self.assertClose(ff, faces[n][: num_faces[n]])
+            self.assertClose(vv, verts[n])
+
+        new_faces = [ff.clone() for ff in faces]
+        v = torch.randint(3, high=V, size=(1,), dtype=torch.int32, device=device)[0]
+        f = torch.randint(F - 10, size=(1,), dtype=torch.int32, device=device)[0]
+        this_faces = torch.full((F, 3), -1, dtype=torch.int64, device=device)
+        this_faces[10 : f + 10, :] = torch.randint(
+            v, size=(f, 3), dtype=torch.int64, device=device
+        )  # rows 0-9 stay at -1, so any filled rows come after padding rows
+        new_faces[3] = this_faces
+
+        with self.assertRaisesRegex(ValueError, "Padding of faces"):
+            Meshes(verts=torch.stack(verts), faces=torch.stack(new_faces))
+
+    def test_clone(self) -> None:
+        N = 5
+        mesh = init_mesh(N, 10, 100)
+        for force in [0, 1]:  # clone without, then with, computed attributes
+            if force:
+                # force mesh to have computed attributes
+                mesh.verts_packed()
+                mesh.edges_packed()
+                mesh.verts_padded()
+
+            new_mesh = mesh.clone()
+
+            # Rebind the clone's first verts tensor to a scaled copy.
+            new_mesh._verts_list[0] = new_mesh._verts_list[0] * 5
+
+            # Check cloned and original Meshes objects do not share tensors.
+            self.assertFalse(
+                torch.allclose(new_mesh._verts_list[0], mesh._verts_list[0])
+            )
+            self.assertSeparate(new_mesh.verts_packed(), mesh.verts_packed())
+            self.assertSeparate(new_mesh.verts_padded(), mesh.verts_padded())
+            self.assertSeparate(new_mesh.faces_packed(), mesh.faces_packed())
+            self.assertSeparate(new_mesh.faces_padded(), mesh.faces_padded())
+            self.assertSeparate(new_mesh.edges_packed(), mesh.edges_packed())
+
+    def test_detach(self) -> None:
+        N = 5
+        mesh = init_mesh(N, 10, 100, requires_grad=True)
+        for force in [0, 1]:  # detach without, then with, computed attributes
+            if force:
+                # force mesh to have computed attributes
+                mesh.verts_packed()
+                mesh.edges_packed()
+                mesh.verts_padded()
+
+            new_mesh = mesh.detach()
+
+            # detached verts must keep their values but not require grad
+            self.assertFalse(new_mesh.verts_packed().requires_grad)
+            self.assertClose(new_mesh.verts_packed(), mesh.verts_packed())
+            self.assertFalse(new_mesh.verts_padded().requires_grad)
+            self.assertClose(new_mesh.verts_padded(), mesh.verts_padded())
+            for v, newv in zip(mesh.verts_list(), new_mesh.verts_list()):
+                self.assertFalse(newv.requires_grad)
+                self.assertClose(newv, v)
+
+    def test_offset_verts(self):
+        """Compare ``offset_verts`` with a reference that rebuilds the batch.
+
+        Both a per-vertex offset of shape ``(all_v, 3)`` and a single offset of
+        shape ``(3,)`` are tried, with and without the derived attributes of
+        the source mesh being computed up front.
+        """
+
+        def naive_offset_verts(mesh, vert_offsets_packed):
+            # Reference result: shift the packed verts, cut them back into
+            # per-mesh chunks and construct a brand new Meshes object.
+            shifted = mesh.verts_packed() + vert_offsets_packed
+            chunks = shifted.split(mesh.num_verts_per_mesh().tolist(), 0)
+            return Meshes(
+                verts=list(chunks), faces=[f.clone() for f in mesh.faces_list()]
+            )
+
+        # Accessors compared without an explicit tolerance, in call order.
+        plain_accessors = (
+            "faces_padded",
+            "verts_padded",
+            "faces_packed",
+            "verts_packed",
+            "edges_packed",
+            "verts_packed_to_mesh_idx",
+            "mesh_to_verts_packed_first_idx",
+            "num_verts_per_mesh",
+            "faces_packed_to_mesh_idx",
+            "mesh_to_faces_packed_first_idx",
+            "num_faces_per_mesh",
+            "edges_packed_to_mesh_idx",
+            "verts_padded_to_packed_idx",
+        )
+        # Normal accessors compared with atol=1e-6.
+        normal_accessors = (
+            "verts_normals_packed",
+            "verts_normals_padded",
+            "faces_normals_packed",
+            "faces_normals_padded",
+        )
+        # Compared last, again without an explicit tolerance.
+        trailing_accessors = ("faces_areas_packed", "mesh_to_edges_packed_first_idx")
+
+        N = 5
+        mesh = init_mesh(N, 30, 100, lists_to_tensors=True)
+        all_v = mesh.verts_packed().size(0)
+        verts_per_mesh = mesh.num_verts_per_mesh()
+        for force in (False, True):
+            for deform_shape in ((all_v, 3), 3):
+                if force:
+                    # Force the derived attributes to be computed beforehand.
+                    mesh._compute_packed(refresh=True)
+                    mesh._compute_padded()
+                    mesh._compute_edges_packed()
+                    mesh.verts_padded_to_packed_idx()
+                    mesh._compute_face_areas_normals(refresh=True)
+                    mesh._compute_vertex_normals(refresh=True)
+
+                deform = torch.rand(
+                    deform_shape, dtype=torch.float32, device=mesh.device
+                )
+                expected = naive_offset_verts(mesh, deform)
+                actual = mesh.offset_verts(deform)
+
+                # Packed row boundaries of the meshes: [0, n0, n0 + n1, ...].
+                bounds = [0] + torch.cumsum(verts_per_mesh, 0).tolist()
+                for i in range(N):
+                    # A 1-D offset applies to every vertex; otherwise take the
+                    # rows that belong to mesh i.
+                    if deform.ndim == 1:
+                        item_offset = deform
+                    else:
+                        item_offset = deform[bounds[i] : bounds[i + 1]]
+
+                    # check verts_list & faces_list
+                    self.assertClose(
+                        actual.verts_list()[i], mesh.verts_list()[i] + item_offset
+                    )
+                    self.assertClose(actual.verts_list()[i], expected.verts_list()[i])
+                    self.assertClose(mesh.faces_list()[i], expected.faces_list()[i])
+                    self.assertClose(actual.faces_list()[i], expected.faces_list()[i])
+
+                    # check vertex and face normals
+                    for name in ("verts_normals_list", "faces_normals_list"):
+                        self.assertClose(
+                            getattr(actual, name)()[i],
+                            getattr(expected, name)()[i],
+                            atol=1e-6,
+                        )
+
+                # check padded & packed
+                for name in plain_accessors:
+                    self.assertClose(
+                        getattr(actual, name)(), getattr(expected, name)()
+                    )
+                self.assertTrue(all(actual.valid == expected.valid))
+                self.assertTrue(actual.equisized == expected.equisized)
+
+                # check face areas, normals and vertex normals
+                for name in normal_accessors:
+                    self.assertClose(
+                        getattr(actual, name)(),
+                        getattr(expected, name)(),
+                        atol=1e-6,
+                    )
+                for name in trailing_accessors:
+                    self.assertClose(
+                        getattr(actual, name)(), getattr(expected, name)()
+                    )
+
+    def test_scale_verts(self):
+        """Compare ``scale_verts`` with a reference that rebuilds the batch.
+
+        The scale is given either as a tensor with one entry per mesh or as a
+        Python float, with and without the derived attributes of the source
+        mesh being computed up front.
+        """
+
+        def naive_scale_verts(mesh, scale):
+            # Reference result: scale each mesh's verts separately and build a
+            # brand new Meshes object from the copies.
+            if torch.is_tensor(scale):
+                per_mesh = scale
+            else:
+                # Broadcast a plain number to one scale per mesh.
+                per_mesh = torch.ones(len(mesh)).mul_(scale)
+            scaled = [
+                per_mesh[k] * pts.clone() for k, pts in enumerate(mesh.verts_list())
+            ]
+            return Meshes(verts=scaled, faces=[f.clone() for f in mesh.faces_list()])
+
+        # Accessors compared on the whole batch, in call order.
+        structure_accessors = (
+            "faces_padded",
+            "verts_padded",
+            "faces_packed",
+            "verts_packed",
+            "edges_packed",
+            "verts_packed_to_mesh_idx",
+            "mesh_to_verts_packed_first_idx",
+            "num_verts_per_mesh",
+            "faces_packed_to_mesh_idx",
+            "mesh_to_faces_packed_first_idx",
+            "num_faces_per_mesh",
+            "edges_packed_to_mesh_idx",
+            "verts_padded_to_packed_idx",
+        )
+        geometry_accessors = (
+            "verts_normals_packed",
+            "verts_normals_padded",
+            "faces_normals_packed",
+            "faces_normals_padded",
+            "faces_areas_packed",
+            "mesh_to_edges_packed_first_idx",
+        )
+
+        N = 5
+        for test, force in itertools.product(["tensor", "scalar"], (False, True)):
+            mesh = init_mesh(N, 10, 100, lists_to_tensors=True)
+            if force:
+                # Force the derived attributes to be computed beforehand.
+                mesh.verts_packed()
+                mesh.edges_packed()
+                mesh.verts_padded()
+                mesh._compute_face_areas_normals(refresh=True)
+                mesh._compute_vertex_normals(refresh=True)
+
+            scales = torch.rand(N) if test == "tensor" else torch.rand(1)[0].item()
+            expected = naive_scale_verts(mesh, scales)
+            actual = mesh.scale_verts(scales)
+            for i in range(N):
+                # Scale that applies to mesh i.
+                factor = scales[i] if test == "tensor" else scales
+                self.assertClose(factor * mesh.verts_list()[i], actual.verts_list()[i])
+                self.assertClose(actual.verts_list()[i], expected.verts_list()[i])
+                self.assertClose(mesh.faces_list()[i], expected.faces_list()[i])
+                self.assertClose(actual.faces_list()[i], expected.faces_list()[i])
+                # check vertex and face normals
+                for name in ("verts_normals_list", "faces_normals_list"):
+                    self.assertClose(
+                        getattr(actual, name)()[i], getattr(expected, name)()[i]
+                    )
+
+            # check padded & packed
+            for name in structure_accessors:
+                self.assertClose(getattr(actual, name)(), getattr(expected, name)())
+            self.assertTrue(all(actual.valid == expected.valid))
+            self.assertTrue(actual.equisized == expected.equisized)
+
+            # check face areas, normals and vertex normals
+            for name in geometry_accessors:
+                self.assertClose(getattr(actual, name)(), getattr(expected, name)())
+
+    def test_extend_list(self):
+        """Check that ``extend(N)`` repeats every mesh ``N`` times.
+
+        The copies of mesh ``i`` are expected at indices ``i * N`` through
+        ``i * N + N - 1`` of the extended batch, all vertex/face tensors of
+        the source and the result must be separate, the packed/padded caches
+        of the result must be unset, and a negative ``N`` must raise
+        ``ValueError``.
+        """
+        N = 10
+        mesh = init_mesh(5, 10, 100)
+        for force in [0, 1]:
+            if force:
+                # force some computes to happen
+                mesh._compute_packed(refresh=True)
+                mesh._compute_padded()
+                mesh._compute_edges_packed()
+                mesh.verts_padded_to_packed_idx()
+            new_mesh = mesh.extend(N)
+            # Derive the expected size from N rather than repeating the literal,
+            # so the check stays correct if N is changed.
+            self.assertEqual(len(mesh) * N, len(new_mesh))
+            for i in range(len(mesh)):
+                for n in range(N):
+                    self.assertClose(
+                        mesh.verts_list()[i], new_mesh.verts_list()[i * N + n]
+                    )
+                    self.assertClose(
+                        mesh.faces_list()[i], new_mesh.faces_list()[i * N + n]
+                    )
+                    self.assertTrue(mesh.valid[i] == new_mesh.valid[i * N + n])
+            self.assertAllSeparate(
+                mesh.verts_list()
+                + new_mesh.verts_list()
+                + mesh.faces_list()
+                + new_mesh.faces_list()
+            )
+            # The extended batch must not carry over any packed/padded caches.
+            self.assertIsNone(new_mesh._verts_packed)
+            self.assertIsNone(new_mesh._faces_packed)
+            self.assertIsNone(new_mesh._verts_padded)
+            self.assertIsNone(new_mesh._faces_padded)
+            self.assertIsNone(new_mesh._edges_packed)
+
+        with self.assertRaises(ValueError):
+            mesh.extend(N=-1)
+
+    def test_to(self):
+        """Check ``to`` for string and ``torch.device`` targets.
+
+        Moving to the CPU device is expected to return the very same object;
+        moving to ``cuda:0`` is expected to return a different object and
+        leave the source on the CPU.
+        """
+        mesh = init_mesh(5, 10, 100)
+
+        cpu_device = torch.device("cpu")
+        for target in ("cpu", cpu_device):
+            converted_mesh = mesh.to(target)
+            self.assertEqual(cpu_device, converted_mesh.device)
+            self.assertEqual(cpu_device, mesh.device)
+            self.assertIs(mesh, converted_mesh)
+
+        cuda_device = torch.device("cuda:0")
+        for target in ("cuda:0", cuda_device):
+            converted_mesh = mesh.to(target)
+            self.assertEqual(cuda_device, converted_mesh.device)
+            self.assertEqual(cpu_device, mesh.device)
+            self.assertIsNot(mesh, converted_mesh)
+
+    def test_split_mesh(self):
+        """Check that ``split`` partitions the batch in order and rejects bad sizes.
+
+        Part ``k`` must hold ``split_sizes[k]`` meshes whose verts equal those
+        of the corresponding consecutive meshes of the source batch. A
+        non-integer split size must raise ``ValueError``.
+        """
+        mesh = init_mesh(5, 10, 100)
+        split_sizes = [2, 3]
+        split_meshes = mesh.split(split_sizes)
+
+        first = 0  # index in `mesh` of the first mesh of the current part
+        for k, size in enumerate(split_sizes):
+            part = split_meshes[k]
+            self.assertEqual(len(part), size)
+            part_verts = part.verts_list()
+            self.assertEqual(len(part_verts), size)
+            # Compare tensor by tensor. `==` on two lists of tensors only
+            # succeeds through the identity shortcut of list comparison; for
+            # equal-valued but distinct tensors it raises instead of comparing.
+            for offset, verts in enumerate(part_verts):
+                self.assertClose(verts, mesh.get_mesh_verts_faces(first + offset)[0])
+            first += size
+
+        split_sizes = [2, 0.3]
+        with self.assertRaises(ValueError):
+            mesh.split(split_sizes)
+
+    def test_update_padded(self):
+        """Check ``update_padded``: new padded verts, other data taken from the source.
+
+        Covers source meshes built from lists and from tensors, with and
+        without derived attributes of the source being computed first.
+        """
+        N = 10
+        # Cached attributes that are expected to be unset on the new mesh when
+        # nothing was precomputed, and not separate from the source's otherwise.
+        carried_attrs = (
+            "_faces_packed",
+            "_verts_packed_to_mesh_idx",
+            "_faces_packed_to_mesh_idx",
+            "_mesh_to_verts_packed_first_idx",
+            "_mesh_to_faces_packed_first_idx",
+            "_edges_packed",
+            "_edges_packed_to_mesh_idx",
+            "_mesh_to_edges_packed_first_idx",
+            "_faces_packed_to_edges_packed",
+            "_num_edges_per_mesh",
+        )
+        for lists_to_tensors, force in itertools.product((False, True), (True, False)):
+            mesh = init_mesh(N, 100, 300, lists_to_tensors=lists_to_tensors)
+            num_verts_per_mesh = mesh.num_verts_per_mesh()
+            if force:
+                # Force the derived attributes to be computed beforehand.
+                mesh.verts_packed()
+                mesh.edges_packed()
+                mesh.laplacian_packed()
+                mesh.faces_areas_packed()
+
+            new_verts = torch.rand((mesh._N, mesh._V, 3), device=mesh.device)
+            # Unpadded view of the new verts, one tensor per mesh.
+            new_verts_list = [new_verts[i, : num_verts_per_mesh[i]] for i in range(N)]
+            new_mesh = mesh.update_padded(new_verts)
+
+            # check the attributes assigned at construction time
+            for attr in ("_N", "_F", "_V"):
+                self.assertEqual(getattr(new_mesh, attr), getattr(mesh, attr))
+            self.assertEqual(new_mesh.equisized, mesh.equisized)
+            self.assertTrue(all(new_mesh.valid == mesh.valid))
+            for name in ("num_verts_per_mesh", "num_faces_per_mesh"):
+                self.assertNotSeparate(getattr(new_mesh, name)(), getattr(mesh, name)())
+                self.assertClose(getattr(new_mesh, name)(), getattr(mesh, name)())
+
+            # check that the following attributes are not assigned
+            for attr in (
+                "_verts_list",
+                "_faces_areas_packed",
+                "_faces_normals_packed",
+                "_verts_normals_packed",
+            ):
+                self.assertIsNone(getattr(new_mesh, attr))
+
+            for attr in carried_attrs:
+                current = getattr(new_mesh, attr)
+                if force:
+                    previous = getattr(mesh, attr)
+                    self.assertNotSeparate(current, previous)
+                    self.assertClose(current, previous)
+                else:
+                    self.assertIsNone(current)
+
+            # check verts/faces padded
+            self.assertClose(new_mesh.verts_padded(), new_verts)
+            self.assertNotSeparate(new_mesh.verts_padded(), new_verts)
+            self.assertClose(new_mesh.faces_padded(), mesh.faces_padded())
+            self.assertNotSeparate(new_mesh.faces_padded(), mesh.faces_padded())
+
+            # check verts/faces list
+            for i in range(N):
+                updated_faces = new_mesh.faces_list()[i]
+                source_faces = mesh.faces_list()[i]
+                self.assertNotSeparate(updated_faces, source_faces)
+                self.assertClose(updated_faces, source_faces)
+                updated_verts = new_mesh.verts_list()[i]
+                self.assertSeparate(updated_verts, mesh.verts_list()[i])
+                self.assertClose(updated_verts, new_verts_list[i])
+
+            # check verts/faces packed
+            self.assertClose(new_mesh.verts_packed(), torch.cat(new_verts_list))
+            self.assertSeparate(new_mesh.verts_packed(), mesh.verts_packed())
+            self.assertClose(new_mesh.faces_packed(), mesh.faces_packed())
+
+            # check pad_to_packed
+            self.assertClose(
+                new_mesh.verts_padded_to_packed_idx(),
+                mesh.verts_padded_to_packed_idx(),
+            )
+
+            # check edges
+            self.assertClose(new_mesh.edges_packed(), mesh.edges_packed())
+
+    def test_get_mesh_verts_faces(self):
+        """Check ``get_mesh_verts_faces`` per mesh and for invalid indices.
+
+        An out-of-range index and a float index are both expected to raise
+        ``ValueError``.
+        """
+        device = torch.device("cuda:0")
+        verts_faces = [(10, 100), (20, 200)]
+        # For every mesh draw the verts first, then the faces.
+        samples = [
+            (
+                torch.rand((V, 3), dtype=torch.float32, device=device),
+                torch.randint(V, size=(F, 3), dtype=torch.int64, device=device),
+            )
+            for (V, F) in verts_faces
+        ]
+        verts_list = [sample[0] for sample in samples]
+        faces_list = [sample[1] for sample in samples]
+
+        mesh = Meshes(verts=verts_list, faces=faces_list)
+
+        for i, (V, F) in enumerate(verts_faces):
+            verts, faces = mesh.get_mesh_verts_faces(i)
+            self.assertTrue(len(verts) == V)
+            self.assertClose(verts, verts_list[i])
+            self.assertTrue(len(faces) == F)
+            self.assertClose(faces, faces_list[i])
+
+        for bad_index in (5, 0.2):
+            with self.assertRaises(ValueError):
+                mesh.get_mesh_verts_faces(bad_index)
+
+    def test_get_bounding_boxes(self):
+        """Check ``get_bounding_boxes`` against the per-axis min/max of one mesh."""
+        device = torch.device("cuda:0")
+        V, F = 10, 100
+        verts = torch.rand((V, 3), dtype=torch.float32, device=device)
+        faces = torch.randint(V, size=(F, 3), dtype=torch.int64, device=device)
+
+        # Expected result has shape (1, 3, 2): for the single mesh, one
+        # [min, max] pair per coordinate axis.
+        lower, _ = torch.min(verts, dim=0)
+        upper, _ = torch.max(verts, dim=0)
+        bboxes_gt = torch.stack([lower, upper], dim=1).unsqueeze(0)
+
+        mesh = Meshes(verts=[verts], faces=[faces])
+        self.assertClose(bboxes_gt, mesh.get_bounding_boxes())
+
+    def test_padded_to_packed_idx(self):
+        """Check that ``verts_padded_to_packed_idx`` maps flattened padded verts to packed.
+
+        The mapping is exercised both through plain indexing and ``gather``.
+        """
+        device = torch.device("cuda:0")
+        verts_faces = [(10, 100), (20, 200), (30, 300)]
+        # For every mesh draw the verts first, then the faces.
+        samples = [
+            (
+                torch.rand((V, 3), dtype=torch.float32, device=device),
+                torch.randint(V, size=(F, 3), dtype=torch.int64, device=device),
+            )
+            for (V, F) in verts_faces
+        ]
+        mesh = Meshes(
+            verts=[sample[0] for sample in samples],
+            faces=[sample[1] for sample in samples],
+        )
+
+        to_packed = mesh.verts_padded_to_packed_idx()
+        packed = mesh.verts_packed()
+        # Collapse the batch and vertex dimensions of the padded verts.
+        padded_flat = mesh.verts_padded().view(-1, 3)
+
+        self.assertClose(padded_flat[to_packed], packed)
+
+        # Same lookup via gather: repeat each row index for the 3 coordinates.
+        gather_idx = to_packed.view(-1, 1).expand(-1, 3)
+        self.assertClose(padded_flat.gather(0, gather_idx), packed)
+
+    def test_getitem(self):
+        """Check indexing a Meshes batch by int, list, slice, bool and int tensors.
+
+        A float tensor and a Python float are expected to raise ``IndexError``.
+        """
+        device = torch.device("cuda:0")
+        verts_faces = [(10, 100), (20, 200), (30, 300)]
+        # For every mesh draw the verts first, then the faces.
+        samples = [
+            (
+                torch.rand((V, 3), dtype=torch.float32, device=device),
+                torch.randint(V, size=(F, 3), dtype=torch.int64, device=device),
+            )
+            for (V, F) in verts_faces
+        ]
+        mesh = Meshes(
+            verts=[sample[0] for sample in samples],
+            faces=[sample[1] for sample in samples],
+        )
+
+        def check_equal(selected, indices):
+            # Entry k of `selected` must match entry indices[k] of `mesh`.
+            for position, source in enumerate(indices):
+                for name in ("verts_list", "faces_list"):
+                    self.assertClose(
+                        getattr(selected, name)()[position],
+                        getattr(mesh, name)()[source],
+                    )
+
+        # int index
+        picked = mesh[1]
+        self.assertTrue(len(picked) == 1)
+        check_equal(picked, [1])
+
+        # list index
+        wanted = [1, 2]
+        picked = mesh[wanted]
+        self.assertTrue(len(picked) == len(wanted))
+        check_equal(picked, wanted)
+
+        # slice index
+        picked = mesh[slice(0, 2, 1)]
+        check_equal(picked, [0, 1])
+
+        # bool tensor
+        mask = torch.tensor([1, 0, 1], dtype=torch.bool, device=device)
+        picked = mesh[mask]
+        self.assertTrue(len(picked) == mask.sum())
+        check_equal(picked, [0, 2])
+
+        # int tensor
+        ids = torch.tensor([1, 2], dtype=torch.int64, device=device)
+        picked = mesh[ids]
+        self.assertTrue(len(picked) == ids.numel())
+        check_equal(picked, ids.tolist())
+
+        # invalid index
+        float_index = torch.tensor([1, 0, 1], dtype=torch.float32, device=device)
+        for bad_index in (float_index, 1.2):
+            with self.assertRaises(IndexError):
+                _ = mesh[bad_index]
+
+    def test_compute_faces_areas(self):
+        """Check ``faces_areas_packed`` on two planar triangles with known areas."""
+        points = [
+            [0.0, 0.0, 0.0],
+            [0.5, 0.0, 0.0],
+            [0.5, 0.5, 0.0],
+            [0.5, 0.0, 0.0],
+            [0.25, 0.8, 0.0],
+        ]
+        verts = torch.tensor(points, dtype=torch.float32)
+        faces = torch.tensor([[0, 1, 2], [0, 3, 4]], dtype=torch.int64)
+        mesh = Meshes(verts=[verts], faces=[faces])
+
+        # Right triangle with legs 0.5: 0.125; base 0.5 with height 0.8: 0.2.
+        expected_areas = torch.tensor([0.125, 0.2])
+        self.assertClose(mesh.faces_areas_packed(), expected_areas)
+
+    def test_compute_normals(self):
+        """Check vertex and face normals for several hand-built meshes.
+
+        Covers, in order: a single mesh whose faces are axis aligned (plus one
+        degenerate face with zero normals), an extended batch of equal-sized
+        meshes, a batch of differently sized meshes (values, zero padding and
+        packed shapes), a mesh where two faces share a vertex, and an empty
+        Meshes object.
+        """
+
+        # Simple case with one mesh where normals point in either +/- ijk
+        verts = torch.tensor(
+            [
+                [0.1, 0.3, 0.0],
+                [0.5, 0.2, 0.0],
+                [0.6, 0.8, 0.0],
+                [0.0, 0.3, 0.2],
+                [0.0, 0.2, 0.5],
+                [0.0, 0.8, 0.7],
+                [0.5, 0.0, 0.2],
+                [0.6, 0.0, 0.5],
+                [0.8, 0.0, 0.7],
+                [0.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0],
+            ],
+            dtype=torch.float32,
+        )
+        faces = torch.tensor(
+            [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]], dtype=torch.int64
+        )
+        mesh = Meshes(verts=[verts], faces=[faces])
+        # Normals have not been requested yet.
+        self.assertFalse(mesh.has_verts_normals())
+        verts_normals_expected = torch.tensor(
+            [
+                [0.0, 0.0, 1.0],
+                [0.0, 0.0, 1.0],
+                [0.0, 0.0, 1.0],
+                [-1.0, 0.0, 0.0],
+                [-1.0, 0.0, 0.0],
+                [-1.0, 0.0, 0.0],
+                [0.0, 1.0, 0.0],
+                [0.0, 1.0, 0.0],
+                [0.0, 1.0, 0.0],
+                [0.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0],
+            ]
+        )
+        # Every face owns three consecutive verts, so the face normal is the
+        # normal of the first vert of each triple.
+        faces_normals_expected = verts_normals_expected[[0, 3, 6, 9], :]
+
+        self.assertTrue(
+            torch.allclose(mesh.verts_normals_list()[0], verts_normals_expected)
+        )
+        self.assertTrue(mesh.has_verts_normals())
+        self.assertTrue(
+            torch.allclose(mesh.faces_normals_list()[0], faces_normals_expected)
+        )
+        self.assertTrue(
+            torch.allclose(mesh.verts_normals_packed(), verts_normals_expected)
+        )
+        self.assertTrue(
+            torch.allclose(mesh.faces_normals_packed(), faces_normals_expected)
+        )
+
+        # Multiple meshes in the batch with equal sized meshes
+        meshes_extended = mesh.extend(3)
+        for m in meshes_extended.verts_normals_list():
+            self.assertClose(m, verts_normals_expected)
+        for f in meshes_extended.faces_normals_list():
+            self.assertClose(f, faces_normals_expected)
+
+        # Multiple meshes in the batch with different sized meshes
+        # Check padded and packed normals are the correct sizes.
+        # verts2/faces2 are the first six verts and first two faces of the
+        # mesh above, so the expected normals are prefixes of the ones above.
+        verts2 = torch.tensor(
+            [
+                [0.1, 0.3, 0.0],
+                [0.5, 0.2, 0.0],
+                [0.6, 0.8, 0.0],
+                [0.0, 0.3, 0.2],
+                [0.0, 0.2, 0.5],
+                [0.0, 0.8, 0.7],
+            ],
+            dtype=torch.float32,
+        )
+        faces2 = torch.tensor([[0, 1, 2], [3, 4, 5]], dtype=torch.int64)
+        verts_list = [verts, verts2]
+        faces_list = [faces, faces2]
+        meshes = Meshes(verts=verts_list, faces=faces_list)
+        verts_normals_padded = meshes.verts_normals_padded()
+        faces_normals_padded = meshes.faces_normals_padded()
+
+        for n in range(len(meshes)):
+            v = verts_list[n].shape[0]
+            f = faces_list[n].shape[0]
+            # Padding rows exist only for meshes smaller than the largest one
+            # and must be zero.
+            if verts_normals_padded.shape[1] > v:
+                self.assertTrue(verts_normals_padded[n, v:, :].eq(0).all())
+            # The valid rows are compared for every mesh, including the
+            # largest one, which has no padding.
+            self.assertTrue(
+                torch.allclose(
+                    verts_normals_padded[n, :v, :].view(-1, 3),
+                    verts_normals_expected[:v, :],
+                )
+            )
+            if faces_normals_padded.shape[1] > f:
+                self.assertTrue(faces_normals_padded[n, f:, :].eq(0).all())
+            self.assertTrue(
+                torch.allclose(
+                    faces_normals_padded[n, :f, :].view(-1, 3),
+                    faces_normals_expected[:f, :],
+                )
+            )
+
+        verts_normals_packed = meshes.verts_normals_packed()
+        faces_normals_packed = meshes.faces_normals_packed()
+        self.assertTrue(
+            list(verts_normals_packed.shape) == [verts.shape[0] + verts2.shape[0], 3]
+        )
+        self.assertTrue(
+            list(faces_normals_packed.shape) == [faces.shape[0] + faces2.shape[0], 3]
+        )
+
+        # Single mesh where two faces share one vertex so the normal is
+        # the weighted sum of the two face normals.
+        verts = torch.tensor(
+            [
+                [0.1, 0.3, 0.0],
+                [0.5, 0.2, 0.0],
+                [0.0, 0.3, 0.2],  # vertex is shared between two faces
+                [0.0, 0.2, 0.5],
+                [0.0, 0.8, 0.7],
+            ],
+            dtype=torch.float32,
+        )
+        faces = torch.tensor([[0, 1, 2], [2, 3, 4]], dtype=torch.int64)
+        mesh = Meshes(verts=[verts], faces=[faces])
+
+        verts_normals_expected = torch.tensor(
+            [
+                [-0.2408, -0.9631, -0.1204],
+                [-0.2408, -0.9631, -0.1204],
+                [-0.9389, -0.3414, -0.0427],
+                [-1.0000, 0.0000, 0.0000],
+                [-1.0000, 0.0000, 0.0000],
+            ]
+        )
+        faces_normals_expected = torch.tensor(
+            [[-0.2408, -0.9631, -0.1204], [-1.0000, 0.0000, 0.0000]]
+        )
+        # The expected values are rounded to four decimals, hence the atol.
+        self.assertTrue(
+            torch.allclose(
+                mesh.verts_normals_list()[0], verts_normals_expected, atol=4e-5
+            )
+        )
+        self.assertTrue(
+            torch.allclose(
+                mesh.faces_normals_list()[0], faces_normals_expected, atol=4e-5
+            )
+        )
+
+        # Check empty mesh has empty normals
+        meshes = Meshes(verts=[], faces=[])
+        self.assertEqual(meshes.verts_normals_packed().shape[0], 0)
+        self.assertEqual(meshes.verts_normals_padded().shape[0], 0)
+        self.assertEqual(meshes.verts_normals_list(), [])
+        self.assertEqual(meshes.faces_normals_packed().shape[0], 0)
+        self.assertEqual(meshes.faces_normals_padded().shape[0], 0)
+        self.assertEqual(meshes.faces_normals_list(), [])
+
+    def test_assigned_normals(self):
+        """Check vertex normals passed to the constructor, as a list or a tensor.
+
+        The assigned normals are expected to be returned unchanged after an
+        in-place offset by a single 3-vector, and to no longer match after an
+        in-place offset given as one row per vertex.
+        """
+        verts = torch.rand(2, 6, 3)
+        faces = torch.randint(6, size=(2, 4, 3))
+        without_normals = Meshes(verts=verts, faces=faces)
+        self.assertFalse(without_normals.has_verts_normals())
+
+        # The verts tensor doubles as the normals, first as a list of
+        # per-mesh tensors, then as the padded tensor itself.
+        for verts_normals in (list(verts.unbind(0)), verts):
+            with_normals = Meshes(
+                verts=verts.clone(), faces=faces, verts_normals=verts_normals
+            )
+            self.assertTrue(with_normals.has_verts_normals())
+            self.assertClose(with_normals.verts_normals_padded(), verts)
+
+            with_normals.offset_verts_(torch.FloatTensor([1, 2, 3]))
+            self.assertClose(with_normals.verts_normals_padded(), verts)
+
+            # 12 rows: one offset per vertex of the 2 meshes with 6 verts each.
+            per_vertex_shift = torch.FloatTensor([1, 2, 3]).expand(12, 3)
+            with_normals.offset_verts_(per_vertex_shift)
+            still_equal = torch.allclose(with_normals.verts_normals_padded(), verts)
+            self.assertFalse(still_equal)
+
+    def test_submeshes(self):
+        empty_mesh = Meshes([], [])
+        # Four cubes with offsets [0, 1, 2, 3].
+        cubes = init_cube_meshes()
+
+        # Extracting an empty submesh from an empty mesh is allowed, but extracting
+        # a nonempty submesh from an empty mesh should result in a value error.
+        self.assertTrue(mesh_structures_equal(empty_mesh.submeshes([]), empty_mesh))
+        self.assertTrue(
+            mesh_structures_equal(cubes.submeshes([[], [], [], []]), empty_mesh)
+        )
+
+        with self.assertRaisesRegex(
+            ValueError, "You must specify exactly one set of submeshes"
+        ):
+            empty_mesh.submeshes([torch.LongTensor([0])])
+
+        # Check that we can chop the cube up into its facets.
+        subcubes = to_sorted(
+            cubes.submeshes(
+                [  # Do not submesh cube#1.
+                    [],
+                    # Submesh the front face and the top-and-bottom of cube#2.
+                    [
+                        torch.LongTensor([0, 1]),
+                        torch.LongTensor([2, 3, 4, 5]),
+                    ],
+                    # Do not submesh cube#3.
+                    [],
+                    # Submesh the whole cube#4 (clone it).
+                    [torch.LongTensor(list(range(12)))],
+                ]
+            )
+        )
+
+        # The cube should've been chopped into three submeshes.
+        self.assertEqual(len(subcubes), 3)
+
+        # The first submesh should be a single facet of cube#2.
+        front_facet = to_sorted(
+            Meshes(
+                verts=torch.FloatTensor([[[0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0]]])
+                + 1,
+                faces=torch.LongTensor([[[0, 2, 1], [0, 3, 2]]]),
+            )
+        )
+        self.assertTrue(mesh_structures_equal(front_facet, subcubes[0]))
+
+        # The second submesh should be the top and bottom facets of cube#2.
+        top_and_bottom = Meshes(
+            verts=torch.FloatTensor(
+                [[[1, 0, 0], [1, 1, 0], [0, 1, 0], [0, 1, 1], [1, 1, 1], [1, 0, 1]]]
+            )
+            + 1,
+            faces=torch.LongTensor([[[1, 2, 3], [1, 3, 4], [0, 1, 4], [0, 4, 5]]]),
+        )
+        self.assertTrue(mesh_structures_equal(to_sorted(top_and_bottom), subcubes[1]))
+
+        # The last submesh should be all of cube#4, i.e. cubes[3], cloned above.
+        self.assertTrue(mesh_structures_equal(to_sorted(cubes[3]), subcubes[2]))
+
+        # Test alternative input parameterization: list of LongTensors.
+        two_facets = torch.LongTensor([[0, 1], [4, 5]])
+        subcubes = to_sorted(cubes.submeshes([two_facets, [], two_facets, []]))
+        expected_verts = torch.FloatTensor(
+            [
+                [[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]],
+                [[1, 0, 0], [1, 0, 1], [1, 1, 0], [1, 1, 1]],
+                [[2, 2, 2], [2, 3, 2], [3, 2, 2], [3, 3, 2]],
+                [[3, 2, 2], [3, 2, 3], [3, 3, 2], [3, 3, 3]],
+            ]
+        )
+        expected_faces = torch.LongTensor(
+            [
+                [[0, 3, 2], [0, 1, 3]],
+                [[0, 2, 3], [0, 3, 1]],
+                [[0, 3, 2], [0, 1, 3]],
+                [[0, 2, 3], [0, 3, 1]],
+            ]
+        )
+        expected_meshes = Meshes(verts=expected_verts, faces=expected_faces)
+        self.assertTrue(mesh_structures_equal(subcubes, expected_meshes))
+
+        # Test alternative input parameterization: a single LongTensor.
+        triangle_per_mesh = torch.LongTensor([[[0]], [[1]], [[4]], [[5]]])
+        subcubes = to_sorted(cubes.submeshes(triangle_per_mesh))
+        expected_verts = torch.FloatTensor(
+            [
+                [[0, 0, 0], [1, 0, 0], [1, 1, 0]],
+                [[1, 1, 1], [1, 2, 1], [2, 2, 1]],
+                [[3, 2, 2], [3, 3, 2], [3, 3, 3]],
+                [[4, 3, 3], [4, 3, 4], [4, 4, 4]],
+            ]
+        )
+        expected_faces = torch.LongTensor(
+            [[[0, 2, 1]], [[0, 1, 2]], [[0, 1, 2]], [[0, 2, 1]]]
+        )
+        expected_meshes = Meshes(verts=expected_verts, faces=expected_faces)
+        self.assertTrue(mesh_structures_equal(subcubes, expected_meshes))
+
+    def test_compute_faces_areas_cpu_cuda(self):
+        """Face areas and normals must agree between the CPU and cuda:0."""
+        num_meshes, max_v, max_f = 10, 100, 300
+        mesh_cpu = init_mesh(num_meshes, max_v, max_f, device="cpu")
+        device = torch.device("cuda:0")
+        mesh_cuda = mesh_cpu.to(device)
+
+        areas_cpu = mesh_cpu.faces_areas_packed()
+        normals_cpu = mesh_cpu.faces_normals_packed()
+
+        # Bring the CUDA results back to the host so they can be compared directly.
+        areas_cuda = mesh_cuda.faces_areas_packed().cpu()
+        normals_cuda = mesh_cuda.faces_normals_packed().cpu()
+        self.assertClose(areas_cpu, areas_cuda, atol=1e-6)
+
+        # Normalizing by an arbitrarily small value makes the normals unstable,
+        # so they are compared only on faces whose area exceeds eps = 1e-6.
+        eps = 1e-6
+        keep = areas_cpu > eps
+        self.assertClose(normals_cpu[keep], normals_cuda[keep], atol=1e-6)
+
+    def test_equality(self):
+        """Identical batches compare equal; different batches do not."""
+        pair_a, pair_b, triple = (init_mesh(num_meshes=k) for k in (2, 2, 3))
+        empty = Meshes([], [])
+        # Equal: two empty batches, a batch with itself, a batch with its clone.
+        same = [(empty, Meshes([], [])), (pair_a, pair_a), (pair_a, pair_a.clone())]
+        for left, right in same:
+            self.assertTrue(mesh_structures_equal(left, right))
+        # Not equal: empty vs. non-empty, or separately initialized batches.
+        for left, right in [(empty, pair_a), (pair_a, pair_b), (pair_a, triple)]:
+            self.assertFalse(mesh_structures_equal(left, right))
+
+    def test_to_sorted(self):
+        """to_sorted(init_simple_mesh()) must differ from its input and match the expectation."""
+        mesh = init_simple_mesh()
+        sorted_mesh = to_sorted(mesh)
+
+        def as_verts(rows):
+            return torch.tensor(rows, dtype=torch.float32)
+
+        def as_faces(rows):
+            return torch.tensor(rows, dtype=torch.int64)
+
+        expected_verts = [
+            as_verts([[0.1, 0.3, 0.5], [0.5, 0.2, 0.1], [0.6, 0.8, 0.7]]),
+            # Vertex permutation: 0->0, 1->3, 2->2, 3->1
+            as_verts(
+                [[0.1, 0.3, 0.3], [0.1, 0.5, 0.3], [0.2, 0.3, 0.4], [0.6, 0.7, 0.8]]
+            ),
+            # Vertex permutation: 0->2, 1->1, 2->4, 3->0, 4->3
+            as_verts(
+                [
+                    [0.2, 0.3, 0.4],
+                    [0.2, 0.4, 0.8],
+                    [0.7, 0.3, 0.6],
+                    [0.9, 0.3, 0.8],
+                    [0.9, 0.5, 0.2],
+                ]
+            ),
+        ]
+
+        expected_faces = [
+            as_faces([[0, 1, 2]]),
+            as_faces([[0, 3, 2], [3, 2, 1]]),
+            as_faces(
+                [
+                    [1, 4, 2],
+                    [2, 1, 0],
+                    [4, 0, 1],
+                    [3, 0, 4],
+                    [3, 2, 1],
+                    [3, 0, 1],
+                    [3, 4, 1],
+                ]
+            ),
+        ]
+        expected_mesh = Meshes(verts=expected_verts, faces=expected_faces)
+
+        # The sorted batch must no longer equal the unsorted input ...
+        self.assertFalse(mesh_structures_equal(mesh, sorted_mesh))
+        # ... and it must equal the hand-written expectation.
+        self.assertTrue(mesh_structures_equal(expected_mesh, sorted_mesh))
+
+    @staticmethod
+    def compute_packed_with_init(
+        num_meshes: int = 10, max_v: int = 100, max_f: int = 300, device: str = "cpu"
+    ):
+        """Return a closure recomputing the packed tensors; syncs CUDA only if present."""
+        sync = torch.cuda.synchronize if torch.cuda.is_available() else (lambda: None)
+        mesh = init_mesh(num_meshes, max_v, max_f, device=device)
+        sync()
+        def compute_packed():
+            mesh._compute_packed(refresh=True)
+            sync()
+        return compute_packed
+
+    @staticmethod
+    def compute_padded_with_init(
+        num_meshes: int = 10, max_v: int = 100, max_f: int = 300, device: str = "cpu"
+    ):
+        """Return a closure recomputing the padded tensors; syncs CUDA only if present."""
+        sync = torch.cuda.synchronize if torch.cuda.is_available() else (lambda: None)
+        mesh = init_mesh(num_meshes, max_v, max_f, device=device)
+        sync()
+        def compute_padded():
+            mesh._compute_padded(refresh=True)
+            sync()
+        return compute_padded
diff --git a/pytorch3d/tests/test_opengl_utils.py b/pytorch3d/tests/test_opengl_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab5a9b786d09448d8adf5700814776abd4976f00
--- /dev/null
+++ b/pytorch3d/tests/test_opengl_utils.py
@@ -0,0 +1,391 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import ctypes
+import os
+import sys
+import threading
+import unittest
+
+import torch
+
+os.environ["PYOPENGL_PLATFORM"] = "egl"
+import pycuda._driver  # noqa
+from OpenGL import GL as gl  # noqa
+from OpenGL.raw.EGL._errors import EGLError  # noqa
+from pytorch3d.renderer.opengl import _can_import_egl_and_pycuda  # noqa
+from pytorch3d.renderer.opengl.opengl_utils import (  # noqa
+    _define_egl_extension,
+    _egl_convert_to_int_array,
+    _get_cuda_device,
+    egl,
+    EGLContext,
+    global_device_context_store,
+)
+
+from .common_testing import TestCaseMixin  # noqa
+
+MAX_EGL_HEIGHT = global_device_context_store.max_egl_height  # height passed to EGLContext / glReadPixels below
+MAX_EGL_WIDTH = global_device_context_store.max_egl_width  # width passed to EGLContext / glReadPixels below
+
+
+def _draw_square(r=1.0, g=0.0, b=1.0, **kwargs) -> torch.Tensor:
+    """Clear the buffer, draw an (r, g, b) rectangle and return the pixels as uint8.
+
+    Extra keyword arguments are accepted and ignored. The returned tensor has
+    shape (MAX_EGL_HEIGHT, MAX_EGL_WIDTH, 3).
+    """
+    gl.glClear(gl.GL_COLOR_BUFFER_BIT)
+    gl.glColor3f(r, g, b)
+    gl.glRectf(-0.5, -0.5, 0.5, 0.5)  # x1, y1, x2, y2
+    width, height = MAX_EGL_WIDTH, MAX_EGL_HEIGHT
+    pixels = gl.glReadPixels(0, 0, width, height, gl.GL_RGB, gl.GL_UNSIGNED_BYTE)
+    flat = torch.frombuffer(pixels, dtype=torch.uint8)
+    return flat.reshape(height, width, 3)
+
+
+def _draw_squares_with_context(
+    cuda_device_id=0, result=None, thread_id=None, **kwargs
+) -> None:
+    """Draw three squares in a newly created EGLContext for the given device id."""
+    ctx = EGLContext(MAX_EGL_WIDTH, MAX_EGL_HEIGHT, cuda_device_id)
+    with ctx.active_and_locked():
+        images = [_draw_square(**kwargs).float() for _ in range(3)]
+        if result is None or thread_id is None:
+            return
+        # Publish the context info and the images while the context is still held.
+        info = ctx.get_context_info()
+        result[thread_id] = {"egl": info, "images": images}
+
+
+def _draw_squares_with_context_store(
+    cuda_device_id=0,
+    result=None,
+    thread_id=None,
+    verbose=False,
+    **kwargs,
+) -> None:
+    """Draw three squares with the global store's EGL context for a CUDA device."""
+    device = torch.device(f"cuda:{cuda_device_id}")
+    # The context comes from the shared store rather than being created here.
+    ctx = global_device_context_store.get_egl_context(device)
+    if verbose:
+        print(f"In thread {thread_id}, device {cuda_device_id}.")
+    with ctx.active_and_locked():
+        images = [_draw_square(**kwargs).float() for _ in range(3)]
+        # Results are published only when both result and thread_id were given.
+        if result is not None and thread_id is not None:
+            info = ctx.get_context_info()
+            result[thread_id] = {"egl": info, "images": images}
+
+
+class TestDeviceContextStore(TestCaseMixin, unittest.TestCase):
+    """The global store must return one cached context object per CUDA device."""
+
+    def test_cuda_context(self):
+        """Repeated get_cuda_context calls reuse the context of each device."""
+        store = global_device_context_store
+        dev0 = torch.device("cuda:0")
+        dev1 = torch.device("cuda:1")
+        first_0 = store.get_cuda_context(device=dev0)
+        again_0 = store.get_cuda_context(device=dev0)
+        first_1 = store.get_cuda_context(device=dev1)
+        again_1 = store.get_cuda_context(device=dev1)
+
+        # Same device: the very same object.
+        # Different devices: distinct objects.
+        self.assertIs(first_0, again_0)
+        self.assertIs(first_1, again_1)
+        self.assertIsNot(first_0, first_1)
+
+    def test_egl_context(self):
+        """Repeated get_egl_context calls reuse the context of each device."""
+        store = global_device_context_store
+        dev0 = torch.device("cuda:0")
+        dev1 = torch.device("cuda:1")
+        first_0 = store.get_egl_context(dev0)
+        again_0 = store.get_egl_context(dev0)
+        first_1 = store.get_egl_context(dev1)
+        again_1 = store.get_egl_context(dev1)
+
+        # Same device: the very same object.
+        # Different devices: distinct objects.
+        self.assertIs(first_0, again_0)
+        self.assertIs(first_1, again_1)
+        self.assertIsNot(first_0, first_1)
+
+
+class TestUtils(TestCaseMixin, unittest.TestCase):
+    """Checks for the private helpers imported from opengl_utils."""
+
+    def test_load_extensions(self):
+        # A real extension name must load without raising ...
+        _define_egl_extension("eglGetPlatformDisplayEXT", egl.EGLDisplay)
+        # ... while a made-up one must be reported as a RuntimeError.
+        with self.assertRaisesRegex(RuntimeError, "Cannot find EGL extension"):
+            _define_egl_extension("eglFakeExtensionEXT", egl.EGLBoolean)
+
+    def test_get_cuda_device(self):
+        # Device 0 must resolve to something; device 10000 must be rejected.
+        self.assertIsNotNone(_get_cuda_device(0))
+        with self.assertRaisesRegex(ValueError, "Device 10000 not available"):
+            _get_cuda_device(10000)
+
+    def test_egl_convert_to_int_array(self):
+        # One attribute pair becomes a 3-element c_int array: key, value, EGL_NONE.
+        attributes = {egl.EGL_RED_SIZE: 8}
+        int_array = _egl_convert_to_int_array(attributes)
+        self.assertEqual(int_array._type_, ctypes.c_int)
+        self.assertEqual(int_array._length_, 3)
+        expected = (egl.EGL_RED_SIZE, 8, egl.EGL_NONE)
+        for index, value in enumerate(expected):
+            self.assertEqual(int_array[index], value)
+
+
+class TestOpenGLSingleThreaded(TestCaseMixin, unittest.TestCase):
+    """Rendering from a single thread into a directly created EGLContext."""
+
+    @staticmethod
+    def _square_region():
+        # _draw_square fills [-0.5, 0.5] on both axes, which these tests take to be
+        # the central half of the image. Images are (height, width, 3): rows <-> height.
+        rows = slice(MAX_EGL_HEIGHT // 4, MAX_EGL_HEIGHT * 3 // 4)
+        cols = slice(MAX_EGL_WIDTH // 4, MAX_EGL_WIDTH * 3 // 4)
+        return rows, cols
+
+    def test_draw_square(self):
+        """The default (magenta) square must match a hand-built image exactly."""
+        context = EGLContext(width=MAX_EGL_WIDTH, height=MAX_EGL_HEIGHT)
+        with context.active_and_locked():
+            rendering_result = _draw_square().float()
+
+        # Same layout as the rendering: (height, width, 3), zeros outside the square.
+        expected = torch.zeros((MAX_EGL_HEIGHT, MAX_EGL_WIDTH, 3), dtype=torch.float)
+        rows, cols = self._square_region()
+        expected[rows, cols, 0] = 255.0
+        expected[rows, cols, 2] = 255.0
+        self.assertTrue(torch.all(expected == rendering_result))
+
+    def test_render_two_squares(self):
+        # Check that drawing twice doesn't overwrite the initial buffer.
+        context = EGLContext(width=MAX_EGL_WIDTH, height=MAX_EGL_HEIGHT)
+        with context.active_and_locked():
+            red_square = _draw_square(r=1.0, g=0.0, b=0.0)
+            blue_square = _draw_square(r=0.0, g=0.0, b=1.0)
+
+        # Both images are only checked inside the square itself.
+        rows, cols = self._square_region()
+        red = torch.tensor([255, 0, 0])
+        blue = torch.tensor([0, 0, 255])
+        self.assertTrue(torch.all(red_square[rows, cols] == red))
+        self.assertTrue(torch.all(blue_square[rows, cols] == blue))
+
+
+class TestOpenGLMultiThreaded(TestCaseMixin, unittest.TestCase):
+    def test_multiple_renders_single_gpu_single_context(self):
+        _draw_squares_with_context()
+
+    def test_multiple_renders_single_gpu_context_store(self):
+        _draw_squares_with_context_store()
+
+    def test_render_two_threads_single_gpu(self):
+        self._render_two_threads_single_gpu(_draw_squares_with_context)
+
+    def test_render_two_threads_single_gpu_context_store(self):
+        self._render_two_threads_single_gpu(_draw_squares_with_context_store)
+
+    def test_render_two_threads_two_gpus(self):
+        self._render_two_threads_two_gpus(_draw_squares_with_context)
+
+    def test_render_two_threads_two_gpus_context_store(self):
+        self._render_two_threads_two_gpus(_draw_squares_with_context_store)
+
+    def _render_two_threads_single_gpu(self, draw_fn):
+        result = [None] * 2
+        thread1 = threading.Thread(
+            target=draw_fn,
+            kwargs={
+                "cuda_device_id": 0,
+                "result": result,
+                "thread_id": 0,
+                "r": 1.0,
+                "g": 0.0,
+                "b": 0.0,
+            },
+        )
+        thread2 = threading.Thread(
+            target=draw_fn,
+            kwargs={
+                "cuda_device_id": 0,
+                "result": result,
+                "thread_id": 1,
+                "r": 0.0,
+                "g": 1.0,
+                "b": 0.0,
+            },
+        )
+
+        thread1.start()
+        thread2.start()
+        thread1.join()
+        thread2.join()
+
+        start_px = int(MAX_EGL_WIDTH / 4)
+        end_px = int(MAX_EGL_WIDTH * 3 / 4)
+        red_squares = torch.stack(result[0]["images"], dim=0)[
+            :, start_px:end_px, start_px:end_px
+        ]
+        green_squares = torch.stack(result[1]["images"], dim=0)[
+            :, start_px:end_px, start_px:end_px
+        ]
+        self.assertTrue(torch.all(red_squares == torch.tensor([255.0, 0.0, 0.0])))
+        self.assertTrue(torch.all(green_squares == torch.tensor([0.0, 255.0, 0.0])))
+
+    def _render_two_threads_two_gpus(self, draw_fn):
+        # Contrary to _render_two_threads_single_gpu, this renders in two separate
+        # threads but on a different GPU each. This means using different EGL contexts
+        # (asserted below) and is a much less risky endeavour.
+        result = [None] * 2
+        thread1 = threading.Thread(
+            target=draw_fn,
+            kwargs={
+                "cuda_device_id": 0,
+                "result": result,
+                "thread_id": 0,
+                "r": 1.0,
+                "g": 0.0,
+                "b": 0.0,
+            },
+        )
+        thread2 = threading.Thread(
+            target=draw_fn,
+            kwargs={
+                "cuda_device_id": 1,
+                "result": result,
+                "thread_id": 1,
+                "r": 0.0,
+                "g": 1.0,
+                "b": 0.0,
+            },
+        )
+        thread1.start()
+        thread2.start()
+        thread1.join()
+        thread2.join()
+        self.assertNotEqual(
+            result[0]["egl"]["context"].address, result[1]["egl"]["context"].address
+        )
+
+        start_px = int(MAX_EGL_WIDTH / 4)
+        end_px = int(MAX_EGL_WIDTH * 3 / 4)
+        red_squares = torch.stack(result[0]["images"], dim=0)[
+            :, start_px:end_px, start_px:end_px
+        ]
+        green_squares = torch.stack(result[1]["images"], dim=0)[
+            :, start_px:end_px, start_px:end_px
+        ]
+        self.assertTrue(torch.all(red_squares == torch.tensor([255.0, 0.0, 0.0])))
+        self.assertTrue(torch.all(green_squares == torch.tensor([0.0, 255.0, 0.0])))
+
+    def test_render_multi_thread_multi_gpu(self):
+        # Multiple threads using up multiple GPUs; more threads than GPUs.
+        # This is certainly not encouraged in practice, but shouldn't fail. Note that
+        # the context store will only allow one rendering at a time to occur on a
+        # single GPU, even across threads.
+        n_gpus = torch.cuda.device_count()
+        n_threads = 10
+        kwargs = {
+            "r": 1.0,
+            "g": 0.0,
+            "b": 0.0,
+            "verbose": True,
+        }
+
+        threads = []
+        for thread_id in range(n_threads):
+            kwargs.update(
+                {"cuda_device_id": thread_id % n_gpus, "thread_id": thread_id}
+            )
+            threads.append(
+                threading.Thread(
+                    target=_draw_squares_with_context_store, kwargs=dict(kwargs)
+                )
+            )
+
+        for thread in threads:
+            thread.start()
+        for thread in threads:
+            thread.join()
+
+
+class TestOpenGLUtils(TestCaseMixin, unittest.TestCase):
+    """Remaining DeviceContextStore behavior plus EGL import and release errors."""
+
+    @classmethod
+    def tearDownClass(cls):
+        global_device_context_store.set_context_data(torch.device("cuda:0"), None)
+
+    def test_device_context_store(self):
+        # Most of DCS's functionality is tested in the tests above, test the remainder.
+        store = global_device_context_store
+        device = torch.device("cuda:0")
+        store.set_context_data(device, 123)
+        self.assertEqual(store.get_context_data(device), 123)
+        # Nothing was stored for cuda:1 here, so its data is expected to be None.
+        self.assertEqual(store.get_context_data(torch.device("cuda:1")), None)
+
+        # Check that contexts in store can be manually released (although that's a very
+        # bad idea! Don't do it manually!)
+        egl_ctx = store.get_egl_context(device)
+        cuda_ctx = store.get_cuda_context(device)
+        egl_ctx.release()
+        cuda_ctx.detach()
+
+        # Reset the contexts (just for testing! never do this manually!). Then, check
+        # that first running DeviceContextStore.release() will cause subsequent releases
+        # to fail (because we already released all the contexts).
+        store._cuda_contexts = {}
+        store._egl_contexts = {}
+
+        egl_ctx = store.get_egl_context(device)
+        cuda_ctx = store.get_cuda_context(device)
+        store.release()
+        with self.assertRaisesRegex(EGLError, "EGL_NOT_INITIALIZED"):
+            egl_ctx.release()
+        with self.assertRaisesRegex(pycuda._driver.LogicError, "cannot detach"):
+            cuda_ctx.detach()
+
+    def test_no_egl_error(self):
+        # Without PYOPENGL_PLATFORM=egl, a freshly imported OpenGL must make the EGL
+        # check fail; with the variable restored, the check must pass again.
+        def forget_opengl():
+            for name in [m for m in sys.modules if "OpenGL" in m]:
+                del sys.modules[name]
+
+        del os.environ["PYOPENGL_PLATFORM"]
+        try:
+            forget_opengl()
+            import OpenGL.GL  # noqa
+
+            self.assertFalse(_can_import_egl_and_pycuda())
+        finally:
+            # Restore the EGL backend even when the check above fails, so that the
+            # failure cannot leak into tests that run afterwards.
+            forget_opengl()
+            os.environ["PYOPENGL_PLATFORM"] = "egl"
+
+        self.assertTrue(_can_import_egl_and_pycuda())
+
+    def test_egl_release_error(self):
+        # Creating two contexts on the same device will lead to trouble (that's one of
+        # the reasons behind DeviceContextStore). You can release one of them,
+        # but you cannot release the same EGL resources twice!
+        ctx1 = EGLContext(width=100, height=100)
+        ctx2 = EGLContext(width=100, height=100)
+
+        ctx1.release()
+        with self.assertRaisesRegex(EGLError, "EGL_NOT_INITIALIZED"):
+            ctx2.release()
diff --git a/pytorch3d/tests/test_ops_utils.py b/pytorch3d/tests/test_ops_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..81de07f5a73bacf7b3b5d443f94f2f3adf14dd60
--- /dev/null
+++ b/pytorch3d/tests/test_ops_utils.py
@@ -0,0 +1,89 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import numpy as np
+import torch
+from pytorch3d.ops import utils as oputil
+
+from .common_testing import TestCaseMixin
+
+
+class TestOpsUtils(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)
+        np.random.seed(42)
+
+    def test_wmean(self):
+        device = torch.device("cuda:0")
+        n_points = 20
+
+        x = torch.rand(n_points, 3, device=device)
+        weight = torch.rand(n_points, device=device)
+        x_np = x.cpu().data.numpy()
+        weight_np = weight.cpu().data.numpy()
+
+        # test unweighted
+        mean = oputil.wmean(x, keepdim=False)
+        mean_gt = np.average(x_np, axis=-2)
+        self.assertClose(mean.cpu().data.numpy(), mean_gt)
+
+        # test weighted
+        mean = oputil.wmean(x, weight=weight, keepdim=False)
+        mean_gt = np.average(x_np, axis=-2, weights=weight_np)
+        self.assertClose(mean.cpu().data.numpy(), mean_gt)
+
+        # test keepdim
+        mean = oputil.wmean(x, weight=weight, keepdim=True)
+        self.assertClose(mean[0].cpu().data.numpy(), mean_gt)
+
+        # test binary weights
+        mean = oputil.wmean(x, weight=weight > 0.5, keepdim=False)
+        mean_gt = np.average(x_np, axis=-2, weights=weight_np > 0.5)
+        self.assertClose(mean.cpu().data.numpy(), mean_gt)
+
+        # test broadcasting
+        x = torch.rand(10, n_points, 3, device=device)
+        x_np = x.cpu().data.numpy()
+        mean = oputil.wmean(x, weight=weight, keepdim=False)
+        mean_gt = np.average(x_np, axis=-2, weights=weight_np)
+        self.assertClose(mean.cpu().data.numpy(), mean_gt)
+
+        weight = weight[None, None, :].repeat(3, 1, 1)
+        mean = oputil.wmean(x, weight=weight, keepdim=False)
+        self.assertClose(mean[0].cpu().data.numpy(), mean_gt)
+
+        # test failing broadcasting
+        weight = torch.rand(x.shape[0], device=device)
+        with self.assertRaises(ValueError) as context:
+            oputil.wmean(x, weight=weight, keepdim=False)
+        self.assertTrue("weights are not compatible" in str(context.exception))
+
+        # test dim
+        weight = torch.rand(x.shape[0], n_points, device=device)
+        weight_np = np.tile(
+            weight[:, :, None].cpu().data.numpy(), (1, 1, x_np.shape[-1])
+        )
+        mean = oputil.wmean(x, dim=0, weight=weight, keepdim=False)
+        mean_gt = np.average(x_np, axis=0, weights=weight_np)
+        self.assertClose(mean.cpu().data.numpy(), mean_gt)
+
+        # test dim tuple
+        mean = oputil.wmean(x, dim=(0, 1), weight=weight, keepdim=False)
+        mean_gt = np.average(x_np, axis=(0, 1), weights=weight_np)
+        self.assertClose(mean.cpu().data.numpy(), mean_gt)
+
+    def test_masked_gather_errors(self):
+        idx = torch.randint(0, 10, size=(5, 10, 4, 2))
+        points = torch.randn(size=(5, 10, 3))
+        with self.assertRaisesRegex(ValueError, "format is not supported"):
+            oputil.masked_gather(points, idx)
+
+        points = torch.randn(size=(2, 10, 3))
+        with self.assertRaisesRegex(ValueError, "same batch dimension"):
+            oputil.masked_gather(points, idx)
diff --git a/pytorch3d/tests/test_packed_to_padded.py b/pytorch3d/tests/test_packed_to_padded.py
new file mode 100644
index 0000000000000000000000000000000000000000..e04c725688a562e718107ffb3b7d885aa74b26d9
--- /dev/null
+++ b/pytorch3d/tests/test_packed_to_padded.py
@@ -0,0 +1,297 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.ops import packed_to_padded, padded_to_packed
+from pytorch3d.structures.meshes import Meshes
+
+from .common_testing import get_random_cuda_device, TestCaseMixin
+
+
+class TestPackedToPadded(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(1)  # fixed seed keeps the random inputs reproducible
+
+    @staticmethod
+    def init_meshes(
+        num_meshes: int = 10,
+        num_verts: int = 1000,
+        num_faces: int = 3000,
+        device: str = "cpu",
+    ):
+        """Build a batch of `num_meshes` random meshes on `device`."""
+        device = torch.device(device)
+        verts_list = []
+        faces_list = []
+        for _ in range(num_meshes):
+            verts = torch.rand((num_verts, 3), dtype=torch.float32, device=device)
+            faces = torch.randint(
+                num_verts, size=(num_faces, 3), dtype=torch.int64, device=device
+            )
+            verts_list.append(verts)
+            faces_list.append(faces)
+        meshes = Meshes(verts_list, faces_list)
+        return meshes
+
+    @staticmethod
+    def packed_to_padded_python(inputs, first_idxs, max_size, device):
+        """
+        Loop-based PyTorch reference for packed_to_padded: rows
+        [first_idxs[m], first_idxs[m + 1]) of `inputs` fill the start of row m of
+        a zero tensor of shape (len(first_idxs), max_size, ...) on `device`.
+        """
+        num_meshes = first_idxs.size(0)
+        # An empty inputs.shape[1:] covers 1D inputs too. inputs.dtype is kept so
+        # that non-float32 inputs are not silently cast to the default dtype.
+        inputs_padded = torch.zeros(
+            (num_meshes, max_size, *inputs.shape[1:]),
+            dtype=inputs.dtype,
+            device=device,
+        )
+        for m in range(num_meshes):
+            s = first_idxs[m]
+            # the last segment runs to the end of the packed tensor
+            f = inputs.shape[0] if m == num_meshes - 1 else first_idxs[m + 1]
+            inputs_padded[m, : f - s] = inputs[s:f]
+        return inputs_padded
+
+    @staticmethod
+    def padded_to_packed_python(inputs, first_idxs, num_inputs, device):
+        """
+        Loop-based PyTorch reference for padded_to_packed: the leading rows of
+        inputs[m] are written to rows [first_idxs[m], first_idxs[m + 1]) of a zero
+        tensor of shape (num_inputs, ...) on `device`.
+        """
+        num_meshes = inputs.size(0)
+        # An empty inputs.shape[2:] covers 2D inputs too; inputs.dtype is kept.
+        inputs_packed = torch.zeros(
+            (num_inputs, *inputs.shape[2:]), dtype=inputs.dtype, device=device
+        )
+        for m in range(num_meshes):
+            s = first_idxs[m]
+            # the last segment runs up to num_inputs
+            f = num_inputs if m == num_meshes - 1 else first_idxs[m + 1]
+            inputs_packed[s:f] = inputs[m, : f - s]
+
+        return inputs_packed
+
+    def _test_packed_to_padded_helper(self, dims, device):
+        """
+        Check that packed_to_padded and the Python reference agree on random
+        (F, *dims) values, in both the forward and the backward pass.
+        """
+        meshes = self.init_meshes(16, 100, 300, device=device)
+        faces = meshes.faces_packed()
+        mesh_to_faces_packed_first_idx = meshes.mesh_to_faces_packed_first_idx()
+        max_faces = meshes.num_faces_per_mesh().max().item()
+
+        if len(dims) == 0:
+            values = torch.rand((faces.shape[0],), device=device, requires_grad=True)
+        else:
+            values = torch.rand(
+                (faces.shape[0], *dims), device=device, requires_grad=True
+            )
+        values_torch = values.detach().clone()  # independent leaf for the reference
+        values_torch.requires_grad = True
+        values_padded = packed_to_padded(
+            values, mesh_to_faces_packed_first_idx, max_faces
+        )
+        values_padded_torch = TestPackedToPadded.packed_to_padded_python(
+            values_torch, mesh_to_faces_packed_first_idx, max_faces, device
+        )
+        # check forward
+        self.assertClose(values_padded, values_padded_torch)
+
+        # check backward
+        if len(dims) == 0:
+            grad_inputs = torch.rand((len(meshes), max_faces), device=device)
+        else:
+            grad_inputs = torch.rand((len(meshes), max_faces, *dims), device=device)
+        values_padded.backward(grad_inputs)
+        grad_outputs = values.grad
+        values_padded_torch.backward(grad_inputs)
+        grad_outputs_torch1 = values_torch.grad  # autograd through the reference
+        grad_outputs_torch2 = TestPackedToPadded.padded_to_packed_python(
+            grad_inputs, mesh_to_faces_packed_first_idx, values.size(0), device=device
+        )
+        self.assertClose(grad_outputs, grad_outputs_torch1)
+        self.assertClose(grad_outputs, grad_outputs_torch2)
+
+    def test_packed_to_padded_flat_cpu(self):
+        self._test_packed_to_padded_helper([], "cpu")
+
+    def test_packed_to_padded_D1_cpu(self):
+        self._test_packed_to_padded_helper([1], "cpu")
+
+    def test_packed_to_padded_D16_cpu(self):
+        self._test_packed_to_padded_helper([16], "cpu")
+
+    def test_packed_to_padded_D16_9_cpu(self):
+        self._test_packed_to_padded_helper([16, 9], "cpu")
+
+    def test_packed_to_padded_D16_3_2_cpu(self):
+        self._test_packed_to_padded_helper([16, 3, 2], "cpu")
+
+    def test_packed_to_padded_flat_cuda(self):
+        device = get_random_cuda_device()
+        self._test_packed_to_padded_helper([], device)
+
+    def test_packed_to_padded_D1_cuda(self):
+        device = get_random_cuda_device()
+        self._test_packed_to_padded_helper([1], device)
+
+    def test_packed_to_padded_D16_cuda(self):
+        device = get_random_cuda_device()
+        self._test_packed_to_padded_helper([16], device)
+
+    def test_packed_to_padded_D16_9_cuda(self):
+        device = get_random_cuda_device()
+        self._test_packed_to_padded_helper([16, 9], device)
+
+    def test_packed_to_padded_D16_3_2_cuda(self):
+        device = get_random_cuda_device()
+        self._test_packed_to_padded_helper([16, 3, 2], device)
+
+    def _test_padded_to_packed_helper(self, dims, device):
+        """
+        Check that padded_to_packed and the Python reference agree on random
+        (N, max_faces, *dims) values, in both the forward and the backward pass.
+        """
+        meshes = self.init_meshes(16, 100, 300, device=device)
+        mesh_to_faces_packed_first_idx = meshes.mesh_to_faces_packed_first_idx()
+        num_faces_per_mesh = meshes.num_faces_per_mesh()
+        max_faces = num_faces_per_mesh.max().item()
+        if len(dims) == 0:
+            values = torch.rand((len(meshes), max_faces), device=device)
+        else:
+            values = torch.rand((len(meshes), max_faces, *dims), device=device)
+        for i, num in enumerate(num_faces_per_mesh):
+            values[i, num:] = 0  # zero the padding rows of every mesh
+        values.requires_grad = True
+        values_torch = values.detach().clone()  # independent leaf for the reference
+        values_torch.requires_grad = True
+        values_packed = padded_to_packed(
+            values, mesh_to_faces_packed_first_idx, num_faces_per_mesh.sum().item()
+        )
+        values_packed_torch = TestPackedToPadded.padded_to_packed_python(
+            values_torch,
+            mesh_to_faces_packed_first_idx,
+            num_faces_per_mesh.sum().item(),
+            device,
+        )
+        # check forward
+        self.assertClose(values_packed, values_packed_torch)
+
+        if len(dims) > 0:
+            values_packed_dim2 = padded_to_packed(
+                values.transpose(1, 2),
+                mesh_to_faces_packed_first_idx,
+                num_faces_per_mesh.sum().item(),
+                max_size_dim=2,
+            )
+            # check forward
+            self.assertClose(values_packed_dim2, values_packed_torch)
+
+        # check backward
+        if len(dims) == 0:
+            grad_inputs = torch.rand((num_faces_per_mesh.sum().item()), device=device)
+        else:
+            grad_inputs = torch.rand(
+                (num_faces_per_mesh.sum().item(), *dims), device=device
+            )
+        values_packed.backward(grad_inputs)
+        grad_outputs = values.grad
+        values_packed_torch.backward(grad_inputs)
+        grad_outputs_torch1 = values_torch.grad  # autograd through the reference
+        grad_outputs_torch2 = TestPackedToPadded.packed_to_padded_python(
+            grad_inputs, mesh_to_faces_packed_first_idx, values.size(1), device=device
+        )
+        self.assertClose(grad_outputs, grad_outputs_torch1)
+        self.assertClose(grad_outputs, grad_outputs_torch2)
+
+    def test_padded_to_packed_flat_cpu(self):
+        self._test_padded_to_packed_helper([], "cpu")
+
+    def test_padded_to_packed_D1_cpu(self):
+        self._test_padded_to_packed_helper([1], "cpu")
+
+    def test_padded_to_packed_D16_cpu(self):
+        self._test_padded_to_packed_helper([16], "cpu")
+
+    def test_padded_to_packed_D16_9_cpu(self):
+        self._test_padded_to_packed_helper([16, 9], "cpu")
+
+    def test_padded_to_packed_D16_3_2_cpu(self):
+        self._test_padded_to_packed_helper([16, 3, 2], "cpu")
+
+    def test_padded_to_packed_flat_cuda(self):
+        device = get_random_cuda_device()
+        self._test_padded_to_packed_helper([], device)
+
+    def test_padded_to_packed_D1_cuda(self):
+        device = get_random_cuda_device()
+        self._test_padded_to_packed_helper([1], device)
+
+    def test_padded_to_packed_D16_cuda(self):
+        device = get_random_cuda_device()
+        self._test_padded_to_packed_helper([16], device)
+
+    def test_padded_to_packed_D16_9_cuda(self):
+        device = get_random_cuda_device()
+        self._test_padded_to_packed_helper([16, 9], device)
+
+    def test_padded_to_packed_D16_3_2_cuda(self):
+        device = get_random_cuda_device()
+        self._test_padded_to_packed_helper([16, 3, 2], device)
+
+    @staticmethod
+    def packed_to_padded_with_init(
+        num_meshes: int, num_verts: int, num_faces: int, num_d: int, device: str = "cpu"
+    ):
+        """Return a closure that runs packed_to_padded on random inputs built here."""
+        meshes = TestPackedToPadded.init_meshes(
+            num_meshes, num_verts, num_faces, device
+        )
+        faces = meshes.faces_packed()
+        mesh_to_faces_packed_first_idx = meshes.mesh_to_faces_packed_first_idx()
+        max_faces = meshes.num_faces_per_mesh().max().item()
+        shape = (faces.shape[0],) if num_d == 0 else (faces.shape[0], num_d)
+        values = torch.rand(shape, device=meshes.device)
+        # torch.cuda.synchronize() fails without CUDA, so only sync for CUDA inputs.
+        sync = torch.cuda.synchronize if values.is_cuda else (lambda: None)
+        sync()
+
+        def out():
+            packed_to_padded(values, mesh_to_faces_packed_first_idx, max_faces)
+            sync()
+        return out
+
+    @staticmethod
+    def packed_to_padded_with_init_torch(
+        num_meshes: int, num_verts: int, num_faces: int, num_d: int, device: str = "cpu"
+    ):
+        """Return a closure that runs the Python reference on random inputs built here."""
+        meshes = TestPackedToPadded.init_meshes(
+            num_meshes, num_verts, num_faces, device
+        )
+        faces = meshes.faces_packed()
+        mesh_to_faces_packed_first_idx = meshes.mesh_to_faces_packed_first_idx()
+        max_faces = meshes.num_faces_per_mesh().max().item()
+        shape = (faces.shape[0],) if num_d == 0 else (faces.shape[0], num_d)
+        values = torch.rand(shape, device=meshes.device)
+        # torch.cuda.synchronize() fails without CUDA, so only sync for CUDA inputs.
+        sync = torch.cuda.synchronize if values.is_cuda else (lambda: None)
+        sync()
+
+        def out():
+            TestPackedToPadded.packed_to_padded_python(
+                values, mesh_to_faces_packed_first_idx, max_faces, device
+            )
+            sync()
+        return out
diff --git a/pytorch3d/tests/test_perspective_n_points.py b/pytorch3d/tests/test_perspective_n_points.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5c8f57e85030cee0a1cd8b16a30c2f867bfd924
--- /dev/null
+++ b/pytorch3d/tests/test_perspective_n_points.py
@@ -0,0 +1,201 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.ops import perspective_n_points
+from pytorch3d.transforms import rotation_conversions
+
+from .common_testing import TestCaseMixin
+
+
+def reproj_error(x_world, y, R, T, weight=None):
+    """Mean (optionally weighted) 2D distance between y and the projected x_world."""
+    x_cam = torch.matmul(x_world, R) + T[:, None, :]
+    # perspective division by the third coordinate; only the first two are compared
+    projected = (x_cam / x_cam[..., 2:])[..., :2]
+    w = y.new_ones((1, 1)) if weight is None else weight
+    residual = w[:, :, None] * (y - projected)
+    per_point = (residual**2).sum(dim=-1) ** 0.5
+    return per_point.mean(dim=-1)
+
+
+class TestPerspectiveNPoints(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)  # fixed seed keeps the random geometry reproducible
+
+    @classmethod
+    def _generate_epnp_test_from_2d(cls, y):
+        """
+        Instantiate random x_cam, x_world, R, T (returned in this order) given
+        a batch of input 2D projections y.
+        """
+        batch_size = y.shape[0]
+        x_cam = torch.cat((y, torch.rand_like(y[:, :, :1]) * 2.0 + 3.5), dim=2)  # z in [3.5, 5.5)
+        x_cam[:, :, :2] *= x_cam[:, :, 2:]  # unproject
+        R = rotation_conversions.random_rotations(batch_size).to(y)
+        T = torch.randn_like(R[:, :1, :])
+        T[:, :, 2] = (T[:, :, 2] + 3.0).clamp(2.0)  # shift z by 3 and keep it >= 2
+        x_world = torch.matmul(x_cam - T, R.transpose(1, 2))  # inverts x @ R + T if R is orthonormal
+        return x_cam, x_world, R, T
+
+    def _run_and_print(self, x_world, y, R, T, print_stats, skip_q, check_output=False):
+        sol = perspective_n_points.efficient_pnp(  # y is expanded to the batch of x_world
+            x_world, y.expand_as(x_world[:, :, :2]), skip_quadratic_eq=skip_q
+        )
+
+        err_2d = reproj_error(x_world, y, sol.R, sol.T)  # recomputed from the solution's R, T
+        R_est_quat = rotation_conversions.matrix_to_quaternion(sol.R)
+        R_quat = rotation_conversions.matrix_to_quaternion(R)
+
+        num_pts = x_world.shape[-2]
+        if check_output:  # compare the solution with the ground-truth R, T
+            assert_msg = (
+                f"test_perspective_n_points assertion failure for "
+                f"n_points={num_pts}, "
+                f"skip_quadratic={skip_q}, "
+                f"no noise."
+            )
+
+            self.assertClose(err_2d, sol.err_2d, msg=assert_msg)
+            self.assertTrue((err_2d < 1e-3).all(), msg=assert_msg)
+
+            def norm_fn(t):
+                return t.norm(dim=-1)
+
+            self.assertNormsClose(
+                T, sol.T[:, None, :], rtol=4e-3, norm_fn=norm_fn, msg=assert_msg
+            )
+            self.assertNormsClose(
+                R_quat, R_est_quat, rtol=3e-3, norm_fn=norm_fn, msg=assert_msg
+            )
+
+        if print_stats:
+            torch.set_printoptions(precision=5, sci_mode=False)
+            for err_2d, err_3d, R_gt, T_gt in zip(  # note: rebinds err_2d from above
+                sol.err_2d,
+                sol.err_3d,
+                torch.cat((sol.R, R), dim=-1),  # estimate and ground truth side by side
+                torch.stack((sol.T, T[:, 0, :]), dim=-1),
+            ):
+                print("2D Error: %1.4f" % err_2d.item())
+                print("3D Error: %1.4f" % err_3d.item())
+                print("R_hat | R_gt\n", R_gt)
+                print("T_hat | T_gt\n", T_gt)
+
+    def _testcase_from_2d(
+        self, y, print_stats, benchmark, skip_q=False, skip_check_thresh=5
+    ):
+        """
+        In case num_pts < 6, EPnP gets unstable, so we check it doesn't crash
+        """
+        x_cam, x_world, R, T = TestPerspectiveNPoints._generate_epnp_test_from_2d(
+            y[None].repeat(16, 1, 1)  # batch of 16 copies of y
+        )
+
+        if print_stats:
+            print("Run without noise")
+
+        if benchmark:  # return curried call
+            torch.cuda.synchronize()
+
+            def result():
+                self._run_and_print(x_world, y, R, T, False, skip_q)
+                torch.cuda.synchronize()
+
+            return result
+
+        self._run_and_print(
+            x_world,
+            y,
+            R,
+            T,
+            print_stats,
+            skip_q,
+            check_output=True if y.shape[1] > skip_check_thresh else False,  # NOTE(review): callers in this file pass y as (num_pts, 2), so y.shape[1] == 2 never exceeds the default 5; y.shape[0] may be intended - confirm
+        )
+
+        # in the noisy case, there are no guarantees, so we check it doesn't crash
+        if print_stats:
+            print("Run with noise")
+        x_world += torch.randn_like(x_world) * 0.1
+        self._run_and_print(x_world, y, R, T, print_stats, skip_q)
+
+    def case_with_gaussian_points(
+        self, batch_size=10, num_pts=20, print_stats=False, benchmark=True, skip_q=False
+    ):
+        return self._testcase_from_2d(  # batch_size is not used; a closure is returned if benchmark
+            torch.randn((num_pts, 2)).cuda() / 3.0,
+            print_stats=print_stats,
+            benchmark=benchmark,
+            skip_q=skip_q,
+        )
+
+    def test_perspective_n_points(self, print_stats=False):
+        if print_stats:
+            print("RUN ON A DENSE GRID")
+        u = torch.linspace(-1.0, 1.0, 20)
+        v = torch.linspace(-1.0, 1.0, 15)
+        for skip_q in [False, True]:
+            self._testcase_from_2d(
+                torch.cartesian_prod(u, v).cuda(), print_stats, False, skip_q
+            )
+
+        for num_pts in range(6, 3, -1):  # 6, 5 and 4 points
+            for skip_q in [False, True]:
+                if print_stats:
+                    print(f"RUN ON {num_pts} points; skip_quadratic: {skip_q}")
+
+                self.case_with_gaussian_points(
+                    num_pts=num_pts,
+                    print_stats=print_stats,
+                    benchmark=False,
+                    skip_q=skip_q,
+                )
+
+    def test_weighted_perspective_n_points(self, batch_size=16, num_pts=200):
+        # instantiate random x_world and y
+        y = torch.randn((batch_size, num_pts, 2)).cuda() / 3.0
+        x_cam, x_world, R, T = TestPerspectiveNPoints._generate_epnp_test_from_2d(y)
+
+        # randomly drop 50% of the rows
+        weights = (torch.rand_like(x_world[:, :, 0]) > 0.5).float()
+
+        # make sure we retain at least 6 points for each case
+        weights[:, :6] = 1.0
+
+        # fill ignored y with trash to ensure that we get different
+        # solution in case the weighting is wrong
+        y = y + (1 - weights[:, :, None]) * 100.0
+
+        def norm_fn(t):
+            return t.norm(dim=-1)
+
+        for skip_quadratic_eq in (True, False):
+            # get the solution for the 0/1 weighted case
+            sol = perspective_n_points.efficient_pnp(
+                x_world, y, skip_quadratic_eq=skip_quadratic_eq, weights=weights
+            )
+            sol_R_quat = rotation_conversions.matrix_to_quaternion(sol.R)
+            sol_T = sol.T
+
+            # check that running only on points with non-zero weights ends in the
+            # same place as running the 0/1 weighted version
+            for i in range(batch_size):
+                ok = weights[i] > 0
+                x_world_ok = x_world[i, ok][None]
+                y_ok = y[i, ok][None]
+                sol_ok = perspective_n_points.efficient_pnp(
+                    x_world_ok, y_ok, skip_quadratic_eq=False  # NOTE(review): always False, not the loop value - confirm
+                )
+                R_est_quat_ok = rotation_conversions.matrix_to_quaternion(sol_ok.R)
+
+                self.assertNormsClose(sol_T[i], sol_ok.T[0], rtol=3e-3, norm_fn=norm_fn)
+                self.assertNormsClose(
+                    sol_R_quat[i], R_est_quat_ok[0], rtol=3e-4, norm_fn=norm_fn
+                )
diff --git a/pytorch3d/tests/test_point_mesh_distance.py b/pytorch3d/tests/test_point_mesh_distance.py
new file mode 100644
index 0000000000000000000000000000000000000000..2bf8680475dc46c5030d5589258c04e9d7ac0362
--- /dev/null
+++ b/pytorch3d/tests/test_point_mesh_distance.py
@@ -0,0 +1,938 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import numpy as np
+import torch
+from pytorch3d import _C
+from pytorch3d.loss import point_mesh_edge_distance, point_mesh_face_distance
+from pytorch3d.structures import Meshes, packed_to_list, Pointclouds
+
+from .common_testing import get_random_cuda_device, TestCaseMixin
+
+
+class TestPointMeshDistance(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:  # NOTE(review): no super().setUp() call here - confirm it is not needed
+        np.random.seed(42)  # seed numpy's global RNG
+        torch.manual_seed(42)  # seed torch's RNG so the random meshes/points are reproducible
+
+    @staticmethod
+    def eps():  # small constant used by the naive helpers as an offset/threshold
+        return 1e-8
+
+    @staticmethod
+    def min_triangle_area():  # area threshold; same value as the cut-off in _is_inside_triangle
+        return 5e-3
+
+    @staticmethod
+    def init_meshes_clouds(  # returns (Meshes, Pointclouds) built from random data
+        batch_size: int = 10,
+        num_verts: int = 1000,
+        num_faces: int = 3000,
+        num_points: int = 3000,
+        device: str = "cuda:0",
+    ):
+        device = torch.device(device)
+        nump = torch.randint(low=1, high=num_points, size=(batch_size,))  # points per cloud
+        numv = torch.randint(low=3, high=num_verts, size=(batch_size,))  # verts per mesh
+        numf = torch.randint(low=1, high=num_faces, size=(batch_size,))  # faces per mesh
+        verts_list = []
+        faces_list = []
+        points_list = []
+        for i in range(batch_size):
+            # Randomly choose vertices
+            verts = torch.rand((numv[i], 3), dtype=torch.float32, device=device)
+            verts.requires_grad_(True)
+
+            # Randomly choose faces. Our tests below compare argmin indices
+            # over faces and edges. Argmin is sensitive even to small numerical
+            # variations, thus we make sure that faces are valid
+            # i.e. a face f = (i0, i1, i2) s.t. i0 != i1 != i2,
+            # otherwise argmin due to numerical sensitivities cannot be resolved
+            faces, allf = [], 0
+            validf = numv[i].item() - numv[i].item() % 3  # largest multiple of 3 <= numv[i]
+            while allf < numf[i]:
+                ff = torch.randperm(numv[i], device=device)[:validf].view(-1, 3)  # no repeated index
+                faces.append(ff)
+                allf += ff.shape[0]
+            faces = torch.cat(faces, 0)
+            if faces.shape[0] > numf[i]:
+                faces = faces[: numf[i]]  # drop the surplus faces
+
+            verts_list.append(verts)
+            faces_list.append(faces)
+
+            # Randomly choose points
+            points = torch.rand((nump[i], 3), dtype=torch.float32, device=device)
+            points.requires_grad_(True)
+
+            points_list.append(points)
+
+        meshes = Meshes(verts_list, faces_list)
+        pcls = Pointclouds(points_list)
+
+        return meshes, pcls
+
+    @staticmethod
+    def _point_to_bary(point: torch.Tensor, tri: torch.Tensor) -> torch.Tensor:
+        """
+        Barycentric coordinates of `point` with respect to the triangle `tri`.
+        `point` is expected to lie in the plane of tri = (a, b, c), e.g. as the
+        projection of an arbitrary point onto that plane.
+
+        Args:
+            point: FloatTensor of shape (3)
+            tri: FloatTensor of shape (3, 3)
+        Returns:
+            bary: FloatTensor of shape (3)
+        """
+        assert point.dim() == 1 and point.shape[0] == 3
+        assert tri.dim() == 2 and tri.shape[0] == 3 and tri.shape[1] == 3
+
+        origin, corner_b, corner_c = tri.unbind(0)
+        edge_b = corner_b - origin
+        edge_c = corner_c - origin
+        rel = point - origin
+
+        bb = torch.dot(edge_b, edge_b)
+        bc = torch.dot(edge_b, edge_c)
+        cc = torch.dot(edge_c, edge_c)
+        pb = torch.dot(rel, edge_b)
+        pc = torch.dot(rel, edge_c)
+
+        # eps() is added to the determinant, which is 0 for a degenerate triangle
+        gram_det = bb * cc - bc * bc + TestPointMeshDistance.eps()
+        w_b = (cc * pb - bc * pc) / gram_det
+        w_c = (bb * pc - bc * pb) / gram_det
+        w_a = 1.0 - w_b - w_c
+
+        weights = torch.tensor([w_a, w_b, w_c])
+        return weights
+
+    @staticmethod
+    def _is_inside_triangle(point: torch.Tensor, tri: torch.Tensor) -> torch.Tensor:
+        """
+        Computes whether point is inside triangle tri
+        Note that point needs to live in the space spanned by tri = (a, b, c)
+        i.e. by taking the projection of an arbitrary point on the space spanned by tri
+
+        Args:
+            point: FloatTensor of shape (3)
+            tri: FloatTensor of shape (3, 3)
+        Returns:
+            inside: False for a degenerate tri, otherwise a 0-dim BoolTensor
+        """
+        v0 = tri[1] - tri[0]
+        v1 = tri[2] - tri[0]
+        # dim is explicit: calling torch.cross without it is deprecated
+        area = torch.cross(v0, v1, dim=0).norm() / 2.0
+
+        # a triangle that is (nearly) a line or a point contains no point
+        if area < TestPointMeshDistance.min_triangle_area():
+            return False
+        bary = TestPointMeshDistance._point_to_bary(point, tri)
+        return ((bary >= 0.0) * (bary <= 1.0)).all()
+
+    @staticmethod
+    def _point_to_edge_distance(
+        point: torch.Tensor, edge: torch.Tensor
+    ) -> torch.Tensor:
+        """
+        Squared euclidean distance between a point and an edge (line segment).
+
+        The segment with end points a, b is parametrized as
+            x(t) = a + t * (b - a)
+        The closest point minimizes (x(t) - p) ^ 2 over t, with t clamped to
+        [0, 1]. The point p does not need to lie on the line through a and b.
+
+        Args:
+            point: FloatTensor of shape (3)
+            edge: FloatTensor of shape (2, 3)
+        Returns:
+            dist: scalar (0-dim) FloatTensor
+        """
+        start, end = edge.unbind(0)
+        direction = end - start
+        sq_len = direction.dot(direction)
+
+        if sq_len < TestPointMeshDistance.eps():
+            # degenerate edge (coinciding end points): average the squared
+            # distances to both end points
+            to_start = (point - start).dot(point - start)
+            to_end = (point - end).dot(point - end)
+            return 0.5 * to_start + 0.5 * to_end
+
+        # project onto the line, clamp to the segment, then measure
+        t = direction.dot(point - start) / sq_len
+        t = torch.clamp(t, min=0.0, max=1.0)
+        closest = start + t * direction
+        dist = (closest - point).dot(closest - point)
+        return dist
+
+    @staticmethod
+    def _point_to_tri_distance(point: torch.Tensor, tri: torch.Tensor) -> torch.Tensor:
+        """
+        Computes the squared euclidean distance of a point to a triangle
+        Args:
+            point: FloatTensor of shape (3)
+            tri: FloatTensor of shape (3, 3)
+        Returns:
+            dist: scalar (0-dim) FloatTensor
+        """
+        a, b, c = tri.unbind(0)
+        cross = torch.cross(b - a, c - a, dim=0)  # explicit dim: the default is deprecated
+        norm = cross.norm()
+        normal = torch.nn.functional.normalize(cross, dim=0)
+
+        # p0 is the projection of p onto the plane spanned by (a, b, c)
+        # p0 = p + tt * normal, s.t. (p0 - a) is orthogonal to normal
+        # => tt = dot(a - p, n)
+        tt = normal.dot(a) - normal.dot(point)
+        p0 = point + tt * normal
+        dist_p = tt * tt  # squared distance of p to the plane
+
+        # Compute the distance of p to all edge segments
+        e01_dist = TestPointMeshDistance._point_to_edge_distance(point, tri[[0, 1]])
+        e02_dist = TestPointMeshDistance._point_to_edge_distance(point, tri[[0, 2]])
+        e12_dist = TestPointMeshDistance._point_to_edge_distance(point, tri[[1, 2]])
+
+        with torch.no_grad():
+            inside_tri = TestPointMeshDistance._is_inside_triangle(p0, tri)
+
+        if inside_tri and (norm > TestPointMeshDistance.eps()):
+            return dist_p
+        else:  # projection falls outside (or tri is degenerate): closest edge wins
+            if e01_dist.le(e02_dist) and e01_dist.le(e12_dist):
+                return e01_dist
+            elif e02_dist.le(e01_dist) and e02_dist.le(e12_dist):
+                return e02_dist
+            else:
+                return e12_dist
+
+    def test_point_edge_array_distance(self):
+        """
+        Test the CUDA and CPU implementations of PointEdgeArrayDistanceForward
+            &  PointEdgeArrayDistanceBackward against a naive Python version
+        """
+        P, E = 16, 32
+        device = get_random_cuda_device()
+        points = torch.rand((P, 3), dtype=torch.float32, device=device)
+        edges = torch.rand((E, 2, 3), dtype=torch.float32, device=device)
+
+        # randomly make some edge points equal
+        same = torch.rand((E,), dtype=torch.float32, device=device) > 0.5
+        edges[same, 1] = edges[same, 0].clone().detach()
+
+        points_cpu = points.clone().cpu()  # CPU copies taken before requires_grad is set
+        edges_cpu = edges.clone().cpu()
+
+        points.requires_grad = True
+        edges.requires_grad = True
+        grad_dists = torch.rand((P, E), dtype=torch.float32, device=device)
+
+        # Naive python implementation
+        dists_naive = torch.zeros((P, E), dtype=torch.float32, device=device)
+        for p in range(P):
+            for e in range(E):
+                dist = self._point_to_edge_distance(points[p], edges[e])
+                dists_naive[p, e] = dist
+
+        # Cuda Forward Implementation
+        dists_cuda = _C.point_edge_array_dist_forward(points, edges)
+        dists_cpu = _C.point_edge_array_dist_forward(points_cpu, edges_cpu)
+
+        # Compare
+        self.assertClose(dists_naive.cpu(), dists_cuda.cpu())
+        self.assertClose(dists_naive.cpu(), dists_cpu)
+
+        # CUDA Backward Implementation
+        grad_points_cuda, grad_edges_cuda = _C.point_edge_array_dist_backward(
+            points, edges, grad_dists
+        )
+        grad_points_cpu, grad_edges_cpu = _C.point_edge_array_dist_backward(
+            points_cpu, edges_cpu, grad_dists.cpu()
+        )
+
+        dists_naive.backward(grad_dists)  # autograd through the naive implementation
+        grad_points_naive = points.grad.cpu()
+        grad_edges_naive = edges.grad.cpu()
+
+        # Compare
+        self.assertClose(grad_points_naive, grad_points_cuda.cpu())
+        self.assertClose(grad_edges_naive, grad_edges_cuda.cpu())
+        self.assertClose(grad_points_naive, grad_points_cpu)
+        self.assertClose(grad_edges_naive, grad_edges_cpu)
+
+    def test_point_edge_distance(self):
+        """
+        Test the CUDA and CPU implementations of PointEdgeDistanceForward
+            &  PointEdgeDistanceBackward against a naive Python version
+        """
+        device = get_random_cuda_device()
+        N, V, F, P = 4, 32, 16, 24
+        meshes, pcls = self.init_meshes_clouds(N, V, F, P, device=device)
+
+        # make points packed a leaf node
+        points_packed = pcls.points_packed().detach().clone()  # (P, 3)
+
+        points_first_idx = pcls.cloud_to_packed_first_idx()
+        max_p = pcls.num_points_per_cloud().max().item()
+
+        # make edges packed a leaf node
+        verts_packed = meshes.verts_packed()
+        edges_packed = verts_packed[meshes.edges_packed()]  # (E, 2, 3)
+        edges_packed = edges_packed.clone().detach()
+
+        edges_first_idx = meshes.mesh_to_edges_packed_first_idx()
+
+        # leaf nodes
+        points_packed.requires_grad = True
+        edges_packed.requires_grad = True
+        grad_dists = torch.rand(
+            (points_packed.shape[0],), dtype=torch.float32, device=device
+        )
+
+        # Cuda Implementation: forward
+        dists_cuda, idx_cuda = _C.point_edge_dist_forward(
+            points_packed, points_first_idx, edges_packed, edges_first_idx, max_p
+        )
+        # Cuda Implementation: backward
+        grad_points_cuda, grad_edges_cuda = _C.point_edge_dist_backward(
+            points_packed, edges_packed, idx_cuda, grad_dists
+        )
+        # Cpu Implementation: forward
+        dists_cpu, idx_cpu = _C.point_edge_dist_forward(
+            points_packed.cpu(),
+            points_first_idx.cpu(),
+            edges_packed.cpu(),
+            edges_first_idx.cpu(),
+            max_p,
+        )
+
+        # Cpu Implementation: backward
+        # Note that using idx_cpu doesn't pass - there seems to be a problem with tied results.
+        grad_points_cpu, grad_edges_cpu = _C.point_edge_dist_backward(
+            points_packed.cpu(), edges_packed.cpu(), idx_cuda.cpu(), grad_dists.cpu()
+        )
+
+        # Naive Implementation: forward
+        edges_list = packed_to_list(edges_packed, meshes.num_edges_per_mesh().tolist())
+        dists_naive = []
+        for i in range(N):
+            points = pcls.points_list()[i]
+            edges = edges_list[i]
+            dists_temp = torch.zeros(
+                (points.shape[0], edges.shape[0]), dtype=torch.float32, device=device
+            )
+            for p in range(points.shape[0]):
+                for e in range(edges.shape[0]):
+                    dist = self._point_to_edge_distance(points[p], edges[e])
+                    dists_temp[p, e] = dist
+            # torch.min() doesn't necessarily return the first index of the
+            # smallest value, our warp_reduce does. So it's not straightforward
+            # to directly compare indices, nor the gradients of grad_edges which
+            # also depend on the indices of the minimum value.
+            # To be able to compare, we will compare dists_temp.min(1) and
+            # then feed the cuda indices to the naive output
+
+            start = points_first_idx[i]
+            end = points_first_idx[i + 1] if i < N - 1 else points_packed.shape[0]
+
+            min_idx = idx_cuda[start:end] - edges_first_idx[i]  # packed -> per-mesh edge index
+            iidx = torch.arange(points.shape[0], device=device)
+            min_dist = dists_temp[iidx, min_idx]
+
+            dists_naive.append(min_dist)
+
+        dists_naive = torch.cat(dists_naive)
+
+        # Compare
+        self.assertClose(dists_naive.cpu(), dists_cuda.cpu())
+        self.assertClose(dists_naive.cpu(), dists_cpu)
+
+        # Naive Implementation: backward
+        dists_naive.backward(grad_dists)
+        grad_points_naive = torch.cat([cloud.grad for cloud in pcls.points_list()])
+        grad_edges_naive = edges_packed.grad.cpu()
+
+        # Compare
+        self.assertClose(grad_points_naive.cpu(), grad_points_cuda.cpu(), atol=1e-7)
+        self.assertClose(grad_edges_naive, grad_edges_cuda.cpu(), atol=5e-7)
+        self.assertClose(grad_points_naive.cpu(), grad_points_cpu, atol=1e-7)
+        self.assertClose(grad_edges_naive, grad_edges_cpu, atol=5e-7)
+
+    def test_edge_point_distance(self):
+        """
+        Test CUDA and CPU implementations of EdgePointDistanceForward
+            &  EdgePointDistanceBackward against a naive implementation
+        """
+        device = get_random_cuda_device()
+        N, V, F, P = 4, 32, 16, 24
+        meshes, pcls = self.init_meshes_clouds(N, V, F, P, device=device)
+
+        # make points packed a leaf node
+        points_packed = pcls.points_packed().detach().clone()  # (P, 3)
+
+        points_first_idx = pcls.cloud_to_packed_first_idx()
+
+        # make edges packed a leaf node
+        verts_packed = meshes.verts_packed()
+        edges_packed = verts_packed[meshes.edges_packed()]  # (E, 2, 3)
+        edges_packed = edges_packed.clone().detach()
+
+        edges_first_idx = meshes.mesh_to_edges_packed_first_idx()
+        max_e = meshes.num_edges_per_mesh().max().item()
+
+        # leaf nodes
+        points_packed.requires_grad = True
+        edges_packed.requires_grad = True
+        grad_dists = torch.rand(
+            (edges_packed.shape[0],), dtype=torch.float32, device=device
+        )
+
+        # Cuda Implementation: forward
+        dists_cuda, idx_cuda = _C.edge_point_dist_forward(
+            points_packed, points_first_idx, edges_packed, edges_first_idx, max_e
+        )
+
+        # Cuda Implementation: backward
+        grad_points_cuda, grad_edges_cuda = _C.edge_point_dist_backward(
+            points_packed, edges_packed, idx_cuda, grad_dists
+        )
+
+        # Cpu Implementation: forward
+        dists_cpu, idx_cpu = _C.edge_point_dist_forward(
+            points_packed.cpu(),
+            points_first_idx.cpu(),
+            edges_packed.cpu(),
+            edges_first_idx.cpu(),
+            max_e,
+        )
+
+        # Cpu Implementation: backward
+        grad_points_cpu, grad_edges_cpu = _C.edge_point_dist_backward(
+            points_packed.cpu(), edges_packed.cpu(), idx_cpu, grad_dists.cpu()
+        )
+
+        # Naive Implementation: forward
+        edges_list = packed_to_list(edges_packed, meshes.num_edges_per_mesh().tolist())
+        dists_naive = []
+        for i in range(N):
+            points = pcls.points_list()[i]
+            edges = edges_list[i]
+            dists_temp = torch.zeros(
+                (edges.shape[0], points.shape[0]), dtype=torch.float32, device=device
+            )
+            for e in range(edges.shape[0]):
+                for p in range(points.shape[0]):
+                    dist = self._point_to_edge_distance(points[p], edges[e])
+                    dists_temp[e, p] = dist
+
+            # torch.min() doesn't necessarily return the first index of the
+            # smallest value, our warp_reduce does. So it's not straightforward
+            # to directly compare indices, nor the gradients of grad_edges which
+            # also depend on the indices of the minimum value.
+            # To be able to compare, we take the cuda indices (shifted to be
+            # local to cloud i) and use them to index the naive distance matrix
+
+            start = edges_first_idx[i]
+            end = edges_first_idx[i + 1] if i < N - 1 else edges_packed.shape[0]
+
+            min_idx = idx_cuda.cpu()[start:end] - points_first_idx[i].cpu()
+            iidx = torch.arange(edges.shape[0], device=device)
+            min_dist = dists_temp[iidx, min_idx]
+
+            dists_naive.append(min_dist)
+
+        dists_naive = torch.cat(dists_naive)
+
+        # Compare
+        self.assertClose(dists_naive.cpu(), dists_cuda.cpu())
+        self.assertClose(dists_naive.cpu(), dists_cpu)
+
+        # Naive Implementation: backward
+        dists_naive.backward(grad_dists)
+        grad_points_naive = torch.cat([cloud.grad for cloud in pcls.points_list()])
+        grad_edges_naive = edges_packed.grad.cpu()
+
+        # Compare
+        self.assertClose(grad_points_naive.cpu(), grad_points_cuda.cpu(), atol=1e-7)
+        self.assertClose(grad_edges_naive, grad_edges_cuda.cpu(), atol=5e-7)
+        self.assertClose(grad_points_naive.cpu(), grad_points_cpu, atol=1e-7)
+        self.assertClose(grad_edges_naive, grad_edges_cpu, atol=5e-7)
+
+    def test_point_mesh_edge_distance(self):
+        """
+        Test point_mesh_edge_distance from pytorch3d.loss
+        """
+        device = get_random_cuda_device()
+        N, V, F, P = 4, 32, 16, 24
+        meshes, pcls = self.init_meshes_clouds(N, V, F, P, device=device)
+
+        # clone and detach for another backward pass through the op
+        verts_op = [verts.clone().detach() for verts in meshes.verts_list()]
+        for i in range(N):
+            verts_op[i].requires_grad = True
+
+        faces_op = [faces.clone().detach() for faces in meshes.faces_list()]
+        meshes_op = Meshes(verts=verts_op, faces=faces_op)
+        points_op = [points.clone().detach() for points in pcls.points_list()]
+        for i in range(N):
+            points_op[i].requires_grad = True
+        pcls_op = Pointclouds(points_op)
+
+        # Op implementation: forward (the backward pass is run further below)
+        loss_op = point_mesh_edge_distance(meshes_op, pcls_op)
+
+        # Naive implementation: forward (the backward pass is run further below)
+        edges_packed = meshes.edges_packed()
+        edges_list = packed_to_list(edges_packed, meshes.num_edges_per_mesh().tolist())
+        loss_naive = torch.zeros(N, dtype=torch.float32, device=device)
+        for i in range(N):
+            points = pcls.points_list()[i]
+            verts = meshes.verts_list()[i]
+            v_first_idx = meshes.mesh_to_verts_packed_first_idx()[i]
+            edges = verts[edges_list[i] - v_first_idx]
+
+            num_p = points.shape[0]
+            num_e = edges.shape[0]
+            dists = torch.zeros((num_p, num_e), dtype=torch.float32, device=device)
+            for p in range(num_p):
+                for e in range(num_e):
+                    dist = self._point_to_edge_distance(points[p], edges[e])
+                    dists[p, e] = dist
+
+            min_dist_p, min_idx_p = dists.min(1)
+            min_dist_e, min_idx_e = dists.min(0)
+
+            loss_naive[i] = min_dist_p.mean() + min_dist_e.mean()
+        loss_naive = loss_naive.mean()
+
+        # NOTE that here the comparison holds despite the discrepancy
+        # due to the argmin indices returned by min(). This is because
+        # we compare gradients on the verts and points, not on the
+        # edges or faces.
+
+        # Compare forward pass
+        self.assertClose(loss_op, loss_naive)
+
+        # Compare backward pass
+        rand_val = torch.rand(1).item()
+        grad_dist = torch.tensor(rand_val, dtype=torch.float32, device=device)
+
+        loss_naive.backward(grad_dist)
+        loss_op.backward(grad_dist)
+
+        # check verts and points grad
+        for i in range(N):
+            self.assertClose(
+                meshes.verts_list()[i].grad, meshes_op.verts_list()[i].grad
+            )
+            self.assertClose(pcls.points_list()[i].grad, pcls_op.points_list()[i].grad)
+
+    def test_point_face_array_distance(self):
+        """
+        Test CUDA and CPU implementations of PointFaceArrayDistanceForward
+            &  PointFaceArrayDistanceBackward against a naive implementation
+        """
+        P, T = 16, 32
+        device = get_random_cuda_device()
+        points = torch.rand((P, 3), dtype=torch.float32, device=device)
+        tris = torch.rand((T, 3, 3), dtype=torch.float32, device=device)
+        points_cpu = points.clone().cpu()
+        tris_cpu = tris.clone().cpu()
+
+        points.requires_grad = True
+        tris.requires_grad = True
+        grad_dists = torch.rand((P, T), dtype=torch.float32, device=device)
+
+        points_temp = points.clone().detach()  # NOTE(review): *_temp are unused below
+        points_temp.requires_grad = True
+        tris_temp = tris.clone().detach()
+        tris_temp.requires_grad = True
+
+        # Naive python implementation
+        dists_naive = torch.zeros((P, T), dtype=torch.float32, device=device)
+        for p in range(P):
+            for t in range(T):
+                dist = self._point_to_tri_distance(points[p], tris[t])
+                dists_naive[p, t] = dist
+
+        # Naive Backward
+        dists_naive.backward(grad_dists)
+        grad_points_naive = points.grad.cpu()
+        grad_tris_naive = tris.grad.cpu()
+
+        # Cuda and Cpu Forward Implementation
+        dists_cuda = _C.point_face_array_dist_forward(
+            points, tris, TestPointMeshDistance.min_triangle_area()
+        )
+        dists_cpu = _C.point_face_array_dist_forward(
+            points_cpu, tris_cpu, TestPointMeshDistance.min_triangle_area()
+        )
+
+        # Compare
+        self.assertClose(dists_naive.cpu(), dists_cuda.cpu())
+        self.assertClose(dists_naive.cpu(), dists_cpu)
+
+        # Cuda and Cpu Backward Implementation
+        grad_points_cuda, grad_tris_cuda = _C.point_face_array_dist_backward(
+            points, tris, grad_dists, TestPointMeshDistance.min_triangle_area()
+        )
+        grad_points_cpu, grad_tris_cpu = _C.point_face_array_dist_backward(
+            points_cpu,
+            tris_cpu,
+            grad_dists.cpu(),
+            TestPointMeshDistance.min_triangle_area(),
+        )
+
+        # Compare
+        self.assertClose(grad_points_naive, grad_points_cuda.cpu())
+        self.assertClose(grad_tris_naive, grad_tris_cuda.cpu(), atol=5e-6)
+        self.assertClose(grad_points_naive, grad_points_cpu)
+        self.assertClose(grad_tris_naive, grad_tris_cpu, atol=5e-6)
+
+    def test_point_face_distance(self):
+        """
+        Test CUDA and CPU implementations of PointFaceDistanceForward
+            &  PointFaceDistanceBackward against a naive implementation
+        """
+        device = get_random_cuda_device()
+        N, V, F, P = 4, 32, 16, 24
+        meshes, pcls = self.init_meshes_clouds(N, V, F, P, device=device)
+
+        # make points packed a leaf node
+        points_packed = pcls.points_packed().detach().clone()  # (P, 3)
+
+        points_first_idx = pcls.cloud_to_packed_first_idx()
+        max_p = pcls.num_points_per_cloud().max().item()
+
+        # make faces packed a leaf node
+        verts_packed = meshes.verts_packed()
+        faces_packed = verts_packed[meshes.faces_packed()]  # (T, 3, 3)
+        faces_packed = faces_packed.clone().detach()
+
+        faces_first_idx = meshes.mesh_to_faces_packed_first_idx()
+
+        # leaf nodes
+        points_packed.requires_grad = True
+        faces_packed.requires_grad = True
+        grad_dists = torch.rand(
+            (points_packed.shape[0],), dtype=torch.float32, device=device
+        )
+
+        # Cuda Implementation: forward
+        dists_cuda, idx_cuda = _C.point_face_dist_forward(
+            points_packed,
+            points_first_idx,
+            faces_packed,
+            faces_first_idx,
+            max_p,
+            TestPointMeshDistance.min_triangle_area(),
+        )
+
+        # Cuda Implementation: backward
+        grad_points_cuda, grad_faces_cuda = _C.point_face_dist_backward(
+            points_packed,
+            faces_packed,
+            idx_cuda,
+            grad_dists,
+            TestPointMeshDistance.min_triangle_area(),
+        )
+
+        # Cpu Implementation: forward
+        dists_cpu, idx_cpu = _C.point_face_dist_forward(
+            points_packed.cpu(),
+            points_first_idx.cpu(),
+            faces_packed.cpu(),
+            faces_first_idx.cpu(),
+            max_p,
+            TestPointMeshDistance.min_triangle_area(),
+        )
+
+        # Cpu Implementation: backward
+        # Note that using idx_cpu doesn't pass - there seems to be a problem with tied results.
+        grad_points_cpu, grad_faces_cpu = _C.point_face_dist_backward(
+            points_packed.cpu(),
+            faces_packed.cpu(),
+            idx_cuda.cpu(),
+            grad_dists.cpu(),
+            TestPointMeshDistance.min_triangle_area(),
+        )
+
+        # Naive Implementation: forward
+        faces_list = packed_to_list(faces_packed, meshes.num_faces_per_mesh().tolist())
+        dists_naive = []
+        for i in range(N):
+            points = pcls.points_list()[i]
+            tris = faces_list[i]
+            dists_temp = torch.zeros(
+                (points.shape[0], tris.shape[0]), dtype=torch.float32, device=device
+            )
+            for p in range(points.shape[0]):
+                for t in range(tris.shape[0]):
+                    dist = self._point_to_tri_distance(points[p], tris[t])
+                    dists_temp[p, t] = dist
+
+            # torch.min() doesn't necessarily return the first index of the
+            # smallest value, our warp_reduce does. So it's not straightforward
+            # to directly compare indices, nor the gradients of grad_tris which
+            # also depend on the indices of the minimum value.
+            # To be able to compare, we take the cuda indices (shifted to be
+            # local to mesh i) and use them to index the naive distance matrix
+
+            start = points_first_idx[i]
+            end = points_first_idx[i + 1] if i < N - 1 else points_packed.shape[0]
+
+            min_idx = idx_cuda.cpu()[start:end] - faces_first_idx[i].cpu()
+            iidx = torch.arange(points.shape[0], device=device)
+            min_dist = dists_temp[iidx, min_idx]
+
+            dists_naive.append(min_dist)
+
+        dists_naive = torch.cat(dists_naive)
+
+        # Compare
+        self.assertClose(dists_naive.cpu(), dists_cuda.cpu())
+        self.assertClose(dists_naive.cpu(), dists_cpu)
+
+        #  Naive Implementation: backward
+        dists_naive.backward(grad_dists)
+        grad_points_naive = torch.cat([cloud.grad for cloud in pcls.points_list()])
+        grad_faces_naive = faces_packed.grad.cpu()
+
+        # Compare
+        self.assertClose(grad_points_naive.cpu(), grad_points_cuda.cpu(), atol=1e-7)
+        self.assertClose(grad_faces_naive, grad_faces_cuda.cpu(), atol=5e-7)
+        self.assertClose(grad_points_naive.cpu(), grad_points_cpu, atol=1e-7)
+        self.assertClose(grad_faces_naive, grad_faces_cpu, atol=5e-7)
+
+    def test_face_point_distance(self):
+        """
+        Test CUDA and CPU implementations of FacePointDistanceForward
+            &  FacePointDistanceBackward against a naive implementation
+        """
+        device = get_random_cuda_device()
+        N, V, F, P = 4, 32, 16, 24
+        meshes, pcls = self.init_meshes_clouds(N, V, F, P, device=device)
+
+        # make points packed a leaf node
+        points_packed = pcls.points_packed().detach().clone()  # (P, 3)
+
+        points_first_idx = pcls.cloud_to_packed_first_idx()
+
+        # make faces packed a leaf node
+        verts_packed = meshes.verts_packed()
+        faces_packed = verts_packed[meshes.faces_packed()]  # (T, 3, 3)
+        faces_packed = faces_packed.clone().detach()
+
+        faces_first_idx = meshes.mesh_to_faces_packed_first_idx()
+        max_f = meshes.num_faces_per_mesh().max().item()
+
+        # leaf nodes
+        points_packed.requires_grad = True
+        faces_packed.requires_grad = True
+        grad_dists = torch.rand(
+            (faces_packed.shape[0],), dtype=torch.float32, device=device
+        )
+
+        # Cuda Implementation: forward
+        dists_cuda, idx_cuda = _C.face_point_dist_forward(
+            points_packed,
+            points_first_idx,
+            faces_packed,
+            faces_first_idx,
+            max_f,
+            TestPointMeshDistance.min_triangle_area(),
+        )
+
+        # Cuda Implementation: backward
+        grad_points_cuda, grad_faces_cuda = _C.face_point_dist_backward(
+            points_packed,
+            faces_packed,
+            idx_cuda,
+            grad_dists,
+            TestPointMeshDistance.min_triangle_area(),
+        )
+
+        # Cpu Implementation: forward
+        dists_cpu, idx_cpu = _C.face_point_dist_forward(
+            points_packed.cpu(),
+            points_first_idx.cpu(),
+            faces_packed.cpu(),
+            faces_first_idx.cpu(),
+            max_f,
+            TestPointMeshDistance.min_triangle_area(),
+        )
+
+        # Cpu Implementation: backward
+        grad_points_cpu, grad_faces_cpu = _C.face_point_dist_backward(
+            points_packed.cpu(),
+            faces_packed.cpu(),
+            idx_cpu,
+            grad_dists.cpu(),
+            TestPointMeshDistance.min_triangle_area(),
+        )
+
+        # Naive Implementation: forward
+        faces_list = packed_to_list(faces_packed, meshes.num_faces_per_mesh().tolist())
+        dists_naive = []
+        for i in range(N):
+            points = pcls.points_list()[i]
+            tris = faces_list[i]
+            dists_temp = torch.zeros(
+                (tris.shape[0], points.shape[0]), dtype=torch.float32, device=device
+            )
+            for t in range(tris.shape[0]):
+                for p in range(points.shape[0]):
+                    dist = self._point_to_tri_distance(points[p], tris[t])
+                    dists_temp[t, p] = dist
+
+            # torch.min() doesn't necessarily return the first index of the
+            # smallest value, our warp_reduce does. So it's not straightforward
+            # to directly compare indices, nor the gradients of grad_tris which
+            # also depend on the indices of the minimum value.
+            # To be able to compare, we take the cuda indices (shifted to be
+            # local to cloud i) and use them to index the naive distance matrix
+
+            start = faces_first_idx[i]
+            end = faces_first_idx[i + 1] if i < N - 1 else faces_packed.shape[0]
+
+            min_idx = idx_cuda.cpu()[start:end] - points_first_idx[i].cpu()
+            iidx = torch.arange(tris.shape[0], device=device)
+            min_dist = dists_temp[iidx, min_idx]
+
+            dists_naive.append(min_dist)
+
+        dists_naive = torch.cat(dists_naive)
+
+        # Compare
+        self.assertClose(dists_naive.cpu(), dists_cuda.cpu())
+        self.assertClose(dists_naive.cpu(), dists_cpu)
+
+        # Naive Implementation: backward
+        dists_naive.backward(grad_dists)
+        grad_points_naive = torch.cat([cloud.grad for cloud in pcls.points_list()])
+        grad_faces_naive = faces_packed.grad
+
+        # Compare
+        self.assertClose(grad_points_naive.cpu(), grad_points_cuda.cpu(), atol=1e-7)
+        self.assertClose(grad_faces_naive.cpu(), grad_faces_cuda.cpu(), atol=5e-7)
+        self.assertClose(grad_points_naive.cpu(), grad_points_cpu, atol=1e-7)
+        self.assertClose(grad_faces_naive.cpu(), grad_faces_cpu, atol=5e-7)
+
+    def test_point_mesh_face_distance(self):
+        """
+        Test point_mesh_face_distance from pytorch3d.loss
+        """
+        device = get_random_cuda_device()
+        N, V, F, P = 4, 32, 16, 24
+        meshes, pcls = self.init_meshes_clouds(N, V, F, P, device=device)
+
+        # clone and detach for another backward pass through the op
+        verts_op = [verts.clone().detach() for verts in meshes.verts_list()]
+        for i in range(N):
+            verts_op[i].requires_grad = True
+
+        faces_op = [faces.clone().detach() for faces in meshes.faces_list()]
+        meshes_op = Meshes(verts=verts_op, faces=faces_op)
+        points_op = [points.clone().detach() for points in pcls.points_list()]
+        for i in range(N):
+            points_op[i].requires_grad = True
+        pcls_op = Pointclouds(points_op)
+
+        # naive implementation: forward
+        loss_naive = torch.zeros(N, dtype=torch.float32, device=device)
+        for i in range(N):
+            points = pcls.points_list()[i]
+            verts = meshes.verts_list()[i]
+            faces = meshes.faces_list()[i]
+            tris = verts[faces]
+
+            num_p = points.shape[0]
+            num_t = tris.shape[0]
+            dists = torch.zeros((num_p, num_t), dtype=torch.float32, device=device)
+            for p in range(num_p):
+                for t in range(num_t):
+                    dist = self._point_to_tri_distance(points[p], tris[t])
+                    dists[p, t] = dist
+
+            min_dist_p, min_idx_p = dists.min(1)
+            min_dist_t, min_idx_t = dists.min(0)
+
+            loss_naive[i] = min_dist_p.mean() + min_dist_t.mean()
+        loss_naive = loss_naive.mean()
+
+        # Op: forward
+        loss_op = point_mesh_face_distance(meshes_op, pcls_op)
+
+        # Compare forward pass
+        self.assertClose(loss_op, loss_naive)
+
+        # Compare backward pass
+        rand_val = torch.rand(1).item()
+        grad_dist = torch.tensor(rand_val, dtype=torch.float32, device=device)
+
+        loss_naive.backward(grad_dist)
+        loss_op.backward(grad_dist)
+
+        # check verts and points grad
+        for i in range(N):
+            self.assertClose(
+                meshes.verts_list()[i].grad, meshes_op.verts_list()[i].grad
+            )
+            self.assertClose(pcls.points_list()[i].grad, pcls_op.points_list()[i].grad)
+
+    def test_small_faces_case(self):
+        for device in [torch.device("cpu"), torch.device("cuda:0")]:
+            mesh_vertices = torch.tensor(
+                [
+                    [-0.0021, -0.3769, 0.7146],
+                    [-0.0161, -0.3771, 0.7146],
+                    [-0.0021, -0.3771, 0.7147],
+                ],
+                dtype=torch.float32,
+                device=device,
+            )
+            mesh1_faces = torch.tensor([[0, 2, 1]], device=device)
+            mesh2_faces = torch.tensor([[2, 0, 1]], device=device)  # same verts, reordered
+            pcd_points = torch.tensor([[-0.3623, -0.5340, 0.7727]], device=device)
+            mesh1 = Meshes(verts=[mesh_vertices], faces=[mesh1_faces])
+            mesh2 = Meshes(verts=[mesh_vertices], faces=[mesh2_faces])
+            pcd = Pointclouds(points=[pcd_points])
+
+            loss1 = point_mesh_face_distance(mesh1, pcd)
+            loss2 = point_mesh_face_distance(mesh2, pcd)
+            self.assertClose(loss1, loss2)  # loss must not depend on vertex order
+
+    @staticmethod
+    def point_mesh_edge(N: int, V: int, F: int, P: int, device: str):
+        device = torch.device(device)
+        meshes, pcls = TestPointMeshDistance.init_meshes_clouds(
+            N, V, F, P, device=device
+        )
+        torch.cuda.synchronize()  # wait for GPU work queued by the setup above
+
+        def loss():
+            point_mesh_edge_distance(meshes, pcls)
+            torch.cuda.synchronize()  # wait until the loss computation finished
+
+        return loss  # zero-arg closure (presumably for benchmarking; confirm)
+
+    @staticmethod
+    def point_mesh_face(N: int, V: int, F: int, P: int, device: str):
+        device = torch.device(device)
+        meshes, pcls = TestPointMeshDistance.init_meshes_clouds(
+            N, V, F, P, device=device
+        )
+        torch.cuda.synchronize()  # wait for GPU work queued by the setup above
+
+        def loss():
+            point_mesh_face_distance(meshes, pcls)
+            torch.cuda.synchronize()  # wait until the loss computation finished
+
+        return loss  # zero-arg closure (presumably for benchmarking; confirm)
diff --git a/pytorch3d/tests/test_pointclouds.py b/pytorch3d/tests/test_pointclouds.py
new file mode 100644
index 0000000000000000000000000000000000000000..25289263e0820fd0347cbbdc30e33d970931163d
--- /dev/null
+++ b/pytorch3d/tests/test_pointclouds.py
@@ -0,0 +1,1237 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import itertools
+import random
+import unittest
+
+import numpy as np
+import torch
+from pytorch3d.structures import utils as struct_utils
+from pytorch3d.structures.pointclouds import (
+    join_pointclouds_as_batch,
+    join_pointclouds_as_scene,
+    Pointclouds,
+)
+
+from .common_testing import TestCaseMixin
+
+
+class TestPointclouds(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        np.random.seed(42)  # fixed seeds keep the random test data reproducible
+        torch.manual_seed(42)  # NOTE: Python's `random` module is not seeded here
+
+    @staticmethod
+    def init_cloud(
+        num_clouds: int = 3,
+        max_points: int = 100,
+        channels: int = 4,
+        lists_to_tensors: bool = False,
+        with_normals: bool = True,
+        with_features: bool = True,
+        min_points: int = 0,
+        requires_grad: bool = False,
+    ):
+        """
+        Function to generate a Pointclouds object of num_clouds clouds with
+        random number of points.
+
+        Args:
+            num_clouds: Number of clouds to generate.
+            channels: Number of feature channels per point.
+            max_points: Exclusive upper bound on the number of points per cloud.
+            lists_to_tensors: If True, all clouds get the same size and are
+                              passed as stacked tensors; if False, as lists.
+            with_normals: bool whether to include normals
+            with_features: bool whether to include features
+            min_points: Min number of points per cloud
+            requires_grad: bool whether the generated tensors require grad
+
+        Returns:
+            Pointclouds object on cuda:0.
+        """
+        device = torch.device("cuda:0")
+        p = torch.randint(low=min_points, high=max_points, size=(num_clouds,))
+        if lists_to_tensors:
+            p.fill_(p[0])  # equal sizes are required for torch.stack below
+
+        points_list = [
+            torch.rand(
+                (i, 3), device=device, dtype=torch.float32, requires_grad=requires_grad
+            )
+            for i in p
+        ]
+        normals_list, features_list = None, None
+        if with_normals:
+            normals_list = [
+                torch.rand(
+                    (i, 3),
+                    device=device,
+                    dtype=torch.float32,
+                    requires_grad=requires_grad,
+                )
+                for i in p
+            ]
+        if with_features:
+            features_list = [
+                torch.rand(
+                    (i, channels),
+                    device=device,
+                    dtype=torch.float32,
+                    requires_grad=requires_grad,
+                )
+                for i in p
+            ]
+
+        if lists_to_tensors:
+            points_list = torch.stack(points_list)
+            if with_normals:
+                normals_list = torch.stack(normals_list)
+            if with_features:
+                features_list = torch.stack(features_list)
+
+        return Pointclouds(points_list, normals=normals_list, features=features_list)
+
+    def test_simple(self):
+        device = torch.device("cuda:0")
+        points = [
+            torch.tensor(
+                [[0.1, 0.3, 0.5], [0.5, 0.2, 0.1], [0.6, 0.8, 0.7]],
+                dtype=torch.float32,
+                device=device,
+            ),
+            torch.tensor(
+                [[0.1, 0.3, 0.3], [0.6, 0.7, 0.8], [0.2, 0.3, 0.4], [0.1, 0.5, 0.3]],
+                dtype=torch.float32,
+                device=device,
+            ),
+            torch.tensor(
+                [
+                    [0.7, 0.3, 0.6],
+                    [0.2, 0.4, 0.8],
+                    [0.9, 0.5, 0.2],
+                    [0.2, 0.3, 0.4],
+                    [0.9, 0.3, 0.8],
+                ],
+                dtype=torch.float32,
+                device=device,
+            ),
+        ]
+        clouds = Pointclouds(points)  # three clouds with 3, 4 and 5 points
+
+        self.assertClose(
+            (clouds.packed_to_cloud_idx()).cpu(),
+            torch.tensor([0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2]),
+        )
+        self.assertClose(
+            clouds.cloud_to_packed_first_idx().cpu(), torch.tensor([0, 3, 7])
+        )
+        self.assertClose(clouds.num_points_per_cloud().cpu(), torch.tensor([3, 4, 5]))
+        self.assertClose(
+            clouds.padded_to_packed_idx().cpu(),  # cloud k starts at flat index 5 * k
+            torch.tensor([0, 1, 2, 5, 6, 7, 8, 10, 11, 12, 13, 14]),
+        )
+
+    def test_init_error(self):
+        # Check that a ValueError is raised when points/features are on
+        # different devices (deterministic cpu/cuda mix, so never flaky)
+
+        clouds = self.init_cloud(10, 100, 5)
+        points_list = clouds.points_list()  # all tensors on cuda:0
+        points_list = [
+            p.to("cpu") if i % 2 == 0 else p for i, p in enumerate(points_list)
+        ]
+        features_list = clouds.features_list()
+        normals_list = clouds.normals_list()
+
+        with self.assertRaisesRegex(ValueError, "same device"):
+            Pointclouds(
+                points=points_list, features=features_list, normals=normals_list
+            )
+
+        points_list = clouds.points_list()
+        features_list = [
+            f.to("cpu") if i % 2 == 0 else f for i, f in enumerate(features_list)
+        ]
+        with self.assertRaisesRegex(ValueError, "same device"):
+            Pointclouds(
+                points=points_list, features=features_list, normals=normals_list
+            )
+
+        points_padded = clouds.points_padded()  # on cuda:0
+        features_padded = clouds.features_padded().to("cpu")
+        normals_padded = clouds.normals_padded()
+
+        with self.assertRaisesRegex(ValueError, "same device"):
+            Pointclouds(
+                points=points_padded, features=features_padded, normals=normals_padded
+            )
+
+    def test_all_constructions(self):
+        public_getters = [
+            "points_list",
+            "points_packed",
+            "packed_to_cloud_idx",
+            "cloud_to_packed_first_idx",
+            "num_points_per_cloud",
+            "points_padded",
+            "padded_to_packed_idx",
+        ]
+        public_normals_getters = ["normals_list", "normals_packed", "normals_padded"]
+        public_features_getters = [
+            "features_list",
+            "features_packed",
+            "features_padded",
+        ]
+
+        lengths = [3, 4, 2]
+        max_len = max(lengths)  # every *_data tensor below has max_len rows
+        C = 4  # number of feature channels
+
+        points_data = [torch.zeros((max_len, 3)).uniform_() for i in lengths]
+        normals_data = [torch.zeros((max_len, 3)).uniform_() for i in lengths]
+        features_data = [torch.zeros((max_len, C)).uniform_() for i in lengths]
+        for length, p, n, f in zip(lengths, points_data, normals_data, features_data):
+            p[length:] = 0.0  # zero the rows beyond each cloud's true length
+            n[length:] = 0.0
+            f[length:] = 0.0
+        points_list = [d[:length] for length, d in zip(lengths, points_data)]
+        normals_list = [d[:length] for length, d in zip(lengths, normals_data)]
+        features_list = [d[:length] for length, d in zip(lengths, features_data)]
+        points_packed = torch.cat(points_data)
+        normals_packed = torch.cat(normals_data)
+        features_packed = torch.cat(features_data)
+        test_cases_inputs = [  # (name, points, normals, features)
+            ("list_0_0", points_list, None, None),
+            ("list_1_0", points_list, normals_list, None),
+            ("list_0_1", points_list, None, features_list),
+            ("list_1_1", points_list, normals_list, features_list),
+            ("padded_0_0", points_data, None, None),  # NOTE(review): *_data is a list
+            ("padded_1_0", points_data, normals_data, None),  # of equal-size tensors,
+            ("padded_0_1", points_data, None, features_data),  # not one padded tensor
+            ("padded_1_1", points_data, normals_data, features_data),
+            ("emptylist_emptylist_emptylist", [], [], []),
+        ]
+        false_cases_inputs = [  # (name, points, normals, features, expected error)
+            ("list_packed", points_list, normals_packed, features_packed, ValueError),
+            ("packed_0", points_packed, None, None, ValueError),
+        ]
+
+        for name, points, normals, features in test_cases_inputs:
+            with self.subTest(name=name):
+                p = Pointclouds(points, normals, features)
+                for method in public_getters:
+                    self.assertIsNotNone(getattr(p, method)())
+                for method in public_normals_getters:
+                    if normals is None or p.isempty():
+                        self.assertIsNone(getattr(p, method)())
+                for method in public_features_getters:
+                    if features is None or p.isempty():
+                        self.assertIsNone(getattr(p, method)())
+
+        for name, points, normals, features, error in false_cases_inputs:
+            with self.subTest(name=name):
+                with self.assertRaises(error):
+                    Pointclouds(points, normals, features)
+
+    def test_simple_random_clouds(self):
+        # Define the test object either from lists or tensors.
+        for with_normals in (False, True):
+            for with_features in (False, True):
+                for lists_to_tensors in (False, True):
+                    N = 10
+                    cloud = self.init_cloud(
+                        N,
+                        lists_to_tensors=lists_to_tensors,
+                        with_normals=with_normals,
+                        with_features=with_features,
+                    )
+                    points_list = cloud.points_list()
+                    normals_list = cloud.normals_list()
+                    features_list = cloud.features_list()
+
+                    # Check batch calculations.
+                    points_padded = cloud.points_padded()
+                    normals_padded = cloud.normals_padded()
+                    features_padded = cloud.features_padded()
+                    points_per_cloud = cloud.num_points_per_cloud()
+
+                    if not with_normals:
+                        self.assertIsNone(normals_list)
+                        self.assertIsNone(normals_padded)
+                    if not with_features:
+                        self.assertIsNone(features_list)
+                        self.assertIsNone(features_padded)
+                    for n in range(N):
+                        p = points_list[n].shape[0]
+                        self.assertClose(points_padded[n, :p, :], points_list[n])
+                        if with_normals:
+                            norms = normals_list[n].shape[0]
+                            self.assertEqual(p, norms)
+                            self.assertClose(normals_padded[n, :p, :], normals_list[n])
+                        if with_features:
+                            f = features_list[n].shape[0]
+                            self.assertEqual(p, f)
+                            self.assertClose(
+                                features_padded[n, :p, :], features_list[n]
+                            )
+                        if points_padded.shape[1] > p:
+                            self.assertTrue(points_padded[n, p:, :].eq(0).all())
+                            if with_features:
+                                self.assertTrue(features_padded[n, p:, :].eq(0).all())
+                        self.assertEqual(points_per_cloud[n], p)
+
+                    # Check compute packed.
+                    points_packed = cloud.points_packed()
+                    packed_to_cloud = cloud.packed_to_cloud_idx()
+                    cloud_to_packed = cloud.cloud_to_packed_first_idx()
+                    normals_packed = cloud.normals_packed()
+                    features_packed = cloud.features_packed()
+                    if not with_normals:
+                        self.assertIsNone(normals_packed)
+                    if not with_features:
+                        self.assertIsNone(features_packed)
+
+                    cur = 0
+                    for n in range(N):
+                        p = points_list[n].shape[0]
+                        self.assertClose(
+                            points_packed[cur : cur + p, :], points_list[n]
+                        )
+                        if with_normals:
+                            self.assertClose(
+                                normals_packed[cur : cur + p, :], normals_list[n]
+                            )
+                        if with_features:
+                            self.assertClose(
+                                features_packed[cur : cur + p, :], features_list[n]
+                            )
+                        self.assertTrue(packed_to_cloud[cur : cur + p].eq(n).all())
+                        self.assertTrue(cloud_to_packed[n] == cur)
+                        cur += p
+
+    def test_allempty(self):
+        clouds = Pointclouds([], [])
+        self.assertEqual(len(clouds), 0)
+        self.assertIsNone(clouds.normals_list())
+        self.assertIsNone(clouds.features_list())
+        self.assertEqual(clouds.points_padded().shape[0], 0)
+        self.assertIsNone(clouds.normals_padded())
+        self.assertIsNone(clouds.features_padded())
+        self.assertEqual(clouds.points_packed().shape[0], 0)
+        self.assertIsNone(clouds.normals_packed())
+        self.assertIsNone(clouds.features_packed())
+
+    def test_empty(self):
+        N, P, C = 10, 100, 2
+        device = torch.device("cuda:0")
+        points_list = []
+        normals_list = []
+        features_list = []
+        valid = torch.randint(2, size=(N,), dtype=torch.uint8, device=device)
+        for n in range(N):
+            if valid[n]:
+                p = torch.randint(
+                    3, high=P, size=(1,), dtype=torch.int32, device=device
+                )[0]
+                points = torch.rand((p, 3), dtype=torch.float32, device=device)
+                normals = torch.rand((p, 3), dtype=torch.float32, device=device)
+                features = torch.rand((p, C), dtype=torch.float32, device=device)
+            else:
+                points = torch.tensor([], dtype=torch.float32, device=device)
+                normals = torch.tensor([], dtype=torch.float32, device=device)
+                features = torch.tensor([], dtype=torch.int64, device=device)
+            points_list.append(points)
+            normals_list.append(normals)
+            features_list.append(features)
+
+        for with_normals in (False, True):
+            for with_features in (False, True):
+                this_features, this_normals = None, None
+                if with_normals:
+                    this_normals = normals_list
+                if with_features:
+                    this_features = features_list
+                clouds = Pointclouds(
+                    points=points_list, normals=this_normals, features=this_features
+                )
+                points_padded = clouds.points_padded()
+                normals_padded = clouds.normals_padded()
+                features_padded = clouds.features_padded()
+                if not with_normals:
+                    self.assertIsNone(normals_padded)
+                if not with_features:
+                    self.assertIsNone(features_padded)
+                points_per_cloud = clouds.num_points_per_cloud()
+                for n in range(N):
+                    p = len(points_list[n])
+                    if p > 0:
+                        self.assertClose(points_padded[n, :p, :], points_list[n])
+                        if with_normals:
+                            self.assertClose(normals_padded[n, :p, :], normals_list[n])
+                        if with_features:
+                            self.assertClose(
+                                features_padded[n, :p, :], features_list[n]
+                            )
+                        if points_padded.shape[1] > p:
+                            self.assertTrue(points_padded[n, p:, :].eq(0).all())
+                            if with_normals:
+                                self.assertTrue(normals_padded[n, p:, :].eq(0).all())
+                            if with_features:
+                                self.assertTrue(features_padded[n, p:, :].eq(0).all())
+                    self.assertTrue(points_per_cloud[n] == p)
+
+    def test_list_someempty(self):
+        # We want
+        #     point_cloud = Pointclouds(
+        #         [pcl.points_packed() for pcl in point_clouds],
+        #         features=[pcl.features_packed() for pcl in point_clouds],
+        #     )
+        # to work if point_clouds is a list of pointclouds with some empty and some not.
+        points_list = [torch.rand(30, 3), torch.zeros(0, 3)]
+        features_list = [torch.rand(30, 3), None]
+        pcls = Pointclouds(points=points_list, features=features_list)
+        self.assertEqual(len(pcls), 2)
+        self.assertClose(
+            pcls.points_padded(),
+            torch.stack([points_list[0], torch.zeros_like(points_list[0])]),
+        )
+        self.assertClose(pcls.points_packed(), points_list[0])
+        self.assertClose(
+            pcls.features_padded(),
+            torch.stack([features_list[0], torch.zeros_like(points_list[0])]),
+        )
+        self.assertClose(pcls.features_packed(), features_list[0])
+
+        points_list = [torch.zeros(0, 3), torch.rand(30, 3)]
+        features_list = [None, torch.rand(30, 3)]
+        pcls = Pointclouds(points=points_list, features=features_list)
+        self.assertEqual(len(pcls), 2)
+        self.assertClose(
+            pcls.points_padded(),
+            torch.stack([torch.zeros_like(points_list[1]), points_list[1]]),
+        )
+        self.assertClose(pcls.points_packed(), points_list[1])
+        self.assertClose(
+            pcls.features_padded(),
+            torch.stack([torch.zeros_like(points_list[1]), features_list[1]]),
+        )
+        self.assertClose(pcls.features_packed(), features_list[1])
+
+    def test_clone_list(self):
+        N = 5
+        clouds = self.init_cloud(N, 100, 5)
+        for force in (False, True):
+            if force:
+                clouds.points_packed()
+
+            new_clouds = clouds.clone()
+
+            # Check cloned and original objects do not share tensors.
+            self.assertSeparate(new_clouds.points_list()[0], clouds.points_list()[0])
+            self.assertSeparate(new_clouds.normals_list()[0], clouds.normals_list()[0])
+            self.assertSeparate(
+                new_clouds.features_list()[0], clouds.features_list()[0]
+            )
+            for attrib in [
+                "points_packed",
+                "normals_packed",
+                "features_packed",
+                "points_padded",
+                "normals_padded",
+                "features_padded",
+            ]:
+                self.assertSeparate(
+                    getattr(new_clouds, attrib)(), getattr(clouds, attrib)()
+                )
+
+            self.assertCloudsEqual(clouds, new_clouds)
+
+    def test_clone_tensor(self):
+        N = 5
+        clouds = self.init_cloud(N, 100, 5, lists_to_tensors=True)
+        for force in (False, True):
+            if force:
+                clouds.points_packed()
+
+            new_clouds = clouds.clone()
+
+            # Check cloned and original objects do not share tensors.
+            self.assertSeparate(new_clouds.points_list()[0], clouds.points_list()[0])
+            self.assertSeparate(new_clouds.normals_list()[0], clouds.normals_list()[0])
+            self.assertSeparate(
+                new_clouds.features_list()[0], clouds.features_list()[0]
+            )
+            for attrib in [
+                "points_packed",
+                "normals_packed",
+                "features_packed",
+                "points_padded",
+                "normals_padded",
+                "features_padded",
+            ]:
+                self.assertSeparate(
+                    getattr(new_clouds, attrib)(), getattr(clouds, attrib)()
+                )
+
+            self.assertCloudsEqual(clouds, new_clouds)
+
+    def test_detach(self):
+        N = 5
+        for lists_to_tensors in (True, False):
+            clouds = self.init_cloud(
+                N, 100, 5, lists_to_tensors=lists_to_tensors, requires_grad=True
+            )
+            for force in (False, True):
+                if force:
+                    clouds.points_packed()
+
+                new_clouds = clouds.detach()
+
+                for cloud in new_clouds.points_list():
+                    self.assertFalse(cloud.requires_grad)
+                for normal in new_clouds.normals_list():
+                    self.assertFalse(normal.requires_grad)
+                for feats in new_clouds.features_list():
+                    self.assertFalse(feats.requires_grad)
+
+                for attrib in [
+                    "points_packed",
+                    "normals_packed",
+                    "features_packed",
+                    "points_padded",
+                    "normals_padded",
+                    "features_padded",
+                ]:
+                    self.assertFalse(getattr(new_clouds, attrib)().requires_grad)
+
+                self.assertCloudsEqual(clouds, new_clouds)
+
+    def assertCloudsEqual(self, cloud1, cloud2):
+        N = len(cloud1)
+        self.assertEqual(N, len(cloud2))
+
+        for i in range(N):
+            self.assertClose(cloud1.points_list()[i], cloud2.points_list()[i])
+            self.assertClose(cloud1.normals_list()[i], cloud2.normals_list()[i])
+            self.assertClose(cloud1.features_list()[i], cloud2.features_list()[i])
+        has_normals = cloud1.normals_list() is not None
+        self.assertTrue(has_normals == (cloud2.normals_list() is not None))
+        has_features = cloud1.features_list() is not None
+        self.assertTrue(has_features == (cloud2.features_list() is not None))
+
+        # check padded & packed
+        self.assertClose(cloud1.points_padded(), cloud2.points_padded())
+        self.assertClose(cloud1.points_packed(), cloud2.points_packed())
+        if has_normals:
+            self.assertClose(cloud1.normals_padded(), cloud2.normals_padded())
+            self.assertClose(cloud1.normals_packed(), cloud2.normals_packed())
+        if has_features:
+            self.assertClose(cloud1.features_padded(), cloud2.features_padded())
+            self.assertClose(cloud1.features_packed(), cloud2.features_packed())
+        self.assertClose(cloud1.packed_to_cloud_idx(), cloud2.packed_to_cloud_idx())
+        self.assertClose(
+            cloud1.cloud_to_packed_first_idx(), cloud2.cloud_to_packed_first_idx()
+        )
+        self.assertClose(cloud1.num_points_per_cloud(), cloud2.num_points_per_cloud())
+        self.assertClose(cloud1.packed_to_cloud_idx(), cloud2.packed_to_cloud_idx())
+        self.assertClose(cloud1.padded_to_packed_idx(), cloud2.padded_to_packed_idx())
+        self.assertTrue(all(cloud1.valid == cloud2.valid))
+        self.assertTrue(cloud1.equisized == cloud2.equisized)
+
+    def test_offset(self):
+        def naive_offset(clouds, offsets_packed):
+            new_points_packed = clouds.points_packed() + offsets_packed
+            new_points_list = list(
+                new_points_packed.split(clouds.num_points_per_cloud().tolist(), 0)
+            )
+            return Pointclouds(
+                points=new_points_list,
+                normals=clouds.normals_list(),
+                features=clouds.features_list(),
+            )
+
+        N = 5
+        clouds = self.init_cloud(N, 100, 10)
+        all_p = clouds.points_packed().size(0)
+        points_per_cloud = clouds.num_points_per_cloud()
+        for force, deform_shape in itertools.product((0, 1), [(all_p, 3), 3]):
+            if force:
+                clouds._compute_packed(refresh=True)
+                clouds._compute_padded()
+                clouds.padded_to_packed_idx()
+
+            deform = torch.rand(deform_shape, dtype=torch.float32, device=clouds.device)
+            new_clouds_naive = naive_offset(clouds, deform)
+
+            new_clouds = clouds.offset(deform)
+
+            points_cumsum = torch.cumsum(points_per_cloud, 0).tolist()
+            points_cumsum.insert(0, 0)
+            for i in range(N):
+                item_offset = (
+                    deform
+                    if deform.ndim == 1
+                    else deform[points_cumsum[i] : points_cumsum[i + 1]]
+                )
+                self.assertClose(
+                    new_clouds.points_list()[i],
+                    clouds.points_list()[i] + item_offset,
+                )
+                self.assertClose(
+                    clouds.normals_list()[i], new_clouds_naive.normals_list()[i]
+                )
+                self.assertClose(
+                    clouds.features_list()[i], new_clouds_naive.features_list()[i]
+                )
+            self.assertCloudsEqual(new_clouds, new_clouds_naive)
+
+    def test_scale(self):
+        def naive_scale(cloud, scale):
+            if not torch.is_tensor(scale):
+                scale = torch.full((len(cloud),), scale, device=cloud.device)
+            new_points_list = [
+                scale[i] * points.clone()
+                for (i, points) in enumerate(cloud.points_list())
+            ]
+            return Pointclouds(
+                new_points_list, cloud.normals_list(), cloud.features_list()
+            )
+
+        N = 5
+        for test in ["tensor", "scalar"]:
+            for force in (False, True):
+                clouds = self.init_cloud(N, 100, 10)
+                if force:
+                    clouds._compute_packed(refresh=True)
+                    clouds._compute_padded()
+                    clouds.padded_to_packed_idx()
+                if test == "tensor":
+                    scales = torch.rand(N)
+                elif test == "scalar":
+                    scales = torch.rand(1)[0].item()
+                new_clouds_naive = naive_scale(clouds, scales)
+                new_clouds = clouds.scale(scales)
+                for i in range(N):
+                    if test == "tensor":
+                        self.assertClose(
+                            scales[i] * clouds.points_list()[i],
+                            new_clouds.points_list()[i],
+                        )
+                    else:
+                        self.assertClose(
+                            scales * clouds.points_list()[i],
+                            new_clouds.points_list()[i],
+                        )
+                    self.assertClose(
+                        clouds.normals_list()[i], new_clouds_naive.normals_list()[i]
+                    )
+                    self.assertClose(
+                        clouds.features_list()[i], new_clouds_naive.features_list()[i]
+                    )
+                self.assertCloudsEqual(new_clouds, new_clouds_naive)
+
+    def test_extend_list(self):
+        N = 10
+        clouds = self.init_cloud(N, 100, 10)
+        for force in (False, True):
+            if force:
+                # force some computes to happen
+                clouds._compute_packed(refresh=True)
+                clouds._compute_padded()
+                clouds.padded_to_packed_idx()
+            new_clouds = clouds.extend(N)
+            self.assertEqual(len(clouds) * 10, len(new_clouds))
+            for i in range(len(clouds)):
+                for n in range(N):
+                    self.assertClose(
+                        clouds.points_list()[i], new_clouds.points_list()[i * N + n]
+                    )
+                    self.assertClose(
+                        clouds.normals_list()[i], new_clouds.normals_list()[i * N + n]
+                    )
+                    self.assertClose(
+                        clouds.features_list()[i], new_clouds.features_list()[i * N + n]
+                    )
+                    self.assertTrue(clouds.valid[i] == new_clouds.valid[i * N + n])
+            self.assertAllSeparate(
+                clouds.points_list()
+                + new_clouds.points_list()
+                + clouds.normals_list()
+                + new_clouds.normals_list()
+                + clouds.features_list()
+                + new_clouds.features_list()
+            )
+            self.assertIsNone(new_clouds._points_packed)
+            self.assertIsNone(new_clouds._normals_packed)
+            self.assertIsNone(new_clouds._features_packed)
+            self.assertIsNone(new_clouds._points_padded)
+            self.assertIsNone(new_clouds._normals_padded)
+            self.assertIsNone(new_clouds._features_padded)
+
+        with self.assertRaises(ValueError):
+            clouds.extend(N=-1)
+
+    def test_to(self):
+        cloud = self.init_cloud(5, 100, 10)  # Using device "cuda:0"
+
+        cuda_device = torch.device("cuda:0")
+
+        converted_cloud = cloud.to("cuda:0")
+        self.assertEqual(cuda_device, converted_cloud.device)
+        self.assertEqual(cuda_device, cloud.device)
+        self.assertIs(cloud, converted_cloud)
+
+        converted_cloud = cloud.to(cuda_device)
+        self.assertEqual(cuda_device, converted_cloud.device)
+        self.assertEqual(cuda_device, cloud.device)
+        self.assertIs(cloud, converted_cloud)
+
+        cpu_device = torch.device("cpu")
+
+        converted_cloud = cloud.to("cpu")
+        self.assertEqual(cpu_device, converted_cloud.device)
+        self.assertEqual(cuda_device, cloud.device)
+        self.assertIsNot(cloud, converted_cloud)
+
+        converted_cloud = cloud.to(cpu_device)
+        self.assertEqual(cpu_device, converted_cloud.device)
+        self.assertEqual(cuda_device, cloud.device)
+        self.assertIsNot(cloud, converted_cloud)
+
+    def test_to_list(self):
+        cloud = self.init_cloud(5, 100, 10)
+        device = torch.device("cuda:1")
+
+        new_cloud = cloud.to(device)
+        self.assertTrue(new_cloud.device == device)
+        self.assertTrue(cloud.device == torch.device("cuda:0"))
+        for attrib in [
+            "points_padded",
+            "points_packed",
+            "normals_padded",
+            "normals_packed",
+            "features_padded",
+            "features_packed",
+            "num_points_per_cloud",
+            "cloud_to_packed_first_idx",
+            "padded_to_packed_idx",
+        ]:
+            self.assertClose(
+                getattr(new_cloud, attrib)().cpu(), getattr(cloud, attrib)().cpu()
+            )
+        for i in range(len(cloud)):
+            self.assertClose(
+                cloud.points_list()[i].cpu(), new_cloud.points_list()[i].cpu()
+            )
+            self.assertClose(
+                cloud.normals_list()[i].cpu(), new_cloud.normals_list()[i].cpu()
+            )
+            self.assertClose(
+                cloud.features_list()[i].cpu(), new_cloud.features_list()[i].cpu()
+            )
+        self.assertTrue(all(cloud.valid.cpu() == new_cloud.valid.cpu()))
+        self.assertTrue(cloud.equisized == new_cloud.equisized)
+        self.assertTrue(cloud._N == new_cloud._N)
+        self.assertTrue(cloud._P == new_cloud._P)
+        self.assertTrue(cloud._C == new_cloud._C)
+
+    def test_to_tensor(self):
+        cloud = self.init_cloud(5, 100, 10, lists_to_tensors=True)
+        device = torch.device("cuda:1")
+
+        new_cloud = cloud.to(device)
+        self.assertTrue(new_cloud.device == device)
+        self.assertTrue(cloud.device == torch.device("cuda:0"))
+        for attrib in [
+            "points_padded",
+            "points_packed",
+            "normals_padded",
+            "normals_packed",
+            "features_padded",
+            "features_packed",
+            "num_points_per_cloud",
+            "cloud_to_packed_first_idx",
+            "padded_to_packed_idx",
+        ]:
+            self.assertClose(
+                getattr(new_cloud, attrib)().cpu(), getattr(cloud, attrib)().cpu()
+            )
+        for i in range(len(cloud)):
+            self.assertClose(
+                cloud.points_list()[i].cpu(), new_cloud.points_list()[i].cpu()
+            )
+            self.assertClose(
+                cloud.normals_list()[i].cpu(), new_cloud.normals_list()[i].cpu()
+            )
+            self.assertClose(
+                cloud.features_list()[i].cpu(), new_cloud.features_list()[i].cpu()
+            )
+        self.assertTrue(all(cloud.valid.cpu() == new_cloud.valid.cpu()))
+        self.assertTrue(cloud.equisized == new_cloud.equisized)
+        self.assertTrue(cloud._N == new_cloud._N)
+        self.assertTrue(cloud._P == new_cloud._P)
+        self.assertTrue(cloud._C == new_cloud._C)
+
+    def test_split(self):
+        clouds = self.init_cloud(5, 100, 10)
+        split_sizes = [2, 3]
+        split_clouds = clouds.split(split_sizes)
+        self.assertEqual(len(split_clouds[0]), 2)
+        self.assertTrue(
+            split_clouds[0].points_list()
+            == [clouds.get_cloud(0)[0], clouds.get_cloud(1)[0]]
+        )
+        self.assertEqual(len(split_clouds[1]), 3)
+        self.assertTrue(
+            split_clouds[1].points_list()
+            == [clouds.get_cloud(2)[0], clouds.get_cloud(3)[0], clouds.get_cloud(4)[0]]
+        )
+
+        split_sizes = [2, 0.3]
+        with self.assertRaises(ValueError):
+            clouds.split(split_sizes)
+
+    def test_get_cloud(self):
+        clouds = self.init_cloud(2, 100, 10)
+        for i in range(len(clouds)):
+            points, normals, features = clouds.get_cloud(i)
+            self.assertClose(points, clouds.points_list()[i])
+            self.assertClose(normals, clouds.normals_list()[i])
+            self.assertClose(features, clouds.features_list()[i])
+
+        with self.assertRaises(ValueError):
+            clouds.get_cloud(5)
+        with self.assertRaises(ValueError):
+            clouds.get_cloud(0.2)
+
+    def test_get_bounding_boxes(self):
+        device = torch.device("cuda:0")
+        points_list = []
+        for size in [10]:
+            points = torch.rand((size, 3), dtype=torch.float32, device=device)
+            points_list.append(points)
+
+        mins = torch.min(points, dim=0)[0]
+        maxs = torch.max(points, dim=0)[0]
+        bboxes_gt = torch.stack([mins, maxs], dim=1).unsqueeze(0)
+        clouds = Pointclouds(points_list)
+        bboxes = clouds.get_bounding_boxes()
+        self.assertClose(bboxes_gt, bboxes)
+
+    def test_padded_to_packed_idx(self):
+        device = torch.device("cuda:0")
+        points_list = []
+        npoints = [10, 20, 30]
+        for p in npoints:
+            points = torch.rand((p, 3), dtype=torch.float32, device=device)
+            points_list.append(points)
+
+        clouds = Pointclouds(points_list)
+
+        padded_to_packed_idx = clouds.padded_to_packed_idx()
+        points_packed = clouds.points_packed()
+        points_padded = clouds.points_padded()
+        points_padded_flat = points_padded.view(-1, 3)
+
+        self.assertClose(points_padded_flat[padded_to_packed_idx], points_packed)
+
+        idx = padded_to_packed_idx.view(-1, 1).expand(-1, 3)
+        self.assertClose(points_padded_flat.gather(0, idx), points_packed)
+
+    def test_getitem(self):
+        device = torch.device("cuda:0")
+        clouds = self.init_cloud(3, 10, 100)
+
+        def check_equal(selected, indices):
+            for selectedIdx, index in indices:
+                self.assertClose(
+                    selected.points_list()[selectedIdx], clouds.points_list()[index]
+                )
+                self.assertClose(
+                    selected.normals_list()[selectedIdx], clouds.normals_list()[index]
+                )
+                self.assertClose(
+                    selected.features_list()[selectedIdx], clouds.features_list()[index]
+                )
+
+        # int index
+        index = 1
+        clouds_selected = clouds[index]
+        self.assertEqual(len(clouds_selected), 1)
+        check_equal(clouds_selected, [(0, 1)])
+
+        # list index
+        index = [1, 2]
+        clouds_selected = clouds[index]
+        self.assertEqual(len(clouds_selected), len(index))
+        check_equal(clouds_selected, enumerate(index))
+
+        # slice index
+        index = slice(0, 2, 1)
+        clouds_selected = clouds[index]
+        self.assertEqual(len(clouds_selected), 2)
+        check_equal(clouds_selected, [(0, 0), (1, 1)])
+
+        # bool tensor
+        index = torch.tensor([1, 0, 1], dtype=torch.bool, device=device)
+        clouds_selected = clouds[index]
+        self.assertEqual(len(clouds_selected), index.sum())
+        check_equal(clouds_selected, [(0, 0), (1, 2)])
+
+        # int tensor
+        index = torch.tensor([1, 2], dtype=torch.int64, device=device)
+        clouds_selected = clouds[index]
+        self.assertEqual(len(clouds_selected), index.numel())
+        check_equal(clouds_selected, enumerate(index.tolist()))
+
+        # invalid index
+        index = torch.tensor([1, 0, 1], dtype=torch.float32, device=device)
+        with self.assertRaises(IndexError):
+            clouds_selected = clouds[index]
+        index = 1.2
+        with self.assertRaises(IndexError):
+            clouds_selected = clouds[index]
+
+    def test_update_padded(self):
+        N, P, C = 5, 100, 4
+        for with_normfeat in (True, False):
+            for with_new_normfeat in (True, False):
+                clouds = self.init_cloud(
+                    N, P, C, with_normals=with_normfeat, with_features=with_normfeat
+                )
+
+                num_points_per_cloud = clouds.num_points_per_cloud()
+
+                # initialize new points, normals, features
+                new_points = torch.rand(
+                    clouds.points_padded().shape, device=clouds.device
+                )
+                new_points_list = [
+                    new_points[i, : num_points_per_cloud[i]] for i in range(N)
+                ]
+                new_normals, new_normals_list = None, None
+                new_features, new_features_list = None, None
+                if with_new_normfeat:
+                    new_normals = torch.rand(
+                        clouds.points_padded().shape, device=clouds.device
+                    )
+                    new_normals_list = [
+                        new_normals[i, : num_points_per_cloud[i]] for i in range(N)
+                    ]
+                    feat_shape = [
+                        clouds.points_padded().shape[0],
+                        clouds.points_padded().shape[1],
+                        C,
+                    ]
+                    new_features = torch.rand(feat_shape, device=clouds.device)
+                    new_features_list = [
+                        new_features[i, : num_points_per_cloud[i]] for i in range(N)
+                    ]
+
+                # update
+                new_clouds = clouds.update_padded(new_points, new_normals, new_features)
+                self.assertIsNone(new_clouds._points_list)
+                self.assertIsNone(new_clouds._points_packed)
+
+                self.assertEqual(new_clouds.equisized, clouds.equisized)
+                self.assertTrue(all(new_clouds.valid == clouds.valid))
+
+                self.assertClose(new_clouds.points_padded(), new_points)
+                self.assertClose(new_clouds.points_packed(), torch.cat(new_points_list))
+                for i in range(N):
+                    self.assertClose(new_clouds.points_list()[i], new_points_list[i])
+
+                if with_new_normfeat:
+                    for i in range(N):
+                        self.assertClose(
+                            new_clouds.normals_list()[i], new_normals_list[i]
+                        )
+                        self.assertClose(
+                            new_clouds.features_list()[i], new_features_list[i]
+                        )
+                    self.assertClose(new_clouds.normals_padded(), new_normals)
+                    self.assertClose(
+                        new_clouds.normals_packed(), torch.cat(new_normals_list)
+                    )
+                    self.assertClose(new_clouds.features_padded(), new_features)
+                    self.assertClose(
+                        new_clouds.features_packed(), torch.cat(new_features_list)
+                    )
+                else:
+                    if with_normfeat:
+                        for i in range(N):
+                            self.assertClose(
+                                new_clouds.normals_list()[i], clouds.normals_list()[i]
+                            )
+                            self.assertClose(
+                                new_clouds.features_list()[i], clouds.features_list()[i]
+                            )
+                            self.assertNotSeparate(
+                                new_clouds.normals_list()[i], clouds.normals_list()[i]
+                            )
+                            self.assertNotSeparate(
+                                new_clouds.features_list()[i], clouds.features_list()[i]
+                            )
+
+                        self.assertClose(
+                            new_clouds.normals_padded(), clouds.normals_padded()
+                        )
+                        self.assertClose(
+                            new_clouds.normals_packed(), clouds.normals_packed()
+                        )
+                        self.assertClose(
+                            new_clouds.features_padded(), clouds.features_padded()
+                        )
+                        self.assertClose(
+                            new_clouds.features_packed(), clouds.features_packed()
+                        )
+                        self.assertNotSeparate(
+                            new_clouds.normals_padded(), clouds.normals_padded()
+                        )
+                        self.assertNotSeparate(
+                            new_clouds.features_padded(), clouds.features_padded()
+                        )
+                    else:
+                        self.assertIsNone(new_clouds.normals_list())
+                        self.assertIsNone(new_clouds.features_list())
+                        self.assertIsNone(new_clouds.normals_padded())
+                        self.assertIsNone(new_clouds.features_padded())
+                        self.assertIsNone(new_clouds.normals_packed())
+                        self.assertIsNone(new_clouds.features_packed())
+
+                for attrib in [
+                    "num_points_per_cloud",
+                    "cloud_to_packed_first_idx",
+                    "padded_to_packed_idx",
+                ]:
+                    self.assertClose(
+                        getattr(new_clouds, attrib)(), getattr(clouds, attrib)()
+                    )
+
+    def test_inside_box(self):  # compares Pointclouds.inside_box to a naive reference
+        def inside_box_naive(cloud, box_min, box_max):
+            return ((cloud >= box_min.view(1, 3)) * (cloud <= box_max.view(1, 3))).all(
+                dim=-1
+            )
+
+        N, P, C = 5, 100, 4
+
+        clouds = self.init_cloud(N, P, C, with_normals=False, with_features=False)
+        device = clouds.device
+
+        # per-cloud boxes of shape Nx2x3: row 0 is the min corner, row 1 the max corner
+        box_min = torch.rand((N, 1, 3), device=device)
+        box_max = box_min + torch.rand((N, 1, 3), device=device)
+        box = torch.cat([box_min, box_max], dim=1)
+
+        within_box = clouds.inside_box(box)
+
+        within_box_naive = []
+        for i, cloud in enumerate(clouds.points_list()):
+            within_box_naive.append(inside_box_naive(cloud, box[i, 0], box[i, 1]))
+        within_box_naive = torch.cat(within_box_naive, 0)
+        self.assertTrue(torch.equal(within_box, within_box_naive))
+
+        # a single box of shape 2x3, checked against every cloud of the batch
+        box2 = box[0, :]
+
+        within_box2 = clouds.inside_box(box2)
+
+        within_box_naive2 = []
+        for cloud in clouds.points_list():
+            within_box_naive2.append(inside_box_naive(cloud, box2[0], box2[1]))
+        within_box_naive2 = torch.cat(within_box_naive2, 0)
+        self.assertTrue(torch.equal(within_box2, within_box_naive2))
+        # the same box with a leading singleton dim (1x2x3) must give the same mask
+        box3 = box2.expand(1, 2, 3)
+
+        within_box3 = clouds.inside_box(box3)
+        self.assertTrue(torch.equal(within_box2, within_box3))
+
+        # invalid box: the second corner is built by subtracting from the first one
+        invalid_box = torch.cat(
+            [box_min, box_min - torch.rand((N, 1, 3), device=device)], dim=1
+        )
+        with self.assertRaisesRegex(ValueError, "Input box is invalid"):
+            clouds.inside_box(invalid_box)
+
+        # invalid box shapes: a batch of 2 boxes for N=5 clouds, then a 4-dim tensor
+        invalid_box = box[0].expand(2, 2, 3)
+        with self.assertRaisesRegex(ValueError, "Input box dimension is"):
+            clouds.inside_box(invalid_box)
+        invalid_box = torch.rand((5, 8, 9, 3), device=device)
+        with self.assertRaisesRegex(ValueError, "Input box must be of shape"):
+            clouds.inside_box(invalid_box)
+
+    def test_estimate_normals(self):  # estimated normals must be stored on the clouds
+        for with_normals in (True, False):
+            for run_padded in (True, False):
+                for run_packed in (True, False):
+
+                    clouds = TestPointclouds.init_cloud(
+                        3,
+                        100,
+                        with_normals=with_normals,
+                        with_features=False,
+                        min_points=60,
+                    )
+                    nums = clouds.num_points_per_cloud()
+                    if run_padded:  # optionally build the padded points beforehand
+                        clouds.points_padded()
+                    if run_packed:  # optionally build the packed points beforehand
+                        clouds.points_packed()
+
+                    normals_est_padded = clouds.estimate_normals(assign_to_self=True)
+                    normals_est_list = struct_utils.padded_to_list(
+                        normals_est_padded, nums.tolist()
+                    )
+                    self.assertClose(clouds.normals_padded(), normals_est_padded)
+                    for i in range(len(clouds)):
+                        self.assertClose(clouds.normals_list()[i], normals_est_list[i])
+                    self.assertClose(
+                        clouds.normals_packed(), torch.cat(normals_est_list, dim=0)
+                    )
+
+    def test_subsample(self):  # checks the per-cloud sizes after Pointclouds.subsample
+        lengths = [4, 5, 13, 3]
+        points = [torch.rand(length, 3) for length in lengths]
+        features = [torch.rand(length, 5) for length in lengths]
+        normals = [torch.rand(length, 3) for length in lengths]
+
+        pcl1 = Pointclouds(points=points).cuda()
+        self.assertIs(pcl1, pcl1.subsample(13))  # no cloud exceeds the limit: same object
+        self.assertIs(pcl1, pcl1.subsample([6, 13, 13, 13]))  # per-cloud limits, all met
+
+        lengths_max_4 = torch.tensor([4, 4, 4, 3]).cuda()  # min(length, 4) per cloud
+        for with_normals, with_features in itertools.product([True, False], repeat=2):
+            with self.subTest(f"{with_normals} {with_features}"):
+                pcl = Pointclouds(
+                    points=points,
+                    normals=normals if with_normals else None,
+                    features=features if with_features else None,
+                )
+                pcl_copy = pcl.subsample(max_points=4)
+                for length, points_ in zip(lengths_max_4, pcl_copy.points_list()):
+                    self.assertEqual(points_.shape, (length, 3))
+                if with_normals:
+                    for length, normals_ in zip(lengths_max_4, pcl_copy.normals_list()):
+                        self.assertEqual(normals_.shape, (length, 3))
+                else:
+                    self.assertIsNone(pcl_copy.normals_list())
+                if with_features:
+                    for length, features_ in zip(
+                        lengths_max_4, pcl_copy.features_list()
+                    ):
+                        self.assertEqual(features_.shape, (length, 5))
+                else:
+                    self.assertIsNone(pcl_copy.features_list())
+
+        pcl2 = Pointclouds(points=points)
+        pcl_copy2 = pcl2.subsample(lengths_max_4)  # limits passed as a tensor
+        for length, points_ in zip(lengths_max_4, pcl_copy2.points_list()):
+            self.assertEqual(points_.shape, (length, 3))
+
+    def test_join_pointclouds_as_batch(self):
+        """
+        Test join_pointclouds_as_batch
+        """
+
+        def check_item(x, y):  # y is None iff x is None, else three stacked copies of x
+            self.assertEqual(x is None, y is None)
+            if x is not None:
+                self.assertClose(torch.cat([x, x, x]), y)
+
+        def check_triple(points, points3):
+            """
+            Verify that points3 is three copies of points.
+            """
+            check_item(points.points_padded(), points3.points_padded())
+            check_item(points.normals_padded(), points3.normals_padded())
+            check_item(points.features_padded(), points3.features_padded())
+
+        lengths = [4, 5, 13, 3]
+        points = [torch.rand(length, 3) for length in lengths]
+        features = [torch.rand(length, 5) for length in lengths]
+        normals = [torch.rand(length, 3) for length in lengths]
+
+        # Test with normals and features present (list-backed pointclouds)
+        pcl1 = Pointclouds(points=points, features=features, normals=normals)
+        pcl3 = join_pointclouds_as_batch([pcl1] * 3)
+        check_triple(pcl1, pcl3)
+
+        # Test with normals and features present for tensor backed pointclouds
+        N, P, D = 5, 30, 4
+        pcl = Pointclouds(
+            points=torch.rand(N, P, 3),
+            features=torch.rand(N, P, D),
+            normals=torch.rand(N, P, 3),
+        )
+        pcl3 = join_pointclouds_as_batch([pcl] * 3)
+        check_triple(pcl, pcl3)
+
+        # Test with inconsistent #features (5 channels in pcl1 vs. D=4 in pcl)
+        with self.assertRaisesRegex(ValueError, "same number of features"):
+            join_pointclouds_as_batch([pcl1, pcl])
+
+        # Test without normals (batch join and scene join)
+        pcl_nonormals = Pointclouds(points=points, features=features)
+        pcl3 = join_pointclouds_as_batch([pcl_nonormals] * 3)
+        check_triple(pcl_nonormals, pcl3)
+        pcl_scene = join_pointclouds_as_scene([pcl_nonormals] * 3)
+        self.assertEqual(len(pcl_scene), 1)
+        self.assertClose(pcl_scene.features_packed(), pcl3.features_packed())
+
+        # Test without features (batch join and scene join)
+        pcl_nofeats = Pointclouds(points=points, normals=normals)
+        pcl3 = join_pointclouds_as_batch([pcl_nofeats] * 3)
+        check_triple(pcl_nofeats, pcl3)
+        pcl_scene = join_pointclouds_as_scene([pcl_nofeats] * 3)
+        self.assertEqual(len(pcl_scene), 1)
+        self.assertClose(pcl_scene.normals_packed(), pcl3.normals_packed())
+
+        # Check that an error is raised if the pointclouds in the batch
+        # are not consistent in including normals/features
+        with self.assertRaisesRegex(ValueError, "some set to None"):
+            join_pointclouds_as_batch([pcl, pcl_nonormals, pcl_nonormals])
+        with self.assertRaisesRegex(ValueError, "some set to None"):
+            join_pointclouds_as_batch([pcl, pcl_nofeats, pcl_nofeats])
+
+        # Check error if the first input is a single Pointclouds object
+        # instead of a list
+        with self.assertRaisesRegex(ValueError, "Wrong first argument"):
+            join_pointclouds_as_batch(pcl)
+
+        # Check error if the pointclouds are not all on the same device
+        with self.assertRaisesRegex(ValueError, "same device"):
+            join_pointclouds_as_batch([pcl, pcl.to("cuda:0")])
+
+    @staticmethod
+    def compute_packed_with_init(
+        num_clouds: int = 10, max_p: int = 100, features: int = 300
+    ):  # returns a closure; presumably used by a benchmark script — TODO confirm
+        clouds = TestPointclouds.init_cloud(num_clouds, max_p, features)
+        torch.cuda.synchronize()  # wait for pending CUDA work from the setup
+
+        def compute_packed():
+            clouds._compute_packed(refresh=True)  # refresh=True is passed on every call
+            torch.cuda.synchronize()  # wait until the CUDA work has finished
+
+        return compute_packed
+
+    @staticmethod
+    def compute_padded_with_init(
+        num_clouds: int = 10, max_p: int = 100, features: int = 300
+    ):  # returns a closure; presumably used by a benchmark script — TODO confirm
+        clouds = TestPointclouds.init_cloud(num_clouds, max_p, features)
+        torch.cuda.synchronize()  # wait for pending CUDA work from the setup
+
+        def compute_padded():
+            clouds._compute_padded(refresh=True)  # refresh=True is passed on every call
+            torch.cuda.synchronize()  # wait until the CUDA work has finished
+
+        return compute_padded
diff --git a/pytorch3d/tests/test_points_alignment.py b/pytorch3d/tests/test_points_alignment.py
new file mode 100644
index 0000000000000000000000000000000000000000..39b07a2a27e09f810231ffe4812498e02183a1dc
--- /dev/null
+++ b/pytorch3d/tests/test_points_alignment.py
@@ -0,0 +1,689 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import numpy as np
+import torch
+from pytorch3d.ops import points_alignment
+from pytorch3d.structures.pointclouds import Pointclouds
+from pytorch3d.transforms import rotation_conversions
+
+from .common_testing import get_tests_dir, TestCaseMixin
+
+
+def _apply_pcl_transformation(X, R, T, s=None):
+    """
+    Apply a batch of similarity/rigid transformations, parametrized with
+    rotation `R`, translation `T` and scale `s`, to an input batch of
+    point clouds `X`.
+    """
+    if isinstance(X, Pointclouds):
+        num_points = X.num_points_per_cloud()  # kept to strip the padding at the end
+        X_t = X.points_padded()
+    else:
+        X_t = X
+
+    if s is not None:
+        X_t = s[:, None, None] * X_t  # one scale per batch element, broadcast
+
+    X_t = torch.bmm(X_t, R) + T[:, None, :]  # points are row vectors: x @ R + t
+
+    if isinstance(X, Pointclouds):  # return the same kind of container as the input
+        X_list = [x[:n_p] for x, n_p in zip(X_t, num_points)]
+        X_t = Pointclouds(X_list)
+
+    return X_t
+
+
+class TestICP(TestCaseMixin, unittest.TestCase):  # tests iterative_closest_point
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)
+        np.random.seed(42)
+        trimesh_results_path = get_tests_dir() / "data/icp_data.pth"
+        self.trimesh_results = torch.load(trimesh_results_path)  # NOTE(review): unpickles; trusted test data only
+
+    @staticmethod
+    def iterative_closest_point(
+        batch_size=10,
+        n_points_X=100,
+        n_points_Y=100,
+        dim=3,
+        use_pointclouds=False,
+        estimate_scale=False,
+    ):
+
+        device = torch.device("cuda:0")
+
+        # initialize the two input point clouds X and Y
+        X, Y = [
+            TestCorrespondingPointsAlignment.init_point_cloud(
+                batch_size=batch_size,
+                n_points=n_points,
+                dim=dim,
+                device=device,
+                use_pointclouds=use_pointclouds,
+                random_pcl_size=True,
+                fix_seed=i,  # seed 0 for X, seed 1 for Y
+            )
+            for i, n_points in enumerate((n_points_X, n_points_Y))
+        ]
+
+        torch.cuda.synchronize()  # wait for pending CUDA work from the setup
+
+        def run_iterative_closest_point():
+            points_alignment.iterative_closest_point(
+                X,
+                Y,
+                estimate_scale=estimate_scale,
+                allow_reflection=False,
+                verbose=False,
+                max_iterations=100,
+                relative_rmse_thr=1e-4,
+            )
+            torch.cuda.synchronize()  # wait until the CUDA work has finished
+
+        return run_iterative_closest_point
+
+    def test_init_transformation(self, batch_size=10):
+        """
+        First runs a full ICP on a random problem. Then takes a given point
+        in the history of ICP iteration transformations, initializes
+        a second run of ICP with this transformation and checks whether
+        both runs ended with the same solution.
+        """
+
+        device = torch.device("cuda:0")
+
+        for dim in (2, 3, 11):
+            for n_points_X in (30, 100):
+                for n_points_Y in (30, 100):
+                    # initialize ground truth point clouds
+                    X, Y = [
+                        TestCorrespondingPointsAlignment.init_point_cloud(
+                            batch_size=batch_size,
+                            n_points=n_points,
+                            dim=dim,
+                            device=device,
+                            use_pointclouds=False,
+                            random_pcl_size=True,
+                        )
+                        for n_points in (n_points_X, n_points_Y)
+                    ]
+
+                    # run full icp
+                    (
+                        converged,
+                        _,
+                        Xt,
+                        (R, T, s),
+                        t_hist,
+                    ) = points_alignment.iterative_closest_point(
+                        X,
+                        Y,
+                        estimate_scale=False,
+                        allow_reflection=False,
+                        verbose=False,
+                        max_iterations=100,
+                    )
+
+                    # start from the solution after the third iteration of the
+                    # previous ICP (or from its last one if the history is shorter)
+                    t_init = t_hist[min(2, len(t_hist) - 1)]
+
+                    # rerun the ICP
+                    (
+                        converged_init,
+                        _,
+                        Xt_init,
+                        (R_init, T_init, s_init),
+                        t_hist_init,
+                    ) = points_alignment.iterative_closest_point(
+                        X,
+                        Y,
+                        init_transform=t_init,
+                        estimate_scale=False,
+                        allow_reflection=False,
+                        verbose=False,
+                        max_iterations=100,
+                    )
+
+                    # compare transformations and obtained clouds
+                    # check that both sets of transforms are the same
+                    atol = 3e-5
+                    self.assertClose(R_init, R, atol=atol)
+                    self.assertClose(T_init, T, atol=atol)
+                    self.assertClose(s_init, s, atol=atol)
+                    self.assertClose(Xt_init, Xt, atol=atol)
+
+    def test_heterogeneous_inputs(self, batch_size=7):
+        """
+        Tests whether we get the same result when running ICP on
+        a set of randomly-sized Pointclouds and on their padded versions.
+        """
+
+        torch.manual_seed(4)
+        device = torch.device("cuda:0")
+
+        for estimate_scale in (True, False):
+            for max_n_points in (10, 30, 100):
+                # initialize ground truth point clouds
+                X_pcl, Y_pcl = [
+                    TestCorrespondingPointsAlignment.init_point_cloud(
+                        batch_size=batch_size,
+                        n_points=max_n_points,
+                        dim=3,
+                        device=device,
+                        use_pointclouds=True,
+                        random_pcl_size=True,
+                    )
+                    for _ in range(2)
+                ]
+
+                # get the padded versions and their num of points
+                X_padded = X_pcl.points_padded()
+                Y_padded = Y_pcl.points_padded()
+                n_points_X = X_pcl.num_points_per_cloud()
+                n_points_Y = Y_pcl.num_points_per_cloud()
+
+                # run icp with Pointclouds inputs
+                (
+                    _,
+                    _,
+                    Xt_pcl,
+                    (R_pcl, T_pcl, s_pcl),
+                    _,
+                ) = points_alignment.iterative_closest_point(
+                    X_pcl,
+                    Y_pcl,
+                    estimate_scale=estimate_scale,
+                    allow_reflection=False,
+                    verbose=False,
+                    max_iterations=100,
+                )
+                Xt_pcl = Xt_pcl.points_padded()
+
+                # run icp with tensor inputs on each element
+                # of the batch separately (padding sliced off, batch dim of 1)
+                icp_results = [
+                    points_alignment.iterative_closest_point(
+                        X_[None, :n_X, :],
+                        Y_[None, :n_Y, :],
+                        estimate_scale=estimate_scale,
+                        allow_reflection=False,
+                        verbose=False,
+                        max_iterations=100,
+                    )
+                    for X_, Y_, n_X, n_Y in zip(
+                        X_padded, Y_padded, n_points_X, n_points_Y
+                    )
+                ]
+
+                # parse out the transformation results (R, T, s) and re-batch them
+                R, T, s = [
+                    torch.cat([x.RTs[i] for x in icp_results], dim=0) for i in range(3)
+                ]
+
+                # check that both sets of transforms are the same
+                atol = 1e-5
+                self.assertClose(R_pcl, R, atol=atol)
+                self.assertClose(T_pcl, T, atol=atol)
+                self.assertClose(s_pcl, s, atol=atol)
+
+                # compare the transformed point clouds (valid points only)
+                for pcli in range(batch_size):
+                    nX = n_points_X[pcli]
+                    Xt_ = icp_results[pcli].Xt[0, :nX]
+                    Xt_pcl_ = Xt_pcl[pcli][:nX]
+                    self.assertClose(Xt_pcl_, Xt_, atol=atol)
+
+    def test_compare_with_trimesh(self):
+        """
+        Compares the outputs of `iterative_closest_point` with the results
+        of `trimesh.registration.icp` from the `trimesh` python package:
+        https://github.com/mikedh/trimesh
+
+        We have run `trimesh.registration.icp` on several random problems
+        with different point cloud sizes. The results of trimesh, together with
+        the randomly generated input clouds are loaded in the constructor of
+        this class and this test compares the loaded results to our runs.
+        """
+        for n_points_X in (10, 20, 50, 100):
+            for n_points_Y in (10, 20, 50, 100):
+                self._compare_with_trimesh(n_points_X=n_points_X, n_points_Y=n_points_Y)
+
+    def _compare_with_trimesh(
+        self, n_points_X=100, n_points_Y=100, estimate_scale=False
+    ):
+        """
+        Executes a single test for `iterative_closest_point` for a
+        specific setting of the inputs / outputs. Compares the result with
+        the result of the trimesh package on the same input data.
+        """
+
+        device = torch.device("cuda:0")
+
+        # load the trimesh results and the initial point clouds for icp;
+        # the fixture loaded in setUp is keyed by (n_points_X, n_points_Y, estimate_scale)
+        key = (int(n_points_X), int(n_points_Y), int(estimate_scale))
+        X, Y, R_trimesh, T_trimesh, s_trimesh = [
+            x.to(device) for x in self.trimesh_results[key]
+        ]
+
+        # run the icp algorithm
+        (
+            converged,
+            _,
+            _,
+            (R_ours, T_ours, s_ours),
+            _,
+        ) = points_alignment.iterative_closest_point(
+            X,
+            Y,
+            estimate_scale=estimate_scale,
+            allow_reflection=False,
+            verbose=False,
+            max_iterations=100,
+        )
+
+        # check that we have the same transformation
+        # and that the icp converged
+        atol = 1e-5
+        self.assertClose(R_ours, R_trimesh, atol=atol)
+        self.assertClose(T_ours, T_trimesh, atol=atol)
+        self.assertClose(s_ours, s_trimesh, atol=atol)
+        self.assertTrue(converged)
+
+
+class TestCorrespondingPointsAlignment(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:  # re-seed the torch and numpy RNGs
+        super().setUp()
+        torch.manual_seed(42)
+        np.random.seed(42)
+
+    @staticmethod
+    def random_rotation(batch_size, dim, device=None):
+        """
+        Generates a batch of random `dim`-dimensional rotation matrices.
+        """
+        if dim == 3:
+            R = rotation_conversions.random_rotations(batch_size, device=device)
+        else:
+            # orthogonalize random normal square matrices with an SVD, then put
+            # det(U V^T) into the last diagonal entry of E so that the product
+            # R = U E V^T has determinant(R)==1
+            H = torch.randn(batch_size, dim, dim, dtype=torch.float32, device=device)
+            U, _, V = torch.svd(H)
+            E = torch.eye(dim, dtype=torch.float32, device=device)[None].repeat(
+                batch_size, 1, 1
+            )
+            E[:, -1, -1] = torch.det(torch.bmm(U, V.transpose(2, 1)))
+            R = torch.bmm(torch.bmm(U, E), V.transpose(2, 1))
+            assert torch.allclose(torch.det(R), R.new_ones(batch_size), atol=1e-4)
+
+        return R
+
+    @staticmethod
+    def init_point_cloud(
+        batch_size=10,
+        n_points=1000,
+        dim=3,
+        device=None,
+        use_pointclouds=False,
+        random_pcl_size=True,
+        fix_seed=None,
+    ):
+        """
+        Generate a batch of normally distributed point clouds.
+
+        Returns a (batch_size, n_points, dim) float32 tensor or, with
+        `use_pointclouds`, a Pointclouds object of 3-dim clouds. If `fix_seed`
+        is not None (0 is a valid seed), the state read with
+        torch.random.get_rng_state() is restored after the generation.
+        """
+        if fix_seed is not None:
+            # make sure we always generate the same pointcloud
+            rng_state = torch.random.get_rng_state()
+            torch.manual_seed(fix_seed)
+
+        if use_pointclouds:
+            assert dim == 3, "Pointclouds support only 3-dim points."
+
+        if use_pointclouds and random_pcl_size:
+            n_points_per_batch = torch.randint(
+                low=4,
+                high=n_points,  # exclusive: each cloud gets 4 .. n_points - 1 points
+                size=(batch_size,),
+                device=device,
+                dtype=torch.int64,
+            )
+            X_list = [
+                torch.randn(int(n_pt), dim, device=device, dtype=torch.float32)
+                for n_pt in n_points_per_batch
+            ]
+            X = Pointclouds(X_list)
+        else:
+            X = torch.randn(
+                batch_size, n_points, dim, device=device, dtype=torch.float32
+            )
+            if use_pointclouds:  # equally sized clouds wrapped into Pointclouds
+                X = Pointclouds(list(X))
+
+        if fix_seed is not None:  # same condition as the save above
+            torch.random.set_rng_state(rng_state)
+
+        return X
+
+    @staticmethod
+    def generate_pcl_transformation(
+        batch_size=10, scale=False, reflect=False, dim=3, device=None
+    ):  # NOTE(review): `reflect` is accepted but not used anywhere in this body
+        """
+        Generate a batch of random rigid/similarity transformations.
+        """
+        R = TestCorrespondingPointsAlignment.random_rotation(
+            batch_size, dim, device=device
+        )
+        T = torch.randn(batch_size, dim, dtype=torch.float32, device=device)
+        if scale:  # random scales: uniform samples shifted by 0.1
+            s = torch.rand(batch_size, dtype=torch.float32, device=device) + 0.1
+        else:  # unit scale for every batch element
+            s = torch.ones(batch_size, dtype=torch.float32, device=device)
+
+        return R, T, s
+
+    @staticmethod
+    def generate_random_reflection(batch_size=10, dim=3, device=None):
+        """
+        Generate a batch of reflection matrices of shape (batch_size, dim, dim),
+        where M_i is an identity matrix with one random entry on the
+        diagonal equal to -1.
+        """
+        # randomly select one of the dimensions to reflect for each
+        # element in the batch
+        dim_to_reflect = torch.randint(
+            low=0, high=dim, size=(batch_size,), device=device, dtype=torch.int64
+        )
+
+        # build M: the `!=` mask is 0 only at dim_to_reflect, so mask * 2 - 1 is -1 there
+        M = torch.diag_embed(
+            (
+                dim_to_reflect[:, None]
+                != torch.arange(dim, device=device, dtype=torch.float32)
+            ).float()
+            * 2
+            - 1,
+            dim1=1,
+            dim2=2,
+        )
+
+        return M
+
+    @staticmethod
+    def corresponding_points_alignment(
+        batch_size=10,
+        n_points=100,
+        dim=3,
+        use_pointclouds=False,
+        estimate_scale=False,
+        allow_reflection=False,
+        reflect=False,
+        random_weights=False,
+    ):  # returns a closure that runs the alignment and then synchronizes CUDA
+
+        device = torch.device("cuda:0")
+
+        # initialize a ground truth point cloud
+        X = TestCorrespondingPointsAlignment.init_point_cloud(
+            batch_size=batch_size,
+            n_points=n_points,
+            dim=dim,
+            device=device,
+            use_pointclouds=use_pointclouds,
+            random_pcl_size=True,
+        )
+
+        # generate the true transformation
+        R, T, s = TestCorrespondingPointsAlignment.generate_pcl_transformation(
+            batch_size=batch_size,
+            scale=estimate_scale,
+            reflect=reflect,
+            dim=dim,
+            device=device,
+        )
+
+        # apply the generated transformation to the generated
+        # point cloud X
+        X_t = _apply_pcl_transformation(X, R, T, s=s)
+
+        weights = None
+        if random_weights:
+            template = X.points_padded() if use_pointclouds else X
+            weights = torch.rand_like(template[:, :, 0])
+            weights = weights / weights.sum(dim=1, keepdim=True)
+            # zero out the small weights, as zero weights are a common use case:
+            # entries with weight * template.size()[1] <= 0.3 become exactly 0
+            weights *= (weights * template.size()[1] > 0.3).to(weights)
+            if use_pointclouds:  # convert to List[Tensor]
+                weights = [
+                    w[:npts] for w, npts in zip(weights, X.num_points_per_cloud())
+                ]
+
+        torch.cuda.synchronize()  # wait for pending CUDA work from the setup
+
+        def run_corresponding_points_alignment():
+            points_alignment.corresponding_points_alignment(
+                X,
+                X_t,
+                weights,
+                allow_reflection=allow_reflection,
+                estimate_scale=estimate_scale,
+            )
+            torch.cuda.synchronize()  # wait until the CUDA work has finished
+
+        return run_corresponding_points_alignment
+
+    def test_corresponding_points_alignment(self, batch_size=10):
+        """
+        Tests whether we can estimate a rigid/similarity motion between
+        a randomly initialized point cloud and its randomly transformed version.
+
+        The tests are done for all possible combinations
+        of the following boolean flags:
+            - estimate_scale ... Estimate also a scaling component of
+                                 the transformation.
+            - reflect ... The ground truth orthonormal part of the generated
+                         transformation is a reflection (det==-1).
+            - allow_reflection ... If True, the orthonormal matrix of the
+                                  estimated transformation is allowed to be
+                                  a reflection (det==-1).
+            - use_pointclouds ... If True, passes the Pointclouds objects
+                                  to corresponding_points_alignment.
+        """
+        # run this for several different point cloud sizes
+        for n_points in (100, 3, 2, 1):
+            # run this for several different dimensionalities
+            for dim in range(2, 10):
+                # Pointclouds inputs are only exercised for dim == 3 and n_points > 3
+                use_point_clouds_cases = (
+                    (True, False) if dim == 3 and n_points > 3 else (False,)
+                )
+                for random_weights in (False, True):
+                    for use_pointclouds in use_point_clouds_cases:
+                        for estimate_scale in (False, True):
+                            for reflect in (False, True):
+                                for allow_reflection in (False, True):
+                                    self._test_single_corresponding_points_alignment(
+                                        batch_size=batch_size,  # forward the argument
+                                        n_points=n_points,
+                                        dim=dim,
+                                        use_pointclouds=use_pointclouds,
+                                        estimate_scale=estimate_scale,
+                                        reflect=reflect,
+                                        allow_reflection=allow_reflection,
+                                        random_weights=random_weights,
+                                    )
+
+    def _test_single_corresponding_points_alignment(
+        self,
+        batch_size=10,
+        n_points=100,
+        dim=3,
+        use_pointclouds=False,
+        estimate_scale=False,
+        reflect=False,
+        allow_reflection=False,
+        random_weights=False,
+    ):
+        """
+        Executes a single test for `corresponding_points_alignment` for a
+        specific setting of the inputs / outputs.
+        """
+
+        device = torch.device("cuda:0")
+
+        # initialize a ground truth point cloud
+        X = TestCorrespondingPointsAlignment.init_point_cloud(
+            batch_size=batch_size,
+            n_points=n_points,
+            dim=dim,
+            device=device,
+            use_pointclouds=use_pointclouds,
+            random_pcl_size=True,
+        )
+
+        # generate the true transformation
+        R, T, s = TestCorrespondingPointsAlignment.generate_pcl_transformation(
+            batch_size=batch_size,
+            scale=estimate_scale,
+            reflect=reflect,
+            dim=dim,
+            device=device,
+        )
+
+        if reflect:
+            # generate random reflection M and apply to the rotations
+            M = TestCorrespondingPointsAlignment.generate_random_reflection(
+                batch_size=batch_size, dim=dim, device=device
+            )
+            R = torch.bmm(M, R)
+
+        weights = None
+        if random_weights:
+            template = X.points_padded() if use_pointclouds else X
+            weights = torch.rand_like(template[:, :, 0])
+            weights = weights / weights.sum(dim=1, keepdim=True)
+            # zero out some weights as zero weights are a common use case;
+            # the cut-off is below the mean weight, so each row keeps a nonzero weight
+            weights *= (weights * template.size()[1] > 0.3).to(weights)
+            if use_pointclouds:  # convert to List[Tensor]
+                weights = [
+                    w[:npts] for w, npts in zip(weights, X.num_points_per_cloud())
+                ]
+
+        # apply the generated transformation to the generated
+        # point cloud X
+        X_t = _apply_pcl_transformation(X, R, T, s=s)
+
+        # run the CorrespondingPointsAlignment algorithm
+        R_est, T_est, s_est = points_alignment.corresponding_points_alignment(
+            X,
+            X_t,
+            weights,
+            allow_reflection=allow_reflection,
+            estimate_scale=estimate_scale,
+        )
+
+        assert_error_message = (
+            f"Corresponding_points_alignment assertion failure for "
+            f"n_points={n_points}, "
+            f"dim={dim}, "
+            f"use_pointclouds={use_pointclouds}, "
+            f"estimate_scale={estimate_scale}, "
+            f"reflect={reflect}, "
+            f"allow_reflection={allow_reflection}, "
+            f"random_weights={random_weights}."
+        )
+
+        # if we test the weighted case, check that weights help with noise
+        if random_weights and not use_pointclouds and n_points >= (dim + 10):
+            # add noise to 20% points with smallest weight
+            X_noisy = X_t.clone()
+            _, mink_idx = torch.topk(-weights, int(n_points * 0.2), dim=1)
+            mink_idx = mink_idx[:, :, None].expand(-1, -1, X_t.shape[-1])
+            X_noisy.scatter_add_(
+                1, mink_idx, 0.3 * torch.randn_like(mink_idx, dtype=X_t.dtype)
+            )
+
+            def align_and_get_mse(weights_):
+                R_n, T_n, s_n = points_alignment.corresponding_points_alignment(
+                    X_noisy,
+                    X_t,
+                    weights_,
+                    allow_reflection=allow_reflection,
+                    estimate_scale=estimate_scale,
+                )
+
+                X_t_est = _apply_pcl_transformation(X_noisy, R_n, T_n, s=s_n)
+
+                return (((X_t_est - X_t) * weights[..., None]) ** 2).sum(
+                    dim=(1, 2)
+                ) / weights.sum(dim=-1)
+
+            # check that using weights leads to lower weighted_MSE(X_noisy, X_t)
+            self.assertTrue(
+                torch.all(align_and_get_mse(weights) <= align_and_get_mse(None))
+            )
+
+        if reflect and not allow_reflection:
+            # check that all rotations have det=1
+            self._assert_all_close(
+                torch.det(R_est),
+                R_est.new_ones(batch_size),
+                assert_error_message,
+                atol=2e-5,
+            )
+
+        else:
+            # mask out inputs with too few non-degenerate points for assertions
+            w = (
+                torch.ones_like(R_est[:, 0, 0])
+                if weights is None or n_points >= dim + 10
+                else (weights > 0.0).all(dim=1).to(R_est)
+            )
+            # check that the estimated transformation is the same
+            # as the ground truth
+            if n_points >= (dim + 1):
+                # the checks on transforms apply only when
+                # the problem setup is unambiguous
+                msg = assert_error_message
+                self._assert_all_close(R_est, R, msg, w[:, None, None], atol=1e-5)
+                self._assert_all_close(T_est, T, msg, w[:, None])
+                self._assert_all_close(s_est, s, msg, w)
+
+                # check that the orthonormal part of the
+                # transformation has a correct determinant (+1/-1)
+                desired_det = R_est.new_ones(batch_size)
+                if reflect:
+                    desired_det *= -1.0
+                self._assert_all_close(torch.det(R_est), desired_det, msg, w, atol=2e-5)
+
+            # check that the transformed point cloud
+            # X matches X_t
+            X_t_est = _apply_pcl_transformation(X, R_est, T_est, s=s_est)
+            self._assert_all_close(
+                X_t, X_t_est, assert_error_message, w[:, None, None], atol=2e-5
+            )
+
+    def _assert_all_close(self, a_, b_, err_message, weights=None, atol=1e-6):
+        """Compare `a_` and `b_` via assertClose, optionally scaled by `weights`."""
+        # Pointclouds inputs are compared through their packed points
+        lhs = a_.points_packed() if isinstance(a_, Pointclouds) else a_
+        rhs = b_.points_packed() if isinstance(b_, Pointclouds) else b_
+        if weights is not None:
+            # multiply both sides so that zero-weight entries always match
+            lhs, rhs = lhs * weights, rhs * weights
+        self.assertClose(lhs, rhs, atol=atol, msg=err_message)
diff --git a/pytorch3d/tests/test_points_normals.py b/pytorch3d/tests/test_points_normals.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c4ab1c8089e98da4a931b8e5fbbf40048b12e27
--- /dev/null
+++ b/pytorch3d/tests/test_points_normals.py
@@ -0,0 +1,161 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from typing import Tuple, Union
+
+import torch
+from pytorch3d.ops import (
+    estimate_pointcloud_local_coord_frames,
+    estimate_pointcloud_normals,
+)
+from pytorch3d.structures.pointclouds import Pointclouds
+
+from .common_testing import TestCaseMixin
+
+
+DEBUG = False
+
+
+class TestPCLNormals(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)
+
+    @staticmethod
+    def init_spherical_pcl(
+        batch_size=3, num_points=3000, device=None, use_pointclouds=False
+    ) -> Tuple[Union[torch.Tensor, Pointclouds], torch.Tensor]:
+        # random directions normalized onto the unit sphere
+        pcl = torch.randn(
+            (batch_size, num_points, 3), device=device, dtype=torch.float32
+        )
+        pcl = torch.nn.functional.normalize(pcl, dim=2)
+
+        # GT normals are the same as
+        # the location of each point on the 0-centered sphere
+        normals = pcl.clone()
+
+        # scale and offset the sphere randomly
+        pcl *= torch.rand(batch_size, 1, 1).type_as(pcl) + 1.0
+        pcl += torch.randn(batch_size, 1, 3).type_as(pcl)
+
+        if use_pointclouds:
+            num_points = torch.randint(
+                size=(batch_size,), low=int(num_points * 0.7), high=num_points
+            )
+            pcl, normals = [
+                [x[:np] for x, np in zip(X, num_points)] for X in (pcl, normals)
+            ]
+            pcl = Pointclouds(pcl, normals=normals)
+
+        return pcl, normals
+
+    def test_pcl_normals(self, batch_size=3, num_points=300, neighborhood_size=50):
+        """
+        Tests the normal estimation on a spherical point cloud, where
+        we know the ground truth normals.
+        """
+        device = torch.device("cuda:0")
+        # run several times for different random point clouds
+        for run_idx in range(3):
+            # either use tensors or Pointclouds as input
+            for use_pointclouds in (True, False):
+                # get a spherical point cloud
+                pcl, normals_gt = TestPCLNormals.init_spherical_pcl(
+                    num_points=num_points,
+                    batch_size=batch_size,
+                    device=device,
+                    use_pointclouds=use_pointclouds,
+                )
+                if use_pointclouds:
+                    normals_gt = pcl.normals_padded()
+                    num_pcl_points = pcl.num_points_per_cloud()
+                else:
+                    num_pcl_points = [pcl.shape[1]] * batch_size
+
+                # check for both disambiguation options
+                for disambiguate_directions in (True, False):
+                    (
+                        curvatures,
+                        local_coord_frames,
+                    ) = estimate_pointcloud_local_coord_frames(
+                        pcl,
+                        neighborhood_size=neighborhood_size,
+                        disambiguate_directions=disambiguate_directions,
+                    )
+
+                    # estimate the normals
+                    normals = estimate_pointcloud_normals(
+                        pcl,
+                        neighborhood_size=neighborhood_size,
+                        disambiguate_directions=disambiguate_directions,
+                    )
+
+                    # cross-check the function against Pointclouds.estimate_normals
+                    if use_pointclouds:
+                        # test that the class method gives the same output
+                        normals_pcl = pcl.estimate_normals(
+                            neighborhood_size=neighborhood_size,
+                            disambiguate_directions=disambiguate_directions,
+                            assign_to_self=True,
+                        )
+                        normals_from_pcl = pcl.normals_padded()
+                        for nrm, nrm_from_pcl, nrm_pcl, np in zip(
+                            normals, normals_from_pcl, normals_pcl, num_pcl_points
+                        ):
+                            self.assertClose(nrm[:np], nrm_pcl[:np], atol=1e-5)
+                            self.assertClose(nrm[:np], nrm_from_pcl[:np], atol=1e-5)
+
+                    # check that the first axis of each local coord frame
+                    # equals the estimated normal
+                    for nrm, lcoord, np in zip(
+                        normals, local_coord_frames, num_pcl_points
+                    ):
+                        self.assertClose(nrm[:np], lcoord[:np, :, 0], atol=1e-5)
+
+                    # dot product between ground truth and estimated normals
+                    normal_parallel = (normals_gt * normals).sum(2)
+
+                    # check that normals are on average
+                    # parallel to the expected ones
+                    for normp, np in zip(normal_parallel, num_pcl_points):
+                        abs_parallel = normp[:np].abs()
+                        avg_parallel = abs_parallel.mean()
+                        std_parallel = abs_parallel.std()
+                        self.assertClose(
+                            avg_parallel, torch.ones_like(avg_parallel), atol=1e-2
+                        )
+                        self.assertClose(
+                            std_parallel, torch.zeros_like(std_parallel), atol=1e-2
+                        )
+
+                    if disambiguate_directions:
+                        # check that more than 95% of normal dot products
+                        # have the same sign
+                        for normp, np in zip(normal_parallel, num_pcl_points):
+                            n_pos = (normp[:np] > 0).sum()
+                            self.assertTrue((n_pos > np * 0.95) or (n_pos < np * 0.05))
+
+                    if DEBUG and run_idx == 0 and not use_pointclouds:
+                        import os
+
+                        from pytorch3d.io.ply_io import save_ply
+
+                        # export to .ply
+                        outdir = "/tmp/pt3d_pcl_normals_test/"
+                        os.makedirs(outdir, exist_ok=True)
+                        plyfile = os.path.join(
+                            outdir, f"pcl_disamb={disambiguate_directions}.ply"
+                        )
+                        print(f"Storing point cloud with normals to {plyfile}.")
+                        pcl_idx = 0
+                        save_ply(
+                            plyfile,
+                            pcl[pcl_idx].cpu(),
+                            faces=None,
+                            verts_normals=normals[pcl_idx].cpu(),
+                        )
diff --git a/pytorch3d/tests/test_points_to_volumes.py b/pytorch3d/tests/test_points_to_volumes.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c3d9b1aece15b626e45dac78bda31e25d7b2746
--- /dev/null
+++ b/pytorch3d/tests/test_points_to_volumes.py
@@ -0,0 +1,596 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from functools import partial
+from itertools import product
+from typing import Tuple
+
+import numpy as np
+import torch
+from pytorch3d.ops import (
+    add_pointclouds_to_volumes,
+    add_points_features_to_volume_densities_features,
+)
+from pytorch3d.ops.points_to_volumes import _points_to_volumes
+from pytorch3d.ops.sample_points_from_meshes import sample_points_from_meshes
+from pytorch3d.structures.meshes import Meshes
+from pytorch3d.structures.pointclouds import Pointclouds
+from pytorch3d.structures.volumes import Volumes
+from pytorch3d.transforms.so3 import so3_exp_map
+
+from .common_testing import TestCaseMixin
+
+
+DEBUG = False
+if DEBUG:
+    import os
+    import tempfile
+
+    from PIL import Image
+
+
+def init_cube_point_cloud(batch_size: int, n_points: int, device: str, rotate_y: bool):
+    """
+    Sample a random point cloud of `n_points` from the faces of a 3D cube
+    and give the points of every cube face their own constant color.
+    """
+
+    # one cube mesh per batch element
+    cubes = TestPointsToVolumes.init_cube_mesh(batch_size=batch_size, device=device)
+
+    # draw the point clouds from the surfaces of the meshes
+    pcl = sample_points_from_meshes(cubes, num_samples=n_points, return_normals=False)
+
+    # one color per cube side, listed in the order the sides are visited below
+    side_colors = (
+        (1.0, 0.0, 0.0),
+        (1.0, 1.0, 0.0),
+        (0.0, 1.0, 0.0),
+        (0.0, 1.0, 1.0),
+        (1.0, 1.0, 1.0),
+        (1.0, 0.0, 1.0),
+    )
+
+    # points that match no side keep the initial zero color
+    rgb = torch.zeros_like(pcl)
+
+    # visit the sides as (axis, offset) pairs: for each of the three axes
+    # first the plane at coordinate 0.0, then the plane at coordinate 1.0
+    sides = product((0, 1, 2), (0.0, 1.0))
+    for (axis, offset), color in zip(sides, side_colors):
+        # a point is on the side if it is within 1e-2 of the side's plane
+        on_side = (pcl[:, :, axis] - offset).abs() <= 1e-2
+        # the color tensor of the side, matching the type of the points
+        side_rgb = torch.tensor(color).type_as(pcl)
+        for bi in range(batch_size):
+            rgb[bi, on_side[bi], :] = side_rgb
+
+    if rotate_y:
+        # uniformly spaced rotations around y axis
+        rotations = init_uniform_y_rotations(batch_size=batch_size, device=device)
+        # rotate every cloud about the point (0.5, 0.5, 0.5)
+        pcl = torch.bmm(pcl - 0.5, rotations) + 0.5
+
+    return pcl, rgb
+
+
+def init_volume_boundary_pointcloud(
+    batch_size: int,
+    volume_size: Tuple[int, int, int],
+    n_points: int,
+    interp_mode: str,
+    device: str,
+    require_grad: bool = False,
+):
+    """
+    Initialize a point cloud that closely follows a boundary of a volume with
+    a given size, together with zero-filled Volumes; returns both as a tuple.
+    """
+
+    # generate a 3D point cloud sampled from sides of a [0,1] cube
+    xyz, rgb = init_cube_point_cloud(
+        batch_size, n_points=n_points, device=device, rotate_y=True
+    )
+
+    # make volume_size tensor
+    volume_size_t = torch.tensor(volume_size, dtype=xyz.dtype, device=xyz.device)
+
+    if interp_mode == "trilinear":
+        # make the xyz locations fall on the boundary of the
+        # first/last two voxels along each spatial dimension of the
+        # volume - this properly checks the correctness of the
+        # trilinear interpolation scheme
+        xyz = (xyz - 0.5) * ((volume_size_t - 2) / (volume_size_t - 1))[[2, 1, 0]] + 0.5
+
+    # scale every axis by its volume size relative to volume_size[0];
+    # the [2, 1, 0] index reverses the order of the size entries
+    rel_scale = volume_size_t / volume_size[0]
+    xyz = xyz * rel_scale[[2, 1, 0]][None, None]
+
+    # enable grad accumulation for the differentiability check
+    xyz.requires_grad = require_grad
+    rgb.requires_grad = require_grad
+
+    # create the pointclouds structure
+    pointclouds = Pointclouds(xyz, features=rgb)
+
+    # set the volume translation so that the point cloud is centered
+    # around 0
+    volume_translation = -0.5 * rel_scale[[2, 1, 0]]
+
+    # set the voxel size to 1 / (volume_size[0] - 1)
+    volume_voxel_size = 1 / (volume_size[0] - 1.0)
+
+    # instantiate the volumes
+    initial_volumes = Volumes(
+        features=xyz.new_zeros(batch_size, 3, *volume_size),
+        densities=xyz.new_zeros(batch_size, 1, *volume_size),
+        volume_translation=volume_translation,
+        voxel_size=volume_voxel_size,
+    )
+
+    return pointclouds, initial_volumes
+
+
+def init_uniform_y_rotations(batch_size: int, device: torch.device):
+    """
+    Build a batch of `batch_size` rotation matrices around the y-axis with
+    evenly spaced angles starting at 0; the endpoint 2 pi itself is left out.
+    """
+    y_axis = torch.tensor([0.0, 1.0, 0.0], device=device, dtype=torch.float32)
+    # sample batch_size + 1 angles in [0, 2 pi] and drop the last one
+    thetas = torch.linspace(0, 2.0 * np.pi, batch_size + 1, device=device)[:batch_size]
+    # one scaled copy of the y-axis per angle, passed to so3_exp_map
+    axis_angles = y_axis[None, :] * thetas[:, None]
+    return so3_exp_map(axis_angles)
+
+
+class TestPointsToVolumes(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        np.random.seed(42)
+        torch.manual_seed(42)
+
+    @staticmethod
+    def add_points_to_volumes(
+        batch_size: int,
+        volume_size: Tuple[int, int, int],
+        n_points: int,
+        interp_mode: str,
+        device: str,
+    ):
+        (pointclouds, initial_volumes) = init_volume_boundary_pointcloud(
+            batch_size=batch_size,
+            volume_size=volume_size,
+            n_points=n_points,
+            interp_mode=interp_mode,
+            require_grad=False,
+            device=device,
+        )
+
+        torch.cuda.synchronize()
+
+        def _add_points_to_volumes():
+            add_pointclouds_to_volumes(pointclouds, initial_volumes, mode=interp_mode)
+            torch.cuda.synchronize()
+
+        return _add_points_to_volumes
+
+    @staticmethod
+    def stack_4d_tensor_to_3d(arr):
+        """Tile the n entries of `arr` into one zero-padded grid (DEBUG export)."""
+        n = arr.shape[0]
+        H = int(np.ceil(np.sqrt(n)))
+        W = int(np.ceil(n / H))
+        n_add = H * W - n
+        arr = torch.cat((arr, torch.zeros_like(arr[:1]).repeat(n_add, 1, 1, 1)))
+        rows = torch.chunk(arr, chunks=W, dim=0)
+        arr3d = torch.cat([torch.cat(list(row), dim=2) for row in rows], dim=1)
+        return arr3d
+
+    @staticmethod
+    def init_cube_mesh(batch_size: int, device: str):
+        """
+        Generate a batch of `batch_size` cube meshes.
+        """
+
+        device = torch.device(device)
+
+        verts, faces = [], []
+
+        for _ in range(batch_size):
+            v = torch.tensor(
+                [
+                    [0.0, 0.0, 0.0],
+                    [1.0, 0.0, 0.0],
+                    [1.0, 1.0, 0.0],
+                    [0.0, 1.0, 0.0],
+                    [0.0, 1.0, 1.0],
+                    [1.0, 1.0, 1.0],
+                    [1.0, 0.0, 1.0],
+                    [0.0, 0.0, 1.0],
+                ],
+                dtype=torch.float32,
+                device=device,
+            )
+            verts.append(v)
+            faces.append(
+                torch.tensor(
+                    [
+                        [0, 2, 1],
+                        [0, 3, 2],
+                        [2, 3, 4],
+                        [2, 4, 5],
+                        [1, 2, 5],
+                        [1, 5, 6],
+                        [0, 7, 4],
+                        [0, 4, 3],
+                        [5, 4, 7],
+                        [5, 7, 6],
+                        [0, 6, 7],
+                        [0, 1, 6],
+                    ],
+                    dtype=torch.int64,
+                    device=device,
+                )
+            )
+
+        faces = torch.stack(faces)
+        verts = torch.stack(verts)
+
+        simpleces = Meshes(verts=verts, faces=faces)
+
+        return simpleces
+
+    def test_from_point_cloud(self, interp_mode="trilinear"):
+        """
+        Generates a volume from a random point cloud sampled from faces
+        of a 3D cube. Since each side of the cube is homogeneously colored with
+        a different color, this should result in a volume with a
+        predefined homogeneous color of the cells along its borders
+        and black interior. The test is run for both cube and non-cube shaped
+        volumes.
+        """
+
+        # batch_size = 4 sides of the cube
+        batch_size = 4
+
+        for volume_size in ([25, 25, 25], [30, 25, 15]):
+            for python, interp_mode in product([True, False], ["trilinear", "nearest"]):
+                (pointclouds, initial_volumes) = init_volume_boundary_pointcloud(
+                    volume_size=volume_size,
+                    n_points=int(1e5),
+                    interp_mode=interp_mode,
+                    batch_size=batch_size,
+                    require_grad=True,
+                    device="cuda:0",
+                )
+
+                volumes = add_pointclouds_to_volumes(
+                    pointclouds,
+                    initial_volumes,
+                    mode=interp_mode,
+                    _python=python,
+                )
+
+                V_color, V_density = volumes.features(), volumes.densities()
+
+                # expected colors of different cube sides
+                clr_sides = torch.tensor(
+                    [
+                        [[1.0, 1.0, 1.0], [1.0, 0.0, 1.0]],
+                        [[1.0, 0.0, 0.0], [1.0, 1.0, 0.0]],
+                        [[1.0, 0.0, 1.0], [1.0, 1.0, 1.0]],
+                        [[1.0, 1.0, 0.0], [1.0, 0.0, 0.0]],
+                    ],
+                    dtype=V_color.dtype,
+                    device=V_color.device,
+                )
+                clr_ambient = torch.tensor(
+                    [0.0, 0.0, 0.0], dtype=V_color.dtype, device=V_color.device
+                )
+                clr_top_bot = torch.tensor(
+                    [[0.0, 1.0, 0.0], [0.0, 1.0, 1.0]],
+                    dtype=V_color.dtype,
+                    device=V_color.device,
+                )
+
+                if DEBUG:
+                    outdir = tempfile.gettempdir() + "/test_points_to_volumes"
+                    os.makedirs(outdir, exist_ok=True)
+
+                    for slice_dim in (1, 2):
+                        for vidx in range(V_color.shape[0]):
+                            vim = V_color.detach()[vidx].split(1, dim=slice_dim)
+                            vim = torch.stack([v.squeeze() for v in vim])
+                            vim = TestPointsToVolumes.stack_4d_tensor_to_3d(vim.cpu())
+                            im = Image.fromarray(
+                                (vim.numpy() * 255.0)
+                                .astype(np.uint8)
+                                .transpose(1, 2, 0)
+                            )
+                            outfile = (
+                                outdir
+                                + f"/rgb_{interp_mode}"
+                                + f"_{str(volume_size).replace(' ','')}"
+                                + f"_{vidx:003d}_sldim{slice_dim}.png"
+                            )
+                            im.save(outfile)
+                            print("exported %s" % outfile)
+
+                # check the density V_density
+                # first binarize the density
+                V_density_bin = (V_density > 1e-4).type_as(V_density)
+                d_one = V_density.new_ones(1)
+                d_zero = V_density.new_zeros(1)
+                for vidx in range(V_color.shape[0]):
+                    # the first/last depth-wise slice has to be filled with 1.0
+                    self._check_volume_slice_color_density(
+                        V_density_bin[vidx], 1, interp_mode, d_one, "first"
+                    )
+                    self._check_volume_slice_color_density(
+                        V_density_bin[vidx], 1, interp_mode, d_one, "last"
+                    )
+                    # the middle depth-wise slices have to be empty
+                    self._check_volume_slice_color_density(
+                        V_density_bin[vidx], 1, interp_mode, d_zero, "middle"
+                    )
+                    # the top/bottom slices have to be filled with 1.0
+                    self._check_volume_slice_color_density(
+                        V_density_bin[vidx], 2, interp_mode, d_one, "first"
+                    )
+                    self._check_volume_slice_color_density(
+                        V_density_bin[vidx], 2, interp_mode, d_one, "last"
+                    )
+
+                # check the colors
+                for vidx in range(V_color.shape[0]):
+                    self._check_volume_slice_color_density(
+                        V_color[vidx], 1, interp_mode, clr_sides[vidx][0], "first"
+                    )
+                    self._check_volume_slice_color_density(
+                        V_color[vidx], 1, interp_mode, clr_sides[vidx][1], "last"
+                    )
+                    self._check_volume_slice_color_density(
+                        V_color[vidx], 1, interp_mode, clr_ambient, "middle"
+                    )
+                    self._check_volume_slice_color_density(
+                        V_color[vidx], 2, interp_mode, clr_top_bot[0], "first"
+                    )
+                    self._check_volume_slice_color_density(
+                        V_color[vidx], 2, interp_mode, clr_top_bot[1], "last"
+                    )
+
+                # check differentiability
+                loss = V_color.mean() + V_density.mean()
+                loss.backward()
+                rgb = pointclouds.features_padded()
+                xyz = pointclouds.points_padded()
+                for field in (xyz, rgb):
+                    if interp_mode == "nearest" and (field is xyz):
+                        # this does not produce grads w.r.t. xyz
+                        self.assertIsNone(field.grad)
+                    else:
+                        self.assertTrue(torch.isfinite(field.grad.data).all())
+
+    def test_defaulted_arguments(self):
+        points = torch.rand(30, 1000, 3)
+        features = torch.rand(30, 1000, 5)
+        _, densities = add_points_features_to_volume_densities_features(
+            points,
+            features,
+            torch.zeros(30, 1, 32, 32, 32),
+            torch.zeros(30, 5, 32, 32, 32),
+        )
+        self.assertClose(torch.sum(densities), torch.tensor(30 * 1000.0), atol=0.1)
+
+    def test_unscaled(self):
+        D = 5
+        P = 1000
+        B, C, H, W = 2, 3, D, D
+        densities = torch.zeros(B, 1, D, H, W)
+        features = torch.zeros(B, C, D, H, W)
+        volumes = Volumes(densities=densities, features=features)
+        # P random points per cloud, uniform in [-(D - 1) / 2, (D - 1) / 2)
+        points = torch.rand(B, P, 3) * (D - 1) - ((D - 1) * 0.5)
+        point_features = torch.rand(B, P, C)
+        pointclouds = Pointclouds(points=points, features=point_features)
+
+        volumes2 = add_pointclouds_to_volumes(
+            pointclouds, volumes, rescale_features=False
+        )
+        self.assertConstant(volumes2.densities().sum([2, 3, 4]) / P, 1, atol=1e-5)
+        self.assertConstant(volumes2.features().sum([2, 3, 4]) / P, 0.5, atol=0.03)
+
+    def _check_volume_slice_color_density(
+        self, V, split_dim, interp_mode, clr_gt, slice_type, border=3
+    ):
+        # decompose the volume to individual slices along split_dim
+        vim = V.detach().split(1, dim=split_dim)
+        vim = torch.stack([v.squeeze(split_dim) for v in vim])
+
+        # determine which slices should be compared to clr_gt based on
+        # the 'slice_type' input
+        if slice_type == "first":
+            slice_dims = (0, 1) if interp_mode == "trilinear" else (0,)
+        elif slice_type == "last":
+            slice_dims = (-1, -2) if interp_mode == "trilinear" else (-1,)
+        elif slice_type == "middle":
+            internal_border = 2 if interp_mode == "trilinear" else 1
+            slice_dims = torch.arange(internal_border, vim.shape[0] - internal_border)
+        else:
+            raise ValueError(slice_type)
+
+        # compute the average error within each slice
+        clr_diff = (
+            vim[slice_dims, :, border:-border, border:-border]
+            - clr_gt[None, :, None, None]
+        )
+        clr_diff = clr_diff.abs().mean(dim=(2, 3)).view(-1)
+
+        # check that all per-slice avg errors vanish
+        self.assertClose(clr_diff, torch.zeros_like(clr_diff), atol=1e-2)
+
+
+class TestRawFunction(TestCaseMixin, unittest.TestCase):
+    """
+    Testing the _C.points_to_volumes function through its wrapper
+    _points_to_volumes.
+    """
+
+    def setUp(self) -> None:
+        torch.manual_seed(42)  # fixed seed: the random test inputs are reproducible
+
+    def test_grad_corners_splat_cpu(self):
+        self.do_gradcheck(torch.device("cpu"), splat=True, align_corners=True)
+
+    def test_grad_corners_round_cpu(self):
+        self.do_gradcheck(torch.device("cpu"), splat=False, align_corners=True)
+
+    def test_grad_splat_cpu(self):
+        self.do_gradcheck(torch.device("cpu"), splat=True, align_corners=False)
+
+    def test_grad_round_cpu(self):
+        self.do_gradcheck(torch.device("cpu"), splat=False, align_corners=False)
+
+    def test_grad_corners_splat_cuda(self):
+        self.do_gradcheck(torch.device("cuda:0"), splat=True, align_corners=True)
+
+    def test_grad_corners_round_cuda(self):
+        self.do_gradcheck(torch.device("cuda:0"), splat=False, align_corners=True)
+
+    def test_grad_splat_cuda(self):
+        self.do_gradcheck(torch.device("cuda:0"), splat=True, align_corners=False)
+
+    def test_grad_round_cuda(self):
+        self.do_gradcheck(torch.device("cuda:0"), False, False)
+
+    def do_gradcheck(self, device, splat: bool, align_corners: bool):
+        """
+        Use gradcheck to verify the gradient of _points_to_volumes
+        with random input.
+        """
+        N, C, D, H, W, P = 2, 4, 5, 6, 7, 5
+        points_3d = (
+            torch.rand((N, P, 3), device=device, dtype=torch.float64) * 0.8 + 0.1
+        )
+        points_features = torch.rand((N, P, C), device=device, dtype=torch.float64)
+        volume_densities = torch.zeros((N, 1, D, H, W), device=device)
+        volume_features = torch.zeros((N, C, D, H, W), device=device)
+        volume_densities_scale = torch.rand_like(volume_densities)
+        volume_features_scale = torch.rand_like(volume_features)
+        grid_sizes = torch.tensor([D, H, W], dtype=torch.int64, device=device).expand(
+            N, 3
+        )
+        mask = torch.ones((N, P), device=device)
+        mask[:, 0] = 0
+        align_corners = False
+
+        def f(points_3d_, points_features_):
+            (volume_densities_, volume_features_) = _points_to_volumes(
+                points_3d_.to(torch.float32),
+                points_features_.to(torch.float32),
+                volume_densities.clone(),
+                volume_features.clone(),
+                grid_sizes,
+                2.0,
+                mask,
+                align_corners,
+                splat,
+            )
+            density = (volume_densities_ * volume_densities_scale).sum()
+            features = (volume_features_ * volume_features_scale).sum()
+            return density, features
+
+        base = f(points_3d.clone(), points_features.clone())
+        self.assertGreater(base[0], 0)
+        self.assertGreater(base[1], 0)
+
+        points_features.requires_grad = True
+        if splat:
+            points_3d.requires_grad = True
+            torch.autograd.gradcheck(
+                f,
+                (points_3d, points_features),
+                check_undefined_grad=False,
+                eps=2e-4,
+                atol=0.01,
+            )
+        else:
+            torch.autograd.gradcheck(
+                partial(f, points_3d),
+                points_features,
+                check_undefined_grad=False,
+                eps=2e-3,
+                atol=0.001,
+            )
+
+    def test_single_corners_round_cpu(self):
+        self.single_point(torch.device("cpu"), False, True)
+
+    def test_single_corners_splat_cpu(self):
+        self.single_point(torch.device("cpu"), True, True)
+
+    def test_single_round_cpu(self):
+        self.single_point(torch.device("cpu"), False, False)
+
+    def test_single_splat_cpu(self):
+        self.single_point(torch.device("cpu"), True, False)
+
+    def test_single_corners_round_cuda(self):
+        self.single_point(torch.device("cuda:0"), False, True)
+
+    def test_single_corners_splat_cuda(self):
+        self.single_point(torch.device("cuda:0"), True, True)
+
+    def test_single_round_cuda(self):
+        self.single_point(torch.device("cuda:0"), False, False)
+
+    def test_single_splat_cuda(self):
+        self.single_point(torch.device("cuda:0"), True, False)
+
+    def single_point(self, device, splat: bool, align_corners: bool):
+        """
+        Check the outcome of _points_to_volumes where a single point
+        exists which lines up with a single voxel.
+        """
+        D, H, W = (6, 6, 11) if align_corners else (5, 5, 10)
+        N, C, P = 1, 1, 1
+        if align_corners:
+            points_3d = torch.tensor([[[-0.2, 0.2, -0.2]]], device=device)
+        else:
+            points_3d = torch.tensor([[[-0.3, 0.4, -0.4]]], device=device)
+        points_features = torch.zeros((N, P, C), device=device)
+        volume_densities = torch.zeros((N, 1, D, H, W), device=device)
+        volume_densities_expected = torch.zeros((N, 1, D, H, W), device=device)
+        volume_features = torch.zeros((N, C, D, H, W), device=device)
+        grid_sizes = torch.tensor([D, H, W], dtype=torch.int64, device=device).expand(
+            N, 3
+        )
+        mask = torch.ones((N, P), device=device)
+        point_weight = 19.0
+
+        volume_densities_, volume_features_ = _points_to_volumes(
+            points_3d,
+            points_features,
+            volume_densities,
+            volume_features,
+            grid_sizes,
+            point_weight,
+            mask,
+            align_corners,
+            splat,
+        )
+
+        self.assertTrue(volume_densities_.is_set_to(volume_densities))
+        self.assertTrue(volume_features_.is_set_to(volume_features))
+
+        if align_corners:
+            volume_densities_expected[0, 0, 2, 3, 4] = point_weight
+        else:
+            volume_densities_expected[0, 0, 1, 3, 3] = point_weight
+
+        self.assertClose(volume_densities, volume_densities_expected)
diff --git a/pytorch3d/tests/test_r2n2.py b/pytorch3d/tests/test_r2n2.py
new file mode 100644
index 0000000000000000000000000000000000000000..2596663157d8acb66273b4eea434274ce472cc6a
--- /dev/null
+++ b/pytorch3d/tests/test_r2n2.py
@@ -0,0 +1,397 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Sanity checks for loading R2N2.
+"""
+import json
+import os
+import unittest
+
+import numpy as np
+import torch
+from PIL import Image
+from pytorch3d.datasets import (
+    BlenderCamera,
+    collate_batched_R2N2,
+    R2N2,
+    render_cubified_voxels,
+)
+from pytorch3d.renderer import (
+    FoVPerspectiveCameras,
+    look_at_view_transform,
+    PointLights,
+    RasterizationSettings,
+)
+from pytorch3d.renderer.cameras import get_world_to_view_transform
+from pytorch3d.transforms import Transform3d
+from pytorch3d.transforms.so3 import so3_exp_map
+from torch.utils.data import DataLoader
+
+from .common_testing import get_tests_dir, load_rgb_image, TestCaseMixin
+
+
+# Set these paths in order to run the tests.
+# TestR2N2.setUp skips every test while any of the three *_PATH values
+# below is None or does not exist on disk.
+R2N2_PATH = None
+SHAPENET_PATH = None
+SPLITS_PATH = None
+# Passed to R2N2 as voxels_rel_path by the tests that request voxels.
+VOXELS_REL_PATH = "ShapeNetVox"
+
+
+# When True, the rendering tests also save their output images to DATA_DIR.
+DEBUG = False
+# Directory from which the reference images are loaded.
+DATA_DIR = get_tests_dir() / "data"
+
+
+class TestR2N2(TestCaseMixin, unittest.TestCase):
+    """
+    Sanity checks for loading, collating and rendering R2N2, and for
+    BlenderCamera.
+
+    setUp skips every test unless SHAPENET_PATH, R2N2_PATH and SPLITS_PATH
+    all point to existing locations.
+    """
+
+    def setUp(self):
+        """
+        Check if the data paths are given otherwise skip tests.
+        """
+        if SHAPENET_PATH is None or not os.path.exists(SHAPENET_PATH):
+            url = "https://www.shapenet.org/"
+            msg = (
+                "ShapeNet data not found, download from %s, update "
+                "SHAPENET_PATH at the top of the file, and rerun."
+            )
+            self.skipTest(msg % url)
+        if R2N2_PATH is None or not os.path.exists(R2N2_PATH):
+            url = "http://3d-r2n2.stanford.edu/"
+            msg = (
+                "R2N2 data not found, download from %s, update "
+                "R2N2_PATH at the top of the file, and rerun."
+            )
+            self.skipTest(msg % url)
+        if SPLITS_PATH is None or not os.path.exists(SPLITS_PATH):
+            msg = """Splits file not found, update SPLITS_PATH at the top
+                of the file, and rerun."""
+            self.skipTest(msg)
+
+    def _assert_render_matches_reference(self, render, ref_name, debug_name):
+        """
+        Compare the first three channels of one rendered image with a
+        reference image loaded from DATA_DIR.
+
+        Args:
+            render: a single rendered image; only render[..., :3] is compared.
+            ref_name: file name of the reference image inside DATA_DIR.
+            debug_name: file name under which the render is saved inside
+                DATA_DIR when DEBUG is True.
+        """
+        rgb = render[..., :3].squeeze().cpu()
+        if DEBUG:
+            Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                DATA_DIR / debug_name
+            )
+        image_ref = load_rgb_image(ref_name, DATA_DIR)
+        self.assertClose(rgb, image_ref, atol=0.05)
+
+    def test_load_R2N2(self):
+        """
+        Test the loaded test split of R2N2 return items of the correct shapes and types.
+        Also check that per-category view counts match the splits file.
+        """
+        # Load dataset in the test split.
+        r2n2_dataset = R2N2(
+            "test",
+            SHAPENET_PATH,
+            R2N2_PATH,
+            SPLITS_PATH,
+            return_voxels=True,
+            voxels_rel_path=VOXELS_REL_PATH,
+        )
+
+        # Check total number of objects in the dataset is correct.
+        with open(SPLITS_PATH) as splits:
+            split_dict = json.load(splits)["test"]
+        model_nums = [len(split_dict[synset]) for synset in split_dict]
+        self.assertEqual(len(r2n2_dataset), sum(model_nums))
+
+        # Check the numbers of loaded instances for each category are correct.
+        for synset in split_dict:
+            split_synset_nums = sum(
+                len(split_dict[synset][model]) for model in split_dict[synset]
+            )
+            idx_start = r2n2_dataset.synset_start_idxs[synset]
+            idx_end = idx_start + r2n2_dataset.synset_num_models[synset]
+            synset_views_list = r2n2_dataset.views_per_model_list[idx_start:idx_end]
+            loaded_synset_views = sum(len(views) for views in synset_views_list)
+            self.assertEqual(loaded_synset_views, split_synset_nums)
+
+        # Retrieve an object from the dataset.
+        r2n2_obj = r2n2_dataset[39]
+        # Check that verts and faces returned by __getitem__ have the correct shapes and types.
+        verts, faces = r2n2_obj["verts"], r2n2_obj["faces"]
+        self.assertEqual(verts.dtype, torch.float32)
+        self.assertEqual(faces.dtype, torch.int64)
+        self.assertEqual(verts.ndim, 2)
+        self.assertEqual(verts.shape[-1], 3)
+        self.assertEqual(faces.ndim, 2)
+        self.assertEqual(faces.shape[-1], 3)
+
+        # Check that the intrinsic matrix and extrinsic matrix have the
+        # correct shapes.
+        self.assertEqual(r2n2_obj["R"].shape[0], 24)
+        self.assertEqual(r2n2_obj["R"].shape[1:], (3, 3))
+        self.assertEqual(r2n2_obj["T"].ndim, 2)
+        self.assertEqual(r2n2_obj["T"].shape[1], 3)
+        self.assertEqual(r2n2_obj["K"].ndim, 3)
+        self.assertEqual(r2n2_obj["K"].shape[1:], (4, 4))
+
+        # Check that image batch returned by __getitem__ has the correct shape.
+        self.assertEqual(r2n2_obj["images"].shape[0], 24)
+        self.assertEqual(r2n2_obj["images"].shape[1:-1], (137, 137))
+        self.assertEqual(r2n2_obj["images"].shape[-1], 3)
+        # Indexing with (model index, view indices) returns only those views.
+        self.assertEqual(r2n2_dataset[39, [21]]["images"].shape[0], 1)
+        self.assertEqual(r2n2_dataset[39, torch.tensor([12, 21])]["images"].shape[0], 2)
+
+        # Check models with total view counts less than 24 return image batches
+        # of the correct shapes.
+        self.assertEqual(r2n2_dataset[635]["images"].shape[0], 5)
+        self.assertEqual(r2n2_dataset[8369]["images"].shape[0], 10)
+
+        # Check that the voxel tensor returned by __getitem__ has the correct shape.
+        self.assertEqual(r2n2_obj["voxels"].ndim, 4)
+        self.assertEqual(r2n2_obj["voxels"].shape, (24, 128, 128, 128))
+
+    def test_collate_models(self):
+        """
+        Test collate_batched_R2N2 returns items of the correct shapes and types.
+        Check that when collate_batched_R2N2 is passed to Dataloader, batches of
+        the correct shapes and types are returned.
+        """
+        # Load dataset in the val split.
+        r2n2_dataset = R2N2(
+            "val",
+            SHAPENET_PATH,
+            R2N2_PATH,
+            SPLITS_PATH,
+            return_voxels=True,
+            voxels_rel_path=VOXELS_REL_PATH,
+        )
+
+        # Randomly retrieve several objects from the dataset and collate them.
+        collated_meshes = collate_batched_R2N2(
+            [r2n2_dataset[idx] for idx in torch.randint(len(r2n2_dataset), (6,))]
+        )
+        # Check the collated verts and faces have the correct shapes.
+        verts, faces = collated_meshes["verts"], collated_meshes["faces"]
+        self.assertEqual(len(verts), 6)
+        self.assertEqual(len(faces), 6)
+        self.assertEqual(verts[0].shape[-1], 3)
+        self.assertEqual(faces[0].shape[-1], 3)
+
+        # Check the collated mesh has the correct shape.
+        mesh = collated_meshes["mesh"]
+        self.assertEqual(mesh.verts_padded().shape[0], 6)
+        self.assertEqual(mesh.verts_padded().shape[-1], 3)
+        self.assertEqual(mesh.faces_padded().shape[0], 6)
+        self.assertEqual(mesh.faces_padded().shape[-1], 3)
+
+        # Pass the custom collate_fn function to DataLoader and check elements
+        # in batch have the correct shape.
+        batch_size = 12
+        r2n2_loader = DataLoader(
+            r2n2_dataset, batch_size=batch_size, collate_fn=collate_batched_R2N2
+        )
+        it = iter(r2n2_loader)
+        object_batch = next(it)
+        self.assertEqual(len(object_batch["synset_id"]), batch_size)
+        self.assertEqual(len(object_batch["model_id"]), batch_size)
+        self.assertEqual(len(object_batch["label"]), batch_size)
+        self.assertEqual(object_batch["mesh"].verts_padded().shape[0], batch_size)
+        self.assertEqual(object_batch["mesh"].faces_padded().shape[0], batch_size)
+        self.assertEqual(object_batch["images"].shape[0], batch_size)
+        self.assertEqual(object_batch["R"].shape[0], batch_size)
+        self.assertEqual(object_batch["T"].shape[0], batch_size)
+        self.assertEqual(object_batch["K"].shape[0], batch_size)
+        self.assertEqual(len(object_batch["voxels"]), batch_size)
+
+    def test_catch_render_arg_errors(self):
+        """
+        Test rendering R2N2 with an invalid model_id, an out-of-bounds index or
+        a mismatched camera batch, and catch corresponding errors.
+        """
+        # Load dataset in the train split.
+        r2n2_dataset = R2N2("train", SHAPENET_PATH, R2N2_PATH, SPLITS_PATH)
+
+        # Try loading with an invalid model_id and catch error.
+        with self.assertRaises(ValueError) as err:
+            r2n2_dataset.render(model_ids=["lamp0"])
+        self.assertIn("not found in the loaded dataset", str(err.exception))
+
+        # Try loading with an index out of bounds and catch error.
+        with self.assertRaises(IndexError) as err:
+            r2n2_dataset.render(idxs=[1000000])
+        self.assertIn("are out of bounds", str(err.exception))
+
+        # Try rendering two models with a batch of three cameras and catch error.
+        blend_cameras = BlenderCamera(
+            R=torch.rand((3, 3, 3)), T=torch.rand((3, 3)), K=torch.rand((3, 4, 4))
+        )
+        with self.assertRaises(ValueError) as err:
+            r2n2_dataset.render(idxs=[10, 11], cameras=blend_cameras)
+        self.assertIn("Mismatch between batch dims", str(err.exception))
+
+    def test_render_r2n2(self):
+        """
+        Test rendering objects from R2N2 selected by indices, by model_ids
+        and by categories.
+        """
+        # Set up device and seed for random selections.
+        device = torch.device("cuda:0")
+        torch.manual_seed(39)
+
+        # Load dataset in the train split.
+        r2n2_dataset = R2N2("train", SHAPENET_PATH, R2N2_PATH, SPLITS_PATH)
+
+        # Camera, rasterizer and light settings shared by all renders below.
+        R, T = look_at_view_transform(1.0, 1.0, 90)
+        cameras = FoVPerspectiveCameras(R=R, T=T, device=device)
+        raster_settings = RasterizationSettings(image_size=512)
+        lights = PointLights(
+            location=torch.tensor([0.0, 1.0, -2.0], device=device)[None],
+            # TODO: debug the source of the discrepancy in two images when rendering on GPU.
+            diffuse_color=((0, 0, 0),),
+            specular_color=((0, 0, 0),),
+            device=device,
+        )
+
+        # Render first three models in the dataset.
+        r2n2_by_idxs = r2n2_dataset.render(
+            idxs=list(range(3)),
+            device=device,
+            cameras=cameras,
+            raster_settings=raster_settings,
+            lights=lights,
+        )
+        # Check that there are three images in the batch.
+        self.assertEqual(r2n2_by_idxs.shape[0], 3)
+
+        # Compare the rendered models to the reference images.
+        for idx in range(3):
+            self._assert_render_matches_reference(
+                r2n2_by_idxs[idx],
+                "test_r2n2_render_by_idxs_and_ids_%s.png" % idx,
+                "DEBUG_r2n2_render_by_idxs_%s.png" % idx,
+            )
+
+        # Render the same models but by model_ids this time.
+        r2n2_by_model_ids = r2n2_dataset.render(
+            model_ids=[
+                "1a4a8592046253ab5ff61a3a2a0e2484",
+                "1a04dcce7027357ab540cc4083acfa57",
+                "1a9d0480b74d782698f5bccb3529a48d",
+            ],
+            device=device,
+            cameras=cameras,
+            raster_settings=raster_settings,
+            lights=lights,
+        )
+
+        # Compare the rendered models to the same reference images.
+        for idx in range(3):
+            self._assert_render_matches_reference(
+                r2n2_by_model_ids[idx],
+                "test_r2n2_render_by_idxs_and_ids_%s.png" % idx,
+                "DEBUG_r2n2_render_by_model_ids_%s.png" % idx,
+            )
+
+        ###############################
+        # Test rendering by categories
+        ###############################
+
+        # Render a mixture of categories.
+        categories = ["chair", "lamp"]
+        mixed_objs = r2n2_dataset.render(
+            categories=categories,
+            sample_nums=[1, 2],
+            device=device,
+            cameras=cameras,
+            raster_settings=raster_settings,
+            lights=lights,
+        )
+        # Compare the rendered models to the reference images.
+        for idx in range(3):
+            self._assert_render_matches_reference(
+                mixed_objs[idx],
+                "test_r2n2_render_by_categories_%s.png" % idx,
+                "DEBUG_r2n2_render_by_categories_%s.png" % idx,
+            )
+
+    def test_blender_camera(self):
+        """
+        Test BlenderCamera.
+        """
+        # Test get_world_to_view_transform.
+        T = torch.randn(10, 3)
+        R = so3_exp_map(torch.randn(10, 3) * 3.0)
+        RT = get_world_to_view_transform(R=R, T=T)
+        cam = BlenderCamera(R=R, T=T)
+        RT_class = cam.get_world_to_view_transform()
+        self.assertTrue(torch.allclose(RT.get_matrix(), RT_class.get_matrix()))
+        self.assertIsInstance(RT, Transform3d)
+        self.assertIsInstance(RT_class, Transform3d)
+
+        # Test getting camera center.
+        C = cam.get_camera_center()
+        C_ = -torch.bmm(R, T[:, :, None])[:, :, 0]
+        self.assertTrue(torch.allclose(C, C_, atol=1e-05))
+
+    def test_render_by_r2n2_calibration(self):
+        """
+        Test rendering R2N2 models with calibration matrices from R2N2's own Blender
+        in batches.
+        """
+        # Set up device and seed for random selections.
+        device = torch.device("cuda:0")
+        torch.manual_seed(39)
+
+        # Load dataset in the train split.
+        r2n2_dataset = R2N2("train", SHAPENET_PATH, R2N2_PATH, SPLITS_PATH)
+        model_idxs = torch.randint(1000, (2,)).tolist()
+        view_idxs = torch.randint(24, (2,)).tolist()
+        raster_settings = RasterizationSettings(image_size=512)
+        lights = PointLights(
+            location=torch.tensor([0.0, 1.0, -2.0], device=device)[None],
+            # TODO(nikhilar): debug the source of the discrepancy in two images when
+            # rendering on GPU.
+            diffuse_color=((0, 0, 0),),
+            specular_color=((0, 0, 0),),
+            device=device,
+        )
+        # No cameras are passed, so render falls back to its own defaults.
+        r2n2_batch = r2n2_dataset.render(
+            idxs=model_idxs,
+            view_idxs=view_idxs,
+            device=device,
+            raster_settings=raster_settings,
+            lights=lights,
+        )
+        # Four images are compared (presumably 2 models x 2 views -- confirm
+        # against R2N2.render).
+        for idx in range(4):
+            self._assert_render_matches_reference(
+                r2n2_batch[idx],
+                "test_r2n2_render_with_blender_calibrations_%s.png" % idx,
+                "DEBUG_r2n2_render_with_blender_calibrations_%s.png" % idx,
+            )
+
+    def test_render_voxels(self):
+        """
+        Test rendering meshes formed from voxels.
+        """
+        # Set up device.
+        device = torch.device("cuda:0")
+
+        # Load dataset in the train split with voxels returned.
+        r2n2_dataset = R2N2(
+            "train",
+            SHAPENET_PATH,
+            R2N2_PATH,
+            SPLITS_PATH,
+            return_voxels=True,
+            voxels_rel_path=VOXELS_REL_PATH,
+        )
+        # Select a single view (index 5) of the model at index 6.
+        r2n2_model = r2n2_dataset[6, [5]]
+        vox_render = render_cubified_voxels(r2n2_model["voxels"], device=device)
+        self._assert_render_matches_reference(
+            vox_render[0],
+            "test_r2n2_voxel_to_mesh_render.png",
+            "DEBUG_r2n2_voxel_to_mesh_render.png",
+        )
diff --git a/pytorch3d/tests/test_rasterize_meshes.py b/pytorch3d/tests/test_rasterize_meshes.py
new file mode 100644
index 0000000000000000000000000000000000000000..48738ca44a3764e66f5661d8f91f5eb4ef12feb6
--- /dev/null
+++ b/pytorch3d/tests/test_rasterize_meshes.py
@@ -0,0 +1,1292 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import functools
+import unittest
+
+import torch
+from pytorch3d import _C
+from pytorch3d.renderer import FoVPerspectiveCameras, look_at_view_transform
+from pytorch3d.renderer.mesh import MeshRasterizer, RasterizationSettings
+from pytorch3d.renderer.mesh.rasterize_meshes import (
+    rasterize_meshes,
+    rasterize_meshes_python,
+)
+from pytorch3d.renderer.mesh.utils import (
+    _clip_barycentric_coordinates,
+    _interpolate_zbuf,
+)
+from pytorch3d.structures import Meshes
+from pytorch3d.utils import ico_sphere
+
+from .common_testing import get_random_cuda_device, TestCaseMixin
+
+
+class TestRasterizeMeshes(TestCaseMixin, unittest.TestCase):
+    def test_simple_python(self):
+        """
+        Run each basic check against rasterize_meshes_python on the CPU,
+        always with bin_size=-1.
+        """
+        cpu = torch.device("cpu")
+        checks = (
+            self._simple_triangle_raster,
+            self._simple_blurry_raster,
+            self._test_behind_camera,
+            self._test_perspective_correct,
+            self._test_barycentric_clipping,
+            self._test_back_face_culling,
+        )
+        # The checks run in the listed order and stop at the first failure.
+        for check in checks:
+            check(rasterize_meshes_python, cpu, bin_size=-1)
+
+    def _test_simple_cpu_naive_instance(self):
+        """
+        Run each basic check once against rasterize_meshes on the CPU with
+        bin_size=0. Called by test_simple_cpu_naive for several thread counts.
+        """
+        cpu = torch.device("cpu")
+        checks = (
+            self._simple_triangle_raster,
+            self._simple_blurry_raster,
+            self._test_behind_camera,
+            self._test_perspective_correct,
+            self._test_back_face_culling,
+        )
+        for check in checks:
+            check(rasterize_meshes, cpu, bin_size=0)
+
+    def test_simple_cpu_naive(self):
+        """
+        Run the CPU checks of _test_simple_cpu_naive_instance with 1, 4 and 5
+        torch threads, then restore the thread count that was active on entry.
+        """
+        n_threads = torch.get_num_threads()
+        try:
+            # 1: single threaded
+            # 4: even (divisible) number of threads
+            # 5: odd (nondivisible) number of threads
+            for num_threads in (1, 4, 5):
+                torch.set_num_threads(num_threads)
+                self._test_simple_cpu_naive_instance()
+        finally:
+            # Restore the process-wide setting even when a check fails, so a
+            # failure here cannot change the thread count seen by later tests.
+            torch.set_num_threads(n_threads)
+
+    def test_simple_cuda_naive(self):
+        """
+        Run each basic check against rasterize_meshes with bin_size=0 on the
+        device returned by get_random_cuda_device.
+        """
+        cuda = get_random_cuda_device()
+        checks = (
+            self._simple_triangle_raster,
+            self._simple_blurry_raster,
+            self._test_behind_camera,
+            self._test_perspective_correct,
+            self._test_back_face_culling,
+        )
+        for check in checks:
+            check(rasterize_meshes, cuda, bin_size=0)
+
+    def test_simple_cuda_binned(self):
+        """
+        Run each basic check against rasterize_meshes with bin_size=5 on the
+        device returned by get_random_cuda_device.
+        """
+        cuda = get_random_cuda_device()
+        checks = (
+            self._simple_triangle_raster,
+            self._simple_blurry_raster,
+            self._test_behind_camera,
+            self._test_perspective_correct,
+            self._test_back_face_culling,
+        )
+        for check in checks:
+            check(rasterize_meshes, cuda, bin_size=5)
+
+    def test_python_vs_cpu_vs_cuda(self):
+        """
+        Compare rasterize_meshes_python with rasterize_meshes via
+        _compare_impls (gradients included), on the CPU and on a CUDA device,
+        for three meshes: a single face, four separate faces and an icosphere.
+        """
+        torch.manual_seed(231)
+        image_size = 32
+        blur_radius = 0.1**2
+        faces_per_pixel = 3
+
+        def compare(verts1, faces1):
+            # Each implementation gets its own leaf tensor holding the same
+            # vertex values, wrapped in its own Meshes object.
+            verts2 = verts1.detach().clone()
+            verts2.requires_grad = True
+            meshes1 = Meshes(verts=[verts1], faces=[faces1])
+            meshes2 = Meshes(verts=[verts2], faces=[faces1])
+            args1 = (meshes1, image_size, blur_radius, faces_per_pixel)
+            args2 = (meshes2, image_size, blur_radius, faces_per_pixel)
+            self._compare_impls(
+                rasterize_meshes_python,
+                rasterize_meshes,
+                args1,
+                args2,
+                verts1,
+                verts2,
+                compare_grads=True,
+            )
+
+        for d in ["cpu", get_random_cuda_device()]:
+            device = torch.device(d)
+
+            # Mesh with a single face.
+            verts1 = torch.tensor(
+                [[0.0, 0.6, 0.1], [-0.7, -0.4, 0.5], [0.7, -0.4, 0.7]],
+                dtype=torch.float32,
+                requires_grad=True,
+                device=device,
+            )
+            faces1 = torch.tensor([[0, 1, 2]], dtype=torch.int64, device=device)
+            compare(verts1, faces1)
+
+            # Mesh with multiple faces.
+            # fmt: off
+            verts1 = torch.tensor(
+                [
+                    [ -0.5, 0.0,  0.1],  # noqa: E241, E201
+                    [  0.0, 0.6,  0.5],  # noqa: E241, E201
+                    [  0.5, 0.0,  0.7],  # noqa: E241, E201
+                    [-0.25, 0.0,  0.9],  # noqa: E241, E201
+                    [ 0.26, 0.5,  0.8],  # noqa: E241, E201
+                    [ 0.76, 0.0,  0.8],  # noqa: E241, E201
+                    [-0.41, 0.0,  0.5],  # noqa: E241, E201
+                    [ 0.61, 0.6,  0.6],  # noqa: E241, E201
+                    [ 0.41, 0.0,  0.5],  # noqa: E241, E201
+                    [ -0.2, 0.0, -0.5],  # noqa: E241, E201
+                    [  0.3, 0.6, -0.5],  # noqa: E241, E201
+                    [  0.4, 0.0, -0.5],  # noqa: E241, E201
+                ],
+                dtype=torch.float32,
+                device=device,
+                requires_grad=True
+            )
+            faces1 = torch.tensor(
+                [
+                    [ 1, 0,  2],  # noqa: E241, E201
+                    [ 4, 3,  5],  # noqa: E241, E201
+                    [ 7, 6,  8],  # noqa: E241, E201
+                    [10, 9, 11]   # noqa: E241, E201
+                ],
+                dtype=torch.int64,
+                device=device,
+            )
+            # fmt: on
+            compare(verts1, faces1)
+
+            # Icosphere
+            verts1, faces1 = ico_sphere(device=device).get_mesh_verts_faces(0)
+            verts1.requires_grad = True
+            compare(verts1, faces1)
+
+    def test_cpu_vs_cuda_naive(self):
+        """
+        Compare naive versions of cuda and cpp
+        """
+
+        torch.manual_seed(231)
+        image_size = 64
+        radius = 0.1**2
+        faces_per_pixel = 3
+        barycentric_clip = True
+
+        def leaf_sphere(device):
+            # Level-0 icosphere rebuilt around a vertex tensor that requires grad.
+            verts, faces = ico_sphere(0, device).get_mesh_verts_faces(0)
+            verts.requires_grad = True
+            return Meshes(verts=[verts], faces=[faces]), verts
+
+        meshes_cpu, verts1 = leaf_sphere(torch.device("cpu"))
+        meshes_cuda, verts2 = leaf_sphere(get_random_cuda_device())
+
+        # Positional arguments shared by both calls. The two call-specific
+        # values sit between these groups: None, None for the CPU call and
+        # 0, 0 for the CUDA call.
+        leading = (image_size, radius, faces_per_pixel)
+        trailing = (False, barycentric_clip, False)
+        args_cpu = (meshes_cpu, *leading, None, None, *trailing)
+        args_cuda = (meshes_cuda, *leading, 0, 0, *trailing)
+
+        self._compare_impls(
+            rasterize_meshes,
+            rasterize_meshes,
+            args_cpu,
+            args_cuda,
+            verts1,
+            verts2,
+            compare_grads=True,
+        )
+
+    def test_coarse_cpu(self):
+        """Run _test_coarse_rasterize on the CPU."""
+        # A test method should not return a value, so the helper's result is
+        # not propagated to the test runner.
+        self._test_coarse_rasterize(torch.device("cpu"))
+
+    def test_coarse_cuda(self):
+        """Run _test_coarse_rasterize on the device from get_random_cuda_device."""
+        # A test method should not return a value, so the helper's result is
+        # not propagated to the test runner.
+        self._test_coarse_rasterize(get_random_cuda_device())
+
+    def test_cpp_vs_cuda_naive_vs_cuda_binned(self):
+        """
+        Rasterize the same ico_sphere(0) mesh through three pathways (CPU
+        naive, CUDA naive, CUDA binned), backpropagate a randomly weighted sum
+        of the outputs through each, and check that the face indices, depths,
+        distances and vertex gradients agree.
+        """
+        # Make sure that the backward pass runs for all pathways
+        image_size = 64  # test is too slow for very large images.
+        N = 1
+        radius = 0.1**2
+        faces_per_pixel = 3
+
+        # Random weights that turn the rasterizer outputs into a scalar loss.
+        out_shape = (N, image_size, image_size, faces_per_pixel)
+        weight_zbuf = torch.randn(*out_shape)
+        weight_dist = torch.randn(*out_shape)
+        weight_bary = torch.randn(*out_shape, 3)
+
+        def run_pathway(device, *binning_args):
+            # Build a fresh mesh on `device`, rasterize it, backpropagate the
+            # weighted loss and hand back CPU copies of what gets compared.
+            sphere = ico_sphere(0, device)
+            verts, faces = sphere.get_mesh_verts_faces(0)
+            verts.requires_grad = True
+            mesh = Meshes(verts=[verts], faces=[faces])
+
+            idx, zbuf, bary, dist = rasterize_meshes(
+                mesh, image_size, radius, faces_per_pixel, *binning_args
+            )
+            loss = (
+                (zbuf * weight_zbuf.to(device)).sum()
+                + (dist * weight_dist.to(device)).sum()
+                + (bary * weight_bary.to(device)).sum()
+            )
+            loss.backward()
+            return (
+                idx.data.cpu().clone(),
+                zbuf.data.cpu().clone(),
+                dist.data.cpu().clone(),
+                verts.grad.data.cpu().clone(),
+            )
+
+        # Option I: CPU, naive
+        idx_cpu, zbuf_cpu, dist_cpu, grad_cpu = run_pathway(torch.device("cpu"))
+
+        # Option II: CUDA, naive
+        cuda = get_random_cuda_device()
+        idx_naive, zbuf_naive, dist_naive, grad_naive = run_pathway(cuda, 0, 0)
+
+        # Option III: CUDA, binned
+        idx_binned, zbuf_binned, dist_binned, grad_binned = run_pathway(
+            cuda, 32, 500
+        )
+
+        # Make sure everything was the same
+        self.assertTrue((idx_cpu == idx_naive).all().item())
+        self.assertTrue((idx_cpu == idx_binned).all().item())
+        self.assertClose(zbuf_cpu, zbuf_naive, atol=1e-6)
+        self.assertClose(zbuf_cpu, zbuf_binned, atol=1e-6)
+        self.assertClose(dist_cpu, dist_naive, atol=1e-6)
+        self.assertClose(dist_cpu, dist_binned, atol=1e-6)
+
+        self.assertClose(grad_cpu, grad_naive, rtol=5e-3)  # flaky test
+        self.assertClose(grad_cpu, grad_binned, rtol=5e-3)
+        self.assertClose(grad_naive, grad_binned, rtol=5e-3)
+
+    def test_compare_coarse_cpu_vs_cuda(self):
+        """
+        Call _C._rasterize_meshes_coarse on the same ico_sphere(2) mesh on the
+        CPU and on a CUDA device and check that every bin holds the same
+        face indices.
+        """
+        torch.manual_seed(231)
+        N = 1
+        image_size = (512, 512)
+        blur_radius = 0.0
+        bin_size = 32
+        max_faces_per_bin = 20
+
+        def coarse_bins(mesh_batch):
+            # Gather the packed inputs of the coarse rasterizer and run it.
+            packed_faces = mesh_batch.faces_packed()
+            packed_verts = mesh_batch.verts_packed()
+            faces_verts = packed_verts[packed_faces]
+            faces_per_mesh = mesh_batch.num_faces_per_mesh()
+            first_face_idx = mesh_batch.mesh_to_faces_packed_first_idx()
+            return _C._rasterize_meshes_coarse(
+                faces_verts,
+                first_face_idx,
+                faces_per_mesh,
+                image_size,
+                blur_radius,
+                bin_size,
+                max_faces_per_bin,
+            )
+
+        cpu_meshes = ico_sphere(2, torch.device("cpu"))
+        bin_faces_cpu = coarse_bins(cpu_meshes)
+
+        cuda = get_random_cuda_device()
+        cuda_meshes = cpu_meshes.clone().to(cuda)
+        bin_faces_cuda = coarse_bins(cuda_meshes)
+
+        # Bin faces might not be the same: CUDA version might write them in
+        # any order. But if we sort the non-(-1) elements of the CUDA output
+        # then they should be the same.
+        num_bins_y = bin_faces_cpu.shape[1]
+        num_bins_x = bin_faces_cpu.shape[2]
+        for n in range(N):
+            for by in range(num_bins_y):
+                for bx in range(num_bins_x):
+                    cuda_bin = bin_faces_cuda[n, by, bx]
+                    num_valid = (cuda_bin != -1).sum().item()
+                    expected = bin_faces_cpu[n, by, bx].tolist()
+                    actual = cuda_bin.tolist()
+                    # Sort only the leading valid entries, keep the padding.
+                    actual = sorted(actual[:num_valid]) + actual[num_valid:]
+                    self.assertEqual(expected, actual)
+
+    def test_python_vs_cpp_bary_clip(self):
+        """
+        Compare rasterize_meshes with rasterize_meshes_python (outputs and
+        gradients) on random meshes with clip_barycentric_coords=True.
+        """
+        torch.manual_seed(232)
+        num_meshes, num_verts, num_faces = 2, 10, 5
+        verts_a = torch.randn(num_meshes, num_verts, 3, requires_grad=True)
+        verts_b = verts_a.detach().clone().requires_grad_(True)
+        faces = torch.randint(num_verts, size=(num_meshes, num_faces, 3))
+        meshes_a = Meshes(verts_a, faces)
+        meshes_b = Meshes(verts_b, faces)
+
+        fn_a = functools.partial(
+            rasterize_meshes, meshes_a, image_size=24, clip_barycentric_coords=True
+        )
+        fn_b = functools.partial(
+            rasterize_meshes_python,
+            meshes_b,
+            image_size=24,
+            clip_barycentric_coords=True,
+        )
+        no_args = ()
+        self._compare_impls(
+            fn_a, fn_b, no_args, no_args, verts_a, verts_b, compare_grads=True
+        )
+
+    def test_cpp_vs_cuda_bary_clip(self):
+        """
+        Compare rasterize_meshes on a CPU mesh against the same mesh on a CUDA
+        device (bin_size=0), outputs and gradients, with
+        clip_barycentric_coords=True.
+        """
+        cpu_sphere = ico_sphere(2, device=torch.device("cpu"))
+        verts_cpu, faces_cpu = cpu_sphere.get_mesh_verts_faces(0)
+        verts_cpu.requires_grad = True
+        mesh_cpu = Meshes(verts=[verts_cpu], faces=[faces_cpu])
+
+        cuda = get_random_cuda_device()
+        verts_cuda = verts_cpu.detach().to(cuda).requires_grad_(True)
+        faces_cuda = faces_cpu.detach().clone().to(cuda)
+        mesh_cuda = Meshes(verts=[verts_cuda], faces=[faces_cuda])
+
+        shared = {"image_size": 64, "clip_barycentric_coords": True}
+        cpu_fn = functools.partial(rasterize_meshes, mesh_cpu, **shared)
+        cuda_fn = functools.partial(rasterize_meshes, mesh_cuda, bin_size=0, **shared)
+        self._compare_impls(
+            cpu_fn, cuda_fn, (), (), verts_cpu, verts_cuda, compare_grads=True
+        )
+
+    def test_python_vs_cpp_perspective_correct(self):
+        """
+        Compare rasterize_meshes with rasterize_meshes_python (outputs and
+        gradients) on random meshes with perspective_correct=True.
+        """
+        torch.manual_seed(232)
+        num_meshes, num_verts, num_faces = 2, 10, 5
+        verts_a = torch.randn(num_meshes, num_verts, 3, requires_grad=True)
+        verts_b = verts_a.detach().clone().requires_grad_(True)
+        faces = torch.randint(num_verts, size=(num_meshes, num_faces, 3))
+        meshes_a = Meshes(verts_a, faces)
+        meshes_b = Meshes(verts_b, faces)
+
+        fn_a = functools.partial(
+            rasterize_meshes, meshes_a, image_size=24, perspective_correct=True
+        )
+        fn_b = functools.partial(
+            rasterize_meshes_python, meshes_b, image_size=24, perspective_correct=True
+        )
+        no_args = ()
+        self._compare_impls(
+            fn_a, fn_b, no_args, no_args, verts_a, verts_b, compare_grads=True
+        )
+
+    def test_cpp_vs_cuda_perspective_correct(self):
+        """
+        Compare rasterize_meshes on a CPU mesh against the same mesh on a CUDA
+        device (bin_size=0), outputs and gradients, with
+        perspective_correct=True.
+        """
+        cpu_sphere = ico_sphere(2, device=torch.device("cpu"))
+        verts_cpu, faces_cpu = cpu_sphere.get_mesh_verts_faces(0)
+        verts_cpu.requires_grad = True
+        mesh_cpu = Meshes(verts=[verts_cpu], faces=[faces_cpu])
+
+        cuda = get_random_cuda_device()
+        verts_cuda = verts_cpu.detach().to(cuda).requires_grad_(True)
+        faces_cuda = faces_cpu.detach().clone().to(cuda)
+        mesh_cuda = Meshes(verts=[verts_cuda], faces=[faces_cuda])
+
+        shared = {"image_size": 64, "perspective_correct": True}
+        cpu_fn = functools.partial(rasterize_meshes, mesh_cpu, **shared)
+        cuda_fn = functools.partial(rasterize_meshes, mesh_cuda, bin_size=0, **shared)
+        self._compare_impls(
+            cpu_fn, cuda_fn, (), (), verts_cpu, verts_cuda, compare_grads=True
+        )
+
+    def test_cuda_naive_vs_binned_perspective_correct(self):
+        """
+        On one CUDA device, compare rasterize_meshes with bin_size=0 against
+        bin_size=8 (outputs and gradients) with perspective_correct=True.
+        """
+        cuda = get_random_cuda_device()
+        sphere = ico_sphere(2, device=cuda)
+        verts_naive, faces_naive = sphere.get_mesh_verts_faces(0)
+        verts_naive.requires_grad = True
+        mesh_naive = Meshes(verts=[verts_naive], faces=[faces_naive])
+
+        # Independent copies so each pathway accumulates its own gradient.
+        verts_binned = verts_naive.detach().clone().requires_grad_(True)
+        faces_binned = faces_naive.detach().clone()
+        mesh_binned = Meshes(verts=[verts_binned], faces=[faces_binned])
+
+        shared = {"image_size": 64, "perspective_correct": True}
+        naive_fn = functools.partial(rasterize_meshes, mesh_naive, bin_size=0, **shared)
+        binned_fn = functools.partial(
+            rasterize_meshes, mesh_binned, bin_size=8, **shared
+        )
+        self._compare_impls(
+            naive_fn,
+            binned_fn,
+            (),
+            (),
+            verts_naive,
+            verts_binned,
+            compare_grads=True,
+        )
+
+    def test_bin_size_error(self):
+        """
+        rasterize_meshes is expected to raise a ValueError matching
+        "bin_size too small" for image_size=1024 with bin_size=16.
+        """
+        image_size, bin_size = 1024, 16
+        raster_args = (ico_sphere(2), image_size, 0.0, 2, bin_size)
+        with self.assertRaisesRegex(ValueError, "bin_size too small"):
+            rasterize_meshes(*raster_args)
+
+    def _test_back_face_culling(self, rasterize_meshes_fn, device, bin_size):
+        """
+        Rasterize a square based pyramid with two faces per pixel, once with
+        cull_backfaces=False and once with cull_backfaces=True, and compare
+        the returned face indices against hard-coded expected images.
+
+        Args:
+            rasterize_meshes_fn: rasterizer under test; called with keyword
+                arguments only and expected to return a 4-tuple whose first
+                element holds the face indices.
+            device: device on which all tensors are created.
+            bin_size: forwarded as the `bin_size` keyword argument, unless it
+                is -1, in which case no `bin_size` is passed at all.
+        """
+        # Square based pyramid mesh.
+        # fmt: off
+        verts = torch.tensor([
+            [-0.5, 0.0,  0.5],  # noqa: E241 E201 Front right
+            [ 0.5, 0.0,  0.5],  # noqa: E241 E201 Front left
+            [ 0.5, 0.0,  1.5],  # noqa: E241 E201 Back left
+            [-0.5, 0.0,  1.5],  # noqa: E241 E201 Back right
+            [ 0.0, 1.0,  1.0]   # noqa: E241 E201 Top point of pyramid
+        ], dtype=torch.float32, device=device)
+
+        faces = torch.tensor([
+            [2, 1, 0],  # noqa: E241 E201 Square base
+            [3, 2, 0],  # noqa: E241 E201 Square base
+            [1, 0, 4],  # noqa: E241 E201 Triangle on front
+            [2, 4, 3],  # noqa: E241 E201 Triangle on back
+            [3, 4, 0],  # noqa: E241 E201 Triangle on left side
+            [1, 4, 2]   # noqa: E241 E201 Triangle on right side
+        ], dtype=torch.int64, device=device)
+        # fmt: on
+        mesh = Meshes(verts=[verts], faces=[faces])
+        kwargs = {
+            "meshes": mesh,
+            "image_size": 10,
+            "faces_per_pixel": 2,
+            "blur_radius": 0.0,
+            "perspective_correct": False,
+            "cull_backfaces": False,
+        }
+        # bin_size == -1 means: do not pass a bin_size keyword at all.
+        if bin_size != -1:
+            kwargs["bin_size"] = bin_size
+
+        # Expected 10x10 face index images; -1 marks pixels without a face.
+        # fmt: off
+        pix_to_face_frontface = torch.tensor([
+            [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1, -1,  2,  2, -1, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1, -1,  2,  2, -1, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1,  2,  2,  2,  2, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1,  2,  2,  2,  2, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]   # noqa: E241 E201
+        ], dtype=torch.int64, device=device)
+        pix_to_face_backface = torch.tensor([
+            [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1, -1,  3,  3, -1, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1, -1,  3,  3, -1, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1,  3,  3,  3,  3, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1,  3,  3,  3,  3, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241 E201
+            [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]   # noqa: E241 E201
+        ], dtype=torch.int64, device=device)
+        # fmt: on
+
+        # Image filled with -1 everywhere, i.e. no face rasterized at all.
+        pix_to_face_padded = -(torch.ones_like(pix_to_face_frontface))
+        # Run with and without culling
+        # Without culling, for k=0, the front face (i.e. face 2) is
+        # rasterized and for k=1, the back face (i.e. face 3) is
+        # rasterized.
+        idx_f, zbuf_f, bary_f, dists_f = rasterize_meshes_fn(**kwargs)
+        self.assertTrue(torch.all(idx_f[..., 0].squeeze() == pix_to_face_frontface))
+        self.assertTrue(torch.all(idx_f[..., 1].squeeze() == pix_to_face_backface))
+
+        # With culling, for k=0, the front face (i.e. face 2) is
+        # rasterized and for k=1, there are no faces rasterized
+        kwargs["cull_backfaces"] = True
+        idx_t, zbuf_t, bary_t, dists_t = rasterize_meshes_fn(**kwargs)
+        self.assertTrue(torch.all(idx_t[..., 0].squeeze() == pix_to_face_frontface))
+        self.assertTrue(torch.all(idx_t[..., 1].squeeze() == pix_to_face_padded))
+
+    def _compare_impls(
+        self,
+        fn1,
+        fn2,
+        args1,
+        args2,
+        grad_var1=None,
+        grad_var2=None,
+        compare_grads=False,
+    ):
+        """
+        Call two rasterizer implementations and assert that they agree.
+
+        Args:
+            fn1, fn2: callables returning the tuple (idx, zbuf, bary, dist).
+            args1, args2: positional arguments unpacked into fn1 / fn2.
+            grad_var1, grad_var2: tensors whose `.grad` is compared after
+                backpropagating through the outputs of fn1 / fn2. Only read
+                when compare_grads is True.
+            compare_grads: if True, also compare the gradients.
+        """
+        idx_a, zbuf_a, bary_a, dist_a = fn1(*args1)
+        idx_b, zbuf_b, bary_b, dist_b = fn2(*args2)
+        self.assertTrue((idx_a.cpu() == idx_b.cpu()).all().item())
+        self.assertClose(zbuf_a.cpu(), zbuf_b.cpu(), rtol=1e-4)
+        self.assertClose(dist_a.cpu(), dist_b.cpu(), rtol=6e-3)
+        self.assertClose(bary_a.cpu(), bary_b.cpu(), rtol=1e-3)
+
+        if compare_grads:
+            # Compare gradients, using one fixed set of random output weights.
+            torch.manual_seed(231)
+            w_zbuf = torch.randn_like(zbuf_a)
+            w_dist = torch.randn_like(dist_a)
+            w_bary = torch.randn_like(bary_a)
+
+            def weighted_loss(zbuf, dist, bary, leaf):
+                # Move the weights next to each output before multiplying.
+                total = (
+                    (dist * w_dist.to(dist)).sum()
+                    + (zbuf * w_zbuf.to(zbuf)).sum()
+                    + (bary * w_bary.to(bary)).sum()
+                )
+                # avoid gradient error if rasterize_meshes_python() culls all
+                # triangles
+                total += leaf.sum() * 0.0
+                return total
+
+            weighted_loss(zbuf_a, dist_a, bary_a, grad_var1).backward()
+            grad_a = grad_var1.grad.data.clone().cpu()
+
+            loss_b = weighted_loss(zbuf_b, dist_b, bary_b, grad_var2)
+            grad_var1.grad.data.zero_()
+            loss_b.backward()
+            grad_b = grad_var2.grad.data.clone().cpu()
+            self.assertClose(grad_a, grad_b, rtol=2e-3)
+
+    def _test_perspective_correct(self, rasterize_meshes_fn, device, bin_size=None):
+        """
+        Rasterize a single triangle whose vertices have depths 10, 10 and 20,
+        once with perspective_correct=False and once with
+        perspective_correct=True, and compare face indices, distances, depths
+        and barycentric coordinates against hard-coded expected values.
+
+        Args:
+            rasterize_meshes_fn: rasterizer under test; called with keyword
+                arguments only and expected to return (idx, zbuf, bary, dists).
+            device: device on which all tensors are created.
+            bin_size: forwarded as the `bin_size` keyword argument (including
+                the default None), unless it is -1, in which case no
+                `bin_size` is passed at all.
+        """
+        # fmt: off
+        verts = torch.tensor([
+            [-0.4, -0.4, 10],  # noqa: E241, E201
+            [ 0.4, -0.4, 10],  # noqa: E241, E201
+            [ 0.0,  0.4, 20],  # noqa: E241, E201
+        ], dtype=torch.float32, device=device)
+        # fmt: on
+        faces = torch.tensor([[0, 1, 2]], device=device)
+        meshes = Meshes(verts=[verts], faces=[faces])
+        kwargs = {
+            "meshes": meshes,
+            "image_size": 11,
+            "faces_per_pixel": 1,
+            "blur_radius": 0.2,
+            "perspective_correct": False,
+        }
+        if bin_size != -1:
+            kwargs["bin_size"] = bin_size
+
+        # Run with and without perspective correction
+        idx_f, zbuf_f, bary_f, dists_f = rasterize_meshes_fn(**kwargs)
+
+        kwargs["perspective_correct"] = True
+        idx_t, zbuf_t, bary_t, dists_t = rasterize_meshes_fn(**kwargs)
+
+        # Expected output tensors in the format with axes +X left, +Y up, +Z in
+        # idx and dists should be the same with or without perspective correction
+        # fmt: off
+        idx_expected = torch.tensor([
+            [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+            [-1, -1, -1, -1,  0,  0,  0, -1, -1, -1, -1],  # noqa: E241, E201
+            [-1, -1, -1,  0,  0,  0,  0,  0, -1, -1, -1],  # noqa: E241, E201
+            [-1, -1, -1,  0,  0,  0,  0,  0, -1, -1, -1],  # noqa: E241, E201
+            [-1, -1,  0,  0,  0,  0,  0,  0,  0, -1, -1],  # noqa: E241, E201
+            [-1, -1,  0,  0,  0,  0,  0,  0,  0, -1, -1],  # noqa: E241, E201
+            [-1,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1],  # noqa: E241, E201
+            [-1,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1],  # noqa: E241, E201
+            [-1,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1],  # noqa: E241, E201
+            [-1, -1,  0,  0,  0,  0,  0,  0,  0, -1, -1],  # noqa: E241, E201
+            [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]   # noqa: E241, E201
+        ], dtype=torch.int64, device=device).view(1, 11, 11, 1)
+
+        dists_expected = torch.tensor([
+            [-1.,     -1.,     -1.,     -1.,    -1.,     -1.,     -1.,     -1.,     -1.,   -1., -1.],  # noqa: E241, E201, B950
+            [-1.,     -1.,     -1.,     -1., 0.1402,  0.1071,  0.1402,     -1.,     -1.,   -1., -1.],  # noqa: E241, E201, B950
+            [-1.,     -1., -    1., 0.1523,  0.0542,  0.0212,  0.0542,  0.1523,     -1.,   -1., -1.],  # noqa: E241, E201, B950
+            [-1.,     -1.,     -1., 0.0955,  0.0214, -0.0003,  0.0214,  0.0955,     -1.,   -1., -1.],  # noqa: E241, E201, B950
+            [-1.,     -1., 0.1523,  0.0518,  0.0042, -0.0095,  0.0042,  0.0518, 0.1523,    -1., -1.],  # noqa: E241, E201, B950
+            [-1.,     -1., 0.0955,  0.0214, -0.0003,  -0.032, -0.0003,  0.0214, 0.0955,    -1., -1.],  # noqa: E241, E201, B950
+            [-1., 0.1523,  0.0518,  0.0042, -0.0095, -0.0476, -0.0095,  0.0042, 0.0518, 0.1523, -1.],  # noqa: E241, E201, B950
+            [-1., 0.1084,  0.0225, -0.0003, -0.0013, -0.0013, -0.0013, -0.0003, 0.0225, 0.1084, -1.],  # noqa: E241, E201, B950
+            [-1., 0.1283,  0.0423,  0.0212,  0.0212,  0.0212,  0.0212,  0.0212, 0.0423, 0.1283, -1.],  # noqa: E241, E201, B950
+            [-1.,     -1., 0.1283,  0.1071,  0.1071,  0.1071,  0.1071,  0.1071, 0.1283,    -1., -1.],  # noqa: E241, E201, B950
+            [-1.,     -1.,     -1.,     -1.,     -1.,     -1.,     -1.,     -1.,    -1.,   -1., -1.]   # noqa: E241, E201, B950
+        ], dtype=torch.float32, device=device).view(1, 11, 11, 1)
+
+        # zbuf and barycentric will be different with perspective correction
+        zbuf_f_expected = torch.tensor([
+            [-1.,      -1.,     -1.,     -1.,     -1.,     -1.,      -1.,    -1.,     -1.,     -1., -1.],  # noqa: E241, E201, B950
+            [-1.,      -1.,     -1.,     -1., 24.0909, 24.0909, 24.0909,     -1.,     -1.,     -1., -1.],  # noqa: E241, E201, B950
+            [-1.,      -1.,     -1., 21.8182, 21.8182, 21.8182, 21.8182, 21.8182,     -1.,     -1., -1.],  # noqa: E241, E201, B950
+            [-1.,      -1.,     -1., 19.5455, 19.5455, 19.5455, 19.5455, 19.5455,     -1.,     -1., -1.],  # noqa: E241, E201, B950
+            [-1.,      -1., 17.2727, 17.2727, 17.2727, 17.2727, 17.2727, 17.2727, 17.2727,     -1., -1.],  # noqa: E241, E201, B950
+            [-1.,      -1.,      15.,     15.,     15.,     15.,     15.,    15.,     15.,     -1., -1.],  # noqa: E241, E201, B950
+            [-1., 12.7273,  12.7273, 12.7273, 12.7273, 12.7273, 12.7273, 12.7273, 12.7273, 12.7273, -1.],  # noqa: E241, E201, B950
+            [-1., 10.4545,  10.4545, 10.4545, 10.4545, 10.4545, 10.4545, 10.4545, 10.4545, 10.4545, -1.],  # noqa: E241, E201, B950
+            [-1.,  8.1818,   8.1818,  8.1818,  8.1818,  8.1818,  8.1818,  8.1818,  8.1818,  8.1818, -1.],  # noqa: E241, E201, B950
+            [-1.,      -1.,  5.9091,  5.9091,  5.9091,  5.9091,  5.9091,  5.9091,  5.9091,     -1., -1.],  # noqa: E241, E201, B950
+            [-1.,       -1.,     -1.,     -1.,     -1.,     -1.,     -1.,     -1.,     -1.,    -1., -1.],  # noqa: E241, E201, B950
+        ], dtype=torch.float32, device=device).view(1, 11, 11, 1)
+
+        zbuf_t_expected = torch.tensor([
+            [-1.,     -1.,     -1.,     -1.,     -1.,     -1.,     -1.,     -1.,     -1.,     -1., -1.],  # noqa: E241, E201, B950
+            [-1.,     -1.,     -1.,     -1., 33.8461, 33.8462, 33.8462,     -1.,     -1.,     -1., -1.],  # noqa: E241, E201, B950
+            [-1.,     -1.,     -1., 24.4444, 24.4444, 24.4444, 24.4444, 24.4444,     -1.,     -1., -1.],  # noqa: E241, E201, B950
+            [-1.,     -1.,     -1., 19.1304, 19.1304, 19.1304, 19.1304, 19.1304,     -1.,     -1., -1.],  # noqa: E241, E201, B950
+            [-1.,     -1., 15.7143, 15.7143, 15.7143, 15.7143, 15.7143, 15.7143, 15.7143,     -1., -1.],  # noqa: E241, E201, B950
+            [-1.,     -1., 13.3333, 13.3333, 13.3333, 13.3333, 13.3333, 13.3333, 13.3333,     -1., -1.],  # noqa: E241, E201, B950
+            [-1., 11.5789, 11.5789, 11.5789, 11.5789, 11.5789, 11.5789, 11.5789, 11.5789, 11.5789, -1.],  # noqa: E241, E201, B950
+            [-1., 10.2326, 10.2326, 10.2326, 10.2326, 10.2326, 10.2326, 10.2326, 10.2326, 10.2326, -1.],  # noqa: E241, E201, B950
+            [-1.,  9.1667,  9.1667,  9.1667,  9.1667,  9.1667,  9.1667,  9.1667,  9.1667,  9.1667, -1.],  # noqa: E241, E201, B950
+            [-1.,      -1., 8.3019,  8.3019,  8.3019,  8.3019,  8.3019,  8.3019,  8.3019,     -1., -1.],  # noqa: E241, E201, B950
+            [-1.,      -1.,     -1.,    -1.,     -1.,     -1.,     -1.,     -1.,     -1.,     -1., -1.]   # noqa: E241, E201, B950
+        ], dtype=torch.float32, device=device).view(1, 11, 11, 1)
+        # fmt: on
+
+        self.assertTrue(torch.all(idx_f == idx_expected).item())
+        self.assertTrue(torch.all(idx_t == idx_expected).item())
+        dists_t_max_diff = (dists_t - dists_expected).abs().max().item()
+        dists_f_max_diff = (dists_f - dists_expected).abs().max().item()
+        self.assertLess(dists_t_max_diff, 1e-4)
+        self.assertLess(dists_f_max_diff, 1e-4)
+        zbuf_f_max_diff = (zbuf_f - zbuf_f_expected).abs().max().item()
+        zbuf_t_max_diff = (zbuf_t - zbuf_t_expected).abs().max().item()
+        self.assertLess(zbuf_f_max_diff, 1e-4)
+        self.assertLess(zbuf_t_max_diff, 1e-4)
+
+        # Check barycentrics by using them to re-compute zbuf
+        z0 = verts[0, 2]
+        z1 = verts[1, 2]
+        z2 = verts[2, 2]
+        w0_f, w1_f, w2_f = bary_f.unbind(dim=4)
+        w0_t, w1_t, w2_t = bary_t.unbind(dim=4)
+        zbuf_f_bary = w0_f * z0 + w1_f * z1 + w2_f * z2
+        zbuf_t_bary = w0_t * z0 + w1_t * z1 + w2_t * z2
+        # Only compare pixels that the expected index image marks as covered.
+        mask = idx_expected != -1
+        zbuf_f_bary_diff = (zbuf_f_bary[mask] - zbuf_f_expected[mask]).abs().max()
+        zbuf_t_bary_diff = (zbuf_t_bary[mask] - zbuf_t_expected[mask]).abs().max()
+        self.assertLess(zbuf_f_bary_diff, 1e-4)
+        self.assertLess(zbuf_t_bary_diff, 1e-4)
+
+    def _test_barycentric_clipping(self, rasterize_meshes_fn, device, bin_size=None):
+        """
+        Rasterize a single triangle with a non-zero blur radius, first with
+        clip_barycentric_coords=False and then with it set to True. The
+        unclipped barycentrics are compared against a hard-coded table; the
+        clipped barycentrics and depths are compared against values derived
+        from that table with _clip_barycentric_coordinates and
+        _interpolate_zbuf.
+
+        Args:
+            rasterize_meshes_fn: rasterizer under test; called with keyword
+                arguments only and expected to return (idx, zbuf, bary, dists).
+            device: device on which all tensors are created.
+            bin_size: forwarded as the `bin_size` keyword argument (including
+                the default None), unless it is -1, in which case no
+                `bin_size` is passed at all.
+        """
+        # fmt: off
+        verts = torch.tensor([
+            [-0.4, -0.4, 10],  # noqa: E241, E201
+            [ 0.4, -0.4, 10],  # noqa: E241, E201
+            [ 0.0,  0.4, 20],  # noqa: E241, E201
+        ], dtype=torch.float32, device=device)
+        # fmt: on
+        faces = torch.tensor([[0, 1, 2]], device=device)
+        meshes = Meshes(verts=[verts], faces=[faces])
+        kwargs = {
+            "meshes": meshes,
+            "image_size": 5,
+            "faces_per_pixel": 1,
+            "blur_radius": 0.2,
+            "perspective_correct": False,
+            "clip_barycentric_coords": False,  # Initially set this to false
+        }
+        if bin_size != -1:
+            kwargs["bin_size"] = bin_size
+
+        # First run: barycentric clipping disabled
+        idx_f, zbuf_f, bary_f, dists_f = rasterize_meshes_fn(**kwargs)
+
+        # Expected unclipped barycentrics, one (w0, w1, w2) triple per pixel of
+        # the 5x5 image; some components lie outside [0, 1].
+        # fmt: off
+        expected_bary = torch.tensor([
+            [
+                [-1.0000, -1.0000, -1.0000],  # noqa: E241, E201
+                [-1.0000, -1.0000, -1.0000],  # noqa: E241, E201
+                [-0.2500, -0.2500,  1.5000],  # noqa: E241, E201
+                [-1.0000, -1.0000, -1.0000],  # noqa: E241, E201
+                [-1.0000, -1.0000, -1.0000]   # noqa: E241, E201
+            ],
+            [
+                [-1.0000, -1.0000, -1.0000],  # noqa: E241, E201
+                [-0.5000,  0.5000,  1.0000],  # noqa: E241, E201
+                [-0.0000, -0.0000,  1.0000],  # noqa: E241, E201
+                [ 0.5000, -0.5000,  1.0000],  # noqa: E241, E201
+                [-1.0000, -1.0000, -1.0000]   # noqa: E241, E201
+            ],
+            [
+                [-1.0000, -1.0000, -1.0000],  # noqa: E241, E201
+                [-0.2500,  0.7500,  0.5000],  # noqa: E241, E201
+                [ 0.2500,  0.2500,  0.5000],  # noqa: E241, E201
+                [ 0.7500, -0.2500,  0.5000],  # noqa: E241, E201
+                [-1.0000, -1.0000, -1.0000]   # noqa: E241, E201
+            ],
+            [
+                [-0.5000,  1.5000, -0.0000],  # noqa: E241, E201
+                [-0.0000,  1.0000, -0.0000],  # noqa: E241, E201
+                [ 0.5000,  0.5000, -0.0000],  # noqa: E241, E201
+                [ 1.0000, -0.0000, -0.0000],  # noqa: E241, E201
+                [ 1.5000, -0.5000,  0.0000]   # noqa: E241, E201
+            ],
+            [
+                [-1.0000, -1.0000, -1.0000],  # noqa: E241, E201
+                [ 0.2500,  1.2500, -0.5000],  # noqa: E241, E201
+                [ 0.7500,  0.7500, -0.5000],  # noqa: E241, E201
+                [ 1.2500,  0.2500, -0.5000],  # noqa: E241, E201
+                [-1.0000, -1.0000, -1.0000]   # noqa: E241, E201
+            ]
+        ], dtype=torch.float32, device=device).view(1, 5, 5, 1, 3)
+        # fmt: on
+
+        self.assertClose(expected_bary, bary_f, atol=1e-4)
+
+        # calculate the expected clipped barycentrics and zbuf
+        expected_bary_clipped = _clip_barycentric_coordinates(expected_bary)
+        expected_z_clipped = _interpolate_zbuf(idx_f, expected_bary_clipped, meshes)
+
+        # Second run: barycentric clipping enabled
+        kwargs["clip_barycentric_coords"] = True
+        idx_t, zbuf_t, bary_t, dists_t = rasterize_meshes_fn(**kwargs)
+
+        self.assertClose(expected_bary_clipped, bary_t, atol=1e-4)
+        self.assertClose(expected_z_clipped, zbuf_t, atol=1e-4)
+
+    def _test_behind_camera(self, rasterize_meshes_fn, device, bin_size=None):
+        """
+        Every vertex has a negative z coordinate, i.e. lies behind the
+        camera, so nothing should get rasterized and all outputs stay at -1.
+        """
+        N = 1
+        image_size = 16
+        faces_per_pixel = 1
+        radius = 0.2
+
+        # fmt: off
+        verts = torch.tensor(
+            [
+                [ -0.5, 0.0, -0.1],  # noqa: E241, E201
+                [  0.0, 0.6, -0.1],  # noqa: E241, E201
+                [  0.5, 0.0, -0.1],  # noqa: E241, E201
+                [-0.25, 0.0, -0.9],  # noqa: E241, E201
+                [ 0.25, 0.5, -0.9],  # noqa: E241, E201
+                [ 0.75, 0.0, -0.9],  # noqa: E241, E201
+                [ -0.4, 0.0, -0.5],  # noqa: E241, E201
+                [  0.6, 0.6, -0.5],  # noqa: E241, E201
+                [  0.8, 0.0, -0.5],  # noqa: E241, E201
+                [ -0.2, 0.0, -0.5],  # noqa: E241, E201
+                [  0.3, 0.6, -0.5],  # noqa: E241, E201
+                [  0.4, 0.0, -0.5],  # noqa: E241, E201
+            ],
+            dtype=torch.float32,
+            device=device,
+        )
+        # fmt: on
+        faces = torch.tensor(
+            [[1, 0, 2], [4, 3, 5], [7, 6, 8], [10, 9, 11]],
+            dtype=torch.int64,
+            device=device,
+        )
+        meshes = Meshes(verts=[verts], faces=[faces])
+
+        # All expected outputs are filled with -1.
+        pix_shape = (N, image_size, image_size, faces_per_pixel)
+        float_opts = {"dtype": torch.float32, "device": device}
+        idx_expected = torch.full(
+            pix_shape, fill_value=-1, dtype=torch.int64, device=device
+        )
+        bary_expected = torch.full(pix_shape + (3,), fill_value=-1, **float_opts)
+        zbuf_expected = torch.full(pix_shape, fill_value=-1, **float_opts)
+        dists_expected = zbuf_expected.clone()
+
+        raster_args = [meshes, image_size, radius, faces_per_pixel]
+        if bin_size != -1:
+            # -1 selects the naive python version with no binning, which is
+            # called without a bin_size argument.
+            raster_args.append(bin_size)
+        idx, zbuf, bary, dists = rasterize_meshes_fn(*raster_args)
+
+        self.assertTrue((idx == idx_expected).all().item())
+        self.assertTrue((zbuf == zbuf_expected).all().item())
+        self.assertClose(bary, bary_expected)
+        self.assertClose(dists, dists_expected)
+
+    def _simple_triangle_raster(self, raster_fn, device, bin_size=None):
+        image_size = 10
+
+        # Mesh with a single non-symmetrical face - this will help
+        # check that the XY directions are correctly oriented.
+        verts0 = torch.tensor(
+            [[-0.3, -0.4, 0.1], [0.0, 0.6, 0.1], [0.9, -0.4, 0.1]],
+            dtype=torch.float32,
+            device=device,
+        )
+        faces0 = torch.tensor([[1, 0, 2]], dtype=torch.int64, device=device)
+
+        # Mesh with two overlapping faces.
+        # fmt: off
+        verts1 = torch.tensor(
+            [
+                [-0.9, -0.2, 0.1],  # noqa: E241, E201
+                [ 0.0,  0.6, 0.1],  # noqa: E241, E201
+                [ 0.7, -0.4, 0.1],  # noqa: E241, E201
+                [-0.7,  0.4, 0.5],  # noqa: E241, E201
+                [ 0.0, -0.6, 0.5],  # noqa: E241, E201
+                [ 0.7,  0.4, 0.5],  # noqa: E241, E201
+            ],
+            dtype=torch.float32,
+            device=device,
+        )
+        # fmt: on
+        faces1 = torch.tensor(
+            [[1, 0, 2], [3, 4, 5]], dtype=torch.int64, device=device
+        )
+
+        # Expected output tensors in the format with axes +X left, +Y up, +Z in
+        # k = 0, closest point.
+        # fmt: off
+        expected_p2face_k0 = torch.tensor(
+            [
+                [
+                    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1, -1, -1, -1,  0, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1, -1, -1,  0,  0,  0, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1, -1,  0,  0,  0,  0, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1,  0,  0,  0,  0,  0, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                ],
+                [
+                    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1, -1, -1, -1, -1,  1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1, -1,  2,  2,  1,  1,  1,  2, -1, -1],  # noqa: E241, E201
+                    [-1, -1, -1,  1,  1,  1,  1,  1, -1, -1],  # noqa: E241, E201
+                    [-1, -1, -1,  1,  1,  1,  1,  1,  1, -1],  # noqa: E241, E201
+                    [-1, -1,  1,  1,  1,  2, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                ],
+            ],
+            dtype=torch.int64,
+            device=device,
+        )
+        expected_zbuf_k0 = torch.tensor(
+            [
+                [
+                    [-1,  -1,  -1,  -1,  -1,  -1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1,  -1,  -1,  -1,  -1,  -1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1,  -1,  -1,  -1,  -1,  -1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1,  -1,  -1,  -1, 0.1,  -1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1,  -1,  -1, 0.1, 0.1, 0.1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1,  -1, 0.1, 0.1, 0.1, 0.1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1, 0.1, 0.1, 0.1, 0.1, 0.1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1,  -1,  -1,  -1,  -1,  -1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1,  -1,  -1,  -1,  -1,  -1, -1, -1, -1, -1],  # noqa: E241, E201
+                    [-1,  -1,  -1,  -1,  -1,  -1, -1, -1, -1, -1]   # noqa: E241, E201
+                ],
+                [
+                    [-1, -1,  -1,  -1,  -1, -1,   -1,  -1,  -1, -1],  # noqa: E241, E201
+                    [-1, -1,  -1,  -1,  -1, -1,   -1,  -1,  -1, -1],  # noqa: E241, E201
+                    [-1, -1,  -1,  -1,  -1, 0.1,  -1,  -1,  -1, -1],  # noqa: E241, E201
+                    [-1, -1, 0.5, 0.5, 0.1, 0.1, 0.1, 0.5,  -1, -1],  # noqa: E241, E201
+                    [-1, -1,  -1, 0.1, 0.1, 0.1, 0.1, 0.1,  -1, -1],  # noqa: E241, E201
+                    [-1, -1,  -1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, -1],  # noqa: E241, E201
+                    [-1, -1, 0.1, 0.1, 0.1, 0.5,  -1,  -1,  -1, -1],  # noqa: E241, E201
+                    [-1, -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, -1],  # noqa: E241, E201
+                    [-1, -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, -1],  # noqa: E241, E201
+                    [-1, -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, -1]   # noqa: E241, E201
+                ]
+            ],
+            device=device,
+        )
+        # fmt: on
+
+        meshes = Meshes(verts=[verts0, verts1], faces=[faces0, faces1])
+
+        # k = 1, second closest point.
+        expected_p2face_k1 = expected_p2face_k0.clone()
+        expected_p2face_k1[0, :] = torch.ones_like(expected_p2face_k1[0, :]) * -1
+
+        # fmt: off
+        expected_p2face_k1[1, :] = torch.tensor(
+            [
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                [-1, -1, -1, -1,  2,  2,  2, -1, -1, -1],  # noqa: E241, E201
+                [-1, -1, -1,  2,  2,  2,  2, -1, -1, -1],  # noqa: E241, E201
+                [-1, -1, -1,  2,  2,  2,  2, -1, -1, -1],  # noqa: E241, E201
+                [-1, -1, -1, -1,  2, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]   # noqa: E241, E201
+            ],
+            dtype=torch.int64,
+            device=device,
+        )
+        expected_zbuf_k1 = expected_zbuf_k0.clone()
+        expected_zbuf_k1[0, :] = torch.ones_like(expected_zbuf_k1[0, :]) * -1
+        expected_zbuf_k1[1, :] = torch.tensor(
+            [
+                [-1., -1., -1.,  -1.,  -1.,  -1., -1., -1., -1., -1.],  # noqa: E241, E201
+                [-1., -1., -1.,  -1.,  -1.,  -1., -1., -1., -1., -1.],  # noqa: E241, E201
+                [-1., -1., -1.,  -1.,  -1.,  -1., -1., -1., -1., -1.],  # noqa: E241, E201
+                [-1., -1., -1.,  -1.,  0.5, 0.5,  0.5, -1., -1., -1.],  # noqa: E241, E201
+                [-1., -1., -1.,  0.5,  0.5, 0.5,  0.5, -1., -1., -1.],  # noqa: E241, E201
+                [-1., -1., -1.,  0.5,  0.5, 0.5,  0.5, -1., -1., -1.],  # noqa: E241, E201
+                [-1., -1., -1.,  -1.,  0.5,  -1., -1., -1., -1., -1.],  # noqa: E241, E201
+                [-1., -1., -1.,  -1.,  -1.,  -1., -1., -1., -1., -1.],  # noqa: E241, E201
+                [-1., -1., -1.,  -1.,  -1.,  -1., -1., -1., -1., -1.],  # noqa: E241, E201
+                [-1., -1., -1.,  -1.,  -1.,  -1., -1., -1., -1., -1.]   # noqa: E241, E201
+            ],
+            dtype=torch.float32,
+            device=device,
+        )
+        # fmt: on
+
+        #  Coordinate conventions +Y up, +Z in, +X left
+        if bin_size == -1:
+            # simple python, no bin_size
+            p2face, zbuf, bary, pix_dists = raster_fn(meshes, image_size, 0.0, 2)
+        else:
+            p2face, zbuf, bary, pix_dists = raster_fn(
+                meshes, image_size, 0.0, 2, bin_size
+            )
+
+        self.assertClose(p2face[..., 0], expected_p2face_k0)
+        self.assertClose(zbuf[..., 0], expected_zbuf_k0)
+        self.assertClose(p2face[..., 1], expected_p2face_k1)
+        self.assertClose(zbuf[..., 1], expected_zbuf_k1)
+
+    def _simple_blurry_raster(self, raster_fn, device, bin_size=None):
+        """
+        Check that pix_to_face, dist and zbuf values are invariant to the
+        ordering of faces.
+        """
+        image_size = 10
+        blur_radius = 0.12**2
+        faces_per_pixel = 1
+
+        # fmt: off
+        verts = torch.tensor(
+            [
+                [ -0.3, 0.0,  0.1],  # noqa: E241, E201
+                [  0.0, 0.6,  0.1],  # noqa: E241, E201
+                [  0.8, 0.0,  0.1],  # noqa: E241, E201
+                [-0.25, 0.0,  0.9],  # noqa: E241, E201
+                [0.25,  0.5,  0.9],  # noqa: E241, E201
+                [0.75,  0.0,  0.9],  # noqa: E241, E201
+                [-0.4,  0.0,  0.5],  # noqa: E241, E201
+                [ 0.6,  0.6,  0.5],  # noqa: E241, E201
+                [ 0.8,  0.0,  0.5],  # noqa: E241, E201
+                [-0.2,  0.0, -0.5],  # noqa: E241, E201  face behind the camera
+                [ 0.3,  0.6, -0.5],  # noqa: E241, E201
+                [ 0.4,  0.0, -0.5],  # noqa: E241, E201
+            ],
+            dtype=torch.float32,
+            device=device,
+        )
+        # Face with index 0 is non symmetric about the X and Y axis to
+        # test that the positive Y and X directions are correct in the output.
+        faces_packed = torch.tensor(
+            [[1, 0, 2], [4, 3, 5], [7, 6, 8], [10, 9, 11]],
+            dtype=torch.int64,
+            device=device,
+        )
+        expected_p2f = torch.tensor(
+            [
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                [-1,  2,  2,  0,  0,  0, -1, -1, -1, -1],  # noqa: E241, E201
+                [-1,  2,  0,  0,  0,  0, -1, -1, -1, -1],  # noqa: E241, E201
+                [-1,  0,  0,  0,  0,  0,  0, -1, -1, -1],  # noqa: E241, E201
+                [-1,  0,  0,  0,  0,  0,  0, -1, -1, -1],  # noqa: E241, E201
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241, E201
+            ],
+            dtype=torch.int64,
+            device=device,
+        )
+        expected_zbuf = torch.tensor(
+            [
+                [-1.,   -1.,  -1.,  -1.,  -1.,  -1., -1., -1., -1., -1.],  # noqa: E241, E201
+                [-1.,   -1.,  -1.,  -1.,  -1.,  -1., -1., -1., -1., -1.],  # noqa: E241, E201
+                [-1.,  0.5,  0.5,  0.1,  0.1,  0.1,  -1., -1., -1., -1.],  # noqa: E241, E201
+                [-1.,  0.5,  0.1,  0.1,  0.1,  0.1,  -1., -1., -1., -1.],  # noqa: E241, E201
+                [-1.,  0.1,  0.1,  0.1,  0.1,  0.1,  0.1, -1., -1., -1.],  # noqa: E241, E201
+                [-1.,  0.1,  0.1,  0.1,  0.1,  0.1,  0.1, -1., -1., -1.],  # noqa: E241, E201
+                [-1.,   -1.,  -1.,  -1.,  -1.,  -1., -1., -1., -1., -1.],  # noqa: E241, E201
+                [-1.,   -1.,  -1.,  -1.,  -1.,  -1., -1., -1., -1., -1.],  # noqa: E241, E201
+                [-1.,   -1.,  -1.,  -1.,  -1.,  -1., -1., -1., -1., -1.],  # noqa: E241, E201
+                [-1.,   -1.,  -1.,  -1.,  -1.,  -1., -1., -1., -1., -1.]   # noqa: E241, E201
+            ],
+            dtype=torch.float32,
+            device=device,
+        )
+        # fmt: on
+
+        for i, order in enumerate([[0, 1, 2], [1, 2, 0], [2, 0, 1]]):
+            faces = faces_packed[order]  # rearrange order of faces.
+            mesh = Meshes(verts=[verts], faces=[faces])
+            if bin_size == -1:
+                # simple python, no bin size arg
+                pix_to_face, zbuf, bary_coords, dists = raster_fn(
+                    mesh, image_size, blur_radius, faces_per_pixel
+                )
+            else:
+                pix_to_face, zbuf, bary_coords, dists = raster_fn(
+                    mesh, image_size, blur_radius, faces_per_pixel, bin_size
+                )
+            if i == 0:
+                expected_dists = dists
+            p2f = expected_p2f.clone()
+            p2f[expected_p2f == 0] = order.index(0)
+            p2f[expected_p2f == 1] = order.index(1)
+            p2f[expected_p2f == 2] = order.index(2)
+            self.assertClose(pix_to_face.squeeze(), p2f)
+            self.assertClose(zbuf.squeeze(), expected_zbuf, rtol=1e-5)
+            self.assertClose(dists, expected_dists)
+
+    def _test_coarse_rasterize(self, device):
+        image_size = (16, 16)
+        # No blurring. This test checks that the XY directions are
+        # correctly oriented.
+        blur_radius = 0.0
+        bin_size = 8
+        max_faces_per_bin = 3
+
+        # fmt: off
+        verts = torch.tensor(
+            [
+                [-0.5,   0.1,  0.1],  # noqa: E241, E201
+                [-0.3,   0.6,  0.1],  # noqa: E241, E201
+                [-0.1,   0.1,  0.1],  # noqa: E241, E201
+                [-0.3,  -0.1,  0.4],  # noqa: E241, E201
+                [ 0.3,   0.5,  0.4],  # noqa: E241, E201
+                [0.75,  -0.1,  0.4],  # noqa: E241, E201
+                [ 0.2,  -0.3,  0.9],  # noqa: E241, E201
+                [ 0.3,  -0.7,  0.9],  # noqa: E241, E201
+                [ 0.6,  -0.3,  0.9],  # noqa: E241, E201
+                [-0.4,   0.0, -1.5],  # noqa: E241, E201
+                [ 0.6,   0.6, -1.5],  # noqa: E241, E201
+                [ 0.8,   0.0, -1.5],  # noqa: E241, E201
+            ],
+            device=device,
+        )
+        # Expected faces using axes convention +Y down, + X right, +Z in
+        # Non symmetrical triangles i.e face 0 and 3 are in one bin only
+        faces = torch.tensor(
+            [
+                [ 1, 0,  2],  # noqa: E241, E201  bin 01 only
+                [ 4, 3,  5],  # noqa: E241, E201  all bins
+                [ 7, 6,  8],  # noqa: E241, E201  bin 10 only
+                [10, 9, 11],  # noqa: E241, E201  negative z, should not appear.
+            ],
+            dtype=torch.int64,
+            device=device,
+        )
+        # fmt: on
+
+        meshes = Meshes(verts=[verts], faces=[faces])
+        faces_verts = verts[faces]
+        num_faces_per_mesh = meshes.num_faces_per_mesh()
+        mesh_to_face_first_idx = meshes.mesh_to_faces_packed_first_idx()
+
+        # Expected faces using axes convention +Y down, + X right, + Z in
+        bin_faces_expected = (
+            torch.ones((1, 2, 2, max_faces_per_bin), dtype=torch.int32, device=device)
+            * -1
+        )
+        bin_faces_expected[0, 1, 1, 0] = torch.tensor([1])
+        bin_faces_expected[0, 0, 1, 0:2] = torch.tensor([1, 2])
+        bin_faces_expected[0, 1, 0, 0:2] = torch.tensor([0, 1])
+        bin_faces_expected[0, 0, 0, 0] = torch.tensor([1])
+
+        # +Y up, +X left, +Z in
+        bin_faces = _C._rasterize_meshes_coarse(
+            faces_verts,
+            mesh_to_face_first_idx,
+            num_faces_per_mesh,
+            image_size,
+            blur_radius,
+            bin_size,
+            max_faces_per_bin,
+        )
+
+        bin_faces_same = (bin_faces.squeeze() == bin_faces_expected).all()
+        self.assertTrue(bin_faces_same.item() == 1)
+
+    def test_order_of_ties(self):
+        # Tied faces are rasterized in index order
+        # We rasterize a mesh with many faces.
+        device = torch.device("cuda:0")
+        verts = -5 * torch.eye(3, dtype=torch.float32, device=device)[None]
+        faces = torch.arange(3, device=device, dtype=torch.int64).expand(1, 100, 3)
+        mesh = Meshes(verts=verts, faces=faces)
+
+        R, T = look_at_view_transform(2.7, 0.0, 0.0)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+
+        raster_settings = RasterizationSettings(
+            image_size=28, faces_per_pixel=100, bin_size=0
+        )
+        rasterizer = MeshRasterizer(raster_settings=raster_settings)
+
+        out = rasterizer(mesh, cameras=cameras)
+        self.assertClose(
+            out.pix_to_face[0, 14:, :14],
+            torch.arange(100, device=device).expand(14, 14, 100),
+        )
+
+    @staticmethod
+    def rasterize_meshes_python_with_init(
+        num_meshes: int,
+        ico_level: int,
+        image_size: int,
+        blur_radius: float,
+        faces_per_pixel: int,
+    ):
+        device = torch.device("cpu")
+        meshes = ico_sphere(ico_level, device)
+        meshes_batch = meshes.extend(num_meshes)
+
+        def rasterize():
+            rasterize_meshes_python(
+                meshes_batch, image_size, blur_radius, faces_per_pixel
+            )
+
+        return rasterize
+
+    @staticmethod
+    def rasterize_meshes_cpu_with_init(
+        num_meshes: int,
+        ico_level: int,
+        image_size: int,
+        blur_radius: float,
+        faces_per_pixel: int,
+    ):
+        meshes = ico_sphere(ico_level, torch.device("cpu"))
+        meshes_batch = meshes.extend(num_meshes)
+
+        def rasterize():
+            rasterize_meshes(
+                meshes_batch,
+                image_size,
+                blur_radius,
+                faces_per_pixel=faces_per_pixel,
+                bin_size=0,
+            )
+
+        return rasterize
+
+    @staticmethod
+    def rasterize_meshes_cuda_with_init(
+        num_meshes: int,
+        ico_level: int,
+        image_size: int,
+        blur_radius: float,
+        faces_per_pixel: int,
+    ):
+        device = get_random_cuda_device()
+        meshes = ico_sphere(ico_level, device)
+        meshes_batch = meshes.extend(num_meshes)
+        torch.cuda.synchronize(device)
+
+        def rasterize():
+            rasterize_meshes(meshes_batch, image_size, blur_radius, faces_per_pixel)
+            torch.cuda.synchronize(device)
+
+        return rasterize
+
+    @staticmethod
+    def bm_rasterize_meshes_with_clipping(
+        num_meshes: int,
+        ico_level: int,
+        image_size: int,
+        blur_radius: float,
+        faces_per_pixel: int,
+        dist: float,
+    ):
+        device = get_random_cuda_device()
+        meshes = ico_sphere(ico_level, device)
+        meshes_batch = meshes.extend(num_meshes)
+
+        settings = RasterizationSettings(
+            image_size=image_size,
+            blur_radius=blur_radius,
+            faces_per_pixel=faces_per_pixel,
+            z_clip_value=1e-2,
+            perspective_correct=True,
+            cull_to_frustum=True,
+        )
+
+        # The camera is positioned so that the image plane intersects
+        # the mesh and some faces are partially behind the image plane.
+        R, T = look_at_view_transform(dist, 0, 0)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T, fov=90)
+        rasterizer = MeshRasterizer(raster_settings=settings, cameras=cameras)
+
+        # Transform the meshes to projec them onto the image plane
+        meshes_screen = rasterizer.transform(meshes_batch)
+        torch.cuda.synchronize(device)
+
+        def rasterize():
+            # Only measure rasterization speed (including clipping)
+            rasterize_meshes(
+                meshes_screen,
+                image_size,
+                blur_radius,
+                faces_per_pixel,
+                z_clip_value=1e-2,
+                perspective_correct=True,
+                cull_to_frustum=True,
+            )
+            torch.cuda.synchronize(device)
+
+        return rasterize
diff --git a/pytorch3d/tests/test_rasterize_points.py b/pytorch3d/tests/test_rasterize_points.py
new file mode 100644
index 0000000000000000000000000000000000000000..664dac9eb9527f3c2e9dd421e0a714789f5e1123
--- /dev/null
+++ b/pytorch3d/tests/test_rasterize_points.py
@@ -0,0 +1,651 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import numpy as np
+import torch
+from pytorch3d import _C
+from pytorch3d.renderer.points.rasterize_points import (
+    _format_radius,
+    rasterize_points,
+    rasterize_points_python,
+)
+from pytorch3d.structures.pointclouds import Pointclouds
+
+from .common_testing import get_random_cuda_device, TestCaseMixin
+
+
+class TestRasterizePoints(TestCaseMixin, unittest.TestCase):
+    def test_python_simple_cpu(self):
+        self._simple_test_case(
+            rasterize_points_python, torch.device("cpu"), bin_size=-1
+        )
+
+    def test_naive_simple_cpu(self):
+        device = torch.device("cpu")
+        self._simple_test_case(rasterize_points, device)
+
+    def test_naive_simple_cuda(self):
+        device = get_random_cuda_device()
+        self._simple_test_case(rasterize_points, device, bin_size=0)
+
+    def test_python_behind_camera(self):
+        self._test_behind_camera(
+            rasterize_points_python, torch.device("cpu"), bin_size=-1
+        )
+
+    def test_cpu_behind_camera(self):
+        self._test_behind_camera(rasterize_points, torch.device("cpu"))
+
+    def test_cuda_behind_camera(self):
+        device = get_random_cuda_device()
+        self._test_behind_camera(rasterize_points, device, bin_size=0)
+
+    def test_python_variable_radius(self):
+        self._test_variable_size_radius(
+            rasterize_points_python, torch.device("cpu"), bin_size=-1
+        )
+
+    def test_cpu_variable_radius(self):
+        self._test_variable_size_radius(rasterize_points, torch.device("cpu"))
+
+    def test_cuda_variable_radius(self):
+        device = get_random_cuda_device()
+        # Naive
+        self._test_variable_size_radius(rasterize_points, device, bin_size=0)
+        # Coarse to fine
+        self._test_variable_size_radius(rasterize_points, device, bin_size=None)
+
+    def test_cpp_vs_naive_vs_binned(self):
+        # Make sure that the backward pass runs for all pathways
+        N = 2
+        P = 1000
+        image_size = 32
+        radius = 0.1
+        points_per_pixel = 3
+        points1 = torch.randn(P, 3, requires_grad=True)
+        points2 = torch.randn(int(P / 2), 3, requires_grad=True)
+        pointclouds = Pointclouds(points=[points1, points2])
+        grad_zbuf = torch.randn(N, image_size, image_size, points_per_pixel)
+        grad_dists = torch.randn(N, image_size, image_size, points_per_pixel)
+
+        # Option I: CPU, naive
+        idx1, zbuf1, dists1 = rasterize_points(
+            pointclouds, image_size, radius, points_per_pixel, bin_size=0
+        )
+        loss = (zbuf1 * grad_zbuf).sum() + (dists1 * grad_dists).sum()
+        loss.backward()
+        grad1 = points1.grad.data.clone()
+
+        # Option II: CUDA, naive
+        points1_cuda = points1.cuda().detach().clone().requires_grad_(True)
+        points2_cuda = points2.cuda().detach().clone().requires_grad_(True)
+        pointclouds = Pointclouds(points=[points1_cuda, points2_cuda])
+        grad_zbuf = grad_zbuf.cuda()
+        grad_dists = grad_dists.cuda()
+        idx2, zbuf2, dists2 = rasterize_points(
+            pointclouds, image_size, radius, points_per_pixel, bin_size=0
+        )
+        loss = (zbuf2 * grad_zbuf).sum() + (dists2 * grad_dists).sum()
+        loss.backward()
+        idx2 = idx2.data.cpu().clone()
+        zbuf2 = zbuf2.data.cpu().clone()
+        dists2 = dists2.data.cpu().clone()
+        grad2 = points1_cuda.grad.data.cpu().clone()
+
+        # Option III: CUDA, binned
+        points1_cuda = points1.cuda().detach().clone().requires_grad_(True)
+        points2_cuda = points2.cuda().detach().clone().requires_grad_(True)
+        pointclouds = Pointclouds(points=[points1_cuda, points2_cuda])
+        idx3, zbuf3, dists3 = rasterize_points(
+            pointclouds, image_size, radius, points_per_pixel, bin_size=32
+        )
+        loss = (zbuf3 * grad_zbuf).sum() + (dists3 * grad_dists).sum()
+        points1.grad.data.zero_()
+        loss.backward()
+        idx3 = idx3.data.cpu().clone()
+        zbuf3 = zbuf3.data.cpu().clone()
+        dists3 = dists3.data.cpu().clone()
+        grad3 = points1_cuda.grad.data.cpu().clone()
+
+        # Make sure everything was the same
+        idx12_same = (idx1 == idx2).all().item()
+        idx13_same = (idx1 == idx3).all().item()
+        zbuf12_same = (zbuf1 == zbuf2).all().item()
+        zbuf13_same = (zbuf1 == zbuf3).all().item()
+        dists12_diff = (dists1 - dists2).abs().max().item()
+        dists13_diff = (dists1 - dists3).abs().max().item()
+        self.assertTrue(idx12_same)
+        self.assertTrue(idx13_same)
+        self.assertTrue(zbuf12_same)
+        self.assertTrue(zbuf13_same)
+        self.assertTrue(dists12_diff < 1e-6)
+        self.assertTrue(dists13_diff < 1e-6)
+
+        diff12 = (grad1 - grad2).abs().max().item()
+        diff13 = (grad1 - grad3).abs().max().item()
+        diff23 = (grad2 - grad3).abs().max().item()
+        self.assertTrue(diff12 < 5e-6)
+        self.assertTrue(diff13 < 5e-6)
+        self.assertTrue(diff23 < 5e-6)
+
+    def test_python_vs_cpu_naive(self):
+        torch.manual_seed(231)
+        image_size = 32
+        radius = 0.1
+        points_per_pixel = 3
+
+        # Test a batch of homogeneous point clouds.
+        N = 2
+        P = 17
+        points = torch.randn(N, P, 3, requires_grad=True)
+        pointclouds = Pointclouds(points=points)
+        args = (pointclouds, image_size, radius, points_per_pixel)
+        self._compare_impls(
+            rasterize_points_python,
+            rasterize_points,
+            args,
+            args,
+            points,
+            points,
+            compare_grads=True,
+        )
+
+        # Test a batch of heterogeneous point clouds.
+        P2 = 10
+        points1 = torch.randn(P, 3, requires_grad=True)
+        points2 = torch.randn(P2, 3)
+        pointclouds = Pointclouds(points=[points1, points2])
+        args = (pointclouds, image_size, radius, points_per_pixel)
+        self._compare_impls(
+            rasterize_points_python,
+            rasterize_points,
+            args,
+            args,
+            points1,  # check gradients for first element in batch
+            points1,
+            compare_grads=True,
+        )
+
+    def test_cpu_vs_cuda_naive(self):
+        torch.manual_seed(231)
+        image_size = 64
+        radius = 0.1
+        points_per_pixel = 5
+
+        # Test homogeneous point cloud batch.
+        N = 2
+        P = 1000
+        bin_size = 0
+        points_cpu = torch.rand(N, P, 3, requires_grad=True)
+        points_cuda = points_cpu.cuda().detach().requires_grad_(True)
+        pointclouds_cpu = Pointclouds(points=points_cpu)
+        pointclouds_cuda = Pointclouds(points=points_cuda)
+        args_cpu = (pointclouds_cpu, image_size, radius, points_per_pixel, bin_size)
+        args_cuda = (pointclouds_cuda, image_size, radius, points_per_pixel, bin_size)
+        self._compare_impls(
+            rasterize_points,
+            rasterize_points,
+            args_cpu,
+            args_cuda,
+            points_cpu,
+            points_cuda,
+            compare_grads=True,
+        )
+
+    def _compare_impls(
+        self,
+        fn1,
+        fn2,
+        args1,
+        args2,
+        grad_var1=None,
+        grad_var2=None,
+        compare_grads=False,
+    ):
+        idx1, zbuf1, dist1 = fn1(*args1)
+        torch.manual_seed(231)
+        grad_zbuf = torch.randn_like(zbuf1)
+        grad_dist = torch.randn_like(dist1)
+        loss = (zbuf1 * grad_zbuf).sum() + (dist1 * grad_dist).sum()
+        if compare_grads:
+            loss.backward()
+            grad_points1 = grad_var1.grad.data.clone().cpu()
+
+        idx2, zbuf2, dist2 = fn2(*args2)
+        grad_zbuf = grad_zbuf.to(zbuf2)
+        grad_dist = grad_dist.to(dist2)
+        loss = (zbuf2 * grad_zbuf).sum() + (dist2 * grad_dist).sum()
+        if compare_grads:
+            # clear points1.grad in case args1 and args2 reused the same tensor
+            grad_var1.grad.data.zero_()
+            loss.backward()
+            grad_points2 = grad_var2.grad.data.clone().cpu()
+
+        self.assertEqual((idx1.cpu() == idx2.cpu()).all().item(), 1)
+        self.assertEqual((zbuf1.cpu() == zbuf2.cpu()).all().item(), 1)
+        self.assertClose(dist1.cpu(), dist2.cpu())
+        if compare_grads:
+            self.assertClose(grad_points1, grad_points2, atol=2e-6)
+
+    def test_bin_size_error(self):
+        points = Pointclouds(points=torch.rand(5, 100, 3))
+        image_size = 1024
+        bin_size = 16
+        with self.assertRaisesRegex(ValueError, "bin_size too small"):
+            rasterize_points(points, image_size, 0.0, 2, bin_size=bin_size)
+
+    def _test_behind_camera(self, rasterize_points_fn, device, bin_size=None):
+        # Test case where all points are behind the camera -- nothing should
+        # get rasterized
+        N = 2
+        P = 32
+        xy = torch.randn(N, P, 2)
+        z = torch.randn(N, P, 1).abs().mul(-1)  # Make them all negative
+        points = torch.cat([xy, z], dim=2).to(device)
+        image_size = 16
+        points_per_pixel = 3
+        radius = 0.2
+        idx_expected = torch.full(
+            (N, 16, 16, 3), fill_value=-1, dtype=torch.int32, device=device
+        )
+        zbuf_expected = torch.full(
+            (N, 16, 16, 3), fill_value=-1, dtype=torch.float32, device=device
+        )
+        dists_expected = zbuf_expected.clone()
+        pointclouds = Pointclouds(points=points)
+        if bin_size == -1:
+            # simple python case with no binning
+            idx, zbuf, dists = rasterize_points_fn(
+                pointclouds, image_size, radius, points_per_pixel
+            )
+        else:
+            idx, zbuf, dists = rasterize_points_fn(
+                pointclouds, image_size, radius, points_per_pixel, bin_size
+            )
+        idx_same = (idx == idx_expected).all().item() == 1
+        zbuf_same = (zbuf == zbuf_expected).all().item() == 1
+
+        self.assertTrue(idx_same)
+        self.assertTrue(zbuf_same)
+        self.assertClose(dists, dists_expected)
+
+    def _simple_test_case(self, rasterize_points_fn, device, bin_size=0):
+        # Create two pointclouds with different numbers of points.
+        # fmt: off
+        points1 = torch.tensor(
+            [
+                [0.0, 0.0,  0.0],  # noqa: E241
+                [0.4, 0.0,  0.1],  # noqa: E241
+                [0.0, 0.4,  0.2],  # noqa: E241
+                [0.0, 0.0, -0.1],  # noqa: E241 Points with negative z should be skippped
+            ],
+            device=device,
+        )
+        points2 = torch.tensor(
+            [
+                [0.0, 0.0,  0.0],  # noqa: E241
+                [0.4, 0.0,  0.1],  # noqa: E241
+                [0.0, 0.4,  0.2],  # noqa: E241
+                [0.0, 0.0, -0.1],  # noqa: E241 Points with negative z should be skippped
+                [0.0, 0.0, -0.7],  # noqa: E241 Points with negative z should be skippped
+            ],
+            device=device,
+        )
+        # fmt: on
+        pointclouds = Pointclouds(points=[points1, points2])
+
+        image_size = 5
+        points_per_pixel = 2
+        radius = 0.5
+
+        # The expected output values. Note that in the outputs, the world space
+        # +Y is up, and the world space +X is left.
+        idx1_expected = torch.full(
+            (1, 5, 5, 2), fill_value=-1, dtype=torch.int32, device=device
+        )
+        # fmt: off
+        idx1_expected[0, :, :, 0] = torch.tensor([
+            [-1, -1,  2, -1, -1],  # noqa: E241
+            [-1,  1,  0,  2, -1],  # noqa: E241
+            [ 1,  0,  0,  0, -1],  # noqa: E241 E201
+            [-1,  1,  0, -1, -1],  # noqa: E241
+            [-1, -1, -1, -1, -1],  # noqa: E241
+        ], device=device)
+        idx1_expected[0, :, :, 1] = torch.tensor([
+            [-1, -1, -1, -1, -1],  # noqa: E241
+            [-1,  2,  2, -1, -1],  # noqa: E241
+            [-1,  1,  1, -1, -1],  # noqa: E241
+            [-1, -1, -1, -1, -1],  # noqa: E241
+            [-1, -1, -1, -1, -1],  # noqa: E241
+        ], device=device)
+        # fmt: on
+
+        zbuf1_expected = torch.full(
+            (1, 5, 5, 2), fill_value=100, dtype=torch.float32, device=device
+        )
+        # fmt: off
+        zbuf1_expected[0, :, :, 0] = torch.tensor([
+            [-1.0, -1.0,  0.2, -1.0, -1.0],  # noqa: E241
+            [-1.0,  0.1,  0.0,  0.2, -1.0],  # noqa: E241
+            [ 0.1,  0.0,  0.0,  0.0, -1.0],  # noqa: E241 E201
+            [-1.0,  0.1,  0.0, -1.0, -1.0],  # noqa: E241
+            [-1.0, -1.0, -1.0, -1.0, -1.0]   # noqa: E241
+        ], device=device)
+        zbuf1_expected[0, :, :, 1] = torch.tensor([
+            [-1.0, -1.0, -1.0, -1.0, -1.0],  # noqa: E241
+            [-1.0,  0.2,  0.2, -1.0, -1.0],  # noqa: E241
+            [-1.0,  0.1,  0.1, -1.0, -1.0],  # noqa: E241
+            [-1.0, -1.0, -1.0, -1.0, -1.0],  # noqa: E241
+            [-1.0, -1.0, -1.0, -1.0, -1.0],  # noqa: E241
+        ], device=device)
+        # fmt: on
+
+        dists1_expected = torch.zeros((5, 5, 2), dtype=torch.float32, device=device)
+        # fmt: off
+        dists1_expected[:, :, 0] = torch.tensor([
+            [-1.00, -1.00,  0.16, -1.00, -1.00],  # noqa: E241
+            [-1.00,  0.16,  0.16,  0.16, -1.00],  # noqa: E241
+            [ 0.16,  0.16,  0.00,  0.16, -1.00],  # noqa: E241 E201
+            [-1.00,  0.16,  0.16, -1.00, -1.00],  # noqa: E241
+            [-1.00, -1.00, -1.00, -1.00, -1.00],  # noqa: E241
+        ], device=device)
+        dists1_expected[:, :, 1] = torch.tensor([
+            [-1.00, -1.00, -1.00, -1.00, -1.00],  # noqa: E241
+            [-1.00,  0.16,  0.00, -1.00, -1.00],  # noqa: E241
+            [-1.00,  0.00,  0.16, -1.00, -1.00],  # noqa: E241
+            [-1.00, -1.00, -1.00, -1.00, -1.00],  # noqa: E241
+            [-1.00, -1.00, -1.00, -1.00, -1.00],  # noqa: E241
+        ], device=device)
+        # fmt: on
+
+        if bin_size == -1:
+            # simple python case with no binning
+            idx, zbuf, dists = rasterize_points_fn(
+                pointclouds, image_size, radius, points_per_pixel
+            )
+        else:
+            idx, zbuf, dists = rasterize_points_fn(
+                pointclouds, image_size, radius, points_per_pixel, bin_size
+            )
+
+        # check first point cloud
+        idx_same = (idx[0, ...] == idx1_expected).all().item() == 1
+        if idx_same == 0:
+            print(idx[0, :, :, 0])
+            print(idx[0, :, :, 1])
+        zbuf_same = (zbuf[0, ...] == zbuf1_expected).all().item() == 1
+        self.assertClose(dists[0, ...], dists1_expected)
+        self.assertTrue(idx_same)
+        self.assertTrue(zbuf_same)
+
+        # Check second point cloud - the indices in idx refer to points in the
+        # pointclouds.points_packed() tensor. In the second point cloud,
+        # two points are behind the screen - the expected indices are the same
+        # the first pointcloud but offset by the number of points in the
+        # first pointcloud.
+        num_points_per_cloud = pointclouds.num_points_per_cloud()
+        idx1_expected[idx1_expected >= 0] += num_points_per_cloud[0]
+
+        idx_same = (idx[1, ...] == idx1_expected).all().item() == 1
+        zbuf_same = (zbuf[1, ...] == zbuf1_expected).all().item() == 1
+        self.assertTrue(idx_same)
+        self.assertTrue(zbuf_same)
+        self.assertClose(dists[1, ...], dists1_expected)
+
+    def test_coarse_cpu(self):
+        return self._test_coarse_rasterize(torch.device("cpu"))
+
+    def test_coarse_cuda(self):
+        device = get_random_cuda_device()
+        return self._test_coarse_rasterize(device)
+
+    def test_compare_coarse_cpu_vs_cuda(self):
+        torch.manual_seed(231)
+        N = 3
+        max_P = 1000
+        image_size = (64, 64)
+        radius = 0.1
+        bin_size = 16
+        max_points_per_bin = 500
+
+        # create heterogeneous point clouds
+        points = []
+        for _ in range(N):
+            p = np.random.choice(max_P)
+            points.append(torch.randn(p, 3))
+
+        pointclouds = Pointclouds(points=points)
+        points_packed = pointclouds.points_packed()
+        cloud_to_packed_first_idx = pointclouds.cloud_to_packed_first_idx()
+        num_points_per_cloud = pointclouds.num_points_per_cloud()
+
+        radius = torch.full((points_packed.shape[0],), fill_value=radius)
+        args = (
+            points_packed,
+            cloud_to_packed_first_idx,
+            num_points_per_cloud,
+            image_size,
+            radius,
+            bin_size,
+            max_points_per_bin,
+        )
+        bp_cpu = _C._rasterize_points_coarse(*args)
+
+        device = get_random_cuda_device()
+        pointclouds_cuda = pointclouds.to(device)
+        points_packed = pointclouds_cuda.points_packed()
+        cloud_to_packed_first_idx = pointclouds_cuda.cloud_to_packed_first_idx()
+        num_points_per_cloud = pointclouds_cuda.num_points_per_cloud()
+        radius = radius.to(device)
+        args = (
+            points_packed,
+            cloud_to_packed_first_idx,
+            num_points_per_cloud,
+            image_size,
+            radius,
+            bin_size,
+            max_points_per_bin,
+        )
+        bp_cuda = _C._rasterize_points_coarse(*args)
+
+        # Bin points might not be the same: CUDA version might write them in
+        # any order. But if we sort the non-(-1) elements of the CUDA output
+        # then they should be the same.
+        for n in range(N):
+            for by in range(bp_cpu.shape[1]):
+                for bx in range(bp_cpu.shape[2]):
+                    K = (bp_cpu[n, by, bx] != -1).sum().item()
+                    idxs_cpu = bp_cpu[n, by, bx].tolist()
+                    idxs_cuda = bp_cuda[n, by, bx].tolist()
+                    idxs_cuda[:K] = sorted(idxs_cuda[:K])
+                    self.assertEqual(idxs_cpu, idxs_cuda)
+
+    def _test_coarse_rasterize(self, device):
+        #
+        #
+        #           |2                  (4)
+        #           |
+        #           |
+        #           |
+        #           |1
+        #           |
+        #           |    (1)
+        #        (2)|
+        # _________(5)___(0)_______________
+        # -1        |           1         2
+        #           |
+        #           |            (3)
+        #           |
+        #           |-1
+        #
+        # Locations of the points are shown by o. The screen bounding box
+        # is between [-1, 1] in both the x and y directions.
+        #
+        # These points are interesting because:
+        # (0) Falls into two bins;
+        # (1) and (2) fall into one bin;
+        # (3) is out-of-bounds, but its disk is in-bounds;
+        # (4) is out-of-bounds, and its entire disk is also out-of-bounds
+        # (5) has a negative z-value, so it should be skipped
+        # fmt: off
+        points = torch.tensor(
+            [
+                [ 0.5,  0.0,  0.0],  # noqa: E241, E201
+                [ 0.5,  0.5,  0.1],  # noqa: E241, E201
+                [-0.3,  0.4,  0.0],  # noqa: E241
+                [ 1.1, -0.5,  0.2],  # noqa: E241, E201
+                [ 2.0,  2.0,  0.3],  # noqa: E241, E201
+                [ 0.0,  0.0, -0.1],  # noqa: E241, E201
+            ],
+            device=device
+        )
+        # fmt: on
+        image_size = (16, 16)
+        radius = 0.2
+        bin_size = 8
+        max_points_per_bin = 5
+
+        bin_points_expected = -1 * torch.ones(
+            1, 2, 2, 5, dtype=torch.int32, device=device
+        )
+        # Note that the order is only deterministic here for CUDA if all points
+        # fit in one chunk. This will the the case for this small example, but
+        # to properly exercise coordinated writes among multiple chunks we need
+        # to use a bigger test case.
+        bin_points_expected[0, 0, 1, :2] = torch.tensor([0, 3])
+        bin_points_expected[0, 1, 0, 0] = torch.tensor([2])
+        bin_points_expected[0, 1, 1, :2] = torch.tensor([0, 1])
+
+        pointclouds = Pointclouds(points=[points])
+        radius = torch.full((points.shape[0],), fill_value=radius, device=device)
+        args = (
+            pointclouds.points_packed(),
+            pointclouds.cloud_to_packed_first_idx(),
+            pointclouds.num_points_per_cloud(),
+            image_size,
+            radius,
+            bin_size,
+            max_points_per_bin,
+        )
+        bin_points = _C._rasterize_points_coarse(*args)
+        bin_points_same = (bin_points == bin_points_expected).all()
+
+        self.assertTrue(bin_points_same.item() == 1)
+
+    def _test_variable_size_radius(self, rasterize_points_fn, device, bin_size=0):
+        # Two points
+        points = torch.tensor(
+            [[0.5, 0.5, 0.3], [0.5, -0.5, -0.1], [0.0, 0.0, 0.3]],
+            dtype=torch.float32,
+            device=device,
+        )
+        image_size = 16
+        points_per_pixel = 1
+        radius = torch.tensor([0.1, 0.0, 0.2], dtype=torch.float32, device=device)
+        pointclouds = Pointclouds(points=[points])
+        if bin_size == -1:
+            # simple python case with no binning
+            idx, zbuf, dists = rasterize_points_fn(
+                pointclouds, image_size, radius, points_per_pixel
+            )
+        else:
+            idx, zbuf, dists = rasterize_points_fn(
+                pointclouds, image_size, radius, points_per_pixel, bin_size
+            )
+
+        idx_expected = torch.zeros(
+            (1, image_size, image_size, 1), dtype=torch.int64, device=device
+        )
+        # fmt: off
+        idx_expected[0, ..., 0] = torch.tensor(
+            [
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241
+                [-1, -1, -1,  0,  0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241
+                [-1, -1, -1,  0,  0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241
+                [-1, -1, -1, -1, -1, -1, -1,  2,  2, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241
+                [-1, -1, -1, -1, -1, -1,  2,  2,  2,  2, -1, -1, -1, -1, -1, -1],  # noqa: E241
+                [-1, -1, -1, -1, -1, -1,  2,  2,  2,  2, -1, -1, -1, -1, -1, -1],  # noqa: E241
+                [-1, -1, -1, -1, -1, -1, -1,  2,  2, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],  # noqa: E241
+                [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]   # noqa: E241
+            ],
+            dtype=torch.int64,
+            device=device
+        )
+        # fmt: on
+        zbuf_expected = torch.full(
+            idx_expected.shape, fill_value=-1, dtype=torch.float32, device=device
+        )
+        zbuf_expected[idx_expected == 0] = 0.3
+        zbuf_expected[idx_expected == 2] = 0.3
+
+        dists_expected = torch.full(
+            idx_expected.shape, fill_value=-1, dtype=torch.float32, device=device
+        )
+
+        # fmt: off
+        dists_expected[0, ..., 0] = torch.Tensor(
+            [
+                [-1., -1., -1.,    -1.,     -1., -1.,    -1.,    -1.,    -1.,    -1., -1., -1., -1., -1., -1., -1.],  # noqa: E241, B950
+                [-1., -1., -1.,    -1.,     -1., -1.,    -1.,    -1.,    -1.,    -1., -1., -1., -1., -1., -1., -1.],  # noqa: E241, B950
+                [-1., -1., -1.,    -1.,     -1., -1.,    -1.,    -1.,     -1.,   -1., -1., -1., -1., -1., -1., -1.],  # noqa: E241, B950
+                [-1., -1., -1., 0.0078, 0.0078,  -1.,    -1.,    -1.,    -1.,    -1., -1., -1., -1., -1., -1., -1.],  # noqa: E241, B950
+                [-1., -1., -1., 0.0078, 0.0078,  -1.,    -1.,    -1.,    -1.,    -1., -1., -1., -1., -1., -1., -1.],  # noqa: E241, B950
+                [-1., -1., -1.,    -1.,     -1., -1.,    -1.,    -1.,    -1.,    -1., -1., -1., -1., -1., -1., -1.],  # noqa: E241, B950
+                [-1., -1., -1.,    -1.,     -1., -1.,    -1., 0.0391, 0.0391,    -1., -1., -1., -1., -1., -1., -1.],  # noqa: E241, B950
+                [-1., -1., -1.,    -1.,     -1., -1., 0.0391, 0.0078, 0.0078, 0.0391, -1., -1., -1., -1., -1., -1.],  # noqa: E241, B950
+                [-1., -1., -1.,    -1.,     -1., -1., 0.0391, 0.0078, 0.0078, 0.0391, -1., -1., -1., -1., -1., -1.],  # noqa: E241, B950
+                [-1., -1., -1.,    -1.,     -1., -1.,    -1., 0.0391, 0.0391,    -1., -1., -1., -1., -1., -1., -1.],  # noqa: E241, B950
+                [-1., -1., -1.,    -1.,     -1., -1.,    -1.,    -1.,    -1.,    -1., -1., -1., -1., -1., -1., -1.],  # noqa: E241, B950
+                [-1., -1., -1.,    -1.,     -1., -1.,    -1.,    -1.,    -1.,    -1., -1., -1., -1., -1., -1., -1.],  # noqa: E241, B950
+                [-1., -1., -1.,    -1.,     -1., -1.,    -1.,    -1.,    -1.,    -1., -1., -1., -1., -1., -1., -1.],  # noqa: E241, B950
+                [-1., -1., -1.,    -1.,     -1., -1.,    -1.,    -1.,    -1.,    -1., -1., -1., -1., -1., -1., -1.],  # noqa: E241, B950
+                [-1., -1., -1.,    -1.,     -1., -1.,    -1.,    -1.,    -1.,    -1., -1., -1., -1., -1., -1., -1.],  # noqa: E241, B950
+                [-1., -1., -1.,    -1.,     -1., -1.,    -1.,    -1.,    -1.,    -1., -1., -1., -1., -1., -1., -1.]   # noqa: E241, B950
+            ]
+        )
+        # fmt: on
+
+        # Check the distances for a point are less than the squared radius
+        # for that point.
+        self.assertTrue((dists[idx == 0] < radius[0] ** 2).all())
+        self.assertTrue((dists[idx == 2] < radius[2] ** 2).all())
+
+        # Check all values are correct.
+        idx_same = (idx == idx_expected).all().item() == 1
+        zbuf_same = (zbuf == zbuf_expected).all().item() == 1
+
+        self.assertTrue(idx_same)
+        self.assertTrue(zbuf_same)
+        self.assertClose(dists, dists_expected, atol=4e-5)
+
+    def test_radius_format_failure(self):
+        N = 20
+        P_max = 15
+        points_list = []
+        for _ in range(N):
+            p = torch.randint(low=1, high=P_max, size=(1,))[0]
+            points_list.append(torch.randn((p, 3)))
+
+        points = Pointclouds(points=points_list)
+
+        # Incorrect shape
+        with self.assertRaisesRegex(ValueError, "radius must be of shape"):
+            _format_radius([0, 1, 2], points)
+
+        # Incorrect type
+        with self.assertRaisesRegex(ValueError, "float, list, tuple or tensor"):
+            _format_radius({0: [0, 1, 2]}, points)
diff --git a/pytorch3d/tests/test_rasterize_rectangle_images.py b/pytorch3d/tests/test_rasterize_rectangle_images.py
new file mode 100644
index 0000000000000000000000000000000000000000..536eb04fdfbb9ffb173831c908752780a9c4f265
--- /dev/null
+++ b/pytorch3d/tests/test_rasterize_rectangle_images.py
@@ -0,0 +1,833 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import re
+import unittest
+from itertools import product
+
+import numpy as np
+import torch
+from PIL import Image
+from pytorch3d.io import load_obj
+from pytorch3d.renderer import (
+    BlendParams,
+    FoVPerspectiveCameras,
+    look_at_view_transform,
+    Materials,
+    MeshRasterizer,
+    MeshRenderer,
+    PointLights,
+    RasterizationSettings,
+    SoftPhongShader,
+    SplatterPhongShader,
+    TexturesUV,
+)
+from pytorch3d.renderer.mesh.rasterize_meshes import (
+    rasterize_meshes,
+    rasterize_meshes_python,
+)
+from pytorch3d.renderer.mesh.rasterizer import Fragments
+from pytorch3d.renderer.opengl import MeshRasterizerOpenGL
+from pytorch3d.renderer.points import (
+    AlphaCompositor,
+    PointsRasterizationSettings,
+    PointsRasterizer,
+    PointsRenderer,
+)
+from pytorch3d.renderer.points.rasterize_points import (
+    rasterize_points,
+    rasterize_points_python,
+)
+from pytorch3d.renderer.points.rasterizer import PointFragments
+from pytorch3d.structures import Meshes, Pointclouds
+from pytorch3d.transforms.transform3d import Transform3d
+from pytorch3d.utils import torus
+
+from .common_testing import (
+    get_pytorch3d_dir,
+    get_tests_dir,
+    load_rgb_image,
+    TestCaseMixin,
+)
+
+
+# When True, the debug-image helpers in this file write PNG masks into DATA_DIR.
+DEBUG = False
+# Directory holding the test data (reference images are loaded from here).
+DATA_DIR = get_tests_dir() / "data"
+
+# Verts/Faces for a simple mesh with two faces.
+# All vertices have x/y within [-0.7, 0.7] and z = 1.0.
+verts0 = torch.tensor(
+    [
+        [-0.7, -0.70, 1.0],
+        [0.0, -0.1, 1.0],
+        [0.7, -0.7, 1.0],
+        [-0.7, 0.1, 1.0],
+        [0.0, 0.7, 1.0],
+        [0.7, 0.1, 1.0],
+    ],
+    dtype=torch.float32,
+)
+faces0 = torch.tensor([[1, 0, 2], [4, 3, 5]], dtype=torch.int64)
+
+# Points for a simple point cloud. Get the vertices from a
+# torus and apply rotations such that the points are no longer
+# symmetrical in X/Y.
+torus_mesh = torus(r=0.25, R=1.0, sides=5, rings=2 * 5)
+# Rotate about Y, then about Z, then scale everything down by 0.3.
+t = (
+    Transform3d()
+    .rotate_axis_angle(angle=90, axis="Y")
+    .rotate_axis_angle(angle=45, axis="Z")
+    .scale(0.3)
+)
+# squeeze() drops the size-1 dimensions of the transformed padded verts.
+torus_points = t.transform_points(torus_mesh.verts_padded()).squeeze()
+
+
+def _save_debug_image(idx, image_size, bin_size, blur):
+    """
+    Save a mask image from the rasterization output for debugging.
+    """
+    H, W = image_size
+    # Save out the last image for debugging
+    rgb = (idx[-1, ..., :3].cpu() > -1).squeeze()
+    suffix = "square" if H == W else "non_square"
+    filename = "%s_bin_size_%s_blur_%.3f_%dx%d.png"
+    filename = filename % (suffix, str(bin_size), blur, H, W)
+    if DEBUG:
+        filename = "DEBUG_%s" % filename
+        Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(DATA_DIR / filename)
+
+
+class TestRasterizeRectangleImagesErrors(TestCaseMixin, unittest.TestCase):
+    def test_mesh_image_size_arg(self):
+        meshes = Meshes(verts=[verts0], faces=[faces0])
+
+        with self.assertRaisesRegex(ValueError, re.escape("tuple/list of (H, W)")):
+            rasterize_meshes(
+                meshes,
+                (100, 200, 3),
+                0.0001,
+                faces_per_pixel=1,
+            )
+
+        with self.assertRaisesRegex(ValueError, "sizes must be greater than 0"):
+            rasterize_meshes(
+                meshes,
+                (0, 10),
+                0.0001,
+                faces_per_pixel=1,
+            )
+
+        with self.assertRaisesRegex(ValueError, "sizes must be integers"):
+            rasterize_meshes(
+                meshes,
+                (100.5, 120.5),
+                0.0001,
+                faces_per_pixel=1,
+            )
+
+    def test_points_image_size_arg(self):
+        points = Pointclouds([verts0])
+
+        with self.assertRaisesRegex(ValueError, re.escape("tuple/list of (H, W)")):
+            rasterize_points(
+                points,
+                (100, 200, 3),
+                0.0001,
+                points_per_pixel=1,
+            )
+
+        with self.assertRaisesRegex(ValueError, "sizes must be greater than 0"):
+            rasterize_points(
+                points,
+                (0, 10),
+                0.0001,
+                points_per_pixel=1,
+            )
+
+        with self.assertRaisesRegex(ValueError, "sizes must be integers"):
+            rasterize_points(
+                points,
+                (100.5, 120.5),
+                0.0001,
+                points_per_pixel=1,
+            )
+
+
+class TestRasterizeRectangleImagesMeshes(TestCaseMixin, unittest.TestCase):
+    @staticmethod
+    def _clone_mesh(verts0, faces0, device, batch_size):
+        """
+        Helper function to detach and clone the verts/faces.
+        This is needed in order to set up the tensors for
+        gradient computation in different tests.
+        """
+        verts = verts0.detach().clone()
+        verts.requires_grad = True
+        meshes = Meshes(verts=[verts], faces=[faces0])
+        meshes = meshes.to(device).extend(batch_size)
+        return verts, meshes
+
+    def _rasterize(self, meshes, image_size, bin_size, blur):
+        """
+        Simple wrapper around the rasterize function to return
+        the fragment data.
+        """
+        face_idxs, zbuf, bary_coords, pix_dists = rasterize_meshes(
+            meshes,
+            image_size,
+            blur,
+            faces_per_pixel=1,
+            bin_size=bin_size,
+        )
+        return Fragments(
+            pix_to_face=face_idxs,
+            zbuf=zbuf,
+            bary_coords=bary_coords,
+            dists=pix_dists,
+        )
+
+    @staticmethod
+    def _save_debug_image(fragments, image_size, bin_size, blur):
+        """
+        Save a mask image from the rasterization output for debugging.
+        """
+        H, W = image_size
+        # Save out the last image for debugging
+        rgb = (fragments.pix_to_face[-1, ..., :3].cpu() > -1).squeeze()
+        suffix = "square" if H == W else "non_square"
+        filename = "triangle_%s_bin_size_%s_blur_%.3f_%dx%d.png"
+        filename = filename % (suffix, str(bin_size), blur, H, W)
+        if DEBUG:
+            filename = "DEBUG_%s" % filename
+            Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                DATA_DIR / filename
+            )
+
+    def _check_fragments(self, frag_1, frag_2):
+        """
+        Helper function to check that the tensors in
+        the Fragments frag_1 and frag_2 are the same.
+        """
+        self.assertClose(frag_1.pix_to_face, frag_2.pix_to_face)
+        self.assertClose(frag_1.dists, frag_2.dists)
+        self.assertClose(frag_1.bary_coords, frag_2.bary_coords)
+        self.assertClose(frag_1.zbuf, frag_2.zbuf)
+
+    def _compare_square_with_nonsq(
+        self,
+        image_size,
+        blur,
+        device,
+        verts0,
+        faces0,
+        nonsq_fragment_gradtensor_list,
+        batch_size=1,
+    ):
+        """
+        Calculate the output from rasterizing a square image with the minimum of (H, W).
+        Then compare this with the same square region in the non square image.
+        The input mesh faces given by faces0 and verts0 are contained within the
+        [-1, 1] range of the image so all the relevant pixels will be within the square region.
+
+        Both the forward outputs (dists, zbuf, bary_coords) and the gradients
+        (of those outputs and of the input verts) are compared.
+
+        Args:
+            image_size: (H, W) of the non square image.
+            blur: blur value passed to the rasterizer for the square image.
+            device: device on which the square mesh is created.
+            verts0: vertices of the mesh, cloned for the square rasterization.
+            faces0: faces of the mesh.
+            nonsq_fragment_gradtensor_list: list of
+                (fragments, verts grad tensor, name) tuples obtained from
+                rasterizing non square images. The name is not used here.
+            batch_size: number of copies of the mesh in the square batch.
+        """
+        # Rasterize the square version of the image
+        H, W = image_size
+        S = min(H, W)
+        verts_square, meshes_sq = self._clone_mesh(verts0, faces0, device, batch_size)
+        square_fragments = self._rasterize(
+            meshes_sq, image_size=(S, S), bin_size=0, blur=blur
+        )
+        # Save debug image (this calls the module-level helper, not the
+        # static method of this class).
+        _save_debug_image(square_fragments.pix_to_face, (S, S), 0, blur)
+
+        # Extract the values in the square image which are covered by a
+        # face, i.e. where pix_to_face > -1.
+        square_mask = square_fragments.pix_to_face > -1
+        square_dists = square_fragments.dists[square_mask]
+        square_zbuf = square_fragments.zbuf[square_mask]
+        square_bary = square_fragments.bary_coords[square_mask]
+
+        # Retain gradients on the output of fragments to check
+        # intermediate values with the non square outputs.
+        square_fragments.dists.retain_grad()
+        square_fragments.bary_coords.retain_grad()
+        square_fragments.zbuf.retain_grad()
+
+        # Calculate gradient for the square image. The random weights are
+        # seeded and reused below for the non square losses.
+        torch.manual_seed(231)
+        grad_zbuf = torch.randn_like(square_zbuf)
+        grad_dist = torch.randn_like(square_dists)
+        grad_bary = torch.randn_like(square_bary)
+        loss0 = (
+            (grad_dist * square_dists).sum()
+            + (grad_zbuf * square_zbuf).sum()
+            + (grad_bary * square_bary).sum()
+        )
+        loss0.backward()
+
+        # Now compare against the non square outputs provided
+        # in the nonsq_fragment_gradtensor_list list
+        for fragments, grad_tensor, _name in nonsq_fragment_gradtensor_list:
+            # Check that there are the same number of covered pixels
+            # (pix_to_face > -1) in both the square and non square images.
+            non_square_mask = fragments.pix_to_face > -1
+            self.assertEqual(non_square_mask.sum().item(), square_mask.sum().item())
+
+            # Check dists, zbuf and bary match the square image
+            non_square_dists = fragments.dists[non_square_mask]
+            non_square_zbuf = fragments.zbuf[non_square_mask]
+            non_square_bary = fragments.bary_coords[non_square_mask]
+            self.assertClose(square_dists, non_square_dists)
+            self.assertClose(square_zbuf, non_square_zbuf)
+            self.assertClose(
+                square_bary,
+                non_square_bary,
+                atol=2e-7,
+            )
+
+            # Retain gradients to compare values with outputs from
+            # square image
+            fragments.dists.retain_grad()
+            fragments.bary_coords.retain_grad()
+            fragments.zbuf.retain_grad()
+            loss1 = (
+                (grad_dist * non_square_dists).sum()
+                + (grad_zbuf * non_square_zbuf).sum()
+                + (grad_bary * non_square_bary).sum()
+            )
+            # loss1 is already a scalar, so the extra .sum() has no effect.
+            loss1.sum().backward()
+
+            # Get the gradients at the covered pixels of the intermediate
+            # outputs and compare with the values from the square image
+            non_square_grad_dists = fragments.dists.grad[non_square_mask]
+            non_square_grad_bary = fragments.bary_coords.grad[non_square_mask]
+            non_square_grad_zbuf = fragments.zbuf.grad[non_square_mask]
+
+            self.assertClose(
+                non_square_grad_dists,
+                square_fragments.dists.grad[square_mask],
+            )
+            self.assertClose(
+                non_square_grad_bary,
+                square_fragments.bary_coords.grad[square_mask],
+            )
+            self.assertClose(
+                non_square_grad_zbuf,
+                square_fragments.zbuf.grad[square_mask],
+            )
+
+            # Finally check the gradients of the input vertices for
+            # the square and non square case
+            self.assertClose(verts_square.grad, grad_tensor.grad, rtol=3e-4, atol=5e-3)
+
+    def test_gpu(self):
+        """
+        Test that the output of rendering non square images
+        gives the same result as square images. i.e. the
+        dists, zbuf, bary are all the same for the square
+        region which is present in both images.
+        """
+        # Test both cases: (W > H), (H > W) as well as the case where
+        # H and W are not integer multiples of each other (i.e. float aspect ratio)
+        image_sizes = [(64, 128), (128, 64), (128, 256), (256, 128), (600, 1110)]
+
+        devices = ["cuda:0"]
+        blurs = [0.0, 0.001]
+        batch_sizes = [1, 4]
+        test_cases = product(image_sizes, blurs, devices, batch_sizes)
+
+        for image_size, blur, device, batch_size in test_cases:
+            # Initialize the verts grad tensor and the meshes objects
+            verts_nonsq_naive, meshes_nonsq_naive = self._clone_mesh(
+                verts0, faces0, device, batch_size
+            )
+            verts_nonsq_binned, meshes_nonsq_binned = self._clone_mesh(
+                verts0, faces0, device, batch_size
+            )
+
+            # Get the outputs for both naive and coarse to fine rasterization
+            fragments_naive = self._rasterize(
+                meshes_nonsq_naive,
+                image_size,
+                blur=blur,
+                bin_size=0,
+            )
+            fragments_binned = self._rasterize(
+                meshes_nonsq_binned,
+                image_size,
+                blur=blur,
+                bin_size=None,
+            )
+
+            # Save out debug images if needed
+            _save_debug_image(fragments_naive.pix_to_face, image_size, 0, blur)
+            _save_debug_image(fragments_binned.pix_to_face, image_size, None, blur)
+
+            # Check naive and binned fragments give the same outputs
+            self._check_fragments(fragments_naive, fragments_binned)
+
+            # Here we want to compare the square image with the naive and the
+            # coarse to fine methods outputs
+            nonsq_fragment_gradtensor_list = [
+                (fragments_naive, verts_nonsq_naive, "naive"),
+                (fragments_binned, verts_nonsq_binned, "coarse-to-fine"),
+            ]
+
+            self._compare_square_with_nonsq(
+                image_size,
+                blur,
+                device,
+                verts0,
+                faces0,
+                nonsq_fragment_gradtensor_list,
+                batch_size,
+            )
+
+    def test_cpu(self):
+        """
+        Test that the output of rendering non square images
+        gives the same result as square images. i.e. the
+        dists, zbuf, bary are all the same for the square
+        region which is present in both images.
+
+        In this test we compare between the naive C++ implementation
+        and the naive python implementation as the Coarse/Fine
+        method is not fully implemented in C++
+        """
+        # Test both when (W > H) and (H > W).
+        # Using smaller image sizes here as the Python rasterizer is really slow.
+        image_sizes = [(32, 64), (64, 32), (60, 110)]
+        devices = ["cpu"]
+        blurs = [0.0, 0.001]
+        batch_sizes = [1]
+        test_cases = product(image_sizes, blurs, devices, batch_sizes)
+
+        for image_size, blur, device, batch_size in test_cases:
+            # Initialize the verts grad tensor and the meshes objects
+            verts_nonsq_naive, meshes_nonsq_naive = self._clone_mesh(
+                verts0, faces0, device, batch_size
+            )
+            verts_nonsq_python, meshes_nonsq_python = self._clone_mesh(
+                verts0, faces0, device, batch_size
+            )
+
+            # Compare Naive CPU with Python as Coarse/Fine rasteriztation
+            # is not implemented for CPU
+            fragments_naive = self._rasterize(
+                meshes_nonsq_naive, image_size, bin_size=0, blur=blur
+            )
+            face_idxs, zbuf, bary_coords, pix_dists = rasterize_meshes_python(
+                meshes_nonsq_python,
+                image_size,
+                blur,
+                faces_per_pixel=1,
+            )
+            fragments_python = Fragments(
+                pix_to_face=face_idxs,
+                zbuf=zbuf,
+                bary_coords=bary_coords,
+                dists=pix_dists,
+            )
+
+            # Save debug images if DEBUG is set to true at the top of the file.
+            _save_debug_image(fragments_naive.pix_to_face, image_size, 0, blur)
+            _save_debug_image(fragments_python.pix_to_face, image_size, "python", blur)
+
+            # List of non square outputs to compare with the square output
+            nonsq_fragment_gradtensor_list = [
+                (fragments_naive, verts_nonsq_naive, "naive"),
+                (fragments_python, verts_nonsq_python, "python"),
+            ]
+            self._compare_square_with_nonsq(
+                image_size,
+                blur,
+                device,
+                verts0,
+                faces0,
+                nonsq_fragment_gradtensor_list,
+                batch_size,
+            )
+
+    def test_render_cow(self):
+        self._render_cow(MeshRasterizer)
+
+    def test_render_cow_opengl(self):
+        self._render_cow(MeshRasterizerOpenGL)
+
+    def _render_cow(self, rasterizer_type):
+        """
+        Test a larger textured mesh is rendered correctly in a non square image.
+        """
+        device = torch.device("cuda:0")
+        obj_dir = get_pytorch3d_dir() / "docs/tutorials/data"
+        obj_filename = obj_dir / "cow_mesh/cow.obj"
+
+        # Load mesh + texture
+        verts, faces, aux = load_obj(
+            obj_filename, device=device, load_textures=True, texture_wrap=None
+        )
+        tex_map = list(aux.texture_images.values())[0]
+        tex_map = tex_map[None, ...].to(faces.textures_idx.device)
+        textures = TexturesUV(
+            maps=tex_map, faces_uvs=[faces.textures_idx], verts_uvs=[aux.verts_uvs]
+        )
+        mesh = Meshes(verts=[verts], faces=[faces.verts_idx], textures=textures)
+
+        # Init rasterizer settings
+        R, T = look_at_view_transform(1.2, 0, 90)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+
+        raster_settings = RasterizationSettings(
+            image_size=(500, 800), blur_radius=0.0, faces_per_pixel=1
+        )
+
+        # Init shader settings
+        materials = Materials(device=device)
+        lights = PointLights(device=device)
+        lights.location = torch.tensor([0.0, 0.0, -2.0], device=device)[None]
+
+        # Init renderer
+        rasterizer = rasterizer_type(cameras=cameras, raster_settings=raster_settings)
+        if rasterizer_type == MeshRasterizer:
+            blend_params = BlendParams(
+                sigma=1e-1,
+                gamma=1e-4,
+                background_color=torch.tensor([1.0, 1.0, 1.0], device=device),
+            )
+            shader = SoftPhongShader(
+                lights=lights,
+                cameras=cameras,
+                materials=materials,
+                blend_params=blend_params,
+            )
+        else:
+            blend_params = BlendParams(
+                sigma=0.5,
+                background_color=torch.tensor([1.0, 1.0, 1.0], device=device),
+            )
+            shader = SplatterPhongShader(
+                lights=lights,
+                cameras=cameras,
+                materials=materials,
+                blend_params=blend_params,
+            )
+
+        renderer = MeshRenderer(rasterizer=rasterizer, shader=shader)
+
+        # Load reference image
+        image_ref = load_rgb_image(
+            f"test_cow_image_rectangle_{rasterizer_type.__name__}.png", DATA_DIR
+        )
+
+        for bin_size in [0, None]:
+            if bin_size == 0 and rasterizer_type == MeshRasterizerOpenGL:
+                continue
+
+            # Check both naive and coarse to fine produce the same output.
+            renderer.rasterizer.raster_settings.bin_size = bin_size
+            images = renderer(mesh)
+            rgb = images[0, ..., :3].squeeze().cpu()
+
+            if DEBUG:
+                Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR
+                    / f"DEBUG_cow_image_rectangle_{rasterizer_type.__name__}.png"
+                )
+
+            # NOTE some pixels can be flaky
+            cond1 = torch.allclose(rgb, image_ref, atol=0.05)
+            self.assertTrue(cond1)
+
+
+class TestRasterizeRectangleImagesPointclouds(TestCaseMixin, unittest.TestCase):
+    @staticmethod
+    def _clone_pointcloud(verts0, device, batch_size):
+        """
+        Make an independent leaf copy of `verts0` that tracks gradients and
+        wrap it in a point cloud batch.
+
+        Returns the copied tensor together with a Pointclouds built from it,
+        moved to `device` and extended to `batch_size` entries.
+        """
+        leaf_points = verts0.detach().clone().requires_grad_(True)
+        batch = Pointclouds(points=[leaf_points]).to(device).extend(batch_size)
+        return leaf_points, batch
+
+    def _rasterize(self, meshes, image_size, bin_size, blur):
+        """
+        Run `rasterize_points` with a single point per pixel and bundle
+        its three outputs into a PointFragments.
+
+        Args:
+            meshes: forwarded as the first argument of `rasterize_points`;
+                the tests in this class pass Pointclouds here.
+            image_size: forwarded unchanged.
+            bin_size: forwarded as the `bin_size` keyword.
+            blur: forwarded as the third positional argument.
+        """
+        raster_out = rasterize_points(
+            meshes, image_size, blur, points_per_pixel=1, bin_size=bin_size
+        )
+        point_idx, depth, pix_dists = raster_out
+        return PointFragments(idx=point_idx, zbuf=depth, dists=pix_dists)
+
+    def _check_fragments(self, frag_1, frag_2):
+        """
+        Assert that the `idx`, `dists` and `zbuf` tensors of the two
+        fragments are close, comparing them in that order.
+        """
+        for field in ("idx", "dists", "zbuf"):
+            first, second = getattr(frag_1, field), getattr(frag_2, field)
+            self.assertClose(first, second)
+
+    def _compare_square_with_nonsq(
+        self,
+        image_size,
+        blur,
+        device,
+        points,
+        nonsq_fragment_gradtensor_list,
+        batch_size=1,
+    ):
+        """
+        Calculate the output from rasterizing a square image with the minimum of (H, W).
+        Then compare this with the same square region in the non square image.
+        The input points are contained within the [-1, 1] range of the image
+        so all the relevant pixels will be within the square region.
+
+        `nonsq_fragment_gradtensor_list` is a list of (fragments, verts grad tensor,
+        name) tuples obtained from rasterizing non square images.
+        """
+        # Rasterize the square version of the image
+        H, W = image_size
+        S = min(H, W)
+        points_square, pointclouds_sq = self._clone_pointcloud(
+            points, device, batch_size
+        )
+        square_fragments = self._rasterize(
+            pointclouds_sq, image_size=(S, S), bin_size=0, blur=blur
+        )
+        # Save debug image
+        _save_debug_image(square_fragments.idx, (S, S), 0, blur)
+
+        # Extract the values at the square image pixels that hold a point (idx > -1).
+        square_mask = square_fragments.idx > -1
+        square_dists = square_fragments.dists[square_mask]
+        square_zbuf = square_fragments.zbuf[square_mask]
+
+        # Retain gradients on the output of fragments to check
+        # intermediate values with the non square outputs.
+        square_fragments.dists.retain_grad()
+        square_fragments.zbuf.retain_grad()
+
+        # Calculate gradient for the square image
+        torch.manual_seed(231)  # fixed seed so the random weights below are repeatable
+        grad_zbuf = torch.randn_like(square_zbuf)
+        grad_dist = torch.randn_like(square_dists)
+        loss0 = (grad_dist * square_dists).sum() + (grad_zbuf * square_zbuf).sum()
+        loss0.backward()
+
+        # Now compare against the non square outputs provided
+        # in the nonsq_fragment_gradtensor_list list
+        for fragments, grad_tensor, _name in nonsq_fragment_gradtensor_list:
+            # Check that the same number of pixels hold a point (idx > -1)
+            # in both the square and non square images.
+            non_square_mask = fragments.idx > -1
+            self.assertEqual(non_square_mask.sum().item(), square_mask.sum().item())
+
+            # Check dists and zbuf match the square image
+            non_square_dists = fragments.dists[non_square_mask]
+            non_square_zbuf = fragments.zbuf[non_square_mask]
+            self.assertClose(square_dists, non_square_dists)
+            self.assertClose(square_zbuf, non_square_zbuf)
+
+            # Retain gradients to compare values with outputs from
+            # square image
+            fragments.dists.retain_grad()
+            fragments.zbuf.retain_grad()
+            # The same random weights as for loss0 are reused here.
+            loss1 = (grad_dist * non_square_dists).sum() + (
+                grad_zbuf * non_square_zbuf
+            ).sum()
+            loss1.sum().backward()  # loss1 is already a scalar; the extra .sum() leaves it unchanged
+
+            # Get the gradients at the pixels that hold a point
+            # and compare with the values from the square image
+            non_square_grad_dists = fragments.dists.grad[non_square_mask]
+            non_square_grad_zbuf = fragments.zbuf.grad[non_square_mask]
+
+            self.assertClose(
+                non_square_grad_dists,
+                square_fragments.dists.grad[square_mask],
+            )
+            self.assertClose(
+                non_square_grad_zbuf,
+                square_fragments.zbuf.grad[square_mask],
+            )
+
+            # Finally check the gradients of the input vertices for
+            # the square and non square case
+            self.assertClose(points_square.grad, grad_tensor.grad, rtol=2e-4)
+
+    def test_gpu(self):
+        """
+        Test that the output of rendering non square images
+        gives the same result as square images. i.e. the
+        dists, zbuf, idx are all the same for the square
+        region which is present in both images.
+        """
+        # Test both cases: (W > H), (H > W) as well as the case where
+        # H and W are not integer multiples of each other (i.e. float aspect ratio)
+        image_sizes = [(64, 128), (128, 64), (128, 256), (256, 128), (600, 1110)]
+
+        devices = ["cuda:0"]
+        blurs = [5e-2]
+        batch_sizes = [1, 4]
+        test_cases = product(image_sizes, blurs, devices, batch_sizes)
+
+        for image_size, blur, device, batch_size in test_cases:
+            # Initialize the verts grad tensors and the pointclouds objects
+            verts_nonsq_naive, pointcloud_nonsq_naive = self._clone_pointcloud(
+                torus_points, device, batch_size
+            )
+            verts_nonsq_binned, pointcloud_nonsq_binned = self._clone_pointcloud(
+                torus_points, device, batch_size
+            )
+
+            # Get the outputs for both naive (bin_size=0) and
+            # coarse to fine (bin_size=None) rasterization
+            fragments_naive = self._rasterize(
+                pointcloud_nonsq_naive,
+                image_size,
+                blur=blur,
+                bin_size=0,
+            )
+            fragments_binned = self._rasterize(
+                pointcloud_nonsq_binned,
+                image_size,
+                blur=blur,
+                bin_size=None,
+            )
+
+            # Save out debug images if needed
+            _save_debug_image(fragments_naive.idx, image_size, 0, blur)
+            _save_debug_image(fragments_binned.idx, image_size, None, blur)
+
+            # Check naive and binned fragments give the same outputs
+            self._check_fragments(fragments_naive, fragments_binned)
+
+            # Here we want to compare the square image with the naive and the
+            # coarse to fine methods outputs
+            nonsq_fragment_gradtensor_list = [
+                (fragments_naive, verts_nonsq_naive, "naive"),
+                (fragments_binned, verts_nonsq_binned, "coarse-to-fine"),
+            ]
+
+            self._compare_square_with_nonsq(
+                image_size,
+                blur,
+                device,
+                torus_points,
+                nonsq_fragment_gradtensor_list,
+                batch_size,
+            )
+
+    def test_cpu(self):
+        """
+        Test that the output of rendering non square images
+        gives the same result as square images. i.e. the
+        dists, zbuf, idx are all the same for the square
+        region which is present in both images.
+
+        In this test we compare between the naive C++ implementation
+        and the naive python implementation as the Coarse/Fine
+        method is not fully implemented in C++
+        """
+        # Test both when (W > H) and (H > W).
+        # Using smaller image sizes here as the Python rasterizer is really slow.
+        image_sizes = [(32, 64), (64, 32), (60, 110)]
+        devices = ["cpu"]
+        blurs = [5e-2]
+        batch_sizes = [1]
+        test_cases = product(image_sizes, blurs, devices, batch_sizes)
+
+        for image_size, blur, device, batch_size in test_cases:
+            # Initialize the verts grad tensors and the pointclouds objects
+            verts_nonsq_naive, pointcloud_nonsq_naive = self._clone_pointcloud(
+                torus_points, device, batch_size
+            )
+            verts_nonsq_python, pointcloud_nonsq_python = self._clone_pointcloud(
+                torus_points, device, batch_size
+            )
+
+            # Compare Naive CPU with Python as Coarse/Fine rasterization
+            # is not implemented for CPU
+            fragments_naive = self._rasterize(
+                pointcloud_nonsq_naive, image_size, bin_size=0, blur=blur
+            )
+            idxs, zbuf, pix_dists = rasterize_points_python(
+                pointcloud_nonsq_python,
+                image_size,
+                blur,
+                points_per_pixel=1,
+            )
+            fragments_python = PointFragments(
+                idx=idxs,
+                zbuf=zbuf,
+                dists=pix_dists,
+            )
+
+            # Save debug images if DEBUG is set to true at the top of the file.
+            _save_debug_image(fragments_naive.idx, image_size, 0, blur)
+            _save_debug_image(fragments_python.idx, image_size, "python", blur)
+
+            # List of non square outputs to compare with the square output;
+            # each entry is checked against the square result, not against the other.
+            nonsq_fragment_gradtensor_list = [
+                (fragments_naive, verts_nonsq_naive, "naive"),
+                (fragments_python, verts_nonsq_python, "python"),
+            ]
+            self._compare_square_with_nonsq(
+                image_size,
+                blur,
+                device,
+                torus_points,
+                nonsq_fragment_gradtensor_list,
+                batch_size,
+            )
+
+    def test_render_pointcloud(self):
+        """
+        Test a textured point cloud is rendered correctly in a non square image.
+        """
+        device = torch.device("cuda:0")
+        pointclouds = Pointclouds(
+            points=[torus_points * 2.0],
+            features=torch.ones_like(torus_points[None, ...]),
+        ).to(device)
+        R, T = look_at_view_transform(2.7, 0.0, 0.0)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+        raster_settings = PointsRasterizationSettings(
+            image_size=(512, 1024), radius=5e-2, points_per_pixel=1
+        )
+        rasterizer = PointsRasterizer(cameras=cameras, raster_settings=raster_settings)
+        compositor = AlphaCompositor()
+        renderer = PointsRenderer(rasterizer=rasterizer, compositor=compositor)
+
+        # Load reference image
+        image_ref = load_rgb_image("test_pointcloud_rectangle_image.png", DATA_DIR)
+
+        for bin_size in [0, None]:
+            # Check that both bin_size settings reproduce the same reference image.
+            renderer.rasterizer.raster_settings.bin_size = bin_size
+            images = renderer(pointclouds)
+            rgb = images[0, ..., :3].squeeze().cpu()
+
+            if DEBUG:
+                # Both loop iterations write to the same debug file.
+                Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR / "DEBUG_pointcloud_rectangle_image.png"
+                )
+
+            # NOTE some pixels can be flaky, hence the absolute tolerance
+            cond1 = torch.allclose(rgb, image_ref, atol=0.05)
+            self.assertTrue(cond1)
diff --git a/pytorch3d/tests/test_rasterizer.py b/pytorch3d/tests/test_rasterizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f9e61731c5a1116f5407743c4adeaa138f0f34b
--- /dev/null
+++ b/pytorch3d/tests/test_rasterizer.py
@@ -0,0 +1,578 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import numpy as np
+import torch
+from PIL import Image
+from pytorch3d.renderer import (
+    FoVOrthographicCameras,
+    FoVPerspectiveCameras,
+    look_at_view_transform,
+    MeshRasterizer,
+    OrthographicCameras,
+    PerspectiveCameras,
+    PointsRasterizationSettings,
+    PointsRasterizer,
+    RasterizationSettings,
+)
+from pytorch3d.renderer.fisheyecameras import FishEyeCameras
+from pytorch3d.renderer.opengl.rasterizer_opengl import (
+    _check_cameras,
+    _check_raster_settings,
+    _convert_meshes_to_gl_ndc,
+    _parse_and_verify_image_size,
+    MeshRasterizerOpenGL,
+)
+from pytorch3d.structures import Pointclouds
+from pytorch3d.structures.meshes import Meshes
+from pytorch3d.utils.ico_sphere import ico_sphere
+
+from .common_testing import get_tests_dir, TestCaseMixin
+
+
+DATA_DIR = get_tests_dir() / "data"
+DEBUG = False  # Set DEBUG to true to save outputs from the tests.
+
+
+def convert_image_to_binary_mask(filename):
+    """Load an image and return an int64 mask that is 1 where a pixel equals the image max."""
+    with Image.open(filename) as raw_image:
+        pixels = torch.from_numpy(np.array(raw_image))
+    # Only entries holding the global maximum value are marked with 1; the rest are 0.
+    return pixels.eq(pixels.max()).to(torch.int64)
+
+
+class TestMeshRasterizer(unittest.TestCase):
+    def test_simple_sphere(self):
+        self._simple_sphere(MeshRasterizer)  # run the shared sphere checks with MeshRasterizer
+
+    def test_simple_sphere_fisheye(self):
+        self._simple_sphere_fisheye_against_perspective(MeshRasterizer)  # fisheye vs perspective
+
+    def test_simple_sphere_opengl(self):
+        self._simple_sphere(MeshRasterizerOpenGL)  # same sphere checks with the OpenGL rasterizer
+
+    def _simple_sphere(self, rasterizer_type):
+        device = torch.device("cuda:0")
+        ref_filename = f"test_rasterized_sphere_{rasterizer_type.__name__}.png"
+        image_ref_filename = DATA_DIR / ref_filename
+
+        # Load the reference image as a binary mask (1 where a pixel equals the image max).
+        image_ref = convert_image_to_binary_mask(image_ref_filename)
+
+        # Init mesh
+        sphere_mesh = ico_sphere(5, device)
+
+        # Init cameras and rasterizer settings
+        R, T = look_at_view_transform(2.7, 0, 0)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+        raster_settings = RasterizationSettings(
+            image_size=512, blur_radius=0.0, faces_per_pixel=1, bin_size=0
+        )
+
+        # Init rasterizer (the class under test is supplied by the caller)
+        rasterizer = rasterizer_type(cameras=cameras, raster_settings=raster_settings)
+
+        ####################################
+        # 1. Test rasterizing a single mesh
+        ####################################
+
+        fragments = rasterizer(sphere_mesh)
+        image = fragments.pix_to_face[0, ..., 0].squeeze().cpu()
+        # Convert pix_to_face to a binary mask: 1 where a face was hit, 0 elsewhere
+        image[image >= 0] = 1.0
+        image[image < 0] = 0.0
+
+        if DEBUG:
+            Image.fromarray((image.numpy() * 255).astype(np.uint8)).save(
+                DATA_DIR
+                / f"DEBUG_test_rasterized_sphere_{rasterizer_type.__name__}.png"
+            )
+
+        self.assertTrue(torch.allclose(image, image_ref))
+
+        ##################################
+        #  2. Test with a batch of meshes
+        ##################################
+
+        batch_size = 10
+        sphere_meshes = sphere_mesh.extend(batch_size)
+        fragments = rasterizer(sphere_meshes)
+        for i in range(batch_size):
+            image = fragments.pix_to_face[i, ..., 0].squeeze().cpu()
+            image[image >= 0] = 1.0
+            image[image < 0] = 0.0
+            self.assertTrue(torch.allclose(image, image_ref))
+
+        ####################################################
+        #  3. Test that passing kwargs to rasterizer works.
+        ####################################################
+
+        #  Change the view transform to zoom out.
+        R, T = look_at_view_transform(20.0, 0, 0, device=device)
+        fragments = rasterizer(sphere_mesh, R=R, T=T)
+        image = fragments.pix_to_face[0, ..., 0].squeeze().cpu()
+        image[image >= 0] = 1.0
+        image[image < 0] = 0.0
+
+        # From here on image_ref refers to the zoomed-out reference image.
+        ref_filename = f"test_rasterized_sphere_zoom_{rasterizer_type.__name__}.png"
+        image_ref_filename = DATA_DIR / ref_filename
+        image_ref = convert_image_to_binary_mask(image_ref_filename)
+
+        if DEBUG:
+            Image.fromarray((image.numpy() * 255).astype(np.uint8)).save(
+                DATA_DIR
+                / f"DEBUG_test_rasterized_sphere_zoom_{rasterizer_type.__name__}.png"
+            )
+        self.assertTrue(torch.allclose(image, image_ref))
+
+        #################################
+        #  4. Test init without cameras.
+        ##################################
+
+        # Create a new empty rasterizer:
+        rasterizer = rasterizer_type(raster_settings=raster_settings)
+
+        # Check that omitting the cameras in both initialization
+        # and the forward pass throws an error:
+        with self.assertRaisesRegex(ValueError, "Cameras must be specified"):
+            rasterizer(sphere_mesh)
+
+        # Now pass in the cameras as a kwarg
+        fragments = rasterizer(sphere_mesh, cameras=cameras)
+        image = fragments.pix_to_face[0, ..., 0].squeeze().cpu()
+        # Convert pix_to_face to a binary mask
+        image[image >= 0] = 1.0
+        image[image < 0] = 0.0
+
+        if DEBUG:
+            Image.fromarray((image.numpy() * 255).astype(np.uint8)).save(
+                DATA_DIR  # same file name as the step 1 debug image, which is overwritten
+                / f"DEBUG_test_rasterized_sphere_{rasterizer_type.__name__}.png"
+            )
+
+        # NOTE(review): image_ref is the zoom reference from step 3 here; presumably the
+        # R/T kwargs of step 3 persist on `cameras` -- confirm this is intended.
+        self.assertTrue(torch.allclose(image, image_ref))
+
+    def _simple_sphere_fisheye_against_perspective(self, rasterizer_type):
+        device = torch.device("cuda:0")
+
+        # Init mesh
+        sphere_mesh = ico_sphere(5, device)
+
+        # Init the view transform shared by both cameras
+        R, T = look_at_view_transform(2.7, 0, 0)
+
+        # Init the intrinsics shared by the perspective and fisheye cameras
+        focal = torch.tensor([[1.7321]], dtype=torch.float32)
+        principal_point = torch.tensor([[0.0101, -0.0101]])
+        perspective_cameras = PerspectiveCameras(
+            R=R,
+            T=T,
+            focal_length=focal,
+            principal_point=principal_point,
+            device="cuda:0",
+        )
+        # All three distortion switches are turned off for the fisheye camera.
+        fisheye_cameras = FishEyeCameras(
+            device=device,
+            R=R,
+            T=T,
+            focal_length=focal,
+            principal_point=principal_point,
+            world_coordinates=True,
+            use_radial=False,
+            use_tangential=False,
+            use_thin_prism=False,
+        )
+        raster_settings = RasterizationSettings(
+            image_size=512, blur_radius=0.0, faces_per_pixel=1, bin_size=0
+        )
+
+        # Init one rasterizer per camera type, with identical raster settings
+        perspective_rasterizer = rasterizer_type(
+            cameras=perspective_cameras, raster_settings=raster_settings
+        )
+        fisheye_rasterizer = rasterizer_type(
+            cameras=fisheye_cameras, raster_settings=raster_settings
+        )
+
+        ####################################################################################
+        # 1. Rasterize a single mesh: fisheye camera against perspective camera
+        ####################################################################################
+
+        perspective_fragments = perspective_rasterizer(sphere_mesh)
+        perspective_image = perspective_fragments.pix_to_face[0, ..., 0].squeeze().cpu()
+        # Convert pix_to_face to a binary mask
+        perspective_image[perspective_image >= 0] = 1.0
+        perspective_image[perspective_image < 0] = 0.0
+
+        if DEBUG:
+            Image.fromarray((perspective_image.numpy() * 255).astype(np.uint8)).save(
+                DATA_DIR
+                / f"DEBUG_test_perspective_rasterized_sphere_{rasterizer_type.__name__}.png"
+            )
+
+        fisheye_fragments = fisheye_rasterizer(sphere_mesh)
+        fisheye_image = fisheye_fragments.pix_to_face[0, ..., 0].squeeze().cpu()
+        # Convert pix_to_face to a binary mask
+        fisheye_image[fisheye_image >= 0] = 1.0
+        fisheye_image[fisheye_image < 0] = 0.0
+
+        if DEBUG:
+            Image.fromarray((fisheye_image.numpy() * 255).astype(np.uint8)).save(
+                DATA_DIR
+                / f"DEBUG_test_fisheye_rasterized_sphere_{rasterizer_type.__name__}.png"
+            )
+
+        # No reference file is used: the perspective mask serves as the expected result.
+        self.assertTrue(torch.allclose(fisheye_image, perspective_image))
+
+        ##################################
+        #  2. Test with a batch of meshes
+        ##################################
+
+        batch_size = 10
+        sphere_meshes = sphere_mesh.extend(batch_size)
+        fragments = fisheye_rasterizer(sphere_meshes)
+        for i in range(batch_size):
+            image = fragments.pix_to_face[i, ..., 0].squeeze().cpu()
+            image[image >= 0] = 1.0
+            image[image < 0] = 0.0
+            self.assertTrue(torch.allclose(image, perspective_image))
+
+    def test_simple_to(self):
+        """Calling to() on a rasterizer built without a cameras object must work."""
+        cuda_device = torch.device("cuda:0")
+        # Exercise MeshRasterizer first and MeshRasterizerOpenGL second.
+        for rasterizer_cls in (MeshRasterizer, MeshRasterizerOpenGL):
+            camera_free_rasterizer = rasterizer_cls()
+            camera_free_rasterizer.to(cuda_device)
+            # The value returned by to() is deliberately not used.
+
+    def test_compare_rasterizers(self):
+        device = torch.device("cuda:0")
+
+        # Init cameras and the rasterizer settings shared by both rasterizers
+        R, T = look_at_view_transform(2.7, 0, 0)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+        raster_settings = RasterizationSettings(
+            image_size=512,
+            blur_radius=0.0,
+            faces_per_pixel=1,
+            bin_size=0,
+            perspective_correct=True,
+        )
+        from pytorch3d.io import load_obj
+        from pytorch3d.renderer import TexturesAtlas
+
+        from .common_testing import get_pytorch3d_dir
+
+        TUTORIAL_DATA_DIR = get_pytorch3d_dir() / "docs/tutorials/data"
+        obj_filename = TUTORIAL_DATA_DIR / "cow_mesh/cow.obj"
+
+        # Load mesh and texture as a per face texture atlas.
+        verts, faces, aux = load_obj(
+            obj_filename,
+            device=device,
+            load_textures=True,
+            create_texture_atlas=True,
+            texture_atlas_size=8,
+            texture_wrap=None,
+        )
+        atlas = aux.texture_atlas
+        mesh = Meshes(
+            verts=[verts],
+            faces=[faces.verts_idx],
+            textures=TexturesAtlas(atlas=[atlas]),
+        )
+
+        # Rasterize using both rasterizers and compare results.
+        rasterizer = MeshRasterizerOpenGL(
+            cameras=cameras, raster_settings=raster_settings
+        )
+        fragments_opengl = rasterizer(mesh)
+
+        rasterizer = MeshRasterizer(cameras=cameras, raster_settings=raster_settings)
+        fragments = rasterizer(mesh)
+
+        # Ensure that 99.9% of bary_coords is at most 0.001 different
+        # (quantiles are used instead of an exact match).
+        self.assertLess(
+            torch.quantile(
+                (fragments.bary_coords - fragments_opengl.bary_coords).abs(), 0.999
+            ),
+            0.001,
+        )
+
+        # Ensure that 99.9% of zbuf vals is at most 0.001 different.
+        self.assertLess(
+            torch.quantile((fragments.zbuf - fragments_opengl.zbuf).abs(), 0.999), 0.001
+        )
+
+        # Ensure that 99.99% of pix_to_face is identical: the 0.9999 quantile
+        # of the 0/1 mismatch indicator has to be 0.
+        self.assertEqual(
+            torch.quantile(
+                (fragments.pix_to_face != fragments_opengl.pix_to_face).float(), 0.9999
+            ),
+            0,
+        )
+
+
+class TestMeshRasterizerOpenGLUtils(TestCaseMixin, unittest.TestCase):
+    def setUp(self):
+        # One right triangle in the z = 0 plane, stored on the GPU for all tests.
+        tri_verts = torch.tensor([[-1, 1, 0], [1, 1, 0], [1, -1, 0]]).float().cuda()
+        tri_faces = torch.tensor([[0, 1, 2]]).cuda()
+        # Both tensors are wrapped in single-element lists: a batch of one mesh.
+        self.meshes_world = Meshes(verts=[tri_verts], faces=[tri_faces])
+
+    # Test various utils specific to the OpenGL rasterizer. Full "integration tests"
+    # live in test_render_meshes and test_render_multigpu.
+    def test_check_cameras(self):
+        """Accept only FoV cameras; reject other camera types and missing cameras."""
+        _check_cameras(FoVPerspectiveCameras())
+        _check_cameras(FoVOrthographicCameras())
+        with self.assertRaisesRegex(ValueError, "Cameras must be specified"):
+            _check_cameras(None)
+        with self.assertRaisesRegex(ValueError, "MeshRasterizerOpenGL only works with"):
+            _check_cameras(PerspectiveCameras())
+        with self.assertRaisesRegex(ValueError, "MeshRasterizerOpenGL only works with"):
+            _check_cameras(OrthographicCameras())
+
+        # The same rules must hold when the cameras go through a forward pass.
+        MeshRasterizerOpenGL(FoVPerspectiveCameras().cuda())(self.meshes_world)
+        MeshRasterizerOpenGL(FoVOrthographicCameras().cuda())(self.meshes_world)
+        MeshRasterizerOpenGL()(self.meshes_world, cameras=FoVPerspectiveCameras().cuda())
+
+        with self.assertRaisesRegex(ValueError, "MeshRasterizerOpenGL only works with"):
+            MeshRasterizerOpenGL(PerspectiveCameras().cuda())(self.meshes_world)
+        with self.assertRaisesRegex(ValueError, "MeshRasterizerOpenGL only works with"):
+            MeshRasterizerOpenGL(OrthographicCameras().cuda())(self.meshes_world)
+        with self.assertRaisesRegex(ValueError, "Cameras must be specified"):
+            MeshRasterizerOpenGL()(self.meshes_world)
+
+    def test_check_raster_settings(self):
+        """A faces_per_pixel of 100 must trigger the 'one face per pixel' warning."""
+        settings = RasterizationSettings()
+        settings.faces_per_pixel = 100
+        with self.assertWarnsRegex(UserWarning, ".* one face per pixel"):
+            _check_raster_settings(settings)
+        # The same warning is expected when the settings are used by a rasterizer.
+        with self.assertWarnsRegex(UserWarning, ".* one face per pixel"):
+            gl_rasterizer = MeshRasterizerOpenGL(raster_settings=settings)
+            gl_rasterizer(self.meshes_world, cameras=FoVPerspectiveCameras().cuda())
+
+    def test_convert_meshes_to_gl_ndc_square_img(self):
+        R, T = look_at_view_transform(1, 90, 180)  # positional args: dist, elev, azim
+        cameras = FoVOrthographicCameras(R=R, T=T).cuda()
+
+        meshes_gl_ndc = _convert_meshes_to_gl_ndc(
+            self.meshes_world, (100, 100), cameras
+        )
+
+        # After look_at_view_transform rotating 180 deg around z-axis, we recover
+        # the original coordinates. After additionally elevating the view by 90
+        # deg, we "zero out" the y-coordinate. Finally, we negate the x and y axes
+        # to adhere to OpenGL conventions (which go against the PyTorch3D convention).
+        self.assertClose(
+            meshes_gl_ndc.verts_list()[0],
+            torch.tensor(
+                [[-1, 0, 0], [1, 0, 0], [1, 0, 2]], dtype=torch.float32
+            ).cuda(),
+            atol=1e-5,
+        )
+
+    def test_parse_and_verify_image_size(self):
+        img_size = _parse_and_verify_image_size(512)  # an int becomes a square tuple
+        self.assertEqual(img_size, (512, 512))
+
+        img_size = _parse_and_verify_image_size((2047, 10))  # tuples pass through as-is
+        self.assertEqual(img_size, (2047, 10))
+
+        img_size = _parse_and_verify_image_size((10, 2047))
+        self.assertEqual(img_size, (10, 2047))
+
+        with self.assertRaisesRegex(ValueError, "Max rasterization size is"):
+            _parse_and_verify_image_size((2049, 512))  # first entry too large
+
+        with self.assertRaisesRegex(ValueError, "Max rasterization size is"):
+            _parse_and_verify_image_size((512, 2049))  # second entry too large
+
+        with self.assertRaisesRegex(ValueError, "Max rasterization size is"):
+            _parse_and_verify_image_size((2049, 2049))  # both entries too large
+
+        rasterizer = MeshRasterizerOpenGL(FoVPerspectiveCameras().cuda())
+        raster_settings = RasterizationSettings()  # repeat the same sizes via a forward pass
+
+        raster_settings.image_size = 512
+        fragments = rasterizer(self.meshes_world, raster_settings=raster_settings)
+        self.assertEqual(fragments.pix_to_face.shape, torch.Size([1, 512, 512, 1]))
+
+        raster_settings.image_size = (2047, 10)
+        fragments = rasterizer(self.meshes_world, raster_settings=raster_settings)
+        self.assertEqual(fragments.pix_to_face.shape, torch.Size([1, 2047, 10, 1]))
+
+        raster_settings.image_size = (10, 2047)
+        fragments = rasterizer(self.meshes_world, raster_settings=raster_settings)
+        self.assertEqual(fragments.pix_to_face.shape, torch.Size([1, 10, 2047, 1]))
+
+        raster_settings.image_size = (2049, 512)
+        with self.assertRaisesRegex(ValueError, "Max rasterization size is"):
+            rasterizer(self.meshes_world, raster_settings=raster_settings)
+
+        raster_settings.image_size = (512, 2049)
+        with self.assertRaisesRegex(ValueError, "Max rasterization size is"):
+            rasterizer(self.meshes_world, raster_settings=raster_settings)
+
+        raster_settings.image_size = (2049, 2049)
+        with self.assertRaisesRegex(ValueError, "Max rasterization size is"):
+            rasterizer(self.meshes_world, raster_settings=raster_settings)
+
+
+class TestPointRasterizer(unittest.TestCase):
+    def test_simple_sphere(self):
+        device = torch.device("cuda:0")
+
+        # Load reference image
+        ref_filename = "test_simple_pointcloud_sphere.png"
+        image_ref_filename = DATA_DIR / ref_filename
+
+        # Convert the reference image to a binary mask and cast it to int32.
+        image_ref = convert_image_to_binary_mask(image_ref_filename).to(torch.int32)
+
+        # Use the vertices of an icosphere, shifted by 0.2 in x and y, as the points.
+        sphere_mesh = ico_sphere(1, device)
+        verts_padded = sphere_mesh.verts_padded()
+        verts_padded[..., 1] += 0.2
+        verts_padded[..., 0] += 0.2
+        pointclouds = Pointclouds(points=verts_padded)
+        R, T = look_at_view_transform(2.7, 0.0, 0.0)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+        raster_settings = PointsRasterizationSettings(
+            image_size=256, radius=5e-2, points_per_pixel=1
+        )
+
+        #################################
+        #  1. Test init without cameras.
+        ##################################
+
+        # Initialize without passing in the cameras
+        rasterizer = PointsRasterizer()
+
+        # Check that omitting the cameras in both initialization
+        # and the forward pass throws an error:
+        with self.assertRaisesRegex(ValueError, "Cameras must be specified"):
+            rasterizer(pointclouds)
+
+        ##########################################
+        # 2. Test rasterizing a single pointcloud
+        ##########################################
+
+        fragments = rasterizer(
+            pointclouds, cameras=cameras, raster_settings=raster_settings
+        )
+
+        # Convert idx to a binary mask
+        image = fragments.idx[0, ..., 0].squeeze().cpu()
+        image[image >= 0] = 1.0
+        image[image < 0] = 0.0
+
+        if DEBUG:
+            Image.fromarray((image.numpy() * 255).astype(np.uint8)).save(
+                DATA_DIR / "DEBUG_test_rasterized_sphere_points.png"
+            )
+
+        # Only index 0 of the reference mask's last axis is compared.
+        self.assertTrue(torch.allclose(image, image_ref[..., 0]))
+
+        ########################################
+        #  3. Test with a batch of pointclouds
+        ########################################
+
+        batch_size = 10
+        pointclouds = pointclouds.extend(batch_size)
+        fragments = rasterizer(
+            pointclouds, cameras=cameras, raster_settings=raster_settings
+        )
+        for i in range(batch_size):
+            image = fragments.idx[i, ..., 0].squeeze().cpu()
+            image[image >= 0] = 1.0
+            image[image < 0] = 0.0
+            self.assertTrue(torch.allclose(image, image_ref[..., 0]))
+
+    def test_simple_sphere_fisheye_against_perspective(self):
+        device = torch.device("cuda:0")
+
+        # Use the vertices of an icosphere, shifted by 0.2 in x and y, as the points.
+        sphere_mesh = ico_sphere(1, device)
+        verts_padded = sphere_mesh.verts_padded()
+        verts_padded[..., 1] += 0.2
+        verts_padded[..., 0] += 0.2
+        pointclouds = Pointclouds(points=verts_padded)
+        R, T = look_at_view_transform(2.7, 0.0, 0.0)
+        perspective_cameras = PerspectiveCameras(
+            R=R,
+            T=T,
+            device=device,
+        )
+        fisheye_cameras = FishEyeCameras(
+            device=device,
+            R=R,
+            T=T,
+            world_coordinates=True,
+            use_radial=False,
+            use_tangential=False,
+            use_thin_prism=False,
+        )
+        raster_settings = PointsRasterizationSettings(
+            image_size=256, radius=5e-2, points_per_pixel=1
+        )
+
+        #################################
+        #  1. Test init without cameras.
+        ##################################
+
+        # Initialize without passing in the cameras
+        rasterizer = PointsRasterizer()
+
+        # Check that omitting the cameras in both initialization
+        # and the forward pass throws an error:
+        with self.assertRaisesRegex(ValueError, "Cameras must be specified"):
+            rasterizer(pointclouds)
+
+        ########################################################################################
+        # 2. Test rasterizing a single pointcloud with fisheye camera against perspective camera
+        ########################################################################################
+
+        perspective_fragments = rasterizer(
+            pointclouds, cameras=perspective_cameras, raster_settings=raster_settings
+        )
+        fisheye_fragments = rasterizer(
+            pointclouds, cameras=fisheye_cameras, raster_settings=raster_settings
+        )
+
+        # Convert idx to a binary mask
+        perspective_image = perspective_fragments.idx[0, ..., 0].squeeze().cpu()
+        perspective_image[perspective_image >= 0] = 1.0
+        perspective_image[perspective_image < 0] = 0.0
+
+        fisheye_image = fisheye_fragments.idx[0, ..., 0].squeeze().cpu()
+        fisheye_image[fisheye_image >= 0] = 1.0
+        fisheye_image[fisheye_image < 0] = 0.0
+
+        if DEBUG:
+            Image.fromarray((perspective_image.numpy() * 255).astype(np.uint8)).save(
+                DATA_DIR / "DEBUG_test_rasterized_perspective_sphere_points.png"
+            )
+            Image.fromarray((fisheye_image.numpy() * 255).astype(np.uint8)).save(
+                DATA_DIR / "DEBUG_test_rasterized_fisheye_sphere_points.png"
+            )
+
+        self.assertTrue(torch.allclose(fisheye_image, perspective_image))
+
+    def test_simple_to(self):
+        """Calling to() on a PointsRasterizer built without cameras must work."""
+        cuda_device = torch.device("cuda:0")
+        camera_free_rasterizer = PointsRasterizer()
+        camera_free_rasterizer.to(cuda_device)
diff --git a/pytorch3d/tests/test_raymarching.py b/pytorch3d/tests/test_raymarching.py
new file mode 100644
index 0000000000000000000000000000000000000000..152b52fc57edf5dba39d4ec179a94e46b24b6430
--- /dev/null
+++ b/pytorch3d/tests/test_raymarching.py
@@ -0,0 +1,201 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.renderer import AbsorptionOnlyRaymarcher, EmissionAbsorptionRaymarcher
+
+from .common_testing import TestCaseMixin
+
+
+class TestRaymarching(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        torch.manual_seed(42)  # fixed seed: the random test inputs are reproducible
+
+    @staticmethod
+    def _init_random_rays(
+        n_rays=10, n_pts_per_ray=9, device="cuda", dtype=torch.float32
+    ):
+        """
+        Generate a batch of ray points with features, densities, and z-coordinates
+        such that their EmissionAbsorption rendering results in
+        feature renders `features_gt`, depth renders `depths_gt`,
+        and opacity renders `opacities_gt`. Returns the 6-tuple (rays_z,
+        rays_densities, rays_features, depths_gt, features_gt, opacities_gt).
+        """
+        # generate trivial ray z-coordinates of sampled points coinciding with
+        # each point's order along a ray, i.e. 0 .. n_pts_per_ray - 1.
+        rays_z = torch.arange(n_pts_per_ray, dtype=dtype, device=device)[None].repeat(
+            n_rays, 1
+        )
+
+        # ground truth surface depths, integers in [1, n_pts_per_ray + 1].
+        depths_gt = torch.randint(
+            low=1, high=n_pts_per_ray + 2, size=(n_rays,)
+        ).type_as(rays_z)
+
+        # ideal densities: 0 before the surface, 1 from the gt depth onwards; a
+        # depth >= n_pts_per_ray lies past the last sample, so that ray is empty
+        rays_densities = (rays_z >= depths_gt[..., None]).type_as(rays_z)[..., None]
+        opacities_gt = (depths_gt < n_pts_per_ray).type_as(rays_z)
+
+        # generate random features, 3 channels for every point of every ray
+        rays_features = torch.rand(
+            (n_rays, n_pts_per_ray, 3), device=rays_z.device, dtype=rays_z.dtype
+        )
+
+        # expected render: the feature of the one sample lying at the gt depth
+        gt_surface = ((rays_z - depths_gt[..., None]).abs() <= 1e-4).type_as(rays_z)
+        features_gt = (rays_features * gt_surface[..., None]).sum(dim=-2)
+
+        return (
+            rays_z,
+            rays_densities,
+            rays_features,
+            depths_gt,
+            features_gt,
+            opacities_gt,
+        )
+
+    @staticmethod
+    def raymarcher(
+        raymarcher_type=EmissionAbsorptionRaymarcher, n_rays=10, n_pts_per_ray=10
+    ):
+        (
+            rays_z,
+            rays_densities,
+            rays_features,
+            depths_gt,
+            features_gt,
+            opacities_gt,
+        ) = TestRaymarching._init_random_rays(  # default device is "cuda"
+            n_rays=n_rays, n_pts_per_ray=n_pts_per_ray
+        )
+        # NOTE(review): presumably a benchmark helper; the *_gt values are unused
+        raymarcher = raymarcher_type()
+
+        def run_raymarcher() -> None:
+            raymarcher(
+                rays_densities=rays_densities,
+                rays_features=rays_features,
+                rays_z=rays_z,
+            )
+            torch.cuda.synchronize()  # wait until the queued CUDA work is done
+
+        return run_raymarcher  # the caller decides when (and how often) to run it
+
+    def test_emission_absorption_inputs(self) -> None:
+        """
+        Test the checks of validity of the inputs to `EmissionAbsorptionRaymarcher`.
+        """
+
+        # init the EA raymarcher
+        raymarcher_ea = EmissionAbsorptionRaymarcher()
+
+        # bad ways of passing densities and features; each entry is the
+        # positional argument list [rays_densities, rays_features]
+        bad_inputs = [
+            [torch.rand(10, 5, 4), None],
+            [torch.Tensor(3)[0], torch.rand(10, 5, 4)],
+            [1.0, torch.rand(10, 5, 4)],
+            [torch.rand(10, 5, 4), 1.0],
+            [torch.rand(10, 5, 4), None],  # NOTE(review): duplicates the first entry
+            [torch.rand(10, 5, 4), torch.rand(10, 5, 4)],
+            [torch.rand(10, 5, 4), torch.rand(10, 5, 4, 3)],
+            [torch.rand(10, 5, 4, 3), torch.rand(10, 5, 4, 3)],
+        ]
+
+        for bad_input in bad_inputs:
+            with self.assertRaises(ValueError):
+                raymarcher_ea(*bad_input)
+
+    def test_absorption_only_inputs(self) -> None:
+        """
+        Test the checks of validity of the inputs to `AbsorptionOnlyRaymarcher`.
+        """
+
+        # init the AO raymarcher
+        raymarcher_ao = AbsorptionOnlyRaymarcher()
+
+        # bad ways of passing densities; each entry is the positional argument
+        # list [rays_densities] - the only case here is a 0-dim tensor
+        bad_inputs = [[torch.Tensor(3)[0]]]
+
+        for bad_input in bad_inputs:
+            with self.assertRaises(ValueError):
+                raymarcher_ao(*bad_input)
+
+    def test_emission_absorption(self) -> None:
+        """
+        Test the EA (emission-absorption) raymarcher against synthetic rays.
+        """
+        (
+            rays_z,
+            rays_densities,
+            rays_features,
+            depths_gt,
+            features_gt,
+            opacities_gt,
+        ) = TestRaymarching._init_random_rays(
+            n_rays=1000, n_pts_per_ray=9, device=None, dtype=torch.float32
+        )  # device=None instead of the helper's "cuda" default
+
+        # init the EA raymarcher
+        raymarcher_ea = EmissionAbsorptionRaymarcher()
+
+        # allow gradients for a differentiability check
+        rays_densities.requires_grad = True
+        rays_features.requires_grad = True
+
+        # render the features (the last output channel is the opacity) and
+        # check with gt; features are compared weighted by the opacities
+        data_render = raymarcher_ea(rays_densities, rays_features)
+        features_render, opacities_render = data_render[..., :-1], data_render[..., -1]
+        self.assertClose(opacities_render, opacities_gt)
+        self.assertClose(
+            features_render * opacities_render[..., None],
+            features_gt * opacities_gt[..., None],
+        )
+
+        # get the depth map by rendering the ray z components and check with gt
+        depths_render = raymarcher_ea(rays_densities, rays_z[..., None])[..., 0]
+        self.assertClose(depths_render * opacities_render, depths_gt * opacities_gt)
+
+        # check differentiability: both inputs must receive finite gradients
+        loss = features_render.mean()
+        loss.backward()
+        for field in (rays_densities, rays_features):
+            self.assertTrue(torch.isfinite(field.grad.data).all())
+
+    def test_absorption_only(self) -> None:
+        """
+        Test the AO (absorption-only) raymarcher against synthetic rays.
+        """
+        (
+            rays_z,
+            rays_densities,
+            rays_features,
+            depths_gt,
+            features_gt,
+            opacities_gt,
+        ) = TestRaymarching._init_random_rays(
+            n_rays=1000, n_pts_per_ray=9, device=None, dtype=torch.float32
+        )  # only rays_densities and opacities_gt are used below
+
+        # init the AO raymarcher
+        raymarcher_ao = AbsorptionOnlyRaymarcher()
+
+        # allow gradients for a differentiability check
+        rays_densities.requires_grad = True
+
+        # render opacities (channel 0 of the output) and check with gt
+        opacities_render = raymarcher_ao(rays_densities)[..., 0]
+        self.assertClose(opacities_render, opacities_gt)
+
+        # check differentiability: the densities must receive finite gradients
+        loss = opacities_render.mean()
+        loss.backward()
+        self.assertTrue(torch.isfinite(rays_densities.grad.data).all())
diff --git a/pytorch3d/tests/test_raysampling.py b/pytorch3d/tests/test_raysampling.py
new file mode 100644
index 0000000000000000000000000000000000000000..d05041c0d70e33692555fe21b7b96e9927a609fc
--- /dev/null
+++ b/pytorch3d/tests/test_raysampling.py
@@ -0,0 +1,642 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from typing import Callable
+
+import torch
+from pytorch3d.common.compat import meshgrid_ij
+from pytorch3d.ops import eyes
+from pytorch3d.renderer import (
+    MonteCarloRaysampler,
+    MultinomialRaysampler,
+    NDCGridRaysampler,
+    NDCMultinomialRaysampler,
+)
+from pytorch3d.renderer.cameras import (
+    FoVOrthographicCameras,
+    FoVPerspectiveCameras,
+    OrthographicCameras,
+    PerspectiveCameras,
+)
+from pytorch3d.renderer.implicit.raysampling import (
+    _jiggle_within_stratas,
+    _safe_multinomial,
+)
+from pytorch3d.renderer.implicit.utils import (
+    ray_bundle_to_ray_points,
+    ray_bundle_variables_to_ray_points,
+)
+from pytorch3d.transforms import Rotate
+
+from .common_testing import TestCaseMixin
+from .test_cameras import init_random_cameras
+
+
+class TestNDCRaysamplerConvention(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        torch.manual_seed(42)  # fixed seed: the random cameras are reproducible
+
+    def test_ndc_convention(
+        self,
+        h=428,
+        w=760,
+    ) -> None:
+        device = torch.device("cuda")
+
+        camera = init_random_cameras(PerspectiveCameras, 1, random_z=True).to(device)
+
+        depth_map = torch.ones((1, 1, h, w)).to(device)  # depth 1 at every pixel
+
+        # one ray per pixel; the sampled lengths are replaced by the depth map
+        xyz = ray_bundle_to_ray_points(
+            NDCGridRaysampler(
+                image_width=w,
+                image_height=h,
+                n_pts_per_ray=1,
+                min_depth=1.0,
+                max_depth=1.0,
+            )(camera)._replace(lengths=depth_map[:, 0, ..., None])
+        ).view(1, -1, 3)
+
+        # project the point cloud back with the same camera and keep only xy
+        xy = camera.transform_points(xyz)[:, :, :2].squeeze()
+
+        xy_grid = self._get_ndc_grid(h, w, device)  # expected xy, shape (h * w, 2)
+
+        self.assertClose(
+            xy,
+            xy_grid,
+            atol=1e-4,
+        )
+
+    def _get_ndc_grid(self, h, w, device):  # returns an (h * w, 2) tensor of xy
+        if w >= h:  # the shorter image side gets half-extent 1.0
+            range_x = w / h
+            range_y = 1.0
+        else:
+            range_x = 1.0
+            range_y = h / w
+
+        half_pix_width = range_x / w  # one pixel is 2 * range_x / w wide
+        half_pix_height = range_y / h
+
+        min_x = range_x - half_pix_width  # first grid value: the positive end
+        max_x = -range_x + half_pix_width  # last grid value: the negative end
+        min_y = range_y - half_pix_height
+        max_y = -range_y + half_pix_height
+
+        y_grid, x_grid = meshgrid_ij(
+            torch.linspace(min_y, max_y, h, dtype=torch.float32),
+            torch.linspace(min_x, max_x, w, dtype=torch.float32),
+        )
+
+        x_points = x_grid.contiguous().view(-1).to(device)
+        y_points = y_grid.contiguous().view(-1).to(device)
+        xy = torch.stack((x_points, y_points), dim=1)  # column 0 is x, column 1 is y
+        return xy
+
+
+class TestRaysampling(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        torch.manual_seed(42)  # fixed seed: random cameras/rays are reproducible
+
+    @staticmethod
+    def raysampler(
+        raysampler_type,
+        camera_type,
+        n_pts_per_ray: int,
+        batch_size: int,
+        image_width: int,
+        image_height: int,
+    ) -> Callable[[], None]:
+        """
+        Used for benchmarks: returns a closure running one CUDA raysampling pass.
+        """
+        device = torch.device("cuda")
+
+        # init the raysampler with fixed xy bounds [-1, 1] and depths [1, 10]
+        raysampler = TestRaysampling.init_raysampler(
+            raysampler_type=raysampler_type,
+            min_x=-1.0,
+            max_x=1.0,
+            min_y=-1.0,
+            max_y=1.0,
+            image_width=image_width,
+            image_height=image_height,
+            min_depth=1.0,
+            max_depth=10.0,
+            n_pts_per_ray=n_pts_per_ray,
+        ).to(device)
+
+        # init a batch of random cameras
+        cameras = init_random_cameras(camera_type, batch_size, random_z=True).to(device)
+
+        def run_raysampler() -> None:
+            raysampler(cameras=cameras)
+            torch.cuda.synchronize()  # wait until the queued CUDA work is done
+
+        return run_raysampler
+
+    @staticmethod
+    def init_raysampler(
+        raysampler_type,
+        min_x=-1.0,
+        max_x=1.0,
+        min_y=-1.0,
+        max_y=1.0,
+        image_width=10,
+        image_height=20,
+        min_depth=1.0,
+        max_depth=10.0,
+        n_pts_per_ray=10,
+        n_rays_total=None,
+        n_rays_per_image=None,
+    ):
+        raysampler_params = {  # keyword arguments shared by all raysampler types
+            "min_x": min_x,
+            "max_x": max_x,
+            "min_y": min_y,
+            "max_y": max_y,
+            "n_pts_per_ray": n_pts_per_ray,
+            "min_depth": min_depth,
+            "max_depth": max_depth,
+            "n_rays_total": n_rays_total,
+            "n_rays_per_image": n_rays_per_image,
+        }
+
+        if issubclass(raysampler_type, MultinomialRaysampler):
+            raysampler_params.update(
+                {"image_width": image_width, "image_height": image_height}
+            )
+        elif issubclass(raysampler_type, MonteCarloRaysampler):
+            raysampler_params["n_rays_per_image"] = (  # one ray per pixel by default
+                image_width * image_height
+                if (n_rays_total is None) and (n_rays_per_image is None)
+                else n_rays_per_image
+            )
+        else:
+            raise ValueError(str(raysampler_type))
+
+        if issubclass(raysampler_type, NDCMultinomialRaysampler):
+            # NDCMultinomialRaysampler subclasses are not passed min/max_x/y
+            for k in ("min_x", "max_x", "min_y", "max_y"):
+                del raysampler_params[k]
+
+        # instantiate the raysampler
+        raysampler = raysampler_type(**raysampler_params)
+
+        return raysampler
+
+    def test_raysamplers(
+        self,
+        batch_size=25,
+        min_x=-1.0,
+        max_x=1.0,
+        min_y=-1.0,
+        max_y=1.0,
+        image_width=10,
+        image_height=20,
+        min_depth=1.0,
+        max_depth=10.0,
+    ) -> None:
+        """
+        Tests the shapes and outputs of the Monte Carlo, multinomial and NDC
+        multinomial raysamplers for random cameras and 100 or 1 points per ray.
+        """
+
+        device = torch.device("cuda")
+
+        for n_pts_per_ray in (100, 1):
+
+            for raysampler_type in (
+                MonteCarloRaysampler,
+                MultinomialRaysampler,
+                NDCMultinomialRaysampler,
+            ):
+
+                raysampler = TestRaysampling.init_raysampler(
+                    raysampler_type=raysampler_type,
+                    min_x=min_x,
+                    max_x=max_x,
+                    min_y=min_y,
+                    max_y=max_y,
+                    image_width=image_width,
+                    image_height=image_height,
+                    min_depth=min_depth,
+                    max_depth=max_depth,
+                    n_pts_per_ray=n_pts_per_ray,
+                )
+
+                if issubclass(raysampler_type, NDCMultinomialRaysampler):
+                    # gt bounds of the NDC grid: +/-range_x/y inset by half a pixel
+                    if image_width >= image_height:
+                        range_x = image_width / image_height
+                        range_y = 1.0
+                    else:
+                        range_x = 1.0
+                        range_y = image_height / image_width
+
+                    half_pix_width = range_x / image_width
+                    half_pix_height = range_y / image_height
+                    min_x_ = range_x - half_pix_width
+                    max_x_ = -range_x + half_pix_width
+                    min_y_ = range_y - half_pix_height
+                    max_y_ = -range_y + half_pix_height
+                else:
+                    min_x_ = min_x
+                    max_x_ = max_x
+                    min_y_ = min_y
+                    max_y_ = max_y
+
+                # carry out the test over several camera types
+                for cam_type in (
+                    FoVPerspectiveCameras,
+                    FoVOrthographicCameras,
+                    OrthographicCameras,
+                    PerspectiveCameras,
+                ):
+
+                    # init a batch of random cameras
+                    cameras = init_random_cameras(
+                        cam_type, batch_size, random_z=True
+                    ).to(device)
+
+                    # call the raysampler
+                    ray_bundle = raysampler(cameras=cameras)
+
+                    # check the shapes of the raysampler outputs
+                    self._check_raysampler_output_shapes(
+                        raysampler,
+                        ray_bundle,
+                        batch_size,
+                        image_width,
+                        image_height,
+                        n_pts_per_ray,
+                    )
+
+                    # check the points sampled along each ray
+                    self._check_raysampler_ray_points(
+                        raysampler,
+                        cameras,
+                        ray_bundle,
+                        min_x_,
+                        max_x_,
+                        min_y_,
+                        max_y_,
+                        image_width,
+                        image_height,
+                        min_depth,
+                        max_depth,
+                    )
+
+                    # check the output direction vectors
+                    self._check_raysampler_ray_directions(
+                        cameras, raysampler, ray_bundle
+                    )
+
+    def _check_grid_shape(self, grid, batch_size, spatial_size, n_pts_per_ray, dim):
+        """
+        Assert `grid.shape` equals (batch_size, *spatial_size, n_pts_per_ray, dim),
+        skipping any non-positive entry (callers pass 0 for an axis that is absent).
+        """
+        sizes = (batch_size, *spatial_size, n_pts_per_ray, dim)
+        # compare whole shapes: zip() would truncate and hide a rank mismatch
+        self.assertEqual(list(grid.shape), [sz for sz in sizes if sz > 0])
+
+    def _check_raysampler_output_shapes(
+        self,
+        raysampler,
+        ray_bundle,
+        batch_size,
+        image_width,
+        image_height,
+        n_pts_per_ray,
+    ) -> None:
+        """
+        Checks the shapes of the xys, origins, directions and lengths outputs.
+        """
+
+        if isinstance(raysampler, MultinomialRaysampler):
+            spatial_size = [image_height, image_width]
+        elif isinstance(raysampler, MonteCarloRaysampler):
+            spatial_size = [image_height * image_width]
+        else:
+            raise ValueError(str(type(raysampler)))
+        # a 0 below marks an axis which that tensor does not have
+        self._check_grid_shape(ray_bundle.xys, batch_size, spatial_size, 0, 2)
+        self._check_grid_shape(ray_bundle.origins, batch_size, spatial_size, 0, 3)
+        self._check_grid_shape(ray_bundle.directions, batch_size, spatial_size, 0, 3)
+        self._check_grid_shape(
+            ray_bundle.lengths, batch_size, spatial_size, n_pts_per_ray, 0
+        )
+
+    def _check_raysampler_ray_points(
+        self,
+        raysampler,
+        cameras,
+        ray_bundle,
+        min_x,
+        max_x,
+        min_y,
+        max_y,
+        image_width,
+        image_height,
+        min_depth,
+        max_depth,
+    ):
+        """
+        Check the ray points and lengths of `ray_bundle` against the expected
+        depths and xy bounds; raises ValueError for an unknown raysampler type.
+        """
+        batch_size = cameras.R.shape[0]
+
+        # convert to ray points
+        rays_points_world = ray_bundle_variables_to_ray_points(
+            ray_bundle.origins, ray_bundle.directions, ray_bundle.lengths
+        )
+        n_pts_per_ray = rays_points_world.shape[-2]
+
+        # check that the outputs of ray_bundle_variables_to_ray_points and
+        # ray_bundle_to_ray_points match
+        rays_points_world_ = ray_bundle_to_ray_points(ray_bundle)
+        self.assertClose(rays_points_world, rays_points_world_)
+
+        # check that the depth of each ray point in camera coords
+        # matches the expected linearly-spaced depth
+        depth_expected = torch.linspace(
+            min_depth,
+            max_depth,
+            n_pts_per_ray,
+            dtype=torch.float32,
+            device=rays_points_world.device,
+        )
+        ray_points_camera = (
+            cameras.get_world_to_view_transform()
+            .transform_points(rays_points_world.view(batch_size, -1, 3))
+            .view(batch_size, -1, n_pts_per_ray, 3)
+        )
+        self.assertClose(
+            ray_points_camera[..., 2],
+            depth_expected[None, None, :].expand_as(ray_points_camera[..., 2]),
+            atol=1e-4,
+        )
+
+        # check also that the ray lengths are consistent with depth_expected
+        self.assertClose(
+            ray_bundle.lengths.view(batch_size, -1, n_pts_per_ray),
+            depth_expected[None, None, :].expand_as(ray_points_camera[..., 2]),
+            atol=1e-6,
+        )
+
+        # project the world ray points back to screen space
+        ray_points_projected = cameras.transform_points(
+            rays_points_world.view(batch_size, -1, 3)
+        ).view(rays_points_world.shape)
+
+        # check that ray_xy matches rays_points_projected xy
+        rays_xy_projected = ray_points_projected[..., :2].view(
+            batch_size, -1, n_pts_per_ray, 2
+        )
+        self.assertClose(
+            ray_bundle.xys.view(batch_size, -1, 1, 2).expand_as(rays_xy_projected),
+            rays_xy_projected,
+            atol=1e-4,
+        )
+
+        # check that projected world points' xy coordinates
+        # range correctly between [min_x/y, max_x/y]
+        if isinstance(raysampler, MultinomialRaysampler):
+            # get the expected coordinates along each grid axis
+            ys, xs = [
+                torch.linspace(
+                    low, high, sz, dtype=torch.float32, device=rays_points_world.device
+                )
+                for low, high, sz in (
+                    (min_y, max_y, image_height),
+                    (min_x, max_x, image_width),
+                )
+            ]
+            # compare expected xy with the output xy
+            for dim, gt_axis in zip(
+                (0, 1), (xs[None, None, :, None], ys[None, :, None, None])
+            ):
+                self.assertClose(
+                    ray_points_projected[..., dim],
+                    gt_axis.expand_as(ray_points_projected[..., dim]),
+                    atol=1e-4,
+                )
+
+        elif isinstance(raysampler, MonteCarloRaysampler):
+            # check that the randomly sampled locations
+            # are within the allowed bounds for both x and y axes
+            for dim, axis_bounds in zip((0, 1), ((min_x, max_x), (min_y, max_y))):
+                self.assertTrue(
+                    (
+                        (ray_points_projected[..., dim] <= axis_bounds[1])
+                        & (ray_points_projected[..., dim] >= axis_bounds[0])
+                    ).all()
+                )
+
+            # x,y must be constant along each ray (axis-independent: checked once)
+            if n_pts_per_ray > 1:
+                self.assertClose(
+                    ray_points_projected[..., :2].std(dim=-2),
+                    torch.zeros_like(ray_points_projected[..., 0, :2]),
+                    atol=1e-5,
+                )
+
+        else:
+            raise ValueError(str(type(raysampler)))
+
+    def _check_raysampler_ray_directions(self, cameras, raysampler, ray_bundle):
+        """
+        Check the directions output of raysamplers against unprojected points.
+        """
+
+        batch_size = cameras.R.shape[0]
+        n_pts_per_ray = ray_bundle.lengths.shape[-1]
+        spatial_size = ray_bundle.xys.shape[1:-1]
+        n_rays_per_image = spatial_size.numel()
+
+        # obtain the ray points in world coords by unprojecting (x, y, length)
+        rays_points_world = cameras.unproject_points(
+            torch.cat(
+                (
+                    ray_bundle.xys.view(batch_size, n_rays_per_image, 1, 2).expand(
+                        batch_size, n_rays_per_image, n_pts_per_ray, 2
+                    ),
+                    ray_bundle.lengths.view(
+                        batch_size, n_rays_per_image, n_pts_per_ray, 1
+                    ),
+                ),
+                dim=-1,
+            ).view(batch_size, -1, 3)
+        ).view(batch_size, -1, n_pts_per_ray, 3)
+
+        # reshape to common testing size
+        rays_directions_world_normed = torch.nn.functional.normalize(
+            ray_bundle.directions.view(batch_size, -1, 3), dim=-1
+        )
+
+        # check that the l2-normed difference between the last point and every
+        # earlier point of a ray matches the ray direction (empty if 1 point)
+        rays_directions_world_ = torch.nn.functional.normalize(
+            rays_points_world[:, :, -1:] - rays_points_world[:, :, :-1], dim=-1
+        )
+        self.assertClose(
+            rays_directions_world_normed[:, :, None].expand_as(rays_directions_world_),
+            rays_directions_world_,
+            atol=1e-4,
+        )
+
+        # check the ray directions rotated using camera rotation matrix
+        # match the ray directions of a camera with trivial extrinsics
+        cameras_trivial_extrinsic = cameras.clone()
+        cameras_trivial_extrinsic.R = eyes(
+            N=batch_size, dim=3, dtype=cameras.R.dtype, device=cameras.device
+        )
+        cameras_trivial_extrinsic.T = torch.zeros_like(cameras.T)
+
+        # make sure we get the same random rays in case we call the
+        # MonteCarloRaysampler twice below; fork_rng restores the RNG state after
+        with torch.random.fork_rng(devices=range(torch.cuda.device_count())):
+            torch.random.manual_seed(42)
+            ray_bundle_world_fix_seed = raysampler(cameras=cameras)
+            torch.random.manual_seed(42)
+            ray_bundle_camera_fix_seed = raysampler(cameras=cameras_trivial_extrinsic)
+
+        rays_directions_camera_fix_seed_ = Rotate(
+            cameras.R, device=cameras.R.device
+        ).transform_points(ray_bundle_world_fix_seed.directions.view(batch_size, -1, 3))
+
+        self.assertClose(
+            rays_directions_camera_fix_seed_,
+            ray_bundle_camera_fix_seed.directions.view(batch_size, -1, 3),
+            atol=1e-5,
+        )
+
+    @unittest.skipIf(
+        torch.__version__[:4] == "1.5.", "non persistent buffer needs PyTorch 1.6"
+    )
+    def test_load_state_different_resolution(self) -> None:
+        # check that we can load the state of one ray sampler into
+        # another with different image size and number of points per ray.
+        module1 = NDCGridRaysampler(
+            image_width=20,
+            image_height=30,
+            n_pts_per_ray=40,
+            min_depth=1.2,
+            max_depth=2.3,
+        )
+        module2 = NDCGridRaysampler(
+            image_width=22,
+            image_height=32,
+            n_pts_per_ray=42,
+            min_depth=1.2,
+            max_depth=2.3,
+        )
+        state = module1.state_dict()
+        module2.load_state_dict(state)  # passes as long as this does not raise
+
+    def test_jiggle(self) -> None:
+        # random data which is in ascending order along the last dimension
+        scale = 180
+        data = scale * torch.cumsum(torch.rand(8, 3, 4, 20), dim=-1)
+
+        out = _jiggle_within_stratas(data)
+        self.assertTupleEqual(out.shape, data.shape)
+
+        # Check `out` is in strictly ascending order
+        self.assertGreater((out[..., 1:] - out[..., :-1]).min(), 0)
+        # each output stays between the inputs neighbouring its own input
+        self.assertConstant(out[..., :-1] < data[..., 1:], True)
+        self.assertConstant(data[..., :-1] < out[..., 1:], True)
+
+        jiggles = out - data
+        # the extreme jiggles must land within (0.4, 0.5) * scale of zero
+        self.assertLess(jiggles.min(), -0.4 * scale)
+        self.assertGreater(jiggles.min(), -0.5 * scale)
+        self.assertGreater(jiggles.max(), 0.4 * scale)
+        self.assertLess(jiggles.max(), 0.5 * scale)
+
+    def test_safe_multinomial(self) -> None:
+        mask = [  # row i has i + 1 non-zero weights
+            [1, 0, 0, 0, 0],
+            [1, 1, 0, 0, 0],
+            [1, 1, 1, 0, 0],
+            [1, 1, 1, 1, 0],
+        ]
+        tmask = torch.tensor(mask, dtype=torch.float32)
+
+        for _ in range(5):
+            random_scalar = torch.rand(1)  # weights are scaled by a random factor
+            samples = _safe_multinomial(tmask * random_scalar, 3)
+            self.assertTupleEqual(samples.shape, (4, 3))
+
+            # samples[0] is exactly determined: only index 0 has weight
+            self.assertConstant(samples[0], 0)
+            # samples[1] can only contain the indices 0 and 1
+            self.assertGreaterEqual(samples[1].min(), 0)
+            self.assertLessEqual(samples[1].max(), 1)
+
+            # samples[2] is exactly determined
+            self.assertSetEqual(set(samples[2].tolist()), {0, 1, 2})
+
+            # samples[3] has enough sources, so must contain 3 distinct values.
+            self.assertLessEqual(samples[3].max(), 3)
+            self.assertEqual(len(set(samples[3].tolist())), 3)
+
+    def test_heterogeneous_sampling(self, batch_size=8):
+        """
+        Test that the output of heterogeneous sampling has the first dimension equal
+        to n_rays_total and second to 1 and that ray_bundle elements from different
+        sampled cameras are different and equal for same sampled cameras.
+        """
+        cameras = init_random_cameras(PerspectiveCameras, batch_size, random_z=True)
+        for n_rays_total in [2, 3, 17, 21, 32]:
+            for cls in (MultinomialRaysampler, MonteCarloRaysampler):
+                with self.subTest(cls.__name__ + ", n_rays_total=" + str(n_rays_total)):
+                    raysampler = self.init_raysampler(
+                        cls, n_rays_total=n_rays_total, n_rays_per_image=None
+                    )
+                    ray_bundle = raysampler(cameras)
+
+                    # test whether they are of the correct shape
+                    for attr in ("origins", "directions", "lengths", "xys"):
+                        tensor = getattr(ray_bundle, attr)
+                        assert tensor.shape[:2] == torch.Size(
+                            (n_rays_total, 1)
+                        ), tensor.shape
+
+                    # if two camera ids are the same then origins should also match;
+                    # directions are always different and lengths always equal
+                    for i1, (origin1, dir1, len1, id1) in enumerate(
+                        zip(
+                            ray_bundle.origins,
+                            ray_bundle.directions,
+                            ray_bundle.lengths,
+                            torch.repeat_interleave(
+                                ray_bundle.camera_ids, ray_bundle.camera_counts
+                            ),
+                        )
+                    ):
+                        for i2, (origin2, dir2, len2, id2) in enumerate(
+                            zip(
+                                ray_bundle.origins,
+                                ray_bundle.directions,
+                                ray_bundle.lengths,
+                                torch.repeat_interleave(
+                                    ray_bundle.camera_ids, ray_bundle.camera_counts
+                                ),
+                            )
+                        ):
+                            if i1 == i2:
+                                continue
+                            assert torch.allclose(
+                                origin1, origin2, rtol=1e-4, atol=1e-4
+                            ) == (id1 == id2), (origin1, origin2, id1, id2)
+                            assert not torch.allclose(dir1, dir2), (dir1, dir2)
+                            self.assertClose(len1, len2)
diff --git a/pytorch3d/tests/test_render_implicit.py b/pytorch3d/tests/test_render_implicit.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc797b7f55d68ba433d5839470702e13c7a5699d
--- /dev/null
+++ b/pytorch3d/tests/test_render_implicit.py
@@ -0,0 +1,417 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import numpy as np
+import torch
+from pytorch3d.renderer import (
+    BlendParams,
+    EmissionAbsorptionRaymarcher,
+    ImplicitRenderer,
+    Materials,
+    MeshRasterizer,
+    MeshRenderer,
+    MonteCarloRaysampler,
+    MultinomialRaysampler,
+    NDCMultinomialRaysampler,
+    PointLights,
+    RasterizationSettings,
+    ray_bundle_to_ray_points,
+    RayBundle,
+    SoftPhongShader,
+    TexturesVertex,
+)
+from pytorch3d.structures import Meshes
+from pytorch3d.utils import ico_sphere
+
+from .common_testing import TestCaseMixin
+from .test_render_volumes import init_cameras
+
+
+DEBUG = False  # set to True to export debug gifs/pngs from the tests below
+if DEBUG:
+    import os
+    import tempfile
+
+    from PIL import Image
+
+
+def spherical_volumetric_function(
+    ray_bundle: RayBundle,
+    sphere_centroid: torch.Tensor,
+    sphere_diameter: float,
+    **kwargs,
+):
+    """
+    Volumetric function of an RGB sphere centred at `sphere_centroid`; returns
+    (densities, features). NOTE: `sphere_diameter` is thresholded as a radius.
+    """
+    # convert the ray bundle to world points
+    rays_points_world = ray_bundle_to_ray_points(ray_bundle)
+    batch_size = rays_points_world.shape[0]
+
+    # surface_vectors = vectors from the sphere centroid to each world point
+    surface_vectors = (
+        rays_points_world.view(batch_size, -1, 3) - sphere_centroid[:, None]
+    )
+
+    # the squared distance of each ray point to the centroid of the sphere
+    surface_dist = (
+        (surface_vectors**2)
+        .sum(-1, keepdim=True)
+        .view(*rays_points_world.shape[:-1], 1)
+    )
+
+    # steep sigmoid: density ~1 where distance to the centroid < sphere_diameter
+    rays_densities = torch.sigmoid(-100.0 * (surface_dist - sphere_diameter**2))
+
+    # ray colors = normalized surface_vectors remapped from [-1, 1] to [0, 1]
+    rays_features = (
+        torch.nn.functional.normalize(
+            surface_vectors.view(rays_points_world.shape), dim=-1
+        )
+        * 0.5
+        + 0.5
+    )
+
+    return rays_densities, rays_features
+
+
+class TestRenderImplicit(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)  # fixed seeds: the tests draw random tensors
+        np.random.seed(42)
+
+    @staticmethod
+    def renderer(
+        batch_size=10,
+        raymarcher_type=EmissionAbsorptionRaymarcher,
+        n_rays_per_image=10,
+        n_pts_per_ray=10,
+        sphere_diameter=0.75,
+    ):
+        # generate NDC camera extrinsics and intrinsics
+        cameras = init_cameras(batch_size, image_size=None, ndc=True)
+
+        # get rand offset of the volume
+        sphere_centroid = torch.randn(batch_size, 3, device=cameras.device) * 0.1
+
+        # init the Monte Carlo raysampler covering x, y in [-1, 1]
+        raysampler = MonteCarloRaysampler(
+            min_x=-1.0,
+            max_x=1.0,
+            min_y=-1.0,
+            max_y=1.0,
+            n_rays_per_image=n_rays_per_image,
+            n_pts_per_ray=n_pts_per_ray,
+            min_depth=0.1,
+            max_depth=2.0,
+        ).to(cameras.device)
+
+        # instantiate the raymarcher from the requested class
+        raymarcher = raymarcher_type()
+
+        # get the implicit renderer
+        renderer = ImplicitRenderer(raysampler=raysampler, raymarcher=raymarcher)
+
+        def run_renderer():  # NOTE: the render output is discarded
+            renderer(
+                cameras=cameras,
+                volumetric_function=spherical_volumetric_function,
+                sphere_centroid=sphere_centroid,
+                sphere_diameter=sphere_diameter,
+            )
+
+        return run_renderer
+
+    def test_input_types(self):
+        """
+        Check that ValueError is raised for bad raysampler/raymarcher/function.
+        """
+        # the constructor must reject every bad raysampler/raymarcher pair
+        for bad_raysampler in (None, 5, []):
+            for bad_raymarcher in (None, 5, []):
+                with self.assertRaises(ValueError):
+                    ImplicitRenderer(
+                        raysampler=bad_raysampler, raymarcher=bad_raymarcher
+                    )
+
+        # init a trivial (valid) renderer
+        renderer = ImplicitRenderer(
+            raysampler=NDCMultinomialRaysampler(
+                image_width=100,
+                image_height=100,
+                n_pts_per_ray=10,
+                min_depth=0.1,
+                max_depth=1.0,
+            ),
+            raymarcher=EmissionAbsorptionRaymarcher(),
+        )
+
+        # get default cameras
+        cameras = init_cameras()
+        # the render call must reject every bad volumetric_function
+        for bad_volumetric_function in (None, 5, []):
+            with self.assertRaises(ValueError):
+                renderer(cameras=cameras, volumetric_function=bad_volumetric_function)
+
+    def test_compare_with_meshes_renderer(self):
+        self._compare_with_meshes_renderer(image_size=(200, 100))  # (H, W): tall
+        self._compare_with_meshes_renderer(image_size=(100, 200))  # (H, W): wide
+
+    def _compare_with_meshes_renderer(
+        self, image_size, batch_size=11, sphere_diameter=0.6
+    ):
+        """
+        Render a spherical RGB volumetric function with ImplicitRenderer and the
+        matching ico-sphere mesh with MeshRenderer, then check that both images
+        agree on average. `image_size` is (height, width); under DEBUG the
+        per-frame comparison is also exported as a gif and a png.
+        """
+        # generate NDC camera extrinsics and intrinsics
+        cameras = init_cameras(batch_size, image_size=image_size, ndc=True)
+
+        # get rand offset of the volume
+        sphere_centroid = torch.randn(batch_size, 3, device=cameras.device) * 0.1
+        sphere_centroid.requires_grad = True
+
+        # init the grid raysampler with the ndc grid
+        raysampler = NDCMultinomialRaysampler(
+            image_width=image_size[1],
+            image_height=image_size[0],
+            n_pts_per_ray=256,
+            min_depth=0.1,
+            max_depth=2.0,
+        )
+
+        # get the EA raymarcher
+        raymarcher = EmissionAbsorptionRaymarcher()
+
+        # jitter the camera intrinsics a bit for each render
+        cameras_randomized = cameras.clone()
+        cameras_randomized.principal_point = (
+            torch.randn_like(cameras.principal_point) * 0.3
+        )
+        cameras_randomized.focal_length = (
+            cameras.focal_length + torch.randn_like(cameras.focal_length) * 0.2
+        )
+
+        # the list of differentiable camera vars
+        cam_vars = ("R", "T", "focal_length", "principal_point")
+        # enable the gradient caching for the camera variables
+        for cam_var in cam_vars:
+            getattr(cameras_randomized, cam_var).requires_grad = True
+
+        # run the implicit renderer and keep the first element of its output
+        images_opacities = ImplicitRenderer(
+            raysampler=raysampler, raymarcher=raymarcher
+        )(
+            cameras=cameras_randomized,
+            volumetric_function=spherical_volumetric_function,
+            sphere_centroid=sphere_centroid,
+            sphere_diameter=sphere_diameter,
+        )[
+            0
+        ]
+
+        # check that the renderer does not erase gradients
+        loss = images_opacities.sum()
+        loss.backward()
+        for check_var in (
+            *[getattr(cameras_randomized, cam_var) for cam_var in cam_vars],
+            sphere_centroid,
+        ):
+            self.assertIsNotNone(check_var.grad)
+
+        # instantiate the corresponding spherical mesh (radius = sphere_diameter)
+        ico = ico_sphere(level=4, device=cameras.device).extend(batch_size)
+        verts = (
+            torch.nn.functional.normalize(ico.verts_padded(), dim=-1) * sphere_diameter
+            + sphere_centroid[:, None]
+        )
+        meshes = Meshes(
+            verts=verts,
+            faces=ico.faces_padded(),
+            textures=TexturesVertex(
+                verts_features=(
+                    torch.nn.functional.normalize(verts, dim=-1) * 0.5 + 0.5
+                )
+            ),
+        )
+
+        # instantiate the corresponding mesh renderer
+        lights = PointLights(device=cameras.device, location=[[0.0, 0.0, 0.0]])
+        renderer_textured = MeshRenderer(
+            rasterizer=MeshRasterizer(
+                cameras=cameras_randomized,
+                raster_settings=RasterizationSettings(
+                    image_size=image_size,
+                    blur_radius=1e-3,
+                    faces_per_pixel=10,
+                    z_clip_value=None,
+                    perspective_correct=False,
+                ),
+            ),
+            shader=SoftPhongShader(
+                device=cameras.device,
+                cameras=cameras_randomized,
+                lights=lights,
+                materials=Materials(
+                    ambient_color=((2.0, 2.0, 2.0),),
+                    diffuse_color=((0.0, 0.0, 0.0),),
+                    specular_color=((0.0, 0.0, 0.0),),
+                    shininess=64,
+                    device=cameras.device,
+                ),
+                blend_params=BlendParams(
+                    sigma=1e-3, gamma=1e-4, background_color=(0.0, 0.0, 0.0)
+                ),
+            ),
+        )
+
+        # get the mesh render
+        images_opacities_meshes = renderer_textured(
+            meshes, cameras=cameras_randomized, lights=lights
+        )
+
+        if DEBUG:
+            outdir = tempfile.gettempdir() + "/test_implicit_vs_mesh_renderer"
+            os.makedirs(outdir, exist_ok=True)
+
+            frames = []
+            for (image_opacity, image_opacity_mesh) in zip(
+                images_opacities, images_opacities_meshes
+            ):
+                image, opacity = image_opacity.split([3, 1], dim=-1)
+                image_mesh, opacity_mesh = image_opacity_mesh.split([3, 1], dim=-1)
+                diff_image = (
+                    ((image - image_mesh) * 0.5 + 0.5)
+                    .mean(dim=2, keepdim=True)
+                    .repeat(1, 1, 3)
+                )
+                image_pil = Image.fromarray(
+                    (
+                        torch.cat(
+                            (
+                                image,
+                                image_mesh,
+                                diff_image,
+                                opacity.repeat(1, 1, 3),
+                                opacity_mesh.repeat(1, 1, 3),
+                            ),
+                            dim=1,
+                        )
+                        .detach()
+                        .cpu()
+                        .numpy()
+                        * 255.0
+                    ).astype(np.uint8)
+                )
+                frames.append(image_pil)
+
+            # export gif
+            outfile = os.path.join(outdir, "implicit_vs_mesh_render.gif")
+            frames[0].save(
+                outfile,
+                save_all=True,
+                append_images=frames[1:],
+                duration=1000 // 15,  # Pillow wants ms per frame (~15 fps)
+                loop=0,
+            )
+            print(f"exported {outfile}")
+
+            # export concatenated frames
+            outfile_cat = os.path.join(outdir, "implicit_vs_mesh_render.png")
+            Image.fromarray(np.concatenate([np.array(f) for f in frames], axis=0)).save(
+                outfile_cat
+            )
+            print(f"exported {outfile_cat}")
+
+        # compare the renders: per-image mean and std of the absolute difference
+        diff = (images_opacities - images_opacities_meshes).abs().mean(dim=-1)
+        mu_diff = diff.mean(dim=(1, 2))
+        std_diff = diff.std(dim=(1, 2))
+        self.assertClose(mu_diff, torch.zeros_like(mu_diff), atol=5e-2)
+        self.assertClose(std_diff, torch.zeros_like(std_diff), atol=6e-2)
+
+    def test_rotating_gif(self):
+        self._rotating_gif(image_size=(200, 100))  # (H, W): tall
+        self._rotating_gif(image_size=(100, 200))  # (H, W): wide
+
+    def _rotating_gif(self, image_size, n_frames=50, fps=15, sphere_diameter=0.5):
+        """
+        Render a gif animation of a rotating sphere (runs only if `DEBUG==True`).
+        """
+
+        if not DEBUG:
+            # do not run this if debug is False
+            return
+
+        # generate camera extrinsics and intrinsics
+        cameras = init_cameras(n_frames, image_size=image_size)
+
+        # init the grid raysampler; image_size is (height, width)
+        raysampler = MultinomialRaysampler(
+            min_x=0.5,
+            max_x=image_size[1] - 0.5,
+            min_y=0.5,
+            max_y=image_size[0] - 0.5,
+            image_width=image_size[1],
+            image_height=image_size[0],
+            n_pts_per_ray=256,
+            min_depth=0.1,
+            max_depth=2.0,
+        )
+
+        # get the EA raymarcher
+        raymarcher = EmissionAbsorptionRaymarcher()
+
+        # get the implicit render
+        renderer = ImplicitRenderer(raysampler=raysampler, raymarcher=raymarcher)
+
+        # the sphere stays centred at the origin in every frame
+        sphere_centroid = torch.zeros(n_frames, 3, device=cameras.device)
+
+        # run the renderer
+        images_opacities = renderer(
+            cameras=cameras,
+            volumetric_function=spherical_volumetric_function,
+            sphere_centroid=sphere_centroid,
+            sphere_diameter=sphere_diameter,
+        )[0]
+
+        # split output to the alpha channel and rendered images
+        images, opacities = images_opacities[..., :3], images_opacities[..., 3]
+
+        # export the gif: each frame is the image next to its clamped opacity
+        outdir = tempfile.gettempdir() + "/test_implicit_renderer_gifs"
+        os.makedirs(outdir, exist_ok=True)
+        frames = []
+        for image, opacity in zip(images, opacities):
+            image_pil = Image.fromarray(
+                (
+                    torch.cat(
+                        (image, opacity[..., None].clamp(0.0, 1.0).repeat(1, 1, 3)),
+                        dim=1,
+                    )
+                    .detach()
+                    .cpu()
+                    .numpy()
+                    * 255.0
+                ).astype(np.uint8)
+            )
+            frames.append(image_pil)
+        outfile = os.path.join(outdir, "rotating_sphere.gif")
+        frames[0].save(
+            outfile,
+            save_all=True,
+            append_images=frames[1:],
+            duration=1000 // fps,  # Pillow wants milliseconds per frame
+            loop=0,
+        )
+        print(f"exported {outfile}")
diff --git a/pytorch3d/tests/test_render_meshes.py b/pytorch3d/tests/test_render_meshes.py
new file mode 100644
index 0000000000000000000000000000000000000000..61fe46418480dd2ea3e12ad88351923404e4f503
--- /dev/null
+++ b/pytorch3d/tests/test_render_meshes.py
@@ -0,0 +1,1783 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+"""
+Sanity checks for output images from the renderer.
+"""
+import os
+import unittest
+from collections import namedtuple
+
+from itertools import product
+
+import numpy as np
+import torch
+from PIL import Image
+from pytorch3d.io import load_obj, load_objs_as_meshes
+from pytorch3d.renderer import (
+    AmbientLights,
+    FoVOrthographicCameras,
+    FoVPerspectiveCameras,
+    look_at_view_transform,
+    Materials,
+    MeshRasterizer,
+    MeshRenderer,
+    MeshRendererWithFragments,
+    OrthographicCameras,
+    PerspectiveCameras,
+    PointLights,
+    RasterizationSettings,
+    TexturesAtlas,
+    TexturesUV,
+    TexturesVertex,
+)
+from pytorch3d.renderer.fisheyecameras import FishEyeCameras
+from pytorch3d.renderer.mesh.shader import (
+    BlendParams,
+    HardFlatShader,
+    HardGouraudShader,
+    HardPhongShader,
+    SoftPhongShader,
+    SoftSilhouetteShader,
+    SplatterPhongShader,
+    TexturedSoftPhongShader,
+)
+from pytorch3d.renderer.opengl import MeshRasterizerOpenGL
+from pytorch3d.structures.meshes import (
+    join_meshes_as_batch,
+    join_meshes_as_scene,
+    Meshes,
+)
+from pytorch3d.utils.ico_sphere import ico_sphere
+from pytorch3d.utils.torus import torus
+
+from .common_testing import (
+    get_pytorch3d_dir,
+    get_tests_dir,
+    load_rgb_image,
+    TestCaseMixin,
+)
+
+# If DEBUG=True, save out images generated in the tests for debugging.
+# All saved images have prefix DEBUG_
+DEBUG = False
+DATA_DIR = get_tests_dir() / "data"  # reference images; DEBUG images go here too
+TUTORIAL_DATA_DIR = get_pytorch3d_dir() / "docs/tutorials/data"
+
+RasterizerTest = namedtuple(
+    "RasterizerTest", ["rasterizer", "shader", "reference_name", "debug_name"]
+)  # reference_name / debug_name select the reference and DEBUG image filenames
+
+
+class TestRenderMeshes(TestCaseMixin, unittest.TestCase):
+    def test_simple_sphere(self, elevated_camera=False, check_depth=False):
+        """
+        Test that hard phong/gouraud/flat and splatter-phong renders of a sphere
+        match reference images, for each camera type.
+
+        Args:
+            elevated_camera: If True, view the sphere with elev=45, azim=45 degrees.
+            check_depth: If True, also check the fragments' zbuf and the alpha mask.
+        """
+        device = torch.device("cuda:0")
+
+        # Init mesh
+        sphere_mesh = ico_sphere(5, device)
+        verts_padded = sphere_mesh.verts_padded()
+        faces_padded = sphere_mesh.faces_padded()
+        feats = torch.ones_like(verts_padded, device=device)
+        textures = TexturesVertex(verts_features=feats)
+        sphere_mesh = Meshes(verts=verts_padded, faces=faces_padded, textures=textures)
+
+        # Init the camera pose and the matching reference-filename postfix
+        if elevated_camera:
+            # Elevated and rotated camera
+            R, T = look_at_view_transform(dist=2.7, elev=45.0, azim=45.0)
+            postfix = "_elevated_"
+            # If y axis is up, the spot of light should
+            # be on the bottom left of the sphere.
+        else:
+            # No elevation or azimuth rotation
+            R, T = look_at_view_transform(2.7, 0.0, 0.0)
+            postfix = "_"
+        for cam_type in (
+            FoVPerspectiveCameras,
+            FoVOrthographicCameras,
+            PerspectiveCameras,
+            OrthographicCameras,
+            FishEyeCameras,
+        ):
+            if cam_type == FishEyeCameras:
+                cam_kwargs = {
+                    "radial_params": torch.tensor(
+                        [
+                            [-1, -2, -3, 0, 0, 1],
+                        ],
+                        dtype=torch.float32,
+                    ),
+                    "tangential_params": torch.tensor(
+                        [[0.7002747019, -0.4005228974]], dtype=torch.float32
+                    ),
+                    "thin_prism_params": torch.tensor(
+                        [
+                            [-1.000134884, -1.000084822, -1.0009420014, -1.0001276838],
+                        ],
+                        dtype=torch.float32,
+                    ),
+                }
+                cameras = cam_type(
+                    device=device,
+                    R=R,
+                    T=T,
+                    use_tangential=True,
+                    use_radial=True,
+                    use_thin_prism=True,
+                    world_coordinates=True,
+                    **cam_kwargs,
+                )
+            else:
+                cameras = cam_type(device=device, R=R, T=T)
+
+            # Init shader settings
+            materials = Materials(device=device)
+            lights = PointLights(device=device)
+            lights.location = torch.tensor([0.0, 0.0, +2.0], device=device)[None]
+
+            raster_settings = RasterizationSettings(
+                image_size=512, blur_radius=0.0, faces_per_pixel=1
+            )
+            blend_params = BlendParams(0.5, 1e-4, (0, 0, 0))
+
+            # Test several rasterizer/shader combinations
+            rasterizer_tests = [
+                RasterizerTest(MeshRasterizer, HardPhongShader, "phong", "hard_phong"),
+                RasterizerTest(
+                    MeshRasterizer, HardGouraudShader, "gouraud", "hard_gouraud"
+                ),
+                RasterizerTest(MeshRasterizer, HardFlatShader, "flat", "hard_flat"),
+                RasterizerTest(
+                    MeshRasterizerOpenGL,
+                    SplatterPhongShader,
+                    "splatter",
+                    "splatter_phong",
+                ),
+            ]
+            for test in rasterizer_tests:
+                shader = test.shader(
+                    lights=lights,
+                    cameras=cameras,
+                    materials=materials,
+                    blend_params=blend_params,
+                )
+                if test.rasterizer == MeshRasterizer:
+                    rasterizer = test.rasterizer(
+                        cameras=cameras, raster_settings=raster_settings
+                    )
+                elif test.rasterizer == MeshRasterizerOpenGL:
+                    if type(cameras) in [
+                        PerspectiveCameras,
+                        OrthographicCameras,
+                        FishEyeCameras,
+                    ]:
+                        # MeshRasterizerOpenGL is only compatible with FoV cameras.
+                        continue
+                    rasterizer = test.rasterizer(
+                        cameras=cameras,
+                        raster_settings=raster_settings,
+                    )
+
+                if check_depth:
+                    renderer = MeshRendererWithFragments(
+                        rasterizer=rasterizer, shader=shader
+                    )
+                    images, fragments = renderer(sphere_mesh)
+                    self.assertClose(fragments.zbuf, rasterizer(sphere_mesh).zbuf)
+                    # Check the alpha channel is the mask. For soft rasterizers, the
+                    # boundary will not match exactly so we use quantiles to compare.
+                    self.assertLess(
+                        (
+                            images[..., -1]
+                            - (fragments.pix_to_face[..., 0] >= 0).float()
+                        ).quantile(0.99),
+                        0.005,
+                    )
+                else:
+                    renderer = MeshRenderer(rasterizer=rasterizer, shader=shader)
+                    images = renderer(sphere_mesh)
+
+                rgb = images[0, ..., :3].squeeze().cpu()
+                filename = "simple_sphere_light_%s%s%s.png" % (
+                    test.reference_name,
+                    postfix,
+                    cam_type.__name__,
+                )
+
+                image_ref = load_rgb_image("test_%s" % filename, DATA_DIR)
+                if DEBUG:
+                    debug_filename = "simple_sphere_light_%s%s%s.png" % (
+                        test.debug_name,
+                        postfix,
+                        cam_type.__name__,
+                    )
+                    filename = "DEBUG_%s" % debug_filename
+                    Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                        DATA_DIR / filename
+                    )
+                self.assertClose(rgb, image_ref, atol=0.05)
+
+            ########################################################
+            # Move the light to z = -2.0 in world space (it was at
+            # z = +2.0) so it is behind the sphere. Note that +Z is in,
+            # +Y up, +X left for both world and camera space.
+            ########################################################
+            lights.location[..., 2] = -2.0
+            phong_shader = HardPhongShader(
+                lights=lights,
+                cameras=cameras,
+                materials=materials,
+                blend_params=blend_params,
+            )
+            if check_depth:
+                phong_renderer = MeshRendererWithFragments(
+                    rasterizer=rasterizer, shader=phong_shader
+                )
+                images, fragments = phong_renderer(sphere_mesh, lights=lights)
+                self.assertClose(
+                    fragments.zbuf, rasterizer(sphere_mesh, lights=lights).zbuf
+                )
+                # Check the alpha channel is the mask
+                self.assertLess(
+                    (
+                        images[..., -1] - (fragments.pix_to_face[..., 0] >= 0).float()
+                    ).quantile(0.99),
+                    0.005,
+                )
+            else:
+                phong_renderer = MeshRenderer(
+                    rasterizer=rasterizer, shader=phong_shader
+                )
+                images = phong_renderer(sphere_mesh, lights=lights)
+            rgb = images[0, ..., :3].squeeze().cpu()
+            if DEBUG:
+                filename = "DEBUG_simple_sphere_dark%s%s.png" % (
+                    postfix,
+                    cam_type.__name__,
+                )
+                Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR / filename
+                )
+
+            image_ref_phong_dark = load_rgb_image(
+                "test_simple_sphere_dark%s%s.png" % (postfix, cam_type.__name__),
+                DATA_DIR,
+            )
+            # Soft shaders (SplatterPhong) will have a different boundary than hard
+            # ones, but should be identical otherwise.
+            self.assertLess((rgb - image_ref_phong_dark).quantile(0.99), 0.005)
+
+    def test_simple_sphere_elevated_camera(self):
+        """
+        Run `test_simple_sphere` with `elevated_camera=True`, so the renders are
+        compared against the `_elevated_` reference images.
+
+        The rendering is performed with a camera that has non-zero elevation.
+        """
+        self.test_simple_sphere(elevated_camera=True)
+
+    def test_simple_sphere_depth(self):
+        """
+        Run `test_simple_sphere` with `check_depth=True` (default, non-elevated
+        camera): on top of the reference-image comparison it checks the
+        returned fragments' zbuf against the rasterizer output and compares
+        the alpha channel with the pix_to_face coverage mask.
+        """
+        self.test_simple_sphere(check_depth=True)
+
+    def test_simple_sphere_screen(self):
+        """
+        Test output when rendering with PerspectiveCameras & OrthographicCameras
+        in NDC vs screen space: cameras defined in screen space (in_ndc=False)
+        are compared against the same phong reference images.
+        """
+        device = torch.device("cuda:0")
+
+        # Init mesh
+        sphere_mesh = ico_sphere(5, device)
+        verts_padded = sphere_mesh.verts_padded()
+        faces_padded = sphere_mesh.faces_padded()
+        feats = torch.ones_like(verts_padded, device=device)
+        textures = TexturesVertex(verts_features=feats)
+        sphere_mesh = Meshes(verts=verts_padded, faces=faces_padded, textures=textures)
+
+        R, T = look_at_view_transform(2.7, 0.0, 0.0)
+
+        # Init shader settings
+        materials = Materials(device=device)
+        lights = PointLights(device=device)
+        lights.location = torch.tensor([0.0, 0.0, +2.0], device=device)[None]
+
+        raster_settings = RasterizationSettings(
+            image_size=512, blur_radius=0.0, faces_per_pixel=1
+        )
+        # half of the 512 px image size, used for principal point and focal length
+        half_half = (512.0 / 2.0, 512.0 / 2.0)
+        for cam_type in (PerspectiveCameras, OrthographicCameras):
+            cameras = cam_type(
+                device=device,
+                R=R,
+                T=T,
+                principal_point=(half_half,),
+                focal_length=(half_half,),
+                image_size=((512, 512),),
+                in_ndc=False,
+            )
+            rasterizer = MeshRasterizer(
+                cameras=cameras, raster_settings=raster_settings
+            )
+            blend_params = BlendParams(1e-4, 1e-4, (0, 0, 0))
+
+            shader = HardPhongShader(
+                lights=lights,
+                cameras=cameras,
+                materials=materials,
+                blend_params=blend_params,
+            )
+            renderer = MeshRenderer(rasterizer=rasterizer, shader=shader)
+            images = renderer(sphere_mesh)
+            rgb = images[0, ..., :3].squeeze().cpu()
+            filename = "test_simple_sphere_light_phong_%s.png" % cam_type.__name__
+            if DEBUG:
+                Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR / f"DEBUG_{filename}"
+                )
+
+            image_ref = load_rgb_image(filename, DATA_DIR)
+            self.assertClose(rgb, image_ref, atol=0.05)
+
+    def test_simple_sphere_batched(self):
+        """
+        Test a mesh with vertex textures can be extended to form a batch, and
+        is rendered correctly with Phong, Gouraud, Flat and SplatterPhong
+        shaders with batched lighting and cameras at different distances.
+        """
+        batch_size = 3
+        device = torch.device("cuda:0")
+
+        # Init mesh with vertex textures.
+        sphere_meshes = ico_sphere(3, device).extend(batch_size)
+        verts_padded = sphere_meshes.verts_padded()
+        faces_padded = sphere_meshes.faces_padded()
+        feats = torch.ones_like(verts_padded, device=device)
+        textures = TexturesVertex(verts_features=feats)
+        sphere_meshes = Meshes(
+            verts=verts_padded, faces=faces_padded, textures=textures
+        )
+
+        # Init batched cameras (one distance per mesh) and rasterizer settings
+        dist = torch.tensor([2, 4, 6]).to(device)
+        elev = torch.zeros_like(dist)
+        azim = torch.zeros_like(dist)
+        R, T = look_at_view_transform(dist, elev, azim)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+        raster_settings = RasterizationSettings(
+            image_size=512, blur_radius=0.0, faces_per_pixel=4
+        )
+
+        # Init shader settings
+        materials = Materials(device=device)
+        lights_location = torch.tensor([0.0, 0.0, +2.0], device=device)
+        lights_location = lights_location[None].expand(batch_size, -1)
+        lights = PointLights(device=device, location=lights_location)
+        blend_params = BlendParams(0.5, 1e-4, (0, 0, 0))
+
+        # The rasterizer/shader combinations to render and compare
+        rasterizer_tests = [
+            RasterizerTest(MeshRasterizer, HardPhongShader, "phong", "hard_phong"),
+            RasterizerTest(
+                MeshRasterizer, HardGouraudShader, "gouraud", "hard_gouraud"
+            ),
+            RasterizerTest(MeshRasterizer, HardFlatShader, "flat", "hard_flat"),
+            RasterizerTest(
+                MeshRasterizerOpenGL,
+                SplatterPhongShader,
+                "splatter",
+                "splatter_phong",
+            ),
+        ]
+        for test in rasterizer_tests:
+            reference_name = test.reference_name
+            debug_name = test.debug_name
+            rasterizer = test.rasterizer(
+                cameras=cameras, raster_settings=raster_settings
+            )
+
+            shader = test.shader(
+                lights=lights,
+                cameras=cameras,
+                materials=materials,
+                blend_params=blend_params,
+            )
+            renderer = MeshRenderer(rasterizer=rasterizer, shader=shader)
+            images = renderer(sphere_meshes)
+            for i in range(batch_size):
+                image_ref = load_rgb_image(
+                    "test_simple_sphere_batched_%s_%s_%s.png"
+                    % (reference_name, type(cameras).__name__, i),
+                    DATA_DIR,
+                )
+                rgb = images[i, ..., :3].squeeze().cpu()
+                if DEBUG:
+                    filename = "DEBUG_simple_sphere_batched_%s_%s_%s.png" % (
+                        debug_name,
+                        type(cameras).__name__,
+                        i,
+                    )
+                    Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                        DATA_DIR / filename
+                    )
+                self.assertClose(rgb, image_ref, atol=0.05)
+
+    def test_silhouette_with_grad(self):
+        """
+        Test silhouette blending. Also check that gradient calculation works.
+        """
+        device = torch.device("cuda:0")
+        sphere_mesh = ico_sphere(5, device)
+        verts, faces = sphere_mesh.get_mesh_verts_faces(0)
+        sphere_mesh = Meshes(verts=[verts], faces=[faces])
+
+        blend_params = BlendParams(sigma=1e-4, gamma=1e-4)
+        raster_settings = RasterizationSettings(
+            image_size=512,
+            blur_radius=np.log(1.0 / 1e-4 - 1.0) * blend_params.sigma,
+            faces_per_pixel=80,
+            clip_barycentric_coords=True,
+        )
+
+        # Init rasterizer settings
+        R, T = look_at_view_transform(2.7, 0, 0)
+        for cam_type in (
+            FoVPerspectiveCameras,
+            FoVOrthographicCameras,
+            PerspectiveCameras,
+            OrthographicCameras,
+            FishEyeCameras,
+        ):
+            if cam_type == FishEyeCameras:
+                cameras = cam_type(
+                    device=device,
+                    R=R,
+                    T=T,
+                    use_tangential=False,
+                    use_radial=False,
+                    use_thin_prism=False,
+                    world_coordinates=True,
+                )
+            else:
+                cameras = cam_type(device=device, R=R, T=T)
+
+            # Init renderer
+            renderer = MeshRenderer(
+                rasterizer=MeshRasterizer(
+                    cameras=cameras, raster_settings=raster_settings
+                ),
+                shader=SoftSilhouetteShader(blend_params=blend_params),
+            )
+            images = renderer(sphere_mesh)
+            alpha = images[0, ..., 3].squeeze().cpu()
+            if DEBUG:
+                filename = os.path.join(
+                    DATA_DIR, "DEBUG_%s_silhouette.png" % (cam_type.__name__)
+                )
+                Image.fromarray((alpha.detach().numpy() * 255).astype(np.uint8)).save(
+                    filename
+                )
+
+            ref_filename = "test_%s_silhouette.png" % (cam_type.__name__)
+            image_ref_filename = DATA_DIR / ref_filename
+            with Image.open(image_ref_filename) as raw_image_ref:
+                image_ref = torch.from_numpy(np.array(raw_image_ref))
+
+            image_ref = image_ref.to(dtype=torch.float32) / 255.0
+            self.assertClose(alpha, image_ref, atol=0.055)
+
+            # Check grad exist
+            verts.requires_grad = True
+            sphere_mesh = Meshes(verts=[verts], faces=[faces])
+            images = renderer(sphere_mesh)
+            images[0, ...].sum().backward()
+            self.assertIsNotNone(verts.grad)
+
+    def test_texture_map(self):
+        """
+        Test a mesh with a texture map is loaded and rendered correctly.
+        The pupils in the eyes of the cow should always be looking to the left.
+
+        Runs the shared texture-map checks with MeshRasterizer.
+        """
+        self._texture_map_per_rasterizer(MeshRasterizer)
+
+    def test_texture_map_opengl(self):
+        """
+        Test a mesh with a texture map is loaded and rendered correctly.
+        The pupils in the eyes of the cow should always be looking to the left.
+
+        Runs the shared texture-map checks with MeshRasterizerOpenGL.
+        """
+        self._texture_map_per_rasterizer(MeshRasterizerOpenGL)
+
+    def _texture_map_per_rasterizer(self, rasterizer_type):
+        device = torch.device("cuda:0")
+
+        obj_filename = TUTORIAL_DATA_DIR / "cow_mesh/cow.obj"
+
+        # Load mesh + texture
+        verts, faces, aux = load_obj(
+            obj_filename, device=device, load_textures=True, texture_wrap=None
+        )
+        tex_map = list(aux.texture_images.values())[0]
+        tex_map = tex_map[None, ...].to(faces.textures_idx.device)
+        textures = TexturesUV(
+            maps=tex_map, faces_uvs=[faces.textures_idx], verts_uvs=[aux.verts_uvs]
+        )
+        mesh = Meshes(verts=[verts], faces=[faces.verts_idx], textures=textures)
+
+        # Init rasterizer settings
+        R, T = look_at_view_transform(2.7, 0, 0)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+
+        raster_settings = RasterizationSettings(
+            image_size=512, blur_radius=0.0, faces_per_pixel=1
+        )
+
+        # Init shader settings
+        materials = Materials(device=device)
+        lights = PointLights(device=device)
+
+        # Place light behind the cow in world space. The front of
+        # the cow is facing the -z direction.
+        lights.location = torch.tensor([0.0, 0.0, 2.0], device=device)[None]
+
+        blend_params = BlendParams(
+            sigma=1e-1 if rasterizer_type == MeshRasterizer else 0.5,
+            gamma=1e-4,
+            background_color=torch.tensor([1.0, 1.0, 1.0], device=device),
+        )
+        # Init renderer
+        rasterizer = rasterizer_type(cameras=cameras, raster_settings=raster_settings)
+        if rasterizer_type == MeshRasterizer:
+            shader = TexturedSoftPhongShader(
+                lights=lights,
+                cameras=cameras,
+                materials=materials,
+                blend_params=blend_params,
+            )
+        elif rasterizer_type == MeshRasterizerOpenGL:
+            shader = SplatterPhongShader(
+                lights=lights,
+                cameras=cameras,
+                materials=materials,
+                blend_params=blend_params,
+            )
+        renderer = MeshRenderer(rasterizer=rasterizer, shader=shader)
+
+        # Load reference image
+        image_ref = load_rgb_image(
+            f"test_texture_map_back_{rasterizer_type.__name__}.png", DATA_DIR
+        )
+
+        for bin_size in [0, None]:
+            if rasterizer_type == MeshRasterizerOpenGL and bin_size == 0:
+                # MeshRasterizerOpenGL does not use this parameter.
+                continue
+            # Check both naive and coarse to fine produce the same output.
+            renderer.rasterizer.raster_settings.bin_size = bin_size
+            images = renderer(mesh)
+            rgb = images[0, ..., :3].squeeze().cpu()
+
+            if DEBUG:
+                Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR / f"DEBUG_texture_map_back_{rasterizer_type.__name__}.png"
+                )
+
+            # NOTE some pixels can be flaky and will not lead to
+            # `cond1` being true. Add `cond2` and check `cond1 or cond2`
+            cond1 = torch.allclose(rgb, image_ref, atol=0.05)
+            cond2 = ((rgb - image_ref).abs() > 0.05).sum() < 5
+            # self.assertTrue(cond1 or cond2)
+
+        # Check grad exists
+        [verts] = mesh.verts_list()
+        verts.requires_grad = True
+        mesh2 = Meshes(verts=[verts], faces=mesh.faces_list(), textures=mesh.textures)
+        images = renderer(mesh2)
+        images[0, ...].sum().backward()
+        self.assertIsNotNone(verts.grad)
+
+        ##########################################
+        # Check rendering of the front of the cow
+        ##########################################
+
+        R, T = look_at_view_transform(2.7, 0, 180)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+
+        # Move light to the front of the cow in world space
+        lights.location = torch.tensor([0.0, 0.0, -2.0], device=device)[None]
+
+        # Load reference image
+        image_ref = load_rgb_image(
+            f"test_texture_map_front_{rasterizer_type.__name__}.png", DATA_DIR
+        )
+
+        for bin_size in [0, None]:
+            if rasterizer == MeshRasterizerOpenGL and bin_size == 0:
+                # MeshRasterizerOpenGL does not use this parameter.
+                continue
+            # Check both naive and coarse to fine produce the same output.
+            renderer.rasterizer.raster_settings.bin_size = bin_size
+
+            images = renderer(mesh, cameras=cameras, lights=lights)
+            rgb = images[0, ..., :3].squeeze().cpu()
+
+            if DEBUG:
+                Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR / f"DEBUG_texture_map_front_{rasterizer_type.__name__}.png"
+                )
+
+            # NOTE some pixels can be flaky and will not lead to
+            # `cond1` being true. Add `cond2` and check `cond1 or cond2`
+            cond1 = torch.allclose(rgb, image_ref, atol=0.05)
+            cond2 = ((rgb - image_ref).abs() > 0.05).sum() < 5
+            self.assertTrue(cond1 or cond2)
+
+        #################################
+        # Add blurring to rasterization
+        #################################
+        if rasterizer_type == MeshRasterizer:
+            # Note that MeshRasterizer can blur the images arbitrarily, however
+            # MeshRasterizerOpenGL is limited by its kernel size (currently 3 px^2),
+            # so this test only makes sense for MeshRasterizer.
+            R, T = look_at_view_transform(2.7, 0, 180)
+            cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+            # For MeshRasterizer, blurring is controlled by blur_radius. For
+            # MeshRasterizerOpenGL, by sigma.
+            blend_params = BlendParams(sigma=5e-4, gamma=1e-4)
+            raster_settings = RasterizationSettings(
+                image_size=512,
+                blur_radius=np.log(1.0 / 1e-4 - 1.0) * blend_params.sigma,
+                faces_per_pixel=100,
+                clip_barycentric_coords=True,
+                perspective_correct=rasterizer_type.__name__ == "MeshRasterizerOpenGL",
+            )
+
+            # Load reference image
+            image_ref = load_rgb_image("test_blurry_textured_rendering.png", DATA_DIR)
+
+            for bin_size in [0, None]:
+                # Check both naive and coarse to fine produce the same output.
+                renderer.rasterizer.raster_settings.bin_size = bin_size
+
+                images = renderer(
+                    mesh.clone(),
+                    cameras=cameras,
+                    raster_settings=raster_settings,
+                    blend_params=blend_params,
+                )
+                rgb = images[0, ..., :3].squeeze().cpu()
+
+                if DEBUG:
+                    Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                        DATA_DIR / "DEBUG_blurry_textured_rendering.png"
+                    )
+
+                self.assertClose(rgb, image_ref, atol=0.05)
+
+    def test_batch_uvs(self):
+        """Run the batched TexturesUV consistency check with MeshRasterizer."""
+        self._batch_uvs(MeshRasterizer)
+
+    def test_batch_uvs_opengl(self):
+        self._batch_uvs(MeshRasterizer)
+
+    def _batch_uvs(self, rasterizer_type):
+        """Test that two random tori with TexturesUV render the same as each individually."""
+        torch.manual_seed(1)
+        device = torch.device("cuda:0")
+
+        plain_torus = torus(r=1, R=4, sides=10, rings=10, device=device)
+        [verts] = plain_torus.verts_list()
+        [faces] = plain_torus.faces_list()
+        nocolor = torch.zeros((100, 100), device=device)
+        color_gradient = torch.linspace(0, 1, steps=100, device=device)
+        color_gradient1 = color_gradient[None].expand_as(nocolor)
+        color_gradient2 = color_gradient[:, None].expand_as(nocolor)
+        colors1 = torch.stack([nocolor, color_gradient1, color_gradient2], dim=2)
+        colors2 = torch.stack([color_gradient1, color_gradient2, nocolor], dim=2)
+        verts_uvs1 = torch.rand(size=(verts.shape[0], 2), device=device)
+        verts_uvs2 = torch.rand(size=(verts.shape[0], 2), device=device)
+
+        textures1 = TexturesUV(
+            maps=[colors1], faces_uvs=[faces], verts_uvs=[verts_uvs1]
+        )
+        textures2 = TexturesUV(
+            maps=[colors2], faces_uvs=[faces], verts_uvs=[verts_uvs2]
+        )
+        mesh1 = Meshes(verts=[verts], faces=[faces], textures=textures1)
+        mesh2 = Meshes(verts=[verts], faces=[faces], textures=textures2)
+        mesh_both = join_meshes_as_batch([mesh1, mesh2])
+
+        R, T = look_at_view_transform(10, 10, 0)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+
+        raster_settings = RasterizationSettings(
+            image_size=128, blur_radius=0.0, faces_per_pixel=1
+        )
+
+        # Init shader settings
+        lights = PointLights(device=device)
+        lights.location = torch.tensor([0.0, 0.0, 2.0], device=device)[None]
+
+        blend_params = BlendParams(
+            sigma=0.5,
+            gamma=1e-4,
+            background_color=torch.tensor([1.0, 1.0, 1.0], device=device),
+        )
+        # Init renderer
+        rasterizer = MeshRasterizer(cameras=cameras, raster_settings=raster_settings)
+        if rasterizer_type == MeshRasterizer:
+            shader = HardPhongShader(
+                device=device, lights=lights, cameras=cameras, blend_params=blend_params
+            )
+        else:
+            shader = SplatterPhongShader(
+                device=device, lights=lights, cameras=cameras, blend_params=blend_params
+            )
+
+        renderer = MeshRenderer(rasterizer, shader)
+
+        outputs = []
+        for meshes in [mesh_both, mesh1, mesh2]:
+            outputs.append(renderer(meshes))
+
+        if DEBUG:
+            Image.fromarray(
+                (outputs[0][0, ..., :3].cpu().numpy() * 255).astype(np.uint8)
+            ).save(DATA_DIR / "test_batch_uvs0.png")
+            Image.fromarray(
+                (outputs[1][0, ..., :3].cpu().numpy() * 255).astype(np.uint8)
+            ).save(DATA_DIR / "test_batch_uvs1.png")
+            Image.fromarray(
+                (outputs[0][1, ..., :3].cpu().numpy() * 255).astype(np.uint8)
+            ).save(DATA_DIR / "test_batch_uvs2.png")
+            Image.fromarray(
+                (outputs[2][0, ..., :3].cpu().numpy() * 255).astype(np.uint8)
+            ).save(DATA_DIR / "test_batch_uvs3.png")
+
+            diff = torch.abs(outputs[0][0, ..., :3] - outputs[1][0, ..., :3])
+            Image.fromarray(((diff > 1e-5).cpu().numpy().astype(np.uint8) * 255)).save(
+                DATA_DIR / "test_batch_uvs01.png"
+            )
+            diff = torch.abs(outputs[0][1, ..., :3] - outputs[2][0, ..., :3])
+            Image.fromarray(((diff > 1e-5).cpu().numpy().astype(np.uint8) * 255)).save(
+                DATA_DIR / "test_batch_uvs23.png"
+            )
+
+        self.assertClose(outputs[0][0, ..., :3], outputs[1][0, ..., :3], atol=1e-5)
+        self.assertClose(outputs[0][1, ..., :3], outputs[2][0, ..., :3], atol=1e-5)
+
+    def test_join_uvs(self):
+        """Run the TexturesUV join-as-scene check with MeshRasterizer."""
+        self._join_uvs(MeshRasterizer)
+
+    def test_join_uvs_opengl(self):
+        """Run the TexturesUV join-as-scene check with MeshRasterizerOpenGL."""
+        self._join_uvs(MeshRasterizerOpenGL)
+
+    def _join_uvs(self, rasterizer_type):
+        """Meshes with TexturesUV joined into a scene"""
+        # Test the result of rendering three tori with separate textures.
+        # The expected result is consistent with rendering them each alone.
+        # This tests TexturesUV.join_scene with rectangle flipping,
+        # and we check the form of the merged map as well.
+        torch.manual_seed(1)
+        device = torch.device("cuda:0")
+
+        R, T = look_at_view_transform(18, 0, 0)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+
+        raster_settings = RasterizationSettings(
+            image_size=256, blur_radius=0.0, faces_per_pixel=1
+        )
+
+        lights = AmbientLights(device=device)
+        blend_params = BlendParams(
+            sigma=0.5,
+            gamma=1e-4,
+            background_color=torch.tensor([1.0, 1.0, 1.0], device=device),
+        )
+        rasterizer = rasterizer_type(cameras=cameras, raster_settings=raster_settings)
+        if rasterizer_type == MeshRasterizer:
+            shader = HardPhongShader(
+                device=device, blend_params=blend_params, cameras=cameras, lights=lights
+            )
+        else:
+            shader = SplatterPhongShader(
+                device=device, blend_params=blend_params, cameras=cameras, lights=lights
+            )
+        renderer = MeshRenderer(rasterizer, shader)
+
+        plain_torus = torus(r=1, R=4, sides=5, rings=6, device=device)
+        [verts] = plain_torus.verts_list()
+        verts_shifted1 = verts.clone()
+        verts_shifted1 *= 0.5
+        verts_shifted1[:, 1] += 7
+        verts_shifted2 = verts.clone()
+        verts_shifted2 *= 0.5
+        verts_shifted2[:, 1] -= 7
+        verts_shifted3 = verts.clone()
+        verts_shifted3 *= 0.5
+        verts_shifted3[:, 1] -= 700
+
+        [faces] = plain_torus.faces_list()
+        nocolor = torch.zeros((100, 100), device=device)
+        color_gradient = torch.linspace(0, 1, steps=100, device=device)
+        color_gradient1 = color_gradient[None].expand_as(nocolor)
+        color_gradient2 = color_gradient[:, None].expand_as(nocolor)
+        colors1 = torch.stack([nocolor, color_gradient1, color_gradient2], dim=2)
+        colors2 = torch.stack([color_gradient1, color_gradient2, nocolor], dim=2)
+        verts_uvs1 = torch.rand(size=(verts.shape[0], 2), device=device)
+        verts_uvs2 = torch.rand(size=(verts.shape[0], 2), device=device)
+
+        for i, align_corners, padding_mode in [
+            (0, True, "border"),
+            (1, False, "border"),
+            (2, False, "zeros"),
+        ]:
+            textures1 = TexturesUV(
+                maps=[colors1],
+                faces_uvs=[faces],
+                verts_uvs=[verts_uvs1],
+                align_corners=align_corners,
+                padding_mode=padding_mode,
+            )
+
+            # These downsamplings of colors2 are chosen to ensure a flip and a non flip
+            # when the maps are merged.
+            # We have maps of size (100, 100), (50, 99) and (99, 50).
+            textures2 = TexturesUV(
+                maps=[colors2[::2, :-1]],
+                faces_uvs=[faces],
+                verts_uvs=[verts_uvs2],
+                align_corners=align_corners,
+                padding_mode=padding_mode,
+            )
+            offset = torch.tensor([0, 0, 0.5], device=device)
+            textures3 = TexturesUV(
+                maps=[colors2[:-1, ::2] + offset],
+                faces_uvs=[faces],
+                verts_uvs=[verts_uvs2],
+                align_corners=align_corners,
+                padding_mode=padding_mode,
+            )
+            mesh1 = Meshes(verts=[verts], faces=[faces], textures=textures1)
+            mesh2 = Meshes(verts=[verts_shifted1], faces=[faces], textures=textures2)
+            mesh3 = Meshes(verts=[verts_shifted2], faces=[faces], textures=textures3)
+            # mesh4 is like mesh1 but outside the field of view. It is here to test
+            # that having another texture with the same map doesn't produce
+            # two copies in the joined map.
+            mesh4 = Meshes(verts=[verts_shifted3], faces=[faces], textures=textures1)
+            mesh = join_meshes_as_scene([mesh1, mesh2, mesh3, mesh4])
+
+            output = renderer(mesh)[0, ..., :3].cpu()
+            output1 = renderer(mesh1)[0, ..., :3].cpu()
+            output2 = renderer(mesh2)[0, ..., :3].cpu()
+            output3 = renderer(mesh3)[0, ..., :3].cpu()
+            # The background color is white and the objects do not overlap, so we can
+            # predict the merged image by taking the minimum over every channel
+            merged = torch.min(torch.min(output1, output2), output3)
+
+            image_ref = load_rgb_image(
+                f"test_joinuvs{i}_{rasterizer_type.__name__}_final.png", DATA_DIR
+            )
+            map_ref = load_rgb_image(f"test_joinuvs{i}_map.png", DATA_DIR)
+
+            if DEBUG:
+                Image.fromarray((output.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR
+                    / f"DEBUG_test_joinuvs{i}_{rasterizer_type.__name__}_final.png"
+                )
+                Image.fromarray((merged.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR
+                    / f"DEBUG_test_joinuvs{i}_{rasterizer_type.__name__}_merged.png"
+                )
+
+                Image.fromarray((output1.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR / f"DEBUG_test_joinuvs{i}_{rasterizer_type.__name__}_1.png"
+                )
+                Image.fromarray((output2.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR / f"DEBUG_test_joinuvs{i}_{rasterizer_type.__name__}_2.png"
+                )
+                Image.fromarray((output3.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR / f"DEBUG_test_joinuvs{i}_{rasterizer_type.__name__}_3.png"
+                )
+                Image.fromarray(
+                    (mesh.textures.maps_padded()[0].cpu().numpy() * 255).astype(
+                        np.uint8
+                    )
+                ).save(DATA_DIR / f"DEBUG_test_joinuvs{i}_map.png")
+                Image.fromarray(
+                    (mesh2.textures.maps_padded()[0].cpu().numpy() * 255).astype(
+                        np.uint8
+                    )
+                ).save(DATA_DIR / f"DEBUG_test_joinuvs{i}_map2.png")
+                Image.fromarray(
+                    (mesh3.textures.maps_padded()[0].cpu().numpy() * 255).astype(
+                        np.uint8
+                    )
+                ).save(DATA_DIR / f"DEBUG_test_joinuvs{i}_map3.png")
+
+            self.assertClose(output, merged, atol=0.005)
+            self.assertClose(output, image_ref, atol=0.005)
+            self.assertClose(mesh.textures.maps_padded()[0].cpu(), map_ref, atol=0.05)
+
+    def test_join_uvs_simple(self):
+        # Example from issue #826
+        a = TexturesUV(
+            maps=torch.full((1, 4000, 4000, 3), 0.8),
+            faces_uvs=torch.arange(300).reshape(1, 100, 3),
+            verts_uvs=torch.rand(1, 300, 2) * 0.4 + 0.1,
+        )
+        b = TexturesUV(
+            maps=torch.full((1, 2000, 2000, 3), 0.7),
+            faces_uvs=torch.arange(150).reshape(1, 50, 3),
+            verts_uvs=torch.rand(1, 150, 2) * 0.2 + 0.3,
+        )
+        self.assertEqual(a._num_faces_per_mesh, [100])
+        self.assertEqual(b._num_faces_per_mesh, [50])
+        c = a.join_batch([b]).join_scene()
+        self.assertEqual(a._num_faces_per_mesh, [100])
+        self.assertEqual(b._num_faces_per_mesh, [50])
+        self.assertEqual(c._num_faces_per_mesh, [150])
+
+        color = c.faces_verts_textures_packed()
+        color1 = color[:100, :, 0].flatten()
+        color2 = color[100:, :, 0].flatten()
+        expect1 = color1.new_tensor(0.8)
+        expect2 = color2.new_tensor(0.7)
+        self.assertClose(color1.min(), expect1)
+        self.assertClose(color1.max(), expect1)
+        self.assertClose(color2.min(), expect2)
+        self.assertClose(color2.max(), expect2)
+
+        if DEBUG:
+            from pytorch3d.vis.texture_vis import texturesuv_image_PIL as PI
+
+            PI(a, radius=5).save(DATA_DIR / "test_join_uvs_simple_a.png")
+            PI(b, radius=5).save(DATA_DIR / "test_join_uvs_simple_b.png")
+            PI(c, radius=5).save(DATA_DIR / "test_join_uvs_simple_c.png")
+
+    def test_join_verts(self):
+        """Run the TexturesVertex join-as-scene check with MeshRasterizer."""
+        self._join_verts(MeshRasterizer)
+
+    def test_join_verts_opengl(self):
+        """Run the TexturesVertex join-as-scene check with MeshRasterizerOpenGL."""
+        self._join_verts(MeshRasterizerOpenGL)
+
+    def _join_verts(self, rasterizer_type):
+        """Meshes with TexturesVertex joined into a scene"""
+        # Test the result of rendering two tori with separate textures.
+        # The expected result is consistent with rendering them each alone.
+        torch.manual_seed(1)
+        device = torch.device("cuda:0")
+
+        plain_torus = torus(r=1, R=4, sides=5, rings=6, device=device)
+        [verts] = plain_torus.verts_list()
+        verts_shifted1 = verts.clone()
+        verts_shifted1 *= 0.5
+        verts_shifted1[:, 1] += 7
+
+        faces = plain_torus.faces_list()
+        textures1 = TexturesVertex(verts_features=[torch.rand_like(verts)])
+        textures2 = TexturesVertex(verts_features=[torch.rand_like(verts)])
+        mesh1 = Meshes(verts=[verts], faces=faces, textures=textures1)
+        mesh2 = Meshes(verts=[verts_shifted1], faces=faces, textures=textures2)
+        mesh = join_meshes_as_scene([mesh1, mesh2])
+
+        R, T = look_at_view_transform(18, 0, 0)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+
+        raster_settings = RasterizationSettings(
+            image_size=256, blur_radius=0.0, faces_per_pixel=1
+        )
+
+        lights = AmbientLights(device=device)
+        blend_params = BlendParams(
+            sigma=0.5,
+            gamma=1e-4,
+            background_color=torch.tensor([1.0, 1.0, 1.0], device=device),
+        )
+        rasterizer = rasterizer_type(cameras=cameras, raster_settings=raster_settings)
+        if rasterizer_type == MeshRasterizer:
+            shader = HardPhongShader(
+                device=device, blend_params=blend_params, cameras=cameras, lights=lights
+            )
+        else:
+            shader = SplatterPhongShader(
+                device=device, blend_params=blend_params, cameras=cameras, lights=lights
+            )
+
+        renderer = MeshRenderer(rasterizer, shader)
+
+        output = renderer(mesh)
+
+        image_ref = load_rgb_image(
+            f"test_joinverts_final_{rasterizer_type.__name__}.png", DATA_DIR
+        )
+
+        if DEBUG:
+            debugging_outputs = []
+            for mesh_ in [mesh1, mesh2]:
+                debugging_outputs.append(renderer(mesh_))
+            Image.fromarray(
+                (output[0, ..., :3].cpu().numpy() * 255).astype(np.uint8)
+            ).save(
+                DATA_DIR / f"DEBUG_test_joinverts_final_{rasterizer_type.__name__}.png"
+            )
+            Image.fromarray(
+                (debugging_outputs[0][0, ..., :3].cpu().numpy() * 255).astype(np.uint8)
+            ).save(DATA_DIR / "DEBUG_test_joinverts_1.png")
+            Image.fromarray(
+                (debugging_outputs[1][0, ..., :3].cpu().numpy() * 255).astype(np.uint8)
+            ).save(DATA_DIR / "DEBUG_test_joinverts_2.png")
+
+        result = output[0, ..., :3].cpu()
+        self.assertClose(result, image_ref, atol=0.05)
+
+    def test_join_atlas(self):
+        """Run the TexturesAtlas join-as-scene check with MeshRasterizer."""
+        self._join_atlas(MeshRasterizer)
+
+    def test_join_atlas_opengl(self):
+        """Run the TexturesAtlas join-as-scene check with MeshRasterizerOpenGL."""
+        self._join_atlas(MeshRasterizerOpenGL)
+
+    def _join_atlas(self, rasterizer_type):
+        """Meshes with TexturesAtlas joined into a scene"""
+        # Test the result of rendering two tori with separate textures.
+        # The expected result is consistent with rendering them each alone.
+        torch.manual_seed(1)
+        device = torch.device("cuda:0")
+
+        plain_torus = torus(r=1, R=4, sides=5, rings=6, device=device)
+        [verts] = plain_torus.verts_list()
+        verts_shifted1 = verts.clone()
+        verts_shifted1 *= 1.2
+        verts_shifted1[:, 0] += 4
+        verts_shifted1[:, 1] += 5
+        verts[:, 0] -= 4
+        verts[:, 1] -= 4
+
+        [faces] = plain_torus.faces_list()
+        map_size = 3
+        # Two random atlases.
+        # The averaging of the random numbers here is not consistent with the
+        # meaning of the atlases, but makes each face a bit smoother than
+        # if everything had a random color.
+        atlas1 = torch.rand(size=(faces.shape[0], map_size, map_size, 3), device=device)
+        atlas1[:, 1] = 0.5 * atlas1[:, 0] + 0.5 * atlas1[:, 2]
+        atlas1[:, :, 1] = 0.5 * atlas1[:, :, 0] + 0.5 * atlas1[:, :, 2]
+        atlas2 = torch.rand(size=(faces.shape[0], map_size, map_size, 3), device=device)
+        atlas2[:, 1] = 0.5 * atlas2[:, 0] + 0.5 * atlas2[:, 2]
+        atlas2[:, :, 1] = 0.5 * atlas2[:, :, 0] + 0.5 * atlas2[:, :, 2]
+
+        textures1 = TexturesAtlas(atlas=[atlas1])
+        textures2 = TexturesAtlas(atlas=[atlas2])
+        mesh1 = Meshes(verts=[verts], faces=[faces], textures=textures1)
+        mesh2 = Meshes(verts=[verts_shifted1], faces=[faces], textures=textures2)
+        self.assertEqual(textures1._num_faces_per_mesh, [len(faces)])
+        self.assertEqual(textures2._num_faces_per_mesh, [len(faces)])
+        mesh_joined = join_meshes_as_scene([mesh1, mesh2])
+        self.assertEqual(textures1._num_faces_per_mesh, [len(faces)])
+        self.assertEqual(textures2._num_faces_per_mesh, [len(faces)])
+        self.assertEqual(mesh_joined.textures._num_faces_per_mesh, [len(faces) * 2])
+
+        R, T = look_at_view_transform(18, 0, 0)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+
+        raster_settings = RasterizationSettings(
+            image_size=512,
+            blur_radius=0.0,
+            faces_per_pixel=1,
+            perspective_correct=rasterizer_type.__name__ == "MeshRasterizerOpenGL",
+        )
+
+        lights = AmbientLights(device=device)
+        blend_params = BlendParams(
+            sigma=0.5,
+            gamma=1e-4,
+            background_color=torch.tensor([1.0, 1.0, 1.0], device=device),
+        )
+
+        rasterizer = rasterizer_type(cameras=cameras, raster_settings=raster_settings)
+        if rasterizer_type == MeshRasterizer:
+            shader = HardPhongShader(
+                device=device, blend_params=blend_params, cameras=cameras, lights=lights
+            )
+        else:
+            shader = SplatterPhongShader(
+                device=device, blend_params=blend_params, cameras=cameras, lights=lights
+            )
+
+        renderer = MeshRenderer(rasterizer, shader)
+
+        output = renderer(mesh_joined)
+
+        image_ref = load_rgb_image(
+            f"test_joinatlas_final_{rasterizer_type.__name__}.png", DATA_DIR
+        )
+
+        if DEBUG:
+            debugging_outputs = []
+            for mesh_ in [mesh1, mesh2]:
+                debugging_outputs.append(renderer(mesh_))
+            Image.fromarray(
+                (output[0, ..., :3].cpu().numpy() * 255).astype(np.uint8)
+            ).save(
+                DATA_DIR / f"DEBUG_test_joinatlas_final_{rasterizer_type.__name__}.png"
+            )
+            Image.fromarray(
+                (debugging_outputs[0][0, ..., :3].cpu().numpy() * 255).astype(np.uint8)
+            ).save(DATA_DIR / f"test_joinatlas_1_{rasterizer_type.__name__}.png")
+            Image.fromarray(
+                (debugging_outputs[1][0, ..., :3].cpu().numpy() * 255).astype(np.uint8)
+            ).save(DATA_DIR / f"test_joinatlas_2_{rasterizer_type.__name__}.png")
+
+        result = output[0, ..., :3].cpu()
+        self.assertClose(result, image_ref, atol=0.05)
+
+    def test_joined_spheres(self):
+        """Run the joined-spheres rendering check with MeshRasterizer."""
+        self._joined_spheres(MeshRasterizer)
+
+    def test_joined_spheres_opengl(self):
+        """Run the joined-spheres rendering check with MeshRasterizerOpenGL."""
+        self._joined_spheres(MeshRasterizerOpenGL)
+
+    def _joined_spheres(self, rasterizer_type):
+        """
+        Test a list of Meshes can be joined as a single mesh and
+        the single mesh is rendered correctly with Phong, Gouraud
+        and Flat Shaders.
+        """
+        device = torch.device("cuda:0")
+
+        # Init mesh with vertex textures.
+        # Initialize a list containing two ico spheres of different sizes.
+        sphere_list = [ico_sphere(3, device), ico_sphere(4, device)]
+        # [(42 verts, 80 faces), (162 verts, 320 faces)]
+        # The scale the vertices need to be set at to resize the spheres
+        scales = [0.25, 1]
+        # The distance the spheres ought to be offset horizontally to prevent overlap.
+        offsets = [1.2, -0.3]
+        # Initialize a list containing the adjusted sphere meshes.
+        sphere_mesh_list = []
+        for i in range(len(sphere_list)):
+            verts = sphere_list[i].verts_padded() * scales[i]
+            verts[0, :, 0] += offsets[i]
+            sphere_mesh_list.append(
+                Meshes(verts=verts, faces=sphere_list[i].faces_padded())
+            )
+        joined_sphere_mesh = join_meshes_as_scene(sphere_mesh_list)
+        joined_sphere_mesh.textures = TexturesVertex(
+            verts_features=torch.ones_like(joined_sphere_mesh.verts_padded())
+        )
+
+        # Init rasterizer settings
+        R, T = look_at_view_transform(2.7, 0.0, 0.0)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+        raster_settings = RasterizationSettings(
+            image_size=512,
+            blur_radius=0.0,
+            faces_per_pixel=1,
+            perspective_correct=rasterizer_type.__name__ == "MeshRasterizerOpenGL",
+        )
+
+        # Init shader settings
+        materials = Materials(device=device)
+        lights = PointLights(device=device)
+        lights.location = torch.tensor([0.0, 0.0, +2.0], device=device)[None]
+        blend_params = BlendParams(0.5, 1e-4, (0, 0, 0))
+
+        # Init renderer
+        rasterizer = rasterizer_type(cameras=cameras, raster_settings=raster_settings)
+        shaders = {
+            "phong": HardPhongShader,
+            "gouraud": HardGouraudShader,
+            "flat": HardFlatShader,
+            "splatter": SplatterPhongShader,
+        }
+        for (name, shader_init) in shaders.items():
+            if rasterizer_type == MeshRasterizerOpenGL and name != "splatter":
+                continue
+            if rasterizer_type == MeshRasterizer and name == "splatter":
+                continue
+
+            shader = shader_init(
+                lights=lights,
+                cameras=cameras,
+                materials=materials,
+                blend_params=blend_params,
+            )
+            renderer = MeshRenderer(rasterizer=rasterizer, shader=shader)
+            image = renderer(joined_sphere_mesh)
+            rgb = image[..., :3].squeeze().cpu()
+            if DEBUG:
+                file_name = "DEBUG_joined_spheres_%s.png" % name
+                Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR / file_name
+                )
+            image_ref = load_rgb_image("test_joined_spheres_%s.png" % name, DATA_DIR)
+            self.assertClose(rgb, image_ref, atol=0.05)
+
+    def test_texture_map_atlas(self):
+        self._texture_map_atlas(MeshRasterizer)
+
+    def test_texture_map_atlas_opengl(self):
+        self._texture_map_atlas(MeshRasterizerOpenGL)
+
+    def _texture_map_atlas(self, rasterizer_type):
+        """
+        Test a mesh with a texture map as a per face atlas is loaded and rendered correctly.
+        Also check that the backward pass for texture atlas rendering is differentiable.
+        """
+        device = torch.device("cuda:0")
+
+        obj_filename = TUTORIAL_DATA_DIR / "cow_mesh/cow.obj"
+
+        # Load mesh and texture as a per face texture atlas.
+        verts, faces, aux = load_obj(
+            obj_filename,
+            device=device,
+            load_textures=True,
+            create_texture_atlas=True,
+            texture_atlas_size=8,
+            texture_wrap=None,
+        )
+        atlas = aux.texture_atlas
+        mesh = Meshes(
+            verts=[verts],
+            faces=[faces.verts_idx],
+            textures=TexturesAtlas(atlas=[atlas]),
+        )
+
+        # Init rasterizer settings
+        R, T = look_at_view_transform(2.7, 0, 0)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+
+        raster_settings = RasterizationSettings(
+            image_size=512,
+            blur_radius=0.0,
+            faces_per_pixel=1,
+            cull_backfaces=True,
+            perspective_correct=rasterizer_type.__name__ == "MeshRasterizerOpenGL",
+        )
+
+        # Init shader settings
+        materials = Materials(device=device, specular_color=((0, 0, 0),), shininess=0.0)
+        blend_params = BlendParams(0.5, 1e-4, (1.0, 1.0, 1.0))
+        lights = PointLights(device=device)
+
+        # Place light behind the cow in world space. The front of
+        # the cow is facing the -z direction.
+        lights.location = torch.tensor([0.0, 0.0, 2.0], device=device)[None]
+
+        # The HardPhongShader can be used directly with atlas textures.
+        rasterizer = rasterizer_type(cameras=cameras, raster_settings=raster_settings)
+        if rasterizer_type == MeshRasterizer:
+            shader = HardPhongShader(
+                device=device,
+                blend_params=blend_params,
+                cameras=cameras,
+                lights=lights,
+                materials=materials,
+            )
+        else:
+            shader = SplatterPhongShader(
+                device=device,
+                blend_params=blend_params,
+                cameras=cameras,
+                lights=lights,
+                materials=materials,
+            )
+
+        renderer = MeshRenderer(rasterizer, shader)
+
+        images = renderer(mesh)
+        rgb = images[0, ..., :3].squeeze()
+
+        # Load reference image
+        image_ref = load_rgb_image(
+            f"test_texture_atlas_8x8_back_{rasterizer_type.__name__}.png", DATA_DIR
+        )
+
+        if DEBUG:
+            Image.fromarray((rgb.detach().cpu().numpy() * 255).astype(np.uint8)).save(
+                DATA_DIR
+                / f"DEBUG_texture_atlas_8x8_back_{rasterizer_type.__name__}.png"
+            )
+
+        self.assertClose(rgb.cpu(), image_ref, atol=0.05)
+
+        # Check gradients are propagated
+        # correctly back to the texture atlas.
+        # Because of how texture sampling is implemented
+        # for the texture atlas it is not possible to get
+        # gradients back to the vertices.
+        atlas.requires_grad = True
+        mesh = Meshes(
+            verts=[verts],
+            faces=[faces.verts_idx],
+            textures=TexturesAtlas(atlas=[atlas]),
+        )
+        raster_settings = RasterizationSettings(
+            image_size=512,
+            blur_radius=0.0001,
+            faces_per_pixel=5 if rasterizer_type.__name__ == "MeshRasterizer" else 1,
+            cull_backfaces=rasterizer_type.__name__ == "MeshRasterizer",
+            clip_barycentric_coords=True,
+        )
+        images = renderer(mesh, raster_settings=raster_settings)
+        images[0, ...].sum().backward()
+
+        fragments = rasterizer(mesh, raster_settings=raster_settings)
+        if rasterizer_type == MeshRasterizer:
+            # Some of the bary coordinates are outside the
+            # [0, 1] range as expected because the blur is > 0.
+            self.assertTrue(fragments.bary_coords.ge(1.0).any())
+        self.assertIsNotNone(atlas.grad)
+        self.assertTrue(atlas.grad.sum().abs() > 0.0)
+
+    def test_simple_sphere_outside_zfar(self):
+        self._simple_sphere_outside_zfar(MeshRasterizer)
+
+    def test_simple_sphere_outside_zfar_opengl(self):
+        self._simple_sphere_outside_zfar(MeshRasterizerOpenGL)
+
+    def _simple_sphere_outside_zfar(self, rasterizer_type):
+        """
+        Test output when rendering a sphere that is beyond zfar with a SoftPhongShader.
+        This renders a sphere of radius 500, with the camera at a distance of 1500,
+        for different settings of zfar.  This is intended to check 1) setting
+        cameras.zfar propagates to the blender and that the rendered sphere is (soft)
+        clipped if it is beyond zfar, 2) make sure there are no numerical
+        precision/overflow errors associated with larger world coordinates.
+        """
+        device = torch.device("cuda:0")
+
+        # Init mesh
+        sphere_mesh = ico_sphere(5, device)
+        verts_padded = sphere_mesh.verts_padded() * 500
+        faces_padded = sphere_mesh.faces_padded()
+        feats = torch.ones_like(verts_padded, device=device)
+        textures = TexturesVertex(verts_features=feats)
+        sphere_mesh = Meshes(verts=verts_padded, faces=faces_padded, textures=textures)
+
+        R, T = look_at_view_transform(1500, 0.0, 0.0)
+
+        # Init shader settings
+        materials = Materials(device=device)
+        lights = PointLights(device=device)
+        lights.location = torch.tensor([0.0, 0.0, +1000.0], device=device)[None]
+
+        raster_settings = RasterizationSettings(
+            image_size=256, blur_radius=0.0, faces_per_pixel=1
+        )
+        for zfar in (10000.0, 100.0):
+            cameras = FoVPerspectiveCameras(
+                device=device, R=R, T=T, aspect_ratio=1.0, fov=60.0, zfar=zfar
+            )
+            blend_params = BlendParams(
+                1e-4 if rasterizer_type == MeshRasterizer else 0.5, 1e-4, (0, 0, 1.0)
+            )
+            rasterizer = rasterizer_type(
+                cameras=cameras, raster_settings=raster_settings
+            )
+            if rasterizer_type == MeshRasterizer:
+                shader = SoftPhongShader(
+                    blend_params=blend_params,
+                    cameras=cameras,
+                    lights=lights,
+                    materials=materials,
+                )
+            else:
+                shader = SplatterPhongShader(
+                    device=device,
+                    blend_params=blend_params,
+                    cameras=cameras,
+                    lights=lights,
+                    materials=materials,
+                )
+            renderer = MeshRenderer(rasterizer=rasterizer, shader=shader)
+            images = renderer(sphere_mesh)
+            rgb = images[0, ..., :3].squeeze().cpu()
+
+            filename = (
+                "test_simple_sphere_outside_zfar_"
+                f"{int(zfar)}_{rasterizer_type.__name__}.png"
+            )
+
+            # Load reference image
+            image_ref = load_rgb_image(filename, DATA_DIR)
+
+            if DEBUG:
+                Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR / ("DEBUG_" + filename)
+                )
+
+            self.assertClose(rgb, image_ref, atol=0.05)
+
+    def test_cameras_kwarg(self):
+        """
+        Test that when cameras are passed in as a kwarg the rendering
+        works as expected
+        """
+        device = torch.device("cuda:0")
+
+        # Init mesh
+        sphere_mesh = ico_sphere(5, device)
+        verts_padded = sphere_mesh.verts_padded()
+        faces_padded = sphere_mesh.faces_padded()
+        feats = torch.ones_like(verts_padded, device=device)
+        textures = TexturesVertex(verts_features=feats)
+        sphere_mesh = Meshes(verts=verts_padded, faces=faces_padded, textures=textures)
+
+        # No elevation or azimuth rotation
+        rasterizer_tests = [
+            RasterizerTest(MeshRasterizer, HardPhongShader, "phong", "hard_phong"),
+            RasterizerTest(
+                MeshRasterizerOpenGL,
+                SplatterPhongShader,
+                "splatter",
+                "splatter_phong",
+            ),
+        ]
+        R, T = look_at_view_transform(2.7, 0.0, 0.0)
+        for cam_type in (
+            FoVPerspectiveCameras,
+            FoVOrthographicCameras,
+            PerspectiveCameras,
+            OrthographicCameras,
+        ):
+            for test in rasterizer_tests:
+                if test.rasterizer == MeshRasterizerOpenGL and cam_type in [
+                    PerspectiveCameras,
+                    OrthographicCameras,
+                ]:
+                    # MeshRasterizerOpenGL only works with FoV cameras.
+                    continue
+
+                cameras = cam_type(device=device, R=R, T=T)
+
+                # Init shader settings
+                materials = Materials(device=device)
+                lights = PointLights(device=device)
+                lights.location = torch.tensor([0.0, 0.0, +2.0], device=device)[None]
+
+                raster_settings = RasterizationSettings(
+                    image_size=512, blur_radius=0.0, faces_per_pixel=1
+                )
+                rasterizer = test.rasterizer(raster_settings=raster_settings)
+                blend_params = BlendParams(0.5, 1e-4, (0, 0, 0))
+                shader = test.shader(
+                    lights=lights, materials=materials, blend_params=blend_params
+                )
+                renderer = MeshRenderer(rasterizer=rasterizer, shader=shader)
+
+                # Cameras can be passed into the renderer in the forward pass
+                images = renderer(sphere_mesh, cameras=cameras)
+                rgb = images.squeeze()[..., :3].cpu().numpy()
+                image_ref = load_rgb_image(
+                    f"test_simple_sphere_light_{test.reference_name}_{cam_type.__name__}.png",
+                    DATA_DIR,
+                )
+                self.assertClose(rgb, image_ref, atol=0.05)
+
+    def test_nd_sphere(self):
+        """
+        Test that the render can handle textures with more than 3 channels and
+        not just 3 channel RGB.
+        """
+        torch.manual_seed(1)
+        device = torch.device("cuda:0")
+        C = 5
+        WHITE = ((1.0,) * C,)
+        BLACK = ((0.0,) * C,)
+
+        # Init mesh
+        sphere_mesh = ico_sphere(5, device)
+        verts_padded = sphere_mesh.verts_padded()
+        faces_padded = sphere_mesh.faces_padded()
+        feats = torch.ones(*verts_padded.shape[:-1], C, device=device)
+        n_verts = feats.shape[1]
+        # make some non-uniform pattern
+        feats *= torch.arange(0, 10, step=10 / n_verts, device=device).unsqueeze(1)
+        textures = TexturesVertex(verts_features=feats)
+        sphere_mesh = Meshes(verts=verts_padded, faces=faces_padded, textures=textures)
+
+        # No elevation or azimuth rotation
+        R, T = look_at_view_transform(2.7, 0.0, 0.0)
+
+        cameras = PerspectiveCameras(device=device, R=R, T=T)
+
+        # Init shader settings
+        materials = Materials(
+            device=device,
+            ambient_color=WHITE,
+            diffuse_color=WHITE,
+            specular_color=WHITE,
+        )
+        lights = AmbientLights(
+            device=device,
+            ambient_color=WHITE,
+        )
+        lights.location = torch.tensor([0.0, 0.0, +2.0], device=device)[None]
+
+        raster_settings = RasterizationSettings(
+            image_size=512, blur_radius=0.0, faces_per_pixel=1
+        )
+        rasterizer = MeshRasterizer(cameras=cameras, raster_settings=raster_settings)
+        blend_params = BlendParams(
+            1e-4,
+            1e-4,
+            background_color=BLACK[0],
+        )
+
+        # only test HardFlatShader since that's the only one that makes
+        # sense for classification
+        shader = HardFlatShader(
+            lights=lights,
+            cameras=cameras,
+            materials=materials,
+            blend_params=blend_params,
+        )
+        renderer = MeshRenderer(rasterizer=rasterizer, shader=shader)
+        images = renderer(sphere_mesh)
+
+        self.assertEqual(images.shape[-1], C + 1)
+        self.assertClose(images.amax(), torch.tensor(10.0), atol=0.01)
+        self.assertClose(images.amin(), torch.tensor(0.0), atol=0.01)
+
+        # grab last 3 color channels
+        rgb = (images[0, ..., C - 3 : C] / 10).squeeze().cpu()
+        filename = "test_nd_sphere.png"
+
+        if DEBUG:
+            debug_filename = "DEBUG_%s" % filename
+            Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                DATA_DIR / debug_filename
+            )
+
+        image_ref = load_rgb_image(filename, DATA_DIR)
+        self.assertClose(rgb, image_ref, atol=0.05)
+
+    def test_simple_sphere_fisheye_params(self):
+        """
+        Test output of phong, gouraud and flat shading rendered through
+        FishEyeCameras (radial, tangential and thin prism distortion) matches
+        a reference image using the default values for the light sources.
+        """
+        device = torch.device("cuda:0")
+
+        # Init mesh
+        sphere_mesh = ico_sphere(5, device)
+        verts_padded = sphere_mesh.verts_padded()
+        faces_padded = sphere_mesh.faces_padded()
+        feats = torch.ones_like(verts_padded, device=device)
+        textures = TexturesVertex(verts_features=feats)
+        sphere_mesh = Meshes(verts=verts_padded, faces=faces_padded, textures=textures)
+
+        # Init rasterizer settings
+        R, T = look_at_view_transform(2.7, 0.0, 0.0)
+        postfix = "_"
+
+        cam_kwargs = [
+            {
+                "radial_params": torch.tensor(
+                    [
+                        [-1, -2, -3, 0, 0, 1],
+                    ],
+                    dtype=torch.float32,
+                ),
+            },
+            {
+                "tangential_params": torch.tensor(
+                    [[0.7002747019, -0.4005228974]], dtype=torch.float32
+                ),
+            },
+            {
+                "thin_prism_params": torch.tensor(
+                    [
+                        [
+                            -1.000134884,
+                            -1.000084822,
+                            -1.0009420014,
+                            -1.0001276838,
+                        ],
+                    ],
+                    dtype=torch.float32,
+                ),
+            },
+        ]
+        variants = ["radial", "tangential", "prism"]
+        for test_case, variant in zip(cam_kwargs, variants):
+            cameras = FishEyeCameras(
+                device=device,
+                R=R,
+                T=T,
+                use_tangential=True,
+                use_radial=True,
+                use_thin_prism=True,
+                world_coordinates=True,
+                **test_case,
+            )
+
+            # Init shader settings
+            materials = Materials(device=device)
+            lights = PointLights(device=device)
+            lights.location = torch.tensor([0.0, 0.0, +2.0], device=device)[None]
+
+            raster_settings = RasterizationSettings(
+                image_size=512, blur_radius=0.0, faces_per_pixel=1
+            )
+            blend_params = BlendParams(0.5, 1e-4, (0, 0, 0))
+
+            # Test several shaders
+            rasterizer_tests = [
+                RasterizerTest(
+                    MeshRasterizer, HardPhongShader, "hard_phong", "hard_phong"
+                ),
+                RasterizerTest(
+                    MeshRasterizer, HardGouraudShader, "hard_gouraud", "hard_gouraud"
+                ),
+                RasterizerTest(
+                    MeshRasterizer, HardFlatShader, "hard_flat", "hard_flat"
+                ),
+            ]
+            for test in rasterizer_tests:
+                shader = test.shader(
+                    lights=lights,
+                    cameras=cameras,
+                    materials=materials,
+                    blend_params=blend_params,
+                )
+                if test.rasterizer == MeshRasterizer:
+                    rasterizer = test.rasterizer(
+                        cameras=cameras, raster_settings=raster_settings
+                    )
+
+                renderer = MeshRenderer(rasterizer=rasterizer, shader=shader)
+                images = renderer(sphere_mesh)
+
+                rgb = images[0, ..., :3].squeeze().cpu()
+                filename = "simple_sphere_light_%s%s%s%s%s.png" % (
+                    test.reference_name,
+                    postfix,
+                    variant,
+                    postfix,
+                    FishEyeCameras.__name__,
+                )
+
+                image_ref = load_rgb_image("test_%s" % filename, DATA_DIR)
+                if DEBUG:
+                    debug_filename = "simple_sphere_light_%s%s%s%s%s.png" % (
+                        test.debug_name,
+                        postfix,
+                        variant,
+                        postfix,
+                        FishEyeCameras.__name__,
+                    )
+                    filename = "DEBUG_%s" % debug_filename
+                    Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                        DATA_DIR / filename
+                    )
+                self.assertClose(rgb, image_ref, atol=0.05)
+
+            ########################################################
+            # Move the light to z = -2.0 in world space so that it
+            # is behind the sphere. Note that +Z is in, +Y up,
+            # +X left for both world and camera space.
+            ########################################################
+            lights.location[..., 2] = -2.0
+            phong_shader = HardPhongShader(
+                lights=lights,
+                cameras=cameras,
+                materials=materials,
+                blend_params=blend_params,
+            )
+
+            phong_renderer = MeshRenderer(rasterizer=rasterizer, shader=phong_shader)
+            images = phong_renderer(sphere_mesh, lights=lights)
+            rgb = images[0, ..., :3].squeeze().cpu()
+            if DEBUG:
+                filename = "DEBUG_simple_sphere_dark%s%s%s%s.png" % (
+                    postfix,
+                    variant,
+                    postfix,
+                    FishEyeCameras.__name__,
+                )
+                Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR / filename
+                )
+
+            image_ref_phong_dark = load_rgb_image(
+                "test_simple_sphere_dark%s%s%s%s.png"
+                % (postfix, variant, postfix, FishEyeCameras.__name__),
+                DATA_DIR,
+            )
+            # Soft shaders (SplatterPhong) will have a different boundary than hard
+            # ones, but should be identical otherwise.
+            self.assertLess((rgb - image_ref_phong_dark).quantile(0.99), 0.005)
+
+    def test_fisheye_cow_mesh(self):
+        """
+        Test FishEye Camera distortions on real meshes
+        """
+        device = torch.device("cuda:0")
+        obj_filename = os.path.join(DATA_DIR, "missing_usemtl/cow.obj")
+        mesh = load_objs_as_meshes([obj_filename], device=device)
+        R, T = look_at_view_transform(2.7, 0, 180)
+        radial_params = torch.tensor([[-1.0, 1.0, 1.0, 0.0, 0.0, -1.0]])
+        tangential_params = torch.tensor([[0.5, 0.5]])
+        thin_prism_params = torch.tensor([[0.5, 0.5, 0.5, 0.5]])
+        combinations = product([False, True], repeat=3)
+        for combination in combinations:
+            cameras = FishEyeCameras(
+                device=device,
+                R=R,
+                T=T,
+                world_coordinates=True,
+                use_radial=combination[0],
+                use_tangential=combination[1],
+                use_thin_prism=combination[2],
+                radial_params=radial_params,
+                tangential_params=tangential_params,
+                thin_prism_params=thin_prism_params,
+            )
+            raster_settings = RasterizationSettings(
+                image_size=512,
+                blur_radius=0.0,
+                faces_per_pixel=1,
+            )
+            lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])
+            renderer = MeshRenderer(
+                rasterizer=MeshRasterizer(
+                    cameras=cameras, raster_settings=raster_settings
+                ),
+                shader=SoftPhongShader(device=device, cameras=cameras, lights=lights),
+            )
+            images = renderer(mesh)
+            rgb = images[0, ..., :3].squeeze().cpu()
+            filename = "test_cow_mesh_%s_radial_%s_tangential_%s_prism_%s.png" % (
+                FishEyeCameras.__name__,
+                combination[0],
+                combination[1],
+                combination[2],
+            )
+            image_ref = load_rgb_image(filename, DATA_DIR)
+            if DEBUG:
+                filename = filename.replace("test", "DEBUG")
+                Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR / filename
+                )
+            self.assertClose(rgb, image_ref, atol=0.05)
diff --git a/pytorch3d/tests/test_render_meshes_clipped.py b/pytorch3d/tests/test_render_meshes_clipped.py
new file mode 100644
index 0000000000000000000000000000000000000000..153d10bd9a2a055da6d41ea5414e766ab3e274a3
--- /dev/null
+++ b/pytorch3d/tests/test_render_meshes_clipped.py
@@ -0,0 +1,692 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+"""
+Checks for mesh rasterization in the case where the camera enters the
+inside of the mesh and some mesh faces are partially
+behind the image plane. These faces are clipped and then rasterized.
+See pytorch3d/renderer/mesh/clip.py for more details about the
+clipping process.
+"""
+import unittest
+
+import imageio
+import numpy as np
+import torch
+from pytorch3d.io import save_obj
+from pytorch3d.renderer.cameras import (
+    FoVPerspectiveCameras,
+    look_at_view_transform,
+    PerspectiveCameras,
+)
+from pytorch3d.renderer.lighting import PointLights
+from pytorch3d.renderer.mesh import (
+    clip_faces,
+    ClipFrustum,
+    convert_clipped_rasterization_to_original_faces,
+    TexturesUV,
+)
+from pytorch3d.renderer.mesh.rasterize_meshes import _RasterizeFaceVerts
+from pytorch3d.renderer.mesh.rasterizer import MeshRasterizer, RasterizationSettings
+from pytorch3d.renderer.mesh.renderer import MeshRenderer
+from pytorch3d.renderer.mesh.shader import SoftPhongShader
+from pytorch3d.renderer.mesh.textures import TexturesVertex
+from pytorch3d.structures.meshes import Meshes
+from pytorch3d.utils import torus
+
+from .common_testing import get_tests_dir, load_rgb_image, TestCaseMixin
+
+
+# If DEBUG=True, save out images generated in the tests for debugging.
+# All saved images have prefix DEBUG_
+DEBUG = False
+DATA_DIR = get_tests_dir() / "data"
+
+
+class TestRenderMeshesClipping(TestCaseMixin, unittest.TestCase):
+    def load_cube_mesh_with_texture(self, device="cpu", with_grad: bool = False):
+        verts = torch.tensor(
+            [
+                [-1, 1, 1],
+                [1, 1, 1],
+                [1, -1, 1],
+                [-1, -1, 1],
+                [-1, 1, -1],
+                [1, 1, -1],
+                [1, -1, -1],
+                [-1, -1, -1],
+            ],
+            device=device,
+            dtype=torch.float32,
+            requires_grad=with_grad,
+        )
+
+        # all faces correctly wound
+        faces = torch.tensor(
+            [
+                [0, 1, 4],
+                [4, 1, 5],
+                [1, 2, 5],
+                [5, 2, 6],
+                [2, 7, 6],
+                [2, 3, 7],
+                [3, 4, 7],
+                [0, 4, 3],
+                [4, 5, 6],
+                [4, 6, 7],
+            ],
+            device=device,
+            dtype=torch.int64,
+        )
+
+        verts_uvs = torch.tensor(
+            [
+                [
+                    [0, 1],
+                    [1, 1],
+                    [1, 0],
+                    [0, 0],
+                    [0.204, 0.743],
+                    [0.781, 0.743],
+                    [0.781, 0.154],
+                    [0.204, 0.154],
+                ]
+            ],
+            device=device,
+            dtype=torch.float,
+        )
+        texture_map = load_rgb_image("room.jpg", DATA_DIR).to(device)
+        textures = TexturesUV(
+            maps=[texture_map], faces_uvs=faces.unsqueeze(0), verts_uvs=verts_uvs
+        )
+        mesh = Meshes([verts], [faces], textures=textures)
+        if with_grad:
+            return mesh, verts
+        return mesh
+
+    def debug_cube_mesh_render(self):
+        """
+        End-End debug run of rendering a cube mesh with texture
+        from decreasing camera distances. The camera starts
+        outside the cube and enters the inside of the cube.
+        """
+        device = torch.device("cuda:0")
+        mesh = self.load_cube_mesh_with_texture(device)
+        raster_settings = RasterizationSettings(
+            image_size=512,
+            blur_radius=1e-8,
+            faces_per_pixel=5,
+            z_clip_value=1e-2,
+            perspective_correct=True,
+            bin_size=0,
+        )
+
+        # Only ambient, no diffuse or specular
+        lights = PointLights(
+            device=device,
+            ambient_color=((1.0, 1.0, 1.0),),
+            diffuse_color=((0.0, 0.0, 0.0),),
+            specular_color=((0.0, 0.0, 0.0),),
+            location=[[0.0, 0.0, -3.0]],
+        )
+
+        renderer = MeshRenderer(
+            rasterizer=MeshRasterizer(raster_settings=raster_settings),
+            shader=SoftPhongShader(device=device, lights=lights),
+        )
+
+        # Render the cube by decreasing the distance from the camera until
+        # the camera enters the cube. Check the output looks correct.
+        images_list = []
+        dists = np.linspace(0.1, 2.5, 20)[::-1]
+
+        for d in dists:
+            R, T = look_at_view_transform(d, 0, 0)
+            T[0, 1] -= 0.1  # move down in the y axis
+            cameras = FoVPerspectiveCameras(device=device, R=R, T=T, fov=90)
+            images = renderer(mesh, cameras=cameras)
+            rgb = images[0, ..., :3].cpu().detach()
+            im = (rgb.numpy() * 255).astype(np.uint8)
+            images_list.append(im)
+
+        # Save a gif of the output - this should show
+        # the camera moving inside the cube.
+        if DEBUG:
+            gif_filename = (
+                "room_original.gif"
+                if raster_settings.z_clip_value is None
+                else "room_clipped.gif"
+            )
+            imageio.mimsave(DATA_DIR / gif_filename, images_list, fps=2)
+            save_obj(
+                f=DATA_DIR / "cube.obj",
+                verts=mesh.verts_packed().cpu(),
+                faces=mesh.faces_packed().cpu(),
+            )
+
+    @staticmethod
+    def clip_faces(meshes):
+        verts_packed = meshes.verts_packed()
+        faces_packed = meshes.faces_packed()
+        face_verts = verts_packed[faces_packed]
+        mesh_to_face_first_idx = meshes.mesh_to_faces_packed_first_idx()
+        num_faces_per_mesh = meshes.num_faces_per_mesh()
+
+        frustum = ClipFrustum(
+            left=-1,
+            right=1,
+            top=-1,
+            bottom=1,
+            # In the unit tests for each case below the triangles are assumed
+            # to have already been projected onto the image plane.
+            perspective_correct=False,
+            z_clip_value=1e-2,
+            cull=True,  # Cull to frustum
+        )
+
+        clipped_faces = clip_faces(
+            face_verts, mesh_to_face_first_idx, num_faces_per_mesh, frustum
+        )
+        return clipped_faces
+
+    def test_grad(self):
+        """
+        Check that gradient flow is unaffected when the camera is inside the mesh
+        """
+        device = torch.device("cuda:0")
+        mesh, verts = self.load_cube_mesh_with_texture(device=device, with_grad=True)
+        raster_settings = RasterizationSettings(
+            image_size=512,
+            blur_radius=1e-5,
+            faces_per_pixel=5,
+            z_clip_value=1e-2,
+            perspective_correct=True,
+            bin_size=0,
+        )
+
+        renderer = MeshRenderer(
+            rasterizer=MeshRasterizer(raster_settings=raster_settings),
+            shader=SoftPhongShader(device=device),
+        )
+        dist = 0.4  # Camera is inside the cube
+        R, T = look_at_view_transform(dist, 0, 0)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T, fov=90)
+        images = renderer(mesh, cameras=cameras)
+        images.sum().backward()
+
+        # Check gradients exist
+        self.assertIsNotNone(verts.grad)
+
+    def test_case_1(self):
+        """
+        Case 1: Single triangle fully in front of the image plane (z=0)
+        Triangle is not clipped or culled. The triangle is assumed to have
+        already been projected onto the image plane so no perspective
+        correction is needed.
+        """
+        device = "cuda:0"
+        verts = torch.tensor(
+            [[0.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 1.0, 1.0]],
+            dtype=torch.float32,
+            device=device,
+        )
+        faces = torch.tensor(
+            [
+                [0, 1, 2],
+            ],
+            dtype=torch.int64,
+            device=device,
+        )
+        meshes = Meshes(verts=[verts], faces=[faces])
+        clipped_faces = self.clip_faces(meshes)
+
+        self.assertClose(clipped_faces.face_verts, verts[faces])
+        self.assertEqual(clipped_faces.mesh_to_face_first_idx.item(), 0)
+        self.assertEqual(clipped_faces.num_faces_per_mesh.item(), 1)
+        self.assertIsNone(clipped_faces.faces_clipped_to_unclipped_idx)
+        self.assertIsNone(clipped_faces.faces_clipped_to_conversion_idx)
+        self.assertIsNone(clipped_faces.clipped_faces_neighbor_idx)
+        self.assertIsNone(clipped_faces.barycentric_conversion)
+
+    def test_case_2(self):
+        """
+        Case 2: triangles fully behind the image plane (z=0) are completely culled.
+        Test with a single triangle behind the image plane.
+        """
+
+        device = "cuda:0"
+        verts = torch.tensor(
+            [[-1.0, 0.0, -1.0], [0.0, 1.0, -1.0], [1.0, 0.0, -1.0]],
+            dtype=torch.float32,
+            device=device,
+        )
+        faces = torch.tensor(
+            [
+                [0, 1, 2],
+            ],
+            dtype=torch.int64,
+            device=device,
+        )
+        meshes = Meshes(verts=[verts], faces=[faces])
+        clipped_faces = self.clip_faces(meshes)
+
+        zero_t = torch.zeros(size=(1,), dtype=torch.int64, device=device)
+        self.assertClose(
+            clipped_faces.face_verts, torch.empty(device=device, size=(0, 3, 3))
+        )
+        self.assertClose(clipped_faces.mesh_to_face_first_idx, zero_t)
+        self.assertClose(clipped_faces.num_faces_per_mesh, zero_t)
+        self.assertClose(
+            clipped_faces.faces_clipped_to_unclipped_idx,
+            torch.empty(device=device, dtype=torch.int64, size=(0,)),
+        )
+        self.assertIsNone(clipped_faces.faces_clipped_to_conversion_idx)
+        self.assertIsNone(clipped_faces.clipped_faces_neighbor_idx)
+        self.assertIsNone(clipped_faces.barycentric_conversion)
+
+    def test_case_3(self):
+        """
+        Case 3 triangles have exactly two vertices behind the clipping plane (z=0) so are
+        clipped into a smaller triangle.
+
+        Test with a single triangle parallel to the z axis which intersects with
+        the image plane; the two new vertices are expected at z = 0.01.
+        """
+
+        device = "cuda:0"
+        verts = torch.tensor(
+            [[-1.0, 0.0, -1.0], [0.0, 0.0, 1.0], [1.0, 0.0, -1.0]],
+            dtype=torch.float32,
+            device=device,
+        )
+        faces = torch.tensor(
+            [
+                [0, 1, 2],
+            ],
+            dtype=torch.int64,
+            device=device,
+        )
+        meshes = Meshes(verts=[verts], faces=[faces])
+        clipped_faces = self.clip_faces(meshes)
+
+        zero_t = torch.zeros(size=(1,), dtype=torch.int64, device=device)
+        clipped_face_verts = torch.tensor(
+            [
+                [
+                    [0.4950, 0.0000, 0.0100],
+                    [-0.4950, 0.0000, 0.0100],
+                    [0.0000, 0.0000, 1.0000],
+                ]
+            ],
+            device=device,
+            dtype=torch.float32,
+        )
+
+        # barycentric_conversion[i, :, k] stores the barycentric weights
+        # in terms of the world coordinates of the original
+        # (big) triangle for the kth vertex in the clipped (small) triangle.
+        barycentric_conversion = torch.tensor(
+            [
+                [
+                    [0.0000, 0.4950, 0.0000],
+                    [0.5050, 0.5050, 1.0000],
+                    [0.4950, 0.0000, 0.0000],
+                ]
+            ],
+            device=device,
+            dtype=torch.float32,
+        )
+
+        self.assertClose(clipped_faces.face_verts, clipped_face_verts)
+        self.assertEqual(clipped_faces.mesh_to_face_first_idx.item(), 0)
+        self.assertEqual(clipped_faces.num_faces_per_mesh.item(), 1)
+        self.assertClose(clipped_faces.faces_clipped_to_unclipped_idx, zero_t)
+        self.assertClose(clipped_faces.faces_clipped_to_conversion_idx, zero_t)
+        self.assertClose(
+            clipped_faces.clipped_faces_neighbor_idx,
+            zero_t - 1,  # default is -1
+        )
+        self.assertClose(clipped_faces.barycentric_conversion, barycentric_conversion)
+
+    def test_case_4(self):
+        """
+        Case 4 triangles have exactly 1 vertex behind the clipping plane (z=0) so
+        are clipped into a smaller quadrilateral and then divided into two triangles.
+
+        Test with a single triangle parallel to the z axis which intersects with
+        the image plane; the two new vertices are expected at z = 0.01.
+        """
+
+        device = "cuda:0"
+        verts = torch.tensor(
+            [[0.0, 0.0, -1.0], [-1.0, 0.0, 1.0], [1.0, 0.0, 1.0]],
+            dtype=torch.float32,
+            device=device,
+        )
+        faces = torch.tensor(
+            [
+                [0, 1, 2],
+            ],
+            dtype=torch.int64,
+            device=device,
+        )
+        meshes = Meshes(verts=[verts], faces=[faces])
+        clipped_faces = self.clip_faces(meshes)
+
+        clipped_face_verts = torch.tensor(
+            [
+                # t1
+                [
+                    [-0.5050, 0.0000, 0.0100],
+                    [-1.0000, 0.0000, 1.0000],
+                    [0.5050, 0.0000, 0.0100],
+                ],
+                # t2
+                [
+                    [0.5050, 0.0000, 0.0100],
+                    [-1.0000, 0.0000, 1.0000],
+                    [1.0000, 0.0000, 1.0000],
+                ],
+            ],
+            device=device,
+            dtype=torch.float32,
+        )
+
+        barycentric_conversion = torch.tensor(
+            [
+                [
+                    [0.4950, 0.0000, 0.4950],
+                    [0.5050, 1.0000, 0.0000],
+                    [0.0000, 0.0000, 0.5050],
+                ],
+                [
+                    [0.4950, 0.0000, 0.0000],
+                    [0.0000, 1.0000, 0.0000],
+                    [0.5050, 0.0000, 1.0000],
+                ],
+            ],
+            device=device,
+            dtype=torch.float32,
+        )
+
+        self.assertClose(clipped_faces.face_verts, clipped_face_verts)
+        self.assertEqual(clipped_faces.mesh_to_face_first_idx.item(), 0)
+        self.assertEqual(
+            clipped_faces.num_faces_per_mesh.item(), 2
+        )  # now two faces instead of 1
+        self.assertClose(
+            clipped_faces.faces_clipped_to_unclipped_idx,
+            torch.tensor([0, 0], device=device, dtype=torch.int64),
+        )
+        # Neighboring face for each of the sub triangles e.g. for t1, neighbor is t2,
+        # and for t2, neighbor is t1
+        self.assertClose(
+            clipped_faces.clipped_faces_neighbor_idx,
+            torch.tensor([1, 0], device=device, dtype=torch.int64),
+        )
+        # barycentric_conversion has one 3x3 matrix per clipped face (t1 and t2 here)
+        self.assertEqual(clipped_faces.barycentric_conversion.shape[0], 2)
+        self.assertClose(clipped_faces.barycentric_conversion, barycentric_conversion)
+        # Index into barycentric_conversion for each clipped face.
+        self.assertClose(
+            clipped_faces.faces_clipped_to_conversion_idx,
+            torch.tensor([0, 1], device=device, dtype=torch.int64),
+        )
+
+    def test_mixture_of_cases(self):
+        """
+        Test with two meshes composed of different cases to check all the
+        indexing is correct.
+        Case 4 faces are subdivided into two faces which are referred
+        to as t1 and t2.
+        """
+        device = "cuda:0"
+        # fmt: off
+        verts = [
+            torch.tensor(
+                [
+                    [-1.0,  0.0, -1.0],  # noqa: E241, E201
+                    [ 0.0,  1.0, -1.0],  # noqa: E241, E201
+                    [ 1.0,  0.0, -1.0],  # noqa: E241, E201
+                    [ 0.0, -1.0, -1.0],  # noqa: E241, E201
+                    [-1.0,  0.5,  0.5],  # noqa: E241, E201
+                    [ 1.0,  1.0,  1.0],  # noqa: E241, E201
+                    [ 0.0, -1.0,  1.0],  # noqa: E241, E201
+                    [-1.0,  0.5, -0.5],  # noqa: E241, E201
+                    [ 1.0,  1.0, -1.0],  # noqa: E241, E201
+                    [-1.0,  0.0,  1.0],  # noqa: E241, E201
+                    [ 0.0,  1.0,  1.0],  # noqa: E241, E201
+                    [ 1.0,  0.0,  1.0],  # noqa: E241, E201
+                ],
+                dtype=torch.float32,
+                device=device,
+            ),
+            torch.tensor(
+                [
+                    [ 0.0, -1.0, -1.0],  # noqa: E241, E201
+                    [-1.0,  0.5,  0.5],  # noqa: E241, E201
+                    [ 1.0,  1.0,  1.0],  # noqa: E241, E201
+                ],
+                dtype=torch.float32,
+                device=device
+            )
+        ]
+        faces = [
+            torch.tensor(
+                [
+                    [0,  1,  2],  # noqa: E241, E201  Case 2 fully clipped
+                    [3,  4,  5],  # noqa: E241, E201  Case 4 clipped and subdivided
+                    [5,  4,  3],  # noqa: E241, E201  Repeat of Case 4
+                    [6,  7,  8],  # noqa: E241, E201  Case 3 clipped
+                    [9, 10, 11],  # noqa: E241, E201  Case 1 untouched
+                ],
+                dtype=torch.int64,
+                device=device,
+            ),
+            torch.tensor(
+                [
+                    [0,  1,  2],  # noqa: E241, E201  Case 4
+                ],
+                dtype=torch.int64,
+                device=device,
+            ),
+        ]
+        # fmt: on
+        meshes = Meshes(verts=verts, faces=faces)
+
+        # Clip meshes
+        clipped_faces = self.clip_faces(meshes)
+
+        # mesh 1: 4 faces (two Case 4 faces, each subdivided) + 1 (Case 3) + 1 (Case 1)
+        # mesh 2: 2 faces (one Case 4 face, subdivided)
+        self.assertEqual(clipped_faces.face_verts.shape[0], 6 + 2)
+
+        # dummy idx type tensor to avoid having to initialize the dtype/device each time
+        idx = torch.empty(size=(1,), dtype=torch.int64, device=device)
+        unclipped_idx = idx.new_tensor([1, 1, 2, 2, 3, 4, 5, 5])
+        neighbors = idx.new_tensor([1, 0, 3, 2, -1, -1, 7, 6])
+        first_idx = idx.new_tensor([0, 6])
+        num_faces = idx.new_tensor([6, 2])
+
+        self.assertClose(clipped_faces.clipped_faces_neighbor_idx, neighbors)
+        self.assertClose(clipped_faces.faces_clipped_to_unclipped_idx, unclipped_idx)
+        self.assertClose(clipped_faces.mesh_to_face_first_idx, first_idx)
+        self.assertClose(clipped_faces.num_faces_per_mesh, num_faces)
+
+        # faces_clipped_to_conversion_idx maps each output face to the
+        # corresponding row of the barycentric_conversion matrix.
+        # The barycentric_conversion matrix is composed by
+        # finding the barycentric conversion weights for case 3 faces
+        # case 4 (t1) faces and case 4 (t2) faces. These are then
+        # concatenated. Therefore case 3 faces will be the first rows of
+        # the barycentric_conversion matrix followed by t1 and then t2.
+        # Case type of all faces: [4 (t1), 4 (t2), 4 (t1), 4 (t2), 3, 1, 4 (t1), 4 (t2)]
+        # Based on this information we can calculate the indices into the
+        # barycentric conversion matrix.
+        bary_idx = idx.new_tensor([1, 4, 2, 5, 0, -1, 3, 6])
+        self.assertClose(clipped_faces.faces_clipped_to_conversion_idx, bary_idx)
+
+    def test_convert_clipped_to_unclipped_case_4(self):
+        """
+        Rasterize a clipped mesh that mixes cases 1-4 and check that the outputs
+        are converted back to face indices of the original (unclipped) mesh.
+        """
+        device = "cuda:0"
+        # fmt: off
+        verts = torch.tensor(
+            [
+                [-1.0,  0.0, -1.0],  # noqa: E241, E201
+                [ 0.0,  1.0, -1.0],  # noqa: E241, E201
+                [ 1.0,  0.0, -1.0],  # noqa: E241, E201
+                [ 0.0, -1.0, -1.0],  # noqa: E241, E201
+                [-1.0,  0.5,  0.5],  # noqa: E241, E201
+                [ 1.0,  1.0,  1.0],  # noqa: E241, E201
+                [ 0.0, -1.0,  1.0],  # noqa: E241, E201
+                [-1.0,  0.5, -0.5],  # noqa: E241, E201
+                [ 1.0,  1.0, -1.0],  # noqa: E241, E201
+                [-1.0,  0.0,  1.0],  # noqa: E241, E201
+                [ 0.0,  1.0,  1.0],  # noqa: E241, E201
+                [ 1.0,  0.0,  1.0],  # noqa: E241, E201
+            ],
+            dtype=torch.float32,
+            device=device,
+        )
+        faces = torch.tensor(
+            [
+                [0,  1,  2],  # noqa: E241, E201  Case 2 fully clipped
+                [3,  4,  5],  # noqa: E241, E201  Case 4 clipped and subdivided
+                [5,  4,  3],  # noqa: E241, E201  Repeat of Case 4
+                [6,  7,  8],  # noqa: E241, E201  Case 3 clipped
+                [9, 10, 11],  # noqa: E241, E201  Case 1 untouched
+            ],
+            dtype=torch.int64,
+            device=device,
+        )
+        # fmt: on
+        meshes = Meshes(verts=[verts], faces=[faces])
+
+        # Clip meshes
+        clipped_faces = self.clip_faces(meshes)
+
+        # 4x faces (from Case 4) + 1 (from Case 3) + 1 (from Case 1)
+        self.assertEqual(clipped_faces.face_verts.shape[0], 6)
+
+        image_size = (10, 10)
+        blur_radius = 0.05
+        faces_per_pixel = 2
+        perspective_correct = True
+        bin_size = 0
+        max_faces_per_bin = 20
+        clip_barycentric_coords = False
+        cull_backfaces = False
+
+        # Rasterize clipped mesh
+        pix_to_face, zbuf, barycentric_coords, dists = _RasterizeFaceVerts.apply(
+            clipped_faces.face_verts,
+            clipped_faces.mesh_to_face_first_idx,
+            clipped_faces.num_faces_per_mesh,
+            clipped_faces.clipped_faces_neighbor_idx,
+            image_size,
+            blur_radius,
+            faces_per_pixel,
+            bin_size,
+            max_faces_per_bin,
+            perspective_correct,
+            clip_barycentric_coords,
+            cull_backfaces,
+        )
+
+        # Convert outputs so they are in terms of the unclipped mesh.
+        outputs = convert_clipped_rasterization_to_original_faces(
+            pix_to_face,
+            barycentric_coords,
+            clipped_faces,
+        )
+        pix_to_face_unclipped, barycentric_coords_unclipped = outputs
+
+        # In the clipped mesh there are more faces than in the unclipped mesh
+        self.assertTrue(pix_to_face.max() > pix_to_face_unclipped.max())
+        # Unclipped pix_to_face indices must be in the limit of the number
+        # of faces in the unclipped mesh.
+        self.assertTrue(pix_to_face_unclipped.max() < faces.shape[0])
+
+    def test_case_4_no_duplicates(self):
+        """
+        In the case of a simple mesh with one face that is cut by the image
+        plane into a quadrilateral, there shouldn't be duplicate indices of
+        the face in the pix_to_face output of rasterization.
+        """
+        for (device, bin_size) in [("cpu", 0), ("cuda:0", 0), ("cuda:0", None)]:
+            verts = torch.tensor(
+                [[0.0, -10.0, 1.0], [-1.0, 2.0, -2.0], [1.0, 5.0, -10.0]],
+                dtype=torch.float32,
+                device=device,
+            )
+            faces = torch.tensor(
+                [
+                    [0, 1, 2],
+                ],
+                dtype=torch.int64,
+                device=device,
+            )
+            meshes = Meshes(verts=[verts], faces=[faces])
+            k = 3
+            settings = RasterizationSettings(
+                image_size=10,
+                blur_radius=0.05,
+                faces_per_pixel=k,
+                z_clip_value=1e-2,
+                perspective_correct=True,
+                cull_to_frustum=True,
+                bin_size=bin_size,
+            )
+
+            # The camera is positioned so that the image plane cuts
+            # the mesh face into a quadrilateral.
+            R, T = look_at_view_transform(0.2, 0, 0)
+            cameras = FoVPerspectiveCameras(device=device, R=R, T=T, fov=90)
+            rasterizer = MeshRasterizer(raster_settings=settings, cameras=cameras)
+            fragments = rasterizer(meshes)
+
+            p2f = fragments.pix_to_face.reshape(-1, k)
+            unique_vals = p2f.unique(dim=0)
+            # There is only one face in this mesh so if it hits a pixel
+            # it can only be at position k = 0
+            # For any pixel, the top K faces therefore can never be [0, 0, -1]
+            double_hit = torch.tensor([0, 0, -1], dtype=p2f.dtype, device=device)
+            check_double_hit = any(torch.equal(i, double_hit) for i in unique_vals)
+            self.assertFalse(check_double_hit)
+
+    def test_mesh_outside_frustrum(self):
+        """
+        Test cases:
+        1. Where the mesh is completely outside the view
+        frustum so all faces are culled and z_clip_value = None.
+        2. Where part of the mesh is in the view frustum but
+        the z_clip value = 5.0 so all the visible faces are behind
+        the clip plane so are culled instead of clipped.
+        """
+        device = "cuda:0"
+        mesh1 = torus(20.0, 85.0, 32, 16, device=device)
+        mesh2 = torus(2.0, 3.0, 32, 16, device=device)
+        for (mesh, z_clip) in [(mesh1, None), (mesh2, 5.0)]:
+            tex = TexturesVertex(verts_features=torch.rand_like(mesh.verts_padded()))
+            mesh.textures = tex
+            raster_settings = RasterizationSettings(
+                image_size=512, cull_to_frustum=True, z_clip_value=z_clip
+            )
+            R, T = look_at_view_transform(3.0, 0.0, 0.0)
+            cameras = PerspectiveCameras(device=device, R=R, T=T)
+            renderer = MeshRenderer(
+                rasterizer=MeshRasterizer(
+                    cameras=cameras, raster_settings=raster_settings
+                ),
+                shader=SoftPhongShader(cameras=cameras, device=device),
+            )
+            images = renderer(mesh)
+            # The image should be white.
+            self.assertClose(images[0, ..., :3], torch.ones_like(images[0, ..., :3]))
diff --git a/pytorch3d/tests/test_render_multigpu.py b/pytorch3d/tests/test_render_multigpu.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f77c96116cd039d1f449a233edd4e9777a73ed8
--- /dev/null
+++ b/pytorch3d/tests/test_render_multigpu.py
@@ -0,0 +1,231 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+import torch.nn as nn
+from pytorch3d.renderer import (
+    AlphaCompositor,
+    BlendParams,
+    HardGouraudShader,
+    Materials,
+    MeshRasterizer,
+    MeshRenderer,
+    PointLights,
+    PointsRasterizationSettings,
+    PointsRasterizer,
+    PointsRenderer,
+    RasterizationSettings,
+    SoftPhongShader,
+    SplatterPhongShader,
+    TexturesVertex,
+)
+from pytorch3d.renderer.cameras import FoVPerspectiveCameras, look_at_view_transform
+from pytorch3d.renderer.opengl import MeshRasterizerOpenGL
+from pytorch3d.structures import Meshes, Pointclouds
+from pytorch3d.utils.ico_sphere import ico_sphere
+
+from .common_testing import TestCaseMixin
+
+
+# Set the number of GPUs you want to test with (tests below also use cuda:1)
+NUM_GPUS = 2
+GPU_LIST = [f"cuda:{idx}" for idx in range(NUM_GPUS)]
+print("GPUs: %s" % ", ".join(GPU_LIST))
+
+
+class TestRenderMeshesMultiGPU(TestCaseMixin, unittest.TestCase):
+    def _check_mesh_renderer_props_on_device(self, renderer, device):
+        """
+        Helper function to check that all the properties of the mesh
+        renderer have been moved to the correct device.
+        """
+        # Cameras
+        self.assertEqual(renderer.rasterizer.cameras.device, device)
+        self.assertEqual(renderer.rasterizer.cameras.R.device, device)
+        self.assertEqual(renderer.rasterizer.cameras.T.device, device)
+        self.assertEqual(renderer.shader.cameras.device, device)
+        self.assertEqual(renderer.shader.cameras.R.device, device)
+        self.assertEqual(renderer.shader.cameras.T.device, device)
+
+        # Lights and Materials
+        self.assertEqual(renderer.shader.lights.device, device)
+        self.assertEqual(renderer.shader.lights.ambient_color.device, device)
+        self.assertEqual(renderer.shader.materials.device, device)
+        self.assertEqual(renderer.shader.materials.ambient_color.device, device)
+
+    def _mesh_renderer_to(self, rasterizer_class, shader_class):
+        """
+        Test moving all the tensors in the mesh renderer to a new device.
+        """
+
+        device1 = torch.device("cuda:0")
+
+        R, T = look_at_view_transform(1500, 0.0, 0.0)
+
+        # Init shader settings
+        materials = Materials(device=device1)
+        lights = PointLights(device=device1)
+        lights.location = torch.tensor([0.0, 0.0, +1000.0], device=device1)[None]
+
+        raster_settings = RasterizationSettings(
+            image_size=128, blur_radius=0.0, faces_per_pixel=1
+        )
+        cameras = FoVPerspectiveCameras(
+            device=device1, R=R, T=T, aspect_ratio=1.0, fov=60.0, zfar=100
+        )
+        rasterizer = rasterizer_class(cameras=cameras, raster_settings=raster_settings)
+
+        blend_params = BlendParams(
+            1e-4,
+            1e-4,
+            background_color=torch.zeros(3, dtype=torch.float32, device=device1),
+        )
+
+        shader = shader_class(
+            lights=lights,
+            cameras=cameras,
+            materials=materials,
+            blend_params=blend_params,
+        )
+        renderer = MeshRenderer(rasterizer=rasterizer, shader=shader)
+
+        mesh = ico_sphere(2, device1)
+        verts_padded = mesh.verts_padded()
+        textures = TexturesVertex(
+            verts_features=torch.ones_like(verts_padded, device=device1)
+        )
+        mesh.textures = textures
+        self._check_mesh_renderer_props_on_device(renderer, device1)
+
+        # Test rendering on the first device (cuda:0)
+        output_images = renderer(mesh)
+        self.assertEqual(output_images.device, device1)
+
+        # Move renderer and mesh to another device and re render
+        # This also tests that background_color is correctly moved to
+        # the new device
+        device2 = torch.device("cuda:1")
+        renderer = renderer.to(device2)
+        mesh = mesh.to(device2)
+        self._check_mesh_renderer_props_on_device(renderer, device2)
+        output_images = renderer(mesh)
+        self.assertEqual(output_images.device, device2)
+
+    def test_mesh_renderer_to(self):
+        self._mesh_renderer_to(MeshRasterizer, SoftPhongShader)
+
+    def test_mesh_renderer_opengl_to(self):
+        self._mesh_renderer_to(MeshRasterizerOpenGL, SplatterPhongShader)
+
+    def _render_meshes(self, rasterizer_class, shader_class):
+        test = self
+
+        class Model(nn.Module):
+            def __init__(self, device):
+                super(Model, self).__init__()
+                mesh = ico_sphere(3).to(device)
+                self.register_buffer("faces", mesh.faces_padded())
+                self.renderer = self.init_render(device)
+
+            def init_render(self, device):
+
+                cameras = FoVPerspectiveCameras().to(device)
+                raster_settings = RasterizationSettings(
+                    image_size=128, blur_radius=0.0, faces_per_pixel=1
+                )
+                lights = PointLights(
+                    ambient_color=((1.0, 1.0, 1.0),),
+                    diffuse_color=((0, 0.0, 0),),
+                    specular_color=((0.0, 0, 0),),
+                    location=((0.0, 0.0, 1e5),),
+                ).to(device)
+                renderer = MeshRenderer(
+                    rasterizer=rasterizer_class(
+                        cameras=cameras, raster_settings=raster_settings
+                    ),
+                    shader=shader_class(cameras=cameras, lights=lights),
+                )
+                return renderer
+
+            def forward(self, verts, texs):
+                batch_size = verts.size(0)
+                self.renderer = self.renderer.to(verts.device)
+                tex = TexturesVertex(verts_features=texs)
+                faces = self.faces.expand(batch_size, -1, -1).to(verts.device)
+                mesh = Meshes(verts, faces, tex).to(verts.device)
+
+                test._check_mesh_renderer_props_on_device(self.renderer, verts.device)
+                img_render = self.renderer(mesh)
+                return img_render[:, :, :, :3]
+
+        # Make sure we use all GPUs in GPU_LIST by making the batch size 4 x GPU count.
+        verts = ico_sphere(3).verts_padded().expand(len(GPU_LIST) * 4, 642, 3)
+        texs = verts.new_ones(verts.shape)
+        model = Model(device=GPU_LIST[0])
+        model = nn.DataParallel(model, device_ids=GPU_LIST)
+
+        # Run repeated forward passes; the device checks happen inside Model.forward
+        for _ in range(100):
+            model(verts, texs)
+
+    def test_render_meshes(self):
+        self._render_meshes(MeshRasterizer, HardGouraudShader)
+
+    # @unittest.skip("Multi-GPU OpenGL training is currently not supported.")
+    def test_render_meshes_opengl(self):
+        self._render_meshes(MeshRasterizerOpenGL, SplatterPhongShader)
+
+
+class TestRenderPointsMultiGPU(TestCaseMixin, unittest.TestCase):
+    def _check_points_renderer_props_on_device(self, renderer, device):
+        """
+        Helper function to check that the rasterizer's cameras, and their
+        R and T tensors, are on the expected device.
+        """
+        # Cameras
+        self.assertEqual(renderer.rasterizer.cameras.device, device)
+        self.assertEqual(renderer.rasterizer.cameras.R.device, device)
+        self.assertEqual(renderer.rasterizer.cameras.T.device, device)
+
+    def test_points_renderer_to(self):
+        """
+        Test moving all the tensors in the points renderer to a new device.
+        """
+
+        device1 = torch.device("cpu")
+
+        R, T = look_at_view_transform(1500, 0.0, 0.0)
+
+        raster_settings = PointsRasterizationSettings(
+            image_size=256, radius=0.001, points_per_pixel=1
+        )
+        cameras = FoVPerspectiveCameras(
+            device=device1, R=R, T=T, aspect_ratio=1.0, fov=60.0, zfar=100
+        )
+        rasterizer = PointsRasterizer(cameras=cameras, raster_settings=raster_settings)
+
+        renderer = PointsRenderer(rasterizer=rasterizer, compositor=AlphaCompositor())
+
+        mesh = ico_sphere(2, device1)
+        verts_padded = mesh.verts_padded()
+        pointclouds = Pointclouds(
+            points=verts_padded, features=torch.randn_like(verts_padded)
+        )
+        self._check_points_renderer_props_on_device(renderer, device1)
+
+        # Test rendering on cpu
+        output_images = renderer(pointclouds)
+        self.assertEqual(output_images.device, device1)
+
+        # Move renderer and pointclouds from the cpu to the GPU and re render
+        device2 = torch.device("cuda:0")
+        renderer = renderer.to(device2)
+        pointclouds = pointclouds.to(device2)
+        self._check_points_renderer_props_on_device(renderer, device2)
+        output_images = renderer(pointclouds)
+        self.assertEqual(output_images.device, device2)
diff --git a/pytorch3d/tests/test_render_points.py b/pytorch3d/tests/test_render_points.py
new file mode 100644
index 0000000000000000000000000000000000000000..eede6ebc4a72f1434cad6669c23a7b73114c7fe6
--- /dev/null
+++ b/pytorch3d/tests/test_render_points.py
@@ -0,0 +1,469 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+"""
+Sanity checks for output images from the pointcloud renderer.
+"""
+import unittest
+import warnings
+from os import path
+
+import numpy as np
+import torch
+from PIL import Image
+from pytorch3d.renderer.cameras import (
+    FoVOrthographicCameras,
+    FoVPerspectiveCameras,
+    look_at_view_transform,
+    OrthographicCameras,
+    PerspectiveCameras,
+)
+from pytorch3d.renderer.compositing import alpha_composite, norm_weighted_sum
+from pytorch3d.renderer.fisheyecameras import FishEyeCameras
+from pytorch3d.renderer.points import (
+    AlphaCompositor,
+    NormWeightedCompositor,
+    PointsRasterizationSettings,
+    PointsRasterizer,
+    PointsRenderer,
+    PulsarPointsRenderer,
+)
+from pytorch3d.structures.pointclouds import Pointclouds
+from pytorch3d.utils.ico_sphere import ico_sphere
+
+from .common_testing import (
+    get_pytorch3d_dir,
+    get_tests_dir,
+    load_rgb_image,
+    TestCaseMixin,
+)
+
+
+# If DEBUG=True, save out images generated in the tests for debugging.
+# All saved images have prefix DEBUG_
+DEBUG = False
+DATA_DIR = get_tests_dir() / "data"
+
+
+class TestRenderPoints(TestCaseMixin, unittest.TestCase):
+    def test_simple_sphere(self):
+        """Render a sphere point cloud and compare it to the reference image."""
+        device = torch.device("cuda:0")
+        sphere_mesh = ico_sphere(1, device)
+        verts_padded = sphere_mesh.verts_padded()
+        # Shift vertices to check coordinate frames are correct.
+        verts_padded[..., 1] += 0.2
+        verts_padded[..., 0] += 0.2
+        pointclouds = Pointclouds(
+            points=verts_padded, features=torch.ones_like(verts_padded)
+        )
+        R, T = look_at_view_transform(2.7, 0.0, 0.0)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+        raster_settings = PointsRasterizationSettings(
+            image_size=256, radius=5e-2, points_per_pixel=1
+        )
+        rasterizer = PointsRasterizer(cameras=cameras, raster_settings=raster_settings)
+        compositor = NormWeightedCompositor()
+        renderer = PointsRenderer(rasterizer=rasterizer, compositor=compositor)
+
+        # Load reference image
+        filename = "simple_pointcloud_sphere.png"
+        image_ref = load_rgb_image("test_%s" % filename, DATA_DIR)
+
+        for bin_size in [0, None]:
+            # Check both naive and coarse to fine produce the same output.
+            renderer.rasterizer.raster_settings.bin_size = bin_size
+            images = renderer(pointclouds)
+            rgb = images[0, ..., :3].squeeze().cpu()
+            if DEBUG:
+                # Keep `filename` untouched so both passes write the same DEBUG_ file.
+                debug_image = Image.fromarray((rgb.numpy() * 255).astype(np.uint8))
+                debug_image.save(DATA_DIR / ("DEBUG_%s" % filename))
+            self.assertClose(rgb, image_ref)
+
+    def test_simple_sphere_fisheye(self):
+        """Render a sphere point cloud with FishEyeCameras and compare to reference."""
+        device = torch.device("cuda:0")
+        sphere_mesh = ico_sphere(1, device)
+        verts_padded = sphere_mesh.verts_padded()
+        # Shift vertices to check coordinate frames are correct.
+        verts_padded[..., 1] += 0.2
+        verts_padded[..., 0] += 0.2
+        pointclouds = Pointclouds(
+            points=verts_padded, features=torch.ones_like(verts_padded)
+        )
+        R, T = look_at_view_transform(2.7, 0.0, 0.0)
+        cameras = FishEyeCameras(
+            device=device,
+            R=R,
+            T=T,
+            use_radial=False,
+            use_tangential=False,
+            use_thin_prism=False,
+            world_coordinates=True,
+        )
+        raster_settings = PointsRasterizationSettings(
+            image_size=256, radius=5e-2, points_per_pixel=1
+        )
+        rasterizer = PointsRasterizer(cameras=cameras, raster_settings=raster_settings)
+        compositor = NormWeightedCompositor()
+        renderer = PointsRenderer(rasterizer=rasterizer, compositor=compositor)
+
+        # Load reference image
+        filename = "render_fisheye_sphere_points.png"
+        image_ref = load_rgb_image("test_%s" % filename, DATA_DIR)
+
+        for bin_size in [0, None]:
+            # Check both naive and coarse to fine produce the same output.
+            renderer.rasterizer.raster_settings.bin_size = bin_size
+            images = renderer(pointclouds)
+            rgb = images[0, ..., :3].squeeze().cpu()
+            if DEBUG:
+                # Keep `filename` untouched so both passes write the same DEBUG_ file.
+                debug_image = Image.fromarray((rgb.numpy() * 255).astype(np.uint8))
+                debug_image.save(DATA_DIR / ("DEBUG_%s" % filename))
+            self.assertClose(rgb, image_ref)
+
+    def test_simple_sphere_pulsar(self):  # pulsar renders vs. reference images
+        for device in [torch.device("cpu"), torch.device("cuda")]:
+            sphere_mesh = ico_sphere(1, device)
+            verts_padded = sphere_mesh.verts_padded()
+            # Shift vertices to check coordinate frames are correct.
+            verts_padded[..., 1] += 0.2
+            verts_padded[..., 0] += 0.2
+            pointclouds = Pointclouds(
+                points=verts_padded, features=torch.ones_like(verts_padded)
+            )
+            for azimuth in [0.0, 90.0]:
+                R, T = look_at_view_transform(2.7, 0.0, azimuth)
+                for camera_name, cameras in [
+                    ("fovperspective", FoVPerspectiveCameras(device=device, R=R, T=T)),
+                    (
+                        "fovorthographic",
+                        FoVOrthographicCameras(device=device, R=R, T=T),
+                    ),
+                    ("perspective", PerspectiveCameras(device=device, R=R, T=T)),
+                    ("orthographic", OrthographicCameras(device=device, R=R, T=T)),
+                ]:
+                    raster_settings = PointsRasterizationSettings(
+                        image_size=256, radius=5e-2, points_per_pixel=1
+                    )
+                    rasterizer = PointsRasterizer(
+                        cameras=cameras, raster_settings=raster_settings
+                    )
+                    renderer = PulsarPointsRenderer(rasterizer=rasterizer).to(device)
+                    # Load the reference image for this azimuth / camera combination.
+                    filename = (
+                        "pulsar_simple_pointcloud_sphere_"
+                        f"azimuth{azimuth}_{camera_name}.png"
+                    )
+                    image_ref = load_rgb_image("test_%s" % filename, DATA_DIR)
+                    images = renderer(
+                        pointclouds, gamma=(1e-3,), znear=(1.0,), zfar=(100.0,)
+                    )
+                    rgb = images[0, ..., :3].squeeze().cpu()
+                    if DEBUG:
+                        filename = "DEBUG_%s" % filename
+                        Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                            DATA_DIR / filename
+                        )
+                    self.assertClose(rgb, image_ref, rtol=7e-3, atol=5e-3)
+
+    def test_unified_inputs_pulsar(self):
+        # Test pulsar renderer constructor / forward inputs on different devices.
+        for device in [torch.device("cpu"), torch.device("cuda")]:
+            sphere_mesh = ico_sphere(1, device)
+            verts_padded = sphere_mesh.verts_padded()
+            pointclouds = Pointclouds(
+                points=verts_padded, features=torch.ones_like(verts_padded)
+            )
+            R, T = look_at_view_transform(2.7, 0.0, 0.0)
+            # Test the different camera types.
+            for _, cameras in [
+                ("fovperspective", FoVPerspectiveCameras(device=device, R=R, T=T)),
+                (
+                    "fovorthographic",
+                    FoVOrthographicCameras(device=device, R=R, T=T),
+                ),
+                ("perspective", PerspectiveCameras(device=device, R=R, T=T)),
+                ("orthographic", OrthographicCameras(device=device, R=R, T=T)),
+            ]:
+                # Test different ways for image size specification.
+                for image_size in (256, (256, 256)):
+                    raster_settings = PointsRasterizationSettings(
+                        image_size=image_size, radius=5e-2, points_per_pixel=1
+                    )
+                    rasterizer = PointsRasterizer(
+                        cameras=cameras, raster_settings=raster_settings
+                    )
+                    # Test that the compositor can be provided. Its value is ignored
+                    # so use a dummy.
+                    _ = PulsarPointsRenderer(rasterizer=rasterizer, compositor=1).to(
+                        device
+                    )
+                    # Constructor without compositor.
+                    _ = PulsarPointsRenderer(rasterizer=rasterizer).to(device)
+                    # Constructor with n_channels.
+                    _ = PulsarPointsRenderer(rasterizer=rasterizer, n_channels=3).to(
+                        device
+                    )
+                    # Constructor with max_num_spheres.
+                    renderer = PulsarPointsRenderer(
+                        rasterizer=rasterizer, max_num_spheres=1000
+                    ).to(device)
+                    # Test the forward function.
+                    if isinstance(cameras, (PerspectiveCameras, OrthographicCameras)):
+                        # znear and zfar are required in this case.
+                        self.assertRaises(
+                            ValueError,
+                            lambda renderer=renderer, pointclouds=pointclouds: renderer.forward(
+                                point_clouds=pointclouds, gamma=(1e-4,)
+                            ),
+                        )
+                        renderer.forward(
+                            point_clouds=pointclouds,
+                            gamma=(1e-4,),
+                            znear=(1.0,),
+                            zfar=(2.0,),
+                        )
+                        # znear and zfar must be batched.
+                        self.assertRaises(
+                            TypeError,
+                            lambda renderer=renderer, pointclouds=pointclouds: renderer.forward(
+                                point_clouds=pointclouds,
+                                gamma=(1e-4,),
+                                znear=1.0,
+                                zfar=(2.0,),
+                            ),
+                        )
+                        self.assertRaises(
+                            TypeError,
+                            lambda renderer=renderer, pointclouds=pointclouds: renderer.forward(
+                                point_clouds=pointclouds,
+                                gamma=(1e-4,),
+                                znear=(1.0,),
+                                zfar=2.0,
+                            ),
+                        )
+                    else:
+                        # gamma must be batched.
+                        self.assertRaises(
+                            TypeError,
+                            lambda renderer=renderer, pointclouds=pointclouds: renderer.forward(
+                                point_clouds=pointclouds, gamma=1e-4
+                            ),
+                        )
+                        renderer.forward(point_clouds=pointclouds, gamma=(1e-4,))
+                        # An image_size of 0 in the raster settings must make forward() raise.
+                        renderer.rasterizer.raster_settings.image_size = 0
+                        self.assertRaises(
+                            ValueError,
+                            lambda renderer=renderer, pointclouds=pointclouds: renderer.forward(
+                                point_clouds=pointclouds, gamma=(1e-4,)
+                            ),
+                        )
+
+    def test_pointcloud_with_features(self):
+        """Render the bridge point cloud; check the image and that gradients exist."""
+        device = torch.device("cuda:0")
+        file_dir = get_pytorch3d_dir() / "docs/tutorials/data"
+        pointcloud_filename = file_dir / "PittsburghBridge/pointcloud.npz"
+
+        # Note, this file is too large to check in to the repo.
+        # Download the file to run the test locally; otherwise the test is skipped.
+        if not path.exists(pointcloud_filename):
+            url = (
+                "https://dl.fbaipublicfiles.com/pytorch3d/data/"
+                "PittsburghBridge/pointcloud.npz"
+            )
+            msg = (
+                "pointcloud.npz not found, download from %s, save it at the path %s, and rerun"
+                % (url, pointcloud_filename)
+            )
+            warnings.warn(msg)
+            self.skipTest(msg)
+
+        # Load point cloud
+        pointcloud = np.load(pointcloud_filename)
+        verts = torch.Tensor(pointcloud["verts"]).to(device)
+        rgb_feats = torch.Tensor(pointcloud["rgb"]).to(device)
+
+        verts.requires_grad = True
+        rgb_feats.requires_grad = True
+        point_cloud = Pointclouds(points=[verts], features=[rgb_feats])
+
+        R, T = look_at_view_transform(20, 10, 0)
+        cameras = FoVOrthographicCameras(device=device, R=R, T=T, znear=0.01)
+
+        raster_settings = PointsRasterizationSettings(
+            # Set image_size so it is not a multiple of 16 (min bin_size)
+            # in order to confirm that there are no errors in coarse rasterization.
+            image_size=500,
+            radius=0.003,
+            points_per_pixel=10,
+        )
+
+        renderer = PointsRenderer(
+            rasterizer=PointsRasterizer(
+                cameras=cameras, raster_settings=raster_settings
+            ),
+            compositor=AlphaCompositor(),
+        )
+
+        images = renderer(point_cloud)
+
+        # Load reference image
+        filename = "bridge_pointcloud.png"
+        image_ref = load_rgb_image("test_%s" % filename, DATA_DIR)
+
+        for bin_size in [0, None]:
+            # Check both naive and coarse to fine produce the same output.
+            renderer.rasterizer.raster_settings.bin_size = bin_size
+            images = renderer(point_cloud)
+            rgb = images[0, ..., :3].squeeze().cpu()
+            if DEBUG:
+                # Keep `filename` untouched so both passes write the same DEBUG_ file.
+                debug_array = (rgb.detach().numpy() * 255).astype(np.uint8)
+                Image.fromarray(debug_array).save(DATA_DIR / ("DEBUG_%s" % filename))
+            self.assertClose(rgb, image_ref, atol=0.015)
+
+        # Check grad exists.
+        grad_images = torch.randn_like(images)
+        images.backward(grad_images)
+        self.assertIsNotNone(verts.grad)
+        self.assertIsNotNone(rgb_feats.grad)
+
+    def test_simple_sphere_batched(self):  # batched render vs. one reference image
+        device = torch.device("cuda:0")
+        sphere_mesh = ico_sphere(1, device)
+        verts_padded = sphere_mesh.verts_padded()
+        verts_padded[..., 1] += 0.2
+        verts_padded[..., 0] += 0.2
+        pointclouds = Pointclouds(
+            points=verts_padded, features=torch.ones_like(verts_padded)
+        )
+        batch_size = 20
+        pointclouds = pointclouds.extend(batch_size)
+        R, T = look_at_view_transform(2.7, 0.0, 0.0)
+        cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+        raster_settings = PointsRasterizationSettings(
+            image_size=256, radius=5e-2, points_per_pixel=1
+        )
+        rasterizer = PointsRasterizer(cameras=cameras, raster_settings=raster_settings)
+        compositor = NormWeightedCompositor()
+        renderer = PointsRenderer(rasterizer=rasterizer, compositor=compositor)
+
+        # Load reference image (same file name as used by test_simple_sphere)
+        filename = "simple_pointcloud_sphere.png"
+        image_ref = load_rgb_image("test_%s" % filename, DATA_DIR)
+        # Every image in the batch is compared against the same reference image.
+        images = renderer(pointclouds)
+        for i in range(batch_size):
+            rgb = images[i, ..., :3].squeeze().cpu()
+            if i == 0 and DEBUG:  # only the first image of the batch is saved
+                filename = "DEBUG_%s" % filename
+                Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR / filename
+                )
+            self.assertClose(rgb, image_ref)
+
+    def test_compositor_background_color_rgba(self):
+        """Background pixels get the RGB background color and alpha 1 (C=4)."""
+        N, H, W, K, C, P = 1, 15, 15, 20, 4, 225
+        ptclds = torch.randn((C, P))
+        alphas = torch.rand((N, K, H, W))
+        pix_idxs = torch.randint(-1, 20, (N, K, H, W))  # 20 < P, large amount of -1
+        background_color = [0.5, 0, 1]
+
+        compositor_funcs = [
+            (NormWeightedCompositor, norm_weighted_sum),
+            (AlphaCompositor, alpha_composite),
+        ]
+
+        for (compositor_class, composite_func) in compositor_funcs:
+
+            compositor = compositor_class(background_color)
+
+            # run the forward method to generate masked images
+            masked_images = compositor.forward(pix_idxs, alphas, ptclds)
+
+            # generate unmasked images for testing purposes
+            images = composite_func(pix_idxs, alphas, ptclds)
+            # a pixel counts as foreground when its first of the K indices is >= 0
+            is_foreground = pix_idxs[:, 0] >= 0
+
+            # make sure foreground values are unchanged
+            self.assertClose(
+                torch.masked_select(masked_images, is_foreground[:, None]),
+                torch.masked_select(images, is_foreground[:, None]),
+            )
+
+            is_background = ~is_foreground[..., None].expand(-1, -1, -1, C)
+
+            # permute masked_images to correctly get rgb values
+            masked_images = masked_images.permute(0, 2, 3, 1)
+            for i in range(3):
+                channel_color = background_color[i]
+
+                # check if background colors are properly changed
+                self.assertTrue(
+                    masked_images[is_background]
+                    .view(-1, C)[..., i]
+                    .eq(channel_color)
+                    .all()
+                )
+
+            # check that the alpha channel (index 3) of background pixels equals 1
+            self.assertTrue(
+                masked_images[is_background].view(-1, C)[..., 3].eq(1).all()
+            )
+
+    def test_compositor_background_color_rgb(self):
+        """Background pixels get the RGB background color (C=3, no alpha)."""
+        N, H, W, K, C, P = 1, 15, 15, 20, 3, 225
+        ptclds = torch.randn((C, P))
+        alphas = torch.rand((N, K, H, W))
+        pix_idxs = torch.randint(-1, 20, (N, K, H, W))  # 20 < P, large amount of -1
+        background_color = [0.5, 0, 1]
+
+        compositor_funcs = [
+            (NormWeightedCompositor, norm_weighted_sum),
+            (AlphaCompositor, alpha_composite),
+        ]
+
+        for (compositor_class, composite_func) in compositor_funcs:
+
+            compositor = compositor_class(background_color)
+
+            # run the forward method to generate masked images
+            masked_images = compositor.forward(pix_idxs, alphas, ptclds)
+
+            # generate unmasked images for testing purposes
+            images = composite_func(pix_idxs, alphas, ptclds)
+            # a pixel counts as foreground when its first of the K indices is >= 0
+            is_foreground = pix_idxs[:, 0] >= 0
+
+            # make sure foreground values are unchanged
+            self.assertClose(
+                torch.masked_select(masked_images, is_foreground[:, None]),
+                torch.masked_select(images, is_foreground[:, None]),
+            )
+
+            is_background = ~is_foreground[..., None].expand(-1, -1, -1, C)
+
+            # permute masked_images to correctly get rgb values
+            masked_images = masked_images.permute(0, 2, 3, 1)
+            for i in range(3):
+                channel_color = background_color[i]
+
+                # check if background colors are properly changed
+                self.assertTrue(
+                    masked_images[is_background]
+                    .view(-1, C)[..., i]
+                    .eq(channel_color)
+                    .all()
+                )
diff --git a/pytorch3d/tests/test_render_volumes.py b/pytorch3d/tests/test_render_volumes.py
new file mode 100644
index 0000000000000000000000000000000000000000..d1cf4760cccdce105e41533ca41f22cc8a57bcfc
--- /dev/null
+++ b/pytorch3d/tests/test_render_volumes.py
@@ -0,0 +1,719 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from typing import Optional, Tuple
+
+import numpy as np
+import torch
+from pytorch3d.ops import knn_points
+from pytorch3d.renderer import (
+    AbsorptionOnlyRaymarcher,
+    AlphaCompositor,
+    EmissionAbsorptionRaymarcher,
+    MonteCarloRaysampler,
+    MultinomialRaysampler,
+    NDCMultinomialRaysampler,
+    PerspectiveCameras,
+    PointsRasterizationSettings,
+    PointsRasterizer,
+    PointsRenderer,
+    RayBundle,
+    VolumeRenderer,
+    VolumeSampler,
+)
+from pytorch3d.renderer.implicit.utils import _validate_ray_bundle_variables
+from pytorch3d.structures import Pointclouds, Volumes
+
+from .common_testing import TestCaseMixin
+from .test_points_to_volumes import init_uniform_y_rotations
+
+
+DEBUG = False
+if DEBUG:
+    import os
+    import tempfile
+
+    from PIL import Image
+
+
+ZERO_TRANSLATION = torch.zeros(1, 3)
+
+
+def init_boundary_volume(
+    batch_size: int,
+    volume_size: Tuple[int, int, int],
+    border_offset: int = 2,
+    shape: str = "cube",
+    volume_translation: torch.Tensor = ZERO_TRANSLATION,
+):
+    """
+    Generate a volume with sides colored with distinct colors.
+    `shape` selects "cube" or "sphere"; any other value raises ValueError.
+    Returns a `(volumes, volume_voxel_size, volume_translation)` tuple.
+    """
+    device = torch.device("cuda")
+    # first center the volume for the purpose of generating the canonical shape
+    volume_translation_tmp = (0.0, 0.0, 0.0)
+
+    # set the voxel size to 1 / (volume_size-1)
+    volume_voxel_size = 1 / (volume_size[0] - 1.0)
+
+    # colors of the sides of the cube
+    clr_sides = torch.tensor(
+        [
+            [1.0, 1.0, 1.0],
+            [1.0, 0.0, 0.0],
+            [1.0, 0.0, 1.0],
+            [1.0, 1.0, 0.0],
+            [0.0, 1.0, 0.0],
+            [0.0, 1.0, 1.0],
+        ],
+        dtype=torch.float32,
+        device=device,
+    )
+
+    # get the coord grid of the volume
+    coord_grid = Volumes(
+        densities=torch.zeros(1, 1, *volume_size, device=device),
+        voxel_size=volume_voxel_size,
+        volume_translation=volume_translation_tmp,
+    ).get_coord_grid()[0]
+
+    # extract the boundary points and their colors of the cube
+    if shape == "cube":
+        boundary_points, boundary_colors = [], []
+        for side, clr_side in enumerate(clr_sides):
+            first = side % 2
+            dim = side // 2
+            # index with a tuple: PyTorch deprecates list-of-slices indexing
+            slices = [slice(border_offset, -border_offset, 1)] * 3 + [slice(0, 3, 1)]
+            slices[dim] = int(border_offset * (2 * first - 1))
+            boundary_points_ = coord_grid[tuple(slices)].reshape(-1, 3)
+            boundary_points.append(boundary_points_)
+            boundary_colors.append(clr_side[None].expand_as(boundary_points_))
+        # set the internal part of the volume to be completely opaque
+        volume_densities = torch.zeros(*volume_size, device=device)
+        volume_densities[(slice(border_offset, -border_offset, 1),) * 3] = 1.0
+        boundary_points, boundary_colors = [
+            torch.cat(p, dim=0) for p in [boundary_points, boundary_colors]
+        ]
+        # color the volume voxels with the nearest boundary points' color
+        _, idx, _ = knn_points(
+            coord_grid.view(1, -1, 3), boundary_points.view(1, -1, 3)
+        )
+        volume_colors = (
+            boundary_colors[idx.view(-1)].view(*volume_size, 3).permute(3, 0, 1, 2)
+        )
+
+    elif shape == "sphere":
+        # set all voxels within a certain distance from the origin to be opaque
+        volume_densities = (
+            coord_grid.norm(dim=-1)
+            <= 0.5 * volume_voxel_size * (volume_size[0] - border_offset)
+        ).float()
+        # color each voxel with the standard spherical color
+        volume_colors = (
+            (torch.nn.functional.normalize(coord_grid, dim=-1) + 1.0) * 0.5
+        ).permute(3, 0, 1, 2)
+
+    else:
+        raise ValueError(shape)
+
+    volume_voxel_size = torch.ones((batch_size, 1), device=device) * volume_voxel_size
+    volume_translation = volume_translation.expand(batch_size, 3)
+    volumes = Volumes(
+        densities=volume_densities[None, None].expand(batch_size, 1, *volume_size),
+        features=volume_colors[None].expand(batch_size, 3, *volume_size),
+        voxel_size=volume_voxel_size,
+        volume_translation=volume_translation,
+    )
+
+    return volumes, volume_voxel_size, volume_translation
+
+
+def init_cameras(
+    batch_size: int = 10,
+    image_size: Optional[Tuple[int, int]] = (50, 50),
+    ndc: bool = False,
+):
+    """
+    Initialize a batch of cameras whose extrinsics rotate the cameras around
+    the world's y axis.
+    Depending on whether we want an NDC-space (`ndc==True`) or a screen-space camera,
+    the camera's focal length and principal point are initialized accordingly:
+        For `ndc==False`, p0=image_size/2, focal_length=max(image_size).
+        For `ndc==True`, focal_length=1.0, p0 = 0.0.
+    The z-coordinate of the translation vector of each camera is fixed to 1.5.
+    """
+    device = torch.device("cuda:0")
+
+    # trivial rotations
+    R = init_uniform_y_rotations(batch_size=batch_size, device=device)
+
+    # move camera 1.5 m away from the scene center
+    T = torch.zeros((batch_size, 3), device=device)
+    T[:, 2] = 1.5
+
+    if ndc:
+        p0 = torch.zeros(batch_size, 2, device=device)
+        focal = torch.ones(batch_size, device=device)
+    else:
+        p0 = torch.ones(batch_size, 2, device=device)
+        p0[:, 0] *= image_size[1] * 0.5
+        p0[:, 1] *= image_size[0] * 0.5
+        focal = max(*image_size) * torch.ones(batch_size, device=device)
+
+    # convert to a Camera object
+    cameras = PerspectiveCameras(focal, p0, R=R, T=T, device=device)
+    return cameras
+
+
+class TestRenderVolumes(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)  # fix the torch and numpy seeds before every test
+        np.random.seed(42)
+
+    @staticmethod
+    def renderer(
+        volume_size=(25, 25, 25),
+        batch_size=10,
+        shape="sphere",
+        raymarcher_type=EmissionAbsorptionRaymarcher,
+        n_rays_per_image=10,
+        n_pts_per_ray=10,
+    ):
+        """Build a VolumeRenderer setup and return a closure that runs one render."""
+        # get the volumes
+        volumes = init_boundary_volume(
+            volume_size=volume_size, batch_size=batch_size, shape=shape
+        )[0]
+        # init the mc raysampler
+        raysampler = MonteCarloRaysampler(
+            min_x=-1.0,
+            max_x=1.0,
+            min_y=-1.0,
+            max_y=1.0,
+            n_rays_per_image=n_rays_per_image,
+            n_pts_per_ray=n_pts_per_ray,
+            min_depth=0.1,
+            max_depth=2.0,
+        ).to(volumes.device)
+
+        # get the raymarcher
+        raymarcher = raymarcher_type()
+
+        renderer = VolumeRenderer(
+            raysampler=raysampler, raymarcher=raymarcher, sample_mode="bilinear"
+        )
+
+        # generate NDC camera extrinsics and intrinsics
+        cameras = init_cameras(batch_size, image_size=None, ndc=True)
+
+        def run_renderer():
+            renderer(cameras=cameras, volumes=volumes)
+
+        return run_renderer
+
+    def test_input_types(self, batch_size: int = 10):
+        """
+        Check that ValueErrors are thrown for bad constructor args, batches and rays.
+        """
+        # check the constructor
+        for bad_raysampler in (None, 5, []):
+            for bad_raymarcher in (None, 5, []):
+                with self.assertRaises(ValueError):
+                    VolumeRenderer(raysampler=bad_raysampler, raymarcher=bad_raymarcher)
+
+        raysampler = NDCMultinomialRaysampler(
+            image_width=100,
+            image_height=100,
+            n_pts_per_ray=10,
+            min_depth=0.1,
+            max_depth=1.0,
+        )
+
+        # init a trivial renderer
+        renderer = VolumeRenderer(
+            raysampler=raysampler, raymarcher=EmissionAbsorptionRaymarcher()
+        )
+
+        # get cameras
+        cameras = init_cameras(batch_size=batch_size)
+
+        # get volumes
+        volumes = init_boundary_volume(volume_size=(10, 10, 10), batch_size=batch_size)[
+            0
+        ]
+
+        # different batch sizes for cameras / volumes
+        with self.assertRaises(ValueError):
+            renderer(cameras=cameras, volumes=volumes[:-1])
+
+        # ray checks for VolumeSampler; each tuple is (origins, directions, lengths)
+        volume_sampler = VolumeSampler(volumes=volumes)
+        n_rays = 100
+        for bad_ray_bundle in (
+            (
+                torch.rand(batch_size, n_rays, 3),
+                torch.rand(batch_size, n_rays + 1, 3),  # directions: one extra ray
+                torch.rand(batch_size, n_rays, 10),
+            ),
+            (
+                torch.rand(batch_size + 1, n_rays, 3),  # origins: batch size + 1
+                torch.rand(batch_size, n_rays, 3),
+                torch.rand(batch_size, n_rays, 10),
+            ),
+            (
+                torch.rand(batch_size, n_rays, 3),
+                torch.rand(batch_size, n_rays, 2),  # directions: last dim 2, not 3
+                torch.rand(batch_size, n_rays, 10),
+            ),
+            (
+                torch.rand(batch_size, n_rays, 3),
+                torch.rand(batch_size, n_rays, 3),
+                torch.rand(batch_size, n_rays),  # lengths: only two dimensions
+            ),
+        ):
+            ray_bundle = RayBundle(
+                **dict(
+                    zip(
+                        ("origins", "directions", "lengths"),
+                        [r.to(cameras.device) for r in bad_ray_bundle],
+                    )
+                ),
+                xys=None,
+            )
+            with self.assertRaises(ValueError):
+                volume_sampler(ray_bundle)
+
+            # check also explicitly the ray bundle validation function
+            with self.assertRaises(ValueError):
+                _validate_ray_bundle_variables(*bad_ray_bundle)
+
+    def test_compare_with_pointclouds_renderer(
+        self, batch_size=11, volume_size=(30, 30, 30), image_size=(200, 250)
+    ):
+        """
+        Convert a boundary volume to a point cloud and check that PointsRenderer
+        and VolumeRenderer agree on the sphere (the cube runs only if DEBUG).
+        """
+
+        # generate NDC camera extrinsics and intrinsics
+        cameras = init_cameras(batch_size, image_size=image_size, ndc=True)
+
+        # init the boundary volume
+        for shape in ("sphere", "cube"):
+
+            if not DEBUG and shape == "cube":
+                # do not run numeric checks for the cube as the
+                # differences in rendering equations make the renders incomparable
+                continue
+
+            # get rand offset of the volume
+            volume_translation = torch.randn(batch_size, 3) * 0.1
+            # volume_translation[2] = 0.1
+            volumes = init_boundary_volume(
+                volume_size=volume_size,
+                batch_size=batch_size,
+                shape=shape,
+                volume_translation=volume_translation,
+            )[0]
+
+            # convert the volumes to a pointcloud
+            points = []
+            points_features = []
+            for densities_one, features_one, grid_one in zip(
+                volumes.densities(),
+                volumes.features(),
+                volumes.get_coord_grid(world_coordinates=True),
+            ):
+                opaque = densities_one.view(-1) > 1e-4
+                points.append(grid_one.view(-1, 3)[opaque])
+                points_features.append(features_one.reshape(3, -1).t()[opaque])
+            pointclouds = Pointclouds(points, features=points_features)
+
+            # init the grid raysampler with the ndc grid
+            coord_range = 1.0
+            half_pix_size = coord_range / max(*image_size)
+            raysampler = NDCMultinomialRaysampler(
+                image_width=image_size[1],
+                image_height=image_size[0],
+                n_pts_per_ray=256,
+                min_depth=0.1,
+                max_depth=2.0,
+            )
+
+            # get the EA raymarcher
+            raymarcher = EmissionAbsorptionRaymarcher()
+
+            # jitter the camera intrinsics a bit for each render
+            cameras_randomized = cameras.clone()
+            cameras_randomized.principal_point = (
+                torch.randn_like(cameras.principal_point) * 0.3
+            )
+            cameras_randomized.focal_length = (
+                cameras.focal_length + torch.randn_like(cameras.focal_length) * 0.2
+            )
+
+            # get the volumetric render
+            images = VolumeRenderer(
+                raysampler=raysampler, raymarcher=raymarcher, sample_mode="bilinear"
+            )(cameras=cameras_randomized, volumes=volumes)[0][..., :3]
+
+            # instantiate the points renderer
+            point_radius = 6 * half_pix_size
+            points_renderer = PointsRenderer(
+                rasterizer=PointsRasterizer(
+                    cameras=cameras_randomized,
+                    raster_settings=PointsRasterizationSettings(
+                        image_size=image_size, radius=point_radius, points_per_pixel=10
+                    ),
+                ),
+                compositor=AlphaCompositor(),
+            )
+
+            # get the point render
+            images_pts = points_renderer(pointclouds)
+
+            if shape == "sphere":
+                diff = (images - images_pts).abs().mean(dim=-1)
+                mu_diff = diff.mean(dim=(1, 2))
+                std_diff = diff.std(dim=(1, 2))
+                self.assertClose(mu_diff, torch.zeros_like(mu_diff), atol=3e-2)
+                self.assertClose(std_diff, torch.zeros_like(std_diff), atol=6e-2)
+
+            if DEBUG:
+                outdir = tempfile.gettempdir() + "/test_volume_vs_pts_renderer"
+                os.makedirs(outdir, exist_ok=True)
+
+                frames = []
+                for image, image_pts in zip(images, images_pts):
+                    diff_image = (
+                        ((image - image_pts) * 0.5 + 0.5)
+                        .mean(dim=2, keepdim=True)
+                        .repeat(1, 1, 3)
+                    )
+                    image_pil = Image.fromarray(
+                        (
+                            torch.cat((image, image_pts, diff_image), dim=1)
+                            .detach()
+                            .cpu()
+                            .numpy()
+                            * 255.0
+                        ).astype(np.uint8)
+                    )
+                    frames.append(image_pil)
+
+                # export gif
+                outfile = os.path.join(outdir, f"volume_vs_pts_render_{shape}.gif")
+                frames[0].save(
+                    outfile,
+                    save_all=True,
+                    append_images=frames[1:],
+                    duration=1000 // 15,  # Pillow takes milliseconds per frame (15 fps)
+                    loop=0,
+                )
+                print(f"exported {outfile}")
+
+                # export concatenated frames
+                outfile_cat = os.path.join(outdir, f"volume_vs_pts_render_{shape}.png")
+                Image.fromarray(
+                    np.concatenate([np.array(f) for f in frames], axis=0)
+                ).save(outfile_cat)
+                print(f"exported {outfile_cat}")
+
+    def test_monte_carlo_rendering(
+        self, n_frames=20, volume_size=(30, 30, 30), image_size=(40, 50)
+    ):
+        """
+        Tests that rendering with the MonteCarloRaysampler matches the
+        rendering with MultinomialRaysampler sampled at the corresponding
+        MonteCarlo locations.
+        """
+        volumes = init_boundary_volume(
+            volume_size=volume_size, batch_size=n_frames, shape="sphere"
+        )[0]
+
+        # generate camera extrinsics and intrinsics
+        cameras = init_cameras(n_frames, image_size=image_size)
+
+        # init the grid raysampler
+        raysampler_multinomial = MultinomialRaysampler(
+            min_x=0.5,
+            max_x=image_size[1] - 0.5,
+            min_y=0.5,
+            max_y=image_size[0] - 0.5,
+            image_width=image_size[1],
+            image_height=image_size[0],
+            n_pts_per_ray=256,
+            min_depth=0.5,
+            max_depth=2.0,
+        )
+
+        # init the mc raysampler
+        raysampler_mc = MonteCarloRaysampler(
+            min_x=0.5,
+            max_x=image_size[1] - 0.5,
+            min_y=0.5,
+            max_y=image_size[0] - 0.5,
+            n_rays_per_image=3000,
+            n_pts_per_ray=256,
+            min_depth=0.5,
+            max_depth=2.0,
+        )
+
+        # get the EA raymarcher
+        raymarcher = EmissionAbsorptionRaymarcher()
+
+        # NOTE(review): loop var unused -> both renders use raysampler_multinomial
+        (
+            (images_opacities_mc, ray_bundle_mc),
+            (images_opacities_grid, ray_bundle_grid),
+        ) = [
+            VolumeRenderer(
+                raysampler=raysampler_multinomial,  # NOTE(review): meant `raysampler`?
+                raymarcher=raymarcher,
+                sample_mode="bilinear",
+            )(cameras=cameras, volumes=volumes)
+            for raysampler in (raysampler_mc, raysampler_multinomial)
+        ]
+
+        # convert the mc sampling locations to [-1, 1]
+        sample_loc = ray_bundle_mc.xys.clone()
+        sample_loc[..., 0] = 2 * (sample_loc[..., 0] / image_size[1]) - 1
+        sample_loc[..., 1] = 2 * (sample_loc[..., 1] / image_size[0]) - 1
+
+        # sample the grid render at the mc locations
+        images_opacities_mc_ = torch.nn.functional.grid_sample(
+            images_opacities_grid.permute(0, 3, 1, 2), sample_loc, align_corners=False
+        )
+
+        # check that the samples are the same
+        self.assertClose(
+            images_opacities_mc.permute(0, 3, 1, 2), images_opacities_mc_, atol=1e-4
+        )
+
+    def test_rotating_gif(self):
+        for image_size in ((200, 100), (100, 200)):  # (H, W): portrait, landscape
+            self._rotating_gif(image_size=image_size)
+
+    def _rotating_gif(
+        self, image_size, n_frames=50, fps=15, volume_size=(100, 100, 100)
+    ):
+        """
+        Export gifs of a rotating sphere and cube for both sample modes (DEBUG only).
+        """
+
+        if not DEBUG:
+            # do not run this if debug is False
+            return
+
+        for shape in ("sphere", "cube"):
+            for sample_mode in ("bilinear", "nearest"):
+
+                volumes = init_boundary_volume(
+                    volume_size=volume_size, batch_size=n_frames, shape=shape
+                )[0]
+
+                # generate camera extrinsics and intrinsics
+                cameras = init_cameras(n_frames, image_size=image_size)
+
+                # init the grid raysampler
+                raysampler = MultinomialRaysampler(
+                    min_x=0.5,
+                    max_x=image_size[1] - 0.5,
+                    min_y=0.5,
+                    max_y=image_size[0] - 0.5,
+                    image_width=image_size[1],
+                    image_height=image_size[0],
+                    n_pts_per_ray=256,
+                    min_depth=0.5,
+                    max_depth=2.0,
+                )
+
+                # get the EA raymarcher
+                raymarcher = EmissionAbsorptionRaymarcher()
+
+                # initialize the renderer
+                renderer = VolumeRenderer(
+                    raysampler=raysampler,
+                    raymarcher=raymarcher,
+                    sample_mode=sample_mode,
+                )
+
+                # run the renderer
+                images_opacities = renderer(cameras=cameras, volumes=volumes)[0]
+
+                # split output to the alpha channel and rendered images
+                images, opacities = images_opacities[..., :3], images_opacities[..., 3]
+
+                # export the gif: each frame shows the render next to its opacity
+                outdir = tempfile.gettempdir() + "/test_volume_renderer_gifs"
+                os.makedirs(outdir, exist_ok=True)
+                frames = []
+                for image, opacity in zip(images, opacities):
+                    image_pil = Image.fromarray(
+                        (
+                            torch.cat(
+                                (image, opacity[..., None].repeat(1, 1, 3)), dim=1
+                            )
+                            .detach()
+                            .cpu()
+                            .numpy()
+                            * 255.0
+                        ).astype(np.uint8)
+                    )
+                    frames.append(image_pil)
+                outfile = os.path.join(outdir, f"{shape}_{sample_mode}.gif")
+                frames[0].save(
+                    outfile,
+                    save_all=True,
+                    append_images=frames[1:],
+                    duration=1000 // fps,  # Pillow takes milliseconds per frame
+                    loop=0,
+                )
+                print(f"exported {outfile}")
+
+    def test_rotating_cube_volume_render(self):
+        """
+        Renders the 4 homogeneously colored sides of a cube volume and checks
+        that each render is uniformly colored and fully opaque, that gradients
+        reach the camera/volume variables, and that AbsorptionOnlyRaymarcher
+        opacities match the EmissionAbsorptionRaymarcher ones.
+        """
+
+        # batch_size = 4 sides of the cube
+        batch_size = 4
+        image_size = (50, 40)
+
+        for volume_size in ([25, 25, 25],):
+            for sample_mode in ("bilinear", "nearest"):
+
+                volume_translation = torch.zeros(batch_size, 3)
+                volume_translation.requires_grad = True
+                volumes, volume_voxel_size, _ = init_boundary_volume(
+                    volume_size=volume_size,
+                    batch_size=batch_size,
+                    shape="cube",
+                    volume_translation=volume_translation,
+                )
+
+                # generate camera extrinsics and intrinsics
+                cameras = init_cameras(batch_size, image_size=image_size)
+
+                # enable the gradient caching for the camera variables
+                # the list of differentiable camera vars
+                cam_vars = ("R", "T", "focal_length", "principal_point")
+                for cam_var in cam_vars:
+                    getattr(cameras, cam_var).requires_grad = True
+                # enable the grad for volume vars as well
+                volumes.features().requires_grad = True
+                volumes.densities().requires_grad = True
+
+                raysampler = MultinomialRaysampler(
+                    min_x=0.5,
+                    max_x=image_size[1] - 0.5,
+                    min_y=0.5,
+                    max_y=image_size[0] - 0.5,
+                    image_width=image_size[1],
+                    image_height=image_size[0],
+                    n_pts_per_ray=128,
+                    min_depth=0.01,
+                    max_depth=3.0,
+                )
+
+                raymarcher = EmissionAbsorptionRaymarcher()
+                renderer = VolumeRenderer(
+                    raysampler=raysampler,
+                    raymarcher=raymarcher,
+                    sample_mode=sample_mode,
+                )
+                images_opacities = renderer(cameras=cameras, volumes=volumes)[0]
+                images, opacities = images_opacities[..., :3], images_opacities[..., 3]
+
+                # check that the renderer does not erase gradients
+                loss = images_opacities.sum()
+                loss.backward()
+                for check_var in (
+                    *[getattr(cameras, cam_var) for cam_var in cam_vars],
+                    volumes.features(),
+                    volumes.densities(),
+                    volume_translation,
+                ):
+                    self.assertIsNotNone(check_var.grad)
+
+                # the absorption-only (ao) opacities should be exactly the same
+                # as the emission-absorption (ea) ones; the ao render only needs
+                # a feature-less version of our volumes
+                raymarcher_ao = AbsorptionOnlyRaymarcher()
+                renderer_ao = VolumeRenderer(
+                    raysampler=raysampler,
+                    raymarcher=raymarcher_ao,
+                    sample_mode=sample_mode,
+                )
+                volumes_featureless = Volumes(
+                    densities=volumes.densities(),
+                    volume_translation=volume_translation,
+                    voxel_size=volume_voxel_size,
+                )
+                opacities_ao = renderer_ao(
+                    cameras=cameras, volumes=volumes_featureless
+                )[0][..., 0]
+                self.assertClose(opacities, opacities_ao)
+
+                # colors of the sides of the cube
+                gt_clr_sides = torch.tensor(
+                    [
+                        [1.0, 0.0, 0.0],
+                        [0.0, 1.0, 1.0],
+                        [1.0, 1.0, 1.0],
+                        [0.0, 1.0, 0.0],
+                    ],
+                    dtype=torch.float32,
+                    device=images.device,
+                )
+
+                if DEBUG:
+                    outdir = tempfile.gettempdir() + "/test_volume_renderer"
+                    os.makedirs(outdir, exist_ok=True)
+                    for imidx, (image, opacity) in enumerate(zip(images, opacities)):
+                        for image_ in (image, opacity):
+                            image_pil = Image.fromarray(
+                                (image_.detach().cpu().numpy() * 255.0).astype(np.uint8)
+                            )
+                            outfile = (
+                                outdir
+                                + f"/rgb_{sample_mode}"
+                                + f"_{str(volume_size).replace(' ','')}"
+                                + f"_{imidx:03d}"
+                            )
+                            if image_ is image:
+                                outfile += "_rgb.png"
+                            else:
+                                outfile += "_opacity.png"
+                            image_pil.save(outfile)
+                            print(f"exported {outfile}")
+
+                # ignore a margin of pixels around the image boundary
+                border = 10
+                for image, opacity, gt_color in zip(images, opacities, gt_clr_sides):
+                    image_crop = image[border:-border, border:-border]
+                    opacity_crop = opacity[border:-border, border:-border]
+
+                    # check mean and std difference from gt
+                    err = (
+                        (image_crop - gt_color[None, None].expand_as(image_crop))
+                        .abs()
+                        .mean(dim=-1)
+                    )
+                    zero = err.new_zeros(1)[0]
+                    self.assertClose(err.mean(), zero, atol=1e-2)
+                    self.assertClose(err.std(), zero, atol=1e-2)
+
+                    err_opacity = (opacity_crop - 1.0).abs()
+                    self.assertClose(err_opacity.mean(), zero, atol=1e-2)
+                    self.assertClose(err_opacity.std(), zero, atol=1e-2)
diff --git a/pytorch3d/tests/test_rendering_utils.py b/pytorch3d/tests/test_rendering_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..fca51d9e03b0f6772c8a99c30845274e8a83924c
--- /dev/null
+++ b/pytorch3d/tests/test_rendering_utils.py
@@ -0,0 +1,275 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import numpy as np
+import torch
+from pytorch3d.common.compat import meshgrid_ij
+from pytorch3d.ops import eyes
+from pytorch3d.renderer import (
+    AlphaCompositor,
+    PerspectiveCameras,
+    PointsRasterizationSettings,
+    PointsRasterizer,
+    PointsRenderer,
+)
+from pytorch3d.renderer.utils import (
+    ndc_grid_sample,
+    ndc_to_grid_sample_coords,
+    TensorProperties,
+)
+from pytorch3d.structures import Pointclouds
+
+from .common_testing import TestCaseMixin
+
+
+# Example class for testing
+class TensorPropertiesTestClass(TensorProperties):
+    """Minimal TensorProperties subclass holding two properties, `x` and `y`."""
+    def __init__(self, x=None, y=None, device="cpu"):
+        super().__init__(device=device, x=x, y=y)
+
+    def clone(self):
+        return super().clone(TensorPropertiesTestClass())
+
+
+class TestTensorProperties(TestCaseMixin, unittest.TestCase):
+    def test_init(self):
+        example = TensorPropertiesTestClass(x=10.0, y=(100.0, 200.0))
+        # Check kwargs set as attributes + converted to tensors
+        self.assertTrue(torch.is_tensor(example.x))
+        self.assertTrue(torch.is_tensor(example.y))
+        # Check broadcasting
+        self.assertTrue(example.x.shape == (2,))
+        self.assertTrue(example.y.shape == (2,))
+        self.assertTrue(len(example) == 2)
+
+    def test_to(self):
+        """Check the to/cpu/cuda methods; requires CUDA devices 0 and 1."""
+        example = TensorPropertiesTestClass(x=10.0, y=(100.0, 200.0))
+        device = torch.device("cuda:0")
+        new_example = example.to(device=device)
+        self.assertEqual(new_example.device, device)
+
+        example_cpu = example.cpu()
+        self.assertEqual(example_cpu.device, torch.device("cpu"))
+
+        example_gpu = example.cuda()
+        self.assertEqual(example_gpu.device.type, "cuda")
+        self.assertIsNotNone(example_gpu.device.index)
+
+        example_gpu1 = example.cuda(1)
+        self.assertEqual(example_gpu1.device, torch.device("cuda:1"))
+
+    def test_clone(self):
+        """Check that clone returns tensors separate from the originals."""
+        example = TensorPropertiesTestClass(x=10.0, y=(100.0, 200.0))
+        new_example = example.clone()
+        self.assertSeparate(example.x, new_example.x)
+        self.assertSeparate(example.y, new_example.y)
+
+    def test_get_set(self):
+        """Test getitem returns an accessor which can be used to modify
+        attributes at a particular index."""
+        example = TensorPropertiesTestClass(x=10.0, y=(100.0, 200.0, 300.0))
+
+        # update y1
+        example[1].y = 5.0
+        self.assertTrue(example.y[1] == 5.0)
+
+        # Get item and get value
+        ex0 = example[0]
+        self.assertTrue(ex0.y == 100.0)
+
+    def test_empty_input(self):
+        example = TensorPropertiesTestClass(x=(), y=())
+        self.assertTrue(len(example) == 0)
+        self.assertTrue(example.isempty())
+
+    def test_gather_props(self):
+        N = 4
+        x = torch.randn((N, 3, 4))
+        y = torch.randn((N, 5))
+        test_class = TensorPropertiesTestClass(x=x, y=y)
+
+        S = 15
+        idx = torch.tensor(np.random.choice(N, S))  # S indices drawn from range(N)
+        test_class_gathered = test_class.gather_props(idx)
+
+        self.assertTrue(test_class_gathered.x.shape == (S, 3, 4))
+        self.assertTrue(test_class_gathered.y.shape == (S, 5))
+
+        for i in range(N):
+            inds = idx == i
+            if inds.sum() > 0:
+                # Check the gathered points in the output have the same value from
+                # the input.
+                self.assertClose(test_class_gathered.x[inds].mean(dim=0), x[i, ...])
+                self.assertClose(test_class_gathered.y[inds].mean(dim=0), y[i, ...])
+
+    def test_ndc_grid_sample_rendering(self):
+        """
+        Use PyTorch3D point renderer to render a colored point cloud, then
+        sample the image at the locations of the point projections with
+        `ndc_grid_sample`. Finally, assert that the sampled colors are equal to the
+        original point cloud colors.
+
+        Note that, in order to ensure correctness, we use a nearest-neighbor
+        assignment point renderer (i.e. no soft splatting).
+        """
+
+        # generate a bunch of 3D points on a regular grid lying in the z-plane
+        n_grid_pts = 10
+        grid_scale = 0.9
+        z_plane = 2.0
+        image_size = [128, 128]
+        point_radius = 0.015
+        n_pts = n_grid_pts * n_grid_pts
+        pts = torch.stack(
+            meshgrid_ij(
+                [torch.linspace(-grid_scale, grid_scale, n_grid_pts)] * 2,
+            ),
+            dim=-1,
+        )
+        pts = torch.cat([pts, z_plane * torch.ones_like(pts[..., :1])], dim=-1)
+        pts = pts.reshape(1, n_pts, 3)
+
+        # color the points randomly
+        pts_colors = torch.rand(1, n_pts, 3)
+
+        # make trivial rendering cameras
+        cameras = PerspectiveCameras(
+            R=eyes(dim=3, N=1),
+            device=pts.device,
+            T=torch.zeros(1, 3, dtype=torch.float32, device=pts.device),
+        )
+
+        # render the point cloud
+        pcl = Pointclouds(points=pts, features=pts_colors)
+        renderer = NearestNeighborPointsRenderer(
+            rasterizer=PointsRasterizer(
+                cameras=cameras,
+                raster_settings=PointsRasterizationSettings(
+                    image_size=image_size,
+                    radius=point_radius,
+                    points_per_pixel=1,
+                ),
+            ),
+            compositor=AlphaCompositor(),
+        )
+        im_render = renderer(pcl)
+
+        # sample the render at projected pts
+        pts_proj = cameras.transform_points(pcl.points_padded())[..., :2]
+        pts_colors_sampled = ndc_grid_sample(
+            im_render,
+            pts_proj,
+            mode="nearest",
+            align_corners=False,
+        ).permute(0, 2, 1)
+
+        # assert that the samples are the same as original points
+        self.assertClose(pts_colors, pts_colors_sampled, atol=1e-4)
+
+    def test_ndc_to_grid_sample_coords(self):
+        """
+        Test the conversion from ndc to grid_sample coords by comparing
+        to known conversion results.
+        """
+
+        # square image tests; each entry is an [ndc xy, expected grid_sample xy] pair
+        image_size_square = [100, 100]
+        xy_ndc_gs_square = torch.FloatTensor(
+            [
+                # 4 corners
+                [[-1.0, -1.0], [1.0, 1.0]],
+                [[1.0, 1.0], [-1.0, -1.0]],
+                [[1.0, -1.0], [-1.0, 1.0]],
+                [[-1.0, 1.0], [1.0, -1.0]],
+                # center
+                [[0.0, 0.0], [0.0, 0.0]],
+            ]
+        )
+
+        # non-batched version
+        for xy_ndc, xy_gs in xy_ndc_gs_square:
+            xy_gs_predicted = ndc_to_grid_sample_coords(
+                xy_ndc,
+                image_size_square,
+            )
+            self.assertClose(xy_gs_predicted, xy_gs)
+
+        # batched version
+        xy_ndc, xy_gs = xy_ndc_gs_square[:, 0], xy_ndc_gs_square[:, 1]
+        xy_gs_predicted = ndc_to_grid_sample_coords(
+            xy_ndc,
+            image_size_square,
+        )
+        self.assertClose(xy_gs_predicted, xy_gs)
+
+        # non-square image tests
+        image_size = [100, 200]
+        xy_ndc_gs = torch.FloatTensor(
+            [
+                # 4 corners
+                [[-2.0, -1.0], [1.0, 1.0]],
+                [[2.0, -1.0], [-1.0, 1.0]],
+                [[-2.0, 1.0], [1.0, -1.0]],
+                [[2.0, 1.0], [-1.0, -1.0]],
+                # center
+                [[0.0, 0.0], [0.0, 0.0]],
+                # non-corner points
+                [[4.0, 0.5], [-2.0, -0.5]],
+                [[1.0, -0.5], [-0.5, 0.5]],
+            ]
+        )
+
+        # check both H > W and W > H
+        for flip_axes in [False, True]:
+
+            # non-batched version
+            for xy_ndc, xy_gs in xy_ndc_gs:
+                xy_gs_predicted = ndc_to_grid_sample_coords(
+                    xy_ndc.flip(dims=(-1,)) if flip_axes else xy_ndc,
+                    list(reversed(image_size)) if flip_axes else image_size,
+                )
+                self.assertClose(
+                    xy_gs_predicted,
+                    xy_gs.flip(dims=(-1,)) if flip_axes else xy_gs,
+                )
+
+            # batched version
+            xy_ndc, xy_gs = xy_ndc_gs[:, 0], xy_ndc_gs[:, 1]
+            xy_gs_predicted = ndc_to_grid_sample_coords(
+                xy_ndc.flip(dims=(-1,)) if flip_axes else xy_ndc,
+                list(reversed(image_size)) if flip_axes else image_size,
+            )
+            self.assertClose(
+                xy_gs_predicted,
+                xy_gs.flip(dims=(-1,)) if flip_axes else xy_gs,
+            )
+
+
+class NearestNeighborPointsRenderer(PointsRenderer):
+    """
+    Points renderer that composites the rasterized points with a constant
+    weight of one, i.e. a trivial nearest neighbor assignment.
+    """
+
+    def forward(self, point_clouds, **kwargs) -> torch.Tensor:
+        fragments = self.rasterizer(point_clouds, **kwargs)
+        pix_to_point = fragments.idx.long().permute(0, 3, 1, 2)
+        # every rasterized point gets a unit weight, whatever its distance
+        unit_weights = torch.ones_like(fragments.dists.permute(0, 3, 1, 2))
+        packed_features = point_clouds.features_packed().permute(1, 0)
+        return self.compositor(
+            pix_to_point,
+            unit_weights,
+            packed_features,
+            **kwargs,
+        )
diff --git a/pytorch3d/tests/test_rotation_conversions.py b/pytorch3d/tests/test_rotation_conversions.py
new file mode 100644
index 0000000000000000000000000000000000000000..7090d3ca3c43db7f1fe1d1fea7c4f23d9fda43bc
--- /dev/null
+++ b/pytorch3d/tests/test_rotation_conversions.py
@@ -0,0 +1,303 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import itertools
+import math
+import unittest
+from distutils.version import LooseVersion
+from typing import Optional, Union
+
+import numpy as np
+import torch
+from pytorch3d.transforms.rotation_conversions import (
+    axis_angle_to_matrix,
+    axis_angle_to_quaternion,
+    euler_angles_to_matrix,
+    matrix_to_axis_angle,
+    matrix_to_euler_angles,
+    matrix_to_quaternion,
+    matrix_to_rotation_6d,
+    quaternion_apply,
+    quaternion_multiply,
+    quaternion_to_axis_angle,
+    quaternion_to_matrix,
+    random_quaternions,
+    random_rotation,
+    random_rotations,
+    rotation_6d_to_matrix,
+)
+
+from .common_testing import TestCaseMixin
+
+
+class TestRandomRotation(unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(1)
+
+    def test_random_rotation_invariant(self):
+        """Sign patterns of the first matrix row are spread evenly over octants."""
+        N = 1000
+        base = random_rotation()
+        octants = list(itertools.product([False, True], repeat=3))
+
+        matrices = random_rotations(N)
+        left_composed = torch.matmul(base, matrices)
+        right_composed = torch.matmul(matrices, base)
+
+        # an unbiased sampler puts N / 8 samples into every octant on average
+        expected = N / 8.0
+        for k, results in enumerate([matrices, left_composed, right_composed]):
+            counts = dict.fromkeys(octants, 0)
+            # tally the sign pattern of the first row of every matrix
+            for first_row in results[:, 0]:
+                counts[tuple((first_row > 0).tolist())] += 1
+            deviation = torch.tensor(list(counts.values())) - expected
+            chisquare_statistic = torch.sum(deviation * deviation / expected)
+            # The 0.1 significance level for chisquare(8-1) is
+            # scipy.stats.chi2(7).ppf(0.9) == 12.017.
+            self.assertLess(chisquare_statistic, 12, (counts, chisquare_statistic, k))
+
+
+class TestRotationConversion(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(1)  # fixed seed so the random test data is reproducible
+
+    def test_from_quat(self):
+        """Round trip: quaternion -> matrix -> quaternion."""
+        quats = random_quaternions(13, dtype=torch.float64)
+        quats_roundtrip = matrix_to_quaternion(quaternion_to_matrix(quats))
+        self._assert_quaternions_close(quats, quats_roundtrip)
+
+    def test_to_quat(self):
+        """Round trip: matrix -> quaternion -> matrix."""
+        rotations = random_rotations(13, dtype=torch.float64)
+        rotations_roundtrip = quaternion_to_matrix(matrix_to_quaternion(rotations))
+        self.assertClose(rotations, rotations_roundtrip)
+
+    def test_quat_grad_exists(self):
+        """The matrix -> quaternion -> matrix round trip has finite gradients."""
+        rotation = random_rotation().requires_grad_(True)
+        quat = matrix_to_quaternion(rotation)
+        roundtrip = quaternion_to_matrix(quat)
+        (grad,) = torch.autograd.grad(roundtrip.sum(), rotation)
+        self.assertTrue(torch.isfinite(grad).all())
+
+    def _tait_bryan_conventions(self):
+        return ("".join(axes) for axes in itertools.permutations("XYZ"))
+
+    def _proper_euler_conventions(self):
+        # the first and last axis coincide, e.g. "XYX"
+        return (a + b + a for a, b in itertools.permutations("XYZ", 2))
+
+    def _all_euler_angle_conventions(self):
+        tait_bryan = self._tait_bryan_conventions()
+        proper_euler = self._proper_euler_conventions()
+        return itertools.chain(tait_bryan, proper_euler)
+
+    def test_conventions(self):
+        """The conventions listing has 12 entries and all of them are distinct."""
+        conventions = list(self._all_euler_angle_conventions())
+        self.assertEqual(len(conventions), 12)
+        self.assertEqual(len(set(conventions)), 12)
+
+    def test_from_euler(self):
+        """Round trip: euler -> mtx -> euler, for Tait-Bryan then proper Euler."""
+        n_repetitions = 10
+        # tolerance is how much we keep the middle angle away from the extreme
+        # allowed values which make the calculation unstable (Gimbal lock).
+        tolerance = 0.04
+        half_pi = math.pi / 2
+        data = torch.zeros(n_repetitions, 3)
+        data.uniform_(-math.pi, math.pi)
+
+        data[:, 1].uniform_(-half_pi + tolerance, half_pi - tolerance)
+        for convention in self._tait_bryan_conventions():
+            matrices = euler_angles_to_matrix(data, convention)
+            mdata = matrix_to_euler_angles(matrices, convention)
+            self.assertClose(data, mdata)
+
+        data[:, 1] += half_pi  # middle angle now lies in (tolerance, pi - tolerance)
+        for convention in self._proper_euler_conventions():
+            matrices = euler_angles_to_matrix(data, convention)
+            mdata = matrix_to_euler_angles(matrices, convention)
+            self.assertClose(data, mdata)
+
+    def test_to_euler(self):
+        """Round trip: matrix -> Euler angles -> matrix, for every convention."""
+        rotations = random_rotations(13, dtype=torch.float64)
+        for convention in self._all_euler_angle_conventions():
+            angles = matrix_to_euler_angles(rotations, convention)
+            rotations_roundtrip = euler_angles_to_matrix(angles, convention)
+            self.assertClose(rotations, rotations_roundtrip)
+
+    def test_euler_grad_exists(self):
+        """The matrix -> Euler -> matrix round trip has finite gradients."""
+        rotation = random_rotation(dtype=torch.float64).requires_grad_(True)
+        for convention in self._all_euler_angle_conventions():
+            angles = matrix_to_euler_angles(rotation, convention)
+            roundtrip = euler_angles_to_matrix(angles, convention)
+            # gradient of the summed round-trip matrix w.r.t. the input rotation
+            (grad,) = torch.autograd.grad(roundtrip.sum(), rotation)
+            self.assertTrue(torch.isfinite(grad).all())
+
+    def test_quaternion_multiplication(self):
+        """Multiplying quaternions matches multiplying their rotation matrices."""
+        # the batch shapes (3, 5) and (7, 3, 1) broadcast to (7, 3, 5)
+        lhs = random_quaternions(15, torch.float64).reshape((3, 5, 4))
+        rhs = random_quaternions(21, torch.float64).reshape((7, 3, 1, 4))
+        product = quaternion_multiply(lhs, rhs)
+        self.assertEqual(product.shape, (7, 3, 5, 4))
+        product_matrix = torch.matmul(
+            quaternion_to_matrix(lhs), quaternion_to_matrix(rhs)
+        )
+        self._assert_quaternions_close(product, matrix_to_quaternion(product_matrix))
+
+    def test_matrix_to_quaternion_corner_case(self):
+        """Check no bad gradients from sqrt(0) when starting at the identity."""
+        matrix = torch.eye(3, requires_grad=True)
+        target = torch.tensor([0.984808, 0.0, 0.174, 0.0])
+
+        optimizer = torch.optim.Adam([matrix], lr=0.05)
+        optimizer.zero_grad()
+        q = matrix_to_quaternion(matrix)
+        loss = torch.sum((q - target) ** 2)
+        loss.backward()
+        optimizer.step()
+
+        self.assertTrue(torch.isfinite(matrix).all(), "Result has non-finite values")
+        delta = 1e-2  # the identity has trace 3, so a real update must lower it
+        self.assertLess(
+            matrix.trace(),
+            3.0 - delta,
+            msg="Identity initialisation unchanged by a gradient step",
+        )
+
+    def test_matrix_to_quaternion_by_pi(self):
+        """
+        Rotations by pi about each of the 26 nonzero vectors with entries
+        in {0, 1, -1} survive a matrix -> quaternion -> matrix round trip.
+        These axes are representative across the directions.
+        """
+        values = (0.0, -1.0, 1.0)
+        # The first triple produced is (0, 0, 0), which is not a valid axis.
+        triples = list(itertools.product(values, repeat=3))[1:]
+        directions = torch.tensor(triples)
+        unit_axes = torch.nn.functional.normalize(directions, dim=-1)
+
+        # Rotation by pi around unit vector x is given by
+        # the matrix 2 x x^T - Id.
+        outer = torch.matmul(unit_axes[..., None], unit_axes[..., None, :])
+        rot_by_pi = 2 * outer - torch.eye(3)
+        # Convert to quaternions and straight back to matrices.
+        quats = matrix_to_quaternion(rot_by_pi)
+        recovered = quaternion_to_matrix(quats)
+        self.assertClose(rot_by_pi, recovered, atol=1e-3)
+
+    def test_from_axis_angle(self):
+        """Round trip axis_angle -> matrix -> axis_angle."""
+        n_samples = 20
+        axis_angles = torch.rand(n_samples, 3)
+        rotations = axis_angle_to_matrix(axis_angles)
+        recovered = matrix_to_axis_angle(rotations)
+        self.assertClose(axis_angles, recovered, atol=2e-6)
+
+    def test_from_axis_angle_has_grad(self):
+        """Both axis-angle round trips yield finite gradients."""
+        axis_angles = torch.rand(20, 3, requires_grad=True)
+        via_matrix = matrix_to_axis_angle(axis_angle_to_matrix(axis_angles))
+        via_quat = quaternion_to_axis_angle(axis_angle_to_quaternion(axis_angles))
+        # Differentiate the summed outputs with respect to the input.
+        total = via_matrix.sum() + via_quat.sum()
+        [grad] = torch.autograd.grad(total, axis_angles)
+        self.assertTrue(torch.isfinite(grad).all())
+
+    def test_to_axis_angle(self):
+        """Round trip matrix -> axis_angle -> matrix."""
+        rotations = random_rotations(13, dtype=torch.float64)
+        axis_angles = matrix_to_axis_angle(rotations)
+        rebuilt = axis_angle_to_matrix(axis_angles)
+        self.assertClose(rotations, rebuilt)
+
+    def test_quaternion_application(self):
+        """quaternion_apply matches multiplying by the rotation matrix."""
+        quats = random_quaternions(3, torch.float64)
+        quats.requires_grad = True
+        rot_matrices = quaternion_to_matrix(quats)
+        pts = torch.randn(3, 3, dtype=torch.float64, requires_grad=True)
+        via_quat = quaternion_apply(quats, pts)
+        via_matrix = torch.matmul(rot_matrices, pts[..., None]).squeeze(-1)
+        self.assertClose(via_quat, via_matrix)
+
+        grad_pts, grad_quats = torch.autograd.grad(via_quat.sum(), [pts, quats])
+        for grad in (grad_pts, grad_quats):
+            self.assertTrue(torch.isfinite(grad).all())
+
+    def test_6d(self):
+        """Round trip through the 6D rotation representation and back."""
+        r = random_rotations(13, dtype=torch.float64)
+
+        # 6D representation is not unique,
+        # but we implement it by taking the first two rows of the matrix
+        r6d = matrix_to_rotation_6d(r)
+        self.assertClose(r6d, r[:, :2, :].reshape(-1, 6))
+
+        # going to 6D and back should not change the matrix
+        r_hat = rotation_6d_to_matrix(r6d)
+        self.assertClose(r_hat, r)
+
+        # moving the second row R2 in the span of (R1, R2) should not matter
+        r6d[:, 3:] += 2 * r6d[:, :3]
+        r6d[:, :3] *= 3.0  # rescaling the first row should not matter either
+        r_hat = rotation_6d_to_matrix(r6d)
+        self.assertClose(r_hat, r)
+
+        # check that we map anything to a valid rotation
+        r6d = torch.rand(13, 6)
+        r6d[:4, :] *= 3.0  # rows 0-3: stretched to [0, 3)
+        r6d[4:8, :] -= 0.5  # rows 4-7: shifted to [-0.5, 0.5)
+        r = rotation_6d_to_matrix(r6d)
+        self.assertClose(  # orthogonality only: R R^T == Id (det is not checked)
+            torch.matmul(r, r.permute(0, 2, 1)), torch.eye(3).expand_as(r), atol=1e-6
+        )
+
+    @unittest.skipIf(LooseVersion(torch.__version__) < "1.9", "recent torchscript only")
+    def test_scriptable(self):
+        """Each conversion helper listed here must compile with torch.jit.script."""
+        torch.jit.script(axis_angle_to_matrix)
+        torch.jit.script(axis_angle_to_quaternion)
+        torch.jit.script(euler_angles_to_matrix)
+        torch.jit.script(matrix_to_axis_angle)
+        torch.jit.script(matrix_to_euler_angles)
+        torch.jit.script(matrix_to_quaternion)
+        torch.jit.script(matrix_to_rotation_6d)
+        torch.jit.script(quaternion_apply)
+        torch.jit.script(quaternion_multiply)
+        torch.jit.script(quaternion_to_matrix)
+        torch.jit.script(quaternion_to_axis_angle)
+        torch.jit.script(random_quaternions)
+        torch.jit.script(random_rotation)
+        torch.jit.script(random_rotations)
+        torch.jit.script(rotation_6d_to_matrix)
+
+    def _assert_quaternions_close(
+        self,
+        input: Union[torch.Tensor, np.ndarray],
+        other: Union[torch.Tensor, np.ndarray],
+        *,
+        rtol: float = 1e-05,
+        atol: float = 1e-08,
+        equal_nan: bool = False,
+        msg: Optional[str] = None,
+    ):
+        """Assert the last-axis dot product of input and other has magnitude 1."""
+        self.assertEqual(np.shape(input), np.shape(other))
+        dot = (input * other).sum(-1)
+        expected = torch.ones_like(dot)
+        tolerances = dict(rtol=rtol, atol=atol, equal_nan=equal_nan)
+        self.assertClose(dot.abs(), expected, msg=msg, **tolerances)
diff --git a/pytorch3d/tests/test_sample_farthest_points.py b/pytorch3d/tests/test_sample_farthest_points.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0c48628d5536f2d236cef17c8401f71c8447b30
--- /dev/null
+++ b/pytorch3d/tests/test_sample_farthest_points.py
@@ -0,0 +1,276 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import numpy as np
+import torch
+from pytorch3d.io import load_obj
+from pytorch3d.ops.sample_farthest_points import (
+    sample_farthest_points,
+    sample_farthest_points_naive,
+)
+from pytorch3d.ops.utils import masked_gather
+
+from .common_testing import (
+    get_pytorch3d_dir,
+    get_random_cuda_device,
+    get_tests_dir,
+    TestCaseMixin,
+)
+
+
+DATA_DIR = get_tests_dir() / "data"
+TUTORIAL_DATA_DIR = get_pytorch3d_dir() / "docs/tutorials/data"
+DEBUG = False
+
+
+class TestFPS(TestCaseMixin, unittest.TestCase):
+    def _test_simple(self, fps_func, device="cpu"):
+        """Check fps_func against hard-coded expected indices on two 2D clouds."""
+        # fmt: off
+        points = torch.tensor(
+            [
+                [
+                    [-1.0, -1.0],  # noqa: E241, E201
+                    [-1.3,  1.1],  # noqa: E241, E201
+                    [ 0.2, -1.1],  # noqa: E241, E201
+                    [ 0.0,  0.0],  # noqa: E241, E201
+                    [ 1.3,  1.3],  # noqa: E241, E201
+                    [ 1.0,  0.5],  # noqa: E241, E201
+                    [-1.3,  0.2],  # noqa: E241, E201
+                    [ 1.5, -0.5],  # noqa: E241, E201
+                ],
+                [
+                    [-2.2, -2.4],  # noqa: E241, E201
+                    [-2.1,  2.0],  # noqa: E241, E201
+                    [ 2.2,  2.1],  # noqa: E241, E201
+                    [ 2.1, -2.4],  # noqa: E241, E201
+                    [ 0.4, -1.0],  # noqa: E241, E201
+                    [ 0.3,  0.3],  # noqa: E241, E201
+                    [ 1.2,  0.5],  # noqa: E241, E201
+                    [ 4.5,  4.5],  # noqa: E241, E201
+                ],
+            ],
+            dtype=torch.float32,
+            device=device,
+        )
+        # fmt: on
+        expected_inds = torch.tensor([[0, 4], [0, 7]], dtype=torch.int64, device=device)
+        out_points, out_inds = fps_func(points, K=2)
+        self.assertClose(out_inds, expected_inds)
+        # Gather the points
+        expected_inds = expected_inds[..., None].expand(-1, -1, points.shape[-1])
+        self.assertClose(out_points, points.gather(dim=1, index=expected_inds))
+
+        # Different K per pointcloud; the shorter selection is padded with -1
+        expected_inds = torch.tensor(
+            [[0, 4, 1], [0, 7, -1]], dtype=torch.int64, device=device
+        )
+        out_points, out_inds = fps_func(points, K=[3, 2])
+        self.assertClose(out_inds, expected_inds)
+
+        # Gather the points
+        expected_points = masked_gather(points, expected_inds)
+        self.assertClose(out_points, expected_points)
+
+    def _test_compare_random_heterogeneous(self, device="cpu"):
+        """Compare sample_farthest_points with the naive version on random input."""
+        N, P, D, K = 5, 20, 5, 8
+        points = torch.randn((N, P, D), device=device, dtype=torch.float32)
+        out_points_naive, out_idxs_naive = sample_farthest_points_naive(points, K=K)
+        out_points, out_idxs = sample_farthest_points(points, K=K)
+        self.assertTrue(out_idxs.min() >= 0)
+        self.assertClose(out_idxs, out_idxs_naive)
+        self.assertClose(out_points, out_points_naive)
+        for n in range(N):
+            self.assertEqual(out_idxs[n].ne(-1).sum(), K)
+
+        # Test case where K > P
+        K = 30
+        points1 = torch.randn((N, P, D), dtype=torch.float32, device=device)
+        points2 = points1.clone()
+        points1.requires_grad = True
+        points2.requires_grad = True
+        lengths = torch.randint(low=1, high=P, size=(N,), device=device)
+        out_points_naive, out_idxs_naive = sample_farthest_points_naive(
+            points1, lengths, K=K
+        )
+        out_points, out_idxs = sample_farthest_points(points2, lengths, K=K)
+        self.assertClose(out_idxs, out_idxs_naive)
+        self.assertClose(out_points, out_points_naive)
+
+        for n in range(N):
+            # No cloud may yield more samples than it has points
+            self.assertTrue(out_idxs[n].ne(-1).sum() <= lengths[n])
+            # Valid indices are 0 .. lengths[n] - 1, so the bound is strict
+            self.assertTrue(out_idxs[n].max() < lengths[n])
+            # Check there are no duplicate indices
+            val_mask = out_idxs[n].ne(-1)
+            _, counts = torch.unique(out_idxs[n][val_mask], return_counts=True)
+            self.assertTrue(counts.le(1).all())
+
+        # Check gradients
+        grad_sampled_points = torch.ones((N, K, D), dtype=torch.float32, device=device)
+        loss1 = (out_points_naive * grad_sampled_points).sum()
+        loss1.backward()
+        loss2 = (out_points * grad_sampled_points).sum()
+        loss2.backward()
+        self.assertClose(points1.grad, points2.grad, atol=5e-6)
+
+    def _test_errors(self, fps_func, device="cpu"):
+        """fps_func must reject K / lengths whose batch size differs from points."""
+        N, P, D, K = 5, 40, 5, 8
+        points = torch.randn((N, P, D), device=device)
+        wrong_batch_dim = torch.randint(low=1, high=K, size=(K,), device=device)
+        # K has different batch dimension to points
+        with self.assertRaisesRegex(ValueError, "K and points must have"):
+            fps_func(points, K=wrong_batch_dim)
+
+        # lengths has different batch dimension to points
+        with self.assertRaisesRegex(ValueError, "points and lengths must have"):
+            fps_func(points, lengths=wrong_batch_dim, K=K)
+
+    def _test_random_start(self, fps_func, device="cpu"):
+        N, P, D, K = 5, 40, 5, 8
+        points = torch.randn((N, P, D), dtype=torch.float32, device=device)
+        out_points, out_idxs = fps_func(points, K=K, random_start_point=True)
+        # Check the first index is not 0 or the same number for all batch elements
+        # when random_start_point = True
+        self.assertTrue(out_idxs[:, 0].sum() > 0)
+        self.assertFalse(out_idxs[:, 0].eq(out_idxs[0, 0]).all())
+
+    def _test_gradcheck(self, fps_func, device="cpu"):
+        N, P, D, K = 2, 10, 3, 2
+        points = torch.randn(
+            (N, P, D), dtype=torch.float32, device=device, requires_grad=True
+        )
+        lengths = torch.randint(low=1, high=P, size=(N,), device=device)
+        torch.autograd.gradcheck(
+            fps_func,
+            (points, lengths, K),
+            check_undefined_grad=False,
+            eps=2e-3,
+            atol=0.001,
+        )
+
+    def test_sample_farthest_points_naive(self):
+        device = get_random_cuda_device()
+        self._test_simple(sample_farthest_points_naive, device)
+        self._test_errors(sample_farthest_points_naive, device)
+        self._test_random_start(sample_farthest_points_naive, device)
+        self._test_gradcheck(sample_farthest_points_naive, device)
+
+    def test_sample_farthest_points_cpu(self):
+        self._test_simple(sample_farthest_points, "cpu")
+        self._test_errors(sample_farthest_points, "cpu")
+        self._test_compare_random_heterogeneous("cpu")
+        self._test_random_start(sample_farthest_points, "cpu")
+        self._test_gradcheck(sample_farthest_points, "cpu")
+
+    def test_sample_farthest_points_cuda(self):
+        device = get_random_cuda_device()
+        self._test_simple(sample_farthest_points, device)
+        self._test_errors(sample_farthest_points, device)
+        self._test_compare_random_heterogeneous(device)
+        self._test_random_start(sample_farthest_points, device)
+        self._test_gradcheck(sample_farthest_points, device)
+
+    def test_cuda_vs_cpu(self):
+        """
+        Compare cuda vs cpu on a complex object
+        """
+        obj_filename = TUTORIAL_DATA_DIR / "cow_mesh/cow.obj"
+        K = 250
+
+        # Run on CPU
+        device = "cpu"
+        points, _, _ = load_obj(obj_filename, device=device, load_textures=False)
+        points = points[None, ...]
+        out_points_cpu, out_idxs_cpu = sample_farthest_points(points, K=K)
+
+        # Run on GPU
+        device = get_random_cuda_device()
+        points_cuda = points.to(device)
+        out_points_cuda, out_idxs_cuda = sample_farthest_points(points_cuda, K=K)
+
+        # Check that the indices from CUDA and CPU match
+        self.assertClose(out_idxs_cpu, out_idxs_cuda.cpu())
+
+        # Check there are no duplicate indices
+        val_mask = out_idxs_cuda[0].ne(-1)
+        _, counts = torch.unique(out_idxs_cuda[0][val_mask], return_counts=True)
+        self.assertTrue(counts.le(1).all())
+
+        # Plot all results
+        if DEBUG:
+            # mplot3d is required for 3d projection plots
+            import matplotlib.pyplot as plt
+            from mpl_toolkits import mplot3d  # noqa: F401
+
+            # Move to cpu and convert to numpy for plotting
+            points = points.squeeze()
+            out_points_cpu = out_points_cpu.squeeze().numpy()
+            out_points_cuda = out_points_cuda.squeeze().cpu().numpy()
+
+            # Farthest point sampling CPU
+            fig = plt.figure(figsize=plt.figaspect(1.0 / 3))
+            ax1 = fig.add_subplot(1, 3, 1, projection="3d")
+            ax1.scatter(*points.T, alpha=0.1)
+            ax1.scatter(*out_points_cpu.T, color="black")
+            ax1.set_title("FPS CPU")
+
+            # Farthest point sampling CUDA
+            ax2 = fig.add_subplot(1, 3, 2, projection="3d")
+            ax2.scatter(*points.T, alpha=0.1)
+            ax2.scatter(*out_points_cuda.T, color="red")
+            ax2.set_title("FPS CUDA")
+
+            # Random Sampling
+            random_points = np.random.permutation(points)[:K]
+            ax3 = fig.add_subplot(1, 3, 3, projection="3d")
+            ax3.scatter(*points.T, alpha=0.1)
+            ax3.scatter(*random_points.T, color="green")
+            ax3.set_title("Random")
+
+            # Save image
+            filename = "DEBUG_fps.jpg"
+            filepath = DATA_DIR / filename
+            plt.savefig(filepath)
+
+    @staticmethod
+    def sample_farthest_points_naive(N: int, P: int, D: int, K: int, device: str):
+        """Return a closure running one forward/backward pass of the naive sampler."""
+        device = torch.device(device)
+        pts = torch.randn(
+            N, P, D, dtype=torch.float32, device=device, requires_grad=True
+        )
+        grad_pts = torch.randn(N, K, D, dtype=torch.float32, device=device)
+        torch.cuda.synchronize()
+        def output():
+            out_points, _ = sample_farthest_points_naive(pts, K=K)
+            loss = (out_points * grad_pts).sum()
+            loss.backward()
+            torch.cuda.synchronize()
+
+        return output
+
+    @staticmethod
+    def sample_farthest_points(N: int, P: int, D: int, K: int, device: str):
+        """Return a closure running one forward/backward pass of the sampler."""
+        device = torch.device(device)
+        pts = torch.randn(
+            N, P, D, dtype=torch.float32, device=device, requires_grad=True
+        )
+        grad_pts = torch.randn(N, K, D, dtype=torch.float32, device=device)
+        torch.cuda.synchronize()
+        def output():
+            out_points, _ = sample_farthest_points(pts, K=K)
+            loss = (out_points * grad_pts).sum()
+            loss.backward()
+            torch.cuda.synchronize()
+
+        return output
diff --git a/pytorch3d/tests/test_sample_pdf.py b/pytorch3d/tests/test_sample_pdf.py
new file mode 100644
index 0000000000000000000000000000000000000000..4c904cad697d513d569225a7666ae48601e61ed7
--- /dev/null
+++ b/pytorch3d/tests/test_sample_pdf.py
@@ -0,0 +1,94 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from itertools import product
+
+import torch
+from pytorch3d.renderer.implicit.sample_pdf import sample_pdf, sample_pdf_python
+
+from .common_testing import TestCaseMixin
+
+
+class TestSamplePDF(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(1)  # fixed seed so the random inputs repeat
+
+    def test_single_bin(self):  # one bin [17, 18] -> 100 evenly spaced samples
+        bins = torch.arange(2).expand(5, 2) + 17
+        weights = torch.ones(5, 1)
+        output = sample_pdf_python(bins, weights, 100, True)
+        calc = torch.linspace(17, 18, 100).expand(5, -1)
+        self.assertClose(output, calc)
+
+    def test_simple_det(self):  # det=True: every backend must match the python one
+        for n_bins, n_samples, batch in product(
+            [7, 20], [2, 7, 31, 32, 33], [(), (1, 4), (31,), (32,), (33,)]
+        ):
+            weights = torch.rand(size=(batch + (n_bins,)))
+            bins = torch.cumsum(torch.rand(size=(batch + (n_bins + 1,))), dim=-1)
+            python = sample_pdf_python(bins, weights, n_samples, det=True)
+            cpp = sample_pdf(bins, weights, n_samples, det=True)
+            self.assertClose(cpp, python, atol=2e-3)
+            # Repeat single-threaded; restore the thread count even on failure.
+            nthreads = torch.get_num_threads()
+            torch.set_num_threads(1)
+            try:
+                cpp_singlethread = sample_pdf(bins, weights, n_samples, det=True)
+                self.assertClose(cpp_singlethread, python, atol=2e-3)
+            finally:
+                torch.set_num_threads(nthreads)
+
+            device = torch.device("cuda:0")
+            cuda = sample_pdf(
+                bins.to(device), weights.to(device), n_samples, det=True
+            ).cpu()
+            self.assertClose(cuda, python, atol=2e-3)
+
+    def test_rand_cpu(self):  # same seed -> both CPU implementations must agree
+        n_bins, n_samples, batch_size = 11, 17, 9
+        weights = torch.rand(size=(batch_size, n_bins))
+        bins = torch.cumsum(torch.rand(size=(batch_size, n_bins + 1)), dim=-1)
+        torch.manual_seed(1)
+        python = sample_pdf_python(bins, weights, n_samples)
+        torch.manual_seed(1)
+        cpp = sample_pdf(bins, weights, n_samples)
+
+        self.assertClose(cpp, python, atol=2e-3)
+
+    def test_rand_nogap(self):
+        # Case where random is actually deterministic
+        weights = torch.FloatTensor([0, 10, 0])
+        bins = torch.FloatTensor([0, 10, 10, 25])
+        n_samples = 8
+        predicted = torch.full((n_samples,), 10.0)
+        python = sample_pdf_python(bins, weights, n_samples)
+        self.assertClose(python, predicted)
+        cpp = sample_pdf(bins, weights, n_samples)
+        self.assertClose(cpp, predicted)
+
+        device = torch.device("cuda:0")
+        cuda = sample_pdf(bins.to(device), weights.to(device), n_samples).cpu()
+        self.assertClose(cuda, predicted)
+
+    @staticmethod
+    def bm_fn(*, backend: str, n_samples, batch_size, n_bins):
+        """Return a closure that runs the backend chosen by the `backend` string."""
+        f = sample_pdf_python if "python" in backend else sample_pdf
+        weights = torch.rand(size=(batch_size, n_bins))
+        bins = torch.cumsum(torch.rand(size=(batch_size, n_bins + 1)), dim=-1)
+        if "cuda" in backend:
+            weights = weights.cuda()
+            bins = bins.cuda()
+
+        torch.cuda.synchronize()
+
+        def output():
+            f(bins, weights, n_samples)
+            torch.cuda.synchronize()
+
+        return output
diff --git a/pytorch3d/tests/test_sample_points_from_meshes.py b/pytorch3d/tests/test_sample_points_from_meshes.py
new file mode 100644
index 0000000000000000000000000000000000000000..afe4a3a140f4b783d7f988f9d829b7fd39a722b6
--- /dev/null
+++ b/pytorch3d/tests/test_sample_points_from_meshes.py
@@ -0,0 +1,468 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import numpy as np
+import torch
+from PIL import Image
+from pytorch3d.io import load_objs_as_meshes
+from pytorch3d.ops import sample_points_from_meshes
+from pytorch3d.renderer import TexturesVertex
+from pytorch3d.renderer.cameras import FoVPerspectiveCameras, look_at_view_transform
+from pytorch3d.renderer.mesh.rasterize_meshes import barycentric_coordinates
+from pytorch3d.renderer.points import (
+    NormWeightedCompositor,
+    PointsRasterizationSettings,
+    PointsRasterizer,
+    PointsRenderer,
+)
+from pytorch3d.structures import Meshes, Pointclouds
+from pytorch3d.utils.ico_sphere import ico_sphere
+
+from .common_testing import (
+    get_pytorch3d_dir,
+    get_random_cuda_device,
+    get_tests_dir,
+    TestCaseMixin,
+)
+
+
+# If DEBUG=True, save out images generated in the tests for debugging.
+# All saved images have prefix DEBUG_
+DEBUG = False
+DATA_DIR = get_tests_dir() / "data"
+
+
+def init_meshes(
+    num_meshes: int = 10,
+    num_verts: int = 1000,
+    num_faces: int = 3000,
+    device: str = "cpu",
+    add_texture: bool = False,
+):
+    """
+    Build a batch of num_meshes random meshes on `device`.
+
+    Each mesh has num_verts uniform random vertices and num_faces faces with
+    random vertex indices; per-vertex random colors are attached as a
+    TexturesVertex only when add_texture is True.
+    """
+    device = torch.device(device)
+    float_opts = {"dtype": torch.float32, "device": device}
+    verts_list, faces_list, texts_list = [], [], []
+    for _ in range(num_meshes):
+        # Keep this draw order (verts, faces, colors) so seeded runs reproduce.
+        verts_list.append(torch.rand((num_verts, 3), **float_opts))
+        faces_list.append(
+            torch.randint(
+                num_verts, size=(num_faces, 3), dtype=torch.int64, device=device
+            )
+        )
+        texts_list.append(torch.rand((num_verts, 3), **float_opts))
+    textures = TexturesVertex(texts_list) if add_texture else None
+    return Meshes(verts=verts_list, faces=faces_list, textures=textures)
+
+
+class TestSamplePoints(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(1)  # reseed torch's global RNG before every test
+
+    def test_all_empty_meshes(self):
+        """
+        sample_points_from_meshes must raise a ValueError when every mesh in
+        the batch is empty.
+        """
+        device = get_random_cuda_device()
+        no_verts = torch.tensor([], dtype=torch.float32, device=device)
+        no_faces = torch.tensor([], dtype=torch.int64, device=device)
+        meshes = Meshes(verts=[no_verts] * 3, faces=[no_faces] * 3)
+        with self.assertRaises(ValueError) as ctx:
+            sample_points_from_meshes(meshes, num_samples=100, return_normals=True)
+        self.assertIn("Meshes are empty.", str(ctx.exception))
+
+    def test_sampling_output(self):
+        """
+        Check outputs of sampling are correct for different meshes.
+        For an ico_sphere, the sampled vertices should lie on a unit sphere.
+        For an empty mesh, the samples and normals should be 0.
+        """
+        device = get_random_cuda_device()
+
+        # Unit simplex.
+        verts_pyramid = torch.tensor(
+            [[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
+            dtype=torch.float32,
+            device=device,
+        )
+        faces_pyramid = torch.tensor(
+            [[0, 1, 2], [0, 2, 3], [0, 1, 3], [1, 2, 3]],
+            dtype=torch.int64,
+            device=device,
+        )
+        sphere_mesh = ico_sphere(9, device)
+        verts_sphere, faces_sphere = sphere_mesh.get_mesh_verts_faces(0)
+        verts_empty = torch.tensor([], dtype=torch.float32, device=device)
+        faces_empty = torch.tensor([], dtype=torch.int64, device=device)
+        num_samples = 10
+        meshes = Meshes(
+            verts=[verts_empty, verts_sphere, verts_pyramid],
+            faces=[faces_empty, faces_sphere, faces_pyramid],
+        )
+        samples, normals = sample_points_from_meshes(
+            meshes, num_samples=num_samples, return_normals=True
+        )
+        samples = samples.cpu()
+        normals = normals.cpu()
+
+        self.assertEqual(samples.shape, (3, num_samples, 3))
+        self.assertEqual(normals.shape, (3, num_samples, 3))
+
+        # Empty meshes: should have all zeros for samples and normals.
+        self.assertClose(samples[0, :], torch.zeros((num_samples, 3)))
+        self.assertClose(normals[0, :], torch.zeros((num_samples, 3)))
+
+        # Sphere: points should have radius 1.
+        x, y, z = samples[1, :].unbind(1)
+        radius = torch.sqrt(x**2 + y**2 + z**2)
+
+        self.assertClose(radius, torch.ones(num_samples))
+
+        # Pyramid: points should lie on one of the faces.
+        pyramid_verts = samples[2, :]
+        pyramid_normals = normals[2, :]
+
+        self.assertClose(pyramid_verts.lt(1).float(), torch.ones_like(pyramid_verts))
+        self.assertClose((pyramid_verts >= 0).float(), torch.ones_like(pyramid_verts))
+
+        # Face 1: z = 0,  x + y <= 1, normals = (0, 0, 1).
+        face_1_idxs = pyramid_verts[:, 2] == 0
+        face_1_verts, face_1_normals = (
+            pyramid_verts[face_1_idxs, :],
+            pyramid_normals[face_1_idxs, :],
+        )
+        self.assertTrue(torch.all((face_1_verts[:, 0] + face_1_verts[:, 1]) <= 1))
+        self.assertClose(
+            face_1_normals,
+            torch.tensor([0, 0, 1], dtype=torch.float32).expand(face_1_normals.size()),
+        )
+
+        # Face 2: x = 0,  z + y <= 1, normals = (1, 0, 0).
+        face_2_idxs = pyramid_verts[:, 0] == 0
+        face_2_verts, face_2_normals = (
+            pyramid_verts[face_2_idxs, :],
+            pyramid_normals[face_2_idxs, :],
+        )
+        self.assertTrue(torch.all((face_2_verts[:, 1] + face_2_verts[:, 2]) <= 1))
+        self.assertClose(
+            face_2_normals,
+            torch.tensor([1, 0, 0], dtype=torch.float32).expand(face_2_normals.size()),
+        )
+
+        # Face 3: y = 0, x + z <= 1, normals = (0, -1, 0).
+        face_3_idxs = pyramid_verts[:, 1] == 0
+        face_3_verts, face_3_normals = (
+            pyramid_verts[face_3_idxs, :],
+            pyramid_normals[face_3_idxs, :],
+        )
+        self.assertTrue(torch.all((face_3_verts[:, 0] + face_3_verts[:, 2]) <= 1))
+        self.assertClose(
+            face_3_normals,
+            torch.tensor([0, -1, 0], dtype=torch.float32).expand(face_3_normals.size()),
+        )
+
+        # Face 4: x + y + z = 1, normals = (1, 1, 1)/sqrt(3).
+        face_4_idxs = pyramid_verts.gt(0).all(1)  # strictly positive in x, y and z
+        face_4_verts, face_4_normals = (
+            pyramid_verts[face_4_idxs, :],
+            pyramid_normals[face_4_idxs, :],
+        )
+        self.assertClose(face_4_verts.sum(1), torch.ones(face_4_verts.size(0)))
+        self.assertClose(
+            face_4_normals,
+            (
+                torch.tensor([1, 1, 1], dtype=torch.float32)
+                / torch.sqrt(torch.tensor(3, dtype=torch.float32))
+            ).expand(face_4_normals.size()),
+        )
+
+    def test_multinomial(self):
+        """
+        Confirm that torch.multinomial does not sample elements which have
+        zero probability.
+        """
+        freqs = torch.cuda.FloatTensor(
+            [
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.03178183361887932,
+                0.027680952101945877,
+                0.033176131546497345,
+                0.046052902936935425,
+                0.07742464542388916,
+                0.11543981730937958,
+                0.14148041605949402,
+                0.15784293413162231,
+                0.13180233538150787,
+                0.08271478116512299,
+                0.049702685326337814,
+                0.027557924389839172,
+                0.018125897273421288,
+                0.011851548217236996,
+                0.010252203792333603,
+                0.007422595750540495,
+                0.005372154992073774,
+                0.0045109698548913,
+                0.0036087757907807827,
+                0.0035267581697553396,
+                0.0018864056328311563,
+                0.0024605290964245796,
+                0.0022964938543736935,
+                0.0018453967059031129,
+                0.0010662291897460818,
+                0.0009842115687206388,
+                0.00045109697384759784,
+                0.0007791675161570311,
+                0.00020504408166743815,
+                0.00020504408166743815,
+                0.00020504408166743815,
+                0.00012302644609007984,
+                0.0,
+                0.00012302644609007984,
+                4.100881778867915e-05,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+            ]
+        )
+
+        # Draw 1000 indices with replacement, 1000 times over; no draw may
+        # land on an index whose frequency is exactly zero.
+        for _ in range(1000):
+            sample = torch.multinomial(freqs, 1000, True)
+            if freqs[sample].min() == 0:
+                sample_idx = (freqs[sample] == 0).nonzero()[0][0]
+                sampled = sample[sample_idx]
+                # unittest ignores a test method's return value, so a bad
+                # draw has to be reported through self.fail.
+                self.fail(
+                    "%s th element of last sample was %s, which has probability %s"
+                    % (sample_idx, sampled, freqs[sampled])
+                )
+
+    def test_multinomial_weights(self):
+        """
+        Confirm that torch.multinomial does not sample elements which have
+        zero probability using a real example of input from a training run.
+        """
+        weights = torch.load(get_tests_dir() / "weights.pt")
+        # Clamp negative weights once, up front; repeating it changes nothing.
+        weights[weights < 0] = 0.0
+        S = 4096
+        num_trials = 100
+        for _ in range(num_trials):
+            samples = weights.multinomial(S, replacement=True)
+            # unittest ignores return values and `assert` vanishes under -O,
+            # so report a zero-weight draw through the TestCase API.
+            smallest = weights[samples].min().item()
+            self.assertGreater(smallest, 0)
+
+    def test_verts_nan(self):
+        """Meshes whose vertices contain nan or inf are rejected with a ValueError."""
+        n_verts, n_faces = 30, 50
+        for device in ["cpu", "cuda:0"]:
+            for bad_value in ["nan", "inf"]:
+                verts = torch.rand((n_verts, 3), dtype=torch.float32, device=device)
+                # overwrite 10 randomly chosen vertices with the invalid value
+                corrupted = torch.randperm(n_verts)[:10]
+                verts[corrupted] = float(bad_value)
+                faces = torch.randint(
+                    n_verts, size=(n_faces, 3), dtype=torch.int64, device=device
+                )
+                meshes = Meshes(verts=[verts], faces=[faces])
+
+                kwargs = {"num_samples": 100, "return_normals": True}
+                with self.assertRaisesRegex(ValueError, "Meshes contain nan or inf."):
+                    sample_points_from_meshes(meshes, **kwargs)
+
+    def test_outputs(self):
+
+        for add_texture in (True, False):
+            meshes = init_meshes(device=torch.device("cuda:0"), add_texture=add_texture)
+            out1 = sample_points_from_meshes(meshes, num_samples=100)
+            self.assertTrue(torch.is_tensor(out1))
+
+            out2 = sample_points_from_meshes(
+                meshes, num_samples=100, return_normals=True
+            )
+            self.assertTrue(isinstance(out2, tuple) and len(out2) == 2)
+
+            if add_texture:
+                out3 = sample_points_from_meshes(
+                    meshes, num_samples=100, return_textures=True
+                )
+                self.assertTrue(isinstance(out3, tuple) and len(out3) == 2)
+
+                out4 = sample_points_from_meshes(
+                    meshes, num_samples=100, return_normals=True, return_textures=True
+                )
+                self.assertTrue(isinstance(out4, tuple) and len(out4) == 3)
+            else:
+                with self.assertRaisesRegex(
+                    ValueError, "Meshes do not contain textures."
+                ):
+                    sample_points_from_meshes(
+                        meshes, num_samples=100, return_textures=True
+                    )
+
+                with self.assertRaisesRegex(
+                    ValueError, "Meshes do not contain textures."
+                ):
+                    sample_points_from_meshes(
+                        meshes,
+                        num_samples=100,
+                        return_normals=True,
+                        return_textures=True,
+                    )
+
+    def test_texture_sampling(self):
+        device = torch.device("cuda:0")
+        batch_size = 6
+        # verts
+        verts = torch.rand((batch_size, 6, 3), device=device, dtype=torch.float32)
+        verts[:, :3, 2] = 1.0
+        verts[:, 3:, 2] = -1.0
+        # textures
+        texts = torch.rand((batch_size, 6, 3), device=device, dtype=torch.float32)
+        # faces
+        faces = torch.tensor([[0, 1, 2], [3, 4, 5]], device=device, dtype=torch.int64)
+        faces = faces.view(1, 2, 3).expand(batch_size, -1, -1)
+
+        meshes = Meshes(verts=verts, faces=faces, textures=TexturesVertex(texts))
+
+        num_samples = 24
+        samples, normals, textures = sample_points_from_meshes(
+            meshes, num_samples=num_samples, return_normals=True, return_textures=True
+        )
+
+        textures_naive = torch.zeros(
+            (batch_size, num_samples, 3), dtype=torch.float32, device=device
+        )
+        for n in range(batch_size):
+            for i in range(num_samples):
+                p = samples[n, i]
+                if p[2] > 0.0:  # sampled from 1st face
+                    v0, v1, v2 = verts[n, 0, :2], verts[n, 1, :2], verts[n, 2, :2]
+                    w0, w1, w2 = barycentric_coordinates(p[:2], v0, v1, v2)
+                    t0, t1, t2 = texts[n, 0], texts[n, 1], texts[n, 2]
+                else:  # sampled from 2nd face
+                    v0, v1, v2 = verts[n, 3, :2], verts[n, 4, :2], verts[n, 5, :2]
+                    w0, w1, w2 = barycentric_coordinates(p[:2], v0, v1, v2)
+                    t0, t1, t2 = texts[n, 3], texts[n, 4], texts[n, 5]
+
+                tt = w0 * t0 + w1 * t1 + w2 * t2
+                textures_naive[n, i] = tt
+
+        self.assertClose(textures, textures_naive)
+
+    def test_texture_sampling_cow(self):
+        """
+        Sample a textured point cloud from the cow mesh and render it.
+
+        The mesh is loaded twice, once with its UV texture and once with a
+        texture atlas, and each resulting point cloud is rendered from the
+        front and from the back. The test makes no assertion on the images;
+        it only checks that the pipeline runs, and saves the renders when
+        DEBUG is set.
+        """
+        # test texture sampling for the cow example by converting
+        # the cow mesh and its texture uv to a pointcloud with texture
+
+        device = torch.device("cuda:0")
+        obj_dir = get_pytorch3d_dir() / "docs/tutorials/data"
+        obj_filename = obj_dir / "cow_mesh/cow.obj"
+
+        for text_type in ("uv", "atlas"):
+            # Load mesh + texture
+            if text_type == "uv":
+                mesh = load_objs_as_meshes(
+                    [obj_filename], device=device, load_textures=True, texture_wrap=None
+                )
+            elif text_type == "atlas":
+                mesh = load_objs_as_meshes(
+                    [obj_filename],
+                    device=device,
+                    load_textures=True,
+                    create_texture_atlas=True,
+                    texture_atlas_size=8,
+                    texture_wrap=None,
+                )
+
+            # Convert the mesh to a point cloud carrying normals and colors.
+            points, normals, textures = sample_points_from_meshes(
+                mesh, num_samples=50000, return_normals=True, return_textures=True
+            )
+            pointclouds = Pointclouds(points, normals=normals, features=textures)
+
+            for pos in ("front", "back"):
+                # Init rasterizer settings
+                if pos == "back":
+                    azim = 0.0
+                elif pos == "front":
+                    azim = 180
+                R, T = look_at_view_transform(2.7, 0, azim)
+                cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+
+                raster_settings = PointsRasterizationSettings(
+                    image_size=512, radius=1e-2, points_per_pixel=1
+                )
+
+                rasterizer = PointsRasterizer(
+                    cameras=cameras, raster_settings=raster_settings
+                )
+                compositor = NormWeightedCompositor()
+                renderer = PointsRenderer(rasterizer=rasterizer, compositor=compositor)
+                images = renderer(pointclouds)
+
+                # Keep the first three channels of the first image.
+                rgb = images[0, ..., :3].squeeze().cpu()
+                if DEBUG:
+                    filename = "DEBUG_cow_mesh_to_pointcloud_%s_%s.png" % (
+                        text_type,
+                        pos,
+                    )
+                    Image.fromarray((rgb.numpy() * 255).astype(np.uint8)).save(
+                        DATA_DIR / filename
+                    )
+
+    @staticmethod
+    def sample_points_with_init(
+        num_meshes: int,
+        num_verts: int,
+        num_faces: int,
+        num_samples: int,
+        device: str = "cpu",
+    ):
+        verts_list = []
+        faces_list = []
+        for _ in range(num_meshes):
+            verts = torch.rand((num_verts, 3), dtype=torch.float32, device=device)
+            faces = torch.randint(
+                num_verts, size=(num_faces, 3), dtype=torch.int64, device=device
+            )
+            verts_list.append(verts)
+            faces_list.append(faces)
+        meshes = Meshes(verts_list, faces_list)
+        torch.cuda.synchronize()
+
+        def sample_points():
+            sample_points_from_meshes(
+                meshes, num_samples=num_samples, return_normals=True
+            )
+            torch.cuda.synchronize()
+
+        return sample_points
diff --git a/pytorch3d/tests/test_se3.py b/pytorch3d/tests/test_se3.py
new file mode 100644
index 0000000000000000000000000000000000000000..4bd4f8eb072ca19d6152ac52f9a973508f87a0c9
--- /dev/null
+++ b/pytorch3d/tests/test_se3.py
@@ -0,0 +1,325 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import numpy as np
+import torch
+from pytorch3d.transforms.rotation_conversions import random_rotations
+from pytorch3d.transforms.se3 import se3_exp_map, se3_log_map
+from pytorch3d.transforms.so3 import so3_exp_map, so3_log_map, so3_rotation_angle
+
+from .common_testing import TestCaseMixin
+
+
+class TestSE3(TestCaseMixin, unittest.TestCase):
+    # Reference SE(3) logarithms: ten 6-dimensional rows, one per matrix in
+    # `precomputed_transform`. test_compare_with_precomputed checks that the
+    # two tables map onto each other through se3_log_map / se3_exp_map.
+    precomputed_log_transform = torch.tensor(
+        [
+            [0.1900, 2.1600, -0.1700, 0.8500, -1.9200, 0.6500],
+            [-0.6500, -0.8200, 0.5300, -1.2800, -1.6600, -0.3000],
+            [-0.0900, 0.2000, -1.1200, 1.8600, -0.7100, 0.6900],
+            [0.8000, -0.0300, 1.4900, -0.5200, -0.2500, 1.4700],
+            [-0.3300, -1.1600, 2.3600, -0.6900, 0.1800, -1.1800],
+            [-1.8000, -1.5800, 0.8400, 1.4200, 0.6500, 0.4300],
+            [-1.5900, 0.6200, 1.6900, -0.6600, 0.9400, 0.0800],
+            [0.0800, -0.1400, 0.3300, -0.5900, -1.0700, 0.1000],
+            [-0.3300, -0.5300, -0.8800, 0.3900, 0.1600, -0.2000],
+            [1.0100, -1.3500, -0.3500, -0.6400, 0.4500, -0.5400],
+        ],
+        dtype=torch.float32,
+    )
+
+    # Reference 4x4 transforms matching the rows of
+    # `precomputed_log_transform`. In every matrix the last column is
+    # [0, 0, 0, 1] and the first three entries of the last row are non-zero,
+    # i.e. the translation is stored in the last row.
+    precomputed_transform = torch.tensor(
+        [
+            [
+                [-0.3496, -0.2966, 0.8887, 0.0000],
+                [-0.7755, 0.6239, -0.0968, 0.0000],
+                [-0.5258, -0.7230, -0.4481, 0.0000],
+                [-0.7392, 1.9119, 0.3122, 1.0000],
+            ],
+            [
+                [0.0354, 0.5992, 0.7998, 0.0000],
+                [0.8413, 0.4141, -0.3475, 0.0000],
+                [-0.5395, 0.6852, -0.4894, 0.0000],
+                [-0.9902, -0.4840, 0.1226, 1.0000],
+            ],
+            [
+                [0.6664, -0.1679, 0.7264, 0.0000],
+                [-0.7309, -0.3394, 0.5921, 0.0000],
+                [0.1471, -0.9255, -0.3489, 0.0000],
+                [-0.0815, 0.8719, -0.4516, 1.0000],
+            ],
+            [
+                [0.1010, 0.9834, -0.1508, 0.0000],
+                [-0.8783, 0.0169, -0.4779, 0.0000],
+                [-0.4674, 0.1807, 0.8654, 0.0000],
+                [0.2375, 0.7043, 1.4159, 1.0000],
+            ],
+            [
+                [0.3935, -0.8930, 0.2184, 0.0000],
+                [0.7873, 0.2047, -0.5817, 0.0000],
+                [0.4747, 0.4009, 0.7836, 0.0000],
+                [-0.3476, -0.0424, 2.5408, 1.0000],
+            ],
+            [
+                [0.7572, 0.6342, -0.1567, 0.0000],
+                [0.1039, 0.1199, 0.9873, 0.0000],
+                [0.6449, -0.7638, 0.0249, 0.0000],
+                [-1.2885, -2.0666, -0.1137, 1.0000],
+            ],
+            [
+                [0.6020, -0.2140, -0.7693, 0.0000],
+                [-0.3409, 0.8024, -0.4899, 0.0000],
+                [0.7221, 0.5572, 0.4101, 0.0000],
+                [-0.7550, 1.1928, 1.8480, 1.0000],
+            ],
+            [
+                [0.4913, 0.3548, 0.7954, 0.0000],
+                [0.2013, 0.8423, -0.5000, 0.0000],
+                [-0.8474, 0.4058, 0.3424, 0.0000],
+                [-0.1003, -0.0406, 0.3295, 1.0000],
+            ],
+            [
+                [0.9678, -0.1622, -0.1926, 0.0000],
+                [0.2235, 0.9057, 0.3603, 0.0000],
+                [0.1160, -0.3917, 0.9128, 0.0000],
+                [-0.4417, -0.3111, -0.9227, 1.0000],
+            ],
+            [
+                [0.7710, -0.5957, -0.2250, 0.0000],
+                [0.3288, 0.6750, -0.6605, 0.0000],
+                [0.5454, 0.4352, 0.7163, 0.0000],
+                [0.5623, -1.5886, -0.0182, 1.0000],
+            ],
+        ],
+        dtype=torch.float32,
+    )
+
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)
+        np.random.seed(42)
+
+    @staticmethod
+    def init_log_transform(batch_size: int = 10):
+        """
+        Initialize a list of `batch_size` 6-dimensional vectors representing
+        randomly generated logarithms of SE(3) transforms.
+        """
+        device = torch.device("cuda:0")
+        log_rot = torch.randn((batch_size, 6), dtype=torch.float32, device=device)
+        return log_rot
+
+    @staticmethod
+    def init_transform(batch_size: int = 10):
+        """
+        Initialize a list of `batch_size` 4x4 SE(3) transforms.
+        """
+        device = torch.device("cuda:0")
+        transform = torch.zeros(batch_size, 4, 4, dtype=torch.float32, device=device)
+        transform[:, :3, :3] = random_rotations(
+            batch_size, dtype=torch.float32, device=device
+        )
+        transform[:, 3, :3] = torch.randn(
+            (batch_size, 3), dtype=torch.float32, device=device
+        )
+        transform[:, 3, 3] = 1.0
+        return transform
+
+    def test_se3_exp_output_format(self, batch_size: int = 100):
+        """
+        Check that the output of `se3_exp_map` is a valid SE3 matrix.
+        """
+        transform = se3_exp_map(TestSE3.init_log_transform(batch_size=batch_size))
+        R = transform[:, :3, :3]
+        T = transform[:, 3, :3]
+        rest = transform[:, :, 3]
+        Rdet = R.det()
+
+        # check det(R)==1
+        self.assertClose(Rdet, torch.ones_like(Rdet), atol=1e-4)
+
+        # check that the translation is a finite vector
+        self.assertTrue(torch.isfinite(T).all())
+
+        # check last column == [0,0,0,1]
+        last_col = rest.new_zeros(batch_size, 4)
+        last_col[:, -1] = 1.0
+        self.assertClose(rest, last_col)
+
+    def test_compare_with_precomputed(self):
+        """
+        Compare the outputs against precomputed results.
+        """
+        self.assertClose(
+            se3_log_map(self.precomputed_transform),
+            self.precomputed_log_transform,
+            atol=1e-4,
+        )
+        self.assertClose(
+            self.precomputed_transform,
+            se3_exp_map(self.precomputed_log_transform),
+            atol=1e-4,
+        )
+
+    def test_se3_exp_singularity(self, batch_size: int = 100):
+        """
+        Tests whether the `se3_exp_map` is robust to the input vectors
+        with low L2 norms, where the algorithm is numerically unstable.
+        """
+        # generate random log-rotations with a tiny angle
+        log_rot = TestSE3.init_log_transform(batch_size=batch_size)
+        log_rot_small = log_rot * 1e-6
+        log_rot_small.requires_grad = True
+        transforms = se3_exp_map(log_rot_small)
+        # tests whether all outputs are finite
+        self.assertTrue(torch.isfinite(transforms).all())
+        # tests whether all gradients are finite and not None
+        loss = transforms.sum()
+        loss.backward()
+        self.assertIsNotNone(log_rot_small.grad)
+        self.assertTrue(torch.isfinite(log_rot_small.grad).all())
+
+    def test_se3_log_singularity(self, batch_size: int = 100):
+        """
+        Tests whether the `se3_log_map` is robust to the input matrices
+        whose rotation angles and translations are close to the numerically
+        unstable region (i.e. matrices with low rotation angles
+        and 0 translation).
+
+        The batch starts with the exact identity and with diag(1, -1, -1),
+        followed by `batch_size - 2` randomly perturbed near-identity
+        rotations. Both the outputs and the gradients must be finite.
+        """
+        # generate random rotations with a tiny angle
+        device = torch.device("cuda:0")
+        identity = torch.eye(3, device=device)
+        # broadcasting the row vector over the identity gives diag(1, -1, -1)
+        rot180 = identity * torch.tensor([[1.0, -1.0, -1.0]], device=device)
+        r = [identity, rot180]
+        r.extend(
+            [
+                # Q factor of a slightly perturbed identity matrix
+                torch.linalg.qr(identity + torch.randn_like(identity) * 1e-6)[0]
+                + float(i > batch_size // 2) * (0.5 - torch.rand_like(identity)) * 1e-8
+                # this adds random noise to the second half
+                # of the random orthogonal matrices to generate
+                # near-orthogonal matrices
+                for i in range(batch_size - 2)
+            ]
+        )
+        r = torch.stack(r)
+        # tiny translations
+        t = torch.randn(batch_size, 3, dtype=r.dtype, device=device) * 1e-6
+        # create the transform matrix
+        transform = torch.zeros(batch_size, 4, 4, dtype=torch.float32, device=device)
+        transform[:, :3, :3] = r
+        transform[:, 3, :3] = t
+        transform[:, 3, 3] = 1.0
+        # track gradients with respect to the assembled matrices
+        transform.requires_grad = True
+        # the log of the transform
+        log_transform = se3_log_map(transform, eps=1e-4, cos_bound=1e-4)
+        # tests whether all outputs are finite
+        self.assertTrue(torch.isfinite(log_transform).all())
+        # tests whether all gradients are finite and not None
+        loss = log_transform.sum()
+        loss.backward()
+        self.assertIsNotNone(transform.grad)
+        self.assertTrue(torch.isfinite(transform.grad).all())
+
+    def test_se3_exp_zero_translation(self, batch_size: int = 100):
+        """
+        Check that `se3_exp_map` with zero translation gives
+        the same result as corresponding `so3_exp_map`.
+        """
+        log_transform = TestSE3.init_log_transform(batch_size=batch_size)
+        log_transform[:, :3] *= 0.0
+        transform = se3_exp_map(log_transform, eps=1e-8)
+        transform_so3 = so3_exp_map(log_transform[:, 3:], eps=1e-8)
+        self.assertClose(
+            transform[:, :3, :3], transform_so3.permute(0, 2, 1), atol=1e-4
+        )
+        self.assertClose(
+            transform[:, 3, :3], torch.zeros_like(transform[:, :3, 3]), atol=1e-4
+        )
+
+    def test_se3_log_zero_translation(self, batch_size: int = 100):
+        """
+        Check that `se3_log_map` with zero translation gives
+        the same result as corresponding `so3_log_map`.
+        """
+        transform = TestSE3.init_transform(batch_size=batch_size)
+        transform[:, 3, :3] *= 0.0
+        log_transform = se3_log_map(transform, eps=1e-8, cos_bound=1e-4)
+        log_transform_so3 = so3_log_map(transform[:, :3, :3], eps=1e-8, cos_bound=1e-4)
+        self.assertClose(log_transform[:, 3:], -log_transform_so3, atol=1e-4)
+        self.assertClose(
+            log_transform[:, :3], torch.zeros_like(log_transform[:, :3]), atol=1e-4
+        )
+
+    def test_se3_exp_to_log_to_exp(self, batch_size: int = 10000):
+        """
+        Check that `se3_exp_map(se3_log_map(A))==A` for
+        a batch of randomly generated SE(3) matrices `A`.
+        """
+        transform = TestSE3.init_transform(batch_size=batch_size)
+        # Limit test transforms to those not around the singularity where
+        # the rotation angle~=pi.
+        nonsingular = so3_rotation_angle(transform[:, :3, :3]) < 3.134
+        transform = transform[nonsingular]
+        transform_ = se3_exp_map(
+            se3_log_map(transform, eps=1e-8, cos_bound=0.0), eps=1e-8
+        )
+        self.assertClose(transform, transform_, atol=0.02)
+
+    def test_se3_log_to_exp_to_log(self, batch_size: int = 100):
+        """
+        Check that `se3_log_map(se3_exp_map(log_transform))==log_transform`
+        for a randomly generated batch of SE(3) matrix logarithms `log_transform`.
+        """
+        log_transform = TestSE3.init_log_transform(batch_size=batch_size)
+        log_transform_ = se3_log_map(se3_exp_map(log_transform, eps=1e-8), eps=1e-8)
+        self.assertClose(log_transform, log_transform_, atol=1e-1)
+
+    def test_bad_se3_input_value_err(self):
+        """
+        Tests whether `se3_exp_map` and `se3_log_map` correctly raise
+        a ValueError if called with an argument of incorrect shape, or with
+        a tensor containing illegal values.
+        """
+        device = torch.device("cuda:0")
+
+        # shapes that are rejected as batches of 6D log-transforms
+        for size in ([5, 4], [3, 4, 5], [3, 5, 6]):
+            log_transform = torch.randn(size=size, device=device)
+            with self.assertRaises(ValueError):
+                se3_exp_map(log_transform)
+
+        # shapes that are rejected as batches of 4x4 matrices
+        for size in ([5, 4], [3, 4, 5], [3, 5, 6], [2, 2, 3, 4]):
+            transform = torch.randn(size=size, device=device)
+            with self.assertRaises(ValueError):
+                se3_log_map(transform)
+
+        # Test a correctly shaped input with illegal values: every entry is
+        # >= 0.1, so in particular the last column is not [0, 0, 0, 1].
+        # NOTE(review): presumably the non-zero last column is what triggers
+        # the error - confirm against se3_log_map.
+        transform = torch.rand(size=[5, 4, 4], device=device) + 0.1
+        with self.assertRaises(ValueError):
+            se3_log_map(transform)
+
+    @staticmethod
+    def se3_expmap(batch_size: int = 10):
+        """
+        Prepare a random batch of SE(3) log-transforms and return a closure
+        that applies `se3_exp_map` to it.
+
+        Returns:
+            A zero-argument callable that runs `se3_exp_map` and then
+            synchronizes CUDA; the result is discarded.
+        """
+        log_transform = TestSE3.init_log_transform(batch_size=batch_size)
+        # make sure input generation has finished before the closure runs
+        torch.cuda.synchronize()
+
+        def compute_transforms():
+            se3_exp_map(log_transform)
+            torch.cuda.synchronize()
+
+        return compute_transforms
+
+    @staticmethod
+    def se3_logmap(batch_size: int = 10):
+        log_transform = TestSE3.init_transform(batch_size=batch_size)
+        torch.cuda.synchronize()
+
+        def compute_logs():
+            se3_log_map(log_transform)
+            torch.cuda.synchronize()
+
+        return compute_logs
diff --git a/pytorch3d/tests/test_shader.py b/pytorch3d/tests/test_shader.py
new file mode 100644
index 0000000000000000000000000000000000000000..396132e168d609e2b802c53cc47ee9c8dcf16dbe
--- /dev/null
+++ b/pytorch3d/tests/test_shader.py
@@ -0,0 +1,125 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.renderer.cameras import look_at_view_transform, PerspectiveCameras
+from pytorch3d.renderer.mesh.rasterizer import Fragments
+from pytorch3d.renderer.mesh.shader import (
+    HardDepthShader,
+    HardFlatShader,
+    HardGouraudShader,
+    HardPhongShader,
+    SoftDepthShader,
+    SoftPhongShader,
+    SplatterPhongShader,
+)
+from pytorch3d.structures.meshes import Meshes
+
+from .common_testing import TestCaseMixin
+
+
+class TestShader(TestCaseMixin, unittest.TestCase):
+    def setUp(self):
+        """Collect the shader classes that the tests iterate over."""
+        self.shader_classes = [
+            HardDepthShader,
+            HardFlatShader,
+            HardGouraudShader,
+            HardPhongShader,
+            SoftDepthShader,
+            SoftPhongShader,
+            SplatterPhongShader,
+        ]
+
+    def test_to(self):
+        cpu_device = torch.device("cpu")
+        cuda_device = torch.device("cuda:0")
+
+        R, T = look_at_view_transform()
+
+        for shader_class in self.shader_classes:
+            for cameras_class in (None, PerspectiveCameras):
+                if cameras_class is None:
+                    cameras = None
+                else:
+                    cameras = PerspectiveCameras(device=cpu_device, R=R, T=T)
+
+                cpu_shader = shader_class(device=cpu_device, cameras=cameras)
+                if cameras is None:
+                    self.assertIsNone(cpu_shader.cameras)
+                else:
+                    self.assertEqual(cpu_device, cpu_shader.cameras.device)
+                self.assertEqual(cpu_device, cpu_shader.materials.device)
+                self.assertEqual(cpu_device, cpu_shader.lights.device)
+
+                cuda_shader = cpu_shader.to(cuda_device)
+                self.assertIs(cpu_shader, cuda_shader)
+                if cameras is None:
+                    self.assertIsNone(cuda_shader.cameras)
+                    with self.assertRaisesRegex(ValueError, "Cameras must be"):
+                        cuda_shader._get_cameras()
+                else:
+                    self.assertEqual(cuda_device, cuda_shader.cameras.device)
+                    self.assertIsInstance(cuda_shader._get_cameras(), cameras_class)
+                self.assertEqual(cuda_device, cuda_shader.materials.device)
+                self.assertEqual(cuda_device, cuda_shader.lights.device)
+
+    def test_cameras_check(self):
+        verts = torch.tensor(
+            [[-1, -1, 0], [1, -1, 1], [1, 1, 0], [-1, 1, 1]], dtype=torch.float32
+        )
+        faces = torch.tensor([[0, 1, 2], [2, 3, 0]], dtype=torch.int64)
+        meshes = Meshes(verts=[verts], faces=[faces])
+
+        pix_to_face = torch.tensor([0, 1], dtype=torch.int64).view(1, 1, 1, 2)
+        barycentric_coords = torch.tensor(
+            [[0.1, 0.2, 0.7], [0.3, 0.5, 0.2]], dtype=torch.float32
+        ).view(1, 1, 1, 2, -1)
+        fragments = Fragments(
+            pix_to_face=pix_to_face,
+            bary_coords=barycentric_coords,
+            zbuf=torch.ones_like(pix_to_face),
+            dists=torch.ones_like(pix_to_face),
+        )
+
+        for shader_class in self.shader_classes:
+            shader = shader_class()
+
+            with self.assertRaises(ValueError):
+                shader(fragments, meshes)
+
+    def test_depth_shader(self):
+        """
+        Check that the hard and soft depth shaders return an output of
+        shape (1, 1, 1, 1) for hand-made fragments with one or two faces
+        per pixel.
+        """
+        shader_classes = [
+            HardDepthShader,
+            SoftDepthShader,
+        ]
+
+        verts = torch.tensor(
+            [[-1, -1, 0], [1, -1, 1], [1, 1, 0], [-1, 1, 1]], dtype=torch.float32
+        )
+        faces = torch.tensor([[0, 1, 2], [2, 3, 0]], dtype=torch.int64)
+        meshes = Meshes(verts=[verts], faces=[faces])
+
+        pix_to_face = torch.tensor([0, 1], dtype=torch.int64).view(1, 1, 1, 2)
+        barycentric_coords = torch.tensor(
+            [[0.1, 0.2, 0.7], [0.3, 0.5, 0.2]], dtype=torch.float32
+        ).view(1, 1, 1, 2, -1)
+        for faces_per_pixel in [1, 2]:
+            # NOTE(review): `zbuf` and `dists` keep both face entries even
+            # when `pix_to_face` and `bary_coords` are sliced to one -
+            # confirm this mismatch is intended.
+            fragments = Fragments(
+                pix_to_face=pix_to_face[:, :, :, :faces_per_pixel],
+                bary_coords=barycentric_coords[:, :, :, :faces_per_pixel],
+                zbuf=torch.ones_like(pix_to_face),
+                dists=torch.ones_like(pix_to_face),
+            )
+            R, T = look_at_view_transform()
+            cameras = PerspectiveCameras(R=R, T=T)
+
+            for shader_class in shader_classes:
+                shader = shader_class()
+
+                # the cameras are supplied at call time, not at construction
+                out = shader(fragments, meshes, cameras=cameras)
+                self.assertEqual(out.shape, (1, 1, 1, 1))
diff --git a/pytorch3d/tests/test_shapenet_core.py b/pytorch3d/tests/test_shapenet_core.py
new file mode 100644
index 0000000000000000000000000000000000000000..63f1c8556d86224a031e91a051385724a6457dd8
--- /dev/null
+++ b/pytorch3d/tests/test_shapenet_core.py
@@ -0,0 +1,297 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Sanity checks for loading ShapeNetCore.
+"""
+import os
+import unittest
+
+import numpy as np
+import torch
+from PIL import Image
+from pytorch3d.datasets import collate_batched_meshes, ShapeNetCore
+from pytorch3d.renderer import (
+    FoVPerspectiveCameras,
+    look_at_view_transform,
+    PointLights,
+    RasterizationSettings,
+)
+from torch.utils.data import DataLoader
+
+from .common_testing import get_tests_dir, load_rgb_image, TestCaseMixin
+
+
+# Set SHAPENET_PATH to the local path of the ShapeNetCore dataset.
+# While it is None (or does not exist on disk), setUp skips every test
+# in TestShapenetCore.
+SHAPENET_PATH = None
+# Dataset version passed to ShapeNetCore in test_load_shapenet_core.
+VERSION = 1
+# If DEBUG=True, save out images generated in the tests for debugging.
+# All saved images have prefix DEBUG_
+DEBUG = False
+# Directory into which the DEBUG images are saved.
+DATA_DIR = get_tests_dir() / "data"
+
+
+class TestShapenetCore(TestCaseMixin, unittest.TestCase):
+    def setUp(self):
+        """
+        Check if the ShapeNet dataset is available locally.
+        If not, download it separately and update `SHAPENET_PATH`
+        with the location of the dataset in order to run the tests.
+        The test is skipped while the path is unset or does not exist.
+        """
+        if SHAPENET_PATH is None or not os.path.exists(SHAPENET_PATH):
+            url = "https://www.shapenet.org/"
+            msg = (
+                "ShapeNet data not found, download from %s, update "
+                "SHAPENET_PATH at the top of the file, and rerun."
+            )
+
+            # skipTest raises, so nothing below this line runs
+            self.skipTest(msg % url)
+
+    def test_load_shapenet_core(self):
+        """
+        Test loading both the entire ShapeNetCore dataset and a subset of the ShapeNetCore
+        dataset. Check the loaded datasets return items of the correct shapes and types.
+        """
+        # Try loading ShapeNetCore with an invalid version number and catch error.
+        with self.assertRaises(ValueError) as err:
+            ShapeNetCore(SHAPENET_PATH, version=3)
+        self.assertTrue("Version number must be either 1 or 2." in str(err.exception))
+
+        # Load ShapeNetCore without specifying any particular categories.
+        shapenet_dataset = ShapeNetCore(SHAPENET_PATH, version=VERSION)
+
+        # Count the number of grandchildren directories (which should be equal to
+        # the total number of objects in the dataset) by walking through the given
+        # directory.
+        wnsynset_list = [
+            wnsynset
+            for wnsynset in os.listdir(SHAPENET_PATH)
+            if os.path.isdir(os.path.join(SHAPENET_PATH, wnsynset))
+        ]
+        model_num_list = [
+            (len(next(os.walk(os.path.join(SHAPENET_PATH, wnsynset)))[1]))
+            for wnsynset in wnsynset_list
+        ]
+        # Check total number of objects in the dataset is correct.
+        self.assertEqual(len(shapenet_dataset), sum(model_num_list))
+
+        # Randomly retrieve an object from the dataset.
+        rand_obj = shapenet_dataset[torch.randint(len(shapenet_dataset), (1,))]
+        # Check that data types and shapes of items returned by __getitem__ are correct.
+        verts, faces = rand_obj["verts"], rand_obj["faces"]
+        self.assertTrue(verts.dtype == torch.float32)
+        self.assertTrue(faces.dtype == torch.int64)
+        self.assertEqual(verts.ndim, 2)
+        self.assertEqual(verts.shape[-1], 3)
+        self.assertEqual(faces.ndim, 2)
+        self.assertEqual(faces.shape[-1], 3)
+
+        # Load six categories from ShapeNetCore.
+        # Specify categories with a combination of offsets and labels.
+        shapenet_subset = ShapeNetCore(
+            SHAPENET_PATH,
+            synsets=[
+                "04330267",
+                "guitar",
+                "02801938",
+                "birdhouse",
+                "03991062",
+                "tower",
+            ],
+            version=1,
+        )
+        subset_offsets = [
+            "04330267",
+            "03467517",
+            "02801938",
+            "02843684",
+            "03991062",
+            "04460130",
+        ]
+        subset_model_nums = [
+            (len(next(os.walk(os.path.join(SHAPENET_PATH, offset)))[1]))
+            for offset in subset_offsets
+        ]
+        self.assertEqual(len(shapenet_subset), sum(subset_model_nums))
+
+    def test_collate_models(self):
+        """
+        Test collate_batched_meshes returns items of the correct shapes and types.
+        Check that when collate_batched_meshes is passed to Dataloader, batches of
+        the correct shapes and types are returned.
+        """
+        # Load ShapeNetCore without specifying any particular categories.
+        shapenet_dataset = ShapeNetCore(SHAPENET_PATH)
+        # Randomly retrieve several objects from the dataset.
+        rand_idxs = torch.randint(len(shapenet_dataset), (6,))
+        rand_objs = [shapenet_dataset[idx] for idx in rand_idxs]
+
+        # Collate the randomly selected objects
+        collated_meshes = collate_batched_meshes(rand_objs)
+        verts, faces = (collated_meshes["verts"], collated_meshes["faces"])
+        self.assertEqual(len(verts), 6)
+        self.assertEqual(len(faces), 6)
+
+        # Pass the custom collate_fn function to DataLoader and check elements
+        # in batch have the correct shape.
+        batch_size = 12
+        shapenet_core_loader = DataLoader(
+            shapenet_dataset, batch_size=batch_size, collate_fn=collate_batched_meshes
+        )
+        it = iter(shapenet_core_loader)
+        object_batch = next(it)
+        self.assertEqual(len(object_batch["synset_id"]), batch_size)
+        self.assertEqual(len(object_batch["model_id"]), batch_size)
+        self.assertEqual(len(object_batch["label"]), batch_size)
+        self.assertEqual(object_batch["mesh"].verts_padded().shape[0], batch_size)
+        self.assertEqual(object_batch["mesh"].faces_padded().shape[0], batch_size)
+
+    def test_catch_render_arg_errors(self):
+        """
+        Check that rendering ShapeNetCore with an unknown model id or with an
+        out-of-bounds index raises the corresponding error.
+        """
+        # Load ShapeNetCore.
+        dataset = ShapeNetCore(SHAPENET_PATH)
+
+        # An unknown model_id has to raise a ValueError.
+        with self.assertRaises(ValueError) as raised:
+            dataset.render(model_ids=["piano0"])
+        self.assertIn("not found in the loaded dataset", str(raised.exception))
+
+        # An out-of-bounds index has to raise an IndexError.
+        with self.assertRaises(IndexError) as raised:
+            dataset.render(idxs=[100000])
+        self.assertIn("are out of bounds", str(raised.exception))
+
+    def test_render_shapenet_core(self):
+        """
+        Test rendering ShapeNetCore objects selected by idxs, model_ids and categories.
+        """
+        # Set up the device and seed the RNG used for random selections.
+        device = torch.device("cuda:0")
+        torch.manual_seed(39)
+
+        # Load category piano from ShapeNetCore.
+        piano_dataset = ShapeNetCore(SHAPENET_PATH, synsets=["piano"])
+
+        # Rendering settings.
+        R, T = look_at_view_transform(1.0, 1.0, 90)
+        cameras = FoVPerspectiveCameras(R=R, T=T, device=device)
+        raster_settings = RasterizationSettings(image_size=512)
+        lights = PointLights(
+            location=torch.tensor([0.0, 1.0, -2.0], device=device)[None],
+            # TODO: debug the source of the discrepancy in two images when rendering on GPU.
+            diffuse_color=((0, 0, 0),),
+            specular_color=((0, 0, 0),),
+            device=device,
+        )
+
+        # Render the first three models in the piano category.
+        pianos = piano_dataset.render(
+            idxs=list(range(3)),
+            device=device,
+            cameras=cameras,
+            raster_settings=raster_settings,
+            lights=lights,
+        )
+        # Check that there are three images in the batch.
+        self.assertEqual(pianos.shape[0], 3)
+
+        # Compare the RGB channels of each render to its reference image.
+        for idx in range(3):
+            piano_rgb = pianos[idx, ..., :3].squeeze().cpu()
+            if DEBUG:  # dump the render into DATA_DIR for manual inspection
+                Image.fromarray((piano_rgb.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR / ("DEBUG_shapenet_core_render_piano_by_idxs_%s.png" % idx)
+                )
+            image_ref = load_rgb_image(
+                "test_shapenet_core_render_piano_%s.png" % idx, DATA_DIR
+            )
+            self.assertClose(piano_rgb, image_ref, atol=0.05)
+
+        # Render the same piano models but by model_ids this time.
+        pianos_2 = piano_dataset.render(
+            model_ids=[
+                "13394ca47c89f91525a3aaf903a41c90",
+                "14755c2ee8e693aba508f621166382b0",
+                "156c4207af6d2c8f1fdc97905708b8ea",
+            ],
+            device=device,
+            cameras=cameras,
+            raster_settings=raster_settings,
+            lights=lights,
+        )
+
+        # Compare against the same reference images as the render by idxs.
+        for idx in range(3):
+            piano_rgb_2 = pianos_2[idx, ..., :3].squeeze().cpu()
+            if DEBUG:
+                Image.fromarray((piano_rgb_2.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR / ("DEBUG_shapenet_core_render_piano_by_ids_%s.png" % idx)
+                )
+            image_ref = load_rgb_image(
+                "test_shapenet_core_render_piano_%s.png" % idx, DATA_DIR
+            )
+            self.assertClose(piano_rgb_2, image_ref, atol=0.05)
+
+        #######################
+        # Render by categories
+        #######################
+
+        # Load ShapeNetCore.
+        shapenet_dataset = ShapeNetCore(SHAPENET_PATH)
+
+        # Render a mixture of categories and specify the number of models to be
+        # randomly sampled from each category.
+        mixed_objs = shapenet_dataset.render(
+            categories=["faucet", "chair"],
+            sample_nums=[2, 1],
+            device=device,
+            cameras=cameras,
+            raster_settings=raster_settings,
+            lights=lights,
+        )
+        # Compare the three rendered models to the reference images.
+        for idx in range(3):
+            mixed_rgb = mixed_objs[idx, ..., :3].squeeze().cpu()
+            if DEBUG:
+                Image.fromarray((mixed_rgb.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR
+                    / ("DEBUG_shapenet_core_render_mixed_by_categories_%s.png" % idx)
+                )
+            image_ref = load_rgb_image(
+                "test_shapenet_core_render_mixed_by_categories_%s.png" % idx, DATA_DIR
+            )
+            self.assertClose(mixed_rgb, image_ref, atol=0.05)
+
+        # Render a mixture of categories without specifying sample_nums.
+        mixed_objs_2 = shapenet_dataset.render(
+            categories=["faucet", "chair"],
+            device=device,
+            cameras=cameras,
+            raster_settings=raster_settings,
+            lights=lights,
+        )
+        # Compare the first two rendered models to the reference images.
+        for idx in range(2):
+            mixed_rgb_2 = mixed_objs_2[idx, ..., :3].squeeze().cpu()
+            if DEBUG:
+                Image.fromarray((mixed_rgb_2.numpy() * 255).astype(np.uint8)).save(
+                    DATA_DIR
+                    / ("DEBUG_shapenet_core_render_without_sample_nums_%s.png" % idx)
+                )
+            image_ref = load_rgb_image(
+                "test_shapenet_core_render_without_sample_nums_%s.png" % idx, DATA_DIR
+            )
+            self.assertClose(mixed_rgb_2, image_ref, atol=0.05)
+
+    def test_load_textures_false(self):
+        # With load_textures=False the first loaded model must carry no textures.
+        untextured_dataset = ShapeNetCore(
+            SHAPENET_PATH, version=VERSION, load_textures=False
+        )
+        self.assertIsNone(untextured_dataset[0]["textures"])
diff --git a/pytorch3d/tests/test_so3.py b/pytorch3d/tests/test_so3.py
new file mode 100644
index 0000000000000000000000000000000000000000..5819957c013e935f42863851f6c2db74b0a136d5
--- /dev/null
+++ b/pytorch3d/tests/test_so3.py
@@ -0,0 +1,283 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import math
+import unittest
+from distutils.version import LooseVersion
+
+import numpy as np
+import torch
+from pytorch3d.transforms.so3 import (
+    hat,
+    so3_exp_map,
+    so3_log_map,
+    so3_relative_angle,
+    so3_rotation_angle,
+)
+
+from .common_testing import TestCaseMixin
+
+
+class TestSO3(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)  # fixed seeds keep the random inputs repeatable
+        np.random.seed(42)
+
+    @staticmethod
+    def init_log_rot(batch_size: int = 10):
+        """
+        Sample a `(batch_size, 3)` float32 tensor of random 3-dimensional
+        vectors that serve as logarithms of rotation matrices.
+        """
+        cuda = torch.device("cuda:0")
+        shape = (batch_size, 3)
+        return torch.randn(shape, dtype=torch.float32, device=cuda)
+
+    @staticmethod
+    def init_rot(batch_size: int = 10):
+        """
+        Randomly generate a batch of `batch_size` 3x3 rotation matrices.
+        """
+        device = torch.device("cuda:0")
+
+        # TODO(dnovotny): replace with random_rotation from random_rotation.py
+        rot = []
+        for _ in range(batch_size):
+            r = torch.linalg.qr(torch.randn((3, 3), device=device))[0]  # Q factor
+            f = torch.randint(2, (3,), device=device, dtype=torch.float32)
+            if f.sum() % 2 == 0:  # force an odd number of ones in f
+                f = 1 - f
+            rot.append(r * (2 * f - 1).float())  # scale columns by +1 / -1
+        rot = torch.stack(rot)
+
+        return rot
+
+    def test_determinant(self):
+        """
+        Check that every 3x3 matrix returned by `so3_exp_map` has a
+        determinant (almost) equal to 1.
+        """
+        rot_logs = TestSO3.init_log_rot(batch_size=30)
+        determinants = torch.det(so3_exp_map(rot_logs))
+        expected = torch.ones_like(determinants)
+        self.assertClose(determinants, expected, atol=1e-4)
+
+    def test_cross(self):
+        """
+        For random 3-dimensional vectors `a` and `b`, check that the matrix
+        product of `hat(a)` with `b` agrees with the cross product of `a`
+        and `b` as computed by `torch.cross`.
+        """
+        cuda = torch.device("cuda:0")
+        a, b = torch.randn((2, 100, 3), dtype=torch.float32, device=cuda)
+        expected = torch.cross(a, b, dim=1)
+        hat_a = hat(a)
+        via_hat = torch.bmm(hat_a, b.unsqueeze(-1)).squeeze(-1)
+        self.assertClose(expected, via_hat, atol=1e-4)
+
+    def test_bad_so3_input_value_err(self):
+        """
+        Check that `so3_exp_map` and `so3_log_map` raise a ValueError carrying
+        the expected message when they are called with an argument of
+        incorrect shape.
+        """
+        cuda = torch.device("cuda:0")
+        bad_log_rot = torch.randn(size=[5, 4], device=cuda)
+        with self.assertRaises(ValueError) as raised:
+            so3_exp_map(bad_log_rot)
+        self.assertIn("Input tensor shape has to be Nx3.", str(raised.exception))
+
+        bad_rot = torch.randn(size=[5, 3, 5], device=cuda)
+        with self.assertRaises(ValueError) as raised:
+            so3_log_map(bad_rot)
+        self.assertIn("Input has to be a batch of 3x3 Tensors.", str(raised.exception))
+
+    def test_so3_exp_singularity(self, batch_size: int = 100):
+        """
+        Check that `so3_exp_map` stays robust for input vectors whose l2-norms
+        are tiny, i.e. close to the numerically unstable region: both the
+        outputs and the gradients have to be finite.
+        """
+        # scale random log-rotations down so that their angles are tiny
+        log_rot = TestSO3.init_log_rot(batch_size=batch_size)
+        tiny_log_rot = log_rot * 1e-6
+        tiny_log_rot.requires_grad = True
+        rotations = so3_exp_map(tiny_log_rot)
+        # every output entry has to be finite
+        self.assertTrue(torch.isfinite(rotations).all())
+        # the gradient has to exist and be finite everywhere
+        total = rotations.sum()
+        total.backward()
+        self.assertIsNotNone(tiny_log_rot.grad)
+        self.assertTrue(torch.isfinite(tiny_log_rot.grad).all())
+
+    def test_so3_log_singularity(self, batch_size: int = 100):
+        """
+        Tests whether the `so3_log_map` is robust to the input matrices
+        whose rotation angles are close to the numerically unstable region
+        (rotation angles near 0, plus an exact 180-degree rotation).
+        """
+        # generate random rotations with a tiny angle
+        device = torch.device("cuda:0")
+        identity = torch.eye(3, device=device)
+        rot180 = identity * torch.tensor([[1.0, -1.0, -1.0]], device=device)
+        r = [identity, rot180]
+        # add random rotations and random almost orthonormal matrices
+        r.extend(
+            [
+                torch.linalg.qr(identity + torch.randn_like(identity) * 1e-4)[0]
+                + float(i > batch_size // 2) * (0.5 - torch.rand_like(identity)) * 1e-3
+                # this adds random noise to the second half
+                # of the random orthogonal matrices to generate
+                # near-orthogonal matrices
+                for i in range(batch_size - 2)
+            ]
+        )
+        r = torch.stack(r)
+        r.requires_grad = True
+        # the log of the rotation matrix r
+        r_log = so3_log_map(r, cos_bound=1e-4, eps=1e-2)
+        # tests whether all outputs are finite
+        self.assertTrue(torch.isfinite(r_log).all())
+        # backprop through so3_log_map; the gradient w.r.t. r must be finite
+        loss = r_log.sum()
+        loss.backward()
+        self.assertIsNotNone(r.grad)
+        self.assertTrue(torch.isfinite(r.grad).all())
+
+    def test_so3_log_to_exp_to_log_to_exp(self, batch_size: int = 100):
+        """
+        For a random batch of rotation matrix logarithms `log_rot`, verify
+        `so3_exp_map(so3_log_map(so3_exp_map(log_rot)))
+        == so3_exp_map(log_rot)`.
+        In contrast to `test_so3_log_to_exp_to_log`, the inputs are scaled by
+        2 and include an angle just below 2pi, so that `log_rot` values
+        > math.pi are covered as well.
+        """
+        log_rot = TestSO3.init_log_rot(batch_size=batch_size) * 2.0
+        # also cover the singular cases with rot. angle = {0, 2pi}
+        log_rot[:2] = 0
+        log_rot[1, 0] = 2.0 * math.pi - 1e-6
+        rot = so3_exp_map(log_rot, eps=1e-4)
+        rot_back = so3_exp_map(so3_log_map(rot, eps=1e-4, cos_bound=1e-6), eps=1e-6)
+        self.assertClose(rot, rot_back, atol=0.01)
+        rel_angles = so3_relative_angle(rot, rot_back, cos_bound=1e-6)
+        self.assertClose(rel_angles, torch.zeros_like(rel_angles), atol=0.01)
+
+    def test_so3_log_to_exp_to_log(self, batch_size: int = 100):
+        """
+        For a random batch of rotation matrix logarithms `log_rot`, verify
+        that `so3_log_map(so3_exp_map(log_rot)) == log_rot`.
+        """
+        log_rot = TestSO3.init_log_rot(batch_size=batch_size)
+        # also cover the singular case with rot. angle = 0
+        log_rot[:1] = 0
+        recovered = so3_log_map(so3_exp_map(log_rot))
+        self.assertClose(log_rot, recovered, atol=1e-4)
+
+    def test_so3_exp_to_log_to_exp(self, batch_size: int = 100):
+        """
+        For a random batch of rotation matrices `R`, verify that
+        `so3_exp_map(so3_log_map(R)) == R`; near-pi rotations are left out.
+        """
+        rot = TestSO3.init_rot(batch_size=batch_size)
+        keep = (so3_rotation_angle(rot) - math.pi).abs() > 1e-2
+        rot = rot[keep]
+        rot_back = so3_exp_map(so3_log_map(rot, eps=1e-8, cos_bound=1e-8), eps=1e-8)
+        self.assertClose(rot_back, rot, atol=0.1)
+        rel_angles = so3_relative_angle(rot, rot_back, cos_bound=1e-4)
+        self.assertClose(rel_angles, torch.zeros_like(rel_angles), atol=0.1)
+
+    def test_so3_cos_relative_angle(self, batch_size: int = 100):
+        """
+        For random batches of rotation matrices `R1` and `R2`, verify that
+        `so3_relative_angle(R1, R2, cos_angle=False).cos()` matches
+        `so3_relative_angle(R1, R2, cos_angle=True)`.
+        """
+        rot1 = TestSO3.init_rot(batch_size=batch_size)
+        rot2 = TestSO3.init_rot(batch_size=batch_size)
+        cos_of_angle = so3_relative_angle(rot1, rot2, cos_angle=False).cos()
+        direct_cos = so3_relative_angle(rot1, rot2, cos_angle=True)
+        self.assertClose(cos_of_angle, direct_cos, atol=1e-4)
+
+    def test_so3_cos_angle(self, batch_size: int = 100):
+        """
+        For a random batch of rotation matrices `R`, verify that
+        `so3_rotation_angle(R, cos_angle=False).cos()` matches
+        `so3_rotation_angle(R, cos_angle=True)`.
+        """
+        rot = TestSO3.init_rot(batch_size=batch_size)
+        cos_of_angle = so3_rotation_angle(rot, cos_angle=False).cos()
+        direct_cos = so3_rotation_angle(rot, cos_angle=True)
+        self.assertClose(cos_of_angle, direct_cos, atol=1e-4)
+
+    def test_so3_cos_bound(self, batch_size: int = 100):
+        """
+        Checks that near the gradient singularity (e.g. `R=I`), so3_rotation_angle
+        yields finite gradients for `cos_bound > 0.0`; for `cos_bound=0.0` only
+        finite angles and a non-None gradient are required (may be non-finite).
+        """
+        # generate random rotations with a tiny angle to generate cases
+        # with the gradient singularity
+        device = torch.device("cuda:0")
+        identity = torch.eye(3, device=device)
+        rot180 = identity * torch.tensor([[1.0, -1.0, -1.0]], device=device)
+        r = [identity, rot180]
+        r.extend(
+            [
+                torch.linalg.qr(identity + torch.randn_like(identity) * 1e-4)[0]
+                for _ in range(batch_size - 2)
+            ]
+        )
+        r = torch.stack(r)
+        r.requires_grad = True
+        for is_grad_finite in (True, False):
+            # clear the gradients and decide the cos_bound:
+            #     for is_grad_finite we run so3_rotation_angle with cos_bound
+            #     set to a small float, otherwise we set to 0.0
+            r.grad = None
+            cos_bound = 1e-4 if is_grad_finite else 0.0
+            # compute the angles of r
+            angles = so3_rotation_angle(r, cos_bound=cos_bound)
+            # tests whether all outputs are finite in both cases
+            self.assertTrue(torch.isfinite(angles).all())
+            # compute the gradients
+            loss = angles.sum()
+            loss.backward()
+            # tests whether the gradient is not None for both cases
+            self.assertIsNotNone(r.grad)
+            if is_grad_finite:
+                # all grad values have to be finite
+                self.assertTrue(torch.isfinite(r.grad).all())
+
+    @unittest.skipIf(LooseVersion(torch.__version__) < "1.9", "recent torchscript only")
+    def test_scriptable(self):
+        torch.jit.script(so3_exp_map)  # passes as long as scripting does not raise
+        torch.jit.script(so3_log_map)
+
+    @staticmethod
+    def so3_expmap(batch_size: int = 10):
+        log_rot = TestSO3.init_log_rot(batch_size=batch_size)
+        torch.cuda.synchronize()  # wait until the inputs are ready on the GPU
+
+        def compute_rots():
+            so3_exp_map(log_rot)
+            torch.cuda.synchronize()  # wait for the GPU work of this call
+
+        return compute_rots  # zero-argument callable; inputs are built only once
+
+    @staticmethod
+    def so3_logmap(batch_size: int = 10):
+        rot = TestSO3.init_rot(batch_size=batch_size)  # rotation matrices
+        torch.cuda.synchronize()
+
+        def compute_logs():
+            so3_log_map(rot)
+            torch.cuda.synchronize()
+
+        return compute_logs
diff --git a/pytorch3d/tests/test_splatter_blend.py b/pytorch3d/tests/test_splatter_blend.py
new file mode 100644
index 0000000000000000000000000000000000000000..e90bc152ffb1f2ca2fde44dd4f78083b8fa3b93f
--- /dev/null
+++ b/pytorch3d/tests/test_splatter_blend.py
@@ -0,0 +1,627 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.common.compat import meshgrid_ij
+from pytorch3d.renderer.cameras import FoVPerspectiveCameras
+from pytorch3d.renderer.splatter_blend import (
+    _compute_occlusion_layers,
+    _compute_splatted_colors_and_weights,
+    _compute_splatting_colors_and_weights,
+    _get_splat_kernel_normalization,
+    _normalize_and_compose_all_layers,
+    _offset_splats,
+    _precompute,
+    _prepare_pixels_and_colors,
+)
+
+from .common_testing import TestCaseMixin
+
+offsets = torch.tensor(  # the 9 integer 2D shifts covering a 3x3 neighbourhood
+    [
+        [-1, -1],
+        [-1, 0],
+        [-1, 1],
+        [0, -1],
+        [0, 0],  # index 4: no shift, i.e. self-splatting
+        [0, 1],
+        [1, -1],
+        [1, 0],
+        [1, 1],
+    ],
+    device=torch.device("cpu"),
+)
+
+
+def compute_splatting_colors_and_weights_naive(pixel_coords_screen, colors, sigma):
+    # Loop-based reference: fills an (N, H, W, K, 9, 5) tensor entry by entry.
+    normalizer = float(_get_splat_kernel_normalization(offsets))
+    N, H, W, K, _ = colors.shape
+    result = torch.zeros((N, H, W, K, 9, 5))
+    for n in range(N):
+        for h in range(H):
+            for w in range(W):
+                for k in range(K):
+                    q_xy = pixel_coords_screen[n, h, w, k]
+                    to_center = torch.floor(q_xy) - q_xy + 0.5
+                    rgba = colors[n, h, w, k]
+                    alpha = rgba[3:4]
+                    # One splat per entry of the module-level `offsets`.
+                    for d, offset in enumerate(offsets):
+                        sq_dist = torch.sum((to_center + offset) ** 2)
+                        gauss = torch.exp(-sq_dist / (2 * sigma**2))
+                        weight = alpha * gauss * normalizer
+                        result[n, h, w, k, d, :4] = weight * rgba
+                        result[n, h, w, k, d, 4:5] = weight
+    return result
+
+
+class TestPrecompute(TestCaseMixin, unittest.TestCase):
+    def setUp(self):  # results for (N, H, W, K) = (2, 3, 4, 5) and (1, 1, 1, 1)
+        self.results_cpu = _precompute((2, 3, 4, 5), torch.device("cpu"))
+        self.results1_cpu = _precompute((1, 1, 1, 1), torch.device("cpu"))
+
+    def test_offsets(self):
+        self.assertClose(self.results_cpu[2].shape, offsets.shape, atol=0)
+        self.assertClose(self.results_cpu[2], offsets, atol=0)
+
+        # Offsets should be independent of input_size.
+        self.assertClose(self.results_cpu[2], self.results1_cpu[2], atol=0)
+
+    def test_crops_h(self):
+        target_crops_h1 = torch.tensor(
+            [
+                # channels being offset:
+                # R  G  B  A  W(eight)
+                [0, 0, 0, 0, 0],
+                [1, 1, 1, 1, 1],
+                [2, 2, 2, 2, 2],
+                [0, 0, 0, 0, 0],
+                [1, 1, 1, 1, 1],
+                [2, 2, 2, 2, 2],
+                [0, 0, 0, 0, 0],
+                [1, 1, 1, 1, 1],
+                [2, 2, 2, 2, 2],
+            ]
+            * 3,  # 3 because we're aiming at (N, H, W+2, K, 9, 5) with W=1.
+            device=torch.device("cpu"),
+        ).reshape(1, 1, 3, 1, 9, 5)
+        self.assertClose(self.results1_cpu[0], target_crops_h1, atol=0)
+
+        target_crops_h_base = target_crops_h1[0, 0, 0]
+        target_crops_h = torch.cat(
+            [target_crops_h_base, target_crops_h_base + 1, target_crops_h_base + 2],
+            dim=0,
+        )  # Each h value needs an increment.
+
+        # Check that we have the right shape, and (after broadcasting) it has the right
+        # values. These should be repeated (tiled) for each n and k.
+        self.assertClose(
+            self.results_cpu[0].shape, torch.tensor([2, 3, 6, 5, 9, 5]), atol=0
+        )
+        for n in range(2):
+            for w in range(6):
+                for k in range(5):
+                    self.assertClose(
+                        self.results_cpu[0][n, :, w, k],
+                        target_crops_h,
+                    )
+
+    def test_crops_w(self):
+        target_crops_w1 = torch.tensor(
+            [
+                # channels being offset:
+                # R  G  B  A  W(eight)
+                [0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0],
+                [1, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1],
+                [2, 2, 2, 2, 2],
+                [2, 2, 2, 2, 2],
+                [2, 2, 2, 2, 2],
+            ],
+            device=torch.device("cpu"),
+        ).reshape(1, 1, 1, 1, 9, 5)
+        self.assertClose(self.results1_cpu[1], target_crops_w1)
+
+        target_crops_w_base = target_crops_w1[0, 0, 0]
+        target_crops_w = torch.cat(
+            [
+                target_crops_w_base,
+                target_crops_w_base + 1,
+                target_crops_w_base + 2,
+                target_crops_w_base + 3,
+            ],
+            dim=0,
+        )  # Each w value needs an increment.
+
+        # Check that we have the right shape, and (after broadcasting) it has the right
+        # values. These should be repeated (tiled) for each n and k.
+        self.assertClose(self.results_cpu[1].shape, torch.tensor([2, 3, 4, 5, 9, 5]))
+        for n in range(2):
+            for h in range(3):
+                for k in range(5):
+                    self.assertClose(
+                        self.results_cpu[1][n, h, :, k],
+                        target_crops_w,
+                        atol=0,
+                    )
+
+
+class TestPreparPixelsAndColors(TestCaseMixin, unittest.TestCase):
+    def setUp(self):
+        self.device = torch.device("cpu")
+        dims = (2, 3, 4, 5)  # N, H, W, K
+        self.pixel_coords_cameras = torch.randn(
+            dims + (3,), device=self.device, requires_grad=True
+        )
+        self.colors_before = torch.rand(dims + (3,), device=self.device)
+        self.cameras = FoVPerspectiveCameras(device=self.device)
+        self.background_mask = torch.rand(dims, device=self.device) < 0.5
+        self.pixel_coords_screen, self.colors_after = _prepare_pixels_and_colors(
+            self.pixel_coords_cameras,
+            self.colors_before,
+            self.cameras,
+            self.background_mask,
+        )
+
+    def test_background_z(self):
+        # Every masked (background) pixel must end up with a z value of 1.0.
+        background_z = self.pixel_coords_screen[..., 2][self.background_mask]
+        self.assertTrue(torch.all(background_z == 1.0))
+
+    def test_background_alpha(self):
+        # Every masked (background) pixel must end up with channel 3 at 0.0.
+        background_alpha = self.colors_after[..., 3][self.background_mask]
+        self.assertTrue(torch.all(background_alpha == 0.0))
+
+
+class TestGetSplatKernelNormalization(TestCaseMixin, unittest.TestCase):
+    def test_splat_kernel_normalization(self):
+        # First with the default second argument, then with an explicit 0.01.
+        default_norm = float(_get_splat_kernel_normalization(offsets))
+        self.assertAlmostEqual(default_norm, 0.6503, places=3)
+        explicit_norm = float(_get_splat_kernel_normalization(offsets, 0.01))
+        self.assertAlmostEqual(explicit_norm, 1.05, places=3)
+        # Passing 0 as the second argument has to be rejected.
+        with self.assertRaisesRegex(ValueError, "Only positive standard deviations"):
+            _get_splat_kernel_normalization(offsets, 0)
+
+
+class TestComputeOcclusionLayers(TestCaseMixin, unittest.TestCase):
+    def test_single_layer(self):
+        # If there's only one layer, all splats must be on the surface level.
+        N, H, W, K = 2, 3, 4, 1
+        q_depth = torch.rand(N, H, W, K)
+        occlusion_layers = _compute_occlusion_layers(q_depth)
+        self.assertClose(occlusion_layers, torch.zeros(N, H, W, 9).long(), atol=0.0)
+
+    def test_all_equal(self):
+        # If all q-vals are equal, then all splats must be on the surface level.
+        N, H, W, K = 2, 3, 4, 5
+        q_depth = torch.ones((N, H, W, K)) * 0.1234
+        occlusion_layers = _compute_occlusion_layers(q_depth)
+        self.assertClose(occlusion_layers, torch.zeros(N, H, W, 9).long(), atol=0.0)
+
+    def test_mid_to_top_level_splatting(self):
+        # Check that occlusion buffers get accumulated as expected when the splatting
+        # and splatted pixels are co-surface on different intersection layers.
+        # This test will make best sense with accompanying Fig. 4 from "Differentiable
+        # Surface Rendering via Non-differentiable Sampling" by Cole et al.
+        for direction, offset in enumerate(offsets):
+            if direction == 4:
+                continue  # Skip self-splatting which is always co-surface.
+
+            depths = torch.zeros(1, 3, 3, 3)
+
+            # This is our q, the pixel splatted onto, in the center of the image.
+            depths[0, 1, 1] = torch.tensor([0.71, 0.8, 1.0])
+
+            # This is our p, the splatting pixel.
+            depths[0, offset[0] + 1, offset[1] + 1] = torch.tensor([0.5, 0.7, 0.9])
+
+            occlusion_layers = _compute_occlusion_layers(depths)
+
+            # Check that we computed that it is the middle layer of p that is co-
+            # surface with q. (1, 1) is the id of q in the depth array, and direction
+            # is the id of p's direction w.r.t. q.
+            psurfaceid_onto_q = occlusion_layers[0, 1, 1, direction]
+            self.assertEqual(int(psurfaceid_onto_q), 1)
+
+            # Conversely, if we swap p and q, we have a top-level splatting onto
+            # mid-level. offset + 1 is the id of p, and 8 - direction is the id of
+            # q's direction w.r.t. p (e.g. if p is [-1, -1] w.r.t. q, then q is
+            # [1, 1] w.r.t. p; we use the ids of these two directions in the offsets
+            # array).
+            qsurfaceid_onto_p = occlusion_layers[
+                0, offset[0] + 1, offset[1] + 1, 8 - direction
+            ]
+            self.assertEqual(int(qsurfaceid_onto_p), -1)
+
+
+class TestComputeSplattingColorsAndWeights(TestCaseMixin, unittest.TestCase):
+    def setUp(self):
+        self.N, self.H, self.W, self.K = 2, 3, 4, 5
+        self.pixel_coords_screen = (
+            torch.stack(
+                meshgrid_ij(torch.arange(self.H), torch.arange(self.W)),
+                dim=-1,
+            )
+            .reshape(1, self.H, self.W, 1, 2)
+            .expand(self.N, self.H, self.W, self.K, 2)
+            .float()
+            + 0.5
+        )
+        self.colors = torch.ones((self.N, self.H, self.W, self.K, 4))
+
+    def test_all_equal(self):
+        # If all colors are equal and on a regular grid, all weights and reweighted
+        # colors should be equal given a specific splatting direction.
+        splatting_colors_and_weights = _compute_splatting_colors_and_weights(
+            self.pixel_coords_screen, self.colors * 0.2345, sigma=0.5, offsets=offsets
+        )
+
+        # Splatting directly to the top/bottom/left/right should have the same strength.
+        non_diag_splats = splatting_colors_and_weights[
+            :, :, :, :, torch.tensor([1, 3, 5, 7])
+        ]
+
+        # Same for diagonal splats.
+        diag_splats = splatting_colors_and_weights[
+            :, :, :, :, torch.tensor([0, 2, 6, 8])
+        ]
+
+        # And for self-splats.
+        self_splats = splatting_colors_and_weights[:, :, :, :, torch.tensor([4])]
+
+        for splats in non_diag_splats, diag_splats, self_splats:
+            # Colors should be equal.
+            self.assertTrue(torch.all(splats[..., :4] == splats[0, 0, 0, 0, 0, 0]))
+
+            # Weights should be equal.
+            self.assertTrue(torch.all(splats[..., 4] == splats[0, 0, 0, 0, 0, 4]))
+
+        # Non-diagonal weights should be greater than diagonal weights.
+        self.assertGreater(
+            non_diag_splats[0, 0, 0, 0, 0, 0], diag_splats[0, 0, 0, 0, 0, 0]
+        )
+
+        # Self-splats should be strongest of all.
+        self.assertGreater(
+            self_splats[0, 0, 0, 0, 0, 0], non_diag_splats[0, 0, 0, 0, 0, 0]
+        )
+
+        # Splatting colors should be reweighted proportionally to their splat weights.
+        diag_self_color_ratio = (
+            diag_splats[0, 0, 0, 0, 0, 0] / self_splats[0, 0, 0, 0, 0, 0]
+        )
+        diag_self_weight_ratio = (
+            diag_splats[0, 0, 0, 0, 0, 4] / self_splats[0, 0, 0, 0, 0, 4]
+        )
+        self.assertEqual(diag_self_color_ratio, diag_self_weight_ratio)
+
+        non_diag_self_color_ratio = (
+            non_diag_splats[0, 0, 0, 0, 0, 0] / self_splats[0, 0, 0, 0, 0, 0]
+        )
+        non_diag_self_weight_ratio = (
+            non_diag_splats[0, 0, 0, 0, 0, 4] / self_splats[0, 0, 0, 0, 0, 4]
+        )
+        self.assertEqual(non_diag_self_color_ratio, non_diag_self_weight_ratio)
+
+    def test_zero_alpha_zero_weight(self):
+        # Pixels with zero alpha do no splatting, but should still be splatted on.
+        colors = self.colors.clone()
+        colors[0, 1, 1, 0, 3] = 0.0
+        splatting_colors_and_weights = _compute_splatting_colors_and_weights(
+            self.pixel_coords_screen, colors, sigma=0.5, offsets=offsets
+        )
+
+        # The transparent pixel should do no splatting.
+        self.assertTrue(torch.all(splatting_colors_and_weights[0, 1, 1, 0] == 0.0))
+
+        # Splatting *onto* the transparent pixel should be unaffected.
+        reference_weights_colors = splatting_colors_and_weights[0, 1, 1, 1]
+        for direction, offset in enumerate(offsets):
+            if direction == 4:
+                continue  # Ignore self-splats
+            # We invert the direction to get the right (h, w, d) coordinate of each
+            # pixel splatting *onto* the pixel with zero alpha.
+            self.assertClose(
+                splatting_colors_and_weights[
+                    0, 1 + offset[0], 1 + offset[1], 0, 8 - direction
+                ],
+                reference_weights_colors[8 - direction],
+                atol=0.001,
+            )
+
+    def test_random_inputs(self):
+        pixel_coords_screen = (
+            self.pixel_coords_screen
+            + torch.randn((self.N, self.H, self.W, self.K, 2)) * 0.1
+        )
+        colors = torch.rand((self.N, self.H, self.W, self.K, 4))
+        splatting_colors_and_weights = _compute_splatting_colors_and_weights(
+            pixel_coords_screen, colors, sigma=0.5, offsets=offsets
+        )
+        naive_colors_and_weights = compute_splatting_colors_and_weights_naive(
+            pixel_coords_screen, colors, sigma=0.5
+        )
+
+        self.assertClose(
+            splatting_colors_and_weights, naive_colors_and_weights, atol=0.01
+        )
+
+
+class TestOffsetSplats(TestCaseMixin, unittest.TestCase):
+    def test_offset(self):
+        """Check how _offset_splats shifts each of the nine direction slices."""
+        device = torch.device("cuda:0")
+        N, H, W, K = 2, 3, 4, 5
+        colors_and_weights = torch.rand((N, H, W, K, 9, 5), device=device)
+        crop_ids_h, crop_ids_w, _ = _precompute((N, H, W, K), device=device)
+        offset_colors_and_weights = _offset_splats(
+            colors_and_weights, crop_ids_h, crop_ids_w
+        )
+        # Check each splatting direction individually, for clarity.
+        # offset_x, offset_y = (-1, -1): data moves by +1 along dims 1 and 2.
+        direction = 0
+        self.assertClose(
+            offset_colors_and_weights[:, 1:, 1:, :, direction],
+            colors_and_weights[:, :-1, :-1, :, direction],
+            atol=0.001,
+        )
+        self.assertTrue(
+            torch.all(offset_colors_and_weights[:, 0, :, :, direction] == 0.0)
+        )
+        self.assertTrue(
+            torch.all(offset_colors_and_weights[:, :, 0, :, direction] == 0.0)
+        )
+
+        # offset_x, offset_y = (-1, 0): data moves by +1 along dim 2.
+        direction = 1
+        self.assertClose(
+            offset_colors_and_weights[:, :, 1:, :, direction],
+            colors_and_weights[:, :, :-1, :, direction],
+            atol=0.001,
+        )
+        self.assertTrue(
+            torch.all(offset_colors_and_weights[:, :, 0, :, direction] == 0.0)
+        )
+
+        # offset_x, offset_y = (-1, 1): -1 along dim 1 and +1 along dim 2.
+        direction = 2
+        self.assertClose(
+            offset_colors_and_weights[:, :-1, 1:, :, direction],
+            colors_and_weights[:, 1:, :-1, :, direction],
+            atol=0.001,
+        )
+        self.assertTrue(
+            torch.all(offset_colors_and_weights[:, -1, :, :, direction] == 0.0)
+        )
+        self.assertTrue(
+            torch.all(offset_colors_and_weights[:, :, 0, :, direction] == 0.0)
+        )
+
+        # offset_x, offset_y = (0, -1): data moves by +1 along dim 1.
+        direction = 3
+        self.assertClose(
+            offset_colors_and_weights[:, 1:, :, :, direction],
+            colors_and_weights[:, :-1, :, :, direction],
+            atol=0.001,
+        )
+        self.assertTrue(
+            torch.all(offset_colors_and_weights[:, 0, :, :, direction] == 0.0)
+        )
+
+        # self-splat: this direction must come back unchanged.
+        direction = 4
+        self.assertClose(
+            offset_colors_and_weights[..., direction, :],
+            colors_and_weights[..., direction, :],
+            atol=0.001,
+        )
+
+        # offset_x, offset_y = (0, 1): data moves by -1 along dim 1.
+        direction = 5
+        self.assertClose(
+            offset_colors_and_weights[:, :-1, :, :, direction],
+            colors_and_weights[:, 1:, :, :, direction],
+            atol=0.001,
+        )
+        self.assertTrue(
+            torch.all(offset_colors_and_weights[:, -1, :, :, direction] == 0.0)
+        )
+
+        # offset_x, offset_y = (1, -1): +1 along dim 1 and -1 along dim 2.
+        direction = 6
+        self.assertClose(
+            offset_colors_and_weights[:, 1:, :-1, :, direction],
+            colors_and_weights[:, :-1, 1:, :, direction],
+            atol=0.001,
+        )
+        self.assertTrue(
+            torch.all(offset_colors_and_weights[:, 0, :, :, direction] == 0.0)
+        )
+        self.assertTrue(
+            torch.all(offset_colors_and_weights[:, :, -1, :, direction] == 0.0)
+        )
+
+        # offset_x, offset_y = (1, 0): data moves by -1 along dim 2.
+        direction = 7
+        self.assertClose(
+            offset_colors_and_weights[:, :, :-1, :, direction],
+            colors_and_weights[:, :, 1:, :, direction],
+            atol=0.001,
+        )
+        self.assertTrue(
+            torch.all(offset_colors_and_weights[:, :, -1, :, direction] == 0.0)
+        )
+
+        # offset_x, offset_y = (1, 1): data moves by -1 along dims 1 and 2.
+        direction = 8
+        self.assertClose(
+            offset_colors_and_weights[:, :-1, :-1, :, direction],
+            colors_and_weights[:, 1:, 1:, :, direction],
+            atol=0.001,
+        )
+        self.assertTrue(
+            torch.all(offset_colors_and_weights[:, -1, :, :, direction] == 0.0)
+        )
+        self.assertTrue(
+            torch.all(offset_colors_and_weights[:, :, -1, :, direction] == 0.0)
+        )
+
+
+class TestComputeSplattedColorsAndWeights(TestCaseMixin, unittest.TestCase):
+    def test_accumulation_background(self):
+        # Set occlusion_layers to all -1, so all splats are background splats.
+        splat_colors_and_weights = torch.rand((1, 1, 1, 3, 9, 5))
+        occlusion_layers = torch.zeros((1, 1, 1, 9)) - 1
+        splatted_colors, splatted_weights = _compute_splatted_colors_and_weights(
+            occlusion_layers, splat_colors_and_weights
+        )
+
+        # Foreground splats (there are none).
+        self.assertClose(
+            splatted_colors[0, 0, 0, :, 0],
+            torch.zeros((4)),
+            atol=0.001,
+        )
+
+        # Surface splats (there are none).
+        self.assertClose(
+            splatted_colors[0, 0, 0, :, 1],
+            torch.zeros((4)),
+            atol=0.001,
+        )
+
+        # Background splats.
+        self.assertClose(
+            splatted_colors[0, 0, 0, :, 2],
+            splat_colors_and_weights[0, 0, 0, :, :, :4].sum(dim=0).sum(dim=0),
+            atol=0.001,
+        )
+
+    def test_accumulation_middle(self):
+        # Set occlusion_layers to all 0, so top splats are co-surface with splatted
+        # pixels. Thus, the top splatting layer should be accumulated to surface, and
+        # all other layers to background.
+        splat_colors_and_weights = torch.rand((1, 1, 1, 3, 9, 5))
+        occlusion_layers = torch.zeros((1, 1, 1, 9))
+        splatted_colors, splatted_weights = _compute_splatted_colors_and_weights(
+            occlusion_layers, splat_colors_and_weights
+        )
+
+        # Foreground splats (there are none).
+        self.assertClose(
+            splatted_colors[0, 0, 0, :, 0],
+            torch.zeros((4)),
+            atol=0.001,
+        )
+
+        # Surface splats
+        self.assertClose(
+            splatted_colors[0, 0, 0, :, 1],
+            splat_colors_and_weights[0, 0, 0, 0, :, :4].sum(dim=0),
+            atol=0.001,
+        )
+
+        # Background splats
+        self.assertClose(
+            splatted_colors[0, 0, 0, :, 2],
+            splat_colors_and_weights[0, 0, 0, 1:, :, :4].sum(dim=0).sum(dim=0),
+            atol=0.001,
+        )
+
+    def test_accumulation_foreground(self):
+        # Set occlusion_layers to all 1. Then the top splatter is a foreground
+        # splatter, mid splatter is surface, and bottom splatter is background.
+        splat_colors_and_weights = torch.rand((1, 1, 1, 3, 9, 5))
+        occlusion_layers = torch.zeros((1, 1, 1, 9)) + 1
+        splatted_colors, splatted_weights = _compute_splatted_colors_and_weights(
+            occlusion_layers, splat_colors_and_weights
+        )
+
+        # Foreground splats
+        self.assertClose(
+            splatted_colors[0, 0, 0, :, 0],
+            splat_colors_and_weights[0, 0, 0, 0:1, :, :4].sum(dim=0).sum(dim=0),
+            atol=0.001,
+        )
+
+        # Surface splats
+        self.assertClose(
+            splatted_colors[0, 0, 0, :, 1],
+            splat_colors_and_weights[0, 0, 0, 1:2, :, :4].sum(dim=0).sum(dim=0),
+            atol=0.001,
+        )
+
+        # Background splats
+        self.assertClose(
+            splatted_colors[0, 0, 0, :, 2],
+            splat_colors_and_weights[0, 0, 0, 2:3, :, :4].sum(dim=0).sum(dim=0),
+            atol=0.001,
+        )
+
+
+class TestNormalizeAndComposeAllLayers(TestCaseMixin, unittest.TestCase):
+    def test_background_color(self):
+        # Background should always have alpha=0, and the chosen RGB.
+        N, H, W = 2, 3, 4
+        # Make a mask with background in the zeroth row of the first image.
+        bg_mask = torch.zeros([N, H, W, 1, 1])
+        bg_mask[0, :, 0] = 1
+
+        bg_color = torch.tensor([0.2, 0.3, 0.4])
+
+        color_layers = torch.rand((N, H, W, 4, 3)) * (1 - bg_mask)
+        color_weights = torch.rand((N, H, W, 1, 3)) * (1 - bg_mask)
+
+        colors = _normalize_and_compose_all_layers(
+            bg_color, color_layers, color_weights
+        )
+
+        # Background RGB should be .2, .3, .4, and alpha should be 0.
+        self.assertClose(
+            torch.masked_select(colors, bg_mask.bool()[..., 0]),
+            torch.tensor([0.2, 0.3, 0.4, 0, 0.2, 0.3, 0.4, 0, 0.2, 0.3, 0.4, 0.0]),
+            atol=0.001,
+        )
+
+    def test_compositing_opaque(self):
+        # When all colors are opaque, only the foreground layer should be visible.
+        N, H, W = 2, 3, 4
+        color_layers = torch.rand((N, H, W, 4, 3))
+        color_layers[..., 3, :] = 1.0
+        color_weights = torch.ones((N, H, W, 1, 3))
+
+        out_colors = _normalize_and_compose_all_layers(
+            torch.tensor([0.0, 0.0, 0.0]), color_layers, color_weights
+        )
+        self.assertClose(out_colors, color_layers[..., 0], atol=0.001)
+
+    def test_compositing_transparencies(self):
+        # When foreground layer is transparent and surface and bg are semi-transparent,
+        # we should return a  mix of the two latter.
+        N, H, W = 2, 3, 4
+        color_layers = torch.rand((N, H, W, 4, 3))
+        color_layers[..., 3, 0] = 0.1  # fg
+        color_layers[..., 3, 1] = 0.2  # surface
+        color_layers[..., 3, 2] = 0.3  # bg
+        color_weights = torch.ones((N, H, W, 1, 3))
+
+        out_colors = _normalize_and_compose_all_layers(
+            torch.tensor([0.0, 0.0, 0.0]), color_layers, color_weights
+        )
+        self.assertClose(
+            out_colors,
+            color_layers[..., 0]
+            + 0.9 * (color_layers[..., 1] + 0.8 * color_layers[..., 2]),
+        )
diff --git a/pytorch3d/tests/test_struct_utils.py b/pytorch3d/tests/test_struct_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a40a6548fe22e473f7b7390756aad46b6c963c67
--- /dev/null
+++ b/pytorch3d/tests/test_struct_utils.py
@@ -0,0 +1,227 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import torch
+from pytorch3d.structures import utils as struct_utils
+
+from .common_testing import TestCaseMixin
+
+
+class TestStructUtils(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(43)  # fixed seed so the random fixtures are repeatable
+
+    def _check_list_to_padded_slices(self, x, x_padded, ndim):
+        N = len(x)
+        for i in range(N):
+            slices = [i]
+            for dim in range(ndim):
+                if x[i].nelement() == 0 and x[i].ndim == 1:
+                    slice_ = slice(0, 0, 1)
+                else:
+                    slice_ = slice(0, x[i].shape[dim], 1)
+                slices.append(slice_)
+            if x[i].nelement() == 0 and x[i].ndim == 1:
+                x_correct = x[i].new_zeros(*[[0] * ndim])
+            else:
+                x_correct = x[i]
+            self.assertClose(x_padded[slices], x_correct)
+
+    def test_list_to_padded(self):
+        """Check list_to_padded with explicit, default and equisized padding."""
+        device = torch.device("cuda:0")
+        N = 5
+        K = 20
+        for ndim in [1, 2, 3, 4]:
+            x = []
+            for _ in range(N):
+                dims = torch.randint(K, size=(ndim,)).tolist()
+                x.append(torch.rand(dims, device=device))
+            # set 0th element to an empty 1D tensor
+            x[0] = torch.tensor([], dtype=x[0].dtype, device=device)
+
+            # set 1st element to an empty tensor with correct number of dims
+            x[1] = x[1].new_zeros(*[[0] * ndim])
+
+            pad_size = [K] * ndim
+            x_padded = struct_utils.list_to_padded(
+                x, pad_size=pad_size, pad_value=0.0, equisized=False
+            )
+
+            for dim in range(ndim):
+                self.assertEqual(x_padded.shape[dim + 1], K)
+
+            self._check_list_to_padded_slices(x, x_padded, ndim)
+
+            # check for no pad size (defaults to max dimension)
+            x_padded = struct_utils.list_to_padded(x, pad_value=0.0, equisized=False)
+            max_sizes = (  # lazy generator, consumed once by the loop below
+                max(
+                    (0 if (y.nelement() == 0 and y.ndim == 1) else y.shape[dim])
+                    for y in x
+                )
+                for dim in range(ndim)
+            )
+            for dim, max_size in enumerate(max_sizes):
+                self.assertEqual(x_padded.shape[dim + 1], max_size)
+
+            self._check_list_to_padded_slices(x, x_padded, ndim)
+
+            # check for equisized
+            x = [torch.rand((K, *([10] * (ndim - 1))), device=device) for _ in range(N)]
+            x_padded = struct_utils.list_to_padded(x, equisized=True)
+            self.assertClose(x_padded, torch.stack(x, 0))
+
+        # pad_size with one entry too many (ndim, x are from the last iteration)
+        pad_size = [K] * (ndim + 1)
+        with self.assertRaisesRegex(ValueError, "Pad size must"):
+            struct_utils.list_to_padded(
+                x, pad_size=pad_size, pad_value=0.0, equisized=False
+            )
+
+        # pad_size with one entry too few for 3D inputs
+        x = []
+        ndim = 3
+        for _ in range(N):
+            dims = torch.randint(K, size=(ndim,)).tolist()
+            x.append(torch.rand(dims, device=device))
+        pad_size = [K] * 2
+        with self.assertRaisesRegex(ValueError, "Pad size must"):
+            x_padded = struct_utils.list_to_padded(
+                x, pad_size=pad_size, pad_value=0.0, equisized=False
+            )
+
+    def test_padded_to_list(self):
+        device = torch.device("cuda:0")
+        N = 5
+        K = 20
+        ndim = 2
+
+        for ndim in (2, 3, 4):
+
+            dims = [K] * ndim
+            x = torch.rand([N] + dims, device=device)
+
+            x_list = struct_utils.padded_to_list(x)
+            for i in range(N):
+                self.assertClose(x_list[i], x[i])
+
+            split_size = torch.randint(1, K, size=(N, ndim)).unbind(0)
+            x_list = struct_utils.padded_to_list(x, split_size)
+            for i in range(N):
+                slices = [i]
+                for dim in range(ndim):
+                    slices.append(slice(0, split_size[i][dim], 1))
+                self.assertClose(x_list[i], x[slices])
+
+            # split size is a list of ints
+            split_size = [int(z) for z in torch.randint(1, K, size=(N,)).unbind(0)]
+            x_list = struct_utils.padded_to_list(x, split_size)
+            for i in range(N):
+                self.assertClose(x_list[i], x[i][: split_size[i]])
+
+    def test_padded_to_packed(self):
+        device = torch.device("cuda:0")
+        N = 5
+        K = 20
+        ndim = 2
+        dims = [K] * ndim
+        x = torch.rand([N] + dims, device=device)
+
+        # Case 1: no split_size or pad_value provided
+        # Check output is just the flattened input.
+        x_packed = struct_utils.padded_to_packed(x)
+        self.assertTrue(x_packed.shape == (x.shape[0] * x.shape[1], x.shape[2]))
+        self.assertClose(x_packed, x.reshape(-1, K))
+
+        # Case 2: pad_value is provided.
+        # Check each section of the packed tensor matches the
+        # corresponding unpadded elements of the padded tensor.
+        # Check that only rows where all the values are padded
+        # are removed in the conversion to packed.
+        pad_value = -1
+        x_list = []
+        split_size = []
+        for _ in range(N):
+            dim = torch.randint(K, size=(1,)).item()
+            # Add some random values in the input which are the same as the pad_value.
+            # These should not be filtered out.
+            x_list.append(
+                torch.randint(low=pad_value, high=10, size=(dim, K), device=device)
+            )
+            split_size.append(dim)
+        x_padded = struct_utils.list_to_padded(x_list, pad_value=pad_value)
+        x_packed = struct_utils.padded_to_packed(x_padded, pad_value=pad_value)
+        curr = 0
+        for i in range(N):
+            self.assertClose(x_packed[curr : curr + split_size[i], ...], x_list[i])
+            self.assertClose(torch.cat(x_list), x_packed)
+            curr += split_size[i]
+
+        # Case 3: split_size is provided.
+        # Check each section of the packed tensor matches the corresponding
+        # unpadded elements.
+        x_packed = struct_utils.padded_to_packed(x_padded, split_size=split_size)
+        curr = 0
+        for i in range(N):
+            self.assertClose(x_packed[curr : curr + split_size[i], ...], x_list[i])
+            self.assertClose(torch.cat(x_list), x_packed)
+            curr += split_size[i]
+
+        # Case 4: split_size of the wrong shape is provided.
+        # Raise an error.
+        split_size = torch.randint(1, K, size=(2 * N,)).view(N, 2).unbind(0)
+        with self.assertRaisesRegex(ValueError, "1-dimensional"):
+            x_packed = struct_utils.padded_to_packed(x_padded, split_size=split_size)
+
+        split_size = torch.randint(1, K, size=(2 * N,)).view(N * 2).tolist()
+        with self.assertRaisesRegex(
+            ValueError, "same length as inputs first dimension"
+        ):
+            x_packed = struct_utils.padded_to_packed(x_padded, split_size=split_size)
+
+        # Case 5: both pad_value and split_size are provided.
+        # Raise an error.
+        with self.assertRaisesRegex(ValueError, "Only one of"):
+            x_packed = struct_utils.padded_to_packed(
+                x_padded, split_size=split_size, pad_value=-1
+            )
+
+        # Case 6: Input has more than 3 dims.
+        # Raise an error.
+        x = torch.rand((N, K, K, K, K), device=device)
+        split_size = torch.randint(1, K, size=(N,)).tolist()
+        with self.assertRaisesRegex(ValueError, "Supports only"):
+            struct_utils.padded_to_packed(x, split_size=split_size)
+
+    def test_list_to_packed(self):
+        device = torch.device("cuda:0")
+        N = 5
+        K = 20
+        x, x_dims = [], []
+        dim2 = torch.randint(K, size=(1,)).item()
+        for _ in range(N):
+            dim1 = torch.randint(K, size=(1,)).item()
+            x_dims.append(dim1)
+            x.append(torch.rand([dim1, dim2], device=device))
+
+        out = struct_utils.list_to_packed(x)
+        x_packed = out[0]
+        num_items = out[1]
+        item_packed_first_idx = out[2]
+        item_packed_to_list_idx = out[3]
+
+        cur = 0
+        for i in range(N):
+            self.assertTrue(num_items[i] == x_dims[i])
+            self.assertTrue(item_packed_first_idx[i] == cur)
+            self.assertTrue(item_packed_to_list_idx[cur : cur + x_dims[i]].eq(i).all())
+            self.assertClose(x_packed[cur : cur + x_dims[i]], x[i])
+            cur += x_dims[i]
diff --git a/pytorch3d/tests/test_subdivide_meshes.py b/pytorch3d/tests/test_subdivide_meshes.py
new file mode 100644
index 0000000000000000000000000000000000000000..aebef50aaf865ddfe7eb4d79400d6aa0c8d8b196
--- /dev/null
+++ b/pytorch3d/tests/test_subdivide_meshes.py
@@ -0,0 +1,234 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import torch
+from pytorch3d.ops.subdivide_meshes import SubdivideMeshes
+from pytorch3d.structures.meshes import Meshes
+from pytorch3d.utils.ico_sphere import ico_sphere
+
+from .common_testing import TestCaseMixin
+
+
+class TestSubdivideMeshes(TestCaseMixin, unittest.TestCase):
+    def simple_subdivide(self, with_init=False):
+        # Create a mesh with one face and check the subdivided mesh has
+        # 4 faces with the correct vertex coordinates.
+        device = torch.device("cuda:0")
+        verts = torch.tensor(
+            [[0.5, 1.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, 0.0]],
+            dtype=torch.float32,
+            device=device,
+            requires_grad=True,
+        )
+        faces = torch.tensor([[0, 1, 2]], dtype=torch.int64, device=device)
+        mesh = Meshes(verts=[verts], faces=[faces])
+        mesh_init = mesh.clone() if with_init else None
+        subdivide = SubdivideMeshes(meshes=mesh_init)
+        new_mesh = subdivide(mesh)
+
+        # Subdivided face:
+        #
+        #           v0
+        #           /\
+        #          /  \
+        #         / f0 \
+        #     v4 /______\ v3
+        #       /\      /\
+        #      /  \ f3 /  \
+        #     / f2 \  / f1 \
+        #    /______\/______\
+        #  v2       v5       v1
+        #
+        gt_subdivide_verts = torch.tensor(
+            [
+                [0.5, 1.0, 0.0],
+                [1.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0],
+                [0.75, 0.5, 0.0],
+                [0.25, 0.5, 0.0],
+                [0.5, 0.0, 0.0],
+            ],
+            dtype=torch.float32,
+            device=device,
+        )
+        gt_subdivide_faces = torch.tensor(
+            [[0, 3, 4], [1, 5, 3], [2, 4, 5], [5, 4, 3]],
+            dtype=torch.int64,
+            device=device,
+        )
+        new_verts, new_faces = new_mesh.get_mesh_verts_faces(0)
+        self.assertClose(new_verts, gt_subdivide_verts)
+        self.assertClose(new_faces, gt_subdivide_faces)
+        self.assertTrue(new_verts.requires_grad == verts.requires_grad)
+
+    def test_simple_subdivide(self):
+        self.simple_subdivide()
+
+    def test_simple_subdivide_with_init(self):  # SubdivideMeshes gets a mesh clone
+        self.simple_subdivide(with_init=True)
+
+    def test_heterogeneous_meshes(self):  # batch of three differently shaped meshes
+        device = torch.device("cuda:0")
+        verts1 = torch.tensor(
+            [[0.5, 1.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, 0.0]],
+            dtype=torch.float32,
+            device=device,
+            requires_grad=True,
+        )
+        faces1 = torch.tensor([[0, 1, 2]], dtype=torch.int64, device=device)
+        verts2 = torch.tensor(
+            [[0.5, 1.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [1.5, 1.0, 0.0]],
+            dtype=torch.float32,
+            device=device,
+            requires_grad=True,
+        )
+        faces2 = torch.tensor([[0, 1, 2], [0, 3, 1]], dtype=torch.int64, device=device)
+        faces3 = torch.tensor([[0, 1, 2], [0, 2, 3]], dtype=torch.int64, device=device)
+        mesh = Meshes(verts=[verts1, verts2, verts2], faces=[faces1, faces2, faces3])
+        subdivide = SubdivideMeshes()
+        new_mesh = subdivide(mesh.clone())
+
+        gt_subdivided_verts1 = torch.tensor(
+            [
+                [0.5, 1.0, 0.0],
+                [1.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0],
+                [0.75, 0.5, 0.0],
+                [0.25, 0.5, 0.0],
+                [0.5, 0.0, 0.0],
+            ],
+            dtype=torch.float32,
+            device=device,
+        )
+        gt_subdivided_faces1 = torch.tensor(
+            [[0, 3, 4], [1, 5, 3], [2, 4, 5], [5, 4, 3]],
+            dtype=torch.int64,
+            device=device,
+        )
+        # faces2:
+        #
+        #         v0 _______e2_______ v3
+        #           /\              /
+        #          /  \            /
+        #         /    \          /
+        #     e1 /      \ e0     / e4
+        #       /        \      /
+        #      /          \    /
+        #     /            \  /
+        #    /______________\/
+        #  v2       e3      v1
+        #
+        # Subdivided faces2:
+        #
+        #         v0 _______v6_______ v3
+        #           /\      /\      /
+        #          /  \ f1 /  \ f3 /
+        #         / f0 \  / f7 \  /
+        #     v5 /______v4______\/v8
+        #       /\      /\      /
+        #      /  \ f6 /  \ f5 /
+        #     / f4 \  / f2 \  /
+        #    /______\/______\/
+        #  v2       v7       v1
+        #
+        gt_subdivided_verts2 = torch.tensor(
+            [
+                [0.5, 1.0, 0.0],
+                [1.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0],
+                [1.5, 1.0, 0.0],
+                [0.75, 0.5, 0.0],
+                [0.25, 0.5, 0.0],
+                [1.0, 1.0, 0.0],
+                [0.5, 0.0, 0.0],
+                [1.25, 0.5, 0.0],
+            ],
+            dtype=torch.float32,
+            device=device,
+        )
+        gt_subdivided_faces2 = torch.tensor(
+            [
+                [0, 4, 5],
+                [0, 6, 4],
+                [1, 7, 4],
+                [3, 8, 6],
+                [2, 5, 7],
+                [1, 4, 8],
+                [7, 5, 4],
+                [8, 4, 6],
+            ],
+            dtype=torch.int64,
+            device=device,
+        )
+        gt_subdivided_verts3 = gt_subdivided_verts2.clone()  # only the last row differs
+        gt_subdivided_verts3[-1, :] = torch.tensor(
+            [0.75, 0.5, 0], dtype=torch.float32, device=device
+        )
+        gt_subdivided_faces3 = torch.tensor(
+            [
+                [0, 4, 5],
+                [0, 5, 6],
+                [1, 7, 4],
+                [2, 8, 5],
+                [2, 5, 7],
+                [3, 6, 8],
+                [7, 5, 4],
+                [8, 6, 5],
+            ],
+            dtype=torch.int64,
+            device=device,
+        )
+        new_mesh_verts1, new_mesh_faces1 = new_mesh.get_mesh_verts_faces(0)
+        new_mesh_verts2, new_mesh_faces2 = new_mesh.get_mesh_verts_faces(1)
+        new_mesh_verts3, new_mesh_faces3 = new_mesh.get_mesh_verts_faces(2)
+        self.assertClose(new_mesh_verts1, gt_subdivided_verts1)
+        self.assertClose(new_mesh_faces1, gt_subdivided_faces1)
+        self.assertClose(new_mesh_verts2, gt_subdivided_verts2)
+        self.assertClose(new_mesh_faces2, gt_subdivided_faces2)
+        self.assertClose(new_mesh_verts3, gt_subdivided_verts3)
+        self.assertClose(new_mesh_faces3, gt_subdivided_faces3)
+        self.assertTrue(new_mesh_verts1.requires_grad == verts1.requires_grad)
+        self.assertTrue(new_mesh_verts2.requires_grad == verts2.requires_grad)
+        self.assertTrue(new_mesh_verts3.requires_grad == verts2.requires_grad)
+
+    def test_subdivide_features(self):
+        device = torch.device("cuda:0")
+        mesh = ico_sphere(0, device)
+        N = 10
+        mesh = mesh.extend(N)
+        edges = mesh.edges_packed()
+        V = mesh.num_verts_per_mesh()[0]
+        D = 256
+        feats = torch.rand(
+            (N * V, D), dtype=torch.float32, device=device, requires_grad=True
+        )  # packed features
+        app_feats = feats[edges].mean(1)
+        subdivide = SubdivideMeshes()
+        new_mesh, new_feats = subdivide(mesh, feats)
+        gt_feats = torch.cat(
+            (feats.view(N, V, D), app_feats.view(N, -1, D)), dim=1
+        ).view(-1, D)
+        self.assertClose(new_feats, gt_feats)
+        self.assertTrue(new_feats.requires_grad == gt_feats.requires_grad)
+
+    @staticmethod
+    def subdivide_meshes_with_init(num_meshes: int = 10, same_topo: bool = False):
+        device = torch.device("cuda:0")
+        meshes = ico_sphere(0, device=device)
+        if num_meshes > 1:
+            meshes = meshes.extend(num_meshes)
+        meshes_init = meshes.clone() if same_topo else None
+        torch.cuda.synchronize()
+
+        def subdivide_meshes():
+            subdivide = SubdivideMeshes(meshes=meshes_init)
+            subdivide(meshes=meshes.clone())
+            torch.cuda.synchronize()
+
+        return subdivide_meshes
diff --git a/pytorch3d/tests/test_symeig3x3.py b/pytorch3d/tests/test_symeig3x3.py
new file mode 100644
index 0000000000000000000000000000000000000000..29a2f955915dd478698f2fa0ea817ad1f687058d
--- /dev/null
+++ b/pytorch3d/tests/test_symeig3x3.py
@@ -0,0 +1,264 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import torch
+from pytorch3d.common.workaround import symeig3x3
+from pytorch3d.transforms.rotation_conversions import random_rotations
+
+from .common_testing import get_random_cuda_device, TestCaseMixin
+
+
+class TestSymEig3x3(TestCaseMixin, unittest.TestCase):
+    TEST_BATCH_SIZE = 1024
+
+    @staticmethod
+    def create_random_sym3x3(device, n):
+        random_3x3 = torch.randn((n, 3, 3), device=device)
+        random_3x3_T = torch.transpose(random_3x3, 1, 2)
+        random_sym_3x3 = (random_3x3 * random_3x3_T).contiguous()
+
+        return random_sym_3x3
+
+    @staticmethod
+    def create_diag_sym3x3(device, n, noise=0.0):
+        # Create purly diagonal matrices
+        random_diag_3x3 = torch.randn((n, 3), device=device).diag_embed()
+
+        # Make them 'almost' diagonal
+        random_diag_3x3 += noise * TestSymEig3x3.create_random_sym3x3(device, n)
+
+        return random_diag_3x3
+
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)
+
+        self._gpu = get_random_cuda_device()
+        self._cpu = torch.device("cpu")
+
+    def test_is_eigen_gpu(self):
+        test_input = self.create_random_sym3x3(self._gpu, n=self.TEST_BATCH_SIZE)
+
+        self._test_is_eigen(test_input)
+
+    def test_is_eigen_cpu(self):
+        test_input = self.create_random_sym3x3(self._cpu, n=self.TEST_BATCH_SIZE)
+
+        self._test_is_eigen(test_input)
+
+    def _test_is_eigen(self, test_input, atol=1e-04, rtol=1e-02):
+        """
+        Verify that values and vectors produced are really eigenvalues and eigenvectors
+        and can restore the original input matrix with good precision
+        """
+        eigenvalues, eigenvectors = symeig3x3(test_input, eigenvectors=True)
+
+        self.assertClose(
+            test_input,
+            eigenvectors @ eigenvalues.diag_embed() @ eigenvectors.transpose(-2, -1),
+            atol=atol,
+            rtol=rtol,
+        )
+
+    def test_eigenvectors_are_orthonormal_gpu(self):
+        test_input = self.create_random_sym3x3(self._gpu, n=self.TEST_BATCH_SIZE)
+
+        self._test_eigenvectors_are_orthonormal(test_input)
+
+    def test_eigenvectors_are_orthonormal_cpu(self):
+        test_input = self.create_random_sym3x3(self._cpu, n=self.TEST_BATCH_SIZE)
+
+        self._test_eigenvectors_are_orthonormal(test_input)
+
+    def _test_eigenvectors_are_orthonormal(self, test_input):
+        """
+        Verify that eigenvectors are an orthonormal set
+        """
+        eigenvalues, eigenvectors = symeig3x3(test_input, eigenvectors=True)
+
+        batched_eye = torch.zeros_like(test_input)
+        batched_eye[..., :, :] = torch.eye(3, device=batched_eye.device)
+
+        self.assertClose(
+            batched_eye, eigenvectors @ eigenvectors.transpose(-2, -1), atol=1e-06
+        )
+
+    def test_is_not_nan_or_inf_gpu(self):
+        test_input = self.create_random_sym3x3(self._gpu, n=self.TEST_BATCH_SIZE)
+
+        self._test_is_not_nan_or_inf(test_input)
+
+    def test_is_not_nan_or_inf_cpu(self):
+        test_input = self.create_random_sym3x3(self._cpu, n=self.TEST_BATCH_SIZE)
+
+        self._test_is_not_nan_or_inf(test_input)
+
+    def _test_is_not_nan_or_inf(self, test_input):
+        eigenvalues, eigenvectors = symeig3x3(test_input, eigenvectors=True)
+
+        self.assertTrue(torch.isfinite(eigenvalues).all())
+        self.assertTrue(torch.isfinite(eigenvectors).all())
+
+    def test_degenerate_inputs_gpu(self):
+        self._test_degenerate_inputs(self._gpu)
+
+    def test_degenerate_inputs_cpu(self):
+        self._test_degenerate_inputs(self._cpu)
+
+    def _test_degenerate_inputs(self, device):
+        """
+        Test degenerate case when input matrices are diagonal or near-diagonal
+        """
+
+        # Purely diagonal case
+        test_input = self.create_diag_sym3x3(device, self.TEST_BATCH_SIZE)
+
+        self._test_is_not_nan_or_inf(test_input)
+        self._test_is_eigen(test_input)
+        self._test_eigenvectors_are_orthonormal(test_input)
+
+        # Almost-diagonal case
+        test_input = self.create_diag_sym3x3(device, self.TEST_BATCH_SIZE, noise=1e-4)
+
+        self._test_is_not_nan_or_inf(test_input)
+        self._test_is_eigen(test_input)
+        self._test_eigenvectors_are_orthonormal(test_input)
+
+    def test_gradients_cpu(self):
+        self._test_gradients(self._cpu)
+
+    def test_gradients_gpu(self):
+        self._test_gradients(self._gpu)
+
+    def _test_gradients(self, device):
+        """
+        Tests if gradients pass though without any problems (infs, nans etc) and
+        also performs gradcheck (compares numerical and analytical gradients)
+        """
+        test_random_input = self.create_random_sym3x3(device, n=16)
+        test_diag_input = self.create_diag_sym3x3(device, n=16)
+        test_almost_diag_input = self.create_diag_sym3x3(device, n=16, noise=1e-4)
+
+        test_input = torch.cat(
+            (test_random_input, test_diag_input, test_almost_diag_input)
+        )
+        test_input.requires_grad = True
+
+        with torch.autograd.detect_anomaly():
+            eigenvalues, eigenvectors = symeig3x3(test_input, eigenvectors=True)
+
+            loss = eigenvalues.mean() + eigenvectors.mean()
+            loss.backward()
+
+        test_random_input.requires_grad = True
+        # Inputs are converted to double to increase the precision of gradcheck.
+        torch.autograd.gradcheck(
+            symeig3x3, test_random_input.double(), eps=1e-6, atol=1e-2, rtol=1e-2
+        )
+
+    def _test_eigenvalues_and_eigenvectors(
+        self, test_eigenvectors, test_eigenvalues, atol=1e-04, rtol=1e-04
+    ):
+        test_input = (
+            test_eigenvectors.transpose(-2, -1)
+            @ test_eigenvalues.diag_embed()
+            @ test_eigenvectors
+        )
+
+        test_eigenvalues_sorted, _ = torch.sort(test_eigenvalues, dim=-1)
+
+        eigenvalues, eigenvectors = symeig3x3(test_input, eigenvectors=True)
+
+        self.assertClose(
+            test_eigenvalues_sorted,
+            eigenvalues,
+            atol=atol,
+            rtol=rtol,
+        )
+
+        self._test_is_not_nan_or_inf(test_input)
+        self._test_is_eigen(test_input, atol=atol, rtol=rtol)
+        self._test_eigenvectors_are_orthonormal(test_input)
+
+    def test_degenerate_eigenvalues_gpu(self):
+        self._test_degenerate_eigenvalues(self._gpu)
+
+    def test_degenerate_eigenvalues_cpu(self):
+        self._test_degenerate_eigenvalues(self._cpu)
+
+    def _test_degenerate_eigenvalues(self, device):
+        """
+        Test degenerate eigenvalues like zero-valued and with 2-/3-multiplicity
+        """
+        # Error tolerances for degenerate values are increased as things might become
+        #  numerically unstable
+        deg_atol = 1e-3
+        deg_rtol = 1.0
+
+        # Construct random orthonormal sets
+        test_eigenvecs = random_rotations(n=self.TEST_BATCH_SIZE, device=device)
+
+        # Construct random eigenvalues
+        test_eigenvals = torch.randn(
+            (self.TEST_BATCH_SIZE, 3), device=test_eigenvecs.device
+        )
+        self._test_eigenvalues_and_eigenvectors(
+            test_eigenvecs, test_eigenvals, atol=deg_atol, rtol=deg_rtol
+        )
+
+        # First eigenvalue is always 0.0 here: [0.0 X Y]
+        test_eigenvals_with_zero = test_eigenvals.clone()
+        test_eigenvals_with_zero[..., 0] = 0.0
+        self._test_eigenvalues_and_eigenvectors(
+            test_eigenvecs, test_eigenvals_with_zero, atol=deg_atol, rtol=deg_rtol
+        )
+
+        # First two eigenvalues are always the same here: [X X Y]
+        test_eigenvals_with_multiplicity2 = test_eigenvals.clone()
+        test_eigenvals_with_multiplicity2[..., 1] = test_eigenvals_with_multiplicity2[
+            ..., 0
+        ]
+        self._test_eigenvalues_and_eigenvectors(
+            test_eigenvecs,
+            test_eigenvals_with_multiplicity2,
+            atol=deg_atol,
+            rtol=deg_rtol,
+        )
+
+        # All three eigenvalues are the same here: [X X X]
+        test_eigenvals_with_multiplicity3 = test_eigenvals_with_multiplicity2.clone()
+        test_eigenvals_with_multiplicity3[..., 2] = test_eigenvals_with_multiplicity2[
+            ..., 0
+        ]
+        self._test_eigenvalues_and_eigenvectors(
+            test_eigenvecs,
+            test_eigenvals_with_multiplicity3,
+            atol=deg_atol,
+            rtol=deg_rtol,
+        )
+
+    def test_more_dimensions(self):
+        """
+        Tests if function supports arbitrary leading dimensions
+        """
+        repeat = 4
+
+        test_input = self.create_random_sym3x3(self._cpu, n=16)
+        test_input_4d = test_input[None, ...].expand((repeat,) + test_input.shape)
+
+        eigenvalues, eigenvectors = symeig3x3(test_input, eigenvectors=True)
+        eigenvalues_4d, eigenvectors_4d = symeig3x3(test_input_4d, eigenvectors=True)
+
+        eigenvalues_4d_gt = eigenvalues[None, ...].expand((repeat,) + eigenvalues.shape)
+        eigenvectors_4d_gt = eigenvectors[None, ...].expand(
+            (repeat,) + eigenvectors.shape
+        )
+
+        self.assertClose(eigenvalues_4d_gt, eigenvalues_4d)
+        self.assertClose(eigenvectors_4d_gt, eigenvectors_4d)
diff --git a/pytorch3d/tests/test_texturing.py b/pytorch3d/tests/test_texturing.py
new file mode 100644
index 0000000000000000000000000000000000000000..71ffa3e20142eaf1fbee084d2e68c38b906427d9
--- /dev/null
+++ b/pytorch3d/tests/test_texturing.py
@@ -0,0 +1,1185 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import torch
+import torch.nn.functional as F
+from pytorch3d.renderer.mesh.rasterizer import Fragments
+from pytorch3d.renderer.mesh.textures import (
+    _list_to_padded_wrapper,
+    TexturesAtlas,
+    TexturesUV,
+    TexturesVertex,
+)
+from pytorch3d.renderer.mesh.utils import (
+    pack_rectangles,
+    pack_unique_rectangles,
+    Rectangle,
+)
+from pytorch3d.structures import list_to_packed, Meshes, packed_to_list
+
+from .common_testing import TestCaseMixin
+from .test_meshes import init_mesh
+
+
+def tryindex(self, index, tex, meshes, source):
+    tex2 = tex[index]
+    meshes2 = meshes[index]
+    tex_from_meshes = meshes2.textures
+    for item in source:
+        basic = source[item][index]
+        from_texture = getattr(tex2, item + "_padded")()
+        from_meshes = getattr(tex_from_meshes, item + "_padded")()
+        if isinstance(index, int):
+            basic = basic[None]
+
+        if len(basic) == 0:
+            self.assertEqual(len(from_texture), 0)
+            self.assertEqual(len(from_meshes), 0)
+        else:
+            self.assertClose(basic, from_texture)
+            self.assertClose(basic, from_meshes)
+            self.assertEqual(from_texture.ndim, getattr(tex, item + "_padded")().ndim)
+            item_list = getattr(tex_from_meshes, item + "_list")()
+            self.assertEqual(basic.shape[0], len(item_list))
+            for i, elem in enumerate(item_list):
+                self.assertClose(elem, basic[i])
+
+
+class TestTexturesVertex(TestCaseMixin, unittest.TestCase):
+    def test_sample_vertex_textures(self):
+        """
+        This tests both interpolate_vertex_colors as well as
+        interpolate_face_attributes.
+        """
+        verts = torch.randn((4, 3), dtype=torch.float32)
+        faces = torch.tensor([[2, 1, 0], [3, 1, 0]], dtype=torch.int64)
+        vert_tex = torch.tensor(
+            [[0, 1, 0], [0, 1, 1], [1, 1, 0], [1, 1, 1]], dtype=torch.float32
+        )
+        verts_features = vert_tex
+        tex = TexturesVertex(verts_features=[verts_features])
+        mesh = Meshes(verts=[verts], faces=[faces], textures=tex)
+        pix_to_face = torch.tensor([0, 1], dtype=torch.int64).view(1, 1, 1, 2)
+        barycentric_coords = torch.tensor(
+            [[0.5, 0.3, 0.2], [0.3, 0.6, 0.1]], dtype=torch.float32
+        ).view(1, 1, 1, 2, -1)
+        expected_vals = torch.tensor(
+            [[0.5, 1.0, 0.3], [0.3, 1.0, 0.9]], dtype=torch.float32
+        ).view(1, 1, 1, 2, -1)
+        fragments = Fragments(
+            pix_to_face=pix_to_face,
+            bary_coords=barycentric_coords,
+            zbuf=torch.ones_like(pix_to_face),
+            dists=torch.ones_like(pix_to_face),
+        )
+        # sample_textures calls interpolate_vertex_colors
+        texels = mesh.sample_textures(fragments)
+        self.assertTrue(torch.allclose(texels, expected_vals[None, :]))
+
+    def test_sample_vertex_textures_grad(self):
+        verts = torch.randn((4, 3), dtype=torch.float32)
+        faces = torch.tensor([[2, 1, 0], [3, 1, 0]], dtype=torch.int64)
+        vert_tex = torch.tensor(
+            [[0, 1, 0], [0, 1, 1], [1, 1, 0], [1, 1, 1]],
+            dtype=torch.float32,
+            requires_grad=True,
+        )
+        verts_features = vert_tex
+        tex = TexturesVertex(verts_features=[verts_features])
+        mesh = Meshes(verts=[verts], faces=[faces], textures=tex)
+        pix_to_face = torch.tensor([0, 1], dtype=torch.int64).view(1, 1, 1, 2)
+        barycentric_coords = torch.tensor(
+            [[0.5, 0.3, 0.2], [0.3, 0.6, 0.1]], dtype=torch.float32
+        ).view(1, 1, 1, 2, -1)
+        fragments = Fragments(
+            pix_to_face=pix_to_face,
+            bary_coords=barycentric_coords,
+            zbuf=torch.ones_like(pix_to_face),
+            dists=torch.ones_like(pix_to_face),
+        )
+        grad_vert_tex = torch.tensor(
+            [[0.3, 0.3, 0.3], [0.9, 0.9, 0.9], [0.5, 0.5, 0.5], [0.3, 0.3, 0.3]],
+            dtype=torch.float32,
+        )
+        texels = mesh.sample_textures(fragments)
+        texels.sum().backward()
+        self.assertTrue(hasattr(vert_tex, "grad"))
+        self.assertTrue(torch.allclose(vert_tex.grad, grad_vert_tex[None, :]))
+
+    def test_textures_vertex_init_fail(self):
+        # Incorrect sized tensors
+        with self.assertRaisesRegex(ValueError, "verts_features"):
+            TexturesVertex(verts_features=torch.rand(size=(5, 10)))
+
+        # Not a list or a tensor
+        with self.assertRaisesRegex(ValueError, "verts_features"):
+            TexturesVertex(verts_features=(1, 1, 1))
+
+    def test_faces_verts_textures(self):
+        device = torch.device("cuda:0")
+        verts = torch.randn((2, 4, 3), dtype=torch.float32, device=device)
+        faces = torch.tensor(
+            [[[2, 1, 0], [3, 1, 0]], [[1, 3, 0], [2, 1, 3]]],
+            dtype=torch.int64,
+            device=device,
+        )
+
+        # define TexturesVertex
+        verts_texture = torch.rand(verts.shape, device=device)
+        textures = TexturesVertex(verts_features=verts_texture)
+
+        # compute packed faces
+        ff = faces.unbind(0)
+        faces_packed = torch.cat([ff[0], ff[1] + verts.shape[1]])
+
+        # face verts textures
+        faces_verts_texts = textures.faces_verts_textures_packed(faces_packed)
+
+        verts_texts_packed = torch.cat(verts_texture.unbind(0))
+        faces_verts_texts_packed = verts_texts_packed[faces_packed]
+
+        self.assertClose(faces_verts_texts_packed, faces_verts_texts)
+
+    def test_submeshes(self):
+        # define TexturesVertex
+        verts_features = torch.tensor(
+            [
+                [1, 0, 0],
+                [1, 0, 0],
+                [1, 0, 0],
+                [1, 0, 0],
+                [0, 1, 0],
+                [0, 1, 0],
+                [0, 1, 0],
+                [0, 1, 0],
+            ],
+            dtype=torch.float32,
+        )
+
+        textures = TexturesVertex(
+            verts_features=[verts_features, verts_features, verts_features]
+        )
+        subtextures = textures.submeshes(
+            [
+                [
+                    torch.LongTensor([0, 2, 3]),
+                    torch.LongTensor(list(range(8))),
+                ],
+                [],
+                [
+                    torch.LongTensor([4]),
+                ],
+            ],
+            None,
+        )
+
+        subtextures_features = subtextures.verts_features_list()
+
+        self.assertEqual(len(subtextures_features), 3)
+        self.assertTrue(
+            torch.equal(
+                subtextures_features[0],
+                torch.FloatTensor([[1, 0, 0], [1, 0, 0], [1, 0, 0]]),
+            )
+        )
+        self.assertTrue(torch.equal(subtextures_features[1], verts_features))
+        self.assertTrue(
+            torch.equal(subtextures_features[2], torch.FloatTensor([[0, 1, 0]]))
+        )
+
+    def test_clone(self):
+        tex = TexturesVertex(verts_features=torch.rand(size=(10, 100, 128)))
+        tex.verts_features_list()
+        tex_cloned = tex.clone()
+        self.assertSeparate(
+            tex._verts_features_padded, tex_cloned._verts_features_padded
+        )
+        self.assertClose(tex._verts_features_padded, tex_cloned._verts_features_padded)
+        self.assertSeparate(tex.valid, tex_cloned.valid)
+        self.assertTrue(tex.valid.eq(tex_cloned.valid).all())
+        for i in range(tex._N):
+            self.assertSeparate(
+                tex._verts_features_list[i], tex_cloned._verts_features_list[i]
+            )
+            self.assertClose(
+                tex._verts_features_list[i], tex_cloned._verts_features_list[i]
+            )
+
+    def test_detach(self):
+        tex = TexturesVertex(
+            verts_features=torch.rand(size=(10, 100, 128), requires_grad=True)
+        )
+        tex.verts_features_list()
+        tex_detached = tex.detach()
+        self.assertFalse(tex_detached._verts_features_padded.requires_grad)
+        self.assertClose(
+            tex_detached._verts_features_padded, tex._verts_features_padded
+        )
+        for i in range(tex._N):
+            self.assertClose(
+                tex._verts_features_list[i], tex_detached._verts_features_list[i]
+            )
+            self.assertFalse(tex_detached._verts_features_list[i].requires_grad)
+
+    def test_extend(self):
+        B = 10
+        mesh = init_mesh(B, 30, 50)
+        V = mesh._V
+        tex_uv = TexturesVertex(verts_features=torch.randn((B, V, 3)))
+        tex_mesh = Meshes(
+            verts=mesh.verts_padded(), faces=mesh.faces_padded(), textures=tex_uv
+        )
+        N = 20
+        new_mesh = tex_mesh.extend(N)
+
+        self.assertEqual(len(tex_mesh) * N, len(new_mesh))
+
+        tex_init = tex_mesh.textures
+        new_tex = new_mesh.textures
+
+        for i in range(len(tex_mesh)):
+            for n in range(N):
+                self.assertClose(
+                    tex_init.verts_features_list()[i],
+                    new_tex.verts_features_list()[i * N + n],
+                )
+                self.assertClose(
+                    tex_init._num_faces_per_mesh[i],
+                    new_tex._num_faces_per_mesh[i * N + n],
+                )
+
+        self.assertAllSeparate(
+            [tex_init.verts_features_padded(), new_tex.verts_features_padded()]
+        )
+
+        with self.assertRaises(ValueError):
+            tex_mesh.extend(N=-1)
+
+    def test_padded_to_packed(self):
+        # Case where each face in the mesh has 3 unique uv vertex indices
+        # - i.e. even if a vertex is shared between multiple faces it will
+        # have a unique uv coordinate for each face.
+        num_verts_per_mesh = [9, 6]
+        D = 10
+        verts_features_list = [torch.rand(v, D) for v in num_verts_per_mesh]
+        verts_features_packed = list_to_packed(verts_features_list)[0]
+        verts_features_list = packed_to_list(verts_features_packed, num_verts_per_mesh)
+        tex = TexturesVertex(verts_features=verts_features_list)
+
+        # This is set inside Meshes when textures is passed as an input.
+        # Here we set _num_faces_per_mesh and _num_verts_per_mesh explicity.
+        tex1 = tex.clone()
+        tex1._num_verts_per_mesh = num_verts_per_mesh
+        verts_packed = tex1.verts_features_packed()
+        verts_verts_list = tex1.verts_features_list()
+        verts_padded = tex1.verts_features_padded()
+
+        for f1, f2 in zip(verts_verts_list, verts_features_list):
+            self.assertTrue((f1 == f2).all().item())
+
+        self.assertTrue(verts_packed.shape == (sum(num_verts_per_mesh), D))
+        self.assertTrue(verts_padded.shape == (2, 9, D))
+
+        # Case where num_verts_per_mesh is not set and textures
+        # are initialized with a padded tensor.
+        tex2 = TexturesVertex(verts_features=verts_padded)
+        verts_packed = tex2.verts_features_packed()
+        verts_list = tex2.verts_features_list()
+
+        # Packed is just flattened padded as num_verts_per_mesh
+        # has not been provided.
+        self.assertTrue(verts_packed.shape == (9 * 2, D))
+
+        for i, (f1, f2) in enumerate(zip(verts_list, verts_features_list)):
+            n = num_verts_per_mesh[i]
+            self.assertTrue((f1[:n] == f2).all().item())
+
+    def test_getitem(self):
+        N = 5
+        V = 20
+        source = {"verts_features": torch.randn(size=(N, V, 128))}
+        tex = TexturesVertex(verts_features=source["verts_features"])
+
+        verts = torch.rand(size=(N, V, 3))
+        faces = torch.randint(size=(N, 10, 3), high=V)
+        meshes = Meshes(verts=verts, faces=faces, textures=tex)
+
+        tryindex(self, 2, tex, meshes, source)
+        tryindex(self, slice(0, 2, 1), tex, meshes, source)
+        index = torch.tensor([1, 0, 1, 0, 0], dtype=torch.bool)
+        tryindex(self, index, tex, meshes, source)
+        index = torch.tensor([0, 0, 0, 0, 0], dtype=torch.bool)
+        tryindex(self, index, tex, meshes, source)
+        index = torch.tensor([1, 2], dtype=torch.int64)
+        tryindex(self, index, tex, meshes, source)
+        tryindex(self, [2, 4], tex, meshes, source)
+
+    def test_sample_textures_error(self):
+        N = 5
+        V = 20
+        verts = torch.rand(size=(N, V, 3))
+        faces = torch.randint(size=(N, 10, 3), high=V)
+        tex = TexturesVertex(verts_features=torch.randn(size=(N, 10, 128)))
+
+        # Verts features have the wrong number of verts
+        with self.assertRaisesRegex(ValueError, "do not match the dimensions"):
+            Meshes(verts=verts, faces=faces, textures=tex)
+
+        # Verts features have the wrong batch dim
+        tex = TexturesVertex(verts_features=torch.randn(size=(1, V, 128)))
+        with self.assertRaisesRegex(ValueError, "do not match the dimensions"):
+            Meshes(verts=verts, faces=faces, textures=tex)
+
+        meshes = Meshes(verts=verts, faces=faces)
+        meshes.textures = tex
+
+        # Cannot use the texture attribute set on meshes for sampling
+        # textures if the dimensions don't match
+        with self.assertRaisesRegex(ValueError, "do not match the dimensions"):
+            meshes.sample_textures(None)
+
+
+class TestTexturesAtlas(TestCaseMixin, unittest.TestCase):
+    def test_sample_texture_atlas(self):
+        N, F, R = 1, 2, 2
+        verts = torch.randn((4, 3), dtype=torch.float32)
+        faces = torch.tensor([[2, 1, 0], [3, 1, 0]], dtype=torch.int64)
+        faces_atlas = torch.rand(size=(N, F, R, R, 3))
+        tex = TexturesAtlas(atlas=faces_atlas)
+        mesh = Meshes(verts=[verts], faces=[faces], textures=tex)
+        pix_to_face = torch.tensor([0, 1], dtype=torch.int64).view(1, 1, 1, 2)
+        barycentric_coords = torch.tensor(
+            [[0.5, 0.3, 0.2], [0.3, 0.6, 0.1]], dtype=torch.float32
+        ).view(1, 1, 1, 2, -1)
+        expected_vals = torch.tensor(
+            [[0.5, 1.0, 0.3], [0.3, 1.0, 0.9]], dtype=torch.float32
+        )
+        expected_vals = torch.zeros((1, 1, 1, 2, 3), dtype=torch.float32)
+        expected_vals[..., 0, :] = faces_atlas[0, 0, 0, 1, ...]
+        expected_vals[..., 1, :] = faces_atlas[0, 1, 1, 0, ...]
+
+        fragments = Fragments(
+            pix_to_face=pix_to_face,
+            bary_coords=barycentric_coords,
+            zbuf=torch.ones_like(pix_to_face),
+            dists=torch.ones_like(pix_to_face),
+        )
+        texels = mesh.textures.sample_textures(fragments)
+        self.assertTrue(torch.allclose(texels, expected_vals))
+
+    def test_textures_atlas_grad(self):
+        N, F, R = 1, 2, 2
+        verts = torch.randn((4, 3), dtype=torch.float32)
+        faces = torch.tensor([[2, 1, 0], [3, 1, 0]], dtype=torch.int64)
+        faces_atlas = torch.rand(size=(N, F, R, R, 3), requires_grad=True)
+        tex = TexturesAtlas(atlas=faces_atlas)
+        mesh = Meshes(verts=[verts], faces=[faces], textures=tex)
+        pix_to_face = torch.tensor([0, 1], dtype=torch.int64).view(1, 1, 1, 2)
+        barycentric_coords = torch.tensor(
+            [[0.5, 0.3, 0.2], [0.3, 0.6, 0.1]], dtype=torch.float32
+        ).view(1, 1, 1, 2, -1)
+        fragments = Fragments(
+            pix_to_face=pix_to_face,
+            bary_coords=barycentric_coords,
+            zbuf=torch.ones_like(pix_to_face),
+            dists=torch.ones_like(pix_to_face),
+        )
+        texels = mesh.textures.sample_textures(fragments)
+        grad_tex = torch.rand_like(texels)
+        grad_expected = torch.zeros_like(faces_atlas)
+        grad_expected[0, 0, 0, 1, :] = grad_tex[..., 0:1, :]
+        grad_expected[0, 1, 1, 0, :] = grad_tex[..., 1:2, :]
+        texels.backward(grad_tex)
+        self.assertTrue(hasattr(faces_atlas, "grad"))
+        self.assertTrue(torch.allclose(faces_atlas.grad, grad_expected))
+
+    def test_textures_atlas_init_fail(self):
+        # Incorrect sized tensors
+        with self.assertRaisesRegex(ValueError, "atlas"):
+            TexturesAtlas(atlas=torch.rand(size=(5, 10, 3)))
+
+        # Not a list or a tensor
+        with self.assertRaisesRegex(ValueError, "atlas"):
+            TexturesAtlas(atlas=(1, 1, 1))
+
+    def test_faces_verts_textures(self):
+        device = torch.device("cuda:0")
+        N, F, R = 2, 2, 8
+        num_faces = torch.randint(low=1, high=F, size=(N,))
+        faces_atlas = [
+            torch.rand(size=(num_faces[i].item(), R, R, 3), device=device)
+            for i in range(N)
+        ]
+        tex = TexturesAtlas(atlas=faces_atlas)
+
+        # faces_verts naive
+        faces_verts = []
+        for n in range(N):
+            ff = num_faces[n].item()
+            temp = torch.zeros(ff, 3, 3)
+            for f in range(ff):
+                t0 = faces_atlas[n][f, 0, -1]  # for v0, bary = (1, 0)
+                t1 = faces_atlas[n][f, -1, 0]  # for v1, bary = (0, 1)
+                t2 = faces_atlas[n][f, 0, 0]  # for v2, bary = (0, 0)
+                temp[f, 0] = t0
+                temp[f, 1] = t1
+                temp[f, 2] = t2
+            faces_verts.append(temp)
+        faces_verts = torch.cat(faces_verts, 0)
+
+        self.assertClose(faces_verts, tex.faces_verts_textures_packed().cpu())
+
+    def test_clone(self):
+        tex = TexturesAtlas(atlas=torch.rand(size=(1, 10, 2, 2, 3)))
+        tex.atlas_list()
+        tex_cloned = tex.clone()
+        self.assertSeparate(tex._atlas_padded, tex_cloned._atlas_padded)
+        self.assertClose(tex._atlas_padded, tex_cloned._atlas_padded)
+        self.assertSeparate(tex.valid, tex_cloned.valid)
+        self.assertTrue(tex.valid.eq(tex_cloned.valid).all())
+        for i in range(tex._N):
+            self.assertSeparate(tex._atlas_list[i], tex_cloned._atlas_list[i])
+            self.assertClose(tex._atlas_list[i], tex_cloned._atlas_list[i])
+
+    def test_detach(self):
+        tex = TexturesAtlas(atlas=torch.rand(size=(1, 10, 2, 2, 3), requires_grad=True))
+        tex.atlas_list()
+        tex_detached = tex.detach()
+        self.assertFalse(tex_detached._atlas_padded.requires_grad)
+        self.assertClose(tex_detached._atlas_padded, tex._atlas_padded)
+        for i in range(tex._N):
+            self.assertFalse(tex_detached._atlas_list[i].requires_grad)
+            self.assertClose(tex._atlas_list[i], tex_detached._atlas_list[i])
+
+    def test_extend(self):
+        B = 10
+        mesh = init_mesh(B, 30, 50)
+        F = mesh._F
+        tex_uv = TexturesAtlas(atlas=torch.randn((B, F, 2, 2, 3)))
+        tex_mesh = Meshes(
+            verts=mesh.verts_padded(), faces=mesh.faces_padded(), textures=tex_uv
+        )
+        N = 20
+        new_mesh = tex_mesh.extend(N)
+
+        self.assertEqual(len(tex_mesh) * N, len(new_mesh))
+
+        tex_init = tex_mesh.textures
+        new_tex = new_mesh.textures
+
+        for i in range(len(tex_mesh)):
+            for n in range(N):
+                self.assertClose(
+                    tex_init.atlas_list()[i], new_tex.atlas_list()[i * N + n]
+                )
+                self.assertClose(
+                    tex_init._num_faces_per_mesh[i],
+                    new_tex._num_faces_per_mesh[i * N + n],
+                )
+
+        self.assertAllSeparate([tex_init.atlas_padded(), new_tex.atlas_padded()])
+
+        with self.assertRaises(ValueError):
+            tex_mesh.extend(N=-1)
+
+    def test_padded_to_packed(self):
+        # Case where each face in the mesh has 3 unique uv vertex indices
+        # - i.e. even if a vertex is shared between multiple faces it will
+        # have a unique uv coordinate for each face.
+        R = 2
+        N = 20
+        num_faces_per_mesh = torch.randint(size=(N,), low=0, high=30)
+        atlas_list = [torch.rand(f, R, R, 3) for f in num_faces_per_mesh]
+        tex = TexturesAtlas(atlas=atlas_list)
+
+        # This is set inside Meshes when textures is passed as an input.
+        # Here we set _num_faces_per_mesh explicity.
+        tex1 = tex.clone()
+        tex1._num_faces_per_mesh = num_faces_per_mesh.tolist()
+        atlas_packed = tex1.atlas_packed()
+        atlas_list_new = tex1.atlas_list()
+        atlas_padded = tex1.atlas_padded()
+
+        for f1, f2 in zip(atlas_list_new, atlas_list):
+            self.assertTrue((f1 == f2).all().item())
+
+        sum_F = num_faces_per_mesh.sum()
+        max_F = num_faces_per_mesh.max().item()
+        self.assertTrue(atlas_packed.shape == (sum_F, R, R, 3))
+        self.assertTrue(atlas_padded.shape == (N, max_F, R, R, 3))
+
+        # Case where num_faces_per_mesh is not set and textures
+        # are initialized with a padded tensor.
+        atlas_list_padded = _list_to_padded_wrapper(atlas_list)
+        tex2 = TexturesAtlas(atlas=atlas_list_padded)
+        atlas_packed = tex2.atlas_packed()
+        atlas_list_new = tex2.atlas_list()
+
+        # Packed is just flattened padded as num_faces_per_mesh
+        # has not been provided.
+        self.assertTrue(atlas_packed.shape == (N * max_F, R, R, 3))
+
+        for i, (f1, f2) in enumerate(zip(atlas_list_new, atlas_list)):
+            n = num_faces_per_mesh[i]
+            self.assertTrue((f1[:n] == f2).all().item())
+
+    def test_getitem(self):  # index by int, slice, bool mask, int64 tensor and list
+        N, V, F = 5, 20, 10
+        source = {"atlas": torch.randn(size=(N, F, 4, 4, 3))}
+        tex = TexturesAtlas(atlas=source["atlas"])
+
+        verts = torch.rand(size=(N, V, 3))
+        faces = torch.randint(size=(N, F, 3), high=V)
+        meshes = Meshes(verts=verts, faces=faces, textures=tex)
+
+        # tryindex is called once per index, in the order listed here.
+        indices = (
+            2,
+            slice(0, 2, 1),
+            torch.tensor([1, 0, 1, 0, 0], dtype=torch.bool),
+            torch.tensor([0, 0, 0, 0, 0], dtype=torch.bool),
+            torch.tensor([1, 2], dtype=torch.int64),
+            [2, 4],
+        )
+        for index in indices:
+            tryindex(self, index, tex, meshes, source)
+
+    def test_sample_textures_error(self):  # mismatched atlas shapes must be rejected
+        N, V, F = 1, 20, 10
+        mismatch = "do not match the dimensions"
+        verts = torch.rand(size=(5, V, 3))
+        faces = torch.randint(size=(5, F, 3), high=V)
+        meshes = Meshes(verts=verts, faces=faces)
+
+        # Atlas batch dim is 1 while the meshes have batch dim 5.
+        wrong_batch = TexturesAtlas(atlas=torch.randn(size=(1, F, 4, 4, 3)))
+        with self.assertRaisesRegex(ValueError, mismatch):
+            Meshes(verts=verts, faces=faces, textures=wrong_batch)
+
+        # Atlas has 15 faces per mesh while the meshes have F = 10
+        # (its batch dim, N = 1, differs from the meshes as well).
+        wrong_faces = TexturesAtlas(atlas=torch.randn(size=(N, 15, 4, 4, 3)))
+        with self.assertRaisesRegex(ValueError, mismatch):
+            Meshes(verts=verts, faces=faces, textures=wrong_faces)
+
+        meshes = Meshes(verts=verts, faces=faces)
+        meshes.textures = wrong_faces
+
+        # A texture attached after construction cannot be used for sampling
+        # either when its dimensions do not match the meshes.
+        with self.assertRaisesRegex(ValueError, mismatch):
+            meshes.sample_textures(None)
+
+    def test_submeshes(self):  # submesh textures must hold the selected faces' atlas
+        N = 2
+        V = 5
+        F = 5
+        # The atlas is filled with 0, 1, 2, ... so every face owns a known
+        # run of 4 * 4 * 3 = 48 consecutive values.
+        atlas = torch.arange(N * F * 4 * 4 * 3, dtype=torch.float32)
+        tex = TexturesAtlas(atlas=atlas.reshape(N, F, 4, 4, 3))
+
+        verts = torch.rand(size=(N, V, 3))
+        faces = torch.randint(size=(N, F, 3), high=V)
+        mesh = Meshes(verts=verts, faces=faces, textures=tex)
+
+        # Two face-index tensors for the first mesh, none for the second.
+        first_mesh_subs = [torch.tensor([0, 2]), torch.tensor([1, 2])]
+        second_mesh_subs = []
+        sub_faces = [first_mesh_subs, second_mesh_subs]
+        subtex = mesh.submeshes(sub_faces).textures
+        subtex_faces = subtex.atlas_list()
+
+        self.assertEqual(len(subtex_faces), 2)
+        actual = subtex_faces[0].flatten().msort()
+        # Faces 0 and 2 of the first mesh own the atlas values [0, 48) and
+        # [96, 144) respectively.
+        expected = torch.cat(
+            (
+                torch.arange(4 * 4 * 3, dtype=torch.float32),
+                torch.arange(96, 96 + 4 * 4 * 3, dtype=torch.float32),
+            ),
+            0,
+        )
+        self.assertClose(actual, expected)
+
+
+class TestTexturesUV(TestCaseMixin, unittest.TestCase):  # unit tests for TexturesUV
+    def setUp(self) -> None:  # seed torch's RNG so the random inputs are repeatable
+        super().setUp()
+        torch.manual_seed(42)
+
+    def test_sample_textures_uv(self):  # sample_textures must match F.grid_sample
+        barycentric_coords = torch.tensor(
+            [[0.5, 0.3, 0.2], [0.3, 0.6, 0.1]], dtype=torch.float32
+        ).view(1, 1, 1, 2, -1)
+        dummy_verts = torch.zeros(4, 3)
+        vert_uvs = torch.tensor([[1, 0], [0, 1], [1, 1], [0, 0]], dtype=torch.float32)
+        face_uvs = torch.tensor([[0, 1, 2], [1, 2, 3]], dtype=torch.int64)
+        interpolated_uvs = torch.tensor(  # barycentric blend of each face's vert_uvs
+            [[0.5 + 0.2, 0.3 + 0.2], [0.6, 0.3 + 0.6]], dtype=torch.float32
+        )
+
+        # Create a dummy texture map
+        H = 2
+        W = 2
+        x = torch.linspace(0, 1, W).view(1, W).expand(H, W)
+        y = torch.linspace(0, 1, H).view(H, 1).expand(H, W)
+        tex_map = torch.stack([x, y], dim=2).view(1, H, W, 2)
+        pix_to_face = torch.tensor([0, 1], dtype=torch.int64).view(1, 1, 1, 2)
+        fragments = Fragments(
+            pix_to_face=pix_to_face,
+            bary_coords=barycentric_coords,
+            zbuf=pix_to_face,
+            dists=pix_to_face,
+        )
+
+        for align_corners in [True, False]:
+            tex = TexturesUV(
+                maps=tex_map,
+                faces_uvs=[face_uvs],
+                verts_uvs=[vert_uvs],
+                align_corners=align_corners,
+            )
+            meshes = Meshes(verts=[dummy_verts], faces=[face_uvs], textures=tex)
+            mesh_textures = meshes.textures
+            texels = mesh_textures.sample_textures(fragments)
+
+            # Expected output
+            pixel_uvs = interpolated_uvs * 2.0 - 1.0
+            pixel_uvs = pixel_uvs.view(2, 1, 1, 2)
+            tex_map_ = torch.flip(tex_map, [1]).permute(0, 3, 1, 2)
+            tex_map_ = torch.cat([tex_map_, tex_map_], dim=0)
+            expected_out = F.grid_sample(
+                tex_map_, pixel_uvs, align_corners=align_corners, padding_mode="border"
+            )
+            self.assertTrue(torch.allclose(texels.squeeze(), expected_out.squeeze()))
+
+    def test_textures_uv_init_fail(self):  # invalid constructor inputs must raise
+        # Maps has wrong shape
+        with self.assertRaisesRegex(ValueError, "maps"):
+            TexturesUV(
+                maps=torch.ones((5, 16, 16, 3, 4)),
+                faces_uvs=torch.rand(size=(5, 10, 3)),
+                verts_uvs=torch.rand(size=(5, 15, 2)),
+            )
+
+        # faces_uvs has wrong shape
+        with self.assertRaisesRegex(ValueError, "faces_uvs"):
+            TexturesUV(
+                maps=torch.ones((5, 16, 16, 3)),
+                faces_uvs=torch.rand(size=(5, 10, 3, 3)),
+                verts_uvs=torch.rand(size=(5, 15, 2)),
+            )
+
+        # verts_uvs has wrong shape
+        with self.assertRaisesRegex(ValueError, "verts_uvs"):
+            TexturesUV(
+                maps=torch.ones((5, 16, 16, 3)),
+                faces_uvs=torch.rand(size=(5, 10, 3)),
+                verts_uvs=torch.rand(size=(5, 15, 2, 3)),
+            )
+
+        # verts has different batch dim to faces
+        with self.assertRaisesRegex(ValueError, "verts_uvs"):
+            TexturesUV(
+                maps=torch.ones((5, 16, 16, 3)),
+                faces_uvs=torch.rand(size=(5, 10, 3)),
+                verts_uvs=torch.rand(size=(8, 15, 2)),
+            )
+
+        # maps has different batch dim to faces
+        with self.assertRaisesRegex(ValueError, "maps"):
+            TexturesUV(
+                maps=torch.ones((8, 16, 16, 3)),
+                faces_uvs=torch.rand(size=(5, 10, 3)),
+                verts_uvs=torch.rand(size=(5, 15, 2)),
+            )
+
+        # verts on different device to faces (shape is valid, only the device differs)
+        with self.assertRaisesRegex(ValueError, "verts_uvs"):
+            TexturesUV(
+                maps=torch.ones((5, 16, 16, 3)),
+                faces_uvs=torch.rand(size=(5, 10, 3)),
+                verts_uvs=torch.rand(size=(5, 15, 2), device="cuda"),
+            )
+
+        # maps on different device to faces
+        with self.assertRaisesRegex(ValueError, "map"):
+            TexturesUV(
+                maps=torch.ones((5, 16, 16, 3), device="cuda"),
+                faces_uvs=torch.rand(size=(5, 10, 3)),
+                verts_uvs=torch.rand(size=(5, 15, 2)),
+            )
+
+    def test_faces_verts_textures(self):  # packed result vs. a naive per-face loop
+        device = torch.device("cuda:0")  # this test requires a CUDA device
+        N, V, F, H, W = 2, 5, 12, 8, 8
+        vert_uvs = torch.rand((N, V, 2), dtype=torch.float32, device=device)
+        face_uvs = torch.randint(
+            high=V, size=(N, F, 3), dtype=torch.int64, device=device
+        )
+        maps = torch.rand((N, H, W, 3), dtype=torch.float32, device=device)
+
+        tex = TexturesUV(maps=maps, verts_uvs=vert_uvs, faces_uvs=face_uvs)
+
+        # naive faces_verts_textures
+        faces_verts_texs = []
+        for n in range(N):
+            temp = torch.zeros((F, 3, 3), device=device, dtype=torch.float32)
+            for f in range(F):
+                uv0 = vert_uvs[n, face_uvs[n, f, 0]]
+                uv1 = vert_uvs[n, face_uvs[n, f, 1]]
+                uv2 = vert_uvs[n, face_uvs[n, f, 2]]
+
+                idx = torch.stack((uv0, uv1, uv2), dim=0).view(1, 1, 3, 2)  # 1x1x3x2
+                idx = idx * 2.0 - 1.0
+                imap = maps[n].view(1, H, W, 3).permute(0, 3, 1, 2)  # 1x3xHxW
+                imap = torch.flip(imap, [2])
+
+                texts = torch.nn.functional.grid_sample(
+                    imap,
+                    idx,
+                    align_corners=tex.align_corners,
+                    padding_mode=tex.padding_mode,
+                )  # 1x3x1x3
+                temp[f] = texts[0, :, 0, :].permute(1, 0)
+            faces_verts_texs.append(temp)
+        faces_verts_texs = torch.cat(faces_verts_texs, 0)
+
+        self.assertClose(faces_verts_texs, tex.faces_verts_textures_packed())
+
+    def test_clone(self):  # clone() output must be equal but separate tensors
+        tex = TexturesUV(
+            maps=torch.ones((5, 16, 16, 3)),
+            faces_uvs=torch.rand(size=(5, 10, 3)),
+            verts_uvs=torch.rand(size=(5, 15, 2)),
+        )
+        tex.faces_uvs_list()
+        tex.verts_uvs_list()
+        tex_cloned = tex.clone()
+        self.assertSeparate(tex._faces_uvs_padded, tex_cloned._faces_uvs_padded)
+        self.assertClose(tex._faces_uvs_padded, tex_cloned._faces_uvs_padded)
+        self.assertSeparate(tex._verts_uvs_padded, tex_cloned._verts_uvs_padded)
+        self.assertClose(tex._verts_uvs_padded, tex_cloned._verts_uvs_padded)
+        self.assertSeparate(tex._maps_padded, tex_cloned._maps_padded)
+        self.assertClose(tex._maps_padded, tex_cloned._maps_padded)
+        self.assertSeparate(tex.valid, tex_cloned.valid)
+        self.assertTrue(tex.valid.eq(tex_cloned.valid).all())
+        for i in range(tex._N):
+            self.assertSeparate(tex._faces_uvs_list[i], tex_cloned._faces_uvs_list[i])
+            self.assertClose(tex._faces_uvs_list[i], tex_cloned._faces_uvs_list[i])
+            self.assertSeparate(tex._verts_uvs_list[i], tex_cloned._verts_uvs_list[i])
+            self.assertClose(tex._verts_uvs_list[i], tex_cloned._verts_uvs_list[i])
+            # tex._maps_list is not used anywhere so it's not stored. We call it explicitly
+            self.assertSeparate(tex.maps_list()[i], tex_cloned.maps_list()[i])
+            self.assertClose(tex.maps_list()[i], tex_cloned.maps_list()[i])
+
+    def test_detach(self):  # detach() output must equal the input without grad
+        tex = TexturesUV(
+            maps=torch.ones((5, 16, 16, 3), requires_grad=True),
+            faces_uvs=torch.rand(size=(5, 10, 3)),
+            verts_uvs=torch.rand(size=(5, 15, 2)),
+        )
+        tex.faces_uvs_list()
+        tex.verts_uvs_list()
+        tex_detached = tex.detach()
+        self.assertFalse(tex_detached._maps_padded.requires_grad)
+        self.assertClose(tex._maps_padded, tex_detached._maps_padded)
+        self.assertFalse(tex_detached._verts_uvs_padded.requires_grad)
+        self.assertClose(tex._verts_uvs_padded, tex_detached._verts_uvs_padded)
+        self.assertFalse(tex_detached._faces_uvs_padded.requires_grad)
+        self.assertClose(tex._faces_uvs_padded, tex_detached._faces_uvs_padded)
+        for i in range(tex._N):
+            self.assertFalse(tex_detached._verts_uvs_list[i].requires_grad)
+            self.assertClose(tex._verts_uvs_list[i], tex_detached._verts_uvs_list[i])
+            self.assertFalse(tex_detached._faces_uvs_list[i].requires_grad)
+            self.assertClose(tex._faces_uvs_list[i], tex_detached._faces_uvs_list[i])
+            # tex._maps_list is not used anywhere so it's not stored. We call it explicitly
+            self.assertFalse(tex_detached.maps_list()[i].requires_grad)
+            self.assertClose(tex.maps_list()[i], tex_detached.maps_list()[i])
+
+    def test_extend(self):  # extend(N) must repeat every texture N times in a row
+        B = 5
+        mesh = init_mesh(B, 30, 50)
+        V = mesh._V
+        num_faces = mesh.num_faces_per_mesh()
+        num_verts = mesh.num_verts_per_mesh()
+        faces_uvs_list = [torch.randint(size=(f, 3), low=0, high=V) for f in num_faces]
+        verts_uvs_list = [torch.rand(v, 2) for v in num_verts]
+        tex_uv = TexturesUV(
+            maps=torch.ones((B, 16, 16, 3)),
+            faces_uvs=faces_uvs_list,
+            verts_uvs=verts_uvs_list,
+        )
+        tex_mesh = Meshes(
+            verts=mesh.verts_list(), faces=mesh.faces_list(), textures=tex_uv
+        )
+        N = 2
+        new_mesh = tex_mesh.extend(N)
+
+        self.assertEqual(len(tex_mesh) * N, len(new_mesh))
+
+        tex_init = tex_mesh.textures
+        new_tex = new_mesh.textures
+
+        new_tex_num_verts = new_mesh.num_verts_per_mesh()
+        for i in range(len(tex_mesh)):
+            for n in range(N):
+                tex_nv = new_tex_num_verts[i * N + n]
+                self.assertClose(
+                    # The original textures were initialized using
+                    # verts uvs list
+                    tex_init.verts_uvs_list()[i],
+                    # In the new textures, the verts_uvs are initialized
+                    # from padded. The verts per mesh are not used to
+                    # convert from padded to list. See TexturesUV for an
+                    # explanation.
+                    new_tex.verts_uvs_list()[i * N + n][:tex_nv, ...],
+                )
+                self.assertClose(
+                    tex_init.faces_uvs_list()[i], new_tex.faces_uvs_list()[i * N + n]
+                )
+                self.assertClose(
+                    tex_init.maps_padded()[i, ...], new_tex.maps_padded()[i * N + n]
+                )
+                self.assertClose(
+                    tex_init._num_faces_per_mesh[i],
+                    new_tex._num_faces_per_mesh[i * N + n],
+                )
+
+        self.assertAllSeparate(
+            [
+                tex_init.faces_uvs_padded(),
+                new_tex.faces_uvs_padded(),
+                tex_init.verts_uvs_padded(),
+                new_tex.verts_uvs_padded(),
+                tex_init.maps_padded(),
+                new_tex.maps_padded(),
+            ]
+        )
+
+        with self.assertRaises(ValueError):
+            tex_mesh.extend(N=-1)
+
+    def test_padded_to_packed(self):  # list and padded UV data must round-trip
+        # Case where each face in the mesh has 3 unique uv vertex indices
+        # - i.e. even if a vertex is shared between multiple faces it will
+        # have a unique uv coordinate for each face.
+        N = 2
+        faces_uvs_list = [
+            torch.tensor([[0, 1, 2], [3, 5, 4], [7, 6, 8]]),
+            torch.tensor([[0, 1, 2], [3, 4, 5]]),
+        ]  # (N, 3, 3)
+        verts_uvs_list = [torch.ones(9, 2), torch.ones(6, 2)]
+
+        num_faces_per_mesh = [f.shape[0] for f in faces_uvs_list]
+        num_verts_per_mesh = [v.shape[0] for v in verts_uvs_list]
+        tex = TexturesUV(
+            maps=torch.ones((N, 16, 16, 3)),
+            faces_uvs=faces_uvs_list,
+            verts_uvs=verts_uvs_list,
+        )
+
+        # This is set inside Meshes when textures is passed as an input.
+        # Here we set _num_faces_per_mesh and _num_verts_per_mesh explicitly.
+        tex1 = tex.clone()
+        tex1._num_faces_per_mesh = num_faces_per_mesh
+        tex1._num_verts_per_mesh = num_verts_per_mesh
+        verts_list = tex1.verts_uvs_list()
+        verts_padded = tex1.verts_uvs_padded()
+
+        faces_list = tex1.faces_uvs_list()
+        faces_padded = tex1.faces_uvs_padded()
+
+        for f1, f2 in zip(faces_list, faces_uvs_list):
+            self.assertTrue((f1 == f2).all().item())
+
+        for f1, f2 in zip(verts_list, verts_uvs_list):
+            self.assertTrue((f1 == f2).all().item())
+
+        self.assertTrue(faces_padded.shape == (2, 3, 3))
+        self.assertTrue(verts_padded.shape == (2, 9, 2))
+
+        # Case where num_faces_per_mesh is not set and faces_verts_uvs
+        # are initialized with a padded tensor.
+        tex2 = TexturesUV(
+            maps=torch.ones((N, 16, 16, 3)),
+            verts_uvs=verts_padded,
+            faces_uvs=faces_padded,
+        )
+        faces_list = tex2.faces_uvs_list()
+        verts_list = tex2.verts_uvs_list()
+
+        for i, (f1, f2) in enumerate(zip(faces_list, faces_uvs_list)):
+            n = num_faces_per_mesh[i]
+            self.assertTrue((f1[:n] == f2).all().item())
+
+        for i, (f1, f2) in enumerate(zip(verts_list, verts_uvs_list)):
+            n = num_verts_per_mesh[i]
+            self.assertTrue((f1[:n] == f2).all().item())
+
+    def test_to(self):  # to(device) must move every padded tensor
+        tex = TexturesUV(
+            maps=torch.ones((5, 16, 16, 3)),
+            faces_uvs=torch.randint(size=(5, 10, 3), high=15),
+            verts_uvs=torch.rand(size=(5, 15, 2)),
+        )
+        device = torch.device("cuda:0")  # this test requires a CUDA device
+        tex = tex.to(device)
+        self.assertEqual(tex._faces_uvs_padded.device, device)
+        self.assertEqual(tex._verts_uvs_padded.device, device)
+        self.assertEqual(tex._maps_padded.device, device)
+
+    def test_mesh_to(self):  # Meshes.to() moves its textures, source stays on cpu
+        tex_cpu = TexturesUV(
+            maps=torch.ones((5, 16, 16, 3)),
+            faces_uvs=torch.randint(size=(5, 10, 3), high=15),
+            verts_uvs=torch.rand(size=(5, 15, 2)),
+        )
+        verts = torch.rand(size=(5, 15, 3))
+        faces = torch.randint(size=(5, 10, 3), high=15)
+        mesh_cpu = Meshes(faces=faces, verts=verts, textures=tex_cpu)
+        cpu = torch.device("cpu")
+        device = torch.device("cuda:0")  # this test requires a CUDA device
+        tex = mesh_cpu.to(device).textures
+        self.assertEqual(tex._faces_uvs_padded.device, device)
+        self.assertEqual(tex._verts_uvs_padded.device, device)
+        self.assertEqual(tex._maps_padded.device, device)
+        self.assertEqual(tex_cpu._verts_uvs_padded.device, cpu)
+
+        self.assertEqual(tex_cpu.device, cpu)
+        self.assertEqual(tex.device, device)
+
+    def test_getitem(self):  # index by int, slice, bool mask, int64 tensor and list
+        N = 5
+        V = 20
+        F = 10
+        source = {
+            "maps": torch.rand(size=(N, 1, 1, 3)),
+            "faces_uvs": torch.randint(size=(N, F, 3), high=V),
+            "verts_uvs": torch.randn(size=(N, V, 2)),
+        }
+        tex = TexturesUV(
+            maps=source["maps"],
+            faces_uvs=source["faces_uvs"],
+            verts_uvs=source["verts_uvs"],
+        )
+
+        verts = torch.rand(size=(N, V, 3))
+        faces = torch.randint(size=(N, F, 3), high=V)
+        meshes = Meshes(verts=verts, faces=faces, textures=tex)
+
+        tryindex(self, 2, tex, meshes, source)
+        tryindex(self, slice(0, 2, 1), tex, meshes, source)
+        index = torch.tensor([1, 0, 1, 0, 0], dtype=torch.bool)
+        tryindex(self, index, tex, meshes, source)
+        index = torch.tensor([0, 0, 0, 0, 0], dtype=torch.bool)
+        tryindex(self, index, tex, meshes, source)
+        index = torch.tensor([1, 2], dtype=torch.int64)
+        tryindex(self, index, tex, meshes, source)
+        tryindex(self, [2, 4], tex, meshes, source)
+
+    def test_centers_for_image(self):  # centers_for_image(0) vs hand-computed values
+        maps = torch.rand(size=(1, 257, 129, 3))
+        verts_uvs = torch.FloatTensor([[[0.25, 0.125], [0.5, 0.625], [0.5, 0.5]]])
+        faces_uvs = torch.zeros(size=(1, 0, 3), dtype=torch.int64)
+        tex = TexturesUV(maps=maps, faces_uvs=faces_uvs, verts_uvs=verts_uvs)
+
+        expected = torch.FloatTensor([[32, 224], [64, 96], [64, 128]])
+        self.assertClose(tex.centers_for_image(0), expected)
+
+    def test_sample_textures_error(self):  # mismatched UV textures must be rejected
+        N = 1
+        V = 20
+        F = 10
+        maps = torch.rand(size=(N, 1, 1, 3))
+        verts_uvs = torch.randn(size=(N, V, 2))
+        tex = TexturesUV(
+            maps=maps,
+            faces_uvs=torch.randint(size=(N, 15, 3), high=V),
+            verts_uvs=verts_uvs,
+        )
+        verts = torch.rand(size=(5, V, 3))
+        faces = torch.randint(size=(5, 10, 3), high=V)
+        meshes = Meshes(verts=verts, faces=faces)
+
+        # Wrong number of faces (NOTE(review): the batch dim, N=1 vs 5, differs too)
+        with self.assertRaisesRegex(ValueError, "do not match the dimensions"):
+            Meshes(verts=verts, faces=faces, textures=tex)
+
+        # Wrong batch dim for faces
+        tex = TexturesUV(
+            maps=maps,
+            faces_uvs=torch.randint(size=(1, F, 3), high=V),
+            verts_uvs=verts_uvs,
+        )
+        with self.assertRaisesRegex(ValueError, "do not match the dimensions"):
+            Meshes(verts=verts, faces=faces, textures=tex)
+
+        # Wrong batch dim for verts_uvs is not necessary to check as
+        # there is already a check inside TexturesUV for a batch dim
+        # mismatch with faces_uvs
+
+        meshes = Meshes(verts=verts, faces=faces)
+        meshes.textures = tex
+
+        # Cannot use the texture attribute set on meshes for sampling
+        # textures if the dimensions don't match
+        with self.assertRaisesRegex(ValueError, "do not match the dimensions"):
+            meshes.sample_textures(None)
+
+    def test_submeshes(self):  # submesh textures keep the selected faces' UV data
+        N = 2
+        faces_uvs_list = [
+            torch.LongTensor([[0, 1, 2], [3, 5, 4], [7, 6, 8]]),
+            torch.LongTensor([[0, 1, 2], [3, 4, 5]]),
+        ]
+        verts_uvs_list = [
+            torch.arange(18, dtype=torch.float32).reshape(9, 2),
+            torch.ones(6, 2),
+        ]
+        tex = TexturesUV(
+            maps=torch.rand((N, 16, 16, 3)),
+            faces_uvs=faces_uvs_list,
+            verts_uvs=verts_uvs_list,
+        )
+
+        sub_faces = [
+            [torch.tensor([0, 1]), torch.tensor([1, 2])],
+            [],
+        ]
+
+        mesh = Meshes(
+            verts=[torch.rand(9, 3), torch.rand(6, 3)],
+            faces=faces_uvs_list,
+            textures=tex,
+        )
+        subtex = mesh.submeshes(sub_faces).textures
+        subtex_faces = subtex.faces_uvs_padded()
+        self.assertEqual(len(subtex_faces), 2)
+        self.assertClose(
+            subtex_faces[0],
+            torch.tensor([[0, 1, 2], [3, 5, 4]]),
+        )
+        self.assertClose(
+            subtex.verts_uvs_list()[0][subtex.faces_uvs_list()[0].flatten()]
+            .flatten()
+            .msort(),
+            torch.arange(12, dtype=torch.float32),
+        )
+        self.assertClose(
+            subtex.maps_padded(), tex.maps_padded()[:1].expand(2, -1, -1, -1)
+        )
+
+
+class TestRectanglePacking(TestCaseMixin, unittest.TestCase):  # packing tests
+    def setUp(self) -> None:  # seed torch's RNG so test_random is repeatable
+        super().setUp()
+        torch.manual_seed(42)
+
+    def wrap_pack(self, sizes):
+        """
+        Run pack_rectangles on sizes and return its result after checking
+        that every rectangle lies inside the bounding box, that no two of
+        them overlap, and that the box is tight with no empty row or column.
+        """
+        packed = pack_rectangles(sizes)
+        bbox = packed.total_size
+        self.assertGreaterEqual(bbox[0], 0)
+        self.assertGreaterEqual(bbox[1], 0)
+        occupied = torch.zeros(bbox, dtype=torch.bool)
+        reaches_x_edge = False
+        reaches_y_edge = False
+        for (in_x, in_y), (x0, y0, flipped, is_first) in zip(
+            sizes, packed.locations
+        ):
+            self.assertTrue(is_first)
+            self.assertGreaterEqual(x0, 0)
+            self.assertGreaterEqual(y0, 0)
+            # A flipped rectangle is placed with its two extents swapped.
+            x1 = (in_y if flipped else in_x) + x0
+            y1 = (in_x if flipped else in_y) + y0
+            self.assertGreaterEqual(bbox[0], x1)
+            reaches_x_edge = reaches_x_edge or bbox[0] == x1
+            self.assertGreaterEqual(bbox[1], y1)
+            reaches_y_edge = reaches_y_edge or bbox[1] == y1
+            # The target cells must be free before this rectangle claims them.
+            cells = (slice(x0, x1), slice(y0, y1))
+            self.assertEqual(torch.sum(occupied[cells]), 0)
+            occupied[cells] = 1
+        self.assertTrue(reaches_x_edge)
+        self.assertTrue(reaches_y_edge)
+
+        for dim in (0, 1):
+            filled = torch.sum(occupied, dim=dim, dtype=torch.int32)
+            self.assertTrue(torch.all(filled > 0))
+        return packed
+
+    def assert_bb(self, sizes, expected):
+        """
+        Pack sizes via wrap_pack and compare the set of box sides to expected.
+        """
+        bounding_box = self.wrap_pack(sizes).total_size
+        self.assertSetEqual(set(bounding_box), expected)
+
+    def test_simple(self):  # hand-picked inputs with known bounding box sides
+        cases = [
+            ([(3, 4), (4, 3)], {6, 4}),
+            ([(2, 2), (2, 4), (2, 2)], {4}),
+            # many squares
+            ([(2, 2)] * 9, {2, 18}),
+            # One big square and many small ones.
+            ([(3, 3)] + [(1, 1)] * 2, {3, 4}),
+            ([(3, 3)] + [(1, 1)] * 3, {3, 4}),
+            ([(3, 3)] + [(1, 1)] * 4, {3, 5}),
+            ([(3, 3)] + [(1, 1)] * 5, {3, 5}),
+            ([(1, 1)] * 6 + [(3, 3)], {3, 5}),
+            ([(3, 3)] + [(1, 1)] * 7, {3, 6}),
+            # many identical rectangles
+            ([(7, 190)] * 4 + [(190, 7)] * 4, {190, 56}),
+            # require placing the flipped version of a rectangle
+            ([(1, 100), (5, 96), (4, 5)], {100, 6}),
+        ]
+        for sizes, expected in cases:
+            self.assert_bb(sizes, expected)
+
+    def test_random(self):  # random sizes only need to pass wrap_pack's checks
+        for _ in range(5):
+            vals = torch.randint(size=(20, 2), low=1, high=18)
+            # Each row of vals holds the (x, y) extent of one rectangle.
+            rows = vals.tolist()
+            sizes = [(int(x), int(y)) for x, y in rows]
+            self.wrap_pack(sizes)
+
+    def test_all_identical(self):  # same identifier 3x: one slot, first flagged
+        rectangle = Rectangle(xsize=61, ysize=82, identifier=1729)
+        total_size, locations = pack_unique_rectangles([rectangle] * 3)
+        self.assertEqual(total_size, (61, 82))
+        self.assertEqual(len(locations), 3)
+        for position, location in enumerate(locations):
+            x, y, is_flipped, is_first = location
+            self.assertEqual((x, y), (0, 0))
+            self.assertFalse(is_flipped)
+            self.assertEqual(is_first, position == 0)
+
+    def test_one_different_id(self):  # two identifiers: two slots, both flipped
+        first = Rectangle(xsize=61, ysize=82, identifier=220)
+        second = Rectangle(xsize=61, ysize=82, identifier=284)
+        sizes = [first] * 3 + [second] * 3
+        total_size, locations = pack_unique_rectangles(sizes)
+        self.assertEqual(total_size, (82, 122))
+        self.assertEqual(len(locations), 6)
+        for i, (x, y, is_flipped, is_first) in enumerate(locations):
+            self.assertTrue(is_flipped)
+            self.assertEqual(is_first, i % 3 == 0)
+            self.assertEqual(x, 0)
+            # Copies of the first identifier sit at y == 61, the rest at y == 0.
+            expected_y = 61 if i < 3 else 0
+            self.assertEqual(y, expected_y)
diff --git a/pytorch3d/tests/test_transforms.py b/pytorch3d/tests/test_transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a2d729f7c75f41aac7f798899e2190a98550450
--- /dev/null
+++ b/pytorch3d/tests/test_transforms.py
@@ -0,0 +1,1323 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+import os
+import unittest
+from unittest import mock
+
+import torch
+from pytorch3d.transforms import random_rotations
+from pytorch3d.transforms.se3 import se3_log_map
+from pytorch3d.transforms.so3 import so3_exp_map
+from pytorch3d.transforms.transform3d import (
+    Rotate,
+    RotateAxisAngle,
+    Scale,
+    Transform3d,
+    Translate,
+)
+
+from .common_testing import TestCaseMixin
+
+
+class TestTransform(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:  # fixed seed keeps the random test inputs repeatable
+        torch.manual_seed(42)
+
+    def test_to(self):  # exercises .to()/.cpu()/.cuda(); needs device "cuda:0"
+        shift = Translate(torch.FloatTensor([[1.0, 2.0, 3.0]]))
+        rot = torch.FloatTensor([[0.0, 1.0, 0.0], [0.0, 0.0, 1.0], [1.0, 0.0, 0.0]])
+        rot = Rotate(rot)
+        t = Transform3d().compose(rot, shift)
+
+        cpu = torch.device("cpu")
+
+        moved = t.to("cpu")  # unchanged device/dtype: the same object is expected
+        self.assertIs(t, moved)
+        self.assertEqual(cpu, moved.device)
+        self.assertEqual(cpu, t.device)
+        self.assertEqual(torch.float32, moved.dtype)
+        self.assertEqual(torch.float32, t.dtype)
+
+        moved = t.to(cpu)  # same request, device object instead of a string
+        self.assertIs(t, moved)
+        self.assertEqual(cpu, moved.device)
+        self.assertEqual(cpu, t.device)
+        self.assertEqual(torch.float32, moved.dtype)
+        self.assertEqual(torch.float32, t.dtype)
+
+        moved = t.to(dtype=torch.float64, device=cpu)  # dtype change: new object
+        self.assertIsNot(t, moved)
+        self.assertEqual(cpu, moved.device)
+        self.assertEqual(cpu, t.device)
+        self.assertEqual(torch.float64, moved.dtype)
+        self.assertEqual(torch.float32, t.dtype)
+
+        gpu = torch.device("cuda:0")
+
+        moved = t.to("cuda:0")  # device change: new object, original untouched
+        self.assertIsNot(t, moved)
+        self.assertEqual(gpu, moved.device)
+        self.assertEqual(cpu, t.device)
+        self.assertEqual(torch.float32, moved.dtype)
+        self.assertEqual(torch.float32, t.dtype)
+
+        moved = t.to(gpu)
+        self.assertIsNot(t, moved)
+        self.assertEqual(gpu, moved.device)
+        self.assertEqual(cpu, t.device)
+        self.assertEqual(torch.float32, moved.dtype)
+        self.assertEqual(torch.float32, t.dtype)
+
+        moved = t.to(dtype=torch.float64, device=gpu)
+        self.assertIsNot(t, moved)
+        self.assertEqual(gpu, moved.device)
+        self.assertEqual(cpu, t.device)
+        self.assertEqual(torch.float64, moved.dtype)
+        self.assertEqual(torch.float32, t.dtype)
+
+        host_pts = torch.rand(9, 3)
+        device_pts = host_pts.cuda()
+        for _ in range(3):  # bounce between devices, transforming on each side
+            t = t.cpu()
+            t.transform_points(host_pts)
+            t = t.cuda()
+            t.transform_points(device_pts)
+            t = t.cuda()
+            t = t.cpu()
+
+    def test_dtype_propagation(self):
+        """
+        Every transform appended to a chain must inherit the dtype the
+        chain was created with, and so must the points it outputs.
+        """
+        # Run with two dtypes so that more than just the default
+        # dtype is covered.
+        for dtype in (torch.float32, torch.float64):
+            basis = torch.tensor(
+                [[0.0, 1.0, 0.0], [0.0, 0.0, 1.0], [1.0, 0.0, 0.0]],
+                dtype=dtype,
+            )
+            chain = (
+                Transform3d(dtype=dtype)
+                .rotate(basis)
+                .rotate_axis_angle(
+                    basis[0],
+                    "X",
+                )
+                .translate(3, 2, 1)
+                .scale(0.5)
+            )
+
+            self.assertEqual(chain.dtype, dtype)
+            for child in chain._transforms:
+                self.assertEqual(child.dtype, dtype)
+
+            result = chain.transform_points(basis)
+            self.assertEqual(result.dtype, dtype)
+
+    def test_clone(self):
+        """
+        Check that cloned transformations contain different _matrix objects.
+        Also, the clone of a composed translation and rotation has to be
+        numerically the same as the composition of the individual clones.
+        """
+        tr = Translate(torch.FloatTensor([[1.0, 2.0, 3.0]]))
+        R = torch.FloatTensor([[0.0, 1.0, 0.0], [0.0, 0.0, 1.0], [1.0, 0.0, 0.0]])
+        R = Rotate(R)
+
+        # a clone must own its _matrix tensor instead of sharing
+        # the one of the transform it was cloned from
+        for t in (R, tr):
+            self.assertIsNot(t._matrix, t.clone()._matrix)
+
+        # pair every composed transform with its counterpart in the clone
+        # (zip is required: a bare tuple would pair R with tr instead)
+        t1 = Transform3d().compose(R, tr)
+        for t, t_clone in zip(t1._transforms, t1.clone()._transforms):
+            self.assertIsNot(t, t_clone)
+            self.assertIsNot(t._matrix, t_clone._matrix)
+
+        # check that all composed transforms are numerically equivalent
+        t2 = Transform3d().compose(R.clone(), tr.clone())
+        t3 = t1.clone()
+        for t_pair in ((t1, t2), (t1, t3), (t2, t3)):
+            matrix1 = t_pair[0].get_matrix()
+            matrix2 = t_pair[1].get_matrix()
+            self.assertTrue(torch.allclose(matrix1, matrix2))
+
+    def test_init_with_custom_matrix(self):  # batched and unbatched 4x4 inputs
+        for src in (torch.randn(10, 4, 4), torch.randn(4, 4)):
+            tf = Transform3d(matrix=src)
+            self.assertTrue(tf.device == src.device)
+            self.assertTrue(tf._matrix.dtype == src.dtype)
+            self.assertTrue(torch.allclose(tf._matrix, src.view(tf._matrix.shape)))
+
+    def test_init_with_custom_matrix_errors(self):  # bad shapes raise ValueError
+        invalid = [[10, 5, 4], [3, 4], [10, 4, 4, 1], [10, 4, 4, 2], [4, 4, 4, 3]]
+        for shape in invalid:
+            candidate = torch.randn(*shape).float()
+            self.assertRaises(ValueError, Transform3d, matrix=candidate)
+
+    def test_get_se3(self):
+        # get_se3_log() must agree with se3_log_map applied to the composed matrix.
+        N = 16
+        tr = Translate(torch.rand((N, 3)))
+        R = Rotate(random_rotations(N))
+        transform = Transform3d().compose(R, tr)
+        se3_log = transform.get_se3_log()
+        gt_se3_log = se3_log_map(transform.get_matrix())
+        self.assertClose(se3_log, gt_se3_log)
+
+    def test_translate(self):  # points get shifted, normals come back unchanged
+        shift = Transform3d().translate(1, 2, 3)
+        pts = torch.tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.5, 0.5, 0.0]]).view(
+            1, 3, 3
+        )
+        nrm = torch.tensor(
+            [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [1.0, 1.0, 0.0]]
+        ).view(1, 3, 3)
+        want_pts = torch.tensor(
+            [[2.0, 2.0, 3.0], [1.0, 3.0, 3.0], [1.5, 2.5, 3.0]]
+        ).view(1, 3, 3)
+        want_nrm = torch.tensor(
+            [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [1.0, 1.0, 0.0]]
+        ).view(1, 3, 3)
+        got_pts = shift.transform_points(pts)
+        got_nrm = shift.transform_normals(nrm)
+        self.assertTrue(torch.allclose(got_pts, want_pts))
+        self.assertTrue(torch.allclose(got_nrm, want_nrm))
+
+    @mock.patch.dict(os.environ, {"PYTORCH3D_CHECK_ROTATION_MATRICES": "1"}, clear=True)
+    def test_rotate_check_rot_valid_on(self):  # env flag patched to "1"
+        rot = so3_exp_map(torch.randn((1, 3)))
+        spin = Transform3d().rotate(rot)
+        pts = torch.tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.5, 0.5, 0.0]]).view(
+            1, 3, 3
+        )
+        nrm = torch.tensor(
+            [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [1.0, 1.0, 0.0]]
+        ).view(1, 3, 3)
+        # reference: right-multiply the row vectors by the rotation matrix
+        want_pts = torch.bmm(pts, rot)
+        want_nrm = torch.bmm(nrm, rot)
+        got_pts, got_nrm = spin.transform_points(pts), spin.transform_normals(nrm)
+        self.assertTrue(torch.allclose(got_pts, want_pts))
+        self.assertTrue(torch.allclose(got_nrm, want_nrm))
+
+    @mock.patch.dict(os.environ, {"PYTORCH3D_CHECK_ROTATION_MATRICES": "0"}, clear=True)
+    def test_rotate_check_rot_valid_off(self):  # env flag patched to "0"
+        rot = so3_exp_map(torch.randn((1, 3)))
+        spin = Transform3d().rotate(rot)
+        pts = torch.tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.5, 0.5, 0.0]]).view(
+            1, 3, 3
+        )
+        nrm = torch.tensor(
+            [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [1.0, 1.0, 0.0]]
+        ).view(1, 3, 3)
+        # reference: right-multiply the row vectors by the rotation matrix
+        want_pts = torch.bmm(pts, rot)
+        want_nrm = torch.bmm(nrm, rot)
+        got_pts, got_nrm = spin.transform_points(pts), spin.transform_normals(nrm)
+        self.assertTrue(torch.allclose(got_pts, want_pts))
+        self.assertTrue(torch.allclose(got_nrm, want_nrm))
+
+    def test_scale(self):  # a uniform scale chained with a per-axis scale
+        resize = Transform3d().scale(2.0).scale(0.5, 0.25, 1.0)
+        pts = torch.tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.5, 0.5, 0.0]]).view(
+            1, 3, 3
+        )
+        nrm = torch.tensor(
+            [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [1.0, 1.0, 0.0]]
+        ).view(1, 3, 3)
+        want_pts = torch.tensor(
+            [[1.00, 0.00, 0.00], [0.00, 0.50, 0.00], [0.50, 0.25, 0.00]]
+        ).view(1, 3, 3)
+        want_nrm = torch.tensor(
+            [[1.0, 0.0, 0.0], [0.0, 2.0, 0.0], [1.0, 2.0, 0.0]]
+        ).view(1, 3, 3)
+        got_pts = resize.transform_points(pts)
+        got_nrm = resize.transform_normals(nrm)
+        self.assertTrue(torch.allclose(got_pts, want_pts))
+        self.assertTrue(torch.allclose(got_nrm, want_nrm))
+
+    def test_scale_translate(self):  # per-axis scale followed by a translation
+        combo = Transform3d().scale(2, 1, 3).translate(1, 2, 3)
+        pts = torch.tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.5, 0.5, 0.0]]).view(
+            1, 3, 3
+        )
+        nrm = torch.tensor(
+            [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [1.0, 1.0, 0.0]]
+        ).view(1, 3, 3)
+        want_pts = torch.tensor(
+            [[3.0, 2.0, 3.0], [1.0, 3.0, 3.0], [2.0, 2.5, 3.0]]
+        ).view(1, 3, 3)
+        want_nrm = torch.tensor(
+            [[0.5, 0.0, 0.0], [0.0, 1.0, 0.0], [0.5, 1.0, 0.0]]
+        ).view(1, 3, 3)
+        got_pts = combo.transform_points(pts)
+        got_nrm = combo.transform_normals(nrm)
+        self.assertTrue(torch.allclose(got_pts, want_pts))
+        self.assertTrue(torch.allclose(got_nrm, want_nrm))
+
+    def test_rotate_axis_angle(self):  # rotation by 90.0 about the "Z" axis
+        spin = Transform3d().rotate_axis_angle(90.0, axis="Z")
+        pts = torch.tensor([[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 1.0, 1.0]]).view(
+            1, 3, 3
+        )
+        nrm = torch.tensor(
+            [[1.0, 0.0, 0.0], [1.0, 0.0, 0.0], [1.0, 0.0, 0.0]]
+        ).view(1, 3, 3)
+        want_pts = torch.tensor(
+            [[0.0, 0.0, 0.0], [-1.0, 0.0, 0.0], [-1.0, 0.0, 1.0]]
+        ).view(1, 3, 3)
+        want_nrm = torch.tensor(
+            [[0.0, 1.0, 0.0], [0.0, 1.0, 0.0], [0.0, 1.0, 0.0]]
+        ).view(1, 3, 3)
+        got_pts = spin.transform_points(pts)
+        got_nrm = spin.transform_normals(nrm)
+        self.assertTrue(torch.allclose(got_pts, want_pts, atol=1e-7))
+        self.assertTrue(torch.allclose(got_nrm, want_nrm, atol=1e-7))
+
+    def test_transform_points_fail(self):  # a 1-D point tensor must be rejected
+        shrink = Scale(0.1, 0.1, 0.1)
+        count = 7
+        with self.assertRaises(ValueError):
+            shrink.transform_points(torch.randn(count))
+
+    def test_compose_fail(self):
+        # compose() accepts Transform3d objects only; a raw tensor is an error
+        shrink = Scale(0.1, 0.1, 0.1)
+        with self.assertRaises(ValueError):
+            shrink.compose(torch.randn(100))
+
+    def test_transform_points_eps(self):
+        projector = Transform3d()
+        swap_zw = [
+            [
+                [1.0, 0.0, 0.0, 0.0],
+                [0.0, 1.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0, 1.0],
+                [0.0, 0.0, 1.0, 0.0],
+            ]
+        ]
+        projector._matrix = torch.FloatTensor(swap_zw)
+        pts = torch.tensor(
+            [[0.0, 1.0, 0.0], [0.0, 0.0, 1e-5], [-1.0, 0.0, 1e-5]]
+        ).view(
+            1, 3, 3
+        )  # every z-coordinate is zero or very close to it
+
+        raw = projector.transform_points(pts)
+        clamped = projector.transform_points(pts, eps=1e-4)
+
+        self.assertFalse(bool(torch.isfinite(raw.sum())))  # without eps: non-finite
+        self.assertTrue(bool(torch.isfinite(clamped.sum())))
+
+    def test_inverse(self, batch_size=5):
+        dev = torch.device("cuda:0")
+
+        # build several random chains of translate / rotate / scale transforms
+        for _ in range(10):  # ten independent chains
+
+            # raw 4x4 matrices of the individual links, in composition order
+            factors = []
+
+            for i in range(10):
+                draw = float(torch.rand(1))
+                if draw <= 1.0 / 3.0:
+                    link = Translate(
+                        torch.randn(
+                            (batch_size, 3), dtype=torch.float32, device=dev
+                        ),
+                        device=dev,
+                    )
+                elif draw <= 2.0 / 3.0:
+                    link = Rotate(
+                        so3_exp_map(
+                            torch.randn(
+                                (batch_size, 3), dtype=torch.float32, device=dev
+                            )
+                        ),
+                        device=dev,
+                    )
+                else:
+                    raw = torch.randn(
+                        (batch_size, 3), dtype=torch.float32, device=dev
+                    )
+                    raw = raw.sign() * raw.abs().clamp(min=0.2)  # floor the magnitude
+                    link = Scale(raw, device=dev)
+                factors.append(link._matrix.clone())
+
+                if i == 0:
+                    chain = link
+                else:
+                    chain = chain.compose(link)
+
+            # obtain the inverse through each available code path
+            inv_flag_on = chain.inverse(invert_composed=True).get_matrix()
+            inv_flag_on_raw = chain.inverse(invert_composed=True)._matrix
+            inv_flag_off = chain.inverse(invert_composed=False).get_matrix()
+            inv_torch = chain.get_matrix().inverse()
+
+            # reference: left-multiply the inverses of the individual links
+            reference = torch.eye(4, dtype=torch.float32, device=dev)
+            reference = reference[None].repeat(batch_size, 1, 1)
+            for factor in factors:
+                reference = torch.bmm(torch.inverse(factor), reference)
+
+            # every path has to match the reference
+            for m in (inv_flag_on, inv_flag_on_raw, inv_flag_off, inv_torch):
+                self.assertTrue(torch.allclose(m, reference, atol=1e-3))
+
+    def _check_indexed_transforms(self, t3d, t3d_selected, indices):
+        # indices yields (position in the selection, position in the source)
+        full = t3d.get_matrix()
+        subset = t3d_selected.get_matrix()
+        for subset_pos, full_pos in indices:
+            expected, actual = full[full_pos], subset[subset_pos]
+            self.assertClose(expected, actual)
+
+    def test_get_item(self, batch_size=5):
+        dev = torch.device("cuda:0")
+
+        raw = torch.randn(
+            size=[batch_size, 4, 4], device=dev, dtype=torch.float32
+        )
+
+        # wrap the random matrices in a batched Transform3d
+        batch = Transform3d(matrix=raw)
+
+        # single non-negative integer
+        key = 1
+        picked = batch[key]
+        self.assertEqual(len(picked), 1)
+        self._check_indexed_transforms(batch, picked, [(0, 1)])
+
+        # single negative integer
+        key = -1
+        picked = batch[key]
+        self.assertEqual(len(picked), 1)
+        self._check_indexed_transforms(batch, picked, [(0, -1)])
+
+        # list of integers
+        key = [1, 2]
+        picked = batch[key]
+        self.assertEqual(len(picked), len(key))
+        self._check_indexed_transforms(batch, picked, enumerate(key))
+
+        # empty list
+        key = []
+        picked = batch[key]
+        self.assertEqual(len(picked), 0)
+        self.assertEqual(picked.get_matrix().nelement(), 0)
+
+        # slice
+        key = slice(0, 2, 1)
+        picked = batch[key]
+        self.assertEqual(len(picked), 2)
+        self._check_indexed_transforms(batch, picked, [(0, 0), (1, 1)])
+
+        # empty slice
+        key = slice(0, 0, 1)
+        picked = batch[key]
+        self.assertEqual(len(picked), 0)
+        self.assertEqual(picked.get_matrix().nelement(), 0)
+
+        # boolean mask tensor
+        key = (torch.rand(batch_size) > 0.5).to(dev)
+        key[:2] = True  # guarantee that at least two entries are selected
+        picked = batch[key]
+        self.assertEqual(len(picked), key.sum())
+        self._check_indexed_transforms(
+            batch,
+            picked,
+            zip(
+                torch.arange(key.sum()),
+                torch.nonzero(key, as_tuple=False).squeeze(),
+            ),
+        )
+
+        # boolean mask tensor with nothing selected
+        key = torch.zeros(batch_size).bool()
+        picked = batch[key]
+        self.assertEqual(len(picked), 0)
+        self.assertEqual(picked.get_matrix().nelement(), 0)
+
+        # integer tensor
+        key = torch.tensor([1, 2], dtype=torch.int64, device=dev)
+        picked = batch[key]
+        self.assertEqual(len(picked), key.numel())
+        self._check_indexed_transforms(batch, picked, enumerate(key.tolist()))
+
+        # integer tensor with negative entries
+        key = -(torch.tensor([1, 2], dtype=torch.int64, device=dev))
+        picked = batch[key]
+        self.assertEqual(len(picked), key.numel())
+        self._check_indexed_transforms(batch, picked, enumerate(key.tolist()))
+
+        # unsupported index types must raise IndexError
+        for bad_key in (
+            torch.tensor([1, 0, 1], dtype=torch.float32, device=dev),  # float tensor
+            1.2,  # python float
+        ):
+            with self.assertRaises(IndexError):
+                batch[bad_key]
+
+    def test_stack(self):  # stacking a batch of 1 with a batch of 3 gives 4
+        rots = random_rotations(3)
+        triple = Transform3d().rotate(rots).translate(torch.full((3, 3), 0.3))
+        single = Scale(37)
+        stacked = single.stack(triple)
+        self.assertEqual(len(single), 1)
+        self.assertEqual(len(triple), 3)
+        self.assertEqual(len(stacked), 4)
+        self.assertClose(
+            stacked.get_matrix(),
+            torch.cat([single.get_matrix(), triple.get_matrix()]),
+        )
+        pts = torch.rand(4, 5, 3)
+        want = torch.cat(
+            [
+                single.transform_points(pts[:1]),
+                triple.transform_points(pts[1:]),
+            ]
+        )
+        got = stacked.transform_points(pts)
+        self.assertClose(got, want)
+
+
+class TestTranslate(unittest.TestCase):
+    def test_python_scalar(self):  # three plain floats give a batch of one
+        expected = torch.tensor(
+            [
+                [
+                    [1.0, 0.0, 0.0, 0],
+                    [0.0, 1.0, 0.0, 0],
+                    [0.0, 0.0, 1.0, 0],
+                    [0.2, 0.3, 0.4, 1],
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        tf = Translate(0.2, 0.3, 0.4)
+        self.assertTrue(torch.allclose(tf._matrix, expected))
+
+    def test_torch_scalar(self):  # zero-dim tensors behave like plain floats
+        expected = torch.tensor(
+            [
+                [
+                    [1.0, 0.0, 0.0, 0],
+                    [0.0, 1.0, 0.0, 0],
+                    [0.0, 0.0, 1.0, 0],
+                    [0.2, 0.3, 0.4, 1],
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        dx = torch.tensor(0.2)
+        dy = torch.tensor(0.3)
+        dz = torch.tensor(0.4)
+        tf = Translate(dx, dy, dz)
+        self.assertTrue(torch.allclose(tf._matrix, expected))
+
+    def test_mixed_scalars(self):  # floats and zero-dim tensors can be mixed
+        expected = torch.tensor(
+            [
+                [
+                    [1.0, 0.0, 0.0, 0],
+                    [0.0, 1.0, 0.0, 0],
+                    [0.0, 0.0, 1.0, 0],
+                    [0.2, 0.3, 0.4, 1],
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        dx = 0.2
+        dy = torch.tensor(0.3)
+        dz = 0.4
+        tf = Translate(dx, dy, dz)
+        self.assertTrue(torch.allclose(tf._matrix, expected))
+
+    def test_torch_scalar_grads(self):
+        # Make sure backprop works if we give torch scalars
+        x = torch.tensor(0.2, requires_grad=True)
+        y = torch.tensor(0.3, requires_grad=True)
+        z = torch.tensor(0.4)
+        t = Translate(x, y, z)
+        t._matrix.sum().backward()
+        self.assertIsNotNone(x.grad)  # hasattr() is vacuous: .grad always exists
+        self.assertIsNotNone(y.grad)
+        self.assertTrue(torch.allclose(x.grad, x.new_ones(x.shape)))
+        self.assertTrue(torch.allclose(y.grad, y.new_ones(y.shape)))
+
+    def test_torch_vectors(self):  # length-2 vectors give a batch of two
+        expected = torch.tensor(
+            [
+                [
+                    [1.0, 0.0, 0.0, 0],
+                    [0.0, 1.0, 0.0, 0],
+                    [0.0, 0.0, 1.0, 0],
+                    [0.2, 0.3, 0.4, 1],
+                ],
+                [
+                    [1.0, 0.0, 0.0, 0],
+                    [0.0, 1.0, 0.0, 0],
+                    [0.0, 0.0, 1.0, 0],
+                    [2.0, 3.0, 4.0, 1],
+                ],
+            ],
+            dtype=torch.float32,
+        )
+        dx = torch.tensor([0.2, 2.0])
+        dy = torch.tensor([0.3, 3.0])
+        dz = torch.tensor([0.4, 4.0])
+        tf = Translate(dx, dy, dz)
+        self.assertTrue(torch.allclose(tf._matrix, expected))
+
+    def test_vector_broadcast(self):  # the single z value serves both entries
+        expected = torch.tensor(
+            [
+                [
+                    [1.0, 0.0, 0.0, 0],
+                    [0.0, 1.0, 0.0, 0],
+                    [0.0, 0.0, 1.0, 0],
+                    [0.2, 0.3, 0.4, 1],
+                ],
+                [
+                    [1.0, 0.0, 0.0, 0],
+                    [0.0, 1.0, 0.0, 0],
+                    [0.0, 0.0, 1.0, 0],
+                    [2.0, 3.0, 0.4, 1],
+                ],
+            ],
+            dtype=torch.float32,
+        )
+        dx = torch.tensor([0.2, 2.0])
+        dy = torch.tensor([0.3, 3.0])
+        dz = torch.tensor([0.4])
+        tf = Translate(dx, dy, dz)
+        self.assertTrue(torch.allclose(tf._matrix, expected))
+
+    def test_bad_broadcast(self):  # lengths 3, 2 and 1 cannot be combined
+        dx = torch.tensor([0.2, 2.0, 20.0])
+        dy = torch.tensor([0.3, 3.0])
+        dz = torch.tensor([0.4])
+        with self.assertRaises(ValueError):
+            Translate(dx, dy, dz)
+
+    def test_mixed_broadcast(self):  # float, zero-dim tensor and vector together
+        expected = torch.tensor(
+            [
+                [
+                    [1.0, 0.0, 0.0, 0],
+                    [0.0, 1.0, 0.0, 0],
+                    [0.0, 0.0, 1.0, 0],
+                    [0.2, 0.3, 0.4, 1],
+                ],
+                [
+                    [1.0, 0.0, 0.0, 0],
+                    [0.0, 1.0, 0.0, 0],
+                    [0.0, 0.0, 1.0, 0],
+                    [0.2, 0.3, 4.0, 1],
+                ],
+            ],
+            dtype=torch.float32,
+        )
+        dx = 0.2
+        dy = torch.tensor(0.3)
+        dz = torch.tensor([0.4, 4.0])
+        tf = Translate(dx, dy, dz)
+        self.assertTrue(torch.allclose(tf._matrix, expected))
+
+    def test_mixed_broadcast_grad(self):  # gradients must survive broadcasting
+        x = 0.2
+        y = torch.tensor(0.3, requires_grad=True)
+        z = torch.tensor([0.4, 4.0], requires_grad=True)
+        t = Translate(x, y, z)
+        t._matrix.sum().backward()
+        self.assertIsNotNone(y.grad)  # hasattr() is vacuous: .grad always exists
+        self.assertIsNotNone(z.grad)
+        y_grad = torch.tensor(2.0)  # y is broadcast into both batch entries
+        z_grad = torch.tensor([1.0, 1.0])
+        self.assertEqual(y.grad.shape, y_grad.shape)
+        self.assertEqual(z.grad.shape, z_grad.shape)
+        self.assertTrue(torch.allclose(y.grad, y_grad))
+        self.assertTrue(torch.allclose(z.grad, z_grad))
+
+    def test_matrix(self):  # one (2, 3) tensor holding both translations
+        expected = torch.tensor(
+            [
+                [
+                    [1.0, 0.0, 0.0, 0],
+                    [0.0, 1.0, 0.0, 0],
+                    [0.0, 0.0, 1.0, 0],
+                    [0.2, 0.3, 0.4, 1],
+                ],
+                [
+                    [1.0, 0.0, 0.0, 0],
+                    [0.0, 1.0, 0.0, 0],
+                    [0.0, 0.0, 1.0, 0],
+                    [2.0, 3.0, 4.0, 1],
+                ],
+            ],
+            dtype=torch.float32,
+        )
+        offsets = torch.tensor([[0.2, 0.3, 0.4], [2.0, 3.0, 4.0]])
+        tf = Translate(offsets)
+        self.assertTrue(torch.allclose(tf._matrix, expected))
+
+    def test_matrix_extra_args(self):  # a (2, 3) tensor plus extra y/z is an error
+        offsets = torch.tensor([[0.2, 0.3, 0.4], [2.0, 3.0, 4.0]])
+        with self.assertRaises(ValueError):
+            Translate(offsets, offsets[:, 1], offsets[:, 2])
+
+    def test_inverse(self):  # inverse() must match a numeric matrix inversion
+        offsets = torch.tensor([[0.2, 0.3, 0.4], [2.0, 3.0, 4.0]])
+        tf = Translate(offsets)
+        inv_api = tf.inverse()._matrix
+        inv_raw = tf._matrix.inverse()
+        inv_composed = tf.get_matrix().inverse()
+        self.assertTrue(torch.allclose(inv_api, inv_composed))
+        self.assertTrue(torch.allclose(inv_api, inv_raw))
+
+
+class TestScale(unittest.TestCase):
+    def test_single_python_scalar(self):  # one float scales all three axes
+        expected = torch.tensor(
+            [
+                [
+                    [0.1, 0.0, 0.0, 0.0],
+                    [0.0, 0.1, 0.0, 0.0],
+                    [0.0, 0.0, 0.1, 0.0],
+                    [0.0, 0.0, 0.0, 1.0],
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        tf = Scale(0.1)
+        self.assertTrue(torch.allclose(tf._matrix, expected))
+
+    def test_single_torch_scalar(self):  # one zero-dim tensor scales all axes
+        expected = torch.tensor(
+            [
+                [
+                    [0.1, 0.0, 0.0, 0.0],
+                    [0.0, 0.1, 0.0, 0.0],
+                    [0.0, 0.0, 0.1, 0.0],
+                    [0.0, 0.0, 0.0, 1.0],
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        tf = Scale(torch.tensor(0.1))
+        self.assertTrue(torch.allclose(tf._matrix, expected))
+
+    def test_single_vector(self):  # each vector entry is one uniform scale
+        expected = torch.tensor(
+            [
+                [
+                    [0.1, 0.0, 0.0, 0.0],
+                    [0.0, 0.1, 0.0, 0.0],
+                    [0.0, 0.0, 0.1, 0.0],
+                    [0.0, 0.0, 0.0, 1.0],
+                ],
+                [
+                    [0.2, 0.0, 0.0, 0.0],
+                    [0.0, 0.2, 0.0, 0.0],
+                    [0.0, 0.0, 0.2, 0.0],
+                    [0.0, 0.0, 0.0, 1.0],
+                ],
+            ],
+            dtype=torch.float32,
+        )
+        tf = Scale(torch.tensor([0.1, 0.2]))
+        self.assertTrue(torch.allclose(tf._matrix, expected))
+
+    def test_single_matrix(self):  # a (2, 3) tensor gives per-axis scales
+        expected = torch.tensor(
+            [
+                [
+                    [0.1, 0.0, 0.0, 0.0],
+                    [0.0, 0.2, 0.0, 0.0],
+                    [0.0, 0.0, 0.3, 0.0],
+                    [0.0, 0.0, 0.0, 1.0],
+                ],
+                [
+                    [1.0, 0.0, 0.0, 0.0],
+                    [0.0, 2.0, 0.0, 0.0],
+                    [0.0, 0.0, 3.0, 0.0],
+                    [0.0, 0.0, 0.0, 1.0],
+                ],
+            ],
+            dtype=torch.float32,
+        )
+        factors = torch.tensor([[0.1, 0.2, 0.3], [1.0, 2.0, 3.0]])
+        tf = Scale(factors)
+        self.assertTrue(torch.allclose(tf._matrix, expected))
+
+    def test_three_python_scalar(self):  # three floats, one per axis
+        expected = torch.tensor(
+            [
+                [
+                    [0.1, 0.0, 0.0, 0.0],
+                    [0.0, 0.2, 0.0, 0.0],
+                    [0.0, 0.0, 0.3, 0.0],
+                    [0.0, 0.0, 0.0, 1.0],
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        tf = Scale(0.1, 0.2, 0.3)
+        self.assertTrue(torch.allclose(tf._matrix, expected))
+
+    def test_three_torch_scalar(self):  # three zero-dim tensors, one per axis
+        expected = torch.tensor(
+            [
+                [
+                    [0.1, 0.0, 0.0, 0.0],
+                    [0.0, 0.2, 0.0, 0.0],
+                    [0.0, 0.0, 0.3, 0.0],
+                    [0.0, 0.0, 0.0, 1.0],
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        tf = Scale(torch.tensor(0.1), torch.tensor(0.2), torch.tensor(0.3))
+        self.assertTrue(torch.allclose(tf._matrix, expected))
+
+    def test_three_mixed_scalar(self):  # zero-dim tensors mixed with a float
+        expected = torch.tensor(
+            [
+                [
+                    [0.1, 0.0, 0.0, 0.0],
+                    [0.0, 0.2, 0.0, 0.0],
+                    [0.0, 0.0, 0.3, 0.0],
+                    [0.0, 0.0, 0.0, 1.0],
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        tf = Scale(torch.tensor(0.1), 0.2, torch.tensor(0.3))
+        self.assertTrue(torch.allclose(tf._matrix, expected))
+
+    def test_three_vector_broadcast(self):  # the single x value serves both entries
+        expected = torch.tensor(
+            [
+                [
+                    [0.1, 0.0, 0.0, 0.0],
+                    [0.0, 0.2, 0.0, 0.0],
+                    [0.0, 0.0, 0.3, 0.0],
+                    [0.0, 0.0, 0.0, 1.0],
+                ],
+                [
+                    [0.1, 0.0, 0.0, 0.0],
+                    [0.0, 2.0, 0.0, 0.0],
+                    [0.0, 0.0, 3.0, 0.0],
+                    [0.0, 0.0, 0.0, 1.0],
+                ],
+            ],
+            dtype=torch.float32,
+        )
+        sx = torch.tensor([0.1])
+        sy = torch.tensor([0.2, 2.0])
+        sz = torch.tensor([0.3, 3.0])
+        tf = Scale(sx, sy, sz)
+        self.assertTrue(torch.allclose(tf._matrix, expected))
+
+    def test_three_mixed_broadcast_grad(self):  # values and gradients both checked
+        x = 0.1
+        y = torch.tensor(0.2, requires_grad=True)
+        z = torch.tensor([0.3, 3.0], requires_grad=True)
+        t = Scale(x, y, z)
+        matrix = torch.tensor(
+            [
+                [
+                    [0.1, 0.0, 0.0, 0.0],
+                    [0.0, 0.2, 0.0, 0.0],
+                    [0.0, 0.0, 0.3, 0.0],
+                    [0.0, 0.0, 0.0, 1.0],
+                ],
+                [
+                    [0.1, 0.0, 0.0, 0.0],
+                    [0.0, 0.2, 0.0, 0.0],
+                    [0.0, 0.0, 3.0, 0.0],
+                    [0.0, 0.0, 0.0, 1.0],
+                ],
+            ],
+            dtype=torch.float32,
+        )
+        self.assertTrue(torch.allclose(t._matrix, matrix))
+        t._matrix.sum().backward()
+        self.assertIsNotNone(y.grad)  # hasattr() is vacuous: .grad always exists
+        self.assertIsNotNone(z.grad)
+        y_grad = torch.tensor(2.0)  # y is broadcast into both batch entries
+        z_grad = torch.tensor([1.0, 1.0])
+        self.assertTrue(torch.allclose(y.grad, y_grad))
+        self.assertTrue(torch.allclose(z.grad, z_grad))
+
+    def test_inverse(self):  # inverse() must match a numeric matrix inversion
+        sx = torch.tensor([0.1])
+        sy = torch.tensor([0.2, 2.0])
+        sz = torch.tensor([0.3, 3.0])
+        tf = Scale(sx, sy, sz)
+        inv_api = tf.inverse()._matrix
+        inv_raw = tf._matrix.inverse()
+        inv_composed = tf.get_matrix().inverse()
+        self.assertTrue(torch.allclose(inv_api, inv_composed))
+        self.assertTrue(torch.allclose(inv_api, inv_raw))
+
+
+class TestTransformBroadcast(unittest.TestCase):
+    def test_broadcast_transform_points(self):  # output shapes only, not values
+        single = Scale(0.1, 0.1, 0.1)
+        N = 10
+        P = 7
+        M = 20
+        dx = torch.tensor([0.2] * N)
+        dy = torch.tensor([0.3] * N)
+        dz = torch.tensor([0.4] * N)
+        batched = Translate(dx, dy, dz)
+        out = single.transform_points(torch.randn(P, 3))
+        self.assertTrue(out.shape == (P, 3))
+        out = single.transform_points(torch.randn(1, P, 3))
+        self.assertTrue(out.shape == (1, P, 3))
+        out = single.transform_points(torch.randn(M, P, 3))
+        self.assertTrue(out.shape == (M, P, 3))
+        out = batched.transform_points(torch.randn(P, 3))
+        self.assertTrue(out.shape == (N, P, 3))
+        out = batched.transform_points(torch.randn(1, P, 3))
+        self.assertTrue(out.shape == (N, P, 3))
+
+    def test_broadcast_transform_normals(self):
+        t1 = Scale(0.1, 0.1, 0.1)
+        N = 10
+        P = 7
+        M = 20
+        x = torch.tensor([0.2] * N)
+        y = torch.tensor([0.3] * N)
+        z = torch.tensor([0.4] * N)
+        tN = Translate(x, y, z)
+        p1 = t1.transform_normals(torch.randn(P, 3))
+        self.assertTrue(p1.shape == (P, 3))
+        p2 = t1.transform_normals(torch.randn(1, P, 3))
+        self.assertTrue(p2.shape == (1, P, 3))
+        p3 = t1.transform_normals(torch.randn(M, P, 3))
+        self.assertTrue(p3.shape == (M, P, 3))
+        p4 = tN.transform_normals(torch.randn(P, 3))
+        self.assertTrue(p4.shape == (N, P, 3))
+        p5 = tN.transform_normals(torch.randn(1, P, 3))
+        self.assertTrue(p5.shape == (N, P, 3))
+
+    def test_broadcast_compose(self):
+        t1 = Scale(0.1, 0.1, 0.1)
+        N = 10
+        scale_n = torch.tensor([0.3] * N)
+        tN = Scale(scale_n)
+        t1N = t1.compose(tN)
+        self.assertTrue(t1._matrix.shape == (1, 4, 4))
+        self.assertTrue(tN._matrix.shape == (N, 4, 4))
+        self.assertTrue(t1N.get_matrix().shape == (N, 4, 4))
+        t11 = t1.compose(t1)
+        self.assertTrue(t11.get_matrix().shape == (1, 4, 4))
+
+    def test_broadcast_compose_fail(self):
+        # Cannot compose two transforms which have batch dimensions N and M
+        # other than the case where either N or M is 1
+        N = 10
+        M = 20
+        scale_n = torch.tensor([0.3] * N)
+        tN = Scale(scale_n)
+        x = torch.tensor([0.2] * M)
+        y = torch.tensor([0.3] * M)
+        z = torch.tensor([0.4] * M)
+        tM = Translate(x, y, z)
+        t = tN.compose(tM)
+        with self.assertRaises(ValueError):
+            t.get_matrix()
+
+    def test_multiple_broadcast_compose(self):
+        t1 = Scale(0.1, 0.1, 0.1)
+        t2 = Scale(0.2, 0.2, 0.2)
+        N = 10
+        scale_n = torch.tensor([0.3] * N)
+        tN = Scale(scale_n)
+        t1N2 = t1.compose(tN.compose(t2))
+        composed_mat = t1N2.get_matrix()
+        self.assertTrue(composed_mat.shape == (N, 4, 4))
+        expected_mat = torch.eye(3, dtype=torch.float32) * 0.3 * 0.2 * 0.1
+        self.assertTrue(torch.allclose(composed_mat[0, :3, :3], expected_mat))
+
+
+class TestRotate(unittest.TestCase):
+    def test_single_matrix(self):
+        """
+        Rotate built from a single 3x3 identity rotation.
+
+        The stored _matrix is compared against the 4x4 identity with a
+        leading batch dimension of one, i.e.
+            [[1, 0, 0, 0],
+             [0, 1, 0, 0],
+             [0, 0, 1, 0],
+             [0, 0, 0, 1]]
+        """
+        rotation = torch.eye(3)
+        transform = Rotate(rotation)
+        expected = torch.eye(4, dtype=torch.float32)[None]  # (1, 4, 4)
+        self.assertTrue(torch.allclose(transform._matrix, expected))
+
+    def test_invalid_dimensions(self):
+        # A 4x4 input is expected to be rejected by Rotate with a ValueError.
+        with self.assertRaises(ValueError):
+            Rotate(torch.eye(4))
+
+    def test_inverse(self, batch_size=5):
+        """Rotate.inverse() matches explicit matrix inversion (runs on cuda:0)."""
+        log_rot = torch.randn(
+            (batch_size, 3), dtype=torch.float32, device=torch.device("cuda:0")
+        )
+        rot = Rotate(so3_exp_map(log_rot))
+        inv = rot.inverse()._matrix
+        # Same 1e-4 tolerance against the get_matrix() and raw _matrix inverses.
+        for ref in (rot.get_matrix().inverse(), rot._matrix.inverse()):
+            self.assertTrue(torch.allclose(inv, ref, atol=1e-4))
+
+
+class TestRotateAxisAngle(unittest.TestCase):
+    def test_rotate_x_python_scalar(self):
+        """A 90 degree rotation about X (python int angle) maps +y onto +z."""
+        t = RotateAxisAngle(angle=90, axis="X")
+        # fmt: off
+        matrix = torch.tensor(
+            [
+                [
+                    [1.0,  0.0, 0.0, 0.0],  # noqa: E241, E201
+                    [0.0,  0.0, 1.0, 0.0],  # noqa: E241, E201
+                    [0.0, -1.0, 0.0, 0.0],  # noqa: E241, E201
+                    [0.0,  0.0, 0.0, 1.0],  # noqa: E241, E201
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        # fmt: on
+        points = torch.tensor([0.0, 1.0, 0.0])[None, None, :]  # (1, 1, 3)
+        rotated = t.transform_points(points)
+        expected = torch.tensor([0.0, 0.0, 1.0])
+        # squeeze() drops the two leading singleton dims before comparing.
+        self.assertTrue(torch.allclose(rotated.squeeze(), expected, atol=1e-7))
+        self.assertTrue(torch.allclose(t._matrix, matrix, atol=1e-7))
+
+    def test_rotate_x_torch_scalar(self):
+        """A 90 degree rotation about X (0-dim tensor angle) maps +y onto +z."""
+        angle = torch.tensor(90.0)
+        t = RotateAxisAngle(angle=angle, axis="X")
+        # fmt: off
+        matrix = torch.tensor(
+            [
+                [
+                    [1.0,  0.0, 0.0, 0.0],  # noqa: E241, E201
+                    [0.0,  0.0, 1.0, 0.0],  # noqa: E241, E201
+                    [0.0, -1.0, 0.0, 0.0],  # noqa: E241, E201
+                    [0.0,  0.0, 0.0, 1.0],  # noqa: E241, E201
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        # fmt: on
+        points = torch.tensor([0.0, 1.0, 0.0])[None, None, :]  # (1, 1, 3)
+        rotated = t.transform_points(points)
+        expected = torch.tensor([0.0, 0.0, 1.0])
+        self.assertTrue(torch.allclose(rotated.squeeze(), expected, atol=1e-7))
+        self.assertTrue(torch.allclose(t._matrix, matrix, atol=1e-7))
+
+    def test_rotate_x_torch_tensor(self):
+        """A (3,) angle tensor yields one X rotation matrix per angle: 0, 45, 90."""
+        angle = torch.tensor([0, 45.0, 90.0])  # (N)
+        t = RotateAxisAngle(angle=angle, axis="X")
+        r2_i, r2_2 = 1 / math.sqrt(2), math.sqrt(2) / 2
+        # fmt: off
+        matrix = torch.tensor(
+            [
+                [
+                    [1.0, 0.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0, 0.0],
+                    [0.0, 0.0, 1.0, 0.0],
+                    [0.0, 0.0, 0.0, 1.0],
+                ],
+                [
+                    [1.0,   0.0,  0.0, 0.0],  # noqa: E241, E201
+                    [0.0,  r2_2, r2_i, 0.0],  # noqa: E241, E201
+                    [0.0, -r2_i, r2_2, 0.0],  # noqa: E241, E201
+                    [0.0,   0.0,  0.0, 1.0],  # noqa: E241, E201
+                ],
+                [
+                    [1.0,  0.0, 0.0,  0.0],   # noqa: E241, E201
+                    [0.0,  0.0, 1.0,  0.0],   # noqa: E241, E201
+                    [0.0, -1.0, 0.0,  0.0],   # noqa: E241, E201
+                    [0.0,  0.0, 0.0,  1.0],   # noqa: E241, E201
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        # fmt: on
+        self.assertTrue(torch.allclose(t._matrix, matrix, atol=1e-7))
+        # Rebuilding the transform from the same tensor must give the same result.
+        t = RotateAxisAngle(angle=angle, axis="X")
+        self.assertTrue(torch.allclose(t._matrix, matrix, atol=1e-7))
+
+    def test_rotate_y_python_scalar(self):
+        """A 90 degree rotation about Y (python int angle) maps +x onto -z."""
+        t = RotateAxisAngle(angle=90, axis="Y")
+        # fmt: off
+        matrix = torch.tensor(
+            [
+                [
+                    [0.0, 0.0, -1.0, 0.0],  # noqa: E241, E201
+                    [0.0, 1.0,  0.0, 0.0],  # noqa: E241, E201
+                    [1.0, 0.0,  0.0, 0.0],  # noqa: E241, E201
+                    [0.0, 0.0,  0.0, 1.0],  # noqa: E241, E201
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        # fmt: on
+        points = torch.tensor([1.0, 0.0, 0.0])[None, None, :]  # (1, 1, 3)
+        rotated = t.transform_points(points)
+        expected = torch.tensor([0.0, 0.0, -1.0])
+        # squeeze() drops the two leading singleton dims before comparing.
+        self.assertTrue(torch.allclose(rotated.squeeze(), expected, atol=1e-7))
+        self.assertTrue(torch.allclose(t._matrix, matrix, atol=1e-7))
+
+    def test_rotate_y_torch_scalar(self):
+        """
+        Test rotation about Y axis. With a right hand coordinate system this
+        should result in a vector pointing along the x-axis being rotated to
+        point along the negative z axis.
+        """
+        angle = torch.tensor(90.0)
+        t = RotateAxisAngle(angle=angle, axis="Y")
+        # fmt: off
+        matrix = torch.tensor(
+            [
+                [
+                    [0.0, 0.0, -1.0, 0.0],  # noqa: E241, E201
+                    [0.0, 1.0,  0.0, 0.0],  # noqa: E241, E201
+                    [1.0, 0.0,  0.0, 0.0],  # noqa: E241, E201
+                    [0.0, 0.0,  0.0, 1.0],  # noqa: E241, E201
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        # fmt: on
+        points = torch.tensor([1.0, 0.0, 0.0])[None, None, :]  # (1, 1, 3)
+        rotated = t.transform_points(points)
+        expected = torch.tensor([0.0, 0.0, -1.0])
+        self.assertTrue(torch.allclose(rotated.squeeze(), expected, atol=1e-7))
+        self.assertTrue(torch.allclose(t._matrix, matrix, atol=1e-7))
+
+    def test_rotate_y_torch_tensor(self):
+        """A (3,) angle tensor yields one Y rotation matrix per angle: 0, 45, 90."""
+        angle = torch.tensor([0, 45.0, 90.0])
+        t = RotateAxisAngle(angle=angle, axis="Y")
+        r2_i, r2_2 = 1 / math.sqrt(2), math.sqrt(2) / 2
+        # fmt: off
+        matrix = torch.tensor(
+            [
+                [
+                    [1.0, 0.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0, 0.0],
+                    [0.0, 0.0, 1.0, 0.0],
+                    [0.0, 0.0, 0.0, 1.0],
+                ],
+                [
+                    [r2_2,  0.0, -r2_i, 0.0],  # noqa: E241, E201
+                    [ 0.0,  1.0,   0.0, 0.0],  # noqa: E241, E201
+                    [r2_i,  0.0,  r2_2, 0.0],  # noqa: E241, E201
+                    [ 0.0,  0.0,   0.0, 1.0],  # noqa: E241, E201
+                ],
+                [
+                    [0.0,  0.0, -1.0, 0.0],  # noqa: E241, E201
+                    [0.0,  1.0,  0.0, 0.0],  # noqa: E241, E201
+                    [1.0,  0.0,  0.0, 0.0],  # noqa: E241, E201
+                    [0.0,  0.0,  0.0, 1.0],  # noqa: E241, E201
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        # fmt: on
+        self.assertTrue(torch.allclose(t._matrix, matrix, atol=1e-7))
+
+    def test_rotate_z_python_scalar(self):
+        """A 90 degree rotation about Z (python int angle) maps +x onto +y."""
+        t = RotateAxisAngle(angle=90, axis="Z")
+        # fmt: off
+        matrix = torch.tensor(
+            [
+                [
+                    [ 0.0, 1.0, 0.0, 0.0],  # noqa: E241, E201
+                    [-1.0, 0.0, 0.0, 0.0],  # noqa: E241, E201
+                    [ 0.0, 0.0, 1.0, 0.0],  # noqa: E241, E201
+                    [ 0.0, 0.0, 0.0, 1.0],  # noqa: E241, E201
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        # fmt: on
+        points = torch.tensor([1.0, 0.0, 0.0])[None, None, :]  # (1, 1, 3)
+        rotated = t.transform_points(points)
+        expected = torch.tensor([0.0, 1.0, 0.0])
+        # squeeze() drops the two leading singleton dims before comparing.
+        self.assertTrue(torch.allclose(rotated.squeeze(), expected, atol=1e-7))
+        self.assertTrue(torch.allclose(t._matrix, matrix, atol=1e-7))
+
+    def test_rotate_z_torch_scalar(self):
+        """A 90 degree rotation about Z (0-dim tensor angle) maps +x onto +y."""
+        angle = torch.tensor(90.0)
+        t = RotateAxisAngle(angle=angle, axis="Z")
+        # fmt: off
+        matrix = torch.tensor(
+            [
+                [
+                    [ 0.0, 1.0, 0.0, 0.0],  # noqa: E241, E201
+                    [-1.0, 0.0, 0.0, 0.0],  # noqa: E241, E201
+                    [ 0.0, 0.0, 1.0, 0.0],  # noqa: E241, E201
+                    [ 0.0, 0.0, 0.0, 1.0],  # noqa: E241, E201
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        # fmt: on
+        points = torch.tensor([1.0, 0.0, 0.0])[None, None, :]  # (1, 1, 3)
+        rotated = t.transform_points(points)
+        expected = torch.tensor([0.0, 1.0, 0.0])
+        self.assertTrue(torch.allclose(rotated.squeeze(), expected, atol=1e-7))
+        self.assertTrue(torch.allclose(t._matrix, matrix, atol=1e-7))
+
+    def test_rotate_z_torch_tensor(self):
+        """A (3,) angle tensor yields one Z rotation matrix per angle: 0, 45, 90."""
+        angle = torch.tensor([0, 45.0, 90.0])
+        t = RotateAxisAngle(angle=angle, axis="Z")
+        r2_i, r2_2 = 1 / math.sqrt(2), math.sqrt(2) / 2
+        # fmt: off
+        matrix = torch.tensor(
+            [
+                [
+                    [1.0, 0.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0, 0.0],
+                    [0.0, 0.0, 1.0, 0.0],
+                    [0.0, 0.0, 0.0, 1.0],
+                ],
+                [
+                    [ r2_2,   r2_i,  0.0, 0.0],  # noqa: E241, E201
+                    [-r2_i,   r2_2,  0.0, 0.0],  # noqa: E241, E201
+                    [  0.0,    0.0,  1.0, 0.0],  # noqa: E241, E201
+                    [  0.0,    0.0,  0.0, 1.0],  # noqa: E241, E201
+                ],
+                [
+                    [ 0.0,  1.0, 0.0, 0.0],  # noqa: E241, E201
+                    [-1.0,  0.0, 0.0, 0.0],  # noqa: E241, E201
+                    [ 0.0,  0.0, 1.0, 0.0],  # noqa: E241, E201
+                    [ 0.0,  0.0, 0.0, 1.0],  # noqa: E241, E201
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        # fmt: on
+        self.assertTrue(torch.allclose(t._matrix, matrix, atol=1e-7))
+
+    def test_rotate_compose_x_y_z(self):
+        """compose(X, Y, Z) must equal the product of the three 90 degree matrices."""
+        angle = torch.tensor(90.0)
+        t1 = RotateAxisAngle(angle=angle, axis="X")
+        t2 = RotateAxisAngle(angle=angle, axis="Y")
+        t3 = RotateAxisAngle(angle=angle, axis="Z")
+        t = t1.compose(t2, t3)
+        # fmt: off
+        matrix1 = torch.tensor(
+            [
+                [
+                    [1.0,  0.0, 0.0, 0.0],  # noqa: E241, E201
+                    [0.0,  0.0, 1.0, 0.0],  # noqa: E241, E201
+                    [0.0, -1.0, 0.0, 0.0],  # noqa: E241, E201
+                    [0.0,  0.0, 0.0, 1.0],  # noqa: E241, E201
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        matrix2 = torch.tensor(
+            [
+                [
+                    [0.0, 0.0, -1.0, 0.0],  # noqa: E241, E201
+                    [0.0, 1.0,  0.0, 0.0],  # noqa: E241, E201
+                    [1.0, 0.0,  0.0, 0.0],  # noqa: E241, E201
+                    [0.0, 0.0,  0.0, 1.0],  # noqa: E241, E201
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        matrix3 = torch.tensor(
+            [
+                [
+                    [ 0.0, 1.0, 0.0, 0.0],  # noqa: E241, E201
+                    [-1.0, 0.0, 0.0, 0.0],  # noqa: E241, E201
+                    [ 0.0, 0.0, 1.0, 0.0],  # noqa: E241, E201
+                    [ 0.0, 0.0, 0.0, 1.0],  # noqa: E241, E201
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        # fmt: on
+        # order of transforms is t1 -> t2 -> t3
+        matrix = torch.matmul(matrix1, torch.matmul(matrix2, matrix3))
+        self.assertTrue(torch.allclose(t.get_matrix(), matrix, atol=1e-7))
+
+    def test_rotate_angle_radians(self):
+        """With degrees=False an angle of pi / 2 gives the 90 degree Z matrix."""
+        t = RotateAxisAngle(angle=math.pi / 2, degrees=False, axis="Z")
+        # fmt: off
+        matrix = torch.tensor(
+            [
+                [
+                    [ 0.0, 1.0, 0.0, 0.0],  # noqa: E241, E201
+                    [-1.0, 0.0, 0.0, 0.0],  # noqa: E241, E201
+                    [ 0.0, 0.0, 1.0, 0.0],  # noqa: E241, E201
+                    [ 0.0, 0.0, 0.0, 1.0],  # noqa: E241, E201
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        # fmt: on
+        self.assertTrue(torch.allclose(t._matrix, matrix, atol=1e-7))
+
+    def test_lower_case_axis(self):
+        """The axis name is accepted in lower case ("z")."""
+        t = RotateAxisAngle(angle=90.0, axis="z")
+        # fmt: off
+        matrix = torch.tensor(
+            [
+                [
+                    [ 0.0, 1.0, 0.0, 0.0],  # noqa: E241, E201
+                    [-1.0, 0.0, 0.0, 0.0],  # noqa: E241, E201
+                    [ 0.0, 0.0, 1.0, 0.0],  # noqa: E241, E201
+                    [ 0.0, 0.0, 0.0, 1.0],  # noqa: E241, E201
+                ]
+            ],
+            dtype=torch.float32,
+        )
+        # fmt: on
+        self.assertTrue(torch.allclose(t._matrix, matrix, atol=1e-7))
+
+    def test_axis_fail(self):
+        """RotateAxisAngle is expected to raise ValueError for axis="P"."""
+        with self.assertRaises(ValueError):
+            RotateAxisAngle(angle=90.0, axis="P")
+
+    def test_rotate_angle_fail(self):
+        """A 2-D angle tensor, here of shape (2, 3), is expected to raise ValueError."""
+        angle = torch.tensor([[0, 45.0, 90.0], [0, 45.0, 90.0]])
+        with self.assertRaises(ValueError):
+            RotateAxisAngle(angle=angle, axis="X")
diff --git a/pytorch3d/tests/test_vert_align.py b/pytorch3d/tests/test_vert_align.py
new file mode 100644
index 0000000000000000000000000000000000000000..c264521c4e923c385ca526bcfa517c939b15a7e5
--- /dev/null
+++ b/pytorch3d/tests/test_vert_align.py
@@ -0,0 +1,194 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import unittest
+
+import torch
+import torch.nn.functional as F
+from pytorch3d.ops.vert_align import vert_align
+from pytorch3d.structures.meshes import Meshes
+from pytorch3d.structures.pointclouds import Pointclouds
+
+from .common_testing import TestCaseMixin
+
+
+class TestVertAlign(TestCaseMixin, unittest.TestCase):
+    @staticmethod
+    def vert_align_naive(
+        feats, verts, return_packed: bool = False, align_corners: bool = True
+    ):
+        """
+        Naive per-example reference for vert_align, built on F.grid_sample.
+        """
+        if torch.is_tensor(feats):
+            feats = [feats]
+        N = feats[0].shape[0]
+
+        out_feats = []
+        # sample every example in the batch separately
+        for i in range(N):
+            out_i_feats = []
+            for feat in feats:
+                feats_i = feat[i][None, :, :, :]  # (1, C, H, W)
+                if torch.is_tensor(verts):
+                    grid = verts[i][None, None, :, :2]  # (1, 1, V, 2)
+                elif hasattr(verts, "verts_list"):
+                    grid = verts.verts_list()[i][None, None, :, :2]  # (1, 1, V, 2)
+                elif hasattr(verts, "points_list"):
+                    grid = verts.points_list()[i][None, None, :, :2]  # (1, 1, V, 2)
+                else:
+                    raise ValueError("verts_or_meshes is invalid")
+                feat_sampled_i = F.grid_sample(
+                    feats_i,
+                    grid,
+                    mode="bilinear",
+                    padding_mode="zeros",
+                    align_corners=align_corners,
+                )  # (1, C, 1, V)
+                feat_sampled_i = feat_sampled_i.squeeze(2).squeeze(0)  # (C, V)
+                feat_sampled_i = feat_sampled_i.transpose(1, 0)  # (V, C)
+                out_i_feats.append(feat_sampled_i)
+            out_i_feats = torch.cat(out_i_feats, 1)  # (V, sum(C))
+            out_feats.append(out_i_feats)
+
+        if return_packed:
+            out_feats = torch.cat(out_feats, 0)  # (sum(V), sum(C))
+        else:
+            out_feats = torch.stack(out_feats, 0)  # (N, V, sum(C))
+        return out_feats
+
+    @staticmethod
+    def init_meshes(
+        num_meshes: int = 10, num_verts: int = 1000, num_faces: int = 3000
+    ) -> Meshes:
+        """Build `num_meshes` random meshes on cuda:0 with verts in [-1, 1]."""
+        device = torch.device("cuda:0")
+        verts_list = []
+        faces_list = []
+        for _ in range(num_meshes):
+            verts = (
+                torch.rand((num_verts, 3), dtype=torch.float32, device=device) * 2.0
+                - 1.0
+            )  # verts in the space of [-1, 1]
+            faces = torch.randint(
+                num_verts, size=(num_faces, 3), dtype=torch.int64, device=device
+            )
+            verts_list.append(verts)
+            faces_list.append(faces)
+        return Meshes(verts_list, faces_list)
+
+    @staticmethod
+    def init_pointclouds(num_clouds: int = 10, num_points: int = 1000) -> Pointclouds:
+        """Build `num_clouds` random point clouds on cuda:0 with points in [-1, 1]."""
+        device = torch.device("cuda:0")
+        points_list = []
+        for _ in range(num_clouds):
+            points = (
+                torch.rand((num_points, 3), dtype=torch.float32, device=device) * 2.0
+                - 1.0
+            )  # points in the space of [-1, 1]
+            points_list.append(points)
+        return Pointclouds(points=points_list)
+
+    @staticmethod
+    def init_feats(batch_size: int = 10, num_channels: int = 256, device: str = "cuda"):
+        """Return two random feature maps, of spatial size 14x14 and 28x28."""
+        sizes = (14, 28)  # height == width for both feature maps
+        return [
+            torch.rand((batch_size, num_channels, s, s), device=device) for s in sizes
+        ]
+
+    def test_vert_align_with_meshes(self):
+        """
+        Test vert align vs naive implementation with meshes.
+        """
+        meshes = TestVertAlign.init_meshes(10, 1000, 3000)
+        feats = TestVertAlign.init_feats(10, 256)
+
+        # feats in list
+        out = vert_align(feats, meshes, return_packed=True)
+        naive_out = TestVertAlign.vert_align_naive(feats, meshes, return_packed=True)
+        self.assertClose(out, naive_out)
+
+        # feats as tensor
+        out = vert_align(feats[0], meshes, return_packed=True)
+        naive_out = TestVertAlign.vert_align_naive(feats[0], meshes, return_packed=True)
+        self.assertClose(out, naive_out)
+
+    def test_vert_align_with_pointclouds(self):
+        """
+        Test vert align vs naive implementation with pointclouds.
+        """
+        pointclouds = TestVertAlign.init_pointclouds(10, 1000)
+        feats = TestVertAlign.init_feats(10, 256)
+
+        # feats in list
+        out = vert_align(feats, pointclouds, return_packed=True)
+        naive_out = TestVertAlign.vert_align_naive(
+            feats, pointclouds, return_packed=True
+        )
+        self.assertClose(out, naive_out)
+
+        # feats as tensor
+        out = vert_align(feats[0], pointclouds, return_packed=True)
+        naive_out = TestVertAlign.vert_align_naive(
+            feats[0], pointclouds, return_packed=True
+        )
+        self.assertClose(out, naive_out)
+
+    def test_vert_align_with_verts(self):
+        """
+        Test vert align vs naive implementation with verts as tensor.
+        """
+        feats = TestVertAlign.init_feats(10, 256)
+        verts = (
+            torch.rand((10, 100, 3), dtype=torch.float32, device=feats[0].device) * 2.0
+            - 1.0
+        )
+
+        # feats in list
+        out = vert_align(feats, verts, return_packed=True)
+        naive_out = TestVertAlign.vert_align_naive(feats, verts, return_packed=True)
+        self.assertClose(out, naive_out)
+
+        # feats as tensor
+        out = vert_align(feats[0], verts, return_packed=True)
+        naive_out = TestVertAlign.vert_align_naive(feats[0], verts, return_packed=True)
+        self.assertClose(out, naive_out)
+
+        out2 = vert_align(feats[0], verts, return_packed=True, align_corners=False)
+        naive_out2 = TestVertAlign.vert_align_naive(
+            feats[0], verts, return_packed=True, align_corners=False
+        )
+        self.assertFalse(torch.allclose(out, out2))  # align_corners must matter
+        self.assertTrue(torch.allclose(out2, naive_out2))
+
+    @staticmethod
+    def vert_align_with_init(
+        num_meshes: int, num_verts: int, num_faces: int, device: str = "cpu"
+    ):
+        """Build random meshes and feats; return a closure running vert_align."""
+        device = torch.device(device)
+        # torch.cuda.synchronize() is only called when running on a CUDA device.
+        sync = torch.cuda.synchronize if device.type == "cuda" else (lambda: None)
+        verts_list, faces_list = [], []
+        for _ in range(num_meshes):
+            verts = torch.rand((num_verts, 3), dtype=torch.float32, device=device)
+            faces = torch.randint(
+                num_verts, size=(num_faces, 3), dtype=torch.int64, device=device
+            )
+            verts_list.append(verts)
+            faces_list.append(faces)
+        meshes = Meshes(verts_list, faces_list)
+        feats = TestVertAlign.init_feats(num_meshes, device=device)
+        sync()
+
+        def sample_features():
+            vert_align(feats, meshes, return_packed=True)
+            sync()
+
+        return sample_features
diff --git a/pytorch3d/tests/test_vis.py b/pytorch3d/tests/test_vis.py
new file mode 100644
index 0000000000000000000000000000000000000000..00a3abe4a583487982f707250855093f0accbd65
--- /dev/null
+++ b/pytorch3d/tests/test_vis.py
@@ -0,0 +1,74 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from pytorch3d.renderer import HeterogeneousRayBundle, PerspectiveCameras, RayBundle
+from pytorch3d.structures import Meshes, Pointclouds
+from pytorch3d.transforms import random_rotations
+
+# Some of these imports are only needed for testing code coverage
+from pytorch3d.vis import (  # noqa: F401
+    get_camera_wireframe,  # noqa: F401
+    plot_batch_individually,  # noqa: F401
+    plot_scene,
+    texturesuv_image_PIL,  # noqa: F401
+)
+
+
+class TestPlotlyVis(unittest.TestCase):
+    def test_plot_scene(
+        self,
+        B: int = 3,
+        n_rays: int = 128,
+        n_pts_per_ray: int = 32,
+        n_verts: int = 32,
+        n_edges: int = 64,
+        n_pts: int = 256,
+    ):
+        """
+        Smoke test for plot_scene: on "cpu" and on "cuda:0", draws one
+        subplot holding a RayBundle, a HeterogeneousRayBundle, cameras,
+        meshes and point clouds built from random tensors.
+        """
+        for device in ["cpu", "cuda:0"]:
+            ray_bundle = RayBundle(
+                origins=torch.randn(B, n_rays, 3, device=device),
+                xys=torch.randn(B, n_rays, 2, device=device),
+                directions=torch.randn(B, n_rays, 3, device=device),
+                lengths=torch.randn(B, n_rays, n_pts_per_ray, device=device),
+            )
+            total_rays = B * n_rays  # rays of the whole batch, flattened
+            heterogeneous_ray_bundle = HeterogeneousRayBundle(
+                origins=torch.randn(total_rays, 3, device=device),
+                xys=torch.randn(total_rays, 2, device=device),
+                directions=torch.randn(total_rays, 3, device=device),
+                lengths=torch.randn(total_rays, n_pts_per_ray, device=device),
+                camera_ids=torch.randint(
+                    low=0, high=B, size=(total_rays,), device=device
+                ),
+            )
+            camera = PerspectiveCameras(
+                R=random_rotations(B, device=device),
+                T=torch.randn(B, 3, device=device),
+            )
+            mesh = Meshes(
+                verts=torch.randn(B, n_verts, 3, device=device),
+                # n_edges sets the number of faces of every mesh.
+                faces=torch.randint(
+                    low=0, high=n_verts, size=(B, n_edges, 3), device=device
+                ),
+            )
+            point_clouds = Pointclouds(points=torch.randn(B, n_pts, 3, device=device))
+            structures = {
+                "ray_bundle": ray_bundle,
+                "heterogeneous_ray_bundle": heterogeneous_ray_bundle,
+                "camera": camera,
+                "mesh": mesh,
+                "point_clouds": point_clouds,
+            }
+            plot_scene({"scene": structures})
diff --git a/pytorch3d/tests/test_volumes.py b/pytorch3d/tests/test_volumes.py
new file mode 100644
index 0000000000000000000000000000000000000000..76a30413f9e7439c7913e4e557d56d63e94c1543
--- /dev/null
+++ b/pytorch3d/tests/test_volumes.py
@@ -0,0 +1,987 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import copy
+import itertools
+import random
+import unittest
+
+import numpy as np
+import torch
+from pytorch3d.structures.volumes import VolumeLocator, Volumes
+from pytorch3d.transforms import Scale
+
+from .common_testing import TestCaseMixin
+
+
+class TestVolumes(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        """Seed the `random`, NumPy and PyTorch generators with a fixed value."""
+        seed = 42
+        random.seed(seed)
+        np.random.seed(seed)
+        torch.manual_seed(seed)
+
+    @staticmethod
+    def _random_volume_list(
+        num_volumes, min_size, max_size, num_channels, device, rand_sizes=None
+    ):
+        """
+        Build a list of random float32 tensors, the `i`-th one shaped
+        `[num_channels, *rand_sizes[i]]` and allocated on `device`.
+
+        Args:
+            num_volumes: Number of sizes to sample when `rand_sizes` is None.
+            min_size: Inclusive lower bound of every sampled dimension.
+            max_size: Iterable with the inclusive upper bound of each dimension.
+            num_channels: Size of the leading dimension of every tensor.
+            device: Device on which the tensors are created.
+            rand_sizes: Optional list of sizes. When given, no size is sampled
+                and one tensor is created per listed size.
+
+        Returns:
+            A tuple `(volume_list, rand_sizes)` with the generated tensors and
+            the sizes they were generated with.
+        """
+        if rand_sizes is None:
+            rand_sizes = []
+            for _ in range(num_volumes):
+                sampled_size = []
+                for upper_bound in max_size:
+                    sampled_size.append(random.randint(min_size, upper_bound))
+                rand_sizes.append(sampled_size)
+
+        volume_list = []
+        for spatial_size in rand_sizes:
+            shape = [num_channels, *spatial_size]
+            volume_list.append(
+                torch.randn(size=shape, device=device, dtype=torch.float32)
+            )
+
+        return volume_list, rand_sizes
+
+    def _check_indexed_volumes(self, v, selected, indices):
+        """
+        Assert that `selected` contains the entries of `v` named by `indices`.
+
+        Args:
+            v: The volumes object that was indexed.
+            selected: The result of indexing `v`.
+            indices: Iterable of `(position in selected, position in v)` pairs.
+        """
+        for sel_pos, src_pos in indices:
+            self.assertClose(selected.densities()[sel_pos], v.densities()[src_pos])
+
+            # the local-to-world transforms have to be selected consistently
+            src_matrix = v.locator._local_to_world_transform.get_matrix()[src_pos]
+            sel_matrix = selected.locator._local_to_world_transform.get_matrix()[
+                sel_pos
+            ]
+            self.assertClose(src_matrix, sel_matrix)
+
+            # features are optional
+            if selected.features() is None:
+                continue
+            self.assertClose(selected.features()[sel_pos], v.features()[src_pos])
+
+    def test_get_item(
+        self,
+        num_volumes=5,
+        num_channels=4,
+        volume_size=(10, 13, 8),
+        dtype=torch.float32,
+    ):
+        """
+        Test indexing of a `Volumes` batch with an int, a list, a slice,
+        a bool tensor and an int64 tensor, and check that floating point
+        indices raise `IndexError`.
+
+        The checks run for three inputs: padded densities without features,
+        padded densities with padded features, and lists of tensors of
+        varying sizes. The test creates its tensors on "cuda:0".
+        `dtype` is accepted but not used; all tensors are float32.
+        """
+
+        device = torch.device("cuda:0")
+
+        # make sure we have at least 3 volumes to prevent indexing crash
+        num_volumes = max(num_volumes, 3)
+
+        features = torch.randn(
+            size=[num_volumes, num_channels, *volume_size],
+            device=device,
+            dtype=torch.float32,
+        )
+        densities = torch.randn(
+            size=[num_volumes, 1, *volume_size], device=device, dtype=torch.float32
+        )
+
+        # list inputs: densities reuse the sizes sampled for the features
+        features_list, rand_sizes = TestVolumes._random_volume_list(
+            num_volumes, 3, volume_size, num_channels, device
+        )
+        densities_list, _ = TestVolumes._random_volume_list(
+            num_volumes, 3, volume_size, 1, device, rand_sizes=rand_sizes
+        )
+
+        volume_translation = -torch.randn(num_volumes, 3).type_as(features)
+        voxel_size = torch.rand(num_volumes, 1).type_as(features) + 0.5
+
+        for features_, densities_ in zip(
+            (None, features, features_list), (densities, densities, densities_list)
+        ):
+
+            # init the volume structure
+            v = Volumes(
+                features=features_,
+                densities=densities_,
+                volume_translation=volume_translation,
+                voxel_size=voxel_size,
+            )
+
+            # int index
+            index = 1
+            v_selected = v[index]
+            self.assertEqual(len(v_selected), 1)
+            self._check_indexed_volumes(v, v_selected, [(0, 1)])
+
+            # list index
+            index = [1, 2]
+            v_selected = v[index]
+            self.assertEqual(len(v_selected), len(index))
+            self._check_indexed_volumes(v, v_selected, enumerate(index))
+
+            # slice index; index with the slice object itself so that
+            # explicit `slice` instances are exercised
+            index = slice(0, 2, 1)
+            v_selected = v[index]
+            self.assertEqual(len(v_selected), 2)
+            self._check_indexed_volumes(v, v_selected, [(0, 0), (1, 1)])
+
+            # bool tensor
+            index = (torch.rand(num_volumes) > 0.5).to(device)
+            index[:2] = True  # make sure smth is selected
+            v_selected = v[index]
+            self.assertEqual(len(v_selected), index.sum())
+            self._check_indexed_volumes(
+                v,
+                v_selected,
+                zip(
+                    torch.arange(index.sum()),
+                    torch.nonzero(index, as_tuple=False).squeeze(),
+                ),
+            )
+
+            # int tensor
+            index = torch.tensor([1, 2], dtype=torch.int64, device=device)
+            v_selected = v[index]
+            self.assertEqual(len(v_selected), index.numel())
+            self._check_indexed_volumes(v, v_selected, enumerate(index.tolist()))
+
+            # invalid index
+            index = torch.tensor([1, 0, 1], dtype=torch.float32, device=device)
+            with self.assertRaises(IndexError):
+                v_selected = v[index]
+            index = 1.2  # floating point index
+            with self.assertRaises(IndexError):
+                v_selected = v[index]
+
+    def test_locator_init(self, batch_size=9, resolution=(3, 5, 7)):
+        """
+        Check that `VolumeLocator` stores the same `_grid_sizes` and
+        `_resolution` regardless of whether `grid_sizes` is passed as
+        a tuple, a list or a tensor.
+
+        Two cases are covered: all grids sharing one size, and every grid
+        having its own random size (the expected resolution is then the
+        per-dimension maximum over the batch).
+        """
+        cpu = torch.device("cpu")
+
+        with self.subTest("VolumeLocator init with all sizes equal"):
+            grid_sizes = [resolution for _ in range(batch_size)]
+            locator_tuple = VolumeLocator(
+                batch_size=batch_size, grid_sizes=resolution, device=cpu
+            )
+            locator_list = VolumeLocator(
+                batch_size=batch_size, grid_sizes=grid_sizes, device=cpu
+            )
+            locator_tensor = VolumeLocator(
+                batch_size=batch_size,
+                grid_sizes=torch.tensor(grid_sizes),
+                device=cpu,
+            )
+            expected_grid_sizes = torch.tensor(grid_sizes)
+            expected_resolution = resolution
+            # use unittest assertions (not bare `assert`) so that the checks
+            # are not stripped when python runs with -O
+            for locator in (locator_tuple, locator_list, locator_tensor):
+                self.assertTrue(
+                    torch.allclose(expected_grid_sizes, locator._grid_sizes)
+                )
+            for locator in (locator_tuple, locator_list, locator_tensor):
+                self.assertEqual(expected_resolution, locator._resolution)
+
+        with self.subTest("VolumeLocator with different sizes in different grids"):
+            grid_sizes_list = [
+                torch.randint(low=1, high=42, size=(3,)) for _ in range(batch_size)
+            ]
+            grid_sizes_tensor = torch.cat([el[None] for el in grid_sizes_list])
+            locator_list = VolumeLocator(
+                batch_size=batch_size,
+                grid_sizes=grid_sizes_list,
+                device=cpu,
+            )
+            locator_tensor = VolumeLocator(
+                batch_size=batch_size,
+                grid_sizes=grid_sizes_tensor,
+                device=cpu,
+            )
+            expected_grid_sizes = grid_sizes_tensor
+            expected_resolution = tuple(torch.max(expected_grid_sizes, dim=0).values)
+            for locator in (locator_list, locator_tensor):
+                self.assertTrue(
+                    torch.allclose(expected_grid_sizes, locator._grid_sizes)
+                )
+            for locator in (locator_list, locator_tensor):
+                self.assertEqual(expected_resolution, locator._resolution)
+
+    def test_coord_transforms(self, num_volumes=3, num_channels=4, dtype=torch.float32):
+        """
+        Test the correctness of the conversion between the internal
+        Transform3D Volumes.VolumeLocator._local_to_world_transform and the initialization
+        from the translation and voxel_size.
+        """
+        # NOTE: tensors are created on "cuda:0"; `dtype` is accepted but unused.
+
+        device = torch.device("cuda:0")
+
+        # try for 10 sets of different random sizes/centers/voxel_sizes
+        for _ in range(10):
+
+            # three spatial sizes, each an integer in [3, 9]
+            size = torch.randint(high=10, size=(3,), low=3).tolist()
+
+            densities = torch.randn(
+                size=[num_volumes, num_channels, *size],
+                device=device,
+                dtype=torch.float32,
+            )
+
+            # init the transformation params
+            # (one translation and one anisotropic voxel size in [0.5, 3.5) per volume)
+            volume_translation = torch.randn(num_volumes, 3)
+            voxel_size = torch.rand(num_volumes, 3) * 3.0 + 0.5
+
+            # get the corresponding Transform3d object
+            # `local_offset` holds the spatial sizes in reversed order, repeated
+            # for every volume (presumably DxHxW -> xyz; confirm against Volumes)
+            local_offset = torch.tensor(list(size), dtype=torch.float32, device=device)[
+                [2, 1, 0]
+            ][None].repeat(num_volumes, 1)
+            # reference transform: scale by (size / 2 - 0.5), then by the voxel
+            # size, then translate by the negated volume translation
+            local_to_world_transform = (
+                Scale(0.5 * local_offset - 0.5, device=device)
+                .scale(voxel_size)
+                .translate(-volume_translation)
+            )
+
+            # init the volume structures with the scale and translation,
+            # then get the coord grid in world coords
+            v_trans_vs = Volumes(
+                densities=densities,
+                voxel_size=voxel_size,
+                volume_translation=volume_translation,
+            )
+            grid_rot_trans_vs = v_trans_vs.get_coord_grid(world_coordinates=True)
+
+            # map the default local coords to the world coords
+            # with local_to_world_transform
+            v_default = Volumes(densities=densities)
+            grid_default_local = v_default.get_coord_grid(world_coordinates=False)
+            # flatten the grid to a point list for transform_points, then
+            # restore the spatial layout
+            grid_default_world = local_to_world_transform.transform_points(
+                grid_default_local.view(num_volumes, -1, 3)
+            ).view(num_volumes, *size, 3)
+
+            # check that both grids are the same
+            self.assertClose(grid_rot_trans_vs, grid_default_world, atol=1e-5)
+
+            # check that the transformations are the same
+            self.assertClose(
+                v_trans_vs.get_local_to_world_coords_transform().get_matrix(),
+                local_to_world_transform.get_matrix(),
+                atol=1e-5,
+            )
+
+    def _check_trivial_grid_convention(
+        self, v_trivial, num_volumes, device, align_corners
+    ):
+        """
+        Run the coordinate-convention checks on a trivial volume batch with
+        spatial size DxHxW=5x7x5.
+
+        Args:
+            v_trivial: The `Volumes` object to check.
+            num_volumes: Batch size of `v_trivial`.
+            device: Device on which the query points are created.
+            align_corners: Value of `align_corners` passed to `grid_sample`
+                in the resampling check.
+        """
+        # check the case with x_world=(0,0,0)
+        pts_world = torch.zeros(num_volumes, 1, 3, device=device, dtype=torch.float32)
+        pts_local = v_trivial.world_to_local_coords(pts_world)
+        pts_local_expected = torch.zeros_like(pts_local)
+        self.assertClose(pts_local, pts_local_expected)
+
+        # check the case with x_world=(-2, 3, -2)
+        pts_world = torch.tensor([-2, 3, -2], device=device, dtype=torch.float32)[
+            None, None
+        ].repeat(num_volumes, 1, 1)
+        pts_local = v_trivial.world_to_local_coords(pts_world)
+        pts_local_expected = torch.tensor(
+            [-1, 1, -1], device=device, dtype=torch.float32
+        )[None, None].repeat(num_volumes, 1, 1)
+        self.assertClose(pts_local, pts_local_expected)
+
+        # check that the central voxel has coords x_world=(0, 0, 0) and x_local(0, 0, 0)
+        grid_world = v_trivial.get_coord_grid(world_coordinates=True)
+        grid_local = v_trivial.get_coord_grid(world_coordinates=False)
+        for grid in (grid_world, grid_local):
+            x0 = grid[0, :, :, 2, 0]
+            y0 = grid[0, :, 3, :, 1]
+            z0 = grid[0, 2, :, :, 2]
+            for coord_line in (x0, y0, z0):
+                self.assertClose(coord_line, torch.zeros_like(coord_line), atol=1e-7)
+
+        # resample grid_world using grid_sampler with local coords
+        # -> make sure the resampled version is the same as original
+        grid_world_resampled = torch.nn.functional.grid_sample(
+            grid_world.permute(0, 4, 1, 2, 3),
+            grid_local,
+            align_corners=align_corners,
+        ).permute(0, 2, 3, 4, 1)
+        self.assertClose(grid_world_resampled, grid_world, atol=1e-7)
+
+    def test_coord_grid_convention(
+        self, num_volumes=3, num_channels=4, dtype=torch.float32
+    ):
+        """
+        Check that for a trivial volume with spatial size DxHxW=5x7x5:
+        1) xyz_world=(0, 0, 0) lands right in the middle of the volume
+        with xyz_local=(0, 0, 0).
+        2) xyz_world=(-2, 3, -2) results in xyz_local=(-1, 1, -1).
+        3) The central voxel of the volume coordinate grid
+        has coords x_world=(0, 0, 0) and x_local=(0, 0, 0)
+        4) grid_sampler(world_coordinate_grid, local_coordinate_grid)
+        is the same as world_coordinate_grid itself. I.e. the local coordinate
+        grid matches the grid_sampler coordinate convention.
+
+        The checks run for a volume built with default arguments (resampled
+        with `align_corners=True`) and for volumes built with an explicit
+        `align_corners` of True and False. Tensors are created on "cuda:0";
+        `dtype` is accepted but not used.
+        """
+
+        device = torch.device("cuda:0")
+
+        densities = torch.randn(
+            size=[num_volumes, num_channels, 5, 7, 5],
+            device=device,
+            dtype=torch.float32,
+        )
+
+        # volume constructed without an explicit `align_corners`
+        self._check_trivial_grid_convention(
+            Volumes(densities=densities), num_volumes, device, True
+        )
+
+        # volumes constructed with an explicit `align_corners`
+        for align_corners in [True, False]:
+            self._check_trivial_grid_convention(
+                Volumes(densities=densities, align_corners=align_corners),
+                num_volumes,
+                device,
+                align_corners,
+            )
+
+    def test_coord_grid_convention_heterogeneous(
+        self, num_channels=4, dtype=torch.float32
+    ):
+        """
+        Coordinate-convention checks for a list of 2 trivial volumes with
+        spatial sizes DxHxW=(5x7x5, 3x5x5):
+        1) xyz_world=((-2, 3, -2), (-2, -2, 1)) results
+        in xyz_local=((-1, 1, -1), (-1, -1, 1)).
+        2) The central voxel of each volume coordinate grid
+        has coords x_world=(0, 0, 0) and x_local=(0, 0, 0).
+        3) grid_sampler(world_coordinate_grid, local_coordinate_grid), applied
+        to the part of each grid cropped to the volume's own size, is the same
+        as world_coordinate_grid itself. I.e. the local coordinate
+        grid matches the grid_sampler coordinate convention.
+        """
+
+        device = torch.device("cuda:0")
+
+        sizes = [(5, 7, 5), (3, 5, 5)]
+
+        densities_list = []
+        for volume_size in sizes:
+            densities_list.append(
+                torch.randn(
+                    size=[num_channels, *volume_size],
+                    device=device,
+                    dtype=torch.float32,
+                )
+            )
+
+        # init the volume
+        v_trivial = Volumes(densities=densities_list)
+
+        # check the border point locations
+        world_points = [[-2.0, 3.0, -2.0], [-2.0, -2.0, 1.0]]
+        local_points = [[-1.0, 1.0, -1.0], [-1.0, -1.0, 1.0]]
+        pts_world = torch.tensor(world_points, device=device, dtype=torch.float32)
+        pts_world = pts_world[:, None]
+        pts_local = v_trivial.world_to_local_coords(pts_world)
+        pts_local_expected = torch.tensor(
+            local_points, device=device, dtype=torch.float32
+        )[:, None]
+        self.assertClose(pts_local, pts_local_expected)
+
+        # check that the central voxel has coords x_world=(0, 0, 0) and x_local(0, 0, 0)
+        grid_world = v_trivial.get_coord_grid(world_coordinates=True)
+        grid_local = v_trivial.get_coord_grid(world_coordinates=False)
+        # (d, h, w) index of the middle voxel of every volume: (2, 3, 2), (1, 2, 2)
+        centers = [tuple((extent - 1) // 2 for extent in s) for s in sizes]
+        for grid in (grid_world, grid_local):
+            for vol_idx, (d_mid, h_mid, w_mid) in enumerate(centers):
+                zero_slices = (
+                    grid[vol_idx, :, :, w_mid, 0],  # x vanishes at the middle w
+                    grid[vol_idx, :, h_mid, :, 1],  # y vanishes at the middle h
+                    grid[vol_idx, d_mid, :, :, 2],  # z vanishes at the middle d
+                )
+                for coord_line in zero_slices:
+                    self.assertClose(
+                        coord_line, torch.zeros_like(coord_line), atol=1e-7
+                    )
+
+        # resample grid_world using grid_sampler with local coords
+        # -> make sure the resampled version is the same as original
+        for world_i, local_i, (depth, height, width) in zip(
+            grid_world, grid_local, sizes
+        ):
+            # drop the padding and restore the batch dimension
+            world_crop = world_i[:depth, :height, :width, :][None]
+            local_crop = local_i[:depth, :height, :width, :][None]
+            resampled = torch.nn.functional.grid_sample(
+                world_crop.permute(0, 4, 1, 2, 3),
+                local_crop,
+                align_corners=True,
+            )
+            resampled = resampled.permute(0, 2, 3, 4, 1)
+            self.assertClose(resampled, world_crop, atol=1e-7)
+
+    def test_coord_grid_transforms(
+        self, num_volumes=3, num_channels=4, dtype=torch.float32
+    ):
+        """
+        Test whether conversion between local-world coordinates of the
+        volume returns correct results.
+        """
+        # NOTE: tensors are created on "cuda:0"; `dtype` is accepted but unused.
+
+        device = torch.device("cuda:0")
+
+        # try for 10 sets of different random sizes/centers/voxel_sizes
+        for _ in range(10):
+
+            # three spatial sizes, each an integer in [3, 9]
+            size = torch.randint(high=10, size=(3,), low=3).tolist()
+
+            # one center per volume; a single voxel size in [0.5, 5.5) shared by all
+            center = torch.randn(num_volumes, 3, dtype=torch.float32, device=device)
+            voxel_size = torch.rand(1, dtype=torch.float32, device=device) * 5.0 + 0.5
+
+            # run once with a padded tensor and once with a list of tensors
+            # whose sizes are sampled between 3 and `size`
+            for densities in (
+                torch.randn(
+                    size=[num_volumes, num_channels, *size],
+                    device=device,
+                    dtype=torch.float32,
+                ),
+                TestVolumes._random_volume_list(
+                    num_volumes, 3, size, num_channels, device, rand_sizes=None
+                )[0],
+            ):
+
+                # init the volume structure
+                v = Volumes(
+                    densities=densities,
+                    voxel_size=voxel_size,
+                    volume_translation=-center,
+                )
+
+                # get local coord grid
+                grid_local = v.get_coord_grid(world_coordinates=False)
+
+                # convert from world to local to world
+                grid_world = v.get_coord_grid(world_coordinates=True)
+                grid_local_2 = v.world_to_local_coords(grid_world)
+                grid_world_2 = v.local_to_world_coords(grid_local_2)
+
+                # assertions on shape and values of grid_world and grid_local
+                self.assertClose(grid_world, grid_world_2, atol=1e-5)
+                self.assertClose(grid_local, grid_local_2, atol=1e-5)
+
+                # check that the individual slices of the location grid have
+                # constant values along expected dimensions
+                for plane_dim in (1, 2, 3):
+                    for grid_plane in grid_world.split(1, dim=plane_dim):
+                        # grid dims 1, 2, 3 are paired with the coordinate
+                        # channels 2, 1, 0 respectively
+                        grid_coord_dim = {1: 2, 2: 1, 3: 0}[plane_dim]
+                        grid_coord_plane = grid_plane.squeeze()[..., grid_coord_dim]
+                        # check that all elements of grid_coord_plane are
+                        # the same for each batch element
+                        # (max == min over the plane means the plane is constant)
+                        self.assertClose(
+                            grid_coord_plane.reshape(num_volumes, -1).max(dim=1).values,
+                            grid_coord_plane.reshape(num_volumes, -1).min(dim=1).values,
+                        )
+
+    def test_clone(
+        self, num_volumes=3, num_channels=4, size=(6, 8, 10), dtype=torch.float32
+    ):
+        """
+        Check that `Volumes.clone()` returns an object that does not share
+        its density/feature values with the source: modifying the clone
+        has to leave the source untouched.
+        """
+
+        device = torch.device("cuda:0")
+
+        features = torch.randn(
+            size=[num_volumes, num_channels, *size], device=device, dtype=torch.float32
+        )
+        densities = torch.rand(
+            size=[num_volumes, 1, *size], device=device, dtype=torch.float32
+        )
+
+        # the single voxel that gets modified in the clone
+        first_voxel = (0, 0, 0, 0, 0)
+
+        for has_features in (True, False):
+            optional_features = features if has_features else None
+            v = Volumes(densities=densities, features=optional_features)
+            vnew = v.clone()
+
+            # change the clone and expect it to differ from the source
+            vnew._densities.data[first_voxel] += 1.0
+            density_gap = vnew.densities()[first_voxel] - v.densities()[first_voxel]
+            self.assertNotAlmostEqual(float(density_gap.abs().max()), 0.0)
+
+            if not has_features:
+                continue
+
+            vnew._features.data[first_voxel] += 1.0
+            feature_gap = vnew.features()[first_voxel] - v.features()[first_voxel]
+            self.assertNotAlmostEqual(float(feature_gap.abs().max()), 0.0)
+
+    def _check_vars_on_device(self, v, desired_device):
+        """
+        Assert that every attribute of `v` is placed on a device of the
+        same type as `desired_device`.
+
+        The `device` attribute is compared directly; every other attribute
+        that is not None is compared through its own `.device`.
+        """
+        for var_name, var in vars(v).items():
+            if var_name == "device":
+                self.assertTrue(var.type == desired_device.type)
+                continue
+            if var is None:
+                # unset attributes have no device to check
+                continue
+            self.assertTrue(
+                var.device.type == desired_device.type,
+                (var_name, var.device, desired_device),
+            )
+
+    def test_to(
+        self, num_volumes=3, num_channels=4, size=(6, 8, 10), dtype=torch.float32
+    ):
+        """
+        Test the moving of the volumes from/to gpu and cpu
+
+        Covers three things:
+        1) `.to()` of `Volumes` and `VolumeLocator` accepts both `str` and
+        `torch.device`, returns the same object when the device does not
+        change and a different object otherwise.
+        2) The internal tensors of `Volumes` end up on the requested device
+        and are only copied when the device changes.
+        3) The internal tensors of `VolumeLocator` follow the locator.
+
+        The test moves objects to "cuda:0".
+        """
+
+        # defined up-front because all subtests below use them
+        cpu_device = torch.device("cpu")
+        cuda_device = torch.device("cuda:0")
+
+        features = torch.randn(
+            size=[num_volumes, num_channels, *size], dtype=torch.float32
+        )
+        densities = torch.rand(size=[num_volumes, 1, *size], dtype=dtype)
+        volumes = Volumes(densities=densities, features=features)
+        locator = VolumeLocator(
+            batch_size=5, grid_sizes=(3, 5, 7), device=volumes.device
+        )
+
+        for name, obj in (("VolumeLocator", locator), ("Volumes", volumes)):
+            with self.subTest(f"Moving {name} from/to gpu and cpu"):
+                # Test support for str and torch.device
+
+                # cpu -> cpu: nothing to move, the very same object comes back
+                converted_obj = obj.to("cpu")
+                self.assertEqual(cpu_device, converted_obj.device)
+                self.assertEqual(cpu_device, obj.device)
+                self.assertIs(obj, converted_obj)
+
+                converted_obj = obj.to(cpu_device)
+                self.assertEqual(cpu_device, converted_obj.device)
+                self.assertEqual(cpu_device, obj.device)
+                self.assertIs(obj, converted_obj)
+
+                # cpu -> cuda: a new object is returned, the source stays on cpu
+                converted_obj = obj.to("cuda:0")
+                self.assertEqual(cuda_device, converted_obj.device)
+                self.assertEqual(cpu_device, obj.device)
+                self.assertIsNot(obj, converted_obj)
+
+                converted_obj = obj.to(cuda_device)
+                self.assertEqual(cuda_device, converted_obj.device)
+                self.assertEqual(cpu_device, obj.device)
+                self.assertIsNot(obj, converted_obj)
+
+        with self.subTest("Test device placement of internal tensors of Volumes"):
+            features = features.to(cuda_device)
+            # move the densities themselves (not a copy of the features)
+            densities = densities.to(cuda_device)
+
+            for features_ in (features, None):
+                volumes = Volumes(densities=densities, features=features_)
+
+                cpu_volumes = volumes.cpu()
+                cuda_volumes = cpu_volumes.cuda()
+                cuda_volumes2 = cuda_volumes.cuda()
+                cpu_volumes2 = cuda_volumes2.cpu()
+
+                for volumes1, volumes2 in itertools.combinations(
+                    (volumes, cpu_volumes, cpu_volumes2, cuda_volumes, cuda_volumes2), 2
+                ):
+                    if volumes1 is cuda_volumes and volumes2 is cuda_volumes2:
+                        # checks that we do not copy if the devices stay the same
+                        assert_fun = self.assertIs
+                    else:
+                        assert_fun = self.assertSeparate
+                    assert_fun(volumes1._densities, volumes2._densities)
+                    if features_ is not None:
+                        assert_fun(volumes1._features, volumes2._features)
+                    for volumes_ in (volumes1, volumes2):
+                        if volumes_ in (cpu_volumes, cpu_volumes2):
+                            self._check_vars_on_device(volumes_, cpu_device)
+                        else:
+                            self._check_vars_on_device(volumes_, cuda_device)
+
+        with self.subTest("Test device placement of internal tensors of VolumeLocator"):
+            # two devices yield exactly one (cpu, cuda) pair
+            for device1, device2 in itertools.combinations(
+                (cpu_device, cuda_device), 2
+            ):
+                locator = locator.to(device1)
+                locator = locator.to(device2)
+                self.assertEqual(locator._grid_sizes.device, device2)
+                self.assertEqual(locator._local_to_world_transform.device, device2)
+
+    def _check_padded(self, x_pad, x_list, grid_sizes):
+        """
+        Assert that each entry of the padded batch `x_pad`, cropped to the
+        matching entry of `grid_sizes`, is close to the tensor at the same
+        position in `x_list`.
+        """
+        for i, x_expected in enumerate(x_list):
+            extent = grid_sizes[i]
+            # keep all channels, crop the three trailing dimensions
+            x_cropped = x_pad[i][:, : extent[0], : extent[1], : extent[2]]
+            self.assertClose(x_cropped, x_expected)
+
+    def test_feature_density_setters(self):
+        """
+        Tests getters and setters for padded/list representations.
+        """
+
+        device = torch.device("cuda:0")
+        diff_device = torch.device("cpu")
+
+        num_volumes = 30
+        num_channels = 4
+        K = 20
+
+        densities = []
+        features = []
+        grid_sizes = []
+        diff_grid_sizes = []
+
+        for _ in range(num_volumes):
+            # every dimension of the grid size is an integer in [1, K - 1]
+            grid_size = torch.randint(K - 1, size=(3,)).long() + 1
+            densities.append(
+                torch.rand((1, *grid_size), device=device, dtype=torch.float32)
+            )
+            features.append(
+                torch.rand(
+                    (num_channels, *grid_size), device=device, dtype=torch.float32
+                )
+            )
+            grid_sizes.append(grid_size)
+
+            # a size that is larger than `grid_size` by 1 or 2 in every dimension
+            diff_grid_size = (
+                copy.deepcopy(grid_size) + torch.randint(2, size=(3,)).long() + 1
+            )
+            diff_grid_sizes.append(diff_grid_size)
+        grid_sizes = torch.stack(grid_sizes).to(device)
+        diff_grid_sizes = torch.stack(diff_grid_sizes).to(device)
+
+        volumes = Volumes(densities=densities, features=features)
+        self.assertClose(volumes.get_grid_sizes(), grid_sizes)
+
+        # test the getters
+        features_padded = volumes.features()
+        densities_padded = volumes.densities()
+        features_list = volumes.features_list()
+        densities_list = volumes.densities_list()
+        # compare the padded tensors with both the list getters and the inputs
+        for x_pad, x_list in zip(
+            (densities_padded, features_padded, densities_padded, features_padded),
+            (densities_list, features_list, densities, features),
+        ):
+            self._check_padded(x_pad, x_list, grid_sizes)
+
+        # test feature setters
+        features_new = [
+            torch.rand((num_channels, *grid_size), device=device, dtype=torch.float32)
+            for grid_size in grid_sizes
+        ]
+        volumes._set_features(features_new)
+        features_new_list = volumes.features_list()
+        features_new_padded = volumes.features()
+        for x_pad, x_list in zip(
+            (features_new_padded, features_new_padded),
+            (features_new, features_new_list),
+        ):
+            self._check_padded(x_pad, x_list, grid_sizes)
+
+        # wrong features to update
+        # (a list with other sizes on another device, a padded tensor with
+        # another spatial size, and None)
+        bad_features_new = [
+            [
+                torch.rand(
+                    (num_channels, *grid_size), device=diff_device, dtype=torch.float32
+                )
+                for grid_size in diff_grid_sizes
+            ],
+            torch.rand(
+                (num_volumes, num_channels, K + 1, K, K),
+                device=device,
+                dtype=torch.float32,
+            ),
+            None,
+        ]
+        # NOTE(review): the bad *features* are passed to `_set_densities`;
+        # `_set_features` may have been intended here - confirm before changing,
+        # since it is not visible here whether `_set_features(None)` raises.
+        for bad_features_new_ in bad_features_new:
+            with self.assertRaises(ValueError):
+                volumes._set_densities(bad_features_new_)
+
+        # test density setters
+        densities_new = [
+            torch.rand((1, *grid_size), device=device, dtype=torch.float32)
+            for grid_size in grid_sizes
+        ]
+        volumes._set_densities(densities_new)
+        densities_new_list = volumes.densities_list()
+        densities_new_padded = volumes.densities()
+        for x_pad, x_list in zip(
+            (densities_new_padded, densities_new_padded),
+            (densities_new, densities_new_list),
+        ):
+            self._check_padded(x_pad, x_list, grid_sizes)
+
+        # wrong densities to update
+        # (same three kinds of invalid input as for the features above)
+        bad_densities_new = [
+            [
+                torch.rand((1, *grid_size), device=diff_device, dtype=torch.float32)
+                for grid_size in diff_grid_sizes
+            ],
+            torch.rand(
+                (num_volumes, 1, K + 1, K, K), device=device, dtype=torch.float32
+            ),
+            None,
+        ]
+        for bad_densities_new_ in bad_densities_new:
+            with self.assertRaises(ValueError):
+                volumes._set_densities(bad_densities_new_)
+
+        # test update_padded
+        # (start again from a fresh volume holding the original data)
+        volumes = Volumes(densities=densities, features=features)
+        volumes_updated = volumes.update_padded(
+            densities_new, new_features=features_new
+        )
+        densities_new_list = volumes_updated.densities_list()
+        densities_new_padded = volumes_updated.densities()
+        features_new_list = volumes_updated.features_list()
+        features_new_padded = volumes_updated.features()
+        for x_pad, x_list in zip(
+            (
+                densities_new_padded,
+                densities_new_padded,
+                features_new_padded,
+                features_new_padded,
+            ),
+            (densities_new, densities_new_list, features_new, features_new_list),
+        ):
+            self._check_padded(x_pad, x_list, grid_sizes)
+        # the updated volume is expected to share (by identity) the grid sizes,
+        # the local-to-world transform and the device of the source volume
+        self.assertIs(volumes.get_grid_sizes(), volumes_updated.get_grid_sizes())
+        self.assertIs(
+            volumes.get_local_to_world_coords_transform(),
+            volumes_updated.get_local_to_world_coords_transform(),
+        )
+        self.assertIs(volumes.device, volumes_updated.device)
+
+    def test_constructor_for_padded_lists(self):
+        """
+        Tests that bad padded/list features raise ValueError while good ones construct.
+        """
+
+        device = torch.device("cuda:0")
+        diff_device = torch.device("cpu")  # deliberately not the same as `device`
+
+        num_volumes = 3
+        num_channels = 4
+        size = (6, 8, 10)
+        diff_size = (6, 8, 11)  # differs from `size` in the last dimension only
+
+        # good ways to define densities
+        ok_densities = [
+            torch.randn(
+                size=[num_volumes, 1, *size], device=device, dtype=torch.float32
+            ).unbind(0),  # sequence of per-volume tensors
+            torch.randn(
+                size=[num_volumes, 1, *size], device=device, dtype=torch.float32
+            ),  # single batched tensor
+        ]
+
+        # bad ways to define features
+        bad_features = [
+            torch.randn(
+                size=[num_volumes + 1, num_channels, *size],
+                device=device,
+                dtype=torch.float32,
+            ).unbind(
+                0
+            ),  # list with diff batch size
+            torch.randn(
+                size=[num_volumes + 1, num_channels, *size],
+                device=device,
+                dtype=torch.float32,
+            ),  # diff batch size
+            torch.randn(
+                size=[num_volumes, num_channels, *diff_size],
+                device=device,
+                dtype=torch.float32,
+            ).unbind(
+                0
+            ),  # list with different size
+            torch.randn(
+                size=[num_volumes, num_channels, *diff_size],
+                device=device,
+                dtype=torch.float32,
+            ),  # different size
+            torch.randn(
+                size=[num_volumes, num_channels, *size],
+                device=diff_device,
+                dtype=torch.float32,
+            ),  # different device
+            torch.randn(
+                size=[num_volumes, num_channels, *size],
+                device=diff_device,
+                dtype=torch.float32,
+            ).unbind(
+                0
+            ),  # list with different device
+        ]
+
+        # good ways to define features
+        ok_features = [
+            torch.randn(
+                size=[num_volumes, num_channels, *size],
+                device=device,
+                dtype=torch.float32,
+            ).unbind(
+                0
+            ),  # list of features of correct size
+            torch.randn(
+                size=[num_volumes, num_channels, *size],
+                device=device,
+                dtype=torch.float32,
+            ),  # batched tensor of correct size
+        ]
+
+        for densities in ok_densities:
+            for features in bad_features:
+                self.assertRaises(
+                    ValueError, Volumes, densities=densities, features=features
+                )
+            for features in ok_features:
+                Volumes(densities=densities, features=features)  # must not raise
+
+    def test_constructor(
+        self, num_volumes=3, num_channels=4, size=(6, 8, 10), dtype=torch.float32
+    ):
+        """
+        Test different ways of calling the `Volumes` constructor
+        """
+
+        device = torch.device("cuda:0")
+
+        # all ways to define features
+        features = [
+            torch.randn(
+                size=[num_volumes, num_channels, *size],
+                device=device,
+                dtype=torch.float32,
+            ),  # padded tensor
+            torch.randn(
+                size=[num_volumes, num_channels, *size],
+                device=device,
+                dtype=torch.float32,
+            ).unbind(
+                0
+            ),  # list of features
+            None,  # no features
+        ]
+
+        # bad ways to define features
+        bad_features = [
+            torch.randn(
+                size=[num_volumes, num_channels, 2, *size],
+                device=device,
+                dtype=torch.float32,
+            ),  # 6 dims
+            torch.randn(
+                size=[num_volumes, *size], device=device, dtype=torch.float32
+            ),  # 4 dims
+            torch.randn(
+                size=[num_volumes, *size], device=device, dtype=torch.float32
+            ).unbind(
+                0
+            ),  # list of 4 dim tensors
+        ]
+
+        # all ways to define densities
+        densities = [
+            torch.randn(
+                size=[num_volumes, 1, *size], device=device, dtype=torch.float32
+            ),  # padded tensor
+            torch.randn(
+                size=[num_volumes, 1, *size], device=device, dtype=torch.float32
+            ).unbind(
+                0
+            ),  # list of densities
+        ]
+
+        # bad ways to define densities
+        bad_densities = [
+            None,  # omitted
+            torch.randn(
+                size=[num_volumes, 1, 1, *size], device=device, dtype=torch.float32
+            ),  # 6-dim tensor
+            torch.randn(
+                size=[num_volumes, 1, 1, *size], device=device, dtype=torch.float32
+            ).unbind(
+                0
+            ),  # list of 5-dim densities
+        ]
+
+        # all possible ways to define the voxels sizes
+        vox_sizes = [
+            torch.Tensor([1.0, 1.0, 1.0]),
+            [1.0, 1.0, 1.0],
+            torch.Tensor([1.0, 1.0, 1.0])[None].repeat(num_volumes, 1),
+            torch.Tensor([1.0])[None].repeat(num_volumes, 1),
+            1.0,
+            torch.Tensor([1.0]),
+        ]
+
+        # all possible ways to define the volume translations
+        vol_translations = [
+            torch.Tensor([1.0, 1.0, 1.0]),
+            [1.0, 1.0, 1.0],
+            torch.Tensor([1.0, 1.0, 1.0])[None].repeat(num_volumes, 1),
+        ]
+
+        # wrong ways to define voxel sizes
+        bad_vox_sizes = [
+            torch.Tensor([1.0, 1.0, 1.0, 1.0]),
+            [1.0, 1.0, 1.0, 1.0],
+            torch.Tensor([]),
+            None,
+        ]
+
+        # wrong ways to define the volume translations
+        bad_vol_translations = [
+            torch.Tensor([1.0, 1.0]),
+            [1.0, 1.0],
+            1.0,
+            torch.Tensor([1.0, 1.0, 1.0])[None].repeat(num_volumes + 1, 1),
+        ]
+
+        def zip_with_ok_indicator(good, bad):
+            return zip([*good, *bad], [*([True] * len(good)), *([False] * len(bad))])
+
+        for features_, features_ok in zip_with_ok_indicator(features, bad_features):
+            for densities_, densities_ok in zip_with_ok_indicator(
+                densities, bad_densities
+            ):
+                for vox_size, size_ok in zip_with_ok_indicator(
+                    vox_sizes, bad_vox_sizes
+                ):
+                    for vol_translation, trans_ok in zip_with_ok_indicator(
+                        vol_translations, bad_vol_translations
+                    ):
+                        if (
+                            size_ok and trans_ok and features_ok and densities_ok
+                        ):  # if all entries are good we check that this doesnt throw
+                            Volumes(
+                                features=features_,
+                                densities=densities_,
+                                voxel_size=vox_size,
+                                volume_translation=vol_translation,
+                            )
+
+                        else:  # otherwise we check for ValueError
+                            self.assertRaises(
+                                ValueError,
+                                Volumes,
+                                features=features_,
+                                densities=densities_,
+                                voxel_size=vox_size,
+                                volume_translation=vol_translation,
+                            )
diff --git a/pytorch3d/website/.dockerignore b/pytorch3d/website/.dockerignore
new file mode 100644
index 0000000000000000000000000000000000000000..27d2dae2b493488b48bdb18b95af471821ece9bf
--- /dev/null
+++ b/pytorch3d/website/.dockerignore
@@ -0,0 +1,2 @@
+*/node_modules
+*.log
diff --git a/pytorch3d/website/.gitignore b/pytorch3d/website/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..64150c13c1bfcfc78adc4750ab3d9ef97f2454e4
--- /dev/null
+++ b/pytorch3d/website/.gitignore
@@ -0,0 +1,13 @@
+.DS_Store
+
+node_modules
+
+lib/core/metadata.js
+lib/core/MetadataBlog.js
+
+website/translated_docs
+website/build/
+website/yarn.lock
+website/node_modules
+website/i18n/*
+website/_tutorials/*
diff --git a/pytorch3d/website/README.md b/pytorch3d/website/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bfd420afe376fcee68a8214f4995b3a5ceeab8cd
--- /dev/null
+++ b/pytorch3d/website/README.md
@@ -0,0 +1,265 @@
+This website was created with [Docusaurus](https://docusaurus.io/).
+
+# Building the PyTorch3D website
+
+## Install
+
+1. Make sure all the dependencies for the website are installed:
+
+```sh
+# Install dependencies
+$ yarn
+
+or
+
+$ npm install docusaurus-init
+```
+
+2. Run your dev server:
+
+```sh
+# Start the site
+$ yarn start
+
+or
+$ ./node_modules/docusaurus/lib/start-server.js
+```
+
+## Build the tutorials
+
+We convert the IPython notebooks to HTML using `parse_tutorials.py`, which is found in the `scripts` folder at the root of the PyTorch3D directory.
+
+Before running this script, install the following dependencies:
+
+```
+pip install nbformat==4.4.0 nbconvert==5.3.1 ipywidgets==7.5.1 tornado==4.2 bs4
+```
+
+Install yarn:
+
+```
+brew install yarn
+
+# or
+
+curl -o- -L https://yarnpkg.com/install.sh | bash
+```
+
+Then run the build script:
+
+```
+bash scripts/build_website.sh
+```
+
+This will build the docusaurus website and run a script to parse the tutorials and generate:
+- `.html` files in the `website/_tutorials` folder
+- `.js` files in the `website/pages/tutorials` folder
+- `.py`/`.ipynb` files in the `website/static/files` folder
+
+
+TODO: Add support for LaTeX in Markdown in Jupyter notebooks and for embedded images.
+
+## Build and publish the website
+
+To update for a new version, you need to build the tutorials and the website and push to the gh-pages
+branch of `github.com/facebookresearch/pytorch3d`. The instructions in `scripts/publish_website.sh`
+bring it all together.
+
+## Add a new tutorial
+
+The tutorials to include in the website are listed in `website/tutorials.json`. If you create a new tutorial, add an entry to the list in this file. This is needed in order to generate the sidebar for the tutorials page.
+
+
+## Edit the landing page
+
+To change the content of the landing page, modify `website/pages/en/index.js`.
+
+
+## Edit the tutorials page
+
+To change the content of the tutorials home page, modify `website/pages/tutorials/index.js`.
+
+
+---------------------------------------------------------
+
+## Docusaurus docs
+
+- [Get Started in 5 Minutes](#get-started-in-5-minutes)
+- [Directory Structure](#directory-structure)
+- [Editing Content](#editing-content)
+- [Adding Content](#adding-content)
+- [Full Documentation](#full-documentation)
+
+
+## Directory Structure
+
+Your project file structure should look something like this
+
+```
+my-docusaurus/
+  docs/
+    doc-1.md
+    doc-2.md
+    doc-3.md
+  website/
+    blog/
+      2016-3-11-oldest-post.md
+      2017-10-24-newest-post.md
+    core/
+    node_modules/
+    pages/
+    static/
+      css/
+      img/
+    package.json
+    sidebars.json
+    siteConfig.js
+```
+
+# Editing Content
+
+## Editing an existing docs page
+
+Edit docs by navigating to `docs/` and editing the corresponding document:
+
+`docs/doc-to-be-edited.md`
+
+```markdown
+---
+id: page-needs-edit
+title: This Doc Needs To Be Edited
+---
+
+Edit me...
+```
+
+For more information about docs, click [here](https://docusaurus.io/docs/en/navigation)
+
+## Editing an existing blog post
+
+Edit blog posts by navigating to `website/blog` and editing the corresponding post:
+
+`website/blog/post-to-be-edited.md`
+
+```markdown
+---
+id: post-needs-edit
+title: This Blog Post Needs To Be Edited
+---
+
+Edit me...
+```
+
+For more information about blog posts, click [here](https://docusaurus.io/docs/en/adding-blog)
+
+# Adding Content
+
+## Adding a new docs page to an existing sidebar
+
+1. Create the doc as a new markdown file in `/docs`, example `docs/newly-created-doc.md`:
+
+```md
+---
+id: newly-created-doc
+title: This Doc Needs To Be Edited
+---
+
+My new content here..
+```
+
+1. Refer to that doc's ID in an existing sidebar in `website/sidebars.json`:
+
+```javascript
+// Add newly-created-doc to the Getting Started category of docs
+{
+  "docs": {
+    "Getting Started": [
+      "quick-start",
+      "newly-created-doc" // new doc here
+    ],
+    ...
+  },
+  ...
+}
+```
+
+For more information about adding new docs, click [here](https://docusaurus.io/docs/en/navigation)
+
+## Adding a new blog post
+
+1. Make sure there is a header link to your blog in `website/siteConfig.js`:
+
+`website/siteConfig.js`
+
+```javascript
+headerLinks: [
+    ...
+    { blog: true, label: 'Blog' },
+    ...
+]
+```
+
+2. Create the blog post with the format `YYYY-MM-DD-My-Blog-Post-Title.md` in `website/blog`:
+
+`website/blog/2018-05-21-New-Blog-Post.md`
+
+```markdown
+---
+author: Frank Li
+authorURL: https://twitter.com/foobarbaz
+authorFBID: 503283835
+title: New Blog Post
+---
+
+Lorem Ipsum...
+```
+
+For more information about blog posts, click [here](https://docusaurus.io/docs/en/adding-blog)
+
+## Adding items to your site's top navigation bar
+
+1. Add links to docs, custom pages or external links by editing the headerLinks field of `website/siteConfig.js`:
+
+`website/siteConfig.js`
+
+```javascript
+{
+  headerLinks: [
+    ...
+    /* you can add docs */
+    { doc: 'my-examples', label: 'Examples' },
+    /* you can add custom pages */
+    { page: 'help', label: 'Help' },
+    /* you can add external links */
+    { href: 'https://github.com/facebook/docusaurus', label: 'GitHub' },
+    ...
+  ],
+  ...
+}
+```
+
+For more information about the navigation bar, click [here](https://docusaurus.io/docs/en/navigation)
+
+## Adding custom pages
+
+1. Docusaurus uses React components to build pages. The components are saved as `.js` files in `website/pages/en`.
+1. If you want your page to show up in your navigation header, you will need to update `website/siteConfig.js` to add to the `headerLinks` element:
+
+`website/siteConfig.js`
+
+```javascript
+{
+  headerLinks: [
+    ...
+    { page: 'my-new-custom-page', label: 'My New Custom Page' },
+    ...
+  ],
+  ...
+}
+```
+
+For more information about custom pages, click [here](https://docusaurus.io/docs/en/custom-pages).
+
+# Full Documentation
+
+Full documentation can be found on the [website](https://docusaurus.io/).
diff --git a/pytorch3d/website/core/Footer.js b/pytorch3d/website/core/Footer.js
new file mode 100644
index 0000000000000000000000000000000000000000..9e8c4a79b390e342a86675309b50265ca7258d28
--- /dev/null
+++ b/pytorch3d/website/core/Footer.js
@@ -0,0 +1,91 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+const PropTypes = require("prop-types");
+const React = require('react');
+
+function SocialFooter(props) { // GitHub star button for the configured organization/project repo
+  const repoUrl = `https://github.com/${props.config.organizationName}/${props.config.projectName}`;
+  return (
+    <div className="footerSection">
+      <div className="social">
+        <a
+          className="github-button" // part of the https://buttons.github.io/buttons.js script in siteConfig.js
+          href={repoUrl}
+          data-count-href={`${repoUrl}/stargazers`}
+          data-show-count="true"
+          data-count-aria-label="# stargazers on GitHub"
+          aria-label="Star PyTorch3D on GitHub"
+        >
+          {props.config.projectName}
+        </a>
+      </div>
+    </div>
+  );
+}
+
+SocialFooter.propTypes = {
+  config: PropTypes.object
+};
+
+class Footer extends React.Component {
+  docUrl(doc, language) {
+    const baseUrl = this.props.config.baseUrl;
+    const docsUrl = this.props.config.docsUrl;
+    const docsPart = `${docsUrl ? `${docsUrl}/` : ''}`;
+    const langPart = `${language ? `${language}/` : ''}`;
+    return `${baseUrl}${docsPart}${langPart}${doc}`;
+  }
+
+  pageUrl(doc, language) {
+    const baseUrl = this.props.config.baseUrl;
+    return baseUrl + (language ? `${language}/` : '') + doc;
+  }
+
+  render() {
+    const repoUrl = `https://github.com/${this.props.config.organizationName}/${this.props.config.projectName}`;
+    return (
+      <footer className="nav-footer" id="footer">
+        <section className="sitemap">
+          <SocialFooter config={this.props.config} />
+        </section>
+
+        <a
+          href="https://opensource.facebook.com/"
+          target="_blank"
+          rel="noreferrer noopener"
+          className="fbOpenSource">
+          <img
+            src={`${this.props.config.baseUrl}img/oss_logo.png`}
+            alt="Facebook Open Source"
+            width="170"
+            height="45"
+          />
+        </a>
+        <section className="copyright">{this.props.config.copyright}
+          <br/>
+          Legal:
+          <a
+            href="https://opensource.facebook.com/legal/privacy/"
+            target="_blank"
+            rel="noreferrer noopener">
+            Privacy
+          </a>
+          <a
+            href="https://opensource.facebook.com/legal/terms/"
+            target="_blank"
+            rel="noreferrer noopener">
+            Terms
+          </a>
+        </section>
+      </footer>
+    );
+  }
+}
+
+module.exports = Footer;
diff --git a/pytorch3d/website/core/Tutorial.js b/pytorch3d/website/core/Tutorial.js
new file mode 100644
index 0000000000000000000000000000000000000000..866fa6e34b938ae7bd7e2df16a2845887c5bc3fd
--- /dev/null
+++ b/pytorch3d/website/core/Tutorial.js
@@ -0,0 +1,100 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ * @format
+ */
+
+const React = require('react');
+
+const fs = require('fs-extra');
+const path = require('path');
+const CWD = process.cwd();
+
+const CompLibrary = require(`${CWD}/node_modules/docusaurus/lib/core/CompLibrary.js`);
+const Container = CompLibrary.Container;
+
+const TutorialSidebar = require(`${CWD}/core/TutorialSidebar.js`);
+
+function renderDownloadIcon() { // Inline "file-download" SVG icon, hidden from assistive technology
+  return (
+    <svg
+      aria-hidden="true"
+      focusable="false"
+      data-prefix="fas"
+      data-icon="file-download"
+      className="svg-inline--fa fa-file-download fa-w-12"
+      role="img"
+      xmlns="http://www.w3.org/2000/svg"
+      viewBox="0 0 384 512">
+      <path
+        fill="currentColor"
+        d="M224 136V0H24C10.7 0 0 10.7 0 24v464c0 13.3 10.7 24 24 24h336c13.3 0 24-10.7 24-24V160H248c-13.2 0-24-10.8-24-24zm76.45 211.36l-96.42 95.7c-6.65 6.61-17.39 6.61-24.04 0l-96.42-95.7C73.42 337.29 80.54 320 94.82 320H160v-80c0-8.84 7.16-16 16-16h32c8.84 0 16 7.16 16 16v80h65.18c14.28 0 21.4 17.29 11.27 27.36zM377 105L279.1 7c-4.5-4.5-10.6-7-17-7H256v128h128v-6.1c0-6.3-2.5-12.4-7-16.9z"
+      />
+    </svg>
+  );
+}
+
+class Tutorial extends React.Component {
+  render() {
+    const {baseUrl, tutorialID} = this.props;
+
+    const htmlFile = `${CWD}/_tutorials/${tutorialID}.html`;
+    const normalizedHtmlFile = path.normalize(htmlFile);
+
+    return (
+      <div className="docMainWrapper wrapper">
+        <TutorialSidebar currentTutorialID={tutorialID} />
+        <Container className="mainContainer">
+          <div className="tutorialButtonsWrapper">
+            <div className="tutorialButtonWrapper buttonWrapper">
+              <a
+                className="tutorialButton button"
+                download
+                href={`https://colab.research.google.com/github/facebookresearch/pytorch3d/blob/stable/docs/tutorials/${tutorialID}.ipynb`}
+                target="_blank">
+                <img
+                  className="colabButton"
+                  align="left"
+                  src={`${baseUrl}img/colab_icon.png`}
+                />
+                {'Run in Google Colab'}
+              </a>
+            </div>
+            <div className="tutorialButtonWrapper buttonWrapper">
+              <a
+                className="tutorialButton button"
+                download
+                href={`${baseUrl}files/${tutorialID}.ipynb`}
+                target="_blank">
+                {renderDownloadIcon()}
+                {'Download Tutorial Jupyter Notebook'}
+              </a>
+            </div>
+            <div className="tutorialButtonWrapper buttonWrapper">
+              <a
+                className="tutorialButton button"
+                download
+                href={`${baseUrl}files/${tutorialID}.py`}
+                target="_blank">
+                {renderDownloadIcon()}
+                {'Download Tutorial Source Code'}
+              </a>
+            </div>
+          </div>
+          <div
+            className="tutorialBody"
+            dangerouslySetInnerHTML={{
+              __html: fs.readFileSync(normalizedHtmlFile, {encoding: 'utf8'}),
+            }}
+          />
+        </Container>
+      </div>
+    );
+  }
+}
+
+module.exports = Tutorial;
diff --git a/pytorch3d/website/core/TutorialSidebar.js b/pytorch3d/website/core/TutorialSidebar.js
new file mode 100644
index 0000000000000000000000000000000000000000..b53c683de9368bcbdfa4bf84a98698fcd65c311d
--- /dev/null
+++ b/pytorch3d/website/core/TutorialSidebar.js
@@ -0,0 +1,93 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ * @format
+ */
+
+const React = require('react');
+const fs = require('fs-extra');
+const path = require('path');
+const join = path.join;
+const CWD = process.cwd();
+
+const CompLibrary = require(join(
+  CWD,
+  '/node_modules/docusaurus/lib/core/CompLibrary.js',
+));
+const SideNav = require(join(
+  CWD,
+  '/node_modules/docusaurus/lib/core/nav/SideNav.js',
+));
+
+const Container = CompLibrary.Container;
+
+const OVERVIEW_ID = 'tutorial_overview';
+
+class TutorialSidebar extends React.Component {
+  render() {
+    const {currentTutorialID} = this.props;
+    const current = {
+      id: currentTutorialID || OVERVIEW_ID,
+    };
+
+    const toc = [
+      {
+        type: 'CATEGORY',
+        title: 'Tutorials',
+        children: [
+          {
+            type: 'LINK',
+            item: {
+              permalink: 'tutorials/',
+              id: OVERVIEW_ID,
+              title: 'Overview',
+            },
+          },
+        ],
+      },
+    ];
+
+    const jsonFile = join(CWD, 'tutorials.json');
+    const normJsonFile = path.normalize(jsonFile);
+    const json = JSON.parse(fs.readFileSync(normJsonFile, {encoding: 'utf8'}));
+
+    Object.keys(json).forEach(category => {
+      const categoryItems = json[category];
+      const items = [];
+      categoryItems.map(item => {
+        items.push({
+          type: 'LINK',
+          item: {
+            permalink: `tutorials/${item.id}`,
+            id: item.id,
+            title: item.title,
+          },
+        });
+      });
+
+      toc.push({
+        type: 'CATEGORY',
+        title: category,
+        children: items,
+      });
+    });
+
+    return (
+      <Container className="docsNavContainer" id="docsNav" wrapper={false}>
+        <SideNav
+          language={'tutorials'}
+          root={'tutorials'}
+          title="Tutorials"
+          contents={toc}
+          current={current}
+        />
+      </Container>
+    );
+  }
+}
+
+module.exports = TutorialSidebar;
diff --git a/pytorch3d/website/package.json b/pytorch3d/website/package.json
new file mode 100644
index 0000000000000000000000000000000000000000..1f87392a9d8236e74a6de6a90608d201cf8a44db
--- /dev/null
+++ b/pytorch3d/website/package.json
@@ -0,0 +1,14 @@
+{
+  "scripts": {
+    "examples": "docusaurus-examples",
+    "start": "docusaurus-start",
+    "build": "docusaurus-build",
+    "publish-gh-pages": "docusaurus-publish",
+    "write-translations": "docusaurus-write-translations",
+    "version": "docusaurus-version",
+    "rename-version": "docusaurus-rename-version"
+  },
+  "devDependencies": {
+    "docusaurus": "^1.14.4"
+  }
+}
diff --git a/pytorch3d/website/pages/en/help.js b/pytorch3d/website/pages/en/help.js
new file mode 100644
index 0000000000000000000000000000000000000000..323ba7123f725bf4358b7a3a18879bf7a7052d1a
--- /dev/null
+++ b/pytorch3d/website/pages/en/help.js
@@ -0,0 +1,55 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+const React = require('react');
+
+const CompLibrary = require('../../core/CompLibrary.js');
+
+const Container = CompLibrary.Container;
+const GridBlock = CompLibrary.GridBlock;
+
+function Help(props) { // Renders the "Need help?" page with three support blocks
+  const {config: siteConfig, language = ''} = props;
+  const {baseUrl, docsUrl} = siteConfig;
+  const docsPart = `${docsUrl ? `${docsUrl}/` : ''}`;
+  const langPart = `${language ? `${language}/` : ''}`;
+  const docUrl = doc => `${baseUrl}${docsPart}${langPart}${doc}`; // baseUrl + [docsUrl/] + [language/] + doc
+
+  const supportLinks = [
+    {
+      content: `Learn more using the [documentation on this site.](${docUrl(
+        'doc1.html',
+      )})`, // NOTE(review): 'doc1.html' looks like a template placeholder; confirm this page exists
+      title: 'Browse Docs',
+    },
+    {
+      content: 'Ask questions about the documentation and project',
+      title: 'Join the community',
+    },
+    {
+      content: "Find out what's new with this project",
+      title: 'Stay up to date',
+    },
+  ];
+
+  return (
+    <div className="docMainWrapper wrapper">
+      <Container className="mainContainer documentContainer postContainer">
+        <div className="post">
+          <header className="postHeader">
+            <h1>Need help?</h1>
+          </header>
+          <p>This project is maintained by a dedicated group of people.</p>
+          <GridBlock contents={supportLinks} layout="threeColumn" />
+        </div>
+      </Container>
+    </div>
+  );
+}
+
+module.exports = Help;
diff --git a/pytorch3d/website/pages/en/index.js b/pytorch3d/website/pages/en/index.js
new file mode 100644
index 0000000000000000000000000000000000000000..59afb0f6d9bd07c7abe93a8e9ca0bf35e4f70cf8
--- /dev/null
+++ b/pytorch3d/website/pages/en/index.js
@@ -0,0 +1,240 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+const React = require('react');
+
+const CompLibrary = require('../../core/CompLibrary.js');
+
+const MarkdownBlock = CompLibrary.MarkdownBlock; /* Used to read markdown */
+const Container = CompLibrary.Container;
+const GridBlock = CompLibrary.GridBlock;
+const bash = (...args) => `~~~bash\n${String.raw(...args)}\n~~~`;
+class HomeSplash extends React.Component { // Landing-page header: logo, tagline and three buttons
+  render() {
+    const {siteConfig, language = ''} = this.props;
+    const {baseUrl, docsUrl} = siteConfig;
+    const docsPart = `${docsUrl ? `${docsUrl}/` : ''}`;
+    const langPart = `${language ? `${language}/` : ''}`;
+    const docUrl = doc => `${baseUrl}${docsPart}${langPart}${doc}`; // baseUrl + [docsUrl/] + [language/] + doc
+
+    const SplashContainer = props => ( // nested wrapper divs around the splash content
+      <div className="homeContainer">
+        <div className="homeSplashFade">
+          <div className="wrapper homeWrapper">{props.children}</div>
+        </div>
+      </div>
+    );
+
+    const Logo = props => ( // image taken from props.img_src
+      <div className="splashLogo">
+        <img src={props.img_src} alt="Project Logo" />
+      </div>
+    );
+
+    const ProjectTitle = props => ( // only `tagline` is rendered; the `title` prop passed below is not used
+      <h2 className="projectTitle">
+        <small>{props.tagline}</small>
+      </h2>
+    );
+
+    const PromoSection = props => ( // row that holds the buttons
+      <div className="section promoSection">
+        <div className="promoRow">
+          <div className="pluginRowBlock">{props.children}</div>
+        </div>
+      </div>
+    );
+
+    const Button = props => ( // anchor styled as a button; href and target come from props
+      <div className="pluginWrapper buttonWrapper">
+        <a className="button" href={props.href} target={props.target}>
+          {props.children}
+        </a>
+      </div>
+    );
+
+    return (
+      <SplashContainer>
+        <Logo img_src={baseUrl + 'img/pytorch3dlogowhite.svg'} />
+        <div className="inner">
+          <ProjectTitle tagline={siteConfig.tagline} title={siteConfig.title} />
+          <PromoSection>
+            <Button href={docUrl('why_pytorch3d.html')}>Docs</Button>
+            <Button href={`${baseUrl}tutorials/`}>Tutorials</Button>
+            <Button href={'#quickstart'}>Get Started</Button>
+          </PromoSection>
+        </div>
+      </SplashContainer>
+    );
+  }
+}
+
+function SocialBanner() { // Static banner linking to the Support Ukraine page; takes no props
+  return (
+    <div className="socialBanner">
+      <div>
+        Support Ukraine 🇺🇦{' '}
+        <a href="https://opensource.fb.com/support-ukraine">
+          Help Provide Humanitarian Aid to Ukraine
+        </a>
+        .
+      </div>
+    </div>
+  );
+}
+
+class Index extends React.Component { // Landing page: banner, splash header, feature grid and quick start
+  render() {
+    const {config: siteConfig, language = ''} = this.props;
+    const {baseUrl} = siteConfig;
+
+    const Block = props => ( // GridBlock wrapped in a Container padded at top and bottom
+      <Container
+        padding={['bottom', 'top']}
+        id={props.id}
+        background={props.background}>
+        <GridBlock
+          align="center"
+          contents={props.children}
+          layout={props.layout}
+        />
+      </Container>
+    );
+
+    const Description = () => ( // NOTE(review): defined but not rendered by the return at the end of render()
+      <Block background="light">
+        {[
+          {
+            content:
+              'This is another description of how this project is useful',
+            image: `${baseUrl}img/docusaurus.svg`,
+            imageAlign: 'right',
+            title: 'Description',
+          },
+        ]}
+      </Block>
+    );
+
+    const pre = '```'; // Markdown code-fence marker that opens codeExample
+
+    const codeExample = `${pre}python
+from pytorch3d.utils import ico_sphere
+from pytorch3d.io import load_obj
+from pytorch3d.structures import Meshes
+from pytorch3d.ops import sample_points_from_meshes
+from pytorch3d.loss import chamfer_distance
+
+# Use an ico_sphere mesh and load a mesh from an .obj e.g. model.obj
+sphere_mesh = ico_sphere(level=3)
+verts, faces, _ = load_obj("model.obj")
+test_mesh = Meshes(verts=[verts], faces=[faces.verts_idx])
+
+# Differentiably sample 5k points from the surface of each mesh and then compute the loss.
+sample_sphere = sample_points_from_meshes(sphere_mesh, 5000)
+sample_test = sample_points_from_meshes(test_mesh, 5000)
+loss_chamfer, _ = chamfer_distance(sample_sphere, sample_test)
+    `;
+
+    const QuickStart = () => ( // id="quickstart" is the target of the splash "Get Started" button
+      <div
+        className="productShowcaseSection"
+        id="quickstart"
+        style={{textAlign: 'center'}}>
+        <h2>Get Started</h2>
+        <Container>
+          <ol>
+            <li>
+              <strong>Install PyTorch3D  </strong> (following the instructions <a href="https://github.com/facebookresearch/pytorch3d/blob/main/INSTALL.md">here</a>)
+            </li>
+            <li>
+              <strong>Try a few 3D operators  </strong>
+              e.g. compute the chamfer loss between two meshes:
+              <MarkdownBlock>{codeExample}</MarkdownBlock>
+            </li>
+          </ol>
+        </Container>
+      </div>
+    );
+
+    const Features = () => ( // three feature cards, each with an image, a title and a short text
+      <div className="productShowcaseSection" style={{textAlign: 'center'}}>
+        <Block layout="fourColumn">
+          {[
+            {
+              content:
+                'Supports batching of 3D inputs of different sizes ' +
+                'such as meshes' ,
+              image: `${baseUrl}img/batching.svg`,
+              imageAlign: 'top',
+              title: 'Heterogeneous Batching',
+            },
+            {
+              content:
+                'Supports optimized implementations of ' +
+                'several common  functions for 3D data',
+              image: `${baseUrl}img/ops.png`,
+              imageAlign: 'top',
+              title: 'Fast 3D Operators',
+            },
+            {
+              content:
+                'Modular differentiable rendering API ' +
+                'with parallel implementations in ' +
+                'PyTorch, C++ and CUDA' ,
+              image: `${baseUrl}img/rendering.svg`,
+              imageAlign: 'top',
+              title: 'Differentiable Rendering',
+            },
+          ]}
+        </Block>
+      </div>
+    );
+
+    const Showcase = () => { // NOTE(review): defined but not rendered by the return at the end of render()
+      if ((siteConfig.users || []).length === 0) {
+        return null;
+      }
+
+      const showcase = siteConfig.users
+        .filter(user => user.pinned)
+        .map(user => (
+          <a href={user.infoLink} key={user.infoLink}>
+            <img src={user.image} alt={user.caption} title={user.caption} />
+          </a>
+        ));
+
+      const pageUrl = page => baseUrl + (language ? `${language}/` : '') + page;
+
+      return (
+        <div className="productShowcaseSection paddingBottom">
+          <h2>Who is Using This?</h2>
+          <p>This project is used by all these people</p>
+          <div className="logos">{showcase}</div>
+          <div className="more-users">
+            <a className="button" href={pageUrl('users.html')}>
+              More {siteConfig.title} Users
+            </a>
+          </div>
+        </div>
+      );
+    };
+
+    return (
+      <div>
+        <SocialBanner />
+        <HomeSplash siteConfig={siteConfig} language={language} />
+        <div className="landingPage mainContainer">
+          <Features />
+          <QuickStart />
+        </div>
+      </div>
+    );
+  }
+}
+
+module.exports = Index;
diff --git a/pytorch3d/website/pages/en/users.js b/pytorch3d/website/pages/en/users.js
new file mode 100644
index 0000000000000000000000000000000000000000..2439c3eefb24bd80a554d547ea0629c741b992ed
--- /dev/null
+++ b/pytorch3d/website/pages/en/users.js
@@ -0,0 +1,49 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+const React = require('react');
+
+const CompLibrary = require('../../core/CompLibrary.js');
+
+const Container = CompLibrary.Container;
+
+class Users extends React.Component {
+  render() {
+    const {config: siteConfig} = this.props;
+    if ((siteConfig.users || []).length === 0) {
+      return null;
+    }
+
+    const editUrl = `${siteConfig.repoUrl}/edit/main/website/siteConfig.js`;
+    const showcase = siteConfig.users.map(user => (
+      <a href={user.infoLink} key={user.infoLink}>
+        <img src={user.image} alt={user.caption} title={user.caption} />
+      </a>
+    ));
+
+    return (
+      <div className="mainContainer">
+        <Container padding={['bottom', 'top']}>
+          <div className="showcaseSection">
+            <div className="prose">
+              <h1>Who is Using This?</h1>
+              <p>This project is used by many folks</p>
+            </div>
+            <div className="logos">{showcase}</div>
+            <p>Are you using this project?</p>
+            <a href={editUrl} className="button">
+              Add your company
+            </a>
+          </div>
+        </Container>
+      </div>
+    );
+  }
+}
+
+module.exports = Users;
diff --git a/pytorch3d/website/pages/tutorials/index.js b/pytorch3d/website/pages/tutorials/index.js
new file mode 100644
index 0000000000000000000000000000000000000000..1a97c2b343763133d23d7a2eeb59d5d5040b3142
--- /dev/null
+++ b/pytorch3d/website/pages/tutorials/index.js
@@ -0,0 +1,83 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ * @format
+ */
+
+const React = require('react');
+
+const CWD = process.cwd();
+
+const CompLibrary = require(`${CWD}/node_modules/docusaurus/lib/core/CompLibrary.js`);
+const Container = CompLibrary.Container;
+const MarkdownBlock = CompLibrary.MarkdownBlock;
+
+const TutorialSidebar = require(`${CWD}/core/TutorialSidebar.js`);
+const bash = (...args) => `~~~bash\n${String.raw(...args)}\n~~~`;
+
+class TutorialHome extends React.Component {
+  render() {
+    return (
+      <div className="docMainWrapper wrapper">
+        <TutorialSidebar currentTutorialID={null} />
+        <Container className="mainContainer documentContainer postContainer">
+          <div className="post">
+            <header className="postHeader">
+              <h1 className="postHeaderTitle">
+                Welcome to the PyTorch3D Tutorials
+              </h1>
+            </header>
+            <p>
+              Here you can learn about the structure and applications of
+              PyTorch3D from examples which are in the form of ipython
+              notebooks.
+            </p>
+            <h3> Run interactively </h3>
+            <p>
+              At the top of each example you can find a button named{' '}
+              <strong>"Run in Google Colab"</strong> which will open the
+              notebook in{' '}
+              <a href="https://colab.research.google.com/notebooks/intro.ipynb">
+                {' '}
+                Google Colaboratory{' '}
+              </a>{' '}
+              where you can run the code directly in the browser with access to
+              GPU support - it looks like this:
+            </p>
+            <div className="tutorialButtonsWrapper">
+              <div className="tutorialButtonWrapper buttonWrapper">
+                <a className="tutorialButton button" target="_blank">
+                  <img
+                    className="colabButton"
+                    align="left"
+                    src="/img/colab_icon.png"
+                  />
+                  {'Run in Google Colab'}
+                </a>
+              </div>
+            </div>
+            <p>
+              {' '}
+              You can modify the code and experiment with varying different
+              settings. Remember to install the latest stable version of
+              PyTorch3D and its dependencies. Code to do this with pip is
+              provided in each notebook.{' '}
+            </p>
+            <h3> Run locally </h3>
+            <p>
+              {' '}
+              There is also a button to download the notebook and source code to
+              run it locally.{' '}
+            </p>
+          </div>
+        </Container>
+      </div>
+    );
+  }
+}
+
+module.exports = TutorialHome;
diff --git a/pytorch3d/website/sidebars.json b/pytorch3d/website/sidebars.json
new file mode 100644
index 0000000000000000000000000000000000000000..92932fbac354d801883a1ade884bed9792f799b5
--- /dev/null
+++ b/pytorch3d/website/sidebars.json
@@ -0,0 +1,9 @@
+{
+  "docs": {
+    "Introduction": ["why_pytorch3d"],
+    "Data": ["io", "meshes_io", "datasets", "batching"],
+    "Ops": ["cubify", "iou3d"],
+    "Visualization": ["visualization"],
+    "Renderer": ["renderer", "renderer_getting_started", "cameras"]
+  }
+}
diff --git a/pytorch3d/website/siteConfig.js b/pytorch3d/website/siteConfig.js
new file mode 100644
index 0000000000000000000000000000000000000000..98e6225d6d92ee49e3a37e936c4481f2571a9092
--- /dev/null
+++ b/pytorch3d/website/siteConfig.js
@@ -0,0 +1,91 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// @licenselint-loose-mode
+
+// See https://docusaurus.io/docs/site-config for all the possible
+// site configuration options.
+
+// List of projects/orgs using your project for the users page.
+const users = [
+  {
+    caption: 'User1',
+    // You will need to prepend the image path with your baseUrl
+    // if it is not '/', like: '/test-site/img/image.jpg'.
+    image: '/img/undraw_open_source.svg',
+    infoLink: 'https://www.facebook.com',
+    pinned: true,
+  },
+];
+
+const baseUrl = '/'
+
+const siteConfig = {
+  title: 'PyTorch3D', // Title for your website.
+  tagline: 'A library for deep learning with 3D data',
+  url: 'https://pytorch3d.org', // Your website URL
+  baseUrl: baseUrl, // Base URL for your project
+  projectName: 'pytorch3d',
+  organizationName: 'facebookresearch',
+  customDocsPath: 'docs/notes',
+  headerLinks: [
+    {doc: 'why_pytorch3d', label: 'Docs'},
+    {page: 'tutorials', label: 'Tutorials'},
+    {href: "https://pytorch3d.readthedocs.io/", label: 'API'},
+    {href: "https://github.com/facebookresearch/pytorch3d", label: 'GitHub'},
+  ],
+
+  // If you have users set above, you add it here:
+  users,
+
+  /* path to images for header/footer */
+  headerIcon: 'img/pytorch3dfavicon.png',
+  footerIcon: 'img/pytorch3dfavicon.png',
+  favicon: 'img/pytorch3dfavicon.png',
+
+  /* Colors for website */
+  colors: {
+    primaryColor: '#812CE5',
+    secondaryColor: '#FFAF00',
+  },
+
+  // This copyright info is used in /core/Footer.js and blog RSS/Atom feeds.
+  copyright: `Copyright \u{00A9} ${new Date().getFullYear()} Meta Platforms, Inc`,
+
+  highlight: {
+    // Highlight.js theme to use for syntax highlighting in code blocks.
+    theme: 'default',
+  },
+
+  // Add custom scripts here that would be placed in <script> tags.
+  scripts: ['https://buttons.github.io/buttons.js'],
+
+  // On page navigation for the current documentation page.
+  onPageNav: 'separate',
+  // No .html extensions for paths.
+  cleanUrl: true,
+
+  // Open Graph and Twitter card images.
+  ogImage: 'img/pytorch3dlogoicon.svg',
+  twitterImage: 'img/pytorch3dlogoicon.svg',
+
+   // Google analytics
+   gaTrackingId: 'UA-157376881-1',
+
+  // For sites with a sizable amount of content, set collapsible to true.
+  // Expand/collapse the links and subcategories under categories.
+  // docsSideNavCollapsible: true,
+
+  // Show documentation's last contributor's name.
+  enableUpdateBy: true,
+
+  // Show documentation's last update time.
+  // enableUpdateTime: true,
+};
+
+module.exports = siteConfig;
diff --git a/pytorch3d/website/static/css/custom.css b/pytorch3d/website/static/css/custom.css
new file mode 100644
index 0000000000000000000000000000000000000000..3aed147d30e204078732b3c0788ef378f36cad56
--- /dev/null
+++ b/pytorch3d/website/static/css/custom.css
@@ -0,0 +1,360 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+html body {
+  font-family: 'Montserrat', sans-serif;
+  overflow-x: hidden;
+}
+
+.fixedHeaderContainer {
+  background-color: #222222;
+}
+
+
+.fixedHeaderContainer header .headerTitleWithLogo {
+  display: block;
+  color: #ffffff;
+}
+
+.fixedHeaderContainer header .logo {
+  height: 50px;
+}
+
+.fixedHeaderContainer header a:nth-child(2) {
+  position: absolute;
+  right: 0px;
+}
+
+.fixedHeaderContainer header a:nth-child(2) h3 {
+  font-size: 14px;
+}
+
+.fixedHeaderContainer header a:nth-child(2) h3::before {
+  content: 'v: ';
+}
+
+.navigationSlider {
+  margin-right: 80px;
+}
+
+.navigationSlider .slidingNav ul {
+  background: #222222;
+}
+
+.navigationSlider .slidingNav ul li a {
+  color: #c7d4fd;
+}
+
+.navigationSlider .slidingNav ul li a:hover,
+.navigationSlider .slidingNav ul li a:focus {
+  color: #ffffff;
+  background-color: inherit;
+}
+
+.navigationSlider .slidingNav ul li.siteNavItemActive > a,
+.navigationSlider .slidingNav ul li.siteNavGroupActive > a {
+  background-color: inherit;
+}
+
+.homeContainer {
+  background: linear-gradient(
+    rgba(129, 44, 229, 1) 0%,
+    rgba(255, 175, 0, 1) 100%
+  );
+  padding: 25px 0px;
+}
+
+.splashLogo {
+  display: block;
+  margin: 0 auto;
+  width: 65%;
+}
+
+.projectTitle {
+  color: #ffffff;
+  font-variant: small-caps;
+  font-weight: 300;
+}
+
+.promoSection .button {
+  border: 2px solid #fff;
+  color: #ffffff;
+  font-size: 19px;
+  margin: 10px;
+}
+
+.promoSection .button:hover {
+  background: inherit;
+  border: 2px solid #ffffff;
+  color: #ffffff;
+}
+
+.landingPage {
+  padding: 0px;
+}
+
+
+.productShowcaseSection {
+   padding: 45px 20px 30px 20px;
+}
+
+div.productShowcaseSection {
+  color: #6c6c6c;
+  padding-top: 40px;
+}
+
+#quickstart {
+  padding-top: 80px;
+}
+
+.productShowcaseSection > h2 {
+  font-variant: small-caps;
+  font-weight: 360;
+  margin: 0px;
+  padding: 0px;
+  color: #5b1861;
+}
+
+.productShowcaseSection p {
+  font-weight: 360;
+}
+
+/* Subtitles for key features */
+.productShowcaseSection .blockContent > div span p {
+  font-size: 18px;
+}
+
+.productShowcaseSection div.container {
+  padding-bottom: 40px;
+  padding-top: 10px;
+  padding-left: 0px;
+  padding-right: 0px;
+}
+
+.productShowcaseSection img {
+  height: 100px;
+}
+
+.gridBlock .fourByGridBlock img {
+    max-width: 200%;
+}
+
+.productShowcaseSection li {
+  padding: 10px 0;
+}
+
+.productShowcaseSection pre {
+  margin: 10px 0;
+}
+
+.productShowcaseSection code {
+  background: #fff;
+}
+
+.container .wrapper .alignCenter h2 {
+  color: #222222;
+}
+
+div#quickstart {
+  background: #efefef;
+}
+
+div#quickstart ol {
+  margin-bottom: 0px;
+}
+
+.nav-footer {
+  background-color: #222222;
+}
+
+.nav-footer .sitemap a {
+  color: #c7d4fd;
+}
+
+.nav-footer .sitemap a:hover {
+  color: #ffffff;
+}
+
+.social {
+ text-align: center
+}
+
+a,
+p a {
+  color: #4872f9;
+}
+
+a:hover,
+p a:hover {
+  color: #4872f9;
+}
+
+.imageAlignTop .blockImage {
+    margin-bottom: 20px;
+    max-width: 200px;
+}
+
+/* Style tutorials */
+.tutorialBody {
+  margin-top: -20px;
+  color: #6c6c6c;
+}
+
+.tutorialBody h1 {
+  margin: 0px;
+}
+
+.tutorialBody h1,
+.tutorialBody h2,
+.tutorialBody h3 {
+  color: #222222;
+}
+
+.tutorialBody pre {
+  font-family: 'IBM Plex Mono', monospace;
+  font-size: 14px;
+  margin: 0px;
+}
+
+.tutorialBody .input_prompt,
+.tutorialBody .output_prompt {
+  color: darkred;
+  font-size: 12px;
+}
+
+.tutorialBody .highlight {
+  background: #f3f4f7;
+  padding: 10px 20px;
+  border: lightgray 1px solid;
+  border-radius: 3px;
+}
+
+.tutorialBody .cell {
+  margin: 20px;
+}
+
+.tutorialBody .output_stderr {
+  background-color: #fdede9;
+}
+
+.tutorialBody .anchor-link {
+  color: lightgray;
+}
+
+.tutorialBody iframe {
+  width: 100%;
+  height: 100vh;
+}
+
+.tutorialButtonWrapper,
+.tutorialRuntime {
+  margin: 20px;
+}
+
+.tutorialButtonWrapper {
+  float: left;
+  margin: 5px;
+}
+
+.colabButtonWrapper {
+  float: left;
+  margin: 5px;
+}
+
+.colabButtonWrapper img {
+ padding-right: 0.25em;
+}
+
+.colabButton {
+  width: 24px;
+}
+
+.tutorialButtonsWrapper {
+ display: flex;
+ align-items: center;
+ padding-bottom: 15px;
+}
+
+/* .tutorialButton {
+  color: #4872f9;
+  border: 1px solid #4872f9;
+}
+ */
+.tutorialButton svg {
+  height: 15px;
+  margin-right: 5px;
+}
+
+.tutorialButton:hover {
+  color: #4872f9;
+  background-color: inherit;
+}
+
+.wrapper {
+  max-width: 1400px;
+}
+
+
+@media only screen and (min-device-width: 360px) and (max-device-width: 736px) {
+  .fixedHeaderContainer header a:nth-child(2) {
+    position: absolute;
+    right: 150px;
+  }
+  .promoSection .button {
+    font-size: 12px;
+    margin: 3px;
+  }
+  .inner h2 {
+   margin-top: 0px;
+  }
+  .splashLogo {
+    width: 90%;
+  }
+  .headerTitleWithLogo {
+    display: block !important;
+  }
+  .blockContent > div span p {
+    margin-bottom: 30px
+  }
+  .productShowcaseSection div.container {
+    padding-top: 0px;
+  }
+  .productShowcaseSection > h2 {
+    padding-bottom: 20px;
+  }
+}
+
+@media only screen and (max-width: 1023px) {
+  .fixedHeaderContainer header a:nth-child(2) {
+    position: absolute;
+    right: 200px;
+  }
+}
+
+@media only screen and (min-width: 1024px) {
+}
+
+@media only screen and (min-width: 1400px) {
+}
+
+@media only screen and (min-width: 1500px) {
+}
+
+/* Social Banner */
+.socialBanner {
+  font-weight: bold;
+  font-size: 20px;
+  padding: 20px;
+  max-width: 768px;
+  margin: 0 auto;
+  text-align: center;
+}
+
+.socialBanner a {
+  text-decoration: underline;
+}
diff --git a/pytorch3d/website/static/css/pygments.css b/pytorch3d/website/static/css/pygments.css
new file mode 100644
index 0000000000000000000000000000000000000000..b57ec2516fbc6b3d76d8ba7e16df66333a88743b
--- /dev/null
+++ b/pytorch3d/website/static/css/pygments.css
@@ -0,0 +1,213 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+.highlight .hll {
+  background-color: #ffffcc;
+}
+.highlight .c {
+  color: #60a0b0;
+  font-style: italic;
+} /* Comment */
+.highlight .err {
+  border: 1px solid #ff0000;
+} /* Error */
+.highlight .k {
+  color: #007020;
+  font-weight: bold;
+} /* Keyword */
+.highlight .o {
+  color: #666666;
+} /* Operator */
+.highlight .cm {
+  color: #60a0b0;
+  font-style: italic;
+} /* Comment.Multiline */
+.highlight .cp {
+  color: #007020;
+} /* Comment.Preproc */
+.highlight .c1 {
+  color: #60a0b0;
+  font-style: italic;
+} /* Comment.Single */
+.highlight .cs {
+  color: #60a0b0;
+  background-color: #fff0f0;
+} /* Comment.Special */
+.highlight .gd {
+  color: #a00000;
+} /* Generic.Deleted */
+.highlight .ge {
+  font-style: italic;
+} /* Generic.Emph */
+.highlight .gr {
+  color: #ff0000;
+} /* Generic.Error */
+.highlight .gh {
+  color: #000080;
+  font-weight: bold;
+} /* Generic.Heading */
+.highlight .gi {
+  color: #00a000;
+} /* Generic.Inserted */
+.highlight .go {
+  color: #808080;
+} /* Generic.Output */
+.highlight .gp {
+  color: #c65d09;
+  font-weight: bold;
+} /* Generic.Prompt */
+.highlight .gs {
+  font-weight: bold;
+} /* Generic.Strong */
+.highlight .gu {
+  color: #800080;
+  font-weight: bold;
+} /* Generic.Subheading */
+.highlight .gt {
+  color: #0040d0;
+} /* Generic.Traceback */
+.highlight .kc {
+  color: #007020;
+  font-weight: bold;
+} /* Keyword.Constant */
+.highlight .kd {
+  color: #007020;
+  font-weight: bold;
+} /* Keyword.Declaration */
+.highlight .kn {
+  color: #007020;
+  font-weight: bold;
+} /* Keyword.Namespace */
+.highlight .kp {
+  color: #007020;
+} /* Keyword.Pseudo */
+.highlight .kr {
+  color: #007020;
+  font-weight: bold;
+} /* Keyword.Reserved */
+.highlight .kt {
+  color: #902000;
+} /* Keyword.Type */
+.highlight .m {
+  color: #40a070;
+} /* Literal.Number */
+.highlight .s {
+  color: #4070a0;
+} /* Literal.String */
+.highlight .na {
+  color: #4070a0;
+} /* Name.Attribute */
+.highlight .nb {
+  color: #007020;
+} /* Name.Builtin */
+.highlight .nc {
+  color: #0e84b5;
+  font-weight: bold;
+} /* Name.Class */
+.highlight .no {
+  color: #60add5;
+} /* Name.Constant */
+.highlight .nd {
+  color: #555555;
+  font-weight: bold;
+} /* Name.Decorator */
+.highlight .ni {
+  color: #d55537;
+  font-weight: bold;
+} /* Name.Entity */
+.highlight .ne {
+  color: #007020;
+} /* Name.Exception */
+.highlight .nf {
+  color: #06287e;
+} /* Name.Function */
+.highlight .nl {
+  color: #002070;
+  font-weight: bold;
+} /* Name.Label */
+.highlight .nn {
+  color: #0e84b5;
+  font-weight: bold;
+} /* Name.Namespace */
+.highlight .nt {
+  color: #062873;
+  font-weight: bold;
+} /* Name.Tag */
+.highlight .nv {
+  color: #bb60d5;
+} /* Name.Variable */
+.highlight .ow {
+  color: #007020;
+  font-weight: bold;
+} /* Operator.Word */
+.highlight .w {
+  color: #bbbbbb;
+} /* Text.Whitespace */
+.highlight .mf {
+  color: #40a070;
+} /* Literal.Number.Float */
+.highlight .mh {
+  color: #40a070;
+} /* Literal.Number.Hex */
+.highlight .mi {
+  color: #40a070;
+} /* Literal.Number.Integer */
+.highlight .mo {
+  color: #40a070;
+} /* Literal.Number.Oct */
+.highlight .sb {
+  color: #4070a0;
+} /* Literal.String.Backtick */
+.highlight .sc {
+  color: #4070a0;
+} /* Literal.String.Char */
+.highlight .sd {
+  color: #4070a0;
+  font-style: italic;
+} /* Literal.String.Doc */
+.highlight .s2 {
+  color: #4070a0;
+} /* Literal.String.Double */
+.highlight .se {
+  color: #4070a0;
+  font-weight: bold;
+} /* Literal.String.Escape */
+.highlight .sh {
+  color: #4070a0;
+} /* Literal.String.Heredoc */
+.highlight .si {
+  color: #70a0d0;
+  font-style: italic;
+} /* Literal.String.Interpol */
+.highlight .sx {
+  color: #c65d09;
+} /* Literal.String.Other */
+.highlight .sr {
+  color: #235388;
+} /* Literal.String.Regex */
+.highlight .s1 {
+  color: #4070a0;
+} /* Literal.String.Single */
+.highlight .ss {
+  color: #517918;
+} /* Literal.String.Symbol */
+.highlight .bp {
+  color: #007020;
+} /* Name.Builtin.Pseudo */
+.highlight .vc {
+  color: #bb60d5;
+} /* Name.Variable.Class */
+.highlight .vg {
+  color: #bb60d5;
+} /* Name.Variable.Global */
+.highlight .vi {
+  color: #bb60d5;
+} /* Name.Variable.Instance */
+.highlight .il {
+  color: #40a070;
+} /* Literal.Number.Integer.Long */
diff --git a/pytorch3d/website/static/img/batching.svg b/pytorch3d/website/static/img/batching.svg
new file mode 100644
index 0000000000000000000000000000000000000000..2c160f97e6524b6eca9d8fad0cb48d5e9f4b977b
--- /dev/null
+++ b/pytorch3d/website/static/img/batching.svg
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="110px" height="100px" viewBox="0 0 110 100" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+    <!-- Generator: Sketch 58 (84663) - https://sketch.com -->
+    <title>Group 3</title>
+    <desc>Created with Sketch.</desc>
+    <g id="Page-1" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
+        <g id="Artboard" transform="translate(-190.000000, -85.000000)">
+            <g id="Group-3" transform="translate(190.000000, 85.000000)">
+                <rect id="Rectangle" fill="#FF7D1E" x="60" y="3" width="44" height="44"></rect>
+                <ellipse id="Oval" fill="#000000" cx="84.5108694" cy="75.9893615" rx="24.5108694" ry="23.9893615"></ellipse>
+                <polygon id="Triangle" fill="#FFAF00" points="28.5 53 57 98 0 98"></polygon>
+                <polygon id="Polygon" fill="#812CE5" points="26.5099999 0 51.7225081 17.9587837 42.0921869 47.0167063 10.9278129 47.0167063 1.29749175 17.9587837"></polygon>
+            </g>
+        </g>
+    </g>
+</svg>
\ No newline at end of file
diff --git a/pytorch3d/website/static/img/favicon.ico b/pytorch3d/website/static/img/favicon.ico
new file mode 100644
index 0000000000000000000000000000000000000000..56093a9417fe17aaca9d5b54bed35cf975e88be5
Binary files /dev/null and b/pytorch3d/website/static/img/favicon.ico differ
diff --git a/pytorch3d/website/static/img/ops.svg b/pytorch3d/website/static/img/ops.svg
new file mode 100644
index 0000000000000000000000000000000000000000..b934e539df7535774dde3ec26f20a07926d8ff86
--- /dev/null
+++ b/pytorch3d/website/static/img/ops.svg
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="176px" height="175px" viewBox="0 0 176 175" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+    <!-- Generator: Sketch 58 (84663) - https://sketch.com -->
+    <title>Group 2</title>
+    <desc>Created with Sketch.</desc>
+    <g id="Page-1" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
+        <g id="Artboard" transform="translate(-413.000000, -451.000000)">
+            <g id="Group-2" transform="translate(422.000000, 456.000000)">
+                <g id="Group-4" transform="translate(39.000000, 64.499932)" fill="#FFFFFF" stroke="#812CE5" stroke-linecap="round" stroke-linejoin="round" stroke-width="8">
+                    <path d="M0,22 L20,0" id="Line-4"></path>
+                    <path d="M0,23 L22,42" id="Line-4"></path>
+                </g>
+                <g id="Group-4" transform="translate(105.000000, 85.499932) scale(-1, 1) translate(-105.000000, -85.499932) translate(94.000000, 64.499932)" fill="#FFFFFF" stroke="#812CE5" stroke-linecap="round" stroke-linejoin="round" stroke-width="8">
+                    <path d="M0,22 L20,0" id="Line-4"></path>
+                    <path d="M0,23 L22,42" id="Line-4"></path>
+                </g>
+                <path d="M72,106.499932 L83,65.4999323" id="Line-4" stroke="#812CE5" stroke-width="8" fill="#FFFFFF" stroke-linecap="round" stroke-linejoin="round"></path>
+                <path id="Line-9" d="M74.4859874,86.9999949 L75.5383535,87.0032598 C103.773173,87.2063259 125.510981,96.896589 140.284811,116.02653 L159.026859,108.8732 C160.688876,108.238748 162.550533,109.071754 163.184985,110.733771 C163.363327,111.200957 163.430447,111.70325 163.381029,112.200872 L163.381029,112.200872 L158.126597,165.110922 C157.950791,166.881211 156.37317,168.173795 154.602881,167.99799 C153.90933,167.929114 153.256827,167.637149 152.743311,167.165918 L152.743311,167.165918 L113.56793,131.216342 C112.257182,130.013524 112.169687,127.975877 113.372505,126.665129 C113.710612,126.296683 114.12986,126.012017 114.597046,125.833674 L114.597046,125.833674 L130.458207,119.778029 C117.558504,104.578587 99.0874369,97 74.6296296,97 L74.6296296,97 L73.0820878,97.004911 C66.4718147,97.0540006 62.2741863,97.4876879 56.5279179,99.0603259 C47.7855392,101.452939 38.689293,106.123227 28.9324354,113.829564 C14.5080834,125.222466 6.51709607,142.264661 4.98906448,165.330508 C4.80652993,168.085892 2.42487619,170.171599 -0.330508042,169.989064 C-3.08589227,169.80653 -5.17159904,167.424876 -4.98906448,164.669492 C-3.28379511,138.928216 5.94743978,119.240974 22.7342313,105.982127 C33.5277551,97.4569916 43.8289935,92.1680242 53.888193,89.4150238 C60.6332478,87.5690381 65.5743724,87.0613052 72.9723676,87.0054117 L72.9723676,87.0054117 L74.4859874,86.9999949 Z" fill="#FFAF00" fill-rule="nonzero"></path>
+                <path id="Line-9" d="M76.486002,-5.0000054 L77.5660101,-4.99635656 C106.765823,-4.76855288 129.01092,6.14121529 143.768839,27.5954736 L162.584107,21.0066878 C164.263104,20.4186391 166.100907,21.3030272 166.688956,22.9820238 C166.854254,23.4539832 166.907398,23.9579467 166.844178,24.4540036 L166.844178,24.4540036 L160.12231,77.1977136 C159.897407,78.9624371 158.284495,80.210708 156.519772,79.9858046 C155.828401,79.8976935 155.184258,79.5877189 154.684028,79.1024075 L154.684028,79.1024075 L116.522181,42.0786869 C115.245344,40.8399301 115.214474,38.8006385 116.453231,37.5238017 C116.801441,37.1648878 117.228433,36.8919753 117.700392,36.7266771 L117.700392,36.7266771 L133.937253,31.0392764 C120.964757,13.6380026 102.023945,5 76.6296296,5 L76.6296296,5 L75.1288112,5.004828 C68.513673,5.05473503 64.3301462,5.50847659 58.5948099,7.16413065 C49.8731212,9.68187305 40.7881482,14.601962 31.0337645,22.7285034 C16.5631727,34.784197 8.52566138,52.8648677 6.99016787,77.3134081 C6.81707736,80.0694017 4.44258552,82.1632584 1.68659191,81.9901679 C-1.06940171,81.8170774 -3.16325838,79.4425855 -2.99016787,76.6865919 C-1.29236043,49.6536455 7.89235048,28.9923081 24.6329022,15.0454726 C35.4288998,6.05114514 45.7414116,0.466266085 55.821301,-2.44355631 C62.5896722,-4.39742276 67.5506001,-4.93507734 74.9684167,-4.99426877 L74.9684167,-4.99426877 L76.486002,-5.0000054 Z" fill="#FFAF00" fill-rule="nonzero"></path>
+            </g>
+        </g>
+    </g>
+</svg>
\ No newline at end of file
diff --git a/pytorch3d/website/static/img/pytorch3dicon.svg b/pytorch3d/website/static/img/pytorch3dicon.svg
new file mode 100644
index 0000000000000000000000000000000000000000..e6927b791d546373528f6895362f48721f34e806
--- /dev/null
+++ b/pytorch3d/website/static/img/pytorch3dicon.svg
@@ -0,0 +1 @@
+<svg id="CrypTen_Symbol_Logos" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 300 300"><defs><style>.cls-1{fill:#fff;}</style></defs><title>PyTorch3D_Identity_Symbol</title><path class="cls-1" d="M180.57,50H61.7V250H180.57L238.3,150ZM162.84,233.91H99.77l63.07-36.41Zm-85-5.89V72l85.05,49.11v57.82ZM162.84,102.5,99.77,66.09h63.07Zm16.09,27.88,34,19.62-34,19.62ZM207,128l-28.1-16.23V79.35Zm-28.1,92.63V188.21L207,172Z"/></svg>
\ No newline at end of file
diff --git a/pytorch3d/website/static/img/pytorch3dlogo.svg b/pytorch3d/website/static/img/pytorch3dlogo.svg
new file mode 100644
index 0000000000000000000000000000000000000000..b914e1b28a3b173c4e492985e7638e34c376c2ff
--- /dev/null
+++ b/pytorch3d/website/static/img/pytorch3dlogo.svg
@@ -0,0 +1 @@
+<svg id="CrypTen_Horizontal_Logos" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 1300 330"><defs><style>.cls-1{fill:#ffb000;}.cls-2{fill:url(#linear-gradient);}.cls-3{fill:url(#linear-gradient-2);}</style><linearGradient id="linear-gradient" x1="130.95" y1="257.01" x2="198.97" y2="70.13" gradientUnits="userSpaceOnUse"><stop offset="0.28" stop-color="#ffb000"/><stop offset="0.75" stop-color="#812ce5"/></linearGradient><linearGradient id="linear-gradient-2" x1="173.52" y1="233.48" x2="226.58" y2="87.7" gradientUnits="userSpaceOnUse"><stop offset="0.2" stop-color="#ffb000"/><stop offset="0.85" stop-color="#812ce5"/></linearGradient></defs><title>PyTorch3D_Identity_Horizontal_Lockup</title><path d="M336.72,184.41l-18.53.22v48h-14V96.34h34.23c36,0,53.19,17.44,53.19,42.51C391.65,168.71,370.51,184,336.72,184.41ZM338,109.2H318.19v62.56l19.4-.43c25.72-.44,39.67-10.68,39.67-31.83C377.26,120.1,363.53,109.2,338,109.2Z"/><path d="M452.45,231.5l-8.07,21.58c-9.15,24.42-18.74,31.39-32.26,31.39-7.63,0-13.3-2-19.4-4.58l4.14-12.42c4.8,2.61,9.81,4.36,15.26,4.36,7.63,0,13.3-4.14,20.49-23.11l6.76-17.66-39-98.32H415l31.4,82.41,30.74-82.41h14.17Z"/><path d="M533.87,109.42V232.59h-14V109.42H472V96.34H581.83v13.08Z"/><path d="M617.76,235.42c-27.69,0-48.18-20.49-48.18-52.54,0-31.83,21.36-53,49.27-53s48,20.49,48,52.54C666.81,214.28,645.44,235.42,617.76,235.42Zm.43-93.3c-21.14,0-35.1,16.78-35.1,40.33,0,24.41,14.17,40.76,35.32,40.76s35.1-16.78,35.1-40.33C653.51,158.47,639.34,142.12,618.19,142.12Z"/><path d="M698.48,232.59H685V132.74l13.52-2.83v21.36c6.54-12.86,16.35-21.36,29.43-21.36a36.22,36.22,0,0,1,17.44,4.58l-3.49,12.64a29.68,29.68,0,0,0-15.26-4.36c-10.46,0-20.27,7.85-28.12,25.73Z"/><path 
d="M796.18,235.42c-30.08,0-48.83-21.58-48.83-52.54,0-31.17,20.71-53,49-53,12.21,0,22.67,3.05,31.18,8.5l-3.49,12.21a50.39,50.39,0,0,0-27.69-8.07c-21.58,0-35.1,16.13-35.1,39.9,0,24.41,14.17,40.33,35.32,40.33a51.66,51.66,0,0,0,27.69-8.29l2.83,12.43A59.72,59.72,0,0,1,796.18,235.42Z"/><path d="M908.65,232.59V168.06c0-17.44-7-25.29-21.37-25.29-11.55,0-22.67,5.89-30.95,14.17v75.65H842.81V85.65l13.52-2.83v63c10.46-10.46,23.54-15.91,34.66-15.91,19.4,0,31.18,12.42,31.18,34.22v68.46Z"/><path class="cls-1" d="M1023.12,194.44c0,25.72-21.8,41-48,41-12.65,0-27-3.92-36-8.94l3.27-12.2A70,70,0,0,0,975.59,223c17.88,0,33.79-9.6,33.79-27.47,0-15-12.42-25.51-32.91-25.51a113.19,113.19,0,0,0-18.75,1.74V160.43c29.86-7.63,46.87-17.22,46.87-33.14,0-13.51-10.68-21.36-26.82-21.36-11.33,0-24.41,5.45-33.35,13.51l-5-11.77C949,99.82,963.82,93.5,978,93.5c24.2,0,40.33,12.65,40.33,32.7,0,16.13-12,27.69-35.53,34.88C1009.17,160.43,1023.12,176.13,1023.12,194.44Z"/><path class="cls-1" d="M1082.33,232.59h-36V96.34h37.06c44.69,0,72.16,27,72.16,67.14C1155.58,206.43,1128.76,232.59,1082.33,232.59Zm.22-123.39h-22.24V219.73H1083c34.88,0,58.64-17.88,58.64-55.16C1141.62,129.91,1119.82,109.2,1082.55,109.2Z"/><path class="cls-2" d="M208.48,73.59H105.06v174H208.48l50.23-87Zm28.14,87L119.06,228.46V92.71Zm-5.12-19.13L138.18,87.59h62.21Zm-31.11,92.13H138.18l93.32-53.88Z"/><rect class="cls-3" x="193.05" y="80.59" width="14" height="160"/></svg>
\ No newline at end of file
diff --git a/pytorch3d/website/static/img/pytorch3dlogowhite.svg b/pytorch3d/website/static/img/pytorch3dlogowhite.svg
new file mode 100644
index 0000000000000000000000000000000000000000..f16b3dfbb63934dfa45c42556efb250e5ca30ad2
--- /dev/null
+++ b/pytorch3d/website/static/img/pytorch3dlogowhite.svg
@@ -0,0 +1 @@
+<svg id="CrypTen_Horizontal_Logos" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1300 330"><defs><style>.cls-1{fill:#fff;}</style></defs><title>PyTorch3D_Identity_Horizontal_Lockup</title><path class="cls-1" d="M336.72,184.41l-18.53.22v48h-14V96.34h34.23c36,0,53.19,17.44,53.19,42.51C391.65,168.71,370.51,184,336.72,184.41ZM338,109.2H318.19v62.56l19.4-.43c25.72-.44,39.67-10.68,39.67-31.83C377.26,120.1,363.53,109.2,338,109.2Z"/><path class="cls-1" d="M452.45,231.5l-8.07,21.58c-9.15,24.42-18.74,31.39-32.26,31.39-7.63,0-13.3-2-19.4-4.58l4.14-12.42c4.8,2.61,9.81,4.36,15.26,4.36,7.63,0,13.3-4.14,20.49-23.11l6.76-17.66-39-98.32H415l31.4,82.41,30.74-82.41h14.17Z"/><path class="cls-1" d="M533.87,109.42V232.59h-14V109.42H472V96.34H581.83v13.08Z"/><path class="cls-1" d="M617.76,235.42c-27.69,0-48.18-20.49-48.18-52.54,0-31.83,21.36-53,49.27-53s48,20.49,48,52.54C666.81,214.28,645.44,235.42,617.76,235.42Zm.43-93.3c-21.14,0-35.1,16.78-35.1,40.33,0,24.41,14.17,40.76,35.32,40.76s35.1-16.78,35.1-40.33C653.51,158.47,639.34,142.12,618.19,142.12Z"/><path class="cls-1" d="M698.48,232.59H685V132.74l13.52-2.83v21.36c6.54-12.86,16.35-21.36,29.43-21.36a36.22,36.22,0,0,1,17.44,4.58l-3.49,12.64a29.68,29.68,0,0,0-15.26-4.36c-10.46,0-20.27,7.85-28.12,25.73Z"/><path class="cls-1" d="M796.18,235.42c-30.08,0-48.83-21.58-48.83-52.54,0-31.17,20.71-53,49-53,12.21,0,22.67,3.05,31.18,8.5l-3.49,12.21a50.39,50.39,0,0,0-27.69-8.07c-21.58,0-35.1,16.13-35.1,39.9,0,24.41,14.17,40.33,35.32,40.33a51.66,51.66,0,0,0,27.69-8.29l2.83,12.43A59.72,59.72,0,0,1,796.18,235.42Z"/><path class="cls-1" d="M908.65,232.59V168.06c0-17.44-7-25.29-21.37-25.29-11.55,0-22.67,5.89-30.95,14.17v75.65H842.81V85.65l13.52-2.83v63c10.46-10.46,23.54-15.91,34.66-15.91,19.4,0,31.18,12.42,31.18,34.22v68.46Z"/><path class="cls-1" 
d="M1023.12,194.44c0,25.72-21.8,41-48,41-12.65,0-27-3.92-36-8.94l3.27-12.2A70,70,0,0,0,975.59,223c17.88,0,33.79-9.6,33.79-27.47,0-15-12.42-25.51-32.91-25.51a113.19,113.19,0,0,0-18.75,1.74V160.43c29.86-7.63,46.87-17.22,46.87-33.14,0-13.51-10.68-21.36-26.82-21.36-11.33,0-24.41,5.45-33.35,13.51l-5-11.77C949,99.82,963.82,93.5,978,93.5c24.2,0,40.33,12.65,40.33,32.7,0,16.13-12,27.69-35.53,34.88C1009.17,160.43,1023.12,176.13,1023.12,194.44Z"/><path class="cls-1" d="M1082.33,232.59h-36V96.34h37.06c44.69,0,72.16,27,72.16,67.14C1155.58,206.43,1128.76,232.59,1082.33,232.59Zm.22-123.39h-22.24V219.73H1083c34.88,0,58.64-17.88,58.64-55.16C1141.62,129.91,1119.82,109.2,1082.55,109.2Z"/><path class="cls-1" d="M208.48,73.59H105.06v174H208.48l50.23-87Zm23,67.87-24.45-14.11V99.12Zm5.12,19.13-29.57,17.07V143.52Zm-43.57-73v31.68L138.18,87.59Zm-74,5.12,74,42.72v50.31l-74,42.72Zm74,109.2v31.68H138.18Zm14,20.14V193.83l24.45-14.12Z"/></svg>
\ No newline at end of file
diff --git a/pytorch3d/website/static/img/rendering.svg b/pytorch3d/website/static/img/rendering.svg
new file mode 100644
index 0000000000000000000000000000000000000000..30605d8a246a35f9e58cc180f19ca6d22b97dc58
--- /dev/null
+++ b/pytorch3d/website/static/img/rendering.svg
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="185px" height="127px" viewBox="0 0 185 127" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+    <!-- Generator: Sketch 58 (84663) - https://sketch.com -->
+    <title>Group 4</title>
+    <desc>Created with Sketch.</desc>
+    <g id="Page-1" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
+        <g id="Artboard" transform="translate(-85.000000, -519.000000)">
+            <g id="Group-4" transform="translate(89.000000, 519.000000)">
+                <rect id="Rectangle" fill="#812CE5" x="2" y="0" width="175" height="127" rx="9"></rect>
+                <circle id="Oval" stroke="#FFFFFF" stroke-width="5" fill="#812CE5" cx="148" cy="31" r="14"></circle>
+                <path d="M0,86 L45,31" id="Line-4" stroke="#FFFFFF" stroke-width="8" fill="#FFFFFF" stroke-linecap="round" stroke-linejoin="round"></path>
+                <path d="M70,93 L45,31" id="Line-4" stroke="#FFFFFF" stroke-width="8" fill="#FFFFFF" stroke-linecap="round" stroke-linejoin="round"></path>
+                <path d="M70,93 L96,67" id="Line-4" stroke="#FFFFFF" stroke-width="8" fill="#FFFFFF" stroke-linecap="round" stroke-linejoin="round"></path>
+                <path d="M128.467742,94 L96.5322581,67" id="Line-4" stroke="#FFFFFF" stroke-width="8" fill="#FFFFFF" stroke-linecap="round" stroke-linejoin="round"></path>
+                <path d="M128.5,94 L177,75" id="Line-4" stroke="#FFFFFF" stroke-width="8" fill="#FFFFFF" stroke-linecap="round" stroke-linejoin="round"></path>
+            </g>
+        </g>
+    </g>
+</svg>
\ No newline at end of file
diff --git a/pytorch3d/website/tutorials.json b/pytorch3d/website/tutorials.json
new file mode 100644
index 0000000000000000000000000000000000000000..e4345c459723bc8bdbf2f830e1000a7a312fc876
--- /dev/null
+++ b/pytorch3d/website/tutorials.json
@@ -0,0 +1,50 @@
+{
+   "3D operators": [
+      {
+       "id": "deform_source_mesh_to_target_mesh",
+       "title": "Fit Mesh"
+      },{
+       "id": "bundle_adjustment",
+       "title": "Bundle Adjustment"
+      }
+   ],
+   "Rendering": [
+      {
+       "id": "render_textured_meshes",
+       "title": "Render Textured Meshes"
+      },{
+       "id": "render_densepose",
+       "title": "Render DensePose Meshes"
+      }, {
+       "id": "render_colored_points",
+       "title": "Render Colored Pointclouds"
+      },{
+       "id": "fit_textured_mesh",
+       "title": "Fit a Mesh with Texture via Rendering"
+      }, {
+       "id": "camera_position_optimization_with_differentiable_rendering",
+       "title": "Camera Position Optimization with Differentiable Rendering"
+       },{
+       "id": "fit_textured_volume",
+       "title": "Fit a volume via raymarching"
+       },{
+       "id": "fit_simple_neural_radiance_field",
+       "title": "Fit a simplified NeRF via raymarching"
+      }
+   ],
+   "Dataloaders": [
+       {
+          "id": "dataloaders_ShapeNetCore_R2N2",
+          "title": "Data loaders for ShapeNetCore and R2N2"
+       }
+    ],
+    "Implicitron": [
+      {
+         "id": "implicitron_volumes",
+         "title": "Training a custom volumes function with implicitron"
+      }, {
+         "id": "implicitron_config_system",
+         "title": "Implicitron config system deep dive"
+      }
+   ]
+}
diff --git a/utils/audio/__init__.py b/utils/audio/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a0738f53cf37654bfeeae2d4ed7d990fa6a9cff
--- /dev/null
+++ b/utils/audio/__init__.py
@@ -0,0 +1,198 @@
+import librosa
+import numpy as np
+import pyloudnorm as pyln
+import torch
+from scipy.signal import get_window
+
+from utils.audio.dct import dct
+from utils.audio.vad import trim_long_silences
+
+
+def librosa_pad_lr(x, fsize, fshift, pad_sides=1):
+    """Return the ``(left, right)`` padding that extends ``x`` to a whole number of hops.
+
+    The padded length is the smallest multiple of ``fshift`` that is strictly
+    greater than ``x.shape[0]``, so an input whose length is already a multiple
+    of ``fshift`` still receives one full extra hop.
+
+    :param x: array whose first dimension is the number of samples
+    :param fsize: frame size; accepted for interface compatibility, not used
+    :param fshift: hop size in samples
+    :param pad_sides: 1 pads only on the right, 2 splits the padding over both sides
+    :return: tuple ``(left_pad, right_pad)``
+    """
+    assert pad_sides in (1, 2)
+    n_samples = x.shape[0]
+    total = (n_samples // fshift + 1) * fshift - n_samples
+    if pad_sides == 2:
+        # When the total is odd the extra sample goes to the right side.
+        half, remainder = divmod(total, 2)
+        return half, half + remainder
+    return 0, total
+
+
+def amp_to_db(x):
+    """Convert amplitude to decibels, flooring the amplitude at 1e-5 (-100 dB)."""
+    floored = np.maximum(1e-5, x)
+    return 20 * np.log10(floored)
+
+
+def db_to_amp(x):
+    """Convert decibels to amplitude: ``10 ** (x / 20)``."""
+    exponent = x * 0.05
+    return 10.0 ** exponent
+
+
+def normalize(S, min_level_db):
+    """Map ``S`` linearly so that ``min_level_db`` becomes 0 and 0 dB becomes 1."""
+    shifted = S - min_level_db
+    return shifted / -min_level_db
+
+
+def denormalize(D, min_level_db):
+    """Inverse of ``normalize``: map 0 back to ``min_level_db`` and 1 back to 0 dB."""
+    scaled = D * -min_level_db
+    return scaled + min_level_db
+
+
+def librosa_wav2spec(wav_path,
+                     fft_size=None,
+                     hop_size=256,
+                     win_length=1024,
+                     window="hann",
+                     num_mels=80,
+                     fmin=80,
+                     fmax=-1,
+                     eps=1e-6,
+                     sample_rate=22050,
+                     loud_norm=False,
+                     trim_long_sil=False,
+                     center=True):
+    """Compute log10 mel and log10 linear magnitude spectrograms of a waveform.
+
+    :param wav_path: a path string (loaded with librosa at ``sample_rate``, or via
+        ``trim_long_silences`` when ``trim_long_sil`` is set), or an already loaded waveform
+    :param fft_size: FFT size; ``win_length`` is used when None
+    :param hop_size: hop length in samples
+    :param win_length: analysis window length in samples
+    :param window: window name passed to ``librosa.stft``
+    :param num_mels: number of mel filters
+    :param fmin: lowest mel frequency; -1 means 0
+    :param fmax: highest mel frequency; -1 means ``sample_rate / 2``
+    :param eps: floor applied before taking log10
+    :param sample_rate: sample rate used for loading, loudness metering and the mel basis
+    :param loud_norm: normalize integrated loudness to -16 and rescale if the peak exceeds 1
+    :param trim_long_sil: only used when ``wav_path`` is a string
+    :param center: passed to ``librosa.stft``; when true the returned wav is zero-padded
+        and cut to ``n_frames * hop_size`` samples
+    :return: dict with keys 'wav', 'mel' (frames first), 'linear' (frames first), 'mel_basis'
+    """
+    if not isinstance(wav_path, str):
+        wav = wav_path
+    elif trim_long_sil:
+        wav, _, _ = trim_long_silences(wav_path, sample_rate)
+    else:
+        wav, _ = librosa.core.load(wav_path, sr=sample_rate)
+
+    fft_size = win_length if fft_size is None else fft_size
+
+    if loud_norm:
+        meter = pyln.Meter(sample_rate)  # BS.1770 meter
+        wav = pyln.normalize.loudness(wav, meter.integrated_loudness(wav), -16.0)
+        peak = np.abs(wav).max()
+        if peak > 1:
+            wav = wav / peak
+
+    # amplitude spectrogram, (n_bins, T)
+    magnitude = np.abs(librosa.stft(wav, n_fft=fft_size, hop_length=hop_size,
+                                    win_length=win_length, window=window, center=center))
+
+    # mel filter bank; -1 is the "use the default" marker for both band edges
+    fmin = 0 if fmin == -1 else fmin
+    fmax = sample_rate / 2 if fmax == -1 else fmax
+    mel_basis = librosa.filters.mel(sr=sample_rate, n_fft=fft_size, n_mels=num_mels, fmin=fmin, fmax=fmax)
+
+    # log mel spectrogram, (n_mel_bins, T)
+    mel = np.log10(np.maximum(eps, mel_basis @ magnitude))
+
+    if center:
+        # make the waveform length match the number of frames
+        pad_left, pad_right = librosa_pad_lr(wav, fft_size, hop_size, 1)
+        wav = np.pad(wav, (pad_left, pad_right), mode='constant', constant_values=0.0)
+        wav = wav[:mel.shape[1] * hop_size]
+
+    log_linear = np.log10(np.maximum(eps, magnitude))
+    return {'wav': wav, 'mel': mel.T, 'linear': log_linear.T, 'mel_basis': mel_basis}
+
+
+def librosa_wav2mfcc(wav_path,
+                     fft_size=None,
+                     hop_size=256,
+                     win_length=1024,
+                     window="hann",
+                     num_mels=80,
+                     fmin=80,
+                     fmax=-1,
+                     sample_rate=22050,
+                     center=True):
+    """Compute 13 MFCCs per frame with ``librosa.feature.mfcc``.
+
+    :param wav_path: a path string (loaded with librosa at ``sample_rate``) or a loaded waveform
+    :param fft_size: FFT size; ``win_length`` is used when None
+    :param hop_size: hop length in samples
+    :param win_length: analysis window length in samples
+    :param window: window name passed to librosa
+    :param num_mels: number of mel filters
+    :param fmin: lowest mel frequency; -1 means 0
+    :param fmax: highest mel frequency; -1 means ``sample_rate / 2``
+    :param sample_rate: sample rate used for loading and for the mel filters
+    :param center: passed to librosa
+    :return: the transposed ``librosa.feature.mfcc`` output (frames first)
+    """
+    if isinstance(wav_path, str):
+        wav, _ = librosa.core.load(wav_path, sr=sample_rate)
+    else:
+        wav = wav_path
+    # Resolve the sentinel defaults the same way librosa_wav2spec does; handing
+    # n_fft=None or fmax=-1 straight to librosa does not yield a usable filter bank.
+    if fft_size is None:
+        fft_size = win_length
+    fmin = 0 if fmin == -1 else fmin
+    fmax = sample_rate / 2 if fmax == -1 else fmax
+    mfcc = librosa.feature.mfcc(y=wav, sr=sample_rate, n_mfcc=13,
+                                n_fft=fft_size, n_mels=num_mels, fmin=fmin, fmax=fmax,
+                                hop_length=hop_size,
+                                win_length=win_length, window=window, center=center)
+    return mfcc.T
+
+
+def torch_wav2spec(wav,
+                   mel_basis,
+                   fft_size=1024,
+                   hop_size=256,
+                   win_length=1024,
+                   eps=1e-6):
+    """Compute a log10 mel spectrogram with ``torch.stft`` (Hann window, ``center=False``).
+
+    :param wav: waveform tensor; the final ``transpose(1, 2)`` requires a batched input
+    :param mel_basis: mel filter bank, converted to a float tensor on ``wav``'s device
+    :param fft_size: FFT size
+    :param hop_size: hop length in samples
+    :param win_length: window length in samples
+    :param eps: floor applied before taking log10
+    :return: log-mel tensor with the last two axes swapped (frames before mel bins)
+    """
+    device = wav.device
+    hann = torch.FloatTensor(get_window('hann', win_length, fftbins=True)).to(device)
+    basis = torch.FloatTensor(mel_basis).to(device)
+    spectrum = torch.stft(wav, fft_size, hop_size, win_length, hann,
+                          center=False, pad_mode='constant', normalized=False,
+                          onesided=True, return_complex=True)
+    mel = torch.matmul(basis, spectrum.abs())
+    log_mel = torch.clamp_min(mel, eps).log10()  # (n_mel_bins, T) per item
+    return log_mel.transpose(1, 2)
+
+
+def mel2mfcc_torch(mel, n_coef=13):
+    """Apply the orthonormal DCT along the last axis of a 3-D ``mel`` and keep the first ``n_coef`` coefficients."""
+    coefficients = dct(mel, norm='ortho')
+    return coefficients[:, :, :n_coef]
+
+
+def librosa_wav2linearspec(wav_path,
+                           fft_size=None,
+                           hop_size=256,
+                           win_length=1024,
+                           window="hann",
+                           num_mels=80,
+                           fmin=80,
+                           fmax=-1,
+                           eps=1e-6,
+                           sample_rate=22050,
+                           loud_norm=False,
+                           trim_long_sil=False,
+                           center=True):
+    """Compute the log10 linear magnitude spectrogram of a waveform.
+
+    ``num_mels``, ``fmin`` and ``fmax`` are accepted but not used by this function.
+
+    :param wav_path: a path string (loaded with librosa at ``sample_rate``, or via
+        ``trim_long_silences`` when ``trim_long_sil`` is set), or an already loaded waveform
+    :param fft_size: FFT size; ``win_length`` is used when None
+    :param hop_size: hop length in samples
+    :param win_length: analysis window length in samples
+    :param window: window name passed to ``librosa.stft``
+    :param eps: floor applied before taking log10
+    :param sample_rate: sample rate used for loading and loudness metering
+    :param loud_norm: normalize integrated loudness to -16 and rescale if the peak exceeds 1
+    :param trim_long_sil: only used when ``wav_path`` is a string
+    :param center: passed to ``librosa.stft``; when true the returned wav is zero-padded
+        and cut to ``n_frames * hop_size`` samples
+    :return: dict with keys 'wav' and 'linear' (frames first)
+    """
+    if not isinstance(wav_path, str):
+        wav = wav_path
+    elif trim_long_sil:
+        wav, _, _ = trim_long_silences(wav_path, sample_rate)
+    else:
+        wav, _ = librosa.core.load(wav_path, sr=sample_rate)
+
+    fft_size = win_length if fft_size is None else fft_size
+
+    if loud_norm:
+        meter = pyln.Meter(sample_rate)  # BS.1770 meter
+        wav = pyln.normalize.loudness(wav, meter.integrated_loudness(wav), -16.0)
+        peak = np.abs(wav).max()
+        if peak > 1:
+            wav = wav / peak
+
+    # amplitude spectrogram, (n_bins, T)
+    magnitude = np.abs(librosa.stft(wav, n_fft=fft_size, hop_length=hop_size,
+                                    win_length=win_length, window=window, center=center))
+
+    if center:
+        # make the waveform length match the number of frames
+        pad_left, pad_right = librosa_pad_lr(wav, fft_size, hop_size, 1)
+        wav = np.pad(wav, (pad_left, pad_right), mode='constant', constant_values=0.0)
+        wav = wav[:magnitude.shape[1] * hop_size]
+
+    log_linear = np.log10(np.maximum(eps, magnitude))
+    return {'wav': wav, 'linear': log_linear.T}
+
+
+def librosa_linear2mel(linear_spec, hparams, num_mels=160, eps=1e-6):
+    """Project a batched log10 linear spectrogram onto a mel filter bank.
+
+    :param linear_spec: tensor of log10 magnitudes; the ``transpose(1, 2)`` before the
+        batched matmul means it is laid out as (batch, frames, n_bins)
+    :param hparams: mapping providing 'fft_size', 'fmin', 'fmax' and 'audio_sample_rate'
+    :param num_mels: number of mel filters
+    :param eps: floor applied before taking log10
+    :return: log10 mel tensor laid out as (batch, frames, num_mels)
+    """
+    fft_size = hparams['fft_size']
+    fmin = hparams['fmin']
+    fmax = hparams['fmax']
+    sample_rate = hparams['audio_sample_rate']
+
+    # get mel basis; -1 is the "use the default" marker for both band edges
+    fmin = 0 if fmin == -1 else fmin
+    fmax = sample_rate / 2 if fmax == -1 else fmax
+    # Keyword arguments, as in librosa_wav2spec: recent librosa releases no longer
+    # accept sr / n_fft positionally.
+    mel_basis = librosa.filters.mel(sr=sample_rate, n_fft=fft_size, n_mels=num_mels, fmin=fmin, fmax=fmax)
+    mel_basis = torch.FloatTensor(mel_basis).to(linear_spec.device)[None, :].repeat(linear_spec.shape[0], 1, 1)
+
+    # undo the log10, apply the filter bank, take log10 again
+    linear_spec = torch.pow(10, linear_spec)
+    mel = torch.bmm(mel_basis, linear_spec.transpose(1, 2))
+    mel = torch.log10(torch.clamp_min(mel, eps))  # (B, n_mel_bins, T)
+    return mel.transpose(1, 2)
+
+
diff --git a/utils/audio/align.py b/utils/audio/align.py
new file mode 100644
index 0000000000000000000000000000000000000000..096e593f01b51ee2d9f565666cfa8c40a90d76f1
--- /dev/null
+++ b/utils/audio/align.py
@@ -0,0 +1,90 @@
+import re
+
+import torch
+import numpy as np
+from textgrid import TextGrid
+
+from utils.text.text_encoder import is_sil_phoneme
+
+
+def get_mel2ph(tg_fn, ph, mel, hop_size, audio_sample_rate, min_sil_duration=0):
+    """Build a frame-to-phoneme alignment from a TextGrid file.
+
+    :param tg_fn: path of the TextGrid file; its tier at index 1 is used
+    :param ph: space-separated phoneme string
+    :param mel: array whose first dimension is the number of frames
+    :param hop_size: hop length in samples, used to convert times to frame indices
+    :param audio_sample_rate: sample rate, used to convert times to frame indices
+    :param min_sil_duration: silence intervals shorter than this (except the first
+        interval) are merged into the preceding interval
+    :return: ``(mel2ph, dur)`` as lists; ``mel2ph`` holds a 1-based phoneme index per
+        frame and ``dur`` is ``mel2token_to_dur(mel2ph, len(ph_list))``
+    :raises AssertionError: if the non-silence counts of the TextGrid and ``ph`` differ,
+        or if any frame is left unassigned (index 0)
+    """
+    ph_list = ph.split(" ")
+    itvs = TextGrid.fromFile(tg_fn)[1]
+    itvs_ = []
+    # Merge too-short silence intervals into the interval before them.
+    for i in range(len(itvs)):
+        if itvs[i].maxTime - itvs[i].minTime < min_sil_duration and i > 0 and is_sil_phoneme(itvs[i].mark):
+            itvs_[-1].maxTime = itvs[i].maxTime
+        else:
+            itvs_.append(itvs[i])
+    itvs.intervals = itvs_
+    itv_marks = [itv.mark for itv in itvs]
+    # Only the non-silence phonemes have to agree between TextGrid and ph.
+    tg_len = len([x for x in itvs if not is_sil_phoneme(x.mark)])
+    ph_len = len([x for x in ph_list if not is_sil_phoneme(x)])
+    assert tg_len == ph_len, (tg_len, ph_len, itv_marks, ph_list, tg_fn)
+    mel2ph = np.zeros([mel.shape[0]], int)
+    i_itv = 0
+    i_ph = 0
+    # Walk both sequences in parallel; silences may be present in only one of them.
+    while i_itv < len(itvs):
+        itv = itvs[i_itv]
+        # NOTE: rebinds the ``ph`` parameter; the original string is no longer needed here.
+        ph = ph_list[i_ph]
+        itv_ph = itv.mark
+        # Convert interval times to frame indices, rounding to the nearest frame.
+        start_frame = int(itv.minTime * audio_sample_rate / hop_size + 0.5)
+        end_frame = int(itv.maxTime * audio_sample_rate / hop_size + 0.5)
+        if is_sil_phoneme(itv_ph) and not is_sil_phoneme(ph):
+            # Silence only in the TextGrid: its frames get index i_ph, i.e. the 1-based
+            # index of the previously consumed phoneme (0 if none was consumed yet).
+            mel2ph[start_frame:end_frame] = i_ph
+            i_itv += 1
+        elif not is_sil_phoneme(itv_ph) and is_sil_phoneme(ph):
+            # Silence only in ph: skip it without assigning any frames.
+            i_ph += 1
+        else:
+            # Both silence or both non-silence: warn when the labels differ after
+            # lower-casing and stripping digits, then assign the frames.
+            if not ((is_sil_phoneme(itv_ph) and is_sil_phoneme(ph)) \
+                    or re.sub(r'\d+', '', itv_ph.lower()) == re.sub(r'\d+', '', ph.lower())):
+                print(f"| WARN: {tg_fn} phs are not same: ", itv_ph, ph, itv_marks, ph_list)
+            mel2ph[start_frame:end_frame] = i_ph + 1
+            i_ph += 1
+            i_itv += 1
+    # The last frame copies its neighbour so it is never left at 0.
+    mel2ph[-1] = mel2ph[-2]
+    assert not np.any(mel2ph == 0)
+    T_t = len(ph_list)
+    dur = mel2token_to_dur(mel2ph, T_t)
+    return mel2ph.tolist(), dur.tolist()
+
+
+def split_audio_by_mel2ph(audio, mel2ph, hop_size, audio_num_mel_bins):
+    """Cut ``audio`` at phoneme boundaries and join the pieces with short runs of zeros.
+
+    A boundary is every frame index where ``mel2ph`` changes value, converted to a
+    sample position with ``hop_size``. Only the segments between consecutive
+    boundaries are kept, so the audio before the first and after the last boundary
+    is dropped. Each kept segment is followed by ``int(0.5 * audio_num_mel_bins)`` zeros.
+
+    :param audio: 1-D waveform (numpy array or torch tensor)
+    :param mel2ph: 1-D frame-to-phoneme indices (numpy array or torch tensor)
+    :param hop_size: hop length in samples
+    :param audio_num_mel_bins: twice the number of zero samples inserted after each segment
+    :return: 1-D numpy array; empty when there are fewer than two boundaries
+    :raises AssertionError: if ``audio`` or ``mel2ph`` is not 1-D
+    """
+    if isinstance(audio, torch.Tensor):
+        audio = audio.numpy()
+    if isinstance(mel2ph, torch.Tensor):
+        mel2ph = mel2ph.numpy()
+    # Two separate asserts: written as ``assert a, b`` the second check was only
+    # the failure message and never evaluated as a condition.
+    assert len(audio.shape) == 1, audio.shape
+    assert len(mel2ph.shape) == 1, mel2ph.shape
+
+    # frame indices i (>= 1) with mel2ph[i] != mel2ph[i - 1], as sample positions
+    split_locs = (np.flatnonzero(mel2ph[1:] != mel2ph[:-1]) + 1) * hop_size
+
+    # numpy needs an integer shape; 0.5 * n is a float even for even n
+    gap = np.zeros([int(0.5 * audio_num_mel_bins)], dtype=audio.dtype)
+    new_audio = []
+    for start, end in zip(split_locs[:-1], split_locs[1:]):
+        new_audio.append(audio[start:end])
+        new_audio.append(gap)
+    if not new_audio:
+        # np.concatenate rejects an empty list
+        return audio[:0]
+    return np.concatenate(new_audio)
+
+
+def mel2token_to_dur(mel2token, T_txt=None, max_dur=None):
+    """Count how many frames are assigned to each token index.
+
+    :param mel2token: 1-based token index per frame (index 0 is counted and then
+        discarded); a torch tensor or anything ``torch.LongTensor`` accepts, 1-D or 2-D
+    :param T_txt: number of tokens; defaults to the largest index in ``mel2token``
+    :param max_dur: optional upper clamp applied to every duration
+    :return: durations with the same container kind (torch / numpy) and the same
+        batching (with or without a leading batch axis) as the input
+    """
+    from_torch = isinstance(mel2token, torch.Tensor)
+    tokens = mel2token if from_torch else torch.LongTensor(mel2token)
+    if T_txt is None:
+        T_txt = tokens.max()
+    unbatched = len(tokens.shape) == 1
+    if unbatched:
+        tokens = tokens[None, ...]
+    n_batch, _ = tokens.shape
+    # Histogram over indices 0..T_txt, then drop the slot of index 0.
+    counts = tokens.new_zeros(n_batch, T_txt + 1)
+    dur = counts.scatter_add(1, tokens, torch.ones_like(tokens))[:, 1:]
+    if max_dur is not None:
+        dur = dur.clamp(max=max_dur)
+    if not from_torch:
+        dur = dur.numpy()
+    return dur[0] if unbatched else dur
diff --git a/utils/audio/dct.py b/utils/audio/dct.py
new file mode 100644
index 0000000000000000000000000000000000000000..c37a2ea9eb817c66a241572d29dd8e81fccce5c0
--- /dev/null
+++ b/utils/audio/dct.py
@@ -0,0 +1,58 @@
+import numpy as np
+import torch
+
+
+def dct(x, norm=None):
+    """Type-II discrete cosine transform along the last axis, computed with one FFT.
+
+    :param x: input tensor; every leading axis is treated as a batch axis
+    :param norm: None for the unnormalized transform, 'ortho' for the orthonormal one
+    :return: tensor of the same shape as ``x``
+    """
+    orig_shape = x.shape
+    n = orig_shape[-1]
+    flat = x.contiguous().view(-1, n)
+
+    # even-indexed samples followed by the odd-indexed samples in reverse order
+    reordered = torch.cat([flat[:, ::2], flat[:, 1::2].flip([1])], dim=1)
+    spectrum = torch.fft.fft(reordered, dim=1)
+
+    # real part of spectrum * exp(i * phase), written out with cos / sin
+    phase = - torch.arange(n, dtype=x.dtype, device=x.device)[None, :] * np.pi / (2 * n)
+    coeffs = spectrum.real * torch.cos(phase) - spectrum.imag * torch.sin(phase)
+
+    if norm == 'ortho':
+        coeffs[:, 0] /= np.sqrt(n) * 2
+        coeffs[:, 1:] /= np.sqrt(n / 2) * 2
+
+    return 2 * coeffs.view(*orig_shape)
+
+
+def idct(X, norm=None):
+    """Inverse of ``dct`` along the last axis, computed with one inverse real FFT.
+
+    :param X: tensor of DCT coefficients; every leading axis is treated as a batch axis
+    :param norm: must match the ``norm`` used for the forward transform (None or 'ortho')
+    :return: tensor of the same shape as ``X``
+    """
+    orig_shape = X.shape
+    n = orig_shape[-1]
+
+    halved = X.contiguous().view(-1, n) / 2
+    if norm == 'ortho':
+        halved[:, 0] *= np.sqrt(n) * 2
+        halved[:, 1:] *= np.sqrt(n / 2) * 2
+
+    phase = torch.arange(n, dtype=X.dtype, device=X.device)[None, :] * np.pi / (2 * n)
+    cos_w = torch.cos(phase)
+    sin_w = torch.sin(phase)
+
+    # real and imaginary parts of the spectrum before the phase rotation
+    real_in = halved
+    imag_in = torch.cat([halved[:, :1] * 0, -halved.flip([1])[:, :-1]], dim=1)
+
+    rotated = torch.stack([real_in * cos_w - imag_in * sin_w,
+                           real_in * sin_w + imag_in * cos_w], dim=2)
+    signal = torch.fft.irfft(torch.view_as_complex(rotated), n=n, dim=1)
+
+    # undo the even / reversed-odd reordering applied by the forward transform
+    out = signal.new_zeros(signal.shape)
+    out[:, ::2] += signal[:, :n - (n // 2)]
+    out[:, 1::2] += signal.flip([1])[:, :n // 2]
+
+    return out.view(*orig_shape)
diff --git a/utils/audio/griffin_lim.py b/utils/audio/griffin_lim.py
new file mode 100644
index 0000000000000000000000000000000000000000..960132b6a1b8befaf5d0ca968f9908405323d89f
--- /dev/null
+++ b/utils/audio/griffin_lim.py
@@ -0,0 +1,85 @@
+import librosa
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+
+def _stft(y, hop_size, win_size, fft_size):
+    """Thin wrapper around ``librosa.stft`` using constant padding."""
+    stft_kwargs = dict(n_fft=fft_size, hop_length=hop_size, win_length=win_size, pad_mode='constant')
+    return librosa.stft(y=y, **stft_kwargs)
+
+
+def _istft(y, hop_size, win_size):
+    """Thin wrapper around ``librosa.istft``; ``y`` is the complex spectrogram to invert."""
+    istft_kwargs = dict(hop_length=hop_size, win_length=win_size)
+    return librosa.istft(y, **istft_kwargs)
+
+
+def griffin_lim(S, hop_size, win_size, fft_size, angles=None, n_iters=30):
+    """Estimate a waveform from a magnitude spectrogram with Griffin-Lim iterations.
+
+    :param S: magnitude spectrogram (its absolute value is used)
+    :param hop_size: hop length in samples
+    :param win_size: window length in samples
+    :param fft_size: FFT size used when re-analysing the current estimate
+    :param angles: initial complex phase factors with the shape of ``S``; random unit
+        phasors are drawn when None
+    :param n_iters: number of phase re-estimation iterations
+    :return: the waveform returned by the last inverse STFT
+    """
+    if angles is None:
+        angles = np.exp(2j * np.pi * np.random.rand(*S.shape))
+    # The ``np.complex`` alias has been removed from NumPy; the builtin ``complex``
+    # selects the same complex128 dtype.
+    S_complex = np.abs(S).astype(complex)
+    y = _istft(S_complex * angles, hop_size, win_size)
+    for _ in range(n_iters):
+        # keep the magnitudes, take the phase from the current estimate
+        angles = np.exp(1j * np.angle(_stft(y, hop_size, win_size, fft_size)))
+        y = _istft(S_complex * angles, hop_size, win_size)
+    return y
+
+
+def istft(amp, ang, hop_size, win_size, fft_size, pad=False, window=None):
+    """Inverse STFT of a spectrogram given as magnitude and phase.
+
+    :param amp: magnitude tensor
+    :param ang: phase tensor in radians, same shape as ``amp``
+    :param hop_size: hop length in samples
+    :param win_size: window length in samples
+    :param fft_size: FFT size
+    :param pad: append one reflected frame at the end before inverting
+    :param window: synthesis window; a Hann window of ``win_size`` on ``amp``'s device
+        is used when None
+    :return: waveform tensor returned by ``torch.istft``
+    """
+    spec = amp * torch.exp(1j * ang)
+    if window is None:
+        window = torch.hann_window(win_size).to(amp.device)
+    if pad:
+        # Pad on the real view (last axis = real / imag) so the frame axis gets the
+        # one extra reflected frame, then go back to a complex tensor.
+        spec_ri = F.pad(torch.view_as_real(spec), [0, 0, 0, 1], mode='reflect')
+        spec = torch.view_as_complex(spec_ri.contiguous())
+    # Pass the complex spectrogram directly and actually use the window: it was
+    # previously built but never handed to torch.istft, which then fell back to a
+    # rectangular window.
+    wav = torch.istft(spec, fft_size, hop_size, win_size, window=window)
+    return wav
+
+
+def griffin_lim_torch(S, hop_size, win_size, fft_size, angles=None, n_iters=30):
+    """Estimate waveforms from magnitude spectrograms with Griffin-Lim iterations in torch.
+
+    Examples:
+    >>> x_stft = librosa.stft(wav, n_fft=fft_size, hop_length=hop_size, win_length=win_size, pad_mode="constant")
+    >>> x_stft = x_stft[None, ...]
+    >>> amp = torch.FloatTensor(np.abs(x_stft))
+    >>> wav = griffin_lim_torch(amp, hop_size, win_size, fft_size)
+
+    :param S: magnitude spectrogram [B, n_fft // 2 + 1, T]
+    :param hop_size: hop length in samples
+    :param win_size: window length in samples (a Hann window is used)
+    :param fft_size: FFT size
+    :param angles: initial phase in radians with the shape of ``S``; drawn uniformly
+        from [0, 2*pi) when None
+    :param n_iters: number of phase re-estimation iterations
+    :return: [B, T_wav]
+    """
+    if angles is None:
+        # ``istft`` expects real phase angles (it applies exp(1j * ang) itself), so
+        # draw angles rather than complex phasors, on the same device as S.
+        angles = 2 * np.pi * torch.rand(*S.shape, device=S.device)
+    window = torch.hann_window(win_size).to(S.device)
+    y = istft(S, angles, hop_size, win_size, fft_size, window=window)
+    for _ in range(n_iters):
+        # keep the magnitudes, take the phase from the current estimate
+        x_stft = torch.stft(y, fft_size, hop_size, win_size, window, return_complex=True)
+        angles = torch.angle(x_stft)
+        y = istft(S, angles, hop_size, win_size, fft_size, window=window)
+    return y
+
+
+# Conversions
+# Module-level caches: each is filled on the first call of _linear_to_mel /
+# _mel_to_linear and reused afterwards.
+_mel_basis = None
+_inv_mel_basis = None
+
+
+def _build_mel_basis(audio_sample_rate, fft_size, audio_num_mel_bins, fmin, fmax):
+    """Build the mel filter bank with ``librosa.filters.mel``.
+
+    :param audio_sample_rate: sample rate in Hz
+    :param fft_size: FFT size
+    :param audio_num_mel_bins: number of mel filters
+    :param fmin: lowest filter frequency in Hz
+    :param fmax: highest filter frequency in Hz; must not exceed ``audio_sample_rate // 2``
+    :return: the filter bank returned by librosa
+    :raises AssertionError: if ``fmax`` is above ``audio_sample_rate // 2``
+    """
+    assert fmax <= audio_sample_rate // 2
+    # Keyword arguments only: recent librosa releases no longer accept
+    # sr / n_fft positionally.
+    return librosa.filters.mel(sr=audio_sample_rate, n_fft=fft_size, n_mels=audio_num_mel_bins,
+                               fmin=fmin, fmax=fmax)
+
+
+def _linear_to_mel(spectogram, audio_sample_rate, fft_size, audio_num_mel_bins, fmin, fmax):
+    """Apply the mel filter bank to a linear spectrogram.
+
+    The filter bank is built on the first call and cached in the module-level
+    ``_mel_basis``; later calls reuse it regardless of the parameters they pass.
+    """
+    global _mel_basis
+    basis = _mel_basis
+    if basis is None:
+        basis = _build_mel_basis(audio_sample_rate, fft_size, audio_num_mel_bins, fmin, fmax)
+        _mel_basis = basis
+    return np.dot(basis, spectogram)
+
+
+def _mel_to_linear(mel_spectrogram, audio_sample_rate, fft_size, audio_num_mel_bins, fmin, fmax):
+    """Map a mel spectrogram back to linear bins with the pseudo-inverse filter bank.
+
+    The pseudo-inverse is computed on the first call and cached in the module-level
+    ``_inv_mel_basis``; later calls reuse it regardless of the parameters they pass.
+    The result is floored at 1e-10.
+    """
+    global _inv_mel_basis
+    inverse = _inv_mel_basis
+    if inverse is None:
+        basis = _build_mel_basis(audio_sample_rate, fft_size, audio_num_mel_bins, fmin, fmax)
+        inverse = np.linalg.pinv(basis)
+        _inv_mel_basis = inverse
+    return np.maximum(1e-10, np.dot(inverse, mel_spectrogram))
diff --git a/utils/audio/io.py b/utils/audio/io.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e4e219d045946b52ea957a5eebe59c0652e8006
--- /dev/null
+++ b/utils/audio/io.py
@@ -0,0 +1,28 @@
+import subprocess
+
+import numpy as np
+from scipy.io import wavfile
+import pyloudnorm as pyln
+
+
+def save_wav(wav, path, sr, norm=False):
+    """Write ``wav`` as a 16-bit PCM WAV file, optionally converting it to MP3 afterwards.
+
+    :param wav: numpy waveform, expected in [-1, 1]; samples outside that range are clipped
+    :param path: output path. A three-character extension is replaced by '.wav'; a path
+        without one gets '.wav' appended. When the path ends in '.mp3' the WAV file is
+        converted with ``to_mp3`` (which removes the intermediate WAV)
+    :param sr: sample rate written to the file
+    :param norm: normalize integrated loudness to -18 and scale the peak to 0.95 if it
+        reaches 1
+    """
+    wav = wav.astype(float)
+    if norm:
+        meter = pyln.Meter(sr)  # create BS.1770 meter
+        loudness = meter.integrated_loudness(wav)
+        wav = pyln.normalize.loudness(wav, loudness, -18.0)
+        peak = np.abs(wav).max()
+        if peak >= 1:
+            wav = wav / peak * 0.95
+    # Clip before the cast: out-of-range floats do not survive a conversion to int16.
+    pcm = np.clip(wav * 32767, -32768, 32767).astype(np.int16)
+    # Strip the extension only when there is one; slicing four characters off a path
+    # without an extension would cut into the file name itself.
+    stem = path[:-4] if path[-4:-3] == '.' else path
+    wavfile.write(stem + '.wav', sr, pcm)
+    if path[-4:] == '.mp3':
+        to_mp3(stem)
+
+
+def to_mp3(out_path):
+    """Convert ``<out_path>.wav`` to ``<out_path>.mp3`` with ffmpeg and delete the WAV file.
+
+    :param out_path: path with or without a trailing '.wav'
+    :raises subprocess.CalledProcessError: if ffmpeg exits with a non-zero status
+    """
+    import os
+
+    if out_path[-4:] == '.wav':
+        out_path = out_path[:-4]
+    wav_file = f'{out_path}.wav'
+    # Argument list instead of a shell string, so quotes, spaces or shell
+    # metacharacters in the path cannot change the command.
+    subprocess.check_call(
+        ['ffmpeg', '-threads', '1', '-loglevel', 'error', '-i', wav_file, '-vn', '-b:a', '192k',
+         '-y', '-hide_banner', '-async', '1', f'{out_path}.mp3'],
+        stdin=subprocess.PIPE)
+    try:
+        os.remove(wav_file)
+    except FileNotFoundError:
+        # same tolerance as ``rm -f``: a missing file is not an error
+        pass
diff --git a/utils/audio/pitch/bin/ExtractF0ByStraight b/utils/audio/pitch/bin/ExtractF0ByStraight
new file mode 100644
index 0000000000000000000000000000000000000000..13687e3d7196438d19ac1dc08d117c57df86705e
Binary files /dev/null and b/utils/audio/pitch/bin/ExtractF0ByStraight differ
diff --git a/utils/audio/pitch/bin/InterpF0 b/utils/audio/pitch/bin/InterpF0
new file mode 100644
index 0000000000000000000000000000000000000000..522c69b28232dd7f41849667b272b39a033fec98
Binary files /dev/null and b/utils/audio/pitch/bin/InterpF0 differ
diff --git a/utils/audio/pitch/bin/ReaperF0 b/utils/audio/pitch/bin/ReaperF0
new file mode 100644
index 0000000000000000000000000000000000000000..6d598e924683ea2ec19f2e46ed485445e789985f
Binary files /dev/null and b/utils/audio/pitch/bin/ReaperF0 differ
diff --git a/utils/audio/pitch/crepe_utils.py b/utils/audio/pitch/crepe_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..d0783c34441fc76cf43e41bdedf0c77f486042d1
--- /dev/null
+++ b/utils/audio/pitch/crepe_utils.py
@@ -0,0 +1,312 @@
+import os
+
+import librosa
+import numpy as np
+import torch
+from scipy.interpolate import interp1d
+from scipy.ndimage import binary_erosion
+from scipy.signal import medfilt
+
+from utils.audio.pitch.extractor_utils import get_med_curve, clean_short_v_frag
+
+
+def crepe_predict(audio, sr, model_capacity='full', center=True, step_size=10, verbose=1):
+    """Run CREPE on ``audio`` and decode the activation into two frequency tracks.
+
+    Sets ``np.seterr(divide='ignore', invalid='ignore')`` as a side effect.
+
+    :param audio: waveform passed to ``crepe.get_activation``
+    :param sr: sample rate of ``audio``
+    :param model_capacity: CREPE model capacity name
+    :param center: passed to ``crepe.get_activation``
+    :param step_size: step between frames in milliseconds (used for the time axis)
+    :param verbose: passed to ``crepe.get_activation``
+    :return: ``(time, frequency_viterbi, frequency_local_average, confidence, activation)``;
+        NaN frequencies are replaced by 0
+    """
+    from crepe.core import to_viterbi_cents, to_local_average_cents
+    from crepe import get_activation
+
+    def _cents_to_hz(cents):
+        hz = 10 * 2 ** (cents / 1200)
+        hz[np.isnan(hz)] = 0
+        return hz
+
+    np.seterr(divide='ignore', invalid='ignore')
+    activation = get_activation(audio, sr, model_capacity=model_capacity,
+                                center=center, step_size=step_size,
+                                verbose=verbose)
+    confidence = activation.max(axis=1)
+
+    frequency_v = _cents_to_hz(to_viterbi_cents(activation))
+    frequency = _cents_to_hz(to_local_average_cents(activation))
+
+    # frame times in seconds
+    time = np.arange(confidence.shape[0]) * step_size / 1000.0
+
+    return time, frequency_v, frequency, confidence, activation
+
+
+def load_model(device, capacity='full'):
+    """Create a ``torchcrepe.Crepe`` model, load its bundled weights and put it in eval mode.
+
+    :param device: device the model is moved to
+    :param capacity: model capacity name; also selects ``assets/<capacity>.pth``
+        inside the torchcrepe package directory
+    :return: the model, on ``device`` and in eval mode
+    """
+    import torchcrepe
+
+    weights_path = os.path.join(os.path.dirname(torchcrepe.__file__), 'assets', f'{capacity}.pth')
+    model = torchcrepe.Crepe(capacity)
+    # weights are read onto the CPU first, then the whole model is moved
+    model.load_state_dict(torch.load(weights_path, map_location='cpu'))
+    model = model.to(torch.device(device))
+    model.eval()
+    return model
+
+
+def crepe_predict_torch(audio, sr, hop_length=None, model_capacity='full',
+                        batch_size=None, device='cpu', pad=True):
+    """Run the torchcrepe model on ``audio`` and decode the activation into two frequency tracks.
+
+    Only the first batch yielded by ``torchcrepe.preprocess`` is evaluated.
+
+    :param audio: waveform passed to ``torchcrepe.preprocess``
+    :param sr: sample rate of ``audio``
+    :param hop_length: passed to ``torchcrepe.preprocess``
+    :param model_capacity: capacity name passed to ``load_model``
+    :param batch_size: passed to ``torchcrepe.preprocess``
+    :param device: device used for preprocessing and for the model
+    :param pad: passed to ``torchcrepe.preprocess``
+    :return: ``(frequency_viterbi, frequency_local_average, confidence, activation)``;
+        NaN frequencies are replaced by 0
+    """
+    from torchcrepe import preprocess, PITCH_BINS
+    import warnings
+    from crepe.core import to_viterbi_cents, to_local_average_cents
+
+    def _cents_to_hz(cents):
+        hz = 10 * 2 ** (cents / 1200)
+        hz[np.isnan(hz)] = 0
+        return hz
+
+    warnings.filterwarnings('ignore', message=r'Named tensors and all their associated APIs.*')
+
+    # Postprocessing breaks gradients, so none are computed
+    with torch.no_grad():
+        batches = preprocess(audio, sr, hop_length, batch_size, device, pad)
+        frames = next(batches)
+        # independent probabilities for each pitch bin
+        model = load_model(device, model_capacity).to(frames.device)
+        activation = model(frames)
+        # drop the references before the cache is emptied below
+        del model
+        del frames
+
+    # one row of PITCH_BINS values per frame
+    activation = activation.reshape(-1, PITCH_BINS).cpu().numpy()
+    torch.cuda.empty_cache()
+    confidence = activation.max(axis=1)
+
+    frequency_v = _cents_to_hz(to_viterbi_cents(activation))
+    frequency = _cents_to_hz(to_local_average_cents(activation))
+
+    return frequency_v, frequency, confidence, activation
+
+
+def cents_to_bins(cents):
+    """Convert cents to the nearest pitch bin index (20 cents per bin)."""
+    cents_per_bin = 20
+    offset = cents - 1997.3794084376191  # cents value that maps to bin 0
+    return np.round(offset / cents_per_bin).astype(int)
+
+
+def cents_to_frequency(cents):
+    """Convert cents (relative to 10 Hz) to frequency in Hz."""
+    octaves = cents / 1200
+    return 10 * 2 ** octaves
+
+
+def frequency_to_bins(frequency):
+    """Convert frequency in Hz to pitch bin indices, going through cents."""
+    cents = frequency_to_cents(frequency)
+    return cents_to_bins(cents)
+
+
+def frequency_to_cents(frequency):
+    """Convert frequency in Hz to cents relative to 10 Hz (1e-8 is added inside the log)."""
+    ratio = frequency / 10. + 1e-8
+    return 1200 * np.log2(ratio)
+
+
+def find_nearest_f0_in_piptrack(f0, pitches):
+    """For every frame pick the candidate in ``pitches`` that is closest to ``f0``.
+
+    :param f0: 1-D array with one value per frame
+    :param pitches: 2-D array of candidates, one row per frame
+    :return: 1-D array with the selected candidate of each frame
+    """
+    distance = np.abs(f0[:, None] - pitches)
+    nearest = distance.argmin(-1)
+    return pitches[np.arange(len(f0)), nearest]
+
+
+def f0_energy_corrector(wav_data_16k, f0_func, f0_min, f0_max, fix_octave_error=True):
+    """Refine an f0 track and derive an unvoiced mask from harmonic energy ratios.
+
+    The f0 sampled from ``f0_func`` is snapped to the nearest ``librosa.piptrack``
+    candidate per frame, optionally divided by 2, 3 or 5 where a sub-harmonic carries
+    enough energy, and frames are marked unvoiced when harmonic energy is weak
+    relative to the energy between harmonics, when f0 leaves ``[f0_min, f0_max]``,
+    or when harmonic energy is below the mean energy under about 70 Hz.
+
+    :param wav_data_16k: waveform; the function hard-codes a 16 kHz sample rate
+    :param f0_func: callable mapping positions in [0, 1] to f0 values
+    :param f0_min: lowest f0 considered voiced
+    :param f0_max: highest f0 considered voiced
+    :param fix_octave_error: enable the sub-harmonic (divide by 2 / 3 / 5) correction
+    :return: ``(func_uv, func_f0_div, states)``: nearest-neighbour interpolators over
+        [0, 1] for the unvoiced mask and the corrected f0, plus a dict of
+        intermediate arrays
+    """
+    hop_size = 256
+    win_size = hop_size * 6
+    sr = 16000
+
+    spec = np.abs(librosa.stft(wav_data_16k, n_fft=win_size, hop_length=hop_size,
+                               win_length=win_size, pad_mode="constant").T)
+    T = spec.shape[0]
+    # normalized frame positions; the last one is forced to exactly 1
+    x_h256 = np.arange(0, 1, 1 / T)[:T]
+    x_h256[-1] = 1
+    f0 = f0_func(x_h256)
+    freqs = librosa.fft_frequencies(sr=sr, n_fft=win_size)
+    x_idx = np.arange(T)
+
+    def find_nearest_stft_bin(f0_):
+        # index of the STFT bin whose centre frequency is closest to each value
+        return np.abs(freqs[None, :] - f0_[:, None]).argmin(-1)
+
+    def get_energy_mask(f0_lambda, hars=None, win_size=3):
+        # Mean spectral magnitude in a small bin window around f0_lambda(f0, m) for
+        # every multiple m in ``hars``, plus a boolean mask of the bins visited.
+        if hars is None:
+            hars = [1]
+        mask = np.zeros([T, 10000]).astype(bool)
+        mask_bins = []
+        for multiple in hars:
+            f0_bin_idx = find_nearest_stft_bin(f0_lambda(f0, multiple))
+            for delta in range(-win_size // 2, 1 + win_size // 2):
+                y_idx = f0_bin_idx + delta
+                # only offsets that stay inside the spectrum contribute to the energy
+                if np.max(y_idx) < spec.shape[1]:
+                    mask_bins.append(spec[x_idx, y_idx])
+                mask[x_idx, y_idx] = 1
+        mask_bins = np.stack(mask_bins, 1)
+        energy_ = np.mean(mask_bins, 1)
+        return energy_, mask
+
+    # reference energy: mean magnitude below the bin nearest to 70 Hz
+    bottom_idx = find_nearest_stft_bin(np.array([70]))[0]
+    bottom_energy = spec[:, :bottom_idx].mean()
+    # Keyword arguments for y / sr: recent librosa releases no longer accept them
+    # positionally.
+    pitches, _ = librosa.piptrack(
+        y=wav_data_16k, sr=sr,
+        n_fft=win_size, win_length=win_size, hop_length=hop_size,
+        fmin=50, fmax=3000, ref=bottom_energy)
+    pitches = pitches.T[:T]
+    f0_piptrack = find_nearest_f0_in_piptrack(f0, pitches)
+    f0_raw = f0
+    # NOTE(review): f0 and f0_piptrack are the same array from here on, so the
+    # in-place edits below also change the 'f0_piptrack' entry that is returned;
+    # confirm this is intended.
+    f0 = f0_piptrack
+
+    # find uv first (for obtaining mean_energy_mharfhar)
+    energy_har, mask_har = get_energy_mask(lambda f0, m: f0 * m, [1, 2], 3)
+    energy_mhalfhar, mask_mhalfhar = get_energy_mask(lambda f0, m: f0 * (m - 0.5), [1], 5)
+    r_energy = energy_har / np.clip(energy_mhalfhar, 1e-8, None)
+
+    uv = np.zeros_like(f0).astype(bool)
+    uv |= r_energy < 10
+    uv |= (f0 > f0_max) | (f0 < f0_min)
+    uv |= energy_har < bottom_energy
+    mean_energy_mharfhar = np.clip(energy_mhalfhar[~uv].mean(), 1e-8, None)
+    # NOTE(review): len(uv) is the number of frames, not the number of unvoiced
+    # frames; with no unvoiced frame the mean below is taken over an empty selection.
+    if len(uv) > 0:
+        # subtract the mean unvoiced spectrum as a noise floor
+        spec = np.clip(spec - spec[uv].mean(0)[None, :], 1e-8, None)
+
+    # fix octave error
+    r_energy_div_dict = {}
+    if fix_octave_error:
+        for div, mul, thres in [
+            (2, (1,), 20),
+            (3, (1, 2), 20),
+            (5, (1, 2, 3), 20),
+        ]:
+            # energy at multiples of f0 / div, relative to the mean inter-harmonic energy
+            energy_div_har, mask_div_har = get_energy_mask(lambda f0, m: f0 / div * m, mul, 3)
+            r_energy_div = energy_div_har / mean_energy_mharfhar
+            r_energy_div = medfilt(r_energy_div, 5)
+
+            r_energy_div_dict[div] = r_energy_div
+            div_mask = (r_energy_div > thres) & (f0 / div > f0_min)
+            f0[div_mask] /= div
+
+            # around the eroded core of each corrected region, re-pick the piptrack
+            # candidate closest to the corrected f0 for up to 10 neighbouring frames
+            div_mask_erosion = binary_erosion(div_mask, iterations=2)
+            div_pos = sorted(np.where(div_mask_erosion)[0])
+            for pos in div_pos:
+                for s in range(10):
+                    if pos - s not in div_pos and pos - s >= 0:
+                        f0[pos - s] = pitches[pos - s, np.abs(f0[pos] - pitches[pos - s]).argmin()]
+                    if pos + s not in div_pos and pos + s < T:
+                        f0[pos + s] = pitches[pos + s, np.abs(f0[pos] - pitches[pos + s]).argmin()]
+
+    # find uv second
+    energy_har, mask_har = get_energy_mask(lambda f0, m: f0 * m, [1, 2], 3)
+    energy_mhalfhar, mask_mhalfhar = get_energy_mask(lambda f0, m: f0 * (m - 0.5), [1], 5)
+    energy_har_2, _ = get_energy_mask(lambda f0, m: f0 * m, [2], 3)
+    energy_mhalfhar_2, _ = get_energy_mask(lambda f0, m: f0 * (m - 0.5), [2, 3], 3)
+
+    r_energy = energy_har / np.clip(energy_mhalfhar, 1e-8, None)
+    r_energy = medfilt(r_energy, 3)
+    r_energy_2 = energy_har_2 / np.clip(energy_mhalfhar_2, 1e-8, None)
+    r_energy_2 = medfilt(r_energy_2, 3)
+    r_energy_2_mask = r_energy_2 < 3
+    r_energy_2_mask = binary_erosion(r_energy_2_mask, iterations=3)
+
+    uv = np.zeros_like(f0).astype(bool)
+    uv |= r_energy < 8
+    uv |= r_energy_2_mask
+    uv |= (f0 > f0_max) | (f0 < f0_min)
+    uv |= energy_har < bottom_energy
+
+    func_uv = interp1d(x_h256, uv, 'nearest')
+    func_f0_div = interp1d(x_h256, f0, 'nearest')
+
+    spec_log = np.log10(spec + 1e-8)
+
+    return func_uv, func_f0_div, {
+        'spec': spec_log,
+        'energy_har': energy_har, 'energy_halfhar': energy_mhalfhar,
+        'r_energy': r_energy, 'r_energy_2': r_energy_2,
+        'mask_har': mask_har, 'mask_halfhar': mask_mhalfhar,
+        'bottom_energy': bottom_energy,
+        'r_energy_div_dict': r_energy_div_dict,
+        'f0_piptrack': f0_piptrack,
+        'f0_raw': f0_raw
+    }
+
+
+def crepe_with_corrector(wav_data, hop_size, audio_sample_rate, f0_min, f0_max, return_states=False, *args, **kwargs):
+    """Estimate F0 with CREPE, then clean it up with the spectral-energy corrector.
+
+    All intermediate work happens on CREPE's 10 ms grid at 16 kHz; the result is
+    resampled to ``len(wav_data) // hop_size`` frames only at the very end:
+
+    1. run ``crepe_predict`` on the 16 kHz signal;
+    2. first ``f0_energy_corrector`` pass (octave-error fixing on) for an
+       initial unvoiced (UV) mask;
+    3. re-decode the CREPE activations with Viterbi, one voiced chunk at a time;
+    4. zero frames whose F0 is more than 100 away from the median curve;
+    5. second ``f0_energy_corrector`` pass (octave-error fixing off) for the
+       final UV decision;
+    6. resample to the output grid, median-filter and zero unvoiced frames.
+
+    :param wav_data: waveform sampled at ``audio_sample_rate``.
+    :param hop_size: hop size, in samples of ``wav_data``, of the output grid.
+    :param audio_sample_rate: sample rate of ``wav_data``.
+    :param f0_min: lower F0 bound forwarded to ``f0_energy_corrector``.
+    :param f0_max: upper F0 bound forwarded to ``f0_energy_corrector``.
+    :param return_states: when True, also return the dict of intermediate results.
+    :return: ``f0`` (0 on unvoiced frames), or ``(f0, states)`` if ``return_states``.
+    """
+    wav_data = wav_data.astype(np.double)
+    # Keyword arguments work on both old and new librosa; the sample rates are
+    # keyword-only from librosa 0.10 on.
+    wav_data_16k = librosa.resample(wav_data, orig_sr=audio_sample_rate, target_sr=16000)
+    _, f0_10ms, _, confi, activation = crepe_predict(
+        wav_data_16k, 16000, step_size=10, model_capacity='small', center=True, verbose=0)
+    T_10ms = len(f0_10ms)
+    # Normalised time axis in [0, 1] so grids of different rates can be interpolated.
+    x_10ms = np.arange(0, 1, 1 / T_10ms)[:T_10ms]
+    x_10ms[-1] = 1.0
+    func_f0 = interp1d(x_10ms, f0_10ms, 'nearest')
+
+    n_mel_frames = int(len(wav_data) // hop_size)
+    x_new = np.arange(0, 1, 1 / n_mel_frames)[:n_mel_frames]
+    x_new[-1] = 1.0
+
+    # correct f0 using energy spec (first round)
+    func_uv, func_f0, states = f0_energy_corrector(wav_data_16k, func_f0, f0_min, f0_max, fix_octave_error=True)
+    f0_10ms = func_f0(x_10ms)
+    # A frame is unvoiced only if the corrector says so AND CREPE is not confident.
+    uv_10ms = (func_uv(x_10ms) > 1e-4) & (confi < 0.9)
+    uv_10ms = medfilt(uv_10ms.astype(float), 3) > 1e-4
+    states['activation'] = activation
+    states['confidence'] = confi
+
+    # viterbi by voiced chunk, to fix incorrect viterbi smoothing in UV border.
+    f0_10ms[uv_10ms] = 0
+    f0_10ms_new = np.zeros_like(f0_10ms).astype(float)
+    v_begin = -1
+    for i in range(T_10ms):
+        if not uv_10ms[i] and i < T_10ms - 1:
+            if v_begin == -1:
+                v_begin = i
+        elif v_begin != -1:
+            v_end = i - 1 if uv_10ms[i] else i
+            if v_end - v_begin > 3:
+                f0_bins = frequency_to_bins(f0_10ms[v_begin:v_end + 1])
+                for j, k in zip(range(v_begin, v_end + 1), f0_bins):
+                    if f0_10ms[j] > 1e-4:
+                        # Damp activations well above the corrected pitch bin
+                        # (in place: this also changes states['activation']).
+                        activation[j, k + 10:] /= 5
+                cents_v = to_viterbi_cents(activation[v_begin:v_end + 1])
+                f0__ = 10 * 2 ** (cents_v / 1200)
+                f0__[np.isnan(f0__)] = 0
+                f0_10ms_new[v_begin:v_end + 1] = f0__
+                # NOTE(review): v_begin is only reset for chunks longer than 4
+                # frames, so a shorter chunk keeps growing over the following
+                # unvoiced frames until it is long enough. Confirm this is intended.
+                v_begin = -1
+    f0_10ms = f0_10ms_new
+
+    # remove pitch deviated from median
+    f0_10ms[confi < 0.1] = 0
+    try:
+        x_med_curve, y_med_curve = get_med_curve(f0_10ms)
+        f0_med_curve = interp1d(np.array(x_med_curve), np.array(y_med_curve), 'nearest')(np.arange(len(f0_10ms)))
+        f0_10ms[(f0_10ms < f0_med_curve - 100) | (f0_10ms > f0_med_curve + 100)] = 0
+        states['f0_med_curve'] = interp1d(x_10ms, f0_med_curve)(x_new)
+    except Exception:
+        # Best effort: if the median curve cannot be built or interpolated, the
+        # median-based pruning is skipped. Only ordinary errors are swallowed;
+        # KeyboardInterrupt and SystemExit still propagate.
+        pass
+
+    # correct f0 using energy spec (second round), for better UV
+    func_f0 = interp1d(x_10ms, f0_10ms, 'nearest')
+    func_uv, func_f0, states_ = f0_energy_corrector(wav_data_16k, func_f0, f0_min, f0_max, fix_octave_error=False)
+    del states_['r_energy_div_dict']
+    states.update(states_)
+
+    # interpolate f0
+    confi_new = interp1d(x_10ms, confi)(x_new)
+    f0 = func_f0(x_new)
+    uv = (clean_short_v_frag(f0) | (func_uv(x_new) > 1e-4)) & (confi_new < 0.9)
+    uv = medfilt(uv.astype(float), 3) > 1e-4
+    f0 = medfilt(f0, 3)
+    f0[uv] = 0
+
+    if return_states:
+        return f0, states
+    else:
+        return f0
diff --git a/utils/audio/pitch/extractor_utils.py b/utils/audio/pitch/extractor_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e165c319f7a9ae9c7bd48e74981e7dbe61b73020
--- /dev/null
+++ b/utils/audio/pitch/extractor_utils.py
@@ -0,0 +1,99 @@
+import numba as nb
+import numpy as np
+
+
+@nb.njit()
+def find_nearest_stft_bin(f0_, freqs):
+    """Return the index of the entry of ``freqs`` that is closest to ``f0_``.
+
+    The ``argmin`` is taken over the flattened ``(len(f0_), len(freqs))``
+    distance matrix, so the result is a plain bin index only when ``f0_`` holds
+    a single frequency.
+    """
+    freqs = np.expand_dims(freqs, 0)
+    f0_ = np.expand_dims(f0_, 1)
+    return np.abs(freqs - f0_).argmin()
+
+
+@nb.njit()
+def get_med_curve(f0, step_size=20):
+    """Build a piecewise median curve over the voiced fragments of ``f0``.
+
+    A frame counts as voiced when ``f0 >= 50``; the last frame always closes the
+    current fragment. For every fragment spanning more than 4 frames, one
+    control point is emitted per ``step_size`` frames (the median of that
+    window), plus one at the fragment end (the median of the last
+    ``step_size + 1`` frames). The curve is finally extended to x = 0 and
+    x = len(f0) by repeating the first and last values.
+
+    :param f0: 1-D array of F0 values, 0 (or < 50) on unvoiced frames.
+    :param step_size: window length, in frames, of each median.
+    :return: ``(x_med_curve, y_med_curve)`` arrays of control points.
+    :raises IndexError: when no fragment qualifies (``y_med_curve`` is empty).
+    """
+    v_begin = -1
+    v_end = -1
+    x_med_curve = []
+    y_med_curve = []
+    T = len(f0)
+
+    for i in range(T):
+        if f0[i] >= 50 and i < T - 1:
+            if v_begin == -1:
+                v_begin = i
+            v_end = i
+        else:
+            if v_end != -1:
+                if v_end - v_begin > 3:
+                    for j in range(v_begin, v_end + 1 - step_size, step_size):
+                        frag_med = np.median(f0[j:j + step_size])
+                        x_med_curve.append(j)
+                        y_med_curve.append(frag_med)
+                    x_med_curve.append(v_end)
+                    # Clamp the window start at 0: a negative start would wrap
+                    # around and select an empty or unrelated slice for
+                    # fragments ending within the first `step_size` frames.
+                    tail_start = max(v_end - step_size, 0)
+                    y_med_curve.append(np.median(f0[tail_start:v_end + 1]))
+            v_end = v_begin = -1
+    x_med_curve = [0] + x_med_curve + [T]
+    x_med_curve = np.array(x_med_curve)
+    y_med_curve = [y_med_curve[0]] + y_med_curve + [y_med_curve[-1]]
+    y_med_curve = np.array(y_med_curve)
+    return x_med_curve, y_med_curve
+
+
+@nb.njit()
+def clean_short_v_frag(f0):
+    """Mark voiced fragments shorter than 3 frames.
+
+    A frame is voiced when ``f0 >= 1e-4``. Returns a boolean array of the same
+    length as ``f0`` that is True on every frame belonging to a voiced run of
+    1 or 2 frames; ``f0`` itself is not modified.
+    """
+    v_begin = -1
+    T = len(f0)
+
+    uv = np.zeros_like(f0).astype(np.bool_)
+    for i in range(T):
+        # The last frame always takes the else-branch so an open run gets closed.
+        if f0[i] >= 1e-4 and i < T - 1:
+            if v_begin == -1:
+                v_begin = i
+        else:
+            if v_begin != -1:
+                # Include frame i in the run only if it is itself voiced
+                # (which can only happen on the last frame).
+                v_end = i if f0[i] >= 1e-4 else i - 1
+                if v_end - v_begin + 1 < 3:
+                    uv[v_begin:v_end + 1] = 1
+            v_begin = -1
+    return uv
+
+
+@nb.njit()
+def find_best_f0_using_har_energy(spec, pitches, freqs, hars, hars_mhalf, f0_min, f0_max):
+    """Pick, per frame, the pitch candidate with the best harmonic energy ratio.
+
+    For every frame ``i`` and candidate ``pitches[i, j]`` the ratio between the
+    mean energy around the harmonics ``f0 * mul`` (``mul`` in ``hars``) and the
+    mean energy around the in-between positions ``f0 * (mul - 0.5)`` (``mul`` in
+    ``hars_mhalf``) is stored in ``re[i, j]``. Candidates that are 0 or outside
+    ``[f0_min[i], f0_max[i]]`` keep a ratio of 0.
+
+    Assumes ``spec`` is a (frames, bins) log-spectrum, since it is passed
+    through ``np.exp`` - TODO confirm against the caller.
+
+    :return: ``(re, f0, x_med_curve, y_med_curve)`` where ``re`` holds the
+        ratios (values <= 1.5 zeroed), ``f0`` the selected pitch per frame
+        (0 when unvoiced) and the last two the output of ``get_med_curve(f0)``.
+    """
+    re = np.zeros_like(spec)
+    T = len(spec)
+    for i in range(T):
+        spec_i = spec[i]
+        for j, f0_j in enumerate(pitches[i]):
+            if f0_j == 0 or f0_j < f0_min[i] or f0_j > f0_max[i]:
+                continue
+            # Masks are oversized (10000 bins) and trimmed to len(spec_i) below,
+            # so a write at index -1 (bin 0 with delta -1) lands in the part
+            # that is cut off instead of wrapping onto a real bin.
+            mask = np.zeros((10000,))
+            mask_mhalf = np.zeros((10000,))
+            for mul in hars:
+                b = find_nearest_stft_bin(np.array((f0_j * mul,)), freqs)
+                # 3-bin window centred on the harmonic.
+                for delta in range(-1, 2):
+                    mask[b + delta] = 1
+            for mul in hars_mhalf:
+                b_mhalf = find_nearest_stft_bin(np.array((f0_j * (mul - 0.5),)), freqs)
+                for delta in range(-1, 2):
+                    mask_mhalf[b_mhalf + delta] = 1
+            mask = mask[:len(spec_i)]
+            mask_mhalf = mask_mhalf[:len(spec_i)]
+            energy = (np.exp(spec_i) * mask).sum() / mask.sum()
+            energy_mhalf = (np.exp(spec_i) * mask_mhalf).sum() / mask_mhalf.sum()
+            re[i, j] = energy / energy_mhalf
+    # Score: ratio > 3 beats ratio > 2 beats the rest; the reversed column index
+    # breaks ties in favour of the candidate with the lower column index.
+    f0_2d_mask = 10000 * (re > 2) + 20000 * (re > 3) + np.expand_dims(np.arange(re.shape[1])[::-1], 0)
+    f0_idx = np.zeros((T,), dtype=np.int_)
+    for i in range(T):
+        f0_idx[i] = f0_2d_mask[i].argmax()
+    # A frame is unvoiced when no candidate produced a ratio.
+    uv = re.sum(-1) == 0
+
+    f0 = np.zeros((T,))
+    for i in range(T):
+        f0[i] = pitches[i, f0_idx[i]]
+    f0 = f0 * (1 - uv)
+    # Drop voiced runs shorter than 3 frames.
+    uv = clean_short_v_frag(f0)
+    f0[uv] = 0
+    x_med_curve, y_med_curve = get_med_curve(f0)
+    re = re * (re > 1.5)
+    return re, f0, x_med_curve, y_med_curve
diff --git a/utils/audio/pitch/utils.py b/utils/audio/pitch/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..238b8022185753a7d4d9d674d189a99050c29b6f
--- /dev/null
+++ b/utils/audio/pitch/utils.py
@@ -0,0 +1,82 @@
+import numpy as np
+import torch
+
+
+def to_lf0(f0):
+    """Convert F0 to log-F0, mapping unvoiced frames to ``-1e10``.
+
+    Frames with ``f0 < 1e-5`` are treated as unvoiced. The input is left
+    untouched; the log is computed on a floored copy.
+
+    :param f0: ``torch.Tensor`` or ``numpy.ndarray`` of F0 values.
+    :return: new tensor/array of the same type holding ``log(f0)``, with
+        ``-1e10`` on unvoiced frames.
+    """
+    is_torch = isinstance(f0, torch.Tensor)
+    unvoiced = f0 < 1.0e-5
+    # Floor unvoiced frames before the log so it never sees 0 or negatives;
+    # work on a copy so the caller's data is not modified.
+    safe_f0 = f0.clone() if is_torch else f0.copy()
+    safe_f0[unvoiced] = 1.0e-6
+    lf0 = safe_f0.log() if is_torch else np.log(safe_f0)
+    lf0[unvoiced] = - 1.0E+10
+    return lf0
+
+
+def to_f0(lf0):
+    """Invert a log-F0 array: ``exp(lf0)`` where ``lf0 > 0``, else 0.
+
+    The result is always returned flattened to one dimension.
+    """
+    exponentiated = np.exp(lf0)
+    is_unvoiced = lf0 <= 0
+    return np.where(is_unvoiced, 0.0, exponentiated).flatten()
+
+
+def f0_to_coarse(f0, f0_bin=256, f0_max=900.0, f0_min=50.0):
+    """Quantise F0 (Hz) into ``f0_bin - 1`` mel-spaced integer classes.
+
+    F0 is converted to the mel scale and mapped linearly so that ``f0_min``
+    lands on 1 and ``f0_max`` on ``f0_bin - 1``; values outside that range are
+    clipped. Frames with ``f0 == 0`` end up in class 1.
+
+    :param f0: ``torch.Tensor`` or ``numpy.ndarray`` of F0 values in Hz.
+    :param f0_bin: number of classes; outputs lie in ``[1, f0_bin - 1]``.
+    :param f0_max: frequency mapped to the highest class.
+    :param f0_min: frequency mapped to class 1.
+    :return: integer tensor/array with the same shape as ``f0``.
+    """
+    f0_mel_min = 1127 * np.log(1 + f0_min / 700)
+    f0_mel_max = 1127 * np.log(1 + f0_max / 700)
+    is_torch = isinstance(f0, torch.Tensor)
+    f0_mel = 1127 * (1 + f0 / 700).log() if is_torch else 1127 * np.log(1 + f0 / 700)
+    f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * (f0_bin - 2) / (f0_mel_max - f0_mel_min) + 1
+
+    f0_mel[f0_mel <= 1] = 1
+    f0_mel[f0_mel > f0_bin - 1] = f0_bin - 1
+    f0_coarse = (f0_mel + 0.5).long() if is_torch else np.rint(f0_mel).astype(int)
+    # Sanity check against the requested number of bins (was hard-coded to 255,
+    # which rejected valid results whenever f0_bin > 256).
+    assert f0_coarse.max() <= f0_bin - 1 and f0_coarse.min() >= 1, \
+        (f0_coarse.max(), f0_coarse.min(), f0.min(), f0.max())
+    return f0_coarse
+
+
+def coarse_to_f0(f0_coarse, f0_bin=256, f0_max=900.0, f0_min=50.0):
+    """Map mel-spaced pitch classes back to F0 in Hz (inverse of ``f0_to_coarse``).
+
+    Class 1 is treated as unvoiced and mapped to 0. Accepts both
+    ``torch.Tensor`` and ``numpy.ndarray`` inputs, like ``f0_to_coarse``.
+
+    :param f0_coarse: integer classes in ``[1, f0_bin - 1]``.
+    :param f0_bin: number of classes used for quantisation.
+    :param f0_max: frequency represented by class ``f0_bin - 1``.
+    :param f0_min: frequency represented by the lower edge of the class range.
+    :return: F0 values in Hz, same container type as the input.
+    """
+    f0_mel_min = 1127 * np.log(1 + f0_min / 700)
+    f0_mel_max = 1127 * np.log(1 + f0_max / 700)
+    is_torch = isinstance(f0_coarse, torch.Tensor)
+    uv = f0_coarse == 1
+    f0 = f0_mel_min + (f0_coarse - 1) * (f0_mel_max - f0_mel_min) / (f0_bin - 2)
+    # Mel -> Hz; numpy arrays have no .exp() method, so dispatch on the type.
+    f0 = ((f0 / 1127).exp() - 1) * 700 if is_torch else (np.exp(f0 / 1127) - 1) * 700
+    f0[uv] = 0
+    return f0
+
+
+def norm_f0(f0, uv, pitch_norm='log', f0_mean=400, f0_std=100):
+    """Normalise F0 and zero the unvoiced frames.
+
+    :param f0: ``torch.Tensor`` or ``numpy.ndarray`` of F0 values.
+    :param uv: unvoiced mask (frames with ``uv > 0`` are set to 0) or None.
+    :param pitch_norm: ``'standard'`` for ``(f0 - f0_mean) / f0_std``,
+        ``'log'`` for ``log2(f0 + 1e-8)``; any other value leaves f0 as is.
+    :return: the normalised F0.
+    """
+    if pitch_norm == 'standard':
+        f0 = (f0 - f0_mean) / f0_std
+    elif pitch_norm == 'log':
+        log2 = torch.log2 if isinstance(f0, torch.Tensor) else np.log2
+        f0 = log2(f0 + 1e-8)
+    if uv is None:
+        return f0
+    f0[uv > 0] = 0
+    return f0
+
+
+def norm_interp_f0(f0, pitch_norm='log', f0_mean=None, f0_std=None):
+    """Normalise F0 and fill unvoiced frames by linear interpolation.
+
+    Frames with ``f0 == 0`` are unvoiced. After normalisation with ``norm_f0``
+    their values are interpolated from the surrounding voiced frames; if every
+    frame is unvoiced the result is all zeros.
+
+    :param f0: 1-D ``torch.Tensor`` or ``numpy.ndarray`` of F0 values.
+    :param pitch_norm: normalisation mode forwarded to ``norm_f0``.
+    :param f0_mean: mean forwarded to ``norm_f0`` (used by ``'standard'``).
+    :param f0_std: std forwarded to ``norm_f0`` (used by ``'standard'``).
+    :return: ``(f0, uv)``; float tensors on the input's device for tensor
+        input, numpy arrays otherwise.
+    """
+    is_torch = isinstance(f0, torch.Tensor)
+    if is_torch:
+        device = f0.device
+        f0 = f0.data.cpu().numpy()
+    uv = f0 == 0
+    f0 = norm_f0(f0, uv, pitch_norm, f0_mean, f0_std)
+    # Vectorised reductions instead of Python-level sum() over the mask.
+    if uv.all():
+        f0[uv] = 0
+    elif uv.any():
+        f0[uv] = np.interp(np.where(uv)[0], np.where(~uv)[0], f0[~uv])
+    if is_torch:
+        uv = torch.FloatTensor(uv)
+        f0 = torch.FloatTensor(f0)
+        f0 = f0.to(device)
+        uv = uv.to(device)
+    return f0, uv
+
+
+def denorm_f0(f0, uv, pitch_norm='log', f0_mean=400, f0_std=100, pitch_padding=None, min=50, max=900):
+    """Undo ``norm_f0``: map normalised F0 back, clip it and zero masked frames.
+
+    :param f0: normalised F0 (``torch.Tensor`` or ``numpy.ndarray``).
+    :param uv: unvoiced mask (frames with ``uv > 0`` become 0) or None.
+    :param pitch_norm: ``'standard'`` or ``'log'``; other values skip the inverse.
+    :param pitch_padding: optional index/mask of padded frames to zero.
+    :param min: lower clipping bound applied before masking.
+    :param max: upper clipping bound applied before masking.
+    :return: the de-normalised F0.
+    """
+    is_torch = isinstance(f0, torch.Tensor)
+    if pitch_norm == 'standard':
+        f0 = f0 * f0_std + f0_mean
+    elif pitch_norm == 'log':
+        f0 = 2 ** f0
+
+    if is_torch:
+        f0 = f0.clamp(min=min, max=max)
+    else:
+        f0 = np.clip(f0, a_min=min, a_max=max)
+
+    for zero_mask in (None if uv is None else uv > 0, pitch_padding):
+        if zero_mask is not None:
+            f0[zero_mask] = 0
+    return f0
diff --git a/utils/audio/pitch/uv_utils.py b/utils/audio/pitch/uv_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e3fb9e162a635aec3472d3c6df4d5ddc3741cc5
--- /dev/null
+++ b/utils/audio/pitch/uv_utils.py
@@ -0,0 +1,48 @@
+import librosa
+import numpy as np
+from scipy.interpolate import interp1d
+
+
+def uv_energy_corrector(wav_data_16k, f0_func, f0_min=50, f0_max=1000):
+    """Derive an unvoiced (UV) decision from harmonic vs. in-between energy.
+
+    An STFT (hop 256, window 1536) of the 16 kHz signal is computed and, per
+    frame, the mean magnitude around the first two harmonics of F0 is compared
+    with the mean magnitude around ``0.5 * f0``. A frame is unvoiced when that
+    ratio is below 10 or when F0 lies outside ``[f0_min, f0_max]``.
+
+    :param wav_data_16k: waveform sampled at 16 kHz.
+    :param f0_func: callable mapping normalised time in ``[0, 1]`` to F0.
+    :param f0_min: lowest F0 accepted as voiced.
+    :param f0_max: highest F0 accepted as voiced.
+    :return: ``interp1d`` (nearest, extrapolating) mapping normalised time to
+        the UV flag.
+    """
+    hop_size = 256
+    win_size = hop_size * 6
+    sr = 16000
+
+    spec = np.abs(librosa.stft(wav_data_16k, n_fft=win_size, hop_length=hop_size,
+                               win_length=win_size, pad_mode="constant").T)
+    T = spec.shape[0]
+    # Normalised time axis; the last point is pinned to 1 so f0_func covers it.
+    x_h256 = np.arange(0, 1, 1 / T)[:T]
+    x_h256[-1] = 1
+    f0 = f0_func(x_h256)
+    freqs = librosa.fft_frequencies(sr=sr, n_fft=win_size)
+    x_idx = np.arange(T)
+
+    def find_nearest_stft_bin(f0_):
+        """Per-frame index of the STFT bin closest to ``f0_``."""
+        return np.abs(freqs[None, :] - f0_[:, None]).argmin(-1)
+
+    def get_energy_mask(f0_lambda, hars=None, win_size=3):
+        """Mean magnitude around ``f0_lambda(f0, m)`` for every ``m`` in ``hars``.
+
+        Returns ``(energy, mask)`` where ``mask`` marks the bins that were visited.
+        """
+        if hars is None:
+            hars = [1]
+        # Allocate as bool directly; going through float64 first costs
+        # 80 kB per frame for a temporary that is thrown away immediately.
+        mask = np.zeros([T, 10000], dtype=bool)
+        mask_bins = []
+        for multiple in hars:
+            f0_bin_idx = find_nearest_stft_bin(f0_lambda(f0, multiple))
+            # NOTE: -win_size // 2 floors, so win_size=3 visits deltas -2..1.
+            for delta in range(-win_size // 2, 1 + win_size // 2):
+                y_idx = f0_bin_idx + delta
+                # Skip offsets that would run past the last STFT bin.
+                if np.max(y_idx) < spec.shape[1]:
+                    mask_bins.append(spec[x_idx, y_idx])
+                mask[x_idx, y_idx] = 1
+        mask_bins = np.stack(mask_bins, 1)
+        energy_ = np.mean(mask_bins, 1)
+        return energy_, mask
+
+    # Energy at the harmonics vs. energy halfway below the fundamental.
+    energy_har, _ = get_energy_mask(lambda f0, m: f0 * m, [1, 2], 3)
+    energy_mhalfhar, _ = get_energy_mask(lambda f0, m: f0 * (m - 0.5), [1], 5)
+    r_energy = energy_har / np.clip(energy_mhalfhar, 1e-8, None)
+
+    uv = np.zeros_like(f0).astype(bool)
+    uv |= r_energy < 10
+    uv |= (f0 > f0_max) | (f0 < f0_min)
+    func_uv = interp1d(x_h256, uv, 'nearest', fill_value='extrapolate')
+    return func_uv
diff --git a/utils/audio/pitch_extractors.py b/utils/audio/pitch_extractors.py
new file mode 100644
index 0000000000000000000000000000000000000000..da0ad7349e778754c16739f843497c929099e4eb
--- /dev/null
+++ b/utils/audio/pitch_extractors.py
@@ -0,0 +1,189 @@
+import os
+import subprocess
+import tempfile
+import traceback
+import uuid
+
+import torch
+from scipy.signal import medfilt
+
+from utils.audio import librosa_wav2spec
+from utils.audio.io import save_wav
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
+import librosa
+import numpy as np
+from scipy.interpolate import interp1d
+from utils.audio.pitch.crepe_utils import crepe_with_corrector, crepe_predict, frequency_to_bins, \
+    crepe_predict_torch
+from utils.audio.pitch.extractor_utils import find_nearest_stft_bin, find_best_f0_using_har_energy
+
+# Registry of pitch extractors: name -> callable, filled by @register_pitch_extractor.
+PITCH_EXTRACTOR = {}
+
+
+def register_pitch_extractor(name):
+    """Decorator factory: store the decorated object in ``PITCH_EXTRACTOR[name]``.
+
+    The decorated object is returned unchanged, so it stays usable directly.
+    """
+
+    def _store(extractor):
+        PITCH_EXTRACTOR[name] = extractor
+        return extractor
+
+    return _store
+
+
+def get_pitch_extractor(name):
+    """Look up a registered pitch extractor by name.
+
+    :param name: key used with ``@register_pitch_extractor``.
+    :return: the registered callable.
+    :raises KeyError: if ``name`` is not registered; the message lists the
+        names that are.
+    """
+    try:
+        return PITCH_EXTRACTOR[name]
+    except KeyError:
+        raise KeyError(
+            f"Unknown pitch extractor {name!r}; registered: {list(PITCH_EXTRACTOR)}") from None
+
+
+def extract_pitch_simple(wav):
+    """Extract pitch from ``wav`` using the extractor and settings in ``hparams``."""
+    from utils.commons.hparams import hparams
+    hop_size = hparams['hop_size']
+    n_mel_frames = (len(wav) + 1) // hop_size - hparams['win_size'] // hop_size
+    extractor_name = hparams['pitch_extractor']
+    sample_rate = hparams['audio_sample_rate']
+    return extract_pitch(
+        extractor_name, wav, hop_size, sample_rate,
+        f0_min=hparams['f0_min'], f0_max=hparams['f0_max'],
+        n_mel_frames=n_mel_frames)
+
+
+def extract_pitch(extractor_name, wav_data, hop_size, audio_sample_rate, f0_min=75, f0_max=800, **kwargs):
+    """Run the pitch extractor registered as ``extractor_name`` and return its result.
+
+    Extra keyword arguments are forwarded to the extractor untouched.
+    """
+    extractor = get_pitch_extractor(extractor_name)
+    return extractor(wav_data, hop_size, audio_sample_rate, f0_min, f0_max, **kwargs)
+
+
+@register_pitch_extractor('harvest')
+def harvest(wav_data, hop_size, audio_sample_rate, *args, **kwargs):
+    """F0 via ``pyworld.harvest``, resampled to ``len(wav_data) // hop_size`` frames.
+
+    Resampling is nearest-neighbour on a normalised [0, 1] time axis.
+    """
+    import pyworld as pw
+    n_mel_frames = int(len(wav_data) // hop_size)
+    f0, t = pw.harvest(wav_data.astype(np.double), audio_sample_rate)
+    n_src = len(f0)
+    # Source grid: pin the last point to 1.0 so every target point is covered.
+    src_grid = np.arange(0, 1, 1 / n_src)[:n_src]
+    src_grid[-1] = 1.0
+    dst_grid = np.arange(0, 1, 1 / n_mel_frames)[:n_mel_frames]
+    resample = interp1d(src_grid, f0, 'nearest')
+    return resample(dst_grid)
+
+
+@register_pitch_extractor('dio')
+def dio(wav_data, hop_size, audio_sample_rate, *args, **kwargs):
+    """F0 via ``pyworld.dio`` refined by ``stonemask``, resampled to the hop grid.
+
+    The output has ``len(wav_data) // hop_size`` frames; resampling is
+    nearest-neighbour on a normalised [0, 1] time axis.
+    """
+    import pyworld as pw
+    n_mel_frames = int(len(wav_data) // hop_size)
+    _f0, t = pw.dio(wav_data.astype(np.double), audio_sample_rate)
+    f0 = pw.stonemask(wav_data.astype(np.double), _f0, t, audio_sample_rate)
+    n_src = len(f0)
+    # Source grid: pin the last point to 1.0 so every target point is covered.
+    src_grid = np.arange(0, 1, 1 / n_src)[:n_src]
+    src_grid[-1] = 1.0
+    dst_grid = np.arange(0, 1, 1 / n_mel_frames)[:n_mel_frames]
+    resample = interp1d(src_grid, f0, 'nearest')
+    return resample(dst_grid)
+
+
+@register_pitch_extractor('parselmouth')
+def parselmouth_pitch(wav_data, hop_size, audio_sample_rate, f0_min, f0_max,
+                      voicing_threshold=0.45, *args, **kwargs):
+    """F0 via Praat's autocorrelation pitch tracker (``to_pitch_ac``).
+
+    The track is zero-padded on both sides to ``len(wav_data) // hop_size``
+    frames; when the padding is odd the extra frame goes to the front.
+    """
+    import parselmouth
+    time_step = hop_size / audio_sample_rate * 1000  # one hop, in milliseconds
+    n_mel_frames = int(len(wav_data) // hop_size)
+    sound = parselmouth.Sound(wav_data, audio_sample_rate)
+    pitch = sound.to_pitch_ac(
+        time_step=time_step / 1000, voicing_threshold=voicing_threshold,
+        pitch_floor=f0_min, pitch_ceiling=f0_max)
+    f0_pm = pitch.selected_array['frequency']
+    n_missing = n_mel_frames - len(f0_pm)
+    pad_front = (n_missing + 1) // 2
+    pad_back = n_missing - pad_front
+    return np.pad(f0_pm, [[pad_front, pad_back]], mode='constant')
+
+
+@register_pitch_extractor('reaper')
+def reaper_extract_f0(wav_data, hop_size, audio_sample_rate, f0_min, f0_max, denoise=True,
+                      return_denoised_wav=False,
+                      *args, **kwargs):
+    """Extract F0 with the bundled STRAIGHT/REAPER command-line tools.
+
+    The waveform is written to a private scratch directory (``1.wav`` raw,
+    ``2.wav`` optionally denoised with ``sox noisered``), the tools are run via
+    ``reaper_extract_f0_`` (up to 10 attempts) and the scratch directory is
+    removed again, whatever the outcome.
+
+    With ``hop_size == 256`` the files are saved with a sample rate of 25600 or
+    51200 instead of 24000 or 48000, and the resulting F0 is rescaled by the
+    true/declared rate ratio afterwards (presumably so the tools' 5 ms frame
+    shift lines up with the hop size - confirm). For 24 kHz audio pairs of
+    frames are averaged; a pair is voiced only if both frames are.
+
+    :param wav_data: waveform sampled at ``audio_sample_rate``.
+    :param hop_size: 256 (with 24 kHz or 48 kHz audio) or 240.
+    :param denoise: profile the noise from the VAD-rejected samples and remove
+        it with sox before running the tools.
+    :param return_denoised_wav: also return the waveform that was analysed.
+    :return: ``f0`` or ``(f0, wav_data)``.
+    :raises RuntimeError: if every extraction attempt failed.
+    """
+    import shutil
+
+    dirname = f'/tmp/reaper_tmp/{len(wav_data)}_{str(uuid.uuid1())}'
+    os.makedirs(dirname, exist_ok=True)
+    try:
+        if hop_size == 256:
+            if audio_sample_rate == 24000:
+                save_wav(wav_data, f'{dirname}/1.wav', 25600, norm=False)
+            if audio_sample_rate == 48000:
+                save_wav(wav_data, f'{dirname}/1.wav', 51200, norm=False)
+        else:
+            assert hop_size == 240
+            save_wav(wav_data, f'{dirname}/1.wav', audio_sample_rate, norm=False)
+
+        if denoise:
+            from utils.audio import trim_long_silences
+            wav_data_ = wav_data
+            _, audio_mask, sr = trim_long_silences(wav_data, audio_sample_rate, vad_max_silence_length=20)
+            sr = audio_sample_rate
+            # Everything the VAD rejects serves as the noise profile.
+            wav_noise = wav_data[~audio_mask]
+
+            new_fn = f'{dirname}/0'
+            save_wav(wav_noise, f'{new_fn}-noise.wav', sr=sr)
+            save_wav(wav_data, f'{new_fn}.wav', sr=sr)
+            # Paths are generated above (no user input), so the shell string is safe.
+            subprocess.check_call(
+                f'sox {new_fn}-noise.wav -n noiseprof {new_fn}-noise.prof; '
+                f'sox {new_fn}.wav {new_fn}.denoised.wav noisered {new_fn}-noise.prof 0.21; ', shell=True)
+            wav_data, _ = librosa.load(f'{new_fn}.denoised.wav', sr=sr)
+            # Append the last 1024 original samples to the denoised signal.
+            wav_data = np.concatenate([wav_data, wav_data_[-1024:]], 0)
+
+        if hop_size == 256:
+            if audio_sample_rate == 24000:
+                save_wav(wav_data, f'{dirname}/2.wav', 25600, norm=False)
+            if audio_sample_rate == 48000:
+                save_wav(wav_data, f'{dirname}/2.wav', 51200, norm=False)
+        else:
+            assert hop_size == 240
+            save_wav(wav_data, f'{dirname}/2.wav', audio_sample_rate, norm=False)
+
+        f0 = None
+        for _ in range(10):
+            # Remove stale outputs so the tools are re-run from scratch.
+            subprocess.check_call(f'rm -rf {dirname}/*f0', shell=True)
+            try:
+                f0 = reaper_extract_f0_(
+                    f'{dirname}/2.wav', f'{dirname}/1.wav', dirname, f0_min, f0_max)[:-8]
+                break
+            except Exception:
+                # KeyboardInterrupt/SystemExit are not Exceptions and propagate.
+                traceback.print_exc()
+        if f0 is None:
+            raise RuntimeError('reaper_extract_f0: F0 extraction failed after 10 attempts')
+    finally:
+        # The scratch files are only needed during extraction; never leak them.
+        shutil.rmtree(dirname, ignore_errors=True)
+
+    if audio_sample_rate == 24000:
+        if hop_size == 256:
+            f0 = f0 * audio_sample_rate / 25600
+        # Mark unvoiced frames with a large negative value so any pair that
+        # contains one averages below zero and is zeroed.
+        f0[f0 == 0] = -100000
+        f0 = f0.reshape(-1, 2).mean(-1)
+        f0[f0 < 0] = 0
+    if audio_sample_rate == 48000:
+        if hop_size == 256:
+            f0 = f0 * audio_sample_rate / 51200
+    if return_denoised_wav:
+        return f0, wav_data
+    else:
+        return f0
+
+
+def reaper_extract_f0_(fwav1, fwav2, temp_dir, pitch_lower, pitch_upper):
+    """Run the STRAIGHT, REAPER and interpolation binaries and load the F0 track.
+
+    Each tool is skipped when its output file already exists in ``temp_dir``.
+
+    :param fwav1: wav file analysed by ``ExtractF0ByStraight``.
+    :param fwav2: wav file analysed by ``ReaperF0``.
+    :param temp_dir: directory receiving ``1.sf0``, ``1.rf0`` and ``1.tf0``.
+    :param pitch_lower: minimum F0 passed to both tools.
+    :param pitch_upper: maximum F0 passed to both tools.
+    :return: float32 array loaded from the final F0 file.
+    """
+    frame_shift = 5
+    use_reaper = True
+
+    sf0_path = f'{temp_dir}/1.sf0'
+    if not os.path.exists(sf0_path):
+        straight_cmd = ('utils/audio/pitch/bin/ExtractF0ByStraight frame_shift=%d '
+                        'min_f0=%d max_f0=%d wave="%s" output="%s"') % (
+            frame_shift, pitch_lower, pitch_upper, fwav1, sf0_path)
+        subprocess.check_call(straight_cmd, shell=True, timeout=20)
+
+    result_path = sf0_path
+    if use_reaper:
+        rf0_path = f'{temp_dir}/1.rf0'
+        if not os.path.exists(rf0_path):
+            reaper_cmd = ('utils/audio/pitch/bin/ReaperF0 wave="%s" output="%s" '
+                          'f0_min=%d f0_max=%d') % (
+                fwav2, rf0_path, pitch_lower, pitch_upper)
+            subprocess.check_call(reaper_cmd, shell=True, timeout=20)  # ignore_security_alert
+
+        tf0_path = f'{temp_dir}/1.tf0'
+        if not os.path.exists(tf0_path):
+            interp_cmd = ('utils/audio/pitch/bin/InterpF0 straight="%s" '
+                          'reaper="%s" output="%s"') % (
+                sf0_path, rf0_path, tf0_path)
+            subprocess.check_call(interp_cmd, shell=True, timeout=20)
+        # The interpolated track supersedes the raw STRAIGHT one.
+        result_path = tf0_path
+
+    return np.loadtxt(result_path, dtype=np.float32)
diff --git a/utils/audio/vad.py b/utils/audio/vad.py
new file mode 100644
index 0000000000000000000000000000000000000000..3bc2f7aee74c0858b2a1becef0a81ea7a19519ac
--- /dev/null
+++ b/utils/audio/vad.py
@@ -0,0 +1,82 @@
+from skimage.transform import resize
+import struct
+import webrtcvad
+from scipy.ndimage.morphology import binary_dilation, binary_erosion
+import librosa
+import numpy as np
+import pyloudnorm as pyln
+import warnings
+
+# Silence the "Possible clipped samples in output" warning
+# (presumably emitted by pyloudnorm during normalisation - confirm).
+warnings.filterwarnings("ignore", message="Possible clipped samples in output")
+
+# Largest positive 16-bit PCM sample value; used to scale float audio to int16.
+int16_max = (2 ** 15) - 1
+
+
+def trim_long_silences(path, sr=None, return_raw_wav=False, norm=True, vad_max_silence_length=12):
+    """
+    Detect speech with WebRTC VAD and remove (or just mark) the silent samples.
+
+    The VAD runs on a 16 kHz copy of the signal in 30 ms windows; its decisions
+    are smoothed with a moving average, dilated, and stretched back to the
+    length of the original waveform.
+
+    :param path: audio file path (str) or the raw waveform as a numpy array of floats
+    :param sr: for a path, the sample rate passed to ``librosa.load``; for an
+        array, the sample rate of that array (16000 is assumed when None)
+    :param return_raw_wav: if True, return the whole waveform instead of only
+        the samples flagged as speech
+    :param norm: loudness-normalise the waveform to -20 and limit its peak to
+        0.95 before running the VAD; the returned waveform is the normalised one
+    :param vad_max_silence_length: Maximum number of consecutive silent frames a segment can have.
+    :return: ``(wav, audio_mask, sr)``: the trimmed waveform (or the full one
+        when ``return_raw_wav``), the per-sample boolean speech mask and the
+        sample rate of the returned waveform
+    """
+
+    ## Voice Activation Detection
+    # Window size of the VAD. Must be either 10, 20 or 30 milliseconds.
+    # This sets the granularity of the VAD. Should not need to be changed.
+    sampling_rate = 16000
+    if isinstance(path, str):
+        wav_raw, sr = librosa.core.load(path, sr=sr)
+    else:
+        wav_raw = path
+        # Honour the caller's sample rate; it used to be overwritten with
+        # 16000, which mis-timed the VAD windows for 24/48 kHz input.
+        if sr is None:
+            sr = 16000
+
+    if norm:
+        meter = pyln.Meter(sr)  # create BS.1770 meter
+        loudness = meter.integrated_loudness(wav_raw)
+        wav_raw = pyln.normalize.loudness(wav_raw, loudness, -20.0)
+        if np.abs(wav_raw).max() > 0.95:
+            wav_raw = wav_raw / np.abs(wav_raw).max() * 0.95
+
+    # Keyword arguments: the sample rates are keyword-only from librosa 0.10 on.
+    wav = librosa.resample(wav_raw, orig_sr=sr, target_sr=sampling_rate, res_type='kaiser_best')
+
+    vad_window_length = 30  # In milliseconds
+    # Number of frames to average together when performing the moving average smoothing.
+    # The larger this value, the larger the VAD variations must be to not get smoothed out.
+    vad_moving_average_width = 8
+
+    # Compute the voice detection window size
+    samples_per_window = (vad_window_length * sampling_rate) // 1000
+
+    # Trim the end of the audio to have a multiple of the window size
+    wav = wav[:len(wav) - (len(wav) % samples_per_window)]
+
+    # Convert the float waveform to 16-bit mono PCM (native byte order, the
+    # same bytes struct.pack("%dh") produced, without unpacking every sample).
+    pcm_wave = np.round(wav * int16_max).astype(np.int16).tobytes()
+
+    # Perform voice activation detection
+    voice_flags = []
+    vad = webrtcvad.Vad(mode=3)
+    for window_start in range(0, len(wav), samples_per_window):
+        window_end = window_start + samples_per_window
+        # Two bytes per int16 sample.
+        voice_flags.append(vad.is_speech(pcm_wave[window_start * 2:window_end * 2],
+                                         sample_rate=sampling_rate))
+    voice_flags = np.array(voice_flags)
+
+    # Smooth the voice detection with a moving average
+    def moving_average(array, width):
+        array_padded = np.concatenate((np.zeros((width - 1) // 2), array, np.zeros(width // 2)))
+        ret = np.cumsum(array_padded, dtype=float)
+        ret[width:] = ret[width:] - ret[:-width]
+        return ret[width - 1:] / width
+
+    audio_mask = moving_average(voice_flags, vad_moving_average_width)
+    audio_mask = np.round(audio_mask).astype(bool)
+
+    # Dilate the voiced regions
+    audio_mask = binary_dilation(audio_mask, np.ones(vad_max_silence_length + 1))
+    audio_mask = np.repeat(audio_mask, samples_per_window)
+    # Stretch the 16 kHz mask back to the length of the original waveform.
+    audio_mask = resize(audio_mask, (len(wav_raw),)) > 0
+    if return_raw_wav:
+        return wav_raw, audio_mask, sr
+    return wav_raw[audio_mask], audio_mask, sr
diff --git a/utils/commons/base_task.py b/utils/commons/base_task.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2832cb99d835d7f7cf814cffaae1b0592dd2d03
--- /dev/null
+++ b/utils/commons/base_task.py
@@ -0,0 +1,277 @@
+import logging
+import os
+import time
+import random
+import subprocess
+import sys
+from datetime import datetime
+import numpy as np
+import torch.utils.data
+from torch import nn
+from torch.utils.tensorboard import SummaryWriter
+from utils.commons.dataset_utils import data_loader
+from utils.commons.hparams import hparams
+from utils.commons.meters import AvgrageMeter
+from utils.commons.tensor_utils import tensors_to_scalars
+from utils.commons.trainer import Trainer
+from utils.nn.model_utils import print_arch, num_params
+
+# Tensor sharing strategy between processes; override with TORCH_SHARE_STRATEGY.
+torch.multiprocessing.set_sharing_strategy(os.getenv('TORCH_SHARE_STRATEGY', 'file_system'))
+
+# Process-wide logging: INFO and above go to stdout, prefixed with a timestamp.
+log_format = '%(asctime)s %(message)s'
+logging.basicConfig(stream=sys.stdout, level=logging.INFO,
+                    format=log_format, datefmt='%m/%d %I:%M:%S %p')
+
+
+class BaseTask(nn.Module):
+    def __init__(self, *args, **kwargs):
+        """Set up bookkeeping state; the model itself is not built here."""
+        super(BaseTask, self).__init__()
+        self.current_epoch = 0
+        self.global_step = 0
+        self.trainer = None
+        self.use_ddp = False
+        # Gradient clipping thresholds; a value <= 0 disables that kind of clipping.
+        self.gradient_clip_norm = hparams['clip_grad_norm']
+        self.gradient_clip_val = hparams.get('clip_grad_value', 0)
+        self.model = None
+        # Created at the start of every epoch (see on_epoch_start).
+        self.epoch_training_losses_meter = None
+        # Created by build_tensorboard.
+        # NOTE(review): self.scheduler is not initialised here; it only exists
+        # once configure_optimizers has run.
+        self.logger: SummaryWriter = None
+
+    ######################
+    # build model, dataloaders, optimizer, scheduler and tensorboard
+    ######################
+    def build_model(self):
+        """Build the model; must be overridden by subclasses."""
+        raise NotImplementedError
+
+    @data_loader
+    def train_dataloader(self):
+        """Provide the training data loader; must be overridden by subclasses."""
+        raise NotImplementedError
+
+    @data_loader
+    def test_dataloader(self):
+        """Provide the test data loader; must be overridden by subclasses."""
+        raise NotImplementedError
+
+    @data_loader
+    def val_dataloader(self):
+        """Provide the validation data loader; must be overridden by subclasses."""
+        raise NotImplementedError
+
+    def build_scheduler(self, optimizer):
+        """Return the LR scheduler for ``optimizer``; the default is no scheduler."""
+        return None
+
+    def build_optimizer(self, model):
+        """Build the optimizer(s) for ``model``; must be overridden by subclasses."""
+        raise NotImplementedError
+
+    def configure_optimizers(self):
+        """Build optimizer(s) and scheduler; always return the optimizers as a sequence.
+
+        The scheduler is stored on ``self.scheduler``. A list or tuple returned
+        by ``build_optimizer`` is passed through as is; a single optimizer is
+        wrapped in a one-element list.
+        """
+        optimizers = self.build_optimizer(self.model)
+        self.scheduler = self.build_scheduler(optimizers)
+        already_sequence = isinstance(optimizers, (list, tuple))
+        return optimizers if already_sequence else [optimizers]
+
+    def build_tensorboard(self, save_dir, name, **kwargs):
+        """Create ``save_dir/name`` if needed and attach a ``SummaryWriter`` to it.
+
+        Extra keyword arguments are forwarded to ``SummaryWriter``.
+        """
+        tb_dir = os.path.join(save_dir, name)
+        os.makedirs(tb_dir, exist_ok=True)
+        writer = SummaryWriter(log_dir=tb_dir, **kwargs)
+        self.logger = writer
+
+    ######################
+    # training
+    ######################
+    def on_train_start(self):
+        """Report per-submodule parameter counts and optionally compile the model.
+
+        The model is wrapped with ``torch.compile`` when the installed torch
+        provides it and ``hparams['torch_compile']`` is truthy.
+        """
+        for n, m in self.model.named_children():
+            num_params(m, model_name=n)
+        # Feature-detect instead of matching major version '2' exactly, so
+        # later major versions keep working.
+        if hasattr(torch, 'compile') and hparams.get("torch_compile", False):
+            self.model = torch.compile(self.model, mode='default')
+
+    def on_train_end(self):
+        """Hook for the end of training; does nothing by default."""
+        pass
+
+    def on_epoch_start(self):
+        """Reset the per-epoch loss meters, keeping only a fresh ``total_loss`` meter."""
+        self.epoch_training_losses_meter = {'total_loss': AvgrageMeter()}
+
+    def on_epoch_end(self):
+        """Print the epoch's mean losses and return them keyed as ``epoch_mean/<tag>``."""
+        epoch_means = {}
+        for tag, meter in self.epoch_training_losses_meter.items():
+            epoch_means[tag] = meter.avg
+        print(f"Epoch {self.current_epoch} ended. Steps: {self.global_step}. {epoch_means}")
+        return {"epoch_mean/" + tag: mean for tag, mean in epoch_means.items()}
+
+    def _training_step(self, sample, batch_idx, optimizer_idx):
+        """
+        Task-specific training step; must be overridden by subclasses.
+
+        :param sample: the batch to train on
+        :param batch_idx: index of the batch
+        :param optimizer_idx: index of the optimizer this step is for
+        :return: total loss: torch.Tensor, loss_log: dict
+        """
+        raise NotImplementedError
+
+    def training_step(self, sample, batch_idx, optimizer_idx=-1):
+        """
+        Run ``_training_step`` and turn its result into meter, progress-bar and
+        tensorboard entries.
+
+        :param sample: the batch to train on
+        :param batch_idx: index of the batch
+        :param optimizer_idx: index of the optimizer this step is for; when >= 0
+            the learning rates of its parameter groups are logged as well
+        :return: {'loss': torch.Tensor, 'progress_bar': dict, 'tb_log': dict},
+            or {'loss': None} when the task returns None or a non-tensor loss
+        """
+        # perform the main training step in a specific task
+        loss_ret = self._training_step(sample, batch_idx, optimizer_idx)
+        if loss_ret is None:
+            return {'loss': None}
+        total_loss, log_outputs = loss_ret
+        log_outputs = tensors_to_scalars(log_outputs)
+
+        # add to epoch meter (tags of the form `<tag>/<sub_tag>` are flattened
+        # to `<tag>_<sub_tag>`; NaN values are not accumulated)
+        for k, v in log_outputs.items():
+            if '/' in k:
+                k_split = k.split("/")
+                assert len(k_split) == 2, "we only support one `/` in tag_name, i.e., `<tag>/<sub_tag>`"
+                k = k.replace("/", "_")
+            if k not in self.epoch_training_losses_meter:
+                self.epoch_training_losses_meter[k] = AvgrageMeter()
+            if not np.isnan(v):
+                self.epoch_training_losses_meter[k].update(v)
+
+        # log the current learning rate of every parameter group; added after
+        # the meter update above, so learning rates are not averaged per epoch
+        if optimizer_idx >= 0:
+            for params_group_i in range(len(self.trainer.optimizers[optimizer_idx].param_groups)):
+                log_outputs[f'lr/optimizer{optimizer_idx}_params_group{params_group_i}'] = self.trainer.optimizers[optimizer_idx].param_groups[params_group_i]['lr']
+
+        # add to progress bar (flattened tags, which must stay unique)
+        progress_bar_log = {}
+        for k, v in log_outputs.items():
+            if '/' in k:
+                k_split = k.split("/")
+                assert len(k_split) == 2, "we only support one `/` in tag_name, i.e., `<tag>/<sub_tag>`"
+                k = k.replace("/", "_")
+            assert k not in progress_bar_log, f"we got duplicate tags in log_outputs, check this `{k}`"
+            progress_bar_log[k] = v
+
+        # add to tensorboard log (tags without a group go under `tr/`)
+        tb_log = {}
+        for k, v in log_outputs.items():
+            if '/' in k:
+                tb_log[k] = v
+            else:
+                tb_log[f'tr/{k}'] = v
+
+        # a non-tensor loss means there is nothing to optimise for this step;
+        # the meters above have already been updated at this point
+        if not isinstance(total_loss, torch.Tensor):
+            return {'loss': None}
+        self.epoch_training_losses_meter['total_loss'].update(total_loss.item())
+
+        return {
+            'loss': total_loss,
+            'progress_bar': progress_bar_log,
+            'tb_log': tb_log
+        }
+
+    def on_before_optimization(self, opt_idx):
+        """Clip gradients of all parameters by norm and/or by value, when enabled (> 0)."""
+        if self.gradient_clip_norm > 0:
+            torch.nn.utils.clip_grad_norm_(self.parameters(), self.gradient_clip_norm)
+        if self.gradient_clip_val > 0:
+            torch.nn.utils.clip_grad_value_(self.parameters(), self.gradient_clip_val)
+
+    def on_after_optimization(self, epoch, batch_idx, optimizer, optimizer_idx):
+        """Advance the LR scheduler, if any, to the current optimizer-update index."""
+        if self.scheduler is not None:
+            # global_step counts batches; dividing by the accumulation factor
+            # gives the number of optimizer updates.
+            self.scheduler.step(self.global_step // hparams['accumulate_grad_batches'])
+
+    ######################
+    # validation
+    ######################
+    def validation_start(self):
+        """Hook for the start of validation; does nothing by default."""
+        pass
+
+    def validation_step(self, sample, batch_idx):
+        """
+        Task-specific validation step; must be overridden by subclasses.
+
+        :param sample: the batch to validate on
+        :param batch_idx: index of the batch
+        :return: output: {"losses": {...}, "total_loss": float, ...} or (total loss: torch.Tensor, loss_log: dict)
+        """
+        raise NotImplementedError
+
+    def validation_end(self, outputs):
+        """
+
+        :param outputs:
+        :return: loss_output: dict
+        """
+        all_losses_meter = {'total_loss': AvgrageMeter()}
+        for output in outputs:
+            if output is None or len(output) == 0:
+                continue
+            if isinstance(output, dict):
+                assert 'losses' in output, 'Key "losses" should exist in validation output.'
+                n = output.pop('nsamples', 1)
+                losses = tensors_to_scalars(output['losses'])
+                total_loss = output.get('total_loss', sum(losses.values()))
+            else:
+                assert len(output) == 2, 'Validation output should only consist of two elements: (total_loss, losses)'
+                n = 1
+                total_loss, losses = output
+                losses = tensors_to_scalars(losses)
+            if isinstance(total_loss, torch.Tensor):
+                total_loss = total_loss.item()
+            for k, v in losses.items():
+                if k not in all_losses_meter:
+                    all_losses_meter[k] = AvgrageMeter()
+                all_losses_meter[k].update(v, n)
+            all_losses_meter['total_loss'].update(total_loss, n)
+        loss_output = {k: round(v.avg, 10) for k, v in all_losses_meter.items()}
+        print(f"| Validation results@{self.global_step}: {loss_output}")
+        return {
+            'tb_log': {f'val/{k}': v for k, v in loss_output.items()},
+            'val_loss': loss_output['total_loss']
+        }
+
+    ######################
+    # testing
+    ######################
+    def test_start(self):
+        pass
+
+    def test_step(self, sample, batch_idx):
+        return self.validation_step(sample, batch_idx)
+
+    def test_end(self, outputs):
+        return self.validation_end(outputs)
+
+    ######################
+    # start training/testing
+    ######################
+    @classmethod
+    def start(cls):
+
+        def is_port_in_use(port: int) -> bool:
+            import socket
+            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+                return s.connect_ex(('localhost', port)) == 0
+
+        os.environ['MASTER_PORT'] = str(random.randint(10000, 11000))
+        while is_port_in_use(int(os.environ['MASTER_PORT'])):
+            print(f"| Port {os.environ['MASTER_PORT']} is in use. Change another port...")
+            os.environ['MASTER_PORT'] = str(random.randint(10000, 11000))
+            time.sleep(1)
+
+        random.seed(hparams['seed'])
+        np.random.seed(hparams['seed'])
+        work_dir = hparams['work_dir']
+        trainer = Trainer(
+            work_dir=work_dir,
+            val_check_interval=hparams['val_check_interval'],
+            tb_log_interval=hparams['tb_log_interval'],
+            max_updates=hparams['max_updates'],
+            num_sanity_val_steps=hparams['num_sanity_val_steps'] if not hparams['validate'] else 10000,
+            accumulate_grad_batches=hparams['accumulate_grad_batches'],
+            print_nan_grads=hparams['print_nan_grads'],
+            resume_from_checkpoint=hparams.get('resume_from_checkpoint', 0),
+            amp=hparams['amp'],
+            monitor_key=hparams['valid_monitor_key'],
+            monitor_mode=hparams['valid_monitor_mode'],
+            num_ckpt_keep=hparams['num_ckpt_keep'],
+            save_best=hparams['save_best'],
+            seed=hparams['seed'],
+            debug=hparams['debug']
+        )
+        if not hparams['infer']:  # train
+            trainer.fit(cls)
+        else:
+            trainer.test(cls)
+
+    def on_keyboard_interrupt(self):
+        pass
diff --git a/utils/commons/ckpt_utils.py b/utils/commons/ckpt_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d68bdf6555d8ff0aa2602c221e2642f47224684
--- /dev/null
+++ b/utils/commons/ckpt_utils.py
@@ -0,0 +1,103 @@
+import glob
+import os
+import re
+import torch
+
+
+def get_last_checkpoint(work_dir, steps=None):
+    checkpoint = None
+    last_ckpt_path = None
+    if work_dir.endswith(".ckpt"):
+        ckpt_paths = [work_dir]
+    else:
+        ckpt_paths = get_all_ckpts(work_dir, steps)
+    if len(ckpt_paths) > 0:
+        last_ckpt_path = ckpt_paths[0]
+        checkpoint = torch.load(last_ckpt_path, map_location='cpu')
+    return checkpoint, last_ckpt_path
+
+
+def get_all_ckpts(work_dir, steps=None):
+    """Return `model_ckpt_steps_*.ckpt` paths in work_dir (only `steps` if given), largest step first."""
+    step_glob = '*' if steps is None else steps
+    ckpt_path_pattern = f'{work_dir}/model_ckpt_steps_{step_glob}.ckpt'
+    # Raw string: `\_`, `\d`, `\.` in a plain literal are invalid escapes and warn on current Python.
+    return sorted(glob.glob(ckpt_path_pattern),
+                  key=lambda x: -int(re.findall(r'.*steps_(\d+)\.ckpt', x)[0]))
+
+
+def load_ckpt(cur_model, ckpt_base_dir, model_name='model', force=True, strict=True, steps=None, verbose=True):
+    if os.path.isfile(ckpt_base_dir):
+        base_dir = os.path.dirname(ckpt_base_dir)
+        ckpt_path = ckpt_base_dir
+        checkpoint = torch.load(ckpt_base_dir, map_location='cpu')
+    else:
+        base_dir = ckpt_base_dir
+        checkpoint, ckpt_path = get_last_checkpoint(ckpt_base_dir, steps)
+    if checkpoint is not None:
+        state_dict = checkpoint["state_dict"]
+        if len([k for k in state_dict.keys() if '.' in k]) > 0:
+            state_dict = {k[len(model_name) + 1:]: v for k, v in state_dict.items()
+                          if k.startswith(f'{model_name}.')}
+        else:
+            if '.' not in model_name:
+                state_dict = state_dict[model_name]
+            else:
+                base_model_name = model_name.split('.')[0]
+                rest_model_name = model_name[len(base_model_name) + 1:]
+                state_dict = {
+                    k[len(rest_model_name) + 1:]: v for k, v in state_dict[base_model_name].items()
+                    if k.startswith(f'{rest_model_name}.')}
+        if not strict:
+            cur_model_state_dict = cur_model.state_dict()
+            unmatched_keys = []
+            for key, param in state_dict.items():
+                if key in cur_model_state_dict:
+                    new_param = cur_model_state_dict[key]
+                    if new_param.shape != param.shape:
+                        unmatched_keys.append(key)
+                        print("| Unmatched keys (shape mismatch): ", key, new_param.shape, param.shape)
+                else:
+                    print(f"Skipping unmatched keys (in state_dict but not in cur_model): {key}")
+            for key in unmatched_keys:
+                if verbose:
+                    print(f"Del unmatched keys {key}")
+                del state_dict[key]
+        if hasattr(cur_model, 'load_state_dict'):
+            cur_model.load_state_dict(state_dict, strict=strict)
+        else: # when cur_model is nn.Parameter
+            cur_model.data = state_dict
+        print(f"| load '{model_name}' from '{ckpt_path}', strict={strict}")
+    else:
+        e_msg = f"| ckpt not found in {base_dir}."
+        if force:
+            assert False, e_msg
+        else:
+            print(e_msg)
+
+def restore_weights(task_ref, checkpoint):
+    """Load each checkpoint['state_dict'] entry into the same-named attribute of task_ref (strict)."""
+    for k, v in checkpoint['state_dict'].items():
+        if hasattr(task_ref, k):
+            getattr(task_ref, k).load_state_dict(v, strict=True)
+            print(f"| restored {k} from pretrained checkpoints")
+        else:
+            print(f"| the checkpoint has unmatched keys {k}")
+
+def restore_opt_state(optimizers, checkpoint):
+    """Load checkpoint['optimizer_states'] into `optimizers`, pairing the two lists in order."""
+    optimizer_states = checkpoint['optimizer_states']
+    for optimizer, opt_state in zip(optimizers, optimizer_states):
+        if optimizer is None:
+            return  # NOTE(review): stops at the first None, so later optimizers stay unrestored
+        try:
+            optimizer.load_state_dict(opt_state)
+            # move optimizer to GPU 1 weight at a time
+            # if self.on_gpu:
+            #     for state in optimizer.state.values():
+            #         for k, v in state.items():
+            #             if isinstance(v, torch.Tensor):
+            #                 state[k] = v.cuda(self.root_gpu)
+        except ValueError:
+            print("| WARNING: optimizer parameters not match !!!")
+    
\ No newline at end of file
diff --git a/utils/commons/crop_head.py b/utils/commons/crop_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..e61cfc519b4f985fa623a0f8341512fba60cb842
--- /dev/null
+++ b/utils/commons/crop_head.py
@@ -0,0 +1,106 @@
+import face_alignment
+import os
+import cv2
+import skimage.transform as trans
+import argparse
+import torch
+import numpy as np
+import tqdm
+
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+
+def get_affine(src):
+    dst = np.array([[87,  59],
+                    [137,  59],
+                    [112, 120]], dtype=np.float32)
+    tform = trans.SimilarityTransform()
+    tform.estimate(src, dst)
+    M = tform.params[0:2, :]
+    return M
+
+
+def affine_align_img(img, M, crop_size=224):
+    warped = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0)
+    return warped
+
+
+def affine_align_3landmarks(landmarks, M):
+    new_landmarks = np.concatenate([landmarks, np.ones((3, 1))], 1)
+    affined_landmarks = np.matmul(new_landmarks, M.transpose())
+    return affined_landmarks
+
+
+def get_eyes_mouths(landmark):
+    three_points = np.zeros((3, 2))
+    three_points[0] = landmark[36:42].mean(0)
+    three_points[1] = landmark[42:48].mean(0)
+    three_points[2] = landmark[60:68].mean(0)
+    return three_points
+
+
+def get_mouth_bias(three_points):
+    bias = np.array([112, 120]) - three_points[2]
+    return bias
+
+
+def align_folder(folder_path, folder_save_path):
+
+    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device=device)
+    preds = fa.get_landmarks_from_directory(folder_path)
+
+    sumpoints = 0
+    three_points_list = []
+
+    for img in tqdm.tqdm(preds.keys(), desc='preprocessing..'):
+        pred_points = np.array(preds[img])
+        if pred_points is None or len(pred_points.shape) != 3:
+            print('preprocessing failed')
+            return False
+        else:
+            num_faces, size, _ = pred_points.shape
+            if num_faces == 1 and size == 68:
+
+                three_points = get_eyes_mouths(pred_points[0])
+                sumpoints += three_points
+                three_points_list.append(three_points)
+            else:
+
+                print('preprocessing failed')
+                return False
+    avg_points = sumpoints / len(preds)
+    M = get_affine(avg_points)
+    p_bias = None
+    for i, img_pth in tqdm.tqdm(enumerate(preds.keys()), desc='affine and save'):
+        three_points = three_points_list[i]
+        affined_3landmarks = affine_align_3landmarks(three_points, M)
+        bias = get_mouth_bias(affined_3landmarks)
+        if p_bias is None:
+            bias = bias
+        else:
+            bias = p_bias * 0.2 + bias * 0.8
+        p_bias = bias
+        M_i = M.copy()
+        M_i[:, 2] = M[:, 2] + bias
+        img = cv2.imread(img_pth)
+        wrapped = affine_align_img(img, M_i)
+        img_save_path = os.path.join(folder_save_path, img_pth.split('/')[-1])
+        cv2.imwrite(img_save_path, wrapped)
+    print('cropped files saved at {}'.format(folder_save_path))
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--folder_path', help='the folder which needs processing')
+    args = parser.parse_args()
+
+    if os.path.isdir(args.folder_path):
+        home_path = '/'.join(args.folder_path.split('/')[:-1])
+        save_img_path = os.path.join(home_path, args.folder_path.split('/')[-1] + '_cropped')
+        os.makedirs(save_img_path, exist_ok=True)
+
+        align_folder(args.folder_path, save_img_path)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/utils/commons/dataset_utils.py b/utils/commons/dataset_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3dc4fd54d3056d822d6cde19b1458847c6579bc
--- /dev/null
+++ b/utils/commons/dataset_utils.py
@@ -0,0 +1,272 @@
+import os
+import sys
+import traceback
+import types
+from functools import wraps
+from itertools import chain
+import numpy as np
+import torch.utils.data
+from torch.utils.data import ConcatDataset
+from utils.commons.hparams import hparams
+
+
+
+def collate_xd(values, pad_idx=0, left_pad=False, shift_right=False, max_len=None, shift_id=1):
+    if len(values[0].shape) == 1:
+        return collate_1d(values, pad_idx, left_pad, shift_right, max_len, shift_id)
+    elif len(values[0].shape) == 2:
+        return collate_2d(values, pad_idx, left_pad, shift_right, max_len)
+    elif len(values[0].shape) == 3:
+        return collate_3d(values, pad_idx, left_pad, shift_right, max_len)
+
+def collate_1d_or_2d(values, pad_idx=0, left_pad=False, shift_right=False, max_len=None, shift_id=1):
+    if len(values[0].shape) == 1:
+        return collate_1d(values, pad_idx, left_pad, shift_right, max_len, shift_id)
+    else:
+        return collate_2d(values, pad_idx, left_pad, shift_right, max_len)
+
+
+def collate_1d(values, pad_idx=0, left_pad=False, shift_right=False, max_len=None, shift_id=1):
+    """Convert a list of 1d tensors into a padded 2d tensor."""
+    size = max(v.size(0) for v in values) if max_len is None else max_len
+    res = values[0].new(len(values), size).fill_(pad_idx)
+
+    def copy_tensor(src, dst):
+        assert dst.numel() == src.numel()
+        if shift_right:
+            dst[1:] = src[:-1]
+            dst[0] = shift_id
+        else:
+            dst.copy_(src)
+
+    for i, v in enumerate(values):
+        copy_tensor(v, res[i][size - len(v):] if left_pad else res[i][:len(v)])
+    return res
+
+
+def collate_2d(values, pad_idx=0, left_pad=False, shift_right=False, max_len=None):
+    """Convert a list of 2d tensors into a padded 3d tensor."""
+    size = max(v.size(0) for v in values) if max_len is None else max_len
+    res = values[0].new(len(values), size, values[0].shape[1]).fill_(pad_idx)
+
+    def copy_tensor(src, dst):
+        assert dst.numel() == src.numel()
+        if shift_right:
+            dst[1:] = src[:-1]
+        else:
+            dst.copy_(src)
+
+    for i, v in enumerate(values):
+        copy_tensor(v, res[i][size - len(v):] if left_pad else res[i][:len(v)])
+    return res
+
+def collate_3d(values, pad_idx=0, left_pad=False, shift_right=False, max_len=None):
+    """Convert a list of 3d tensors into a padded 4d tensor."""
+    size = max(v.size(0) for v in values) if max_len is None else max_len
+    res = values[0].new(len(values), size, values[0].shape[1], values[0].shape[2]).fill_(pad_idx)
+
+    def copy_tensor(src, dst):
+        assert dst.numel() == src.numel()
+        if shift_right:
+            dst[1:] = src[:-1]
+        else:
+            dst.copy_(src)
+
+    for i, v in enumerate(values):
+        copy_tensor(v, res[i][size - len(v):] if left_pad else res[i][:len(v)])
+    return res
+
+
+def _is_batch_full(batch, num_tokens, max_tokens, max_sentences):
+    if len(batch) == 0:
+        return 0
+    if len(batch) == max_sentences:
+        return 1
+    if num_tokens > max_tokens:
+        return 1
+    return 0
+
+
+def batch_by_size(
+        indices, num_tokens_fn, max_tokens=None, max_sentences=None,
+        required_batch_size_multiple=1, distributed=False
+):
+    """
+    Yield mini-batches of indices bucketed by size. Batches may contain
+    sequences of different lengths.
+
+    Args:
+        indices (List[int]): ordered list of dataset indices
+        num_tokens_fn (callable): function that returns the number of tokens at
+            a given index
+        max_tokens (int, optional): max number of tokens in each batch
+            (default: None).
+        max_sentences (int, optional): max number of sentences in each
+            batch (default: None).
+        required_batch_size_multiple (int, optional): require batch size to
+            be a multiple of N (default: 1).
+    """
+    max_tokens = max_tokens if max_tokens is not None else sys.maxsize
+    max_sentences = max_sentences if max_sentences is not None else sys.maxsize
+    bsz_mult = required_batch_size_multiple
+
+    if isinstance(indices, types.GeneratorType):
+        indices = np.fromiter(indices, dtype=np.int64, count=-1)
+
+    sample_len = 0
+    sample_lens = []
+    batch = []
+    batches = []
+    for i in range(len(indices)):
+        idx = indices[i]
+        num_tokens = num_tokens_fn(idx)
+        sample_lens.append(num_tokens)
+        sample_len = max(sample_len, num_tokens)
+
+        assert sample_len <= max_tokens, (
+            "sentence at index {} of size {} exceeds max_tokens "
+            "limit of {}!".format(idx, sample_len, max_tokens)
+        )
+        num_tokens = (len(batch) + 1) * sample_len
+
+        if _is_batch_full(batch, num_tokens, max_tokens, max_sentences):
+            mod_len = max(
+                bsz_mult * (len(batch) // bsz_mult),
+                len(batch) % bsz_mult,
+            )
+            batches.append(batch[:mod_len])
+            batch = batch[mod_len:]
+            sample_lens = sample_lens[mod_len:]
+            sample_len = max(sample_lens) if len(sample_lens) > 0 else 0
+        batch.append(idx)
+    if len(batch) > 0:
+        batches.append(batch)
+    return batches
+
+
+def unpack_dict_to_list(samples):
+    """Split a batched dict into one dict per sample; batch size is samples['outputs'].size(0)."""
+    samples_ = []
+    for i in range(samples.get('outputs').size(0)):
+        res = {}
+        for k, v in samples.items():
+            try:
+                res[k] = v[i]
+            except Exception:  # best effort: values that cannot be indexed at i are left out
+                pass
+        samples_.append(res)
+    return samples_
+
+
+def remove_padding(x, padding_idx=0):
+    if x is None:
+        return None
+    assert len(x.shape) in [1, 2]
+    if len(x.shape) == 2:  # [T, H]
+        return x[np.abs(x).sum(-1) != padding_idx]
+    elif len(x.shape) == 1:  # [T]
+        return x[x != padding_idx]
+
+
+def data_loader(fn):
+    """
+    Decorator that runs `fn(self)` once and caches the result on the instance as `_lazy_<fn name>`.
+    :param fn: method taking only `self`
+    :return: wrapper returning the cached value; an AttributeError raised by `fn` becomes RuntimeError
+    """
+
+    attr_name = '_lazy_' + fn.__name__
+
+    @wraps(fn)  # previously `wraps(fn)` was a bare call whose result was dropped, so it did nothing
+    def _get_data_loader(self):
+        try:
+            value = getattr(self, attr_name)
+        except AttributeError:
+            try:
+                value = fn(self)  # Lazy evaluation, done only once.
+            except AttributeError as e:
+                # Guard against AttributeError suppression. (Issue #142)
+                traceback.print_exc()
+                error = f'{fn.__name__}: An AttributeError was encountered: ' + str(e)
+                raise RuntimeError(error) from e
+            setattr(self, attr_name, value)  # Memoize evaluation.
+        return value
+
+    return _get_data_loader
+
+
+class BaseDataset(torch.utils.data.Dataset):
+    def __init__(self, shuffle):
+        super().__init__()
+        self.hparams = hparams
+        self.shuffle = shuffle
+        self.sort_by_len = hparams['sort_by_len']
+        self.sizes = None
+
+    @property
+    def _sizes(self):
+        return self.sizes
+
+    def __getitem__(self, index):
+        raise NotImplementedError
+
+    def collater(self, samples):
+        raise NotImplementedError
+
+    def __len__(self):
+        return len(self._sizes)
+
+    def num_tokens(self, index):
+        return self.size(index)
+
+    def size(self, index):
+        """Return an example's size as a float or tuple. This value is used when
+        filtering a dataset with ``--max-positions``."""
+        return min(self._sizes[index], hparams['max_frames'])
+
+    def ordered_indices(self):
+        """Return an ordered list of indices. Batches will be constructed based
+        on this order."""
+        if self.shuffle:
+            indices = np.random.permutation(len(self))
+            if self.sort_by_len:
+                indices = indices[np.argsort(np.array(self._sizes)[indices], kind='mergesort')]
+        else:
+            indices = np.arange(len(self))
+        return indices
+
+    @property
+    def num_workers(self):
+        return int(os.getenv('NUM_WORKERS', hparams['num_workers']))
+
+
+class BaseConcatDataset(ConcatDataset):
+    def collater(self, samples):
+        return self.datasets[0].collater(samples)
+
+    @property
+    def _sizes(self):
+        if not hasattr(self, 'sizes'):
+            self.sizes = list(chain.from_iterable([d._sizes for d in self.datasets]))
+        return self.sizes
+
+    def size(self, index):
+        return min(self._sizes[index], hparams['max_frames'])
+
+    def num_tokens(self, index):
+        return self.size(index)
+
+    def ordered_indices(self):
+        """Return an ordered list of indices. Batches will be constructed based
+        on this order."""
+        if self.datasets[0].shuffle:
+            indices = np.random.permutation(len(self))
+            if self.datasets[0].sort_by_len:
+                indices = indices[np.argsort(np.array(self._sizes)[indices], kind='mergesort')]
+        else:
+            indices = np.arange(len(self))
+        return indices
+
+    @property
+    def num_workers(self):
+        return self.datasets[0].num_workers
diff --git a/utils/commons/ddp_utils.py b/utils/commons/ddp_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a86048c2d92fe344d18077f83d896581d9893bdc
--- /dev/null
+++ b/utils/commons/ddp_utils.py
@@ -0,0 +1,163 @@
+from torch.nn.parallel import DistributedDataParallel
+from torch.nn.parallel.distributed import _find_tensors
+import torch.optim
+import torch.utils.data
+import torch
+from packaging import version
+
+def get_torch_version():
+    torch_version = torch.__version__
+    torch_version = torch_version.split("dev")[0]
+    torch_version = torch_version.split("cu")[0]
+    if torch_version[-1] == '.':
+        torch_version = torch_version[:-1]
+    torch_version = torch_version.replace("+","")
+    return torch_version
+
+    
+class DDP(DistributedDataParallel):
+    """
+    Override the forward call in lightning so it goes to training and validation step respectively
+    """
+
+    def forward(self, *inputs, **kwargs):  # pragma: no cover
+        torch_version = get_torch_version()
+        if version.parse(torch_version) < version.parse("1.11"):
+            self._sync_params()
+            inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
+            assert len(self.device_ids) == 1
+            if self.module.training:
+                output = self.module.training_step(*inputs[0], **kwargs[0])
+            elif self.module.testing:
+                output = self.module.test_step(*inputs[0], **kwargs[0])
+            else:
+                output = self.module.validation_step(*inputs[0], **kwargs[0])
+            if torch.is_grad_enabled():
+                # We'll return the output object verbatim since it is a freeform
+                # object. We need to find any tensors in this object, though,
+                # because we need to figure out which parameters were used during
+                # this forward pass, to ensure we short circuit reduction for any
+                # unused parameters. Only if `find_unused_parameters` is set.
+                if self.find_unused_parameters:
+                    self.reducer.prepare_for_backward(list(_find_tensors(output)))
+                else:
+                    self.reducer.prepare_for_backward([])
+        elif version.parse(torch_version) < version.parse("2.1"):
+            from torch.nn.parallel.distributed import \
+                logging, Join, _DDPSink, _tree_flatten_with_rref, _tree_unflatten_with_rref
+            with torch.autograd.profiler.record_function("DistributedDataParallel.forward"):
+                if torch.is_grad_enabled() and self.require_backward_grad_sync:
+                    self.logger.set_runtime_stats_and_log()
+                    self.num_iterations += 1
+                    self.reducer.prepare_for_forward()
+
+                # Notify the join context that this process has not joined, if
+                # needed
+                work = Join.notify_join_context(self)
+                if work:
+                    self.reducer._set_forward_pass_work_handle(
+                        work, self._divide_by_initial_world_size
+                    )
+
+                # Calling _rebuild_buckets before forward compuation,
+                # It may allocate new buckets before deallocating old buckets
+                # inside _rebuild_buckets. To save peak memory usage,
+                # call _rebuild_buckets before the peak memory usage increases
+                # during forward computation.
+                # This should be called only once during whole training period.
+                if torch.is_grad_enabled() and self.reducer._rebuild_buckets():
+                    logging.info("Reducer buckets have been rebuilt in this iteration.")
+                    self._has_rebuilt_buckets = True
+
+                # sync params according to location (before/after forward) user
+                # specified as part of hook, if hook was specified.
+                buffer_hook_registered = hasattr(self, 'buffer_hook')
+                if self._check_sync_bufs_pre_fwd():
+                    self._sync_buffers()
+
+                if self._join_config.enable:
+                    # Notify joined ranks whether they should sync in backwards pass or not.
+                    self._check_global_requires_backward_grad_sync(is_joined_rank=False)
+
+                inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
+                if self.module.training:
+                    output = self.module.training_step(*inputs[0], **kwargs[0])
+                elif self.module.testing:
+                    output = self.module.test_step(*inputs[0], **kwargs[0])
+                else:
+                    output = self.module.validation_step(*inputs[0], **kwargs[0])
+
+                # sync params according to location (before/after forward) user
+                # specified as part of hook, if hook was specified.
+                if self._check_sync_bufs_post_fwd():
+                    self._sync_buffers()
+
+                if torch.is_grad_enabled() and self.require_backward_grad_sync:
+                    self.require_forward_param_sync = True
+                    # We'll return the output object verbatim since it is a freeform
+                    # object. We need to find any tensors in this object, though,
+                    # because we need to figure out which parameters were used during
+                    # this forward pass, to ensure we short circuit reduction for any
+                    # unused parameters. Only if `find_unused_parameters` is set.
+                    if self.find_unused_parameters and not self.static_graph:
+                        # Do not need to populate this for static graph.
+                        self.reducer.prepare_for_backward(list(_find_tensors(output)))
+                    else:
+                        self.reducer.prepare_for_backward([])
+                else:
+                    self.require_forward_param_sync = False
+
+            # TODO: DDPSink is currently enabled for unused parameter detection and
+            # static graph training for first iteration.
+            if (self.find_unused_parameters and not self.static_graph) or (
+                    self.static_graph and self.num_iterations == 1
+            ):
+                state_dict = {
+                    'static_graph': self.static_graph,
+                    'num_iterations': self.num_iterations,
+                }
+
+                output_tensor_list, treespec, output_is_rref = _tree_flatten_with_rref(
+                    output
+                )
+                output_placeholders = [None for _ in range(len(output_tensor_list))]
+                # Do not touch tensors that have no grad_fn, which can cause issues
+                # such as https://github.com/pytorch/pytorch/issues/60733
+                for i, output in enumerate(output_tensor_list):
+                    if torch.is_tensor(output) and output.grad_fn is None:
+                        output_placeholders[i] = output
+
+                # When find_unused_parameters=True, makes tensors which require grad
+                # run through the DDPSink backward pass. When not all outputs are
+                # used in loss, this makes those corresponding tensors receive
+                # undefined gradient which the reducer then handles to ensure
+                # param.grad field is not touched and we don't error out.
+                passthrough_tensor_list = _DDPSink.apply(
+                    self.reducer,
+                    state_dict,
+                    *output_tensor_list,
+                )
+                for i in range(len(output_placeholders)):
+                    if output_placeholders[i] is None:
+                        output_placeholders[i] = passthrough_tensor_list[i]
+
+                # Reconstruct output data structure.
+                output = _tree_unflatten_with_rref(
+                    output_placeholders, treespec, output_is_rref
+                )
+        else:
+            output = super().forward(*inputs, **kwargs) # use _run_ddp_forward()
+        return output
+
+    def _run_ddp_forward(self, *inputs, **kwargs):
+        torch_version = get_torch_version()
+        if version.parse(torch_version) < version.parse("2.1"):
+            return super()._run_ddp_forward(*inputs, **kwargs)
+        with self._inside_ddp_forward():
+            if self.module.training:
+                output = self.module.training_step(*inputs, **kwargs)
+            elif self.module.testing:
+                output = self.module.test_step(*inputs, **kwargs)
+            else:
+                output = self.module.validation_step(*inputs, **kwargs)
+            return output
diff --git a/utils/commons/euler2rot.py b/utils/commons/euler2rot.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a9202d63bf0f4af0abe5f0b36e8496760a45bd0
--- /dev/null
+++ b/utils/commons/euler2rot.py
@@ -0,0 +1,37 @@
+import torch
+from scipy.spatial.transform import Rotation as R
+from utils.commons.tensor_utils import convert_to_tensor
+
+
+def rot2euler(rot, use_radian=True):
+    r = R.from_matrix(rot)
+    return r.as_euler('xyz', degrees=not use_radian)
+
+def euler2rot(euler, use_radian=True):
+    r = R.from_euler('xyz',euler, degrees=not use_radian)
+    return r.as_matrix()
+
+def c2w_to_euler_trans(c2w):
+    if c2w.ndim == 3:
+        e = rot2euler(c2w[:, :3, :3]) # [B, 3]
+        t = c2w[:, :3, 3].reshape([-1, 3])
+    else:
+        e = rot2euler(c2w[:3, :3]) # [3] (single matrix, no batch axis)
+        t = c2w[:3, 3].reshape([3])
+    return e, t # euler angles and translation, 3 values each (per batch item when batched)
+
+def euler_trans_2_c2w(euler, trans):
+    if euler.ndim == 2:
+        rot = euler2rot(euler) # [b, 3, 3]
+        bs = trans.shape[0]
+        trans = trans.reshape([bs, 3, 1])
+        rot = convert_to_tensor(rot).float()
+        trans = convert_to_tensor(trans).float()
+        c2w = torch.cat([rot, trans], dim=-1) # [b, 3, 4]
+    else:
+        rot = euler2rot(euler) # [3, 3]
+        trans = trans.reshape([3, 1])
+        rot = convert_to_tensor(rot).float()
+        trans = convert_to_tensor(trans).float()
+        c2w = torch.cat([rot, trans], dim=-1) # [3, 4]
+    return c2w
\ No newline at end of file
diff --git a/utils/commons/face_alignment_utils.py b/utils/commons/face_alignment_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..529853bdcce01eaeaaf48b5e85f3b2ce12238851
--- /dev/null
+++ b/utils/commons/face_alignment_utils.py
@@ -0,0 +1,37 @@
+import numpy as np
+import copy
+
+yaw_idx_in_mediapipe_mesh = [356, 454, 361, 288, 397, 379, 378, 377, 152, 148, 149, 150, 172,58, 132, 234, 127]  # 17 indices (presumably the face contour; "yaw" likely means jaw -- confirm)
+brow_idx_in_mediapipe_mesh = [70,  63, 105,  66, 107, 336, 296, 334, 293, 300]  # 10 indices
+nose_idx_in_mediapipe_mesh = [6, 5, 1, 2, 129, 240, 2, 460, 358]  # 9 indices; NOTE(review): index 2 appears twice -- confirm this is intended
+eye_idx_in_mediapipe_mesh = [33, 160, 158, 133, 153, 144, 362, 385, 387, 263, 373, 380]  # 12 indices
+mouth_idx_in_mediapipe_mesh = [78,191,80,81,82,13,312,311,310,415,308,324,318,402,317,14,87,178,88,95]  # 20 indices
+lm68_idx_in_mediapipe_mesh = yaw_idx_in_mediapipe_mesh + brow_idx_in_mediapipe_mesh + nose_idx_in_mediapipe_mesh + eye_idx_in_mediapipe_mesh + mouth_idx_in_mediapipe_mesh  # 17 + 10 + 9 + 12 + 20 = 68 indices, in this order
+
+
+def mediapipe_lm478_to_face_alignment_lm68(lm478, H, W, return_2d=True):
+    """
+    lm478: [B, 478, 3] or [478,3]; columns 0/1 are multiplied by W/H on a copy, then 68 rows are picked.
+    """
+    lm478 = copy.deepcopy(lm478)  # the caller's array is left untouched
+    lm478[..., 0] *= W
+    lm478[..., 1] *= H
+    n_dim = 2 if return_2d else 3  # was `False`, which sliced `:0` and returned no coordinates at all
+    if lm478.ndim == 2:
+        return lm478[lm68_idx_in_mediapipe_mesh, :n_dim].astype(np.int16)  # [68, n_dim], truncated to int16
+    elif lm478.ndim == 3:
+        return lm478[:, lm68_idx_in_mediapipe_mesh, :n_dim].astype(np.int16)  # [B, 68, n_dim]
+    else:
+        raise ValueError("input lm478 ndim should in 2 or 3!")
+
+def mediapipe_lm478_to_lm68_3d(lm478):
+    """
+    Pick the 68 landmark rows from a mediapipe mesh, keeping every coordinate column.
+    lm478: [B, 478, 3] or [478,3]; also works for lm468 (the largest index used is 460).
+    """
+    if lm478.ndim not in (2, 3):
+        raise ValueError("input lm478 ndim should in 2 or 3!")
+    if lm478.ndim == 2:
+        return lm478[lm68_idx_in_mediapipe_mesh]
+    # batched input: index the landmark axis and leave the batch axis alone
+    return lm478[:, lm68_idx_in_mediapipe_mesh]
\ No newline at end of file
diff --git a/utils/commons/hparams.py b/utils/commons/hparams.py
new file mode 100644
index 0000000000000000000000000000000000000000..36755f842f518075d5640eaa574deebcd2e730d6
--- /dev/null
+++ b/utils/commons/hparams.py
@@ -0,0 +1,194 @@
+import argparse
+import json
+import os
+import yaml
+
+from utils.commons.os_utils import remove_file
+
+global_print_hparams = True  # cleared by set_hparams after its first dump, so the hparams are printed once
+hparams = {}  # module-wide config dict; set_hparams(global_hparams=True) clears and refills it in place
+
+
+class Args:
+    def __init__(self, **kwargs):
+        for name, value in kwargs.items():  # one instance attribute per keyword argument
+            setattr(self, name, value)
+
+
+def override_config(old_config: dict, new_config: dict):  # merges new_config into old_config in place
+    if new_config.get('__replace', False):  # meta key: drop the old node before merging
+        old_config.clear()
+    for k, v in new_config.items():
+        if isinstance(v, dict) and isinstance(old_config.get(k), dict):
+            override_config(old_config[k], v)  # both sides are dicts: merge key by key
+        else:  # new key, or a non-dict on either side: the new value wins (used to crash on dict-over-scalar)
+            old_config[k] = v
+
+
+def traverse_dict(d, func):
+    # Replace every non-dict leaf of d by func(leaf), in place, descending into nested dicts.
+    for key, value in d.items():
+        if not isinstance(value, dict):
+            d[key] = func(value)  # rebinding an existing key is safe while iterating
+        else:
+            traverse_dict(value, func)
+
+
+def parse_config_ref(v):
+    # A string starting with '^' is a reference: it is replaced by the config loaded from that path.
+    is_ref = isinstance(v, str) and v.startswith('^')
+    return load_config(v[1:], [], set()) if is_ref else v
+
+
+def remove_meta_key(d):
+    # Delete every '__'-prefixed key whose value is not a dict; dict values are recursed into instead.
+    for key in list(d):  # snapshot of the keys: entries are deleted during the walk
+        value = d[key]
+        if isinstance(value, dict):
+            remove_meta_key(value)
+        elif key[:2] == '__':
+            del d[key]
+
+
+def load_config(config_fn, config_chains, loaded_configs):
+    # Load config_fn and merge it depth-first over its 'base_config' files; each file is visited once.
+    if not os.path.exists(config_fn):
+        print(f"| WARN: {config_fn} not exist.", )
+        return {}
+    with open(config_fn) as f:
+        hparams_ = yaml.safe_load(f) or {}  # an empty YAML file parses to None; treat it as no settings
+    loaded_configs.add(config_fn)
+
+    traverse_dict(hparams_, parse_config_ref)  # inline every '^path' string value
+
+    if 'base_config' in hparams_:
+        ret_hparams = {}
+        if not isinstance(hparams_['base_config'], list):
+            hparams_['base_config'] = [hparams_['base_config']]  # a single path is allowed
+        for c in hparams_['base_config']:
+            if c.startswith('.'):  # relative base: resolve against this file's directory
+                c = f'{os.path.dirname(config_fn)}/{c}'
+                c = os.path.normpath(c)
+            if c not in loaded_configs:
+                override_config(ret_hparams, load_config(c, config_chains, loaded_configs))
+        override_config(ret_hparams, hparams_)  # the file's own keys win over its bases
+    else:
+        ret_hparams = hparams_
+
+    config_chains.append(config_fn)  # post-order: bases are recorded before the file itself
+    return ret_hparams
+
+
+def set_hparams(config='', exp_name='', hparams_str='', print_hparams=True, global_hparams=True):  # merge config file(s), the saved run config and CLI overrides; returns the merged dict
+    if config == '' and exp_name == '':  # nothing passed in code: read everything from the command line
+        parser = argparse.ArgumentParser(description='')
+        parser.add_argument('--config', type=str, default='',
+                            help='location of the data corpus')
+        parser.add_argument('--exp_name', type=str, default='', help='exp_name')
+        parser.add_argument('-hp', '--hparams', type=str, default='',
+                            help='location of the data corpus')
+        parser.add_argument('--infer', action='store_true', help='infer')
+        parser.add_argument('--validate', action='store_true', help='validate')
+        parser.add_argument('--reset', action='store_true', help='reset hparams')
+        parser.add_argument('--remove', action='store_true', help='remove old ckpt')
+        parser.add_argument('--debug', action='store_true', help='debug')
+        parser.add_argument('--start_rank', type=int, default=0, help='the start rank id for DDP, keep 0 when single-machine multi-GPU')
+        parser.add_argument('--world_size', type=int, default=-1, help='the total number of GPU used across all machines, keep -1 for single-machine multi-GPU')
+        parser.add_argument('--init_method', type=str, default='tcp', help='method to init ddp, use tcp or file')
+
+        args, unknown = parser.parse_known_args()  # unrecognised arguments are collected, not rejected
+        if print_hparams:
+            print("| set_hparams Unknow hparams: ", unknown)
+    else:  # programmatic call: same fields as the CLI, filled with their defaults
+        args = Args(config=config, exp_name=exp_name, hparams=hparams_str,
+                    infer=False, validate=False, reset=False, debug=False, remove=False, start_rank=0, world_size=-1, init_method='tcp')
+    global hparams
+    assert args.config != '' or args.exp_name != ''
+    if args.config != '':
+        assert os.path.exists(args.config), args.config
+
+    saved_hparams = {}
+    args_work_dir = ''
+    if args.exp_name != '':
+        args_work_dir = f'checkpoints/{args.exp_name}'
+        ckpt_config_path = f'{args_work_dir}/config.yaml'  # only bound when exp_name is given; later uses are guarded by args_work_dir != ''
+        if os.path.exists(ckpt_config_path):
+            with open(ckpt_config_path) as f:
+                saved_hparams_ = yaml.safe_load(f)
+                if saved_hparams_ is not None:  # an empty file loads as None
+                    saved_hparams.update(saved_hparams_)
+    hparams_ = {}
+    config_chains = []
+    if args.config != '':
+        hparams_.update(load_config(args.config, config_chains, set()))
+        if len(config_chains) > 1 and print_hparams:
+            print('| Hparams chains: ', config_chains)
+    if not args.reset:
+        hparams_.update(saved_hparams)  # shallow update: values saved with the run win over the config file
+    hparams_['work_dir'] = args_work_dir
+
+    # Support config overriding in command line. Support list type config overriding.
+    # Examples: --hparams="a=1,b.c=2,d=[1 1 1]"
+    if args.hparams != "":
+        for new_hparam in args.hparams.split(","):  # pairs are comma-separated, hence spaces inside list values
+            k, v = new_hparam.split("=")
+            v = v.strip("\'\" ")
+            config_node = hparams_
+            for k_ in k.split(".")[:-1]:  # walk the dotted path down to the parent node
+                config_node = config_node[k_]
+            k = k.split(".")[-1]
+            if k in config_node:  # existing key: keep the type of the current value
+                if v in ['True', 'False'] or type(config_node[k]) in [bool, list, dict]:
+                    if type(config_node[k]) == list:
+                        v = v.replace(" ", ",").replace('^', "\"")  # "[1 1 1]" -> "[1,1,1]"; '^' stands in for a double quote
+                        if '|' in v:  # "a|b|c" form: split and cast each part like the first existing element
+                            tp = type(config_node[k][0]) if len(config_node[k]) else str
+                            config_node[k] = [tp(x) for x in v.split("|") if x != '']
+                            continue
+                    config_node[k] = eval(v)  # NOTE(review): eval of command-line text -- only safe with trusted input
+                else:
+                    config_node[k] = type(config_node[k])(v)  # NOTE(review): raises if the current value is None (NoneType takes no arguments)
+            else:  # new key: keep the string unless it parses as a number or a boolean
+                config_node[k] = v
+                try:
+                    config_node[k] = float(v)
+                except:
+                    pass
+                try:
+                    config_node[k] = int(v)  # runs after float, so integer-looking text ends up as int
+                except:
+                    pass
+                if v.lower() in ['false', 'true']:
+                    config_node[k] = v.lower() == 'true'
+    if args_work_dir != '' and args.remove:
+        answer = input("REMOVE old checkpoint? Y/N [Default: N]: ")
+        if answer.lower() == "y":
+            remove_file(args_work_dir)
+    if args_work_dir != '' and (not os.path.exists(ckpt_config_path) or args.reset) and not args.infer:  # first run or --reset, never in inference
+        os.makedirs(hparams_['work_dir'], exist_ok=True)
+        with open(ckpt_config_path, 'w') as f:
+            yaml.safe_dump(hparams_, f)
+
+    hparams_['infer'] = args.infer  # these run-time flags are set after the dump above, so this call does not write them to config.yaml
+    hparams_['debug'] = args.debug
+    hparams_['validate'] = args.validate
+    hparams_['exp_name'] = args.exp_name
+
+    hparams_['start_rank'] = args.start_rank # useful for multi-machine training
+    hparams_['world_size'] = args.world_size
+    hparams_['init_method'] = args.init_method
+
+    remove_meta_key(hparams_)
+    global global_print_hparams
+    if global_hparams:
+        hparams.clear()  # refill the shared module-level dict in place
+        hparams.update(hparams_)
+    if print_hparams and global_print_hparams and global_hparams:
+        print('| Hparams: ', json.dumps(hparams_, indent=2, sort_keys=True))
+        # for i, (k, v) in enumerate(sorted(hparams_.items())):
+        #     print(f"\033[;33;m{k}\033[0m: {v}, ", end="\n" if i % 5 == 4 else "")
+        global_print_hparams = False  # later calls stay silent
+    return hparams_
+
+if __name__ == '__main__':  # manual smoke test with a hard-coded checkpoint config path
+    set_hparams('checkpoints/1205_os_secc2planes/os_secc2plane_trigridv2/config.yaml')
\ No newline at end of file
diff --git a/utils/commons/image_utils.py b/utils/commons/image_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc45eb7fc08d59e5ae4dfa6c27ab705aec05b393
--- /dev/null
+++ b/utils/commons/image_utils.py
@@ -0,0 +1,52 @@
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import cv2
+import os
+import imageio
+
+
+def dilate(bin_img, ksize=5):
+    """Morphological dilation: stride-1 max-pool with a ksize window over a reflect-padded input."""
+    border = (ksize - 1) // 2
+    padded = F.pad(bin_img, pad=[border] * 4, mode='reflect')
+    # padding=0: for odd ksize the reflect pad above already restores the spatial size
+    return F.max_pool2d(padded, kernel_size=ksize, stride=1, padding=0)
+
+def erode(bin_img, ksize=5):
+    # Erosion by duality: dilate the complement of the mask, then complement the result.
+    return 1 - dilate(1 - bin_img, ksize)
+
+def to8b(x):  # clip to [0, 1], scale by 255 and truncate to uint8
+    return (np.clip(x, 0, 1) * 255).astype(np.uint8)
+
+def mse2psnr(x):  # -10 * log10(x) via ln(x) / ln(10); ln(10) is created on x's device (was CPU-only)
+    return -10. * torch.log(x) / torch.log(torch.tensor([10.], device=x.device))
+
+def img2mse(x, y):  # mean of the squared element-wise difference, over all elements
+    return torch.mean((x - y).pow(2))
+
+def video2images(video_name, out_dir):  # dump every frame as out_dir/0.jpg, 1.jpg, ... in decode order
+    os.makedirs(out_dir, exist_ok=True)  # cv2.imwrite does not create missing directories
+    cap = cv2.VideoCapture(video_name)
+    frame_num = 0
+    while True:
+        _, frame = cap.read()
+        if frame is None:  # end of the stream, or the video could not be opened
+            break
+        cv2.imwrite(os.path.join(out_dir, str(frame_num) + '.jpg'), frame)
+        frame_num += 1
+    cap.release()
+
+def load_image_as_uint8_tensor(fname):
+    """
+    Read fname with imageio and wrap the decoded array in a tensor; the dtype is not converted.
+    Presumably (H, W, 3) uint8 for a typical RGB file -- depends on the image; confirm.
+    """
+    return torch.as_tensor(imageio.imread(fname))
+
+if __name__ =='__main__':  # manual run: split three hard-coded comparison videos into frames
+    video2images("test_data/May_val/AD-NeRF.mp4", "test_data/May_val/AD-NeRF")
+    video2images("test_data/May_val/GeneFace.mp4", "test_data/May_val/GeneFace")
+    video2images("test_data/May_val/GT.mp4", "test_data/May_val/GT")
\ No newline at end of file
diff --git a/utils/commons/indexed_datasets.py b/utils/commons/indexed_datasets.py
new file mode 100644
index 0000000000000000000000000000000000000000..64dbddd0ff30cd1d39879e106efe0bf4ab21b9b0
--- /dev/null
+++ b/utils/commons/indexed_datasets.py
@@ -0,0 +1,203 @@
+import pickle
+from bisect import bisect
+from copy import deepcopy
+import numpy as np
+import gzip
+
+
+def int2bytes(i: int, *, signed: bool = False) -> bytes:
+    n_bytes = ((i + ((i * signed) < 0)).bit_length() + 7 + signed) // 8  # minimal width; 0 encodes as b''
+    return i.to_bytes(n_bytes, byteorder='little', signed=signed)
+
+
+def bytes2int(b: bytes, *, signed: bool = False) -> int:  # little-endian decode; b'' gives 0
+    return int.from_bytes(b, 'little', signed=signed)
+
+
+def load_index_data(data_file):
+    # Header layout: a 32-byte little-endian length, then a pickled index dict of that length.
+    index_size = bytes2int(data_file.read(32))
+    # NOTE: pickle.loads can execute arbitrary code -- only open dataset files you trust.
+    index = pickle.loads(data_file.read(index_size))
+    offsets = deepcopy(index['offsets'])  # required key; the other two default to {}
+    id2pos, meta = deepcopy(index.get('id2pos', {})), deepcopy(index.get('meta', {}))
+    return offsets, id2pos, meta
+
+
+class IndexedDataset:  # read-only random access to the files written by IndexedDatasetBuilder
+    def __init__(self, path, unpickle=True):
+        self.path = path
+        self.root_data_file = open(f"{path}.data", 'rb', buffering=-1)
+        try:
+            self.byte_offsets, self.id2pos, self.meta = load_index_data(self.root_data_file)
+            self.data_files = [self.root_data_file]
+        except Exception:  # no readable embedded index: fall back to the older "<path>.idx" layout
+            self.__init__old(path)
+            self.meta = {}
+        self.gzip = self.meta.get('gzip', False)
+        if 'chunk_begin' not in self.meta:
+            self.meta['chunk_begin'] = [0]
+        for i in range(len(self.meta['chunk_begin'][1:])):  # one extra file per additional chunk
+            self.data_files.append(open(f"{self.path}.{i + 1}.data", 'rb'))
+        self.unpickle = unpickle  # False: __getitem__ returns the raw stored bytes
+
+    def __init__old(self, path):
+        self.path = path
+        index_data = np.load(f"{path}.idx", allow_pickle=True).item()
+        self.byte_offsets = index_data['offsets']
+        self.id2pos = index_data.get('id2pos', {})
+        self.data_files = [self.root_data_file]  # reuse the handle opened by __init__ (was a second open of the same file)
+
+    def __getitem__(self, i):
+        if self.id2pos is not None and len(self.id2pos) > 0:
+            i = self.id2pos[i]  # i is an item id here; translate it to a position
+        self.check_index(i)
+
+        # Items live in one or more data files ("chunks"). byte_offsets holds global
+        # offsets: item i spans [byte_offsets[i], byte_offsets[i + 1]), and
+        # meta['chunk_begin'][c] is the global offset at which chunk c starts.
+        # bisect over chunk_begin[1:] therefore yields the chunk holding item i, and
+        # subtracting that chunk's begin turns the global offset into a position
+        # inside the chunk file. The handles opened in __init__ are reused for every
+        # read (seek + read on a shared handle), so concurrent reads through one
+        # instance would interfere with each other.
+
+        chunk_id = bisect(self.meta['chunk_begin'][1:], self.byte_offsets[i])
+        data_file = self.data_files[chunk_id]
+        data_file.seek(self.byte_offsets[i] - self.meta['chunk_begin'][chunk_id])
+        b = data_file.read(self.byte_offsets[i + 1] - self.byte_offsets[i])
+
+        unpickle = self.unpickle
+        if unpickle:
+            if self.gzip:
+                b = gzip.decompress(b)
+            item = pickle.loads(b)  # NOTE: unpickling runs arbitrary code; only read trusted datasets
+        else:
+            item = b
+        return item
+
+    def __del__(self):
+        for data_file in getattr(self, 'data_files', []):  # __init__ may have failed before the list existed
+            data_file.close()
+
+    def check_index(self, i):
+        if i < 0 or i >= len(self.byte_offsets) - 1:
+            raise IndexError('index out of range')
+
+    def __len__(self):
+        return len(self.byte_offsets) - 1  # byte_offsets has one more entry than there are items
+
+    def __iter__(self):
+        self.iter_i = 0  # a single cursor per instance: nested iteration over one object is not supported
+        return self
+
+    def __next__(self):
+        if self.iter_i == len(self):
+            raise StopIteration
+        else:
+            item = self[self.iter_i]  # NOTE(review): goes through id2pos, so with ids present they must be 0..len-1
+            self.iter_i += 1
+            return item
+
+
+class IndexedDatasetBuilder:  # writes "<path>.data" (reserved index header + items) and numbered chunk files
+    def __init__(self, path, append=False, max_size=1024 * 1024 * 1024 * 64,
+                 default_idx_size=1024 * 1024 * 16, gzip=False):
+        self.path = self.root_path = path
+        self.default_idx_size = default_idx_size
+        if append:  # NOTE(review): writing resumes in the root file only; a dataset that already has chunk files is not handled
+            self.data_file = open(f"{path}.data", 'r+b')
+            self.byte_offsets, self.id2pos, self.meta = load_index_data(self.data_file)
+            self.default_idx_size = self.byte_offsets[0]  # header size the file was built with; the argument could clobber items
+            self.data_file.seek(0)
+            self.data_file.write(bytes(self.default_idx_size))  # blank the old index; finalize() rewrites it
+            self.data_file.seek(self.byte_offsets[-1])
+            self.gzip = self.meta.get('gzip', False)  # same default as the reader; older files may lack the key
+        else:
+            self.data_file = open(f"{path}.data", 'wb')
+            self.data_file.seek(default_idx_size)  # leave room for the index written by finalize()
+            self.byte_offsets = [default_idx_size]
+            self.id2pos = {}
+            self.meta = {}
+            self.meta['chunk_begin'] = [0]
+            self.gzip = self.meta['gzip'] = gzip
+        self.root_data_file = self.data_file
+        self.max_size = max_size  # a new chunk file is started once the current one has grown past this many bytes
+        self.data_chunk_id = 0
+
+    def add_item(self, item, id=None, use_pickle=True):
+        if self.byte_offsets[-1] > self.meta['chunk_begin'][-1] + self.max_size:
+            if self.data_file != self.root_data_file:  # the root file stays open until finalize()
+                self.data_file.close()
+            self.data_chunk_id += 1
+            self.data_file = open(f"{self.path}.{self.data_chunk_id}.data", 'wb')
+            self.data_file.seek(0)
+            self.meta['chunk_begin'].append(self.byte_offsets[-1])
+        if not use_pickle:  # item must already be bytes; it is stored as-is (no gzip either)
+            s = item
+        else:
+            s = pickle.dumps(item)
+            if self.gzip:
+                s = gzip.compress(s, 1)
+        n_written = self.data_file.write(s)  # renamed from `bytes`, which shadowed the builtin
+        if id is not None:
+            self.id2pos[id] = len(self.byte_offsets) - 1
+        self.byte_offsets.append(self.byte_offsets[-1] + n_written)
+
+    def finalize(self):
+        # Write the pickled index into the reserved header of the root file, then close the files.
+        self.root_data_file.seek(0)
+        s = pickle.dumps({'offsets': self.byte_offsets, 'id2pos': self.id2pos, 'meta': self.meta})
+        # The index starts at byte 32, so it needs 32 bytes of headroom or it overwrites the first item.
+        assert len(s) + 32 <= self.default_idx_size, (len(s), self.default_idx_size)
+        self.root_data_file.write(len(s).to_bytes(32, byteorder='little'))  # full fixed-width length field
+        self.root_data_file.seek(32)
+        self.root_data_file.write(s)
+        self.root_data_file.close()
+        # Errors while closing the last chunk are no longer swallowed: they mean unflushed data.
+        if self.data_file is not self.root_data_file:
+            self.data_file.close()
+
+
+if __name__ == "__main__":  # manual round-trip demo: build a chunked dataset under /tmp and read it back
+    import random
+    from tqdm import tqdm
+
+    # builder = IndexedDatasetBuilder(ds_path, append=True)
+    # for i in tqdm(range(size)):
+    #     builder.add_item(items[i], i + size)
+    # builder.finalize()
+    # ds = IndexedDataset(ds_path)
+    # for i in tqdm(range(1000)):
+    #     idx = random.randint(size, 2 * size - 1)
+    #     assert (ds[idx]['a'] == items[idx - size]['a']).all()
+    #     idx = random.randint(0, size - 1)
+    #     assert (ds[idx]['a'] == items[idx]['a']).all()
+
+    ds_path = '/tmp/indexed_ds_example'
+    size = 100
+    items = [{"a": np.random.normal(size=[10000, 10]),
+              "b": np.random.normal(size=[10000, 10])} for i in range(size)]
+    builder = IndexedDatasetBuilder(ds_path, max_size=1024 * 1024 * 40)  # small max_size so several chunk files get written
+    builder.meta['lengths'] = [1, 2, 3]  # arbitrary extra metadata, checked after reloading
+    for i in tqdm(range(size)):
+        builder.add_item(pickle.dumps(items[i]), i, use_pickle=False)  # pre-pickled bytes; the reader unpickles them
+    builder.finalize()
+    ds = IndexedDataset(ds_path)
+    assert ds.meta['lengths'] == [1, 2, 3]
+    for i in tqdm(range(1000)):
+        idx = random.randint(0, size - 1)
+        assert (ds[idx]['a'] == items[idx]['a']).all()
+
+    # builder = IndexedDataset2Builder(ds_path, append=True)
+    # builder.meta['lengths'] = [1, 2, 3, 5, 6, 7]
+    # for i in tqdm(range(size)):
+    #     builder.add_item(items[i], i + size)
+    # builder.finalize()
+    # ds = IndexedDataset2(ds_path)
+    # assert ds.meta['lengths'] == [1, 2, 3, 5, 6, 7]
+    # for i in tqdm(range(1000)):
+    #     idx = random.randint(size, 2 * size - 1)
+    #     assert (ds[idx]['a'] == items[idx - size]['a']).all()
+    #     idx = random.randint(0, size - 1)
+    #     assert (ds[idx]['a'] == items[idx]['a']).all()
diff --git a/utils/commons/mesh_utils.py b/utils/commons/mesh_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e16f6cc82a59d9d3e455ba334abf68b576fdc10f
--- /dev/null
+++ b/utils/commons/mesh_utils.py
@@ -0,0 +1,124 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+
+"""
+Utils for extracting 3D shapes using marching cubes. Based on code from DeepSDF (Park et al.)
+
+Takes as input an .mrc file and extracts a mesh.
+
+Ex.
+    python mesh_utils.py my_shape.mrc
+Ex.
+    python mesh_utils.py myshapes_directory --level=12
+"""
+
+
+import time
+import plyfile
+import glob
+import logging
+import numpy as np
+import os
+import random
+import torch
+import torch.utils.data
+import trimesh
+import skimage.measure
+import argparse
+import mrcfile
+from tqdm import tqdm
+        
+
+def convert_sdf_samples_to_ply(
+    numpy_3d_sdf_tensor,
+    voxel_grid_origin,
+    voxel_size,
+    ply_filename_out,
+    offset=None,
+    scale=None,
+    level=0.0
+):
+    """
+    Convert sdf samples to .ply
+    :param numpy_3d_sdf_tensor: 3D array of samples, handed to skimage marching_cubes as-is
+    :voxel_grid_origin: a list of three floats: the bottom, left, down origin of the voxel grid
+    :voxel_size: float, the size of the voxels
+    :ply_filename_out: string, path of the filename to save to
+    :offset: optional value subtracted from the vertices, applied after `scale`
+    :scale: optional divisor applied to the vertices
+    :level: iso-value of the extracted surface
+    This function adapted from: https://github.com/RobotLocomotion/spartan
+    """
+    # No fallback mesh: if marching_cubes raises (skimage rejects a level outside the
+    # data range), the exception reaches the caller and no file is written.
+    # The normals and values it returns are not stored in the .ply.
+    verts, faces, _normals, _values = skimage.measure.marching_cubes(
+        numpy_3d_sdf_tensor, level=level, spacing=[voxel_size] * 3
+    )
+
+    # transform from voxel coordinates to camera coordinates
+    # note x and y are flipped in the output of marching_cubes
+    mesh_points = np.zeros_like(verts)
+    mesh_points[:, 0] = voxel_grid_origin[0] + verts[:, 0]
+    mesh_points[:, 1] = voxel_grid_origin[1] + verts[:, 1]
+    mesh_points[:, 2] = voxel_grid_origin[2] + verts[:, 2]
+
+    # apply additional offset and scale
+    if scale is not None:
+        mesh_points = mesh_points / scale
+    if offset is not None:
+        mesh_points = mesh_points - offset
+
+    # try writing to the ply file
+
+    num_verts = verts.shape[0]
+    num_faces = faces.shape[0]
+
+    verts_tuple = np.zeros((num_verts,), dtype=[("x", "f4"), ("y", "f4"), ("z", "f4")])
+
+    for i in range(0, num_verts):
+        verts_tuple[i] = tuple(mesh_points[i, :])
+
+    faces_building = []
+    for i in range(0, num_faces):
+        faces_building.append(((faces[i, :].tolist(),)))
+    faces_tuple = np.array(faces_building, dtype=[("vertex_indices", "i4", (3,))])
+
+    el_verts = plyfile.PlyElement.describe(verts_tuple, "vertex")
+    el_faces = plyfile.PlyElement.describe(faces_tuple, "face")
+
+    ply_data = plyfile.PlyData([el_verts, el_faces])
+    ply_data.write(ply_filename_out)
+    print(f"wrote to {ply_filename_out}")
+
+
+def convert_mrc(input_filename, output_filename, isosurface_level=1):  # mesh one .mrc volume into a .ply file
+    with mrcfile.open(input_filename) as mrc:  # the volume axes are reversed before meshing; origin [0, 0, 0], voxel size 1
+        convert_sdf_samples_to_ply(np.transpose(mrc.data, (2, 1, 0)), [0, 0, 0], 1, output_filename, level=isosurface_level)
+
+if __name__ == '__main__':
+    start_time = time.time()
+    parser = argparse.ArgumentParser()
+    parser.add_argument('input_mrc_path')
+    parser.add_argument('--level', type=float, default=10, help="The isosurface level for marching cubes")
+    args = parser.parse_args()
+
+    if os.path.isfile(args.input_mrc_path) and args.input_mrc_path.split('.')[-1] == 'mrc':  # was 'ply', so a single .mrc file never matched
+        output_obj_path = args.input_mrc_path.split('.mrc')[0] + '.ply'
+        convert_mrc(args.input_mrc_path, output_obj_path, isosurface_level=args.level)  # was hard-coded to 1, ignoring --level
+
+        print(f"{time.time() - start_time:02f} s")
+    else:  # otherwise the argument must be a directory; every *.mrc directly inside it is converted
+        assert os.path.isdir(args.input_mrc_path)
+
+        for mrc_path in tqdm(glob.glob(os.path.join(args.input_mrc_path, '*.mrc'))):
+            output_obj_path = mrc_path.split('.mrc')[0] + '.ply'
+            convert_mrc(mrc_path, output_obj_path, isosurface_level=args.level)
\ No newline at end of file
diff --git a/utils/commons/meters.py b/utils/commons/meters.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b5716547cefd33fb68ab99dc2a6a70f55336625
--- /dev/null
+++ b/utils/commons/meters.py
@@ -0,0 +1,42 @@
+import time
+import torch
+
+
+class AvgrageMeter(object):
+    # Running weighted mean: `sum` collects val * n, `cnt` collects n, `avg` is their ratio.
+    def __init__(self):
+        self.reset()
+
+    def reset(self):
+        # Empty state; avg stays 0 until the first update.
+        self.avg = self.sum = self.cnt = 0
+
+    def update(self, val, n=1):
+        # Fold in `val` observed `n` times, then refresh the mean.
+        self.sum += val * n
+        self.cnt += n
+        self.avg = self.sum / self.cnt
+
+
+class Timer:
+    # Context manager that accumulates wall-clock seconds per name in a class-wide table.
+    timer_map = {}
+
+    def __init__(self, name, enable=False):
+        Timer.timer_map.setdefault(name, 0)  # the slot is created even for a disabled timer
+        self.name = name
+        self.enable = enable
+
+    def __enter__(self):
+        # NOTE: the torch.cuda.synchronize() calls are disabled here and in __exit__;
+        # only host wall-clock time is taken. Returns None.
+        if self.enable:
+            self.t = time.time()
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Adds the elapsed time to this name's running total and prints that total (not the
+        # duration of this single block). Returns None, so exceptions propagate.
+        if not self.enable:
+            return
+        Timer.timer_map[self.name] += time.time() - self.t
+        print(f'[Timer] {self.name}: {Timer.timer_map[self.name]}')
diff --git a/utils/commons/multiprocess_utils.py b/utils/commons/multiprocess_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2773543c702d2819559dfde4c5febab03899790
--- /dev/null
+++ b/utils/commons/multiprocess_utils.py
@@ -0,0 +1,130 @@
+import os
+import traceback
+from functools import partial
+from tqdm import tqdm
+
+
+def chunked_worker(worker_id, args_queue=None, results_queue=None, init_ctx_func=None):
+    # Worker loop: pull (job_idx, map_func, arg) jobs until the '<KILL>' sentinel arrives.
+    ctx = None if init_ctx_func is None else init_ctx_func(worker_id)
+    while True:
+        job = args_queue.get()
+        if job == '<KILL>':
+            return
+        job_idx, map_func, arg = job
+        try:
+            # a non-None context is forwarded to the job as the `ctx` keyword
+            call = map_func if ctx is None else partial(map_func, ctx=ctx)
+            if isinstance(arg, dict):
+                res = call(**arg)
+            else:
+                res = call(*arg) if isinstance(arg, (list, tuple)) else call(arg)
+            results_queue.put((job_idx, res))
+        except:  # deliberately broad: any failure is printed and reported as a None result
+            traceback.print_exc()
+            results_queue.put((job_idx, None))
+
+
+class MultiprocessManager:  # worker pool fed through a job queue; results come back on a second queue
+    def __init__(self, num_workers=None, init_ctx_func=None, multithread=False, queue_max=-1):
+        if multithread:
+            from multiprocessing.dummy import Queue, Process  # thread-backed drop-ins with the same API
+        else:
+            from multiprocessing import Queue, Process
+        if num_workers is None:
+            num_workers = int(os.getenv('N_PROC', os.cpu_count()))  # env override, else one worker per CPU
+        self.num_workers = num_workers
+        self.results_queue = Queue(maxsize=-1)
+        self.jobs_pending = []  # jobs that did not fit into args_queue yet
+        self.args_queue = Queue(maxsize=queue_max)
+        self.workers = []
+        self.total_jobs = 0
+        self.multithread = multithread
+        for i in range(num_workers):  # workers are started right away and block on args_queue
+            if multithread:
+                p = Process(target=chunked_worker,
+                            args=(i, self.args_queue, self.results_queue, init_ctx_func))
+            else:
+                p = Process(target=chunked_worker,
+                            args=(i, self.args_queue, self.results_queue, init_ctx_func),
+                            daemon=True)
+            self.workers.append(p)
+            p.start()
+
+    def add_job(self, func, args):
+        if not self.args_queue.full():
+            self.args_queue.put((self.total_jobs, func, args))
+        else:  # queue is full: park the job; get_results() feeds it in later
+            self.jobs_pending.append((self.total_jobs, func, args))
+        self.total_jobs += 1  # the running count doubles as the id of the next job
+
+    def get_results(self):  # generator of (job_id, result) pairs in completion order
+        self.n_finished = 0
+        while self.n_finished < self.total_jobs:
+            while len(self.jobs_pending) > 0 and not self.args_queue.full():  # top up the queue from the backlog
+                self.args_queue.put(self.jobs_pending[0])
+                self.jobs_pending = self.jobs_pending[1:]
+            job_id, res = self.results_queue.get()  # blocks until some worker reports a result
+            yield job_id, res
+            self.n_finished += 1
+        for w in range(self.num_workers):  # only reached once every result was consumed: one sentinel per worker
+            self.args_queue.put("<KILL>")
+        for w in self.workers:
+            w.join()
+
+    def close(self):
+        if not self.multithread:  # only the process-based workers are terminated here
+            for w in self.workers:
+                w.terminate()
+
+    def __len__(self):
+        return self.total_jobs  # number of jobs submitted so far, finished or not
+
+
+def multiprocess_run_tqdm(map_func, args, num_workers=None, ordered=True, init_ctx_func=None,
+                          multithread=False, queue_max=-1, desc=None):
+    # Same stream of (index, result) pairs as multiprocess_run, wrapped in a tqdm progress
+    # bar whose total is len(args).
+    results = multiprocess_run(map_func, args, num_workers, ordered, init_ctx_func, multithread,
+                               queue_max=queue_max)
+    yield from tqdm(results, total=len(args), desc=desc)
+
+
+def multiprocess_run(map_func, args, num_workers=None, ordered=True, init_ctx_func=None, multithread=False,
+                     queue_max=-1):
+    """
+    Multiprocessing running chunked jobs.
+
+    Examples:
+    >>> for i, res in tqdm(multiprocess_run(job_func, args)):
+    >>>     print(i, res)
+
+    :param map_func: callable run in the workers; a job that raises yields None as its result
+    :param args: jobs, one entry each: a dict is passed as **kwargs, a list/tuple as *args (len() is used when ordered)
+    :param num_workers: worker count; defaults to $N_PROC, else os.cpu_count()
+    :param ordered: yield in submission order (True) or in completion order (False)
+    :param init_ctx_func: optional; called with the worker id, a non-None result is passed to jobs as `ctx`
+    :param multithread: use threads (multiprocessing.dummy) instead of processes
+    :param queue_max: maxsize of the job queue
+    :return: generator of (job index, result) pairs
+    """
+    if num_workers is None:
+        num_workers = int(os.getenv('N_PROC', os.cpu_count()))
+    manager = MultiprocessManager(num_workers, init_ctx_func, multithread, queue_max=queue_max)
+    for arg in args:
+        manager.add_job(map_func, arg)
+    if ordered:
+        n_jobs = len(args)
+        waiting = object()  # unique placeholder: unlike the old '<WAIT>' string it cannot equal a job result
+        results = [waiting] * n_jobs
+        i_now = 0
+        for job_i, res in manager.get_results():
+            results[job_i] = res
+            while i_now < n_jobs and results[i_now] is not waiting:  # flush the finished prefix
+                yield i_now, results[i_now]
+                results[i_now] = None  # drop the reference once it has been handed out
+                i_now += 1
+    else:
+        for job_i, res in manager.get_results():
+            yield job_i, res
+    manager.close()
diff --git a/utils/commons/os_utils.py b/utils/commons/os_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..07d68cd5bccbc26c4f5867ae1b893026cf875d67
--- /dev/null
+++ b/utils/commons/os_utils.py
@@ -0,0 +1,42 @@
+import os
+import subprocess
+import glob
+from utils.commons.multiprocess_utils import multiprocess_run_tqdm
+
+
+def link_file(from_file, to_file):
+    rel = os.fsdecode(subprocess.check_output(['realpath', '--relative-to', os.path.dirname(to_file), str(from_file)])).rstrip('\n')
+    subprocess.check_call(['ln', '-s', rel, str(to_file)])  # argv lists: quotes, `$` or backticks in a path stay literal
+
+
+def move_file(from_file, to_file):  # runs `mv` with an argv list, so quotes or `$` in a path are taken literally
+    subprocess.check_call(['mv', str(from_file), str(to_file)])
+
+
+def copy_file(from_file, to_file):  # runs `cp -r` with an argv list, so quotes or `$` in a path are taken literally
+    subprocess.check_call(['cp', '-r', str(from_file), str(to_file)])
+
+
+def remove_file(*fns):
+    for f in fns:  # one `rm -rf` per path; -f makes a missing path a no-op
+        subprocess.check_call(['rm', '-rf', str(f)])  # argv list: quotes or `$` in a path are taken literally
+
+def glob_job(d, f):
+    # Worker job for multiprocess_glob: glob the pattern `f` inside directory `d`.
+    return glob.glob(os.path.join(d, f))
+
+def multiprocess_glob(pattern, num_workers=None):
+    # glob `pattern`; with wildcards in several path components the last component is
+    # globbed in parallel, one job per directory matched by the leading components.
+    split_pattern = pattern.split("/")
+    recursive_depth = sum('*' in split for split in split_pattern)  # components containing '*'
+    if recursive_depth <= 1:  # nothing to fan out (was `== 1`, which mishandled wildcard-free paths)
+        return glob.glob(pattern)
+    else:
+        dirs = glob.glob('/'.join(split_pattern[:-1]))
+        ret = []
+        args = [(d, split_pattern[-1]) for d in dirs]
+        # the runner's default is ordered=True, so results arrive in the order of `dirs`
+        for (i,res) in multiprocess_run_tqdm(glob_job, args=args, desc=f"globing {pattern}", num_workers=num_workers):
+            ret += res or []  # a failed job is reported as None by the worker
+        return ret
\ No newline at end of file
diff --git a/utils/commons/pitch_utils.py b/utils/commons/pitch_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..05ccfe56de261d545fd91bbe5c848eb713772f8a
--- /dev/null
+++ b/utils/commons/pitch_utils.py
@@ -0,0 +1,37 @@
+import numpy as np
+import torch
+
+# Pitch quantization settings shared by `coarse_to_f0` and `f0_to_coarse`.
+f0_bin = 256  # number of coarse bins; `f0_to_coarse` emits codes in [1, f0_bin - 1]
+f0_max = 1100.0  # upper f0 bound (presumably Hz -- confirm)
+f0_min = 50.0  # lower f0 bound (presumably Hz -- confirm)
+# the two bounds mapped through 1127 * ln(1 + f / 700)
+f0_mel_min = 1127 * np.log(1 + f0_min / 700)
+f0_mel_max = 1127 * np.log(1 + f0_max / 700)
+
+def coarse_to_f0(coarse):
+    """Map coarse pitch codes back to f0 values (inverse of `f0_to_coarse`).
+
+    Code 1 is decoded as 0. Every other code is mapped linearly onto
+    [f0_mel_min, f0_mel_max] and then through the inverse of
+    ``1127 * ln(1 + f / 700)``.
+
+    :param coarse: torch tensor or numpy array of pitch codes
+    :return: f0 values, same container type as the input
+    """
+    uv = coarse == 1
+    f0_mel = (coarse - 1) * (f0_mel_max - f0_mel_min) / (f0_bin - 2) + f0_mel_min
+    scaled = f0_mel / 1127
+    # numpy arrays have no .exp() method, so use np.exp for them
+    exp = np.exp(scaled) if isinstance(scaled, np.ndarray) else scaled.exp()
+    f0 = (exp - 1) * 700
+    f0[uv] = 0
+    return f0
+
+def f0_to_coarse(f0):
+    """Quantize f0 values into integer pitch codes in [1, f0_bin - 1].
+
+    f0 is mapped through ``1127 * ln(1 + f / 700)``; positive results are
+    rescaled linearly from [f0_mel_min, f0_mel_max] to [1, f0_bin - 1], clamped
+    to that range and rounded. Torch inputs are rounded by adding 0.5 and
+    truncating, numpy inputs with ``np.rint``.
+
+    :param f0: torch tensor or numpy array of f0 values
+    :return: integer codes (``long`` tensor or ``np.int_`` array)
+    """
+    if isinstance(f0, torch.Tensor):
+        mel = 1127 * (1 + f0 / 700).log()
+    else:
+        mel = 1127 * np.log(1 + f0 / 700)
+    positive = mel > 0
+    mel[positive] = (mel[positive] - f0_mel_min) * (f0_bin - 2) / (f0_mel_max - f0_mel_min) + 1
+
+    # clamp into the valid code range
+    mel[mel <= 1] = 1
+    mel[mel > f0_bin - 1] = f0_bin - 1
+    if isinstance(f0, torch.Tensor):
+        coarse = (mel + 0.5).long()
+    else:
+        coarse = np.rint(mel).astype(np.int_)
+    assert coarse.max() <= 255 and coarse.min() >= 1, (coarse.max(), coarse.min(), f0.min(), f0.max())
+    return coarse
+
+
+def norm_f0(f0, uv, hparams):
+    """Normalize f0 according to ``hparams['pitch_norm']``.
+
+    ``'standard'`` subtracts ``hparams['f0_mean']`` and divides by
+    ``hparams['f0_std']``; ``'log'`` takes ``log2(f0 + 1e-8)``; any other value
+    leaves f0 unchanged. If ``uv`` is given and ``hparams['use_uv']`` is truthy,
+    positions where ``uv > 0`` are set to 0 in place.
+
+    :param f0: torch tensor or numpy array
+    :param uv: mask of the same shape as f0, or None
+    :param hparams: dict with the keys named above
+    """
+    mode = hparams['pitch_norm']
+    if mode == 'standard':
+        f0 = (f0 - hparams['f0_mean']) / hparams['f0_std']
+    elif mode == 'log':
+        if isinstance(f0, torch.Tensor):
+            f0 = torch.log2(f0 + 1e-8)
+        else:
+            f0 = np.log2(f0 + 1e-8)
+    if uv is not None and hparams['use_uv']:
+        f0[uv > 0] = 0
+    return f0
\ No newline at end of file
diff --git a/utils/commons/tensor_utils.py b/utils/commons/tensor_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..04e2fbf06cf8cba8946930801f5ae33e5896a82d
--- /dev/null
+++ b/utils/commons/tensor_utils.py
@@ -0,0 +1,151 @@
+import torch
+import torch.distributed as dist
+import numpy as np
+
+
+def reduce_tensors(metrics):
+    """Average every tensor in a (nested) metrics dict across all processes.
+
+    Tensors are all-reduced in place and then divided by the world size; plain
+    dict values are processed recursively; other values are copied unchanged.
+
+    :param metrics: dict of metric name to value
+    :return: a new dict with the reduced values
+    """
+    reduced = {}
+    for name, value in metrics.items():
+        if isinstance(value, torch.Tensor):
+            dist.all_reduce(value)
+            value = value / dist.get_world_size()
+        elif type(value) is dict:
+            value = reduce_tensors(value)
+        reduced[name] = value
+    return reduced
+
+
+def tensors_to_scalars(tensors):
+    """Recursively replace tensors by Python scalars via ``.item()``.
+
+    Dicts and lists are rebuilt with converted contents; any other value is
+    returned unchanged.
+    """
+    if isinstance(tensors, torch.Tensor):
+        return tensors.item()
+    if isinstance(tensors, dict):
+        return {k: tensors_to_scalars(v) for k, v in tensors.items()}
+    if isinstance(tensors, list):
+        return [tensors_to_scalars(v) for v in tensors]
+    return tensors
+
+
+def convert_to_np(tensors):
+    """Convert tensors (bare, or held in a dict or list) to numpy arrays.
+
+    Inside a dict or list, tensors are moved to CPU and converted, plain dicts
+    are converted recursively, and every other element is kept as is. A numpy
+    array is returned unchanged.
+
+    :raises Exception: if ``tensors`` is not an ndarray, dict, list or tensor
+    """
+    def _convert_item(item):
+        # tensors become arrays, plain dicts recurse, everything else is kept
+        if isinstance(item, torch.Tensor):
+            return item.cpu().numpy()
+        if type(item) is dict:
+            return convert_to_np(item)
+        return item
+
+    if isinstance(tensors, np.ndarray):
+        return tensors
+    if isinstance(tensors, dict):
+        return {k: _convert_item(v) for k, v in tensors.items()}
+    if isinstance(tensors, list):
+        return [_convert_item(v) for v in tensors]
+    if isinstance(tensors, torch.Tensor):
+        return tensors.cpu().numpy()
+    raise Exception(f'tensors_to_np does not support type {type(tensors)}.')
+
+
+def convert_to_tensor(arrays):
+    """Convert numpy data (bare, or held in a list or dict) to float tensors.
+
+    * ndarray -> float tensor
+    * tensor  -> returned unchanged
+    * list    -> turned into an ndarray, then into a float tensor
+    * dict    -> new dict; ndarray values become float tensors, plain dict
+      values are converted recursively, other values are kept
+
+    :raises TypeError: for any other input type
+    """
+    if isinstance(arrays, np.ndarray):
+        ret = torch.from_numpy(arrays).float()
+    elif isinstance(arrays, torch.Tensor):
+        ret = arrays
+    elif isinstance(arrays, list):
+        # the result has to be bound to `ret`, otherwise the return below fails
+        ret = torch.from_numpy(np.array(arrays)).float()
+    elif type(arrays) is dict:
+        ret = {}
+        for k, v in arrays.items():
+            if isinstance(v, np.ndarray):
+                v = torch.from_numpy(v).float()
+            if type(v) is dict:
+                v = convert_to_tensor(v)
+            ret[k] = v
+    else:
+        raise TypeError(f'convert_to_tensor does not support type {type(arrays)}.')
+    return ret
+
+def convert_like(inp, target):
+    """Convert ``inp`` to the container type (and device) of ``target``.
+
+    * ``target`` is an ndarray: ``inp`` is converted with `convert_to_np`.
+    * ``target`` is a tensor: ``inp`` is converted with `convert_to_tensor` and
+      moved to ``target.device``.
+    * otherwise None is returned.
+    """
+    if isinstance(target, np.ndarray):
+        return convert_to_np(inp)
+    elif isinstance(target, torch.Tensor):
+        # move to the device the target actually lives on instead of choosing
+        # between "cpu" and GPU 0
+        return _to_device(convert_to_tensor(inp), target.device)
+
+
+def _to_device(obj, device):
+    """Move a tensor, or the tensors inside (nested) plain dicts, to ``device``."""
+    if isinstance(obj, torch.Tensor):
+        return obj.to(device)
+    if type(obj) is dict:
+        return {k: _to_device(v, device) for k, v in obj.items()}
+    return obj
+
+def move_to_cpu(tensors):
+    """Recursively move tensors to the CPU.
+
+    Accepts a bare tensor, a list/tuple, or a dict-like object with ``items()``;
+    containers are rebuilt with their tensors moved. Anything else is returned
+    unchanged.
+
+    :return: a tensor, list, tuple or dict mirroring the input structure
+    """
+    if isinstance(tensors, torch.Tensor):
+        return tensors.cpu()
+    if isinstance(tensors, list):
+        return [move_to_cpu(v) for v in tensors]
+    if isinstance(tensors, tuple):
+        return tuple(move_to_cpu(v) for v in tensors)
+    if callable(getattr(tensors, 'items', None)):
+        return {k: move_to_cpu(v) for k, v in tensors.items()}
+    return tensors
+
+
+def move_to_cuda(batch, gpu_id=0):
+    """Recursively move ``batch`` to CUDA device ``gpu_id``.
+
+    Objects exposing a callable ``cuda`` (or, failing that, ``to``) move
+    themselves with ``non_blocking=True``. Lists and dicts are updated in place
+    and returned; tuples are rebuilt. ints, floats, strs and None pass through.
+
+    :raises NotImplementedError: for any other type (after printing it)
+    """
+    cuda_fn = getattr(batch, 'cuda', None)
+    if callable(cuda_fn):
+        return cuda_fn(gpu_id, non_blocking=True)
+    to_fn = getattr(batch, 'to', None)
+    if callable(to_fn):
+        return to_fn(torch.device('cuda', gpu_id), non_blocking=True)
+    if isinstance(batch, list):
+        for idx, item in enumerate(batch):
+            batch[idx] = move_to_cuda(item, gpu_id)
+        return batch
+    if isinstance(batch, tuple):
+        return tuple(move_to_cuda(item, gpu_id) for item in batch)
+    if isinstance(batch, dict):
+        for key, value in batch.items():
+            batch[key] = move_to_cuda(value, gpu_id)
+        return batch
+    if isinstance(batch, (int, float, str)):
+        return batch
+    if batch is None:
+        return None
+    print("| Error in move_to_batch: ",type(batch), batch)
+    raise NotImplementedError()
+
+def convert_to_half(arrays):
+    """Convert numpy arrays / tensors to half precision tensors.
+
+    * ndarray -> half tensor
+    * tensor  -> ``.half()``
+    * list    -> list with ``.half()`` called on every element
+    * dict    -> new dict; ndarray values become half tensors, plain dict
+      values are converted recursively, other values (tensors included) are
+      kept unchanged
+
+    :raises TypeError: for any other input type
+    """
+    if isinstance(arrays, np.ndarray):
+        ret = torch.from_numpy(arrays).half()
+    elif isinstance(arrays, torch.Tensor):
+        ret = arrays.half()
+    elif isinstance(arrays, list):
+        ret = [v.half() for v in arrays]
+    elif type(arrays) is dict:
+        ret = {}
+        for k, v in arrays.items():
+            if isinstance(v, np.ndarray):
+                v = torch.from_numpy(v).half()
+            if type(v) is dict:
+                # recurse with the half-precision converter so nested arrays
+                # do not come back as float32
+                v = convert_to_half(v)
+            ret[k] = v
+    else:
+        raise TypeError(f'convert_to_half does not support type {type(arrays)}.')
+    return ret
\ No newline at end of file
diff --git a/utils/commons/trainer.py b/utils/commons/trainer.py
new file mode 100644
index 0000000000000000000000000000000000000000..768386368d8f805fd8ca8bdd9ce445cbfa448457
--- /dev/null
+++ b/utils/commons/trainer.py
@@ -0,0 +1,674 @@
+import random
+import time
+import subprocess
+import traceback
+import socket
+import setproctitle
+
+from torch.cuda.amp import GradScaler, autocast
+import numpy as np
+import torch.optim
+import torch.utils.data
+import copy
+import logging
+import os
+import re
+import sys
+import torch
+import torch.distributed as dist
+import torch.multiprocessing as mp
+import tqdm
+import datetime
+
+from utils.commons.ckpt_utils import get_last_checkpoint, get_all_ckpts
+from utils.commons.ddp_utils import DDP
+from utils.commons.hparams import hparams
+from utils.commons.tensor_utils import move_to_cuda
+from utils.commons.os_utils import remove_file
+from utils.commons.meters import Timer
+
+
+def check_port_is_occupied(host='localhost', port=10080):
+    """Report whether something accepts TCP connections on ``host:port``.
+
+    A connection attempt is made; success means the port is occupied. Only
+    socket-level failures (``OSError``) count as "not occupied", so
+    KeyboardInterrupt and SystemExit are no longer swallowed.
+
+    :return: True if the connection succeeded, False otherwise
+    """
+    s = socket.socket()
+    try:
+        s.connect((host, port))
+    except OSError:
+        print(f"{host}:{port} is not occupied!")
+        return False
+    else:
+        print(f"{host}:{port} is occupied!")
+        return True
+    finally:
+        s.close()
+
+
+class Tee(object):
+    """Mirror everything written to ``sys.stdout`` into a log file.
+
+    Creating an instance installs it as ``sys.stdout``; the previous stream is
+    kept in ``self.stdout`` and re-installed when the instance is finalized.
+    """
+
+    def __init__(self, name, mode):
+        self.file = open(name, mode)
+        self.stdout = sys.stdout
+        sys.stdout = self
+
+    def __del__(self):
+        sys.stdout = self.stdout
+        self.file.close()
+
+    def __getattr__(self, attr):
+        # Only reached for attributes Tee does not define itself (isatty,
+        # encoding, fileno, ...): forward them to the wrapped stream so code
+        # that probes sys.stdout keeps working.
+        wrapped = self.__dict__.get('stdout')
+        if wrapped is None:
+            raise AttributeError(attr)
+        return getattr(wrapped, attr)
+
+    def write(self, data):
+        self.file.write(data)
+        self.stdout.write(data)
+
+    def flush(self):
+        # flush both sinks so terminal output is not left sitting in a buffer
+        self.file.flush()
+        self.stdout.flush()
+
+
+class Trainer:
+    def __init__(
+            self,
+            work_dir,
+            default_save_path=None,
+            accumulate_grad_batches=1,
+            max_updates=160000,
+            print_nan_grads=False,
+            val_check_interval=2000,
+            num_sanity_val_steps=5,
+            amp=False,
+            # tb logger
+            log_save_interval=100,
+            tb_log_interval=10,
+            # checkpoint
+            monitor_key='val_loss',
+            monitor_mode='min',
+            num_ckpt_keep=5,
+            save_best=True,
+            resume_from_checkpoint=0,
+            seed=1234,
+            debug=False,
+    ):
+        """Set up trainer state, checkpoint monitoring and GPU/DDP bookkeeping.
+
+        :param work_dir: experiment directory (created if missing)
+        :param accumulate_grad_batches: the optimizer steps every this many
+            global steps; each loss is divided by it
+        :param max_updates: training stops once global_step exceeds this value
+        :param print_nan_grads: check gradients for NaN after every backward pass
+        :param val_check_interval: run validation every this many global steps
+        :param num_sanity_val_steps: validation batches run before training starts
+        :param amp: use autocast and a GradScaler
+        :param tb_log_interval: log training metrics every this many steps
+        :param monitor_key: key of the validation result used to pick the best checkpoint
+        :param monitor_mode: 'min' if lower is better; any other value means higher is better
+        :param num_ckpt_keep: checkpoints beyond the first ``num_ckpt_keep`` returned by
+            ``get_all_ckpts`` are deleted on save (milestones excepted)
+        :param save_best: also save ``model_ckpt_best.pt`` when the monitored value improves
+        :param resume_from_checkpoint: values <= 0 are stored as None
+        :param seed: seed applied in `configure_ddp`
+        :param debug: enables timers and keeps stdout/stderr of non-zero DDP workers
+        """
+        os.makedirs(work_dir, exist_ok=True)
+        self.work_dir = work_dir
+        self.accumulate_grad_batches = accumulate_grad_batches
+        self.max_updates = max_updates
+        self.num_sanity_val_steps = num_sanity_val_steps
+        self.print_nan_grads = print_nan_grads
+        self.default_save_path = default_save_path
+        self.resume_from_checkpoint = resume_from_checkpoint if resume_from_checkpoint > 0 else None
+        self.seed = seed
+        self.debug = debug
+        # model and optm
+        self.task = None
+        self.optimizers = []
+
+        # trainer state
+        self.testing = False
+        self.global_step = 0
+        self.current_epoch = 0
+        self.total_batches = 0
+
+        # configure checkpoint
+        self.monitor_key = monitor_key
+        self.num_ckpt_keep = num_ckpt_keep
+        self.save_best = save_best
+        self.monitor_op = np.less if monitor_mode == 'min' else np.greater
+        # `np.inf` is the spelling that exists in every NumPy release
+        # (the `np.Inf` alias was removed in NumPy 2.0)
+        self.best_val_results = np.inf if monitor_mode == 'min' else -np.inf
+        # keep the recorded mode in sync with the comparison chosen above
+        self.mode = monitor_mode
+
+        # GPU ids are taken from CUDA_VISIBLE_DEVICES (comma separated ints)
+        self.all_gpu_ids = [
+            int(x) for x in os.environ.get("CUDA_VISIBLE_DEVICES", "").split(",") if x != '']
+
+        # world_size != -1 means multi-machine training
+        self.use_multi_machine_ddp = hparams['world_size'] != -1
+        self.num_local_gpus = len(self.all_gpu_ids)
+        self.num_total_gpus = len(self.all_gpu_ids) if not self.use_multi_machine_ddp else hparams['world_size']
+        self.on_gpu = self.num_local_gpus > 0
+        self.root_gpu = 0
+        logging.info(f'GPU available: {torch.cuda.is_available()}, GPU used: {self.all_gpu_ids}, world_size: {self.num_total_gpus}, multi-machine training: {self.use_multi_machine_ddp}')
+        self.use_ddp = self.num_local_gpus > 1 or self.use_multi_machine_ddp
+        self.proc_rank = 0
+        # Tensorboard logging
+        self.log_save_interval = log_save_interval
+        self.val_check_interval = val_check_interval
+        self.tb_log_interval = tb_log_interval
+        self.amp = amp
+        self.amp_scalar = GradScaler()
+
+    def test(self, task_cls):
+        """Run the test pipeline: flag the trainer as testing, then reuse `fit`.
+
+        :param task_cls: task class, handed on to `fit`
+        """
+        self.testing = True
+        self.fit(task_cls)
+
+    def fit(self, task_cls):
+        """Entry point: run training (or testing, if ``self.testing``) for a task.
+
+        With DDP enabled, one worker process per local GPU is started and each
+        runs `ddp_run`; otherwise the task is built and run in this process.
+        Any exception (KeyboardInterrupt included) is printed and, after five
+        seconds, followed by a ``pkill`` on processes whose command line matches
+        ``GeneFace_worker (<work_dir>``.
+
+        :param task_cls: task class; called without arguments to build the task
+        :return: always 1
+        """
+        try:
+            if self.use_ddp:
+                mp.start_processes(self.ddp_run, nprocs=self.num_local_gpus,
+                                   args=(task_cls, copy.deepcopy(hparams)), start_method='spawn')
+            else:
+                # NOTE: an earlier workaround for a BatchNorm `get_world_size`
+                # error was to call self.init_ddp_connection_tcp(0, 1) here.
+                self.task = task_cls()
+                self.task.trainer = self
+                setproctitle.setproctitle(f'GeneFace_worker ({hparams["work_dir"]})')
+                self.run_single_process(self.task)
+        except BaseException:
+            traceback.print_exc()
+            time.sleep(5)
+            # "\\(" yields the same backslash-paren as before without relying on
+            # an invalid string escape
+            subprocess.check_call(f'pkill -f "GeneFace_worker \\({hparams["work_dir"]}"', shell=True)
+        return 1
+
+    def ddp_run(self, gpu_idx, task_cls, hparams_):
+        """Worker entry point for one DDP process (started by `fit`).
+
+        :param gpu_idx: index of this worker; used as local GPU id
+        :param task_cls: task class, called without arguments to build the task
+        :param hparams_: hyper-parameter dict copied into the global ``hparams``
+        :raises NotImplementedError: if ``hparams['init_method']`` is neither
+            'file' nor 'tcp'
+        """
+        # the worker is a fresh process: re-populate the global hparams
+        hparams.update(hparams_)
+        setproctitle.setproctitle(f'GeneFace_worker ({hparams_["work_dir"]}_{gpu_idx})')
+
+        if hparams.get('use_file_system_mp'):
+            torch.multiprocessing.set_sharing_strategy('file_system')
+        if hparams.get('use_fork', True):
+            torch.multiprocessing.set_start_method('fork', force=True)
+        self.root_gpu = gpu_idx
+        # in the multi-machine setting the global rank is offset by this machine's start_rank
+        self.proc_rank = gpu_idx + hparams['start_rank'] if self.use_multi_machine_ddp else gpu_idx
+        print("before init_tcp!")
+        if hparams['init_method'] == 'file':
+            self.init_ddp_connection_file(self.proc_rank, self.num_total_gpus)
+        elif hparams['init_method'] == 'tcp':
+            self.init_ddp_connection_tcp(self.proc_rank, self.num_total_gpus)
+        else:
+            raise NotImplementedError()
+
+        # silence every worker except local index 0 unless debugging
+        if gpu_idx != 0 and not self.debug:
+            sys.stdout = open(os.devnull, "w")
+            sys.stderr = open(os.devnull, "w")
+        dist.barrier()
+        print("after init_tcp!")
+
+        task = task_cls()
+        task.trainer = self
+        torch.cuda.set_device(gpu_idx)
+        self.task = task
+        self.run_single_process(task)
+
+    def run_single_process(self, task):
+        """Build the model, restore state and run training or testing in this process.
+
+        Rank 0 additionally mirrors the terminal output to a log file and, when
+        training, snapshots the code. If the run raises, the traceback is
+        printed, ``on_keyboard_interrupt`` is called on the task and rank 0
+        issues a ``pkill`` for the worker processes of this work_dir.
+
+        :param task: the task instance to run
+        """
+        # build model, optm and load checkpoint
+        if self.proc_rank == 0:
+            self.save_terminal_logs()
+            if not self.testing:
+                self.save_codes()
+
+        model = task.build_model()
+        if model is not None:
+            task.model = model
+        checkpoint, _ = get_last_checkpoint(self.work_dir, self.resume_from_checkpoint)
+        if checkpoint is not None:
+            self.restore_weights(checkpoint)
+        elif self.on_gpu:
+            task.cuda(self.root_gpu)
+        if not self.testing:
+            self.optimizers = task.configure_optimizers()
+            # (sic) the misspelled attribute name is what `train` reads
+            self.fisrt_epoch = True
+        if checkpoint is not None:
+            self.restore_opt_state(checkpoint)
+        del checkpoint
+        # clear cache after restore
+        if self.on_gpu:
+            torch.cuda.empty_cache()
+
+        if self.use_ddp:
+            self.task = self.configure_ddp(self.task)
+            dist.barrier()
+
+        task_ref = self.get_task_ref()
+        task_ref.trainer = self
+        task_ref.testing = self.testing
+        # link up experiment object; only rank 0 logs into the work dir
+        if self.proc_rank == 0:
+            task_ref.build_tensorboard(save_dir=self.work_dir, name='tb_logs')
+        else:
+            os.makedirs('tmp', exist_ok=True)
+            task_ref.build_tensorboard(save_dir='tmp', name='tb_tmp')
+        self.logger = task_ref.logger
+        try:
+            if self.testing:
+                self.run_evaluation(test=True)
+            else:
+                self.train()
+        except BaseException:
+            traceback.print_exc()
+            task_ref.on_keyboard_interrupt()
+            time.sleep(5)
+            if self.proc_rank == 0:
+                # "\\(" yields the same backslash-paren as before without
+                # relying on an invalid string escape
+                subprocess.check_call(f'pkill -f "GeneFace_worker \\({hparams["work_dir"]}"', shell=True)
+
+    ####################
+    # valid and test
+    ####################
+    def run_evaluation(self, test=False):
+        """Evaluate, log the returned 'tb_log' metrics and (validation only) checkpoint.
+
+        :param test: run the test set instead of the validation set; no
+            checkpoint is saved in that case
+        """
+        desc = 'test' if test else 'Valid'
+        eval_results = self.evaluate(self.task, test, tqdm_desc=desc,
+                                     max_batches=hparams['eval_max_batches'])
+        has_tb_log = eval_results is not None and 'tb_log' in eval_results
+        if has_tb_log:
+            self.log_metrics_to_tb(eval_results['tb_log'])
+        if not test and self.proc_rank == 0:
+            self.save_checkpoint(epoch=self.current_epoch, logs=eval_results)
+
+    def evaluate(self, task, test=False, tqdm_desc='Valid', max_batches=None):
+        """Run one validation or test pass with gradients disabled.
+
+        :param task: the (possibly DDP-wrapped) task; put into eval mode for the
+            pass and back into train mode afterwards
+        :param test: use the test hooks and dataloader instead of the validation ones
+        :param tqdm_desc: progress bar label
+        :param max_batches: stop after this many batches; None or -1 means no limit
+        :return: the result of ``test_end`` / ``validation_end``, or None when
+            ``test_start`` returns 'EXIT'
+        """
+        if max_batches == -1:
+            max_batches = None
+        # enable eval mode
+        task.zero_grad()
+        task.eval()
+        torch.set_grad_enabled(False)
+
+        task_ref = self.get_task_ref()
+        if test:
+            ret = task_ref.test_start()
+            if ret == 'EXIT':
+                # NOTE(review): this early exit leaves eval mode on and gradients disabled
+                return
+        else:
+            task_ref.validation_start()
+        outputs = []
+        dataloader = task_ref.test_dataloader() if test else task_ref.val_dataloader()
+        # the progress bar is only shown for root_gpu 0
+        pbar = tqdm.tqdm(dataloader, desc=tqdm_desc, total=max_batches, dynamic_ncols=True, unit='step',
+                         disable=self.root_gpu > 0)
+        for batch_idx, batch in enumerate(pbar):
+            if batch is None:  # pragma: no cover
+                continue
+            # stop short once max_batches batches have been seen
+            if max_batches is not None and batch_idx >= max_batches:
+                break
+
+            if self.on_gpu:
+                batch = move_to_cuda(batch, self.root_gpu)
+            args = [batch, batch_idx]
+            # under DDP the wrapped task is called directly; otherwise the
+            # matching step method is invoked on the bare task
+            if self.use_ddp:
+                output = task(*args)
+            else:
+                if test:
+                    output = task_ref.test_step(*args)
+                else:
+                    output = task_ref.validation_step(*args)
+            # track outputs for collation
+            outputs.append(output)
+        # give model a chance to do something with the outputs (and method defined)
+        if test:
+            eval_results = task_ref.test_end(outputs)
+        else:
+            eval_results = task_ref.validation_end(outputs)
+        # enable train mode again
+        task.train()
+        torch.set_grad_enabled(True)
+        return eval_results
+
+    ####################
+    # train
+    ####################
+    def train(self):
+        """Main training loop; runs until ``global_step`` exceeds ``max_updates``.
+
+        Optionally runs a short sanity validation first. Inside the loop the
+        dataloader iterator is re-created whenever it is exhausted, validation
+        runs every ``val_check_interval`` steps (never on the very first
+        iteration) and metrics are logged every ``tb_log_interval`` steps.
+        """
+        task_ref = self.get_task_ref()
+        task_ref.on_train_start()
+        if self.num_sanity_val_steps > 0:
+            # run tiny validation (if validation defined) to make sure program won't crash during val
+            self.evaluate(self.task, False, 'Sanity Val', max_batches=self.num_sanity_val_steps)
+        # clear cache before training
+        if self.on_gpu:
+            torch.cuda.empty_cache()
+        dataloader = task_ref.train_dataloader()
+        epoch = self.current_epoch
+        # run all epochs
+        while True:
+            # set seed for distributed sampler (enables shuffling for each epoch)
+            if self.use_ddp and hasattr(dataloader.sampler, 'set_epoch'):
+                dataloader.sampler.set_epoch(epoch)
+            # update training progress in trainer and model
+            task_ref.current_epoch = epoch
+            self.current_epoch = epoch
+            # total batches includes multiple val checks
+            self.batch_loss_value = 0  # accumulated grads
+            # before epoch hook
+            task_ref.on_epoch_start()
+
+            # run epoch
+            train_pbar = tqdm.tqdm(dataloader, initial=self.global_step, total=float('inf'),
+                                   dynamic_ncols=True, unit='step', disable=self.root_gpu > 0)
+            # for batch_idx, batch in enumerate(train_pbar):
+            train_iterator = iter(enumerate(train_pbar))
+            while True:
+                with Timer("get_batch", enable=self.debug):
+                    try:
+                        batch_idx, batch = next(train_iterator)
+                    except StopIteration:
+                        # dataloader exhausted: start over without leaving the inner loop
+                        train_iterator = iter(enumerate(train_pbar))
+                        batch_idx, batch = next(train_iterator)
+
+                # `fisrt_epoch` (sic) is True only until the first batch has run,
+                # which suppresses validation on the very first iteration
+                if self.global_step % self.val_check_interval == 0 and not self.fisrt_epoch:
+                    self.run_evaluation()
+                pbar_metrics, tb_metrics = self.run_training_batch(batch_idx, batch)
+                train_pbar.set_postfix(**pbar_metrics)
+                self.fisrt_epoch = False
+                # when metrics should be logged
+                if (self.global_step + 1) % self.tb_log_interval == 0:
+                    # logs user requested information to logger
+                    self.log_metrics_to_tb(tb_metrics)
+
+                self.global_step += 1
+                task_ref.global_step = self.global_step
+                if self.global_step > self.max_updates:
+                    print("| Training end..")
+                    break
+            # epoch end hook
+            epoch_loss_dict = task_ref.on_epoch_end()
+            self.log_metrics_to_tb(epoch_loss_dict)
+            epoch += 1
+            if self.global_step > self.max_updates:
+                break
+        task_ref.on_train_end()
+
+    def run_training_batch(self, batch_idx, batch):
+        """Run forward/backward (and, when due, the optimizer step) for one batch.
+
+        Every non-None optimizer is handled in turn. With several optimizers,
+        only the parameters of the current one have ``requires_grad`` enabled
+        during its training step. The optimizer steps when ``global_step + 1``
+        is a multiple of ``accumulate_grad_batches`` and no NaN gradient was
+        detected (NaN detection only runs if ``print_nan_grads`` is set).
+
+        :param batch_idx: index of the batch within the current iterator
+        :param batch: the batch, or None to skip
+        :return: tuple ``(progress_bar_metrics, tb_log_metrics)``, each a flat
+            dict merged over all optimizers; two empty dicts when ``batch`` is None
+        """
+        if batch is None:
+            # the caller unpacks two values, so return an empty pair
+            return {}, {}
+        all_progress_bar_metrics = []
+        all_log_metrics = []
+        task_ref = self.get_task_ref()
+        for opt_idx, optimizer in enumerate(self.optimizers):
+            if optimizer is None:
+                continue
+            # make sure only the gradients of the current optimizer's paramaters are calculated
+            # in the training step to prevent dangling gradients in multiple-optimizer setup.
+            if len(self.optimizers) > 1:
+                for k, param in task_ref.named_parameters():
+                    param.requires_grad = False
+                for group in optimizer.param_groups:
+                    for param in group['params']:
+                        param.requires_grad = True
+
+            # forward pass
+            with Timer("forward_training_step", enable=self.debug):
+                with autocast(enabled=self.amp):
+                    if self.on_gpu:
+                        batch = move_to_cuda(copy.copy(batch), self.root_gpu)
+                    args = [batch, batch_idx, opt_idx]
+                    if self.use_ddp:
+                        output = self.task(*args)
+                    else:
+                        output = task_ref.training_step(*args)
+                    loss = output['loss']
+                    if loss is None:
+                        # nothing to optimize for this optimizer on this batch
+                        continue
+                    progress_bar_metrics = output['progress_bar']
+                    log_metrics = output['tb_log']
+                    # accumulate loss
+                    loss = loss / self.accumulate_grad_batches
+
+            # backward pass
+            with Timer("backward_training_step", enable=self.debug):
+                if loss.requires_grad:
+                    if self.amp:
+                        self.amp_scalar.scale(loss).backward()
+                    else:
+                        loss.backward()
+
+                # track progress bar metrics
+                all_log_metrics.append(log_metrics)
+                all_progress_bar_metrics.append(progress_bar_metrics)
+
+            # nan grads
+            with Timer("checkNan_training_step", enable=self.debug):
+                has_nan_grad = False
+                nan_params_names = []
+                if self.print_nan_grads:
+                    for name, param in task_ref.named_parameters():
+                        if (param.grad is not None) and torch.isnan(param.grad.float()).any():
+                            print("| NaN params: ", name, param, param.grad)
+                            has_nan_grad = True
+                            nan_params_names.append(name)
+                    if has_nan_grad:
+                        print(f"| WARN: found nan in grad! first nan params: {nan_params_names[0]}; last nan params: {nan_params_names[-1]}.")
+
+            # gradient update with accumulated gradients
+            with Timer("optimUpdate_training_step", enable=self.debug):
+                if (self.global_step + 1) % self.accumulate_grad_batches == 0 and not has_nan_grad:
+                    # Unscales the gradients of optimizer's assigned params in-place
+                    if self.amp:
+                        self.amp_scalar.unscale_(optimizer)
+                    grad_norm_dict = task_ref.on_before_optimization(opt_idx)
+                    if grad_norm_dict is not None:
+                        all_log_metrics[-1].update(grad_norm_dict)
+                    if self.amp:
+                        self.amp_scalar.step(optimizer)
+                        self.amp_scalar.update()
+                    else:
+                        optimizer.step()
+                    optimizer.zero_grad()
+                    task_ref.on_after_optimization(self.current_epoch, batch_idx, optimizer, opt_idx)
+
+        # collapse all metrics into one dict
+        all_progress_bar_metrics = {k: v for d in all_progress_bar_metrics for k, v in d.items()}
+        all_log_metrics = {k: v for d in all_log_metrics for k, v in d.items()}
+        return all_progress_bar_metrics, all_log_metrics
+
+    ####################
+    # load and save checkpoint
+    ####################
+    def restore_weights(self, checkpoint):
+        """Load module weights and trainer progress from ``checkpoint``.
+
+        Each entry of ``checkpoint['state_dict']`` is loaded (strictly) into
+        the task attribute of the same name; entries without a matching
+        attribute are reported and skipped. Afterwards the best monitored value,
+        global step and epoch are restored.
+
+        :param checkpoint: dict with 'state_dict', 'checkpoint_callback_best',
+            'global_step' and 'epoch'
+        """
+        task_ref = self.get_task_ref()
+
+        for module_name, module_state in checkpoint['state_dict'].items():
+            if not hasattr(task_ref, module_name):
+                print(f"| the checkpoint has unmatched keys {module_name}")
+                continue
+            getattr(task_ref, module_name).load_state_dict(module_state, strict=True)
+
+        if self.on_gpu:
+            task_ref.cuda(self.root_gpu)
+
+        # training state (affects trainer only)
+        self.best_val_results = checkpoint['checkpoint_callback_best']
+        self.global_step = checkpoint['global_step']
+        self.current_epoch = checkpoint['epoch']
+        task_ref.global_step = self.global_step
+
+        # wait for all processes to finish restoring
+        if self.use_ddp:
+            dist.barrier()
+
+    def restore_opt_state(self, checkpoint):
+        """Load the optimizer states stored in ``checkpoint``.
+
+        Does nothing while testing. A state whose parameters do not match its
+        optimizer is reported and skipped.
+
+        :param checkpoint: dict with an 'optimizer_states' list
+        :return: True on rank 0 (or without an initialized process group),
+            None otherwise
+        """
+        if self.testing:
+            return
+        optimizer_states = checkpoint['optimizer_states']
+        # `dump_checkpoint` stores states only for non-None optimizers, so pair
+        # the saved states with the non-None optimizers to keep them aligned
+        active_optimizers = [opt for opt in self.optimizers if opt is not None]
+        for optimizer, opt_state in zip(active_optimizers, optimizer_states):
+            try:
+                optimizer.load_state_dict(opt_state)
+                # move optimizer to GPU 1 weight at a time
+                if self.on_gpu:
+                    for state in optimizer.state.values():
+                        for k, v in state.items():
+                            if isinstance(v, torch.Tensor):
+                                state[k] = v.cuda(self.root_gpu)
+            except ValueError:
+                print("| WARMING: optimizer parameters not match !!!")
+        try:
+            if dist.is_initialized() and dist.get_rank() > 0:
+                return
+        except Exception as e:
+            print(e)
+            return
+        did_restore = True
+        return did_restore
+
+    def save_checkpoint(self, epoch, logs=None):
+        """Save a step checkpoint, prune old ones and update the best checkpoint.
+
+        Checkpoints beyond the first ``num_ckpt_keep`` returned by
+        ``get_all_ckpts`` are deleted, except those whose step is a multiple of
+        ``hparams['ckpt_milestone_interval']`` (default 100000; 0 disables
+        milestones). If ``logs`` contains ``monitor_key`` and ``save_best`` is
+        set, ``model_ckpt_best.pt`` is rewritten whenever the value improves
+        according to ``monitor_mode``.
+
+        :param epoch: epoch number, used in log messages only
+        :param logs: evaluation results, or None
+        """
+        ckpt_path = f'{self.work_dir}/model_ckpt_steps_{self.global_step}.ckpt'
+        logging.info(f'Epoch {epoch:05d}@{self.global_step}: saving model to {ckpt_path}')
+        self._atomic_save(ckpt_path)
+
+        # raw string: the regex escapes must reach `re` untouched
+        get_ckpt_step_fn = lambda x: int(re.findall(r'.*steps_(\d+)\.ckpt', x)[0])
+        milestone_interval = hparams.get("ckpt_milestone_interval", 10_0000)
+        for old_ckpt in get_all_ckpts(self.work_dir)[self.num_ckpt_keep:]:
+            # leave the milestone ckpts
+            if milestone_interval != 0 and get_ckpt_step_fn(old_ckpt) % milestone_interval == 0:
+                continue
+            remove_file(old_ckpt)
+            logging.info(f'Delete ckpt: {os.path.basename(old_ckpt)}')
+        current = None
+        if logs is not None and self.monitor_key in logs:
+            current = logs[self.monitor_key]
+        if current is not None and self.save_best:
+            # use the comparison selected by `monitor_mode` in __init__
+            if self.monitor_op(current, self.best_val_results):
+                best_filepath = f'{self.work_dir}/model_ckpt_best.pt'
+                self.best_val_results = current
+                logging.info(
+                    f'Epoch {epoch:05d}@{self.global_step}: {self.monitor_key} reached {current:0.5f}. '
+                    f'Saving model to {best_filepath}')
+                self._atomic_save(best_filepath)
+
+    def _atomic_save(self, filepath):
+        """Write the current checkpoint to ``filepath`` via a temporary file.
+
+        The checkpoint is first saved to ``<filepath>.part`` and then moved
+        over the final name with ``os.replace``.
+        """
+        state = self.dump_checkpoint()
+        part_path = f"{filepath}.part"
+        torch.save(state, part_path, _use_new_zipfile_serialization=False)
+        os.replace(part_path, filepath)
+    
+    def dump_checkpoint(self):
+        """Assemble the checkpoint dict for the current training state.
+
+        :return: dict with 'epoch', 'global_step', 'checkpoint_callback_best',
+            'optimizer_states' (one state dict per non-None optimizer) and
+            'state_dict' (one state dict per child module of the task that has
+            parameters and is not listed in ``hparams['not_save_modules']``).
+            The substring '_orig_mod.' is stripped from all keys.
+        """
+        optimizer_states = []
+        for optimizer in self.optimizers:
+            if optimizer is None:
+                continue
+            opt_state = optimizer.state_dict()
+            optimizer_states.append({k.replace('_orig_mod.', ''): v for k, v in opt_state.items()})
+
+        task_ref = self.get_task_ref()
+        module_states = {
+            name: module.state_dict() for name, module in task_ref.named_children()
+            if len(list(module.parameters())) > 0 and (name not in hparams.get('not_save_modules', []))
+        }
+        for module_state in list(module_states.values()):
+            for key, value in list(module_state.items()):
+                if '_orig_mod.' not in key:
+                    continue
+                module_state[key.replace('_orig_mod.', '')] = value
+                del module_state[key]
+
+        return {
+            'epoch': self.current_epoch,
+            'global_step': self.global_step,
+            'checkpoint_callback_best': self.best_val_results,
+            'optimizer_states': optimizer_states,
+            'state_dict': module_states,
+        }
+    ####################
+    # DDP
+    ####################
+    def configure_ddp(self, task):
+        """Wrap ``task`` for distributed training and seed the Python/NumPy RNGs.
+
+        BatchNorm layers are converted to SyncBatchNorm before the task is
+        wrapped in DDP on ``self.root_gpu`` (with ``find_unused_parameters=True``).
+
+        :return: the DDP-wrapped task
+        """
+        synced_task = torch.nn.SyncBatchNorm.convert_sync_batchnorm(task)
+        ddp_task = DDP(synced_task, device_ids=[self.root_gpu], find_unused_parameters=True)
+        random.seed(self.seed)
+        np.random.seed(self.seed)
+        return ddp_task
+
+    def init_ddp_connection_file(self, proc_rank, world_size):
+        """Join the NCCL process group through a shared file.
+
+        The file lives on a network file system and is named after
+        ``hparams['exp_name']``. If more workers than ``world_size`` seem to be
+        present, remove the old shared file.
+
+        :param proc_rank: rank of this process
+        :param world_size: total number of processes
+        """
+        exp_name = hparams['exp_name']
+        shared_file_name = f'file:///home/tiger/nfs/pytorch_ddp_sharedfile/{exp_name}'
+        # strip the URL scheme to get the directory that must exist on disk
+        shared_dir = os.path.dirname(shared_file_name).replace("file://","")
+        os.makedirs(shared_dir, exist_ok=True)
+        dist.init_process_group('nccl', init_method=shared_file_name,
+                                world_size=world_size, rank=proc_rank)
+        
+    def init_ddp_connection_tcp(self, proc_rank, world_size):
+        """Join the NCCL process group via the MASTER_ADDR / MASTER_PORT environment.
+
+        Multi-machine: the address is the first host in ``ARNOLD_WORKER_HOSTS``
+        (``host:port`` entries, comma separated) and MASTER_PORT is set to 6668.
+        Single machine: the address is 127.0.0.1 and MASTER_PORT is left as is.
+
+        :param proc_rank: rank of this process
+        :param world_size: total number of processes
+        """
+        if not self.use_multi_machine_ddp:
+            root_node = '127.0.0.1'
+        else:
+            first_host = os.environ['ARNOLD_WORKER_HOSTS'].split(",")[0]
+            # the advertised port is parsed but not used
+            root_node, port = first_host.split(":")
+            os.environ['MASTER_PORT'] = '6668'
+        os.environ['MASTER_ADDR'] = self.resolve_root_node_address(root_node)
+
+        dist.init_process_group('nccl', rank=proc_rank, world_size=world_size,
+                                timeout=datetime.timedelta(seconds=600))
+
+    def resolve_root_node_address(self, root_node):
+        """Turn a bracketed node list into the name of its first node.
+
+        ``'node[1-3,5]'`` becomes ``'node1'``; a name without ``[`` is returned
+        unchanged. The number is taken only from the part after ``[``, so
+        digits or dashes in the prefix (``'host-9[100-200]'``) do not affect it.
+
+        :param root_node: host name, optionally with a ``[...]`` range suffix
+        :return: the resolved host name
+        """
+        if '[' in root_node:
+            name, numbers = root_node.split('[', maxsplit=1)
+            # first entry of the comma separated list, first bound of a range
+            number = numbers.split(',', maxsplit=1)[0]
+            if '-' in number:
+                number = number.split('-')[0]
+            number = re.sub('[^0-9]', '', number)
+            root_node = name + number
+        return root_node
+
+    ####################
+    # utils
+    ####################
+    def get_task_ref(self):
+        """Return the bare task, unwrapping the DDP wrapper if there is one."""
+        from utils.commons.base_task import BaseTask
+        task: BaseTask = self.task
+        if isinstance(task, DDP):
+            task = task.module
+        return task
+
+    def log_metrics_to_tb(self, metrics, step=None):
+        """Convert ``metrics`` to scalars and log them (rank 0 only).
+
+        :param metrics: dict of metric name to value
+        :param step: step to log under; defaults to ``self.global_step``
+        """
+        scalar_metrics = self.metrics_to_scalars(metrics)
+        if step is None:
+            step = self.global_step
+        if self.proc_rank == 0:
+            self.log_metrics(self.logger, scalar_metrics, step=step)
+
+    @staticmethod
+    def log_metrics(logger, metrics, step=None):
+        """Send every entry of ``metrics`` to ``logger.add_scalar``.
+
+        Tensor values are converted with ``.item()`` first.
+        """
+        for tag, value in metrics.items():
+            scalar = value.item() if isinstance(value, torch.Tensor) else value
+            logger.add_scalar(tag, scalar, step)
+
+    def metrics_to_scalars(self, metrics):
+        """Return a copy of ``metrics`` with tensors replaced by Python scalars.
+
+        Plain dict values are converted recursively; other values are kept.
+        """
+        scalars = {}
+        for key, value in metrics.items():
+            if isinstance(value, torch.Tensor):
+                value = value.item()
+            elif type(value) is dict:
+                value = self.metrics_to_scalars(value)
+            scalars[key] = value
+        return scalars
+
+    def save_terminal_logs(self):
+        """Start mirroring stdout into ``<work_dir>/terminal_logs/log_<timestamp>.txt``."""
+        log_dir = f'{self.work_dir}/terminal_logs'
+        timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
+        os.makedirs(log_dir, exist_ok=True)
+        # the Tee installs itself as sys.stdout, which keeps it alive
+        Tee(f'{log_dir}/log_{timestamp}.txt', 'w')
+
+    def save_codes(self):
+        """Snapshot the paths listed in ``hparams['save_codes']`` into the work dir.
+
+        Existing paths are copied with ``rsync -aR`` into
+        ``<work_dir>/codes/<timestamp>/``; only ``*.py`` and ``*.yaml`` files are
+        included and ``__pycache__`` is excluded. Nothing happens when the list
+        is empty.
+        """
+        if len(hparams['save_codes']) == 0:
+            return
+        timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
+        code_dir = f'{self.work_dir}/codes/{timestamp}'
+        subprocess.check_call(f'mkdir -p "{code_dir}"', shell=True)
+        rsync_prefix = (
+            'rsync -aR '
+            '--include="*.py" '
+            '--include="*.yaml" '
+            '--exclude="__pycache__" '
+            '--include="*/" '
+            '--exclude="*" '
+        )
+        for c in hparams['save_codes']:
+            if not os.path.exists(c):
+                continue
+            subprocess.check_call(f'{rsync_prefix}"./{c}" "{code_dir}/"', shell=True)
+        print(f"| Copied codes to {code_dir}.")
diff --git a/utils/nn/grad.py b/utils/nn/grad.py
new file mode 100644
index 0000000000000000000000000000000000000000..7a09808251a017f91a24f443d699b2db7c1d17ff
--- /dev/null
+++ b/utils/nn/grad.py
@@ -0,0 +1,44 @@
+import torch
+
+def get_grad_norm(model, l=2):
+    """Return the global L1 or L2 norm over all available gradients.
+
+    :param model: a ``torch.nn.Module`` or an iterable of parameters; entries
+        whose ``.grad`` is ``None`` are skipped.
+    :param l: norm order, ``1`` or ``2``.
+    :return: the norm as a Python float (``0.0`` when nothing has a gradient).
+    :raises ValueError: if ``l`` is neither 1 nor 2.
+    """
+    if l not in (1, 2):
+        raise ValueError("Now we only implement l1/l2 norm !")
+    params = model.parameters() if isinstance(model, torch.nn.Module) else model
+    accu_grad = 0.0
+    for p in params:
+        if p.grad is None:
+            continue
+        if l == 1:
+            # Tensor.abs() takes no positional argument; ``abs(1)`` raised TypeError.
+            accu_grad = accu_grad + p.grad.abs().sum()
+        else:
+            accu_grad = accu_grad + p.grad.pow(2).sum()
+    if l == 2:
+        accu_grad = accu_grad ** 0.5
+    # accu_grad is still a plain number when no parameter had a gradient.
+    if isinstance(accu_grad, torch.Tensor):
+        return accu_grad.item()
+    return float(accu_grad)
+
+class GradBuffer:
+    """Accumulates per-parameter gradients across calls, keyed by parameter name."""
+
+    def __init__(self):
+        # parameter name -> running sum of the gradients seen by ``add``
+        self.buffer = {}
+
+    def add(self, model):
+        """Add the current gradients of ``model`` to the buffer.
+
+        Parameters whose ``grad`` is ``None`` are skipped.
+        """
+        for name, param in model.named_parameters():
+            if param.grad is not None:
+                self.buffer[name] = self.buffer.get(name, 0) + param.grad.data
+
+    def apply(self, model):
+        """Add the buffered sums onto the gradients of ``model``, then clear the buffer.
+
+        Only parameters that currently have a gradient and an entry in the
+        buffer are updated; the buffer is emptied in every case.
+        """
+        for name, param in model.named_parameters():
+            if param.grad is None or name not in self.buffer:
+                continue
+            param.grad.data += self.buffer[name]
+        self.buffer = {}
\ No newline at end of file
diff --git a/utils/nn/model_utils.py b/utils/nn/model_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3585da67555ff6ec53c12de14c2678fc10195c25
--- /dev/null
+++ b/utils/nn/model_utils.py
@@ -0,0 +1,32 @@
+import numpy as np
+import torch
+
+
+def print_arch(model, model_name='model'):
+    """Print the structure of ``model``, then its trainable-parameter count."""
+    banner = f"| {model_name} Arch: "
+    print(banner, model)
+    num_params(model, model_name=model_name)
+
+
+def num_params(model, print_out=True, model_name="model"):
+    """Count the trainable parameters of ``model`` in millions.
+
+    :param model: object exposing ``parameters()``.
+    :param print_out: when true, also print the formatted count.
+    :param model_name: label used in the printed line.
+    :return: total element count over all parameters that have
+        ``requires_grad`` set, divided by 1e6.
+    """
+    trainable = [p for p in model.parameters() if p.requires_grad]
+    millions = sum([np.prod(p.size()) for p in trainable]) / 1_000_000
+    if print_out:
+        print(f'| {model_name} Trainable Parameters: %.3fM' % millions)
+    return millions
+
+def get_device_of_model(model):
+    """Return the device of the first parameter yielded by ``model.parameters()``."""
+    first_param = next(model.parameters())
+    return first_param.device
+
+def requires_grad(model):
+    """Enable gradients on ``model``.
+
+    For a ``torch.nn.Module`` every parameter gets ``requires_grad = True``;
+    for any other object the attribute is set on the object itself.
+    """
+    if not isinstance(model, torch.nn.Module):
+        model.requires_grad = True
+        return
+    for param in model.parameters():
+        param.requires_grad = True
+
+def not_requires_grad(model):
+    """Disable gradients on ``model``.
+
+    For a ``torch.nn.Module`` every parameter gets ``requires_grad = False``;
+    for any other object the attribute is set on the object itself.
+    """
+    if not isinstance(model, torch.nn.Module):
+        model.requires_grad = False
+        return
+    for param in model.parameters():
+        param.requires_grad = False
diff --git a/utils/nn/schedulers.py b/utils/nn/schedulers.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5f3f671007c915adcc0bffc874421cf7ceeeff4
--- /dev/null
+++ b/utils/nn/schedulers.py
@@ -0,0 +1,208 @@
+import numpy as np
+from utils.commons.hparams import hparams
+
+
+class NoneSchedule(object):
+    """Constant learning-rate schedule; base class of the other schedules in this module."""
+
+    def __init__(self, optimizer, lr):
+        self.optimizer = optimizer
+        self.constant_lr = lr
+        # Push the initial rate into the optimizer right away.
+        self.step(0)
+
+    def step(self, num_updates):
+        """Write the constant rate into every param group and return it.
+
+        ``num_updates`` is accepted for interface parity and is not used.
+        """
+        self.lr = self.constant_lr
+        for group in self.optimizer.param_groups:
+            group['lr'] = self.lr
+        return self.lr
+
+    def get_lr(self):
+        """Return the rate currently stored in the first param group."""
+        first_group = self.optimizer.param_groups[0]
+        return first_group['lr']
+
+    def get_last_lr(self):
+        """Alias of :meth:`get_lr`."""
+        return self.get_lr()
+
+
+class RSQRTSchedule(NoneSchedule):
+    """Linear warm-up followed by inverse-square-root decay.
+
+    ``lr = constant_lr * warmup * max(warmup_updates, num_updates) ** -0.5
+    * hidden_size ** -0.5``, floored at 1e-7 and written to every param group.
+    """
+    def __init__(self, optimizer, lr, warmup_updates, hidden_size):
+        self.optimizer = optimizer
+        self.constant_lr = lr
+        self.warmup_updates = warmup_updates
+        self.hidden_size = hidden_size
+        self.lr = lr
+        for param_group in optimizer.param_groups:
+            param_group['lr'] = self.lr
+        self.step(0)
+
+    def step(self, num_updates):
+        """Compute the rate for ``num_updates``, store it in the optimizer and return it."""
+        constant_lr = self.constant_lr
+        if self.warmup_updates > 0:
+            warmup = min(num_updates / self.warmup_updates, 1.0)
+        else:
+            # No warm-up configured: skip the ramp instead of dividing by zero.
+            warmup = 1.0
+        # The extra 1 keeps the base positive when both counters are 0,
+        # where 0 ** -0.5 would raise ZeroDivisionError.
+        rsqrt_decay = max(self.warmup_updates, num_updates, 1) ** -0.5
+        rsqrt_hidden = self.hidden_size ** -0.5
+        self.lr = max(constant_lr * warmup * rsqrt_decay * rsqrt_hidden, 1e-7)
+        for param_group in self.optimizer.param_groups:
+            param_group['lr'] = self.lr
+        return self.lr
+
+
+class WarmupSchedule(NoneSchedule):
+    """Linear warm-up to ``lr`` over ``warmup_updates`` steps, then constant.
+
+    The rate is floored at 1e-7 and written to every param group.
+    """
+    def __init__(self, optimizer, lr, warmup_updates):
+        self.optimizer = optimizer
+        self.constant_lr = self.lr = lr
+        self.warmup_updates = warmup_updates
+        for param_group in optimizer.param_groups:
+            param_group['lr'] = self.lr
+        self.step(0)
+
+    def step(self, num_updates):
+        """Compute the rate for ``num_updates``, store it in the optimizer and return it."""
+        constant_lr = self.constant_lr
+        if self.warmup_updates > 0:
+            warmup = min(num_updates / self.warmup_updates, 1.0)
+        else:
+            # No warm-up configured: use the full rate instead of dividing by zero.
+            warmup = 1.0
+        self.lr = max(constant_lr * warmup, 1e-7)
+        for param_group in self.optimizer.param_groups:
+            param_group['lr'] = self.lr
+        return self.lr
+
+
+class ExponentialSchedule(NoneSchedule):
+    """Optional linear warm-up, then exponential decay (x0.1 per 250k updates).
+
+    During warm-up the rate is floored at 1e-7; afterwards at
+    ``hparams.get("min_lr", 1e-6)``. The decay exponent uses the absolute
+    ``num_updates``, not the number of updates since warm-up ended.
+    """
+    def __init__(self, optimizer, lr, warmup_updates):
+        self.optimizer = optimizer
+        self.warmup_updates = warmup_updates
+        self.constant_lr = lr
+        self.lr = lr
+        for group in optimizer.param_groups:
+            group['lr'] = self.lr
+        self.step(0)
+
+    def step(self, num_updates):
+        """Compute the rate for ``num_updates``, store it in the optimizer and return it."""
+        in_warmup = self.warmup_updates > 0 and num_updates <= self.warmup_updates
+        if in_warmup:
+            ramp = min(num_updates / self.warmup_updates, 1.0)
+            self.lr = max(self.constant_lr * ramp, 1e-7)
+        else:
+            # decay by 0.1x for every 250k steps
+            decayed = self.constant_lr * (0.1 ** (num_updates / 250_000))
+            self.lr = max(decayed, hparams.get("min_lr", 1e-6))
+        for group in self.optimizer.param_groups:
+            group['lr'] = self.lr
+        return self.lr
+
+
+class ExponentialScheduleWithAudattNet(NoneSchedule):
+    """
+    Default scheduler in AD-NeRF: optional linear warm-up, then x0.1 decay
+    per 250k updates, floored at 1e-7.
+
+    The optimizer must expose two param groups. ``param_groups[0]`` gets the
+    scheduled rate; ``param_groups[1]`` (the audatt net, which per the
+    original note starts training at 20_0000 steps and so needs a larger
+    rate) gets five times that rate.
+    """
+    def __init__(self, optimizer, lr, warmup_updates=0):
+        self.optimizer = optimizer
+        self.warmup_updates = warmup_updates
+        self.constant_lr = lr
+        self.lr = lr
+        self._write_lrs()
+        self.step(0)
+
+    def _write_lrs(self):
+        """Copy ``self.lr`` into the two param groups with their multipliers."""
+        groups = self.optimizer.param_groups
+        groups[0]['lr'] = self.lr
+        groups[1]['lr'] = self.lr * 5
+
+    def step(self, num_updates):
+        """Compute the rate for ``num_updates``, store it in the optimizer and return it."""
+        in_warmup = self.warmup_updates > 0 and num_updates <= self.warmup_updates
+        if in_warmup:
+            ramp = min(num_updates / self.warmup_updates, 1.0)
+            self.lr = max(self.constant_lr * ramp, 1e-7)
+        else:
+            # decay by 0.1x for every 250k steps
+            decayed = self.constant_lr * (0.1 ** (num_updates / 250_000))
+            self.lr = max(decayed, 1e-7)
+        self._write_lrs()
+        return self.lr
+
+class ExponentialScheduleForRADNeRF(NoneSchedule):
+    """
+    Default scheduler in RAD-NeRF: optional linear warm-up, then x0.1 decay
+    per 250k updates, floored at 1e-5.
+
+    The optimizer must expose three param groups, scaled as follows:
+      param_groups[0]: network params, lr        (starts at 5e-4 for lr=5e-4)
+      param_groups[1]: tileGrid embedding, lr*10 (starts at 5e-3)
+      param_groups[2]: attention net, lr*5       (starts at 2.5e-3)
+    """
+    def __init__(self, optimizer, lr, warmup_updates=0):
+        self.optimizer = optimizer
+        self.warmup_updates = warmup_updates
+        self.constant_lr = lr  # 0.0005
+        self.lr = lr
+        self.finetune_lips = hparams['finetune_lips']
+        self.finetune_lips_start_iter = hparams['finetune_lips_start_iter']
+
+        self._write_lrs()
+        self.step(0)
+
+    def _write_lrs(self):
+        """Copy ``self.lr`` into the three param groups with their multipliers."""
+        groups = self.optimizer.param_groups
+        groups[0]['lr'] = self.lr
+        groups[1]['lr'] = self.lr * 10
+        groups[2]['lr'] = self.lr * 5
+
+    def step(self, num_updates):
+        """Compute the rate for ``num_updates``, store it in the optimizer and return it."""
+        in_warmup = self.warmup_updates > 0 and num_updates <= self.warmup_updates
+        if in_warmup:
+            ramp = min(num_updates / self.warmup_updates, 1.0)
+            self.lr = max(self.constant_lr * ramp, 1e-5)
+        else:
+            # NOTE(review): the original branched here on finetune_lips and
+            # finetune_lips_start_iter, but both branches computed this same
+            # value, so those flags do not currently alter the schedule.
+            # decay by 0.1x for every 250k steps
+            decayed = self.constant_lr * (0.1 ** (num_updates / 250_000))
+            self.lr = max(decayed, 1e-5)
+        self._write_lrs()
+        return self.lr
+    
+
+class ExponentialScheduleForRADNeRFTorso(NoneSchedule):
+    """
+    Scheduler for the RAD-NeRF torso model: optional linear warm-up, then
+    x0.1 decay per 250k updates, floored at 1e-5.
+
+    The optimizer must expose two param groups:
+      param_groups[0]: network params, lr        (starts at 5e-4 for lr=5e-4)
+      param_groups[1]: tileGrid embedding, lr*10 (starts at 5e-3)
+    """
+    def __init__(self, optimizer, lr, warmup_updates=0):
+        self.optimizer = optimizer
+        self.warmup_updates = warmup_updates
+        self.constant_lr = lr  # 0.0005
+        self.lr = lr
+
+        self._write_lrs()
+        self.step(0)
+
+    def _write_lrs(self):
+        """Copy ``self.lr`` into the two param groups with their multipliers."""
+        groups = self.optimizer.param_groups
+        groups[0]['lr'] = self.lr
+        groups[1]['lr'] = self.lr * 10
+
+    def step(self, num_updates):
+        """Compute the rate for ``num_updates``, store it in the optimizer and return it."""
+        in_warmup = self.warmup_updates > 0 and num_updates <= self.warmup_updates
+        if in_warmup:
+            ramp = min(num_updates / self.warmup_updates, 1.0)
+            self.lr = max(self.constant_lr * ramp, 1e-5)
+        else:
+            # decay by 0.1x for every 250k steps
+            decayed = self.constant_lr * (0.1 ** (num_updates / 250_000))
+            self.lr = max(decayed, 1e-5)
+        self._write_lrs()
+        return self.lr
+    
+
+class CosineSchedule(NoneSchedule):
+    """Linear warm-up followed by half-cosine decay down to a 1e-5 floor.
+
+    ``self.lr`` keeps the peak rate for the whole run; the scheduled value is
+    written to the optimizer's param groups and returned by ``step``.
+    """
+    def __init__(self, optimizer, lr, warmup_updates, total_updates):
+        self.optimizer = optimizer
+        self.constant_lr = lr
+        self.warmup_updates = warmup_updates
+        self.total_updates = total_updates
+        self.lr = lr
+        self.assign_learning_rate(self.optimizer, self.lr)
+        self.step(0)
+
+    def assign_learning_rate(self, optimizer, new_lr):
+        """Write ``new_lr`` into every param group of ``optimizer``."""
+        for param_group in optimizer.param_groups:
+            param_group["lr"] = new_lr
+
+    def _warmup_lr(self, base_lr, warmup_length, step):
+        """Linear ramp that reaches ``base_lr`` at ``step == warmup_length - 1``."""
+        return base_lr * (step + 1) / warmup_length
+
+    def step(self, num_updates):
+        """Compute the rate for ``num_updates``, store it in the optimizer and return it."""
+        # Strict ``<``: at num_updates == warmup_updates the ramp formula would
+        # give lr * (w + 1) / w, which is above the configured peak.
+        if self.warmup_updates > 0 and num_updates < self.warmup_updates:
+            lr = self._warmup_lr(self.lr, self.warmup_updates, num_updates)
+        elif num_updates <= self.total_updates:
+            e = num_updates - self.warmup_updates
+            es = self.total_updates - self.warmup_updates
+            # es == 0 when warm-up spans the whole run; avoid 0 / 0.
+            progress = e / es if es > 0 else 0.0
+            lr = 0.5 * (1 + np.cos(np.pi * progress)) * self.lr
+        else:
+            lr = 1e-5
+        lr = max(1e-5, lr)
+        self.assign_learning_rate(self.optimizer, lr)
+        return lr
diff --git a/utils/nn/seq_utils.py b/utils/nn/seq_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..82779cbc9d9c348d72ca345606933fdccc00665d
--- /dev/null
+++ b/utils/nn/seq_utils.py
@@ -0,0 +1,328 @@
+from collections import defaultdict
+import torch
+import torch.nn.functional as F
+
+
+def make_positions(tensor, padding_idx):
+    """Replace non-padding symbols with their position numbers.
+
+    Positions are counted along dim 1 and start at ``padding_idx + 1``;
+    padding symbols map to ``padding_idx`` itself.
+    """
+    # The casts below are balanced to work with both ONNX export and XLA:
+    # XLA prefers ints, cumsum outputs longs by default, and ONNX cannot
+    # handle the dtype kwarg of cumsum.
+    non_pad = tensor.ne(padding_idx).int()
+    running_count = torch.cumsum(non_pad, dim=1).type_as(non_pad)
+    positions = (running_count * non_pad).long()
+    return positions + padding_idx
+
+
+def softmax(x, dim):
+    """Softmax of ``x`` along ``dim``, computed and returned in float32."""
+    return torch.softmax(x, dim, dtype=torch.float32)
+
+
+def sequence_mask(lengths, maxlen=None, dtype=torch.bool):
+    """Build a ``[len(lengths), maxlen]`` mask that is set at valid positions.
+
+    :param lengths: 1-D tensor of sequence lengths.
+    :param maxlen: mask width; defaults to ``lengths.max()`` when ``None``.
+    :param dtype: dtype of the returned mask.
+    :return: mask whose entry ``[b, j]`` is set iff ``j + 1 <= lengths[b]``.
+    """
+    if maxlen is None:
+        maxlen = lengths.max()
+    maxlen = int(maxlen)
+    # 1-based positions, so "valid" is position <= length.
+    positions = torch.arange(1, maxlen + 1, device=lengths.device)
+    mask = positions[None, :] <= lengths[:, None]
+    # Tensor.type() is not in-place: the original bare ``mask.type(dtype)``
+    # discarded the cast, so ``dtype`` was silently ignored.
+    return mask.type(dtype)
+
+
+def weights_nonzero_speech(target):
+    """Return float weights shaped like ``target``: 1.0 for non-padding frames, else 0.0.
+
+    A frame counts as padding when the absolute values along its last
+    dimension sum to zero.
+
+    :param target: B x T x mel
+    """
+    feat_dim = target.size(-1)
+    frame_energy = target.abs().sum(-1, keepdim=True)
+    non_pad = frame_energy.ne(0).float()
+    return non_pad.repeat(1, 1, feat_dim)
+
+
+# Per-class-name counter used to hand out instance ids; unseen names start at 0.
+INCREMENTAL_STATE_INSTANCE_ID = defaultdict(int)
+
+
+def _get_full_incremental_state_key(module_instance, key):
+    """Return the key ``"<ClassName>.<instance id>.<key>"`` for ``module_instance``.
+
+    The first call for an instance assigns it a unique ``_instance_id``
+    (counted per class name), so incremental state is not shared across
+    module instances.
+    """
+    module_name = module_instance.__class__.__name__
+
+    if not hasattr(module_instance, '_instance_id'):
+        next_id = INCREMENTAL_STATE_INSTANCE_ID[module_name] + 1
+        INCREMENTAL_STATE_INSTANCE_ID[module_name] = next_id
+        module_instance._instance_id = next_id
+
+    return f'{module_name}.{module_instance._instance_id}.{key}'
+
+
+def get_incremental_state(module, incremental_state, key):
+    """Helper for getting incremental state for an nn.Module.
+
+    Returns ``None`` when ``incremental_state`` is ``None`` or has no entry
+    for this module/key pair.
+    """
+    # Resolved first, as in the original order, so the module receives its
+    # instance id even when there is no state to read.
+    full_key = _get_full_incremental_state_key(module, key)
+    if incremental_state is not None and full_key in incremental_state:
+        return incremental_state[full_key]
+    return None
+
+
+def set_incremental_state(module, incremental_state, key, value):
+    """Helper for setting incremental state for an nn.Module.
+
+    Does nothing when ``incremental_state`` is ``None``.
+    """
+    if incremental_state is None:
+        return
+    full_key = _get_full_incremental_state_key(module, key)
+    incremental_state[full_key] = value
+
+
+def fill_with_neg_inf(t):
+    """FP16-compatible function that fills a tensor with -inf.
+
+    The fill is done on ``t.float()`` and the result is cast back with
+    ``type_as(t)``. Note: ``Tensor.float()`` returns ``t`` itself when ``t``
+    is already float32, in which case ``t`` is overwritten in place.
+    """
+    as_float = t.float()
+    as_float.fill_(float('-inf'))
+    return as_float.type_as(t)
+
+
+def fill_with_neg_inf2(t):
+    """FP16-compatible function that fills a tensor with -1e8 (a finite stand-in for -inf).
+
+    The fill is done on ``t.float()`` and the result is cast back with
+    ``type_as(t)``.
+    """
+    as_float = t.float()
+    as_float.fill_(-1e8)
+    return as_float.type_as(t)
+
+
+def select_attn(attn_logits, type='best'):
+    """
+    Reduce per-layer, per-head attention logits to one attention map per sample.
+
+    :param attn_logits: n_layers tensors of shape [B, n_head, T_sp, T_txt]
+        (stacked to [n_layers, B, n_head, T_sp, T_txt])
+    :param type: 'best' keeps, per sample, the (layer, head) map whose
+        max-over-T_txt probabilities sum highest over T_sp; 'mean' averages
+        all (layer, head) maps. Any other value returns None.
+    :return: [B, T_sp, T_txt] attention probabilities
+    """
+    stacked = torch.stack(attn_logits, 0).transpose(1, 2)
+    # [n_layers * n_head, B, T_sp, T_txt]
+    probs = stacked.reshape([-1, *stacked.shape[2:]]).softmax(-1)
+    if type == 'mean':
+        return probs.mean(0)
+    if type == 'best':
+        sharpness = probs.max(-1).values.sum(-1)  # [n_layers * n_head, B]
+        best_head = sharpness.argmax(0)  # [B]
+        gather_index = best_head[None, :, None, None].repeat(
+            1, 1, probs.size(-2), probs.size(-1))
+        return probs.gather(0, gather_index)[0]
+    return None
+
+
+def make_pad_mask(lengths, xs=None, length_dim=-1):
+    """Make mask tensor containing indices of padded part.
+
+    Args:
+        lengths (LongTensor or sequence of int): Batch of lengths (B,).
+        xs (Tensor, optional): The reference tensor.
+            If set, masks will be the same shape as this tensor.
+        length_dim (int, optional): Dimension of ``xs`` that holds the
+            sequence axis. Must not be 0 (the batch dimension).
+
+    Returns:
+        Tensor: Boolean mask that is true at padded positions. Without
+            ``xs`` the shape is ``(B, max(lengths))``; with ``xs`` the mask
+            has the shape of ``xs`` and is moved to its device.
+
+    Raises:
+        ValueError: If ``length_dim`` is 0.
+
+    Examples (shown as 0/1; 1 marks a padded position):
+        With only lengths.
+        >>> lengths = [5, 3, 2]
+        >>> make_pad_mask(lengths)
+        masks = [[0, 0, 0, 0 ,0],
+                 [0, 0, 0, 1, 1],
+                 [0, 0, 1, 1, 1]]
+        With the reference tensor.
+        >>> xs = torch.zeros((3, 2, 4))
+        >>> make_pad_mask(lengths, xs)
+        tensor([[[0, 0, 0, 0],
+                 [0, 0, 0, 0]],
+                [[0, 0, 0, 1],
+                 [0, 0, 0, 1]],
+                [[0, 0, 1, 1],
+                 [0, 0, 1, 1]]])
+        >>> xs = torch.zeros((3, 2, 6))
+        >>> make_pad_mask(lengths, xs)
+        tensor([[[0, 0, 0, 0, 0, 1],
+                 [0, 0, 0, 0, 0, 1]],
+                [[0, 0, 0, 1, 1, 1],
+                 [0, 0, 0, 1, 1, 1]],
+                [[0, 0, 1, 1, 1, 1],
+                 [0, 0, 1, 1, 1, 1]]])
+        With the reference tensor and dimension indicator.
+        >>> xs = torch.zeros((3, 6, 6))
+        >>> make_pad_mask(lengths, xs, 1)
+        tensor([[[0, 0, 0, 0, 0, 0],
+                 [0, 0, 0, 0, 0, 0],
+                 [0, 0, 0, 0, 0, 0],
+                 [0, 0, 0, 0, 0, 0],
+                 [0, 0, 0, 0, 0, 0],
+                 [1, 1, 1, 1, 1, 1]],
+                [[0, 0, 0, 0, 0, 0],
+                 [0, 0, 0, 0, 0, 0],
+                 [0, 0, 0, 0, 0, 0],
+                 [1, 1, 1, 1, 1, 1],
+                 [1, 1, 1, 1, 1, 1],
+                 [1, 1, 1, 1, 1, 1]],
+                [[0, 0, 0, 0, 0, 0],
+                 [0, 0, 0, 0, 0, 0],
+                 [1, 1, 1, 1, 1, 1],
+                 [1, 1, 1, 1, 1, 1],
+                 [1, 1, 1, 1, 1, 1],
+                 [1, 1, 1, 1, 1, 1]]])
+        >>> make_pad_mask(lengths, xs, 2)  # same as the default length_dim=-1 here
+        tensor([[[0, 0, 0, 0, 0, 1],
+                 [0, 0, 0, 0, 0, 1],
+                 [0, 0, 0, 0, 0, 1],
+                 [0, 0, 0, 0, 0, 1],
+                 [0, 0, 0, 0, 0, 1],
+                 [0, 0, 0, 0, 0, 1]],
+                [[0, 0, 0, 1, 1, 1],
+                 [0, 0, 0, 1, 1, 1],
+                 [0, 0, 0, 1, 1, 1],
+                 [0, 0, 0, 1, 1, 1],
+                 [0, 0, 0, 1, 1, 1],
+                 [0, 0, 0, 1, 1, 1]],
+                [[0, 0, 1, 1, 1, 1],
+                 [0, 0, 1, 1, 1, 1],
+                 [0, 0, 1, 1, 1, 1],
+                 [0, 0, 1, 1, 1, 1],
+                 [0, 0, 1, 1, 1, 1],
+                 [0, 0, 1, 1, 1, 1]]])
+    """
+    if length_dim == 0:
+        raise ValueError("length_dim cannot be 0: {}".format(length_dim))
+
+    if not isinstance(lengths, list):
+        # Tensors and arrays expose tolist(); tuples and other sequences do not.
+        lengths = lengths.tolist() if hasattr(lengths, "tolist") else list(lengths)
+    bs = int(len(lengths))
+    if xs is None:
+        maxlen = int(max(lengths))
+    else:
+        maxlen = xs.size(length_dim)
+
+    seq_range = torch.arange(0, maxlen, dtype=torch.int64)
+    seq_range_expand = seq_range.unsqueeze(0).expand(bs, maxlen)
+    # torch.tensor replaces the legacy ``Tensor.new(data)`` constructor.
+    seq_length_expand = torch.tensor(lengths, dtype=torch.int64).unsqueeze(-1)
+    mask = seq_range_expand >= seq_length_expand
+
+    if xs is not None:
+        assert xs.size(0) == bs, (xs.size(0), bs)
+
+        if length_dim < 0:
+            length_dim = xs.dim() + length_dim
+        # ind = (:, None, ..., None, :, None, ..., None): keep the batch and
+        # length axes, insert broadcast axes everywhere else.
+        ind = tuple(
+            slice(None) if i in (0, length_dim) else None for i in range(xs.dim())
+        )
+        mask = mask[ind].expand_as(xs).to(xs.device)
+    return mask
+
+
+def make_non_pad_mask(lengths, xs=None, length_dim=-1):
+    """Make mask tensor containing indices of non-padded part.
+
+    This is the element-wise inverse of ``make_pad_mask`` called with the
+    same arguments.
+
+    Args:
+        lengths (LongTensor or List): Batch of lengths (B,).
+        xs (Tensor, optional): The reference tensor.
+            If set, masks will be the same shape as this tensor.
+        length_dim (int, optional): Dimension indicator of the above tensor.
+            See the example.
+    Returns:
+        Tensor: mask tensor containing indices of the non-padded part.
+    Examples (shown as 0/1; 1 marks a valid position):
+        With only lengths.
+        >>> lengths = [5, 3, 2]
+        >>> make_non_pad_mask(lengths)
+        masks = [[1, 1, 1, 1 ,1],
+                 [1, 1, 1, 0, 0],
+                 [1, 1, 0, 0, 0]]
+        With the reference tensor.
+        >>> xs = torch.zeros((3, 2, 4))
+        >>> make_non_pad_mask(lengths, xs)
+        tensor([[[1, 1, 1, 1],
+                 [1, 1, 1, 1]],
+                [[1, 1, 1, 0],
+                 [1, 1, 1, 0]],
+                [[1, 1, 0, 0],
+                 [1, 1, 0, 0]]])
+        >>> xs = torch.zeros((3, 2, 6))
+        >>> make_non_pad_mask(lengths, xs)
+        tensor([[[1, 1, 1, 1, 1, 0],
+                 [1, 1, 1, 1, 1, 0]],
+                [[1, 1, 1, 0, 0, 0],
+                 [1, 1, 1, 0, 0, 0]],
+                [[1, 1, 0, 0, 0, 0],
+                 [1, 1, 0, 0, 0, 0]]])
+        With the reference tensor and dimension indicator.
+        >>> xs = torch.zeros((3, 6, 6))
+        >>> make_non_pad_mask(lengths, xs, 1)
+        tensor([[[1, 1, 1, 1, 1, 1],
+                 [1, 1, 1, 1, 1, 1],
+                 [1, 1, 1, 1, 1, 1],
+                 [1, 1, 1, 1, 1, 1],
+                 [1, 1, 1, 1, 1, 1],
+                 [0, 0, 0, 0, 0, 0]],
+                [[1, 1, 1, 1, 1, 1],
+                 [1, 1, 1, 1, 1, 1],
+                 [1, 1, 1, 1, 1, 1],
+                 [0, 0, 0, 0, 0, 0],
+                 [0, 0, 0, 0, 0, 0],
+                 [0, 0, 0, 0, 0, 0]],
+                [[1, 1, 1, 1, 1, 1],
+                 [1, 1, 1, 1, 1, 1],
+                 [0, 0, 0, 0, 0, 0],
+                 [0, 0, 0, 0, 0, 0],
+                 [0, 0, 0, 0, 0, 0],
+                 [0, 0, 0, 0, 0, 0]]])
+        >>> make_non_pad_mask(lengths, xs, 2)
+        tensor([[[1, 1, 1, 1, 1, 0],
+                 [1, 1, 1, 1, 1, 0],
+                 [1, 1, 1, 1, 1, 0],
+                 [1, 1, 1, 1, 1, 0],
+                 [1, 1, 1, 1, 1, 0],
+                 [1, 1, 1, 1, 1, 0]],
+                [[1, 1, 1, 0, 0, 0],
+                 [1, 1, 1, 0, 0, 0],
+                 [1, 1, 1, 0, 0, 0],
+                 [1, 1, 1, 0, 0, 0],
+                 [1, 1, 1, 0, 0, 0],
+                 [1, 1, 1, 0, 0, 0]],
+                [[1, 1, 0, 0, 0, 0],
+                 [1, 1, 0, 0, 0, 0],
+                 [1, 1, 0, 0, 0, 0],
+                 [1, 1, 0, 0, 0, 0],
+                 [1, 1, 0, 0, 0, 0],
+                 [1, 1, 0, 0, 0, 0]]])
+    """
+    pad_mask = make_pad_mask(lengths, xs, length_dim)
+    return ~pad_mask
+
+
+def get_mask_from_lengths(lengths):
+    """Return a ``[B, max(lengths)]`` boolean mask, true where position < length.
+
+    :param lengths: 1-D tensor of sequence lengths.
+    """
+    longest = torch.max(lengths).item()
+    positions = torch.arange(0, longest).to(lengths.device)
+    valid = positions < lengths.unsqueeze(1)
+    return valid.bool()
+
+
+def group_hidden_by_segs(h, seg_ids, max_len):
+    """
+    Average hidden states over the frames that share a segment id.
+
+    Frames are scatter-added into ``max_len + 1`` slots indexed by
+    ``seg_ids``; slot 0 is then dropped from both outputs.
+
+    :param h: [B, T, H]
+    :param seg_ids: [B, T]
+    :param max_len: number of segment slots kept (ids 1..max_len)
+    :return: h_ph: [B, max_len, H] per-segment means (zeros for empty
+        segments), and the per-segment frame counts [B, max_len]
+    """
+    B, _, H = h.shape
+    scatter_index = seg_ids[:, :, None].repeat([1, 1, H])
+    summed = h.new_zeros([B, max_len + 1, H]).scatter_add_(1, scatter_index, h)
+    frame_ones = h.new_ones(h.shape[:2])
+    counts = h.new_zeros([B, max_len + 1]).scatter_add_(1, seg_ids, frame_ones).contiguous()
+    summed, counts = summed[:, 1:], counts[:, 1:]
+    # clamp so empty segments divide by 1 instead of 0
+    means = summed / torch.clamp(counts[:, :, None], min=1)
+    return means, counts
+
+def expand_by_repeat_times(source_encoding, lengths):
+    """
+    Repeat each row of ``source_encoding`` the given number of times.
+
+    source_encoding: [T, C]
+    lengths, list of int, [T,], how many times each token should repeat
+    return:
+        expanded_encoding: [T_expand, C]
+    """
+    hid_dim = source_encoding.shape[1]
+    # out2source[j] is the source row that output row j copies
+    out2source = [i for i, times in enumerate(lengths) for _ in range(times)]
+    out2source = torch.LongTensor(out2source).to(source_encoding.device)
+    gather_index = out2source[:, None].repeat([1, hid_dim])
+    return torch.gather(source_encoding, 0, gather_index)  # [T_expand, C]
+
+
+def expand_word2ph(word_encoding, ph2word):
+    """Gather one word encoding per entry of ``ph2word``.
+
+    A zero row is prepended along dim 1 of ``word_encoding``, so index 0 in
+    ``ph2word`` yields zeros and index ``k >= 1`` yields word ``k - 1``.
+
+    :param word_encoding: [B, T_word, H]
+    :param ph2word: [B, T] integer indices
+    :return: [B, T, H]
+    """
+    padded = F.pad(word_encoding, [0, 0, 1, 0])
+    hidden = padded.shape[-1]
+    gather_index = ph2word[:, :, None].repeat([1, 1, hidden])
+    return torch.gather(padded, 1, gather_index)  # [B, T, H]
diff --git a/utils/useful_cmd_lines/clean_gpu.py b/utils/useful_cmd_lines/clean_gpu.py
new file mode 100644
index 0000000000000000000000000000000000000000..e80496155a2b8e35a78f07dcc1784d29e413d908
--- /dev/null
+++ b/utils/useful_cmd_lines/clean_gpu.py
@@ -0,0 +1,19 @@
+import os, re
+def clean_gpu():
+    """Interactively kill every process that ``fuser`` reports on ``/dev/nvidia*``.
+
+    The PIDs are read from ``fuser -v /dev/nvidia*``, the resulting
+    ``kill -9`` command is shown together with the fuser listing, and it is
+    executed only after the user answers ``y``/``Y``.
+    """
+    # ``with`` closes the pipe once the output has been read.
+    with os.popen("fuser -v /dev/nvidia*") as pipe:
+        ret = pipe.read()
+    ret = re.sub("kernel", " ", ret)
+    # split() also breaks on tabs and newlines; non-numeric tokens are
+    # ignored instead of crashing int().
+    pids = sorted({int(token) for token in ret.split() if token.isdigit()})
+    if not pids:
+        # Nothing to kill: do not offer a bare "kill -9".
+        print("No process is using /dev/nvidia*.")
+        return
+    ids_string = ' '.join(str(pid) for pid in pids)
+    cmd = f"kill -9 {ids_string}"
+    os.system("fuser -v /dev/nvidia*")
+    flag = input(f"You are going run this command: \n  ==>  \"{cmd}\" \nEnter y/Y to proceed, or other to abort.\n[y/n]")
+    if flag.lower() == 'y':
+        os.system(cmd)
+        print("All gpu process cleaned!")
+    else:
+        print("Aborted!")
+
+if __name__ == '__main__':
+    clean_gpu()
\ No newline at end of file
diff --git a/utils/visualization/auto_plot_image.py b/utils/visualization/auto_plot_image.py
new file mode 100644
index 0000000000000000000000000000000000000000..596881cf7b32b0611d8ab1b05eb8d1096b221d0b
--- /dev/null
+++ b/utils/visualization/auto_plot_image.py
@@ -0,0 +1,21 @@
+import torch
+import numpy as np
+import cv2
+
+def plot_image(save_path, image, convert_RGB2BGR=True):
+    """Write ``image`` to ``save_path`` with OpenCV after normalising it to uint8.
+
+    :param save_path: destination path passed to ``cv2.imwrite``.
+    :param image: ``torch.Tensor`` or numpy array. Values that look like
+        [0, 1] or [-1, 1] data are rescaled to [0, 255]; anything else is
+        only clipped to [0, 255]. A leading batch axis of size 1 is dropped,
+        and a 3-D array whose first axis has at most 4 entries is treated as
+        (C, H, W) and transposed to (H, W, C).
+    :param convert_RGB2BGR: when true, 3- and 4-channel images are converted
+        with ``cv2.COLOR_RGB2BGR`` before writing.
+    """
+    if isinstance(image, torch.Tensor):
+        image = image.detach().cpu().numpy()
+    image = image.astype(float)
+    if image.max() < 1.1 and image.min() > -0.1: # [0, 1]
+        image = image * 255
+    elif image.max() < 1.1 and image.min() > -1.1: # [-1, 1]
+        image = (image + 1.0) * 0.5 * 255
+    image = image.clip(0, 255)
+    image = image.astype(np.uint8)
+    if len(image.shape) == 4 and image.shape[0] == 1:
+        image = image[0]
+    if len(image.shape) == 3 and image.shape[0] <= 4: # C, H, W
+        # Plain numpy transpose, made contiguous for OpenCV.
+        image = np.ascontiguousarray(image.transpose(1, 2, 0))
+    # COLOR_RGB2BGR only accepts 3- or 4-channel input; 1- or 2-channel
+    # images used to raise inside cvtColor, so they are written as-is.
+    if len(image.shape) == 3 and image.shape[2] in (3, 4) and convert_RGB2BGR:
+        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+    cv2.imwrite(save_path, image)
\ No newline at end of file
diff --git a/utils/visualization/draw_3d_landmark.py b/utils/visualization/draw_3d_landmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe0ead4c75d52ff6329e1fd2a42d955578011026
--- /dev/null
+++ b/utils/visualization/draw_3d_landmark.py
@@ -0,0 +1,364 @@
+import cv2
+import math
+import numpy as np
+import matplotlib.pyplot as plt
+import dearpygui.dearpygui as dpg
+from scipy.spatial.transform import Rotation as R
+from utils.commons.hparams import set_hparams, hparams
+from data_util.face3d_helper import Face3DHelper
+
+face3d_helper = Face3DHelper(use_gpu=False)
+
+# NOTE: everything below runs at import time; hparams come from a hard-coded config path.
+set_hparams("egs/datasets/videos/May/radnerf_torso.yaml")
+# Imported only after set_hparams() has run (presumably the dataset reads hparams -- confirm).
+from tasks.radnerfs.dataset_utils import RADNeRFDataset
+dataset = RADNeRFDataset("val")
+idexp_lm3d_mean = dataset.idexp_lm3d_mean.reshape([68,3])
+lm3d_mean = idexp_lm3d_mean / 10 + face3d_helper.key_mean_shape
+lm3d_mean /= 1.5 # normalize to [-1,1]
+
+class Landmark3D:
+    """68-point 3D face landmark set that can be rasterised to an image with an MVP matrix."""
+    def __init__(self):
+        """Load the mean landmarks and define the segments / colours used for drawing."""
+        # homogeneous landmark coordinates [68, 4] (xyz from lm3d_mean, w = 1)
+        self.points3D = np.concatenate([lm3d_mean.numpy(), np.ones([68,1])],axis=1).reshape([68,4])
+
+        # line segments as [start_idx, end_idx] pairs into points3D, [65, 2]
+        self.lines = [ 
+                        # jaw line
+                        [0, 1], [1, 2], [2, 3], [3, 4], [4, 5], [5,6], [6,7], [7,8], [8,9], [9,10], [10,11], [11,12], [12,13], [13,14], [14,15], [15,16],
+                        # left brow
+                        [17,18], [18,19], [19,20], [20,21], 
+                        # right brow
+                        [22, 23], [23,24], [24,25], [25,26],
+                        # nose
+                        [27,28], [28,29], [29,30], [31,32], [32,33], [33,34], [34,35],
+                        # left eye
+                        [36,37], [37,38], [38,39], [39,40], [40,41], [41,36],
+                        # right eye
+                        [42,43], [43,44], [44,45], [45,46], [46,47], [47,42],
+                        # mouth
+                        [48, 49], [49,50], [50,51], [51,52], [52,53], [53,54], [54,55], [55,56], [56,57], [57,58], [58,59],[59,48],
+                        [48, 60], [60,61], [61,62], [62,63], [63,64], [64,65], [65,66], [66,67], [67,60], [54,64]
+                      ]
+        # keypoint colours [68, 3] and segment colours [65, 3]; both lists follow the
+        # landmark order: jaw/brows/nose first, then the eyes, then the mouth
+        # (36 / 12 / 20 points and 31 / 12 / 22 segments respectively). The triples
+        # are handed to OpenCV unchanged.
+
+        self.colors = [[0,0,255] for _ in range(36)] + [[0,255,0] for _ in range(12)]+ [[255,0,0] for _ in range(20)]
+        self.line_colors = [[0,0,255] for _ in range(31)] + [[0,255,0] for _ in range(12)]+ [[255,0,0] for _ in range(22)]
+
+    def draw(self, mvp, H, W):
+        """Project the landmarks with ``mvp`` [4, 4] and rasterise them onto an H x W canvas.
+
+        Returns ``(canvas, pts)``: a float32 [H, W, 3] image in [0, 1] and the [68, 2]
+        array of (x, y) pixel positions of the landmarks.
+        """
+        canvas = np.zeros((H, W, 3), dtype=np.uint8)
+        points2D = self.points3D @ mvp.T # [68, 4]
+        points2D = points2D[:, :3] / points2D[:, 3:] # NDC in [-1, 1]
+        # x runs along the image width and y along the height; the two were swapped
+        # before, which only went unnoticed because the default viewport is square.
+        xs = (points2D[:, 0] + 1) / 2 * W # [68]
+        ys = (points2D[:, 1] + 1) / 2 * H # [68]
+
+        # one filled dot per landmark
+        for i in range(len(self.points3D)):
+            cv2.circle(canvas, (int(xs[i]), int(ys[i])), 4, self.colors[i], thickness=-1)
+
+        # one thin filled ellipse per segment, blended onto the canvas
+        for i in range(len(self.lines)):
+            cur_canvas = canvas.copy()
+            X = xs[self.lines[i]]
+            Y = ys[self.lines[i]]
+            mY = np.mean(Y)
+            mX = np.mean(X)
+            length = ((Y[0] - Y[1]) ** 2 + (X[0] - X[1]) ** 2) ** 0.5
+            angle = math.degrees(math.atan2(Y[0] - Y[1], X[0] - X[1]))
+            polygon = cv2.ellipse2Poly((int(mX), int(mY)), (int(length / 2), 4), int(angle), 0, 360, 1)
+            cv2.fillConvexPoly(cur_canvas, polygon, self.line_colors[i])
+            canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
+        canvas = canvas.astype(np.float32) / 255
+        return canvas, np.stack([xs, ys], axis=1)
+        
+
+class OrbitCamera:
+    """Camera orbiting a look-at point; exposes pose / view / intrinsics / perspective matrices."""
+    def __init__(self, W, H, r=2, fovy=60, near=0.01, far=100):
+        self.W, self.H = W, H
+        self.radius = r  # distance between the camera and the look-at point
+        self.fovy = fovy  # vertical field of view, in degrees
+        self.near, self.far = near, far
+        self.center = np.array([0, 0, 0], dtype=np.float32)  # look-at point
+        self.rot = R.from_matrix(np.eye(3))
+        self.up = np.array([0, 1, 0], dtype=np.float32)  # must stay unit length
+
+    @property
+    def pose(self):
+        """4x4 float32 pose: offset by ``radius`` along z, rotate, then subtract ``center``."""
+        offset = np.eye(4, dtype=np.float32)
+        offset[2, 3] = self.radius  # opengl convention...
+        rotation = np.eye(4, dtype=np.float32)
+        rotation[:3, :3] = self.rot.as_matrix()
+        cam_pose = rotation @ offset
+        cam_pose[:3, 3] -= self.center
+        return cam_pose
+
+    @property
+    def view(self):
+        """Matrix inverse of :attr:`pose`."""
+        return np.linalg.inv(self.pose)
+
+    @property
+    def intrinsics(self):
+        """float32 array ``[focal, focal, W // 2, H // 2]``; focal is derived from ``fovy`` and ``H``."""
+        half_fov = np.radians(self.fovy) / 2
+        focal = self.H / (2 * np.tan(half_fov))
+        return np.array([focal, focal, self.W // 2, self.H // 2], dtype=np.float32)
+
+    @property
+    def perspective(self):
+        """4x4 float32 perspective projection built from ``fovy``, the aspect ratio, near and far."""
+        y = np.tan(np.radians(self.fovy) / 2)
+        aspect = self.W / self.H
+        depth = self.far - self.near
+        rows = [[1/(y*aspect), 0, 0, 0],
+                [0, -1/y, 0, 0],
+                [0, 0, -(self.far+self.near)/depth, -(2*self.far*self.near)/depth],
+                [0, 0, -1, 0]]
+        return np.array(rows, dtype=np.float32)
+
+    def orbit(self, dx, dy):
+        """Rotate by ``-0.05 * dx`` degrees about ``up`` and ``-0.05 * dy`` degrees about the side axis."""
+        side = self.rot.as_matrix()[:3, 0]  # first column of the current rotation matrix
+        about_up = R.from_rotvec(self.up * np.radians(-0.05 * dx))
+        about_side = R.from_rotvec(side * np.radians(-0.05 * dy))
+        self.rot = about_up * about_side * self.rot
+
+    def scale(self, delta):
+        """Zoom: multiply the orbit radius by ``1.1 ** -delta``."""
+        self.radius *= 1.1 ** (-delta)
+
+    def pan(self, dx, dy, dz=0):
+        """Shift the look-at point in place by the rotated, 0.0005-scaled ``[dx, -dy, dz]``."""
+        scaled_rot = 0.0005 * self.rot.as_matrix()[:3, :3]
+        self.center += scaled_rot @ np.array([dx, -dy, dz])
+
+
+class GUI:  # dearpygui viewer/editor for the Landmark3D points, driven by an OrbitCamera
+    def __init__(self, opt):
+        self.opt = opt
+        self.W = opt.W
+        self.H = opt.H
+        self.cam = OrbitCamera(opt.W, opt.H, r=opt.radius, fovy=opt.fovy)
+        # opt must provide W, H, radius and fovy (see the argparse block at the end of the file)
+        self.skel = Landmark3D()
+        # NOTE(review): allocated as (W, H, 3) while Landmark3D.draw returns (H, W, 3); step() replaces it.
+        self.render_buffer = np.zeros((self.W, self.H, 3), dtype=np.float32)
+        self.need_update = True # set whenever the image has to be re-rendered
+
+        self.save_path = 'pose.png'
+        self.mouse_loc = np.array([0, 0])
+        self.points2D = None # [68, 2] pixel positions, filled by step()
+        self.point_idx = 0
+        # create the dearpygui context, build the UI, then render the first frame
+        dpg.create_context()
+        self.register_dpg()
+        self.step()
+
+
+    def __del__(self):
+        dpg.destroy_context()
+
+    # Re-render the landmark image and push it to the "_texture" item when need_update is set.
+    def step(self):
+
+        if self.need_update:
+
+            # mvp
+            mv = self.cam.view # [4, 4]
+            proj = self.cam.perspective # [4, 4]
+            mvp = proj @ mv
+
+            # render the landmark image and remember the projected 2D points
+            self.render_buffer, self.points2D = self.skel.draw(mvp, self.H, self.W)
+
+            self.need_update = False
+
+            dpg.set_value("_texture", self.render_buffer)
+
+    # Build the texture, the viewer and control windows, the mouse handlers and the viewport.
+    def register_dpg(self):
+
+        ### register texture 
+
+        with dpg.texture_registry(show=False):
+            dpg.add_raw_texture(self.W, self.H, self.render_buffer, format=dpg.mvFormat_Float_rgb, tag="_texture")
+
+        ### register window
+
+        # the rendered image, as the primary window
+        with dpg.window(label="Viewer", tag="_primary_window", width=self.W, height=self.H):
+            dpg.add_image("_texture")
+
+        dpg.set_primary_window("_primary_window", True)
+
+        # control window
+        with dpg.window(label="Control", tag="_control_window", width=-1, height=-1):
+
+            # button theme
+            with dpg.theme() as theme_button:
+                with dpg.theme_component(dpg.mvButton):
+                    dpg.add_theme_color(dpg.mvThemeCol_Button, (23, 3, 18))
+                    dpg.add_theme_color(dpg.mvThemeCol_ButtonHovered, (51, 3, 47))
+                    dpg.add_theme_color(dpg.mvThemeCol_ButtonActive, (83, 18, 83))
+                    dpg.add_theme_style(dpg.mvStyleVar_FrameRounding, 5)
+                    dpg.add_theme_style(dpg.mvStyleVar_FramePadding, 3, 3)
+            # write the current render buffer to self.save_path as an 8-bit BGR image
+            def callback_save(sender, app_data):
+                image = (self.render_buffer * 255).astype(np.uint8)
+                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+                cv2.imwrite(self.save_path, image)
+                print(f'[INFO] write image to {self.save_path}')
+
+            def callback_set_save_path(sender, app_data):
+                self.save_path = app_data
+
+            with dpg.group(horizontal=True):
+                dpg.add_button(label="save image", tag="_button_save", callback=callback_save)
+                dpg.bind_item_theme("_button_save", theme_button)
+
+                dpg.add_input_text(label="", default_value=self.save_path, callback=callback_set_save_path)
+
+            # fov slider
+            def callback_set_fovy(sender, app_data):
+                self.cam.fovy = app_data
+                self.need_update = True
+
+            dpg.add_slider_int(label="FoV (vertical)", min_value=1, max_value=120, format="%d deg", default_value=self.cam.fovy, callback=callback_set_fovy)
+
+
+        ### register camera handler
+        # every handler below ignores events unless the primary (viewer) window is focused
+        def callback_camera_drag_rotate(sender, app_data):
+
+            if not dpg.is_item_focused("_primary_window"):
+                return
+
+            # NOTE(review): orbiting is disabled (the call below is commented out), so a
+            # left drag only forces a re-render.
+            # dx = app_data[1]
+            # dy = app_data[2]
+            # self.cam.orbit(dx, dy)
+            self.need_update = True
+
+
+        def callback_camera_wheel_scale(sender, app_data):
+
+            if not dpg.is_item_focused("_primary_window"):
+                return
+
+            delta = app_data
+
+            self.cam.scale(delta)
+            self.need_update = True
+
+
+        def callback_camera_drag_pan(sender, app_data):
+
+            if not dpg.is_item_focused("_primary_window"):
+                return
+
+            dx = app_data[1]
+            dy = app_data[2]
+
+            self.cam.pan(dx, dy)
+            self.need_update = True
+
+        def callback_set_mouse_loc(sender, app_data):
+
+            if not dpg.is_item_focused("_primary_window"):
+                return
+
+            # just the pixel coordinate in image
+            self.mouse_loc = np.array(app_data)
+
+        def callback_skel_select(sender, app_data):
+
+            if not dpg.is_item_focused("_primary_window"):
+                return
+
+            # determine the selected keypoint from mouse_loc
+            if self.points2D is None: return # not prepared
+
+            dist = np.linalg.norm(self.points2D - self.mouse_loc, axis=1) # [68]
+            self.point_idx = np.argmin(dist)
+
+
+        def callback_skel_drag(sender, app_data):
+
+            if not dpg.is_item_focused("_primary_window"):
+                return
+
+            # 2D to 3D delta
+            dx = app_data[1]
+            dy = app_data[2]
+            # move the selected landmark by the drag delta, rotated by the camera rotation
+            self.skel.points3D[self.point_idx, :3] += 0.0002 * self.cam.rot.as_matrix()[:3, :3] @ np.array([dx, -dy, 0])
+            self.need_update = True
+
+
+        with dpg.handler_registry():
+            dpg.add_mouse_drag_handler(button=dpg.mvMouseButton_Left, callback=callback_camera_drag_rotate)
+            dpg.add_mouse_wheel_handler(callback=callback_camera_wheel_scale)
+            dpg.add_mouse_drag_handler(button=dpg.mvMouseButton_Middle, callback=callback_camera_drag_pan)
+
+            # for landmark editing: right click selects the nearest point, right drag moves it
+            dpg.add_mouse_move_handler(callback=callback_set_mouse_loc)
+            dpg.add_mouse_click_handler(button=dpg.mvMouseButton_Right, callback=callback_skel_select)
+            dpg.add_mouse_drag_handler(button=dpg.mvMouseButton_Right, callback=callback_skel_drag)
+
+
+        dpg.create_viewport(title='pose viewer', resizable=False, width=self.W, height=self.H)
+
+        ### global theme
+        with dpg.theme() as theme_no_padding:
+            with dpg.theme_component(dpg.mvAll):
+                # set all padding to 0 to avoid scroll bar
+                dpg.add_theme_style(dpg.mvStyleVar_WindowPadding, 0, 0, category=dpg.mvThemeCat_Core)
+                dpg.add_theme_style(dpg.mvStyleVar_FramePadding, 0, 0, category=dpg.mvThemeCat_Core)
+                dpg.add_theme_style(dpg.mvStyleVar_CellPadding, 0, 0, category=dpg.mvThemeCat_Core)
+
+        dpg.bind_item_theme("_primary_window", theme_no_padding)
+        dpg.focus_item("_primary_window")
+
+        dpg.setup_dearpygui()
+
+        #dpg.show_metrics()
+
+        dpg.show_viewport()
+
+    # Main loop: re-render when needed and draw one dearpygui frame until the GUI stops running.
+    def render(self):
+
+        while dpg.is_dearpygui_running():
+            self.step()
+            dpg.render_dearpygui_frame()
+
+
+if __name__ == '__main__':
+    # Script entry point: read the window size and initial camera settings, then run the GUI.
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--W', type=int, default=512, help="GUI width")
+    parser.add_argument('--H', type=int, default=512, help="GUI height")
+    parser.add_argument('--radius', type=float, default=3, help="default GUI camera radius from center")
+    parser.add_argument('--fovy', type=float, default=25, help="default GUI camera fovy")
+
+    opt = parser.parse_args()
+    # GUI.__init__ builds the dearpygui UI; render() loops while dearpygui is running.
+    gui = GUI(opt)
+    gui.render()
\ No newline at end of file
diff --git a/utils/visualization/ffmpeg_utils.py b/utils/visualization/ffmpeg_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4f4a11613a5a22db70264d76f8cbd90c5933329
--- /dev/null
+++ b/utils/visualization/ffmpeg_utils.py
@@ -0,0 +1,35 @@
+import os
+import subprocess
+
+def imgs_to_video(img_dir, video_path, audio_path=None, verbose=True):
+    """Encode the numbered PNG frames in ``img_dir`` into an H.264 video with ffmpeg.
+
+    Args:
+        img_dir: directory holding the frames, matched with the ``%5d.png`` pattern.
+        video_path: output video file; its parent directory is created if missing.
+        audio_path: optional audio file added as a second ffmpeg input.
+        verbose: pass ``False`` to run ffmpeg with ``-v quiet``.
+    """
+    # Argument list instead of a shell string, so paths containing spaces or shell
+    # metacharacters reach ffmpeg verbatim.
+    cmd = ["ffmpeg", "-i", f"{img_dir}/%5d.png"]
+    if audio_path is not None:
+        cmd += ["-i", str(audio_path), "-strict", "-2"]
+    cmd += ["-c:v", "libx264", "-pix_fmt", "yuv420p", "-b:v", "2000k", "-y", "-shortest"]
+    if verbose is False:
+        cmd += ["-v", "quiet"]
+    cmd.append(str(video_path))
+    # dirname is '' for a bare file name and os.makedirs('') raises, so guard it.
+    out_dir = os.path.dirname(video_path)
+    if out_dir:
+        os.makedirs(out_dir, exist_ok=True)
+    try:
+        # As with os.system before, a non-zero ffmpeg exit status does not raise.
+        subprocess.run(cmd)
+    except FileNotFoundError:
+        print("ffmpeg executable not found; no video was written")
+
+
+if __name__ == '__main__':
+    imgs_to_video('infer_out/tmp_imgs', 'infer_out/tmp_imgs/out.mp4', 'data/raw/val_wavs/zozo.wav')
+    imgs_to_video('infer_out/tmp_imgs', 'infer_out/tmp_imgs/out2.mp4', 'data/raw/val_wavs/zozo.wav')
\ No newline at end of file
diff --git a/utils/visualization/lm_visualizer.py b/utils/visualization/lm_visualizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..654285b2cc8a5567bd591c243ddcac04b35e220b
--- /dev/null
+++ b/utils/visualization/lm_visualizer.py
@@ -0,0 +1,74 @@
+import numpy as np
+import cv2
+from data_util.face3d_helper import Face3DHelper
+from utils.visualization.ffmpeg_utils import imgs_to_video
+import os
+import shutil
+
+face3d_helper = Face3DHelper('deep_3drecon/BFM', keypoint_mode='mediapipe')
+# lrs3_stats = np.load('data/binary/lrs3/stats.npy',allow_pickle=True).tolist()
+# lrs3_idexp_mean = lrs3_stats['idexp_lm3d_mean'].reshape([1,204])
+# lrs3_idexp_std = lrs3_stats['idexp_lm3d_std'].reshape([1,204])
+
+
+def render_idexp_npy_to_lm_video(npy_name, out_video_name, audio_name=None):
+    """Render the landmarks stored in ``npy_name`` as a video of numbered 2D points.
+
+    Args:
+        npy_name: either a plain array of identity+expression landmark offsets with one
+            row per frame, or a pickled dict with ``'id'`` and ``'exp'`` coefficients
+            from which the offsets are reconstructed.
+        out_video_name: path of the video to write; frames are staged in a
+            ``tmp_lm3d_imgs`` directory next to it and removed afterwards.
+        audio_name: optional audio file forwarded to ``imgs_to_video``.
+    """
+    try:
+        idexp_lm3d = np.load(npy_name)
+    except ValueError:
+        # np.load rejects pickled content unless allow_pickle=True, so this branch
+        # handles the coefficient-dict format.
+        coeff = np.load(npy_name, allow_pickle=True).tolist()
+        t = coeff['exp'].shape[0]
+        if len(coeff['id']) == 1:
+            coeff['id'] = np.repeat(coeff['id'], t, axis=0)
+        idexp_lm3d = face3d_helper.reconstruct_idexp_lm3d_np(coeff['id'], coeff['exp'])
+    # Frame count for both input formats; it used to be set only in the fallback
+    # branch, so a plain landmark array failed with a NameError further down.
+    t = len(idexp_lm3d)
+    idexp_lm3d = idexp_lm3d.reshape([t, -1])
+    lm3d = idexp_lm3d / 10 + face3d_helper.key_mean_shape.squeeze().reshape([1, -1]).cpu().numpy()
+    lm3d = lm3d.reshape([t, -1, 3])
+
+    tmp_img_dir = os.path.join(os.path.dirname(out_video_name), "tmp_lm3d_imgs")
+    os.makedirs(tmp_img_dir, exist_ok=True)
+
+    WH = 512
+    lm3d = (lm3d * WH/2 + WH/2).astype(int)
+    font = cv2.FONT_HERSHEY_SIMPLEX
+    color = (255, 0, 0)
+    for i_img in range(len(lm3d)):
+        lm2d = lm3d[i_img, :, :2]  # x/y pixel coordinates of this frame
+        img = np.ones([WH, WH, 3], dtype=np.uint8) * 255
+        for x, y in lm2d:
+            # plain ints: OpenCV may refuse numpy integer scalars as coordinates
+            img = cv2.circle(img, center=(int(x), int(y)), radius=3, color=color, thickness=-1)
+        # flip the dots vertically, then write each index at the flipped position
+        img = cv2.flip(img, 0)
+        for i, (x, y) in enumerate(lm2d):
+            img = cv2.putText(img, f"{i}", org=(int(x), int(WH - y)), fontFace=font, fontScale=0.3, color=color)
+
+        out_name = os.path.join(tmp_img_dir, f'{format(i_img, "05d")}.png')
+        cv2.imwrite(out_name, img)
+    imgs_to_video(tmp_img_dir, out_video_name, audio_name)
+    # same effect as the former `rm -r`, without going through a shell
+    shutil.rmtree(tmp_img_dir, ignore_errors=True)
+    print(f"landmark video saved at {out_video_name}")
+
+if __name__ == '__main__':
+    import argparse
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument('--npy_name', type=str, default="infer_out/May/pred_lm3d/zozo.npy", help='the path of landmark .npy')
+    argparser.add_argument('--audio_name', type=str, default="data/raw/val_wavs/zozo.wav", help='the path of audio file')
+    argparser.add_argument('--out_path', type=str, default="infer_out/May/visualized_lm3d/zozo.mp4", help='the path to save visualization results')
+    args = argparser.parse_args()
+    render_idexp_npy_to_lm_video(args.npy_name, args.out_path, audio_name=args.audio_name)
\ No newline at end of file
diff --git a/utils/visualization/plot_attention.py b/utils/visualization/plot_attention.py
new file mode 100644
index 0000000000000000000000000000000000000000..155ad1004905e0bc66f2274f6374d06696f2bae1
--- /dev/null
+++ b/utils/visualization/plot_attention.py
@@ -0,0 +1,23 @@
+import numpy as np
+import cv2
+from utils.commons.tensor_utils import convert_to_np
+
+
+def plot_attention_img(attention_img, color_bar='jet'):
+    """
+    attention_img: raw attention in network, tensor or array, in 0~1 scale, shape [H, W,]
+    color_bar: name of an OpenCV colormap (jet, summer, hot, ...); it is resolved to the
+        cv2.COLORMAP_<NAME> constant, see https://blog.csdn.net/loveliuzz/article/details/73648505
+    return: ready-to-visualize attention img in -1~1 scale, shape [H, W, 3], channels reversed to RGB.
+    """
+    attention_img = convert_to_np(attention_img)
+    assert attention_img.ndim == 2
+    # Clip before the uint8 cast: a value slightly outside [0, 1] can otherwise wrap
+    # around and end up at the opposite end of the colormap.
+    attention_img = np.uint8(np.clip(255 * attention_img, 0, 255))
+    # Every supported name maps onto a cv2 constant of the same (upper-cased) name;
+    # an unknown name raises AttributeError, as before.
+    color_map = getattr(cv2, f"COLORMAP_{color_bar.upper()}")
+    attention_img = cv2.applyColorMap(attention_img, color_map) / 127.5 - 1
+    attention_img = attention_img[:, :, ::-1]  # reverse the channel order (BGR -> RGB)
+    return attention_img
\ No newline at end of file
diff --git a/utils/visualization/plot_spec.py b/utils/visualization/plot_spec.py
new file mode 100644
index 0000000000000000000000000000000000000000..03851c09bea12081c3a2ac60813d4c5f3a1c21ca
--- /dev/null
+++ b/utils/visualization/plot_spec.py
@@ -0,0 +1,93 @@
+import matplotlib
+
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+
+LINE_COLORS = ['w', 'r', 'orange', 'k', 'cyan', 'm', 'b', 'lime', 'g', 'brown', 'navy']
+
+
+def _to_numpy(x):
+    """Return a tensor as a numpy array (detached, on CPU); pass anything else through."""
+    if isinstance(x, torch.Tensor):
+        return x.detach().cpu().numpy()
+    return x
+
+
+def spec_to_figure(spec, vmin=None, vmax=None, title='', f0s=None, dur_info=None):
+    """Draw a spectrogram, optionally with duration boundaries and f0 curves.
+
+    Args:
+        spec: 2D tensor/array; it is transposed for plotting, so axis 0 runs along x.
+        vmin, vmax: colour limits forwarded to ``plt.pcolor``.
+        title: figure title.
+        f0s: one f0 curve or a dict ``{label: curve}``; drawn on a twin y-axis (0-1000).
+        dur_info: optional dict with ``'txt'``, ``'dur_gt'`` and optionally ``'dur_pred'``;
+            cumulative durations are marked with blue (gt) / red (pred) vertical lines.
+
+    Returns:
+        The matplotlib figure.
+    """
+    spec = _to_numpy(spec)
+    H = spec.shape[1] // 2
+    fig = plt.figure(figsize=(12, 6))
+    plt.title(title)
+    plt.pcolor(spec.T, vmin=vmin, vmax=vmax)
+
+    if dur_info is not None:
+        assert isinstance(dur_info, dict)
+        txt = dur_info['txt']
+        dur_gt = np.cumsum(_to_numpy(dur_info['dur_gt'])).astype(int)
+        for i in range(len(dur_gt)):
+            shift = (i % 8) + 1
+            plt.text(dur_gt[i], shift * 4, txt[i])
+            plt.vlines(dur_gt[i], 0, H // 2, colors='b')  # blue is gt
+        plt.xlim(0, dur_gt[-1])
+        if 'dur_pred' in dur_info:
+            dur_pred = np.cumsum(_to_numpy(dur_info['dur_pred'])).astype(int)
+            for i in range(len(dur_pred)):
+                shift = (i % 8) + 1
+                plt.text(dur_pred[i], H + shift * 4, txt[i])
+                plt.vlines(dur_pred[i], H, H * 1.5, colors='r')  # red is pred
+            plt.xlim(0, max(dur_gt[-1], dur_pred[-1]))
+    if f0s is not None:
+        ax = plt.gca()
+        ax2 = ax.twinx()
+
+        if not isinstance(f0s, dict):
+            f0s = {'f0': f0s}
+        for i, (k, f0) in enumerate(f0s.items()):
+            if f0 is not None:
+                f0 = _to_numpy(f0)
+                # cycle through the palette so more curves than colours cannot raise IndexError
+                color = LINE_COLORS[i % len(LINE_COLORS)]
+                ax2.plot(np.arange(len(f0)) + 0.5, f0, label=k, c=color, linewidth=1, alpha=0.5)
+        ax2.set_ylim(0, 1000)
+        ax2.legend()
+    return fig
+
+
+def align_to_figure(align, dur_info):
+    """Draw an alignment matrix on a 0-1 colour scale with ground-truth boundaries.
+
+    Args:
+        align: 2D tensor/array; transposed for plotting.
+        dur_info: None, or a dict with ``'txt'`` and ``'dur_gt'``; the cumulative
+            durations are halved before being drawn as blue vertical lines.
+
+    Returns:
+        The matplotlib figure.
+    """
+    align = _to_numpy(align)
+    H = align.shape[1]
+    fig = plt.figure(figsize=(12, 6))
+    plt.pcolor(align.T, vmin=0, vmax=1)
+    if dur_info is not None:
+        assert isinstance(dur_info, dict)
+        txt = dur_info['txt']
+        dur_gt = np.cumsum(_to_numpy(dur_info['dur_gt'])).astype(int) // 2
+        for i in range(len(dur_gt)):
+            plt.text(dur_gt[i], i, txt[i], color='red')
+            plt.vlines(dur_gt[i], 0, H, colors='b')  # blue is gt
+    return fig
diff --git a/utils/visualization/t-sne.py b/utils/visualization/t-sne.py
new file mode 100644
index 0000000000000000000000000000000000000000..a032265040885d161762303e7332d2c6442f8d44
--- /dev/null
+++ b/utils/visualization/t-sne.py
@@ -0,0 +1,132 @@
+from openTSNE import TSNE
+import numpy as np
+import matplotlib
+import matplotlib.pyplot as plt
+import random
+
+def visualize(
+    x,
+    y,
+    ax=None,
+    title=None,
+    draw_legend=True,
+    draw_centers=False,
+    draw_cluster_labels=False,
+    colors=None,
+    legend_kwargs=None,
+    label_order=None,
+    **kwargs
+):
+    """Scatter-plot a 2D embedding ``x`` coloured by the per-point class labels ``y``.
+
+    Args:
+        x: array whose first two columns are the point coordinates.
+        y: sequence (list or array) with one class label per row of ``x``.
+        ax: matplotlib axes to draw on; a new 10x8 inch figure is created if None.
+        title: optional axes title.
+        draw_legend: add one legend entry per class.
+        draw_centers: mark the coordinate-wise median of every class.
+        draw_cluster_labels: write the class name above its center (needs draw_centers).
+        colors: mapping label -> colour; defaults to the matplotlib colour cycle.
+        legend_kwargs: extra keyword arguments merged into the ``ax.legend`` call.
+        label_order: optional ordering of the classes; must contain every label in ``y``.
+        **kwargs: ``alpha`` and ``s`` for the scatter, ``fontsize`` for the cluster labels.
+    """
+    if ax is None:
+        _, ax = matplotlib.pyplot.subplots(figsize=(10, 8))
+
+    if title is not None:
+        ax.set_title(title)
+
+    plot_params = {"alpha": kwargs.get("alpha", 0.6), "s": kwargs.get("s", 1)}
+
+    # Create main plot
+    if label_order is not None:
+        assert all(np.isin(np.unique(y), label_order))
+        classes = [l for l in label_order if l in np.unique(y)]
+    else:
+        classes = np.unique(y)
+    if colors is None:
+        default_colors = matplotlib.rcParams["axes.prop_cycle"]
+        colors = {k: v["color"] for k, v in zip(classes, default_colors())}
+
+    point_colors = list(map(colors.get, y))
+    ax.scatter(x[:, 0], x[:, 1], c=point_colors, rasterized=True, **plot_params)
+
+    # Plot class centers
+    if draw_centers:
+        # ``label == y`` is a reliable element-wise mask only when y is an ndarray; with a
+        # plain list it can collapse to a single bool, so convert once up front.
+        labels = np.asarray(y)
+        centers = np.array([np.median(x[labels == yi, :2], axis=0) for yi in classes])
+
+        center_colors = list(map(colors.get, classes))
+        ax.scatter(centers[:, 0], centers[:, 1], c=center_colors, s=48, alpha=1, edgecolor="k")
+
+        # Draw center labels
+        if draw_cluster_labels:
+            fontsize = kwargs.get("fontsize", 6)
+            for idx, label in enumerate(classes):
+                ax.text(centers[idx, 0], centers[idx, 1] + 2.2, label,
+                        fontsize=fontsize, horizontalalignment="center")
+
+    # Hide ticks and axis
+    ax.set_xticks([])
+    ax.set_yticks([])
+    ax.axis("off")
+
+    if draw_legend:
+        legend_handles = [
+            matplotlib.lines.Line2D(
+                [], [], marker="s", color="w", markerfacecolor=colors[yi], ms=10,
+                alpha=1, linewidth=0, label=yi, markeredgecolor="k",
+            )
+            for yi in classes
+        ]
+        legend_kwargs_ = dict(loc="best", bbox_to_anchor=(0.05, 0.5), frameon=False, )
+        if legend_kwargs is not None:
+            legend_kwargs_.update(legend_kwargs)
+        ax.legend(handles=legend_handles, **legend_kwargs_)
+
+
+tsne = TSNE(
+    perplexity=30,
+    metric="euclidean",
+    n_jobs=8,
+    random_state=42,
+    verbose=True,
+)
+# Draw 10000 random rows of the LRS3 predictions (no NumPy seed is set in this script).
+idexp_lm3d_pred_lrs3 = np.load("infer_out/tmp_npys/lrs3_pred_all.npy")
+idx = np.random.choice(np.arange(len(idexp_lm3d_pred_lrs3)), 10000)
+idexp_lm3d_pred_lrs3 = idexp_lm3d_pred_lrs3[idx]
+
+person_ds = np.load("data/binary/videos/May/trainval_dataset.npy", allow_pickle=True).tolist()
+person_idexp_mean = person_ds['idexp_lm3d_mean'].reshape([1,204])
+person_idexp_std = person_ds['idexp_lm3d_std'].reshape([1,204])
+person_idexp_lm3d_train = np.stack([s['idexp_lm3d_normalized'].reshape([204,]) for s in person_ds['train_samples']])
+person_idexp_lm3d_val = np.stack([s['idexp_lm3d_normalized'].reshape([204,]) for s in person_ds['val_samples']])
+# NOTE(review): absolute, user-specific path below -- the script only runs where it exists.
+lrs3_stats = np.load('/home/yezhenhui/datasets/binary/lrs3_0702/stats.npy',allow_pickle=True).tolist()
+lrs3_idexp_mean = lrs3_stats['idexp_lm3d_mean'].reshape([1,204])
+lrs3_idexp_std = lrs3_stats['idexp_lm3d_std'].reshape([1,204])
+person_idexp_lm3d_train = person_idexp_lm3d_train * person_idexp_std + person_idexp_mean
+# person_idexp_lm3d_train = (person_idexp_lm3d_train - lrs3_idexp_mean) / lrs3_idexp_std
+person_idexp_lm3d_val = person_idexp_lm3d_val * person_idexp_std + person_idexp_mean
+# person_idexp_lm3d_val = (person_idexp_lm3d_val - lrs3_idexp_mean) / lrs3_idexp_std
+idexp_lm3d_pred_lrs3 = idexp_lm3d_pred_lrs3 * lrs3_idexp_std + lrs3_idexp_mean
+
+# The two prediction files below are used as loaded, without applying mean/std.
+idexp_lm3d_pred_vae = np.load("infer_out/tmp_npys/pred_exp_0_vae.npy").reshape([-1,204])
+idexp_lm3d_pred_postnet = np.load("infer_out/tmp_npys/pred_exp_0_postnet_hubert.npy").reshape([-1,204])
+# idexp_lm3d_pred_postnet = idexp_lm3d_pred_postnet * lrs3_idexp_std + lrs3_idexp_mean
+# One t-SNE fit over all groups; person_idexp_lm3d_val is computed above but not included.
+idexp_lm3d_all = np.concatenate([idexp_lm3d_pred_lrs3, person_idexp_lm3d_train,idexp_lm3d_pred_vae, idexp_lm3d_pred_postnet])
+idexp_lm3d_all_emb = tsne.fit(idexp_lm3d_all) # array(float64) [B,204]==>[B, 2]
+# z_p_emb = tsne.fit(z_p) # array(float64) [B,50]==>[B, 2]
+y1 = ["pred_lrs3" for _ in range(len(idexp_lm3d_pred_lrs3))]
+y2 = ["person_train" for _ in range(len(person_idexp_lm3d_train))]
+y3 = ["vae" for _ in range(len(idexp_lm3d_pred_vae))]
+y4 = ["postnet" for _ in range(len(idexp_lm3d_pred_postnet))]
+visualize(idexp_lm3d_all_emb, y1+y2+y3+y4)
+plt.savefig("infer_out/tmp_npys/lrs3_pred_all_0k.png")
\ No newline at end of file
diff --git a/utils/visualization/t-sne_0423.py b/utils/visualization/t-sne_0423.py
new file mode 100644
index 0000000000000000000000000000000000000000..82334483e8e596ca376b1954a92c5150fb8b639f
--- /dev/null
+++ b/utils/visualization/t-sne_0423.py
@@ -0,0 +1,167 @@
+from openTSNE import TSNE
+import numpy as np
+import matplotlib
+import matplotlib.pyplot as plt
+import random
+
+def visualize(
+    x,
+    y,
+    ax=None,
+    title=None,
+    draw_legend=True,
+    draw_centers=False,
+    draw_cluster_labels=False,
+    colors=None,
+    legend_kwargs=None,
+    label_order=None,
+    **kwargs
+):
+    """Scatter-plot a 2D embedding ``x`` coloured by the per-point class labels ``y``.
+
+    Args:
+        x: array whose first two columns are the point coordinates.
+        y: sequence (list or array) with one class label per row of ``x``.
+        ax: matplotlib axes to draw on; a new 10x8 inch figure is created if None.
+        title: optional axes title.
+        draw_legend: add one legend entry per class.
+        draw_centers: mark the coordinate-wise median of every class.
+        draw_cluster_labels: write the class name above its center (needs draw_centers).
+        colors: mapping label -> colour; defaults to the matplotlib colour cycle.
+        legend_kwargs: extra keyword arguments merged into the ``ax.legend`` call.
+        label_order: optional ordering of the classes; must contain every label in ``y``.
+        **kwargs: ``alpha`` and ``s`` for the scatter, ``fontsize`` for the cluster labels.
+    """
+    if ax is None:
+        _, ax = matplotlib.pyplot.subplots(figsize=(10, 8))
+
+    if title is not None:
+        ax.set_title(title)
+
+    plot_params = {"alpha": kwargs.get("alpha", 0.6), "s": kwargs.get("s", 1)}
+
+    # Create main plot
+    if label_order is not None:
+        assert all(np.isin(np.unique(y), label_order))
+        classes = [l for l in label_order if l in np.unique(y)]
+    else:
+        classes = np.unique(y)
+    if colors is None:
+        default_colors = matplotlib.rcParams["axes.prop_cycle"]
+        colors = {k: v["color"] for k, v in zip(classes, default_colors())}
+
+    point_colors = list(map(colors.get, y))
+    ax.scatter(x[:, 0], x[:, 1], c=point_colors, rasterized=True, **plot_params)
+
+    # Plot class centers
+    if draw_centers:
+        # ``label == y`` is a reliable element-wise mask only when y is an ndarray; with a
+        # plain list it can collapse to a single bool, so convert once up front.
+        labels = np.asarray(y)
+        centers = np.array([np.median(x[labels == yi, :2], axis=0) for yi in classes])
+
+        center_colors = list(map(colors.get, classes))
+        ax.scatter(centers[:, 0], centers[:, 1], c=center_colors, s=48, alpha=1, edgecolor="k")
+
+        # Draw center labels
+        if draw_cluster_labels:
+            fontsize = kwargs.get("fontsize", 6)
+            for idx, label in enumerate(classes):
+                ax.text(centers[idx, 0], centers[idx, 1] + 2.2, label,
+                        fontsize=fontsize, horizontalalignment="center")
+
+    # Hide ticks and axis
+    ax.set_xticks([])
+    ax.set_yticks([])
+    ax.axis("off")
+
+    if draw_legend:
+        legend_handles = [
+            matplotlib.lines.Line2D(
+                [], [], marker="s", color="w", markerfacecolor=colors[yi], ms=10,
+                alpha=1, linewidth=0, label=yi, markeredgecolor="k",
+            )
+            for yi in classes
+        ]
+        legend_kwargs_ = dict(loc="best", bbox_to_anchor=(0.05, 0.5), frameon=False, )
+        if legend_kwargs is not None:
+            legend_kwargs_.update(legend_kwargs)
+        ax.legend(handles=legend_handles, **legend_kwargs_)
+
+
+tsne = TSNE(
+    perplexity=30,
+    metric="euclidean",
+    n_jobs=8,
+    random_state=42,
+    verbose=True,
+)
+
+# idexp_lm3d_pred_lrs3 = np.load("autio2motion_dream_it_possible.npy")
+# idx = np.random.choice(np.arange(len(idexp_lm3d_pred_lrs3)), 10000)
+# idexp_lm3d_pred_lrs3 = idexp_lm3d_pred_lrs3[idx]
+
+person_ds = np.load("data/binary/videos/May/trainval_dataset.npy", allow_pickle=True).tolist()
+person_idexp_mean = person_ds['idexp_lm3d_mean'].reshape([1,204])
+person_idexp_std = person_ds['idexp_lm3d_std'].reshape([1,204])
+person_idexp_lm3d_train = np.stack([s['idexp_lm3d_normalized'].reshape([204,]) for s in person_ds['train_samples']])
+person_idexp_lm3d_val = np.stack([s['idexp_lm3d_normalized'].reshape([204,]) for s in person_ds['val_samples']])
+person_idexp_lm3d_train = person_idexp_lm3d_train * person_idexp_std + person_idexp_mean
+person_idexp_lm3d_val = person_idexp_lm3d_val * person_idexp_std + person_idexp_mean
+# NOTE(review): person_idexp_lm3d_val is de-normalised here but not used in the plots below.
+# lrs3_stats = np.load('/home/yezhenhui/datasets/binary/lrs3_0702/stats.npy',allow_pickle=True).tolist()
+# lrs3_idexp_mean = lrs3_stats['idexp_lm3d_mean'].reshape([1,204])
+# lrs3_idexp_std = lrs3_stats['idexp_lm3d_std'].reshape([1,204])
+# person_idexp_lm3d_train = (person_idexp_lm3d_train - lrs3_idexp_mean) / lrs3_idexp_std
+# person_idexp_lm3d_val = (person_idexp_lm3d_val - lrs3_idexp_mean) / lrs3_idexp_std
+# idexp_lm3d_pred_lrs3 = idexp_lm3d_pred_lrs3 * lrs3_idexp_std + lrs3_idexp_mean
+# Three plots follow, one per slice of the prediction files: rows [0:1000], [1000:2000], [2000:2500].
+# NOTE(review): the three .npy files are re-loaded from disk for every slice.
+idexp_lm3d_pred_vae = np.load("autio2motion_dream_it_possible.npy").reshape([-1,204])[:1000]
+idexp_lm3d_pred_postnet = np.load("postnet_dream_it_possible.npy").reshape([-1,204])[:1000]
+idexp_lm3d_pred_lle = np.load("lle_dream_it_possible.npy").reshape([-1,204])[:1000]
+# idexp_lm3d_pred_postnet = idexp_lm3d_pred_postnet * lrs3_idexp_std + lrs3_idexp_mean
+
+idexp_lm3d_all = np.concatenate([person_idexp_lm3d_train,idexp_lm3d_pred_vae, idexp_lm3d_pred_postnet,idexp_lm3d_pred_lle])
+idexp_lm3d_all_emb = tsne.fit(idexp_lm3d_all) # array(float64) [B,204]==>[B, 2]
+# z_p_emb = tsne.fit(z_p) # array(float64) [B,50]==>[B, 2]
+# y1 = ["pred_lrs3" for _ in range(len(idexp_lm3d_pred_lrs3))]
+y2 = ["person_train" for _ in range(len(person_idexp_lm3d_train))]
+y3 = ["vae" for _ in range(len(idexp_lm3d_pred_vae))]
+y4 = ["postnet" for _ in range(len(idexp_lm3d_pred_postnet))]
+y5 = ["lle" for _ in range(len(idexp_lm3d_pred_lle))]
+visualize(idexp_lm3d_all_emb, y2+y3+y4+y5)
+plt.savefig("0.png")
+# second slice: rows 1000:2000 -> 1.png
+idexp_lm3d_pred_vae = np.load("autio2motion_dream_it_possible.npy").reshape([-1,204])[1000:2000]
+idexp_lm3d_pred_postnet = np.load("postnet_dream_it_possible.npy").reshape([-1,204])[1000:2000]
+idexp_lm3d_pred_lle = np.load("lle_dream_it_possible.npy").reshape([-1,204])[1000:2000]
+# idexp_lm3d_pred_postnet = idexp_lm3d_pred_postnet * lrs3_idexp_std + lrs3_idexp_mean
+
+idexp_lm3d_all = np.concatenate([person_idexp_lm3d_train,idexp_lm3d_pred_vae, idexp_lm3d_pred_postnet,idexp_lm3d_pred_lle])
+idexp_lm3d_all_emb = tsne.fit(idexp_lm3d_all) # array(float64) [B,204]==>[B, 2]
+# z_p_emb = tsne.fit(z_p) # array(float64) [B,50]==>[B, 2]
+# y1 = ["pred_lrs3" for _ in range(len(idexp_lm3d_pred_lrs3))]
+y2 = ["person_train" for _ in range(len(person_idexp_lm3d_train))]
+y3 = ["vae" for _ in range(len(idexp_lm3d_pred_vae))]
+y4 = ["postnet" for _ in range(len(idexp_lm3d_pred_postnet))]
+y5 = ["lle" for _ in range(len(idexp_lm3d_pred_lle))]
+visualize(idexp_lm3d_all_emb, y2+y3+y4+y5)
+plt.savefig("1.png")
+
+# third slice: rows 2000:2500 -> 2.png
+idexp_lm3d_pred_vae = np.load("autio2motion_dream_it_possible.npy").reshape([-1,204])[2000:2500]
+idexp_lm3d_pred_postnet = np.load("postnet_dream_it_possible.npy").reshape([-1,204])[2000:2500]
+idexp_lm3d_pred_lle = np.load("lle_dream_it_possible.npy").reshape([-1,204])[2000:2500]
+# idexp_lm3d_pred_postnet = idexp_lm3d_pred_postnet * lrs3_idexp_std + lrs3_idexp_mean
+
+idexp_lm3d_all = np.concatenate([person_idexp_lm3d_train,idexp_lm3d_pred_vae, idexp_lm3d_pred_postnet,idexp_lm3d_pred_lle])
+idexp_lm3d_all_emb = tsne.fit(idexp_lm3d_all) # array(float64) [B,204]==>[B, 2]
+# z_p_emb = tsne.fit(z_p) # array(float64) [B,50]==>[B, 2]
+# y1 = ["pred_lrs3" for _ in range(len(idexp_lm3d_pred_lrs3))]
+y2 = ["person_train" for _ in range(len(person_idexp_lm3d_train))]
+y3 = ["vae" for _ in range(len(idexp_lm3d_pred_vae))]
+y4 = ["postnet" for _ in range(len(idexp_lm3d_pred_postnet))]
+y5 = ["lle" for _ in range(len(idexp_lm3d_pred_lle))]
+visualize(idexp_lm3d_all_emb, y2+y3+y4+y5)
+plt.savefig("2.png")
\ No newline at end of file
diff --git a/utils/visualization/vis_cam3d/camera_parameter_loader.py b/utils/visualization/vis_cam3d/camera_parameter_loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..88cf9856f976e1243d1a4b1c28645a232d99dbd0
--- /dev/null
+++ b/utils/visualization/vis_cam3d/camera_parameter_loader.py
@@ -0,0 +1,52 @@
+import json
+import os
+import numpy as np
+import quaternion
+
+class CameraParameterLoader:
+    """Load camera intrinsic and extrinsic parameters from JSON files as matrices."""
+
+    def __init__(self):
+        print('initialize camera parameter lodaer')
+
+    def get_intrinsic(self, path):
+        """Return the 3x3 intrinsic matrix stored under directory ``path``.
+
+        Reads ``_camera_settings.json`` inside ``path`` and uses the
+        ``intrinsic_settings`` of the first ``camera_settings`` entry, which
+        must contain ``cx``, ``cy``, ``fx``, ``fy`` and ``s``.
+
+        Returns:
+            ``np.ndarray`` laid out as ``[[fx, s, cx], [0, fy, cy], [0, 0, 1]]``.
+        """
+        settings_file = os.path.join(path, '_camera_settings.json')
+        with open(settings_file, 'r') as f:
+            intrinsic = json.load(f)['camera_settings'][0]['intrinsic_settings']
+        # Keys are looked up in a fixed order: cx, cy, fx, fy, then ``s``.
+        cx, cy, fx, fy, skew = (intrinsic[key] for key in ('cx', 'cy', 'fx', 'fy', 's'))
+        top_row = [fx, skew, cx]
+        middle_row = [0, fy, cy]
+        return np.array([top_row, middle_row, [0, 0, 1]])
+
+    def get_extrinsic(self, path):
+        """Return the 4x4 extrinsic matrix described by the JSON file ``path``.
+
+        The file's ``camera_data`` entry supplies ``location_worldframe`` (its
+        first three values become the translation column) and
+        ``quaternion_xyzw_worldframe`` (the rotation, indexed as x, y, z, w).
+
+        Returns:
+            ``np.ndarray`` of the form ``[[R, t], [0, 0, 0, 1]]``.
+        """
+        with open(path, 'r') as f:
+            camera_data = json.load(f)['camera_data']
+        location = camera_data['location_worldframe']
+        xyzw = camera_data['quaternion_xyzw_worldframe']
+
+        # np.quaternion takes the scalar part first, so index 3 (w) leads.
+        rotation = quaternion.as_rotation_matrix(
+            np.quaternion(xyzw[3], xyzw[0], xyzw[1], xyzw[2]))
+        translation = np.array([[location[axis]] for axis in range(3)])
+        upper = np.concatenate([rotation, translation], axis=1)
+        bottom = np.array([[0, 0, 0, 1]])
+        return np.concatenate([upper, bottom], axis=0)
diff --git a/utils/visualization/vis_cam3d/camera_pose_visualizer.py b/utils/visualization/vis_cam3d/camera_pose_visualizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..934f85371f6af9a1bf8a7d2f1570b379f76df978
--- /dev/null
+++ b/utils/visualization/vis_cam3d/camera_pose_visualizer.py
@@ -0,0 +1,113 @@
+import numpy as np
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+from matplotlib.patches import Patch
+from mpl_toolkits.mplot3d.art3d import Poly3DCollection
+
+class CameraPoseVisualizer:
+    """Render camera poses as frustum pyramids on a 3D matplotlib axes.
+
+    ``view_mode`` picks the viewing direction: ``'xz'`` and ``'xy'`` look
+    straight at that plane; any other value keeps matplotlib's default view.
+    """
+
+    def __init__(self, xlim=(-1, 1), ylim=(-1, 1), zlim=(-10, 0), view_mode='none'):
+        """Create a 5.12 x 5.12 inch figure holding one 3D axes with the given limits."""
+        self._setup(xlim, ylim, zlim, view_mode)
+
+    def _setup(self, xlim, ylim, zlim, view_mode):
+        """Store the settings and build a fresh figure and 3D axes."""
+        # Keep private list copies so an instance never aliases the caller's
+        # (or the default) sequences.
+        self.xlim = list(xlim)
+        self.ylim = list(ylim)
+        self.zlim = list(zlim)
+        self.view_mode = view_mode
+        self.fig = plt.figure(figsize=(5.12, 5.12))
+        self.ax = self.fig.add_subplot(projection='3d')
+
+        # https://matplotlib.org/3.6.0/api/toolkits/mplot3d/view_angles.html#toolkit-mplot3d-view-angles
+        if view_mode == 'xz':
+            self.ax.view_init(elev=0, azim=-90, roll=0)  # XZ
+        elif view_mode == 'xy':
+            self.ax.view_init(elev=90, azim=-90, roll=0)  # XY
+
+        self.ax.set_aspect("auto")
+        self.ax.set_xlim(self.xlim)
+        self.ax.set_ylim(self.ylim)
+        self.ax.set_zlim(self.zlim)
+        self.ax.set_xlabel('x')
+        self.ax.set_ylabel('y')
+        self.ax.set_zlabel('z')
+
+    def extrinsic2pyramid(self, extrinsic, color='r', focal_len_scaled=0.5, aspect_ratio=0.15):
+        """Draw one camera pyramid on a fresh figure and return the rendered image.
+
+        The pyramid's apex is the camera-frame origin and its square base sits
+        at ``z = focal_len_scaled`` with half-width
+        ``focal_len_scaled * aspect_ratio``; all vertices are then mapped
+        through ``extrinsic``.
+
+        Args:
+            extrinsic: 4x4 homogeneous transform applied to the vertices.
+            color: Face and edge colour of the pyramid.
+            focal_len_scaled: Distance from the apex to the base.
+            aspect_ratio: Base half-width as a fraction of ``focal_len_scaled``.
+
+        Returns:
+            ``uint8`` array of shape ``(height, width, 3)`` with the RGB pixels.
+        """
+        # Every call starts from an empty figure, so the image shows only this pose.
+        self.reset(self.xlim, self.ylim, self.zlim, self.view_mode)
+        half = focal_len_scaled * aspect_ratio
+        vertex_std = np.array([[0, 0, 0, 1],
+                               [half, -half, focal_len_scaled, 1],
+                               [half, half, focal_len_scaled, 1],
+                               [-half, half, focal_len_scaled, 1],
+                               [-half, -half, focal_len_scaled, 1]])
+        # Transform, then drop the homogeneous coordinate.
+        apex, c1, c2, c3, c4 = (vertex_std @ extrinsic.T)[:, :3]
+        meshes = [
+            [apex, c1, c2],
+            [apex, c2, c3],
+            [apex, c3, c4],
+            [apex, c4, c1],
+            [c1, c2, c3, c4],
+        ]
+        self.ax.add_collection3d(
+            Poly3DCollection(meshes, facecolors=color, linewidths=0.3, edgecolors=color, alpha=0.35))
+
+        canvas = self.fig.canvas
+        canvas.draw()
+        # buffer_rgba() replaces tostring_rgb(), which newer matplotlib releases
+        # no longer provide; the alpha channel is dropped to keep an RGB result.
+        rgba = np.asarray(canvas.buffer_rgba())
+        return np.ascontiguousarray(rgba[..., :3])
+
+    def customize_legend(self, list_label):
+        """Attach a legend with one rainbow-coloured patch per label in ``list_label``."""
+        list_handle = [
+            Patch(color=plt.cm.rainbow(idx / len(list_label)), label=label)
+            for idx, label in enumerate(list_label)
+        ]
+        self.ax.legend(loc='right', bbox_to_anchor=(1.8, 0.5), handles=list_handle)
+
+    def colorbar(self, max_frame_length):
+        """Add a vertical rainbow colorbar spanning frames 0..``max_frame_length``."""
+        cmap = mpl.cm.rainbow
+        norm = mpl.colors.Normalize(vmin=0, vmax=max_frame_length)
+        # ``ax`` must be given explicitly: the mappable is not attached to any
+        # axes, so matplotlib cannot infer where to place the colorbar.
+        self.fig.colorbar(mpl.cm.ScalarMappable(norm=norm, cmap=cmap), ax=self.ax,
+                          orientation='vertical', label='Frame Number')
+
+    def show(self):
+        """Title the axes and display the figure."""
+        self.ax.set_title('Extrinsic Parameters')
+        plt.show()
+
+    def reset(self, xlim=(-50, 50), ylim=(-50, 50), zlim=(0, 50), view_mode='none'):
+        """Discard the current figure and start a new one with the given settings."""
+        # Close the old figure first; otherwise pyplot keeps every figure alive.
+        plt.close(self.fig)
+        self._setup(xlim, ylim, zlim, view_mode)
\ No newline at end of file